# Intro

In [1]:
from abc import abstractmethod, ABC
from os import PathLike
from typing import Dict, Union, Optional, Iterable


class base_peft(ABC):
    """Abstract base class for PEFT-style model wrappers.

    Stores the settings shared by every concrete wrapper (base model
    identifier and mask token id) and declares the hooks subclasses must
    implement.
    """

    def __init__(self, base_model_name: Union[str, PathLike[str]], mask_token_id: int):
        """Record the shared settings.

        Args:
            base_model_name: Name or filesystem path identifying the base model.
            mask_token_id: Id of the mask token for the base model.
        """
        self.base_model_name = base_model_name
        self.mask_token_id = mask_token_id

    @abstractmethod
    def activate_task_for_training(self, peft_name: str):
        """Prepare the module registered under ``peft_name`` for training.

        NOTE(review): the original declaration was an unfinished stub with no
        parameter list or body (a SyntaxError). The signature here mirrors
        ``finetune_task`` and is an assumption -- confirm against subclasses.
        """
        pass

    @abstractmethod
    def finetune_task(self, peft_name: str, train_dataset, validation_dataset):
        """Fine-tune the module registered under ``peft_name``.

        Args:
            peft_name: Identifier of the module/task to fine-tune.
            train_dataset: Dataset used for training.
            validation_dataset: Dataset used for validation.
        """
        pass

In [1]:
cd /home/mohalisad/Developer/ProgressivePrompts

/home/mohalisad/Developer/ProgressivePrompts


In [2]:
from utils import print_system_info
# Print the Python/Torch/Transformers/Adapterhub versions and the GPU name,
# so the environment this run used is recorded alongside the results.
print_system_info()

Python version is: 3.9.17
Torch version is: 1.13.1+cu117
Nvidia device is: NVIDIA GeForce RTX 4090
Transformers version is: 4.26.1
Adapterhub version is: 3.2.1


# Dataset

In [31]:
from _datasets import AutoLoad
from config import load_config
from _models import BertAdapterModelWrapper, TokenizerMan


# Load the notebook settings from config.yaml (path is relative to the working directory).
# Later cells read config.base_model, config.tasks and config.hf_trainer_params.
config = load_config('config.yaml')

In [39]:
# Uncomment the next two lines to enable verbose (debug-level) transformers logging:
# import transformers
# transformers.logging.set_verbosity_debug()

# Build the adapter model wrapper from the configured base model.
adapter_wrapper = BertAdapterModelWrapper(
    base_model_name=config.base_model.name,
    mask_token_id=config.base_model.mask_token_id
)
# Tokenizer helper for the same base model; later cells use its `.tokenizer`
# attribute and `get_col_fn()` method.
tokenizer_man = TokenizerMan(config.base_model.kind, config.base_model.name)

loading configuration file config.json from cache at /home/mohalisad/.cache/huggingface/hub/models--bert-base-uncased/snapshots/1dbc166cf8765166998eff31ade2eb64c8a40076/config.json
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.26.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file model.safetensors from cache at /home/mohalisad/.cache/huggingface/hub/models--bert-base-uncased/snapshots/1dbc166cf8765166998eff31ade2eb64c8a40076/model.safetensors
Generate co

In [40]:
# Dataset loader; the training loops call its get_and_map(tokenizer, task_name).
auto_loader = AutoLoad()

In [41]:
# For every configured task: fetch its data through the loader (which is handed
# the tokenizer), register an adapter with a classification head, then fine-tune it.
for task_name in config.tasks:
    loader_out = auto_loader.get_and_map(tokenizer_man.tokenizer, task_name)
    # Size the classification head from the number of entries in the task's output range.
    num_labels = len(loader_out['output']['range'])
    adapter_wrapper.add_classification_adapter(task_name, num_labels=num_labels)
    # Train on the 'train' split, validate on 'valid'; trainer options come from the config.
    adapter_wrapper.finetune_adapter(
        task_name,
        loader_out['train'],
        loader_out['valid'],
        tokenizer_man.get_col_fn(),
        config.hf_trainer_params.to_dict()
    )

Map:   0%|          | 0/8551 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Map:   0%|          | 0/1063 [00:00<?, ? examples/s]

Adding adapter 'glue:cola'.
Adding head 'glue:cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 8551
  Num Epochs = 15
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 4020
  Number of trainable parameters = 1486658
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a

Epoch,Training Loss,Validation Loss,Accuracy,F1-score-1,F1-score-ma
1,No log,0.521243,0.772771,0.854512,0.667956
2,0.484900,0.475989,0.795781,0.86629,0.717121
3,0.484900,0.473902,0.799616,0.868471,0.723974
4,0.390000,0.454408,0.815916,0.877707,0.752807
5,0.390000,0.460564,0.822627,0.880414,0.768593
6,0.330900,0.421414,0.831256,0.883752,0.78803
7,0.330900,0.45282,0.833174,0.885375,0.789519
8,0.292000,0.465746,0.826462,0.881777,0.777825
9,0.292000,0.491992,0.832215,0.885396,0.786169
10,0.255500,0.508437,0.827421,0.883117,0.776723


***** Running Evaluation *****
  Num examples = 1043
  Batch size = 32
Saving model checkpoint to /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-268
Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-268/glue:cola/adapter_config.json
Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-268/glue:cola/pytorch_adapter.bin
Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-268/glue:cola/head_config.json
Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-268/glue:cola/pytorch_model_head.bin
Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-268/glue:cola/head_config.json
Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-268/glue:cola/pytorch_model_head.bin
***** Running Evaluation *****
  Num examples = 1043
  Batch size = 32
Saving model checkpoint to /home/mohalisad/Developer/Progressi

# OpenDelta

In [24]:
from bigmodelvis import Visualization
from transformers import BertForSequenceClassification
from opendelta import AdapterModel

In [42]:
# Load the configured checkpoint as a BERT sequence-classification model.
# NOTE(review): no `num_labels` is passed, so the classification head gets the
# library's default size -- confirm it matches the tasks trained below.
base_model = BertForSequenceClassification.from_pretrained(config.base_model.name)

loading configuration file config.json from cache at /home/mohalisad/.cache/huggingface/hub/models--bert-base-uncased/snapshots/1dbc166cf8765166998eff31ade2eb64c8a40076/config.json
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.26.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file model.safetensors from cache at /home/mohalisad/.cache/huggingface/hub/models--bert-base-uncased/snapshots/1dbc166cf8765166998eff31ade2eb64c8a40076/model.safetensors
Some weight

In [43]:
# Show the module structure of base_model before the adapter modules are added
# (trailing `;` suppresses the cell's return-value repr).
Visualization(base_model).structure_graph();

In [44]:
# Wrap base_model with OpenDelta's AdapterModel, using a bottleneck dimension of 48.
delta_model = AdapterModel(base_model, bottleneck_dim=48)
# Freeze everything except the delta tuning modules and the newly initialized
# classification head, which stay tunable.
delta_model.freeze_module(exclude=["deltas", "classifier"])

In [45]:
# Show the module structure of base_model again, after the AdapterModel wrapping
# and freezing done in the previous cell.
Visualization(base_model).structure_graph();

In [None]:
from transformers import TrainingArguments, Trainer
from sklearn.metrics import classification_report


def compute_metrics(pred):
    """Compute accuracy and F1 scores for a classification evaluation.

    Args:
        pred: Object exposing ``label_ids`` (reference labels) and
            ``predictions`` (per-class scores); the predicted class is the
            argmax over the last axis of ``predictions``.

    Returns:
        dict with:
            'accuracy': overall accuracy,
            'f1-score-1': F1 score of class ``1`` (0.0 when class 1 occurs in
                neither the references nor the predictions),
            'f1-score-ma': macro-averaged F1 score.
    """
    true_labels = pred.label_ids.ravel()
    pred_labels = pred.predictions.argmax(-1).ravel()
    report = classification_report(true_labels, pred_labels, output_dict=True)
    # The report only contains a '1' entry when label 1 appears in the
    # references or the predictions; fall back to 0.0 instead of raising KeyError.
    class_one_scores = report.get('1', {})
    return {
        'accuracy': report['accuracy'],
        'f1-score-1': class_one_scores.get('f1-score', 0.0),
        'f1-score-ma': report['macro avg']['f1-score']
    }


def train_model(input_model, task_name, train_dataset, eval_dataset, col_fn):
    """Fine-tune ``input_model`` with the Hugging Face ``Trainer``.

    Evaluation and checkpointing both run once per epoch; all remaining
    training options are read from the notebook-level
    ``config.hf_trainer_params``. Metrics are produced by ``compute_metrics``.

    Args:
        input_model: Model to train.
        task_name: Accepted for the caller's convenience; not used in the body.
        train_dataset: Dataset passed to the trainer for training.
        eval_dataset: Dataset passed to the trainer for evaluation.
        col_fn: Callable passed to the trainer as ``data_collator``.
    """
    per_epoch = "epoch"
    hf_args = TrainingArguments(
        evaluation_strategy=per_epoch,
        save_strategy=per_epoch,
        # Keep every dataset column so the labels reach the model; the trainer
        # would otherwise drop columns it considers unused.
        remove_unused_columns=False,
        **config.hf_trainer_params.to_dict()
    )

    runner = Trainer(
        model=input_model,
        args=hf_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        data_collator=col_fn,
        compute_metrics=compute_metrics,
    )
    runner.train()


# Fine-tune the OpenDelta-wrapped model on each configured task in turn.
# NOTE(review): the same `base_model` object is passed for every task, so
# weights carry over from one task to the next -- confirm this is intended.
for task_name in config.tasks:
    loader_out = auto_loader.get_and_map(tokenizer_man.tokenizer, task_name)
    # NOTE(review): num_labels is computed but never used below, and base_model
    # was created without an explicit num_labels -- confirm the head size fits each task.
    num_labels = len(loader_out['output']['range'])
    train_model(
        base_model,
        task_name,
        loader_out['train'],
        loader_out['valid'],
        tokenizer_man.get_col_fn()
    )

In [47]:
# Show the module structure of the adapter wrapper's underlying model.
Visualization(adapter_wrapper.model).structure_graph();

In [None]:
# NOTE(review): `results` is only assigned further down (by the
# glue_metric.compute(...) cell), so this cell raises NameError on a fresh
# top-to-bottom run -- move it below that cell or remove it.
results

In [None]:
from _datasets import GLUEHelper
        
# GLUE helper; the cells below index its `datasets` mapping by task name.
gl_helper = GLUEHelper()

In [None]:
# Scan the first 1000 STS-B training examples and print the first one whose
# label is exactly 0, then stop. Nothing is printed if none of them matches.
for n in range(0, 1000):
    out = gl_helper.datasets['stsb']['train'][n]
    if out['label'] == 0.:
        print(out)
        break

In [None]:
from evaluate import load
# Load the GLUE metric in its 'stsb' configuration from the `evaluate` library.
glue_metric = load('glue', 'stsb')

In [None]:
# Try the metric on two hand-written prediction/reference pairs and display the result.
results = glue_metric.compute(predictions=[-0.5, -0.3], references=[-0.5, 1])
results

In [None]:
# Display the entry stored under 'mnli' in the helper's datasets mapping.
gl_helper.datasets['mnli']

In [None]:
# Display the entry stored under 'mnli_matched' in the helper's datasets mapping.
gl_helper.datasets['mnli_matched']


In [None]:
# Display the entry stored under 'mnli_mismatched' in the helper's datasets mapping.
gl_helper.datasets['mnli_mismatched']


In [None]:
import transformers


# Print the version of the transformers package importable in this kernel.
print(transformers.__version__)

In [None]:
pip install adapter-transformers