{ "cells": [ { "cell_type": "markdown", "metadata": { "pycharm": { "name": "#%% md\n" }, "tags": [] }, "source": [ "# Intro" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "tags": [] }, "outputs": [], "source": [ "from abc import abstractmethod, ABC\n", "from os import PathLike\n", "from typing import Dict, Union, Optional, Iterable\n", "\n", "\n", "class base_peft(ABC):\n", " def __init__(self, base_model_name: Union[str, PathLike[str]], mask_token_id: int):\n", " self.base_model_name = base_model_name\n", " self.mask_token_id = mask_token_id\n", "\n", " @abstractmethod\n", " def activate_task_for_training(self, peft_name: str):\n", " # NOTE: this method was left unfinished in the original cell; it is made\n", " # abstract here and the peft_name-only signature is an assumption,\n", " # mirroring finetune_task below.\n", " pass\n", "\n", " @abstractmethod\n", " def finetune_task(self, peft_name: str, train_dataset, validation_dataset):\n", " pass" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2023-08-15T13:16:40.910406Z", "start_time": "2023-08-15T13:16:40.860981Z" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/home/mohalisad/Developer/ProgressivePrompts\n" ] } ], "source": [ "cd /home/mohalisad/Developer/ProgressivePrompts" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2023-08-15T13:16:42.467311Z", "start_time": "2023-08-15T13:16:42.313951Z" }, "pycharm": { "is_executing": true, "name": "#%%\n" }, "scrolled": true, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Python version is: 3.9.17\n", "Torch version is: 1.13.1+cu117\n", "Nvidia device is: NVIDIA GeForce RTX 4090\n", "Transformers version is: 4.26.1\n", "Adapterhub version is: 3.2.1\n" ] } ], "source": [ "from utils import print_system_info\n", "print_system_info()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Dataset" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "tags": [] }, "outputs": [], "source": [ "from _datasets import AutoLoad\n", "from config import load_config\n", "from _models import BertAdapterModelWrapper, TokenizerMan\n", "\n", "\n", "config = load_config('config.yaml')" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "loading configuration file config.json from cache at /home/mohalisad/.cache/huggingface/hub/models--bert-base-uncased/snapshots/1dbc166cf8765166998eff31ade2eb64c8a40076/config.json\n", "Model config BertConfig {\n", " \"architectures\": [\n", " \"BertForMaskedLM\"\n", " ],\n", " \"attention_probs_dropout_prob\": 0.1,\n", " \"classifier_dropout\": null,\n", " \"gradient_checkpointing\": false,\n", " \"hidden_act\": \"gelu\",\n", " \"hidden_dropout_prob\": 0.1,\n", " \"hidden_size\": 768,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 3072,\n", " \"layer_norm_eps\": 1e-12,\n", " \"max_position_embeddings\": 512,\n", " \"model_type\": \"bert\",\n", " \"num_attention_heads\": 12,\n", " \"num_hidden_layers\": 12,\n", " \"pad_token_id\": 0,\n", " \"position_embedding_type\": \"absolute\",\n", " \"transformers_version\": \"4.26.1\",\n", " \"type_vocab_size\": 2,\n", " \"use_cache\": true,\n", " \"vocab_size\": 30522\n", "}\n", "\n", "loading weights file model.safetensors from cache at /home/mohalisad/.cache/huggingface/hub/models--bert-base-uncased/snapshots/1dbc166cf8765166998eff31ade2eb64c8a40076/model.safetensors\n", "Generate config GenerationConfig {\n", " \"pad_token_id\": 0,\n", " \"transformers_version\": \"4.26.1\"\n", "}\n", "\n", "Some weights of the model checkpoint at bert-base-uncased were not used when 
initializing BertAdapterModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias']\n", "- This IS expected if you are initializing BertAdapterModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertAdapterModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "All the weights of BertAdapterModel were initialized from the model checkpoint at bert-base-uncased.\n", "If your task is similar to the task the model of the checkpoint was trained on, you can already use BertAdapterModel for predictions without further training.\n", "Generation config file not found, using a generation config created from the model config.\n", "loading file vocab.txt from cache at /home/mohalisad/.cache/huggingface/hub/models--bert-base-uncased/snapshots/1dbc166cf8765166998eff31ade2eb64c8a40076/vocab.txt\n", "loading file tokenizer.json from cache at /home/mohalisad/.cache/huggingface/hub/models--bert-base-uncased/snapshots/1dbc166cf8765166998eff31ade2eb64c8a40076/tokenizer.json\n", "loading file added_tokens.json from cache at None\n", "loading file special_tokens_map.json from cache at None\n", "loading file tokenizer_config.json from cache at /home/mohalisad/.cache/huggingface/hub/models--bert-base-uncased/snapshots/1dbc166cf8765166998eff31ade2eb64c8a40076/tokenizer_config.json\n", "loading configuration file config.json from cache at /home/mohalisad/.cache/huggingface/hub/models--bert-base-uncased/snapshots/1dbc166cf8765166998eff31ade2eb64c8a40076/config.json\n", "Model config BertConfig {\n", " \"_name_or_path\": \"bert-base-uncased\",\n", " \"architectures\": [\n", " \"BertForMaskedLM\"\n", " ],\n", " \"attention_probs_dropout_prob\": 0.1,\n", " \"classifier_dropout\": null,\n", " \"gradient_checkpointing\": false,\n", " \"hidden_act\": \"gelu\",\n", " \"hidden_dropout_prob\": 0.1,\n", " \"hidden_size\": 768,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 3072,\n", " \"layer_norm_eps\": 1e-12,\n", " \"max_position_embeddings\": 512,\n", " \"model_type\": \"bert\",\n", " \"num_attention_heads\": 12,\n", " \"num_hidden_layers\": 12,\n", " \"pad_token_id\": 0,\n", " \"position_embedding_type\": \"absolute\",\n", " \"transformers_version\": \"4.26.1\",\n", " \"type_vocab_size\": 2,\n", " \"use_cache\": true,\n", " \"vocab_size\": 30522\n", "}\n", "\n" ] } ], "source": [ "# import transformers\n", "# transformers.logging.set_verbosity_debug()\n", "adapter_wrapper = BertAdapterModelWrapper(\n", " base_model_name=config.base_model.name,\n", " mask_token_id=config.base_model.mask_token_id\n", ")\n", "tokenizer_man = TokenizerMan(config.base_model.kind, config.base_model.name)" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "tags": [] }, "outputs": [], "source": [ "auto_loader = AutoLoad()" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "tags": [] }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f983a58646a54aa6841312408f00f491", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/8551 [00:00\n", " \n", " \n", 
" [4020/4020 01:08, Epoch 15/15]\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
<table>\n",
" <thead>\n",
" <tr><th>Epoch</th><th>Training Loss</th><th>Validation Loss</th><th>Accuracy</th><th>F1-score-1</th><th>F1-score-ma</th></tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr><td>1</td><td>No log</td><td>0.521243</td><td>0.772771</td><td>0.854512</td><td>0.667956</td></tr>\n",
" <tr><td>2</td><td>0.484900</td><td>0.475989</td><td>0.795781</td><td>0.866290</td><td>0.717121</td></tr>\n",
" <tr><td>3</td><td>0.484900</td><td>0.473902</td><td>0.799616</td><td>0.868471</td><td>0.723974</td></tr>\n",
" <tr><td>4</td><td>0.390000</td><td>0.454408</td><td>0.815916</td><td>0.877707</td><td>0.752807</td></tr>\n",
" <tr><td>5</td><td>0.390000</td><td>0.460564</td><td>0.822627</td><td>0.880414</td><td>0.768593</td></tr>\n",
" <tr><td>6</td><td>0.330900</td><td>0.421414</td><td>0.831256</td><td>0.883752</td><td>0.788030</td></tr>\n",
" <tr><td>7</td><td>0.330900</td><td>0.452820</td><td>0.833174</td><td>0.885375</td><td>0.789519</td></tr>\n",
" <tr><td>8</td><td>0.292000</td><td>0.465746</td><td>0.826462</td><td>0.881777</td><td>0.777825</td></tr>\n",
" <tr><td>9</td><td>0.292000</td><td>0.491992</td><td>0.832215</td><td>0.885396</td><td>0.786169</td></tr>\n",
" <tr><td>10</td><td>0.255500</td><td>0.508437</td><td>0.827421</td><td>0.883117</td><td>0.776723</td></tr>\n",
" <tr><td>11</td><td>0.255500</td><td>0.519635</td><td>0.837009</td><td>0.888889</td><td>0.791567</td></tr>\n",
" <tr><td>12</td><td>0.232300</td><td>0.522434</td><td>0.828380</td><td>0.883388</td><td>0.779262</td></tr>\n",
" <tr><td>13</td><td>0.232300</td><td>0.532363</td><td>0.835091</td><td>0.886991</td><td>0.791013</td></tr>\n",
" <tr><td>14</td><td>0.219900</td><td>0.557935</td><td>0.831256</td><td>0.885566</td><td>0.782199</td></tr>\n",
" <tr><td>15</td><td>0.202800</td><td>0.547973</td><td>0.832215</td><td>0.885845</td><td>0.784695</td></tr>\n",
" </tbody>\n",
" </table>\n",
"
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "***** Running Evaluation *****\n", " Num examples = 1043\n", " Batch size = 32\n", "Saving model checkpoint to /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-268\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-268/glue:cola/adapter_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-268/glue:cola/pytorch_adapter.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-268/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-268/glue:cola/pytorch_model_head.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-268/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-268/glue:cola/pytorch_model_head.bin\n", "***** Running Evaluation *****\n", " Num examples = 1043\n", " Batch size = 32\n", "Saving model checkpoint to /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-536\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-536/glue:cola/adapter_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-536/glue:cola/pytorch_adapter.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-536/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-536/glue:cola/pytorch_model_head.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-536/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-536/glue:cola/pytorch_model_head.bin\n", "***** Running Evaluation *****\n", " Num examples = 1043\n", " Batch size = 32\n", "Saving model checkpoint to /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-804\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-804/glue:cola/adapter_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-804/glue:cola/pytorch_adapter.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-804/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-804/glue:cola/pytorch_model_head.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-804/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-804/glue:cola/pytorch_model_head.bin\n", "***** Running Evaluation *****\n", " Num examples = 1043\n", " Batch size = 32\n", "Saving model checkpoint to /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1072\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1072/glue:cola/adapter_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1072/glue:cola/pytorch_adapter.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1072/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1072/glue:cola/pytorch_model_head.bin\n", 
"Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1072/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1072/glue:cola/pytorch_model_head.bin\n", "***** Running Evaluation *****\n", " Num examples = 1043\n", " Batch size = 32\n", "Saving model checkpoint to /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1340\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1340/glue:cola/adapter_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1340/glue:cola/pytorch_adapter.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1340/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1340/glue:cola/pytorch_model_head.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1340/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1340/glue:cola/pytorch_model_head.bin\n", "***** Running Evaluation *****\n", " Num examples = 1043\n", " Batch size = 32\n", "Saving model checkpoint to /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1608\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1608/glue:cola/adapter_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1608/glue:cola/pytorch_adapter.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1608/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1608/glue:cola/pytorch_model_head.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1608/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1608/glue:cola/pytorch_model_head.bin\n", "***** Running Evaluation *****\n", " Num examples = 1043\n", " Batch size = 32\n", "Saving model checkpoint to /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1876\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1876/glue:cola/adapter_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1876/glue:cola/pytorch_adapter.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1876/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1876/glue:cola/pytorch_model_head.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1876/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-1876/glue:cola/pytorch_model_head.bin\n", "***** Running Evaluation *****\n", " Num examples = 1043\n", " Batch size = 32\n", "Saving model checkpoint to /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2144\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2144/glue:cola/adapter_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2144/glue:cola/pytorch_adapter.bin\n", "Configuration saved in 
/home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2144/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2144/glue:cola/pytorch_model_head.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2144/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2144/glue:cola/pytorch_model_head.bin\n", "***** Running Evaluation *****\n", " Num examples = 1043\n", " Batch size = 32\n", "Saving model checkpoint to /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2412\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2412/glue:cola/adapter_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2412/glue:cola/pytorch_adapter.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2412/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2412/glue:cola/pytorch_model_head.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2412/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2412/glue:cola/pytorch_model_head.bin\n", "***** Running Evaluation *****\n", " Num examples = 1043\n", " Batch size = 32\n", "Saving model checkpoint to /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2680\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2680/glue:cola/adapter_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2680/glue:cola/pytorch_adapter.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2680/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2680/glue:cola/pytorch_model_head.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2680/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2680/glue:cola/pytorch_model_head.bin\n", "***** Running Evaluation *****\n", " Num examples = 1043\n", " Batch size = 32\n", "Saving model checkpoint to /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2948\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2948/glue:cola/adapter_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2948/glue:cola/pytorch_adapter.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2948/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2948/glue:cola/pytorch_model_head.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2948/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-2948/glue:cola/pytorch_model_head.bin\n", "***** Running Evaluation *****\n", " Num examples = 1043\n", " Batch size = 32\n", "Saving model checkpoint to /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-3216\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-3216/glue:cola/adapter_config.json\n", "Module 
weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-3216/glue:cola/pytorch_adapter.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-3216/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-3216/glue:cola/pytorch_model_head.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-3216/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-3216/glue:cola/pytorch_model_head.bin\n", "***** Running Evaluation *****\n", " Num examples = 1043\n", " Batch size = 32\n", "Saving model checkpoint to /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-3484\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-3484/glue:cola/adapter_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-3484/glue:cola/pytorch_adapter.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-3484/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-3484/glue:cola/pytorch_model_head.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-3484/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-3484/glue:cola/pytorch_model_head.bin\n", "***** Running Evaluation *****\n", " Num examples = 1043\n", " Batch size = 32\n", "Saving model checkpoint to /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-3752\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-3752/glue:cola/adapter_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-3752/glue:cola/pytorch_adapter.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-3752/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-3752/glue:cola/pytorch_model_head.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-3752/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-3752/glue:cola/pytorch_model_head.bin\n", "***** Running Evaluation *****\n", " Num examples = 1043\n", " Batch size = 32\n", "Saving model checkpoint to /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-4020\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-4020/glue:cola/adapter_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-4020/glue:cola/pytorch_adapter.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-4020/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-4020/glue:cola/pytorch_model_head.bin\n", "Configuration saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-4020/glue:cola/head_config.json\n", "Module weights saved in /home/mohalisad/Developer/ProgressivePrompts/cp3/checkpoint-4020/glue:cola/pytorch_model_head.bin\n", "\n", "\n", "Training completed. 
Do not forget to share your model on huggingface.co/models =)\n", "\n", "\n" ] } ], "source": [ "for task_name in config.tasks:\n", " loader_out = auto_loader.get_and_map(tokenizer_man.tokenizer, task_name)\n", " num_labels = len(loader_out['output']['range'])\n", " adapter_wrapper.add_classification_adapter(task_name, num_labels=num_labels)\n", " adapter_wrapper.finetune_adapter(\n", " task_name,\n", " loader_out['train'],\n", " loader_out['valid'],\n", " tokenizer_man.get_col_fn(),\n", " config.hf_trainer_params.to_dict()\n", " )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Opendelta" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "tags": [] }, "outputs": [], "source": [ "from bigmodelvis import Visualization\n", "from transformers import BertForSequenceClassification\n", "from opendelta import AdapterModel" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "loading configuration file config.json from cache at /home/mohalisad/.cache/huggingface/hub/models--bert-base-uncased/snapshots/1dbc166cf8765166998eff31ade2eb64c8a40076/config.json\n", "Model config BertConfig {\n", " \"architectures\": [\n", " \"BertForMaskedLM\"\n", " ],\n", " \"attention_probs_dropout_prob\": 0.1,\n", " \"classifier_dropout\": null,\n", " \"gradient_checkpointing\": false,\n", " \"hidden_act\": \"gelu\",\n", " \"hidden_dropout_prob\": 0.1,\n", " \"hidden_size\": 768,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 3072,\n", " \"layer_norm_eps\": 1e-12,\n", " \"max_position_embeddings\": 512,\n", " \"model_type\": \"bert\",\n", " \"num_attention_heads\": 12,\n", " \"num_hidden_layers\": 12,\n", " \"pad_token_id\": 0,\n", " \"position_embedding_type\": \"absolute\",\n", " \"transformers_version\": \"4.26.1\",\n", " \"type_vocab_size\": 2,\n", " \"use_cache\": true,\n", " \"vocab_size\": 30522\n", "}\n", "\n", "loading weights file model.safetensors from cache at /home/mohalisad/.cache/huggingface/hub/models--bert-base-uncased/snapshots/1dbc166cf8765166998eff31ade2eb64c8a40076/model.safetensors\n", "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias']\n", "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] } ], "source": [ "base_model = BertForSequenceClassification.from_pretrained(config.base_model.name)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "

root\n",
       "├── bert (BertModel)\n",
       "│   ├── embeddings (BertEmbeddings)\n",
       "│   │   ├── word_embeddings (Embedding) weight:[30522, 768]\n",
       "│   │   ├── position_embeddings (Embedding) weight:[512, 768]\n",
       "│   │   ├── token_type_embeddings (Embedding) weight:[2, 768]\n",
       "│   │   └── LayerNorm (LayerNorm) weight:[768] bias:[768]\n",
       "│   ├── encoder (BertEncoder)\n",
       "│   │   └── layer (ModuleList)\n",
       "│   │       └── 0-11(BertLayer)\n",
       "│   │           ├── attention (BertAttention)\n",
       "│   │           │   ├── self (BertSelfAttention)\n",
       "│   │           │   │   ├── query,key,value(Linear) weight:[768, 768] bias:[768]\n",
       "│   │           │   │   └── prefix_tuning (PrefixTuningShim)\n",
       "│   │           │   │       └── pool (PrefixTuningPool)\n",
       "│   │           │   └── output (BertSelfOutput)\n",
       "│   │           │       ├── dense (Linear) weight:[768, 768] bias:[768]\n",
       "│   │           │       └── LayerNorm (LayerNorm) weight:[768] bias:[768]\n",
       "│   │           ├── intermediate (BertIntermediate)\n",
       "│   │           │   └── dense (Linear) weight:[3072, 768] bias:[3072]\n",
       "│   │           └── output (BertOutput)\n",
       "│   │               ├── dense (Linear) weight:[768, 3072] bias:[768]\n",
       "│   │               └── LayerNorm (LayerNorm) weight:[768] bias:[768]\n",
       "│   ├── pooler (BertPooler)\n",
       "│   │   └── dense (Linear) weight:[768, 768] bias:[768]\n",
       "│   └── prefix_tuning (PrefixTuningPool)\n",
       "└── classifier (Linear) weight:[2, 768] bias:[2]\n",
       "
\n" ], "text/plain": [ "\u001b[37mroot\u001b[0m\n", "├── \u001b[37mbert \u001b[0m\u001b[32m(BertModel)\u001b[0m\n", "│ ├── \u001b[37membeddings \u001b[0m\u001b[32m(BertEmbeddings)\u001b[0m\n", "│ │ ├── \u001b[37mword_embeddings \u001b[0m\u001b[32m(Embedding) \u001b[0m\u001b[36mweight:[30522, 768]\u001b[0m\n", "│ │ ├── \u001b[37mposition_embeddings \u001b[0m\u001b[32m(Embedding) \u001b[0m\u001b[36mweight:[512, 768]\u001b[0m\n", "│ │ ├── \u001b[37mtoken_type_embeddings \u001b[0m\u001b[32m(Embedding) \u001b[0m\u001b[36mweight:[2, 768]\u001b[0m\n", "│ │ └── \u001b[37mLayerNorm \u001b[0m\u001b[32m(LayerNorm) \u001b[0m\u001b[36mweight:[768] \u001b[0m\u001b[36mbias:[768]\u001b[0m\n", "│ ├── \u001b[37mencoder \u001b[0m\u001b[32m(BertEncoder)\u001b[0m\n", "│ │ └── \u001b[37mlayer \u001b[0m\u001b[32m(ModuleList)\u001b[0m\n", "│ │ └── \u001b[31m0-11\u001b[0m\u001b[32m(BertLayer)\u001b[0m\n", "│ │ ├── \u001b[37mattention \u001b[0m\u001b[32m(BertAttention)\u001b[0m\n", "│ │ │ ├── \u001b[37mself \u001b[0m\u001b[32m(BertSelfAttention)\u001b[0m\n", "│ │ │ │ ├── \u001b[31mquery,key,value\u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[36mweight:[768, 768] \u001b[0m\u001b[36mbias:[768]\u001b[0m\n", "│ │ │ │ └── \u001b[37mprefix_tuning \u001b[0m\u001b[32m(PrefixTuningShim)\u001b[0m\n", "│ │ │ │ └── \u001b[37mpool \u001b[0m\u001b[32m(PrefixTuningPool)\u001b[0m\n", "│ │ │ └── \u001b[37moutput \u001b[0m\u001b[32m(BertSelfOutput)\u001b[0m\n", "│ │ │ ├── \u001b[37mdense \u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[36mweight:[768, 768] \u001b[0m\u001b[36mbias:[768]\u001b[0m\n", "│ │ │ └── \u001b[37mLayerNorm \u001b[0m\u001b[32m(LayerNorm) \u001b[0m\u001b[36mweight:[768] \u001b[0m\u001b[36mbias:[768]\u001b[0m\n", "│ │ ├── \u001b[37mintermediate \u001b[0m\u001b[32m(BertIntermediate)\u001b[0m\n", "│ │ │ └── \u001b[37mdense \u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[36mweight:[3072, 768] \u001b[0m\u001b[36mbias:[3072]\u001b[0m\n", "│ │ └── \u001b[37moutput \u001b[0m\u001b[32m(BertOutput)\u001b[0m\n", "│ │ ├── \u001b[37mdense \u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[36mweight:[768, 3072] \u001b[0m\u001b[36mbias:[768]\u001b[0m\n", "│ │ └── \u001b[37mLayerNorm \u001b[0m\u001b[32m(LayerNorm) \u001b[0m\u001b[36mweight:[768] \u001b[0m\u001b[36mbias:[768]\u001b[0m\n", "│ ├── \u001b[37mpooler \u001b[0m\u001b[32m(BertPooler)\u001b[0m\n", "│ │ └── \u001b[37mdense \u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[36mweight:[768, 768] \u001b[0m\u001b[36mbias:[768]\u001b[0m\n", "│ └── \u001b[37mprefix_tuning \u001b[0m\u001b[32m(PrefixTuningPool)\u001b[0m\n", "└── \u001b[37mclassifier \u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[36mweight:[2, 768] \u001b[0m\u001b[36mbias:[2]\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "Visualization(base_model).structure_graph();" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "tags": [] }, "outputs": [], "source": [ "delta_model = AdapterModel(base_model, bottleneck_dim=48)\n", "# leave the delta tuning modules and the newly initialized classification head tunable.\n", "delta_model.freeze_module(exclude=[\"deltas\", \"classifier\"])" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
root\n",
       "├── bert (BertModel)\n",
       "│   ├── embeddings (BertEmbeddings)\n",
       "│   │   ├── word_embeddings (Embedding) weight:[30522, 768]\n",
       "│   │   ├── position_embeddings (Embedding) weight:[512, 768]\n",
       "│   │   ├── token_type_embeddings (Embedding) weight:[2, 768]\n",
       "│   │   └── LayerNorm (LayerNorm) weight:[768] bias:[768]\n",
       "│   ├── encoder (BertEncoder)\n",
       "│   │   └── layer (ModuleList)\n",
       "│   │       └── 0-11(BertLayer)\n",
       "│   │           ├── attention (BertAttention)\n",
       "│   │           │   ├── self (BertSelfAttention)\n",
       "│   │           │   │   ├── query,key,value(Linear) weight:[768, 768] bias:[768]\n",
       "│   │           │   │   └── prefix_tuning (PrefixTuningShim)\n",
       "│   │           │   │       └── pool (PrefixTuningPool)\n",
       "│   │           │   └── output (BertSelfOutput)\n",
       "│   │           │       ├── dense (Linear) weight:[768, 768] bias:[768]\n",
       "│   │           │       │   └── adapter (AdapterLayer)\n",
       "│   │           │       │       └── modulelist (Sequential)\n",
       "│   │           │       │           ├── down_proj (Linear) weight:[48, 768] bias:[48]\n",
       "│   │           │       │           └── up_proj (Linear) weight:[768, 48] bias:[768]\n",
       "│   │           │       └── LayerNorm (LayerNorm) weight:[768] bias:[768]\n",
       "│   │           ├── intermediate (BertIntermediate)\n",
       "│   │           │   └── dense (Linear) weight:[3072, 768] bias:[3072]\n",
       "│   │           └── output (BertOutput)\n",
       "│   │               ├── dense (Linear) weight:[768, 3072] bias:[768]\n",
       "│   │               │   └── adapter (AdapterLayer)\n",
       "│   │               │       └── modulelist (Sequential)\n",
       "│   │               │           ├── down_proj (Linear) weight:[48, 768] bias:[48]\n",
       "│   │               │           └── up_proj (Linear) weight:[768, 48] bias:[768]\n",
       "│   │               └── LayerNorm (LayerNorm) weight:[768] bias:[768]\n",
       "│   ├── pooler (BertPooler)\n",
       "│   │   └── dense (Linear) weight:[768, 768] bias:[768]\n",
       "│   └── prefix_tuning (PrefixTuningPool)\n",
       "└── classifier (Linear) weight:[2, 768] bias:[2]\n",
       "
\n" ], "text/plain": [ "\u001b[37mroot\u001b[0m\n", "├── \u001b[37mbert \u001b[0m\u001b[32m(BertModel)\u001b[0m\n", "│ ├── \u001b[37membeddings \u001b[0m\u001b[32m(BertEmbeddings)\u001b[0m\n", "│ │ ├── \u001b[37mword_embeddings \u001b[0m\u001b[32m(Embedding) \u001b[0m\u001b[38;2;0;70;100mweight:[30522, 768]\u001b[0m\n", "│ │ ├── \u001b[37mposition_embeddings \u001b[0m\u001b[32m(Embedding) \u001b[0m\u001b[38;2;0;70;100mweight:[512, 768]\u001b[0m\n", "│ │ ├── \u001b[37mtoken_type_embeddings \u001b[0m\u001b[32m(Embedding) \u001b[0m\u001b[38;2;0;70;100mweight:[2, 768]\u001b[0m\n", "│ │ └── \u001b[37mLayerNorm \u001b[0m\u001b[32m(LayerNorm) \u001b[0m\u001b[38;2;0;70;100mweight:[768] \u001b[0m\u001b[38;2;0;70;100mbias:[768]\u001b[0m\n", "│ ├── \u001b[37mencoder \u001b[0m\u001b[32m(BertEncoder)\u001b[0m\n", "│ │ └── \u001b[37mlayer \u001b[0m\u001b[32m(ModuleList)\u001b[0m\n", "│ │ └── \u001b[31m0-11\u001b[0m\u001b[32m(BertLayer)\u001b[0m\n", "│ │ ├── \u001b[37mattention \u001b[0m\u001b[32m(BertAttention)\u001b[0m\n", "│ │ │ ├── \u001b[37mself \u001b[0m\u001b[32m(BertSelfAttention)\u001b[0m\n", "│ │ │ │ ├── \u001b[31mquery,key,value\u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[38;2;0;70;100mweight:[768, 768] \u001b[0m\u001b[38;2;0;70;100mbias:[768]\u001b[0m\n", "│ │ │ │ └── \u001b[37mprefix_tuning \u001b[0m\u001b[32m(PrefixTuningShim)\u001b[0m\n", "│ │ │ │ └── \u001b[37mpool \u001b[0m\u001b[32m(PrefixTuningPool)\u001b[0m\n", "│ │ │ └── \u001b[37moutput \u001b[0m\u001b[32m(BertSelfOutput)\u001b[0m\n", "│ │ │ ├── \u001b[37mdense \u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[38;2;0;70;100mweight:[768, 768] \u001b[0m\u001b[38;2;0;70;100mbias:[768]\u001b[0m\n", "│ │ │ │ └── \u001b[37madapter \u001b[0m\u001b[32m(AdapterLayer)\u001b[0m\n", "│ │ │ │ └── \u001b[37mmodulelist \u001b[0m\u001b[32m(Sequential)\u001b[0m\n", "│ │ │ │ ├── \u001b[37mdown_proj \u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[38;2;175;0;255mweight:[48, 768] \u001b[0m\u001b[38;2;175;0;255mbias:[48]\u001b[0m\n", "│ │ │ │ └── \u001b[37mup_proj \u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[38;2;175;0;255mweight:[768, 48] \u001b[0m\u001b[38;2;175;0;255mbias:[768]\u001b[0m\n", "│ │ │ └── \u001b[37mLayerNorm \u001b[0m\u001b[32m(LayerNorm) \u001b[0m\u001b[38;2;0;70;100mweight:[768] \u001b[0m\u001b[38;2;0;70;100mbias:[768]\u001b[0m\n", "│ │ ├── \u001b[37mintermediate \u001b[0m\u001b[32m(BertIntermediate)\u001b[0m\n", "│ │ │ └── \u001b[37mdense \u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[38;2;0;70;100mweight:[3072, 768] \u001b[0m\u001b[38;2;0;70;100mbias:[3072]\u001b[0m\n", "│ │ └── \u001b[37moutput \u001b[0m\u001b[32m(BertOutput)\u001b[0m\n", "│ │ ├── \u001b[37mdense \u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[38;2;0;70;100mweight:[768, 3072] \u001b[0m\u001b[38;2;0;70;100mbias:[768]\u001b[0m\n", "│ │ │ └── \u001b[37madapter \u001b[0m\u001b[32m(AdapterLayer)\u001b[0m\n", "│ │ │ └── \u001b[37mmodulelist \u001b[0m\u001b[32m(Sequential)\u001b[0m\n", "│ │ │ ├── \u001b[37mdown_proj \u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[38;2;175;0;255mweight:[48, 768] \u001b[0m\u001b[38;2;175;0;255mbias:[48]\u001b[0m\n", "│ │ │ └── \u001b[37mup_proj \u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[38;2;175;0;255mweight:[768, 48] \u001b[0m\u001b[38;2;175;0;255mbias:[768]\u001b[0m\n", "│ │ └── \u001b[37mLayerNorm \u001b[0m\u001b[32m(LayerNorm) \u001b[0m\u001b[38;2;0;70;100mweight:[768] \u001b[0m\u001b[38;2;0;70;100mbias:[768]\u001b[0m\n", "│ ├── \u001b[37mpooler \u001b[0m\u001b[32m(BertPooler)\u001b[0m\n", "│ │ └── \u001b[37mdense \u001b[0m\u001b[32m(Linear) 
\u001b[0m\u001b[38;2;0;70;100mweight:[768, 768] \u001b[0m\u001b[38;2;0;70;100mbias:[768]\u001b[0m\n", "│ └── \u001b[37mprefix_tuning \u001b[0m\u001b[32m(PrefixTuningPool)\u001b[0m\n", "└── \u001b[37mclassifier \u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[36mweight:[2, 768] \u001b[0m\u001b[36mbias:[2]\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "Visualization(base_model).structure_graph();" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2023-08-13T16:06:44.674950Z", "start_time": "2023-08-13T16:06:42.233454Z" } }, "outputs": [], "source": [ "from transformers import TrainingArguments, Trainer\n", "from sklearn.metrics import classification_report\n", "\n", "\n", "def compute_metrics(pred):\n", " true_labels = pred.label_ids.ravel()\n", " pred_labels = pred.predictions.argmax(-1).ravel()\n", " report = classification_report(true_labels, pred_labels, output_dict=True)\n", " return {\n", " 'accuracy': report['accuracy'],\n", " 'f1-score-1': report['1']['f1-score'],\n", " 'f1-score-ma': report['macro avg']['f1-score']\n", " }\n", "\n", "\n", "def train_model(input_model, task_name, train_dataset, eval_dataset, col_fn):\n", " training_args = TrainingArguments(\n", " evaluation_strategy=\"epoch\",\n", " save_strategy=\"epoch\",\n", " # The next 2 lines are important to ensure the dataset labels are properly passed to the model\n", " remove_unused_columns=False,\n", " **config.hf_trainer_params.to_dict()\n", " )\n", "\n", " trainer = Trainer(\n", " model=input_model,\n", " args=training_args,\n", " train_dataset=train_dataset,\n", " eval_dataset=eval_dataset,\n", " data_collator=col_fn,\n", " compute_metrics=compute_metrics\n", " )\n", " trainer.train()\n", "\n", "\n", "for task_name in config.tasks:\n", " loader_out = auto_loader.get_and_map(tokenizer_man.tokenizer, task_name)\n", " num_labels = len(loader_out['output']['range'])\n", " train_model(\n", " base_model,\n", " task_name,\n", " loader_out['train'],\n", " loader_out['valid'],\n", " tokenizer_man.get_col_fn()\n", " )" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
root\n",
       "├── bert (BertModel)\n",
       "│   ├── embeddings (BertEmbeddings)\n",
       "│   │   ├── word_embeddings (Embedding) weight:[30522, 768]\n",
       "│   │   ├── position_embeddings (Embedding) weight:[512, 768]\n",
       "│   │   ├── token_type_embeddings (Embedding) weight:[2, 768]\n",
       "│   │   └── LayerNorm (LayerNorm) weight:[768] bias:[768]\n",
       "│   ├── encoder (BertEncoder)\n",
       "│   │   └── layer (ModuleList)\n",
       "│   │       └── 0-11(BertLayer)\n",
       "│   │           ├── attention (BertAttention)\n",
       "│   │           │   ├── self (BertSelfAttention)\n",
       "│   │           │   │   ├── query,key,value(Linear) weight:[768, 768] bias:[768]\n",
       "│   │           │   │   └── prefix_tuning (PrefixTuningShim)\n",
       "│   │           │   │       └── pool (PrefixTuningPool)\n",
       "│   │           │   └── output (BertSelfOutput)\n",
       "│   │           │       ├── dense (Linear) weight:[768, 768] bias:[768]\n",
       "│   │           │       └── LayerNorm (LayerNorm) weight:[768] bias:[768]\n",
       "│   │           ├── intermediate (BertIntermediate)\n",
       "│   │           │   └── dense (Linear) weight:[3072, 768] bias:[3072]\n",
       "│   │           └── output (BertOutput)\n",
       "│   │               ├── dense (Linear) weight:[768, 3072] bias:[768]\n",
       "│   │               ├── LayerNorm (LayerNorm) weight:[768] bias:[768]\n",
       "│   │               └── adapters (ModuleDict)\n",
       "│   │                   └── glue:cola (Adapter)\n",
       "│   │                       ├── non_linearity (Activation_Function_Class)\n",
       "│   │                       ├── adapter_down (Sequential)\n",
       "│   │                       │   ├── 0 (Linear) weight:[48, 768] bias:[48]\n",
       "│   │                       │   └── 1 (Activation_Function_Class)\n",
       "│   │                       └── adapter_up (Linear) weight:[768, 48] bias:[768]\n",
       "│   ├── pooler (BertPooler)\n",
       "│   │   └── dense (Linear) weight:[768, 768] bias:[768]\n",
       "│   └── prefix_tuning (PrefixTuningPool)\n",
       "└── heads (ModuleDict)\n",
       "    └── glue:cola (ClassificationHead)\n",
       "        ├── 1 (Linear) weight:[768, 768] bias:[768]\n",
       "        ├── 2 (Activation_Function_Class)\n",
       "        └── 4 (Linear) weight:[2, 768] bias:[2]\n",
       "
\n" ], "text/plain": [ "\u001b[37mroot\u001b[0m\n", "├── \u001b[37mbert \u001b[0m\u001b[32m(BertModel)\u001b[0m\n", "│ ├── \u001b[37membeddings \u001b[0m\u001b[32m(BertEmbeddings)\u001b[0m\n", "│ │ ├── \u001b[37mword_embeddings \u001b[0m\u001b[32m(Embedding) \u001b[0m\u001b[38;2;0;70;100mweight:[30522, 768]\u001b[0m\n", "│ │ ├── \u001b[37mposition_embeddings \u001b[0m\u001b[32m(Embedding) \u001b[0m\u001b[38;2;0;70;100mweight:[512, 768]\u001b[0m\n", "│ │ ├── \u001b[37mtoken_type_embeddings \u001b[0m\u001b[32m(Embedding) \u001b[0m\u001b[38;2;0;70;100mweight:[2, 768]\u001b[0m\n", "│ │ └── \u001b[37mLayerNorm \u001b[0m\u001b[32m(LayerNorm) \u001b[0m\u001b[38;2;0;70;100mweight:[768] \u001b[0m\u001b[38;2;0;70;100mbias:[768]\u001b[0m\n", "│ ├── \u001b[37mencoder \u001b[0m\u001b[32m(BertEncoder)\u001b[0m\n", "│ │ └── \u001b[37mlayer \u001b[0m\u001b[32m(ModuleList)\u001b[0m\n", "│ │ └── \u001b[31m0-11\u001b[0m\u001b[32m(BertLayer)\u001b[0m\n", "│ │ ├── \u001b[37mattention \u001b[0m\u001b[32m(BertAttention)\u001b[0m\n", "│ │ │ ├── \u001b[37mself \u001b[0m\u001b[32m(BertSelfAttention)\u001b[0m\n", "│ │ │ │ ├── \u001b[31mquery,key,value\u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[38;2;0;70;100mweight:[768, 768] \u001b[0m\u001b[38;2;0;70;100mbias:[768]\u001b[0m\n", "│ │ │ │ └── \u001b[37mprefix_tuning \u001b[0m\u001b[32m(PrefixTuningShim)\u001b[0m\n", "│ │ │ │ └── \u001b[37mpool \u001b[0m\u001b[32m(PrefixTuningPool)\u001b[0m\n", "│ │ │ └── \u001b[37moutput \u001b[0m\u001b[32m(BertSelfOutput)\u001b[0m\n", "│ │ │ ├── \u001b[37mdense \u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[38;2;0;70;100mweight:[768, 768] \u001b[0m\u001b[38;2;0;70;100mbias:[768]\u001b[0m\n", "│ │ │ └── \u001b[37mLayerNorm \u001b[0m\u001b[32m(LayerNorm) \u001b[0m\u001b[38;2;0;70;100mweight:[768] \u001b[0m\u001b[38;2;0;70;100mbias:[768]\u001b[0m\n", "│ │ ├── \u001b[37mintermediate \u001b[0m\u001b[32m(BertIntermediate)\u001b[0m\n", "│ │ │ └── \u001b[37mdense \u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[38;2;0;70;100mweight:[3072, 768] \u001b[0m\u001b[38;2;0;70;100mbias:[3072]\u001b[0m\n", "│ │ └── \u001b[37moutput \u001b[0m\u001b[32m(BertOutput)\u001b[0m\n", "│ │ ├── \u001b[37mdense \u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[38;2;0;70;100mweight:[768, 3072] \u001b[0m\u001b[38;2;0;70;100mbias:[768]\u001b[0m\n", "│ │ ├── \u001b[37mLayerNorm \u001b[0m\u001b[32m(LayerNorm) \u001b[0m\u001b[38;2;0;70;100mweight:[768] \u001b[0m\u001b[38;2;0;70;100mbias:[768]\u001b[0m\n", "│ │ └── \u001b[37madapters \u001b[0m\u001b[32m(ModuleDict)\u001b[0m\n", "│ │ └── \u001b[37mglue:cola \u001b[0m\u001b[32m(Adapter)\u001b[0m\n", "│ │ ├── \u001b[37mnon_linearity \u001b[0m\u001b[32m(Activation_Function_Class)\u001b[0m\n", "│ │ ├── \u001b[37madapter_down \u001b[0m\u001b[32m(Sequential)\u001b[0m\n", "│ │ │ ├── \u001b[37m0 \u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[36mweight:[48, 768] \u001b[0m\u001b[36mbias:[48]\u001b[0m\n", "│ │ │ └── \u001b[37m1 \u001b[0m\u001b[32m(Activation_Function_Class)\u001b[0m\n", "│ │ └── \u001b[37madapter_up \u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[36mweight:[768, 48] \u001b[0m\u001b[36mbias:[768]\u001b[0m\n", "│ ├── \u001b[37mpooler \u001b[0m\u001b[32m(BertPooler)\u001b[0m\n", "│ │ └── \u001b[37mdense \u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[38;2;0;70;100mweight:[768, 768] \u001b[0m\u001b[38;2;0;70;100mbias:[768]\u001b[0m\n", "│ └── \u001b[37mprefix_tuning \u001b[0m\u001b[32m(PrefixTuningPool)\u001b[0m\n", "└── \u001b[37mheads \u001b[0m\u001b[32m(ModuleDict)\u001b[0m\n", " └── \u001b[37mglue:cola 
\u001b[0m\u001b[32m(ClassificationHead)\u001b[0m\n", " ├── \u001b[37m1 \u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[36mweight:[768, 768] \u001b[0m\u001b[36mbias:[768]\u001b[0m\n", " ├── \u001b[37m2 \u001b[0m\u001b[32m(Activation_Function_Class)\u001b[0m\n", " └── \u001b[37m4 \u001b[0m\u001b[32m(Linear) \u001b[0m\u001b[36mweight:[2, 768] \u001b[0m\u001b[36mbias:[2]\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "Visualization(adapter_wrapper.model).structure_graph();" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2023-08-15T13:11:54.968862Z", "start_time": "2023-08-15T13:11:54.946870Z" } }, "outputs": [], "source": [ "results" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2023-08-15T13:23:50.492273Z", "start_time": "2023-08-15T13:22:40.985364Z" } }, "outputs": [], "source": [ "from _datasets import GLUEHelper\n", " \n", "gl_helper = GLUEHelper()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2023-08-15T13:46:17.380290Z", "start_time": "2023-08-15T13:46:17.346993Z" } }, "outputs": [], "source": [ "for n in range(0, 1000):\n", " out = gl_helper.datasets['stsb']['train'][n]\n", " if out['label'] == 0.:\n", " print(out)\n", " break" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from evaluate import load\n", "glue_metric = load('glue', 'stsb')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "results = glue_metric.compute(predictions=[-0.5, -0.3], references=[-0.5, 1])\n", "results" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2023-08-13T18:17:59.084998Z", "start_time": "2023-08-13T18:17:59.050653Z" } }, "outputs": [], "source": [ "gl_helper.datasets['mnli']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2023-08-13T18:17:59.157406Z", "start_time": "2023-08-13T18:17:59.081370Z" } }, "outputs": [], "source": [ "gl_helper.datasets['mnli_matched']\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2023-08-13T18:18:01.203910Z", "start_time": "2023-08-13T18:18:01.171842Z" } }, "outputs": [], "source": [ "gl_helper.datasets['mnli_mismatched']\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2023-08-13T18:30:16.905587Z", "start_time": "2023-08-13T18:30:16.775197Z" } }, "outputs": [], "source": [ "import transformers\n", "\n", "\n", "print(transformers.__version__)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2023-08-13T18:29:49.383120Z", "start_time": "2023-08-13T18:29:40.017083Z" } }, "outputs": [], "source": [ "pip install adapter-transformers" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:lll]", "language": "python", "name": "conda-env-lll-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.17" } }, "nbformat": 4, "nbformat_minor": 4 }