In [13]:
import torch
from transformers import GPT2ForSequenceClassification, GPT2TokenizerFast, GPT2Model
from utils import print_system_info

# Report the runtime environment. Per the recorded output this prints the Python,
# Torch and Transformers versions, the Nvidia device name, and whether Adapterhub
# is available.
print_system_info()

Python version is: 3.10.11
Torch version is: 1.13.1+cu117
Nvidia device is: NVIDIA GeForce RTX 4090
Transformers version is: 4.32.1
Adapterhub not found!!!


In [14]:
# Name of the pretrained checkpoint used for both the tokenizer and the model.
MODEL_NAME = 'gpt2'

# Prefer the first CUDA device when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")

In [15]:
# Load the fast GPT-2 tokenizer, configured to pad on the left.
# NOTE(review): left padding presumably keeps the final position a real token,
# which matters because the classifier cannot detect padding when it is fed
# `inputs_embeds` (see the runtime warning emitted during training) - confirm.
tokenizer = GPT2TokenizerFast.from_pretrained(
    MODEL_NAME,
    add_prefix_space=True,
    padding_side='left',
)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Sequence-classification model; it is told which token id is used for padding.
# The classification head (`score.weight`) is newly initialised, as the load
# message reports, so it has to be trained before use.
model = GPT2ForSequenceClassification.from_pretrained(
    MODEL_NAME,
    pad_token_id=tokenizer.pad_token_id,
)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [16]:
from peft import get_peft_model, PromptTuningConfig, PromptTuningInit, TaskType

# Prompt-tuning setup for sequence classification: 10 virtual tokens whose
# embeddings are initialised from the text below, tokenized with the tokenizer
# identified by MODEL_NAME.
peft_config = PromptTuningConfig(
    tokenizer_name_or_path=MODEL_NAME,
    prompt_tuning_init_text="sentiment or value or relation of the previous text is",
    prompt_tuning_init=PromptTuningInit.TEXT,
    num_virtual_tokens=10,
    task_type=TaskType.SEQ_CLS,
)

# Wrap the base model with the PEFT adapter and report how many parameters
# remain trainable.
peft_model = get_peft_model(model, peft_config)
peft_model.print_trainable_parameters()

trainable params: 10,752 || all params: 124,450,560 || trainable%: 0.008639575426579036


In [17]:
from datasets import load_dataset
def _tokenize_batch(batch):
    """Tokenize the 'sentence' column of a batch of examples."""
    return tokenizer(batch['sentence'])


# Load GLUE/CoLA and add the tokenizer outputs to every split.
dataset = load_dataset('glue', 'cola').map(_tokenize_batch, batched=True)

# Expose only the model inputs and the label, as torch tensors.
exposed_columns = ['input_ids', 'attention_mask', 'label']
dataset.set_format(type='torch', columns=exposed_columns)

Found cached dataset glue (/home/mohalisad/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

Map:   0%|          | 0/8551 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Map:   0%|          | 0/1063 [00:00<?, ? examples/s]

In [None]:
from transformers import TrainingArguments, Trainer, DataCollatorWithPadding
from sklearn.metrics import classification_report


def compute_metrics(pred):
    """Compute accuracy and F1 scores for one evaluation pass.

    Args:
        pred: Object exposing ``label_ids`` (gold labels) and ``predictions``
            (per-class scores; the predicted class is the argmax over the
            last axis). The Trainer passes this object in.

    Returns:
        dict with:
            'accuracy'    - overall accuracy,
            'f1-score-1'  - F1 of class ``1`` (0.0 when class 1 occurs in
                            neither the labels nor the predictions),
            'f1-score-ma' - macro-averaged F1.
    """
    true_labels = pred.label_ids.ravel()
    pred_labels = pred.predictions.argmax(-1).ravel()
    # zero_division=0 yields the same values as the default ("warn") but does
    # not emit UndefinedMetricWarning when a class is never predicted.
    report = classification_report(
        true_labels, pred_labels, output_dict=True, zero_division=0
    )
    # The report only has rows for labels that actually occur, so class '1'
    # may be missing; fall back to 0.0 instead of raising KeyError.
    positive_row = report.get('1', {})
    return {
        'accuracy': report['accuracy'],
        'f1-score-1': positive_row.get('f1-score', 0.0),
        'f1-score-ma': report['macro avg']['f1-score']
    }

# Collate examples into batches, padding each batch to its longest sequence
# and returning PyTorch tensors.
col_fn = DataCollatorWithPadding(tokenizer, padding='longest', return_tensors='pt')

# Evaluate and checkpoint once per epoch.
training_args = TrainingArguments(
    output_dir='/disks/part4/trash',
    num_train_epochs=160,
    learning_rate=1e-5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    # Kept from the original author's note: this setting is important to
    # ensure the dataset labels are properly passed to the model.
    remove_unused_columns=False,
)

# Train the prompt-tuned model on the CoLA train split and score it on the
# validation split with `compute_metrics` after every epoch.
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['validation'],
    data_collator=col_fn,
    compute_metrics=compute_metrics,
)
trainer.train()

You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
GPT2ForSequenceClassification will not detect padding tokens in `inputs_embeds`. Results may be unexpected if using padding tokens in conjunction with `inputs_embeds.`


Epoch,Training Loss,Validation Loss,Accuracy,F1-score-1,F1-score-ma
1,No log,0.659724,0.692234,0.817717,0.415012
2,0.825400,0.643644,0.692234,0.817717,0.415012
3,0.825400,0.634887,0.689358,0.81549,0.416836
4,0.673500,0.632855,0.691275,0.817253,0.411713
5,0.673500,0.63205,0.692234,0.817924,0.412058
6,0.663300,0.630056,0.692234,0.817924,0.412058
7,0.663300,0.627022,0.692234,0.817924,0.412058
8,0.642900,0.625471,0.692234,0.817924,0.412058
9,0.642900,0.624368,0.692234,0.817924,0.412058
10,0.629500,0.625301,0.692234,0.817924,0.412058


GPT2ForSequenceClassification will not detect padding tokens in `inputs_embeds`. Results may be unexpected if using padding tokens in conjunction with `inputs_embeds.`
GPT2ForSequenceClassification will not detect padding tokens in `inputs_embeds`. Results may be unexpected if using padding tokens in conjunction with `inputs_embeds.`
GPT2ForSequenceClassification will not detect padding tokens in `inputs_embeds`. Results may be unexpected if using padding tokens in conjunction with `inputs_embeds.`
GPT2ForSequenceClassification will not detect padding tokens in `inputs_embeds`. Results may be unexpected if using padding tokens in conjunction with `inputs_embeds.`
GPT2ForSequenceClassification will not detect padding tokens in `inputs_embeds`. Results may be unexpected if using padding tokens in conjunction with `inputs_embeds.`
GPT2ForSequenceClassification will not detect padding tokens in `inputs_embeds`. Results may be unexpected if using padding tokens in conjunction with `inputs_e