from config import Config
import os
import random
import numpy as np
import torch
import wandb
import logging
import transformers
import warnings
import subprocess
from model import (
    load_model,
    prepare_model,
    get_number_of_trainable_parameters,
    load_model_weights,
    get_number_of_parameters,
)
from data import load_dataset, load_dataloaders
from train import Trainer, generate_evaluation_output, save_evaluation_output
from utils import clean_hyperparameters, copy_model_weights

warnings.filterwarnings("ignore", "Using a non-full backward hook when the forward contains multiple autograd Nodes")

transformers.logging.set_verbosity_error()

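# Fix every source of randomness (Python hash seed, random, NumPy, PyTorch CPU/CUDA,
# HF Transformers) so that runs are reproducible.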
def set_seeds(seed: int):
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU
    transformers.set_seed(seed)


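# Score a generated output file with the E2E NLG metrics script (e2e/measure_scores.py)
# against the reference file and return the five summary metric lines from its stdout.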
def run_metric_script(file_path):
    result = subprocess.run(["e2e/measure_scores.py", "-p", "e2e_ref.txt", file_path], stdout=subprocess.PIPE)
    output = result.stdout.decode('utf-8')
    lines = output.split('\n')
    return lines[-7:-2]


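# Full pipeline for one run: prepare the model, train (optionally in two phases when
# two-step DP training is enabled), then generate test outputs and score them.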
def main(cfg):
    set_seeds(cfg.seed)

    model, tokenizer = load_model(cfg.model_name, cache_dir=cfg.model_cache_path)
    model = prepare_model(model, cfg)
    num_of_all_params = get_number_of_parameters(model)
    num_of_trainable_params = get_number_of_trainable_parameters(model)
    percentage = round(100 * num_of_trainable_params / num_of_all_params, 2)
    logging.info(f"Model loaded successfully; trainable parameters: {num_of_trainable_params} out of {num_of_all_params}")
    logging.info(f"Percentage of trainable parameters: {percentage}%")

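    # Load the dataset and build the train/validation dataloaders; tokenization is
    # handled when the dataloaders are built.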
    dataset = load_dataset(cfg.dataset, cfg.media_path, cfg.toy_example)
    cfg.train_data_size = len(dataset["train"])
    # dataset = tokenize_dataset(tokenizer, dataset, cfg.dataset, cfg.seq_length)
    train_loader, validation_loader = load_dataloaders(dataset, cfg.dataset, cfg.batch_size, cfg.virtual_batch_size, tokenizer, cfg.seq_length, cfg.dp)
    logging.info("Dataset loaded and tokenized")

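    # First training phase (the only one unless two-step DP training is enabled).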
    trainer = Trainer(cfg, model, train_loader)
    trainer.train_and_evaluate(cfg.epochs, train_loader, validation_loader)

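    # Optional second phase for DP training: checkpoint the DP-wrapped model, copy its
    # weights into a freshly prepared model, and fine-tune only the non-adapter bias terms.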
    if cfg.two_step_training and cfg.dp:
        trainer.privacy_engine.save_checkpoint(path="temp.pth", module=model)
        model_two, _ = load_model(cfg.model_name, cache_dir=cfg.model_cache_path)
        model_two = prepare_model(model_two, cfg)
        copy_model_weights(model, model_two)
        del model
        model = model_two
        # Freeze everything except the bias terms that do not belong to adapter modules.
        for name, param in model.named_parameters():
            if 'bias' in name and 'adapter' not in name:
                param.requires_grad = True
            else:
                param.requires_grad = False
        logging.info("New model adjusted for bias-only fine-tuning")
        num_of_all_params = get_number_of_parameters(model)
        num_of_trainable_params = get_number_of_trainable_parameters(model)
        percentage = round(100 * num_of_trainable_params / num_of_all_params, 2)
        logging.info(f"New model loaded successfully; trainable parameters: {num_of_trainable_params} out of {num_of_all_params}")
        logging.info(f"Percentage of trainable parameters: {percentage}%")
        trainer_two = Trainer(cfg, model, train_loader, second_trainer=True)
        trainer_two.train_and_evaluate(cfg.epochs_two, train_loader, validation_loader)

    # Evaluate on the test data: reload the best saved checkpoint, switch to eval mode,
    # and generate outputs both with beam search and with sampling.
    model = load_model_weights(model, cfg.peft_mode, f"{trainer.save_path}/{trainer.model_name}.pth")
    model.eval()
    evaluation_output = generate_evaluation_output(model, tokenizer, dataset["test"], cfg.device, cfg.seq_length, cfg.beam_size)
    output_path = f"{cfg.media_path}generation_eval_outputs/{cfg.dataset}/{cfg.peft_mode}"
    os.makedirs(output_path, exist_ok=True)
    output_name = cfg.run_name if cfg.run_name else "generation_output"
    save_evaluation_output(evaluation_output, f"{output_path}/{output_name}-v1.txt")
    evaluation_output = generate_evaluation_output(model, tokenizer, dataset["test"], cfg.device, cfg.seq_length, cfg.beam_size, do_sample=True)
    save_evaluation_output(evaluation_output, f"{output_path}/{output_name}-v2.txt")
    logging.info("Generation for test data saved")
    metrics = run_metric_script(f"{output_path}/{output_name}-v1.txt")
    logging.info("Metrics without sampling:")
    for metric in metrics:
        logging.info(metric)
    metrics = run_metric_script(f"{output_path}/{output_name}-v2.txt")
    logging.info("Metrics with sampling:")
    for metric in metrics:
        logging.info(metric)

    if cfg.use_wandb:
        wandb.finish()

if __name__ == "__main__":
    cfg = Config().args

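    # Logs go to logs/<dataset>/<peft_mode>/, named after the run (or the W&B run name below).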
    log_path = f"logs/{cfg.dataset}/{cfg.peft_mode}/"
    os.makedirs(log_path, exist_ok=True)
    log_file_name = f"{cfg.run_name}.log" if cfg.run_name else "logs.log"

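    # Optional Weights & Biases tracking; when W&B is enabled, its run name is reused
    # as the log file name.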
    if cfg.use_wandb:
        wandb.login(key="YOUR_KEY")
        if cfg.run_name:
            wandb.init(config=cfg, project=f"{cfg.wandb_project_name}-{cfg.dataset}", name=cfg.run_name)
        else:
            wandb.init(config=cfg, project=f"{cfg.wandb_project_name}-{cfg.dataset}")
        log_file_name = wandb.run.name

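    # Write all log records to file and dump the cleaned hyperparameters before the run starts.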
    logging.basicConfig(filename=f"{log_path}{log_file_name}", level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)
    logging.info("Logging started")
    hyperparameters = clean_hyperparameters(dict(vars(cfg)))
    hyperparameters_str = "\n".join([f"{key}: {value}" for key, value in hyperparameters.items()])
    logging.info("config:\n" + hyperparameters_str)

    main(cfg)