,summary,config,name 0,"{'_step': 79, '_wandb': {'runtime': 837}, '_runtime': 834.6212244033813, '_timestamp': 1695328162.5200074, 'train_loss': 0.14249593541026115, 'valid_mean': 0.5492957746478874, 'valid_accuracy': 0.5492957746478874}","{'tasks': ['glue:wnli', 'glue:rte', 'glue:mrpc'], 'use_tqdm': True, 'model_name': 'google/t5-base-lm-adapt', 'num_epochs': 80, 'best_finder': {'save': True, 'metric': 'valid_mean', 'higher_better': True}, 'hot_modules': None, 'peft_params': None, 'random_seed': 42, 'weight_decay': 0.01, 'learning_rate': 1e-05, 'base_save_path': '/home/msadraei/trained_final', 'remove_dropout': True, 'train_batch_size': 32, 'valid_batch_size': 32, 'project_name_prefix': 'hzi_cluster', 'experiment_name_suffix': None}",full 1,"{'_step': 79, '_wandb': {'runtime': 372}, '_runtime': 373.980761051178, '_timestamp': 1695319551.4411, 'train_loss': 0.15845297500491143, 'valid_mean': 0.5633802816901409, 'valid_accuracy': 0.5633802816901409}","{'tasks': ['glue:wnli', 'glue:rte', 'glue:mrpc'], 'use_tqdm': True, 'model_name': 'google/t5-base-lm-adapt', 'num_epochs': 80, 'best_finder': {'save': True, 'metric': 'valid_mean', 'higher_better': True}, 'hot_modules': ['sadcl', 'classifier'], 'peft_params': {'kind': 'combine', 'n_tokens': 10, 'radnom_init': True, 'n_comb_tokens': 8}, 'random_seed': 42, 'weight_decay': 0.01, 'learning_rate': 0.01, 'base_save_path': '/home/msadraei/trained_final', 'remove_dropout': True, 'train_batch_size': 32, 'valid_batch_size': 32, 'project_name_prefix': 'hzi_cluster', 'experiment_name_suffix': 'random'}",10_combine_8_random 2,"{'_timestamp': 1695314124.8870673, 'train_loss': 0.1371849663555622, 'valid_mean': 0.43661971830985913, 'valid_accuracy': 0.43661971830985913, '_step': 79, '_wandb': {'runtime': 372}, '_runtime': 373.63361120224}","{'tasks': ['glue:wnli', 'glue:rte', 'glue:mrpc'], 'use_tqdm': True, 'model_name': 'google/t5-base-lm-adapt', 'num_epochs': 80, 'best_finder': {'save': True, 'metric': 'valid_mean', 'higher_better': True}, 'hot_modules': ['sadcl', 'classifier'], 'peft_params': {'kind': 'combine', 'n_tokens': 10, 'radnom_init': True, 'n_comb_tokens': 128}, 'random_seed': 42, 'weight_decay': 0.01, 'learning_rate': 0.01, 'base_save_path': '/home/msadraei/trained_final', 'remove_dropout': True, 'train_batch_size': 32, 'valid_batch_size': 32, 'project_name_prefix': 'hzi_cluster', 'experiment_name_suffix': 'random'}",10_combine_128_random 3,"{'valid_mean': 0.5633802816901409, 'valid_accuracy': 0.5633802816901409, '_step': 79, '_wandb': {'runtime': 389}, '_runtime': 389.9232409000397, '_timestamp': 1695309065.9015949, 'train_loss': 0.17796048820018767}","{'tasks': ['glue:wnli', 'glue:rte', 'glue:mrpc'], 'use_tqdm': True, 'model_name': 'google/t5-base-lm-adapt', 'num_epochs': 80, 'best_finder': {'save': True, 'metric': 'valid_mean', 'higher_better': True}, 'hot_modules': ['sadcl', 'classifier'], 'peft_params': {'kind': 'residual', 'mlp_size': 128, 'n_tokens': 10}, 'random_seed': 42, 'weight_decay': 0.01, 'learning_rate': 0.3, 'base_save_path': '/home/msadraei/trained_final', 'remove_dropout': False, 'train_batch_size': 32, 'valid_batch_size': 32, 'project_name_prefix': 'hzi_cluster', 'experiment_name_suffix': 'dropout'}",10_residual_128_dropout 4,"{'train_loss': 0.749963104724884, 'valid_mean': 0.43661971830985913, 'valid_accuracy': 0.43661971830985913, '_step': 79, '_wandb': {'runtime': 479}, '_runtime': 480.0062892436981, '_timestamp': 1695303861.035812}","{'tasks': ['glue:wnli', 'glue:rte', 'glue:mrpc'], 'use_tqdm': True, 'model_name': 'google/t5-base-lm-adapt', 'num_epochs': 80, 'best_finder': {'save': True, 'metric': 'valid_mean', 'higher_better': True}, 'hot_modules': ['sadcl', 'classifier'], 'peft_params': {'kind': 'simple', 'n_tokens': 10}, 'random_seed': 42, 'weight_decay': 0.01, 'learning_rate': 0.01, 'base_save_path': '/home/msadraei/trained_final', 'remove_dropout': False, 'train_batch_size': 32, 'valid_batch_size': 32, 'project_name_prefix': 'hzi_cluster', 'experiment_name_suffix': 'dropout'}",10_simple_dropout 5,"{'_step': 79, '_wandb': {'runtime': 413}, '_runtime': 414.14359283447266, '_timestamp': 1695298720.0363448, 'train_loss': 0.1991661325097084, 'valid_mean': 0.5633802816901409, 'valid_accuracy': 0.5633802816901409}","{'tasks': ['glue:wnli', 'glue:rte', 'glue:mrpc'], 'use_tqdm': True, 'model_name': 'google/t5-base-lm-adapt', 'num_epochs': 80, 'best_finder': {'save': True, 'metric': 'valid_mean', 'higher_better': True}, 'hot_modules': ['sadcl', 'classifier'], 'peft_params': {'kind': 'combine', 'n_tokens': 10, 'n_comb_tokens': 8}, 'random_seed': 42, 'weight_decay': 0.01, 'learning_rate': 0.01, 'base_save_path': '/home/msadraei/trained_final', 'remove_dropout': False, 'train_batch_size': 32, 'valid_batch_size': 32, 'project_name_prefix': 'hzi_cluster', 'experiment_name_suffix': 'dropout'}",10_combine_8_dropout 6,"{'valid_accuracy': 0.5633802816901409, '_step': 79, '_wandb': {'runtime': 384}, '_runtime': 384.9592313766479, '_timestamp': 1695293638.5694425, 'train_loss': 0.1572120986878872, 'valid_mean': 0.5633802816901409}","{'tasks': ['glue:wnli', 'glue:rte', 'glue:mrpc'], 'use_tqdm': True, 'model_name': 'google/t5-base-lm-adapt', 'num_epochs': 80, 'best_finder': {'save': True, 'metric': 'valid_mean', 'higher_better': True}, 'hot_modules': ['sadcl', 'classifier'], 'peft_params': {'kind': 'combine', 'n_tokens': 10, 'n_comb_tokens': 128}, 'random_seed': 42, 'weight_decay': 0.01, 'learning_rate': 0.01, 'base_save_path': '/home/msadraei/trained_final', 'remove_dropout': False, 'train_batch_size': 32, 'valid_batch_size': 32, 'project_name_prefix': 'hzi_cluster', 'experiment_name_suffix': 'dropout'}",10_combine_128_dropout 7,"{'_step': 79, '_wandb': {'runtime': 376}, '_runtime': 377.5810399055481, '_timestamp': 1695288599.143306, 'train_loss': 0.13466075621545315, 'valid_mean': 0.43661971830985913, 'valid_accuracy': 0.43661971830985913}","{'tasks': ['glue:wnli', 'glue:rte', 'glue:mrpc'], 'use_tqdm': True, 'model_name': 'google/t5-base-lm-adapt', 'num_epochs': 80, 'best_finder': {'save': True, 'metric': 'valid_mean', 'higher_better': True}, 'hot_modules': ['sadcl', 'classifier'], 'peft_params': {'kind': 'residual', 'mlp_size': 128, 'n_tokens': 10}, 'random_seed': 42, 'weight_decay': 0.01, 'learning_rate': 0.3, 'base_save_path': '/home/msadraei/trained_final', 'remove_dropout': True, 'train_batch_size': 32, 'valid_batch_size': 32, 'project_name_prefix': 'hzi_cluster', 'experiment_name_suffix': None}",10_residual_128 8,"{'_step': 79, '_wandb': {'runtime': 468}, '_runtime': 469.2816665172577, '_timestamp': 1695283548.0529184, 'train_loss': 0.19754927083849907, 'valid_mean': 0.5633802816901409, 'valid_accuracy': 0.5633802816901409}","{'tasks': ['glue:wnli', 'glue:rte', 'glue:mrpc'], 'use_tqdm': True, 'model_name': 'google/t5-base-lm-adapt', 'num_epochs': 80, 'best_finder': {'save': True, 'metric': 'valid_mean', 'higher_better': True}, 'hot_modules': ['sadcl', 'classifier'], 'peft_params': {'kind': 'simple', 'n_tokens': 10}, 'random_seed': 42, 'weight_decay': 0.01, 'learning_rate': 0.01, 'base_save_path': '/home/msadraei/trained_final', 'remove_dropout': True, 'train_batch_size': 32, 'valid_batch_size': 32, 'project_name_prefix': 'hzi_cluster', 'experiment_name_suffix': None}",10_simple 9,"{'valid_mean': 0.43661971830985913, 'valid_accuracy': 0.43661971830985913, '_step': 79, '_wandb': {'runtime': 381}, '_runtime': 381.929176568985, '_timestamp': 1695278516.4769197, 'train_loss': 0.1441124401986599}","{'tasks': ['glue:wnli', 'glue:rte', 'glue:mrpc'], 'use_tqdm': True, 'model_name': 'google/t5-base-lm-adapt', 'num_epochs': 80, 'best_finder': {'save': True, 'metric': 'valid_mean', 'higher_better': True}, 'hot_modules': ['sadcl', 'classifier'], 'peft_params': {'kind': 'combine', 'n_tokens': 10, 'n_comb_tokens': 8}, 'random_seed': 42, 'weight_decay': 0.01, 'learning_rate': 0.01, 'base_save_path': '/home/msadraei/trained_final', 'remove_dropout': True, 'train_batch_size': 32, 'valid_batch_size': 32, 'project_name_prefix': 'hzi_cluster', 'experiment_name_suffix': None}",10_combine_8 10,"{'_step': 79, '_wandb': {'runtime': 371}, '_runtime': 371.98936891555786, '_timestamp': 1695273540.236157, 'train_loss': 0.1341699216514826, 'valid_mean': 0.4225352112676056, 'valid_accuracy': 0.4225352112676056}","{'tasks': ['glue:wnli', 'glue:rte', 'glue:mrpc'], 'use_tqdm': True, 'model_name': 'google/t5-base-lm-adapt', 'num_epochs': 80, 'best_finder': {'save': True, 'metric': 'valid_mean', 'higher_better': True}, 'hot_modules': ['sadcl', 'classifier'], 'peft_params': {'kind': 'combine', 'n_tokens': 10, 'n_comb_tokens': 128}, 'random_seed': 42, 'weight_decay': 0.01, 'learning_rate': 0.01, 'base_save_path': '/home/msadraei/trained_final', 'remove_dropout': True, 'train_batch_size': 32, 'valid_batch_size': 32, 'project_name_prefix': 'hzi_cluster', 'experiment_name_suffix': None}",10_combine_128