---
# Experiment configuration.
#
# Layout:
#   default     - baseline settings, anchored as `&default` and merged into
#                 run_configs entries with `<<: *default`.
#   pp          - list of checkpoint paths, anchored as `&pp` and aliased from
#                 `peft_params.pretrained_paths`.
#   run_configs - the list of runs; each entry merges `default` and overrides
#                 selected keys.
#
# Merge keys (`<<:`) are shallow: a key set explicitly in a run entry replaces
# the merged-in value as a whole (e.g. `peft_params` below replaces the `null`
# from `default`; it is not deep-merged).

default: &default
  use_tqdm: true
  random_seed: 42
  base_save_path: /home/msadraei/trained_final
  model_name: google/t5-base-lm-adapt
  project_name_prefix: iclr_attempt_lmt5
  experiment_name_suffix: null
  train_batch_size: 24
  valid_batch_size: 24
  remove_dropout: true
  learning_rate: 0.01
  weight_decay: 0.01
  num_epochs: 80
  peft_params: null  # no mutation
  hot_modules:
    - sadcl
  best_finder:
    save: true
    metric: valid_mean
    higher_better: true
  # Only the uncommented task is active; the others are kept for quick
  # re-enabling.
  tasks:
    # - superglue:rte
    # - superglue:cb
    # - superglue:wic
    # - superglue:copa
    # - glue:cola
    # - glue:mrpc
    # - superglue:boolq
    # - glue:stsb
    - superglue:multirc

# Referenced below as `pretrained_paths: *pp`.
pp: &pp
  - /home/msadraei/trained_final/hzi_cluster_t5_base_glue-mnli/10_combine_128
  - /home/msadraei/trained_final/hzi_cluster_t5_base_glue-sst2/10_combine_128
  - /home/msadraei/trained_final/hzi_cluster_t5_base_glue-qqp/10_combine_128
  - /home/msadraei/trained_final/hzi_cluster_t5_base_glue-qnli/10_combine_128

# NOTE(review): the disabled entries that merge `*default_large` rely on an
# anchor that is not defined in this file as shown; define `&default_large`
# before re-enabling them, otherwise the alias will fail to resolve.
run_configs:
  # - <<: *default
  #   learning_rate: 0.3
  #   weight_decay: 0.00001
  #   peft_params:
  #     kind: attempt
  #     n_tokens: 10
  #     g_bottleneck: 100
  #     pretrained_paths: *pp
  # - <<: *default_large
  #   learning_rate: 0.3
  #   weight_decay: 0.00001
  #   peft_params:
  #     kind: attempt
  #     n_tokens: 10
  #     g_bottleneck: 100
  #     pretrained_paths: *pp
  - <<: *default
    learning_rate: 0.3
    remove_dropout: false
    experiment_name_suffix: dropout
    weight_decay: 0.00001
    peft_params:
      kind: attempt
      n_tokens: 10
      g_bottleneck: 100
      pretrained_paths: *pp
  # - <<: *default_large
  #   learning_rate: 0.3
  #   remove_dropout: false
  #   experiment_name_suffix: dropout
  #   weight_decay: 0.00001
  #   peft_params:
  #     kind: attempt
  #     n_tokens: 10
  #     g_bottleneck: 100
  #     pretrained_paths: *pp