# Shared settings. Every entry in `run_configs` pulls these in through the
# merge key (`<<: *default`); keys written explicitly in an entry override
# the merged-in value (the merge is shallow, not deep).
default: &default
  use_tqdm: true
  random_seed: 42
  base_save_path: /disks/ssd/trained_final/sing_thesis
  model_name: google/t5-small-lm-adapt
  project_name_prefix: sing_thesis
  experiment_name_suffix: null
  train_batch_size: 24
  valid_batch_size: 24
  remove_dropout: true
  learning_rate: 0.01
  weight_decay: 0.01
  num_epochs: 20
  peft_params: null  # no mutation
  hot_modules:
    - sadcl
  best_finder:
    save: true
    metric: valid_mean
    higher_better: true
  tasks:
    - glue:qqp
    - glue:mnli
    - glue:qnli

# Candidate pretrained paths. Every item is commented out, so the `pp` anchor
# currently resolves to null; the only references to it (`pretrained_paths:
# *pp` in `run_configs`) are commented out as well.
pp: &pp
  # - /disks/ssd/hzi_trained/hzi_cluster_t5_small_glue-mnli/10_combine_128
  # - /disks/ssd/hzi_trained/hzi_cluster_t5_small_glue-qqp/10_combine_128
  # - /disks/ssd/trained_final/cont_thesis/cont_thesis_t5_base_glue-mrpc/10_combine_128
  # - /disks/ssd/trained_final/cont_thesis/cont_thesis_t5_base_glue-cola/10_combine_128_simple
  # - /disks/ssd/trained_final/cont_thesis/cont_thesis_t5_base_glue-stsb/10_combine_128_simple
  # - /disks/ssd/trained_final/cont_thesis/cont_thesis_t5_base_glue-sst2/10_combine_128_simple
  # - /disks/ssd/trained_final/cont_thesis/cont_thesis_t5_base_superglue-rte/10_combine_128_simple
  # - /disks/ssd/trained_final/cont_thesis/cont_thesis_t5_base_superglue-cb/10_combine_128_simple
  # - /disks/ssd/trained_final/cont_thesis/cont_thesis_t5_base_superglue-copa/10_combine_128_simple

# One entry per run. Only the first entry is active; the rest are kept as
# commented-out variants.
run_configs:
  - <<: *default
    # Replaces the null `peft_params` inherited from `default`.
    peft_params:
      kind: combine
      n_tokens: 50
      n_comb_tokens: 128
      # pretrained_paths: *pp
      use_pretrained_mode: simple
  # - <<: *default
  #   peft_params:
  #     kind: combine
  #     n_tokens: 10
  #     n_comb_tokens: 128
  #     pretrained_paths: *pp
  #     use_pretrained_mode: gumbal
  # - <<: *default
  #   peft_params:
  #     kind: combine
  #     n_tokens: 10
  #     n_comb_tokens: 128
  #     pretrained_paths: *pp
  #     use_pretrained_mode: softmax
  #     tempreture: 0.2
  # - <<: *default
  #   peft_params:
  #     kind: combine
  #     n_tokens: 10
  #     n_comb_tokens: 128
  #     pretrained_paths: *pp
  #     use_pretrained_mode: softmax
  #     tempreture: 1.
  # - <<: *default
  #   peft_params:
  #     kind: combine
  #     n_tokens: 10
  #     n_comb_tokens: 128
  #     pretrained_paths: *pp
  #     use_pretrained_mode: softmax
  #     tempreture: 5.
  # - <<: *default
  #   peft_params:
  #     kind: combine
  #     n_tokens: 10
  #     n_comb_tokens: 128