---
# Experiment configuration.
#
# Layout:
#   default     - shared settings, anchored as `&default` and merged into
#                 each run entry with `<<: *default`.
#   pp          - anchor `&pp` for a list of pretrained paths; every entry is
#                 currently commented out.
#   run_configs - sequence of run entries; each one starts from `default`
#                 and overrides individual keys.
#
# NOTE(review): `<<` is a shallow merge. A key written explicitly in a run
# entry (e.g. `peft_params`) replaces the merged value as a whole.

default: &default
  use_tqdm: true
  random_seed: 42
  base_save_path: /disks/ssd/trained_final/sing_thesis
  model_name: google/t5-small-lm-adapt
  project_name_prefix: sing_thesis
  experiment_name_suffix: null
  train_batch_size: 24
  valid_batch_size: 24
  remove_dropout: true
  learning_rate: 0.01
  weight_decay: 0.01
  num_epochs: 20
  peft_params: null  # no mutation
  hot_modules:
    - sadcl
  best_finder:
    save: true
    metric: valid_mean
    higher_better: true
  tasks:
    - glue:qqp
    - glue:mnli
    - glue:qnli

# With every entry below commented out, `pp` (and any `*pp` alias) resolves
# to null rather than to a list. Uncomment paths before re-enabling
# `pretrained_paths: *pp` in a run entry.
pp: &pp
  # - /disks/ssd/hzi_trained/hzi_cluster_t5_small_glue-mnli/10_combine_128
  # - /disks/ssd/hzi_trained/hzi_cluster_t5_small_glue-qqp/10_combine_128
  # - /disks/ssd/trained_final/cont_thesis/cont_thesis_t5_base_glue-mrpc/10_combine_128
  # - /disks/ssd/trained_final/cont_thesis/cont_thesis_t5_base_glue-cola/10_combine_128_simple
  # - /disks/ssd/trained_final/cont_thesis/cont_thesis_t5_base_glue-stsb/10_combine_128_simple
  # - /disks/ssd/trained_final/cont_thesis/cont_thesis_t5_base_glue-sst2/10_combine_128_simple
  # - /disks/ssd/trained_final/cont_thesis/cont_thesis_t5_base_superglue-rte/10_combine_128_simple
  # - /disks/ssd/trained_final/cont_thesis/cont_thesis_t5_base_superglue-cb/10_combine_128_simple
  # - /disks/ssd/trained_final/cont_thesis/cont_thesis_t5_base_superglue-copa/10_combine_128_simple

run_configs:
  # Only this entry is active; the commented-out entries below are kept as
  # alternative variants.
  # NOTE(review): the spellings `gumbal` and `tempreture` in those entries
  # are left untouched - confirm what the consuming code expects before
  # correcting them.
  - <<: *default
    peft_params:
      kind: combine
      n_tokens: 50
      n_comb_tokens: 128
      # pretrained_paths: *pp
      use_pretrained_mode: simple
  # - <<: *default
  #   peft_params:
  #     kind: combine
  #     n_tokens: 10
  #     n_comb_tokens: 128
  #     pretrained_paths: *pp
  #     use_pretrained_mode: gumbal
  # - <<: *default
  #   peft_params:
  #     kind: combine
  #     n_tokens: 10
  #     n_comb_tokens: 128
  #     pretrained_paths: *pp
  #     use_pretrained_mode: softmax
  #     tempreture: 0.2
  # - <<: *default
  #   peft_params:
  #     kind: combine
  #     n_tokens: 10
  #     n_comb_tokens: 128
  #     pretrained_paths: *pp
  #     use_pretrained_mode: softmax
  #     tempreture: 1.
  # - <<: *default
  #   peft_params:
  #     kind: combine
  #     n_tokens: 10
  #     n_comb_tokens: 128
  #     pretrained_paths: *pp
  #     use_pretrained_mode: softmax
  #     tempreture: 5.

  # - <<: *default
  #   peft_params:
  #     kind: combine
  #     n_tokens: 10
  #     n_comb_tokens: 128