---
# Experiment configuration.
#
# `default` holds the settings shared by every run; each entry under
# `run_configs` merges it in with `<<: *default` and then overrides
# individual keys (explicit keys win over merged-in ones).
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped — confirm that `hot_modules`, `best_finder`
# and `tasks` belong under `default`.

default: &default
  use_tqdm: true
  random_seed: 42
  base_save_path: /home/msadraei/trained_final
  model_name: t5-small
  project_name_prefix: iclr_orig_t5
  experiment_name_suffix: null
  train_batch_size: 24
  valid_batch_size: 24
  remove_dropout: true
  learning_rate: 0.01
  weight_decay: 0.01
  num_epochs: 80
  peft_params: null  # no mutation
  hot_modules:
    - sadcl
  best_finder:
    save: true
    metric: valid_mean
    higher_better: true
  # Task identifiers are written as `<benchmark>:<task>`.
  tasks:
    - superglue:rte
    - superglue:cb
    - superglue:wic
    - superglue:copa
    - glue:cola
    - glue:mrpc
    - superglue:boolq
    - glue:qqp
    - glue:qnli
    - glue:mnli
    - glue:sst2
    - glue:stsb

# Anchored list of paths; not referenced by any alias in this part of the
# file. Presumably checkpoints of earlier runs — verify against the consumer.
pp: &pp
  - /home/msadraei/trained_final/hzi_cluster_t5_small_glue-mnli/10_combine_128
  - /home/msadraei/trained_final/hzi_cluster_t5_small_glue-sst2/10_combine_128
  - /home/msadraei/trained_final/hzi_cluster_t5_small_glue-qqp/10_combine_128

run_configs:
  # Shared defaults with `peft_params` overridden (null in `default`).
  - <<: *default
    peft_params:
      kind: combine
      n_tokens: 10
      n_comb_tokens: 128
  # Disabled alternative run, kept for reference.
  # - <<: *default
  #   learning_rate: 0.3
  #   peft_params:
  #     kind: residual
  #     n_tokens: 10
  #     mlp_size: 128