shared:
  project_name: continual_prompt_pretrained_mlp
  use_tqdm: true
  random_seed: 42

default: &default
  model_name: google/t5-large-lm-adapt
  wandb_name: null
  train_batch_size: 32
  valid_batch_size: 32
  num_epochs: 100
  peft_params: null # no mutation
  hot_modules: null # fine-tune all
  balancify_train: false
  best_finder:
    save: true
    metric: valid_f1-score-ma
    higher_better: true
  tasks:
    - glue:cola

run_configs:
  # - <<: *default
  #   wandb_name: large_5t_mlp128
  #   learning_rate: 0.02
  #   hot_modules:
  #     - sadcl_learned_embeddin
  #   train_batch_size: 24
  #   valid_batch_size: 24
  #   peft_params:
  #     kind: encoder_emb
  #     n_tokens: 5
  #     mlp_emb: 128
  # - <<: *default
  #   wandb_name: large_10t_mlp128
  #   learning_rate: 0.02
  #   hot_modules:
  #     - sadcl_learned_embeddin
  #   train_batch_size: 24
  #   valid_batch_size: 24
  #   peft_params:
  #     kind: encoder_emb
  #     n_tokens: 10
  #     mlp_emb: 128
  # - <<: *default
  #   wandb_name: large_5t_mlp128_not_freeze
  #   learning_rate: 0.02
  #   hot_modules:
  #     - sadcl
  #   train_batch_size: 24
  #   valid_batch_size: 24
  #   peft_params:
  #     kind: encoder_emb
  #     n_tokens: 5
  #     mlp_emb: 128
  # - <<: *default
  #   wandb_name: large_10t_mlp128_not_freeze
  #   learning_rate: 0.02
  #   hot_modules:
  #     - sadcl
  #   train_batch_size: 24
  #   valid_batch_size: 24
  #   peft_params:
  #     kind: encoder_emb
  #     n_tokens: 10
  #     mlp_emb: 128
  # - <<: *default
  #   wandb_name: large_5t_mlp128_not_freeze_lowlr
  #   learning_rate: 0.001
  #   hot_modules:
  #     - sadcl
  #   train_batch_size: 24
  #   valid_batch_size: 24
  #   peft_params:
  #     kind: encoder_emb
  #     n_tokens: 5
  #     mlp_emb: 128
  # - <<: *default
  #   wandb_name: large_10t_mlp128_not_freeze_lowlr
  #   learning_rate: 0.001
  #   hot_modules:
  #     - sadcl
  #   train_batch_size: 24
  #   valid_batch_size: 24
  #   peft_params:
  #     kind: encoder_emb
  #     n_tokens: 10
  #     mlp_emb: 128
  - <<: *default
    wandb_name: large_100t_mlp128_lr.02
    learning_rate: 0.02
    hot_modules:
      - sadcl_learned_embeddin
    train_batch_size: 24
    valid_batch_size: 24
    peft_params:
      kind: encoder_emb
      n_tokens: 100
      mlp_emb: 128