You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

config7.yaml 1.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  # Experiment configuration.
  #
  # NOTE(review): the nesting below was reconstructed from a copy whose
  # indentation had been flattened; confirm it against the schema the
  # consuming code expects before relying on it.

  # Baseline settings. Entries under `run_configs` pull these in with the
  # merge key (`<<: *default`). The merge is shallow: a key set explicitly in
  # a run entry replaces the whole value inherited from here.
  default: &default
    use_tqdm: true
    random_seed: 42
    base_save_path: /home/msadraei/trained_final
    model_name: t5-base
    project_name_prefix: iclr_orig_t5
    experiment_name_suffix: null
    train_batch_size: 24
    valid_batch_size: 24
    remove_dropout: true
    learning_rate: 0.01
    weight_decay: 0.01
    num_epochs: 80
    peft_params: null  # no mutation
    hot_modules:
      - sadcl
    best_finder:
      # Canonical lowercase boolean, consistent with the other flags here.
      save: true
      metric: valid_mean
      higher_better: true
    # Only the uncommented task is active; the rest are kept for quick
    # re-enabling.
    tasks:
      # - superglue:rte
      # - superglue:cb
      # - superglue:wic
      # - superglue:copa
      # - glue:cola
      # - glue:mrpc
      # - superglue:boolq
      # - glue:qqp
      # - glue:qnli
      # - glue:mnli
      # - glue:sst2
      # - glue:stsb
      - superglue:multirc

  # Anchored list of paths under the trained_final directory. No `*pp` alias
  # appears in the visible part of this file.
  # NOTE(review): presumably referenced by other run entries; confirm before
  # removing.
  pp: &pp
    - /home/msadraei/trained_final/hzi_cluster_t5_small_glue-mnli/10_combine_128
    - /home/msadraei/trained_final/hzi_cluster_t5_small_glue-sst2/10_combine_128
    - /home/msadraei/trained_final/hzi_cluster_t5_small_glue-qqp/10_combine_128

  # One entry per run. Each entry starts from `default` and overrides
  # `peft_params` (which is null in the baseline).
  run_configs:
    - <<: *default
      peft_params:
        kind: combine
        n_tokens: 10
        n_comb_tokens: 128
    # - <<: *default
    #   learning_rate: 0.3
    #   peft_params:
    #     kind: residual
    #     n_tokens: 10
    #     mlp_size: 128