You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

config9.yaml 1.8KB

3 months ago
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  # Experiment configuration.
  #
  # Layout:
  #   default     - baseline settings, anchored as &default and merged into
  #                 run_configs entries with `<<: *default`.
  #   pp          - list of checkpoint paths, anchored as &pp and referenced as
  #                 `pretrained_paths` inside peft_params.
  #   run_configs - list of runs; keys written explicitly in an entry override
  #                 the values merged in from *default (merge is shallow).
  #
  # NOTE(review): the nesting below was reconstructed from a flattened copy of
  # this file; `pp` is assumed to be a top-level key (not a child of
  # `default`) - confirm against the original.

  default: &default
    use_tqdm: true
    random_seed: 42
    base_save_path: /home/msadraei/trained_final
    model_name: google/t5-small-lm-adapt
    project_name_prefix: iclr_attempt_lmt5
    experiment_name_suffix: null
    train_batch_size: 32
    valid_batch_size: 32
    remove_dropout: true
    learning_rate: 0.01
    weight_decay: 0.01
    num_epochs: 40
    peft_params: null  # no mutation
    hot_modules:
      - sadcl
    best_finder:
      save: true
      metric: valid_mean
      higher_better: true
    # Only the uncommented task entries are active.
    tasks:
      # - superglue:rte
      # - superglue:cb
      # - superglue:wic
      # - superglue:copa
      # - glue:cola
      # - glue:mrpc
      # - superglue:boolq
      # - glue:stsb
      - superglue:multirc

  # Checkpoint directories reused by run_configs via the *pp alias.
  pp: &pp
    - /home/msadraei/trained_final/hzi_cluster_t5_small_glue-mnli/10_combine_128
    - /home/msadraei/trained_final/hzi_cluster_t5_small_glue-sst2/10_combine_128
    - /home/msadraei/trained_final/hzi_cluster_t5_small_glue-qqp/10_combine_128
    - /home/msadraei/trained_final/hzi_cluster_t5_small_glue-qnli/10_combine_128

  # NOTE(review): the disabled entries that merge `*default_large` refer to an
  # anchor that is not defined in this file; define `&default_large` before
  # re-enabling them, otherwise loading fails on the undefined alias.
  run_configs:
    # - <<: *default
    #   learning_rate: 0.3
    #   weight_decay: 0.00001
    #   peft_params:
    #     kind: attempt
    #     n_tokens: 10
    #     g_bottleneck: 100
    #     pretrained_paths: *pp
    # - <<: *default_large
    #   learning_rate: 0.3
    #   weight_decay: 0.00001
    #   peft_params:
    #     kind: attempt
    #     n_tokens: 10
    #     g_bottleneck: 100
    #     pretrained_paths: *pp
    # Active run: baseline with dropout kept (remove_dropout: false) and the
    # peft_params block below replacing the baseline's null value.
    - <<: *default
      learning_rate: 0.3
      remove_dropout: false
      experiment_name_suffix: dropout
      weight_decay: 0.00001
      peft_params:
        kind: attempt
        n_tokens: 10
        g_bottleneck: 100
        pretrained_paths: *pp
    # - <<: *default_large
    #   learning_rate: 0.3
    #   remove_dropout: false
    #   experiment_name_suffix: dropout
    #   weight_decay: 0.00001
    #   peft_params:
    #     kind: attempt
    #     n_tokens: 10
    #     g_bottleneck: 100
    #     pretrained_paths: *pp