You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

config1.yaml 2.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  # Baseline settings shared by every run. The mapping is anchored as
  # `default` so that entries under `run_configs` can pull it in with the
  # `<<: *default` merge key.
  # NOTE(review): the nesting was reconstructed from a paste that had lost its
  # indentation and had list numbers fused into each line. hot_modules,
  # best_finder and tasks are presumed to belong to `default` (the run entries
  # define none of them) -- confirm against the config loader.
  default: &default
    use_tqdm: true
    random_seed: 42
    base_save_path: /disks/ssd/trained_final/sing_thesis
    model_name: google/t5-small-lm-adapt
    project_name_prefix: sing_thesis
    experiment_name_suffix: null
    train_batch_size: 24
    valid_batch_size: 24
    remove_dropout: true
    learning_rate: 0.01
    weight_decay: 0.01
    num_epochs: 20
    # Left null here; a run that merges `default` supplies its own
    # peft_params mapping, which replaces this value (merge is shallow).
    peft_params: null  # no mutation
    hot_modules:
      - sadcl
    best_finder:
      save: true
      metric: valid_mean
      higher_better: true
    tasks:
      - glue:qqp
      - glue:mnli
      - glue:qnli
  # Anchor `pp` for a list of checkpoint paths; the only visible references
  # to it are the commented-out `pretrained_paths: *pp` lines. Every list
  # entry is currently commented out, so the anchored value is null.
  # NOTE(review): presumed to be a top-level key (indentation was lost in the
  # paste) -- confirm.
  pp: &pp
    # - /disks/ssd/hzi_trained/hzi_cluster_t5_small_glue-mnli/10_combine_128
    # - /disks/ssd/hzi_trained/hzi_cluster_t5_small_glue-qqp/10_combine_128
    # - /disks/ssd/trained_final/cont_thesis/cont_thesis_t5_base_glue-mrpc/10_combine_128
    # - /disks/ssd/trained_final/cont_thesis/cont_thesis_t5_base_glue-cola/10_combine_128_simple
    # - /disks/ssd/trained_final/cont_thesis/cont_thesis_t5_base_glue-stsb/10_combine_128_simple
    # - /disks/ssd/trained_final/cont_thesis/cont_thesis_t5_base_glue-sst2/10_combine_128_simple
    # - /disks/ssd/trained_final/cont_thesis/cont_thesis_t5_base_superglue-rte/10_combine_128_simple
    # - /disks/ssd/trained_final/cont_thesis/cont_thesis_t5_base_superglue-cb/10_combine_128_simple
    # - /disks/ssd/trained_final/cont_thesis/cont_thesis_t5_base_superglue-copa/10_combine_128_simple
  # List of runs. Each entry merges the `default` anchor and then sets its own
  # peft_params; an explicit key in the entry overrides the merged-in one.
  # NOTE(review): nesting was reconstructed from a paste that had lost its
  # indentation -- confirm against the config loader.
  # NOTE(review): the spellings `gumbal` and `tempreture` in the disabled
  # entries are kept verbatim; presumably they match what the consuming code
  # reads -- verify before re-enabling or renaming.
  run_configs:
    - <<: *default
      peft_params:
        kind: combine
        n_tokens: 50
        n_comb_tokens: 128
        # pretrained_paths: *pp
        use_pretrained_mode: simple
    # - <<: *default
    #   peft_params:
    #     kind: combine
    #     n_tokens: 10
    #     n_comb_tokens: 128
    #     pretrained_paths: *pp
    #     use_pretrained_mode: gumbal
    # - <<: *default
    #   peft_params:
    #     kind: combine
    #     n_tokens: 10
    #     n_comb_tokens: 128
    #     pretrained_paths: *pp
    #     use_pretrained_mode: softmax
    #     tempreture: 0.2
    # - <<: *default
    #   peft_params:
    #     kind: combine
    #     n_tokens: 10
    #     n_comb_tokens: 128
    #     pretrained_paths: *pp
    #     use_pretrained_mode: softmax
    #     tempreture: 1.
    # - <<: *default
    #   peft_params:
    #     kind: combine
    #     n_tokens: 10
    #     n_comb_tokens: 128
    #     pretrained_paths: *pp
    #     use_pretrained_mode: softmax
    #     tempreture: 5.
  74. # - <<: *default
  75. # peft_params:
  76. # kind: combine
  77. # n_tokens: 10
  78. # n_comb_tokens: 128