Extends the MeLU code to run different meta-learning algorithms and hyperparameter searches.

hyper_main.py
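The Ray Tune entry point for the hyperparameter search: it defines the search space over the meta-learning, head, and clustering-module hyperparameters, runs ASHA-scheduled trials of train_melu from hyper_tunning.py, and prints the best trial's configuration and final validation metrics (loss, NDCG@1, NDCG@3).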

from functools import partial
import os

import numpy as np
import torch
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler

from hyper_tunning import load_data, train_melu


def main(num_samples, max_num_epochs=20, gpus_per_trial=2):
    data_dir = os.path.abspath("/media/external_10TB/10TB/maheri/new_data_dir3")
    # load_data(data_dir)

    # Search space: single-value tune.choice entries pin a hyperparameter
    # while keeping every key in the same config dict that train_melu reads.
    config = {
        # meta learning
        "meta_algo": tune.choice(['metasgd']),
        "transformer": tune.choice(['metasgd']),
        "first_order": tune.choice([True]),
        "adapt_transform": tune.choice([False]),
        "local_lr": tune.loguniform(5e-6, 5e-3),
        "lr": tune.loguniform(5e-5, 5e-3),
        "batch_size": tune.choice([16, 32, 64]),
        "inner": tune.choice([1, 3, 4, 5, 7]),
        "test_state": tune.choice(["user_and_item_cold_state"]),

        # head
        "embedding_dim": tune.choice([16, 32, 64]),
        "first_fc_hidden_dim": tune.choice([32, 64, 128]),
        "second_fc_hidden_dim": tune.choice([32, 64]),

        # clustering module
        'cluster_dropout_rate': tune.choice([0, 0.01, 0.1]),
        'cluster_k': tune.choice([3, 5, 7, 9, 11]),
        'kmeans_alpha': tune.choice([100, 0.1, 10, 20, 50, 200]),
        'rnn_dropout': tune.choice([0, 0.01, 0.1]),
        'rnn_hidden': tune.choice([32, 64, 128]),
        'rnn_l1': tune.choice([32, 64, 128]),
        'kmeans_loss_weight': tune.choice([0, 1, 10, 50, 100, 200]),
        'temperature': tune.choice([0.1, 0.5, 1.0, 2.0, 5.0, 10.0]),
        # 'trainer_dropout_rate': tune.choice([0, 0.01, 0.1]),
        'distribution_power': tune.choice([0.1, 0.8, 1, 3, 5, 7, 8, 9]),
        'data_selection_pow': tune.choice([0.6, 0.65, 0.7, 0.75, 0.8, 0.9, 1, 1.1, 1.2, 1.4]),
        'task_dim': tune.choice([16, 32, 64, 128, 256]),
        'trainer_dropout': tune.choice([0, 0.001, 0.01, 0.05, 0.1]),
        'label_noise_std': tune.choice([0, 0.01, 0.1, 0.2, 0.3, 1, 2]),
        'head_dropout': tune.choice([0, 0.001, 0.01, 0.05, 0.1]),

        # fixed training options and dataset feature vocabulary sizes
        'num_epoch': tune.choice([40]),
        'use_cuda': tune.choice([True]),
        'num_rate': tune.choice([6]),
        'num_genre': tune.choice([25]),
        'num_director': tune.choice([2186]),
        'num_actor': tune.choice([8030]),
        'num_gender': tune.choice([2]),
        'num_age': tune.choice([7]),
        'num_occupation': tune.choice([21]),
        'num_zipcode': tune.choice([3402]),
    }

    # ASHA stops weak trials early: each trial runs at least `grace_period`
    # epochs before it becomes eligible for pruning on the "loss" metric.
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=max_num_epochs,
        grace_period=10,
        reduction_factor=2)
    reporter = CLIReporter(
        # parameter_columns=["l1", "l2", "lr", "batch_size"],
        metric_columns=["loss", "ndcg1", "ndcg3", "training_iteration"])

    # Launch the search: each trial runs train_melu with a sampled config.
    result = tune.run(
        partial(train_melu, data_dir=data_dir),
        resources_per_trial={"cpu": 4, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        log_to_file=True,
        # resume=True,
        local_dir="./hyper_tunning_all_cold3",
        name="rnn_cluster_module",
    )

    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation ndcg1: {}".format(
        best_trial.last_result["ndcg1"]))
    print("Best trial final validation ndcg3: {}".format(
        best_trial.last_result["ndcg3"]))

    print("=======================================================")
    print(result.results_df)
    print("=======================================================\n")


if __name__ == "__main__":
    # You can change the number of GPUs per trial here:
    main(num_samples=150, max_num_epochs=50, gpus_per_trial=1)
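The trainable that tune.run launches lives in hyper_tunning.py and is not shown here. For context, a minimal sketch of the interface this script assumes: a function taking the sampled config plus the data_dir keyword, periodically reporting loss, ndcg1, and ndcg3 (the columns the CLIReporter and ASHAScheduler consume). Only the signature and metric names come from the script above; the loop body is a hypothetical placeholder, not the project's actual training code.

# Sketch of the trainable interface hyper_main.py assumes (placeholder body).
from ray import tune


def train_melu(config, data_dir=None):
    for epoch in range(config["num_epoch"]):
        # ... build/adapt the model from config ("embedding_dim", "lr",
        # "meta_algo", ...) and evaluate on the config["test_state"] split ...
        loss, ndcg1, ndcg3 = 0.0, 0.0, 0.0  # placeholders for real metrics
        # Each call advances training_iteration by one; ASHA compares trials
        # on "loss" at these checkpoints and stops the weak ones early.
        tune.report(loss=loss, ndcg1=ndcg1, ndcg3=ndcg3)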