Extend the MeLU code to run different meta-learning algorithms and hyperparameter configurations

hyper_main.py:

from hyper_tunning import load_data
import os
from ray.tune.schedulers import ASHAScheduler
from ray.tune import CLIReporter
from ray import tune
from functools import partial
from hyper_tunning import train_melu
import numpy as np


def main(num_samples, max_num_epochs=20, gpus_per_trial=2):
    data_dir = os.path.abspath("/media/external_10TB/10TB/maheri/new_data_dir3")

    # Search space: meta-learning algorithm and its options, optimisation and
    # architecture hyperparameters, plus fixed item/user feature-vocabulary sizes.
    config = {
        # "l1": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        # "l2": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        # "lr": tune.loguniform(1e-4, 1e-1),
        # "batch_size": tune.choice([2, 4, 8, 16])
        "transformer": tune.choice(['kronoker']),
        "meta_algo": tune.choice(['gbml', 'metasgd']),
        "first_order": tune.choice([False]),
        "adapt_transform": tune.choice([True, False]),
        # "local_lr": tune.choice([5e-6, 5e-4, 5e-3]),
        # "lr": tune.choice([5e-5, 5e-4]),
        "local_lr": tune.loguniform(5e-6, 5e-3),
        "lr": tune.loguniform(5e-5, 5e-3),
        "batch_size": tune.choice([16, 32, 64]),
        "inner": tune.choice([1, 3, 5, 7]),
        "test_state": tune.choice(["user_and_item_cold_state"]),
        "embedding_dim": tune.choice([16, 32, 64]),
        "first_fc_hidden_dim": tune.choice([32, 64, 128]),
        "second_fc_hidden_dim": tune.choice([32, 64]),
        'cluster_h1_dim': tune.choice([256, 128, 64]),
        'cluster_h2_dim': tune.choice([128, 64, 32]),
        'cluster_final_dim': tune.choice([64, 32]),
        'cluster_dropout_rate': tune.choice([0, 0.01, 0.1]),
        'cluster_k': tune.choice([3, 5, 7, 9, 11]),
        'temperature': tune.choice([0.001, 0.1, 0.5, 1.0, 2.0, 10.0]),
        'trainer_dropout_rate': tune.choice([0, 0.01, 0.1]),
        'use_cuda': tune.choice([True]),
        # item features
        'num_rate': tune.choice([6]),
        'num_genre': tune.choice([25]),
        'num_director': tune.choice([2186]),
        'num_actor': tune.choice([8030]),
        # user features
        'num_gender': tune.choice([2]),
        'num_age': tune.choice([7]),
        'num_occupation': tune.choice([21]),
        'num_zipcode': tune.choice([3402]),
        'num_epoch': tune.choice([30]),
    }

    # ASHA early-stops unpromising trials based on the reported validation loss.
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=max_num_epochs,
        grace_period=10,
        reduction_factor=2)
    reporter = CLIReporter(
        # parameter_columns=["l1", "l2", "lr", "batch_size"],
        metric_columns=["loss", "ndcg1", "ndcg3", "training_iteration"])
    result = tune.run(
        partial(train_melu, data_dir=data_dir),
        resources_per_trial={"cpu": 8, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        log_to_file=True,
        # resume=True,
        local_dir="./hyper_tunning_all_cold2",
        name="melu_all_cold_clustered",
    )

    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation ndcg1: {}".format(
        best_trial.last_result["ndcg1"]))
    print("Best trial final validation ndcg3: {}".format(
        best_trial.last_result["ndcg3"]))

    print("=======================================================")
    print(result.results_df)
    print("=======================================================\n")

    # best_trained_model = Net(best_trial.config["l1"], best_trial.config["l2"])
    # device = "cpu"
    # if torch.cuda.is_available():
    #     device = "cuda:0"
    #     if gpus_per_trial > 1:
    #         best_trained_model = nn.DataParallel(best_trained_model)
    # best_trained_model.to(device)
    #
    # best_checkpoint_dir = best_trial.checkpoint.value
    # model_state, optimizer_state = torch.load(os.path.join(
    #     best_checkpoint_dir, "checkpoint"))
    # best_trained_model.load_state_dict(model_state)
    #
    # test_acc = test_accuracy(best_trained_model, device)
    # print("Best trial test set accuracy: {}".format(test_acc))


if __name__ == "__main__":
    # You can change the number of GPUs per trial here:
    main(num_samples=150, max_num_epochs=30, gpus_per_trial=1)
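
hyper_main.py only defines the search space and launches the trials; the trainable it hands to Ray Tune, train_melu, lives in hyper_tunning.py and is not shown here. The config keys transformer='kronoker', meta_algo ('gbml' / 'metasgd'), first_order, adapt_transform and local_lr suggest that the meta-learning variants are built with the learn2learn library, but that is an assumption about code outside this file. Below is a minimal sketch under that assumption; build_meta_learner is a hypothetical helper, not a function from the repository.

# Hypothetical helper illustrating how the meta_algo / transformer / first_order /
# adapt_transform / local_lr keys could be consumed; not the actual repository code.
import learn2learn as l2l
import torch.nn as nn


def build_meta_learner(model: nn.Module, conf: dict):
    """Wrap a MeLU-style recommender in the meta-learning algorithm chosen by the trial config."""
    if conf["meta_algo"] == "metasgd":
        # MetaSGD learns a per-parameter inner-loop learning rate, initialised at local_lr.
        return l2l.algorithms.MetaSGD(model,
                                      lr=conf["local_lr"],
                                      first_order=conf["first_order"])
    # "gbml": gradient-based meta-learning with a learned update transform;
    # "kronoker" is read here as a Kronecker-factored transform.
    transform = l2l.optim.transforms.KroneckerTransform(l2l.nn.KroneckerLinear)
    return l2l.algorithms.GBML(model,
                               transform=transform,
                               lr=conf["local_lr"],
                               adapt_transform=conf["adapt_transform"],
                               first_order=conf["first_order"])

Both wrappers expose clone() / adapt(), so the "inner" hyperparameter in the config would control how many adaptation steps are taken on each user's support set. Whatever the exact implementation, the trainable presumably reports its metrics once per epoch, e.g. tune.report(loss=..., ndcg1=..., ndcg3=...); those keys are what the ASHAScheduler metric and the CLIReporter metric_columns above refer to, and what result.get_best_trial("loss", "min", "last") selects on.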