Extend the MeLU code to run different meta-learning algorithms and hyperparameter configurations

hyper_main.py

from functools import partial
import os

import numpy as np
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler

from hyper_tunning import load_data, train_melu


def main(num_samples, max_num_epochs=20, gpus_per_trial=2):
    data_dir = os.path.abspath("/media/external_10TB/10TB/maheri/melu_data5")
    load_data(data_dir)

    # Search space: which meta-algorithm variant to run and its hyperparameters.
    config = {
        # "l1": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        # "l2": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        # "lr": tune.loguniform(1e-4, 1e-1),
        # "batch_size": tune.choice([2, 4, 8, 16]),
        "transformer": tune.choice(['kronoker']),
        "meta_algo": tune.choice(['gbml']),
        "first_order": tune.choice([False]),
        "adapt_transform": tune.choice([True, False]),
        # "local_lr": tune.choice([5e-6, 5e-4, 5e-3]),
        # "lr": tune.choice([5e-5, 5e-4]),
        "local_lr": tune.loguniform(5e-6, 5e-3),
        "lr": tune.loguniform(5e-5, 5e-3),
        "batch_size": tune.choice([16, 32, 64]),
        "inner": tune.choice([7, 5, 4, 3, 1]),
        "test_state": tune.choice(["user_and_item_cold_state"]),
        # "epochs": tune.choice([5, 10, 20, 25]),
    }

    # ASHA early-stops underperforming trials based on validation loss.
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=30,
        grace_period=6,
        reduction_factor=2)
    reporter = CLIReporter(
        # parameter_columns=["l1", "l2", "lr", "batch_size"],
        metric_columns=["loss", "ndcg1", "ndcg3", "training_iteration"])

    result = tune.run(
        partial(train_melu, data_dir=data_dir),
        resources_per_trial={"cpu": 4, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        log_to_file=True,
        # resume=True,
        local_dir="./hyper_tunning_all_cold",
        name="melu_all_cold",
    )

    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation ndcg1: {}".format(
        best_trial.last_result["ndcg1"]))
    print("Best trial final validation ndcg3: {}".format(
        best_trial.last_result["ndcg3"]))

    print("=======================================================")
    print(result.results_df)
    print("=======================================================\n")

    # Optional (currently disabled): reload the best checkpoint and evaluate it.
    # best_trained_model = Net(best_trial.config["l1"], best_trial.config["l2"])
    # device = "cpu"
    # if torch.cuda.is_available():
    #     device = "cuda:0"
    #     if gpus_per_trial > 1:
    #         best_trained_model = nn.DataParallel(best_trained_model)
    # best_trained_model.to(device)
    #
    # best_checkpoint_dir = best_trial.checkpoint.value
    # model_state, optimizer_state = torch.load(os.path.join(
    #     best_checkpoint_dir, "checkpoint"))
    # best_trained_model.load_state_dict(model_state)
    #
    # test_acc = test_accuracy(best_trained_model, device)
    # print("Best trial test set accuracy: {}".format(test_acc))


if __name__ == "__main__":
    # You can change the number of GPUs per trial here:
    main(num_samples=150, max_num_epochs=25, gpus_per_trial=1)
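For reference, the trainable passed to tune.run (train_melu from hyper_tunning.py, partially applied with data_dir) only has to accept the sampled config and report the metrics that the ASHAScheduler and CLIReporter above key on (loss, ndcg1, ndcg3). Below is a minimal sketch of that interface, assuming Ray Tune's function-trainable API; build_melu_model, train_one_epoch, and evaluate are hypothetical placeholders for the actual MeLU training and evaluation code, not functions from this repository.

    # Sketch only: the real train_melu lives in hyper_tunning.py.
    from ray import tune

    def train_melu(config, data_dir=None):
        # Hypothetical helper: build the MeLU variant selected by the config
        # (config["meta_algo"], config["transformer"], config["first_order"], ...).
        model = build_melu_model(config)

        for epoch in range(30):  # max_t=30 in the ASHAScheduler above
            # Hypothetical helper: one meta-training epoch with the sampled
            # outer/inner learning rates, batch size, and inner-loop steps.
            train_one_epoch(model, data_dir,
                            lr=config["lr"],
                            local_lr=config["local_lr"],
                            batch_size=config["batch_size"],
                            inner_steps=config["inner"])

            # Hypothetical helper: evaluate on the chosen cold-start split.
            loss, ndcg1, ndcg3 = evaluate(model, data_dir, config["test_state"])

            # Report the metrics ASHAScheduler prunes on and CLIReporter displays.
            tune.report(loss=loss, ndcg1=ndcg1, ndcg3=ndcg3)

Each tune.report call counts as one training_iteration, which is what ASHA compares against grace_period and max_t when deciding whether to stop a trial early.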