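"""Hyperparameter search driver built on Ray Tune.

Runs `num_samples` trials of `train_melu` over the search space defined in
`main`, prunes under-performing trials with ASHA, and prints the best trial's
configuration and final validation metrics.
"""
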
from functools import partial
import os

import numpy as np
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler

from hyper_tunning import load_data, train_melu


def main(num_samples, max_num_epochs=20, gpus_per_trial=2):
    data_dir = os.path.abspath("/media/external_10TB/10TB/maheri/melu_data5")
    load_data(data_dir)
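
    # Search space for Ray Tune. The commented-out entries are earlier
    # candidates kept for reference; the loguniform ranges sample learning
    # rates on a log scale.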
    config = {
        # "l1": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        # "l2": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        # "lr": tune.loguniform(1e-4, 1e-1),
        # "batch_size": tune.choice([2, 4, 8, 16]),
        "transformer": tune.choice(["kronoker"]),
        "meta_algo": tune.choice(["gbml"]),
        "first_order": tune.choice([False]),
        "adapt_transform": tune.choice([True, False]),
        # "local_lr": tune.choice([5e-6, 5e-4, 5e-3]),
        # "lr": tune.choice([5e-5, 5e-4]),
        "local_lr": tune.loguniform(5e-6, 5e-3),
        "lr": tune.loguniform(5e-5, 5e-3),
        "batch_size": tune.choice([16, 32, 64]),
        "inner": tune.choice([7, 5, 4, 3, 1]),
        "test_state": tune.choice(["user_and_item_cold_state"]),
        # "epochs": tune.choice([5, 10, 20, 25]),
    }

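    # ASHA early-stops under-performing trials: every trial runs for at least
    # `grace_period` iterations and at most `max_num_epochs`, and only the
    # better half (reduction_factor=2) is promoted at each rung.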
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=max_num_epochs,
        grace_period=6,
        reduction_factor=2)
    reporter = CLIReporter(
        # parameter_columns=["l1", "l2", "lr", "batch_size"],
        metric_columns=["loss", "ndcg1", "ndcg3", "training_iteration"])
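
    # tune.run launches `num_samples` trials, each allocated 4 CPUs and
    # `gpus_per_trial` GPUs. `train_melu` is expected to report "loss",
    # "ndcg1", and "ndcg3" back to Tune so the scheduler and reporter can
    # track them.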
    result = tune.run(
        partial(train_melu, data_dir=data_dir),
        resources_per_trial={"cpu": 4, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        log_to_file=True,
        # resume=True,
        local_dir="./hyper_tunning_all_cold",
        name="melu_all_cold",
    )

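    # Report the configuration and final validation metrics of the trial with
    # the lowest validation loss at its last reported iteration.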
    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation ndcg1: {}".format(
        best_trial.last_result["ndcg1"]))
    print("Best trial final validation ndcg3: {}".format(
        best_trial.last_result["ndcg3"]))

    print("=======================================================")
    print(result.results_df)
    print("=======================================================\n")

    # best_trained_model = Net(best_trial.config["l1"], best_trial.config["l2"])
    # device = "cpu"
    # if torch.cuda.is_available():
    #     device = "cuda:0"
    #     if gpus_per_trial > 1:
    #         best_trained_model = nn.DataParallel(best_trained_model)
    # best_trained_model.to(device)
    #
    # best_checkpoint_dir = best_trial.checkpoint.value
    # model_state, optimizer_state = torch.load(os.path.join(
    #     best_checkpoint_dir, "checkpoint"))
    # best_trained_model.load_state_dict(model_state)
    #
    # test_acc = test_accuracy(best_trained_model, device)
    # print("Best trial test set accuracy: {}".format(test_acc))


if __name__ == "__main__":
    # You can change the number of GPUs per trial here:
    main(num_samples=150, max_num_epochs=25, gpus_per_trial=1)