"""Hyperparameter search with Ray Tune over the meta-learning recommendation
model defined in hyper_tunning.py (trained via train_melu)."""

import os
from functools import partial

import numpy as np
import torch
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler

from hyper_tunning import load_data, train_melu


def main(num_samples, max_num_epochs=20, gpus_per_trial=2):
    data_dir = os.path.abspath("/media/external_10TB/10TB/maheri/new_data_dir3")
    # load_data(data_dir)

    # Search space for the sweep.
    config = {
        # meta learning
        "meta_algo": tune.choice(['metasgd']),
        "transformer": tune.choice(['metasgd']),
        "first_order": tune.choice([True]),
        "adapt_transform": tune.choice([False]),
        "local_lr": tune.loguniform(5e-6, 5e-3),
        "lr": tune.loguniform(5e-5, 5e-3),
        "batch_size": tune.choice([16, 32, 64]),
        "inner": tune.choice([1, 3, 4, 5, 7]),
        "test_state": tune.choice(["user_and_item_cold_state"]),

        # head
        "embedding_dim": tune.choice([16, 32, 64]),
        "first_fc_hidden_dim": tune.choice([32, 64, 128]),
        "second_fc_hidden_dim": tune.choice([32, 64]),

        # clustering module
        'cluster_dropout_rate': tune.choice([0, 0.01, 0.1]),
        'cluster_k': tune.choice([3, 5, 7, 9, 11]),
        'kmeans_alpha': tune.choice([100, 0.1, 10, 20, 50, 200]),
        'rnn_dropout': tune.choice([0, 0.01, 0.1]),
        'rnn_hidden': tune.choice([32, 64, 128]),
        'rnn_l1': tune.choice([32, 64, 128]),
        'kmeans_loss_weight': tune.choice([0, 1, 10, 50, 100, 200]),
        'temperature': tune.choice([0.1, 0.5, 1.0, 2.0, 5.0, 10.0]),

        # trainer / regularization
        # 'trainer_dropout_rate': tune.choice([0, 0.01, 0.1]),
        'distribution_power': tune.choice([0.1, 0.8, 1, 3, 5, 7, 8, 9]),
        'data_selection_pow': tune.choice([0.6, 0.65, 0.7, 0.75, 0.8, 0.9, 1, 1.1, 1.2, 1.4]),
        'task_dim': tune.choice([16, 32, 64, 128, 256]),
        'trainer_dropout': tune.choice([0, 0.001, 0.01, 0.05, 0.1]),
        'label_noise_std': tune.choice([0, 0.01, 0.1, 0.2, 0.3, 1, 2]),
        'head_dropout': tune.choice([0, 0.001, 0.01, 0.05, 0.1]),
        'num_epoch': tune.choice([40]),
        'use_cuda': tune.choice([True]),

        # dataset/feature cardinalities (fixed)
        'num_rate': tune.choice([6]),
        'num_genre': tune.choice([25]),
        'num_director': tune.choice([2186]),
        'num_actor': tune.choice([8030]),
        'num_gender': tune.choice([2]),
        'num_age': tune.choice([7]),
        'num_occupation': tune.choice([21]),
        'num_zipcode': tune.choice([3402]),
    }

    # Early-stop unpromising trials based on validation loss.
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=max_num_epochs,
        grace_period=10,
        reduction_factor=2)

    reporter = CLIReporter(
        # parameter_columns=["l1", "l2", "lr", "batch_size"],
        metric_columns=["loss", "ndcg1", "ndcg3", "training_iteration"])

    result = tune.run(
        partial(train_melu, data_dir=data_dir),
        # gpus_per_trial now controls the GPU share per trial (was hard-coded to 0.5).
        resources_per_trial={"cpu": 4, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        log_to_file=True,
        # resume=True,
        local_dir="./hyper_tunning_all_cold3",
        name="rnn_cluster_module",
    )

    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation ndcg1: {}".format(
        best_trial.last_result["ndcg1"]))
    print("Best trial final validation ndcg3: {}".format(
        best_trial.last_result["ndcg3"]))

    # print("=======================================================")
    print(result.results_df)
    print("=======================================================\n")


if __name__ == "__main__":
    # You can change the number of GPUs per trial here:
    main(num_samples=150, max_num_epochs=50, gpus_per_trial=1)
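
# ---------------------------------------------------------------------------
# Illustrative sketch (an assumption, not the project's actual trainer):
# tune.run() above wraps `train_melu` with functools.partial, so Ray Tune calls
# it as train_melu(config, data_dir=...) and expects it to report the metrics
# that the ASHAScheduler and CLIReporter consume ("loss", "ndcg1", "ndcg3").
# The dummy below only documents that interface, assuming the Ray 1.x
# function-trainable API (tune.report); the real train_melu lives in
# hyper_tunning.py and is unaffected by this sketch.
def _train_melu_interface_sketch(config, data_dir=None):
    for epoch in range(config["num_epoch"]):
        # A real trainer would run one meta-training epoch here and compute
        # validation loss / NDCG; placeholder values keep the sketch runnable.
        dummy_loss = 1.0 / (epoch + 1)
        # Reporting once per epoch is what lets ASHA stop weak trials early.
        tune.report(loss=dummy_loss, ndcg1=0.0, ndcg3=0.0)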