from ray.tune.schedulers import ASHAScheduler
from ray.tune import CLIReporter
from ray import tune
from functools import partial
from hyper_tunning import train_metatl
import argparse
import numpy as np
import torch
import random
from trainer import *
from utils import *
from sampler import *
import copy


def get_params():
    args = argparse.ArgumentParser()
    args.add_argument("-data", "--dataset", default="electronics", type=str)
    args.add_argument("-seed", "--seed", default=None, type=int)
    args.add_argument("-K", "--K", default=3, type=int)  # number of shots
    # args.add_argument("-dim", "--embed_dim", default=100, type=int)
    args.add_argument("-bs", "--batch_size", default=1024, type=int)
    # args.add_argument("-lr", "--learning_rate", default=0.001, type=float)
    args.add_argument("-epo", "--epoch", default=100000, type=int)
    # args.add_argument("-prt_epo", "--print_epoch", default=100, type=int)
    # args.add_argument("-eval_epo", "--eval_epoch", default=1000, type=int)
    # args.add_argument("-b", "--beta", default=5, type=float)
    # args.add_argument("-m", "--margin", default=1, type=float)
    # args.add_argument("-p", "--dropout_p", default=0.5, type=float)
    # args.add_argument("-gpu", "--device", default=1, type=int)

    args = args.parse_args()

    # Copy the parsed arguments into a plain dict and pin the device.
    params = {}
    for k, v in vars(args).items():
        params[k] = v
    params['device'] = torch.device('cuda:0')

    return params, args


def main(num_samples, gpus_per_trial=2):
    params, args = get_params()

    # Make runs reproducible when a seed is given.
    if params['seed'] is not None:
        SEED = params['seed']
        torch.manual_seed(SEED)
        torch.cuda.manual_seed(SEED)
        torch.backends.cudnn.deterministic = True
        np.random.seed(SEED)
        random.seed(SEED)

    user_train, usernum_train, itemnum, user_input_test, user_test, user_input_valid, user_valid = \
        data_load(args.dataset, args.K)
    batch_size = params['batch_size']

    # sampler = WarpSampler(user_train, usernum_train, itemnum, batch_size=batch_size, maxlen=args.K, n_workers=1)
    # sampler_test = DataLoader(user_input_test, user_test, itemnum, params)
    # sampler_valid = DataLoader(user_input_valid, user_valid, itemnum, params)

    # Search space for Ray Tune; itemnum and params are passed through to every trial as constants.
    config = {
        # "l1": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        # "l2": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        # "lr": tune.loguniform(1e-4, 1e-1),
        "embed_dim": tune.choice([50, 75, 100, 125, 150, 200, 300]),
        # "batch_size": tune.choice([128, 256, 512, 1024, 2048]),
        "learning_rate": tune.choice([0.1, 0.01, 0.004, 0.005, 0.007, 0.001, 0.0001]),
        "beta": tune.choice([0.05, 0.1, 1, 4, 4.5, 5, 5.5, 6, 10]),
        "margin": tune.choice([1, 0.9, 0.8, 1.1, 1.2]),
        # "sampler": sampler,
        # "sampler_test": sampler_test,
        # "sampler_valid": sampler_valid,
        "itemnum": itemnum,
        "params": params,
    }

    # ASHA stops under-performing trials early, keeping those with the best MRR.
    scheduler = ASHAScheduler(
        metric="MRR",
        mode="max",
        max_t=params['epoch'],
        grace_period=200,
        reduction_factor=2)

    reporter = CLIReporter(
        # parameter_columns=["l1", "l2", "lr", "batch_size"],
        metric_columns=["MRR", "NDCG10", "NDCG5", "NDCG1",
                        "Hits10", "Hits5", "Hits1", "training_iteration"])

    result = tune.run(
        train_metatl,
        resources_per_trial={"cpu": 4, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        log_to_file=True,
        # resume=True,
        local_dir="/media/external_10TB/10TB/maheri/metaTL_ray/ray_local_dir",
        name="metatl_rnn1",
    )

    best_trial = result.get_best_trial("MRR", "max", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation MRR: {}".format(
        best_trial.last_result["MRR"]))
    print("Best trial final validation NDCG@1: {}".format(
        best_trial.last_result["NDCG@1"]))

    # print("=======================================================")
    print(result.results_df)
    print("=======================================================\n")

    # best_trained_model = Net(best_trial.config["l1"], best_trial.config["l2"])
    # device = "cpu"
    # if torch.cuda.is_available():
    #     device = "cuda:0"
    #     if gpus_per_trial > 1:
    #         best_trained_model = nn.DataParallel(best_trained_model)
    # best_trained_model.to(device)
    #
    # best_checkpoint_dir = best_trial.checkpoint.value
    # model_state, optimizer_state = torch.load(os.path.join(
    #     best_checkpoint_dir, "checkpoint"))
    # best_trained_model.load_state_dict(model_state)
    #
    # test_acc = test_accuracy(best_trained_model, device)
    # print("Best trial test set accuracy: {}".format(test_acc))


if __name__ == "__main__":
    # You can change the number of GPUs per trial here:
    main(num_samples=150, gpus_per_trial=1)