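# Ray Tune hyperparameter search for MetaTL: parse CLI arguments, load the
# dataset, and run an ASHA-scheduled search over embedding size, learning rate,
# beta, and margin, reporting MRR / NDCG / Hits metrics for each trial.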
import argparse
import copy
import random
from functools import partial

import numpy as np
import torch
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler

from hyper_tunning import train_metatl
from trainer import *
from utils import *
from sampler import *
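# Note: data_load (used below) is assumed to be provided by the wildcard import
# from utils; WarpSampler and DataLoader (referenced only in commented-out
# sampler code) are assumed to come from sampler.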
def get_params():
    args = argparse.ArgumentParser()
    args.add_argument("-data", "--dataset", default="electronics", type=str)
    args.add_argument("-seed", "--seed", default=None, type=int)
    args.add_argument("-K", "--K", default=3, type=int)  # number of shots (support-set size)

    # args.add_argument("-dim", "--embed_dim", default=100, type=int)
    args.add_argument("-bs", "--batch_size", default=1024, type=int)
    # args.add_argument("-lr", "--learning_rate", default=0.001, type=float)

    args.add_argument("-epo", "--epoch", default=100000, type=int)
    # args.add_argument("-prt_epo", "--print_epoch", default=100, type=int)
    # args.add_argument("-eval_epo", "--eval_epoch", default=1000, type=int)

    # args.add_argument("-b", "--beta", default=5, type=float)
    # args.add_argument("-m", "--margin", default=1, type=float)
    # args.add_argument("-p", "--dropout_p", default=0.5, type=float)

    # args.add_argument("-gpu", "--device", default=1, type=int)

    args = args.parse_args()

    # Mirror the parsed arguments into a plain dict so each trial can carry them in its config.
    params = {k: v for k, v in vars(args).items()}

    # Fall back to CPU when CUDA is unavailable instead of failing at runtime.
    params['device'] = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    return params, args

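# Example invocation (the script filename here is an assumption; adjust it to
# the actual entry point of this repository):
#   python hyper_main.py --dataset electronics --seed 42 -K 3 --batch_size 1024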
def main(num_samples, gpus_per_trial=2):

    params, args = get_params()

    if params['seed'] is not None:
        # Seed every RNG in use and make cuDNN deterministic for reproducible trials.
        SEED = params['seed']
        torch.manual_seed(SEED)
        torch.cuda.manual_seed(SEED)
        torch.backends.cudnn.deterministic = True
        np.random.seed(SEED)
        random.seed(SEED)

    user_train, usernum_train, itemnum, user_input_test, user_test, user_input_valid, user_valid = data_load(args.dataset, args.K)

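    # data_load is assumed to return the per-user training interactions, the user/item
    # counts, and the few-shot inputs and targets for the test and validation splits.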
    batch_size = params['batch_size']
    # sampler = WarpSampler(user_train, usernum_train, itemnum, batch_size=batch_size, maxlen=args.K, n_workers=1)
    # sampler_test = DataLoader(user_input_test, user_test, itemnum, params)
    # sampler_valid = DataLoader(user_input_valid, user_valid, itemnum, params)

    config = {
        # "l1": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        # "l2": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        # "lr": tune.loguniform(1e-4, 1e-1),

        "embed_dim": tune.choice([50, 75, 100, 125, 150, 200, 300]),
        # "batch_size": tune.choice([128, 256, 512, 1024, 2048]),
        "learning_rate": tune.choice([0.1, 0.01, 0.004, 0.005, 0.007, 0.001, 0.0001]),
        "beta": tune.choice([0.05, 0.1, 1, 4, 4.5, 5, 5.5, 6, 10]),
        "margin": tune.choice([1, 0.9, 0.8, 1.1, 1.2]),

        # "sampler": sampler,
        # "sampler_test": sampler_test,
        # "sampler_valid": sampler_valid,

        "itemnum": itemnum,
        "params": params,
    }

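    # Each trial calls train_metatl(config) from hyper_tunning. A minimal sketch of the
    # expected trainable, assuming it follows Ray Tune's function API and reports the
    # metric names used by the scheduler and reporter below (an illustration, not the
    # actual implementation):
    #
    #   def train_metatl(config, checkpoint_dir=None):
    #       params = config["params"]
    #       model = build_model(config["embed_dim"], config["itemnum"])  # hypothetical helper
    #       for step in range(params["epoch"]):
    #           ...  # one meta-training step using config["learning_rate"], config["beta"], config["margin"]
    #           tune.report(MRR=mrr, NDCG10=n10, NDCG5=n5, NDCG1=n1,
    #                       Hits10=h10, Hits5=h5, Hits1=h1)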
    scheduler = ASHAScheduler(
        metric="MRR",
        mode="max",
        max_t=params['epoch'],
        grace_period=200,
        reduction_factor=2)
    reporter = CLIReporter(
        # parameter_columns=["l1", "l2", "lr", "batch_size"],
        metric_columns=["MRR", "NDCG10", "NDCG5", "NDCG1", "Hits10", "Hits5", "Hits1", "training_iteration"])
    result = tune.run(
        train_metatl,
        resources_per_trial={"cpu": 4, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        log_to_file=True,
        # resume=True,
        local_dir="/media/external_10TB/10TB/maheri/metaTL_ray/ray_local_dir",
        name="metatl_rnn1",
    )
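    # tune.run returns an analysis object that is queried below via get_best_trial and
    # results_df. ASHA stops weak trials early after the 200-iteration grace period, and
    # the reporter's metric_columns only display values that train_metatl actually
    # reports under those exact names.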
    best_trial = result.get_best_trial("MRR", "max", "last")
    print("Best trial config: {}".format(best_trial.config))
    # train_metatl reports ranking metrics only (there is no "loss" key), so print those.
    print("Best trial final validation MRR: {}".format(
        best_trial.last_result["MRR"]))
    print("Best trial final validation NDCG@1: {}".format(
        best_trial.last_result["NDCG1"]))

    print("=======================================================")
    print(result.results_df)
    print("=======================================================\n")

    # best_trained_model = Net(best_trial.config["l1"], best_trial.config["l2"])
    # device = "cpu"
    # if torch.cuda.is_available():
    #     device = "cuda:0"
    #     if gpus_per_trial > 1:
    #         best_trained_model = nn.DataParallel(best_trained_model)
    # best_trained_model.to(device)
    #
    # best_checkpoint_dir = best_trial.checkpoint.value
    # model_state, optimizer_state = torch.load(os.path.join(
    #     best_checkpoint_dir, "checkpoint"))
    # best_trained_model.load_state_dict(model_state)
    #
    # test_acc = test_accuracy(best_trained_model, device)
    # print("Best trial test set accuracy: {}".format(test_acc))

if __name__ == "__main__":
    # You can change the number of GPUs per trial here:
    main(num_samples=150, gpus_per_trial=1)