import os

# NOTE: CUDA_VISIBLE_DEVICES must be exported before torch/ray are imported,
# so the CUDA runtime only ever sees physical GPU 1.
os.environ['CUDA_VISIBLE_DEVICES'] = "1"

from ray.tune.schedulers import ASHAScheduler
from ray.tune import CLIReporter
from ray import tune
from functools import partial
from hyper_tunning import train_metatl
import argparse
import numpy as np
import torch
import random

# The wildcard imports intentionally stay after the named ones (original
# order preserved: later imports win any name collisions).
from trainer import *
from utils import *
from sampler import *
import copy
-
def get_params():
    """Parse the command-line arguments for the hyper-parameter search.

    Returns:
        tuple: ``(params, args)`` where ``params`` is a plain dict copy of
        the parsed arguments plus a ``'device'`` entry, and ``args`` is the
        original ``argparse.Namespace``.
    """
    # Use a dedicated name for the parser instead of reusing (and then
    # shadowing) `args` for both the parser and the parsed namespace.
    parser = argparse.ArgumentParser()
    parser.add_argument("-data", "--dataset", default="electronics", type=str)
    parser.add_argument("-seed", "--seed", default=None, type=int)
    parser.add_argument("-K", "--K", default=3, type=int)  # number of shots
    parser.add_argument("-epo", "--epoch", default=100000, type=int)

    args = parser.parse_args()

    # Expose the namespace as a mutable dict so extra keys can be attached
    # (idiomatic replacement for the manual key-by-key copy loop).
    params = dict(vars(args))

    # Logical device index inside a trial; CUDA_VISIBLE_DEVICES already
    # restricts which physical GPU index 0 maps to.
    params['device'] = 0
    return params, args
-
-
-
def main(num_samples, gpus_per_trial=2):
    """Run a Ray Tune hyper-parameter search for MetaTL.

    Args:
        num_samples: number of hyper-parameter configurations to sample.
        gpus_per_trial: GPU fraction allocated to each trial (e.g. 0.5 lets
            two trials share one GPU).
    """
    print("===============", torch.cuda.device_count(), "=======")
    params, args = get_params()

    # Make the search reproducible when an explicit seed is supplied.
    if params['seed'] is not None:
        SEED = params['seed']
        torch.manual_seed(SEED)
        torch.cuda.manual_seed(SEED)
        torch.backends.cudnn.deterministic = True
        np.random.seed(SEED)
        random.seed(SEED)

    # Load the dataset once up-front; `itemnum` is forwarded to every trial
    # through `config`.
    user_train, usernum_train, itemnum, user_input_test, user_test, user_input_valid, user_valid = data_load(args.dataset, args.K)

    # Search space. The fixed entries (device/itemnum/params) ride along in
    # the same dict because Ray passes `config` verbatim to the trainable.
    config = {
        "embed_dim": tune.choice([50, 75, 100, 125, 150, 200, 300]),
        "learning_rate": tune.choice([0.1, 0.01, 0.004, 0.005, 0.007, 0.001, 0.0001]),
        "beta": tune.choice([0.05, 0.1, 1, 4, 4.5, 5, 5.5, 6, 10]),
        "margin": tune.choice([1, 0.9, 0.8, 1.1, 1.2]),
        "batch_size": tune.choice([128, 256, 512, 1024, 2048]),
        "number_of_neg": tune.choice([1, 3, 5, 7, 10, 20, 30, 50, 70]),
        "loss_function": tune.choice(["bpr"]),
        "eval_epoch": tune.choice([100, 250, 500, 1000, 1500]),
        'device': params['device'],
        "itemnum": itemnum,
        "params": params,
    }

    print("===============", torch.cuda.device_count(), "=======")
    # Early-stop unpromising trials on MRR; grace_period keeps every trial
    # alive for at least 200 iterations before it may be halted.
    scheduler = ASHAScheduler(
        metric="MRR",
        mode="max",
        max_t=params['epoch'],
        grace_period=200,
        reduction_factor=2)
    reporter = CLIReporter(
        metric_columns=["MRR", "NDCG10", "NDCG5", "NDCG1", "Hits10", "Hits5", "Hits1", "training_iteration"])
    result = tune.run(
        train_metatl,
        resources_per_trial={"cpu": 4, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        log_to_file=True,
        # NOTE(review): resume=True requires a previous run under this name
        # in local_dir — a first run will fail; confirm this is intended.
        resume=True,
        local_dir="/media/external_10TB/10TB/maheri/metaTL_ray/ray_local_dir",
        name="bpr_rnn",
    )

    best_trial = result.get_best_trial("MRR", "max", "last")
    print("Best trial config: {}".format(best_trial.config))
    # BUG FIX: the trial results tracked here are MRR/NDCG*/Hits* (see
    # metric_columns above) — there is no "loss" key and the NDCG key is
    # spelled "NDCG1", not "NDCG@1". Indexing those keys raised KeyError
    # after an otherwise successful search; use .get() / the real key.
    print("Best trial final validation loss: {}".format(
        best_trial.last_result.get("loss", "n/a")))
    print("Best trial final validation MRR: {}".format(
        best_trial.last_result["MRR"]))
    print("Best trial final validation NDCG@1: {}".format(
        best_trial.last_result.get("NDCG1", "n/a")))

    print("=======================================================")
    print(result.results_df)
    print("=======================================================\n")
-
-
-
if __name__ == "__main__":
    # Sample 150 configurations; gpus_per_trial=0.5 packs two trials per GPU.
    main(num_samples=150, gpus_per_trial=0.5)