@@ -1,3 +1,6 @@
 import os
 os.environ['CUDA_VISIBLE_DEVICES'] = "1"
+from ray.tune.schedulers import ASHAScheduler
+from ray.tune import CLIReporter
+from ray import tune
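Note: CUDA_VISIBLE_DEVICES only takes effect if it is set before torch (or Ray) first initializes CUDA, so the environment assignment must stay above the torch import. A minimal sketch of the required ordering, assuming the same single-GPU pinning intent:

    import os

    # Pin this process to physical GPU 1 *before* importing torch; once CUDA
    # is initialized, later changes to this variable are ignored.
    os.environ['CUDA_VISIBLE_DEVICES'] = "1"

    import torch

    print(torch.cuda.device_count())  # prints 1 if physical GPU 1 exists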
@@ -19,7 +22,7 @@ def get_params():
     args.add_argument("-K", "--K", default=3, type=int)  # number of shots
     # args.add_argument("-dim", "--embed_dim", default=100, type=int)
-    args.add_argument("-bs", "--batch_size", default=1024, type=int)
+    # args.add_argument("-bs", "--batch_size", default=1024, type=int)
     # args.add_argument("-lr", "--learning_rate", default=0.001, type=float)
     args.add_argument("-epo", "--epoch", default=100000, type=int)
@@ -37,13 +40,14 @@ def get_params():
     for k, v in vars(args).items():
         params[k] = v
-    params['device'] = torch.device('cuda:0')
+    # params['device'] = torch.device('cuda:1')
+    params['device'] = 0
     return params, args

 def main(num_samples, gpus_per_trial=2):
     print("===============", torch.cuda.device_count(), "=======")
     params, args = get_params()
     if params['seed'] is not None:
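Note: storing the device as the bare index 0 works because Ray Tune narrows CUDA_VISIBLE_DEVICES for each trial, so every trial sees its assigned GPU as device 0; PyTorch also accepts a plain int wherever a device is expected (e.g. model.to(0)). A sketch of resolving the device inside a trial under that convention; resolve_device is a hypothetical helper, not part of this PR:

    import torch

    def resolve_device(device_index=0):
        # Inside a Ray Tune trial, CUDA_VISIBLE_DEVICES is already restricted
        # to the GPUs granted to this trial, so index 0 is the first of them.
        if torch.cuda.is_available():
            return torch.device(f'cuda:{device_index}')
        return torch.device('cpu')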
@@ -56,7 +60,7 @@ def main(num_samples, gpus_per_trial=2):
     user_train, usernum_train, itemnum, user_input_test, user_test, user_input_valid, user_valid = data_load(args.dataset, args.K)
-    batch_size = params['batch_size']
+    # batch_size = params['batch_size']
     # sampler = WarpSampler(user_train, usernum_train, itemnum, batch_size=batch_size, maxlen=args.K, n_workers=1)
     # sampler_test = DataLoader(user_input_test, user_test, itemnum, params)
     # sampler_valid = DataLoader(user_input_valid, user_valid, itemnum, params)
| @@ -72,14 +76,18 @@ def main(num_samples, gpus_per_trial=2): | |||
| "beta" : tune.choice([0.05,0.1,1,4,4.5,5,5.5,6,10]), | |||
| "margin" : tune.choice([1,0.9,0.8,1.1,1.2]), | |||
| # "sampler":sampler, | |||
| # "sampler_test":sampler_test, | |||
| # "sampler_valid":sampler_valid, | |||
| "batch_size" : tune.choice([128,256,512,1024,2048]), | |||
| "number_of_neg" : tune.choice([1,3,5,7,10,20,30,50,70]), | |||
| "loss_function" : tune.choice(["bpr"]), | |||
| "eval_epoch" : tune.choice([100,250,500,1000,1500]), | |||
| 'device' : params['device'], | |||
| "itemnum":itemnum, | |||
| "params":params, | |||
| } | |||
| print("===============", torch.cuda.device_count(), "=======") | |||
| scheduler = ASHAScheduler( | |||
| metric="MRR", | |||
| mode="max", | |||
@@ -99,7 +107,7 @@ def main(num_samples, gpus_per_trial=2):
         log_to_file=True,
         # resume=True,
         local_dir="/media/external_10TB/10TB/maheri/metaTL_ray/ray_local_dir",
-        name="metatl_rnn1",
+        name="bpr_rnn",
     )
     best_trial = result.get_best_trial("MRR", "max", "last")
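Note: for reference, the scheduler and tune.run pieces spread across the hunks above fit together roughly as below. This is a self-contained sketch against the legacy tune.run API the code targets; toy_trainable, grace_period=1, and reduction_factor=2 are illustrative assumptions, not values from this PR:

    from ray import tune
    from ray.tune.schedulers import ASHAScheduler

    def toy_trainable(conf):
        # Stand-in for train_metatl: report a fake MRR a few times.
        for step in range(3):
            tune.report(MRR=conf["learning_rate"] * (step + 1))

    config = {"learning_rate": tune.choice([0.0005, 0.001, 0.005])}

    scheduler = ASHAScheduler(
        metric="MRR",        # must match a keyword passed to tune.report(...)
        mode="max",
        grace_period=1,      # assumed: minimum reports before a trial can stop
        reduction_factor=2,  # assumed: how aggressively ASHA culls trials
    )

    result = tune.run(toy_trainable, config=config, num_samples=4,
                      scheduler=scheduler, name="asha_demo")
    print(result.get_best_trial("MRR", "max", "last").config)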
@@ -116,23 +124,8 @@ def main(num_samples, gpus_per_trial=2):
     print(result.results_df)
     print("=======================================================\n")
-    # best_trained_model = Net(best_trial.config["l1"], best_trial.config["l2"])
-    # device = "cpu"
-    # if torch.cuda.is_available():
-    #     device = "cuda:0"
-    #     if gpus_per_trial > 1:
-    #         best_trained_model = nn.DataParallel(best_trained_model)
-    # best_trained_model.to(device)
-    #
-    # best_checkpoint_dir = best_trial.checkpoint.value
-    # model_state, optimizer_state = torch.load(os.path.join(
-    #     best_checkpoint_dir, "checkpoint"))
-    # best_trained_model.load_state_dict(model_state)
-    #
-    # test_acc = test_accuracy(best_trained_model, device)
-    # print("Best trial test set accuracy: {}".format(test_acc))

 if __name__ == "__main__":
     # You can change the number of GPUs per trial here:
-    main(num_samples=150, gpus_per_trial=1)
+    main(num_samples=150, gpus_per_trial=0.5)
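Note: gpus_per_trial=0.5 lets Ray co-schedule two trials on each visible GPU. The fraction is scheduling bookkeeping only; GPU memory is not partitioned, so both trials' peak usage must fit on the card. A minimal sketch, with noop_trial as a hypothetical stand-in:

    from ray import tune

    def noop_trial(conf):
        tune.report(MRR=0.0)

    # With one visible GPU and gpu=0.5, two of these trials run concurrently.
    tune.run(noop_trial, num_samples=4,
             resources_per_trial={"cpu": 1, "gpu": 0.5})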
@@ -22,13 +22,16 @@ def train_metatl(conf, checkpoint_dir=None):
     random.seed(SEED)
     params = conf['params']
+    params['batch_size'] = conf['batch_size']
+    params['number_of_neg'] = conf['number_of_neg']
     user_train, usernum_train, itemnum, user_input_test, user_test, user_input_valid, user_valid = data_load(params['dataset'], params['K'])
-    sampler = WarpSampler(user_train, usernum_train, itemnum, batch_size=params['batch_size'], maxlen=params['K'], n_workers=1)
+    sampler = WarpSampler(user_train, usernum_train, itemnum, batch_size=params['batch_size'], maxlen=params['K'], n_workers=1, params=params)
     sampler_test = DataLoader(user_input_test, user_test, itemnum, params)
     sampler_valid = DataLoader(user_input_valid, user_valid, itemnum, params)
     ps = {
-        "batch_size": conf["params"]['batch_size'],
+        "batch_size": conf['batch_size'],
         "learning_rate": conf['learning_rate'],
         "epoch": conf["params"]['epoch'],
         "beta": conf['beta'],
@@ -36,11 +39,14 @@ def train_metatl(conf, checkpoint_dir=None):
         "margin": conf['margin'],
         "K": conf["params"]['K'],
+        "number_of_neg": conf["number_of_neg"],
+        "loss_function": conf["loss_function"],
+        "eval_epoch": conf["eval_epoch"],
         "device": params['device']
     }
     trainer = Trainer([sampler, sampler_valid, sampler_test], conf["itemnum"], ps)
     # trainer.train()
     if checkpoint_dir:
         print("===================== using checkpoint =====================")
         model_state, optimizer_state = torch.load(
@@ -48,9 +54,8 @@ def train_metatl(conf, checkpoint_dir=None):
         trainer.MetaTL.load_state_dict(model_state)
         trainer.optimizer.load_state_dict(optimizer_state)
-    for epoch in range(int(ps['epoch']/1000)):
-        for e in range(1000):
+    for epoch in range(int(ps['epoch'] / ps['eval_epoch'])):
+        for e in range(ps['eval_epoch']):
             # sample one batch from data_loader
             train_task, curr_rel = trainer.train_data_loader.next_batch()
             loss, _, _ = trainer.do_one_step(train_task, iseval=False, curr_rel=curr_rel)
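Note: the restructured loop evaluates and checkpoints every eval_epoch steps instead of every 1000, and int(epoch / eval_epoch) silently drops any remainder, so the configured step budget is only fully used when eval_epoch divides epoch. A small check with values from this diff:

    epoch = 100000     # default total steps from get_params
    eval_epoch = 250   # one of the tuned choices

    outer_rounds = int(epoch / eval_epoch)       # 400 evaluate/checkpoint rounds
    assert outer_rounds * eval_epoch == 100000   # preserved: 250 divides 100000

    # A non-divisor loses the remainder:
    assert int(100000 / 1500) * 1500 == 99000    # 1000 steps are never run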
@@ -58,10 +63,6 @@ def train_metatl(conf, checkpoint_dir=None):
         # do evaluation on specific epoch
         valid_data = trainer.eval(istest=False, epoch=(-1))
-        # print('Epoch {} Testing...'.format(e))
-        # test_data = self.eval(istest=True, epoch=e)
         with tune.checkpoint_dir(epoch) as checkpoint_dir:
             path = os.path.join(checkpoint_dir, "checkpoint")
             torch.save((trainer.MetaTL.state_dict(), trainer.optimizer.state_dict()), path)
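Note: tune.checkpoint_dir (the legacy function-trainable checkpoint API) hands out a per-iteration directory, and the save here pairs with the checkpoint_dir argument that Tune passes back into train_metatl on restore. A minimal sketch of both halves, assuming a model/optimizer pair like the trainer's:

    import os
    import torch
    from ray import tune

    def save_checkpoint(step, model, optimizer):
        # Tune tracks whatever is written inside this directory.
        with tune.checkpoint_dir(step) as checkpoint_dir:
            path = os.path.join(checkpoint_dir, "checkpoint")
            torch.save((model.state_dict(), optimizer.state_dict()), path)

    def restore_checkpoint(checkpoint_dir, model, optimizer):
        # Mirrors the `if checkpoint_dir:` branch at the top of train_metatl.
        model_state, optimizer_state = torch.load(
            os.path.join(checkpoint_dir, "checkpoint"))
        model.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)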
@@ -69,6 +70,6 @@ def train_metatl(conf, checkpoint_dir=None):
         tune.report(
             MRR=valid_data["MRR"], NDCG10=valid_data['NDCG@10'], NDCG5=valid_data["NDCG@5"], NDCG1=valid_data["NDCG@1"],
             Hits10=valid_data["Hits@10"], Hits5=valid_data["Hits@5"], Hits1=valid_data["Hits@1"],
-            training_iteration=epoch*1000
+            training_iteration=epoch * ps['eval_epoch']
         )
@@ -34,7 +34,6 @@ def get_params():
     params['device'] = torch.device('cuda:' + str(args.device))
     # params['device'] = torch.device('cpu')
     print("gpu:", params['device'])
     return params, args
@@ -53,7 +53,7 @@ class EmbeddingLearner(nn.Module):

 def bpr_loss(p_scores, n_values, device):
     ratio = int(n_values.shape[1] / p_scores.shape[1])
-    temp_pvalues = torch.tensor([]).cuda(device=device)
+    temp_pvalues = torch.tensor([], device=device)
     for i in range(p_scores.shape[1]):
         temp_pvalues = torch.cat((temp_pvalues, p_scores[:, i, None].expand(-1, ratio)), dim=1)
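Note: torch.tensor([], device=device) creates the accumulator directly on the target device and, unlike .cuda(device=device), also works when device is the CPU or a bare integer index. The expand/cat loop itself can be collapsed into one repeat_interleave call; a sketch assuming (batch, P) positives against (batch, P*ratio) negatives, with a textbook BPR reduction since the tail of bpr_loss is not shown in this hunk:

    import torch.nn.functional as F

    def bpr_loss_vectorized(p_scores, n_values):
        # Column block i repeats p_scores[:, i] `ratio` times, aligning each
        # positive with its group of negatives, exactly like the loop above.
        ratio = n_values.shape[1] // p_scores.shape[1]
        temp_pvalues = p_scores.repeat_interleave(ratio, dim=1)
        return -F.logsigmoid(temp_pvalues - n_values).mean()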
@@ -66,7 +66,7 @@ def bpr_loss(p_scores, n_values, device):

 def bpr_max_loss(p_scores, n_values, device):
     s = F.softmax(n_values, dim=1)
     ratio = int(n_values.shape[1] / p_scores.shape[1])
-    temp_pvalues = torch.tensor([]).cuda(device=device)
+    temp_pvalues = torch.tensor([], device=device)
     for i in range(p_scores.shape[1]):
         temp_pvalues = torch.cat((temp_pvalues, p_scores[:, i, None].expand(-1, ratio)), dim=1)
@@ -78,7 +78,7 @@ def bpr_max_loss(p_scores, n_values, device):

 def top_loss(p_scores, n_values, device):
     ratio = int(n_values.shape[1] / p_scores.shape[1])
-    temp_pvalues = torch.tensor([]).cuda(device=device)
+    temp_pvalues = torch.tensor([], device=device)
     for i in range(p_scores.shape[1]):
         temp_pvalues = torch.cat((temp_pvalues, p_scores[:, i, None].expand(-1, ratio)), dim=1)
@@ -94,7 +94,8 @@ def top_loss(p_scores, n_values, device):

 class MetaTL(nn.Module):
     def __init__(self, itemnum, parameter):
         super(MetaTL, self).__init__()
-        self.device = torch.device(parameter['device'])
+        # self.device = torch.device(parameter['device'])
+        self.device = parameter['device']
         self.beta = parameter['beta']
         # self.dropout_p = parameter['dropout_p']
         self.embed_dim = parameter['embed_dim']
@@ -106,7 +107,7 @@ class MetaTL(nn.Module):
         self.embedding_learner = EmbeddingLearner()
         # self.loss_func = nn.MarginRankingLoss(self.margin)
-        self.loss_func = top_loss
+        self.loss_func = bpr_loss
         self.rel_q_sharing = dict()
@@ -138,7 +139,7 @@ class MetaTL(nn.Module):
         p_score, n_score = self.embedding_learner(sup_neg_e1, sup_neg_e2, rel_s, K)
-        y = torch.Tensor([1]).to(self.device)
+        # y = torch.Tensor([1]).to(self.device)
         self.zero_grad()
         # sorted, indices = torch.sort(n_score, descending=True, dim=1)
@@ -21,7 +21,7 @@ def random_neq(l, r, s, user_train, usernum):
     return candid_item

-def sample_function_mixed(user_train, usernum, itemnum, batch_size, maxlen, result_queue, SEED):
+def sample_function_mixed(user_train, usernum, itemnum, batch_size, maxlen, result_queue, SEED, number_of_neg):
     def sample():
         if random.random() <= 1:
@@ -31,7 +31,7 @@ def sample_function_mixed(user_train, usernum, itemnum, batch_size, maxlen, result_queue, SEED):
             seq = np.zeros([maxlen], dtype=np.int32)
             pos = np.zeros([maxlen], dtype=np.int32)
-            neg = np.zeros([maxlen*5], dtype=np.int32)
+            neg = np.zeros([maxlen * number_of_neg], dtype=np.int32)
             if len(user_train[user]) < maxlen:
                 nxt_idx = len(user_train[user]) - 1
@@ -62,7 +62,7 @@ def sample_function_mixed(user_train, usernum, itemnum, batch_size, maxlen, result_queue, SEED):
             # for idx in range(maxlen*30 - 1):
             #     support_negative_triples.append([seq[-1], curr_rel, neg[idx]])
-            for j in range(5):
+            for j in range(number_of_neg):
                 for idx in range(maxlen - 1):
                     support_negative_triples.append([seq[idx], curr_rel, neg[j * maxlen + idx]])
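Note: the flat neg array is laid out as number_of_neg consecutive blocks of maxlen entries, so neg[j * maxlen + idx] selects the j-th negative for sequence position idx. A tiny illustration with assumed sizes:

    import numpy as np

    maxlen, number_of_neg = 3, 2
    neg = np.arange(maxlen * number_of_neg)  # two blocks of three: [0 1 2 | 3 4 5]

    for j in range(number_of_neg):
        for idx in range(maxlen - 1):
            # block j, position idx: positions 0..maxlen-2 each receive one
            # negative per block, number_of_neg in total
            print(j, idx, neg[j * maxlen + idx])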
@@ -116,7 +116,7 @@ def sample_function_mixed(user_train, usernum, itemnum, batch_size, maxlen, result_queue, SEED):
             result_queue.put(([support, support_negative, query, negative], curr_rel))

 class WarpSampler(object):
-    def __init__(self, User, usernum, itemnum, batch_size=64, maxlen=10, n_workers=1):
+    def __init__(self, User, usernum, itemnum, batch_size=64, maxlen=10, n_workers=1, params=None):
         self.result_queue = Queue(maxsize=n_workers * 10)
         self.processors = []
         for i in range(n_workers):
@@ -127,7 +127,8 @@ class WarpSampler(object):
                     batch_size,
                     maxlen,
                     self.result_queue,
-                    np.random.randint(2e9)
+                    np.random.randint(2e9),
+                    params['number_of_neg']
                 )))
             self.processors[-1].daemon = True
             self.processors[-1].start()
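Note: with params defaulting to None, any caller that omits it (like the commented-out WarpSampler call in the main script) would hit a TypeError on params['number_of_neg']. A defensive sketch, with resolve_number_of_neg as a hypothetical helper:

    def resolve_number_of_neg(params=None, default=5):
        # Fall back to the historical 5 negatives when no params dict is given.
        return params['number_of_neg'] if params else default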
@@ -20,14 +20,19 @@ class Trainer:
         self.epoch = parameter['epoch']
         # self.print_epoch = parameter['print_epoch']
         # self.eval_epoch = parameter['eval_epoch']
-        self.eval_epoch = 1000
-        self.device = torch.device(parameter['device'])
+        # self.device = torch.device(parameter['device'])
+        self.device = parameter['device']
         self.MetaTL = MetaTL(itemnum, parameter)
-        self.MetaTL.to(self.device)
+        self.MetaTL.to(parameter['device'])
         self.optimizer = torch.optim.Adam(self.MetaTL.parameters(), self.learning_rate)
+        if parameter['eval_epoch']:
+            self.eval_epoch = parameter['eval_epoch']
+        else:
+            self.eval_epoch = 1000

     def rank_predict(self, data, x, ranks):
         # query_idx is the idx of positive score
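Note: the new eval_epoch guard above (`if parameter['eval_epoch']:`) still raises KeyError when the key is absent, and silently falls back to 1000 if a caller ever passes 0; dict.get expresses the intended default more directly. The same pattern appears in the DataLoader hunk further down. A sketch:

    parameter = {}                                    # e.g. key absent entirely
    eval_epoch = parameter.get('eval_epoch') or 1000  # 1000, and no KeyError
    # If an explicit 0 should be honored instead:
    eval_epoch = parameter.get('eval_epoch', 1000)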
@@ -77,7 +82,7 @@ class Trainer:
             # do evaluation on specific epoch
             if e % self.eval_epoch == 0 and e != 0:
-                loss_num = loss.item()
+                loss_num = loss.detach().item()
                 print("Epoch: {}\tLoss: {:.4f}".format(e, loss_num))
                 print('Epoch {} Validating...'.format(e))
@@ -134,14 +139,14 @@ class Trainer:
         if istest:
-            print("TEST: \t test_loss: ", total_loss.item())
+            print("TEST: \t test_loss: ", total_loss.detach().item())
             print("TEST: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r".format(
                 temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1']), "\n")
             with open('results.txt', 'a') as f:
                 f.writelines("TEST: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r\n\n".format(
                     temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1']))
         else:
-            print("VALID: \t validation_loss: ", total_loss.item())
+            print("VALID: \t validation_loss: ", total_loss.detach().item())
             print("VALID: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r".format(
                 temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1']))
             with open("results.txt", 'a') as f:
@@ -107,6 +107,11 @@ class DataLoader(object):
         self.itemnum = itemnum
+        if parameter['number_of_neg']:
+            self.number_of_neg = parameter['number_of_neg']
+        else:
+            self.number_of_neg = 5

     def next_one_on_eval(self):
         if self.curr_tri_idx == self.num_tris:
@@ -118,8 +123,7 @@ class DataLoader(object):
             seq = np.zeros([self.maxlen], dtype=np.int32)
             pos = np.zeros([self.maxlen - 1], dtype=np.int32)
-            # neg = np.zeros([self.maxlen*30 - 1], dtype=np.int32)
-            neg = np.zeros([self.maxlen * 5], dtype=np.int32)
+            neg = np.zeros([self.maxlen * self.number_of_neg], dtype=np.int32)
             idx = self.maxlen - 1
@@ -144,7 +148,7 @@
             # for idx in range(len(neg)):
             #     support_negative_triples.append([seq[-1], curr_rel, neg[idx]])
-            for j in range(5):
+            for j in range(self.number_of_neg):
                 for idx in range(self.maxlen - 1):
                     support_negative_triples.append([seq[idx], curr_rel, neg[j * self.maxlen + idx]])