| import os | |||||
| os.environ['CUDA_VISIBLE_DEVICES'] = "1" | |||||
| from ray.tune.schedulers import ASHAScheduler | from ray.tune.schedulers import ASHAScheduler | ||||
| from ray.tune import CLIReporter | from ray.tune import CLIReporter | ||||
| from ray import tune | from ray import tune | ||||
| args.add_argument("-K", "--K", default=3, type=int) #NUMBER OF SHOT | args.add_argument("-K", "--K", default=3, type=int) #NUMBER OF SHOT | ||||
| # args.add_argument("-dim", "--embed_dim", default=100, type=int) | # args.add_argument("-dim", "--embed_dim", default=100, type=int) | ||||
| args.add_argument("-bs", "--batch_size", default=1024, type=int) | |||||
| # args.add_argument("-bs", "--batch_size", default=1024, type=int) | |||||
| # args.add_argument("-lr", "--learning_rate", default=0.001, type=float) | # args.add_argument("-lr", "--learning_rate", default=0.001, type=float) | ||||
| args.add_argument("-epo", "--epoch", default=100000, type=int) | args.add_argument("-epo", "--epoch", default=100000, type=int) | ||||
| for k, v in vars(args).items(): | for k, v in vars(args).items(): | ||||
| params[k] = v | params[k] = v | ||||
| params['device'] = torch.device('cuda:0') | |||||
| # params['device'] = torch.device('cuda:1') | |||||
| params['device'] = 0 | |||||
| return params, args | return params, args | ||||
| def main(num_samples, gpus_per_trial=2): | def main(num_samples, gpus_per_trial=2): | ||||
| print("===============",torch.cuda.device_count(),"=======") | |||||
| params, args = get_params() | params, args = get_params() | ||||
| if params['seed'] is not None: | if params['seed'] is not None: | ||||
| user_train, usernum_train, itemnum, user_input_test, user_test, user_input_valid, user_valid = data_load(args.dataset, args.K) | user_train, usernum_train, itemnum, user_input_test, user_test, user_input_valid, user_valid = data_load(args.dataset, args.K) | ||||
| batch_size = params['batch_size'] | |||||
| # batch_size = params['batch_size'] | |||||
| # sampler = WarpSampler(user_train, usernum_train, itemnum, batch_size=batch_size, maxlen=args.K, n_workers=1) | # sampler = WarpSampler(user_train, usernum_train, itemnum, batch_size=batch_size, maxlen=args.K, n_workers=1) | ||||
| # sampler_test = DataLoader(user_input_test, user_test, itemnum, params) | # sampler_test = DataLoader(user_input_test, user_test, itemnum, params) | ||||
| # sampler_valid = DataLoader(user_input_valid, user_valid, itemnum, params) | # sampler_valid = DataLoader(user_input_valid, user_valid, itemnum, params) | ||||
| "beta" : tune.choice([0.05,0.1,1,4,4.5,5,5.5,6,10]), | "beta" : tune.choice([0.05,0.1,1,4,4.5,5,5.5,6,10]), | ||||
| "margin" : tune.choice([1,0.9,0.8,1.1,1.2]), | "margin" : tune.choice([1,0.9,0.8,1.1,1.2]), | ||||
| # "sampler":sampler, | |||||
| # "sampler_test":sampler_test, | |||||
| # "sampler_valid":sampler_valid, | |||||
| "batch_size" : tune.choice([128,256,512,1024,2048]), | |||||
| "number_of_neg" : tune.choice([1,3,5,7,10,20,30,50,70]), | |||||
| "loss_function" : tune.choice(["bpr"]), | |||||
| "eval_epoch" : tune.choice([100,250,500,1000,1500]), | |||||
| 'device' : params['device'], | |||||
| "itemnum":itemnum, | "itemnum":itemnum, | ||||
| "params":params, | "params":params, | ||||
| } | } | ||||
| print("===============", torch.cuda.device_count(), "=======") | |||||
| scheduler = ASHAScheduler( | scheduler = ASHAScheduler( | ||||
| metric="MRR", | metric="MRR", | ||||
| mode="max", | mode="max", | ||||
| log_to_file=True, | log_to_file=True, | ||||
| # resume=True, | # resume=True, | ||||
| local_dir="/media/external_10TB/10TB/maheri/metaTL_ray/ray_local_dir", | local_dir="/media/external_10TB/10TB/maheri/metaTL_ray/ray_local_dir", | ||||
| name="metatl_rnn1", | |||||
| name="bpr_rnn", | |||||
| ) | ) | ||||
| best_trial = result.get_best_trial("MRR", "max", "last") | best_trial = result.get_best_trial("MRR", "max", "last") | ||||
| print(result.results_df) | print(result.results_df) | ||||
| print("=======================================================\n") | print("=======================================================\n") | ||||
| # best_trained_model = Net(best_trial.config["l1"], best_trial.config["l2"]) | |||||
| # device = "cpu" | |||||
| # if torch.cuda.is_available(): | |||||
| # device = "cuda:0" | |||||
| # if gpus_per_trial > 1: | |||||
| # best_trained_model = nn.DataParallel(best_trained_model) | |||||
| # best_trained_model.to(device) | |||||
| # | |||||
| # best_checkpoint_dir = best_trial.checkpoint.value | |||||
| # model_state, optimizer_state = torch.load(os.path.join( | |||||
| # best_checkpoint_dir, "checkpoint")) | |||||
| # best_trained_model.load_state_dict(model_state) | |||||
| # | |||||
| # test_acc = test_accuracy(best_trained_model, device) | |||||
| # print("Best trial test set accuracy: {}".format(test_acc)) | |||||
| if __name__ == "__main__": | if __name__ == "__main__": | ||||
| # You can change the number of GPUs per trial here: | # You can change the number of GPUs per trial here: | ||||
| main(num_samples=150, gpus_per_trial=1) | |||||
| main(num_samples=150, gpus_per_trial=0.5) |
| random.seed(SEED) | random.seed(SEED) | ||||
| params = conf['params'] | params = conf['params'] | ||||
| params['batch_size'] = conf['batch_size'] | |||||
| params['number_of_neg'] = conf['number_of_neg'] | |||||
| user_train, usernum_train, itemnum, user_input_test, user_test, user_input_valid, user_valid = data_load(params['dataset'], params['K']) | user_train, usernum_train, itemnum, user_input_test, user_test, user_input_valid, user_valid = data_load(params['dataset'], params['K']) | ||||
| sampler = WarpSampler(user_train, usernum_train, itemnum, batch_size=params['batch_size'], maxlen=params['K'], n_workers=1) | |||||
| sampler = WarpSampler(user_train, usernum_train, itemnum, batch_size=params['batch_size'], maxlen=params['K'], n_workers=1,params=params) | |||||
| sampler_test = DataLoader(user_input_test, user_test, itemnum, params) | sampler_test = DataLoader(user_input_test, user_test, itemnum, params) | ||||
| sampler_valid = DataLoader(user_input_valid, user_valid, itemnum, params) | sampler_valid = DataLoader(user_input_valid, user_valid, itemnum, params) | ||||
| ps = { | ps = { | ||||
| "batch_size" : conf["params"]['batch_size'], | |||||
| "batch_size" : conf['batch_size'], | |||||
| "learning_rate" : conf['learning_rate'], | "learning_rate" : conf['learning_rate'], | ||||
| "epoch" : conf["params"]['epoch'], | "epoch" : conf["params"]['epoch'], | ||||
| "beta" : conf['beta'], | "beta" : conf['beta'], | ||||
| "margin" : conf['margin'], | "margin" : conf['margin'], | ||||
| "K" : conf["params"]['K'], | "K" : conf["params"]['K'], | ||||
| "number_of_neg" : conf["number_of_neg"], | |||||
| "loss_function" : conf["loss_function"], | |||||
| "eval_epoch" : conf["eval_epoch"], | |||||
| "device" : params['device'] | |||||
| } | } | ||||
| trainer = Trainer([sampler, sampler_valid, sampler_test], conf["itemnum"], ps) | trainer = Trainer([sampler, sampler_valid, sampler_test], conf["itemnum"], ps) | ||||
| # trainer.train() | |||||
| if checkpoint_dir: | if checkpoint_dir: | ||||
| print("===================== using checkpoint =====================") | print("===================== using checkpoint =====================") | ||||
| model_state, optimizer_state = torch.load( | model_state, optimizer_state = torch.load( | ||||
| trainer.MetaTL.load_state_dict(model_state) | trainer.MetaTL.load_state_dict(model_state) | ||||
| trainer.optimizer.load_state_dict(optimizer_state) | trainer.optimizer.load_state_dict(optimizer_state) | ||||
| for epoch in range(int(ps['epoch']/1000)): | |||||
| for e in range(1000): | |||||
| for epoch in range(int(ps['epoch']/ps['eval_epoch'])): | |||||
| for e in range(ps['eval_epoch']): | |||||
| # sample one batch from data_loader | # sample one batch from data_loader | ||||
| train_task, curr_rel = trainer.train_data_loader.next_batch() | train_task, curr_rel = trainer.train_data_loader.next_batch() | ||||
| loss, _, _ = trainer.do_one_step(train_task, iseval=False, curr_rel=curr_rel) | loss, _, _ = trainer.do_one_step(train_task, iseval=False, curr_rel=curr_rel) | ||||
| # do evaluation on specific epoch | # do evaluation on specific epoch | ||||
| valid_data = trainer.eval(istest=False, epoch=(-1)) | valid_data = trainer.eval(istest=False, epoch=(-1)) | ||||
| # print('Epoch {} Testing...'.format(e)) | |||||
| # test_data = self.eval(istest=True, epoch=e) | |||||
| with tune.checkpoint_dir(epoch) as checkpoint_dir: | with tune.checkpoint_dir(epoch) as checkpoint_dir: | ||||
| path = os.path.join(checkpoint_dir, "checkpoint") | path = os.path.join(checkpoint_dir, "checkpoint") | ||||
| torch.save((trainer.MetaTL.state_dict(), trainer.optimizer.state_dict()), path) | torch.save((trainer.MetaTL.state_dict(), trainer.optimizer.state_dict()), path) | ||||
| tune.report( | tune.report( | ||||
| MRR=valid_data["MRR"], NDCG10=valid_data['NDCG@10'], NDCG5=valid_data["NDCG@5"], NDCG1=valid_data["NDCG@1"], | MRR=valid_data["MRR"], NDCG10=valid_data['NDCG@10'], NDCG5=valid_data["NDCG@5"], NDCG1=valid_data["NDCG@1"], | ||||
| Hits10=valid_data["Hits@10"], Hits5=valid_data["Hits@5"], Hits1=valid_data["Hits@1"], | Hits10=valid_data["Hits@10"], Hits5=valid_data["Hits@5"], Hits1=valid_data["Hits@1"], | ||||
| training_iteration=epoch*1000 | |||||
| training_iteration=epoch*ps['eval_epoch'] | |||||
| ) | ) | ||||
| params['device'] = torch.device('cuda:'+str(args.device)) | params['device'] = torch.device('cuda:'+str(args.device)) | ||||
| # params['device'] = torch.device('cpu') | # params['device'] = torch.device('cpu') | ||||
| print("gpu:",params['device']) | |||||
| return params, args | return params, args | ||||
| def bpr_loss(p_scores, n_values,device): | def bpr_loss(p_scores, n_values,device): | ||||
| ratio = int(n_values.shape[1] / p_scores.shape[1]) | ratio = int(n_values.shape[1] / p_scores.shape[1]) | ||||
| temp_pvalues = torch.tensor([]).cuda(device=device) | |||||
| temp_pvalues = torch.tensor([],device=device) | |||||
| for i in range(p_scores.shape[1]): | for i in range(p_scores.shape[1]): | ||||
| temp_pvalues = torch.cat((temp_pvalues, p_scores[:, i, None].expand(-1, ratio)), dim=1) | temp_pvalues = torch.cat((temp_pvalues, p_scores[:, i, None].expand(-1, ratio)), dim=1) | ||||
| def bpr_max_loss(p_scores, n_values,device): | def bpr_max_loss(p_scores, n_values,device): | ||||
| s = F.softmax(n_values,dim=1) | s = F.softmax(n_values,dim=1) | ||||
| ratio = int(n_values.shape[1] / p_scores.shape[1]) | ratio = int(n_values.shape[1] / p_scores.shape[1]) | ||||
| temp_pvalues = torch.tensor([]).cuda(device=device) | |||||
| temp_pvalues = torch.tensor([],device=device) | |||||
| for i in range(p_scores.shape[1]): | for i in range(p_scores.shape[1]): | ||||
| temp_pvalues = torch.cat((temp_pvalues,p_scores[:,i,None].expand(-1,ratio)),dim=1) | temp_pvalues = torch.cat((temp_pvalues,p_scores[:,i,None].expand(-1,ratio)),dim=1) | ||||
| def top_loss(p_scores, n_values,device): | def top_loss(p_scores, n_values,device): | ||||
| ratio = int(n_values.shape[1] / p_scores.shape[1]) | ratio = int(n_values.shape[1] / p_scores.shape[1]) | ||||
| temp_pvalues = torch.tensor([]).cuda(device=device) | |||||
| temp_pvalues = torch.tensor([],device=device) | |||||
| for i in range(p_scores.shape[1]): | for i in range(p_scores.shape[1]): | ||||
| temp_pvalues = torch.cat((temp_pvalues, p_scores[:, i, None].expand(-1, ratio)), dim=1) | temp_pvalues = torch.cat((temp_pvalues, p_scores[:, i, None].expand(-1, ratio)), dim=1) | ||||
| class MetaTL(nn.Module): | class MetaTL(nn.Module): | ||||
| def __init__(self, itemnum, parameter): | def __init__(self, itemnum, parameter): | ||||
| super(MetaTL, self).__init__() | super(MetaTL, self).__init__() | ||||
| self.device = torch.device(parameter['device']) | |||||
| # self.device = torch.device(parameter['device']) | |||||
| self.device = parameter['device'] | |||||
| self.beta = parameter['beta'] | self.beta = parameter['beta'] | ||||
| # self.dropout_p = parameter['dropout_p'] | # self.dropout_p = parameter['dropout_p'] | ||||
| self.embed_dim = parameter['embed_dim'] | self.embed_dim = parameter['embed_dim'] | ||||
| self.embedding_learner = EmbeddingLearner() | self.embedding_learner = EmbeddingLearner() | ||||
| # self.loss_func = nn.MarginRankingLoss(self.margin) | # self.loss_func = nn.MarginRankingLoss(self.margin) | ||||
| self.loss_func = top_loss | |||||
| self.loss_func = bpr_loss | |||||
| self.rel_q_sharing = dict() | self.rel_q_sharing = dict() | ||||
| p_score, n_score = self.embedding_learner(sup_neg_e1, sup_neg_e2, rel_s, K) | p_score, n_score = self.embedding_learner(sup_neg_e1, sup_neg_e2, rel_s, K) | ||||
| y = torch.Tensor([1]).to(self.device) | |||||
| # y = torch.Tensor([1]).to(self.device) | |||||
| self.zero_grad() | self.zero_grad() | ||||
| # sorted,indecies = torch.sort(n_score, descending=True,dim=1) | # sorted,indecies = torch.sort(n_score, descending=True,dim=1) |
| return candid_item | return candid_item | ||||
| def sample_function_mixed(user_train, usernum, itemnum, batch_size, maxlen, result_queue, SEED): | |||||
| def sample_function_mixed(user_train, usernum, itemnum, batch_size, maxlen, result_queue, SEED,number_of_neg): | |||||
| def sample(): | def sample(): | ||||
| if random.random()<=1: | if random.random()<=1: | ||||
| seq = np.zeros([maxlen], dtype=np.int32) | seq = np.zeros([maxlen], dtype=np.int32) | ||||
| pos = np.zeros([maxlen], dtype=np.int32) | pos = np.zeros([maxlen], dtype=np.int32) | ||||
| neg = np.zeros([maxlen*5], dtype=np.int32) | |||||
| neg = np.zeros([maxlen*number_of_neg], dtype=np.int32) | |||||
| if len(user_train[user]) < maxlen: | if len(user_train[user]) < maxlen: | ||||
| nxt_idx = len(user_train[user]) - 1 | nxt_idx = len(user_train[user]) - 1 | ||||
| # for idx in range(maxlen*30 - 1): | # for idx in range(maxlen*30 - 1): | ||||
| # support_negative_triples.append([seq[-1], curr_rel, neg[idx]]) | # support_negative_triples.append([seq[-1], curr_rel, neg[idx]]) | ||||
| for j in range(5): | |||||
| for j in range(number_of_neg): | |||||
| for idx in range(maxlen-1): | for idx in range(maxlen-1): | ||||
| support_negative_triples.append([seq[idx], curr_rel, neg[j*maxlen + idx]]) | support_negative_triples.append([seq[idx], curr_rel, neg[j*maxlen + idx]]) | ||||
| result_queue.put(([support, support_negative, query, negative], curr_rel)) | result_queue.put(([support, support_negative, query, negative], curr_rel)) | ||||
| class WarpSampler(object): | class WarpSampler(object): | ||||
| def __init__(self, User, usernum, itemnum, batch_size=64, maxlen=10, n_workers=1): | |||||
| def __init__(self, User, usernum, itemnum, batch_size=64, maxlen=10, n_workers=1,params = None): | |||||
| self.result_queue = Queue(maxsize=n_workers * 10) | self.result_queue = Queue(maxsize=n_workers * 10) | ||||
| self.processors = [] | self.processors = [] | ||||
| for i in range(n_workers): | for i in range(n_workers): | ||||
| batch_size, | batch_size, | ||||
| maxlen, | maxlen, | ||||
| self.result_queue, | self.result_queue, | ||||
| np.random.randint(2e9) | |||||
| np.random.randint(2e9), | |||||
| params['number_of_neg'] | |||||
| ))) | ))) | ||||
| self.processors[-1].daemon = True | self.processors[-1].daemon = True | ||||
| self.processors[-1].start() | self.processors[-1].start() |
| self.epoch = parameter['epoch'] | self.epoch = parameter['epoch'] | ||||
| # self.print_epoch = parameter['print_epoch'] | # self.print_epoch = parameter['print_epoch'] | ||||
| # self.eval_epoch = parameter['eval_epoch'] | # self.eval_epoch = parameter['eval_epoch'] | ||||
| self.eval_epoch = 1000 | |||||
| self.device = torch.device(parameter['device']) | |||||
| # self.device = torch.device(parameter['device']) | |||||
| self.device = parameter['device'] | |||||
| self.MetaTL = MetaTL(itemnum, parameter) | self.MetaTL = MetaTL(itemnum, parameter) | ||||
| self.MetaTL.to(self.device) | |||||
| self.MetaTL.to(parameter['device']) | |||||
| self.optimizer = torch.optim.Adam(self.MetaTL.parameters(), self.learning_rate) | self.optimizer = torch.optim.Adam(self.MetaTL.parameters(), self.learning_rate) | ||||
| if parameter['eval_epoch']: | |||||
| self.eval_epoch = parameter['eval_epoch'] | |||||
| else: | |||||
| self.eval_epoch = 1000 | |||||
| def rank_predict(self, data, x, ranks): | def rank_predict(self, data, x, ranks): | ||||
| # query_idx is the idx of positive score | # query_idx is the idx of positive score | ||||
| # do evaluation on specific epoch | # do evaluation on specific epoch | ||||
| if e % self.eval_epoch == 0 and e != 0: | if e % self.eval_epoch == 0 and e != 0: | ||||
| loss_num = loss.item() | |||||
| loss_num = loss.detach().item() | |||||
| print("Epoch: {}\tLoss: {:.4f}".format(e, loss_num)) | print("Epoch: {}\tLoss: {:.4f}".format(e, loss_num)) | ||||
| print('Epoch {} Validating...'.format(e)) | print('Epoch {} Validating...'.format(e)) | ||||
| if istest: | if istest: | ||||
| print("TEST: \t test_loss: ",total_loss.item()) | |||||
| print("TEST: \t test_loss: ",total_loss.detach().item()) | |||||
| print("TEST: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r".format( | print("TEST: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r".format( | ||||
| temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1']),"\n") | temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1']),"\n") | ||||
| with open('results.txt', 'a') as f: | with open('results.txt', 'a') as f: | ||||
| f.writelines("TEST: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r\n\n".format( | f.writelines("TEST: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r\n\n".format( | ||||
| temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1'])) | temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1'])) | ||||
| else: | else: | ||||
| print("VALID: \t validation_loss: ", total_loss.item()) | |||||
| print("VALID: \t validation_loss: ", total_loss.detach().item() ) | |||||
| print("VALID: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r".format( | print("VALID: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r".format( | ||||
| temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1'])) | temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1'])) | ||||
| with open("results.txt",'a') as f: | with open("results.txt",'a') as f: |
| self.itemnum = itemnum | self.itemnum = itemnum | ||||
| if parameter['number_of_neg']: | |||||
| self.number_of_neg = parameter['number_of_neg'] | |||||
| else: | |||||
| self.number_of_neg = 5 | |||||
| def next_one_on_eval(self): | def next_one_on_eval(self): | ||||
| if self.curr_tri_idx == self.num_tris: | if self.curr_tri_idx == self.num_tris: | ||||
| seq = np.zeros([self.maxlen], dtype=np.int32) | seq = np.zeros([self.maxlen], dtype=np.int32) | ||||
| pos = np.zeros([self.maxlen - 1], dtype=np.int32) | pos = np.zeros([self.maxlen - 1], dtype=np.int32) | ||||
| # neg = np.zeros([self.maxlen*30 - 1], dtype=np.int32) | |||||
| neg = np.zeros([self.maxlen * 5], dtype=np.int32) | |||||
| neg = np.zeros([self.maxlen * self.number_of_neg], dtype=np.int32) | |||||
| idx = self.maxlen - 1 | idx = self.maxlen - 1 | ||||
| # for idx in range(len(neg)): | # for idx in range(len(neg)): | ||||
| # support_negative_triples.append([seq[-1],curr_rel,neg[idx]]) | # support_negative_triples.append([seq[-1],curr_rel,neg[idx]]) | ||||
| for j in range(5): | |||||
| for j in range(self.number_of_neg): | |||||
| for idx in range(self.maxlen-1): | for idx in range(self.maxlen-1): | ||||
| support_negative_triples.append([seq[idx], curr_rel, neg[j * self.maxlen + idx]]) | support_negative_triples.append([seq[idx], curr_rel, neg[j * self.maxlen + idx]]) | ||||