import os

os.environ['CUDA_VISIBLE_DEVICES'] = "1"

from ray.tune.schedulers import ASHAScheduler
from ray.tune import CLIReporter
from ray import tune

args.add_argument("-K", "--K", default=3, type=int)  # number of shots
# args.add_argument("-dim", "--embed_dim", default=100, type=int)
# batch_size and learning_rate are part of the Ray Tune search space,
# so the fixed command-line arguments are commented out:
# args.add_argument("-bs", "--batch_size", default=1024, type=int)
# args.add_argument("-lr", "--learning_rate", default=0.001, type=float)
args.add_argument("-epo", "--epoch", default=100000, type=int)

for k, v in vars(args).items():
    params[k] = v

# With CUDA_VISIBLE_DEVICES="1" set above, device index 0 maps to the
# second physical GPU.
# params['device'] = torch.device('cuda:1')
params['device'] = 0
return params, args
def main(num_samples, gpus_per_trial=2):
    print("===============", torch.cuda.device_count(), "=======")
    params, args = get_params()

    if params['seed'] is not None:
        pass  # seed setup omitted in this excerpt

    user_train, usernum_train, itemnum, user_input_test, user_test, user_input_valid, user_valid = data_load(args.dataset, args.K)

    # batch_size is tuned by Ray Tune and the samplers depend on it, so
    # both are now created inside the trainable rather than here:
    # batch_size = params['batch_size']
    # sampler = WarpSampler(user_train, usernum_train, itemnum, batch_size=batch_size, maxlen=args.K, n_workers=1)
    # sampler_test = DataLoader(user_input_test, user_test, itemnum, params)
    # sampler_valid = DataLoader(user_input_valid, user_valid, itemnum, params)
"beta" : tune.choice([0.05,0.1,1,4,4.5,5,5.5,6,10]), | "beta" : tune.choice([0.05,0.1,1,4,4.5,5,5.5,6,10]), | ||||
"margin" : tune.choice([1,0.9,0.8,1.1,1.2]), | "margin" : tune.choice([1,0.9,0.8,1.1,1.2]), | ||||
# "sampler":sampler, | |||||
# "sampler_test":sampler_test, | |||||
# "sampler_valid":sampler_valid, | |||||
"batch_size" : tune.choice([128,256,512,1024,2048]), | |||||
"number_of_neg" : tune.choice([1,3,5,7,10,20,30,50,70]), | |||||
"loss_function" : tune.choice(["bpr"]), | |||||
"eval_epoch" : tune.choice([100,250,500,1000,1500]), | |||||
'device' : params['device'], | |||||
"itemnum":itemnum, | "itemnum":itemnum, | ||||
"params":params, | "params":params, | ||||
} | } | ||||
print("===============", torch.cuda.device_count(), "=======") | |||||
    scheduler = ASHAScheduler(
        metric="MRR",
        mode="max",
    )
    # NOTE: log_to_file, local_dir, name (and resume) are tune.run()
    # arguments, not ASHAScheduler arguments; they belong on the run call.
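    # The tune.run() call itself is elided in this excerpt. A minimal
    # reconstruction under stated assumptions: the trainable name
    # `train_metatl` and the CLIReporter columns are hypothetical, not the
    # author's exact values; the remaining kwargs come from the lines above.
    result = tune.run(
        train_metatl,                       # hypothetical trainable wrapping the training loop below
        config=config,
        scheduler=scheduler,
        num_samples=num_samples,
        resources_per_trial={"gpu": gpus_per_trial},
        progress_reporter=CLIReporter(metric_columns=["MRR"]),
        log_to_file=True,
        # resume=True,
        local_dir="/media/external_10TB/10TB/maheri/metaTL_ray/ray_local_dir",
        name="bpr_rnn",
    )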
    best_trial = result.get_best_trial("MRR", "max", "last")
    print(result.results_df)
    print("=======================================================\n")
    # best_trained_model = Net(best_trial.config["l1"], best_trial.config["l2"])
    # device = "cpu"
    # if torch.cuda.is_available():
    #     device = "cuda:0"
    #     if gpus_per_trial > 1:
    #         best_trained_model = nn.DataParallel(best_trained_model)
    # best_trained_model.to(device)
    #
    # best_checkpoint_dir = best_trial.checkpoint.value
    # model_state, optimizer_state = torch.load(os.path.join(
    #     best_checkpoint_dir, "checkpoint"))
    # best_trained_model.load_state_dict(model_state)
    #
    # test_acc = test_accuracy(best_trained_model, device)
    # print("Best trial test set accuracy: {}".format(test_acc))
if __name__ == "__main__":
    # You can change the number of GPUs per trial here; a fractional value
    # lets two trials share one GPU.
    main(num_samples=150, gpus_per_trial=0.5)
    random.seed(SEED)

    params = conf['params']
    params['batch_size'] = conf['batch_size']
    params['number_of_neg'] = conf['number_of_neg']

    user_train, usernum_train, itemnum, user_input_test, user_test, user_input_valid, user_valid = data_load(params['dataset'], params['K'])

    sampler = WarpSampler(user_train, usernum_train, itemnum, batch_size=params['batch_size'], maxlen=params['K'], n_workers=1, params=params)
    sampler_test = DataLoader(user_input_test, user_test, itemnum, params)
    sampler_valid = DataLoader(user_input_valid, user_valid, itemnum, params)
    ps = {
        "batch_size": conf['batch_size'],
        "learning_rate": conf['learning_rate'],
        "epoch": conf["params"]['epoch'],
        "beta": conf['beta'],
        "margin": conf['margin'],
        "K": conf["params"]['K'],
        "number_of_neg": conf["number_of_neg"],
        "loss_function": conf["loss_function"],
        "eval_epoch": conf["eval_epoch"],
        "device": params['device'],
    }
    trainer = Trainer([sampler, sampler_valid, sampler_test], conf["itemnum"], ps)
    # trainer.train()

    if checkpoint_dir:
        print("===================== using checkpoint =====================")
        model_state, optimizer_state = torch.load(
            os.path.join(checkpoint_dir, "checkpoint"))
        trainer.MetaTL.load_state_dict(model_state)
        trainer.optimizer.load_state_dict(optimizer_state)
    # Train in chunks of eval_epoch steps; after each chunk, validate,
    # checkpoint, and report to Ray Tune.
    for epoch in range(int(ps['epoch'] / ps['eval_epoch'])):
        for e in range(ps['eval_epoch']):
            # sample one batch from data_loader
            train_task, curr_rel = trainer.train_data_loader.next_batch()
            loss, _, _ = trainer.do_one_step(train_task, iseval=False, curr_rel=curr_rel)

        # do evaluation on specific epochs
        valid_data = trainer.eval(istest=False, epoch=(-1))
        # print('Epoch {} Testing...'.format(e))
        # test_data = self.eval(istest=True, epoch=e)

        with tune.checkpoint_dir(epoch) as checkpoint_dir:
            path = os.path.join(checkpoint_dir, "checkpoint")
            torch.save((trainer.MetaTL.state_dict(), trainer.optimizer.state_dict()), path)

        tune.report(
            MRR=valid_data["MRR"], NDCG10=valid_data['NDCG@10'], NDCG5=valid_data["NDCG@5"], NDCG1=valid_data["NDCG@1"],
            Hits10=valid_data["Hits@10"], Hits5=valid_data["Hits@5"], Hits1=valid_data["Hits@1"],
            training_iteration=epoch * ps['eval_epoch']
        )
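    # Reporting MRR once per eval_epoch chunk is what lets the ASHAScheduler
    # configured above (metric="MRR", mode="max") rank trials against each
    # other and terminate the weak ones early.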
    params['device'] = torch.device('cuda:' + str(args.device))
    # params['device'] = torch.device('cpu')
    print("gpu:", params['device'])
    return params, args
def bpr_loss(p_scores, n_values, device):
    # Repeat each positive score `ratio` times so it lines up element-wise
    # with its group of negative scores.
    ratio = int(n_values.shape[1] / p_scores.shape[1])
    temp_pvalues = torch.tensor([], device=device)
    for i in range(p_scores.shape[1]):
        temp_pvalues = torch.cat((temp_pvalues, p_scores[:, i, None].expand(-1, ratio)), dim=1)
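    # The rest of the function is elided in this excerpt. A standard BPR
    # completion (an assumption, not necessarily the author's exact code):
    # return -torch.mean(F.logsigmoid(temp_pvalues - n_values))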
def bpr_max_loss(p_scores, n_values, device):
    # Softmax over the negative scores weights each negative's contribution.
    s = F.softmax(n_values, dim=1)
    ratio = int(n_values.shape[1] / p_scores.shape[1])
    temp_pvalues = torch.tensor([], device=device)
    for i in range(p_scores.shape[1]):
        temp_pvalues = torch.cat((temp_pvalues, p_scores[:, i, None].expand(-1, ratio)), dim=1)
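    # Elided here. The published BPR-max loss (Hidasi & Karatzoglou, 2018)
    # would complete it roughly as (an assumption):
    # return torch.mean(-torch.log(
    #     torch.sum(s * torch.sigmoid(temp_pvalues - n_values), dim=1) + 1e-10))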
def top_loss(p_scores, n_values, device):
    ratio = int(n_values.shape[1] / p_scores.shape[1])
    temp_pvalues = torch.tensor([], device=device)
    for i in range(p_scores.shape[1]):
        temp_pvalues = torch.cat((temp_pvalues, p_scores[:, i, None].expand(-1, ratio)), dim=1)
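    # Elided here. The standard TOP1 loss (from GRU4Rec) would complete it
    # roughly as (an assumption):
    # return torch.mean(torch.sigmoid(n_values - temp_pvalues)
    #                   + torch.sigmoid(n_values ** 2))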
class MetaTL(nn.Module):
    def __init__(self, itemnum, parameter):
        super(MetaTL, self).__init__()
        # parameter['device'] is already usable as a device, so no extra
        # torch.device() wrapping is needed:
        # self.device = torch.device(parameter['device'])
        self.device = parameter['device']
        self.beta = parameter['beta']
        # self.dropout_p = parameter['dropout_p']
        self.embed_dim = parameter['embed_dim']
        self.embedding_learner = EmbeddingLearner()
        # self.loss_func = nn.MarginRankingLoss(self.margin)
        # self.loss_func = top_loss
        self.loss_func = bpr_loss
        self.rel_q_sharing = dict()
        p_score, n_score = self.embedding_learner(sup_neg_e1, sup_neg_e2, rel_s, K)
        # y = torch.Tensor([1]).to(self.device)  # only needed for MarginRankingLoss
        self.zero_grad()
        # sorted_scores, indices = torch.sort(n_score, descending=True, dim=1)
        return candid_item

def sample_function_mixed(user_train, usernum, itemnum, batch_size, maxlen, result_queue, SEED, number_of_neg):
    def sample():
        if random.random() <= 1:  # always true; left over from a sampling-mix ratio
            seq = np.zeros([maxlen], dtype=np.int32)
            pos = np.zeros([maxlen], dtype=np.int32)
            # one negative group of size maxlen per requested negative
            neg = np.zeros([maxlen * number_of_neg], dtype=np.int32)

            if len(user_train[user]) < maxlen:
                nxt_idx = len(user_train[user]) - 1

            # for idx in range(maxlen*30 - 1):
            #     support_negative_triples.append([seq[-1], curr_rel, neg[idx]])
            for j in range(number_of_neg):
                for idx in range(maxlen - 1):
                    support_negative_triples.append([seq[idx], curr_rel, neg[j * maxlen + idx]])

            result_queue.put(([support, support_negative, query, negative], curr_rel))
class WarpSampler(object):
    def __init__(self, User, usernum, itemnum, batch_size=64, maxlen=10, n_workers=1, params=None):
        self.result_queue = Queue(maxsize=n_workers * 10)
        self.processors = []
        for i in range(n_workers):
            # The Process(...) call is partially elided in this excerpt; the
            # args below follow sample_function_mixed's signature.
            self.processors.append(Process(
                target=sample_function_mixed,
                args=(User,
                      usernum,
                      itemnum,
                      batch_size,
                      maxlen,
                      self.result_queue,
                      np.random.randint(2e9),
                      params['number_of_neg'])))
            self.processors[-1].daemon = True
            self.processors[-1].start()
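    # The trainer pulls batches with next_batch(); in the usual WarpSampler
    # pattern that method (elided here; an assumption) is simply:
    # def next_batch(self):
    #     return self.result_queue.get()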
        self.epoch = parameter['epoch']
        # self.print_epoch = parameter['print_epoch']
        self.device = parameter['device']

        self.MetaTL = MetaTL(itemnum, parameter)
        self.MetaTL.to(self.device)

        self.optimizer = torch.optim.Adam(self.MetaTL.parameters(), self.learning_rate)

        # Evaluate every eval_epoch training steps, defaulting to 1000.
        if parameter.get('eval_epoch'):
            self.eval_epoch = parameter['eval_epoch']
        else:
            self.eval_epoch = 1000
    def rank_predict(self, data, x, ranks):
        # query_idx is the idx of the positive score

            # do evaluation on specific epochs
            if e % self.eval_epoch == 0 and e != 0:
                loss_num = loss.detach().item()
                print("Epoch: {}\tLoss: {:.4f}".format(e, loss_num))
                print('Epoch {} Validating...'.format(e))
        if istest:
            print("TEST: \t test_loss: ", total_loss.detach().item())
            print("TEST: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r".format(
                temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1']), "\n")
            with open('results.txt', 'a') as f:
                f.writelines("TEST: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r\n\n".format(
                    temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1']))
        else:
            print("VALID: \t validation_loss: ", total_loss.detach().item())
            print("VALID: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r".format(
                temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1']))
            with open("results.txt", 'a') as f:
                # body elided in the excerpt; mirroring the TEST branch above:
                f.writelines("VALID: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r\n".format(
                    temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1']))
        self.itemnum = itemnum
        # Use the tuned number of negatives, defaulting to 5 when unset.
        if parameter['number_of_neg']:
            self.number_of_neg = parameter['number_of_neg']
        else:
            self.number_of_neg = 5

    def next_one_on_eval(self):
        if self.curr_tri_idx == self.num_tris:

            seq = np.zeros([self.maxlen], dtype=np.int32)
            pos = np.zeros([self.maxlen - 1], dtype=np.int32)
            # neg = np.zeros([self.maxlen*30 - 1], dtype=np.int32)
            neg = np.zeros([self.maxlen * self.number_of_neg], dtype=np.int32)

            idx = self.maxlen - 1

            # for idx in range(len(neg)):
            #     support_negative_triples.append([seq[-1], curr_rel, neg[idx]])
            for j in range(self.number_of_neg):
                for idx in range(self.maxlen - 1):
                    support_negative_triples.append([seq[idx], curr_rel, neg[j * self.maxlen + idx]])