@@ -1,3 +1,6 @@
import os
os.environ['CUDA_VISIBLE_DEVICES'] = "1"
from ray.tune.schedulers import ASHAScheduler
from ray.tune import CLIReporter
from ray import tune

@@ -19,7 +22,7 @@ def get_params():
    args.add_argument("-K", "--K", default=3, type=int)  # NUMBER OF SHOT
    # args.add_argument("-dim", "--embed_dim", default=100, type=int)
    args.add_argument("-bs", "--batch_size", default=1024, type=int)
    # args.add_argument("-bs", "--batch_size", default=1024, type=int)
    # args.add_argument("-lr", "--learning_rate", default=0.001, type=float)
    args.add_argument("-epo", "--epoch", default=100000, type=int)

@@ -37,13 +40,14 @@ def get_params():
    for k, v in vars(args).items():
        params[k] = v
    params['device'] = torch.device('cuda:0')
    # params['device'] = torch.device('cuda:1')
    params['device'] = 0
    return params, args

def main(num_samples, gpus_per_trial=2):
    print("===============", torch.cuda.device_count(), "=======")
    params, args = get_params()
    if params['seed'] is not None:

@@ -56,7 +60,7 @@ def main(num_samples, gpus_per_trial=2):
    user_train, usernum_train, itemnum, user_input_test, user_test, user_input_valid, user_valid = data_load(args.dataset, args.K)
    batch_size = params['batch_size']
    # batch_size = params['batch_size']
    # sampler = WarpSampler(user_train, usernum_train, itemnum, batch_size=batch_size, maxlen=args.K, n_workers=1)
    # sampler_test = DataLoader(user_input_test, user_test, itemnum, params)
    # sampler_valid = DataLoader(user_input_valid, user_valid, itemnum, params)

@@ -72,14 +76,18 @@ def main(num_samples, gpus_per_trial=2):
        "beta": tune.choice([0.05, 0.1, 1, 4, 4.5, 5, 5.5, 6, 10]),
        "margin": tune.choice([1, 0.9, 0.8, 1.1, 1.2]),
        # "sampler": sampler,
        # "sampler_test": sampler_test,
        # "sampler_valid": sampler_valid,
        "batch_size": tune.choice([128, 256, 512, 1024, 2048]),
        "number_of_neg": tune.choice([1, 3, 5, 7, 10, 20, 30, 50, 70]),
        "loss_function": tune.choice(["bpr"]),
        "eval_epoch": tune.choice([100, 250, 500, 1000, 1500]),
        "device": params['device'],
        "itemnum": itemnum,
        "params": params,
    }
    print("===============", torch.cuda.device_count(), "=======")
    scheduler = ASHAScheduler(
        metric="MRR",
        mode="max",

@@ -99,7 +107,7 @@ def main(num_samples, gpus_per_trial=2):
        log_to_file=True,
        # resume=True,
        local_dir="/media/external_10TB/10TB/maheri/metaTL_ray/ray_local_dir",
        name="metatl_rnn1",
        name="bpr_rnn",
    )
    best_trial = result.get_best_trial("MRR", "max", "last")

@@ -116,23 +124,8 @@ def main(num_samples, gpus_per_trial=2):
    print(result.results_df)
    print("=======================================================\n")

    # best_trained_model = Net(best_trial.config["l1"], best_trial.config["l2"])
    # device = "cpu"
    # if torch.cuda.is_available():
    #     device = "cuda:0"
    #     if gpus_per_trial > 1:
    #         best_trained_model = nn.DataParallel(best_trained_model)
    # best_trained_model.to(device)
    #
    # best_checkpoint_dir = best_trial.checkpoint.value
    # model_state, optimizer_state = torch.load(os.path.join(
    #     best_checkpoint_dir, "checkpoint"))
    # best_trained_model.load_state_dict(model_state)
    #
    # test_acc = test_accuracy(best_trained_model, device)
    # print("Best trial test set accuracy: {}".format(test_acc))

if __name__ == "__main__":
    # You can change the number of GPUs per trial here:
    main(num_samples=150, gpus_per_trial=1)
    main(num_samples=150, gpus_per_trial=0.5)
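Note: the driver above follows the legacy Ray Tune function-API pattern: a search space built from `tune.choice` dimensions, an `ASHAScheduler` that early-stops trials on `MRR`, and a fractional `gpus_per_trial` so two trials can share one GPU. A minimal self-contained sketch of the same pattern (the trainable and its metric are illustrative stand-ins, not the project's code):

```python
# Minimal sketch of the tuning pattern used above (legacy Ray Tune function
# API, matching the tune.report/tune.checkpoint_dir calls in this diff).
from ray import tune
from ray.tune.schedulers import ASHAScheduler

def trainable(config):
    for step in range(10):
        mrr = config["learning_rate"] * step   # dummy metric for the sketch
        tune.report(MRR=mrr)                   # ASHA stops low-MRR trials early

scheduler = ASHAScheduler(metric="MRR", mode="max", grace_period=1)
result = tune.run(
    trainable,
    config={"learning_rate": tune.choice([1e-3, 5e-3, 1e-2])},
    num_samples=4,
    resources_per_trial={"cpu": 1},  # e.g. {"cpu": 1, "gpu": 0.5} packs two trials per GPU
    scheduler=scheduler,
)
print(result.get_best_trial("MRR", "max", "last").config)
```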
@@ -22,13 +22,16 @@ def train_metatl(conf,checkpoint_dir=None):
    random.seed(SEED)

    params = conf['params']
    params['batch_size'] = conf['batch_size']
    params['number_of_neg'] = conf['number_of_neg']

    user_train, usernum_train, itemnum, user_input_test, user_test, user_input_valid, user_valid = data_load(params['dataset'], params['K'])
    sampler = WarpSampler(user_train, usernum_train, itemnum, batch_size=params['batch_size'], maxlen=params['K'], n_workers=1)
    sampler = WarpSampler(user_train, usernum_train, itemnum, batch_size=params['batch_size'], maxlen=params['K'], n_workers=1, params=params)
    sampler_test = DataLoader(user_input_test, user_test, itemnum, params)
    sampler_valid = DataLoader(user_input_valid, user_valid, itemnum, params)

    ps = {
        "batch_size": conf["params"]['batch_size'],
        "batch_size": conf['batch_size'],
        "learning_rate": conf['learning_rate'],
        "epoch": conf["params"]['epoch'],
        "beta": conf['beta'],

@@ -36,11 +39,14 @@ def train_metatl(conf,checkpoint_dir=None):
        "margin": conf['margin'],
        "K": conf["params"]['K'],
        "number_of_neg": conf["number_of_neg"],
        "loss_function": conf["loss_function"],
        "eval_epoch": conf["eval_epoch"],
        "device": params['device']
    }

    trainer = Trainer([sampler, sampler_valid, sampler_test], conf["itemnum"], ps)
    # trainer.train()

    if checkpoint_dir:
        print("===================== using checkpoint =====================")
        model_state, optimizer_state = torch.load(

@@ -48,9 +54,8 @@ def train_metatl(conf,checkpoint_dir=None):
        trainer.MetaTL.load_state_dict(model_state)
        trainer.optimizer.load_state_dict(optimizer_state)

    for epoch in range(int(ps['epoch']/1000)):
        for e in range(1000):
    for epoch in range(int(ps['epoch']/ps['eval_epoch'])):
        for e in range(ps['eval_epoch']):
            # sample one batch from data_loader
            train_task, curr_rel = trainer.train_data_loader.next_batch()
            loss, _, _ = trainer.do_one_step(train_task, iseval=False, curr_rel=curr_rel)

@@ -58,10 +63,6 @@ def train_metatl(conf,checkpoint_dir=None):
        # do evaluation on specific epoch
        valid_data = trainer.eval(istest=False, epoch=(-1))
        # print('Epoch {} Testing...'.format(e))
        # test_data = self.eval(istest=True, epoch=e)

        with tune.checkpoint_dir(epoch) as checkpoint_dir:
            path = os.path.join(checkpoint_dir, "checkpoint")
            torch.save((trainer.MetaTL.state_dict(), trainer.optimizer.state_dict()), path)

@@ -69,6 +70,6 @@ def train_metatl(conf,checkpoint_dir=None):
        tune.report(
            MRR=valid_data["MRR"], NDCG10=valid_data['NDCG@10'], NDCG5=valid_data["NDCG@5"], NDCG1=valid_data["NDCG@1"],
            Hits10=valid_data["Hits@10"], Hits5=valid_data["Hits@5"], Hits1=valid_data["Hits@1"],
            training_iteration=epoch*1000
            training_iteration=epoch*ps['eval_epoch']
        )
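The resume/checkpoint/report cycle above is the legacy Tune function-API contract: accept `checkpoint_dir` to restore state, write a checkpoint each outer epoch via `tune.checkpoint_dir`, and end the iteration with `tune.report`. A stripped-down sketch of that contract (model, optimizer, and reported metric are placeholders):

```python
# Sketch of the legacy Tune checkpoint/report contract used by train_metatl.
import os
import torch
from ray import tune

def train_fn(config, checkpoint_dir=None):
    model = torch.nn.Linear(4, 1)
    optim = torch.optim.SGD(model.parameters(), lr=config["lr"])
    if checkpoint_dir:  # Tune passes this when resuming a paused/failed trial
        model_state, optim_state = torch.load(os.path.join(checkpoint_dir, "checkpoint"))
        model.load_state_dict(model_state)
        optim.load_state_dict(optim_state)
    for epoch in range(config["epochs"]):
        # ... run eval_epoch training steps, then validate ...
        with tune.checkpoint_dir(epoch) as ckpt_dir:  # one checkpoint per report
            torch.save((model.state_dict(), optim.state_dict()),
                       os.path.join(ckpt_dir, "checkpoint"))
        tune.report(MRR=0.0)  # replace with real validation metrics
```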
@@ -34,7 +34,6 @@ def get_params():
    params['device'] = torch.device('cuda:'+str(args.device))
    # params['device'] = torch.device('cpu')
    print("gpu:", params['device'])
    return params, args

@@ -53,7 +53,7 @@ class EmbeddingLearner(nn.Module):
def bpr_loss(p_scores, n_values, device):
    ratio = int(n_values.shape[1] / p_scores.shape[1])
    temp_pvalues = torch.tensor([]).cuda(device=device)
    temp_pvalues = torch.tensor([], device=device)
    for i in range(p_scores.shape[1]):
        temp_pvalues = torch.cat((temp_pvalues, p_scores[:, i, None].expand(-1, ratio)), dim=1)

@@ -66,7 +66,7 @@ def bpr_loss(p_scores, n_values,device):
def bpr_max_loss(p_scores, n_values, device):
    s = F.softmax(n_values, dim=1)
    ratio = int(n_values.shape[1] / p_scores.shape[1])
    temp_pvalues = torch.tensor([]).cuda(device=device)
    temp_pvalues = torch.tensor([], device=device)
    for i in range(p_scores.shape[1]):
        temp_pvalues = torch.cat((temp_pvalues, p_scores[:, i, None].expand(-1, ratio)), dim=1)

@@ -78,7 +78,7 @@ def bpr_max_loss(p_scores, n_values,device):
def top_loss(p_scores, n_values, device):
    ratio = int(n_values.shape[1] / p_scores.shape[1])
    temp_pvalues = torch.tensor([]).cuda(device=device)
    temp_pvalues = torch.tensor([], device=device)
    for i in range(p_scores.shape[1]):
        temp_pvalues = torch.cat((temp_pvalues, p_scores[:, i, None].expand(-1, ratio)), dim=1)
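All three loss functions build `temp_pvalues` with the same cat/expand loop, tiling each positive score across its `ratio` negatives; the `torch.tensor([], device=device)` change just allocates that buffer on whatever device the scores live on instead of assuming CUDA. The loop also has a one-line vectorized equivalent via `repeat_interleave` (sketch below; it uses a common BPR formulation, `-log σ(p − n)`, since the loss bodies themselves fall outside these hunks):

```python
# Loop-free equivalent of the positive-score tiling above (sketch).
# Assumes p_scores is (B, P) and n_values is (B, P*ratio).
import torch
import torch.nn.functional as F

def bpr_loss_vectorized(p_scores, n_values):
    ratio = n_values.shape[1] // p_scores.shape[1]
    # Same column order as the cat/expand loop: p0 x ratio, p1 x ratio, ...
    temp_pvalues = p_scores.repeat_interleave(ratio, dim=1)
    return -F.logsigmoid(temp_pvalues - n_values).mean()  # standard BPR

p = torch.randn(8, 3)      # 3 positive scores per row
n = torch.randn(8, 3 * 5)  # 5 negatives per positive
print(bpr_loss_vectorized(p, n))
```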
@@ -94,7 +94,8 @@ def top_loss(p_scores, n_values,device):
class MetaTL(nn.Module):
    def __init__(self, itemnum, parameter):
        super(MetaTL, self).__init__()
        self.device = torch.device(parameter['device'])
        # self.device = torch.device(parameter['device'])
        self.device = parameter['device']
        self.beta = parameter['beta']
        # self.dropout_p = parameter['dropout_p']
        self.embed_dim = parameter['embed_dim']

@@ -106,7 +107,7 @@ class MetaTL(nn.Module):
        self.embedding_learner = EmbeddingLearner()
        # self.loss_func = nn.MarginRankingLoss(self.margin)
        self.loss_func = top_loss
        self.loss_func = bpr_loss
        self.rel_q_sharing = dict()

@@ -138,7 +139,7 @@ class MetaTL(nn.Module):
        p_score, n_score = self.embedding_learner(sup_neg_e1, sup_neg_e2, rel_s, K)

        y = torch.Tensor([1]).to(self.device)
        # y = torch.Tensor([1]).to(self.device)
        self.zero_grad()
        # sorted,indecies = torch.sort(n_score, descending=True,dim=1)
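On the device change: `params['device']` is now a bare integer index rather than a `torch.device` object. That works because PyTorch accepts an int wherever a device is expected, and under Ray each trial only sees the GPUs Tune assigned it (Tune remaps `CUDA_VISIBLE_DEVICES` per trial), so index 0 always means "this trial's GPU". A quick check:

```python
# Why a bare int works as a device (sketch).
import torch

dev = torch.device(0)          # same as torch.device('cuda:0')
print(dev)                     # -> cuda:0

x = torch.zeros(2)
if torch.cuda.is_available():  # guard so the sketch also runs on CPU boxes
    x = x.to(0)                # .to()/.cuda() accept the bare index too
print(x.device)
```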
@@ -21,7 +21,7 @@ def random_neq(l, r, s, user_train,usernum):
    return candid_item

def sample_function_mixed(user_train, usernum, itemnum, batch_size, maxlen, result_queue, SEED):
def sample_function_mixed(user_train, usernum, itemnum, batch_size, maxlen, result_queue, SEED, number_of_neg):
    def sample():
        if random.random() <= 1:

@@ -31,7 +31,7 @@ def sample_function_mixed(user_train, usernum, itemnum, batch_size, maxlen, resu
            seq = np.zeros([maxlen], dtype=np.int32)
            pos = np.zeros([maxlen], dtype=np.int32)
            neg = np.zeros([maxlen*5], dtype=np.int32)
            neg = np.zeros([maxlen*number_of_neg], dtype=np.int32)

            if len(user_train[user]) < maxlen:
                nxt_idx = len(user_train[user]) - 1

@@ -62,7 +62,7 @@ def sample_function_mixed(user_train, usernum, itemnum, batch_size, maxlen, resu
            # for idx in range(maxlen*30 - 1):
            #     support_negative_triples.append([seq[-1], curr_rel, neg[idx]])
            for j in range(5):
            for j in range(number_of_neg):
                for idx in range(maxlen-1):
                    support_negative_triples.append([seq[idx], curr_rel, neg[j*maxlen + idx]])

@@ -116,7 +116,7 @@ def sample_function_mixed(user_train, usernum, itemnum, batch_size, maxlen, resu
        result_queue.put(([support, support_negative, query, negative], curr_rel))

class WarpSampler(object):
    def __init__(self, User, usernum, itemnum, batch_size=64, maxlen=10, n_workers=1):
    def __init__(self, User, usernum, itemnum, batch_size=64, maxlen=10, n_workers=1, params=None):
        self.result_queue = Queue(maxsize=n_workers * 10)
        self.processors = []
        for i in range(n_workers):

@@ -127,7 +127,8 @@ class WarpSampler(object):
                    batch_size,
                    maxlen,
                    self.result_queue,
                    np.random.randint(2e9)
                    np.random.randint(2e9),
                    params['number_of_neg']
                )))
            self.processors[-1].daemon = True
            self.processors[-1].start()
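For reference, WarpSampler's concurrency model: each worker is a daemon `Process` that builds batches forever and pushes them into a bounded `Queue` (`maxsize=n_workers * 10`), so sampling overlaps training while memory stays capped; the new `number_of_neg` simply rides along as an extra worker argument. A tiny standalone sketch of that producer pattern (the batch construction is a stand-in for `sample_function_mixed`):

```python
# Bounded-queue producer pattern behind WarpSampler (sketch).
import numpy as np
from multiprocessing import Process, Queue

def worker(result_queue, seed, number_of_neg):
    rng = np.random.default_rng(seed)
    while True:                                   # daemon: dies with the parent
        batch = rng.integers(0, 100, size=(8, number_of_neg))
        result_queue.put(batch)                   # blocks while the queue is full

if __name__ == "__main__":
    queue = Queue(maxsize=10)                     # caps pre-fetched batches
    p = Process(target=worker, args=(queue, 42, 5))
    p.daemon = True
    p.start()
    print(queue.get().shape)                      # the trainer's next_batch() is a queue.get()
```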
@@ -20,14 +20,19 @@ class Trainer:
        self.epoch = parameter['epoch']
        # self.print_epoch = parameter['print_epoch']
        # self.eval_epoch = parameter['eval_epoch']
        self.eval_epoch = 1000

        self.device = torch.device(parameter['device'])
        # self.device = torch.device(parameter['device'])
        self.device = parameter['device']

        self.MetaTL = MetaTL(itemnum, parameter)
        self.MetaTL.to(self.device)
        self.MetaTL.to(parameter['device'])
        self.optimizer = torch.optim.Adam(self.MetaTL.parameters(), self.learning_rate)

        if parameter['eval_epoch']:
            self.eval_epoch = parameter['eval_epoch']
        else:
            self.eval_epoch = 1000

    def rank_predict(self, data, x, ranks):
        # query_idx is the idx of positive score

@@ -77,7 +82,7 @@ class Trainer:
            # do evaluation on specific epoch
            if e % self.eval_epoch == 0 and e != 0:
                loss_num = loss.item()
                loss_num = loss.detach().item()
                print("Epoch: {}\tLoss: {:.4f}".format(e, loss_num))
                print('Epoch {} Validating...'.format(e))

@@ -134,14 +139,14 @@ class Trainer:
        if istest:
            print("TEST: \t test_loss: ", total_loss.item())
            print("TEST: \t test_loss: ", total_loss.detach().item())
            print("TEST: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r".format(
                temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1']), "\n")
            with open('results.txt', 'a') as f:
                f.writelines("TEST: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r\n\n".format(
                    temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1']))
        else:
            print("VALID: \t validation_loss: ", total_loss.item())
            print("VALID: \t validation_loss: ", total_loss.detach().item())
            print("VALID: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r".format(
                temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1']))
            with open("results.txt", 'a') as f:
@@ -107,6 +107,11 @@ class DataLoader(object):
        self.itemnum = itemnum

        if parameter['number_of_neg']:
            self.number_of_neg = parameter['number_of_neg']
        else:
            self.number_of_neg = 5

    def next_one_on_eval(self):
        if self.curr_tri_idx == self.num_tris:

@@ -118,8 +123,7 @@ class DataLoader(object):
        seq = np.zeros([self.maxlen], dtype=np.int32)
        pos = np.zeros([self.maxlen - 1], dtype=np.int32)
        # neg = np.zeros([self.maxlen*30 - 1], dtype=np.int32)
        neg = np.zeros([self.maxlen * 5], dtype=np.int32)
        neg = np.zeros([self.maxlen * self.number_of_neg], dtype=np.int32)

        idx = self.maxlen - 1

@@ -144,7 +148,7 @@ class DataLoader(object):
        # for idx in range(len(neg)):
        #     support_negative_triples.append([seq[-1], curr_rel, neg[idx]])
        for j in range(self.number_of_neg):
            for idx in range(self.maxlen-1):
                support_negative_triples.append([seq[idx], curr_rel, neg[j * self.maxlen + idx]])
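One caveat on the fallback blocks added here and in Trainer: `if parameter['number_of_neg']:` raises `KeyError` when the key is absent and also treats a legitimate 0 as "missing"; `dict.get` with an explicit default is the usual defensive idiom (sketch, assuming `parameter` is a plain dict as elsewhere in this diff):

```python
# Defensive default lookup (sketch).
parameter = {}                                     # e.g. key never supplied
number_of_neg = parameter.get('number_of_neg', 5)  # no KeyError, default 5
print(number_of_neg)                               # -> 5
```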