Browse Source

Hyperparameter tuning, including neg_size

RNN
mohamad maheri 2 years ago
parent
commit
c89c660a45
7 changed files with 60 additions and 56 deletions
  1. 17
    24
      hyper_main.py
  2. 12
    11
      hyper_tunning.py
  3. 0
    1
      main.py
  4. 7
    6
      models.py
  5. 6
    5
      sampler.py
  6. 11
    6
      trainer.py
  7. 7
    3
      utils.py

+ 17
- 24
hyper_main.py View File

import os
os.environ['CUDA_VISIBLE_DEVICES'] = "1"

from ray.tune.schedulers import ASHAScheduler from ray.tune.schedulers import ASHAScheduler
from ray.tune import CLIReporter from ray.tune import CLIReporter
from ray import tune from ray import tune
args.add_argument("-K", "--K", default=3, type=int) #NUMBER OF SHOT args.add_argument("-K", "--K", default=3, type=int) #NUMBER OF SHOT


# args.add_argument("-dim", "--embed_dim", default=100, type=int) # args.add_argument("-dim", "--embed_dim", default=100, type=int)
args.add_argument("-bs", "--batch_size", default=1024, type=int)
# args.add_argument("-bs", "--batch_size", default=1024, type=int)
# args.add_argument("-lr", "--learning_rate", default=0.001, type=float) # args.add_argument("-lr", "--learning_rate", default=0.001, type=float)


args.add_argument("-epo", "--epoch", default=100000, type=int) args.add_argument("-epo", "--epoch", default=100000, type=int)
for k, v in vars(args).items(): for k, v in vars(args).items():
params[k] = v params[k] = v


params['device'] = torch.device('cuda:0')
# params['device'] = torch.device('cuda:1')
params['device'] = 0
return params, args return params, args






def main(num_samples, gpus_per_trial=2): def main(num_samples, gpus_per_trial=2):
print("===============",torch.cuda.device_count(),"=======")
params, args = get_params() params, args = get_params()


if params['seed'] is not None: if params['seed'] is not None:


user_train, usernum_train, itemnum, user_input_test, user_test, user_input_valid, user_valid = data_load(args.dataset, args.K) user_train, usernum_train, itemnum, user_input_test, user_test, user_input_valid, user_valid = data_load(args.dataset, args.K)


batch_size = params['batch_size']
# batch_size = params['batch_size']
# sampler = WarpSampler(user_train, usernum_train, itemnum, batch_size=batch_size, maxlen=args.K, n_workers=1) # sampler = WarpSampler(user_train, usernum_train, itemnum, batch_size=batch_size, maxlen=args.K, n_workers=1)
# sampler_test = DataLoader(user_input_test, user_test, itemnum, params) # sampler_test = DataLoader(user_input_test, user_test, itemnum, params)
# sampler_valid = DataLoader(user_input_valid, user_valid, itemnum, params) # sampler_valid = DataLoader(user_input_valid, user_valid, itemnum, params)
"beta" : tune.choice([0.05,0.1,1,4,4.5,5,5.5,6,10]), "beta" : tune.choice([0.05,0.1,1,4,4.5,5,5.5,6,10]),
"margin" : tune.choice([1,0.9,0.8,1.1,1.2]), "margin" : tune.choice([1,0.9,0.8,1.1,1.2]),


# "sampler":sampler,
# "sampler_test":sampler_test,
# "sampler_valid":sampler_valid,
"batch_size" : tune.choice([128,256,512,1024,2048]),
"number_of_neg" : tune.choice([1,3,5,7,10,20,30,50,70]),
"loss_function" : tune.choice(["bpr"]),
"eval_epoch" : tune.choice([100,250,500,1000,1500]),
'device' : params['device'],



"itemnum":itemnum, "itemnum":itemnum,
"params":params, "params":params,
} }


print("===============", torch.cuda.device_count(), "=======")
scheduler = ASHAScheduler( scheduler = ASHAScheduler(
metric="MRR", metric="MRR",
mode="max", mode="max",
log_to_file=True, log_to_file=True,
# resume=True, # resume=True,
local_dir="/media/external_10TB/10TB/maheri/metaTL_ray/ray_local_dir", local_dir="/media/external_10TB/10TB/maheri/metaTL_ray/ray_local_dir",
name="metatl_rnn1",
name="bpr_rnn",
) )


best_trial = result.get_best_trial("MRR", "max", "last") best_trial = result.get_best_trial("MRR", "max", "last")
print(result.results_df) print(result.results_df)
print("=======================================================\n") print("=======================================================\n")


# best_trained_model = Net(best_trial.config["l1"], best_trial.config["l2"])
# device = "cpu"
# if torch.cuda.is_available():
# device = "cuda:0"
# if gpus_per_trial > 1:
# best_trained_model = nn.DataParallel(best_trained_model)
# best_trained_model.to(device)
#
# best_checkpoint_dir = best_trial.checkpoint.value
# model_state, optimizer_state = torch.load(os.path.join(
# best_checkpoint_dir, "checkpoint"))
# best_trained_model.load_state_dict(model_state)
#
# test_acc = test_accuracy(best_trained_model, device)
# print("Best trial test set accuracy: {}".format(test_acc))




if __name__ == "__main__": if __name__ == "__main__":
# You can change the number of GPUs per trial here: # You can change the number of GPUs per trial here:
main(num_samples=150, gpus_per_trial=1)
main(num_samples=150, gpus_per_trial=0.5)

+ 12
- 11
hyper_tunning.py View File

random.seed(SEED) random.seed(SEED)


params = conf['params'] params = conf['params']
params['batch_size'] = conf['batch_size']
params['number_of_neg'] = conf['number_of_neg']

user_train, usernum_train, itemnum, user_input_test, user_test, user_input_valid, user_valid = data_load(params['dataset'], params['K']) user_train, usernum_train, itemnum, user_input_test, user_test, user_input_valid, user_valid = data_load(params['dataset'], params['K'])
sampler = WarpSampler(user_train, usernum_train, itemnum, batch_size=params['batch_size'], maxlen=params['K'], n_workers=1)
sampler = WarpSampler(user_train, usernum_train, itemnum, batch_size=params['batch_size'], maxlen=params['K'], n_workers=1,params=params)
sampler_test = DataLoader(user_input_test, user_test, itemnum, params) sampler_test = DataLoader(user_input_test, user_test, itemnum, params)
sampler_valid = DataLoader(user_input_valid, user_valid, itemnum, params) sampler_valid = DataLoader(user_input_valid, user_valid, itemnum, params)


ps = { ps = {
"batch_size" : conf["params"]['batch_size'],
"batch_size" : conf['batch_size'],
"learning_rate" : conf['learning_rate'], "learning_rate" : conf['learning_rate'],
"epoch" : conf["params"]['epoch'], "epoch" : conf["params"]['epoch'],
"beta" : conf['beta'], "beta" : conf['beta'],
"margin" : conf['margin'], "margin" : conf['margin'],
"K" : conf["params"]['K'], "K" : conf["params"]['K'],


"number_of_neg" : conf["number_of_neg"],
"loss_function" : conf["loss_function"],
"eval_epoch" : conf["eval_epoch"],
"device" : params['device']
} }


trainer = Trainer([sampler, sampler_valid, sampler_test], conf["itemnum"], ps) trainer = Trainer([sampler, sampler_valid, sampler_test], conf["itemnum"], ps)


# trainer.train()
if checkpoint_dir: if checkpoint_dir:
print("===================== using checkpoint =====================") print("===================== using checkpoint =====================")
model_state, optimizer_state = torch.load( model_state, optimizer_state = torch.load(
trainer.MetaTL.load_state_dict(model_state) trainer.MetaTL.load_state_dict(model_state)
trainer.optimizer.load_state_dict(optimizer_state) trainer.optimizer.load_state_dict(optimizer_state)


for epoch in range(int(ps['epoch']/1000)):

for e in range(1000):
for epoch in range(int(ps['epoch']/ps['eval_epoch'])):
for e in range(ps['eval_epoch']):
# sample one batch from data_loader # sample one batch from data_loader
train_task, curr_rel = trainer.train_data_loader.next_batch() train_task, curr_rel = trainer.train_data_loader.next_batch()
loss, _, _ = trainer.do_one_step(train_task, iseval=False, curr_rel=curr_rel) loss, _, _ = trainer.do_one_step(train_task, iseval=False, curr_rel=curr_rel)
# do evaluation on specific epoch # do evaluation on specific epoch
valid_data = trainer.eval(istest=False, epoch=(-1)) valid_data = trainer.eval(istest=False, epoch=(-1))


# print('Epoch {} Testing...'.format(e))
# test_data = self.eval(istest=True, epoch=e)


with tune.checkpoint_dir(epoch) as checkpoint_dir: with tune.checkpoint_dir(epoch) as checkpoint_dir:
path = os.path.join(checkpoint_dir, "checkpoint") path = os.path.join(checkpoint_dir, "checkpoint")
torch.save((trainer.MetaTL.state_dict(), trainer.optimizer.state_dict()), path) torch.save((trainer.MetaTL.state_dict(), trainer.optimizer.state_dict()), path)
tune.report( tune.report(
MRR=valid_data["MRR"], NDCG10=valid_data['NDCG@10'], NDCG5=valid_data["NDCG@5"], NDCG1=valid_data["NDCG@1"], MRR=valid_data["MRR"], NDCG10=valid_data['NDCG@10'], NDCG5=valid_data["NDCG@5"], NDCG1=valid_data["NDCG@1"],
Hits10=valid_data["Hits@10"], Hits5=valid_data["Hits@5"], Hits1=valid_data["Hits@1"], Hits10=valid_data["Hits@10"], Hits5=valid_data["Hits@5"], Hits1=valid_data["Hits@1"],
training_iteration=epoch*1000
training_iteration=epoch*ps['eval_epoch']
) )



+ 0
- 1
main.py View File



params['device'] = torch.device('cuda:'+str(args.device)) params['device'] = torch.device('cuda:'+str(args.device))
# params['device'] = torch.device('cpu') # params['device'] = torch.device('cpu')
print("gpu:",params['device'])


return params, args return params, args



+ 7
- 6
models.py View File



def bpr_loss(p_scores, n_values,device): def bpr_loss(p_scores, n_values,device):
ratio = int(n_values.shape[1] / p_scores.shape[1]) ratio = int(n_values.shape[1] / p_scores.shape[1])
temp_pvalues = torch.tensor([]).cuda(device=device)
temp_pvalues = torch.tensor([],device=device)
for i in range(p_scores.shape[1]): for i in range(p_scores.shape[1]):
temp_pvalues = torch.cat((temp_pvalues, p_scores[:, i, None].expand(-1, ratio)), dim=1) temp_pvalues = torch.cat((temp_pvalues, p_scores[:, i, None].expand(-1, ratio)), dim=1)


def bpr_max_loss(p_scores, n_values,device): def bpr_max_loss(p_scores, n_values,device):
s = F.softmax(n_values,dim=1) s = F.softmax(n_values,dim=1)
ratio = int(n_values.shape[1] / p_scores.shape[1]) ratio = int(n_values.shape[1] / p_scores.shape[1])
temp_pvalues = torch.tensor([]).cuda(device=device)
temp_pvalues = torch.tensor([],device=device)
for i in range(p_scores.shape[1]): for i in range(p_scores.shape[1]):
temp_pvalues = torch.cat((temp_pvalues,p_scores[:,i,None].expand(-1,ratio)),dim=1) temp_pvalues = torch.cat((temp_pvalues,p_scores[:,i,None].expand(-1,ratio)),dim=1)




def top_loss(p_scores, n_values,device): def top_loss(p_scores, n_values,device):
ratio = int(n_values.shape[1] / p_scores.shape[1]) ratio = int(n_values.shape[1] / p_scores.shape[1])
temp_pvalues = torch.tensor([]).cuda(device=device)
temp_pvalues = torch.tensor([],device=device)
for i in range(p_scores.shape[1]): for i in range(p_scores.shape[1]):
temp_pvalues = torch.cat((temp_pvalues, p_scores[:, i, None].expand(-1, ratio)), dim=1) temp_pvalues = torch.cat((temp_pvalues, p_scores[:, i, None].expand(-1, ratio)), dim=1)


class MetaTL(nn.Module): class MetaTL(nn.Module):
def __init__(self, itemnum, parameter): def __init__(self, itemnum, parameter):
super(MetaTL, self).__init__() super(MetaTL, self).__init__()
self.device = torch.device(parameter['device'])
# self.device = torch.device(parameter['device'])
self.device = parameter['device']
self.beta = parameter['beta'] self.beta = parameter['beta']
# self.dropout_p = parameter['dropout_p'] # self.dropout_p = parameter['dropout_p']
self.embed_dim = parameter['embed_dim'] self.embed_dim = parameter['embed_dim']


self.embedding_learner = EmbeddingLearner() self.embedding_learner = EmbeddingLearner()
# self.loss_func = nn.MarginRankingLoss(self.margin) # self.loss_func = nn.MarginRankingLoss(self.margin)
self.loss_func = top_loss
self.loss_func = bpr_loss


self.rel_q_sharing = dict() self.rel_q_sharing = dict()




p_score, n_score = self.embedding_learner(sup_neg_e1, sup_neg_e2, rel_s, K) p_score, n_score = self.embedding_learner(sup_neg_e1, sup_neg_e2, rel_s, K)


y = torch.Tensor([1]).to(self.device)
# y = torch.Tensor([1]).to(self.device)
self.zero_grad() self.zero_grad()


# sorted,indecies = torch.sort(n_score, descending=True,dim=1) # sorted,indecies = torch.sort(n_score, descending=True,dim=1)

+ 6
- 5
sampler.py View File

return candid_item return candid_item




def sample_function_mixed(user_train, usernum, itemnum, batch_size, maxlen, result_queue, SEED):
def sample_function_mixed(user_train, usernum, itemnum, batch_size, maxlen, result_queue, SEED,number_of_neg):
def sample(): def sample():


if random.random()<=1: if random.random()<=1:
seq = np.zeros([maxlen], dtype=np.int32) seq = np.zeros([maxlen], dtype=np.int32)
pos = np.zeros([maxlen], dtype=np.int32) pos = np.zeros([maxlen], dtype=np.int32)
neg = np.zeros([maxlen*5], dtype=np.int32)
neg = np.zeros([maxlen*number_of_neg], dtype=np.int32)


if len(user_train[user]) < maxlen: if len(user_train[user]) < maxlen:
nxt_idx = len(user_train[user]) - 1 nxt_idx = len(user_train[user]) - 1


# for idx in range(maxlen*30 - 1): # for idx in range(maxlen*30 - 1):
# support_negative_triples.append([seq[-1], curr_rel, neg[idx]]) # support_negative_triples.append([seq[-1], curr_rel, neg[idx]])
for j in range(5):
for j in range(number_of_neg):
for idx in range(maxlen-1): for idx in range(maxlen-1):
support_negative_triples.append([seq[idx], curr_rel, neg[j*maxlen + idx]]) support_negative_triples.append([seq[idx], curr_rel, neg[j*maxlen + idx]])


result_queue.put(([support, support_negative, query, negative], curr_rel)) result_queue.put(([support, support_negative, query, negative], curr_rel))


class WarpSampler(object): class WarpSampler(object):
def __init__(self, User, usernum, itemnum, batch_size=64, maxlen=10, n_workers=1):
def __init__(self, User, usernum, itemnum, batch_size=64, maxlen=10, n_workers=1,params = None):
self.result_queue = Queue(maxsize=n_workers * 10) self.result_queue = Queue(maxsize=n_workers * 10)
self.processors = [] self.processors = []
for i in range(n_workers): for i in range(n_workers):
batch_size, batch_size,
maxlen, maxlen,
self.result_queue, self.result_queue,
np.random.randint(2e9)
np.random.randint(2e9),
params['number_of_neg']
))) )))
self.processors[-1].daemon = True self.processors[-1].daemon = True
self.processors[-1].start() self.processors[-1].start()

+ 11
- 6
trainer.py View File

self.epoch = parameter['epoch'] self.epoch = parameter['epoch']
# self.print_epoch = parameter['print_epoch'] # self.print_epoch = parameter['print_epoch']
# self.eval_epoch = parameter['eval_epoch'] # self.eval_epoch = parameter['eval_epoch']
self.eval_epoch = 1000
self.device = torch.device(parameter['device'])
# self.device = torch.device(parameter['device'])
self.device = parameter['device']


self.MetaTL = MetaTL(itemnum, parameter) self.MetaTL = MetaTL(itemnum, parameter)
self.MetaTL.to(self.device)
self.MetaTL.to(parameter['device'])


self.optimizer = torch.optim.Adam(self.MetaTL.parameters(), self.learning_rate) self.optimizer = torch.optim.Adam(self.MetaTL.parameters(), self.learning_rate)


if parameter['eval_epoch']:
self.eval_epoch = parameter['eval_epoch']
else:
self.eval_epoch = 1000



def rank_predict(self, data, x, ranks): def rank_predict(self, data, x, ranks):
# query_idx is the idx of positive score # query_idx is the idx of positive score


# do evaluation on specific epoch # do evaluation on specific epoch
if e % self.eval_epoch == 0 and e != 0: if e % self.eval_epoch == 0 and e != 0:
loss_num = loss.item()
loss_num = loss.detach().item()
print("Epoch: {}\tLoss: {:.4f}".format(e, loss_num)) print("Epoch: {}\tLoss: {:.4f}".format(e, loss_num))


print('Epoch {} Validating...'.format(e)) print('Epoch {} Validating...'.format(e))


if istest: if istest:
print("TEST: \t test_loss: ",total_loss.item())
print("TEST: \t test_loss: ",total_loss.detach().item())
print("TEST: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r".format( print("TEST: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r".format(
temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1']),"\n") temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1']),"\n")
with open('results.txt', 'a') as f: with open('results.txt', 'a') as f:
f.writelines("TEST: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r\n\n".format( f.writelines("TEST: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r\n\n".format(
temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1'])) temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1']))
else: else:
print("VALID: \t validation_loss: ", total_loss.item())
print("VALID: \t validation_loss: ", total_loss.detach().item() )
print("VALID: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r".format( print("VALID: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r".format(
temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1'])) temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1']))
with open("results.txt",'a') as f: with open("results.txt",'a') as f:

+ 7
- 3
utils.py View File

self.itemnum = itemnum self.itemnum = itemnum


if parameter['number_of_neg']:
self.number_of_neg = parameter['number_of_neg']
else:
self.number_of_neg = 5



def next_one_on_eval(self): def next_one_on_eval(self):
if self.curr_tri_idx == self.num_tris: if self.curr_tri_idx == self.num_tris:
seq = np.zeros([self.maxlen], dtype=np.int32) seq = np.zeros([self.maxlen], dtype=np.int32)
pos = np.zeros([self.maxlen - 1], dtype=np.int32) pos = np.zeros([self.maxlen - 1], dtype=np.int32)
# neg = np.zeros([self.maxlen*30 - 1], dtype=np.int32)
neg = np.zeros([self.maxlen * 5], dtype=np.int32)
neg = np.zeros([self.maxlen * self.number_of_neg], dtype=np.int32)
idx = self.maxlen - 1 idx = self.maxlen - 1




# for idx in range(len(neg)): # for idx in range(len(neg)):
# support_negative_triples.append([seq[-1],curr_rel,neg[idx]]) # support_negative_triples.append([seq[-1],curr_rel,neg[idx]])
for j in range(5):
for j in range(self.number_of_neg):
for idx in range(self.maxlen-1): for idx in range(self.maxlen-1):
support_negative_triples.append([seq[idx], curr_rel, neg[j * self.maxlen + idx]]) support_negative_triples.append([seq[idx], curr_rel, neg[j * self.maxlen + idx]])



Loading…
Cancel
Save