
hyper parameter tuning including neg_size

RNN
mohamad maheri · 2 years ago
commit c89c660a45
7 changed files with 60 additions and 56 deletions
  1. hyper_main.py     +17  -24
  2. hyper_tunning.py  +12  -11
  3. main.py            +0   -1
  4. models.py          +7   -6
  5. sampler.py         +6   -5
  6. trainer.py        +11   -6
  7. utils.py           +7   -3

hyper_main.py  +17 -24

@@ -1,3 +1,6 @@
import os
os.environ['CUDA_VISIBLE_DEVICES'] = "1"

from ray.tune.schedulers import ASHAScheduler
from ray.tune import CLIReporter
from ray import tune
@@ -19,7 +22,7 @@ def get_params():
args.add_argument("-K", "--K", default=3, type=int) #NUMBER OF SHOT

# args.add_argument("-dim", "--embed_dim", default=100, type=int)
args.add_argument("-bs", "--batch_size", default=1024, type=int)
# args.add_argument("-bs", "--batch_size", default=1024, type=int)
# args.add_argument("-lr", "--learning_rate", default=0.001, type=float)

args.add_argument("-epo", "--epoch", default=100000, type=int)
@@ -37,13 +40,14 @@ def get_params():
for k, v in vars(args).items():
params[k] = v

params['device'] = torch.device('cuda:0')
# params['device'] = torch.device('cuda:1')
params['device'] = 0
return params, args



def main(num_samples, gpus_per_trial=2):
print("===============",torch.cuda.device_count(),"=======")
params, args = get_params()

if params['seed'] is not None:
@@ -56,7 +60,7 @@ def main(num_samples, gpus_per_trial=2):

user_train, usernum_train, itemnum, user_input_test, user_test, user_input_valid, user_valid = data_load(args.dataset, args.K)

batch_size = params['batch_size']
# batch_size = params['batch_size']
# sampler = WarpSampler(user_train, usernum_train, itemnum, batch_size=batch_size, maxlen=args.K, n_workers=1)
# sampler_test = DataLoader(user_input_test, user_test, itemnum, params)
# sampler_valid = DataLoader(user_input_valid, user_valid, itemnum, params)
@@ -72,14 +76,18 @@ def main(num_samples, gpus_per_trial=2):
"beta" : tune.choice([0.05,0.1,1,4,4.5,5,5.5,6,10]),
"margin" : tune.choice([1,0.9,0.8,1.1,1.2]),

# "sampler":sampler,
# "sampler_test":sampler_test,
# "sampler_valid":sampler_valid,
"batch_size" : tune.choice([128,256,512,1024,2048]),
"number_of_neg" : tune.choice([1,3,5,7,10,20,30,50,70]),
"loss_function" : tune.choice(["bpr"]),
"eval_epoch" : tune.choice([100,250,500,1000,1500]),
'device' : params['device'],


"itemnum":itemnum,
"params":params,
}

print("===============", torch.cuda.device_count(), "=======")
scheduler = ASHAScheduler(
metric="MRR",
mode="max",
@@ -99,7 +107,7 @@ def main(num_samples, gpus_per_trial=2):
log_to_file=True,
# resume=True,
local_dir="/media/external_10TB/10TB/maheri/metaTL_ray/ray_local_dir",
name="metatl_rnn1",
name="bpr_rnn",
)

best_trial = result.get_best_trial("MRR", "max", "last")
@@ -116,23 +124,8 @@ def main(num_samples, gpus_per_trial=2):
print(result.results_df)
print("=======================================================\n")

# best_trained_model = Net(best_trial.config["l1"], best_trial.config["l2"])
# device = "cpu"
# if torch.cuda.is_available():
# device = "cuda:0"
# if gpus_per_trial > 1:
# best_trained_model = nn.DataParallel(best_trained_model)
# best_trained_model.to(device)
#
# best_checkpoint_dir = best_trial.checkpoint.value
# model_state, optimizer_state = torch.load(os.path.join(
# best_checkpoint_dir, "checkpoint"))
# best_trained_model.load_state_dict(model_state)
#
# test_acc = test_accuracy(best_trained_model, device)
# print("Best trial test set accuracy: {}".format(test_acc))


if __name__ == "__main__":
# You can change the number of GPUs per trial here:
main(num_samples=150, gpus_per_trial=1)
main(num_samples=150, gpus_per_trial=0.5)
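
For orientation, a minimal sketch, assuming the usual Ray Tune wiring, of how the search space, ASHAScheduler, and CLIReporter above feed tune.run; train_metatl is the trainable from hyper_tunning.py, the resource figures mirror gpus_per_trial=0.5, and the config is abridged to the keys this commit adds:

    from ray import tune
    from ray.tune import CLIReporter
    from ray.tune.schedulers import ASHAScheduler
    from hyper_tunning import train_metatl   # functional trainable shown in the next file

    # Search space abridged to the knobs introduced by this commit.
    config = {
        "batch_size": tune.choice([128, 256, 512, 1024, 2048]),
        "number_of_neg": tune.choice([1, 3, 5, 7, 10, 20, 30, 50, 70]),
        "loss_function": tune.choice(["bpr"]),
        "eval_epoch": tune.choice([100, 250, 500, 1000, 1500]),
    }
    scheduler = ASHAScheduler(metric="MRR", mode="max")      # stops weak trials early
    reporter = CLIReporter(metric_columns=["MRR", "NDCG10", "training_iteration"])

    result = tune.run(
        train_metatl,
        config=config,
        num_samples=150,                             # matches main(num_samples=150, ...)
        resources_per_trial={"cpu": 2, "gpu": 0.5},  # mirrors gpus_per_trial=0.5
        scheduler=scheduler,
        progress_reporter=reporter,
        name="bpr_rnn",
    )
    best_trial = result.get_best_trial("MRR", "max", "last")
    print(best_trial.config)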

hyper_tunning.py  +12 -11

@@ -22,13 +22,16 @@ def train_metatl(conf,checkpoint_dir=None):
random.seed(SEED)

params = conf['params']
params['batch_size'] = conf['batch_size']
params['number_of_neg'] = conf['number_of_neg']

user_train, usernum_train, itemnum, user_input_test, user_test, user_input_valid, user_valid = data_load(params['dataset'], params['K'])
sampler = WarpSampler(user_train, usernum_train, itemnum, batch_size=params['batch_size'], maxlen=params['K'], n_workers=1)
sampler = WarpSampler(user_train, usernum_train, itemnum, batch_size=params['batch_size'], maxlen=params['K'], n_workers=1,params=params)
sampler_test = DataLoader(user_input_test, user_test, itemnum, params)
sampler_valid = DataLoader(user_input_valid, user_valid, itemnum, params)

ps = {
"batch_size" : conf["params"]['batch_size'],
"batch_size" : conf['batch_size'],
"learning_rate" : conf['learning_rate'],
"epoch" : conf["params"]['epoch'],
"beta" : conf['beta'],
@@ -36,11 +39,14 @@ def train_metatl(conf,checkpoint_dir=None):
"margin" : conf['margin'],
"K" : conf["params"]['K'],

"number_of_neg" : conf["number_of_neg"],
"loss_function" : conf["loss_function"],
"eval_epoch" : conf["eval_epoch"],
"device" : params['device']
}

trainer = Trainer([sampler, sampler_valid, sampler_test], conf["itemnum"], ps)

# trainer.train()
if checkpoint_dir:
print("===================== using checkpoint =====================")
model_state, optimizer_state = torch.load(
@@ -48,9 +54,8 @@ def train_metatl(conf,checkpoint_dir=None):
trainer.MetaTL.load_state_dict(model_state)
trainer.optimizer.load_state_dict(optimizer_state)

for epoch in range(int(ps['epoch']/1000)):

for e in range(1000):
for epoch in range(int(ps['epoch']/ps['eval_epoch'])):
for e in range(ps['eval_epoch']):
# sample one batch from data_loader
train_task, curr_rel = trainer.train_data_loader.next_batch()
loss, _, _ = trainer.do_one_step(train_task, iseval=False, curr_rel=curr_rel)
@@ -58,10 +63,6 @@ def train_metatl(conf,checkpoint_dir=None):
# do evaluation on specific epoch
valid_data = trainer.eval(istest=False, epoch=(-1))

# print('Epoch {} Testing...'.format(e))
# test_data = self.eval(istest=True, epoch=e)


with tune.checkpoint_dir(epoch) as checkpoint_dir:
path = os.path.join(checkpoint_dir, "checkpoint")
torch.save((trainer.MetaTL.state_dict(), trainer.optimizer.state_dict()), path)
@@ -69,6 +70,6 @@ def train_metatl(conf,checkpoint_dir=None):
tune.report(
MRR=valid_data["MRR"], NDCG10=valid_data['NDCG@10'], NDCG5=valid_data["NDCG@5"], NDCG1=valid_data["NDCG@1"],
Hits10=valid_data["Hits@10"], Hits5=valid_data["Hits@5"], Hits1=valid_data["Hits@1"],
training_iteration=epoch*1000
training_iteration=epoch*ps['eval_epoch']
)
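
As a reading aid, a self-contained sketch of the legacy Ray Tune trainable pattern used above (tune.checkpoint_dir plus tune.report once per eval_epoch block); the model, metric value, and config keys here are stand-ins, not the repository's:

    import os
    import torch
    from ray import tune

    def train_sketch(conf, checkpoint_dir=None):
        model = torch.nn.Linear(4, 1)                        # stand-in for trainer.MetaTL
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
        if checkpoint_dir:                                   # resume a paused or failed trial
            model_state, optimizer_state = torch.load(
                os.path.join(checkpoint_dir, "checkpoint"))
            model.load_state_dict(model_state)
            optimizer.load_state_dict(optimizer_state)
        for epoch in range(conf["epoch"] // conf["eval_epoch"]):
            for _ in range(conf["eval_epoch"]):
                pass                                         # one training batch per step
            mrr = 0.0                                        # placeholder validation metric
            with tune.checkpoint_dir(epoch) as ckpt_dir:     # persist state each eval block
                torch.save((model.state_dict(), optimizer.state_dict()),
                           os.path.join(ckpt_dir, "checkpoint"))
            tune.report(MRR=mrr, training_iteration=epoch * conf["eval_epoch"])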


main.py  +0 -1

@@ -34,7 +34,6 @@ def get_params():

params['device'] = torch.device('cuda:'+str(args.device))
# params['device'] = torch.device('cpu')
print("gpu:",params['device'])

return params, args


models.py  +7 -6

@@ -53,7 +53,7 @@ class EmbeddingLearner(nn.Module):

def bpr_loss(p_scores, n_values,device):
ratio = int(n_values.shape[1] / p_scores.shape[1])
temp_pvalues = torch.tensor([]).cuda(device=device)
temp_pvalues = torch.tensor([],device=device)
for i in range(p_scores.shape[1]):
temp_pvalues = torch.cat((temp_pvalues, p_scores[:, i, None].expand(-1, ratio)), dim=1)

@@ -66,7 +66,7 @@ def bpr_loss(p_scores, n_values,device):
def bpr_max_loss(p_scores, n_values,device):
s = F.softmax(n_values,dim=1)
ratio = int(n_values.shape[1] / p_scores.shape[1])
temp_pvalues = torch.tensor([]).cuda(device=device)
temp_pvalues = torch.tensor([],device=device)
for i in range(p_scores.shape[1]):
temp_pvalues = torch.cat((temp_pvalues,p_scores[:,i,None].expand(-1,ratio)),dim=1)

@@ -78,7 +78,7 @@ def bpr_max_loss(p_scores, n_values,device):

def top_loss(p_scores, n_values,device):
ratio = int(n_values.shape[1] / p_scores.shape[1])
temp_pvalues = torch.tensor([]).cuda(device=device)
temp_pvalues = torch.tensor([],device=device)
for i in range(p_scores.shape[1]):
temp_pvalues = torch.cat((temp_pvalues, p_scores[:, i, None].expand(-1, ratio)), dim=1)

@@ -94,7 +94,8 @@ def top_loss(p_scores, n_values,device):
class MetaTL(nn.Module):
def __init__(self, itemnum, parameter):
super(MetaTL, self).__init__()
self.device = torch.device(parameter['device'])
# self.device = torch.device(parameter['device'])
self.device = parameter['device']
self.beta = parameter['beta']
# self.dropout_p = parameter['dropout_p']
self.embed_dim = parameter['embed_dim']
@@ -106,7 +107,7 @@ class MetaTL(nn.Module):

self.embedding_learner = EmbeddingLearner()
# self.loss_func = nn.MarginRankingLoss(self.margin)
self.loss_func = top_loss
self.loss_func = bpr_loss

self.rel_q_sharing = dict()

@@ -138,7 +139,7 @@ class MetaTL(nn.Module):

p_score, n_score = self.embedding_learner(sup_neg_e1, sup_neg_e2, rel_s, K)

y = torch.Tensor([1]).to(self.device)
# y = torch.Tensor([1]).to(self.device)
self.zero_grad()

# sorted,indecies = torch.sort(n_score, descending=True,dim=1)
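
For context, a minimal sketch of a BPR-style loss consistent with the device change above (building the buffer with torch.tensor([], device=device) so it also works on CPU); the exact reduction in models.py is not shown in this hunk, so the logsigmoid mean below is an assumption:

    import torch
    import torch.nn.functional as F

    def bpr_loss_sketch(p_scores, n_values, device):
        # Repeat each positive score so it aligns with its block of negatives,
        # allocating the buffer directly on the target device.
        ratio = n_values.shape[1] // p_scores.shape[1]
        temp_pvalues = torch.tensor([], device=device)
        for i in range(p_scores.shape[1]):
            temp_pvalues = torch.cat(
                (temp_pvalues, p_scores[:, i, None].expand(-1, ratio)), dim=1)
        # Classic BPR objective: push each positive above its paired negatives.
        return -F.logsigmoid(temp_pvalues - n_values).mean()

    # Example shapes: batch of 2, 2 positives, 3 negatives per positive.
    p = torch.randn(2, 2)
    n = torch.randn(2, 6)
    print(bpr_loss_sketch(p, n, torch.device("cpu")))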

sampler.py  +6 -5

@@ -21,7 +21,7 @@ def random_neq(l, r, s, user_train,usernum):
return candid_item


def sample_function_mixed(user_train, usernum, itemnum, batch_size, maxlen, result_queue, SEED):
def sample_function_mixed(user_train, usernum, itemnum, batch_size, maxlen, result_queue, SEED,number_of_neg):
def sample():

if random.random()<=1:
@@ -31,7 +31,7 @@ def sample_function_mixed(user_train, usernum, itemnum, batch_size, maxlen, resu
seq = np.zeros([maxlen], dtype=np.int32)
pos = np.zeros([maxlen], dtype=np.int32)
neg = np.zeros([maxlen*5], dtype=np.int32)
neg = np.zeros([maxlen*number_of_neg], dtype=np.int32)

if len(user_train[user]) < maxlen:
nxt_idx = len(user_train[user]) - 1
@@ -62,7 +62,7 @@ def sample_function_mixed(user_train, usernum, itemnum, batch_size, maxlen, resu

# for idx in range(maxlen*30 - 1):
# support_negative_triples.append([seq[-1], curr_rel, neg[idx]])
for j in range(5):
for j in range(number_of_neg):
for idx in range(maxlen-1):
support_negative_triples.append([seq[idx], curr_rel, neg[j*maxlen + idx]])

@@ -116,7 +116,7 @@ def sample_function_mixed(user_train, usernum, itemnum, batch_size, maxlen, resu
result_queue.put(([support, support_negative, query, negative], curr_rel))

class WarpSampler(object):
def __init__(self, User, usernum, itemnum, batch_size=64, maxlen=10, n_workers=1):
def __init__(self, User, usernum, itemnum, batch_size=64, maxlen=10, n_workers=1,params = None):
self.result_queue = Queue(maxsize=n_workers * 10)
self.processors = []
for i in range(n_workers):
@@ -127,7 +127,8 @@ class WarpSampler(object):
batch_size,
maxlen,
self.result_queue,
np.random.randint(2e9)
np.random.randint(2e9),
params['number_of_neg']
)))
self.processors[-1].daemon = True
self.processors[-1].start()
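
To make the role of number_of_neg concrete, a tiny standalone example (dummy values, not repository data) of how the negative pool and the support-negative triples above grow with it:

    import numpy as np

    maxlen, number_of_neg = 3, 5                  # K-shot length and the newly tuned knob
    seq = np.array([11, 12, 13], dtype=np.int32)  # dummy support items
    curr_rel = 0                                  # dummy relation id
    neg = np.random.randint(1, 1000, size=maxlen * number_of_neg)  # was fixed at maxlen*5

    support_negative_triples = []
    for j in range(number_of_neg):                # one pass per group of negatives
        for idx in range(maxlen - 1):             # one triple per support position
            support_negative_triples.append([seq[idx], curr_rel, neg[j * maxlen + idx]])

    print(len(support_negative_triples))          # number_of_neg * (maxlen - 1) = 10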

trainer.py  +11 -6

@@ -20,14 +20,19 @@ class Trainer:
self.epoch = parameter['epoch']
# self.print_epoch = parameter['print_epoch']
# self.eval_epoch = parameter['eval_epoch']
self.eval_epoch = 1000
self.device = torch.device(parameter['device'])
# self.device = torch.device(parameter['device'])
self.device = parameter['device']

self.MetaTL = MetaTL(itemnum, parameter)
self.MetaTL.to(self.device)
self.MetaTL.to(parameter['device'])

self.optimizer = torch.optim.Adam(self.MetaTL.parameters(), self.learning_rate)

if parameter['eval_epoch']:
self.eval_epoch = parameter['eval_epoch']
else:
self.eval_epoch = 1000


def rank_predict(self, data, x, ranks):
# query_idx is the idx of positive score
@@ -77,7 +82,7 @@ class Trainer:

# do evaluation on specific epoch
if e % self.eval_epoch == 0 and e != 0:
loss_num = loss.item()
loss_num = loss.detach().item()
print("Epoch: {}\tLoss: {:.4f}".format(e, loss_num))

print('Epoch {} Validating...'.format(e))
@@ -134,14 +139,14 @@ class Trainer:

if istest:
print("TEST: \t test_loss: ",total_loss.item())
print("TEST: \t test_loss: ",total_loss.detach().item())
print("TEST: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r".format(
temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1']),"\n")
with open('results.txt', 'a') as f:
f.writelines("TEST: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r\n\n".format(
temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1']))
else:
print("VALID: \t validation_loss: ", total_loss.item())
print("VALID: \t validation_loss: ", total_loss.detach().item() )
print("VALID: \tMRR: {:.3f}\tNDCG@10: {:.3f}\tNDCG@5: {:.3f}\tNDCG@1: {:.3f}\tHits@10: {:.3f}\tHits@5: {:.3f}\tHits@1: {:.3f}\r".format(
temp['MRR'], temp['NDCG@10'], temp['NDCG@5'], temp['NDCG@1'], temp['Hits@10'], temp['Hits@5'], temp['Hits@1']))
with open("results.txt",'a') as f:

utils.py  +7 -3

@@ -107,6 +107,11 @@ class DataLoader(object):
self.itemnum = itemnum

if parameter['number_of_neg']:
self.number_of_neg = parameter['number_of_neg']
else:
self.number_of_neg = 5


def next_one_on_eval(self):
if self.curr_tri_idx == self.num_tris:
@@ -118,8 +123,7 @@ class DataLoader(object):
seq = np.zeros([self.maxlen], dtype=np.int32)
pos = np.zeros([self.maxlen - 1], dtype=np.int32)
# neg = np.zeros([self.maxlen*30 - 1], dtype=np.int32)
neg = np.zeros([self.maxlen * 5], dtype=np.int32)
neg = np.zeros([self.maxlen * self.number_of_neg], dtype=np.int32)
idx = self.maxlen - 1

@@ -144,7 +148,7 @@ class DataLoader(object):

# for idx in range(len(neg)):
# support_negative_triples.append([seq[-1],curr_rel,neg[idx]])
for j in range(5):
for j in range(self.number_of_neg):
for idx in range(self.maxlen-1):
support_negative_triples.append([seq[idx], curr_rel, neg[j * self.maxlen + idx]])

