Browse Source

check result of hyper tuning (best_result1)

define_task
mohamad maheri 2 years ago
parent
commit
07bb1cd89a
3 changed files with 128 additions and 224 deletions
  1. 67
    42
      fast_adapt.py
  2. 60
    181
      learnToLearn.py
  3. 1
    1
      learnToLearnTest.py

+ 67
- 42
fast_adapt.py View File

@@ -1,20 +1,65 @@
# import torch
# import pickle
# from options import config
# import random
#
#
# def cl_loss(c):
# alpha = config['alpha']
# beta = config['beta']
# d = config['d']
# a = torch.div(1,
# torch.add(1, torch.exp(torch.mul(-1, torch.mul(alpha, torch.sub(torch.mul(d, c.squeeze()), beta))))))
# # a = 1 / (1 + torch.exp((-1) * alpha * (d * c - beta)))
# b = torch.mul(a, torch.mul(torch.sub(1, a), torch.sub(1, torch.mul(2, a))))
# # b = 1 * a * (1 - a) * (1 - 2 * a)
# loss = torch.sum(b)
# return loss
#
#
# def fast_adapt(
# learn,
# adaptation_data,
# evaluation_data,
# adaptation_labels,
# evaluation_labels,
# adaptation_steps,
# get_predictions=False,
# epoch=None):
# is_print = random.random() < 0.05
#
# for step in range(adaptation_steps):
# temp, c, k_loss = learn(adaptation_data, adaptation_labels, training=True)
# train_error = torch.nn.functional.mse_loss(temp.view(-1), adaptation_labels)
# # cluster_loss = cl_loss(c)
# # total_loss = train_error + config['cluster_loss_weight'] * cluster_loss
# total_loss = train_error + config['kmeans_loss_weight'] * k_loss
# learn.adapt(total_loss)
# if is_print:
# # print("in support:\t", round(k_loss.item(),4))
# pass
#
# predictions, c, k_loss = learn(evaluation_data, None, training=False, adaptation_data=adaptation_data,
# adaptation_labels=adaptation_labels)
# valid_error = torch.nn.functional.mse_loss(predictions.view(-1), evaluation_labels)
# # cluster_loss = cl_loss(c)
# # total_loss = valid_error + config['cluster_loss_weight'] * cluster_loss
# total_loss = valid_error + config['kmeans_loss_weight'] * k_loss
#
# if is_print:
#
# # print("in query:\t", round(k_loss.item(),4))
# print(c[0].detach().cpu().numpy(),"\t",round(k_loss.item(),3),"\n")
#
# # if random.random() < 0.05:
# # print("cl:", round(cluster_loss.item()), "\t c:", c[0].cpu().data.numpy())
#
# if get_predictions:
# return total_loss, predictions
# return total_loss, c, k_loss.item()

import torch
import pickle
from options import config
import random


def cl_loss(c):
alpha = config['alpha']
beta = config['beta']
d = config['d']
a = torch.div(1,
torch.add(1, torch.exp(torch.mul(-1, torch.mul(alpha, torch.sub(torch.mul(d, c.squeeze()), beta))))))
# a = 1 / (1 + torch.exp((-1) * alpha * (d * c - beta)))
b = torch.mul(a, torch.mul(torch.sub(1, a), torch.sub(1, torch.mul(2, a))))
# b = 1 * a * (1 - a) * (1 - 2 * a)
loss = torch.sum(b)
return loss


def fast_adapt(
@@ -24,36 +69,16 @@ def fast_adapt(
adaptation_labels,
evaluation_labels,
adaptation_steps,
get_predictions=False,
epoch=None):
is_print = random.random() < 0.05

get_predictions=False):
for step in range(adaptation_steps):
temp, c, k_loss = learn(adaptation_data, adaptation_labels, training=True)
temp = learn(adaptation_data, adaptation_labels, training=True)
train_error = torch.nn.functional.mse_loss(temp.view(-1), adaptation_labels)
# cluster_loss = cl_loss(c)
# total_loss = train_error + config['cluster_loss_weight'] * cluster_loss
total_loss = train_error + config['kmeans_loss_weight'] * k_loss
learn.adapt(total_loss)
if is_print:
# print("in support:\t", round(k_loss.item(),4))
pass
learn.adapt(train_error)

predictions, c, k_loss = learn(evaluation_data, None, training=False, adaptation_data=adaptation_data,
adaptation_labels=adaptation_labels)
predictions = learn(evaluation_data, None, training=False, adaptation_data=adaptation_data,
adaptation_labels=adaptation_labels)
valid_error = torch.nn.functional.mse_loss(predictions.view(-1), evaluation_labels)
# cluster_loss = cl_loss(c)
# total_loss = valid_error + config['cluster_loss_weight'] * cluster_loss
total_loss = valid_error + config['kmeans_loss_weight'] * k_loss

if is_print:

# print("in query:\t", round(k_loss.item(),4))
print(c[0].detach().cpu().numpy(),"\t",round(k_loss.item(),3),"\n")

# if random.random() < 0.05:
# print("cl:", round(cluster_loss.item()), "\t c:", c[0].cpu().data.numpy())

if get_predictions:
return total_loss, predictions
return total_loss, c, k_loss.item()
return valid_error, predictions
return valid_error

+ 60
- 181
learnToLearn.py View File

@@ -16,37 +16,6 @@ import numpy as np
from torch.nn import functional as F


def data_batching(indexes, C_distribs, batch_size, training_set_size, num_clusters):
probs = np.squeeze(C_distribs)
cs = [np.random.choice(num_clusters, p=i) for i in probs]
num_batch = int(training_set_size / batch_size)
res = [[] for i in range(num_batch)]
clas = [[] for i in range(num_clusters)]

for idx, c in zip(indexes, cs):
clas[c].append(idx)

t = np.array([len(i) for i in clas])
t = t / t.sum()

dif = list(set(list(np.arange(training_set_size))) - set(indexes[0:(num_batch * batch_size)]))
cnt = 0

for i in range(len(res)):
for j in range(batch_size):
temp = np.random.choice(num_clusters, p=t)
if len(clas[temp]) > 0:
res[i].append(clas[temp].pop(0))
else:
# res[i].append(indexes[training_set_size-1-cnt])
res[i].append(random.choice(dif))
cnt = cnt + 1

res = np.random.permutation(res)
final_result = np.array(res).flatten()
return final_result


def parse_args():
print("==============")
parser = argparse.ArgumentParser([], description='Fast Context Adaptation via Meta-Learning (CAVIA),'
@@ -54,19 +23,19 @@ def parse_args():
print("==============\n")

parser.add_argument('--seed', type=int, default=53)
parser.add_argument('--task', type=str, default='multi', help='problem setting: sine or celeba')
parser.add_argument('--tasks_per_metaupdate', type=int, default=32,
help='number of tasks in each batch per meta-update')
parser.add_argument('--lr_inner', type=float, default=5e-6, help='inner-loop learning rate (per task)')
parser.add_argument('--lr_meta', type=float, default=5e-5,
help='outer-loop learning rate (used with Adam optimiser)')
# parser.add_argument('--task', type=str, default='multi', help='problem setting: sine or celeba')
# parser.add_argument('--tasks_per_metaupdate', type=int, default=32,
# help='number of tasks in each batch per meta-update')
#
# parser.add_argument('--lr_inner', type=float, default=5e-6, help='inner-loop learning rate (per task)')
# parser.add_argument('--lr_meta', type=float, default=5e-5,
# help='outer-loop learning rate (used with Adam optimiser)')
# parser.add_argument('--lr_meta_decay', type=float, default=0.9, help='decay factor for meta learning rate')
parser.add_argument('--inner', type=int, default=1,
help='number of gradient steps in inner loop (during training)')
parser.add_argument('--inner_eval', type=int, default=1,
help='number of gradient updates at test time (for evaluation)')
#
# parser.add_argument('--inner', type=int, default=1,
# help='number of gradient steps in inner loop (during training)')
# parser.add_argument('--inner_eval', type=int, default=1,
# help='number of gradient updates at test time (for evaluation)')

parser.add_argument('--first_order', action='store_true', default=False,
help='run first order approximation of CAVIA')
@@ -74,7 +43,7 @@ def parse_args():
help='run adaptation transform')
parser.add_argument('--transformer', type=str, default="kronoker",
help='transformer type')
parser.add_argument('--meta_algo', type=str, default="gbml",
parser.add_argument('--meta_algo', type=str, default="metasgd",
help='MAML/MetaSGD/GBML')
parser.add_argument('--gpu', type=int, default=0,
help='number of gpu to run the code')
@@ -108,36 +77,6 @@ def parse_args():
return args


from torch.nn import functional as F


def kl_loss(C_distribs):
# batchsize * k
C_distribs = torch.stack(C_distribs).squeeze()

# print("injam:",len(C_distribs))
# print(C_distribs[0].shape)
# batchsize * k
# print("injam2",C_distribs)
C_distribs_sq = torch.pow(C_distribs, 2)
# print("injam3",C_distribs_sq)
# 1*k
C_distribs_sum = torch.sum(C_distribs, dim=0, keepdim=True)
# print("injam4",C_distribs_sum)
# batchsize * k
temp = C_distribs_sq / C_distribs_sum
# print("injam5",temp)
# batchsize * 1
temp_sum = torch.sum(temp, dim=1, keepdim=True)
# print("injam6",temp_sum)
target_distribs = temp / temp_sum
# print("injam7",target_distribs)
# calculate the kl loss
clustering_loss = F.kl_div(C_distribs.log(), target_distribs, reduction='batchmean')
# print("injam8",clustering_loss)
return clustering_loss


if __name__ == '__main__':
args = parse_args()
print(args)
@@ -145,7 +84,7 @@ if __name__ == '__main__':
if config['use_cuda']:
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
master_path = "/media/external_10TB/10TB/maheri/define_task_melu_data"
master_path = "/media/external_10TB/10TB/maheri/new_data_dir3"
config['master_path'] = master_path

# DATA GENERATION
@@ -163,10 +102,10 @@ if __name__ == '__main__':
fc2_out_dim = config['second_fc_hidden_dim']
use_cuda = config['use_cuda']

# fc1 = torch.nn.Linear(fc1_in_dim, fc2_in_dim)
# fc2 = torch.nn.Linear(fc2_in_dim, fc2_out_dim)
# linear_out = torch.nn.Linear(fc2_out_dim, 1)
# head = torch.nn.Sequential(fc1, fc2, linear_out)
fc1 = torch.nn.Linear(fc1_in_dim, fc2_in_dim)
fc2 = torch.nn.Linear(fc2_in_dim, fc2_out_dim)
linear_out = torch.nn.Linear(fc2_out_dim, 1)
head = torch.nn.Sequential(fc1, fc2, linear_out)

if use_cuda:
emb = EmbeddingModule(config).cuda()
@@ -184,15 +123,14 @@ if __name__ == '__main__':
transform = l2l.optim.ModuleTransform(torch.nn.Linear)

trainer = Trainer(config)
tr = trainer

# define meta algorithm
if args.meta_algo == "maml":
trainer = l2l.algorithms.MAML(trainer, lr=args.lr_inner, first_order=args.first_order)
elif args.meta_algo == 'metasgd':
trainer = l2l.algorithms.MetaSGD(trainer, lr=args.lr_inner, first_order=args.first_order)
trainer = l2l.algorithms.MetaSGD(trainer, lr=config['local_lr'], first_order=args.first_order)
elif args.meta_algo == 'gbml':
trainer = l2l.algorithms.GBML(trainer, transform=transform, lr=args.lr_inner,
trainer = l2l.algorithms.GBML(trainer, transform=transform, lr=config['local_lr'],
adapt_transform=args.adapt_transform,
first_order=args.first_order)

@@ -218,60 +156,14 @@ if __name__ == '__main__':

print("\n\n\n")

for iteration in range(config['num_epoch']):

if iteration == 0:
print("changing cluster centroids started ...")
indexes = list(np.arange(training_set_size))
supp_xs, supp_ys, query_xs, query_ys = [], [], [], []
for idx in range(0, 2500):
supp_xs.append(pickle.load(open("{}/warm_state/supp_x_{}.pkl".format(master_path, indexes[idx]), "rb")))
supp_ys.append(pickle.load(open("{}/warm_state/supp_y_{}.pkl".format(master_path, indexes[idx]), "rb")))
query_xs.append(
pickle.load(open("{}/warm_state/query_x_{}.pkl".format(master_path, indexes[idx]), "rb")))
query_ys.append(
pickle.load(open("{}/warm_state/query_y_{}.pkl".format(master_path, indexes[idx]), "rb")))
batch_sz = len(supp_xs)

user_embeddings = []

for task in range(batch_sz):
# Compute meta-training loss
supp_xs[task] = supp_xs[task].cuda()
supp_ys[task] = supp_ys[task].cuda()
# query_xs[task] = query_xs[task].cuda()
# query_ys[task] = query_ys[task].cuda()
temp_sxs = emb(supp_xs[task])
# temp_qxs = emb(query_xs[task])
y = supp_ys[task].view(-1, 1)
input_pairs = torch.cat((temp_sxs, y), dim=1)
task_embed = tr.cluster_module.input_to_hidden(input_pairs)

# todo : may be useless
mean_task = tr.cluster_module.aggregate(task_embed)
user_embeddings.append(mean_task.detach().cpu().numpy())

supp_xs[task] = supp_xs[task].cpu()
supp_ys[task] = supp_ys[task].cpu()
for iteration in range(args.epochs):

from sklearn.cluster import KMeans

user_embeddings = np.array(user_embeddings)
kmeans_model = KMeans(n_clusters=config['cluster_k'], init="k-means++").fit(user_embeddings)
tr.cluster_module.array.data = torch.Tensor(kmeans_model.cluster_centers_).cuda()

if iteration > 0:
# indexes = data_batching(indexes, C_distribs, batch_size, training_set_size, config['cluster_k'])
# random.shuffle(indexes)
C_distribs = []
else:
num_batch = int(training_set_size / batch_size)
indexes = list(np.arange(training_set_size))
random.shuffle(indexes)
num_batch = int(training_set_size / batch_size)
indexes = list(np.arange(training_set_size))
random.shuffle(indexes)

for i in range(num_batch):
meta_train_error = 0.0
meta_cluster_error = 0.0
optimizer.zero_grad()
print("EPOCH: ", iteration, " BATCH: ", i)
supp_xs, supp_ys, query_xs, query_ys = [], [], [], []
@@ -291,7 +183,6 @@ if __name__ == '__main__':
query_xs[j] = query_xs[j].cuda()
query_ys[j] = query_ys[j].cuda()

C_distribs = []
for task in range(batch_sz):
# Compute meta-training loss
# sxs = supp_xs[task].cuda()
@@ -303,18 +194,15 @@ if __name__ == '__main__':
temp_sxs = emb(supp_xs[task])
temp_qxs = emb(query_xs[task])

evaluation_error, c, k_loss = fast_adapt(learner,
temp_sxs,
temp_qxs,
supp_ys[task],
query_ys[task],
config['inner'],
epoch=iteration)
evaluation_error = fast_adapt(learner,
temp_sxs,
temp_qxs,
supp_ys[task],
query_ys[task],
config['inner'])

# C_distribs.append(c)
evaluation_error.backward(retain_graph=True)
evaluation_error.backward()
meta_train_error += evaluation_error.item()
meta_cluster_error += k_loss

# supp_xs[task].cpu()
# query_xs[task].cpu()
@@ -324,14 +212,6 @@ if __name__ == '__main__':
# Print some metrics
print('Iteration', iteration)
print('Meta Train Error', meta_train_error / batch_sz)
print('KL Train Error', meta_cluster_error / batch_sz)

# clustering_loss = config['kl_loss_weight'] * kl_loss(C_distribs)
# clustering_loss.backward()
# print("kl_loss:", round(clustering_loss.item(), 8), "\t", C_distribs[0].cpu().detach().numpy())

# if i != (num_batch - 1):
# C_distribs = []

# Average the accumulated gradients and optimize
for p in all_parameters:
@@ -343,36 +223,35 @@ if __name__ == '__main__':
gc.collect()
print("===============================================\n")

# if iteration % 2 == 0 and iteration != 0:
# # testing
# print("start of test phase")
# trainer.eval()
#
# with open("results2.txt", "a") as f:
# f.write("epoch:{}\n".format(iteration))
#
# for test_state in ['user_cold_state', 'item_cold_state', 'user_and_item_cold_state']:
# test_dataset = None
# test_set_size = int(len(os.listdir("{}/{}".format(master_path, test_state))) / 4)
# supp_xs_s = []
# supp_ys_s = []
# query_xs_s = []
# query_ys_s = []
# gc.collect()
#
# print("===================== " + test_state + " =====================")
# mse, ndc1, ndc3 = test(emb, trainer, test_dataset, batch_size=config['batch_size'],
# num_epoch=config['num_epoch'], test_state=test_state, args=args)
# with open("results2.txt", "a") as f:
# f.write("{}\t{}\t{}\n".format(mse, ndc1, ndc3))
# print("===================================================")
# del (test_dataset)
# gc.collect()
#
# trainer.train()
# with open("results2.txt", "a") as f:
# f.write("\n")
# print("\n\n\n")
if iteration % 2 == 0 or iteration>0:
# testing
print("start of test phase")
trainer.eval()

with open("results2.txt", "a") as f:
f.write("epoch:{}\n".format(iteration))

for test_state in ['user_cold_state', 'item_cold_state', 'user_and_item_cold_state']:
test_dataset = None
test_set_size = int(len(os.listdir("{}/{}".format(master_path, test_state))) / 4)
supp_xs_s = []
supp_ys_s = []
query_xs_s = []
query_ys_s = []
gc.collect()

print("===================== " + test_state + " =====================")
mse, ndc1, ndc3 = test(emb, trainer, test_dataset, batch_size=config['batch_size'],num_epoch=config['num_epoch'],test_state=test_state,args=args)
with open("results2.txt", "a") as f:
f.write("{}\t{}\t{}\n".format(mse, ndc1, ndc3))
print("===================================================")
del (test_dataset)
gc.collect()

trainer.train()
with open("results2.txt", "a") as f:
f.write("\n")
print("\n\n\n")

# save model
# final_model = torch.nn.Sequential(emb, head)

+ 1
- 1
learnToLearnTest.py View File

@@ -48,7 +48,7 @@ def test(embedding, head, total_dataset, batch_size, num_epoch, test_state=None,
config['inner'],
# args.inner_eval,
get_predictions=True,
epoch=0)
)

l1 = L1Loss(reduction='mean')
loss_q = l1(predictions.view(-1), query_ys)

Loading…
Cancel
Save