
user clustering affects the prediction network's weights

define_task
mohamad maheri 2 years ago
parent
commit
d9e027a938
5 changed files with 256 additions and 162 deletions

  1. clustering.py          +101  -0
  2. embedding_module.py      +1  -3
  3. fast_adapt.py            +0  -1
  4. learnToLearn.py        +108  -94
  5. learnToLearnTest.py     +46  -64
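
The commit message above describes the mechanism this commit adds in clustering.py: each task's (user's) support interactions are embedded, mean-pooled, and softly assigned to K = 7 learnable centroids with a Student-t style kernel, and the resulting task code then modulates the prediction head's hidden layers FiLM-style. Read directly from the code below (temperature τ = 1, centroids μ_k, mean task embedding z̄, hidden layer h_ℓ):

\[
c_k = \frac{\bigl(1 + \lVert \bar z - \mu_k \rVert_2 / \tau\bigr)^{-(\tau+1)/2}}
           {\sum_{j=1}^{K} \bigl(1 + \lVert \bar z - \mu_j \rVert_2 / \tau\bigr)^{-(\tau+1)/2}},
\qquad
\hat z = \bar z + \sum_{k=1}^{K} c_k \mu_k,
\qquad
h_\ell \leftarrow \tanh\bigl(W^{\gamma}_{\ell}\hat z\bigr) \odot h_\ell + \tanh\bigl(W^{\beta}_{\ell}\hat z\bigr)
\]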

clustering.py  (+101, -0)

@@ -0,0 +1,101 @@
import torch.nn.init as init
import os
import torch
import pickle
from options import config
import gc
import torch.nn as nn
from torch.nn import functional as F


class ClustringModule(torch.nn.Module):
    def __init__(self, config):
        super(ClustringModule, self).__init__()
        self.h1_dim = 64
        self.h2_dim = 32
        # self.final_dim = fc1_in_dim
        self.final_dim = 32
        self.dropout_rate = 0

        # MLP that maps the raw task embedding into the clustering space
        layers = [nn.Linear(config['embedding_dim'] * 8, self.h1_dim),
                  torch.nn.Dropout(self.dropout_rate),
                  nn.ReLU(inplace=True),
                  nn.Linear(self.h1_dim, self.h2_dim),
                  torch.nn.Dropout(self.dropout_rate),
                  nn.ReLU(inplace=True),
                  nn.Linear(self.h2_dim, self.final_dim)]
        self.input_to_hidden = nn.Sequential(*layers)

        # learnable cluster centroids (clusters_k x embed_size)
        self.clusters_k = 7
        self.embed_size = self.final_dim
        self.array = nn.Parameter(init.xavier_uniform_(torch.FloatTensor(self.clusters_k, self.embed_size)))
        self.temperature = 1.0

    def aggregate(self, z_i):
        # mean-pool the per-interaction embeddings into a single task vector
        return torch.mean(z_i, dim=0)

    def forward(self, task_embed, training=True):
        task_embed = self.input_to_hidden(task_embed)

        # todo : may be useless
        mean_task = self.aggregate(task_embed)

        # C_distribution, new_task_embed = self.memoryunit(mean_task)
        # Student-t style soft assignment of the task to the k centroids
        res = torch.norm(mean_task - self.array, p=2, dim=1, keepdim=True)
        res = torch.pow((res / self.temperature) + 1, (self.temperature + 1) / -2)
        # 1*k
        C = torch.transpose(res / res.sum(), 0, 1)
        # 1*k, k*d, 1*d
        value = torch.mm(C, self.array)
        # simple add operation
        new_task_embed = value + mean_task
        # calculate target distribution
        return C, new_task_embed


class Trainer(torch.nn.Module):
    def __init__(self, config, head=None):
        super(Trainer, self).__init__()
        fc1_in_dim = config['embedding_dim'] * 8
        fc2_in_dim = config['first_fc_hidden_dim']
        fc2_out_dim = config['second_fc_hidden_dim']
        self.fc1 = torch.nn.Linear(fc1_in_dim, fc2_in_dim)
        self.fc2 = torch.nn.Linear(fc2_in_dim, fc2_out_dim)
        self.linear_out = torch.nn.Linear(fc2_out_dim, 1)
        # cluster module
        self.cluster_module = ClustringModule(config)
        # self.task_dim = fc1_in_dim
        self.task_dim = 32
        # transform task to weights: FiLM-style shift (beta) and scale (gamma) per hidden layer
        self.film_layer_1_beta = nn.Linear(self.task_dim, fc2_in_dim, bias=False)
        self.film_layer_1_gamma = nn.Linear(self.task_dim, fc2_in_dim, bias=False)
        self.film_layer_2_beta = nn.Linear(self.task_dim, fc2_out_dim, bias=False)
        self.film_layer_2_gamma = nn.Linear(self.task_dim, fc2_out_dim, bias=False)
        # self.film_layer_3_beta = nn.Linear(self.task_dim, self.h3_dim, bias=False)
        # self.film_layer_3_gamma = nn.Linear(self.task_dim, self.h3_dim, bias=False)
        self.dropout_rate = 0
        self.dropout = nn.Dropout(self.dropout_rate)

    def aggregate(self, z_i):
        return torch.mean(z_i, dim=0)

    def forward(self, task_embed):
        C, clustered_task_embed = self.cluster_module(task_embed)
        # hidden layers
        # todo : adding activation function or remove it
        hidden_1 = self.fc1(task_embed)
        beta_1 = torch.tanh(self.film_layer_1_beta(clustered_task_embed))
        gamma_1 = torch.tanh(self.film_layer_1_gamma(clustered_task_embed))
        hidden_1 = torch.mul(hidden_1, gamma_1) + beta_1
        hidden_1 = self.dropout(hidden_1)
        hidden_2 = F.relu(hidden_1)

        hidden_2 = self.fc2(hidden_2)
        beta_2 = torch.tanh(self.film_layer_2_beta(clustered_task_embed))
        gamma_2 = torch.tanh(self.film_layer_2_gamma(clustered_task_embed))
        hidden_2 = torch.mul(hidden_2, gamma_2) + beta_2
        hidden_2 = self.dropout(hidden_2)
        hidden_3 = F.relu(hidden_2)

        y_pred = self.linear_out(hidden_3)
        return y_pred
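
A minimal usage sketch of the new module. The config values below are illustrative assumptions; the real ones come from options.config, which is not part of this commit:

import torch
from clustering import Trainer  # assumes the repo's options.py is importable

# hypothetical config values, for illustration only
config = {'embedding_dim': 32, 'first_fc_hidden_dim': 64, 'second_fc_hidden_dim': 64}

model = Trainer(config)
task_embed = torch.randn(20, config['embedding_dim'] * 8)  # 20 (item, user) interactions of one task
y_pred = model(task_embed)                                 # shape [20, 1]: FiLM-conditioned rating predictions
C, task_code = model.cluster_module(task_embed)            # soft assignment (1 x 7) and clustered task embedding (1 x 32)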

embedding_module.py  (+1, -3)

@@ -29,6 +29,4 @@ class EmbeddingModule(torch.nn.Module):
item_emb = self.item_emb(rate_idx, genre_idx, director_idx, actor_idx)
user_emb = self.user_emb(gender_idx, age_idx, occupation_idx, area_idx)
x = torch.cat((item_emb, user_emb), 1)
return x


return x

fast_adapt.py  (+0, -1)

@@ -1,7 +1,6 @@
import torch
import pickle


def fast_adapt(
learn,
adaptation_data,
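
Only the opening of fast_adapt appears in this hunk. For context, a typical learn2learn-style helper with this call pattern (it is invoked below as fast_adapt(learner, support_x, query_x, support_y, query_y, steps) and, in the test script, with get_predictions=True) might look like the following sketch. Parameter names beyond the two shown above are guesses; this is an illustrative assumption, not the repo's exact implementation:

from torch.nn import functional as F

def fast_adapt(learn, adaptation_data, evaluation_data,
               adaptation_labels, evaluation_labels,
               adaptation_steps, get_predictions=False):
    # inner loop: take a few gradient steps on the support (adaptation) set
    for _ in range(adaptation_steps):
        support_loss = F.mse_loss(learn(adaptation_data).view(-1), adaptation_labels)
        learn.adapt(support_loss)  # learn2learn updates the cloned learner in place
    # outer objective: evaluate the adapted learner on the query (evaluation) set
    predictions = learn(evaluation_data)
    evaluation_error = F.mse_loss(predictions.view(-1), evaluation_labels)
    if get_predictions:
        return evaluation_error, predictions
    return evaluation_error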

learnToLearn.py  (+108, -94)

@@ -11,6 +11,10 @@ from fast_adapt import fast_adapt
import gc
from learn2learn.optim.transforms import KroneckerTransform
import argparse
from clustering import ClustringModule, Trainer
import numpy as np
from torch.nn import functional as F


def parse_args():
print("==============")
@@ -46,8 +50,6 @@ def parse_args():
parser.add_argument('--epochs', type=int, default=config['num_epoch'],
help='number of gpu to run the code')



# parser.add_argument('--data_root', type=str, default="./movielens/ml-1m", help='path to data root')
# parser.add_argument('--num_workers', type=int, default=4, help='num of workers to use')
# parser.add_argument('--test', action='store_true', default=False, help='num of workers to use')
@@ -74,6 +76,7 @@ def parse_args():
# print('Running on device: {}'.format(args.device))
return args


if __name__ == '__main__':
args = parse_args()
print(args)
@@ -81,7 +84,8 @@ if __name__ == '__main__':
if config['use_cuda']:
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
master_path= "/media/external_10TB/10TB/maheri/melu_data5"
master_path = "/media/external_10TB/10TB/maheri/define_task_melu_data"
config['master_path'] = master_path

# DATA GENERATION
print("DATA GENERATION PHASE")
@@ -101,7 +105,7 @@ if __name__ == '__main__':
fc1 = torch.nn.Linear(fc1_in_dim, fc2_in_dim)
fc2 = torch.nn.Linear(fc2_in_dim, fc2_out_dim)
linear_out = torch.nn.Linear(fc2_out_dim, 1)
head = torch.nn.Sequential(fc1,fc2,linear_out)
head = torch.nn.Sequential(fc1, fc2, linear_out)

if use_cuda:
emb = EmbeddingModule(config).cuda()
@@ -118,20 +122,24 @@ if __name__ == '__main__':
elif args.transformer == "linear":
transform = l2l.optim.ModuleTransform(torch.nn.Linear)

trainer = Trainer(config)

# define meta algorithm
if args.meta_algo == "maml":
head = l2l.algorithms.MAML(head, lr=args.lr_inner,first_order=args.first_order)
trainer = l2l.algorithms.MAML(trainer, lr=args.lr_inner, first_order=args.first_order)
elif args.meta_algo == 'metasgd':
head = l2l.algorithms.MetaSGD(head, lr=args.lr_inner,first_order=args.first_order)
trainer = l2l.algorithms.MetaSGD(trainer, lr=args.lr_inner, first_order=args.first_order)
elif args.meta_algo == 'gbml':
head = l2l.algorithms.GBML(head, transform=transform, lr=args.lr_inner,adapt_transform=args.adapt_transform, first_order=args.first_order)
trainer = l2l.algorithms.GBML(trainer, transform=transform, lr=args.lr_inner,
adapt_transform=args.adapt_transform,
first_order=args.first_order)

if use_cuda:
head.cuda()
trainer.cuda()

# Setup optimization
print("SETUP OPTIMIZATION PHASE")
all_parameters = list(emb.parameters()) + list(head.parameters())
all_parameters = list(emb.parameters()) + list(trainer.parameters())
optimizer = torch.optim.Adam(all_parameters, lr=args.lr_meta)
# loss = torch.nn.MSELoss(reduction='mean')

@@ -142,85 +150,68 @@ if __name__ == '__main__':
supp_ys_s = []
query_xs_s = []
query_ys_s = []
for idx in range(training_set_size):
supp_xs_s.append(pickle.load(open("{}/warm_state/supp_x_{}.pkl".format(master_path, idx), "rb")))
supp_ys_s.append(pickle.load(open("{}/warm_state/supp_y_{}.pkl".format(master_path, idx), "rb")))
query_xs_s.append(pickle.load(open("{}/warm_state/query_x_{}.pkl".format(master_path, idx), "rb")))
query_ys_s.append(pickle.load(open("{}/warm_state/query_y_{}.pkl".format(master_path, idx), "rb")))
total_dataset = list(zip(supp_xs_s, supp_ys_s, query_xs_s, query_ys_s))
del(supp_xs_s, supp_ys_s, query_xs_s, query_ys_s)
training_set_size = len(total_dataset)

batch_size = config['batch_size']
# torch.cuda.empty_cache()

random.shuffle(total_dataset)
num_batch = int(training_set_size / batch_size)
a, b, c, d = zip(*total_dataset)

print("\n\n\n")

for iteration in range(args.epochs):

num_batch = int(training_set_size / batch_size)
indexes = list(np.arange(training_set_size))
random.shuffle(indexes)

for i in range(num_batch):
optimizer.zero_grad()
meta_train_error = 0.0
meta_train_accuracy = 0.0
meta_valid_error = 0.0
meta_valid_accuracy = 0.0
meta_test_error = 0.0
meta_test_accuracy = 0.0

optimizer.zero_grad()
print("EPOCH: ", iteration, " BATCH: ", i)
supp_xs = list(a[batch_size * i:batch_size * (i + 1)])
supp_ys = list(b[batch_size * i:batch_size * (i + 1)])
query_xs = list(c[batch_size * i:batch_size * (i + 1)])
query_ys = list(d[batch_size * i:batch_size * (i + 1)])
supp_xs, supp_ys, query_xs, query_ys = [], [], [], []
for idx in range(batch_size * i, batch_size * (i + 1)):
supp_xs.append(pickle.load(open("{}/warm_state/supp_x_{}.pkl".format(master_path, indexes[idx]), "rb")))
supp_ys.append(pickle.load(open("{}/warm_state/supp_y_{}.pkl".format(master_path, indexes[idx]), "rb")))
query_xs.append(
pickle.load(open("{}/warm_state/query_x_{}.pkl".format(master_path, indexes[idx]), "rb")))
query_ys.append(
pickle.load(open("{}/warm_state/query_y_{}.pkl".format(master_path, indexes[idx]), "rb")))
batch_sz = len(supp_xs)

# if use_cuda:
# for j in range(batch_size):
# supp_xs[j] = supp_xs[j].cuda()
# supp_ys[j] = supp_ys[j].cuda()
# query_xs[j] = query_xs[j].cuda()
# query_ys[j] = query_ys[j].cuda()
if use_cuda:
for j in range(batch_size):
supp_xs[j] = supp_xs[j].cuda()
supp_ys[j] = supp_ys[j].cuda()
query_xs[j] = query_xs[j].cuda()
query_ys[j] = query_ys[j].cuda()

for task in range(batch_sz):
# print("EPOCH: ", iteration," BATCH: ",i, "TASK: ",task)
# Compute meta-training loss
# if use_cuda:
sxs = supp_xs[task].cuda()
qxs = query_xs[task].cuda()
sys = supp_ys[task].cuda()
qys = query_ys[task].cuda()
# sxs = supp_xs[task].cuda()
# qxs = query_xs[task].cuda()
# sys = supp_ys[task].cuda()
# qys = query_ys[task].cuda()

learner = head.clone()
temp_sxs = emb(sxs)
temp_qxs = emb(qxs)
learner = trainer.clone()
temp_sxs = emb(supp_xs[task])
temp_qxs = emb(query_xs[task])

evaluation_error = fast_adapt(learner,
temp_sxs,
temp_qxs,
sys,
qys,
args.inner)
# config['inner'])
temp_sxs,
temp_qxs,
supp_ys[task],
query_ys[task],
args.inner)

evaluation_error.backward()
meta_train_error += evaluation_error.item()

del(sxs,qxs,sys,qys)
supp_xs[task].cpu()
query_xs[task].cpu()
supp_ys[task].cpu()
query_ys[task].cpu()

# supp_xs[task].cpu()
# query_xs[task].cpu()
# supp_ys[task].cpu()
# query_ys[task].cpu()

# Print some metrics
print('Iteration', iteration)
print('Meta Train Error', meta_train_error / batch_sz)
# print('Meta Train Accuracy', meta_train_accuracy / batch_sz)
# print('Meta Valid Error', meta_valid_error / batch_sz)
# print('Meta Valid Accuracy', meta_valid_accuracy / batch_sz)
# print('Meta Test Error', meta_test_error / batch_sz)
# print('Meta Test Accuracy', meta_test_accuracy / batch_sz)

# Average the accumulated gradients and optimize
for p in all_parameters:
@@ -228,40 +219,63 @@ if __name__ == '__main__':
optimizer.step()

# torch.cuda.empty_cache()
del(supp_xs,supp_ys,query_xs,query_ys)
del (supp_xs, supp_ys, query_xs, query_ys, learner, temp_sxs, temp_qxs)
gc.collect()
print("===============================================\n")

if iteration % 2 == 0:
# testing
print("start of test phase")
trainer.eval()

with open("results.txt", "a") as f:
f.write("epoch:{}\n".format(iteration))

for test_state in ['user_cold_state', 'item_cold_state', 'user_and_item_cold_state']:
test_dataset = None
test_set_size = int(len(os.listdir("{}/{}".format(master_path, test_state))) / 4)
supp_xs_s = []
supp_ys_s = []
query_xs_s = []
query_ys_s = []
gc.collect()

print("===================== " + test_state + " =====================")
mse, ndc1, ndc3 = test(emb, trainer, test_dataset, batch_size=config['batch_size'],num_epoch=config['num_epoch'],test_state=test_state)
with open("results.txt", "a") as f:
f.write("{}\t{}\t{}\n".format(mse, ndc1, ndc3))
print("===================================================")
del (test_dataset)
gc.collect()

trainer.train()
with open("results.txt", "a") as f:
f.write("\n")
print("\n\n\n")

# save model
final_model = torch.nn.Sequential(emb,head)
torch.save(final_model.state_dict(), master_path + "/models_gbml.pkl")
# final_model = torch.nn.Sequential(emb, head)
# torch.save(final_model.state_dict(), master_path + "/models_gbml.pkl")

# testing
print("start of test phase")
for test_state in ['warm_state', 'user_cold_state', 'item_cold_state', 'user_and_item_cold_state']:
test_dataset = None
test_set_size = int(len(os.listdir("{}/{}".format(master_path, test_state))) / 4)
supp_xs_s = []
supp_ys_s = []
query_xs_s = []
query_ys_s = []
for idx in range(test_set_size):
supp_xs_s.append(pickle.load(open("{}/{}/supp_x_{}.pkl".format(master_path, test_state, idx), "rb")))
supp_ys_s.append(pickle.load(open("{}/{}/supp_y_{}.pkl".format(master_path, test_state, idx), "rb")))
query_xs_s.append(pickle.load(open("{}/{}/query_x_{}.pkl".format(master_path, test_state, idx), "rb")))
query_ys_s.append(pickle.load(open("{}/{}/query_y_{}.pkl".format(master_path, test_state, idx), "rb")))
test_dataset = list(zip(supp_xs_s, supp_ys_s, query_xs_s, query_ys_s))
del (supp_xs_s, supp_ys_s, query_xs_s, query_ys_s)

print("===================== " + test_state + " =====================")
test(emb,head, test_dataset, batch_size=config['batch_size'], num_epoch=args.epochs,adaptation_step=args.inner_eval)
print("===================================================\n\n\n")
print(args)







# print("start of test phase")
# for test_state in ['warm_state', 'user_cold_state', 'item_cold_state', 'user_and_item_cold_state']:
# test_dataset = None
# test_set_size = int(len(os.listdir("{}/{}".format(master_path, test_state))) / 4)
# supp_xs_s = []
# supp_ys_s = []
# query_xs_s = []
# query_ys_s = []
# for idx in range(test_set_size):
# supp_xs_s.append(pickle.load(open("{}/{}/supp_x_{}.pkl".format(master_path, test_state, idx), "rb")))
# supp_ys_s.append(pickle.load(open("{}/{}/supp_y_{}.pkl".format(master_path, test_state, idx), "rb")))
# query_xs_s.append(pickle.load(open("{}/{}/query_x_{}.pkl".format(master_path, test_state, idx), "rb")))
# query_ys_s.append(pickle.load(open("{}/{}/query_y_{}.pkl".format(master_path, test_state, idx), "rb")))
# test_dataset = list(zip(supp_xs_s, supp_ys_s, query_xs_s, query_ys_s))
# del (supp_xs_s, supp_ys_s, query_xs_s, query_ys_s)
#
# print("===================== " + test_state + " =====================")
# test(emb, head, test_dataset, batch_size=config['batch_size'], num_epoch=args.epochs,
# adaptation_step=args.inner_eval)
# print("===================================================\n\n\n")
# print(args)

learnToLearnTest.py  (+46, -64)

@@ -5,58 +5,50 @@ import random
from options import config, states
from torch.nn import functional as F
from torch.nn import L1Loss
import matchzoo as mz
# import matchzoo as mz
import numpy as np
from fast_adapt import fast_adapt
from sklearn.metrics import ndcg_score
import gc


def test(embedding,head, total_dataset, batch_size, num_epoch,adaptation_step=config['inner']):

test_set_size = len(total_dataset)
random.shuffle(total_dataset)
a, b, c, d = zip(*total_dataset)
def test(embedding, head, total_dataset, batch_size, num_epoch, test_state=None):
losses_q = []
# ndcgs1 = []
ndcgs11 = []
# ndcgs111 = []
# ndcgs3 = []
ndcgs33=[]
# ndcgs333 = []

for iterator in range(test_set_size):
if config['use_cuda']:
try:
supp_xs = a[iterator].cuda()
supp_ys = b[iterator].cuda()
query_xs = c[iterator].cuda()
query_ys = d[iterator].cuda()
except IndexError:
print("index error in test method")
continue
else:
try:
supp_xs = a[iterator]
supp_ys = b[iterator]
query_xs = c[iterator]
query_ys = d[iterator]
except IndexError:
print("index error in test method")
continue

num_local_update = adaptation_step
ndcgs1 = []
ndcgs3 = []
master_path = config['master_path']
test_set_size = int(len(os.listdir("{}/{}".format(master_path, test_state))) / 4)
indexes = list(np.arange(test_set_size))
random.shuffle(indexes)

for iterator in indexes:
a = pickle.load(open("{}/{}/supp_x_{}.pkl".format(master_path, test_state, iterator), "rb"))
b = pickle.load(open("{}/{}/supp_y_{}.pkl".format(master_path, test_state, iterator), "rb"))
c = pickle.load(open("{}/{}/query_x_{}.pkl".format(master_path, test_state, iterator), "rb"))
d = pickle.load(open("{}/{}/query_y_{}.pkl".format(master_path, test_state, iterator), "rb"))

try:
supp_xs = a.cuda()
supp_ys = b.cuda()
query_xs = c.cuda()
query_ys = d.cuda()
except IndexError:
print("index error in test method")
continue

num_local_update = config['inner']
learner = head.clone()
temp_sxs = embedding(supp_xs)
temp_qxs = embedding(query_xs)

evaluation_error,predictions = fast_adapt(learner,
temp_sxs,
temp_qxs,
supp_ys,
query_ys,
# config['inner'],
adaptation_step,
get_predictions=True)
evaluation_error, predictions = fast_adapt(learner,
temp_sxs,
temp_qxs,
supp_ys,
query_ys,
# config['inner'],
config['inner'],
get_predictions=True)

l1 = L1Loss(reduction='mean')
loss_q = l1(predictions.view(-1), query_ys)
@@ -66,32 +58,22 @@ def test(embedding,head, total_dataset, batch_size, num_epoch,adaptation_step=co
predictions = predictions.view(-1)
y_true = query_ys.cpu().detach().numpy()
y_pred = predictions.cpu().detach().numpy()
ndcgs1.append(float(ndcg_score([y_true], [y_pred], k=1, sample_weight=None, ignore_ties=False)))
ndcgs3.append(float(ndcg_score([y_true], [y_pred], k=3, sample_weight=None, ignore_ties=False)))

# ndcgs1.append(float(mz.metrics.NormalizedDiscountedCumulativeGain(k=1)(y_true, y_pred)))
# ndcgs3.append(float(mz.metrics.NormalizedDiscountedCumulativeGain(k=3)(y_true, y_pred)))

ndcgs11.append(float(ndcg_score([y_true], [y_pred], k=1, sample_weight=None, ignore_ties=False)))
ndcgs33.append(float(ndcg_score([y_true], [y_pred], k=3, sample_weight=None, ignore_ties=False)))

del supp_xs, supp_ys, query_xs, query_ys, predictions, y_true, y_pred, loss_q
# torch.cuda.empty_cache()

del supp_xs, supp_ys, query_xs, query_ys, y_true, y_pred, loss_q, temp_sxs, temp_qxs, predictions, l1
torch.cuda.empty_cache()

# calculate metrics
# losses_q = torch.stack(losses_q).mean(0)
losses_q = np.array(losses_q).mean()
print("mean of mse: ", losses_q)
# n1 = np.array(ndcgs1).mean()
# print("nDCG1: ", n1)
print("nDCG1: ", np.array(ndcgs11).mean())
# print("nDCG1: ", np.array(ndcgs111).mean())
# n3 = np.array(ndcgs3).mean()
# print("nDCG3: ", n3)
print("nDCG3: ", np.array(ndcgs33).mean())
# print("nDCG3: ", np.array(ndcgs333).mean())

print("is there nan? " + str(np.any(np.isnan(ndcgs11))))
print("is there nan? " + str(np.any(np.isnan(ndcgs33))))
print("is there nan? " + str(np.any(np.isnan(losses_q))))
# print("======================================")
n1 = np.array(ndcgs1).mean()
print("nDCG1: ", n1)
n3 = np.array(ndcgs3).mean()
print("nDCG3: ", n3)

del a, b, c, d, total_dataset
gc.collect()

return losses_q, n1, n3
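
As a quick reference for the metric used above: sklearn.metrics.ndcg_score expects 2-D arrays of shape (n_queries, n_items), which is why each task's ratings are wrapped in a singleton list before scoring. A self-contained example with made-up numbers:

from sklearn.metrics import ndcg_score

y_true = [[5.0, 3.0, 4.0, 1.0]]          # ground-truth ratings for one task's query items
y_pred = [[4.2, 2.9, 4.5, 1.5]]          # predicted ratings for the same items
print(ndcg_score(y_true, y_pred, k=1))   # NDCG@1 for this task
print(ndcg_score(y_true, y_pred, k=3))   # NDCG@3 for this task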
