import torch | |||||
import numpy as np | |||||
from random import randint | |||||
from copy import deepcopy | |||||
from torch.autograd import Variable | |||||
from torch.nn import functional as F | |||||
from collections import OrderedDict | |||||
from embeddings_TaNP import Item, User, Encoder, MuSigmaEncoder, Decoder, Gating_Decoder, TaskEncoder, MemoryUnit,Movie_item,Movie_user | |||||
import torch.nn as nn | |||||
class NP(nn.Module):
    """Task-adaptive Neural Process (TaNP) for MovieLens recommendation.

    Pipeline:
      * ``Movie_item`` / ``Movie_user`` embed raw one-/multi-hot feature rows;
        their concatenation is the NP input ``x``.
      * ``Encoder`` + ``MuSigmaEncoder`` form the latent path producing
        (mu, log_sigma, z).
      * ``TaskEncoder`` + ``MemoryUnit`` form the task path producing a
        task embedding that modulates the decoder (FiLM).
      * ``Decoder`` maps (x_target, z, task) to rating predictions.
    """

    def __init__(self, config):
        super(NP, self).__init__()
        # change for Movie lens:
        # self.x_dim = config['second_embedding_dim'] * 2
        # item embedding (4 * 32) + user embedding (4 * 32) = 256 — assumes
        # config['embedding_dim'] == 32; TODO confirm against run script.
        self.x_dim = 32 * 8
        # y is a single scalar rating per interaction (no one-hot).
        self.y_dim = 1
        self.z1_dim = config['z1_dim']
        self.z2_dim = config['z2_dim']
        # z_dim is the dimension size of mu and sigma.
        self.z_dim = config['z_dim']
        # Hidden sizes of the latent-path encoder.
        self.enc_h1_dim = config['enc_h1_dim']
        self.enc_h2_dim = config['enc_h2_dim']
        # Hidden sizes of the decoder.
        self.dec_h1_dim = config['dec_h1_dim']
        self.dec_h2_dim = config['dec_h2_dim']
        self.dec_h3_dim = config['dec_h3_dim']
        # Hidden / output sizes of the task-path encoder.
        self.taskenc_h1_dim = config['taskenc_h1_dim']
        self.taskenc_h2_dim = config['taskenc_h2_dim']
        self.taskenc_final_dim = config['taskenc_final_dim']
        self.clusters_k = config['clusters_k']
        # fixed attribute-name typo: 'temperture' -> 'temperature'
        self.temperature = config['temperature']
        self.dropout_rate = config['dropout_rate']
        # Initialize networks.
        # change for Movie Lens (generic Item/User replaced by MovieLens variants):
        # self.item_emb = Item(config)
        # self.user_emb = User(config)
        self.item_emb = Movie_item(config)
        self.user_emb = Movie_user(config)
        # This encoder generates z; it is the latent encoder in ANP terms.
        self.xy_to_z = Encoder(self.x_dim, self.y_dim, self.enc_h1_dim, self.enc_h2_dim, self.z1_dim, self.dropout_rate)
        self.z_to_mu_sigma = MuSigmaEncoder(self.z1_dim, self.z2_dim, self.z_dim)
        # This encoder generates the task representation; it is the
        # deterministic encoder in ANP terms.
        self.xy_to_task = TaskEncoder(self.x_dim, self.y_dim, self.taskenc_h1_dim, self.taskenc_h2_dim, self.taskenc_final_dim,
                                      self.dropout_rate)
        self.memoryunit = MemoryUnit(self.clusters_k, self.taskenc_final_dim, self.temperature)
        #self.xz_to_y = Gating_Decoder(self.x_dim, self.z_dim, self.taskenc_final_dim, self.dec_h1_dim, self.dec_h2_dim, self.dec_h3_dim, self.y_dim, self.dropout_rate)
        self.xz_to_y = Decoder(self.x_dim, self.z_dim, self.taskenc_final_dim, self.dec_h1_dim, self.dec_h2_dim, self.dec_h3_dim, self.y_dim, self.dropout_rate)

    def aggregate(self, z_i):
        """Mean-pool per-interaction representations into one task-level vector."""
        return torch.mean(z_i, dim=0)

    def xy_to_mu_sigma(self, x, y):
        """Encode (x, y) pairs and return (mu, log_sigma, z) of the latent path."""
        # Encode each point into a representation z_i.
        z_i = self.xy_to_z(x, y)
        # Aggregate representations z_i into a single representation z.
        z = self.aggregate(z_i)
        # Return parameters of the latent distribution (and a sample).
        return self.z_to_mu_sigma(z)

    # embedding each (item, user) as the x for np
    def embedding(self, x):
        """Embed raw MovieLens feature rows into NP inputs.

        Column layout (fixed by preprocessing): 0 rate, 1:26 genre,
        26:2212 director, 2212:10242 actor, then four user columns
        (gender, age, occupation, area) — TODO confirm against loader.
        """
        # NOTE: torch.autograd.Variable is deprecated since PyTorch 0.4;
        # plain tensor slices behave identically (inputs don't need grads).
        rate_idx = x[:, 0]
        genre_idx = x[:, 1:26]
        director_idx = x[:, 26:2212]
        actor_idx = x[:, 2212:10242]
        gender_idx = x[:, 10242]
        age_idx = x[:, 10243]
        occupation_idx = x[:, 10244]
        area_idx = x[:, 10245]
        item_emb = self.item_emb(rate_idx, genre_idx, director_idx, actor_idx)
        user_emb = self.user_emb(gender_idx, age_idx, occupation_idx, area_idx)
        return torch.cat((item_emb, user_emb), 1)

    def forward(self, x_context, y_context, x_target, y_target):
        """Predict target ratings.

        Training: z is sampled from the *target* posterior (needed for the
        ELBO) and all distribution parameters are returned for the loss.
        Eval: z comes from the context only, since target y is unseen.
        """
        # change for Movie lens
        x_context_embed = self.embedding(x_context)
        x_target_embed = self.embedding(x_target)
        if self.training:
            # "sigma" returned here is actually log_sigma.
            mu_target, sigma_target, z_target = self.xy_to_mu_sigma(x_target_embed, y_target)
            mu_context, sigma_context, z_context = self.xy_to_mu_sigma(x_context_embed, y_context)
            task = self.xy_to_task(x_context_embed, y_context)
            mean_task = self.aggregate(task)
            C_distribution, new_task_embed = self.memoryunit(mean_task)
            p_y_pred = self.xz_to_y(x_target_embed, z_target, new_task_embed)
            return p_y_pred, mu_target, sigma_target, mu_context, sigma_context, C_distribution
        else:
            mu_context, sigma_context, z_context = self.xy_to_mu_sigma(x_context_embed, y_context)
            task = self.xy_to_task(x_context_embed, y_context)
            mean_task = self.aggregate(task)
            C_distribution, new_task_embed = self.memoryunit(mean_task)
            p_y_pred = self.xz_to_y(x_target_embed, z_context, new_task_embed)
            return p_y_pred
class Trainer(torch.nn.Module):
    """Owns the TaNP model plus its optimizer; implements losses,
    context/target splitting, batched training updates and evaluation.

    config keys used here: 'use_cuda', 'lambda', 'lr', 'context_min',
    'context_max', 'target_extra_min'.
    """

    def __init__(self, config):
        # nn.Module's machinery must be initialized before attribute writes.
        super(Trainer, self).__init__()
        self.opt = config
        self.use_cuda = config['use_cuda']
        self.np = NP(self.opt)
        # Balances the clustering loss against the NP loss.
        self._lambda = config['lambda']
        self.optimizer = torch.optim.Adam(self.np.parameters(), lr=config['lr'])

    # our kl divergence
    def kl_div(self, mu_target, logsigma_target, mu_context, logsigma_context):
        """KL( N(mu_t, sigma_t^2) || N(mu_c, sigma_c^2) ), summed over dims.

        BUGFIX: the last term used to read ``/ 2 * context_sigma ** 2``,
        which — by operator precedence — *multiplies* by sigma_c^2 instead
        of dividing by 2*sigma_c^2. Parenthesized to match the Gaussian KL.
        """
        target_sigma = torch.exp(logsigma_target)
        context_sigma = torch.exp(logsigma_context)
        kl_div = ((logsigma_context - logsigma_target) - 0.5
                  + ((target_sigma ** 2 + (mu_target - mu_context) ** 2)
                     / (2 * context_sigma ** 2)))
        return kl_div.sum()

    # new kl divergence -- kl(st|sc)
    def new_kl_div(self, prior_mu, prior_var, posterior_mu, posterior_var):
        """KL between two Gaussians parameterized by mean and *log-variance*
        (the ``*_var`` arguments are log-variances), summed over dimensions."""
        kl_div = (torch.exp(posterior_var) + (posterior_mu - prior_mu) ** 2) / torch.exp(prior_var) - 1. + (prior_var - posterior_var)
        kl_div = 0.5 * kl_div.sum()
        return kl_div

    def loss(self, p_y_pred, y_target, mu_target, sigma_target, mu_context, sigma_context):
        """NP loss: MSE reconstruction plus KL(target posterior || context prior)."""
        regression_loss = F.mse_loss(p_y_pred, y_target.view(-1, 1))
        # KL divergence between target and context distributions.
        kl = self.new_kl_div(mu_context, sigma_context, mu_target, sigma_target)
        return regression_loss + kl

    def context_target_split(self, support_set_x, support_set_y, query_set_x, query_set_y):
        """Randomly split the merged support+query set into a context set and
        a target set (context ⊆ target, per the ANP convention), with sizes
        drawn from the bounds configured in ``self.opt``."""
        total_x = torch.cat((support_set_x, query_set_x), 0)
        total_y = torch.cat((support_set_y, query_set_y), 0)
        total_size = total_x.size(0)
        context_min = self.opt['context_min']
        context_max = self.opt['context_max']
        extra_tar_min = self.opt['target_extra_min']
        # here we simply use the total_size as the maximum of target size.
        num_context = randint(context_min, context_max)
        num_target = randint(extra_tar_min, total_size - num_context)
        sampled = np.random.choice(total_size, num_context + num_target, replace=False)
        x_context = total_x[sampled[:num_context], :]
        y_context = total_y[sampled[:num_context]]
        # The target set contains the context points plus the extra ones.
        x_target = total_x[sampled, :]
        y_target = total_y[sampled]
        return x_context, y_context, x_target, y_target

    def new_context_target_split(self, support_set_x, support_set_y, query_set_x, query_set_y):
        """Like :meth:`context_target_split` but sizes the context anywhere in
        [context_min, total_size) so small MovieLens tasks still work."""
        total_x = torch.cat((support_set_x, query_set_x), 0)
        total_y = torch.cat((support_set_y, query_set_y), 0)
        total_size = total_x.size(0)
        context_min = self.opt['context_min']
        # change for Movie lens: cap the minimum so tiny tasks remain splittable.
        context_min = min(context_min, total_size - 1)
        num_context = np.random.randint(context_min, total_size)
        num_target = np.random.randint(0, total_size - num_context)
        sampled = np.random.choice(total_size, num_context + num_target, replace=False)
        x_context = total_x[sampled[:num_context], :]
        y_context = total_y[sampled[:num_context]]
        x_target = total_x[sampled, :]
        y_target = total_y[sampled]
        return x_context, y_context, x_target, y_target

    def global_update(self, support_set_xs, support_set_ys, query_set_xs, query_set_ys):
        """Run one optimizer step over a batch of tasks.

        Returns (total_loss_value, cluster-assignment array for the batch).
        """
        batch_sz = len(support_set_xs)
        losses = []
        C_distribs = []
        if self.use_cuda:
            for i in range(batch_sz):
                support_set_xs[i] = support_set_xs[i].cuda()
                support_set_ys[i] = support_set_ys[i].cuda()
                query_set_xs[i] = query_set_xs[i].cuda()
                query_set_ys[i] = query_set_ys[i].cuda()
        for i in range(batch_sz):
            x_context, y_context, x_target, y_target = self.new_context_target_split(support_set_xs[i], support_set_ys[i],
                                                                                     query_set_xs[i], query_set_ys[i])
            p_y_pred, mu_target, sigma_target, mu_context, sigma_context, C_distribution = self.np(x_context, y_context, x_target,
                                                                                                   y_target)
            C_distribs.append(C_distribution)
            loss = self.loss(p_y_pred, y_target, mu_target, sigma_target, mu_context, sigma_context)
            losses.append(loss)
        # Calculate the target distribution for clustering (DEC-style) in
        # batch manner: q^2 normalized by per-cluster frequency.
        C_distribs = torch.stack(C_distribs)
        C_distribs_sq = torch.pow(C_distribs, 2)
        # Soft cluster frequencies, summed over the batch dimension.
        C_distribs_sum = torch.sum(C_distribs, dim=0, keepdim=True)
        temp = C_distribs_sq / C_distribs_sum
        # BUGFIX: normalize across the *cluster* dimension. With C of shape
        # (1, k) per task, the stacked tensor is (batch, 1, k) and the old
        # ``dim=1`` summed a singleton axis, leaving the rows unnormalized.
        temp_sum = torch.sum(temp, dim=-1, keepdim=True)
        target_distribs = temp / temp_sum
        # KL between the current soft assignments and the target distribution.
        clustering_loss = self._lambda * F.kl_div(C_distribs.log(), target_distribs, reduction='batchmean')
        np_losses_mean = torch.stack(losses).mean(0)
        total_loss = np_losses_mean + clustering_loss
        self.optimizer.zero_grad()
        total_loss.backward()
        self.optimizer.step()
        return total_loss.item(), C_distribs.cpu().detach().numpy()

    def query_rec(self, support_set_xs, support_set_ys, query_set_xs, query_set_ys):
        """Evaluate one task: predict query ratings from the support set.

        Returns (query MSE, indices of query items sorted by predicted score).
        """
        batch_sz = 1
        # Used for calculating the RMSE.
        losses_q = []
        if self.use_cuda:
            for i in range(batch_sz):
                support_set_xs[i] = support_set_xs[i].cuda()
                support_set_ys[i] = support_set_ys[i].cuda()
                query_set_xs[i] = query_set_xs[i].cuda()
                query_set_ys[i] = query_set_ys[i].cuda()
        for i in range(batch_sz):
            # In eval mode the model conditions z on the support (context) set only.
            query_set_y_pred = self.np(support_set_xs[i], support_set_ys[i], query_set_xs[i], query_set_ys[i])
            loss_q = F.mse_loss(query_set_y_pred, query_set_ys[i].view(-1, 1))
            losses_q.append(loss_q)
        losses_q = torch.stack(losses_q).mean(0)
        output_list, recommendation_list = query_set_y_pred.view(-1).sort(descending=True)
        return losses_q.item(), recommendation_list
import os | |||||
import torch | |||||
import pickle | |||||
import random | |||||
from eval import testing | |||||
def training(trainer, opt, train_dataset, test_dataset, batch_size, num_epoch, model_save=True, model_filename=None, logger=None):
    """Train a TaNP ``trainer`` for ``num_epoch`` epochs, evaluating after each.

    Each epoch shuffles the task list, iterates full batches of
    (support_x, support_y, query_x, query_y) tuples, runs
    ``trainer.global_update`` per batch, then logs test metrics via
    ``logger.log``. Cluster assignments of the last epoch are pickled to
    'output_att'; the final model is saved when ``model_save`` is True.
    """
    training_set_size = len(train_dataset)
    for epoch in range(num_epoch):
        random.shuffle(train_dataset)
        # Floor division: only full batches are used (the tail is dropped).
        num_batch = training_set_size // batch_size
        a, b, c, d = zip(*train_dataset)
        trainer.train()
        all_C_distribs = []
        # BUGFIX: initialize so the log line below cannot hit a NameError
        # when the dataset is smaller than one batch (num_batch == 0).
        train_loss = 0.0
        for i in range(num_batch):
            # NOTE: slicing a tuple never raises IndexError, so the original
            # try/except around these lines was dead code and was removed.
            supp_xs = list(a[batch_size * i:batch_size * (i + 1)])
            supp_ys = list(b[batch_size * i:batch_size * (i + 1)])
            query_xs = list(c[batch_size * i:batch_size * (i + 1)])
            query_ys = list(d[batch_size * i:batch_size * (i + 1)])
            train_loss, batch_C_distribs = trainer.global_update(supp_xs, supp_ys, query_xs, query_ys)
            all_C_distribs.append(batch_C_distribs)
        P5, NDCG5, MAP5, P7, NDCG7, MAP7, P10, NDCG10, MAP10 = testing(trainer, opt, test_dataset)
        logger.log(
            "{}\t{:.6f}\t TOP-5 {:.4f}\t{:.4f}\t{:.4f}\t TOP-7: {:.4f}\t{:.4f}\t{:.4f}"
            "\t TOP-10: {:.4f}\t{:.4f}\t{:.4f}".
            format(epoch, train_loss, P5, NDCG5, MAP5, P7, NDCG7, MAP7, P10, NDCG10, MAP10))
        # Persist the last epoch's cluster assignments for analysis.
        if epoch == (num_epoch - 1):
            with open('output_att', 'wb') as fp:
                pickle.dump(all_C_distribs, fp)
    if model_save:
        torch.save(trainer.state_dict(), model_filename)
import torch | |||||
import torch.nn as nn | |||||
import torch.nn.init as init | |||||
import torch.nn.functional as F | |||||
class Item(torch.nn.Module):
    """Two-layer MLP embedding raw item feature vectors.

    Maps an ``if_dim``-dimensional input through two ReLU-activated linear
    layers to a ``second_embedding_dim``-dimensional representation.
    """

    def __init__(self, config):
        super(Item, self).__init__()
        self.feature_dim = config['if_dim']
        self.first_embedding_dim = config['first_embedding_dim']
        self.second_embedding_dim = config['second_embedding_dim']
        self.first_embedding_layer = torch.nn.Linear(
            in_features=self.feature_dim,
            out_features=self.first_embedding_dim,
            bias=True,
        )
        self.second_embedding_layer = torch.nn.Linear(
            in_features=self.first_embedding_dim,
            out_features=self.second_embedding_dim,
            bias=True,
        )

    def forward(self, x, vars=None):
        """Return the ReLU-activated two-layer embedding of ``x``."""
        hidden = F.relu(self.first_embedding_layer(x))
        return F.relu(self.second_embedding_layer(hidden))
class Movie_item(torch.nn.Module):
    """Embeds MovieLens item features (rate, genre, director, actors).

    The rate is a categorical index looked up in an embedding table; genre,
    director and actor are multi-hot vectors projected by bias-free linear
    layers and mean-pooled over the number of active entries. Output is the
    concatenation of the four parts: (batch, 4 * embedding_dim).
    """

    def __init__(self, config):
        super(Movie_item, self).__init__()
        self.num_rate = config['num_rate']
        self.num_genre = config['num_genre']
        self.num_director = config['num_director']
        self.num_actor = config['num_actor']
        self.embedding_dim = config['embedding_dim']
        # change for Movie: four concatenated parts of equal width.
        self.feature_dim = 4 * self.embedding_dim
        self.embedding_rate = torch.nn.Embedding(
            num_embeddings=self.num_rate,
            embedding_dim=self.embedding_dim,
        )
        self.embedding_genre = torch.nn.Linear(
            in_features=self.num_genre,
            out_features=self.embedding_dim,
            bias=False,
        )
        self.embedding_director = torch.nn.Linear(
            in_features=self.num_director,
            out_features=self.embedding_dim,
            bias=False,
        )
        self.embedding_actor = torch.nn.Linear(
            in_features=self.num_actor,
            out_features=self.embedding_dim,
            bias=False,
        )

    def forward(self, rate_idx, genre_idx, director_idx, actors_idx, vars=None):
        """Return concatenated item embeddings for a batch of items."""
        genre = genre_idx.float()
        director = director_idx.float()
        actors = actors_idx.float()
        # Mean-pool each multi-hot field by dividing by its active count.
        parts = [
            self.embedding_rate(rate_idx),
            self.embedding_genre(genre) / genre.sum(1, keepdim=True),
            self.embedding_director(director) / director.sum(1, keepdim=True),
            self.embedding_actor(actors) / actors.sum(1, keepdim=True),
        ]
        return torch.cat(parts, 1)
class User(torch.nn.Module):
    """Two-layer MLP embedding raw user feature vectors.

    Maps a ``uf_dim``-dimensional input through two ReLU-activated linear
    layers to a ``second_embedding_dim``-dimensional representation.
    """

    def __init__(self, config):
        super(User, self).__init__()
        self.feature_dim = config['uf_dim']
        self.first_embedding_dim = config['first_embedding_dim']
        self.second_embedding_dim = config['second_embedding_dim']
        self.first_embedding_layer = torch.nn.Linear(
            in_features=self.feature_dim,
            out_features=self.first_embedding_dim,
            bias=True,
        )
        self.second_embedding_layer = torch.nn.Linear(
            in_features=self.first_embedding_dim,
            out_features=self.second_embedding_dim,
            bias=True,
        )

    def forward(self, x, vars=None):
        """Return the ReLU-activated two-layer embedding of ``x``."""
        hidden = F.relu(self.first_embedding_layer(x))
        return F.relu(self.second_embedding_layer(hidden))
class Movie_user(torch.nn.Module):
    """Embeds MovieLens user features (gender, age, occupation, zipcode area).

    Each field is a categorical index looked up in its own embedding table;
    the output is their concatenation: (batch, 4 * embedding_dim).
    """

    def __init__(self, config):
        super(Movie_user, self).__init__()
        self.num_gender = config['num_gender']
        self.num_age = config['num_age']
        self.num_occupation = config['num_occupation']
        self.num_zipcode = config['num_zipcode']
        self.embedding_dim = config['embedding_dim']
        self.embedding_gender = torch.nn.Embedding(
            num_embeddings=self.num_gender,
            embedding_dim=self.embedding_dim,
        )
        self.embedding_age = torch.nn.Embedding(
            num_embeddings=self.num_age,
            embedding_dim=self.embedding_dim,
        )
        self.embedding_occupation = torch.nn.Embedding(
            num_embeddings=self.num_occupation,
            embedding_dim=self.embedding_dim,
        )
        self.embedding_area = torch.nn.Embedding(
            num_embeddings=self.num_zipcode,
            embedding_dim=self.embedding_dim,
        )

    def forward(self, gender_idx, age_idx, occupation_idx, area_idx):
        """Return concatenated user embeddings for a batch of users."""
        looked_up = (
            self.embedding_gender(gender_idx),
            self.embedding_age(age_idx),
            self.embedding_occupation(occupation_idx),
            self.embedding_area(area_idx),
        )
        return torch.cat(looked_up, 1)
class Encoder(nn.Module):
    """Latent-path encoder: maps each (x_i, y_i) pair to a representation z_i.

    Three linear layers with dropout + ReLU between them (dropout added 03.31).
    """

    def __init__(self, x_dim, y_dim, h1_dim, h2_dim, z1_dim, dropout_rate):
        super(Encoder, self).__init__()
        self.x_dim = x_dim
        self.y_dim = y_dim
        self.h1_dim = h1_dim
        self.h2_dim = h2_dim
        self.z1_dim = z1_dim
        self.dropout_rate = dropout_rate
        self.input_to_hidden = nn.Sequential(
            nn.Linear(self.x_dim + self.y_dim, self.h1_dim),
            nn.Dropout(self.dropout_rate),
            nn.ReLU(inplace=True),
            nn.Linear(self.h1_dim, self.h2_dim),
            nn.Dropout(self.dropout_rate),
            nn.ReLU(inplace=True),
            nn.Linear(self.h2_dim, self.z1_dim),
        )

    def forward(self, x, y):
        """Concatenate each x row with its scalar y and encode."""
        pairs = torch.cat((x, y.view(-1, 1)), dim=1)
        return self.input_to_hidden(pairs)
class MuSigmaEncoder(nn.Module):
    """Maps an aggregated representation to (mu, log_sigma, z) where z is a
    reparameterized sample from N(mu, sigma^2)."""

    def __init__(self, z1_dim, z2_dim, z_dim):
        super(MuSigmaEncoder, self).__init__()
        self.z1_dim = z1_dim
        self.z2_dim = z2_dim
        self.z_dim = z_dim
        self.z_to_hidden = nn.Linear(self.z1_dim, self.z2_dim)
        self.hidden_to_mu = nn.Linear(self.z2_dim, z_dim)
        self.hidden_to_logsigma = nn.Linear(self.z2_dim, z_dim)

    def forward(self, z_input):
        """Return (mu, log_sigma, z) for the latent distribution."""
        hidden = torch.relu(self.z_to_hidden(z_input))
        mu = self.hidden_to_mu(hidden)
        log_sigma = self.hidden_to_logsigma(hidden)
        # Reparameterization trick: z = mu + eps * std with eps ~ N(0, I).
        std = torch.exp(0.5 * log_sigma)
        z = mu + torch.randn_like(std) * std
        return mu, log_sigma, z
class TaskEncoder(nn.Module):
    """Deterministic-path encoder: maps each (x_i, y_i) pair to a task
    feature vector; three linear layers with dropout + ReLU between them."""

    def __init__(self, x_dim, y_dim, h1_dim, h2_dim, final_dim, dropout_rate):
        super(TaskEncoder, self).__init__()
        self.x_dim = x_dim
        self.y_dim = y_dim
        self.h1_dim = h1_dim
        self.h2_dim = h2_dim
        self.final_dim = final_dim
        self.dropout_rate = dropout_rate
        self.input_to_hidden = nn.Sequential(
            nn.Linear(self.x_dim + self.y_dim, self.h1_dim),
            nn.Dropout(self.dropout_rate),
            nn.ReLU(inplace=True),
            nn.Linear(self.h1_dim, self.h2_dim),
            nn.Dropout(self.dropout_rate),
            nn.ReLU(inplace=True),
            nn.Linear(self.h2_dim, self.final_dim),
        )

    def forward(self, x, y):
        """Concatenate each x row with its scalar y and encode."""
        pairs = torch.cat((x, y.view(-1, 1)), dim=1)
        return self.input_to_hidden(pairs)
class MemoryUnit(nn.Module):
    """Task memory: ``clusters_k`` learnable keys of size ``emb_size``.

    Soft-assigns a task embedding to the keys with a Student-t kernel and
    adds the assignment-weighted key mixture back onto the task embedding.
    """

    def __init__(self, clusters_k, emb_size, temperature):
        super(MemoryUnit, self).__init__()
        self.clusters_k = clusters_k
        self.embed_size = emb_size
        self.temperature = temperature
        self.array = nn.Parameter(init.xavier_uniform_(torch.FloatTensor(self.clusters_k, self.embed_size)))

    def forward(self, task_embed):
        """Return (soft assignment C of shape 1*k, enhanced task embedding)."""
        # Euclidean distance from the task to every key, shape k*1.
        dist = torch.norm(task_embed - self.array, p=2, dim=1, keepdim=True)
        # Student-t similarity kernel over the distances.
        sim = torch.pow(dist / self.temperature + 1, (self.temperature + 1) / -2)
        # Normalize to a distribution over keys, transposed to 1*k.
        C = torch.transpose(sim / sim.sum(), 0, 1)
        # 1*k times k*d -> 1*d mixture of keys.
        value = C.mm(self.array)
        # Simple additive enhancement of the task embedding.
        new_task_embed = value + task_embed
        return C, new_task_embed
class Decoder(nn.Module):
    """
    Maps target input x_target plus latent z and task embedding to y_target
    predictions, using three FiLM-modulated hidden layers
    (linear -> gamma*h + beta -> dropout -> ReLU) and a final projection.
    """

    def __init__(self, x_dim, z_dim, task_dim, h1_dim, h2_dim, h3_dim, y_dim, dropout_rate):
        super(Decoder, self).__init__()
        self.x_dim = x_dim
        self.z_dim = z_dim
        self.task_dim = task_dim
        self.h1_dim = h1_dim
        self.h2_dim = h2_dim
        self.h3_dim = h3_dim
        self.y_dim = y_dim
        self.dropout_rate = dropout_rate
        self.dropout = nn.Dropout(self.dropout_rate)
        self.hidden_layer_1 = nn.Linear(self.x_dim + self.z_dim, self.h1_dim)
        self.hidden_layer_2 = nn.Linear(self.h1_dim, self.h2_dim)
        self.hidden_layer_3 = nn.Linear(self.h2_dim, self.h3_dim)
        self.film_layer_1_beta = nn.Linear(self.task_dim, self.h1_dim, bias=False)
        self.film_layer_1_gamma = nn.Linear(self.task_dim, self.h1_dim, bias=False)
        self.film_layer_2_beta = nn.Linear(self.task_dim, self.h2_dim, bias=False)
        self.film_layer_2_gamma = nn.Linear(self.task_dim, self.h2_dim, bias=False)
        self.film_layer_3_beta = nn.Linear(self.task_dim, self.h3_dim, bias=False)
        self.film_layer_3_gamma = nn.Linear(self.task_dim, self.h3_dim, bias=False)
        self.final_projection = nn.Linear(self.h3_dim, self.y_dim)

    def forward(self, x, z, task):
        """Predict y for each row of x, conditioned on z and the task embedding."""
        num_rows, _ = x.size()
        # Tile z so every interaction row sees the same task-level latent.
        z_rep = z.unsqueeze(0).repeat(num_rows, 1)
        hidden = torch.cat((x, z_rep), dim=1)
        stages = (
            (self.hidden_layer_1, self.film_layer_1_gamma, self.film_layer_1_beta),
            (self.hidden_layer_2, self.film_layer_2_gamma, self.film_layer_2_beta),
            (self.hidden_layer_3, self.film_layer_3_gamma, self.film_layer_3_beta),
        )
        for linear, gamma_layer, beta_layer in stages:
            # FiLM modulation derived from the task embedding.
            gamma = torch.tanh(gamma_layer(task))
            beta = torch.tanh(beta_layer(task))
            hidden = torch.mul(linear(hidden), gamma) + beta
            hidden = F.relu(self.dropout(hidden))
        return self.final_projection(hidden)
class Gating_Decoder(nn.Module):
    """FiLM decoder variant with a learned gate per layer.

    Each hidden layer's gamma/beta are blended with an auxiliary eta signal
    via a sigmoid gate delta: ``p = p*delta + eta*(1-delta)`` before the
    usual ``gamma*h + beta`` modulation, dropout and ReLU.
    """

    def __init__(self, x_dim, z_dim, task_dim, h1_dim, h2_dim, h3_dim, y_dim, dropout_rate):
        super(Gating_Decoder, self).__init__()
        self.x_dim = x_dim
        self.z_dim = z_dim
        self.task_dim = task_dim
        self.h1_dim = h1_dim
        self.h2_dim = h2_dim
        self.h3_dim = h3_dim
        self.y_dim = y_dim
        self.dropout_rate = dropout_rate
        self.dropout = nn.Dropout(self.dropout_rate)
        self.hidden_layer_1 = nn.Linear(self.x_dim + self.z_dim, self.h1_dim)
        self.hidden_layer_2 = nn.Linear(self.h1_dim, self.h2_dim)
        self.hidden_layer_3 = nn.Linear(self.h2_dim, self.h3_dim)
        self.film_layer_1_beta = nn.Linear(self.task_dim, self.h1_dim, bias=False)
        self.film_layer_1_gamma = nn.Linear(self.task_dim, self.h1_dim, bias=False)
        self.film_layer_1_eta = nn.Linear(self.task_dim, self.h1_dim, bias=False)
        self.film_layer_1_delta = nn.Linear(self.task_dim, self.h1_dim, bias=False)
        self.film_layer_2_beta = nn.Linear(self.task_dim, self.h2_dim, bias=False)
        self.film_layer_2_gamma = nn.Linear(self.task_dim, self.h2_dim, bias=False)
        self.film_layer_2_eta = nn.Linear(self.task_dim, self.h2_dim, bias=False)
        self.film_layer_2_delta = nn.Linear(self.task_dim, self.h2_dim, bias=False)
        self.film_layer_3_beta = nn.Linear(self.task_dim, self.h3_dim, bias=False)
        self.film_layer_3_gamma = nn.Linear(self.task_dim, self.h3_dim, bias=False)
        self.film_layer_3_eta = nn.Linear(self.task_dim, self.h3_dim, bias=False)
        self.film_layer_3_delta = nn.Linear(self.task_dim, self.h3_dim, bias=False)
        self.final_projection = nn.Linear(self.h3_dim, self.y_dim)

    def forward(self, x, z, task):
        """Predict y for each row of x, conditioned on z and the task embedding."""
        num_rows, _ = x.size()
        # Tile z so every interaction row sees the same task-level latent.
        z_rep = z.unsqueeze(0).repeat(num_rows, 1)
        hidden = torch.cat((x, z_rep), dim=1)
        stages = (
            (self.hidden_layer_1, self.film_layer_1_beta, self.film_layer_1_gamma,
             self.film_layer_1_eta, self.film_layer_1_delta),
            (self.hidden_layer_2, self.film_layer_2_beta, self.film_layer_2_gamma,
             self.film_layer_2_eta, self.film_layer_2_delta),
            (self.hidden_layer_3, self.film_layer_3_beta, self.film_layer_3_gamma,
             self.film_layer_3_eta, self.film_layer_3_delta),
        )
        for linear, beta_l, gamma_l, eta_l, delta_l in stages:
            beta = torch.tanh(beta_l(task))
            gamma = torch.tanh(gamma_l(task))
            eta = torch.tanh(eta_l(task))
            delta = torch.sigmoid(delta_l(task))
            # Gate the FiLM parameters towards eta before modulating.
            gamma = gamma * delta + eta * (1 - delta)
            beta = beta * delta + eta * (1 - delta)
            hidden = torch.mul(linear(hidden), gamma) + beta
            hidden = F.relu(self.dropout(hidden))
        return self.final_projection(hidden)
""" | |||||
Run evaluation with saved models. | |||||
""" | |||||
import random | |||||
import argparse | |||||
from tqdm import tqdm | |||||
import torch | |||||
from utils.scorer import * | |||||
def testing(trainer, opt, test_dataset):
    """Evaluate ``trainer`` on every test task one at a time.

    Returns precision, NDCG and MAP at cutoffs 5, 7 and 10, in the order
    (P5, NDCG5, MAP5, P7, NDCG7, MAP7, P10, NDCG10, MAP10).
    """
    minibatch_size = 1
    supp_x, supp_y, query_x, query_y = zip(*test_dataset)
    trainer.eval()
    all_loss = 0
    # One (precision, AP, NDCG) accumulator triple per cutoff.
    metrics = {k: ([], [], []) for k in (5, 7, 10)}
    for idx in range(len(test_dataset)):
        lo = minibatch_size * idx
        hi = minibatch_size * (idx + 1)
        supp_xs = list(supp_x[lo:hi])
        supp_ys = list(supp_y[lo:hi])
        query_xs = list(query_x[lo:hi])
        query_ys = list(query_y[lo:hi])
        test_loss, recommendation_list = trainer.query_rec(supp_xs, supp_ys, query_xs, query_ys)
        all_loss += test_loss
        truth = query_ys[0].cpu().detach().numpy()
        for k in (5, 7, 10):
            pre, ap, ndcg = metrics[k]
            add_metric(recommendation_list, truth, pre, ap, ndcg, k)
    results = []
    for k in (5, 7, 10):
        pre, ap, ndcg = metrics[k]
        # cal_metric yields (mean precision, mean NDCG, MAP) for the cutoff.
        results.extend(cal_metric(pre, ap, ndcg))
    return tuple(results)
# ---- Hyper-parameters for TaNP training on MovieLens ----
# Item/user feature-embedding sizes.
first_embedding_dim=32
second_embedding_dim=16
# Latent-path dimensions (z1 -> z2 -> z; z holds mu and sigma).
z1_dim=32
z2_dim=32
z_dim=32
# Latent encoder hidden sizes.
enc_h1_dim=32
enc_h2_dim=16
# Task encoder hidden and output sizes.
taskenc_h1_dim=32
taskenc_h2_dim=32
taskenc_final_dim=16
# Task-memory unit: number of cluster keys and Student-t temperature.
clusters_k=10
temperature=1.0
# Weight balancing the clustering loss against the NP loss.
lambda=1.0
# Decoder hidden sizes.
dec_h1_dim=32
dec_h2_dim=32
dec_h3_dim=16
dropout_rate=0
# Optimization settings.
lr=0.0001
optim='adam'
num_epoch=100
batch_size=32
# Data split ratios and per-task set sizes.
train_ratio=0.7
valid_ratio=0.1
support_size=20
query_size=10
max_len=200
context_min=20
# Launch training on GPU 0 with the settings above.
# (No comments inside the command: a '#' would break the line continuations.)
CUDA_VISIBLE_DEVICES=0 python train_TaNP.py \
--first_embedding_dim $first_embedding_dim \
--second_embedding_dim $second_embedding_dim \
--z1_dim $z1_dim \
--z2_dim $z2_dim \
--z_dim $z_dim \
--enc_h1_dim $enc_h1_dim \
--enc_h2_dim $enc_h2_dim \
--taskenc_h1_dim $taskenc_h1_dim \
--taskenc_h2_dim $taskenc_h2_dim \
--taskenc_final_dim $taskenc_final_dim \
--clusters_k $clusters_k \
--lambda $lambda \
--temperature $temperature \
--dec_h1_dim $dec_h1_dim \
--dec_h2_dim $dec_h2_dim \
--dec_h3_dim $dec_h3_dim \
--lr $lr \
--dropout_rate $dropout_rate \
--optim $optim \
--num_epoch $num_epoch \
--batch_size $batch_size \
--train_ratio $train_ratio \
--valid_ratio $valid_ratio \
--support_size $support_size \
--query_size $query_size \
--max_len $max_len \
--context_min $context_min
import os | |||||
from datetime import datetime | |||||
import time | |||||
import numpy as np | |||||
import random | |||||
import argparse | |||||
import pickle | |||||
import torch | |||||
import torch.nn as nn | |||||
import torch.optim as optim | |||||
from torch.autograd import Variable | |||||
import json | |||||
from utils.loader import Preprocess | |||||
from TaNP import Trainer | |||||
from TaNP_training import training | |||||
from utils import helper | |||||
from eval import testing | |||||
parser = argparse.ArgumentParser()
# --- paths and run identity ---
# parser.add_argument('--data_dir', type=str, default='data/lastfm_20')#1
# parser.add_argument('--model_save_dir', type=str, default='save_model_dir')#1
parser.add_argument('--data_dir', type=str, default='/media/external_10TB/10TB/maheri/melu_data')#1
parser.add_argument('--model_save_dir', type=str, default='/media/external_10TB/10TB/maheri/tanp_data/tanp_models')#1
parser.add_argument('--id', type=str, default='1', help='used for save hyper-parameters.')#1
# --- model architecture dimensions (encoder / latent path / task encoder / decoder) ---
parser.add_argument('--first_embedding_dim', type=int, default=32, help='Embedding dimension for item and user.')#1
parser.add_argument('--second_embedding_dim', type=int, default=16, help='Embedding dimension for item and user.')#1
parser.add_argument('--z1_dim', type=int, default=32, help='The dimension of z1 in latent path.')
parser.add_argument('--z2_dim', type=int, default=32, help='The dimension of z2 in latent path.')
parser.add_argument('--z_dim', type=int, default=32, help='The dimension of z in latent path.')
parser.add_argument('--enc_h1_dim', type=int, default=64, help='The hidden first dimension of encoder.')
parser.add_argument('--enc_h2_dim', type=int, default=64, help='The hidden second dimension of encoder.')
parser.add_argument('--taskenc_h1_dim', type=int, default=128, help='The hidden first dimension of task encoder.')
parser.add_argument('--taskenc_h2_dim', type=int, default=64, help='The hidden second dimension of task encoder.')
parser.add_argument('--taskenc_final_dim', type=int, default=64, help='The hidden second dimension of task encoder.')
parser.add_argument('--clusters_k', type=int, default=7, help='Cluster numbers of tasks.')
parser.add_argument('--temperature', type=float, default=1.0, help='used for student-t distribution.')
# NOTE: `lambda` cannot be read as an attribute (Python keyword); the script reads it via vars(args).
parser.add_argument('--lambda', type=float, default=0.1, help='used to balance the clustering loss and NP loss.')
parser.add_argument('--dec_h1_dim', type=int, default=128, help='The hidden first dimension of encoder.')
parser.add_argument('--dec_h2_dim', type=int, default=128, help='The hidden second dimension of encoder.')
parser.add_argument('--dec_h3_dim', type=int, default=128, help='The hidden third dimension of encoder.')
# used for movie datasets
# --- MovieLens feature vocabulary sizes (one-hot dimensions for user/item features) ---
parser.add_argument('--num_gender', type=int, default=2, help='User information.')#1
parser.add_argument('--num_age', type=int, default=7, help='User information.')#1
parser.add_argument('--num_occupation', type=int, default=21, help='User information.')#1
parser.add_argument('--num_zipcode', type=int, default=3402, help='User information.')#1
parser.add_argument('--num_rate', type=int, default=6, help='Item information.')#1
parser.add_argument('--num_genre', type=int, default=25, help='Item information.')#1
parser.add_argument('--num_director', type=int, default=2186, help='Item information.')#1
parser.add_argument('--num_actor', type=int, default=8030, help='Item information.')#1
# --- optimization, episode sizing, and runtime settings ---
parser.add_argument('--dropout_rate', type=float, default=0, help='used in encoder and decoder.')
parser.add_argument('--lr', type=float, default=1e-4, help='Applies to SGD and Adagrad.')#1
parser.add_argument('--optim', type=str, default='adam', help='sgd, adagrad, adam or adamax.')
parser.add_argument('--num_epoch', type=int, default=150)#1
parser.add_argument('--batch_size', type=int, default=32)#1
parser.add_argument('--train_ratio', type=float, default=0.7, help='Warm user ratio for training.')#1
parser.add_argument('--valid_ratio', type=float, default=0.1, help='Cold user ratio for validation.')#1
parser.add_argument('--seed', type=int, default=2020)#1
parser.add_argument('--save', type=int, default=0)#1
parser.add_argument('--use_cuda', type=bool, default=torch.cuda.is_available())#1
parser.add_argument('--cpu', action='store_true', help='Ignore CUDA.')#1
parser.add_argument('--support_size', type=int, default=20)#1
parser.add_argument('--query_size', type=int, default=10)#1
parser.add_argument('--max_len', type=int, default=200, help='The max length of interactions for each user.')
parser.add_argument('--context_min', type=int, default=20, help='Minimum size of context range.')
# change for Movie lens
parser.add_argument('--embedding_dim', type=int, default=32, help='embedding dimension for each item/user feature of Movie lens')
parser.add_argument('--first_fc_hidden_dim', type=int, default=64, help='embedding dimension for each item/user feature of Movie lens')
parser.add_argument('--second_fc_hidden_dim', type=int, default=64, help='embedding dimension for each item/user feature of Movie lens')
args = parser.parse_args()
def seed_everything(seed=1023):
    """Seed every RNG used by the pipeline (Python, NumPy, torch, CUDA, hashing)."""
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Force cuDNN into deterministic mode so GPU runs are reproducible too.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
# ---- reproducibility and device selection ----
seed = args.seed
seed_everything(seed)
if args.cpu:
    # --cpu wins over CUDA availability.
    args.use_cuda = False
elif args.use_cuda:
    torch.cuda.manual_seed(args.seed)
# All hyper-parameters are handled as a plain dict from here on
# (also makes the keyword arg 'lambda' accessible).
opt = vars(args)
# print model info
helper.print_config(opt)
helper.ensure_dir(opt["model_save_dir"], verbose=True)
# save model config
helper.save_config(opt, opt["model_save_dir"] + "/" +opt["id"] + '.config', verbose=True)
# record training log
file_logger = helper.FileLogger(opt["model_save_dir"] + '/' + opt['id'] + ".log",
                                header="# epoch\ttrain_loss\tprecision5\tNDCG5\tMAP5\tprecision7"
                                       "\tNDCG7\tMAP7\tprecision10\tNDCG10\tMAP10")
# change for Movie Lens
# preprocess = Preprocess(opt)
print("Preprocess is done.")
print("Create model TaNP...")
# opt['uf_dim'] = preprocess.uf_dim
# opt['if_dim'] = preprocess.if_dim
trainer = Trainer(opt)
if opt['use_cuda']:
    trainer.cuda()
model_filename = "{}/{}.pt".format(opt['model_save_dir'], opt["id"])
# ---- load pre-generated training episodes (MovieLens "warm_state") ----
# /4 since sup_x, sup_y, query_x, query_y
# change for Movie lens
# training_set_size = int(len(os.listdir("{}/{}/{}".format(opt["data_dir"], "training", "log"))) / 4)
training_set_size = int(len(os.listdir("{}/{}".format(opt["data_dir"], "warm_state"))) / 4)
supp_xs_s = []
supp_ys_s = []
query_xs_s = []
query_ys_s = []
# NOTE(review): pickle.load on these files assumes the data directory is trusted.
for idx in range(training_set_size):
    # supp_xs_s.append(pickle.load(open("{}/{}/{}/supp_x_{}.pkl".format(opt["data_dir"], "training", "log", idx), "rb")))
    # supp_ys_s.append(pickle.load(open("{}/{}/{}/supp_y_{}.pkl".format(opt["data_dir"], "training", "log", idx), "rb")))
    # query_xs_s.append(pickle.load(open("{}/{}/{}/query_x_{}.pkl".format(opt["data_dir"], "training", "log", idx), "rb")))
    # query_ys_s.append(pickle.load(open("{}/{}/{}/query_y_{}.pkl".format(opt["data_dir"], "training", "log", idx), "rb")))
    supp_xs_s.append(pickle.load(open("{}/{}/supp_x_{}.pkl".format(opt["data_dir"], "warm_state", idx), "rb")))
    supp_ys_s.append(pickle.load(open("{}/{}/supp_y_{}.pkl".format(opt["data_dir"], "warm_state", idx), "rb")))
    query_xs_s.append(pickle.load(open("{}/{}/query_x_{}.pkl".format(opt["data_dir"], "warm_state", idx), "rb")))
    query_ys_s.append(pickle.load(open("{}/{}/query_y_{}.pkl".format(opt["data_dir"], "warm_state", idx), "rb")))
# One task per user: (support_x, support_y, query_x, query_y).
train_dataset = list(zip(supp_xs_s, supp_ys_s, query_xs_s, query_ys_s))
del (supp_xs_s, supp_ys_s, query_xs_s, query_ys_s)
# ---- load evaluation episodes (cold-start users) ----
# change for Movie lens
# testing_set_size = int(len(os.listdir("{}/{}/{}".format(opt["data_dir"], "testing", "log"))) / 4)
testing_set_size = int(len(os.listdir("{}/{}".format(opt["data_dir"], "user_cold_state"))) / 4)
supp_xs_s = []
supp_ys_s = []
query_xs_s = []
query_ys_s = []
for idx in range(testing_set_size):
    # change for Movie lens
    # supp_xs_s.append(
    #     pickle.load(open("{}/{}/{}/supp_x_{}.pkl".format(opt["data_dir"], "testing", "log", idx), "rb")))
    # supp_ys_s.append(
    #     pickle.load(open("{}/{}/{}/supp_y_{}.pkl".format(opt["data_dir"], "testing", "log", idx), "rb")))
    # query_xs_s.append(
    #     pickle.load(open("{}/{}/{}/query_x_{}.pkl".format(opt["data_dir"], "testing", "log", idx), "rb")))
    # query_ys_s.append(
    #     pickle.load(open("{}/{}/{}/query_y_{}.pkl".format(opt["data_dir"], "testing", "log", idx), "rb")))
    supp_xs_s.append(
        pickle.load(open("{}/{}/supp_x_{}.pkl".format(opt["data_dir"], "user_cold_state", idx), "rb")))
    supp_ys_s.append(
        pickle.load(open("{}/{}/supp_y_{}.pkl".format(opt["data_dir"], "user_cold_state", idx), "rb")))
    query_xs_s.append(
        pickle.load(open("{}/{}/query_x_{}.pkl".format(opt["data_dir"], "user_cold_state", idx), "rb")))
    query_ys_s.append(
        pickle.load(open("{}/{}/query_y_{}.pkl".format(opt["data_dir"], "user_cold_state", idx), "rb")))
test_dataset = list(zip(supp_xs_s, supp_ys_s, query_xs_s, query_ys_s))
del (supp_xs_s, supp_ys_s, query_xs_s, query_ys_s)
print("# epoch\ttrain_loss\tprecision5\tNDCG5\tMAP5\tprecision7\tNDCG7\tMAP7\tprecision10\tNDCG10\tMAP10")
# Train from scratch unless a checkpoint with this run id already exists.
if not os.path.exists(model_filename):
    print("Start training...")
    training(trainer, opt, train_dataset, test_dataset, batch_size=opt['batch_size'], num_epoch=opt['num_epoch'],
             model_save=opt["save"], model_filename=model_filename, logger=file_logger)
else:
    print("Load pre-trained model...")
    # "<id>.pt"[:-2] + "config" -> "<id>.config": restore the saved run config.
    opt = helper.load_config(model_filename[:-2]+"config")
    helper.print_config(opt)
    trained_state_dict = torch.load(model_filename)
    trainer.load_state_dict(trained_state_dict)
""" | |||||
Helper functions. | |||||
""" | |||||
import os | |||||
import json | |||||
import argparse | |||||
### IO | |||||
def check_dir(d):
    """Abort the process when directory *d* does not exist."""
    if os.path.exists(d):
        return
    print("Directory {} does not exist. Exit.".format(d))
    exit(1)
def check_files(files):
    """Abort the process when any non-None path in *files* is missing."""
    for f in files:
        if f is None:
            continue
        if not os.path.exists(f):
            print("File {} does not exist. Exit.".format(f))
            exit(1)
def ensure_dir(d, verbose=True):
    """Create directory *d* (and any parents) when it is not present yet."""
    if os.path.exists(d):
        return
    if verbose:
        print("Directory {} do not exist; creating...".format(d))
    os.makedirs(d)
def save_config(config, path, verbose=True):
    """Write *config* to *path* as indented JSON and return it unchanged."""
    with open(path, 'w') as outfile:
        outfile.write(json.dumps(config, indent=2))
    if verbose:
        print("Config saved to file {}".format(path))
    return config
def load_config(path, verbose=True):
    """Read the JSON config file at *path* and return it as a dict."""
    with open(path) as f:
        config = json.loads(f.read())
    if verbose:
        print("Config loaded from file {}".format(path))
    return config
def print_config(config):
    """Pretty-print every key/value pair of *config* to stdout."""
    body = "".join("\t{} : {}\n".format(k, str(v)) for k, v in config.items())
    info = "Running with the following configs:\n" + body
    print("\n" + info + "\n")
    return
class FileLogger(object):
    """Append-mode logger that reopens its file on every write.

    Each message is echoed to stdout as well as persisted to the file.
    """

    def __init__(self, filename, header=None):
        self.filename = filename
        # Start from a clean file on every run.
        if os.path.exists(filename):
            os.remove(filename)
        if header is not None:
            with open(filename, 'w') as out:
                print(header, file=out)

    def log(self, message):
        # Echo to the console, then append to the log file.
        with open(self.filename, 'a') as out:
            print(message)
            print(message, file=out)
import json | |||||
import random | |||||
import torch | |||||
import numpy as np | |||||
import pickle | |||||
import codecs | |||||
import re | |||||
import os | |||||
import datetime | |||||
import tqdm | |||||
import pandas as pd | |||||
# Convert user ids to a dict: key = id (int), value = one-hot vector (tensor).
# Each element of the input list is a str.
def to_onehot_dict(list):
    """Map each id in *list* (str or int) to a 1 x len(list) one-hot LongTensor.

    NOTE(review): the parameter shadows the builtin `list`; the name is kept
    for backward compatibility with existing callers.
    """
    length = len(list)
    onehot = {}
    for element in list:
        idx = int(element)
        vector = torch.zeros(1, length).long()
        vector[:, idx] = 1.0
        onehot[idx] = vector
    return onehot
def load_list(fname):
    """Read *fname* (UTF-8) and return its lines, stripped, as a list of strings."""
    with open(fname, encoding="utf-8") as f:
        return [line.strip() for line in f.readlines()]
# Helpers for merging dictionaries (by key, and by concatenating list values).
def merge_key(dict1, dict2):
    """Return a new dict with dict2's entries layered over dict1's."""
    merged = dict(dict1)
    merged.update(dict2)
    return merged
def merge_value(dict1, dict2):  # merge and item_cold
    """In place, extend dict1[key] with dict2[key] for every key of dict2.

    Keys of dict2 that are absent from dict1 are reported and skipped.
    """
    for key, value in dict2.items():
        if key not in dict1:
            print('Unexpected key.')
            continue
        # if list(set(dict1[key]+value)) the final number of movies-1m is 1000205
        dict1[key] = dict1[key] + value
def count_values(dict):
    """Return the total number of elements across all values of *dict*."""
    return sum(len(value) for value in dict.values())
def construct_dictionary(user_list, total_dict):
    """Project *total_dict* onto the (stringified) ids listed in *user_list*."""
    return {str(user): total_dict[str(user)] for user in user_list}
class Preprocess(object):
    """
    Preprocess the training, validation and test data.
    Generate the episode-style data.

    Splits users into warm (train) / valid / cold (test) groups, one-hot
    encodes users and items, and dumps per-user support/query episodes to
    disk as pickle files.
    """
    def __init__(self, opt):
        self.batch_size = opt["batch_size"]
        self.opt = opt
        # warm data ratio
        self.train_ratio = opt['train_ratio']
        self.valid_ratio = opt['valid_ratio']
        # remainder of the users becomes the cold-start test split
        self.test_ratio = 1 - self.train_ratio - self.valid_ratio
        self.dataset_path = opt["data_dir"]
        self.support_size = opt['support_size']
        self.query_size = opt['query_size']
        self.max_len = opt['max_len']
        # save one-hot dimension length
        uf_dim, if_dim = self.preprocess(self.dataset_path)
        self.uf_dim = uf_dim
        self.if_dim = if_dim

    def preprocess(self, dataset_path):
        """ Preprocess the data and convert to ids.

        Returns (len(userids), len(itemids)), i.e. the one-hot encoding
        dimensionality for users and items.
        """
        # Create training-validation-test datasets
        print('Create training, validation and test data from scratch!')
        # interaction_dict_x: user id -> list of item ids;
        # interaction_dict_y: user id -> matching list of labels (same lengths).
        with open('./{}/interaction_dict_x.json'.format(dataset_path), 'r', encoding='utf-8') as f:
            inter_dict_x = json.loads(f.read())
        with open('./{}/interaction_dict_y.json'.format(dataset_path), 'r', encoding='utf-8') as f:
            inter_dict_y = json.loads(f.read())
        print('The size of total interactions is %d.' % (count_values(inter_dict_x)))  # 42346
        assert count_values(inter_dict_x) == count_values(inter_dict_y)
        with open('./{}/user_list.json'.format(dataset_path), 'r', encoding='utf-8') as f:
            userids = json.loads(f.read())
        with open('./{}/item_list.json'.format(dataset_path), 'r', encoding='utf-8') as f:
            itemids = json.loads(f.read())
        # userids = list(inter_dict_x.keys())
        # Random user-level split: warm (train) / valid / cold (test).
        random.shuffle(userids)
        warm_user_size = int(len(userids) * self.train_ratio)
        valid_user_size = int(len(userids) * self.valid_ratio)
        warm_users = userids[:warm_user_size]
        valid_users = userids[warm_user_size:warm_user_size+valid_user_size]
        cold_users = userids[warm_user_size+valid_user_size:]
        assert len(userids) == len(warm_users)+len(valid_users)+len(cold_users)
        # Construct the training data dict
        training_dict_x = construct_dictionary(warm_users, inter_dict_x)
        training_dict_y = construct_dictionary(warm_users, inter_dict_y)
        # Avoid the new items shown in test data in the case of cold user.
        # item_set collects every item seen during training.
        item_set = set()
        for i in training_dict_x.values():
            i = set(i)
            item_set = item_set.union(i)
        # Construct one-hot dictionary
        user_dict = to_onehot_dict(userids)
        # only items contained in all data are encoded.
        item_dict = to_onehot_dict(itemids)
        # This part of data is not used, so we do not process it temporally.
        valid_dict_x = construct_dictionary(valid_users, inter_dict_x)
        valid_dict_y = construct_dictionary(valid_users, inter_dict_y)
        assert count_values(valid_dict_x) == count_values(valid_dict_y)
        test_dict_x = construct_dictionary(cold_users, inter_dict_x)
        test_dict_y = construct_dictionary(cold_users, inter_dict_y)
        assert count_values(test_dict_x) == count_values(test_dict_y)
        print('Before delete new items in test data, test data has %d interactions.' % (count_values(test_dict_x)))
        # Delete the new items in test data.
        unseen_count = 0
        for key, value in test_dict_x.items():
            assert len(value) == len(test_dict_y[key])
            unseen_item_index = [index for index, i in enumerate(value) if i not in item_set]
            unseen_count+=len(unseen_item_index)
            if len(unseen_item_index) == 0:
                continue
            else:
                # Drop unseen items and their labels in lock-step so x/y stay aligned.
                new_value_x = [element for index, element in enumerate(value) if index not in unseen_item_index]
                new_value_y = [test_dict_y[key][index] for index, element in enumerate(value) if index not in unseen_item_index]
                test_dict_x[key] = new_value_x
                test_dict_y[key] = new_value_y
        print('After delete new items in test data, test data has %d interactions.' % (count_values(test_dict_x)))
        assert count_values(test_dict_x) == count_values(test_dict_y)
        print('The number of total unseen interactions is %d.' % (unseen_count))
        # Persist the raw splits. NOTE(review): "{:2f}" is probably intended to be
        # "{:.2f}" (as written it formats with default precision, e.g. "0.700000");
        # confirm against the downstream file names before changing.
        pickle.dump(training_dict_x, open("{}/training_dict_x_{:2f}.pkl".format(dataset_path, self.train_ratio), "wb"))
        pickle.dump(training_dict_y, open("{}/training_dict_y_{:2f}.pkl".format(dataset_path, self.train_ratio), "wb"))
        pickle.dump(valid_dict_x, open("{}/valid_dict_x_{:2f}.pkl".format(dataset_path, self.valid_ratio), "wb"))
        pickle.dump(valid_dict_y, open("{}/valid_dict_y_{:2f}.pkl".format(dataset_path, self.valid_ratio), "wb"))
        pickle.dump(test_dict_x, open("{}/test_dict_x_{:2f}.pkl".format(dataset_path, self.test_ratio), "wb"))
        pickle.dump(test_dict_y, open("{}/test_dict_y_{:2f}.pkl".format(dataset_path, self.test_ratio), "wb"))

        def generate_episodes(dict_x, dict_y, category, support_size, query_size, max_len, dir="log"):
            # Writes one (support, query) episode per qualifying user under
            # <dataset_path>/<category>/<dir>/ and the raw (user, item) id pairs
            # under <dataset_path>/<category>/evidence/.
            idx = 0
            if not os.path.exists("{}/{}/{}".format(dataset_path, category, dir)):
                os.makedirs("{}/{}/{}".format(dataset_path, category, dir))
                os.makedirs("{}/{}/{}".format(dataset_path, category, "evidence"))
            for _, user_id in enumerate(dict_x.keys()):
                u_id = int(user_id)
                seen_music_len = len(dict_x[str(u_id)])
                indices = list(range(seen_music_len))
                # filter some users with their interactions, i.e., tasks
                if seen_music_len < (support_size + query_size) or seen_music_len > max_len:
                    continue
                random.shuffle(indices)
                tmp_x = np.array(dict_x[str(u_id)])
                tmp_y = np.array(dict_y[str(u_id)])
                support_x_app = None
                for m_id in tmp_x[indices[:support_size]]:
                    m_id = int(m_id)
                    # Episode feature row = concat(item one-hot, user one-hot).
                    tmp_x_converted = torch.cat((item_dict[m_id], user_dict[u_id]), 1)
                    try:
                        support_x_app = torch.cat((support_x_app, tmp_x_converted), 0)
                    except:
                        # First row: torch.cat with None raises, so seed the tensor here.
                        support_x_app = tmp_x_converted
                query_x_app = None
                for m_id in tmp_x[indices[support_size:]]:
                    m_id = int(m_id)
                    u_id = int(user_id)
                    tmp_x_converted = torch.cat((item_dict[m_id], user_dict[u_id]), 1)
                    try:
                        query_x_app = torch.cat((query_x_app, tmp_x_converted), 0)
                    except:
                        query_x_app = tmp_x_converted
                support_y_app = torch.FloatTensor(tmp_y[indices[:support_size]])
                query_y_app = torch.FloatTensor(tmp_y[indices[support_size:]])
                pickle.dump(support_x_app, open("{}/{}/{}/supp_x_{}.pkl".format(dataset_path, category, dir, idx), "wb"))
                pickle.dump(support_y_app, open("{}/{}/{}/supp_y_{}.pkl".format(dataset_path, category, dir, idx), "wb"))
                pickle.dump(query_x_app, open("{}/{}/{}/query_x_{}.pkl".format(dataset_path, category, dir, idx), "wb"))
                pickle.dump(query_y_app, open("{}/{}/{}/query_y_{}.pkl".format(dataset_path, category, dir, idx), "wb"))
                # used for evidence candidate selection
                with open("{}/{}/{}/supp_x_{}_u_m_ids.txt".format(dataset_path, category, "evidence", idx), "w") as f:
                    for m_id in tmp_x[indices[:support_size]]:
                        f.write("{}\t{}\n".format(u_id, m_id))
                with open("{}/{}/{}/query_x_{}_u_m_ids.txt".format(dataset_path, category, "evidence", idx), "w") as f:
                    for m_id in tmp_x[indices[support_size:]]:
                        f.write("{}\t{}\n".format(u_id, m_id))
                idx+=1

        print("Generate eposide data for training.")
        generate_episodes(training_dict_x, training_dict_y, "training", self.support_size, self.query_size, self.max_len)
        print("Generate eposide data for validation.")
        generate_episodes(valid_dict_x, valid_dict_y, "validation", self.support_size, self.query_size, self.max_len)
        print("Generate eposide data for testing.")
        generate_episodes(test_dict_x, test_dict_y, "testing", self.support_size, self.query_size, self.max_len)
        return len(userids), len(itemids)
import math | |||||
def AP(ranked_list, ground_truth, topn):
    """Average precision at *topn*.

    A recommendation at rank i counts as a hit while its ground-truth score is
    still among the topn largest scores (multiset semantics: each top score can
    be matched once). Returns 0.0 when there are no hits.
    """
    top_scores = sorted(ground_truth, reverse=True)[:topn]
    hits = 0
    sum_precs = 0.0
    for rank in range(topn):
        score = ground_truth[ranked_list[rank]]
        if score in top_scores:
            hits += 1
            sum_precs += hits / (rank + 1.0)
            top_scores.remove(score)
    return sum_precs / topn if hits > 0 else 0.0
def RR(ranked_list, ground_truth,topn):
    """Reciprocal rank of the first hit within the topn recommendations (0 if none)."""
    top_scores = sorted(ground_truth, reverse=True)[:topn]
    for rank in range(topn):
        if ground_truth[ranked_list[rank]] in top_scores:
            return 1 / (rank + 1.0)
    return 0
def precision(ranked_list,ground_truth,topn):
    """Precision at *topn*: fraction of the topn recommendations whose score
    belongs to the topn best ground-truth scores (multiset semantics)."""
    top_scores = sorted(ground_truth, reverse=True)[:topn]
    hits = 0
    for rank in range(topn):
        score = ground_truth[ranked_list[rank]]
        if score in top_scores:
            top_scores.remove(score)
            hits += 1
    return hits / topn
def nDCG(ranked_list, ground_truth, topn):
    """Normalized DCG at *topn* with (2^rel - 1) gains and log2(rank+1) discounts."""
    idcg = IDCG(ground_truth, topn)
    dcg = 0
    for rank in range(topn):
        rel = ground_truth[ranked_list[rank]]
        dcg += ((2 ** rel) - 1) / math.log(rank + 2, 2)
    return dcg / idcg
def IDCG(ground_truth,topn):
    """Ideal DCG: the DCG of the topn ground-truth scores in best-first order."""
    best = sorted(ground_truth, reverse=True)
    return sum(((2 ** best[i]) - 1) / math.log(i + 2, 2) for i in range(topn))
def add_metric(recommend_list, ALL_group_list, precision_list, ap_list, ndcg_list, topn):
    """Evaluate one ranking at *topn* and append each metric to its accumulator list."""
    precision_list.append(precision(recommend_list, ALL_group_list, topn))
    ap_list.append(AP(recommend_list, ALL_group_list, topn))
    ndcg_list.append(nDCG(recommend_list, ALL_group_list, topn))
def cal_metric(precision_list,ap_list,ndcg_list):
    """Average each accumulator list; returns (mean_precision, mean_ndcg, mean_ap)."""
    mean_pre = sum(precision_list) / len(precision_list)
    mean_ap = sum(ap_list) / len(ap_list)
    mean_ndcg = sum(ndcg_list) / len(ndcg_list)
    # NOTE: return order is precision, nDCG, AP — not the argument order.
    return mean_pre, mean_ndcg, mean_ap
""" | |||||
Utility functions for torch. | |||||
""" | |||||
import torch | |||||
from torch import nn, optim | |||||
from torch.optim.optimizer import Optimizer | |||||
### class | |||||
class MyAdagrad(Optimizer):
    """My modification of the Adagrad optimizer that allows to specify an initial
    accumulater value. This mimics the behavior of the default Adagrad implementation
    in Tensorflow. The default PyTorch Adagrad uses 0 for initial acculmulator value.

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-2)
        lr_decay (float, optional): learning rate decay (default: 0)
        init_accu_value (float, optional): initial accumulater value.
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
    """
    def __init__(self, params, lr=1e-2, lr_decay=0, init_accu_value=0.1, weight_decay=0):
        defaults = dict(lr=lr, lr_decay=lr_decay, init_accu_value=init_accu_value,
                        weight_decay=weight_decay)
        super(MyAdagrad, self).__init__(params, defaults)
        # Seed every squared-gradient accumulator with init_accu_value
        # (TensorFlow-style) instead of PyTorch's default 0.
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'] = 0
                state['sum'] = torch.ones(p.data.size()).type_as(p.data) * \
                        init_accu_value

    def share_memory(self):
        # Move accumulators into shared memory for multi-process (Hogwild) training.
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['sum'].share_memory_()

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                state = self.state[p]
                state['step'] += 1

                if group['weight_decay'] != 0:
                    if p.grad.data.is_sparse:
                        raise RuntimeError("weight_decay option is not compatible with sparse gradients ")
                    # Fix: the legacy add(scalar, tensor) overload was removed from
                    # PyTorch; use the keyword `alpha` form instead.
                    grad = grad.add(p.data, alpha=group['weight_decay'])

                # Decayed learning rate, as in the reference Adagrad implementation.
                clr = group['lr'] / (1 + (state['step'] - 1) * group['lr_decay'])

                if p.grad.data.is_sparse:
                    grad = grad.coalesce()  # the update is non-linear so indices must be unique
                    grad_indices = grad._indices()
                    grad_values = grad._values()
                    size = torch.Size([x for x in grad.size()])

                    def make_sparse(values):
                        constructor = type(p.grad.data)
                        if grad_indices.dim() == 0 or values.dim() == 0:
                            return constructor()
                        return constructor(grad_indices, values, size)
                    state['sum'].add_(make_sparse(grad_values.pow(2)))
                    # Fix: the private Tensor._sparse_mask was removed; the public
                    # equivalent is Tensor.sparse_mask.
                    std = state['sum'].sparse_mask(grad)
                    std_values = std._values().sqrt_().add_(1e-10)
                    p.data.add_(make_sparse(grad_values / std_values), alpha=-clr)
                else:
                    # Fix: addcmul_/addcdiv_ scalar arguments must now be passed
                    # via the `value` keyword.
                    state['sum'].addcmul_(grad, grad, value=1)
                    std = state['sum'].sqrt().add_(1e-10)
                    p.data.addcdiv_(grad, std, value=-clr)
        return loss
### torch specific functions | |||||
def get_optimizer(name, parameters, lr, l2=0):
    """Build a torch optimizer by *name*: sgd, adagrad/myadagrad, adam, adamax, adadelta.

    Raises Exception for an unknown name. Note that `lr` is deliberately ignored
    for adam and adamax (they run with their library defaults).
    """
    if name == 'sgd':
        return torch.optim.SGD(parameters, lr=lr, weight_decay=l2)
    if name in ['adagrad', 'myadagrad']:
        # use my own adagrad to allow for init accumulator value
        return MyAdagrad(parameters, lr=lr, init_accu_value=0.1, weight_decay=l2)
    if name == 'adam':
        return torch.optim.Adam(parameters, weight_decay=l2)  # use default lr
    if name == 'adamax':
        return torch.optim.Adamax(parameters, weight_decay=l2)  # use default lr
    if name == 'adadelta':
        return torch.optim.Adadelta(parameters, lr=lr, weight_decay=l2)
    raise Exception("Unsupported optimizer: {}".format(name))
def change_lr(optimizer, new_lr):
    """Overwrite the learning rate of every param group of *optimizer*."""
    for group in optimizer.param_groups:
        group['lr'] = new_lr
def flatten_indices(seq_lens, width):
    """Map per-row lengths to flat indices in a row-major (len(seq_lens) x width) grid."""
    return [row * width + col
            for row, length in enumerate(seq_lens)
            for col in range(length)]
def set_cuda(var, cuda):
    """Move *var* to the GPU when *cuda* is truthy; otherwise return it unchanged."""
    return var.cuda() if cuda else var
def keep_partial_grad(grad, topk):
    """Zero out all but the first *topk* rows of *grad*, in place, and return it."""
    assert topk < grad.size(0)
    grad.data[topk:].zero_()
    return grad
### model IO | |||||
def save(model, optimizer, opt, filename): | |||||
params = { | |||||
'model': model.state_dict(), | |||||
'optimizer': optimizer.state_dict(), | |||||
'config': opt | |||||
} | |||||
try: | |||||
torch.save(params, filename) | |||||
except BaseException: | |||||
print("[ Warning: model saving failed. ]") | |||||
def load(model, optimizer, filename):
    """Restore model/optimizer state from a checkpoint written by `save`.

    Returns (model, optimizer, config). `model` and/or `optimizer` may be None,
    in which case the corresponding state is skipped.

    Fix: the original printed the failure message and fell through, which then
    crashed with a confusing NameError on the unbound `dump`; re-raise the
    original loading error instead.
    """
    try:
        dump = torch.load(filename)
    except BaseException:
        print("[ Fail: model loading failed. ]")
        raise
    if model is not None:
        model.load_state_dict(dump['model'])
    if optimizer is not None:
        optimizer.load_state_dict(dump['optimizer'])
    opt = dump['config']
    return model, optimizer, opt
def load_config(filename):
    """Return only the 'config' entry of a checkpoint written by `save`.

    Fix: the original printed the failure message and fell through to
    `dump['config']`, crashing with a NameError on the unbound `dump`;
    re-raise the original loading error instead.
    """
    try:
        dump = torch.load(filename)
    except BaseException:
        print("[ Fail: model loading failed. ]")
        raise
    return dump['config']