# ===== File 1 (likely TaNP.py, per `from TaNP import Trainer` below): NP model + Trainer =====
| import torch | |||
| import numpy as np | |||
| from random import randint | |||
| from copy import deepcopy | |||
| from torch.autograd import Variable | |||
| from torch.nn import functional as F | |||
| from collections import OrderedDict | |||
| from embeddings_TaNP import Item, User, Encoder, MuSigmaEncoder, Decoder, Gating_Decoder, TaskEncoder, MemoryUnit,Movie_item,Movie_user | |||
| import torch.nn as nn | |||
class NP(nn.Module):
    """TaNP neural process.

    Embeds (item, user) interaction rows, encodes the task into a latent
    variable z (latent path) plus a memory-refined task representation
    (deterministic path), and decodes ratings for target interactions.
    """

    def __init__(self, config):
        super(NP, self).__init__()
        # change for Movie lens
        # self.x_dim = config['second_embedding_dim'] * 2
        # x is the concatenated item+user embedding: 8 fields of 32 dims each.
        self.x_dim = 32 * 8
        # use one-hot or not?
        self.y_dim = 1
        self.z1_dim = config['z1_dim']
        self.z2_dim = config['z2_dim']
        # z is the dimension size of mu and sigma.
        self.z_dim = config['z_dim']
        # Hidden sizes of latent encoder, decoder, and task encoder.
        self.enc_h1_dim = config['enc_h1_dim']
        self.enc_h2_dim = config['enc_h2_dim']
        self.dec_h1_dim = config['dec_h1_dim']
        self.dec_h2_dim = config['dec_h2_dim']
        self.dec_h3_dim = config['dec_h3_dim']
        self.taskenc_h1_dim = config['taskenc_h1_dim']
        self.taskenc_h2_dim = config['taskenc_h2_dim']
        self.taskenc_final_dim = config['taskenc_final_dim']
        self.clusters_k = config['clusters_k']
        self.temperture = config['temperature']  # (sic) attribute name kept as-is
        self.dropout_rate = config['dropout_rate']
        # Sub-networks. MovieLens-specific embedders replace the generic ones.
        # self.item_emb = Item(config)
        # self.user_emb = User(config)
        self.item_emb = Movie_item(config)
        self.user_emb = Movie_user(config)
        # Latent path: (x, y) -> per-pair z_i -> aggregate -> (mu, sigma, z).
        self.xy_to_z = Encoder(self.x_dim, self.y_dim, self.enc_h1_dim,
                               self.enc_h2_dim, self.z1_dim, self.dropout_rate)
        self.z_to_mu_sigma = MuSigmaEncoder(self.z1_dim, self.z2_dim, self.z_dim)
        # Deterministic path: (x, y) -> task vector, refined by the memory unit.
        self.xy_to_task = TaskEncoder(self.x_dim, self.y_dim, self.taskenc_h1_dim,
                                      self.taskenc_h2_dim, self.taskenc_final_dim,
                                      self.dropout_rate)
        self.memoryunit = MemoryUnit(self.clusters_k, self.taskenc_final_dim, self.temperture)
        # self.xz_to_y = Gating_Decoder(self.x_dim, self.z_dim, self.taskenc_final_dim, self.dec_h1_dim, self.dec_h2_dim, self.dec_h3_dim, self.y_dim, self.dropout_rate)
        self.xz_to_y = Decoder(self.x_dim, self.z_dim, self.taskenc_final_dim,
                               self.dec_h1_dim, self.dec_h2_dim, self.dec_h3_dim,
                               self.y_dim, self.dropout_rate)

    def aggregate(self, z_i):
        """Mean-pool per-interaction representations into one task vector."""
        return torch.mean(z_i, dim=0)

    def xy_to_mu_sigma(self, x, y):
        """Encode every (x, y) pair, aggregate, and map to (mu, log_sigma, z)."""
        per_pair = self.xy_to_z(x, y)
        pooled = self.aggregate(per_pair)
        return self.z_to_mu_sigma(pooled)

    # embedding each (item, user) as the x for np
    def embedding(self, x):
        """Slice a raw feature row into its fields and embed each one.

        Column layout used here: 0 rate | 1:26 genre | 26:2212 director |
        2212:10242 actor | 10242 gender | 10243 age | 10244 occupation |
        10245 area.
        """
        rate_idx = Variable(x[:, 0], requires_grad=False)
        genre_idx = Variable(x[:, 1:26], requires_grad=False)
        director_idx = Variable(x[:, 26:2212], requires_grad=False)
        actor_idx = Variable(x[:, 2212:10242], requires_grad=False)
        gender_idx = Variable(x[:, 10242], requires_grad=False)
        age_idx = Variable(x[:, 10243], requires_grad=False)
        occupation_idx = Variable(x[:, 10244], requires_grad=False)
        area_idx = Variable(x[:, 10245], requires_grad=False)
        item_part = self.item_emb(rate_idx, genre_idx, director_idx, actor_idx)
        user_part = self.user_emb(gender_idx, age_idx, occupation_idx, area_idx)
        return torch.cat((item_part, user_part), 1)

    def forward(self, x_context, y_context, x_target, y_target):
        """Training mode returns (pred, target mu/sigma, context mu/sigma, C);
        eval mode returns only the prediction built from the context z."""
        x_context_embed = self.embedding(x_context)
        x_target_embed = self.embedding(x_target)
        if self.training:
            # sigma is log_sigma actually
            mu_target, sigma_target, z_target = self.xy_to_mu_sigma(x_target_embed, y_target)
            mu_context, sigma_context, z_context = self.xy_to_mu_sigma(x_context_embed, y_context)
            mean_task = self.aggregate(self.xy_to_task(x_context_embed, y_context))
            C_distribution, new_task_embed = self.memoryunit(mean_task)
            p_y_pred = self.xz_to_y(x_target_embed, z_target, new_task_embed)
            return p_y_pred, mu_target, sigma_target, mu_context, sigma_context, C_distribution
        else:
            mu_context, sigma_context, z_context = self.xy_to_mu_sigma(x_context_embed, y_context)
            mean_task = self.aggregate(self.xy_to_task(x_context_embed, y_context))
            C_distribution, new_task_embed = self.memoryunit(mean_task)
            p_y_pred = self.xz_to_y(x_target_embed, z_context, new_task_embed)
            return p_y_pred
class Trainer(torch.nn.Module):
    """Optimization wrapper around the NP model.

    Handles the context/target split of each task, the NP loss plus the
    DEC-style clustering loss, the Adam update, and test-time recommendation.
    """

    def __init__(self, config):
        self.opt = config
        super(Trainer, self).__init__()
        self.use_cuda = config['use_cuda']
        self.np = NP(self.opt)
        self._lambda = config['lambda']  # weight of the clustering loss
        self.optimizer = torch.optim.Adam(self.np.parameters(), lr=config['lr'])

    # our kl divergence
    def kl_div(self, mu_target, logsigma_target, mu_context, logsigma_context):
        """Analytic KL( N(mu_t, s_t^2) || N(mu_c, s_c^2) ), summed over dims.

        Inputs carry log-sigma (not sigma).
        """
        target_sigma = torch.exp(logsigma_target)
        context_sigma = torch.exp(logsigma_context)
        # FIX: the old expression ended in `/ 2 * context_sigma ** 2`, which by
        # operator precedence divides by 2 and then MULTIPLIES by sigma_c^2.
        # The Gaussian KL denominator must be (2 * sigma_c^2).
        kl_div = (logsigma_context - logsigma_target) - 0.5 + (
            ((target_sigma ** 2) + (mu_target - mu_context) ** 2)
            / (2 * context_sigma ** 2))
        kl_div = kl_div.sum()
        return kl_div

    # new kl divergence -- kl(st|sc)
    def new_kl_div(self, prior_mu, prior_var, posterior_mu, posterior_var):
        """KL between diagonal Gaussians parameterized by mean and log-variance."""
        kl_div = (torch.exp(posterior_var) + (posterior_mu - prior_mu) ** 2) / torch.exp(prior_var) - 1. + (prior_var - posterior_var)
        kl_div = 0.5 * kl_div.sum()
        return kl_div

    def loss(self, p_y_pred, y_target, mu_target, sigma_target, mu_context, sigma_context):
        """NP loss: MSE reconstruction + KL(target posterior || context prior)."""
        regression_loss = F.mse_loss(p_y_pred, y_target.view(-1, 1))
        # kl divergence between target and context
        kl = self.new_kl_div(mu_context, sigma_context, mu_target, sigma_target)
        return regression_loss + kl

    def context_target_split(self, support_set_x, support_set_y, query_set_x, query_set_y):
        """Randomly split one task into context / target; the target set is a
        superset of the context set (standard NP practice)."""
        total_x = torch.cat((support_set_x, query_set_x), 0)
        total_y = torch.cat((support_set_y, query_set_y), 0)
        total_size = total_x.size(0)
        context_min = self.opt['context_min']
        context_max = self.opt['context_max']
        extra_tar_min = self.opt['target_extra_min']
        # here we simply use the total_size as the maximum of target size.
        num_context = randint(context_min, context_max)
        num_target = randint(extra_tar_min, total_size - num_context)
        sampled = np.random.choice(total_size, num_context + num_target, replace=False)
        x_context = total_x[sampled[:num_context], :]
        y_context = total_y[sampled[:num_context]]
        x_target = total_x[sampled, :]
        y_target = total_y[sampled]
        return x_context, y_context, x_target, y_target

    def new_context_target_split(self, support_set_x, support_set_y, query_set_x, query_set_y):
        """Split variant whose sizes adapt to (possibly small) task sizes."""
        total_x = torch.cat((support_set_x, query_set_x), 0)
        total_y = torch.cat((support_set_y, query_set_y), 0)
        total_size = total_x.size(0)
        context_min = self.opt['context_min']
        # change for Movie lens: never request more context points than exist.
        context_min = min(context_min, total_size - 1)
        # np.random.randint excludes the upper bound.
        num_context = np.random.randint(context_min, total_size)
        num_target = np.random.randint(0, total_size - num_context)
        sampled = np.random.choice(total_size, num_context + num_target, replace=False)
        x_context = total_x[sampled[:num_context], :]
        y_context = total_y[sampled[:num_context]]
        x_target = total_x[sampled, :]
        y_target = total_y[sampled]
        return x_context, y_context, x_target, y_target

    def global_update(self, support_set_xs, support_set_ys, query_set_xs, query_set_ys):
        """One optimization step over a batch of tasks.

        Returns (total loss value, per-task cluster assignments as numpy).
        """
        batch_sz = len(support_set_xs)
        losses = []
        C_distribs = []
        if self.use_cuda:
            for i in range(batch_sz):
                support_set_xs[i] = support_set_xs[i].cuda()
                support_set_ys[i] = support_set_ys[i].cuda()
                query_set_xs[i] = query_set_xs[i].cuda()
                query_set_ys[i] = query_set_ys[i].cuda()
        for i in range(batch_sz):
            x_context, y_context, x_target, y_target = self.new_context_target_split(
                support_set_xs[i], support_set_ys[i], query_set_xs[i], query_set_ys[i])
            p_y_pred, mu_target, sigma_target, mu_context, sigma_context, C_distribution = self.np(
                x_context, y_context, x_target, y_target)
            C_distribs.append(C_distribution)
            loss = self.loss(p_y_pred, y_target, mu_target, sigma_target, mu_context, sigma_context)
            losses.append(loss)
        # calculate target distribution for clustering in batch manner.
        # batchsize * k
        C_distribs = torch.stack(C_distribs)
        # batchsize * k
        C_distribs_sq = torch.pow(C_distribs, 2)
        # 1 * k
        C_distribs_sum = torch.sum(C_distribs, dim=0, keepdim=True)
        # batchsize * k
        temp = C_distribs_sq / C_distribs_sum
        # batchsize * 1
        temp_sum = torch.sum(temp, dim=1, keepdim=True)
        target_distribs = temp / temp_sum
        # KL between soft assignments (as log-probs) and the sharpened targets.
        clustering_loss = self._lambda * F.kl_div(C_distribs.log(), target_distribs, reduction='batchmean')
        np_losses_mean = torch.stack(losses).mean(0)
        total_loss = np_losses_mean + clustering_loss
        self.optimizer.zero_grad()
        total_loss.backward()
        self.optimizer.step()
        return total_loss.item(), C_distribs.cpu().detach().numpy()

    def query_rec(self, support_set_xs, support_set_ys, query_set_xs, query_set_ys):
        """Predict query ratings for one task.

        Returns (mse over the query set, item indices sorted by predicted score).
        """
        batch_sz = 1
        # used for calculating the rmse.
        losses_q = []
        if self.use_cuda:
            for i in range(batch_sz):
                support_set_xs[i] = support_set_xs[i].cuda()
                support_set_ys[i] = support_set_ys[i].cuda()
                query_set_xs[i] = query_set_xs[i].cuda()
                query_set_ys[i] = query_set_ys[i].cuda()
        for i in range(batch_sz):
            # Eval-mode NP returns only the prediction tensor.
            query_set_y_pred = self.np(support_set_xs[i], support_set_ys[i], query_set_xs[i], query_set_ys[i])
            loss_q = F.mse_loss(query_set_y_pred, query_set_ys[i].view(-1, 1))
            losses_q.append(loss_q)
        losses_q = torch.stack(losses_q).mean(0)
        output_list, recommendation_list = query_set_y_pred.view(-1).sort(descending=True)
        return losses_q.item(), recommendation_list
# ===== File 2 (likely TaNP_training.py, per `from TaNP_training import training` below): training loop =====
| import os | |||
| import torch | |||
| import pickle | |||
| import random | |||
| from eval import testing | |||
def training(trainer, opt, train_dataset, test_dataset, batch_size, num_epoch, model_save=True, model_filename=None, logger=None):
    """Train `trainer` for `num_epoch` epochs and log test metrics per epoch.

    train_dataset / test_dataset are lists of (supp_x, supp_y, query_x, query_y)
    tuples. On the final epoch the collected cluster assignments are pickled to
    'output_att'; the model is saved to `model_filename` if `model_save`.
    """
    training_set_size = len(train_dataset)
    for epoch in range(num_epoch):
        random.shuffle(train_dataset)
        # Floor division (was int(a / b)): the trailing partial batch is dropped.
        num_batch = training_set_size // batch_size
        a, b, c, d = zip(*train_dataset)
        trainer.train()
        all_C_distribs = []
        # Guard: keeps the log line below well-defined when num_batch == 0.
        train_loss = float('nan')
        for i in range(num_batch):
            # NOTE: the old try/except IndexError here was dead code —
            # list slicing never raises IndexError.
            supp_xs = list(a[batch_size * i:batch_size * (i + 1)])
            supp_ys = list(b[batch_size * i:batch_size * (i + 1)])
            query_xs = list(c[batch_size * i:batch_size * (i + 1)])
            query_ys = list(d[batch_size * i:batch_size * (i + 1)])
            train_loss, batch_C_distribs = trainer.global_update(supp_xs, supp_ys, query_xs, query_ys)
            all_C_distribs.append(batch_C_distribs)
        P5, NDCG5, MAP5, P7, NDCG7, MAP7, P10, NDCG10, MAP10 = testing(trainer, opt, test_dataset)
        logger.log(
            "{}\t{:.6f}\t TOP-5 {:.4f}\t{:.4f}\t{:.4f}\t TOP-7: {:.4f}\t{:.4f}\t{:.4f}"
            "\t TOP-10: {:.4f}\t{:.4f}\t{:.4f}".
            format(epoch, train_loss, P5, NDCG5, MAP5, P7, NDCG7, MAP7, P10, NDCG10, MAP10))
        if epoch == (num_epoch - 1):
            with open('output_att', 'wb') as fp:
                pickle.dump(all_C_distribs, fp)
    if model_save:
        torch.save(trainer.state_dict(), model_filename)
# ===== File 3 (likely embeddings_TaNP.py, per the import at the top of File 1): embedding / encoder / decoder modules =====
| import torch | |||
| import torch.nn as nn | |||
| import torch.nn.init as init | |||
| import torch.nn.functional as F | |||
class Item(torch.nn.Module):
    """Two-layer ReLU MLP that embeds a raw item feature vector."""

    def __init__(self, config):
        super(Item, self).__init__()
        self.feature_dim = config['if_dim']
        self.first_embedding_dim = config['first_embedding_dim']
        self.second_embedding_dim = config['second_embedding_dim']
        self.first_embedding_layer = torch.nn.Linear(
            in_features=self.feature_dim,
            out_features=self.first_embedding_dim,
            bias=True)
        self.second_embedding_layer = torch.nn.Linear(
            in_features=self.first_embedding_dim,
            out_features=self.second_embedding_dim,
            bias=True)

    def forward(self, x, vars=None):
        """Return ReLU(W2 @ ReLU(W1 @ x + b1) + b2)."""
        hidden = F.relu(self.first_embedding_layer(x))
        return F.relu(self.second_embedding_layer(hidden))
class Movie_item(torch.nn.Module):
    """Embeds a MovieLens item from its rate / genre / director / actor fields.

    The single-valued rate uses an Embedding lookup; each multi-hot field goes
    through a bias-free Linear layer whose output is divided by the count of
    active flags, i.e. a mean over the active rows of the weight matrix.
    """

    def __init__(self, config):
        super(Movie_item, self).__init__()
        self.num_rate = config['num_rate']
        self.num_genre = config['num_genre']
        self.num_director = config['num_director']
        self.num_actor = config['num_actor']
        self.embedding_dim = config['embedding_dim']
        # change for Movie: four concatenated fields.
        self.feature_dim = 4 * self.embedding_dim
        self.embedding_rate = torch.nn.Embedding(
            num_embeddings=self.num_rate,
            embedding_dim=self.embedding_dim)
        self.embedding_genre = torch.nn.Linear(
            in_features=self.num_genre,
            out_features=self.embedding_dim,
            bias=False)
        self.embedding_director = torch.nn.Linear(
            in_features=self.num_director,
            out_features=self.embedding_dim,
            bias=False)
        self.embedding_actor = torch.nn.Linear(
            in_features=self.num_actor,
            out_features=self.embedding_dim,
            bias=False)

    def forward(self, rate_idx, genre_idx, director_idx, actors_idx, vars=None):
        """Return the (batch, 4 * embedding_dim) item embedding."""
        def mean_pool(layer, multi_hot):
            # NOTE(review): assumes every row has at least one active flag,
            # otherwise this divides by zero — confirm against the data loader.
            dense = multi_hot.float()
            return layer(dense) / torch.sum(dense, 1).view(-1, 1)

        parts = (self.embedding_rate(rate_idx),
                 mean_pool(self.embedding_genre, genre_idx),
                 mean_pool(self.embedding_director, director_idx),
                 mean_pool(self.embedding_actor, actors_idx))
        return torch.cat(parts, 1)
class User(torch.nn.Module):
    """Two-layer ReLU MLP that embeds a raw user feature vector."""

    def __init__(self, config):
        super(User, self).__init__()
        self.feature_dim = config['uf_dim']
        self.first_embedding_dim = config['first_embedding_dim']
        self.second_embedding_dim = config['second_embedding_dim']
        self.first_embedding_layer = torch.nn.Linear(
            in_features=self.feature_dim,
            out_features=self.first_embedding_dim,
            bias=True)
        self.second_embedding_layer = torch.nn.Linear(
            in_features=self.first_embedding_dim,
            out_features=self.second_embedding_dim,
            bias=True)

    def forward(self, x, vars=None):
        """Return ReLU(W2 @ ReLU(W1 @ x + b1) + b2)."""
        hidden = F.relu(self.first_embedding_layer(x))
        return F.relu(self.second_embedding_layer(hidden))
class Movie_user(torch.nn.Module):
    """Embeds a MovieLens user from gender / age / occupation / zipcode ids."""

    def __init__(self, config):
        super(Movie_user, self).__init__()
        self.num_gender = config['num_gender']
        self.num_age = config['num_age']
        self.num_occupation = config['num_occupation']
        self.num_zipcode = config['num_zipcode']
        self.embedding_dim = config['embedding_dim']
        self.embedding_gender = torch.nn.Embedding(
            num_embeddings=self.num_gender,
            embedding_dim=self.embedding_dim)
        self.embedding_age = torch.nn.Embedding(
            num_embeddings=self.num_age,
            embedding_dim=self.embedding_dim)
        self.embedding_occupation = torch.nn.Embedding(
            num_embeddings=self.num_occupation,
            embedding_dim=self.embedding_dim)
        self.embedding_area = torch.nn.Embedding(
            num_embeddings=self.num_zipcode,
            embedding_dim=self.embedding_dim)

    def forward(self, gender_idx, age_idx, occupation_idx, area_idx):
        """Look up each categorical field and concatenate along dim 1."""
        lookups = ((self.embedding_gender, gender_idx),
                   (self.embedding_age, age_idx),
                   (self.embedding_occupation, occupation_idx),
                   (self.embedding_area, area_idx))
        return torch.cat([table(idx) for table, idx in lookups], 1)
class Encoder(nn.Module):
    """Latent-path encoder: maps each (x_i, y_i) pair to a representation z_i.

    Architecture: Linear -> Dropout -> ReLU -> Linear -> Dropout -> ReLU -> Linear.
    """

    def __init__(self, x_dim, y_dim, h1_dim, h2_dim, z1_dim, dropout_rate):
        super(Encoder, self).__init__()
        self.x_dim = x_dim
        self.y_dim = y_dim
        self.h1_dim = h1_dim
        self.h2_dim = h2_dim
        self.z1_dim = z1_dim
        self.dropout_rate = dropout_rate
        # Build the MLP stage by stage; layer order matches the original net.
        widths = [self.x_dim + self.y_dim, self.h1_dim, self.h2_dim]
        stages = []
        for d_in, d_out in zip(widths[:-1], widths[1:]):
            stages += [nn.Linear(d_in, d_out),
                       torch.nn.Dropout(self.dropout_rate),
                       nn.ReLU(inplace=True)]
        stages.append(nn.Linear(self.h2_dim, self.z1_dim))
        self.input_to_hidden = nn.Sequential(*stages)

    def forward(self, x, y):
        """Concatenate x with y (as a column) and run the MLP."""
        pairs = torch.cat((x, y.view(-1, 1)), dim=1)
        return self.input_to_hidden(pairs)
class MuSigmaEncoder(nn.Module):
    """Maps the aggregated task representation to (mu, log_sigma) and draws a
    reparameterized latent sample z."""

    def __init__(self, z1_dim, z2_dim, z_dim):
        super(MuSigmaEncoder, self).__init__()
        self.z1_dim = z1_dim
        self.z2_dim = z2_dim
        self.z_dim = z_dim
        self.z_to_hidden = nn.Linear(self.z1_dim, self.z2_dim)
        self.hidden_to_mu = nn.Linear(self.z2_dim, z_dim)
        self.hidden_to_logsigma = nn.Linear(self.z2_dim, z_dim)

    def forward(self, z_input):
        """Return (mu, log_sigma, z) with z = mu + eps * exp(0.5 * log_sigma)."""
        hidden = torch.relu(self.z_to_hidden(z_input))
        mu = self.hidden_to_mu(hidden)
        log_sigma = self.hidden_to_logsigma(hidden)
        std = torch.exp(0.5 * log_sigma)
        # Reparameterization trick: one Gaussian draw keeps z differentiable.
        z = torch.randn_like(std) * std + mu
        return mu, log_sigma, z
class TaskEncoder(nn.Module):
    """Deterministic-path encoder: maps each (x_i, y_i) pair to a task feature.

    Same MLP shape as Encoder but with its own final width.
    """

    def __init__(self, x_dim, y_dim, h1_dim, h2_dim, final_dim, dropout_rate):
        super(TaskEncoder, self).__init__()
        self.x_dim = x_dim
        self.y_dim = y_dim
        self.h1_dim = h1_dim
        self.h2_dim = h2_dim
        self.final_dim = final_dim
        self.dropout_rate = dropout_rate
        # Layer order matches the original network exactly.
        widths = [self.x_dim + self.y_dim, self.h1_dim, self.h2_dim]
        stages = []
        for d_in, d_out in zip(widths[:-1], widths[1:]):
            stages += [nn.Linear(d_in, d_out),
                       torch.nn.Dropout(self.dropout_rate),
                       nn.ReLU(inplace=True)]
        stages.append(nn.Linear(self.h2_dim, self.final_dim))
        self.input_to_hidden = nn.Sequential(*stages)

    def forward(self, x, y):
        """Concatenate x with y (as a column) and run the MLP."""
        pairs = torch.cat((x, y.view(-1, 1)), dim=1)
        return self.input_to_hidden(pairs)
class MemoryUnit(nn.Module):
    """Key memory over task clusters.

    Holds `clusters_k` learnable key vectors; given a task embedding it
    computes a soft assignment C over the keys and returns the task embedding
    augmented with the assignment-weighted sum of keys.
    """

    def __init__(self, clusters_k, emb_size, temperature):
        super(MemoryUnit, self).__init__()
        self.clusters_k = clusters_k
        self.embed_size = emb_size
        self.temperature = temperature
        # k x d key matrix, Xavier-initialized.
        self.array = nn.Parameter(init.xavier_uniform_(torch.FloatTensor(self.clusters_k, self.embed_size)))

    def forward(self, task_embed):
        """Return (C, new_task_embed): 1 x k assignment and refined embedding."""
        # L2 distance from the task embedding to each key -> (k, 1).
        dist = torch.norm(task_embed - self.array, p=2, dim=1, keepdim=True)
        # Student-t style kernel over the distance.
        # NOTE(review): this uses the distance itself, not the squared distance
        # of the classic DEC formulation — confirm this is intended.
        sim = torch.pow((dist / self.temperature) + 1, (self.temperature + 1) / -2)
        # Normalize to a distribution and lay it out as 1 x k.
        C = torch.transpose(sim / sim.sum(), 0, 1)
        # 1*k @ k*d -> 1*d assignment-weighted key value.
        value = torch.mm(C, self.array)
        # simple add operation
        new_task_embed = value + task_embed
        return C, new_task_embed
| class Decoder(nn.Module): | |||
| """ | |||
| Maps target input x_target and z, r to predictions y_target. | |||
| """ | |||
| def __init__(self, x_dim, z_dim, task_dim, h1_dim, h2_dim, h3_dim, y_dim, dropout_rate): | |||
| super(Decoder, self).__init__() | |||
| self.x_dim = x_dim | |||
| self.z_dim = z_dim | |||
| self.task_dim = task_dim | |||
| self.h1_dim = h1_dim | |||
| self.h2_dim = h2_dim | |||
| self.h3_dim = h3_dim | |||
| self.y_dim = y_dim | |||
| self.dropout_rate = dropout_rate | |||
| self.dropout = nn.Dropout(self.dropout_rate) | |||
| self.hidden_layer_1 = nn.Linear(self.x_dim + self.z_dim, self.h1_dim) | |||
| self.hidden_layer_2 = nn.Linear(self.h1_dim, self.h2_dim) | |||
| self.hidden_layer_3 = nn.Linear(self.h2_dim, self.h3_dim) | |||
| self.film_layer_1_beta = nn.Linear(self.task_dim, self.h1_dim, bias=False) | |||
| self.film_layer_1_gamma = nn.Linear(self.task_dim, self.h1_dim, bias=False) | |||
| self.film_layer_2_beta = nn.Linear(self.task_dim, self.h2_dim, bias=False) | |||
| self.film_layer_2_gamma = nn.Linear(self.task_dim, self.h2_dim, bias=False) | |||
| self.film_layer_3_beta = nn.Linear(self.task_dim, self.h3_dim, bias=False) | |||
| self.film_layer_3_gamma = nn.Linear(self.task_dim, self.h3_dim, bias=False) | |||
| self.final_projection = nn.Linear(self.h3_dim, self.y_dim) | |||
| def forward(self, x, z, task): | |||
| interaction_size, _ = x.size() | |||
| z = z.unsqueeze(0).repeat(interaction_size, 1) | |||
| # Input is concatenation of z with every row of x | |||
| inputs = torch.cat((x, z), dim=1) | |||
| hidden_1 = self.hidden_layer_1(inputs) | |||
| beta_1 = torch.tanh(self.film_layer_1_beta(task)) | |||
| gamma_1 = torch.tanh(self.film_layer_1_gamma(task)) | |||
| hidden_1 = torch.mul(hidden_1, gamma_1) + beta_1 | |||
| hidden_1 = self.dropout(hidden_1) | |||
| hidden_2 = F.relu(hidden_1) | |||
| hidden_2 = self.hidden_layer_2(hidden_2) | |||
| beta_2 = torch.tanh(self.film_layer_2_beta(task)) | |||
| gamma_2 = torch.tanh(self.film_layer_2_gamma(task)) | |||
| hidden_2 = torch.mul(hidden_2, gamma_2) + beta_2 | |||
| hidden_2 = self.dropout(hidden_2) | |||
| hidden_3 = F.relu(hidden_2) | |||
| hidden_3 = self.hidden_layer_3(hidden_3) | |||
| beta_3 = torch.tanh(self.film_layer_3_beta(task)) | |||
| gamma_3 = torch.tanh(self.film_layer_3_gamma(task)) | |||
| hidden_final = torch.mul(hidden_3, gamma_3) + beta_3 | |||
| hidden_final = self.dropout(hidden_final) | |||
| hidden_final = F.relu(hidden_final) | |||
| y_pred = self.final_projection(hidden_final) | |||
| return y_pred | |||
class Gating_Decoder(nn.Module):
    """FiLM decoder variant with gated modulation parameters.

    Per stage, gamma and beta are each blended with an auxiliary signal eta
    through a sigmoid gate delta before the affine modulation is applied.
    """

    def __init__(self, x_dim, z_dim, task_dim, h1_dim, h2_dim, h3_dim, y_dim, dropout_rate):
        super(Gating_Decoder, self).__init__()
        self.x_dim = x_dim
        self.z_dim = z_dim
        self.task_dim = task_dim
        self.h1_dim = h1_dim
        self.h2_dim = h2_dim
        self.h3_dim = h3_dim
        self.y_dim = y_dim
        self.dropout_rate = dropout_rate
        self.dropout = nn.Dropout(self.dropout_rate)
        self.hidden_layer_1 = nn.Linear(self.x_dim + self.z_dim, self.h1_dim)
        self.hidden_layer_2 = nn.Linear(self.h1_dim, self.h2_dim)
        self.hidden_layer_3 = nn.Linear(self.h2_dim, self.h3_dim)
        self.film_layer_1_beta = nn.Linear(self.task_dim, self.h1_dim, bias=False)
        self.film_layer_1_gamma = nn.Linear(self.task_dim, self.h1_dim, bias=False)
        self.film_layer_1_eta = nn.Linear(self.task_dim, self.h1_dim, bias=False)
        self.film_layer_1_delta = nn.Linear(self.task_dim, self.h1_dim, bias=False)
        self.film_layer_2_beta = nn.Linear(self.task_dim, self.h2_dim, bias=False)
        self.film_layer_2_gamma = nn.Linear(self.task_dim, self.h2_dim, bias=False)
        self.film_layer_2_eta = nn.Linear(self.task_dim, self.h2_dim, bias=False)
        self.film_layer_2_delta = nn.Linear(self.task_dim, self.h2_dim, bias=False)
        self.film_layer_3_beta = nn.Linear(self.task_dim, self.h3_dim, bias=False)
        self.film_layer_3_gamma = nn.Linear(self.task_dim, self.h3_dim, bias=False)
        self.film_layer_3_eta = nn.Linear(self.task_dim, self.h3_dim, bias=False)
        self.film_layer_3_delta = nn.Linear(self.task_dim, self.h3_dim, bias=False)
        self.final_projection = nn.Linear(self.h3_dim, self.y_dim)

    def forward(self, x, z, task):
        """Predict one value per row of x, conditioned on z and the task."""
        n_rows, _ = x.size()
        # Concatenate z with every row of x.
        h = torch.cat((x, z.unsqueeze(0).repeat(n_rows, 1)), dim=1)
        stages = ((self.hidden_layer_1, self.film_layer_1_beta, self.film_layer_1_gamma,
                   self.film_layer_1_eta, self.film_layer_1_delta),
                  (self.hidden_layer_2, self.film_layer_2_beta, self.film_layer_2_gamma,
                   self.film_layer_2_eta, self.film_layer_2_delta),
                  (self.hidden_layer_3, self.film_layer_3_beta, self.film_layer_3_gamma,
                   self.film_layer_3_eta, self.film_layer_3_delta))
        for linear, beta_net, gamma_net, eta_net, delta_net in stages:
            h = linear(h)
            beta = torch.tanh(beta_net(task))
            gamma = torch.tanh(gamma_net(task))
            eta = torch.tanh(eta_net(task))
            delta = torch.sigmoid(delta_net(task))
            # Gate each modulation parameter toward eta by (1 - delta).
            gamma = gamma * delta + eta * (1 - delta)
            beta = beta * delta + eta * (1 - delta)
            h = F.relu(self.dropout(h * gamma + beta))
        return self.final_projection(h)
# ===== File 4 (likely eval.py, per `from eval import testing` below): evaluation =====
| """ | |||
| Run evaluation with saved models. | |||
| """ | |||
| import random | |||
| import argparse | |||
| from tqdm import tqdm | |||
| import torch | |||
| from utils.scorer import * | |||
def testing(trainer, opt, test_dataset):
    """Evaluate `trainer` on every test task, one task per mini-batch.

    Returns the mean precision / NDCG / MAP at cut-offs 5, 7 and 10,
    in the order (P5, NDCG5, MAP5, P7, NDCG7, MAP7, P10, NDCG10, MAP10).
    """
    minibatch_size = 1
    a, b, c, d = zip(*test_dataset)
    trainer.eval()
    all_loss = 0
    # cut-off k -> (precision list, average-precision list, ndcg list)
    metrics = {k: ([], [], []) for k in (5, 7, 10)}
    for i in range(len(test_dataset)):
        lo, hi = minibatch_size * i, minibatch_size * (i + 1)
        supp_xs = list(a[lo:hi])
        supp_ys = list(b[lo:hi])
        query_xs = list(c[lo:hi])
        query_ys = list(d[lo:hi])
        test_loss, recommendation_list = trainer.query_rec(supp_xs, supp_ys, query_xs, query_ys)
        all_loss += test_loss
        truth = query_ys[0].cpu().detach().numpy()
        for k in (5, 7, 10):
            pre, ap, ndcg = metrics[k]
            add_metric(recommendation_list, truth, pre, ap, ndcg, k)
    mpre5, mndcg5, map5 = cal_metric(*metrics[5])
    mpre7, mndcg7, map7 = cal_metric(*metrics[7])
    mpre10, mndcg10, map10 = cal_metric(*metrics[10])
    return mpre5, mndcg5, map5, mpre7, mndcg7, map7, mpre10, mndcg10, map10
# ===== File 5: bash launcher script for train_TaNP.py =====
# Hyper-parameter configuration for one TaNP training run.
# Embedding sizes for the two-layer item/user embedders.
first_embedding_dim=32
second_embedding_dim=16
# Latent-path dimensions (z1 -> z2 -> z).
z1_dim=32
z2_dim=32
z_dim=32
# Latent encoder hidden sizes.
enc_h1_dim=32
enc_h2_dim=16
# Task encoder hidden sizes and its output dimension.
taskenc_h1_dim=32
taskenc_h2_dim=32
taskenc_final_dim=16
# Memory unit: number of task clusters and student-t temperature.
clusters_k=10
temperature=1.0
# Weight balancing the clustering loss against the NP loss.
lambda=1.0
# Decoder hidden sizes.
dec_h1_dim=32
dec_h2_dim=32
dec_h3_dim=16
dropout_rate=0
# Optimizer settings.
lr=0.0001
optim='adam'
num_epoch=100
batch_size=32
# Data split ratios and per-task support/query sizes.
train_ratio=0.7
valid_ratio=0.1
support_size=20
query_size=10
max_len=200
context_min=20
# Launch training on GPU 0 with the settings above.
CUDA_VISIBLE_DEVICES=0 python train_TaNP.py \
--first_embedding_dim $first_embedding_dim \
--second_embedding_dim $second_embedding_dim \
--z1_dim $z1_dim \
--z2_dim $z2_dim \
--z_dim $z_dim \
--enc_h1_dim $enc_h1_dim \
--enc_h2_dim $enc_h2_dim \
--taskenc_h1_dim $taskenc_h1_dim \
--taskenc_h2_dim $taskenc_h2_dim \
--taskenc_final_dim $taskenc_final_dim \
--clusters_k $clusters_k \
--lambda $lambda \
--temperature $temperature \
--dec_h1_dim $dec_h1_dim \
--dec_h2_dim $dec_h2_dim \
--dec_h3_dim $dec_h3_dim \
--lr $lr \
--dropout_rate $dropout_rate \
--optim $optim \
--num_epoch $num_epoch \
--batch_size $batch_size \
--train_ratio $train_ratio \
--valid_ratio $valid_ratio \
--support_size $support_size \
--query_size $query_size \
--max_len $max_len \
--context_min $context_min
# ===== File 6 (likely train_TaNP.py, per the launcher above): main training script (truncated in this view) =====
| import os | |||
| from datetime import datetime | |||
| import time | |||
| import numpy as np | |||
| import random | |||
| import argparse | |||
| import pickle | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.optim as optim | |||
| from torch.autograd import Variable | |||
| import json | |||
| from utils.loader import Preprocess | |||
| from TaNP import Trainer | |||
| from TaNP_training import training | |||
| from utils import helper | |||
| from eval import testing | |||
# ---------------------------------------------------------------------------
# Command-line interface. Options marked `#1` were adjusted for the MovieLens
# setup; the original lastfm defaults are kept above them, commented out.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser()
# parser.add_argument('--data_dir', type=str, default='data/lastfm_20')#1
# parser.add_argument('--model_save_dir', type=str, default='save_model_dir')#1
parser.add_argument('--data_dir', type=str, default='/media/external_10TB/10TB/maheri/melu_data')#1
parser.add_argument('--model_save_dir', type=str, default='/media/external_10TB/10TB/maheri/tanp_data/tanp_models')#1
parser.add_argument('--id', type=str, default='1', help='used for save hyper-parameters.')#1
# user/item embedding sizes
parser.add_argument('--first_embedding_dim', type=int, default=32, help='Embedding dimension for item and user.')#1
parser.add_argument('--second_embedding_dim', type=int, default=16, help='Embedding dimension for item and user.')#1
# latent-path (neural process) dimensions
parser.add_argument('--z1_dim', type=int, default=32, help='The dimension of z1 in latent path.')
parser.add_argument('--z2_dim', type=int, default=32, help='The dimension of z2 in latent path.')
parser.add_argument('--z_dim', type=int, default=32, help='The dimension of z in latent path.')
parser.add_argument('--enc_h1_dim', type=int, default=64, help='The hidden first dimension of encoder.')
parser.add_argument('--enc_h2_dim', type=int, default=64, help='The hidden second dimension of encoder.')
# task encoder / task clustering hyper-parameters
parser.add_argument('--taskenc_h1_dim', type=int, default=128, help='The hidden first dimension of task encoder.')
parser.add_argument('--taskenc_h2_dim', type=int, default=64, help='The hidden second dimension of task encoder.')
parser.add_argument('--taskenc_final_dim', type=int, default=64, help='The hidden second dimension of task encoder.')
parser.add_argument('--clusters_k', type=int, default=7, help='Cluster numbers of tasks.')
parser.add_argument('--temperature', type=float, default=1.0, help='used for student-t distribution.')
parser.add_argument('--lambda', type=float, default=0.1, help='used to balance the clustering loss and NP loss.')
# decoder widths
parser.add_argument('--dec_h1_dim', type=int, default=128, help='The hidden first dimension of encoder.')
parser.add_argument('--dec_h2_dim', type=int, default=128, help='The hidden second dimension of encoder.')
parser.add_argument('--dec_h3_dim', type=int, default=128, help='The hidden third dimension of encoder.')
# used for movie datasets
parser.add_argument('--num_gender', type=int, default=2, help='User information.')#1
parser.add_argument('--num_age', type=int, default=7, help='User information.')#1
parser.add_argument('--num_occupation', type=int, default=21, help='User information.')#1
parser.add_argument('--num_zipcode', type=int, default=3402, help='User information.')#1
parser.add_argument('--num_rate', type=int, default=6, help='Item information.')#1
parser.add_argument('--num_genre', type=int, default=25, help='Item information.')#1
parser.add_argument('--num_director', type=int, default=2186, help='Item information.')#1
parser.add_argument('--num_actor', type=int, default=8030, help='Item information.')#1
# optimization
parser.add_argument('--dropout_rate', type=float, default=0, help='used in encoder and decoder.')
parser.add_argument('--lr', type=float, default=1e-4, help='Applies to SGD and Adagrad.')#1
parser.add_argument('--optim', type=str, default='adam', help='sgd, adagrad, adam or adamax.')
parser.add_argument('--num_epoch', type=int, default=150)#1
parser.add_argument('--batch_size', type=int, default=32)#1
# data split / episode construction
parser.add_argument('--train_ratio', type=float, default=0.7, help='Warm user ratio for training.')#1
parser.add_argument('--valid_ratio', type=float, default=0.1, help='Cold user ratio for validation.')#1
parser.add_argument('--seed', type=int, default=2020)#1
parser.add_argument('--save', type=int, default=0)#1
# NOTE(review): type=bool on argparse does not parse "False" as falsy; the
# --cpu flag below is the reliable way to disable CUDA.
parser.add_argument('--use_cuda', type=bool, default=torch.cuda.is_available())#1
parser.add_argument('--cpu', action='store_true', help='Ignore CUDA.')#1
parser.add_argument('--support_size', type=int, default=20)#1
parser.add_argument('--query_size', type=int, default=10)#1
parser.add_argument('--max_len', type=int, default=200, help='The max length of interactions for each user.')
parser.add_argument('--context_min', type=int, default=20, help='Minimum size of context range.')
# change for Movie lens
parser.add_argument('--embedding_dim', type=int, default=32, help='embedding dimension for each item/user feature of Movie lens')
parser.add_argument('--first_fc_hidden_dim', type=int, default=64, help='embedding dimension for each item/user feature of Movie lens')
parser.add_argument('--second_fc_hidden_dim', type=int, default=64, help='embedding dimension for each item/user feature of Movie lens')
args = parser.parse_args()
def seed_everything(seed=1023):
    """Seed every RNG source (hash, python, numpy, torch CPU/CUDA) for reproducibility."""
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # force deterministic cuDNN kernels (trades some speed for reproducibility)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
# ---- reproducibility ----
seed = args.seed
seed_everything(seed)
# --cpu forces CPU even when CUDA is available; otherwise also seed the CUDA RNG
if args.cpu:
    args.use_cuda = False
elif args.use_cuda:
    torch.cuda.manual_seed(args.seed)
# treat the parsed namespace as a plain dict from here on
opt = vars(args)
# print model info
helper.print_config(opt)
helper.ensure_dir(opt["model_save_dir"], verbose=True)
# save model config
helper.save_config(opt, opt["model_save_dir"] + "/" + opt["id"] + '.config', verbose=True)
# record training log
file_logger = helper.FileLogger(opt["model_save_dir"] + '/' + opt['id'] + ".log",
                                header="# epoch\ttrain_loss\tprecision5\tNDCG5\tMAP5\tprecision7"
                                       "\tNDCG7\tMAP7\tprecision10\tNDCG10\tMAP10")
# change for Movie Lens
# preprocess = Preprocess(opt)
print("Preprocess is done.")
print("Create model TaNP...")
# opt['uf_dim'] = preprocess.uf_dim
# opt['if_dim'] = preprocess.if_dim
trainer = Trainer(opt)
if opt['use_cuda']:
    trainer.cuda()
model_filename = "{}/{}.pt".format(opt['model_save_dir'], opt["id"])
# /4 since sup_x, sup_y, query_x, query_y
# change for Movie lens
# training_set_size = int(len(os.listdir("{}/{}/{}".format(opt["data_dir"], "training", "log"))) / 4)
training_set_size = int(len(os.listdir("{}/{}".format(opt["data_dir"], "warm_state"))) / 4)
supp_xs_s = []
supp_ys_s = []
query_xs_s = []
query_ys_s = []
# load the (support_x, support_y, query_x, query_y) quadruple for every warm task
for idx in range(training_set_size):
    # supp_xs_s.append(pickle.load(open("{}/{}/{}/supp_x_{}.pkl".format(opt["data_dir"], "training", "log", idx), "rb")))
    # supp_ys_s.append(pickle.load(open("{}/{}/{}/supp_y_{}.pkl".format(opt["data_dir"], "training", "log", idx), "rb")))
    # query_xs_s.append(pickle.load(open("{}/{}/{}/query_x_{}.pkl".format(opt["data_dir"], "training", "log", idx), "rb")))
    # query_ys_s.append(pickle.load(open("{}/{}/{}/query_y_{}.pkl".format(opt["data_dir"], "training", "log", idx), "rb")))
    supp_xs_s.append(pickle.load(open("{}/{}/supp_x_{}.pkl".format(opt["data_dir"], "warm_state", idx), "rb")))
    supp_ys_s.append(pickle.load(open("{}/{}/supp_y_{}.pkl".format(opt["data_dir"], "warm_state", idx), "rb")))
    query_xs_s.append(pickle.load(open("{}/{}/query_x_{}.pkl".format(opt["data_dir"], "warm_state", idx), "rb")))
    query_ys_s.append(pickle.load(open("{}/{}/query_y_{}.pkl".format(opt["data_dir"], "warm_state", idx), "rb")))
train_dataset = list(zip(supp_xs_s, supp_ys_s, query_xs_s, query_ys_s))
# free per-list references once zipped into the dataset
del (supp_xs_s, supp_ys_s, query_xs_s, query_ys_s)
# change for Movie lens
# testing_set_size = int(len(os.listdir("{}/{}/{}".format(opt["data_dir"], "testing", "log"))) / 4)
testing_set_size = int(len(os.listdir("{}/{}".format(opt["data_dir"], "user_cold_state"))) / 4)
supp_xs_s = []
supp_ys_s = []
query_xs_s = []
query_ys_s = []
# same quadruple layout for the user cold-start evaluation tasks
for idx in range(testing_set_size):
    # change for Movie lens
    # supp_xs_s.append(
    #     pickle.load(open("{}/{}/{}/supp_x_{}.pkl".format(opt["data_dir"], "testing", "log", idx), "rb")))
    # supp_ys_s.append(
    #     pickle.load(open("{}/{}/{}/supp_y_{}.pkl".format(opt["data_dir"], "testing", "log", idx), "rb")))
    # query_xs_s.append(
    #     pickle.load(open("{}/{}/{}/query_x_{}.pkl".format(opt["data_dir"], "testing", "log", idx), "rb")))
    # query_ys_s.append(
    #     pickle.load(open("{}/{}/{}/query_y_{}.pkl".format(opt["data_dir"], "testing", "log", idx), "rb")))
    supp_xs_s.append(
        pickle.load(open("{}/{}/supp_x_{}.pkl".format(opt["data_dir"], "user_cold_state", idx), "rb")))
    supp_ys_s.append(
        pickle.load(open("{}/{}/supp_y_{}.pkl".format(opt["data_dir"], "user_cold_state", idx), "rb")))
    query_xs_s.append(
        pickle.load(open("{}/{}/query_x_{}.pkl".format(opt["data_dir"], "user_cold_state", idx), "rb")))
    query_ys_s.append(
        pickle.load(open("{}/{}/query_y_{}.pkl".format(opt["data_dir"], "user_cold_state", idx), "rb")))
test_dataset = list(zip(supp_xs_s, supp_ys_s, query_xs_s, query_ys_s))
del (supp_xs_s, supp_ys_s, query_xs_s, query_ys_s)
print("# epoch\ttrain_loss\tprecision5\tNDCG5\tMAP5\tprecision7\tNDCG7\tMAP7\tprecision10\tNDCG10\tMAP10")
# train from scratch when no checkpoint exists; otherwise load and reuse it
if not os.path.exists(model_filename):
    print("Start training...")
    training(trainer, opt, train_dataset, test_dataset, batch_size=opt['batch_size'], num_epoch=opt['num_epoch'],
             model_save=opt["save"], model_filename=model_filename, logger=file_logger)
else:
    print("Load pre-trained model...")
    # "<id>.pt"[:-2] + "config" -> "<id>.config", i.e. the file saved above
    opt = helper.load_config(model_filename[:-2] + "config")
    helper.print_config(opt)
    trained_state_dict = torch.load(model_filename)
    trainer.load_state_dict(trained_state_dict)
| @@ -0,0 +1,74 @@ | |||
| """ | |||
| Helper functions. | |||
| """ | |||
| import os | |||
| import json | |||
| import argparse | |||
| ### IO | |||
def check_dir(d):
    """Terminate the process with status 1 when directory *d* does not exist."""
    if os.path.exists(d):
        return
    print("Directory {} does not exist. Exit.".format(d))
    exit(1)
def check_files(files):
    """Terminate with status 1 as soon as any non-None path in *files* is missing."""
    for path in files:
        if path is None:
            continue
        if os.path.exists(path):
            continue
        print("File {} does not exist. Exit.".format(path))
        exit(1)
def ensure_dir(d, verbose=True):
    """Create directory *d* (with parents) when it does not already exist."""
    if os.path.exists(d):
        return
    if verbose:
        print("Directory {} do not exist; creating...".format(d))
    os.makedirs(d)
def save_config(config, path, verbose=True):
    """Serialize *config* as indented JSON to *path*; return the config unchanged."""
    with open(path, 'w') as fh:
        fh.write(json.dumps(config, indent=2))
    if verbose:
        print("Config saved to file {}".format(path))
    return config
def load_config(path, verbose=True):
    """Read and return a JSON config dict from *path*."""
    with open(path) as fh:
        config = json.loads(fh.read())
    if verbose:
        print("Config loaded from file {}".format(path))
    return config
def print_config(config):
    """Pretty-print all key/value pairs of *config* to stdout."""
    rows = ["\t{} : {}\n".format(key, str(val)) for key, val in config.items()]
    info = "Running with the following configs:\n" + "".join(rows)
    print("\n" + info + "\n")
    return
class FileLogger(object):
    """
    A file logger that opens the file periodically and write to it.

    The log file is re-opened on every call, so partial results survive a
    crash; any pre-existing file with the same name is removed on startup.
    """

    def __init__(self, filename, header=None):
        self.filename = filename
        # start from a clean file on each run
        if os.path.exists(filename):
            os.remove(filename)
        if header is not None:
            with open(filename, 'w') as out:
                print(header, file=out)

    def log(self, message):
        # echo to stdout and append to the log file
        print(message)
        with open(self.filename, 'a') as out:
            print(message, file=out)
| @@ -0,0 +1,218 @@ | |||
| import json | |||
| import random | |||
| import torch | |||
| import numpy as np | |||
| import pickle | |||
| import codecs | |||
| import re | |||
| import os | |||
| import datetime | |||
| import tqdm | |||
| import pandas as pd | |||
# Convert raw id strings to a dict: key = id (int), value = one-hot row (tensor).
# Elements of the input list are str-typed ids.
def to_onehot_dict(list):
    """Map each id in *list* to a 1 x len(list) one-hot LongTensor, keyed by int id."""
    onehots = {}
    dim = len(list)
    # NOTE(review): assumes every id is an integer in [0, len(list)) — verify upstream.
    for raw_id in list:
        idx = int(raw_id)
        vec = torch.zeros(1, dim).long()
        vec[:, idx] = 1
        onehots[idx] = vec
    return onehots
def load_list(fname):
    """Read *fname* (utf-8) and return its lines, stripped, as a list."""
    with open(fname, encoding="utf-8") as fh:
        return [line.strip() for line in fh]
# used for merge dictionaries.
def merge_key(dict1, dict2):
    """Return a new dict combining both inputs; dict2's entries win on key clashes."""
    merged = dict(dict1)
    merged.update(dict2)
    return merged
def merge_value(dict1, dict2): # merge and item_cold
    """In place: extend dict1's list values with dict2's entries for matching keys."""
    for key, extra in dict2.items():
        if key not in dict1:
            # every key of dict2 is expected to already exist in dict1
            print('Unexpected key.')
            continue
        # if list(set(dict1[key]+value)) the final number of movies-1m is 1000205
        dict1[key] = dict1[key] + extra
def count_values(dict):
    """Total number of elements across all of the dict's (list) values."""
    return sum(len(value) for value in dict.values())
def construct_dictionary(user_list, total_dict):
    """Project *total_dict* onto the users in *user_list* (keys compared as str)."""
    return {str(user): total_dict[str(user)] for user in user_list}
class Preprocess(object):
    """
    Preprocess the training, validation and test data.
    Generate the episode-style data.

    Splits users into warm (train) / validation / cold (test) groups, builds
    one-hot encodings, removes test items never seen in training, and dumps
    per-user support/query episodes as pickle files under data_dir.
    """
    def __init__(self, opt):
        self.batch_size = opt["batch_size"]
        self.opt = opt
        # warm data ratio
        self.train_ratio = opt['train_ratio']
        self.valid_ratio = opt['valid_ratio']
        # remaining users become cold-start test users
        self.test_ratio = 1 - self.train_ratio - self.valid_ratio
        self.dataset_path = opt["data_dir"]
        self.support_size = opt['support_size']
        self.query_size = opt['query_size']
        self.max_len = opt['max_len']
        # save one-hot dimension length
        uf_dim, if_dim = self.preprocess(self.dataset_path)
        self.uf_dim = uf_dim
        self.if_dim = if_dim

    def preprocess(self, dataset_path):
        """ Preprocess the data and convert to ids.

        Returns (number of users, number of items) = one-hot dimensions.
        """
        #Create training-validation-test datasets
        print('Create training, validation and test data from scratch!')
        # interaction_dict_x maps user id -> list of item ids;
        # interaction_dict_y maps user id -> list of matching labels/ratings
        with open('./{}/interaction_dict_x.json'.format(dataset_path), 'r', encoding='utf-8') as f:
            inter_dict_x = json.loads(f.read())
        with open('./{}/interaction_dict_y.json'.format(dataset_path), 'r', encoding='utf-8') as f:
            inter_dict_y = json.loads(f.read())
        print('The size of total interactions is %d.' % (count_values(inter_dict_x)))  # 42346
        assert count_values(inter_dict_x) == count_values(inter_dict_y)
        with open('./{}/user_list.json'.format(dataset_path), 'r', encoding='utf-8') as f:
            userids = json.loads(f.read())
        with open('./{}/item_list.json'.format(dataset_path), 'r', encoding='utf-8') as f:
            itemids = json.loads(f.read())
        #userids = list(inter_dict_x.keys())
        # split USERS (not interactions) into warm / valid / cold groups
        random.shuffle(userids)
        warm_user_size = int(len(userids) * self.train_ratio)
        valid_user_size = int(len(userids) * self.valid_ratio)
        warm_users = userids[:warm_user_size]
        valid_users = userids[warm_user_size:warm_user_size+valid_user_size]
        cold_users = userids[warm_user_size+valid_user_size:]
        assert len(userids) == len(warm_users)+len(valid_users)+len(cold_users)
        # Construct the training data dict
        training_dict_x = construct_dictionary(warm_users, inter_dict_x)
        training_dict_y = construct_dictionary(warm_users, inter_dict_y)
        #Avoid the new items shown in test data in the case of cold user.
        item_set = set()
        for i in training_dict_x.values():
            i = set(i)
            item_set = item_set.union(i)
        # Construct one-hot dictionary
        user_dict = to_onehot_dict(userids)
        # only items contained in all data are encoded.
        item_dict = to_onehot_dict(itemids)
        # This part of data is not used, so we do not process it temporally.
        valid_dict_x = construct_dictionary(valid_users, inter_dict_x)
        valid_dict_y = construct_dictionary(valid_users, inter_dict_y)
        assert count_values(valid_dict_x) == count_values(valid_dict_y)
        test_dict_x = construct_dictionary(cold_users, inter_dict_x)
        test_dict_y = construct_dictionary(cold_users, inter_dict_y)
        assert count_values(test_dict_x) == count_values(test_dict_y)
        print('Before delete new items in test data, test data has %d interactions.' % (count_values(test_dict_x)))
        #Delete the new items in test data.
        unseen_count = 0
        for key, value in test_dict_x.items():
            assert len(value) == len(test_dict_y[key])
            # positions of items this cold user interacted with that never
            # appear in any warm user's history
            unseen_item_index = [index for index, i in enumerate(value) if i not in item_set]
            unseen_count+=len(unseen_item_index)
            if len(unseen_item_index) == 0:
                continue
            else:
                # drop the unseen items from both x and y, keeping them aligned
                new_value_x = [element for index, element in enumerate(value) if index not in unseen_item_index]
                new_value_y = [test_dict_y[key][index] for index, element in enumerate(value) if index not in unseen_item_index]
                test_dict_x[key] = new_value_x
                test_dict_y[key] = new_value_y
        print('After delete new items in test data, test data has %d interactions.' % (count_values(test_dict_x)))
        assert count_values(test_dict_x) == count_values(test_dict_y)
        print('The number of total unseen interactions is %d.' % (unseen_count))
        # persist the raw splits; the split ratio is embedded in each file name
        pickle.dump(training_dict_x, open("{}/training_dict_x_{:2f}.pkl".format(dataset_path, self.train_ratio), "wb"))
        pickle.dump(training_dict_y, open("{}/training_dict_y_{:2f}.pkl".format(dataset_path, self.train_ratio), "wb"))
        pickle.dump(valid_dict_x, open("{}/valid_dict_x_{:2f}.pkl".format(dataset_path, self.valid_ratio), "wb"))
        pickle.dump(valid_dict_y, open("{}/valid_dict_y_{:2f}.pkl".format(dataset_path, self.valid_ratio), "wb"))
        pickle.dump(test_dict_x, open("{}/test_dict_x_{:2f}.pkl".format(dataset_path, self.test_ratio), "wb"))
        pickle.dump(test_dict_y, open("{}/test_dict_y_{:2f}.pkl".format(dataset_path, self.test_ratio), "wb"))

        def generate_episodes(dict_x, dict_y, category, support_size, query_size, max_len, dir="log"):
            # One episode (meta-learning task) per user: the user's shuffled
            # interactions are split into a support set and a query set, each
            # row being concat(item one-hot, user one-hot).
            idx = 0
            if not os.path.exists("{}/{}/{}".format(dataset_path, category, dir)):
                os.makedirs("{}/{}/{}".format(dataset_path, category, dir))
                os.makedirs("{}/{}/{}".format(dataset_path, category, "evidence"))
            for _, user_id in enumerate(dict_x.keys()):
                u_id = int(user_id)
                seen_music_len = len(dict_x[str(u_id)])
                indices = list(range(seen_music_len))
                # filter some users with their interactions, i.e., tasks
                if seen_music_len < (support_size + query_size) or seen_music_len > max_len:
                    continue
                random.shuffle(indices)
                tmp_x = np.array(dict_x[str(u_id)])
                tmp_y = np.array(dict_y[str(u_id)])
                # build the support matrix row by row (first iteration seeds it)
                support_x_app = None
                for m_id in tmp_x[indices[:support_size]]:
                    m_id = int(m_id)
                    tmp_x_converted = torch.cat((item_dict[m_id], user_dict[u_id]), 1)
                    try:
                        support_x_app = torch.cat((support_x_app, tmp_x_converted), 0)
                    except:
                        support_x_app = tmp_x_converted
                # remaining interactions form the query set
                query_x_app = None
                for m_id in tmp_x[indices[support_size:]]:
                    m_id = int(m_id)
                    u_id = int(user_id)
                    tmp_x_converted = torch.cat((item_dict[m_id], user_dict[u_id]), 1)
                    try:
                        query_x_app = torch.cat((query_x_app, tmp_x_converted), 0)
                    except:
                        query_x_app = tmp_x_converted
                support_y_app = torch.FloatTensor(tmp_y[indices[:support_size]])
                query_y_app = torch.FloatTensor(tmp_y[indices[support_size:]])
                # four pickles per task: support/query x/y (loader divides file count by 4)
                pickle.dump(support_x_app, open("{}/{}/{}/supp_x_{}.pkl".format(dataset_path, category, dir, idx), "wb"))
                pickle.dump(support_y_app, open("{}/{}/{}/supp_y_{}.pkl".format(dataset_path, category, dir, idx), "wb"))
                pickle.dump(query_x_app, open("{}/{}/{}/query_x_{}.pkl".format(dataset_path, category, dir, idx), "wb"))
                pickle.dump(query_y_app, open("{}/{}/{}/query_y_{}.pkl".format(dataset_path, category, dir, idx), "wb"))
                # used for evidence candidate selection
                with open("{}/{}/{}/supp_x_{}_u_m_ids.txt".format(dataset_path, category, "evidence", idx), "w") as f:
                    for m_id in tmp_x[indices[:support_size]]:
                        f.write("{}\t{}\n".format(u_id, m_id))
                with open("{}/{}/{}/query_x_{}_u_m_ids.txt".format(dataset_path, category, "evidence", idx), "w") as f:
                    for m_id in tmp_x[indices[support_size:]]:
                        f.write("{}\t{}\n".format(u_id, m_id))
                idx+=1

        print("Generate eposide data for training.")
        generate_episodes(training_dict_x, training_dict_y, "training", self.support_size, self.query_size, self.max_len)
        print("Generate eposide data for validation.")
        generate_episodes(valid_dict_x, valid_dict_y, "validation", self.support_size, self.query_size, self.max_len)
        print("Generate eposide data for testing.")
        generate_episodes(test_dict_x, test_dict_y, "testing", self.support_size, self.query_size, self.max_len)
        # one-hot dimensions for users and items
        return len(userids), len(itemids)
| @@ -0,0 +1,77 @@ | |||
| import math | |||
def AP(ranked_list, ground_truth, topn):
    """Average precision at *topn*: matches ranked items against the topn best scores."""
    best = sorted(ground_truth, reverse=True)[:topn]
    hits = 0
    sum_precs = 0.0
    for rank in range(topn):
        score = ground_truth[ranked_list[rank]]
        if score in best:
            hits += 1
            sum_precs += hits / (rank + 1.0)
            # each ground-truth score may only be matched once
            best.remove(score)
    return sum_precs / topn if hits > 0 else 0.0
def RR(ranked_list, ground_truth,topn):
    """Reciprocal rank: 1/position of the first top-*topn* relevant item, else 0."""
    best = sorted(ground_truth, reverse=True)[:topn]
    for rank in range(topn):
        if ground_truth[ranked_list[rank]] in best:
            return 1 / (rank + 1.0)
    return 0
def precision(ranked_list,ground_truth,topn):
    """Fraction of the top-*topn* ranked items whose score is among the topn best."""
    best = sorted(ground_truth, reverse=True)[:topn]
    hits = 0
    for rank in range(topn):
        score = ground_truth[ranked_list[rank]]
        if score in best:
            # consume the matched score so duplicates are counted at most once each
            best.remove(score)
            hits += 1
    return hits / topn
def nDCG(ranked_list, ground_truth, topn):
    """DCG of the given ranking normalized by the ideal DCG over *topn* positions."""
    gains = 0.0
    for pos in range(topn):
        rel = ground_truth[ranked_list[pos]]
        # exponential gain, log2 position discount
        gains += ((2 ** rel) - 1) / math.log(pos + 2, 2)
    return gains / IDCG(ground_truth, topn)
def IDCG(ground_truth,topn):
    """Ideal DCG: the DCG obtained by ranking the topn best scores first."""
    best = sorted(ground_truth, reverse=True)
    return sum(((2 ** best[pos]) - 1) / math.log(pos + 2, 2) for pos in range(topn))
def add_metric(recommend_list, ALL_group_list, precision_list, ap_list, ndcg_list, topn):
    """Compute nDCG/AP/precision at *topn* and append each to its running list."""
    ndcg_list.append(nDCG(recommend_list, ALL_group_list, topn))
    ap_list.append(AP(recommend_list, ALL_group_list, topn))
    precision_list.append(precision(recommend_list, ALL_group_list, topn))
def cal_metric(precision_list,ap_list,ndcg_list):
    """Return (mean precision, mean nDCG, MAP) over the accumulated per-task lists."""
    def _mean(values):
        return sum(values) / len(values)
    return _mean(precision_list), _mean(ndcg_list), _mean(ap_list)
| @@ -0,0 +1,168 @@ | |||
| """ | |||
| Utility functions for torch. | |||
| """ | |||
| import torch | |||
| from torch import nn, optim | |||
| from torch.optim.optimizer import Optimizer | |||
| ### class | |||
class MyAdagrad(Optimizer):
    """My modification of the Adagrad optimizer that allows to specify an initial
    accumulater value. This mimics the behavior of the default Adagrad implementation
    in Tensorflow. The default PyTorch Adagrad uses 0 for initial acculmulator value.

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-2)
        lr_decay (float, optional): learning rate decay (default: 0)
        init_accu_value (float, optional): initial accumulater value.
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
    """
    def __init__(self, params, lr=1e-2, lr_decay=0, init_accu_value=0.1, weight_decay=0):
        defaults = dict(lr=lr, lr_decay=lr_decay, init_accu_value=init_accu_value, \
            weight_decay=weight_decay)
        super(MyAdagrad, self).__init__(params, defaults)

        # seed every squared-gradient accumulator with init_accu_value
        # instead of zero (the TensorFlow behavior described above)
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'] = 0
                state['sum'] = torch.ones(p.data.size()).type_as(p.data) *\
                    init_accu_value

    def share_memory(self):
        # move accumulators into shared memory (hogwild-style multiprocess training)
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['sum'].share_memory_()

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                state = self.state[p]
                state['step'] += 1

                if group['weight_decay'] != 0:
                    if p.grad.data.is_sparse:
                        raise RuntimeError("weight_decay option is not compatible with sparse gradients ")
                    # NOTE(review): legacy Tensor.add(scalar, tensor) call signature —
                    # deprecated in modern PyTorch; confirm the target torch version.
                    grad = grad.add(group['weight_decay'], p.data)

                # decayed learning rate, as in classic Adagrad
                clr = group['lr'] / (1 + (state['step'] - 1) * group['lr_decay'])

                if p.grad.data.is_sparse:
                    grad = grad.coalesce()  # the update is non-linear so indices must be unique
                    grad_indices = grad._indices()
                    grad_values = grad._values()
                    size = torch.Size([x for x in grad.size()])

                    def make_sparse(values):
                        # rebuild a sparse tensor with the same index pattern as grad
                        constructor = type(p.grad.data)
                        if grad_indices.dim() == 0 or values.dim() == 0:
                            return constructor()
                        return constructor(grad_indices, values, size)
                    state['sum'].add_(make_sparse(grad_values.pow(2)))
                    # NOTE(review): Tensor._sparse_mask was renamed/removed in newer
                    # torch releases (now sparse_mask) — this path assumes an old torch.
                    std = state['sum']._sparse_mask(grad)
                    std_values = std._values().sqrt_().add_(1e-10)
                    p.data.add_(-clr, make_sparse(grad_values / std_values))
                else:
                    # dense path: sum += grad^2; p -= clr * grad / sqrt(sum)
                    state['sum'].addcmul_(1, grad, grad)
                    std = state['sum'].sqrt().add_(1e-10)
                    p.data.addcdiv_(-clr, grad, std)

        return loss
| ### torch specific functions | |||
def get_optimizer(name, parameters, lr, l2=0):
    """Build an optimizer by *name*; note adam/adamax deliberately use the
    library's default learning rate and ignore *lr*."""
    if name == 'sgd':
        return torch.optim.SGD(parameters, lr=lr, weight_decay=l2)
    if name in ['adagrad', 'myadagrad']:
        # use my own adagrad to allow for init accumulator value
        return MyAdagrad(parameters, lr=lr, init_accu_value=0.1, weight_decay=l2)
    if name == 'adam':
        return torch.optim.Adam(parameters, weight_decay=l2)  # use default lr
    if name == 'adamax':
        return torch.optim.Adamax(parameters, weight_decay=l2)  # use default lr
    if name == 'adadelta':
        return torch.optim.Adadelta(parameters, lr=lr, weight_decay=l2)
    raise Exception("Unsupported optimizer: {}".format(name))
def change_lr(optimizer, new_lr):
    """Overwrite the learning rate of every param group of *optimizer*."""
    for group in optimizer.param_groups:
        group['lr'] = new_lr
def flatten_indices(seq_lens, width):
    """Row-major flat indices: for row i with length l, emit i*width + 0..l-1."""
    return [row * width + col
            for row, length in enumerate(seq_lens)
            for col in range(length)]
def set_cuda(var, cuda):
    """Move *var* to the GPU when *cuda* is truthy; otherwise return it unchanged."""
    return var.cuda() if cuda else var
def keep_partial_grad(grad, topk):
    """
    Keep only the topk rows of grads.

    Rows from index topk onward are zeroed in place; the (mutated) tensor
    is returned for convenience.
    """
    assert topk < grad.size(0)
    grad.data[topk:] = 0.0
    return grad
| ### model IO | |||
def save(model, optimizer, opt, filename):
    """Checkpoint model + optimizer state dicts together with config *opt*.

    Failures are reported with a warning instead of raising (best-effort save).
    """
    payload = {
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'config': opt,
    }
    try:
        torch.save(payload, filename)
    except BaseException:
        print("[ Warning: model saving failed. ]")
def load(model, optimizer, filename):
    """Restore model/optimizer state from *filename*.

    Returns (model, optimizer, config) where config is the dict stored by
    `save`. Either *model* or *optimizer* may be None to skip restoring it.

    Bug fix: the original caught the load error, printed a message, and then
    crashed with a NameError on the undefined `dump`; the load error is now
    re-raised so callers see the real failure.
    """
    try:
        dump = torch.load(filename)
    except BaseException:
        print("[ Fail: model loading failed. ]")
        raise
    if model is not None:
        model.load_state_dict(dump['model'])
    if optimizer is not None:
        optimizer.load_state_dict(dump['optimizer'])
    opt = dump['config']
    return model, optimizer, opt
def load_config(filename):
    """Load only the 'config' entry of a checkpoint written by `save`.

    Bug fix: the original swallowed the load error and then crashed with a
    NameError on the undefined `dump`; the error is now re-raised instead.
    """
    try:
        dump = torch.load(filename)
    except BaseException:
        print("[ Fail: model loading failed. ]")
        raise
    return dump['config']