commit 094141de63 on master by mohamad maheri (3 years ago)

changed for movie dataset
11 changed files with 1484 additions and 0 deletions
  1. TaNP.py (+245, -0)
  2. TaNP_training.py (+36, -0)
  3. embeddings_TaNP.py (+372, -0)
  4. eval.py (+46, -0)
  5. test (+0, -0)
  6. train.sh (+55, -0)
  7. train_TaNP.py (+193, -0)
  8. utils/helper.py (+74, -0)
  9. utils/loader.py (+218, -0)
  10. utils/scorer.py (+77, -0)
  11. utils/torch_utils.py (+168, -0)

TaNP.py (+245, -0)

import torch
import numpy as np
from random import randint
from copy import deepcopy
from torch.autograd import Variable
from torch.nn import functional as F
from collections import OrderedDict
from embeddings_TaNP import Item, User, Encoder, MuSigmaEncoder, Decoder, Gating_Decoder, TaskEncoder, MemoryUnit, Movie_item, Movie_user
import torch.nn as nn

class NP(nn.Module):
    def __init__(self, config):
        super(NP, self).__init__()

        # change for Movie lens
        # self.x_dim = config['second_embedding_dim'] * 2
        self.x_dim = 32 * 8

        # use one-hot or not?
        self.y_dim = 1
        self.z1_dim = config['z1_dim']
        self.z2_dim = config['z2_dim']
        # z is the dimension size of mu and sigma.
        self.z_dim = config['z_dim']
        # the dimension size of rc.
        self.enc_h1_dim = config['enc_h1_dim']
        self.enc_h2_dim = config['enc_h2_dim']

        self.dec_h1_dim = config['dec_h1_dim']
        self.dec_h2_dim = config['dec_h2_dim']
        self.dec_h3_dim = config['dec_h3_dim']

        self.taskenc_h1_dim = config['taskenc_h1_dim']
        self.taskenc_h2_dim = config['taskenc_h2_dim']
        self.taskenc_final_dim = config['taskenc_final_dim']

        self.clusters_k = config['clusters_k']
        self.temperature = config['temperature']
        self.dropout_rate = config['dropout_rate']

        # Initialize networks

        # change for Movie Lens
        # self.item_emb = Item(config)
        # self.user_emb = User(config)
        self.item_emb = Movie_item(config)
        self.user_emb = Movie_user(config)

        # This encoder is used to generate z; it is the latent encoder in ANP.
        self.xy_to_z = Encoder(self.x_dim, self.y_dim, self.enc_h1_dim, self.enc_h2_dim, self.z1_dim, self.dropout_rate)
        self.z_to_mu_sigma = MuSigmaEncoder(self.z1_dim, self.z2_dim, self.z_dim)
        # This encoder is used to generate r; it is the deterministic encoder in ANP.
        self.xy_to_task = TaskEncoder(self.x_dim, self.y_dim, self.taskenc_h1_dim, self.taskenc_h2_dim,
                                      self.taskenc_final_dim, self.dropout_rate)
        self.memoryunit = MemoryUnit(self.clusters_k, self.taskenc_final_dim, self.temperature)
        # self.xz_to_y = Gating_Decoder(self.x_dim, self.z_dim, self.taskenc_final_dim, self.dec_h1_dim, self.dec_h2_dim, self.dec_h3_dim, self.y_dim, self.dropout_rate)
        self.xz_to_y = Decoder(self.x_dim, self.z_dim, self.taskenc_final_dim, self.dec_h1_dim, self.dec_h2_dim, self.dec_h3_dim, self.y_dim, self.dropout_rate)

    def aggregate(self, z_i):
        return torch.mean(z_i, dim=0)

    def xy_to_mu_sigma(self, x, y):
        # Encode each point into a representation r_i
        z_i = self.xy_to_z(x, y)
        # Aggregate representations r_i into a single representation r
        z = self.aggregate(z_i)
        # Return parameters of distribution
        return self.z_to_mu_sigma(z)

    # embed each (item, user) pair as the x for the NP
    def embedding(self, x):
        # change for Movie lens: slice the multi-hot feature vector
        rate_idx = Variable(x[:, 0], requires_grad=False)
        genre_idx = Variable(x[:, 1:26], requires_grad=False)
        director_idx = Variable(x[:, 26:2212], requires_grad=False)
        actor_idx = Variable(x[:, 2212:10242], requires_grad=False)
        gender_idx = Variable(x[:, 10242], requires_grad=False)
        age_idx = Variable(x[:, 10243], requires_grad=False)
        occupation_idx = Variable(x[:, 10244], requires_grad=False)
        area_idx = Variable(x[:, 10245], requires_grad=False)
        item_emb = self.item_emb(rate_idx, genre_idx, director_idx, actor_idx)
        user_emb = self.user_emb(gender_idx, age_idx, occupation_idx, area_idx)
        x = torch.cat((item_emb, user_emb), 1)
        return x

    def forward(self, x_context, y_context, x_target, y_target):
        # change for Movie lens
        x_context_embed = self.embedding(x_context)
        x_target_embed = self.embedding(x_target)

        if self.training:
            # sigma is log_sigma actually
            mu_target, sigma_target, z_target = self.xy_to_mu_sigma(x_target_embed, y_target)
            mu_context, sigma_context, z_context = self.xy_to_mu_sigma(x_context_embed, y_context)
            task = self.xy_to_task(x_context_embed, y_context)
            mean_task = self.aggregate(task)
            C_distribution, new_task_embed = self.memoryunit(mean_task)
            p_y_pred = self.xz_to_y(x_target_embed, z_target, new_task_embed)
            return p_y_pred, mu_target, sigma_target, mu_context, sigma_context, C_distribution
        else:
            mu_context, sigma_context, z_context = self.xy_to_mu_sigma(x_context_embed, y_context)
            task = self.xy_to_task(x_context_embed, y_context)
            mean_task = self.aggregate(task)
            C_distribution, new_task_embed = self.memoryunit(mean_task)
            p_y_pred = self.xz_to_y(x_target_embed, z_context, new_task_embed)
            return p_y_pred


class Trainer(torch.nn.Module):
    def __init__(self, config):
        self.opt = config
        super(Trainer, self).__init__()
        self.use_cuda = config['use_cuda']
        self.np = NP(self.opt)
        self._lambda = config['lambda']
        self.optimizer = torch.optim.Adam(self.np.parameters(), lr=config['lr'])

    # our kl divergence (parameterized by log sigma); unused in loss(), kept for reference
    def kl_div(self, mu_target, logsigma_target, mu_context, logsigma_context):
        target_sigma = torch.exp(logsigma_target)
        context_sigma = torch.exp(logsigma_context)
        # note the parentheses: the quadratic term must be divided by 2 * sigma_c^2
        kl_div = (logsigma_context - logsigma_target) - 0.5 + (((target_sigma ** 2) + (mu_target - mu_context) ** 2) / (2 * context_sigma ** 2))
        #kl_div = (t.exp(posterior_var) + (posterior_mu-prior_mu) ** 2) / t.exp(prior_var) - 1. + (prior_var - posterior_var)
        #kl_div = 0.5 * kl_div.sum()
        kl_div = kl_div.sum()
        return kl_div

    # new kl divergence -- kl(st|sc)
    def new_kl_div(self, prior_mu, prior_var, posterior_mu, posterior_var):
        kl_div = (torch.exp(posterior_var) + (posterior_mu - prior_mu) ** 2) / torch.exp(prior_var) - 1. + (prior_var - posterior_var)
        kl_div = 0.5 * kl_div.sum()
        return kl_div
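
    # For reference: both helpers compute the closed-form KL between diagonal Gaussians,
    #     KL(N(mu_q, s_q^2) || N(mu_p, s_p^2)) = log(s_p / s_q) + (s_q^2 + (mu_q - mu_p)^2) / (2 * s_p^2) - 1/2,
    # where kl_div parameterizes the scale by log sigma and new_kl_div by log variance
    # (the latter matches MuSigmaEncoder, which samples with std = exp(0.5 * log_sigma)).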

    def loss(self, p_y_pred, y_target, mu_target, sigma_target, mu_context, sigma_context):
        #print('p_y_pred size is ', p_y_pred.size())
        regression_loss = F.mse_loss(p_y_pred, y_target.view(-1, 1))
        #print('regression loss size is ', regression_loss.size())
        # kl divergence between target and context
        #print('regression_loss is ', regression_loss.item())
        kl = self.new_kl_div(mu_context, sigma_context, mu_target, sigma_target)
        #print('KL_loss is ', kl.item())
        return regression_loss + kl

    def context_target_split(self, support_set_x, support_set_y, query_set_x, query_set_y):
        total_x = torch.cat((support_set_x, query_set_x), 0)
        total_y = torch.cat((support_set_y, query_set_y), 0)
        total_size = total_x.size(0)
        context_min = self.opt['context_min']
        context_max = self.opt['context_max']
        extra_tar_min = self.opt['target_extra_min']
        # here we simply use the total_size as the maximum of the target size.
        num_context = randint(context_min, context_max)
        num_target = randint(extra_tar_min, total_size - num_context)
        sampled = np.random.choice(total_size, num_context + num_target, replace=False)
        x_context = total_x[sampled[:num_context], :]
        y_context = total_y[sampled[:num_context]]
        x_target = total_x[sampled, :]
        y_target = total_y[sampled]
        return x_context, y_context, x_target, y_target

    def new_context_target_split(self, support_set_x, support_set_y, query_set_x, query_set_y):
        total_x = torch.cat((support_set_x, query_set_x), 0)
        total_y = torch.cat((support_set_y, query_set_y), 0)
        total_size = total_x.size(0)
        context_min = self.opt['context_min']

        # change for Movie lens
        context_min = min(context_min, total_size - 1)

        num_context = np.random.randint(context_min, total_size)
        num_target = np.random.randint(0, total_size - num_context)
        sampled = np.random.choice(total_size, num_context + num_target, replace=False)
        x_context = total_x[sampled[:num_context], :]
        y_context = total_y[sampled[:num_context]]
        x_target = total_x[sampled, :]
        y_target = total_y[sampled]
        return x_context, y_context, x_target, y_target

    def global_update(self, support_set_xs, support_set_ys, query_set_xs, query_set_ys):
        batch_sz = len(support_set_xs)
        losses = []
        C_distribs = []
        if self.use_cuda:
            for i in range(batch_sz):
                support_set_xs[i] = support_set_xs[i].cuda()
                support_set_ys[i] = support_set_ys[i].cuda()
                query_set_xs[i] = query_set_xs[i].cuda()
                query_set_ys[i] = query_set_ys[i].cuda()
        for i in range(batch_sz):
            x_context, y_context, x_target, y_target = self.new_context_target_split(support_set_xs[i], support_set_ys[i],
                                                                                     query_set_xs[i], query_set_ys[i])
            # print("here1: x_context size:", x_context.size())
            p_y_pred, mu_target, sigma_target, mu_context, sigma_context, C_distribution = self.np(x_context, y_context,
                                                                                                   x_target, y_target)
            C_distribs.append(C_distribution)
            loss = self.loss(p_y_pred, y_target, mu_target, sigma_target, mu_context, sigma_context)
            #print('Each task has loss: ', loss)
            losses.append(loss)
        # calculate target distribution for clustering in batch manner.
        # batchsize * k
        C_distribs = torch.stack(C_distribs)
        # batchsize * k
        C_distribs_sq = torch.pow(C_distribs, 2)
        # 1 * k
        C_distribs_sum = torch.sum(C_distribs, dim=0, keepdim=True)
        # batchsize * k
        temp = C_distribs_sq / C_distribs_sum
        # batchsize * 1
        temp_sum = torch.sum(temp, dim=1, keepdim=True)
        target_distribs = temp / temp_sum
        # calculate the kl loss
        clustering_loss = self._lambda * F.kl_div(C_distribs.log(), target_distribs, reduction='batchmean')
        #print('The clustering loss is %.6f' % (clustering_loss.item()))
        np_losses_mean = torch.stack(losses).mean(0)
        total_loss = np_losses_mean + clustering_loss
        self.optimizer.zero_grad()
        total_loss.backward()
        self.optimizer.step()
        return total_loss.item(), C_distribs.cpu().detach().numpy()
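
    # For reference: the sharpened target distribution built in global_update is the one
    # from Deep Embedded Clustering (DEC),
    #     p_ik = (c_ik^2 / f_k) / sum_k' (c_ik'^2 / f_k'),  with  f_k = sum_i c_ik,
    # which up-weights confident cluster assignments before the KL clustering loss.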

    def query_rec(self, support_set_xs, support_set_ys, query_set_xs, query_set_ys):
        batch_sz = 1
        # used for calculating the rmse.
        losses_q = []
        if self.use_cuda:
            for i in range(batch_sz):
                support_set_xs[i] = support_set_xs[i].cuda()
                support_set_ys[i] = support_set_ys[i].cuda()
                query_set_xs[i] = query_set_xs[i].cuda()
                query_set_ys[i] = query_set_ys[i].cuda()
        for i in range(batch_sz):
            #query_set_y_pred = self.forward(support_set_xs[i], support_set_ys[i], query_set_xs[i], num_local_update)
            query_set_y_pred = self.np(support_set_xs[i], support_set_ys[i], query_set_xs[i], query_set_ys[i])
            # obtain the mean of the gaussian distribution
            #(interaction_size, y_dim)
            #query_set_y_pred = query_set_y_pred.loc.detach()
            #print('test_y_pred size is ', query_set_y_pred.size())
            loss_q = F.mse_loss(query_set_y_pred, query_set_ys[i].view(-1, 1))
            losses_q.append(loss_q)
        losses_q = torch.stack(losses_q).mean(0)
        output_list, recommendation_list = query_set_y_pred.view(-1).sort(descending=True)
        return losses_q.item(), recommendation_list


TaNP_training.py (+36, -0)

import os
import torch
import pickle
import random
from eval import testing

def training(trainer, opt, train_dataset, test_dataset, batch_size, num_epoch, model_save=True, model_filename=None, logger=None):
    training_set_size = len(train_dataset)
    for epoch in range(num_epoch):
        random.shuffle(train_dataset)
        num_batch = int(training_set_size / batch_size)
        a, b, c, d = zip(*train_dataset)
        trainer.train()
        all_C_distribs = []
        for i in range(num_batch):
            try:
                supp_xs = list(a[batch_size * i:batch_size * (i + 1)])
                supp_ys = list(b[batch_size * i:batch_size * (i + 1)])
                query_xs = list(c[batch_size * i:batch_size * (i + 1)])
                query_ys = list(d[batch_size * i:batch_size * (i + 1)])
            except IndexError:
                continue
            train_loss, batch_C_distribs = trainer.global_update(supp_xs, supp_ys, query_xs, query_ys)
            all_C_distribs.append(batch_C_distribs)

        P5, NDCG5, MAP5, P7, NDCG7, MAP7, P10, NDCG10, MAP10 = testing(trainer, opt, test_dataset)
        logger.log(
            "{}\t{:.6f}\t TOP-5 {:.4f}\t{:.4f}\t{:.4f}\t TOP-7: {:.4f}\t{:.4f}\t{:.4f}"
            "\t TOP-10: {:.4f}\t{:.4f}\t{:.4f}".
            format(epoch, train_loss, P5, NDCG5, MAP5, P7, NDCG7, MAP7, P10, NDCG10, MAP10))
        if epoch == (num_epoch - 1):
            with open('output_att', 'wb') as fp:
                pickle.dump(all_C_distribs, fp)

    if model_save:
        torch.save(trainer.state_dict(), model_filename)

embeddings_TaNP.py (+372, -0)

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F

class Item(torch.nn.Module):
    def __init__(self, config):
        super(Item, self).__init__()
        self.feature_dim = config['if_dim']
        self.first_embedding_dim = config['first_embedding_dim']
        self.second_embedding_dim = config['second_embedding_dim']

        self.first_embedding_layer = torch.nn.Linear(
            in_features=self.feature_dim,
            out_features=self.first_embedding_dim,
            bias=True
        )

        self.second_embedding_layer = torch.nn.Linear(
            in_features=self.first_embedding_dim,
            out_features=self.second_embedding_dim,
            bias=True
        )

    def forward(self, x, vars=None):
        first_hidden = self.first_embedding_layer(x)
        first_hidden = F.relu(first_hidden)
        sec_hidden = self.second_embedding_layer(first_hidden)
        return F.relu(sec_hidden)

class Movie_item(torch.nn.Module):
    def __init__(self, config):
        super(Movie_item, self).__init__()
        self.num_rate = config['num_rate']
        self.num_genre = config['num_genre']
        self.num_director = config['num_director']
        self.num_actor = config['num_actor']
        self.embedding_dim = config['embedding_dim']

        # change for Movie
        self.feature_dim = 4 * self.embedding_dim

        self.embedding_rate = torch.nn.Embedding(
            num_embeddings=self.num_rate,
            embedding_dim=self.embedding_dim
        )
        self.embedding_genre = torch.nn.Linear(
            in_features=self.num_genre,
            out_features=self.embedding_dim,
            bias=False
        )
        self.embedding_director = torch.nn.Linear(
            in_features=self.num_director,
            out_features=self.embedding_dim,
            bias=False
        )
        self.embedding_actor = torch.nn.Linear(
            in_features=self.num_actor,
            out_features=self.embedding_dim,
            bias=False
        )

    def forward(self, rate_idx, genre_idx, director_idx, actors_idx, vars=None):
        rate_emb = self.embedding_rate(rate_idx)
        # multi-hot features are mean-pooled: divide by the number of active bits
        genre_emb = self.embedding_genre(genre_idx.float()) / torch.sum(genre_idx.float(), 1).view(-1, 1)
        director_emb = self.embedding_director(director_idx.float()) / torch.sum(director_idx.float(), 1).view(-1, 1)
        actors_emb = self.embedding_actor(actors_idx.float()) / torch.sum(actors_idx.float(), 1).view(-1, 1)
        return torch.cat((rate_emb, genre_emb, director_emb, actors_emb), 1)

class User(torch.nn.Module):
    def __init__(self, config):
        super(User, self).__init__()
        self.feature_dim = config['uf_dim']
        self.first_embedding_dim = config['first_embedding_dim']
        self.second_embedding_dim = config['second_embedding_dim']

        self.first_embedding_layer = torch.nn.Linear(
            in_features=self.feature_dim,
            out_features=self.first_embedding_dim,
            bias=True
        )

        self.second_embedding_layer = torch.nn.Linear(
            in_features=self.first_embedding_dim,
            out_features=self.second_embedding_dim,
            bias=True
        )

    def forward(self, x, vars=None):
        first_hidden = self.first_embedding_layer(x)
        first_hidden = F.relu(first_hidden)
        sec_hidden = self.second_embedding_layer(first_hidden)
        return F.relu(sec_hidden)

class Movie_user(torch.nn.Module):
    def __init__(self, config):
        super(Movie_user, self).__init__()
        self.num_gender = config['num_gender']
        self.num_age = config['num_age']
        self.num_occupation = config['num_occupation']
        self.num_zipcode = config['num_zipcode']
        self.embedding_dim = config['embedding_dim']

        self.embedding_gender = torch.nn.Embedding(
            num_embeddings=self.num_gender,
            embedding_dim=self.embedding_dim
        )

        self.embedding_age = torch.nn.Embedding(
            num_embeddings=self.num_age,
            embedding_dim=self.embedding_dim
        )

        self.embedding_occupation = torch.nn.Embedding(
            num_embeddings=self.num_occupation,
            embedding_dim=self.embedding_dim
        )

        self.embedding_area = torch.nn.Embedding(
            num_embeddings=self.num_zipcode,
            embedding_dim=self.embedding_dim
        )

    def forward(self, gender_idx, age_idx, occupation_idx, area_idx):
        gender_emb = self.embedding_gender(gender_idx)
        age_emb = self.embedding_age(age_idx)
        occupation_emb = self.embedding_occupation(occupation_idx)
        area_emb = self.embedding_area(area_idx)
        return torch.cat((gender_emb, age_emb, occupation_emb, area_emb), 1)

class Encoder(nn.Module):
    # Maps an (x_i, y_i) pair to a representation r_i.
    # Add the dropout into encoder --- 03.31
    def __init__(self, x_dim, y_dim, h1_dim, h2_dim, z1_dim, dropout_rate):
        super(Encoder, self).__init__()

        self.x_dim = x_dim
        self.y_dim = y_dim
        self.h1_dim = h1_dim
        self.h2_dim = h2_dim
        self.z1_dim = z1_dim
        self.dropout_rate = dropout_rate

        layers = [nn.Linear(self.x_dim + self.y_dim, self.h1_dim),
                  torch.nn.Dropout(self.dropout_rate),
                  nn.ReLU(inplace=True),
                  nn.Linear(self.h1_dim, self.h2_dim),
                  torch.nn.Dropout(self.dropout_rate),
                  nn.ReLU(inplace=True),
                  nn.Linear(self.h2_dim, self.z1_dim)]

        self.input_to_hidden = nn.Sequential(*layers)

    def forward(self, x, y):
        y = y.view(-1, 1)
        input_pairs = torch.cat((x, y), dim=1)
        return self.input_to_hidden(input_pairs)


class MuSigmaEncoder(nn.Module):
    def __init__(self, z1_dim, z2_dim, z_dim):
        super(MuSigmaEncoder, self).__init__()

        self.z1_dim = z1_dim
        self.z2_dim = z2_dim
        self.z_dim = z_dim
        self.z_to_hidden = nn.Linear(self.z1_dim, self.z2_dim)
        self.hidden_to_mu = nn.Linear(self.z2_dim, z_dim)
        self.hidden_to_logsigma = nn.Linear(self.z2_dim, z_dim)

    def forward(self, z_input):
        hidden = torch.relu(self.z_to_hidden(z_input))
        mu = self.hidden_to_mu(hidden)
        log_sigma = self.hidden_to_logsigma(hidden)
        std = torch.exp(0.5 * log_sigma)
        eps = torch.randn_like(std)
        z = eps.mul(std).add_(mu)
        return mu, log_sigma, z
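
    # For reference: hidden_to_logsigma is effectively predicting log sigma^2, so the
    # forward pass above is the reparameterization trick,
    #     z = mu + eps * exp(0.5 * log sigma^2),  eps ~ N(0, I),
    # which keeps the sample differentiable with respect to mu and log sigma^2.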

class TaskEncoder(nn.Module):
    def __init__(self, x_dim, y_dim, h1_dim, h2_dim, final_dim, dropout_rate):
        super(TaskEncoder, self).__init__()
        self.x_dim = x_dim
        self.y_dim = y_dim
        self.h1_dim = h1_dim
        self.h2_dim = h2_dim
        self.final_dim = final_dim
        self.dropout_rate = dropout_rate
        layers = [nn.Linear(self.x_dim + self.y_dim, self.h1_dim),
                  torch.nn.Dropout(self.dropout_rate),
                  nn.ReLU(inplace=True),
                  nn.Linear(self.h1_dim, self.h2_dim),
                  torch.nn.Dropout(self.dropout_rate),
                  nn.ReLU(inplace=True),
                  nn.Linear(self.h2_dim, self.final_dim)]

        self.input_to_hidden = nn.Sequential(*layers)

    def forward(self, x, y):
        y = y.view(-1, 1)
        input_pairs = torch.cat((x, y), dim=1)
        return self.input_to_hidden(input_pairs)

class MemoryUnit(nn.Module):
    # clusters_k is the number of keys k
    def __init__(self, clusters_k, emb_size, temperature):
        super(MemoryUnit, self).__init__()
        self.clusters_k = clusters_k
        self.embed_size = emb_size
        self.temperature = temperature
        self.array = nn.Parameter(init.xavier_uniform_(torch.FloatTensor(self.clusters_k, self.embed_size)))

    def forward(self, task_embed):
        res = torch.norm(task_embed - self.array, p=2, dim=1, keepdim=True)
        res = torch.pow((res / self.temperature) + 1, (self.temperature + 1) / -2)
        # 1 * k
        C = torch.transpose(res / res.sum(), 0, 1)
        # 1*k, k*d -> 1*d
        value = torch.mm(C, self.array)
        # simple add operation
        new_task_embed = value + task_embed
        # calculate target distribution
        return C, new_task_embed
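
    # For reference: this is a Student-t style soft assignment over the K memory keys,
    # similar to DEC but applied to the unsquared L2 distance d_k = ||task - m_k||_2:
    #     c_k  is proportional to  (1 + d_k / T)^(-(T + 1) / 2),
    # normalized so the row C sums to one; T is the temperature.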


class Decoder(nn.Module):
    """
    Maps target input x_target and z, r to predictions y_target.
    """
    def __init__(self, x_dim, z_dim, task_dim, h1_dim, h2_dim, h3_dim, y_dim, dropout_rate):
        super(Decoder, self).__init__()
        self.x_dim = x_dim
        self.z_dim = z_dim
        self.task_dim = task_dim
        self.h1_dim = h1_dim
        self.h2_dim = h2_dim
        self.h3_dim = h3_dim
        self.y_dim = y_dim
        self.dropout_rate = dropout_rate
        self.dropout = nn.Dropout(self.dropout_rate)

        self.hidden_layer_1 = nn.Linear(self.x_dim + self.z_dim, self.h1_dim)
        self.hidden_layer_2 = nn.Linear(self.h1_dim, self.h2_dim)
        self.hidden_layer_3 = nn.Linear(self.h2_dim, self.h3_dim)

        self.film_layer_1_beta = nn.Linear(self.task_dim, self.h1_dim, bias=False)
        self.film_layer_1_gamma = nn.Linear(self.task_dim, self.h1_dim, bias=False)
        self.film_layer_2_beta = nn.Linear(self.task_dim, self.h2_dim, bias=False)
        self.film_layer_2_gamma = nn.Linear(self.task_dim, self.h2_dim, bias=False)
        self.film_layer_3_beta = nn.Linear(self.task_dim, self.h3_dim, bias=False)
        self.film_layer_3_gamma = nn.Linear(self.task_dim, self.h3_dim, bias=False)

        self.final_projection = nn.Linear(self.h3_dim, self.y_dim)

    def forward(self, x, z, task):
        interaction_size, _ = x.size()
        z = z.unsqueeze(0).repeat(interaction_size, 1)
        # Input is concatenation of z with every row of x
        inputs = torch.cat((x, z), dim=1)
        hidden_1 = self.hidden_layer_1(inputs)
        beta_1 = torch.tanh(self.film_layer_1_beta(task))
        gamma_1 = torch.tanh(self.film_layer_1_gamma(task))
        hidden_1 = torch.mul(hidden_1, gamma_1) + beta_1
        hidden_1 = self.dropout(hidden_1)
        hidden_2 = F.relu(hidden_1)

        hidden_2 = self.hidden_layer_2(hidden_2)
        beta_2 = torch.tanh(self.film_layer_2_beta(task))
        gamma_2 = torch.tanh(self.film_layer_2_gamma(task))
        hidden_2 = torch.mul(hidden_2, gamma_2) + beta_2
        hidden_2 = self.dropout(hidden_2)
        hidden_3 = F.relu(hidden_2)

        hidden_3 = self.hidden_layer_3(hidden_3)
        beta_3 = torch.tanh(self.film_layer_3_beta(task))
        gamma_3 = torch.tanh(self.film_layer_3_gamma(task))
        hidden_final = torch.mul(hidden_3, gamma_3) + beta_3
        hidden_final = self.dropout(hidden_final)
        hidden_final = F.relu(hidden_final)

        y_pred = self.final_projection(hidden_final)
        return y_pred
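
    # For reference: each hidden layer is modulated FiLM-style by the task embedding,
    #     h <- gamma(task) * h + beta(task),
    # with gamma and beta produced by tanh-activated linear maps of new_task_embed.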



class Gating_Decoder(nn.Module):

    def __init__(self, x_dim, z_dim, task_dim, h1_dim, h2_dim, h3_dim, y_dim, dropout_rate):
        super(Gating_Decoder, self).__init__()
        self.x_dim = x_dim
        self.z_dim = z_dim
        self.task_dim = task_dim
        self.h1_dim = h1_dim
        self.h2_dim = h2_dim
        self.h3_dim = h3_dim
        self.y_dim = y_dim
        self.dropout_rate = dropout_rate
        self.dropout = nn.Dropout(self.dropout_rate)

        self.hidden_layer_1 = nn.Linear(self.x_dim + self.z_dim, self.h1_dim)
        self.hidden_layer_2 = nn.Linear(self.h1_dim, self.h2_dim)
        self.hidden_layer_3 = nn.Linear(self.h2_dim, self.h3_dim)

        self.film_layer_1_beta = nn.Linear(self.task_dim, self.h1_dim, bias=False)
        self.film_layer_1_gamma = nn.Linear(self.task_dim, self.h1_dim, bias=False)
        self.film_layer_1_eta = nn.Linear(self.task_dim, self.h1_dim, bias=False)
        self.film_layer_1_delta = nn.Linear(self.task_dim, self.h1_dim, bias=False)

        self.film_layer_2_beta = nn.Linear(self.task_dim, self.h2_dim, bias=False)
        self.film_layer_2_gamma = nn.Linear(self.task_dim, self.h2_dim, bias=False)
        self.film_layer_2_eta = nn.Linear(self.task_dim, self.h2_dim, bias=False)
        self.film_layer_2_delta = nn.Linear(self.task_dim, self.h2_dim, bias=False)

        self.film_layer_3_beta = nn.Linear(self.task_dim, self.h3_dim, bias=False)
        self.film_layer_3_gamma = nn.Linear(self.task_dim, self.h3_dim, bias=False)
        self.film_layer_3_eta = nn.Linear(self.task_dim, self.h3_dim, bias=False)
        self.film_layer_3_delta = nn.Linear(self.task_dim, self.h3_dim, bias=False)

        self.final_projection = nn.Linear(self.h3_dim, self.y_dim)

    def forward(self, x, z, task):
        interaction_size, _ = x.size()
        z = z.unsqueeze(0).repeat(interaction_size, 1)
        # Input is concatenation of z with every row of x
        inputs = torch.cat((x, z), dim=1)
        hidden_1 = self.hidden_layer_1(inputs)
        beta_1 = torch.tanh(self.film_layer_1_beta(task))
        gamma_1 = torch.tanh(self.film_layer_1_gamma(task))
        eta_1 = torch.tanh(self.film_layer_1_eta(task))
        delta_1 = torch.sigmoid(self.film_layer_1_delta(task))

        gamma_1 = gamma_1 * delta_1 + eta_1 * (1 - delta_1)
        beta_1 = beta_1 * delta_1 + eta_1 * (1 - delta_1)

        hidden_1 = torch.mul(hidden_1, gamma_1) + beta_1
        hidden_1 = self.dropout(hidden_1)
        hidden_2 = F.relu(hidden_1)

        hidden_2 = self.hidden_layer_2(hidden_2)
        beta_2 = torch.tanh(self.film_layer_2_beta(task))
        gamma_2 = torch.tanh(self.film_layer_2_gamma(task))
        eta_2 = torch.tanh(self.film_layer_2_eta(task))
        delta_2 = torch.sigmoid(self.film_layer_2_delta(task))

        gamma_2 = gamma_2 * delta_2 + eta_2 * (1 - delta_2)
        beta_2 = beta_2 * delta_2 + eta_2 * (1 - delta_2)

        hidden_2 = torch.mul(hidden_2, gamma_2) + beta_2
        hidden_2 = self.dropout(hidden_2)
        hidden_3 = F.relu(hidden_2)

        hidden_3 = self.hidden_layer_3(hidden_3)
        beta_3 = torch.tanh(self.film_layer_3_beta(task))
        gamma_3 = torch.tanh(self.film_layer_3_gamma(task))
        eta_3 = torch.tanh(self.film_layer_3_eta(task))
        delta_3 = torch.sigmoid(self.film_layer_3_delta(task))

        gamma_3 = gamma_3 * delta_3 + eta_3 * (1 - delta_3)
        beta_3 = beta_3 * delta_3 + eta_3 * (1 - delta_3)

        hidden_final = torch.mul(hidden_3, gamma_3) + beta_3
        hidden_final = self.dropout(hidden_final)
        hidden_final = F.relu(hidden_final)

        y_pred = self.final_projection(hidden_final)
        return y_pred
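
    # For reference: the gated variant blends each FiLM scale with a second candidate
    # eta through a learned sigmoid gate delta,
    #     gamma <- delta * gamma + (1 - delta) * eta,
    # and beta is blended with the same eta, letting the task interpolate between
    # two modulation modes per layer.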

eval.py (+46, -0)

"""
Run evaluation with saved models.
"""
import random
import argparse
from tqdm import tqdm
import torch
from utils.scorer import *


def testing(trainer, opt, test_dataset):
    test_dataset_len = len(test_dataset)
    #batch_size = opt["batch_size"]
    minibatch_size = 1
    a, b, c, d = zip(*test_dataset)
    trainer.eval()
    all_loss = 0
    pre5 = []
    ap5 = []
    ndcg5 = []
    pre7 = []
    ap7 = []
    ndcg7 = []
    pre10 = []
    ap10 = []
    ndcg10 = []
    for i in range(test_dataset_len):
        try:
            supp_xs = list(a[minibatch_size * i:minibatch_size * (i + 1)])
            supp_ys = list(b[minibatch_size * i:minibatch_size * (i + 1)])
            query_xs = list(c[minibatch_size * i:minibatch_size * (i + 1)])
            query_ys = list(d[minibatch_size * i:minibatch_size * (i + 1)])
        except IndexError:
            continue
        test_loss, recommendation_list = trainer.query_rec(supp_xs, supp_ys, query_xs, query_ys)
        all_loss += test_loss

        add_metric(recommendation_list, query_ys[0].cpu().detach().numpy(), pre5, ap5, ndcg5, 5)
        add_metric(recommendation_list, query_ys[0].cpu().detach().numpy(), pre7, ap7, ndcg7, 7)
        add_metric(recommendation_list, query_ys[0].cpu().detach().numpy(), pre10, ap10, ndcg10, 10)

    mpre5, mndcg5, map5 = cal_metric(pre5, ap5, ndcg5)
    mpre7, mndcg7, map7 = cal_metric(pre7, ap7, ndcg7)
    mpre10, mndcg10, map10 = cal_metric(pre10, ap10, ndcg10)

    return mpre5, mndcg5, map5, mpre7, mndcg7, map7, mpre10, mndcg10, map10
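
    # Note: query_rec returns the query items' indices sorted by predicted score,
    # which is the ranked_list format expected by add_metric in utils/scorer.py
    # (indices into the query ground-truth vector).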

test (+0, -0)


train.sh (+55, -0)

first_embedding_dim=32
second_embedding_dim=16
z1_dim=32
z2_dim=32
z_dim=32
enc_h1_dim=32
enc_h2_dim=16
taskenc_h1_dim=32
taskenc_h2_dim=32
taskenc_final_dim=16
clusters_k=10
temperature=1.0
lambda=1.0
dec_h1_dim=32
dec_h2_dim=32
dec_h3_dim=16
dropout_rate=0
lr=0.0001
optim='adam'
num_epoch=100
batch_size=32
train_ratio=0.7
valid_ratio=0.1
support_size=20
query_size=10
max_len=200
context_min=20
CUDA_VISIBLE_DEVICES=0 python train_TaNP.py \
--first_embedding_dim $first_embedding_dim \
--second_embedding_dim $second_embedding_dim \
--z1_dim $z1_dim \
--z2_dim $z2_dim \
--z_dim $z_dim \
--enc_h1_dim $enc_h1_dim \
--enc_h2_dim $enc_h2_dim \
--taskenc_h1_dim $taskenc_h1_dim \
--taskenc_h2_dim $taskenc_h2_dim \
--taskenc_final_dim $taskenc_final_dim \
--clusters_k $clusters_k \
--lambda $lambda \
--temperature $temperature \
--dec_h1_dim $dec_h1_dim \
--dec_h2_dim $dec_h2_dim \
--dec_h3_dim $dec_h3_dim \
--lr $lr \
--dropout_rate $dropout_rate \
--optim $optim \
--num_epoch $num_epoch \
--batch_size $batch_size \
--train_ratio $train_ratio \
--valid_ratio $valid_ratio \
--support_size $support_size \
--query_size $query_size \
--max_len $max_len \
--context_min $context_min

train_TaNP.py (+193, -0)

import os
from datetime import datetime
import time
import numpy as np
import random
import argparse
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import json
from utils.loader import Preprocess
from TaNP import Trainer
from TaNP_training import training
from utils import helper
from eval import testing

parser = argparse.ArgumentParser()
# parser.add_argument('--data_dir', type=str, default='data/lastfm_20')#1
# parser.add_argument('--model_save_dir', type=str, default='save_model_dir')#1
parser.add_argument('--data_dir', type=str, default='/media/external_10TB/10TB/maheri/melu_data')#1
parser.add_argument('--model_save_dir', type=str, default='/media/external_10TB/10TB/maheri/tanp_data/tanp_models')#1

parser.add_argument('--id', type=str, default='1', help='used for save hyper-parameters.')#1

parser.add_argument('--first_embedding_dim', type=int, default=32, help='Embedding dimension for item and user.')#1
parser.add_argument('--second_embedding_dim', type=int, default=16, help='Embedding dimension for item and user.')#1

parser.add_argument('--z1_dim', type=int, default=32, help='The dimension of z1 in latent path.')
parser.add_argument('--z2_dim', type=int, default=32, help='The dimension of z2 in latent path.')
parser.add_argument('--z_dim', type=int, default=32, help='The dimension of z in latent path.')

parser.add_argument('--enc_h1_dim', type=int, default=64, help='The hidden first dimension of encoder.')
parser.add_argument('--enc_h2_dim', type=int, default=64, help='The hidden second dimension of encoder.')

parser.add_argument('--taskenc_h1_dim', type=int, default=128, help='The hidden first dimension of task encoder.')
parser.add_argument('--taskenc_h2_dim', type=int, default=64, help='The hidden second dimension of task encoder.')
parser.add_argument('--taskenc_final_dim', type=int, default=64, help='The final output dimension of task encoder.')

parser.add_argument('--clusters_k', type=int, default=7, help='Cluster numbers of tasks.')
parser.add_argument('--temperature', type=float, default=1.0, help='used for student-t distribution.')
parser.add_argument('--lambda', type=float, default=0.1, help='used to balance the clustering loss and NP loss.')

parser.add_argument('--dec_h1_dim', type=int, default=128, help='The hidden first dimension of decoder.')
parser.add_argument('--dec_h2_dim', type=int, default=128, help='The hidden second dimension of decoder.')
parser.add_argument('--dec_h3_dim', type=int, default=128, help='The hidden third dimension of decoder.')

# used for movie datasets
parser.add_argument('--num_gender', type=int, default=2, help='User information.')#1
parser.add_argument('--num_age', type=int, default=7, help='User information.')#1
parser.add_argument('--num_occupation', type=int, default=21, help='User information.')#1
parser.add_argument('--num_zipcode', type=int, default=3402, help='User information.')#1
parser.add_argument('--num_rate', type=int, default=6, help='Item information.')#1
parser.add_argument('--num_genre', type=int, default=25, help='Item information.')#1
parser.add_argument('--num_director', type=int, default=2186, help='Item information.')#1
parser.add_argument('--num_actor', type=int, default=8030, help='Item information.')#1

parser.add_argument('--dropout_rate', type=float, default=0, help='used in encoder and decoder.')
parser.add_argument('--lr', type=float, default=1e-4, help='Applies to SGD and Adagrad.')#1
parser.add_argument('--optim', type=str, default='adam', help='sgd, adagrad, adam or adamax.')
parser.add_argument('--num_epoch', type=int, default=150)#1
parser.add_argument('--batch_size', type=int, default=32)#1
parser.add_argument('--train_ratio', type=float, default=0.7, help='Warm user ratio for training.')#1
parser.add_argument('--valid_ratio', type=float, default=0.1, help='Cold user ratio for validation.')#1
parser.add_argument('--seed', type=int, default=2020)#1
parser.add_argument('--save', type=int, default=0)#1
parser.add_argument('--use_cuda', type=bool, default=torch.cuda.is_available())#1
parser.add_argument('--cpu', action='store_true', help='Ignore CUDA.')#1
parser.add_argument('--support_size', type=int, default=20)#1
parser.add_argument('--query_size', type=int, default=10)#1
parser.add_argument('--max_len', type=int, default=200, help='The max length of interactions for each user.')
parser.add_argument('--context_min', type=int, default=20, help='Minimum size of context range.')


# change for Movie lens
parser.add_argument('--embedding_dim', type=int, default=32, help='embedding dimension for each item/user feature of Movie lens')
parser.add_argument('--first_fc_hidden_dim', type=int, default=64, help='embedding dimension for each item/user feature of Movie lens')
parser.add_argument('--second_fc_hidden_dim', type=int, default=64, help='embedding dimension for each item/user feature of Movie lens')

args = parser.parse_args()

def seed_everything(seed=1023):
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed = args.seed
seed_everything(seed)

if args.cpu:
    args.use_cuda = False
elif args.use_cuda:
    torch.cuda.manual_seed(args.seed)

opt = vars(args)

# print model info
helper.print_config(opt)
helper.ensure_dir(opt["model_save_dir"], verbose=True)
# save model config
helper.save_config(opt, opt["model_save_dir"] + "/" +opt["id"] + '.config', verbose=True)
# record training log
file_logger = helper.FileLogger(opt["model_save_dir"] + '/' + opt['id'] + ".log",
                                header="# epoch\ttrain_loss\tprecision5\tNDCG5\tMAP5\tprecision7"
                                       "\tNDCG7\tMAP7\tprecision10\tNDCG10\tMAP10")

# change for Movie Lens
# preprocess = Preprocess(opt)
print("Preprocess is done.")
print("Create model TaNP...")

# opt['uf_dim'] = preprocess.uf_dim
# opt['if_dim'] = preprocess.if_dim

trainer = Trainer(opt)
if opt['use_cuda']:
    trainer.cuda()

model_filename = "{}/{}.pt".format(opt['model_save_dir'], opt["id"])

# /4 since sup_x, sup_y, query_x, query_y

# change for Movie lens
# training_set_size = int(len(os.listdir("{}/{}/{}".format(opt["data_dir"], "training", "log"))) / 4)
training_set_size = int(len(os.listdir("{}/{}".format(opt["data_dir"], "warm_state"))) / 4)

supp_xs_s = []
supp_ys_s = []
query_xs_s = []
query_ys_s = []
for idx in range(training_set_size):
    # supp_xs_s.append(pickle.load(open("{}/{}/{}/supp_x_{}.pkl".format(opt["data_dir"], "training", "log", idx), "rb")))
    # supp_ys_s.append(pickle.load(open("{}/{}/{}/supp_y_{}.pkl".format(opt["data_dir"], "training", "log", idx), "rb")))
    # query_xs_s.append(pickle.load(open("{}/{}/{}/query_x_{}.pkl".format(opt["data_dir"], "training", "log", idx), "rb")))
    # query_ys_s.append(pickle.load(open("{}/{}/{}/query_y_{}.pkl".format(opt["data_dir"], "training", "log", idx), "rb")))
    supp_xs_s.append(pickle.load(open("{}/{}/supp_x_{}.pkl".format(opt["data_dir"], "warm_state", idx), "rb")))
    supp_ys_s.append(pickle.load(open("{}/{}/supp_y_{}.pkl".format(opt["data_dir"], "warm_state", idx), "rb")))
    query_xs_s.append(pickle.load(open("{}/{}/query_x_{}.pkl".format(opt["data_dir"], "warm_state", idx), "rb")))
    query_ys_s.append(pickle.load(open("{}/{}/query_y_{}.pkl".format(opt["data_dir"], "warm_state", idx), "rb")))

train_dataset = list(zip(supp_xs_s, supp_ys_s, query_xs_s, query_ys_s))

del (supp_xs_s, supp_ys_s, query_xs_s, query_ys_s)

# change for Movie lens
# testing_set_size = int(len(os.listdir("{}/{}/{}".format(opt["data_dir"], "testing", "log"))) / 4)
testing_set_size = int(len(os.listdir("{}/{}".format(opt["data_dir"], "user_cold_state"))) / 4)

supp_xs_s = []
supp_ys_s = []
query_xs_s = []
query_ys_s = []
for idx in range(testing_set_size):
    # change for Movie lens
    # supp_xs_s.append(
    #     pickle.load(open("{}/{}/{}/supp_x_{}.pkl".format(opt["data_dir"], "testing", "log", idx), "rb")))
    # supp_ys_s.append(
    #     pickle.load(open("{}/{}/{}/supp_y_{}.pkl".format(opt["data_dir"], "testing", "log", idx), "rb")))
    # query_xs_s.append(
    #     pickle.load(open("{}/{}/{}/query_x_{}.pkl".format(opt["data_dir"], "testing", "log", idx), "rb")))
    # query_ys_s.append(
    #     pickle.load(open("{}/{}/{}/query_y_{}.pkl".format(opt["data_dir"], "testing", "log", idx), "rb")))
    supp_xs_s.append(
        pickle.load(open("{}/{}/supp_x_{}.pkl".format(opt["data_dir"], "user_cold_state", idx), "rb")))
    supp_ys_s.append(
        pickle.load(open("{}/{}/supp_y_{}.pkl".format(opt["data_dir"], "user_cold_state", idx), "rb")))
    query_xs_s.append(
        pickle.load(open("{}/{}/query_x_{}.pkl".format(opt["data_dir"], "user_cold_state", idx), "rb")))
    query_ys_s.append(
        pickle.load(open("{}/{}/query_y_{}.pkl".format(opt["data_dir"], "user_cold_state", idx), "rb")))
test_dataset = list(zip(supp_xs_s, supp_ys_s, query_xs_s, query_ys_s))

del (supp_xs_s, supp_ys_s, query_xs_s, query_ys_s)

print("# epoch\ttrain_loss\tprecision5\tNDCG5\tMAP5\tprecision7\tNDCG7\tMAP7\tprecision10\tNDCG10\tMAP10")

if not os.path.exists(model_filename):
    print("Start training...")
    training(trainer, opt, train_dataset, test_dataset, batch_size=opt['batch_size'], num_epoch=opt['num_epoch'],
             model_save=opt["save"], model_filename=model_filename, logger=file_logger)
else:
    print("Load pre-trained model...")
    opt = helper.load_config(model_filename[:-2] + "config")
    helper.print_config(opt)
    trained_state_dict = torch.load(model_filename)
    trainer.load_state_dict(trained_state_dict)


utils/helper.py (+74, -0)

"""
Helper functions.
"""

import os
import json
import argparse


### IO
def check_dir(d):
    if not os.path.exists(d):
        print("Directory {} does not exist. Exit.".format(d))
        exit(1)


def check_files(files):
    for f in files:
        if f is not None and not os.path.exists(f):
            print("File {} does not exist. Exit.".format(f))
            exit(1)


def ensure_dir(d, verbose=True):
    if not os.path.exists(d):
        if verbose:
            print("Directory {} does not exist; creating...".format(d))
        os.makedirs(d)


def save_config(config, path, verbose=True):
    with open(path, 'w') as outfile:
        json.dump(config, outfile, indent=2)
    if verbose:
        print("Config saved to file {}".format(path))
    return config


def load_config(path, verbose=True):
    with open(path) as f:
        config = json.load(f)
    if verbose:
        print("Config loaded from file {}".format(path))
    return config


def print_config(config):
    info = "Running with the following configs:\n"
    for k, v in config.items():
        info += "\t{} : {}\n".format(k, str(v))
    print("\n" + info + "\n")
    return


class FileLogger(object):
    """
    A file logger that opens the file periodically and writes to it.
    """

    def __init__(self, filename, header=None):
        self.filename = filename
        if os.path.exists(filename):
            # remove the old file
            os.remove(filename)
        if header is not None:
            with open(filename, 'w') as out:
                print(header, file=out)

    def log(self, message):
        with open(self.filename, 'a') as out:
            print(message)
            print(message, file=out)



utils/loader.py (+218, -0)

import json
import random
import torch
import numpy as np
import pickle
import codecs
import re
import os
import datetime
import tqdm
import pandas as pd

# convert userids to a user dict: key - id (int), val - onehot vector (tensor);
# elements in the input list are str type.
def to_onehot_dict(list):
    dict = {}
    length = len(list)
    for index, element in enumerate(list):
        vector = torch.zeros(1, length).long()
        element = int(element)
        vector[:, element] = 1.0
        dict[element] = vector
    return dict
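
# For example, to_onehot_dict(['0', '1']) returns {0: tensor([[1, 0]]), 1: tensor([[0, 1]])}.
# The ids are assumed to be a permutation of 0..len-1; otherwise the column indexing
# above goes out of range.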

def load_list(fname):
    list_ = []
    with open(fname, encoding="utf-8") as f:
        for line in f.readlines():
            list_.append(line.strip())
    return list_

# used for merging dictionaries.
def merge_key(dict1, dict2):
    res = {**dict1, **dict2}
    return res

def merge_value(dict1, dict2):  # merge and item_cold
    for key, value in dict2.items():
        if key in dict1.keys():
            # if list(set(dict1[key]+value)) the final number of movies-1m is 1000205
            new_value = dict1[key] + value
            dict1[key] = new_value
        else:
            print('Unexpected key.')

def count_values(dict):
    count_val = 0
    for key, value in dict.items():
        count_val += len(value)
    return count_val

def construct_dictionary(user_list, total_dict):
    dict = {}
    for i in range(len(user_list)):
        dict[str(user_list[i])] = total_dict[str(user_list[i])]
    return dict

class Preprocess(object):
    """
    Preprocess the training, validation and test data.
    Generate the episode-style data.
    """

    def __init__(self, opt):
        self.batch_size = opt["batch_size"]
        self.opt = opt
        # warm data ratio
        self.train_ratio = opt['train_ratio']
        self.valid_ratio = opt['valid_ratio']
        self.test_ratio = 1 - self.train_ratio - self.valid_ratio
        self.dataset_path = opt["data_dir"]
        self.support_size = opt['support_size']
        self.query_size = opt['query_size']
        self.max_len = opt['max_len']
        # save one-hot dimension length
        uf_dim, if_dim = self.preprocess(self.dataset_path)
        self.uf_dim = uf_dim
        self.if_dim = if_dim

    def preprocess(self, dataset_path):
        """ Preprocess the data and convert to ids. """
        # Create training-validation-test datasets.
        print('Create training, validation and test data from scratch!')
        with open('./{}/interaction_dict_x.json'.format(dataset_path), 'r', encoding='utf-8') as f:
            inter_dict_x = json.loads(f.read())
        with open('./{}/interaction_dict_y.json'.format(dataset_path), 'r', encoding='utf-8') as f:
            inter_dict_y = json.loads(f.read())
        print('The size of total interactions is %d.' % (count_values(inter_dict_x)))  # 42346
        assert count_values(inter_dict_x) == count_values(inter_dict_y)

        with open('./{}/user_list.json'.format(dataset_path), 'r', encoding='utf-8') as f:
            userids = json.loads(f.read())

        with open('./{}/item_list.json'.format(dataset_path), 'r', encoding='utf-8') as f:
            itemids = json.loads(f.read())

        #userids = list(inter_dict_x.keys())
        random.shuffle(userids)
        warm_user_size = int(len(userids) * self.train_ratio)
        valid_user_size = int(len(userids) * self.valid_ratio)
        warm_users = userids[:warm_user_size]
        valid_users = userids[warm_user_size:warm_user_size + valid_user_size]
        cold_users = userids[warm_user_size + valid_user_size:]
        assert len(userids) == len(warm_users) + len(valid_users) + len(cold_users)

        # Construct the training data dict
        training_dict_x = construct_dictionary(warm_users, inter_dict_x)
        training_dict_y = construct_dictionary(warm_users, inter_dict_y)

        # Avoid new items appearing in the test data in the cold-user case.
        item_set = set()
        for i in training_dict_x.values():
            i = set(i)
            item_set = item_set.union(i)

        # Construct one-hot dictionaries
        user_dict = to_onehot_dict(userids)
        # only items contained in all data are encoded.
        item_dict = to_onehot_dict(itemids)

        # This part of the data is not used, so we do not process it for now.
        valid_dict_x = construct_dictionary(valid_users, inter_dict_x)
        valid_dict_y = construct_dictionary(valid_users, inter_dict_y)
        assert count_values(valid_dict_x) == count_values(valid_dict_y)

        test_dict_x = construct_dictionary(cold_users, inter_dict_x)
        test_dict_y = construct_dictionary(cold_users, inter_dict_y)
        assert count_values(test_dict_x) == count_values(test_dict_y)

        print('Before deleting new items, the test data has %d interactions.' % (count_values(test_dict_x)))

        # Delete the new items in the test data.
        unseen_count = 0
        for key, value in test_dict_x.items():
            assert len(value) == len(test_dict_y[key])
            unseen_item_index = [index for index, i in enumerate(value) if i not in item_set]
            unseen_count += len(unseen_item_index)
            if len(unseen_item_index) == 0:
                continue
            else:
                new_value_x = [element for index, element in enumerate(value) if index not in unseen_item_index]
                new_value_y = [test_dict_y[key][index] for index, element in enumerate(value) if index not in unseen_item_index]
                test_dict_x[key] = new_value_x
                test_dict_y[key] = new_value_y
        print('After deleting new items, the test data has %d interactions.' % (count_values(test_dict_x)))
        assert count_values(test_dict_x) == count_values(test_dict_y)
        print('The number of total unseen interactions is %d.' % (unseen_count))

        pickle.dump(training_dict_x, open("{}/training_dict_x_{:2f}.pkl".format(dataset_path, self.train_ratio), "wb"))
        pickle.dump(training_dict_y, open("{}/training_dict_y_{:2f}.pkl".format(dataset_path, self.train_ratio), "wb"))
        pickle.dump(valid_dict_x, open("{}/valid_dict_x_{:2f}.pkl".format(dataset_path, self.valid_ratio), "wb"))
        pickle.dump(valid_dict_y, open("{}/valid_dict_y_{:2f}.pkl".format(dataset_path, self.valid_ratio), "wb"))
        pickle.dump(test_dict_x, open("{}/test_dict_x_{:2f}.pkl".format(dataset_path, self.test_ratio), "wb"))
        pickle.dump(test_dict_y, open("{}/test_dict_y_{:2f}.pkl".format(dataset_path, self.test_ratio), "wb"))

        def generate_episodes(dict_x, dict_y, category, support_size, query_size, max_len, dir="log"):
            idx = 0
            if not os.path.exists("{}/{}/{}".format(dataset_path, category, dir)):
                os.makedirs("{}/{}/{}".format(dataset_path, category, dir))
                os.makedirs("{}/{}/{}".format(dataset_path, category, "evidence"))
            for _, user_id in enumerate(dict_x.keys()):
                u_id = int(user_id)
                seen_music_len = len(dict_x[str(u_id)])
                indices = list(range(seen_music_len))
                # filter out users (i.e., tasks) by their number of interactions
                if seen_music_len < (support_size + query_size) or seen_music_len > max_len:
                    continue
                random.shuffle(indices)
                tmp_x = np.array(dict_x[str(u_id)])
                tmp_y = np.array(dict_y[str(u_id)])

                support_x_app = None
                for m_id in tmp_x[indices[:support_size]]:
                    m_id = int(m_id)
                    tmp_x_converted = torch.cat((item_dict[m_id], user_dict[u_id]), 1)
                    try:
                        support_x_app = torch.cat((support_x_app, tmp_x_converted), 0)
                    except:
                        support_x_app = tmp_x_converted

                query_x_app = None
                for m_id in tmp_x[indices[support_size:]]:
                    m_id = int(m_id)
                    u_id = int(user_id)
                    tmp_x_converted = torch.cat((item_dict[m_id], user_dict[u_id]), 1)
                    try:
                        query_x_app = torch.cat((query_x_app, tmp_x_converted), 0)
                    except:
                        query_x_app = tmp_x_converted

                support_y_app = torch.FloatTensor(tmp_y[indices[:support_size]])
                query_y_app = torch.FloatTensor(tmp_y[indices[support_size:]])

                pickle.dump(support_x_app, open("{}/{}/{}/supp_x_{}.pkl".format(dataset_path, category, dir, idx), "wb"))
                pickle.dump(support_y_app, open("{}/{}/{}/supp_y_{}.pkl".format(dataset_path, category, dir, idx), "wb"))
                pickle.dump(query_x_app, open("{}/{}/{}/query_x_{}.pkl".format(dataset_path, category, dir, idx), "wb"))
                pickle.dump(query_y_app, open("{}/{}/{}/query_y_{}.pkl".format(dataset_path, category, dir, idx), "wb"))
                # used for evidence candidate selection
                with open("{}/{}/{}/supp_x_{}_u_m_ids.txt".format(dataset_path, category, "evidence", idx), "w") as f:
                    for m_id in tmp_x[indices[:support_size]]:
                        f.write("{}\t{}\n".format(u_id, m_id))
                with open("{}/{}/{}/query_x_{}_u_m_ids.txt".format(dataset_path, category, "evidence", idx), "w") as f:
                    for m_id in tmp_x[indices[support_size:]]:
                        f.write("{}\t{}\n".format(u_id, m_id))
                idx += 1

        print("Generate episode data for training.")
        generate_episodes(training_dict_x, training_dict_y, "training", self.support_size, self.query_size, self.max_len)
        print("Generate episode data for validation.")
        generate_episodes(valid_dict_x, valid_dict_y, "validation", self.support_size, self.query_size, self.max_len)
        print("Generate episode data for testing.")
        generate_episodes(test_dict_x, test_dict_y, "testing", self.support_size, self.query_size, self.max_len)

        return len(userids), len(itemids)
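
        # For reference: each episode idx yields four pickles under
        # <data_dir>/<category>/log/ -- supp_x_{idx}.pkl and query_x_{idx}.pkl hold
        # stacked one-hot (item, user) rows, supp_y_{idx}.pkl and query_y_{idx}.pkl the
        # matching ratings -- plus (user_id, item_id) text files under .../evidence/.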




utils/scorer.py (+77, -0)

import math

def AP(ranked_list, ground_truth, topn):
    hits, sum_precs = 0, 0.0
    t = [a for a in ground_truth]
    t.sort(reverse=True)
    t = t[:topn]
    for i in range(topn):
        id = ranked_list[i]
        if ground_truth[id] in t:
            hits += 1
            sum_precs += hits / (i + 1.0)
            t.remove(ground_truth[id])
    if hits > 0:
        return sum_precs / topn
    else:
        return 0.0

def RR(ranked_list, ground_truth, topn):
    t = [a for a in ground_truth]
    t.sort(reverse=True)
    t = t[:topn]
    for i in range(topn):
        id = ranked_list[i]
        if ground_truth[id] in t:
            return 1 / (i + 1.0)
    return 0

def precision(ranked_list, ground_truth, topn):
    t = [a for a in ground_truth]
    t.sort(reverse=True)
    t = t[:topn]
    hits = 0
    for i in range(topn):
        id = ranked_list[i]
        if ground_truth[id] in t:
            t.remove(ground_truth[id])
            hits += 1
    pre = hits / topn
    return pre


def nDCG(ranked_list, ground_truth, topn):
    dcg = 0
    idcg = IDCG(ground_truth, topn)
    # print(ranked_list)
    # input()
    for i in range(topn):
        id = ranked_list[i]
        dcg += ((2 ** ground_truth[id]) - 1) / math.log(i + 2, 2)
    # print('dcg is ', dcg, " n is ", topn)
    # print('idcg is ', idcg, " n is ", topn)
    return dcg / idcg

def IDCG(ground_truth, topn):
    t = [a for a in ground_truth]
    t.sort(reverse=True)
    idcg = 0
    for i in range(topn):
        idcg += ((2 ** t[i]) - 1) / math.log(i + 2, 2)
    return idcg

def add_metric(recommend_list, ALL_group_list, precision_list, ap_list, ndcg_list, topn):
    ndcg = nDCG(recommend_list, ALL_group_list, topn)
    ap = AP(recommend_list, ALL_group_list, topn)
    pre = precision(recommend_list, ALL_group_list, topn)
    precision_list.append(pre)
    ap_list.append(ap)
    ndcg_list.append(ndcg)


def cal_metric(precision_list, ap_list, ndcg_list):
    mpre = sum(precision_list) / len(precision_list)
    map = sum(ap_list) / len(ap_list)
    mndcg = sum(ndcg_list) / len(ndcg_list)
    return mpre, mndcg, map
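
# A toy check of the conventions above (hypothetical values): ranked_list holds
# indices into ground_truth in predicted order, ground_truth holds the true
# ratings, and an item counts as a hit when its rating is among the top-n ratings.
#     ranked, truth = [0, 2, 1, 3], [5, 1, 4, 2]
#     precision(ranked, truth, 2)  # -> 1.0, items 0 and 2 are the true top-2
#     AP(ranked, truth, 2)         # -> 1.0, (1/1 + 2/2) / 2
#     nDCG(ranked, truth, 2)       # -> 1.0, DCG equals IDCG for a perfect ranking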

utils/torch_utils.py (+168, -0)

"""
Utility functions for torch.
"""

import torch
from torch import nn, optim
from torch.optim.optimizer import Optimizer

### class
class MyAdagrad(Optimizer):
    """My modification of the Adagrad optimizer that allows specifying an initial
    accumulator value. This mimics the behavior of the default Adagrad implementation
    in Tensorflow. The default PyTorch Adagrad uses 0 for the initial accumulator value.

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-2)
        lr_decay (float, optional): learning rate decay (default: 0)
        init_accu_value (float, optional): initial accumulator value.
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
    """

    def __init__(self, params, lr=1e-2, lr_decay=0, init_accu_value=0.1, weight_decay=0):
        defaults = dict(lr=lr, lr_decay=lr_decay, init_accu_value=init_accu_value,
                        weight_decay=weight_decay)
        super(MyAdagrad, self).__init__(params, defaults)

        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'] = 0
                state['sum'] = torch.ones(p.data.size()).type_as(p.data) * \
                    init_accu_value

    def share_memory(self):
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['sum'].share_memory_()

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue

                grad = p.grad.data
                state = self.state[p]

                state['step'] += 1

                if group['weight_decay'] != 0:
                    if p.grad.data.is_sparse:
                        raise RuntimeError("weight_decay option is not compatible with sparse gradients")
                    grad = grad.add(group['weight_decay'], p.data)

                clr = group['lr'] / (1 + (state['step'] - 1) * group['lr_decay'])

                if p.grad.data.is_sparse:
                    grad = grad.coalesce()  # the update is non-linear so indices must be unique
                    grad_indices = grad._indices()
                    grad_values = grad._values()
                    size = torch.Size([x for x in grad.size()])

                    def make_sparse(values):
                        constructor = type(p.grad.data)
                        if grad_indices.dim() == 0 or values.dim() == 0:
                            return constructor()
                        return constructor(grad_indices, values, size)
                    state['sum'].add_(make_sparse(grad_values.pow(2)))
                    std = state['sum']._sparse_mask(grad)
                    std_values = std._values().sqrt_().add_(1e-10)
                    p.data.add_(-clr, make_sparse(grad_values / std_values))
                else:
                    state['sum'].addcmul_(1, grad, grad)
                    std = state['sum'].sqrt().add_(1e-10)
                    p.data.addcdiv_(-clr, grad, std)

        return loss

### torch specific functions
def get_optimizer(name, parameters, lr, l2=0):
    if name == 'sgd':
        return torch.optim.SGD(parameters, lr=lr, weight_decay=l2)
    elif name in ['adagrad', 'myadagrad']:
        # use my own adagrad to allow for init accumulator value
        return MyAdagrad(parameters, lr=lr, init_accu_value=0.1, weight_decay=l2)
    elif name == 'adam':
        return torch.optim.Adam(parameters, weight_decay=l2)  # use default lr
    elif name == 'adamax':
        return torch.optim.Adamax(parameters, weight_decay=l2)  # use default lr
    elif name == 'adadelta':
        return torch.optim.Adadelta(parameters, lr=lr, weight_decay=l2)
    else:
        raise Exception("Unsupported optimizer: {}".format(name))
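
# Usage, e.g. get_optimizer('sgd', model.parameters(), lr=0.1). Note that the
# 'adam' and 'adamax' branches ignore the lr argument and fall back to the
# library defaults.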


def change_lr(optimizer, new_lr):
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr


def flatten_indices(seq_lens, width):
    flat = []
    for i, l in enumerate(seq_lens):
        for j in range(l):
            flat.append(i * width + j)
    return flat


def set_cuda(var, cuda):
    if cuda:
        return var.cuda()
    return var


def keep_partial_grad(grad, topk):
    """
    Keep only the topk rows of grads.
    """
    assert topk < grad.size(0)
    grad.data[topk:].zero_()
    return grad

### model IO
def save(model, optimizer, opt, filename):
    params = {
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'config': opt
    }
    try:
        torch.save(params, filename)
    except BaseException:
        print("[ Warning: model saving failed. ]")


def load(model, optimizer, filename):
    try:
        dump = torch.load(filename)
    except BaseException:
        print("[ Fail: model loading failed. ]")
    if model is not None:
        model.load_state_dict(dump['model'])
    if optimizer is not None:
        optimizer.load_state_dict(dump['optimizer'])
    opt = dump['config']
    return model, optimizer, opt


def load_config(filename):
    try:
        dump = torch.load(filename)
    except BaseException:
        print("[ Fail: model loading failed. ]")
    return dump['config']
