There are problems unless the inner loop is set larger than one.

define_task
         self.final_dim = 32
         self.dropout_rate = 0
-        layers = [nn.Linear(config['embedding_dim'] * 8, self.h1_dim),
+        layers = [nn.Linear(config['embedding_dim'] * 8 + 1, self.h1_dim),
                   torch.nn.Dropout(self.dropout_rate),
                   nn.ReLU(inplace=True),
                   nn.Linear(self.h1_dim, self.h2_dim),

         self.clusters_k = 7
         self.embed_size = self.final_dim
         self.array = nn.Parameter(init.xavier_uniform_(torch.FloatTensor(self.clusters_k, self.embed_size)))
-        self.temperature = 1.0
+        self.temperature = 10.0

     def aggregate(self, z_i):
         return torch.mean(z_i, dim=0)

-    def forward(self, task_embed, training=True):
-        task_embed = self.input_to_hidden(task_embed)
+    def forward(self, task_embed, y, training=True):
+        y = y.view(-1, 1)
+        input_pairs = torch.cat((task_embed, y), dim=1)
+        task_embed = self.input_to_hidden(input_pairs)
         # todo : may be useless
         mean_task = self.aggregate(task_embed)

         self.film_layer_2_gamma = nn.Linear(self.task_dim, fc2_out_dim, bias=False)
         # self.film_layer_3_beta = nn.Linear(self.task_dim, self.h3_dim, bias=False)
         # self.film_layer_3_gamma = nn.Linear(self.task_dim, self.h3_dim, bias=False)
-        self.dropout_rate = 0
+        self.dropout_rate = 0.1
         self.dropout = nn.Dropout(self.dropout_rate)
+        self.gamma_1, self.beta_1, self.gamma_2, self.beta_2 = None, None, None, None

     def aggregate(self, z_i):
         return torch.mean(z_i, dim=0)

-    def forward(self, task_embed):
-        C, clustered_task_embed = self.cluster_module(task_embed)
-        # hidden layers
-        # todo : adding activation function or remove it
-        hidden_1 = self.fc1(task_embed)
-        beta_1 = torch.tanh(self.film_layer_1_beta(clustered_task_embed))
-        gamma_1 = torch.tanh(self.film_layer_1_gamma(clustered_task_embed))
-        hidden_1 = torch.mul(hidden_1, gamma_1) + beta_1
-        hidden_1 = self.dropout(hidden_1)
-        hidden_2 = F.relu(hidden_1)
-        hidden_2 = self.fc2(hidden_2)
-        beta_2 = torch.tanh(self.film_layer_2_beta(clustered_task_embed))
-        gamma_2 = torch.tanh(self.film_layer_2_gamma(clustered_task_embed))
-        hidden_2 = torch.mul(hidden_2, gamma_2) + beta_2
-        hidden_2 = self.dropout(hidden_2)
-        hidden_3 = F.relu(hidden_2)
-        y_pred = self.linear_out(hidden_3)
+    def forward(self, task_embed, y, training):
+        if training:
+            C, clustered_task_embed = self.cluster_module(task_embed, y)
+            # hidden layers
+            # todo : adding activation function or remove it
+            hidden_1 = self.fc1(task_embed)
+            beta_1 = torch.tanh(self.film_layer_1_beta(clustered_task_embed))
+            gamma_1 = torch.tanh(self.film_layer_1_gamma(clustered_task_embed))
+            hidden_1 = torch.mul(hidden_1, gamma_1) + beta_1
+            hidden_1 = self.dropout(hidden_1)
+            hidden_2 = F.relu(hidden_1)
+            hidden_2 = self.fc2(hidden_2)
+            beta_2 = torch.tanh(self.film_layer_2_beta(clustered_task_embed))
+            gamma_2 = torch.tanh(self.film_layer_2_gamma(clustered_task_embed))
+            hidden_2 = torch.mul(hidden_2, gamma_2) + beta_2
+            hidden_2 = self.dropout(hidden_2)
+            hidden_3 = F.relu(hidden_2)
+            y_pred = self.linear_out(hidden_3)
+            self.gamma_1, self.beta_1, self.gamma_2, self.beta_2 = gamma_1, beta_1, gamma_2, beta_2
+        else:
+            hidden_1 = self.fc1(task_embed)
+            hidden_1 = torch.mul(hidden_1, self.gamma_1) + self.beta_1
+            hidden_1 = self.dropout(hidden_1)
+            hidden_2 = F.relu(hidden_1)
+            hidden_2 = self.fc2(hidden_2)
+            hidden_2 = torch.mul(hidden_2, self.gamma_2) + self.beta_2
+            hidden_2 = self.dropout(hidden_2)
+            hidden_3 = F.relu(hidden_2)
+            y_pred = self.linear_out(hidden_3)
         return y_pred
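Note on the changes above: the cluster module's internals are not part of this diff, but self.array holds clusters_k learnable centroids and self.temperature controls how soft the cluster assignment is (raising it from 1.0 to 10.0 flattens the assignment weights). Below is a minimal sketch of a typical temperature-scaled soft assignment; the squared-Euclidean distance and the shapes are assumptions, not the repo's actual implementation.

import torch
import torch.nn.functional as F

def soft_cluster(task_embed, centroids, temperature):
    # task_embed: (n, d); centroids: (k, d), e.g. self.array
    dist = torch.cdist(task_embed, centroids) ** 2   # (n, k) squared distances
    # higher temperature -> softer, more uniform weights C
    C = F.softmax(-dist / temperature, dim=1)        # (n, k)
    clustered_task_embed = C @ centroids             # (n, d)
    return C, clustered_task_embed

The other key change is the training flag: on the support pass the FiLM parameters (gamma_1, beta_1, gamma_2, beta_2) are computed from the label-conditioned task embedding and cached on the module; the evaluation pass has no labels, so it reuses the cached values (see fast_adapt below).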
 import torch
 import pickle

 def fast_adapt(
         learn,
         adaptation_data,
         evaluation_data,
         adaptation_labels,
         evaluation_labels,
         adaptation_steps,
-        get_predictions = False):
+        get_predictions=False):
     for step in range(adaptation_steps):
-        temp = learn(adaptation_data)
+        temp = learn(adaptation_data, adaptation_labels, training=True)
         train_error = torch.nn.functional.mse_loss(temp.view(-1), adaptation_labels)
         learn.adapt(train_error)
-    predictions = learn(evaluation_data)
+    predictions = learn(evaluation_data, None, training=False)
     # loss = torch.nn.MSELoss(reduction='mean')
     # valid_error = loss(predictions, evaluation_labels)
-    valid_error = torch.nn.functional.mse_loss(predictions.view(-1),evaluation_labels)
+    valid_error = torch.nn.functional.mse_loss(predictions.view(-1), evaluation_labels)
     if get_predictions:
-        return valid_error,predictions
+        return valid_error, predictions
     return valid_error
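learn.adapt(train_error) here and head.clone() in the test function below match the learn2learn MAML API, so learn is presumably a clone of an l2l.algorithms.MAML-wrapped head. A self-contained usage sketch under that assumption; ToyHead and all dimensions are made up for illustration.

import torch
import learn2learn as l2l

class ToyHead(torch.nn.Module):
    # stand-in for the FiLM head: accepts (x, y, training) like the new forward
    def __init__(self, dim=10):
        super().__init__()
        self.fc = torch.nn.Linear(dim, 1)

    def forward(self, x, y=None, training=True):
        return self.fc(x)

maml = l2l.algorithms.MAML(ToyHead(), lr=0.01, first_order=False)
learner = maml.clone()  # task-specific copy; inner-loop grads flow back to maml
supp_x, supp_y = torch.randn(5, 10), torch.randn(5)
query_x, query_y = torch.randn(5, 10), torch.randn(5)
valid_error = fast_adapt(learner, supp_x, query_x, supp_y, query_y, adaptation_steps=2)
valid_error.backward()  # outer-loop (meta) gradient step follows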
                         help='outer-loop learning rate (used with Adam optimiser)')
 # parser.add_argument('--lr_meta_decay', type=float, default=0.9, help='decay factor for meta learning rate')
-parser.add_argument('--inner', type=int, default=5,
+parser.add_argument('--inner', type=int, default=2,
                     help='number of gradient steps in inner loop (during training)')
-parser.add_argument('--inner_eval', type=int, default=5,
+parser.add_argument('--inner_eval', type=int, default=2,
                     help='number of gradient updates at test time (for evaluation)')
 parser.add_argument('--first_order', action='store_true', default=False,

 gc.collect()
 print("===================== " + test_state + " =====================")
-mse, ndc1, ndc3 = test(emb, trainer, test_dataset, batch_size=config['batch_size'], num_epoch=config['num_epoch'], test_state=test_state)
+mse, ndc1, ndc3 = test(emb, trainer, test_dataset, batch_size=config['batch_size'], num_epoch=config['num_epoch'], test_state=test_state, args=args)
 with open("results.txt", "a") as f:
     f.write("{}\t{}\t{}\n".format(mse, ndc1, ndc3))
 print("===================================================")
 import gc

-def test(embedding, head, total_dataset, batch_size, num_epoch, test_state=None):
+def test(embedding, head, total_dataset, batch_size, num_epoch, test_state=None, args=None):
     losses_q = []
     ndcgs1 = []
     ndcgs3 = []

         print("index error in test method")
         continue
-    num_local_update = config['inner']
     learner = head.clone()
     temp_sxs = embedding(supp_xs)
     temp_qxs = embedding(query_xs)

         supp_ys,
         query_ys,
         # config['inner'],
-        config['inner'],
+        args.inner_eval,
         get_predictions=True)
     l1 = L1Loss(reduction='mean')
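test() accumulates ndcgs1 and ndcgs3 alongside the MSE and L1 losses, but the metric itself is not part of this diff. For reference, a minimal NDCG@k sketch; the function name and the exponential gain are assumptions, not the repo's implementation.

import numpy as np

def ndcg_at_k(true_scores, pred_scores, k):
    true_scores = np.asarray(true_scores, dtype=float)
    pred_scores = np.asarray(pred_scores, dtype=float)
    k = min(k, len(pred_scores))
    discounts = np.log2(np.arange(2, k + 2))
    order = np.argsort(pred_scores)[::-1][:k]       # predicted top-k
    dcg = np.sum((2.0 ** true_scores[order] - 1) / discounts)
    ideal = np.sort(true_scores)[::-1][:k]          # best achievable ordering
    idcg = np.sum((2.0 ** ideal - 1) / discounts)
    return dcg / idcg if idcg > 0 else 0.0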