Browse Source

generic algorithms and parameters

master
mohamad maheri 2 years ago
parent
commit
a23a4ec823
2 changed files with 270 additions and 173 deletions
  1. 245
    161
      learnToLearn.py
  2. 25
    12
      learnToLearnTest.py

+ 245
- 161
learnToLearn.py View File

@@ -1,179 +1,263 @@
import os
import torch
import pickle

from MeLU import MeLU
from options import config
from model_training import training
from data_generation import generate
from evidence_candidate import selection
from model_test import test
from embedding_module import EmbeddingModule

import learn2learn as l2l
from embeddings import item, user
import random
import numpy as np
from learnToLearnTest import test
from fast_adapt import fast_adapt
import gc

# =============================================================================
# MeLU-style meta-learning training script (embedding module + GBML head).
# Flow: environment setup -> data generation -> model build -> meta-training
# -> checkpoint save.  The test phase follows this block in the file.
# =============================================================================
if config['use_cuda']:
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
master_path = "/media/external_10TB/10TB/maheri/melu_data5"

# DATA GENERATION
print("DATA GENERATION PHASE")
if not os.path.exists("{}/".format(master_path)):
    os.mkdir("{}/".format(master_path))
    # Preparing dataset. It needs about 22GB of your hard disk space.
    generate(master_path)

# TRAINING
print("TRAINING PHASE")
embedding_dim = config['embedding_dim']
fc1_in_dim = config['embedding_dim'] * 8  # 8 concatenated feature embeddings
fc2_in_dim = config['first_fc_hidden_dim']
fc2_out_dim = config['second_fc_hidden_dim']
use_cuda = config['use_cuda']

# Decision head: two hidden linear layers and a scalar rating output.
fc1 = torch.nn.Linear(fc1_in_dim, fc2_in_dim)
fc2 = torch.nn.Linear(fc2_in_dim, fc2_out_dim)
linear_out = torch.nn.Linear(fc2_out_dim, 1)
head = torch.nn.Sequential(fc1, fc2, linear_out)

emb = EmbeddingModule(config).cuda() if use_cuda else EmbeddingModule(config)

# META LEARNING
print("META LEARNING PHASE")
# head = l2l.algorithms.MetaSGD(head, lr=config['local_lr'], first_order=True)
transform = l2l.optim.ModuleTransform(torch.nn.Linear)
head = l2l.algorithms.GBML(head, transform=transform, lr=config['local_lr'],
                           adapt_transform=True, first_order=False)

if use_cuda:
    head.cuda()

# Setup optimization
print("SETUP OPTIMIZATION PHASE")
all_parameters = list(emb.parameters()) + list(head.parameters())
optimizer = torch.optim.Adam(all_parameters, lr=config['lr'])

# Load training dataset: four pickles per task (supp_x/supp_y/query_x/query_y).
print("LOAD DATASET PHASE")
training_set_size = int(len(os.listdir("{}/warm_state".format(master_path))) / 4)
supp_xs_s = []
supp_ys_s = []
query_xs_s = []
query_ys_s = []
for idx in range(training_set_size):
    # Context managers close the pickle file handles promptly; the original
    # opened them without ever closing them (file-descriptor leak).
    with open("{}/warm_state/supp_x_{}.pkl".format(master_path, idx), "rb") as f:
        supp_xs_s.append(pickle.load(f))
    with open("{}/warm_state/supp_y_{}.pkl".format(master_path, idx), "rb") as f:
        supp_ys_s.append(pickle.load(f))
    with open("{}/warm_state/query_x_{}.pkl".format(master_path, idx), "rb") as f:
        query_xs_s.append(pickle.load(f))
    with open("{}/warm_state/query_y_{}.pkl".format(master_path, idx), "rb") as f:
        query_ys_s.append(pickle.load(f))
total_dataset = list(zip(supp_xs_s, supp_ys_s, query_xs_s, query_ys_s))
del (supp_xs_s, supp_ys_s, query_xs_s, query_ys_s)
training_set_size = len(total_dataset)
batch_size = config['batch_size']

random.shuffle(total_dataset)
num_batch = int(training_set_size / batch_size)  # trailing partial batch is dropped
a, b, c, d = zip(*total_dataset)

print("\n\n\n")
for iteration in range(config['num_epoch']):
    for i in range(num_batch):
        optimizer.zero_grad()
        meta_train_error = 0.0

        print("EPOCH: ", iteration, " BATCH: ", i)
        supp_xs = list(a[batch_size * i:batch_size * (i + 1)])
        supp_ys = list(b[batch_size * i:batch_size * (i + 1)])
        query_xs = list(c[batch_size * i:batch_size * (i + 1)])
        query_ys = list(d[batch_size * i:batch_size * (i + 1)])
        batch_sz = len(supp_xs)

        if use_cuda:
            # FIX: iterate the actual batch length (batch_sz), not batch_size,
            # so a short final batch cannot raise IndexError.
            for j in range(batch_sz):
                supp_xs[j] = supp_xs[j].cuda()
                supp_ys[j] = supp_ys[j].cuda()
                query_xs[j] = query_xs[j].cuda()
                query_ys[j] = query_ys[j].cuda()

        for task in range(batch_sz):
            # Compute the meta-training loss on a per-task clone of the head.
            learner = head.clone()
            temp_sxs = emb(supp_xs[task])
            temp_qxs = emb(query_xs[task])

            evaluation_error = fast_adapt(learner,
                                          temp_sxs,
                                          temp_qxs,
                                          supp_ys[task],
                                          query_ys[task],
                                          config['inner'])

            # Gradients accumulate across tasks; averaged below before step().
            evaluation_error.backward()
            meta_train_error += evaluation_error.item()

        # Print some metrics
        print('Iteration', iteration)
        print('Meta Train Error', meta_train_error / batch_sz)

        # Average the accumulated gradients and optimize.
        for p in all_parameters:
            if p.grad is not None:  # guard params that received no gradient
                p.grad.data.mul_(1.0 / batch_sz)
        optimizer.step()

        del (supp_xs, supp_ys, query_xs, query_ys)
        gc.collect()

    print("===============================================\n")


# save model
final_model = torch.nn.Sequential(emb, head)
torch.save(final_model.state_dict(), master_path + "/models_gbml.pkl")

# testing
print("start of test phase")
for test_state in ['warm_state', 'user_cold_state', 'item_cold_state', 'user_and_item_cold_state']:
test_dataset = None
test_set_size = int(len(os.listdir("{}/{}".format(master_path, test_state))) / 4)
from learn2learn.optim.transforms import KroneckerTransform
import argparse

def parse_args(argv=None):
    """Build and parse the command-line options for the meta-learning run.

    Args:
        argv: Optional list of argument strings. Defaults to ``sys.argv[1:]``
            (new optional parameter; backward compatible for existing callers,
            and makes the parser testable without touching ``sys.argv``).

    Returns:
        argparse.Namespace with the parsed options.
    """
    print("==============")
    # FIX: the original passed `[]` as the first positional argument, which
    # argparse interprets as `prog` (the program name) — it must be a string
    # or omitted.  Also joined the split description with a space and fixed
    # the "Clasification" typo.
    parser = argparse.ArgumentParser(
        description='Fast Context Adaptation via Meta-Learning (CAVIA), '
                    'Classification experiments.')
    print("==============\n")

    parser.add_argument('--seed', type=int, default=53)
    parser.add_argument('--task', type=str, default='multi',
                        help='problem setting: sine or celeba')
    parser.add_argument('--tasks_per_metaupdate', type=int, default=32,
                        help='number of tasks in each batch per meta-update')

    parser.add_argument('--lr_inner', type=float, default=5e-6,
                        help='inner-loop learning rate (per task)')
    parser.add_argument('--lr_meta', type=float, default=5e-5,
                        help='outer-loop learning rate (used with Adam optimiser)')

    parser.add_argument('--inner', type=int, default=5,
                        help='number of gradient steps in inner loop (during training)')
    parser.add_argument('--inner_eval', type=int, default=5,
                        help='number of gradient updates at test time (for evaluation)')

    parser.add_argument('--first_order', action='store_true', default=False,
                        help='run first order approximation of CAVIA')
    parser.add_argument('--adapt_transform', action='store_true', default=False,
                        help='run adaptation transform')
    # NOTE: "kronoker" is a historical misspelling of "kronecker"; kept as-is
    # because downstream code compares against this exact string.
    parser.add_argument('--transformer', type=str, default="kronoker",
                        help='transformer type')
    parser.add_argument('--meta_algo', type=str, default="gbml",
                        help='MAML/MetaSGD/GBML')
    parser.add_argument('--gpu', type=int, default=0,
                        help='number of gpu to run the code')

    args = parser.parse_args(argv)
    return args

if __name__ == '__main__':
    # Entry point: parse CLI options, build the embedding + meta-learned head,
    # meta-train on the warm-state tasks, save a checkpoint, then evaluate on
    # the four cold-start splits.
    args = parse_args()
    print(args)

    if config['use_cuda']:
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    master_path = "/media/external_10TB/10TB/maheri/melu_data5"

    # DATA GENERATION
    print("DATA GENERATION PHASE")
    if not os.path.exists("{}/".format(master_path)):
        os.mkdir("{}/".format(master_path))
        # Preparing dataset. It needs about 22GB of your hard disk space.
        generate(master_path)

    # TRAINING
    print("TRAINING PHASE")
    embedding_dim = config['embedding_dim']
    fc1_in_dim = config['embedding_dim'] * 8  # 8 concatenated feature embeddings
    fc2_in_dim = config['first_fc_hidden_dim']
    fc2_out_dim = config['second_fc_hidden_dim']
    use_cuda = config['use_cuda']

    fc1 = torch.nn.Linear(fc1_in_dim, fc2_in_dim)
    fc2 = torch.nn.Linear(fc2_in_dim, fc2_out_dim)
    linear_out = torch.nn.Linear(fc2_out_dim, 1)
    head = torch.nn.Sequential(fc1, fc2, linear_out)

    emb = EmbeddingModule(config).cuda() if use_cuda else EmbeddingModule(config)

    # META LEARNING
    print("META LEARNING PHASE")

    # Select the gradient transform used by GBML ("kronoker" is a historical
    # misspelling of "kronecker", kept for CLI compatibility).
    transform = None
    if args.transformer == "kronoker":
        transform = KroneckerTransform(l2l.nn.KroneckerLinear)
    elif args.transformer == "linear":
        transform = l2l.optim.ModuleTransform(torch.nn.Linear)

    # Wrap the head with the selected meta-learning algorithm.
    if args.meta_algo == "maml":
        head = l2l.algorithms.MAML(head, lr=config['local_lr'],
                                   first_order=args.first_order)
    elif args.meta_algo == 'metasgd':
        head = l2l.algorithms.MetaSGD(head, lr=config['local_lr'],
                                      first_order=args.first_order)
    elif args.meta_algo == 'gbml':
        head = l2l.algorithms.GBML(head, transform=transform,
                                   lr=config['local_lr'],
                                   adapt_transform=args.adapt_transform,
                                   first_order=args.first_order)
    else:
        # FIX: previously an unknown value silently left `head` unwrapped and
        # the run crashed much later at head.clone(); fail fast instead.
        raise ValueError("unknown --meta_algo: {!r}".format(args.meta_algo))

    if use_cuda:
        head.cuda()

    # Setup optimization
    print("SETUP OPTIMIZATION PHASE")
    all_parameters = list(emb.parameters()) + list(head.parameters())
    optimizer = torch.optim.Adam(all_parameters, lr=config['lr'])

    # Load training dataset: four pickles per task (supp_x/supp_y/query_x/query_y).
    # FIX: removed stray duplicated test-set loading lines that referenced the
    # undefined names `test_set_size`/`test_state` here (leftover diff noise),
    # and close every pickle file handle via context managers.
    print("LOAD DATASET PHASE")
    training_set_size = int(len(os.listdir("{}/warm_state".format(master_path))) / 4)
    supp_xs_s = []
    supp_ys_s = []
    query_xs_s = []
    query_ys_s = []
    for idx in range(training_set_size):
        with open("{}/warm_state/supp_x_{}.pkl".format(master_path, idx), "rb") as f:
            supp_xs_s.append(pickle.load(f))
        with open("{}/warm_state/supp_y_{}.pkl".format(master_path, idx), "rb") as f:
            supp_ys_s.append(pickle.load(f))
        with open("{}/warm_state/query_x_{}.pkl".format(master_path, idx), "rb") as f:
            query_xs_s.append(pickle.load(f))
        with open("{}/warm_state/query_y_{}.pkl".format(master_path, idx), "rb") as f:
            query_ys_s.append(pickle.load(f))
    total_dataset = list(zip(supp_xs_s, supp_ys_s, query_xs_s, query_ys_s))
    del (supp_xs_s, supp_ys_s, query_xs_s, query_ys_s)
    training_set_size = len(total_dataset)
    batch_size = config['batch_size']

    random.shuffle(total_dataset)
    num_batch = int(training_set_size / batch_size)  # trailing partial batch dropped
    a, b, c, d = zip(*total_dataset)

    print("\n\n\n")
    for iteration in range(config['num_epoch']):
        for i in range(num_batch):
            optimizer.zero_grad()
            meta_train_error = 0.0

            print("EPOCH: ", iteration, " BATCH: ", i)
            supp_xs = list(a[batch_size * i:batch_size * (i + 1)])
            supp_ys = list(b[batch_size * i:batch_size * (i + 1)])
            query_xs = list(c[batch_size * i:batch_size * (i + 1)])
            query_ys = list(d[batch_size * i:batch_size * (i + 1)])
            batch_sz = len(supp_xs)

            for task in range(batch_sz):
                # Move one task at a time to the GPU to bound memory use.
                # FIX: the original called .cuda() unconditionally, crashing on
                # CPU-only machines even with use_cuda disabled.
                if use_cuda:
                    s_xs = supp_xs[task].cuda()
                    q_xs = query_xs[task].cuda()
                    s_ys = supp_ys[task].cuda()
                    q_ys = query_ys[task].cuda()
                else:
                    s_xs, q_xs = supp_xs[task], query_xs[task]
                    s_ys, q_ys = supp_ys[task], query_ys[task]

                # Compute the meta-training loss on a per-task clone of the head.
                learner = head.clone()
                temp_sxs = emb(s_xs)
                temp_qxs = emb(q_xs)

                evaluation_error = fast_adapt(learner,
                                              temp_sxs,
                                              temp_qxs,
                                              s_ys,
                                              q_ys,
                                              args.inner)

                # Gradients accumulate across tasks; averaged below before step().
                evaluation_error.backward()
                meta_train_error += evaluation_error.item()

                # Drop the per-task GPU copies as soon as the task is done.
                # (The original also called tensor.cpu() with the result
                # discarded — a no-op — so those lines were removed.)
                del (s_xs, q_xs, s_ys, q_ys)

            # Print some metrics
            print('Iteration', iteration)
            print('Meta Train Error', meta_train_error / batch_sz)

            # Average the accumulated gradients and optimize.
            for p in all_parameters:
                if p.grad is not None:  # guard params that received no gradient
                    p.grad.data.mul_(1.0 / batch_sz)
            optimizer.step()

            del (supp_xs, supp_ys, query_xs, query_ys)
            gc.collect()
        print("===============================================\n")

    # save model
    final_model = torch.nn.Sequential(emb, head)
    torch.save(final_model.state_dict(), master_path + "/models_gbml.pkl")

    # testing
    print("start of test phase")
    for test_state in ['warm_state', 'user_cold_state', 'item_cold_state',
                       'user_and_item_cold_state']:
        test_set_size = int(len(os.listdir("{}/{}".format(master_path, test_state))) / 4)
        supp_xs_s = []
        supp_ys_s = []
        query_xs_s = []
        query_ys_s = []
        for idx in range(test_set_size):
            with open("{}/{}/supp_x_{}.pkl".format(master_path, test_state, idx), "rb") as f:
                supp_xs_s.append(pickle.load(f))
            with open("{}/{}/supp_y_{}.pkl".format(master_path, test_state, idx), "rb") as f:
                supp_ys_s.append(pickle.load(f))
            with open("{}/{}/query_x_{}.pkl".format(master_path, test_state, idx), "rb") as f:
                query_xs_s.append(pickle.load(f))
            with open("{}/{}/query_y_{}.pkl".format(master_path, test_state, idx), "rb") as f:
                query_ys_s.append(pickle.load(f))
        test_dataset = list(zip(supp_xs_s, supp_ys_s, query_xs_s, query_ys_s))
        del (supp_xs_s, supp_ys_s, query_xs_s, query_ys_s)

        print("===================== " + test_state + " =====================")
        test(emb, head, test_dataset, batch_size=config['batch_size'],
             num_epoch=config['num_epoch'], adaptation_step=args.inner_eval)
        print("===================================================\n\n\n")
    print(args)

+ 25
- 12
learnToLearnTest.py View File

@@ -8,16 +8,21 @@ from torch.nn import L1Loss
import matchzoo as mz
import numpy as np
from fast_adapt import fast_adapt
from sklearn.metrics import ndcg_score


def test(embedding,head, total_dataset, batch_size, num_epoch):
def test(embedding,head, total_dataset, batch_size, num_epoch,adaptation_step=config['inner']):

test_set_size = len(total_dataset)
random.shuffle(total_dataset)
a, b, c, d = zip(*total_dataset)
losses_q = []
ndcgs1 = []
ndcgs3 = []
# ndcgs1 = []
ndcgs11 = []
# ndcgs111 = []
# ndcgs3 = []
ndcgs33=[]
# ndcgs333 = []

for iterator in range(test_set_size):
if config['use_cuda']:
@@ -39,7 +44,7 @@ def test(embedding,head, total_dataset, batch_size, num_epoch):
print("index error in test method")
continue

num_local_update = config['inner']
num_local_update = adaptation_step
learner = head.clone()
temp_sxs = embedding(supp_xs)
temp_qxs = embedding(query_xs)
@@ -50,29 +55,37 @@ def test(embedding,head, total_dataset, batch_size, num_epoch):
supp_ys,
query_ys,
config['inner'],
get_predictions=True
)
get_predictions=True)

l1 = L1Loss(reduction='mean')
loss_q = l1(predictions.view(-1), query_ys)
# print("testing - iterator:{} - l1:{} ".format(iterator,loss_q))
losses_q.append(float(loss_q))

predictions = predictions.view(-1)
y_true = query_ys.cpu().detach().numpy()
y_pred = predictions.cpu().detach().numpy()
ndcgs1.append(float(mz.metrics.NormalizedDiscountedCumulativeGain(k=1)(y_true, y_pred)))
ndcgs3.append(float(mz.metrics.NormalizedDiscountedCumulativeGain(k=3)(y_true, y_pred)))
# ndcgs1.append(float(mz.metrics.NormalizedDiscountedCumulativeGain(k=1)(y_true, y_pred)))
# ndcgs3.append(float(mz.metrics.NormalizedDiscountedCumulativeGain(k=3)(y_true, y_pred)))

ndcgs11.append(float(ndcg_score([y_true], [y_pred], k=1, sample_weight=None, ignore_ties=False)))
ndcgs33.append(float(ndcg_score([y_true], [y_pred], k=3, sample_weight=None, ignore_ties=False)))

del supp_xs, supp_ys, query_xs, query_ys, predictions, y_true, y_pred, loss_q
# torch.cuda.empty_cache()


# calculate metrics
# losses_q = torch.stack(losses_q).mean(0)
losses_q = np.array(losses_q).mean()
print("mean of mse: ", losses_q)
n1 = np.array(ndcgs1).mean()
print("nDCG1: ", n1)
n3 = np.array(ndcgs3).mean()
print("nDCG3: ", n3)
# n1 = np.array(ndcgs1).mean()
# print("nDCG1: ", n1)
print("nDCG1: ", np.array(ndcgs11).mean())
# print("nDCG1: ", np.array(ndcgs111).mean())
# n3 = np.array(ndcgs3).mean()
# print("nDCG3: ", n3)
print("nDCG3: ", np.array(ndcgs33).mean())
# print("nDCG3: ", np.array(ndcgs333).mean())



Loading…
Cancel
Save