
generic algorithms and parameters

master
mohamad maheri 2 years ago
parent
commit
a23a4ec823
2 changed files with 270 additions and 173 deletions
  1. learnToLearn.py  +245 -161
  2. learnToLearnTest.py  +25 -12

learnToLearn.py  +245 -161

import os
import torch
import pickle

from MeLU import MeLU
from options import config
from model_training import training
from data_generation import generate
from evidence_candidate import selection
from model_test import test
from embedding_module import EmbeddingModule

import learn2learn as l2l
from embeddings import item, user
import random
import numpy as np
from learnToLearnTest import test
from fast_adapt import fast_adapt
import gc

if config['use_cuda']:
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
master_path = "/media/external_10TB/10TB/maheri/melu_data5"

# DATA GENERATION
print("DATA GENERATION PHASE")
if not os.path.exists("{}/".format(master_path)):
    os.mkdir("{}/".format(master_path))
    # preparing dataset. It needs about 22GB of your hard disk space.
    generate(master_path)

# TRAINING
print("TRAINING PHASE")
embedding_dim = config['embedding_dim']
fc1_in_dim = config['embedding_dim'] * 8
fc2_in_dim = config['first_fc_hidden_dim']
fc2_out_dim = config['second_fc_hidden_dim']
use_cuda = config['use_cuda']

fc1 = torch.nn.Linear(fc1_in_dim, fc2_in_dim)
fc2 = torch.nn.Linear(fc2_in_dim, fc2_out_dim)
linear_out = torch.nn.Linear(fc2_out_dim, 1)
head = torch.nn.Sequential(fc1, fc2, linear_out)

if use_cuda:
    emb = EmbeddingModule(config).cuda()
else:
    emb = EmbeddingModule(config)

# META LEARNING
print("META LEARNING PHASE")
# head = l2l.algorithms.MetaSGD(head, lr=config['local_lr'], first_order=True)
transform = l2l.optim.ModuleTransform(torch.nn.Linear)
head = l2l.algorithms.GBML(head, transform=transform, lr=config['local_lr'], adapt_transform=True, first_order=False)

if use_cuda:
    head.cuda()

# Setup optimization
print("SETUP OPTIMIZATION PHASE")
all_parameters = list(emb.parameters()) + list(head.parameters())
optimizer = torch.optim.Adam(all_parameters, lr=config['lr'])
# loss = torch.nn.MSELoss(reduction='mean')

# Load training dataset.
print("LOAD DATASET PHASE")
training_set_size = int(len(os.listdir("{}/warm_state".format(master_path))) / 4)
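# each task is stored as four pickles (supp_x, supp_y, query_x, query_y),
# so the number of tasks is the file count divided by 4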
supp_xs_s = []
supp_ys_s = []
query_xs_s = []
query_ys_s = []
for idx in range(training_set_size):
    supp_xs_s.append(pickle.load(open("{}/warm_state/supp_x_{}.pkl".format(master_path, idx), "rb")))
    supp_ys_s.append(pickle.load(open("{}/warm_state/supp_y_{}.pkl".format(master_path, idx), "rb")))
    query_xs_s.append(pickle.load(open("{}/warm_state/query_x_{}.pkl".format(master_path, idx), "rb")))
    query_ys_s.append(pickle.load(open("{}/warm_state/query_y_{}.pkl".format(master_path, idx), "rb")))
total_dataset = list(zip(supp_xs_s, supp_ys_s, query_xs_s, query_ys_s))
del(supp_xs_s, supp_ys_s, query_xs_s, query_ys_s)
training_set_size = len(total_dataset)
batch_size = config['batch_size']
# torch.cuda.empty_cache()

random.shuffle(total_dataset)
num_batch = int(training_set_size / batch_size)
a, b, c, d = zip(*total_dataset)
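# a, b, c, d are tuples of support-x, support-y, query-x, query-y, one entry per task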

print("\n\n\n")
for iteration in range(config['num_epoch']):
    for i in range(num_batch):
        optimizer.zero_grad()
        meta_train_error = 0.0
        meta_train_accuracy = 0.0
        meta_valid_error = 0.0
        meta_valid_accuracy = 0.0
        meta_test_error = 0.0
        meta_test_accuracy = 0.0

        print("EPOCH: ", iteration, " BATCH: ", i)
        supp_xs = list(a[batch_size * i:batch_size * (i + 1)])
        supp_ys = list(b[batch_size * i:batch_size * (i + 1)])
        query_xs = list(c[batch_size * i:batch_size * (i + 1)])
        query_ys = list(d[batch_size * i:batch_size * (i + 1)])
        batch_sz = len(supp_xs)

        if use_cuda:
            for j in range(batch_size):
                supp_xs[j] = supp_xs[j].cuda()
                supp_ys[j] = supp_ys[j].cuda()
                query_xs[j] = query_xs[j].cuda()
                query_ys[j] = query_ys[j].cuda()

        for task in range(batch_sz):
            # print("EPOCH: ", iteration, " BATCH: ", i, "TASK: ", task)
            # Compute meta-training loss
            learner = head.clone()
            temp_sxs = emb(supp_xs[task])
            temp_qxs = emb(query_xs[task])
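            # fast_adapt takes the inner-loop gradient steps on the cloned head using
            # the support set and returns the resulting loss on the query set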

            evaluation_error = fast_adapt(learner,
                                          temp_sxs,
                                          temp_qxs,
                                          supp_ys[task],
                                          query_ys[task],
                                          config['inner']
                                          )

            evaluation_error.backward()
            meta_train_error += evaluation_error.item()


        # Print some metrics
        print('Iteration', iteration)
        print('Meta Train Error', meta_train_error / batch_sz)
        # print('Meta Train Accuracy', meta_train_accuracy / batch_sz)
        # print('Meta Valid Error', meta_valid_error / batch_sz)
        # print('Meta Valid Accuracy', meta_valid_accuracy / batch_sz)
        # print('Meta Test Error', meta_test_error / batch_sz)
        # print('Meta Test Accuracy', meta_test_accuracy / batch_sz)

        # Average the accumulated gradients and optimize
        for p in all_parameters:
            p.grad.data.mul_(1.0 / batch_sz)
        optimizer.step()

        # torch.cuda.empty_cache()
        del (supp_xs, supp_ys, query_xs, query_ys)
        gc.collect()

        print("===============================================\n")


# save model
final_model = torch.nn.Sequential(emb,head)
torch.save(final_model.state_dict(), master_path + "/models_gbml.pkl")

# testing
print("start of test phase")
for test_state in ['warm_state', 'user_cold_state', 'item_cold_state', 'user_and_item_cold_state']:
    test_dataset = None
    test_set_size = int(len(os.listdir("{}/{}".format(master_path, test_state))) / 4)
from learn2learn.optim.transforms import KroneckerTransform
import argparse

def parse_args():
    print("==============")
    parser = argparse.ArgumentParser([], description='Fast Context Adaptation via Meta-Learning (CAVIA), '
                                                      'Classification experiments.')
    print("==============\n")

    parser.add_argument('--seed', type=int, default=53)
    parser.add_argument('--task', type=str, default='multi', help='problem setting: sine or celeba')
    parser.add_argument('--tasks_per_metaupdate', type=int, default=32,
                        help='number of tasks in each batch per meta-update')

    parser.add_argument('--lr_inner', type=float, default=5e-6, help='inner-loop learning rate (per task)')
    parser.add_argument('--lr_meta', type=float, default=5e-5,
                        help='outer-loop learning rate (used with Adam optimiser)')
    # parser.add_argument('--lr_meta_decay', type=float, default=0.9, help='decay factor for meta learning rate')

    parser.add_argument('--inner', type=int, default=5,
                        help='number of gradient steps in inner loop (during training)')
    parser.add_argument('--inner_eval', type=int, default=5,
                        help='number of gradient updates at test time (for evaluation)')

    parser.add_argument('--first_order', action='store_true', default=False,
                        help='run first-order approximation of CAVIA')
    parser.add_argument('--adapt_transform', action='store_true', default=False,
                        help='run adaptation transform')
    parser.add_argument('--transformer', type=str, default="kronoker",
                        help='transformer type')
    parser.add_argument('--meta_algo', type=str, default="gbml",
                        help='MAML/MetaSGD/GBML')
    parser.add_argument('--gpu', type=int, default=0,
                        help='index of the GPU to run the code on')

    # parser.add_argument('--data_root', type=str, default="./movielens/ml-1m", help='path to data root')
    # parser.add_argument('--num_workers', type=int, default=4, help='num of workers to use')
    # parser.add_argument('--test', action='store_true', default=False, help='num of workers to use')

    # parser.add_argument('--embedding_dim', type=int, default=32, help='num of workers to use')
    # parser.add_argument('--first_fc_hidden_dim', type=int, default=64, help='num of workers to use')
    # parser.add_argument('--second_fc_hidden_dim', type=int, default=64, help='num of workers to use')
    # parser.add_argument('--num_epoch', type=int, default=30, help='num of workers to use')
    # parser.add_argument('--num_genre', type=int, default=25, help='num of workers to use')
    # parser.add_argument('--num_director', type=int, default=2186, help='num of workers to use')
    # parser.add_argument('--num_actor', type=int, default=8030, help='num of workers to use')
    # parser.add_argument('--num_rate', type=int, default=6, help='num of workers to use')
    # parser.add_argument('--num_gender', type=int, default=2, help='num of workers to use')
    # parser.add_argument('--num_age', type=int, default=7, help='num of workers to use')
    # parser.add_argument('--num_occupation', type=int, default=21, help='num of workers to use')
    # parser.add_argument('--num_zipcode', type=int, default=3402, help='num of workers to use')

    # parser.add_argument('--rerun', action='store_true', default=False,
    #                     help='Re-run experiment (will override previously saved results)')

    args = parser.parse_args()
    # use the GPU if available
    # args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # print('Running on device: {}'.format(args.device))
    return args

if __name__ == '__main__':
    args = parse_args()
    print(args)

    if config['use_cuda']:
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    master_path = "/media/external_10TB/10TB/maheri/melu_data5"

    # DATA GENERATION
    print("DATA GENERATION PHASE")
    if not os.path.exists("{}/".format(master_path)):
        os.mkdir("{}/".format(master_path))
        # preparing dataset. It needs about 22GB of your hard disk space.
        generate(master_path)

    # TRAINING
    print("TRAINING PHASE")
    embedding_dim = config['embedding_dim']
    fc1_in_dim = config['embedding_dim'] * 8
    fc2_in_dim = config['first_fc_hidden_dim']
    fc2_out_dim = config['second_fc_hidden_dim']
    use_cuda = config['use_cuda']

    fc1 = torch.nn.Linear(fc1_in_dim, fc2_in_dim)
    fc2 = torch.nn.Linear(fc2_in_dim, fc2_out_dim)
    linear_out = torch.nn.Linear(fc2_out_dim, 1)
    head = torch.nn.Sequential(fc1, fc2, linear_out)

    if use_cuda:
        emb = EmbeddingModule(config).cuda()
    else:
        emb = EmbeddingModule(config)

    # META LEARNING
    print("META LEARNING PHASE")
    # head = l2l.algorithms.MetaSGD(head, lr=config['local_lr'], first_order=True)
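    # the transform controls how GBML preconditions each inner-loop gradient step:
    # KroneckerTransform uses a memory-efficient Kronecker-factored matrix, while
    # ModuleTransform(torch.nn.Linear) learns a full linear transform per parameter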

    # define transformer
    transform = None
    if args.transformer == "kronoker":
        transform = KroneckerTransform(l2l.nn.KroneckerLinear)
    elif args.transformer == "linear":
        transform = l2l.optim.ModuleTransform(torch.nn.Linear)

    # define meta algorithm
    if args.meta_algo == "maml":
        head = l2l.algorithms.MAML(head, lr=config['local_lr'], first_order=args.first_order)
    elif args.meta_algo == 'metasgd':
        head = l2l.algorithms.MetaSGD(head, lr=config['local_lr'], first_order=args.first_order)
    elif args.meta_algo == 'gbml':
        head = l2l.algorithms.GBML(head, transform=transform, lr=config['local_lr'],
                                   adapt_transform=args.adapt_transform, first_order=args.first_order)

    if use_cuda:
        head.cuda()

    # Setup optimization
    print("SETUP OPTIMIZATION PHASE")
    all_parameters = list(emb.parameters()) + list(head.parameters())
    optimizer = torch.optim.Adam(all_parameters, lr=config['lr'])
    # loss = torch.nn.MSELoss(reduction='mean')

    # Load training dataset.
    print("LOAD DATASET PHASE")
    training_set_size = int(len(os.listdir("{}/warm_state".format(master_path))) / 4)
    supp_xs_s = []
    supp_ys_s = []
    query_xs_s = []
    query_ys_s = []
    for idx in range(test_set_size):
        supp_xs_s.append(pickle.load(open("{}/{}/supp_x_{}.pkl".format(master_path, test_state, idx), "rb")))
        supp_ys_s.append(pickle.load(open("{}/{}/supp_y_{}.pkl".format(master_path, test_state, idx), "rb")))
        query_xs_s.append(pickle.load(open("{}/{}/query_x_{}.pkl".format(master_path, test_state, idx), "rb")))
        query_ys_s.append(pickle.load(open("{}/{}/query_y_{}.pkl".format(master_path, test_state, idx), "rb")))
    test_dataset = list(zip(supp_xs_s, supp_ys_s, query_xs_s, query_ys_s))
    del (supp_xs_s, supp_ys_s, query_xs_s, query_ys_s)

    print("===================== " + test_state + " =====================")
    test(emb, head, test_dataset, batch_size=config['batch_size'], num_epoch=config['num_epoch'])
    print("===================================================\n\n\n")
    for idx in range(training_set_size):
        supp_xs_s.append(pickle.load(open("{}/warm_state/supp_x_{}.pkl".format(master_path, idx), "rb")))
        supp_ys_s.append(pickle.load(open("{}/warm_state/supp_y_{}.pkl".format(master_path, idx), "rb")))
        query_xs_s.append(pickle.load(open("{}/warm_state/query_x_{}.pkl".format(master_path, idx), "rb")))
        query_ys_s.append(pickle.load(open("{}/warm_state/query_y_{}.pkl".format(master_path, idx), "rb")))
    total_dataset = list(zip(supp_xs_s, supp_ys_s, query_xs_s, query_ys_s))
    del (supp_xs_s, supp_ys_s, query_xs_s, query_ys_s)
    training_set_size = len(total_dataset)
    batch_size = config['batch_size']
    # torch.cuda.empty_cache()

    random.shuffle(total_dataset)
    num_batch = int(training_set_size / batch_size)
    a, b, c, d = zip(*total_dataset)

print("\n\n\n")
for iteration in range(config['num_epoch']):
for i in range(num_batch):
optimizer.zero_grad()
meta_train_error = 0.0
meta_train_accuracy = 0.0
meta_valid_error = 0.0
meta_valid_accuracy = 0.0
meta_test_error = 0.0
meta_test_accuracy = 0.0

print("EPOCH: ", iteration, " BATCH: ", i)
supp_xs = list(a[batch_size * i:batch_size * (i + 1)])
supp_ys = list(b[batch_size * i:batch_size * (i + 1)])
query_xs = list(c[batch_size * i:batch_size * (i + 1)])
query_ys = list(d[batch_size * i:batch_size * (i + 1)])
batch_sz = len(supp_xs)

            # if use_cuda:
            #     for j in range(batch_size):
            #         supp_xs[j] = supp_xs[j].cuda()
            #         supp_ys[j] = supp_ys[j].cuda()
            #         query_xs[j] = query_xs[j].cuda()
            #         query_ys[j] = query_ys[j].cuda()

            for task in range(batch_sz):
                # print("EPOCH: ", iteration, " BATCH: ", i, "TASK: ", task)
                # Compute meta-training loss
                # if use_cuda:
                sxs = supp_xs[task].cuda()
                qxs = query_xs[task].cuda()
                sys = supp_ys[task].cuda()
                qys = query_ys[task].cuda()

                learner = head.clone()
                temp_sxs = emb(sxs)
                temp_qxs = emb(qxs)

                evaluation_error = fast_adapt(learner,
                                              temp_sxs,
                                              temp_qxs,
                                              sys,
                                              qys,
                                              args.inner)
                # config['inner'])

                evaluation_error.backward()
                meta_train_error += evaluation_error.item()

                del (sxs, qxs, sys, qys)
                supp_xs[task].cpu()
                query_xs[task].cpu()
                supp_ys[task].cpu()
                query_ys[task].cpu()
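                # note: Tensor.cpu() returns a copy, so the tensors referenced by
                # supp_xs/query_xs are not actually moved off the GPU here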


            # Print some metrics
            print('Iteration', iteration)
            print('Meta Train Error', meta_train_error / batch_sz)
            # print('Meta Train Accuracy', meta_train_accuracy / batch_sz)
            # print('Meta Valid Error', meta_valid_error / batch_sz)
            # print('Meta Valid Accuracy', meta_valid_accuracy / batch_sz)
            # print('Meta Test Error', meta_test_error / batch_sz)
            # print('Meta Test Accuracy', meta_test_accuracy / batch_sz)

            # Average the accumulated gradients and optimize
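            # backward() was called once per task above, so the shared .grad buffers
            # hold a sum over the batch; rescaling by 1/batch_sz makes it an average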
            for p in all_parameters:
                p.grad.data.mul_(1.0 / batch_sz)
            optimizer.step()

            # torch.cuda.empty_cache()
            del (supp_xs, supp_ys, query_xs, query_ys)
            gc.collect()
            print("===============================================\n")


    # save model
    final_model = torch.nn.Sequential(emb, head)
    torch.save(final_model.state_dict(), master_path + "/models_gbml.pkl")

    # testing
    print("start of test phase")
    for test_state in ['warm_state', 'user_cold_state', 'item_cold_state', 'user_and_item_cold_state']:
        test_dataset = None
        test_set_size = int(len(os.listdir("{}/{}".format(master_path, test_state))) / 4)
        supp_xs_s = []
        supp_ys_s = []
        query_xs_s = []
        query_ys_s = []
        for idx in range(test_set_size):
            supp_xs_s.append(pickle.load(open("{}/{}/supp_x_{}.pkl".format(master_path, test_state, idx), "rb")))
            supp_ys_s.append(pickle.load(open("{}/{}/supp_y_{}.pkl".format(master_path, test_state, idx), "rb")))
            query_xs_s.append(pickle.load(open("{}/{}/query_x_{}.pkl".format(master_path, test_state, idx), "rb")))
            query_ys_s.append(pickle.load(open("{}/{}/query_y_{}.pkl".format(master_path, test_state, idx), "rb")))
        test_dataset = list(zip(supp_xs_s, supp_ys_s, query_xs_s, query_ys_s))
        del (supp_xs_s, supp_ys_s, query_xs_s, query_ys_s)

        print("===================== " + test_state + " =====================")
        test(emb, head, test_dataset, batch_size=config['batch_size'], num_epoch=config['num_epoch'],
             adaptation_step=args.inner_eval)
        print("===================================================\n\n\n")
    print(args)








learnToLearnTest.py  +25 -12

import matchzoo as mz
import numpy as np
from fast_adapt import fast_adapt
from sklearn.metrics import ndcg_score




def test(embedding, head, total_dataset, batch_size, num_epoch):
def test(embedding, head, total_dataset, batch_size, num_epoch, adaptation_step=config['inner']):


    test_set_size = len(total_dataset)
    random.shuffle(total_dataset)
    a, b, c, d = zip(*total_dataset)
    losses_q = []
    ndcgs1 = []
    ndcgs3 = []
    # ndcgs1 = []
    ndcgs11 = []
    # ndcgs111 = []
    # ndcgs3 = []
    ndcgs33 = []
    # ndcgs333 = []


    for iterator in range(test_set_size):
        if config['use_cuda']:
            print("index error in test method")
            continue


        num_local_update = config['inner']
        num_local_update = adaptation_step
        learner = head.clone()
        temp_sxs = embedding(supp_xs)
        temp_qxs = embedding(query_xs)
        predictions = fast_adapt(learner,
                                 temp_sxs,
                                 temp_qxs,
                                 supp_ys,
                                 query_ys,
                                 config['inner'],
                                 get_predictions=True
                                 )
                                 get_predictions=True)


        l1 = L1Loss(reduction='mean')
        loss_q = l1(predictions.view(-1), query_ys)
        # print("testing - iterator:{} - l1:{} ".format(iterator, loss_q))
        losses_q.append(float(loss_q))


        predictions = predictions.view(-1)
        y_true = query_ys.cpu().detach().numpy()
        y_pred = predictions.cpu().detach().numpy()
        ndcgs1.append(float(mz.metrics.NormalizedDiscountedCumulativeGain(k=1)(y_true, y_pred)))
        ndcgs3.append(float(mz.metrics.NormalizedDiscountedCumulativeGain(k=3)(y_true, y_pred)))
        # ndcgs1.append(float(mz.metrics.NormalizedDiscountedCumulativeGain(k=1)(y_true, y_pred)))
        # ndcgs3.append(float(mz.metrics.NormalizedDiscountedCumulativeGain(k=3)(y_true, y_pred)))

        ndcgs11.append(float(ndcg_score([y_true], [y_pred], k=1, sample_weight=None, ignore_ties=False)))
        ndcgs33.append(float(ndcg_score([y_true], [y_pred], k=3, sample_weight=None, ignore_ties=False)))
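        # sklearn's ndcg_score expects 2-D arrays (one row per query), hence the
        # single-element list wrapping; it replaces the matchzoo NDCG used above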


        del supp_xs, supp_ys, query_xs, query_ys, predictions, y_true, y_pred, loss_q
        # torch.cuda.empty_cache()



    # calculate metrics
    # losses_q = torch.stack(losses_q).mean(0)
    losses_q = np.array(losses_q).mean()
    print("mean of mse: ", losses_q)
    n1 = np.array(ndcgs1).mean()
    print("nDCG1: ", n1)
    n3 = np.array(ndcgs3).mean()
    print("nDCG3: ", n3)
    # n1 = np.array(ndcgs1).mean()
    # print("nDCG1: ", n1)
    print("nDCG1: ", np.array(ndcgs11).mean())
    # print("nDCG1: ", np.array(ndcgs111).mean())
    # n3 = np.array(ndcgs3).mean()
    # print("nDCG3: ", n3)
    print("nDCG3: ", np.array(ndcgs33).mean())
    # print("nDCG3: ", np.array(ndcgs333).mean())




