class ClustringModule(torch.nn.Module):
    def __init__(self, config):
        super(ClustringModule, self).__init__()
        # self.h1_dim = 128
        self.h1_dim = config['cluster_h1_dim']
        # self.h2_dim = 64
        self.h2_dim = config['cluster_h2_dim']
        # self.final_dim = fc1_in_dim
        # self.final_dim = 64
        self.final_dim = config['cluster_final_dim']
        # self.dropout_rate = 0
        self.dropout_rate = config['cluster_dropout_rate']
        layers = [nn.Linear(config['embedding_dim'] * 8 + 1, self.h1_dim),
                  torch.nn.Dropout(self.dropout_rate),
                  # assumed h1_dim -> h2_dim projection so the dimensions line up;
                  # without it the final layer's h2_dim input would not match
                  nn.Linear(self.h1_dim, self.h2_dim),
                  nn.Linear(self.h2_dim, self.final_dim)]
        self.input_to_hidden = nn.Sequential(*layers)
        # self.clusters_k = 7
        self.clusters_k = config['cluster_k']
        self.embed_size = self.final_dim
        self.array = nn.Parameter(init.xavier_uniform_(torch.FloatTensor(self.clusters_k, self.embed_size)))
        # self.temperature = 1.0
        self.temperature = config['temperature']
    def aggregate(self, z_i):
        return torch.mean(z_i, dim=0)
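    # The forward pass is not part of this diff. A minimal sketch of a soft
    # cluster assignment over the learned centers `self.array`, softened by
    # `self.temperature` (an illustrative assumption, not the repository's
    # actual implementation):
    #
    # def forward(self, task_rows):
    #     h = self.input_to_hidden(task_rows)          # (N, final_dim)
    #     task_embed = self.aggregate(h)               # (final_dim,)
    #     logits = torch.matmul(self.array, task_embed) / self.temperature
    #     assignment = torch.softmax(logits, dim=0)    # soft weights over k clusters
    #     task_repr = torch.matmul(assignment, self.array)
    #     return task_repr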
        # cluster module
        self.cluster_module = ClustringModule(config)
        # self.task_dim = fc1_in_dim
        self.task_dim = config['cluster_final_dim']
        # transform task to weights
        self.film_layer_1_beta = nn.Linear(self.task_dim, fc2_in_dim, bias=False)
        self.film_layer_1_gamma = nn.Linear(self.task_dim, fc2_in_dim, bias=False)
        # film_layer_2_beta is assumed to mirror the beta/gamma pair of layer 1;
        # only the gamma half of layer 2 appears in this diff
        self.film_layer_2_beta = nn.Linear(self.task_dim, fc2_out_dim, bias=False)
        self.film_layer_2_gamma = nn.Linear(self.task_dim, fc2_out_dim, bias=False)
        # self.film_layer_3_beta = nn.Linear(self.task_dim, self.h3_dim, bias=False)
        # self.film_layer_3_gamma = nn.Linear(self.task_dim, self.h3_dim, bias=False)
        # self.dropout_rate = 0
        self.dropout_rate = config['trainer_dropout_rate']
        self.dropout = nn.Dropout(self.dropout_rate)
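        # FiLM (feature-wise linear modulation) conditions each hidden
        # activation h on the task representation t as h' = gamma(t) * h + beta(t).
        # Sketch of how these layers would be applied in a forward pass
        # (illustrative; the forward itself is not part of this diff):
        #
        #   task_repr = self.cluster_module(task_embed)
        #   h1 = fc1(x)
        #   h1 = self.film_layer_1_gamma(task_repr) * h1 + self.film_layer_1_beta(task_repr)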
    def aggregate(self, z_i):
        # body truncated in the diff; assumed identical to
        # ClustringModule.aggregate above
        return torch.mean(z_i, dim=0)
def main(num_samples, max_num_epochs=20, gpus_per_trial=2):
    # data_dir = os.path.abspath("/media/external_10TB/10TB/maheri/melu_data5")
    data_dir = os.path.abspath("/media/external_10TB/10TB/maheri/define_task_melu_data")
    load_data(data_dir)
    config = {
        # "l1": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        # "lr": tune.loguniform(1e-4, 1e-1),
        # "batch_size": tune.choice([2, 4, 8, 16])
        "transformer": tune.choice(['kronoker']),
| "meta_algo":tune.choice(['gbml']), | |||||
| "first_order":tune.choice([False]), | |||||
| "adapt_transform":tune.choice([True,False]), | |||||
| "meta_algo": tune.choice(['gbml']), | |||||
| "first_order": tune.choice([False]), | |||||
| "adapt_transform": tune.choice([True, False]), | |||||
| # "local_lr":tune.choice([5e-6,5e-4,5e-3]), | # "local_lr":tune.choice([5e-6,5e-4,5e-3]), | ||||
| # "lr":tune.choice([5e-5,5e-4]), | # "lr":tune.choice([5e-5,5e-4]), | ||||
| "local_lr":tune.loguniform(5e-6,5e-3), | |||||
| "lr":tune.loguniform(5e-5,5e-3), | |||||
| "batch_size":tune.choice([16,32,64]), | |||||
| "inner":tune.choice([7,5,4,3,1]), | |||||
| "test_state":tune.choice(["user_and_item_cold_state"]), | |||||
| # "epochs":tune.choice([5,10,20,25]), | |||||
| "local_lr": tune.loguniform(5e-6, 5e-3), | |||||
| "lr": tune.loguniform(5e-5, 5e-3), | |||||
| "batch_size": tune.choice([16, 32, 64]), | |||||
| "inner": tune.choice([7, 5, 4, 3, 1]), | |||||
| "test_state": tune.choice(["user_and_item_cold_state"]), | |||||
| "embedding_dim": tune.choice([16, 32, 64]), | |||||
| "first_fc_hidden_dim": tune.choice([32, 64, 128]), | |||||
| "second_fc_hidden_dim": tune.choice([32, 64]), | |||||
| 'cluster_h1_dim': tune.choice([256, 128, 64]), | |||||
| 'cluster_h2_dim': tune.choice([128, 64, 32]), | |||||
| 'cluster_final_dim': tune.choice([64, 32]), | |||||
| 'cluster_dropout_rate': tune.choice([0, 0.01, 0.1]), | |||||
| 'cluster_k': tune.choice([3, 5, 7, 9, 11]), | |||||
| 'temperature': tune.choice([0.1, 0.5, 1.0, 2.0, 10.0]), | |||||
| 'trainer_dropout_rate': tune.choice([0, 0.01, 0.1]), | |||||
| } | } | ||||
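    # Each trial draws one concrete sample from this space (e.g.
    # {'transformer': 'kronoker', 'meta_algo': 'gbml', 'lr': 3e-4,
    #  'cluster_k': 7, ...}) and train_melu reads the values via conf[...].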
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=30,
        grace_period=10,
        reduction_factor=2)
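    # ASHA stops weak trials early: every trial gets at least grace_period (10)
    # reported iterations, successive rungs keep roughly the top
    # 1/reduction_factor (half) of trials, and no trial exceeds max_t (30).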
    reporter = CLIReporter(
        # parameter_columns=["l1", "l2", "lr", "batch_size"],
        metric_columns=["loss", "ndcg1", "ndcg3", "training_iteration"])
    result = tune.run(
        partial(train_melu, data_dir=data_dir),
        resources_per_trial={"cpu": 4, "gpu": gpus_per_trial},
        # config, num_samples, scheduler, and reporter are assumed to be wired
        # in here; the diff otherwise leaves them defined but unused
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        log_to_file=True,
        # resume=True,
        local_dir="./hyper_tunning_all_cold",
        name="melu_all_cold_clustered",
    )
if __name__ == "__main__":
    # You can change the number of GPUs per trial here:
    main(num_samples=150, max_num_epochs=25, gpus_per_trial=1)
from sklearn.metrics import ndcg_score
def hyper_test(embedding, head, total_dataset, adaptation_step):
    test_set_size = len(total_dataset)
    random.shuffle(total_dataset)
    a, b, c, d = zip(*total_dataset)
    losses_q = []
    ndcgs11 = []
    ndcgs33 = []
    head.eval()
    for iterator in range(test_set_size):
        # assumed unpacking/clone (not in this diff): ordering supp_x, supp_y,
        # query_x, query_y from the zip above; fresh clone per task as in train_melu
        supp_xs, supp_ys = a[iterator].cuda(), b[iterator].cuda()
        query_xs, query_ys = c[iterator].cuda(), d[iterator].cuda()
        learner = head.clone()
        temp_sxs = embedding(supp_xs)
        temp_qxs = embedding(query_xs)
        evaluation_error, predictions = fast_adapt(learner,
                                                   temp_sxs,
                                                   temp_qxs,
                                                   supp_ys,
                                                   query_ys,
                                                   adaptation_step,
                                                   get_predictions=True)
        l1 = L1Loss(reduction='mean')
        loss_q = l1(predictions.view(-1), query_ys)
        losses_q.append(loss_q)  # assumed accumulation; losses_q is otherwise returned empty
        # the NDCG computation (ndcgs11/ndcgs33) appears to be elided from this
        # diff; the placeholders below keep the return signature intact
        ndcg1 = 0
        ndcg3 = 0
    head.train()
    return losses_q, ndcg1, ndcg3
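# `fast_adapt` is imported from elsewhere in the repo. A minimal sketch of a
# learn2learn-style helper matching the call signature used above (an
# illustrative assumption, not the repository's actual implementation):
#
# def fast_adapt(learner, support_x, query_x, support_y, query_y,
#                adaptation_steps, get_predictions=False):
#     criterion = nn.MSELoss()
#     for _ in range(adaptation_steps):
#         support_error = criterion(learner(support_x).view(-1), support_y)
#         learner.adapt(support_error)  # inner-loop update on the clone
#     predictions = learner(query_x)
#     evaluation_error = criterion(predictions.view(-1), query_y)
#     if get_predictions:
#         return evaluation_error, predictions
#     return evaluation_error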
import gc
from learn2learn.optim.transforms import KroneckerTransform
from hyper_testing import hyper_test
from clustering import Trainer
# Define paths (for data)
# master_path = "/media/external_10TB/10TB/maheri/melu_data5"

def load_data(data_dir=None, test_state='warm_state'):
    training_set_size = int(len(os.listdir("{}/warm_state".format(data_dir))) / 4)
    supp_xs_s = []
    supp_ys_s = []
    del supp_xs_s, supp_ys_s, query_xs_s, query_ys_s
    random.shuffle(test_dataset)
    random.shuffle(trainset)
    val_size = int(test_set_size * 0.2)
    validationset = test_dataset[:val_size]
    testset = test_dataset[val_size:]
    return trainset, validationset, testset
def train_melu(conf, checkpoint_dir=None, data_dir=None):
    print("inajm1:", checkpoint_dir)
    embedding_dim = conf['embedding_dim']
    fc1_in_dim = conf['embedding_dim'] * 8
    fc2_in_dim = conf['first_fc_hidden_dim']
    fc2_out_dim = conf['second_fc_hidden_dim']
    # fc1 = torch.nn.Linear(fc1_in_dim, fc2_in_dim)
    # fc2 = torch.nn.Linear(fc2_in_dim, fc2_out_dim)
    # linear_out = torch.nn.Linear(fc2_out_dim, 1)
    # head = torch.nn.Sequential(fc1, fc2, linear_out)
    emb = EmbeddingModule(conf).cuda()  # conf, not config: matches the renamed parameter
    # the "kronoker" branch is assumed from the KroneckerTransform import;
    # only the "linear" branch appears in this diff
    if conf['transformer'] == "kronoker":
        transform = KroneckerTransform(l2l.nn.KroneckerLinear)
    elif conf['transformer'] == "linear":
        transform = l2l.optim.ModuleTransform(torch.nn.Linear)
    trainer = Trainer(conf)
    # define meta algorithm
    if conf['meta_algo'] == "maml":
        trainer = l2l.algorithms.MAML(trainer, lr=conf['local_lr'], first_order=conf['first_order'])
    elif conf['meta_algo'] == 'metasgd':
        trainer = l2l.algorithms.MetaSGD(trainer, lr=conf['local_lr'], first_order=conf['first_order'])
    elif conf['meta_algo'] == 'gbml':
        trainer = l2l.algorithms.GBML(trainer, transform=transform, lr=conf['local_lr'],
                                      adapt_transform=conf['adapt_transform'], first_order=conf['first_order'])
    trainer.cuda()
    # net = nn.Sequential(emb, head)
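    # GBML generalizes MAML: clone() yields a per-task learner whose adapt()
    # takes gradient steps at lr=local_lr, with the update preconditioned by
    # `transform` (Kronecker-factored here); adapt_transform=True also updates
    # the transform itself during inner-loop adaptation.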
    criterion = nn.MSELoss()
    all_parameters = list(emb.parameters()) + list(trainer.parameters())
    optimizer = torch.optim.Adam(all_parameters, lr=conf['lr'])
    if checkpoint_dir:
        print("in checkpoint - bug happened")
        # model_state, optimizer_state = torch.load(
        #     os.path.join(checkpoint_dir, "checkpoint"))
        # net.load_state_dict(model_state)
        # optimizer.load_state_dict(optimizer_state)
    # loading data
    print(conf['test_state'])
    train_dataset, validation_dataset, test_dataset = load_data(data_dir, test_state=conf['test_state'])
    batch_size = conf['batch_size']
    num_batch = int(len(train_dataset) / batch_size)
    a, b, c, d = zip(*train_dataset)
    for epoch in range(conf['num_epoch']):  # loop over the dataset multiple times
        # (the batch/task loop headers are not part of this diff; the lines
        # below sit inside the per-task loop, and the sxs/qxs unpacking is
        # assumed to mirror sys/qys)
            sxs = supp_xs[task].cuda()
            qxs = query_xs[task].cuda()
            sys = supp_ys[task].cuda()
            qys = query_ys[task].cuda()
            learner = trainer.clone()
            temp_sxs = emb(sxs)
            temp_qxs = emb(qxs)
            evaluation_error = fast_adapt(learner,
                                          temp_sxs,
                                          temp_qxs,
                                          sys,
                                          qys,
                                          conf['inner'])
            evaluation_error.backward()
            meta_train_error += evaluation_error.item()
            del sxs, qxs, sys, qys
            # NOTE: Tensor.cpu() is not in-place; these calls return copies
            # that are immediately discarded
            supp_xs[task].cpu()
            query_xs[task].cpu()
            supp_ys[task].cpu()
            gc.collect()
        # test results on the validation data
        val_loss, val_ndcg1, val_ndcg3 = hyper_test(emb, trainer, validation_dataset, adaptation_step=conf['inner'])
        # with tune.checkpoint_dir(epoch) as checkpoint_dir:
        #     path = os.path.join(checkpoint_dir, "checkpoint")
        #     torch.save((net.state_dict(), optimizer.state_dict()), path)
        tune.report(loss=val_loss, ndcg1=val_ndcg1, ndcg3=val_ndcg3)
    print("Finished Training")
| print("start of test phase") | print("start of test phase") | ||||
| trainer.eval() | trainer.eval() | ||||
| with open("results.txt", "a") as f: | |||||
| with open("results2.txt", "a") as f: | |||||
| f.write("epoch:{}\n".format(iteration)) | f.write("epoch:{}\n".format(iteration)) | ||||
for test_state in ['user_cold_state', 'item_cold_state', 'user_and_item_cold_state']:
    print("===================== " + test_state + " =====================")
    mse, ndc1, ndc3 = test(emb, trainer, test_dataset, batch_size=config['batch_size'],
                           num_epoch=config['num_epoch'], test_state=test_state, args=args)
    with open("results2.txt", "a") as f:
        f.write("{}\t{}\t{}\n".format(mse, ndc1, ndc3))
    print("===================================================")
    # test_dataset is presumably reloaded per test_state (not shown in this
    # diff); otherwise this del would break the next iteration
    del test_dataset
    gc.collect()
| with open("results.txt", "a") as f: | |||||
| with open("results2.txt", "a") as f: | |||||
| f.write("\n") | f.write("\n") | ||||
| print("\n\n\n") | print("\n\n\n") | ||||