@@ -9,19 +9,14 @@ from torch.nn import functional as F
 class ClustringModule(torch.nn.Module):
-    def __init__(self, config):
+    def __init__(self, config_param):
         super(ClustringModule, self).__init__()
-        # self.h1_dim = 128
-        self.h1_dim = config['cluster_h1_dim']
-        # self.h2_dim = 64
-        self.h2_dim = config['cluster_h2_dim']
-        # self.final_dim = fc1_in_dim
-        # self.final_dim = 64
-        self.final_dim = config['cluster_final_dim']
-        # self.dropout_rate = 0
-        self.dropout_rate = config['cluster_dropout_rate']
+        self.h1_dim = config_param['cluster_h1_dim']
+        self.h2_dim = config_param['cluster_h2_dim']
+        self.final_dim = config_param['cluster_final_dim']
+        self.dropout_rate = config_param['cluster_dropout_rate']
-        layers = [nn.Linear(config['embedding_dim'] * 8 + 1, self.h1_dim),
+        layers = [nn.Linear(config_param['embedding_dim'] * 8 + 1, self.h1_dim),
                   torch.nn.Dropout(self.dropout_rate),
                   nn.ReLU(inplace=True),
                   # nn.BatchNorm1d(self.h1_dim),
@@ -32,12 +27,10 @@ class ClustringModule(torch.nn.Module):
                   nn.Linear(self.h2_dim, self.final_dim)]
         self.input_to_hidden = nn.Sequential(*layers)
-        # self.clusters_k = 7
-        self.clusters_k = config['cluster_k']
+        self.clusters_k = config_param['cluster_k']
         self.embed_size = self.final_dim
         self.array = nn.Parameter(init.xavier_uniform_(torch.FloatTensor(self.clusters_k, self.embed_size)))
-        # self.temperature = 1.0
-        self.temperature = config['temperature']
+        self.temperature = config_param['temperature']

     def aggregate(self, z_i):
         return torch.mean(z_i, dim=0)
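For orientation, the renamed constructor now pulls every dimension from the dict it is handed. A hypothetical smoke test, assuming `ClustringModule` from this diff is in scope; the key names are taken from the hunks above and below, while the values are illustrative only:

```python
# Hypothetical config for ClustringModule after the config -> config_param rename.
# Keys mirror the diff; values are made up for illustration.
cfg = {
    'embedding_dim': 32,           # input width becomes 32 * 8 + 1 = 257
    'cluster_h1_dim': 128,
    'cluster_h2_dim': 64,
    'cluster_final_dim': 64,
    'cluster_dropout_rate': 0.01,
    'cluster_k': 5,
    'temperature': 1.0,
}
module = ClustringModule(cfg)      # assumes the class above is importable
```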
@@ -50,7 +43,6 @@ class ClustringModule(torch.nn.Module):
         # todo : may be useless
         mean_task = self.aggregate(task_embed)
-        # C_distribution, new_task_embed = self.memoryunit(mean_task)
         res = torch.norm(mean_task - self.array, p=2, dim=1, keepdim=True)
         res = torch.pow((res / self.temperature) + 1, (self.temperature + 1) / -2)
         # 1*k
@@ -59,23 +51,23 @@ class ClustringModule(torch.nn.Module):
         value = torch.mm(C, self.array)
         # simple add operation
         new_task_embed = value + mean_task
         # calculate target distribution
         return C, new_task_embed
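This hunk, together with the previous one, computes a soft assignment of the mean task embedding to the `clusters_k` learnable centroids in `self.array`, using a Student's-t-style kernel in the spirit of Deep Embedded Clustering (`temperature` plays roughly the role of DEC's degrees-of-freedom parameter). The step that normalizes `res` into the 1×k distribution `C` sits in lines elided from this diff, so it is an assumption in the sketch below:

```python
# Standalone sketch of ClustringModule.forward's clustering logic.
# The normalization of res into C is assumed; everything else mirrors the diff.
import torch

k, d = 5, 64                           # cluster_k, cluster_final_dim
array = torch.randn(k, d)              # learnable centroids (self.array)
mean_task = torch.randn(d)             # aggregate(task_embed): mean over dim 0
temperature = 1.0

res = torch.norm(mean_task - array, p=2, dim=1, keepdim=True)      # k x 1 distances
res = torch.pow((res / temperature) + 1, (temperature + 1) / -2)   # t-kernel weights
C = (res / res.sum()).t()              # 1 x k soft assignment (assumed step)
value = torch.mm(C, array)             # 1 x d mixture of centroids
new_task_embed = value + mean_task     # the "simple add operation"
```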
 class Trainer(torch.nn.Module):
-    def __init__(self, config, head=None):
+    def __init__(self, config_param, head=None):
         super(Trainer, self).__init__()
-        fc1_in_dim = config['embedding_dim'] * 8
-        fc2_in_dim = config['first_fc_hidden_dim']
-        fc2_out_dim = config['second_fc_hidden_dim']
+        fc1_in_dim = config_param['embedding_dim'] * 8
+        fc2_in_dim = config_param['first_fc_hidden_dim']
+        fc2_out_dim = config_param['second_fc_hidden_dim']
         self.fc1 = torch.nn.Linear(fc1_in_dim, fc2_in_dim)
         self.fc2 = torch.nn.Linear(fc2_in_dim, fc2_out_dim)
         self.linear_out = torch.nn.Linear(fc2_out_dim, 1)
         # cluster module
-        self.cluster_module = ClustringModule(config)
+        self.cluster_module = ClustringModule(config_param)
         # self.task_dim = fc1_in_dim
-        self.task_dim = config['cluster_final_dim']
+        self.task_dim = config_param['cluster_final_dim']
         # transform task to weights
         self.film_layer_1_beta = nn.Linear(self.task_dim, fc2_in_dim, bias=False)
         self.film_layer_1_gamma = nn.Linear(self.task_dim, fc2_in_dim, bias=False)
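The paired `film_layer_*_beta` / `film_layer_*_gamma` projections suggest FiLM conditioning: the clustered task embedding is mapped to a per-feature scale and shift for each fully connected layer. The application site is not part of this diff, so the modulation below follows the standard FiLM formulation and is an assumption:

```python
# Assumed FiLM-style use of the projections declared above.
import torch
import torch.nn as nn

task_dim, fc2_in_dim = 64, 64
film_layer_1_gamma = nn.Linear(task_dim, fc2_in_dim, bias=False)
film_layer_1_beta = nn.Linear(task_dim, fc2_in_dim, bias=False)

new_task_embed = torch.randn(1, task_dim)    # output of ClustringModule.forward
h = torch.randn(16, fc2_in_dim)              # a batch of fc1 activations

gamma = film_layer_1_gamma(new_task_embed)   # 1 x fc2_in_dim, broadcasts over batch
beta = film_layer_1_beta(new_task_embed)
h = gamma * h + beta                         # feature-wise linear modulation
```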
@@ -84,7 +76,7 @@ class Trainer(torch.nn.Module):
         # self.film_layer_3_beta = nn.Linear(self.task_dim, self.h3_dim, bias=False)
         # self.film_layer_3_gamma = nn.Linear(self.task_dim, self.h3_dim, bias=False)
         # self.dropout_rate = 0
-        self.dropout_rate = config['trainer_dropout_rate']
+        self.dropout_rate = config_param['trainer_dropout_rate']
         self.dropout = nn.Dropout(self.dropout_rate)

     def aggregate(self, z_i):
@@ -9,15 +9,15 @@ import numpy as np
 def main(num_samples, max_num_epochs=20, gpus_per_trial=2):
-    data_dir = os.path.abspath("/media/external_10TB/10TB/maheri/define_task_melu_data")
-    load_data(data_dir)
+    data_dir = os.path.abspath("/media/external_10TB/10TB/maheri/new_data_dir3")
     config = {
         # "l1": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
         # "l2": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
         # "lr": tune.loguniform(1e-4, 1e-1),
         # "batch_size": tune.choice([2, 4, 8, 16])
         "transformer": tune.choice(['kronoker']),
-        "meta_algo": tune.choice(['gbml']),
+        "meta_algo": tune.choice(['gbml', 'metasgd']),
         "first_order": tune.choice([False]),
         "adapt_transform": tune.choice([True, False]),
         # "local_lr":tune.choice([5e-6,5e-4,5e-3]),
@@ -25,7 +25,7 @@ def main(num_samples, max_num_epochs=20, gpus_per_trial=2):
         "local_lr": tune.loguniform(5e-6, 5e-3),
         "lr": tune.loguniform(5e-5, 5e-3),
         "batch_size": tune.choice([16, 32, 64]),
-        "inner": tune.choice([7, 5, 4, 3, 1]),
+        "inner": tune.choice([1, 3, 5, 7]),
         "test_state": tune.choice(["user_and_item_cold_state"]),
         "embedding_dim": tune.choice([16, 32, 64]),
@@ -37,8 +37,22 @@ def main(num_samples, max_num_epochs=20, gpus_per_trial=2):
         'cluster_final_dim': tune.choice([64, 32]),
         'cluster_dropout_rate': tune.choice([0, 0.01, 0.1]),
         'cluster_k': tune.choice([3, 5, 7, 9, 11]),
-        'temperature': tune.choice([0.1, 0.5, 1.0, 2.0, 10.0]),
+        'temperature': tune.choice([0.001, 0.1, 0.5, 1.0, 2.0, 10.0]),
         'trainer_dropout_rate': tune.choice([0, 0.01, 0.1]),
+        'use_cuda': tune.choice([True]),
+        # item
+        'num_rate': tune.choice([6]),
+        'num_genre': tune.choice([25]),
+        'num_director': tune.choice([2186]),
+        'num_actor': tune.choice([8030]),
+        # user
+        'num_gender': tune.choice([2]),
+        'num_age': tune.choice([7]),
+        'num_occupation': tune.choice([21]),
+        'num_zipcode': tune.choice([3402]),
+        'num_epoch': tune.choice([30]),
     }
     scheduler = ASHAScheduler(
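A note on the enlarged search space above: the new single-element `tune.choice([...])` entries (`use_cuda`, the `num_*` feature cardinalities, `num_epoch`) pin constants rather than search over them, so each trial's `conf` carries everything `train_melu` needs; this pairs with commenting out `from options import config` in a later hunk. A reduced sketch of the idiom:

```python
# Illustrative reduced search space; keys appear in the diff above.
from ray import tune

config = {
    "lr": tune.loguniform(5e-5, 5e-3),    # sampled log-uniformly per trial
    "inner": tune.choice([1, 3, 5, 7]),   # sampled uniformly per trial
    "num_epoch": tune.choice([30]),       # single option: effectively a constant
}
```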
@@ -52,16 +66,15 @@ def main(num_samples, max_num_epochs=20, gpus_per_trial=2):
         metric_columns=["loss", "ndcg1", "ndcg3", "training_iteration"])
     result = tune.run(
         partial(train_melu, data_dir=data_dir),
-        resources_per_trial={"cpu": 4, "gpu": gpus_per_trial},
+        resources_per_trial={"cpu": 8, "gpu": gpus_per_trial},
         config=config,
         num_samples=num_samples,
         scheduler=scheduler,
         progress_reporter=reporter,
         log_to_file=True,
-        # resume=True,
-        local_dir="./hyper_tunning_all_cold",
+        local_dir="./hyper_tunning_all_cold2",
         name="melu_all_cold_clustered",
     )
     best_trial = result.get_best_trial("loss", "min", "last")
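After `tune.run` returns, the usual follow-up (consistent with the reporter's `metric_columns` above) reads the winning trial's config and final metrics:

```python
# Standard Ray Tune analysis; metric names match the reporter columns above.
best_trial = result.get_best_trial("loss", "min", "last")
print("best config:", best_trial.config)
print("best loss:", best_trial.last_result["loss"])
print("best ndcg@1:", best_trial.last_result["ndcg1"])
print("best ndcg@3:", best_trial.last_result["ndcg3"])
```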
@@ -97,4 +110,4 @@ def main(num_samples, max_num_epochs=20, gpus_per_trial=2):
 if __name__ == "__main__":
     # You can change the number of GPUs per trial here:
-    main(num_samples=150, max_num_epochs=25, gpus_per_trial=1)
+    main(num_samples=150, max_num_epochs=30, gpus_per_trial=1)
@@ -3,7 +3,7 @@ import torch
 import torch.nn as nn
 from ray import tune
 import pickle
-from options import config
+# from options import config
 from embedding_module import EmbeddingModule
 import learn2learn as l2l
 import random
@@ -47,7 +47,7 @@ def load_data(data_dir=None, test_state='warm_state'):
     random.shuffle(test_dataset)
     random.shuffle(trainset)
-    val_size = int(test_set_size * 0.2)
+    val_size = int(test_set_size * 0.3)
     validationset = test_dataset[:val_size]
     testset = test_dataset[val_size:]
@@ -55,18 +55,12 @@ def load_data(data_dir=None, test_state='warm_state'):
 def train_melu(conf, checkpoint_dir=None, data_dir=None):
     print("inajm1:", checkpoint_dir)
     embedding_dim = conf['embedding_dim']
     fc1_in_dim = conf['embedding_dim'] * 8
     fc2_in_dim = conf['first_fc_hidden_dim']
     fc2_out_dim = conf['second_fc_hidden_dim']
-    # fc1 = torch.nn.Linear(fc1_in_dim, fc2_in_dim)
-    # fc2 = torch.nn.Linear(fc2_in_dim, fc2_out_dim)
-    # linear_out = torch.nn.Linear(fc2_out_dim, 1)
-    # head = torch.nn.Sequential(fc1, fc2, linear_out)
-    emb = EmbeddingModule(config).cuda()
+    emb = EmbeddingModule(conf).cuda()
     transform = None
     if conf['transformer'] == "kronoker":
@@ -74,7 +68,7 @@ def train_melu(conf, checkpoint_dir=None, data_dir=None):
     elif conf['transformer'] == "linear":
         transform = l2l.optim.ModuleTransform(torch.nn.Linear)
-    trainer = Trainer(config)
+    trainer = Trainer(conf)
     # define meta algorithm
     if conf['meta_algo'] == "maml":
@@ -85,9 +79,7 @@ def train_melu(conf, checkpoint_dir=None, data_dir=None):
         trainer = l2l.algorithms.GBML(trainer, transform=transform, lr=conf['local_lr'],
                                       adapt_transform=conf['adapt_transform'], first_order=conf['first_order'])
     trainer.cuda()
-    # net = nn.Sequential(emb, head)
     criterion = nn.MSELoss()
     all_parameters = list(emb.parameters()) + list(trainer.parameters())
     optimizer = torch.optim.Adam(all_parameters, lr=conf['lr'])
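The GBML wrapper above (and the `metasgd` option newly added to the search space) is normally driven with learn2learn's clone/adapt pattern. The per-task loop lives outside this diff, so the sketch below is an assumption based on the learn2learn API; `support_x`, `support_y`, `query_x`, and `query_y` are hypothetical per-task tensors, not names from this repo:

```python
# Assumed inner/outer loop for the l2l-wrapped trainer.
learner = trainer.clone()                     # task-specific copy of the meta-model
for _ in range(conf['inner']):                # 'inner' adaptation steps per task
    support_loss = criterion(learner(support_x), support_y)
    learner.adapt(support_loss)               # inner-loop (local_lr) update
query_loss = criterion(learner(query_x), query_y)
query_loss.backward()                         # gradients reach the meta-parameters
optimizer.step()                              # outer-loop Adam step over emb + trainer
optimizer.zero_grad()
```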
@@ -105,7 +97,7 @@ def train_melu(conf, checkpoint_dir=None, data_dir=None):
     a, b, c, d = zip(*train_dataset)
-    for epoch in range(config['num_epoch']):  # loop over the dataset multiple times
+    for epoch in range(conf['num_epoch']):  # loop over the dataset multiple times
         for i in range(num_batch):
             optimizer.zero_grad()
             meta_train_error = 0.0
@@ -84,7 +84,7 @@ if __name__ == '__main__':
     if config['use_cuda']:
         os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
         os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
-    master_path = "/media/external_10TB/10TB/maheri/define_task_melu_data"
+    master_path = "/media/external_10TB/10TB/maheri/define_task_melu_data2"
     config['master_path'] = master_path
     # DATA GENERATION
@@ -61,7 +61,7 @@ def test(embedding, head, total_dataset, batch_size, num_epoch, test_state=None,
             ndcgs3.append(float(ndcg_score([y_true], [y_pred], k=3, sample_weight=None, ignore_ties=False)))
         del supp_xs, supp_ys, query_xs, query_ys, y_true, y_pred, loss_q, temp_sxs, temp_qxs, predictions, l1
-        torch.cuda.empty_cache()
+        # torch.cuda.empty_cache()
     # calculate metrics
     losses_q = np.array(losses_q).mean()
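For reference, the `ndcg_score` calls above follow scikit-learn's convention of 2-D inputs (one row per query), which is why `y_true` and `y_pred` are wrapped in single-element lists. A self-contained example with illustrative numbers:

```python
# NDCG on one task's query set, mirroring the call pattern in the hunk above.
from sklearn.metrics import ndcg_score

y_true = [3.0, 5.0, 2.0, 4.0]   # ground-truth ratings
y_pred = [2.5, 4.9, 1.8, 3.6]   # model predictions
ndcg1 = ndcg_score([y_true], [y_pred], k=1, sample_weight=None, ignore_ties=False)
ndcg3 = ndcg_score([y_true], [y_pred], k=3, sample_weight=None, ignore_ties=False)
```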