# Imports assumed for this excerpt (the diff does not show the file header):
import torch
import torch.nn as nn
from torch.nn import init


class ClustringModule(torch.nn.Module):
    def __init__(self, config_param):
        super(ClustringModule, self).__init__()
        self.h1_dim = config_param['cluster_h1_dim']
        self.h2_dim = config_param['cluster_h2_dim']
        self.final_dim = config_param['cluster_final_dim']
        self.dropout_rate = config_param['cluster_dropout_rate']
        layers = [nn.Linear(config_param['embedding_dim'] * 8 + 1, self.h1_dim),
                  torch.nn.Dropout(self.dropout_rate),
                  nn.ReLU(inplace=True),
                  # nn.BatchNorm1d(self.h1_dim),
                  # h1_dim -> h2_dim layer added here so the dimensions line up with the final layer
                  nn.Linear(self.h1_dim, self.h2_dim),
                  nn.ReLU(inplace=True),
                  nn.Linear(self.h2_dim, self.final_dim)]
        self.input_to_hidden = nn.Sequential(*layers)
        self.clusters_k = config_param['cluster_k']
        self.embed_size = self.final_dim
        self.array = nn.Parameter(init.xavier_uniform_(torch.FloatTensor(self.clusters_k, self.embed_size)))
        self.temperature = config_param['temperature']

    def aggregate(self, z_i):
        return torch.mean(z_i, dim=0)

    # signature assumed from the usage below; the def line itself is not shown in the excerpt
    def forward(self, task_embed):
        # todo : may be useless
        mean_task = self.aggregate(task_embed)
        # C_distribution, new_task_embed = self.memoryunit(mean_task)
        res = torch.norm(mean_task - self.array, p=2, dim=1, keepdim=True)
        res = torch.pow((res / self.temperature) + 1, (self.temperature + 1) / -2)
        # 1*k soft assignment over the cluster centers (the normalization step is assumed)
        C = torch.transpose(res / res.sum(), 0, 1)
        value = torch.mm(C, self.array)
        # simple add operation
        new_task_embed = value + mean_task
        # calculate target distribution
        return C, new_task_embed
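
# Minimal usage sketch (illustration only, not part of the diff). It assumes the raw
# per-interaction features are first mapped through `input_to_hidden`, so `task_embed`
# has shape (n_interactions, final_dim); `interaction_feats` is a made-up name.
#
#   cluster = ClustringModule(config_param)
#   task_embed = cluster.input_to_hidden(interaction_feats)   # (n, final_dim)
#   C, new_task_embed = cluster(task_embed)                    # C: 1*k soft assignment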


class Trainer(torch.nn.Module):
    def __init__(self, config_param, head=None):
        super(Trainer, self).__init__()
        fc1_in_dim = config_param['embedding_dim'] * 8
        fc2_in_dim = config_param['first_fc_hidden_dim']
        fc2_out_dim = config_param['second_fc_hidden_dim']
        self.fc1 = torch.nn.Linear(fc1_in_dim, fc2_in_dim)
        self.fc2 = torch.nn.Linear(fc2_in_dim, fc2_out_dim)
        self.linear_out = torch.nn.Linear(fc2_out_dim, 1)
        # cluster module
        self.cluster_module = ClustringModule(config_param)
        # self.task_dim = fc1_in_dim
        self.task_dim = config_param['cluster_final_dim']
        # transform task to weights
        self.film_layer_1_beta = nn.Linear(self.task_dim, fc2_in_dim, bias=False)
        self.film_layer_1_gamma = nn.Linear(self.task_dim, fc2_in_dim, bias=False)
        # self.film_layer_3_beta = nn.Linear(self.task_dim, self.h3_dim, bias=False)
        # self.film_layer_3_gamma = nn.Linear(self.task_dim, self.h3_dim, bias=False)
        # self.dropout_rate = 0
        self.dropout_rate = config_param['trainer_dropout_rate']
        self.dropout = nn.Dropout(self.dropout_rate)

    def aggregate(self, z_i):
        return torch.mean(z_i, dim=0)
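
    # Sketch (assumption, not taken from the diff) of how the FiLM layers above are typically
    # applied in the forward pass: the clustered task embedding is mapped to per-feature
    # scale (gamma) and shift (beta) vectors that modulate the first hidden layer.
    #
    #   def forward(self, x, task_embed):
    #       C, new_task_embed = self.cluster_module(task_embed)
    #       gamma_1 = torch.tanh(self.film_layer_1_gamma(new_task_embed))
    #       beta_1 = torch.tanh(self.film_layer_1_beta(new_task_embed))
    #       hidden_1 = gamma_1 * self.fc1(x) + beta_1   # feature-wise affine modulation
    #       hidden_1 = self.dropout(torch.relu(hidden_1))
    #       return self.linear_out(torch.relu(self.fc2(hidden_1)))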


def main(num_samples, max_num_epochs=20, gpus_per_trial=2):
    data_dir = os.path.abspath("/media/external_10TB/10TB/maheri/new_data_dir3")
    load_data(data_dir)
    config = {
        # "l1": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        # "l2": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        # "lr": tune.loguniform(1e-4, 1e-1),
        # "batch_size": tune.choice([2, 4, 8, 16])
        "transformer": tune.choice(['kronoker']),
        "meta_algo": tune.choice(['gbml', 'metasgd']),
        "first_order": tune.choice([False]),
        "adapt_transform": tune.choice([True, False]),
        # "local_lr": tune.choice([5e-6, 5e-4, 5e-3]),
        "local_lr": tune.loguniform(5e-6, 5e-3),
        "lr": tune.loguniform(5e-5, 5e-3),
        "batch_size": tune.choice([16, 32, 64]),
        "inner": tune.choice([1, 3, 5, 7]),
        "test_state": tune.choice(["user_and_item_cold_state"]),
        "embedding_dim": tune.choice([16, 32, 64]),
        'cluster_final_dim': tune.choice([64, 32]),
        'cluster_dropout_rate': tune.choice([0, 0.01, 0.1]),
        'cluster_k': tune.choice([3, 5, 7, 9, 11]),
        'temperature': tune.choice([0.001, 0.1, 0.5, 1.0, 2.0, 10.0]),
        'trainer_dropout_rate': tune.choice([0, 0.01, 0.1]),
        'use_cuda': tune.choice([True]),
        # item
        'num_rate': tune.choice([6]),
        'num_genre': tune.choice([25]),
        'num_director': tune.choice([2186]),
        'num_actor': tune.choice([8030]),
        # user
        'num_gender': tune.choice([2]),
        'num_age': tune.choice([7]),
        'num_occupation': tune.choice([21]),
        'num_zipcode': tune.choice([3402]),
        'num_epoch': tune.choice([30]),
    }
    # The scheduler/reporter arguments are mostly elided in this excerpt; metric, mode and
    # max_t are assumptions here, and metric_columns belongs to the CLIReporter.
    scheduler = ASHAScheduler(metric="loss", mode="min", max_t=max_num_epochs)
    reporter = CLIReporter(
        metric_columns=["loss", "ndcg1", "ndcg3", "training_iteration"])
    result = tune.run(
        partial(train_melu, data_dir=data_dir),
        resources_per_trial={"cpu": 8, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        log_to_file=True,
        # resume=True,
        local_dir="./hyper_tunning_all_cold2",
        name="melu_all_cold_clustered",
    )
    best_trial = result.get_best_trial("loss", "min", "last")
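    # Illustrative follow-up (not in the diff): report what the best trial found.
    # best_trial.config and best_trial.last_result are standard Ray Tune trial attributes.
    print("Best trial config:", best_trial.config)
    print("Best trial final validation loss:", best_trial.last_result["loss"])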
if __name__ == "__main__": | if __name__ == "__main__": | ||||
# You can change the number of GPUs per trial here: | # You can change the number of GPUs per trial here: | ||||
main(num_samples=150, max_num_epochs=25, gpus_per_trial=1) | |||||
main(num_samples=150, max_num_epochs=30, gpus_per_trial=1) |


import torch.nn as nn
from ray import tune
import pickle
# from options import config
from embedding_module import EmbeddingModule
import learn2learn as l2l
import random

random.shuffle(test_dataset)
random.shuffle(trainset)
val_size = int(test_set_size * 0.3)
validationset = test_dataset[:val_size]
testset = test_dataset[val_size:]


def train_melu(conf, checkpoint_dir=None, data_dir=None):
    print("checkpoint_dir:", checkpoint_dir)
    embedding_dim = conf['embedding_dim']
    fc1_in_dim = conf['embedding_dim'] * 8
    fc2_in_dim = conf['first_fc_hidden_dim']
    fc2_out_dim = conf['second_fc_hidden_dim']
    # fc1 = torch.nn.Linear(fc1_in_dim, fc2_in_dim)
    # fc2 = torch.nn.Linear(fc2_in_dim, fc2_out_dim)
    # linear_out = torch.nn.Linear(fc2_out_dim, 1)
    # head = torch.nn.Sequential(fc1, fc2, linear_out)
    emb = EmbeddingModule(conf).cuda()
    transform = None
    if conf['transformer'] == "kronoker":
        # branch body assumed (not shown in the excerpt): a Kronecker-factored transform
        # as the counterpart of the linear ModuleTransform below
        transform = l2l.optim.transforms.KroneckerTransform(l2l.nn.KroneckerLinear)
    elif conf['transformer'] == "linear":
        transform = l2l.optim.ModuleTransform(torch.nn.Linear)
    trainer = Trainer(conf)
    # define meta algorithm; only the GBML construction appears in the excerpt, so the
    # 'maml' and 'metasgd' branch bodies below are assumptions following the standard
    # learn2learn signatures
    if conf['meta_algo'] == "maml":
        trainer = l2l.algorithms.MAML(trainer, lr=conf['local_lr'], first_order=conf['first_order'])
    elif conf['meta_algo'] == "metasgd":
        trainer = l2l.algorithms.MetaSGD(trainer, lr=conf['local_lr'], first_order=conf['first_order'])
    elif conf['meta_algo'] == "gbml":
        trainer = l2l.algorithms.GBML(trainer, transform=transform, lr=conf['local_lr'],
                                      adapt_transform=conf['adapt_transform'], first_order=conf['first_order'])
    trainer.cuda()
    # net = nn.Sequential(emb, head)
    criterion = nn.MSELoss()
    all_parameters = list(emb.parameters()) + list(trainer.parameters())
    optimizer = torch.optim.Adam(all_parameters, lr=conf['lr'])
    a, b, c, d = zip(*train_dataset)

    for epoch in range(conf['num_epoch']):  # loop over the dataset multiple times
        for i in range(num_batch):
            optimizer.zero_grad()
            meta_train_error = 0.0
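            # Sketch (assumption, not part of the excerpt) of the meta-training step that
            # typically follows: clone the learner, adapt it on the support set for
            # conf['inner'] steps, evaluate on the query set, and accumulate the loss.
            # supp_x, supp_y, query_x, query_y are illustrative names only.
            #
            #   learner = trainer.clone()
            #   for _ in range(conf['inner']):
            #       learner.adapt(criterion(learner(supp_x), supp_y))
            #   query_loss = criterion(learner(query_x), query_y)
            #   meta_train_error += query_loss.item()
            #   query_loss.backward()
            # optimizer.step() is then called once per batch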


if config['use_cuda']:
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)

master_path = "/media/external_10TB/10TB/maheri/define_task_melu_data2"
config['master_path'] = master_path

# DATA GENERATION

        ndcgs3.append(float(ndcg_score([y_true], [y_pred], k=3, sample_weight=None, ignore_ties=False)))
        del supp_xs, supp_ys, query_xs, query_ys, y_true, y_pred, loss_q, temp_sxs, temp_qxs, predictions, l1
        # torch.cuda.empty_cache()

    # calculate metrics
    losses_q = np.array(losses_q).mean()
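    # Sketch (assumption, not shown in the excerpt) of how these metrics would be handed to
    # Ray Tune: the keyword names must match the CLIReporter metric_columns and the "loss"
    # metric used by the scheduler and get_best_trial. ndcgs1 is an illustrative name.
    #
    #   tune.report(loss=float(losses_q), ndcg1=float(np.mean(ndcgs1)), ndcg3=float(np.mean(ndcgs3)))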