@@ -50,46 +50,9 @@ class ClustringModule(torch.nn.Module):
         # 1*k, k*d, 1*d
         value = torch.mm(C, self.array)
         # simple add operation
-        # new_task_embed = value + mean_task
-        # new_task_embed = value
-        new_task_embed = mean_task
-        # print("injam1:", new_task_embed)
-        # print("injam2:", self.array)
-        list_dist = []
-        # list_dist = torch.norm(new_task_embed - self.array, p=2, dim=1, keepdim=True)
-        list_dist = torch.sum(torch.pow(new_task_embed - self.array, 2), dim=1)
-        stack_dist = list_dist
-        # print("injam3:", stack_dist)
-        ## Second, find the minimum squared distance for softmax normalization
-        min_dist = min(list_dist)
-        # print("injam4:", min_dist)
-        ## Third, compute exponentials shifted with min_dist to avoid underflow (0/0) issues in softmaxes
-        alpha = config['kmeans_alpha']  # Placeholder tensor for alpha
-        list_exp = []
-        for i in range(self.clusters_k):
-            exp = torch.exp(-alpha * (stack_dist[i] - min_dist))
-            list_exp.append(exp)
-        stack_exp = torch.stack(list_exp)
-        sum_exponentials = torch.sum(stack_exp)
-        # print("injam5:", stack_exp, sum_exponentials)
-        ## Fourth, compute softmaxes and the embedding/representative distances weighted by softmax
-        list_softmax = []
-        list_weighted_dist = []
-        for j in range(self.clusters_k):
-            softmax = stack_exp[j] / sum_exponentials
-            weighted_dist = stack_dist[j] * softmax
-            list_softmax.append(softmax)
-            list_weighted_dist.append(weighted_dist)
-        stack_weighted_dist = torch.stack(list_weighted_dist)
-        kmeans_loss = torch.sum(stack_weighted_dist, dim=0)
-        return C, new_task_embed, kmeans_loss
+        new_task_embed = value + mean_task
+        return C, new_task_embed

 class Trainer(torch.nn.Module):
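Review note: the block deleted above implemented a soft k-means objective: squared distances from the task embedding to each of the `clusters_k` centroids in `self.array`, converted to a softmax (shifted by the minimum distance for numerical stability) and used to weight those same distances. A vectorized sketch of that computation, assuming `centroids` is the `k x d` matrix `self.array` and `alpha` plays the role of `config['kmeans_alpha']`:

```python
import torch

def soft_kmeans_loss(task_embed, centroids, alpha):
    # task_embed: (1, d) or (d,); centroids: (k, d)
    dist = torch.sum((task_embed - centroids) ** 2, dim=1)  # (k,) squared distances
    # softmax of the negative scaled distances; subtracting the min before
    # exponentiating avoids underflow, exactly as the removed loops did
    weights = torch.softmax(-alpha * (dist - dist.min()), dim=0)
    return torch.sum(weights * dist)  # same scalar the old code returned as kmeans_loss
```

This replaces both Python loops with tensor ops while computing the identical quantity.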
@@ -119,9 +82,9 @@ class Trainer(torch.nn.Module):
     def aggregate(self, z_i):
         return torch.mean(z_i, dim=0)

-    def forward(self, task_embed, y, training, adaptation_data=None, adaptation_labels=None):
+    def forward(self, task_embed, y, training,adaptation_data=None,adaptation_labels=None):
         if training:
-            C, clustered_task_embed, k_loss = self.cluster_module(task_embed, y)
+            C, clustered_task_embed = self.cluster_module(task_embed, y)
             # hidden layers
             # todo : adding activation function or remove it
             hidden_1 = self.fc1(task_embed)
@@ -141,7 +104,7 @@ class Trainer(torch.nn.Module):
             y_pred = self.linear_out(hidden_3)
         else:
-            C, clustered_task_embed, k_loss = self.cluster_module(adaptation_data, adaptation_labels)
+            C, clustered_task_embed = self.cluster_module(adaptation_data, adaptation_labels)
             beta_1 = torch.tanh(self.film_layer_1_beta(clustered_task_embed))
             gamma_1 = torch.tanh(self.film_layer_1_gamma(clustered_task_embed))
             beta_2 = torch.tanh(self.film_layer_2_beta(clustered_task_embed))
@@ -159,4 +122,4 @@ class Trainer(torch.nn.Module):
             y_pred = self.linear_out(hidden_3)
-            return y_pred, C, k_loss
+            return y_pred
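Review note: with `k_loss` gone from both `forward` branches, the trainer now returns only `y_pred`. For context, the `film_layer_*_beta`/`_gamma` pairs visible above produce FiLM-style conditioning parameters from the clustered task embedding; the hunk does not show how they are applied, but the standard FiLM modulation is a per-feature scale and shift. A hypothetical, self-contained sketch (module and dimension names are illustrative, not taken from this repo):

```python
import torch

class FiLMBlock(torch.nn.Module):
    """Linear layer whose activations are modulated by task-conditional FiLM params."""
    def __init__(self, in_dim, out_dim, task_dim):
        super().__init__()
        self.fc = torch.nn.Linear(in_dim, out_dim)
        self.to_beta = torch.nn.Linear(task_dim, out_dim)   # shift generator
        self.to_gamma = torch.nn.Linear(task_dim, out_dim)  # scale generator

    def forward(self, x, task_embed):
        h = self.fc(x)
        beta = torch.tanh(self.to_beta(task_embed))    # tanh matches the diff above
        gamma = torch.tanh(self.to_gamma(task_embed))
        return gamma * h + beta  # standard FiLM: scale, then shift
```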
@@ -9,15 +9,15 @@ import numpy as np

 def main(num_samples, max_num_epochs=20, gpus_per_trial=2):
-    data_dir = os.path.abspath("/media/external_10TB/10TB/maheri/new_data_dir3")
+    data_dir = os.path.abspath("/media/external_10TB/10TB/maheri/define_task_melu_data")
     load_data(data_dir)
     config = {
         # "l1": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
         # "l2": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
         # "lr": tune.loguniform(1e-4, 1e-1),
         # "batch_size": tune.choice([2, 4, 8, 16])
         "transformer": tune.choice(['kronoker']),
-        "meta_algo": tune.choice(['gbml', 'metasgd']),
+        "meta_algo": tune.choice(['gbml']),
         "first_order": tune.choice([False]),
         "adapt_transform": tune.choice([True, False]),
         # "local_lr":tune.choice([5e-6,5e-4,5e-3]),
@@ -25,7 +25,7 @@ def main(num_samples, max_num_epochs=20, gpus_per_trial=2):
         "local_lr": tune.loguniform(5e-6, 5e-3),
         "lr": tune.loguniform(5e-5, 5e-3),
         "batch_size": tune.choice([16, 32, 64]),
-        "inner": tune.choice([1, 3, 5, 7]),
+        "inner": tune.choice([7, 5, 4, 3, 1]),
         "test_state": tune.choice(["user_and_item_cold_state"]),
         "embedding_dim": tune.choice([16, 32, 64]),
@@ -37,22 +37,8 @@ def main(num_samples, max_num_epochs=20, gpus_per_trial=2):
         'cluster_final_dim': tune.choice([64, 32]),
         'cluster_dropout_rate': tune.choice([0, 0.01, 0.1]),
         'cluster_k': tune.choice([3, 5, 7, 9, 11]),
-        'temperature': tune.choice([0.001, 0.1, 0.5, 1.0, 2.0, 10.0]),
+        'temperature': tune.choice([0.1, 0.5, 1.0, 2.0, 10.0]),
         'trainer_dropout_rate': tune.choice([0, 0.01, 0.1]),
         'use_cuda': tune.choice([True]),
-        # item
-        'num_rate': tune.choice([6]),
-        'num_genre': tune.choice([25]),
-        'num_director': tune.choice([2186]),
-        'num_actor': tune.choice([8030]),
-        # user
-        'num_gender': tune.choice([2]),
-        'num_age': tune.choice([7]),
-        'num_occupation': tune.choice([21]),
-        'num_zipcode': tune.choice([3402]),
-        'num_epoch': tune.choice([30]),
     }
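Review note: the deleted `num_*` entries are fixed dataset cardinalities (MovieLens-style item and user feature vocabularies), not tunable hyperparameters, so removing them from the search space is reasonable. A later hunk re-enables `from options import config`, which is presumably where these constants now live; a sketch of what that `options.py` might contain (the module layout is an assumption, the values are the ones deleted here):

```python
# options.py -- assumed layout; values taken from the entries removed above
config = {
    # item features
    'num_rate': 6,
    'num_genre': 25,
    'num_director': 2186,
    'num_actor': 8030,
    # user features
    'num_gender': 2,
    'num_age': 7,
    'num_occupation': 21,
    'num_zipcode': 3402,
    # training
    'num_epoch': 30,
}
```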
     scheduler = ASHAScheduler(
@@ -66,15 +52,16 @@ def main(num_samples, max_num_epochs=20, gpus_per_trial=2):
         metric_columns=["loss", "ndcg1", "ndcg3", "training_iteration"])
     result = tune.run(
         partial(train_melu, data_dir=data_dir),
-        resources_per_trial={"cpu": 8, "gpu": gpus_per_trial},
+        resources_per_trial={"cpu": 4, "gpu": gpus_per_trial},
         config=config,
         num_samples=num_samples,
         scheduler=scheduler,
         progress_reporter=reporter,
         log_to_file=True,
-        local_dir="./hyper_tunning_all_cold2",
+        # resume=True,
+        local_dir="./hyper_tunning_all_cold",
         name="melu_all_cold_clustered",
     )
     best_trial = result.get_best_trial("loss", "min", "last")
@@ -110,4 +97,4 @@ def main(num_samples, max_num_epochs=20, gpus_per_trial=2):

 if __name__ == "__main__":
     # You can change the number of GPUs per trial here:
-    main(num_samples=150, max_num_epochs=30, gpus_per_trial=1)
+    main(num_samples=150, max_num_epochs=25, gpus_per_trial=1)
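Review note: `max_num_epochs` drops from 30 to 25, which also tightens ASHA's time budget if the scheduler is built from it. The `ASHAScheduler(...)` construction is truncated in this diff; a typical setup consistent with the visible `metric_columns` would be (the `grace_period` and `reduction_factor` values are assumptions, not visible in the diff):

```python
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler

scheduler = ASHAScheduler(
    metric="loss",          # minimize the reported validation loss
    mode="min",
    max_t=25,               # matches the new max_num_epochs
    grace_period=1,         # assumed
    reduction_factor=2)     # assumed
reporter = CLIReporter(
    metric_columns=["loss", "ndcg1", "ndcg3", "training_iteration"])
```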
@@ -3,7 +3,7 @@ import torch
 import torch.nn as nn
 from ray import tune
 import pickle
-# from options import config
+from options import config
 from embedding_module import EmbeddingModule
 import learn2learn as l2l
 import random
@@ -47,7 +47,7 @@ def load_data(data_dir=None, test_state='warm_state'):
     random.shuffle(test_dataset)
     random.shuffle(trainset)
-    val_size = int(test_set_size * 0.3)
+    val_size = int(test_set_size * 0.2)
     validationset = test_dataset[:val_size]
     testset = test_dataset[val_size:]
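Review note: the validation share of the held-out tasks drops from 30% to 20%, leaving more tasks for the final test split. A self-contained illustration of the resulting split sizes:

```python
import random

test_dataset = list(range(100))  # stand-in for the loaded held-out tasks
random.shuffle(test_dataset)
test_set_size = len(test_dataset)

val_size = int(test_set_size * 0.2)       # new ratio: 20 tasks
validationset = test_dataset[:val_size]
testset = test_dataset[val_size:]         # remaining 80 tasks
assert len(validationset) == 20 and len(testset) == 80
```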
@@ -55,12 +55,18 @@ def load_data(data_dir=None, test_state='warm_state'):

 def train_melu(conf, checkpoint_dir=None, data_dir=None):
     print("inajm1:", checkpoint_dir)
     embedding_dim = conf['embedding_dim']
     fc1_in_dim = conf['embedding_dim'] * 8
     fc2_in_dim = conf['first_fc_hidden_dim']
     fc2_out_dim = conf['second_fc_hidden_dim']
-    emb = EmbeddingModule(conf).cuda()
+    # fc1 = torch.nn.Linear(fc1_in_dim, fc2_in_dim)
+    # fc2 = torch.nn.Linear(fc2_in_dim, fc2_out_dim)
+    # linear_out = torch.nn.Linear(fc2_out_dim, 1)
+    # head = torch.nn.Sequential(fc1, fc2, linear_out)
+    emb = EmbeddingModule(config).cuda()

     transform = None
     if conf['transformer'] == "kronoker":
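Review note: this hunk and the two below switch module construction from `conf` (the per-trial sample Ray Tune passes into `train_melu`) to the global `config` imported from `options`. That is only safe for keys that do not vary across trials (the `num_*` cardinalities, `num_epoch`); anything being searched, such as `embedding_dim`, must still be read from `conf`. One alternative that avoids mixing the two dicts would be to overlay the trial sample onto the static defaults (an assumption, not what this diff does):

```python
from options import config  # static dataset constants (num_rate, num_genre, ...)

def build_cfg(conf):
    """Overlay Tune-sampled trial values on the static defaults (assumed helper)."""
    return {**config, **conf}  # trial values win on key collisions
```

With such a helper, every consumer could read a single merged dict instead of alternating between `conf` and `config`.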
@@ -68,7 +74,7 @@ def train_melu(conf, checkpoint_dir=None, data_dir=None):
     elif conf['transformer'] == "linear":
         transform = l2l.optim.ModuleTransform(torch.nn.Linear)

-    trainer = Trainer(conf)
+    trainer = Trainer(config)

     # define meta algorithm
     if conf['meta_algo'] == "maml":
@@ -79,7 +85,9 @@ def train_melu(conf, checkpoint_dir=None, data_dir=None):
         trainer = l2l.algorithms.GBML(trainer, transform=transform, lr=conf['local_lr'],
                                       adapt_transform=conf['adapt_transform'], first_order=conf['first_order'])
     trainer.cuda()

+    # net = nn.Sequential(emb, head)
     criterion = nn.MSELoss()
     all_parameters = list(emb.parameters()) + list(trainer.parameters())
     optimizer = torch.optim.Adam(all_parameters, lr=conf['lr'])
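Review note: since `head` stayed commented out, the `# net = nn.Sequential(emb, head)` line added here is dead code and could be dropped. For readers unfamiliar with learn2learn, the `GBML` wrapper built above is used clone-and-adapt style during meta-training; a minimal sketch of one meta-step under that API (the support/query tensors and the single-argument call are placeholders, the real `Trainer.forward` takes more arguments):

```python
import torch

def meta_step(meta_model, criterion, support_x, support_y, query_x, query_y,
              adaptation_steps=1):
    """One inner/outer iteration with a learn2learn GBML/MAML-style wrapper."""
    learner = meta_model.clone()               # task-specific differentiable copy
    for _ in range(adaptation_steps):          # inner loop on the support set
        support_loss = criterion(learner(support_x), support_y)
        learner.adapt(support_loss)            # gradient-based inner update
    query_loss = criterion(learner(query_x), query_y)
    query_loss.backward()                      # grads accumulate in meta_model
    return query_loss.item()
```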
@@ -97,7 +105,7 @@ def train_melu(conf, checkpoint_dir=None, data_dir=None):
     a, b, c, d = zip(*train_dataset)

-    for epoch in range(conf['num_epoch']):  # loop over the dataset multiple times
+    for epoch in range(config['num_epoch']):  # loop over the dataset multiple times
         for i in range(num_batch):
             optimizer.zero_grad()
             meta_train_error = 0.0
@@ -145,7 +145,7 @@ if __name__ == '__main__':
     if config['use_cuda']:
         os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
         os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
-    master_path = "/media/external_10TB/10TB/maheri/define_task_melu_data2"
+    master_path = "/media/external_10TB/10TB/maheri/define_task_melu_data"
     config['master_path'] = master_path

     # DATA GENERATION
@@ -62,7 +62,7 @@ def test(embedding, head, total_dataset, batch_size, num_epoch, test_state=None,
         ndcgs3.append(float(ndcg_score([y_true], [y_pred], k=3, sample_weight=None, ignore_ties=False)))
         del supp_xs, supp_ys, query_xs, query_ys, y_true, y_pred, loss_q, temp_sxs, temp_qxs, predictions, l1
-        # torch.cuda.empty_cache()
+        torch.cuda.empty_cache()

     # calculate metrics
     losses_q = np.array(losses_q).mean()
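Review note: re-enabling `torch.cuda.empty_cache()` inside the evaluation loop trades some speed for lower peak GPU memory, which fits the aggressive per-task `del` on the previous line. For reference, the NDCG figures reported here come from scikit-learn; a self-contained example of the metric-aggregation pattern used in this function (the ratings are illustrative):

```python
import numpy as np
from sklearn.metrics import ndcg_score

losses_q, ndcgs1, ndcgs3 = [], [], []
y_true = [3.0, 5.0, 1.0, 4.0]   # ground-truth ratings for one query set
y_pred = [2.5, 4.8, 1.2, 3.9]   # model predictions for the same items
losses_q.append(np.mean((np.array(y_true) - np.array(y_pred)) ** 2))
ndcgs1.append(float(ndcg_score([y_true], [y_pred], k=1)))
ndcgs3.append(float(ndcg_score([y_true], [y_pred], k=3)))

# calculate metrics, as the surrounding code does
print(np.array(losses_q).mean(), np.mean(ndcgs1), np.mean(ndcgs3))
```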