
Revert "prepare for hyper parameter tuning"

This reverts commit 576c0e97
Branch: define_task
mohamad maheri, 2 years ago
commit 3a3b256e65
5 changed files with 31 additions and 73 deletions

  1. clustering.py        +7  -44
  2. hyper_main.py        +9  -22
  3. hyper_tunning.py     +13 -5
  4. learnToLearn.py      +1  -1
  5. learnToLearnTest.py  +1  -1

clustering.py  +7 -44

@@ -50,46 +50,9 @@ class ClustringModule(torch.nn.Module):
         # 1*k, k*d, 1*d
         value = torch.mm(C, self.array)
         # simple add operation
-        # new_task_embed = value + mean_task
-        # new_task_embed = value
-        new_task_embed = mean_task
-
-        # print("injam1:", new_task_embed)
-        # print("injam2:", self.array)
-        list_dist = []
-        # list_dist = torch.norm(new_task_embed - self.array, p=2, dim=1,keepdim=True)
-        list_dist = torch.sum(torch.pow(new_task_embed - self.array,2),dim=1)
-        stack_dist = list_dist
-
-        # print("injam3:", stack_dist)
-
-        ## Second, find the minimum squared distance for softmax normalization
-        min_dist = min(list_dist)
-        # print("injam4:", min_dist)
-
-        ## Third, compute exponentials shifted with min_dist to avoid underflow (0/0) issues in softmaxes
-        alpha = config['kmeans_alpha']  # Placeholder tensor for alpha
-        list_exp = []
-        for i in range(self.clusters_k):
-            exp = torch.exp(-alpha * (stack_dist[i] - min_dist))
-            list_exp.append(exp)
-        stack_exp = torch.stack(list_exp)
-        sum_exponentials = torch.sum(stack_exp)
-
-        # print("injam5:", stack_exp, sum_exponentials)
-
-        ## Fourth, compute softmaxes and the embedding/representative distances weighted by softmax
-        list_softmax = []
-        list_weighted_dist = []
-        for j in range(self.clusters_k):
-            softmax = stack_exp[j] / sum_exponentials
-            weighted_dist = stack_dist[j] * softmax
-            list_softmax.append(softmax)
-            list_weighted_dist.append(weighted_dist)
-        stack_weighted_dist = torch.stack(list_weighted_dist)
-
-        kmeans_loss = torch.sum(stack_weighted_dist, dim=0)
-        return C, new_task_embed, kmeans_loss
+        new_task_embed = value + mean_task
+
+        return C, new_task_embed


 class Trainer(torch.nn.Module):
@@ -119,9 +82,9 @@ class Trainer(torch.nn.Module):
     def aggregate(self, z_i):
         return torch.mean(z_i, dim=0)

-    def forward(self, task_embed, y, training, adaptation_data=None, adaptation_labels=None):
+    def forward(self, task_embed, y, training,adaptation_data=None,adaptation_labels=None):
         if training:
-            C, clustered_task_embed, k_loss = self.cluster_module(task_embed, y)
+            C, clustered_task_embed = self.cluster_module(task_embed, y)
             # hidden layers
             # todo : adding activation function or remove it
             hidden_1 = self.fc1(task_embed)
@@ -141,7 +104,7 @@ class Trainer(torch.nn.Module):
             y_pred = self.linear_out(hidden_3)

         else:
-            C, clustered_task_embed, k_loss = self.cluster_module(adaptation_data, adaptation_labels)
+            C, clustered_task_embed = self.cluster_module(adaptation_data, adaptation_labels)
             beta_1 = torch.tanh(self.film_layer_1_beta(clustered_task_embed))
             gamma_1 = torch.tanh(self.film_layer_1_gamma(clustered_task_embed))
             beta_2 = torch.tanh(self.film_layer_2_beta(clustered_task_embed))
@@ -159,4 +122,4 @@ class Trainer(torch.nn.Module):

             y_pred = self.linear_out(hidden_3)

-        return y_pred, C, k_loss
+        return y_pred

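For context, the block removed above implemented a soft k-means objective: squared distances from the task embedding to each of the `clusters_k` centres stored in `self.array`, exponentials shifted by the minimum distance to avoid underflow, and the resulting softmax-weighted sum of distances returned as `kmeans_loss`. A minimal vectorized sketch of that computation, assuming `centers` stands in for `self.array` and `alpha` for `config['kmeans_alpha']`:

```python
import torch

def soft_kmeans_loss(task_embed: torch.Tensor, centers: torch.Tensor, alpha: float) -> torch.Tensor:
    """Sketch of the reverted soft k-means loss.

    task_embed: (1, d) task embedding; centers: (k, d) cluster centres.
    """
    # squared Euclidean distance to every centre -> shape (k,)
    dist = torch.sum((task_embed - centers) ** 2, dim=1)
    # shift by the minimum before exponentiating, as in the removed loop
    exp = torch.exp(-alpha * (dist - dist.min()))
    softmax = exp / exp.sum()
    # softmax-weighted sum of squared distances
    return torch.sum(softmax * dist)
```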
hyper_main.py  +9 -22

@@ -9,15 +9,15 @@ import numpy as np


 def main(num_samples, max_num_epochs=20, gpus_per_trial=2):
-    data_dir = os.path.abspath("/media/external_10TB/10TB/maheri/new_data_dir3")
+    data_dir = os.path.abspath("/media/external_10TB/10TB/maheri/define_task_melu_data")
     load_data(data_dir)
     config = {
         # "l1": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
         # "l2": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
         # "lr": tune.loguniform(1e-4, 1e-1),
         # "batch_size": tune.choice([2, 4, 8, 16])
         "transformer": tune.choice(['kronoker']),
-        "meta_algo": tune.choice(['gbml', 'metasgd']),
+        "meta_algo": tune.choice(['gbml']),
         "first_order": tune.choice([False]),
         "adapt_transform": tune.choice([True, False]),
         # "local_lr":tune.choice([5e-6,5e-4,5e-3]),
@@ -25,7 +25,7 @@ def main(num_samples, max_num_epochs=20, gpus_per_trial=2):
         "local_lr": tune.loguniform(5e-6, 5e-3),
         "lr": tune.loguniform(5e-5, 5e-3),
         "batch_size": tune.choice([16, 32, 64]),
-        "inner": tune.choice([1, 3, 5, 7]),
+        "inner": tune.choice([7, 5, 4, 3, 1]),
         "test_state": tune.choice(["user_and_item_cold_state"]),

         "embedding_dim": tune.choice([16, 32, 64]),
@@ -37,22 +37,8 @@ def main(num_samples, max_num_epochs=20, gpus_per_trial=2):
         'cluster_final_dim': tune.choice([64, 32]),
         'cluster_dropout_rate': tune.choice([0, 0.01, 0.1]),
         'cluster_k': tune.choice([3, 5, 7, 9, 11]),
-        'temperature': tune.choice([0.001, 0.1, 0.5, 1.0, 2.0, 10.0]),
+        'temperature': tune.choice([0.1, 0.5, 1.0, 2.0, 10.0]),
         'trainer_dropout_rate': tune.choice([0, 0.01, 0.1]),
-
-        'use_cuda': tune.choice([True]),
-        # item
-        'num_rate': tune.choice([6]),
-        'num_genre': tune.choice([25]),
-        'num_director': tune.choice([2186]),
-        'num_actor': tune.choice([8030]),
-        # user
-        'num_gender': tune.choice([2]),
-        'num_age': tune.choice([7]),
-        'num_occupation': tune.choice([21]),
-        'num_zipcode': tune.choice([3402]),
-
-        'num_epoch': tune.choice([30]),
     }

     scheduler = ASHAScheduler(
@@ -66,15 +52,16 @@ def main(num_samples, max_num_epochs=20, gpus_per_trial=2):
         metric_columns=["loss", "ndcg1", "ndcg3", "training_iteration"])
     result = tune.run(
         partial(train_melu, data_dir=data_dir),
-        resources_per_trial={"cpu": 8, "gpu": gpus_per_trial},
+        resources_per_trial={"cpu": 4, "gpu": gpus_per_trial},
         config=config,
         num_samples=num_samples,
         scheduler=scheduler,
         progress_reporter=reporter,
         log_to_file=True,
         # resume=True,
-        local_dir="./hyper_tunning_all_cold2",
+        local_dir="./hyper_tunning_all_cold",
         name="melu_all_cold_clustered",
+
     )

     best_trial = result.get_best_trial("loss", "min", "last")
@@ -110,4 +97,4 @@ def main(num_samples, max_num_epochs=20, gpus_per_trial=2):

 if __name__ == "__main__":
     # You can change the number of GPUs per trial here:
-    main(num_samples=150, max_num_epochs=30, gpus_per_trial=1)
+    main(num_samples=150, max_num_epochs=25, gpus_per_trial=1)

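The changes above only adjust the Ray Tune search space and per-trial resources; the overall driver pattern stays the same. A condensed sketch of that pattern, assuming the trainable (here `train_melu`) reports a `loss` metric to Tune each epoch; the search-space values and the name `run_search` are illustrative, not the project's exact grid:

```python
from functools import partial

from ray import tune
from ray.tune.schedulers import ASHAScheduler

def run_search(train_fn, data_dir, num_samples=10, max_num_epochs=20, gpus_per_trial=1):
    # each key is sampled once per trial
    config = {
        "lr": tune.loguniform(5e-5, 5e-3),
        "batch_size": tune.choice([16, 32, 64]),
    }
    # ASHA terminates poorly performing trials early
    scheduler = ASHAScheduler(metric="loss", mode="min",
                              max_t=max_num_epochs, grace_period=1, reduction_factor=2)
    result = tune.run(
        partial(train_fn, data_dir=data_dir),   # extra kwargs are bound here
        resources_per_trial={"cpu": 4, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
    )
    best = result.get_best_trial("loss", "min", "last")
    return best.config
```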
hyper_tunning.py  +13 -5

@@ -3,7 +3,7 @@ import torch
 import torch.nn as nn
 from ray import tune
 import pickle
-# from options import config
+from options import config
 from embedding_module import EmbeddingModule
 import learn2learn as l2l
 import random
@@ -47,7 +47,7 @@ def load_data(data_dir=None, test_state='warm_state'):

     random.shuffle(test_dataset)
     random.shuffle(trainset)
-    val_size = int(test_set_size * 0.3)
+    val_size = int(test_set_size * 0.2)
     validationset = test_dataset[:val_size]
     testset = test_dataset[val_size:]

@@ -55,12 +55,18 @@ def load_data(data_dir=None, test_state='warm_state'):


 def train_melu(conf, checkpoint_dir=None, data_dir=None):
+    print("inajm1:", checkpoint_dir)
     embedding_dim = conf['embedding_dim']
     fc1_in_dim = conf['embedding_dim'] * 8
     fc2_in_dim = conf['first_fc_hidden_dim']
     fc2_out_dim = conf['second_fc_hidden_dim']

-    emb = EmbeddingModule(conf).cuda()
+    # fc1 = torch.nn.Linear(fc1_in_dim, fc2_in_dim)
+    # fc2 = torch.nn.Linear(fc2_in_dim, fc2_out_dim)
+    # linear_out = torch.nn.Linear(fc2_out_dim, 1)
+    # head = torch.nn.Sequential(fc1, fc2, linear_out)
+
+    emb = EmbeddingModule(config).cuda()

     transform = None
     if conf['transformer'] == "kronoker":
@@ -68,7 +74,7 @@ def train_melu(conf, checkpoint_dir=None, data_dir=None):
     elif conf['transformer'] == "linear":
         transform = l2l.optim.ModuleTransform(torch.nn.Linear)

-    trainer = Trainer(conf)
+    trainer = Trainer(config)

     # define meta algorithm
     if conf['meta_algo'] == "maml":
@@ -79,7 +85,9 @@ def train_melu(conf, checkpoint_dir=None, data_dir=None):
         trainer = l2l.algorithms.GBML(trainer, transform=transform, lr=conf['local_lr'],
                                       adapt_transform=conf['adapt_transform'], first_order=conf['first_order'])
     trainer.cuda()
+    # net = nn.Sequential(emb, head)
+
     criterion = nn.MSELoss()
     all_parameters = list(emb.parameters()) + list(trainer.parameters())
     optimizer = torch.optim.Adam(all_parameters, lr=conf['lr'])

@@ -97,7 +105,7 @@ def train_melu(conf, checkpoint_dir=None, data_dir=None):

     a, b, c, d = zip(*train_dataset)

-    for epoch in range(conf['num_epoch']):  # loop over the dataset multiple times
+    for epoch in range(config['num_epoch']):  # loop over the dataset multiple times
         for i in range(num_batch):
             optimizer.zero_grad()
             meta_train_error = 0.0

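The hunks above switch `train_melu` back to the global `config` for the embedding module, trainer, and epoch count; the meta-learning setup itself is unchanged: the trainer is wrapped by a learn2learn meta-algorithm selected from the config. A reduced sketch of that selection and of the usual clone/adapt loop (the name `build_meta_learner` is illustrative, and the GBML branch uses a plain linear transform rather than the project's Kronecker variant):

```python
import torch
import learn2learn as l2l

def build_meta_learner(module: torch.nn.Module, conf: dict) -> torch.nn.Module:
    """Wrap a model with the meta-algorithm named in the config (sketch)."""
    if conf['meta_algo'] == 'maml':
        return l2l.algorithms.MAML(module, lr=conf['local_lr'],
                                   first_order=conf['first_order'])
    if conf['meta_algo'] == 'metasgd':
        return l2l.algorithms.MetaSGD(module, lr=conf['local_lr'],
                                      first_order=conf['first_order'])
    # GBML learns a transform of the inner-loop gradient, here a linear map
    transform = l2l.optim.ModuleTransform(torch.nn.Linear)
    return l2l.algorithms.GBML(module, transform=transform, lr=conf['local_lr'],
                               adapt_transform=conf['adapt_transform'],
                               first_order=conf['first_order'])

# typical per-task usage with such a wrapper:
#   learner = meta_model.clone()     # task-specific copy of the meta-parameters
#   learner.adapt(support_loss)      # one or more inner-loop updates
#   query_loss.backward()            # gradients flow back to the meta-parameters
#   optimizer.step()
```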
learnToLearn.py  +1 -1

@@ -145,7 +145,7 @@ if __name__ == '__main__':
     if config['use_cuda']:
         os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
         os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
-    master_path = "/media/external_10TB/10TB/maheri/define_task_melu_data2"
+    master_path = "/media/external_10TB/10TB/maheri/define_task_melu_data"
     config['master_path'] = master_path

     # DATA GENERATION

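The only substantive change in this file is the `master_path`; the GPU selection above it follows the common pattern of pinning the process to one physical device through environment variables before any CUDA work starts. A small sketch of that pattern (the device index `"1"` is just an example value for `args.gpu`):

```python
import os
import torch

# must be set before the first CUDA call in the process;
# the selected card is then visible to PyTorch as cuda:0
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
```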
learnToLearnTest.py  +1 -1

@@ -62,7 +62,7 @@ def test(embedding, head, total_dataset, batch_size, num_epoch, test_state=None,
         ndcgs3.append(float(ndcg_score([y_true], [y_pred], k=3, sample_weight=None, ignore_ties=False)))

         del supp_xs, supp_ys, query_xs, query_ys, y_true, y_pred, loss_q, temp_sxs, temp_qxs, predictions, l1
-        # torch.cuda.empty_cache()
+        torch.cuda.empty_cache()

     # calculate metrics
     losses_q = np.array(losses_q).mean()

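The test loop re-enables `torch.cuda.empty_cache()` after each task and keeps computing NDCG with scikit-learn. A small sketch of both pieces, with made-up relevance scores purely for illustration:

```python
import numpy as np
import torch
from sklearn.metrics import ndcg_score

# ndcg_score expects 2-D arrays: one row per query/task
y_true = np.array([[5.0, 3.0, 4.0, 1.0]])   # ground-truth ratings (illustrative)
y_pred = np.array([[4.2, 2.9, 3.7, 0.8]])   # predicted ratings (illustrative)
ndcg3 = float(ndcg_score(y_true, y_pred, k=3))

# releasing cached GPU memory between tasks trades some speed
# for a lower peak memory footprint
if torch.cuda.is_available():
    torch.cuda.empty_cache()
```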