
prepare for hyper parameter tuning

define_task · mohamad maheri, 2 years ago
commit 576c0e97f2
5 changed files with 45 additions and 48 deletions
  1. clustering.py (+16, -24)
  2. hyper_main.py (+22, -9)
  3. hyper_tunning.py (+5, -13)
  4. learnToLearn.py (+1, -1)
  5. learnToLearnTest.py (+1, -1)

clustering.py (+16, -24)

 class ClustringModule(torch.nn.Module):
-    def __init__(self, config):
+    def __init__(self, config_param):
         super(ClustringModule, self).__init__()
-        # self.h1_dim = 128
-        self.h1_dim = config['cluster_h1_dim']
-        # self.h2_dim = 64
-        self.h2_dim = config['cluster_h2_dim']
-        # self.final_dim = fc1_in_dim
-        # self.final_dim = 64
-        self.final_dim = config['cluster_final_dim']
-        # self.dropout_rate = 0
-        self.dropout_rate = config['cluster_dropout_rate']
+        self.h1_dim = config_param['cluster_h1_dim']
+        self.h2_dim = config_param['cluster_h2_dim']
+        self.final_dim = config_param['cluster_final_dim']
+        self.dropout_rate = config_param['cluster_dropout_rate']

-        layers = [nn.Linear(config['embedding_dim'] * 8 + 1, self.h1_dim),
+        layers = [nn.Linear(config_param['embedding_dim'] * 8 + 1, self.h1_dim),
                   torch.nn.Dropout(self.dropout_rate),
                   nn.ReLU(inplace=True),
                   # nn.BatchNorm1d(self.h1_dim),
                   nn.Linear(self.h2_dim, self.final_dim)]
         self.input_to_hidden = nn.Sequential(*layers)

-        # self.clusters_k = 7
-        self.clusters_k = config['cluster_k']
+        self.clusters_k = config_param['cluster_k']
         self.embed_size = self.final_dim
         self.array = nn.Parameter(init.xavier_uniform_(torch.FloatTensor(self.clusters_k, self.embed_size)))
-        # self.temperature = 1.0
-        self.temperature = config['temperature']
+        self.temperature = config_param['temperature']

     def aggregate(self, z_i):
         return torch.mean(z_i, dim=0)
         # todo : may be useless
         mean_task = self.aggregate(task_embed)

-        # C_distribution, new_task_embed = self.memoryunit(mean_task)
         res = torch.norm(mean_task - self.array, p=2, dim=1, keepdim=True)
         res = torch.pow((res / self.temperature) + 1, (self.temperature + 1) / -2)
         # 1*k
         value = torch.mm(C, self.array)
         # simple add operation
         new_task_embed = value + mean_task
-        # calculate target distribution
         return C, new_task_embed


 class Trainer(torch.nn.Module):
-    def __init__(self, config, head=None):
+    def __init__(self, config_param, head=None):
         super(Trainer, self).__init__()
-        fc1_in_dim = config['embedding_dim'] * 8
-        fc2_in_dim = config['first_fc_hidden_dim']
-        fc2_out_dim = config['second_fc_hidden_dim']
+        fc1_in_dim = config_param['embedding_dim'] * 8
+        fc2_in_dim = config_param['first_fc_hidden_dim']
+        fc2_out_dim = config_param['second_fc_hidden_dim']
         self.fc1 = torch.nn.Linear(fc1_in_dim, fc2_in_dim)
         self.fc2 = torch.nn.Linear(fc2_in_dim, fc2_out_dim)
         self.linear_out = torch.nn.Linear(fc2_out_dim, 1)
         # cluster module
-        self.cluster_module = ClustringModule(config)
+        self.cluster_module = ClustringModule(config_param)
         # self.task_dim = fc1_in_dim
-        self.task_dim = config['cluster_final_dim']
+        self.task_dim = config_param['cluster_final_dim']
         # transform task to weights
         self.film_layer_1_beta = nn.Linear(self.task_dim, fc2_in_dim, bias=False)
         self.film_layer_1_gamma = nn.Linear(self.task_dim, fc2_in_dim, bias=False)
         # self.film_layer_3_beta = nn.Linear(self.task_dim, self.h3_dim, bias=False)
         # self.film_layer_3_gamma = nn.Linear(self.task_dim, self.h3_dim, bias=False)
         # self.dropout_rate = 0
-        self.dropout_rate = config['trainer_dropout_rate']
+        self.dropout_rate = config_param['trainer_dropout_rate']
         self.dropout = nn.Dropout(self.dropout_rate)

     def aggregate(self, z_i):

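For reference, a minimal sketch (not part of this commit) of driving the renamed constructor with a plain dict. The key names are the ones the diff reads from config_param; the values and the import path are illustrative assumptions only:

from clustering import Trainer  # import path assumed

# Illustrative values only; the real search space is defined in hyper_main.py.
config_param = {
    'embedding_dim': 32,
    'first_fc_hidden_dim': 64,
    'second_fc_hidden_dim': 64,
    'cluster_h1_dim': 128,
    'cluster_h2_dim': 64,
    'cluster_final_dim': 64,
    'cluster_dropout_rate': 0.01,
    'cluster_k': 7,
    'temperature': 1.0,
    'trainer_dropout_rate': 0.01,
}

trainer = Trainer(config_param)  # builds its own ClustringModule(config_param) internally
print(sum(p.numel() for p in trainer.parameters()))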
hyper_main.py (+22, -9)

 def main(num_samples, max_num_epochs=20, gpus_per_trial=2):
-    data_dir = os.path.abspath("/media/external_10TB/10TB/maheri/define_task_melu_data")
-    load_data(data_dir)
+    data_dir = os.path.abspath("/media/external_10TB/10TB/maheri/new_data_dir3")
     config = {
         # "l1": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
         # "l2": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
         # "lr": tune.loguniform(1e-4, 1e-1),
         # "batch_size": tune.choice([2, 4, 8, 16])
         "transformer": tune.choice(['kronoker']),
-        "meta_algo": tune.choice(['gbml']),
+        "meta_algo": tune.choice(['gbml', 'metasgd']),
         "first_order": tune.choice([False]),
         "adapt_transform": tune.choice([True, False]),
         # "local_lr":tune.choice([5e-6,5e-4,5e-3]),
         "local_lr": tune.loguniform(5e-6, 5e-3),
         "lr": tune.loguniform(5e-5, 5e-3),
         "batch_size": tune.choice([16, 32, 64]),
-        "inner": tune.choice([7, 5, 4, 3, 1]),
+        "inner": tune.choice([1, 3, 5, 7]),
         "test_state": tune.choice(["user_and_item_cold_state"]),

         "embedding_dim": tune.choice([16, 32, 64]),
         'cluster_final_dim': tune.choice([64, 32]),
         'cluster_dropout_rate': tune.choice([0, 0.01, 0.1]),
         'cluster_k': tune.choice([3, 5, 7, 9, 11]),
-        'temperature': tune.choice([0.1, 0.5, 1.0, 2.0, 10.0]),
+        'temperature': tune.choice([0.001, 0.1, 0.5, 1.0, 2.0, 10.0]),
         'trainer_dropout_rate': tune.choice([0, 0.01, 0.1]),
+
+        'use_cuda': tune.choice([True]),
+        # item
+        'num_rate': tune.choice([6]),
+        'num_genre': tune.choice([25]),
+        'num_director': tune.choice([2186]),
+        'num_actor': tune.choice([8030]),
+        # user
+        'num_gender': tune.choice([2]),
+        'num_age': tune.choice([7]),
+        'num_occupation': tune.choice([21]),
+        'num_zipcode': tune.choice([3402]),
+
+        'num_epoch': tune.choice([30]),
     }

     scheduler = ASHAScheduler(
         metric_columns=["loss", "ndcg1", "ndcg3", "training_iteration"])
     result = tune.run(
         partial(train_melu, data_dir=data_dir),
-        resources_per_trial={"cpu": 4, "gpu": gpus_per_trial},
+        resources_per_trial={"cpu": 8, "gpu": gpus_per_trial},
         config=config,
         num_samples=num_samples,
         scheduler=scheduler,
         progress_reporter=reporter,
         log_to_file=True,
         # resume=True,
-        local_dir="./hyper_tunning_all_cold",
+        local_dir="./hyper_tunning_all_cold2",
         name="melu_all_cold_clustered",

     )

     best_trial = result.get_best_trial("loss", "min", "last")


 if __name__ == "__main__":
     # You can change the number of GPUs per trial here:
-    main(num_samples=150, max_num_epochs=25, gpus_per_trial=1)
+    main(num_samples=150, max_num_epochs=30, gpus_per_trial=1)

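For orientation, a self-contained sketch of the Ray Tune pattern this entry point follows (ASHAScheduler, CLIReporter, tune.run, then get_best_trial). The objective function, metric values, and settings below are placeholders, not the project's code:

from functools import partial
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler

def train_dummy(conf, checkpoint_dir=None, data_dir=None):
    # Placeholder objective: report a fake loss so the pattern is runnable end to end.
    for epoch in range(conf["num_epoch"]):
        tune.report(loss=1.0 / (epoch + 1), ndcg1=0.0, ndcg3=0.0)

config = {"lr": tune.loguniform(5e-5, 5e-3), "num_epoch": tune.choice([5])}
scheduler = ASHAScheduler(metric="loss", mode="min", max_t=30, grace_period=1)
reporter = CLIReporter(metric_columns=["loss", "ndcg1", "ndcg3", "training_iteration"])

result = tune.run(
    partial(train_dummy, data_dir=None),
    resources_per_trial={"cpu": 1},
    config=config,
    num_samples=4,
    scheduler=scheduler,
    progress_reporter=reporter,
)
print(result.get_best_trial("loss", "min", "last").config)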
hyper_tunning.py (+5, -13)

 import torch.nn as nn
 from ray import tune
 import pickle
-from options import config
+# from options import config
 from embedding_module import EmbeddingModule
 import learn2learn as l2l
 import random

 random.shuffle(test_dataset)
 random.shuffle(trainset)
-val_size = int(test_set_size * 0.2)
+val_size = int(test_set_size * 0.3)
 validationset = test_dataset[:val_size]
 testset = test_dataset[val_size:]


 def train_melu(conf, checkpoint_dir=None, data_dir=None):
-    print("inajm1:", checkpoint_dir)
     embedding_dim = conf['embedding_dim']
     fc1_in_dim = conf['embedding_dim'] * 8
     fc2_in_dim = conf['first_fc_hidden_dim']
     fc2_out_dim = conf['second_fc_hidden_dim']

-    # fc1 = torch.nn.Linear(fc1_in_dim, fc2_in_dim)
-    # fc2 = torch.nn.Linear(fc2_in_dim, fc2_out_dim)
-    # linear_out = torch.nn.Linear(fc2_out_dim, 1)
-    # head = torch.nn.Sequential(fc1, fc2, linear_out)

-    emb = EmbeddingModule(config).cuda()
+    emb = EmbeddingModule(conf).cuda()

     transform = None
     if conf['transformer'] == "kronoker":
     elif conf['transformer'] == "linear":
         transform = l2l.optim.ModuleTransform(torch.nn.Linear)

-    trainer = Trainer(config)
+    trainer = Trainer(conf)

     # define meta algorithm
     if conf['meta_algo'] == "maml":
         trainer = l2l.algorithms.GBML(trainer, transform=transform, lr=conf['local_lr'],
                                       adapt_transform=conf['adapt_transform'], first_order=conf['first_order'])
     trainer.cuda()
-    # net = nn.Sequential(emb, head)

-    criterion = nn.MSELoss()
     all_parameters = list(emb.parameters()) + list(trainer.parameters())
     optimizer = torch.optim.Adam(all_parameters, lr=conf['lr'])

     a, b, c, d = zip(*train_dataset)

-    for epoch in range(config['num_epoch']):  # loop over the dataset multiple times
+    for epoch in range(conf['num_epoch']):  # loop over the dataset multiple times
         for i in range(num_batch):
             optimizer.zero_grad()
             meta_train_error = 0.0

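The inner meta-training loop itself is elided in this diff. As a rough sketch of the usual learn2learn GBML pattern it appears to follow (toy model, made-up data, and placeholder hyperparameters; not the project's code):

import torch
import torch.nn as nn
import learn2learn as l2l

# A toy regression head standing in for the project's Trainer module.
model = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 1))
gbml = l2l.algorithms.GBML(model,
                           transform=l2l.optim.ModuleTransform(torch.nn.Linear),
                           lr=5e-4, adapt_transform=False, first_order=False)
optimizer = torch.optim.Adam(gbml.parameters(), lr=5e-4)
loss_fn = nn.MSELoss()

# One meta-batch: adapt a clone on support data, evaluate on query data.
support_x, support_y = torch.randn(10, 8), torch.randn(10, 1)
query_x, query_y = torch.randn(10, 8), torch.randn(10, 1)

optimizer.zero_grad()
learner = gbml.clone()                      # task-specific copy
for _ in range(5):                          # plays the role of the 'inner' choice above
    learner.adapt(loss_fn(learner(support_x), support_y))
meta_loss = loss_fn(learner(query_x), query_y)
meta_loss.backward()                        # gradients flow back to gbml's parameters
optimizer.step()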
learnToLearn.py (+1, -1)

 if config['use_cuda']:
     os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
     os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
-master_path = "/media/external_10TB/10TB/maheri/define_task_melu_data"
+master_path = "/media/external_10TB/10TB/maheri/define_task_melu_data2"
 config['master_path'] = master_path

 # DATA GENERATION

learnToLearnTest.py (+1, -1)

         ndcgs3.append(float(ndcg_score([y_true], [y_pred], k=3, sample_weight=None, ignore_ties=False)))

         del supp_xs, supp_ys, query_xs, query_ys, y_true, y_pred, loss_q, temp_sxs, temp_qxs, predictions, l1
-        torch.cuda.empty_cache()
+        # torch.cuda.empty_cache()

     # calculate metrics
     losses_q = np.array(losses_q).mean()

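Note that sklearn's ndcg_score expects 2-D inputs of shape (n_queries, n_items), which is why y_true and y_pred are wrapped in single-element lists above. A minimal standalone example with made-up values:

import numpy as np
from sklearn.metrics import ndcg_score

y_true = np.asarray([5.0, 3.0, 4.0, 1.0, 2.0])   # true relevance for one query set
y_pred = np.asarray([4.2, 2.9, 4.5, 1.1, 2.0])   # predicted scores
print(ndcg_score([y_true], [y_pred], k=3, sample_weight=None, ignore_ties=False))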