# m.maheri / Melu_L2L_hyperTunning

from hyper_tunning import load_data
import os
from ray.tune.schedulers import ASHAScheduler
from ray.tune import CLIReporter
from ray import tune
from functools import partial
from hyper_tunning import train_melu
import numpy as np
import torch


def _build_search_space():
    """Return the Ray Tune search space sampled for every trial.

    Entries wrapped in a single-option ``tune.choice`` always resolve to that
    one value, so they act as constants that are still recorded in each
    trial's config.
    """
    return {
        # meta learning
        "meta_algo": tune.choice(['metasgd']),
        "transformer": tune.choice(['metasgd']),
        "first_order": tune.choice([True]),
        "adapt_transform": tune.choice([False]),
        "local_lr": tune.loguniform(5e-6, 5e-3),
        "lr": tune.loguniform(5e-5, 5e-3),
        "batch_size": tune.choice([16, 32, 64]),
        "inner": tune.choice([1, 3, 4, 5, 7]),
        "test_state": tune.choice(["user_and_item_cold_state"]),

        # head
        "embedding_dim": tune.choice([16, 32, 64]),
        "first_fc_hidden_dim": tune.choice([32, 64, 128]),
        "second_fc_hidden_dim": tune.choice([32, 64]),

        # clustering module
        'cluster_dropout_rate': tune.choice([0, 0.01, 0.1]),
        'cluster_k': tune.choice([3, 5, 7, 9, 11]),
        'kmeans_alpha': tune.choice([100, 0.1, 10, 20, 50, 200]),
        'rnn_dropout': tune.choice([0, 0.01, 0.1]),
        'rnn_hidden': tune.choice([32, 64, 128]),
        'rnn_l1': tune.choice([32, 64, 128]),
        'kmeans_loss_weight': tune.choice([0, 1, 10, 50, 100, 200]),

        'temperature': tune.choice([0.1, 0.5, 1.0, 2.0, 5.0, 10.0]),
        # 'trainer_dropout_rate': tune.choice([0, 0.01, 0.1]),

        'distribution_power': tune.choice([0.1, 0.8, 1, 3, 5, 7, 8, 9]),
        'data_selection_pow': tune.choice([0.6, 0.65, 0.7, 0.75, 0.8, 0.9, 1, 1.1, 1.2, 1.4]),

        'task_dim': tune.choice([16, 32, 64, 128, 256]),
        'trainer_dropout': tune.choice([0, 0.001, 0.01, 0.05, 0.1]),
        'label_noise_std': tune.choice([0, 0.01, 0.1, 0.2, 0.3, 1, 2]),
        'head_dropout': tune.choice([0, 0.001, 0.01, 0.05, 0.1]),
        # NOTE(review): fixed at 40 regardless of max_num_epochs passed to
        # main(); confirm how train_melu relates this to reported iterations.
        'num_epoch': tune.choice([40]),
        'use_cuda': tune.choice([True]),

        # Single-option entries below are passed through to the trainer as-is.
        'num_rate': tune.choice([6]),
        'num_genre': tune.choice([25]),
        'num_director': tune.choice([2186]),
        'num_actor': tune.choice([8030]),
        'num_gender': tune.choice([2]),
        'num_age': tune.choice([7]),
        'num_occupation': tune.choice([21]),
        'num_zipcode': tune.choice([3402]),
    }


def main(num_samples, max_num_epochs=20, gpus_per_trial=2):
    """Run an ASHA-scheduled Ray Tune search over ``train_melu``.

    Samples ``num_samples`` configurations from the search space, runs each as
    a trial, then prints the best trial (lowest last-reported ``loss``) and
    the full results table.

    Args:
        num_samples: Number of configurations Ray Tune samples.
        max_num_epochs: Upper bound (``max_t``) on training iterations the
            ASHA scheduler allows a single trial to reach.
        gpus_per_trial: GPU share reserved for each trial. Fractional values
            (e.g. ``0.5``) let several trials share one device.
    """
    data_dir = os.path.abspath("/media/external_10TB/10TB/maheri/new_data_dir3")
    # load_data(data_dir)
    config = _build_search_space()

    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=max_num_epochs,
        # ASHA requires grace_period <= max_t; clamp so budgets shorter than
        # 10 iterations do not fail at scheduler construction.
        grace_period=min(10, max_num_epochs),
        reduction_factor=2)
    reporter = CLIReporter(
        # parameter_columns=["l1", "l2", "lr", "batch_size"],
        metric_columns=["loss", "ndcg1", "ndcg3", "training_iteration"])
    result = tune.run(
        partial(train_melu, data_dir=data_dir),
        # Honour the caller's GPU share instead of a hard-coded value.
        resources_per_trial={"cpu": 4, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        log_to_file=True,
        # resume=True,
        local_dir="./hyper_tunning_all_cold3",
        name="rnn_cluster_module",
    )

    # Best trial is judged on the last reported value of the loss metric.
    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    for metric in ("loss", "ndcg1", "ndcg3"):
        print("Best trial final validation {}: {}".format(
            metric, best_trial.last_result[metric]))

    #
    print("=======================================================")
    print(result.results_df)
    print("=======================================================\n")


if __name__ == "__main__":
    # Search budget: 150 sampled configurations, with the scheduler's max_t
    # set to 50. gpus_per_trial is 0.5 to match the half-GPU share each trial
    # is scheduled with (two trials per device).
    main(num_samples=150, max_num_epochs=50, gpus_per_trial=0.5)