
Store losses and embeddings during testing

master · Faeze, 2 years ago · commit 8a7e663616
5 changed files with 51 additions and 53 deletions:

  1. .gitignore             +2   -1
  2. data/weibo/config.py   +5   -5
  3. evaluation.py          +12  -1
  4. learner.py             +5   -3
  5. test_main.py           +27  -43

.gitignore  (+2, -1)

@@ -116,4 +116,5 @@ dmypy.json
 
 .idea/
 env/
-venv/
+venv/
+results/

data/weibo/config.py  (+5, -5)

@@ -21,9 +21,9 @@ class WeiboConfig(Config):
     validation_text_path = data_path + 'weibo_train.csv'
     test_text_path = data_path + 'weibo_test.csv'
 
-    batch_size = 64
+    batch_size = 100
     epochs = 100
-    num_workers = 4
+    num_workers = 1
     head_lr = 1e-03
     image_encoder_lr = 1e-02
     text_encoder_lr = 1e-05
@@ -34,9 +34,9 @@ class WeiboConfig(Config):
 
     image_model_name = '../../../../../media/external_10TB/10TB/ghorbanpoor/vit-base-patch16-224'
     image_embedding = 768
-    text_encoder_model = "../../../../../media/external_10TB/10TB/ghorbanpoor/chinese-xlnet-base"
+    text_encoder_model = "../../../../../media/external_10TB/10TB/ghorbanpoor/xlnet-base-chinese"
     # text_encoder_model = "/home/faeze/PycharmProjects/new_fake_news_detectioin/bert/bert-base-uncased"
-    text_tokenizer = "../../../../../media/external_10TB/10TB/ghorbanpoor/chinese-xlnet-base"
+    text_tokenizer = "../../../../../media/external_10TB/10TB/ghorbanpoor/xlnet-base-chinese"
     # text_tokenizer = "/home/faeze/PycharmProjects/new_fake_news_detectioin/bert/bert-base-uncased"
     text_embedding = 768
     max_length = 200
@@ -47,7 +47,7 @@ class WeiboConfig(Config):
 
     labels = ['real', 'fake']
 
-    wanted_accuracy = 0.80
+    wanted_accuracy = 0.85
 
     def optuna(self, trial):
        self.head_lr = trial.suggest_loguniform('head_lr', 1e-5, 1e-1)
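
Note: this change repoints text_encoder_model and text_tokenizer from a chinese-xlnet-base directory to xlnet-base-chinese, raises batch_size from 64 to 100, drops num_workers to 1, and raises wanted_accuracy to 0.85. For orientation, the sketch below shows how fields like text_tokenizer, text_encoder_model and max_length are typically consumed with the Hugging Face transformers API; the WeiboConfig import path and the AutoTokenizer/AutoModel usage are assumptions for illustration, not code from this commit.

# Illustrative sketch only, not part of this commit: load the tokenizer and
# text encoder from the paths configured above. Assumes the local
# xlnet-base-chinese directory holds XLNet-compatible weights and that
# WeiboConfig is importable as shown.
from transformers import AutoModel, AutoTokenizer

from data.weibo.config import WeiboConfig  # assumed import path

config = WeiboConfig()
tokenizer = AutoTokenizer.from_pretrained(config.text_tokenizer)
text_encoder = AutoModel.from_pretrained(config.text_encoder_model)

tokens = tokenizer("一个测试句子", max_length=config.max_length,
                   padding='max_length', truncation=True, return_tensors='pt')
hidden = text_encoder(**tokens).last_hidden_state  # (1, max_length, text_embedding)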

evaluation.py  (+12, -1)

@@ -211,6 +211,7 @@ def saving_in_tensorboard(config, x, y, fname='embedding'):
 
     # config.writer.add_embedding(mat=x, label_img=y, metadata=z, tag=fname)
 
+
 def plot_tsne(config, x, y, fname='tsne.png'):
     x = [i.cpu().numpy() for i in x]
     y = [i.cpu().numpy() for i in y]
@@ -231,7 +232,17 @@ def plot_tsne(config, x, y, fname='tsne.png'):
     ax.legend(fontsize='large', markerscale=2)
     plt.title('tsne of ' + str(fname.split('/')[-1].split('.')[0]))
     plt.savefig(fname=fname)
-    # plt.show()
+    plt.show()
 
 
+def save_embedding(config, x, fname='embedding.tsv'):
+    x = [i.cpu().numpy() for i in x]
+
+    x = np.concatenate(x, axis=0)
+
+    embedding_df = pd.DataFrame(x)
+
+    embedding_df.to_csv(fname, sep='\t', index=False, header=False)
+
+
 def plot_pca(config, x, y, fname='pca.png'):
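
The new save_embedding concatenates the per-batch feature tensors and writes the result as a headerless, tab-separated file. A quick way to read one back for inspection or for the TensorBoard embedding projector is sketched below; the results/ path is only an example, not a location the code guarantees.

# Illustrative sketch only: load a TSV produced by save_embedding() back
# into a numpy array. 'results/new_text_features.tsv' is an example name.
import pandas as pd

embeddings = pd.read_csv('results/new_text_features.tsv', sep='\t', header=None).to_numpy()
print(embeddings.shape)  # (num_test_samples, embedding_dim)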

learner.py  (+5, -3)

@@ -14,9 +14,11 @@ from utils import AvgMeter, print_lr, EarlyStopping, CheckpointSaving
 
 def batch_constructor(config, batch):
     b = {}
-    for k, v in batch.items():
-        if k != 'text':
-            b[k] = v.to(config.device)
+    for key, value in batch.items():
+        if key != 'text':
+            b[key] = value.to(config.device)
+        else:
+            b[key] = value
     return b
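
Besides the rename, the rewritten loop now keeps the raw 'text' entry in the returned batch (the old version silently dropped it) while still moving every tensor field to config.device. A toy illustration of the new behaviour follows; the _Cfg stub is hypothetical, standing in for the repository's per-dataset Config objects.

# Illustrative sketch only: shows what the updated batch_constructor returns.
import torch

class _Cfg:  # hypothetical stand-in for the real config object
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

def batch_constructor(config, batch):
    b = {}
    for key, value in batch.items():
        if key != 'text':
            b[key] = value.to(config.device)
        else:
            b[key] = value  # raw text is kept instead of being dropped
    return b

batch = {'id': torch.tensor([0, 1]),
         'label': torch.tensor([1, 0]),
         'text': ['a real post', 'a fake post']}
moved = batch_constructor(_Cfg(), batch)
print(moved['label'].device, moved['text'])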



test_main.py  (+27, -43)

@@ -1,13 +1,15 @@
 import random
 
 import numpy as np
 import pandas as pd
 import torch
 from tqdm import tqdm
 
 from data_loaders import make_dfs, build_loaders
-from evaluation import metrics, report_per_class, roc_auc_plot, precision_recall_plot, plot_tsne, plot_pca
+from evaluation import metrics, report_per_class, roc_auc_plot, precision_recall_plot, plot_tsne, plot_pca, \
+    save_embedding
 from learner import batch_constructor
-from model import FakeNewsModel
+from model import FakeNewsModel, calculate_loss


@@ -45,65 +47,47 @@ def test(config, test_loader, trial_number=None):
     targets = []
     predictions = []
     scores = []
+    ids = []
+    losses = []
     tqdm_object = tqdm(test_loader, total=len(test_loader))
     for i, batch in enumerate(tqdm_object):
         batch = batch_constructor(config, batch)
         with torch.no_grad():
             output, score = model(batch)
 
-            prediction = output.detach()
-            predictions.append(prediction)
-
-            score = score.detach()
-            scores.append(score)
-
-            target = batch['label'].detach()
-            targets.append(target)
-
-            image_feature = model.image_embeddings.detach()
-            image_features.append(image_feature)
-
-            text_feature = model.text_embeddings.detach()
-            text_features.append(text_feature)
-
-            multimodal_feature = model.multimodal_embeddings.detach()
-            multimodal_features.append(multimodal_feature)
-
-            concat_feature = model.classifier.embeddings.detach()
-            concat_features.append(concat_feature)
-
             # config.writer.add_graph(model, input_to_model=batch, verbose=True)
+            loss, c_loss, s_loss = calculate_loss(model, score, batch['label'])
 
+            predictions.append(output.detach())
+            scores.append(score.detach())
+            targets.append(batch['label'].detach())
+            ids.append(batch['id'].detach())
+            image_features.append(model.image_embeddings.detach())
+            text_features.append(model.text_embeddings.detach())
+            multimodal_features.append(model.multimodal_embeddings.detach())
+            concat_features.append(model.classifier.embeddings.detach())
+            losses.append((loss.detach(), c_loss.detach(), s_loss.detach()))

     s = ''
     s += report_per_class(targets, predictions) + '\n'
-    s += metrics(targets, predictions, scores, file_path=str(config.output_path) + '/fpr_tpr.csv') + '\n'
-    with open(config.output_path + '/results.txt', 'w') as f:
+    s += metrics(targets, predictions, scores, file_path=str(config.output_path) + '/new_fpr_tpr.csv') + '\n'
+    with open(config.output_path + '/new_results.txt', 'w') as f:
         f.write(s)
 
     roc_auc_plot(targets, scores, fname=str(config.output_path) + "/roc.png")
     precision_recall_plot(targets, scores, fname=str(config.output_path) + "/pr.png")

     # saving_in_tensorboard(config, image_features, targets, 'image_features')
     plot_tsne(config, image_features, targets, fname=str(config.output_path) + '/image_features_tsne.png')
     plot_pca(config, image_features, targets, fname=str(config.output_path) + '/image_features_pca.png')
 
     # saving_in_tensorboard(config, text_features, targets, 'text_features')
     plot_tsne(config, text_features, targets, fname=str(config.output_path) + '/text_features_tsne.png')
     plot_pca(config, text_features, targets, fname=str(config.output_path) + '/text_features_pca.png')
     #
     # saving_in_tensorboard(config, multimodal_features, targets, 'multimodal_features')
     plot_tsne(config, multimodal_features, targets, fname=str(config.output_path) + '/multimodal_features_tsne.png')
     plot_pca(config, multimodal_features, targets, fname=str(config.output_path) + '/multimodal_features_pca.png')
 
     # saving_in_tensorboard(config, concat_features, targets, 'concat_features')
     plot_tsne(config, concat_features, targets, fname=str(config.output_path) + '/concat_features_tsne.png')
     plot_pca(config, concat_features, targets, fname=str(config.output_path) + '/concat_features_pca.png')
+    save_embedding(config, image_features, fname=str(config.output_path) + '/new_image_features.tsv')
+    save_embedding(config, text_features, fname=str(config.output_path) + '/new_text_features.tsv')
+    save_embedding(config, multimodal_features, fname=str(config.output_path) + '/new_multimodal_features_.tsv')
+    save_embedding(config, concat_features, fname=str(config.output_path) + '/new_concat_features.tsv')

     config_parameters = str(config)
-    with open(config.output_path + '/parameters.txt', 'w') as f:
+    with open(config.output_path + '/new_parameters.txt', 'w') as f:
         f.write(config_parameters)
     print(config)
 
+    pd.DataFrame({'id': ids, 'predicted_label': predictions, 'real_label': targets, 'losses': losses}).to_csv(
+        str(config.output_path) + '/new_text_label.csv')


 def test_main(config, trial_number=None):
     train_df, test_df, validation_df = make_dfs(config, )
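
Taken together, the test loop now also keeps per-batch ids and the (loss, c_loss, s_loss) tuples from calculate_loss and dumps them, alongside predictions and labels, to new_text_label.csv. A quick way to load that file for error analysis is sketched below; note that the columns were appended once per batch in the loop above, so each row holds a batch worth of values serialised as tensor strings, and the results/ path is only an example.

# Illustrative sketch only: read back the per-batch CSV written by test().
import pandas as pd

df = pd.read_csv('results/new_text_label.csv', index_col=0)  # example path
print(df.columns.tolist())  # ['id', 'predicted_label', 'real_label', 'losses']
print(df.head())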
