Browse Source

Store test-time losses and embeddings; update Weibo config

master
Faeze 3 months ago
parent
commit
8a7e663616
5 changed files with 51 additions and 53 deletions
  1. 2
    1
      .gitignore
  2. 5
    5
      data/weibo/config.py
  3. 12
    1
      evaluation.py
  4. 5
    3
      learner.py
  5. 27
    43
      test_main.py

+ 2
- 1
.gitignore View File

@@ -116,4 +116,5 @@ dmypy.json

.idea/
env/
venv/
venv/
results/

+ 5
- 5
data/weibo/config.py View File

@@ -21,9 +21,9 @@ class WeiboConfig(Config):
validation_text_path = data_path + 'weibo_train.csv'
test_text_path = data_path + 'weibo_test.csv'

batch_size = 64
batch_size = 100
epochs = 100
num_workers = 4
num_workers = 1
head_lr = 1e-03
image_encoder_lr = 1e-02
text_encoder_lr = 1e-05
@@ -34,9 +34,9 @@ class WeiboConfig(Config):

image_model_name = '../../../../../media/external_10TB/10TB/ghorbanpoor/vit-base-patch16-224'
image_embedding = 768
text_encoder_model = "../../../../../media/external_10TB/10TB/ghorbanpoor/chinese-xlnet-base"
text_encoder_model = "../../../../../media/external_10TB/10TB/ghorbanpoor/xlnet-base-chinese"
# text_encoder_model = "/home/faeze/PycharmProjects/new_fake_news_detectioin/bert/bert-base-uncased"
text_tokenizer = "../../../../../media/external_10TB/10TB/ghorbanpoor/chinese-xlnet-base"
text_tokenizer = "../../../../../media/external_10TB/10TB/ghorbanpoor/xlnet-base-chinese"
# text_tokenizer = "/home/faeze/PycharmProjects/new_fake_news_detectioin/bert/bert-base-uncased"
text_embedding = 768
max_length = 200
@@ -47,7 +47,7 @@ class WeiboConfig(Config):

labels = ['real', 'fake']

wanted_accuracy = 0.80
wanted_accuracy = 0.85

def optuna(self, trial):
self.head_lr = trial.suggest_loguniform('head_lr', 1e-5, 1e-1)

+ 12
- 1
evaluation.py View File

@@ -211,6 +211,7 @@ def saving_in_tensorboard(config, x, y, fname='embedding'):

# config.writer.add_embedding(mat=x, label_img=y, metadata=z, tag=fname)


def plot_tsne(config, x, y, fname='tsne.png'):
x = [i.cpu().numpy() for i in x]
y = [i.cpu().numpy() for i in y]
@@ -231,7 +232,17 @@ def plot_tsne(config, x, y, fname='tsne.png'):
ax.legend(fontsize='large', markerscale=2)
plt.title('tsne of ' + str(fname.split('/')[-1].split('.')[0]))
plt.savefig(fname=fname)
# plt.show()
plt.show()


def save_embedding(config, x, fname='embedding.tsv'):
    """Write a sequence of embedding batches to a tab-separated file.

    Every element of ``x`` is converted to a NumPy array, the arrays are
    concatenated along axis 0, and the result is written with one row per
    line, without a header row or an index column.

    Args:
        config: Not used by this function; kept so the signature lines up
            with ``plot_tsne`` / ``plot_pca``.
        x: Iterable of tensors exposing ``.detach().cpu().numpy()``
            (presumably torch tensors of shape (batch, dim) -- confirm
            against the caller).
        fname: Path of the output file.
    """
    # detach() first: Tensor.numpy() refuses tensors that still require grad,
    # and callers are not guaranteed to have detached them already.
    arrays = [batch.detach().cpu().numpy() for batch in x]

    stacked = np.concatenate(arrays, axis=0)

    pd.DataFrame(stacked).to_csv(fname, sep='\t', index=False, header=False)


def plot_pca(config, x, y, fname='pca.png'):

+ 5
- 3
learner.py View File

@@ -14,9 +14,11 @@ from utils import AvgMeter, print_lr, EarlyStopping, CheckpointSaving

def batch_constructor(config, batch):
    """Build a copy of ``batch`` with its non-text entries on ``config.device``.

    Args:
        config: Object exposing a ``device`` attribute, passed to ``.to()``.
        batch: Mapping of field name to value. Every value except the one
            stored under ``'text'`` must provide a ``.to(device)`` method.

    Returns:
        A new dict with the same keys as ``batch``. The ``'text'`` entry is
        passed through unchanged; every other entry is the result of
        ``value.to(config.device)``.
    """
    # Single pass: the 'text' entry has no device placement, so it is
    # forwarded as-is instead of being dropped from the result.
    return {
        key: value if key == 'text' else value.to(config.device)
        for key, value in batch.items()
    }



+ 27
- 43
test_main.py View File

@@ -1,13 +1,15 @@
import random

import numpy as np
import pandas as pd
import torch
from tqdm import tqdm

from data_loaders import make_dfs, build_loaders
from evaluation import metrics, report_per_class, roc_auc_plot, precision_recall_plot, plot_tsne, plot_pca
from evaluation import metrics, report_per_class, roc_auc_plot, precision_recall_plot, plot_tsne, plot_pca, \
save_embedding
from learner import batch_constructor
from model import FakeNewsModel
from model import FakeNewsModel, calculate_loss


def test(config, test_loader, trial_number=None):
@@ -45,65 +47,47 @@ def test(config, test_loader, trial_number=None):
targets = []
predictions = []
scores = []
ids = []
losses = []
tqdm_object = tqdm(test_loader, total=len(test_loader))
for i, batch in enumerate(tqdm_object):
batch = batch_constructor(config, batch)
with torch.no_grad():
output, score = model(batch)

prediction = output.detach()
predictions.append(prediction)

score = score.detach()
scores.append(score)

target = batch['label'].detach()
targets.append(target)

image_feature = model.image_embeddings.detach()
image_features.append(image_feature)

text_feature = model.text_embeddings.detach()
text_features.append(text_feature)

multimodal_feature = model.multimodal_embeddings.detach()
multimodal_features.append(multimodal_feature)

concat_feature = model.classifier.embeddings.detach()
concat_features.append(concat_feature)

# config.writer.add_graph(model, input_to_model=batch, verbose=True)
loss, c_loss, s_loss = calculate_loss(model, score, batch['label'])

predictions.append(output.detach())
scores.append(score.detach())
targets.append(batch['label'].detach())
ids.append(batch['id'].detach())
image_features.append(model.image_embeddings.detach())
text_features.append(model.text_embeddings.detach())
multimodal_features.append(model.multimodal_embeddings.detach())
concat_features.append(model.classifier.embeddings.detach())
losses.append((loss.detach(), c_loss.detach(), s_loss.detach()))

s = ''
s += report_per_class(targets, predictions) + '\n'
s += metrics(targets, predictions, scores, file_path=str(config.output_path) + '/fpr_tpr.csv') + '\n'
with open(config.output_path + '/results.txt', 'w') as f:
s += metrics(targets, predictions, scores, file_path=str(config.output_path) + '/new_fpr_tpr.csv') + '\n'
with open(config.output_path + '/new_results.txt', 'w') as f:
f.write(s)

roc_auc_plot(targets, scores, fname=str(config.output_path) + "/roc.png")
precision_recall_plot(targets, scores, fname=str(config.output_path) + "/pr.png")

# saving_in_tensorboard(config, image_features, targets, 'image_features')
plot_tsne(config, image_features, targets, fname=str(config.output_path) + '/image_features_tsne.png')
plot_pca(config, image_features, targets, fname=str(config.output_path) + '/image_features_pca.png')

# saving_in_tensorboard(config, text_features, targets, 'text_features')
plot_tsne(config, text_features, targets, fname=str(config.output_path) + '/text_features_tsne.png')
plot_pca(config, text_features, targets, fname=str(config.output_path) + '/text_features_pca.png')
#
# saving_in_tensorboard(config, multimodal_features, targets, 'multimodal_features')
plot_tsne(config, multimodal_features, targets, fname=str(config.output_path) + '/multimodal_features_tsne.png')
plot_pca(config, multimodal_features, targets, fname=str(config.output_path) + '/multimodal_features_pca.png')

# saving_in_tensorboard(config, concat_features, targets, 'concat_features')
plot_tsne(config, concat_features, targets, fname=str(config.output_path) + '/concat_features_tsne.png')
plot_pca(config, concat_features, targets, fname=str(config.output_path) + '/concat_features_pca.png')
save_embedding(config, image_features, fname=str(config.output_path) + '/new_image_features.tsv')
save_embedding(config, text_features, fname=str(config.output_path) + '/new_text_features.tsv')
save_embedding(config, multimodal_features, fname=str(config.output_path) + '/new_multimodal_features_.tsv')
save_embedding(config, concat_features, fname=str(config.output_path) + '/new_concat_features.tsv')

config_parameters = str(config)
with open(config.output_path + '/parameters.txt', 'w') as f:
with open(config.output_path + '/new_parameters.txt', 'w') as f:
f.write(config_parameters)
print(config)

pd.DataFrame({'id': ids, 'predicted_label': predictions, 'real_label': targets, 'losses': losses}).to_csv(
str(config.output_path) + '/new_text_label.csv')


def test_main(config, trial_number=None):
train_df, test_df, validation_df = make_dfs(config, )

Loading…
Cancel
Save