Browse Source

some changes to store losses and embeddings

master
Faeze 1 year ago
parent
commit
8a7e663616
5 changed files with 51 additions and 53 deletions
  1. 2
    1
      .gitignore
  2. 5
    5
      data/weibo/config.py
  3. 12
    1
      evaluation.py
  4. 5
    3
      learner.py
  5. 27
    43
      test_main.py

+ 2
- 1
.gitignore View File



.idea/ .idea/
env/ env/
venv/
venv/
results/

+ 5
- 5
data/weibo/config.py View File

validation_text_path = data_path + 'weibo_train.csv' validation_text_path = data_path + 'weibo_train.csv'
test_text_path = data_path + 'weibo_test.csv' test_text_path = data_path + 'weibo_test.csv'


batch_size = 64
batch_size = 100
epochs = 100 epochs = 100
num_workers = 4
num_workers = 1
head_lr = 1e-03 head_lr = 1e-03
image_encoder_lr = 1e-02 image_encoder_lr = 1e-02
text_encoder_lr = 1e-05 text_encoder_lr = 1e-05


image_model_name = '../../../../../media/external_10TB/10TB/ghorbanpoor/vit-base-patch16-224' image_model_name = '../../../../../media/external_10TB/10TB/ghorbanpoor/vit-base-patch16-224'
image_embedding = 768 image_embedding = 768
text_encoder_model = "../../../../../media/external_10TB/10TB/ghorbanpoor/chinese-xlnet-base"
text_encoder_model = "../../../../../media/external_10TB/10TB/ghorbanpoor/xlnet-base-chinese"
# text_encoder_model = "/home/faeze/PycharmProjects/new_fake_news_detectioin/bert/bert-base-uncased" # text_encoder_model = "/home/faeze/PycharmProjects/new_fake_news_detectioin/bert/bert-base-uncased"
text_tokenizer = "../../../../../media/external_10TB/10TB/ghorbanpoor/chinese-xlnet-base"
text_tokenizer = "../../../../../media/external_10TB/10TB/ghorbanpoor/xlnet-base-chinese"
# text_tokenizer = "/home/faeze/PycharmProjects/new_fake_news_detectioin/bert/bert-base-uncased" # text_tokenizer = "/home/faeze/PycharmProjects/new_fake_news_detectioin/bert/bert-base-uncased"
text_embedding = 768 text_embedding = 768
max_length = 200 max_length = 200


labels = ['real', 'fake'] labels = ['real', 'fake']


wanted_accuracy = 0.80
wanted_accuracy = 0.85


def optuna(self, trial): def optuna(self, trial):
self.head_lr = trial.suggest_loguniform('head_lr', 1e-5, 1e-1) self.head_lr = trial.suggest_loguniform('head_lr', 1e-5, 1e-1)

+ 12
- 1
evaluation.py View File



# config.writer.add_embedding(mat=x, label_img=y, metadata=z, tag=fname) # config.writer.add_embedding(mat=x, label_img=y, metadata=z, tag=fname)



def plot_tsne(config, x, y, fname='tsne.png'): def plot_tsne(config, x, y, fname='tsne.png'):
x = [i.cpu().numpy() for i in x] x = [i.cpu().numpy() for i in x]
y = [i.cpu().numpy() for i in y] y = [i.cpu().numpy() for i in y]
ax.legend(fontsize='large', markerscale=2) ax.legend(fontsize='large', markerscale=2)
plt.title('tsne of ' + str(fname.split('/')[-1].split('.')[0])) plt.title('tsne of ' + str(fname.split('/')[-1].split('.')[0]))
plt.savefig(fname=fname) plt.savefig(fname=fname)
# plt.show()
plt.show()


def save_embedding(config, x, fname='embedding.tsv'):
    """Write a sequence of embedding tensors to a tab-separated file.

    Each element of ``x`` is converted to a NumPy array, the arrays are
    concatenated along axis 0, and the result is written with one row per
    line, no header and no index column.

    Args:
        config: Unused here; kept so the signature matches the other
            helpers that take ``config`` as their first argument.
        x: Iterable of torch tensors that can be concatenated along axis 0
            (presumably one tensor per batch -- confirm against the caller).
        fname: Destination path of the TSV file.

    An empty ``x`` produces an empty file instead of the ``ValueError`` that
    ``np.concatenate`` raises when given no arrays.
    """
    # detach() is a no-op for tensors that are already detached, but it keeps
    # .numpy() from raising on tensors that still require grad.
    arrays = [t.detach().cpu().numpy() for t in x]

    if not arrays:
        # Nothing to store: leave an empty file so downstream readers still
        # find the expected path.
        with open(fname, 'w'):
            pass
        return

    stacked = np.concatenate(arrays, axis=0)
    pd.DataFrame(stacked).to_csv(fname, sep='\t', index=False, header=False)




def plot_pca(config, x, y, fname='pca.png'): def plot_pca(config, x, y, fname='pca.png'):

+ 5
- 3
learner.py View File



def batch_constructor(config, batch): def batch_constructor(config, batch):
b = {} b = {}
for k, v in batch.items():
if k != 'text':
b[k] = v.to(config.device)
for key, value in batch.items():
if key != 'text':
b[key] = value.to(config.device)
else:
b[key] = value
return b return b





+ 27
- 43
test_main.py View File

import random import random


import numpy as np import numpy as np
import pandas as pd
import torch import torch
from tqdm import tqdm from tqdm import tqdm


from data_loaders import make_dfs, build_loaders from data_loaders import make_dfs, build_loaders
from evaluation import metrics, report_per_class, roc_auc_plot, precision_recall_plot, plot_tsne, plot_pca
from evaluation import metrics, report_per_class, roc_auc_plot, precision_recall_plot, plot_tsne, plot_pca, \
save_embedding
from learner import batch_constructor from learner import batch_constructor
from model import FakeNewsModel
from model import FakeNewsModel, calculate_loss




def test(config, test_loader, trial_number=None): def test(config, test_loader, trial_number=None):
targets = [] targets = []
predictions = [] predictions = []
scores = [] scores = []
ids = []
losses = []
tqdm_object = tqdm(test_loader, total=len(test_loader)) tqdm_object = tqdm(test_loader, total=len(test_loader))
for i, batch in enumerate(tqdm_object): for i, batch in enumerate(tqdm_object):
batch = batch_constructor(config, batch) batch = batch_constructor(config, batch)
with torch.no_grad(): with torch.no_grad():
output, score = model(batch) output, score = model(batch)

prediction = output.detach()
predictions.append(prediction)

score = score.detach()
scores.append(score)

target = batch['label'].detach()
targets.append(target)

image_feature = model.image_embeddings.detach()
image_features.append(image_feature)

text_feature = model.text_embeddings.detach()
text_features.append(text_feature)

multimodal_feature = model.multimodal_embeddings.detach()
multimodal_features.append(multimodal_feature)

concat_feature = model.classifier.embeddings.detach()
concat_features.append(concat_feature)

# config.writer.add_graph(model, input_to_model=batch, verbose=True)
loss, c_loss, s_loss = calculate_loss(model, score, batch['label'])

predictions.append(output.detach())
scores.append(score.detach())
targets.append(batch['label'].detach())
ids.append(batch['id'].detach())
image_features.append(model.image_embeddings.detach())
text_features.append(model.text_embeddings.detach())
multimodal_features.append(model.multimodal_embeddings.detach())
concat_features.append(model.classifier.embeddings.detach())
losses.append((loss.detach(), c_loss.detach(), s_loss.detach()))


s = '' s = ''
s += report_per_class(targets, predictions) + '\n' s += report_per_class(targets, predictions) + '\n'
s += metrics(targets, predictions, scores, file_path=str(config.output_path) + '/fpr_tpr.csv') + '\n'
with open(config.output_path + '/results.txt', 'w') as f:
s += metrics(targets, predictions, scores, file_path=str(config.output_path) + '/new_fpr_tpr.csv') + '\n'
with open(config.output_path + '/new_results.txt', 'w') as f:
f.write(s) f.write(s)


roc_auc_plot(targets, scores, fname=str(config.output_path) + "/roc.png") roc_auc_plot(targets, scores, fname=str(config.output_path) + "/roc.png")
precision_recall_plot(targets, scores, fname=str(config.output_path) + "/pr.png") precision_recall_plot(targets, scores, fname=str(config.output_path) + "/pr.png")


# saving_in_tensorboard(config, image_features, targets, 'image_features')
plot_tsne(config, image_features, targets, fname=str(config.output_path) + '/image_features_tsne.png')
plot_pca(config, image_features, targets, fname=str(config.output_path) + '/image_features_pca.png')

# saving_in_tensorboard(config, text_features, targets, 'text_features')
plot_tsne(config, text_features, targets, fname=str(config.output_path) + '/text_features_tsne.png')
plot_pca(config, text_features, targets, fname=str(config.output_path) + '/text_features_pca.png')
#
# saving_in_tensorboard(config, multimodal_features, targets, 'multimodal_features')
plot_tsne(config, multimodal_features, targets, fname=str(config.output_path) + '/multimodal_features_tsne.png')
plot_pca(config, multimodal_features, targets, fname=str(config.output_path) + '/multimodal_features_pca.png')

# saving_in_tensorboard(config, concat_features, targets, 'concat_features')
plot_tsne(config, concat_features, targets, fname=str(config.output_path) + '/concat_features_tsne.png')
plot_pca(config, concat_features, targets, fname=str(config.output_path) + '/concat_features_pca.png')
save_embedding(config, image_features, fname=str(config.output_path) + '/new_image_features.tsv')
save_embedding(config, text_features, fname=str(config.output_path) + '/new_text_features.tsv')
save_embedding(config, multimodal_features, fname=str(config.output_path) + '/new_multimodal_features_.tsv')
save_embedding(config, concat_features, fname=str(config.output_path) + '/new_concat_features.tsv')


config_parameters = str(config) config_parameters = str(config)
with open(config.output_path + '/parameters.txt', 'w') as f:
with open(config.output_path + '/new_parameters.txt', 'w') as f:
f.write(config_parameters) f.write(config_parameters)
print(config) print(config)


pd.DataFrame({'id': ids, 'predicted_label': predictions, 'real_label': targets, 'losses': losses}).to_csv(
str(config.output_path) + '/new_text_label.csv')



def test_main(config, trial_number=None): def test_main(config, trial_number=None):
train_df, test_df, validation_df = make_dfs(config, ) train_df, test_df, validation_df = make_dfs(config, )

Loading…
Cancel
Save