import os
import argparse
import pickle

import numpy as np
import torch
import torch.nn as nn

from wd_gc import WD_GCN
from data_loader import DataLoader
from utils import f1_score

parser = argparse.ArgumentParser()
# directories
parser.add_argument('--data_root', type=str, default='data/', help='path to the root of the data directory')
parser.add_argument('--dataset', type=str, default='bitcoin_alpha', help='dataset name')
parser.add_argument('--dataset_mat_file_path', type=str, default='Bitcoin_Alpha/saved_content_bitcoin_alpha.mat', help='dataset .mat file path')
parser.add_argument('--save_dir', type=str, default='results_edge_classification_bitcoin_alpha/', help='path to the save directory')
# model params
# NOTE: type=list makes argparse split the raw argument string into single
# characters; nargs='+' parses space-separated values into a proper list.
parser.add_argument('--hidden_feat', type=int, nargs='+', default=[6, 2], help='hidden feature sizes')
parser.add_argument('--time_partitioning', type=int, nargs='+', required=True, help='number of time slices in the train/val/test split (three integers)')
# training params
parser.add_argument('--epochs', type=int, default=1001, help='number of epochs')
parser.add_argument('--optimizer', type=str, default='SGD', help='optimizer (only SGD is supported)')
parser.add_argument('--learning_rate', type=float, default=0.01, help='learning rate')
parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
parser.add_argument('--alpha_vec', type=float, nargs='+',
                    default=[.75, .76, .77, .78, .79, .80, .81, .82, .83, .84, .85,
                             .86, .87, .88, .89, .90, .91, .92, .93, .94, .95],
                    help='alpha values for the weighted cross-entropy loss')
args = parser.parse_args()
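# Example invocation (a sketch: the script filename is a placeholder, and the
# three --time_partitioning values are the train/val/test slice counts):
#
#   python edge_classification_wdgcn.py --data_root data/ --dataset bitcoin_alpha \
#       --time_partitioning <n_train> <n_val> <n_test>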
print('PyTorch version:', torch.__version__)
print('# of available devices:', torch.cuda.device_count())
print('Is a CUDA GPU available?', torch.cuda.is_available())
print('========================')

if torch.cuda.is_available():
    dev = "cuda:1"  # pinned to the second GPU; use "cuda:0" on single-GPU machines
else:
    dev = "cpu"
device = torch.device(dev)
if torch.cuda.is_available():
    # set_device and current_device raise when CUDA is unavailable, so guard them.
    torch.cuda.set_device(device)
    print('Current cuda device', torch.cuda.current_device())

# Make the save directory
if not os.path.exists(args.save_dir):
    os.makedirs(args.save_dir)

# Number of time slices in each of the train/val/test splits.
time_slices = args.time_partitioning
S_train, S_val, S_test = time_slices[0], time_slices[1], time_slices[2]
# Create data loader |
|
|
|
|
|
|
|
|
|
|
|
data_loader = DataLoader(S_train=S_train, S_val=S_val, S_test=S_test, |
|
|
|
|
|
data_dir=args.data_root, |
|
|
|
|
|
mat_file_path=args.dataset_mat_file_path) |
|
|
|
|
|
|
|
|
|
|
|
# load data |
|
|
|
|
|
data, C, targets, edges = data_loader.load_data() |
|
|
|
|
|
|
|
|
|
|
|
# train |
|
|
|
|
|
print("Training Started...") |
for alpha in args.alpha_vec:
    print(">> alpha = {}".format(alpha))
    # Weight the two edge classes as (alpha, 1 - alpha) in the loss.
    class_weights = torch.tensor([alpha, 1.0 - alpha]).to(device)
    save_res_fname = "results_WDGCN" + "_w" + str(round(float(class_weights[0] * 100))) + "_" + args.dataset

    # model definition
    gcn = WD_GCN(C['C_train'], data['X_train'].to(device), edges["edges_train"].to(device), args.hidden_feat)
    gcn.to(device)

    if args.optimizer == "SGD":
        optimizer = torch.optim.SGD(gcn.parameters(), lr=args.learning_rate, momentum=args.momentum)
    else:
        raise ValueError("Unsupported optimizer: %s" % args.optimizer)
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # Per-epoch log: (precision, recall, f1, loss) for train, val, and test.
    ep_acc_loss = np.zeros((args.epochs, 12))

    for ep in range(args.epochs):
        # compute loss and take step
        optimizer.zero_grad()
        output_train = gcn(C['C_train'], data['X_train'].to(device), edges["edges_train"].to(device))  # forward pass
        loss_train = criterion(output_train, targets['target_train'].to(device))
        loss_train.backward()  # backward propagation
        optimizer.step()

        with torch.no_grad():
            prediction_train = torch.argmax(output_train, dim=1)
            f1_train, precision_train, recall_train = f1_score(prediction_train, targets['target_train'].to(device))

            if ep % 100 == 0:
                # validation
                output_val = gcn(C['C_val'], data['X_val'].to(device), edges["edges_val"].to(device))
                prediction_val = torch.argmax(output_val, dim=1)
                f1_val, precision_val, recall_val = f1_score(prediction_val, targets['target_val'].to(device))
                loss_val = criterion(output_val, targets['target_val'].to(device))

                # test
                output_test = gcn(C['C_test'], data['X_test'].to(device), edges["edges_test"].to(device))
                prediction_test = torch.argmax(output_test, dim=1)
                f1_test, precision_test, recall_test = f1_score(prediction_test, targets['target_test'].to(device))
                loss_test = criterion(output_test, targets['target_test'].to(device))

                # log
                print("Epoch %d:" % ep)
                print("%d/%d: Train precision/recall/f1 %.4f/%.4f/%.4f. Train loss %.4f." % (ep, args.epochs, precision_train, recall_train, f1_train, loss_train))
                print("%d/%d: Val precision/recall/f1 %.4f/%.4f/%.4f. Val loss %.4f." % (ep, args.epochs, precision_val, recall_val, f1_val, loss_val))
                print("%d/%d: Test precision/recall/f1 %.4f/%.4f/%.4f. Test loss %.4f." % (ep, args.epochs, precision_test, recall_test, f1_test, loss_test))

            # Train metrics are recorded every epoch; the val/test entries hold
            # the most recent evaluation (refreshed every 100 epochs, from epoch 0 on).
            ep_acc_loss[ep] = [precision_train, recall_train, f1_train, loss_train,
                               precision_val, recall_val, f1_val, loss_val,
                               precision_test, recall_test, f1_test, loss_test]

    pickle.dump(ep_acc_loss, open(os.path.join(args.save_dir, save_res_fname), "wb"))
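# Reading a saved result file back (a minimal sketch; the filename shown is
# illustrative, built from alpha=0.75 and the default dataset name):
#
#   import pickle
#   with open("results_edge_classification_bitcoin_alpha/results_WDGCN_w75_bitcoin_alpha", "rb") as f:
#       ep_acc_loss = pickle.load(f)
#   # columns 0-3: train precision/recall/f1/loss, 4-7: val, 8-11: test
#   print(ep_acc_loss[::100, 2])  # train f1 every 100 epochs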