|
|
import copy
import math
import time

import networkx as nx
import numpy as np
import pandas as pd
import scipy as sp
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
from sklearn.decomposition import PCA
from torch import Tensor
from torch.nn.modules.module import Module
from torch.nn.parameter import Parameter

# from node2vec import Node2Vec

# Default compute device (the rest of this script assumes CUDA is available).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

"""Utils: |
|
|
|
Data Loader / Attention / Clones / Embedder""" |
|
|
|
""" |
|
|
|
Utils: |
|
|
|
Data Loader |
|
|
|
Feature Matrix Constructor |
|
|
|
Random Node Remover |
|
|
|
""" |
|
|
|
|
|
|
|
def Graph_load_batch(min_num_nodes=20, max_num_nodes=1000, name='ENZYMES', node_attributes=True, graph_labels=True):
    '''
    Load a batch of graphs from the TU-format benchmark files (e.g. ENZYMES)
    and return them as a list of networkx graphs whose sizes fall between
    min_num_nodes and max_num_nodes.
    '''
    # ... (dataset parsing omitted in this excerpt) ...
    print('Loaded')
    return graphs

def attention(query, key, value, d_key):
    # scaled dot-product attention: normalise the scores with a softmax,
    # then use them to weight the values
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_key)
    scores = nn.functional.softmax(scores, dim=-1)
    output = torch.matmul(scores, value)
    return output
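
# A quick shape check for the attention helper above (sizes here are arbitrary and
# only illustrative): with a batch of 1, 2 heads, 5 tokens and d_k = 8, the output
# keeps the query's shape.
# _q = _k = _v = torch.rand(1, 2, 5, 8)
# assert attention(_q, _k, _v, 8).shape == (1, 2, 5, 8)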
|
|
|
|
|
|
|
def get_clones(module, N):
    return nn.ModuleList([copy.deepcopy(module) for i in range(N)])

def embedder(graph, dimensions=32, walk_length=8, num_walks=200, workers=4):
    # requires the node2vec package (import is commented out at the top of the file)
    node2vec = Node2Vec(graph, dimensions=dimensions, walk_length=walk_length, num_walks=num_walks, workers=workers)  # Use temp_folder for big graphs
    model = node2vec.fit(window=10, min_count=1, batch_words=4)
    return model.wv.vectors


graphs = Graph_load_batch(min_num_nodes=10, name='ENZYMES')

# G = graphs[1]
# vecs = embedder(G)

# pca = PCA(n_components=2)
# principalComponents = pca.fit_transform(vecs)
# principalDf = pd.DataFrame(data=principalComponents,
#                            columns=['principal component 1', 'principal component 2'])
# principalDf.index = list(G.nodes())

# sns.scatterplot(principalDf['principal component 1'], principalDf['principal component 2'])


"""Sublayers"""

class MultiHeadAttention(nn.Module):
    def __init__(self, heads, d_model, dropout=0.1):
        super().__init__()
        self.d_model = d_model
        self.d_k = d_model // heads
        self.h = heads
        self.q_linear = nn.Linear(d_model, d_model).cuda()
        self.v_linear = nn.Linear(d_model, d_model).cuda()
        self.k_linear = nn.Linear(d_model, d_model).cuda()
        self.dropout = nn.Dropout(dropout)
        self.out = nn.Linear(d_model, d_model)

    def forward(self, q, k, v):
        # print(q, k, v)
        bs = q.size(0)
        # perform linear operation and split into h heads
        k = self.k_linear(k).view(bs, -1, self.h, self.d_k)
        q = self.q_linear(q).view(bs, -1, self.h, self.d_k)
        v = self.v_linear(v).view(bs, -1, self.h, self.d_k)
        # transpose to get dimensions bs * h * sl * d_k
        k = k.transpose(1, 2)
        q = q.transpose(1, 2)
        v = v.transpose(1, 2)

        scores = attention(q, k, v, self.d_k)
        # concatenate heads and put through final linear layer
        concat = scores.transpose(1, 2).contiguous().view(bs, -1, self.d_model)
        output = self.out(concat)
        return output
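
# Illustrative usage of MultiHeadAttention (hypothetical sizes; assumes a CUDA device
# because the projection layers above are created with .cuda()): 8 heads over
# d_model = 40 leave the input shape unchanged.
# _mha = MultiHeadAttention(heads=8, d_model=40).cuda()
# _x = torch.rand(3, 10, 40).cuda()
# assert _mha(_x, _x, _x).shape == (3, 10, 40)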
|
|
|
|
|
|
|
class FeedForward(nn.Module):
    def __init__(self, d_model, d_ff=2048, dropout=0.1):
        super().__init__()
        self.linear_1 = nn.Linear(d_model, d_ff).cuda()
        self.dropout = nn.Dropout(dropout)
        self.linear_2 = nn.Linear(d_ff, d_model).cuda()

    def forward(self, x):
        x = self.dropout(nn.functional.relu(self.linear_1(x)))
        x = self.linear_2(x)
        return x

class Norm(nn.Module):
    def __init__(self, d_model, eps=1e-6):
        super().__init__()
        self.size = d_model
        # learnable gain and bias of the normalisation
        self.alpha = nn.Parameter(torch.ones(self.size))
        self.bias = nn.Parameter(torch.zeros(self.size))
        self.eps = eps

    def forward(self, x):
        norm = self.alpha * (x - x.mean(dim=-1, keepdim=True)) / (x.std(dim=-1, keepdim=True) + self.eps) + self.bias
        return norm
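
# Note: Norm is a hand-rolled layer normalisation over the last dimension. It is close
# to torch.nn.LayerNorm but not identical (it uses the unbiased std, and eps is added
# to the std rather than the variance). A minimal usage sketch:
# _x = torch.randn(2, 4, 8)
# Norm(8)(_x).shape  # -> torch.Size([2, 4, 8]), same shape as the input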
|
|
|
|
|
|
|
"""Layers""" |
|
|
|
|
|
|
|
class EncoderLayer(nn.Module):
    def __init__(self, d_model, heads, dropout=0.1):
        super().__init__()
        self.norm_1 = Norm(d_model)
        self.norm_2 = Norm(d_model)
        self.attn = MultiHeadAttention(heads, d_model)
        self.ff = FeedForward(d_model)
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)

    def forward(self, x):
        # residual self-attention block
        # x2 = self.norm_1(x)
        x = x + self.dropout_1(self.attn(x, x, x))
        # residual position-wise feed-forward block
        # x2 = self.norm_2(x)
        x = x + self.dropout_2(self.ff(x))
        return x

class DecoderLayer(nn.Module):
    def __init__(self, d_model, heads, dropout=0.1):
        super().__init__()
        self.norm_1 = Norm(d_model)
        self.norm_2 = Norm(d_model)
        self.norm_3 = Norm(d_model)
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)
        self.dropout_3 = nn.Dropout(dropout)
        self.attn_1 = MultiHeadAttention(heads, d_model)
        self.attn_2 = MultiHeadAttention(heads, d_model)
        self.ff = FeedForward(d_model).cuda()

    def forward(self, x, e_outputs):
        # residual self-attention over the decoder input
        # x2 = self.norm_1(x)
        x = x + self.dropout_1(self.attn_1(x, x, x))
        # residual encoder-decoder attention
        # x2 = self.norm_2(x)
        x = x + self.dropout_2(self.attn_2(x, e_outputs, e_outputs))
        # residual position-wise feed-forward block
        # x2 = self.norm_3(x)
        x = x + self.dropout_3(self.ff(x))
        return x

class Encoder(nn.Module):
    def __init__(self, vocab_size, d_model, N, heads):
        super().__init__()
        self.N = N
        self.layers = get_clones(EncoderLayer(d_model, heads), N)
        self.norm = Norm(d_model)

    def forward(self, src):
        x = src
        for i in range(self.N):
            x = self.layers[i](x)
        return self.norm(x)

class Decoder(nn.Module):
    def __init__(self, data_size, d_model, N, heads):
        super().__init__()
        self.N = N
        self.layers = get_clones(DecoderLayer(d_model, heads), N)
        self.norm = Norm(d_model)

    def forward(self, trg, e_outputs):
        x = trg
        for i in range(self.N):
            x = self.layers[i](x, e_outputs)
        return self.norm(x)


"""The Mighty Transformer"""

class Transformer(nn.Module):
    def __init__(self, src_graph, trg_graph, d_model, N, heads):
        super().__init__()
        self.encoder = Encoder(src_graph, d_model, N, heads)
        self.decoder = Decoder(trg_graph, d_model, N, heads)
        self.out = nn.Linear(d_model, trg_graph)

    def forward(self, src, trg):
        e_outputs = self.encoder(src)
        d_output = self.decoder(trg, e_outputs)
        output = self.out(d_output)
        return output
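
# Hypothetical shape walk-through for the Transformer above, using the padded graphs
# built later in this script (a batch of B adjacency matrices with 40 nodes, d_model = 40):
# src, trg: B x 40 x 40 -> encoder and decoder both keep B x 40 x 40 -> self.out
# projects the last dimension from d_model to trg_graph, giving B x 40 x trg_graph.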
|
|
|
|
|
|
|
def feature_matrix(g):
    '''
    constructs the feature matrix (N x 3) for the ENZYMES dataset:
    each row is the one-hot encoding of a node's 'label' attribute
    '''
    esm = nx.get_node_attributes(g, 'label')
    piazche = np.zeros((len(esm), 3))
    for i, (k, v) in enumerate(esm.items()):
        piazche[i][v-1] = 1
    return piazche
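
# Small illustration on a hypothetical 3-node graph with node labels 1, 3 and 2:
# each row of the result is the one-hot encoding of the corresponding node's label.
# _g = nx.Graph()
# _g.add_nodes_from([(0, {'label': 1}), (1, {'label': 3}), (2, {'label': 2})])
# feature_matrix(_g)  # -> [[1, 0, 0], [0, 0, 1], [0, 1, 0]]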
|
|
|
|
|
|
|
def remove_random_node(graph, max_size=40, min_size=10):
    '''
    removes a random node from the graph;
    returns the remaining graph's adjacency matrix zero-padded to max_size x max_size
    (source) and a copy with the removed node's links written into the next free row (target)
    '''
    if len(graph.nodes) >= max_size or len(graph.nodes) < min_size:
        return None
    relabeled_graph = nx.relabel.convert_node_labels_to_integers(graph)
    choice = np.random.choice(list(relabeled_graph.nodes))
    remaining_graph = nx.to_numpy_matrix(relabeled_graph.subgraph(filter(lambda x: x != choice, list(relabeled_graph.nodes))))
    removed_node = nx.to_numpy_matrix(relabeled_graph)[choice]
    graph_length = len(remaining_graph)
    source_graph = np.pad(remaining_graph, [(0, max_size - graph_length), (0, max_size - graph_length)])
    target_graph = np.copy(source_graph)
    removed_node_row = np.asarray(removed_node)[0]
    target_graph[graph_length] = np.pad(removed_node_row, [(0, max_size - len(removed_node_row))])
    return source_graph, target_graph
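
# Sketch of the expected output for a graph whose size falls inside the
# (min_size, max_size) window; for graphs outside the window the function returns None.
# _pair = remove_random_node(graphs[0], max_size=40, min_size=10)
# if _pair is not None:
#     _src, _trg = _pair
#     _src.shape, _trg.shape  # -> (40, 40), (40, 40)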
|
|
|
|
|
|
|
converted_graphs = list(filter(lambda x: x is not None, [remove_random_node(graph) for graph in graphs]))
source_graphs = torch.Tensor([graph[0] for graph in converted_graphs])
target_graphs = torch.Tensor([graph[1] for graph in converted_graphs])

d_model = 40
heads = 8
N = 6
src_size = len(source_graphs)
trg_size = len(target_graphs)

model = Transformer(src_size, trg_size, d_model, N, heads).cuda()
# print(model)

optim = torch.optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.98), eps=1e-9)

def train_model(epoch, print_every=100):
    model.train()
    start = time.time()
    temp = start
    total_loss = 0
    for i in range(epoch):

        src = source_graphs.cuda()
        trg = target_graphs.cuda()

        preds = model(src.float(), trg.float())
        optim.zero_grad()
        loss = torch.nn.functional.cross_entropy(preds.view(preds.size(-1), -1), trg.view(trg.size(0), -1))
        loss.backward()
        optim.step()
        total_loss += loss.item()
        if (i + 1) % print_every == 0:
            loss_avg = total_loss / print_every
            print("time = %dm, epoch %d, iter = %d, loss = %.3f, %ds per %d iters" %
                  ((time.time() - start) // 60, epoch + 1, i + 1, loss_avg,
                   time.time() - temp, print_every))
            total_loss = 0
            temp = time.time()


train_model(1, 1)

#preds = model(source_graphs[0].cuda(), target_graphs[0].cuda()) |
|
|
|
#loss = torch.nn.functional.cross_entropy(preds.view(preds.size(-1), -1), target_graphs.view(target_graphs.size(0), -1)) |
|
|
|
# |
|
|
|
"""" |
|
|
|
Layers: |
|
|
|
GCN |
|
|
|
""" |
|
|
|
|
|
|
|
class GraphConv(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.weight = nn.Parameter(torch.FloatTensor(input_dim, output_dim).cuda())
        # initialise the layer weights (a raw FloatTensor holds uninitialised memory)
        init.xavier_uniform_(self.weight)
        self.relu = nn.ReLU()

    def forward(self, x, adj):
        '''
        x is the feature matrix
        adj is the adjacency matrix of the graph
        '''
        y = torch.matmul(adj, x)
        print(y.shape)
        print(self.weight.shape)
        y = torch.matmul(y, self.weight.double())
        return y
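
# Illustrative use of GraphConv on one graph from the batch (shapes only; assumes a CUDA
# device since the weight above is created with .cuda(), and uses the node features from
# feature_matrix plus the dense adjacency matrix, both as doubles):
# _g = graphs[0]
# _x = torch.from_numpy(feature_matrix(_g)).cuda()                    # N x 3
# _adj = torch.from_numpy(np.asarray(nx.to_numpy_matrix(_g))).cuda()  # N x N
# _gcn = GraphConv(input_dim=3, output_dim=16)
# _gcn(_x, _adj).shape  # -> torch.Size([N, 16])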