@@ -1,27 +1,27 @@ |
import math |
import torch |
import torch.nn as nn |
import torch.nn.init as init |
import numpy as np |
import pandas as pd |
import networkx as nx |
import scipy as sp |
import seaborn as sns |
# from node2vec import Node2Vec |
from sklearn.decomposition import PCA |
import copy |
import time |
import torch |
import torch.nn as nn |
import torch.nn.init as init |
import torch.nn.functional as F |
from torch.nn.parameter import Parameter |
from torch.nn.modules.module import Module |
from torch import Tensor |
# NOTE(review): torch.device('cuda') only constructs a device object; the
# result is not bound to a name, so this statement has no lasting effect.
# Presumably the intent was to keep the device for later `.to(device)` calls
# -- TODO confirm.
if torch.cuda.is_available():
    torch.device('cuda')
"""Utils: |
Data Loader / Attention / Clones / Embedder""" |
""" |
Utils: |
Data Loader |
Feature Matrix Constructor |
Random Node Remover |
""" |
def Graph_load_batch(min_num_nodes=20, max_num_nodes=1000, name='ENZYMES', node_attributes=True, graph_labels=True): |
''' |
@@ -65,235 +65,54 @@ def Graph_load_batch(min_num_nodes=20, max_num_nodes=1000, name='ENZYMES', node_ |
print('Loaded') |
return graphs |
def attention(query, key, value, d_key):
    """Scaled dot-product attention.

    Computes ``softmax(query @ key^T / sqrt(d_key)) @ value``.

    The previous implementation multiplied the raw scores by ``value`` first
    and only then applied a softmax (without an explicit ``dim``), which
    normalised the output features instead of the attention weights.

    Args:
        query: tensor whose last two dimensions are multiplied against the
            transposed last two dimensions of ``key``.
        key: tensor matmul-compatible with ``query`` after swapping its last
            two dimensions.
        value: tensor matmul-compatible with the resulting weight matrix.
        d_key: size used for the ``1 / sqrt(d_key)`` scaling.

    Returns:
        The attention-weighted combination of ``value``.
    """
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_key)
    # Normalise over the key axis so every query's weights sum to 1.
    weights = torch.softmax(scores, dim=-1)
    return torch.matmul(weights, value)
def get_clones(module, N):
    """Return an ``nn.ModuleList`` holding ``N`` independent deep copies of ``module``."""
    replicas = nn.ModuleList()
    for _ in range(N):
        replicas.append(copy.deepcopy(module))
    return replicas
def embedder(graph, dimensions=32, walk_length=8, num_walks=200, workers=4):
    """Fit a Node2Vec model on ``graph`` and return its embedding vectors.

    All keyword arguments are forwarded unchanged to the ``Node2Vec``
    constructor; the model is then fitted with ``window=10``,
    ``min_count=1`` and ``batch_words=4``.

    NOTE(review): the ``Node2Vec`` import at the top of the file is commented
    out, so calling this raises ``NameError`` unless that import is restored.

    Returns:
        ``model.wv.vectors`` of the fitted model.
    """
    walker = Node2Vec(
        graph,
        dimensions=dimensions,
        walk_length=walk_length,
        num_walks=num_walks,
        workers=workers,
    )  # Use temp_folder for big graphs
    fitted = walker.fit(window=10, min_count=1, batch_words=4)
    return fitted.wv.vectors
# Runs at import time: loads the ENZYMES graphs with min_num_nodes=10; the
# other Graph_load_batch arguments keep their defaults (max_num_nodes=1000,
# node_attributes=True, graph_labels=True).
graphs = Graph_load_batch(min_num_nodes=10, name='ENZYMES')
# G = graphs[1] |
# vecs = embedder(G) |
# pca = PCA(n_components=2) |
# principalComponents = pca.fit_transform(vecs) |
# principalDf = pd.DataFrame(data = principalComponents |
# , columns = ['principal component 1', 'principal component 2']) |
# principalDf.index = list(G.nodes()) |
# sns.scatterplot(principalDf['principal component 1'], principalDf['principal component 2']) |
"""Sublayers""" |
class MultiHeadAttention(nn.Module):
    """Multi-head attention built on the module-level ``attention`` helper.

    The q/k/v projections used to be created with ``.cuda()`` while ``out``
    was left on the default device. That crashed the constructor on CPU-only
    hosts and still required the caller to move the module before ``forward``
    could work. Device placement is now left entirely to the caller
    (``module.cuda()`` / ``module.to(device)``).

    Args:
        heads: number of attention heads; must divide ``d_model``.
        d_model: width of the input and output features.
        dropout: probability for ``self.dropout``. The dropout module is
            created but is not applied in ``forward``.

    Raises:
        ValueError: if ``d_model`` is not divisible by ``heads``.
    """

    def __init__(self, heads, d_model, dropout=0.1):
        super().__init__()
        if d_model % heads != 0:
            # Otherwise the head split in forward() cannot reshape cleanly.
            raise ValueError(
                "d_model (%d) must be divisible by heads (%d)" % (d_model, heads)
            )
        self.d_model = d_model
        self.d_k = d_model // heads
        self.h = heads
        # Creation order is kept (q, v, k, out) so seeded initialisation
        # produces the same weights as before.
        self.q_linear = nn.Linear(d_model, d_model)
        self.v_linear = nn.Linear(d_model, d_model)
        self.k_linear = nn.Linear(d_model, d_model)
        self.dropout = nn.Dropout(dropout)
        self.out = nn.Linear(d_model, d_model)

    def forward(self, q, k, v):
        """Project q/k/v, attend per head, and merge the heads.

        The first dimension of ``q`` is treated as the batch size; the last
        dimension of each input must be ``d_model``.
        """
        bs = q.size(0)
        # Linear projection, then split the feature axis into h heads of d_k.
        k = self.k_linear(k).view(bs, -1, self.h, self.d_k)
        q = self.q_linear(q).view(bs, -1, self.h, self.d_k)
        v = self.v_linear(v).view(bs, -1, self.h, self.d_k)
        # Move the head axis ahead of the sequence axis: bs * h * sl * d_k.
        k = k.transpose(1, 2)
        q = q.transpose(1, 2)
        v = v.transpose(1, 2)
        attended = attention(q, k, v, self.d_k)
        # Concatenate heads and put through the final linear layer.
        concat = attended.transpose(1, 2).contiguous().view(bs, -1, self.d_model)
        return self.out(concat)
class FeedForward(nn.Module):
    """Two-layer feed-forward block: Linear -> ReLU -> Dropout -> Linear.

    The linear layers are no longer forced onto CUDA in the constructor, so
    the module can be built on CPU-only hosts. Move it with ``.cuda()`` or
    ``.to(device)`` like any other module (the layers in this file that
    contain it are moved as part of the enclosing model).

    Args:
        d_model: input and output feature width.
        d_ff: hidden width.
        dropout: dropout probability applied after the ReLU.
    """

    def __init__(self, d_model, d_ff=2048, dropout=0.1):
        super().__init__()
        self.linear_1 = nn.Linear(d_model, d_ff)
        self.dropout = nn.Dropout(dropout)
        self.linear_2 = nn.Linear(d_ff, d_model)

    def forward(self, x):
        """Apply the block to ``x`` (last dimension must be ``d_model``)."""
        hidden = self.dropout(nn.functional.relu(self.linear_1(x)))
        return self.linear_2(hidden)
class Norm(nn.Module):
    """Normalisation over the last dimension with a learnable scale and shift.

    ``alpha`` starts at ones and ``bias`` at zeros, both of length
    ``d_model``; ``eps`` is added to the standard deviation before dividing.
    """

    def __init__(self, d_model, eps=1e-6):
        super().__init__()
        self.size = d_model
        self.alpha = nn.Parameter(torch.ones(self.size))
        self.bias = nn.Parameter(torch.zeros(self.size))
        self.eps = eps

    def forward(self, x):
        """Return ``alpha * (x - mean) / (std + eps) + bias`` along the last axis."""
        centered = x - x.mean(dim=-1, keepdim=True)
        spread = x.std(dim=-1, keepdim=True) + self.eps
        return self.alpha * centered / spread + self.bias
"""Layers""" |
class EncoderLayer(nn.Module):
    """One encoder layer: self-attention and feed-forward, each with a residual.

    ``norm_1`` and ``norm_2`` are constructed but are not applied in
    ``forward``. The ``dropout`` argument only configures the two residual
    dropouts; the attention and feed-forward sub-modules keep their own
    defaults.
    """

    def __init__(self, d_model, heads, dropout=0.1):
        super().__init__()
        self.norm_1 = Norm(d_model)
        self.norm_2 = Norm(d_model)
        self.attn = MultiHeadAttention(heads, d_model)
        self.ff = FeedForward(d_model)
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)

    def forward(self, x):
        """Return ``x`` after the attention and feed-forward residual steps."""
        attended = self.attn(x, x, x)
        x = x + self.dropout_1(attended)
        transformed = self.ff(x)
        return x + self.dropout_2(transformed)
class DecoderLayer(nn.Module):
    """One decoder layer: self-attention, encoder attention, feed-forward.

    Each sub-step is wrapped in a residual connection with dropout.
    ``norm_1`` .. ``norm_3`` are constructed but are not applied in
    ``forward``.

    The feed-forward block is no longer forced onto CUDA in the constructor
    (``EncoderLayer`` never did this either), so the layer can be built on
    CPU-only hosts; move the whole layer with ``.cuda()`` / ``.to(device)``.
    """

    def __init__(self, d_model, heads, dropout=0.1):
        super().__init__()
        self.norm_1 = Norm(d_model)
        self.norm_2 = Norm(d_model)
        self.norm_3 = Norm(d_model)
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)
        self.dropout_3 = nn.Dropout(dropout)
        self.attn_1 = MultiHeadAttention(heads, d_model)
        self.attn_2 = MultiHeadAttention(heads, d_model)
        self.ff = FeedForward(d_model)

    def forward(self, x, e_outputs):
        """Run one decoder step.

        Args:
            x: decoder-side input.
            e_outputs: encoder output, used as key and value of ``attn_2``.
        """
        # Self-attention over the decoder input.
        x = x + self.dropout_1(self.attn_1(x, x, x))
        # Attention from the decoder state onto the encoder output.
        x = x + self.dropout_2(self.attn_2(x, e_outputs, e_outputs))
        x = x + self.dropout_3(self.ff(x))
        return x
class Encoder(nn.Module):
    """Stack of ``N`` cloned ``EncoderLayer`` modules followed by a ``Norm``.

    Args:
        vocab_size: accepted for interface compatibility; not used here.
        d_model: feature width passed to every layer and to the final norm.
        N: number of stacked layers.
        heads: number of attention heads per layer.
    """

    def __init__(self, vocab_size, d_model, N, heads):
        super().__init__()
        self.N = N
        self.layers = get_clones(EncoderLayer(d_model, heads), N)
        self.norm = Norm(d_model)

    def forward(self, src):
        """Pass ``src`` through every layer in order, then normalise."""
        x = src
        # Iterate this instance's own layers. The old loop used the
        # module-level global ``N``, which raised NameError when no such
        # global existed and ran the wrong number of layers when it differed
        # from the N this encoder was built with.
        for layer in self.layers:
            x = layer(x)
        return self.norm(x)
class Decoder(nn.Module):
    """Stack of ``N`` cloned ``DecoderLayer`` modules followed by a ``Norm``.

    ``data_size`` is accepted but not used by this class.
    """

    def __init__(self, data_size, d_model, N, heads):
        super().__init__()
        self.N = N
        self.layers = get_clones(DecoderLayer(d_model, heads), N)
        self.norm = Norm(d_model)

    def forward(self, trg, e_outputs):
        """Feed ``trg`` through each layer together with ``e_outputs``, then normalise."""
        hidden = trg
        for layer in self.layers:
            hidden = layer(hidden, e_outputs)
        return self.norm(hidden)
"""The Mighty Transformer""" |
class Transformer(nn.Module):
    """Encoder-decoder model with a final linear projection.

    Args:
        src_graph: forwarded to ``Encoder`` as its first argument.
        trg_graph: forwarded to ``Decoder`` as its first argument and also
            used as the output width of the final ``nn.Linear``.
        d_model: feature width shared by encoder, decoder and projection input.
        N: number of layers in both encoder and decoder.
        heads: number of attention heads.
    """

    def __init__(self, src_graph, trg_graph, d_model, N, heads):
        super().__init__()
        self.encoder = Encoder(src_graph, d_model, N, heads)
        self.decoder = Decoder(trg_graph, d_model, N, heads)
        self.out = nn.Linear(d_model, trg_graph)

    def forward(self, src, trg):
        """Encode ``src``, decode ``trg`` against it, and project the result."""
        memory = self.encoder(src)
        decoded = self.decoder(trg, memory)
        return self.out(decoded)
def feature_matrix(g):
    '''
    Constructs the one-hot feature matrix (N x 3) for the ENZYMES datasets.

    Row i corresponds to the i-th node that carries a 'label' attribute, in
    the order returned by nx.get_node_attributes; column (label - 1) of that
    row is set to 1.
    '''
    labels = nx.get_node_attributes(g, 'label')
    one_hot = np.zeros((len(labels), 3))
    for row, label in enumerate(labels.values()):
        one_hot[row][label - 1] = 1
    return one_hot
def remove_random_node(graph, max_size=40, min_size=10):
    '''
    Removes one random node and builds a padded (source, target) matrix pair.

    Returns None when the graph has max_size or more nodes, or fewer than
    min_size nodes. Otherwise returns a tuple:
      source_graph: adjacency matrix of the graph without the chosen node,
                    padded with np.pad to max_size x max_size.
      target_graph: copy of source_graph whose row at index
                    len(remaining graph) is replaced by the removed node's
                    adjacency row, padded to length max_size.

    NOTE(review): the removed node's row is taken from the full adjacency
    matrix, so it still has one entry per original node (including the
    removed one); its columns are not re-aligned to the remaining graph.
    Confirm this is intended.
    '''
    node_count = len(graph.nodes)
    if node_count >= max_size or node_count < min_size:
        return None
    relabeled = nx.relabel.convert_node_labels_to_integers(graph)
    all_nodes = list(relabeled.nodes)
    choice = np.random.choice(all_nodes)
    kept_nodes = [node for node in all_nodes if node != choice]
    remaining_graph = nx.to_numpy_matrix(relabeled.subgraph(kept_nodes))
    removed_node = nx.to_numpy_matrix(relabeled)[choice]
    graph_length = len(remaining_graph)
    pad_amount = max_size - graph_length
    source_graph = np.pad(remaining_graph, [(0, pad_amount), (0, pad_amount)])
    target_graph = np.copy(source_graph)
    removed_node_row = np.asarray(removed_node)[0]
    row_padding = max_size - len(removed_node_row)
    target_graph[graph_length] = np.pad(removed_node_row, [(0, row_padding)])
    return source_graph, target_graph
# Build (source, target) matrix pairs for every loaded graph; graphs for
# which remove_random_node returns None (outside its size limits) are dropped.
converted_graphs = list(filter(lambda x: x is not None, [remove_random_node(graph) for graph in graphs]))
source_graphs = torch.Tensor([graph[0] for graph in converted_graphs])
target_graphs = torch.Tensor([graph[1] for graph in converted_graphs])
# d_model equals remove_random_node's default max_size (40), i.e. the width
# of each padded adjacency row -- presumably deliberate; TODO confirm.
d_model = 40
heads = 8
N = 6
# NOTE(review): these are the number of converted graphs, not a feature or
# vocabulary size. trg_size becomes the output width of Transformer.out,
# while the Encoder/Decoder ignore their first argument -- confirm intent.
src_size = len(source_graphs)
trg_size = len(target_graphs)
# .cuda() moves the whole model to the GPU; this line requires CUDA.
model = Transformer(src_size, trg_size, d_model, N, heads).cuda()
#print(model)
optim = torch.optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.98), eps=1e-9)
def train_model(epoch, print_every=100):
    """Run ``epoch`` optimisation steps over the full dataset on the GPU.

    Uses the module-level ``model``, ``optim``, ``source_graphs`` and
    ``target_graphs``. Every ``print_every`` iterations the average loss over
    that window and the elapsed time are printed.

    Args:
        epoch: number of iterations to run (each one uses the whole dataset).
        print_every: logging interval in iterations.
    """
    model.train()
    start = time.time()
    temp = start
    total_loss = 0
    # The batch is identical on every iteration, so transfer and cast it once
    # instead of once per step.
    src = source_graphs.cuda()
    trg = target_graphs.cuda()
    src_input = src.float()
    trg_input = trg.float()
    for i in range(epoch):
        preds = model(src_input, trg_input)
        optim.zero_grad()
        # NOTE(review): cross_entropy is fed reshaped predictions and the
        # reshaped target matrices as-is; verify these shapes/dtypes match
        # what the loss expects for this task.
        loss = torch.nn.functional.cross_entropy(
            preds.view(preds.size(-1), -1), trg.view(trg.size(0), -1)
        )
        loss.backward()
        optim.step()
        # loss is a 0-dim tensor; indexing it with [0] fails on current
        # PyTorch, .item() is the supported way to read the Python number.
        total_loss += loss.item()
        if (i + 1) % print_every == 0:
            loss_avg = total_loss / print_every
            print(
                "time = %dm, epoch %d, iter = %d, loss = %.3f,"
                "# %ds per %d iters"
                % (
                    (time.time() - start) // 60,
                    epoch + 1,
                    i + 1,
                    loss_avg,
                    time.time() - temp,
                    print_every,
                )
            )
            total_loss = 0
            temp = time.time()
train_model(1, 1) |
#preds = model(source_graphs[0].cuda(), target_graphs[0].cuda()) |
#loss = torch.nn.functional.cross_entropy(preds.view(preds.size(-1), -1), target_graphs.view(target_graphs.size(0), -1)) |
# |
''' |
removes a random node from the graph
returns the remaining graph matrix and the removed node links |
''' |
if len(graph.nodes()) >= max_size or len(graph.nodes()) < min_size: |
return None |
relabeled_graph = nx.relabel.convert_node_labels_to_integers(graph) |
choice = np.random.choice(list(relabeled_graph.nodes())) |
remaining_graph = nx.to_numpy_matrix(relabeled_graph.subgraph(filter(lambda x: x != choice, list(relabeled_graph.nodes())))) |
removed_node = nx.to_numpy_matrix(relabeled_graph)[choice] |
graph_length = len(remaining_graph) |
# source_graph = np.pad(remaining_graph, [(0, max_size - graph_length), (0, max_size - graph_length)]) |
# target_graph = np.copy(source_graph) |
removed_node_row = np.asarray(removed_node)[0] |
# target_graph[graph_length] = np.pad(removed_node_row, [(0, max_size - len(removed_node_row))]) |
return remaining_graph, removed_node_row |
"""
Layers: |
""" |
class GraphConv(nn.Module):
    """Graph convolution computing ``adj @ x @ weight``.

    Changes from the previous version:
      * the weight was allocated with ``torch.FloatTensor(i, o)``, i.e.
        uninitialised memory; it is now Xavier-uniform initialised;
      * the weight was unconditionally created on CUDA, which crashed on
        CPU-only hosts; it is still placed on CUDA when available, otherwise
        on the CPU;
      * the debug ``print`` calls in ``forward`` are removed;
      * the weight is cast to the dtype/device of the propagated features
        instead of always to float64, so float64 inputs behave as before and
        other floating dtypes work too.

    Args:
        input_dim: number of input features per node.
        output_dim: number of output features per node.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.weight = nn.Parameter(torch.empty(input_dim, output_dim, device=device))
        nn.init.xavier_uniform_(self.weight)
        # Kept as an attribute; forward() does not apply it.
        self.relu = nn.ReLU()

    def forward(self, x, adj):
        '''
        x is the feature matrix
        adj is the adjacency matrix of the graph

        Returns adj @ x @ weight.
        '''
        y = torch.matmul(adj, x)
        # Follow the input's dtype and device; a no-op when they already match.
        y = torch.matmul(y, self.weight.to(y))
        return y