import math
import torch
import torch.nn as nn
import torch.nn.init as init
import numpy as np
import pandas as pd
import networkx as nx
import scipy as sp
import seaborn as sns
# from node2vec import Node2Vec
from sklearn.decomposition import PCA
import copy
import time

# use the GPU when available; the model and data are moved explicitly with .cuda() below
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

"""Utils: |
|
|
|
|
|
Data Loader / Attention / Clones / Embedder""" |
|
|
|
|
|
|
|
|
|
|
|
def Graph_load_batch(min_num_nodes=20, max_num_nodes=1000, name='ENZYMES', node_attributes=True, graph_labels=True):
    '''
    Load a batch of graphs (e.g. ENZYMES) from the benchmark text files.
    :return: a list of networkx graphs
    '''
    print('Loading graph dataset: ' + str(name))
    G = nx.Graph()

    # load data
    # path = '../dataset/' + name + '/'
    path = '/content/gdrive/My Drive/' + name + '/'
    data_adj = np.loadtxt(path + name + '_A.txt', delimiter=',').astype(int)
    if node_attributes:
        data_node_att = np.loadtxt(path + name + '_node_attributes.txt', delimiter=',')
    data_node_label = np.loadtxt(path + name + '_node_labels.txt', delimiter=',').astype(int)
    data_graph_indicator = np.loadtxt(path + name + '_graph_indicator.txt', delimiter=',').astype(int)
    if graph_labels:
        data_graph_labels = np.loadtxt(path + name + '_graph_labels.txt', delimiter=',').astype(int)

    # build one large graph from the edge list, then attach node features / labels
    data_tuple = list(map(tuple, data_adj))
    G.add_edges_from(data_tuple)
    for i in range(data_node_label.shape[0]):
        if node_attributes:
            G.add_node(i + 1, feature=data_node_att[i])
        G.add_node(i + 1, label=data_node_label[i])
    G.remove_nodes_from(list(nx.isolates(G)))

    # split the large graph into individual graphs using the graph indicator
    graph_num = data_graph_indicator.max()
    node_list = np.arange(data_graph_indicator.shape[0]) + 1
    graphs = []
    max_nodes = 0
    for i in range(graph_num):
        nodes = node_list[data_graph_indicator == i + 1]
        # take a copy so each subgraph gets its own graph-attribute dict for the label
        G_sub = nx.Graph(G.subgraph(nodes))
        if graph_labels:
            G_sub.graph['label'] = data_graph_labels[i]
        if G_sub.number_of_nodes() >= min_num_nodes and G_sub.number_of_nodes() <= max_num_nodes:
            graphs.append(G_sub)
            if G_sub.number_of_nodes() > max_nodes:
                max_nodes = G_sub.number_of_nodes()
    print('Loaded')
    return graphs
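
# A commented-out inspection sketch (assumes the ENZYMES files exist at the path above;
# the variable name gs is only illustrative):
# gs = Graph_load_batch(min_num_nodes=10, name='ENZYMES')
# print(len(gs), gs[0].number_of_nodes(), gs[0].graph.get('label'))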
|
|
|
|
|
|
|
|
|
|
|
def attention(query, key, value, d_key):
    # scaled dot-product attention: softmax over the scaled scores, then weight the values
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_key)
    scores = nn.functional.softmax(scores, dim=-1)
    output = torch.matmul(scores, value)
    return output
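
# Commented-out shape check for the attention helper (hypothetical toy sizes):
# q = torch.rand(2, 8, 5, 4)   # (batch, heads, seq_len, d_k)
# k = torch.rand(2, 8, 5, 4)
# v = torch.rand(2, 8, 5, 4)
# print(attention(q, k, v, 4).shape)   # torch.Size([2, 8, 5, 4])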
|
|
|
|
|
|
|
|
|
|
|
def get_clones(module, N):
    # stack N independent (deep-copied) instances of a module
    return nn.ModuleList([copy.deepcopy(module) for i in range(N)])
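
# Hypothetical illustration: the clones share an architecture but not parameters,
# so each stacked layer can learn different weights.
# layers = get_clones(nn.Linear(4, 4), 2)
# print(layers[0].weight.data_ptr() != layers[1].weight.data_ptr())   # True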
|
|
|
|
|
|
|
|
|
|
|
def embedder(graph, dimensions=32, walk_length=8, num_walks=200, workers=4):
    # requires the node2vec package (its import is commented out at the top of this file)
    node2vec = Node2Vec(graph, dimensions=dimensions, walk_length=walk_length, num_walks=num_walks, workers=workers)  # Use temp_folder for big graphs
    model = node2vec.fit(window=10, min_count=1, batch_words=4)
    return model.wv.vectors


graphs = Graph_load_batch(min_num_nodes=10, name='ENZYMES')

# G = graphs[1]
# vecs = embedder(G)

# pca = PCA(n_components=2)
# principalComponents = pca.fit_transform(vecs)
# principalDf = pd.DataFrame(data=principalComponents,
#                            columns=['principal component 1', 'principal component 2'])
# principalDf.index = list(G.nodes())

# sns.scatterplot(principalDf['principal component 1'], principalDf['principal component 2'])

"""Sublayers""" |
|
|
|
|
|
|
|
|
|
|
|
class MultiHeadAttention(nn.Module):
    def __init__(self, heads, d_model, dropout=0.1):
        super().__init__()
        self.d_model = d_model
        self.d_k = d_model // heads
        self.h = heads
        self.q_linear = nn.Linear(d_model, d_model).cuda()
        self.v_linear = nn.Linear(d_model, d_model).cuda()
        self.k_linear = nn.Linear(d_model, d_model).cuda()
        self.dropout = nn.Dropout(dropout)
        self.out = nn.Linear(d_model, d_model)

    def forward(self, q, k, v):
        # print(q, k, v)
        bs = q.size(0)

        # perform linear operation and split into h heads
        k = self.k_linear(k).view(bs, -1, self.h, self.d_k)
        q = self.q_linear(q).view(bs, -1, self.h, self.d_k)
        v = self.v_linear(v).view(bs, -1, self.h, self.d_k)

        # transpose to get dimensions bs * h * sl * d_k
        k = k.transpose(1, 2)
        q = q.transpose(1, 2)
        v = v.transpose(1, 2)

        scores = attention(q, k, v, self.d_k)

        # concatenate heads and put through final linear layer
        concat = scores.transpose(1, 2).contiguous().view(bs, -1, self.d_model)
        output = self.out(concat)
        return output
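
# Commented-out shape check (hypothetical sizes; needs a CUDA device because the
# projection layers above are created with .cuda()):
# mha = MultiHeadAttention(heads=8, d_model=40).cuda()
# x = torch.rand(2, 40, 40).cuda()
# print(mha(x, x, x).shape)   # torch.Size([2, 40, 40])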
|
|
|
|
|
|
|
|
|
|
|
class FeedForward(nn.Module):
    def __init__(self, d_model, d_ff=2048, dropout=0.1):
        super().__init__()
        self.linear_1 = nn.Linear(d_model, d_ff).cuda()
        self.dropout = nn.Dropout(dropout)
        self.linear_2 = nn.Linear(d_ff, d_model).cuda()

    def forward(self, x):
        x = self.dropout(nn.functional.relu(self.linear_1(x)))
        x = self.linear_2(x)
        return x


class Norm(nn.Module):
    def __init__(self, d_model, eps=1e-6):
        super().__init__()
        self.size = d_model
        # learnable gain and bias, as in layer normalisation
        self.alpha = nn.Parameter(torch.ones(self.size))
        self.bias = nn.Parameter(torch.zeros(self.size))
        self.eps = eps

    def forward(self, x):
        norm = self.alpha * (x - x.mean(dim=-1, keepdim=True)) / (x.std(dim=-1, keepdim=True) + self.eps) + self.bias
        return norm
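
# Commented-out sanity check (hypothetical sizes): with the default alpha/bias the output
# has roughly zero mean and unit variance along the feature dimension.
# n = Norm(40)
# y = n(torch.rand(3, 5, 40))
# print(y.mean(dim=-1).abs().max().item(), y.std(dim=-1).mean().item())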
|
|
|
|
|
|
|
|
|
|
|
"""Layers""" |
|
|
|
|
|
|
|
|
|
|
|
class EncoderLayer(nn.Module):
    def __init__(self, d_model, heads, dropout=0.1):
        super().__init__()
        self.norm_1 = Norm(d_model)
        self.norm_2 = Norm(d_model)
        self.attn = MultiHeadAttention(heads, d_model)
        self.ff = FeedForward(d_model)
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)

    def forward(self, x):
        # note: the norm layers are defined above, but their application is left commented out,
        # so the residual branches below operate on the un-normalised input
        # x2 = self.norm_1(x)
        x = x + self.dropout_1(self.attn(x, x, x))
        # x2 = self.norm_2(x)
        x = x + self.dropout_2(self.ff(x))
        return x


class DecoderLayer(nn.Module):
    def __init__(self, d_model, heads, dropout=0.1):
        super().__init__()
        self.norm_1 = Norm(d_model)
        self.norm_2 = Norm(d_model)
        self.norm_3 = Norm(d_model)
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)
        self.dropout_3 = nn.Dropout(dropout)
        self.attn_1 = MultiHeadAttention(heads, d_model)
        self.attn_2 = MultiHeadAttention(heads, d_model)
        self.ff = FeedForward(d_model).cuda()

    def forward(self, x, e_outputs):
        # self-attention over the target, then attention over the encoder outputs
        # x2 = self.norm_1(x)
        x = x + self.dropout_1(self.attn_1(x, x, x))
        # x2 = self.norm_2(x)
        x = x + self.dropout_2(self.attn_2(x, e_outputs, e_outputs))
        # x2 = self.norm_3(x)
        x = x + self.dropout_3(self.ff(x))
        return x


class Encoder(nn.Module):
    def __init__(self, vocab_size, d_model, N, heads):
        # vocab_size is accepted for interface parity but not used
        super().__init__()
        self.N = N
        self.layers = get_clones(EncoderLayer(d_model, heads), N)
        self.norm = Norm(d_model)

    def forward(self, src):
        x = src
        for i in range(self.N):
            x = self.layers[i](x)
        return self.norm(x)


class Decoder(nn.Module):
    def __init__(self, data_size, d_model, N, heads):
        # data_size is accepted for interface parity but not used
        super().__init__()
        self.N = N
        self.layers = get_clones(DecoderLayer(d_model, heads), N)
        self.norm = Norm(d_model)

    def forward(self, trg, e_outputs):
        x = trg
        for i in range(self.N):
            x = self.layers[i](x, e_outputs)
        return self.norm(x)


"""The Mighty Transformer""" |
|
|
|
|
|
|
|
|
|
|
|
class Transformer(nn.Module):
    def __init__(self, src_graph, trg_graph, d_model, N, heads):
        super().__init__()
        self.encoder = Encoder(src_graph, d_model, N, heads)
        self.decoder = Decoder(trg_graph, d_model, N, heads)
        self.out = nn.Linear(d_model, trg_graph)

    def forward(self, src, trg):
        e_outputs = self.encoder(src)
        d_output = self.decoder(trg, e_outputs)
        output = self.out(d_output)
        return output


def remove_random_node(graph, max_size=40, min_size=10):
    # Drop one random node: the source is the remaining adjacency matrix zero-padded to
    # max_size x max_size, and the target is the same matrix with the removed node's
    # adjacency row filled into the next free row. Graphs outside [min_size, max_size) are skipped.
    if len(graph.nodes) >= max_size or len(graph.nodes) < min_size:
        return None
    relabeled_graph = nx.relabel.convert_node_labels_to_integers(graph)
    choice = np.random.choice(list(relabeled_graph.nodes))
    remaining_graph = nx.to_numpy_matrix(relabeled_graph.subgraph(filter(lambda x: x != choice, list(relabeled_graph.nodes))))
    removed_node = nx.to_numpy_matrix(relabeled_graph)[choice]
    graph_length = len(remaining_graph)
    source_graph = np.pad(remaining_graph, [(0, max_size - graph_length), (0, max_size - graph_length)])
    target_graph = np.copy(source_graph)
    removed_node_row = np.asarray(removed_node)[0]
    target_graph[graph_length] = np.pad(removed_node_row, [(0, max_size - len(removed_node_row))])
    return source_graph, target_graph
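
# Commented-out illustration: both returned matrices are max_size x max_size; the target
# differs from the source only in the row holding the removed node's connections.
# pair = remove_random_node(graphs[0])
# if pair is not None:
#     src_ex, trg_ex = pair
#     print(src_ex.shape, trg_ex.shape, int((src_ex != trg_ex).sum()))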
|
|
|
|
|
|
|
|
|
|
|
converted_graphs = list(filter(lambda x: x is not None, [remove_random_node(graph) for graph in graphs]))
source_graphs = torch.Tensor([graph[0] for graph in converted_graphs])
target_graphs = torch.Tensor([graph[1] for graph in converted_graphs])
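
# Quick shape check: one padded adjacency matrix per graph.
# print(source_graphs.shape, target_graphs.shape)   # (num_graphs, 40, 40) each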
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# d_model matches the padded graph size (max_size = 40): each adjacency row is treated as a
# 40-dimensional input vector. trg_size sets the width of the final projection in Transformer.
d_model = 40
heads = 8
N = 6
src_size = len(source_graphs)
trg_size = len(target_graphs)

model = Transformer(src_size, trg_size, d_model, N, heads).cuda() |
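
# Commented-out sanity check (hypothetical): one forward pass on a small batch to confirm
# the output shape (batch, max_size, trg_size) before training.
# with torch.no_grad():
#     out = model(source_graphs[:2].cuda().float(), target_graphs[:2].cuda().float())
#     print(out.shape)   # torch.Size([2, 40, trg_size])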
|
|
|
|
|
|
|
|
|
|
|
# print(model)

optim = torch.optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.98), eps=1e-9)


def train_model(epoch, print_every=100):
    model.train()
    start = time.time()
    temp = start
    total_loss = 0
    for i in range(epoch):
        src = source_graphs.cuda()
        trg = target_graphs.cuda()
        preds = model(src.float(), trg.float())
        optim.zero_grad()
        loss = torch.nn.functional.cross_entropy(preds.view(preds.size(-1), -1), trg.view(trg.size(0), -1))
        loss.backward()
        optim.step()
        total_loss += loss.item()
        if (i + 1) % print_every == 0:
            loss_avg = total_loss / print_every
            print("time = %dm, epoch %d, iter = %d, loss = %.3f, %ds per %d iters" %
                  ((time.time() - start) // 60, epoch + 1, i + 1, loss_avg, time.time() - temp, print_every))
            total_loss = 0
            temp = time.time()


train_model(1, 1) |
|
|
|
|
|
|
|
|
|
|
|
# preds = model(source_graphs[0].cuda(), target_graphs[0].cuda())
# loss = torch.nn.functional.cross_entropy(preds.view(preds.size(-1), -1), target_graphs.view(target_graphs.size(0), -1))