import math
import torch
import torch.nn as nn
import torch.nn.init as init
import numpy as np
import pandas as pd
import networkx as nx
import scipy as sp
import seaborn as sns
# from node2vec import Node2Vec  # only needed by embedder(); requires the node2vec package
from sklearn.decomposition import PCA
import copy
import time

# the rest of the notebook calls .cuda() directly, so a GPU runtime is assumed
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

"""Utils: Data Loader / Attention / Clones / Embedder"""

def Graph_load_batch(min_num_nodes=20, max_num_nodes=1000, name='ENZYMES',
                     node_attributes=True, graph_labels=True):
    '''
    load many graphs, e.g. enzymes
    :return: a list of graphs
    '''
    print('Loading graph dataset: ' + str(name))
    G = nx.Graph()
    # load data
    # path = '../dataset/' + name + '/'
    path = '/content/gdrive/My Drive/' + name + '/'
    data_adj = np.loadtxt(path + name + '_A.txt', delimiter=',').astype(int)
    if node_attributes:
        data_node_att = np.loadtxt(path + name + '_node_attributes.txt', delimiter=',')
    data_node_label = np.loadtxt(path + name + '_node_labels.txt', delimiter=',').astype(int)
    data_graph_indicator = np.loadtxt(path + name + '_graph_indicator.txt', delimiter=',').astype(int)
    if graph_labels:
        data_graph_labels = np.loadtxt(path + name + '_graph_labels.txt', delimiter=',').astype(int)
    data_tuple = list(map(tuple, data_adj))
    # build one big graph, then split it into per-graph subgraphs via the graph indicator
    G.add_edges_from(data_tuple)
    for i in range(data_node_label.shape[0]):
        if node_attributes:
            G.add_node(i + 1, feature=data_node_att[i])
        G.add_node(i + 1, label=data_node_label[i])
    G.remove_nodes_from(list(nx.isolates(G)))

    graph_num = data_graph_indicator.max()
    node_list = np.arange(data_graph_indicator.shape[0]) + 1
    graphs = []
    max_nodes = 0
    for i in range(graph_num):
        # nodes belonging to the (i+1)-th graph
        nodes = node_list[data_graph_indicator == i + 1]
        G_sub = G.subgraph(nodes)
        if graph_labels:
            G_sub.graph['label'] = data_graph_labels[i]
        if min_num_nodes <= G_sub.number_of_nodes() <= max_num_nodes:
            graphs.append(G_sub)
            if G_sub.number_of_nodes() > max_nodes:
                max_nodes = G_sub.number_of_nodes()
    print('Loaded')
    return graphs

def attention(query, key, value, d_key):
    # scaled dot-product attention: normalise the scores with a softmax over
    # the key dimension, then use them to weight the values
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_key)
    scores = nn.functional.softmax(scores, dim=-1)
    output = torch.matmul(scores, value)
    return output

def get_clones(module, N):
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])

def embedder(graph, dimensions=32, walk_length=8, num_walks=200, workers=4):
    # requires `from node2vec import Node2Vec` (commented out above)
    node2vec = Node2Vec(graph, dimensions=dimensions, walk_length=walk_length,
                        num_walks=num_walks, workers=workers)  # Use temp_folder for big graphs
    model = node2vec.fit(window=10, min_count=1, batch_words=4)
    return model.wv.vectors

graphs = Graph_load_batch(min_num_nodes=10, name='ENZYMES')

# G = graphs[1]
# vecs = embedder(G)
# pca = PCA(n_components=2)
# principalComponents = pca.fit_transform(vecs)
# principalDf = pd.DataFrame(data=principalComponents,
#                            columns=['principal component 1', 'principal component 2'])
# principalDf.index = list(G.nodes())
# sns.scatterplot(principalDf['principal component 1'], principalDf['principal component 2'])
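# Quick smoke test of the attention helper (a sketch with arbitrary sizes:
# batch of 2, 8 heads, 5 positions, d_k = 4). The softmax is taken over the
# key axis, so the output keeps the shape of the values.
_q = torch.randn(2, 8, 5, 4)
_k = torch.randn(2, 8, 5, 4)
_v = torch.randn(2, 8, 5, 4)
print(attention(_q, _k, _v, d_key=4).shape)  # expected: torch.Size([2, 8, 5, 4])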
"""Sublayers"""

class MultiHeadAttention(nn.Module):
    def __init__(self, heads, d_model, dropout=0.1):
        super().__init__()
        self.d_model = d_model
        self.d_k = d_model // heads
        self.h = heads
        self.q_linear = nn.Linear(d_model, d_model).cuda()
        self.v_linear = nn.Linear(d_model, d_model).cuda()
        self.k_linear = nn.Linear(d_model, d_model).cuda()
        self.dropout = nn.Dropout(dropout)
        self.out = nn.Linear(d_model, d_model)

    def forward(self, q, k, v):
        bs = q.size(0)
        # perform linear operation and split into h heads
        k = self.k_linear(k).view(bs, -1, self.h, self.d_k)
        q = self.q_linear(q).view(bs, -1, self.h, self.d_k)
        v = self.v_linear(v).view(bs, -1, self.h, self.d_k)
        # transpose to get dimensions bs * h * sl * d_k
        k = k.transpose(1, 2)
        q = q.transpose(1, 2)
        v = v.transpose(1, 2)
        scores = attention(q, k, v, self.d_k)
        # concatenate heads and put through final linear layer
        concat = scores.transpose(1, 2).contiguous().view(bs, -1, self.d_model)
        output = self.out(concat)
        return output

class FeedForward(nn.Module):
    def __init__(self, d_model, d_ff=2048, dropout=0.1):
        super().__init__()
        self.linear_1 = nn.Linear(d_model, d_ff).cuda()
        self.dropout = nn.Dropout(dropout)
        self.linear_2 = nn.Linear(d_ff, d_model).cuda()

    def forward(self, x):
        x = self.dropout(nn.functional.relu(self.linear_1(x)))
        x = self.linear_2(x)
        return x

class Norm(nn.Module):
    def __init__(self, d_model, eps=1e-6):
        super().__init__()
        self.size = d_model
        # learnable gain and bias for layer normalisation
        self.alpha = nn.Parameter(torch.ones(self.size))
        self.bias = nn.Parameter(torch.zeros(self.size))
        self.eps = eps

    def forward(self, x):
        norm = (self.alpha * (x - x.mean(dim=-1, keepdim=True))
                / (x.std(dim=-1, keepdim=True) + self.eps) + self.bias)
        return norm

"""Layers"""

class EncoderLayer(nn.Module):
    def __init__(self, d_model, heads, dropout=0.1):
        super().__init__()
        self.norm_1 = Norm(d_model)
        self.norm_2 = Norm(d_model)
        self.attn = MultiHeadAttention(heads, d_model)
        self.ff = FeedForward(d_model)
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)

    def forward(self, x):
        # residual connections around self-attention and feed-forward
        # (the per-sublayer norms are left unused; only the final norm
        # in Encoder / Decoder is applied)
        x = x + self.dropout_1(self.attn(x, x, x))
        x = x + self.dropout_2(self.ff(x))
        return x

class DecoderLayer(nn.Module):
    def __init__(self, d_model, heads, dropout=0.1):
        super().__init__()
        self.norm_1 = Norm(d_model)
        self.norm_2 = Norm(d_model)
        self.norm_3 = Norm(d_model)
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)
        self.dropout_3 = nn.Dropout(dropout)
        self.attn_1 = MultiHeadAttention(heads, d_model)
        self.attn_2 = MultiHeadAttention(heads, d_model)
        self.ff = FeedForward(d_model).cuda()

    def forward(self, x, e_outputs):
        # self-attention on the target, then attention over the encoder outputs
        x = x + self.dropout_1(self.attn_1(x, x, x))
        x = x + self.dropout_2(self.attn_2(x, e_outputs, e_outputs))
        x = x + self.dropout_3(self.ff(x))
        return x

class Encoder(nn.Module):
    def __init__(self, vocab_size, d_model, N, heads):
        super().__init__()
        self.N = N
        self.layers = get_clones(EncoderLayer(d_model, heads), N)
        self.norm = Norm(d_model)

    def forward(self, src):
        x = src
        for i in range(self.N):
            x = self.layers[i](x)
        return self.norm(x)

class Decoder(nn.Module):
    def __init__(self, data_size, d_model, N, heads):
        super().__init__()
        self.N = N
        self.layers = get_clones(DecoderLayer(d_model, heads), N)
        self.norm = Norm(d_model)

    def forward(self, trg, e_outputs):
        x = trg
        for i in range(self.N):
            x = self.layers[i](x, e_outputs)
        return self.norm(x)
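# Quick shape check of the encoder stack (a sketch using the hyper-parameters
# chosen further below: d_model = 40 matches the padded adjacency rows, 8 heads,
# 6 layers). The sublayers call .cuda() on their weights, so this only runs on GPU.
if torch.cuda.is_available():
    _enc = Encoder(vocab_size=0, d_model=40, N=6, heads=8).cuda()
    _dummy = torch.rand(2, 40, 40).cuda()  # (batch, node slots, adjacency row)
    print(_enc(_dummy).shape)              # expected: torch.Size([2, 40, 40])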
"""The Mighty Transformer"""

class Transformer(nn.Module):
    def __init__(self, src_graph, trg_graph, d_model, N, heads):
        super().__init__()
        self.encoder = Encoder(src_graph, d_model, N, heads)
        self.decoder = Decoder(trg_graph, d_model, N, heads)
        self.out = nn.Linear(d_model, trg_graph)

    def forward(self, src, trg):
        e_outputs = self.encoder(src)
        d_output = self.decoder(trg, e_outputs)
        output = self.out(d_output)
        return output

def remove_random_node(graph, max_size=40, min_size=10):
    # drop one random node; return the padded adjacency without it (source)
    # and the same matrix with the removed node's row re-inserted (target)
    if len(graph.nodes) >= max_size or len(graph.nodes) < min_size:
        return None
    relabeled_graph = nx.relabel.convert_node_labels_to_integers(graph)
    choice = np.random.choice(list(relabeled_graph.nodes))
    remaining_graph = nx.to_numpy_array(
        relabeled_graph.subgraph(filter(lambda x: x != choice, list(relabeled_graph.nodes))))
    removed_node_row = nx.to_numpy_array(relabeled_graph)[choice]
    graph_length = len(remaining_graph)
    source_graph = np.pad(remaining_graph, [(0, max_size - graph_length), (0, max_size - graph_length)])
    target_graph = np.copy(source_graph)
    target_graph[graph_length] = np.pad(removed_node_row, [(0, max_size - len(removed_node_row))])
    return source_graph, target_graph

converted_graphs = list(filter(lambda x: x is not None,
                               [remove_random_node(graph) for graph in graphs]))
source_graphs = torch.Tensor([graph[0] for graph in converted_graphs])
target_graphs = torch.Tensor([graph[1] for graph in converted_graphs])

d_model = 40         # one "token" per node slot, so d_model equals the padded graph size
heads = 8
N = 6
max_graph_size = 40  # width of every padded adjacency row

# the output layer maps each node slot back to an adjacency row of length max_graph_size
model = Transformer(max_graph_size, max_graph_size, d_model, N, heads).cuda()
# print(model)
optim = torch.optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.98), eps=1e-9)

def train_model(epoch, print_every=100):
    model.train()
    start = time.time()
    temp = start
    total_loss = 0
    for i in range(epoch):
        src = source_graphs.cuda()
        trg = target_graphs.cuda()
        preds = model(src.float(), trg.float())
        optim.zero_grad()
        # treat every entry of the padded target adjacency as a binary prediction
        loss = torch.nn.functional.binary_cross_entropy_with_logits(preds, trg)
        loss.backward()
        optim.step()
        total_loss += loss.item()
        if (i + 1) % print_every == 0:
            loss_avg = total_loss / print_every
            print("time = %dm, epoch %d, iter = %d, loss = %.3f, %ds per %d iters"
                  % ((time.time() - start) // 60, epoch + 1, i + 1,
                     loss_avg, time.time() - temp, print_every))
            total_loss = 0
            temp = time.time()

train_model(1, 1)

# preds = model(source_graphs[0].cuda(), target_graphs[0].cuda())
# loss = torch.nn.functional.cross_entropy(preds.view(preds.size(-1), -1), target_graphs.view(target_graphs.size(0), -1))
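# A minimal inference sketch (assumes the BCE-over-adjacency training above):
# run one source/target pair through the model, squash the logits with a sigmoid
# and threshold at 0.5 to read off a predicted adjacency for the slot where the
# removed node was re-inserted. Feeding the target as decoder input mirrors the
# training loop; the 0.5 threshold is an assumption.
model.eval()
with torch.no_grad():
    src = source_graphs[0:1].cuda().float()   # keep the batch dimension
    trg = target_graphs[0:1].cuda().float()
    logits = model(src, trg)                   # (1, max_graph_size, max_graph_size)
    predicted_adjacency = (torch.sigmoid(logits) > 0.5).float()
print(predicted_adjacency.shape)               # expected: torch.Size([1, 40, 40])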