import math
import torch
import torch.nn as nn
import torch.nn.init as init
import numpy as np
import pandas as pd
import networkx as nx
import scipy as sp
import seaborn as sns
# from node2vec import Node2Vec
from sklearn.decomposition import PCA
import copy
import time

# The training code below assumes a CUDA device (modules and tensors are moved
# with .cuda()); this just records which device is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
| """Utils: | |||
| Data Loader / Attention / Clones / Embedder""" | |||
def Graph_load_batch(min_num_nodes=20, max_num_nodes=1000, name='ENZYMES', node_attributes=True, graph_labels=True):
    '''
    Load a batch of graphs from a TU-style dataset (e.g. ENZYMES).
    :return: a list of networkx graphs
    '''
    print('Loading graph dataset: ' + str(name))
    G = nx.Graph()
    # load data
    # path = '../dataset/' + name + '/'
    path = '/content/gdrive/My Drive/' + name + '/'
    data_adj = np.loadtxt(path + name + '_A.txt', delimiter=',').astype(int)
    if node_attributes:
        data_node_att = np.loadtxt(path + name + '_node_attributes.txt', delimiter=',')
    data_node_label = np.loadtxt(path + name + '_node_labels.txt', delimiter=',').astype(int)
    data_graph_indicator = np.loadtxt(path + name + '_graph_indicator.txt', delimiter=',').astype(int)
    if graph_labels:
        data_graph_labels = np.loadtxt(path + name + '_graph_labels.txt', delimiter=',').astype(int)
    # build one big graph from the edge list, then attach node attributes/labels
    data_tuple = list(map(tuple, data_adj))
    G.add_edges_from(data_tuple)
    for i in range(data_node_label.shape[0]):
        if node_attributes:
            G.add_node(i + 1, feature=data_node_att[i])
        G.add_node(i + 1, label=data_node_label[i])
    G.remove_nodes_from(list(nx.isolates(G)))
    # split the big graph into per-graph subgraphs using the graph indicator
    graph_num = data_graph_indicator.max()
    node_list = np.arange(data_graph_indicator.shape[0]) + 1
    graphs = []
    max_nodes = 0
    for i in range(graph_num):
        nodes = node_list[data_graph_indicator == i + 1]
        # copy() so each subgraph gets its own graph-attribute dict for the label
        G_sub = G.subgraph(nodes).copy()
        if graph_labels:
            G_sub.graph['label'] = data_graph_labels[i]
        if min_num_nodes <= G_sub.number_of_nodes() <= max_num_nodes:
            graphs.append(G_sub)
            if G_sub.number_of_nodes() > max_nodes:
                max_nodes = G_sub.number_of_nodes()
    print('Loaded')
    return graphs
def attention(query, key, value, d_key):
    # scaled dot-product attention: softmax the scores over the key dimension,
    # then use the resulting weights to mix the values
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_key)
    weights = nn.functional.softmax(scores, dim=-1)
    output = torch.matmul(weights, value)
    return output
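# Quick sanity check of the attention helper on random tensors; the sizes here
# (batch 2, sequence length 4, feature size 8) are illustrative assumptions.
_q, _k, _v = torch.randn(2, 4, 8), torch.randn(2, 4, 8), torch.randn(2, 4, 8)
assert attention(_q, _k, _v, d_key=8).shape == (2, 4, 8)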
def get_clones(module, N):
    return nn.ModuleList([copy.deepcopy(module) for i in range(N)])
def embedder(graph, dimensions=32, walk_length=8, num_walks=200, workers=4):
    # node2vec embedding of a single graph; requires the node2vec package
    # (see the commented-out import above). Use temp_folder for big graphs.
    node2vec = Node2Vec(graph, dimensions=dimensions, walk_length=walk_length, num_walks=num_walks, workers=workers)
    model = node2vec.fit(window=10, min_count=1, batch_words=4)
    return model.wv.vectors
graphs = Graph_load_batch(min_num_nodes=10, name='ENZYMES')
# G = graphs[1]
# vecs = embedder(G)
# pca = PCA(n_components=2)
# principalComponents = pca.fit_transform(vecs)
# principalDf = pd.DataFrame(data=principalComponents,
#                            columns=['principal component 1', 'principal component 2'])
# principalDf.index = list(G.nodes())
# sns.scatterplot(principalDf['principal component 1'], principalDf['principal component 2'])
| """Sublayers""" | |||
class MultiHeadAttention(nn.Module):
    def __init__(self, heads, d_model, dropout=0.1):
        super().__init__()
        self.d_model = d_model
        self.d_k = d_model // heads
        self.h = heads
        self.q_linear = nn.Linear(d_model, d_model).cuda()
        self.v_linear = nn.Linear(d_model, d_model).cuda()
        self.k_linear = nn.Linear(d_model, d_model).cuda()
        self.dropout = nn.Dropout(dropout)
        self.out = nn.Linear(d_model, d_model)

    def forward(self, q, k, v):
        bs = q.size(0)
        # perform linear operation and split into h heads
        k = self.k_linear(k).view(bs, -1, self.h, self.d_k)
        q = self.q_linear(q).view(bs, -1, self.h, self.d_k)
        v = self.v_linear(v).view(bs, -1, self.h, self.d_k)
        # transpose to get dimensions bs * h * sl * d_k
        k = k.transpose(1, 2)
        q = q.transpose(1, 2)
        v = v.transpose(1, 2)
        # scaled dot-product attention per head
        scores = attention(q, k, v, self.d_k)
        # concatenate heads and put through the final linear layer
        concat = scores.transpose(1, 2).contiguous().view(bs, -1, self.d_model)
        output = self.out(concat)
        return output
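# Shape note (with the hyper-parameters used below, d_model = 40 and heads = 8):
# each head attends over d_k = 5-dimensional slices, and an input of shape
# (batch, 40, 40) comes back out as (batch, 40, 40) after the final projection.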
class FeedForward(nn.Module):
    def __init__(self, d_model, d_ff=2048, dropout=0.1):
        super().__init__()
        self.linear_1 = nn.Linear(d_model, d_ff).cuda()
        self.dropout = nn.Dropout(dropout)
        self.linear_2 = nn.Linear(d_ff, d_model).cuda()

    def forward(self, x):
        x = self.dropout(nn.functional.relu(self.linear_1(x)))
        x = self.linear_2(x)
        return x
class Norm(nn.Module):
    def __init__(self, d_model, eps=1e-6):
        super().__init__()
        self.size = d_model
        self.alpha = nn.Parameter(torch.ones(self.size))
        self.bias = nn.Parameter(torch.zeros(self.size))
        self.eps = eps

    def forward(self, x):
        norm = self.alpha * (x - x.mean(dim=-1, keepdim=True)) / (x.std(dim=-1, keepdim=True) + self.eps) + self.bias
        return norm
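# For reference: this layer normalisation is essentially nn.LayerNorm(d_model)
# written out by hand (mean/std over the last dimension, then a learned scale
# and shift); the two differ only in minor details such as the unbiased vs.
# biased standard-deviation estimate, so their outputs match only approximately.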
| """Layers""" | |||
class EncoderLayer(nn.Module):
    def __init__(self, d_model, heads, dropout=0.1):
        super().__init__()
        self.norm_1 = Norm(d_model)
        self.norm_2 = Norm(d_model)
        self.attn = MultiHeadAttention(heads, d_model)
        self.ff = FeedForward(d_model)
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)

    def forward(self, x):
        # residual self-attention followed by a residual feed-forward block
        # x2 = self.norm_1(x)
        x = x + self.dropout_1(self.attn(x, x, x))
        # x2 = self.norm_2(x)
        x = x + self.dropout_2(self.ff(x))
        return x
class DecoderLayer(nn.Module):
    def __init__(self, d_model, heads, dropout=0.1):
        super().__init__()
        self.norm_1 = Norm(d_model)
        self.norm_2 = Norm(d_model)
        self.norm_3 = Norm(d_model)
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)
        self.dropout_3 = nn.Dropout(dropout)
        self.attn_1 = MultiHeadAttention(heads, d_model)
        self.attn_2 = MultiHeadAttention(heads, d_model)
        self.ff = FeedForward(d_model).cuda()

    def forward(self, x, e_outputs):
        # residual self-attention, then residual attention over the encoder
        # outputs, then a residual feed-forward block
        # x2 = self.norm_1(x)
        x = x + self.dropout_1(self.attn_1(x, x, x))
        # x2 = self.norm_2(x)
        x = x + self.dropout_2(self.attn_2(x, e_outputs, e_outputs))
        # x2 = self.norm_3(x)
        x = x + self.dropout_3(self.ff(x))
        return x
class Encoder(nn.Module):
    def __init__(self, vocab_size, d_model, N, heads):
        super().__init__()
        self.N = N
        self.layers = get_clones(EncoderLayer(d_model, heads), N)
        self.norm = Norm(d_model)

    def forward(self, src):
        x = src
        for i in range(self.N):
            x = self.layers[i](x)
        return self.norm(x)
class Decoder(nn.Module):
    def __init__(self, data_size, d_model, N, heads):
        super().__init__()
        self.N = N
        self.layers = get_clones(DecoderLayer(d_model, heads), N)
        self.norm = Norm(d_model)

    def forward(self, trg, e_outputs):
        x = trg
        for i in range(self.N):
            x = self.layers[i](x, e_outputs)
        return self.norm(x)
| """The Mighty Transformer""" | |||
class Transformer(nn.Module):
    def __init__(self, src_graph, trg_graph, d_model, N, heads):
        super().__init__()
        self.encoder = Encoder(src_graph, d_model, N, heads)
        self.decoder = Decoder(trg_graph, d_model, N, heads)
        self.out = nn.Linear(d_model, trg_graph)

    def forward(self, src, trg):
        e_outputs = self.encoder(src)
        d_output = self.decoder(trg, e_outputs)
        output = self.out(d_output)
        return output
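# Data flow: the encoder self-attends over the rows of the padded source
# adjacency matrix, the decoder attends over its own input and the encoder
# output, and the final linear layer maps each d_model-dimensional row to
# trg_graph output values per node slot.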
def remove_random_node(graph, max_size=40, min_size=10):
    """
    Build a training pair from one graph: remove a random node, pad the
    remaining adjacency matrix to max_size x max_size (source), and build a
    target that additionally contains the removed node's adjacency row.
    Returns None for graphs outside the [min_size, max_size) range.
    """
    if len(graph.nodes) >= max_size or len(graph.nodes) < min_size:
        return None
    relabeled_graph = nx.relabel.convert_node_labels_to_integers(graph)
    choice = np.random.choice(list(relabeled_graph.nodes))
    remaining_graph = nx.to_numpy_array(relabeled_graph.subgraph(filter(lambda x: x != choice, list(relabeled_graph.nodes))))
    removed_node_row = nx.to_numpy_array(relabeled_graph)[choice]
    graph_length = len(remaining_graph)
    source_graph = np.pad(remaining_graph, [(0, max_size - graph_length), (0, max_size - graph_length)])
    target_graph = np.copy(source_graph)
    target_graph[graph_length] = np.pad(removed_node_row, [(0, max_size - len(removed_node_row))])
    return source_graph, target_graph
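# Minimal usage sketch on a synthetic graph (erdos_renyi_graph is only an
# illustrative stand-in for the ENZYMES graphs loaded above):
_toy_pair = remove_random_node(nx.erdos_renyi_graph(20, 0.2))
if _toy_pair is not None:
    _src, _trg = _toy_pair
    print(_src.shape, _trg.shape)  # both (40, 40): padded adjacency matrices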
# Build (source, target) pairs of padded adjacency matrices for every graph
# in the size range accepted by remove_random_node
converted_graphs = list(filter(lambda x: x is not None, [remove_random_node(graph) for graph in graphs]))
source_graphs = torch.tensor(np.stack([graph[0] for graph in converted_graphs]), dtype=torch.float32)
target_graphs = torch.tensor(np.stack([graph[1] for graph in converted_graphs]), dtype=torch.float32)

d_model = 40   # width of the padded adjacency matrices (max_size above)
heads = 8
N = 6
# The final linear layer maps each node slot to trg_size values, so the
# per-node output width is set to the padded adjacency width to match the
# reconstruction target used in training.
src_size = source_graphs.size(-1)   # 40
trg_size = target_graphs.size(-1)   # 40
model = Transformer(src_size, trg_size, d_model, N, heads).cuda()
# print(model)
optim = torch.optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.98), eps=1e-9)
def train_model(epoch, print_every=100):
    model.train()
    start = time.time()
    temp = start
    total_loss = 0
    for i in range(epoch):
        src = source_graphs.cuda()
        trg = target_graphs.cuda()
        preds = model(src.float(), trg.float())
        optim.zero_grad()
        # reconstruction loss over the padded adjacency entries, treating graph
        # completion as per-edge binary classification (an assumed reading of
        # the objective; preds and trg both have shape (batch, 40, 40))
        loss = nn.functional.binary_cross_entropy_with_logits(preds, trg)
        loss.backward()
        optim.step()
        total_loss += loss.item()
        if (i + 1) % print_every == 0:
            loss_avg = total_loss / print_every
            print("time = %dm, epoch %d, iter = %d, loss = %.3f, %ds per %d iters" %
                  ((time.time() - start) // 60, epoch + 1, i + 1, loss_avg,
                   time.time() - temp, print_every))
            total_loss = 0
            temp = time.time()

train_model(1, 1)
# preds = model(source_graphs[0].cuda(), target_graphs[0].cuda())
# loss = torch.nn.functional.cross_entropy(preds.view(preds.size(-1), -1), target_graphs.view(target_graphs.size(0), -1))