|
|
import copy
import math
import time

import networkx as nx
import numpy as np
import pandas as pd
import scipy as sp
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
from sklearn.decomposition import PCA
from torch import Tensor
from torch.nn.modules.module import Module
from torch.nn.parameter import Parameter

# from node2vec import Node2Vec

# Default compute device (the rest of this script assumes CUDA is available).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

"""Utils: |
|
|
|
Data Loader / Attention / Clones / Embedder""" |
|
|
|
""" |
|
|
|
Utils: |
|
|
|
Data Loader |
|
|
|
Feature Matrix Constructor |
|
|
|
Random Node Remover |
|
|
|
""" |
|
|
|
|
|
|
|
def Graph_load_batch(min_num_nodes=20, max_num_nodes=1000, name='ENZYMES', node_attributes=True, graph_labels=True):
    '''
    Load a batch of graphs from the TU-format benchmark files (e.g. ENZYMES)
    and return them as a list of networkx graphs whose sizes fall between
    min_num_nodes and max_num_nodes.
    '''
    # ... (dataset parsing omitted in this excerpt) ...
    print('Loaded')
    return graphs

def attention(query, key, value, d_key):
    # scaled dot-product attention: normalise the scores with a softmax,
    # then use them to weight the values
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_key)
    scores = nn.functional.softmax(scores, dim=-1)
    output = torch.matmul(scores, value)
    return output
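
# A quick shape check for the attention helper above (sizes here are arbitrary and
# only illustrative): with a batch of 1, 2 heads, 5 tokens and d_k = 8, the output
# keeps the query's shape.
# _q = _k = _v = torch.rand(1, 2, 5, 8)
# assert attention(_q, _k, _v, 8).shape == (1, 2, 5, 8)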
|
|
|
|
|
|
|
def get_clones(module, N):
    return nn.ModuleList([copy.deepcopy(module) for i in range(N)])

def embedder(graph, dimensions=32, walk_length=8, num_walks=200, workers=4):
    # requires the node2vec package (import is commented out at the top of the file)
    node2vec = Node2Vec(graph, dimensions=dimensions, walk_length=walk_length, num_walks=num_walks, workers=workers)  # Use temp_folder for big graphs
    model = node2vec.fit(window=10, min_count=1, batch_words=4)
    return model.wv.vectors


graphs = Graph_load_batch(min_num_nodes=10, name='ENZYMES')

# G = graphs[1]
# vecs = embedder(G)

# pca = PCA(n_components=2)
# principalComponents = pca.fit_transform(vecs)
# principalDf = pd.DataFrame(data=principalComponents,
#                            columns=['principal component 1', 'principal component 2'])
# principalDf.index = list(G.nodes())

# sns.scatterplot(principalDf['principal component 1'], principalDf['principal component 2'])


"""Sublayers"""

class MultiHeadAttention(nn.Module):
    def __init__(self, heads, d_model, dropout=0.1):
        super().__init__()
        self.d_model = d_model
        self.d_k = d_model // heads
        self.h = heads
        self.q_linear = nn.Linear(d_model, d_model).cuda()
        self.v_linear = nn.Linear(d_model, d_model).cuda()
        self.k_linear = nn.Linear(d_model, d_model).cuda()
        self.dropout = nn.Dropout(dropout)
        self.out = nn.Linear(d_model, d_model)

    def forward(self, q, k, v):
        # print(q, k, v)
        bs = q.size(0)
        # perform linear operation and split into h heads
        k = self.k_linear(k).view(bs, -1, self.h, self.d_k)
        q = self.q_linear(q).view(bs, -1, self.h, self.d_k)
        v = self.v_linear(v).view(bs, -1, self.h, self.d_k)
        # transpose to get dimensions bs * h * sl * d_k
        k = k.transpose(1, 2)
        q = q.transpose(1, 2)
        v = v.transpose(1, 2)

        scores = attention(q, k, v, self.d_k)
        # concatenate heads and put through final linear layer
        concat = scores.transpose(1, 2).contiguous().view(bs, -1, self.d_model)
        output = self.out(concat)
        return output
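
# Illustrative usage of MultiHeadAttention (hypothetical sizes; assumes a CUDA device
# because the projection layers above are created with .cuda()): 8 heads over
# d_model = 40 leave the input shape unchanged.
# _mha = MultiHeadAttention(heads=8, d_model=40).cuda()
# _x = torch.rand(3, 10, 40).cuda()
# assert _mha(_x, _x, _x).shape == (3, 10, 40)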
|
|
|
|
|
|
|
class FeedForward(nn.Module):
    def __init__(self, d_model, d_ff=2048, dropout=0.1):
        super().__init__()
        self.linear_1 = nn.Linear(d_model, d_ff).cuda()
        self.dropout = nn.Dropout(dropout)
        self.linear_2 = nn.Linear(d_ff, d_model).cuda()

    def forward(self, x):
        x = self.dropout(nn.functional.relu(self.linear_1(x)))
        x = self.linear_2(x)
        return x

class Norm(nn.Module):
    def __init__(self, d_model, eps=1e-6):
        super().__init__()
        self.size = d_model
        # learnable gain and bias of the normalisation
        self.alpha = nn.Parameter(torch.ones(self.size))
        self.bias = nn.Parameter(torch.zeros(self.size))
        self.eps = eps

    def forward(self, x):
        norm = self.alpha * (x - x.mean(dim=-1, keepdim=True)) / (x.std(dim=-1, keepdim=True) + self.eps) + self.bias
        return norm
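
# Note: Norm is a hand-rolled layer normalisation over the last dimension. It is close
# to torch.nn.LayerNorm but not identical (it uses the unbiased std, and eps is added
# to the std rather than the variance). A minimal usage sketch:
# _x = torch.randn(2, 4, 8)
# Norm(8)(_x).shape  # -> torch.Size([2, 4, 8]), same shape as the input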
|
|
|
|
|
|
|
"""Layers""" |
|
|
|
|
|
|
|
class EncoderLayer(nn.Module):
    def __init__(self, d_model, heads, dropout=0.1):
        super().__init__()
        self.norm_1 = Norm(d_model)
        self.norm_2 = Norm(d_model)
        self.attn = MultiHeadAttention(heads, d_model)
        self.ff = FeedForward(d_model)
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)

    def forward(self, x):
        # residual self-attention block
        # x2 = self.norm_1(x)
        x = x + self.dropout_1(self.attn(x, x, x))
        # residual position-wise feed-forward block
        # x2 = self.norm_2(x)
        x = x + self.dropout_2(self.ff(x))
        return x

class DecoderLayer(nn.Module):
    def __init__(self, d_model, heads, dropout=0.1):
        super().__init__()
        self.norm_1 = Norm(d_model)
        self.norm_2 = Norm(d_model)
        self.norm_3 = Norm(d_model)
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)
        self.dropout_3 = nn.Dropout(dropout)
        self.attn_1 = MultiHeadAttention(heads, d_model)
        self.attn_2 = MultiHeadAttention(heads, d_model)
        self.ff = FeedForward(d_model).cuda()

    def forward(self, x, e_outputs):
        # residual self-attention over the decoder input
        # x2 = self.norm_1(x)
        x = x + self.dropout_1(self.attn_1(x, x, x))
        # residual encoder-decoder attention
        # x2 = self.norm_2(x)
        x = x + self.dropout_2(self.attn_2(x, e_outputs, e_outputs))
        # residual position-wise feed-forward block
        # x2 = self.norm_3(x)
        x = x + self.dropout_3(self.ff(x))
        return x

class Encoder(nn.Module):
    def __init__(self, vocab_size, d_model, N, heads):
        super().__init__()
        self.N = N
        self.layers = get_clones(EncoderLayer(d_model, heads), N)
        self.norm = Norm(d_model)

    def forward(self, src):
        x = src
        for i in range(self.N):
            x = self.layers[i](x)
        return self.norm(x)

class Decoder(nn.Module):
    def __init__(self, data_size, d_model, N, heads):
        super().__init__()
        self.N = N
        self.layers = get_clones(DecoderLayer(d_model, heads), N)
        self.norm = Norm(d_model)

    def forward(self, trg, e_outputs):
        x = trg
        for i in range(self.N):
            x = self.layers[i](x, e_outputs)
        return self.norm(x)


"""The Mighty Transformer"""

class Transformer(nn.Module):
    def __init__(self, src_graph, trg_graph, d_model, N, heads):
        super().__init__()
        self.encoder = Encoder(src_graph, d_model, N, heads)
        self.decoder = Decoder(trg_graph, d_model, N, heads)
        self.out = nn.Linear(d_model, trg_graph)

    def forward(self, src, trg):
        e_outputs = self.encoder(src)
        d_output = self.decoder(trg, e_outputs)
        output = self.out(d_output)
        return output
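
# Hypothetical shape walk-through for the Transformer above, using the padded graphs
# built later in this script (a batch of B adjacency matrices with 40 nodes, d_model = 40):
# src, trg: B x 40 x 40 -> encoder and decoder both keep B x 40 x 40 -> self.out
# projects the last dimension from d_model to trg_graph, giving B x 40 x trg_graph.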
|
|
|
|
|
|
|
def feature_matrix(g):
    '''
    constructs the feature matrix (N x 3) for the ENZYMES dataset:
    each row is the one-hot encoding of a node's 'label' attribute
    '''
    esm = nx.get_node_attributes(g, 'label')
    piazche = np.zeros((len(esm), 3))
    for i, (k, v) in enumerate(esm.items()):
        piazche[i][v-1] = 1
    return piazche
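
# Small illustration on a hypothetical 3-node graph with node labels 1, 3 and 2:
# each row of the result is the one-hot encoding of the corresponding node's label.
# _g = nx.Graph()
# _g.add_nodes_from([(0, {'label': 1}), (1, {'label': 3}), (2, {'label': 2})])
# feature_matrix(_g)  # -> [[1, 0, 0], [0, 0, 1], [0, 1, 0]]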
|
|
|
|
|
|
|
def remove_random_node(graph, max_size=40, min_size=10):
    '''
    removes a random node from the graph;
    returns the remaining graph's adjacency matrix zero-padded to max_size x max_size
    (source) and a copy with the removed node's links written into the next free row (target)
    '''
    if len(graph.nodes) >= max_size or len(graph.nodes) < min_size:
        return None
    relabeled_graph = nx.relabel.convert_node_labels_to_integers(graph)
    choice = np.random.choice(list(relabeled_graph.nodes))
    remaining_graph = nx.to_numpy_matrix(relabeled_graph.subgraph(filter(lambda x: x != choice, list(relabeled_graph.nodes))))
    removed_node = nx.to_numpy_matrix(relabeled_graph)[choice]
    graph_length = len(remaining_graph)
    source_graph = np.pad(remaining_graph, [(0, max_size - graph_length), (0, max_size - graph_length)])
    target_graph = np.copy(source_graph)
    removed_node_row = np.asarray(removed_node)[0]
    target_graph[graph_length] = np.pad(removed_node_row, [(0, max_size - len(removed_node_row))])
    return source_graph, target_graph
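
# Sketch of the expected output for a graph whose size falls inside the
# (min_size, max_size) window; for graphs outside the window the function returns None.
# _pair = remove_random_node(graphs[0], max_size=40, min_size=10)
# if _pair is not None:
#     _src, _trg = _pair
#     _src.shape, _trg.shape  # -> (40, 40), (40, 40)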
|
|
|
|
|
|
|
converted_graphs = list(filter(lambda x: x is not None, [remove_random_node(graph) for graph in graphs]))
source_graphs = torch.Tensor([graph[0] for graph in converted_graphs])
target_graphs = torch.Tensor([graph[1] for graph in converted_graphs])

d_model = 40
heads = 8
N = 6
src_size = len(source_graphs)
trg_size = len(target_graphs)

model = Transformer(src_size, trg_size, d_model, N, heads).cuda()
# print(model)

optim = torch.optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.98), eps=1e-9)

def train_model(epoch, print_every=100):
    model.train()
    start = time.time()
    temp = start
    total_loss = 0
    for i in range(epoch):

        src = source_graphs.cuda()
        trg = target_graphs.cuda()

        preds = model(src.float(), trg.float())
        optim.zero_grad()
        loss = torch.nn.functional.cross_entropy(preds.view(preds.size(-1), -1), trg.view(trg.size(0), -1))
        loss.backward()
        optim.step()
        total_loss += loss.item()
        if (i + 1) % print_every == 0:
            loss_avg = total_loss / print_every
            print("time = %dm, epoch %d, iter = %d, loss = %.3f, %ds per %d iters" %
                  ((time.time() - start) // 60, epoch + 1, i + 1, loss_avg,
                   time.time() - temp, print_every))
            total_loss = 0
            temp = time.time()


train_model(1, 1)

#preds = model(source_graphs[0].cuda(), target_graphs[0].cuda()) |
|
|
|
#loss = torch.nn.functional.cross_entropy(preds.view(preds.size(-1), -1), target_graphs.view(target_graphs.size(0), -1)) |
|
|
|
# |
|
|
|
"""" |
|
|
|
Layers: |
|
|
|
GCN |
|
|
|
""" |
|
|
|
|
|
|
|
class GraphConv(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.weight = nn.Parameter(torch.FloatTensor(input_dim, output_dim).cuda())
        # initialise the layer weights (a raw FloatTensor holds uninitialised memory)
        init.xavier_uniform_(self.weight)
        self.relu = nn.ReLU()

    def forward(self, x, adj):
        '''
        x is the feature matrix
        adj is the adjacency matrix of the graph
        '''
        y = torch.matmul(adj, x)
        print(y.shape)
        print(self.weight.shape)
        y = torch.matmul(y, self.weight.double())
        return y
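
# Illustrative use of GraphConv on one graph from the batch (shapes only; assumes a CUDA
# device since the weight above is created with .cuda(), and uses the node features from
# feature_matrix plus the dense adjacency matrix, both as doubles):
# _g = graphs[0]
# _x = torch.from_numpy(feature_matrix(_g)).cuda()                    # N x 3
# _adj = torch.from_numpy(np.asarray(nx.to_numpy_matrix(_g))).cuda()  # N x N
# _gcn = GraphConv(input_dim=3, output_dim=16)
# _gcn(_x, _adj).shape  # -> torch.Size([N, 16])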