# ommi / Graph_Transformer
							import math
import numpy as np
import pandas as pd
import networkx as nx
import scipy as sp
import seaborn as sns
import time
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
from torch import Tensor

# NOTE(review): torch.device('cuda') only constructs a device object and the
# result is discarded here, so this statement does not move anything to the GPU
# or change where later tensors are created. Confirm whether a module-level
# device constant was intended.
if torch.cuda.is_available():
  torch.device('cuda')

"""
Utils:
Data Loader
Feature Matrix Constructor
Random Node Remover
"""

def Graph_load_batch(min_num_nodes=20, max_num_nodes=1000, name='ENZYMES', node_attributes=True, graph_labels=True,
                     data_dir='/content/gdrive/My Drive/'):
    '''
    Load a multi-graph dataset (e.g. ENZYMES) and split it into one graph per
    graph id.

    The files ``<name>_A.txt``, ``<name>_node_labels.txt``,
    ``<name>_graph_indicator.txt`` and, optionally,
    ``<name>_node_attributes.txt`` / ``<name>_graph_labels.txt`` are read from
    ``<data_dir>/<name>/`` as comma-separated text.

    :param min_num_nodes: smallest node count (inclusive) a graph may have to be kept
    :param max_num_nodes: largest node count (inclusive) a graph may have to be kept
    :param name: dataset name; used both as sub-directory and as file prefix
    :param node_attributes: if True, store each node's attribute row under ``'feature'``
    :param graph_labels: if True, store each graph's label under ``graph['label']``
    :param data_dir: directory that contains the ``<name>`` dataset folder
    :return: a list of graphs whose node count lies in
             ``[min_num_nodes, max_num_nodes]``
    '''
    import os

    print('Loading graph dataset: ' + str(name))
    G = nx.Graph()
    # Every dataset file shares the prefix "<data_dir>/<name>/<name>".
    prefix = os.path.join(data_dir, name, name)

    data_adj = np.loadtxt(prefix + '_A.txt', delimiter=',').astype(int)
    if node_attributes:
        data_node_att = np.loadtxt(prefix + '_node_attributes.txt', delimiter=',')
    data_node_label = np.loadtxt(prefix + '_node_labels.txt', delimiter=',').astype(int)
    data_graph_indicator = np.loadtxt(prefix + '_graph_indicator.txt', delimiter=',').astype(int)
    if graph_labels:
        data_graph_labels = np.loadtxt(prefix + '_graph_labels.txt', delimiter=',').astype(int)

    G.add_edges_from(map(tuple, data_adj))

    # Node ids in the files are 1-based, row i describes node i + 1.
    for node_id, label in enumerate(data_node_label, start=1):
        attrs = {'label': label}
        if node_attributes:
            attrs['feature'] = data_node_att[node_id - 1]
        G.add_node(node_id, **attrs)
    G.remove_nodes_from(list(nx.isolates(G)))

    graph_num = data_graph_indicator.max()
    node_list = np.arange(data_graph_indicator.shape[0]) + 1
    graphs = []
    for i in range(graph_num):
        nodes = node_list[data_graph_indicator == i + 1]
        view = G.subgraph(nodes)
        if not (min_num_nodes <= view.number_of_nodes() <= max_num_nodes):
            continue
        # A subgraph view shares the parent's graph-attribute dict, so writing
        # the label on the view would overwrite it for every other graph.
        # Copy first so each returned graph owns its own 'label'.
        G_sub = view.copy()
        if graph_labels:
            G_sub.graph['label'] = data_graph_labels[i]
        graphs.append(G_sub)

    print('Loaded')
    return graphs

def feature_matrix(g, num_labels=3):
    '''
    Construct the one-hot node feature matrix (N x num_labels) from the
    ``'label'`` attribute of the nodes of ``g``.

    Rows follow the iteration order of ``nx.get_node_attributes(g, 'label')``;
    a node with label ``v`` gets a 1 in column ``v - 1``.

    :param g: graph whose nodes carry an integer ``'label'`` attribute
    :param num_labels: number of columns, i.e. distinct node labels
                       (3 by default, the value previously hard-coded for the
                       enzymes dataset)
    :return: ``numpy`` array of shape ``(N, num_labels)``
    '''
    # NOTE(review): labels are assumed to be 1-based; a label of 0 would index
    # column -1 (the last column) rather than raise -- confirm against the data.
    labels = nx.get_node_attributes(g, 'label')
    one_hot = np.zeros((len(labels), num_labels))
    for row, label in enumerate(labels.values()):
        one_hot[row][label - 1] = 1
    return one_hot

def _adjacency_as_matrix(g):
    '''Return the dense adjacency of ``g`` as an ``np.matrix``.'''
    # nx.to_numpy_matrix was removed in NetworkX 3.0; building the matrix from
    # to_numpy_array keeps the same return type on old and new versions.
    if hasattr(nx, 'to_numpy_array'):
        return np.asmatrix(nx.to_numpy_array(g))
    return nx.to_numpy_matrix(g)


def remove_random_node(graph, max_size=40, min_size=10):
    '''
    Remove one uniformly chosen node from the graph.

    Nodes are first relabelled to consecutive integers, so the chosen node id
    is also its row index in the adjacency matrix. No padding to ``max_size``
    is applied to either result.

    :param graph: input graph
    :param max_size: graphs with ``max_size`` or more nodes are rejected
    :param min_size: graphs with fewer than ``min_size`` nodes are rejected
    :return: ``None`` if the graph size is outside ``[min_size, max_size)``,
             otherwise a tuple ``(remaining_graph, removed_node_row)`` where
             ``remaining_graph`` is the adjacency matrix of the graph without
             the chosen node and ``removed_node_row`` is the chosen node's row
             of the full adjacency matrix as a 1-D array
    '''
    num_nodes = len(graph.nodes())
    if num_nodes >= max_size or num_nodes < min_size:
        return None

    relabeled_graph = nx.relabel.convert_node_labels_to_integers(graph)
    choice = np.random.choice(list(relabeled_graph.nodes()))

    kept_nodes = [node for node in relabeled_graph.nodes() if node != choice]
    remaining_graph = _adjacency_as_matrix(relabeled_graph.subgraph(kept_nodes))

    # NOTE(review): this row comes from the full graph, so it has one more
    # entry than remaining_graph has columns (it still contains the removed
    # node's own column) -- confirm that callers expect this.
    removed_node_row = np.asarray(_adjacency_as_matrix(relabeled_graph)[choice])[0]
    return remaining_graph, removed_node_row

""""
Layers:
Graph Convolution
Graph Multihead Attention
"""

class GraphConv(nn.Module):
    '''
    Graph convolution layer computing ``adj @ x @ weight``.

    The weight has shape ``(input_dim, output_dim)`` and is created on the GPU
    when CUDA is available, otherwise on the CPU.
    '''

    def __init__(self, input_dim, output_dim):
        '''
        :param input_dim: number of input features per node
        :param output_dim: number of output features per node
        '''
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        # Fall back to the CPU instead of failing on hosts without CUDA.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # torch.empty (like the legacy FloatTensor constructor) returns
        # uninitialised memory, so the weight must be initialised explicitly.
        self.weight = nn.Parameter(torch.empty(input_dim, output_dim, device=device))
        nn.init.xavier_uniform_(self.weight)
        # NOTE(review): this activation is stored but not applied in forward().
        self.relu = nn.ReLU()

    def forward(self, x, adj):
        '''
        :param x: node feature matrix (e.g. the output of ``feature_matrix``)
        :param adj: adjacency matrix of the graph
        :return: ``adj @ x @ weight``
        '''
        y = torch.matmul(adj, x)
        # Follow the input's floating dtype; double inputs behave as before,
        # and non-floating inputs keep the previous cast to double.
        if y.is_floating_point():
            weight = self.weight.to(y.dtype)
        else:
            weight = self.weight.double()
        return torch.matmul(y, weight)

class GraphAttn(nn.Module):
    '''
    Multi-head attention layer.

    Query, key and value are each projected by a ``model_dim -> model_dim``
    linear layer, split into ``heads`` chunks of ``model_dim // heads``
    features, passed to ``attention`` and projected once more by ``out``.
    The linear layers are created on the GPU when CUDA is available,
    otherwise on the CPU.
    '''

    def __init__(self, heads, model_dim, dropout=0.1):
        '''
        :param heads: number of attention heads; must divide ``model_dim``
        :param model_dim: feature size of the inputs and of the output
        :param dropout: probability of the stored ``nn.Dropout`` module
        :raises ValueError: if ``model_dim`` is not divisible by ``heads``
        '''
        super().__init__()
        # Without this check the per-head reshape in forward() either fails
        # late or silently mixes features across positions.
        if model_dim % heads != 0:
            raise ValueError(
                'model_dim ({}) must be divisible by heads ({})'.format(model_dim, heads))
        self.model_dim = model_dim
        self.key_dim = model_dim // heads
        self.heads = heads

        # Fall back to the CPU instead of failing on hosts without CUDA.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.q_linear = nn.Linear(model_dim, model_dim).to(device)
        self.v_linear = nn.Linear(model_dim, model_dim).to(device)
        self.k_linear = nn.Linear(model_dim, model_dim).to(device)

        # NOTE(review): this dropout module is stored but not applied in
        # forward().
        self.dropout = nn.Dropout(dropout)
        self.out = nn.Linear(model_dim, model_dim).to(device)

    def forward(self, query, key, value):
        '''
        :param query: query tensor; its first dimension is used as the batch
                      size (the original comment calls it the size of the graph)
        :param key: key tensor
        :param value: value tensor
        :return: output of the final linear layer, viewed as
                 ``(bs, -1, model_dim)`` before the projection
        '''
        bs = query.size(0)

        # Project, then split the last dimension into (heads, key_dim).
        key = self.k_linear(key).view(bs, -1, self.heads, self.key_dim)
        query = self.q_linear(query).view(bs, -1, self.heads, self.key_dim)
        value = self.v_linear(value).view(bs, -1, self.heads, self.key_dim)

        # Move the head dimension in front of the sequence dimension.
        key = key.transpose(1, 2)
        query = query.transpose(1, 2)
        value = value.transpose(1, 2)

        # `attention` is defined outside this block; presumably scaled
        # dot-product attention -- TODO confirm.
        scores = attention(query, key, value, self.key_dim)

        # Merge the heads back into a single model_dim-sized feature axis.
        concat = scores.transpose(1, 2).contiguous().view(bs, -1, self.model_dim)
        output = self.out(concat)

        return output