5 years ago · 08f7b8518c
--- a/GraphTransformer.py
+++ b/GraphTransformer.py
 import math
 import time
 import torch
 import numpy as np
 import scipy as sp
 import pandas as pd
 import torch.nn as nn
 import networkx as nx
 import scipy as sp
 import seaborn as sns
 import time
 import torch
 import torch.nn as nn
 from torch import Tensor
 import torch.nn.init as init
 import torch.nn.functional as F
 from torch.nn.parameter import Parameter
 from torch.nn.modules.module import Module
 from torch import Tensor
 if torch.cuda.is_available():
  torch.device('cuda')
    load many graphs, e.g. enzymes
    :return: a list of graphs
    '''
    print('Loading graph dataset: ' + str(name))
    G = nx.Graph()
    # load data
    # path = '../dataset/' + name + '/'
    path = '/content/gdrive/My Drive/' + name + '/'
    data_adj = np.loadtxt(path + name + '_A.txt', delimiter=',').astype(int)
    if node_attributes:
        data_node_att = np.loadtxt(path + name + '_node_attributes.txt', delimiter=',')
    data_node_label = np.loadtxt(path + name + '_node_labels.txt', delimiter=',').astype(int)
    data_graph_indicator = np.loadtxt(path + name + '_graph_indicator.txt', delimiter=',').astype(int)
    if graph_labels:
        data_graph_labels = np.loadtxt(path + name + '_graph_labels.txt', delimiter=',').astype(int)
    data_tuple = list(map(tuple, data_adj))
    G.add_edges_from(data_tuple)
    for i in range(data_node_label.shape[0]):
        print('Loading graph dataset: ' + str(name))
        G = nx.Graph()
        # load data
        # path = '../dataset/' + name + '/'
        path = '/content/gdrive/My Drive/' + name + '/'
        data_adj = np.loadtxt(path + name + '_A.txt', delimiter=',').astype(int)
        if node_attributes:
            G.add_node(i + 1, feature=data_node_att[i])
        G.add_node(i + 1, label=data_node_label[i])
    G.remove_nodes_from(list(nx.isolates(G)))
    graph_num = data_graph_indicator.max()
    node_list = np.arange(data_graph_indicator.shape[0]) + 1
    graphs = []
    max_nodes = 0
    for i in range(graph_num):
        nodes = node_list[data_graph_indicator == i + 1]
        G_sub = G.subgraph(nodes)
            data_node_att = np.loadtxt(path + name + '_node_attributes.txt', delimiter=',')
        data_node_label = np.loadtxt(path + name + '_node_labels.txt', delimiter=',').astype(int)
        data_graph_indicator = np.loadtxt(path + name + '_graph_indicator.txt', delimiter=',').astype(int)
        if graph_labels:
            G_sub.graph['label'] = data_graph_labels[i]
        if G_sub.number_of_nodes() >= min_num_nodes and G_sub.number_of_nodes() <= max_num_nodes:
            graphs.append(G_sub)
            if G_sub.number_of_nodes() > max_nodes:
                max_nodes = G_sub.number_of_nodes()
    print('Loaded')
    return graphs
            data_graph_labels = np.loadtxt(path + name + '_graph_labels.txt', delimiter=',').astype(int)
        data_tuple = list(map(tuple, data_adj))
        G.add_edges_from(data_tuple)
        for i in range(data_node_label.shape[0]):
            if node_attributes:
                G.add_node(i + 1, feature=data_node_att[i])
            G.add_node(i + 1, label=data_node_label[i])
        G.remove_nodes_from(list(nx.isolates(G)))
        graph_num = data_graph_indicator.max()
        node_list = np.arange(data_graph_indicator.shape[0]) + 1
        graphs = []
        max_nodes = 0
        for i in range(graph_num):
            nodes = node_list[data_graph_indicator == i + 1]
            G_sub = G.subgraph(nodes)
            if graph_labels:
                G_sub.graph['label'] = data_graph_labels[i]
            if G_sub.number_of_nodes() >= min_num_nodes and G_sub.number_of_nodes() <= max_num_nodes:
                graphs.append(G_sub)
                if G_sub.number_of_nodes() > max_nodes:
                    max_nodes = G_sub.number_of_nodes()
        print('Loaded')
        return graphs
 def feature_matrix(g):
    '''
    constructs the feautre matrix (N x 3) for the enzymes datasets
    '''
    esm = nx.get_node_attributes(g, 'label')
    piazche = np.zeros((len(esm), 3))
    for i, (k, v) in enumerate(esm.items()):
        piazche[i][v-1] = 1
        esm = nx.get_node_attributes(g, 'label')
        piazche = np.zeros((len(esm), 3))
        for i, (k, v) in enumerate(esm.items()):
            piazche[i][v-1] = 1
    return piazche
        return piazche
 # def remove_random_node(graph, max_size=40, min_size=10):
  gets a graph as an input
  returns a graph with a randomly removed node adj matrix [0], its feature matrix [1], the removed node true links [2]
  '''
  if len(graph.nodes()) >= max_size or len(graph.nodes()) < min_size:
    return None
  relabeled_graph = nx.relabel.convert_node_labels_to_integers(graph)
  choice = np.random.choice(list(relabeled_graph.nodes()))
  remaining_graph = relabeled_graph.subgraph(filter(lambda x: x != choice, list(relabeled_graph.nodes())))
  remaining_graph_adj = nx.to_numpy_matrix(remaining_graph)
  graph_length = len(remaining_graph)
  remaining_graph_adj = np.pad(remaining_graph_adj, [(0, max_size - graph_length), (0, max_size - graph_length)])
  removed_node = nx.to_numpy_matrix(relabeled_graph)[choice]
  removed_node_row = np.asarray(removed_node)[0]
  removed_node_row = np.pad(removed_node_row, [(0, max_size - len(removed_node_row))])
  return remaining_graph_adj, feature_matrix(remaining_graph),  removed_node_row
      if len(graph.nodes()) >= max_size or len(graph.nodes()) < min_size:
        return None
      relabeled_graph = nx.relabel.convert_node_labels_to_integers(graph)
      choice = np.random.choice(list(relabeled_graph.nodes()))
      remaining_graph = relabeled_graph.subgraph(filter(lambda x: x != choice, list(relabeled_graph.nodes())))
      remaining_graph_adj = nx.to_numpy_matrix(remaining_graph)
      graph_length = len(remaining_graph)
      remaining_graph_adj = np.pad(remaining_graph_adj, [(0, max_size - graph_length), (0, max_size - graph_length)])
      removed_node = nx.to_numpy_matrix(relabeled_graph)[choice]
      removed_node_row = np.asarray(removed_node)[0]
      removed_node_row = np.pad(removed_node_row, [(0, max_size - len(removed_node_row))])
      return remaining_graph_adj, feature_matrix(remaining_graph),  removed_node_row
 """"
 Layers:
 class GraphAttn(nn.Module):
  def __init__(self, heads, model_dim, dropout=0.1):
    super().__init__()
    self.model_dim = model_dim
    self.key_dim = model_dim // heads
    self.heads = heads
        super().__init__()
        self.model_dim = model_dim
        self.key_dim = model_dim // heads
        self.heads = heads
    self.q_linear = nn.Linear(model_dim, model_dim).cuda()
    self.v_linear = nn.Linear(model_dim, model_dim).cuda()
    self.k_linear = nn.Linear(model_dim, model_dim).cuda()
        self.q_linear = nn.Linear(model_dim, model_dim).cuda()
        self.v_linear = nn.Linear(model_dim, model_dim).cuda()
        self.k_linear = nn.Linear(model_dim, model_dim).cuda()
    self.dropout = nn.Dropout(dropout)
    self.out = nn.Linear(model_dim, model_dim).cuda()
        self.dropout = nn.Dropout(dropout)
        self.out = nn.Linear(model_dim, model_dim).cuda()
  def forward(self, query, key, value):
    # print(q, k, v)
    bs = query.size(0)
        # print(q, k, v)
        bs = query.size(0)
    key = self.k_linear(key.float()).view(bs, -1, self.heads, self.key_dim)
    query = self.q_linear(query.float()).view(bs, -1, self.heads, self.key_dim)
    value = self.v_linear(value.float()).view(bs, -1, self.heads, self.key_dim)
        key = self.k_linear(key.float()).view(bs, -1, self.heads, self.key_dim)
        query = self.q_linear(query.float()).view(bs, -1, self.heads, self.key_dim)
        value = self.v_linear(value.float()).view(bs, -1, self.heads, self.key_dim)
    key = key.transpose(1,2)
    query = query.transpose(1,2)
    value = value.transpose(1,2)
        key = key.transpose(1,2)
        query = query.transpose(1,2)
        value = value.transpose(1,2)
    scores = attention(query, key, value, self.key_dim)
    concat = scores.transpose(1,2).contiguous().view(bs, -1, self.model_dim)
    output = self.out(concat)
    output = output.view(bs, self.model_dim)
        scores = attention(query, key, value, self.key_dim)
        concat = scores.transpose(1,2).contiguous().view(bs, -1, self.model_dim)
        output = self.out(concat)
        output = output.view(bs, self.model_dim)
    return output
        return output
 class FeedForward(nn.Module):
 class Hydra(nn.Module):
  def __init__(self, gcn_input, model_dim, head):
    super().__init__()
    def __init__(self, gcn_input, model_dim, head):
        super().__init__()
    self.GCN = GraphConv(input_dim=gcn_input, output_dim=model_dim).cuda()
    self.GAT = GraphAttn(heads=head, model_dim=model_dim).cuda()
    self.MLP = FeedForward(input_size=model_dim, hidden_size=gcn_input).cuda()
        self.GCN = GraphConv(input_dim=gcn_input, output_dim=model_dim).cuda()
        self.GAT = GraphAttn(heads=head, model_dim=model_dim).cuda()
        self.MLP = FeedForward(input_size=model_dim, hidden_size=gcn_input).cuda()
  def forward(self, x, adj):
    gcn_outputs = self.GCN(x, adj)
    gat_output = self.GAT(gcn_outputs)
    mlp_output = self.MLP(gat_output).reshape(1,-1)
        def forward(self, x, adj):
        gcn_outputs = self.GCN(x, adj)
        gat_output = self.GAT(gcn_outputs)
        mlp_output = self.MLP(gat_output).reshape(1,-1)
    return mlp_output
        return mlp_output