import math
import time

import networkx as nx
import numpy as np
import pandas as pd
import scipy as sp
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
from torch import Tensor
from torch.nn.modules.module import Module
from torch.nn.parameter import Parameter

# Device on which layer parameters are created: the GPU when one is present
# (the layers previously called ``.cuda()`` unconditionally), else the CPU so
# that the module can also be used on machines without CUDA.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


"""
Utils:
    Data Loader
    Feature Matrix Constructor
    Random Node Remover
"""


def Graph_load_batch(min_num_nodes=20, max_num_nodes=1000, name='ENZYMES',
                     node_attributes=True, graph_labels=True,
                     data_dir='/content/gdrive/My Drive/'):
    """Load a multi-graph dataset (e.g. ENZYMES) as a list of graphs.

    The files ``<name>_A.txt``, ``<name>_node_labels.txt``,
    ``<name>_graph_indicator.txt`` and, optionally,
    ``<name>_node_attributes.txt`` / ``<name>_graph_labels.txt`` are read as
    comma-delimited text from ``<data_dir>/<name>/``.  Node ids are 1-based:
    row ``i`` of the per-node files describes node ``i + 1``.

    Args:
        min_num_nodes: Smallest graph size (inclusive) to keep.
        max_num_nodes: Largest graph size (inclusive) to keep.
        name: Dataset name; used both as directory and file prefix.
        node_attributes: If true, attach each node's attribute row under the
            ``'feature'`` node attribute.
        graph_labels: If true, store each graph's label in
            ``graph.graph['label']``.
        data_dir: Directory that contains the ``<name>`` dataset folder.

    Returns:
        A list of ``networkx.Graph`` objects, one per kept graph.  Isolated
        nodes are removed before the graphs are split.
    """
    import os  # local import: only this loader needs path handling

    print('Loading graph dataset: ' + str(name))
    prefix = os.path.join(data_dir, name, name)

    data_adj = np.loadtxt(prefix + '_A.txt', delimiter=',').astype(int)
    if node_attributes:
        data_node_att = np.loadtxt(prefix + '_node_attributes.txt', delimiter=',')
    data_node_label = np.loadtxt(prefix + '_node_labels.txt', delimiter=',').astype(int)
    data_graph_indicator = np.loadtxt(prefix + '_graph_indicator.txt', delimiter=',').astype(int)
    if graph_labels:
        data_graph_labels = np.loadtxt(prefix + '_graph_labels.txt', delimiter=',').astype(int)

    # Build one big graph holding every edge of every graph in the dataset.
    G = nx.Graph()
    G.add_edges_from(map(tuple, data_adj))
    for i in range(data_node_label.shape[0]):
        if node_attributes:
            G.add_node(i + 1, feature=data_node_att[i])
        G.add_node(i + 1, label=data_node_label[i])
    G.remove_nodes_from(list(nx.isolates(G)))

    # Split the big graph into one graph per indicator value (1..graph_num).
    graph_num = data_graph_indicator.max()
    node_list = np.arange(data_graph_indicator.shape[0]) + 1
    graphs = []
    for i in range(graph_num):
        nodes = node_list[data_graph_indicator == i + 1]
        view = G.subgraph(nodes)
        if not (min_num_nodes <= view.number_of_nodes() <= max_num_nodes):
            continue
        # A subgraph *view* shares its ``.graph`` attribute dict with the
        # parent, so writing the label on the view would overwrite the label
        # of every other graph.  Copy to get independent attributes.
        G_sub = view.copy()
        if graph_labels:
            G_sub.graph['label'] = data_graph_labels[i]
        graphs.append(G_sub)

    print('Loaded')
    return graphs


def feature_matrix(g, num_classes=3):
    """Build the one-hot node-label feature matrix of a graph.

    Args:
        g: Graph whose nodes carry an integer ``'label'`` attribute; label
            ``v`` sets column ``v - 1`` (labels are treated as 1-based).
        num_classes: Number of columns; 3 matches the ENZYMES node labels.

    Returns:
        A ``(number of labelled nodes, num_classes)`` float array with a
        single 1 per row, rows ordered as the nodes are iterated.
    """
    labels = nx.get_node_attributes(g, 'label')
    one_hot = np.zeros((len(labels), num_classes))
    for row, label in enumerate(labels.values()):
        one_hot[row][label - 1] = 1
    return one_hot


def prepare_graph_data(graph, max_size=40, min_size=10):
    """Remove one random node from a graph and return the training triple.

    Args:
        graph: Input graph.
        max_size: Graphs with at least this many nodes are rejected.
        min_size: Graphs with fewer nodes than this are rejected.

    Returns:
        ``None`` when the graph size is outside ``[min_size, max_size)``;
        otherwise a tuple of
        ``[0]`` the adjacency matrix (``np.matrix``) of the graph without the
        removed node, ``[1]`` the feature matrix of that remaining graph and
        ``[2]`` the removed node's row of the *full* adjacency matrix (its
        true links, as a 1-D array over all original nodes).
    """
    num_nodes = graph.number_of_nodes()
    if num_nodes >= max_size or num_nodes < min_size:
        return None

    # Relabel to 0..n-1 so the chosen node id is also its adjacency row index.
    relabeled_graph = nx.relabel.convert_node_labels_to_integers(graph)
    choice = np.random.choice(list(relabeled_graph.nodes()))
    kept_nodes = [node for node in relabeled_graph.nodes() if node != choice]
    remaining_graph = relabeled_graph.subgraph(kept_nodes)

    # ``nx.to_numpy_matrix`` was removed in NetworkX 3.0; wrapping
    # ``to_numpy_array`` in ``np.asmatrix`` yields the same ``np.matrix``.
    remaining_graph_adj = np.asmatrix(nx.to_numpy_array(remaining_graph))
    removed_node_row = nx.to_numpy_array(relabeled_graph)[choice]
    return remaining_graph_adj, feature_matrix(remaining_graph), removed_node_row


"""
Layers:
    Graph Convolution
    Graph Multihead Attention
"""


class GraphConv(nn.Module):
    """Graph convolution layer computing ``adj @ x @ weight``."""

    def __init__(self, input_dim, output_dim):
        """Create the layer.

        Args:
            input_dim: Number of input features per node.
            output_dim: Number of output features per node.
        """
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        # ``torch.FloatTensor(m, n)`` only allocates memory; initialise the
        # weight explicitly so training never starts from garbage/NaN values.
        weight = torch.empty(input_dim, output_dim, dtype=torch.float32, device=DEVICE)
        init.xavier_uniform_(weight)
        self.weight = nn.Parameter(weight)
        self.relu = nn.ReLU()  # kept as an attribute; not applied in forward()

    def forward(self, x, adj):
        """Apply the convolution.

        Args:
            x: Node feature matrix (as built by ``feature_matrix``).
            adj: Adjacency matrix of the graph.

        Returns:
            ``adj @ x @ weight`` with the weight cast to the inputs' floating
            dtype (double-precision inputs behave exactly as before).
        """
        y = torch.matmul(adj, x)
        # Non-floating inputs keep the historical cast to double.
        weight_dtype = y.dtype if y.is_floating_point() else torch.double
        return torch.matmul(y, self.weight.to(weight_dtype))


class GraphAttn(nn.Module):
    """Multi-head attention layer over graph representations."""

    def __init__(self, heads, model_dim, dropout=0.1):
        """Create the layer.

        Args:
            heads: Number of attention heads.
            model_dim: Model width; each head works on ``model_dim // heads``
                features, so ``model_dim`` should be divisible by ``heads``.
            dropout: Probability for the ``self.dropout`` module.
        """
        super().__init__()
        self.model_dim = model_dim
        self.key_dim = model_dim // heads
        self.heads = heads
        self.q_linear = nn.Linear(model_dim, model_dim).to(DEVICE)
        self.v_linear = nn.Linear(model_dim, model_dim).to(DEVICE)
        self.k_linear = nn.Linear(model_dim, model_dim).to(DEVICE)
        self.dropout = nn.Dropout(dropout)  # not applied inside forward()
        self.out = nn.Linear(model_dim, model_dim).to(DEVICE)

    def _split_heads(self, projected, bs):
        """Reshape ``(bs, ..., model_dim)`` to ``(bs, heads, -1, key_dim)``."""
        return projected.view(bs, -1, self.heads, self.key_dim).transpose(1, 2)

    def forward(self, query, key, value):
        """Run multi-head attention.

        Args:
            query, key, value: Tensors whose first dimension has the same
                size ``bs`` and whose last dimension is ``model_dim``.

        Returns:
            A ``(bs, model_dim)`` tensor; the final ``view`` therefore
            requires the attention output to hold one vector per entry.
        """
        bs = query.size(0)  # size of the graph

        key = self._split_heads(self.k_linear(key), bs)
        query = self._split_heads(self.q_linear(query), bs)
        value = self._split_heads(self.v_linear(value), bs)

        # NOTE(review): ``attention`` is not defined in the visible part of
        # this file; it is expected to be provided elsewhere in the module.
        scores = attention(query, key, value, self.key_dim)
        concat = scores.transpose(1, 2).contiguous().view(bs, -1, self.model_dim)
        output = self.out(concat)
        return output.view(bs, self.model_dim)