|
|
@@ -1,17 +1,17 @@ |
|
|
|
import math |
|
|
|
import time |
|
|
|
import torch |
|
|
|
import numpy as np |
|
|
|
import scipy as sp |
|
|
|
import pandas as pd |
|
|
|
import torch.nn as nn |
|
|
|
import networkx as nx |
|
|
|
import scipy as sp |
|
|
|
import seaborn as sns |
|
|
|
import time |
|
|
|
import torch |
|
|
|
import torch.nn as nn |
|
|
|
from torch import Tensor |
|
|
|
import torch.nn.init as init |
|
|
|
import torch.nn.functional as F |
|
|
|
from torch.nn.parameter import Parameter |
|
|
|
from torch.nn.modules.module import Module |
|
|
|
from torch import Tensor |
|
|
|
|
|
|
|
if torch.cuda.is_available(): |
|
|
|
torch.device('cuda') |
|
|
@@ -28,54 +28,54 @@ def Graph_load_batch(min_num_nodes=20, max_num_nodes=1000, name='ENZYMES', node_ |
|
|
|
load many graphs, e.g. enzymes |
|
|
|
:return: a list of graphs |
|
|
|
''' |
|
|
|
print('Loading graph dataset: ' + str(name)) |
|
|
|
G = nx.Graph() |
|
|
|
# load data |
|
|
|
# path = '../dataset/' + name + '/' |
|
|
|
path = '/content/gdrive/My Drive/' + name + '/' |
|
|
|
data_adj = np.loadtxt(path + name + '_A.txt', delimiter=',').astype(int) |
|
|
|
if node_attributes: |
|
|
|
data_node_att = np.loadtxt(path + name + '_node_attributes.txt', delimiter=',') |
|
|
|
data_node_label = np.loadtxt(path + name + '_node_labels.txt', delimiter=',').astype(int) |
|
|
|
data_graph_indicator = np.loadtxt(path + name + '_graph_indicator.txt', delimiter=',').astype(int) |
|
|
|
if graph_labels: |
|
|
|
data_graph_labels = np.loadtxt(path + name + '_graph_labels.txt', delimiter=',').astype(int) |
|
|
|
data_tuple = list(map(tuple, data_adj)) |
|
|
|
G.add_edges_from(data_tuple) |
|
|
|
for i in range(data_node_label.shape[0]): |
|
|
|
print('Loading graph dataset: ' + str(name)) |
|
|
|
G = nx.Graph() |
|
|
|
# load data |
|
|
|
# path = '../dataset/' + name + '/' |
|
|
|
path = '/content/gdrive/My Drive/' + name + '/' |
|
|
|
data_adj = np.loadtxt(path + name + '_A.txt', delimiter=',').astype(int) |
|
|
|
if node_attributes: |
|
|
|
G.add_node(i + 1, feature=data_node_att[i]) |
|
|
|
G.add_node(i + 1, label=data_node_label[i]) |
|
|
|
G.remove_nodes_from(list(nx.isolates(G))) |
|
|
|
graph_num = data_graph_indicator.max() |
|
|
|
node_list = np.arange(data_graph_indicator.shape[0]) + 1 |
|
|
|
graphs = [] |
|
|
|
max_nodes = 0 |
|
|
|
for i in range(graph_num): |
|
|
|
nodes = node_list[data_graph_indicator == i + 1] |
|
|
|
G_sub = G.subgraph(nodes) |
|
|
|
data_node_att = np.loadtxt(path + name + '_node_attributes.txt', delimiter=',') |
|
|
|
data_node_label = np.loadtxt(path + name + '_node_labels.txt', delimiter=',').astype(int) |
|
|
|
data_graph_indicator = np.loadtxt(path + name + '_graph_indicator.txt', delimiter=',').astype(int) |
|
|
|
if graph_labels: |
|
|
|
G_sub.graph['label'] = data_graph_labels[i] |
|
|
|
|
|
|
|
if G_sub.number_of_nodes() >= min_num_nodes and G_sub.number_of_nodes() <= max_num_nodes: |
|
|
|
graphs.append(G_sub) |
|
|
|
if G_sub.number_of_nodes() > max_nodes: |
|
|
|
max_nodes = G_sub.number_of_nodes() |
|
|
|
print('Loaded') |
|
|
|
|
|
|
|
return graphs |
|
|
|
data_graph_labels = np.loadtxt(path + name + '_graph_labels.txt', delimiter=',').astype(int) |
|
|
|
data_tuple = list(map(tuple, data_adj)) |
|
|
|
G.add_edges_from(data_tuple) |
|
|
|
for i in range(data_node_label.shape[0]): |
|
|
|
if node_attributes: |
|
|
|
G.add_node(i + 1, feature=data_node_att[i]) |
|
|
|
G.add_node(i + 1, label=data_node_label[i]) |
|
|
|
G.remove_nodes_from(list(nx.isolates(G))) |
|
|
|
graph_num = data_graph_indicator.max() |
|
|
|
node_list = np.arange(data_graph_indicator.shape[0]) + 1 |
|
|
|
graphs = [] |
|
|
|
max_nodes = 0 |
|
|
|
for i in range(graph_num): |
|
|
|
nodes = node_list[data_graph_indicator == i + 1] |
|
|
|
G_sub = G.subgraph(nodes) |
|
|
|
if graph_labels: |
|
|
|
G_sub.graph['label'] = data_graph_labels[i] |
|
|
|
|
|
|
|
if G_sub.number_of_nodes() >= min_num_nodes and G_sub.number_of_nodes() <= max_num_nodes: |
|
|
|
graphs.append(G_sub) |
|
|
|
if G_sub.number_of_nodes() > max_nodes: |
|
|
|
max_nodes = G_sub.number_of_nodes() |
|
|
|
print('Loaded') |
|
|
|
|
|
|
|
return graphs |
|
|
|
|
|
|
|
|
|
|
|
def feature_matrix(g): |
|
|
|
''' |
|
|
|
constructs the feautre matrix (N x 3) for the enzymes datasets |
|
|
|
''' |
|
|
|
esm = nx.get_node_attributes(g, 'label') |
|
|
|
piazche = np.zeros((len(esm), 3)) |
|
|
|
for i, (k, v) in enumerate(esm.items()): |
|
|
|
piazche[i][v-1] = 1 |
|
|
|
esm = nx.get_node_attributes(g, 'label') |
|
|
|
piazche = np.zeros((len(esm), 3)) |
|
|
|
for i, (k, v) in enumerate(esm.items()): |
|
|
|
piazche[i][v-1] = 1 |
|
|
|
|
|
|
|
return piazche |
|
|
|
return piazche |
|
|
|
|
|
|
|
|
|
|
|
# def remove_random_node(graph, max_size=40, min_size=10): |
|
|
@@ -101,18 +101,18 @@ def prepare_graph_data(graph, max_size=40, min_size=10): |
|
|
|
gets a graph as an input |
|
|
|
returns a graph with a randomly removed node adj matrix [0], its feature matrix [1], the removed node true links [2] |
|
|
|
''' |
|
|
|
if len(graph.nodes()) >= max_size or len(graph.nodes()) < min_size: |
|
|
|
return None |
|
|
|
relabeled_graph = nx.relabel.convert_node_labels_to_integers(graph) |
|
|
|
choice = np.random.choice(list(relabeled_graph.nodes())) |
|
|
|
remaining_graph = relabeled_graph.subgraph(filter(lambda x: x != choice, list(relabeled_graph.nodes()))) |
|
|
|
remaining_graph_adj = nx.to_numpy_matrix(remaining_graph) |
|
|
|
graph_length = len(remaining_graph) |
|
|
|
remaining_graph_adj = np.pad(remaining_graph_adj, [(0, max_size - graph_length), (0, max_size - graph_length)]) |
|
|
|
removed_node = nx.to_numpy_matrix(relabeled_graph)[choice] |
|
|
|
removed_node_row = np.asarray(removed_node)[0] |
|
|
|
removed_node_row = np.pad(removed_node_row, [(0, max_size - len(removed_node_row))]) |
|
|
|
return remaining_graph_adj, feature_matrix(remaining_graph), removed_node_row |
|
|
|
if len(graph.nodes()) >= max_size or len(graph.nodes()) < min_size: |
|
|
|
return None |
|
|
|
relabeled_graph = nx.relabel.convert_node_labels_to_integers(graph) |
|
|
|
choice = np.random.choice(list(relabeled_graph.nodes())) |
|
|
|
remaining_graph = relabeled_graph.subgraph(filter(lambda x: x != choice, list(relabeled_graph.nodes()))) |
|
|
|
remaining_graph_adj = nx.to_numpy_matrix(remaining_graph) |
|
|
|
graph_length = len(remaining_graph) |
|
|
|
remaining_graph_adj = np.pad(remaining_graph_adj, [(0, max_size - graph_length), (0, max_size - graph_length)]) |
|
|
|
removed_node = nx.to_numpy_matrix(relabeled_graph)[choice] |
|
|
|
removed_node_row = np.asarray(removed_node)[0] |
|
|
|
removed_node_row = np.pad(removed_node_row, [(0, max_size - len(removed_node_row))]) |
|
|
|
return remaining_graph_adj, feature_matrix(remaining_graph), removed_node_row |
|
|
|
|
|
|
|
"""" |
|
|
|
Layers: |
|
|
@@ -144,36 +144,36 @@ class GraphConv(nn.Module): |
|
|
|
|
|
|
|
class GraphAttn(nn.Module): |
|
|
|
def __init__(self, heads, model_dim, dropout=0.1): |
|
|
|
super().__init__() |
|
|
|
self.model_dim = model_dim |
|
|
|
self.key_dim = model_dim // heads |
|
|
|
self.heads = heads |
|
|
|
super().__init__() |
|
|
|
self.model_dim = model_dim |
|
|
|
self.key_dim = model_dim // heads |
|
|
|
self.heads = heads |
|
|
|
|
|
|
|
self.q_linear = nn.Linear(model_dim, model_dim).cuda() |
|
|
|
self.v_linear = nn.Linear(model_dim, model_dim).cuda() |
|
|
|
self.k_linear = nn.Linear(model_dim, model_dim).cuda() |
|
|
|
self.q_linear = nn.Linear(model_dim, model_dim).cuda() |
|
|
|
self.v_linear = nn.Linear(model_dim, model_dim).cuda() |
|
|
|
self.k_linear = nn.Linear(model_dim, model_dim).cuda() |
|
|
|
|
|
|
|
self.dropout = nn.Dropout(dropout) |
|
|
|
self.out = nn.Linear(model_dim, model_dim).cuda() |
|
|
|
self.dropout = nn.Dropout(dropout) |
|
|
|
self.out = nn.Linear(model_dim, model_dim).cuda() |
|
|
|
|
|
|
|
def forward(self, query, key, value): |
|
|
|
# print(q, k, v) |
|
|
|
bs = query.size(0) |
|
|
|
# print(q, k, v) |
|
|
|
bs = query.size(0) |
|
|
|
|
|
|
|
key = self.k_linear(key.float()).view(bs, -1, self.heads, self.key_dim) |
|
|
|
query = self.q_linear(query.float()).view(bs, -1, self.heads, self.key_dim) |
|
|
|
value = self.v_linear(value.float()).view(bs, -1, self.heads, self.key_dim) |
|
|
|
key = self.k_linear(key.float()).view(bs, -1, self.heads, self.key_dim) |
|
|
|
query = self.q_linear(query.float()).view(bs, -1, self.heads, self.key_dim) |
|
|
|
value = self.v_linear(value.float()).view(bs, -1, self.heads, self.key_dim) |
|
|
|
|
|
|
|
key = key.transpose(1,2) |
|
|
|
query = query.transpose(1,2) |
|
|
|
value = value.transpose(1,2) |
|
|
|
key = key.transpose(1,2) |
|
|
|
query = query.transpose(1,2) |
|
|
|
value = value.transpose(1,2) |
|
|
|
|
|
|
|
scores = attention(query, key, value, self.key_dim) |
|
|
|
concat = scores.transpose(1,2).contiguous().view(bs, -1, self.model_dim) |
|
|
|
output = self.out(concat) |
|
|
|
output = output.view(bs, self.model_dim) |
|
|
|
scores = attention(query, key, value, self.key_dim) |
|
|
|
concat = scores.transpose(1,2).contiguous().view(bs, -1, self.model_dim) |
|
|
|
output = self.out(concat) |
|
|
|
output = output.view(bs, self.model_dim) |
|
|
|
|
|
|
|
return output |
|
|
|
return output |
|
|
|
|
|
|
|
|
|
|
|
class FeedForward(nn.Module): |
|
|
@@ -196,16 +196,16 @@ class FeedForward(nn.Module): |
|
|
|
|
|
|
|
|
|
|
|
class Hydra(nn.Module): |
|
|
|
def __init__(self, gcn_input, model_dim, head): |
|
|
|
super().__init__() |
|
|
|
def __init__(self, gcn_input, model_dim, head): |
|
|
|
super().__init__() |
|
|
|
|
|
|
|
self.GCN = GraphConv(input_dim=gcn_input, output_dim=model_dim).cuda() |
|
|
|
self.GAT = GraphAttn(heads=head, model_dim=model_dim).cuda() |
|
|
|
self.MLP = FeedForward(input_size=model_dim, hidden_size=gcn_input).cuda() |
|
|
|
self.GCN = GraphConv(input_dim=gcn_input, output_dim=model_dim).cuda() |
|
|
|
self.GAT = GraphAttn(heads=head, model_dim=model_dim).cuda() |
|
|
|
self.MLP = FeedForward(input_size=model_dim, hidden_size=gcn_input).cuda() |
|
|
|
|
|
|
|
def forward(self, x, adj): |
|
|
|
gcn_outputs = self.GCN(x, adj) |
|
|
|
gat_output = self.GAT(gcn_outputs) |
|
|
|
mlp_output = self.MLP(gat_output).reshape(1,-1) |
|
|
|
def forward(self, x, adj): |
|
|
|
gcn_outputs = self.GCN(x, adj) |
|
|
|
gat_output = self.GAT(gcn_outputs) |
|
|
|
mlp_output = self.MLP(gat_output).reshape(1,-1) |
|
|
|
|
|
|
|
return mlp_output |
|
|
|
return mlp_output |