In [1]:
from google.colab import drive

In [2]:
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


### Imports

In [7]:
import math
import time
import torch
import numpy as np
import pandas as pd
import scipy as sp
import networkx as nx
import seaborn as sns
import torch.nn as nn
from torch import Tensor
import torch.nn.init as init
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
from sklearn.model_selection import train_test_split


In [4]:
if torch.cuda.is_available():
  torch.device('cuda')

# Data Loading

Loading graph dataset

In [5]:
def Graph_load_batch(min_num_nodes=20, max_num_nodes=1000, name='ENZYMES', node_attributes=True, graph_labels=True):
    print('Loading graph dataset: ' + str(name))
    G = nx.Graph()
    # load data
    # path = '../dataset/' + name + '/'
    path = '/content/gdrive/My Drive/' + name + '/'
    data_adj = np.loadtxt(path + name + '_A.txt', delimiter=',').astype(int)
    if node_attributes:
        data_node_att = np.loadtxt(path + name + '_node_attributes.txt', delimiter=',')
    data_node_label = np.loadtxt(path + name + '_node_labels.txt', delimiter=',').astype(int)
    data_graph_indicator = np.loadtxt(path + name + '_graph_indicator.txt', delimiter=',').astype(int)
    if graph_labels:
        data_graph_labels = np.loadtxt(path + name + '_graph_labels.txt', delimiter=',').astype(int)
    data_tuple = list(map(tuple, data_adj))
    G.add_edges_from(data_tuple)
    for i in range(data_node_label.shape[0]):
        if node_attributes:
            G.add_node(i + 1, feature=data_node_att[i])
        G.add_node(i + 1, label=data_node_label[i])
    G.remove_nodes_from(list(nx.isolates(G)))
    graph_num = data_graph_indicator.max()
    node_list = np.arange(data_graph_indicator.shape[0]) + 1
    graphs = []
    max_nodes = 0
    for i in range(graph_num):
        nodes = node_list[data_graph_indicator == i + 1]
        G_sub = G.subgraph(nodes)
        if graph_labels:
            G_sub.graph['label'] = data_graph_labels[i]

        if G_sub.number_of_nodes() >= min_num_nodes and G_sub.number_of_nodes() <= max_num_nodes:
            graphs.append(G_sub)
            if G_sub.number_of_nodes() > max_nodes:
                max_nodes = G_sub.number_of_nodes()

    print('Loaded')
    return graphs

Constructing feature matrix of a graph

In [11]:
def feature_matrix(g, max_nodes=40):
    esm = nx.get_node_attributes(g, 'label')
    piazche = np.zeros((max_nodes, 3))
    for i, (k, v) in enumerate(esm.items()):
        # print(i, k , v)
        piazche[i][v-1] = 1
    return piazche

Removing a random node from a graph

Returns remaining graph with removed node links

In [12]:
def remove_random_node(graph, max_size=40, min_size=10):
    if len(graph.nodes()) >= max_size or len(graph.nodes()) < min_size:
        return None
    relabeled_graph = nx.relabel.convert_node_labels_to_integers(graph)
    choice = np.random.choice(list(relabeled_graph.nodes()))
    remaining_graph = nx.to_numpy_matrix(relabeled_graph.subgraph(filter(lambda x: x != choice, list(relabeled_graph.nodes()))))
    removed_node = nx.to_numpy_matrix(relabeled_graph)[choice]
    graph_length = len(remaining_graph)
    source_graph = np.pad(remaining_graph, [(0, max_size - graph_length), (0, max_size - graph_length)])
    # target_graph = np.copy(source_graph)
    removed_node_row = np.asarray(removed_node)[0]
    # target_graph[graph_length] = np.pad(removed_node_row, [(0, max_size - len(removed_node_row))])
    return remaining_graph, removed_node_row

Prepare graphs for the model

returns a graph with a randomly removed node adj matrix [0], its feature matrix [1], the removed node true links [2] 

In [13]:
def prepare_graph_data(graph, max_size=40, min_size=10):
    if len(graph.nodes()) >= max_size or len(graph.nodes()) < min_size:
        return None
    relabeled_graph = nx.relabel.convert_node_labels_to_integers(graph)
    choice = np.random.choice(list(relabeled_graph.nodes()))
    remaining_graph = relabeled_graph.subgraph(filter(lambda x: x != choice, list(relabeled_graph.nodes())))
    remaining_graph_adj = nx.to_numpy_matrix(remaining_graph)
    graph_length = len(remaining_graph)
    remaining_graph_adj = np.pad(remaining_graph_adj, [(0, max_size - graph_length), (0, max_size - graph_length)])
    removed_node = nx.to_numpy_matrix(relabeled_graph)[choice]
    removed_node_row = np.asarray(removed_node)[0]
    removed_node_row = np.pad(removed_node_row, [(0, max_size - len(removed_node_row))])
    return remaining_graph_adj, feature_matrix(remaining_graph),  removed_node_row

In [6]:
graphs = Graph_load_batch(min_num_nodes=10, name='ENZYMES')

Loading graph dataset: ENZYMES
Loaded


In [9]:
train, test = train_test_split(graphs, test_size=0.2)

In [30]:
# coop = sum([list(filter(lambda x: x is not None, [prepare_graph_data(g) for g in graphs])) for i in range(10)], [])
coop = list(filter(lambda x: x is not None, [prepare_graph_data(g) for g in train]))
dale = list(filter(lambda x: x is not None, [prepare_graph_data(g) for g in test]))

In [47]:
trainloader = torch.utils.data.DataLoader(coop, collate_fn=lambda x: x[0], batch_size=1)

In [32]:
testloader = torch.utils.data.DataLoader(dale, collate_fn=lambda x: x[0], batch_size=1)

# Building Model

Graph convolutional layer for extracting initial features

In [14]:
class GraphConv(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.weight = nn.Parameter(torch.FloatTensor(input_dim, output_dim).cuda())
        self.relu = nn.ReLU()

    def forward(self, x, adj):
        '''
        x is hamun feature matrix 
        adj ham ke is hamun
        '''
        y = torch.matmul(adj, x)
        # print(y.shape)
        # print(self.weight.shape)
        y = torch.matmul(y, self.weight.double())
        return y

Attention calculator using given key, query and value

In [15]:
def attention(query, key, value, key_dim):
    # print('key:', key.transpose(-2,-1))
    scores = torch.matmul(query, key.transpose(-2,-1)) / math.sqrt(key_dim)
    scores = torch.matmul(scores, value)
    # print('scores:', scores)
    scores = F.softmax(scores)
    # scores = torch.sigmoid(scores)
    return scores

Graph attention layer for more features

In [16]:
class GraphAttn(nn.Module):
    def __init__(self, heads, model_dim, dropout=0.1):
        super().__init__()
        self.model_dim = model_dim
        self.key_dim = model_dim // heads
        self.heads = heads

        self.q_linear = nn.Linear(model_dim, model_dim).cuda()
        self.v_linear = nn.Linear(model_dim, model_dim).cuda()
        self.k_linear = nn.Linear(model_dim, model_dim).cuda()

        self.dropout = nn.Dropout(dropout)
        self.out = nn.Linear(model_dim, model_dim).cuda()

    def forward(self, query, key, value):
        # print(q, k, v)
        bs = query.size(0)

        key = self.k_linear(key.float()).view(bs, -1, self.heads, self.key_dim)
        query = self.q_linear(query.float()).view(bs, -1, self.heads, self.key_dim)
        value = self.v_linear(value.float()).view(bs, -1, self.heads, self.key_dim)

        key = key.transpose(1,2)
        query = query.transpose(1,2)
        value = value.transpose(1,2)

        scores = attention(query, key, value, self.key_dim)
        concat = scores.transpose(1,2).contiguous().view(bs, -1, self.model_dim)
        output = self.out(concat)
        output = output.view(bs, self.model_dim)

        return output

MLP layer to map features to links

In [17]:
class FeedForward(nn.Module):
        def __init__(self, input_size, hidden_size):
            super().__init__()
            self.input_size = input_size
            self.hidden_size  = hidden_size
            self.fully_connected1 = nn.Linear(self.input_size, self.hidden_size).cuda()
            self.relu = nn.ReLU()
            self.fully_connected2 = nn.Linear(self.hidden_size, 1).cuda()
            self.sigmoid = nn.Sigmoid()

        def forward(self, x):
            hidden = self.fully_connected1(x.float())
            relu = self.relu(hidden)
            output = self.fully_connected2(relu)
            output = self.sigmoid(output)
            return output

Assembeled model

In [18]:
class Hydra(nn.Module):
    def __init__(self, gcn_input, model_dim, head):
        super().__init__()

        self.GCN = GraphConv(input_dim=gcn_input, output_dim=model_dim).cuda()
        self.GAT = GraphAttn(heads=head, model_dim=model_dim).cuda()
        self.MLP = FeedForward(input_size=model_dim, hidden_size=gcn_input).cuda()

    def forward(self, x, adj):
        gcn_outputs = self.GCN(x, adj)
        gat_output = self.GAT(gcn_outputs, gcn_outputs, gcn_outputs)
        mlp_output = self.MLP(gat_output).reshape(1,-1)

        return mlp_output

Building model with given inputs

In [19]:
def build_model(gcn_input, model_dim, head):
    model = Hydra(gcn_input, model_dim, head).cuda()
    return model

# Evaluating Functions

# Training Model

Training the model with given data and number of epochs

In [57]:
def train_model(model, trainloader, epoch, print_every=100):
    optim = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.98), eps=1e-9)

    model.train()
    start = time.time()
    temp = start
    total_loss = 0

    for i in range(epoch):
        for batch, data in enumerate(trainloader, 0):
            adj, features, true_links = data
            adj, features, true_links  = torch.tensor(adj).cuda(), torch.tensor(features).cuda(), torch.tensor(true_links).cuda()
            # print(adj.shape)
            # print(features.shape)
            # print(true_links.shape)
            preds = model(features, adj)
            optim.zero_grad()
            loss = F.binary_cross_entropy(preds.double(), true_links.double())
            writer.add_scalar('Loss/train', float(loss), i)
            loss.backward()
            optim.step()
            total_loss += loss.item()
        if (i + 1) % print_every == 0:
            loss_avg = total_loss / print_every
            print("time = %dm, epoch %d, iter = %d, loss = %.3f,\
            %ds per %d iters" % ((time.time() - start) // 60,\
            epoch + 1, i + 1, loss_avg, time.time() - temp,\
            print_every))
            total_loss = 0
            temp = time.time()

In [25]:
kyle = build_model(3, 243, 9)

In [58]:
train_model(kyle, trainloader_train, 100, 10)

  


time = 0m, epoch 101, iter = 10, loss = 92.819,        19s per 10 iters
time = 0m, epoch 101, iter = 20, loss = 92.782,        19s per 10 iters
time = 0m, epoch 101, iter = 30, loss = 92.814,        19s per 10 iters
time = 1m, epoch 101, iter = 40, loss = 92.848,        19s per 10 iters
time = 1m, epoch 101, iter = 50, loss = 92.723,        19s per 10 iters
time = 1m, epoch 101, iter = 60, loss = 92.734,        19s per 10 iters
time = 2m, epoch 101, iter = 70, loss = 92.718,        19s per 10 iters
time = 2m, epoch 101, iter = 80, loss = 92.873,        19s per 10 iters
time = 2m, epoch 101, iter = 90, loss = 92.730,        19s per 10 iters
time = 3m, epoch 101, iter = 100, loss = 92.987,        19s per 10 iters


# Testing Model

Testing model and printing the loss

In [41]:
def test_model(model, trainloader, print_every=10):
    start = time.time()
    temp = start
    # total_loss = 0

    for batch, data in enumerate(trainloader, 0):
        adj, features, true_links = data
        adj, features, true_links  = torch.tensor(adj).cuda(), torch.tensor(features).cuda(), torch.tensor(true_links).cuda()
        # print(adj.shape)
        # print(features.shape)
        # print(true_links.shape)
        preds = model(features, adj)
        loss = F.binary_cross_entropy(preds.double(), true_links.double())
        # total_loss += loss.item()
        # loss_avg = total_loss / print_every
        if (batch + 1) % print_every == 0:
            print("loss = ", float(loss))
            temp = time.time()

In [42]:
test_model(kyle, trainloader_test)

loss =  0.3164060614241102
loss =  0.4639609998843695
loss =  0.2922176129342856
loss =  0.31008866565259996
loss =  0.21323858744621602
loss =  0.22083598860733727
loss =  0.3246467668552516


  
  del sys.path[0]


# Evaluating Model

# Plots

In [43]:
%load_ext tensorboard

In [44]:
from torch.utils.tensorboard import SummaryWriter
import torchvision

In [49]:
writer = SummaryWriter()

In [54]:
adj, features, true_links = next(iter(trainloader_train))
adj, features, true_links  = torch.tensor(adj).cuda(), torch.tensor(features).cuda(), torch.tensor(true_links).cuda()
grid = torchvision.utils.make_grid(adj)
writer.add_image('adj', grid, 0)
writer.add_graph(kyle, input_to_model= [features, adj])
writer.close()

  


In [55]:
%tensorboard --logdir=runs

<IPython.core.display.Javascript object>