import math
import numpy as np
import pandas as pd
import networkx as nx
import scipy as sp
import seaborn as sns
import time
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
from torch import Tensor

# NOTE(review): torch.device('cuda') only constructs a device object and the
# result is discarded here, so this statement does not change where tensors or
# modules are allocated. Presumably the intent was to select the GPU as the
# default device -- confirm and replace with an explicit device if so.
if torch.cuda.is_available():
  torch.device('cuda')

"""
Utils:
Data Loader
Feature Matrix Constructor
Random Node Remover
"""

def Graph_load_batch(min_num_nodes=20, max_num_nodes=1000, name='ENZYMES', node_attributes=True, graph_labels=True):
    '''
    load many graphs, e.g. enzymes

    Reads the comma-separated files `<name>_A.txt`, `<name>_node_labels.txt`,
    `<name>_graph_indicator.txt` and, depending on the flags,
    `<name>_node_attributes.txt` / `<name>_graph_labels.txt` from the
    hard-coded directory '/content/gdrive/My Drive/<name>/'.

    :param min_num_nodes: smallest node count (inclusive) a graph may have to be kept
    :param max_num_nodes: largest node count (inclusive) a graph may have to be kept
    :param name: dataset name, used both as directory name and as file prefix
    :param node_attributes: if True, store each node's attribute row under 'feature'
    :param graph_labels: if True, store each graph's label in `graph['label']`
    :return: a list of graphs (independent copies, one per kept graph)
    '''
    print('Loading graph dataset: ' + str(name))
    G = nx.Graph()
    # load data
    # path = '../dataset/' + name + '/'
    path = '/content/gdrive/My Drive/' + name + '/'
    data_adj = np.loadtxt(path + name + '_A.txt', delimiter=',').astype(int)
    if node_attributes:
        data_node_att = np.loadtxt(path + name + '_node_attributes.txt', delimiter=',')
    data_node_label = np.loadtxt(path + name + '_node_labels.txt', delimiter=',').astype(int)
    data_graph_indicator = np.loadtxt(path + name + '_graph_indicator.txt', delimiter=',').astype(int)
    if graph_labels:
        data_graph_labels = np.loadtxt(path + name + '_graph_labels.txt', delimiter=',').astype(int)

    G.add_edges_from(map(tuple, data_adj))

    # Node ids are 1-based: row i of the per-node files describes node i + 1.
    for i in range(data_node_label.shape[0]):
        attrs = {'label': data_node_label[i]}
        if node_attributes:
            attrs['feature'] = data_node_att[i]
        G.add_node(i + 1, **attrs)
    G.remove_nodes_from(list(nx.isolates(G)))

    graph_num = data_graph_indicator.max()
    node_list = np.arange(data_graph_indicator.shape[0]) + 1
    graphs = []
    for i in range(graph_num):
        nodes = node_list[data_graph_indicator == i + 1]
        # copy() is required: a plain subgraph shares its `graph` attribute
        # dict with G, so writing the label below would overwrite the same
        # dict for every graph and leave them all with the last label.
        G_sub = G.subgraph(nodes).copy()
        if graph_labels:
            G_sub.graph['label'] = data_graph_labels[i]

        if min_num_nodes <= G_sub.number_of_nodes() <= max_num_nodes:
            graphs.append(G_sub)

    print('Loaded')
    return graphs

def feature_matrix(g):
    '''
    Build the one-hot node-label feature matrix (N x 3) for the enzymes datasets.

    One row is produced per entry returned by nx.get_node_attributes(g, 'label'),
    in that dict's iteration order; a label value v sets column v - 1 to 1.
    '''
    node_labels = nx.get_node_attributes(g, 'label')
    one_hot = np.zeros((len(node_labels), 3))
    # Iterating a 2-D array yields row views, so writes land in `one_hot`.
    for row, label in zip(one_hot, node_labels.values()):
        row[label - 1] = 1
    return one_hot

# def remove_random_node(graph, max_size=40, min_size=10):
#     '''
#     removes a random node from the gragh
#     returns the remaining graph matrix and the removed node links
#     '''
#     if len(graph.nodes()) >= max_size or len(graph.nodes()) < min_size:
#         return None
#     relabeled_graph = nx.relabel.convert_node_labels_to_integers(graph)
#     choice = np.random.choice(list(relabeled_graph.nodes()))
#     remaining_graph = nx.to_numpy_matrix(relabeled_graph.subgraph(filter(lambda x: x != choice, list(relabeled_graph.nodes()))))
#     removed_node = nx.to_numpy_matrix(relabeled_graph)[choice]
#     graph_length = len(remaining_graph)
#     # source_graph = np.pad(remaining_graph, [(0, max_size - graph_length), (0, max_size - graph_length)])
#     # target_graph = np.copy(source_graph)
#     removed_node_row = np.asarray(removed_node)[0]
#     # target_graph[graph_length] = np.pad(removed_node_row, [(0, max_size - len(removed_node_row))])
#     return remaining_graph, removed_node_row

def prepare_graph_data(graph, max_size=40, min_size=10):
  '''
  Hide one randomly chosen node of `graph` and return the resulting pieces.

  Returns None when the graph has max_size or more nodes, or fewer than
  min_size nodes. Otherwise returns a tuple:
    [0] adjacency matrix of the graph without the chosen node
    [1] feature matrix of that remaining graph (built by feature_matrix)
    [2] the chosen node's row of the full adjacency matrix (its true links)
  '''
  node_count = len(graph.nodes())
  if not (min_size <= node_count < max_size):
    return None

  # Nodes are relabeled to consecutive integers; the chosen node id is then
  # used directly as a row index into the full adjacency matrix.
  indexed_graph = nx.relabel.convert_node_labels_to_integers(graph)
  node_ids = list(indexed_graph.nodes())
  dropped = np.random.choice(node_ids)

  kept_graph = indexed_graph.subgraph([n for n in node_ids if n != dropped])
  kept_adj = nx.to_numpy_matrix(kept_graph)

  full_adj = nx.to_numpy_matrix(indexed_graph)
  dropped_links = np.asarray(full_adj[dropped])[0]
  return kept_adj, feature_matrix(kept_graph), dropped_links

"""
Layers:
Graph Convolution
Graph Multihead Attention
"""

class GraphConv(nn.Module):
    '''
    Graph convolution layer computing (adj @ x) @ weight.

    The layer has a single learnable weight of shape (input_dim, output_dim),
    no bias, and applies no non-linearity inside forward.
    '''

    def __init__(self, input_dim, output_dim):
        '''
        :param input_dim: number of input features per node
        :param output_dim: number of output features per node
        '''
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        # Use the GPU when one is present (the previous behaviour) but fall
        # back to the CPU instead of crashing on CUDA-less machines.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        weight = torch.empty(input_dim, output_dim, dtype=torch.float32, device=device)
        # torch.empty / torch.FloatTensor return uninitialized memory (which
        # may contain NaN or inf), so the weight must be initialized explicitly.
        nn.init.xavier_uniform_(weight)
        self.weight = nn.Parameter(weight)
        # Kept as an attribute for callers; forward does not apply it.
        self.relu = nn.ReLU()

    def forward(self, x, adj):
        '''
        x is the feature matrix constructed in feature_matrix function
        adj is the adjacency matrix of the graph

        :return: adj @ x @ weight, with the weight cast to double
        '''
        y = torch.matmul(adj, x)
        # The float32 weight is cast to double for the product, so adj and x
        # are expected to be double tensors.
        y = torch.matmul(y, self.weight.double())
        return y

class GraphAttn(nn.Module):
  '''
  Multi-head attention layer built from four model_dim x model_dim linear maps
  (query, key, value, output).

  Each head works on model_dim // heads features; the reshapes in forward
  assume model_dim is divisible by heads.
  '''

  def __init__(self, heads, model_dim, dropout=0.1):
    '''
    :param heads: number of attention heads
    :param model_dim: feature size of the inputs and of the output
    :param dropout: probability for the nn.Dropout stored in self.dropout
    '''
    super().__init__()
    self.model_dim = model_dim
    self.key_dim = model_dim // heads
    self.heads = heads

    # Use the GPU when one is present (the previous behaviour) but fall back
    # to the CPU instead of crashing on CUDA-less machines.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    self.q_linear = nn.Linear(model_dim, model_dim).to(device)
    self.v_linear = nn.Linear(model_dim, model_dim).to(device)
    self.k_linear = nn.Linear(model_dim, model_dim).to(device)

    # NOTE(review): this dropout module is created but never used in forward
    # below -- confirm whether it should be handed to `attention`.
    self.dropout = nn.Dropout(dropout)
    self.out = nn.Linear(model_dim, model_dim).to(device)

  def forward(self, query, key, value):
    '''
    Project the inputs, split them into heads, run `attention` and merge the
    heads back through the output projection.

    `attention` is not defined in this class; it is expected to be available
    at module level.

    :return: tensor reshaped to (query.size(0), model_dim)
    '''
    bs = query.size(0) # size of the graph
    # Project, then split the feature axis into (heads, key_dim).
    key = self.k_linear(key).view(bs, -1, self.heads, self.key_dim)
    query = self.q_linear(query).view(bs, -1, self.heads, self.key_dim)
    value = self.v_linear(value).view(bs, -1, self.heads, self.key_dim)

    # Swap axes 1 and 2 so the heads axis comes before the inferred axis.
    key = key.transpose(1,2)
    query = query.transpose(1,2)
    value = value.transpose(1,2)

    scores = attention(query, key, value, self.key_dim)
    # Undo the head split: back to (bs, -1, model_dim).
    concat = scores.transpose(1,2).contiguous().view(bs, -1, self.model_dim)
    output = self.out(concat)
    # This view only succeeds when the inferred middle axis has length 1.
    output = output.view(bs, self.model_dim)

    return output