a.amiri
/
GraphRNN2


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
							import networkx as nx
import numpy as np
import scipy.io
import os
import shutil
import random
import torch
from sklearn.model_selection import StratifiedKFold


class S2VGraph(object):
    def __init__(self, g, label, node_tags=None, node_features=None):
        '''
            Container bundling one graph with its label and per-node data.

            g: a networkx graph
            label: an integer graph label
            node_tags: a list of integer node tags
            node_features: a torch float tensor, one-hot representation of the tag that is used as input to neural nets;
                           stored as given, or as the placeholder 0 when omitted
            edge_mat: a torch long tensor, contain edge list, will be used to create torch sparse tensor
                      (initialised to the placeholder 0; not filled in by this constructor)
            neighbors: list of neighbors (without self-loop); starts out empty
            max_neighbor: initialised to 0; not computed by this constructor
        '''
        self.label = label
        self.g = g
        self.node_tags = node_tags
        self.neighbors = []
        # The argument used to be dropped unconditionally. Keep the historical
        # placeholder 0 for callers that pass nothing, but honour an explicit value.
        self.node_features = 0 if node_features is None else node_features
        self.edge_mat = 0

        self.max_neighbor = 0


def load_data(dataset, degree_as_tag):
    '''
        Read every graph stored in 'dataset/<dataset>/<dataset>.txt'.

        File layout, as parsed below:
          * first line: the number of graphs
          * per graph: one line "<number of nodes> <graph label>", followed by
            one line per node "<tag> <number of neighbors> <neighbor ids ...>"
            optionally followed by extra attribute columns.
        Only the topology is kept: node tags and attribute columns are not
        part of the returned value, so they are not collected.

        dataset: name of dataset (used for both the directory and the file stem)
        degree_as_tag: accepted for interface compatibility; not used by this function

        returns: (list of networkx graphs, number of distinct graph labels)
    '''

    print('loading data')
    g_list = []
    seen_labels = set()

    with open('dataset/%s/%s.txt' % (dataset, dataset), 'r') as f:
        n_g = int(f.readline().strip())
        for _ in range(n_g):
            n, l = [int(w) for w in f.readline().strip().split()]
            seen_labels.add(l)

            g = nx.Graph()
            for j in range(n):
                g.add_node(j)
                row = f.readline().strip().split()
                # row[1] is the neighbor count, so the integer part of the
                # line is the first row[1] + 2 fields; anything after that
                # is an attribute column and is ignored here.
                n_int_fields = int(row[1]) + 2
                fields = [int(w) for w in row[:n_int_fields]]
                for neighbor in fields[2:]:
                    g.add_edge(j, neighbor)

            # An edge pointing at an id outside 0..n-1 would have created an
            # extra node; treat that as a corrupt file.
            assert len(g) == n

            g_list.append(g)

    return g_list, len(seen_labels)


def separate_data(graph_list, seed, fold_idx):
    '''
        Return the train/test partition for one fold of a stratified 10-fold split.

        graph_list: sequence of objects exposing a ``label`` attribute
        seed: random state handed to StratifiedKFold (shuffling is enabled)
        fold_idx: which of the 10 folds to use as the test part (0..9)

        returns: (list of training graphs, list of test graphs)
    '''
    assert 0 <= fold_idx < 10, "fold_idx must be from 0 to 9."
    splitter = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)

    targets = [item.label for item in graph_list]
    # Only the labels drive the stratification, so a dummy feature array is enough.
    folds = list(splitter.split(np.zeros(len(targets)), targets))
    train_idx, test_idx = folds[fold_idx]

    train_graph_list = []
    for position in train_idx:
        train_graph_list.append(graph_list[position])
    test_graph_list = []
    for position in test_idx:
        test_graph_list.append(graph_list[position])

    return train_graph_list, test_graph_list


def save_graphs_as_mat(graphs_list):
    '''
        Write every graph to a pair of .mat files inside a fresh 'test_graphs' directory.

        For the k-th graph (counting from 1) with n nodes two files are written:
          * testgraph_<n>_<k>__.txt.mat  -- key 'data': the dense adjacency array
          * testgraph_<n>_<k>__.usr.mat  -- key 'attributes': an all-ones (n, 60) array
        Any existing 'test_graphs' directory is deleted first.

        graphs_list: iterable of networkx graphs
    '''
    if os.path.isdir("test_graphs"):
        shutil.rmtree("test_graphs")
    if not os.path.exists('test_graphs'):
        os.makedirs('test_graphs')

    for counter, g in enumerate(graphs_list, start=1):
        # nx.to_numpy_matrix was removed in NetworkX 3.0; the dense array is
        # sufficient for both the debug output and the saved file.
        curr_graph = nx.to_numpy_array(g)

        # Debug output for a single sample graph: its sparsity and first row.
        if counter == 101:
            print(1.0 - (np.count_nonzero(curr_graph) / float(curr_graph.size)))
            print("###########################################################")
            print(curr_graph[0])

        curr_graph_att = np.ones((len(curr_graph), 60))
        scipy.io.savemat('test_graphs/testgraph_{}_{}__.txt.mat'.format(curr_graph.shape[0], counter),
                         {'data': curr_graph})
        scipy.io.savemat('test_graphs/testgraph_{}_{}__.usr.mat'.format(curr_graph.shape[0], counter),
                         {'attributes': curr_graph_att})

def move_random_node_to_the_last_index(adj):
    '''
        Pick one node uniformly at random and relocate it to the last index.

        The remaining nodes keep their relative order. The last column of the
        result is the picked node's original column (without its own diagonal
        entry); the last row is that same column transposed, and the bottom-right
        entry is set to 0. For a symmetric adjacency matrix with a zero diagonal
        this is exactly a simultaneous row/column permutation.

        adj: square adjacency matrix, either a numpy ndarray or a numpy matrix;
             it is not modified
        returns: a new matrix of the same shape, dtype and container type as adj
    '''
    size = adj.shape[0]
    # selecting a random node and moving it to the last node position
    picked = np.random.randint(size)

    dense = np.asarray(adj)
    remaining = np.delete(np.arange(size), picked)
    picked_column = dense[remaining, picked]

    moved = np.zeros((size, size), dtype=dense.dtype)
    moved[:-1, :-1] = dense[np.ix_(remaining, remaining)]
    moved[:-1, -1] = picked_column
    moved[-1, :-1] = picked_column
    # moved[-1, -1] intentionally stays 0 (no self-loop on the relocated node)

    # Hand back the same container type the caller supplied.
    if isinstance(adj, np.matrix):
        return np.asmatrix(moved)
    return moved


def prepare_kronEM_data(graphs_list, data_name, random_node_permutation_flag):
    '''
        Export graphs as tab-separated, 1-based edge lists into two fresh directories.

        'kronEM_main_graphs_<data_name>/<k>.txt' receives every (row, column) pair
        whose adjacency entry equals 1. 'kronEM_graphs_with_missing_node_<data_name>/<k>.txt'
        receives the same pairs except those touching the last node index.
        Both directories are deleted and recreated on every call.

        graphs_list: iterable of networkx graphs
        data_name: suffix appended to both output directory names
        random_node_permutation_flag: when true, each graph is written 3 times, each
            time after moving a randomly chosen node to the last index; otherwise
            each graph is written once, unchanged
    '''
    main_dir = "kronEM_main_graphs_" + data_name
    missing_dir = "kronEM_graphs_with_missing_node_" + data_name
    for directory in (main_dir, missing_dir):
        if os.path.isdir(directory):
            shutil.rmtree(directory)
        if not os.path.exists(directory):
            os.makedirs(directory)

    if random_node_permutation_flag:
        number_of_random_node_permutation_per_graph = 3
    else:
        number_of_random_node_permutation_per_graph = 1

    counter = 0
    for g in graphs_list:
        for _ in range(number_of_random_node_permutation_per_graph):
            counter += 1
            # nx.to_numpy_matrix was removed in NetworkX 3.0; wrapping the dense
            # array in a numpy matrix yields the same object it used to return.
            numpy_matrix = np.asmatrix(nx.to_numpy_array(g))
            if random_node_permutation_flag:
                numpy_matrix = move_random_node_to_the_last_index(numpy_matrix)

            adjacency = np.asarray(numpy_matrix)
            last_index = adjacency.shape[0] - 1
            file_name = str(counter) + ".txt"
            # Both handles are managed by the with-statement so neither leaks,
            # even if a write fails part-way through.
            with open(main_dir + "/" + file_name, "w") as file_main, \
                    open(missing_dir + "/" + file_name, "w") as file_missing:
                # argwhere yields indices in row-major order, i.e. the same
                # order as a nested loop over rows then columns.
                for i, j in np.argwhere(adjacency == 1):
                    edge_line = str(i + 1) + "\t" + str(j + 1) + "\n"
                    file_main.write(edge_line)
                    if i != last_index and j != last_index:
                        file_missing.write(edge_line)
    return