import networkx as nx
import numpy as np
import scipy.io
import os
import shutil
import random
import torch
from sklearn.model_selection import StratifiedKFold


class S2VGraph(object):
    def __init__(self, g, label, node_tags=None, node_features=None):
        '''
        g: a networkx graph
        label: an integer graph label
        node_tags: a list of integer node tags
        node_features: a torch float tensor, the one-hot representation of the node tags, used as input to neural nets
        edge_mat: a torch long tensor containing the edge list, used to create a torch sparse tensor
        neighbors: list of neighbor lists (without self-loops)
        '''
        self.label = label
        self.g = g
        self.node_tags = node_tags
        self.neighbors = []
        self.node_features = 0
        self.edge_mat = 0

        self.max_neighbor = 0


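# Illustrative sketch, not part of the original module: how an S2VGraph wrapper
# is typically populated by hand. The triangle graph, the label value 0, and the
# node tags below are made-up placeholders.
def _s2vgraph_example():
    example_g = nx.Graph()
    example_g.add_edges_from([(0, 1), (1, 2), (2, 0)])
    s2v = S2VGraph(example_g, label=0, node_tags=[0, 0, 0])
    # neighbors and max_neighbor are normally filled in by a preprocessing step
    s2v.neighbors = [list(example_g.neighbors(v)) for v in example_g.nodes()]
    s2v.max_neighbor = max(len(nbrs) for nbrs in s2v.neighbors)
    return s2v

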
def load_data(dataset, degree_as_tag):
    '''
    dataset: name of the dataset; reads dataset/<dataset>/<dataset>.txt
    degree_as_tag: whether node degrees are used as node tags (unused in this function)
    '''

    print('loading data')
    g_list = []
    label_dict = {}
    feat_dict = {}
    with open('dataset/%s/%s.txt' % (dataset, dataset), 'r') as f:
        n_g = int(f.readline().strip())
        for i in range(n_g):
            row = f.readline().strip().split()
            n, l = [int(w) for w in row]
            if l not in label_dict:
                mapped = len(label_dict)
                label_dict[l] = mapped
            g = nx.Graph()
            node_tags = []
            node_features = []
            n_edges = 0
            for j in range(n):
                g.add_node(j)
                row = f.readline().strip().split()
                tmp = int(row[1]) + 2
                if tmp == len(row):
                    # no node attributes
                    row = [int(w) for w in row]
                    attr = None
                else:
                    row, attr = [int(w) for w in row[:tmp]], np.array([float(w) for w in row[tmp:]])
                if row[0] not in feat_dict:
                    mapped = len(feat_dict)
                    feat_dict[row[0]] = mapped
                node_tags.append(feat_dict[row[0]])

                # collect node attributes when they are present
                if attr is not None:
                    node_features.append(attr)

                n_edges += row[1]
                for k in range(2, len(row)):
                    g.add_edge(j, row[k])

            # node attributes are collected but not returned by this loader
            if node_features != []:
                node_features = np.stack(node_features)
                node_feature_flag = True
            else:
                node_features = None
                node_feature_flag = False

            assert len(g) == n
            # keep only small graphs (fewer than 21 nodes)
            if n < 21:
                g_list.append(g)

    return g_list, len(label_dict)


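# Illustrative sketch, not part of the original module: the text format that
# load_data expects, reconstructed from the parser above. The file starts with
# the number of graphs; each graph block starts with "<num_nodes> <graph_label>",
# followed by one line per node:
# "<node_tag> <num_neighbors> <neighbor ids...> [optional float attributes]".
# The dataset name 'TOY' is a made-up placeholder.
def _write_toy_dataset_example():
    os.makedirs('dataset/TOY', exist_ok=True)
    toy = (
        "1\n"        # one graph in the file
        "3 0\n"      # 3 nodes, graph label 0
        "0 2 1 2\n"  # node 0: tag 0, 2 neighbors (nodes 1 and 2)
        "0 2 0 2\n"  # node 1: tag 0, 2 neighbors (nodes 0 and 2)
        "0 2 0 1\n"  # node 2: tag 0, 2 neighbors (nodes 0 and 1)
    )
    with open('dataset/TOY/TOY.txt', 'w') as f:
        f.write(toy)
    return load_data('TOY', degree_as_tag=False)

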
def separate_data(graph_list, seed, fold_idx):
    # graph_list is expected to contain S2VGraph-style objects with a .label attribute
    assert 0 <= fold_idx < 10, "fold_idx must be from 0 to 9."
    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)

    labels = [graph.label for graph in graph_list]
    idx_list = []
    for idx in skf.split(np.zeros(len(labels)), labels):
        idx_list.append(idx)
    train_idx, test_idx = idx_list[fold_idx]

    train_graph_list = [graph_list[i] for i in train_idx]
    test_graph_list = [graph_list[i] for i in test_idx]

    return train_graph_list, test_graph_list


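# Illustrative sketch, not part of the original module: stratified 10-fold
# splitting over labelled graph wrappers. load_data above returns plain
# networkx graphs without a .label attribute, so they are wrapped first; the
# path graphs and alternating labels are made-up placeholders.
def _separate_data_example():
    wrapped = [S2VGraph(nx.path_graph(4), label=i % 2) for i in range(20)]
    train_graphs, test_graphs = separate_data(wrapped, seed=0, fold_idx=0)
    return len(train_graphs), len(test_graphs)

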
def save_graphs_as_mat(graphs_list):
    if os.path.isdir("test_graphs"):
        shutil.rmtree("test_graphs")
    if not os.path.exists('test_graphs'):
        os.makedirs('test_graphs')
    counter = 0
    for g in graphs_list:
        counter += 1
        curr_graph = nx.to_numpy_array(g)

        # print(1.0 - (np.count_nonzero(curr_graph) / float(curr_graph.size)))
        if counter == 101:
            # debug output: sparsity of the 101st graph and its first adjacency row
            print(1.0 - (np.count_nonzero(curr_graph) / float(curr_graph.size)))
            print("###########################################################")
            print(curr_graph[0])
        # placeholder attribute matrix: 60 all-one attributes per node
        curr_graph_att = np.ones((len(curr_graph), 60))
        scipy.io.savemat('test_graphs/testgraph_{}_{}__.txt.mat'.format(curr_graph.shape[0], counter),
                         {'data': curr_graph})
        scipy.io.savemat('test_graphs/testgraph_{}_{}__.usr.mat'.format(curr_graph.shape[0], counter),
                         {'attributes': curr_graph_att})


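# Illustrative sketch, not part of the original module: reading one of the .mat
# files written by save_graphs_as_mat back into numpy. The default path is a
# made-up placeholder; actual names depend on the graph size and the counter.
def _load_saved_mat_example(path='test_graphs/testgraph_3_1__.txt.mat'):
    mat = scipy.io.loadmat(path)
    return mat['data'].shape

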
def move_random_node_to_the_last_index(adj):
    # select a random node and move its row/column to the last position of the
    # adjacency matrix (the self-entry is dropped and replaced by 0)
    random_idx_for_delete = np.random.randint(adj.shape[0])
    adj = np.asarray(adj)
    # adjacency of the selected node to all other nodes, self-entry removed
    moved = np.delete(adj[:, random_idx_for_delete], random_idx_for_delete)
    # remove the selected node's row and column
    adj = np.delete(np.delete(adj, random_idx_for_delete, axis=0), random_idx_for_delete, axis=1)
    # re-attach the node as the last column and last row
    adj = np.concatenate((adj, moved.reshape(-1, 1)), axis=1)
    adj = np.concatenate((adj, np.append(moved, 0).reshape(1, -1)), axis=0)
    return adj


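# Illustrative sketch, not part of the original module: checks that moving a
# random node preserves the adjacency matrix shape and symmetry. The random
# graph parameters (10 nodes, edge probability 0.3, seed 0) are made up.
def _move_node_example():
    g = nx.gnp_random_graph(10, 0.3, seed=0)
    adj = nx.to_numpy_array(g)
    permuted = move_random_node_to_the_last_index(adj.copy())
    assert permuted.shape == adj.shape
    assert np.array_equal(permuted, permuted.T)
    return permuted

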
def prepare_kronEM_data(graphs_list, data_name, random_node_permutation_flag):
    if os.path.isdir("kronEM_main_graphs_" + data_name):
        shutil.rmtree("kronEM_main_graphs_" + data_name)
    if not os.path.exists("kronEM_main_graphs_" + data_name):
        os.makedirs("kronEM_main_graphs_" + data_name)
    if os.path.isdir("kronEM_graphs_with_missing_node_" + data_name):
        shutil.rmtree("kronEM_graphs_with_missing_node_" + data_name)
    if not os.path.exists("kronEM_graphs_with_missing_node_" + data_name):
        os.makedirs("kronEM_graphs_with_missing_node_" + data_name)
    counter = 0
    if random_node_permutation_flag:
        number_of_random_node_permutation_per_graph = 3
    else:
        number_of_random_node_permutation_per_graph = 1
    for g in graphs_list:
        for _ in range(number_of_random_node_permutation_per_graph):
            counter += 1
            numpy_matrix = nx.to_numpy_array(g)
            if random_node_permutation_flag:
                numpy_matrix = move_random_node_to_the_last_index(numpy_matrix)

            with open("kronEM_main_graphs_" + data_name + "/" + str(counter) + ".txt", "w") as file_main, \
                    open("kronEM_graphs_with_missing_node_" + data_name + "/" + str(counter) + ".txt", "w") as file_missing:
                # write the edge list with 1-based, tab-separated node ids; the
                # "missing node" file omits every edge that touches the last node
                for i in range(numpy_matrix.shape[0]):
                    for j in range(numpy_matrix.shape[0]):
                        if numpy_matrix[i, j] == 1:
                            file_main.write(str(i + 1) + "\t" + str(j + 1) + "\n")
                            if i != numpy_matrix.shape[0] - 1 and j != numpy_matrix.shape[0] - 1:
                                file_missing.write(str(i + 1) + "\t" + str(j + 1) + "\n")
    return
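

# Illustrative sketch, not part of the original module: one possible end-to-end
# run tying the helpers together. 'MUTAG' is assumed to exist under dataset/;
# substitute whatever dataset directory is actually available.
if __name__ == "__main__":
    example_graphs, num_classes = load_data('MUTAG', degree_as_tag=False)
    print('loaded %d graphs spanning %d class(es)' % (len(example_graphs), num_classes))
    save_graphs_as_mat(example_graphs)
    prepare_kronEM_data(example_graphs, 'MUTAG', random_node_permutation_flag=True)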