import networkx as nx
import numpy as np
import scipy.io
import os
import shutil
import random
import torch
from sklearn.model_selection import StratifiedKFold


class S2VGraph(object):
    def __init__(self, g, label, node_tags=None, node_features=None):
        '''
            g: a networkx graph
            label: an integer graph label
            node_tags: a list of integer node tags
            node_features: a torch float tensor, one-hot representation of the tag, used as input to neural nets
            edge_mat: a torch long tensor containing the edge list, used to create a torch sparse tensor
            neighbors: list of neighbors (without self-loops)
        '''
        self.label = label
        self.g = g
        self.node_tags = node_tags
        self.neighbors = []
        self.node_features = 0
        self.edge_mat = 0
        self.max_neighbor = 0


def load_data(dataset, degree_as_tag):
    '''
        dataset: name of the dataset; expects the file dataset/<name>/<name>.txt
        degree_as_tag: if True, node degrees are meant to be used as node tags
                       (kept for interface compatibility; unused in this trimmed loader)
    '''
    print('loading data')
    g_list = []
    label_dict = {}
    feat_dict = {}

    with open('dataset/%s/%s.txt' % (dataset, dataset), 'r') as f:
        n_g = int(f.readline().strip())  # first line: number of graphs
        for i in range(n_g):
            # graph header line: number of nodes and graph label
            row = f.readline().strip().split()
            n, l = [int(w) for w in row]
            if l not in label_dict:
                mapped = len(label_dict)
                label_dict[l] = mapped
            g = nx.Graph()
            node_tags = []
            node_features = []
            n_edges = 0
            for j in range(n):
                g.add_node(j)
                # node line: tag, number of neighbors, neighbor ids, [optional float attributes]
                row = f.readline().strip().split()
                tmp = int(row[1]) + 2
                if tmp == len(row):
                    # no node attributes
                    row = [int(w) for w in row]
                    attr = None
                else:
                    row, attr = [int(w) for w in row[:tmp]], np.array([float(w) for w in row[tmp:]])
                if row[0] not in feat_dict:
                    mapped = len(feat_dict)
                    feat_dict[row[0]] = mapped
                node_tags.append(feat_dict[row[0]])

                if attr is not None:
                    node_features.append(attr)

                n_edges += row[1]
                for k in range(2, len(row)):
                    g.add_edge(j, row[k])

            if node_features != []:
                node_features = np.stack(node_features)
                node_feature_flag = True
            else:
                node_features = None
                node_feature_flag = False

            assert len(g) == n
            # keep only small graphs (fewer than 21 nodes)
            if n < 21:
                g_list.append(g)

    return g_list, len(label_dict)
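
# --- Illustrative only: a minimal sketch of the text format load_data expects. ---
# This helper is not part of the original pipeline; the dataset name 'toy' and the
# graph below are made up purely to show the layout: first line = number of graphs,
# then per graph a "num_nodes label" header followed by one line per node of the
# form "tag num_neighbors neighbor_ids... [optional float attributes]".
def write_toy_dataset(dataset='toy'):
    os.makedirs('dataset/%s' % dataset, exist_ok=True)
    lines = [
        '1',        # one graph in the file
        '3 0',      # graph with 3 nodes, label 0
        '0 2 1 2',  # node 0: tag 0, two neighbors (nodes 1 and 2)
        '0 1 0',    # node 1: tag 0, one neighbor (node 0)
        '1 1 0',    # node 2: tag 1, one neighbor (node 0)
    ]
    with open('dataset/%s/%s.txt' % (dataset, dataset), 'w') as f:
        f.write('\n'.join(lines) + '\n')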
def separate_data(graph_list, seed, fold_idx):
    assert 0 <= fold_idx < 10, "fold_idx must be from 0 to 9."
    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)

    labels = [graph.label for graph in graph_list]
    idx_list = []
    for idx in skf.split(np.zeros(len(labels)), labels):
        idx_list.append(idx)
    train_idx, test_idx = idx_list[fold_idx]

    train_graph_list = [graph_list[i] for i in train_idx]
    test_graph_list = [graph_list[i] for i in test_idx]

    return train_graph_list, test_graph_list


def save_graphs_as_mat(graphs_list):
    # recreate the output directory from scratch
    if os.path.isdir("test_graphs"):
        shutil.rmtree("test_graphs")
    os.makedirs("test_graphs")

    counter = 0
    for g in graphs_list:
        counter += 1
        curr_graph = nx.to_numpy_array(g)
        if counter == 101:
            # sanity check: print the sparsity and first row of the 101st graph
            print(1.0 - (np.count_nonzero(curr_graph) / float(curr_graph.size)))
            print("###########################################################")
            print(curr_graph[0])
        # dummy 60-dimensional node attributes
        curr_graph_att = np.ones((len(curr_graph), 60))
        scipy.io.savemat('test_graphs/testgraph_{}_{}__.txt.mat'.format(curr_graph.shape[0], counter),
                         {'data': curr_graph})
        scipy.io.savemat('test_graphs/testgraph_{}_{}__.usr.mat'.format(curr_graph.shape[0], counter),
                         {'attributes': curr_graph_att})


def move_random_node_to_the_last_index(adj):
    # Select a random node and move it to the last position of the adjacency
    # matrix by permuting its row and column to the end; the graph itself is
    # unchanged, only the node ordering is.
    adj = np.asarray(adj)
    random_idx = np.random.randint(adj.shape[0])
    order = [i for i in range(adj.shape[0]) if i != random_idx] + [random_idx]
    return adj[np.ix_(order, order)]


def prepare_kronEM_data(graphs_list, data_name, random_node_permutation_flag):
    # recreate the KronEM output directories from scratch
    main_dir = "kronEM_main_graphs_" + data_name
    missing_dir = "kronEM_graphs_with_missing_node_" + data_name
    for directory in (main_dir, missing_dir):
        if os.path.isdir(directory):
            shutil.rmtree(directory)
        os.makedirs(directory)

    counter = 0
    if random_node_permutation_flag:
        number_of_random_node_permutation_per_graph = 3
    else:
        number_of_random_node_permutation_per_graph = 1

    for g in graphs_list:
        for _ in range(number_of_random_node_permutation_per_graph):
            counter += 1
            adj = nx.to_numpy_array(g)
            if random_node_permutation_flag:
                adj = move_random_node_to_the_last_index(adj)
            # write the full graph and, alongside it, the same graph with the last
            # node (and all of its edges) removed, both as 1-indexed edge lists
            with open(main_dir + "/" + str(counter) + ".txt", "w") as file_main, \
                    open(missing_dir + "/" + str(counter) + ".txt", "w") as file_missing:
                for i in range(adj.shape[0]):
                    for j in range(adj.shape[0]):
                        if adj[i, j] == 1:
                            file_main.write(str(i + 1) + "\t" + str(j + 1) + "\n")
                            if i != adj.shape[0] - 1 and j != adj.shape[0] - 1:
                                file_missing.write(str(i + 1) + "\t" + str(j + 1) + "\n")
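

# --- Illustrative only: a minimal usage sketch, not part of the original module. ---
# It relies on the illustrative write_toy_dataset helper defined above; the dataset
# name 'toy' and the flag values are assumptions for the example. separate_data is
# omitted here because it expects objects with a .label attribute (e.g. S2VGraph)
# and at least 10 graphs per class for the 10-fold stratified split.
if __name__ == '__main__':
    write_toy_dataset('toy')
    graphs, num_classes = load_data('toy', degree_as_tag=False)
    print('loaded %d graphs with %d label classes' % (len(graphs), num_classes))

    # export the graphs for KronEM; with the permutation flag set, each graph is
    # written 3 times with a different random node moved to the last index
    prepare_kronEM_data(graphs, 'toy', random_node_permutation_flag=True)

    # export .mat files with dummy 60-dimensional node attributes
    save_graphs_as_mat(graphs)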