import networkx as nx
import numpy as np
import torch

from data import bfs_seq


class GraphAdjSampler(torch.utils.data.Dataset):
    """Serves padded adjacency matrices and node features for graph generation,
    with optional random node permutation, BFS reordering, and an augmentation
    that deletes one arbitrary node and re-appends it as the last node."""

    def __init__(self, G_list, max_num_nodes, permutation_mode, bfs_mode,
                 bfs_mode_with_arbitrary_node_deleted, features='id'):
        self.max_num_nodes = max_num_nodes
        self.adj_all = []
        self.len_all = []
        self.feature_all = []
        self.count = 0
        self.permutation_mode = permutation_mode
        self.bfs_mode = bfs_mode
        self.bfs_mode_with_arbitrary_node_deleted = bfs_mode_with_arbitrary_node_deleted

        for G in G_list:
            adj = nx.to_numpy_array(G)
            # the diagonal entries are set to 1 since they denote the node
            # (existence) probability
            self.adj_all.append(adj + np.identity(G.number_of_nodes()))
            self.len_all.append(G.number_of_nodes())
            if features == 'id':
                self.feature_all.append(np.identity(max_num_nodes))
            elif features == 'deg':
                degs = np.sum(adj, axis=1)
                degs = np.expand_dims(
                    np.pad(degs, [0, max_num_nodes - G.number_of_nodes()],
                           'constant'),
                    axis=1)
                self.feature_all.append(degs)
            elif features == 'struct':
                degs = np.sum(adj, axis=1)
                degs = np.expand_dims(
                    np.pad(degs, [0, max_num_nodes - G.number_of_nodes()],
                           'constant'),
                    axis=1)
                clusterings = np.array(list(nx.clustering(G).values()))
                clusterings = np.expand_dims(
                    np.pad(clusterings,
                           [0, max_num_nodes - G.number_of_nodes()],
                           'constant'),
                    axis=1)
                self.feature_all.append(np.hstack([degs, clusterings]))

    def __len__(self):
        return len(self.adj_all)

    def __getitem__(self, idx):
        adj = self.adj_all[idx]
        if self.permutation_mode:
            # apply a random permutation to the node ordering
            x_idx = np.random.permutation(adj.shape[0])
            adj = adj[np.ix_(x_idx, x_idx)]
        if self.bfs_mode:
            if self.bfs_mode_with_arbitrary_node_deleted:
                # pick a random node to delete, but keep its adjacency column
                # so it can be re-appended as the last node after BFS
                random_idx_for_delete = np.random.randint(adj.shape[0])
                deleted_node = adj[:, random_idx_for_delete].copy().reshape(-1, 1)
                n = len(deleted_node)
                # shift the entries after the deleted index up by one and
                # zero out the now-vacant last entry
                deleted_node[random_idx_for_delete:n - 1] = deleted_node[random_idx_for_delete + 1:n]
                deleted_node[n - 1] = 0
                # remove the node's row and column from the adjacency matrix
                adj[:, random_idx_for_delete:adj.shape[0] - 1] = adj[:, random_idx_for_delete + 1:adj.shape[0]]
                adj[random_idx_for_delete:adj.shape[0] - 1, :] = adj[random_idx_for_delete + 1:adj.shape[0], :]
                adj = np.delete(adj, -1, axis=1)
                adj = np.delete(adj, -1, axis=0)
                G = nx.from_numpy_array(adj)
                # then do BFS in the permuted G, starting from the node of
                # maximum degree
                degree_arr = np.sum(adj, axis=0)
                start_idx = np.argmax(degree_arr)
                # start_idx = np.random.randint(adj.shape[0])
                x_idx = np.array(bfs_seq(G, start_idx))
                adj = adj[np.ix_(x_idx, x_idx)]
                # re-append the deleted node as the last row and column
                x_idx = np.insert(x_idx, x_idx.size, x_idx.size)
                deleted_node = deleted_node[np.ix_(x_idx)]
                adj = np.append(adj, deleted_node[:-1], axis=1)
                deleted_node = deleted_node.reshape(1, -1)
                adj = np.vstack([adj, deleted_node])
            else:
                G = nx.from_numpy_array(adj)
                # then do BFS in the permuted G from a random start node
                start_idx = np.random.randint(adj.shape[0])
                x_idx = np.array(bfs_seq(G, start_idx))
                adj = adj[np.ix_(x_idx, x_idx)]
        num_nodes = adj.shape[0]
        adj_padded = np.zeros((self.max_num_nodes, self.max_num_nodes))
        adj_padded[:num_nodes, :num_nodes] = adj

        # vectorize the upper triangle (diagonal included) of the padded
        # adjacency matrix
        adj_vectorized = adj_padded[np.triu(np.ones((self.max_num_nodes, self.max_num_nodes))) == 1]
        # the following 2 lines recover the upper triangle of the adj matrix:
        # recovered = np.zeros((self.max_num_nodes, self.max_num_nodes))
        # recovered[np.triu(np.ones((self.max_num_nodes, self.max_num_nodes))) == 1] = adj_vectorized

        return {'adj': adj_padded,
                'adj_decoded': adj_vectorized,
                'num_nodes': num_nodes,
                'features': self.feature_all[idx].copy()}
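

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original module): shows how the
# dataset might be constructed and batched. The graph choices, batch size, and
# flag settings below are assumptions for demonstration only; `bfs_seq` is
# provided by the repo's `data` module imported above.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    # small connected graphs keep the BFS reordering well-defined
    graphs = [nx.cycle_graph(n) for n in range(6, 11)]
    max_num_nodes = max(G.number_of_nodes() for G in graphs)
    dataset = GraphAdjSampler(graphs, max_num_nodes,
                              permutation_mode=True,
                              bfs_mode=True,
                              bfs_mode_with_arbitrary_node_deleted=False,
                              features='id')
    loader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True)
    batch = next(iter(loader))
    # adj: (B, max_n, max_n); adj_decoded: (B, max_n * (max_n + 1) // 2)
    print(batch['adj'].shape, batch['adj_decoded'].shape, batch['num_nodes'])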