You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

faez_test_2.py 11KB

4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. import numpy as np
  2. import scipy.sparse as sp
  3. import networkx as nx
  4. from baselines.graphvae.graphvae_model import graph_show
  5. from baselines.graphvae.util import *
  6. import baselines.graphvae.util as util
  7. from data import bfs_seq, encode_adj, decode_adj
  8. def sparse_to_tuple(sparse_mx):
  9. if not sp.isspmatrix_coo(sparse_mx):
  10. sparse_mx = sparse_mx.tocoo()
  11. coords = np.vstack((sparse_mx.row, sparse_mx.col)).transpose()
  12. values = sparse_mx.data
  13. shape = sparse_mx.shape
  14. return coords, values, shape
  15. def mask_test_edges(adj):
  16. # Function to build test set with 10% positive links
  17. # NOTE: Splits are randomized and results might slightly deviate from reported numbers in the paper.
  18. # TODO: Clean up.
  19. # Remove diagonal elements
  20. adj = adj - sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
  21. adj.eliminate_zeros()
  22. # Check that diag is zero:
  23. assert np.diag(adj.todense()).sum() == 0
  24. adj_triu = sp.triu(adj)
  25. adj_tuple = sparse_to_tuple(adj_triu)
  26. edges = adj_tuple[0]
  27. edges_all = sparse_to_tuple(adj)[0]
  28. num_test = int(np.floor(edges.shape[0] / 10.))
  29. num_val = int(np.floor(edges.shape[0] / 20.))
  30. all_edge_idx = list(range(edges.shape[0]))
  31. np.random.shuffle(all_edge_idx)
  32. val_edge_idx = all_edge_idx[:num_val]
  33. test_edge_idx = all_edge_idx[num_val:(num_val + num_test)]
  34. test_edges = edges[test_edge_idx]
  35. val_edges = edges[val_edge_idx]
  36. train_edges = np.delete(edges, np.hstack([test_edge_idx, val_edge_idx]), axis=0)
  37. def ismember(a, b, tol=5):
  38. rows_close = np.all(np.round(a - b[:, None], tol) == 0, axis=-1)
  39. return np.any(rows_close)
  40. test_edges_false = []
  41. while len(test_edges_false) < len(test_edges):
  42. idx_i = np.random.randint(0, adj.shape[0])
  43. idx_j = np.random.randint(0, adj.shape[0])
  44. if idx_i == idx_j:
  45. continue
  46. if ismember([idx_i, idx_j], edges_all):
  47. continue
  48. if test_edges_false:
  49. if ismember([idx_j, idx_i], np.array(test_edges_false)):
  50. continue
  51. if ismember([idx_i, idx_j], np.array(test_edges_false)):
  52. continue
  53. test_edges_false.append([idx_i, idx_j])
  54. val_edges_false = []
  55. while len(val_edges_false) < len(val_edges):
  56. idx_i = np.random.randint(0, adj.shape[0])
  57. idx_j = np.random.randint(0, adj.shape[0])
  58. if idx_i == idx_j:
  59. continue
  60. if ismember([idx_i, idx_j], train_edges):
  61. continue
  62. if ismember([idx_j, idx_i], train_edges):
  63. continue
  64. if ismember([idx_i, idx_j], val_edges):
  65. continue
  66. if ismember([idx_j, idx_i], val_edges):
  67. continue
  68. if val_edges_false:
  69. if ismember([idx_j, idx_i], np.array(val_edges_false)):
  70. continue
  71. if ismember([idx_i, idx_j], np.array(val_edges_false)):
  72. continue
  73. val_edges_false.append([idx_i, idx_j])
  74. assert ~ismember(test_edges_false, edges_all)
  75. assert ~ismember(val_edges_false, edges_all)
  76. assert ~ismember(val_edges, train_edges)
  77. assert ~ismember(test_edges, train_edges)
  78. assert ~ismember(val_edges, test_edges)
  79. data = np.ones(train_edges.shape[0])
  80. # Re-build adj matrix
  81. adj_train = sp.csr_matrix((data, (train_edges[:, 0], train_edges[:, 1])), shape=adj.shape)
  82. adj_train = adj_train + adj_train.T
  83. # NOTE: these edge lists only contain single direction of edge!
  84. return adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false
  85. if __name__ == '__main__':
  86. colors = [(0.7509279299037631, 0.021203049355839054, 0.24561203044115132)]
  87. graphs = []
  88. graph = nx.grid_2d_graph(2, 3)
  89. graphs.append(graph)
  90. graphs.append(nx.grid_2d_graph(2,4))
  91. adj = nx.to_numpy_matrix(graph)
  92. # print("*** before")
  93. # print(adj)
  94. # graph_show(nx.from_numpy_matrix(adj), "1", colors)
  95. # adj = move_random_node_to_the_last_index(adj)
  96. # print("*** after")
  97. # print(adj)
  98. # graph_show(nx.from_numpy_matrix(adj), "2", colors)
  99. prepare_kronEM_data(graphs, "salam", True)
  100. print(adj)
  101. # print("*** 1) ")
  102. # print(adj)
  103. # # adj_copy = adj.copy()
  104. # random_idx_for_delete = np.random.randint(adj.shape[0])
  105. # print("*** index")
  106. # print(random_idx_for_delete)
  107. # deleted_node = adj[:, random_idx_for_delete].copy()
  108. # for i in range(deleted_node.__len__()):
  109. # if i >= random_idx_for_delete and i < deleted_node.__len__() - 1:
  110. # deleted_node[i] = deleted_node[i + 1]
  111. # elif i == deleted_node.__len__() - 1:
  112. # deleted_node[i] = 0
  113. # adj[:, random_idx_for_delete:adj.shape[0] - 1] = adj[:, random_idx_for_delete + 1:adj.shape[0]]
  114. # adj[random_idx_for_delete:adj.shape[0] - 1, :] = adj[random_idx_for_delete + 1:adj.shape[0], :]
  115. # adj = np.delete(adj, -1, axis=1)
  116. # adj = np.delete(adj, -1, axis=0)
  117. # print("************")
  118. # print(adj)
  119. # print(deleted_node)
  120. # adj = np.concatenate((adj, deleted_node[:deleted_node.shape[0]-1]), axis=1)
  121. # adj = np.concatenate((adj, np.transpose(deleted_node)), axis=0)
  122. # print("*** 2) ")
  123. # print(adj)
  124. # graph_show(nx.from_numpy_matrix(adj), "2", colors)
  125. # max_prev_node = 12
  126. # len = adj_copy.shape[0]
  127. # x = np.zeros((graph.number_of_nodes() - 1, max_prev_node)) # here zeros are padded for small graph
  128. # x[0, :] = 1 # the first input token is all ones
  129. # y = np.zeros((graph.number_of_nodes() - 1, max_prev_node)) # here zeros are padded for small graph
  130. #
  131. # column_vector = adj_copy[:, adj_copy.shape[0] - 1]
  132. # incomplete_adj = adj_copy.copy()
  133. # incomplete_adj = incomplete_adj[:, :incomplete_adj.shape[0] - 1]
  134. # incomplete_adj = incomplete_adj[:incomplete_adj.shape[0] - 1, :]
  135. # x_idx = np.random.permutation(incomplete_adj.shape[0])
  136. #
  137. # x_idx_prime = np.concatenate((x_idx, [adj.shape[0] - 1]), axis=0)
  138. # column_vector = column_vector[np.ix_(x_idx_prime)]
  139. #
  140. # incomplete_adj = incomplete_adj[np.ix_(x_idx, x_idx)]
  141. # #
  142. # incomplete_matrix = np.asmatrix(incomplete_adj)
  143. # G = nx.from_numpy_matrix(incomplete_matrix)
  144. # # then do bfs in the permuted G
  145. # start_idx = np.random.randint(incomplete_adj.shape[0])
  146. # x_idx = np.array(bfs_seq(G, start_idx))
  147. # incomplete_adj = incomplete_adj[np.ix_(x_idx, x_idx)]
  148. # adj_encoded = encode_adj(incomplete_adj.copy(), max_prev_node=12)
  149. # #
  150. # x_idx_prime = np.concatenate((x_idx, [adj.shape[0] - 1]), axis=0)
  151. # column_vector = column_vector[np.ix_(x_idx_prime)]
  152. # adj = nx.to_numpy_matrix(graph)
  153. # adj_copy = adj.copy()
  154. # column_vector = adj_copy[:, adj_copy.shape[0] - 1]
  155. # incomplete_adj = adj_copy.copy()
  156. # incomplete_adj = incomplete_adj[:, :incomplete_adj.shape[0] - 1]
  157. # incomplete_adj = incomplete_adj[:incomplete_adj.shape[0] - 1, :]
  158. # x_idx = np.random.permutation(incomplete_adj.shape[0])
  159. #
  160. # x_idx_prime = np.concatenate((x_idx, [adj.shape[0] - 1]), axis=0)
  161. # column_vector = column_vector[np.ix_(x_idx_prime)]
  162. #
  163. # incomplete_adj = incomplete_adj[np.ix_(x_idx, x_idx)]
  164. # #
  165. # incomplete_matrix = np.asmatrix(incomplete_adj)
  166. # G = nx.from_numpy_matrix(incomplete_matrix)
  167. # # then do bfs in the permuted G
  168. # start_idx = np.random.randint(incomplete_adj.shape[0])
  169. # x_idx = np.array(bfs_seq(G, start_idx))
  170. # incomplete_adj = incomplete_adj[np.ix_(x_idx, x_idx)]
  171. # #
  172. # x_idx_prime = np.concatenate((x_idx, [adj.shape[0] - 1]), axis=0)
  173. # column_vector = column_vector[np.ix_(x_idx_prime)]
  174. # row_vector = np.transpose(column_vector)
  175. # complete_adj = incomplete_adj.copy()
  176. # complete_adj = np.concatenate((complete_adj, column_vector[:adj_copy.shape[0] - 1]), axis=1)
  177. # complete_adj = np.concatenate((complete_adj, row_vector), axis=0)
  178. # adj_encoded = encode_adj(complete_adj.copy(), 12)
  179. # decoded = decode_adj(adj_encoded[:adj_encoded.shape[0]-1, :])
  180. # graph_show(nx.from_numpy_matrix(incomplete_adj), "incomplete", colors)
  181. # decoded = decode_adj(adj_encoded)
  182. # complete = np.concatenate((decoded, column_vector[:column_vector.shape[0]-1]), axis=1)
  183. # complete = np.concatenate((complete, np.transpose(column_vector)), axis=0)
  184. # graph_show(nx.from_numpy_matrix(complete), "complete", colors)
  185. #
  186. # graph_show(nx.from_numpy_matrix(adj_copy), "1", colors)
  187. # x = np.zeros((graph.number_of_nodes(), args.max_prev_node)) # here zeros are padded for small graph
  188. # x[0, :] = 1 # the first input token is all ones
  189. # y = np.zeros((graph.number_of_nodes(), args.max_prev_node)) # here zeros are padded for small graph
  190. # generate input x, y pairs
  191. # graphs.append(graph)
  192. # print(nx.to_numpy_matrix(graph))
  193. # graph = nx.grid_2d_graph(2, 2)
  194. # print("********************************")
  195. # print(nx.to_numpy_matrix(graph))
  196. # graphs.append(graph)
  197. # util.prepare_kronEM_data(graphs, "salam")
  198. # nx.write_adjlist(graph, "sala.txt")
  199. # # colors = [(0.7509279299037631, 0.021203049355839054, 0.24561203044115132)]
  200. # # graph_show(graph, "1", colors)
  201. # file = open("copy.txt", "w")
  202. # adj = nx.to_numpy_matrix(graph)
  203. # print(adj)
  204. # with file as f:
  205. # for line in adj:
  206. # np.savetxt(f, line, fmt='%.2f')
  207. # print("**** 1)")
  208. # print(adj)
  209. # random_index = np.random.randint(adj.shape[0])
  210. # random_index = 2
  211. # print("*** random_index")
  212. # print(random_index)
  213. # adj[:, random_index] = 0
  214. # adj[random_index, :] = 0
  215. # print("**** 2)")
  216. # print(adj)
  217. # graph_show(nx.from_numpy_matrix(adj), "2", colors)
  218. # adj[:, random_index] = 1
  219. # adj[random_index, :] = 1
  220. # print("**** 3)")
  221. # print(adj)
  222. # graph_show(nx.from_numpy_matrix(adj), "3", colors)
  223. # print(adj)
  224. # print(adj[:-1, :-1])
  225. # print(adj[:,-1])
  226. # label_padded = np.zeros(10)
  227. # label_padded[:4] = adj[-1,:]
  228. # print(label_padded)
  229. # graph = nx.barabasi_albert_graph(100,4)
  230. # matrix = nx.to_numpy_matrix(graph)
  231. # G = nx.karate_club_graph()
  232. # print("Node Degree")
  233. # for v in G:
  234. # print('%s %s' % (v, G.degree(v)))
  235. # colors = [(0.7509279299037631, 0.021203049355839054, 0.24561203044115132)]
  236. # graph_show(G, "Karate", colors)
  237. # adj_label = adj + sp.eye(adj.shape[0])
  238. # print(adj)
  239. # print(adj_label)
  240. # def preprocess_graph(adj):
  241. # adj = sp.coo_matrix(adj)
  242. # adj_ = adj + sp.eye(adj.shape[0])
  243. # rowsum = np.array(adj_.sum(1))
  244. # degree_mat_inv_sqrt = sp.diags(np.power(rowsum, -0.5).flatten())
  245. # adj_normalized = adj_.dot(degree_mat_inv_sqrt).transpose().dot(degree_mat_inv_sqrt).tocoo()
  246. # # return sparse_to_tuple(adj_normalized)
  247. # return sparse_mx_to_torch_sparse_tensor(adj_normalized)