You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

mmsb.py 5.5KB

4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. """Stochastic block model."""
  2. import argparse
  3. import os
  4. from time import time
  5. import edward as ed
  6. import networkx as nx
  7. import numpy as np
  8. import tensorflow as tf
  9. from edward.models import Bernoulli, Multinomial, Beta, Dirichlet, PointMass, Normal
  10. from observations import karate
  11. from sklearn.metrics.cluster import adjusted_rand_score
  12. import utils
  # Index of the GPU to expose; the script entry point writes this value into
  # the CUDA_VISIBLE_DEVICES environment variable.
  CUDA = 2

  # Seed Edward from the wall clock so each run differs. Swap in a fixed value
  # such as ed.set_seed(42) when a repeatable run is needed.
  ed.set_seed(int(time()))

  # DATA
  # The karate-club loader, karate("data"), is currently disabled; the input
  # graphs are assembled in the script entry point instead.
  def disjoint_cliques_test_graph(num_cliques, clique_size):
      """Return the adjacency matrix of a graph made of disconnected cliques.

      Builds ``num_cliques`` complete graphs of ``clique_size`` nodes each,
      joins them with ``nx.disjoint_union_all`` and converts the union with
      ``nx.to_numpy_matrix``.
      """
      cliques = []
      for _ in range(num_cliques):
          cliques.append(nx.complete_graph(clique_size))
      union = nx.disjoint_union_all(cliques)
      adjacency = nx.to_numpy_matrix(union)
      return adjacency
  def mmsb(N, K, data, rho=0.3, n_iter=6000, learning_rate=0.01):
      """Fit a block model to one adjacency matrix with Edward MAP inference.

      The model places a Dirichlet prior on the block proportions, a Beta
      prior on the K x K block connectivity, draws one Multinomial membership
      row per node, and observes a Bernoulli adjacency whose probabilities are
      ``(1 - rho) * Z Pi Z^T``.

      Args:
          N: Number of nodes (rows of ``data``).
          K: Number of blocks.
          data: Observed adjacency, bound to the Bernoulli variable ``X``.
          rho: Sparsity parameter; edge probabilities are scaled by
              ``1 - rho``.
          n_iter: Number of optimisation steps.
          learning_rate: Learning rate handed to ``tf.train.AdamOptimizer``.

      Returns:
          A tuple ``(qZ.mean().eval(), qPi.eval())``: the evaluated point
          estimates built from the [N, K] membership variable and the [K, K]
          connectivity variable.
      """
      # MODEL
      # probability of belonging to each of K blocks for each node
      gamma = Dirichlet(concentration=tf.ones([K]))
      # block connectivity
      Pi = Beta(concentration0=tf.ones([K, K]), concentration1=tf.ones([K, K]))
      # one membership draw (total_count=1) per node, N draws in total
      Z = Multinomial(total_count=1.0, probs=gamma, sample_shape=N)
      # adjacency
      X = Bernoulli(probs=(1 - rho) * tf.matmul(Z, tf.matmul(Pi, tf.transpose(Z))))

      # INFERENCE (MAP with point-mass approximations)
      # softmax / sigmoid map the unconstrained variables onto the simplex and
      # into (0, 1) respectively. The creation order of these variables is
      # kept as in the original code.
      qgamma = PointMass(params=tf.nn.softmax(tf.Variable(tf.random_normal([K]))))
      qPi = PointMass(params=tf.nn.sigmoid(tf.Variable(tf.random_normal([K, K]))))
      qZ = PointMass(params=tf.nn.softmax(tf.Variable(tf.random_normal([N, K]))))

      inference = ed.MAP({gamma: qgamma, Pi: qPi, Z: qZ}, data={X: data})
      inference.initialize(
          optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate),
          n_iter=n_iter)

      tf.global_variables_initializer().run()

      # Manual update loop instead of inference.run() so the optimizer set
      # above is the one that is used.
      for _ in range(inference.n_iter):
          info_dict = inference.update()
          inference.print_progress(info_dict)

      inference.finalize()
      print('qgamma after: ', qgamma.mean().eval())
      return qZ.mean().eval(), qPi.eval()
  def arg_parse():
      """Parse the command-line options of the MMSB script.

      Recognised options are ``--dataset`` (default ``'community'``), ``--K``
      (int, default 4) and ``--samples-per-G`` (int, stored as ``samples``,
      default 1). Returns the namespace produced by ``parse_args()``.
      """
      cli = argparse.ArgumentParser(description='MMSB arguments.')
      cli.add_argument(
          '--dataset',
          dest='dataset',
          default='community',
          help='Input dataset.',
      )
      cli.add_argument(
          '--K',
          dest='K',
          type=int,
          default=4,
          help='Number of blocks.',
      )
      cli.add_argument(
          '--samples-per-G',
          dest='samples',
          type=int,
          default=1,
          help='Number of samples for every graph.',
      )
      parsed = cli.parse_args()
      return parsed
  def graph_gen_from_blockmodel(B, Z, edge_scale=0.3):
      """Sample a graph from a block matrix and node memberships.

      Args:
          B: Block connectivity values, convertible to a 2-D array.
          Z: Per-node membership weights, convertible to a 2-D array with one
              row per node and one column per block.
          edge_scale: Factor applied to ``Z B Z^T`` before sampling.

      Returns:
          The graph built by ``nx.from_numpy_matrix`` from the sampled 0/1
          matrix.
      """
      B = np.array(B)
      Z = np.array(Z)
      adj_prob = np.dot(Z, np.dot(B, np.transpose(Z)))
      # NOTE(review): mmsb() scales the same product by (1 - rho) = 0.7, while
      # the default here is 0.3. The default is kept to preserve existing
      # output; confirm which factor is intended.
      # Every entry is drawn independently, so the sampled matrix is not
      # forced to be symmetric and its diagonal may contain ones.
      adj = np.random.binomial(1, adj_prob * edge_scale)
      return nx.from_numpy_matrix(adj)
  if __name__ == '__main__':
      # Script entry point: build the input graphs for the chosen dataset, fit
      # a block model to each one, sample new graphs from every fit and save
      # the samples.
      prog_args = arg_parse()
      os.environ['CUDA_VISIBLE_DEVICES'] = str(CUDA)
      print('CUDA', CUDA)

      dataset = prog_args.dataset
      X_dataset = []
      if dataset == 'clique_test':
          X_dataset.append(disjoint_cliques_test_graph(4, 7))
      elif dataset == 'citeseer':
          graphs = utils.citeseer_ego()
          X_dataset = [nx.to_numpy_matrix(g) for g in graphs]
      elif dataset == 'community':
          graphs = []
          for i in range(2, 3):
              for j in range(30, 81):
                  # ten graphs per (i, j) setting
                  for _ in range(10):
                      graphs.append(utils.caveman_special(i, j, p_edge=0.3))
          X_dataset = [nx.to_numpy_matrix(g) for g in graphs]
      elif dataset == 'grid':
          graphs = []
          for i in range(10, 20):
              for j in range(10, 20):
                  graphs.append(nx.grid_2d_graph(i, j))
          X_dataset = [nx.to_numpy_matrix(g) for g in graphs]
      elif dataset.startswith('community'):
          # Names such as 'community4': the trailing digits give the number of
          # communities. The whole digit run is read, so 'community10' means
          # ten communities rather than zero.
          digits = dataset[len(dataset.rstrip('0123456789')):]
          if not digits:
              raise ValueError(
                  'Dataset name must end with the number of communities: %r'
                  % dataset)
          num_communities = int(digits)
          print('Creating dataset with ', num_communities, ' communities')
          c_sizes = np.random.choice([12, 13, 14, 15, 16, 17], num_communities)
          graphs = []
          for _ in range(3000):
              graphs.append(utils.n_community(c_sizes, p_inter=0.01))
          X_dataset = [nx.to_numpy_matrix(g) for g in graphs]
      else:
          # Fail early instead of fitting nothing and saving an empty result.
          raise ValueError('Unknown dataset: %r' % dataset)

      print('Number of graphs: ', len(X_dataset))
      K = prog_args.K  # number of clusters
      gen_graphs = []
      for i, X_data in enumerate(X_dataset):
          if i % 5 == 0:
              print(i)
          N = X_data.shape[0]  # number of vertices

          # NOTE(review): every mmsb() call creates new TensorFlow variables
          # and nothing here resets the default graph between calls; check
          # memory use on large datasets.
          Zp, B = mmsb(N, K, X_data)
          Z_pred = Zp.argmax(axis=1)
          print("Result (label flip can happen):")
          print("Predicted")
          print(Z_pred)

          for _ in range(prog_args.samples):
              gen_graphs.append(graph_gen_from_blockmodel(B, Zp))

      save_path = '/lfs/local/0/rexy/graph-generation/eval_results/mmsb/'
      utils.save_graph_list(gen_graphs, os.path.join(save_path, prog_args.dataset + '.dat'))