|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155 |
- import networkx as nx
- import numpy as np
-
- from utils import *
- from data import *
-
def _parse_num_communities(graph_type):
    """Return the integer spelled by the trailing digits of ``graph_type``.

    Reading the whole digit run (rather than only the last character) lets
    names such as ``'community12'`` work as expected.

    Raises:
        ValueError: if ``graph_type`` does not end in at least one digit.
    """
    digits = graph_type[len(graph_type.rstrip('0123456789')):]
    if not digits:
        raise ValueError(
            "graph_type %r must end with the number of communities, "
            "e.g. 'community4'" % (graph_type,)
        )
    return int(digits)


def _largest_component(G):
    """Return the largest connected component of ``G`` as a standalone graph.

    ``nx.connected_component_subgraphs`` was removed in networkx 2.4;
    ``connected_components`` + ``subgraph`` is available in old and new
    releases alike.
    """
    biggest = max(nx.connected_components(G), key=len)
    return G.subgraph(biggest).copy()


def _citeseer_ego_graphs(radius, min_nodes, max_nodes):
    """Collect ego graphs from the largest component of the citeseer graph.

    One ego graph of the given ``radius`` is taken around every node, and only
    those whose node count lies in ``[min_nodes, max_nodes]`` are kept.
    """
    _, _, G = Graph_load(dataset='citeseer')
    # Relabel to 0..n-1 so that the integer range below enumerates every node.
    G = nx.convert_node_labels_to_integers(_largest_component(G))
    egos = (
        nx.ego_graph(G, node, radius=radius)
        for node in range(G.number_of_nodes())
    )
    return [
        ego for ego in egos
        if min_nodes <= ego.number_of_nodes() <= max_nodes
    ]


def create(args):
    """Build the list of graphs for the dataset named by ``args.graph_type``.

    Args:
        args: configuration object. ``args.graph_type`` selects the dataset;
            ``args.noise`` is additionally read for types containing
            ``'barabasi_noise'``. On a recognised type, ``args.max_prev_node``
            is overwritten with a per-dataset constant as a side effect.

    Returns:
        A list of graphs. An unrecognised ``graph_type`` yields an empty list
        and leaves ``args`` untouched.

    Raises:
        ValueError: for a ``'community...'`` type that does not end in digits.
    """
    graph_type = args.graph_type
    graphs = []

    # --- synthetic graphs ---
    if graph_type == 'ladder':
        graphs = [nx.ladder_graph(n) for n in range(100, 201)]
        args.max_prev_node = 10
    elif graph_type == 'ladder_small':
        graphs = [nx.ladder_graph(n) for n in range(2, 11)]
        args.max_prev_node = 10
    elif graph_type == 'tree':
        graphs = [
            nx.balanced_tree(i, j)
            for i in range(2, 5)
            for j in range(3, 5)
        ]
        args.max_prev_node = 256
    elif graph_type == 'caveman':
        # 10 random samples for each second argument in 30..80.
        graphs = [
            caveman_special(2, j, p_edge=0.3)
            for j in range(30, 81)
            for _ in range(10)
        ]
        args.max_prev_node = 100
    elif graph_type == 'caveman_small':
        graphs = [
            caveman_special(2, j, p_edge=0.8)  # default 0.8
            for j in range(6, 11)
            for _ in range(20)
        ]
        args.max_prev_node = 20
    elif graph_type == 'caveman_small_single':
        graphs = [caveman_special(2, 8, p_edge=0.5) for _ in range(100)]
        args.max_prev_node = 20
    elif graph_type.startswith('community'):
        num_communities = _parse_num_communities(graph_type)
        print('Creating dataset with ', num_communities, ' communities')
        # Community sizes are drawn once and shared by all 3000 graphs.
        c_sizes = np.random.choice([12, 13, 14, 15, 16, 17], num_communities)
        graphs = [n_community(c_sizes, p_inter=0.01) for _ in range(3000)]
        args.max_prev_node = 80
    elif graph_type == 'grid':
        graphs = [
            nx.grid_2d_graph(i, j)
            for i in range(10, 20)
            for j in range(10, 20)
        ]
        args.max_prev_node = 40
    elif graph_type == 'grid_small':
        graphs = [
            nx.grid_2d_graph(i, j)
            for i in range(2, 5)
            for j in range(2, 6)
        ]
        args.max_prev_node = 15
    elif graph_type == 'barabasi':
        graphs = [
            nx.barabasi_albert_graph(n, 4)
            for n in range(100, 200)
            for _ in range(5)
        ]
        args.max_prev_node = 130
    elif graph_type == 'barabasi_small':
        graphs = [
            nx.barabasi_albert_graph(n, 3)
            for n in range(4, 21)
            for _ in range(10)
        ]
        args.max_prev_node = 20
    elif graph_type == 'grid_big':
        graphs = [
            nx.grid_2d_graph(i, j)
            for i in range(36, 46)
            for j in range(36, 46)
        ]
        args.max_prev_node = 90
    elif 'barabasi_noise' in graph_type:
        graphs = [nx.barabasi_albert_graph(100, 4) for _ in range(500)]
        graphs = perturb_new(graphs, p=args.noise / 10.0)
        args.max_prev_node = 99

    # --- real graphs ---
    elif graph_type == 'enzymes':
        graphs = Graph_load_batch(min_num_nodes=10, name='ENZYMES')
        args.max_prev_node = 25
    elif graph_type == 'enzymes_small':
        graphs_raw = Graph_load_batch(min_num_nodes=10, name='ENZYMES')
        graphs = [G for G in graphs_raw if G.number_of_nodes() <= 20]
        args.max_prev_node = 15
    elif graph_type == 'protein':
        graphs = Graph_load_batch(min_num_nodes=20, name='PROTEINS_full')
        args.max_prev_node = 80
    elif graph_type == 'DD':
        graphs = Graph_load_batch(
            min_num_nodes=100,
            max_num_nodes=500,
            name='DD',
            node_attributes=False,
            graph_labels=True,
        )
        args.max_prev_node = 230
    elif graph_type == 'citeseer':
        graphs = _citeseer_ego_graphs(radius=3, min_nodes=50, max_nodes=400)
        args.max_prev_node = 250
    elif graph_type == 'citeseer_small':
        graphs = _citeseer_ego_graphs(radius=1, min_nodes=4, max_nodes=20)
        # Keep a random subset of at most 200 ego graphs.
        shuffle(graphs)
        graphs = graphs[0:200]
        args.max_prev_node = 15

    return graphs
-
-
|