import os
import random
import sys
from random import shuffle

# explicit imports for names that were previously pulled in only via the star imports below
import numpy as np
import networkx as nx
import torch
import torch.nn.functional as F
from torch.autograd import Variable
from sklearn.metrics import mean_absolute_error

sys.path.append('../')
import create_graphs
from data import *
from train import *
from args import Args


def test_mlp(x_batch, y_len_unsorted, epoch, args, rnn, output, test_batch_size=32, save_histogram=False, sample_time=1):
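    """Complete a batch of partially observed graphs with a trained GraphRNN-MLP model.

    The first ``incompleteness_ratio`` fraction of each graph's node sequence is
    teacher-forced from the ground truth ``x_batch``; the remaining node steps are
    sampled from the model. Returns the predicted graphs together with the decoded
    ground-truth and predicted adjacency matrices.

    Note: ``epoch`` and ``save_histogram`` are currently unused.
    """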
    rnn.eval()
    output.eval()

    # generate graphs
    max_num_node = int(args.max_num_node)
    y_pred = Variable(
        torch.zeros(test_batch_size, max_num_node, args.max_prev_node)).cuda()  # normalized prediction score
    y_pred_long = Variable(
        torch.zeros(test_batch_size, max_num_node, args.max_prev_node)).cuda()  # discrete prediction

    # fraction of each node sequence that is treated as observed (teacher-forced)
    incompleteness_ratio = 0.96

    for j in range(test_batch_size):
        # start every graph from a fresh hidden state and the all-ones start token
        rnn.hidden = rnn.init_hidden(1)
        x_step = Variable(torch.ones(1, 1, args.max_prev_node)).cuda()
        incomplete_graph_size = int(int(y_len_unsorted.data[j]) * incompleteness_ratio)
        for i in range(int(y_len_unsorted.data[j]) - 1):
            h = rnn(x_step)
            y_pred_step = output(h)
            y_pred[j:j + 1, i:i + 1, :] = torch.sigmoid(y_pred_step)
            if i < incomplete_graph_size:
                # observed part of the graph: feed the ground-truth adjacency row
                x_step = (x_batch[j:j + 1, i + 1:i + 2, :]).cuda()
            else:
                # missing part: sample the next adjacency row from the model
                x_step = sample_sigmoid(y_pred_step, sample=True, sample_time=sample_time)
            y_pred_long[j:j + 1, i:i + 1, :] = x_step
            rnn.hidden = Variable(rnn.hidden.data).cuda()
    y_pred_long_data = y_pred_long.data.long()

    G_pred_list = []
    adj_true_list = []
    adj_pred_list = []
    for i in range(test_batch_size):
        adj_pred = decode_adj(y_pred_long_data[i].cpu().numpy())
        adj_pred_list.append(adj_pred)
        adj_true_list.append(decode_adj(x_batch[i].cpu().numpy()))
        G_pred = get_graph(adj_pred)  # get a graph from zero-padded adj
        G_pred_list.append(G_pred)
    return G_pred_list, adj_true_list, adj_pred_list


def save_graph(graph, name):
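    """Decode an encoded adjacency tensor, rebuild the graph, and save its dense
    adjacency matrix to ``name + '.txt'``."""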
    adj_pred = decode_adj(graph.cpu().numpy())
    G_pred = get_graph(adj_pred)  # get a graph from zero-padded adj
    G = np.asarray(nx.to_numpy_matrix(G_pred))
    np.savetxt(name + '.txt', G, fmt='%d')


def data_to_graph_converter(data):
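    """Convert a batch of encoded adjacency tensors into a list of networkx graphs."""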
    G_list = []
    for i in range(len(data)):
        x = data[i].numpy().astype(int)
        adj_pred = decode_adj(x)
        G = get_graph(adj_pred)  # get a graph from zero-padded adj
        G_list.append(G)
    return G_list


def get_incomplete_graph(x_batch, y_len_unsorted, incompleteness_ratio=0.96):
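    """Truncate each encoded graph in ``x_batch`` to the first ``incompleteness_ratio``
    fraction of its node steps, zero-padding every sequence to a common length.

    ``x_batch`` has shape (batch, max_num_node, max_prev_node); the result has shape
    (batch, max_incomplete_num_node, max_prev_node).
    """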
    batch_size = len(x_batch)
    max_prev_node = len(x_batch[0][0])
    max_incomplete_num_node = int(int(max(y_len_unsorted)) * incompleteness_ratio)
    incomplete_graph = Variable(torch.zeros(batch_size, max_incomplete_num_node, max_prev_node))
    for i in range(len(y_len_unsorted)):
        incomplete_graph_size = int(int(y_len_unsorted.data[i]) * incompleteness_ratio)
        # keep the observed prefix and zero-pad the truncated remainder
        incomplete_graph[i] = torch.cat((x_batch[i, :incomplete_graph_size],
                                         torch.zeros([max_incomplete_num_node - incomplete_graph_size,
                                                      max_prev_node])), dim=0)
    return incomplete_graph


if __name__ == '__main__':
    # All necessary arguments are defined in args.py
    args = Args()
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.cuda)
    print('CUDA', args.cuda)
    print('File name prefix', args.fname)
    # create the necessary directories if they do not exist yet
    for path in [args.model_save_path, args.graph_save_path, args.figure_save_path,
                 args.timing_save_path, args.figure_prediction_save_path, args.nll_save_path]:
        if not os.path.isdir(path):
            os.makedirs(path)

    graphs = create_graphs.create(args)

    # split datasets
    random.seed(123)
    shuffle(graphs)
    graphs_len = len(graphs)
    graphs_test = graphs[int(0.8 * graphs_len):]
    graphs_train = graphs[0:int(0.8 * graphs_len)]
    graphs_validate = graphs[0:int(0.2 * graphs_len)]  # note: overlaps the training split

    graph_validate_len = sum(graph.number_of_nodes() for graph in graphs_validate) / len(graphs_validate)
    print('graph_validate_len', graph_validate_len)

    graph_test_len = sum(graph.number_of_nodes() for graph in graphs_test) / len(graphs_test)
    print('graph_test_len', graph_test_len)

    args.max_num_node = max(graph.number_of_nodes() for graph in graphs)
    max_num_edge = max(graph.number_of_edges() for graph in graphs)
    min_num_edge = min(graph.number_of_edges() for graph in graphs)

    # show graphs statistics
    print('total graph num: {}, training set: {}'.format(len(graphs), len(graphs_train)))
    print('max number node: {}'.format(args.max_num_node))
    print('max/min number edge: {}; {}'.format(max_num_edge, min_num_edge))
    print('max previous node: {}'.format(args.max_prev_node))

    # save ground truth graphs; the full list is saved under both the train and
    # test names, so slice manually after loading to recover the split
    save_graph_list(graphs, args.graph_save_path + args.fname_train + '0.dat')
    save_graph_list(graphs, args.graph_save_path + args.fname_test + '0.dat')
    print('train and test graphs saved at: ', args.graph_save_path + args.fname_test + '0.dat')

    if 'nobfs' in args.note:
        args.max_prev_node = args.max_num_node - 1
    dataset = Graph_sequence_sampler_pytorch_nobfs_for_completion(graphs_train,
                                                                  max_num_node=args.max_num_node)
    # dataset = Graph_sequence_sampler_pytorch(graphs_train, max_prev_node=args.max_prev_node,
    #                                          max_num_node=args.max_num_node)
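    # Uniform sampling with replacement: each graph gets weight 1 / len(dataset),
    # and one epoch draws batch_size * batch_ratio samples in total.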
    sample_strategy = torch.utils.data.sampler.WeightedRandomSampler(
        [1.0 / len(dataset) for i in range(len(dataset))],
        num_samples=args.batch_size * args.batch_ratio,
        replacement=True)
    dataset_loader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size,
                                                 num_workers=args.num_workers,
                                                 sampler=sample_strategy)

    rnn = GRU_plain(input_size=args.max_prev_node, embedding_size=args.embedding_size_rnn,
                    hidden_size=args.hidden_size_rnn, num_layers=args.num_layers, has_input=True,
                    has_output=False).cuda()
    output = MLP_plain(h_size=args.hidden_size_rnn, embedding_size=args.embedding_size_output,
                       y_size=args.max_prev_node).cuda()
    name = 'GraphRNN_MLP' + '_' + args.graph_type + '_' + str(args.num_layers) + '_' + str(args.hidden_size_rnn) + '_'
    # note: the checkpoint files carry an 'lstm_' prefix even though the model is a GRU
    fname = args.model_save_path + name + 'lstm_' + str(args.load_epoch) + '.dat'
    rnn.load_state_dict(torch.load(fname))
    fname = args.model_save_path + name + 'output_' + str(args.load_epoch) + '.dat'
    output.load_state_dict(torch.load(fname))

    # ******************************************************************
    # second copy of the model, presumably for fine-tuning on the completion
    # task; loading its checkpoint is currently commented out
    rnn_for_graph_completion = GRU_plain(input_size=args.max_prev_node, embedding_size=args.embedding_size_rnn,
                                         hidden_size=args.hidden_size_rnn, num_layers=args.num_layers,
                                         has_input=True, has_output=False).cuda()
    output_for_graph_completion = MLP_plain(h_size=args.hidden_size_rnn, embedding_size=args.embedding_size_output,
                                            y_size=args.max_prev_node).cuda()

    graph_completion_string = 'graph_completion_one_node_'

    # fname = args.model_save_path + args.fname + 'lstm_' + graph_completion_string + str(args.load_epoch) + '.dat'
    # rnn_for_graph_completion.load_state_dict(torch.load(fname))
    # fname = args.model_save_path + args.fname + 'output_' + graph_completion_string + str(args.load_epoch) + '.dat'
    # output_for_graph_completion.load_state_dict(torch.load(fname))
    # ******************************************************************

    args.lr = 0.00001  # set for the commented-out fine-tuning path; unused otherwise
    epoch = args.load_epoch
    print('model loaded! lr: {}'.format(args.lr))

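    # The loop below processes a single batch: it saves the ground-truth graphs,
    # their truncated (incomplete) versions, and the model-completed versions,
    # then reports the adjacency reconstruction error for the first graph.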
    for batch_idx, data in enumerate(dataset_loader):
        if batch_idx == 0:  # only the first batch is processed
            rnn.zero_grad()
            output.zero_grad()
            x_unsorted = data['x'].float()
            G = data_to_graph_converter(x_unsorted[:, 1:, :])
            nx.write_gpickle(G, "main_graphs.dat")
            y_unsorted = data['y'].float()
            y_len_unsorted = data['len']
            # *********************************
            G = get_incomplete_graph(x_unsorted, y_len_unsorted)
            G = data_to_graph_converter(G[:, 1:, :])
            nx.write_gpickle(G, "incomplete_graphs.dat")
            # *********************************
            G_pred_step, adj_true_list, adj_pred_list = test_mlp(x_unsorted, y_len_unsorted, epoch, args,
                                                                 rnn, output)
            nx.write_gpickle(G_pred_step, "completed_graphs.dat")
            abs_err = np.sum(np.absolute(adj_pred_list[0].astype("float") - adj_true_list[0].astype("float")))
            print("adj_true:")
            print(adj_true_list[0])

            print("sum of absolute errors:")
            print(abs_err)
            print("mean absolute error:")
            print(mean_absolute_error(adj_pred_list[0], adj_true_list[0]))
            print("adj_pred:")
            print(adj_pred_list[0])
            # *********************************
            # G_pred_step = test_mlp(x_unsorted, y_len_unsorted, epoch, args, rnn_for_graph_completion,
            #                        output_for_graph_completion)
            # nx.write_gpickle(G_pred_step, "completed_graphs_with_training.dat")
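
    # Example: the pickled graph lists written above can be reloaded for
    # inspection with networkx, e.g.:
    # completed = nx.read_gpickle("completed_graphs.dat")
    # print(len(completed), 'completed graphs')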