# an implementation for "Learning Deep Generative Models of Graphs"
import os
import random
from statistics import mean

import networkx as nx
import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score

from baselines.graphvae.util import load_data  # NOTE: shadowed by the local load_data() defined below
from main import *
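
# Pipeline: configure Args_DGMG, build a DGM_graphs model (imported via main),
# train it with train_DGMG() on a set of small graphs, then evaluate by
# regenerating the last node of a held-out graph (test_DGMG_2) and scoring the
# predicted edges against the ground truth (calc_lable_result / evaluate).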
class Args_DGMG():
    def __init__(self):
        ### CUDA
        self.cuda = 0

        ### model type
        self.note = 'Baseline_DGMG'  # do GCN after adding each edge
        # self.note = 'Baseline_DGMG_fast'  # do GCN only after adding each node

        ### data config
        self.graph_type = 'IMDBBINARY'
        # self.graph_type = 'grid_small'
        # self.graph_type = 'caveman_small'
        # self.graph_type = 'ladder_small'
        # self.graph_type = 'enzymes_small'
        # self.graph_type = 'barabasi_small'

        self.node_embedding_size = 64
        self.test_graph_num = 200

        ### training config
        self.epochs = 100  # now one epoch means self.batch_ratio x batch_size
        self.load_epoch = 100
        self.epochs_test_start = 10
        self.epochs_test = 10
        self.epochs_log = 10
        self.epochs_save = 10

        if 'fast' in self.note:
            self.is_fast = True
        else:
            self.is_fast = False

        # optimizer settings (GraphRNN defaults, assumed here since train_DGMG needs them)
        self.lr = 0.001
        self.milestones = [300, 600, 1000]
        self.lr_rate = 0.3

        ### output config
        self.model_save_path = 'model_save/'
        self.graph_save_path = 'graphs/'
        self.timing_save_path = 'timing_measure/'
        self.figure_prediction_save_path = 'figures_prediction/'
        self.nll_save_path = 'nll/'

        self.fname = self.note + '_' + self.graph_type + '_' + str(self.node_embedding_size)
        self.fname_pred = self.note + '_' + self.graph_type + '_' + str(self.node_embedding_size) + '_pred_'
        self.fname_train = self.note + '_' + self.graph_type + '_' + str(self.node_embedding_size) + '_train_'
        self.save = True
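
# Typical usage (sketch):
#   args = Args_DGMG()
#   model = DGM_graphs(h_size=args.node_embedding_size).cuda()
#   train_DGMG(args, graphs, model)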
def train_DGMG_epoch(epoch, args, model, dataset, optimizer, scheduler, is_fast=False):
    model.train()
    graph_num = len(dataset)
    order = list(range(graph_num))
    shuffle(order)

    loss_addnode = 0
    loss_addedge = 0
    loss_node = 0
    for i in order:
        model.zero_grad()

        graph = dataset[i]
        # do random ordering: relabel nodes
        node_order = list(range(graph.number_of_nodes()))
        shuffle(node_order)
        order_mapping = dict(zip(graph.nodes(), node_order))
        graph = nx.relabel_nodes(graph, order_mapping, copy=True)

        # NOTE: when starting loop, we assume a node has already been generated
        node_count = 1
        node_embedding = [
            Variable(torch.ones(1, args.node_embedding_size)).cuda()]  # list of torch tensors, each size: 1*hidden

        loss = 0
        while node_count <= graph.number_of_nodes():
            node_neighbor = graph.subgraph(
                list(range(node_count))).adjacency_list()  # list of lists (first node is zero)
            node_neighbor_new = graph.subgraph(list(range(node_count + 1))).adjacency_list()[
                -1]  # list of new node's neighbors

            # 1 message passing
            # do 2 times message passing
            node_embedding = message_passing(node_neighbor, node_embedding, model)

            # 2 graph embedding and new node embedding
            node_embedding_cat = torch.cat(node_embedding, dim=0)
            graph_embedding = calc_graph_embedding(node_embedding_cat, model)
            init_embedding = calc_init_embedding(node_embedding_cat, model)

            # 3 f_addnode
            p_addnode = model.f_an(graph_embedding)
            if node_count < graph.number_of_nodes():
                # add node
                node_neighbor.append([])
                node_embedding.append(init_embedding)
                if is_fast:
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
                # calc loss
                loss_addnode_step = F.binary_cross_entropy(p_addnode, Variable(torch.ones((1, 1))).cuda())
                # loss_addnode_step.backward(retain_graph=True)
                loss += loss_addnode_step
                loss_addnode += loss_addnode_step.data
            else:
                # calc loss
                loss_addnode_step = F.binary_cross_entropy(p_addnode, Variable(torch.zeros((1, 1))).cuda())
                # loss_addnode_step.backward(retain_graph=True)
                loss += loss_addnode_step
                loss_addnode += loss_addnode_step.data
                break

            edge_count = 0
            while edge_count <= len(node_neighbor_new):
                if not is_fast:
                    node_embedding = message_passing(node_neighbor, node_embedding, model)
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
                    graph_embedding = calc_graph_embedding(node_embedding_cat, model)

                # 4 f_addedge
                p_addedge = model.f_ae(graph_embedding)
                if edge_count < len(node_neighbor_new):
                    # calc loss
                    loss_addedge_step = F.binary_cross_entropy(p_addedge, Variable(torch.ones((1, 1))).cuda())
                    # loss_addedge_step.backward(retain_graph=True)
                    loss += loss_addedge_step
                    loss_addedge += loss_addedge_step.data

                    # 5 f_nodes
                    # excluding the last node (which is the new node)
                    node_new_embedding_cat = node_embedding_cat[-1, :].expand(node_embedding_cat.size(0) - 1,
                                                                              node_embedding_cat.size(1))
                    s_node = model.f_s(torch.cat((node_embedding_cat[0:-1, :], node_new_embedding_cat), dim=1))
                    p_node = F.softmax(s_node.permute(1, 0))
                    # get ground truth
                    a_node = torch.zeros((1, p_node.size(1)))
                    # print('node_neighbor_new',node_neighbor_new, edge_count)
                    a_node[0, node_neighbor_new[edge_count]] = 1
                    a_node = Variable(a_node).cuda()
                    # add edge
                    node_neighbor[-1].append(node_neighbor_new[edge_count])
                    node_neighbor[node_neighbor_new[edge_count]].append(len(node_neighbor) - 1)
                    # calc loss
                    loss_node_step = F.binary_cross_entropy(p_node, a_node)
                    # loss_node_step.backward(retain_graph=True)
                    loss += loss_node_step
                    loss_node += loss_node_step.data
                else:
                    # calc loss
                    loss_addedge_step = F.binary_cross_entropy(p_addedge, Variable(torch.zeros((1, 1))).cuda())
                    # loss_addedge_step.backward(retain_graph=True)
                    loss += loss_addedge_step
                    loss_addedge += loss_addedge_step.data
                    break

                edge_count += 1
            node_count += 1

        # update deterministic and lstm
        loss.backward()
        optimizer.step()
        scheduler.step()

    loss_all = loss_addnode + loss_addedge + loss_node

    if epoch % args.epochs_log == 0:
        print('Epoch: {}/{}, train loss: {:.6f}, graph type: {}, hidden: {}'.format(
            epoch, args.epochs, loss_all.item(), args.graph_type, args.node_embedding_size))
    # loss_sum += loss.data[0]*x.size(0)
    # return loss_sum
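
# train_DGMG_forward_epoch mirrors train_DGMG_epoch but only accumulates the
# forward losses (no backward pass or optimizer step); train_DGMG_nll() uses it
# to report per-graph NLL of a trained model on train/test splits.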
def train_DGMG_forward_epoch(args, model, dataset, is_fast=False):
    model.train()
    graph_num = len(dataset)
    order = list(range(graph_num))
    shuffle(order)

    loss_addnode = 0
    loss_addedge = 0
    loss_node = 0
    for i in order:
        model.zero_grad()

        graph = dataset[i]
        # do random ordering: relabel nodes
        node_order = list(range(graph.number_of_nodes()))
        shuffle(node_order)
        order_mapping = dict(zip(graph.nodes(), node_order))
        graph = nx.relabel_nodes(graph, order_mapping, copy=True)

        # NOTE: when starting loop, we assume a node has already been generated
        node_count = 1
        node_embedding = [
            Variable(torch.ones(1, args.node_embedding_size)).cuda()]  # list of torch tensors, each size: 1*hidden

        loss = 0
        while node_count <= graph.number_of_nodes():
            node_neighbor = graph.subgraph(
                list(range(node_count))).adjacency_list()  # list of lists (first node is zero)
            node_neighbor_new = graph.subgraph(list(range(node_count + 1))).adjacency_list()[
                -1]  # list of new node's neighbors

            # 1 message passing
            # do 2 times message passing
            node_embedding = message_passing(node_neighbor, node_embedding, model)

            # 2 graph embedding and new node embedding
            node_embedding_cat = torch.cat(node_embedding, dim=0)
            graph_embedding = calc_graph_embedding(node_embedding_cat, model)
            init_embedding = calc_init_embedding(node_embedding_cat, model)

            # 3 f_addnode
            p_addnode = model.f_an(graph_embedding)
            if node_count < graph.number_of_nodes():
                # add node
                node_neighbor.append([])
                node_embedding.append(init_embedding)
                if is_fast:
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
                # calc loss
                loss_addnode_step = F.binary_cross_entropy(p_addnode, Variable(torch.ones((1, 1))).cuda())
                # loss_addnode_step.backward(retain_graph=True)
                loss += loss_addnode_step
                loss_addnode += loss_addnode_step.data
            else:
                # calc loss
                loss_addnode_step = F.binary_cross_entropy(p_addnode, Variable(torch.zeros((1, 1))).cuda())
                # loss_addnode_step.backward(retain_graph=True)
                loss += loss_addnode_step
                loss_addnode += loss_addnode_step.data
                break

            edge_count = 0
            while edge_count <= len(node_neighbor_new):
                if not is_fast:
                    node_embedding = message_passing(node_neighbor, node_embedding, model)
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
                    graph_embedding = calc_graph_embedding(node_embedding_cat, model)

                # 4 f_addedge
                p_addedge = model.f_ae(graph_embedding)
                if edge_count < len(node_neighbor_new):
                    # calc loss
                    loss_addedge_step = F.binary_cross_entropy(p_addedge, Variable(torch.ones((1, 1))).cuda())
                    # loss_addedge_step.backward(retain_graph=True)
                    loss += loss_addedge_step
                    loss_addedge += loss_addedge_step.data

                    # 5 f_nodes
                    # excluding the last node (which is the new node)
                    node_new_embedding_cat = node_embedding_cat[-1, :].expand(node_embedding_cat.size(0) - 1,
                                                                              node_embedding_cat.size(1))
                    s_node = model.f_s(torch.cat((node_embedding_cat[0:-1, :], node_new_embedding_cat), dim=1))
                    p_node = F.softmax(s_node.permute(1, 0))
                    # get ground truth
                    a_node = torch.zeros((1, p_node.size(1)))
                    # print('node_neighbor_new',node_neighbor_new, edge_count)
                    a_node[0, node_neighbor_new[edge_count]] = 1
                    a_node = Variable(a_node).cuda()
                    # add edge
                    node_neighbor[-1].append(node_neighbor_new[edge_count])
                    node_neighbor[node_neighbor_new[edge_count]].append(len(node_neighbor) - 1)
                    # calc loss
                    loss_node_step = F.binary_cross_entropy(p_node, a_node)
                    # loss_node_step.backward(retain_graph=True)
                    loss += loss_node_step
                    loss_node += loss_node_step.data * p_node.size(1)
                else:
                    # calc loss
                    loss_addedge_step = F.binary_cross_entropy(p_addedge, Variable(torch.zeros((1, 1))).cuda())
                    # loss_addedge_step.backward(retain_graph=True)
                    loss += loss_addedge_step
                    loss_addedge += loss_addedge_step.data
                    break

                edge_count += 1
            node_count += 1

    loss_all = loss_addnode + loss_addedge + loss_node
    # if epoch % args.epochs_log==0:
    #     print('Epoch: {}/{}, train loss: {:.6f}, graph type: {}, hidden: {}'.format(
    #         epoch, args.epochs, loss_all[0], args.graph_type, args.node_embedding_size))
    return loss_all.item() / len(dataset)
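
# test_DGMG_epoch samples args.test_graph_num graphs from scratch by ancestral
# sampling: sample_tensor() draws the add-node/add-edge decisions from f_an/f_ae,
# and gumbel_softmax() (low temperature) picks the endpoint of each new edge.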
def test_DGMG_epoch(args, model, is_fast=False):
    model.eval()
    graph_num = args.test_graph_num
    graphs_generated = []
    for i in range(graph_num):
        # NOTE: when starting loop, we assume a node has already been generated
        node_neighbor = [[]]  # list of lists (first node is zero)
        node_embedding = [
            Variable(torch.ones(1, args.node_embedding_size)).cuda()]  # list of torch tensors, each size: 1*hidden

        node_count = 1
        while node_count <= args.max_num_node:
            # 1 message passing
            # do 2 times message passing
            node_embedding = message_passing(node_neighbor, node_embedding, model)

            # 2 graph embedding and new node embedding
            node_embedding_cat = torch.cat(node_embedding, dim=0)
            graph_embedding = calc_graph_embedding(node_embedding_cat, model)
            init_embedding = calc_init_embedding(node_embedding_cat, model)

            # 3 f_addnode
            p_addnode = model.f_an(graph_embedding)
            a_addnode = sample_tensor(p_addnode)
            # print(a_addnode.data[0][0])
            if a_addnode.data[0][0] == 1:
                # print('add node')
                # add node
                node_neighbor.append([])
                node_embedding.append(init_embedding)
                if is_fast:
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
            else:
                break

            edge_count = 0
            while edge_count < args.max_num_node:
                if not is_fast:
                    node_embedding = message_passing(node_neighbor, node_embedding, model)
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
                    graph_embedding = calc_graph_embedding(node_embedding_cat, model)

                # 4 f_addedge
                p_addedge = model.f_ae(graph_embedding)
                a_addedge = sample_tensor(p_addedge)
                # print(a_addedge.data[0][0])
                if a_addedge.data[0][0] == 1:
                    # print('add edge')
                    # 5 f_nodes
                    # excluding the last node (which is the new node)
                    node_new_embedding_cat = node_embedding_cat[-1, :].expand(node_embedding_cat.size(0) - 1,
                                                                              node_embedding_cat.size(1))
                    s_node = model.f_s(torch.cat((node_embedding_cat[0:-1, :], node_new_embedding_cat), dim=1))
                    p_node = F.softmax(s_node.permute(1, 0))
                    a_node = gumbel_softmax(p_node, temperature=0.01)
                    _, a_node_id = a_node.topk(1)
                    a_node_id = int(a_node_id.data[0][0])
                    # add edge
                    node_neighbor[-1].append(a_node_id)
                    node_neighbor[a_node_id].append(len(node_neighbor) - 1)
                else:
                    break

                edge_count += 1
            node_count += 1

        # save graph
        node_neighbor_dict = dict(zip(list(range(len(node_neighbor))), node_neighbor))
        graph = nx.from_dict_of_lists(node_neighbor_dict)
        graphs_generated.append(graph)

    return graphs_generated
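
# test_DGMG_2 is the link-prediction style test: it runs the generator for
# len(test_graph) steps, then resets the neighbor lists to the ground-truth
# edges of test_graph and lets the model generate one extra node, so the
# predicted edges of that node can be compared with a held-out node's edges.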
def test_DGMG_2(args, model, test_graph, is_fast=False):
    model.eval()
    graph_num = args.test_graph_num
    graphs_generated = []
    # for i in range(graph_num):
    # NOTE: when starting loop, we assume a node has already been generated
    node_neighbor = [[]]  # list of lists (first node is zero)
    node_embedding = [
        Variable(torch.ones(1, args.node_embedding_size)).cuda()]  # list of torch tensors, each size: 1*hidden
    node_max = len(test_graph.nodes())

    node_count = 1
    while node_count <= node_max:
        # 1 message passing
        # do 2 times message passing
        node_embedding = message_passing(node_neighbor, node_embedding, model)

        # 2 graph embedding and new node embedding
        node_embedding_cat = torch.cat(node_embedding, dim=0)
        graph_embedding = calc_graph_embedding(node_embedding_cat, model)
        init_embedding = calc_init_embedding(node_embedding_cat, model)

        # 3 f_addnode
        p_addnode = model.f_an(graph_embedding)
        a_addnode = sample_tensor(p_addnode)
        if a_addnode.data[0][0] == 1:
            # add node
            node_neighbor.append([])
            node_embedding.append(init_embedding)
            if is_fast:
                node_embedding_cat = torch.cat(node_embedding, dim=0)
        else:
            break

        edge_count = 0
        while edge_count < args.max_num_node:
            if not is_fast:
                node_embedding = message_passing(node_neighbor, node_embedding, model)
                node_embedding_cat = torch.cat(node_embedding, dim=0)
                graph_embedding = calc_graph_embedding(node_embedding_cat, model)

            # 4 f_addedge
            p_addedge = model.f_ae(graph_embedding)
            a_addedge = sample_tensor(p_addedge)
            if a_addedge.data[0][0] == 1:
                # 5 f_nodes
                # excluding the last node (which is the new node)
                node_new_embedding_cat = node_embedding_cat[-1, :].expand(node_embedding_cat.size(0) - 1,
                                                                          node_embedding_cat.size(1))
                s_node = model.f_s(torch.cat((node_embedding_cat[0:-1, :], node_new_embedding_cat), dim=1))
                p_node = F.softmax(s_node.permute(1, 0))
                a_node = gumbel_softmax(p_node, temperature=0.01)
                _, a_node_id = a_node.topk(1)
                a_node_id = int(a_node_id.data[0][0])
                # add edge
                node_neighbor[-1].append(a_node_id)
                node_neighbor[a_node_id].append(len(node_neighbor) - 1)
            else:
                break

            edge_count += 1
        node_count += 1

    # clear node_neighbor and build it again from the ground-truth test graph
    node_neighbor = []
    for n in range(node_max):
        temp_neighbor = [k for k in test_graph.edge[n]]
        node_neighbor.append(temp_neighbor)

    # now add the last node for real
    # 1 message passing
    # do 2 times message passing
    try:
        node_embedding = message_passing(node_neighbor, node_embedding, model)

        # 2 graph embedding and new node embedding
        node_embedding_cat = torch.cat(node_embedding, dim=0)
        graph_embedding = calc_graph_embedding(node_embedding_cat, model)
        init_embedding = calc_init_embedding(node_embedding_cat, model)

        # 3 f_addnode
        p_addnode = model.f_an(graph_embedding)
        a_addnode = sample_tensor(p_addnode)
        if a_addnode.data[0][0] == 1:
            # add node
            node_neighbor.append([])
            node_embedding.append(init_embedding)
            if is_fast:
                node_embedding_cat = torch.cat(node_embedding, dim=0)
            edge_count = 0
            while edge_count < args.max_num_node:
                if not is_fast:
                    node_embedding = message_passing(node_neighbor, node_embedding, model)
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
                    graph_embedding = calc_graph_embedding(node_embedding_cat, model)

                # 4 f_addedge
                p_addedge = model.f_ae(graph_embedding)
                a_addedge = sample_tensor(p_addedge)
                if a_addedge.data[0][0] == 1:
                    # 5 f_nodes
                    # excluding the last node (which is the new node)
                    node_new_embedding_cat = node_embedding_cat[-1, :].expand(node_embedding_cat.size(0) - 1,
                                                                              node_embedding_cat.size(1))
                    s_node = model.f_s(torch.cat((node_embedding_cat[0:-1, :], node_new_embedding_cat), dim=1))
                    p_node = F.softmax(s_node.permute(1, 0))
                    a_node = gumbel_softmax(p_node, temperature=0.01)
                    _, a_node_id = a_node.topk(1)
                    a_node_id = int(a_node_id.data[0][0])
                    # add edge
                    node_neighbor[-1].append(a_node_id)
                    node_neighbor[a_node_id].append(len(node_neighbor) - 1)
                else:
                    break

                edge_count += 1
            node_count += 1
    except Exception as e:
        print('error', e)

    # save graph
    node_neighbor_dict = dict(zip(list(range(len(node_neighbor))), node_neighbor))
    graph = nx.from_dict_of_lists(node_neighbor_dict)
    graphs_generated.append(graph)

    return graphs_generated
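
# Example (sketch): predict the edges of one extra node on a 5-node grid fragment.
#   g = nx.convert_node_labels_to_integers(nx.grid_2d_graph(2, 3))
#   g.remove_node(5)
#   predicted = test_DGMG_2(args, model, g)[0]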
########### train function for LSTM + VAE
def train_DGMG(args, dataset_train, model):
    epoch = 1
    optimizer = optim.Adam(list(model.parameters()), lr=args.lr)
    scheduler = MultiStepLR(optimizer, milestones=args.milestones, gamma=args.lr_rate)

    # start main loop
    time_all = np.zeros(args.epochs)
    while epoch <= args.epochs:
        time_start = tm.time()
        # train
        train_DGMG_epoch(epoch, args, model, dataset_train, optimizer, scheduler, is_fast=args.is_fast)
        time_end = tm.time()
        time_all[epoch - 1] = time_end - time_start
        print('time used', time_all[epoch - 1])
        # test
        if epoch % args.epochs_test == 0 and epoch >= args.epochs_test_start:
            graphs = test_DGMG_epoch(args, model, is_fast=args.is_fast)
            fname = args.graph_save_path + args.fname_pred + str(epoch) + '.dat'
            save_graph_list(graphs, fname)
            # print('test done, graphs saved')
        # save model checkpoint
        if args.save and epoch % args.epochs_save == 0:
            fname = args.model_save_path + args.fname + 'model_' + str(epoch) + '.dat'
            torch.save(model.state_dict(), fname)
        epoch += 1
    np.save(args.timing_save_path + args.fname, time_all)
def neigh_to_mat(neigh, size):
    ret_list = np.zeros(size)
    for i in neigh:
        ret_list[i] = 1
    return ret_list
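
# e.g. neigh_to_mat([0, 2], 4) -> array([1., 0., 1., 0.])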
def calc_lable_result(test_graphs, returned_graphs):
    labels = []
    results = []
    i = 0
    for test_graph in test_graphs:
        n = len(test_graph.nodes())
        returned_graph = returned_graphs[i]
        label = neigh_to_mat([k for k in test_graph.edge[n - 1]], n)
        try:
            result = neigh_to_mat([k for k in returned_graph.edge[n - 1]], n)
        except Exception:
            result = np.zeros(n)
        labels.append(label)
        results.append(result)
        i += 1
    return labels, results
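
# For each test graph with n nodes, calc_lable_result pairs the ground-truth
# adjacency row of node n-1 with the generated one; the except-branch falls back
# to an all-zero row when the generated graph has no node n-1.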
def evaluate(labels, results):
    mae_list = []
    roc_score_list = []
    ap_score_list = []
    precision_list = []
    recall_list = []
    iter = 0
    for result in results:
        label = labels[iter]
        iter += 1
        part1 = label[result == 1]
        part2 = part1[part1 == 1]
        part3 = part1[part1 == 0]
        part4 = label[result == 0]
        part5 = part4[part4 == 1]
        tp = len(part2)
        fp = len(part3)
        fn = part5.sum()
        if tp + fp > 0:
            precision = tp / (tp + fp)
        else:
            precision = 0
        if tp + fn > 0:
            recall = tp / (tp + fn)
        else:
            recall = 0
        precision_list.append(precision)
        recall_list.append(recall)

        positive = result[label == 1]
        if len(positive) <= len(list(result[label == 0])):
            negative = random.sample(list(result[label == 0]), len(positive))
        else:
            negative = result[label == 0]
            positive = random.sample(list(result[label == 1]), len(negative))
        preds_all = np.hstack([positive, negative])
        labels_all = np.hstack([np.ones(len(positive)), np.zeros(len(positive))])
        if len(labels_all) > 0:
            roc_score = roc_auc_score(labels_all, preds_all)
            ap_score = average_precision_score(labels_all, preds_all)
            roc_score_list.append(roc_score)
            ap_score_list.append(ap_score)

        mae = 0
        for x in range(len(result)):
            if result[x] != label[x]:
                mae += 1
        mae = mae / len(label)
        mae_list.append(mae)

    mean_roc = mean(roc_score_list)
    mean_ap = mean(ap_score_list)
    mean_precision = mean(precision_list)
    mean_recall = mean(recall_list)
    mean_mae = mean(mae_list)
    print('roc_score ' + str(mean_roc))
    print('ap_score ' + str(mean_ap))
    print('precision ' + str(mean_precision))
    print('recall ' + str(mean_recall))
    print('mae ' + str(mean_mae))
    return mean_roc, mean_ap, mean_precision, mean_recall
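
# Minimal sanity check with hypothetical inputs (last-node adjacency rows):
#   labels = [np.array([1., 0., 1.]), np.array([0., 1., 0.])]
#   results = [np.array([1., 0., 0.]), np.array([0., 1., 0.])]
#   evaluate(labels, results)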
def load_data(dataset, degree_as_tag):
    '''
        dataset: name of dataset (expects dataset/<name>/<name>.txt)
    '''
    print('loading data')
    g_list = []
    label_dict = {}
    feat_dict = {}

    with open('dataset/%s/%s.txt' % (dataset, dataset), 'r') as f:
        n_g = int(f.readline().strip())
        for i in range(n_g):
            row = f.readline().strip().split()
            n, l = [int(w) for w in row]
            if not l in label_dict:
                mapped = len(label_dict)
                label_dict[l] = mapped
            g = nx.Graph()
            node_tags = []
            node_features = []
            n_edges = 0
            for j in range(n):
                g.add_node(j)
                row = f.readline().strip().split()
                tmp = int(row[1]) + 2
                if tmp == len(row):
                    # no node attributes
                    row = [int(w) for w in row]
                    attr = None
                else:
                    row, attr = [int(w) for w in row[:tmp]], np.array([float(w) for w in row[tmp:]])
                if not row[0] in feat_dict:
                    mapped = len(feat_dict)
                    feat_dict[row[0]] = mapped
                node_tags.append(feat_dict[row[0]])

                if tmp > len(row):
                    node_features.append(attr)

                n_edges += row[1]
                for k in range(2, len(row)):
                    g.add_edge(j, row[k])

            if node_features != []:
                node_features = np.stack(node_features)
                node_feature_flag = True
            else:
                node_features = None
                node_feature_flag = False

            assert len(g) == n
            g_list.append(g)

    return g_list, len(label_dict)


########### NLL evaluation for a saved DGMG model
def train_DGMG_nll(args, dataset_train, dataset_test, model, max_iter=1000):
    # load existing model
    fname = args.model_save_path + args.fname + 'model_' + str(args.load_epoch) + '.dat'
    model.load_state_dict(torch.load(fname))
    fname_output = args.nll_save_path + args.note + '_' + args.graph_type + '.csv'
    with open(fname_output, 'w+') as f:
        f.write('train,test\n')
        # start main loop
        for iter in range(max_iter):
            nll_train = train_DGMG_forward_epoch(args, model, dataset_train, is_fast=args.is_fast)
            nll_test = train_DGMG_forward_epoch(args, model, dataset_test, is_fast=args.is_fast)
            print('train', nll_train, 'test', nll_test)
            f.write(str(nll_train) + ',' + str(nll_test) + '\n')
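
# Expected text format of dataset/<name>/<name>.txt (as parsed above):
#   line 1: number of graphs
#   per graph: one header line "num_nodes label", then one line per node:
#     "tag num_neighbors neighbor_0 ... neighbor_k [float attributes...]"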
if __name__ == '__main__':
    my_graph = nx.Graph()
    edges = nx.read_edgelist("data/main_graphs_COLLAB/my.txt")
    my_graph.add_edges_from(edges.edges())
    integers = nx.convert_node_labels_to_integers(my_graph)
    d_graph = nx.grid_2d_graph(2, 3)
    integers2 = nx.convert_node_labels_to_integers(d_graph)

    args = Args_DGMG()
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.cuda)
    print('CUDA', args.cuda)
    print('File name prefix', args.fname)

    graphs = []
    for i in range(4, 10):
        graphs.append(nx.ladder_graph(i))
    model = DGM_graphs(h_size=args.node_embedding_size).cuda()
    if args.graph_type == 'ladder_small':
        graphs = []
        for i in range(2, 11):
            graphs.append(nx.ladder_graph(i))
        args.max_prev_node = 10
    # elif args.graph_type == 'caveman_small':
    #     graphs = []
    #     for i in range(2, 5):
    #         for j in range(2, 6):
    #             for k in range(10):
    #                 graphs.append(nx.relaxed_caveman_graph(i, j, p=0.1))
    #     args.max_prev_node = 20
    elif args.graph_type == 'caveman_small':
        graphs = []
        for i in range(2, 3):
            for j in range(6, 11):
                for k in range(20):
                    graphs.append(caveman_special(i, j, p_edge=0.8))
        args.max_prev_node = 20
    elif args.graph_type == 'grid_small':
        graphs = []
        for i in range(2, 3):
            for j in range(2, 4):
                graphs.append(nx.grid_2d_graph(i, j))
        args.max_prev_node = 15
    elif args.graph_type == 'barabasi_small':
        graphs = []
        for i in range(4, 21):
            for j in range(3, 4):
                for k in range(10):
                    graphs.append(nx.barabasi_albert_graph(i, j))
        args.max_prev_node = 20
    elif args.graph_type == 'enzymes_small':
        graphs_raw = Graph_load_batch(min_num_nodes=10, name='ENZYMES')
        graphs = []
        for G in graphs_raw:
            if G.number_of_nodes() <= 20:
                graphs.append(G)
        args.max_prev_node = 15
    elif args.graph_type == 'citeseer_small':
        _, _, G = Graph_load(dataset='citeseer')
        G = max(nx.connected_component_subgraphs(G), key=len)
        G = nx.convert_node_labels_to_integers(G)
        graphs = []
        for i in range(G.number_of_nodes()):
            G_ego = nx.ego_graph(G, i, radius=1)
            if (G_ego.number_of_nodes() >= 4) and (G_ego.number_of_nodes() <= 20):
                graphs.append(G_ego)
        shuffle(graphs)
        graphs = graphs[0:200]
        args.max_prev_node = 15
    else:
        # any other graph_type is treated as a text dataset (e.g. IMDBBINARY)
        graphs, num_classes = load_data(args.graph_type, True)
        small_graphs = []
        for i in range(len(graphs)):
            if graphs[i].number_of_nodes() < 13:
                small_graphs.append(graphs[i])
        graphs = small_graphs
        args.max_prev_node = 12
    # remove self loops
    for graph in graphs:
        edges_with_selfloops = graph.selfloop_edges()
        if len(edges_with_selfloops) > 0:
            graph.remove_edges_from(edges_with_selfloops)

    # split datasets
    random.seed(123)
    shuffle(graphs)
    graphs_len = len(graphs)
    graphs_test = graphs[int(0.8 * graphs_len):]
    graphs_train = graphs[0:int(0.8 * graphs_len)]
    # graphs_test = graphs[int(0.8 * graphs_len):]
    # graphs_validate = graphs[int(0.7 * graphs_len):int(0.8 * graphs_len)]
    # graphs_train = graphs[0:int(0.7 * graphs_len)]

    args.max_num_node = max([graphs[i].number_of_nodes() for i in range(len(graphs))])

    # show graphs statistics
    print('total graph num: {}, training set: {}'.format(len(graphs), len(graphs_train)))
    print('max number node: {}'.format(args.max_num_node))
    print('max previous node: {}'.format(args.max_prev_node))

    test_graph = nx.grid_2d_graph(2, 3)
    test_graph.remove_node(test_graph.nodes()[5])
    # save ground truth graphs
    # save_graph_list(graphs, args.graph_save_path + args.fname_train + '0.dat')
    # save_graph_list(graphs, args.graph_save_path + args.fname_test + '0.dat')
    # print('train and test graphs saved')

    ## if use pre-saved graphs
    # dir_input = "graphs/"
    # fname_test = args.graph_save_path + args.fname_test + '0.dat'
    # graphs = load_graph_list(fname_test, is_real=True)
    # graphs_test = graphs[int(0.8 * graphs_len):]
    # graphs_train = graphs[0:int(0.8 * graphs_len)]
    # graphs_validate = graphs[0:int(0.2 * graphs_len)]

    # print('train')
    # for graph in graphs_validate:
    #     print(graph.number_of_nodes())
    # print('test')
    # for graph in graphs_test:
    #     print(graph.number_of_nodes())

    ### train
    train_DGMG(args, graphs, model)

    ### calc nll
    # train_DGMG_nll(args, graphs_validate, graphs_test, model, max_iter=1000)

    test_graph = nx.convert_node_labels_to_integers(test_graph)
    test_DGMG_2(args, model, test_graph)

    # labels, results = calc_lable_result(test_graphs, eval_graphs)
    # for j in range(1000):
    #     graph = graphs[0]
    #     # do random ordering: relabel nodes
    #     node_order = list(range(graph.number_of_nodes()))
    #     shuffle(node_order)
    #     order_mapping = dict(zip(graph.nodes(), node_order))
    #     graph = nx.relabel_nodes(graph, order_mapping, copy=True)
    #     print(graph.nodes())
    # evaluate(labels, results)
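
    # A full evaluation pass would look roughly like this (sketch, assuming a
    # held-out list `test_graphs` of graphs whose last node is to be predicted):
    #   eval_graphs = [test_DGMG_2(args, model, g)[0] for g in test_graphs]
    #   labels, results = calc_lable_result(test_graphs, eval_graphs)
    #   evaluate(labels, results)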