Browse Source

change main Deep

master
Ali Amiri 4 years ago
parent
commit
604d9f1205
1 changed files with 347 additions and 304 deletions
  1. 347
    304
      main_DeepGMG.py

+ 347
- 304
main_DeepGMG.py View File

# an implementation for "Learning Deep Generative Models of Graphs" # an implementation for "Learning Deep Generative Models of Graphs"
from baselines.graphvae.util import load_data
import os

import random
from statistics import mean

import networkx as nx
import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score

from main import * from main import *



class Args_DGMG(): class Args_DGMG():
def __init__(self): def __init__(self):
### CUDA ### CUDA
self.cuda = 1
self.cuda = 0


### model type ### model type
self.note = 'Baseline_DGMG' # do GCN after adding each edge
self.note = 'Baseline_DGMG' # do GCN after adding each edge
# self.note = 'Baseline_DGMG_fast' # do GCN only after adding each node # self.note = 'Baseline_DGMG_fast' # do GCN only after adding each node


### data config ### data config
# self.graph_type = 'caveman_small' # self.graph_type = 'caveman_small'
# self.graph_type = 'grid_small'
self.graph_type = 'IMDBBINARY'
self.graph_type = 'grid_small'
# self.graph_type = 'ladder_small' # self.graph_type = 'ladder_small'
# self.graph_type = 'enzymes_small' # self.graph_type = 'enzymes_small'
# self.graph_type = 'barabasi_small' # self.graph_type = 'barabasi_small'
self.node_embedding_size = 64 self.node_embedding_size = 64
self.test_graph_num = 200 self.test_graph_num = 200



### training config ### training config
self.epochs = 2000 # now one epoch means self.batch_ratio x batch_size
self.load_epoch = 2000
self.epochs_test_start = 100
self.epochs_test = 100
self.epochs_log = 2
self.epochs_save = 100
self.epochs = 100 # now one epoch means self.batch_ratio x batch_size
self.load_epoch = 100
self.epochs_test_start = 10
self.epochs_test = 10
self.epochs_log = 10
self.epochs_save = 10
if 'fast' in self.note: if 'fast' in self.note:
self.is_fast = True self.is_fast = True
else: else:
self.figure_prediction_save_path = 'figures_prediction/' self.figure_prediction_save_path = 'figures_prediction/'
self.nll_save_path = 'nll/' self.nll_save_path = 'nll/'



self.fname = self.note + '_' + self.graph_type + '_' + str(self.node_embedding_size) self.fname = self.note + '_' + self.graph_type + '_' + str(self.node_embedding_size)
self.fname_pred = self.note + '_' + self.graph_type + '_' + str(self.node_embedding_size) + '_pred_' self.fname_pred = self.note + '_' + self.graph_type + '_' + str(self.node_embedding_size) + '_pred_'
self.fname_train = self.note + '_' + self.graph_type + '_' + str(self.node_embedding_size) + '_train_' self.fname_train = self.note + '_' + self.graph_type + '_' + str(self.node_embedding_size) + '_train_'
self.save = True self.save = True




def train_DGMG_epoch(epoch, args, model, dataset, optimizer, scheduler, is_fast = False):
def train_DGMG_epoch(epoch, args, model, dataset, optimizer, scheduler, is_fast=False):
model.train() model.train()
graph_num = len(dataset) graph_num = len(dataset)
order = list(range(graph_num)) order = list(range(graph_num))
shuffle(order) shuffle(order)



loss_addnode = 0 loss_addnode = 0
loss_addedge = 0 loss_addedge = 0
loss_node = 0 loss_node = 0
order_mapping = dict(zip(graph.nodes(), node_order)) order_mapping = dict(zip(graph.nodes(), node_order))
graph = nx.relabel_nodes(graph, order_mapping, copy=True) graph = nx.relabel_nodes(graph, order_mapping, copy=True)



# NOTE: when starting loop, we assume a node has already been generated # NOTE: when starting loop, we assume a node has already been generated
node_count = 1 node_count = 1
node_embedding = [Variable(torch.ones(1,args.node_embedding_size)).cuda()] # list of torch tensors, each size: 1*hidden
node_embedding = [
Variable(torch.ones(1, args.node_embedding_size)).cuda()] # list of torch tensors, each size: 1*hidden


loss = 0 loss = 0
while node_count<=graph.number_of_nodes():
node_neighbor = graph.subgraph(list(range(node_count))).adjacency_list() # list of lists (first node is zero)
node_neighbor_new = graph.subgraph(list(range(node_count+1))).adjacency_list()[-1] # list of new node's neighbors
while node_count <= graph.number_of_nodes():
node_neighbor = graph.subgraph(
list(range(node_count))).adjacency_list() # list of lists (first node is zero)
node_neighbor_new = graph.subgraph(list(range(node_count + 1))).adjacency_list()[
-1] # list of new node's neighbors


# 1 message passing # 1 message passing
# do 2 times message passing # do 2 times message passing
if is_fast: if is_fast:
node_embedding_cat = torch.cat(node_embedding, dim=0) node_embedding_cat = torch.cat(node_embedding, dim=0)
# calc loss # calc loss
loss_addnode_step = F.binary_cross_entropy(p_addnode,Variable(torch.ones((1,1))).cuda())
loss_addnode_step = F.binary_cross_entropy(p_addnode, Variable(torch.ones((1, 1))).cuda())
# loss_addnode_step.backward(retain_graph=True) # loss_addnode_step.backward(retain_graph=True)
loss += loss_addnode_step loss += loss_addnode_step
loss_addnode += loss_addnode_step.data loss_addnode += loss_addnode_step.data
loss_addnode += loss_addnode_step.data loss_addnode += loss_addnode_step.data
break break



edge_count = 0 edge_count = 0
while edge_count<=len(node_neighbor_new):
while edge_count <= len(node_neighbor_new):
if not is_fast: if not is_fast:
node_embedding = message_passing(node_neighbor, node_embedding, model) node_embedding = message_passing(node_neighbor, node_embedding, model)
node_embedding_cat = torch.cat(node_embedding, dim=0) node_embedding_cat = torch.cat(node_embedding, dim=0)


# 5 f_nodes # 5 f_nodes
# excluding the last node (which is the new node) # excluding the last node (which is the new node)
node_new_embedding_cat = node_embedding_cat[-1,:].expand(node_embedding_cat.size(0)-1,node_embedding_cat.size(1))
s_node = model.f_s(torch.cat((node_embedding_cat[0:-1,:],node_new_embedding_cat),dim=1))
p_node = F.softmax(s_node.permute(1,0))
node_new_embedding_cat = node_embedding_cat[-1, :].expand(node_embedding_cat.size(0) - 1,
node_embedding_cat.size(1))
s_node = model.f_s(torch.cat((node_embedding_cat[0:-1, :], node_new_embedding_cat), dim=1))
p_node = F.softmax(s_node.permute(1, 0))
# get ground truth # get ground truth
a_node = torch.zeros((1,p_node.size(1)))
a_node = torch.zeros((1, p_node.size(1)))
# print('node_neighbor_new',node_neighbor_new, edge_count) # print('node_neighbor_new',node_neighbor_new, edge_count)
a_node[0,node_neighbor_new[edge_count]] = 1
a_node[0, node_neighbor_new[edge_count]] = 1
a_node = Variable(a_node).cuda() a_node = Variable(a_node).cuda()
# add edge # add edge
node_neighbor[-1].append(node_neighbor_new[edge_count]) node_neighbor[-1].append(node_neighbor_new[edge_count])
node_neighbor[node_neighbor_new[edge_count]].append(len(node_neighbor)-1)
node_neighbor[node_neighbor_new[edge_count]].append(len(node_neighbor) - 1)
# calc loss # calc loss
loss_node_step = F.binary_cross_entropy(p_node,a_node)
loss_node_step = F.binary_cross_entropy(p_node, a_node)
# loss_node_step.backward(retain_graph=True) # loss_node_step.backward(retain_graph=True)
loss += loss_node_step loss += loss_node_step
loss_node += loss_node_step.data loss_node += loss_node_step.data


loss_all = loss_addnode + loss_addedge + loss_node loss_all = loss_addnode + loss_addedge + loss_node


if epoch % args.epochs_log==0:
if epoch % args.epochs_log == 0:
print('Epoch: {}/{}, train loss: {:.6f}, graph type: {}, hidden: {}'.format( print('Epoch: {}/{}, train loss: {:.6f}, graph type: {}, hidden: {}'.format(
epoch, args.epochs,loss_all.item(), args.graph_type, args.node_embedding_size))

epoch, args.epochs, loss_all, args.graph_type, args.node_embedding_size))


# loss_sum += loss.data[0]*x.size(0) # loss_sum += loss.data[0]*x.size(0)
# return loss_sum # return loss_sum






def train_DGMG_forward_epoch(args, model, dataset, is_fast = False):
    """Run a forward-only pass over ``dataset`` and return the summed step losses per graph.

    For every graph the nodes are randomly relabelled, then the graph is
    replayed node by node: at each step the model's add-node, add-edge and
    node-selection outputs are scored with binary cross entropy against the
    decisions that reproduce the graph. No ``backward()`` or optimizer step is
    performed here (those calls are commented out), so model weights are not
    updated by this function.

    NOTE(review): the nesting below was reconstructed from control flow because
    the reviewed view of this file had lost its leading whitespace — confirm
    against the repository copy.

    Args:
        args: configuration object; ``args.node_embedding_size`` is read.
        model: network exposing ``f_an``, ``f_ae`` and ``f_s``; it is also
            passed to ``message_passing``, ``calc_graph_embedding`` and
            ``calc_init_embedding``.
        dataset: indexable collection of networkx graphs.
        is_fast: when true, message passing is done once per node instead of
            before every edge decision.

    Returns:
        ``loss_all[0] / len(dataset)`` where ``loss_all`` is the sum of the
        add-node, add-edge and node-selection loss accumulators.
    """
    # NOTE(review): the model is put in training mode even though no gradient
    # step is taken here — confirm this is intended for an evaluation pass.
    model.train()
    graph_num = len(dataset)
    order = list(range(graph_num))
    shuffle(order)


    loss_addnode = 0
    loss_addedge = 0
    loss_node = 0
    for i in order:
        model.zero_grad()

        graph = dataset[i]
        # do random ordering: relabel nodes
        node_order = list(range(graph.number_of_nodes()))
        shuffle(node_order)
        order_mapping = dict(zip(graph.nodes(), node_order))
        graph = nx.relabel_nodes(graph, order_mapping, copy=True)


        # NOTE: when starting loop, we assume a node has already been generated
        node_count = 1
        node_embedding = [Variable(torch.ones(1,args.node_embedding_size)).cuda()] # list of torch tensors, each size: 1*hidden


        # per-graph loss tensor; accumulated but never back-propagated or returned
        loss = 0
        while node_count<=graph.number_of_nodes():
            # adjacency_list() is the networkx 1.x API (removed in networkx 2.0)
            node_neighbor = graph.subgraph(list(range(node_count))).adjacency_list() # list of lists (first node is zero)
            node_neighbor_new = graph.subgraph(list(range(node_count+1))).adjacency_list()[-1] # list of new node's neighbors

            # 1 message passing
            # do 2 times message passing
            node_embedding = message_passing(node_neighbor, node_embedding, model)

            # 2 graph embedding and new node embedding
            node_embedding_cat = torch.cat(node_embedding, dim=0)
            graph_embedding = calc_graph_embedding(node_embedding_cat, model)
            init_embedding = calc_init_embedding(node_embedding_cat, model)

            # 3 f_addnode
            p_addnode = model.f_an(graph_embedding)
            if node_count < graph.number_of_nodes():
                # add node
                node_neighbor.append([])
                node_embedding.append(init_embedding)
                if is_fast:
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
                # calc loss (target 1: another node must be added)
                loss_addnode_step = F.binary_cross_entropy(p_addnode,Variable(torch.ones((1,1))).cuda())
                # loss_addnode_step.backward(retain_graph=True)
                loss += loss_addnode_step
                loss_addnode += loss_addnode_step.data
            else:
                # calc loss (target 0: all nodes are present, generation stops)
                loss_addnode_step = F.binary_cross_entropy(p_addnode, Variable(torch.zeros((1, 1))).cuda())
                # loss_addnode_step.backward(retain_graph=True)
                loss += loss_addnode_step
                loss_addnode += loss_addnode_step.data
                break


            edge_count = 0
            # one iteration per true neighbour plus a final "stop adding edges" step
            while edge_count<=len(node_neighbor_new):
                if not is_fast:
                    node_embedding = message_passing(node_neighbor, node_embedding, model)
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
                    graph_embedding = calc_graph_embedding(node_embedding_cat, model)

                # 4 f_addedge
                p_addedge = model.f_ae(graph_embedding)

                if edge_count < len(node_neighbor_new):
                    # calc loss
                    loss_addedge_step = F.binary_cross_entropy(p_addedge, Variable(torch.ones((1, 1))).cuda())
                    # loss_addedge_step.backward(retain_graph=True)
                    loss += loss_addedge_step
                    loss_addedge += loss_addedge_step.data

                    # 5 f_nodes
                    # excluding the last node (which is the new node)
                    node_new_embedding_cat = node_embedding_cat[-1,:].expand(node_embedding_cat.size(0)-1,node_embedding_cat.size(1))
                    s_node = model.f_s(torch.cat((node_embedding_cat[0:-1,:],node_new_embedding_cat),dim=1))
                    p_node = F.softmax(s_node.permute(1,0))
                    # get ground truth
                    a_node = torch.zeros((1,p_node.size(1)))
                    # print('node_neighbor_new',node_neighbor_new, edge_count)
                    a_node[0,node_neighbor_new[edge_count]] = 1
                    a_node = Variable(a_node).cuda()
                    # add edge
                    node_neighbor[-1].append(node_neighbor_new[edge_count])
                    node_neighbor[node_neighbor_new[edge_count]].append(len(node_neighbor)-1)
                    # calc loss
                    loss_node_step = F.binary_cross_entropy(p_node,a_node)
                    # loss_node_step.backward(retain_graph=True)
                    loss += loss_node_step
                    # scaled by the number of candidate nodes before accumulating
                    loss_node += loss_node_step.data*p_node.size(1)

                else:
                    # calc loss
                    loss_addedge_step = F.binary_cross_entropy(p_addedge, Variable(torch.zeros((1, 1))).cuda())
                    # loss_addedge_step.backward(retain_graph=True)
                    loss += loss_addedge_step
                    loss_addedge += loss_addedge_step.data
                    break

                edge_count += 1
            node_count += 1


    loss_all = loss_addnode + loss_addedge + loss_node

    # if epoch % args.epochs_log==0:
    #     print('Epoch: {}/{}, train loss: {:.6f}, graph type: {}, hidden: {}'.format(
    #         epoch, args.epochs,loss_all[0], args.graph_type, args.node_embedding_size))


    # NOTE(review): indexing with [0] presumably assumes a 1-element tensor; if
    # the accumulated loss is 0-dimensional on the installed PyTorch this needs
    # .item() instead — confirm.
    return loss_all[0]/len(dataset)







def test_DGMG_epoch(args, model, is_fast=False): def test_DGMG_epoch(args, model, is_fast=False):
model.eval() model.eval()
graph_num = args.test_graph_num graph_num = args.test_graph_num
for i in range(graph_num): for i in range(graph_num):
# NOTE: when starting loop, we assume a node has already been generated # NOTE: when starting loop, we assume a node has already been generated
node_neighbor = [[]] # list of lists (first node is zero) node_neighbor = [[]] # list of lists (first node is zero)
node_embedding = [Variable(torch.ones(1,args.node_embedding_size)).cuda()] # list of torch tensors, each size: 1*hidden
node_embedding = [
Variable(torch.ones(1, args.node_embedding_size)).cuda()] # list of torch tensors, each size: 1*hidden


node_count = 1 node_count = 1
while node_count<=args.max_num_node:
while node_count <= args.max_num_node:
# 1 message passing # 1 message passing
# do 2 times message passing # do 2 times message passing
node_embedding = message_passing(node_neighbor, node_embedding, model) node_embedding = message_passing(node_neighbor, node_embedding, model)
p_addnode = model.f_an(graph_embedding) p_addnode = model.f_an(graph_embedding)
a_addnode = sample_tensor(p_addnode) a_addnode = sample_tensor(p_addnode)
# print(a_addnode.data[0][0]) # print(a_addnode.data[0][0])
if a_addnode.data[0][0]==1:
if a_addnode.data[0][0] == 1:
# print('add node') # print('add node')
# add node # add node
node_neighbor.append([]) node_neighbor.append([])
break break


edge_count = 0 edge_count = 0
while edge_count<args.max_num_node:
while edge_count < args.max_num_node:
if not is_fast: if not is_fast:
node_embedding = message_passing(node_neighbor, node_embedding, model) node_embedding = message_passing(node_neighbor, node_embedding, model)
node_embedding_cat = torch.cat(node_embedding, dim=0) node_embedding_cat = torch.cat(node_embedding, dim=0)
a_addedge = sample_tensor(p_addedge) a_addedge = sample_tensor(p_addedge)
# print(a_addedge.data[0][0]) # print(a_addedge.data[0][0])


if a_addedge.data[0][0]==1:
if a_addedge.data[0][0] == 1:
# print('add edge') # print('add edge')
# 5 f_nodes # 5 f_nodes
# excluding the last node (which is the new node) # excluding the last node (which is the new node)
node_new_embedding_cat = node_embedding_cat[-1,:].expand(node_embedding_cat.size(0)-1,node_embedding_cat.size(1))
s_node = model.f_s(torch.cat((node_embedding_cat[0:-1,:],node_new_embedding_cat),dim=1))
p_node = F.softmax(s_node.permute(1,0))
node_new_embedding_cat = node_embedding_cat[-1, :].expand(node_embedding_cat.size(0) - 1,
node_embedding_cat.size(1))
s_node = model.f_s(torch.cat((node_embedding_cat[0:-1, :], node_new_embedding_cat), dim=1))
p_node = F.softmax(s_node.permute(1, 0))
a_node = gumbel_softmax(p_node, temperature=0.01) a_node = gumbel_softmax(p_node, temperature=0.01)
_, a_node_id = a_node.topk(1) _, a_node_id = a_node.topk(1)
a_node_id = int(a_node_id.data[0][0]) a_node_id = int(a_node_id.data[0][0])
# add edge # add edge
node_neighbor[-1].append(a_node_id) node_neighbor[-1].append(a_node_id)
node_neighbor[a_node_id].append(len(node_neighbor)-1)
node_neighbor[a_node_id].append(len(node_neighbor) - 1)
else: else:
break break


return graphs_generated return graphs_generated




def _sample_edges_for_new_node(args, model, node_neighbor, node_embedding,
                               node_embedding_cat, graph_embedding, is_fast):
    """Sample edges from the newest node (last entry) to earlier nodes.

    ``node_neighbor`` is updated in place. At most ``args.max_num_node`` edge
    decisions are made; sampling stops as soon as the add-edge sample is not 1.

    Returns:
        The node embedding list, which is refreshed by message passing before
        every edge decision when ``is_fast`` is false.
    """
    for _ in range(args.max_num_node):
        if not is_fast:
            node_embedding = message_passing(node_neighbor, node_embedding, model)
            node_embedding_cat = torch.cat(node_embedding, dim=0)
            graph_embedding = calc_graph_embedding(node_embedding_cat, model)

        # 4 f_addedge
        p_addedge = model.f_ae(graph_embedding)
        a_addedge = sample_tensor(p_addedge)
        if a_addedge.data[0][0] != 1:
            break

        # 5 f_nodes
        # excluding the last node (which is the new node)
        node_new_embedding_cat = node_embedding_cat[-1, :].expand(node_embedding_cat.size(0) - 1,
                                                                  node_embedding_cat.size(1))
        s_node = model.f_s(torch.cat((node_embedding_cat[0:-1, :], node_new_embedding_cat), dim=1))
        p_node = F.softmax(s_node.permute(1, 0))
        a_node = gumbel_softmax(p_node, temperature=0.01)
        _, a_node_id = a_node.topk(1)
        a_node_id = int(a_node_id.data[0][0])

        # add edge (recorded on both endpoints)
        node_neighbor[-1].append(a_node_id)
        node_neighbor[a_node_id].append(len(node_neighbor) - 1)
    return node_embedding


def test_DGMG_2(args, model, test_graph, is_fast=False):
    """Generate one more node (and its edges) on top of ``test_graph``.

    The model first samples a graph freely for up to ``len(test_graph.nodes())``
    node steps. The sampled adjacency is then discarded and replaced by the
    adjacency of ``test_graph``, and a single add-node step followed by edge
    sampling is run on top of it.

    NOTE(review): the nesting was reconstructed from control flow because the
    reviewed view of this file had lost its leading whitespace. In the final
    step edges are sampled only when a node was actually added — confirm this
    matches the intended behaviour.

    Args:
        args: configuration object; ``node_embedding_size`` and
            ``max_num_node`` are read.
        model: trained network exposing ``f_an``, ``f_ae`` and ``f_s``.
        test_graph: graph whose nodes are labelled ``0 .. n-1``; its adjacency
            is read through ``test_graph.edge`` (networkx 1.x attribute).
        is_fast: when true, message passing runs once per node rather than
            before every edge decision.

    Returns:
        A one-element list holding the resulting networkx graph.
    """
    model.eval()

    graphs_generated = []
    # NOTE: when starting loop, we assume a node has already been generated
    node_neighbor = [[]]  # list of lists (first node is zero)
    node_embedding = [
        Variable(torch.ones(1, args.node_embedding_size)).cuda()]  # list of torch tensors, each size: 1*hidden

    node_max = len(test_graph.nodes())
    node_count = 1
    while node_count <= node_max:
        # 1 message passing
        node_embedding = message_passing(node_neighbor, node_embedding, model)

        # 2 graph embedding and new node embedding
        node_embedding_cat = torch.cat(node_embedding, dim=0)
        graph_embedding = calc_graph_embedding(node_embedding_cat, model)
        init_embedding = calc_init_embedding(node_embedding_cat, model)

        # 3 f_addnode
        p_addnode = model.f_an(graph_embedding)
        a_addnode = sample_tensor(p_addnode)
        if a_addnode.data[0][0] != 1:
            break

        # add node
        node_neighbor.append([])
        node_embedding.append(init_embedding)
        if is_fast:
            node_embedding_cat = torch.cat(node_embedding, dim=0)

        node_embedding = _sample_edges_for_new_node(
            args, model, node_neighbor, node_embedding,
            node_embedding_cat, graph_embedding, is_fast)
        node_count += 1

    # clear node_neighbor and build it again from the reference graph
    node_neighbor = [list(test_graph.edge[n]) for n in range(node_max)]

    # now add the last node for real
    # NOTE(review): node_embedding still comes from the freely sampled graph
    # above, whose node count may differ from node_max — presumably the reason
    # this step is wrapped in a broad try/except; confirm.
    try:
        # 1 message passing
        node_embedding = message_passing(node_neighbor, node_embedding, model)

        # 2 graph embedding and new node embedding
        node_embedding_cat = torch.cat(node_embedding, dim=0)
        graph_embedding = calc_graph_embedding(node_embedding_cat, model)
        init_embedding = calc_init_embedding(node_embedding_cat, model)

        # 3 f_addnode
        p_addnode = model.f_an(graph_embedding)
        a_addnode = sample_tensor(p_addnode)

        if a_addnode.data[0][0] == 1:
            # add node
            node_neighbor.append([])
            node_embedding.append(init_embedding)
            if is_fast:
                node_embedding_cat = torch.cat(node_embedding, dim=0)

            _sample_edges_for_new_node(
                args, model, node_neighbor, node_embedding,
                node_embedding_cat, graph_embedding, is_fast)
    except Exception as err:
        # Best effort: keep whatever adjacency exists, but report the cause and
        # let KeyboardInterrupt / SystemExit propagate.
        print('error', err)

    # save graph
    node_neighbor_dict = dict(enumerate(node_neighbor))
    graph = nx.from_dict_of_lists(node_neighbor_dict)
    graphs_generated.append(graph)

    return graphs_generated




########### train function for LSTM + VAE ########### train function for LSTM + VAE
train_DGMG_epoch(epoch, args, model, dataset_train, optimizer, scheduler, is_fast=args.is_fast) train_DGMG_epoch(epoch, args, model, dataset_train, optimizer, scheduler, is_fast=args.is_fast)
time_end = tm.time() time_end = tm.time()
time_all[epoch - 1] = time_end - time_start time_all[epoch - 1] = time_end - time_start
# print('time used',time_all[epoch - 1])
print('time used', time_all[epoch - 1])
# test # test
if epoch % args.epochs_test == 0 and epoch >= args.epochs_test_start: if epoch % args.epochs_test == 0 and epoch >= args.epochs_test_start:
graphs = test_DGMG_epoch(args,model, is_fast=args.is_fast)
graphs = test_DGMG_epoch(args, model, is_fast=args.is_fast)
fname = args.graph_save_path + args.fname_pred + str(epoch) + '.dat' fname = args.graph_save_path + args.fname_pred + str(epoch) + '.dat'
save_graph_list(graphs, fname) save_graph_list(graphs, fname)
# print('test done, graphs saved') # print('test done, graphs saved')
np.save(args.timing_save_path + args.fname, time_all) np.save(args.timing_save_path + args.fname, time_all)




def neigh_to_mat(neigh, size):
    """Return a zero array of shape ``size`` with a 1 at every index listed in ``neigh``."""
    indicator = np.zeros(size)
    for neighbor_id in neigh:
        indicator[neighbor_id] = 1
    return indicator


def calc_lable_result(test_graphs, returned_graphs):
    """Build target and predicted neighbour vectors for the last node of each graph.

    For every reference graph with ``n`` nodes, the neighbours of node ``n - 1``
    are encoded as a 0/1 vector of length ``n`` via ``neigh_to_mat``. The same
    is done for the graph at the same position in ``returned_graphs``.

    Args:
        test_graphs: reference graphs; adjacency is read through the ``edge``
            attribute (networkx 1.x).
        returned_graphs: generated graphs, aligned by position with
            ``test_graphs``.

    Returns:
        ``(labels, results)``: two lists of numpy vectors, one pair per graph.

    Raises:
        IndexError: if ``returned_graphs`` is shorter than ``test_graphs``.
    """
    labels = []
    results = []
    for idx, test_graph in enumerate(test_graphs):
        n = len(test_graph.nodes())
        returned_graph = returned_graphs[idx]
        labels.append(neigh_to_mat(list(test_graph.edge[n - 1]), n))
        try:
            result = neigh_to_mat(list(returned_graph.edge[n - 1]), n)
        except (KeyError, IndexError):
            # Node n-1 is missing from the generated graph, or it links to a
            # node id outside 0..n-1: treat it as "no edges predicted".
            result = np.zeros(n)
        results.append(result)
    return labels, results


def _mean_or_nan(values):
    """Return the mean of ``values``, or NaN when the list is empty."""
    return mean(values) if values else float('nan')


def evaluate(labels, results):
    """Print and return link-prediction metrics averaged over all graphs.

    For each ``(label, result)`` pair the function computes precision, recall
    and the fraction of mismatching entries (printed as ``mae``). ROC-AUC and
    average precision are computed on a class-balanced random subsample and are
    skipped for pairs where that subsample is empty.

    Args:
        labels: sequence of 0/1 target vectors.
        results: sequence of predicted vectors, aligned by position with
            ``labels`` (presumably also 0/1 — verify against the caller).

    Returns:
        ``(mean_roc, mean_ap, mean_precision, mean_recall)``. A metric with no
        samples is reported as NaN instead of raising.

    Raises:
        IndexError: if ``labels`` is shorter than ``results``.
    """
    mae_list = []
    roc_score_list = []
    ap_score_list = []
    precision_list = []
    recall_list = []
    for idx, result in enumerate(results):
        result = np.asarray(result)
        label = np.asarray(labels[idx])

        predicted_pos = result == 1
        tp = int(np.count_nonzero(label[predicted_pos] == 1))
        fp = int(np.count_nonzero(label[predicted_pos] == 0))
        fn = int(np.count_nonzero(label[result == 0] == 1))

        # Undefined ratios (empty denominator) are reported as 0 for both
        # metrics, so a graph without positives cannot inject NaN into the mean.
        precision = tp / (tp + fp) if tp + fp > 0 else 0
        recall = tp / (tp + fn) if tp + fn > 0 else 0
        precision_list.append(precision)
        recall_list.append(recall)

        # Balance the classes by down-sampling the larger one.
        positive = result[label == 1]
        negative_pool = list(result[label == 0])
        if len(positive) <= len(negative_pool):
            negative = random.sample(negative_pool, len(positive))
        else:
            negative = result[label == 0]
            positive = random.sample(list(result[label == 1]), len(negative))
        preds_all = np.hstack([positive, negative])
        labels_all = np.hstack([np.ones(len(positive)), np.zeros(len(negative))])

        if len(labels_all) > 0:
            # Append only scores that were computed for this pair.
            roc_score_list.append(roc_auc_score(labels_all, preds_all))
            ap_score_list.append(average_precision_score(labels_all, preds_all))

        mismatches = int(np.count_nonzero(result != label))
        mae_list.append(mismatches / len(label))

    mean_roc = _mean_or_nan(roc_score_list)
    mean_ap = _mean_or_nan(ap_score_list)
    mean_precision = _mean_or_nan(precision_list)
    mean_recall = _mean_or_nan(recall_list)
    mean_mae = _mean_or_nan(mae_list)
    print('roc_score ' + str(mean_roc))
    print('ap_score ' + str(mean_ap))
    print('precision ' + str(mean_precision))
    print('recall ' + str(mean_recall))
    print('mae ' + str(mean_mae))
    return mean_roc, mean_ap, mean_precision, mean_recall


def load_data(dataset, degree_as_tag):
    """Load the graphs of a dataset from ``dataset/<dataset>/<dataset>.txt``.

    File layout as parsed here: the first line is the number of graphs. Each
    graph starts with a line ``<num_nodes> <label>``, followed by one line per
    node of the form ``<tag> <num_neighbors> <neighbor ids...> [attributes...]``.
    Only the topology is kept; node tags and attributes are not stored.

    Args:
        dataset: name of the dataset (used for both directory and file name).
        degree_as_tag: accepted for signature compatibility; not used here.

    Returns:
        ``(g_list, num_classes)``: the list of networkx graphs and the number
        of distinct graph labels seen in the file.

    Raises:
        AssertionError: if a graph ends up with a node count different from
            the one declared in its header line.
    """
    print('loading data')
    g_list = []
    label_dict = {}

    with open('dataset/%s/%s.txt' % (dataset, dataset), 'r') as f:
        n_g = int(f.readline().strip())
        for _ in range(n_g):
            n, l = [int(w) for w in f.readline().strip().split()]
            if l not in label_dict:
                label_dict[l] = len(label_dict)

            g = nx.Graph()
            for j in range(n):
                g.add_node(j)
                row = f.readline().strip().split()
                # row[1] is the neighbour count; the ids follow it directly and
                # anything after them is per-node attribute data.
                n_neighbors = int(row[1])
                for w in row[2:2 + n_neighbors]:
                    g.add_edge(j, int(w))

            assert len(g) == n

            g_list.append(g)

    return g_list, len(label_dict)


########### negative log-likelihood logging for a saved DGMG model
def train_DGMG_nll(args, dataset_train, dataset_test, model, max_iter=1000):
    """Load a saved model and log train/test loss for ``max_iter`` forward passes.

    The checkpoint ``<model_save_path><fname>model_<load_epoch>.dat`` is loaded
    into ``model``. Each iteration calls ``train_DGMG_forward_epoch`` on both
    datasets, prints the two values and appends them as a CSV row to
    ``<nll_save_path><note>_<graph_type>.csv``.
    """
    # load the saved model
    fname = args.model_save_path + args.fname + 'model_' + str(args.load_epoch) + '.dat'
    model.load_state_dict(torch.load(fname))

    fname_output = args.nll_save_path + args.note + '_' + args.graph_type + '.csv'
    with open(fname_output, 'w+') as f:
        f.write('train,test\n')
        # start main loop
        for _ in range(max_iter):
            nll_train = train_DGMG_forward_epoch(args, model, dataset_train, is_fast=args.is_fast)
            nll_test = train_DGMG_forward_epoch(args, model, dataset_test, is_fast=args.is_fast)
            print('train', nll_train, 'test', nll_test)
            f.write(str(nll_train) + ',' + str(nll_test) + '\n')




if __name__ == '__main__': if __name__ == '__main__':

my_graph = nx.Graph()
edges = nx.read_edgelist("data/main_graphs_COLLAB/my.txt")
my_graph.add_edges_from(edges.edges())
integers = nx.convert_node_labels_to_integers(my_graph)
d_graph = nx.grid_2d_graph(2, 3)
integers2 = nx.convert_node_labels_to_integers(d_graph)
args = Args_DGMG() args = Args_DGMG()
os.environ['CUDA_VISIBLE_DEVICES'] = str(args.cuda) os.environ['CUDA_VISIBLE_DEVICES'] = str(args.cuda)
print('CUDA', args.cuda) print('CUDA', args.cuda)
print('File name prefix',args.fname)

print('File name prefix', args.fname)


graphs = [] graphs = []
for i in range(4, 10): for i in range(4, 10):
graphs.append(nx.ladder_graph(i)) graphs.append(nx.ladder_graph(i))
model = DGM_graphs(h_size = args.node_embedding_size).cuda()
model = DGM_graphs(h_size=args.node_embedding_size).cuda()


if args.graph_type == 'ladder_small':
graphs = []
for i in range(2, 11):
graphs.append(nx.ladder_graph(i))
args.max_prev_node = 10
# if args.graph_type == 'caveman_small':
# graphs = []
# for i in range(2, 5):
# for j in range(2, 6):
# for k in range(10):
# graphs.append(nx.relaxed_caveman_graph(i, j, p=0.1))
# args.max_prev_node = 20
if args.graph_type=='caveman_small':
graphs = []
for i in range(2, 3):
for j in range(6, 11):
for k in range(20):
graphs.append(caveman_special(i, j, p_edge=0.8))
args.max_prev_node = 20
if args.graph_type == 'grid_small': if args.graph_type == 'grid_small':
graphs = [] graphs = []
for i in range(2, 4):
for i in range(2, 3):
for j in range(2, 4): for j in range(2, 4):
graphs.append(nx.grid_2d_graph(i, j)) graphs.append(nx.grid_2d_graph(i, j))
args.max_prev_node = 15
if args.graph_type == 'barabasi_small':
graphs = []
for i in range(4, 21):
for j in range(3, 4):
for k in range(10):
graphs.append(nx.barabasi_albert_graph(i, j))
args.max_prev_node = 20

if args.graph_type == 'enzymes_small':
graphs_raw = Graph_load_batch(min_num_nodes=10, name='ENZYMES')
graphs = []
for G in graphs_raw:
if G.number_of_nodes()<=20:
graphs.append(G)
args.max_prev_node = 15

if args.graph_type == 'citeseer_small':
_, _, G = Graph_load(dataset='citeseer')
G = max(nx.connected_component_subgraphs(G), key=len)
G = nx.convert_node_labels_to_integers(G)
graphs = []
for i in range(G.number_of_nodes()):
G_ego = nx.ego_graph(G, i, radius=1)
if (G_ego.number_of_nodes() >= 4) and (G_ego.number_of_nodes() <= 20):
graphs.append(G_ego)
shuffle(graphs)
graphs = graphs[0:200]
args.max_prev_node = 15
else:
graphs, num_classes = load_data(args.graph_type, True)
small_graphs = []
for i in range(len(graphs)):
if graphs[i].number_of_nodes() < 13:
small_graphs.append(graphs[i])
graphs = small_graphs
args.max_prev_node = 12
args.max_prev_node = 5


# remove self loops # remove self loops
for graph in graphs: for graph in graphs:
# split datasets # split datasets
random.seed(123) random.seed(123)
shuffle(graphs) shuffle(graphs)
graphs_len = len(graphs)
graphs_test = graphs[int(0.8 * graphs_len):]
graphs_train = graphs[0:int(0.8 * graphs_len)]
# graphs_len = len(graphs)
# graphs_test = graphs[int(0.8 * graphs_len):]
# graphs_validate = graphs[int(0.7 * graphs_len):int(0.8 * graphs_len)]
# graphs_train = graphs[0:int(0.7 * graphs_len)]


args.max_num_node = max([graphs[i].number_of_nodes() for i in range(len(graphs))]) args.max_num_node = max([graphs[i].number_of_nodes() for i in range(len(graphs))])
# args.max_num_node = 2000
# show graphs statistics
print('total graph num: {}, training set: {}'.format(len(graphs), len(graphs_train)))

print('max number node: {}'.format(args.max_num_node)) print('max number node: {}'.format(args.max_num_node))
print('max previous node: {}'.format(args.max_prev_node)) print('max previous node: {}'.format(args.max_prev_node))
test_graph = nx.grid_2d_graph(2, 3)
test_graph.remove_node(test_graph.nodes()[5])
train_DGMG(args, graphs, model)


# save ground truth graphs
# save_graph_list(graphs, args.graph_save_path + args.fname_train + '0.dat')
# save_graph_list(graphs, args.graph_save_path + args.fname_test + '0.dat')
# print('train and test graphs saved')

## if use pre-saved graphs
# dir_input = "graphs/"
# fname_test = args.graph_save_path + args.fname_test + '0.dat'
# graphs = load_graph_list(fname_test, is_real=True)
# graphs_test = graphs[int(0.8 * graphs_len):]
# graphs_train = graphs[0:int(0.8 * graphs_len)]
# graphs_validate = graphs[0:int(0.2 * graphs_len)]

# print('train')
# for graph in graphs_validate:
# print(graph.number_of_nodes())
# print('test')
# for graph in graphs_test:
# print(graph.number_of_nodes())



### train
train_DGMG(args,graphs,model)

### calc nll
# train_DGMG_nll(args, graphs_validate,graphs_test, model,max_iter=1000)







test_graph = nx.convert_node_labels_to_integers(test_graph)
test_DGMG_2(args, model, test_graph)


# labels, results = calc_lable_result(test_graphs, eval_graphs)


# for j in range(1000):
# graph = graphs[0]
# # do random ordering: relabel nodes
# node_order = list(range(graph.number_of_nodes()))
# shuffle(node_order)
# order_mapping = dict(zip(graph.nodes(), node_order))
# graph = nx.relabel_nodes(graph, order_mapping, copy=True)
# print(graph.nodes())
# evaluate(labels, results)

Loading…
Cancel
Save