5 years ago · 604d9f1205
--- a/main_DeepGMG.py
+++ b/main_DeepGMG.py
@@ -1,20 +1,28 @@
 # an implementation for "Learning Deep Generative Models of Graphs"
 from baselines.graphvae.util import load_data
 import os

 import random
 from statistics import mean

 import networkx as nx
 import numpy as np
 from sklearn.metrics import roc_auc_score, average_precision_score

 from main import *


 class Args_DGMG():
    def __init__(self):
        ### CUDA
        self.cuda = 1
        self.cuda = 0

        ### model type
        self.note = 'Baseline_DGMG' # do GCN after adding each edge
        self.note = 'Baseline_DGMG'  # do GCN after adding each edge
        # self.note = 'Baseline_DGMG_fast' # do GCN only after adding each node

        ### data config
        # self.graph_type = 'caveman_small'
        # self.graph_type = 'grid_small'
        self.graph_type = 'IMDBBINARY'
        self.graph_type = 'grid_small'
        # self.graph_type = 'ladder_small'
        # self.graph_type = 'enzymes_small'
        # self.graph_type = 'barabasi_small'
@@ -26,14 +34,13 @@ class Args_DGMG():
        self.node_embedding_size = 64
        self.test_graph_num = 200


        ### training config
        self.epochs = 2000  # now one epoch means self.batch_ratio x batch_size
        self.load_epoch = 2000
        self.epochs_test_start = 100
        self.epochs_test = 100
        self.epochs_log = 2
        self.epochs_save = 100
        self.epochs = 100  # now one epoch means self.batch_ratio x batch_size
        self.load_epoch = 100
        self.epochs_test_start = 10
        self.epochs_test = 10
        self.epochs_log = 10
        self.epochs_save = 10
        if 'fast' in self.note:
            self.is_fast = True
        else:
@@ -51,7 +58,6 @@ class Args_DGMG():
        self.figure_prediction_save_path = 'figures_prediction/'
        self.nll_save_path = 'nll/'


        self.fname = self.note + '_' + self.graph_type + '_' + str(self.node_embedding_size)
        self.fname_pred = self.note + '_' + self.graph_type + '_' + str(self.node_embedding_size) + '_pred_'
        self.fname_train = self.note + '_' + self.graph_type + '_' + str(self.node_embedding_size) + '_train_'
@@ -61,13 +67,12 @@ class Args_DGMG():
        self.save = True


 def train_DGMG_epoch(epoch, args, model, dataset, optimizer, scheduler, is_fast = False):
 def train_DGMG_epoch(epoch, args, model, dataset, optimizer, scheduler, is_fast=False):
    model.train()
    graph_num = len(dataset)
    order = list(range(graph_num))
    shuffle(order)


    loss_addnode = 0
    loss_addedge = 0
    loss_node = 0
@@ -81,16 +86,17 @@ def train_DGMG_epoch(epoch, args, model, dataset, optimizer, scheduler, is_fast
        order_mapping = dict(zip(graph.nodes(), node_order))
        graph = nx.relabel_nodes(graph, order_mapping, copy=True)


        # NOTE: when starting loop, we assume a node has already been generated
        node_count = 1
        node_embedding = [Variable(torch.ones(1,args.node_embedding_size)).cuda()] # list of torch tensors, each size: 1*hidden

        node_embedding = [
            Variable(torch.ones(1, args.node_embedding_size)).cuda()]  # list of torch tensors, each size: 1*hidden

        loss = 0
        while node_count<=graph.number_of_nodes():
            node_neighbor = graph.subgraph(list(range(node_count))).adjacency_list()  # list of lists (first node is zero)
            node_neighbor_new = graph.subgraph(list(range(node_count+1))).adjacency_list()[-1] # list of new node's neighbors
        while node_count <= graph.number_of_nodes():
            node_neighbor = graph.subgraph(
                list(range(node_count))).adjacency_list()  # list of lists (first node is zero)
            node_neighbor_new = graph.subgraph(list(range(node_count + 1))).adjacency_list()[
                -1]  # list of new node's neighbors

            # 1 message passing
            # do 2 times message passing
@@ -110,7 +116,7 @@ def train_DGMG_epoch(epoch, args, model, dataset, optimizer, scheduler, is_fast
                if is_fast:
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
                # calc loss
                loss_addnode_step = F.binary_cross_entropy(p_addnode,Variable(torch.ones((1,1))).cuda())
                loss_addnode_step = F.binary_cross_entropy(p_addnode, Variable(torch.ones((1, 1))).cuda())
                # loss_addnode_step.backward(retain_graph=True)
                loss += loss_addnode_step
                loss_addnode += loss_addnode_step.data
@@ -122,9 +128,8 @@ def train_DGMG_epoch(epoch, args, model, dataset, optimizer, scheduler, is_fast
                loss_addnode += loss_addnode_step.data
                break


            edge_count = 0
            while edge_count<=len(node_neighbor_new):
            while edge_count <= len(node_neighbor_new):
                if not is_fast:
                    node_embedding = message_passing(node_neighbor, node_embedding, model)
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
@@ -142,19 +147,20 @@ def train_DGMG_epoch(epoch, args, model, dataset, optimizer, scheduler, is_fast

                    # 5 f_nodes
                    # excluding the last node (which is the new node)
                    node_new_embedding_cat = node_embedding_cat[-1,:].expand(node_embedding_cat.size(0)-1,node_embedding_cat.size(1))
                    s_node = model.f_s(torch.cat((node_embedding_cat[0:-1,:],node_new_embedding_cat),dim=1))
                    p_node = F.softmax(s_node.permute(1,0))
                    node_new_embedding_cat = node_embedding_cat[-1, :].expand(node_embedding_cat.size(0) - 1,
                                                                              node_embedding_cat.size(1))
                    s_node = model.f_s(torch.cat((node_embedding_cat[0:-1, :], node_new_embedding_cat), dim=1))
                    p_node = F.softmax(s_node.permute(1, 0))
                    # get ground truth
                    a_node = torch.zeros((1,p_node.size(1)))
                    a_node = torch.zeros((1, p_node.size(1)))
                    # print('node_neighbor_new',node_neighbor_new, edge_count)
                    a_node[0,node_neighbor_new[edge_count]] = 1
                    a_node[0, node_neighbor_new[edge_count]] = 1
                    a_node = Variable(a_node).cuda()
                    # add edge
                    node_neighbor[-1].append(node_neighbor_new[edge_count])
                    node_neighbor[node_neighbor_new[edge_count]].append(len(node_neighbor)-1)
                    node_neighbor[node_neighbor_new[edge_count]].append(len(node_neighbor) - 1)
                    # calc loss
                    loss_node_step = F.binary_cross_entropy(p_node,a_node)
                    loss_node_step = F.binary_cross_entropy(p_node, a_node)
                    # loss_node_step.backward(retain_graph=True)
                    loss += loss_node_step
                    loss_node += loss_node_step.data
@@ -177,142 +183,14 @@ def train_DGMG_epoch(epoch, args, model, dataset, optimizer, scheduler, is_fast

    loss_all = loss_addnode + loss_addedge + loss_node

    if epoch % args.epochs_log==0:
    if epoch % args.epochs_log == 0:
        print('Epoch: {}/{}, train loss: {:.6f}, graph type: {}, hidden: {}'.format(
            epoch, args.epochs,loss_all.item(), args.graph_type, args.node_embedding_size))

            epoch, args.epochs, loss_all, args.graph_type, args.node_embedding_size))

    # loss_sum += loss.data[0]*x.size(0)
    # return loss_sum




 def train_DGMG_forward_epoch(args, model, dataset, is_fast = False):
    """Run one forward-only pass over `dataset` and return the average accumulated loss.

    For every graph (visited in shuffled order, with randomly relabelled nodes) the
    function replays the graph's construction node by node and edge by edge, and
    accumulates three binary-cross-entropy terms against the ground-truth decisions:
    add-node (`model.f_an`), add-edge (`model.f_ae`) and choose-node (`model.f_s`).
    No `backward()` call or optimizer step happens in this function.

    Args:
        args: configuration object; only `args.node_embedding_size` is read here.
        model: module exposing `f_an`, `f_ae` and `f_s`; it is also handed to
            `message_passing`, `calc_graph_embedding` and `calc_init_embedding`.
        dataset: indexable collection of networkx graphs.
        is_fast: when True, message passing is NOT re-run before every edge
            decision; the concatenated embeddings are refreshed once right after
            the new node is appended instead.

    Returns:
        `loss_all[0] / len(dataset)`, where `loss_all` is the sum of the three
        accumulated loss terms.
        NOTE(review): the `[0]` indexing assumes the accumulated `.data` values are
        1-element tensors -- confirm for the torch version in use. With an empty
        `dataset` the accumulators stay the int 0 and this expression fails.
    """
    # model.train() is used even though no parameter update is performed here.
    model.train()
    graph_num = len(dataset)
    order = list(range(graph_num))
    shuffle(order)


    # running totals over the whole dataset, one per decision type
    loss_addnode = 0
    loss_addedge = 0
    loss_node = 0
    for i in order:
        model.zero_grad()

        graph = dataset[i]
        # do random ordering: relabel nodes
        # after relabelling the nodes are the integers 0..N-1 in a random permutation,
        # which is what the range()-based subgraph calls below rely on
        node_order = list(range(graph.number_of_nodes()))
        shuffle(node_order)
        order_mapping = dict(zip(graph.nodes(), node_order))
        graph = nx.relabel_nodes(graph, order_mapping, copy=True)


        # NOTE: when starting loop, we assume a node has already been generated
        node_count = 1
        node_embedding = [Variable(torch.ones(1,args.node_embedding_size)).cuda()] # list of torch tensors, each size: 1*hidden


        # per-graph loss with autograd history; it is accumulated but never read
        # again in this function (the backward calls are commented out)
        loss = 0
        while node_count<=graph.number_of_nodes():
            node_neighbor = graph.subgraph(list(range(node_count))).adjacency_list()  # list of lists (first node is zero)
            node_neighbor_new = graph.subgraph(list(range(node_count+1))).adjacency_list()[-1] # list of new node's neighbors

            # 1 message passing
            # do 2 times message passing
            node_embedding = message_passing(node_neighbor, node_embedding, model)

            # 2 graph embedding and new node embedding
            node_embedding_cat = torch.cat(node_embedding, dim=0)
            graph_embedding = calc_graph_embedding(node_embedding_cat, model)
            init_embedding = calc_init_embedding(node_embedding_cat, model)

            # 3 f_addnode
            p_addnode = model.f_an(graph_embedding)
            if node_count < graph.number_of_nodes():
                # add node
                # more nodes remain in the ground truth, so the target for f_an is 1
                node_neighbor.append([])
                node_embedding.append(init_embedding)
                if is_fast:
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
                # calc loss
                loss_addnode_step = F.binary_cross_entropy(p_addnode,Variable(torch.ones((1,1))).cuda())
                # loss_addnode_step.backward(retain_graph=True)
                loss += loss_addnode_step
                loss_addnode += loss_addnode_step.data
            else:
                # calc loss
                # all ground-truth nodes are present: the target for f_an is 0 and
                # generation of this graph stops
                loss_addnode_step = F.binary_cross_entropy(p_addnode, Variable(torch.zeros((1, 1))).cuda())
                # loss_addnode_step.backward(retain_graph=True)
                loss += loss_addnode_step
                loss_addnode += loss_addnode_step.data
                break


            # one iteration per ground-truth edge of the new node, plus a final
            # iteration that scores the "stop adding edges" decision
            edge_count = 0
            while edge_count<=len(node_neighbor_new):
                if not is_fast:
                    node_embedding = message_passing(node_neighbor, node_embedding, model)
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
                    graph_embedding = calc_graph_embedding(node_embedding_cat, model)

                # 4 f_addedge
                p_addedge = model.f_ae(graph_embedding)

                if edge_count < len(node_neighbor_new):
                    # calc loss
                    loss_addedge_step = F.binary_cross_entropy(p_addedge, Variable(torch.ones((1, 1))).cuda())
                    # loss_addedge_step.backward(retain_graph=True)
                    loss += loss_addedge_step
                    loss_addedge += loss_addedge_step.data

                    # 5 f_nodes
                    # excluding the last node (which is the new node)
                    # the new node's embedding is repeated so it can be paired with
                    # every earlier node's embedding as input to f_s
                    node_new_embedding_cat = node_embedding_cat[-1,:].expand(node_embedding_cat.size(0)-1,node_embedding_cat.size(1))
                    s_node = model.f_s(torch.cat((node_embedding_cat[0:-1,:],node_new_embedding_cat),dim=1))
                    p_node = F.softmax(s_node.permute(1,0))
                    # get ground truth
                    # one-hot target over the earlier nodes, set at the true neighbour
                    a_node = torch.zeros((1,p_node.size(1)))
                    # print('node_neighbor_new',node_neighbor_new, edge_count)
                    a_node[0,node_neighbor_new[edge_count]] = 1
                    a_node = Variable(a_node).cuda()
                    # add edge
                    # record the edge in both endpoints' adjacency lists
                    node_neighbor[-1].append(node_neighbor_new[edge_count])
                    node_neighbor[node_neighbor_new[edge_count]].append(len(node_neighbor)-1)
                    # calc loss
                    loss_node_step = F.binary_cross_entropy(p_node,a_node)
                    # loss_node_step.backward(retain_graph=True)
                    loss += loss_node_step
                    # NOTE(review): scaled by the number of candidate nodes, presumably
                    # to undo the default averaging of binary_cross_entropy -- confirm
                    loss_node += loss_node_step.data*p_node.size(1)

                else:
                    # calc loss
                    # no ground-truth edges left: the target for f_ae is 0
                    loss_addedge_step = F.binary_cross_entropy(p_addedge, Variable(torch.zeros((1, 1))).cuda())
                    # loss_addedge_step.backward(retain_graph=True)
                    loss += loss_addedge_step
                    loss_addedge += loss_addedge_step.data
                    break

                edge_count += 1
            node_count += 1


    loss_all = loss_addnode + loss_addedge + loss_node

    # if epoch % args.epochs_log==0:
    #     print('Epoch: {}/{}, train loss: {:.6f}, graph type: {}, hidden: {}'.format(
    #         epoch, args.epochs,loss_all[0], args.graph_type, args.node_embedding_size))


    return loss_all[0]/len(dataset)







 def test_DGMG_epoch(args, model, is_fast=False):
    model.eval()
    graph_num = args.test_graph_num
@@ -321,10 +199,11 @@ def test_DGMG_epoch(args, model, is_fast=False):
    for i in range(graph_num):
        # NOTE: when starting loop, we assume a node has already been generated
        node_neighbor = [[]]  # list of lists (first node is zero)
        node_embedding = [Variable(torch.ones(1,args.node_embedding_size)).cuda()] # list of torch tensors, each size: 1*hidden
        node_embedding = [
            Variable(torch.ones(1, args.node_embedding_size)).cuda()]  # list of torch tensors, each size: 1*hidden

        node_count = 1
        while node_count<=args.max_num_node:
        while node_count <= args.max_num_node:
            # 1 message passing
            # do 2 times message passing
            node_embedding = message_passing(node_neighbor, node_embedding, model)
@@ -338,7 +217,7 @@ def test_DGMG_epoch(args, model, is_fast=False):
            p_addnode = model.f_an(graph_embedding)
            a_addnode = sample_tensor(p_addnode)
            # print(a_addnode.data[0][0])
            if a_addnode.data[0][0]==1:
            if a_addnode.data[0][0] == 1:
                # print('add node')
                # add node
                node_neighbor.append([])
@@ -349,7 +228,7 @@ def test_DGMG_epoch(args, model, is_fast=False):
                break

            edge_count = 0
            while edge_count<args.max_num_node:
            while edge_count < args.max_num_node:
                if not is_fast:
                    node_embedding = message_passing(node_neighbor, node_embedding, model)
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
@@ -360,19 +239,20 @@ def test_DGMG_epoch(args, model, is_fast=False):
                a_addedge = sample_tensor(p_addedge)
                # print(a_addedge.data[0][0])

                if a_addedge.data[0][0]==1:
                if a_addedge.data[0][0] == 1:
                    # print('add edge')
                    # 5 f_nodes
                    # excluding the last node (which is the new node)
                    node_new_embedding_cat = node_embedding_cat[-1,:].expand(node_embedding_cat.size(0)-1,node_embedding_cat.size(1))
                    s_node = model.f_s(torch.cat((node_embedding_cat[0:-1,:],node_new_embedding_cat),dim=1))
                    p_node = F.softmax(s_node.permute(1,0))
                    node_new_embedding_cat = node_embedding_cat[-1, :].expand(node_embedding_cat.size(0) - 1,
                                                                              node_embedding_cat.size(1))
                    s_node = model.f_s(torch.cat((node_embedding_cat[0:-1, :], node_new_embedding_cat), dim=1))
                    p_node = F.softmax(s_node.permute(1, 0))
                    a_node = gumbel_softmax(p_node, temperature=0.01)
                    _, a_node_id = a_node.topk(1)
                    a_node_id = int(a_node_id.data[0][0])
                    # add edge
                    node_neighbor[-1].append(a_node_id)
                    node_neighbor[a_node_id].append(len(node_neighbor)-1)
                    node_neighbor[a_node_id].append(len(node_neighbor) - 1)
                else:
                    break

@@ -386,14 +266,139 @@ def test_DGMG_epoch(args, model, is_fast=False):
    return graphs_generated


 def test_DGMG_2(args, model, test_graph, is_fast=False):
    """Sample a graph with the model, then predict one extra node on top of `test_graph`.

    Phase 1 samples a graph from scratch: at most `len(test_graph.nodes())`
    add-node decisions, each followed by up to `args.max_num_node` add-edge
    decisions.

    Phase 2 throws away the sampled adjacency lists and replaces them with the
    adjacency of `test_graph` (nodes `0..node_max-1`), while keeping the node
    embeddings produced in phase 1. It then performs one more add-node step and
    its add-edge loop on that structure. Any exception raised in phase 2 is
    reported and the graph built so far is still returned.

    Args:
        args: configuration object; `node_embedding_size` and `max_num_node` are read.
        model: module exposing `f_an`, `f_ae` and `f_s`; also handed to the
            `message_passing` / `calc_*_embedding` helpers.
        test_graph: networkx graph whose nodes are the integers `0..N-1`. It is
            read through `test_graph.edge`, the NetworkX 1.x adjacency attribute.
        is_fast: when True, message passing is not re-run before each edge decision.

    Returns:
        A list with exactly one networkx graph built from the final adjacency lists.
    """
    model.eval()

    graphs_generated = []
    # NOTE: when starting loop, we assume a node has already been generated
    node_neighbor = [[]]  # list of lists (first node is zero)
    node_embedding = [
        Variable(torch.ones(1, args.node_embedding_size)).cuda()]  # list of torch tensors, each size: 1*hidden

    # ---- phase 1: free sampling, bounded by the size of test_graph ----
    node_max = len(test_graph.nodes())
    node_count = 1
    while node_count <= node_max:
        # 1 message passing
        node_embedding = message_passing(node_neighbor, node_embedding, model)

        # 2 graph embedding and new node embedding
        node_embedding_cat = torch.cat(node_embedding, dim=0)
        graph_embedding = calc_graph_embedding(node_embedding_cat, model)
        init_embedding = calc_init_embedding(node_embedding_cat, model)

        # 3 f_addnode
        p_addnode = model.f_an(graph_embedding)
        a_addnode = sample_tensor(p_addnode)

        if a_addnode.data[0][0] == 1:
            # add node
            node_neighbor.append([])
            node_embedding.append(init_embedding)
            if is_fast:
                node_embedding_cat = torch.cat(node_embedding, dim=0)
        else:
            # the model decided to stop growing the graph
            break

        edge_count = 0
        while edge_count < args.max_num_node:
            if not is_fast:
                node_embedding = message_passing(node_neighbor, node_embedding, model)
                node_embedding_cat = torch.cat(node_embedding, dim=0)
                graph_embedding = calc_graph_embedding(node_embedding_cat, model)

            # 4 f_addedge
            p_addedge = model.f_ae(graph_embedding)
            a_addedge = sample_tensor(p_addedge)

            if a_addedge.data[0][0] == 1:
                # 5 f_nodes
                # excluding the last node (which is the new node)
                node_new_embedding_cat = node_embedding_cat[-1, :].expand(node_embedding_cat.size(0) - 1,
                                                                          node_embedding_cat.size(1))
                s_node = model.f_s(torch.cat((node_embedding_cat[0:-1, :], node_new_embedding_cat), dim=1))
                p_node = F.softmax(s_node.permute(1, 0))
                a_node = gumbel_softmax(p_node, temperature=0.01)
                _, a_node_id = a_node.topk(1)
                a_node_id = int(a_node_id.data[0][0])
                # add edge (recorded in both endpoints' adjacency lists)
                node_neighbor[-1].append(a_node_id)
                node_neighbor[a_node_id].append(len(node_neighbor) - 1)
            else:
                break

            edge_count += 1
        node_count += 1

    # ---- phase 2: swap in the real adjacency of test_graph ----
    # clear node_neighbor and build it again
    node_neighbor = []
    for n in range(node_max):
        node_neighbor.append(list(test_graph.edge[n]))

    # now add the last node for real
    # NOTE(review): node_embedding still has one entry per node sampled in phase 1,
    # which need not equal node_max; a mismatch is presumably what this try block
    # is meant to absorb -- confirm.
    try:
        # 1 message passing
        node_embedding = message_passing(node_neighbor, node_embedding, model)

        # 2 graph embedding and new node embedding
        node_embedding_cat = torch.cat(node_embedding, dim=0)
        graph_embedding = calc_graph_embedding(node_embedding_cat, model)
        init_embedding = calc_init_embedding(node_embedding_cat, model)

        # 3 f_addnode
        p_addnode = model.f_an(graph_embedding)
        a_addnode = sample_tensor(p_addnode)

        # Unlike phase 1 there is no early exit here: if no node is added, the
        # edge loop below still runs and attaches edges to the last existing node.
        if a_addnode.data[0][0] == 1:
            # add node
            node_neighbor.append([])
            node_embedding.append(init_embedding)
            if is_fast:
                node_embedding_cat = torch.cat(node_embedding, dim=0)

        edge_count = 0
        while edge_count < args.max_num_node:
            if not is_fast:
                node_embedding = message_passing(node_neighbor, node_embedding, model)
                node_embedding_cat = torch.cat(node_embedding, dim=0)
                graph_embedding = calc_graph_embedding(node_embedding_cat, model)

            # 4 f_addedge
            p_addedge = model.f_ae(graph_embedding)
            a_addedge = sample_tensor(p_addedge)

            if a_addedge.data[0][0] == 1:
                # 5 f_nodes
                # excluding the last node (which is the new node)
                node_new_embedding_cat = node_embedding_cat[-1, :].expand(node_embedding_cat.size(0) - 1,
                                                                          node_embedding_cat.size(1))
                s_node = model.f_s(torch.cat((node_embedding_cat[0:-1, :], node_new_embedding_cat), dim=1))
                p_node = F.softmax(s_node.permute(1, 0))
                a_node = gumbel_softmax(p_node, temperature=0.01)
                _, a_node_id = a_node.topk(1)
                a_node_id = int(a_node_id.data[0][0])
                # add edge (recorded in both endpoints' adjacency lists)
                node_neighbor[-1].append(a_node_id)
                node_neighbor[a_node_id].append(len(node_neighbor) - 1)
            else:
                break

            edge_count += 1
        node_count += 1
    except Exception as exc:
        # Best effort: keep whatever adjacency was built, but say what went wrong.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        print('error', repr(exc))

    # save graph
    node_neighbor_dict = dict(zip(list(range(len(node_neighbor))), node_neighbor))
    graph = nx.from_dict_of_lists(node_neighbor_dict)
    graphs_generated.append(graph)

    return graphs_generated


 ########### train function for LSTM + VAE
@@ -422,10 +427,10 @@ def train_DGMG(args, dataset_train, model):
        train_DGMG_epoch(epoch, args, model, dataset_train, optimizer, scheduler, is_fast=args.is_fast)
        time_end = tm.time()
        time_all[epoch - 1] = time_end - time_start
        # print('time used',time_all[epoch - 1])
        print('time used', time_all[epoch - 1])
        # test
        if epoch % args.epochs_test == 0 and epoch >= args.epochs_test_start:
            graphs = test_DGMG_epoch(args,model, is_fast=args.is_fast)
            graphs = test_DGMG_epoch(args, model, is_fast=args.is_fast)
            fname = args.graph_save_path + args.fname_pred + str(epoch) + '.dat'
            save_graph_list(graphs, fname)
            # print('test done, graphs saved')
@@ -439,104 +444,178 @@ def train_DGMG(args, dataset_train, model):
    np.save(args.timing_save_path + args.fname, time_all)


 def neigh_to_mat(neigh, size):
    """Return a zero-filled array of shape `size` with a 1 at every index in `neigh`."""
    indicator = np.zeros(size)
    # one vectorised assignment marks every listed neighbour position
    indicator[list(neigh)] = 1
    return indicator


 def calc_lable_result(test_graphs, returned_graphs):
    """Build ground-truth and predicted neighbour vectors for each graph's last node.

    For the graph at position `i` in `test_graphs` (with `n` nodes), the label is
    the 0/1 vector of length `n` marking the neighbours of node `n - 1`. The result
    is the same vector read from `returned_graphs[i]`. If that cannot be built
    (for example node `n - 1` is missing from the returned graph, or one of its
    neighbours has an index >= n), an all-zero vector is used instead.

    Both graph collections are read through `.edge`, the NetworkX 1.x adjacency
    attribute.

    Args:
        test_graphs: iterable of ground-truth graphs.
        returned_graphs: sequence of predicted graphs, indexable in the same order.

    Returns:
        `(labels, results)`: two lists of numpy arrays, one entry per test graph.
    """
    labels = []
    results = []
    for idx, test_graph in enumerate(test_graphs):
        n = len(test_graph.nodes())
        returned_graph = returned_graphs[idx]
        label = neigh_to_mat(list(test_graph.edge[n - 1]), n)
        try:
            result = neigh_to_mat(list(returned_graph.edge[n - 1]), n)
        except Exception:
            # Best-effort fallback: an unusable prediction counts as "no edges".
            # `Exception` rather than a bare except so Ctrl-C still interrupts.
            result = np.zeros(n)
        labels.append(label)
        results.append(result)
    return labels, results


 def evaluate(labels, results):
    """Compare predicted 0/1 neighbour vectors with ground truth and print mean scores.

    For every `(label, result)` pair the function computes precision, recall and
    the mismatch rate ("mae"). It also computes ROC-AUC and average precision on a
    class-balanced random subsample of the predictions, with the larger class
    down-sampled via `random.sample`.

    Edge cases handled explicitly:
      * precision is 0 when nothing was predicted positive; recall is 0 when the
        label has no positive entry (avoids a division by zero);
      * a pair whose balanced sample is empty (label is all-0 or all-1) contributes
        no ROC/AP score instead of reusing the previous pair's score;
      * the mean of an empty score list is reported as NaN.

    Args:
        labels: sequence of 1-D numpy arrays holding 0/1 ground truth.
        results: sequence of 1-D numpy arrays holding 0/1 predictions, aligned
            with `labels` and of matching lengths.

    Returns:
        `(mean_roc, mean_ap, mean_precision, mean_recall)`. The mean mismatch rate
        is printed but not returned.
    """
    def _mean_or_nan(values):
        # statistics.mean raises on an empty list; report NaN instead
        return mean(values) if values else float('nan')

    mae_list = []
    roc_score_list = []
    ap_score_list = []
    precision_list = []
    recall_list = []
    for idx, result in enumerate(results):
        label = labels[idx]

        # confusion counts: split the ground truth by what was predicted
        predicted_pos = label[result == 1]
        predicted_neg = label[result == 0]
        tp = int((predicted_pos == 1).sum())
        fp = int((predicted_pos == 0).sum())
        fn = int((predicted_neg == 1).sum())

        precision = tp / (tp + fp) if tp + fp > 0 else 0
        recall = tp / (tp + fn) if tp + fn > 0 else 0
        precision_list.append(precision)
        recall_list.append(recall)

        # balance positives and negatives by down-sampling the larger class
        positive = result[label == 1]
        negative_pool = list(result[label == 0])
        if len(positive) <= len(negative_pool):
            negative = random.sample(negative_pool, len(positive))
        else:
            negative = result[label == 0]
            positive = random.sample(list(result[label == 1]), len(negative))
        preds_all = np.hstack([positive, negative])
        labels_all = np.hstack([np.ones(len(positive)), np.zeros(len(negative))])

        # only score pairs that actually have a (balanced, two-class) sample
        if len(labels_all) > 0:
            roc_score_list.append(roc_auc_score(labels_all, preds_all))
            ap_score_list.append(average_precision_score(labels_all, preds_all))

        # fraction of positions where prediction and ground truth disagree
        mismatches = sum(1 for pos in range(len(result)) if result[pos] != label[pos])
        mae_list.append(mismatches / len(label) if len(label) else 0.0)

    mean_roc = _mean_or_nan(roc_score_list)
    mean_ap = _mean_or_nan(ap_score_list)
    mean_precision = _mean_or_nan(precision_list)
    mean_recall = _mean_or_nan(recall_list)
    mean_mae = _mean_or_nan(mae_list)
    print('roc_score ' + str(mean_roc))
    print('ap_score ' + str(mean_ap))
    print('precision ' + str(mean_precision))
    print('recall ' + str(mean_recall))
    print('mae ' + str(mean_mae))
    return mean_roc, mean_ap, mean_precision, mean_recall


 def load_data(dataset, degree_as_tag):
    '''
        Read `dataset/<dataset>/<dataset>.txt` and build one networkx graph per record.

        File layout as consumed here: the first line holds the number of graphs;
        each graph starts with a line "<n_nodes> <label>" followed by n_nodes
        lines of the form "<tag> <k> <nbr_1> ... <nbr_k> [extra columns]". Only the
        tag, the neighbour count and the k neighbour ids are parsed; any extra
        (attribute) columns are ignored.

        dataset: name of dataset (used for both the directory and the file name)
        degree_as_tag: accepted for interface compatibility; not used in this function

        returns: (list of networkx graphs, number of distinct graph labels)
    '''

    print('loading data')
    g_list = []
    label_dict = {}

    with open('dataset/%s/%s.txt' % (dataset, dataset), 'r') as f:
        n_g = int(f.readline().strip())
        for _ in range(n_g):
            n, l = [int(w) for w in f.readline().strip().split()]
            if l not in label_dict:
                # labels are only counted; map each new one to the next free id
                label_dict[l] = len(label_dict)
            g = nx.Graph()
            for j in range(n):
                g.add_node(j)
                row = f.readline().strip().split()
                # row[0] is the node tag, row[1] the neighbour count
                tmp = int(row[1]) + 2
                fields = [int(w) for w in row[:tmp]]
                for neighbor in fields[2:]:
                    g.add_edge(j, neighbor)

            # an edge to an id outside 0..n-1 would have created an extra node
            assert len(g) == n

            g_list.append(g)

    return g_list, len(label_dict)


 ########### NLL logging for a saved DGMG model
 def train_DGMG_nll(args, dataset_train,dataset_test, model,max_iter=1000):
    """Load a saved checkpoint and log train/test forward-pass losses to a CSV file.

    The checkpoint path is built from `args.model_save_path`, `args.fname` and
    `args.load_epoch`. For each of `max_iter` iterations the value returned by
    `train_DGMG_forward_epoch` is computed once on `dataset_train` and once on
    `dataset_test`, printed, and appended as a "train,test" row to
    `<args.nll_save_path><args.note>_<args.graph_type>.csv`.

    Returns None.
    """
    # check if load existing model
    fname = args.model_save_path + args.fname + 'model_' + str(args.load_epoch) + '.dat'
    model.load_state_dict(torch.load(fname))

    fname_output = args.nll_save_path + args.note + '_' + args.graph_type + '.csv'
    with open(fname_output, 'w+') as f:
        f.write('train,test\n')
        # start main loop
        for _ in range(max_iter):
            nll_train = train_DGMG_forward_epoch(args, model, dataset_train, is_fast=args.is_fast)
            nll_test = train_DGMG_forward_epoch(args, model, dataset_test, is_fast=args.is_fast)
            print('train', nll_train, 'test', nll_test)
            f.write(str(nll_train) + ',' + str(nll_test) + '\n')


 if __name__ == '__main__':

    my_graph = nx.Graph()
    edges = nx.read_edgelist("data/main_graphs_COLLAB/my.txt")
    my_graph.add_edges_from(edges.edges())
    integers = nx.convert_node_labels_to_integers(my_graph)
    d_graph = nx.grid_2d_graph(2, 3)
    integers2 = nx.convert_node_labels_to_integers(d_graph)
    args = Args_DGMG()
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.cuda)
    print('CUDA', args.cuda)
    print('File name prefix',args.fname)

    print('File name prefix', args.fname)

    graphs = []
    for i in range(4, 10):
        graphs.append(nx.ladder_graph(i))
    model = DGM_graphs(h_size = args.node_embedding_size).cuda()
    model = DGM_graphs(h_size=args.node_embedding_size).cuda()

    if args.graph_type == 'ladder_small':
        graphs = []
        for i in range(2, 11):
            graphs.append(nx.ladder_graph(i))
        args.max_prev_node = 10
    # if args.graph_type == 'caveman_small':
    #     graphs = []
    #     for i in range(2, 5):
    #         for j in range(2, 6):
    #             for k in range(10):
    #                 graphs.append(nx.relaxed_caveman_graph(i, j, p=0.1))
    #     args.max_prev_node = 20
    if args.graph_type=='caveman_small':
        graphs = []
        for i in range(2, 3):
            for j in range(6, 11):
                for k in range(20):
                    graphs.append(caveman_special(i, j, p_edge=0.8))
        args.max_prev_node = 20
    if args.graph_type == 'grid_small':
        graphs = []
        for i in range(2, 4):
        for i in range(2, 3):
            for j in range(2, 4):
                graphs.append(nx.grid_2d_graph(i, j))
        args.max_prev_node = 15
    if args.graph_type == 'barabasi_small':
        graphs = []
        for i in range(4, 21):
            for j in range(3, 4):
                for k in range(10):
                    graphs.append(nx.barabasi_albert_graph(i, j))
        args.max_prev_node = 20

    if args.graph_type == 'enzymes_small':
        graphs_raw = Graph_load_batch(min_num_nodes=10, name='ENZYMES')
        graphs = []
        for G in graphs_raw:
            if G.number_of_nodes()<=20:
                graphs.append(G)
        args.max_prev_node = 15

    if args.graph_type == 'citeseer_small':
        _, _, G = Graph_load(dataset='citeseer')
        G = max(nx.connected_component_subgraphs(G), key=len)
        G = nx.convert_node_labels_to_integers(G)
        graphs = []
        for i in range(G.number_of_nodes()):
            G_ego = nx.ego_graph(G, i, radius=1)
            if (G_ego.number_of_nodes() >= 4) and (G_ego.number_of_nodes() <= 20):
                graphs.append(G_ego)
        shuffle(graphs)
        graphs = graphs[0:200]
        args.max_prev_node = 15
    else:
        graphs, num_classes = load_data(args.graph_type, True)
        small_graphs = []
        for i in range(len(graphs)):
            if graphs[i].number_of_nodes() < 13:
                small_graphs.append(graphs[i])
        graphs = small_graphs
        args.max_prev_node = 12
        args.max_prev_node = 5

    # remove self loops
    for graph in graphs:
@@ -547,58 +626,22 @@ if __name__ == '__main__':
    # split datasets
    random.seed(123)
    shuffle(graphs)
    graphs_len = len(graphs)
    graphs_test = graphs[int(0.8 * graphs_len):]
    graphs_train = graphs[0:int(0.8 * graphs_len)]
    # graphs_len = len(graphs)
    # graphs_test = graphs[int(0.8 * graphs_len):]
    # graphs_validate = graphs[int(0.7 * graphs_len):int(0.8 * graphs_len)]
    # graphs_train = graphs[0:int(0.7 * graphs_len)]

    args.max_num_node = max([graphs[i].number_of_nodes() for i in range(len(graphs))])
    # args.max_num_node = 2000
    # show graphs statistics
    print('total graph num: {}, training set: {}'.format(len(graphs), len(graphs_train)))

    print('max number node: {}'.format(args.max_num_node))
    print('max previous node: {}'.format(args.max_prev_node))
    test_graph = nx.grid_2d_graph(2, 3)
    test_graph.remove_node(test_graph.nodes()[5])
    train_DGMG(args, graphs, model)

    # save ground truth graphs
    # save_graph_list(graphs, args.graph_save_path + args.fname_train + '0.dat')
    # save_graph_list(graphs, args.graph_save_path + args.fname_test + '0.dat')
    # print('train and test graphs saved')

    ## if use pre-saved graphs
    # dir_input = "graphs/"
    # fname_test = args.graph_save_path + args.fname_test + '0.dat'
    # graphs = load_graph_list(fname_test, is_real=True)
    # graphs_test = graphs[int(0.8 * graphs_len):]
    # graphs_train = graphs[0:int(0.8 * graphs_len)]
    # graphs_validate = graphs[0:int(0.2 * graphs_len)]

    # print('train')
    # for graph in graphs_validate:
    #     print(graph.number_of_nodes())
    # print('test')
    # for graph in graphs_test:
    #     print(graph.number_of_nodes())



    ### train
    train_DGMG(args,graphs,model)

    ### calc nll
    # train_DGMG_nll(args, graphs_validate,graphs_test, model,max_iter=1000)







    test_graph = nx.convert_node_labels_to_integers(test_graph)
    test_DGMG_2(args, model, test_graph)

    # labels, results = calc_lable_result(test_graphs, eval_graphs)

    # for j in range(1000):
    #     graph = graphs[0]
    #     # do random ordering: relabel nodes
    #     node_order = list(range(graph.number_of_nodes()))
    #     shuffle(node_order)
    #     order_mapping = dict(zip(graph.nodes(), node_order))
    #     graph = nx.relabel_nodes(graph, order_mapping, copy=True)
    #     print(graph.nodes())
    # evaluate(labels, results)