5 years ago · 2a366a282d
--- a/main_DeepGMG.py
+++ b/main_DeepGMG.py
@@ -1,6 +1,13 @@
 # an implementation for "Learning Deep Generative Models of Graphs"
 import os

 import random
 from statistics import mean

 import networkx as nx
 import numpy as np
 from sklearn.metrics import roc_auc_score, average_precision_score

 from main import *


@@ -264,132 +271,132 @@ def test_DGMG_2(args, model, test_graph, is_fast=False):
    graph_num = args.test_graph_num

    graphs_generated = []
    for i in range(graph_num):
        # NOTE: when starting loop, we assume a node has already been generated
        node_neighbor = [[]]  # list of lists (first node is zero)
        node_embedding = [
            Variable(torch.ones(1, args.node_embedding_size)).cuda()]  # list of torch tensors, each size: 1*hidden

        node_max = len(test_graph.nodes())
        node_count = 1
        while node_count <= node_max:
            # 1 message passing
            # do 2 times message passing
            node_embedding = message_passing(node_neighbor, node_embedding, model)

            # 2 graph embedding and new node embedding
            node_embedding_cat = torch.cat(node_embedding, dim=0)
            graph_embedding = calc_graph_embedding(node_embedding_cat, model)
            init_embedding = calc_init_embedding(node_embedding_cat, model)

            # 3 f_addnode
            p_addnode = model.f_an(graph_embedding)
            a_addnode = sample_tensor(p_addnode)

            if a_addnode.data[0][0] == 1:
                # add node
                node_neighbor.append([])
                node_embedding.append(init_embedding)
                if is_fast:
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
            else:
                break

            edge_count = 0
            while edge_count < args.max_num_node:
                if not is_fast:
                    node_embedding = message_passing(node_neighbor, node_embedding, model)
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
                    graph_embedding = calc_graph_embedding(node_embedding_cat, model)

                # 4 f_addedge
                p_addedge = model.f_ae(graph_embedding)
                a_addedge = sample_tensor(p_addedge)

                if a_addedge.data[0][0] == 1:
                    # 5 f_nodes
                    # excluding the last node (which is the new node)
                    node_new_embedding_cat = node_embedding_cat[-1, :].expand(node_embedding_cat.size(0) - 1,
                                                                              node_embedding_cat.size(1))
                    s_node = model.f_s(torch.cat((node_embedding_cat[0:-1, :], node_new_embedding_cat), dim=1))
                    p_node = F.softmax(s_node.permute(1, 0))
                    a_node = gumbel_softmax(p_node, temperature=0.01)
                    _, a_node_id = a_node.topk(1)
                    a_node_id = int(a_node_id.data[0][0])
                    # add edge

                    node_neighbor[-1].append(a_node_id)
                    node_neighbor[a_node_id].append(len(node_neighbor) - 1)
                else:
                    break

                edge_count += 1
            node_count += 1

        # clear node_neighbor and build it again
        node_neighbor = []
        for n in range(node_max):
            temp_neighbor = [k for k in test_graph.edge[n]]
            node_neighbor.append(temp_neighbor)

        # now add the last node for real
    # for i in range(graph_num):
    # NOTE: when starting loop, we assume a node has already been generated
    node_neighbor = [[]]  # list of lists (first node is zero)
    node_embedding = [
        Variable(torch.ones(1, args.node_embedding_size)).cuda()]  # list of torch tensors, each size: 1*hidden

    node_max = len(test_graph.nodes())
    node_count = 1
    while node_count <= node_max:
        # 1 message passing
        # do 2 times message passing
        try:
            node_embedding = message_passing(node_neighbor, node_embedding, model)

            # 2 graph embedding and new node embedding
            node_embedding_cat = torch.cat(node_embedding, dim=0)
            graph_embedding = calc_graph_embedding(node_embedding_cat, model)
            init_embedding = calc_init_embedding(node_embedding_cat, model)

            # 3 f_addnode
            p_addnode = model.f_an(graph_embedding)
            a_addnode = sample_tensor(p_addnode)

            if a_addnode.data[0][0] == 1:
                # add node
                node_neighbor.append([])
                node_embedding.append(init_embedding)
                if is_fast:
                    node_embedding_cat = torch.cat(node_embedding, dim=0)

            edge_count = 0
            while edge_count < args.max_num_node:
                if not is_fast:
                    node_embedding = message_passing(node_neighbor, node_embedding, model)
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
                    graph_embedding = calc_graph_embedding(node_embedding_cat, model)

                # 4 f_addedge
                p_addedge = model.f_ae(graph_embedding)
                a_addedge = sample_tensor(p_addedge)

                if a_addedge.data[0][0] == 1:
                    # 5 f_nodes
                    # excluding the last node (which is the new node)
                    node_new_embedding_cat = node_embedding_cat[-1, :].expand(node_embedding_cat.size(0) - 1,
                                                                              node_embedding_cat.size(1))
                    s_node = model.f_s(torch.cat((node_embedding_cat[0:-1, :], node_new_embedding_cat), dim=1))
                    p_node = F.softmax(s_node.permute(1, 0))
                    a_node = gumbel_softmax(p_node, temperature=0.01)
                    _, a_node_id = a_node.topk(1)
                    a_node_id = int(a_node_id.data[0][0])
                    # add edge
        node_embedding = message_passing(node_neighbor, node_embedding, model)

        # 2 graph embedding and new node embedding
        node_embedding_cat = torch.cat(node_embedding, dim=0)
        graph_embedding = calc_graph_embedding(node_embedding_cat, model)
        init_embedding = calc_init_embedding(node_embedding_cat, model)

        # 3 f_addnode
        p_addnode = model.f_an(graph_embedding)
        a_addnode = sample_tensor(p_addnode)

        if a_addnode.data[0][0] == 1:
            # add node
            node_neighbor.append([])
            node_embedding.append(init_embedding)
            if is_fast:
                node_embedding_cat = torch.cat(node_embedding, dim=0)
        else:
            break

        edge_count = 0
        while edge_count < args.max_num_node:
            if not is_fast:
                node_embedding = message_passing(node_neighbor, node_embedding, model)
                node_embedding_cat = torch.cat(node_embedding, dim=0)
                graph_embedding = calc_graph_embedding(node_embedding_cat, model)

            # 4 f_addedge
            p_addedge = model.f_ae(graph_embedding)
            a_addedge = sample_tensor(p_addedge)

            if a_addedge.data[0][0] == 1:
                # 5 f_nodes
                # excluding the last node (which is the new node)
                node_new_embedding_cat = node_embedding_cat[-1, :].expand(node_embedding_cat.size(0) - 1,
                                                                          node_embedding_cat.size(1))
                s_node = model.f_s(torch.cat((node_embedding_cat[0:-1, :], node_new_embedding_cat), dim=1))
                p_node = F.softmax(s_node.permute(1, 0))
                a_node = gumbel_softmax(p_node, temperature=0.01)
                _, a_node_id = a_node.topk(1)
                a_node_id = int(a_node_id.data[0][0])
                # add edge

                node_neighbor[-1].append(a_node_id)
                node_neighbor[a_node_id].append(len(node_neighbor) - 1)
            else:
                break

                    node_neighbor[-1].append(a_node_id)
                    node_neighbor[a_node_id].append(len(node_neighbor) - 1)
                else:
                    break
            edge_count += 1
        node_count += 1

    # clear node_neighbor and build it again
    node_neighbor = []
    for n in range(node_max):
        temp_neighbor = [k for k in test_graph.edge[n]]
        node_neighbor.append(temp_neighbor)

    # now add the last node for real
    # 1 message passing
    # do 2 times message passing
    try:
        node_embedding = message_passing(node_neighbor, node_embedding, model)

        # 2 graph embedding and new node embedding
        node_embedding_cat = torch.cat(node_embedding, dim=0)
        graph_embedding = calc_graph_embedding(node_embedding_cat, model)
        init_embedding = calc_init_embedding(node_embedding_cat, model)

        # 3 f_addnode
        p_addnode = model.f_an(graph_embedding)
        a_addnode = sample_tensor(p_addnode)

        if a_addnode.data[0][0] == 1:
            # add node
            node_neighbor.append([])
            node_embedding.append(init_embedding)
            if is_fast:
                node_embedding_cat = torch.cat(node_embedding, dim=0)

        edge_count = 0
        while edge_count < args.max_num_node:
            if not is_fast:
                node_embedding = message_passing(node_neighbor, node_embedding, model)
                node_embedding_cat = torch.cat(node_embedding, dim=0)
                graph_embedding = calc_graph_embedding(node_embedding_cat, model)

            # 4 f_addedge
            p_addedge = model.f_ae(graph_embedding)
            a_addedge = sample_tensor(p_addedge)

            if a_addedge.data[0][0] == 1:
                # 5 f_nodes
                # excluding the last node (which is the new node)
                node_new_embedding_cat = node_embedding_cat[-1, :].expand(node_embedding_cat.size(0) - 1,
                                                                          node_embedding_cat.size(1))
                s_node = model.f_s(torch.cat((node_embedding_cat[0:-1, :], node_new_embedding_cat), dim=1))
                p_node = F.softmax(s_node.permute(1, 0))
                a_node = gumbel_softmax(p_node, temperature=0.01)
                _, a_node_id = a_node.topk(1)
                a_node_id = int(a_node_id.data[0][0])
                # add edge

                node_neighbor[-1].append(a_node_id)
                node_neighbor[a_node_id].append(len(node_neighbor) - 1)
            else:
                break

                edge_count += 1
            node_count += 1
        except:
            print('error')
        # save graph
        node_neighbor_dict = dict(zip(list(range(len(node_neighbor))), node_neighbor))
        graph = nx.from_dict_of_lists(node_neighbor_dict)
        graphs_generated.append(graph)
            edge_count += 1
        node_count += 1
    except:
        print('error')
    # save graph
    node_neighbor_dict = dict(zip(list(range(len(node_neighbor))), node_neighbor))
    graph = nx.from_dict_of_lists(node_neighbor_dict)
    graphs_generated.append(graph)

    return graphs_generated

@@ -437,7 +444,96 @@ def train_DGMG(args, dataset_train, model):
    np.save(args.timing_save_path + args.fname, time_all)


 def neigh_to_mat(neigh, size):
    ret_list = np.zeros(size)
    for i in neigh:
        ret_list[i] = 1
    return ret_list


 def calc_lable_result(test_graphs, returned_graphs):
    labels = []
    results = []
    i = 0
    for test_graph in test_graphs:
        n = len(test_graph.nodes())
        returned_graph = returned_graphs[i]
        label = neigh_to_mat([k for k in test_graph.edge[n - 1]], n)
        try:
            result = neigh_to_mat([k for k in returned_graph.edge[n - 1]], n)
        except:
            result = np.zeros(n)
        labels.append(label)
        results.append(result)
        i += 1
    return labels, results


 def evaluate(labels, results):
    mae_list = []
    roc_score_list = []
    ap_score_list = []
    precision_list = []
    recall_list = []
    iter = 0
    for result in results:
        label = labels[iter]
        iter += 1
        part1 = label[result == 1]
        part2 = part1[part1 == 1]
        part3 = part1[part1 == 0]
        part4 = label[result == 0]
        part5 = part4[part4 == 1]
        tp = len(part2)
        fp = len(part3)
        fn = part5.sum()
        if tp + fp > 0:
            precision = tp / (tp + fp)
        else:
            precision = 0
        recall = tp / (tp + fn)
        precision_list.append(precision)
        recall_list.append(recall)

        positive = result[label == 1]
        if len(positive) <= len(list(result[label == 0])):
            negative = random.sample(list(result[label == 0]), len(positive))
        else:
            negative = result[label == 0]
            positive = random.sample(list(result[label == 1]), len(negative))
        preds_all = np.hstack([positive, negative])
        labels_all = np.hstack([np.ones(len(positive)), np.zeros(len(positive))])

        if len(labels_all) > 0:
            roc_score = roc_auc_score(labels_all, preds_all)
            ap_score = average_precision_score(labels_all, preds_all)

        roc_score_list.append(roc_score)
        ap_score_list.append(ap_score)

        mae = 0
        for x in range(len(result)):
            if result[x] != label[x]:
                mae += 1

        mae = mae / len(label)
        mae_list.append(mae)

    mean_roc = mean(roc_score_list)
    mean_ap = mean(ap_score_list)
    mean_precision = mean(precision_list)
    mean_recall = mean(recall_list)
    mean_mae = mean(mae_list)
    print('roc_score ' + str(mean_roc))
    print('ap_score ' + str(mean_ap))
    print('precision ' + str(mean_precision))
    print('recall ' + str(mean_recall))
    print('mae ' + str(mean_mae))
    return mean_roc, mean_ap, mean_precision, mean_recall


 if __name__ == '__main__':

    args = Args_DGMG()
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.cuda)
    print('CUDA', args.cuda)
@@ -453,7 +549,7 @@ if __name__ == '__main__':
        for i in range(2, 3):
            for j in range(2, 4):
                graphs.append(nx.grid_2d_graph(i, j))
        args.max_prev_node = 6
        args.max_prev_node = 5

    # remove self loops
    for graph in graphs:
@@ -479,3 +575,7 @@ if __name__ == '__main__':

    test_graph = nx.convert_node_labels_to_integers(test_graph)
    test_DGMG_2(args, model, test_graph)

    labels, results = calc_lable_result(test_graphs, eval_graphs)

    evaluate(labels, results)