12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555 |
- from __future__ import unicode_literals, print_function, division
- from io import open
- import unicodedata
- import string
- import re
- import random
-
- import torch
- import torch.nn as nn
- from torch.autograd import Variable
- from torch import optim
- import torch.nn.functional as F
- import torch.nn.init as init
- from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence
-
- from collections import OrderedDict
- import math
- import numpy as np
- import time
-
-
def binary_cross_entropy_weight(y_pred, y, has_weight=False, weight_length=1, weight_max=10):
    '''
    Binary cross entropy loss, optionally up-weighting the last
    ``weight_length`` time steps of each sequence.

    :param y_pred: predicted edge probabilities, shape (batch, seq, feature)
    :param y: ground-truth binary labels, same shape as ``y_pred``
    :param has_weight: if True, apply a linearly increasing weight to the
        final ``weight_length`` steps of every sequence
    :param weight_length: how long until the end of sequence shall we add weight
    :param weight_max: the magnitude that the weight ramps up to
    :return: scalar loss tensor
    '''
    if has_weight:
        weight = torch.ones(y.size(0), y.size(1), y.size(2))
        # weight ramps linearly from weight_max/weight_length up to weight_max
        weight_linear = torch.arange(1, weight_length + 1).float() / weight_length * weight_max
        weight_linear = weight_linear.view(1, weight_length, 1).repeat(y.size(0), 1, y.size(2))
        weight[:, -1 * weight_length:, :] = weight_linear
        # follow the device of the inputs instead of assuming CUDA is available
        loss = F.binary_cross_entropy(y_pred, y, weight=weight.to(y_pred.device))
    else:
        loss = F.binary_cross_entropy(y_pred, y)
    return loss
-
-
def sample_tensor(y, sample=True, thresh=0.5):
    '''
    Binarize probabilities ``y`` either by Bernoulli sampling or by a fixed
    threshold.

    :param y: tensor of probabilities in [0, 1]
    :param sample: if True, compare against uniform random noise (sampling);
        if False, compare against ``thresh`` (max likelihood)
    :param thresh: threshold used when ``sample`` is False
    :return: float tensor of 0/1 values, same shape and device as ``y``
    '''
    if sample:
        # random threshold per element -> Bernoulli(y) sample
        y_thresh = torch.rand_like(y)
    else:
        # deterministic thresholding
        y_thresh = torch.full_like(y, thresh)
    return torch.gt(y, y_thresh).float()
-
-
def gumbel_softmax(logits, temperature, eps=1e-9):
    '''
    Sample from a Gumbel-softmax (concrete) relaxation of a categorical
    distribution over the last dimension.

    :param logits: unnormalized log-probabilities, shape: N*L
    :param temperature: softmax temperature; lower -> closer to one-hot
    :param eps: numerical-stability constant inside the logs
    :return: tensor of the same shape as ``logits``; rows sum to 1
    '''
    # Gumbel(0, 1) noise: -log(-log(U + eps) + eps), U ~ Uniform(0, 1),
    # created on the same device as the logits
    u = torch.rand_like(logits)
    gumbel_noise = -torch.log(-torch.log(u + eps) + eps)

    x = (logits + gumbel_noise) / temperature
    # explicit dim (implicit-dim softmax is deprecated); last dim == L
    return F.softmax(x, dim=-1)
-
-
- # for i in range(10):
- # x = Variable(torch.randn(1,10)).cuda()
- # y = gumbel_softmax(x, temperature=0.01)
- # print(x)
- # print(y)
- # _,id = y.topk(1)
- # print(id)
-
-
def gumbel_sigmoid(logits, temperature):
    '''
    Sample from a binary concrete (Gumbel-sigmoid) relaxation of a
    Bernoulli distribution.

    :param logits: unnormalized scores, any shape
    :param temperature: sigmoid temperature; lower -> closer to hard 0/1
    :return: tensor of the same shape as ``logits`` with values in (0, 1)
    '''
    # Logistic(0, 1) noise: log(U) - log(1 - U), U ~ Uniform(0, 1),
    # created on the same device as the logits
    u = torch.rand_like(logits)
    logistic_noise = torch.log(u) - torch.log(1 - u)

    x = (logits + logistic_noise) / temperature
    # torch.sigmoid replaces the deprecated F.sigmoid
    return torch.sigmoid(x)
-
-
- # x = Variable(torch.randn(100)).cuda()
- # y = gumbel_sigmoid(x,temperature=0.01)
- # print(x)
- # print(y)
-
def sample_sigmoid(y, sample, thresh=0.5, sample_time=2):
    '''
    do sampling over unnormalized score
    :param y: input, unnormalized scores, shape (batch, seq, feature)
    :param sample: Bool; if True sample, otherwise threshold at ``thresh``
    :param thresh: if not sample, the threshold
    :param sample_time: how many times do we sample, if =1, do single sample
    :return: sampled 0/1 result, same shape and device as ``y``
    '''

    # do sigmoid first
    y = torch.sigmoid(y)
    # do sampling
    if sample:
        if sample_time > 1:
            y_result = torch.rand_like(y)
            # loop over all batches
            for i in range(y_result.size(0)):
                # do 'multi_sample' times sampling
                for j in range(sample_time):
                    y_thresh = torch.rand(y.size(1), y.size(2), device=y.device)
                    y_result[i] = torch.gt(y[i], y_thresh).float()
                    # keep the first draw that produced at least one edge
                    if y_result[i].sum().item() > 0:
                        break
        else:
            # single sample: one random threshold per element
            y_result = torch.gt(y, torch.rand_like(y)).float()
    # do max likelihood based on some threshold
    else:
        y_result = torch.gt(y, torch.full_like(y, thresh)).float()
    return y_result
-
-
def sample_sigmoid_supervised(y_pred, y, current, y_len, sample_time=2):
    '''
    Sample binary rows from unnormalized scores, with supervision.

    While the current generation step is still within the ground-truth
    sequence length of a batch entry, resample until the drawn row covers
    the ground truth (no true edge sampled as 0); afterwards fall back to
    plain repeated sampling from the predictions.

    :param y_pred: unnormalized scores, shape (batch, seq, feature)
        (assumes CUDA tensors -- .cuda() is hard-coded below)
    :param y: ground-truth supervision, indexable as y[i] with the same
        shape as y_pred[i] -- presumably a (CUDA) tensor; verify against caller
    :param current: index of the current generation step
    :param y_len: per-batch-entry ground-truth sequence lengths
    :param sample_time: how many times we sample in the unsupervised branch;
        if =1, do single sample
    :return: sampled 0/1 result, same shape as y_pred
    '''

    # do sigmoid first
    y_pred = F.sigmoid(y_pred)
    # do sampling
    y_result = Variable(torch.rand(y_pred.size(0), y_pred.size(1), y_pred.size(2))).cuda()
    # loop over all batches
    for i in range(y_result.size(0)):
        # using supervision
        if current < y_len[i]:
            # NOTE(review): unbounded loop -- terminates only when a sample
            # covers all ground-truth edges; could spin if y_pred[i] assigns
            # near-zero probability to a true edge
            while True:
                y_thresh = Variable(torch.rand(y_pred.size(1), y_pred.size(2))).cuda()
                y_result[i] = torch.gt(y_pred[i], y_thresh).float()
                # print('current',current)
                # print('y_result',y_result[i].data)
                # print('y',y[i])
                # accept only a sample that includes every ground-truth edge:
                # y_diff >= 0 everywhere means no true edge was sampled as 0
                y_diff = y_result[i].data - y[i]
                if (y_diff >= 0).all():
                    break
        # supervision done
        else:
            # do 'multi_sample' times sampling
            for j in range(sample_time):
                y_thresh = Variable(torch.rand(y_pred.size(1), y_pred.size(2))).cuda()
                y_result[i] = torch.gt(y_pred[i], y_thresh).float()
                # keep the first draw that produced at least one edge
                if (torch.sum(y_result[i]).data > 0).any():
                    break
    return y_result
-
-
def sample_sigmoid_supervised_simple(y_pred, y, current, y_len, sample_time=2):
    '''
    Sample binary rows from unnormalized scores, with simple supervision.

    While the current generation step is still within the ground-truth
    sequence length of a batch entry, copy the ground truth directly;
    afterwards fall back to repeated sampling from the predictions.

    :param y_pred: unnormalized scores, shape (batch, seq, feature)
        (assumes CUDA tensors -- .cuda() is hard-coded below)
    :param y: ground-truth rows, indexable as y[i] with the same shape as
        y_pred[i] -- presumably a (CUDA) tensor; verify against caller
    :param current: index of the current generation step
    :param y_len: per-batch-entry ground-truth sequence lengths
    :param sample_time: how many times we sample in the unsupervised branch;
        if =1, do single sample
    :return: sampled result, same shape as y_pred
    '''

    # do sigmoid first
    y_pred = F.sigmoid(y_pred)
    # do sampling
    y_result = Variable(torch.rand(y_pred.size(0), y_pred.size(1), y_pred.size(2))).cuda()
    # loop over all batches
    for i in range(y_result.size(0)):
        # using supervision: simply copy the ground-truth row
        if current < y_len[i]:
            y_result[i] = y[i]
        # supervision done
        else:
            # do 'multi_sample' times sampling
            for j in range(sample_time):
                y_thresh = Variable(torch.rand(y_pred.size(1), y_pred.size(2))).cuda()
                y_result[i] = torch.gt(y_pred[i], y_thresh).float()
                # keep the first draw that produced at least one edge
                if (torch.sum(y_result[i]).data > 0).any():
                    break
    return y_result
-
-
- ################### current adopted model, LSTM+MLP || LSTM+VAE || LSTM+LSTM (where LSTM can be GRU as well)
- #####
- # definition of terms
- # h: hidden state of LSTM
- # y: edge prediction, model output
- # n: noise for generator
- # l: whether an output is real or not, binary
-
- # plain LSTM model
class LSTM_plain(nn.Module):
    """Plain multi-layer LSTM with an optional input embedding layer and an
    optional two-layer MLP output head.

    ``self.hidden`` must be set (e.g. via :meth:`init_hidden`) before each
    forward run; the forward pass updates it in place.
    """

    def __init__(self, input_size, embedding_size, hidden_size, num_layers, has_input=True, has_output=False,
                 output_size=None):
        super(LSTM_plain, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.has_input = has_input
        self.has_output = has_output

        if has_input:
            # embed raw input before feeding the LSTM
            self.input = nn.Linear(input_size, embedding_size)
            self.rnn = nn.LSTM(input_size=embedding_size, hidden_size=hidden_size, num_layers=num_layers,
                               batch_first=True)
        else:
            self.rnn = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                               batch_first=True)
        if has_output:
            self.output = nn.Sequential(
                nn.Linear(hidden_size, embedding_size),
                nn.ReLU(),
                nn.Linear(embedding_size, output_size)
            )

        self.relu = nn.ReLU()
        # initialize
        self.hidden = None  # need initialize before forward run

        # in-place init functions replace the deprecated tensor-returning
        # nn.init.constant / nn.init.xavier_uniform
        for name, param in self.rnn.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0.25)
            elif 'weight' in name:
                nn.init.xavier_uniform_(param, gain=nn.init.calculate_gain('sigmoid'))
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight.data, gain=nn.init.calculate_gain('relu'))

    def init_hidden(self, batch_size):
        """Return a zeroed (h0, c0) pair on the model's current device."""
        device = next(self.parameters()).device
        return (torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device),
                torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device))

    def forward(self, input_raw, pack=False, input_len=None):
        """Run the LSTM over a (batch, seq, feature) input.

        :param input_raw: input sequence batch
        :param pack: if True, pack the (sorted) padded batch by ``input_len``
        :param input_len: per-entry sequence lengths, required when ``pack``
        :return: hidden state at each time step (after the output MLP when
            ``has_output``)
        """
        if self.has_input:
            input = self.input(input_raw)
            input = self.relu(input)
        else:
            input = input_raw
        if pack:
            input = pack_padded_sequence(input, input_len, batch_first=True)
        output_raw, self.hidden = self.rnn(input, self.hidden)
        if pack:
            output_raw = pad_packed_sequence(output_raw, batch_first=True)[0]
        if self.has_output:
            output_raw = self.output(output_raw)
        # return hidden state at each time step
        return output_raw
-
-
- # plain GRU model
class GRU_plain(nn.Module):
    """Plain multi-layer GRU with an optional input embedding layer and an
    optional two-layer MLP output head.

    ``self.hidden`` must be set (e.g. via :meth:`init_hidden`) before each
    forward run; the forward pass updates it in place.
    """

    def __init__(self, input_size, embedding_size, hidden_size, num_layers, has_input=True, has_output=False,
                 output_size=None):
        super(GRU_plain, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.has_input = has_input
        self.has_output = has_output

        if has_input:
            # embed raw input before feeding the GRU
            self.input = nn.Linear(input_size, embedding_size)
            self.rnn = nn.GRU(input_size=embedding_size, hidden_size=hidden_size, num_layers=num_layers,
                              batch_first=True)
        else:
            self.rnn = nn.GRU(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                              batch_first=True)
        if has_output:
            self.output = nn.Sequential(
                nn.Linear(hidden_size, embedding_size),
                nn.ReLU(),
                nn.Linear(embedding_size, output_size)
            )

        self.relu = nn.ReLU()
        # initialize
        self.hidden = None  # need initialize before forward run

        # in-place init functions replace the deprecated tensor-returning
        # nn.init.constant / nn.init.xavier_uniform
        for name, param in self.rnn.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0.25)
            elif 'weight' in name:
                nn.init.xavier_uniform_(param, gain=nn.init.calculate_gain('sigmoid'))
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight.data, gain=nn.init.calculate_gain('relu'))

    def init_hidden(self, batch_size):
        """Return a zeroed h0 on the model's current device."""
        device = next(self.parameters()).device
        return torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device)

    def forward(self, input_raw, pack=False, input_len=None):
        """Run the GRU over a (batch, seq, feature) input.

        :param input_raw: input sequence batch
        :param pack: if True, pack the (sorted) padded batch by ``input_len``
        :param input_len: per-entry sequence lengths, required when ``pack``
        :return: hidden state at each time step (after the output MLP when
            ``has_output``)
        """
        if self.has_input:
            input = self.input(input_raw)
            input = self.relu(input)
        else:
            input = input_raw
        if pack:
            input = pack_padded_sequence(input, input_len, batch_first=True)
        output_raw, self.hidden = self.rnn(input, self.hidden)
        if pack:
            output_raw = pad_packed_sequence(output_raw, batch_first=True)[0]
        if self.has_output:
            output_raw = self.output(output_raw)
        # return hidden state at each time step
        return output_raw
-
-
- # a deterministic linear output
# a deterministic linear output
class MLP_plain(nn.Module):
    """Two-layer MLP head: h -> embedding -> ReLU -> y."""

    def __init__(self, h_size, embedding_size, y_size):
        super(MLP_plain, self).__init__()
        self.deterministic_output = nn.Sequential(
            nn.Linear(h_size, embedding_size),
            nn.ReLU(),
            nn.Linear(embedding_size, y_size)
        )

        # in-place init replaces the deprecated init.xavier_uniform
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight.data, gain=nn.init.calculate_gain('relu'))

    def forward(self, h):
        """Map hidden state ``h`` to the deterministic output ``y``."""
        y = self.deterministic_output(h)
        return y
-
-
- # a deterministic linear output, additional output indicates if the sequence should continue grow
# a deterministic linear output, additional output indicates if the sequence should continue grow
class MLP_token_plain(nn.Module):
    """Two-headed MLP: a main output ``y`` plus a scalar token score ``t``
    that indicates whether the sequence should keep growing."""

    def __init__(self, h_size, embedding_size, y_size):
        super(MLP_token_plain, self).__init__()
        self.deterministic_output = nn.Sequential(
            nn.Linear(h_size, embedding_size),
            nn.ReLU(),
            nn.Linear(embedding_size, y_size)
        )
        # separate head producing one continue/stop score per input row
        self.token_output = nn.Sequential(
            nn.Linear(h_size, embedding_size),
            nn.ReLU(),
            nn.Linear(embedding_size, 1)
        )

        # in-place init replaces the deprecated init.xavier_uniform
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight.data, gain=nn.init.calculate_gain('relu'))

    def forward(self, h):
        """Return (main output y, continue-token score t)."""
        y = self.deterministic_output(h)
        t = self.token_output(h)
        return y, t
-
-
class Main_MLP_VAE_plain(nn.Module):
    """MLP-VAE head: encodes ``h`` into (mu, log-variance), samples ``z``
    via the reparameterization trick, and decodes ``z`` into edge
    predictions."""

    def __init__(self, h_size, embedding_size, y_size):
        super(Main_MLP_VAE_plain, self).__init__()
        self.encode_11 = nn.Linear(h_size, embedding_size)  # mu
        self.encode_12 = nn.Linear(h_size, embedding_size)  # lsgms

        self.decode_1 = nn.Linear(embedding_size, embedding_size)
        self.decode_2 = nn.Linear(embedding_size, y_size)  # make edge prediction (reconstruct)
        self.relu = nn.ReLU()

        # in-place init replaces the deprecated init.xavier_uniform
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight.data, gain=nn.init.calculate_gain('relu'))

    def forward(self, h):
        """Return (reconstruction y, z_mu, z_lsgms)."""
        # encoder
        z_mu = self.encode_11(h)
        z_lsgms = self.encode_12(h)
        # reparameterize: z = mu + sigma * eps, eps ~ N(0, 1)
        z_sgm = z_lsgms.mul(0.5).exp_()
        # randn_like follows the device/dtype of z_sgm (no hard-coded .cuda())
        eps = torch.randn_like(z_sgm)
        z = eps * z_sgm + z_mu
        # decoder
        y = self.decode_1(z)
        y = self.relu(y)
        y = self.decode_2(y)
        return y, z_mu, z_lsgms
-
-
- # a deterministic linear output (update: add noise)
class MLP_VAE_plain(nn.Module):
    """VAE-style head whose reparameterization noise is currently disabled:
    the decoder consumes ``z_mu`` directly, so the forward pass is
    deterministic. ``z_lsgms`` is still returned so callers can keep a KL
    term in the loss."""

    def __init__(self, h_size, embedding_size, y_size):
        super(MLP_VAE_plain, self).__init__()
        self.encode_11 = nn.Linear(h_size, embedding_size)  # mu
        self.encode_12 = nn.Linear(h_size, embedding_size)  # lsgms
        self.decode_1 = nn.Linear(embedding_size, 32)
        self.linear = nn.Linear(h_size, y_size)  # unused in forward; kept for checkpoint compatibility
        self.bn1 = nn.BatchNorm1d(32)  # unused in forward; kept for checkpoint compatibility
        # self.decode_1_2 = nn.Linear(256, 512)
        # self.bn2 = nn.BatchNorm1d(512)
        self.decode_2 = nn.Linear(32, y_size)  # make edge prediction (reconstruct)
        self.relu = nn.ReLU()

        # in-place init replaces the deprecated init.xavier_uniform
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight.data, gain=nn.init.calculate_gain('relu'))

    def forward(self, h):
        """Return (reconstruction y, z_mu, z_lsgms); deterministic."""
        # encoder
        z_mu = self.encode_11(h)
        z_lsgms = self.encode_12(h)
        # reparameterization intentionally disabled; the dead noise sampling
        # (which also hard-required CUDA) was removed.  Re-enable with:
        #   z = torch.randn_like(z_mu) * z_lsgms.mul(0.5).exp() + z_mu
        z = z_mu
        # decoder
        y = self.decode_1(z)
        # y = self.bn1(y)
        y = self.relu(y)
        # y = self.decode_1_2(y)
        # y = self.bn2(y)
        y = self.relu(y)
        y = self.decode_2(y)
        return y, z_mu, z_lsgms
-
-
class GRAN(nn.Module):
    """GRAN-style output head: scores candidate edges between a focus node
    and the previously generated nodes by concatenating their embeddings
    and passing the pairs through a small MLP."""

    def __init__(self, max_num_nodes):
        super(GRAN, self).__init__()
        self.max_num_nodes = max_num_nodes
        self.hidden_dim = 16
        self.num_mix_component = 1  # mixture components for the edge logits
        self.output_dim = 1
        # MLP over concatenated (existing node, focus node) embedding pairs
        self.output_theta = nn.Sequential(
            nn.Linear(2 * self.hidden_dim, self.hidden_dim),
            nn.ReLU(inplace=True),
            nn.Linear(self.hidden_dim, self.hidden_dim),
            nn.ReLU(inplace=True),
            nn.Linear(self.hidden_dim, self.output_dim * self.num_mix_component))

    def forward(self, h, batch_num_nodes, random_index_list, GRAN_arbitrary_node):
        # h: node embeddings -- assumes shape (batch, num_nodes, hidden); TODO confirm
        # batch_num_nodes: number of valid nodes per batch entry
        # random_index_list: per-entry index of an arbitrary focus node,
        #     used when GRAN_arbitrary_node is True
        # returns: edge scores, shape (batch, max_num_nodes - 1, output_dim)
        batch_size = h.size()[0]
        # diff / concat are zero-padded out to max_num_nodes - 1 candidate pairs;
        # NOTE(review): .cuda() is hard-coded here, so CUDA is required
        diff = Variable(torch.zeros(batch_size, self.max_num_nodes - 1, h.size()[2]).cuda())
        concat = Variable(torch.zeros(batch_size, self.max_num_nodes - 1, 2 * h.size()[2]).cuda())
        for i in range(batch_size):
            # difference features against the last valid node (only used by the
            # commented-out `diff` head below); padding positions get -100
            diff[i, :batch_num_nodes[i] - 1, :] = h[i, :batch_num_nodes[i] - 1, :] - h[i, batch_num_nodes[i] - 1, :]
            diff[i, batch_num_nodes[i] - 1:, :] = -100
            # pair every earlier node with the last valid node
            h_duplicate = h[i, batch_num_nodes[i] - 1, :].unsqueeze(0).repeat(1, batch_num_nodes[i] - 1, 1).squeeze()
            concat[i, :batch_num_nodes[i] - 1, :] = torch.cat(
                (h[i, :batch_num_nodes[i] - 1, :], h_duplicate), 1)
            if GRAN_arbitrary_node:
                # instead pair against an arbitrary focus node, skipping the
                # focus node itself in the candidate list
                h_duplicate = h[i, random_index_list[i], :].unsqueeze(0).repeat(1, batch_num_nodes[i] - 1,
                                                                               1).squeeze()
                concat[i, :random_index_list[i], :] = torch.cat(
                    (h[i, :random_index_list[i], :], h_duplicate[:random_index_list[i], :]), 1)
                concat[i, random_index_list[i]:batch_num_nodes[i] - 1, :] = torch.cat(
                    (h[i, random_index_list[i] + 1:batch_num_nodes[i], :],
                     h_duplicate[random_index_list[i]:batch_num_nodes[i] - 1, :]), 1)
        # return self.output_theta(diff)
        # print("*** h")
        # print(h)
        # print("*** concat")
        # print(concat)
        return self.output_theta(concat)
-
-
- # a deterministic linear output (update: add noise)
# a deterministic linear output (update: add noise)
class MLP_VAE_conditional_plain(nn.Module):
    """Conditional MLP-VAE head: the decoder sees both the sampled latent
    ``z`` and the conditioning hidden state ``h`` (concatenated on the
    feature dimension)."""

    def __init__(self, h_size, embedding_size, y_size):
        super(MLP_VAE_conditional_plain, self).__init__()
        self.encode_11 = nn.Linear(h_size, embedding_size)  # mu
        self.encode_12 = nn.Linear(h_size, embedding_size)  # lsgms

        self.decode_1 = nn.Linear(embedding_size + h_size, embedding_size)
        self.decode_2 = nn.Linear(embedding_size, y_size)  # make edge prediction (reconstruct)
        self.relu = nn.ReLU()

        # in-place init replaces the deprecated init.xavier_uniform
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight.data, gain=nn.init.calculate_gain('relu'))

    def forward(self, h):
        """Return (reconstruction y, z_mu, z_lsgms) for a 3-D input ``h``."""
        # encoder
        z_mu = self.encode_11(h)
        z_lsgms = self.encode_12(h)
        # reparameterize: z = mu + sigma * eps, eps ~ N(0, 1);
        # randn_like follows the device/dtype of z_sgm (no hard-coded .cuda())
        z_sgm = z_lsgms.mul(0.5).exp_()
        eps = torch.randn_like(z_sgm)
        z = eps * z_sgm + z_mu
        # decoder conditions on both h and z
        y = self.decode_1(torch.cat((h, z), dim=2))
        y = self.relu(y)
        y = self.decode_2(y)
        return y, z_mu, z_lsgms
-
-
- ########### baseline model 1: Learning deep generative model of graphs
-
class DGM_graphs(nn.Module):
    """Module container for the "Learning Deep Generative Models of Graphs"
    baseline: holds the message-passing, graph/node-embedding and decision
    (add-node / add-edge / choose-node) sub-networks.  The generation
    procedure lives outside this class and calls these modules directly
    (see message_passing / calc_graph_embedding / calc_init_embedding).
    """

    def __init__(self, h_size):
        # h_size: node embedding size
        # h_size*2: graph embedding size

        super(DGM_graphs, self).__init__()
        ### all modules used by the model
        ## 1 message passing, 2 times
        self.m_uv_1 = nn.Linear(h_size * 2, h_size * 2)  # edge message net, round 1
        self.f_n_1 = nn.GRUCell(h_size * 2, h_size)  # input_size, hidden_size

        self.m_uv_2 = nn.Linear(h_size * 2, h_size * 2)  # edge message net, round 2
        self.f_n_2 = nn.GRUCell(h_size * 2, h_size)  # input_size, hidden_size

        ## 2 graph embedding and new node embedding
        # for graph embedding: gated sum over projected node embeddings
        self.f_m = nn.Linear(h_size, h_size * 2)
        self.f_gate = nn.Sequential(
            nn.Linear(h_size, 1),
            nn.Sigmoid()
        )
        # for new node embedding: same gated-sum structure plus a projection
        self.f_m_init = nn.Linear(h_size, h_size * 2)
        self.f_gate_init = nn.Sequential(
            nn.Linear(h_size, 1),
            nn.Sigmoid()
        )
        self.f_init = nn.Linear(h_size * 2, h_size)

        ## 3 f_addnode: probability of adding another node, from the graph embedding
        self.f_an = nn.Sequential(
            nn.Linear(h_size * 2, 1),
            nn.Sigmoid()
        )

        ## 4 f_addedge: probability of adding another edge, from the graph embedding
        self.f_ae = nn.Sequential(
            nn.Linear(h_size * 2, 1),
            nn.Sigmoid()
        )

        ## 5 f_nodes: score for choosing which existing node to connect to
        self.f_s = nn.Linear(h_size * 2, 1)
-
-
def message_passing(node_neighbor, node_embedding, model):
    '''
    Run two rounds of neighborhood message passing over the graph.

    Bug fix: the second round previously reused ``model.m_uv_1`` /
    ``model.f_n_1``; it now uses the dedicated second-round modules
    ``m_uv_2`` / ``f_n_2`` that DGM_graphs declares for exactly this
    purpose ("message passing, 2 times").

    :param node_neighbor: adjacency list; node_neighbor[i] is the list of
        neighbor indices of node i
    :param node_embedding: list of per-node embedding tensors, each (1, h)
    :param model: module providing m_uv_1/f_n_1 and m_uv_2/f_n_2
    :return: list of updated per-node embedding tensors
    '''
    node_embedding = _message_passing_round(node_neighbor, node_embedding, model.m_uv_1, model.f_n_1)
    node_embedding = _message_passing_round(node_neighbor, node_embedding, model.m_uv_2, model.f_n_2)
    return node_embedding


def _message_passing_round(node_neighbor, node_embedding, m_uv, f_n):
    # One round: each node aggregates messages from its neighbors via m_uv
    # and updates its embedding with the GRU cell f_n.
    node_embedding_new = []
    for i in range(len(node_neighbor)):
        neighbor_num = len(node_neighbor[i])
        if neighbor_num > 0:
            node_self = node_embedding[i].expand(neighbor_num, node_embedding[i].size(1))
            node_self_neighbor = torch.cat([node_embedding[j] for j in node_neighbor[i]], dim=0)
            message = torch.sum(m_uv(torch.cat((node_self, node_self_neighbor), dim=1)), dim=0, keepdim=True)
            node_embedding_new.append(f_n(message, node_embedding[i]))
        else:
            # isolated node: zero message, created on the embedding's device
            # (replaces the hard-coded .cuda())
            message_null = node_embedding[i].new_zeros(node_embedding[i].size(0), node_embedding[i].size(1) * 2)
            node_embedding_new.append(f_n(message_null, node_embedding[i]))
    return node_embedding_new
-
-
def calc_graph_embedding(node_embedding_cat, model):
    """Gated sum-pooling of stacked node embeddings into one graph embedding.

    Each node embedding is projected by ``model.f_m`` and scaled by its
    sigmoid gate from ``model.f_gate`` before summing over the node axis.
    """
    projected = model.f_m(node_embedding_cat)
    gates = model.f_gate(node_embedding_cat)
    return torch.sum(projected * gates, dim=0, keepdim=True)
-
-
def calc_init_embedding(node_embedding_cat, model):
    """Compute the initial embedding for a newly added node.

    Gated sum-pooling (``model.f_m_init`` / ``model.f_gate_init``) over the
    existing node embeddings, followed by the ``model.f_init`` projection
    back down to node-embedding size.
    """
    pooled = torch.sum(
        model.f_m_init(node_embedding_cat) * model.f_gate_init(node_embedding_cat),
        dim=0, keepdim=True)
    return model.f_init(pooled)
-
-
- ################################################## code that are NOT used for final version #############
-
-
- # RNN that updates according to graph structure, new proposed model
# RNN that updates according to graph structure, new proposed model
class Graph_RNN_structure(nn.Module):
    """Graph-structured RNN: keeps one hidden vector per generated node in
    ``self.hidden_all`` (each of shape batch*hidden_size*1) and predicts
    the new node's edges with a (optionally dilated) CNN stack applied over
    the concatenated history of hidden states.

    NOTE(review): .cuda() is hard-coded in init_hidden/forward, so this
    module requires CUDA as written.
    """

    def __init__(self, hidden_size, batch_size, output_size, num_layers, is_dilation=True, is_bn=True):
        super(Graph_RNN_structure, self).__init__()
        ## model configuration
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.output_size = output_size
        self.num_layers = num_layers  # num_layers of cnn_output
        self.is_bn = is_bn

        ## model
        self.relu = nn.ReLU()
        # self.linear_output = nn.Linear(hidden_size, 1)
        # self.linear_output_simple = nn.Linear(hidden_size, output_size)
        # for state transition use only, input is null
        # self.gru = nn.GRU(input_size=1, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)

        # use CNN to produce output prediction
        # self.cnn_output = nn.Sequential(
        #     nn.Conv1d(hidden_size, hidden_size, kernel_size=3, dilation=1, padding=1),
        #     # nn.BatchNorm1d(hidden_size),
        #     nn.ReLU(),
        #     nn.Conv1d(hidden_size, 1, kernel_size=3, dilation=1, padding=1)
        # )

        # conv stack over the node-history axis; dilation 2**i gives an
        # exponentially growing receptive field over past nodes
        if is_dilation:
            self.conv_block = nn.ModuleList(
                [nn.Conv1d(hidden_size, hidden_size, kernel_size=3, dilation=2 ** i, padding=2 ** i) for i in
                 range(num_layers - 1)])
        else:
            self.conv_block = nn.ModuleList(
                [nn.Conv1d(hidden_size, hidden_size, kernel_size=3, dilation=1, padding=1) for i in
                 range(num_layers - 1)])
        self.bn_block = nn.ModuleList([nn.BatchNorm1d(hidden_size) for i in range(num_layers - 1)])
        self.conv_out = nn.Conv1d(hidden_size, 1, kernel_size=3, dilation=1, padding=1)

        # # use CNN to do state transition
        # self.cnn_transition = nn.Sequential(
        #     nn.Conv1d(hidden_size, hidden_size, kernel_size=3, dilation=1, padding=1),
        #     # nn.BatchNorm1d(hidden_size),
        #     nn.ReLU(),
        #     nn.Conv1d(hidden_size, hidden_size, kernel_size=3, dilation=1, padding=1)
        # )

        # use linear to do transition, same as GCN mean aggregator
        self.linear_transition = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU()
        )

        # GRU based output, output a single edge prediction at a time
        # self.gru_output = nn.GRU(input_size=1, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        # use a list to keep all generated hidden vectors, each hidden has size batch*hidden_dim*1, and the list size is expanding
        # when using convolution to compute attention weight, we need to first concat the list into a pytorch variable: batch*hidden_dim*current_num_nodes
        self.hidden_all = []

        ## initialize
        for m in self.modules():
            if isinstance(m, nn.Linear):
                # print('linear')
                m.weight.data = init.xavier_uniform(m.weight.data, gain=nn.init.calculate_gain('relu'))
                # print(m.weight.data.size())
            if isinstance(m, nn.Conv1d):
                # print('conv1d')
                m.weight.data = init.xavier_uniform(m.weight.data, gain=nn.init.calculate_gain('relu'))
                # print(m.weight.data.size())
            if isinstance(m, nn.BatchNorm1d):
                # print('batchnorm1d')
                m.weight.data.fill_(1)
                m.bias.data.zero_()
                # print(m.weight.data.size())
            if isinstance(m, nn.GRU):
                # print('gru')
                m.weight_ih_l0.data = init.xavier_uniform(m.weight_ih_l0.data,
                                                          gain=nn.init.calculate_gain('sigmoid'))
                m.weight_hh_l0.data = init.xavier_uniform(m.weight_hh_l0.data,
                                                          gain=nn.init.calculate_gain('sigmoid'))
                m.bias_ih_l0.data = torch.ones(m.bias_ih_l0.data.size(0)) * 0.25
                m.bias_hh_l0.data = torch.ones(m.bias_hh_l0.data.size(0)) * 0.25

    def init_hidden(self, len=None):
        # NOTE(review): `len` shadows the builtin; kept for interface compatibility.
        # Returns one all-ones hidden vector (batch*hidden_size*1) or a list of
        # `len` of them.
        if len is None:
            return Variable(torch.ones(self.batch_size, self.hidden_size, 1)).cuda()
        else:
            hidden_list = []
            for i in range(len):
                hidden_list.append(Variable(torch.ones(self.batch_size, self.hidden_size, 1)).cuda())
            return hidden_list

    # only run a single forward step
    def forward(self, x, teacher_forcing, temperature=0.5, bptt=True, bptt_len=20, flexible=True, max_prev_node=100):
        # x: batch*1*self.output_size, the ground truth
        # teacher_forcing: if True, use the ground-truth x to attend over and
        #     prune history; otherwise use the model's own sampled prediction
        # returns: (x_pred raw edge scores, x_pred_sample sampled 0/1 edges)
        # side effect: prunes and appends to self.hidden_all
        # todo: current only look back to self.output_size nodes, try to look back according to bfs sequence

        # 1 first compute new state
        # print('hidden_all', self.hidden_all[-1*self.output_size:])
        # hidden_all_cat = torch.cat(self.hidden_all[-1*self.output_size:], dim=2)

        # # # add BPTT, detach the first variable
        # if bptt:
        #     self.hidden_all[0] = Variable(self.hidden_all[0].data).cuda()

        # concat history into batch*hidden_size*current_num_nodes
        hidden_all_cat = torch.cat(self.hidden_all, dim=2)
        # print(hidden_all_cat.size())

        # print('hidden_all_cat',hidden_all_cat.size())
        # att_weight size: batch*1*current_num_nodes
        for i in range(self.num_layers - 1):
            hidden_all_cat = self.conv_block[i](hidden_all_cat)
            if self.is_bn:
                hidden_all_cat = self.bn_block[i](hidden_all_cat)
            hidden_all_cat = self.relu(hidden_all_cat)
        # one edge score per history node: batch*1*current_num_nodes
        x_pred = self.conv_out(hidden_all_cat)
        # 2 then compute output, using a gru
        # first try the simple version, directly give the edge prediction
        # x_pred = self.linear_output_simple(hidden_new)
        # x_pred = x_pred.view(x_pred.size(0),1,x_pred.size(1))

        # todo: use a gru version output
        # if sample==False:
        #     # when training: we know the ground truth, input the sequence at once
        #     y_pred,_ = self.gru_output(x, hidden_new.permute(2,0,1))
        #     y_pred = self.linear_output(y_pred)
        # else:
        #     # when validating, we need to sampling at each time step
        #     y_pred = Variable(torch.zeros(x.size(0), x.size(1), x.size(2))).cuda()
        #     y_pred_long = Variable(torch.zeros(x.size(0), x.size(1), x.size(2))).cuda()
        #     x_step = x[:, 0:1, :]
        #     for i in range(x.size(1)):
        #         y_step,_ = self.gru_output(x_step)
        #         y_step = self.linear_output(y_step)
        #         y_pred[:, i, :] = y_step
        #         y_step = F.sigmoid(y_step)
        #         x_step = sample(y_step, sample=True, thresh=0.45)
        #         y_pred_long[:, i, :] = x_step
        #     pass

        # 3 then update self.hidden_all list
        # i.e., model will use ground truth to update new node
        # x_pred_sample = gumbel_sigmoid(x_pred, temperature=temperature)
        x_pred_sample = sample_tensor(F.sigmoid(x_pred), sample=True)
        thresh = 0.5
        x_thresh = Variable(
            torch.ones(x_pred_sample.size(0), x_pred_sample.size(1), x_pred_sample.size(2)) * thresh).cuda()
        x_pred_sample_long = torch.gt(x_pred_sample, x_thresh).long()
        if teacher_forcing:
            # first mask previous hidden states
            hidden_all_cat_select = hidden_all_cat * x
            x_sum = torch.sum(x, dim=2, keepdim=True).float()

        # i.e., the model will use it's own prediction to attend
        else:
            # first mask previous hidden states
            hidden_all_cat_select = hidden_all_cat * x_pred_sample
            x_sum = torch.sum(x_pred_sample_long, dim=2, keepdim=True).float()

        # update hidden vector for new nodes: mean over the selected
        # (edge-connected) history states
        hidden_new = torch.sum(hidden_all_cat_select, dim=2, keepdim=True) / x_sum

        hidden_new = self.linear_transition(hidden_new.permute(0, 2, 1))
        hidden_new = hidden_new.permute(0, 2, 1)

        if flexible:
            # use ground truth to maintain history state: drop everything
            # before the earliest connected node
            if teacher_forcing:
                x_id = torch.min(torch.nonzero(torch.squeeze(x.data)))
                self.hidden_all = self.hidden_all[x_id:]
            # use prediction to maintain history state
            else:
                x_id = torch.min(torch.nonzero(torch.squeeze(x_pred_sample_long.data)))
                # cap the history length at max_prev_node
                start = max(len(self.hidden_all) - max_prev_node + 1, x_id)
                self.hidden_all = self.hidden_all[start:]

        # maintain a fixed size history state
        else:
            # self.hidden_all.pop(0)
            self.hidden_all = self.hidden_all[1:]

        self.hidden_all.append(hidden_new)

        # 4 return prediction
        # print('x_pred',x_pred)
        # print('x_pred_mean', torch.mean(x_pred))
        # print('x_pred_sample_mean', torch.mean(x_pred_sample))
        return x_pred, x_pred_sample
-
-
- # batch_size = 8
- # output_size = 4
- # generator = Graph_RNN_structure(hidden_size=16, batch_size=batch_size, output_size=output_size, num_layers=1).cuda()
- # for i in range(4):
- # generator.hidden_all.append(generator.init_hidden())
- #
- # x = Variable(torch.rand(batch_size,1,output_size)).cuda()
- # x_pred = generator(x,teacher_forcing=True, sample=True)
- # print(x_pred)
-
-
# current baseline model, generating a graph by lstm
class Graph_generator_LSTM(nn.Module):
    """Baseline graph generator: a plain LSTM over adjacency-vector sequences.

    Raw features are projected into the LSTM input space, run through the
    LSTM (whose (h, c) state persists on the module between calls, enabling
    step-by-step generation), and mapped to per-step output logits.
    """

    def __init__(self, feature_size, input_size, hidden_size, output_size, batch_size, num_layers):
        super(Graph_generator_LSTM, self).__init__()
        self.batch_size = batch_size
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.linear_input = nn.Linear(feature_size, input_size)
        self.linear_output = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        # recurrent state lives on the module so generation can resume mid-sequence
        self.hidden = self.init_hidden()

        # custom initialization: xavier for LSTM weights, constant 0.25 biases
        self.lstm.weight_ih_l0.data = init.xavier_uniform(self.lstm.weight_ih_l0.data,
                                                          gain=nn.init.calculate_gain('sigmoid'))
        self.lstm.weight_hh_l0.data = init.xavier_uniform(self.lstm.weight_hh_l0.data,
                                                          gain=nn.init.calculate_gain('sigmoid'))
        self.lstm.bias_ih_l0.data = torch.ones(self.lstm.bias_ih_l0.data.size(0)) * 0.25
        self.lstm.bias_hh_l0.data = torch.ones(self.lstm.bias_hh_l0.data.size(0)) * 0.25
        for m in self.modules():
            if isinstance(m, nn.Linear):
                m.weight.data = init.xavier_uniform(m.weight.data, gain=nn.init.calculate_gain('relu'))

    def init_hidden(self):
        """Return a fresh all-zero (h, c) pair for a new batch of sequences."""
        shape = (self.num_layers, self.batch_size, self.hidden_size)
        return (Variable(torch.zeros(*shape)).cuda(),
                Variable(torch.zeros(*shape)).cuda())

    def forward(self, input_raw, pack=False, len=None):
        """Run one (possibly packed) sequence chunk through the LSTM.

        ``len`` (kept for caller compatibility; shadows the builtin) gives the
        per-example lengths used when ``pack`` is True.
        """
        emb = self.relu(self.linear_input(input_raw))
        if pack:
            emb = pack_padded_sequence(emb, len, batch_first=True)
        raw_out, self.hidden = self.lstm(emb, self.hidden)
        if pack:
            raw_out = pad_packed_sequence(raw_out, batch_first=True)[0]
        return self.linear_output(raw_out)
-
-
# a simple MLP generator output
class Graph_generator_LSTM_output_generator(nn.Module):
    """Two-layer MLP head turning (hidden state, noise) into edge probabilities."""

    def __init__(self, h_size, n_size, y_size):
        super(Graph_generator_LSTM_output_generator, self).__init__()
        # one hidden layer; sigmoid keeps outputs in (0, 1)
        self.generator_output = nn.Sequential(
            nn.Linear(h_size + n_size, 64),
            nn.ReLU(),
            nn.Linear(64, y_size),
            nn.Sigmoid()
        )

    def forward(self, h, n, temperature):
        # `temperature` is accepted for interface compatibility but unused here
        joint = torch.cat((h, n), dim=2)
        return self.generator_output(joint)
-
-
# a simple MLP discriminator
class Graph_generator_LSTM_output_discriminator(nn.Module):
    """Two-layer MLP scoring (hidden state, edge vector) pairs as real/fake."""

    def __init__(self, h_size, y_size):
        super(Graph_generator_LSTM_output_discriminator, self).__init__()
        # one hidden layer; sigmoid output = probability of "real"
        self.discriminator_output = nn.Sequential(
            nn.Linear(h_size + y_size, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )

    def forward(self, h, y):
        joint = torch.cat((h, y), dim=2)
        return self.discriminator_output(joint)
-
-
# GCN basic operation
class GraphConv(nn.Module):
    """Single graph-convolution step: y = adj @ x @ W.

    Fix: the original left ``weight`` as an *uninitialized* FloatTensor
    (raw memory, possibly NaN/garbage). The in-file callers (GCN_encoder,
    GCN_encoder_graph, GCN_generator) happen to re-initialize it with xavier
    after construction, but a standalone GraphConv would compute with garbage.
    The constructor now applies the same xavier init the callers use, so the
    re-init elsewhere is a harmless no-op change and standalone use is safe.
    """

    def __init__(self, input_dim, output_dim):
        super(GraphConv, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.weight = nn.Parameter(torch.FloatTensor(input_dim, output_dim).cuda())
        # initialize in place so the parameter is never left as raw memory
        init.xavier_uniform(self.weight.data, gain=nn.init.calculate_gain('relu'))

    def forward(self, x, adj):
        """x: batch * node * feature; adj: batch * node * node (propagation matrix)."""
        y = torch.matmul(adj, x)       # aggregate neighbour features
        y = torch.matmul(y, self.weight)  # project to output_dim
        return y
-
-
# vanilla GCN encoder
class GCN_encoder(nn.Module):
    """Two-layer GCN: conv -> ReLU -> conv, producing node embeddings."""

    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GCN_encoder, self).__init__()
        self.conv1 = GraphConv(input_dim=input_dim, output_dim=hidden_dim)
        self.conv2 = GraphConv(input_dim=hidden_dim, output_dim=output_dim)
        self.relu = nn.ReLU()
        # xavier for conv weights; identity-style init for any BatchNorm1d
        for m in self.modules():
            if isinstance(m, GraphConv):
                m.weight.data = init.xavier_uniform(m.weight.data, gain=nn.init.calculate_gain('relu'))
            elif isinstance(m, nn.BatchNorm1d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def forward(self, x, adj):
        hidden = self.relu(self.conv1(x, adj))
        return self.conv2(hidden, adj)
-
-
# vanilla GCN decoder
class GCN_decoder(nn.Module):
    """Inner-product decoder: reconstructs adjacency scores as x @ x^T."""

    def __init__(self):
        super(GCN_decoder, self).__init__()

    def forward(self, x):
        # x: batch * node * feature -> batch * node * node similarity scores
        return torch.matmul(x, x.permute(0, 2, 1))
-
-
# GCN based graph embedding
# allowing for arbitrary num of nodes
class GCN_encoder_graph(nn.Module):
    """Whole-graph encoder: stacked GraphConvs with max-pooled readouts.

    After the first conv and after each following conv, node features are
    max-pooled into a graph-level vector; the pooled vectors of all layers are
    concatenated and returned layer-major (layers * batch * feature).
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super(GCN_encoder_graph, self).__init__()
        self.num_layers = num_layers
        self.conv_first = GraphConv(input_dim=input_dim, output_dim=hidden_dim)
        # NOTE: num_layers blocks are allocated, but forward uses only num_layers - 2
        self.conv_block = nn.ModuleList(
            [GraphConv(input_dim=hidden_dim, output_dim=hidden_dim) for i in range(num_layers)])
        self.conv_last = GraphConv(input_dim=hidden_dim, output_dim=output_dim)
        self.act = nn.ReLU()
        for m in self.modules():
            if isinstance(m, GraphConv):
                m.weight.data = init.xavier_uniform(m.weight.data, gain=nn.init.calculate_gain('relu'))

    def _readout(self, x):
        # graph-level readout: feature-wise max over the node dimension
        pooled, _ = torch.max(x, dim=1, keepdim=True)
        return pooled

    def forward(self, x, adj):
        readouts = []
        x = self.act(self.conv_first(x, adj))
        readouts.append(self._readout(x))
        for layer in range(self.num_layers - 2):
            x = self.act(self.conv_block[layer](x, adj))
            readouts.append(self._readout(x))
        x = self.act(self.conv_last(x, adj))
        readouts.append(self._readout(x))
        # batch * layers * feature -> layers * batch * feature
        return torch.cat(readouts, dim=1).permute(1, 0, 2)
-
-
- # x = Variable(torch.rand(1,8,10)).cuda()
- # adj = Variable(torch.rand(1,8,8)).cuda()
- # model = GCN_encoder_graph(10,10,10).cuda()
- # y = model(x,adj)
- # print(y.size())
-
-
def preprocess(A):
    """Symmetrically normalize a batch of adjacency matrices: D^-1/2 A D^-1/2.

    A: batch * node * node. Degrees are row sums; note a zero-degree node
    yields inf in D^-1/2 (pow(0, -0.5)) and will poison the product.
    """
    degrees = torch.sum(A, dim=2)
    # assemble D^-1/2 as a batch of diagonal matrices
    D = Variable(torch.zeros(A.size(0), A.size(1), A.size(2))).cuda()
    for b in range(D.size(0)):
        D[b, :, :] = torch.diag(torch.pow(degrees[b, :], -0.5))
    return torch.matmul(torch.matmul(D, A), D)
-
-
# a sequential GCN model, GCN with n layers
class GCN_generator(nn.Module):
    """Sequential GCN generator: grows edge predictions one node at a time.

    At step i, node i's embedding is compared (inner product) with all earlier
    nodes to produce edge scores, the working adjacency is updated — with
    ground truth when teacher-forcing, otherwise with the predicted scores —
    renormalized, and one shared GraphConv refines all node embeddings.
    """

    def __init__(self, hidden_dim):
        super(GCN_generator, self).__init__()
        # todo: add a linear_input module to map the input feature into 'hidden_dim'
        self.conv = GraphConv(input_dim=hidden_dim, output_dim=hidden_dim)
        self.act = nn.ReLU()
        # initialize the shared conv weight
        for m in self.modules():
            if isinstance(m, GraphConv):
                m.weight.data = init.xavier_uniform(m.weight.data, gain=nn.init.calculate_gain('relu'))

    def forward(self, x, teacher_force=False, adj_real=None):
        # x: batch * node_num * feature
        # adj starts as identity (self-loops only); adj_output accumulates
        # the predicted edge scores and is what gets returned.
        batch_num = x.size(0)
        node_num = x.size(1)
        adj = Variable(torch.eye(node_num).view(1, node_num, node_num).repeat(batch_num, 1, 1)).cuda()
        adj_output = Variable(torch.eye(node_num).view(1, node_num, node_num).repeat(batch_num, 1, 1)).cuda()

        # do GCN n times
        # todo: try if residual connections are plausible
        # todo: add higher order of adj (adj^2, adj^3, ...)
        # todo: try if normalizing every time is plausible

        # first do GCN 1 time to preprocess the raw features

        x = self.conv(x, adj)
        x = self.act(x)

        # then do GCN rest n-1 times
        for i in range(1, node_num):
            # 1 calc prob of a new edge, output the result in adj_output
            x_last = x[:, i:i + 1, :].clone()
            x_prev = x[:, 0:i, :].clone()
            # the two self-assignments below are no-ops kept from the original
            x_prev = x_prev
            x_last = x_last
            # prob: batch * i * 1 — inner product of node i against nodes 0..i-1
            prob = x_prev @ x_last.permute(0, 2, 1)
            adj_output[:, i, 0:i] = prob.permute(0, 2, 1).clone()
            adj_output[:, 0:i, i] = prob.clone()
            # 2 update adj
            if teacher_force:
                # reset to identity, then copy the ground-truth prefix in
                adj = Variable(torch.eye(node_num).view(1, node_num, node_num).repeat(batch_num, 1, 1)).cuda()
                adj[:, 0:i + 1, 0:i + 1] = adj_real[:, 0:i + 1, 0:i + 1].clone()
            else:
                # use the model's own (soft) predictions as edges
                adj[:, i, 0:i] = prob.permute(0, 2, 1).clone()
                adj[:, 0:i, i] = prob.clone()
            # renormalize D^-1/2 A D^-1/2 before the next conv
            adj = preprocess(adj)
            # 3 do graph conv with the updated adjacency
            x = self.conv(x, adj)
            x = self.act(x)

        # 4 return the accumulated edge scores
        return adj_output
-
-
- # #### test code ####
- # print('teacher forcing')
- # # print('no teacher forcing')
- #
- # start = time.time()
- # generator = GCN_generator(hidden_dim=4)
- # end = time.time()
- # print('model build time', end-start)
- # for run in range(10):
- # for i in [500]:
- # for batch in [1,10,100]:
- # start = time.time()
- # torch.manual_seed(123)
- # x = Variable(torch.rand(batch,i,4)).cuda()
- # adj = Variable(torch.eye(i).view(1,i,i).repeat(batch,1,1)).cuda()
- # # print('x', x)
- # # print('adj', adj)
- #
- # # y = generator(x)
- # y = generator(x,True,adj)
- # # print('y',y)
- # end = time.time()
- # print('node num', i, ' batch size',batch, ' run time', end-start)
-
-
class CNN_decoder(nn.Module):
    """1-D deconvolutional decoder emitting feature maps at three scales.

    Each "hop" halves the channel count with two strided ConvTranspose1d
    layers (growing the temporal length) and projects the result to
    ``output_size`` channels with a stride-1 head.
    """

    def __init__(self, input_size, output_size, stride=2):

        super(CNN_decoder, self).__init__()

        self.input_size = input_size
        self.output_size = output_size

        c = int(self.input_size)
        out_c = int(self.output_size)
        c2, c4, c8 = int(c / 2), int(c / 4), int(c / 8)

        self.relu = nn.ReLU()
        # hop 1: c -> c/2 channels
        self.deconv1_1 = nn.ConvTranspose1d(in_channels=c, out_channels=c2, kernel_size=3, stride=stride)
        self.bn1_1 = nn.BatchNorm1d(c2)
        self.deconv1_2 = nn.ConvTranspose1d(in_channels=c2, out_channels=c2, kernel_size=3, stride=stride)
        self.bn1_2 = nn.BatchNorm1d(c2)
        self.deconv1_3 = nn.ConvTranspose1d(in_channels=c2, out_channels=out_c, kernel_size=3, stride=1, padding=1)

        # hop 2: c/2 -> c/4 channels
        self.deconv2_1 = nn.ConvTranspose1d(in_channels=c2, out_channels=c4, kernel_size=3, stride=stride)
        self.bn2_1 = nn.BatchNorm1d(c4)
        self.deconv2_2 = nn.ConvTranspose1d(in_channels=c4, out_channels=c4, kernel_size=3, stride=stride)
        self.bn2_2 = nn.BatchNorm1d(c4)
        self.deconv2_3 = nn.ConvTranspose1d(in_channels=c4, out_channels=out_c, kernel_size=3, stride=1, padding=1)

        # hop 3: c/4 -> c/8 channels
        self.deconv3_1 = nn.ConvTranspose1d(in_channels=c4, out_channels=c8, kernel_size=3, stride=stride)
        self.bn3_1 = nn.BatchNorm1d(c8)
        self.deconv3_2 = nn.ConvTranspose1d(in_channels=c8, out_channels=c8, kernel_size=3, stride=stride)
        self.bn3_2 = nn.BatchNorm1d(c8)
        self.deconv3_3 = nn.ConvTranspose1d(in_channels=c8, out_channels=out_c, kernel_size=3, stride=1, padding=1)

        # xavier for deconvs, identity-style init for batchnorms
        for m in self.modules():
            if isinstance(m, nn.ConvTranspose1d):
                m.weight.data = init.xavier_uniform(m.weight.data, gain=nn.init.calculate_gain('relu'))
            elif isinstance(m, nn.BatchNorm1d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def forward(self, x):
        """x: batch * channel * length -> (x_hop1, x_hop2, x_hop3)."""
        stages = (
            (self.deconv1_1, self.bn1_1, self.deconv1_2, self.bn1_2, self.deconv1_3),
            (self.deconv2_1, self.bn2_1, self.deconv2_2, self.bn2_2, self.deconv2_3),
            (self.deconv3_1, self.bn3_1, self.deconv3_2, self.bn3_2, self.deconv3_3),
        )
        hops = []
        for up_a, norm_a, up_b, norm_b, head in stages:
            # two upsampling deconv+bn+relu steps, then a projection head
            x = self.relu(norm_a(up_a(x)))
            x = self.relu(norm_b(up_b(x)))
            hops.append(head(x))
        return tuple(hops)
-
-
class CNN_decoder_share(nn.Module):
    """CNN decoder with one shared deconv applied repeatedly.

    A single (deconv, bn) pair upsamples the sequence twice per hop, and a
    shared output deconv projects to ``output_size`` after each hop. Exactly
    three hops are produced; ``hops`` is stored but not consulted.
    """

    def __init__(self, input_size, output_size, stride, hops):
        super(CNN_decoder_share, self).__init__()

        self.input_size = input_size
        self.output_size = output_size
        self.hops = hops  # kept for interface compatibility; unused by forward()

        self.relu = nn.ReLU()
        self.deconv = nn.ConvTranspose1d(in_channels=int(self.input_size), out_channels=int(self.input_size),
                                         kernel_size=3, stride=stride)
        self.bn = nn.BatchNorm1d(int(self.input_size))
        self.deconv_out = nn.ConvTranspose1d(in_channels=int(self.input_size), out_channels=int(self.output_size),
                                             kernel_size=3, stride=1, padding=1)

        # xavier for deconvs, identity-style init for the batchnorm
        for m in self.modules():
            if isinstance(m, nn.ConvTranspose1d):
                m.weight.data = init.xavier_uniform(m.weight.data, gain=nn.init.calculate_gain('relu'))
            elif isinstance(m, nn.BatchNorm1d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def forward(self, x):
        """x: batch * channel * length -> (x_hop1, x_hop2, x_hop3)."""
        outputs = []
        for _hop in range(3):
            # two shared upsampling steps, then the shared projection head
            x = self.relu(self.bn(self.deconv(x)))
            x = self.relu(self.bn(self.deconv(x)))
            outputs.append(self.deconv_out(x))
        return tuple(outputs)
-
-
class CNN_decoder_attention(nn.Module):
    """CNN decoder that also emits a per-hop attention (Gram) matrix.

    Each hop upsamples with a shared deconv twice, then produces
    (a) hop features via ``deconv_out`` and (b) an attention map
    A = g(x) @ g(x)^T where g is ``deconv_attention`` followed by ReLU.

    Bug fix: the original "transposed" the (N, C, L) activation with
    ``view(-1, L, C)``, which reinterprets memory instead of transposing,
    so the product was not the intended Gram matrix and was not symmetric.
    ``permute(0, 2, 1)`` performs the actual transpose.
    """

    def __init__(self, input_size, output_size, stride=2):

        super(CNN_decoder_attention, self).__init__()

        self.input_size = input_size
        self.output_size = output_size

        self.relu = nn.ReLU()
        self.deconv = nn.ConvTranspose1d(in_channels=int(self.input_size), out_channels=int(self.input_size),
                                         kernel_size=3, stride=stride)
        self.bn = nn.BatchNorm1d(int(self.input_size))
        self.deconv_out = nn.ConvTranspose1d(in_channels=int(self.input_size), out_channels=int(self.output_size),
                                             kernel_size=3, stride=1, padding=1)
        self.deconv_attention = nn.ConvTranspose1d(in_channels=int(self.input_size), out_channels=int(self.input_size),
                                                   kernel_size=1, stride=1, padding=0)
        self.bn_attention = nn.BatchNorm1d(int(self.input_size))  # currently unused in forward
        self.relu_leaky = nn.LeakyReLU(0.2)  # currently unused in forward

        # xavier for deconvs, identity-style init for batchnorms
        for m in self.modules():
            if isinstance(m, nn.ConvTranspose1d):
                m.weight.data = init.xavier_uniform(m.weight.data, gain=nn.init.calculate_gain('relu'))
            elif isinstance(m, nn.BatchNorm1d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _hop(self, x):
        # two shared upsampling deconv+bn+relu steps
        x = self.relu(self.bn(self.deconv(x)))
        x = self.relu(self.bn(self.deconv(x)))
        return x

    def _attention(self, x):
        # a = g(x); symmetric Gram attention a @ a^T (N, C, C)
        a = self.relu(self.deconv_attention(x))
        return torch.matmul(a, a.permute(0, 2, 1))

    def forward(self, x):
        '''
        :param x: batch * channel * length
        :return: three hop feature maps and their attention matrices
        '''
        # hop1
        x = self._hop(x)
        x_hop1 = self.deconv_out(x)
        x_hop1_attention = self._attention(x)

        # hop2
        x = self._hop(x)
        x_hop2 = self.deconv_out(x)
        x_hop2_attention = self._attention(x)

        # hop3
        x = self._hop(x)
        x_hop3 = self.deconv_out(x)
        x_hop3_attention = self._attention(x)

        return x_hop1, x_hop2, x_hop3, x_hop1_attention, x_hop2_attention, x_hop3_attention
-
-
- #### test code ####
- # x = Variable(torch.randn(1, 256, 1)).cuda()
- # decoder = CNN_decoder(256, 16).cuda()
- # y = decoder(x)
-
class Graphsage_Encoder(nn.Module):
    """GraphSAGE-style encoder: aggregates sampled neighbourhoods hop by hop.

    Each hop's raw features are projected, widened by a per-hop linear stack,
    and mean-pooled onto their "father" nodes according to nodes_count_list;
    the pooled summaries of hops 3, 2, 1 and the node itself are concatenated
    and mixed by a final linear layer.

    NOTE(review): ``layer_num`` is accepted but never used. The BatchNorm1d
    calls below are wrapped in ``view(-1, size(2), size(1))`` round-trips;
    ``view`` reinterprets memory rather than transposing, so the axis being
    normalized is presumably not the intended channel axis — confirm before
    relying on these statistics.
    """

    def __init__(self, feature_size, input_size, layer_num):
        super(Graphsage_Encoder, self).__init__()

        # maps raw node features into the shared representation space
        self.linear_projection = nn.Linear(feature_size, input_size)

        self.input_size = input_size

        # linear for hop 3 (three aggregation levels, doubling width each time)
        self.linear_3_0 = nn.Linear(input_size * (2 ** 0), input_size * (2 ** 1))
        self.linear_3_1 = nn.Linear(input_size * (2 ** 1), input_size * (2 ** 2))
        self.linear_3_2 = nn.Linear(input_size * (2 ** 2), input_size * (2 ** 3))
        # linear for hop 2
        self.linear_2_0 = nn.Linear(input_size * (2 ** 0), input_size * (2 ** 1))
        self.linear_2_1 = nn.Linear(input_size * (2 ** 1), input_size * (2 ** 2))
        # linear for hop 1
        self.linear_1_0 = nn.Linear(input_size * (2 ** 0), input_size * (2 ** 1))
        # linear for hop 0
        self.linear_0_0 = nn.Linear(input_size * (2 ** 0), input_size * (2 ** 1))

        # mixes the concatenated hop summaries (2 + 2 + 4 + 8 widths)
        self.linear = nn.Linear(input_size * (2 + 2 + 4 + 8), input_size * (16))

        self.bn_3_0 = nn.BatchNorm1d(self.input_size * (2 ** 1))
        self.bn_3_1 = nn.BatchNorm1d(self.input_size * (2 ** 2))
        self.bn_3_2 = nn.BatchNorm1d(self.input_size * (2 ** 3))

        self.bn_2_0 = nn.BatchNorm1d(self.input_size * (2 ** 1))
        self.bn_2_1 = nn.BatchNorm1d(self.input_size * (2 ** 2))

        self.bn_1_0 = nn.BatchNorm1d(self.input_size * (2 ** 1))

        self.bn_0_0 = nn.BatchNorm1d(self.input_size * (2 ** 1))

        self.bn = nn.BatchNorm1d(input_size * (16))

        self.relu = nn.ReLU()
        # xavier for linears, identity-style init for batchnorms
        for m in self.modules():
            if isinstance(m, nn.Linear):
                m.weight.data = init.xavier_uniform(m.weight.data, gain=nn.init.calculate_gain('relu'))
            elif isinstance(m, nn.BatchNorm1d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def forward(self, nodes_list, nodes_count_list):
        '''
        Aggregate sampled neighbourhoods into one embedding per graph/node batch.

        :param nodes_list: a list, each element n_i is a tensor for node's k-i hop neighbours
        (the first nodes_hop is the furthest neighbor)
        where n_i = N * num_neighbours * features
        :param nodes_count_list: a list, each element is a list that shows how many
        neighbours belong to the father node (used to segment the mean pooling)
        :return: the concatenated, linearly mixed hop summaries
        '''

        # 3-hop feature
        # nodes original features to representations
        nodes_list[0] = Variable(nodes_list[0]).cuda()
        nodes_list[0] = self.linear_projection(nodes_list[0])
        nodes_features = self.linear_3_0(nodes_list[0])
        # NOTE(review): view(...) here reshapes rather than transposes — see class docstring
        nodes_features = self.bn_3_0(nodes_features.view(-1, nodes_features.size(2), nodes_features.size(1)))
        nodes_features = nodes_features.view(-1, nodes_features.size(2), nodes_features.size(1))
        nodes_features = self.relu(nodes_features)
        # nodes count from previous hop
        nodes_count = nodes_count_list[0]
        # aggregated representations placeholder, feature dim * 2
        nodes_features_farther = Variable(
            torch.Tensor(nodes_features.size(0), nodes_count.size(1), nodes_features.size(2))).cuda()
        i = 0
        for j in range(nodes_count.size(1)):
            # mean pooling for each father node over its contiguous slice of children
            nodes_features_farther[:, j, :] = torch.mean(nodes_features[:, i:i + int(nodes_count[:, j][0]), :], 1,
                                                         keepdim=False)
            i += int(nodes_count[:, j][0])
        # assign node_features
        nodes_features = nodes_features_farther
        nodes_features = self.linear_3_1(nodes_features)
        nodes_features = self.bn_3_1(nodes_features.view(-1, nodes_features.size(2), nodes_features.size(1)))
        nodes_features = nodes_features.view(-1, nodes_features.size(2), nodes_features.size(1))
        nodes_features = self.relu(nodes_features)
        # nodes count from previous hop
        nodes_count = nodes_count_list[1]
        # aggregated representations placeholder, feature dim * 2
        nodes_features_farther = Variable(
            torch.Tensor(nodes_features.size(0), nodes_count.size(1), nodes_features.size(2))).cuda()
        i = 0
        for j in range(nodes_count.size(1)):
            # mean pooling for each father node
            nodes_features_farther[:, j, :] = torch.mean(nodes_features[:, i:i + int(nodes_count[:, j][0]), :], 1,
                                                         keepdim=False)
            i += int(nodes_count[:, j][0])
        # assign node_features
        nodes_features = nodes_features_farther
        nodes_features = self.linear_3_2(nodes_features)
        nodes_features = self.bn_3_2(nodes_features.view(-1, nodes_features.size(2), nodes_features.size(1)))
        nodes_features = nodes_features.view(-1, nodes_features.size(2), nodes_features.size(1))
        # final summary of hop 3: mean over remaining neighbours (no relu here)
        nodes_features_hop_3 = torch.mean(nodes_features, 1, keepdim=True)

        # 2-hop feature
        # nodes original features to representations
        nodes_list[1] = Variable(nodes_list[1]).cuda()
        nodes_list[1] = self.linear_projection(nodes_list[1])
        nodes_features = self.linear_2_0(nodes_list[1])
        nodes_features = self.bn_2_0(nodes_features.view(-1, nodes_features.size(2), nodes_features.size(1)))
        nodes_features = nodes_features.view(-1, nodes_features.size(2), nodes_features.size(1))
        nodes_features = self.relu(nodes_features)
        # nodes count from previous hop
        nodes_count = nodes_count_list[1]
        # aggregated representations placeholder, feature dim * 2
        nodes_features_farther = Variable(
            torch.Tensor(nodes_features.size(0), nodes_count.size(1), nodes_features.size(2))).cuda()
        i = 0
        for j in range(nodes_count.size(1)):
            # mean pooling for each father node
            nodes_features_farther[:, j, :] = torch.mean(nodes_features[:, i:i + int(nodes_count[:, j][0]), :], 1,
                                                         keepdim=False)
            i += int(nodes_count[:, j][0])
        # assign node_features
        nodes_features = nodes_features_farther
        nodes_features = self.linear_2_1(nodes_features)
        nodes_features = self.bn_2_1(nodes_features.view(-1, nodes_features.size(2), nodes_features.size(1)))
        nodes_features = nodes_features.view(-1, nodes_features.size(2), nodes_features.size(1))
        # final summary of hop 2 (no relu, matching hop 3)
        nodes_features_hop_2 = torch.mean(nodes_features, 1, keepdim=True)

        # 1-hop feature
        # nodes original features to representations
        nodes_list[2] = Variable(nodes_list[2]).cuda()
        nodes_list[2] = self.linear_projection(nodes_list[2])
        nodes_features = self.linear_1_0(nodes_list[2])
        nodes_features = self.bn_1_0(nodes_features.view(-1, nodes_features.size(2), nodes_features.size(1)))
        nodes_features = nodes_features.view(-1, nodes_features.size(2), nodes_features.size(1))
        # final summary of hop 1
        nodes_features_hop_1 = torch.mean(nodes_features, 1, keepdim=True)

        # own feature
        nodes_list[3] = Variable(nodes_list[3]).cuda()
        nodes_list[3] = self.linear_projection(nodes_list[3])
        nodes_features = self.linear_0_0(nodes_list[3])
        nodes_features = self.bn_0_0(nodes_features.view(-1, nodes_features.size(2), nodes_features.size(1)))
        nodes_features_hop_0 = nodes_features.view(-1, nodes_features.size(2), nodes_features.size(1))

        # concatenate all hop summaries and mix with the final linear layer
        nodes_features = torch.cat(
            (nodes_features_hop_0, nodes_features_hop_1, nodes_features_hop_2, nodes_features_hop_3), dim=2)
        nodes_features = self.linear(nodes_features)
        nodes_features = nodes_features.view(-1, nodes_features.size(2), nodes_features.size(1))
        return (nodes_features)
|