
Upload files to ''

master · Zahra Rahaie · 1 year ago · commit ef8a47eb7b
5 changed files with 275 additions and 2 deletions
1. README.md (+10, -2)
2. neuralnetworks.py (+37, -0)
3. ssl_vae.py (+58, -0)
4. train_classifier.py (+104, -0)
5. util.py (+66, -0)

README.md (+10, -2)

# DeepGenePrior

## Dependencies

- TensorFlow >= 0.8.0 (the constraint comes from prettytensor; older versions might work but are untested)
- prettytensor
- numpy
- matplotlib and seaborn (optional, for VAE images)

## Usage

To train the latent-feature model (M1), run `python ssl_vae.py`. Parameters are set in the same file.

neuralnetworks.py (+37, -0)

###
'''
Similar to M1 from https://github.com/dpkingma/nips14-ssl
Original Author: S. Saemundsson
Edited by: Z. Rahaie
'''
###
import tensorflow as tf
import prettytensor as pt
import numpy as np

import util  # helper functions added in this commit (util.py)


class FullyConnected(object):

    def __init__(self,
                 dim_output,
                 hidden_layers,
                 nonlinearity=tf.nn.softplus,
                 l2loss=0.0,
                 name='FullyConnected'):
        self.dim_output = dim_output
        self.hidden_layers = hidden_layers
        self.nonlinearity = nonlinearity
        self.l2loss = l2loss

    def output(self, inputs, phase=pt.Phase.train):
        # Wrap the raw tensor so prettytensor's chainable layer API applies.
        inputs = pt.wrap(inputs)
        with pt.defaults_scope(phase=phase, activation_fn=self.nonlinearity, l2loss=self.l2loss):
            # Stack the hidden layers, each with the shared nonlinearity.
            for layer in self.hidden_layers:
                inputs = inputs.fully_connected(layer)
        # The output layer is linear; callers apply their own output activation.
        return inputs.fully_connected(self.dim_output, activation_fn=None)
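
For orientation, a minimal usage sketch of FullyConnected; the 784-feature input, layer widths, and 50-dimensional output are illustrative values, not taken from this commit:

import tensorflow as tf

from neuralnetworks import FullyConnected

# Illustrative sizes only: map a 784-feature input through two hidden layers
# to a 50-dimensional linear output.
x = tf.placeholder(tf.float32, [None, 784])
net = FullyConnected(dim_output=50, hidden_layers=[500, 500], l2loss=1e-5)
y = net.output(x)  # prettytensor-wrapped tensor of shape [None, 50]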

ssl_vae.py (+58, -0)

###
'''
Similar to M1 from https://github.com/dpkingma/nips14-ssl
Original Author: S. Saemundsson
Edited by: Z. Rahaie
'''
###
import numpy as np

from vae import VariationalAutoencoder
import data.asd as asd

if __name__ == '__main__':

    # Training hyperparameters; set these to positive values before running.
    num_batches = 0
    dim_z = 0
    epochs = 0
    learning_rate = 1e-4
    l2_loss = 1e-5
    seed = 12345

    asd_path = ['nds/AutDB_ASD_cnv_dataset.txt']

    # Uses the anglpy module from Kingma's github (linked above) to split the dataset.
    train_x, train_y, valid_x, valid_y, test_x, test_y = asd.load_numpy(asd_path, binarize_y=True)

    x_train, y_train = train_x.T, train_y.T
    x_valid, y_valid = valid_x.T, valid_y.T
    x_test, y_test = test_x.T, test_y.T

    dim_x = x_train.shape[1]
    dim_y = y_train.shape[1]

    # Hidden layer widths shrink by repeated square roots of the input dimension.
    h1 = int(np.sqrt(dim_x))
    h2 = int(np.sqrt(h1))
    h3 = int(np.sqrt(h2))
    hidden_layers_px = [h1, h2, h3]
    hidden_layers_qz = [h1, h2, h3]

    VAE = VariationalAutoencoder(dim_x=dim_x,
                                 dim_z=dim_z,
                                 hidden_layers_px=hidden_layers_px,
                                 hidden_layers_qz=hidden_layers_qz,
                                 l2_loss=l2_loss)

    # draw_img: draw VAE samples every n iterations (set to 0 to disable).
    VAE.train(x=x_train, x_valid=x_valid, epochs=epochs, num_batches=num_batches,
              learning_rate=learning_rate, seed=seed, stop_iter=30, print_every=10, draw_img=0)

    # Propagate the layer weight matrices through to the label weights to get a
    # per-gene score: matrix products, not elementwise products (the matrices
    # have incompatible shapes for elementwise multiplication). Assumes
    # get_weights() returns the weight matrices in this order.
    weights_as_numpy = VAE.get_weights()
    output_weights_to_label = weights_as_numpy[6]
    scores = np.dot(np.dot(np.dot(weights_as_numpy[0], weights_as_numpy[1]),
                           weights_as_numpy[2]), output_weights_to_label)

    # Collapse each gene's row to a scalar so it can be thresholded; genes with
    # a positive total score are kept.
    gene_scores = scores.sum(axis=1)
    genes_index = [i for i, v in enumerate(gene_scores) if v > 0]
    print(genes_index)
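
train_classifier.py below restores a VAE checkpoint from 'temp/mid_training_0.cpkt'. A minimal sketch of saving the trained model there, assuming VAE.session is still open after train() and VAE.saver is a tf.train.Saver, as the restore call in train_classifier.py implies:

# Persist the trained VAE for train_classifier.py (API assumed, see above).
VAE.saver.save(VAE.session, 'temp/mid_training_0.cpkt')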

train_classifier.py (+104, -0)

###
'''
Similar to M1 from https://github.com/dpkingma/nips14-ssl
Original Author: S. Saemundsson
Edited by: Z. Rahaie
'''
###
import numpy as np

from genclass import GenerativeClassifier
from vae import VariationalAutoencoder
import data.asd as asd


def encode_dataset(model_path, min_std=0.0):
    # Relies on dim_x, dim_z and the data splits defined in __main__ below.
    VAE = VariationalAutoencoder(dim_x, dim_z)

    with VAE.session:
        VAE.saver.restore(VAE.session, model_path)
        enc_x_lab_mean, enc_x_lab_var = VAE.encode(x_lab)
        enc_x_ulab_mean, enc_x_ulab_var = VAE.encode(x_ulab)
        enc_x_valid_mean, enc_x_valid_var = VAE.encode(x_valid)
        enc_x_test_mean, enc_x_test_var = VAE.encode(x_test)

    # Drop near-constant latent dimensions (std below min_std on unlabelled data).
    id_x_keep = np.std(enc_x_ulab_mean, axis=0) > min_std
    enc_x_lab_mean, enc_x_lab_var = enc_x_lab_mean[:, id_x_keep], enc_x_lab_var[:, id_x_keep]
    enc_x_ulab_mean, enc_x_ulab_var = enc_x_ulab_mean[:, id_x_keep], enc_x_ulab_var[:, id_x_keep]
    enc_x_valid_mean, enc_x_valid_var = enc_x_valid_mean[:, id_x_keep], enc_x_valid_var[:, id_x_keep]
    enc_x_test_mean, enc_x_test_var = enc_x_test_mean[:, id_x_keep], enc_x_test_var[:, id_x_keep]

    # Each encoded example stores its mean and variance halves side by side.
    data_lab = np.hstack([enc_x_lab_mean, enc_x_lab_var])
    data_ulab = np.hstack([enc_x_ulab_mean, enc_x_ulab_var])
    data_valid = np.hstack([enc_x_valid_mean, enc_x_valid_var])
    data_test = np.hstack([enc_x_test_mean, enc_x_test_var])

    return data_lab, data_ulab, data_valid, data_test
if __name__ == '__main__':

    num_batches = 100      #Number of minibatches in a single epoch
    epochs = 1001          #Number of epochs through the full dataset
    learning_rate = 1e-4   #Learning rate of ADAM
    alpha = 0.1            #Discriminatory factor (see equation (9) of http://arxiv.org/pdf/1406.5298v2.pdf)
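    # Equation (9) of the paper linked above augments the variational bound J
    # with an explicit classification loss on the labelled data, weighted by
    # alpha:  J^alpha = J + alpha * E[-log q_phi(y | x)],
    # so larger alpha makes the model more discriminative.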
    seed = 12345           #Seed for RNG

    ####################
    ''' Load Dataset '''
    ####################

    asd_path = ['nds/asd_case.txt', 'nds/asd_control.txt']

    # Number of labelled examples kept for semi-supervised training
    # (100 is the original nips14-ssl default; adjust for this dataset).
    num_lab = 100

    #Uses anglpy module from original paper (linked at top) to split the dataset for semi-supervised training
    train_x, train_y, valid_x, valid_y, test_x, test_y = asd.load_numpy_split(asd_path, binarize_y=True)
    x_l, y_l, x_u, y_u = asd.create_semisupervised(train_x, train_y, num_lab)

    x_lab, y_lab = x_l.T, y_l.T
    x_ulab, y_ulab = x_u.T, y_u.T
    x_valid, y_valid = valid_x.T, valid_y.T
    x_test, y_test = test_x.T, test_y.T

    ################
    ''' Load VAE '''
    ################

    VAE_model_path = 'temp/mid_training_0.cpkt'
    min_std = 0.1 #Dimensions with std < min_std are removed before training with GC

    dim_x = x_lab.shape[1]  # raw input dimension, needed to rebuild the VAE

    # dim_z must match the latent size of the VAE being restored; the layer
    # widths below are placeholders for the classifier networks, not values
    # fixed by this commit.
    dim_z = 50
    hidden_layers_px = [500]
    hidden_layers_qz = [500]
    hidden_layers_qy = [500]

    data_lab, data_ulab, data_valid, data_test = encode_dataset(VAE_model_path, min_std)

    dim_x = data_lab.shape[1] // 2  # each row is [mean, variance], so halve it
    dim_y = y_lab.shape[1]
    num_examples = data_lab.shape[0] + data_ulab.shape[0]

    ###################################
    ''' Train Generative Classifier '''
    ###################################

    GC = GenerativeClassifier(dim_x, dim_z, dim_y,
                              num_examples, num_lab, num_batches,
                              hidden_layers_px=hidden_layers_px,
                              hidden_layers_qz=hidden_layers_qz,
                              hidden_layers_qy=hidden_layers_qy,
                              alpha=alpha)

    GC.train(x_labelled=data_lab, y=y_lab, x_unlabelled=data_ulab,
             x_valid=data_valid, y_valid=y_valid,
             epochs=epochs,
             learning_rate=learning_rate,
             seed=seed,
             print_every=10,
             load_path=None)

    ############################
    ''' Evaluate on Test Set '''
    ############################

    GC_eval = GenerativeClassifier(dim_x, dim_z, dim_y, num_examples, num_lab, num_batches)

    with GC_eval.session:
        GC_eval.saver.restore(GC_eval.session, GC.save_path)
        GC_eval.predict_labels(data_test, y_test)

util.py (+66, -0)

###
'''
Similar to M1 from https://github.com/dpkingma/nips14-ssl
Original Author: S. Saemundsson
Edited by: Z. Rahaie
'''
###
import prettytensor as pt
import tensorflow as tf
import numpy as np

logc = np.log(2. * np.pi)
c = -0.5 * np.log(2 * np.pi)


def tf_normal_logpdf(x, mu, log_sigma_sq):
    # Log-density of x under N(mu, sigma^2), parameterised by log(sigma^2).
    return (-0.5 * logc - log_sigma_sq / 2. -
            tf.div(tf.square(tf.sub(x, mu)), 2 * tf.exp(log_sigma_sq)))


def tf_stdnormal_logpdf(x):
    # Log-density of x under the standard normal N(0, 1).
    return (-0.5 * (logc + tf.square(x)))


def tf_gaussian_ent(log_sigma_sq):
    # E_q[log q(z)] for q = N(mu, sigma^2): the negative entropy, per dimension.
    return (-0.5 * (logc + 1.0 + log_sigma_sq))


def tf_gaussian_marg(mu, log_sigma_sq):
    # E_q[log N(z; 0, 1)] for q = N(mu, sigma^2), per dimension.
    return (-0.5 * (logc + (tf.square(mu) + tf.exp(log_sigma_sq))))


def tf_binary_xentropy(x, y, const=1e-10):
    # Binary cross-entropy with y clipped away from 0 and 1 for stability.
    return -(x * tf.log(tf.clip_by_value(y, const, 1.0)) +
             (1.0 - x) * tf.log(tf.clip_by_value(1.0 - y, const, 1.0)))
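
# For reference, the Gaussian identities the helpers above implement, stated
# per dimension with q = N(mu, sigma^2):
#   tf_normal_logpdf:    log N(x; mu, sigma^2) = -1/2*(log(2*pi) + log(sigma^2)) - (x - mu)^2 / (2*sigma^2)
#   tf_stdnormal_logpdf: log N(x; 0, 1)        = -1/2*(log(2*pi) + x^2)
#   tf_gaussian_ent:     E_q[log q(z)]         = -1/2*(log(2*pi) + 1 + log(sigma^2))
#   tf_gaussian_marg:    E_q[log N(z; 0, 1)]   = -1/2*(log(2*pi) + mu^2 + sigma^2)
# These are the pieces from which the reconstruction and KL terms of a
# variational lower bound are typically assembled.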

def feed_numpy_semisupervised(num_lab_batch, num_ulab_batch, x_lab, y, x_ulab):
    # Yields aligned labelled/unlabelled minibatches; each row of x is assumed
    # to hold two halves (e.g. mean and variance) split at dim/2.
    size = x_lab.shape[0] + x_ulab.shape[0]
    batch_size = num_lab_batch + num_ulab_batch
    count = int(size / batch_size)

    dim = x_lab.shape[1]

    for i in range(count):
        start_lab = i * num_lab_batch
        end_lab = start_lab + num_lab_batch
        start_ulab = i * num_ulab_batch
        end_ulab = start_ulab + num_ulab_batch

        yield [x_lab[start_lab:end_lab, :dim // 2], x_lab[start_lab:end_lab, dim // 2:dim], y[start_lab:end_lab],
               x_ulab[start_ulab:end_ulab, :dim // 2], x_ulab[start_ulab:end_ulab, dim // 2:dim]]


def feed_numpy(batch_size, x):
    # Yields successive minibatches of x; a trailing partial batch is dropped.
    size = x.shape[0]
    count = int(size / batch_size)

    for i in range(count):
        start = i * batch_size
        end = start + batch_size

        yield x[start:end]
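
A minimal usage sketch of feed_numpy (the array shape is illustrative):

import numpy as np
from util import feed_numpy

x = np.random.rand(1000, 64).astype(np.float32)  # 1000 examples, 64 features

for batch in feed_numpy(100, x):
    print(batch.shape)  # (100, 64), ten batches in total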
