@@ -1,2 +1,10 @@
# DeepGenePrior

## Dependencies

- TensorFlow >= 0.8.0 (required by prettytensor; older versions of prettytensor might work, but this is untested)
- prettytensor
- numpy
- matplotlib and seaborn (optional, for VAE images)

## Usage

To train the latent-feature model (M1), run ssl_vae.py. Parameters are set in the same file.
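For example, with the dependencies installed:

    python ssl_vae.py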
@@ -0,0 +1,37 @@
###
'''
Similar to M1 from https://github.com/dpkingma/nips14-ssl
Original Author: S. Saemundsson
Edited by: Z. Rahaie
'''
###

import tensorflow as tf
import prettytensor as pt
import numpy as np
import utils
class FullyConnected( object ):
    '''Stack of fully connected layers built with prettytensor.'''

    def __init__( self,
                  dim_output,
                  hidden_layers,
                  nonlinearity = tf.nn.softplus,
                  l2loss = 0.0,
                  name = 'FullyConnected' ):

        self.dim_output = dim_output
        self.hidden_layers = hidden_layers
        self.nonlinearity = nonlinearity
        self.l2loss = l2loss

    def output( self, inputs, phase = pt.Phase.train ):

        inputs = pt.wrap( inputs )

        with pt.defaults_scope( phase = phase, activation_fn = self.nonlinearity, l2loss = self.l2loss ):
            for layer in self.hidden_layers:
                inputs = inputs.fully_connected( layer )

        #Final layer is linear; downstream code applies its own output nonlinearity
        return inputs.fully_connected( self.dim_output, activation_fn = None )
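
if __name__ == '__main__':
    #Minimal usage sketch (assumed, not part of the original file): a 784-d
    #input mapped through two hidden layers to 10 linear outputs.
    x = tf.placeholder( tf.float32, [ None, 784 ] )
    net = FullyConnected( dim_output = 10, hidden_layers = [ 256, 64 ] )
    logits = net.output( x )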
@@ -0,0 +1,58 @@
###
'''
Similar to M1 from https://github.com/dpkingma/nips14-ssl
Original Author: S. Saemundsson
Edited by: Z. Rahaie
'''
###

from vae import VariationalAutoencoder

import numpy as np
import data.asd as asd
if __name__ == '__main__':

    #Assumed values (the file shipped with these three set to 0, which cannot
    #train); chosen to mirror the companion classifier script - tune for your data
    num_batches = 100
    dim_z = 50
    epochs = 1001

    learning_rate = 1e-4
    l2_loss = 1e-5
    seed = 12345

    asd_path = ['nds/AutDB_ASD_cnv_dataset.txt']

    #Uses the anglepy module from Kingma's github (linked above) to split the dataset
    train_x, train_y, valid_x, valid_y, test_x, test_y = asd.load_numpy(asd_path, binarize_y=True)

    x_train, y_train = train_x.T, train_y.T
    x_valid, y_valid = valid_x.T, valid_y.T
    x_test, y_test = test_x.T, test_y.T

    dim_x = x_train.shape[1]
    dim_y = y_train.shape[1]

    #Hidden layer widths: each layer shrinks to the square root of the one
    #before it (the original defined these self-referentially, before the
    #input size was known)
    hidden_layers_px = [ int(np.sqrt(dim_x)) ]
    hidden_layers_px.append( int(np.sqrt(hidden_layers_px[0])) )
    hidden_layers_px.append( int(np.sqrt(hidden_layers_px[1])) )
    hidden_layers_qz = list( hidden_layers_px )
    VAE = VariationalAutoencoder( dim_x = dim_x,
                                  dim_z = dim_z,
                                  hidden_layers_px = hidden_layers_px,
                                  hidden_layers_qz = hidden_layers_qz,
                                  l2_loss = l2_loss )

    #print_every/draw_img act every n iterations (set to 0 to disable)
    VAE.train( x = x_train, x_valid = x_valid, epochs = epochs, num_batches = num_batches,
               learning_rate = learning_rate, seed = seed, stop_iter = 30, print_every = 10, draw_img = 0 )

    weights_as_numpy = VAE.get_weights()
    output_weights_to_label = weights_as_numpy[6]

    #Chain the weight matrices from the input through the encoder to the
    #output, giving one aggregate score per input gene (matrix products;
    #elementwise multiplication fails for mismatched layer shapes)
    scores = weights_as_numpy[0].dot( weights_as_numpy[1] ).dot( weights_as_numpy[2] ).dot( output_weights_to_label )

    genes_index = [ i for i, v in enumerate( np.ravel( scores ) ) if v > 0 ]
    print( genes_index )
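
    #Shape sanity check on toy matrices (hypothetical sizes, not part of the
    #original script): the chained product yields one score per input row.
    _W0, _W1, _W2, _Wout = ( np.ones( (5, 4) ), np.ones( (4, 3) ),
                             np.ones( (3, 2) ), np.ones( (2, 1) ) )
    assert _W0.dot( _W1 ).dot( _W2 ).dot( _Wout ).shape == (5, 1)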
@@ -0,0 +1,104 @@
###
'''
Similar to M1+M2 from https://github.com/dpkingma/nips14-ssl
Original Author: S. Saemundsson
Edited by: Z. Rahaie
'''
###

from genclass import GenerativeClassifier
from vae import VariationalAutoencoder

import numpy as np
import data.asd as asd
def encode_dataset( model_path, min_std = 0.0 ):

    #Relies on dim_x, dim_z and the data splits defined in __main__ below
    VAE = VariationalAutoencoder( dim_x, dim_z )

    with VAE.session:
        VAE.saver.restore( VAE.session, model_path )

        enc_x_lab_mean, enc_x_lab_var = VAE.encode( x_lab )
        enc_x_ulab_mean, enc_x_ulab_var = VAE.encode( x_ulab )
        enc_x_valid_mean, enc_x_valid_var = VAE.encode( x_valid )
        enc_x_test_mean, enc_x_test_var = VAE.encode( x_test )

    #Drop latent dimensions whose (unlabelled) means barely vary
    id_x_keep = np.std( enc_x_ulab_mean, axis = 0 ) > min_std

    enc_x_lab_mean, enc_x_lab_var = enc_x_lab_mean[ :, id_x_keep ], enc_x_lab_var[ :, id_x_keep ]
    enc_x_ulab_mean, enc_x_ulab_var = enc_x_ulab_mean[ :, id_x_keep ], enc_x_ulab_var[ :, id_x_keep ]
    enc_x_valid_mean, enc_x_valid_var = enc_x_valid_mean[ :, id_x_keep ], enc_x_valid_var[ :, id_x_keep ]
    enc_x_test_mean, enc_x_test_var = enc_x_test_mean[ :, id_x_keep ], enc_x_test_var[ :, id_x_keep ]

    data_lab = np.hstack( [ enc_x_lab_mean, enc_x_lab_var ] )
    data_ulab = np.hstack( [ enc_x_ulab_mean, enc_x_ulab_var ] )
    data_valid = np.hstack( [ enc_x_valid_mean, enc_x_valid_var ] )
    data_test = np.hstack( [ enc_x_test_mean, enc_x_test_var ] )

    return data_lab, data_ulab, data_valid, data_test
if __name__ == '__main__':

    num_batches = 100       #Number of minibatches in a single epoch
    epochs = 1001           #Number of epochs through the full dataset
    learning_rate = 1e-4    #Learning rate of ADAM
    alpha = 0.1             #Discriminatory factor (see equation (9) of http://arxiv.org/pdf/1406.5298v2.pdf)
    seed = 12345            #Seed for RNG

    #Assumed values - these are used below but were never defined in this file
    num_lab = 100               #Number of labelled examples
    dim_z = 50                  #Latent dimensionality of the pretrained VAE
    hidden_layers_px = [ 500 ]  #Hidden layers of p(x|z,y)
    hidden_layers_qz = [ 500 ]  #Hidden layers of q(z|x,y)
    hidden_layers_qy = [ 500 ]  #Hidden layers of q(y|x)

    ####################
    ''' Load Dataset '''
    ####################

    asd_path = ['nds/asd_case.txt', 'nds/asd_control.txt']

    #Uses the anglepy module from the original paper (linked at top) to split the dataset for semi-supervised training
    train_x, train_y, valid_x, valid_y, test_x, test_y = asd.load_numpy_split(asd_path, binarize_y=True)
    x_l, y_l, x_u, y_u = asd.create_semisupervised(train_x, train_y, num_lab)

    x_lab, y_lab = x_l.T, y_l.T
    x_ulab, y_ulab = x_u.T, y_u.T
    x_valid, y_valid = valid_x.T, valid_y.T
    x_test, y_test = test_x.T, test_y.T
    ################
    ''' Load VAE '''
    ################

    VAE_model_path = 'temp/mid_training_0.cpkt'
    min_std = 0.1 #Dimensions with std < min_std are removed before training with the GC

    dim_x = x_lab.shape[1] #Raw input dimensionality, used by encode_dataset to rebuild the VAE
    data_lab, data_ulab, data_valid, data_test = encode_dataset( VAE_model_path, min_std )

    dim_x = data_lab.shape[1] // 2 #The GC sees concatenated [mean, var] features
    dim_y = y_lab.shape[1]
    num_examples = data_lab.shape[0] + data_ulab.shape[0]
    ###################################
    ''' Train Generative Classifier '''
    ###################################

    GC = GenerativeClassifier(  dim_x, dim_z, dim_y,
                                num_examples, num_lab, num_batches,
                                hidden_layers_px = hidden_layers_px,
                                hidden_layers_qz = hidden_layers_qz,
                                hidden_layers_qy = hidden_layers_qy,
                                alpha = alpha )

    GC.train(   x_labelled = data_lab, y = y_lab, x_unlabelled = data_ulab,
                x_valid = data_valid, y_valid = y_valid,
                epochs = epochs,
                learning_rate = learning_rate,
                seed = seed,
                print_every = 10,
                load_path = None )

    ############################
    ''' Evaluate on Test Set '''
    ############################

    GC_eval = GenerativeClassifier( dim_x, dim_z, dim_y, num_examples, num_lab, num_batches )

    with GC_eval.session:
        GC_eval.saver.restore( GC_eval.session, GC.save_path )
        GC_eval.predict_labels( data_test, y_test )
@@ -0,0 +1,66 @@
###
'''
Similar to M1 from https://github.com/dpkingma/nips14-ssl
Original Author: S. Saemundsson
Edited by: Z. Rahaie
'''
###

import prettytensor as pt
import tensorflow as tf
import numpy as np

logc = np.log(2.*np.pi)
c = - 0.5 * np.log(2*np.pi)
def tf_normal_logpdf(x, mu, log_sigma_sq):
    #log N(x | mu, sigma^2), with the variance parameterised as log(sigma^2)
    return ( - 0.5 * logc - log_sigma_sq / 2. - tf.div( tf.square( tf.sub( x, mu ) ), 2 * tf.exp( log_sigma_sq ) ) )

def tf_stdnormal_logpdf(x):
    #log N(x | 0, 1)
    return ( - 0.5 * ( logc + tf.square( x ) ) )

def tf_gaussian_ent(log_sigma_sq):
    #Per-dimension E_q[ log q(z) ], i.e. the negative Gaussian entropy
    return ( - 0.5 * ( logc + 1.0 + log_sigma_sq ) )

def tf_gaussian_marg(mu, log_sigma_sq):
    #Per-dimension E_q[ log N(z | 0, 1) ] under q = N(mu, exp(log_sigma_sq))
    return ( - 0.5 * ( logc + ( tf.square( mu ) + tf.exp( log_sigma_sq ) ) ) )

def tf_binary_xentropy(x, y, const = 1e-10):
    #Binary cross-entropy with clipping to avoid log(0)
    return - ( x * tf.log( tf.clip_by_value( y, const, 1.0 ) ) + \
               (1.0 - x) * tf.log( tf.clip_by_value( 1.0 - y, const, 1.0 ) ) )
def feed_numpy_semisupervised(num_lab_batch, num_ulab_batch, x_lab, y, x_ulab):
    #Yields minibatches mixing labelled and unlabelled examples; each feature
    #matrix holds [mean, var] halves side by side, which are split here
    size = x_lab.shape[0] + x_ulab.shape[0]
    batch_size = num_lab_batch + num_ulab_batch
    count = int(size / batch_size)

    dim = x_lab.shape[1]

    for i in xrange(count):
        start_lab = i * num_lab_batch
        end_lab = start_lab + num_lab_batch
        start_ulab = i * num_ulab_batch
        end_ulab = start_ulab + num_ulab_batch

        yield [ x_lab[start_lab:end_lab, :dim//2], x_lab[start_lab:end_lab, dim//2:dim], y[start_lab:end_lab],
                x_ulab[start_ulab:end_ulab, :dim//2], x_ulab[start_ulab:end_ulab, dim//2:dim] ]
def feed_numpy(batch_size, x):

    size = x.shape[0]
    count = int(size / batch_size)

    dim = x.shape[1]

    for i in xrange(count):
        start = i * batch_size
        end = start + batch_size

        yield x[start:end]
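
if __name__ == '__main__':
    #Self-check sketch (assumed, not part of the original file): one epoch of
    #toy data, confirming each yielded batch splits into mean/var halves.
    x_lab = np.random.rand(20, 8)
    y = np.random.rand(20, 2)
    x_ulab = np.random.rand(180, 8)
    for batch in feed_numpy_semisupervised(2, 18, x_lab, y, x_ulab):
        assert batch[0].shape == (2, 4) and batch[3].shape == (18, 4)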