from autoencoder import Autoencoder
from evaluation import Evaluation
from mlp import MLP
from utils import MODEL_FOLDER
import torch.nn as nn


class DeepDRA(nn.Module):

        # Apply reverse if specified (1 - score flips the predicted response direction)
        predictions = 1 - mlp_output if reverse else mlp_output

        # Evaluate the predictions using the specified metrics
        result = Evaluation.evaluate(labels, predictions)

import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.impute import SimpleImputer
import os

from utils import DRUG_DATA_FOLDER


class RawDataLoader:
    @staticmethod

        # Step 5: Return the loaded data and adjusted drug screening data
        return data, drug_screen

    @staticmethod
    def intersect_features(data1, data2):
        """

        return data[0]
    @staticmethod
    def load_raw_files(raw_file_directory, data_modalities, intersect=True):
        raw_dict = {}

        df.columns = df.columns.str.replace('_cell_CN', '')
        df.columns = df.columns.str.replace('_cell_exp', '')

        # Note that the drug_comp raw table has some NA values, so we impute them
        if df.isna().any().any():
            df = pd.DataFrame(SimpleImputer(strategy='mean').fit_transform(df),
    @staticmethod
    def load_screening_files(filename="AUC_matS_comb.tsv", sep=','):
        df = pd.read_csv(filename, sep=sep, index_col=0)
        return df
    @staticmethod
    def adjust_screening_raw(drug_screen, data_dict):
        raw_cell_names = []
        raw_drug_names = []
        for key, value in data_dict.items():
            if 'cell' in key:
                if len(raw_cell_names) == 0:

        screening_cell_names = drug_screen.index
        screening_drug_names = drug_screen.columns

        common_cell_names = list(set(raw_cell_names).intersection(set(screening_cell_names)))
        common_drug_names = list(set(raw_drug_names).intersection(set(screening_drug_names)))

        for key, value in data_dict.items():

from sklearn.metrics import roc_auc_score, average_precision_score, confusion_matrix, f1_score, precision_score, \
    recall_score, accuracy_score
from matplotlib import pyplot as plt
from sklearn import metrics
import numpy as np
import pandas as pd
from statsmodels.stats.weightstats import ttest_ind
from utils import *


class Evaluation:
    @staticmethod
    def plot_train_val_accuracy(train_accuracies, val_accuracies, num_epochs):
        """
        Plot training and validation accuracies over epochs.

        Parameters:
        - train_accuracies (list): List of training accuracies.
        - val_accuracies (list): List of validation accuracies.
        - num_epochs (int): Number of training epochs.

        Returns:
        - None
        """
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.title('Training and Validation Accuracies')
        plt.plot(range(1, num_epochs + 1), train_accuracies, label='Train Accuracy')
        plt.plot(range(1, num_epochs + 1), val_accuracies, label='Validation Accuracy')
        plt.legend()
        plt.show()

    @staticmethod
    def plot_train_val_loss(train_loss, val_loss, num_epochs):
        """
        Plot training and validation losses over epochs.

        Parameters:
        - train_loss (list): List of training losses.
        - val_loss (list): List of validation losses.
        - num_epochs (int): Number of training epochs.

        Returns:
        - None
        """
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.title('Training and Validation Losses')
        plt.plot(range(1, num_epochs + 1), train_loss, label='Train Loss')
        plt.plot(range(1, num_epochs + 1), val_loss, label='Validation Loss')
        plt.legend()
        plt.show()

    @staticmethod
    def evaluate(all_targets, mlp_output, show_plot=True):
        """
        Evaluate model performance based on predictions and targets.

        Parameters:
        - all_targets (numpy.ndarray): True target labels.
        - mlp_output (numpy.ndarray): Predicted probabilities.
        - show_plot (bool): Whether to display ROC and PR curves.

        Returns:
        - results (dict): Dictionary containing evaluation metrics.
        """
        # Step 1: Convert predicted probabilities to binary labels
        predicted_labels = np.where(mlp_output > 0.5, 1, 0)
        predicted_labels = predicted_labels.reshape(-1)
        all_predictions = predicted_labels

        # Step 2: Calculate AUC from the ROC curve
        fpr, tpr, thresholds = metrics.roc_curve(all_targets, mlp_output)
        auc = np.round(metrics.auc(fpr, tpr), 2)

        # Step 3: Calculate AUPRC from the precision-recall curve
        # (curve arrays are named *_curve so they are not shadowed by the scalar
        # precision/recall computed from the confusion matrix below)
        precision_curve, recall_curve, thresholds = metrics.precision_recall_curve(all_targets, mlp_output)
        auprc = np.round(metrics.auc(recall_curve, precision_curve), 2)

        # Step 4: Calculate accuracy, precision, recall, and F1, then print them
        # together with AUC, AUPRC, and the confusion matrix
        accuracy = accuracy_score(all_targets, all_predictions)
        cm = confusion_matrix(all_targets, all_predictions)
        precision = cm[0, 0] / (cm[0, 0] + cm[0, 1])
        recall = cm[0, 0] / (cm[0, 0] + cm[1, 0])
        f1_score = 2 * precision * recall / (precision + recall)
        print(f'Accuracy: {accuracy:.2f}')
        print(f'AUC: {auc:.2f}')
        print(f'AUPRC: {auprc:.2f}')
        print(f'Confusion matrix:\n{cm}')
        print(f'Precision: {precision:.3f}, Recall: {recall:.3f}, F1 score: {f1_score:.3f}')

        # Step 5: Display ROC and PR curves if requested
        if show_plot:
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.title(f'ROC Curve: AUC={auc}')
            plt.plot(fpr, tpr)
            plt.show()

            plt.xlabel('Recall')
            plt.ylabel('Precision')
            plt.plot(recall_curve, precision_curve)
            plt.show()

            # Violin plot of DeepDRA scores for responders vs. non-responders
            prediction_targets = pd.DataFrame({}, columns=['Prediction', 'Target'])
            res = pd.concat(
            res.columns = prediction_targets.columns
            prediction_targets = pd.concat([prediction_targets, res])

            class_one = prediction_targets.loc[prediction_targets['Target'] == 0, 'Prediction']
            class_minus_one = prediction_targets.loc[prediction_targets['Target'] == 1, 'Prediction']
            # Gather the two score distributions for the violin plot
            data_to_plot = [class_one, class_minus_one]

            fig, ax = plt.subplots()
            ax.set_ylabel("DeepDRA score")
            p_value = np.format_float_scientific(ttest_ind(class_one, class_minus_one)[1])
            cancer = 'all'
            plt.title(
                f'Responder/Non-responder scores for {cancer} cancer with \np-value ~= {p_value[0]}e{p_value[-3:]}')
            bp = ax.violinplot(data_to_plot, showextrema=True, showmeans=True, showmedians=True)
            bp['cmeans'].set_color('r')
            bp['cmedians'].set_color('g')
            plt.show()

        # Step 6: Return evaluation metrics in a dictionary
        return {'Accuracy': accuracy, 'Precision': precision, 'Recall': recall, 'F1 score': f1_score, 'AUC': auc,
                'AUPRC': auprc}
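

# Minimal usage sketch for Evaluation.evaluate (illustrative only): random scores stand in
# for the MLP outputs and random 0/1 labels stand in for the true drug-response labels.
# The sample size and seed below are arbitrary assumptions, not values from DeepDRA.
if __name__ == '__main__':
    rng = np.random.default_rng(42)
    demo_targets = rng.integers(0, 2, size=200)
    demo_scores = rng.random(200)
    demo_metrics = Evaluation.evaluate(demo_targets, demo_scores, show_plot=False)
    print(demo_metrics)
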
from data_loader import RawDataLoader
from evaluation import Evaluation
from utils import *
from mlp import MLP
import os
import random
import torch
import numpy as np
import pandas as pd


def train_DeepDRA(x_cell_train, x_cell_test, x_drug_train, x_drug_test, y_train, y_test, cell_sizes, drug_sizes):

    train(model, train_loader, num_epochs=num_epochs)

    # Step 7: Save the trained model
    torch.save(model, os.path.join(MODEL_FOLDER, 'DeepDRA.pth'))

    # Step 8: Load the saved model
    model = torch.load(os.path.join(MODEL_FOLDER, 'DeepDRA.pth'))

    # Step 9: Convert the test data to PyTorch tensors
    x_cell_test_tensor = torch.Tensor(x_cell_test.values)

from torch import optim, no_grad
import torch

from evaluation import Evaluation
from utils import data_modalities_abbreviation


class EarlyStopper:
    def __init__(self, patience=1, min_delta=0):
        self.patience = patience

import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import sklearn as sk
from matplotlib import pyplot as plt
from scipy.spatial.distance import pdist, squareform
import h2o
from h2o.estimators import H2ODeepLearningEstimator
from sklearn.impute import SimpleImputer
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pickle
from sklearn import metrics
from copy import deepcopy
import pyreadr
import requests
from time import time
from math import ceil
from statsmodels.stats.weightstats import ttest_ind
import torch.optim.lr_scheduler as lr_scheduler
from sklearn.model_selection import KFold

DATA_FOLDER = 'data'

RES_DATA_FOLDER = os.path.join(DATA_FOLDER, 'res')
TEST_DATA_FOLDER = os.path.join(DATA_FOLDER, 'final_test_data')
TEST_TCGA_DATA_FOLDER = os.path.join(DATA_FOLDER, 'TCGA_test_data')
SIM_DATA_FOLDER = os.path.join(DATA_FOLDER, 'similarity_data')
RAW_DATA_FOLDER = os.path.join(DATA_FOLDER, 'raw_data')
RAW_BOTH_DATA_FOLDER = os.path.join(DATA_FOLDER, 'CTRP_GDSC_data')
DRUG_DATA_FOLDER = os.path.join(DATA_FOLDER, 'drug_data')
NEW_RAW_DATA_FOLDER = os.path.join(DATA_FOLDER, 'new_raw_data')
GDSC_RAW_DATA_FOLDER = os.path.join(DATA_FOLDER, 'GDSC_data')
CCLE_RAW_DATA_FOLDER = os.path.join(DATA_FOLDER, 'CCLE_data')

GDSC_SCREENING_DATA_FOLDER = os.path.join(GDSC_RAW_DATA_FOLDER, 'drug_screening_matrix_GDSC.tsv')
CCLE_SCREENING_DATA_FOLDER = os.path.join(CCLE_RAW_DATA_FOLDER, 'drug_screening_matrix_ccle.tsv')
BOTH_SCREENING_DATA_FOLDER = os.path.join(RAW_BOTH_DATA_FOLDER, 'drug_screening_matrix_gdsc_ctrp.tsv')

CTRP_FOLDER = os.path.join(DATA_FOLDER, 'CTRP')
GDSC_FOLDER = os.path.join(DATA_FOLDER, 'GDSC')
MODEL_FOLDER = os.path.join(DATA_FOLDER, 'model')
CTRP_EXPERIMENT_FILE = os.path.join(CTRP_FOLDER, 'v20.meta.per_experiment.txt')
CTRP_COMPOUND_FILE = os.path.join(CTRP_FOLDER, 'v20.meta.per_compound.txt')
CTRP_CELLLINE_FILE = os.path.join(CTRP_FOLDER, 'v20.meta.per_cell_line.txt')
CTRP_AUC_FILE = os.path.join(CTRP_FOLDER, 'v20.data.curves_post_qc.txt')
GDSC_AUC_FILE = os.path.join(GDSC_FOLDER, 'GDSC2_fitted_dose_response.csv')
GDSC_cnv_data_FILE = os.path.join(GDSC_FOLDER, 'cnv_abs_copy_number_picnic_20191101.csv')
GDSC_methy_data_FILE = os.path.join(GDSC_FOLDER, 'F2_METH_CELL_DATA.txt')
GDSC_methy_sampleIds_FILE = os.path.join(GDSC_FOLDER, 'methSampleId_2_cosmicIds.xlsx')
GDSC_exp_data_FILE = os.path.join(GDSC_FOLDER, 'Cell_line_RMA_proc_basalExp.txt')
GDSC_exp_sampleIds_FILE = os.path.join(GDSC_FOLDER, 'E-MTAB-3610.sdrf.txt')
GDSC_mut_data_FILE = os.path.join(GDSC_FOLDER, 'mutations_all_20230202.csv')
CCLE_mut_data_FILE = os.path.join(CCLE_RAW_DATA_FOLDER, 'CCLE_mutations.csv')

TABLE_RESULTS_FILE = os.path.join(DATA_FOLDER, 'drug_screening_table.tsv')
MATRIX_RESULTS_FILE = os.path.join(DATA_FOLDER, 'drug_screening_matrix.tsv')
MODEL_FILE = os.path.join(MODEL_FOLDER, 'trained_model_V1_EMDP.sav')
TEST_FILE = os.path.join(TEST_DATA_FOLDER, 'test.gzip')
RESULT_FILE = os.path.join(RES_DATA_FOLDER, 'result.tsv')

TCGA_DATA_FOLDER = os.path.join(DATA_FOLDER, 'TCGA_data')
TCGA_SCREENING_DATA = os.path.join(TCGA_DATA_FOLDER, 'TCGA_screening_matrix.tsv')

BUILD_SIM_MATRICES = True  # Set to True to build similarity matrices from raw data
RANDOM_SEED = 42  # Use this seed wherever randomness is involved
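

# Illustrative helper (an assumption, not part of the original module) showing one way to
# apply RANDOM_SEED consistently across the random number generators used in this project.
def seed_everything(seed=RANDOM_SEED):
    import random  # local import; 'random' is not otherwise needed by this module
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
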

def data_modalities_abbreviation():
    abb = []
    if 'cell_CN' in DATA_MODALITIES:
        abb.append('C')
    if 'cell_exp' in DATA_MODALITIES:
        abb.append('E')
    if 'cell_mut' in DATA_MODALITIES:
        abb.append('M')
    if 'cell_methy' in DATA_MODALITIES:
        abb.append('T')
    if 'drug_DT' in DATA_MODALITIES:
        abb.append('D')
    if 'drug_comp' in DATA_MODALITIES:
        abb.append('P')
    return ''.join(abb)

""" TRAIN_INTEGRATION_METHOD used for each cell's and drug_data's data definitions: | |||||
SIMILARITY: A kernel based integration method in which based on the similarity of each cell's data with the training cell's | |||||
data the input features for the multi layer perceptron (MLP) is constructed. The similarity function used could be different for | |||||
each data modality (euclidean, jaccard,l1_norm, or ...) | |||||
AUTO_ENCODER_V1: In this version of integrating multi-omics, for each data modality an autoencoder is trained to reduce the | |||||
dimension of the features and finally a concatenation of each autoencoder's latent space builds up the input layer of the MLP. | |||||
AUTO_ENCODER_V2: In this version of integrating multi-omics data, we train a big autoencoder which reduces the dimension of | |||||
all the different data modalities features at the same time to a smaller feature space. This version of integrating could | |||||
take a lot of memory and time to integrate the data and might be computationally expensive. | |||||
AUTO_ENCODER_V3: IN this version of integrating multi-omics data, we train an autoencoder for all the modalities kinda same as | |||||
the autoencoder version 2 but with this difference that the encoder and decoder layers are separate from each other and | |||||
just the latent layer is shared among different data modalities. | |||||
""" |