@@ -170,7 +170,7 @@ def train(model, train_loader, val_loader, num_epochs,class_weights): | |||
mlp_loss_val = mlp_weight * mlp_loss_fn(mlp_output_val, val_target) | |||
# Total loss is the sum of autoencoder losses and MLP loss | |||
total_val_loss = drug_ae_loss_val + cell_ae_loss_val + mlp_loss_val | |||
total_val_loss = (drug_ae_loss_val + cell_ae_loss_val + mlp_loss_val).item() | |||
# Calculate accuracy | |||
val_predictions = torch.round(mlp_output_val) |
@@ -134,7 +134,6 @@ class RawDataLoader: | |||
df.columns = df.columns.str.replace('_cell_mut', '') | |||
df.columns = df.columns.str.replace('_cell_CN', '') | |||
df.columns = df.columns.str.replace('_cell_exp', '') | |||
df = (df - df.min()) / (df.max() - df.min()) | |||
df = df.fillna(0) | |||
if intersect: |
@@ -161,8 +161,8 @@ def run(k, is_test=False ): | |||
# Step 2: Load training data | |||
train_data, train_drug_screen = RawDataLoader.load_data(data_modalities=DATA_MODALITIES, | |||
raw_file_directory=GDSC_RAW_DATA_FOLDER, | |||
screen_file_directory=GDSC_SCREENING_DATA_FOLDER, | |||
raw_file_directory=RAW_BOTH_DATA_FOLDER, | |||
screen_file_directory=BOTH_SCREENING_DATA_FOLDER, | |||
sep="\t") | |||
@@ -170,10 +170,15 @@ def run(k, is_test=False ): | |||
if is_test: | |||
test_data, test_drug_screen = RawDataLoader.load_data(data_modalities=DATA_MODALITIES, | |||
raw_file_directory=CCLE_RAW_DATA_FOLDER, | |||
screen_file_directory=CTRP_SCREENING_DATA_FOLDER, | |||
screen_file_directory=CCLE_SCREENING_DATA_FOLDER, | |||
sep="\t") | |||
train_data, test_data = RawDataLoader.data_features_intersect(train_data, test_data) | |||
# common_columns = list(set(train_drug_screen.columns) & set(test_drug_screen.columns)) | |||
# | |||
# train_drug_screen.drop(common_columns[1:100], axis=1, inplace=True) | |||
# test_drug_screen = test_drug_screen[common_columns[1:100]] | |||
# Step 4: Prepare input data for training | |||
x_cell_train, x_drug_train, y_train, cell_sizes, drug_sizes = RawDataLoader.prepare_input_data(train_data, | |||
@@ -226,4 +231,4 @@ if __name__ == '__main__': | |||
torch.manual_seed(RANDOM_SEED) | |||
random.seed(RANDOM_SEED) | |||
np.random.seed(RANDOM_SEED) | |||
run(30, is_test=False) | |||
run(10, is_test=True) |
@@ -31,7 +31,7 @@ SIM_KERNEL = {'cell_CN': ('euclidean', 0.001), 'cell_exp': ('euclidean', 0.01), | |||
SAVE_MODEL = False # Change it to True to save the trained model | |||
VARIATIONAL_AUTOENCODERS = False | |||
# DATA_MODALITIES=['cell_CN','cell_exp','cell_methy','cell_mut','drug_comp','drug_DT'] # Change this list to only consider specific data modalities | |||
DATA_MODALITIES = ['cell_exp', 'drug_desc','drug_finger'] | |||
DATA_MODALITIES = ['cell_CN','cell_exp','cell_methy','cell_mut', 'drug_desc','drug_finger'] | |||
RANDOM_SEED = 42 # Must be used wherever a random seed can be set | |||