You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

utils.py 1.9KB

12345678910111213141516171819202122232425262728293031323334353637
  1. import os
  2. DATA_FOLDER = 'data'
  3. TEST_TCGA_DATA_FOLDER = os.path.join(DATA_FOLDER, 'TCGA_test_data')
  4. RAW_BOTH_DATA_FOLDER = os.path.join(DATA_FOLDER, 'CTRP_GDSC_data')
  5. DRUG_DATA_FOLDER = os.path.join(DATA_FOLDER, 'drug_data')
  6. GDSC_RAW_DATA_FOLDER = os.path.join(DATA_FOLDER, 'GDSC_data')
  7. CCLE_RAW_DATA_FOLDER = os.path.join(DATA_FOLDER, 'CCLE_data')
  8. CTRP_RAW_DATA_FOLDER = os.path.join(DATA_FOLDER, 'CTRP_data')
  9. GDSC_SCREENING_DATA_FOLDER = os.path.join(GDSC_RAW_DATA_FOLDER, 'drug_screening_matrix_GDSC.tsv')
  10. CCLE_SCREENING_DATA_FOLDER = os.path.join(CCLE_RAW_DATA_FOLDER, 'drug_screening_matrix_ccle.tsv')
  11. CTRP_SCREENING_DATA_FOLDER = os.path.join(CTRP_RAW_DATA_FOLDER, 'drug_screening_matrix_ctrp.tsv')
  12. BOTH_SCREENING_DATA_FOLDER = os.path.join(RAW_BOTH_DATA_FOLDER, 'drug_screening_matrix_gdsc_ctrp.tsv')
  13. CTRP_FOLDER = os.path.join(DATA_FOLDER, 'CTRP')
  14. GDSC_FOLDER = os.path.join(DATA_FOLDER, 'GDSC')
  15. CCLE_FOLDER = os.path.join(DATA_FOLDER, 'CCLE')
  16. MODEL_FOLDER = os.path.join(DATA_FOLDER, 'model')
  17. TCGA_DATA_FOLDER = os.path.join(DATA_FOLDER, 'TCGA_data')
  18. TCGA_SCREENING_DATA = os.path.join(TCGA_DATA_FOLDER, 'TCGA_screening_matrix.tsv')
  19. BUILD_SIM_MATRICES = True # Make this variable True to build similarity matrices from raw data
  20. SIM_KERNEL = {'cell_CN': ('euclidean', 0.001), 'cell_exp': ('euclidean', 0.01), 'cell_methy': ('euclidean', 0.1),
  21. 'cell_mut': ('jaccard', 1), 'drug_DT': ('jaccard', 1), 'drug_comp': ('euclidean', 0.001),
  22. 'drug_desc': ('euclidean', 0.001), 'drug_finger': ('euclidean', 0.001)}
  23. SAVE_MODEL = False # Change it to True to save the trained model
  24. VARIATIONAL_AUTOENCODERS = False
  25. # DATA_MODALITIES=['cell_CN','cell_exp','cell_methy','cell_mut','drug_comp','drug_DT'] # Change this list to only consider specific data modalities
  26. DATA_MODALITIES = ['cell_CN','cell_exp','cell_mut', 'drug_desc']
  27. RANDOM_SEED = 42 # Must be used wherever can be used