Browse Source

add predictor base codes

main
MahsaYazdani 2 years ago
parent
commit
8f53af3ca6

+ 1
- 1
.gitignore View File

drug/data/DDI/SNAP Stanford/ChCh-Miner_durgbank-chem-chem.tsv drug/data/DDI/SNAP Stanford/ChCh-Miner_durgbank-chem-chem.tsv
cell/data/DTI/SNAP Stanford/ChG-Miner_miner-chem-gene.tsv cell/data/DTI/SNAP Stanford/ChG-Miner_miner-chem-gene.tsv
cell/data/DTI/SNAP Stanford/ChG-Miner_miner-chem-gene.tsv.gz cell/data/DTI/SNAP Stanford/ChG-Miner_miner-chem-gene.tsv.gz
drug/data/Smiles/drugbank_all_structure_links.csv.zip
drug/data/Smiles/drugbank_all_structure_links.csv.zip

+ 3
- 3
Drug/data/DrugExternalID2DrugBankID.txt View File

ERLOTINIB DB00530 ERLOTINIB DB00530
ETOPOSIDE DB00773 ETOPOSIDE DB00773
GELDANAMYCIN DB02424 GELDANAMYCIN DB02424
GEMCITABINE DB00441
L778123 15 ??????????????????????
GEMCITABINE DB00441
L778123 DB07227
LAPATINIB DB01259 LAPATINIB DB01259
METFORMIN DB00331 METFORMIN DB00331
METHOTREXATE DB00563 METHOTREXATE DB00563
MRK-003 DB17015 MRK-003 DB17015
OXALIPLATIN DB00526 OXALIPLATIN DB00526
PACLITAXEL DB01229 PACLITAXEL DB01229
PD325901 29 ?????????????????????
PD325901 DB07101
SN-38 DB05482 SN-38 DB05482
SORAFENIB DB00398 SORAFENIB DB00398
SUNITINIB DB01268 SUNITINIB DB01268

+ 21
- 0
predictor/const.py View File

import os

PROJ_DIR = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
SUB_PROJ_DIR = os.path.join(PROJ_DIR, 'predictor')
DATA_DIR = os.path.join(SUB_PROJ_DIR, 'data')
DRUG_DATA_DIR = os.path.join(PROJ_DIR, 'drug', 'data')
CELL_DATA_DIR = os.path.join(PROJ_DIR, 'cell', 'data')
OUTPUT_DIR = 'output'
if not os.path.exists(OUTPUT_DIR):
os.makedirs(OUTPUT_DIR)

SYNERGY_FILE = os.path.join(DATA_DIR, 'synergy.tsv')

DRUG_FEAT_FILE = os.path.join(DRUG_DATA_DIR, 'drug_feat.npy')
DRUG2ID_FILE = os.path.join(DRUG_DATA_DIR, 'drug2id.tsv')
CELL_FEAT_FILE = os.path.join(CELL_DATA_DIR, 'cell_feat.npy')
CELL2ID_FILE = os.path.join(CELL_DATA_DIR, 'cell2id.tsv')





+ 204
- 0
predictor/cross_validation.py View File

import argparse
import os
import logging
import time
import pickle
import torch
import torch.nn as nn

from datetime import datetime

time_str = str(datetime.now().strftime('%y%m%d%H%M'))

from model.datasets import FastSynergyDataset, FastTensorDataLoader
from model.models import DNN
from model.utils import save_args, save_best_model, find_best_model, arg_min, random_split_indices, calc_stat, conf_inv
from const import SYNERGY_FILE, DRUG2ID_FILE, DRUG_FEAT_FILE, CELL_FEAT_FILE, CELL2ID_FILE, OUTPUT_DIR

def eval_model(model, optimizer, loss_func, train_data, test_data,
batch_size, n_epoch, patience, gpu_id, mdl_dir):
tr_indices, es_indices = random_split_indices(len(train_data), test_rate=0.1)
train_loader = FastTensorDataLoader(*train_data.tensor_samples(tr_indices), batch_size=batch_size, shuffle=True)
valid_loader = FastTensorDataLoader(*train_data.tensor_samples(es_indices), batch_size=len(es_indices) // 4)
test_loader = FastTensorDataLoader(*test_data.tensor_samples(), batch_size=len(test_data) // 4)
train_model(model, optimizer, loss_func, train_loader, valid_loader, n_epoch, patience, gpu_id,
sl=True, mdl_dir=mdl_dir)
test_loss = eval_epoch(model, test_loader, loss_func, gpu_id)
test_loss /= len(test_data)
return test_loss


def step_batch(model, batch, loss_func, gpu_id=None, train=True):
drug1_feats, drug2_feats, cell_feats, y_true = batch
if gpu_id is not None:
drug1_feats, drug2_feats, cell_feats, y_true = drug1_feats.cuda(gpu_id), drug2_feats.cuda(gpu_id), \
cell_feats.cuda(gpu_id), y_true.cuda(gpu_id)
if train:
y_pred = model(drug1_feats, drug2_feats, cell_feats)
else:
yp1 = model(drug1_feats, drug2_feats, cell_feats)
yp2 = model(drug2_feats, drug1_feats, cell_feats)
y_pred = (yp1 + yp2) / 2
loss = loss_func(y_pred, y_true)
return loss


def train_epoch(model, loader, loss_func, optimizer, gpu_id=None):
model.train()
epoch_loss = 0
for _, batch in enumerate(loader):
optimizer.zero_grad()
loss = step_batch(model, batch, loss_func, gpu_id)
loss.backward()
optimizer.step()
epoch_loss += loss.item()
return epoch_loss


def eval_epoch(model, loader, loss_func, gpu_id=None):
model.eval()
with torch.no_grad():
epoch_loss = 0
for batch in loader:
loss = step_batch(model, batch, loss_func, gpu_id, train=False)
epoch_loss += loss.item()
return epoch_loss


def train_model(model, optimizer, loss_func, train_loader, valid_loader, n_epoch, patience, gpu_id,
sl=False, mdl_dir=None):
min_loss = float('inf')
angry = 0
for epoch in range(1, n_epoch + 1):
trn_loss = train_epoch(model, train_loader, loss_func, optimizer, gpu_id)
trn_loss /= train_loader.dataset_len
val_loss = eval_epoch(model, valid_loader, loss_func, gpu_id)
val_loss /= valid_loader.dataset_len
if val_loss < min_loss:
angry = 0
min_loss = val_loss
if sl:
# TODO: save best model in case of need
save_best_model(model.state_dict(), mdl_dir, epoch, keep=1)
pass
else:
angry += 1
if angry >= patience:
break
if sl:
model.load_state_dict(torch.load(find_best_model(mdl_dir)))
return min_loss


def create_model(data, hidden_size, gpu_id=None):
# TODO: use our own MLP model
model = DNN(data.cell_feat_len() + 2 * data.drug_feat_len(), hidden_size)
if gpu_id is not None:
model = model.cuda(gpu_id)
return model


def cv(args, out_dir):
save_args(args, os.path.join(out_dir, 'args.json'))
test_loss_file = os.path.join(out_dir, 'test_loss.pkl')

if torch.cuda.is_available() and (args.gpu is not None):
gpu_id = args.gpu
else:
gpu_id = None

n_folds = 5
n_delimiter = 60
loss_func = nn.MSELoss(reduction='sum')
test_losses = []
for test_fold in range(n_folds):
outer_trn_folds = [x for x in range(n_folds) if x != test_fold]
logging.info("Outer: train folds {}, test folds {}".format(outer_trn_folds, test_fold))
logging.info("-" * n_delimiter)
param = []
losses = []
for hs in args.hidden:
for lr in args.lr:
param.append((hs, lr))
logging.info("Hidden size: {} | Learning rate: {}".format(hs, lr))
ret_vals = []
for valid_fold in outer_trn_folds:
inner_trn_folds = [x for x in outer_trn_folds if x != valid_fold]
valid_folds = [valid_fold]
train_data = FastSynergyDataset(DRUG2ID_FILE, CELL2ID_FILE, DRUG_FEAT_FILE, CELL_FEAT_FILE,
SYNERGY_FILE, use_folds=inner_trn_folds)
valid_data = FastSynergyDataset(DRUG2ID_FILE, CELL2ID_FILE, DRUG_FEAT_FILE, CELL_FEAT_FILE,
SYNERGY_FILE, use_folds=valid_folds, train=False)
train_loader = FastTensorDataLoader(*train_data.tensor_samples(), batch_size=args.batch,
shuffle=True)
valid_loader = FastTensorDataLoader(*valid_data.tensor_samples(), batch_size=len(valid_data) // 4)
model = create_model(train_data, hs, gpu_id)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
logging.info(
"Start inner loop: train folds {}, valid folds {}".format(inner_trn_folds, valid_folds))
ret = train_model(model, optimizer, loss_func, train_loader, valid_loader,
args.epoch, args.patience, gpu_id, sl=False)
ret_vals.append(ret)
del model

inner_loss = sum(ret_vals) / len(ret_vals)
logging.info("Inner loop completed. Mean valid loss: {:.4f}".format(inner_loss))
logging.info("-" * n_delimiter)
losses.append(inner_loss)
torch.cuda.empty_cache()
time.sleep(10)
min_ls, min_idx = arg_min(losses)
best_hs, best_lr = param[min_idx]
train_data = FastSynergyDataset(DRUG2ID_FILE, CELL2ID_FILE, DRUG_FEAT_FILE, CELL_FEAT_FILE,
SYNERGY_FILE, use_folds=outer_trn_folds)
test_data = FastSynergyDataset(DRUG2ID_FILE, CELL2ID_FILE, DRUG_FEAT_FILE, CELL_FEAT_FILE,
SYNERGY_FILE, use_folds=[test_fold], train=False)
model = create_model(train_data, best_hs, gpu_id)
optimizer = torch.optim.Adam(model.parameters(), lr=best_lr)

logging.info("Best hidden size: {} | Best learning rate: {}".format(best_hs, best_lr))
logging.info("Start test on fold {}.".format([test_fold]))
test_mdl_dir = os.path.join(out_dir, str(test_fold))
if not os.path.exists(test_mdl_dir):
os.makedirs(test_mdl_dir)
test_loss = eval_model(model, optimizer, loss_func, train_data, test_data,
args.batch, args.epoch, args.patience, gpu_id, test_mdl_dir)
test_losses.append(test_loss)
logging.info("Test loss: {:.4f}".format(test_loss))
logging.info("*" * n_delimiter + '\n')
logging.info("CV completed")
with open(test_loss_file, 'wb') as f:
pickle.dump(test_losses, f)
mu, sigma = calc_stat(test_losses)
logging.info("MSE: {:.4f} ± {:.4f}".format(mu, sigma))
lo, hi = conf_inv(mu, sigma, len(test_losses))
logging.info("Confidence interval: [{:.4f}, {:.4f}]".format(lo, hi))
rmse_loss = [x ** 0.5 for x in test_losses]
mu, sigma = calc_stat(rmse_loss)
logging.info("RMSE: {:.4f} ± {:.4f}".format(mu, sigma))


def main():
parser = argparse.ArgumentParser()
parser.add_argument('--epoch', type=int, default=500, help="n epoch")
parser.add_argument('--batch', type=int, default=256, help="batch size")
parser.add_argument('--gpu', type=int, default=None, help="cuda device")
parser.add_argument('--patience', type=int, default=100, help='patience for early stop')
parser.add_argument('--suffix', type=str, default=time_str, help="model dir suffix")
parser.add_argument('--hidden', type=int, nargs='+', default=[2048, 4096, 8192], help="hidden size")
parser.add_argument('--lr', type=float, nargs='+', default=[1e-3, 1e-4, 1e-5], help="learning rate")
args = parser.parse_args()
out_dir = os.path.join(OUTPUT_DIR, 'cv_{}'.format(args.suffix))
if not os.path.exists(out_dir):
os.makedirs(out_dir)
log_file = os.path.join(out_dir, 'cv.log')
logging.basicConfig(filename=log_file,
format='%(asctime)s %(message)s',
datefmt='[%Y-%m-%d %H:%M:%S]',
level=logging.INFO)
cv(args, out_dir)


if __name__ == '__main__':
main()

+ 23053
- 0
predictor/data/synergy.tsv
File diff suppressed because it is too large
View File


+ 0
- 0
predictor/model/__init__.py View File


+ 115
- 0
predictor/model/datasets.py View File

import numpy as np
import torch

import random

from torch.utils.data import Dataset
from .utils import read_map

class FastTensorDataLoader:
"""
A DataLoader-like object for a set of tensors that can be much faster than
TensorDataset + DataLoader because dataloader grabs individual indices of
the dataset and calls cat (slow).
Source: https://discuss.pytorch.org/t/dataloader-much-slower-than-manual-batching/27014/6
"""

def __init__(self, *tensors, batch_size=32, shuffle=False):
"""
Initialize a FastTensorDataLoader.
:param *tensors: tensors to store. Must have the same length @ dim 0.
:param batch_size: batch size to load.
:param shuffle: if True, shuffle the data *in-place* whenever an
iterator is created out of this object.
:returns: A FastTensorDataLoader.
"""
assert all(t.shape[0] == tensors[0].shape[0] for t in tensors)
self.tensors = tensors

self.dataset_len = self.tensors[0].shape[0]
self.batch_size = batch_size
self.shuffle = shuffle

# Calculate # batches
n_batches, remainder = divmod(self.dataset_len, self.batch_size)
if remainder > 0:
n_batches += 1
self.n_batches = n_batches

def __iter__(self):
if self.shuffle:
r = torch.randperm(self.dataset_len)
self.tensors = [t[r] for t in self.tensors]
self.i = 0
return self

def __next__(self):
if self.i >= self.dataset_len:
raise StopIteration
batch = tuple(t[self.i:self.i + self.batch_size] for t in self.tensors)
self.i += self.batch_size
return batch

def __len__(self):
return self.n_batches

class FastSynergyDataset(Dataset):

def __init__(self, drug2id_file, cell2id_file, drug_feat_file, cell_feat_file, synergy_score_file, use_folds,
train=True):
self.drug2id = read_map(drug2id_file)
self.cell2id = read_map(cell2id_file)
self.drug_feat = np.load(drug_feat_file)
self.cell_feat = np.load(cell_feat_file)
self.samples = []
self.raw_samples = []
self.train = train
valid_drugs = set(self.drug2id.keys())
valid_cells = set(self.cell2id.keys())
with open(synergy_score_file, 'r') as f:
f.readline()
for line in f:
drug1, drug2, cellname, score, fold = line.rstrip().split('\t')
if drug1 in valid_drugs and drug2 in valid_drugs and cellname in valid_cells:
if int(fold) in use_folds:
sample = [
# TODO: specify drug_feat
torch.from_numpy(self.drug_feat[self.drug2id[drug1]]).float(),
torch.from_numpy(self.drug_feat[self.drug2id[drug2]]).float(),
torch.from_numpy(self.cell_feat[self.cell2id[cellname]]).float(),
torch.FloatTensor([float(score)]),
]
self.samples.append(sample)
raw_sample = [self.drug2id[drug1], self.drug2id[drug2], self.cell2id[cellname], score]
self.raw_samples.append(raw_sample)
if train:
sample = [
torch.from_numpy(self.drug_feat[self.drug2id[drug2]]).float(),
torch.from_numpy(self.drug_feat[self.drug2id[drug1]]).float(),
torch.from_numpy(self.cell_feat[self.cell2id[cellname]]).float(),
torch.FloatTensor([float(score)]),
]
self.samples.append(sample)
raw_sample = [self.drug2id[drug2], self.drug2id[drug1], self.cell2id[cellname], score]
self.raw_samples.append(raw_sample)

def __len__(self):
return len(self.samples)

def __getitem__(self, item):
return self.samples[item]

def drug_feat_len(self):
return self.drug_feat.shape[-1]

def cell_feat_len(self):
return self.cell_feat.shape[-1]

def tensor_samples(self, indices=None):
if indices is None:
indices = list(range(len(self)))
d1 = torch.cat([torch.unsqueeze(self.samples[i][0], 0) for i in indices], dim=0)
d2 = torch.cat([torch.unsqueeze(self.samples[i][1], 0) for i in indices], dim=0)
c = torch.cat([torch.unsqueeze(self.samples[i][2], 0) for i in indices], dim=0)
y = torch.cat([torch.unsqueeze(self.samples[i][3], 0) for i in indices], dim=0)
return d1, d2, c, y

+ 367
- 0
predictor/model/models.py View File

import torch
import torch.nn as nn
import torch.nn.functional as F


class PPI2Cell(nn.Module):

def __init__(self, n_cell: int, ppi_emb: torch.Tensor, bias=True):
super(PPI2Cell, self).__init__()
self.n_cell = n_cell
self.cell_emb = nn.Embedding(n_cell, ppi_emb.shape[1], max_norm=1.0, norm_type=2.0)
if bias:
self.bias = nn.Parameter(torch.randn((1, ppi_emb.shape[0])), requires_grad=True)
else:
self.bias = 0
self.ppi_emb = ppi_emb.permute(1, 0)

def forward(self, x: torch.Tensor):
x = x.squeeze(dim=1)
emb = self.cell_emb(x)
y = emb.mm(self.ppi_emb)
y += self.bias
return y


class PPI2CellV2(nn.Module):

def __init__(self, n_cell: int, ppi_emb: torch.Tensor, hidden_dim: int, bias=True):
super(PPI2CellV2, self).__init__()
self.n_cell = n_cell
self.projector = nn.Sequential(
nn.Linear(ppi_emb.shape[1], hidden_dim, bias=bias),
nn.LeakyReLU()
)
self.cell_emb = nn.Embedding(n_cell, hidden_dim, max_norm=1.0, norm_type=2.0)
self.ppi_emb = ppi_emb

def forward(self, x: torch.Tensor):
x = x.squeeze(dim=1)
proj = self.projector(self.ppi_emb).permute(1, 0)
emb = self.cell_emb(x)
y = emb.mm(proj)
return y


class SynEmb(nn.Module):

def __init__(self, n_drug: int, drug_dim: int, n_cell: int, cell_dim: int, hidden_dim: int):
super(SynEmb, self).__init__()
self.drug_emb = nn.Embedding(n_drug, drug_dim, max_norm=1)
self.cell_emb = nn.Embedding(n_cell, cell_dim, max_norm=1)
self.network = DNN(2 * drug_dim + cell_dim, hidden_dim)

def forward(self, drug1, drug2, cell):
d1 = self.drug_emb(drug1).squeeze(1)
d2 = self.drug_emb(drug2).squeeze(1)
c = self.cell_emb(cell).squeeze(1)
return self.network(d1, d2, c)


class AutoEncoder(nn.Module):

def __init__(self, input_size: int, latent_size: int):
super(AutoEncoder, self).__init__()

self.encoder = nn.Sequential(
nn.Linear(input_size, input_size // 2),
nn.ReLU(),
nn.Linear(input_size // 2, input_size // 4),
nn.ReLU(),
nn.Linear(input_size // 4, latent_size)
)

self.decoder = nn.Sequential(
nn.Linear(latent_size, input_size // 4),
nn.ReLU(),
nn.Linear(input_size // 4, input_size // 2),
nn.ReLU(),
nn.Linear(input_size // 2, input_size)
)

def forward(self, x: torch.Tensor):
encoded = self.encoder(x)
decoded = self.decoder(encoded)
return encoded, decoded


class GeneExpressionAE(nn.Module):
def __init__(self, input_size: int, latent_size: int):
super(GeneExpressionAE, self).__init__()

self.encoder = nn.Sequential(
nn.Linear(input_size, 2048),
nn.Tanh(),
nn.Linear(2048, 1024),
nn.Tanh(),
nn.Linear(1024, latent_size),
nn.Tanh()
)
self.decoder = nn.Sequential(
nn.Linear(latent_size, 1024),
nn.Tanh(),
nn.Linear(1024, 2048),
nn.Tanh(),
nn.Linear(2048, input_size),
nn.Tanh()
)

def forward(self, x: torch.Tensor):
encoded = self.encoder(x)
decoded = self.decoder(encoded)
return encoded, decoded


class DrugFeatAE(nn.Module):
def __init__(self, input_size: int, latent_size: int):
super(DrugFeatAE, self).__init__()

self.encoder = nn.Sequential(
nn.Linear(input_size, 128),
nn.ReLU(),
nn.Linear(128, latent_size),
nn.Sigmoid(),
)
self.decoder = nn.Sequential(
nn.Linear(latent_size, 128),
nn.ReLU(),
nn.Linear(128, input_size),
nn.Sigmoid()
)

def forward(self, x: torch.Tensor):
encoded = self.encoder(x)
decoded = self.decoder(encoded)
return encoded, decoded


class DSDNN(nn.Module):
def __init__(self, input_size: int, hidden_size: int):
super(DSDNN, self).__init__()

self.network = nn.Sequential(
nn.Linear(input_size, hidden_size),
nn.ReLU(),
nn.BatchNorm1d(hidden_size),
nn.Linear(hidden_size, hidden_size // 2),
nn.ReLU(),
nn.BatchNorm1d(hidden_size // 2),
nn.Linear(hidden_size // 2, 1)
)

def forward(self, feat: torch.Tensor):
out = self.network(feat)
return out


class DNN(nn.Module):
def __init__(self, input_size: int, hidden_size: int):
super(DNN, self).__init__()
self.network = nn.Sequential(
nn.Linear(input_size, hidden_size),
nn.ReLU(),
nn.BatchNorm1d(hidden_size),
nn.Linear(hidden_size, hidden_size // 2),
nn.ReLU(),
nn.BatchNorm1d(hidden_size // 2),
nn.Linear(hidden_size // 2, 1)
)

def forward(self, drug1_feat: torch.Tensor, drug2_feat: torch.Tensor, cell_feat: torch.Tensor):
feat = torch.cat([drug1_feat, drug2_feat, cell_feat], 1)
out = self.network(feat)
return out


class BottleneckLayer(nn.Module):

def __init__(self, in_channels: int, out_channels: int):
super(BottleneckLayer, self).__init__()
self.net = nn.Sequential(
nn.Conv1d(in_channels, out_channels, 1),
nn.BatchNorm1d(out_channels),
nn.LeakyReLU()
)

def forward(self, x):
return self.net(x)


class PatchySan(nn.Module):

def __init__(self, drug_size: int, cell_size: int, hidden_size: int, field_size: int):
super(PatchySan, self).__init__()
# self.drug_proj = nn.Linear(drug_size, hidden_size, bias=False)
# self.cell_proj = nn.Linear(cell_size, hidden_size, bias=False)
self.conv = nn.Sequential(
BottleneckLayer(field_size, 16),
BottleneckLayer(16, 32),
BottleneckLayer(32, 16),
BottleneckLayer(16, 1),
)
self.network = nn.Sequential(
nn.Linear(2 * drug_size + cell_size, hidden_size),
nn.LeakyReLU(),
nn.BatchNorm1d(hidden_size),
nn.Linear(hidden_size, hidden_size // 2),
nn.LeakyReLU(),
nn.BatchNorm1d(hidden_size // 2),
nn.Linear(hidden_size // 2, 1)
)

def forward(self, drug1_feat: torch.Tensor, drug2_feat: torch.Tensor, cell_feat: torch.Tensor):
cell_feat = cell_feat.permute(0, 2, 1)
cell_feat = self.conv(cell_feat).squeeze(1)
# drug1_feat = self.drug_proj(drug1_feat)
# drug2_feat = self.drug_proj(drug2_feat)
# express = self.cell_proj(cell_feat)
# feat = torch.cat([drug1_feat, drug2_feat, express], 1)
# drug_feat = (drug1_feat + drug2_feat) / 2
feat = torch.cat([drug1_feat, drug2_feat, cell_feat], 1)
out = self.network(feat)
return out


class SynSyn(nn.Module):

def __init__(self, drug_size: int, cell_size: int, hidden_size: int):
super(SynSyn, self).__init__()

self.drug_proj = nn.Linear(drug_size, drug_size)
self.cell_proj = nn.Linear(cell_size, cell_size)
self.network = nn.Sequential(
nn.Linear(drug_size + cell_size, hidden_size),
nn.ReLU(),
nn.BatchNorm1d(hidden_size),
nn.Linear(hidden_size, hidden_size // 2),
nn.ReLU(),
nn.BatchNorm1d(hidden_size // 2),
nn.Linear(hidden_size // 2, 1)
)

def forward(self, drug1_feat: torch.Tensor, drug2_feat: torch.Tensor, cell_feat: torch.Tensor):
d1 = self.drug_proj(drug1_feat)
d2 = self.drug_proj(drug2_feat)
d = d1.mul(d2)
c = self.cell_proj(cell_feat)
feat = torch.cat([d, c], 1)
out = self.network(feat)
return out


class PPIDNN(nn.Module):

def __init__(self, drug_size: int, cell_size: int, hidden_size: int, emb_size: int):
super(PPIDNN, self).__init__()
self.conv = nn.Sequential(
BottleneckLayer(emb_size, 64),
BottleneckLayer(64, 128),
BottleneckLayer(128, 64),
BottleneckLayer(64, 1),
)
self.network = nn.Sequential(
nn.Linear(2 * drug_size + cell_size, hidden_size),
nn.LeakyReLU(),
nn.BatchNorm1d(hidden_size),
nn.Linear(hidden_size, hidden_size // 2),
nn.LeakyReLU(),
nn.BatchNorm1d(hidden_size // 2),
nn.Linear(hidden_size // 2, 1)
)

def forward(self, drug1_feat: torch.Tensor, drug2_feat: torch.Tensor, cell_feat: torch.Tensor):
cell_feat = cell_feat.permute(0, 2, 1)
cell_feat = self.conv(cell_feat).squeeze(1)
feat = torch.cat([drug1_feat, drug2_feat, cell_feat], 1)
out = self.network(feat)
return out


class StackLinearDNN(nn.Module):

def __init__(self, input_size: int, stack_size: int, hidden_size: int):
super(StackLinearDNN, self).__init__()

self.compress = nn.Parameter(torch.zeros(size=(1, stack_size)))
nn.init.xavier_uniform_(self.compress.data, gain=1.414)

self.network = nn.Sequential(
nn.Linear(input_size, hidden_size),
nn.ReLU(),
nn.BatchNorm1d(hidden_size),
nn.Linear(hidden_size, hidden_size // 2),
nn.ReLU(),
nn.BatchNorm1d(hidden_size // 2),
nn.Linear(hidden_size // 2, 1)
)

def forward(self, drug1_feat: torch.Tensor, drug2_feat: torch.Tensor, cell_feat: torch.Tensor):
cell_feat = torch.matmul(self.compress, cell_feat).squeeze(1)
feat = torch.cat([drug1_feat, drug2_feat, cell_feat], 1)
out = self.network(feat)
return out


class InteractionNet(nn.Module):

def __init__(self, drug_size: int, cell_size: int, hidden_size: int):
super(InteractionNet, self).__init__()

# self.compress = nn.Parameter(torch.ones(size=(1, stack_size)))
# self.drug_proj = nn.Sequential(
# nn.Linear(drug_size, hidden_size),
# nn.LeakyReLU(),
# nn.BatchNorm1d(hidden_size)
# )
self.inter_net = nn.Sequential(
nn.Linear(drug_size + cell_size, hidden_size),
nn.LeakyReLU(),
nn.BatchNorm1d(hidden_size)
)

self.network = nn.Sequential(
nn.Linear(hidden_size, hidden_size // 2),
nn.LeakyReLU(),
nn.BatchNorm1d(hidden_size // 2),
nn.Linear(hidden_size // 2, 1)
)

def forward(self, drug1_feat: torch.Tensor, drug2_feat: torch.Tensor, cell_feat: torch.Tensor):
# cell_feat = torch.mat
# mul(self.compress, cell_feat).squeeze(1)
# d1 = self.drug_proj(drug1_feat)
# d2 = self.drug_proj(drug2_feat)
dc1 = torch.cat([drug1_feat, cell_feat], 1)
dc2 = torch.cat([drug2_feat, cell_feat], 1)
inter1 = self.inter_net(dc1)
inter2 = self.inter_net(dc2)
inter3 = inter1 + inter2
out = self.network(inter3)
return out


class StackProjDNN(nn.Module):

def __init__(self, drug_size: int, cell_size: int, stack_size: int, hidden_size: int):
super(StackProjDNN, self).__init__()

self.projectors = nn.Parameter(torch.zeros(size=(stack_size, cell_size, cell_size)))
nn.init.xavier_uniform_(self.projectors.data, gain=1.414)

self.network = nn.Sequential(
nn.Linear(2 * drug_size + cell_size, hidden_size),
nn.ReLU(),
nn.BatchNorm1d(hidden_size),
nn.Linear(hidden_size, hidden_size // 2),
nn.ReLU(),
nn.BatchNorm1d(hidden_size // 2),
nn.Linear(hidden_size // 2, 1)
)

def forward(self, drug1_feat: torch.Tensor, drug2_feat: torch.Tensor, cell_feat: torch.Tensor):
cell_feat = cell_feat.unsqueeze(-1)
cell_feats = torch.matmul(self.projectors, cell_feat).squeeze(-1)
cell_feat = torch.sum(cell_feats, 1)
feat = torch.cat([drug1_feat, drug2_feat, cell_feat], 1)
out = self.network(feat)
return out

+ 67
- 0
predictor/model/utils.py View File

import os
import pickle

import matplotlib.pyplot as plt
import torch
import json
import random


def arg_min(lst):
m = float('inf')
idx = 0
for i, v in enumerate(lst):
if v < m:
m = v
idx = i
return m, idx


def save_best_model(state_dict, model_dir: str, best_epoch: int, keep: int):
save_to = os.path.join(model_dir, '{}.pkl'.format(best_epoch))
torch.save(state_dict, save_to)
model_files = [f for f in os.listdir(model_dir) if os.path.splitext(f)[-1] == '.pkl']
epochs = [int(os.path.splitext(f)[0]) for f in model_files if str.isdigit(f[0])]
outdated = sorted(epochs, reverse=True)[keep:]
for n in outdated:
os.remove(os.path.join(model_dir, '{}.pkl'.format(n)))


def find_best_model(model_dir: str):
model_files = [f for f in os.listdir(model_dir) if os.path.splitext(f)[-1] == '.pkl']
epochs = [int(os.path.splitext(f)[0]) for f in model_files if str.isdigit(f[0])]
best_epoch = max(epochs)
return os.path.join(model_dir, '{}.pkl'.format(best_epoch))


def save_args(args, save_to: str):
args_dict = args.__dict__
with open(save_to, 'w') as f:
json.dump(args_dict, f, indent=2)


def read_map(map_file):
d = {}
with open(map_file, 'r') as f:
f.readline()
for line in f:
k, v = line.rstrip().split('\t')
d[k] = int(v)
return d


def random_split_indices(n_samples, train_rate: float = None, test_rate: float = None):
if train_rate is not None and (train_rate < 0 or train_rate > 1):
raise ValueError("train rate should be in [0, 1], found {}".format(train_rate))
elif test_rate is not None:
if test_rate < 0 or test_rate > 1:
raise ValueError("test rate should be in [0, 1], found {}".format(test_rate))
train_rate = 1 - test_rate
elif train_rate is None and test_rate is None:
raise ValueError("Either train_rate or test_rate should be given.")
evidence = list(range(n_samples))
train_size = int(len(evidence) * train_rate)
random.shuffle(evidence)
train_indices = evidence[:train_size]
test_indices = evidence[train_size:]
return train_indices, test_indices

Loading…
Cancel
Save