
Update 'Scenario1/Regression/train_random.py'

master
Zahra Asgari, 3 days ago
parent commit 8e0ad3522e
1 changed file with 118 additions and 118 deletions

Scenario1/Regression/train_random.py

import argparse
import numpy as np
import torch
from sklearn.model_selection import KFold
from Regression.DeepTraCDR_model import DeepTraCDR, Optimizer
from data_sampler import RegressionSampler
from data_loader import load_data


def parse_arguments() -> argparse.Namespace:
    """
    Parses command-line arguments for the DeepTraCDR regression task.

    Returns:
        Parsed arguments as a Namespace object.
    """
    parser = argparse.ArgumentParser(description="DeepTraCDR Regression Task")
    parser.add_argument('-device', type=str, default="cuda:0" if torch.cuda.is_available() else "cpu",
                        help="Device to run the model on (e.g., 'cuda:0' or 'cpu')")
    parser.add_argument('-data', type=str, default='gdsc', help="Dataset to use (default: gdsc)")
    parser.add_argument('--wd', type=float, default=1e-5, help="Weight decay for optimizer")
    parser.add_argument('--layer_size', nargs='+', type=int, default=[512], help="Layer sizes for the model")
    parser.add_argument('--gamma', type=float, default=15, help="Gamma parameter for decoder")
    parser.add_argument('--epochs', type=int, default=1000, help="Number of training epochs")
    parser.add_argument('--test_freq', type=int, default=50, help="Frequency of evaluation during training")
    parser.add_argument('--lr', type=float, default=0.0001, help="Learning rate for optimizer")
    parser.add_argument('--patience', type=int, default=20, help="Patience for early stopping")
    return parser.parse_args()


def normalize_adj_matrix(adj_matrix: np.ndarray) -> torch.Tensor:
    """
    Normalizes the adjacency matrix using min-shift normalization and converts it to a torch tensor.

    Args:
        adj_matrix: Input adjacency matrix as a NumPy array.

    Returns:
        Normalized adjacency matrix as a torch tensor.
    """
    adj_matrix = adj_matrix - np.min(adj_matrix)
    if isinstance(adj_matrix, np.ndarray):
        adj_matrix = torch.from_numpy(adj_matrix).float()
    return adj_matrix


def main():
    """
    Main function to run the DeepTraCDR regression task with k-fold cross-validation.
    """
    # Set precision for matrix multiplication
    torch.set_float32_matmul_precision('high')

    # Parse command-line arguments
    args = parse_arguments()

    # Load dataset
    full_adj, drug_fingerprints, exprs, null_mask, pos_num, args = load_data(args)
print(f"Original full_adj shape: {full_adj.shape}")
print(f"Normalized full_adj shape: {full_adj.shape}")
print("\n--- Data Shapes ---")
print(f"Expression data shape: {exprs.shape}")
print(f"Null mask shape: {null_mask.shape}")
# Normalize adjacency matrix
full_adj = normalize_adj_matrix(full_adj)
    # Initialize k-fold cross-validation parameters
    k = 5
    n_kfolds = 5
    all_metrics = {'rmse': [], 'pcc': [], 'scc': []}

    # Perform k-fold cross-validation, repeated n_kfolds times with different seeds
    for n_kfold in range(n_kfolds):
        kfold = KFold(n_splits=k, shuffle=True, random_state=n_kfold)
        for fold, (train_idx, test_idx) in enumerate(kfold.split(np.arange(pos_num))):
            # Initialize data sampler
            sampler = RegressionSampler(full_adj, train_idx, test_idx, null_mask)

            # Initialize model
            model = DeepTraCDR(
                adj_mat=full_adj,
                cell_exprs=exprs,
                drug_finger=drug_fingerprints,
                layer_size=args.layer_size,
                gamma=args.gamma,
                device=args.device
            )

            # Initialize optimizer
            opt = Optimizer(
                model=model,
                train_data=sampler.train_data,
                test_data=sampler.test_data,
                test_mask=sampler.test_mask,
                train_mask=sampler.train_mask,
                adj_matrix=full_adj,
                lr=args.lr,
                wd=args.wd,
                epochs=args.epochs,
                test_freq=args.test_freq,
                device=args.device,
                patience=args.patience
            )

            # Train model and collect metrics
            true, pred, best_rmse, best_pcc, best_scc = opt.train()
            all_metrics['rmse'].append(best_rmse)
            all_metrics['pcc'].append(best_pcc)
            all_metrics['scc'].append(best_scc)
            print(f"Fold {n_kfold * k + fold + 1}: RMSE={best_rmse:.4f}, PCC={best_pcc:.4f}, SCC={best_scc:.4f}")

    # Compute and display final average metrics
    print("\nFinal Average Metrics:")
    for metric, values in all_metrics.items():
        mean = np.mean(values)
        std = np.std(values)
        print(f"{metric.upper()}: {mean:.4f} ± {std:.4f}")
if __name__ == "__main__":
    main()
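
For a quick sanity check of normalize_adj_matrix, here is a minimal sketch with made-up values (not drawn from the GDSC data; the import line assumes the companion modules resolve on the import path): the min-shift moves the smallest entry to zero, and the result comes back as a float32 torch tensor.

import numpy as np
from train_random import normalize_adj_matrix  # hypothetical import, assuming the layout above

# Hypothetical 2x2 matrix; the minimum is -2.0, so every entry is shifted up by 2.0
adj = np.array([[-2.0, 0.0],
                [1.0, 3.0]])
shifted = normalize_adj_matrix(adj)
print(shifted)        # tensor([[0., 2.], [3., 5.]])
print(shifted.dtype)  # torch.float32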
