# Custom | |||||
*.svs | |||||
*.xml | |||||
*.csv | |||||
*.xcf | |||||
*.zip | |||||
*.json | |||||
*.state | |||||
*.tiff | |||||
*.tif | |||||
.idea | |||||
*.jpeg | |||||
*.jpg | |||||
**/data/ | |||||
**/patches/ | |||||
classification_stuff/Transfer-Learning-Library | |||||
# Byte-compiled / optimized / DLL files | |||||
__pycache__/ | |||||
*.py[cod] | |||||
*$py.class | |||||
# C extensions | |||||
*.so | |||||
# Distribution / packaging | |||||
.Python | |||||
build/ | |||||
develop-eggs/ | |||||
dist/ | |||||
downloads/ | |||||
eggs/ | |||||
.eggs/ | |||||
lib/ | |||||
lib64/ | |||||
parts/ | |||||
sdist/ | |||||
var/ | |||||
wheels/ | |||||
pip-wheel-metadata/ | |||||
share/python-wheels/ | |||||
*.egg-info/ | |||||
.installed.cfg | |||||
*.egg | |||||
MANIFEST | |||||
# PyInstaller | |||||
# Usually these files are written by a python script from a template | |||||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | |||||
*.manifest | |||||
*.spec | |||||
# Installer logs | |||||
pip-log.txt | |||||
pip-delete-this-directory.txt | |||||
# Unit test / coverage reports | |||||
htmlcov/ | |||||
.tox/ | |||||
.nox/ | |||||
.coverage | |||||
.coverage.* | |||||
.cache | |||||
nosetests.xml | |||||
coverage.xml | |||||
*.cover | |||||
*.py,cover | |||||
.hypothesis/ | |||||
.pytest_cache/ | |||||
# Translations | |||||
*.mo | |||||
*.pot | |||||
# Django stuff: | |||||
*.log | |||||
local_settings.py | |||||
db.sqlite3 | |||||
db.sqlite3-journal | |||||
# Flask stuff: | |||||
instance/ | |||||
.webassets-cache | |||||
# Scrapy stuff: | |||||
.scrapy | |||||
# Sphinx documentation | |||||
docs/_build/ | |||||
# PyBuilder | |||||
target/ | |||||
# Jupyter Notebook | |||||
.ipynb_checkpoints | |||||
# IPython | |||||
profile_default/ | |||||
ipython_config.py | |||||
# pyenv | |||||
.python-version | |||||
# pipenv | |||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | |||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies | |||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not | |||||
# install all needed dependencies. | |||||
#Pipfile.lock | |||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow | |||||
__pypackages__/ | |||||
# Celery stuff | |||||
celerybeat-schedule | |||||
celerybeat.pid | |||||
# SageMath parsed files | |||||
*.sage.py | |||||
# Environments | |||||
.env | |||||
.venv | |||||
env/ | |||||
venv/ | |||||
ENV/ | |||||
env.bak/ | |||||
venv.bak/ | |||||
# Spyder project settings | |||||
.spyderproject | |||||
.spyproject | |||||
# Rope project settings | |||||
.ropeproject | |||||
# mkdocs documentation | |||||
/site | |||||
# mypy | |||||
.mypy_cache/ | |||||
.dmypy.json | |||||
dmypy.json | |||||
# Pyre type checker | |||||
.pyre/ |
-----BEGIN CERTIFICATE----- | |||||
MIIG5jCCBc6gAwIBAgIQAze5KDR8YKauxa2xIX84YDANBgkqhkiG9w0BAQUFADBs | |||||
MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3 | |||||
d3cuZGlnaWNlcnQuY29tMSswKQYDVQQDEyJEaWdpQ2VydCBIaWdoIEFzc3VyYW5j | |||||
ZSBFViBSb290IENBMB4XDTA3MTEwOTEyMDAwMFoXDTIxMTExMDAwMDAwMFowaTEL | |||||
MAkGA1UEBhMCVVMxFTATBgNVBAoTDERpZ2lDZXJ0IEluYzEZMBcGA1UECxMQd3d3 | |||||
LmRpZ2ljZXJ0LmNvbTEoMCYGA1UEAxMfRGlnaUNlcnQgSGlnaCBBc3N1cmFuY2Ug | |||||
RVYgQ0EtMTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAPOWYth1bhn/ | |||||
PzR8SU8xfg0ETpmB4rOFVZEwscCvcLssqOcYqj9495BoUoYBiJfiOwZlkKq9ZXbC | |||||
7L4QWzd4g2B1Rca9dKq2n6Q6AVAXxDlpufFP74LByvNK28yeUE9NQKM6kOeGZrzw | |||||
PnYoTNF1gJ5qNRQ1A57bDIzCKK1Qss72kaPDpQpYSfZ1RGy6+c7pqzoC4E3zrOJ6 | |||||
4GAiBTyC01Li85xH+DvYskuTVkq/cKs+6WjIHY9YHSpNXic9rQpZL1oRIEDZaARo | |||||
LfTAhAsKG3jf7RpY3PtBWm1r8u0c7lwytlzs16YDMqbo3rcoJ1mIgP97rYlY1R4U | |||||
pPKwcNSgPqcCAwEAAaOCA4UwggOBMA4GA1UdDwEB/wQEAwIBhjA7BgNVHSUENDAy | |||||
BggrBgEFBQcDAQYIKwYBBQUHAwIGCCsGAQUFBwMDBggrBgEFBQcDBAYIKwYBBQUH | |||||
AwgwggHEBgNVHSAEggG7MIIBtzCCAbMGCWCGSAGG/WwCATCCAaQwOgYIKwYBBQUH | |||||
AgEWLmh0dHA6Ly93d3cuZGlnaWNlcnQuY29tL3NzbC1jcHMtcmVwb3NpdG9yeS5o | |||||
dG0wggFkBggrBgEFBQcCAjCCAVYeggFSAEEAbgB5ACAAdQBzAGUAIABvAGYAIAB0 | |||||
AGgAaQBzACAAQwBlAHIAdABpAGYAaQBjAGEAdABlACAAYwBvAG4AcwB0AGkAdAB1 | |||||
AHQAZQBzACAAYQBjAGMAZQBwAHQAYQBuAGMAZQAgAG8AZgAgAHQAaABlACAARABp | |||||
AGcAaQBDAGUAcgB0ACAARQBWACAAQwBQAFMAIABhAG4AZAAgAHQAaABlACAAUgBl | |||||
AGwAeQBpAG4AZwAgAFAAYQByAHQAeQAgAEEAZwByAGUAZQBtAGUAbgB0ACAAdwBo | |||||
AGkAYwBoACAAbABpAG0AaQB0ACAAbABpAGEAYgBpAGwAaQB0AHkAIABhAG4AZAAg | |||||
AGEAcgBlACAAaQBuAGMAbwByAHAAbwByAGEAdABlAGQAIABoAGUAcgBlAGkAbgAg | |||||
AGIAeQAgAHIAZQBmAGUAcgBlAG4AYwBlAC4wEgYDVR0TAQH/BAgwBgEB/wIBADCB | |||||
gwYIKwYBBQUHAQEEdzB1MCQGCCsGAQUFBzABhhhodHRwOi8vb2NzcC5kaWdpY2Vy | |||||
dC5jb20wTQYIKwYBBQUHMAKGQWh0dHA6Ly93d3cuZGlnaWNlcnQuY29tL0NBQ2Vy | |||||
dHMvRGlnaUNlcnRIaWdoQXNzdXJhbmNlRVZSb290Q0EuY3J0MIGPBgNVHR8EgYcw | |||||
gYQwQKA+oDyGOmh0dHA6Ly9jcmwzLmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydEhpZ2hB | |||||
c3N1cmFuY2VFVlJvb3RDQS5jcmwwQKA+oDyGOmh0dHA6Ly9jcmw0LmRpZ2ljZXJ0 | |||||
LmNvbS9EaWdpQ2VydEhpZ2hBc3N1cmFuY2VFVlJvb3RDQS5jcmwwHQYDVR0OBBYE | |||||
FExYyyXwQU9S9CjIgUObpqig5pLlMB8GA1UdIwQYMBaAFLE+w2kD+L9HAdSYJhoI | |||||
Au9jZCvDMA0GCSqGSIb3DQEBBQUAA4IBAQBMeheHKF0XvLIyc7/NLvVYMR3wsXFU | |||||
nNabZ5PbLwM+Fm8eA8lThKNWYB54lBuiqG+jpItSkdfdXJW777UWSemlQk808kf/ | |||||
roF/E1S3IMRwFcuBCoHLdFfcnN8kpCkMGPAc5K4HM+zxST5Vz25PDVR708noFUjU | |||||
xbvcNRx3RQdIRYW9135TuMAW2ZXNi419yWBP0aKb49Aw1rRzNubS+QOy46T15bg+ | |||||
BEkAui6mSnKDcp33C4ypieez12Qf1uNgywPE3IjpnSUBAHHLA7QpYCWP+UbRe3Gu | |||||
zVMSW4SOwg/H7ZMZ2cn6j1g0djIvruFQFGHUqFijyDATI+/GJYw2jxyA | |||||
-----END CERTIFICATE----- |
MIT License | |||||
Copyright (c) 2022 Amir Hossein | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
of this software and associated documentation files (the "Software"), to deal | |||||
in the Software without restriction, including without limitation the rights | |||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
copies of the Software, and to permit persons to whom the Software is | |||||
furnished to do so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. |
# Thyroid-Project-Using-DL | |||||
Developing a neural network to classify thyroid carcinoma using H&E slides |
# Demo script: restyles a source-domain tile with Fourier Domain Adaptation (FDA),
# using the FourierTransform implementation vendored in Transfer-Learning-Library.
# from dalib.translation.fourier_transform import FourierTransform
import importlib.util
import sys
from PIL import Image
# NOTE(review): hard-coded absolute Windows path to the vendored library checkout;
# this only works on the original author's machine — consider deriving it from
# __file__ or a config value.
fourier_transform_address = "E:\\Documentwork\\sharif\\CE Project\\future\\Thyroid Project\\Thyroid-Project-Using-DL\\classification_stuff\\Transfer-Learning-Library\\dalib\\translation\\fourier_transform.py"
# Load the module directly from its file path instead of a package import.
spec = importlib.util.spec_from_file_location("module.name", fourier_transform_address)
foo = importlib.util.module_from_spec(spec)
sys.modules["module.name"] = foo
spec.loader.exec_module(foo)
FourierTransform = foo.FourierTransform
# Target-domain tiles whose amplitude spectra define the destination style.
# NOTE(review): "bio_tile (4).jpeg" appears twice — presumably a typo for (5); confirm.
image_list = ["bio_tile (1).jpeg", "bio_tile (2).jpeg", "bio_tile (3).jpeg", "bio_tile (4).jpeg", "bio_tile (4).jpeg"]
amplitude_dir = "amplitude_dir"  # cache directory for precomputed amplitude spectra
fourier_transform = FourierTransform(image_list, amplitude_dir, beta=0, rebuild=False)
source_image = Image.open("tile2.jpeg")  # image form source domain
source_image_in_target_style = fourier_transform(source_image)
source_image_in_target_style.save("out_fda.jpeg")
import random | |||||
import cv2 | |||||
import torch | |||||
import torch.nn as nn | |||||
from albumentations.augmentations.utils import read_rgb_image | |||||
from albumentations.core.transforms_interface import BasicTransform, to_tuple | |||||
class Mixup(BasicTransform):
    """Albumentations transform that blends each image with a random mixup image.

    ``mixups`` is a sequence of ``(image_path, target)`` pairs. For every call a
    blend weight ``beta`` is drawn uniformly from ``beta_limit`` and the output
    image is ``(1 - beta) * image + beta * mixup_image``; the target is wrapped
    in a dict carrying both labels and the weight (consumed by ``mixup_loss``).
    """

    def __init__(self, mixups, read_fn=read_rgb_image, beta_limit=0.3, **kwargs):
        super().__init__(**kwargs)
        self.mixups = mixups
        self.read_fn = read_fn
        self.beta_limit = to_tuple(beta_limit, low=0)

    def apply(self, image, mixup_image=None, beta=0.1, **params):
        """Return the convex combination of the input and the mixup image."""
        original_dtype = image.dtype
        blended = (1 - beta) * image + beta * mixup_image
        return blended.astype(original_dtype)

    def apply_to_target(self, target, beta=0.1, mixup_target=-1, **params):
        """Bundle the original target with the mixup target and blend weight."""
        return {"img": target, "mixup": mixup_target, "beta": beta}

    def get_params_dependent_on_targets(self, params):
        """Pick a random mixup sample and fit its image to the input's size."""
        base = params["image"]
        chosen = random.choice(self.mixups)
        companion = self.read_fn(chosen[0])
        pad_v = max(0, (base.shape[0] - companion.shape[0]) // 2)
        pad_h = max(0, (base.shape[1] - companion.shape[1]) // 2)
        try:
            companion = cv2.copyMakeBorder(companion, pad_v, pad_v, pad_h, pad_h,
                                           cv2.BORDER_REFLECT)
        except Exception as e:
            # Best-effort: padding can fail on degenerate sizes; the resize
            # below still forces the companion to the input's shape.
            print(e)
        companion = cv2.resize(companion, dsize=(base.shape[1], base.shape[0]))
        return {"mixup_image": companion, "mixup_target": chosen[1]}

    def get_params(self):
        return {"beta": random.uniform(self.beta_limit[0], self.beta_limit[1])}

    @property
    def targets(self):
        return {
            "image": self.apply,
            "target": self.apply_to_target,
        }

    @property
    def targets_as_params(self):
        return ["image"]
def mixup_loss(output, target):
    """Cross-entropy loss that also understands mixup-augmented targets.

    Args:
        output: raw class scores of shape (batch, n_classes).
        target: either a plain label tensor, or the dict produced by
            ``Mixup.apply_to_target`` with keys "img", "mixup" and "beta".

    Returns:
        A scalar loss tensor: plain cross-entropy for tensor targets, or the
        beta-weighted mean of the per-sample losses against the original and
        mixup labels.
    """
    # Fix: isinstance instead of `type(...) ==`, so Tensor subclasses
    # (e.g. nn.Parameter) take the plain cross-entropy path too.
    if isinstance(target, torch.Tensor):
        return nn.CrossEntropyLoss()(output, target)
    # Mixup was used: combine per-sample losses with the blend weight.
    per_sample = nn.CrossEntropyLoss(reduction="none")
    combined = ((1 - target["beta"]) * per_sample(output, target["img"])
                + target["beta"] * per_sample(output, target["mixup"]))
    return combined.mean()
import csv | |||||
import glob | |||||
import os | |||||
import random | |||||
from tqdm import tqdm | |||||
from config import Config | |||||
class CustomFragmentLoader:
    """Indexes patch images produced by the database crawlers and splits them
    into train/validation/test sets, keeping all patches of one slide inside
    a single split (no slide-level leakage across splits)."""

    def __init__(self, datasets_folder_name):
        # Names of dataset folders under ../../../database_crawlers/ to load.
        self._datasets_folder_name = datasets_folder_name
        # database_id -> {image_id: [patch_image_paths, row[3], row[2]]}
        self._database_slide_dict = {}
        self._load_csv_files_to_dict()

    def _load_csv_files_to_dict(self):
        """Populate _database_slide_dict from each dataset's patch_labels.csv."""
        databases_directory = "../../../database_crawlers/"
        # Only datasets that actually have a "patches" directory are considered.
        list_dir = [os.path.join(databases_directory, o, "patches") for o in self._datasets_folder_name
                    if os.path.isdir(os.path.join(databases_directory, o, "patches"))]
        for db_dir in list_dir:
            csv_dir = os.path.join(db_dir, "patch_labels.csv")
            with open(csv_dir, "r") as csv_file:
                csv_reader = csv.reader(csv_file)
                header = next(csv_reader, None)  # skip the CSV header row
                for row in csv_reader:
                    if row:
                        database_id = row[0]
                        image_id = row[1]
                        # Fragment folders are named with a prefix of the image id.
                        slide_frag_folder_name = [o for o in os.listdir(db_dir) if image_id.startswith(o)]
                        if slide_frag_folder_name:
                            slide_frag_folder_name = slide_frag_folder_name[0]
                        else:
                            continue
                        slide_path = os.path.join(db_dir, slide_frag_folder_name)
                        image_paths = glob.glob(os.path.join(slide_path, "*.jpeg"))
                        if image_paths:
                            # NOTE(review): row[3] and row[2] look like label/metadata
                            # columns — confirm against the crawler CSV schema.
                            d = self._database_slide_dict.get(database_id, {})
                            d[image_id] = [image_paths] + [row[3], row[2]]
                            self._database_slide_dict[database_id] = d

    def load_image_path_and_labels_and_split(self, test_percent=20, val_percent=10):
        """Return (train, val, test) lists of (image_path, class_name) pairs.

        Whole slides are assigned to one split at a time; per class, the greedy
        fill below aims for roughly test_percent/val_percent of patch counts.
        """
        train_images, val_images, test_images = [], [], []
        for database_name, slides_dict in self._database_slide_dict.items():
            # (patch_count, patch_paths, label_a, label_b) per slide.
            image_paths_by_slide = [(len(v[0]), v[0], v[1], v[2]) for v in slides_dict.values()]
            random.shuffle(image_paths_by_slide)
            # image_paths_by_slide.sort()
            class_slides_dict = {}
            for item in image_paths_by_slide:
                class_name = None
                if database_name == "NationalCancerInstitute":
                    # item[2] is a stringified tuple "('normal', 'tumor', 'stromal')"
                    # of percentage values — parsed positionally.
                    normal_percent = int(item[2].strip(r"(|)|\'").split("\', \'")[0])
                    tumor_percent = int(item[2].strip(r"(|)|\'").split("\', \'")[1])
                    stormal_percent = int(item[2].strip(r"(|)|\'").split("\', \'")[2])
                    if stormal_percent == 0:
                        if tumor_percent == 100:
                            class_name = "MALIGNANT"
                        elif normal_percent == 100:
                            class_name = "BENIGN"
                        else:
                            class_name = str(tumor_percent)
                elif database_name == "BioAtlasThyroidSlideProvider":
                    # Bio Atlas labels arrive as free text in item[3].
                    if "papillary" in item[3].lower():
                        class_name = "MALIGNANT"
                    elif "normal" in item[3].lower():
                        class_name = "BENIGN"
                # Other databases: fall back to the raw label column.
                class_name = class_name if class_name else item[2]
                if class_name in Config.class_names:
                    class_slides_dict[class_name] = class_slides_dict.get(class_name, []) + [
                        (item[0], item[1], class_name)]
            # split test val train because they must not share same slide id fragment
            for thyroid_class, slide_frags in class_slides_dict.items():
                dataset_train_images, dataset_val_images, dataset_test_images = [], [], []
                total_counts = sum([item[0] for item in slide_frags])
                test_counts = total_counts * test_percent // 100
                val_counts = total_counts * val_percent // 100
                train_counts = total_counts - test_counts - val_counts
                # Greedy fill: a slide's patches go to train while they fit,
                # then to val, and everything left over to test.
                for i, slide_frags_item in enumerate(slide_frags):
                    if len(dataset_train_images) + slide_frags_item[0] <= train_counts:
                        dataset_train_images += slide_frags_item[1]
                    elif len(dataset_val_images) + slide_frags_item[0] <= val_counts:
                        dataset_val_images += slide_frags_item[1]
                    else:
                        dataset_test_images += slide_frags_item[1]
                train_images += [(i, thyroid_class) for i in dataset_train_images]
                val_images += [(i, thyroid_class) for i in dataset_val_images]
                test_images += [(i, thyroid_class) for i in dataset_test_images]
        return train_images, val_images, test_images

    def national_cancer_image_and_labels_splitter_per_slide(self, test_percent=20, val_percent=10):
        """NCI-specific variant of the splitter: classes are tumor percentages
        (ints) and each sample also carries its slide id, yielding items of the
        form (image_path, (tumor_percent, slide_id))."""
        train_images, val_images, test_images = [], [], []
        for database_name, slides_dict in self._database_slide_dict.items():
            print(database_name)
            # (patch_count, patch_paths, label_a, label_b, slide_id) per slide.
            image_paths_by_slide = [(len(v[0]), v[0], v[1], v[2], k) for k, v in slides_dict.items()]
            random.shuffle(image_paths_by_slide)
            # image_paths_by_slide.sort()
            class_slides_dict = {}
            for item in tqdm(image_paths_by_slide):
                class_name = None
                # item[2] is a stringified tuple "('normal', 'tumor', 'stromal')"
                # of percentage values — parsed positionally.
                normal_percent = int(item[2].strip(r"(|)|\'").split("\', \'")[0])
                tumor_percent = int(item[2].strip(r"(|)|\'").split("\', \'")[1])
                stormal_percent = int(item[2].strip(r"(|)|\'").split("\', \'")[2])
                if stormal_percent == 0:
                    if tumor_percent == 100:
                        class_name = 100
                    elif normal_percent == 100:
                        class_name = 0
                    else:
                        class_name = tumor_percent
                class_name = class_name if class_name is not None else item[2]
                if class_name in Config.class_names:
                    class_slides_dict[class_name] = class_slides_dict.get(class_name, []) + [
                        (item[0], item[1], class_name, item[4])]
            # split test val train because they must not share same slide id fragment
            for thyroid_class, slide_frags in class_slides_dict.items():
                dataset_train_images, dataset_val_images, dataset_test_images = [], [], []
                total_counts = sum([item[0] for item in slide_frags])
                test_counts = total_counts * test_percent // 100
                val_counts = total_counts * val_percent // 100
                train_counts = total_counts - test_counts - val_counts
                for i, slide_frags_item in enumerate(slide_frags):
                    # Attach the slide id to every patch path of this slide.
                    items_paths = [(item_path, slide_frags_item[3]) for item_path in slide_frags_item[1]]
                    if len(dataset_train_images) + slide_frags_item[0] <= train_counts:
                        dataset_train_images += items_paths
                    elif len(dataset_val_images) + slide_frags_item[0] <= val_counts:
                        dataset_val_images += items_paths
                    else:
                        dataset_test_images += items_paths
                train_images += [(i, (thyroid_class, j)) for i, j in dataset_train_images]
                val_images += [(i, (thyroid_class, j)) for i, j in dataset_val_images]
                test_images += [(i, (thyroid_class, j)) for i, j in dataset_test_images]
        return train_images, val_images, test_images
if __name__ == '__main__':
    # Pick exactly one dataset folder to inspect; the others stay commented out.
    # datasets_folder = ["national_cancer_institute"]
    datasets_folder = ["papsociaty"]
    # datasets_folder = ["stanford_tissue_microarray"]
    # datasets_folder = ["bio_atlas_at_jake_gittlen_laboratories"]
    train, val, test = CustomFragmentLoader(datasets_folder).load_image_path_and_labels_and_split(
        val_percent=Config.val_percent,
        test_percent=Config.test_percent)
    # Report the class balance of each split.
    benign_train = [i for i in train if i[1] == "BENIGN"]
    mal_train = [i for i in train if i[1] == "MALIGNANT"]
    print(f"train: {len(train)}={len(benign_train)}+{len(mal_train)}")
    benign_val = [i for i in val if i[1] == "BENIGN"]
    mal_val = [i for i in val if i[1] == "MALIGNANT"]
    print(f"val: {len(val)}={len(benign_val)}+{len(mal_val)}")
    benign_test = [i for i in test if i[1] == "BENIGN"]
    mal_test = [i for i in test if i[1] == "MALIGNANT"]
    print(f"test: {len(test)}={len(benign_test)}+{len(mal_test)}")
    # Sanity-check that the splits do not share any samples (should print empty sets).
    print(set(train) & set(test))
    print(set(train) & set(val))
    print(set(test) & set(val))
    # NOTE(review): `set(val) & set(val)` is just `set(val)` — this looks like it
    # was meant to measure an overlap between two *different* splits; confirm intent.
    print(len(set(val) & set(val)))
import os | |||||
def set_config_for_logger(config_label):
    """Create (or return) the DEBUG-level logger for one training configuration.

    Log records are appended to ``./train_state/<config_label>/console.log``;
    the directory chain is created if missing.

    Args:
        config_label: name of the training configuration; also used as the
            logger name and the state sub-directory name.

    Returns:
        A ``logging.Logger`` with a single file handler attached.
    """
    import logging
    config_train_dir = os.path.join("./train_state", config_label)
    # Fix: makedirs(exist_ok=True) replaces the racy isdir()+mkdir() pairs and
    # creates the parent ./train_state directory in the same call.
    os.makedirs(config_train_dir, exist_ok=True)
    log_file = os.path.join(config_train_dir, "console.log")
    logger = logging.getLogger(config_label)
    logger.setLevel(logging.DEBUG)
    # Fix: logging.getLogger returns the same object per name, so attaching a
    # handler unconditionally duplicated every log line on repeated calls.
    if not logger.handlers:
        fh = logging.FileHandler(log_file)
        formatter = logging.Formatter('%(asctime)s|%(levelname)s|%(message)s', datefmt='%Y-%m-%d %H:%M:%S')
        fh.setFormatter(formatter)
        fh.setLevel(logging.DEBUG)
        logger.addHandler(fh)
    return logger
import os | |||||
import random | |||||
import time | |||||
from typing import cast | |||||
import numpy as np | |||||
import matplotlib.pyplot as plt | |||||
import timm | |||||
import torch | |||||
import torchvision | |||||
from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score | |||||
from torch import nn, optim | |||||
from torch.utils.data import DataLoader | |||||
from tqdm import tqdm | |||||
from config import Config | |||||
from fragment_splitter import CustomFragmentLoader | |||||
from model_train_logger import set_config_for_logger | |||||
from thyroid_dataset import ThyroidDataset | |||||
from thyroid_ml_model import ThyroidClassificationModel | |||||
from transformation import get_transformation | |||||
@torch.no_grad()
def validate(model, data_loader, loss_function=None, show_tqdm=False):
    """Evaluate ``model`` on ``data_loader`` and compute classification metrics.

    Args:
        model: classification model; called with ``validate=True``.
        data_loader: DataLoader whose dataset exposes ``class_to_idx_dict``.
            Column 1 of the model output is used as the positive-class score,
            so binary classification is assumed here.
        loss_function: optional; when given, the mean batch loss is returned too.
        show_tqdm: wrap the iteration in a tqdm progress bar.

    Returns:
        ``(balanced_accuracy_percent, cf_matrix, (fpr, tpr, auc))`` and, when
        ``loss_function`` is set, the mean loss appended as a fourth element.
    """
    class_set = sorted(data_loader.dataset.class_to_idx_dict.values())
    loss_values = []
    y_preds = []
    y_targets = []
    y_positive_scores = []
    for images, labels in (data_loader if not show_tqdm else tqdm(data_loader)):
        images = images.to(Config.available_device)
        labels = labels.to(Config.available_device)
        x = model(images, validate=True)
        if loss_function:
            loss_values.append(loss_function(x, labels))
        values, preds = torch.max(x, 1)
        y_positive_scores += x[:, 1].cpu()
        y_preds += preds.cpu()
        y_targets += labels.cpu()
    # Row-normalized confusion matrix -> the diagonal holds per-class recall.
    cf_matrix = confusion_matrix(y_targets, y_preds, normalize="true")
    class_accuracies = [cf_matrix[c][c] for c in class_set]
    acc = sum(class_accuracies)
    acc /= len(class_set)  # mean per-class recall == balanced accuracy
    # TN|FN
    # FP|TP
    fpr, tpr, _ = roc_curve(y_targets, y_positive_scores)
    auc = roc_auc_score(y_targets, y_positive_scores)
    if loss_function:
        loss = sum(loss_values)
        loss /= len(loss_values)
        return acc * 100, cf_matrix, (fpr, tpr, auc), loss
    return acc * 100, cf_matrix, (fpr, tpr, auc)
def get_save_state_dirs(config_label, epoch=None):
    """Ensure and return the directories used to persist training state.

    Args:
        config_label: training-configuration name (sub-directory of ./train_state).
        epoch: when given, an ``epoch-<n>`` sub-directory is created as well.

    Returns:
        ``(trains_state_dir, config_train_dir, save_state_dir)`` where the last
        element is None when ``epoch`` is None.
    """
    trains_state_dir = "./train_state"
    config_train_dir = os.path.join(trains_state_dir, config_label)
    # Fix: makedirs(exist_ok=True) removes the check-then-create race of the
    # former isdir()+mkdir() pairs and creates the whole chain in one call.
    os.makedirs(config_train_dir, exist_ok=True)
    if epoch is not None:
        save_state_dir = os.path.join(config_train_dir, f"epoch-{epoch}")
        os.makedirs(save_state_dir, exist_ok=True)
    else:
        save_state_dir = None
    return trains_state_dir, config_train_dir, save_state_dir
def plot_and_save_model_per_epoch(epoch,
                                  model_to_save,
                                  val_acc_list,
                                  train_acc_list,
                                  val_loss_list,
                                  train_loss_list,
                                  config_label):
    """Refresh the accuracy/loss curve images for a configuration and
    optionally save the model state for this epoch.

    Args:
        epoch: epoch number used for the epoch state directory, or None to
            skip creating one.
        model_to_save: model whose state should be persisted, or None to skip.
        val_acc_list / train_acc_list: per-epoch accuracy histories to plot.
        val_loss_list / train_loss_list: per-epoch loss histories to plot.
        config_label: configuration name selecting the output directory.

    NOTE(review): if ``model_to_save`` is truthy while ``epoch`` is None,
    ``save_state_dir`` is None and os.path.join below raises — callers must
    pass both or neither.
    """
    trains_state_dir, config_train_dir, save_state_dir = get_save_state_dirs(config_label, epoch)
    # Accuracy curves overwrite the same image file on every call.
    fig_save_path = os.path.join(config_train_dir, "val_train_acc.jpeg")
    plt.plot(range(len(val_acc_list)), val_acc_list, label="validation")
    plt.plot(range(len(train_acc_list)), train_acc_list, label="train")
    plt.legend(loc="lower right")
    plt.xlabel('Epoch')
    plt.ylabel('Balanced Accuracy')
    plt.savefig(fig_save_path)
    plt.clf()  # reset pyplot's implicit figure before the next chart
    # Loss curves, same pattern.
    fig_save_path = os.path.join(config_train_dir, "val_train_loss.jpeg")
    plt.plot(range(len(val_loss_list)), val_loss_list, label="validation")
    plt.plot(range(len(train_loss_list)), train_loss_list, label="train")
    plt.legend(loc="lower right")
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.savefig(fig_save_path)
    plt.clf()
    if model_to_save:
        model_save_path = os.path.join(save_state_dir, "model.state")
        model_to_save.save_model(model_save_path)
def save_auc_roc_chart_for_test(test_fpr, test_tpr, test_auc_score, config_label, epoch):
    """Plot the test-set ROC curve and save it under the epoch's state directory.

    The file name embeds the current timestamp so repeated evaluations of the
    same epoch never overwrite each other.
    """
    _, _, save_dir = get_save_state_dirs(config_label, epoch)
    chart_path = os.path.join(save_dir, f"test_roc_{time.time()}.jpeg")
    plt.plot(test_fpr, test_tpr, label="test, auc=" + str(test_auc_score))
    plt.legend(loc="lower right")
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.savefig(chart_path)
    plt.clf()
def calculate_test(image_model, epoch, test_data_loader, logger, config_name, show_tqdm=False):
    """Evaluate the model on the test split, persist its ROC chart and log the accuracy."""
    image_model.eval()
    accuracy, cf_matrix, roc_stats = validate(image_model,
                                              test_data_loader,
                                              show_tqdm=show_tqdm)
    fpr, tpr, auc_score = roc_stats
    save_auc_roc_chart_for_test(fpr, tpr, auc_score, config_name, epoch)
    test_acc = float(accuracy)
    logger.info(f'Test|Epoch:{epoch}|Accuracy:{round(test_acc, 4)}, {cf_matrix}%')
def train_model(base_model, config_base_name, train_val_test_data_loaders, augmentation,
                adaptation_sample_dataset=None,
                train_model_flag=True,
                load_model_from_dir=None):
    """Train (or only evaluate) a thyroid classification model.

    Args:
        base_model: backbone model wrapped by ThyroidClassificationModel.
        config_base_name: prefix for the configuration/log directory name.
        train_val_test_data_loaders: (train, val, test) DataLoader triple;
            entries may be None when unused.
        augmentation: augmentation spec forwarded to get_transformation.
        adaptation_sample_dataset: optional dataset used by domain-adaptation
            augmentations.
        train_model_flag: when False, skip training and only run the test split.
        load_model_from_dir: directory containing a saved "model.state" to load.
    """
    config_name = f"{config_base_name}-{augmentation}-{','.join(Config.class_idx_dict.keys())}"
    logger = set_config_for_logger(config_name)
    logger.info(f"training config: {config_name}")
    try:
        # Inception3 returns (logits, aux_logits) in train mode and needs the
        # auxiliary loss term below. isinstance replaces the `type(...) ==` check.
        _is_inception = isinstance(base_model, torchvision.models.inception.Inception3)
        train_data_loader, val_data_loader, test_data_loader = train_val_test_data_loaders
        logger.info(
            f"train valid test splits:" +
            f" {len(train_data_loader.dataset.samples) if train_data_loader else None}," +
            f" {len(val_data_loader.dataset.samples) if val_data_loader else None}," +
            f" {len(test_data_loader.dataset.samples) if test_data_loader else None}")
        # MODEL
        if load_model_from_dir:
            # Load model from file
            model_path = os.path.join(load_model_from_dir, 'model.state')
            image_model = ThyroidClassificationModel(base_model).load_model(model_path).to(Config.available_device)
        else:
            image_model = ThyroidClassificationModel(base_model).to(Config.available_device)
        if train_model_flag:
            # TRAIN
            transformation = get_transformation(augmentation=augmentation, base_dataset=adaptation_sample_dataset)
            train_dataset = cast(ThyroidDataset, train_data_loader.dataset)
            train_dataset.transform = transformation
            # Class-weighted loss to compensate for class imbalance.
            cec = nn.CrossEntropyLoss(weight=torch.tensor(train_dataset.class_weights).to(Config.available_device))
            optimizer = optim.Adam(image_model.parameters(), lr=Config.learning_rate)
            my_lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=Config.decay_rate)
            val_acc_history = []
            train_acc_history = []
            best_epoch_val_acc = 0
            for epoch in range(Config.n_epoch):
                # Per-epoch accumulators for the training balanced accuracy.
                # Fix: these previously lived outside the loop and grew across
                # epochs, so the logged "per-epoch" train accuracy mixed
                # predictions from all earlier epochs.
                train_y_preds = []
                train_y_targets = []
                class_set = sorted(train_data_loader.dataset.class_to_idx_dict.values())
                for images, labels in tqdm(train_data_loader, colour="#0000ff"):
                    # Skip degenerate tail batches (smaller than half a batch).
                    if len(images) >= Config.batch_size // 2:
                        image_model.train()
                        images = images.to(Config.available_device)
                        labels = labels.to(Config.available_device)
                        optimizer.zero_grad()
                        pred = image_model(images)
                        # pred label: torch.max(pred, 1)[1], labels
                        if _is_inception:
                            # Auxiliary-classifier loss weighted at 0.4.
                            pred, aux_pred = pred
                            loss, aux_loss = cec(pred, labels), cec(aux_pred, labels)
                            loss = loss + 0.4 * aux_loss
                        else:
                            loss = cec(pred, labels)
                        loss.backward()
                        optimizer.step()
                        # Collect train predictions/labels for the epoch metrics.
                        values, preds = torch.max(pred, 1)
                        train_y_preds.extend(preds.cpu())
                        train_y_targets.extend(labels.cpu())
                # Epoch level: training metrics (diagonal of the row-normalized
                # confusion matrix is per-class recall).
                image_model.eval()
                train_cf_matrix = confusion_matrix(train_y_targets, train_y_preds, normalize="true")
                class_accuracies = [train_cf_matrix[c][c] for c in class_set]
                # Fix: train_acc was computed twice before; keep the single
                # .item() form used by the history/logging.
                train_acc = (100 * sum(class_accuracies) / len(class_set)).item()
                train_acc_history.append(train_acc)
                logger.info(f'Train|E:{epoch}|Balanced Accuracy:{round(train_acc, 4)}%,\n{train_cf_matrix}')
                # Validation metrics.
                val_acc, val_cf_matrix, _, val_loss = validate(image_model,
                                                               val_data_loader,
                                                               cec)
                val_acc = float(val_acc)
                val_acc_history.append(val_acc)
                logger.info(f'Val|E:{epoch}|Balanced Accuracy:{round(val_acc, 4)}%,\n{val_cf_matrix}')
                save_model = False
                # Fix: range(Config.n_epoch) ends at n_epoch - 1, so the old
                # `epoch == Config.n_epoch` check could never be true.
                is_last_epoch = epoch == Config.n_epoch - 1
                is_a_better_epoch = val_acc >= best_epoch_val_acc
                is_a_better_epoch &= abs(train_acc - val_acc) < Config.train_val_acc_max_distance_for_best_epoch
                if is_a_better_epoch or is_last_epoch:
                    save_model = True
                    if is_a_better_epoch:
                        # Fix: best_epoch_val_acc was never updated, so the
                        # "better epoch" test degenerated to val_acc >= 0.
                        best_epoch_val_acc = val_acc
                    calculate_test(image_model, epoch, test_data_loader, logger, config_name, show_tqdm=False)
                plot_and_save_model_per_epoch(epoch if save_model else None,
                                              image_model if save_model else None,
                                              val_acc_history,
                                              train_acc_history,
                                              [],
                                              [],
                                              config_label=config_name)
                my_lr_scheduler.step()
        else:
            # JUST EVALUATE
            calculate_test(image_model, 0, test_data_loader, logger, config_name,
                           show_tqdm=True)
    except Exception as e:
        print(e)
        logger.error(str(e))
        raise e
def load_datasets(datasets_folders, test_percent=Config.test_percent, val_percent=Config.val_percent, sample_percent=1,
                  is_nci_per_slide=False):
    """Build samples, datasets and data loaders for the train/val/test splits.

    Args:
        datasets_folders: dataset folder names passed to CustomFragmentLoader.
        test_percent / val_percent: split sizes forwarded to the splitter.
        sample_percent: fraction of each split to (re)sample.
        is_nci_per_slide: use the NCI per-slide splitter instead of the default.

    Returns:
        ((train, val, test) sample lists, matching ThyroidDatasets, matching
        DataLoaders — a loader is None when its sample list is empty).

    NOTE(review): random.choices samples WITH replacement, so even at
    sample_percent=1 the splits are resampled and may contain duplicates —
    confirm this is intended rather than random.sample.
    """
    loader = CustomFragmentLoader(datasets_folders)
    if is_nci_per_slide:
        l_train, l_val, l_test = loader.national_cancer_image_and_labels_splitter_per_slide(
            test_percent=test_percent,
            val_percent=val_percent)
    else:
        l_train, l_val, l_test = loader.load_image_path_and_labels_and_split(
            test_percent=test_percent,
            val_percent=val_percent)
    l_train = random.choices(l_train, k=int(sample_percent * len(l_train)))
    l_val = random.choices(l_val, k=int(sample_percent * len(l_val)))
    l_test = random.choices(l_test, k=int(sample_percent * len(l_test)))
    l_train_ds = ThyroidDataset(l_train, Config.class_idx_dict)
    l_val_ds = ThyroidDataset(l_val, Config.class_idx_dict)
    l_test_ds = ThyroidDataset(l_test, Config.class_idx_dict)
    split_loaders = []
    for samples, dataset, batch in ((l_train, l_train_ds, Config.batch_size),
                                    (l_val, l_val_ds, Config.eval_batch_size),
                                    (l_test, l_test_ds, Config.eval_batch_size)):
        split_loaders.append(DataLoader(dataset, batch_size=batch, shuffle=True) if samples else None)
    l_train_data_loader, l_val_data_loader, l_test_data_loader = split_loaders
    return (l_train, l_val, l_test), (l_train_ds, l_val_ds, l_test_ds), (
        l_train_data_loader, l_val_data_loader, l_test_data_loader)
@torch.no_grad()
def evaluate_nci_dataset_per_slide(config_base_name, augmentation, base_model, data_loader,
                                   load_model_from_dir):
    """Evaluate a trained model on the NCI per-slide tumor-percent task.

    Patch-level positive-class scores are averaged per slide, scaled to a
    0-100 "tumor percent" prediction, rounded to the nearest multiple of 10,
    and compared against the (equally rounded) slide labels with a
    row-normalized confusion matrix.

    Args:
        config_base_name: prefix used to build the logging config name.
        augmentation: augmentation tag (only used in the config/log name here).
        base_model: backbone wrapped by ThyroidClassificationModel.
        data_loader: yields (images, (labels, slide_ids)) batches.
        load_model_from_dir: directory containing the saved 'model.state'.

    Returns:
        (weighted accuracy in percent, normalized confusion matrix)
    """
    config_name = f"{config_base_name}-{augmentation}-tumor-percent"
    logger = set_config_for_logger(config_name)
    logger.info(f"training config: {config_name}")
    logger.info(
        f"test:" +
        f" {len(data_loader.dataset.samples) if data_loader else None}")
    # MODEL: load trained weights from file.
    model_path = os.path.join(load_model_from_dir, 'model.state')
    model = ThyroidClassificationModel(base_model).load_model(model_path).to(Config.available_device)
    slides_preds = {}  # slide_id -> list of patch positive scores, later replaced by mean * 100
    slide_labels = {}  # slide_id -> slide-level label
    for images, (labels, slides) in tqdm(data_loader):
        images = images.to(Config.available_device)
        output = model(images, validate=True).cpu()
        positive_scores = output[:, 1]
        logger.info("zero and 1000 percent")
        logger.info(output[:, 0])
        logger.info(output[:, 1])
        for row_index in range(len(labels)):
            slide_id = slides[row_index]
            slide_labels[slide_id] = labels[row_index]
            slides_preds[slide_id] = slides_preds.get(slide_id, []) + [positive_scores[row_index].item()]
    y_targets = []
    y_preds = []
    for slide_id in slides_preds:
        # Mean patch score as the slide-level tumor fraction, expressed in percent.
        slides_preds[slide_id] = (sum(slides_preds[slide_id]) / len(slides_preds[slide_id])) * 100
        y_preds.append(slides_preds[slide_id])
        y_targets.append(int(slide_labels[slide_id]))
    # Round both sides to the nearest multiple of 10 so they land on the matrix labels.
    y_targets_rounded = [int(round(v / 100, 1) * 100) for v in y_targets]
    y_preds_rounded = [int(round(v / 100, 1) * 100) for v in y_preds]
    cf_matrix = confusion_matrix(y_targets_rounded, y_preds_rounded, labels=Config.class_names, normalize="true")
    # Diagonal accuracy weighted by each row's mass in the normalized matrix.
    class_accuracies = [cf_matrix[c][c] for c in range(len(cf_matrix))]
    class_weights = [sum(cf_matrix[c]) for c in range(len(cf_matrix))]
    acc = sum(class_accuracies[i] * class_weights[i] for i in range(len(class_accuracies)))
    acc /= sum(class_weights)
    # TN|FN
    # FP|TP
    logger.info(f"target rounded:{y_targets_rounded}")
    logger.info(f"pred rounded:{y_preds_rounded}")
    logger.info(f"Results| acc:{acc * 100}\ncf:{cf_matrix}")
    return acc * 100, cf_matrix
########## | |||||
## Runs ## | |||||
########## | |||||
# train_phase block | |||||
# Train only when explicitly enabled via Config.train_phase.
if __name__ == '__main__' and Config.train_phase:
    # Full NCI sample with the default test/val split from Config.
    _, (train_ds, _, _), (train_data_loader, val_data_loader, test_data_loader) = load_datasets(
        ["national_cancer_institute"],
        sample_percent=1)
    # Domain adaptation dataset on small real datasets
    # _, (_, _, domain_sample_test_dataset), _ = load_datasets(["stanford_tissue_microarray",
    #                                                           "papsociaty"],
    #                                                          sample_percent=0.5,
    #                                                          test_percent=100,
    #                                                          val_percent=0)
    # Each entry: (config base name, backbone model, augmentations to train with).
    for c_base_name, model, augmentations in [
        (f"resnet101_{Config.learning_rate}_{Config.decay_rate}_nci_final",
         torchvision.models.resnet101(pretrained=True, progress=True), [
             "mixup",
             # "jit",
             # "fda",
             # "jit-fda-mixup",
             # "shear",
             # "std"
         ]),
    ]:
        for aug in augmentations:
            # Re-seed before every run so different augmentations are comparable.
            Config.reset_random_seeds()
            train_model(model, c_base_name, (train_data_loader, val_data_loader, test_data_loader),
                        augmentation=aug, adaptation_sample_dataset=train_ds)
# evaluate_phase block | |||||
# Evaluate only when explicitly enabled via Config.evaluate_phase.
if __name__ == '__main__' and Config.evaluate_phase:
    # Main data
    # Tumor-percent task: re-map classes to integer percentages 0..100.
    Config.class_names = [i for i in range(101)]
    Config.class_idx_dict = {i: i for i in range(101)}
    # Everything goes to the test split (test_percent=100), grouped per slide.
    _, (train_ds, _, _), (_, _, test_data_loader) = load_datasets(
        ["national_cancer_institute",
         ],
        sample_percent=1, test_percent=100, val_percent=0, is_nci_per_slide=True)
    # Each entry: (config base name, backbone, [(augmentation, saved-epoch dir), ...]).
    for c_base_name, model, aug_best_epoch_list in [
        (f"resnet101_{Config.learning_rate}_{Config.decay_rate}_nci_eval",
         torchvision.models.resnet101(pretrained=True, progress=True), [
             ("mixup", "train_state/resnet101_0.0001_1_nci_final-mixup-BENIGN,MALIGNANT/epoch-19/"),
         ]),
        # (f"resnet101_{Config.learning_rate}_{Config.decay_rate}_test_nci_eval",
        #  torchvision.models.resnet101(pretrained=True, progress=True), [
        #      ("fda",
        #       "train_state/runs_0.0001_1_nic_test_benign_mal/resnet101_0.0001_1_nci-fda-BENIGN,MALIGNANT/epoch-3/"),
        #      ("mixup",
        #       "train_state/runs_0.0001_1_nic_test_benign_mal/resnet101_0.0001_1_nci-mixup-BENIGN,MALIGNANT/epoch-3/"),
        #      ("jit",
        #       "train_state/runs_0.0001_1_nic_test_benign_mal/resnet101_0.0001_1_nci-jit-BENIGN,MALIGNANT/epoch-3/"),
        #      ("jit-fda-mixup",
        #       "train_state/runs_0.0001_1_nic_test_benign_mal/resnet101_0.0001_1_nci-jit-fda-mixup-BENIGN,MALIGNANT/epoch-3/"),
        #  ]),
    ]:
        for aug, best_epoch in aug_best_epoch_list:
            # Re-seed before each evaluation for reproducibility.
            Config.reset_random_seeds()
            evaluate_nci_dataset_per_slide(c_base_name, aug, model, test_data_loader,
                                           load_model_from_dir=best_epoch)
# Make the project root and the current directory importable, then run the splitter.
export PYTHONPATH="${PYTHONPATH}:../../../";
export PYTHONPATH="${PYTHONPATH}:./";
python fragment_splitter.py;
# Make the project root and the current directory importable, then run training.
export PYTHONPATH="${PYTHONPATH}:../../../";
export PYTHONPATH="${PYTHONPATH}:./";
python model_training.py;
import os | |||||
import numpy as np | |||||
from PIL import Image | |||||
from torch.utils.data import Dataset | |||||
from config import Config | |||||
from fragment_splitter import CustomFragmentLoader | |||||
from transformation import get_transformation | |||||
from utils import show_and_wait | |||||
class ThyroidDataset(Dataset):
    """Thyroid patch dataset.

    Each item is (image_tensor, (class_index, slide_id)). Every image is first
    centered on a black square canvas of `force_to_size_with_padding` pixels,
    then run through the configured transform (or the "min" transform when none
    is set).
    """

    def __init__(self, image_paths_labels_list, class_to_index, transform=None, force_to_size_with_padding=512):
        super().__init__()
        self.class_to_idx_dict = class_to_index
        self.force_to_size_with_padding = force_to_size_with_padding
        self.transform = transform
        self.samples = self._make_dataset(image_paths_labels_list)
        self.class_weights = self._calculate_class_weights(image_paths_labels_list)

    def _calculate_class_weights(self, image_paths_labels_list):
        """Inverse-frequency weight for every class, ordered by class index."""
        counts = {}
        for _path, (label, _slide) in image_paths_labels_list:
            counts[label] = counts.get(label, 0) + 1
        total = len(image_paths_labels_list)
        indexed_weights = [
            (self.class_to_idx_dict.get(label, None), total / (len(counts) * count))
            for label, count in counts.items()
        ]
        indexed_weights.sort()
        return [weight for _idx, weight in indexed_weights]

    def _make_dataset(self, image_paths_labels_list):
        """Check each image exists on disk and map its label to a class index."""
        entries = []
        for image_path, (label, slide) in image_paths_labels_list:
            if not os.path.exists(os.path.abspath(image_path)):
                raise RuntimeError(f"{image_path} not found.")
            entries.append((image_path, (self.class_to_idx_dict.get(label, "Unknown label"), slide)))
        return entries

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        path, target = self.samples[index]
        padded = self.add_margin(Image.open(path).convert('RGB'))
        pixels = np.array(padded)
        active_transform = self.transform if self.transform is not None else get_transformation(augmentation="min")
        return active_transform(image=pixels)['image'], target

    def add_margin(self, pil_img):
        """Paste the image centered on a black RGB square canvas."""
        side = self.force_to_size_with_padding
        canvas = Image.new("RGB", (side, side), (0, 0, 0))
        width, height = pil_img.size
        canvas.paste(pil_img, ((side - width) // 2, (side - height) // 2))
        return canvas
if __name__ == '__main__':
    # Smoke test: build datasets from the two small real-world sources
    # and fetch one sample.
    class_idx_dict = Config.class_idx_dict
    datasets_folder = ["stanford_tissue_microarray", "papsociaty"]
    train, val, test = CustomFragmentLoader(datasets_folder).load_image_path_and_labels_and_split()
    train_ds = ThyroidDataset(train, class_idx_dict)
    test_ds = ThyroidDataset(test, class_idx_dict)
    val_ds = ThyroidDataset(val, class_idx_dict)
    res = train_ds.__getitem__(0)
    print(res)
import torch | |||||
import torchvision | |||||
from torch import nn | |||||
class ThyroidClassificationModel(nn.Module):
    """Binary classification head(s) on top of a 1000-way backbone.

    Inception-v3 backbones return (logits, aux_logits) during training, so a
    second identical head (`classifier2`) is created for the auxiliary output.
    """

    def __init__(self, base_model):
        super().__init__()
        self.base_model = base_model
        self.classifier = self._build_head()
        # isinstance (rather than exact type equality) also covers Inception3 subclasses.
        self._is_inception3 = isinstance(base_model, torchvision.models.inception.Inception3)
        if self._is_inception3:
            # Head for Inception3's auxiliary logits, used only in training mode.
            self.classifier2 = self._build_head()

    @staticmethod
    def _build_head():
        """1000 -> 2 MLP head ending in a softmax over the two classes."""
        return nn.Sequential(
            nn.Linear(1000, 500),
            nn.BatchNorm1d(500),
            nn.ReLU(),
            nn.Linear(500, 100),
            nn.BatchNorm1d(100),
            nn.ReLU(),
            nn.Linear(100, 2),
            nn.BatchNorm1d(2),
            nn.Softmax(dim=-1)
        )

    def forward(self, x, validate=False):
        """Return class probabilities; for Inception3 in train mode, a (main, aux) pair."""
        output = self.base_model(x.float())
        if self._is_inception3 and not validate:
            return self.classifier(output[0]), self.classifier2(output[1])
        return self.classifier(output)

    def save_model(self, path):
        """Persist only the state_dict to `path`."""
        torch.save(self.state_dict(), path)

    def load_model(self, path):
        """Load weights saved by save_model, switch to eval mode, and return self.

        NOTE(review): torch.load without map_location will fail when loading
        CUDA-saved weights on a CPU-only host — confirm whether
        map_location=Config.available_device should be passed here.
        """
        self.load_state_dict(torch.load(path))
        self.eval()
        return self
import albumentations as A | |||||
from albumentations.pytorch import ToTensorV2 | |||||
from albumentations_mixup import Mixup | |||||
def get_transformation(augmentation, crop_size=299, base_dataset=None):
    """Build an albumentations pipeline for the named augmentation strategy.

    Args:
        augmentation: one of "min", "std", "jit", "jit-nrs", "fda", "mixup",
            "jit-fda-mixup", "jit-fda-mixup-nrs", "shear".
        crop_size: side length of the final square crop.
        base_dataset: dataset whose samples supply reference images for the
            "fda" and "mixup" families (required for those strategies).

    Returns:
        An albumentations Compose ending in ToTensorV2.

    Raises:
        ValueError: if `augmentation` is not a known name.
    """
    scaled_center_crop_size = int(crop_size * 1.25)

    def random_crop_transformation(x):
        # Always-applied random square crop to the final size.
        return A.RandomCrop(x, x, always_apply=True)

    def get_flip_rotate__custom__noise_transform(transform_list, random_scale=True):
        # Shared wrapper: flips/rotation/scaling, pad + center-crop to a stable
        # size, random final crop, the strategy-specific transforms, then mild
        # blur/noise and tensor conversion.
        return A.Compose([
            A.Flip(p=0.25),
            A.Rotate(p=0.25),
            A.RandomScale(scale_limit=0.5, p=0.5 if random_scale else 0),
            A.PadIfNeeded(min_height=scaled_center_crop_size, min_width=scaled_center_crop_size,
                          always_apply=True),
            A.CenterCrop(scaled_center_crop_size, scaled_center_crop_size),
            random_crop_transformation(crop_size),
        ] + transform_list + [
            A.Blur(p=0.25, blur_limit=2),
            A.GaussNoise(p=0.25, var_limit=10),
            ToTensorV2()
        ])

    def fda_reference_paths():
        # Image file paths used as style references by Fourier Domain Adaptation.
        return [sample[0] for sample in base_dataset.samples]

    def mixup_references():
        # (path, label) pairs mixed into the input by the Mixup transform.
        return [sample[0:2] for sample in base_dataset.samples]

    if augmentation == "min":
        # No augmentation: deterministic pad + crops only.
        trans = A.Compose([
            A.PadIfNeeded(min_height=scaled_center_crop_size, min_width=scaled_center_crop_size, always_apply=True),
            A.CenterCrop(scaled_center_crop_size, scaled_center_crop_size),
            random_crop_transformation(crop_size),
            ToTensorV2()
        ])
    elif augmentation == "std":
        trans = get_flip_rotate__custom__noise_transform([])
    elif augmentation == "jit-nrs":
        trans = get_flip_rotate__custom__noise_transform([
            A.ColorJitter(p=0.5, hue=.5)
        ], random_scale=False)
    elif augmentation == "jit":
        trans = get_flip_rotate__custom__noise_transform([
            A.ColorJitter(p=0.5, hue=.5)
        ])
    elif augmentation == "fda":
        trans = get_flip_rotate__custom__noise_transform([
            A.domain_adaptation.FDA(fda_reference_paths(), beta_limit=0.1, p=0.5)
        ])
    elif augmentation == "mixup":
        trans = get_flip_rotate__custom__noise_transform([
            Mixup(mixups=mixup_references(), p=0.5, beta_limit=0.1),
        ])
    elif augmentation in ("jit-fda-mixup", "jit-fda-mixup-nrs"):
        # All three augmentations combined with reduced probability each;
        # the "-nrs" variant additionally disables random scaling.
        p = 0.16
        trans = get_flip_rotate__custom__noise_transform([
            A.domain_adaptation.FDA(fda_reference_paths(), beta_limit=0.1, p=p),
            Mixup(mixups=mixup_references(), p=p, beta_limit=0.1),
            A.ColorJitter(p=p, hue=.5)
        ], random_scale=not augmentation.endswith("-nrs"))
    elif augmentation == "shear":
        trans = get_flip_rotate__custom__noise_transform([
            A.Affine(shear={"x": (-10, 10), "y": (-10, 10)}, p=0.5)
        ], random_scale=False)
    else:
        raise ValueError(f"Augmentation unknown: {augmentation}")
    return trans
import random | |||||
import torch | |||||
class Config:
    """Central configuration namespace (never instantiated)."""
    DEBUG = False
    batch_size = 32
    eval_batch_size = 128
    # Dataset split sizes, in percent.
    test_percent = 20
    val_percent = 10
    learning_rate = 0.0001
    decay_rate = 1  # 0.99**50=0.6, 0.99**100=0.36
    n_epoch = 2 if DEBUG else 20
    # DEBUG forces CPU even when CUDA is available.
    available_device = "cuda" if torch.cuda.is_available() and not DEBUG else "cpu"
    # NOTE: executed at import time, as a side effect of class-body evaluation.
    print(f"Device: {available_device}")
    workers = 1 if DEBUG else 40
    # learned from evaluate_image_patcher_and_visualize.py
    laplacian_threshold = 298
    # RANDOM SEED
    seed = 115

    @staticmethod
    def reset_random_seeds():
        """Re-seed Python's and torch's RNGs for reproducible runs."""
        random.seed(Config.seed)
        torch.manual_seed(Config.seed)

    class_names = ["BENIGN", "MALIGNANT"]
    class_idx_dict = {"BENIGN": 0, "MALIGNANT": 1}
    train_val_acc_max_distance_for_best_epoch = 6  # Percent
    n_epoch_for_image_patcher = 60
    # Which top-level phases run when the training module executes as a script.
    train_phase = False
    evaluate_phase = False


# Seed the RNGs once at import.
Config.reset_random_seeds()
import ssl | |||||
import time | |||||
from urllib.parse import urlparse | |||||
from urllib.request import urlopen | |||||
from bs4 import BeautifulSoup | |||||
from database_crawlers.web_stain_sample import StainType, WebStainWSIOneDIndex | |||||
# NOTE(review): disables HTTPS certificate verification for the whole process —
# presumably needed to crawl this host; confirm this is acceptable.
ssl._create_default_https_context = ssl._create_unverified_context
class BioAtlasAtJakeGittlenLaboratoriesImage(WebStainWSIOneDIndex):
    """One whole-slide image on bio-atlas.psu.edu, tiled by a 1-D index."""

    def __init__(self, database_name, image_id, image_web_label, report, stain_type, is_wsi):
        super().__init__(database_name, image_id, image_web_label, report, stain_type, is_wsi)

    def _get_tile_url(self, zoom, partition=None, i=None, j=None):
        # i/j are unused here: tiles are addressed by the single linear `partition` index.
        return f"https://bio-atlas.psu.edu/human/tile.jpeg.php?s={self.image_id}&z={zoom}&i={partition}"

    def get_slide_view_url(self):
        # Human-viewable page for this slide.
        return f"https://bio-atlas.psu.edu/human/view.php?s={self.image_id}"

    def _get_file_path_name(self):
        return self.save_path + self.image_id

    def find_best_zoom(self):
        # Zoom 0 is the highest-detail level for this provider.
        return 0
class BioAtlasThyroidSlideProvider:
    """Scrapes the bio-atlas.psu.edu search results for thyroid H&E slides."""
    page_link = "https://bio-atlas.psu.edu/human/search.php?q=Thyroid&organism%5B%5D=5&age_fr=&age_fr_units=1&age_to=&age_to_units=1&sex%5B%5D=all&thumbnails=on&rpp=30&as_sfid=AAAAAAW0RrspdnblpiFwz8osoAdvS8nafd1J9LG_ARQ-IF_NZ3aI2EXCMDBeqE_iD5rUo1QLg454tS63DMSgATSzgrksb4rMi-GWPl3O9f3JKlqGn8oXoqbOYok3__yZx69ewzg%3D&as_fid=6900aeb3e4cc9f39ef9738a2f11c2cefb8c3f37c#results"
    database_name = "BioAtlasThyroidSlideProvider"
    stain_type = StainType.H_AND_E
    is_wsi = True

    @classmethod
    def get_web_stain_samples(cls):
        """Yield BioAtlasAtJakeGittlenLaboratoriesImage objects from the search page.

        On any failure the whole fetch is retried after a short pause; the retry
        generator is delegated into so callers keep receiving slide objects.
        """
        print(cls.page_link)
        try:
            html_text = urlopen(cls.page_link).read()
            soup = BeautifulSoup(html_text, 'html.parser')
            search_results = soup.find_all("div", {"class": "shadow-box search-result-item search-result-slide"})
            for result_item in search_results:
                image_view_url = result_item.find("a").attrs['href']
                query_param = urlparse(image_view_url).query.split("=")
                if query_param[0] != "s":
                    raise Exception("Query params does not contains image url")
                image_id = query_param[1]
                # The diagnosis text directly follows the "Diagnosis" bold tag.
                image_web_label = str(result_item.find("b", text="Diagnosis").next_sibling)
                yield BioAtlasAtJakeGittlenLaboratoriesImage(cls.database_name, image_id, image_web_label, None,
                                                             cls.stain_type, cls.is_wsi)
        except Exception as e:
            print(e)
            time.sleep(2)
            # BUG FIX: the original `yield cls.get_web_stain_samples()` yielded a
            # raw generator object as if it were a slide; delegate into it instead.
            yield from cls.get_web_stain_samples()
if __name__ == '__main__':
    # Crawl exactly one known slide (id 687) and stop.
    bio_atlas_provider = BioAtlasThyroidSlideProvider()
    for slide in bio_atlas_provider.get_web_stain_samples():
        if slide.image_id == "687":
            print(slide.image_id, slide.image_web_label, slide.get_slide_view_url())
            slide.crawl_image_save_jpeg_and_json()
            break
# Make the project root and the current directory importable, then run the crawler.
export PYTHONPATH="${PYTHONPATH}:../../";
export PYTHONPATH="${PYTHONPATH}:./";
python database_crawler.py;
import time | |||||
from urllib.parse import urlparse | |||||
from urllib.request import urlopen | |||||
from bs4 import BeautifulSoup | |||||
from database_crawlers.web_stain_sample import StainType, WebStainWSITwoDIndex | |||||
class HeidelbergPathologyImage(WebStainWSITwoDIndex):
    """One whole-slide image in the Heidelberg pathology atlas, tiled by (i, j)."""

    def __init__(self, database_name, image_id, image_web_label, report, stain_type, is_wsi):
        super().__init__(database_name, image_id, image_web_label, report, stain_type, is_wsi)

    def _get_tile_url(self, zoom, partition=None, i=None, j=None):
        # `partition` is unused; DZI tiles are addressed by zoom level and (i, j).
        return f"https://eliph.klinikum.uni-heidelberg.de/dzi/atlas/05-schilddruese/05-{'%.2d' % int(self.image_id)}_files/{zoom}/{i}_{j}.jpeg"

    def get_slide_view_url(self):
        # Human-viewable atlas page for this slide.
        return f"https://eliph.klinikum.uni-heidelberg.de/atlas/?c=05-schilddruese&context=image&pg={self.image_id}"

    def _get_file_path_name(self):
        return self.save_path + self.image_id

    def find_best_zoom(self):
        # 16 -> 0
        return 16
class HeidelbergPathologyProvider:
    """Scrapes the Heidelberg pathology atlas thyroid collection for slides."""
    page_link = "https://eliph.klinikum.uni-heidelberg.de/atlas/?c=05-schilddruese&context=image"
    database_name = "HeidelbergPathology"
    stain_type = StainType.H_AND_E
    is_wsi = True

    @classmethod
    def get_web_stain_samples(cls):
        """Yield HeidelbergPathologyImage objects parsed from the atlas page.

        On any failure the whole fetch is retried after a short pause; the retry
        generator is delegated into so callers keep receiving slide objects.
        """
        print(cls.page_link)
        try:
            html_text = urlopen(cls.page_link).read()
            soup = BeautifulSoup(html_text, 'html.parser')
            search_results = soup.find_all("div", {"class": "casegrid"})
            for result_item in search_results:
                image_view_url = result_item.find("a").attrs['href']
                query_param = urlparse(image_view_url).query.split("=")
                if "image&pg" not in query_param:
                    raise Exception("Query params does not contains image id")
                image_id = query_param[-1]
                image_web_label = str(result_item.find("b").next)
                yield HeidelbergPathologyImage(cls.database_name, image_id, image_web_label, None,
                                               cls.stain_type, cls.is_wsi)
        except Exception as e:
            print(e)
            time.sleep(2)
            # BUG FIX: the original `yield cls.get_web_stain_samples()` yielded a
            # raw generator object as if it were a slide; delegate into it instead.
            yield from cls.get_web_stain_samples()
if __name__ == '__main__':
    # Crawl only the first slide found, then stop.
    bio_atlas_provider = HeidelbergPathologyProvider()
    for slide in bio_atlas_provider.get_web_stain_samples():
        print(slide.image_id, slide.image_web_label, slide.get_slide_view_url())
        slide.crawl_image_save_jpeg_and_json()
        break
from image_patcher import ImageAndSlidePatcher | |||||
if __name__ == '__main__':
    # Patch all crawled bio-atlas slides found under ../<database_folder_name>/data.
    database_folder_name = "bio_atlas_at_jake_gittlen_laboratories"
    database_directory = "../"
    image_slide_patcher = ImageAndSlidePatcher()
    image_slide_patcher.save_patches_in_folders(database_directory, database_folder_name)
import csv | |||||
import json | |||||
import os | |||||
import os.path as os_path | |||||
import random | |||||
import re | |||||
from math import ceil | |||||
from os import listdir | |||||
from os.path import isfile, join | |||||
import cv2 | |||||
import tifffile | |||||
import zarr as ZarrObject | |||||
from tqdm import tqdm | |||||
from config import Config | |||||
from database_crawlers.web_stain_sample import ThyroidCancerLevel, WebStainImage | |||||
from utils import show_and_wait | |||||
class ThyroidFragmentFilters:
    """Predicates used to discard near-empty (low-detail) slide fragments."""

    @staticmethod
    def func_laplacian_threshold(threshold=Config.laplacian_threshold):
        """Return a filter: True when a fragment's Laplacian variance reaches `threshold`."""
        def wrapper(image_nd_array):
            return ThyroidFragmentFilters._empty_frag_with_laplacian_threshold(image_nd_array, threshold)
        return wrapper

    @staticmethod
    def _empty_frag_with_laplacian_threshold(image_nd_array, threshold=Config.laplacian_threshold,
                                             return_variance=False):
        """Blur-then-Laplacian variance test; optionally also return the variance."""
        grayscale = cv2.cvtColor(image_nd_array, cv2.COLOR_BGR2GRAY)
        smoothed = cv2.GaussianBlur(grayscale, (3, 3), 0)
        laplacian = cv2.Laplacian(smoothed, cv2.CV_64F, ksize=3, )
        deviation = cv2.meanStdDev(laplacian)[1][0][0]
        variance = deviation ** 2
        keep = variance >= threshold
        if return_variance:
            return keep, variance
        return keep
class ImageAndSlidePatcher:
    """Cuts slide images (tiff/svs via zarr, or plain jpeg) into overlapping
    512x512 patches, filters out near-empty patches by Laplacian variance,
    and stores the kept patches plus a CSV of per-slide metadata."""

    @classmethod
    def _check_magnification_from_description(cls, tiff_address):
        """Return the AppMag magnification parsed from the TIFF ImageDescription tag, or None."""
        try:
            tif_file_obj = tifffile.TiffFile(tiff_address)
            image_description = tif_file_obj.pages.keyframe.tags["ImageDescription"].value
            app_mag = int(re.findall("(AppMag = [0-9]+)", image_description)[0].split(" = ")[-1])
            return app_mag
        except Exception as e:
            # Missing or unparseable description tag -> magnification unknown.
            return None

    @classmethod
    def _zarr_loader(cls, tiff_address, key=0):
        """Open a TIFF page lazily as a zarr array (no full decode into memory)."""
        image_zarr = tifffile.imread(tiff_address, aszarr=True, key=key, )
        zarr = ZarrObject.open(image_zarr, mode='r')
        return zarr

    @classmethod
    def _jpeg_loader(cls, jpeg_address):
        """Load a jpeg into a BGR ndarray via OpenCV."""
        im = cv2.imread(jpeg_address)
        return im

    @classmethod
    def _json_key_loader(cls, json_file_address, key=None):
        """Load a JSON file; return the whole dict, or one value if `key` is given."""
        with open(json_file_address, 'rb') as file:
            json_dict = json.loads(file.read())
        if key:
            return json_dict[key]
        return json_dict

    @classmethod
    def _get_extension_from_path(cls, file_path):
        """Return the file extension including the leading dot."""
        return os_path.splitext(file_path)[-1]

    @classmethod
    def _get_file_name_from_path(cls, file_path):
        """Return the base file name with its (last) extension removed."""
        return ".".join(os_path.split(file_path)[-1].split(".")[:-1])

    @classmethod
    def _get_number_of_initial_frags(cls, zarr_object, frag_size=512, frag_overlap=0.1):
        """Count the fragments the sliding window will produce (for progress bars)."""
        zarr_shape = zarr_object.shape
        step_size = int(frag_size * (1 - frag_overlap))
        overlap_size = frag_size - step_size
        w_range = list(range(0, ceil((zarr_shape[0] - overlap_size) / step_size) * step_size, step_size))
        h_range = list(range(0, ceil((zarr_shape[1] - overlap_size) / step_size) * step_size, step_size))
        return len(w_range) * len(h_range)

    @classmethod
    def _generate_raw_fragments_from_image_array_or_zarr(cls, image_object, frag_size=512, frag_overlap=0.1,
                                                         shuffle=True):
        """Yield (fragment, (start_w, start_h)) windows over the image.

        Windows that would run past the edge are shifted back so every
        fragment is exactly frag_size x frag_size. With shuffle=True the
        window positions are visited in random order.
        """
        def frag_picker(w_pos, h_pos):
            # Clamp the window to the image, keeping its full size.
            end_w, end_h = min(zarr_shape[0], w_pos + frag_size), min(zarr_shape[1], h_pos + frag_size)
            start_w, start_h = end_w - frag_size, end_h - frag_size
            return image_object[start_w:end_w, start_h: end_h], (start_w, start_h)

        if image_object is None:
            return None
        zarr_shape = image_object.shape
        step_size = int(frag_size * (1 - frag_overlap))
        overlap_size = frag_size - step_size
        w_range = list(range(0, ceil((zarr_shape[0] - overlap_size) / step_size) * step_size, step_size))
        h_range = list(range(0, ceil((zarr_shape[1] - overlap_size) / step_size) * step_size, step_size))
        if shuffle:
            pos_list = [None] * len(w_range) * len(h_range)
            index = 0
            for w in w_range:
                for h in h_range:
                    pos_list[index] = (w, h)
                    index += 1
            random.shuffle(pos_list)
            for w, h in pos_list:
                yield frag_picker(w, h)
        else:
            for w in w_range:
                for h in h_range:
                    yield frag_picker(w, h)

    @classmethod
    def _filter_frag_from_generator(cls, frag_generator, filter_func_list, return_all_with_condition=False,
                                    all_frag_count=None, output_file=None):
        """Apply every filter to each fragment; yield the passing ones.

        With return_all_with_condition=True, every fragment is yielded along
        with its boolean filter verdict instead of being dropped.
        """
        for next_test_item, frag_pos in tqdm(frag_generator, total=all_frag_count, file=output_file,
                                             postfix="Filtering", position=0):
            condition = True
            for function in filter_func_list:
                condition &= function(next_test_item)
            if return_all_with_condition:
                yield next_test_item, frag_pos, condition
            elif condition:
                # show_and_wait(frag)
                yield next_test_item, frag_pos

    @classmethod
    def _get_json_and_image_address_of_directory(cls, directory_path, ignore_json=False):
        """Pair each image file in the directory with its same-named JSON metadata file.

        Returns (json_path, image_path) pairs, or just image paths when
        ignore_json=True.
        NOTE(review): image_formats omits ".tif"/".svs" which other methods
        accept — confirm whether those should be listed here too.
        """
        image_formats = [".jpeg", ".tiff", ".jpg"]
        json_format = ".json"
        files = [f for f in listdir(directory_path) if isfile(join(directory_path, f))]
        files.sort()
        pairs = {}
        for file_path in files:
            file_path = join(directory_path, file_path)
            file_name = cls._get_file_name_from_path(file_path)
            pairs[file_name] = pairs.get(file_name, [None, None])
            if cls._get_extension_from_path(file_path) in image_formats:
                pairs[file_name][1] = file_path
            elif cls._get_extension_from_path(file_path) == json_format:
                pairs[file_name][0] = file_path
        if ignore_json:
            # Each pairs value is [json_path, image_path]; unpacking as (key, value)
            # therefore returns the image path element.
            return [value for key, value in pairs.values() if value is not None]
        return [(key, value) for key, value in pairs.values() if key is not None and value is not None]

    @staticmethod
    def create_patch_dir_and_initialize_csv(database_path):
        """Ensure <database>/patches exists and open its patch_labels.csv for appending.

        Writes the header row only when the file is empty. Returns
        (data_dir, patch_dir, csv_writer, csv_file); the caller owns closing csv_file.
        """
        data_dir = os.path.join(database_path, "data")
        patch_dir = os.path.join(database_path, "patches")
        if not os.path.isdir(patch_dir):
            os.mkdir(patch_dir)
        label_csv_path = os.path.join(patch_dir, "patch_labels.csv")
        csv_file = open(label_csv_path, "a+")
        csv_writer = csv.writer(csv_file)
        # Peek from the start to decide whether the header must be written.
        csv_file.seek(0)
        if len(csv_file.read(100)) <= 0:
            csv_writer.writerow(WebStainImage.sorted_json_keys())
        return data_dir, patch_dir, csv_writer, csv_file

    @classmethod
    def save_image_patches_and_update_csv(cls, thyroid_type, thyroid_desired_classes, csv_writer, web_details,
                                          image_path, slide_patch_dir, slide_id):
        """Record the slide's metadata row, then cut, filter and save its patches.

        Returns (kept fragment count, total window count), or None when the
        image was skipped by the operator.
        NOTE(review): thyroid_type and thyroid_desired_classes are unused here —
        confirm whether class filtering was intended.
        """
        csv_writer.writerow(list(web_details.values()))
        if cls._get_extension_from_path(image_path) in [".tiff", ".tif", ".svs"]:
            # Whole-slide formats are streamed through zarr.
            zarr_object = cls._zarr_loader(image_path)
            generator = cls._generate_raw_fragments_from_image_array_or_zarr(zarr_object)
            total_counts = cls._get_number_of_initial_frags(zarr_object=zarr_object)
        else:
            # Plain images are loaded whole; the operator supplies a rescale factor.
            jpeg_image = cls._jpeg_loader(image_path)
            jpeg_image = cls.ask_image_scale_and_rescale(jpeg_image)
            generator = cls._generate_raw_fragments_from_image_array_or_zarr(jpeg_image)
            total_counts = cls._get_number_of_initial_frags(zarr_object=jpeg_image)
        if generator is None:
            return
        if not os.path.isdir(slide_patch_dir):
            os.mkdir(slide_patch_dir)
        filters = [ThyroidFragmentFilters.func_laplacian_threshold(Config.laplacian_threshold)]
        fragment_id = 0
        slide_progress_file_path = os.path.join(slide_patch_dir, "progress.txt")
        # Progress bar output is redirected into a per-slide progress file.
        with open(slide_progress_file_path, "w") as file:
            for fragment, frag_pos in cls._filter_frag_from_generator(generator, filters, all_frag_count=total_counts,
                                                                      output_file=file):
                fragment_file_path = os.path.join(slide_patch_dir, f"{slide_id}-{fragment_id}.jpeg")
                cv2.imwrite(fragment_file_path, fragment)
                fragment_id += 1
        return fragment_id, total_counts

    @classmethod
    def save_patches_in_folders(cls, database_directory, dataset_dir=None):
        """Patch every un-patched slide of each dataset under database_directory.

        A slide is identified by the hash of its file name; an existing patch
        directory for that hash means it was already processed and is skipped.
        """
        thyroid_desired_classes = [ThyroidCancerLevel.MALIGNANT, ThyroidCancerLevel.BENIGN]
        datasets_dirs = os.listdir(database_directory) if dataset_dir is None else [dataset_dir]
        list_dir = [os.path.join(database_directory, o) for o in datasets_dirs
                    if os.path.isdir(os.path.join(database_directory, o, "data"))]
        for database_path in list_dir:
            print("database path: ", database_path)
            data_dir, patch_dir, csv_writer, csv_file = cls.create_patch_dir_and_initialize_csv(database_path)
            for json_path, image_path in cls._get_json_and_image_address_of_directory(data_dir):
                print("image path: ", image_path)
                file_name = cls._get_file_name_from_path(image_path)
                slide_id = str(hash(file_name))
                slide_patch_dir = os.path.join(patch_dir, slide_id)
                if os.path.isdir(slide_patch_dir):
                    """
                    it has already been patched
                    """
                    continue
                web_details = cls._json_key_loader(json_path)
                web_details["image_id"] = slide_id
                web_label = web_details["image_web_label"]
                thyroid_type = ThyroidCancerLevel.get_thyroid_level_from_diagnosis_label(web_label)
                web_details["image_class_label"] = thyroid_type.value[1]
                cls.save_image_patches_and_update_csv(thyroid_type, thyroid_desired_classes, csv_writer, web_details,
                                                      image_path, slide_patch_dir, slide_id)
            csv_file.close()

    @classmethod
    def save_papsociaty_patch(cls, database_path):
        """Patch the PapSociety dataset, whose labels come from folder names
        rather than JSON metadata files."""
        thyroid_desired_classes = [ThyroidCancerLevel.MALIGNANT, ThyroidCancerLevel.BENIGN]
        print("database path: ", database_path)
        for folder in Config.class_names:
            group_path = os.path.join(database_path, "data", folder)
            data_dir, patch_dir, csv_writer, csv_file = cls.create_patch_dir_and_initialize_csv(database_path)
            for image_path in cls._get_json_and_image_address_of_directory(group_path, ignore_json=True):
                print("image path: ", image_path)
                file_name = cls._get_file_name_from_path(image_path)
                slide_id = str(hash(file_name))
                slide_patch_dir = os.path.join(patch_dir, slide_id)
                if os.path.isdir(slide_patch_dir):
                    """
                    it has already been patched
                    """
                    continue
                # Label is derived from the class folder plus the file name.
                web_label = folder + "-" + file_name
                thyroid_type = ThyroidCancerLevel.get_thyroid_level_from_diagnosis_label(web_label)
                web_details = {"database_name": "PapSociety",
                               "image_id": slide_id,
                               "image_web_label": web_label,
                               "image_class_label": thyroid_type.value[1],
                               "report": None,
                               "stain_type": "UNKNOWN",
                               "is_wsi": False}
                cls.save_image_patches_and_update_csv(thyroid_type, thyroid_desired_classes, csv_writer, web_details,
                                                      image_path, slide_patch_dir, slide_id)
            csv_file.close()

    @classmethod
    def ask_image_scale_and_rescale(cls, image):
        """Interactively ask the operator for a scale factor and resize the image.

        "i" skips the image (returns None); a number rescales by its inverse;
        anything else re-prompts.
        """
        # small: S, Medium: M, Large:L
        show_and_wait(image)
        res = input("how much plus pointer fill a cell(float, i:ignore, else repeat): ")
        try:
            if res == "i":
                return None
            elif re.match("[0-9]+(.[0-9]*)?", res):
                scale = 1 / float(res)
                return cv2.resize(image, (0, 0), fx=scale, fy=scale)
            else:
                return cls.ask_image_scale_and_rescale(image)
        except Exception as e:
            print(e)
            return cls.ask_image_scale_and_rescale(image)
if __name__ == '__main__':
    # Fix the RNG seed so any random patch sampling is reproducible across runs.
    random.seed(1)
    database_directory = "./"
    # ImageAndSlidePatcher.save_patches_in_folders(database_directory, dataset_dir=["stanford_tissue_microarray"])
    # ImageAndSlidePatcher.save_papsociaty_patch(os.path.join(database_directory, "papsociaty"))
import concurrent.futures | |||||
import os | |||||
import pathlib | |||||
from tqdm import tqdm | |||||
from config import Config | |||||
from image_patcher import ImageAndSlidePatcher | |||||
from national_cancer_institute.read_xml_file import get_slide_info_from_bcr_xml | |||||
def save_national_cancer_institute_patch(database_path):
    """Patch every .svs slide of the National Cancer Institute (TCGA) dataset.

    Slide labels are read from the BCR biospecimen XML files found under
    ``<database_path>/data``; slides without an XML entry are skipped. Patching
    runs on a thread pool and the metadata CSV is flushed after each slide that
    produced rows.

    Args:
        database_path: root directory of the national_cancer_institute dataset.
    """

    def patch_image(image_path):
        """Patch a single slide; returns the patcher's result or None on skip/error."""
        try:
            image_path = str(image_path)
            print()
            print("image path: ", image_path)
            file_name = ImageAndSlidePatcher._get_file_name_from_path(image_path)
            # Slide barcode is the file name up to the first dot.
            slide_id = file_name.split(".")[0]
            slide_patch_dir = os.path.join(patch_dir, slide_id)
            if os.path.isdir(slide_patch_dir):
                print("it has already been patched")
                return
            web_label = slide_infos.get(slide_id, None)
            if web_label is None:
                print("Ignored")
                return
            web_details = {"database_name": "NationalCancerInstitute",
                           "image_id": slide_id,
                           "image_web_label": web_label,
                           "image_class_label": web_label,
                           "report": None,
                           "stain_type": "H&E",
                           "is_wsi": True}
            return ImageAndSlidePatcher.save_image_patches_and_update_csv(web_label, None, csv_writer, web_details,
                                                                          image_path, slide_patch_dir, slide_id)
        except Exception as e:
            # Best-effort: log and keep going with the remaining slides.
            print(e)

    data_dir = os.path.join(database_path, "data")
    slide_infos = {}
    for xml_path in pathlib.Path(data_dir).glob("**/*.xml"):
        slide_infos.update(get_slide_info_from_bcr_xml(str(xml_path)))
    data_dir, patch_dir, csv_writer, csv_file = ImageAndSlidePatcher.create_patch_dir_and_initialize_csv(database_path)
    try:
        csv_file.flush()
        with concurrent.futures.ThreadPoolExecutor(max_workers=Config.workers) as executor:
            image_paths = list(pathlib.Path(data_dir).glob("**/*.svs"))
            print()
            for res in tqdm(executor.map(patch_image, image_paths), total=len(image_paths)):
                if res:
                    # Persist rows of each completed slide immediately.
                    csv_file.flush()
        csv_file.flush()
    finally:
        # BUGFIX: the original only flushed and never closed the CSV handle.
        csv_file.close()
if __name__ == '__main__':
    # Dataset root sits one directory up from this script.
    database_directory = "../"
    save_national_cancer_institute_patch(os.path.join(database_directory, "national_cancer_institute"))
# Extend PYTHONPATH so sibling project packages resolve, then run the patcher.
export PYTHONPATH="${PYTHONPATH}:../../";
export PYTHONPATH="${PYTHONPATH}:../";
export PYTHONPATH="${PYTHONPATH}:./";
python bio_atlas_patcher.py;
# Extend PYTHONPATH so sibling project packages resolve, then run the patcher.
export PYTHONPATH="${PYTHONPATH}:../../";
export PYTHONPATH="${PYTHONPATH}:../";
export PYTHONPATH="${PYTHONPATH}:./";
python image_patcher.py;
# Extend PYTHONPATH so sibling project packages resolve, then run the patcher.
export PYTHONPATH="${PYTHONPATH}:../../";
export PYTHONPATH="${PYTHONPATH}:../";
export PYTHONPATH="${PYTHONPATH}:./";
python national_cancer_patcher.py;
import concurrent.futures | |||||
import math | |||||
import cv2 | |||||
import matplotlib.pyplot as plt | |||||
import numpy as np | |||||
from config import Config | |||||
from database_crawlers.image_patcher.image_patcher import ImageAndSlidePatcher, ThyroidFragmentFilters | |||||
from utils import check_if_generator_is_empty | |||||
def imul(a, b):
    """Multiply *a* by *b* and round the product up to the nearest integer."""
    product = a * b
    return math.ceil(product)
def calculate_acc_and_sensitivity(image_path, zarr_loader_mask, zarr_loader, frag_generator, scaled_masked_image,
                                  generated_mask_scale, laplacian_threshold, slide_patch_size,
                                  save_generated_image=True):
    """Compare the Laplacian-threshold filter against a ground-truth mask slide.

    Each fragment from `frag_generator` is classified by the filter (its
    `condition` flag) and by the mask slide (mean mask intensity > 30% of 256),
    accumulating a TP/FP/TN/FN confusion dict. Optionally paints a small
    preview image where rejected (background) patches are darkened, and saves
    it next to `image_path` when `save_generated_image` is True.

    Returns:
        dict with keys "TP", "FP", "TN", "FN" (counts of fragments).
    """

    def process_frag(args):
        # args comes from ImageAndSlidePatcher._filter_frag_from_generator with
        # return_all_with_condition=True: (fragment, position, filter condition).
        next_test_item, frag_pos, condition = args
        frag_shape = next_test_item.shape
        # Map the fragment's extent and position into mask-slide coordinates.
        mask_scaled_frag_shape = list((imul(frag_shape[i], mask_scale) for i in range(2)))
        mask_frag_pos = list((imul(frag_pos[i], mask_scale) for i in range(2)))
        mask_w1, mask_w2 = mask_frag_pos[0], mask_frag_pos[0] + mask_scaled_frag_shape[0]
        mask_h1, mask_h2 = mask_frag_pos[1], mask_frag_pos[1] + mask_scaled_frag_shape[1]
        mask_item = zarr_loader_mask[mask_w1:mask_w2, mask_h1:mask_h2]
        # Bring the mask crop back to fragment resolution for comparison.
        mask_item = cv2.resize(mask_item, dsize=(0, 0), fx=1 / mask_scale, fy=1 / mask_scale)
        fragment_size = next_test_item.shape
        # Down-scale the fragment for the preview canvas.
        scaled_frag_size = (imul(fragment_size[0], generated_mask_scale), imul(fragment_size[1], generated_mask_scale))
        scaled_frag = cv2.resize(next_test_item[:, :, :3], dsize=scaled_frag_size, interpolation=cv2.INTER_CUBIC)
        scaled_frag_size = scaled_frag.shape
        # NOTE(review): next_test_item was already dereferenced above, so this
        # None check looks ineffective — confirm whether generator can yield None.
        if next_test_item is not None:
            mask_item = mask_item[:, :, 0]
            # Fragment counts as tissue when mask mean exceeds 30% of full scale.
            masked = mask_item.mean() > 256 * .3
            if condition and masked:
                background_dict["TP"] += 1
            elif condition and not masked:
                background_dict["FP"] += 1
            elif not condition and masked:
                background_dict["FN"] += 1
                # show_and_wait(next_test_item)
                # show_and_wait(mask_item)
            elif not condition and not masked:
                background_dict["TN"] += 1
            else:
                return None
            if not condition:
                # background patches get dark
                scaled_frag = (scaled_frag * 0.3).astype(np.int8)
            scaled_pos = list((imul(frag_pos[i], generated_mask_scale) for i in range(2)))
            try:
                mask_g_w1, mask_g_w2 = scaled_pos[0], scaled_pos[0] + scaled_frag_size[0]
                mask_g_h1, mask_g_h2 = scaled_pos[1], scaled_pos[1] + scaled_frag_size[1]
                scaled_masked_image[mask_g_w1:mask_g_w2, mask_g_h1:mask_g_h2] = scaled_frag
            except Exception as e:
                # Rounding can overflow the preview canvas at the borders; skip paint.
                print(e)
        return True

    # Ratio between mask-slide and original-slide resolution (first axis).
    mask_scale = zarr_loader_mask.shape[0] / zarr_loader.shape[0]
    filter_func_list = [ThyroidFragmentFilters.func_laplacian_threshold(laplacian_threshold)]
    background_dict = {"TP": 0, "FP": 0, "TN": 0, "FN": 0}
    total_frags = slide_patch_size if slide_patch_size else ImageAndSlidePatcher._get_number_of_initial_frags(
        zarr_loader)
    frag_filtered = ImageAndSlidePatcher._filter_frag_from_generator(frag_generator, filter_func_list,
                                                                     return_all_with_condition=True,
                                                                     all_frag_count=total_frags)
    with concurrent.futures.ThreadPoolExecutor(max_workers=Config.workers) as executor:
        futures = []
        patch_count = 0
        for args in frag_filtered:
            patch_count += 1
            future_res = executor.submit(process_frag, args)
            futures.append(future_res)
            # Drain the pool in waves of Config.workers (or at the patch budget).
            if len(futures) >= Config.workers or patch_count == slide_patch_size:
                for future in concurrent.futures.as_completed(futures):
                    future.result()
                futures = []
            if patch_count == slide_patch_size:
                break
    if save_generated_image:
        masked_image_path = ".".join(image_path.split(".")[:-1]) + "_generated_mask.jpg"
        cv2.imwrite(masked_image_path, scaled_masked_image)
    return background_dict
def score_calculator(accuracy, specificity, acc_w=0.75):
    """Blend accuracy and specificity into one objective; acc_w weights accuracy."""
    specificity_weight = 1 - acc_w
    return accuracy * acc_w + specificity * specificity_weight
def get_zarr_loaders_and_generators():
    """Build per-slide state for each pair in the module-level `image_lists`.

    For every (mask_path, slide_path) pair returns a 5-item list:
    [mask zarr loader, slide zarr loader, shuffled fragment generator,
     blank preview canvas (numpy zeros), preview canvas scale].
    """
    zarr_loaders_and_generators = []
    for _img_mask_path, _img_path in image_lists:
        _zarr_loader_mask = ImageAndSlidePatcher._zarr_loader(_img_mask_path)
        _zarr_loader = ImageAndSlidePatcher._zarr_loader(_img_path)
        _frag_generator = ImageAndSlidePatcher._generate_raw_fragments_from_image_array_or_zarr(_zarr_loader,
                                                                                                shuffle=True)
        _zarr_shape = _zarr_loader.shape
        # Preview canvas scale 10/512 (presumably ~10 px per 512 px patch — confirm);
        # the +5 padding absorbs ceil-rounding overflow at the borders.
        _generated_mask_scale = 10 / 512
        _scaled_zarr_shape = (
            imul(_zarr_shape[0], _generated_mask_scale) + 5, imul(_zarr_shape[1], _generated_mask_scale) + 5, 3)
        _scaled_masked_image = np.zeros(_scaled_zarr_shape)
        zarr_loaders_and_generators.append([
            _zarr_loader_mask, _zarr_loader, _frag_generator, _scaled_masked_image, _generated_mask_scale
        ])
    return zarr_loaders_and_generators
def update_and_find_best_threshold(initial_thresh, learn_threshold_and_log_cf_matrix_per_patch=True):
    """Search for the Laplacian threshold maximizing the accuracy/precision score.

    In learning mode (`learn_threshold_and_log_cf_matrix_per_patch=True`) the
    threshold is adjusted after each batch: the jump direction flips and the jump
    size decays (by `decay_const`) whenever the score stops improving — a simple
    hill-climb with decaying step. In evaluation mode one epoch runs with the
    given threshold fixed and preview masks are saved.

    Args:
        initial_thresh: starting Laplacian threshold.
        learn_threshold_and_log_cf_matrix_per_patch: learn vs evaluate mode.

    Returns:
        The final (learned or unchanged) Laplacian threshold.
    """
    initial_threshold_jump_size_const = 120
    threshold_jump_size = initial_threshold_jump_size_const
    decay_const = 0.85
    decay_count = 0
    threshold_jump_increase = 1
    threshold_score = None
    # update after initial run
    laplacian_threshold = initial_thresh
    threshold_history = []
    score_history = []
    for epoch in range((Config.n_epoch_for_image_patcher if learn_threshold_and_log_cf_matrix_per_patch else 1)):
        print("New Epoch")
        zarr_loaders_and_generators = get_zarr_loaders_and_generators()
        whole_background_dict_per_slide = [{} for i in range(len(zarr_loaders_and_generators))]
        whole_background_dict = {}
        # Exhausted slides are replaced by None; loop until every slide is done.
        while sum([item is not None for item in zarr_loaders_and_generators]) >= 1:
            none_empty_generators = [i for i in range(len(zarr_loaders_and_generators)) if
                                     zarr_loaders_and_generators[i] is not None]
            if learn_threshold_and_log_cf_matrix_per_patch:
                # Learning mode scores each batch independently.
                whole_background_dict = {}
            # In learning mode require at least 6 live slides so the batch score
            # is not dominated by a few slides; evaluation mode always proceeds.
            if len(none_empty_generators) >= 6 or not learn_threshold_and_log_cf_matrix_per_patch:
                for slide_pick in none_empty_generators:
                    img_path = image_lists[slide_pick][1]
                    zarr_loader_mask = zarr_loaders_and_generators[slide_pick][0]
                    zarr_loader = zarr_loaders_and_generators[slide_pick][1]
                    frag_generator = zarr_loaders_and_generators[slide_pick][2]
                    generated_scaled_mask_image = zarr_loaders_and_generators[slide_pick][3]
                    generated_mask_scale = zarr_loaders_and_generators[slide_pick][4]
                    group_dict = calculate_acc_and_sensitivity(img_path,
                                                               zarr_loader_mask,
                                                               zarr_loader,
                                                               frag_generator,
                                                               generated_scaled_mask_image,
                                                               generated_mask_scale,
                                                               laplacian_threshold,
                                                               slide_patch_size=2000,
                                                               save_generated_image=not learn_threshold_and_log_cf_matrix_per_patch)
                    # Probe every generator; mark exhausted slides as None.
                    for i in range(len(zarr_loaders_and_generators)):
                        if zarr_loaders_and_generators[i]:
                            generator = check_if_generator_is_empty(zarr_loaders_and_generators[i][2])
                            if generator:
                                zarr_loaders_and_generators[i][2] = generator
                            else:
                                zarr_loaders_and_generators[i] = None
                    # Accumulate confusion counts globally and per slide.
                    for key, value in group_dict.items():
                        whole_background_dict[key] = whole_background_dict.get(key, 0) + value
                        whole_background_dict_per_slide[slide_pick][key] = whole_background_dict_per_slide[
                            slide_pick].get(key, 0) + value
                if learn_threshold_and_log_cf_matrix_per_patch:
                    # Epsilon avoids division by zero on empty batches.
                    e = .000001
                    total_preds = (sum(list(whole_background_dict.values())) + e)
                    acc = (whole_background_dict["TP"] + whole_background_dict["TN"]) / total_preds
                    positive_preds = (whole_background_dict["TP"] + whole_background_dict["FP"] + e)
                    precision = whole_background_dict["TP"] / positive_preds
                    next_score = score_calculator(acc, precision)
                    if threshold_score is None:
                        # First batch only establishes the baseline score.
                        threshold_score = next_score
                    else:
                        threshold_history.append(laplacian_threshold)
                        score_history.append(next_score)
                        if next_score > threshold_score:
                            # Improvement: keep moving in the same direction.
                            threshold_score = next_score
                            laplacian_threshold += threshold_jump_increase * threshold_jump_size
                        elif next_score <= threshold_score:
                            # No improvement: reverse direction and decay the step.
                            threshold_score = next_score
                            threshold_jump_increase *= -1
                            threshold_jump_size *= decay_const
                            laplacian_threshold += threshold_jump_increase * threshold_jump_size
                            decay_count += 1
                        save_threshold_and_score_chart(threshold_history, score_history)
                    acc = round(acc, 3)
                    precision = round(precision, 3)
                    threshold_score_rounded = round(threshold_score, 3)
                    print(
                        f"acc:{acc},precision:{precision},score:{threshold_score_rounded},table:{whole_background_dict}" +
                        f"thresh:{laplacian_threshold},jump_size:{threshold_jump_size}")
                else:
                    print(f"table:{whole_background_dict},table_per_slide:{whole_background_dict_per_slide}" +
                          f"threshold:{laplacian_threshold},jump_size:{threshold_jump_size}")
            else:
                # Too few live slides to learn from — stop this epoch.
                break
    return laplacian_threshold
def save_threshold_and_score_chart(threshold_history, score_history):
    """Save two line charts: threshold-per-batch and objective-score-per-batch.

    Writes `laplacian_threshold_history_chart.jpeg` and
    `laplacian_threshold_score_history_chart.jpeg` into the working directory,
    clearing the pyplot figure after each save.
    """
    fig_save_path = "laplacian_threshold_history_chart.jpeg"
    plt.plot(range(len(threshold_history)), threshold_history)
    plt.xlabel('Batch')
    plt.ylabel('Laplacian threshold')
    plt.savefig(fig_save_path)
    plt.clf()
    fig_save_path = "laplacian_threshold_score_history_chart.jpeg"
    plt.plot(range(len(score_history)), score_history)
    plt.xlabel('Batch')
    # BUGFIX: axis label typo "Sore" -> "Score".
    plt.ylabel('Objective function - Score')
    plt.savefig(fig_save_path)
    plt.clf()
if __name__ == '__main__':
    # Pairs of (ground-truth mask tiff, original svs slide). The inline comments
    # look like (normal%, tumor%, stromal%) tuples from the biospecimen XML —
    # TODO confirm against read_xml_file ordering.
    image_lists = [
        ( # "('0', '100', '0')"
            "./TCGA-BJ-A3F0-01A-01-TSA.728CE583-95BE-462B-AFDF-FC0B228DF3DE__3_masked.tiff",
            "./TCGA-BJ-A3F0-01A-01-TSA.728CE583-95BE-462B-AFDF-FC0B228DF3DE__3.svs"
        ),
        ( # "('0', '100', '0')"
            "./TCGA-DJ-A1QG-01A-01-TSA.04c62c21-dd45-49ea-a74f-53822defe097__2000_masked.tiff",
            "./TCGA-DJ-A1QG-01A-01-TSA.04c62c21-dd45-49ea-a74f-53822defe097__2000.svs"
        ),
        # ( # "('0', '100', '0')"
        #     "./TCGA-EL-A3ZQ-01A-01-TS1.344610D2-AB50-41C6-916E-FF0F08940BF1__2000_masked.tiff",
        #     "./TCGA-EL-A3ZQ-01A-01-TS1.344610D2-AB50-41C6-916E-FF0F08940BF1__2000.svs"
        # ),
        ( # "('45', '55', '0')"
            "./TCGA-ET-A39N-01A-01-TSA.C38FCE19-9558-4035-9F0B-AD05B9BE321D___198_masked.tiff",
            "./TCGA-ET-A39N-01A-01-TSA.C38FCE19-9558-4035-9F0B-AD05B9BE321D___198.svs"
        ),
        # ( # "('0', '40', '60')"
        #     "./TCGA-J8-A42S-01A-01-TSA.7B80CBEB-7B85-417E-AA0C-11C79DE40250__0_masked.tiff",
        #     "./TCGA-J8-A42S-01A-01-TSA.7B80CBEB-7B85-417E-AA0C-11C79DE40250__0.svs"
        # ),
        ( # "('0', '90', '10')"
            "./TCGA-ET-A39O-01A-01-TSA.3829C900-7597-4EA9-AFC7-AA238221CE69_7000_masked.tiff",
            "./TCGA-ET-A39O-01A-01-TSA.3829C900-7597-4EA9-AFC7-AA238221CE69_7000.svs"
        ),
        ( # "('100', '0', '0')"
            "./TCGA-EL-A4K7-11A-01-TS1.C08B59AA-87DF-4ABB-8B70-25FEF9893C7F__70_masked.tiff",
            "./TCGA-EL-A4K7-11A-01-TS1.C08B59AA-87DF-4ABB-8B70-25FEF9893C7F__70.svs"
        ),
        ( # "('100', '0', '0')"
            "./TCGA-EL-A3TB-11A-01-TS1.6E0966C9-1552-4B30-9008-8ACF737CA8C3__2000_masked.tiff",
            "./TCGA-EL-A3TB-11A-01-TS1.6E0966C9-1552-4B30-9008-8ACF737CA8C3__2000.svs"
        ),
    ]
    # First learn a threshold, then re-run once in evaluation mode to log the
    # confusion tables and save the generated preview masks.
    learned_threshold = update_and_find_best_threshold(500, learn_threshold_and_log_cf_matrix_per_patch=True)
    update_and_find_best_threshold(learned_threshold, learn_threshold_and_log_cf_matrix_per_patch=False)
    # Start with 500 with jump size 120 and decay 0.85
    # table:{'TP': 15018, 'FP': 412, 'TN': 66898, 'FN': 2389},
    # table_per_slide:[
    # {'TP': 460, 'FP': 0, 'TN': 19618, 'FN': 1426},
    # {'TP': 4624, 'FP': 126, 'TN': 14100, 'FN': 226},
    # {'TP': 1138, 'FP': 4, 'TN': 6671, 'FN': 492},
    # {'TP': 7615, 'FP': 92, 'TN': 20871, 'FN': 234},
    # {'TP': 78, 'FP': 18, 'TN': 1880, 'FN': 4},
    # {'TP': 1103, 'FP': 172, 'TN': 3758, 'FN': 7}
    # ]
    # threshold:298.86314585743395,jump_size:120
# Extend PYTHONPATH up to the repository root so project imports resolve,
# then run the patch-filter evaluation/visualization script.
export PYTHONPATH="${PYTHONPATH}:../../../../";
export PYTHONPATH="${PYTHONPATH}:../../../";
export PYTHONPATH="${PYTHONPATH}:../../";
export PYTHONPATH="${PYTHONPATH}:../";
export PYTHONPATH="${PYTHONPATH}:./";
python evaluate_image_patcher_and_visualize.py;
# Extend PYTHONPATH so sibling project packages resolve, then run the script.
export PYTHONPATH="${PYTHONPATH}:../../";
export PYTHONPATH="${PYTHONPATH}:../";
export PYTHONPATH="${PYTHONPATH}:./";
python patcher_distribution.py;
import pathlib | |||||
import matplotlib.pyplot as plt | |||||
from national_cancer_institute.read_xml_file import get_slide_info_from_bcr_xml | |||||
if __name__ == '__main__':
    data_dir = "data/"
    # slide_infos maps slide barcode -> (normal%, tumor%, stromal%) strings.
    slide_infos = {}
    for xml_path in pathlib.Path(data_dir).glob("**/*.xml"):
        slide_infos.update(get_slide_info_from_bcr_xml(str(xml_path)))
    # Keep only slides with zero stromal cells; item[1] is the tumor-cell percent.
    cell_percents = [int(item[1]) for item in slide_infos.values() if int(item[2]) == 0]
    print("tumor:", len([i for i in cell_percents if i == 100]))
    print("normal", len([i for i in cell_percents if i == 0]))
    # Slides that are neither purely tumor nor purely normal.
    print([i for i in cell_percents if i != 0 and i != 100])
    print(len(cell_percents))
    plt.hist(cell_percents, bins=150)
    plt.savefig("tumor_cell_distribution.jpeg")
import concurrent.futures | |||||
import os | |||||
import pathlib | |||||
import matplotlib.pyplot as plt | |||||
from tqdm import tqdm | |||||
from config import Config | |||||
from image_patcher import ImageAndSlidePatcher | |||||
def save_patch_distribution(database_path):
    """Plot how many patches were extracted per slide, absolute and as a percent.

    For every .svs under ``<database_path>/data``, counts the extracted .jpeg
    patches in the slide's patch directory and the number of initial fragments
    the slide would yield, then saves two histograms
    (`patch_distribution.jpeg`, `patch_percent_distribution.jpeg`).

    Args:
        database_path: dataset root containing `data/` and `patches/`.
    """

    def patch_image(image_path):
        """Return (extracted_patch_count, total_initial_frags) or None when unpatched."""
        try:
            image_path = str(image_path)
            file_name = ImageAndSlidePatcher._get_file_name_from_path(image_path)
            slide_id = file_name.split(".")[0]
            slide_patch_dir = os.path.join(patch_dir, slide_id)
            if ImageAndSlidePatcher._get_extension_from_path(image_path) in [".tiff", ".tif", ".svs"]:
                zarr_object = ImageAndSlidePatcher._zarr_loader(image_path)
                total_counts = ImageAndSlidePatcher._get_number_of_initial_frags(zarr_object=zarr_object)
            else:
                jpeg_image = ImageAndSlidePatcher._jpeg_loader(image_path)
                jpeg_image = ImageAndSlidePatcher.ask_image_scale_and_rescale(jpeg_image)
                total_counts = ImageAndSlidePatcher._get_number_of_initial_frags(zarr_object=jpeg_image)
            if os.path.exists(slide_patch_dir):
                fragment_id = len([i for i in pathlib.Path(slide_patch_dir).glob("*.jpeg")])
                return fragment_id, total_counts
            # Slide was never patched; made explicit instead of an implicit fall-through.
            return None
        except Exception as e:
            print("error")
            print(e)
            # Re-raise so the failure surfaces when executor.map results are consumed.
            raise

    res_patch_counts = []
    data_dir = os.path.join(database_path, "data")
    patch_dir = os.path.join(database_path, "patches")
    with concurrent.futures.ThreadPoolExecutor(max_workers=Config.workers) as executor:
        image_paths = list(pathlib.Path(data_dir).glob("**/*.svs"))
        print()
        for res in tqdm(executor.map(patch_image, image_paths), total=len(image_paths)):
            if res:
                # res is (patch_count, total_frags); the original unpacked it into
                # unused locals — dropped.
                res_patch_counts.append(res)
        print(res_patch_counts)
        plt.hist([i[0] for i in res_patch_counts], bins=100)
        plt.xlabel("Patch per slide")
        plt.ylabel("Frequency")
        plt.savefig("patch_distribution.jpeg")
        plt.clf()
        # Epsilon guards against slides reporting zero initial fragments.
        plt.hist([round(i[0] / (i[1] + 0.00001), 5) * 100 for i in res_patch_counts], bins=100)
        plt.xlabel("Patch per slide percent")
        plt.ylabel("Frequency")
        plt.savefig("patch_percent_distribution.jpeg")
        plt.clf()
if __name__ == '__main__':
    # Dataset root sits one directory up from this script.
    database_directory = "../"
    save_patch_distribution(os.path.join(database_directory, "national_cancer_institute"))
from xml.dom import minidom | |||||
def get_slide_info_from_bcr_xml(xml_path): | |||||
file = minidom.parse(xml_path) | |||||
patient = file.childNodes[0].getElementsByTagName("bio:patient")[0] | |||||
data_dict = {} | |||||
try: | |||||
for i in range(10): | |||||
percent_tumor_cells = patient.getElementsByTagName("bio:percent_tumor_cells")[i].childNodes[ | |||||
0].data.strip() | |||||
percent_normal_cells = patient.getElementsByTagName("bio:percent_normal_cells")[i].childNodes[ | |||||
0].data.strip() | |||||
percent_stormal_cells = patient.getElementsByTagName("bio:percent_stromal_cells")[i].childNodes[ | |||||
0].data.strip() | |||||
slide_barcode = patient.getElementsByTagName("shared:bcr_slide_barcode")[i].childNodes[0].data.strip() | |||||
data_dict[slide_barcode] = (percent_normal_cells, percent_tumor_cells, percent_stormal_cells) | |||||
except Exception as e: | |||||
pass | |||||
return data_dict | |||||
if __name__ == '__main__':
    # Smoke test on one known biospecimen XML file.
    path = "../national_cancer_institute/data/1aea8f2a-f809-4f19-bed3-1365e9aab33b/nationwidechildrens.org_biospecimen.TCGA-BJ-A28X.xml"
    res = get_slide_info_from_bcr_xml(path)
    print(res)
# Extend PYTHONPATH so sibling project packages resolve, then run the script.
export PYTHONPATH="${PYTHONPATH}:../../";
export PYTHONPATH="${PYTHONPATH}:../";
export PYTHONPATH="${PYTHONPATH}:./";
python cells_chart.py;
# Extend PYTHONPATH (including the image_patcher package dir) so project
# imports resolve, then run the script.
export PYTHONPATH="${PYTHONPATH}:../../";
export PYTHONPATH="${PYTHONPATH}:../";
export PYTHONPATH="${PYTHONPATH}:../image_patcher";
export PYTHONPATH="${PYTHONPATH}:./";
python patch_distribution.py;
1811210076455461803,BENIGN-BTNpap-hyperplZC04-10370x4_0 | |||||
7845783054062606488,BENIGN-Dan_Colloid1_0 | |||||
7637345021064072354,BENIGN-FTH468BTN_sheets_follicles_1_0 | |||||
814792220375115888,BENIGN-GD40T | |||||
2888555316355101926,BENIGN-gd-fn20_0 | |||||
-8312464544528256569,BENIGN-gd10p_0 | |||||
-2254510488499374008,BENIGN-gd20g_0 | |||||
2271447961045108683,MALIGNANT-ATC20L_0 | |||||
7269337178939971574,MALIGNANT-ATC20b_0 | |||||
-9073528708751422131,MALIGNANT-ATC40BB_0 | |||||
-8571937763202005072,MALIGNANT-ATC40FC_0 | |||||
-8610450256188951874,MALIGNANT-ATC40J_0 | |||||
-7854679564093375561,MALIGNANT-ATC40p_0 | |||||
-4977783033606377395,MALIGNANT-ATC5A_0 | |||||
1342853015262631578,MALIGNANT-Atc63w_0 | |||||
431837691807971266,MALIGNANT-C03-54313A_0 | |||||
8004646368797684873,MALIGNANT-C03-54313B_0 | |||||
8041832687277297518,MALIGNANT-C03-54313C_0 | |||||
6449521961463025237,MALIGNANT-C03-54313D_0 | |||||
-8688277356782858138,MALIGNANT-C03-54313E_0 | |||||
-9068734717515564721,MALIGNANT-C03-54313F_0 | |||||
-5752120463987418399,MALIGNANT-C03-54313G_0 | |||||
8889530878367993817,MALIGNANT-Ed82C_1_0 | |||||
2934951614446666978,"MALIGNANT-Follicular_neoplasm2,_low_power,_confirmed_FVPTC_DQ_SM_0" | |||||
-1541914789037593200,"MALIGNANT-Pap_CA,_excellent_inclusion,_DQ_0" | |||||
-1717557305576530323,"MALIGNANT-Pap_CA,_powdery_nuclei,_small_nucleoli_and_small_inclusion,_PAP_hp_0" | |||||
947862089311894766,"MALIGNANT-Papillary_CA,_giant_cell,_DQ_lp_0" | |||||
-8520432463383972777,"MALIGNANT-Papillary_CA,_giant_cell_and_tumor_sheets,_PAP_lp2_0" | |||||
-4759582710547943524,"MALIGNANT-Papillary_CA,_high_cellularity_3,_PAP_lp_0" |
import os | |||||
import shutil | |||||
if __name__ == '__main__':
    # Each line of the report is "<hashed folder id>,<label>"; delete the
    # matching patch folder for every listed duplicate image.
    duplicate_info_file_path = "duplicate_image.txt"
    with open(duplicate_info_file_path, "r") as listing:
        for record in listing.readlines():
            duplicate_folder = os.path.join("./patches", record.split(",")[0])
            if os.path.exists(duplicate_folder):
                shutil.rmtree(duplicate_folder)
                print("deleted")
            else:
                print("no")
import json | |||||
from urllib.parse import urlparse | |||||
from urllib.request import urlretrieve | |||||
import requests | |||||
from bs4 import BeautifulSoup | |||||
from database_crawlers.web_stain_sample import WebStainImage, StainType | |||||
class StanfordTissueMicroArrayStainSample(WebStainImage):
    """One thumbnail sample from the Stanford tissue microarray image store."""

    def __init__(self, database_name, image_id, image_web_label, report, stain_type, is_wsi):
        super().__init__(database_name, image_id, image_web_label, report, stain_type, is_wsi)

    def get_slide_view_url(self):
        """Public Google-Storage URL of the JPEG for this sample."""
        return f"https://storage.googleapis.com/jpg.tma.im/{self.image_id}"

    def get_file_name(self):
        """Local path prefix (no extension) derived from the slash-sanitized image id."""
        sanitized = self.image_id.replace("/", "_")
        # Drop the final extension segment while keeping any earlier dots.
        stem = ".".join(sanitized.split(".")[:-1])
        return self.save_path + stem

    def get_relative_image_path(self):
        """Path the downloaded JPEG is written to."""
        return self.get_file_name() + ".jpeg"

    def get_relative_json_path(self):
        """Path the metadata JSON sidecar is written to."""
        return self.get_file_name() + ".json"

    def crawl_image_save_jpeg(self):
        """Download the JPEG and write the metadata JSON next to it."""
        urlretrieve(self.get_slide_view_url(), self.get_relative_image_path())
        serialized = json.dumps(self.to_json())
        with open(self.get_relative_json_path(), "w") as outfile:
            outfile.write(serialized)
class StanfordTissueMicroArraySlideProvider:
    """Crawls the Stanford thyroid TMA search page and yields sample records."""

    page_link = "https://tma.im/cgi-bin/selectImages.pl?organ=thyroid"
    database_name = "StanfordTissueMicroArray"
    stain_type = StainType.UNKNOWN
    is_wsi = False

    @classmethod
    def get_web_stain_samples(cls):
        """Yield a StanfordTissueMicroArrayStainSample per result tile on the page."""
        payload = {'250 small images': '250 small images'}
        files = []
        headers = {
            'Cookie': 'DAD_ATTEMPTS=0; DAD_SID=36d77eb69e009b1cf1ebc9c3d7866546; DAD_USERID=WORLD'
        }
        response = requests.post(cls.page_link, files=files, headers=headers, data=payload)
        soup = BeautifulSoup(response.content.decode("utf-8"), 'html.parser')
        result_tiles = soup.find_all("div", {"class": "iDiv0", "style": "width: 86px; height: 260px;"})
        for tile in result_tiles:
            image_url = tile.find("a", {"target": "_blank"}).attrs['href']
            # Image id is the URL path minus its first segment.
            image_id = "/".join(urlparse(image_url).path.strip("/").split("/")[1:])
            label_paragraphs = list(tile.find_all("p", {"class": "iDiv1"}))
            # The second-to-last caption paragraph holds the diagnosis label.
            image_web_label = label_paragraphs[-2].text
            yield StanfordTissueMicroArrayStainSample(cls.database_name, image_id, image_web_label, None,
                                                      cls.stain_type, cls.is_wsi)
if __name__ == '__main__':
    # Crawl every thyroid sample, logging its id/label/URL, and download it.
    for slide in StanfordTissueMicroArraySlideProvider.get_web_stain_samples():
        print(slide.image_id, slide.image_web_label, slide.get_slide_view_url())
        slide.crawl_image_save_jpeg()
import concurrent.futures | |||||
import concurrent.futures | |||||
import time | |||||
from urllib.error import HTTPError | |||||
from urllib.request import urlretrieve | |||||
from torch.utils.data import IterableDataset | |||||
from tqdm import tqdm | |||||
from config import Config | |||||
def find_in_log_n(start, end, func, bias=0.3):
    """Biased binary search over [start, end).

    Assumes func is True on a prefix of the range and False afterwards
    (monotone — TODO confirm with callers); returns the largest value for
    which func holds. `bias` skews the first probe toward `start`; recursive
    calls fall back to the default bias.
    """
    if end - start <= 1:
        return start
    probe = int(start * (1 - bias) + end * bias)
    if probe == start:
        # Rounding pinned the probe to start; nudge forward to guarantee progress.
        probe += 1
    if func(probe):
        return find_in_log_n(probe, end, func)
    return find_in_log_n(start, probe, func)
def fetch_tile_content(tile_url, retry=15):
    """Download one tile with exponential backoff, returning its raw bytes.

    On each failure sleeps 2**(0.3*(attempt+1)) seconds. After the final
    failed attempt the operator is asked whether to keep trying ("y" restarts
    the whole retry budget); any other answer re-raises the last error.

    Raises:
        The last download exception, or HTTPError if retry <= 0 (the trailing
        raise is unreachable when the loop runs at least once).
    """
    for i in range(retry):
        try:
            # urlretrieve downloads to a temp file; read it back as bytes.
            image_path = urlretrieve(tile_url)[0]
            with open(image_path, "rb") as file:
                return file.read()
        except Exception as e:
            print("e", end="|")
            time.sleep(2 ** (0.3 * (i + 1)))
            if i == retry - 1:
                if input("continue") == "y":
                    return fetch_tile_content(tile_url, retry)
                raise e
    raise HTTPError("Not able for fetch image tile", code=500, msg="", hdrs={}, fp=None)
def download_urls_in_thread(url_and_index_list):
    """Fetch every (url, index) pair on a thread pool, yielding (content, index).

    executor.map preserves input order, and a tqdm bar tracks progress.
    """

    def _download(pair):
        link, idx = pair
        return fetch_tile_content(link), idx

    with concurrent.futures.ThreadPoolExecutor(max_workers=Config.workers) as pool:
        yield from tqdm(pool.map(_download, url_and_index_list), total=len(url_and_index_list))
def _get_alignment_sore_and_percent(seq1, seq2, match_score=2, mismatch_score=-1, gap_score=-1):
    """Return the global-alignment score of two sequences.

    Uses the third-party `alignment` package, imported lazily so the module
    loads without it when alignment scoring is never needed. Despite the name
    (typo for "score"), only the score is returned — no percentage.
    """
    from alignment.sequence import Sequence
    from alignment.sequencealigner import SimpleScoring, GlobalSequenceAligner
    from alignment.vocabulary import Vocabulary
    a = Sequence(seq1)
    b = Sequence(seq2)
    # Encode both sequences over a shared vocabulary before aligning.
    v = Vocabulary()
    aEncoded = v.encodeSequence(a)
    bEncoded = v.encodeSequence(b)
    scoring = SimpleScoring(match_score, mismatch_score)
    aligner = GlobalSequenceAligner(scoring, gap_score)
    # backtrace=False: score only, no alignment path.
    score = aligner.align(aEncoded, bEncoded, backtrace=False)
    return score
def get_normalized_score(seq1, seq2):
    """Global-alignment score normalized by the combined length of both sequences."""
    raw_score = _get_alignment_sore_and_percent(seq1, seq2)
    combined_length = len(seq1) + len(seq2)
    return raw_score / combined_length
class DatasetWithGenerator(IterableDataset):
    """Adapts an existing (single-use) generator into a torch IterableDataset."""

    def __init__(self, generator):
        # Keep a handle on the generator; iteration is delegated straight to it.
        self.generator = generator

    def __iter__(self):
        # iter() on a generator returns the generator itself, identical to
        # returning self.generator directly.
        return iter(self.generator)
if __name__ == '__main__':
    import math

    # Sanity checks for the biased binary-search helper.
    print(math.log2(1000 * 1000))
    print(find_in_log_n(0, 100, lambda x: x <= 76))
import enum | |||||
import json | |||||
import time | |||||
from io import BytesIO | |||||
from urllib.request import Request, urlopen | |||||
import cv2 | |||||
import numpy as np | |||||
from PIL import Image | |||||
from tifffile import TiffWriter | |||||
from database_crawlers.utils import find_in_log_n, fetch_tile_content, download_urls_in_thread | |||||
class StainType(enum.Enum):
    # Member values are (numeric id, display name) tuples; serialization code
    # elsewhere reads value[1] for the human-readable name.
    H_AND_E = 0, "H&E"
    UNKNOWN = 1, "UNKNOWN"
class ThyroidCancerLevel(enum.Enum):
    """Coarse malignancy level; member values are (numeric id, display name)."""

    UNKNOWN = -1, "UNKNOWN"
    MALIGNANT = 0, "MALIGNANT"
    BENIGN = 1, "BENIGN"

    @staticmethod
    def get_thyroid_level_from_diagnosis_label(label: str):
        """Map a free-text diagnosis label to a level by keyword (case-insensitive)."""
        lowered = label.lower()
        for keyword, level in (("malignant", ThyroidCancerLevel.MALIGNANT),
                               ("benign", ThyroidCancerLevel.BENIGN)):
            if keyword in lowered:
                return level
        return ThyroidCancerLevel.UNKNOWN
class ThyroidType(enum.Enum):
    """Fine-grained thyroid diagnosis; member values are (numeric id, display name)."""

    UNKNOWN = -1, "UNKNOWN"
    NORMAL = 0, "NORMAL"
    PAPILLARY_CARCINOMA = 1, "PAPILLARY_CARCINOMA"
    GRAVES_DISEASE = 2, "GRAVES_DISEASE"
    NODULAR_GOITER = 3, "NODULAR_GOITER"
    HASHIMOTO_THYROIDITIS = 4, "HASHIMOTO_THYROIDITIS"
    FOLLICULAR_CARCINOMA = 5, "FOLLICULAR_CARCINOMA"
    FOLLICULAR_ADENOMA = 6, "FOLLICULAR_ADENOMA"
    COLLOID_GOITER = 7, "COLLOID_GOITER"

    @staticmethod
    def get_thyroid_type_from_diagnosis_label(label: str):
        """Map a free-text diagnosis label to a ThyroidType by keyword.

        Checks run in a fixed priority order (guard clauses), so e.g. a label
        containing both "normal" and "papillary" resolves to NORMAL, matching
        the original elif chain.
        """
        lowered = label.lower()
        if "normal" in lowered:
            return ThyroidType.NORMAL
        if "papillary" in lowered:
            return ThyroidType.PAPILLARY_CARCINOMA
        if "grave" in lowered:
            return ThyroidType.GRAVES_DISEASE
        if "nodular" in lowered and "goiter" in lowered:
            return ThyroidType.NODULAR_GOITER
        if "hashimoto" in lowered:
            return ThyroidType.HASHIMOTO_THYROIDITIS
        if "follicular" in lowered:
            # "adenoma" distinguishes the benign variant; otherwise carcinoma.
            return ThyroidType.FOLLICULAR_ADENOMA if "adenoma" in lowered else ThyroidType.FOLLICULAR_CARCINOMA
        if "colloid" in lowered and "goiter" in lowered:
            return ThyroidType.COLLOID_GOITER
        return ThyroidType.UNKNOWN
class WebStainImage:
    """Base record for a crawled stain image plus its JSON metadata sidecar.

    Subclasses implement `get_slide_view_url` / `crawl_image_save_jpeg_and_json`
    for a specific source website.
    """

    # Relative directory where image/json files are written.
    save_path = "data/"

    def __init__(self, database_name, image_id, image_web_label, report, stain_type, is_wsi):
        self.database_name = database_name
        self.image_id = image_id
        self.image_web_label = image_web_label
        self.report = report
        self.stain_type = stain_type
        self.is_wsi = is_wsi

    def to_json(self):
        """Serialize metadata; stain_type is a StainType enum (value[1] is its name)."""
        return {"database_name": self.database_name,
                "image_id": self.image_id,
                "image_web_label": self.image_web_label,
                "image_class_label": self.image_class_label,
                "report": self.report,
                "stain_type": self.stain_type.value[1],
                "is_wsi": self.is_wsi}

    @staticmethod
    def sorted_json_keys():
        """Canonical column order for writing metadata rows."""
        return ["database_name",
                "image_id",
                "image_web_label",
                "image_class_label",
                "report",
                "stain_type",
                "is_wsi"]

    @property
    def image_class_label(self):
        """Class label derived from the free-text web label."""
        return ThyroidType.get_thyroid_type_from_diagnosis_label(self.image_web_label).value[1]

    def get_slide_view_url(self):
        # BUGFIX: the original raised NotImplemented (a non-callable constant),
        # which produced a confusing TypeError; NotImplementedError is the
        # correct exception for an abstract method.
        raise NotImplementedError("get_slide_view_url")

    def crawl_image_save_jpeg_and_json(self):
        # BUGFIX: same NotImplemented -> NotImplementedError fix as above.
        raise NotImplementedError("crawl_image_get_jpeg")

    def _get_file_path_name(self):
        """Path prefix (no extension) all sidecar files share."""
        return self.save_path + self.image_id

    def _get_relative_image_path(self):
        return self._get_file_path_name() + ".jpeg"

    def _get_relative_tiff_image_path(self):
        return self._get_file_path_name() + ".tiff"

    def _get_relative_json_path(self):
        return self._get_file_path_name() + ".json"

    def _save_json_file(self):
        """Write the metadata JSON sidecar next to the image."""
        json_object = json.dumps(self.to_json())
        with open(self._get_relative_json_path(), "w") as outfile:
            outfile.write(json_object)
class WebStainWSI(WebStainImage):
    """Base class for whole-slide images that are assembled from crawled tiles.

    Subclasses define how tiles are addressed (1-D partition index or 2-D
    (i, j) grid) by implementing _get_tile_url / _generate_tile_urls.
    """

    def __init__(self, database_name, image_id, image_web_label, report, stain_type, is_wsi):
        super().__init__(database_name, image_id, image_web_label, report, stain_type, is_wsi)

    def _get_tile_url(self, zoom, partition=None, i=None, j=None):
        # BUG FIX (here and below): `raise NotImplemented(...)` raised a
        # TypeError; NotImplementedError is the proper abstract-method signal.
        raise NotImplementedError("_get_tile_url")

    def _generate_tile_urls(self):
        """Yield every tile URL of the slide; subclasses must implement."""
        raise NotImplementedError("generate tile urls")

    def find_best_zoom(self):
        # Zoom level 0 is currently used for every source.
        return 0

    def _find_first_tile_width(self):
        """Download the first tile and return its (width, height)."""
        image_content = fetch_tile_content(self._get_tile_url(self.find_best_zoom(), partition=0, i=0, j=0))
        img = Image.open(BytesIO(image_content))
        return img.size[0], img.size[1]

    def _fetch_all_tiles(self):
        """Yield (content, index) for every tile, downloaded via the
        threaded downloader; index preserves _generate_tile_urls order."""
        batch = [(url, index) for index, url in enumerate(self._generate_tile_urls())]
        if batch:
            for content, downloaded_index in download_urls_in_thread(batch):
                yield content, downloaded_index
        print("Slide download tiles done!!!")

    def crawl_image_save_jpeg_and_json(self):
        raise NotImplementedError("crawl_image_save_jpeg_and_json")
class WebStainWSIOneDIndex(WebStainWSI):
    """WSI crawler for sources whose tiles are addressed by a single linear
    partition index (0 .. last_partition)."""

    def __init__(self, database_name, image_id, image_web_label, report, stain_type, is_wsi):
        super().__init__(database_name, image_id, image_web_label, report, stain_type, is_wsi)
        # Highest valid partition index; discovered by _find_last_partition().
        self.last_partition = None

    def _find_last_partition(self):
        """Locate the last partition index that still answers a HEAD request,
        using find_in_log_n over [0, 1e6)."""
        print("Finding last partition: ", end="")

        def func(partition, retry=3):
            # Probe one partition URL; True if any of `retry` HEAD requests succeeds.
            print(partition, end="")
            for i in range(retry):
                try:
                    request = Request(self._get_tile_url(self.find_best_zoom(), partition=partition), method='HEAD')
                    resp = urlopen(request)
                    headers = resp.info()
                    print("<", end=", ")
                    return True
                except Exception as e:
                    print("e", end="")
                    # Small growing backoff before the next retry.
                    time.sleep(2 ** (0.1 * (i + 1)))
            print(">", end=", ")
            return False

        return find_in_log_n(0, 1000 * 1000, func)

    def _generate_tile_urls(self):
        # One URL per partition, in increasing partition order.
        for partition in range(self.last_partition + 1):
            yield self._get_tile_url(self.find_best_zoom(), partition=partition)

    def crawl_image_save_jpeg_and_json(self):
        """Download all tiles, stream them into a tiled BigTIFF, then write
        the JSON metadata sidecar."""

        def generator():
            # Yields decoded BGR tiles for TiffWriter: first the tiles buffered
            # in first_temp_rows during column detection (already decoded),
            # then the remaining downloads from content_fetcher.
            while True:
                if first_temp_rows:
                    yield first_temp_rows[0]
                    del first_temp_rows[0]
                else:
                    res = next(content_fetcher, -1)
                    if res == -1:
                        break
                    img = cv2.imdecode(np.frombuffer(res[0], np.uint8), -1)
                    if len(img.shape) == 2:
                        # Grayscale tile: promote to 3 channels so shapes match.
                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
                    yield img

        first_image_width, first_image_height = self._find_first_tile_width()
        first_temp_rows = []
        column_tiles, row_tiles = None, None
        self.last_partition = self._find_last_partition()
        content_fetcher = self._fetch_all_tiles()
        with TiffWriter(self._get_relative_tiff_image_path(), bigtiff=True) as tif:
            # Consume tiles until one differs in width from tile 0; its index
            # reveals the column count. Consumed tiles are buffered in
            # first_temp_rows so generator() can replay them.
            # NOTE(review): assumes the grid's right-edge tile has a different
            # width than the interior tiles -- confirm for every source site.
            while column_tiles is None:
                content, index = content_fetcher.__next__()
                image_array = cv2.imdecode(np.frombuffer(content, np.uint8), cv2.IMREAD_COLOR)
                first_temp_rows.append(image_array)
                if image_array.shape[1] != first_image_width:
                    column_tiles = index + 1
                    row_tiles = (self.last_partition + 1) // column_tiles
            shape = (first_image_height * row_tiles, first_image_width * column_tiles, 3)
            tif.write(generator(), subfiletype=1, tile=(first_image_height, first_image_width), shape=shape,
                      dtype=np.uint8,
                      compression='JPEG',  # TODO
                      photometric='rgb')
        """
        Save json file
        """
        self._save_json_file()
class WebStainWSITwoDIndex(WebStainWSI):
    """WSI crawler for sources whose tiles are addressed by a 2-D (i, j)
    grid index (presumably i = column, j = row -- confirm against the
    subclass URL format)."""

    def __init__(self, database_name, image_id, image_web_label, report, stain_type, is_wsi):
        super().__init__(database_name, image_id, image_web_label, report, stain_type, is_wsi)
        # Highest valid i and j indexes; filled by _find_last_i_and_j().
        self.last_i = None
        self.last_j = None

    def _generate_tile_urls(self):
        # j is the outer loop, so tiles stream one j-row at a time.
        for j in range(self.last_j + 1):
            for i in range(self.last_i + 1):
                yield self._get_tile_url(self.find_best_zoom(), i=i, j=j)

    def _find_last_i_and_j(self):
        """Locate the last reachable i (probing with j=0) and j (probing with
        i=0) via HEAD requests and find_in_log_n over [0, 1000)."""

        def func(i, j, retry=3):
            # Probe one (i, j) tile URL; True if any of `retry` HEAD requests succeeds.
            print(f"{i}-{j}", end="")
            for r in range(retry):
                try:
                    request = Request(self._get_tile_url(self.find_best_zoom(), i=i, j=j), method='HEAD')
                    resp = urlopen(request)
                    headers = resp.info()
                    print("<", end=", ")
                    return True
                except Exception as e:
                    print("e", end="")
                    # Small growing backoff before the next retry.
                    time.sleep(2 ** (0.1 * (r + 1)))
            print(">", end=", ")
            return False

        print("Finding last i: ", end="")
        i_func = lambda i: func(i=i, j=0)
        last_i = find_in_log_n(0, 1000, i_func)
        print("\nFinding last j: ")
        j_func = lambda j: func(i=0, j=j)
        last_j = find_in_log_n(0, 1000, j_func)
        return last_i, last_j

    def crawl_image_save_jpeg_and_json(self):
        """Download every tile, stream them into a BigTIFF laid out as a
        (last_j + 1) x (last_i + 1) grid of 256x256 tiles, then write the
        JSON metadata sidecar."""

        def generator():
            # Decode downloaded tiles one by one. Tiles are expected to be
            # about 256 px; anything >= 260 px aborts, smaller edges are
            # clamped up/down to exactly 256 via resize.
            while True:
                res = next(content_fetcher, -1)
                if res == -1:
                    break
                res = cv2.imdecode(np.frombuffer(res[0], np.uint8), -1)
                if max(res.shape) >= 260:
                    raise Exception(f"warning shape: {res.shape}")
                res = cv2.resize(res, (min(res.shape[1], 256), min(res.shape[0], 256)))
                yield res

        # Fixed tile geometry for 2-D indexed sources.
        first_image_width = 256
        first_image_height = 256
        self.last_i, self.last_j = self._find_last_i_and_j()
        content_fetcher = self._fetch_all_tiles()
        with TiffWriter(self._get_relative_tiff_image_path(), bigtiff=True) as tif:
            shape = (first_image_height * (self.last_j + 1), first_image_width * (self.last_i + 1), 3)
            tif.write(generator(), subfiletype=1,
                      tile=(first_image_height, first_image_width),
                      shape=shape,
                      dtype=np.uint8,
                      compression='JPEG',  # TODO
                      photometric='rgb')
        """
        Save json file
        """
        self._save_json_file()
from PIL import Image | |||||
import zarr | |||||
import tifffile | |||||
def convert_tif_to_jpeg(input_address="data/test/1672.tiff", outfile="data/test/out.zarr"):
    """Re-save page 0 of a (Big)TIFF as a zarr array on disk.

    Args:
        input_address: path of the source TIFF file (default kept for
            backward compatibility with the original hard-coded script).
        outfile: destination path; a ``.zarr`` store keeps memory bounded.
    """
    # Open page 0 lazily as a zarr store so the full image is never
    # materialized in RAM at once.
    image_zarr = tifffile.imread(input_address, aszarr=True, key=0)
    zarr_image = zarr.open(image_zarr, mode='r')
    zarr.save(outfile, zarr_image)
    ## RAM PROBLEM: a direct PIL convert/save loads the whole image into memory.
    # im = Image.open()
    # out = im.convert("RGB")
    # out.save(outfile, "JPEG", quality=90)
if __name__ == '__main__':
    # Raise PIL's decompression-bomb pixel limit so huge WSI TIFFs can be
    # opened without triggering Image.DecompressionBombError.
    Image.MAX_IMAGE_PIXELS = 1000 * 1000 * 256 * 256
    convert_tif_to_jpeg()
# import libtiff | |||||
# import pytiff | |||||
import cv2 | |||||
import tifffile | |||||
def show_tif_image(address, name, key=0, w_from=0, h_from=0, size=700, whole_image=False):
    """Show a crop (or the whole) of one page of a TIFF in an OpenCV window.

    Args:
        address: path of the .tif/.tiff file.
        name: window-title prefix, also echoed to stdout.
        key: TIFF page to read (pyramid level for multi-resolution slides).
        w_from, h_from: top-left corner of the crop (first/second axis).
        size: crop edge length in pixels.
        whole_image: when True, ignore the crop arguments and load the page fully.
    """
    import zarr as zarr_lib  # local alias; the original rebound the module name
    image_zarr = tifffile.imread(address, aszarr=True, key=key)
    try:
        # FIX: bind the opened store to its own name instead of shadowing the
        # `zarr` module, and guarantee the store is closed even on error.
        store = zarr_lib.open(image_zarr, mode='r')
        if whole_image:
            image_frag = store[0:store.shape[0], 0:store.shape[1]]
        else:
            image_frag = store[w_from:min(w_from + size, store.shape[0]),
                               h_from:min(h_from + size, store.shape[1])]
        cv2.imshow(f"name:{name} - shape:{image_frag.shape} - page:{key}", image_frag)
        print(f"name: {name}, shape: {store.shape}")
    finally:
        image_zarr.close()
def show_CAMELYON16_sample_view():
    """Preview a crop of the CAMELYON16 tumor_084 slide at page 0."""
    slide_path = 'data/CAMELYON16/tumor_084.tif'
    show_tif_image(slide_path, "CAMELYON16", key=0, w_from=10000, h_from=50000)
def show_CAMELYON17_sample_view():
    """Preview page 7 of the CAMELYON17 patient_083_node_4 slide."""
    slide_path = 'data/CAMELYON17/patient_083_node_4.tif'
    show_tif_image(slide_path, "CAMELYON17", key=7)
def show_Papsociety_sample_view():
    """Load one Papsociety JPEG from disk and display it in an OpenCV window."""
    sample_path = 'data/Papsociety/Follicular_neoplasm2,_low_power,_confirmed_FVPTC_DQ_SM.jpg'
    image_frag = cv2.imread(sample_path)
    cv2.imshow(f"Papsociety - {image_frag.shape}", image_frag)
def show_test(name):
    """Show a 1000-px crop of the local test slide under window title `name`."""
    show_tif_image('data/test/1272.tiff', name, key=0, w_from=1300, h_from=0, size=1000)
if __name__ == '__main__':
    # Alternative previews, kept for manual experimentation:
    # show_CAMELYON16_sample_view()
    # show_CAMELYON17_sample_view()
    # show_Papsociety_sample_view()
    show_tif_image('data/test/1272.tiff', "1", key=0, w_from=1000, h_from=100, size=1000)
    show_tif_image('data/test/1272.tiff', "2", key=0, w_from=1000, h_from=1000, size=1000)
    # Keep the windows responsive until the user presses 'q'.
    while cv2.waitKey(1) != ord('q'):
        pass
absl-py==1.0.0 | |||||
aiohttp==3.8.1 | |||||
aiosignal==1.2.0 | |||||
alignment==1.0.10 | |||||
appdirs==1.4.4 | |||||
argon2-cffi==20.1.0 | |||||
asgiref==3.2.10 | |||||
astunparse==1.6.3 | |||||
async-generator==1.10 | |||||
async-timeout==4.0.2 | |||||
attrs==21.2.0 | |||||
backcall==0.2.0 | |||||
bleach==3.3.0 | |||||
blis==0.7.5 | |||||
cachetools==4.2.4 | |||||
catalogue==2.0.6 | |||||
certifi==2021.10.8 | |||||
cffi==1.14.5 | |||||
charset-normalizer==2.0.8 | |||||
click==8.0.3 | |||||
colorama==0.4.4 | |||||
convertapi==1.4.0 | |||||
cryptography==3.4.7 | |||||
cycler==0.11.0 | |||||
cymem==2.0.6 | |||||
Cython==0.29.23 | |||||
decorator==5.0.9 | |||||
defusedxml==0.7.1 | |||||
distlib==0.3.2 | |||||
dj-database-url==0.5.0 | |||||
Django==3.1.2 | |||||
django-crispy-forms==1.9.2 | |||||
django-heroku==0.3.1 | |||||
django-rest==0.8.7 | |||||
djangorestframework==3.13.1 | |||||
djangorestframework-simplejwt==5.0.0 | |||||
entrypoints==0.3 | |||||
et-xmlfile==1.1.0 | |||||
factory-boy==3.2.1 | |||||
Faker==12.3.0 | |||||
filelock==3.0.12 | |||||
flatbuffers==2.0 | |||||
fonttools==4.28.2 | |||||
frozenlist==1.3.0 | |||||
gast==0.4.0 | |||||
gensim==4.1.2 | |||||
google-auth==2.3.3 | |||||
google-auth-oauthlib==0.4.6 | |||||
google-pasta==0.2.0 | |||||
grpcio==1.42.0 | |||||
gunicorn==20.0.4 | |||||
h5py==3.6.0 | |||||
hazm==0.7.0 | |||||
huggingface-hub==0.6.0 | |||||
idna==3.3 | |||||
importlib-metadata==4.8.2 | |||||
ipykernel==5.5.5 | |||||
ipython==7.24.1 | |||||
ipython-genutils==0.2.0 | |||||
ipywidgets==7.6.3 | |||||
jedi==0.18.0 | |||||
Jinja2==3.0.1 | |||||
joblib==1.0.1 | |||||
jsonschema==3.2.0 | |||||
jupyter==1.0.0 | |||||
jupyter-client==6.1.12 | |||||
jupyter-console==6.4.0 | |||||
jupyter-core==4.7.1 | |||||
jupyterlab-pygments==0.1.2 | |||||
jupyterlab-widgets==1.0.0 | |||||
keras==2.7.0 | |||||
Keras-Preprocessing==1.1.2 | |||||
kiwisolver==1.3.2 | |||||
langcodes==3.3.0 | |||||
libclang==12.0.0 | |||||
libtiff==0.4.2 | |||||
Markdown==3.3.6 | |||||
MarkupSafe==2.0.1 | |||||
matplotlib==3.5.0 | |||||
matplotlib-inline==0.1.2 | |||||
mistune==0.8.4 | |||||
multidict==6.0.2 | |||||
murmurhash==1.0.6 | |||||
nbclient==0.5.3 | |||||
nbconvert==6.0.7 | |||||
nbformat==5.1.3 | |||||
nest-asyncio==1.5.1 | |||||
nltk==3.3 | |||||
notebook==6.4.0 | |||||
numpy==1.20.3 | |||||
oauthlib==3.1.1 | |||||
opencv-python==4.5.2.54 | |||||
openpyxl==3.0.7 | |||||
opt-einsum==3.3.0 | |||||
packaging==20.9 | |||||
pandas==1.2.4 | |||||
pandocfilters==1.4.3 | |||||
parso==0.8.2 | |||||
pathy==0.6.1 | |||||
pickleshare==0.7.5 | |||||
Pillow==8.4.0 | |||||
preshed==3.0.6 | |||||
prometheus-client==0.11.0 | |||||
prompt-toolkit==3.0.18 | |||||
protobuf==3.19.1 | |||||
psycopg2==2.8.6 | |||||
pyaes==1.6.1 | |||||
pyaml==21.10.1 | |||||
pyasn1==0.4.8 | |||||
pyasn1-modules==0.2.8 | |||||
pycparser==2.20 | |||||
pydantic==1.8.2 | |||||
Pygments==2.9.0 | |||||
PyJWT==2.3.0 | |||||
pynput==1.7.5 | |||||
pyOpenSSL==20.0.1 | |||||
pyparsing==2.4.7 | |||||
pyrsistent==0.17.3 | |||||
pyTelegramBotAPI==4.4.0 | |||||
python-dateutil==2.8.1 | |||||
python-decouple==3.6 | |||||
pytz==2020.1 | |||||
PyWavelets==1.1.1 | |||||
pywin32==301 | |||||
pywinpty==1.1.1 | |||||
PyYAML==6.0 | |||||
pyzmq==22.1.0 | |||||
qtconsole==5.1.0 | |||||
QtPy==1.9.0 | |||||
regex==2022.4.24 | |||||
requests==2.26.0 | |||||
requests-oauthlib==1.3.0 | |||||
rsa==4.8 | |||||
scikit-learn==0.24.2 | |||||
scipy==1.7.0 | |||||
Send2Trash==1.5.0 | |||||
setuptools-scm==6.3.2 | |||||
six==1.16.0 | |||||
sklearn==0.0 | |||||
smart-open==5.2.1 | |||||
spacy==3.2.1 | |||||
spacy-legacy==3.0.8 | |||||
spacy-loggers==1.0.1 | |||||
sqlparse==0.4.1 | |||||
srsly==2.4.2 | |||||
Telethon==1.24.0 | |||||
tensorboard==2.7.0 | |||||
tensorboard-data-server==0.6.1 | |||||
tensorboard-plugin-wit==1.8.0 | |||||
tensorflow==2.7.0 | |||||
tensorflow-estimator==2.7.0 | |||||
tensorflow-io-gcs-filesystem==0.22.0 | |||||
termcolor==1.1.0 | |||||
terminado==0.10.0 | |||||
testpath==0.5.0 | |||||
thinc==8.0.13 | |||||
threadpoolctl==2.2.0 | |||||
tokenizers==0.12.1 | |||||
tomli==1.2.2 | |||||
torch==1.10.1 | |||||
torchtext==0.11.1 | |||||
torchvision==0.11.1 | |||||
tornado==6.1 | |||||
tqdm==4.62.3 | |||||
traitlets==5.0.5 | |||||
transformers==4.19.2 | |||||
typer==0.4.0 | |||||
typing_extensions==4.0.1 | |||||
urllib3==1.26.7 | |||||
virtualenv==20.4.7 | |||||
wasabi==0.9.0 | |||||
wcwidth==0.2.5 | |||||
webencodings==0.5.1 | |||||
Werkzeug==2.0.2 | |||||
whitenoise==5.2.0 | |||||
widgetsnbextension==3.5.1 | |||||
wrapt==1.13.3 | |||||
xlrd==2.0.1 | |||||
yarl==1.7.2 | |||||
zipp==3.6.0 |
import itertools | |||||
import cv2 | |||||
def show_and_wait(img, name="img", wait=True, save=False):
    """Display an image; optionally block until 'q' is pressed and/or save it.

    Args:
        img: image array to display (as accepted by cv2.imshow).
        name: window title, reused as the JPEG filename stem when saving.
        wait: when True, block until the user presses 'q', then close all windows.
        save: when True, write the image to "<name>.jpeg".
    """
    cv2.imshow(name, img)
    if wait:
        quit_key = ord('q')
        while cv2.waitKey() != quit_key:
            continue
        cv2.destroyAllWindows()
    if save:
        cv2.imwrite(f"{name}.jpeg", img)
def check_if_generator_is_empty(generator):
    """Peek at a generator without losing items.

    Returns None when the generator is already exhausted; otherwise returns
    an iterator yielding the same items (the consumed first item is chained
    back in front of the remainder).
    """
    _missing = object()  # sentinel: None could be a legitimate yielded value
    first = next(generator, _missing)
    if first is _missing:
        return None
    return itertools.chain([first], generator)