@@ -0,0 +1,145 @@ | |||
# Custom | |||
*.svs | |||
*.xml | |||
*.csv | |||
*.xcf | |||
*.zip | |||
*.json | |||
*.state | |||
*.tiff | |||
*.tif | |||
.idea | |||
*.jpeg | |||
*.jpg | |||
**/data/ | |||
**/patches/ | |||
classification_stuff/Transfer-Learning-Library | |||
# Byte-compiled / optimized / DLL files | |||
__pycache__/ | |||
*.py[cod] | |||
*$py.class | |||
# C extensions | |||
*.so | |||
# Distribution / packaging | |||
.Python | |||
build/ | |||
develop-eggs/ | |||
dist/ | |||
downloads/ | |||
eggs/ | |||
.eggs/ | |||
lib/ | |||
lib64/ | |||
parts/ | |||
sdist/ | |||
var/ | |||
wheels/ | |||
pip-wheel-metadata/ | |||
share/python-wheels/ | |||
*.egg-info/ | |||
.installed.cfg | |||
*.egg | |||
MANIFEST | |||
# PyInstaller | |||
# Usually these files are written by a python script from a template | |||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | |||
*.manifest | |||
*.spec | |||
# Installer logs | |||
pip-log.txt | |||
pip-delete-this-directory.txt | |||
# Unit test / coverage reports | |||
htmlcov/ | |||
.tox/ | |||
.nox/ | |||
.coverage | |||
.coverage.* | |||
.cache | |||
nosetests.xml | |||
coverage.xml | |||
*.cover | |||
*.py,cover | |||
.hypothesis/ | |||
.pytest_cache/ | |||
# Translations | |||
*.mo | |||
*.pot | |||
# Django stuff: | |||
*.log | |||
local_settings.py | |||
db.sqlite3 | |||
db.sqlite3-journal | |||
# Flask stuff: | |||
instance/ | |||
.webassets-cache | |||
# Scrapy stuff: | |||
.scrapy | |||
# Sphinx documentation | |||
docs/_build/ | |||
# PyBuilder | |||
target/ | |||
# Jupyter Notebook | |||
.ipynb_checkpoints | |||
# IPython | |||
profile_default/ | |||
ipython_config.py | |||
# pyenv | |||
.python-version | |||
# pipenv | |||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | |||
# However, in case of collaboration, if having platform-specific dependencies or dependencies | |||
# having no cross-platform support, pipenv may install dependencies that don't work, or not | |||
# install all needed dependencies. | |||
#Pipfile.lock | |||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow | |||
__pypackages__/ | |||
# Celery stuff | |||
celerybeat-schedule | |||
celerybeat.pid | |||
# SageMath parsed files | |||
*.sage.py | |||
# Environments | |||
.env | |||
.venv | |||
env/ | |||
venv/ | |||
ENV/ | |||
env.bak/ | |||
venv.bak/ | |||
# Spyder project settings | |||
.spyderproject | |||
.spyproject | |||
# Rope project settings | |||
.ropeproject | |||
# mkdocs documentation | |||
/site | |||
# mypy | |||
.mypy_cache/ | |||
.dmypy.json | |||
dmypy.json | |||
# Pyre type checker | |||
.pyre/ |
@@ -0,0 +1,39 @@ | |||
-----BEGIN CERTIFICATE----- | |||
MIIG5jCCBc6gAwIBAgIQAze5KDR8YKauxa2xIX84YDANBgkqhkiG9w0BAQUFADBs | |||
MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3 | |||
d3cuZGlnaWNlcnQuY29tMSswKQYDVQQDEyJEaWdpQ2VydCBIaWdoIEFzc3VyYW5j | |||
ZSBFViBSb290IENBMB4XDTA3MTEwOTEyMDAwMFoXDTIxMTExMDAwMDAwMFowaTEL | |||
MAkGA1UEBhMCVVMxFTATBgNVBAoTDERpZ2lDZXJ0IEluYzEZMBcGA1UECxMQd3d3 | |||
LmRpZ2ljZXJ0LmNvbTEoMCYGA1UEAxMfRGlnaUNlcnQgSGlnaCBBc3N1cmFuY2Ug | |||
RVYgQ0EtMTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAPOWYth1bhn/ | |||
PzR8SU8xfg0ETpmB4rOFVZEwscCvcLssqOcYqj9495BoUoYBiJfiOwZlkKq9ZXbC | |||
7L4QWzd4g2B1Rca9dKq2n6Q6AVAXxDlpufFP74LByvNK28yeUE9NQKM6kOeGZrzw | |||
PnYoTNF1gJ5qNRQ1A57bDIzCKK1Qss72kaPDpQpYSfZ1RGy6+c7pqzoC4E3zrOJ6 | |||
4GAiBTyC01Li85xH+DvYskuTVkq/cKs+6WjIHY9YHSpNXic9rQpZL1oRIEDZaARo | |||
LfTAhAsKG3jf7RpY3PtBWm1r8u0c7lwytlzs16YDMqbo3rcoJ1mIgP97rYlY1R4U | |||
pPKwcNSgPqcCAwEAAaOCA4UwggOBMA4GA1UdDwEB/wQEAwIBhjA7BgNVHSUENDAy | |||
BggrBgEFBQcDAQYIKwYBBQUHAwIGCCsGAQUFBwMDBggrBgEFBQcDBAYIKwYBBQUH | |||
AwgwggHEBgNVHSAEggG7MIIBtzCCAbMGCWCGSAGG/WwCATCCAaQwOgYIKwYBBQUH | |||
AgEWLmh0dHA6Ly93d3cuZGlnaWNlcnQuY29tL3NzbC1jcHMtcmVwb3NpdG9yeS5o | |||
dG0wggFkBggrBgEFBQcCAjCCAVYeggFSAEEAbgB5ACAAdQBzAGUAIABvAGYAIAB0 | |||
AGgAaQBzACAAQwBlAHIAdABpAGYAaQBjAGEAdABlACAAYwBvAG4AcwB0AGkAdAB1 | |||
AHQAZQBzACAAYQBjAGMAZQBwAHQAYQBuAGMAZQAgAG8AZgAgAHQAaABlACAARABp | |||
AGcAaQBDAGUAcgB0ACAARQBWACAAQwBQAFMAIABhAG4AZAAgAHQAaABlACAAUgBl | |||
AGwAeQBpAG4AZwAgAFAAYQByAHQAeQAgAEEAZwByAGUAZQBtAGUAbgB0ACAAdwBo | |||
AGkAYwBoACAAbABpAG0AaQB0ACAAbABpAGEAYgBpAGwAaQB0AHkAIABhAG4AZAAg | |||
AGEAcgBlACAAaQBuAGMAbwByAHAAbwByAGEAdABlAGQAIABoAGUAcgBlAGkAbgAg | |||
AGIAeQAgAHIAZQBmAGUAcgBlAG4AYwBlAC4wEgYDVR0TAQH/BAgwBgEB/wIBADCB | |||
gwYIKwYBBQUHAQEEdzB1MCQGCCsGAQUFBzABhhhodHRwOi8vb2NzcC5kaWdpY2Vy | |||
dC5jb20wTQYIKwYBBQUHMAKGQWh0dHA6Ly93d3cuZGlnaWNlcnQuY29tL0NBQ2Vy | |||
dHMvRGlnaUNlcnRIaWdoQXNzdXJhbmNlRVZSb290Q0EuY3J0MIGPBgNVHR8EgYcw | |||
gYQwQKA+oDyGOmh0dHA6Ly9jcmwzLmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydEhpZ2hB | |||
c3N1cmFuY2VFVlJvb3RDQS5jcmwwQKA+oDyGOmh0dHA6Ly9jcmw0LmRpZ2ljZXJ0 | |||
LmNvbS9EaWdpQ2VydEhpZ2hBc3N1cmFuY2VFVlJvb3RDQS5jcmwwHQYDVR0OBBYE | |||
FExYyyXwQU9S9CjIgUObpqig5pLlMB8GA1UdIwQYMBaAFLE+w2kD+L9HAdSYJhoI | |||
Au9jZCvDMA0GCSqGSIb3DQEBBQUAA4IBAQBMeheHKF0XvLIyc7/NLvVYMR3wsXFU | |||
nNabZ5PbLwM+Fm8eA8lThKNWYB54lBuiqG+jpItSkdfdXJW777UWSemlQk808kf/ | |||
roF/E1S3IMRwFcuBCoHLdFfcnN8kpCkMGPAc5K4HM+zxST5Vz25PDVR708noFUjU | |||
xbvcNRx3RQdIRYW9135TuMAW2ZXNi419yWBP0aKb49Aw1rRzNubS+QOy46T15bg+ | |||
BEkAui6mSnKDcp33C4ypieez12Qf1uNgywPE3IjpnSUBAHHLA7QpYCWP+UbRe3Gu | |||
zVMSW4SOwg/H7ZMZ2cn6j1g0djIvruFQFGHUqFijyDATI+/GJYw2jxyA | |||
-----END CERTIFICATE----- |
@@ -0,0 +1,21 @@ | |||
MIT License | |||
Copyright (c) 2022 Amir Hossein | |||
Permission is hereby granted, free of charge, to any person obtaining a copy | |||
of this software and associated documentation files (the "Software"), to deal | |||
in the Software without restriction, including without limitation the rights | |||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
copies of the Software, and to permit persons to whom the Software is | |||
furnished to do so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in all | |||
copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. |
@@ -0,0 +1,3 @@ | |||
# Thyroid-Project-Using-DL | |||
Developing a neural network to classify thyroid carcinoma using H&E slides |
@@ -0,0 +1,19 @@ | |||
# Demo: apply Fourier Domain Adaptation (FDA) to restyle a source-domain tile
# using the Fourier amplitude statistics of the listed target-domain tiles.
# from dalib.translation.fourier_transform import FourierTransform
import importlib.util
import sys
from PIL import Image

# Path to the vendored FourierTransform implementation inside the
# Transfer-Learning-Library checkout (machine-specific Windows path).
fourier_transform_address = "E:\\Documentwork\\sharif\\CE Project\\future\\Thyroid Project\\Thyroid-Project-Using-DL\\classification_stuff\\Transfer-Learning-Library\\dalib\\translation\\fourier_transform.py"
# Load the module directly from its file path (it is not on PYTHONPATH).
spec = importlib.util.spec_from_file_location("module.name", fourier_transform_address)
foo = importlib.util.module_from_spec(spec)
sys.modules["module.name"] = foo
spec.loader.exec_module(foo)
FourierTransform = foo.FourierTransform

# Target-domain tiles whose amplitudes define the destination style.
# NOTE(review): "bio_tile (4).jpeg" appears twice — confirm whether a fifth
# distinct tile was intended.
image_list = ["bio_tile (1).jpeg", "bio_tile (2).jpeg", "bio_tile (3).jpeg", "bio_tile (4).jpeg", "bio_tile (4).jpeg"]
amplitude_dir = "amplitude_dir"
fourier_transform = FourierTransform(image_list, amplitude_dir, beta=0, rebuild=False)
source_image = Image.open("tile2.jpeg")  # image from the source domain
source_image_in_target_style = fourier_transform(source_image)
source_image_in_target_style.save("out_fda.jpeg")
@@ -0,0 +1,62 @@ | |||
import random | |||
import cv2 | |||
import torch | |||
import torch.nn as nn | |||
from albumentations.augmentations.utils import read_rgb_image | |||
from albumentations.core.transforms_interface import BasicTransform, to_tuple | |||
class Mixup(BasicTransform):
    """Albumentations transform implementing Mixup augmentation.

    Blends the input image with a randomly chosen image from ``mixups`` and
    rewrites the target into a dict carrying both labels plus the blending
    coefficient, so a Mixup-aware loss (see ``mixup_loss``) can blend the two
    label losses with the same coefficient.
    """

    def __init__(self, mixups, read_fn=read_rgb_image, beta_limit=0.3, **kwargs):
        # mixups: sequence of (image_path, target) pairs to sample from.
        # read_fn: loader turning a path into an RGB image array.
        # beta_limit: upper bound of the blending coefficient; to_tuple turns
        #   it into a (low, high) range with the low end clipped at 0.
        super().__init__(**kwargs)
        self.mixups = mixups
        self.read_fn = read_fn
        self.beta_limit = to_tuple(beta_limit, low=0)

    def apply(self, image, mixup_image=None, beta=0.1, **params):
        # Convex combination of the two images, preserving the input dtype.
        img_type = image.dtype
        image = ((1 - beta) * image + beta * mixup_image).astype(img_type)
        return image

    def apply_to_target(self, target, beta=0.1, mixup_target=-1, **params):
        # Replace the plain target with a dict carrying both labels and beta;
        # mixup_loss() consumes this structure.
        target = {"img": target, "mixup": mixup_target, "beta": beta}
        return target

    def get_params_dependent_on_targets(self, params):
        """Pick a random mixup sample and reflect-pad it up to the input size.

        NOTE(review): copyMakeBorder only grows the mixup image; if it is
        larger than the input in some dimension the shapes can still mismatch
        in apply() without triggering the resize fallback — confirm inputs
        share sizes.
        """
        img = params["image"]
        mixup = random.choice(self.mixups)
        mixup_image = self.read_fn(mixup[0])
        vertical_pad = max(0, (img.shape[0] - mixup_image.shape[0]) // 2)
        horizontal_pad = max(0, (img.shape[1] - mixup_image.shape[1]) // 2)
        try:
            mixup_image = cv2.copyMakeBorder(mixup_image, vertical_pad, vertical_pad, horizontal_pad, horizontal_pad,
                                             cv2.BORDER_REFLECT)
        except Exception as e:
            print(e)
            # Fallback: force the mixup image to exactly the input's size.
            mixup_image = cv2.resize(mixup_image, dsize=(img.shape[1], img.shape[0]))
        return {"mixup_image": mixup_image, "mixup_target": mixup[1]}

    def get_params(self):
        # Blending coefficient sampled uniformly from the configured range.
        return {"beta": random.uniform(self.beta_limit[0], self.beta_limit[1])}

    @property
    def targets(self):
        # Albumentations dispatch table: which method handles each target key.
        return {
            "image": self.apply,
            "target": self.apply_to_target,
        }

    @property
    def targets_as_params(self):
        # The raw image is needed to size/pad the sampled mixup image.
        return ["image"]
def mixup_loss(output, target):
    """Cross-entropy loss that also supports Mixup-augmented targets.

    Args:
        output: raw model logits, shape (batch, n_classes).
        target: either a plain label tensor, or the dict produced by
            ``Mixup.apply_to_target`` with keys "img", "mixup" and "beta".

    Returns:
        Scalar loss tensor: standard mean cross-entropy for plain targets, or
        the beta-blended mean of the two per-sample losses for Mixup targets.
    """
    # isinstance (not `type(...) ==`) so tensor subclasses are handled too.
    if isinstance(target, torch.Tensor):
        return nn.CrossEntropyLoss()(output, target)
    # Mixup target: blend the losses of the two mixed labels with the same
    # beta that was used to blend the images.
    loss = nn.CrossEntropyLoss(reduction="none")
    blended = (1 - target["beta"]) * loss(output, target["img"]) \
        + target["beta"] * loss(output, target["mixup"])
    return blended.mean()
@@ -0,0 +1,162 @@ | |||
import csv | |||
import glob | |||
import os | |||
import random | |||
from tqdm import tqdm | |||
from config import Config | |||
class CustomFragmentLoader:
    """Collects patch image paths and labels from the crawled datasets and
    splits them into train/val/test sets.

    Patches are grouped per slide so that fragments of the same slide never
    leak across splits.
    """

    def __init__(self, datasets_folder_name):
        # datasets_folder_name: list of dataset folder names under
        # ../../../database_crawlers/ to load patches from.
        self._datasets_folder_name = datasets_folder_name
        # database_id -> {image_id: [list_of_image_paths, label, extra_label]}
        self._database_slide_dict = {}
        self._load_csv_files_to_dict()

    def _load_csv_files_to_dict(self):
        """Read each dataset's patches/patch_labels.csv and index the patch
        image paths by database id and slide image id."""
        databases_directory = "../../../database_crawlers/"
        list_dir = [os.path.join(databases_directory, o, "patches") for o in self._datasets_folder_name
                    if os.path.isdir(os.path.join(databases_directory, o, "patches"))]
        for db_dir in list_dir:
            csv_dir = os.path.join(db_dir, "patch_labels.csv")
            with open(csv_dir, "r") as csv_file:
                csv_reader = csv.reader(csv_file)
                header = next(csv_reader, None)  # skip the CSV header row
                for row in csv_reader:
                    if row:
                        database_id = row[0]
                        image_id = row[1]
                        # Patch folders are named with a prefix of the image id.
                        slide_frag_folder_name = [o for o in os.listdir(db_dir) if image_id.startswith(o)]
                        if slide_frag_folder_name:
                            slide_frag_folder_name = slide_frag_folder_name[0]
                        else:
                            continue
                        slide_path = os.path.join(db_dir, slide_frag_folder_name)
                        image_paths = glob.glob(os.path.join(slide_path, "*.jpeg"))
                        if image_paths:
                            d = self._database_slide_dict.get(database_id, {})
                            # row[3], row[2] are dataset-specific label columns.
                            d[image_id] = [image_paths] + [row[3], row[2]]
                            self._database_slide_dict[database_id] = d

    def load_image_path_and_labels_and_split(self, test_percent=20, val_percent=10):
        """Split patches into train/val/test lists of (image_path, class_name).

        Slides are assigned wholesale to a single split; target split sizes
        (in patch counts) are met approximately, per class.
        """
        train_images, val_images, test_images = [], [], []
        for database_name, slides_dict in self._database_slide_dict.items():
            image_paths_by_slide = [(len(v[0]), v[0], v[1], v[2]) for v in slides_dict.values()]
            random.shuffle(image_paths_by_slide)
            # image_paths_by_slide.sort()
            class_slides_dict = {}
            for item in image_paths_by_slide:
                class_name = None
                if database_name == "NationalCancerInstitute":
                    # Label is a stringified tuple "('normal%', 'tumor%', 'stromal%')".
                    # NOTE(review): "stormal" is presumably a typo for "stromal".
                    normal_percent = int(item[2].strip(r"(|)|\'").split("\', \'")[0])
                    tumor_percent = int(item[2].strip(r"(|)|\'").split("\', \'")[1])
                    stormal_percent = int(item[2].strip(r"(|)|\'").split("\', \'")[2])
                    if stormal_percent == 0:
                        if tumor_percent == 100:
                            class_name = "MALIGNANT"
                        elif normal_percent == 100:
                            class_name = "BENIGN"
                        else:
                            class_name = str(tumor_percent)
                elif database_name == "BioAtlasThyroidSlideProvider":
                    if "papillary" in item[3].lower():
                        class_name = "MALIGNANT"
                    elif "normal" in item[3].lower():
                        class_name = "BENIGN"
                # Fall back to the raw label column when no rule matched.
                class_name = class_name if class_name else item[2]
                if class_name in Config.class_names:
                    class_slides_dict[class_name] = class_slides_dict.get(class_name, []) + [
                        (item[0], item[1], class_name)]
            # split test val train because they must not share same slide id fragment
            for thyroid_class, slide_frags in class_slides_dict.items():
                dataset_train_images, dataset_val_images, dataset_test_images = [], [], []
                total_counts = sum([item[0] for item in slide_frags])
                test_counts = total_counts * test_percent // 100
                val_counts = total_counts * val_percent // 100
                train_counts = total_counts - test_counts - val_counts
                # Greedily fill train, then val; the remainder becomes test.
                for i, slide_frags_item in enumerate(slide_frags):
                    if len(dataset_train_images) + slide_frags_item[0] <= train_counts:
                        dataset_train_images += slide_frags_item[1]
                    elif len(dataset_val_images) + slide_frags_item[0] <= val_counts:
                        dataset_val_images += slide_frags_item[1]
                    else:
                        dataset_test_images += slide_frags_item[1]
                train_images += [(i, thyroid_class) for i in dataset_train_images]
                val_images += [(i, thyroid_class) for i in dataset_val_images]
                test_images += [(i, thyroid_class) for i in dataset_test_images]
        return train_images, val_images, test_images

    def national_cancer_image_and_labels_splitter_per_slide(self, test_percent=20, val_percent=10):
        """NCI-specific variant whose labels are integer tumor percentages and
        whose items also carry the slide id.

        Returns lists of (image_path, (tumor_percent, slide_id)).
        """
        train_images, val_images, test_images = [], [], []
        for database_name, slides_dict in self._database_slide_dict.items():
            print(database_name)
            image_paths_by_slide = [(len(v[0]), v[0], v[1], v[2], k) for k, v in slides_dict.items()]
            random.shuffle(image_paths_by_slide)
            # image_paths_by_slide.sort()
            class_slides_dict = {}
            for item in tqdm(image_paths_by_slide):
                class_name = None
                # Same "(normal%, tumor%, stromal%)" string parsing as above.
                normal_percent = int(item[2].strip(r"(|)|\'").split("\', \'")[0])
                tumor_percent = int(item[2].strip(r"(|)|\'").split("\', \'")[1])
                stormal_percent = int(item[2].strip(r"(|)|\'").split("\', \'")[2])
                if stormal_percent == 0:
                    if tumor_percent == 100:
                        class_name = 100
                    elif normal_percent == 100:
                        class_name = 0
                    else:
                        class_name = tumor_percent
                class_name = class_name if class_name is not None else item[2]
                # In the per-slide evaluation phase Config.class_names holds
                # the integers 0..100 (set in model_training.py's run block).
                if class_name in Config.class_names:
                    class_slides_dict[class_name] = class_slides_dict.get(class_name, []) + [
                        (item[0], item[1], class_name, item[4])]
            # split test val train because they must not share same slide id fragment
            for thyroid_class, slide_frags in class_slides_dict.items():
                dataset_train_images, dataset_val_images, dataset_test_images = [], [], []
                total_counts = sum([item[0] for item in slide_frags])
                test_counts = total_counts * test_percent // 100
                val_counts = total_counts * val_percent // 100
                train_counts = total_counts - test_counts - val_counts
                # Greedily fill train, then val; the remainder becomes test.
                for i, slide_frags_item in enumerate(slide_frags):
                    # Attach the slide id to every patch path.
                    items_paths = [(item_path, slide_frags_item[3]) for item_path in slide_frags_item[1]]
                    if len(dataset_train_images) + slide_frags_item[0] <= train_counts:
                        dataset_train_images += items_paths
                    elif len(dataset_val_images) + slide_frags_item[0] <= val_counts:
                        dataset_val_images += items_paths
                    else:
                        dataset_test_images += items_paths
                train_images += [(i, (thyroid_class, j)) for i, j in dataset_train_images]
                val_images += [(i, (thyroid_class, j)) for i, j in dataset_val_images]
                test_images += [(i, (thyroid_class, j)) for i, j in dataset_test_images]
        return train_images, val_images, test_images
if __name__ == '__main__':
    # Smoke test: load one dataset, print per-class split sizes, and verify
    # the three splits do not share any patches.
    # datasets_folder = ["national_cancer_institute"]
    datasets_folder = ["papsociaty"]
    # datasets_folder = ["stanford_tissue_microarray"]
    # datasets_folder = ["bio_atlas_at_jake_gittlen_laboratories"]
    train, val, test = CustomFragmentLoader(datasets_folder).load_image_path_and_labels_and_split(
        val_percent=Config.val_percent,
        test_percent=Config.test_percent)
    benign_train = [i for i in train if i[1] == "BENIGN"]
    mal_train = [i for i in train if i[1] == "MALIGNANT"]
    print(f"train: {len(train)}={len(benign_train)}+{len(mal_train)}")
    benign_val = [i for i in val if i[1] == "BENIGN"]
    mal_val = [i for i in val if i[1] == "MALIGNANT"]
    print(f"val: {len(val)}={len(benign_val)}+{len(mal_val)}")
    benign_test = [i for i in test if i[1] == "BENIGN"]
    mal_test = [i for i in test if i[1] == "MALIGNANT"]
    print(f"test: {len(test)}={len(benign_test)}+{len(mal_test)}")
    # The three pairwise intersections below should all be empty.
    print(set(train) & set(test))
    print(set(train) & set(val))
    print(set(test) & set(val))
    # NOTE(review): `set(val) & set(val)` is just `set(val)`; this was likely
    # meant to be `len(set(test) & set(val))` — confirm intent.
    print(len(set(val) & set(val)))
@@ -0,0 +1,20 @@ | |||
import os | |||
def set_config_for_logger(config_label):
    """Create (or fetch) the DEBUG-level logger for a training configuration.

    Logs are written to ./train_state/<config_label>/console.log; the
    directory is created if missing.

    Args:
        config_label: name of the training configuration, also used as the
            logger name.

    Returns:
        The configured logging.Logger instance.
    """
    import logging
    config_train_dir = os.path.join("./train_state", config_label)
    # makedirs creates both levels at once and, unlike the isdir/mkdir dance,
    # is free of the check-then-create race.
    os.makedirs(config_train_dir, exist_ok=True)
    log_file = os.path.join(config_train_dir, "console.log")
    logger = logging.getLogger(config_label)
    logger.setLevel(logging.DEBUG)
    # getLogger returns the same instance for the same label; guard so a
    # repeated call does not attach a second handler (duplicate log lines).
    if not logger.handlers:
        fh = logging.FileHandler(log_file)
        formatter = logging.Formatter('%(asctime)s|%(levelname)s|%(message)s', datefmt='%Y-%m-%d %H:%M:%S')
        fh.setFormatter(formatter)
        fh.setLevel(logging.DEBUG)
        logger.addHandler(fh)
    return logger
@@ -0,0 +1,399 @@ | |||
import os | |||
import random | |||
import time | |||
from typing import cast | |||
import numpy as np | |||
import matplotlib.pyplot as plt | |||
import timm | |||
import torch | |||
import torchvision | |||
from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score | |||
from torch import nn, optim | |||
from torch.utils.data import DataLoader | |||
from tqdm import tqdm | |||
from config import Config | |||
from fragment_splitter import CustomFragmentLoader | |||
from model_train_logger import set_config_for_logger | |||
from thyroid_dataset import ThyroidDataset | |||
from thyroid_ml_model import ThyroidClassificationModel | |||
from transformation import get_transformation | |||
@torch.no_grad()
def validate(model, data_loader, loss_function=None, show_tqdm=False):
    """Run ``model`` over ``data_loader`` and compute evaluation metrics.

    Returns (balanced_accuracy_percent, normalized_confusion_matrix,
    (fpr, tpr, auc)) and, when ``loss_function`` is given, the mean loss as a
    fourth element.
    """
    idx_values = sorted(data_loader.dataset.class_to_idx_dict.values())
    losses, all_preds, all_targets, positive_scores = [], [], [], []
    iterator = tqdm(data_loader) if show_tqdm else data_loader
    for batch_images, batch_labels in iterator:
        batch_images = batch_images.to(Config.available_device)
        batch_labels = batch_labels.to(Config.available_device)
        logits = model(batch_images, validate=True)
        if loss_function:
            losses.append(loss_function(logits, batch_labels))
        batch_preds = torch.max(logits, 1)[1]
        positive_scores.extend(logits[:, 1].cpu())
        all_preds.extend(batch_preds.cpu())
        all_targets.extend(batch_labels.cpu())
    cf_matrix = confusion_matrix(all_targets, all_preds, normalize="true")
    # Balanced accuracy: mean of the per-class recalls (matrix diagonal).
    balanced_acc = sum(cf_matrix[c][c] for c in idx_values) / len(idx_values)
    fpr, tpr, _ = roc_curve(all_targets, positive_scores)
    auc = roc_auc_score(all_targets, positive_scores)
    if loss_function:
        mean_loss = sum(losses) / len(losses)
        return balanced_acc * 100, cf_matrix, (fpr, tpr, auc), mean_loss
    return balanced_acc * 100, cf_matrix, (fpr, tpr, auc)
def get_save_state_dirs(config_label, epoch=None):
    """Return (and create if needed) the directories used to persist a run's
    training state.

    Args:
        config_label: name of the training configuration; becomes a
            sub-directory of ./train_state.
        epoch: if given, an "epoch-<epoch>" sub-directory is created too.

    Returns:
        (trains_state_dir, config_train_dir, save_state_dir) where
        save_state_dir is None when epoch is None.
    """
    trains_state_dir = "./train_state"
    config_train_dir = os.path.join(trains_state_dir, config_label)
    # makedirs creates every missing level at once and is race-free, unlike
    # the original isdir/mkdir pairs.
    os.makedirs(config_train_dir, exist_ok=True)
    save_state_dir = None
    if epoch is not None:
        save_state_dir = os.path.join(config_train_dir, f"epoch-{epoch}")
        os.makedirs(save_state_dir, exist_ok=True)
    return trains_state_dir, config_train_dir, save_state_dir
def plot_and_save_model_per_epoch(epoch,
                                  model_to_save,
                                  val_acc_list,
                                  train_acc_list,
                                  val_loss_list,
                                  train_loss_list,
                                  config_label):
    """Refresh the accuracy/loss history charts for a run and optionally save
    the model weights for this epoch.

    Callers pass epoch=None together with model_to_save=None to only update
    the charts; save_state_dir is None in that case, so model_to_save must be
    None whenever epoch is (os.path.join would fail otherwise).
    """
    trains_state_dir, config_train_dir, save_state_dir = get_save_state_dirs(config_label, epoch)
    # Balanced-accuracy history chart (validation vs. train).
    fig_save_path = os.path.join(config_train_dir, "val_train_acc.jpeg")
    plt.plot(range(len(val_acc_list)), val_acc_list, label="validation")
    plt.plot(range(len(train_acc_list)), train_acc_list, label="train")
    plt.legend(loc="lower right")
    plt.xlabel('Epoch')
    plt.ylabel('Balanced Accuracy')
    plt.savefig(fig_save_path)
    plt.clf()
    # Loss history chart (validation vs. train).
    fig_save_path = os.path.join(config_train_dir, "val_train_loss.jpeg")
    plt.plot(range(len(val_loss_list)), val_loss_list, label="validation")
    plt.plot(range(len(train_loss_list)), train_loss_list, label="train")
    plt.legend(loc="lower right")
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.savefig(fig_save_path)
    plt.clf()
    if model_to_save:
        # save_state_dir is only non-None when epoch was given.
        model_save_path = os.path.join(save_state_dir, "model.state")
        model_to_save.save_model(model_save_path)
def save_auc_roc_chart_for_test(test_fpr, test_tpr, test_auc_score, config_label, epoch):
    """Plot the test ROC curve and save it under the epoch's state directory.

    The filename embeds time.time() so repeated evaluations of the same epoch
    do not overwrite each other.
    """
    trains_state_dir, config_train_dir, save_dir = get_save_state_dirs(config_label, epoch)
    fig_save_path = os.path.join(save_dir, f"test_roc_{time.time()}.jpeg")
    plt.plot(test_fpr, test_tpr, label="test, auc=" + str(test_auc_score))
    plt.legend(loc="lower right")
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.savefig(fig_save_path)
    plt.clf()
def calculate_test(image_model, epoch, test_data_loader, logger, config_name, show_tqdm=False):
    """Evaluate ``image_model`` on the test loader, save its ROC chart and log
    the balanced accuracy together with the confusion matrix."""
    image_model.eval()
    test_acc, test_c_acc, roc_data = validate(image_model, test_data_loader, show_tqdm=show_tqdm)
    test_FPR, test_TPR, test_auc_score = roc_data
    test_acc = float(test_acc)
    save_auc_roc_chart_for_test(test_FPR, test_TPR, test_auc_score, config_name, epoch)
    logger.info(f'Test|Epoch:{epoch}|Accuracy:{round(test_acc, 4)}, {test_c_acc}%')
def train_model(base_model, config_base_name, train_val_test_data_loaders, augmentation,
                adaptation_sample_dataset=None,
                train_model_flag=True,
                load_model_from_dir=None):
    """Train (or only evaluate) a ThyroidClassificationModel around ``base_model``.

    Args:
        base_model: torchvision backbone to wrap.
        config_base_name: prefix of the run's config label / state directory.
        train_val_test_data_loaders: (train, val, test) DataLoader triple;
            entries may be None when unused.
        augmentation: augmentation tag passed to get_transformation().
        adaptation_sample_dataset: dataset supplying source images for the
            adaptation-based augmentations (fda/mixup styles).
        train_model_flag: when False, skip training and only run the test set.
        load_model_from_dir: if given, load weights from <dir>/model.state
            before training/evaluating.
    """
    config_name = f"{config_base_name}-{augmentation}-{','.join(Config.class_idx_dict.keys())}"
    logger = set_config_for_logger(config_name)
    logger.info(f"training config: {config_name}")
    try:
        # Inception3 returns (main, aux) logits in train mode and needs the
        # auxiliary loss term below.
        _is_inception = type(base_model) == torchvision.models.inception.Inception3
        train_data_loader, val_data_loader, test_data_loader = train_val_test_data_loaders
        logger.info(
            f"train valid test splits:" +
            f" {len(train_data_loader.dataset.samples) if train_data_loader else None}," +
            f" {len(val_data_loader.dataset.samples) if val_data_loader else None}," +
            f" {len(test_data_loader.dataset.samples) if test_data_loader else None}")
        # MODEL
        if load_model_from_dir:
            # Load model from file
            model_path = os.path.join(load_model_from_dir, 'model.state')
            image_model = ThyroidClassificationModel(base_model).load_model(model_path).to(Config.available_device)
        else:
            image_model = ThyroidClassificationModel(base_model).to(Config.available_device)
        if train_model_flag:
            # TRAIN
            transformation = get_transformation(augmentation=augmentation, base_dataset=adaptation_sample_dataset)
            train_dataset = cast(ThyroidDataset, train_data_loader.dataset)
            train_dataset.transform = transformation
            # Class-weighted cross entropy to counter class imbalance.
            cec = nn.CrossEntropyLoss(weight=torch.tensor(train_dataset.class_weights).to(Config.available_device))
            optimizer = optim.Adam(image_model.parameters(), lr=Config.learning_rate)
            my_lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=Config.decay_rate)
            val_acc_history = []
            train_acc_history = []
            # NOTE(review): these accumulate across ALL epochs (never reset),
            # so the "per-epoch" train confusion matrix/accuracy below is in
            # fact cumulative — confirm whether that is intended.
            train_y_preds = []
            train_y_targets = []
            # NOTE(review): never updated afterwards, so is_a_better_epoch's
            # `val_acc >= best_epoch_val_acc` check is effectively always
            # true — confirm intent.
            best_epoch_val_acc = 0
            for epoch in range(Config.n_epoch):
                # variables to calculate train acc
                class_set = sorted(train_data_loader.dataset.class_to_idx_dict.values())
                for images, labels in tqdm(train_data_loader, colour="#0000ff"):
                    # Skip tail batches smaller than half a batch.
                    if len(images) >= Config.batch_size // 2:
                        image_model.train()
                        images = images.to(Config.available_device)
                        labels = labels.to(Config.available_device)
                        optimizer.zero_grad()
                        pred = image_model(images)
                        # pred label: torch.max(pred, 1)[1], labels
                        if _is_inception:
                            # Auxiliary-classifier loss weighted 0.4, per the
                            # Inception training recipe.
                            pred, aux_pred = pred
                            loss, aux_loss = cec(pred, labels), cec(aux_pred, labels)
                            loss = loss + 0.4 * aux_loss
                        else:
                            loss = cec(pred, labels)
                        loss.backward()
                        optimizer.step()
                        # train preds and labels
                        values, preds = torch.max(pred, 1)
                        train_y_preds.extend(preds.cpu())
                        train_y_targets.extend(labels.cpu())
                # Epoch level
                # validation data
                image_model.eval()
                train_cf_matrix = confusion_matrix(train_y_targets, train_y_preds, normalize="true")
                class_accuracies = [train_cf_matrix[c][c] for c in class_set]
                # NOTE(review): the next two lines are dead — train_acc is
                # recomputed from scratch on the line after them.
                train_acc = sum(class_accuracies)
                train_acc /= len(class_set)
                train_acc = (100 * sum(class_accuracies) / len(class_set)).item()
                train_acc_history.append(train_acc)
                logger.info(f'Train|E:{epoch}|Balanced Accuracy:{round(train_acc, 4)}%,\n{train_cf_matrix}')
                val_acc, val_cf_matrix, _, val_loss = validate(image_model,
                                                               val_data_loader,
                                                               cec)
                val_acc = float(val_acc)
                val_acc_history.append(val_acc)
                logger.info(f'Val|E:{epoch}|Balanced Accuracy:{round(val_acc, 4)}%,\n{val_cf_matrix}')
                save_model = False
                # NOTE(review): epoch ranges over 0..n_epoch-1, so this
                # comparison is never true (off by one) — confirm intent.
                is_last_epoch = epoch == Config.n_epoch
                is_a_better_epoch = val_acc >= best_epoch_val_acc
                # Also require train/val accuracy to be close (overfit guard).
                is_a_better_epoch &= abs(train_acc - val_acc) < Config.train_val_acc_max_distance_for_best_epoch
                if is_a_better_epoch or is_last_epoch:
                    save_model = True
                    calculate_test(image_model, epoch, test_data_loader, logger, config_name, show_tqdm=False)
                plot_and_save_model_per_epoch(epoch if save_model else None,
                                              image_model if save_model else None,
                                              val_acc_history,
                                              train_acc_history,
                                              [],
                                              [],
                                              config_label=config_name)
                my_lr_scheduler.step()
        else:
            # JUST EVALUATE
            calculate_test(image_model, 0, test_data_loader, logger, config_name,
                           show_tqdm=True)
    except Exception as e:
        # Surface failures both on stdout and in the run's log, then re-raise.
        print(e)
        logger.error(str(e))
        raise e
def load_datasets(datasets_folders, test_percent=Config.test_percent, val_percent=Config.val_percent, sample_percent=1,
                  is_nci_per_slide=False):
    """Load patch lists for the given dataset folders and build the datasets
    and data loaders for each split.

    Args:
        datasets_folders: dataset folder names passed to CustomFragmentLoader.
        test_percent / val_percent: split sizes in percent of patches.
        sample_percent: fraction of each split to (re)sample.
            NOTE(review): random.choices samples WITH replacement, so even
            sample_percent=1 can duplicate some items and drop others —
            confirm this is intended (random.sample is without replacement).
        is_nci_per_slide: use the per-slide NCI splitter, whose labels are
            (tumor_percent, slide_id) tuples.

    Returns:
        ((train, val, test) raw sample lists,
         (train_ds, val_ds, test_ds) ThyroidDatasets,
         (train_loader, val_loader, test_loader) DataLoaders — a loader is
         None when its split is empty).
    """
    if is_nci_per_slide:
        l_train, l_val, l_test = CustomFragmentLoader(
            datasets_folders).national_cancer_image_and_labels_splitter_per_slide(
            test_percent=test_percent,
            val_percent=val_percent)
    else:
        l_train, l_val, l_test = CustomFragmentLoader(datasets_folders).load_image_path_and_labels_and_split(
            test_percent=test_percent,
            val_percent=val_percent)
    l_train = random.choices(l_train, k=int(sample_percent * len(l_train)))
    l_val = random.choices(l_val, k=int(sample_percent * len(l_val)))
    l_test = random.choices(l_test, k=int(sample_percent * len(l_test)))
    l_train_ds = ThyroidDataset(l_train, Config.class_idx_dict)
    l_val_ds = ThyroidDataset(l_val, Config.class_idx_dict)
    l_test_ds = ThyroidDataset(l_test, Config.class_idx_dict)
    l_train_data_loader = None
    if l_train:
        l_train_data_loader = DataLoader(l_train_ds, batch_size=Config.batch_size, shuffle=True)
    l_val_data_loader = None
    if l_val:
        l_val_data_loader = DataLoader(l_val_ds, batch_size=Config.eval_batch_size, shuffle=True)
    l_test_data_loader = None
    if l_test:
        l_test_data_loader = DataLoader(l_test_ds, batch_size=Config.eval_batch_size, shuffle=True)
    return (l_train, l_val, l_test), (l_train_ds, l_val_ds, l_test_ds), (
        l_train_data_loader, l_val_data_loader, l_test_data_loader)
@torch.no_grad()
def evaluate_nci_dataset_per_slide(config_base_name, augmentation, base_model, data_loader,
                                   load_model_from_dir):
    """Evaluate a trained model on NCI data aggregated per slide.

    The positive-class score x[:, 1] of each patch is treated as a tumor
    fraction; patch scores are averaged per slide and scaled to a 0-100
    percent prediction, then predictions and targets are rounded to the
    nearest multiple of 10 before computing a confusion matrix and a
    support-weighted accuracy.

    Returns:
        (accuracy_percent, normalized_confusion_matrix).
    """
    config_name = f"{config_base_name}-{augmentation}-tumor-percent"
    logger = set_config_for_logger(config_name)
    logger.info(f"training config: {config_name}")
    # NOTE(review): computed but unused in this function — leftover from
    # train_model; confirm it can be removed.
    _is_inception = type(base_model) == torchvision.models.inception.Inception3
    logger.info(
        f"test:" +
        f" {len(data_loader.dataset.samples) if data_loader else None}")
    # MODEL
    # Load model from file
    model_path = os.path.join(load_model_from_dir, 'model.state')
    model = ThyroidClassificationModel(base_model).load_model(model_path).to(Config.available_device)
    y_positive_scores = []
    slides_preds = {}
    slide_labels = {}
    for images, (labels, slides) in tqdm(data_loader):
        images = images.to(Config.available_device)
        x = model(images, validate=True).cpu()
        preds = x[:, 1]
        logger.info("zero and 1000 percent")
        logger.info(x[:, 0])
        logger.info(x[:, 1])
        for row_index in range(len(labels)):
            slide_id = slides[row_index]
            slide_label = labels[row_index]
            slide_labels[slide_id] = slide_label
            # Collect every patch's positive score under its slide id.
            slides_preds[slide_id] = slides_preds.get(slide_id, []) + [preds[row_index].item()]
        # NOTE(review): collected but unused below (the ROC lines near the
        # end are commented out).
        y_positive_scores += x[:, 1].cpu()
    y_targets = []
    y_preds = []
    for key, value in slides_preds.items():
        # Slide-level prediction: mean patch score scaled to percent.
        slides_preds[key] = (sum(slides_preds[key]) / len(slides_preds[key])) * 100
        y_preds.append(slides_preds[key])
        y_targets.append(int(slide_labels[key]))
    # Round both sides to the nearest multiple of 10 percent.
    y_targets_rounded = [int(round(x / 100, 1) * 100) for x in y_targets]
    y_preds_rounded = [int(round(x / 100, 1) * 100) for x in y_preds]
    cf_matrix = confusion_matrix(y_targets_rounded, y_preds_rounded, labels=Config.class_names, normalize="true")
    class_accuracies = [cf_matrix[c][c] for c in range(len(cf_matrix))]
    # Weight each class's accuracy by its (row-normalized) support mass.
    class_weights = [sum(cf_matrix[c]) for c in range(len(cf_matrix))]
    acc = sum([class_accuracies[i] * class_weights[i] for i in range(len(class_accuracies))])
    acc /= sum(class_weights)
    # TN|FN
    # FP|TP
    # fpr, tpr, _ = roc_curve(y_targets, y_positive_scores)
    # auc = roc_auc_score(y_targets, y_positive_scores)
    logger.info(f"target rounded:{y_targets_rounded}")
    logger.info(f"pred rounded:{y_preds_rounded}")
    logger.info(f"Results| acc:{acc * 100}\ncf:{cf_matrix}")
    return acc * 100, cf_matrix
##########
## Runs ##
##########
# train_phase block
if __name__ == '__main__' and Config.train_phase:
    # Train on the NCI dataset; train_ds doubles as the pool of source images
    # for the adaptation-based augmentations.
    _, (train_ds, _, _), (train_data_loader, val_data_loader, test_data_loader) = load_datasets(
        ["national_cancer_institute"],
        sample_percent=1)
    # Domain adaptation dataset on small real datasets
    # _, (_, _, domain_sample_test_dataset), _ = load_datasets(["stanford_tissue_microarray",
    #                                                           "papsociaty"],
    #                                                          sample_percent=0.5,
    #                                                          test_percent=100,
    #                                                          val_percent=0)
    for c_base_name, model, augmentations in [
        (f"resnet101_{Config.learning_rate}_{Config.decay_rate}_nci_final",
         torchvision.models.resnet101(pretrained=True, progress=True), [
             "mixup",
             # "jit",
             # "fda",
             # "jit-fda-mixup",
             # "shear",
             # "std"
         ]),
    ]:
        for aug in augmentations:
            # Re-seed before each run so runs are comparable.
            Config.reset_random_seeds()
            train_model(model, c_base_name, (train_data_loader, val_data_loader, test_data_loader),
                        augmentation=aug, adaptation_sample_dataset=train_ds)
# evaluate_phase block
if __name__ == '__main__' and Config.evaluate_phase:
    # Main data
    # Per-slide tumor-percent evaluation: classes become the integers 0..100
    # and the whole NCI dataset is loaded as test data.
    Config.class_names = [i for i in range(101)]
    Config.class_idx_dict = {i: i for i in range(101)}
    _, (train_ds, _, _), (_, _, test_data_loader) = load_datasets(
        ["national_cancer_institute",
         ],
        sample_percent=1, test_percent=100, val_percent=0, is_nci_per_slide=True)
    for c_base_name, model, aug_best_epoch_list in [
        (f"resnet101_{Config.learning_rate}_{Config.decay_rate}_nci_eval",
         torchvision.models.resnet101(pretrained=True, progress=True), [
             ("mixup", "train_state/resnet101_0.0001_1_nci_final-mixup-BENIGN,MALIGNANT/epoch-19/"),
         ]),
        # (f"resnet101_{Config.learning_rate}_{Config.decay_rate}_test_nci_eval",
        #  torchvision.models.resnet101(pretrained=True, progress=True), [
        #      ("fda",
        #       "train_state/runs_0.0001_1_nic_test_benign_mal/resnet101_0.0001_1_nci-fda-BENIGN,MALIGNANT/epoch-3/"),
        #      ("mixup",
        #       "train_state/runs_0.0001_1_nic_test_benign_mal/resnet101_0.0001_1_nci-mixup-BENIGN,MALIGNANT/epoch-3/"),
        #      ("jit",
        #       "train_state/runs_0.0001_1_nic_test_benign_mal/resnet101_0.0001_1_nci-jit-BENIGN,MALIGNANT/epoch-3/"),
        #      ("jit-fda-mixup",
        #       "train_state/runs_0.0001_1_nic_test_benign_mal/resnet101_0.0001_1_nci-jit-fda-mixup-BENIGN,MALIGNANT/epoch-3/"),
        #  ]),
    ]:
        for aug, best_epoch in aug_best_epoch_list:
            Config.reset_random_seeds()
            evaluate_nci_dataset_per_slide(c_base_name, aug, model, test_data_loader,
                                           load_model_from_dir=best_epoch)
@@ -0,0 +1,3 @@ | |||
# Run the fragment splitter with the project root and CWD on PYTHONPATH.
export PYTHONPATH="${PYTHONPATH}:../../../";
export PYTHONPATH="${PYTHONPATH}:./";
python fragment_splitter.py;
@@ -0,0 +1,3 @@ | |||
# Run model training with the project root and CWD on PYTHONPATH.
export PYTHONPATH="${PYTHONPATH}:../../../";
export PYTHONPATH="${PYTHONPATH}:./";
python model_training.py;
@@ -0,0 +1,83 @@ | |||
import os | |||
import numpy as np | |||
from PIL import Image | |||
from torch.utils.data import Dataset | |||
from config import Config | |||
from fragment_splitter import CustomFragmentLoader | |||
from transformation import get_transformation | |||
from utils import show_and_wait | |||
class ThyroidDataset(Dataset):
    """Dataset of thyroid image patches.

    Each sample is (transformed_image, (class_index, slide_id)). Images are
    centered on a black square canvas of a fixed size before transformation,
    and inverse-frequency class weights are pre-computed for loss balancing.
    """

    def __init__(self, image_paths_labels_list, class_to_index, transform=None, force_to_size_with_padding=512):
        """
        Args:
            image_paths_labels_list: iterable of (image_path, (label, slide_id)).
            class_to_index: mapping from label name to integer class index.
            transform: albumentations-style transform; when None, the minimal
                "min" transformation is applied at access time.
            force_to_size_with_padding: square side length images are padded to.
        """
        super().__init__()
        self.class_to_idx_dict = class_to_index
        self.force_to_size_with_padding = force_to_size_with_padding
        self.transform = transform
        self.samples = self._make_dataset(image_paths_labels_list)
        self.class_weights = self._calculate_class_weights(image_paths_labels_list)

    def _calculate_class_weights(self, image_paths_labels_list):
        """Return inverse-frequency weights, ordered by class index.

        Weight for class c: total_samples / (n_classes * count(c)).
        """
        class_counts = {}
        for _path, (label, _slide) in image_paths_labels_list:
            class_counts[label] = class_counts.get(label, 0) + 1
        total = len(image_paths_labels_list)
        indexed_weights = sorted(
            (self.class_to_idx_dict.get(label), total / (len(class_counts) * count))
            for label, count in class_counts.items()
        )
        return [weight for _index, weight in indexed_weights]

    def _make_dataset(self, image_paths_labels_list):
        """Validate image paths and map labels to class indices.

        Raises:
            RuntimeError: if any image path does not exist on disk.
        """
        images = []
        for image_path, (label, slide) in image_paths_labels_list:
            if not os.path.exists(os.path.abspath(image_path)):
                raise RuntimeError(f"{image_path} not found.")
            item = (image_path, (self.class_to_idx_dict.get(label, "Unknown label"), slide))
            images.append(item)
        return images

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        path, target = self.samples[index]
        # Use a context manager so PIL's lazily-opened file handle is closed
        # promptly (the previous code relied on garbage collection).
        with Image.open(path) as pil_image:
            image = pil_image.convert('RGB')
        image = self.add_margin(image)
        image = np.array(image)
        if self.transform is not None:
            # show_and_wait(image, name=f"./transformations/{index}-original", wait=False, save=True)
            image = self.transform(image=image)['image']
            # image_show = np.moveaxis(image.cpu().detach().numpy(), 0, -1)
            # show_and_wait(image_show, name=f"./transformations/{index}-transformed", save=True)
        else:
            transform = get_transformation(augmentation="min")
            image = transform(image=image)['image']
        return image, target

    def add_margin(self, pil_img):
        """Center *pil_img* on a black square canvas of the configured size."""
        width, height = pil_img.size
        new_width = self.force_to_size_with_padding
        new_height = self.force_to_size_with_padding
        result = Image.new("RGB", (new_width, new_height), (0, 0, 0))
        top_padding = (new_height - height) // 2
        left_padding = (new_width - width) // 2
        result.paste(pil_img, (left_padding, top_padding))
        return result
if __name__ == '__main__':
    # Smoke test: build train/val/test datasets and fetch one sample.
    class_idx_dict = Config.class_idx_dict
    datasets_folder = ["stanford_tissue_microarray", "papsociaty"]
    train, val, test = CustomFragmentLoader(datasets_folder).load_image_path_and_labels_and_split()
    train_ds = ThyroidDataset(train, class_idx_dict)
    test_ds = ThyroidDataset(test, class_idx_dict)
    val_ds = ThyroidDataset(val, class_idx_dict)
    res = train_ds.__getitem__(0)
    print(res)
@@ -0,0 +1,47 @@ | |||
import torch | |||
import torchvision | |||
from torch import nn | |||
class ThyroidClassificationModel(nn.Module):
    """Binary classifier: a torchvision backbone whose 1000-dim output feeds
    a small softmax MLP head.

    Inception3 backbones return (main_output, aux_output) in training mode,
    so a second, identically shaped head is attached to the aux output.
    """

    @staticmethod
    def _build_classifier_head():
        """1000 -> 2 softmax MLP head (shared by the main and aux branches)."""
        return nn.Sequential(
            nn.Linear(1000, 500),
            nn.BatchNorm1d(500),
            nn.ReLU(),
            nn.Linear(500, 100),
            nn.BatchNorm1d(100),
            nn.ReLU(),
            nn.Linear(100, 2),
            nn.BatchNorm1d(2),
            nn.Softmax(dim=-1)
        )

    def __init__(self, base_model):
        super().__init__()
        self.base_model = base_model
        self.classifier = self._build_classifier_head()
        # isinstance (rather than `type(...) ==`) also covers subclasses.
        self._is_inception3 = isinstance(base_model, torchvision.models.inception.Inception3)
        if self._is_inception3:
            self.classifier2 = self._build_classifier_head()

    def forward(self, x, validate=False):
        """Return class probabilities; for an Inception3 backbone outside of
        validation, a (main_probs, aux_probs) pair."""
        output = self.base_model(x.float())
        if self._is_inception3 and not validate:
            return self.classifier(output[0]), self.classifier2(output[1])
        return self.classifier(output)

    def save_model(self, path):
        """Serialize model weights (state_dict) to *path*."""
        torch.save(self.state_dict(), path)

    def load_model(self, path):
        """Load weights from *path*, switch to eval mode, and return self."""
        self.load_state_dict(torch.load(path))
        self.eval()
        return self
@@ -0,0 +1,82 @@ | |||
import albumentations as A | |||
from albumentations.pytorch import ToTensorV2 | |||
from albumentations_mixup import Mixup | |||
def get_transformation(augmentation, crop_size=299, base_dataset=None):
    """Return the albumentations pipeline named by *augmentation*.

    Known names: "min", "std", "jit", "jit-nrs", "fda", "mixup",
    "jit-fda-mixup", "jit-fda-mixup-nrs", "shear". The FDA and Mixup
    variants draw reference samples from *base_dataset.samples*.

    Raises:
        ValueError: for an unknown augmentation name.
    """
    scaled_center_crop_size = int(crop_size * 1.25)

    def _random_crop(size):
        return A.RandomCrop(size, size, always_apply=True)

    def _standard_pipeline(custom_transforms, random_scale=True):
        # Common wrapper: flip/rotate/(optional scale)/pad/crop, then the
        # augmentation-specific transforms, then blur/noise + tensor conversion.
        pipeline = [
            A.Flip(p=0.25),
            A.Rotate(p=0.25),
            A.RandomScale(scale_limit=0.5, p=0.5 if random_scale else 0),
            A.PadIfNeeded(min_height=scaled_center_crop_size, min_width=scaled_center_crop_size,
                          always_apply=True),
            A.CenterCrop(scaled_center_crop_size, scaled_center_crop_size),
            _random_crop(crop_size),
        ]
        pipeline += custom_transforms
        pipeline += [
            A.Blur(p=0.25, blur_limit=2),
            A.GaussNoise(p=0.25, var_limit=10),
            ToTensorV2()
        ]
        return A.Compose(pipeline)

    if augmentation == "min":
        # Deterministic except for the final random crop; used at eval time.
        return A.Compose([
            A.PadIfNeeded(min_height=scaled_center_crop_size, min_width=scaled_center_crop_size, always_apply=True),
            A.CenterCrop(scaled_center_crop_size, scaled_center_crop_size),
            _random_crop(crop_size),
            ToTensorV2()
        ])
    if augmentation == "std":
        return _standard_pipeline([])
    if augmentation == "jit-nrs":
        return _standard_pipeline([
            A.ColorJitter(p=0.5, hue=.5)
        ], random_scale=False)
    if augmentation == "jit":
        return _standard_pipeline([
            A.ColorJitter(p=0.5, hue=.5)
        ])
    if augmentation == "fda":
        reference_images = [sample[0] for sample in base_dataset.samples]
        return _standard_pipeline([
            A.domain_adaptation.FDA(reference_images, beta_limit=0.1, p=0.5)
        ])
    if augmentation == "mixup":
        mixup_pairs = [sample[0:2] for sample in base_dataset.samples]
        return _standard_pipeline([
            Mixup(mixups=mixup_pairs, p=0.5, beta_limit=(0.1)),
        ])
    if augmentation == "jit-fda-mixup":
        p = 0.16
        reference_images = [sample[0] for sample in base_dataset.samples]
        mixup_pairs = [sample[0:2] for sample in base_dataset.samples]
        return _standard_pipeline([
            A.domain_adaptation.FDA(reference_images, beta_limit=0.1, p=p),
            Mixup(mixups=mixup_pairs, p=p, beta_limit=(0.1)),
            A.ColorJitter(p=p, hue=.5)
        ])
    if augmentation == "jit-fda-mixup-nrs":
        p = 0.16
        reference_images = [sample[0] for sample in base_dataset.samples]
        mixup_pairs = [sample[0:2] for sample in base_dataset.samples]
        return _standard_pipeline([
            A.domain_adaptation.FDA(reference_images, beta_limit=0.1, p=p),
            Mixup(mixups=mixup_pairs, p=p, beta_limit=(0.1)),
            A.ColorJitter(p=p, hue=.5)
        ], random_scale=False)
    if augmentation == "shear":
        return _standard_pipeline([
            A.Affine(shear={"x": (-10, 10), "y": (-10, 10)}, p=0.5)
        ], random_scale=False)
    raise ValueError(f"Augmentation unknown: {augmentation}")
@@ -0,0 +1,45 @@ | |||
import random | |||
import torch | |||
class Config:
    """Central hyper-parameter / runtime configuration shared across scripts."""
    DEBUG = False
    batch_size = 32
    eval_batch_size = 128
    # Dataset split percentages.
    test_percent = 20
    val_percent = 10
    learning_rate = 0.0001
    decay_rate = 1  # 0.99**50=0.6, 0.99**100=0.36
    n_epoch = 2 if DEBUG else 20
    available_device = "cuda" if torch.cuda.is_available() and not DEBUG else "cpu"
    # NOTE: executes at import time (class-body side effect).
    print(f"Device: {available_device}")
    workers = 1 if DEBUG else 40
    # learned from evaluate_image_patcher_and_visualize.py
    laplacian_threshold = 298
    # RANDOM SEED
    seed = 115

    @staticmethod
    def reset_random_seeds():
        """Re-seed the `random` and torch RNGs for reproducibility.
        NOTE(review): numpy's RNG is not seeded here — confirm intended."""
        random.seed(Config.seed)
        torch.manual_seed(Config.seed)

    class_names = ["BENIGN", "MALIGNANT"]
    class_idx_dict = {"BENIGN": 0, "MALIGNANT": 1}
    train_val_acc_max_distance_for_best_epoch = 6  # Percent
    n_epoch_for_image_patcher = 60
    # Phase switches read by the training/evaluation entry points.
    train_phase = False
    evaluate_phase = False


# Seed the RNGs once at import time.
Config.reset_random_seeds()
@@ -0,0 +1,64 @@ | |||
import ssl | |||
import time | |||
from urllib.parse import urlparse | |||
from urllib.request import urlopen | |||
from bs4 import BeautifulSoup | |||
from database_crawlers.web_stain_sample import StainType, WebStainWSIOneDIndex | |||
ssl._create_default_https_context = ssl._create_unverified_context | |||
class BioAtlasAtJakeGittlenLaboratoriesImage(WebStainWSIOneDIndex):
    """One whole-slide image hosted at bio-atlas.psu.edu (tiles addressed by
    a single linear index, hence the 1-D base class)."""

    def __init__(self, database_name, image_id, image_web_label, report, stain_type, is_wsi):
        super().__init__(database_name, image_id, image_web_label, report, stain_type, is_wsi)

    def _get_tile_url(self, zoom, partition=None, i=None, j=None):
        # Tiles are fetched by linear `partition` index; i/j are unused here.
        return f"https://bio-atlas.psu.edu/human/tile.jpeg.php?s={self.image_id}&z={zoom}&i={partition}"

    def get_slide_view_url(self):
        return f"https://bio-atlas.psu.edu/human/view.php?s={self.image_id}"

    def _get_file_path_name(self):
        return self.save_path + self.image_id

    def find_best_zoom(self):
        # Zoom level 0 is used for this provider.
        return 0
class BioAtlasThyroidSlideProvider:
    """Enumerates thyroid whole-slide images from the bio-atlas.psu.edu
    search results page."""
    page_link = "https://bio-atlas.psu.edu/human/search.php?q=Thyroid&organism%5B%5D=5&age_fr=&age_fr_units=1&age_to=&age_to_units=1&sex%5B%5D=all&thumbnails=on&rpp=30&as_sfid=AAAAAAW0RrspdnblpiFwz8osoAdvS8nafd1J9LG_ARQ-IF_NZ3aI2EXCMDBeqE_iD5rUo1QLg454tS63DMSgATSzgrksb4rMi-GWPl3O9f3JKlqGn8oXoqbOYok3__yZx69ewzg%3D&as_fid=6900aeb3e4cc9f39ef9738a2f11c2cefb8c3f37c#results"
    database_name = "BioAtlasThyroidSlideProvider"
    stain_type = StainType.H_AND_E
    is_wsi = True

    @classmethod
    def get_web_stain_samples(cls):
        """Yield BioAtlasAtJakeGittlenLaboratoriesImage items scraped from the
        search page; on any scraping error, wait 2s and retry by delegating
        to a fresh generator."""
        print(cls.page_link)
        try:
            html_text = urlopen(cls.page_link).read()
            soup = BeautifulSoup(html_text, 'html.parser')
            search_results = soup.find_all("div", {"class": "shadow-box search-result-item search-result-slide"})
            for result_item in search_results:
                image_view_url = result_item.find("a").attrs['href']
                query_param = urlparse(image_view_url).query.split("=")
                if query_param[0] != "s": raise Exception("Query params does not contains image url")
                image_id = query_param[1]
                image_web_label = str(result_item.find("b", text="Diagnosis").next_sibling)
                yield BioAtlasAtJakeGittlenLaboratoriesImage(cls.database_name, image_id, image_web_label, None,
                                                             cls.stain_type, cls.is_wsi)
        except Exception as e:
            print(e)
            time.sleep(2)
            # BUG FIX: `yield cls.get_web_stain_samples()` yielded the generator
            # object itself; delegate so callers receive slide items.
            yield from cls.get_web_stain_samples()
if __name__ == '__main__':
    # Smoke test: crawl one known slide (id 687) and save its JPEG + JSON.
    bio_atlas_provider = BioAtlasThyroidSlideProvider()
    for slide in bio_atlas_provider.get_web_stain_samples():
        if slide.image_id == "687":
            print(slide.image_id, slide.image_web_label, slide.get_slide_view_url())
            slide.crawl_image_save_jpeg_and_json()
            break
@@ -0,0 +1,3 @@ | |||
# Run the database crawler with the project root and CWD on PYTHONPATH.
export PYTHONPATH="${PYTHONPATH}:../../";
export PYTHONPATH="${PYTHONPATH}:./";
python database_crawler.py;
@@ -0,0 +1,61 @@ | |||
import time | |||
from urllib.parse import urlparse | |||
from urllib.request import urlopen | |||
from bs4 import BeautifulSoup | |||
from database_crawlers.web_stain_sample import StainType, WebStainWSITwoDIndex | |||
class HeidelbergPathologyImage(WebStainWSITwoDIndex):
    """One whole-slide image from the Heidelberg pathology atlas (Deep Zoom
    tiles addressed by a 2-D (i, j) index)."""

    def __init__(self, database_name, image_id, image_web_label, report, stain_type, is_wsi):
        super().__init__(database_name, image_id, image_web_label, report, stain_type, is_wsi)

    def _get_tile_url(self, zoom, partition=None, i=None, j=None):
        # Image ids are zero-padded to two digits in the DZI path.
        return f"https://eliph.klinikum.uni-heidelberg.de/dzi/atlas/05-schilddruese/05-{'%.2d' % int(self.image_id)}_files/{zoom}/{i}_{j}.jpeg"

    def get_slide_view_url(self):
        return f"https://eliph.klinikum.uni-heidelberg.de/atlas/?c=05-schilddruese&context=image&pg={self.image_id}"

    def _get_file_path_name(self):
        return self.save_path + self.image_id

    def find_best_zoom(self):
        # 16 -> 0
        # Deep Zoom levels run from 16 (full resolution) down to 0 here.
        return 16
class HeidelbergPathologyProvider:
    """Enumerates thyroid whole-slide images from the Heidelberg atlas page."""
    page_link = "https://eliph.klinikum.uni-heidelberg.de/atlas/?c=05-schilddruese&context=image"
    database_name = "HeidelbergPathology"
    stain_type = StainType.H_AND_E
    is_wsi = True

    @classmethod
    def get_web_stain_samples(cls):
        """Yield HeidelbergPathologyImage items scraped from the atlas page;
        on any scraping error, wait 2s and retry by delegating to a fresh
        generator."""
        print(cls.page_link)
        try:
            html_text = urlopen(cls.page_link).read()
            soup = BeautifulSoup(html_text, 'html.parser')
            search_results = soup.find_all("div", {"class": "casegrid"})
            for result_item in search_results:
                image_view_url = result_item.find("a").attrs['href']
                query_param = urlparse(image_view_url).query.split("=")
                if "image&pg" not in query_param: raise Exception("Query params does not contains image id")
                image_id = query_param[-1]
                image_web_label = str(result_item.find("b").next)
                yield HeidelbergPathologyImage(cls.database_name, image_id, image_web_label, None,
                                               cls.stain_type, cls.is_wsi)
        except Exception as e:
            print(e)
            time.sleep(2)
            # BUG FIX: `yield cls.get_web_stain_samples()` yielded the generator
            # object itself; delegate so callers receive slide items.
            yield from cls.get_web_stain_samples()
if __name__ == '__main__':
    # Smoke test: crawl only the first listed slide.
    bio_atlas_provider = HeidelbergPathologyProvider()
    for slide in bio_atlas_provider.get_web_stain_samples():
        print(slide.image_id, slide.image_web_label, slide.get_slide_view_url())
        slide.crawl_image_save_jpeg_and_json()
        break
@@ -0,0 +1,7 @@ | |||
# Patch every slide of the bio-atlas dataset located under ../<folder>/data.
from image_patcher import ImageAndSlidePatcher

if __name__ == '__main__':
    database_folder_name = "bio_atlas_at_jake_gittlen_laboratories"
    database_directory = "../"
    image_slide_patcher = ImageAndSlidePatcher()
    image_slide_patcher.save_patches_in_folders(database_directory, database_folder_name)
@@ -0,0 +1,287 @@ | |||
import csv | |||
import json | |||
import os | |||
import os.path as os_path | |||
import random | |||
import re | |||
from math import ceil | |||
from os import listdir | |||
from os.path import isfile, join | |||
import cv2 | |||
import tifffile | |||
import zarr as ZarrObject | |||
from tqdm import tqdm | |||
from config import Config | |||
from database_crawlers.web_stain_sample import ThyroidCancerLevel, WebStainImage | |||
from utils import show_and_wait | |||
class ThyroidFragmentFilters:
    """Patch-level filter predicates used to drop empty/background patches."""

    @staticmethod
    def func_laplacian_threshold(threshold=Config.laplacian_threshold):
        """Return a predicate accepting patches whose Laplacian variance
        reaches *threshold* (i.e. patches with enough texture/detail)."""

        def _predicate(image_nd_array):
            return ThyroidFragmentFilters._empty_frag_with_laplacian_threshold(image_nd_array, threshold)

        return _predicate

    @staticmethod
    def _empty_frag_with_laplacian_threshold(image_nd_array, threshold=Config.laplacian_threshold,
                                             return_variance=False):
        """Decide whether a patch has enough Laplacian variance to keep.

        Returns the boolean verdict, or (verdict, variance) when
        *return_variance* is True."""
        grayscale = cv2.cvtColor(image_nd_array, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(grayscale, (3, 3), 0)
        laplacian = cv2.Laplacian(blurred, cv2.CV_64F, ksize=3, )
        std_dev = cv2.meanStdDev(laplacian)[1][0][0]
        variance = std_dev ** 2
        keep = variance >= threshold
        if return_variance:
            return keep, variance
        return keep
class ImageAndSlidePatcher:
    """Splits slide images into fixed-size patches.

    Handles pyramidal formats (.tiff/.tif/.svs, read lazily through a zarr
    view) and plain images (.jpeg/.jpg, read with OpenCV). Background patches
    are discarded by the Laplacian-variance filter, and per-slide metadata is
    appended to <dataset>/patches/patch_labels.csv.
    """

    @classmethod
    def _check_magnification_from_description(cls, tiff_address):
        """Parse the scanner magnification ("AppMag = N") from the TIFF
        ImageDescription tag; return None when absent or unreadable."""
        try:
            tif_file_obj = tifffile.TiffFile(tiff_address)
            image_description = tif_file_obj.pages.keyframe.tags["ImageDescription"].value
            app_mag = int(re.findall("(AppMag = [0-9]+)", image_description)[0].split(" = ")[-1])
            return app_mag
        except Exception as e:
            # Any IO/parse failure is treated as "magnification unknown".
            return None

    @classmethod
    def _zarr_loader(cls, tiff_address, key=0):
        """Open page *key* of a TIFF as a lazily-indexed zarr array."""
        image_zarr = tifffile.imread(tiff_address, aszarr=True, key=key, )
        zarr = ZarrObject.open(image_zarr, mode='r')
        return zarr

    @classmethod
    def _jpeg_loader(cls, jpeg_address):
        """Load a plain image fully into memory as an ndarray."""
        im = cv2.imread(jpeg_address)
        return im

    @classmethod
    def _json_key_loader(cls, json_file_address, key=None):
        """Load a JSON file; return the whole dict, or only *key* if given."""
        with open(json_file_address, 'rb') as file:
            json_dict = json.loads(file.read())
        if key:
            return json_dict[key]
        return json_dict

    @classmethod
    def _get_extension_from_path(cls, file_path):
        # e.g. "/a/b/x.svs" -> ".svs"
        return os_path.splitext(file_path)[-1]

    @classmethod
    def _get_file_name_from_path(cls, file_path):
        # Base name without its final extension: "/a/b/x.y.svs" -> "x.y"
        return ".".join(os_path.split(file_path)[-1].split(".")[:-1])

    @classmethod
    def _get_number_of_initial_frags(cls, zarr_object, frag_size=512, frag_overlap=0.1):
        """Number of patch positions the fragment generator will yield for
        *zarr_object*, given the patch size and fractional overlap."""
        zarr_shape = zarr_object.shape
        step_size = int(frag_size * (1 - frag_overlap))
        overlap_size = frag_size - step_size
        w_range = list(range(0, ceil((zarr_shape[0] - overlap_size) / step_size) * step_size, step_size))
        h_range = list(range(0, ceil((zarr_shape[1] - overlap_size) / step_size) * step_size, step_size))
        return len(w_range) * len(h_range)

    @classmethod
    def _generate_raw_fragments_from_image_array_or_zarr(cls, image_object, frag_size=512, frag_overlap=0.1,
                                                         shuffle=True):
        """Yield (patch_array, (row, col)) windows over *image_object*.

        Windows overlap by *frag_overlap*; edge windows are shifted back so
        every patch is exactly frag_size x frag_size. With shuffle=True the
        positions are visited in random order. Returns None for a None input."""

        def frag_picker(w_pos, h_pos):
            # Clamp the window at the array edge by moving its start back.
            end_w, end_h = min(zarr_shape[0], w_pos + frag_size), min(zarr_shape[1], h_pos + frag_size)
            start_w, start_h = end_w - frag_size, end_h - frag_size
            return image_object[start_w:end_w, start_h: end_h], (start_w, start_h)

        if image_object is None:
            return None
        zarr_shape = image_object.shape
        step_size = int(frag_size * (1 - frag_overlap))
        overlap_size = frag_size - step_size
        w_range = list(range(0, ceil((zarr_shape[0] - overlap_size) / step_size) * step_size, step_size))
        h_range = list(range(0, ceil((zarr_shape[1] - overlap_size) / step_size) * step_size, step_size))
        if shuffle:
            pos_list = [None] * len(w_range) * len(h_range)
            index = 0
            for w in w_range:
                for h in h_range:
                    pos_list[index] = (w, h)
                    index += 1
            random.shuffle(pos_list)
            for w, h in pos_list:
                yield frag_picker(w, h)
        else:
            for w in w_range:
                for h in h_range:
                    yield frag_picker(w, h)

    @classmethod
    def _filter_frag_from_generator(cls, frag_generator, filter_func_list, return_all_with_condition=False,
                                    all_frag_count=None, output_file=None):
        """Apply every filter in *filter_func_list* to each generated patch.

        Yields (patch, pos) for accepted patches only, or
        (patch, pos, accepted) for every patch when
        *return_all_with_condition* is True. Progress goes through tqdm
        (optionally into *output_file*)."""
        for next_test_item, frag_pos in tqdm(frag_generator, total=all_frag_count, file=output_file,
                                             postfix="Filtering", position=0):
            condition = True
            for function in filter_func_list:
                condition &= function(next_test_item)
            if return_all_with_condition:
                yield next_test_item, frag_pos, condition
            elif condition:
                # show_and_wait(frag)
                yield next_test_item, frag_pos

    @classmethod
    def _get_json_and_image_address_of_directory(cls, directory_path, ignore_json=False):
        """Pair metadata JSON files with image files by shared base name.

        Returns a list of (json_path, image_path) pairs, or just the image
        paths when *ignore_json* is True."""
        image_formats = [".jpeg", ".tiff", ".jpg"]
        json_format = ".json"
        files = [f for f in listdir(directory_path) if isfile(join(directory_path, f))]
        files.sort()
        # base file name -> [json_path, image_path]
        pairs = {}
        for file_path in files:
            file_path = join(directory_path, file_path)
            file_name = cls._get_file_name_from_path(file_path)
            pairs[file_name] = pairs.get(file_name, [None, None])
            if cls._get_extension_from_path(file_path) in image_formats:
                pairs[file_name][1] = file_path
            elif cls._get_extension_from_path(file_path) == json_format:
                pairs[file_name][0] = file_path
        if ignore_json:
            return [value for key, value in pairs.values() if value is not None]
        return [(key, value) for key, value in pairs.values() if key is not None and value is not None]

    @staticmethod
    def create_patch_dir_and_initialize_csv(database_path):
        """Ensure <database_path>/patches exists and open its label CSV in
        append mode, writing the header row only when the file is empty.

        Returns (data_dir, patch_dir, csv_writer, csv_file); the caller is
        responsible for closing csv_file."""
        data_dir = os.path.join(database_path, "data")
        patch_dir = os.path.join(database_path, "patches")
        if not os.path.isdir(patch_dir):
            os.mkdir(patch_dir)
        label_csv_path = os.path.join(patch_dir, "patch_labels.csv")
        csv_file = open(label_csv_path, "a+")
        csv_writer = csv.writer(csv_file)
        csv_file.seek(0)
        # Only write the header if the file has no content yet.
        if len(csv_file.read(100)) <= 0:
            csv_writer.writerow(WebStainImage.sorted_json_keys())
        return data_dir, patch_dir, csv_writer, csv_file

    @classmethod
    def save_image_patches_and_update_csv(cls, thyroid_type, thyroid_desired_classes, csv_writer, web_details,
                                          image_path, slide_patch_dir, slide_id):
        """Write the slide's metadata row, then extract and save every
        accepted patch as a JPEG under *slide_patch_dir*.

        Returns (accepted_patch_count, total_patch_count), or None when the
        image could not be loaded.
        NOTE(review): thyroid_type and thyroid_desired_classes are unused here.
        """
        csv_writer.writerow(list(web_details.values()))
        if cls._get_extension_from_path(image_path) in [".tiff", ".tif", ".svs"]:
            zarr_object = cls._zarr_loader(image_path)
            generator = cls._generate_raw_fragments_from_image_array_or_zarr(zarr_object)
            total_counts = cls._get_number_of_initial_frags(zarr_object=zarr_object)
        else:
            jpeg_image = cls._jpeg_loader(image_path)
            # Plain images have unknown magnification: ask the user for a scale.
            jpeg_image = cls.ask_image_scale_and_rescale(jpeg_image)
            generator = cls._generate_raw_fragments_from_image_array_or_zarr(jpeg_image)
            total_counts = cls._get_number_of_initial_frags(zarr_object=jpeg_image)
        if generator is None:
            return
        if not os.path.isdir(slide_patch_dir):
            os.mkdir(slide_patch_dir)
        filters = [ThyroidFragmentFilters.func_laplacian_threshold(Config.laplacian_threshold)]
        fragment_id = 0
        slide_progress_file_path = os.path.join(slide_patch_dir, "progress.txt")
        with open(slide_progress_file_path, "w") as file:
            for fragment, frag_pos in cls._filter_frag_from_generator(generator, filters, all_frag_count=total_counts,
                                                                      output_file=file):
                fragment_file_path = os.path.join(slide_patch_dir, f"{slide_id}-{fragment_id}.jpeg")
                cv2.imwrite(fragment_file_path, fragment)
                fragment_id += 1
        return fragment_id, total_counts

    @classmethod
    def save_patches_in_folders(cls, database_directory, dataset_dir=None):
        """Patch every dataset under *database_directory* that has a data/
        subfolder (or only *dataset_dir* when given). Slides whose patch
        directory already exists are skipped."""
        thyroid_desired_classes = [ThyroidCancerLevel.MALIGNANT, ThyroidCancerLevel.BENIGN]
        datasets_dirs = os.listdir(database_directory) if dataset_dir is None else [dataset_dir]
        list_dir = [os.path.join(database_directory, o) for o in datasets_dirs
                    if os.path.isdir(os.path.join(database_directory, o, "data"))]
        for database_path in list_dir:
            print("database path: ", database_path)
            data_dir, patch_dir, csv_writer, csv_file = cls.create_patch_dir_and_initialize_csv(database_path)
            for json_path, image_path in cls._get_json_and_image_address_of_directory(data_dir):
                print("image path: ", image_path)
                file_name = cls._get_file_name_from_path(image_path)
                # NOTE(review): str hashes are randomized per interpreter run
                # (PYTHONHASHSEED), so slide ids differ between runs — confirm
                # this is intended for the already-patched check below.
                slide_id = str(hash(file_name))
                slide_patch_dir = os.path.join(patch_dir, slide_id)
                if os.path.isdir(slide_patch_dir):
                    """
                    it has already been patched
                    """
                    continue
                web_details = cls._json_key_loader(json_path)
                web_details["image_id"] = slide_id
                web_label = web_details["image_web_label"]
                thyroid_type = ThyroidCancerLevel.get_thyroid_level_from_diagnosis_label(web_label)
                web_details["image_class_label"] = thyroid_type.value[1]
                cls.save_image_patches_and_update_csv(thyroid_type, thyroid_desired_classes, csv_writer, web_details,
                                                      image_path, slide_patch_dir, slide_id)
            csv_file.close()

    @classmethod
    def save_papsociaty_patch(cls, database_path):
        """Patch the PapSociety dataset, whose images live in per-class
        subfolders (Config.class_names) instead of having JSON metadata."""
        thyroid_desired_classes = [ThyroidCancerLevel.MALIGNANT, ThyroidCancerLevel.BENIGN]
        print("database path: ", database_path)
        for folder in Config.class_names:
            group_path = os.path.join(database_path, "data", folder)
            data_dir, patch_dir, csv_writer, csv_file = cls.create_patch_dir_and_initialize_csv(database_path)
            for image_path in cls._get_json_and_image_address_of_directory(group_path, ignore_json=True):
                print("image path: ", image_path)
                file_name = cls._get_file_name_from_path(image_path)
                # NOTE(review): see save_patches_in_folders about hash stability.
                slide_id = str(hash(file_name))
                slide_patch_dir = os.path.join(patch_dir, slide_id)
                if os.path.isdir(slide_patch_dir):
                    """
                    it has already been patched
                    """
                    continue
                # Label is derived from the class folder name plus file name.
                web_label = folder + "-" + file_name
                thyroid_type = ThyroidCancerLevel.get_thyroid_level_from_diagnosis_label(web_label)
                web_details = {"database_name": "PapSociety",
                               "image_id": slide_id,
                               "image_web_label": web_label,
                               "image_class_label": thyroid_type.value[1],
                               "report": None,
                               "stain_type": "UNKNOWN",
                               "is_wsi": False}
                cls.save_image_patches_and_update_csv(thyroid_type, thyroid_desired_classes, csv_writer, web_details,
                                                      image_path, slide_patch_dir, slide_id)
            csv_file.close()

    @classmethod
    def ask_image_scale_and_rescale(cls, image):
        """Show *image* and interactively ask for a rescale factor.

        The user types a float (the image is resized by 1/value), "i" to
        ignore the image (returns None), or anything else to be asked again."""
        # small: S, Medium: M, Large:L
        show_and_wait(image)
        res = input("how much plus pointer fill a cell(float, i:ignore, else repeat): ")
        try:
            if res == "i":
                return None
            elif re.match("[0-9]+(.[0-9]*)?", res):
                scale = 1 / float(res)
                return cv2.resize(image, (0, 0), fx=scale, fy=scale)
            else:
                return cls.ask_image_scale_and_rescale(image)
        except Exception as e:
            print(e)
            return cls.ask_image_scale_and_rescale(image)
if __name__ == '__main__':
    # Fixed seed so the shuffled patch order is reproducible across runs.
    random.seed(1)
    database_directory = "./"
    # ImageAndSlidePatcher.save_patches_in_folders(database_directory, dataset_dir=["stanford_tissue_microarray"])
    # ImageAndSlidePatcher.save_papsociaty_patch(os.path.join(database_directory, "papsociaty"))
@@ -0,0 +1,60 @@ | |||
import concurrent.futures | |||
import os | |||
import pathlib | |||
from tqdm import tqdm | |||
from config import Config | |||
from image_patcher import ImageAndSlidePatcher | |||
from national_cancer_institute.read_xml_file import get_slide_info_from_bcr_xml | |||
def save_national_cancer_institute_patch(database_path):
    """Patch every *.svs slide under <database_path>/data using a thread pool.

    Slide labels are collected from the BCR XML files found next to the
    slides; unlabeled or already-patched slides are skipped. The shared
    label CSV is flushed after each successfully patched slide.
    """

    def patch_image(image_path):
        # Worker body run in the thread pool; returns the patcher result or
        # None (already patched / unlabeled / error).
        try:
            image_path = str(image_path)
            print()
            print("image path: ", image_path)
            file_name = ImageAndSlidePatcher._get_file_name_from_path(image_path)
            slide_id = file_name.split(".")[0]
            slide_patch_dir = os.path.join(patch_dir, slide_id)
            if os.path.isdir(slide_patch_dir):
                print("it has already been patched")
                return
            web_label = slide_infos.get(slide_id, None)
            if web_label is None:
                print("Ignored")
                return
            web_details = {"database_name": "NationalCancerInstitute",
                           "image_id": slide_id,
                           "image_web_label": web_label,
                           "image_class_label": web_label,
                           "report": None,
                           "stain_type": "H&E",
                           "is_wsi": True}
            return ImageAndSlidePatcher.save_image_patches_and_update_csv(web_label, None, csv_writer, web_details,
                                                                          image_path, slide_patch_dir, slide_id)
        except Exception as e:
            # NOTE(review): errors are only printed; the slide is silently
            # skipped — confirm this best-effort behavior is intended.
            print(e)

    data_dir = os.path.join(database_path, "data")
    slide_infos = {}
    # Collect slide_id -> label from every BCR XML under the data directory.
    for xml_path in pathlib.Path(data_dir).glob("**/*.xml"):
        slide_infos.update(get_slide_info_from_bcr_xml(str(xml_path)))
    data_dir, patch_dir, csv_writer, csv_file = ImageAndSlidePatcher.create_patch_dir_and_initialize_csv(database_path)
    csv_file.flush()
    with concurrent.futures.ThreadPoolExecutor(max_workers=Config.workers) as executor:
        image_paths = pathlib.Path(data_dir).glob("**/*.svs")
        image_paths = [i for i in image_paths]
        print()
        # Flush the label CSV after each slide that produced patches.
        for res in tqdm(executor.map(patch_image, image_paths), total=len(image_paths)):
            if res:
                csv_file.flush()
    csv_file.flush()
if __name__ == '__main__':
    # The NCI dataset folder lives one directory above this script.
    database_directory = "../"
    save_national_cancer_institute_patch(os.path.join(database_directory, "national_cancer_institute"))
@@ -0,0 +1,4 @@ | |||
# Run the bio-atlas patcher with project root, parent dir and CWD on PYTHONPATH.
export PYTHONPATH="${PYTHONPATH}:../../";
export PYTHONPATH="${PYTHONPATH}:../";
export PYTHONPATH="${PYTHONPATH}:./";
python bio_atlas_patcher.py;
@@ -0,0 +1,4 @@ | |||
# Run the generic image patcher with project root, parent dir and CWD on PYTHONPATH.
export PYTHONPATH="${PYTHONPATH}:../../";
export PYTHONPATH="${PYTHONPATH}:../";
export PYTHONPATH="${PYTHONPATH}:./";
python image_patcher.py;
@@ -0,0 +1,4 @@ | |||
# Run the NCI patcher with project root, parent dir and CWD on PYTHONPATH.
export PYTHONPATH="${PYTHONPATH}:../../";
export PYTHONPATH="${PYTHONPATH}:../";
export PYTHONPATH="${PYTHONPATH}:./";
python national_cancer_patcher.py;
@@ -0,0 +1,280 @@ | |||
import concurrent.futures | |||
import math | |||
import cv2 | |||
import matplotlib.pyplot as plt | |||
import numpy as np | |||
from config import Config | |||
from database_crawlers.image_patcher.image_patcher import ImageAndSlidePatcher, ThyroidFragmentFilters | |||
from utils import check_if_generator_is_empty | |||
def imul(a, b):
    """Multiply *a* by *b* and round the product up to the nearest integer."""
    product = a * b
    return math.ceil(product)
def calculate_acc_and_sensitivity(image_path, zarr_loader_mask, zarr_loader, frag_generator, scaled_masked_image,
                                  generated_mask_scale, laplacian_threshold, slide_patch_size,
                                  save_generated_image=True):
    """Evaluate the Laplacian background filter against a ground-truth mask.

    Streams patches from *frag_generator*, classifies each with the Laplacian
    threshold, compares the verdict with the corresponding region of the mask
    slide (*zarr_loader_mask*), and returns a confusion-matrix dict with keys
    "TP", "FP", "TN", "FN". Optionally writes a down-scaled visualization
    (background patches darkened) next to *image_path*.
    """

    def process_frag(args):
        # Runs in a worker thread: score one patch against the mask and
        # paste its (possibly darkened) thumbnail into the preview image.
        next_test_item, frag_pos, condition = args
        frag_shape = next_test_item.shape
        mask_scaled_frag_shape = list((imul(frag_shape[i], mask_scale) for i in range(2)))
        mask_frag_pos = list((imul(frag_pos[i], mask_scale) for i in range(2)))
        mask_w1, mask_w2 = mask_frag_pos[0], mask_frag_pos[0] + mask_scaled_frag_shape[0]
        mask_h1, mask_h2 = mask_frag_pos[1], mask_frag_pos[1] + mask_scaled_frag_shape[1]
        mask_item = zarr_loader_mask[mask_w1:mask_w2, mask_h1:mask_h2]
        # Resize the mask crop back to full patch resolution for comparison.
        mask_item = cv2.resize(mask_item, dsize=(0, 0), fx=1 / mask_scale, fy=1 / mask_scale)
        fragment_size = next_test_item.shape
        scaled_frag_size = (imul(fragment_size[0], generated_mask_scale), imul(fragment_size[1], generated_mask_scale))
        scaled_frag = cv2.resize(next_test_item[:, :, :3], dsize=scaled_frag_size, interpolation=cv2.INTER_CUBIC)
        scaled_frag_size = scaled_frag.shape
        # NOTE(review): next_test_item was already dereferenced above, so this
        # None-guard can never trigger — confirm whether it is dead code.
        if next_test_item is not None:
            mask_item = mask_item[:, :, 0]
            # A patch counts as foreground when the mask region is >30% bright.
            masked = mask_item.mean() > 256 * .3
            if condition and masked:
                background_dict["TP"] += 1
            elif condition and not masked:
                background_dict["FP"] += 1
            elif not condition and masked:
                background_dict["FN"] += 1
                # show_and_wait(next_test_item)
                # show_and_wait(mask_item)
            elif not condition and not masked:
                background_dict["TN"] += 1
        else:
            return None
        if not condition:
            # background patches get dark
            # NOTE(review): int8 overflows for values > 127 — uint8 may be
            # the intended dtype here; confirm against the saved preview.
            scaled_frag = (scaled_frag * 0.3).astype(np.int8)
        scaled_pos = list((imul(frag_pos[i], generated_mask_scale) for i in range(2)))
        try:
            mask_g_w1, mask_g_w2 = scaled_pos[0], scaled_pos[0] + scaled_frag_size[0]
            mask_g_h1, mask_g_h2 = scaled_pos[1], scaled_pos[1] + scaled_frag_size[1]
            scaled_masked_image[mask_g_w1:mask_g_w2, mask_g_h1:mask_g_h2] = scaled_frag
        except Exception as e:
            print(e)
        return True

    # Ratio between mask-slide and main-slide resolutions.
    mask_scale = zarr_loader_mask.shape[0] / zarr_loader.shape[0]
    filter_func_list = [ThyroidFragmentFilters.func_laplacian_threshold(laplacian_threshold)]
    background_dict = {"TP": 0, "FP": 0, "TN": 0, "FN": 0}
    total_frags = slide_patch_size if slide_patch_size else ImageAndSlidePatcher._get_number_of_initial_frags(
        zarr_loader)
    frag_filtered = ImageAndSlidePatcher._filter_frag_from_generator(frag_generator, filter_func_list,
                                                                     return_all_with_condition=True,
                                                                     all_frag_count=total_frags)
    with concurrent.futures.ThreadPoolExecutor(max_workers=Config.workers) as executor:
        futures = []
        patch_count = 0
        for args in frag_filtered:
            patch_count += 1
            future_res = executor.submit(process_frag, args)
            futures.append(future_res)
            # Drain the pool in batches to bound memory; stop early once
            # slide_patch_size patches have been processed.
            if len(futures) >= Config.workers or patch_count == slide_patch_size:
                for future in concurrent.futures.as_completed(futures):
                    future.result()
                futures = []
            if patch_count == slide_patch_size:
                break
    if save_generated_image:
        masked_image_path = ".".join(image_path.split(".")[:-1]) + "_generated_mask.jpg"
        cv2.imwrite(masked_image_path, scaled_masked_image)
    return background_dict
def score_calculator(accuracy, specificity, acc_w=0.75):
    """Weighted objective combining accuracy and specificity.

    `acc_w` is the weight on accuracy; specificity gets the remainder.
    """
    return acc_w * accuracy + (1 - acc_w) * specificity
def get_zarr_loaders_and_generators():
    """Build per-slide working state from the global `image_lists`.

    Returns a list with one entry per (mask, slide) pair:
    [mask zarr loader, slide zarr loader, shuffled fragment generator,
     scaled preview canvas (np array), preview scale factor].
    """
    slide_states = []
    for mask_path, slide_path in image_lists:
        mask_loader = ImageAndSlidePatcher._zarr_loader(mask_path)
        slide_loader = ImageAndSlidePatcher._zarr_loader(slide_path)
        fragment_generator = ImageAndSlidePatcher._generate_raw_fragments_from_image_array_or_zarr(
            slide_loader, shuffle=True)
        slide_shape = slide_loader.shape
        preview_scale = 10 / 512
        # +5 pixels of slack so the last scaled fragment never falls off the canvas.
        preview_shape = (
            imul(slide_shape[0], preview_scale) + 5, imul(slide_shape[1], preview_scale) + 5, 3)
        preview_canvas = np.zeros(preview_shape)
        slide_states.append([
            mask_loader, slide_loader, fragment_generator, preview_canvas, preview_scale
        ])
    return slide_states
def update_and_find_best_threshold(initial_thresh, learn_threshold_and_log_cf_matrix_per_patch=True):
    """Hill-climb the Laplacian filter threshold over batches of slide patches.

    Args:
        initial_thresh: starting Laplacian threshold.
        learn_threshold_and_log_cf_matrix_per_patch: when True, re-score and
            update the threshold after every batch and save history charts;
            when False, do a single pass that only accumulates/logs confusion
            tables and saves the generated mask previews.

    Returns:
        The final (learned) laplacian_threshold.
    """
    # 1-D hill climbing: the jump size decays by `decay_const` and the search
    # direction flips every time the score stops improving.
    initial_threshold_jump_size_const = 120
    threshold_jump_size = initial_threshold_jump_size_const
    decay_const = 0.85
    decay_count = 0
    threshold_jump_increase = 1  # search direction: +1 or -1
    threshold_score = None
    # update after initial run
    laplacian_threshold = initial_thresh
    threshold_history = []
    score_history = []
    for epoch in range((Config.n_epoch_for_image_patcher if learn_threshold_and_log_cf_matrix_per_patch else 1)):
        print("New Epoch")
        zarr_loaders_and_generators = get_zarr_loaders_and_generators()
        whole_background_dict_per_slide = [{} for i in range(len(zarr_loaders_and_generators))]
        whole_background_dict = {}
        # Loop while at least one slide still has fragments; exhausted slides
        # are replaced by None below.
        while sum([item is not None for item in zarr_loaders_and_generators]) >= 1:
            none_empty_generators = [i for i in range(len(zarr_loaders_and_generators)) if
                                     zarr_loaders_and_generators[i] is not None]
            if learn_threshold_and_log_cf_matrix_per_patch:
                # Score each batch independently while learning.
                whole_background_dict = {}
            # While learning, require at least 6 live slides so batch scores stay
            # comparable; in evaluation mode always continue.
            if len(none_empty_generators) >= 6 or not learn_threshold_and_log_cf_matrix_per_patch:
                for slide_pick in none_empty_generators:
                    img_path = image_lists[slide_pick][1]
                    zarr_loader_mask = zarr_loaders_and_generators[slide_pick][0]
                    zarr_loader = zarr_loaders_and_generators[slide_pick][1]
                    frag_generator = zarr_loaders_and_generators[slide_pick][2]
                    generated_scaled_mask_image = zarr_loaders_and_generators[slide_pick][3]
                    generated_mask_scale = zarr_loaders_and_generators[slide_pick][4]
                    group_dict = calculate_acc_and_sensitivity(img_path,
                                                               zarr_loader_mask,
                                                               zarr_loader,
                                                               frag_generator,
                                                               generated_scaled_mask_image,
                                                               generated_mask_scale,
                                                               laplacian_threshold,
                                                               slide_patch_size=2000,
                                                               save_generated_image=not learn_threshold_and_log_cf_matrix_per_patch)
                    # Drop any slide whose fragment generator is exhausted.
                    for i in range(len(zarr_loaders_and_generators)):
                        if zarr_loaders_and_generators[i]:
                            generator = check_if_generator_is_empty(zarr_loaders_and_generators[i][2])
                            if generator:
                                zarr_loaders_and_generators[i][2] = generator
                            else:
                                zarr_loaders_and_generators[i] = None
                    # Accumulate confusion counts globally and per slide.
                    for key, value in group_dict.items():
                        whole_background_dict[key] = whole_background_dict.get(key, 0) + value
                        whole_background_dict_per_slide[slide_pick][key] = whole_background_dict_per_slide[
                            slide_pick].get(key, 0) + value
                if learn_threshold_and_log_cf_matrix_per_patch:
                    e = .000001  # epsilon guards against division by zero
                    total_preds = (sum(list(whole_background_dict.values())) + e)
                    acc = (whole_background_dict["TP"] + whole_background_dict["TN"]) / total_preds
                    positive_preds = (whole_background_dict["TP"] + whole_background_dict["FP"] + e)
                    precision = whole_background_dict["TP"] / positive_preds
                    next_score = score_calculator(acc, precision)
                    if threshold_score is None:
                        # First batch just establishes the baseline score.
                        threshold_score = next_score
                    else:
                        threshold_history.append(laplacian_threshold)
                        score_history.append(next_score)
                        if next_score > threshold_score:
                            # Improvement: keep moving in the same direction.
                            threshold_score = next_score
                            laplacian_threshold += threshold_jump_increase * threshold_jump_size
                        elif next_score <= threshold_score:
                            # No improvement: reverse direction and shrink the jump.
                            threshold_score = next_score
                            threshold_jump_increase *= -1
                            threshold_jump_size *= decay_const
                            laplacian_threshold += threshold_jump_increase * threshold_jump_size
                            decay_count += 1
                    save_threshold_and_score_chart(threshold_history, score_history)
                    acc = round(acc, 3)
                    precision = round(precision, 3)
                    threshold_score_rounded = round(threshold_score, 3)
                    print(
                        f"acc:{acc},precision:{precision},score:{threshold_score_rounded},table:{whole_background_dict}" +
                        f"thresh:{laplacian_threshold},jump_size:{threshold_jump_size}")
                else:
                    print(f"table:{whole_background_dict},table_per_slide:{whole_background_dict_per_slide}" +
                          f"threshold:{laplacian_threshold},jump_size:{threshold_jump_size}")
            else:
                # Fewer than 6 live slides while learning: stop this epoch.
                break
    return laplacian_threshold
def save_threshold_and_score_chart(threshold_history, score_history):
    """Save two line charts: threshold-per-batch and score-per-batch.

    Writes "laplacian_threshold_history_chart.jpeg" and
    "laplacian_threshold_score_history_chart.jpeg" in the current directory,
    overwriting previous versions on each call.
    """
    fig_save_path = "laplacian_threshold_history_chart.jpeg"
    plt.plot(range(len(threshold_history)), threshold_history)
    plt.xlabel('Batch')
    plt.ylabel('Laplacian threshold')
    plt.savefig(fig_save_path)
    plt.clf()
    fig_save_path = "laplacian_threshold_score_history_chart.jpeg"
    plt.plot(range(len(score_history)), score_history)
    plt.xlabel('Batch')
    # Fixed axis-label typo: "Sore" -> "Score".
    plt.ylabel('Objective function - Score')
    plt.savefig(fig_save_path)
    plt.clf()
if __name__ == '__main__':
    # (mask tiff, slide svs) path pairs used to learn/evaluate the threshold.
    # The inline comments record each slide's (normal, tumor, stromal) percents.
    image_lists = [
        (  # "('0', '100', '0')"
            "./TCGA-BJ-A3F0-01A-01-TSA.728CE583-95BE-462B-AFDF-FC0B228DF3DE__3_masked.tiff",
            "./TCGA-BJ-A3F0-01A-01-TSA.728CE583-95BE-462B-AFDF-FC0B228DF3DE__3.svs"
        ),
        (  # "('0', '100', '0')"
            "./TCGA-DJ-A1QG-01A-01-TSA.04c62c21-dd45-49ea-a74f-53822defe097__2000_masked.tiff",
            "./TCGA-DJ-A1QG-01A-01-TSA.04c62c21-dd45-49ea-a74f-53822defe097__2000.svs"
        ),
        # ( # "('0', '100', '0')"
        #     "./TCGA-EL-A3ZQ-01A-01-TS1.344610D2-AB50-41C6-916E-FF0F08940BF1__2000_masked.tiff",
        #     "./TCGA-EL-A3ZQ-01A-01-TS1.344610D2-AB50-41C6-916E-FF0F08940BF1__2000.svs"
        # ),
        (  # "('45', '55', '0')"
            "./TCGA-ET-A39N-01A-01-TSA.C38FCE19-9558-4035-9F0B-AD05B9BE321D___198_masked.tiff",
            "./TCGA-ET-A39N-01A-01-TSA.C38FCE19-9558-4035-9F0B-AD05B9BE321D___198.svs"
        ),
        # ( # "('0', '40', '60')"
        #     "./TCGA-J8-A42S-01A-01-TSA.7B80CBEB-7B85-417E-AA0C-11C79DE40250__0_masked.tiff",
        #     "./TCGA-J8-A42S-01A-01-TSA.7B80CBEB-7B85-417E-AA0C-11C79DE40250__0.svs"
        # ),
        (  # "('0', '90', '10')"
            "./TCGA-ET-A39O-01A-01-TSA.3829C900-7597-4EA9-AFC7-AA238221CE69_7000_masked.tiff",
            "./TCGA-ET-A39O-01A-01-TSA.3829C900-7597-4EA9-AFC7-AA238221CE69_7000.svs"
        ),
        (  # "('100', '0', '0')"
            "./TCGA-EL-A4K7-11A-01-TS1.C08B59AA-87DF-4ABB-8B70-25FEF9893C7F__70_masked.tiff",
            "./TCGA-EL-A4K7-11A-01-TS1.C08B59AA-87DF-4ABB-8B70-25FEF9893C7F__70.svs"
        ),
        (  # "('100', '0', '0')"
            "./TCGA-EL-A3TB-11A-01-TS1.6E0966C9-1552-4B30-9008-8ACF737CA8C3__2000_masked.tiff",
            "./TCGA-EL-A3TB-11A-01-TS1.6E0966C9-1552-4B30-9008-8ACF737CA8C3__2000.svs"
        ),
    ]
    # First pass learns the threshold; second pass only logs confusion tables
    # and saves the generated mask preview images.
    learned_threshold = update_and_find_best_threshold(500, learn_threshold_and_log_cf_matrix_per_patch=True)
    update_and_find_best_threshold(learned_threshold, learn_threshold_and_log_cf_matrix_per_patch=False)
    # Start with 500 with jump size 120 and decay 0.85
    # table:{'TP': 15018, 'FP': 412, 'TN': 66898, 'FN': 2389},
    # table_per_slide:[
    #     {'TP': 460, 'FP': 0, 'TN': 19618, 'FN': 1426},
    #     {'TP': 4624, 'FP': 126, 'TN': 14100, 'FN': 226},
    #     {'TP': 1138, 'FP': 4, 'TN': 6671, 'FN': 492},
    #     {'TP': 7615, 'FP': 92, 'TN': 20871, 'FN': 234},
    #     {'TP': 78, 'FP': 18, 'TN': 1880, 'FN': 4},
    #     {'TP': 1103, 'FP': 172, 'TN': 3758, 'FN': 7}
    # ]
    # threshold:298.86314585743395,jump_size:120
@@ -0,0 +1,6 @@ | |||
# Expose the package from several ancestor directories, then run the evaluator.
for rel_path in ../../../../ ../../../ ../../ ../ ./; do
    export PYTHONPATH="${PYTHONPATH}:${rel_path}"
done
python evaluate_image_patcher_and_visualize.py;
@@ -0,0 +1,4 @@ | |||
# Expose the package from ancestor directories, then run the distribution script.
for rel_path in ../../ ../ ./; do
    export PYTHONPATH="${PYTHONPATH}:${rel_path}"
done
python patcher_distribution.py;
@@ -0,0 +1,18 @@ | |||
import pathlib | |||
import matplotlib.pyplot as plt | |||
from national_cancer_institute.read_xml_file import get_slide_info_from_bcr_xml | |||
if __name__ == '__main__':
    data_dir = "data/"
    slide_infos = {}
    # Merge per-slide info parsed from every BCR biospecimen XML under data/.
    for xml_file in pathlib.Path(data_dir).glob("**/*.xml"):
        slide_infos.update(get_slide_info_from_bcr_xml(str(xml_file)))
    # Tumor-cell percentages for slides whose stromal-cell percent is zero.
    cell_percents = [int(info[1]) for info in slide_infos.values() if int(info[2]) == 0]
    print("tumor:", sum(1 for p in cell_percents if p == 100))
    print("normal", sum(1 for p in cell_percents if p == 0))
    print([p for p in cell_percents if p not in (0, 100)])
    print(len(cell_percents))
    plt.hist(cell_percents, bins=150)
    plt.savefig("tumor_cell_distribution.jpeg")
@@ -0,0 +1,65 @@ | |||
import concurrent.futures | |||
import os | |||
import pathlib | |||
import matplotlib.pyplot as plt | |||
from tqdm import tqdm | |||
from config import Config | |||
from image_patcher import ImageAndSlidePatcher | |||
def save_patch_distribution(database_path):
    """Plot how many patches were extracted per slide under `database_path`.

    Scans `<database_path>/data` for .svs slides, counts the .jpeg patches
    already extracted into `<database_path>/patches/<slide_id>`, and saves two
    histograms in the CWD: "patch_distribution.jpeg" (absolute counts) and
    "patch_percent_distribution.jpeg" (counts as a percent of candidate
    fragments).
    """

    def patch_image(image_path):
        # Returns (extracted_patch_count, total_candidate_fragments) for one
        # slide, or None when the slide has no patch directory yet.
        try:
            image_path = str(image_path)
            file_name = ImageAndSlidePatcher._get_file_name_from_path(image_path)
            slide_id = file_name.split(".")[0]
            slide_patch_dir = os.path.join(patch_dir, slide_id)
            if ImageAndSlidePatcher._get_extension_from_path(image_path) in [".tiff", ".tif", ".svs"]:
                zarr_object = ImageAndSlidePatcher._zarr_loader(image_path)
                total_counts = ImageAndSlidePatcher._get_number_of_initial_frags(zarr_object=zarr_object)
            else:
                jpeg_image = ImageAndSlidePatcher._jpeg_loader(image_path)
                jpeg_image = ImageAndSlidePatcher.ask_image_scale_and_rescale(jpeg_image)
                total_counts = ImageAndSlidePatcher._get_number_of_initial_frags(zarr_object=jpeg_image)
            if os.path.exists(slide_patch_dir):
                fragment_id = len(list(pathlib.Path(slide_patch_dir).glob("*.jpeg")))
                return fragment_id, total_counts
            return None  # not patched yet; caller skips this slide
        except Exception as e:
            print("error")
            print(e)
            raise e

    res_patch_counts = []
    data_dir = os.path.join(database_path, "data")
    patch_dir = os.path.join(database_path, "patches")
    with concurrent.futures.ThreadPoolExecutor(max_workers=Config.workers) as executor:
        image_paths = list(pathlib.Path(data_dir).glob("**/*.svs"))
        print()
        for res in tqdm(executor.map(patch_image, image_paths), total=len(image_paths)):
            if res:
                # (removed dead `frags, total = res` unpack - values were unused)
                res_patch_counts.append(res)
    print(res_patch_counts)
    plt.hist([i[0] for i in res_patch_counts], bins=100)
    plt.xlabel("Patch per slide")
    plt.ylabel("Frequency")
    plt.savefig("patch_distribution.jpeg")
    plt.clf()
    # Epsilon avoids division by zero for slides reporting zero candidate frags.
    plt.hist([round(i[0] / (i[1] + 0.00001), 5) * 100 for i in res_patch_counts], bins=100)
    plt.xlabel("Patch per slide percent")
    plt.ylabel("Frequency")
    plt.savefig("patch_percent_distribution.jpeg")
    plt.clf()
if __name__ == '__main__':
    # Plot the patch distribution for the NCI dataset one directory up.
    database_directory = "../"
    save_patch_distribution(os.path.join(database_directory, "national_cancer_institute"))
@@ -0,0 +1,26 @@ | |||
from xml.dom import minidom | |||
def get_slide_info_from_bcr_xml(xml_path): | |||
file = minidom.parse(xml_path) | |||
patient = file.childNodes[0].getElementsByTagName("bio:patient")[0] | |||
data_dict = {} | |||
try: | |||
for i in range(10): | |||
percent_tumor_cells = patient.getElementsByTagName("bio:percent_tumor_cells")[i].childNodes[ | |||
0].data.strip() | |||
percent_normal_cells = patient.getElementsByTagName("bio:percent_normal_cells")[i].childNodes[ | |||
0].data.strip() | |||
percent_stormal_cells = patient.getElementsByTagName("bio:percent_stromal_cells")[i].childNodes[ | |||
0].data.strip() | |||
slide_barcode = patient.getElementsByTagName("shared:bcr_slide_barcode")[i].childNodes[0].data.strip() | |||
data_dict[slide_barcode] = (percent_normal_cells, percent_tumor_cells, percent_stormal_cells) | |||
except Exception as e: | |||
pass | |||
return data_dict | |||
if __name__ == '__main__':
    # Smoke test: parse one known biospecimen XML and print the slide info.
    path = "../national_cancer_institute/data/1aea8f2a-f809-4f19-bed3-1365e9aab33b/nationwidechildrens.org_biospecimen.TCGA-BJ-A28X.xml"
    res = get_slide_info_from_bcr_xml(path)
    print(res)
@@ -0,0 +1,4 @@ | |||
# Expose the package from ancestor directories, then run the cells chart script.
for rel_path in ../../ ../ ./; do
    export PYTHONPATH="${PYTHONPATH}:${rel_path}"
done
python cells_chart.py;
@@ -0,0 +1,5 @@ | |||
# Expose the package (including the image_patcher module) then run the script.
for rel_path in ../../ ../ ../image_patcher ./; do
    export PYTHONPATH="${PYTHONPATH}:${rel_path}"
done
python patch_distribution.py;
@@ -0,0 +1,29 @@ | |||
1811210076455461803,BENIGN-BTNpap-hyperplZC04-10370x4_0 | |||
7845783054062606488,BENIGN-Dan_Colloid1_0 | |||
7637345021064072354,BENIGN-FTH468BTN_sheets_follicles_1_0 | |||
814792220375115888,BENIGN-GD40T | |||
2888555316355101926,BENIGN-gd-fn20_0 | |||
-8312464544528256569,BENIGN-gd10p_0 | |||
-2254510488499374008,BENIGN-gd20g_0 | |||
2271447961045108683,MALIGNANT-ATC20L_0 | |||
7269337178939971574,MALIGNANT-ATC20b_0 | |||
-9073528708751422131,MALIGNANT-ATC40BB_0 | |||
-8571937763202005072,MALIGNANT-ATC40FC_0 | |||
-8610450256188951874,MALIGNANT-ATC40J_0 | |||
-7854679564093375561,MALIGNANT-ATC40p_0 | |||
-4977783033606377395,MALIGNANT-ATC5A_0 | |||
1342853015262631578,MALIGNANT-Atc63w_0 | |||
431837691807971266,MALIGNANT-C03-54313A_0 | |||
8004646368797684873,MALIGNANT-C03-54313B_0 | |||
8041832687277297518,MALIGNANT-C03-54313C_0 | |||
6449521961463025237,MALIGNANT-C03-54313D_0 | |||
-8688277356782858138,MALIGNANT-C03-54313E_0 | |||
-9068734717515564721,MALIGNANT-C03-54313F_0 | |||
-5752120463987418399,MALIGNANT-C03-54313G_0 | |||
8889530878367993817,MALIGNANT-Ed82C_1_0 | |||
2934951614446666978,"MALIGNANT-Follicular_neoplasm2,_low_power,_confirmed_FVPTC_DQ_SM_0" | |||
-1541914789037593200,"MALIGNANT-Pap_CA,_excellent_inclusion,_DQ_0" | |||
-1717557305576530323,"MALIGNANT-Pap_CA,_powdery_nuclei,_small_nucleoli_and_small_inclusion,_PAP_hp_0" | |||
947862089311894766,"MALIGNANT-Papillary_CA,_giant_cell,_DQ_lp_0" | |||
-8520432463383972777,"MALIGNANT-Papillary_CA,_giant_cell_and_tumor_sheets,_PAP_lp2_0" | |||
-4759582710547943524,"MALIGNANT-Papillary_CA,_high_cellularity_3,_PAP_lp_0" |
@@ -0,0 +1,13 @@ | |||
import os | |||
import shutil | |||
if __name__ == '__main__':
    # Each line of the info file is "<folder_id>,<label>"; delete the matching
    # patch folder when it exists.
    duplicate_info_file_path = "duplicate_image.txt"
    with open(duplicate_info_file_path, "r") as dup_file:
        for line in dup_file:
            folder_id, _, _rest = line.partition(",")
            folder_path = os.path.join("./patches", folder_id)
            if not os.path.exists(folder_path):
                print("no")
                continue
            shutil.rmtree(folder_path)
            print("deleted")
@@ -0,0 +1,64 @@ | |||
import json | |||
from urllib.parse import urlparse | |||
from urllib.request import urlretrieve | |||
import requests | |||
from bs4 import BeautifulSoup | |||
from database_crawlers.web_stain_sample import WebStainImage, StainType | |||
class StanfordTissueMicroArrayStainSample(WebStainImage):
    """One stain sample hosted on the Stanford TMA public storage bucket."""

    def __init__(self, database_name, image_id, image_web_label, report, stain_type, is_wsi):
        super().__init__(database_name, image_id, image_web_label, report, stain_type, is_wsi)

    def get_slide_view_url(self):
        """Direct URL of the sample JPEG on the storage bucket."""
        return f"https://storage.googleapis.com/jpg.tma.im/{self.image_id}"

    def get_file_name(self):
        """Local path stem: save_path + image id with '/' flattened, extension dropped."""
        flattened_id = self.image_id.replace("/", "_")
        stem = ".".join(flattened_id.split(".")[:-1])
        return self.save_path + stem

    def get_relative_image_path(self):
        return self.get_file_name() + ".jpeg"

    def get_relative_json_path(self):
        return self.get_file_name() + ".json"

    def crawl_image_save_jpeg(self):
        """Download the JPEG and write the metadata JSON alongside it."""
        urlretrieve(self.get_slide_view_url(), self.get_relative_image_path())
        with open(self.get_relative_json_path(), "w") as outfile:
            outfile.write(json.dumps(self.to_json()))
class StanfordTissueMicroArraySlideProvider:
    """Scrapes the Stanford TMA thyroid search page and yields stain samples."""
    page_link = "https://tma.im/cgi-bin/selectImages.pl?organ=thyroid"
    database_name = "StanfordTissueMicroArray"
    stain_type = StainType.UNKNOWN
    is_wsi = False

    @classmethod
    def get_web_stain_samples(cls):
        """Yield a StanfordTissueMicroArrayStainSample per search result.

        POSTs the search form and parses the result tiles. NOTE(review): the
        session cookie and the exact inline-style selector below are tied to
        the site's current markup - verify they still match before relying on
        this crawler.
        """
        payload = {'250 small images': '250 small images'}
        files = []
        headers = {
            'Cookie': 'DAD_ATTEMPTS=0; DAD_SID=36d77eb69e009b1cf1ebc9c3d7866546; DAD_USERID=WORLD'
        }
        html_text = requests.post(cls.page_link, files=files, headers=headers, data=payload).content.decode("utf-8")
        soup = BeautifulSoup(html_text, 'html.parser')
        # Result tiles are identified by class AND a fixed inline style string.
        search_results = soup.find_all("div", {"class": "iDiv0", "style": "width: 86px; height: 260px;"})
        for result_item in search_results:
            image_url = result_item.find("a", {"target": "_blank"}).attrs['href']
            # Drop the first path segment; the rest is the bucket-relative id.
            image_id = "/".join(urlparse(image_url).path.strip("/").split("/")[1:])
            # Second-to-last caption paragraph holds the diagnosis label.
            image_web_label = list(result_item.find_all("p", {"class": "iDiv1"}))[-2].text
            yield StanfordTissueMicroArrayStainSample(cls.database_name, image_id, image_web_label, None,
                                                      cls.stain_type, cls.is_wsi)
if __name__ == '__main__':
    # Crawl and save every thyroid sample listed by the Stanford TMA provider.
    for slide in StanfordTissueMicroArraySlideProvider.get_web_stain_samples():
        print(slide.image_id, slide.image_web_label, slide.get_slide_view_url())
        slide.crawl_image_save_jpeg()
@@ -0,0 +1,87 @@ | |||
import concurrent.futures | |||
import concurrent.futures | |||
import time | |||
from urllib.error import HTTPError | |||
from urllib.request import urlretrieve | |||
from torch.utils.data import IterableDataset | |||
from tqdm import tqdm | |||
from config import Config | |||
def find_in_log_n(start, end, func, bias=0.3):
    """Biased binary search for the largest value in [start, end) where `func` holds.

    Assumes `func` is monotone (true up to some point, then false) and that
    func(start) is true. `bias` skews the probe toward `start` (0.5 would be a
    standard binary search midpoint).
    """
    if end - start <= 1:
        return start
    mid = int(start * (1 - bias) + end * bias)
    if start == mid:
        # Ensure progress when rounding collapses the probe onto `start`.
        mid += 1
    if func(mid):
        # Bug fix: propagate the caller's bias; previously the recursive calls
        # silently reverted to the default after the first level.
        return find_in_log_n(mid, end, func, bias=bias)
    else:
        return find_in_log_n(start, mid, func, bias=bias)
def fetch_tile_content(tile_url, retry=15):
    """Download one tile URL and return its raw bytes, retrying with backoff.

    Retries up to `retry` times with a growing sleep between attempts.
    NOTE(review): on the final failure this prompts on stdin ("continue") and
    recurses if the answer is "y" - unsuitable for unattended runs; confirm
    this interactive behavior is intended.
    """
    for i in range(retry):
        try:
            # urlretrieve downloads to a temp file; read it back as bytes.
            image_path = urlretrieve(tile_url)[0]
            with open(image_path, "rb") as file:
                return file.read()
        except Exception as e:
            print("e", end="|")
            time.sleep(2 ** (0.3 * (i + 1)))  # backoff grows with attempt count
            if i == retry - 1:
                if input("continue") == "y":
                    return fetch_tile_content(tile_url, retry)
                raise e
    # Unreachable unless retry <= 0; kept as a defensive terminal error.
    raise HTTPError("Not able for fetch image tile", code=500, msg="", hdrs={}, fp=None)
def download_urls_in_thread(url_and_index_list):
    """Concurrently download every (url, index) pair, yielding (content, index).

    Results are yielded in input order (executor.map preserves ordering) with
    a tqdm progress bar.
    """

    def _fetch_one(pair):
        tile_url, tile_index = pair
        return fetch_tile_content(tile_url), tile_index

    with concurrent.futures.ThreadPoolExecutor(max_workers=Config.workers) as executor:
        results = executor.map(_fetch_one, url_and_index_list)
        for tile_content, tile_index in tqdm(results, total=len(url_and_index_list)):
            yield tile_content, tile_index
def _get_alignment_sore_and_percent(seq1, seq2, match_score=2, mismatch_score=-1, gap_score=-1):
    """Globally align two sequences and return the raw alignment score.

    Uses the `alignment` package (imported lazily so the dependency is only
    required when alignment is actually used).
    """
    from alignment.sequence import Sequence
    from alignment.sequencealigner import SimpleScoring, GlobalSequenceAligner
    from alignment.vocabulary import Vocabulary

    vocabulary = Vocabulary()
    encoded_first = vocabulary.encodeSequence(Sequence(seq1))
    encoded_second = vocabulary.encodeSequence(Sequence(seq2))
    aligner = GlobalSequenceAligner(SimpleScoring(match_score, mismatch_score), gap_score)
    return aligner.align(encoded_first, encoded_second, backtrace=False)
def get_normalized_score(seq1, seq2):
    """Alignment score normalized by the combined length of both sequences."""
    combined_length = len(seq2) + len(seq1)
    return _get_alignment_sore_and_percent(seq1, seq2) / combined_length
class DatasetWithGenerator(IterableDataset):
    """IterableDataset facade over an arbitrary Python iterator/generator.

    The wrapped iterator is consumed once; iterating the dataset a second
    time yields nothing new.
    """

    def __init__(self, generator):
        self.generator = generator

    def __iter__(self):
        return iter(self.generator)
if __name__ == '__main__':
    import math

    # Sanity checks: expected search depth for a million-wide range, and a
    # known find_in_log_n result (should print 76).
    print(math.log2(1000 * 1000))
    print(find_in_log_n(0, 100, lambda x: x <= 76))
@@ -0,0 +1,296 @@ | |||
import enum | |||
import json | |||
import time | |||
from io import BytesIO | |||
from urllib.request import Request, urlopen | |||
import cv2 | |||
import numpy as np | |||
from PIL import Image | |||
from tifffile import TiffWriter | |||
from database_crawlers.utils import find_in_log_n, fetch_tile_content, download_urls_in_thread | |||
class StainType(enum.Enum):
    # Values are (numeric id, human-readable name); .value[1] is what gets
    # written into the exported JSON metadata.
    H_AND_E = 0, "H&E"
    UNKNOWN = 1, "UNKNOWN"
class ThyroidCancerLevel(enum.Enum):
    """Coarse malignancy label; values are (numeric id, display name)."""
    UNKNOWN = -1, "UNKNOWN"
    MALIGNANT = 0, "MALIGNANT"
    BENIGN = 1, "BENIGN"

    @staticmethod
    def get_thyroid_level_from_diagnosis_label(label: str):
        """Map a free-text diagnosis label to a level via keyword search."""
        normalized = label.lower()
        for level in (ThyroidCancerLevel.MALIGNANT, ThyroidCancerLevel.BENIGN):
            if level.value[1].lower() in normalized:
                return level
        return ThyroidCancerLevel.UNKNOWN
class ThyroidType(enum.Enum):
    """Fine-grained thyroid diagnosis; values are (numeric id, display name)."""
    UNKNOWN = -1, "UNKNOWN"
    NORMAL = 0, "NORMAL"
    PAPILLARY_CARCINOMA = 1, "PAPILLARY_CARCINOMA"
    GRAVES_DISEASE = 2, "GRAVES_DISEASE"
    NODULAR_GOITER = 3, "NODULAR_GOITER"
    HASHIMOTO_THYROIDITIS = 4, "HASHIMOTO_THYROIDITIS"
    FOLLICULAR_CARCINOMA = 5, "FOLLICULAR_CARCINOMA"
    FOLLICULAR_ADENOMA = 6, "FOLLICULAR_ADENOMA"
    COLLOID_GOITER = 7, "COLLOID_GOITER"

    @staticmethod
    def get_thyroid_type_from_diagnosis_label(label: str):
        """Map a free-text diagnosis label to a ThyroidType.

        Keywords are checked in a fixed priority order (guard clauses), so
        e.g. "normal" wins over any later keyword in the same label.
        """
        text = label.lower()
        if "normal" in text:
            return ThyroidType.NORMAL
        if "papillary" in text:
            return ThyroidType.PAPILLARY_CARCINOMA
        if "grave" in text:
            return ThyroidType.GRAVES_DISEASE
        if "nodular" in text and "goiter" in text:
            return ThyroidType.NODULAR_GOITER
        if "hashimoto" in text:
            return ThyroidType.HASHIMOTO_THYROIDITIS
        if "follicular" in text:
            # Follicular lesions default to carcinoma unless marked adenoma.
            return ThyroidType.FOLLICULAR_ADENOMA if "adenoma" in text else ThyroidType.FOLLICULAR_CARCINOMA
        if "colloid" in text and "goiter" in text:
            return ThyroidType.COLLOID_GOITER
        return ThyroidType.UNKNOWN
class WebStainImage:
    """Base record for a stain image crawled from a web pathology database.

    Subclasses implement the URL/crawl hooks; this class holds metadata and
    the local file-path/json conventions.
    """
    # Relative directory where crawled images and JSON metadata are stored.
    save_path = "data/"

    def __init__(self, database_name, image_id, image_web_label, report, stain_type, is_wsi):
        self.database_name = database_name
        self.image_id = image_id
        self.image_web_label = image_web_label
        self.report = report
        self.stain_type = stain_type
        self.is_wsi = is_wsi

    def to_json(self):
        """Serializable metadata dict; keys match sorted_json_keys()."""
        return {"database_name": self.database_name,
                "image_id": self.image_id,
                "image_web_label": self.image_web_label,
                "image_class_label": self.image_class_label,
                "report": self.report,
                "stain_type": self.stain_type.value[1],
                "is_wsi": self.is_wsi}

    @staticmethod
    def sorted_json_keys():
        """Canonical key order for exporting/reading the metadata JSON."""
        return ["database_name",
                "image_id",
                "image_web_label",
                "image_class_label",
                "report",
                "stain_type",
                "is_wsi"]

    @property
    def image_class_label(self):
        """Normalized thyroid-type name derived from the raw web label."""
        return ThyroidType.get_thyroid_type_from_diagnosis_label(self.image_web_label).value[1]

    def get_slide_view_url(self):
        # Bug fix: `raise NotImplemented(...)` raised a TypeError because
        # NotImplemented is a constant, not an exception class.
        raise NotImplementedError("get_slide_view_url")

    def crawl_image_save_jpeg_and_json(self):
        raise NotImplementedError("crawl_image_get_jpeg")

    def _get_file_path_name(self):
        return self.save_path + self.image_id

    def _get_relative_image_path(self):
        return self._get_file_path_name() + ".jpeg"

    def _get_relative_tiff_image_path(self):
        return self._get_file_path_name() + ".tiff"

    def _get_relative_json_path(self):
        return self._get_file_path_name() + ".json"

    def _save_json_file(self):
        """Write to_json() next to the image file."""
        json_object = json.dumps(self.to_json())
        with open(self._get_relative_json_path(), "w") as outfile:
            outfile.write(json_object)
class WebStainWSI(WebStainImage):
    """Whole-slide variant: downloads many tiles and assembles them into a TIFF."""

    def __init__(self, database_name, image_id, image_web_label, report, stain_type, is_wsi):
        super().__init__(database_name, image_id, image_web_label, report, stain_type, is_wsi)

    def _get_tile_url(self, zoom, partition=None, i=None, j=None):
        # Bug fix: NotImplemented is not an exception class; calling it raised
        # a TypeError instead of the intended NotImplementedError.
        raise NotImplementedError("_get_tile_url")

    def _generate_tile_urls(self):
        raise NotImplementedError("generate tile urls")

    def find_best_zoom(self):
        # Zoom level 0 is assumed to be the layer to crawl — TODO confirm per provider.
        return 0

    def _find_first_tile_width(self):
        """Download tile (0, 0) and return its (width, height) in pixels."""
        image_content = fetch_tile_content(self._get_tile_url(self.find_best_zoom(), partition=0, i=0, j=0))
        img = Image.open(BytesIO(image_content))
        return img.size[0], img.size[1]

    def _fetch_all_tiles(self):
        """Yield (content, index) for every tile URL, downloading in threads."""
        batch = []
        index = 0
        for url in self._generate_tile_urls():
            batch.append((url, index))
            index += 1
        # download last batch
        if len(batch) != 0:
            for content, downloaded_index in download_urls_in_thread(batch):
                yield content, downloaded_index
        print("Slide download tiles done!!!")

    def crawl_image_save_jpeg_and_json(self):
        raise NotImplementedError("crawl_image_save_jpeg_and_json")
def crawl_image_save_jpeg_and_json(self): | |||
raise NotImplemented("crawl_image_save_jpeg_and_json") | |||
class WebStainWSIOneDIndex(WebStainWSI):
    """WSI whose tiles are addressed by a single linear partition index."""

    def __init__(self, database_name, image_id, image_web_label, report, stain_type, is_wsi):
        super().__init__(database_name, image_id, image_web_label, report, stain_type, is_wsi)
        # Largest valid partition index, discovered lazily via HEAD probes.
        self.last_partition = None

    def _find_last_partition(self):
        """Probe the server to find the highest partition index that exists."""
        print("Finding last partition: ", end="")

        def func(partition, retry=3):
            # True iff a HEAD request for this partition succeeds within `retry` tries.
            print(partition, end="")
            for i in range(retry):
                try:
                    request = Request(self._get_tile_url(self.find_best_zoom(), partition=partition), method='HEAD')
                    resp = urlopen(request)
                    headers = resp.info()
                    print("<", end=", ")
                    return True
                except Exception as e:
                    print("e", end="")
                    time.sleep(2 ** (0.1 * (i + 1)))
            print(">", end=", ")
            return False

        return find_in_log_n(0, 1000 * 1000, func)

    def _generate_tile_urls(self):
        for partition in range(self.last_partition + 1):
            yield self._get_tile_url(self.find_best_zoom(), partition=partition)

    def crawl_image_save_jpeg_and_json(self):
        """Download every tile and stream them into one big tiled TIFF + JSON.

        The tiles consumed while detecting the grid width are buffered in
        `first_temp_rows` and replayed first by `generator()` so no tile is lost.
        """

        def generator():
            while True:
                if first_temp_rows:
                    # Replay tiles already decoded during column detection.
                    yield first_temp_rows[0]
                    del first_temp_rows[0]
                else:
                    res = next(content_fetcher, -1)
                    if res == -1:
                        break
                    img = cv2.imdecode(np.frombuffer(res[0], np.uint8), -1)
                    if len(img.shape) == 2:
                        # Grayscale tile: expand to 3 channels to match the TIFF shape.
                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
                    yield img

        first_image_width, first_image_height = self._find_first_tile_width()
        first_temp_rows = []
        column_tiles, row_tiles = None, None
        self.last_partition = self._find_last_partition()
        content_fetcher = self._fetch_all_tiles()
        with TiffWriter(self._get_relative_tiff_image_path(), bigtiff=True) as tif:
            # Detect the grid width: the first tile narrower than the standard
            # width marks the end of a row — presumably the right-edge tile;
            # TODO confirm for slides whose width divides evenly.
            while column_tiles is None:
                content, index = content_fetcher.__next__()
                image_array = cv2.imdecode(np.frombuffer(content, np.uint8), cv2.IMREAD_COLOR)
                first_temp_rows.append(image_array)
                if image_array.shape[1] != first_image_width:
                    column_tiles = index + 1
                    row_tiles = (self.last_partition + 1) // column_tiles
            shape = (first_image_height * row_tiles, first_image_width * column_tiles, 3)
            tif.write(generator(), subfiletype=1, tile=(first_image_height, first_image_width), shape=shape,
                      dtype=np.uint8,
                      compression='JPEG',  # TODO
                      photometric='rgb')
        """
        Save json file
        """
        self._save_json_file()
class WebStainWSITwoDIndex(WebStainWSI):
    """WSI whose tiles are addressed by a 2-D (i, j) grid index."""

    def __init__(self, database_name, image_id, image_web_label, report, stain_type, is_wsi):
        super().__init__(database_name, image_id, image_web_label, report, stain_type, is_wsi)
        # Largest valid column (i) and row (j) indices, found via HEAD probes.
        self.last_i = None
        self.last_j = None

    def _generate_tile_urls(self):
        # Row-major order: j (row) outer, i (column) inner.
        for j in range(self.last_j + 1):
            for i in range(self.last_i + 1):
                yield self._get_tile_url(self.find_best_zoom(), i=i, j=j)

    def _find_last_i_and_j(self):
        """Probe the server for the largest existing tile indices on each axis."""

        def func(i, j, retry=3):
            # True iff a HEAD request for tile (i, j) succeeds within `retry` tries.
            print(f"{i}-{j}", end="")
            for r in range(retry):
                try:
                    request = Request(self._get_tile_url(self.find_best_zoom(), i=i, j=j), method='HEAD')
                    resp = urlopen(request)
                    headers = resp.info()
                    print("<", end=", ")
                    return True
                except Exception as e:
                    print("e", end="")
                    time.sleep(2 ** (0.1 * (r + 1)))
            print(">", end=", ")
            return False

        print("Finding last i: ", end="")
        i_func = lambda i: func(i=i, j=0)
        last_i = find_in_log_n(0, 1000, i_func)
        print("\nFinding last j: ")
        j_func = lambda j: func(i=0, j=j)
        last_j = find_in_log_n(0, 1000, j_func)
        return last_i, last_j

    def crawl_image_save_jpeg_and_json(self):
        """Download every tile, pad/resize to 256x256, stream into a TIFF + JSON."""

        def generator():
            while True:
                res = next(content_fetcher, -1)
                if res == -1:
                    break
                res = cv2.imdecode(np.frombuffer(res[0], np.uint8), -1)
                # Tiles are expected to be at most ~256 px; anything bigger
                # indicates a scraping error.
                if max(res.shape) >= 260:
                    raise Exception(f"warning shape: {res.shape}")
                res = cv2.resize(res, (min(res.shape[1], 256), min(res.shape[0], 256)))
                yield res

        # Fixed provider tile size — TODO confirm per database.
        first_image_width = 256
        first_image_height = 256
        self.last_i, self.last_j = self._find_last_i_and_j()
        content_fetcher = self._fetch_all_tiles()
        with TiffWriter(self._get_relative_tiff_image_path(), bigtiff=True) as tif:
            shape = (first_image_height * (self.last_j + 1), first_image_width * (self.last_i + 1), 3)
            tif.write(generator(), subfiletype=1,
                      tile=(first_image_height, first_image_width),
                      shape=shape,
                      dtype=np.uint8,
                      compression='JPEG',  # TODO
                      photometric='rgb')
        """
        Save json file
        """
        self._save_json_file()
@@ -0,0 +1,20 @@ | |||
from PIL import Image | |||
import zarr | |||
import tifffile | |||
def convert_tif_to_jpeg(input_address="data/test/1672.tiff", outfile="data/test/out.zarr"):
    """Copy page 0 of a TIFF into a zarr store without loading it all into RAM.

    Args:
        input_address: source TIFF path; default keeps the original
            hard-coded behavior.
        outfile: destination zarr store path.
    """
    # Open the TIFF page lazily as a zarr store so the whole image is never
    # decoded into memory at once.
    image_zarr = tifffile.imread(input_address, aszarr=True, key=0)
    zarr_image = zarr.open(image_zarr, mode='r')
    zarr.save(outfile, zarr_image)
    # The PIL JPEG route below was abandoned: it decodes the entire image
    # into RAM ("RAM PROBLEM").
    # im = Image.open()
    # out = im.convert("RGB")
    # out.save(outfile, "JPEG", quality=90)
if __name__ == '__main__':
    # Raise PIL's decompression-bomb limit for huge whole-slide images.
    Image.MAX_IMAGE_PIXELS = 1000 * 1000 * 256 * 256
    convert_tif_to_jpeg()
@@ -0,0 +1,50 @@ | |||
# import libtiff | |||
# import pytiff | |||
import cv2 | |||
import tifffile | |||
def show_tif_image(address, name, key=0, w_from=0, h_from=0, size=700, whole_image=False):
    """Display a crop (or the whole page) of a TIFF file in an OpenCV window.

    Args:
        address: TIFF file path.
        name: window title prefix.
        key: TIFF page/level to read.
        w_from, h_from: top-left corner of the crop.
        size: crop edge length (clamped to the image bounds).
        whole_image: when True, ignore the crop arguments and show everything.
    """
    import zarr
    image_store = tifffile.imread(address, aszarr=True, key=key)
    # Renamed local (was `zarr`) so it no longer shadows the zarr module.
    zarr_array = zarr.open(image_store, mode='r')
    if not whole_image:
        image_frag = zarr_array[w_from:min(w_from + size, zarr_array.shape[0]),
                     h_from:min(h_from + size, zarr_array.shape[1])]
    else:
        image_frag = zarr_array[0:zarr_array.shape[0], 0:zarr_array.shape[1]]
    cv2.imshow(f"name:{name} - shape:{image_frag.shape} - page:{key}", image_frag)
    print(f"name: {name}, shape: {zarr_array.shape}")
    image_store.close()
def show_CAMELYON16_sample_view():
    """Show a fixed crop of a CAMELYON16 tumor slide (full-resolution page)."""
    # show_tif_image('data/CAMELYON16/tumor_084.tif', "CAMELYON16", key=7)
    show_tif_image('data/CAMELYON16/tumor_084.tif', "CAMELYON16", key=0, w_from=10000, h_from=50000)
def show_CAMELYON17_sample_view():
    """Show the default crop of page 7 of a CAMELYON17 slide."""
    slide_path = 'data/CAMELYON17/patient_083_node_4.tif'
    show_tif_image(slide_path, "CAMELYON17", key=7)
def show_Papsociety_sample_view():
    """Load a Papsociety sample JPEG with OpenCV and display it."""
    sample_path = 'data/Papsociety/Follicular_neoplasm2,_low_power,_confirmed_FVPTC_DQ_SM.jpg'
    image_frag = cv2.imread(sample_path)
    cv2.imshow(f"Papsociety - {image_frag.shape}", image_frag)
def show_test(name):
    """Show a 1000x1000 crop of the local test TIFF, labelled *name*."""
    show_tif_image('data/test/1272.tiff', name, key=0, w_from=1300, h_from=0, size=1000)
if __name__ == '__main__':
    # Other sample viewers (toggle as needed):
    # show_CAMELYON16_sample_view()
    # show_CAMELYON17_sample_view()
    # show_Papsociety_sample_view()

    # Two nearby 1000x1000 crops of the same test slide.
    show_tif_image('data/test/1272.tiff', "1", key=0, w_from=1000, h_from=100, size=1000)
    show_tif_image('data/test/1272.tiff', "2", key=0, w_from=1000, h_from=1000, size=1000)
    # Keep the windows responsive until 'q' is pressed.
    while cv2.waitKey(1) != ord('q'):
        pass
@@ -0,0 +1,180 @@ | |||
absl-py==1.0.0 | |||
aiohttp==3.8.1 | |||
aiosignal==1.2.0 | |||
alignment==1.0.10 | |||
appdirs==1.4.4 | |||
argon2-cffi==20.1.0 | |||
asgiref==3.2.10 | |||
astunparse==1.6.3 | |||
async-generator==1.10 | |||
async-timeout==4.0.2 | |||
attrs==21.2.0 | |||
backcall==0.2.0 | |||
bleach==3.3.0 | |||
blis==0.7.5 | |||
cachetools==4.2.4 | |||
catalogue==2.0.6 | |||
certifi==2021.10.8 | |||
cffi==1.14.5 | |||
charset-normalizer==2.0.8 | |||
click==8.0.3 | |||
colorama==0.4.4 | |||
convertapi==1.4.0 | |||
cryptography==3.4.7 | |||
cycler==0.11.0 | |||
cymem==2.0.6 | |||
Cython==0.29.23 | |||
decorator==5.0.9 | |||
defusedxml==0.7.1 | |||
distlib==0.3.2 | |||
dj-database-url==0.5.0 | |||
Django==3.1.2 | |||
django-crispy-forms==1.9.2 | |||
django-heroku==0.3.1 | |||
django-rest==0.8.7 | |||
djangorestframework==3.13.1 | |||
djangorestframework-simplejwt==5.0.0 | |||
entrypoints==0.3 | |||
et-xmlfile==1.1.0 | |||
factory-boy==3.2.1 | |||
Faker==12.3.0 | |||
filelock==3.0.12 | |||
flatbuffers==2.0 | |||
fonttools==4.28.2 | |||
frozenlist==1.3.0 | |||
gast==0.4.0 | |||
gensim==4.1.2 | |||
google-auth==2.3.3 | |||
google-auth-oauthlib==0.4.6 | |||
google-pasta==0.2.0 | |||
grpcio==1.42.0 | |||
gunicorn==20.0.4 | |||
h5py==3.6.0 | |||
hazm==0.7.0 | |||
huggingface-hub==0.6.0 | |||
idna==3.3 | |||
importlib-metadata==4.8.2 | |||
ipykernel==5.5.5 | |||
ipython==7.24.1 | |||
ipython-genutils==0.2.0 | |||
ipywidgets==7.6.3 | |||
jedi==0.18.0 | |||
Jinja2==3.0.1 | |||
joblib==1.0.1 | |||
jsonschema==3.2.0 | |||
jupyter==1.0.0 | |||
jupyter-client==6.1.12 | |||
jupyter-console==6.4.0 | |||
jupyter-core==4.7.1 | |||
jupyterlab-pygments==0.1.2 | |||
jupyterlab-widgets==1.0.0 | |||
keras==2.7.0 | |||
Keras-Preprocessing==1.1.2 | |||
kiwisolver==1.3.2 | |||
langcodes==3.3.0 | |||
libclang==12.0.0 | |||
libtiff==0.4.2 | |||
Markdown==3.3.6 | |||
MarkupSafe==2.0.1 | |||
matplotlib==3.5.0 | |||
matplotlib-inline==0.1.2 | |||
mistune==0.8.4 | |||
multidict==6.0.2 | |||
murmurhash==1.0.6 | |||
nbclient==0.5.3 | |||
nbconvert==6.0.7 | |||
nbformat==5.1.3 | |||
nest-asyncio==1.5.1 | |||
nltk==3.3 | |||
notebook==6.4.0 | |||
numpy==1.20.3 | |||
oauthlib==3.1.1 | |||
opencv-python==4.5.2.54 | |||
openpyxl==3.0.7 | |||
opt-einsum==3.3.0 | |||
packaging==20.9 | |||
pandas==1.2.4 | |||
pandocfilters==1.4.3 | |||
parso==0.8.2 | |||
pathy==0.6.1 | |||
pickleshare==0.7.5 | |||
Pillow==8.4.0 | |||
preshed==3.0.6 | |||
prometheus-client==0.11.0 | |||
prompt-toolkit==3.0.18 | |||
protobuf==3.19.1 | |||
psycopg2==2.8.6 | |||
pyaes==1.6.1 | |||
pyaml==21.10.1 | |||
pyasn1==0.4.8 | |||
pyasn1-modules==0.2.8 | |||
pycparser==2.20 | |||
pydantic==1.8.2 | |||
Pygments==2.9.0 | |||
PyJWT==2.3.0 | |||
pynput==1.7.5 | |||
pyOpenSSL==20.0.1 | |||
pyparsing==2.4.7 | |||
pyrsistent==0.17.3 | |||
pyTelegramBotAPI==4.4.0 | |||
python-dateutil==2.8.1 | |||
python-decouple==3.6 | |||
pytz==2020.1 | |||
PyWavelets==1.1.1 | |||
pywin32==301 | |||
pywinpty==1.1.1 | |||
PyYAML==6.0 | |||
pyzmq==22.1.0 | |||
qtconsole==5.1.0 | |||
QtPy==1.9.0 | |||
regex==2022.4.24 | |||
requests==2.26.0 | |||
requests-oauthlib==1.3.0 | |||
rsa==4.8 | |||
scikit-learn==0.24.2 | |||
scipy==1.7.0 | |||
Send2Trash==1.5.0 | |||
setuptools-scm==6.3.2 | |||
six==1.16.0 | |||
# sklearn==0.0  — removed from install set: the "sklearn" PyPI name is a deprecated dummy package; scikit-learn (pinned above) is the real dependency
smart-open==5.2.1 | |||
spacy==3.2.1 | |||
spacy-legacy==3.0.8 | |||
spacy-loggers==1.0.1 | |||
sqlparse==0.4.1 | |||
srsly==2.4.2 | |||
Telethon==1.24.0 | |||
tensorboard==2.7.0 | |||
tensorboard-data-server==0.6.1 | |||
tensorboard-plugin-wit==1.8.0 | |||
tensorflow==2.7.0 | |||
tensorflow-estimator==2.7.0 | |||
tensorflow-io-gcs-filesystem==0.22.0 | |||
termcolor==1.1.0 | |||
terminado==0.10.0 | |||
testpath==0.5.0 | |||
thinc==8.0.13 | |||
threadpoolctl==2.2.0 | |||
tokenizers==0.12.1 | |||
tomli==1.2.2 | |||
torch==1.10.1 | |||
torchtext==0.11.1 | |||
torchvision==0.11.1 | |||
tornado==6.1 | |||
tqdm==4.62.3 | |||
traitlets==5.0.5 | |||
transformers==4.19.2 | |||
typer==0.4.0 | |||
typing_extensions==4.0.1 | |||
urllib3==1.26.7 | |||
virtualenv==20.4.7 | |||
wasabi==0.9.0 | |||
wcwidth==0.2.5 | |||
webencodings==0.5.1 | |||
Werkzeug==2.0.2 | |||
whitenoise==5.2.0 | |||
widgetsnbextension==3.5.1 | |||
wrapt==1.13.3 | |||
xlrd==2.0.1 | |||
yarl==1.7.2 | |||
zipp==3.6.0 |
@@ -0,0 +1,21 @@ | |||
import itertools | |||
import cv2 | |||
def show_and_wait(img, name="img", wait=True, save=False):
    """Display *img* in a window titled *name*.

    If *wait* is True, block until the user presses 'q', then close all
    windows.  If *save* is True, also write the image to "<name>.jpeg".
    """
    cv2.imshow(name, img)
    if wait:
        # Block until 'q' is pressed, then tear the windows down.
        while True:
            if cv2.waitKey() == ord('q'):
                break
        cv2.destroyAllWindows()
    if save:
        cv2.imwrite(f"{name}.jpeg", img)
def check_if_generator_is_empty(generator):
    """Peek at *generator* without losing any elements.

    Returns None when the generator is already exhausted; otherwise returns
    an equivalent iterator that still yields every element, including the
    one consumed by the peek.
    """
    for first_item in generator:
        # Non-empty: stitch the peeked element back in front of the rest.
        return itertools.chain((first_item,), generator)
    return None