| @@ -0,0 +1,145 @@ | |||
| # Custom | |||
| *.svs | |||
| *.xml | |||
| *.csv | |||
| *.xcf | |||
| *.zip | |||
| *.json | |||
| *.state | |||
| *.tiff | |||
| *.tif | |||
| .idea | |||
| *.jpeg | |||
| *.jpg | |||
| **/data/ | |||
| **/patches/ | |||
| classification_stuff/Transfer-Learning-Library | |||
| # Byte-compiled / optimized / DLL files | |||
| __pycache__/ | |||
| *.py[cod] | |||
| *$py.class | |||
| # C extensions | |||
| *.so | |||
| # Distribution / packaging | |||
| .Python | |||
| build/ | |||
| develop-eggs/ | |||
| dist/ | |||
| downloads/ | |||
| eggs/ | |||
| .eggs/ | |||
| lib/ | |||
| lib64/ | |||
| parts/ | |||
| sdist/ | |||
| var/ | |||
| wheels/ | |||
| pip-wheel-metadata/ | |||
| share/python-wheels/ | |||
| *.egg-info/ | |||
| .installed.cfg | |||
| *.egg | |||
| MANIFEST | |||
| # PyInstaller | |||
| # Usually these files are written by a python script from a template | |||
| # before PyInstaller builds the exe, so as to inject date/other infos into it. | |||
| *.manifest | |||
| *.spec | |||
| # Installer logs | |||
| pip-log.txt | |||
| pip-delete-this-directory.txt | |||
| # Unit test / coverage reports | |||
| htmlcov/ | |||
| .tox/ | |||
| .nox/ | |||
| .coverage | |||
| .coverage.* | |||
| .cache | |||
| nosetests.xml | |||
| coverage.xml | |||
| *.cover | |||
| *.py,cover | |||
| .hypothesis/ | |||
| .pytest_cache/ | |||
| # Translations | |||
| *.mo | |||
| *.pot | |||
| # Django stuff: | |||
| *.log | |||
| local_settings.py | |||
| db.sqlite3 | |||
| db.sqlite3-journal | |||
| # Flask stuff: | |||
| instance/ | |||
| .webassets-cache | |||
| # Scrapy stuff: | |||
| .scrapy | |||
| # Sphinx documentation | |||
| docs/_build/ | |||
| # PyBuilder | |||
| target/ | |||
| # Jupyter Notebook | |||
| .ipynb_checkpoints | |||
| # IPython | |||
| profile_default/ | |||
| ipython_config.py | |||
| # pyenv | |||
| .python-version | |||
| # pipenv | |||
| # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | |||
| # However, in case of collaboration, if having platform-specific dependencies or dependencies | |||
| # having no cross-platform support, pipenv may install dependencies that don't work, or not | |||
| # install all needed dependencies. | |||
| #Pipfile.lock | |||
| # PEP 582; used by e.g. github.com/David-OConnor/pyflow | |||
| __pypackages__/ | |||
| # Celery stuff | |||
| celerybeat-schedule | |||
| celerybeat.pid | |||
| # SageMath parsed files | |||
| *.sage.py | |||
| # Environments | |||
| .env | |||
| .venv | |||
| env/ | |||
| venv/ | |||
| ENV/ | |||
| env.bak/ | |||
| venv.bak/ | |||
| # Spyder project settings | |||
| .spyderproject | |||
| .spyproject | |||
| # Rope project settings | |||
| .ropeproject | |||
| # mkdocs documentation | |||
| /site | |||
| # mypy | |||
| .mypy_cache/ | |||
| .dmypy.json | |||
| dmypy.json | |||
| # Pyre type checker | |||
| .pyre/ | |||
| @@ -0,0 +1,39 @@ | |||
| -----BEGIN CERTIFICATE----- | |||
| MIIG5jCCBc6gAwIBAgIQAze5KDR8YKauxa2xIX84YDANBgkqhkiG9w0BAQUFADBs | |||
| MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3 | |||
| d3cuZGlnaWNlcnQuY29tMSswKQYDVQQDEyJEaWdpQ2VydCBIaWdoIEFzc3VyYW5j | |||
| ZSBFViBSb290IENBMB4XDTA3MTEwOTEyMDAwMFoXDTIxMTExMDAwMDAwMFowaTEL | |||
| MAkGA1UEBhMCVVMxFTATBgNVBAoTDERpZ2lDZXJ0IEluYzEZMBcGA1UECxMQd3d3 | |||
| LmRpZ2ljZXJ0LmNvbTEoMCYGA1UEAxMfRGlnaUNlcnQgSGlnaCBBc3N1cmFuY2Ug | |||
| RVYgQ0EtMTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAPOWYth1bhn/ | |||
| PzR8SU8xfg0ETpmB4rOFVZEwscCvcLssqOcYqj9495BoUoYBiJfiOwZlkKq9ZXbC | |||
| 7L4QWzd4g2B1Rca9dKq2n6Q6AVAXxDlpufFP74LByvNK28yeUE9NQKM6kOeGZrzw | |||
| PnYoTNF1gJ5qNRQ1A57bDIzCKK1Qss72kaPDpQpYSfZ1RGy6+c7pqzoC4E3zrOJ6 | |||
| 4GAiBTyC01Li85xH+DvYskuTVkq/cKs+6WjIHY9YHSpNXic9rQpZL1oRIEDZaARo | |||
| LfTAhAsKG3jf7RpY3PtBWm1r8u0c7lwytlzs16YDMqbo3rcoJ1mIgP97rYlY1R4U | |||
| pPKwcNSgPqcCAwEAAaOCA4UwggOBMA4GA1UdDwEB/wQEAwIBhjA7BgNVHSUENDAy | |||
| BggrBgEFBQcDAQYIKwYBBQUHAwIGCCsGAQUFBwMDBggrBgEFBQcDBAYIKwYBBQUH | |||
| AwgwggHEBgNVHSAEggG7MIIBtzCCAbMGCWCGSAGG/WwCATCCAaQwOgYIKwYBBQUH | |||
| AgEWLmh0dHA6Ly93d3cuZGlnaWNlcnQuY29tL3NzbC1jcHMtcmVwb3NpdG9yeS5o | |||
| dG0wggFkBggrBgEFBQcCAjCCAVYeggFSAEEAbgB5ACAAdQBzAGUAIABvAGYAIAB0 | |||
| AGgAaQBzACAAQwBlAHIAdABpAGYAaQBjAGEAdABlACAAYwBvAG4AcwB0AGkAdAB1 | |||
| AHQAZQBzACAAYQBjAGMAZQBwAHQAYQBuAGMAZQAgAG8AZgAgAHQAaABlACAARABp | |||
| AGcAaQBDAGUAcgB0ACAARQBWACAAQwBQAFMAIABhAG4AZAAgAHQAaABlACAAUgBl | |||
| AGwAeQBpAG4AZwAgAFAAYQByAHQAeQAgAEEAZwByAGUAZQBtAGUAbgB0ACAAdwBo | |||
| AGkAYwBoACAAbABpAG0AaQB0ACAAbABpAGEAYgBpAGwAaQB0AHkAIABhAG4AZAAg | |||
| AGEAcgBlACAAaQBuAGMAbwByAHAAbwByAGEAdABlAGQAIABoAGUAcgBlAGkAbgAg | |||
| AGIAeQAgAHIAZQBmAGUAcgBlAG4AYwBlAC4wEgYDVR0TAQH/BAgwBgEB/wIBADCB | |||
| gwYIKwYBBQUHAQEEdzB1MCQGCCsGAQUFBzABhhhodHRwOi8vb2NzcC5kaWdpY2Vy | |||
| dC5jb20wTQYIKwYBBQUHMAKGQWh0dHA6Ly93d3cuZGlnaWNlcnQuY29tL0NBQ2Vy | |||
| dHMvRGlnaUNlcnRIaWdoQXNzdXJhbmNlRVZSb290Q0EuY3J0MIGPBgNVHR8EgYcw | |||
| gYQwQKA+oDyGOmh0dHA6Ly9jcmwzLmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydEhpZ2hB | |||
| c3N1cmFuY2VFVlJvb3RDQS5jcmwwQKA+oDyGOmh0dHA6Ly9jcmw0LmRpZ2ljZXJ0 | |||
| LmNvbS9EaWdpQ2VydEhpZ2hBc3N1cmFuY2VFVlJvb3RDQS5jcmwwHQYDVR0OBBYE | |||
| FExYyyXwQU9S9CjIgUObpqig5pLlMB8GA1UdIwQYMBaAFLE+w2kD+L9HAdSYJhoI | |||
| Au9jZCvDMA0GCSqGSIb3DQEBBQUAA4IBAQBMeheHKF0XvLIyc7/NLvVYMR3wsXFU | |||
| nNabZ5PbLwM+Fm8eA8lThKNWYB54lBuiqG+jpItSkdfdXJW777UWSemlQk808kf/ | |||
| roF/E1S3IMRwFcuBCoHLdFfcnN8kpCkMGPAc5K4HM+zxST5Vz25PDVR708noFUjU | |||
| xbvcNRx3RQdIRYW9135TuMAW2ZXNi419yWBP0aKb49Aw1rRzNubS+QOy46T15bg+ | |||
| BEkAui6mSnKDcp33C4ypieez12Qf1uNgywPE3IjpnSUBAHHLA7QpYCWP+UbRe3Gu | |||
| zVMSW4SOwg/H7ZMZ2cn6j1g0djIvruFQFGHUqFijyDATI+/GJYw2jxyA | |||
| -----END CERTIFICATE----- | |||
| @@ -0,0 +1,21 @@ | |||
| MIT License | |||
| Copyright (c) 2022 Amir Hossein | |||
| Permission is hereby granted, free of charge, to any person obtaining a copy | |||
| of this software and associated documentation files (the "Software"), to deal | |||
| in the Software without restriction, including without limitation the rights | |||
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
| copies of the Software, and to permit persons to whom the Software is | |||
| furnished to do so, subject to the following conditions: | |||
| The above copyright notice and this permission notice shall be included in all | |||
| copies or substantial portions of the Software. | |||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
| SOFTWARE. | |||
| @@ -0,0 +1,3 @@ | |||
| # Thyroid-Project-Using-DL | |||
| Developing a neural network to classify thyroid carcinoma using H&E slides | |||
| @@ -0,0 +1,19 @@ | |||
# from dalib.translation.fourier_transform import FourierTransform
import importlib.util
import sys
from PIL import Image
# Smoke test for Fourier Domain Adaptation (FDA): loads the FourierTransform
# class directly from a checked-out Transfer-Learning-Library file and applies
# it to one source image, saving the style-translated result.
# NOTE(review): hard-coded absolute Windows path — breaks on any other machine;
# consider deriving it from a relative path or an environment variable.
fourier_transform_address = "E:\\Documentwork\\sharif\\CE Project\\future\\Thyroid Project\\Thyroid-Project-Using-DL\\classification_stuff\\Transfer-Learning-Library\\dalib\\translation\\fourier_transform.py"
# Load the module by file path (importlib recipe) instead of a package import,
# since Transfer-Learning-Library is vendored, not installed.
spec = importlib.util.spec_from_file_location("module.name", fourier_transform_address)
foo = importlib.util.module_from_spec(spec)
sys.modules["module.name"] = foo
spec.loader.exec_module(foo)
FourierTransform = foo.FourierTransform
# Target-domain images whose amplitude spectra define the target style.
# NOTE(review): "bio_tile (4).jpeg" appears twice — possibly a typo for (5); confirm.
image_list = ["bio_tile (1).jpeg", "bio_tile (2).jpeg", "bio_tile (3).jpeg", "bio_tile (4).jpeg", "bio_tile (4).jpeg"]
amplitude_dir = "amplitude_dir"  # cache directory for precomputed amplitudes
fourier_transform = FourierTransform(image_list, amplitude_dir, beta=0, rebuild=False)
source_image = Image.open("tile2.jpeg")  # image form source domain
source_image_in_target_style = fourier_transform(source_image)
source_image_in_target_style.save("out_fda.jpeg")
| @@ -0,0 +1,62 @@ | |||
| import random | |||
| import cv2 | |||
| import torch | |||
| import torch.nn as nn | |||
| from albumentations.augmentations.utils import read_rgb_image | |||
| from albumentations.core.transforms_interface import BasicTransform, to_tuple | |||
class Mixup(BasicTransform):
    """Albumentations transform implementing mixup augmentation.

    Blends the input image with a randomly chosen image from ``mixups`` using
    a coefficient sampled from ``beta_limit``, and wraps the target so the
    loss can weight both labels (see ``mixup_loss``).
    """

    def __init__(self, mixups, read_fn=read_rgb_image, beta_limit=0.3, **kwargs):
        """
        Args:
            mixups: sequence of (image_reference, target) pairs to mix in.
            read_fn: callable loading an image array given mixups[i][0].
            beta_limit: upper bound of the mixing coefficient; converted to a
                (low, high) tuple with low clamped at 0.
            **kwargs: forwarded to BasicTransform (e.g. p, always_apply).
        """
        super().__init__(**kwargs)
        self.mixups = mixups
        self.read_fn = read_fn
        self.beta_limit = to_tuple(beta_limit, low=0)

    def apply(self, image, mixup_image=None, beta=0.1, **params):
        # Convex combination of the two images, cast back to the input dtype.
        img_type = image.dtype
        image = ((1 - beta) * image + beta * mixup_image).astype(img_type)
        return image

    def apply_to_target(self, target, beta=0.1, mixup_target=-1, **params):
        # Wrap the original target so the loss can weight both labels by beta.
        target = {"img": target, "mixup": mixup_target, "beta": beta}
        return target

    def get_params_dependent_on_targets(self, params):
        img = params["image"]
        mixup = random.choice(self.mixups)
        mixup_image = self.read_fn(mixup[0])
        # Reflect-pad a smaller mixup image towards the input size.
        vertical_pad = max(0, (img.shape[0] - mixup_image.shape[0]) // 2)
        horizontal_pad = max(0, (img.shape[1] - mixup_image.shape[1]) // 2)
        try:
            mixup_image = cv2.copyMakeBorder(mixup_image, vertical_pad, vertical_pad, horizontal_pad, horizontal_pad,
                                             cv2.BORDER_REFLECT)
        except Exception as e:
            # Best-effort fallback: reflect-padding can fail (e.g. pad larger
            # than the source image); resize instead.
            print(e)
            mixup_image = cv2.resize(mixup_image, dsize=(img.shape[1], img.shape[0]))
        # BUG FIX: padding by //2 per side can still leave the mixup image one
        # pixel short (odd size difference) or larger than the input (negative
        # difference clamped to 0), which would crash the blend in apply().
        # Force the exact input size; this is a no-op when shapes already match.
        if mixup_image.shape[:2] != img.shape[:2]:
            mixup_image = cv2.resize(mixup_image, dsize=(img.shape[1], img.shape[0]))
        return {"mixup_image": mixup_image, "mixup_target": mixup[1]}

    def get_params(self):
        # Sample the mixing coefficient uniformly from the configured range.
        return {"beta": random.uniform(self.beta_limit[0], self.beta_limit[1])}

    @property
    def targets(self):
        return {
            "image": self.apply,
            "target": self.apply_to_target,
        }

    @property
    def targets_as_params(self):
        # The input image is needed to size/pad the sampled mixup image.
        return ["image"]
def mixup_loss(output, target):
    """Cross-entropy loss that also handles mixup-style composite targets.

    Args:
        output: raw model logits, shape (batch, n_classes).
        target: either a class-index tensor, or the dict produced by
            Mixup.apply_to_target with keys "img", "mixup" and "beta".

    Returns:
        Scalar loss tensor.
    """
    # isinstance instead of type() == : also accepts torch.Tensor subclasses
    # (e.g. parameter/cuda tensors), which the exact-type check rejected.
    if isinstance(target, torch.Tensor):
        # Plain (non-mixup) batch: standard cross entropy.
        loss = nn.CrossEntropyLoss()
        return loss(output, target)
    else:  # mixup has been used
        # Weight the per-sample losses of both labels by the mixing coefficient.
        loss = nn.CrossEntropyLoss(reduction="none")
        return ((1 - target["beta"]) * loss(output, target["img"]) + target["beta"] * loss(output,
                                                                                          target["mixup"])).mean()
| @@ -0,0 +1,162 @@ | |||
| import csv | |||
| import glob | |||
| import os | |||
| import random | |||
| from tqdm import tqdm | |||
| from config import Config | |||
class CustomFragmentLoader:
    """Collects patch image paths and labels from crawled-database folders and
    splits them into train/validation/test sets such that patches from the
    same slide never appear in more than one split."""

    def __init__(self, datasets_folder_name):
        # Folder names under ../../../database_crawlers/ to read patches from.
        self._datasets_folder_name = datasets_folder_name
        # database_id -> {image_id: [image_paths_list, row[3], row[2]]}.
        self._database_slide_dict = {}
        self._load_csv_files_to_dict()

    def _load_csv_files_to_dict(self):
        """Populate _database_slide_dict from each dataset's patch_labels.csv."""
        databases_directory = "../../../database_crawlers/"
        # Only keep datasets that actually have a "patches" directory.
        list_dir = [os.path.join(databases_directory, o, "patches") for o in self._datasets_folder_name
                    if os.path.isdir(os.path.join(databases_directory, o, "patches"))]
        for db_dir in list_dir:
            csv_dir = os.path.join(db_dir, "patch_labels.csv")
            with open(csv_dir, "r") as csv_file:
                csv_reader = csv.reader(csv_file)
                # Skip the header row; next() with default avoids StopIteration
                # on an empty file.
                header = next(csv_reader, None)
                for row in csv_reader:
                    if row:
                        database_id = row[0]
                        image_id = row[1]
                        # Patch folders are named with a prefix of the image id.
                        slide_frag_folder_name = [o for o in os.listdir(db_dir) if image_id.startswith(o)]
                        if slide_frag_folder_name:
                            slide_frag_folder_name = slide_frag_folder_name[0]
                        else:
                            continue
                        slide_path = os.path.join(db_dir, slide_frag_folder_name)
                        image_paths = glob.glob(os.path.join(slide_path, "*.jpeg"))
                        if image_paths:
                            d = self._database_slide_dict.get(database_id, {})
                            # Stored as [paths, row[3], row[2]] — note the column
                            # swap; presumably row[3] is the label text and row[2]
                            # extra metadata. TODO(review): confirm CSV schema.
                            d[image_id] = [image_paths] + [row[3], row[2]]
                            self._database_slide_dict[database_id] = d

    def load_image_path_and_labels_and_split(self, test_percent=20, val_percent=10):
        """Return (train, val, test) lists of (image_path, class_name) pairs.

        Splits are made per class at slide granularity so patches of one slide
        stay in a single split; percentages are by patch count, not slides.
        """
        train_images, val_images, test_images = [], [], []
        for database_name, slides_dict in self._database_slide_dict.items():
            # (patch_count, patch_paths, label_a, label_b) per slide.
            image_paths_by_slide = [(len(v[0]), v[0], v[1], v[2]) for v in slides_dict.values()]
            random.shuffle(image_paths_by_slide)
            # image_paths_by_slide.sort()
            class_slides_dict = {}
            for item in image_paths_by_slide:
                class_name = None
                if database_name == "NationalCancerInstitute":
                    # item[2] is a stringified tuple like "('normal', 'tumor', 'stromal')"
                    # of integer percentages; strip the punctuation and split.
                    normal_percent = int(item[2].strip(r"(|)|\'").split("\', \'")[0])
                    tumor_percent = int(item[2].strip(r"(|)|\'").split("\', \'")[1])
                    # NOTE(review): "stormal" is presumably a typo for "stromal".
                    stormal_percent = int(item[2].strip(r"(|)|\'").split("\', \'")[2])
                    # Only use slides with no stromal tissue and a pure label.
                    if stormal_percent == 0:
                        if tumor_percent == 100:
                            class_name = "MALIGNANT"
                        elif normal_percent == 100:
                            class_name = "BENIGN"
                        else:
                            class_name = str(tumor_percent)
                elif database_name == "BioAtlasThyroidSlideProvider":
                    # Bio Atlas labels come as free text in item[3].
                    if "papillary" in item[3].lower():
                        class_name = "MALIGNANT"
                    elif "normal" in item[3].lower():
                        class_name = "BENIGN"
                # Other databases: the stored label itself is the class name.
                class_name = class_name if class_name else item[2]
                if class_name in Config.class_names:
                    class_slides_dict[class_name] = class_slides_dict.get(class_name, []) + [
                        (item[0], item[1], class_name)]
            # split test val train because they must not share same slide id fragment
            for thyroid_class, slide_frags in class_slides_dict.items():
                dataset_train_images, dataset_val_images, dataset_test_images = [], [], []
                total_counts = sum([item[0] for item in slide_frags])
                test_counts = total_counts * test_percent // 100
                val_counts = total_counts * val_percent // 100
                train_counts = total_counts - test_counts - val_counts
                # Greedy fill: whole slides go to train until full, then val,
                # then everything else to test.
                for i, slide_frags_item in enumerate(slide_frags):
                    if len(dataset_train_images) + slide_frags_item[0] <= train_counts:
                        dataset_train_images += slide_frags_item[1]
                    elif len(dataset_val_images) + slide_frags_item[0] <= val_counts:
                        dataset_val_images += slide_frags_item[1]
                    else:
                        dataset_test_images += slide_frags_item[1]
                train_images += [(i, thyroid_class) for i in dataset_train_images]
                val_images += [(i, thyroid_class) for i in dataset_val_images]
                test_images += [(i, thyroid_class) for i in dataset_test_images]
        return train_images, val_images, test_images

    def national_cancer_image_and_labels_splitter_per_slide(self, test_percent=20, val_percent=10):
        """NCI-specific variant: like load_image_path_and_labels_and_split but
        labels are tumor percentages (ints) and each sample keeps its slide id,
        returning (image_path, (tumor_percent, slide_id)) pairs."""
        train_images, val_images, test_images = [], [], []
        for database_name, slides_dict in self._database_slide_dict.items():
            print(database_name)
            # (patch_count, patch_paths, label_a, label_b, slide_id) per slide.
            image_paths_by_slide = [(len(v[0]), v[0], v[1], v[2], k) for k, v in slides_dict.items()]
            random.shuffle(image_paths_by_slide)
            # image_paths_by_slide.sort()
            class_slides_dict = {}
            for item in tqdm(image_paths_by_slide):
                class_name = None
                # Same stringified-tuple percentage parsing as above.
                normal_percent = int(item[2].strip(r"(|)|\'").split("\', \'")[0])
                tumor_percent = int(item[2].strip(r"(|)|\'").split("\', \'")[1])
                stormal_percent = int(item[2].strip(r"(|)|\'").split("\', \'")[2])
                if stormal_percent == 0:
                    if tumor_percent == 100:
                        class_name = 100
                    elif normal_percent == 100:
                        class_name = 0
                    else:
                        class_name = tumor_percent
                # "is not None" matters here: 0 (benign) is a valid class.
                class_name = class_name if class_name is not None else item[2]
                if class_name in Config.class_names:
                    class_slides_dict[class_name] = class_slides_dict.get(class_name, []) + [
                        (item[0], item[1], class_name, item[4])]
            # split test val train because they must not share same slide id fragment
            for thyroid_class, slide_frags in class_slides_dict.items():
                dataset_train_images, dataset_val_images, dataset_test_images = [], [], []
                total_counts = sum([item[0] for item in slide_frags])
                test_counts = total_counts * test_percent // 100
                val_counts = total_counts * val_percent // 100
                train_counts = total_counts - test_counts - val_counts
                for i, slide_frags_item in enumerate(slide_frags):
                    # Attach the slide id to every patch path of this slide.
                    items_paths = [(item_path, slide_frags_item[3]) for item_path in slide_frags_item[1]]
                    if len(dataset_train_images) + slide_frags_item[0] <= train_counts:
                        dataset_train_images += items_paths
                    elif len(dataset_val_images) + slide_frags_item[0] <= val_counts:
                        dataset_val_images += items_paths
                    else:
                        dataset_test_images += items_paths
                train_images += [(i, (thyroid_class, j)) for i, j in dataset_train_images]
                val_images += [(i, (thyroid_class, j)) for i, j in dataset_val_images]
                test_images += [(i, (thyroid_class, j)) for i, j in dataset_test_images]
        return train_images, val_images, test_images
if __name__ == '__main__':
    # Manual sanity check: load one dataset, split it, and print per-class
    # counts plus cross-split overlaps (which should all be empty).
    # datasets_folder = ["national_cancer_institute"]
    datasets_folder = ["papsociaty"]
    # datasets_folder = ["stanford_tissue_microarray"]
    # datasets_folder = ["bio_atlas_at_jake_gittlen_laboratories"]
    train, val, test = CustomFragmentLoader(datasets_folder).load_image_path_and_labels_and_split(
        val_percent=Config.val_percent,
        test_percent=Config.test_percent)
    benign_train = [i for i in train if i[1] == "BENIGN"]
    mal_train = [i for i in train if i[1] == "MALIGNANT"]
    print(f"train: {len(train)}={len(benign_train)}+{len(mal_train)}")
    benign_val = [i for i in val if i[1] == "BENIGN"]
    mal_val = [i for i in val if i[1] == "MALIGNANT"]
    print(f"val: {len(val)}={len(benign_val)}+{len(mal_val)}")
    benign_test = [i for i in test if i[1] == "BENIGN"]
    mal_test = [i for i in test if i[1] == "MALIGNANT"]
    print(f"test: {len(test)}={len(benign_test)}+{len(mal_test)}")
    # Leakage checks: any non-empty intersection means a slide was shared.
    print(set(train) & set(test))
    print(set(train) & set(val))
    print(set(test) & set(val))
    # NOTE(review): `set(val) & set(val)` is just set(val) — likely a typo for
    # another cross-split pair; confirm intended check.
    print(len(set(val) & set(val)))
| @@ -0,0 +1,20 @@ | |||
| import os | |||
def set_config_for_logger(config_label):
    """Create (or return) the file logger for a training configuration.

    Log lines are appended to ./train_state/<config_label>/console.log in the
    format "timestamp|LEVEL|message".

    Args:
        config_label: name of the training configuration; used both as the
            logger name and as the sub-directory of its log file.

    Returns:
        A logging.Logger set to DEBUG with exactly one FileHandler.
    """
    import logging
    trains_state_dir = "./train_state"
    config_train_dir = os.path.join(trains_state_dir, config_label)
    # makedirs creates both directory levels at once and is race-free,
    # replacing the previous isdir/mkdir pairs.
    os.makedirs(config_train_dir, exist_ok=True)
    log_file = os.path.join(config_train_dir, "console.log")
    logger = logging.getLogger(config_label)
    logger.setLevel(logging.DEBUG)
    # BUG FIX: logging.getLogger returns the same cached object per name, so
    # unconditionally adding a handler on every call duplicated each log line.
    # Attach the file handler only once.
    if not logger.handlers:
        fh = logging.FileHandler(log_file)
        formatter = logging.Formatter('%(asctime)s|%(levelname)s|%(message)s', datefmt='%Y-%m-%d %H:%M:%S')
        fh.setFormatter(formatter)
        fh.setLevel(logging.DEBUG)
        logger.addHandler(fh)
    return logger
| @@ -0,0 +1,399 @@ | |||
| import os | |||
| import random | |||
| import time | |||
| from typing import cast | |||
| import numpy as np | |||
| import matplotlib.pyplot as plt | |||
| import timm | |||
| import torch | |||
| import torchvision | |||
| from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score | |||
| from torch import nn, optim | |||
| from torch.utils.data import DataLoader | |||
| from tqdm import tqdm | |||
| from config import Config | |||
| from fragment_splitter import CustomFragmentLoader | |||
| from model_train_logger import set_config_for_logger | |||
| from thyroid_dataset import ThyroidDataset | |||
| from thyroid_ml_model import ThyroidClassificationModel | |||
| from transformation import get_transformation | |||
@torch.no_grad()
def validate(model, data_loader, loss_function=None, show_tqdm=False):
    """Evaluate the model over a loader.

    Returns (balanced_accuracy_percent, confusion_matrix, (fpr, tpr, auc))
    and additionally the mean loss when loss_function is given.
    """
    class_set = sorted(data_loader.dataset.class_to_idx_dict.values())
    batch_losses = []
    predictions, targets, positive_scores = [], [], []
    iterator = tqdm(data_loader) if show_tqdm else data_loader
    for images, labels in iterator:
        images = images.to(Config.available_device)
        labels = labels.to(Config.available_device)
        logits = model(images, validate=True)
        if loss_function:
            batch_losses.append(loss_function(logits, labels))
        _, batch_preds = torch.max(logits, 1)
        # Score of the positive class (column 1) feeds the ROC computation.
        positive_scores += logits[:, 1].cpu()
        predictions += batch_preds.cpu()
        targets += labels.cpu()
    cf_matrix = confusion_matrix(targets, predictions, normalize="true")
    # Balanced accuracy: mean of the per-class recalls (diagonal of the
    # row-normalized confusion matrix).
    per_class_acc = [cf_matrix[c][c] for c in class_set]
    balanced_acc = sum(per_class_acc) / len(class_set)
    # TN|FN
    # FP|TP
    fpr, tpr, _ = roc_curve(targets, positive_scores)
    auc = roc_auc_score(targets, positive_scores)
    if loss_function:
        mean_loss = sum(batch_losses) / len(batch_losses)
        return balanced_acc * 100, cf_matrix, (fpr, tpr, auc), mean_loss
    return balanced_acc * 100, cf_matrix, (fpr, tpr, auc)
def get_save_state_dirs(config_label, epoch=None):
    """Ensure and return the directories used to persist training state.

    Args:
        config_label: training-configuration name (sub-directory of ./train_state).
        epoch: if not None, an "epoch-<n>" sub-directory is also created.

    Returns:
        (trains_state_dir, config_train_dir, save_state_dir) where
        save_state_dir is None when epoch is None.
    """
    trains_state_dir = "./train_state"
    config_train_dir = os.path.join(trains_state_dir, config_label)
    # makedirs creates both levels at once and tolerates existing directories,
    # replacing three racy isdir/mkdir pairs.
    os.makedirs(config_train_dir, exist_ok=True)
    if epoch is not None:
        save_state_dir = os.path.join(config_train_dir, f"epoch-{epoch}")
        os.makedirs(save_state_dir, exist_ok=True)
    else:
        save_state_dir = None
    return trains_state_dir, config_train_dir, save_state_dir
def plot_and_save_model_per_epoch(epoch,
                                  model_to_save,
                                  val_acc_list,
                                  train_acc_list,
                                  val_loss_list,
                                  train_loss_list,
                                  config_label):
    """Write accuracy and loss curves for the run; optionally snapshot the model.

    When model_to_save is falsy (and epoch None) only the charts are refreshed.
    """
    trains_state_dir, config_train_dir, save_state_dir = get_save_state_dirs(config_label, epoch)

    def _save_curves(val_series, train_series, y_label, file_name):
        # Plot validation vs. train series over epochs and write the figure.
        chart_path = os.path.join(config_train_dir, file_name)
        plt.plot(range(len(val_series)), val_series, label="validation")
        plt.plot(range(len(train_series)), train_series, label="train")
        plt.legend(loc="lower right")
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.savefig(chart_path)
        plt.clf()

    _save_curves(val_acc_list, train_acc_list, 'Balanced Accuracy', "val_train_acc.jpeg")
    _save_curves(val_loss_list, train_loss_list, 'Loss', "val_train_loss.jpeg")
    if model_to_save:
        model_to_save.save_model(os.path.join(save_state_dir, "model.state"))
def save_auc_roc_chart_for_test(test_fpr, test_tpr, test_auc_score, config_label, epoch):
    """Write the test-set ROC curve for this epoch into the epoch directory."""
    trains_state_dir, config_train_dir, save_dir = get_save_state_dirs(config_label, epoch)
    # Timestamped file name keeps repeated evaluations of the same epoch
    # from overwriting each other.
    chart_path = os.path.join(save_dir, f"test_roc_{time.time()}.jpeg")
    plt.plot(test_fpr, test_tpr, label="test, auc=" + str(test_auc_score))
    plt.legend(loc="lower right")
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.savefig(chart_path)
    plt.clf()
def calculate_test(image_model, epoch, test_data_loader, logger, config_name, show_tqdm=False):
    """Evaluate on the test loader, save its ROC chart, and log the accuracy."""
    image_model.eval()
    acc, cf, roc = validate(image_model, test_data_loader, show_tqdm=show_tqdm)
    fpr, tpr, auc_score = roc
    acc = float(acc)
    save_auc_roc_chart_for_test(fpr, tpr, auc_score, config_name, epoch)
    logger.info(f'Test|Epoch:{epoch}|Accuracy:{round(acc, 4)}, {cf}%')
def train_model(base_model, config_base_name, train_val_test_data_loaders, augmentation,
                adaptation_sample_dataset=None,
                train_model_flag=True,
                load_model_from_dir=None):
    """Train (or just evaluate) a ThyroidClassificationModel wrapped around base_model.

    Args:
        base_model: torchvision/timm backbone; Inception3 gets aux-logit handling.
        config_base_name: prefix for the run's config label / save directories.
        train_val_test_data_loaders: (train, val, test) DataLoader triple.
        augmentation: augmentation name passed to get_transformation.
        adaptation_sample_dataset: dataset handed to get_transformation as the
            style/base dataset (e.g. for FDA); may be None.
        train_model_flag: if False, skip training and only run the test set.
        load_model_from_dir: if given, load model.state from this directory.
    """
    config_name = f"{config_base_name}-{augmentation}-{','.join(Config.class_idx_dict.keys())}"
    logger = set_config_for_logger(config_name)
    logger.info(f"training config: {config_name}")
    try:
        # Inception3 returns (logits, aux_logits) in train mode; handled below.
        _is_inception = type(base_model) == torchvision.models.inception.Inception3
        train_data_loader, val_data_loader, test_data_loader = train_val_test_data_loaders
        logger.info(
            f"train valid test splits:" +
            f" {len(train_data_loader.dataset.samples) if train_data_loader else None}," +
            f" {len(val_data_loader.dataset.samples) if val_data_loader else None}," +
            f" {len(test_data_loader.dataset.samples) if test_data_loader else None}")
        # MODEL
        if load_model_from_dir:
            # Load model from file
            model_path = os.path.join(load_model_from_dir, 'model.state')
            image_model = ThyroidClassificationModel(base_model).load_model(model_path).to(Config.available_device)
        else:
            image_model = ThyroidClassificationModel(base_model).to(Config.available_device)
        if train_model_flag:
            # TRAIN
            transformation = get_transformation(augmentation=augmentation, base_dataset=adaptation_sample_dataset)
            train_dataset = cast(ThyroidDataset, train_data_loader.dataset)
            train_dataset.transform = transformation
            # Class-weighted CE to counter class imbalance in the train set.
            cec = nn.CrossEntropyLoss(weight=torch.tensor(train_dataset.class_weights).to(Config.available_device))
            optimizer = optim.Adam(image_model.parameters(), lr=Config.learning_rate)
            my_lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=Config.decay_rate)
            val_acc_history = []
            train_acc_history = []
            # NOTE(review): these accumulate across ALL epochs (never reset in
            # the epoch loop), so the logged train accuracy mixes predictions
            # from every epoch so far — confirm whether that is intended.
            train_y_preds = []
            train_y_targets = []
            best_epoch_val_acc = 0
            for epoch in range(Config.n_epoch):
                # variables to calculate train acc
                class_set = sorted(train_data_loader.dataset.class_to_idx_dict.values())
                for images, labels in tqdm(train_data_loader, colour="#0000ff"):
                    # Skip tiny trailing batches (can destabilize batch norm).
                    if len(images) >= Config.batch_size // 2:
                        image_model.train()
                        images = images.to(Config.available_device)
                        labels = labels.to(Config.available_device)
                        optimizer.zero_grad()
                        pred = image_model(images)
                        # pred label: torch.max(pred, 1)[1], labels
                        if _is_inception:
                            # Inception aux head contributes 0.4x its CE loss.
                            pred, aux_pred = pred
                            loss, aux_loss = cec(pred, labels), cec(aux_pred, labels)
                            loss = loss + 0.4 * aux_loss
                        else:
                            loss = cec(pred, labels)
                        loss.backward()
                        optimizer.step()
                        # train preds and labels
                        values, preds = torch.max(pred, 1)
                        train_y_preds.extend(preds.cpu())
                        train_y_targets.extend(labels.cpu())
                # Epoch level
                # validation data
                image_model.eval()
                train_cf_matrix = confusion_matrix(train_y_targets, train_y_preds, normalize="true")
                class_accuracies = [train_cf_matrix[c][c] for c in class_set]
                train_acc = sum(class_accuracies)
                train_acc /= len(class_set)
                # Recomputed as a percentage; the two lines above are shadowed.
                train_acc = (100 * sum(class_accuracies) / len(class_set)).item()
                train_acc_history.append(train_acc)
                logger.info(f'Train|E:{epoch}|Balanced Accuracy:{round(train_acc, 4)}%,\n{train_cf_matrix}')
                val_acc, val_cf_matrix, _, val_loss = validate(image_model,
                                                               val_data_loader,
                                                               cec)
                val_acc = float(val_acc)
                val_acc_history.append(val_acc)
                logger.info(f'Val|E:{epoch}|Balanced Accuracy:{round(val_acc, 4)}%,\n{val_cf_matrix}')
                save_model = False
                # NOTE(review): epoch ranges over [0, n_epoch), so this is never
                # True — likely an off-by-one for `Config.n_epoch - 1`; confirm.
                is_last_epoch = epoch == Config.n_epoch
                # "Better" = best val acc so far AND train/val gap small enough
                # (guards against saving an overfit epoch).
                is_a_better_epoch = val_acc >= best_epoch_val_acc
                is_a_better_epoch &= abs(train_acc - val_acc) < Config.train_val_acc_max_distance_for_best_epoch
                if is_a_better_epoch or is_last_epoch:
                    save_model = True
                    calculate_test(image_model, epoch, test_data_loader, logger, config_name, show_tqdm=False)
                plot_and_save_model_per_epoch(epoch if save_model else None,
                                              image_model if save_model else None,
                                              val_acc_history,
                                              train_acc_history,
                                              [],
                                              [],
                                              config_label=config_name)
                my_lr_scheduler.step()
        else:
            # JUST EVALUATE
            calculate_test(image_model, 0, test_data_loader, logger, config_name,
                           show_tqdm=True)
    except Exception as e:
        # Log and re-raise so failures are visible both in the log file and
        # to the caller.
        print(e)
        logger.error(str(e))
        raise e
def load_datasets(datasets_folders, test_percent=Config.test_percent, val_percent=Config.val_percent, sample_percent=1,
                  is_nci_per_slide=False):
    """Load, optionally subsample, and wrap the splits in datasets/loaders.

    Args:
        datasets_folders: dataset folder names for CustomFragmentLoader.
        test_percent / val_percent: split sizes forwarded to the splitter.
        sample_percent: fraction of each split to keep (see NOTE below).
        is_nci_per_slide: use the NCI per-slide splitter instead of the default.

    Returns:
        ((train, val, test) raw lists,
         (train_ds, val_ds, test_ds) ThyroidDatasets,
         (train_loader, val_loader, test_loader) — a loader is None when its
         split is empty).
    """
    if is_nci_per_slide:
        l_train, l_val, l_test = CustomFragmentLoader(
            datasets_folders).national_cancer_image_and_labels_splitter_per_slide(
            test_percent=test_percent,
            val_percent=val_percent)
    else:
        l_train, l_val, l_test = CustomFragmentLoader(datasets_folders).load_image_path_and_labels_and_split(
            test_percent=test_percent,
            val_percent=val_percent)
    # NOTE(review): random.choices samples WITH replacement, so even
    # sample_percent=1 resamples (duplicates + omissions) rather than keeping
    # the split intact — random.sample may have been intended; confirm.
    l_train = random.choices(l_train, k=int(sample_percent * len(l_train)))
    l_val = random.choices(l_val, k=int(sample_percent * len(l_val)))
    l_test = random.choices(l_test, k=int(sample_percent * len(l_test)))
    l_train_ds = ThyroidDataset(l_train, Config.class_idx_dict)
    l_val_ds = ThyroidDataset(l_val, Config.class_idx_dict)
    l_test_ds = ThyroidDataset(l_test, Config.class_idx_dict)
    # Loaders are only built for non-empty splits; callers must handle None.
    l_train_data_loader = None
    if l_train:
        l_train_data_loader = DataLoader(l_train_ds, batch_size=Config.batch_size, shuffle=True)
    l_val_data_loader = None
    if l_val:
        l_val_data_loader = DataLoader(l_val_ds, batch_size=Config.eval_batch_size, shuffle=True)
    l_test_data_loader = None
    if l_test:
        l_test_data_loader = DataLoader(l_test_ds, batch_size=Config.eval_batch_size, shuffle=True)
    return (l_train, l_val, l_test), (l_train_ds, l_val_ds, l_test_ds), (
        l_train_data_loader, l_val_data_loader, l_test_data_loader)
@torch.no_grad()
def evaluate_nci_dataset_per_slide(config_base_name, augmentation, base_model, data_loader,
                                   load_model_from_dir):
    """Evaluate a saved model on the NCI per-slide loader.

    Patch-level positive scores are averaged per slide into a tumor-percentage
    prediction, both predictions and targets are rounded to the nearest 10%,
    and a weighted accuracy plus confusion matrix are logged and returned.

    Returns:
        (accuracy_percent, confusion_matrix)
    """
    config_name = f"{config_base_name}-{augmentation}-tumor-percent"
    logger = set_config_for_logger(config_name)
    logger.info(f"training config: {config_name}")
    _is_inception = type(base_model) == torchvision.models.inception.Inception3
    logger.info(
        f"test:" +
        f" {len(data_loader.dataset.samples) if data_loader else None}")
    # MODEL
    # Load model from file
    model_path = os.path.join(load_model_from_dir, 'model.state')
    model = ThyroidClassificationModel(base_model).load_model(model_path).to(Config.available_device)
    y_positive_scores = []
    # slide_id -> list of per-patch positive scores / slide-level label.
    slides_preds = {}
    slide_labels = {}
    for images, (labels, slides) in tqdm(data_loader):
        images = images.to(Config.available_device)
        x = model(images, validate=True).cpu()
        # Column 1 is treated as the positive (tumor) score.
        preds = x[:, 1]
        logger.info("zero and 1000 percent")
        logger.info(x[:, 0])
        logger.info(x[:, 1])
        for row_index in range(len(labels)):
            slide_id = slides[row_index]
            slide_label = labels[row_index]
            slide_labels[slide_id] = slide_label
            slides_preds[slide_id] = slides_preds.get(slide_id, []) + [preds[row_index].item()]
        y_positive_scores += x[:, 1].cpu()
    y_targets = []
    y_preds = []
    # Aggregate: mean patch score per slide, scaled to a 0-100 percentage.
    for key, value in slides_preds.items():
        slides_preds[key] = (sum(slides_preds[key]) / len(slides_preds[key])) * 100
        y_preds.append(slides_preds[key])
        y_targets.append(int(slide_labels[key]))
    # Round both to the nearest multiple of 10 so they land on class bins.
    y_targets_rounded = [int(round(x / 100, 1) * 100) for x in y_targets]
    y_preds_rounded = [int(round(x / 100, 1) * 100) for x in y_preds]
    cf_matrix = confusion_matrix(y_targets_rounded, y_preds_rounded, labels=Config.class_names, normalize="true")
    # Accuracy weighted by each class's row mass in the normalized matrix.
    class_accuracies = [cf_matrix[c][c] for c in range(len(cf_matrix))]
    class_weights = [sum(cf_matrix[c]) for c in range(len(cf_matrix))]
    acc = sum([class_accuracies[i] * class_weights[i] for i in range(len(class_accuracies))])
    acc /= sum(class_weights)
    # TN|FN
    # FP|TP
    # fpr, tpr, _ = roc_curve(y_targets, y_positive_scores)
    # auc = roc_auc_score(y_targets, y_positive_scores)
    logger.info(f"target rounded:{y_targets_rounded}")
    logger.info(f"pred rounded:{y_preds_rounded}")
    logger.info(f"Results| acc:{acc * 100}\ncf:{cf_matrix}")
    return acc * 100, cf_matrix
| ########## | |||
| ## Runs ## | |||
| ########## | |||
| # train_phase block | |||
| if __name__ == '__main__' and Config.train_phase: | |||
| _, (train_ds, _, _), (train_data_loader, val_data_loader, test_data_loader) = load_datasets( | |||
| ["national_cancer_institute"], | |||
| sample_percent=1) | |||
| # Domain adaptation dataset on small real datasets | |||
| # _, (_, _, domain_sample_test_dataset), _ = load_datasets(["stanford_tissue_microarray", | |||
| # "papsociaty"], | |||
| # sample_percent=0.5, | |||
| # test_percent=100, | |||
| # val_percent=0) | |||
| for c_base_name, model, augmentations in [ | |||
| (f"resnet101_{Config.learning_rate}_{Config.decay_rate}_nci_final", | |||
| torchvision.models.resnet101(pretrained=True, progress=True), [ | |||
| "mixup", | |||
| # "jit", | |||
| # "fda", | |||
| # "jit-fda-mixup", | |||
| # "shear", | |||
| # "std" | |||
| ]), | |||
| ]: | |||
| for aug in augmentations: | |||
| Config.reset_random_seeds() | |||
| train_model(model, c_base_name, (train_data_loader, val_data_loader, test_data_loader), | |||
| augmentation=aug, adaptation_sample_dataset=train_ds) | |||
# evaluate_phase block
if __name__ == '__main__' and Config.evaluate_phase:
    # Main data
    # Per-slide evaluation predicts a 0-100 tumor percentage, so the class
    # space is widened from BENIGN/MALIGNANT to the integers 0..100.
    Config.class_names = [i for i in range(101)]
    Config.class_idx_dict = {i: i for i in range(101)}
    _, (train_ds, _, _), (_, _, test_data_loader) = load_datasets(
        ["national_cancer_institute",
         ],
        sample_percent=1, test_percent=100, val_percent=0, is_nci_per_slide=True)
    # Each entry: (run name, backbone, [(augmentation, best-epoch state dir), ...]).
    for c_base_name, model, aug_best_epoch_list in [
        (f"resnet101_{Config.learning_rate}_{Config.decay_rate}_nci_eval",
         torchvision.models.resnet101(pretrained=True, progress=True), [
             ("mixup", "train_state/resnet101_0.0001_1_nci_final-mixup-BENIGN,MALIGNANT/epoch-19/"),
         ]),
        # (f"resnet101_{Config.learning_rate}_{Config.decay_rate}_test_nci_eval",
        #  torchvision.models.resnet101(pretrained=True, progress=True), [
        #      ("fda",
        #       "train_state/runs_0.0001_1_nic_test_benign_mal/resnet101_0.0001_1_nci-fda-BENIGN,MALIGNANT/epoch-3/"),
        #      ("mixup",
        #       "train_state/runs_0.0001_1_nic_test_benign_mal/resnet101_0.0001_1_nci-mixup-BENIGN,MALIGNANT/epoch-3/"),
        #      ("jit",
        #       "train_state/runs_0.0001_1_nic_test_benign_mal/resnet101_0.0001_1_nci-jit-BENIGN,MALIGNANT/epoch-3/"),
        #      ("jit-fda-mixup",
        #       "train_state/runs_0.0001_1_nic_test_benign_mal/resnet101_0.0001_1_nci-jit-fda-mixup-BENIGN,MALIGNANT/epoch-3/"),
        #      ]),
    ]:
        for aug, best_epoch in aug_best_epoch_list:
            Config.reset_random_seeds()
            evaluate_nci_dataset_per_slide(c_base_name, aug, model, test_data_loader,
                                           load_model_from_dir=best_epoch)
| @@ -0,0 +1,3 @@ | |||
# Make the project root (three levels up) and this directory importable,
# then run the fragment splitter.
export PYTHONPATH="${PYTHONPATH}:../../../";
export PYTHONPATH="${PYTHONPATH}:./";
python fragment_splitter.py;
| @@ -0,0 +1,3 @@ | |||
# Make the project root (three levels up) and this directory importable,
# then run model training.
export PYTHONPATH="${PYTHONPATH}:../../../";
export PYTHONPATH="${PYTHONPATH}:./";
python model_training.py;
| @@ -0,0 +1,83 @@ | |||
| import os | |||
| import numpy as np | |||
| from PIL import Image | |||
| from torch.utils.data import Dataset | |||
| from config import Config | |||
| from fragment_splitter import CustomFragmentLoader | |||
| from transformation import get_transformation | |||
| from utils import show_and_wait | |||
class ThyroidDataset(Dataset):
    """Dataset of thyroid image patches.

    Each sample is ``(image_path, (class_index, slide_id))``; string labels
    are mapped to integer indices through *class_to_index*. Images are padded
    onto a black square canvas of a fixed size before any transform runs.
    """

    def __init__(self, image_paths_labels_list, class_to_index, transform=None, force_to_size_with_padding=512):
        super().__init__()
        self.class_to_idx_dict = class_to_index
        self.force_to_size_with_padding = force_to_size_with_padding
        self.transform = transform
        self.samples = self._make_dataset(image_paths_labels_list)
        self.class_weights = self._calculate_class_weights(image_paths_labels_list)

    def _calculate_class_weights(self, image_paths_labels_list):
        """Inverse-frequency weight per class, ordered by class index."""
        counts_by_label = {}
        for _path, (label, _slide) in image_paths_labels_list:
            counts_by_label[label] = counts_by_label.get(label, 0) + 1
        total = len(image_paths_labels_list)
        indexed_weights = sorted(
            (self.class_to_idx_dict.get(label, None), total / (len(counts_by_label) * count))
            for label, count in counts_by_label.items()
        )
        return [weight for _idx, weight in indexed_weights]

    def _make_dataset(self, image_paths_labels_list):
        """Validate every path exists and map labels to class indices."""
        entries = []
        for image_path, (label, slide) in image_paths_labels_list:
            if not os.path.exists(os.path.abspath(image_path)):
                raise (RuntimeError(f"{image_path} not found."))
            entries.append((image_path, (self.class_to_idx_dict.get(label, "Unknown label"), slide)))
        return entries

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        path, target = self.samples[index]
        padded = self.add_margin(Image.open(path).convert('RGB'))
        pixels = np.array(padded)
        if self.transform is None:
            # No transform configured: fall back to the minimal pipeline
            # (pad/crop + tensor conversion only).
            pixels = get_transformation(augmentation="min")(image=pixels)['image']
        else:
            pixels = self.transform(image=pixels)['image']
        return pixels, target

    def add_margin(self, pil_img):
        """Center *pil_img* on a black square canvas of the configured size."""
        width, height = pil_img.size
        canvas_size = self.force_to_size_with_padding
        canvas = Image.new("RGB", (canvas_size, canvas_size), (0, 0, 0))
        canvas.paste(pil_img, ((canvas_size - width) // 2, (canvas_size - height) // 2))
        return canvas
if __name__ == '__main__':
    # Smoke test: build train/val/test datasets from the small real-slide
    # collections and fetch one training sample.
    class_idx_dict = Config.class_idx_dict
    datasets_folder = ["stanford_tissue_microarray", "papsociaty"]
    train, val, test = CustomFragmentLoader(datasets_folder).load_image_path_and_labels_and_split()
    train_ds = ThyroidDataset(train, class_idx_dict)
    test_ds = ThyroidDataset(test, class_idx_dict)
    val_ds = ThyroidDataset(val, class_idx_dict)
    res = train_ds.__getitem__(0)
    print(res)
| @@ -0,0 +1,47 @@ | |||
| import torch | |||
| import torchvision | |||
| from torch import nn | |||
class ThyroidClassificationModel(nn.Module):
    """Backbone + small softmax classification head for 2-class prediction.

    An Inception3 backbone gets a second head because, in training mode, it
    returns (main_logits, aux_logits) instead of a single tensor.
    """

    @staticmethod
    def _build_head():
        """Return a fresh 1000 -> 2 softmax classification head."""
        return nn.Sequential(
            nn.Linear(1000, 500),
            nn.BatchNorm1d(500),
            nn.ReLU(),
            nn.Linear(500, 100),
            nn.BatchNorm1d(100),
            nn.ReLU(),
            nn.Linear(100, 2),
            nn.BatchNorm1d(2),
            nn.Softmax(dim=-1)
        )

    def __init__(self, base_model):
        super().__init__()
        self.base_model = base_model
        # Single head definition reused for both classifiers (the original
        # duplicated the Sequential literal twice).
        self.classifier = self._build_head()
        # isinstance (not type ==) so Inception3 subclasses are handled too.
        self._is_inception3 = isinstance(base_model, torchvision.models.inception.Inception3)
        if self._is_inception3:
            self.classifier2 = self._build_head()

    def forward(self, x, validate=False):
        """Run the backbone then the head(s).

        In training mode an Inception3 backbone yields two outputs, so two
        classified tensors are returned; otherwise a single (N, 2) softmax.
        """
        output = self.base_model(x.float())
        if self._is_inception3 and not validate:
            return self.classifier(output[0]), self.classifier2(output[1])
        return self.classifier(output)

    def save_model(self, path):
        """Serialize the full state dict to *path*."""
        torch.save(self.state_dict(), path)

    def load_model(self, path):
        """Load weights from *path*, switch to eval mode, and return self."""
        self.load_state_dict(torch.load(path))
        self.eval()
        return self
| @@ -0,0 +1,82 @@ | |||
| import albumentations as A | |||
| from albumentations.pytorch import ToTensorV2 | |||
| from albumentations_mixup import Mixup | |||
def get_transformation(augmentation, crop_size=299, base_dataset=None):
    """Build the albumentations pipeline named by *augmentation*.

    Known names: "min", "std", "jit", "jit-nrs", "fda", "mixup",
    "jit-fda-mixup", "jit-fda-mixup-nrs", "shear". The fda/mixup variants
    draw reference images from *base_dataset.samples*.

    Raises:
        ValueError: for an unrecognized augmentation name.
    """
    scaled_center_crop_size = int(crop_size * 1.25)

    def _random_crop(size):
        return A.RandomCrop(size, size, always_apply=True)

    def _augmented_pipeline(extra_transforms, random_scale=True):
        # Shared skeleton: flip/rotate/random-scale, pad + center crop to the
        # working size, random crop, then the augmentation-specific
        # transforms, then blur/noise and tensor conversion.
        prefix = [
            A.Flip(p=0.25),
            A.Rotate(p=0.25),
            A.RandomScale(scale_limit=0.5, p=0.5 if random_scale else 0),
            A.PadIfNeeded(min_height=scaled_center_crop_size, min_width=scaled_center_crop_size,
                          always_apply=True),
            A.CenterCrop(scaled_center_crop_size, scaled_center_crop_size),
            _random_crop(crop_size),
        ]
        suffix = [
            A.Blur(p=0.25, blur_limit=2),
            A.GaussNoise(p=0.25, var_limit=10),
            ToTensorV2()
        ]
        return A.Compose(prefix + extra_transforms + suffix)

    def _sample_paths():
        return [sample[0] for sample in base_dataset.samples]

    def _sample_pairs():
        return [sample[0:2] for sample in base_dataset.samples]

    if augmentation == "min":
        return A.Compose([
            A.PadIfNeeded(min_height=scaled_center_crop_size, min_width=scaled_center_crop_size, always_apply=True),
            A.CenterCrop(scaled_center_crop_size, scaled_center_crop_size),
            _random_crop(crop_size),
            ToTensorV2()
        ])
    if augmentation == "std":
        return _augmented_pipeline([])
    if augmentation == "jit-nrs":
        return _augmented_pipeline([A.ColorJitter(p=0.5, hue=.5)], random_scale=False)
    if augmentation == "jit":
        return _augmented_pipeline([A.ColorJitter(p=0.5, hue=.5)])
    if augmentation == "fda":
        return _augmented_pipeline([A.domain_adaptation.FDA(_sample_paths(), beta_limit=0.1, p=0.5)])
    if augmentation == "mixup":
        return _augmented_pipeline([Mixup(mixups=_sample_pairs(), p=0.5, beta_limit=(0.1))])
    if augmentation == "jit-fda-mixup":
        p = 0.16
        return _augmented_pipeline([
            A.domain_adaptation.FDA(_sample_paths(), beta_limit=0.1, p=p),
            Mixup(mixups=_sample_pairs(), p=p, beta_limit=(0.1)),
            A.ColorJitter(p=p, hue=.5)
        ])
    if augmentation == "jit-fda-mixup-nrs":
        p = 0.16
        return _augmented_pipeline([
            A.domain_adaptation.FDA(_sample_paths(), beta_limit=0.1, p=p),
            Mixup(mixups=_sample_pairs(), p=p, beta_limit=(0.1)),
            A.ColorJitter(p=p, hue=.5)
        ], random_scale=False)
    if augmentation == "shear":
        return _augmented_pipeline([A.Affine(shear={"x": (-10, 10), "y": (-10, 10)}, p=0.5)],
                                   random_scale=False)
    raise ValueError(f"Augmentation unknown: {augmentation}")
| @@ -0,0 +1,45 @@ | |||
| import random | |||
| import torch | |||
class Config:
    """Global, mutable run configuration shared across modules."""
    # DEBUG switches to tiny epoch counts, CPU, and a single worker.
    DEBUG = False
    batch_size = 32
    eval_batch_size = 128
    # Dataset split percentages (the remainder is the training split).
    test_percent = 20
    val_percent = 10
    learning_rate = 0.0001
    decay_rate = 1  # 0.99**50=0.6, 0.99**100=0.36
    n_epoch = 2 if DEBUG else 20
    available_device = "cuda" if torch.cuda.is_available() and not DEBUG else "cpu"
    print(f"Device: {available_device}")  # NOTE: executes at import time
    workers = 1 if DEBUG else 40
    # learned from evaluate_image_patcher_and_visualize.py
    laplacian_threshold = 298
    # RANDOM SEED
    seed = 115

    @staticmethod
    def reset_random_seeds():
        # Re-seed both the python and torch RNGs for reproducible runs.
        random.seed(Config.seed)
        torch.manual_seed(Config.seed)

    # Class label space; the evaluate phase overwrites these with 0..100
    # tumor-percent classes before per-slide evaluation.
    class_names = ["BENIGN", "MALIGNANT"]
    class_idx_dict = {"BENIGN": 0, "MALIGNANT": 1}
    # presumably the max train/val accuracy gap tolerated when selecting the
    # best epoch — verify against the training code.
    train_val_acc_max_distance_for_best_epoch = 6  # Percent
    n_epoch_for_image_patcher = 60
    # Phase switches; scripts check these in their __main__ blocks.
    train_phase = False
    evaluate_phase = False


# Seed RNGs once at import so module-level randomness is deterministic.
Config.reset_random_seeds()
| @@ -0,0 +1,64 @@ | |||
| import ssl | |||
| import time | |||
| from urllib.parse import urlparse | |||
| from urllib.request import urlopen | |||
| from bs4 import BeautifulSoup | |||
| from database_crawlers.web_stain_sample import StainType, WebStainWSIOneDIndex | |||
# Disable HTTPS certificate verification globally for the urlopen calls
# below — presumably the target site's certificate fails default
# validation; confirm before keeping this in production.
ssl._create_default_https_context = ssl._create_unverified_context
class BioAtlasAtJakeGittlenLaboratoriesImage(WebStainWSIOneDIndex):
    """One whole-slide image hosted by bio-atlas.psu.edu (1-D tile index)."""

    def __init__(self, database_name, image_id, image_web_label, report, stain_type, is_wsi):
        super().__init__(database_name, image_id, image_web_label, report, stain_type, is_wsi)

    def _get_tile_url(self, zoom, partition=None, i=None, j=None):
        # i/j are unused: this provider addresses tiles with a single linear
        # partition index rather than (column, row) coordinates.
        return f"https://bio-atlas.psu.edu/human/tile.jpeg.php?s={self.image_id}&z={zoom}&i={partition}"

    def get_slide_view_url(self):
        # Human-viewable page for this slide.
        return f"https://bio-atlas.psu.edu/human/view.php?s={self.image_id}"

    def _get_file_path_name(self):
        # save_path is provided by the WebStainWSIOneDIndex base class.
        return self.save_path + self.image_id

    def find_best_zoom(self):
        # presumably zoom 0 selects the appropriate resolution layer for this
        # provider — verify against the tile server.
        return 0
class BioAtlasThyroidSlideProvider:
    """Scrapes the bio-atlas.psu.edu thyroid search page and yields slides."""
    page_link = "https://bio-atlas.psu.edu/human/search.php?q=Thyroid&organism%5B%5D=5&age_fr=&age_fr_units=1&age_to=&age_to_units=1&sex%5B%5D=all&thumbnails=on&rpp=30&as_sfid=AAAAAAW0RrspdnblpiFwz8osoAdvS8nafd1J9LG_ARQ-IF_NZ3aI2EXCMDBeqE_iD5rUo1QLg454tS63DMSgATSzgrksb4rMi-GWPl3O9f3JKlqGn8oXoqbOYok3__yZx69ewzg%3D&as_fid=6900aeb3e4cc9f39ef9738a2f11c2cefb8c3f37c#results"
    database_name = "BioAtlasThyroidSlideProvider"
    stain_type = StainType.H_AND_E
    is_wsi = True

    @classmethod
    def get_web_stain_samples(cls):
        """Yield BioAtlasAtJakeGittlenLaboratoriesImage objects parsed from
        the search results page.

        On any error the exception is printed, the method waits two seconds
        and retries indefinitely; nothing is raised to the caller.
        """
        print(cls.page_link)
        try:
            html_text = urlopen(cls.page_link).read()
            soup = BeautifulSoup(html_text, 'html.parser')
            search_results = soup.find_all("div", {"class": "shadow-box search-result-item search-result-slide"})
            for result_item in search_results:
                image_view_url = result_item.find("a").attrs['href']
                query_param = urlparse(image_view_url).query.split("=")
                if query_param[0] != "s": raise Exception("Query params does not contains image url")
                image_id = query_param[1]
                # The diagnosis text directly follows the <b>Diagnosis</b> tag.
                image_web_label = str(result_item.find("b", text="Diagnosis").next_sibling)
                yield BioAtlasAtJakeGittlenLaboratoriesImage(cls.database_name, image_id, image_web_label, None,
                                                             cls.stain_type, cls.is_wsi)
        except Exception as e:
            print(e)
            time.sleep(2)
            # BUG FIX: delegate to the retried generator with `yield from`.
            # The original `yield cls.get_web_stain_samples()` yielded the
            # generator object itself, so callers received a generator where
            # they expected a slide object.
            yield from cls.get_web_stain_samples()
if __name__ == '__main__':
    # Crawl a single, known slide (id 687) and stop.
    bio_atlas_provider = BioAtlasThyroidSlideProvider()
    for slide in bio_atlas_provider.get_web_stain_samples():
        if slide.image_id == "687":
            print(slide.image_id, slide.image_web_label, slide.get_slide_view_url())
            slide.crawl_image_save_jpeg_and_json()
            break
| @@ -0,0 +1,3 @@ | |||
# Make the project root (two levels up) and this directory importable,
# then run the crawler.
export PYTHONPATH="${PYTHONPATH}:../../";
export PYTHONPATH="${PYTHONPATH}:./";
python database_crawler.py;
| @@ -0,0 +1,61 @@ | |||
| import time | |||
| from urllib.parse import urlparse | |||
| from urllib.request import urlopen | |||
| from bs4 import BeautifulSoup | |||
| from database_crawlers.web_stain_sample import StainType, WebStainWSITwoDIndex | |||
class HeidelbergPathologyImage(WebStainWSITwoDIndex):
    """One whole-slide image from the Heidelberg eliph atlas (2-D tile index)."""

    def __init__(self, database_name, image_id, image_web_label, report, stain_type, is_wsi):
        super().__init__(database_name, image_id, image_web_label, report, stain_type, is_wsi)

    def _get_tile_url(self, zoom, partition=None, i=None, j=None):
        # Tiles are addressed by (zoom, column i, row j); the image id is
        # zero-padded to two digits in the deep-zoom folder name.
        return f"https://eliph.klinikum.uni-heidelberg.de/dzi/atlas/05-schilddruese/05-{'%.2d' % int(self.image_id)}_files/{zoom}/{i}_{j}.jpeg"

    def get_slide_view_url(self):
        # Human-viewable page for this slide.
        return f"https://eliph.klinikum.uni-heidelberg.de/atlas/?c=05-schilddruese&context=image&pg={self.image_id}"

    def _get_file_path_name(self):
        # save_path is provided by the WebStainWSITwoDIndex base class.
        return self.save_path + self.image_id

    def find_best_zoom(self):
        # 16 -> 0
        # NOTE(review): zoom levels appear to run from 16 (deepest) down to
        # 0 — confirm against the deep-zoom pyramid before changing.
        return 16
class HeidelbergPathologyProvider:
    """Scrapes the Heidelberg eliph thyroid atlas page and yields slides."""
    page_link = "https://eliph.klinikum.uni-heidelberg.de/atlas/?c=05-schilddruese&context=image"
    database_name = "HeidelbergPathology"
    stain_type = StainType.H_AND_E
    is_wsi = True

    @classmethod
    def get_web_stain_samples(cls):
        """Yield HeidelbergPathologyImage objects parsed from the atlas page.

        On any error the exception is printed, the method waits two seconds
        and retries indefinitely; nothing is raised to the caller.
        """
        print(cls.page_link)
        try:
            html_text = urlopen(cls.page_link).read()
            soup = BeautifulSoup(html_text, 'html.parser')
            search_results = soup.find_all("div", {"class": "casegrid"})
            for result_item in search_results:
                image_view_url = result_item.find("a").attrs['href']
                query_param = urlparse(image_view_url).query.split("=")
                if "image&pg" not in query_param: raise Exception("Query params does not contains image id")
                image_id = query_param[-1]
                image_web_label = str(result_item.find("b").next)
                yield HeidelbergPathologyImage(cls.database_name, image_id, image_web_label, None,
                                               cls.stain_type, cls.is_wsi)
        except Exception as e:
            print(e)
            time.sleep(2)
            # BUG FIX: delegate to the retried generator with `yield from`.
            # The original `yield cls.get_web_stain_samples()` yielded the
            # generator object itself, so callers received a generator where
            # they expected a slide object.
            yield from cls.get_web_stain_samples()
if __name__ == '__main__':
    # Crawl only the first slide found, then stop.
    bio_atlas_provider = HeidelbergPathologyProvider()
    for slide in bio_atlas_provider.get_web_stain_samples():
        print(slide.image_id, slide.image_web_label, slide.get_slide_view_url())
        slide.crawl_image_save_jpeg_and_json()
        break
| @@ -0,0 +1,7 @@ | |||
| from image_patcher import ImageAndSlidePatcher | |||
if __name__ == '__main__':
    # Patch only the bio-atlas dataset folder, which lives one level up
    # from this script.
    database_folder_name = "bio_atlas_at_jake_gittlen_laboratories"
    database_directory = "../"
    image_slide_patcher = ImageAndSlidePatcher()
    image_slide_patcher.save_patches_in_folders(database_directory, database_folder_name)
| @@ -0,0 +1,287 @@ | |||
| import csv | |||
| import json | |||
| import os | |||
| import os.path as os_path | |||
| import random | |||
| import re | |||
| from math import ceil | |||
| from os import listdir | |||
| from os.path import isfile, join | |||
| import cv2 | |||
| import tifffile | |||
| import zarr as ZarrObject | |||
| from tqdm import tqdm | |||
| from config import Config | |||
| from database_crawlers.web_stain_sample import ThyroidCancerLevel, WebStainImage | |||
| from utils import show_and_wait | |||
class ThyroidFragmentFilters:
    """Predicate factories used to discard low-detail (background) fragments."""

    @staticmethod
    def func_laplacian_threshold(threshold=Config.laplacian_threshold):
        """Return a predicate(image) -> bool closing over *threshold*."""
        def wrapper(image_nd_array):
            return ThyroidFragmentFilters._empty_frag_with_laplacian_threshold(
                image_nd_array, threshold)
        return wrapper

    @staticmethod
    def _empty_frag_with_laplacian_threshold(image_nd_array, threshold=Config.laplacian_threshold,
                                             return_variance=False):
        """Keep a fragment when the variance of its Laplacian (a sharpness /
        detail measure) meets *threshold*; optionally return the variance."""
        grayscale = cv2.cvtColor(image_nd_array, cv2.COLOR_BGR2GRAY)
        grayscale = cv2.GaussianBlur(grayscale, (3, 3), 0)
        laplacian = cv2.Laplacian(grayscale, cv2.CV_64F, ksize=3, )
        std = cv2.meanStdDev(laplacian)[1][0][0]
        variance = std ** 2
        keep = variance >= threshold
        return (keep, variance) if return_variance else keep
class ImageAndSlidePatcher:
    """Cuts slide images (tiff/svs via zarr, or plain jpeg) into overlapping
    512x512 fragments, drops low-detail fragments, writes kept fragments as
    jpegs, and records per-slide metadata in a CSV file."""

    @classmethod
    def _check_magnification_from_description(cls, tiff_address):
        """Return the 'AppMag' magnification parsed from the TIFF
        ImageDescription tag, or None when missing/unparsable."""
        try:
            tif_file_obj = tifffile.TiffFile(tiff_address)
            image_description = tif_file_obj.pages.keyframe.tags["ImageDescription"].value
            app_mag = int(re.findall("(AppMag = [0-9]+)", image_description)[0].split(" = ")[-1])
            return app_mag
        except Exception as e:
            return None

    @classmethod
    def _zarr_loader(cls, tiff_address, key=0):
        """Open page *key* of a tiff as a lazily-read zarr array."""
        image_zarr = tifffile.imread(tiff_address, aszarr=True, key=key, )
        zarr = ZarrObject.open(image_zarr, mode='r')
        return zarr

    @classmethod
    def _jpeg_loader(cls, jpeg_address):
        """Read a jpeg into an ndarray (cv2's BGR channel order)."""
        im = cv2.imread(jpeg_address)
        return im

    @classmethod
    def _json_key_loader(cls, json_file_address, key=None):
        """Load a json file; return the whole dict, or just *key* when given."""
        with open(json_file_address, 'rb') as file:
            json_dict = json.loads(file.read())
        if key:
            return json_dict[key]
        return json_dict

    @classmethod
    def _get_extension_from_path(cls, file_path):
        # Includes the leading dot, e.g. ".jpeg".
        return os_path.splitext(file_path)[-1]

    @classmethod
    def _get_file_name_from_path(cls, file_path):
        # Base name without its final extension.
        return ".".join(os_path.split(file_path)[-1].split(".")[:-1])

    @classmethod
    def _get_number_of_initial_frags(cls, zarr_object, frag_size=512, frag_overlap=0.1):
        """Number of fragments the sliding window in
        _generate_raw_fragments_from_image_array_or_zarr will produce."""
        zarr_shape = zarr_object.shape
        # The window advances by frag_size minus the requested overlap.
        step_size = int(frag_size * (1 - frag_overlap))
        overlap_size = frag_size - step_size
        w_range = list(range(0, ceil((zarr_shape[0] - overlap_size) / step_size) * step_size, step_size))
        h_range = list(range(0, ceil((zarr_shape[1] - overlap_size) / step_size) * step_size, step_size))
        return len(w_range) * len(h_range)

    @classmethod
    def _generate_raw_fragments_from_image_array_or_zarr(cls, image_object, frag_size=512, frag_overlap=0.1,
                                                         shuffle=True):
        """Yield (fragment, (start_w, start_h)) windows over *image_object*.

        Windows near the lower/right edges are shifted back inside the image
        so every fragment is exactly frag_size x frag_size. With shuffle=True
        all window positions are computed first and visited in random order.
        """
        def frag_picker(w_pos, h_pos):
            # Clamp the window end to the image bounds, then anchor the start
            # so the window size stays exactly frag_size in both axes.
            end_w, end_h = min(zarr_shape[0], w_pos + frag_size), min(zarr_shape[1], h_pos + frag_size)
            start_w, start_h = end_w - frag_size, end_h - frag_size
            return image_object[start_w:end_w, start_h: end_h], (start_w, start_h)

        # NOTE(review): this is a generator function, so calling it always
        # returns a generator object — it never returns None even when
        # image_object is None; callers checking `generator is None` should
        # verify that check actually fires.
        if image_object is None:
            return None
        zarr_shape = image_object.shape
        step_size = int(frag_size * (1 - frag_overlap))
        overlap_size = frag_size - step_size
        w_range = list(range(0, ceil((zarr_shape[0] - overlap_size) / step_size) * step_size, step_size))
        h_range = list(range(0, ceil((zarr_shape[1] - overlap_size) / step_size) * step_size, step_size))
        if shuffle:
            # Pre-size the position list, fill it, then shuffle in place.
            pos_list = [None] * len(w_range) * len(h_range)
            index = 0
            for w in w_range:
                for h in h_range:
                    pos_list[index] = (w, h)
                    index += 1
            random.shuffle(pos_list)
            for w, h in pos_list:
                yield frag_picker(w, h)
        else:
            for w in w_range:
                for h in h_range:
                    yield frag_picker(w, h)

    @classmethod
    def _filter_frag_from_generator(cls, frag_generator, filter_func_list, return_all_with_condition=False,
                                    all_frag_count=None, output_file=None):
        """Apply every filter predicate to each fragment.

        Yields (fragment, pos) for fragments passing all filters, or
        (fragment, pos, passed) for every fragment when
        return_all_with_condition is set. tqdm progress goes to *output_file*.
        """
        for next_test_item, frag_pos in tqdm(frag_generator, total=all_frag_count, file=output_file,
                                             postfix="Filtering", position=0):
            condition = True
            for function in filter_func_list:
                # All filters must accept the fragment.
                condition &= function(next_test_item)
            if return_all_with_condition:
                yield next_test_item, frag_pos, condition
            elif condition:
                # show_and_wait(frag)
                yield next_test_item, frag_pos

    @classmethod
    def _get_json_and_image_address_of_directory(cls, directory_path, ignore_json=False):
        """Pair each image in *directory_path* with its same-basename json.

        Returns (json_path, image_path) tuples, or just the image paths when
        ignore_json is set.
        """
        image_formats = [".jpeg", ".tiff", ".jpg"]
        json_format = ".json"
        files = [f for f in listdir(directory_path) if isfile(join(directory_path, f))]
        files.sort()
        # pairs maps basename -> [json_path, image_path]
        pairs = {}
        for file_path in files:
            file_path = join(directory_path, file_path)
            file_name = cls._get_file_name_from_path(file_path)
            pairs[file_name] = pairs.get(file_name, [None, None])
            if cls._get_extension_from_path(file_path) in image_formats:
                pairs[file_name][1] = file_path
            elif cls._get_extension_from_path(file_path) == json_format:
                pairs[file_name][0] = file_path
        if ignore_json:
            # Here key/value unpack the [json_path, image_path] pair itself,
            # so image paths are returned for entries that have an image.
            return [value for key, value in pairs.values() if value is not None]
        return [(key, value) for key, value in pairs.values() if key is not None and value is not None]

    @staticmethod
    def create_patch_dir_and_initialize_csv(database_path):
        """Create <database>/patches and open patch_labels.csv for append.

        The CSV header is written only when the file is still empty (checked
        by reading up to 100 bytes from the start). Returns
        (data_dir, patch_dir, csv_writer, csv_file); the caller closes csv_file.
        """
        data_dir = os.path.join(database_path, "data")
        patch_dir = os.path.join(database_path, "patches")
        if not os.path.isdir(patch_dir):
            os.mkdir(patch_dir)
        label_csv_path = os.path.join(patch_dir, "patch_labels.csv")
        csv_file = open(label_csv_path, "a+")
        csv_writer = csv.writer(csv_file)
        csv_file.seek(0)
        if len(csv_file.read(100)) <= 0:
            csv_writer.writerow(WebStainImage.sorted_json_keys())
        return data_dir, patch_dir, csv_writer, csv_file

    @classmethod
    def save_image_patches_and_update_csv(cls, thyroid_type, thyroid_desired_classes, csv_writer, web_details,
                                          image_path, slide_patch_dir, slide_id):
        """Write the slide's metadata row, then save all accepted fragments.

        NOTE(review): thyroid_type and thyroid_desired_classes are unused in
        this method body.
        Returns (saved_fragment_count, total_candidate_count); returns None
        early only if the `generator is None` check fires (see the note in
        _generate_raw_fragments_from_image_array_or_zarr).
        """
        csv_writer.writerow(list(web_details.values()))
        if cls._get_extension_from_path(image_path) in [".tiff", ".tif", ".svs"]:
            zarr_object = cls._zarr_loader(image_path)
            generator = cls._generate_raw_fragments_from_image_array_or_zarr(zarr_object)
            total_counts = cls._get_number_of_initial_frags(zarr_object=zarr_object)
        else:
            jpeg_image = cls._jpeg_loader(image_path)
            # Interactive rescaling; returns None when the user ignores the image.
            jpeg_image = cls.ask_image_scale_and_rescale(jpeg_image)
            generator = cls._generate_raw_fragments_from_image_array_or_zarr(jpeg_image)
            total_counts = cls._get_number_of_initial_frags(zarr_object=jpeg_image)
        if generator is None:
            return
        if not os.path.isdir(slide_patch_dir):
            os.mkdir(slide_patch_dir)
        filters = [ThyroidFragmentFilters.func_laplacian_threshold(Config.laplacian_threshold)]
        fragment_id = 0
        # Filtering progress is streamed into a per-slide progress.txt file.
        slide_progress_file_path = os.path.join(slide_patch_dir, "progress.txt")
        with open(slide_progress_file_path, "w") as file:
            for fragment, frag_pos in cls._filter_frag_from_generator(generator, filters, all_frag_count=total_counts,
                                                                      output_file=file):
                fragment_file_path = os.path.join(slide_patch_dir, f"{slide_id}-{fragment_id}.jpeg")
                cv2.imwrite(fragment_file_path, fragment)
                fragment_id += 1
        return fragment_id, total_counts

    @classmethod
    def save_patches_in_folders(cls, database_directory, dataset_dir=None):
        """Patch every dataset under *database_directory* that has a data/ dir.

        *dataset_dir*, when given, restricts patching to that single folder.
        """
        thyroid_desired_classes = [ThyroidCancerLevel.MALIGNANT, ThyroidCancerLevel.BENIGN]
        datasets_dirs = os.listdir(database_directory) if dataset_dir is None else [dataset_dir]
        list_dir = [os.path.join(database_directory, o) for o in datasets_dirs
                    if os.path.isdir(os.path.join(database_directory, o, "data"))]
        for database_path in list_dir:
            print("database path: ", database_path)
            data_dir, patch_dir, csv_writer, csv_file = cls.create_patch_dir_and_initialize_csv(database_path)
            for json_path, image_path in cls._get_json_and_image_address_of_directory(data_dir):
                print("image path: ", image_path)
                file_name = cls._get_file_name_from_path(image_path)
                # NOTE(review): str(hash(...)) is salted per interpreter
                # session unless PYTHONHASHSEED is fixed, so the
                # "already patched" check below may not survive restarts.
                slide_id = str(hash(file_name))
                slide_patch_dir = os.path.join(patch_dir, slide_id)
                if os.path.isdir(slide_patch_dir):
                    """
                    it has already been patched
                    """
                    continue
                web_details = cls._json_key_loader(json_path)
                web_details["image_id"] = slide_id
                web_label = web_details["image_web_label"]
                thyroid_type = ThyroidCancerLevel.get_thyroid_level_from_diagnosis_label(web_label)
                web_details["image_class_label"] = thyroid_type.value[1]
                cls.save_image_patches_and_update_csv(thyroid_type, thyroid_desired_classes, csv_writer, web_details,
                                                      image_path, slide_patch_dir, slide_id)
            csv_file.close()

    @classmethod
    def save_papsociaty_patch(cls, database_path):
        """Patch the PapSociety dataset, whose images live in per-class
        folders (data/<class_name>/...) and carry no json metadata."""
        thyroid_desired_classes = [ThyroidCancerLevel.MALIGNANT, ThyroidCancerLevel.BENIGN]
        print("database path: ", database_path)
        for folder in Config.class_names:
            group_path = os.path.join(database_path, "data", folder)
            data_dir, patch_dir, csv_writer, csv_file = cls.create_patch_dir_and_initialize_csv(database_path)
            for image_path in cls._get_json_and_image_address_of_directory(group_path, ignore_json=True):
                print("image path: ", image_path)
                file_name = cls._get_file_name_from_path(image_path)
                # NOTE(review): same hash-salting caveat as in
                # save_patches_in_folders applies to this slide id.
                slide_id = str(hash(file_name))
                slide_patch_dir = os.path.join(patch_dir, slide_id)
                if os.path.isdir(slide_patch_dir):
                    """
                    it has already been patched
                    """
                    continue
                # The class comes from the folder name; metadata is synthesized.
                web_label = folder + "-" + file_name
                thyroid_type = ThyroidCancerLevel.get_thyroid_level_from_diagnosis_label(web_label)
                web_details = {"database_name": "PapSociety",
                               "image_id": slide_id,
                               "image_web_label": web_label,
                               "image_class_label": thyroid_type.value[1],
                               "report": None,
                               "stain_type": "UNKNOWN",
                               "is_wsi": False}
                cls.save_image_patches_and_update_csv(thyroid_type, thyroid_desired_classes, csv_writer, web_details,
                                                      image_path, slide_patch_dir, slide_id)
            csv_file.close()

    @classmethod
    def ask_image_scale_and_rescale(cls, image):
        """Show *image* and ask the operator for a scale factor.

        Returns the rescaled image, None when the operator enters "i"
        (ignore), and re-prompts on any other invalid input or error.
        """
        # small: S, Medium: M, Large:L
        show_and_wait(image)
        res = input("how much plus pointer fill a cell(float, i:ignore, else repeat): ")
        try:
            if res == "i":
                return None
            elif re.match("[0-9]+(.[0-9]*)?", res):
                scale = 1 / float(res)
                return cv2.resize(image, (0, 0), fx=scale, fy=scale)
            else:
                return cls.ask_image_scale_and_rescale(image)
        except Exception as e:
            print(e)
            return cls.ask_image_scale_and_rescale(image)
if __name__ == '__main__':
    # Fixed seed so the shuffled fragment order is reproducible.
    random.seed(1)
    database_directory = "./"
    # ImageAndSlidePatcher.save_patches_in_folders(database_directory, dataset_dir=["stanford_tissue_microarray"])
    # ImageAndSlidePatcher.save_papsociaty_patch(os.path.join(database_directory, "papsociaty"))
| @@ -0,0 +1,60 @@ | |||
| import concurrent.futures | |||
| import os | |||
| import pathlib | |||
| from tqdm import tqdm | |||
| from config import Config | |||
| from image_patcher import ImageAndSlidePatcher | |||
| from national_cancer_institute.read_xml_file import get_slide_info_from_bcr_xml | |||
def save_national_cancer_institute_patch(database_path):
    """Patch every NCI .svs slide under *database_path* using a thread pool.

    Slide labels are read from the accompanying BCR XML metadata files;
    slides without a label entry are skipped. Per-slide patching is delegated
    to ImageAndSlidePatcher.save_image_patches_and_update_csv.
    """
    def patch_image(image_path):
        # Thread-pool worker; exceptions are caught and printed so one bad
        # slide does not kill the whole pool.
        try:
            image_path = str(image_path)
            print()
            print("image path: ", image_path)
            file_name = ImageAndSlidePatcher._get_file_name_from_path(image_path)
            # NCI slide ids are the file name up to the first dot.
            slide_id = file_name.split(".")[0]
            slide_patch_dir = os.path.join(patch_dir, slide_id)
            if os.path.isdir(slide_patch_dir):
                print("it has already been patched")
                return
            web_label = slide_infos.get(slide_id, None)
            if web_label is None:
                print("Ignored")
                return
            web_details = {"database_name": "NationalCancerInstitute",
                           "image_id": slide_id,
                           "image_web_label": web_label,
                           "image_class_label": web_label,
                           "report": None,
                           "stain_type": "H&E",
                           "is_wsi": True}
            return ImageAndSlidePatcher.save_image_patches_and_update_csv(web_label, None, csv_writer, web_details,
                                                                          image_path, slide_patch_dir, slide_id)
        except Exception as e:
            print(e)

    data_dir = os.path.join(database_path, "data")
    # Collect slide_id -> label from all BCR XML metadata files first.
    slide_infos = {}
    for xml_path in pathlib.Path(data_dir).glob("**/*.xml"):
        slide_infos.update(get_slide_info_from_bcr_xml(str(xml_path)))
    # NOTE(review): data_dir is rebound to the value returned by
    # create_patch_dir_and_initialize_csv — confirm both point at the same
    # directory as the one globbed above.
    data_dir, patch_dir, csv_writer, csv_file = ImageAndSlidePatcher.create_patch_dir_and_initialize_csv(database_path)
    csv_file.flush()
    with concurrent.futures.ThreadPoolExecutor(max_workers=Config.workers) as executor:
        image_paths = pathlib.Path(data_dir).glob("**/*.svs")
        image_paths = [i for i in image_paths]
        print()
        # Flush the CSV after every slide that actually produced patches.
        for res in tqdm(executor.map(patch_image, image_paths), total=len(image_paths)):
            if res:
                csv_file.flush()
    csv_file.flush()
if __name__ == '__main__':
    # Run from inside database_crawlers/: the databases live one level up.
    database_directory = "../"
    save_national_cancer_institute_patch(os.path.join(database_directory, "national_cancer_institute"))
| @@ -0,0 +1,4 @@ | |||
| export PYTHONPATH="${PYTHONPATH}:../../"; | |||
| export PYTHONPATH="${PYTHONPATH}:../"; | |||
| export PYTHONPATH="${PYTHONPATH}:./"; | |||
| python bio_atlas_patcher.py; | |||
| @@ -0,0 +1,4 @@ | |||
| export PYTHONPATH="${PYTHONPATH}:../../"; | |||
| export PYTHONPATH="${PYTHONPATH}:../"; | |||
| export PYTHONPATH="${PYTHONPATH}:./"; | |||
| python image_patcher.py; | |||
| @@ -0,0 +1,4 @@ | |||
| export PYTHONPATH="${PYTHONPATH}:../../"; | |||
| export PYTHONPATH="${PYTHONPATH}:../"; | |||
| export PYTHONPATH="${PYTHONPATH}:./"; | |||
| python national_cancer_patcher.py; | |||
| @@ -0,0 +1,280 @@ | |||
| import concurrent.futures | |||
| import math | |||
| import cv2 | |||
| import matplotlib.pyplot as plt | |||
| import numpy as np | |||
| from config import Config | |||
| from database_crawlers.image_patcher.image_patcher import ImageAndSlidePatcher, ThyroidFragmentFilters | |||
| from utils import check_if_generator_is_empty | |||
def imul(a, b):
    """Scale `a` by factor `b` and round up to the nearest integer."""
    product = a * b
    return math.ceil(product)
def calculate_acc_and_sensitivity(image_path, zarr_loader_mask, zarr_loader, frag_generator, scaled_masked_image,
                                  generated_mask_scale, laplacian_threshold, slide_patch_size,
                                  save_generated_image=True):
    """Score the laplacian background filter against a ground-truth mask slide.

    Streams patches from `frag_generator`, lets the laplacian-threshold filter
    decide tissue vs background (`condition`), compares each decision with the
    corresponding region of the mask slide, and accumulates a confusion matrix.
    Rejected patches are darkened in `scaled_masked_image`, which is optionally
    saved next to `image_path` as "*_generated_mask.jpg".

    Returns:
        dict with "TP"/"FP"/"TN"/"FN" patch counts.
    """

    def process_frag(args):
        # Thread-pool worker: evaluate one (patch, position, filter-decision).
        next_test_item, frag_pos, condition = args
        # NOTE(review): `next_test_item.shape` is read before the
        # `next_test_item is not None` check below — a None item would raise
        # here first; confirm the generator never yields None patches.
        frag_shape = next_test_item.shape
        # Map the patch rectangle into mask-slide coordinates.
        mask_scaled_frag_shape = list((imul(frag_shape[i], mask_scale) for i in range(2)))
        mask_frag_pos = list((imul(frag_pos[i], mask_scale) for i in range(2)))
        mask_w1, mask_w2 = mask_frag_pos[0], mask_frag_pos[0] + mask_scaled_frag_shape[0]
        mask_h1, mask_h2 = mask_frag_pos[1], mask_frag_pos[1] + mask_scaled_frag_shape[1]
        mask_item = zarr_loader_mask[mask_w1:mask_w2, mask_h1:mask_h2]
        # Resize the mask crop back to the patch's own resolution.
        mask_item = cv2.resize(mask_item, dsize=(0, 0), fx=1 / mask_scale, fy=1 / mask_scale)
        fragment_size = next_test_item.shape
        scaled_frag_size = (imul(fragment_size[0], generated_mask_scale), imul(fragment_size[1], generated_mask_scale))
        scaled_frag = cv2.resize(next_test_item[:, :, :3], dsize=scaled_frag_size, interpolation=cv2.INTER_CUBIC)
        scaled_frag_size = scaled_frag.shape
        if next_test_item is not None:
            mask_item = mask_item[:, :, 0]
            # A patch counts as tissue when the mask is on average >30% bright.
            masked = mask_item.mean() > 256 * .3
            if condition and masked:
                background_dict["TP"] += 1
            elif condition and not masked:
                background_dict["FP"] += 1
            elif not condition and masked:
                background_dict["FN"] += 1
                # show_and_wait(next_test_item)
                # show_and_wait(mask_item)
            elif not condition and not masked:
                background_dict["TN"] += 1
        else:
            return None
        if not condition:
            # background patches get dark
            scaled_frag = (scaled_frag * 0.3).astype(np.int8)
        scaled_pos = list((imul(frag_pos[i], generated_mask_scale) for i in range(2)))
        try:
            # Paint the (possibly darkened) patch into the preview image.
            mask_g_w1, mask_g_w2 = scaled_pos[0], scaled_pos[0] + scaled_frag_size[0]
            mask_g_h1, mask_g_h2 = scaled_pos[1], scaled_pos[1] + scaled_frag_size[1]
            scaled_masked_image[mask_g_w1:mask_g_w2, mask_g_h1:mask_g_h2] = scaled_frag
        except Exception as e:
            # Edge patches may overflow the preview canvas; log and continue.
            print(e)
        return True

    # Ratio between mask-slide resolution and main-slide resolution.
    mask_scale = zarr_loader_mask.shape[0] / zarr_loader.shape[0]
    filter_func_list = [ThyroidFragmentFilters.func_laplacian_threshold(laplacian_threshold)]
    background_dict = {"TP": 0, "FP": 0, "TN": 0, "FN": 0}
    total_frags = slide_patch_size if slide_patch_size else ImageAndSlidePatcher._get_number_of_initial_frags(
        zarr_loader)
    frag_filtered = ImageAndSlidePatcher._filter_frag_from_generator(frag_generator, filter_func_list,
                                                                     return_all_with_condition=True,
                                                                     all_frag_count=total_frags)
    with concurrent.futures.ThreadPoolExecutor(max_workers=Config.workers) as executor:
        futures = []
        patch_count = 0
        for args in frag_filtered:
            patch_count += 1
            future_res = executor.submit(process_frag, args)
            futures.append(future_res)
            # Drain in batches of `workers` to bound memory.
            if len(futures) >= Config.workers or patch_count == slide_patch_size:
                for future in concurrent.futures.as_completed(futures):
                    future.result()
                futures = []
            if patch_count == slide_patch_size:
                break
    if save_generated_image:
        masked_image_path = ".".join(image_path.split(".")[:-1]) + "_generated_mask.jpg"
        cv2.imwrite(masked_image_path, scaled_masked_image)
    return background_dict
def score_calculator(accuracy, specificity, acc_w=0.75):
    """Weighted mix of accuracy and specificity (default weighting 3:1)."""
    spec_w = 1 - acc_w
    return acc_w * accuracy + spec_w * specificity
def get_zarr_loaders_and_generators():
    """Open every (mask, slide) pair from the module-level `image_lists`.

    Returns a list of 5-element lists:
    [mask zarr loader, slide zarr loader, shuffled fragment generator,
     preview canvas (np.zeros), preview scale factor].
    """
    zarr_loaders_and_generators = []
    for _img_mask_path, _img_path in image_lists:
        _zarr_loader_mask = ImageAndSlidePatcher._zarr_loader(_img_mask_path)
        _zarr_loader = ImageAndSlidePatcher._zarr_loader(_img_path)
        _frag_generator = ImageAndSlidePatcher._generate_raw_fragments_from_image_array_or_zarr(_zarr_loader,
                                                                                                shuffle=True)
        _zarr_shape = _zarr_loader.shape
        # Preview image is rendered at 10/512 of slide resolution.
        _generated_mask_scale = 10 / 512
        # +5 margin absorbs rounding overflow when patches are painted in.
        _scaled_zarr_shape = (
            imul(_zarr_shape[0], _generated_mask_scale) + 5, imul(_zarr_shape[1], _generated_mask_scale) + 5, 3)
        _scaled_masked_image = np.zeros(_scaled_zarr_shape)
        zarr_loaders_and_generators.append([
            _zarr_loader_mask, _zarr_loader, _frag_generator, _scaled_masked_image, _generated_mask_scale
        ])
    return zarr_loaders_and_generators
def update_and_find_best_threshold(initial_thresh, learn_threshold_and_log_cf_matrix_per_patch=True):
    """Hill-climb the laplacian threshold against the mask ground truth.

    In learning mode (`learn_threshold_and_log_cf_matrix_per_patch=True`) the
    threshold is nudged after each batch: keep moving in the same direction
    while the score improves, otherwise reverse direction and decay the step
    size. In evaluation mode a single epoch is run with the given threshold
    and only the confusion matrices are printed.

    Returns the final laplacian threshold.
    """
    initial_threshold_jump_size_const = 120
    threshold_jump_size = initial_threshold_jump_size_const
    decay_const = 0.85
    decay_count = 0
    threshold_jump_increase = 1
    threshold_score = None
    # update after initial run
    laplacian_threshold = initial_thresh
    threshold_history = []
    score_history = []
    for epoch in range((Config.n_epoch_for_image_patcher if learn_threshold_and_log_cf_matrix_per_patch else 1)):
        print("New Epoch")
        zarr_loaders_and_generators = get_zarr_loaders_and_generators()
        whole_background_dict_per_slide = [{} for i in range(len(zarr_loaders_and_generators))]
        whole_background_dict = {}
        # Exhausted slides are replaced by None; loop until all are None.
        while sum([item is not None for item in zarr_loaders_and_generators]) >= 1:
            none_empty_generators = [i for i in range(len(zarr_loaders_and_generators)) if
                                     zarr_loaders_and_generators[i] is not None]
            if learn_threshold_and_log_cf_matrix_per_patch:
                # Learning mode scores each batch independently.
                whole_background_dict = {}
            # Require most slides alive while learning so batches stay balanced.
            if len(none_empty_generators) >= 6 or not learn_threshold_and_log_cf_matrix_per_patch:
                for slide_pick in none_empty_generators:
                    img_path = image_lists[slide_pick][1]
                    zarr_loader_mask = zarr_loaders_and_generators[slide_pick][0]
                    zarr_loader = zarr_loaders_and_generators[slide_pick][1]
                    frag_generator = zarr_loaders_and_generators[slide_pick][2]
                    generated_scaled_mask_image = zarr_loaders_and_generators[slide_pick][3]
                    generated_mask_scale = zarr_loaders_and_generators[slide_pick][4]
                    group_dict = calculate_acc_and_sensitivity(img_path,
                                                               zarr_loader_mask,
                                                               zarr_loader,
                                                               frag_generator,
                                                               generated_scaled_mask_image,
                                                               generated_mask_scale,
                                                               laplacian_threshold,
                                                               slide_patch_size=2000,
                                                               save_generated_image=not learn_threshold_and_log_cf_matrix_per_patch)
                    # Probe every generator and drop the exhausted ones.
                    for i in range(len(zarr_loaders_and_generators)):
                        if zarr_loaders_and_generators[i]:
                            generator = check_if_generator_is_empty(zarr_loaders_and_generators[i][2])
                            if generator:
                                zarr_loaders_and_generators[i][2] = generator
                            else:
                                zarr_loaders_and_generators[i] = None
                    # Accumulate this slide's confusion counts globally and per slide.
                    for key, value in group_dict.items():
                        whole_background_dict[key] = whole_background_dict.get(key, 0) + value
                        whole_background_dict_per_slide[slide_pick][key] = whole_background_dict_per_slide[
                            slide_pick].get(key, 0) + value
                if learn_threshold_and_log_cf_matrix_per_patch:
                    # Epsilon avoids division by zero on empty batches.
                    e = .000001
                    total_preds = (sum(list(whole_background_dict.values())) + e)
                    acc = (whole_background_dict["TP"] + whole_background_dict["TN"]) / total_preds
                    positive_preds = (whole_background_dict["TP"] + whole_background_dict["FP"] + e)
                    precision = whole_background_dict["TP"] / positive_preds
                    next_score = score_calculator(acc, precision)
                    if threshold_score is None:
                        threshold_score = next_score
                    else:
                        threshold_history.append(laplacian_threshold)
                        score_history.append(next_score)
                        # NOTE(review): both branches overwrite threshold_score
                        # with next_score, so the "best score so far" is really
                        # "previous batch score" — confirm this is intended.
                        if next_score > threshold_score:
                            threshold_score = next_score
                            laplacian_threshold += threshold_jump_increase * threshold_jump_size
                        elif next_score <= threshold_score:
                            threshold_score = next_score
                            # Worse score: reverse direction and shrink the step.
                            threshold_jump_increase *= -1
                            threshold_jump_size *= decay_const
                            laplacian_threshold += threshold_jump_increase * threshold_jump_size
                            decay_count += 1
                        save_threshold_and_score_chart(threshold_history, score_history)
                    acc = round(acc, 3)
                    precision = round(precision, 3)
                    threshold_score_rounded = round(threshold_score, 3)
                    print(
                        f"acc:{acc},precision:{precision},score:{threshold_score_rounded},table:{whole_background_dict}" +
                        f"thresh:{laplacian_threshold},jump_size:{threshold_jump_size}")
                else:
                    print(f"table:{whole_background_dict},table_per_slide:{whole_background_dict_per_slide}" +
                          f"threshold:{laplacian_threshold},jump_size:{threshold_jump_size}")
            else:
                break
    return laplacian_threshold
def save_threshold_and_score_chart(threshold_history, score_history):
    """Save line charts of the threshold search: threshold and score per batch.

    Writes "laplacian_threshold_history_chart.jpeg" and
    "laplacian_threshold_score_history_chart.jpeg" in the working directory.
    """
    charts = [
        (threshold_history, 'Laplacian threshold', "laplacian_threshold_history_chart.jpeg"),
        # Bug fix: y-label typo "Sore" -> "Score".
        (score_history, 'Objective function - Score', "laplacian_threshold_score_history_chart.jpeg"),
    ]
    for history, y_label, fig_save_path in charts:
        plt.plot(range(len(history)), history)
        plt.xlabel('Batch')
        plt.ylabel(y_label)
        plt.savefig(fig_save_path)
        plt.clf()
if __name__ == '__main__':
    # (mask tiff, slide svs) pairs used to tune the laplacian threshold.
    # The commented tuple above each pair appears to record the slide's
    # (normal%, tumor%, stromal%) label — confirm against the dataset.
    image_lists = [
        ( # "('0', '100', '0')"
            "./TCGA-BJ-A3F0-01A-01-TSA.728CE583-95BE-462B-AFDF-FC0B228DF3DE__3_masked.tiff",
            "./TCGA-BJ-A3F0-01A-01-TSA.728CE583-95BE-462B-AFDF-FC0B228DF3DE__3.svs"
        ),
        ( # "('0', '100', '0')"
            "./TCGA-DJ-A1QG-01A-01-TSA.04c62c21-dd45-49ea-a74f-53822defe097__2000_masked.tiff",
            "./TCGA-DJ-A1QG-01A-01-TSA.04c62c21-dd45-49ea-a74f-53822defe097__2000.svs"
        ),
        # ( # "('0', '100', '0')"
        #     "./TCGA-EL-A3ZQ-01A-01-TS1.344610D2-AB50-41C6-916E-FF0F08940BF1__2000_masked.tiff",
        #     "./TCGA-EL-A3ZQ-01A-01-TS1.344610D2-AB50-41C6-916E-FF0F08940BF1__2000.svs"
        # ),
        ( # "('45', '55', '0')"
            "./TCGA-ET-A39N-01A-01-TSA.C38FCE19-9558-4035-9F0B-AD05B9BE321D___198_masked.tiff",
            "./TCGA-ET-A39N-01A-01-TSA.C38FCE19-9558-4035-9F0B-AD05B9BE321D___198.svs"
        ),
        # ( # "('0', '40', '60')"
        #     "./TCGA-J8-A42S-01A-01-TSA.7B80CBEB-7B85-417E-AA0C-11C79DE40250__0_masked.tiff",
        #     "./TCGA-J8-A42S-01A-01-TSA.7B80CBEB-7B85-417E-AA0C-11C79DE40250__0.svs"
        # ),
        ( # "('0', '90', '10')"
            "./TCGA-ET-A39O-01A-01-TSA.3829C900-7597-4EA9-AFC7-AA238221CE69_7000_masked.tiff",
            "./TCGA-ET-A39O-01A-01-TSA.3829C900-7597-4EA9-AFC7-AA238221CE69_7000.svs"
        ),
        ( # "('100', '0', '0')"
            "./TCGA-EL-A4K7-11A-01-TS1.C08B59AA-87DF-4ABB-8B70-25FEF9893C7F__70_masked.tiff",
            "./TCGA-EL-A4K7-11A-01-TS1.C08B59AA-87DF-4ABB-8B70-25FEF9893C7F__70.svs"
        ),
        ( # "('100', '0', '0')"
            "./TCGA-EL-A3TB-11A-01-TS1.6E0966C9-1552-4B30-9008-8ACF737CA8C3__2000_masked.tiff",
            "./TCGA-EL-A3TB-11A-01-TS1.6E0966C9-1552-4B30-9008-8ACF737CA8C3__2000.svs"
        ),
    ]
    # First pass learns the threshold; second pass re-runs it for evaluation
    # and saves the generated mask previews.
    learned_threshold = update_and_find_best_threshold(500, learn_threshold_and_log_cf_matrix_per_patch=True)
    update_and_find_best_threshold(learned_threshold, learn_threshold_and_log_cf_matrix_per_patch=False)
    # Start with 500 with jump size 120 and decay 0.85
    # table:{'TP': 15018, 'FP': 412, 'TN': 66898, 'FN': 2389},
    # table_per_slide:[
    # {'TP': 460, 'FP': 0, 'TN': 19618, 'FN': 1426},
    # {'TP': 4624, 'FP': 126, 'TN': 14100, 'FN': 226},
    # {'TP': 1138, 'FP': 4, 'TN': 6671, 'FN': 492},
    # {'TP': 7615, 'FP': 92, 'TN': 20871, 'FN': 234},
    # {'TP': 78, 'FP': 18, 'TN': 1880, 'FN': 4},
    # {'TP': 1103, 'FP': 172, 'TN': 3758, 'FN': 7}
    # ]
    # threshold:298.86314585743395,jump_size:120
| @@ -0,0 +1,6 @@ | |||
| export PYTHONPATH="${PYTHONPATH}:../../../../"; | |||
| export PYTHONPATH="${PYTHONPATH}:../../../"; | |||
| export PYTHONPATH="${PYTHONPATH}:../../"; | |||
| export PYTHONPATH="${PYTHONPATH}:../"; | |||
| export PYTHONPATH="${PYTHONPATH}:./"; | |||
| python evaluate_image_patcher_and_visualize.py; | |||
| @@ -0,0 +1,4 @@ | |||
| export PYTHONPATH="${PYTHONPATH}:../../"; | |||
| export PYTHONPATH="${PYTHONPATH}:../"; | |||
| export PYTHONPATH="${PYTHONPATH}:./"; | |||
| python patcher_distribution.py; | |||
| @@ -0,0 +1,18 @@ | |||
| import pathlib | |||
| import matplotlib.pyplot as plt | |||
| from national_cancer_institute.read_xml_file import get_slide_info_from_bcr_xml | |||
if __name__ == '__main__':
    data_dir = "data/"
    # slide barcode -> (normal%, tumor%, stromal%) from all biospecimen XMLs.
    slide_infos = {}
    for xml_path in pathlib.Path(data_dir).glob("**/*.xml"):
        slide_infos.update(get_slide_info_from_bcr_xml(str(xml_path)))
    # Tumor-cell percentages for slides with zero stromal cells
    # (item = (normal, tumor, stromal), so item[1] is tumor%, item[2] stromal%).
    cell_percents = [int(item[1]) for item in slide_infos.values() if int(item[2]) == 0]
    print("tumor:", len([i for i in cell_percents if i == 100]))
    print("normal", len([i for i in cell_percents if i == 0]))
    # Slides with mixed tumor content (neither pure tumor nor pure normal).
    print([i for i in cell_percents if i != 0 and i != 100])
    print(len(cell_percents))
    plt.hist(cell_percents, bins=150)
    plt.savefig("tumor_cell_distribution.jpeg")
| @@ -0,0 +1,65 @@ | |||
| import concurrent.futures | |||
| import os | |||
| import pathlib | |||
| import matplotlib.pyplot as plt | |||
| from tqdm import tqdm | |||
| from config import Config | |||
| from image_patcher import ImageAndSlidePatcher | |||
def save_patch_distribution(database_path):
    """Plot how many patches were kept per slide, absolute and as a percent.

    For every *.svs under `<database_path>/data`, counts the saved patch JPEGs
    in the slide's patch folder versus the number of candidate fragments, then
    saves two histograms ("patch_distribution.jpeg" and
    "patch_percent_distribution.jpeg") in the working directory.
    """

    def patch_image(image_path):
        # Worker: return (saved patch count, total candidate fragments) for one
        # slide, or None when the slide has no patch folder.
        try:
            image_path = str(image_path)
            file_name = ImageAndSlidePatcher._get_file_name_from_path(image_path)
            slide_id = file_name.split(".")[0]
            slide_patch_dir = os.path.join(patch_dir, slide_id)
            if ImageAndSlidePatcher._get_extension_from_path(image_path) in [".tiff", ".tif", ".svs"]:
                zarr_object = ImageAndSlidePatcher._zarr_loader(image_path)
                total_counts = ImageAndSlidePatcher._get_number_of_initial_frags(zarr_object=zarr_object)
            else:
                # Non-WSI images are rescaled interactively before counting.
                jpeg_image = ImageAndSlidePatcher._jpeg_loader(image_path)
                jpeg_image = ImageAndSlidePatcher.ask_image_scale_and_rescale(jpeg_image)
                total_counts = ImageAndSlidePatcher._get_number_of_initial_frags(zarr_object=jpeg_image)
            if os.path.exists(slide_patch_dir):
                fragment_id = len([i for i in pathlib.Path(slide_patch_dir).glob("*.jpeg")])
                return fragment_id, total_counts
        except Exception as e:
            print("error")
            print(e)
            raise e

    res_patch_counts = []
    data_dir = os.path.join(database_path, "data")
    patch_dir = os.path.join(database_path, "patches")
    with concurrent.futures.ThreadPoolExecutor(max_workers=Config.workers) as executor:
        image_paths = pathlib.Path(data_dir).glob("**/*.svs")
        image_paths = [i for i in image_paths]
        print()
        for res in tqdm(executor.map(patch_image, image_paths), total=len(image_paths)):
            if res:
                # NOTE(review): frags/total are unpacked but unused; the raw
                # tuple is appended instead.
                frags, total = res
                res_patch_counts.append(res)
    print(res_patch_counts)
    plt.hist([i[0] for i in res_patch_counts], bins=100)
    plt.xlabel("Patch per slide")
    plt.ylabel("Frequency")
    plt.savefig("patch_distribution.jpeg")
    plt.clf()
    # Epsilon guards against slides with zero candidate fragments.
    plt.hist([round(i[0] / (i[1] + 0.00001), 5) * 100 for i in res_patch_counts], bins=100)
    plt.xlabel("Patch per slide percent")
    plt.ylabel("Frequency")
    plt.savefig("patch_percent_distribution.jpeg")
    plt.clf()
if __name__ == '__main__':
    # Run from inside database_crawlers/: the databases live one level up.
    database_directory = "../"
    save_patch_distribution(os.path.join(database_directory, "national_cancer_institute"))
| @@ -0,0 +1,26 @@ | |||
| from xml.dom import minidom | |||
| def get_slide_info_from_bcr_xml(xml_path): | |||
| file = minidom.parse(xml_path) | |||
| patient = file.childNodes[0].getElementsByTagName("bio:patient")[0] | |||
| data_dict = {} | |||
| try: | |||
| for i in range(10): | |||
| percent_tumor_cells = patient.getElementsByTagName("bio:percent_tumor_cells")[i].childNodes[ | |||
| 0].data.strip() | |||
| percent_normal_cells = patient.getElementsByTagName("bio:percent_normal_cells")[i].childNodes[ | |||
| 0].data.strip() | |||
| percent_stormal_cells = patient.getElementsByTagName("bio:percent_stromal_cells")[i].childNodes[ | |||
| 0].data.strip() | |||
| slide_barcode = patient.getElementsByTagName("shared:bcr_slide_barcode")[i].childNodes[0].data.strip() | |||
| data_dict[slide_barcode] = (percent_normal_cells, percent_tumor_cells, percent_stormal_cells) | |||
| except Exception as e: | |||
| pass | |||
| return data_dict | |||
if __name__ == '__main__':
    # Smoke test against one sample biospecimen file.
    path = "../national_cancer_institute/data/1aea8f2a-f809-4f19-bed3-1365e9aab33b/nationwidechildrens.org_biospecimen.TCGA-BJ-A28X.xml"
    res = get_slide_info_from_bcr_xml(path)
    print(res)
| @@ -0,0 +1,4 @@ | |||
| export PYTHONPATH="${PYTHONPATH}:../../"; | |||
| export PYTHONPATH="${PYTHONPATH}:../"; | |||
| export PYTHONPATH="${PYTHONPATH}:./"; | |||
| python cells_chart.py; | |||
| @@ -0,0 +1,5 @@ | |||
| export PYTHONPATH="${PYTHONPATH}:../../"; | |||
| export PYTHONPATH="${PYTHONPATH}:../"; | |||
| export PYTHONPATH="${PYTHONPATH}:../image_patcher"; | |||
| export PYTHONPATH="${PYTHONPATH}:./"; | |||
| python patch_distribution.py; | |||
| @@ -0,0 +1,29 @@ | |||
| 1811210076455461803,BENIGN-BTNpap-hyperplZC04-10370x4_0 | |||
| 7845783054062606488,BENIGN-Dan_Colloid1_0 | |||
| 7637345021064072354,BENIGN-FTH468BTN_sheets_follicles_1_0 | |||
| 814792220375115888,BENIGN-GD40T | |||
| 2888555316355101926,BENIGN-gd-fn20_0 | |||
| -8312464544528256569,BENIGN-gd10p_0 | |||
| -2254510488499374008,BENIGN-gd20g_0 | |||
| 2271447961045108683,MALIGNANT-ATC20L_0 | |||
| 7269337178939971574,MALIGNANT-ATC20b_0 | |||
| -9073528708751422131,MALIGNANT-ATC40BB_0 | |||
| -8571937763202005072,MALIGNANT-ATC40FC_0 | |||
| -8610450256188951874,MALIGNANT-ATC40J_0 | |||
| -7854679564093375561,MALIGNANT-ATC40p_0 | |||
| -4977783033606377395,MALIGNANT-ATC5A_0 | |||
| 1342853015262631578,MALIGNANT-Atc63w_0 | |||
| 431837691807971266,MALIGNANT-C03-54313A_0 | |||
| 8004646368797684873,MALIGNANT-C03-54313B_0 | |||
| 8041832687277297518,MALIGNANT-C03-54313C_0 | |||
| 6449521961463025237,MALIGNANT-C03-54313D_0 | |||
| -8688277356782858138,MALIGNANT-C03-54313E_0 | |||
| -9068734717515564721,MALIGNANT-C03-54313F_0 | |||
| -5752120463987418399,MALIGNANT-C03-54313G_0 | |||
| 8889530878367993817,MALIGNANT-Ed82C_1_0 | |||
| 2934951614446666978,"MALIGNANT-Follicular_neoplasm2,_low_power,_confirmed_FVPTC_DQ_SM_0" | |||
| -1541914789037593200,"MALIGNANT-Pap_CA,_excellent_inclusion,_DQ_0" | |||
| -1717557305576530323,"MALIGNANT-Pap_CA,_powdery_nuclei,_small_nucleoli_and_small_inclusion,_PAP_hp_0" | |||
| 947862089311894766,"MALIGNANT-Papillary_CA,_giant_cell,_DQ_lp_0" | |||
| -8520432463383972777,"MALIGNANT-Papillary_CA,_giant_cell_and_tumor_sheets,_PAP_lp2_0" | |||
| -4759582710547943524,"MALIGNANT-Papillary_CA,_high_cellularity_3,_PAP_lp_0" | |||
| @@ -0,0 +1,13 @@ | |||
| import os | |||
| import shutil | |||
if __name__ == '__main__':
    # Remove patch folders listed in duplicate_image.txt (one
    # "<folder_id>,<label>" entry per line).
    duplicate_info_file_path = "duplicate_image.txt"
    with open(duplicate_info_file_path, "r") as file:
        for line in file.readlines():
            # Bug fix: strip whitespace so a line without a trailing comma
            # doesn't keep its newline inside the folder id; skip blank lines.
            folder_id = line.split(",")[0].strip()
            if not folder_id:
                continue
            folder_path = os.path.join("./patches", folder_id)
            if os.path.exists(folder_path):
                shutil.rmtree(folder_path)
                print("deleted")
            else:
                print("no")
| @@ -0,0 +1,64 @@ | |||
| import json | |||
| from urllib.parse import urlparse | |||
| from urllib.request import urlretrieve | |||
| import requests | |||
| from bs4 import BeautifulSoup | |||
| from database_crawlers.web_stain_sample import WebStainImage, StainType | |||
class StanfordTissueMicroArrayStainSample(WebStainImage):
    """One thumbnail image from the Stanford tissue microarray, fetched as JPEG."""

    def __init__(self, database_name, image_id, image_web_label, report, stain_type, is_wsi):
        super().__init__(database_name, image_id, image_web_label, report, stain_type, is_wsi)

    def get_slide_view_url(self):
        """Public GCS URL serving the JPEG for this image id."""
        return f"https://storage.googleapis.com/jpg.tma.im/{self.image_id}"

    def get_file_name(self):
        """Local file stem: slashes flattened to underscores, extension dropped."""
        image_raw_id = self.image_id.replace("/", "_")
        image_raw_id = ".".join(image_raw_id.split(".")[:len(image_raw_id.split(".")) - 1])
        return self.save_path + image_raw_id

    def get_relative_image_path(self):
        return self.get_file_name() + ".jpeg"

    def get_relative_json_path(self):
        return self.get_file_name() + ".json"

    def crawl_image_save_jpeg(self):
        """Download the JPEG and write its metadata JSON next to it."""
        urlretrieve(self.get_slide_view_url(), self.get_relative_image_path())
        json_object = json.dumps(self.to_json())
        with open(self.get_relative_json_path(), "w") as outfile:
            outfile.write(json_object)
class StanfordTissueMicroArraySlideProvider:
    """Scrapes the Stanford TMA thyroid search page and yields stain samples."""

    page_link = "https://tma.im/cgi-bin/selectImages.pl?organ=thyroid"
    database_name = "StanfordTissueMicroArray"
    stain_type = StainType.UNKNOWN
    is_wsi = False

    @classmethod
    def get_web_stain_samples(cls):
        """Yield a StanfordTissueMicroArrayStainSample per search result.

        POSTs the search form (the session cookie below is a fixed guest
        session — presumably required by the site; verify it still works)
        and scrapes each result tile for its image URL and label.
        """
        payload = {'250 small images': '250 small images'}
        files = []
        headers = {
            'Cookie': 'DAD_ATTEMPTS=0; DAD_SID=36d77eb69e009b1cf1ebc9c3d7866546; DAD_USERID=WORLD'
        }
        html_text = requests.post(cls.page_link, files=files, headers=headers, data=payload).content.decode("utf-8")
        soup = BeautifulSoup(html_text, 'html.parser')
        # Result tiles are identified by this exact class + inline style.
        search_results = soup.find_all("div", {"class": "iDiv0", "style": "width: 86px; height: 260px;"})
        for result_item in search_results:
            image_url = result_item.find("a", {"target": "_blank"}).attrs['href']
            # Image id = URL path without the leading bucket segment.
            image_id = "/".join(urlparse(image_url).path.strip("/").split("/")[1:])
            # The second-to-last caption paragraph holds the diagnosis label.
            image_web_label = list(result_item.find_all("p", {"class": "iDiv1"}))[-2].text
            yield StanfordTissueMicroArrayStainSample(cls.database_name, image_id, image_web_label, None,
                                                      cls.stain_type, cls.is_wsi)
if __name__ == '__main__':
    # Crawl every thyroid sample and save JPEG + JSON metadata locally.
    for slide in StanfordTissueMicroArraySlideProvider.get_web_stain_samples():
        print(slide.image_id, slide.image_web_label, slide.get_slide_view_url())
        slide.crawl_image_save_jpeg()
| @@ -0,0 +1,87 @@ | |||
| import concurrent.futures | |||
| import concurrent.futures | |||
| import time | |||
| from urllib.error import HTTPError | |||
| from urllib.request import urlretrieve | |||
| from torch.utils.data import IterableDataset | |||
| from tqdm import tqdm | |||
| from config import Config | |||
def find_in_log_n(start, end, func, bias=0.3):
    """Biased binary search over [start, end).

    Assumes `func` is True on a prefix of the range and False afterwards;
    returns the largest value for which `func` is True (or `start` when the
    range collapses). `bias` < 0.5 probes closer to `start` first.
    """
    if end - start <= 1:
        return start
    mid = int(start * (1 - bias) + end * bias)
    if start == mid:
        mid += 1
    # Bug fix: propagate the caller's bias into the recursive calls — it was
    # previously dropped, silently reverting to the default 0.3.
    if func(mid):
        return find_in_log_n(mid, end, func, bias)
    else:
        return find_in_log_n(start, mid, func, bias)
def fetch_tile_content(tile_url, retry=15):
    """Download one tile URL with exponential backoff; return its raw bytes.

    Retries up to `retry` times, sleeping 2**(0.3*(attempt+1)) seconds between
    attempts. On the final failure it asks the operator whether to keep going
    (answer "y" restarts the full retry budget), otherwise re-raises.
    """
    for i in range(retry):
        try:
            # urlretrieve downloads to a temp file; read it back as bytes.
            image_path = urlretrieve(tile_url)[0]
            with open(image_path, "rb") as file:
                return file.read()
        except Exception as e:
            # Compact progress marker for noisy retry loops.
            print("e", end="|")
            time.sleep(2 ** (0.3 * (i + 1)))
            if i == retry - 1:
                # Last attempt exhausted: let a human decide to continue.
                if input("continue") == "y":
                    return fetch_tile_content(tile_url, retry)
                raise e
    raise HTTPError("Not able for fetch image tile", code=500, msg="", hdrs={}, fp=None)
def download_urls_in_thread(url_and_index_list):
    """Download every (url, index) pair concurrently, yielding (bytes, index).

    Results are yielded in input order with a tqdm progress bar.
    """

    def _worker(pair):
        tile_url, tile_index = pair
        return fetch_tile_content(tile_url), tile_index

    with concurrent.futures.ThreadPoolExecutor(max_workers=Config.workers) as executor:
        yield from tqdm(executor.map(_worker, url_and_index_list), total=len(url_and_index_list))
def _get_alignment_sore_and_percent(seq1, seq2, match_score=2, mismatch_score=-1, gap_score=-1):
    """Global alignment score between two sequences via the `alignment` package.

    NOTE(review): "sore" is a typo for "score"; left as-is because
    get_normalized_score calls this name.
    """
    # Imported lazily so the third-party `alignment` package is only required
    # when this helper is actually used.
    from alignment.sequence import Sequence
    from alignment.sequencealigner import SimpleScoring, GlobalSequenceAligner
    from alignment.vocabulary import Vocabulary

    a = Sequence(seq1)
    b = Sequence(seq2)
    v = Vocabulary()
    aEncoded = v.encodeSequence(a)
    bEncoded = v.encodeSequence(b)
    scoring = SimpleScoring(match_score, mismatch_score)
    aligner = GlobalSequenceAligner(scoring, gap_score)
    # backtrace=False: we only need the score, not the aligned sequences.
    score = aligner.align(aEncoded, bEncoded, backtrace=False)
    return score
def get_normalized_score(seq1, seq2):
    """Alignment score normalized by the combined length of both sequences."""
    combined_length = len(seq1) + len(seq2)
    return _get_alignment_sore_and_percent(seq1, seq2) / combined_length
class DatasetWithGenerator(IterableDataset):
    """IterableDataset adapter that streams items from a pre-built generator.

    Note: the wrapped generator is single-use, so the dataset can only be
    iterated once.
    """

    def __init__(self, generator):
        # Generator (or any iterator) that yields the dataset items.
        self.generator = generator

    def __iter__(self):
        return self.generator
if __name__ == '__main__':
    # Ad-hoc sanity checks for the biased binary search.
    import math

    print(math.log2(1000 * 1000))
    print(find_in_log_n(0, 100, lambda x: x <= 76))
| @@ -0,0 +1,296 @@ | |||
| import enum | |||
| import json | |||
| import time | |||
| from io import BytesIO | |||
| from urllib.request import Request, urlopen | |||
| import cv2 | |||
| import numpy as np | |||
| from PIL import Image | |||
| from tifffile import TiffWriter | |||
| from database_crawlers.utils import find_in_log_n, fetch_tile_content, download_urls_in_thread | |||
class StainType(enum.Enum):
    """Stain used on a slide; values are (numeric id, human-readable name)."""
    H_AND_E = 0, "H&E"
    UNKNOWN = 1, "UNKNOWN"
class ThyroidCancerLevel(enum.Enum):
    """Coarse malignancy label; values are (numeric id, human-readable name)."""
    UNKNOWN = -1, "UNKNOWN"
    MALIGNANT = 0, "MALIGNANT"
    BENIGN = 1, "BENIGN"

    @staticmethod
    def get_thyroid_level_from_diagnosis_label(label: str):
        """Map a free-text diagnosis string to a malignancy level.

        Matching is case-insensitive; "malignant" takes precedence over
        "benign", and anything else maps to UNKNOWN.
        """
        lowered = label.lower()
        if "malignant" in lowered:
            return ThyroidCancerLevel.MALIGNANT
        if "benign" in lowered:
            return ThyroidCancerLevel.BENIGN
        return ThyroidCancerLevel.UNKNOWN
class ThyroidType(enum.Enum):
    """Thyroid diagnosis classes; values are (numeric id, human-readable name)."""
    UNKNOWN = -1, "UNKNOWN"
    NORMAL = 0, "NORMAL"
    PAPILLARY_CARCINOMA = 1, "PAPILLARY_CARCINOMA"
    GRAVES_DISEASE = 2, "GRAVES_DISEASE"
    NODULAR_GOITER = 3, "NODULAR_GOITER"
    HASHIMOTO_THYROIDITIS = 4, "HASHIMOTO_THYROIDITIS"
    FOLLICULAR_CARCINOMA = 5, "FOLLICULAR_CARCINOMA"
    FOLLICULAR_ADENOMA = 6, "FOLLICULAR_ADENOMA"
    COLLOID_GOITER = 7, "COLLOID_GOITER"

    @staticmethod
    def get_thyroid_type_from_diagnosis_label(label: str):
        """Map a free-text diagnosis string to a ThyroidType.

        Keyword matching is case-insensitive and ordered: the first rule that
        matches wins, and unmatched labels map to UNKNOWN.
        """
        lowered = label.lower()
        if "normal" in lowered:
            return ThyroidType.NORMAL
        if "papillary" in lowered:
            return ThyroidType.PAPILLARY_CARCINOMA
        if "grave" in lowered:
            return ThyroidType.GRAVES_DISEASE
        if "nodular" in lowered and "goiter" in lowered:
            return ThyroidType.NODULAR_GOITER
        if "hashimoto" in lowered:
            return ThyroidType.HASHIMOTO_THYROIDITIS
        if "follicular" in lowered:
            # Follicular lesions default to carcinoma unless explicitly adenoma.
            if "adenoma" in lowered:
                return ThyroidType.FOLLICULAR_ADENOMA
            return ThyroidType.FOLLICULAR_CARCINOMA
        if "colloid" in lowered and "goiter" in lowered:
            return ThyroidType.COLLOID_GOITER
        return ThyroidType.UNKNOWN
class WebStainImage:
    """Base record for one crawled stain image plus its JSON metadata sidecar."""

    # Relative directory where crawled images and JSON files are written.
    save_path = "data/"

    def __init__(self, database_name, image_id, image_web_label, report, stain_type, is_wsi):
        self.database_name = database_name
        self.image_id = image_id
        self.image_web_label = image_web_label
        self.report = report
        self.stain_type = stain_type
        self.is_wsi = is_wsi

    def to_json(self):
        """Metadata dict for the JSON sidecar; key order matches sorted_json_keys()."""
        return {"database_name": self.database_name,
                "image_id": self.image_id,
                "image_web_label": self.image_web_label,
                "image_class_label": self.image_class_label,
                "report": self.report,
                "stain_type": self.stain_type.value[1],
                "is_wsi": self.is_wsi}

    @staticmethod
    def sorted_json_keys():
        """Canonical column order for CSV/JSON export."""
        return ["database_name",
                "image_id",
                "image_web_label",
                "image_class_label",
                "report",
                "stain_type",
                "is_wsi"]

    @property
    def image_class_label(self):
        # Normalized class name derived from the raw web label.
        return ThyroidType.get_thyroid_type_from_diagnosis_label(self.image_web_label).value[1]

    def get_slide_view_url(self):
        # Bug fix: `raise NotImplemented(...)` raised a TypeError because
        # NotImplemented is not callable; NotImplementedError is the intended
        # abstract-method exception.
        raise NotImplementedError("get_slide_view_url")

    def crawl_image_save_jpeg_and_json(self):
        raise NotImplementedError("crawl_image_get_jpeg")

    def _get_file_path_name(self):
        return self.save_path + self.image_id

    def _get_relative_image_path(self):
        return self._get_file_path_name() + ".jpeg"

    def _get_relative_tiff_image_path(self):
        return self._get_file_path_name() + ".tiff"

    def _get_relative_json_path(self):
        return self._get_file_path_name() + ".json"

    def _save_json_file(self):
        """Write the metadata JSON sidecar next to the image."""
        json_object = json.dumps(self.to_json())
        with open(self._get_relative_json_path(), "w") as outfile:
            outfile.write(json_object)
class WebStainWSI(WebStainImage):
    """Base class for whole-slide images that are downloaded tile by tile."""

    def __init__(self, database_name, image_id, image_web_label, report, stain_type, is_wsi):
        super().__init__(database_name, image_id, image_web_label, report, stain_type, is_wsi)

    def _get_tile_url(self, zoom, partition=None, i=None, j=None):
        # Bug fix: `raise NotImplemented(...)` raised a TypeError because
        # NotImplemented is not callable; NotImplementedError is intended.
        raise NotImplementedError("_get_tile_url")

    def _generate_tile_urls(self):
        raise NotImplementedError("generate tile urls")

    def find_best_zoom(self):
        # Zoom level 0 by default; subclasses may override.
        return 0

    def _find_first_tile_width(self):
        """Download tile (0, 0) and return its (width, height) in pixels."""
        image_content = fetch_tile_content(self._get_tile_url(self.find_best_zoom(), partition=0, i=0, j=0))
        img = Image.open(BytesIO(image_content))
        return img.size[0], img.size[1]

    def _fetch_all_tiles(self):
        """Yield (content, index) for every tile URL, downloading concurrently."""
        batch = []
        index = 0
        for url in self._generate_tile_urls():
            batch.append((url, index))
            index += 1
        # download last batch
        if len(batch) != 0:
            for content, downloaded_index in download_urls_in_thread(batch):
                yield content, downloaded_index
        print("Slide download tiles done!!!")

    def crawl_image_save_jpeg_and_json(self):
        raise NotImplementedError("crawl_image_save_jpeg_and_json")
class WebStainWSIOneDIndex(WebStainWSI):
    # WSI whose tiles are addressed by a single linear "partition" index.

    def __init__(self, database_name, image_id, image_web_label, report, stain_type, is_wsi):
        super().__init__(database_name, image_id, image_web_label, report, stain_type, is_wsi)
        # Highest valid partition index; filled in by _find_last_partition().
        self.last_partition = None

    def _find_last_partition(self):
        """Search (via find_in_log_n, range 0..10^6) for the largest partition index that exists."""
        print("Finding last partition: ", end="")

        def func(partition, retry=3):
            # Probe one tile URL with an HTTP HEAD request; retry with
            # increasing sleeps on failure. True = tile exists.
            print(partition, end="")
            for i in range(retry):
                try:
                    request = Request(self._get_tile_url(self.find_best_zoom(), partition=partition), method='HEAD')
                    resp = urlopen(request)
                    headers = resp.info()
                    print("<", end=", ")
                    return True
                except Exception as e:
                    print("e", end="")
                    # backoff: 2**0.1, 2**0.2, ... seconds
                    time.sleep(2 ** (0.1 * (i + 1)))
            print(">", end=", ")
            return False

        return find_in_log_n(0, 1000 * 1000, func)

    def _generate_tile_urls(self):
        # Tiles are enumerated 0..last_partition at the chosen zoom level.
        for partition in range(self.last_partition + 1):
            yield self._get_tile_url(self.find_best_zoom(), partition=partition)

    def crawl_image_save_jpeg_and_json(self):
        """Download all tiles, stitch them into a tiled BigTIFF, then save the metadata JSON."""

        def generator():
            # First replays the tiles buffered while detecting the column
            # count (first_temp_rows), then streams the remaining downloads,
            # decoding each and promoting grayscale tiles to 3 channels.
            while True:
                if first_temp_rows:
                    yield first_temp_rows[0]
                    del first_temp_rows[0]
                else:
                    res = next(content_fetcher, -1)
                    if res == -1:
                        break
                    img = cv2.imdecode(np.frombuffer(res[0], np.uint8), -1)
                    if len(img.shape) == 2:
                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
                    yield img

        first_image_width, first_image_height = self._find_first_tile_width()
        first_temp_rows = []
        column_tiles, row_tiles = None, None
        self.last_partition = self._find_last_partition()
        content_fetcher = self._fetch_all_tiles()
        with TiffWriter(self._get_relative_tiff_image_path(), bigtiff=True) as tif:
            # A tile narrower than the first one marks the end of the first
            # row, revealing the slide's column count.
            while column_tiles is None:
                content, index = content_fetcher.__next__()
                image_array = cv2.imdecode(np.frombuffer(content, np.uint8), cv2.IMREAD_COLOR)
                first_temp_rows.append(image_array)
                if image_array.shape[1] != first_image_width:
                    column_tiles = index + 1
                    row_tiles = (self.last_partition + 1) // column_tiles
            # Full mosaic shape in pixels: rows x cols x BGR channels.
            shape = (first_image_height * row_tiles, first_image_width * column_tiles, 3)
            tif.write(generator(), subfiletype=1, tile=(first_image_height, first_image_width), shape=shape,
                      dtype=np.uint8,
                      compression='JPEG',  # TODO
                      photometric='rgb')
        """
        Save json file
        """
        self._save_json_file()
class WebStainWSITwoDIndex(WebStainWSI):
    # WSI whose tiles are addressed by a 2-D (i, j) index pair.

    def __init__(self, database_name, image_id, image_web_label, report, stain_type, is_wsi):
        super().__init__(database_name, image_id, image_web_label, report, stain_type, is_wsi)
        # Highest valid i and j tile indices; filled in by _find_last_i_and_j().
        self.last_i = None
        self.last_j = None

    def _generate_tile_urls(self):
        # Row-major order: all i of row j before row j+1.
        for j in range(self.last_j + 1):
            for i in range(self.last_i + 1):
                yield self._get_tile_url(self.find_best_zoom(), i=i, j=j)

    def _find_last_i_and_j(self):
        """Search (via find_in_log_n, range 0..1000 each) for the largest existing i and j indices."""

        def func(i, j, retry=3):
            # Probe one tile with an HTTP HEAD request; retry with
            # increasing sleeps on failure. True = tile exists.
            print(f"{i}-{j}", end="")
            for r in range(retry):
                try:
                    request = Request(self._get_tile_url(self.find_best_zoom(), i=i, j=j), method='HEAD')
                    resp = urlopen(request)
                    headers = resp.info()
                    print("<", end=", ")
                    return True
                except Exception as e:
                    print("e", end="")
                    time.sleep(2 ** (0.1 * (r + 1)))
            print(">", end=", ")
            return False

        # Search each axis independently, holding the other at 0.
        print("Finding last i: ", end="")
        i_func = lambda i: func(i=i, j=0)
        last_i = find_in_log_n(0, 1000, i_func)
        print("\nFinding last j: ")
        j_func = lambda j: func(i=0, j=j)
        last_j = find_in_log_n(0, 1000, j_func)
        return last_i, last_j

    def crawl_image_save_jpeg_and_json(self):
        """Download all tiles, stitch them into a 256x256-tiled BigTIFF, then save the metadata JSON."""

        def generator():
            while True:
                res = next(content_fetcher, -1)
                if res == -1:
                    break
                res = cv2.imdecode(np.frombuffer(res[0], np.uint8), -1)
                # Tiles are expected to be at most ~256px per side; anything
                # 260px or larger is treated as a corrupt download.
                if max(res.shape) >= 260:
                    raise Exception(f"warning shape: {res.shape}")
                # Clamp slightly-oversized tiles (257-259px) down to 256x256
                # so they fit the fixed tile grid; smaller tiles are kept as-is.
                res = cv2.resize(res, (min(res.shape[1], 256), min(res.shape[0], 256)))
                yield res

        first_image_width = 256
        first_image_height = 256
        self.last_i, self.last_j = self._find_last_i_and_j()
        content_fetcher = self._fetch_all_tiles()
        with TiffWriter(self._get_relative_tiff_image_path(), bigtiff=True) as tif:
            # Full mosaic shape in pixels: (last_j+1) rows x (last_i+1) cols x 3 channels.
            shape = (first_image_height * (self.last_j + 1), first_image_width * (self.last_i + 1), 3)
            tif.write(generator(), subfiletype=1,
                      tile=(first_image_height, first_image_width),
                      shape=shape,
                      dtype=np.uint8,
                      compression='JPEG',  # TODO
                      photometric='rgb')
        """
        Save json file
        """
        self._save_json_file()
| @@ -0,0 +1,20 @@ | |||
| from PIL import Image | |||
| import zarr | |||
| import tifffile | |||
def convert_tif_to_jpeg(input_address="data/test/1672.tiff", outfile="data/test/out.zarr"):
    """Copy page 0 of a (Big)TIFF into a zarr store without loading it fully into RAM.

    Generalized from the original hard-coded paths; calling with no arguments
    preserves the previous behavior. (Name kept for compatibility — the output
    is a zarr store; the earlier JPEG path via PIL blew up on RAM and was
    abandoned, see note below.)

    Args:
        input_address: path of the source .tiff file.
        outfile: path of the destination .zarr store.
    """
    # aszarr=True exposes the TIFF page as a lazily-read zarr store,
    # avoiding a full in-memory decode of huge slides.
    image_zarr = tifffile.imread(input_address, aszarr=True, key=0)
    zarr_image = zarr.open(image_zarr, mode='r')
    zarr.save(outfile, zarr_image)
    # NOTE: the PIL route (Image.open -> convert("RGB") -> save as JPEG)
    # was dropped because it required decoding the whole slide into RAM.
if __name__ == '__main__':
    # Raise Pillow's decompression-bomb pixel limit so huge stitched slides
    # (up to ~1M tiles of 256x256) can be opened without warnings/errors.
    Image.MAX_IMAGE_PIXELS = 1000 * 1000 * 256 * 256
    convert_tif_to_jpeg()
| @@ -0,0 +1,50 @@ | |||
| # import libtiff | |||
| # import pytiff | |||
| import cv2 | |||
| import tifffile | |||
def show_tif_image(address, name, key=0, w_from=0, h_from=0, size=700, whole_image=False):
    """Display a crop (or the whole page) of a tiled TIFF in an OpenCV window.

    Args:
        address: path of the .tif/.tiff file.
        name: label used in the window title and the printed summary.
        key: TIFF page (pyramid level) to read.
        w_from: start offset along axis 0 of the page.
        h_from: start offset along axis 1 of the page.
        size: side length of the square crop, clipped at the page edges.
        whole_image: when True, ignore the crop window and show the full page.

    NOTE(review): w_from indexes axis 0 (rows) and h_from axis 1 (columns),
    so the names read swapped relative to width/height — confirm with callers.
    """
    import zarr
    # aszarr=True gives lazy, chunked access so only the crop is decoded.
    image_zarr = tifffile.imread(address, aszarr=True, key=key)
    # Fix: do not shadow the imported `zarr` module with the opened array.
    page = zarr.open(image_zarr, mode='r')
    if not whole_image:
        image_frag = page[w_from:min(w_from + size, page.shape[0]), h_from:min(h_from + size, page.shape[1])]
    else:
        image_frag = page[0:page.shape[0], 0:page.shape[1]]
    cv2.imshow(f"name:{name} - shape:{image_frag.shape} - page:{key}", image_frag)
    print(f"name: {name}, shape: {page.shape}")
    image_zarr.close()
def show_CAMELYON16_sample_view():
    """Preview a crop of a CAMELYON16 tumor slide at full resolution (page 0)."""
    slide_path = 'data/CAMELYON16/tumor_084.tif'
    show_tif_image(slide_path, "CAMELYON16", key=0, w_from=10000, h_from=50000)
def show_CAMELYON17_sample_view():
    """Preview a CAMELYON17 slide at a coarse pyramid page (key=7)."""
    slide_path = 'data/CAMELYON17/patient_083_node_4.tif'
    show_tif_image(slide_path, "CAMELYON17", key=7)
def show_Papsociety_sample_view():
    """Load a Papsociety sample JPEG from disk and display it with OpenCV."""
    sample_path = 'data/Papsociety/Follicular_neoplasm2,_low_power,_confirmed_FVPTC_DQ_SM.jpg'
    image_frag = cv2.imread(sample_path)
    cv2.imshow(f"Papsociety - {image_frag.shape}", image_frag)
def show_test(name):
    """Preview a 1000px crop of the local test slide data/test/1272.tiff."""
    show_tif_image('data/test/1272.tiff', name, key=0, w_from=1300, h_from=0, size=1000)
if __name__ == '__main__':
    # Uncomment to preview the public-dataset samples instead.
    # show_CAMELYON16_sample_view()
    # show_CAMELYON17_sample_view()
    # show_Papsociety_sample_view()
    # Show two different crops of the same local test slide.
    show_tif_image('data/test/1272.tiff', "1", key=0, w_from=1000, h_from=100, size=1000)
    show_tif_image('data/test/1272.tiff', "2", key=0, w_from=1000, h_from=1000, size=1000)
    # Keep the OpenCV windows responsive until 'q' is pressed.
    while True:
        if cv2.waitKey(1) == ord('q'):
            break
| @@ -0,0 +1,180 @@ | |||
| absl-py==1.0.0 | |||
| aiohttp==3.8.1 | |||
| aiosignal==1.2.0 | |||
| alignment==1.0.10 | |||
| appdirs==1.4.4 | |||
| argon2-cffi==20.1.0 | |||
| asgiref==3.2.10 | |||
| astunparse==1.6.3 | |||
| async-generator==1.10 | |||
| async-timeout==4.0.2 | |||
| attrs==21.2.0 | |||
| backcall==0.2.0 | |||
| bleach==3.3.0 | |||
| blis==0.7.5 | |||
| cachetools==4.2.4 | |||
| catalogue==2.0.6 | |||
| certifi==2021.10.8 | |||
| cffi==1.14.5 | |||
| charset-normalizer==2.0.8 | |||
| click==8.0.3 | |||
| colorama==0.4.4 | |||
| convertapi==1.4.0 | |||
| cryptography==3.4.7 | |||
| cycler==0.11.0 | |||
| cymem==2.0.6 | |||
| Cython==0.29.23 | |||
| decorator==5.0.9 | |||
| defusedxml==0.7.1 | |||
| distlib==0.3.2 | |||
| dj-database-url==0.5.0 | |||
| Django==3.1.2 | |||
| django-crispy-forms==1.9.2 | |||
| django-heroku==0.3.1 | |||
| django-rest==0.8.7 | |||
| djangorestframework==3.13.1 | |||
| djangorestframework-simplejwt==5.0.0 | |||
| entrypoints==0.3 | |||
| et-xmlfile==1.1.0 | |||
| factory-boy==3.2.1 | |||
| Faker==12.3.0 | |||
| filelock==3.0.12 | |||
| flatbuffers==2.0 | |||
| fonttools==4.28.2 | |||
| frozenlist==1.3.0 | |||
| gast==0.4.0 | |||
| gensim==4.1.2 | |||
| google-auth==2.3.3 | |||
| google-auth-oauthlib==0.4.6 | |||
| google-pasta==0.2.0 | |||
| grpcio==1.42.0 | |||
| gunicorn==20.0.4 | |||
| h5py==3.6.0 | |||
| hazm==0.7.0 | |||
| huggingface-hub==0.6.0 | |||
| idna==3.3 | |||
| importlib-metadata==4.8.2 | |||
| ipykernel==5.5.5 | |||
| ipython==7.24.1 | |||
| ipython-genutils==0.2.0 | |||
| ipywidgets==7.6.3 | |||
| jedi==0.18.0 | |||
| Jinja2==3.0.1 | |||
| joblib==1.0.1 | |||
| jsonschema==3.2.0 | |||
| jupyter==1.0.0 | |||
| jupyter-client==6.1.12 | |||
| jupyter-console==6.4.0 | |||
| jupyter-core==4.7.1 | |||
| jupyterlab-pygments==0.1.2 | |||
| jupyterlab-widgets==1.0.0 | |||
| keras==2.7.0 | |||
| Keras-Preprocessing==1.1.2 | |||
| kiwisolver==1.3.2 | |||
| langcodes==3.3.0 | |||
| libclang==12.0.0 | |||
| libtiff==0.4.2 | |||
| Markdown==3.3.6 | |||
| MarkupSafe==2.0.1 | |||
| matplotlib==3.5.0 | |||
| matplotlib-inline==0.1.2 | |||
| mistune==0.8.4 | |||
| multidict==6.0.2 | |||
| murmurhash==1.0.6 | |||
| nbclient==0.5.3 | |||
| nbconvert==6.0.7 | |||
| nbformat==5.1.3 | |||
| nest-asyncio==1.5.1 | |||
| nltk==3.3 | |||
| notebook==6.4.0 | |||
| numpy==1.20.3 | |||
| oauthlib==3.1.1 | |||
| opencv-python==4.5.2.54 | |||
| openpyxl==3.0.7 | |||
| opt-einsum==3.3.0 | |||
| packaging==20.9 | |||
| pandas==1.2.4 | |||
| pandocfilters==1.4.3 | |||
| parso==0.8.2 | |||
| pathy==0.6.1 | |||
| pickleshare==0.7.5 | |||
| Pillow==8.4.0 | |||
| preshed==3.0.6 | |||
| prometheus-client==0.11.0 | |||
| prompt-toolkit==3.0.18 | |||
| protobuf==3.19.1 | |||
| psycopg2==2.8.6 | |||
| pyaes==1.6.1 | |||
| pyaml==21.10.1 | |||
| pyasn1==0.4.8 | |||
| pyasn1-modules==0.2.8 | |||
| pycparser==2.20 | |||
| pydantic==1.8.2 | |||
| Pygments==2.9.0 | |||
| PyJWT==2.3.0 | |||
| pynput==1.7.5 | |||
| pyOpenSSL==20.0.1 | |||
| pyparsing==2.4.7 | |||
| pyrsistent==0.17.3 | |||
| pyTelegramBotAPI==4.4.0 | |||
| python-dateutil==2.8.1 | |||
| python-decouple==3.6 | |||
| pytz==2020.1 | |||
| PyWavelets==1.1.1 | |||
| pywin32==301 | |||
| pywinpty==1.1.1 | |||
| PyYAML==6.0 | |||
| pyzmq==22.1.0 | |||
| qtconsole==5.1.0 | |||
| QtPy==1.9.0 | |||
| regex==2022.4.24 | |||
| requests==2.26.0 | |||
| requests-oauthlib==1.3.0 | |||
| rsa==4.8 | |||
| scikit-learn==0.24.2 | |||
| scipy==1.7.0 | |||
| Send2Trash==1.5.0 | |||
| setuptools-scm==6.3.2 | |||
| six==1.16.0 | |||
# sklearn==0.0  (removed: deprecated PyPI alias that now fails to install; scikit-learn is already pinned above)
| smart-open==5.2.1 | |||
| spacy==3.2.1 | |||
| spacy-legacy==3.0.8 | |||
| spacy-loggers==1.0.1 | |||
| sqlparse==0.4.1 | |||
| srsly==2.4.2 | |||
| Telethon==1.24.0 | |||
| tensorboard==2.7.0 | |||
| tensorboard-data-server==0.6.1 | |||
| tensorboard-plugin-wit==1.8.0 | |||
| tensorflow==2.7.0 | |||
| tensorflow-estimator==2.7.0 | |||
| tensorflow-io-gcs-filesystem==0.22.0 | |||
| termcolor==1.1.0 | |||
| terminado==0.10.0 | |||
| testpath==0.5.0 | |||
| thinc==8.0.13 | |||
| threadpoolctl==2.2.0 | |||
| tokenizers==0.12.1 | |||
| tomli==1.2.2 | |||
| torch==1.10.1 | |||
| torchtext==0.11.1 | |||
| torchvision==0.11.1 | |||
| tornado==6.1 | |||
| tqdm==4.62.3 | |||
| traitlets==5.0.5 | |||
| transformers==4.19.2 | |||
| typer==0.4.0 | |||
| typing_extensions==4.0.1 | |||
| urllib3==1.26.7 | |||
| virtualenv==20.4.7 | |||
| wasabi==0.9.0 | |||
| wcwidth==0.2.5 | |||
| webencodings==0.5.1 | |||
| Werkzeug==2.0.2 | |||
| whitenoise==5.2.0 | |||
| widgetsnbextension==3.5.1 | |||
| wrapt==1.13.3 | |||
| xlrd==2.0.1 | |||
| yarl==1.7.2 | |||
| zipp==3.6.0 | |||
| @@ -0,0 +1,21 @@ | |||
| import itertools | |||
| import cv2 | |||
def show_and_wait(img, name="img", wait=True, save=False):
    """Show *img* in an OpenCV window; optionally block until 'q', optionally save a JPEG."""
    cv2.imshow(name, img)
    if wait:
        # Block until the user presses 'q', then tear all windows down.
        while cv2.waitKey() != ord('q'):
            pass
        cv2.destroyAllWindows()
    if save:
        cv2.imwrite(f"{name}.jpeg", img)
def check_if_generator_is_empty(generator):
    """Return None if *generator* is exhausted, else an equivalent iterator.

    Probes the generator for one element; when present, the element is
    stitched back in front so no items are lost.
    """
    sentinel = object()
    first = next(generator, sentinel)
    if first is sentinel:
        return None
    return itertools.chain((first,), generator)