- import csv
- import json
- import os
- import os.path as os_path
- import random
- import re
- from math import ceil
- from os import listdir
- from os.path import isfile, join
-
- import cv2
- import tifffile
- import zarr as ZarrObject
- from tqdm import tqdm
-
- from config import Config
- from database_crawlers.web_stain_sample import ThyroidCancerLevel, WebStainImage
- from utils import show_and_wait
-
-
class ThyroidFragmentFilters:
    """Filter predicates used to decide whether a slide fragment is worth keeping."""

    @staticmethod
    def func_laplacian_threshold(threshold=Config.laplacian_threshold):
        """Build a single-argument filter closure over `threshold`.

        The returned callable takes a BGR image array and returns True when the
        fragment's Laplacian variance reaches `threshold` (i.e. it is not empty/blurry).
        """

        def wrapper(image_nd_array):
            return ThyroidFragmentFilters._empty_frag_with_laplacian_threshold(
                image_nd_array, threshold)

        return wrapper

    @staticmethod
    def _empty_frag_with_laplacian_threshold(image_nd_array, threshold=Config.laplacian_threshold,
                                             return_variance=False):
        """Variance-of-Laplacian sharpness test on a BGR fragment.

        Converts to grayscale, smooths with a 3x3 Gaussian, then measures the
        variance of the 3x3 Laplacian response. Returns the boolean
        `variance >= threshold`, or `(passed, variance)` when
        `return_variance` is True.
        """
        grayscale = cv2.cvtColor(image_nd_array, cv2.COLOR_BGR2GRAY)
        smoothed = cv2.GaussianBlur(grayscale, (3, 3), 0)

        laplacian_response = cv2.Laplacian(smoothed, cv2.CV_64F, ksize=3)
        # meanStdDev returns (mean, stddev) as nested arrays; take the scalar stddev.
        std_dev = cv2.meanStdDev(laplacian_response)[1][0][0]
        variance = std_dev ** 2

        passed = variance >= threshold
        return (passed, variance) if return_variance else passed
-
-
class ImageAndSlidePatcher:
    """Cut whole-slide images (tiled TIFF via zarr) or plain JPEGs into square
    patches, drop empty/blurry fragments with a Laplacian filter, save the
    surviving patches as JPEGs, and record per-slide metadata in a CSV file."""

    @classmethod
    def _check_magnification_from_description(cls, tiff_address):
        """Read the scanner magnification from the TIFF ImageDescription tag.

        Looks for an "AppMag = <int>" marker (Aperio-style description) and
        returns the integer, or None when the file/tag/marker is unavailable.
        """
        try:
            tif_file = tifffile.TiffFile(tiff_address)
            image_description = tif_file.pages.keyframe.tags["ImageDescription"].value
            return int(re.findall(r"(AppMag = [0-9]+)", image_description)[0].split(" = ")[-1])
        except Exception:
            # Unreadable file, missing tag or absent AppMag marker: magnification unknown.
            return None

    @classmethod
    def _zarr_loader(cls, tiff_address, key=0):
        """Open page `key` of a tiled TIFF as a read-only zarr array (lazy, chunked access)."""
        zarr_store = tifffile.imread(tiff_address, aszarr=True, key=key)
        return ZarrObject.open(zarr_store, mode='r')

    @classmethod
    def _jpeg_loader(cls, jpeg_address):
        """Load an image with OpenCV; returns a BGR ndarray, or None if unreadable."""
        return cv2.imread(jpeg_address)

    @classmethod
    def _json_key_loader(cls, json_file_address, key=None):
        """Load a JSON file and return the whole dict, or only `key`'s value when given.

        Raises KeyError when `key` is supplied but absent from the file.
        """
        with open(json_file_address, 'rb') as file:
            json_dict = json.loads(file.read())
        # `is not None` (not truthiness) so falsy-but-valid keys still index the dict.
        if key is not None:
            return json_dict[key]
        return json_dict

    @classmethod
    def _get_extension_from_path(cls, file_path):
        """Return the final extension including the dot (e.g. ".tiff"), or "" when none."""
        return os_path.splitext(file_path)[-1]

    @classmethod
    def _get_file_name_from_path(cls, file_path):
        """Return the base name without its final extension ("a.b.tif" -> "a.b")."""
        return ".".join(os_path.split(file_path)[-1].split(".")[:-1])

    @classmethod
    def _get_number_of_initial_frags(cls, zarr_object, frag_size=512, frag_overlap=0.1):
        """Number of fragments `_generate_raw_fragments_from_image_array_or_zarr`
        yields for the same `frag_size`/`frag_overlap` over `zarr_object.shape`."""
        zarr_shape = zarr_object.shape

        step_size = int(frag_size * (1 - frag_overlap))
        overlap_size = frag_size - step_size
        w_count = len(range(0, ceil((zarr_shape[0] - overlap_size) / step_size) * step_size, step_size))
        h_count = len(range(0, ceil((zarr_shape[1] - overlap_size) / step_size) * step_size, step_size))
        return w_count * h_count

    @classmethod
    def _generate_raw_fragments_from_image_array_or_zarr(cls, image_object, frag_size=512, frag_overlap=0.1,
                                                         shuffle=True):
        """Yield (fragment, (row, col)) square patches covering `image_object`.

        Steps of `frag_size * (1 - frag_overlap)` pixels; patches at the right/
        bottom edge are shifted inward so every fragment is exactly
        `frag_size` x `frag_size`. With `shuffle=True` positions are visited in
        random order (seed-dependent via `random`).

        NOTE: this is a generator function — callers always receive a generator
        object (never None); for a None image it simply yields nothing.
        """

        def frag_picker(w_pos, h_pos):
            end_w, end_h = min(zarr_shape[0], w_pos + frag_size), min(zarr_shape[1], h_pos + frag_size)
            start_w, start_h = end_w - frag_size, end_h - frag_size
            return image_object[start_w:end_w, start_h:end_h], (start_w, start_h)

        if image_object is None:
            return
        zarr_shape = image_object.shape

        step_size = int(frag_size * (1 - frag_overlap))
        overlap_size = frag_size - step_size
        w_range = list(range(0, ceil((zarr_shape[0] - overlap_size) / step_size) * step_size, step_size))
        h_range = list(range(0, ceil((zarr_shape[1] - overlap_size) / step_size) * step_size, step_size))

        positions = [(w, h) for w in w_range for h in h_range]
        if shuffle:
            random.shuffle(positions)
        for w, h in positions:
            yield frag_picker(w, h)

    @classmethod
    def _filter_frag_from_generator(cls, frag_generator, filter_func_list, return_all_with_condition=False,
                                    all_frag_count=None, output_file=None):
        """Run every filter over each fragment of `frag_generator` with a tqdm bar.

        Yields (fragment, pos) for fragments passing ALL filters, or
        (fragment, pos, passed) for every fragment when
        `return_all_with_condition` is True. All filters are evaluated (no
        short-circuit), matching the original `&=` accumulation.
        """
        for fragment, frag_pos in tqdm(frag_generator, total=all_frag_count, file=output_file,
                                       postfix="Filtering", position=0):
            condition = True
            for filter_func in filter_func_list:
                condition &= filter_func(fragment)
            if return_all_with_condition:
                yield fragment, frag_pos, condition
            elif condition:
                yield fragment, frag_pos

    @classmethod
    def _get_json_and_image_address_of_directory(cls, directory_path, ignore_json=False):
        """Pair metadata JSON files with same-stem images in `directory_path`.

        Returns a sorted-order list of (json_path, image_path) tuples for
        complete pairs, or just the image paths when `ignore_json` is True.
        """
        # .tif/.svs added for consistency with save_image_patches_and_update_csv,
        # which already knows how to patch those formats.
        image_formats = [".jpeg", ".tiff", ".jpg", ".tif", ".svs"]
        json_format = ".json"
        file_names = sorted(f for f in listdir(directory_path) if isfile(join(directory_path, f)))
        pairs = {}
        for name in file_names:
            full_path = join(directory_path, name)
            stem = cls._get_file_name_from_path(full_path)
            slot = pairs.setdefault(stem, [None, None])  # [json_path, image_path]
            extension = cls._get_extension_from_path(full_path)
            if extension in image_formats:
                slot[1] = full_path
            elif extension == json_format:
                slot[0] = full_path
        if ignore_json:
            return [image_path for _json_path, image_path in pairs.values() if image_path is not None]
        return [(json_path, image_path) for json_path, image_path in pairs.values()
                if json_path is not None and image_path is not None]

    @staticmethod
    def create_patch_dir_and_initialize_csv(database_path):
        """Ensure `<database_path>/patches` exists and open its patch_labels.csv.

        The CSV is opened for appending; the header row is written only when
        the file is empty. Returns (data_dir, patch_dir, csv_writer, csv_file);
        the caller owns and must close `csv_file`.
        """
        data_dir = os.path.join(database_path, "data")
        patch_dir = os.path.join(database_path, "patches")
        if not os.path.isdir(patch_dir):
            os.mkdir(patch_dir)
        label_csv_path = os.path.join(patch_dir, "patch_labels.csv")
        # newline="" is required by the csv module (avoids blank rows on Windows).
        csv_file = open(label_csv_path, "a+", newline="")
        csv_writer = csv.writer(csv_file)
        csv_file.seek(0)
        if len(csv_file.read(100)) <= 0:
            # Fresh/empty file: emit the header row ("a" mode still writes at the end).
            csv_writer.writerow(WebStainImage.sorted_json_keys())
        return data_dir, patch_dir, csv_writer, csv_file

    @classmethod
    def save_image_patches_and_update_csv(cls, thyroid_type, thyroid_desired_classes, csv_writer, web_details,
                                          image_path, slide_patch_dir, slide_id):
        """Patch one slide: load it, filter fragments, save JPEG patches, log a CSV row.

        Returns (saved_fragment_count, candidate_fragment_count), or None when
        the image was skipped (operator answered "i" during interactive rescaling).
        """
        if cls._get_extension_from_path(image_path) in [".tiff", ".tif", ".svs"]:
            image_object = cls._zarr_loader(image_path)
        else:
            image_object = cls._jpeg_loader(image_path)
            image_object = cls.ask_image_scale_and_rescale(image_object)
        # BUG FIX: the old code tested `generator is None`, but the fragment
        # producer is a generator function and never returns None, and it also
        # computed total counts on a possibly-None jpeg (AttributeError) after
        # already writing the CSV row. Check the image itself, before any use.
        if image_object is None:
            return None

        csv_writer.writerow(list(web_details.values()))
        generator = cls._generate_raw_fragments_from_image_array_or_zarr(image_object)
        total_counts = cls._get_number_of_initial_frags(zarr_object=image_object)

        if not os.path.isdir(slide_patch_dir):
            os.mkdir(slide_patch_dir)
        filters = [ThyroidFragmentFilters.func_laplacian_threshold(Config.laplacian_threshold)]
        fragment_id = 0
        slide_progress_file_path = os.path.join(slide_patch_dir, "progress.txt")
        with open(slide_progress_file_path, "w") as progress_file:
            for fragment, frag_pos in cls._filter_frag_from_generator(generator, filters,
                                                                      all_frag_count=total_counts,
                                                                      output_file=progress_file):
                fragment_file_path = os.path.join(slide_patch_dir, f"{slide_id}-{fragment_id}.jpeg")
                cv2.imwrite(fragment_file_path, fragment)
                fragment_id += 1
        return fragment_id, total_counts

    @classmethod
    def save_patches_in_folders(cls, database_directory, dataset_dir=None):
        """Patch every dataset under `database_directory` that has a data/ subfolder
        (or only `dataset_dir` when given), pairing each image with its JSON metadata."""
        thyroid_desired_classes = [ThyroidCancerLevel.MALIGNANT, ThyroidCancerLevel.BENIGN]
        dataset_names = os.listdir(database_directory) if dataset_dir is None else [dataset_dir]
        dataset_paths = [os.path.join(database_directory, name) for name in dataset_names
                         if os.path.isdir(os.path.join(database_directory, name, "data"))]
        for database_path in dataset_paths:
            print("database path: ", database_path)
            data_dir, patch_dir, csv_writer, csv_file = cls.create_patch_dir_and_initialize_csv(database_path)
            try:
                for json_path, image_path in cls._get_json_and_image_address_of_directory(data_dir):
                    print("image path: ", image_path)
                    file_name = cls._get_file_name_from_path(image_path)
                    # NOTE(review): built-in hash() of str is salted per process
                    # (PYTHONHASHSEED), so slide ids are not stable across runs;
                    # consider hashlib for reproducible ids.
                    slide_id = str(hash(file_name))
                    slide_patch_dir = os.path.join(patch_dir, slide_id)
                    if os.path.isdir(slide_patch_dir):
                        # Already patched in a previous run.
                        continue

                    web_details = cls._json_key_loader(json_path)
                    web_details["image_id"] = slide_id
                    web_label = web_details["image_web_label"]
                    thyroid_type = ThyroidCancerLevel.get_thyroid_level_from_diagnosis_label(web_label)
                    web_details["image_class_label"] = thyroid_type.value[1]

                    cls.save_image_patches_and_update_csv(thyroid_type, thyroid_desired_classes, csv_writer,
                                                          web_details, image_path, slide_patch_dir, slide_id)
            finally:
                # BUG FIX: close the CSV even when a slide raises mid-loop.
                csv_file.close()

    @classmethod
    def save_papsociaty_patch(cls, database_path):
        """Patch the PapSociety dataset, whose images live in per-class folders
        under data/ and have no JSON metadata (labels come from the folder name)."""
        thyroid_desired_classes = [ThyroidCancerLevel.MALIGNANT, ThyroidCancerLevel.BENIGN]
        print("database path: ", database_path)
        # BUG FIX: the CSV used to be re-opened inside the folder loop while only
        # the last handle was closed, leaking one file handle per class folder.
        data_dir, patch_dir, csv_writer, csv_file = cls.create_patch_dir_and_initialize_csv(database_path)
        try:
            for folder in Config.class_names:
                group_path = os.path.join(database_path, "data", folder)
                for image_path in cls._get_json_and_image_address_of_directory(group_path, ignore_json=True):
                    print("image path: ", image_path)
                    file_name = cls._get_file_name_from_path(image_path)
                    # NOTE(review): hash() of str is salted per process; ids are
                    # not reproducible across runs (same caveat as above).
                    slide_id = str(hash(file_name))
                    slide_patch_dir = os.path.join(patch_dir, slide_id)
                    if os.path.isdir(slide_patch_dir):
                        # Already patched in a previous run.
                        continue
                    web_label = folder + "-" + file_name
                    thyroid_type = ThyroidCancerLevel.get_thyroid_level_from_diagnosis_label(web_label)
                    web_details = {"database_name": "PapSociety",
                                   "image_id": slide_id,
                                   "image_web_label": web_label,
                                   "image_class_label": thyroid_type.value[1],
                                   "report": None,
                                   "stain_type": "UNKNOWN",
                                   "is_wsi": False}
                    cls.save_image_patches_and_update_csv(thyroid_type, thyroid_desired_classes, csv_writer,
                                                          web_details, image_path, slide_patch_dir, slide_id)
        finally:
            csv_file.close()

    @classmethod
    def ask_image_scale_and_rescale(cls, image):
        """Show `image` and interactively ask for a zoom factor; down-scale by 1/factor.

        Returns the rescaled image, or None when the operator answers "i"
        (ignore this slide). Any other invalid answer re-prompts.
        """
        # Loop instead of the previous self-recursion: same observable behavior,
        # no recursion-depth growth on repeated bad input.
        while True:
            show_and_wait(image)
            res = input("how much plus pointer fill a cell(float, i:ignore, else repeat): ")
            if res == "i":
                return None
            try:
                # BUG FIX: the dot was unescaped ("(.[0-9]*)?") and so matched any
                # character; fullmatch with an escaped dot accepts only numerals.
                if re.fullmatch(r"[0-9]+(\.[0-9]*)?", res):
                    scale = 1 / float(res)
                    return cv2.resize(image, (0, 0), fx=scale, fy=scale)
            except Exception as e:
                # e.g. ZeroDivisionError for "0": report and re-prompt.
                print(e)
-
-
if __name__ == '__main__':
    # Fixed seed so the shuffled fragment order (random.shuffle in the patch
    # generator) is reproducible across runs.
    random.seed(1)

    database_directory = "./"
    # Entry points left disabled; uncomment the dataset to (re)patch.
    # ImageAndSlidePatcher.save_patches_in_folders(database_directory, dataset_dir=["stanford_tissue_microarray"])
    # ImageAndSlidePatcher.save_papsociaty_patch(os.path.join(database_directory, "papsociaty"))
|