|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960 |
- import concurrent.futures
- import os
- import pathlib
-
- from tqdm import tqdm
-
- from config import Config
- from image_patcher import ImageAndSlidePatcher
- from national_cancer_institute.read_xml_file import get_slide_info_from_bcr_xml
-
-
def save_national_cancer_institute_patch(database_path):
    """Patch every ``.svs`` slide found for this database and log results to the CSV.

    Steps, in order:

    1. Every ``*.xml`` file under ``<database_path>/data`` is passed to
       ``get_slide_info_from_bcr_xml`` and the results are merged into one
       dict, which is then used as a ``slide_id -> label`` lookup.
    2. ``ImageAndSlidePatcher.create_patch_dir_and_initialize_csv`` supplies
       the data directory, the patch output directory, a CSV writer and the
       underlying CSV file object.
    3. Every ``*.svs`` file under the data directory is handed to a thread
       pool of ``Config.workers`` workers; the CSV file is flushed after each
       slide whose worker returned a truthy result.

    The CSV file is always closed before returning, including when the
    processing loop raises.

    Args:
        database_path: Root directory of the database; expected to contain a
            ``data`` sub-directory with the XML and SVS files.
    """

    def patch_image(image_path):
        """Patch one slide; return the patcher's result, or ``None`` if skipped or failed."""
        try:
            image_path = str(image_path)
            print()
            print("image path: ", image_path)
            file_name = ImageAndSlidePatcher._get_file_name_from_path(image_path)
            # The slide id is everything before the first dot of the file name.
            slide_id = file_name.split(".")[0]
            slide_patch_dir = os.path.join(patch_dir, slide_id)
            # An existing output directory is treated as "already done" so a
            # rerun does not redo slides handled by a previous run.
            if os.path.isdir(slide_patch_dir):
                print("it has already been patched")
                return None
            web_label = slide_infos.get(slide_id)
            # Slides with no label found in the XML metadata are not patched.
            if web_label is None:
                print("Ignored")
                return None
            web_details = {
                "database_name": "NationalCancerInstitute",
                "image_id": slide_id,
                "image_web_label": web_label,
                "image_class_label": web_label,
                "report": None,
                "stain_type": "H&E",
                "is_wsi": True,
            }
            return ImageAndSlidePatcher.save_image_patches_and_update_csv(
                web_label, None, csv_writer, web_details, image_path, slide_patch_dir, slide_id
            )
        except Exception as e:
            # Deliberate best-effort: one failing slide must not abort the
            # whole batch, so the error is reported and the slide is skipped.
            print(e)
            return None

    data_dir = os.path.join(database_path, "data")
    slide_infos = {}
    for xml_path in pathlib.Path(data_dir).glob("**/*.xml"):
        slide_infos.update(get_slide_info_from_bcr_xml(str(xml_path)))

    data_dir, patch_dir, csv_writer, csv_file = ImageAndSlidePatcher.create_patch_dir_and_initialize_csv(
        database_path
    )
    try:
        csv_file.flush()

        # Materialise the glob so tqdm can be given a total.
        image_paths = list(pathlib.Path(data_dir).glob("**/*.svs"))
        with concurrent.futures.ThreadPoolExecutor(max_workers=Config.workers) as executor:
            print()
            for res in tqdm(executor.map(patch_image, image_paths), total=len(image_paths)):
                if res:
                    # Flush after each successful slide so rows reach disk
                    # while the run is still in progress.
                    csv_file.flush()
        csv_file.flush()
    finally:
        # Release the CSV handle even if the loop above raised.
        csv_file.close()
-
-
if __name__ == '__main__':
    # Script entry point: the database folder is resolved relative to the
    # parent of the current working directory.
    database_directory = "../"
    database_path = os.path.join(database_directory, "national_cancer_institute")
    save_national_cancer_institute_patch(database_path)
|