You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

national_cancer_patcher.py 2.4KB

2 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. import concurrent.futures
  2. import os
  3. import pathlib
  4. from tqdm import tqdm
  5. from config import Config
  6. from image_patcher import ImageAndSlidePatcher
  7. from national_cancer_institute.read_xml_file import get_slide_info_from_bcr_xml
  8. def save_national_cancer_institute_patch(database_path):
  9. def patch_image(image_path):
  10. try:
  11. image_path = str(image_path)
  12. print()
  13. print("image path: ", image_path)
  14. file_name = ImageAndSlidePatcher._get_file_name_from_path(image_path)
  15. slide_id = file_name.split(".")[0]
  16. slide_patch_dir = os.path.join(patch_dir, slide_id)
  17. if os.path.isdir(slide_patch_dir):
  18. print("it has already been patched")
  19. return
  20. web_label = slide_infos.get(slide_id, None)
  21. if web_label is None:
  22. print("Ignored")
  23. return
  24. web_details = {"database_name": "NationalCancerInstitute",
  25. "image_id": slide_id,
  26. "image_web_label": web_label,
  27. "image_class_label": web_label,
  28. "report": None,
  29. "stain_type": "H&E",
  30. "is_wsi": True}
  31. return ImageAndSlidePatcher.save_image_patches_and_update_csv(web_label, None, csv_writer, web_details,
  32. image_path, slide_patch_dir, slide_id)
  33. except Exception as e:
  34. print(e)
  35. data_dir = os.path.join(database_path, "data")
  36. slide_infos = {}
  37. for xml_path in pathlib.Path(data_dir).glob("**/*.xml"):
  38. slide_infos.update(get_slide_info_from_bcr_xml(str(xml_path)))
  39. data_dir, patch_dir, csv_writer, csv_file = ImageAndSlidePatcher.create_patch_dir_and_initialize_csv(database_path)
  40. csv_file.flush()
  41. with concurrent.futures.ThreadPoolExecutor(max_workers=Config.workers) as executor:
  42. image_paths = pathlib.Path(data_dir).glob("**/*.svs")
  43. image_paths = [i for i in image_paths]
  44. print()
  45. for res in tqdm(executor.map(patch_image, image_paths), total=len(image_paths)):
  46. if res:
  47. csv_file.flush()
  48. csv_file.flush()
  49. if __name__ == '__main__':
  50. database_directory = "../"
  51. save_national_cancer_institute_patch(os.path.join(database_directory, "national_cancer_institute"))