You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

patch_distribution.py 2.4KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. import concurrent.futures
  2. import os
  3. import pathlib
  4. import matplotlib.pyplot as plt
  5. from tqdm import tqdm
  6. from config import Config
  7. from image_patcher import ImageAndSlidePatcher
  8. def save_patch_distribution(database_path):
  9. def patch_image(image_path):
  10. try:
  11. image_path = str(image_path)
  12. file_name = ImageAndSlidePatcher._get_file_name_from_path(image_path)
  13. slide_id = file_name.split(".")[0]
  14. slide_patch_dir = os.path.join(patch_dir, slide_id)
  15. if ImageAndSlidePatcher._get_extension_from_path(image_path) in [".tiff", ".tif", ".svs"]:
  16. zarr_object = ImageAndSlidePatcher._zarr_loader(image_path)
  17. total_counts = ImageAndSlidePatcher._get_number_of_initial_frags(zarr_object=zarr_object)
  18. else:
  19. jpeg_image = ImageAndSlidePatcher._jpeg_loader(image_path)
  20. jpeg_image = ImageAndSlidePatcher.ask_image_scale_and_rescale(jpeg_image)
  21. total_counts = ImageAndSlidePatcher._get_number_of_initial_frags(zarr_object=jpeg_image)
  22. if os.path.exists(slide_patch_dir):
  23. fragment_id = len([i for i in pathlib.Path(slide_patch_dir).glob("*.jpeg")])
  24. return fragment_id, total_counts
  25. except Exception as e:
  26. print("error")
  27. print(e)
  28. raise e
  29. res_patch_counts = []
  30. data_dir = os.path.join(database_path, "data")
  31. patch_dir = os.path.join(database_path, "patches")
  32. with concurrent.futures.ThreadPoolExecutor(max_workers=Config.workers) as executor:
  33. image_paths = pathlib.Path(data_dir).glob("**/*.svs")
  34. image_paths = [i for i in image_paths]
  35. print()
  36. for res in tqdm(executor.map(patch_image, image_paths), total=len(image_paths)):
  37. if res:
  38. frags, total = res
  39. res_patch_counts.append(res)
  40. print(res_patch_counts)
  41. plt.hist([i[0] for i in res_patch_counts], bins=100)
  42. plt.xlabel("Patch per slide")
  43. plt.ylabel("Frequency")
  44. plt.savefig("patch_distribution.jpeg")
  45. plt.clf()
  46. plt.hist([round(i[0] / (i[1] + 0.00001), 5) * 100 for i in res_patch_counts], bins=100)
  47. plt.xlabel("Patch per slide percent")
  48. plt.ylabel("Frequency")
  49. plt.savefig("patch_percent_distribution.jpeg")
  50. plt.clf()
  51. if __name__ == '__main__':
  52. database_directory = "../"
  53. save_patch_distribution(os.path.join(database_directory, "national_cancer_institute"))