You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

cells_chart.py 727B

123456789101112131415161718
  1. import pathlib
  2. import matplotlib.pyplot as plt
  3. from national_cancer_institute.read_xml_file import get_slide_info_from_bcr_xml
  4. if __name__ == '__main__':
  5. data_dir = "data/"
  6. slide_infos = {}
  7. for xml_path in pathlib.Path(data_dir).glob("**/*.xml"):
  8. slide_infos.update(get_slide_info_from_bcr_xml(str(xml_path)))
  9. cell_percents = [int(item[1]) for item in slide_infos.values() if int(item[2]) == 0]
  10. print("tumor:", len([i for i in cell_percents if i == 100]))
  11. print("normal", len([i for i in cell_percents if i == 0]))
  12. print([i for i in cell_percents if i != 0 and i != 100])
  13. print(len(cell_percents))
  14. plt.hist(cell_percents, bins=150)
  15. plt.savefig("tumor_cell_distribution.jpeg")