123456789101112131415161718 |
- import pathlib
-
- import matplotlib.pyplot as plt
-
- from national_cancer_institute.read_xml_file import get_slide_info_from_bcr_xml
-
def collect_slide_infos(data_dir):
    """Merge the slide info parsed from every XML file below *data_dir*.

    Args:
        data_dir: Directory that is searched recursively for ``*.xml`` files.

    Returns:
        A dict built by ``dict.update``-ing the result of
        ``get_slide_info_from_bcr_xml`` for each file; when two files report
        the same key, the one visited later wins.
    """
    slide_infos = {}
    # "**/*.xml" matches XML files at any depth below data_dir.
    for xml_path in pathlib.Path(data_dir).glob("**/*.xml"):
        # The parser is handed a plain string path, not a Path object.
        slide_infos.update(get_slide_info_from_bcr_xml(str(xml_path)))
    return slide_infos


def select_cell_percents(slide_infos):
    """Return ``int(item[1])`` for every value whose ``int(item[2])`` is 0.

    Args:
        slide_infos: Mapping whose values are indexable records.
            NOTE(review): item[1] looks like the tumor-cell percentage and
            item[2] like a second field used as a filter -- confirm against
            ``get_slide_info_from_bcr_xml``.

    Returns:
        List of integers, in the mapping's iteration order.

    Raises:
        ValueError / TypeError: if either field cannot be converted by
            ``int()``.
    """
    return [
        int(item[1])
        for item in slide_infos.values()
        if int(item[2]) == 0
    ]


def main(data_dir="data/", output_path="tumor_cell_distribution.jpeg", bins=150):
    """Print summary counts of the cell percentages and save their histogram.

    Args:
        data_dir: Directory searched recursively for XML files.
        output_path: File the histogram image is written to.
        bins: Number of histogram bins passed to matplotlib.
    """
    cell_percents = select_cell_percents(collect_slide_infos(data_dir))

    # list.count avoids materialising a filtered copy just to measure it.
    print("tumor:", cell_percents.count(100))
    print("normal", cell_percents.count(0))
    # Everything that is neither exactly 0 nor exactly 100.
    print([percent for percent in cell_percents if percent not in (0, 100)])
    print(len(cell_percents))

    # Draw on an explicit figure so repeated calls do not pile histograms
    # onto pyplot's implicit current figure, and release it afterwards.
    fig, ax = plt.subplots()
    try:
        ax.hist(cell_percents, bins=bins)
        fig.savefig(output_path)
    finally:
        plt.close(fig)


if __name__ == '__main__':
    main()
|