import pathlib
import matplotlib.pyplot as plt
from national_cancer_institute.read_xml_file import get_slide_info_from_bcr_xml
if __name__ == '__main__':
    # Merge the slide records parsed from every XML file found under data/
    # (searched recursively) into a single dict.
    xml_root = pathlib.Path("data/")
    slides = {}
    for xml_file in xml_root.glob("**/*.xml"):
        parsed = get_slide_info_from_bcr_xml(str(xml_file))
        slides.update(parsed)

    # Keep field 1 (as an int) of each record whose field 2 converts to 0.
    # NOTE(review): field 1 is presumably a tumor-cell percentage and field 2
    # some kind of flag -- confirm against get_slide_info_from_bcr_xml.
    percents = []
    for record in slides.values():
        if int(record[2]) != 0:
            continue
        percents.append(int(record[1]))

    # Summary: how many values are exactly 100, how many are exactly 0,
    # every other value, and the total number of kept values.
    print("tumor:", percents.count(100))
    print("normal", percents.count(0))
    print([value for value in percents if value not in (0, 100)])
    print(len(percents))

    # Histogram of the kept values, saved under a relative file name.
    plt.hist(percents, bins=150)
    plt.savefig("tumor_cell_distribution.jpeg")
 |