You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

read_xml_file.py 1.1KB

1234567891011121314151617181920212223242526
  1. from xml.dom import minidom
  2. def get_slide_info_from_bcr_xml(xml_path):
  3. file = minidom.parse(xml_path)
  4. patient = file.childNodes[0].getElementsByTagName("bio:patient")[0]
  5. data_dict = {}
  6. try:
  7. for i in range(10):
  8. percent_tumor_cells = patient.getElementsByTagName("bio:percent_tumor_cells")[i].childNodes[
  9. 0].data.strip()
  10. percent_normal_cells = patient.getElementsByTagName("bio:percent_normal_cells")[i].childNodes[
  11. 0].data.strip()
  12. percent_stormal_cells = patient.getElementsByTagName("bio:percent_stromal_cells")[i].childNodes[
  13. 0].data.strip()
  14. slide_barcode = patient.getElementsByTagName("shared:bcr_slide_barcode")[i].childNodes[0].data.strip()
  15. data_dict[slide_barcode] = (percent_normal_cells, percent_tumor_cells, percent_stormal_cells)
  16. except Exception as e:
  17. pass
  18. return data_dict
  19. if __name__ == '__main__':
  20. path = "../national_cancer_institute/data/1aea8f2a-f809-4f19-bed3-1365e9aab33b/nationwidechildrens.org_biospecimen.TCGA-BJ-A28X.xml"
  21. res = get_slide_info_from_bcr_xml(path)
  22. print(res)