Organ-aware 3D lesion segmentation dataset and pipeline for abdominal CT analysis (ACM Multimedia 2025 candidate)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

generateData.py 7.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. import numpy as np
  2. import nibabel as nib
  3. import os
  4. import cv2
  5. import csv
  6. import nibabel as nib
  7. import pydicom
  8. from pydicom.dataset import FileDataset
  9. import numpy as np
  10. import os
  11. import datetime
  12. import zipfile
  13. def slices2nifti(ims, fn_out, spacing):
  14. """Save 2D slices to 3D NIfTI file considering the spacing."""
  15. if len(ims) < 300: # cv2.merge does not support too many channels
  16. V = cv2.merge(ims)
  17. else:
  18. V = np.empty((ims[0].shape[0], ims[0].shape[1], len(ims)))
  19. for i in range(len(ims)):
  20. V[:, :, i] = ims[i]
  21. # The transformation matrix suitable for 3D slicer and ITK-SNAP
  22. T = np.array([[0, -spacing[1], 0, 0],
  23. [-spacing[0], 0, 0, 0],
  24. [0, 0, -spacing[2], 0],
  25. [0, 0, 0, 1]])
  26. img = nib.Nifti1Image(V, T)
  27. path_out = os.path.join(dir_out, fn_out)
  28. nib.save(img, path_out)
  29. return path_out
  30. def load_slices(dir, slice_idxs):
  31. """Load slices from 16-bit PNG files and return images with their filenames."""
  32. slice_idxs = np.array(slice_idxs)
  33. if not np.all(slice_idxs[1:] - slice_idxs[:-1] == 1):
  34. print(f"⚠️ Slice indices are not consecutive")
  35. ims = []
  36. filenames = []
  37. for slice_idx in slice_idxs:
  38. fn = f'{slice_idx:03d}.png'
  39. path = os.path.join(dir_in, dir, fn)
  40. im = cv2.imread(path, -1) # -1 to preserve 16-bit depth
  41. assert im is not None, f'Error reading {path}'
  42. im_corrected = (im.astype(np.int32) - 32768).astype(np.int16)
  43. ims.append(im_corrected)
  44. filenames.append(fn.split('.')[0])
  45. return ims, filenames
  46. def read_DL_info():
  47. """Read spacings and image indices in DeepLesion."""
  48. spacings = []
  49. idxs = []
  50. with open(info_fn, 'r') as csvfile: # Use 'r' mode for reading text files
  51. reader = csv.reader(csvfile)
  52. rownum = 0
  53. for row in reader:
  54. if rownum == 0:
  55. header = row
  56. rownum += 1
  57. else:
  58. idxs.append([int(d) for d in row[1:4]])
  59. spacings.append([float(d) for d in row[12].split(',')])
  60. idxs = np.array(idxs)
  61. spacings = np.array(spacings)
  62. return idxs, spacings
  63. def nii_to_dicom(nii_path, output_folder, filenames):
  64. nii_img = nib.load(nii_path)
  65. data = nii_img.get_fdata()
  66. affine = nii_img.affine
  67. num_slices = data.shape[2]
  68. assert len(filenames) == num_slices, "Length of filename list must match number of slices."
  69. nii_base = os.path.splitext(os.path.basename(nii_path))[0]
  70. nii_base = nii_base.split('.')[0]
  71. dicom_subfolder = os.path.join(output_folder, nii_base)
  72. os.makedirs(dicom_subfolder, exist_ok=True)
  73. for i in range(num_slices):
  74. filename = os.path.join(dicom_subfolder, filenames[i])+'.dcm' # Use provided filename
  75. file_meta = pydicom.Dataset()
  76. file_meta.MediaStorageSOPClassUID = pydicom.uid.SecondaryCaptureImageStorage
  77. file_meta.MediaStorageSOPInstanceUID = pydicom.uid.generate_uid()
  78. file_meta.ImplementationClassUID = "1.2.3.4.5.6.7.8.9.0"
  79. file_meta.TransferSyntaxUID = pydicom.uid.ExplicitVRLittleEndian
  80. ds = FileDataset(filename, {}, file_meta=file_meta, preamble=b"\0" * 128)
  81. dt = datetime.datetime.now()
  82. ds.PatientName = "Test^Patient"
  83. ds.PatientID = "123456"
  84. ds.Modality = "MR"
  85. ds.StudyInstanceUID = pydicom.uid.generate_uid()
  86. ds.SeriesInstanceUID = pydicom.uid.generate_uid()
  87. ds.SOPInstanceUID = file_meta.MediaStorageSOPInstanceUID
  88. ds.SOPClassUID = file_meta.MediaStorageSOPClassUID
  89. ds.StudyDate = dt.strftime('%Y%m%d')
  90. ds.StudyTime = dt.strftime('%H%M%S')
  91. ds.Rows, ds.Columns = data.shape[:2]
  92. ds.InstanceNumber = i + 1
  93. ds.ImagePositionPatient = [float(affine[0,3]), float(affine[1,3]), float(affine[2,3] + i)]
  94. ds.ImageOrientationPatient = [1,0,0,0,1,0]
  95. ds.PixelSpacing = [1.0, 1.0]
  96. ds.SliceThickness = 1.0
  97. ds.SamplesPerPixel = 1
  98. ds.PhotometricInterpretation = "MONOCHROME2"
  99. ds.BitsAllocated = 16
  100. ds.BitsStored = 16
  101. ds.HighBit = 15
  102. ds.PixelRepresentation = 1
  103. pixel_array = data[:, :, i].astype(np.uint16)
  104. ds.PixelData = pixel_array.tobytes()
  105. ds.is_little_endian = True
  106. ds.is_implicit_VR = False
  107. ds.save_as(filename)
  108. def extract_and_collect_main_folders(zip_root_dir):
  109. extracted_paths = []
  110. for zip_name in sorted(os.listdir(zip_root_dir)):
  111. zip_path = os.path.join(zip_root_dir, zip_name)
  112. # فقط فایل‌های .zip واقعی
  113. if zip_name.lower().endswith('.zip') and os.path.isfile(zip_path):
  114. try:
  115. extract_folder = os.path.join(zip_root_dir, zip_name.replace('.zip', ''))
  116. os.makedirs(extract_folder, exist_ok=True)
  117. with zipfile.ZipFile(zip_path, 'r') as zip_ref:
  118. zip_ref.extractall(extract_folder)
  119. os.remove(zip_path)
  120. # حذف پوشه‌های مخفی
  121. extracted_subfolders = [
  122. os.path.join(extract_folder, name)
  123. for name in os.listdir(extract_folder)
  124. if os.path.isdir(os.path.join(extract_folder, name)) and not name.startswith('.')
  125. ]
  126. if not extracted_subfolders:
  127. extracted_paths.append(extract_folder)
  128. else:
  129. extracted_paths.extend(extracted_subfolders)
  130. except zipfile.BadZipFile:
  131. print(f"⚠️ Skipping bad zip file: {zip_name}")
  132. continue
  133. return extracted_paths
  134. # Main
  135. zip_root_dir =''
  136. folders = extract_and_collect_main_folders(zip_root_dir)
  137. def find_image_folders(directory):
  138. image_folders = []
  139. #
  140. for root, dirs, files in os.walk(directory):
  141. for dir_name in dirs:
  142. if dir_name.startswith("Images_png_"):
  143. image_folders.append(os.path.join(root, dir_name))
  144. return image_folders
  145. folder_path = ''
  146. result = find_image_folders(folder_path)
  147. print(result)
  148. dir_out = ''
  149. out_fmt = '%s.nii.gz' # format of the nifti file name to output
  150. info_fn ='' # file name of the information file
  151. idxs, spacings = read_DL_info()
  152. for folder in result :
  153. dir_in = folder + '/Images_png'
  154. if not os.path.exists(dir_out):
  155. os.mkdir(dir_out)
  156. img_dirs = os.listdir(dir_in)
  157. img_dirs.sort()
  158. for dir1 in img_dirs:
  159. #Find the image info according to the folder's name
  160. idxs1 = np.array([int(d) for d in dir1.split('_')])
  161. i1 = np.where(np.all(idxs == idxs1, axis=1))[0]
  162. spacings1 = spacings[i1[0]]
  163. fns = os.listdir(os.path.join(dir_in, dir1))
  164. slices = [int(d[:-4]) for d in fns if d.endswith('.png')]
  165. slices.sort()
  166. groups = [slices]
  167. for group in groups:
  168. # Group contains slices indices of a sub-volume
  169. ims,names = load_slices(dir1, group)
  170. fn_out = out_fmt % (dir1)
  171. path_out = slices2nifti(ims, fn_out, spacings1)
  172. nii_to_dicom(path_out, "",names)