Meta Byte Track

convert_mot20_to_coco_metaway.py

import os
import json

import cv2
import numpy as np

# Use the same script for MOT16.
DATA_PATH = '/media/external_10TB/10TB/vision/ByteTrackData/MOT20'
OUT_PATH = os.path.join(DATA_PATH, 'annotations')
SPLITS = ['train', 'test']  # add 'train_half' / 'val_half' to also split the training data in half.
HALF_VIDEO = True
CREATE_SPLITTED_ANN = True
CREATE_SPLITTED_DET = True
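# The script emits one COCO-style JSON per sequence at
# OUT_PATH/<split>_<sequence>.json, each with a single 'pedestrian'
# category (id 1) and per-frame 'images' / 'annotations' entries.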
if __name__ == '__main__':
    if not os.path.exists(OUT_PATH):
        os.makedirs(OUT_PATH)

    for split in SPLITS:
        if split == "test":
            data_path = os.path.join(DATA_PATH, 'test')
        else:
            data_path = os.path.join(DATA_PATH, 'train')
        seqs = os.listdir(data_path)

        for seq in sorted(seqs):
            if '.DS_Store' in seq:
                continue
            out_path = os.path.join(OUT_PATH, '{}_{}.json'.format(split, seq))
            out = {'images': [], 'annotations': [], 'videos': [],
                   'categories': [{'id': 1, 'name': 'pedestrian'}]}
            # All counters restart here because every sequence is written to
            # its own JSON file.
            image_cnt = 0
            ann_cnt = 0
            video_cnt = 0
            tid_curr = 0
            tid_last = -1
            video_cnt += 1  # video sequence number.
            out['videos'].append({'id': video_cnt, 'file_name': seq})
            seq_path = os.path.join(data_path, seq)
            img_path = os.path.join(seq_path, 'img1')
            ann_path = os.path.join(seq_path, 'gt/gt.txt')
            images = os.listdir(img_path)
            num_images = len([image for image in images if 'jpg' in image])  # frames in this sequence.

            if HALF_VIDEO and ('half' in split):
                image_range = [0, num_images // 2] if 'train' in split else \
                              [num_images // 2 + 1, num_images - 1]
            else:
                image_range = [0, num_images - 1]
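            # With HALF_VIDEO enabled, a 'train_half' split keeps the first half
            # of each video and 'val_half' the second half; frame numbers are
            # shifted below so each half starts again at 1.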
            for i in range(num_images):
                if i < image_range[0] or i > image_range[1]:
                    continue
                # Each frame is read only to query its resolution; all frames
                # of a MOT sequence share one size (also listed in seqinfo.ini).
                img = cv2.imread(os.path.join(data_path, '{}/img1/{:06d}.jpg'.format(seq, i + 1)))
                height, width = img.shape[:2]
                image_info = {'file_name': '{}/img1/{:06d}.jpg'.format(seq, i + 1),
                              'id': image_cnt + i + 1,  # unique image id within this JSON file.
                              'frame_id': i + 1 - image_range[0],  # frame number in the sequence, starting from 1.
                              'prev_image_id': image_cnt + i if i > 0 else -1,  # -1 marks the sequence boundary.
                              'next_image_id': image_cnt + i + 2 if i < num_images - 1 else -1,
                              'video_id': video_cnt,
                              'height': height, 'width': width}
                out['images'].append(image_info)
            print('{}: {} images'.format(seq, num_images))
            if split != 'test':
                det_path = os.path.join(seq_path, 'det/det.txt')
                anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',')
                dets = np.loadtxt(det_path, dtype=np.float32, delimiter=',')
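                # MOTChallenge text format, one row per box:
                #   gt.txt:  frame, track id, bb_left, bb_top, bb_width,
                #            bb_height, active flag, class id, visibility
                #   det.txt: frame, -1, bb_left, bb_top, bb_width, bb_height,
                #            confidence, ...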
                if CREATE_SPLITTED_ANN and ('half' in split):
                    anns_out = np.array([anns[i] for i in range(anns.shape[0])
                                         if int(anns[i][0]) - 1 >= image_range[0] and
                                         int(anns[i][0]) - 1 <= image_range[1]], np.float32)
                    anns_out[:, 0] -= image_range[0]  # re-base frame numbers to the half split.
                    gt_out = os.path.join(seq_path, 'gt/gt_{}.txt'.format(split))
                    fout = open(gt_out, 'w')
                    for o in anns_out:
                        fout.write('{:d},{:d},{:d},{:d},{:d},{:d},{:d},{:d},{:.6f}\n'.format(
                            int(o[0]), int(o[1]), int(o[2]), int(o[3]), int(o[4]), int(o[5]),
                            int(o[6]), int(o[7]), o[8]))
                    fout.close()
                if CREATE_SPLITTED_DET and ('half' in split):
                    dets_out = np.array([dets[i] for i in range(dets.shape[0])
                                         if int(dets[i][0]) - 1 >= image_range[0] and
                                         int(dets[i][0]) - 1 <= image_range[1]], np.float32)
                    dets_out[:, 0] -= image_range[0]
                    det_out = os.path.join(seq_path, 'det/det_{}.txt'.format(split))
                    dout = open(det_out, 'w')
                    for o in dets_out:
                        dout.write('{:d},{:d},{:.1f},{:.1f},{:.1f},{:.1f},{:.6f}\n'.format(
                            int(o[0]), int(o[1]), float(o[2]), float(o[3]), float(o[4]), float(o[5]),
                            float(o[6])))
                    dout.close()
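                # MOTChallenge class ids (per the official label spec):
                # 1 pedestrian, 2 person on vehicle, 3 car, 4 bicycle,
                # 5 motorbike, 6 non-motorized vehicle, 7 static person,
                # 8 distractor, 9-11 occluders, 12 reflection.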
                print('{} ann images'.format(int(anns[:, 0].max())))
                for i in range(anns.shape[0]):
                    frame_id = int(anns[i][0])
                    if frame_id - 1 < image_range[0] or frame_id - 1 > image_range[1]:
                        continue
                    track_id = int(anns[i][1])
                    cat_id = int(anns[i][7])
                    ann_cnt += 1
                    if not ('15' in DATA_PATH):  # MOT15 gt lacks the class/visibility columns.
                        # if not (float(anns[i][8]) >= 0.25):  # visibility.
                        #     continue
                        if not (int(anns[i][6]) == 1):  # keep only boxes flagged as active.
                            continue
                        if int(anns[i][7]) in [3, 4, 5, 6, 9, 10, 11]:  # non-person classes.
                            continue
                        if int(anns[i][7]) in [2, 7, 8, 12]:  # ignored person classes.
                            # category_id = -1
                            continue
                        else:
                            category_id = 1  # moving pedestrian.
                            # Assign a fresh contiguous track id whenever the
                            # gt track id changes.
                            if not track_id == tid_last:
                                tid_curr += 1
                                tid_last = track_id
                    else:
                        category_id = 1
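                    # MOT boxes are already [top-left x, top-left y, width, height],
                    # which matches the COCO bbox convention, so no conversion is needed.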
                    ann = {'id': ann_cnt,
                           'category_id': category_id,
                           'image_id': image_cnt + frame_id,
                           'track_id': tid_curr,
                           'bbox': anns[i][2:6].tolist(),
                           'conf': float(anns[i][6]),
                           'iscrowd': 0,
                           'area': float(anns[i][4] * anns[i][5])}
                    out['annotations'].append(ann)
            image_cnt += num_images
            print(tid_curr, tid_last)
            print('loaded {} for {} images and {} samples'.format(split, len(out['images']), len(out['annotations'])))
            with open(out_path, 'w') as f:
                json.dump(out, f)
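
As a quick sanity check, the generated files load with the standard pycocotools COCO API. A minimal sketch, assuming pycocotools is installed and that the train split contains a sequence named MOT20-01 (the path and sequence name are illustrative; adjust them to your data):

from pycocotools.coco import COCO

ann_file = '/media/external_10TB/10TB/vision/ByteTrackData/MOT20/annotations/train_MOT20-01.json'
coco = COCO(ann_file)  # parses the JSON and builds the image/annotation index

img_ids = coco.getImgIds()
first = coco.loadImgs(img_ids[0])[0]
anns = coco.loadAnns(coco.getAnnIds(imgIds=first['id']))
print('{} images; {} boxes in {}'.format(len(img_ids), len(anns), first['file_name']))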