from collections import defaultdict
from loguru import logger
from tqdm import tqdm

import torch

from yolox.utils import (
    gather,
    is_main_process,
    postprocess,
    synchronize,
    time_synchronized,
    xyxy2xywh
)
from yolox.tracker.byte_tracker import BYTETracker
from yolox.sort_tracker.sort import Sort
from yolox.deepsort_tracker.deepsort import DeepSort
from yolox.motdt_tracker.motdt_tracker import OnlineTracker

import contextlib
import io
import os
import itertools
import json
import tempfile
import time


def write_results(filename, results):
    save_format = '{frame},{id},{x1},{y1},{w},{h},{s},-1,-1,-1\n'
    with open(filename, 'w') as f:
        for frame_id, tlwhs, track_ids, scores in results:
            for tlwh, track_id, score in zip(tlwhs, track_ids, scores):
                if track_id < 0:
                    continue
                x1, y1, w, h = tlwh
                line = save_format.format(
                    frame=frame_id, id=track_id,
                    x1=round(x1, 1), y1=round(y1, 1),
                    w=round(w, 1), h=round(h, 1), s=round(score, 2)
                )
                f.write(line)
    logger.info('save results to {}'.format(filename))


def write_results_no_score(filename, results):
    save_format = '{frame},{id},{x1},{y1},{w},{h},-1,-1,-1,-1\n'
    with open(filename, 'w') as f:
        for frame_id, tlwhs, track_ids in results:
            for tlwh, track_id in zip(tlwhs, track_ids):
                if track_id < 0:
                    continue
                x1, y1, w, h = tlwh
                line = save_format.format(
                    frame=frame_id, id=track_id,
                    x1=round(x1, 1), y1=round(y1, 1),
                    w=round(w, 1), h=round(h, 1)
                )
                f.write(line)
    logger.info('save results to {}'.format(filename))
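
# Example of a single line produced by write_results (values illustrative):
#   1,2,100.0,120.0,50.0,110.0,0.87,-1,-1,-1
# i.e. frame,id,x1,y1,w,h,score followed by three unused -1 placeholders of
# the MOT Challenge text format; write_results_no_score writes -1 in the
# score field as well.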


class MOTEvaluator:
    """
    MOT evaluation class. Detection results on the test dataset are
    evaluated with the COCO API, and tracking results are written in the
    MOT Challenge text format.
    """

    def __init__(
        self, args, dataloader, img_size, confthre, nmsthre, num_classes):
        """
        Args:
            dataloader (Dataloader): evaluation dataloader.
            img_size (tuple): (height, width) that images are resized to
                during preprocessing.
            confthre (float): confidence threshold ranging from 0 to 1,
                defined in the config file.
            nmsthre (float): IoU threshold of non-max suppression,
                ranging from 0 to 1.
        """
        self.dataloader = dataloader
        self.img_size = img_size
        self.confthre = confthre
        self.nmsthre = nmsthre
        self.num_classes = num_classes
        self.args = args
    def evaluate(
        self,
        model,
        distributed=False,
        half=False,
        trt_file=None,
        decoder=None,
        test_size=None,
        result_folder=None
    ):
        """
        COCO average precision (AP) evaluation with BYTETracker tracking.
        Iterates inference over the test dataset; detection results are
        evaluated by the COCO API, and tracking results are written to
        result_folder in MOT Challenge format.

        NOTE: This function switches the model to eval mode; save the
        training state beforehand if needed.

        Args:
            model: model to evaluate.

        Returns:
            ap50_95 (float): COCO AP of IoU=50:95
            ap50 (float): COCO AP of IoU=50
            summary (str): summary info of evaluation.
        """
        # TODO half to amp_test
        tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor
        model = model.eval()
        if half:
            model = model.half()
        ids = []
        data_list = []
        results = []
        video_names = defaultdict()
        progress_bar = tqdm if is_main_process() else iter

        inference_time = 0
        track_time = 0
        n_samples = len(self.dataloader) - 1

        if trt_file is not None:
            from torch2trt import TRTModule

            model_trt = TRTModule()
            model_trt.load_state_dict(torch.load(trt_file))

            x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()
            model(x)
            model = model_trt

        tracker = BYTETracker(self.args)
        ori_thresh = self.args.track_thresh
        for cur_iter, (imgs, _, info_imgs, ids) in enumerate(
            progress_bar(self.dataloader)
        ):
            with torch.no_grad():
                # init tracker
                frame_id = info_imgs[2].item()
                video_id = info_imgs[3].item()
                img_file_name = info_imgs[4]
                video_name = img_file_name[0].split('/')[0]
                if video_name == 'MOT17-05-FRCNN' or video_name == 'MOT17-06-FRCNN':
                    self.args.track_buffer = 14
                elif video_name == 'MOT17-13-FRCNN' or video_name == 'MOT17-14-FRCNN':
                    self.args.track_buffer = 25
                else:
                    self.args.track_buffer = 30

                # per-sequence detection threshold overrides; MOT17 and MOT20
                # cases are handled in a single chain so that one dataset's
                # fallback cannot overwrite the other's setting
                if video_name == 'MOT17-01-FRCNN':
                    self.args.track_thresh = 0.65
                elif video_name == 'MOT17-06-FRCNN':
                    self.args.track_thresh = 0.65
                elif video_name == 'MOT17-12-FRCNN':
                    self.args.track_thresh = 0.7
                elif video_name == 'MOT17-14-FRCNN':
                    self.args.track_thresh = 0.67
                elif video_name == 'MOT20-06' or video_name == 'MOT20-08':
                    self.args.track_thresh = 0.3
                else:
                    self.args.track_thresh = ori_thresh

                if video_id not in video_names:
                    video_names[video_id] = video_name
                if frame_id == 1:
                    tracker = BYTETracker(self.args)
                    if len(results) != 0:
                        result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id - 1]))
                        write_results(result_filename, results)
                        results = []

                imgs = imgs.type(tensor_type)

                # skip the last iters since the batch size might not be
                # enough for batch inference
                is_time_record = cur_iter < len(self.dataloader) - 1
                if is_time_record:
                    start = time.time()

                outputs = model(imgs)

                if decoder is not None:
                    outputs = decoder(outputs, dtype=outputs.type())
                outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)

                if is_time_record:
                    infer_end = time_synchronized()
                    inference_time += infer_end - start

            output_results = self.convert_to_coco_format(outputs, info_imgs, ids)
            data_list.extend(output_results)

            # run tracking
            if outputs[0] is not None:
                online_targets = tracker.update(outputs[0], info_imgs, self.img_size)
                online_tlwhs = []
                online_ids = []
                online_scores = []
                for t in online_targets:
                    tlwh = t.tlwh
                    tid = t.track_id
                    # drop boxes that are too small or wider than tall
                    # (aspect ratio > 1.6), which are unlikely pedestrians
                    vertical = tlwh[2] / tlwh[3] > 1.6
                    if tlwh[2] * tlwh[3] > self.args.min_box_area and not vertical:
                        online_tlwhs.append(tlwh)
                        online_ids.append(tid)
                        online_scores.append(t.score)
                # save results
                results.append((frame_id, online_tlwhs, online_ids, online_scores))

            if is_time_record:
                track_end = time_synchronized()
                track_time += track_end - infer_end

            if cur_iter == len(self.dataloader) - 1:
                result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id]))
                write_results(result_filename, results)

        statistics = torch.cuda.FloatTensor([inference_time, track_time, n_samples])
        if distributed:
            data_list = gather(data_list, dst=0)
            data_list = list(itertools.chain(*data_list))
            torch.distributed.reduce(statistics, dst=0)

        eval_results = self.evaluate_prediction(data_list, statistics)
        synchronize()
        return eval_results
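
    # Usage sketch for evaluate() (names such as `args`, `val_loader` and
    # `result_dir` are hypothetical placeholders, not part of this module):
    #
    #     evaluator = MOTEvaluator(args, val_loader, img_size=(800, 1440),
    #                              confthre=0.01, nmsthre=0.7, num_classes=1)
    #     ap50_95, ap50, summary = evaluator.evaluate(
    #         model, half=True, result_folder=result_dir)
    #     logger.info(summary)
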
    def evaluate_sort(
        self,
        model,
        distributed=False,
        half=False,
        trt_file=None,
        decoder=None,
        test_size=None,
        result_folder=None
    ):
        """
        COCO average precision (AP) evaluation with SORT tracking.
        Iterates inference over the test dataset; detection results are
        evaluated by the COCO API, and tracking results are written to
        result_folder in MOT Challenge format.

        NOTE: This function switches the model to eval mode; save the
        training state beforehand if needed.

        Args:
            model: model to evaluate.

        Returns:
            ap50_95 (float): COCO AP of IoU=50:95
            ap50 (float): COCO AP of IoU=50
            summary (str): summary info of evaluation.
        """
        # TODO half to amp_test
        tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor
        model = model.eval()
        if half:
            model = model.half()
        ids = []
        data_list = []
        results = []
        video_names = defaultdict()
        progress_bar = tqdm if is_main_process() else iter

        inference_time = 0
        track_time = 0
        n_samples = len(self.dataloader) - 1

        if trt_file is not None:
            from torch2trt import TRTModule

            model_trt = TRTModule()
            model_trt.load_state_dict(torch.load(trt_file))

            x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()
            model(x)
            model = model_trt

        tracker = Sort(self.args.track_thresh)

        for cur_iter, (imgs, _, info_imgs, ids) in enumerate(
            progress_bar(self.dataloader)
        ):
            with torch.no_grad():
                # init tracker
                frame_id = info_imgs[2].item()
                video_id = info_imgs[3].item()
                img_file_name = info_imgs[4]
                video_name = img_file_name[0].split('/')[0]

                if video_id not in video_names:
                    video_names[video_id] = video_name
                if frame_id == 1:
                    tracker = Sort(self.args.track_thresh)
                    if len(results) != 0:
                        result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id - 1]))
                        write_results_no_score(result_filename, results)
                        results = []

                imgs = imgs.type(tensor_type)

                # skip the last iters since the batch size might not be
                # enough for batch inference
                is_time_record = cur_iter < len(self.dataloader) - 1
                if is_time_record:
                    start = time.time()

                outputs = model(imgs)
                if decoder is not None:
                    outputs = decoder(outputs, dtype=outputs.type())

                outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)

                if is_time_record:
                    infer_end = time_synchronized()
                    inference_time += infer_end - start

            output_results = self.convert_to_coco_format(outputs, info_imgs, ids)
            data_list.extend(output_results)

            # run tracking
            online_targets = tracker.update(outputs[0], info_imgs, self.img_size)
            online_tlwhs = []
            online_ids = []
            for t in online_targets:
                # SORT returns plain arrays [x1, y1, x2, y2, track_id];
                # convert the corner format to top-left/width/height
                tlwh = [t[0], t[1], t[2] - t[0], t[3] - t[1]]
                tid = t[4]
                vertical = tlwh[2] / tlwh[3] > 1.6
                if tlwh[2] * tlwh[3] > self.args.min_box_area and not vertical:
                    online_tlwhs.append(tlwh)
                    online_ids.append(tid)
            # save results
            results.append((frame_id, online_tlwhs, online_ids))

            if is_time_record:
                track_end = time_synchronized()
                track_time += track_end - infer_end

            if cur_iter == len(self.dataloader) - 1:
                result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id]))
                write_results_no_score(result_filename, results)

        statistics = torch.cuda.FloatTensor([inference_time, track_time, n_samples])
        if distributed:
            data_list = gather(data_list, dst=0)
            data_list = list(itertools.chain(*data_list))
            torch.distributed.reduce(statistics, dst=0)

        eval_results = self.evaluate_prediction(data_list, statistics)
        synchronize()
        return eval_results
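
    # evaluate_deepsort and evaluate_motdt below follow the same loop
    # structure as the methods above; they differ only in the tracker
    # constructed and in additionally passing the frame's file name so
    # appearance (re-identification) features can be extracted.
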
    def evaluate_deepsort(
        self,
        model,
        distributed=False,
        half=False,
        trt_file=None,
        decoder=None,
        test_size=None,
        result_folder=None,
        model_folder=None
    ):
        """
        COCO average precision (AP) evaluation with DeepSORT tracking.
        Iterates inference over the test dataset; detection results are
        evaluated by the COCO API, and tracking results are written to
        result_folder in MOT Challenge format.

        NOTE: This function switches the model to eval mode; save the
        training state beforehand if needed.

        Args:
            model: model to evaluate.
            model_folder: folder of the re-identification model used by
                DeepSORT.

        Returns:
            ap50_95 (float): COCO AP of IoU=50:95
            ap50 (float): COCO AP of IoU=50
            summary (str): summary info of evaluation.
        """
        # TODO half to amp_test
        tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor
        model = model.eval()
        if half:
            model = model.half()
        ids = []
        data_list = []
        results = []
        video_names = defaultdict()
        progress_bar = tqdm if is_main_process() else iter

        inference_time = 0
        track_time = 0
        n_samples = len(self.dataloader) - 1

        if trt_file is not None:
            from torch2trt import TRTModule

            model_trt = TRTModule()
            model_trt.load_state_dict(torch.load(trt_file))

            x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()
            model(x)
            model = model_trt

        tracker = DeepSort(model_folder, min_confidence=self.args.track_thresh)

        for cur_iter, (imgs, _, info_imgs, ids) in enumerate(
            progress_bar(self.dataloader)
        ):
            with torch.no_grad():
                # init tracker
                frame_id = info_imgs[2].item()
                video_id = info_imgs[3].item()
                img_file_name = info_imgs[4]
                video_name = img_file_name[0].split('/')[0]

                if video_id not in video_names:
                    video_names[video_id] = video_name
                if frame_id == 1:
                    tracker = DeepSort(model_folder, min_confidence=self.args.track_thresh)
                    if len(results) != 0:
                        result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id - 1]))
                        write_results_no_score(result_filename, results)
                        results = []

                imgs = imgs.type(tensor_type)

                # skip the last iters since the batch size might not be
                # enough for batch inference
                is_time_record = cur_iter < len(self.dataloader) - 1
                if is_time_record:
                    start = time.time()

                outputs = model(imgs)
                if decoder is not None:
                    outputs = decoder(outputs, dtype=outputs.type())

                outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)

                if is_time_record:
                    infer_end = time_synchronized()
                    inference_time += infer_end - start

            output_results = self.convert_to_coco_format(outputs, info_imgs, ids)
            data_list.extend(output_results)

            # run tracking; DeepSORT also reads the raw frame via the image
            # file name to extract re-identification features
            online_targets = tracker.update(outputs[0], info_imgs, self.img_size, img_file_name[0])
            online_tlwhs = []
            online_ids = []
            for t in online_targets:
                tlwh = [t[0], t[1], t[2] - t[0], t[3] - t[1]]
                tid = t[4]
                vertical = tlwh[2] / tlwh[3] > 1.6
                if tlwh[2] * tlwh[3] > self.args.min_box_area and not vertical:
                    online_tlwhs.append(tlwh)
                    online_ids.append(tid)
            # save results
            results.append((frame_id, online_tlwhs, online_ids))

            if is_time_record:
                track_end = time_synchronized()
                track_time += track_end - infer_end

            if cur_iter == len(self.dataloader) - 1:
                result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id]))
                write_results_no_score(result_filename, results)

        statistics = torch.cuda.FloatTensor([inference_time, track_time, n_samples])
        if distributed:
            data_list = gather(data_list, dst=0)
            data_list = list(itertools.chain(*data_list))
            torch.distributed.reduce(statistics, dst=0)

        eval_results = self.evaluate_prediction(data_list, statistics)
        synchronize()
        return eval_results

    def evaluate_motdt(
        self,
        model,
        distributed=False,
        half=False,
        trt_file=None,
        decoder=None,
        test_size=None,
        result_folder=None,
        model_folder=None
    ):
        """
        COCO average precision (AP) evaluation with MOTDT tracking.
        Iterates inference over the test dataset; detection results are
        evaluated by the COCO API, and tracking results are written to
        result_folder in MOT Challenge format.

        NOTE: This function switches the model to eval mode; save the
        training state beforehand if needed.

        Args:
            model: model to evaluate.
            model_folder: folder of the re-identification model used by
                MOTDT's OnlineTracker.

        Returns:
            ap50_95 (float): COCO AP of IoU=50:95
            ap50 (float): COCO AP of IoU=50
            summary (str): summary info of evaluation.
        """
        # TODO half to amp_test
        tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor
        model = model.eval()
        if half:
            model = model.half()
        ids = []
        data_list = []
        results = []
        video_names = defaultdict()
        progress_bar = tqdm if is_main_process() else iter

        inference_time = 0
        track_time = 0
        n_samples = len(self.dataloader) - 1

        if trt_file is not None:
            from torch2trt import TRTModule

            model_trt = TRTModule()
            model_trt.load_state_dict(torch.load(trt_file))

            x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()
            model(x)
            model = model_trt

        tracker = OnlineTracker(model_folder, min_cls_score=self.args.track_thresh)
        for cur_iter, (imgs, _, info_imgs, ids) in enumerate(
            progress_bar(self.dataloader)
        ):
            with torch.no_grad():
                # init tracker
                frame_id = info_imgs[2].item()
                video_id = info_imgs[3].item()
                img_file_name = info_imgs[4]
                video_name = img_file_name[0].split('/')[0]

                if video_id not in video_names:
                    video_names[video_id] = video_name
                if frame_id == 1:
                    tracker = OnlineTracker(model_folder, min_cls_score=self.args.track_thresh)
                    if len(results) != 0:
                        result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id - 1]))
                        write_results(result_filename, results)
                        results = []

                imgs = imgs.type(tensor_type)

                # skip the last iters since the batch size might not be
                # enough for batch inference
                is_time_record = cur_iter < len(self.dataloader) - 1
                if is_time_record:
                    start = time.time()

                outputs = model(imgs)
                if decoder is not None:
                    outputs = decoder(outputs, dtype=outputs.type())

                outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)

                if is_time_record:
                    infer_end = time_synchronized()
                    inference_time += infer_end - start

            output_results = self.convert_to_coco_format(outputs, info_imgs, ids)
            data_list.extend(output_results)

            # run tracking
            online_targets = tracker.update(outputs[0], info_imgs, self.img_size, img_file_name[0])
            online_tlwhs = []
            online_ids = []
            online_scores = []
            for t in online_targets:
                tlwh = t.tlwh
                tid = t.track_id
                vertical = tlwh[2] / tlwh[3] > 1.6
                if tlwh[2] * tlwh[3] > self.args.min_box_area and not vertical:
                    online_tlwhs.append(tlwh)
                    online_ids.append(tid)
                    online_scores.append(t.score)
            # save results
            results.append((frame_id, online_tlwhs, online_ids, online_scores))

            if is_time_record:
                track_end = time_synchronized()
                track_time += track_end - infer_end

            if cur_iter == len(self.dataloader) - 1:
                result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id]))
                write_results(result_filename, results)

        statistics = torch.cuda.FloatTensor([inference_time, track_time, n_samples])
        if distributed:
            data_list = gather(data_list, dst=0)
            data_list = list(itertools.chain(*data_list))
            torch.distributed.reduce(statistics, dst=0)

        eval_results = self.evaluate_prediction(data_list, statistics)
        synchronize()
        return eval_results

    def convert_to_coco_format(self, outputs, info_imgs, ids):
        data_list = []
        for (output, img_h, img_w, img_id) in zip(
            outputs, info_imgs[0], info_imgs[1], ids
        ):
            if output is None:
                continue
            output = output.cpu()

            bboxes = output[:, 0:4]

            # preprocessing: resize
            scale = min(
                self.img_size[0] / float(img_h), self.img_size[1] / float(img_w)
            )
            bboxes /= scale
            bboxes = xyxy2xywh(bboxes)

            cls = output[:, 6]
            scores = output[:, 4] * output[:, 5]
            for ind in range(bboxes.shape[0]):
                label = self.dataloader.dataset.class_ids[int(cls[ind])]
                pred_data = {
                    "image_id": int(img_id),
                    "category_id": label,
                    "bbox": bboxes[ind].numpy().tolist(),
                    "score": scores[ind].numpy().item(),
                    "segmentation": [],
                }  # COCO json format
                data_list.append(pred_data)
        return data_list

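    # A single entry appended to data_list above looks like the following
    # (values illustrative): {"image_id": 42, "category_id": 1,
    # "bbox": [x, y, w, h], "score": 0.91, "segmentation": []}, where the
    # score is the product of the objectness and class confidences.
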
    def evaluate_prediction(self, data_dict, statistics):
        if not is_main_process():
            return 0, 0, None

        logger.info("Evaluate in main process...")

        annType = ["segm", "bbox", "keypoints"]

        inference_time = statistics[0].item()
        track_time = statistics[1].item()
        n_samples = statistics[2].item()

        a_infer_time = 1000 * inference_time / (n_samples * self.dataloader.batch_size)
        a_track_time = 1000 * track_time / (n_samples * self.dataloader.batch_size)

        time_info = ", ".join(
            [
                "Average {} time: {:.2f} ms".format(k, v)
                for k, v in zip(
                    ["forward", "track", "inference"],
                    [a_infer_time, a_track_time, (a_infer_time + a_track_time)],
                )
            ]
        )

        info = time_info + "\n"

        # Evaluate the Dt (detection) json comparing with the ground truth
        if len(data_dict) > 0:
            cocoGt = self.dataloader.dataset.coco
            # TODO: since pycocotools can't process dict in py36, write data to json file.
            _, tmp = tempfile.mkstemp()
            with open(tmp, "w") as f:
                json.dump(data_dict, f)
            cocoDt = cocoGt.loadRes(tmp)
            '''
            try:
                from yolox.layers import COCOeval_opt as COCOeval
            except ImportError:
                from pycocotools.cocoeval import COCOeval
                logger.warning("Use standard COCOeval.")
            '''
            from yolox.layers import COCOeval_opt as COCOeval
            cocoEval = COCOeval(cocoGt, cocoDt, annType[1])
            cocoEval.evaluate()
            cocoEval.accumulate()
            redirect_string = io.StringIO()
            with contextlib.redirect_stdout(redirect_string):
                cocoEval.summarize()
            info += redirect_string.getvalue()
            return cocoEval.stats[0], cocoEval.stats[1], info
        else:
            return 0, 0, info
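

# A convenience dispatcher sketch (not part of the original module): it maps
# a tracker name onto the evaluate_* methods defined above. The name
# `run_mot_evaluation` and its keyword pass-through are assumptions for
# illustration only; deepsort/motdt additionally expect model_folder in
# kwargs.
def run_mot_evaluation(evaluator, model, tracker_type="bytetrack", **kwargs):
    dispatch = {
        "bytetrack": evaluator.evaluate,
        "sort": evaluator.evaluate_sort,
        "deepsort": evaluator.evaluate_deepsort,
        "motdt": evaluator.evaluate_motdt,
    }
    if tracker_type not in dispatch:
        raise ValueError("unknown tracker_type: {}".format(tracker_type))
    return dispatch[tracker_type](model, **kwargs)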