
Annotations for test-time adaptation are not compatible; have to compare shape and value

master
Mahdi Abdollah Pour, 2 years ago
commit 64d356450f
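
The incompatibility named in the commit message can be made explicit with a guard that compares two annotation arrays by shape first and then by value. The sketch below is an editor's illustration of that idea, not code from this commit; annotations_compatible, expected, and loaded are hypothetical names.

import numpy as np

def annotations_compatible(expected, loaded, atol=1e-6):
    # Editor's sketch: two annotation arrays are compatible only if they
    # agree in shape and (within tolerance) in value.
    expected = np.asarray(expected, dtype=np.float32)
    loaded = np.asarray(loaded, dtype=np.float32)
    if expected.shape != loaded.shape:
        return False
    return bool(np.allclose(expected, loaded, atol=atol))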

experiments/bytetrack_on_17half.sh (+18, -0)

@@ -0,0 +1,18 @@
#PBS -N bytetrack_17_on_17half
#PBS -m abe
#PBS -M [email protected]
#PBS -l nodes=1:ppn=1:gpus=1
#PBS -q cuda9


export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env


cd /home/abdollahpour.ce.sharif/ByteTrack



python tools/track.py -t mot -f exps/example/mot/yolox_x_mot17_half.py -d 1 -b 1 --fp16 -c /home/abdollahpour.ce.sharif/ByteTrackModels/bytetrack_x_mot17.pth.tar --local_rank 0 -expn bytetrack_17_on_17half

experiments/bytetrack_on_20.sh (+18, -0)

@@ -0,0 +1,18 @@
#PBS -N bytetrack_17_on_20
#PBS -m abe
#PBS -M [email protected]
#PBS -l nodes=1:ppn=1:gpus=1
#PBS -q cuda9


export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env


cd /home/abdollahpour.ce.sharif/ByteTrack



python tools/track.py -t mot -f exps/example/mot/yolox_x_mot20_on_mot20.py -d 1 -b 1 --fp16 -c /home/abdollahpour.ce.sharif/ByteTrackModels/bytetrack_x_mot17.pth.tar --local_rank 0 -expn bytetrack_17_on_20 --mot20

experiments/meta_gpu_resume.sh (+2, -1)

@@ -8,6 +8,7 @@
export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH


source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env

@@ -16,4 +17,4 @@ cd /home/abdollahpour.ce.sharif/ByteTrack



python tools/train.py -t metamot -f exps/example/metamot/yolox_x_mot17_on_mot20.py -d 1 -b 1 --fp16 -o --resume --start_epoch 2 -c /home/abdollahpour.ce.sharif/ByteTrack/meta_experiments/train_17_on_20/latest_ckpt.pth.tar --local_rank 0 -expn train_17_on_20_resume
python tools/train.py -t metamot -f exps/example/metamot/yolox_x_mot17_on_mot20.py -d 1 -b 1 --fp16 -o --resume --start_epoch 6 -c /home/abdollahpour.ce.sharif/ByteTrack/meta_experiments/train_17_on_20_resume/latest_ckpt.pth.tar --local_rank 0 -expn train_17_on_20_resume2

experiments/test.sh (+11, -0)

@@ -0,0 +1,11 @@
export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env


cd /home/abdollahpour.ce.sharif/ByteTrack



python tools/test.py

experiments/track_metamot17_on_17.sh (+18, -0)

@@ -0,0 +1,18 @@
#PBS -N track_metamot17_on_17_no_adapt
#PBS -m abe
#PBS -M [email protected]
#PBS -l nodes=1:ppn=1:gpus=1
#PBS -q cuda9


export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env


cd /home/abdollahpour.ce.sharif/ByteTrack



python tools/track.py -t metamot -f exps/example/metamot/yolox_x_mot17_on_mot17.py -d 1 -b 1 --fp16 -c /home/abdollahpour.ce.sharif/ByteTrack/meta_experiments/train_17_on_20_resume2/best_ckpt.pth.tar --local_rank 0 -expn track_metamot17_on_17

experiments/track_on_20.sh (+18, -0)

@@ -0,0 +1,18 @@
#PBS -N track_17_on_20_ada_12
#PBS -m abe
#PBS -M [email protected]
#PBS -l nodes=1:ppn=1:gpus=1
#PBS -q cuda9


export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env


cd /home/abdollahpour.ce.sharif/ByteTrack



python tools/track.py -t metamot -f exps/example/metamot/yolox_x_mot17_on_mot20.py -d 1 -b 1 -c /home/abdollahpour.ce.sharif/ByteTrack/meta_experiments/train_17_on_20_resume2/best_ckpt.pth.tar --local_rank 0 -expn track_17_on_20_ada_12 --mot20 --adaptation_period 12

experiments/weak_label.sh (+19, -0)

@@ -0,0 +1,19 @@
#PBS -N weak_labels
#PBS -m abe
#PBS -M [email protected]
#PBS -l nodes=1:ppn=1:gpus=1
#PBS -q cuda9

export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH


source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env


cd /home/abdollahpour.ce.sharif/ByteTrack



python tools/generate_weak_labels.py

exps/example/metamot/yolox_x_mot17_on_mot17.py (+158, -0)

@@ -0,0 +1,158 @@
# encoding: utf-8
import os
import random
import torch
import torch.nn as nn
import torch.distributed as dist

from yolox.exp import MetaExp as MyMetaExp
from yolox.data import get_yolox_datadir
from os import listdir
from os.path import isfile, join


class Exp(MyMetaExp):
def __init__(self):
super(Exp, self).__init__()
self.num_classes = 1
self.depth = 1.33
self.width = 1.25
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
self.train_dir = '/home/abdollahpour.ce.sharif/ByteTrackData/MOT17/annotations'
onlyfiles = [f for f in listdir(self.train_dir) if isfile(join(self.train_dir, f))]
self.train_anns = [file for file in onlyfiles if 'train' in file and 'FRCNN' in file]
# # TODO: remove
# self.train_anns = self.train_anns[3:]

self.val_dir = '/home/abdollahpour.ce.sharif/ByteTrackData/MOT17/annotations'
onlyfiles = [f for f in listdir(self.val_dir) if isfile(join(self.val_dir, f))]
self.val_anns = [file for file in onlyfiles if 'train' in file and 'FRCNN' in file]
# self.val_anns = self.val_anns[-1:]
print('train_anns', self.train_anns)
print('val_anns', self.val_anns)
self.input_size = (800, 1440)
self.test_size = (800, 1440)
# self.test_size = (736, 1920)
self.random_size = (20, 36)
self.max_epoch = 80
self.print_interval = 250
self.eval_interval = 5
self.test_conf = 0.1
self.nmsthre = 0.7
self.no_aug_epochs = 10
# self.basic_lr_per_img = 0.001 / 64.0
self.basic_lr_per_img = 0.0001 / 64.0
self.warmup_epochs = 1

def get_data_loaders(self, batch_size, is_distributed, no_aug=False):
from yolox.data import (
MOTDataset,
TrainTransform,
YoloBatchSampler,
DataLoader,
InfiniteSampler,
MosaicDetection,
)
train_loaders = []
for train_ann in self.train_anns:
dataset = MOTDataset(
data_dir=os.path.join(get_yolox_datadir(), "MOT17"),
json_file=train_ann,
name='train',
img_size=self.input_size,
preproc=TrainTransform(
rgb_means=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225),
max_labels=500,
),
)
dataset = MosaicDetection(
dataset,
mosaic=not no_aug,
img_size=self.input_size,
preproc=TrainTransform(
rgb_means=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225),
max_labels=1000,
),
degrees=self.degrees,
translate=self.translate,
scale=self.scale,
shear=self.shear,
perspective=self.perspective,
enable_mixup=self.enable_mixup,
)

self.dataset = dataset

if is_distributed:
batch_size = batch_size // dist.get_world_size()

sampler = InfiniteSampler(
len(self.dataset), seed=self.seed if self.seed else 0
)

batch_sampler = YoloBatchSampler(
sampler=sampler,
batch_size=batch_size,
drop_last=False,
input_dimension=self.input_size,
mosaic=not no_aug,
)

dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
dataloader_kwargs["batch_sampler"] = batch_sampler
train_loader = DataLoader(self.dataset, **dataloader_kwargs)
train_loaders.append(train_loader)

return train_loaders

def get_eval_loaders(self, batch_size, is_distributed, testdev=False):
from yolox.data import MOTDataset, ValTransform, ValTransformWithPseudo
val_loaders = []
for val_ann in self.val_anns:
valdataset = MOTDataset(
data_dir=os.path.join(get_yolox_datadir(), "MOT17"),
json_file=val_ann,
img_size=self.test_size,
name='train', # change to train when running on training set
preproc=ValTransformWithPseudo(
rgb_means=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225),
),
)

if is_distributed:
batch_size = batch_size // dist.get_world_size()
sampler = torch.utils.data.distributed.DistributedSampler(
valdataset, shuffle=False
)
else:
sampler = torch.utils.data.SequentialSampler(valdataset)

dataloader_kwargs = {
"num_workers": self.data_num_workers,
"pin_memory": True,
"sampler": sampler,
}
dataloader_kwargs["batch_size"] = batch_size
val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)
val_loaders.append(val_loader)
return val_loaders

def get_evaluator(self, batch_size, is_distributed, testdev=False):
from yolox.evaluators import COCOEvaluator

val_loaders = self.get_eval_loaders(batch_size, is_distributed, testdev=testdev)
evaluators = []
for val_loader in val_loaders:
evaluator = COCOEvaluator(
dataloader=val_loader,
img_size=self.test_size,
confthre=self.test_conf,
nmsthre=self.nmsthre,
num_classes=self.num_classes,
testdev=testdev,
)
evaluators.append(evaluator)
return evaluators

exps/example/metamot/yolox_x_mot17_on_mot20.py (+20, -14)

@@ -28,6 +28,7 @@ class Exp(MyMetaExp):
onlyfiles = [f for f in listdir(self.val_dir) if isfile(join(self.val_dir, f))]
self.val_anns = [file for file in onlyfiles if 'train' in file and 'MOT20' in file]
# self.val_anns = self.val_anns[-1:]
print('train_anns', self.train_anns)
print('val_anns', self.val_anns)
self.input_size = (800, 1440)
@@ -35,12 +36,13 @@ class Exp(MyMetaExp):
# self.test_size = (736, 1920)
self.random_size = (20, 36)
self.max_epoch = 80
self.print_interval = 100
self.print_interval = 250
self.eval_interval = 5
self.test_conf = 0.001
self.nmsthre = 0.7
self.no_aug_epochs = 10
self.basic_lr_per_img = 0.001 / 64.0
# self.basic_lr_per_img = 0.001 / 64.0
self.basic_lr_per_img = 0.0001 / 64.0
self.warmup_epochs = 1

def get_data_loaders(self, batch_size, is_distributed, no_aug=False):
@@ -107,7 +109,7 @@ class Exp(MyMetaExp):
return train_loaders

def get_eval_loaders(self, batch_size, is_distributed, testdev=False):
from yolox.data import MOTDataset, ValTransform
from yolox.data import MOTDataset, ValTransform, ValTransformWithPseudo
val_loaders = []
for val_ann in self.val_anns:
valdataset = MOTDataset(
@@ -115,10 +117,11 @@ class Exp(MyMetaExp):
json_file=val_ann,
img_size=self.test_size,
name='train', # change to train when running on training set
preproc=ValTransform(
preproc=ValTransformWithPseudo(
rgb_means=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225),
),
load_weak=True
)

if is_distributed:
@@ -142,13 +145,16 @@ class Exp(MyMetaExp):
def get_evaluator(self, batch_size, is_distributed, testdev=False):
from yolox.evaluators import COCOEvaluator

val_loader = self.get_eval_loaders(batch_size, is_distributed, testdev=testdev)
evaluator = COCOEvaluator(
dataloader=val_loader,
img_size=self.test_size,
confthre=self.test_conf,
nmsthre=self.nmsthre,
num_classes=self.num_classes,
testdev=testdev,
)
return evaluator
val_loaders = self.get_eval_loaders(batch_size, is_distributed, testdev=testdev)
evaluators = []
for val_loader in val_loaders:
evaluator = COCOEvaluator(
dataloader=val_loader,
img_size=self.test_size,
confthre=self.test_conf,
nmsthre=self.nmsthre,
num_classes=self.num_classes,
testdev=testdev,
)
evaluators.append(evaluator)
return evaluators

exps/example/mot/yolox_x_mot17_half.py (+2, -2)

@@ -40,7 +40,7 @@ class Exp(MyExp):
)

dataset = MOTDataset(
data_dir=os.path.join(get_yolox_datadir(), "mot"),
data_dir=os.path.join(get_yolox_datadir(), "MOT17"),
json_file=self.train_ann,
name='train',
img_size=self.input_size,
@@ -95,7 +95,7 @@ class Exp(MyExp):
from yolox.data import MOTDataset, ValTransform

valdataset = MOTDataset(
data_dir=os.path.join(get_yolox_datadir(), "mot"),
data_dir=os.path.join(get_yolox_datadir(), "MOT17"),
json_file=self.val_ann,
img_size=self.test_size,
name='train',

exps/example/mot/yolox_x_mot20_on_mot20.py (+139, -0)

@@ -0,0 +1,139 @@
# encoding: utf-8
import os
import random
import torch
import torch.nn as nn
import torch.distributed as dist

from yolox.exp import Exp as MyExp
from yolox.data import get_yolox_datadir

class Exp(MyExp):
def __init__(self):
super(Exp, self).__init__()
self.num_classes = 1
self.depth = 1.33
self.width = 1.25
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
self.train_ann = "train.json"
self.val_ann = "train.json" # change to train.json when running on training set
self.input_size = (896, 1600)
self.test_size = (896, 1600)
#self.test_size = (736, 1920)
self.random_size = (20, 36)
self.max_epoch = 80
self.print_interval = 20
self.eval_interval = 5
self.test_conf = 0.001
self.nmsthre = 0.7
self.no_aug_epochs = 10
self.basic_lr_per_img = 0.001 / 64.0
self.warmup_epochs = 1

def get_data_loader(self, batch_size, is_distributed, no_aug=False):
from yolox.data import (
MOTDataset,
TrainTransform,
YoloBatchSampler,
DataLoader,
InfiniteSampler,
MosaicDetection,
)

dataset = MOTDataset(
data_dir=os.path.join(get_yolox_datadir(), "MOT20"),
json_file=self.train_ann,
name='',
img_size=self.input_size,
preproc=TrainTransform(
rgb_means=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225),
max_labels=600,
),
)

dataset = MosaicDetection(
dataset,
mosaic=not no_aug,
img_size=self.input_size,
preproc=TrainTransform(
rgb_means=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225),
max_labels=1200,
),
degrees=self.degrees,
translate=self.translate,
scale=self.scale,
shear=self.shear,
perspective=self.perspective,
enable_mixup=self.enable_mixup,
)

self.dataset = dataset

if is_distributed:
batch_size = batch_size // dist.get_world_size()

sampler = InfiniteSampler(
len(self.dataset), seed=self.seed if self.seed else 0
)

batch_sampler = YoloBatchSampler(
sampler=sampler,
batch_size=batch_size,
drop_last=False,
input_dimension=self.input_size,
mosaic=not no_aug,
)

dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
dataloader_kwargs["batch_sampler"] = batch_sampler
train_loader = DataLoader(self.dataset, **dataloader_kwargs)

return train_loader

def get_eval_loader(self, batch_size, is_distributed, testdev=False):
from yolox.data import MOTDataset, ValTransform

valdataset = MOTDataset(
data_dir=os.path.join(get_yolox_datadir(), "MOT20"),
json_file=self.val_ann,
img_size=self.test_size,
name='train', # change to train when running on training set
preproc=ValTransform(
rgb_means=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225),
),
)

if is_distributed:
batch_size = batch_size // dist.get_world_size()
sampler = torch.utils.data.distributed.DistributedSampler(
valdataset, shuffle=False
)
else:
sampler = torch.utils.data.SequentialSampler(valdataset)

dataloader_kwargs = {
"num_workers": self.data_num_workers,
"pin_memory": True,
"sampler": sampler,
}
dataloader_kwargs["batch_size"] = batch_size
val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)

return val_loader

def get_evaluator(self, batch_size, is_distributed, testdev=False):
from yolox.evaluators import COCOEvaluator

val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev)
evaluator = COCOEvaluator(
dataloader=val_loader,
img_size=self.test_size,
confthre=self.test_conf,
nmsthre=self.nmsthre,
num_classes=self.num_classes,
testdev=testdev,
)
return evaluator

tools/generate_mot20_weak_labels _metaway.py (+109, -0)

@@ -0,0 +1,109 @@
import os
import numpy as np
import json
import cv2

# Use the same script for MOT16
DATA_PATH = '/media/external_10TB/10TB/vision/ByteTrackData/MOT20'
OUT_PATH = os.path.join(DATA_PATH, 'annotations')
SPLITS = ['train', 'test'] # --> split training data to train_half and val_half.
HALF_VIDEO = True
CREATE_SPLITTED_ANN = True
CREATE_SPLITTED_DET = True

if __name__ == '__main__':

if not os.path.exists(OUT_PATH):
os.makedirs(OUT_PATH)

for split in SPLITS:
if split == "test":
data_path = os.path.join(DATA_PATH, 'test')
else:
data_path = os.path.join(DATA_PATH, 'train')
seqs = os.listdir(data_path)
for seq in sorted(seqs):
out_path = os.path.join(OUT_PATH, '{}_{}_weak.json'.format(split, seq))
out = {'images': [], 'annotations': [], 'videos': [],
'categories': [{'id': 1, 'name': 'pedestrian'}]}
image_cnt = 0
ann_cnt = 0
video_cnt = 0
tid_curr = 0
tid_last = -1
if '.DS_Store' in seq:
continue
video_cnt += 1 # video sequence number.
out['videos'].append({'id': video_cnt, 'file_name': seq})
seq_path = os.path.join(data_path, seq)
img_path = os.path.join(seq_path, 'img1')
ann_path = os.path.join(seq_path, 'gt/gt.txt')
images = os.listdir(img_path)
num_images = len([image for image in images if 'jpg' in image]) # half and half

if HALF_VIDEO and ('half' in split):
image_range = [0, num_images // 2] if 'train' in split else \
[num_images // 2 + 1, num_images - 1]
else:
image_range = [0, num_images - 1]

for i in range(num_images):
if i < image_range[0] or i > image_range[1]:
continue
img = cv2.imread(os.path.join(data_path, '{}/img1/{:06d}.jpg'.format(seq, i + 1)))
height, width = img.shape[:2]
image_info = {'file_name': '{}/img1/{:06d}.jpg'.format(seq, i + 1), # image name.
'id': image_cnt + i + 1, # image number in the entire training set.
'frame_id': i + 1 - image_range[0],
# image number in the video sequence, starting from 1.
'prev_image_id': image_cnt + i if i > 0 else -1,
# image number in the entire training set.
'next_image_id': image_cnt + i + 2 if i < num_images - 1 else -1,
'video_id': video_cnt,
'height': height, 'width': width}
out['images'].append(image_info)
print('{}: {} images'.format(seq, num_images))
if split != 'test':
det_path = os.path.join(seq_path, 'det/det.txt')
anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',')
dets = np.loadtxt(det_path, dtype=np.float32, delimiter=',')

print('{} ann images'.format(int(anns[:, 0].max())))
for i in range(anns.shape[0]):
frame_id = int(anns[i][0])
if frame_id - 1 < image_range[0] or frame_id - 1 > image_range[1]:
continue
track_id = int(anns[i][1])
cat_id = int(anns[i][7])
ann_cnt += 1
if not ('15' in DATA_PATH):
# if not (float(anns[i][8]) >= 0.25): # visibility.
# continue
if not (int(anns[i][6]) == 1): # whether ignore.
continue
if int(anns[i][7]) in [3, 4, 5, 6, 9, 10, 11]: # Non-person
continue
if int(anns[i][7]) in [2, 7, 8, 12]: # Ignored person
# category_id = -1
continue
else:
category_id = 1 # pedestrian(non-static)
if not track_id == tid_last:
tid_curr += 1
tid_last = track_id
else:
category_id = 1
ann = {'id': ann_cnt,
'category_id': category_id,
'image_id': image_cnt + frame_id,
'track_id': -1,
'bbox': '',
'conf': '',
'iscrowd': 0,
'area': ''}
# float(anns[i][4] * anns[i][5])
out['annotations'].append(ann)
image_cnt += num_images
print(tid_curr, tid_last)
print('loaded {} for {} images and {} samples'.format(split, len(out['images']), len(out['annotations'])))
json.dump(out, open(out_path, 'w'))
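
For orientation, each per-sequence file written above is COCO-style JSON whose bbox/conf/area fields are deliberately left empty, since only weak bookkeeping is kept. The snippet below is an editor's sketch of one plausible file; the field names come from the code above, the values are illustrative only.

{
  "videos": [{"id": 1, "file_name": "MOT20-01"}],
  "categories": [{"id": 1, "name": "pedestrian"}],
  "images": [{"file_name": "MOT20-01/img1/000001.jpg", "id": 1, "frame_id": 1,
              "prev_image_id": -1, "next_image_id": 2, "video_id": 1,
              "height": 1080, "width": 1920}],
  "annotations": [{"id": 1, "category_id": 1, "image_id": 1, "track_id": -1,
                   "bbox": "", "conf": "", "iscrowd": 0, "area": ""}]
}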

tools/generate_weak_labels.py (+323, -0)

@@ -0,0 +1,323 @@
# Mahdi Abdollahpour, 22/12/2021, 02:26 PM, PyCharm, ByteTrack

import os
import time
from loguru import logger
# from opts import opts

from os import listdir
from os.path import isfile, join
import cv2
import numpy as np
import torch

from yolox.data.data_augment import ValTransform
# from yolox.data.datasets import COCO_CLASSES
from yolox.exp import get_exp
from yolox.utils import fuse_model, get_model_info, postprocess, vis
from yolox import statics

COCO_CLASSES = (
"person",
"bicycle",
"car",
"motorcycle",
"airplane",
"bus",
"train",
"truck",
"boat",
"traffic light",
"fire hydrant",
"stop sign",
"parking meter",
"bench",
"bird",
"cat",
"dog",
"horse",
"sheep",
"cow",
"elephant",
"bear",
"zebra",
"giraffe",
"backpack",
"umbrella",
"handbag",
"tie",
"suitcase",
"frisbee",
"skis",
"snowboard",
"sports ball",
"kite",
"baseball bat",
"baseball glove",
"skateboard",
"surfboard",
"tennis racket",
"bottle",
"wine glass",
"cup",
"fork",
"knife",
"spoon",
"bowl",
"banana",
"apple",
"sandwich",
"orange",
"broccoli",
"carrot",
"hot dog",
"pizza",
"donut",
"cake",
"chair",
"couch",
"potted plant",
"bed",
"dining table",
"toilet",
"tv",
"laptop",
"mouse",
"remote",
"keyboard",
"cell phone",
"microwave",
"oven",
"toaster",
"sink",
"refrigerator",
"book",
"clock",
"vase",
"scissors",
"teddy bear",
"hair drier",
"toothbrush",
)


IMAGE_EXT = [".jpg", ".jpeg", ".webp", ".bmp", ".png"]

use_cuda = True

MOT = 'MOT20'
section = 'train'

root_dir = os.path.join(statics.DATA_PATH, MOT, section)

classes = ['person', 'bicycle', 'car', 'motorcycle', 'truck', 'bus']
fuse = False


def get_labels(bboxes, cls, scores, th, tw):
# NOTE: despite the names, th is the original image width and tw its height
# (see the call in image_demo); boxes arrive as pixel xyxy and are returned
# as normalized [class, id, cx, cy, w, h] rows
id = 0
labels = []

# print(pred['scores'])
n, _ = bboxes.shape
for i in range(n):

if COCO_CLASSES[int(cls[i])] not in classes:
# print('Rejecting',COCO_CLASSES[int(cls[i])],scores[i])
continue
if use_cuda:
box = bboxes[i, :].detach().cpu().numpy()
else:
box = bboxes[i, :].detach().numpy()
## TODO: check if matches
# print(box[0], box[1], box[2], box[3], '--', th, tw)
# print(box[0] / th, box[1] / tw, box[2] / th, box[3] / tw)
x = box[0] / th
y = box[1] / tw
w = (box[2] - box[0]) / th
h = (box[3] - box[1]) / tw
x += w / 2
y += h / 2
label = [0, id, x, y, w, h]
# label = [0, id, box[0], box[1], (box[2] - box[0]), (box[3] - box[1])]
id += 1
labels.append(label)
# print(id)
labels0 = np.array(labels)
return labels0


class Predictor(object):
def __init__(
self,
model,
exp,
cls_names=COCO_CLASSES,
trt_file=None,
decoder=None,
device="cpu",
fp16=False,
legacy=False,
):
self.model = model
self.cls_names = cls_names
self.decoder = decoder
self.num_classes = exp.num_classes
self.confthre = 0.1
self.nmsthre = 0.3
self.test_size = exp.test_size
self.device = device
self.fp16 = fp16
self.preproc = ValTransform()
# if trt_file is not None:
# from torch2trt import TRTModule
#
# model_trt = TRTModule()
# model_trt.load_state_dict(torch.load(trt_file))
#
# x = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda()
# self.model(x)
# self.model = model_trt

def inference(self, img):
img_info = {"id": 0}
if isinstance(img, str):
img_info["file_name"] = os.path.basename(img)
img = cv2.imread(img)
else:
img_info["file_name"] = None

height, width = img.shape[:2]
img_info["height"] = height
img_info["width"] = width
img_info["raw_img"] = img

ratio = min(self.test_size[0] / img.shape[0], self.test_size[1] / img.shape[1])
# print(self.test_size[0] , img.shape[0], self.test_size[1] , img.shape[1])
img_info["ratio"] = ratio

img, _ = self.preproc(img, None, self.test_size)
img = torch.from_numpy(img).unsqueeze(0)
img = img.float()
if self.device == "gpu":
img = img.cuda()
# if self.fp16:
# img = img.half() # to FP16

with torch.no_grad():
t0 = time.time()
outputs = self.model(img)
if self.decoder is not None:
outputs = self.decoder(outputs, dtype=outputs.type())
outputs = postprocess(
outputs, self.num_classes, self.confthre,
self.nmsthre
)
# logger.info("Infer time: {:.4f}s".format(time.time() - t0))
# print(img.shape)
_, _, tw, th = img.shape
img_info['tw'] = tw
img_info['th'] = th
return outputs, img_info

def visual(self, output, img_info, cls_conf=0.35):
ratio = img_info["ratio"]
img = img_info["raw_img"]
if output is None:
return img
output = output.cpu()

bboxes = output[:, 0:4]

# preprocessing: resize
bboxes /= ratio

cls = output[:, 6]
scores = output[:, 4] * output[:, 5]

vis_res = vis(img, bboxes, scores, cls, cls_conf, self.cls_names)
return vis_res


def image_demo(predictor, path):
folders = [f for f in listdir(path)]
# folders = folders[3:]

for folder in folders:
print(folder)
images_folder = join(join(path, folder), 'img1')
images = [f for f in listdir(images_folder) if isfile(join(images_folder, f))]
images = [a for a in images if a.endswith('.jpg')]
images.sort()

for i, image_name in enumerate(images):
if i % 300 == 0:
print(folder, i)
outputs, img_info = predictor.inference(join(images_folder, image_name))

ratio = img_info["ratio"]
# print(ratio)
img = img_info["raw_img"]
output = outputs[0]
if output is None:
continue
output = output.cpu()

bboxes = output[:, 0:4]

# preprocessing: resize
bboxes /= ratio

cls = output[:, 6]
scores = output[:, 4] * output[:, 5]
# print('cls',cls)
labels0 = get_labels(bboxes, cls, scores, img_info["width"], img_info["height"])

# out_path = join(images_folder, 'weak_' + imm + '.npy')
# print(imm)
np.savetxt(join(images_folder, image_name + '_weak_' + model_name + '.txt'), labels0, delimiter=' ')


def main(exp, ckpt_file):
model = exp.get_model()

if use_cuda:
model = model.cuda()
device = 'gpu'
else:
device = 'cpu'
model.eval()

logger.info("loading checkpoint")
ckpt = torch.load(ckpt_file, map_location="cpu")
# load the model state dict
model.load_state_dict(ckpt["model"])
logger.info("loaded checkpoint done.")

if fuse:
logger.info("\tFusing model...")
model = fuse_model(model)

trt_file = None
decoder = None

predictor = Predictor(
model, exp, COCO_CLASSES, trt_file, decoder,
device, False, False,
)
current_time = time.localtime()
image_demo(predictor, root_dir)


model_name = 'yolox-x'
# cuda = torch.device('cuda:1')
if __name__ == "__main__":
# print(COCO_CLASSES)
# if use_cuda:
# torch.cuda.set_device(1)
# with torch.cuda.device(1):
# os.environ['CUDA_VISIBLE_DEVICES'] = '1'
ckpt_file = '/home/abdollahpour.ce.sharif/yolox_x.pth'

exp = get_exp(None, model_name)
main(exp, ckpt_file)
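
Each '<image>.jpg_weak_yolox-x.txt' file written by this script holds one row per kept detection, in the normalized format assembled by get_labels above. The helper below is an editor's sketch (read_weak_labels is a hypothetical name, not part of the repo) showing how such a file maps back to pixel xyxy boxes:

import numpy as np

def read_weak_labels(path, img_width, img_height):
    # rows of [class, id, cx, cy, w, h]; cx and w are fractions of the image
    # width, cy and h fractions of the image height
    rows = np.loadtxt(path).reshape(-1, 6)
    cx, cy, w, h = rows[:, 2], rows[:, 3], rows[:, 4], rows[:, 5]
    x1 = (cx - w / 2) * img_width
    y1 = (cy - h / 2) * img_height
    x2 = x1 + w * img_width
    y2 = y1 + h * img_height
    return np.stack([x1, y1, x2, y2], axis=1)

# e.g. boxes = read_weak_labels('000001.jpg_weak_yolox-x.txt', 1920, 1080)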

tools/test.py (+23, -0)

@@ -0,0 +1,23 @@
# Mahdi Abdollahpour, 30/12/2021, 07:47 PM, PyCharm, ByteTrack
import os

from yolox.core import launch
from yolox.data import MOTDataset, ValTransform, ValTransformWithPseudo, get_yolox_datadir

test_size = (896, 1600)

if __name__ == "__main__":
valdataset = MOTDataset(
data_dir=os.path.join(get_yolox_datadir(), "MOT20"),
json_file='train_MOT20-01.json',
img_size=test_size,
name='train', # change to train when running on training set
preproc=ValTransformWithPseudo(
rgb_means=(0.485, 0.456, 0.406),
std=(0.229, 0.224, 0.225),
),
load_weak=True
)


for batch in valdataset:
print(batch)
exit()

tools/track.py (+127, -74)

@@ -18,6 +18,8 @@ import glob
import motmetrics as mm
from collections import OrderedDict
from pathlib import Path
import learn2learn as l2l
import yolox.statics as statics


def make_parser():
@@ -26,6 +28,10 @@ def make_parser():
parser.add_argument("-expn", "--experiment-name", type=str, default=None)
parser.add_argument("-n", "--name", type=str, default=None, help="model name")

parser.add_argument(
"--adaptation_period", default=4, type=int, help="if 4, then adapts to one batch in four batches"
)

# distributed
parser.add_argument(
"--dist-backend", default="nccl", type=str, help="distributed backend"
@@ -109,6 +115,8 @@ def make_parser():
parser.add_argument("--match_thresh", type=float, default=0.9, help="matching threshold for tracking")
parser.add_argument("--min-box-area", type=float, default=100, help='filter out tiny boxes')
parser.add_argument("--mot20", dest="mot20", default=False, action="store_true", help="test mot20.")

parser.add_argument("--use_existing_files", default=False, action="store_true", help="to use already created files")
return parser


@@ -126,38 +134,18 @@ def compare_dataframes(gts, ts):
return accs, names


def process_loader(args, val_loader, model, is_distributed):
if args.seed is not None:
random.seed(args.seed)
torch.manual_seed(args.seed)
cudnn.deterministic = True
warnings.warn(
"You have chosen to seed testing. This will turn on the CUDNN deterministic setting, "
)

# set environment variables for distributed training
cudnn.benchmark = True
rank = args.local_rank

# rank = get_local_rank()

def process_loader(args, exp, val_loader, model, is_distributed, trt_file, decoder, val_ann):
file_name = os.path.join(exp.output_dir, args.experiment_name)
rank = args.local_rank
if rank == 0:
os.makedirs(file_name, exist_ok=True)

results_folder = os.path.join(file_name, "track_results")
os.makedirs(results_folder, exist_ok=True)

setup_logger(file_name, distributed_rank=rank, filename="val_log.txt", mode="a")
logger.info("Args: {}".format(args))

if args.conf is not None:
exp.test_conf = args.conf
if args.nms is not None:
exp.nmsthre = args.nms
if args.tsize is not None:
exp.test_size = (args.tsize, args.tsize)
adaptation_period = None
if args.task == 'metamot':
adaptation_period = args.adaptation_period

evaluator = MOTEvaluator(
args=args,
@@ -167,61 +155,35 @@ def process_loader(args, val_loader, model, is_distributed):
nmsthre=exp.nmsthre,
num_classes=exp.num_classes,
)

torch.cuda.set_device(rank)
model.cuda(rank)
model.eval()

if not args.speed and not args.trt:
if args.ckpt is None:
ckpt_file = os.path.join(file_name, "best_ckpt.pth.tar")
else:
ckpt_file = args.ckpt
logger.info("loading checkpoint")
loc = "cuda:{}".format(rank)
ckpt = torch.load(ckpt_file, map_location=loc)
# load the model state dict
model.load_state_dict(ckpt["model"])
logger.info("loaded checkpoint done.")

if is_distributed:
model = DDP(model, device_ids=[rank])

if args.fuse:
logger.info("\tFusing model...")
model = fuse_model(model)

if args.trt:
assert (
not args.fuse and not is_distributed and args.batch_size == 1
), "TensorRT model is not support model fusing and distributed inferencing!"
trt_file = os.path.join(file_name, "model_trt.pth")
assert os.path.exists(
trt_file
), "TensorRT model is not found!\n Run tools/trt.py first!"
model.head.decode_in_inference = False
decoder = model.head.decode_outputs
else:
trt_file = None
decoder = None

# start evaluate
*_, summary = evaluator.evaluate(
model, is_distributed, args.fp16, trt_file, decoder, exp.test_size, results_folder
model, is_distributed, args.fp16, trt_file, decoder, exp.test_size, results_folder,
adaptation_period=adaptation_period,
)
logger.info("\n" + summary)


def eval_MOT(args, exp, val_ann=None):
file_name = os.path.join(exp.output_dir, args.experiment_name)
rank = args.local_rank
if rank == 0:
os.makedirs(file_name, exist_ok=True)

results_folder = os.path.join(file_name, "track_results")
os.makedirs(results_folder, exist_ok=True)

# evaluate MOTA
mm.lap.default_solver = 'lap'

if exp.val_ann == 'val_half.json':
if val_ann == 'val_half.json':
gt_type = '_val_half'
else:
gt_type = ''
print('gt_type', gt_type)
if args.mot20:
gtfiles = glob.glob(os.path.join('datasets/MOT20/train', '*/gt/gt{}.txt'.format(gt_type)))
gtfiles = glob.glob(os.path.join(statics.DATA_PATH, 'MOT20/train', '*/gt/gt{}.txt'.format(gt_type)))
else:
gtfiles = glob.glob(os.path.join('datasets/mot/train', '*/gt/gt{}.txt'.format(gt_type)))
gtfiles = glob.glob(os.path.join(statics.DATA_PATH, 'MOT17/train', '*/gt/gt{}.txt'.format(gt_type)))
print('gt_files', gtfiles)
tsfiles = [f for f in glob.glob(os.path.join(results_folder, '*.txt')) if
not os.path.basename(f).startswith('eval')]
@@ -267,23 +229,114 @@ def process_loader(args, val_loader, model, is_distributed):
logger.info('Completed')


def load_model(args, exp, is_distributed):
model = exp.get_model()
logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size)))
if args.seed is not None:
random.seed(args.seed)
torch.manual_seed(args.seed)
cudnn.deterministic = True
warnings.warn(
"You have chosen to seed testing. This will turn on the CUDNN deterministic setting, "
)

# set environment variables for distributed training
cudnn.benchmark = True
rank = args.local_rank

# rank = get_local_rank()

file_name = os.path.join(exp.output_dir, args.experiment_name)

if rank == 0:
os.makedirs(file_name, exist_ok=True)

setup_logger(file_name, distributed_rank=rank, filename="val_log.txt", mode="a")
logger.info("Args: {}".format(args))

if args.conf is not None:
exp.test_conf = args.conf
if args.nms is not None:
exp.nmsthre = args.nms
if args.tsize is not None:
exp.test_size = (args.tsize, args.tsize)

if args.task == "metamot":
model = l2l.algorithms.MAML(model, lr=exp.inner_lr, first_order=exp.first_order, allow_nograd=True)
torch.cuda.set_device(rank)
model.cuda(rank)
model.eval()

if not args.speed and not args.trt:
if args.ckpt is None:
ckpt_file = os.path.join(file_name, "best_ckpt.pth.tar")
else:
ckpt_file = args.ckpt

logger.info("loading checkpoint")
loc = "cuda:{}".format(rank)
ckpt = torch.load(ckpt_file, map_location=loc)

# handling meta models
new_dict = {}
if (not list(ckpt["model"].keys())[0].startswith('module')) and args.task == "metamot":
for key in ckpt["model"].keys():
if not key.startswith('module.'):
new_dict['module.' + key] = ckpt["model"][key]
else:
new_dict[key] = ckpt["model"][key]
del ckpt["model"]
ckpt["model"] = new_dict

# load the model state dict
model.load_state_dict(ckpt["model"])
logger.info("loaded checkpoint done.")

if is_distributed:
model = DDP(model, device_ids=[rank])

if args.fuse:
logger.info("\tFusing model...")
model = fuse_model(model)

if args.trt:
assert (
not args.fuse and not is_distributed and args.batch_size == 1
), "TensorRT model is not support model fusing and distributed inferencing!"
trt_file = os.path.join(file_name, "model_trt.pth")
assert os.path.exists(
trt_file
), "TensorRT model is not found!\n Run tools/trt.py first!"
model.head.decode_in_inference = False
decoder = model.head.decode_outputs
else:
trt_file = None
decoder = None

return model, trt_file, decoder


@logger.catch
def main(exp, args, num_gpu):
is_distributed = num_gpu > 1
print('is_distributed', is_distributed)
print('num_gpu', num_gpu)

model = exp.get_model()
logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size)))
# logger.info("Model Structure:\n{}".format(str(model)))
model, trt_file, decoder = load_model(args, exp, is_distributed)

if args.task == 'metamot':
val_loaders = exp.get_eval_loaders(args.batch_size, is_distributed, args.test)
for val_loader in val_loaders:
learner = model.clone()
process_loader(args, val_loader, learner, is_distributed)
if not args.use_existing_files:
for val_loader, val_ann in zip(val_loaders, exp.val_anns):
logger.info('processing loader...')
process_loader(args, exp, val_loader, model, is_distributed, trt_file, decoder, val_ann)
eval_MOT(args, exp)
else:
val_loader = exp.get_eval_loader(args.batch_size, is_distributed, args.test)
process_loader(args, val_loader, model, is_distributed)
if not args.use_existing_files:
val_loader = exp.get_eval_loader(args.batch_size, is_distributed, args.test)
process_loader(args, exp, val_loader, model, is_distributed, trt_file, decoder, exp.val_ann)
eval_MOT(args, exp, exp.val_ann)


if __name__ == "__main__":

yolox/core/meta_trainer.py (+5, -2)

@@ -76,6 +76,7 @@ class MetaTrainer:
self.after_train()

def train_in_epoch(self):
# self.evaluate_and_save_model()
for self.epoch in range(self.start_epoch, self.max_epoch):
self.before_epoch()
self.train_in_task()
@@ -212,7 +213,7 @@ class MetaTrainer:
# self.model = model
self.model.train()

self.evaluator = self.exp.get_evaluator(
self.evaluators = self.exp.get_evaluators(
batch_size=self.args.batch_size, is_distributed=self.is_distributed
)
# Tensorboard logger
@@ -320,6 +321,7 @@ class MetaTrainer:

ckpt = torch.load(ckpt_file, map_location=self.device)

# TODO: handle pretrained BYTETrack
# handling meta models
# new_dict = {}
# for key in ckpt["model"].keys():
@@ -355,9 +357,10 @@ class MetaTrainer:
return model

def evaluate_and_save_model(self):
logger.info("starting eval...")
evalmodel = self.ema_model.ema if self.use_model_ema else self.model
ap50_95, ap50, summary = self.exp.eval(
evalmodel, self.evaluator, self.is_distributed
evalmodel, self.evaluators, self.is_distributed
)
self.model.train()
if self.rank == 0:

yolox/data/__init__.py (+1, -1)

@@ -2,7 +2,7 @@
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.

from .data_augment import TrainTransform, ValTransform
from .data_augment import TrainTransform, ValTransform, ValTransformWithPseudo
from .data_prefetcher import DataPrefetcher
from .dataloading import DataLoader, get_yolox_datadir
from .datasets import *

yolox/data/data_augment.py (+80, -0)

@@ -297,3 +297,83 @@ class ValTransform:
def __call__(self, img, res, input_size):
img, _ = preproc(img, input_size, self.means, self.std, self.swap)
return img, np.zeros((1, 5))


class ValTransformWithPseudo:
"""
Defines the transformations that should be applied to test PIL image
for input into the network

dimension -> tensorize -> color adj

Arguments:
resize (int): input dimension to SSD
rgb_means ((int,int,int)): average RGB of the dataset
(104,117,123)
swap ((int,int,int)): final order of channels

Returns:
transform (transform) : callable transform to be applied to test/val
data
"""

def __init__(self, rgb_means=None, std=None, swap=(2, 0, 1), max_labels=100):
self.means = rgb_means
self.swap = swap
self.std = std
self.max_labels = max_labels



def __call__(self, image, targets, input_dim):
boxes = targets[:, :4].copy()
labels = targets[:, 4].copy()
ids = targets[:, 5].copy()
if len(boxes) == 0:
targets = np.zeros((self.max_labels, 6), dtype=np.float32)
image, r_o = preproc(image, input_dim, self.means, self.std)
image = np.ascontiguousarray(image, dtype=np.float32)
return image, targets

image_o = image.copy()
targets_o = targets.copy()
height_o, width_o, _ = image_o.shape
boxes_o = targets_o[:, :4]
labels_o = targets_o[:, 4]
ids_o = targets_o[:, 5]
# bbox_o: [xyxy] to [c_x,c_y,w,h]
boxes_o = xyxy2cxcywh(boxes_o)

# image_t = _distort(image)
image_t = image
# image_t, boxes = _mirror(image_t, boxes)
height, width, _ = image_t.shape
image_t, r_ = preproc(image_t, input_dim, self.means, self.std)
# boxes [xyxy] 2 [cx,cy,w,h]
boxes = xyxy2cxcywh(boxes)
boxes *= r_

mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 1
boxes_t = boxes[mask_b]
labels_t = labels[mask_b]
ids_t = ids[mask_b]

if len(boxes_t) == 0:
image_t, r_o = preproc(image_o, input_dim, self.means, self.std)
boxes_o *= r_o
boxes_t = boxes_o
labels_t = labels_o
ids_t = ids_o

labels_t = np.expand_dims(labels_t, 1)
ids_t = np.expand_dims(ids_t, 1)

targets_t = np.hstack((labels_t, boxes_t, ids_t))
padded_labels = np.zeros((self.max_labels, 6))
padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[
: self.max_labels
]
padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
image_t = np.ascontiguousarray(image_t, dtype=np.float32)
return image_t, padded_labels
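
As a quick orientation to the new transform's interface, here is an editor's usage sketch (shapes in the comments follow the code above; it assumes the yolox package from this repo is importable):

import numpy as np
from yolox.data import ValTransformWithPseudo

transform = ValTransformWithPseudo(
    rgb_means=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_labels=100
)
img = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
targets = np.array([[10.0, 20.0, 110.0, 220.0, 0.0, 1.0]])  # [x1, y1, x2, y2, class, id]
img_t, labels_t = transform(img, targets, (800, 1440))
# img_t: float32 of shape (3, 800, 1440); labels_t: (100, 6) rows of
# [class, cx, cy, w, h, id], zero-padded past the real boxes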


yolox/data/datasets/mot.py (+32, -8)

@@ -14,12 +14,13 @@ class MOTDataset(Dataset):
"""

def __init__(
self,
data_dir=None,
json_file="train_half.json",
name="train",
img_size=(608, 1088),
preproc=None,
self,
data_dir=None,
json_file="train_half.json",
name="train",
img_size=(608, 1088),
preproc=None,
load_weak=False,
):
"""
COCO dataset initialization. Annotation data are read into memory by COCO API.
@@ -45,6 +46,7 @@ class MOTDataset(Dataset):
self.name = name
self.img_size = img_size
self.preproc = preproc
self.load_weak = load_weak

def __len__(self):
return len(self.ids)
@@ -98,9 +100,31 @@ class MOTDataset(Dataset):
img_file = os.path.join(
self.data_dir, self.name, file_name
)
head_tail = os.path.split(img_file)
# label_path = os.path.join(head_tail[0], head_tail[1].replace('.jpg','.txt'))

if self.load_weak:
weak_label_path = os.path.join(head_tail[0], head_tail[1] + '_weak_yolox-x.txt')
# load weak labels from weak_label_path
width = img_info[1]
height = img_info[0]
labels = np.loadtxt(weak_label_path).reshape(-1, 6)  # rows of [class, id, cx, cy, w, h], normalized
res = np.ones_like(labels)
# the coordinates live in columns, so index axis 1, not rows
labels[:, 2] *= width
labels[:, 4] *= width
labels[:, 3] *= height
labels[:, 5] *= height
# generate_weak_labels.py stores box centers; shift to the top-left corner
# first, then derive the bottom-right corner, giving pixel [x1, y1, x2, y2]
labels[:, 2] -= labels[:, 4] / 2
labels[:, 3] -= labels[:, 5] / 2
labels[:, 4] += labels[:, 2]
labels[:, 5] += labels[:, 3]

res[:, 0:4] = labels[:, -4:]
res[:, 5] = labels[:, 1]
# all are from class one
# res[:, 4] = labels[:, 0]

img = cv2.imread(img_file)
# if img is None:
# print('img_file is None',img_file)
if img is None:
print('failed to read image:', img_file)
assert img is not None

return img, res.copy(), img_info, np.array([id_])

yolox/evaluators/coco_evaluator.py (+7, -3)

@@ -192,7 +192,7 @@ class COCOEvaluator:

info = time_info + "\n"

# Evaluate the Dt (detection) json comparing with the ground truth
# Evaluate the Dt (detection) json, comparing with the ground truth
if len(data_dict) > 0:
cocoGt = self.dataloader.dataset.coco
# TODO: since pycocotools can't process dict in py36, write data to json file.
@@ -210,8 +210,12 @@ class COCOEvaluator:
from pycocotools import cocoeval as COCOeval
logger.warning("Use standard COCOeval.")
'''
#from pycocotools.cocoeval import COCOeval
from yolox.layers import COCOeval_opt as COCOeval
# TODO: commenting this and trying to use pycocotools
from pycocotools.cocoeval import COCOeval
# from yolox.layers import COCOeval_opt as COCOeval



cocoEval = COCOeval(cocoGt, cocoDt, annType[1])
cocoEval.evaluate()
cocoEval.accumulate()

yolox/evaluators/mot_evaluator.py (+145, -117)

@@ -34,7 +34,8 @@ def write_results(filename, results):
if track_id < 0:
continue
x1, y1, w, h = tlwh
line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1), h=round(h, 1), s=round(score, 2))
line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1),
h=round(h, 1), s=round(score, 2))
f.write(line)
logger.info('save results to {}'.format(filename))

@@ -47,7 +48,8 @@ def write_results_no_score(filename, results):
if track_id < 0:
continue
x1, y1, w, h = tlwh
line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1), h=round(h, 1))
line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1),
h=round(h, 1))
f.write(line)
logger.info('save results to {}'.format(filename))

@@ -59,7 +61,7 @@ class MOTEvaluator:
"""

def __init__(
self, args, dataloader, img_size, confthre, nmsthre, num_classes):
self, args, dataloader, img_size, confthre, nmsthre, num_classes):
"""
Args:
dataloader (Dataloader): evaluate dataloader.
@@ -77,14 +79,15 @@ class MOTEvaluator:
self.args = args

def evaluate(
self,
model,
distributed=False,
half=False,
trt_file=None,
decoder=None,
test_size=None,
result_folder=None
self,
model,
distributed=False,
half=False,
trt_file=None,
decoder=None,
test_size=None,
result_folder=None,
adaptation_period=None,
):
"""
COCO average precision (AP) Evaluation. Iterate inference on the test dataset
@@ -100,11 +103,17 @@ class MOTEvaluator:
ap50 (float) : COCO AP of IoU=50
summary (str): summary info of evaluation.
"""

if adaptation_period is not None:
logger.info('cloning model...')
learner = model.clone()
else:
learner = model
# TODO half to amp_test
tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor
model = model.eval()
learner = learner.eval()
if half:
model = model.half()
learner = learner.half()
ids = []
data_list = []
results = []
@@ -117,74 +126,90 @@ class MOTEvaluator:

if trt_file is not None:
from torch2trt import TRTModule
logger.info('Loading trt file')

model_trt = TRTModule()
model_trt.load_state_dict(torch.load(trt_file))

x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()
model(x)
model = model_trt
learner(x)
learner = model_trt
tracker = BYTETracker(self.args)
ori_thresh = self.args.track_thresh
for cur_iter, (imgs, _, info_imgs, ids) in enumerate(
progress_bar(self.dataloader)
):
with torch.no_grad():
# init tracker
frame_id = info_imgs[2].item()
video_id = info_imgs[3].item()
img_file_name = info_imgs[4]
video_name = img_file_name[0].split('/')[0]
if video_name == 'MOT17-05-FRCNN' or video_name == 'MOT17-06-FRCNN':
self.args.track_buffer = 14
elif video_name == 'MOT17-13-FRCNN' or video_name == 'MOT17-14-FRCNN':
self.args.track_buffer = 25
else:
self.args.track_buffer = 30

if video_name == 'MOT17-01-FRCNN':
self.args.track_thresh = 0.65
elif video_name == 'MOT17-06-FRCNN':
self.args.track_thresh = 0.65
elif video_name == 'MOT17-12-FRCNN':
self.args.track_thresh = 0.7
elif video_name == 'MOT17-14-FRCNN':
self.args.track_thresh = 0.67
else:
self.args.track_thresh = ori_thresh
if video_name == 'MOT20-06' or video_name == 'MOT20-08':
self.args.track_thresh = 0.3
else:
self.args.track_thresh = ori_thresh

if video_name not in video_names:
video_names[video_id] = video_name
if frame_id == 1:
tracker = BYTETracker(self.args)
if len(results) != 0:
result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id - 1]))
write_results(result_filename, results)
results = []

imgs = imgs.type(tensor_type)
for cur_iter, (imgs, targets, info_imgs, ids) in enumerate(
progress_bar(self.dataloader)
):
if cur_iter % 100 == 0:
logger.info('cur_iter: {}'.format(cur_iter))
# with torch.no_grad():
# init tracker
# imgs = imgs.to(self.data_type)
# targets = targets.to(self.data_type)

frame_id = info_imgs[2].item()
video_id = info_imgs[3].item()
img_file_name = info_imgs[4]
video_name = img_file_name[0].split('/')[0]
if video_name == 'MOT17-05-FRCNN' or video_name == 'MOT17-06-FRCNN':
self.args.track_buffer = 14
elif video_name == 'MOT17-13-FRCNN' or video_name == 'MOT17-14-FRCNN':
self.args.track_buffer = 25
else:
self.args.track_buffer = 30

if video_name == 'MOT17-01-FRCNN':
self.args.track_thresh = 0.65
elif video_name == 'MOT17-06-FRCNN':
self.args.track_thresh = 0.65
elif video_name == 'MOT17-12-FRCNN':
self.args.track_thresh = 0.7
elif video_name == 'MOT17-14-FRCNN':
self.args.track_thresh = 0.67
else:
self.args.track_thresh = ori_thresh

if video_name == 'MOT20-06' or video_name == 'MOT20-08':
self.args.track_thresh = 0.3
else:
self.args.track_thresh = ori_thresh

if video_name not in video_names:
video_names[video_id] = video_name
if frame_id == 1:
tracker = BYTETracker(self.args)
if len(results) != 0:
result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id - 1]))
write_results(result_filename, results)
results = []

imgs = imgs.type(tensor_type)

# skip the last iters since batch size might not be enough for batch inference
is_time_record = cur_iter < len(self.dataloader) - 1
if is_time_record:
start = time.time()
if adaptation_period is not None and cur_iter % adaptation_period == 0:
learner.train()
targets = targets.type(tensor_type)
targets.requires_grad = False
outputs = learner(imgs, targets)
loss = outputs["total_loss"]
learner.adapt(loss)
learner.eval()

# skip the last iters since batch size might not be enough for batch inference
is_time_record = cur_iter < len(self.dataloader) - 1
if is_time_record:
start = time.time()
with torch.no_grad():
outputs = learner(imgs)

outputs = model(imgs)
if decoder is not None:
outputs = decoder(outputs, dtype=outputs.type())
# print('outputs', outputs.shape)
outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)

if decoder is not None:
outputs = decoder(outputs, dtype=outputs.type())
print('outputs', outputs.shape)
outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)
if is_time_record:
infer_end = time_synchronized()
inference_time += infer_end - start
if is_time_record:
infer_end = time_synchronized()
inference_time += infer_end - start

output_results = self.convert_to_coco_format(outputs, info_imgs, ids)
data_list.extend(output_results)
@@ -209,7 +234,7 @@ class MOTEvaluator:
if is_time_record:
track_end = time_synchronized()
track_time += track_end - infer_end
if cur_iter == len(self.dataloader) - 1:
result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id]))
write_results(result_filename, results)
@@ -225,14 +250,14 @@ class MOTEvaluator:
return eval_results

def evaluate_sort(
self,
model,
distributed=False,
half=False,
trt_file=None,
decoder=None,
test_size=None,
result_folder=None
self,
model,
distributed=False,
half=False,
trt_file=None,
decoder=None,
test_size=None,
result_folder=None
):
"""
COCO average precision (AP) Evaluation. Iterate inference on the test dataset
@@ -272,12 +297,14 @@ class MOTEvaluator:
x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()
model(x)
model = model_trt
tracker = Sort(self.args.track_thresh)
for cur_iter, (imgs, _, info_imgs, ids) in enumerate(
progress_bar(self.dataloader)
progress_bar(self.dataloader)
):
if cur_iter % 250 == 0:
logger.info('cur_iter: {}'.format(cur_iter))
with torch.no_grad():
# init tracker
frame_id = info_imgs[2].item()
@@ -306,7 +333,7 @@ class MOTEvaluator:
outputs = decoder(outputs, dtype=outputs.type())

outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)
if is_time_record:
infer_end = time_synchronized()
inference_time += infer_end - start
@@ -331,7 +358,7 @@ class MOTEvaluator:
if is_time_record:
track_end = time_synchronized()
track_time += track_end - infer_end
if cur_iter == len(self.dataloader) - 1:
result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id]))
write_results_no_score(result_filename, results)
@@ -347,15 +374,15 @@ class MOTEvaluator:
return eval_results

def evaluate_deepsort(
self,
model,
distributed=False,
half=False,
trt_file=None,
decoder=None,
test_size=None,
result_folder=None,
model_folder=None
self,
model,
distributed=False,
half=False,
trt_file=None,
decoder=None,
test_size=None,
result_folder=None,
model_folder=None
):
"""
COCO average precision (AP) Evaluation. Iterate inference on the test dataset
@@ -395,11 +422,11 @@ class MOTEvaluator:
x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()
model(x)
model = model_trt
tracker = DeepSort(model_folder, min_confidence=self.args.track_thresh)
for cur_iter, (imgs, _, info_imgs, ids) in enumerate(
progress_bar(self.dataloader)
progress_bar(self.dataloader)
):
with torch.no_grad():
# init tracker
@@ -429,7 +456,7 @@ class MOTEvaluator:
outputs = decoder(outputs, dtype=outputs.type())

outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)
if is_time_record:
infer_end = time_synchronized()
inference_time += infer_end - start
@@ -454,7 +481,7 @@ class MOTEvaluator:
if is_time_record:
track_end = time_synchronized()
track_time += track_end - infer_end
if cur_iter == len(self.dataloader) - 1:
result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id]))
write_results_no_score(result_filename, results)
@@ -470,15 +497,15 @@ class MOTEvaluator:
return eval_results

def evaluate_motdt(
self,
model,
distributed=False,
half=False,
trt_file=None,
decoder=None,
test_size=None,
result_folder=None,
model_folder=None
self,
model,
distributed=False,
half=False,
trt_file=None,
decoder=None,
test_size=None,
result_folder=None,
model_folder=None
):
"""
COCO average precision (AP) Evaluation. Iterate inference on the test dataset
@@ -518,10 +545,10 @@ class MOTEvaluator:
x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()
model(x)
model = model_trt
tracker = OnlineTracker(model_folder, min_cls_score=self.args.track_thresh)
for cur_iter, (imgs, _, info_imgs, ids) in enumerate(
progress_bar(self.dataloader)
progress_bar(self.dataloader)
):
with torch.no_grad():
# init tracker
@@ -551,7 +578,7 @@ class MOTEvaluator:
outputs = decoder(outputs, dtype=outputs.type())

outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)
if is_time_record:
infer_end = time_synchronized()
inference_time += infer_end - start
@@ -578,7 +605,7 @@ class MOTEvaluator:
if is_time_record:
track_end = time_synchronized()
track_time += track_end - infer_end
if cur_iter == len(self.dataloader) - 1:
result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id]))
write_results(result_filename, results)
@@ -596,7 +623,7 @@ class MOTEvaluator:
def convert_to_coco_format(self, outputs, info_imgs, ids):
data_list = []
for (output, img_h, img_w, img_id) in zip(
outputs, info_imgs[0], info_imgs[1], ids
outputs, info_imgs[0], info_imgs[1], ids
):
if output is None:
continue
@@ -644,9 +671,9 @@ class MOTEvaluator:
[
"Average {} time: {:.2f} ms".format(k, v)
for k, v in zip(
["forward", "track", "inference"],
[a_infer_time, a_track_time, (a_infer_time + a_track_time)],
)
["forward", "track", "inference"],
[a_infer_time, a_track_time, (a_infer_time + a_track_time)],
)
]
)

@@ -666,8 +693,9 @@ class MOTEvaluator:
from pycocotools import cocoeval as COCOeval
logger.warning("Use standard COCOeval.")
'''
#from pycocotools.cocoeval import COCOeval
from yolox.layers import COCOeval_opt as COCOeval
# switched to the standard pycocotools COCOeval
from pycocotools.cocoeval import COCOeval
# from yolox.layers import COCOeval_opt as COCOeval
cocoEval = COCOeval(cocoGt, cocoDt, annType[1])
cocoEval.evaluate()
cocoEval.accumulate()

yolox/exp/base_meta_exp.py (+1, -1)

@@ -24,7 +24,7 @@ class BaseMetaExp(metaclass=ABCMeta):
self.seed = None
# self.output_dir = "./YOLOX_outputs"
self.output_dir = "./meta_experiments"
self.print_interval = 100
self.print_interval = 250
self.eval_interval = 10

@abstractmethod

yolox/exp/meta_yolox_base.py (+27, -15)

@@ -62,7 +62,7 @@ class MetaExp(BaseMetaExp):

# ----------------- Meta-learning ------------------ #
self.first_order = True
self.inner_lr = 1e-5
self.inner_lr = 1e-6

def get_model(self):
from yolox.models import YOLOPAFPN, YOLOX, YOLOXHead
@@ -241,19 +241,31 @@ class MetaExp(BaseMetaExp):
val_loaders.append(val_loader)
return val_loaders

def get_evaluator(self, batch_size, is_distributed, testdev=False):
def get_evaluators(self, batch_size, is_distributed, testdev=False):
from yolox.evaluators import COCOEvaluator

val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev)
evaluator = COCOEvaluator(
dataloader=val_loader,
img_size=self.test_size,
confthre=self.test_conf,
nmsthre=self.nmsthre,
num_classes=self.num_classes,
testdev=testdev,
)
return evaluator

def eval(self, model, evaluator, is_distributed, half=False):
return evaluator.evaluate(model, is_distributed, half)
val_loaders = self.get_eval_loaders(batch_size, is_distributed, testdev=testdev)
evaluators = []
for val_loader in val_loaders:
evaluator = COCOEvaluator(
dataloader=val_loader,
img_size=self.test_size,
confthre=self.test_conf,
nmsthre=self.nmsthre,
num_classes=self.num_classes,
testdev=testdev,
)
evaluators.append(evaluator)
return evaluators

def eval(self, model, evaluators, is_distributed, half=False):
ap50_95_sum = 0.0
ap50_sum = 0.0
summaries = ''
for evaluator in evaluators:
ap50_95, ap50, summary = evaluator.evaluate(model, is_distributed, half)
ap50_95_sum += ap50_95
ap50_sum += ap50
summaries += ("\n" + summary)
n = len(evaluators)
return (ap50_95_sum / n), (ap50_sum / n), summaries

yolox/models/yolo_head.py (+37, -3)

@@ -197,6 +197,9 @@ class YOLOXHead(nn.Module):
if self.training:
# logger.info("labels.shape:{}".format(labels.shape))
# logger.info("torch.cat(outputs, 1).shape:{}".format(torch.cat(outputs, 1).shape))
# if torch.isnan(torch.cat(outputs, 1)).sum().item():
# logger.info('There is Nan value in outputs {}'.format(torch.isnan(torch.cat(outputs, 1)).sum().item()))

return self.get_losses(
imgs,
x_shifts,
@@ -397,22 +400,45 @@ class YOLOXHead(nn.Module):
if self.use_l1:
l1_targets = torch.cat(l1_targets, 0)


# TODO: check loss parts shapes

num_fg = max(num_fg, 1)
# if bbox_preds.view(-1, 4)[fg_masks].shape != reg_targets.shape:
# logger.info("some shape mismatch")
# logger.info("bbox_preds.view(-1, 4)[fg_masks].shape {}".format(bbox_preds.view(-1, 4)[fg_masks].shape))
# logger.info("reg_targets {}".format(reg_targets.shape))
# logger.info("--------------------")
loss_iou = (
self.iou_loss(bbox_preds.view(-1, 4)[fg_masks], reg_targets)
).sum() / num_fg
# if obj_preds.view(-1, 1).shape != obj_targets.shape:
# logger.info("some shape mismatch")
# logger.info("obj_preds.view(-1, 1).shape {}".format(obj_preds.view(-1, 1).shape))
# logger.info("obj_targets.shape {}".format(obj_targets.shape))
# logger.info("--------------------")
loss_obj = (
self.bcewithlog_loss(obj_preds.view(-1, 1), obj_targets)
).sum() / num_fg
# if cls_preds.view(-1, self.num_classes)[fg_masks].shape != cls_targets.shape:
# logger.info("some shape mismatch")
# logger.info("cls_preds.view(-1, self.num_classes)[fg_masks].shape {}".format(
# cls_preds.view(-1, self.num_classes)[fg_masks].shape))
# logger.info("cls_targets.shape {}".format(cls_targets.shape))
# logger.info("--------------------")

loss_cls = (
self.bcewithlog_loss(
cls_preds.view(-1, self.num_classes)[fg_masks], cls_targets
)
).sum() / num_fg
if self.use_l1:
# if origin_preds.view(-1, 4)[fg_masks].shape != l1_targets.shape:
# logger.info("some shape mismatch")
# logger.info("origin_preds.view(-1, 4)[fg_masks].shape {}".format(
# origin_preds.view(-1, 4)[fg_masks].shape))
# logger.info("l1_targets.shape {}".format(l1_targets.shape))
# logger.info("--------------------")

loss_l1 = (
self.l1_loss(origin_preds.view(-1, 4)[fg_masks], l1_targets)
).sum() / num_fg
@@ -457,7 +483,7 @@ class YOLOXHead(nn.Module):
imgs,
mode="gpu",
):
# TODO: check loss mismatches here
if mode == "cpu":
print("------------CPU Mode for This Batch-------------")
gt_bboxes_per_image = gt_bboxes_per_image.cpu().float()
@@ -477,6 +503,11 @@ class YOLOXHead(nn.Module):
num_gt,
img_size
)
# if torch.isnan(cls_preds).sum().item() or torch.isnan(obj_preds).sum().item() or torch.isnan(
# bboxes_preds_per_image).sum().item():
# logger.info("cls_preds is Nan {}".format(torch.isnan(cls_preds).sum().item()))
# logger.info("obj_preds is Nan {}".format(torch.isnan(obj_preds).sum().item()))
# logger.info("bboxes_preds_per_image is Nan {}".format(torch.isnan(bboxes_preds_per_image).sum().item()))

bboxes_preds_per_image = bboxes_preds_per_image[fg_mask]
cls_preds_ = cls_preds[batch_idx][fg_mask]
@@ -495,8 +526,10 @@ class YOLOXHead(nn.Module):
.unsqueeze(1)
.repeat(1, num_in_boxes_anchor, 1)
)
pair_wise_ious_loss = -torch.log(pair_wise_ious + 1e-8)

pair_wise_ious_loss = -torch.log(pair_wise_ious + 1e-8)
# if torch.isnan(pair_wise_ious_loss).sum().item():
# logger.info("pair_wise_ious_loss is Nan {}".format(torch.isnan(pair_wise_ious_loss).sum().item()))
if mode == "cpu":
cls_preds_, obj_preds_ = cls_preds_.cpu(), obj_preds_.cpu()

@@ -505,6 +538,7 @@ class YOLOXHead(nn.Module):
cls_preds_.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_()
* obj_preds_.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_()
)

pair_wise_cls_loss = F.binary_cross_entropy(
cls_preds_.sqrt_(), gt_cls_per_image, reduction="none"
).sum(-1)
