@@ -0,0 +1,18 @@
#PBS -N bytetrack_17_on_17half
#PBS -m abe
#PBS -M [email protected]
#PBS -l nodes=1:ppn=1:gpus=1
#PBS -q cuda9
export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env
cd /home/abdollahpour.ce.sharif/ByteTrack
python tools/track.py -t mot -f exps/example/mot/yolox_x_mot17_half.py -d 1 -b 1 --fp16 -c /home/abdollahpour.ce.sharif/ByteTrackModels/bytetrack_x_mot17.pth.tar --local_rank 0 -expn bytetrack_17_on_17half
@@ -0,0 +1,18 @@
#PBS -N bytetrack_17_on_20
#PBS -m abe
#PBS -M [email protected]
#PBS -l nodes=1:ppn=1:gpus=1
#PBS -q cuda9
export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env
cd /home/abdollahpour.ce.sharif/ByteTrack
python tools/track.py -t mot -f exps/example/mot/yolox_x_mot20_on_mot20.py -d 1 -b 1 --fp16 -c /home/abdollahpour.ce.sharif/ByteTrackModels/bytetrack_x_mot17.pth.tar --local_rank 0 -expn bytetrack_17_on_20 --mot20
@@ -8,6 +8,7 @@
export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env
@@ -16,4 +17,4 @@ cd /home/abdollahpour.ce.sharif/ByteTrack
python tools/train.py -t metamot -f exps/example/metamot/yolox_x_mot17_on_mot20.py -d 1 -b 1 --fp16 -o --resume --start_epoch 2 -c /home/abdollahpour.ce.sharif/ByteTrack/meta_experiments/train_17_on_20/latest_ckpt.pth.tar --local_rank 0 -expn train_17_on_20_resume
python tools/train.py -t metamot -f exps/example/metamot/yolox_x_mot17_on_mot20.py -d 1 -b 1 --fp16 -o --resume --start_epoch 6 -c /home/abdollahpour.ce.sharif/ByteTrack/meta_experiments/train_17_on_20_resume/latest_ckpt.pth.tar --local_rank 0 -expn train_17_on_20_resume2
@@ -0,0 +1,11 @@
export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env
cd /home/abdollahpour.ce.sharif/ByteTrack
python tools/test.py
@@ -0,0 +1,18 @@
#PBS -N track_metamot17_on_17_no_adapt
#PBS -m abe
#PBS -M [email protected]
#PBS -l nodes=1:ppn=1:gpus=1
#PBS -q cuda9
export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env
cd /home/abdollahpour.ce.sharif/ByteTrack
python tools/track.py -t metamot -f exps/example/metamot/yolox_x_mot17_on_mot17.py -d 1 -b 1 --fp16 -c /home/abdollahpour.ce.sharif/ByteTrack/meta_experiments/train_17_on_20_resume2/best_ckpt.pth.tar --local_rank 0 -expn track_metamot17_on_17
@@ -0,0 +1,18 @@
#PBS -N track_17_on_20_ada_12
#PBS -m abe
#PBS -M [email protected]
#PBS -l nodes=1:ppn=1:gpus=1
#PBS -q cuda9
export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env
cd /home/abdollahpour.ce.sharif/ByteTrack
python tools/track.py -t metamot -f exps/example/metamot/yolox_x_mot17_on_mot20.py -d 1 -b 1 -c /home/abdollahpour.ce.sharif/ByteTrack/meta_experiments/train_17_on_20_resume2/best_ckpt.pth.tar --local_rank 0 -expn track_17_on_20_ada_12 --mot20 --adaptation_period 12
@@ -0,0 +1,19 @@
#PBS -N weak_labels
#PBS -m abe
#PBS -M [email protected]
#PBS -l nodes=1:ppn=1:gpus=1
#PBS -q cuda9
export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env
cd /home/abdollahpour.ce.sharif/ByteTrack
python tools/generate_weak_labels.py
@@ -0,0 +1,158 @@
# encoding: utf-8
import os
import random

import torch
import torch.nn as nn
import torch.distributed as dist

from yolox.exp import MetaExp as MyMetaExp
from yolox.data import get_yolox_datadir
from os import listdir
from os.path import isfile, join


class Exp(MyMetaExp):
    def __init__(self):
        super(Exp, self).__init__()
        self.num_classes = 1
        self.depth = 1.33
        self.width = 1.25
        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
        self.train_dir = '/home/abdollahpour.ce.sharif/ByteTrackData/MOT17/annotations'
        onlyfiles = [f for f in listdir(self.train_dir) if isfile(join(self.train_dir, f))]
        self.train_anns = [file for file in onlyfiles if 'train' in file and 'FRCNN' in file]
        # # TODO: remove
        # self.train_anns = self.train_anns[3:]
        self.val_dir = '/home/abdollahpour.ce.sharif/ByteTrackData/MOT17/annotations'
        onlyfiles = [f for f in listdir(self.val_dir) if isfile(join(self.val_dir, f))]
        self.val_anns = [file for file in onlyfiles if 'train' in file and 'FRCNN' in file]
        # self.val_anns = self.val_anns[-1:]
        print('train_anns', self.train_anns)
        print('val_anns', self.val_anns)
        self.input_size = (800, 1440)
        self.test_size = (800, 1440)
        # self.test_size = (736, 1920)
        self.random_size = (20, 36)
        self.max_epoch = 80
        self.print_interval = 250
        self.eval_interval = 5
        self.test_conf = 0.1
        self.nmsthre = 0.7
        self.no_aug_epochs = 10
        # self.basic_lr_per_img = 0.001 / 64.0
        self.basic_lr_per_img = 0.0001 / 64.0
        self.warmup_epochs = 1

    def get_data_loaders(self, batch_size, is_distributed, no_aug=False):
        from yolox.data import (
            MOTDataset,
            TrainTransform,
            YoloBatchSampler,
            DataLoader,
            InfiniteSampler,
            MosaicDetection,
        )

        train_loaders = []
        for train_ann in self.train_anns:
            dataset = MOTDataset(
                data_dir=os.path.join(get_yolox_datadir(), "MOT17"),
                json_file=train_ann,
                name='train',
                img_size=self.input_size,
                preproc=TrainTransform(
                    rgb_means=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225),
                    max_labels=500,
                ),
            )

            dataset = MosaicDetection(
                dataset,
                mosaic=not no_aug,
                img_size=self.input_size,
                preproc=TrainTransform(
                    rgb_means=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225),
                    max_labels=1000,
                ),
                degrees=self.degrees,
                translate=self.translate,
                scale=self.scale,
                shear=self.shear,
                perspective=self.perspective,
                enable_mixup=self.enable_mixup,
            )

            self.dataset = dataset

            if is_distributed:
                batch_size = batch_size // dist.get_world_size()

            sampler = InfiniteSampler(
                len(self.dataset), seed=self.seed if self.seed else 0
            )

            batch_sampler = YoloBatchSampler(
                sampler=sampler,
                batch_size=batch_size,
                drop_last=False,
                input_dimension=self.input_size,
                mosaic=not no_aug,
            )

            dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
            dataloader_kwargs["batch_sampler"] = batch_sampler
            train_loader = DataLoader(self.dataset, **dataloader_kwargs)
            train_loaders.append(train_loader)
        return train_loaders

    def get_eval_loaders(self, batch_size, is_distributed, testdev=False):
        from yolox.data import MOTDataset, ValTransform, ValTransformWithPseudo

        val_loaders = []
        for val_ann in self.val_anns:
            valdataset = MOTDataset(
                data_dir=os.path.join(get_yolox_datadir(), "MOT17"),
                json_file=val_ann,
                img_size=self.test_size,
                name='train',  # change to train when running on training set
                preproc=ValTransformWithPseudo(
                    rgb_means=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225),
                ),
            )

            if is_distributed:
                batch_size = batch_size // dist.get_world_size()
                sampler = torch.utils.data.distributed.DistributedSampler(
                    valdataset, shuffle=False
                )
            else:
                sampler = torch.utils.data.SequentialSampler(valdataset)

            dataloader_kwargs = {
                "num_workers": self.data_num_workers,
                "pin_memory": True,
                "sampler": sampler,
            }
            dataloader_kwargs["batch_size"] = batch_size
            val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)
            val_loaders.append(val_loader)
        return val_loaders

    def get_evaluator(self, batch_size, is_distributed, testdev=False):
        from yolox.evaluators import COCOEvaluator

        val_loaders = self.get_eval_loaders(batch_size, is_distributed, testdev=testdev)
        evaluators = []
        for val_loader in val_loaders:
            evaluator = COCOEvaluator(
                dataloader=val_loader,
                img_size=self.test_size,
                confthre=self.test_conf,
                nmsthre=self.nmsthre,
                num_classes=self.num_classes,
                testdev=testdev,
            )
            evaluators.append(evaluator)
        return evaluators
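A quick smoke test of the per-sequence task construction above (illustrative only; it assumes the MOT17 annotations directory and dataset root are laid out as configured in __init__):

    # Hypothetical check: one loader/evaluator pair per MOT17-FRCNN train annotation file.
    exp = Exp()
    loaders = exp.get_data_loaders(batch_size=1, is_distributed=False)
    evaluators = exp.get_evaluator(batch_size=1, is_distributed=False)
    assert len(loaders) == len(exp.train_anns)
    print(len(loaders), 'per-sequence tasks')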
@@ -28,6 +28,7 @@ class Exp(MyMetaExp):
        onlyfiles = [f for f in listdir(self.val_dir) if isfile(join(self.val_dir, f))]
        self.val_anns = [file for file in onlyfiles if 'train' in file and 'MOT20' in file]
        # self.val_anns = self.val_anns[-1:]
        print('train_anns', self.train_anns)
        print('val_anns', self.val_anns)
        self.input_size = (800, 1440)
@@ -35,12 +36,13 @@ class Exp(MyMetaExp):
        # self.test_size = (736, 1920)
        self.random_size = (20, 36)
        self.max_epoch = 80
        self.print_interval = 100
        self.print_interval = 250
        self.eval_interval = 5
        self.test_conf = 0.001
        self.nmsthre = 0.7
        self.no_aug_epochs = 10
        self.basic_lr_per_img = 0.001 / 64.0
        # self.basic_lr_per_img = 0.001 / 64.0
        self.basic_lr_per_img = 0.0001 / 64.0
        self.warmup_epochs = 1

    def get_data_loaders(self, batch_size, is_distributed, no_aug=False):
@@ -107,7 +109,7 @@ class Exp(MyMetaExp):
        return train_loaders

    def get_eval_loaders(self, batch_size, is_distributed, testdev=False):
        from yolox.data import MOTDataset, ValTransform
        from yolox.data import MOTDataset, ValTransform, ValTransformWithPseudo

        val_loaders = []
        for val_ann in self.val_anns:
            valdataset = MOTDataset(
@@ -115,10 +117,11 @@ class Exp(MyMetaExp):
                json_file=val_ann,
                img_size=self.test_size,
                name='train',  # change to train when running on training set
                preproc=ValTransform(
                preproc=ValTransformWithPseudo(
                    rgb_means=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225),
                ),
                load_weak=True
            )

            if is_distributed:
@@ -142,13 +145,16 @@ class Exp(MyMetaExp):
    def get_evaluator(self, batch_size, is_distributed, testdev=False):
        from yolox.evaluators import COCOEvaluator

        val_loader = self.get_eval_loaders(batch_size, is_distributed, testdev=testdev)
        evaluator = COCOEvaluator(
            dataloader=val_loader,
            img_size=self.test_size,
            confthre=self.test_conf,
            nmsthre=self.nmsthre,
            num_classes=self.num_classes,
            testdev=testdev,
        )
        return evaluator
        val_loaders = self.get_eval_loaders(batch_size, is_distributed, testdev=testdev)
        evaluators = []
        for val_loader in val_loaders:
            evaluator = COCOEvaluator(
                dataloader=val_loader,
                img_size=self.test_size,
                confthre=self.test_conf,
                nmsthre=self.nmsthre,
                num_classes=self.num_classes,
                testdev=testdev,
            )
            evaluators.append(evaluator)
        return evaluators
@@ -40,7 +40,7 @@ class Exp(MyExp):
        )
        dataset = MOTDataset(
            data_dir=os.path.join(get_yolox_datadir(), "mot"),
            data_dir=os.path.join(get_yolox_datadir(), "MOT17"),
            json_file=self.train_ann,
            name='train',
            img_size=self.input_size,
@@ -95,7 +95,7 @@ class Exp(MyExp):
        from yolox.data import MOTDataset, ValTransform

        valdataset = MOTDataset(
            data_dir=os.path.join(get_yolox_datadir(), "mot"),
            data_dir=os.path.join(get_yolox_datadir(), "MOT17"),
            json_file=self.val_ann,
            img_size=self.test_size,
            name='train',
@@ -0,0 +1,139 @@
# encoding: utf-8
import os
import random

import torch
import torch.nn as nn
import torch.distributed as dist

from yolox.exp import Exp as MyExp
from yolox.data import get_yolox_datadir


class Exp(MyExp):
    def __init__(self):
        super(Exp, self).__init__()
        self.num_classes = 1
        self.depth = 1.33
        self.width = 1.25
        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
        self.train_ann = "train.json"
        self.val_ann = "train.json"  # change to train.json when running on training set
        self.input_size = (896, 1600)
        self.test_size = (896, 1600)
        # self.test_size = (736, 1920)
        self.random_size = (20, 36)
        self.max_epoch = 80
        self.print_interval = 20
        self.eval_interval = 5
        self.test_conf = 0.001
        self.nmsthre = 0.7
        self.no_aug_epochs = 10
        self.basic_lr_per_img = 0.001 / 64.0
        self.warmup_epochs = 1

    def get_data_loader(self, batch_size, is_distributed, no_aug=False):
        from yolox.data import (
            MOTDataset,
            TrainTransform,
            YoloBatchSampler,
            DataLoader,
            InfiniteSampler,
            MosaicDetection,
        )

        dataset = MOTDataset(
            data_dir=os.path.join(get_yolox_datadir(), "MOT20"),
            json_file=self.train_ann,
            name='',
            img_size=self.input_size,
            preproc=TrainTransform(
                rgb_means=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
                max_labels=600,
            ),
        )

        dataset = MosaicDetection(
            dataset,
            mosaic=not no_aug,
            img_size=self.input_size,
            preproc=TrainTransform(
                rgb_means=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
                max_labels=1200,
            ),
            degrees=self.degrees,
            translate=self.translate,
            scale=self.scale,
            shear=self.shear,
            perspective=self.perspective,
            enable_mixup=self.enable_mixup,
        )

        self.dataset = dataset

        if is_distributed:
            batch_size = batch_size // dist.get_world_size()

        sampler = InfiniteSampler(
            len(self.dataset), seed=self.seed if self.seed else 0
        )

        batch_sampler = YoloBatchSampler(
            sampler=sampler,
            batch_size=batch_size,
            drop_last=False,
            input_dimension=self.input_size,
            mosaic=not no_aug,
        )

        dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
        dataloader_kwargs["batch_sampler"] = batch_sampler
        train_loader = DataLoader(self.dataset, **dataloader_kwargs)

        return train_loader

    def get_eval_loader(self, batch_size, is_distributed, testdev=False):
        from yolox.data import MOTDataset, ValTransform

        valdataset = MOTDataset(
            data_dir=os.path.join(get_yolox_datadir(), "MOT20"),
            json_file=self.val_ann,
            img_size=self.test_size,
            name='train',  # change to train when running on training set
            preproc=ValTransform(
                rgb_means=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
            ),
        )

        if is_distributed:
            batch_size = batch_size // dist.get_world_size()
            sampler = torch.utils.data.distributed.DistributedSampler(
                valdataset, shuffle=False
            )
        else:
            sampler = torch.utils.data.SequentialSampler(valdataset)

        dataloader_kwargs = {
            "num_workers": self.data_num_workers,
            "pin_memory": True,
            "sampler": sampler,
        }
        dataloader_kwargs["batch_size"] = batch_size
        val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)

        return val_loader

    def get_evaluator(self, batch_size, is_distributed, testdev=False):
        from yolox.evaluators import COCOEvaluator

        val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev)
        evaluator = COCOEvaluator(
            dataloader=val_loader,
            img_size=self.test_size,
            confthre=self.test_conf,
            nmsthre=self.nmsthre,
            num_classes=self.num_classes,
            testdev=testdev,
        )
        return evaluator
@@ -0,0 +1,109 @@
import os
import numpy as np
import json
import cv2

# Use the same script for MOT16
DATA_PATH = '/media/external_10TB/10TB/vision/ByteTrackData/MOT20'
OUT_PATH = os.path.join(DATA_PATH, 'annotations')
SPLITS = ['train', 'test']  # --> split training data to train_half and val_half.
HALF_VIDEO = True
CREATE_SPLITTED_ANN = True
CREATE_SPLITTED_DET = True

if __name__ == '__main__':
    if not os.path.exists(OUT_PATH):
        os.makedirs(OUT_PATH)

    for split in SPLITS:
        if split == "test":
            data_path = os.path.join(DATA_PATH, 'test')
        else:
            data_path = os.path.join(DATA_PATH, 'train')
        seqs = os.listdir(data_path)
        for seq in sorted(seqs):
            out_path = os.path.join(OUT_PATH, '{}_{}_weak.json'.format(split, seq))
            out = {'images': [], 'annotations': [], 'videos': [],
                   'categories': [{'id': 1, 'name': 'pedestrian'}]}
            image_cnt = 0
            ann_cnt = 0
            video_cnt = 0
            tid_curr = 0
            tid_last = -1
            if '.DS_Store' in seq:
                continue
            video_cnt += 1  # video sequence number.
            out['videos'].append({'id': video_cnt, 'file_name': seq})
            seq_path = os.path.join(data_path, seq)
            img_path = os.path.join(seq_path, 'img1')
            ann_path = os.path.join(seq_path, 'gt/gt.txt')
            images = os.listdir(img_path)
            num_images = len([image for image in images if 'jpg' in image])  # half and half

            if HALF_VIDEO and ('half' in split):
                image_range = [0, num_images // 2] if 'train' in split else \
                    [num_images // 2 + 1, num_images - 1]
            else:
                image_range = [0, num_images - 1]

            for i in range(num_images):
                if i < image_range[0] or i > image_range[1]:
                    continue
                img = cv2.imread(os.path.join(data_path, '{}/img1/{:06d}.jpg'.format(seq, i + 1)))
                height, width = img.shape[:2]
                image_info = {'file_name': '{}/img1/{:06d}.jpg'.format(seq, i + 1),  # image name.
                              'id': image_cnt + i + 1,  # image number in the entire training set.
                              'frame_id': i + 1 - image_range[0],  # image number in the video sequence, starting from 1.
                              'prev_image_id': image_cnt + i if i > 0 else -1,  # image number in the entire training set.
                              'next_image_id': image_cnt + i + 2 if i < num_images - 1 else -1,
                              'video_id': video_cnt,
                              'height': height, 'width': width}
                out['images'].append(image_info)
            print('{}: {} images'.format(seq, num_images))

            if split != 'test':
                det_path = os.path.join(seq_path, 'det/det.txt')
                anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',')
                dets = np.loadtxt(det_path, dtype=np.float32, delimiter=',')
                print('{} ann images'.format(int(anns[:, 0].max())))
                for i in range(anns.shape[0]):
                    frame_id = int(anns[i][0])
                    if frame_id - 1 < image_range[0] or frame_id - 1 > image_range[1]:
                        continue
                    track_id = int(anns[i][1])
                    cat_id = int(anns[i][7])
                    ann_cnt += 1
                    if not ('15' in DATA_PATH):
                        # if not (float(anns[i][8]) >= 0.25):  # visibility.
                        #     continue
                        if not (int(anns[i][6]) == 1):  # whether ignore.
                            continue
                        if int(anns[i][7]) in [3, 4, 5, 6, 9, 10, 11]:  # Non-person
                            continue
                        if int(anns[i][7]) in [2, 7, 8, 12]:  # Ignored person
                            # category_id = -1
                            continue
                        else:
                            category_id = 1  # pedestrian(non-static)
                            if not track_id == tid_last:
                                tid_curr += 1
                                tid_last = track_id
                    else:
                        category_id = 1
                    ann = {'id': ann_cnt,
                           'category_id': category_id,
                           'image_id': image_cnt + frame_id,
                           'track_id': -1,
                           'bbox': '',
                           'conf': '',
                           'iscrowd': 0,
                           'area': ''}
                    # float(anns[i][4] * anns[i][5])
                    out['annotations'].append(ann)
            image_cnt += num_images
            print(tid_curr, tid_last)
            print('loaded {} for {} images and {} samples'.format(split, len(out['images']), len(out['annotations'])))
            json.dump(out, open(out_path, 'w'))
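For reference, a minimal check of one of the JSON files this script writes (the path is illustrative):

    import json
    # Note: 'bbox', 'conf' and 'area' are intentionally left empty here; the
    # actual boxes are read later from the per-image *_weak_yolox-x.txt files.
    out = json.load(open('/path/to/MOT20/annotations/train_MOT20-01_weak.json'))
    print(len(out['images']), 'images,', len(out['annotations']), 'annotation stubs')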
@@ -0,0 +1,323 @@
# Mahdi Abdollahpour, 22/12/2021, 02:26 PM, PyCharm, ByteTrack

import os
import time
from loguru import logger
# from opts import opts
from os import listdir
from os.path import isfile, join

import cv2
import numpy as np
import torch

from yolox.data.data_augment import ValTransform
# from yolox.data.datasets import COCO_CLASSES
from yolox.exp import get_exp
from yolox.utils import fuse_model, get_model_info, postprocess, vis
from yolox import statics

COCO_CLASSES = (
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
    "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
    "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
    "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
    "hair drier", "toothbrush",
)

IMAGE_EXT = [".jpg", ".jpeg", ".webp", ".bmp", ".png"]

use_cuda = True
MOT = 'MOT20'
section = 'train'
root_dir = os.path.join(statics.DATA_PATH, MOT, section)
classes = ['person', 'bicycle', 'car', 'motorcycle', 'truck', 'bus']
fuse = False


def get_labels(bboxes, cls, scores, th, tw):
    id = 0
    labels = []
    # print(pred['scores'])
    n, _ = bboxes.shape
    for i in range(n):
        if COCO_CLASSES[int(cls[i])] not in classes:
            # print('Rejecting', COCO_CLASSES[int(cls[i])], scores[i])
            continue
        if use_cuda:
            box = bboxes[i, :].detach().cpu().numpy()
        else:
            box = bboxes[i, :].detach().numpy()
        ## TODO: check if matches
        # print(box[0], box[1], box[2], box[3], '--', th, tw)
        # print(box[0] / th, box[1] / tw, box[2] / th, box[3] / tw)
        x = box[0] / th
        y = box[1] / tw
        w = (box[2] - box[0]) / th
        h = (box[3] - box[1]) / tw
        x += w / 2
        y += h / 2
        label = [0, id, x, y, w, h]
        # label = [0, id, box[0], box[1], (box[2] - box[0]), (box[3] - box[1])]
        id += 1
        labels.append(label)
    # print(id)
    labels0 = np.array(labels)
    return labels0
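# Note on the format produced above: each row of labels0 is
# [cls, id, x_center, y_center, w, h], where x/w are normalized by th and
# y/h by tw (the original image width and height as passed from image_demo
# below, since the boxes have already been rescaled by 1/ratio);
# MOTDataset later converts these back to pixels when load_weak=True.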
class Predictor(object):
    def __init__(
            self,
            model,
            exp,
            cls_names=COCO_CLASSES,
            trt_file=None,
            decoder=None,
            device="cpu",
            fp16=False,
            legacy=False,
    ):
        self.model = model
        self.cls_names = cls_names
        self.decoder = decoder
        self.num_classes = exp.num_classes
        self.confthre = 0.1
        self.nmsthre = 0.3
        self.test_size = exp.test_size
        self.device = device
        self.fp16 = fp16
        self.preproc = ValTransform()
        # if trt_file is not None:
        #     from torch2trt import TRTModule
        #
        #     model_trt = TRTModule()
        #     model_trt.load_state_dict(torch.load(trt_file))
        #
        #     x = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda()
        #     self.model(x)
        #     self.model = model_trt

    def inference(self, img):
        img_info = {"id": 0}
        if isinstance(img, str):
            img_info["file_name"] = os.path.basename(img)
            img = cv2.imread(img)
        else:
            img_info["file_name"] = None

        height, width = img.shape[:2]
        img_info["height"] = height
        img_info["width"] = width
        img_info["raw_img"] = img

        ratio = min(self.test_size[0] / img.shape[0], self.test_size[1] / img.shape[1])
        # print(self.test_size[0], img.shape[0], self.test_size[1], img.shape[1])
        img_info["ratio"] = ratio

        img, _ = self.preproc(img, None, self.test_size)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.float()
        if self.device == "gpu":
            img = img.cuda()
            # if self.fp16:
            #     img = img.half()  # to FP16

        with torch.no_grad():
            t0 = time.time()
            outputs = self.model(img)
            if self.decoder is not None:
                outputs = self.decoder(outputs, dtype=outputs.type())
            outputs = postprocess(
                outputs, self.num_classes, self.confthre,
                self.nmsthre
            )
            # logger.info("Infer time: {:.4f}s".format(time.time() - t0))
        # print(img.shape)
        _, _, tw, th = img.shape
        img_info['tw'] = tw
        img_info['th'] = th
        return outputs, img_info

    def visual(self, output, img_info, cls_conf=0.35):
        ratio = img_info["ratio"]
        img = img_info["raw_img"]
        if output is None:
            return img
        output = output.cpu()
        bboxes = output[:, 0:4]
        # preprocessing: resize
        bboxes /= ratio
        cls = output[:, 6]
        scores = output[:, 4] * output[:, 5]
        vis_res = vis(img, bboxes, scores, cls, cls_conf, self.cls_names)
        return vis_res


def image_demo(predictor, path):
    folders = [f for f in listdir(path)]
    # folders = folders[3:]
    for folder in folders:
        print(folder)
        images_folder = join(join(path, folder), 'img1')
        images = [f for f in listdir(images_folder) if isfile(join(images_folder, f))]
        images = [a for a in images if a.endswith('.jpg')]
        images.sort()
        for i, image_name in enumerate(images):
            if i % 300 == 0:
                print(folder, i)
            outputs, img_info = predictor.inference(join(images_folder, image_name))
            ratio = img_info["ratio"]
            # print(ratio)
            img = img_info["raw_img"]
            output = outputs[0]
            if output is None:
                continue
            output = output.cpu()
            bboxes = output[:, 0:4]
            # preprocessing: resize
            bboxes /= ratio
            cls = output[:, 6]
            scores = output[:, 4] * output[:, 5]
            # print('cls', cls)
            labels0 = get_labels(bboxes, cls, scores, img_info["width"], img_info["height"])
            # out_path = join(images_folder, 'weak_' + imm + '.npy')
            # print(imm)
            np.savetxt(join(images_folder, image_name + '_weak_' + model_name + '.txt'), labels0, delimiter=' ')


def main(exp, ckpt_file):
    model = exp.get_model()

    if use_cuda:
        model = model.cuda()
        device = 'gpu'
    else:
        device = 'cpu'
    model.eval()

    logger.info("loading checkpoint")
    ckpt = torch.load(ckpt_file, map_location="cpu")
    # load the model state dict
    model.load_state_dict(ckpt["model"])
    logger.info("loaded checkpoint done.")

    if fuse:
        logger.info("\tFusing model...")
        model = fuse_model(model)

    trt_file = None
    decoder = None

    predictor = Predictor(
        model, exp, COCO_CLASSES, trt_file, decoder,
        device, False, False,
    )
    current_time = time.localtime()
    image_demo(predictor, root_dir)


model_name = 'yolox-x'
# cuda = torch.device('cuda:1')

if __name__ == "__main__":
    # print(COCO_CLASSES)
    # if use_cuda:
    #     torch.cuda.set_device(1)
    # with torch.cuda.device(1):
    # os.environ['CUDA_VISIBLE_DEVICES'] = '1'
    ckpt_file = '/home/abdollahpour.ce.sharif/yolox_x.pth'
    exp = get_exp(None, model_name)
    main(exp, ckpt_file)
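A sketch of reading back one of the per-frame files this script saves (filename pattern as produced in image_demo above; the path itself is illustrative):

    import numpy as np
    # ndmin=2 keeps shape (N, 6) even when a frame has a single detection.
    labels = np.loadtxt('/path/to/MOT20/train/MOT20-01/img1/000001.jpg_weak_yolox-x.txt', ndmin=2)
    # columns: [cls, id, x_center, y_center, w, h], normalized to image size
    print(labels.shape)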
@@ -0,0 +1,23 @@
# Mahdi Abdollahpour, 30/12/2021, 07:47 PM, PyCharm, ByteTrack

from yolox.core import launch
from yolox.data import MOTDataset, ValTransform, ValTransformWithPseudo

test_size = (896, 1600)
import os
from yolox.data import get_yolox_datadir

if __name__ == "__main__":
    valdataset = MOTDataset(
        data_dir=os.path.join(get_yolox_datadir(), "MOT20"),
        json_file='train_MOT20-01.json',
        img_size=test_size,
        name='train',  # change to train when running on training set
        preproc=ValTransformWithPseudo(
            rgb_means=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
        load_weak=True
    )
    for batch in valdataset:
        print(batch)
        exit()
@@ -18,6 +18,8 @@ import glob
import motmetrics as mm
from collections import OrderedDict
from pathlib import Path
import learn2learn as l2l
import yolox.statics as statics


def make_parser():
@@ -26,6 +28,10 @@ def make_parser():
    parser.add_argument("-expn", "--experiment-name", type=str, default=None)
    parser.add_argument("-n", "--name", type=str, default=None, help="model name")

    parser.add_argument(
        "--adaptation_period", default=4, type=int, help="if 4, then adapts to one batch in four batches"
    )

    # distributed
    parser.add_argument(
        "--dist-backend", default="nccl", type=str, help="distributed backend"
@@ -109,6 +115,8 @@ def make_parser():
    parser.add_argument("--match_thresh", type=float, default=0.9, help="matching threshold for tracking")
    parser.add_argument("--min-box-area", type=float, default=100, help='filter out tiny boxes')
    parser.add_argument("--mot20", dest="mot20", default=False, action="store_true", help="test mot20.")
    parser.add_argument("--use_existing_files", default=False, action="store_true", help="to use already created files")
    return parser
@@ -126,38 +134,18 @@ def compare_dataframes(gts, ts):
    return accs, names


def process_loader(args, val_loader, model, is_distributed):
    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn(
            "You have chosen to seed testing. This will turn on the CUDNN deterministic setting, "
        )

    # set environment variables for distributed training
    cudnn.benchmark = True
    rank = args.local_rank
    # rank = get_local_rank()

def process_loader(args, exp, val_loader, model, is_distributed, trt_file, decoder, val_ann):
    file_name = os.path.join(exp.output_dir, args.experiment_name)
    rank = args.local_rank

    if rank == 0:
        os.makedirs(file_name, exist_ok=True)

    results_folder = os.path.join(file_name, "track_results")
    os.makedirs(results_folder, exist_ok=True)

    setup_logger(file_name, distributed_rank=rank, filename="val_log.txt", mode="a")
    logger.info("Args: {}".format(args))

    if args.conf is not None:
        exp.test_conf = args.conf
    if args.nms is not None:
        exp.nmsthre = args.nms
    if args.tsize is not None:
        exp.test_size = (args.tsize, args.tsize)

    adaptation_period = None
    if args.task == 'metamot':
        adaptation_period = args.adaptation_period

    evaluator = MOTEvaluator(
        args=args,
@@ -167,61 +155,35 @@ def process_loader(args, val_loader, model, is_distributed):
        nmsthre=exp.nmsthre,
        num_classes=exp.num_classes,
    )

    torch.cuda.set_device(rank)
    model.cuda(rank)
    model.eval()

    if not args.speed and not args.trt:
        if args.ckpt is None:
            ckpt_file = os.path.join(file_name, "best_ckpt.pth.tar")
        else:
            ckpt_file = args.ckpt
        logger.info("loading checkpoint")
        loc = "cuda:{}".format(rank)
        ckpt = torch.load(ckpt_file, map_location=loc)
        # load the model state dict
        model.load_state_dict(ckpt["model"])
        logger.info("loaded checkpoint done.")

    if is_distributed:
        model = DDP(model, device_ids=[rank])

    if args.fuse:
        logger.info("\tFusing model...")
        model = fuse_model(model)

    if args.trt:
        assert (
            not args.fuse and not is_distributed and args.batch_size == 1
        ), "TensorRT model does not support model fusing and distributed inferencing!"
        trt_file = os.path.join(file_name, "model_trt.pth")
        assert os.path.exists(
            trt_file
        ), "TensorRT model is not found!\n Run tools/trt.py first!"
        model.head.decode_in_inference = False
        decoder = model.head.decode_outputs
    else:
        trt_file = None
        decoder = None

    # start evaluate
    *_, summary = evaluator.evaluate(
        model, is_distributed, args.fp16, trt_file, decoder, exp.test_size, results_folder
        model, is_distributed, args.fp16, trt_file, decoder, exp.test_size, results_folder,
        adaptation_period=adaptation_period,
    )
    logger.info("\n" + summary)


def eval_MOT(args, exp, val_ann=None):
    file_name = os.path.join(exp.output_dir, args.experiment_name)
    rank = args.local_rank
    if rank == 0:
        os.makedirs(file_name, exist_ok=True)
    results_folder = os.path.join(file_name, "track_results")
    os.makedirs(results_folder, exist_ok=True)

    # evaluate MOTA
    mm.lap.default_solver = 'lap'

    if exp.val_ann == 'val_half.json':
    if val_ann == 'val_half.json':
        gt_type = '_val_half'
    else:
        gt_type = ''
    print('gt_type', gt_type)
    if args.mot20:
        gtfiles = glob.glob(os.path.join('datasets/MOT20/train', '*/gt/gt{}.txt'.format(gt_type)))
        gtfiles = glob.glob(os.path.join(statics.DATA_PATH, 'MOT20/train', '*/gt/gt{}.txt'.format(gt_type)))
    else:
        gtfiles = glob.glob(os.path.join('datasets/mot/train', '*/gt/gt{}.txt'.format(gt_type)))
        gtfiles = glob.glob(os.path.join(statics.DATA_PATH, 'MOT17/train', '*/gt/gt{}.txt'.format(gt_type)))
    print('gt_files', gtfiles)
    tsfiles = [f for f in glob.glob(os.path.join(results_folder, '*.txt')) if
               not os.path.basename(f).startswith('eval')]
@@ -267,23 +229,114 @@ def process_loader(args, val_loader, model, is_distributed):
    logger.info('Completed')


def load_model(args, exp, is_distributed):
    model = exp.get_model()
    logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size)))
    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn(
            "You have chosen to seed testing. This will turn on the CUDNN deterministic setting, "
        )

    # set environment variables for distributed training
    cudnn.benchmark = True
    rank = args.local_rank
    # rank = get_local_rank()

    file_name = os.path.join(exp.output_dir, args.experiment_name)

    if rank == 0:
        os.makedirs(file_name, exist_ok=True)

    setup_logger(file_name, distributed_rank=rank, filename="val_log.txt", mode="a")
    logger.info("Args: {}".format(args))

    if args.conf is not None:
        exp.test_conf = args.conf
    if args.nms is not None:
        exp.nmsthre = args.nms
    if args.tsize is not None:
        exp.test_size = (args.tsize, args.tsize)

    if args.task == "metamot":
        model = l2l.algorithms.MAML(model, lr=exp.inner_lr, first_order=exp.first_order, allow_nograd=True)
    torch.cuda.set_device(rank)
    model.cuda(rank)
    model.eval()

    if not args.speed and not args.trt:
        if args.ckpt is None:
            ckpt_file = os.path.join(file_name, "best_ckpt.pth.tar")
        else:
            ckpt_file = args.ckpt
        logger.info("loading checkpoint")
        loc = "cuda:{}".format(rank)
        ckpt = torch.load(ckpt_file, map_location=loc)
        # handling meta models: the l2l.MAML wrapper expects a 'module.' prefix
        # on every parameter key, so prefix plain checkpoints before loading
        new_dict = {}
        if (not list(ckpt["model"].keys())[0].startswith('module')) and args.task == "metamot":
            for key in ckpt["model"].keys():
                if not key.startswith('module.'):
                    new_dict['module.' + key] = ckpt["model"][key]
                else:
                    new_dict[key] = ckpt["model"][key]
            del ckpt["model"]
            ckpt["model"] = new_dict
        # load the model state dict
        model.load_state_dict(ckpt["model"])
        logger.info("loaded checkpoint done.")

    if is_distributed:
        model = DDP(model, device_ids=[rank])

    if args.fuse:
        logger.info("\tFusing model...")
        model = fuse_model(model)

    if args.trt:
        assert (
            not args.fuse and not is_distributed and args.batch_size == 1
        ), "TensorRT model does not support model fusing and distributed inferencing!"
        trt_file = os.path.join(file_name, "model_trt.pth")
        assert os.path.exists(
            trt_file
        ), "TensorRT model is not found!\n Run tools/trt.py first!"
        model.head.decode_in_inference = False
        decoder = model.head.decode_outputs
    else:
        trt_file = None
        decoder = None
    return model, trt_file, decoder


@logger.catch
def main(exp, args, num_gpu):
    is_distributed = num_gpu > 1
    print('is_distributed', is_distributed)
    print('num_gpu', num_gpu)

    model = exp.get_model()
    logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size)))
    # logger.info("Model Structure:\n{}".format(str(model)))

    model, trt_file, decoder = load_model(args, exp, is_distributed)
    if args.task == 'metamot':
        val_loaders = exp.get_eval_loaders(args.batch_size, is_distributed, args.test)
        for val_loader in val_loaders:
            learner = model.clone()
            process_loader(args, val_loader, learner, is_distributed)
        if not args.use_existing_files:
            for val_loader, val_ann in zip(val_loaders, exp.val_anns):
                logger.info('processing loader...')
                process_loader(args, exp, val_loader, model, is_distributed, trt_file, decoder, val_ann)
        eval_MOT(args, exp)
    else:
        val_loader = exp.get_eval_loader(args.batch_size, is_distributed, args.test)
        process_loader(args, val_loader, model, is_distributed)
        if not args.use_existing_files:
            val_loader = exp.get_eval_loader(args.batch_size, is_distributed, args.test)
            process_loader(args, exp, val_loader, model, is_distributed, trt_file, decoder, exp.val_ann)
        eval_MOT(args, exp, exp.val_ann)


if __name__ == "__main__":
@@ -76,6 +76,7 @@ class MetaTrainer:
        self.after_train()

    def train_in_epoch(self):
        # self.evaluate_and_save_model()
        for self.epoch in range(self.start_epoch, self.max_epoch):
            self.before_epoch()
            self.train_in_task()
@@ -212,7 +213,7 @@ class MetaTrainer:
        # self.model = model
        self.model.train()

        self.evaluator = self.exp.get_evaluator(
        self.evaluators = self.exp.get_evaluators(
            batch_size=self.args.batch_size, is_distributed=self.is_distributed
        )
        # Tensorboard logger
@@ -320,6 +321,7 @@ class MetaTrainer:
            ckpt = torch.load(ckpt_file, map_location=self.device)

            # TODO: handle pretrained BYTETrack
            # handling meta models
            # new_dict = {}
            # for key in ckpt["model"].keys():
@@ -355,9 +357,10 @@ class MetaTrainer:
        return model

    def evaluate_and_save_model(self):
        logger.info("starting eval...")
        evalmodel = self.ema_model.ema if self.use_model_ema else self.model
        ap50_95, ap50, summary = self.exp.eval(
            evalmodel, self.evaluator, self.is_distributed
            evalmodel, self.evaluators, self.is_distributed
        )
        self.model.train()
        if self.rank == 0:
@@ -2,7 +2,7 @@
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.

from .data_augment import TrainTransform, ValTransform
from .data_augment import TrainTransform, ValTransform, ValTransformWithPseudo
from .data_prefetcher import DataPrefetcher
from .dataloading import DataLoader, get_yolox_datadir
from .datasets import *
@@ -297,3 +297,83 @@ class ValTransform:
    def __call__(self, img, res, input_size):
        img, _ = preproc(img, input_size, self.means, self.std, self.swap)
        return img, np.zeros((1, 5))


class ValTransformWithPseudo:
    """
    Defines the transformations that should be applied to test PIL image
    for input into the network.

    dimension -> tensorize -> color adj

    Arguments:
        resize (int): input dimension to SSD
        rgb_means ((int,int,int)): average RGB of the dataset
            (104, 117, 123)
        swap ((int,int,int)): final order of channels

    Returns:
        transform (transform): callable transform to be applied to test/val
        data
    """

    def __init__(self, rgb_means=None, std=None, swap=(2, 0, 1), max_labels=100):
        self.means = rgb_means
        self.swap = swap
        self.std = std
        self.max_labels = max_labels

    def __call__(self, image, targets, input_dim):
        boxes = targets[:, :4].copy()
        labels = targets[:, 4].copy()
        ids = targets[:, 5].copy()
        if len(boxes) == 0:
            targets = np.zeros((self.max_labels, 6), dtype=np.float32)
            image, r_o = preproc(image, input_dim, self.means, self.std)
            image = np.ascontiguousarray(image, dtype=np.float32)
            return image, targets

        image_o = image.copy()
        targets_o = targets.copy()
        height_o, width_o, _ = image_o.shape
        boxes_o = targets_o[:, :4]
        labels_o = targets_o[:, 4]
        ids_o = targets_o[:, 5]
        # bbox_o: [xyxy] to [c_x,c_y,w,h]
        boxes_o = xyxy2cxcywh(boxes_o)

        # image_t = _distort(image)
        image_t = image
        # image_t, boxes = _mirror(image_t, boxes)
        height, width, _ = image_t.shape
        image_t, r_ = preproc(image_t, input_dim, self.means, self.std)
        # boxes [xyxy] 2 [cx,cy,w,h]
        boxes = xyxy2cxcywh(boxes)
        boxes *= r_

        mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 1
        boxes_t = boxes[mask_b]
        labels_t = labels[mask_b]
        ids_t = ids[mask_b]

        if len(boxes_t) == 0:
            image_t, r_o = preproc(image_o, input_dim, self.means, self.std)
            boxes_o *= r_o
            boxes_t = boxes_o
            labels_t = labels_o
            ids_t = ids_o

        labels_t = np.expand_dims(labels_t, 1)
        ids_t = np.expand_dims(ids_t, 1)

        targets_t = np.hstack((labels_t, boxes_t, ids_t))
        padded_labels = np.zeros((self.max_labels, 6))
        padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[
            : self.max_labels
        ]
        padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
        image_t = np.ascontiguousarray(image_t, dtype=np.float32)
        return image_t, padded_labels
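A minimal usage sketch for the new transform (shapes assume the defaults above; the import works once the yolox.data __init__ change shown earlier is applied):

    import numpy as np
    from yolox.data import ValTransformWithPseudo

    t = ValTransformWithPseudo(rgb_means=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
    img = np.zeros((480, 640, 3), dtype=np.uint8)
    targets = np.array([[10., 20., 110., 220., 0., 7.]])  # [x1, y1, x2, y2, cls, track_id]
    img_t, padded = t(img, targets, (800, 1440))
    print(img_t.shape, padded.shape)  # (3, 800, 1440), (100, 6); rows are [cls, cx, cy, w, h, id]

Unlike ValTransform, which discards annotations at eval time, this variant keeps the (pseudo) targets so the meta-learner can take adaptation steps during tracking.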
@@ -14,12 +14,13 @@ class MOTDataset(Dataset):
    """

    def __init__(
        self,
        data_dir=None,
        json_file="train_half.json",
        name="train",
        img_size=(608, 1088),
        preproc=None,
            self,
            data_dir=None,
            json_file="train_half.json",
            name="train",
            img_size=(608, 1088),
            preproc=None,
            load_weak=False,
    ):
        """
        COCO dataset initialization. Annotation data are read into memory by COCO API.
@@ -45,6 +46,7 @@ class MOTDataset(Dataset):
        self.name = name
        self.img_size = img_size
        self.preproc = preproc
        self.load_weak = load_weak

    def __len__(self):
        return len(self.ids)

@@ -98,9 +100,31 @@ class MOTDataset(Dataset):
        img_file = os.path.join(
            self.data_dir, self.name, file_name
        )
        head_tail = os.path.split(img_file)
        # label_path = os.path.join(head_tail[0], head_tail[1].replace('.jpg', '.txt'))
        if self.load_weak:
            weak_label_path = os.path.join(head_tail[0], head_tail[1] + '_weak_yolox-x.txt')
            # load weak labels from weak_label_path; each row is
            # [cls, id, x_center, y_center, w, h], normalized by image size
            # (see tools/generate_weak_labels.py); ndmin=2 keeps the (N, 6)
            # shape when a frame has a single detection
            width = img_info[1]
            height = img_info[0]
            labels = np.loadtxt(weak_label_path, ndmin=2)
            res = np.ones_like(labels)
            # rescale to pixels column-wise (rows are detections)
            labels[:, 2] *= width
            labels[:, 4] *= width
            labels[:, 3] *= height
            labels[:, 5] *= height
            labels[:, 4] += labels[:, 2]
            labels[:, 5] += labels[:, 3]
            res[:, 0:4] = labels[:, -4:]
            res[:, 5] = labels[:, 1]
            # all are from class one
            # res[:, 4] = labels[:, 0]

        img = cv2.imread(img_file)
        # if img is None:
        #     print('img_file is None', img_file)
        if img is None:
            print('img_file is None', img_file)
        assert img is not None

        return img, res.copy(), img_info, np.array([id_])
@@ -192,7 +192,7 @@ class COCOEvaluator:
            info = time_info + "\n"

            # Evaluate the Dt (detection) json comparing with the ground truth
            # Evaluate the Dt (detection) json comparing with the ground truth
            if len(data_dict) > 0:
                cocoGt = self.dataloader.dataset.coco
                # TODO: since pycocotools can't process dict in py36, write data to json file.
@@ -210,8 +210,12 @@ class COCOEvaluator:
                from pycocotools import cocoeval as COCOeval
                logger.warning("Use standard COCOeval.")
            '''
            # from pycocotools.cocoeval import COCOeval
            from yolox.layers import COCOeval_opt as COCOeval
            # TODO: commenting this and trying to use pycocotools
            from pycocotools.cocoeval import COCOeval
            # from yolox.layers import COCOeval_opt as COCOeval
            cocoEval = COCOeval(cocoGt, cocoDt, annType[1])
            cocoEval.evaluate()
            cocoEval.accumulate()
@@ -34,7 +34,8 @@ def write_results(filename, results): | |||
if track_id < 0: | |||
continue | |||
x1, y1, w, h = tlwh | |||
line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1), h=round(h, 1), s=round(score, 2)) | |||
line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1), | |||
h=round(h, 1), s=round(score, 2)) | |||
f.write(line) | |||
logger.info('save results to {}'.format(filename)) | |||
@@ -47,7 +48,8 @@ def write_results_no_score(filename, results): | |||
if track_id < 0: | |||
continue | |||
x1, y1, w, h = tlwh | |||
line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1), h=round(h, 1)) | |||
line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1), | |||
h=round(h, 1)) | |||
f.write(line) | |||
logger.info('save results to {}'.format(filename)) | |||
@@ -59,7 +61,7 @@ class MOTEvaluator: | |||
""" | |||
def __init__( | |||
self, args, dataloader, img_size, confthre, nmsthre, num_classes): | |||
self, args, dataloader, img_size, confthre, nmsthre, num_classes): | |||
""" | |||
Args: | |||
dataloader (Dataloader): evaluate dataloader. | |||
@@ -77,14 +79,15 @@ class MOTEvaluator: | |||
self.args = args | |||
def evaluate( | |||
self, | |||
model, | |||
distributed=False, | |||
half=False, | |||
trt_file=None, | |||
decoder=None, | |||
test_size=None, | |||
result_folder=None | |||
self, | |||
model, | |||
distributed=False, | |||
half=False, | |||
trt_file=None, | |||
decoder=None, | |||
test_size=None, | |||
result_folder=None, | |||
adaptation_period=None, | |||
): | |||
""" | |||
COCO average precision (AP) Evaluation. Iterate inference on the test dataset | |||
@@ -100,11 +103,17 @@ class MOTEvaluator: | |||
ap50 (float) : COCO AP of IoU=50 | |||
summary (sr): summary info of evaluation. | |||
""" | |||
if adaptation_period is not None: | |||
logger.info('cloning model...') | |||
learner = model.clone() | |||
else: | |||
learner = model | |||
# TODO half to amp_test | |||
tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor | |||
model = model.eval() | |||
learner = learner.eval() | |||
if half: | |||
model = model.half() | |||
learner = learner.half() | |||
ids = [] | |||
data_list = [] | |||
results = [] | |||
@@ -117,74 +126,90 @@ class MOTEvaluator: | |||
if trt_file is not None: | |||
from torch2trt import TRTModule | |||
logger.info('Loading trt file') | |||
model_trt = TRTModule() | |||
model_trt.load_state_dict(torch.load(trt_file)) | |||
x = torch.ones(1, 3, test_size[0], test_size[1]).cuda() | |||
model(x) | |||
model = model_trt | |||
learner(x) | |||
learner = model_trt | |||
tracker = BYTETracker(self.args) | |||
ori_thresh = self.args.track_thresh | |||
for cur_iter, (imgs, _, info_imgs, ids) in enumerate( | |||
progress_bar(self.dataloader) | |||
): | |||
with torch.no_grad(): | |||
# init tracker | |||
frame_id = info_imgs[2].item() | |||
video_id = info_imgs[3].item() | |||
img_file_name = info_imgs[4] | |||
video_name = img_file_name[0].split('/')[0] | |||
if video_name == 'MOT17-05-FRCNN' or video_name == 'MOT17-06-FRCNN': | |||
self.args.track_buffer = 14 | |||
elif video_name == 'MOT17-13-FRCNN' or video_name == 'MOT17-14-FRCNN': | |||
self.args.track_buffer = 25 | |||
else: | |||
self.args.track_buffer = 30 | |||
if video_name == 'MOT17-01-FRCNN': | |||
self.args.track_thresh = 0.65 | |||
elif video_name == 'MOT17-06-FRCNN': | |||
self.args.track_thresh = 0.65 | |||
elif video_name == 'MOT17-12-FRCNN': | |||
self.args.track_thresh = 0.7 | |||
elif video_name == 'MOT17-14-FRCNN': | |||
self.args.track_thresh = 0.67 | |||
else: | |||
self.args.track_thresh = ori_thresh | |||
if video_name == 'MOT20-06' or video_name == 'MOT20-08': | |||
self.args.track_thresh = 0.3 | |||
else: | |||
self.args.track_thresh = ori_thresh | |||
if video_name not in video_names: | |||
video_names[video_id] = video_name | |||
if frame_id == 1: | |||
tracker = BYTETracker(self.args) | |||
if len(results) != 0: | |||
result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id - 1])) | |||
write_results(result_filename, results) | |||
results = [] | |||
imgs = imgs.type(tensor_type) | |||
for cur_iter, (imgs, targets, info_imgs, ids) in enumerate( | |||
progress_bar(self.dataloader) | |||
): | |||
if cur_iter % 100 == 0: | |||
logger.info('cur_iter: {}'.format(cur_iter)) | |||
# with torch.no_grad(): | |||
# init tracker | |||
# imgs = imgs.to(self.data_type) | |||
# targets = targets.to(self.data_type) | |||
frame_id = info_imgs[2].item() | |||
video_id = info_imgs[3].item() | |||
img_file_name = info_imgs[4] | |||
video_name = img_file_name[0].split('/')[0] | |||
if video_name == 'MOT17-05-FRCNN' or video_name == 'MOT17-06-FRCNN': | |||
self.args.track_buffer = 14 | |||
elif video_name == 'MOT17-13-FRCNN' or video_name == 'MOT17-14-FRCNN': | |||
self.args.track_buffer = 25 | |||
else: | |||
self.args.track_buffer = 30 | |||
if video_name == 'MOT17-01-FRCNN': | |||
self.args.track_thresh = 0.65 | |||
elif video_name == 'MOT17-06-FRCNN': | |||
self.args.track_thresh = 0.65 | |||
elif video_name == 'MOT17-12-FRCNN': | |||
self.args.track_thresh = 0.7 | |||
elif video_name == 'MOT17-14-FRCNN': | |||
self.args.track_thresh = 0.67 | |||
else: | |||
self.args.track_thresh = ori_thresh | |||
if video_name == 'MOT20-06' or video_name == 'MOT20-08': | |||
self.args.track_thresh = 0.3 | |||
else: | |||
self.args.track_thresh = ori_thresh | |||
if video_name not in video_names: | |||
video_names[video_id] = video_name | |||
if frame_id == 1: | |||
tracker = BYTETracker(self.args) | |||
if len(results) != 0: | |||
result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id - 1])) | |||
write_results(result_filename, results) | |||
results = [] | |||
imgs = imgs.type(tensor_type) | |||
# skip the the last iters since batchsize might be not enough for batch inference | |||
is_time_record = cur_iter < len(self.dataloader) - 1 | |||
if is_time_record: | |||
start = time.time() | |||
if adaptation_period is not None and cur_iter % adaptation_period == 0: | |||
learner.train() | |||
targets = targets.type(tensor_type) | |||
targets.requires_grad = False | |||
outputs = learner(imgs, targets) | |||
loss = outputs["total_loss"] | |||
learner.adapt(loss) | |||
learner.eval() | |||
# skip the the last iters since batchsize might be not enough for batch inference | |||
is_time_record = cur_iter < len(self.dataloader) - 1 | |||
if is_time_record: | |||
start = time.time() | |||
with torch.no_grad(): | |||
outputs = learner(imgs) | |||
outputs = model(imgs) | |||
if decoder is not None: | |||
outputs = decoder(outputs, dtype=outputs.type()) | |||
# print('outputs', outputs.shape) | |||
outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre) | |||
if decoder is not None: | |||
outputs = decoder(outputs, dtype=outputs.type()) | |||
print('outputs', outputs.shape) | |||
outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre) | |||
if is_time_record: | |||
infer_end = time_synchronized() | |||
inference_time += infer_end - start | |||
if is_time_record: | |||
infer_end = time_synchronized() | |||
inference_time += infer_end - start | |||
output_results = self.convert_to_coco_format(outputs, info_imgs, ids) | |||
data_list.extend(output_results) | |||
@@ -209,7 +234,7 @@ class MOTEvaluator: | |||
if is_time_record: | |||
track_end = time_synchronized() | |||
track_time += track_end - infer_end | |||
if cur_iter == len(self.dataloader) - 1: | |||
result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id])) | |||
write_results(result_filename, results) | |||
@@ -225,14 +250,14 @@ class MOTEvaluator: | |||
return eval_results | |||
def evaluate_sort( | |||
self, | |||
model, | |||
distributed=False, | |||
half=False, | |||
trt_file=None, | |||
decoder=None, | |||
test_size=None, | |||
result_folder=None | |||
self, | |||
model, | |||
distributed=False, | |||
half=False, | |||
trt_file=None, | |||
decoder=None, | |||
test_size=None, | |||
result_folder=None | |||
): | |||
""" | |||
COCO average precision (AP) Evaluation. Iterate inference on the test dataset | |||
@@ -272,12 +297,14 @@ class MOTEvaluator: | |||
x = torch.ones(1, 3, test_size[0], test_size[1]).cuda() | |||
model(x) | |||
model = model_trt | |||
tracker = Sort(self.args.track_thresh) | |||
for cur_iter, (imgs, _, info_imgs, ids) in enumerate( | |||
progress_bar(self.dataloader) | |||
progress_bar(self.dataloader) | |||
): | |||
if cur_iter % 250 == 0: | |||
logger.info('cur_iter: {}'.format(cur_iter)) | |||
with torch.no_grad(): | |||
# init tracker | |||
frame_id = info_imgs[2].item() | |||
@@ -306,7 +333,7 @@ class MOTEvaluator: | |||
outputs = decoder(outputs, dtype=outputs.type()) | |||
outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre) | |||
if is_time_record: | |||
infer_end = time_synchronized() | |||
inference_time += infer_end - start | |||
@@ -331,7 +358,7 @@ class MOTEvaluator: | |||
if is_time_record: | |||
track_end = time_synchronized() | |||
track_time += track_end - infer_end | |||
if cur_iter == len(self.dataloader) - 1: | |||
result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id])) | |||
write_results_no_score(result_filename, results) | |||
@@ -347,15 +374,15 @@ class MOTEvaluator: | |||
return eval_results | |||
    def evaluate_deepsort(
            self,
            model,
            distributed=False,
            half=False,
            trt_file=None,
            decoder=None,
            test_size=None,
            result_folder=None,
            model_folder=None
    ):
        """
        COCO average precision (AP) Evaluation. Iterate inference on the test dataset
@@ -395,11 +422,11 @@ class MOTEvaluator:
            x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()
            model(x)
            model = model_trt

        tracker = DeepSort(model_folder, min_confidence=self.args.track_thresh)

        for cur_iter, (imgs, _, info_imgs, ids) in enumerate(
                progress_bar(self.dataloader)
        ):
            with torch.no_grad():
                # init tracker
@@ -429,7 +456,7 @@ class MOTEvaluator:
                    outputs = decoder(outputs, dtype=outputs.type())
                outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)

                if is_time_record:
                    infer_end = time_synchronized()
                    inference_time += infer_end - start
@@ -454,7 +481,7 @@ class MOTEvaluator:
            if is_time_record:
                track_end = time_synchronized()
                track_time += track_end - infer_end

            if cur_iter == len(self.dataloader) - 1:
                result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id]))
                write_results_no_score(result_filename, results)
@@ -470,15 +497,15 @@ class MOTEvaluator:
        return eval_results
    def evaluate_motdt(
            self,
            model,
            distributed=False,
            half=False,
            trt_file=None,
            decoder=None,
            test_size=None,
            result_folder=None,
            model_folder=None
    ):
        """
        COCO average precision (AP) Evaluation. Iterate inference on the test dataset
@@ -518,10 +545,10 @@ class MOTEvaluator:
            x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()
            model(x)
            model = model_trt

        tracker = OnlineTracker(model_folder, min_cls_score=self.args.track_thresh)

        for cur_iter, (imgs, _, info_imgs, ids) in enumerate(
                progress_bar(self.dataloader)
        ):
            with torch.no_grad():
                # init tracker
@@ -551,7 +578,7 @@ class MOTEvaluator:
                    outputs = decoder(outputs, dtype=outputs.type())
                outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)

                if is_time_record:
                    infer_end = time_synchronized()
                    inference_time += infer_end - start
@@ -578,7 +605,7 @@ class MOTEvaluator:
            if is_time_record:
                track_end = time_synchronized()
                track_time += track_end - infer_end

            if cur_iter == len(self.dataloader) - 1:
                result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id]))
                write_results(result_filename, results)
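Note: evaluate_sort, evaluate_deepsort, and evaluate_motdt above share the same evaluation skeleton and differ only in how the tracker is constructed — Sort(self.args.track_thresh), DeepSort(model_folder, min_confidence=self.args.track_thresh), OnlineTracker(model_folder, min_cls_score=self.args.track_thresh) — and in whether confidence scores are written out (write_results for MOTDT vs. write_results_no_score for SORT/DeepSORT).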
@@ -596,7 +623,7 @@ class MOTEvaluator:
    def convert_to_coco_format(self, outputs, info_imgs, ids):
        data_list = []
        for (output, img_h, img_w, img_id) in zip(
                outputs, info_imgs[0], info_imgs[1], ids
        ):
            if output is None:
                continue
@@ -644,9 +671,9 @@ class MOTEvaluator:
            [
                "Average {} time: {:.2f} ms".format(k, v)
                for k, v in zip(
                    ["forward", "track", "inference"],
                    [a_infer_time, a_track_time, (a_infer_time + a_track_time)],
                )
            ]
        )
@@ -666,8 +693,9 @@ class MOTEvaluator:
            from pycocotools import cocoeval as COCOeval
            logger.warning("Use standard COCOeval.")
        '''
-       #from pycocotools.cocoeval import COCOeval
-       from yolox.layers import COCOeval_opt as COCOeval
+       # I changed it
+       from pycocotools.cocoeval import COCOeval
+       # from yolox.layers import COCOeval_opt as COCOeval
        cocoEval = COCOeval(cocoGt, cocoDt, annType[1])
        cocoEval.evaluate()
        cocoEval.accumulate()
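Note: COCOeval_opt (yolox.layers) is the compiled, C++-accelerated COCO evaluator; the swap above pins the pure-Python pycocotools implementation, which avoids building the YOLOX extension on the cluster at the cost of a slower evaluate/accumulate. The commented-out block above already shows the upstream fallback pattern, which would keep the fast path whenever the extension is available — a minimal sketch, not part of the diff:

try:
    from yolox.layers import COCOeval_opt as COCOeval  # compiled, faster evaluate/accumulate
except ImportError:
    from pycocotools.cocoeval import COCOeval  # pure-Python fallback, same interface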
@@ -24,7 +24,7 @@ class BaseMetaExp(metaclass=ABCMeta):
        self.seed = None
        # self.output_dir = "./YOLOX_outputs"
        self.output_dir = "./meta_experiments"
-       self.print_interval = 100
+       self.print_interval = 250
        self.eval_interval = 10

    @abstractmethod
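Note: print_interval is measured in iterations; raising it from 100 to 250 puts training logs on the same 250-iteration cadence as the `cur_iter % 250 == 0` progress logging added to mot_evaluator.py above, likely to keep long cluster-job logs manageable.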
@@ -62,7 +62,7 @@ class MetaExp(BaseMetaExp):
        # ----------------- Meta-learning ------------------ #
        self.first_order = True
-       self.inner_lr = 1e-5
+       self.inner_lr = 1e-6

    def get_model(self):
        from yolox.models import YOLOPAFPN, YOLOX, YOLOXHead
@@ -241,19 +241,31 @@ class MetaExp(BaseMetaExp):
            val_loaders.append(val_loader)
        return val_loaders

-   def get_evaluator(self, batch_size, is_distributed, testdev=False):
+   def get_evaluators(self, batch_size, is_distributed, testdev=False):
        from yolox.evaluators import COCOEvaluator

-       val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev)
-       evaluator = COCOEvaluator(
-           dataloader=val_loader,
-           img_size=self.test_size,
-           confthre=self.test_conf,
-           nmsthre=self.nmsthre,
-           num_classes=self.num_classes,
-           testdev=testdev,
-       )
-       return evaluator
-
-   def eval(self, model, evaluator, is_distributed, half=False):
-       return evaluator.evaluate(model, is_distributed, half)
+       val_loaders = self.get_eval_loaders(batch_size, is_distributed, testdev=testdev)
+       evaluators = []
+       for val_loader in val_loaders:
+           evaluator = COCOEvaluator(
+               dataloader=val_loader,
+               img_size=self.test_size,
+               confthre=self.test_conf,
+               nmsthre=self.nmsthre,
+               num_classes=self.num_classes,
+               testdev=testdev,
+           )
+           evaluators.append(evaluator)
+       return evaluators
+
+   def eval(self, model, evaluators, is_distributed, half=False):
+       ap50_95s = 0.0
+       ap50s = 0.0
+       summarys = ''
+       for evaluator in evaluators:
+           ap50_95, ap50, summary = evaluator.evaluate(model, is_distributed, half)
+           ap50_95s += ap50_95
+           ap50s += ap50
+           summarys += ("\n" + summary)
+       n = len(evaluators)
+       return (ap50_95s / n), (ap50s / n), summarys
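Note: eval now reports the mean AP50:95 and AP50 across one COCOEvaluator per validation loader (one per meta-task/sequence) and concatenates the per-evaluator summaries. A minimal usage sketch, assuming a trainer that already holds exp and model (call-site names are illustrative, not part of the diff):

# hypothetical call site inside a trainer's evaluation step
evaluators = exp.get_evaluators(batch_size=1, is_distributed=False)
ap50_95, ap50, summary = exp.eval(model, evaluators, is_distributed=False, half=True)
logger.info(summary)  # newline-separated COCO summaries, one per loader

Note that the division by len(evaluators) assumes at least one validation loader; an empty list would raise ZeroDivisionError.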
@@ -197,6 +197,9 @@ class YOLOXHead(nn.Module):
        if self.training:
            # logger.info("labels.shape:{}".format(labels.shape))
            # logger.info("torch.cat(outputs, 1).shape:{}".format(torch.cat(outputs, 1).shape))
+           # if torch.isnan(torch.cat(outputs, 1)).sum().item():
+           #     logger.info('There is Nan value in outputs {}'.format(torch.isnan(torch.cat(outputs, 1)).sum().item()))
            return self.get_losses(
                imgs,
                x_shifts,
@@ -397,22 +400,45 @@ class YOLOXHead(nn.Module):
        if self.use_l1:
            l1_targets = torch.cat(l1_targets, 0)

+       # TODO: check loss parts shapes
        num_fg = max(num_fg, 1)
+       # if bbox_preds.view(-1, 4)[fg_masks].shape != reg_targets.shape:
+       #     logger.info("some shape mismatch")
+       #     logger.info("bbox_preds.view(-1, 4)[fg_masks].shape {}".format(bbox_preds.view(-1, 4)[fg_masks].shape))
+       #     logger.info("reg_targets {}".format(reg_targets.shape))
+       #     logger.info("--------------------")
        loss_iou = (
            self.iou_loss(bbox_preds.view(-1, 4)[fg_masks], reg_targets)
        ).sum() / num_fg
+       # if obj_preds.view(-1, 1).shape != obj_targets.shape:
+       #     logger.info("some shape mismatch")
+       #     logger.info("obj_preds.view(-1, 1).shape {}".format(obj_preds.view(-1, 1).shape))
+       #     logger.info("obj_targets.shape {}".format(obj_targets.shape))
+       #     logger.info("--------------------")
        loss_obj = (
            self.bcewithlog_loss(obj_preds.view(-1, 1), obj_targets)
        ).sum() / num_fg
+       # if cls_preds.view(-1, self.num_classes)[fg_masks].shape != cls_targets.shape:
+       #     logger.info("some shape mismatch")
+       #     logger.info("cls_preds.view(-1, self.num_classes)[fg_masks].shape {}".format(
+       #         cls_preds.view(-1, self.num_classes)[fg_masks].shape))
+       #     logger.info("cls_targets.shape {}".format(cls_targets.shape))
+       #     logger.info("--------------------")
        loss_cls = (
            self.bcewithlog_loss(
                cls_preds.view(-1, self.num_classes)[fg_masks], cls_targets
            )
        ).sum() / num_fg
        if self.use_l1:
+           # if origin_preds.view(-1, 4)[fg_masks].shape != l1_targets.shape:
+           #     logger.info("some shape mismatch")
+           #     logger.info("origin_preds.view(-1, 4)[fg_masks].shape {}".format(
+           #         origin_preds.view(-1, 4)[fg_masks].shape))
+           #     logger.info("l1_targets.shape {}".format(l1_targets.shape))
+           #     logger.info("--------------------")
            loss_l1 = (
                self.l1_loss(origin_preds.view(-1, 4)[fg_masks], l1_targets)
            ).sum() / num_fg
@@ -457,7 +483,7 @@ class YOLOXHead(nn.Module):
        imgs,
        mode="gpu",
    ):
+       # TODO: check loss mismatches here
        if mode == "cpu":
            print("------------CPU Mode for This Batch-------------")
            gt_bboxes_per_image = gt_bboxes_per_image.cpu().float()
@@ -477,6 +503,11 @@ class YOLOXHead(nn.Module):
            num_gt,
            img_size
        )
+       # if torch.isnan(cls_preds).sum().item() or torch.isnan(obj_preds).sum().item() or torch.isnan(
+       #         bboxes_preds_per_image).sum().item():
+       #     logger.info("cls_preds is Nan {}".format(torch.isnan(cls_preds).sum().item()))
+       #     logger.info("obj_preds is Nan {}".format(torch.isnan(obj_preds).sum().item()))
+       #     logger.info("bboxes_preds_per_image is Nan {}".format(torch.isnan(bboxes_preds_per_image).sum().item()))

        bboxes_preds_per_image = bboxes_preds_per_image[fg_mask]
        cls_preds_ = cls_preds[batch_idx][fg_mask]
@@ -495,8 +526,10 @@ class YOLOXHead(nn.Module):
            .unsqueeze(1)
            .repeat(1, num_in_boxes_anchor, 1)
        )
        pair_wise_ious_loss = -torch.log(pair_wise_ious + 1e-8)
+       # if torch.isnan(pair_wise_ious_loss).sum().item():
+       #     logger.info("pair_wise_ious_loss is Nan {}".format(torch.isnan(pair_wise_ious_loss).sum().item()))

        if mode == "cpu":
            cls_preds_, obj_preds_ = cls_preds_.cpu(), obj_preds_.cpu()
@@ -505,6 +538,7 @@ class YOLOXHead(nn.Module):
            cls_preds_.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_()
            * obj_preds_.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_()
        )

        pair_wise_cls_loss = F.binary_cross_entropy(
            cls_preds_.sqrt_(), gt_cls_per_image, reduction="none"
        ).sum(-1)
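Note: the commented-out checks in this file are NaN probes around the SimOTA cost terms. The epsilon in -torch.log(pair_wise_ious + 1e-8) already guards against log(0), and cls_preds_.sqrt_() keeps the fused cls-obj score inside [0, 1] before F.binary_cross_entropy, which requires probabilities. If the probes are ever re-enabled, a small helper avoids repeating the isnan boilerplate — an illustrative sketch, not part of the diff:

def count_nan(name, tensor):
    # Count NaN entries in `tensor` and log them under `name`; returns the
    # count so callers can branch on it (e.g. skip a batch with a non-finite loss).
    n = torch.isnan(tensor).sum().item()
    if n:
        logger.info('{} has {} NaN values'.format(name, n))
    return n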