
annotations for test-time adaptation are not compatible; have to compare shape and value

master · Mahdi Abdollah Pour · 2 years ago · commit 64d356450f
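The incompatibility named in the commit message can be checked directly. A minimal sketch (hypothetical arrays, not repo code) of comparing two annotation tensors first by shape, then by value:

import numpy as np

# Hypothetical annotation arrays, both meant to hold rows of [class, id, cx, cy, w, h]:
# padded targets from ValTransformWithPseudo vs. rows read from a *_weak_yolox-x.txt file.
gt = np.zeros((500, 6), dtype=np.float32)
weak = np.zeros((37, 6), dtype=np.float32)

if gt.shape != weak.shape:
    print('shape mismatch:', gt.shape, weak.shape)   # compare shape first
elif not np.allclose(gt, weak):
    print('same shape, different values')            # then compare values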

experiments/bytetrack_on_17half.sh  +18 -0

#PBS -N bytetrack_17_on_17half
#PBS -m abe
#PBS -M [email protected]
#PBS -l nodes=1:ppn=1:gpus=1
#PBS -q cuda9


export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env


cd /home/abdollahpour.ce.sharif/ByteTrack



python tools/track.py -t mot -f exps/example/mot/yolox_x_mot17_half.py -d 1 -b 1 --fp16 -c /home/abdollahpour.ce.sharif/ByteTrackModels/bytetrack_x_mot17.pth.tar --local_rank 0 -expn bytetrack_17_on_17half
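All of the experiments/*.sh files in this commit follow the same pattern: a PBS header requesting one GPU on the cuda9 queue, CUDA 10.1 and conda environment setup, then a single tracking or training command. They are presumably submitted with qsub (e.g. qsub experiments/bytetrack_on_17half.sh) rather than run directly.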

experiments/bytetrack_on_20.sh  +18 -0

#PBS -N bytetrack_17_on_20
#PBS -m abe
#PBS -M [email protected]
#PBS -l nodes=1:ppn=1:gpus=1
#PBS -q cuda9


export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env


cd /home/abdollahpour.ce.sharif/ByteTrack



python tools/track.py -t mot -f exps/example/mot/yolox_x_mot20_on_mot20.py -d 1 -b 1 --fp16 -c /home/abdollahpour.ce.sharif/ByteTrackModels/bytetrack_x_mot17.pth.tar --local_rank 0 -expn bytetrack_17_on_20 --mot20

experiments/meta_gpu_resume.sh  +2 -1

  export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
  export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH

  source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
  conda activate abd_env

- python tools/train.py -t metamot -f exps/example/metamot/yolox_x_mot17_on_mot20.py -d 1 -b 1 --fp16 -o --resume --start_epoch 2 -c /home/abdollahpour.ce.sharif/ByteTrack/meta_experiments/train_17_on_20/latest_ckpt.pth.tar --local_rank 0 -expn train_17_on_20_resume
+ python tools/train.py -t metamot -f exps/example/metamot/yolox_x_mot17_on_mot20.py -d 1 -b 1 --fp16 -o --resume --start_epoch 6 -c /home/abdollahpour.ce.sharif/ByteTrack/meta_experiments/train_17_on_20_resume/latest_ckpt.pth.tar --local_rank 0 -expn train_17_on_20_resume2

experiments/test.sh  +11 -0

export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env


cd /home/abdollahpour.ce.sharif/ByteTrack



python tools/test.py

experiments/track_metamot17_on_17.sh  +18 -0

#PBS -N track_metamot17_on_17_no_adapt
#PBS -m abe
#PBS -M [email protected]
#PBS -l nodes=1:ppn=1:gpus=1
#PBS -q cuda9


export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env


cd /home/abdollahpour.ce.sharif/ByteTrack



python tools/track.py -t metamot -f exps/example/metamot/yolox_x_mot17_on_mot17.py -d 1 -b 1 --fp16 -c /home/abdollahpour.ce.sharif/ByteTrack/meta_experiments/train_17_on_20_resume2/best_ckpt.pth.tar --local_rank 0 -expn track_metamot17_on_17

experiments/track_on_20.sh  +18 -0

#PBS -N track_17_on_20_ada_12
#PBS -m abe
#PBS -M [email protected]
#PBS -l nodes=1:ppn=1:gpus=1
#PBS -q cuda9


export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env


cd /home/abdollahpour.ce.sharif/ByteTrack



python tools/track.py -t metamot -f exps/example/metamot/yolox_x_mot17_on_mot20.py -d 1 -b 1 -c /home/abdollahpour.ce.sharif/ByteTrack/meta_experiments/train_17_on_20_resume2/best_ckpt.pth.tar --local_rank 0 -expn track_17_on_20_ada_12 --mot20 --adaptation_period 12

experiments/weak_label.sh  +19 -0

#PBS -N weak_labels
#PBS -m abe
#PBS -M [email protected]
#PBS -l nodes=1:ppn=1:gpus=1
#PBS -q cuda9

export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH


source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env


cd /home/abdollahpour.ce.sharif/ByteTrack



python tools/generate_weak_labels.py

exps/example/metamot/yolox_x_mot17_on_mot17.py  +158 -0

# encoding: utf-8
import os
import random
import torch
import torch.nn as nn
import torch.distributed as dist

from yolox.exp import MetaExp as MyMetaExp
from yolox.data import get_yolox_datadir
from os import listdir
from os.path import isfile, join


class Exp(MyMetaExp):
    def __init__(self):
        super(Exp, self).__init__()
        self.num_classes = 1
        self.depth = 1.33
        self.width = 1.25
        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
        # One COCO-style annotation file per MOT17 FRCNN training sequence;
        # each file becomes its own meta-task below.
        self.train_dir = '/home/abdollahpour.ce.sharif/ByteTrackData/MOT17/annotations'
        onlyfiles = [f for f in listdir(self.train_dir) if isfile(join(self.train_dir, f))]
        self.train_anns = [file for file in onlyfiles if 'train' in file and 'FRCNN' in file]
        # # TODO: remove
        # self.train_anns = self.train_anns[3:]

        self.val_dir = '/home/abdollahpour.ce.sharif/ByteTrackData/MOT17/annotations'
        onlyfiles = [f for f in listdir(self.val_dir) if isfile(join(self.val_dir, f))]
        self.val_anns = [file for file in onlyfiles if 'train' in file and 'FRCNN' in file]
        # self.val_anns = self.val_anns[-1:]
        print('train_anns', self.train_anns)
        print('val_anns', self.val_anns)
        self.input_size = (800, 1440)
        self.test_size = (800, 1440)
        # self.test_size = (736, 1920)
        self.random_size = (20, 36)
        self.max_epoch = 80
        self.print_interval = 250
        self.eval_interval = 5
        self.test_conf = 0.1
        self.nmsthre = 0.7
        self.no_aug_epochs = 10
        # self.basic_lr_per_img = 0.001 / 64.0
        self.basic_lr_per_img = 0.0001 / 64.0
        self.warmup_epochs = 1

    def get_data_loaders(self, batch_size, is_distributed, no_aug=False):
        from yolox.data import (
            MOTDataset,
            TrainTransform,
            YoloBatchSampler,
            DataLoader,
            InfiniteSampler,
            MosaicDetection,
        )

        # Split the batch across workers once; doing this inside the loop
        # would shrink the batch again for every annotation file.
        if is_distributed:
            batch_size = batch_size // dist.get_world_size()

        train_loaders = []
        for train_ann in self.train_anns:
            dataset = MOTDataset(
                data_dir=os.path.join(get_yolox_datadir(), "MOT17"),
                json_file=train_ann,
                name='train',
                img_size=self.input_size,
                preproc=TrainTransform(
                    rgb_means=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225),
                    max_labels=500,
                ),
            )
            dataset = MosaicDetection(
                dataset,
                mosaic=not no_aug,
                img_size=self.input_size,
                preproc=TrainTransform(
                    rgb_means=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225),
                    max_labels=1000,
                ),
                degrees=self.degrees,
                translate=self.translate,
                scale=self.scale,
                shear=self.shear,
                perspective=self.perspective,
                enable_mixup=self.enable_mixup,
            )

            self.dataset = dataset

            sampler = InfiniteSampler(
                len(self.dataset), seed=self.seed if self.seed else 0
            )

            batch_sampler = YoloBatchSampler(
                sampler=sampler,
                batch_size=batch_size,
                drop_last=False,
                input_dimension=self.input_size,
                mosaic=not no_aug,
            )

            dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
            dataloader_kwargs["batch_sampler"] = batch_sampler
            train_loader = DataLoader(self.dataset, **dataloader_kwargs)
            train_loaders.append(train_loader)

        return train_loaders

    def get_eval_loaders(self, batch_size, is_distributed, testdev=False):
        from yolox.data import MOTDataset, ValTransform, ValTransformWithPseudo

        if is_distributed:
            batch_size = batch_size // dist.get_world_size()

        val_loaders = []
        for val_ann in self.val_anns:
            valdataset = MOTDataset(
                data_dir=os.path.join(get_yolox_datadir(), "MOT17"),
                json_file=val_ann,
                img_size=self.test_size,
                name='train',  # change to train when running on training set
                preproc=ValTransformWithPseudo(
                    rgb_means=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225),
                ),
            )

            if is_distributed:
                sampler = torch.utils.data.distributed.DistributedSampler(
                    valdataset, shuffle=False
                )
            else:
                sampler = torch.utils.data.SequentialSampler(valdataset)

            dataloader_kwargs = {
                "num_workers": self.data_num_workers,
                "pin_memory": True,
                "sampler": sampler,
            }
            dataloader_kwargs["batch_size"] = batch_size
            val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)
            val_loaders.append(val_loader)
        return val_loaders

    def get_evaluator(self, batch_size, is_distributed, testdev=False):
        from yolox.evaluators import COCOEvaluator

        val_loaders = self.get_eval_loaders(batch_size, is_distributed, testdev=testdev)
        evaluators = []
        for val_loader in val_loaders:
            evaluator = COCOEvaluator(
                dataloader=val_loader,
                img_size=self.test_size,
                confthre=self.test_conf,
                nmsthre=self.nmsthre,
                num_classes=self.num_classes,
                testdev=testdev,
            )
            evaluators.append(evaluator)
        return evaluators
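Since get_data_loaders and get_eval_loaders return one loader per annotation file, each MOT17 FRCNN sequence becomes its own meta-task. A quick sanity check (hypothetical snippet, assuming the annotation files exist at the paths above):

from exps.example.metamot.yolox_x_mot17_on_mot17 import Exp

exp = Exp()
loaders = exp.get_data_loaders(batch_size=1, is_distributed=False)
# One loader per train_anns entry, i.e. one task per sequence.
print(len(exp.train_anns), 'annotation files ->', len(loaders), 'loaders')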

exps/example/metamot/yolox_x_mot17_on_mot20.py  +20 -14

          onlyfiles = [f for f in listdir(self.val_dir) if isfile(join(self.val_dir, f))]
          self.val_anns = [file for file in onlyfiles if file.__contains__('train') and file.__contains__('MOT20')]
+         # self.val_anns = self.val_anns[-1:]
          print('train_anns', self.train_anns)
          print('val_anns', self.val_anns)
          self.input_size = (800, 1440)
          # self.test_size = (736, 1920)
          self.random_size = (20, 36)
          self.max_epoch = 80
-         self.print_interval = 100
+         self.print_interval = 250
          self.eval_interval = 5
          self.test_conf = 0.001
          self.nmsthre = 0.7
          self.no_aug_epochs = 10
-         self.basic_lr_per_img = 0.001 / 64.0
+         # self.basic_lr_per_img = 0.001 / 64.0
+         self.basic_lr_per_img = 0.0001 / 64.0
          self.warmup_epochs = 1

      def get_data_loaders(self, batch_size, is_distributed, no_aug=False):

          return train_loaders

      def get_eval_loaders(self, batch_size, is_distributed, testdev=False):
-         from yolox.data import MOTDataset, ValTransform
+         from yolox.data import MOTDataset, ValTransform, ValTransformWithPseudo
          val_loaders = []
          for val_ann in self.val_anns:
              valdataset = MOTDataset(
                  json_file=val_ann,
                  img_size=self.test_size,
                  name='train',  # change to train when running on training set
-                 preproc=ValTransform(
+                 preproc=ValTransformWithPseudo(
                      rgb_means=(0.485, 0.456, 0.406),
                      std=(0.229, 0.224, 0.225),
                  ),
+                 load_weak=True
              )

              if is_distributed:

      def get_evaluator(self, batch_size, is_distributed, testdev=False):
          from yolox.evaluators import COCOEvaluator

-         val_loader = self.get_eval_loaders(batch_size, is_distributed, testdev=testdev)
-         evaluator = COCOEvaluator(
-             dataloader=val_loader,
-             img_size=self.test_size,
-             confthre=self.test_conf,
-             nmsthre=self.nmsthre,
-             num_classes=self.num_classes,
-             testdev=testdev,
-         )
-         return evaluator
+         val_loaders = self.get_eval_loaders(batch_size, is_distributed, testdev=testdev)
+         evaluators = []
+         for val_loader in val_loaders:
+             evaluator = COCOEvaluator(
+                 dataloader=val_loader,
+                 img_size=self.test_size,
+                 confthre=self.test_conf,
+                 nmsthre=self.nmsthre,
+                 num_classes=self.num_classes,
+                 testdev=testdev,
+             )
+             evaluators.append(evaluator)
+         return evaluators

exps/example/mot/yolox_x_mot17_half.py  +2 -2

          )

          dataset = MOTDataset(
-             data_dir=os.path.join(get_yolox_datadir(), "mot"),
+             data_dir=os.path.join(get_yolox_datadir(), "MOT17"),
              json_file=self.train_ann,
              name='train',
              img_size=self.input_size,

          from yolox.data import MOTDataset, ValTransform

          valdataset = MOTDataset(
-             data_dir=os.path.join(get_yolox_datadir(), "mot"),
+             data_dir=os.path.join(get_yolox_datadir(), "MOT17"),
              json_file=self.val_ann,
              img_size=self.test_size,
              name='train',

exps/example/mot/yolox_x_mot20_on_mot20.py  +139 -0

# encoding: utf-8
import os
import random
import torch
import torch.nn as nn
import torch.distributed as dist

from yolox.exp import Exp as MyExp
from yolox.data import get_yolox_datadir

class Exp(MyExp):
    def __init__(self):
        super(Exp, self).__init__()
        self.num_classes = 1
        self.depth = 1.33
        self.width = 1.25
        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
        self.train_ann = "train.json"
        self.val_ann = "train.json"  # change to train.json when running on training set
        self.input_size = (896, 1600)
        self.test_size = (896, 1600)
        # self.test_size = (736, 1920)
        self.random_size = (20, 36)
        self.max_epoch = 80
        self.print_interval = 20
        self.eval_interval = 5
        self.test_conf = 0.001
        self.nmsthre = 0.7
        self.no_aug_epochs = 10
        self.basic_lr_per_img = 0.001 / 64.0
        self.warmup_epochs = 1

    def get_data_loader(self, batch_size, is_distributed, no_aug=False):
        from yolox.data import (
            MOTDataset,
            TrainTransform,
            YoloBatchSampler,
            DataLoader,
            InfiniteSampler,
            MosaicDetection,
        )

        dataset = MOTDataset(
            data_dir=os.path.join(get_yolox_datadir(), "MOT20"),
            json_file=self.train_ann,
            name='',
            img_size=self.input_size,
            preproc=TrainTransform(
                rgb_means=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
                max_labels=600,
            ),
        )

        dataset = MosaicDetection(
            dataset,
            mosaic=not no_aug,
            img_size=self.input_size,
            preproc=TrainTransform(
                rgb_means=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
                max_labels=1200,
            ),
            degrees=self.degrees,
            translate=self.translate,
            scale=self.scale,
            shear=self.shear,
            perspective=self.perspective,
            enable_mixup=self.enable_mixup,
        )

        self.dataset = dataset

        if is_distributed:
            batch_size = batch_size // dist.get_world_size()

        sampler = InfiniteSampler(
            len(self.dataset), seed=self.seed if self.seed else 0
        )

        batch_sampler = YoloBatchSampler(
            sampler=sampler,
            batch_size=batch_size,
            drop_last=False,
            input_dimension=self.input_size,
            mosaic=not no_aug,
        )

        dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
        dataloader_kwargs["batch_sampler"] = batch_sampler
        train_loader = DataLoader(self.dataset, **dataloader_kwargs)

        return train_loader

    def get_eval_loader(self, batch_size, is_distributed, testdev=False):
        from yolox.data import MOTDataset, ValTransform

        valdataset = MOTDataset(
            data_dir=os.path.join(get_yolox_datadir(), "MOT20"),
            json_file=self.val_ann,
            img_size=self.test_size,
            name='train',  # change to train when running on training set
            preproc=ValTransform(
                rgb_means=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
            ),
        )

        if is_distributed:
            batch_size = batch_size // dist.get_world_size()
            sampler = torch.utils.data.distributed.DistributedSampler(
                valdataset, shuffle=False
            )
        else:
            sampler = torch.utils.data.SequentialSampler(valdataset)

        dataloader_kwargs = {
            "num_workers": self.data_num_workers,
            "pin_memory": True,
            "sampler": sampler,
        }
        dataloader_kwargs["batch_size"] = batch_size
        val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)

        return val_loader

    def get_evaluator(self, batch_size, is_distributed, testdev=False):
        from yolox.evaluators import COCOEvaluator

        val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev)
        evaluator = COCOEvaluator(
            dataloader=val_loader,
            img_size=self.test_size,
            confthre=self.test_conf,
            nmsthre=self.nmsthre,
            num_classes=self.num_classes,
            testdev=testdev,
        )
        return evaluator

tools/generate_mot20_weak_labels _metaway.py  +109 -0

import os
import numpy as np
import json
import cv2

# Use the same script for MOT16
DATA_PATH = '/media/external_10TB/10TB/vision/ByteTrackData/MOT20'
OUT_PATH = os.path.join(DATA_PATH, 'annotations')
SPLITS = ['train', 'test'] # --> split training data to train_half and val_half.
HALF_VIDEO = True
CREATE_SPLITTED_ANN = True
CREATE_SPLITTED_DET = True

if __name__ == '__main__':

    if not os.path.exists(OUT_PATH):
        os.makedirs(OUT_PATH)

    for split in SPLITS:
        if split == "test":
            data_path = os.path.join(DATA_PATH, 'test')
        else:
            data_path = os.path.join(DATA_PATH, 'train')
        seqs = os.listdir(data_path)
        for seq in sorted(seqs):
            out_path = os.path.join(OUT_PATH, '{}_{}_weak.json'.format(split, seq))
            out = {'images': [], 'annotations': [], 'videos': [],
                   'categories': [{'id': 1, 'name': 'pedestrian'}]}
            image_cnt = 0
            ann_cnt = 0
            video_cnt = 0
            tid_curr = 0
            tid_last = -1
            if '.DS_Store' in seq:
                continue
            video_cnt += 1  # video sequence number.
            out['videos'].append({'id': video_cnt, 'file_name': seq})
            seq_path = os.path.join(data_path, seq)
            img_path = os.path.join(seq_path, 'img1')
            ann_path = os.path.join(seq_path, 'gt/gt.txt')
            images = os.listdir(img_path)
            num_images = len([image for image in images if 'jpg' in image])  # half and half

            if HALF_VIDEO and ('half' in split):
                image_range = [0, num_images // 2] if 'train' in split else \
                    [num_images // 2 + 1, num_images - 1]
            else:
                image_range = [0, num_images - 1]

            for i in range(num_images):
                if i < image_range[0] or i > image_range[1]:
                    continue
                img = cv2.imread(os.path.join(data_path, '{}/img1/{:06d}.jpg'.format(seq, i + 1)))
                height, width = img.shape[:2]
                image_info = {'file_name': '{}/img1/{:06d}.jpg'.format(seq, i + 1),  # image name.
                              'id': image_cnt + i + 1,  # image number in the entire training set.
                              'frame_id': i + 1 - image_range[0],
                              # image number in the video sequence, starting from 1.
                              'prev_image_id': image_cnt + i if i > 0 else -1,
                              # image number in the entire training set.
                              'next_image_id': image_cnt + i + 2 if i < num_images - 1 else -1,
                              'video_id': video_cnt,
                              'height': height, 'width': width}
                out['images'].append(image_info)
            print('{}: {} images'.format(seq, num_images))
            if split != 'test':
                det_path = os.path.join(seq_path, 'det/det.txt')
                anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',')
                dets = np.loadtxt(det_path, dtype=np.float32, delimiter=',')

                print('{} ann images'.format(int(anns[:, 0].max())))
                for i in range(anns.shape[0]):
                    frame_id = int(anns[i][0])
                    if frame_id - 1 < image_range[0] or frame_id - 1 > image_range[1]:
                        continue
                    track_id = int(anns[i][1])
                    cat_id = int(anns[i][7])
                    ann_cnt += 1
                    if not ('15' in DATA_PATH):
                        # if not (float(anns[i][8]) >= 0.25):  # visibility.
                        #     continue
                        if not (int(anns[i][6]) == 1):  # whether ignore.
                            continue
                        if int(anns[i][7]) in [3, 4, 5, 6, 9, 10, 11]:  # Non-person
                            continue
                        if int(anns[i][7]) in [2, 7, 8, 12]:  # Ignored person
                            # category_id = -1
                            continue
                        else:
                            category_id = 1  # pedestrian(non-static)
                            if not track_id == tid_last:
                                tid_curr += 1
                                tid_last = track_id
                    else:
                        category_id = 1
                    ann = {'id': ann_cnt,
                           'category_id': category_id,
                           'image_id': image_cnt + frame_id,
                           'track_id': -1,
                           'bbox': '',
                           'conf': '',
                           'iscrowd': 0,
                           'area': ''}
                    # float(anns[i][4] * anns[i][5])
                    out['annotations'].append(ann)
            image_cnt += num_images
            print(tid_curr, tid_last)
            print('loaded {} for {} images and {} samples'.format(split, len(out['images']), len(out['annotations'])))
            json.dump(out, open(out_path, 'w'))
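Note what makes these annotations "weak": the generated JSON keeps the COCO-style images, videos, and categories records, but every annotation entry carries empty 'bbox', 'conf', and 'area' fields and track_id -1, so downstream code receives correctly shaped files without usable ground-truth boxes.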

tools/generate_weak_labels.py  +323 -0

# Mahdi Abdollahpour, 22/12/2021, 02:26 PM, PyCharm, ByteTrack

import os
import time
from loguru import logger
# from opts import opts

from os import listdir
from os.path import isfile, join
import cv2
import numpy as np
import torch

from yolox.data.data_augment import ValTransform
# from yolox.data.datasets import COCO_CLASSES
from yolox.exp import get_exp
from yolox.utils import fuse_model, get_model_info, postprocess, vis
from yolox import statics

COCO_CLASSES = (
"person",
"bicycle",
"car",
"motorcycle",
"airplane",
"bus",
"train",
"truck",
"boat",
"traffic light",
"fire hydrant",
"stop sign",
"parking meter",
"bench",
"bird",
"cat",
"dog",
"horse",
"sheep",
"cow",
"elephant",
"bear",
"zebra",
"giraffe",
"backpack",
"umbrella",
"handbag",
"tie",
"suitcase",
"frisbee",
"skis",
"snowboard",
"sports ball",
"kite",
"baseball bat",
"baseball glove",
"skateboard",
"surfboard",
"tennis racket",
"bottle",
"wine glass",
"cup",
"fork",
"knife",
"spoon",
"bowl",
"banana",
"apple",
"sandwich",
"orange",
"broccoli",
"carrot",
"hot dog",
"pizza",
"donut",
"cake",
"chair",
"couch",
"potted plant",
"bed",
"dining table",
"toilet",
"tv",
"laptop",
"mouse",
"remote",
"keyboard",
"cell phone",
"microwave",
"oven",
"toaster",
"sink",
"refrigerator",
"book",
"clock",
"vase",
"scissors",
"teddy bear",
"hair drier",
"toothbrush",
)


IMAGE_EXT = [".jpg", ".jpeg", ".webp", ".bmp", ".png"]

use_cuda = True

MOT = 'MOT20'
section = 'train'

root_dir = os.path.join(statics.DATA_PATH, MOT, section)

classes = ['person', 'bicycle', 'car', 'motorcycle', 'truck', 'bus']
fuse = False


def get_labels(bboxes, cls, scores, th, tw):
    id = 0
    labels = []

    # print(pred['scores'])
    n, _ = bboxes.shape
    for i in range(n):

        if COCO_CLASSES[int(cls[i])] not in classes:
            # print('Rejecting', COCO_CLASSES[int(cls[i])], scores[i])
            continue
        if use_cuda:
            box = bboxes[i, :].detach().cpu().numpy()
        else:
            box = bboxes[i, :].detach().numpy()
        ## TODO: check if matches
        # print(box[0], box[1], box[2], box[3], '--', th, tw)
        # print(box[0] / th, box[1] / tw, box[2] / th, box[3] / tw)
        x = box[0] / th
        y = box[1] / tw
        w = (box[2] - box[0]) / th
        h = (box[3] - box[1]) / tw
        x += w / 2
        y += h / 2
        label = [0, id, x, y, w, h]
        # label = [0, id, box[0], box[1], (box[2] - box[0]), (box[3] - box[1])]
        id += 1
        labels.append(label)
    # print(id)
    labels0 = np.array(labels)
    return labels0


class Predictor(object):
    def __init__(
            self,
            model,
            exp,
            cls_names=COCO_CLASSES,
            trt_file=None,
            decoder=None,
            device="cpu",
            fp16=False,
            legacy=False,
    ):
        self.model = model
        self.cls_names = cls_names
        self.decoder = decoder
        self.num_classes = exp.num_classes
        self.confthre = 0.1
        self.nmsthre = 0.3
        self.test_size = exp.test_size
        self.device = device
        self.fp16 = fp16
        self.preproc = ValTransform()
        # if trt_file is not None:
        #     from torch2trt import TRTModule
        #
        #     model_trt = TRTModule()
        #     model_trt.load_state_dict(torch.load(trt_file))
        #
        #     x = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda()
        #     self.model(x)
        #     self.model = model_trt

    def inference(self, img):
        img_info = {"id": 0}
        if isinstance(img, str):
            img_info["file_name"] = os.path.basename(img)
            img = cv2.imread(img)
        else:
            img_info["file_name"] = None

        height, width = img.shape[:2]
        img_info["height"] = height
        img_info["width"] = width
        img_info["raw_img"] = img

        ratio = min(self.test_size[0] / img.shape[0], self.test_size[1] / img.shape[1])
        # print(self.test_size[0], img.shape[0], self.test_size[1], img.shape[1])
        img_info["ratio"] = ratio

        img, _ = self.preproc(img, None, self.test_size)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.float()
        if self.device == "gpu":
            img = img.cuda()
        # if self.fp16:
        #     img = img.half()  # to FP16

        with torch.no_grad():
            t0 = time.time()
            outputs = self.model(img)
            if self.decoder is not None:
                outputs = self.decoder(outputs, dtype=outputs.type())
            outputs = postprocess(
                outputs, self.num_classes, self.confthre,
                self.nmsthre
            )
            # logger.info("Infer time: {:.4f}s".format(time.time() - t0))
        # print(img.shape)
        _, _, tw, th = img.shape
        img_info['tw'] = tw
        img_info['th'] = th
        return outputs, img_info

    def visual(self, output, img_info, cls_conf=0.35):
        ratio = img_info["ratio"]
        img = img_info["raw_img"]
        if output is None:
            return img
        output = output.cpu()

        bboxes = output[:, 0:4]

        # preprocessing: resize
        bboxes /= ratio

        cls = output[:, 6]
        scores = output[:, 4] * output[:, 5]

        vis_res = vis(img, bboxes, scores, cls, cls_conf, self.cls_names)
        return vis_res


def image_demo(predictor, path):
    folders = [f for f in listdir(path)]
    # folders = folders[3:]

    for folder in folders:
        print(folder)
        images_folder = join(join(path, folder), 'img1')
        images = [f for f in listdir(images_folder) if isfile(join(images_folder, f))]
        images = [a for a in images if a.endswith('.jpg')]
        images.sort()

        for i, image_name in enumerate(images):
            if i % 300 == 0:
                print(folder, i)
            outputs, img_info = predictor.inference(join(images_folder, image_name))

            ratio = img_info["ratio"]
            # print(ratio)
            img = img_info["raw_img"]
            output = outputs[0]
            if output is None:
                continue
            output = output.cpu()

            bboxes = output[:, 0:4]

            # preprocessing: resize
            bboxes /= ratio

            cls = output[:, 6]
            scores = output[:, 4] * output[:, 5]
            # print('cls', cls)
            labels0 = get_labels(bboxes, cls, scores, img_info["width"], img_info["height"])

            # out_path = join(images_folder, 'weak_' + imm + '.npy')
            # print(imm)
            np.savetxt(join(images_folder, image_name + '_weak_' + model_name + '.txt'), labels0, delimiter=' ')


def main(exp, ckpt_file):
    model = exp.get_model()

    if use_cuda:
        model = model.cuda()
        device = 'gpu'
    else:
        device = 'cpu'
    model.eval()

    logger.info("loading checkpoint")
    ckpt = torch.load(ckpt_file, map_location="cpu")
    # load the model state dict
    model.load_state_dict(ckpt["model"])
    logger.info("loaded checkpoint done.")

    if fuse:
        logger.info("\tFusing model...")
        model = fuse_model(model)

    trt_file = None
    decoder = None

    predictor = Predictor(
        model, exp, COCO_CLASSES, trt_file, decoder,
        device, False, False,
    )
    current_time = time.localtime()
    image_demo(predictor, root_dir)


model_name = 'yolox-x'
# cuda = torch.device('cuda:1')
if __name__ == "__main__":
    # print(COCO_CLASSES)
    # if use_cuda:
    #     torch.cuda.set_device(1)
    # with torch.cuda.device(1):
    # os.environ['CUDA_VISIBLE_DEVICES'] = '1'
    ckpt_file = '/home/abdollahpour.ce.sharif/yolox_x.pth'

    exp = get_exp(None, model_name)
    main(exp, ckpt_file)
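Each frame thus gets a sidecar file <frame>.jpg_weak_yolox-x.txt holding one normalized [class, id, cx, cy, w, h] row per kept detection. Reading one back is the inverse of get_labels above (hypothetical file name and frame size):

import numpy as np

labels = np.loadtxt('000001.jpg_weak_yolox-x.txt', delimiter=' ').reshape(-1, 6)
W, H = 1920, 1080  # original frame size; get_labels divided x/w by width, y/h by height

cx, cy = labels[:, 2] * W, labels[:, 3] * H
w, h = labels[:, 4] * W, labels[:, 5] * H
x1, y1 = cx - w / 2, cy - h / 2  # undo the center shift
print(np.stack([x1, y1, x1 + w, y1 + h], axis=1)[:3])  # pixel xyxy boxes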

tools/test.py  +23 -0

# Mahdi Abdollahpour, 30/12/2021, 07:47 PM, PyCharm, ByteTrack
from yolox.core import launch
from yolox.data import MOTDataset, ValTransform, ValTransformWithPseudo
test_size = (896, 1600)
import os
from yolox.data import get_yolox_datadir
if __name__ == "__main__":
    valdataset = MOTDataset(
        data_dir=os.path.join(get_yolox_datadir(), "MOT20"),
        json_file='train_MOT20-01.json',
        img_size=test_size,
        name='train',  # change to train when running on training set
        preproc=ValTransformWithPseudo(
            rgb_means=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
        load_weak=True
    )

    for batch in valdataset:
        print(batch)
        exit()
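MOTDataset yields (img, target, img_info, id) tuples (see yolox/data/datasets/mot.py below), so this loop prints exactly the target arrays whose shape and values the commit message says have to be compared; with load_weak=True they come from the *_weak_yolox-x.txt files rather than the COCO annotations.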

tools/track.py  +127 -74

  import motmetrics as mm
  from collections import OrderedDict
  from pathlib import Path
+ import learn2learn as l2l
+ import yolox.statics as statics


  def make_parser():
      parser.add_argument("-expn", "--experiment-name", type=str, default=None)
      parser.add_argument("-n", "--name", type=str, default=None, help="model name")

+     parser.add_argument(
+         "--adaptation_period", default=4, type=int, help="if 4, then adapts to one batch in four batches"
+     )

      # distributed
      parser.add_argument(
          "--dist-backend", default="nccl", type=str, help="distributed backend"

      parser.add_argument("--match_thresh", type=float, default=0.9, help="matching threshold for tracking")
      parser.add_argument("--min-box-area", type=float, default=100, help='filter out tiny boxes')
      parser.add_argument("--mot20", dest="mot20", default=False, action="store_true", help="test mot20.")
+     parser.add_argument("--use_existing_files", default=False, action="store_true", help="to use already created files")
      return parser


      return accs, names


- def process_loader(args, val_loader, model, is_distributed):
-     if args.seed is not None:
-         random.seed(args.seed)
-         torch.manual_seed(args.seed)
-         cudnn.deterministic = True
-         warnings.warn(
-             "You have chosen to seed testing. This will turn on the CUDNN deterministic setting, "
-         )
-
-     # set environment variables for distributed training
-     cudnn.benchmark = True
-     rank = args.local_rank
-     # rank = get_local_rank()
-
+ def process_loader(args, exp, val_loader, model, is_distributed, trt_file, decoder, val_ann):
      file_name = os.path.join(exp.output_dir, args.experiment_name)
+     rank = args.local_rank
      if rank == 0:
          os.makedirs(file_name, exist_ok=True)

      results_folder = os.path.join(file_name, "track_results")
      os.makedirs(results_folder, exist_ok=True)

-     setup_logger(file_name, distributed_rank=rank, filename="val_log.txt", mode="a")
-     logger.info("Args: {}".format(args))
-
-     if args.conf is not None:
-         exp.test_conf = args.conf
-     if args.nms is not None:
-         exp.nmsthre = args.nms
-     if args.tsize is not None:
-         exp.test_size = (args.tsize, args.tsize)
+     adaptation_period = None
+     if args.task == 'metamot':
+         adaptation_period = args.adaptation_period

      evaluator = MOTEvaluator(
          args=args,
          nmsthre=exp.nmsthre,
          num_classes=exp.num_classes,
      )

-     torch.cuda.set_device(rank)
-     model.cuda(rank)
-     model.eval()
-
-     if not args.speed and not args.trt:
-         if args.ckpt is None:
-             ckpt_file = os.path.join(file_name, "best_ckpt.pth.tar")
-         else:
-             ckpt_file = args.ckpt
-         logger.info("loading checkpoint")
-         loc = "cuda:{}".format(rank)
-         ckpt = torch.load(ckpt_file, map_location=loc)
-         # load the model state dict
-         model.load_state_dict(ckpt["model"])
-         logger.info("loaded checkpoint done.")
-
-     if is_distributed:
-         model = DDP(model, device_ids=[rank])
-
-     if args.fuse:
-         logger.info("\tFusing model...")
-         model = fuse_model(model)
-
-     if args.trt:
-         assert (
-             not args.fuse and not is_distributed and args.batch_size == 1
-         ), "TensorRT model is not support model fusing and distributed inferencing!"
-         trt_file = os.path.join(file_name, "model_trt.pth")
-         assert os.path.exists(
-             trt_file
-         ), "TensorRT model is not found!\n Run tools/trt.py first!"
-         model.head.decode_in_inference = False
-         decoder = model.head.decode_outputs
-     else:
-         trt_file = None
-         decoder = None
-
      # start evaluate
      *_, summary = evaluator.evaluate(
-         model, is_distributed, args.fp16, trt_file, decoder, exp.test_size, results_folder
+         model, is_distributed, args.fp16, trt_file, decoder, exp.test_size, results_folder,
+         adaptation_period=adaptation_period,
      )
      logger.info("\n" + summary)


+ def eval_MOT(args, exp, val_ann=None):
+     file_name = os.path.join(exp.output_dir, args.experiment_name)
+     rank = args.local_rank
+     if rank == 0:
+         os.makedirs(file_name, exist_ok=True)
+
+     results_folder = os.path.join(file_name, "track_results")
+     os.makedirs(results_folder, exist_ok=True)
+
      # evaluate MOTA
      mm.lap.default_solver = 'lap'

-     if exp.val_ann == 'val_half.json':
+     if val_ann == 'val_half.json':
          gt_type = '_val_half'
      else:
          gt_type = ''
      print('gt_type', gt_type)
      if args.mot20:
-         gtfiles = glob.glob(os.path.join('datasets/MOT20/train', '*/gt/gt{}.txt'.format(gt_type)))
+         gtfiles = glob.glob(os.path.join(statics.DATA_PATH, 'MOT20/train', '*/gt/gt{}.txt'.format(gt_type)))
      else:
-         gtfiles = glob.glob(os.path.join('datasets/mot/train', '*/gt/gt{}.txt'.format(gt_type)))
+         gtfiles = glob.glob(os.path.join(statics.DATA_PATH, 'MOT17/train', '*/gt/gt{}.txt'.format(gt_type)))
      print('gt_files', gtfiles)
      tsfiles = [f for f in glob.glob(os.path.join(results_folder, '*.txt')) if
                 not os.path.basename(f).startswith('eval')]

      logger.info('Completed')


+ def load_model(args, exp, is_distributed):
+     model = exp.get_model()
+     logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size)))
+     if args.seed is not None:
+         random.seed(args.seed)
+         torch.manual_seed(args.seed)
+         cudnn.deterministic = True
+         warnings.warn(
+             "You have chosen to seed testing. This will turn on the CUDNN deterministic setting, "
+         )
+
+     # set environment variables for distributed training
+     cudnn.benchmark = True
+     rank = args.local_rank
+     # rank = get_local_rank()
+
+     file_name = os.path.join(exp.output_dir, args.experiment_name)
+
+     if rank == 0:
+         os.makedirs(file_name, exist_ok=True)
+
+     setup_logger(file_name, distributed_rank=rank, filename="val_log.txt", mode="a")
+     logger.info("Args: {}".format(args))
+
+     if args.conf is not None:
+         exp.test_conf = args.conf
+     if args.nms is not None:
+         exp.nmsthre = args.nms
+     if args.tsize is not None:
+         exp.test_size = (args.tsize, args.tsize)
+
+     if args.task == "metamot":
+         model = l2l.algorithms.MAML(model, lr=exp.inner_lr, first_order=exp.first_order, allow_nograd=True)
+     torch.cuda.set_device(rank)
+     model.cuda(rank)
+     model.eval()
+
+     if not args.speed and not args.trt:
+         if args.ckpt is None:
+             ckpt_file = os.path.join(file_name, "best_ckpt.pth.tar")
+         else:
+             ckpt_file = args.ckpt
+
+         logger.info("loading checkpoint")
+         loc = "cuda:{}".format(rank)
+         ckpt = torch.load(ckpt_file, map_location=loc)
+
+         # handling meta models
+         new_dict = {}
+         if (not list(ckpt["model"].keys())[0].startswith('module')) and args.task == "metamot":
+             for key in ckpt["model"].keys():
+                 if not key.startswith('module.'):
+                     new_dict['module.' + key] = ckpt["model"][key]
+                 else:
+                     new_dict[key] = ckpt["model"][key]
+             del ckpt["model"]
+             ckpt["model"] = new_dict
+
+         # load the model state dict
+         model.load_state_dict(ckpt["model"])
+         logger.info("loaded checkpoint done.")
+
+     if is_distributed:
+         model = DDP(model, device_ids=[rank])
+
+     if args.fuse:
+         logger.info("\tFusing model...")
+         model = fuse_model(model)
+
+     if args.trt:
+         assert (
+             not args.fuse and not is_distributed and args.batch_size == 1
+         ), "TensorRT model is not support model fusing and distributed inferencing!"
+         trt_file = os.path.join(file_name, "model_trt.pth")
+         assert os.path.exists(
+             trt_file
+         ), "TensorRT model is not found!\n Run tools/trt.py first!"
+         model.head.decode_in_inference = False
+         decoder = model.head.decode_outputs
+     else:
+         trt_file = None
+         decoder = None
+
+     return model, trt_file, decoder


  @logger.catch
  def main(exp, args, num_gpu):
      is_distributed = num_gpu > 1
      print('is_distributed', is_distributed)
      print('num_gpu', num_gpu)

-     model = exp.get_model()
-     logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size)))
      # logger.info("Model Structure:\n{}".format(str(model)))
+     model, trt_file, decoder = load_model(args, exp, is_distributed)

      if args.task == 'metamot':
          val_loaders = exp.get_eval_loaders(args.batch_size, is_distributed, args.test)
-         for val_loader in val_loaders:
-             learner = model.clone()
-             process_loader(args, val_loader, learner, is_distributed)
+         if not args.use_existing_files:
+             for val_loader, val_ann in zip(val_loaders, exp.val_anns):
+                 logger.info('processing loader...')
+                 process_loader(args, exp, val_loader, model, is_distributed, trt_file, decoder, val_ann)
+         eval_MOT(args, exp)
      else:
-         val_loader = exp.get_eval_loader(args.batch_size, is_distributed, args.test)
-         process_loader(args, val_loader, model, is_distributed)
+         if not args.use_existing_files:
+             val_loader = exp.get_eval_loader(args.batch_size, is_distributed, args.test)
+             process_loader(args, exp, val_loader, model, is_distributed, trt_file, decoder, exp.val_ann)
+         eval_MOT(args, exp, exp.val_ann)


  if __name__ == "__main__":
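After this refactor main() always ends in eval_MOT, so passing --use_existing_files skips process_loader entirely and recomputes the MOT metrics from the txt files already sitting in track_results. For metamot runs, --adaptation_period N is forwarded to MOTEvaluator.evaluate, which takes one inner-loop gradient step on every N-th batch (see yolox/evaluators/mot_evaluator.py below).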

yolox/core/meta_trainer.py  +5 -2

          self.after_train()

      def train_in_epoch(self):
+         # self.evaluate_and_save_model()
          for self.epoch in range(self.start_epoch, self.max_epoch):
              self.before_epoch()
              self.train_in_task()

          # self.model = model
          self.model.train()

-         self.evaluator = self.exp.get_evaluator(
+         self.evaluators = self.exp.get_evaluators(
              batch_size=self.args.batch_size, is_distributed=self.is_distributed
          )
          # Tensorboard logger

          ckpt = torch.load(ckpt_file, map_location=self.device)

+         # TODO: handle pretrained BYTETrack
          # handling meta models
          # new_dict = {}
          # for key in ckpt["model"].keys():

          return model

      def evaluate_and_save_model(self):
+         logger.info("starting eval...")
          evalmodel = self.ema_model.ema if self.use_model_ema else self.model
          ap50_95, ap50, summary = self.exp.eval(
-             evalmodel, self.evaluator, self.is_distributed
+             evalmodel, self.evaluators, self.is_distributed
          )
          self.model.train()
          if self.rank == 0:

yolox/data/__init__.py  +1 -1

  # -*- coding:utf-8 -*-
  # Copyright (c) Megvii, Inc. and its affiliates.

- from .data_augment import TrainTransform, ValTransform
+ from .data_augment import TrainTransform, ValTransform, ValTransformWithPseudo
  from .data_prefetcher import DataPrefetcher
  from .dataloading import DataLoader, get_yolox_datadir
  from .datasets import *

yolox/data/data_augment.py  +80 -0

    def __call__(self, img, res, input_size):
        img, _ = preproc(img, input_size, self.means, self.std, self.swap)
        return img, np.zeros((1, 5))


class ValTransformWithPseudo:
    """
    Defines the transformations that should be applied to test PIL image
    for input into the network

    dimension -> tensorize -> color adj

    Arguments:
        resize (int): input dimension to SSD
        rgb_means ((int,int,int)): average RGB of the dataset
            (104,117,123)
        swap ((int,int,int)): final order of channels

    Returns:
        transform (transform) : callable transform to be applied to test/val
        data
    """

    def __init__(self, rgb_means=None, std=None, swap=(2, 0, 1), max_labels=100):
        self.means = rgb_means
        self.swap = swap
        self.std = std
        self.max_labels = max_labels

    def __call__(self, image, targets, input_dim):
        boxes = targets[:, :4].copy()
        labels = targets[:, 4].copy()
        ids = targets[:, 5].copy()
        if len(boxes) == 0:
            targets = np.zeros((self.max_labels, 6), dtype=np.float32)
            image, r_o = preproc(image, input_dim, self.means, self.std)
            image = np.ascontiguousarray(image, dtype=np.float32)
            return image, targets

        image_o = image.copy()
        targets_o = targets.copy()
        height_o, width_o, _ = image_o.shape
        boxes_o = targets_o[:, :4]
        labels_o = targets_o[:, 4]
        ids_o = targets_o[:, 5]
        # bbox_o: [xyxy] to [c_x,c_y,w,h]
        boxes_o = xyxy2cxcywh(boxes_o)

        # image_t = _distort(image)
        image_t = image
        # image_t, boxes = _mirror(image_t, boxes)
        height, width, _ = image_t.shape
        image_t, r_ = preproc(image_t, input_dim, self.means, self.std)
        # boxes [xyxy] 2 [cx,cy,w,h]
        boxes = xyxy2cxcywh(boxes)
        boxes *= r_

        mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 1
        boxes_t = boxes[mask_b]
        labels_t = labels[mask_b]
        ids_t = ids[mask_b]

        if len(boxes_t) == 0:
            image_t, r_o = preproc(image_o, input_dim, self.means, self.std)
            boxes_o *= r_o
            boxes_t = boxes_o
            labels_t = labels_o
            ids_t = ids_o

        labels_t = np.expand_dims(labels_t, 1)
        ids_t = np.expand_dims(ids_t, 1)

        targets_t = np.hstack((labels_t, boxes_t, ids_t))
        padded_labels = np.zeros((self.max_labels, 6))
        padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[
            : self.max_labels
        ]
        padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
        image_t = np.ascontiguousarray(image_t, dtype=np.float32)
        return image_t, padded_labels
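Unlike ValTransform, which returns a dummy np.zeros((1, 5)) target, ValTransformWithPseudo returns real padded labels, which is what lets the evaluator compute a detection loss at test time. A hypothetical usage sketch (shapes only; the [x1, y1, x2, y2, class, track_id] target layout is assumed from the MOT pipeline):

import numpy as np
from yolox.data import ValTransformWithPseudo

transform = ValTransformWithPseudo(rgb_means=(0.485, 0.456, 0.406),
                                   std=(0.229, 0.224, 0.225), max_labels=100)
image = np.random.randint(0, 255, (1080, 1920, 3), dtype=np.uint8)
targets = np.array([[100., 200., 300., 400., 0., 7.]])  # x1, y1, x2, y2, class, track_id
img_t, padded = transform(image, targets, (800, 1440))
print(img_t.shape, padded.shape)  # (3, 800, 1440) (100, 6); rows are [class, cx, cy, w, h, id]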


yolox/data/datasets/mot.py  +32 -8

      """

      def __init__(
          self,
          data_dir=None,
          json_file="train_half.json",
          name="train",
          img_size=(608, 1088),
          preproc=None,
+         load_weak=False,
      ):
          """
          COCO dataset initialization. Annotation data are read into memory by COCO API.

          self.name = name
          self.img_size = img_size
          self.preproc = preproc
+         self.load_weak = load_weak

      def __len__(self):
          return len(self.ids)

          img_file = os.path.join(
              self.data_dir, self.name, file_name
          )
+         head_tail = os.path.split(img_file)
+         # label_path = os.path.join(head_tail[0], head_tail[1].replace('.jpg','.txt'))
+
+         if self.load_weak:
+             weak_label_path = os.path.join(head_tail[0], head_tail[1] + '_weak_yolox-x.txt')
+             # load weak labels from weak_label_path
+             width = img_info[1]
+             height = img_info[0]
+             labels = np.loadtxt(weak_label_path)
+             res = np.ones_like(labels)
+             labels[2, :] *= width
+             labels[4, :] *= width
+             labels[3, :] *= height
+             labels[5, :] *= height
+             labels[4, :] += labels[2, :]
+             labels[5, :] += labels[3, :]
+
+             res[:, 0:4] = labels[:, -4:]
+             res[:, 5] = labels[:, 1]
+             # all are from class one
+             # res[:, 4] = labels[:, 0]

          img = cv2.imread(img_file)
-         # if img is None:
-         #     print('img_file is None',img_file)
+         if img is None:
+             print('img_file is None', img_file)
          assert img is not None

          return img, res.copy(), img_info, np.array([id_])
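One caveat worth flagging: get_labels in tools/generate_weak_labels.py writes an (N, 6) array with one [class, id, cx, cy, w, h] row per detection, while the scaling above (labels[2, :] *= width, ...) indexes as if the layout were (6, N). That mismatch appears to be the shape-and-value incompatibility named in the commit message, and tools/test.py above prints batches precisely so it can be inspected.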

yolox/evaluators/coco_evaluator.py  +7 -3



      info = time_info + "\n"

      # Evaluate the Dt (detection) json comparing with the ground truth
      if len(data_dict) > 0:
          cocoGt = self.dataloader.dataset.coco
          # TODO: since pycocotools can't process dict in py36, write data to json file.

          from pycocotools import cocoeval as COCOeval
          logger.warning("Use standard COCOeval.")
          '''
-         #from pycocotools.cocoeval import COCOeval
-         from yolox.layers import COCOeval_opt as COCOeval
+         # TODO: commenting this and trying to use pycocotools
+         from pycocotools.cocoeval import COCOeval
+         # from yolox.layers import COCOeval_opt as COCOeval

          cocoEval = COCOeval(cocoGt, cocoDt, annType[1])
          cocoEval.evaluate()
          cocoEval.accumulate()

yolox/evaluators/mot_evaluator.py  +145 -117

if track_id < 0: if track_id < 0:
continue continue
x1, y1, w, h = tlwh x1, y1, w, h = tlwh
line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1), h=round(h, 1), s=round(score, 2))
line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1),
h=round(h, 1), s=round(score, 2))
f.write(line) f.write(line)
logger.info('save results to {}'.format(filename)) logger.info('save results to {}'.format(filename))


if track_id < 0: if track_id < 0:
continue continue
x1, y1, w, h = tlwh x1, y1, w, h = tlwh
line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1), h=round(h, 1))
line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1),
h=round(h, 1))
f.write(line) f.write(line)
logger.info('save results to {}'.format(filename)) logger.info('save results to {}'.format(filename))


""" """


def __init__( def __init__(
self, args, dataloader, img_size, confthre, nmsthre, num_classes):
self, args, dataloader, img_size, confthre, nmsthre, num_classes):
""" """
Args: Args:
dataloader (Dataloader): evaluate dataloader. dataloader (Dataloader): evaluate dataloader.
self.args = args self.args = args


def evaluate( def evaluate(
self,
model,
distributed=False,
half=False,
trt_file=None,
decoder=None,
test_size=None,
result_folder=None
self,
model,
distributed=False,
half=False,
trt_file=None,
decoder=None,
test_size=None,
result_folder=None,
adaptation_period=None,
): ):
""" """
COCO average precision (AP) Evaluation. Iterate inference on the test dataset COCO average precision (AP) Evaluation. Iterate inference on the test dataset
ap50 (float) : COCO AP of IoU=50 ap50 (float) : COCO AP of IoU=50
summary (sr): summary info of evaluation. summary (sr): summary info of evaluation.
""" """

if adaptation_period is not None:
logger.info('cloning model...')
learner = model.clone()
else:
learner = model
# TODO half to amp_test # TODO half to amp_test
tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor
model = model.eval()
learner = learner.eval()
if half: if half:
model = model.half()
learner = learner.half()
ids = [] ids = []
data_list = [] data_list = []
results = [] results = []


if trt_file is not None: if trt_file is not None:
from torch2trt import TRTModule from torch2trt import TRTModule
logger.info('Loading trt file')


model_trt = TRTModule() model_trt = TRTModule()
model_trt.load_state_dict(torch.load(trt_file)) model_trt.load_state_dict(torch.load(trt_file))


x = torch.ones(1, 3, test_size[0], test_size[1]).cuda() x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()
model(x)
model = model_trt
learner(x)
learner = model_trt
tracker = BYTETracker(self.args) tracker = BYTETracker(self.args)
ori_thresh = self.args.track_thresh ori_thresh = self.args.track_thresh
for cur_iter, (imgs, _, info_imgs, ids) in enumerate(
progress_bar(self.dataloader)
):
with torch.no_grad():
# init tracker
frame_id = info_imgs[2].item()
video_id = info_imgs[3].item()
img_file_name = info_imgs[4]
video_name = img_file_name[0].split('/')[0]
if video_name == 'MOT17-05-FRCNN' or video_name == 'MOT17-06-FRCNN':
self.args.track_buffer = 14
elif video_name == 'MOT17-13-FRCNN' or video_name == 'MOT17-14-FRCNN':
self.args.track_buffer = 25
else:
self.args.track_buffer = 30

if video_name == 'MOT17-01-FRCNN':
self.args.track_thresh = 0.65
elif video_name == 'MOT17-06-FRCNN':
self.args.track_thresh = 0.65
elif video_name == 'MOT17-12-FRCNN':
self.args.track_thresh = 0.7
elif video_name == 'MOT17-14-FRCNN':
self.args.track_thresh = 0.67
else:
self.args.track_thresh = ori_thresh
if video_name == 'MOT20-06' or video_name == 'MOT20-08':
self.args.track_thresh = 0.3
else:
self.args.track_thresh = ori_thresh


if video_name not in video_names:
video_names[video_id] = video_name
if frame_id == 1:
tracker = BYTETracker(self.args)
if len(results) != 0:
result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id - 1]))
write_results(result_filename, results)
results = []

imgs = imgs.type(tensor_type)
for cur_iter, (imgs, targets, info_imgs, ids) in enumerate(
progress_bar(self.dataloader)
):
if cur_iter % 100 == 0:
logger.info('cur_iter: {}'.format(cur_iter))
# with torch.no_grad():
# init tracker
# imgs = imgs.to(self.data_type)
# targets = targets.to(self.data_type)

frame_id = info_imgs[2].item()
video_id = info_imgs[3].item()
img_file_name = info_imgs[4]
video_name = img_file_name[0].split('/')[0]
if video_name == 'MOT17-05-FRCNN' or video_name == 'MOT17-06-FRCNN':
self.args.track_buffer = 14
elif video_name == 'MOT17-13-FRCNN' or video_name == 'MOT17-14-FRCNN':
self.args.track_buffer = 25
else:
self.args.track_buffer = 30

if video_name == 'MOT17-01-FRCNN':
self.args.track_thresh = 0.65
elif video_name == 'MOT17-06-FRCNN':
self.args.track_thresh = 0.65
elif video_name == 'MOT17-12-FRCNN':
self.args.track_thresh = 0.7
elif video_name == 'MOT17-14-FRCNN':
self.args.track_thresh = 0.67
else:
self.args.track_thresh = ori_thresh

if video_name == 'MOT20-06' or video_name == 'MOT20-08':
self.args.track_thresh = 0.3
else:
self.args.track_thresh = ori_thresh

if video_name not in video_names:
video_names[video_id] = video_name
if frame_id == 1:
tracker = BYTETracker(self.args)
if len(results) != 0:
result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id - 1]))
write_results(result_filename, results)
results = []

imgs = imgs.type(tensor_type)

# skip the the last iters since batchsize might be not enough for batch inference
is_time_record = cur_iter < len(self.dataloader) - 1
if is_time_record:
start = time.time()
if adaptation_period is not None and cur_iter % adaptation_period == 0:
learner.train()
targets = targets.type(tensor_type)
targets.requires_grad = False
outputs = learner(imgs, targets)
loss = outputs["total_loss"]
learner.adapt(loss)
learner.eval()


# skip the the last iters since batchsize might be not enough for batch inference
is_time_record = cur_iter < len(self.dataloader) - 1
if is_time_record:
start = time.time()
with torch.no_grad():
outputs = learner(imgs)


outputs = model(imgs)
if decoder is not None:
outputs = decoder(outputs, dtype=outputs.type())
# print('outputs', outputs.shape)
outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)


if decoder is not None:
outputs = decoder(outputs, dtype=outputs.type())
print('outputs', outputs.shape)
outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)
if is_time_record:
infer_end = time_synchronized()
inference_time += infer_end - start
if is_time_record:
infer_end = time_synchronized()
inference_time += infer_end - start


output_results = self.convert_to_coco_format(outputs, info_imgs, ids) output_results = self.convert_to_coco_format(outputs, info_imgs, ids)
data_list.extend(output_results) data_list.extend(output_results)
if is_time_record: if is_time_record:
track_end = time_synchronized() track_end = time_synchronized()
track_time += track_end - infer_end track_time += track_end - infer_end
if cur_iter == len(self.dataloader) - 1: if cur_iter == len(self.dataloader) - 1:
result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id])) result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id]))
write_results(result_filename, results) write_results(result_filename, results)
return eval_results return eval_results


def evaluate_sort( def evaluate_sort(
self,
model,
distributed=False,
half=False,
trt_file=None,
decoder=None,
test_size=None,
result_folder=None
self,
model,
distributed=False,
half=False,
trt_file=None,
decoder=None,
test_size=None,
result_folder=None
): ):
""" """
COCO average precision (AP) Evaluation. Iterate inference on the test dataset COCO average precision (AP) Evaluation. Iterate inference on the test dataset
x = torch.ones(1, 3, test_size[0], test_size[1]).cuda() x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()
model(x) model(x)
model = model_trt model = model_trt
tracker = Sort(self.args.track_thresh) tracker = Sort(self.args.track_thresh)
for cur_iter, (imgs, _, info_imgs, ids) in enumerate( for cur_iter, (imgs, _, info_imgs, ids) in enumerate(
progress_bar(self.dataloader)
progress_bar(self.dataloader)
): ):
if cur_iter % 250 == 0:
logger.info('cur_iter: {}'.format(cur_iter))
with torch.no_grad(): with torch.no_grad():
# init tracker # init tracker
frame_id = info_imgs[2].item() frame_id = info_imgs[2].item()
outputs = decoder(outputs, dtype=outputs.type()) outputs = decoder(outputs, dtype=outputs.type())


outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre) outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)
if is_time_record: if is_time_record:
infer_end = time_synchronized() infer_end = time_synchronized()
inference_time += infer_end - start inference_time += infer_end - start
if is_time_record: if is_time_record:
track_end = time_synchronized() track_end = time_synchronized()
track_time += track_end - infer_end track_time += track_end - infer_end
if cur_iter == len(self.dataloader) - 1: if cur_iter == len(self.dataloader) - 1:
result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id])) result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id]))
write_results_no_score(result_filename, results) write_results_no_score(result_filename, results)
return eval_results return eval_results


def evaluate_deepsort( def evaluate_deepsort(
self,
model,
distributed=False,
half=False,
trt_file=None,
decoder=None,
test_size=None,
result_folder=None,
model_folder=None
self,
model,
distributed=False,
half=False,
trt_file=None,
decoder=None,
test_size=None,
result_folder=None,
model_folder=None
): ):
""" """
COCO average precision (AP) Evaluation. Iterate inference on the test dataset COCO average precision (AP) Evaluation. Iterate inference on the test dataset
x = torch.ones(1, 3, test_size[0], test_size[1]).cuda() x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()
model(x) model(x)
model = model_trt model = model_trt
tracker = DeepSort(model_folder, min_confidence=self.args.track_thresh) tracker = DeepSort(model_folder, min_confidence=self.args.track_thresh)
for cur_iter, (imgs, _, info_imgs, ids) in enumerate( for cur_iter, (imgs, _, info_imgs, ids) in enumerate(
progress_bar(self.dataloader)
progress_bar(self.dataloader)
): ):
with torch.no_grad(): with torch.no_grad():
# init tracker # init tracker
outputs = decoder(outputs, dtype=outputs.type()) outputs = decoder(outputs, dtype=outputs.type())


outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre) outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)
if is_time_record: if is_time_record:
infer_end = time_synchronized() infer_end = time_synchronized()
inference_time += infer_end - start inference_time += infer_end - start
if is_time_record: if is_time_record:
track_end = time_synchronized() track_end = time_synchronized()
track_time += track_end - infer_end track_time += track_end - infer_end
if cur_iter == len(self.dataloader) - 1: if cur_iter == len(self.dataloader) - 1:
result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id])) result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id]))
write_results_no_score(result_filename, results) write_results_no_score(result_filename, results)
return eval_results return eval_results


def evaluate_motdt( def evaluate_motdt(
self,
model,
distributed=False,
half=False,
trt_file=None,
decoder=None,
test_size=None,
result_folder=None,
model_folder=None
self,
model,
distributed=False,
half=False,
trt_file=None,
decoder=None,
test_size=None,
result_folder=None,
model_folder=None
): ):
""" """
COCO average precision (AP) Evaluation. Iterate inference on the test dataset COCO average precision (AP) Evaluation. Iterate inference on the test dataset
x = torch.ones(1, 3, test_size[0], test_size[1]).cuda() x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()
model(x) model(x)
model = model_trt model = model_trt
tracker = OnlineTracker(model_folder, min_cls_score=self.args.track_thresh) tracker = OnlineTracker(model_folder, min_cls_score=self.args.track_thresh)
for cur_iter, (imgs, _, info_imgs, ids) in enumerate( for cur_iter, (imgs, _, info_imgs, ids) in enumerate(
progress_bar(self.dataloader)
progress_bar(self.dataloader)
): ):
with torch.no_grad(): with torch.no_grad():
# init tracker # init tracker
outputs = decoder(outputs, dtype=outputs.type()) outputs = decoder(outputs, dtype=outputs.type())


                outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)
                if is_time_record:
                    infer_end = time_synchronized()
                    inference_time += infer_end - start
            if is_time_record:
                track_end = time_synchronized()
                track_time += track_end - infer_end
            if cur_iter == len(self.dataloader) - 1:
                result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id]))
                write_results(result_filename, results)
    def convert_to_coco_format(self, outputs, info_imgs, ids):
        data_list = []
        for (output, img_h, img_w, img_id) in zip(
                outputs, info_imgs[0], info_imgs[1], ids
        ):
            if output is None:
                continue
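`convert_to_coco_format` maps detections from the padded test resolution back to original image coordinates before handing them to the COCO API. A sketch of that rescaling, assuming the standard YOLOX letterbox preprocessing (the helper name and the `scale` formula are illustrative, not taken from this diff):

    import torch

    def rescale_to_original(output, img_h, img_w, test_size):
        # Hypothetical helper: undo the resize applied at load time.
        # Images are scaled by min(test_h/img_h, test_w/img_w) and padded,
        # so dividing by the same ratio recovers original-image pixels.
        bboxes = output[:, 0:4].clone()
        scale = min(test_size[0] / float(img_h), test_size[1] / float(img_w))
        bboxes /= scale
        return bboxes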
            [
                "Average {} time: {:.2f} ms".format(k, v)
                for k, v in zip(
                    ["forward", "track", "inference"],
                    [a_infer_time, a_track_time, (a_infer_time + a_track_time)],
                )
            ]
        )


            from pycocotools import cocoeval as COCOeval
            logger.warning("Use standard COCOeval.")
            '''
            #from pycocotools.cocoeval import COCOeval
            from yolox.layers import COCOeval_opt as COCOeval
            # I changed it
            from pycocotools.cocoeval import COCOeval
            # from yolox.layers import COCOeval_opt as COCOeval
            cocoEval = COCOeval(cocoGt, cocoDt, annType[1])
            cocoEval.evaluate()
            cocoEval.accumulate()
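With the optimized `COCOeval_opt` swapped out, evaluation falls back to the stock pycocotools flow. For reference, the standard sequence looks like this (a sketch; the annotation and detection paths are assumed for illustration):

    from pycocotools.coco import COCO
    from pycocotools.cocoeval import COCOeval

    cocoGt = COCO("annotations/instances_val.json")   # assumed path
    cocoDt = cocoGt.loadRes("detections.json")        # detections in COCO result format
    cocoEval = COCOeval(cocoGt, cocoDt, "bbox")       # annType[1] == "bbox"
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
    # cocoEval.stats[0] is AP@[.5:.95], cocoEval.stats[1] is AP@.5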

+ 1
- 1
yolox/exp/base_meta_exp.py View File

        self.seed = None
        # self.output_dir = "./YOLOX_outputs"
        self.output_dir = "./meta_experiments"
        self.print_interval = 100
        self.print_interval = 250
        self.eval_interval = 10


    @abstractmethod

+ 27
- 15
yolox/exp/meta_yolox_base.py View File



        # ----------------- Meta-learning ------------------ #
        self.first_order = True
        self.inner_lr = 1e-5
        self.inner_lr = 1e-6


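`first_order` and `inner_lr` configure the inner adaptation loop; dropping `inner_lr` from `1e-5` to `1e-6` makes each per-sequence update an order of magnitude more conservative. As a rough sketch of what a first-order (FOMAML-style) inner step does with these settings (hypothetical function, not code from this repo):

    import torch

    def inner_adapt(model, support_loss, inner_lr=1e-6):
        # One first-order SGD step on the support loss: gradients are
        # taken without create_graph, so the outer (meta) update treats
        # the adapted weights as constants.
        grads = torch.autograd.grad(support_loss, model.parameters())
        with torch.no_grad():
            for p, g in zip(model.parameters(), grads):
                p -= inner_lr * g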
    def get_model(self):
        from yolox.models import YOLOPAFPN, YOLOX, YOLOXHead
            val_loaders.append(val_loader)
        return val_loaders


def get_evaluator(self, batch_size, is_distributed, testdev=False):
def get_evaluators(self, batch_size, is_distributed, testdev=False):
        from yolox.evaluators import COCOEvaluator


val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev)
evaluator = COCOEvaluator(
dataloader=val_loader,
img_size=self.test_size,
confthre=self.test_conf,
nmsthre=self.nmsthre,
num_classes=self.num_classes,
testdev=testdev,
)
return evaluator

def eval(self, model, evaluator, is_distributed, half=False):
return evaluator.evaluate(model, is_distributed, half)
val_loaders = self.get_eval_loaders(batch_size, is_distributed, testdev=testdev)
evaluators = []
for val_loader in val_loaders:
evaluator = COCOEvaluator(
dataloader=val_loader,
img_size=self.test_size,
confthre=self.test_conf,
nmsthre=self.nmsthre,
num_classes=self.num_classes,
testdev=testdev,
)
evaluators.append(evaluator)
return evaluators

def eval(self, model, evaluators, is_distributed, half=False):
ap50_95s = 0.0
ap50s = 0.0
summarys = ''
for evaluator in evaluators:
ap50_95, ap50, summary = evaluator.evaluate(model, is_distributed, half)
ap50_95s += ap50_95
ap50s += ap50
summarys += ("\n" + summary)
n = len(evaluators)
return (ap50_95s / n), (ap50s / n), summarys
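The rewritten `eval` no longer reports a single evaluator's numbers: it runs every validation loader, concatenates the per-dataset summaries, and returns the arithmetic mean of AP@[.5:.95] and AP@.5. For example (hypothetical numbers):

    # Two validation sets, e.g. MOT17-half at (0.60, 0.80) and MOT20 at (0.40, 0.60):
    ap50_95 = (0.60 + 0.40) / 2   # -> 0.50
    ap50 = (0.80 + 0.60) / 2      # -> 0.70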

+ 37
- 3
yolox/models/yolo_head.py View File

        if self.training:
            # logger.info("labels.shape:{}".format(labels.shape))
            # logger.info("torch.cat(outputs, 1).shape:{}".format(torch.cat(outputs, 1).shape))
# if torch.isnan(torch.cat(outputs, 1)).sum().item():
# logger.info('There is Nan value in outputs {}'.format(torch.isnan(torch.cat(outputs, 1)).sum().item()))

            return self.get_losses(
                imgs,
                x_shifts,
        if self.use_l1:
            l1_targets = torch.cat(l1_targets, 0)



        # TODO: check loss parts shapes


        num_fg = max(num_fg, 1)
# if bbox_preds.view(-1, 4)[fg_masks].shape != reg_targets.shape:
# logger.info("some shape mismatch")
# logger.info("bbox_preds.view(-1, 4)[fg_masks].shape {}".format(bbox_preds.view(-1, 4)[fg_masks].shape))
# logger.info("reg_targets {}".format(reg_targets.shape))
# logger.info("--------------------")
        loss_iou = (
            self.iou_loss(bbox_preds.view(-1, 4)[fg_masks], reg_targets)
        ).sum() / num_fg
# if obj_preds.view(-1, 1).shape != obj_targets.shape:
# logger.info("some shape mismatch")
# logger.info("obj_preds.view(-1, 1).shape {}".format(obj_preds.view(-1, 1).shape))
# logger.info("obj_targets.shape {}".format(obj_targets.shape))
# logger.info("--------------------")
        loss_obj = (
            self.bcewithlog_loss(obj_preds.view(-1, 1), obj_targets)
        ).sum() / num_fg
# if cls_preds.view(-1, self.num_classes)[fg_masks].shape != cls_targets.shape:
# logger.info("some shape mismatch")
# logger.info("cls_preds.view(-1, self.num_classes)[fg_masks].shape {}".format(
# cls_preds.view(-1, self.num_classes)[fg_masks].shape))
# logger.info("cls_targets.shape {}".format(cls_targets.shape))
# logger.info("--------------------")

        loss_cls = (
            self.bcewithlog_loss(
                cls_preds.view(-1, self.num_classes)[fg_masks], cls_targets
            )
        ).sum() / num_fg
        if self.use_l1:
# if origin_preds.view(-1, 4)[fg_masks].shape != l1_targets.shape:
# logger.info("some shape mismatch")
# logger.info("origin_preds.view(-1, 4)[fg_masks].shape {}".format(
# origin_preds.view(-1, 4)[fg_masks].shape))
# logger.info("l1_targets.shape {}".format(l1_targets.shape))
# logger.info("--------------------")

            loss_l1 = (
                self.l1_loss(origin_preds.view(-1, 4)[fg_masks], l1_targets)
            ).sum() / num_fg
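All four commented-out blocks above follow the same pattern: before each loss term, compare the prediction slice's shape with its target's shape and log both when they diverge (the shape/value incompatibility called out in the commit message). A hypothetical helper that factors the pattern out:

    from loguru import logger

    def check_shapes(name_a, tensor_a, name_b, tensor_b):
        # Log both shapes when a prediction/target pair disagrees, instead
        # of letting the mismatch surface as an opaque error inside a loss.
        if tensor_a.shape != tensor_b.shape:
            logger.info("some shape mismatch")
            logger.info("{}: {}".format(name_a, tensor_a.shape))
            logger.info("{}: {}".format(name_b, tensor_b.shape))
            logger.info("--------------------")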
        imgs,
        mode="gpu",
    ):
# TODO: check loss mismatches here
if mode == "cpu": if mode == "cpu":
print("------------CPU Mode for This Batch-------------") print("------------CPU Mode for This Batch-------------")
gt_bboxes_per_image = gt_bboxes_per_image.cpu().float() gt_bboxes_per_image = gt_bboxes_per_image.cpu().float()
num_gt, num_gt,
img_size img_size
) )
# if torch.isnan(cls_preds).sum().item() or torch.isnan(obj_preds).sum().item() or torch.isnan(
# bboxes_preds_per_image).sum().item():
# logger.info("cls_preds is Nan {}".format(torch.isnan(cls_preds).sum().item()))
# logger.info("obj_preds is Nan {}".format(torch.isnan(obj_preds).sum().item()))
# logger.info("bboxes_preds_per_image is Nan {}".format(torch.isnan(bboxes_preds_per_image).sum().item()))


        bboxes_preds_per_image = bboxes_preds_per_image[fg_mask]
        cls_preds_ = cls_preds[batch_idx][fg_mask]
            .unsqueeze(1)
            .repeat(1, num_in_boxes_anchor, 1)
        )
        pair_wise_ious_loss = -torch.log(pair_wise_ious + 1e-8)
# if torch.isnan(pair_wise_ious_loss).sum().item():
# logger.info("pair_wise_ious_loss is Nan {}".format(torch.isnan(pair_wise_ious_loss).sum().item()))
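The `1e-8` inside the log is what keeps this loss finite when a ground-truth box has zero IoU with every candidate anchor; the NaN probe above checks for exactly that failure mode. A quick demonstration:

    import torch

    ious = torch.tensor([0.0, 0.5, 1.0])
    print(-torch.log(ious))         # tensor([inf, 0.6931, -0.]) -- inf poisons the loss sum
    print(-torch.log(ious + 1e-8))  # finite everywhere; -log(1e-8) is about 18.42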
if mode == "cpu": if mode == "cpu":
cls_preds_, obj_preds_ = cls_preds_.cpu(), obj_preds_.cpu() cls_preds_, obj_preds_ = cls_preds_.cpu(), obj_preds_.cpu()


            cls_preds_.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_()
            * obj_preds_.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_()
        )

        pair_wise_cls_loss = F.binary_cross_entropy(
            cls_preds_.sqrt_(), gt_cls_per_image, reduction="none"
        ).sum(-1)
