@@ -0,0 +1,18 @@
#PBS -N bytetrack_17_on_17half
#PBS -m abe
#PBS -M [email protected]
#PBS -l nodes=1:ppn=1:gpus=1
#PBS -q cuda9
export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env
cd /home/abdollahpour.ce.sharif/ByteTrack
python tools/track.py -t mot -f exps/example/mot/yolox_x_mot17_half.py -d 1 -b 1 --fp16 -c /home/abdollahpour.ce.sharif/ByteTrackModels/bytetrack_x_mot17.pth.tar --local_rank 0 -expn bytetrack_17_on_17half
@@ -0,0 +1,18 @@
#PBS -N bytetrack_17_on_20
#PBS -m abe
#PBS -M [email protected]
#PBS -l nodes=1:ppn=1:gpus=1
#PBS -q cuda9
export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env
cd /home/abdollahpour.ce.sharif/ByteTrack
python tools/track.py -t mot -f exps/example/mot/yolox_x_mot20_on_mot20.py -d 1 -b 1 --fp16 -c /home/abdollahpour.ce.sharif/ByteTrackModels/bytetrack_x_mot17.pth.tar --local_rank 0 -expn bytetrack_17_on_20 --mot20
@@ -8,6 +8,7 @@
export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env
@@ -16,4 +17,4 @@ cd /home/abdollahpour.ce.sharif/ByteTrack
python tools/train.py -t metamot -f exps/example/metamot/yolox_x_mot17_on_mot20.py -d 1 -b 1 --fp16 -o --resume --start_epoch 2 -c /home/abdollahpour.ce.sharif/ByteTrack/meta_experiments/train_17_on_20/latest_ckpt.pth.tar --local_rank 0 -expn train_17_on_20_resume
python tools/train.py -t metamot -f exps/example/metamot/yolox_x_mot17_on_mot20.py -d 1 -b 1 --fp16 -o --resume --start_epoch 6 -c /home/abdollahpour.ce.sharif/ByteTrack/meta_experiments/train_17_on_20_resume/latest_ckpt.pth.tar --local_rank 0 -expn train_17_on_20_resume2
@@ -0,0 +1,11 @@
export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env
cd /home/abdollahpour.ce.sharif/ByteTrack
python tools/test.py
@@ -0,0 +1,18 @@
#PBS -N track_metamot17_on_17_no_adapt
#PBS -m abe
#PBS -M [email protected]
#PBS -l nodes=1:ppn=1:gpus=1
#PBS -q cuda9
export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env
cd /home/abdollahpour.ce.sharif/ByteTrack
python tools/track.py -t metamot -f exps/example/metamot/yolox_x_mot17_on_mot17.py -d 1 -b 1 --fp16 -c /home/abdollahpour.ce.sharif/ByteTrack/meta_experiments/train_17_on_20_resume2/best_ckpt.pth.tar --local_rank 0 -expn track_metamot17_on_17
@@ -0,0 +1,18 @@
#PBS -N track_17_on_20_ada_12
#PBS -m abe
#PBS -M [email protected]
#PBS -l nodes=1:ppn=1:gpus=1
#PBS -q cuda9
export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env
cd /home/abdollahpour.ce.sharif/ByteTrack
python tools/track.py -t metamot -f exps/example/metamot/yolox_x_mot17_on_mot20.py -d 1 -b 1 -c /home/abdollahpour.ce.sharif/ByteTrack/meta_experiments/train_17_on_20_resume2/best_ckpt.pth.tar --local_rank 0 -expn track_17_on_20_ada_12 --mot20 --adaptation_period 12
@@ -0,0 +1,19 @@
#PBS -N weak_labels
#PBS -m abe
#PBS -M [email protected]
#PBS -l nodes=1:ppn=1:gpus=1
#PBS -q cuda9
export LD_LIBRARY_PATH=/share/apps/cuda/cuda-10.1/lib64:$LD_LIBRARY_PATH
export PATH=/share/apps/cuda/cuda-10.1/bin/:$PATH
source /share/apps/Anaconda/anaconda3.6/bin/activate abdollahpour.ce.sharif
conda activate abd_env
cd /home/abdollahpour.ce.sharif/ByteTrack
python tools/generate_weak_labels.py
@@ -0,0 +1,158 @@
# encoding: utf-8
import os
import random

import torch
import torch.nn as nn
import torch.distributed as dist

from yolox.exp import MetaExp as MyMetaExp
from yolox.data import get_yolox_datadir
from os import listdir
from os.path import isfile, join


class Exp(MyMetaExp):
    def __init__(self):
        super(Exp, self).__init__()
        self.num_classes = 1
        self.depth = 1.33
        self.width = 1.25
        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
        self.train_dir = '/home/abdollahpour.ce.sharif/ByteTrackData/MOT17/annotations'
        onlyfiles = [f for f in listdir(self.train_dir) if isfile(join(self.train_dir, f))]
        self.train_anns = [file for file in onlyfiles if 'train' in file and 'FRCNN' in file]
        # # TODO: remove
        # self.train_anns = self.train_anns[3:]
        self.val_dir = '/home/abdollahpour.ce.sharif/ByteTrackData/MOT17/annotations'
        onlyfiles = [f for f in listdir(self.val_dir) if isfile(join(self.val_dir, f))]
        self.val_anns = [file for file in onlyfiles if 'train' in file and 'FRCNN' in file]
        # self.val_anns = self.val_anns[-1:]
        print('train_anns', self.train_anns)
        print('val_anns', self.val_anns)
        self.input_size = (800, 1440)
        self.test_size = (800, 1440)
        # self.test_size = (736, 1920)
        self.random_size = (20, 36)
        self.max_epoch = 80
        self.print_interval = 250
        self.eval_interval = 5
        self.test_conf = 0.1
        self.nmsthre = 0.7
        self.no_aug_epochs = 10
        # self.basic_lr_per_img = 0.001 / 64.0
        self.basic_lr_per_img = 0.0001 / 64.0
        self.warmup_epochs = 1

    def get_data_loaders(self, batch_size, is_distributed, no_aug=False):
        from yolox.data import (
            MOTDataset,
            TrainTransform,
            YoloBatchSampler,
            DataLoader,
            InfiniteSampler,
            MosaicDetection,
        )

        train_loaders = []
        for train_ann in self.train_anns:
            dataset = MOTDataset(
                data_dir=os.path.join(get_yolox_datadir(), "MOT17"),
                json_file=train_ann,
                name='train',
                img_size=self.input_size,
                preproc=TrainTransform(
                    rgb_means=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225),
                    max_labels=500,
                ),
            )

            dataset = MosaicDetection(
                dataset,
                mosaic=not no_aug,
                img_size=self.input_size,
                preproc=TrainTransform(
                    rgb_means=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225),
                    max_labels=1000,
                ),
                degrees=self.degrees,
                translate=self.translate,
                scale=self.scale,
                shear=self.shear,
                perspective=self.perspective,
                enable_mixup=self.enable_mixup,
            )

            self.dataset = dataset

            if is_distributed:
                batch_size = batch_size // dist.get_world_size()

            sampler = InfiniteSampler(
                len(self.dataset), seed=self.seed if self.seed else 0
            )

            batch_sampler = YoloBatchSampler(
                sampler=sampler,
                batch_size=batch_size,
                drop_last=False,
                input_dimension=self.input_size,
                mosaic=not no_aug,
            )

            dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
            dataloader_kwargs["batch_sampler"] = batch_sampler
            train_loader = DataLoader(self.dataset, **dataloader_kwargs)
            train_loaders.append(train_loader)
        return train_loaders

    def get_eval_loaders(self, batch_size, is_distributed, testdev=False):
        from yolox.data import MOTDataset, ValTransform, ValTransformWithPseudo

        val_loaders = []
        for val_ann in self.val_anns:
            valdataset = MOTDataset(
                data_dir=os.path.join(get_yolox_datadir(), "MOT17"),
                json_file=val_ann,
                img_size=self.test_size,
                name='train',  # change to train when running on training set
                preproc=ValTransformWithPseudo(
                    rgb_means=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225),
                ),
            )

            if is_distributed:
                batch_size = batch_size // dist.get_world_size()
                sampler = torch.utils.data.distributed.DistributedSampler(
                    valdataset, shuffle=False
                )
            else:
                sampler = torch.utils.data.SequentialSampler(valdataset)

            dataloader_kwargs = {
                "num_workers": self.data_num_workers,
                "pin_memory": True,
                "sampler": sampler,
            }
            dataloader_kwargs["batch_size"] = batch_size
            val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)
            val_loaders.append(val_loader)
        return val_loaders

    def get_evaluator(self, batch_size, is_distributed, testdev=False):
        from yolox.evaluators import COCOEvaluator

        val_loaders = self.get_eval_loaders(batch_size, is_distributed, testdev=testdev)
        evaluators = []
        for val_loader in val_loaders:
            evaluator = COCOEvaluator(
                dataloader=val_loader,
                img_size=self.test_size,
                confthre=self.test_conf,
                nmsthre=self.nmsthre,
                num_classes=self.num_classes,
                testdev=testdev,
            )
            evaluators.append(evaluator)
        return evaluators
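A quick smoke test of the per-sequence task construction above (illustrative only; it assumes the MOT17 annotations directory and dataset root are laid out as configured in __init__):

    # Hypothetical check: one loader/evaluator pair per MOT17-FRCNN train annotation file.
    exp = Exp()
    loaders = exp.get_data_loaders(batch_size=1, is_distributed=False)
    evaluators = exp.get_evaluator(batch_size=1, is_distributed=False)
    assert len(loaders) == len(exp.train_anns)
    print(len(loaders), 'per-sequence tasks')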
@@ -28,6 +28,7 @@ class Exp(MyMetaExp):
        onlyfiles = [f for f in listdir(self.val_dir) if isfile(join(self.val_dir, f))]
        self.val_anns = [file for file in onlyfiles if 'train' in file and 'MOT20' in file]
        # self.val_anns = self.val_anns[-1:]
        print('train_anns', self.train_anns)
        print('val_anns', self.val_anns)
        self.input_size = (800, 1440)
@@ -35,12 +36,13 @@ class Exp(MyMetaExp):
        # self.test_size = (736, 1920)
        self.random_size = (20, 36)
        self.max_epoch = 80
        self.print_interval = 100
        self.print_interval = 250
        self.eval_interval = 5
        self.test_conf = 0.001
        self.nmsthre = 0.7
        self.no_aug_epochs = 10
        self.basic_lr_per_img = 0.001 / 64.0
        # self.basic_lr_per_img = 0.001 / 64.0
        self.basic_lr_per_img = 0.0001 / 64.0
        self.warmup_epochs = 1

    def get_data_loaders(self, batch_size, is_distributed, no_aug=False):
@@ -107,7 +109,7 @@ class Exp(MyMetaExp):
        return train_loaders

    def get_eval_loaders(self, batch_size, is_distributed, testdev=False):
        from yolox.data import MOTDataset, ValTransform
        from yolox.data import MOTDataset, ValTransform, ValTransformWithPseudo

        val_loaders = []
        for val_ann in self.val_anns:
            valdataset = MOTDataset(
@@ -115,10 +117,11 @@ class Exp(MyMetaExp):
                json_file=val_ann,
                img_size=self.test_size,
                name='train',  # change to train when running on training set
                preproc=ValTransform(
                preproc=ValTransformWithPseudo(
                    rgb_means=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225),
                ),
                load_weak=True
            )

            if is_distributed:
@@ -142,13 +145,16 @@ class Exp(MyMetaExp):
    def get_evaluator(self, batch_size, is_distributed, testdev=False):
        from yolox.evaluators import COCOEvaluator

        val_loader = self.get_eval_loaders(batch_size, is_distributed, testdev=testdev)
        evaluator = COCOEvaluator(
            dataloader=val_loader,
            img_size=self.test_size,
            confthre=self.test_conf,
            nmsthre=self.nmsthre,
            num_classes=self.num_classes,
            testdev=testdev,
        )
        return evaluator
        val_loaders = self.get_eval_loaders(batch_size, is_distributed, testdev=testdev)
        evaluators = []
        for val_loader in val_loaders:
            evaluator = COCOEvaluator(
                dataloader=val_loader,
                img_size=self.test_size,
                confthre=self.test_conf,
                nmsthre=self.nmsthre,
                num_classes=self.num_classes,
                testdev=testdev,
            )
            evaluators.append(evaluator)
        return evaluators
@@ -40,7 +40,7 @@ class Exp(MyExp):
        )
        dataset = MOTDataset(
            data_dir=os.path.join(get_yolox_datadir(), "mot"),
            data_dir=os.path.join(get_yolox_datadir(), "MOT17"),
            json_file=self.train_ann,
            name='train',
            img_size=self.input_size,
@@ -95,7 +95,7 @@ class Exp(MyExp):
        from yolox.data import MOTDataset, ValTransform

        valdataset = MOTDataset(
            data_dir=os.path.join(get_yolox_datadir(), "mot"),
            data_dir=os.path.join(get_yolox_datadir(), "MOT17"),
            json_file=self.val_ann,
            img_size=self.test_size,
            name='train',
@@ -0,0 +1,139 @@
# encoding: utf-8
import os
import random

import torch
import torch.nn as nn
import torch.distributed as dist

from yolox.exp import Exp as MyExp
from yolox.data import get_yolox_datadir


class Exp(MyExp):
    def __init__(self):
        super(Exp, self).__init__()
        self.num_classes = 1
        self.depth = 1.33
        self.width = 1.25
        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
        self.train_ann = "train.json"
        self.val_ann = "train.json"  # change to train.json when running on training set
        self.input_size = (896, 1600)
        self.test_size = (896, 1600)
        # self.test_size = (736, 1920)
        self.random_size = (20, 36)
        self.max_epoch = 80
        self.print_interval = 20
        self.eval_interval = 5
        self.test_conf = 0.001
        self.nmsthre = 0.7
        self.no_aug_epochs = 10
        self.basic_lr_per_img = 0.001 / 64.0
        self.warmup_epochs = 1

    def get_data_loader(self, batch_size, is_distributed, no_aug=False):
        from yolox.data import (
            MOTDataset,
            TrainTransform,
            YoloBatchSampler,
            DataLoader,
            InfiniteSampler,
            MosaicDetection,
        )

        dataset = MOTDataset(
            data_dir=os.path.join(get_yolox_datadir(), "MOT20"),
            json_file=self.train_ann,
            name='',
            img_size=self.input_size,
            preproc=TrainTransform(
                rgb_means=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
                max_labels=600,
            ),
        )

        dataset = MosaicDetection(
            dataset,
            mosaic=not no_aug,
            img_size=self.input_size,
            preproc=TrainTransform(
                rgb_means=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
                max_labels=1200,
            ),
            degrees=self.degrees,
            translate=self.translate,
            scale=self.scale,
            shear=self.shear,
            perspective=self.perspective,
            enable_mixup=self.enable_mixup,
        )

        self.dataset = dataset

        if is_distributed:
            batch_size = batch_size // dist.get_world_size()

        sampler = InfiniteSampler(
            len(self.dataset), seed=self.seed if self.seed else 0
        )

        batch_sampler = YoloBatchSampler(
            sampler=sampler,
            batch_size=batch_size,
            drop_last=False,
            input_dimension=self.input_size,
            mosaic=not no_aug,
        )

        dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
        dataloader_kwargs["batch_sampler"] = batch_sampler
        train_loader = DataLoader(self.dataset, **dataloader_kwargs)

        return train_loader

    def get_eval_loader(self, batch_size, is_distributed, testdev=False):
        from yolox.data import MOTDataset, ValTransform

        valdataset = MOTDataset(
            data_dir=os.path.join(get_yolox_datadir(), "MOT20"),
            json_file=self.val_ann,
            img_size=self.test_size,
            name='train',  # change to train when running on training set
            preproc=ValTransform(
                rgb_means=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
            ),
        )

        if is_distributed:
            batch_size = batch_size // dist.get_world_size()
            sampler = torch.utils.data.distributed.DistributedSampler(
                valdataset, shuffle=False
            )
        else:
            sampler = torch.utils.data.SequentialSampler(valdataset)

        dataloader_kwargs = {
            "num_workers": self.data_num_workers,
            "pin_memory": True,
            "sampler": sampler,
        }
        dataloader_kwargs["batch_size"] = batch_size
        val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)

        return val_loader

    def get_evaluator(self, batch_size, is_distributed, testdev=False):
        from yolox.evaluators import COCOEvaluator

        val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev)
        evaluator = COCOEvaluator(
            dataloader=val_loader,
            img_size=self.test_size,
            confthre=self.test_conf,
            nmsthre=self.nmsthre,
            num_classes=self.num_classes,
            testdev=testdev,
        )
        return evaluator
@@ -0,0 +1,109 @@
import os
import numpy as np
import json
import cv2

# Use the same script for MOT16
DATA_PATH = '/media/external_10TB/10TB/vision/ByteTrackData/MOT20'
OUT_PATH = os.path.join(DATA_PATH, 'annotations')
SPLITS = ['train', 'test']  # --> split training data to train_half and val_half.
HALF_VIDEO = True
CREATE_SPLITTED_ANN = True
CREATE_SPLITTED_DET = True

if __name__ == '__main__':
    if not os.path.exists(OUT_PATH):
        os.makedirs(OUT_PATH)

    for split in SPLITS:
        if split == "test":
            data_path = os.path.join(DATA_PATH, 'test')
        else:
            data_path = os.path.join(DATA_PATH, 'train')
        seqs = os.listdir(data_path)
        for seq in sorted(seqs):
            out_path = os.path.join(OUT_PATH, '{}_{}_weak.json'.format(split, seq))
            out = {'images': [], 'annotations': [], 'videos': [],
                   'categories': [{'id': 1, 'name': 'pedestrian'}]}
            image_cnt = 0
            ann_cnt = 0
            video_cnt = 0
            tid_curr = 0
            tid_last = -1
            if '.DS_Store' in seq:
                continue
            video_cnt += 1  # video sequence number.
            out['videos'].append({'id': video_cnt, 'file_name': seq})
            seq_path = os.path.join(data_path, seq)
            img_path = os.path.join(seq_path, 'img1')
            ann_path = os.path.join(seq_path, 'gt/gt.txt')
            images = os.listdir(img_path)
            num_images = len([image for image in images if 'jpg' in image])  # half and half

            if HALF_VIDEO and ('half' in split):
                image_range = [0, num_images // 2] if 'train' in split else \
                    [num_images // 2 + 1, num_images - 1]
            else:
                image_range = [0, num_images - 1]

            for i in range(num_images):
                if i < image_range[0] or i > image_range[1]:
                    continue
                img = cv2.imread(os.path.join(data_path, '{}/img1/{:06d}.jpg'.format(seq, i + 1)))
                height, width = img.shape[:2]
                image_info = {'file_name': '{}/img1/{:06d}.jpg'.format(seq, i + 1),  # image name.
                              'id': image_cnt + i + 1,  # image number in the entire training set.
                              'frame_id': i + 1 - image_range[0],  # image number in the video sequence, starting from 1.
                              'prev_image_id': image_cnt + i if i > 0 else -1,  # image number in the entire training set.
                              'next_image_id': image_cnt + i + 2 if i < num_images - 1 else -1,
                              'video_id': video_cnt,
                              'height': height, 'width': width}
                out['images'].append(image_info)
            print('{}: {} images'.format(seq, num_images))

            if split != 'test':
                det_path = os.path.join(seq_path, 'det/det.txt')
                anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',')
                dets = np.loadtxt(det_path, dtype=np.float32, delimiter=',')
                print('{} ann images'.format(int(anns[:, 0].max())))
                for i in range(anns.shape[0]):
                    frame_id = int(anns[i][0])
                    if frame_id - 1 < image_range[0] or frame_id - 1 > image_range[1]:
                        continue
                    track_id = int(anns[i][1])
                    cat_id = int(anns[i][7])
                    ann_cnt += 1
                    if not ('15' in DATA_PATH):
                        # if not (float(anns[i][8]) >= 0.25):  # visibility.
                        #     continue
                        if not (int(anns[i][6]) == 1):  # whether ignore.
                            continue
                        if int(anns[i][7]) in [3, 4, 5, 6, 9, 10, 11]:  # Non-person
                            continue
                        if int(anns[i][7]) in [2, 7, 8, 12]:  # Ignored person
                            # category_id = -1
                            continue
                        else:
                            category_id = 1  # pedestrian(non-static)
                            if not track_id == tid_last:
                                tid_curr += 1
                                tid_last = track_id
                    else:
                        category_id = 1
                    ann = {'id': ann_cnt,
                           'category_id': category_id,
                           'image_id': image_cnt + frame_id,
                           'track_id': -1,
                           'bbox': '',
                           'conf': '',
                           'iscrowd': 0,
                           'area': ''}
                    # float(anns[i][4] * anns[i][5])
                    out['annotations'].append(ann)
            image_cnt += num_images
            print(tid_curr, tid_last)
            print('loaded {} for {} images and {} samples'.format(split, len(out['images']), len(out['annotations'])))
            json.dump(out, open(out_path, 'w'))
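For reference, a minimal check of one of the JSON files this script writes (the path is illustrative):

    import json
    # Note: 'bbox', 'conf' and 'area' are intentionally left empty here; the
    # actual boxes are read later from the per-image *_weak_yolox-x.txt files.
    out = json.load(open('/path/to/MOT20/annotations/train_MOT20-01_weak.json'))
    print(len(out['images']), 'images,', len(out['annotations']), 'annotation stubs')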
@@ -0,0 +1,323 @@
# Mahdi Abdollahpour, 22/12/2021, 02:26 PM, PyCharm, ByteTrack

import os
import time
from loguru import logger
# from opts import opts
from os import listdir
from os.path import isfile, join

import cv2
import numpy as np
import torch

from yolox.data.data_augment import ValTransform
# from yolox.data.datasets import COCO_CLASSES
from yolox.exp import get_exp
from yolox.utils import fuse_model, get_model_info, postprocess, vis
from yolox import statics

COCO_CLASSES = (
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
    "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
    "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
    "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
    "hair drier", "toothbrush",
)

IMAGE_EXT = [".jpg", ".jpeg", ".webp", ".bmp", ".png"]

use_cuda = True
MOT = 'MOT20'
section = 'train'
root_dir = os.path.join(statics.DATA_PATH, MOT, section)
classes = ['person', 'bicycle', 'car', 'motorcycle', 'truck', 'bus']
fuse = False


def get_labels(bboxes, cls, scores, th, tw):
    id = 0
    labels = []
    # print(pred['scores'])
    n, _ = bboxes.shape
    for i in range(n):
        if COCO_CLASSES[int(cls[i])] not in classes:
            # print('Rejecting', COCO_CLASSES[int(cls[i])], scores[i])
            continue
        if use_cuda:
            box = bboxes[i, :].detach().cpu().numpy()
        else:
            box = bboxes[i, :].detach().numpy()
        ## TODO: check if matches
        # print(box[0], box[1], box[2], box[3], '--', th, tw)
        # print(box[0] / th, box[1] / tw, box[2] / th, box[3] / tw)
        x = box[0] / th
        y = box[1] / tw
        w = (box[2] - box[0]) / th
        h = (box[3] - box[1]) / tw
        x += w / 2
        y += h / 2
        label = [0, id, x, y, w, h]
        # label = [0, id, box[0], box[1], (box[2] - box[0]), (box[3] - box[1])]
        id += 1
        labels.append(label)
    # print(id)
    labels0 = np.array(labels)
    return labels0
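# Note on the format produced above: each row of labels0 is
# [cls, id, x_center, y_center, w, h], where x/w are normalized by th and
# y/h by tw (the original image width and height as passed from image_demo
# below, since the boxes have already been rescaled by 1/ratio);
# MOTDataset later converts these back to pixels when load_weak=True.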
class Predictor(object):
    def __init__(
            self,
            model,
            exp,
            cls_names=COCO_CLASSES,
            trt_file=None,
            decoder=None,
            device="cpu",
            fp16=False,
            legacy=False,
    ):
        self.model = model
        self.cls_names = cls_names
        self.decoder = decoder
        self.num_classes = exp.num_classes
        self.confthre = 0.1
        self.nmsthre = 0.3
        self.test_size = exp.test_size
        self.device = device
        self.fp16 = fp16
        self.preproc = ValTransform()
        # if trt_file is not None:
        #     from torch2trt import TRTModule
        #
        #     model_trt = TRTModule()
        #     model_trt.load_state_dict(torch.load(trt_file))
        #
        #     x = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda()
        #     self.model(x)
        #     self.model = model_trt

    def inference(self, img):
        img_info = {"id": 0}
        if isinstance(img, str):
            img_info["file_name"] = os.path.basename(img)
            img = cv2.imread(img)
        else:
            img_info["file_name"] = None

        height, width = img.shape[:2]
        img_info["height"] = height
        img_info["width"] = width
        img_info["raw_img"] = img

        ratio = min(self.test_size[0] / img.shape[0], self.test_size[1] / img.shape[1])
        # print(self.test_size[0], img.shape[0], self.test_size[1], img.shape[1])
        img_info["ratio"] = ratio

        img, _ = self.preproc(img, None, self.test_size)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.float()
        if self.device == "gpu":
            img = img.cuda()
            # if self.fp16:
            #     img = img.half()  # to FP16

        with torch.no_grad():
            t0 = time.time()
            outputs = self.model(img)
            if self.decoder is not None:
                outputs = self.decoder(outputs, dtype=outputs.type())
            outputs = postprocess(
                outputs, self.num_classes, self.confthre,
                self.nmsthre
            )
            # logger.info("Infer time: {:.4f}s".format(time.time() - t0))
        # print(img.shape)
        _, _, tw, th = img.shape
        img_info['tw'] = tw
        img_info['th'] = th
        return outputs, img_info

    def visual(self, output, img_info, cls_conf=0.35):
        ratio = img_info["ratio"]
        img = img_info["raw_img"]
        if output is None:
            return img
        output = output.cpu()
        bboxes = output[:, 0:4]
        # preprocessing: resize
        bboxes /= ratio
        cls = output[:, 6]
        scores = output[:, 4] * output[:, 5]
        vis_res = vis(img, bboxes, scores, cls, cls_conf, self.cls_names)
        return vis_res


def image_demo(predictor, path):
    folders = [f for f in listdir(path)]
    # folders = folders[3:]
    for folder in folders:
        print(folder)
        images_folder = join(join(path, folder), 'img1')
        images = [f for f in listdir(images_folder) if isfile(join(images_folder, f))]
        images = [a for a in images if a.endswith('.jpg')]
        images.sort()
        for i, image_name in enumerate(images):
            if i % 300 == 0:
                print(folder, i)
            outputs, img_info = predictor.inference(join(images_folder, image_name))
            ratio = img_info["ratio"]
            # print(ratio)
            img = img_info["raw_img"]
            output = outputs[0]
            if output is None:
                continue
            output = output.cpu()
            bboxes = output[:, 0:4]
            # preprocessing: resize
            bboxes /= ratio
            cls = output[:, 6]
            scores = output[:, 4] * output[:, 5]
            # print('cls', cls)
            labels0 = get_labels(bboxes, cls, scores, img_info["width"], img_info["height"])
            # out_path = join(images_folder, 'weak_' + imm + '.npy')
            # print(imm)
            np.savetxt(join(images_folder, image_name + '_weak_' + model_name + '.txt'), labels0, delimiter=' ')


def main(exp, ckpt_file):
    model = exp.get_model()

    if use_cuda:
        model = model.cuda()
        device = 'gpu'
    else:
        device = 'cpu'
    model.eval()

    logger.info("loading checkpoint")
    ckpt = torch.load(ckpt_file, map_location="cpu")
    # load the model state dict
    model.load_state_dict(ckpt["model"])
    logger.info("loaded checkpoint done.")

    if fuse:
        logger.info("\tFusing model...")
        model = fuse_model(model)

    trt_file = None
    decoder = None

    predictor = Predictor(
        model, exp, COCO_CLASSES, trt_file, decoder,
        device, False, False,
    )
    current_time = time.localtime()
    image_demo(predictor, root_dir)


model_name = 'yolox-x'
# cuda = torch.device('cuda:1')

if __name__ == "__main__":
    # print(COCO_CLASSES)
    # if use_cuda:
    #     torch.cuda.set_device(1)
    # with torch.cuda.device(1):
    # os.environ['CUDA_VISIBLE_DEVICES'] = '1'
    ckpt_file = '/home/abdollahpour.ce.sharif/yolox_x.pth'
    exp = get_exp(None, model_name)
    main(exp, ckpt_file)
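A sketch of reading back one of the per-frame files this script saves (filename pattern as produced in image_demo above; the path itself is illustrative):

    import numpy as np
    # ndmin=2 keeps shape (N, 6) even when a frame has a single detection.
    labels = np.loadtxt('/path/to/MOT20/train/MOT20-01/img1/000001.jpg_weak_yolox-x.txt', ndmin=2)
    # columns: [cls, id, x_center, y_center, w, h], normalized to image size
    print(labels.shape)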
@@ -0,0 +1,23 @@
# Mahdi Abdollahpour, 30/12/2021, 07:47 PM, PyCharm, ByteTrack

from yolox.core import launch
from yolox.data import MOTDataset, ValTransform, ValTransformWithPseudo

test_size = (896, 1600)
import os
from yolox.data import get_yolox_datadir

if __name__ == "__main__":
    valdataset = MOTDataset(
        data_dir=os.path.join(get_yolox_datadir(), "MOT20"),
        json_file='train_MOT20-01.json',
        img_size=test_size,
        name='train',  # change to train when running on training set
        preproc=ValTransformWithPseudo(
            rgb_means=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
        load_weak=True
    )
    for batch in valdataset:
        print(batch)
        exit()
@@ -18,6 +18,8 @@ import glob
import motmetrics as mm
from collections import OrderedDict
from pathlib import Path
import learn2learn as l2l
import yolox.statics as statics


def make_parser():
@@ -26,6 +28,10 @@ def make_parser():
    parser.add_argument("-expn", "--experiment-name", type=str, default=None)
    parser.add_argument("-n", "--name", type=str, default=None, help="model name")

    parser.add_argument(
        "--adaptation_period", default=4, type=int, help="if 4, then adapts to one batch in four batches"
    )

    # distributed
    parser.add_argument(
        "--dist-backend", default="nccl", type=str, help="distributed backend"
@@ -109,6 +115,8 @@ def make_parser():
    parser.add_argument("--match_thresh", type=float, default=0.9, help="matching threshold for tracking")
    parser.add_argument("--min-box-area", type=float, default=100, help='filter out tiny boxes')
    parser.add_argument("--mot20", dest="mot20", default=False, action="store_true", help="test mot20.")
    parser.add_argument("--use_existing_files", default=False, action="store_true", help="to use already created files")
    return parser
@@ -126,38 +134,18 @@ def compare_dataframes(gts, ts):
    return accs, names


def process_loader(args, val_loader, model, is_distributed):
    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn(
            "You have chosen to seed testing. This will turn on the CUDNN deterministic setting, "
        )

    # set environment variables for distributed training
    cudnn.benchmark = True
    rank = args.local_rank
    # rank = get_local_rank()

def process_loader(args, exp, val_loader, model, is_distributed, trt_file, decoder, val_ann):
    file_name = os.path.join(exp.output_dir, args.experiment_name)
    rank = args.local_rank

    if rank == 0:
        os.makedirs(file_name, exist_ok=True)

    results_folder = os.path.join(file_name, "track_results")
    os.makedirs(results_folder, exist_ok=True)

    setup_logger(file_name, distributed_rank=rank, filename="val_log.txt", mode="a")
    logger.info("Args: {}".format(args))

    if args.conf is not None:
        exp.test_conf = args.conf
    if args.nms is not None:
        exp.nmsthre = args.nms
    if args.tsize is not None:
        exp.test_size = (args.tsize, args.tsize)

    adaptation_period = None
    if args.task == 'metamot':
        adaptation_period = args.adaptation_period

    evaluator = MOTEvaluator(
        args=args,
@@ -167,61 +155,35 @@ def process_loader(args, val_loader, model, is_distributed):
        nmsthre=exp.nmsthre,
        num_classes=exp.num_classes,
    )

    torch.cuda.set_device(rank)
    model.cuda(rank)
    model.eval()

    if not args.speed and not args.trt:
        if args.ckpt is None:
            ckpt_file = os.path.join(file_name, "best_ckpt.pth.tar")
        else:
            ckpt_file = args.ckpt
        logger.info("loading checkpoint")
        loc = "cuda:{}".format(rank)
        ckpt = torch.load(ckpt_file, map_location=loc)
        # load the model state dict
        model.load_state_dict(ckpt["model"])
        logger.info("loaded checkpoint done.")

    if is_distributed:
        model = DDP(model, device_ids=[rank])

    if args.fuse:
        logger.info("\tFusing model...")
        model = fuse_model(model)

    if args.trt:
        assert (
            not args.fuse and not is_distributed and args.batch_size == 1
        ), "TensorRT model does not support model fusing and distributed inferencing!"
        trt_file = os.path.join(file_name, "model_trt.pth")
        assert os.path.exists(
            trt_file
        ), "TensorRT model is not found!\n Run tools/trt.py first!"
        model.head.decode_in_inference = False
        decoder = model.head.decode_outputs
    else:
        trt_file = None
        decoder = None

    # start evaluate
    *_, summary = evaluator.evaluate(
        model, is_distributed, args.fp16, trt_file, decoder, exp.test_size, results_folder
        model, is_distributed, args.fp16, trt_file, decoder, exp.test_size, results_folder,
        adaptation_period=adaptation_period,
    )
    logger.info("\n" + summary)


def eval_MOT(args, exp, val_ann=None):
    file_name = os.path.join(exp.output_dir, args.experiment_name)
    rank = args.local_rank
    if rank == 0:
        os.makedirs(file_name, exist_ok=True)
    results_folder = os.path.join(file_name, "track_results")
    os.makedirs(results_folder, exist_ok=True)

    # evaluate MOTA
    mm.lap.default_solver = 'lap'

    if exp.val_ann == 'val_half.json':
    if val_ann == 'val_half.json':
        gt_type = '_val_half'
    else:
        gt_type = ''
    print('gt_type', gt_type)
    if args.mot20:
        gtfiles = glob.glob(os.path.join('datasets/MOT20/train', '*/gt/gt{}.txt'.format(gt_type)))
        gtfiles = glob.glob(os.path.join(statics.DATA_PATH, 'MOT20/train', '*/gt/gt{}.txt'.format(gt_type)))
    else:
        gtfiles = glob.glob(os.path.join('datasets/mot/train', '*/gt/gt{}.txt'.format(gt_type)))
        gtfiles = glob.glob(os.path.join(statics.DATA_PATH, 'MOT17/train', '*/gt/gt{}.txt'.format(gt_type)))
    print('gt_files', gtfiles)
    tsfiles = [f for f in glob.glob(os.path.join(results_folder, '*.txt')) if
               not os.path.basename(f).startswith('eval')]
@@ -267,23 +229,114 @@ def process_loader(args, val_loader, model, is_distributed):
    logger.info('Completed')


def load_model(args, exp, is_distributed):
    model = exp.get_model()
    logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size)))
    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn(
            "You have chosen to seed testing. This will turn on the CUDNN deterministic setting, "
        )

    # set environment variables for distributed training
    cudnn.benchmark = True
    rank = args.local_rank
    # rank = get_local_rank()

    file_name = os.path.join(exp.output_dir, args.experiment_name)

    if rank == 0:
        os.makedirs(file_name, exist_ok=True)

    setup_logger(file_name, distributed_rank=rank, filename="val_log.txt", mode="a")
    logger.info("Args: {}".format(args))

    if args.conf is not None:
        exp.test_conf = args.conf
    if args.nms is not None:
        exp.nmsthre = args.nms
    if args.tsize is not None:
        exp.test_size = (args.tsize, args.tsize)

    if args.task == "metamot":
        model = l2l.algorithms.MAML(model, lr=exp.inner_lr, first_order=exp.first_order, allow_nograd=True)
    torch.cuda.set_device(rank)
    model.cuda(rank)
    model.eval()

    if not args.speed and not args.trt:
        if args.ckpt is None:
            ckpt_file = os.path.join(file_name, "best_ckpt.pth.tar")
        else:
            ckpt_file = args.ckpt
        logger.info("loading checkpoint")
        loc = "cuda:{}".format(rank)
        ckpt = torch.load(ckpt_file, map_location=loc)
        # handling meta models: the l2l.MAML wrapper expects a 'module.' prefix
        # on every parameter key, so prefix plain checkpoints before loading
        new_dict = {}
        if (not list(ckpt["model"].keys())[0].startswith('module')) and args.task == "metamot":
            for key in ckpt["model"].keys():
                if not key.startswith('module.'):
                    new_dict['module.' + key] = ckpt["model"][key]
                else:
                    new_dict[key] = ckpt["model"][key]
            del ckpt["model"]
            ckpt["model"] = new_dict
        # load the model state dict
        model.load_state_dict(ckpt["model"])
        logger.info("loaded checkpoint done.")

    if is_distributed:
        model = DDP(model, device_ids=[rank])

    if args.fuse:
        logger.info("\tFusing model...")
        model = fuse_model(model)

    if args.trt:
        assert (
            not args.fuse and not is_distributed and args.batch_size == 1
        ), "TensorRT model does not support model fusing and distributed inferencing!"
        trt_file = os.path.join(file_name, "model_trt.pth")
        assert os.path.exists(
            trt_file
        ), "TensorRT model is not found!\n Run tools/trt.py first!"
        model.head.decode_in_inference = False
        decoder = model.head.decode_outputs
    else:
        trt_file = None
        decoder = None
    return model, trt_file, decoder


@logger.catch
def main(exp, args, num_gpu):
    is_distributed = num_gpu > 1
    print('is_distributed', is_distributed)
    print('num_gpu', num_gpu)

    model = exp.get_model()
    logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size)))
    # logger.info("Model Structure:\n{}".format(str(model)))

    model, trt_file, decoder = load_model(args, exp, is_distributed)
    if args.task == 'metamot':
        val_loaders = exp.get_eval_loaders(args.batch_size, is_distributed, args.test)
        for val_loader in val_loaders:
            learner = model.clone()
            process_loader(args, val_loader, learner, is_distributed)
        if not args.use_existing_files:
            for val_loader, val_ann in zip(val_loaders, exp.val_anns):
                logger.info('processing loader...')
                process_loader(args, exp, val_loader, model, is_distributed, trt_file, decoder, val_ann)
        eval_MOT(args, exp)
    else:
        val_loader = exp.get_eval_loader(args.batch_size, is_distributed, args.test)
        process_loader(args, val_loader, model, is_distributed)
        if not args.use_existing_files:
            val_loader = exp.get_eval_loader(args.batch_size, is_distributed, args.test)
            process_loader(args, exp, val_loader, model, is_distributed, trt_file, decoder, exp.val_ann)
        eval_MOT(args, exp, exp.val_ann)


if __name__ == "__main__":
@@ -76,6 +76,7 @@ class MetaTrainer:
        self.after_train()

    def train_in_epoch(self):
        # self.evaluate_and_save_model()
        for self.epoch in range(self.start_epoch, self.max_epoch):
            self.before_epoch()
            self.train_in_task()
@@ -212,7 +213,7 @@ class MetaTrainer:
        # self.model = model
        self.model.train()

        self.evaluator = self.exp.get_evaluator(
        self.evaluators = self.exp.get_evaluators(
            batch_size=self.args.batch_size, is_distributed=self.is_distributed
        )
        # Tensorboard logger
@@ -320,6 +321,7 @@ class MetaTrainer:
            ckpt = torch.load(ckpt_file, map_location=self.device)

            # TODO: handle pretrained BYTETrack
            # handling meta models
            # new_dict = {}
            # for key in ckpt["model"].keys():
@@ -355,9 +357,10 @@ class MetaTrainer:
        return model

    def evaluate_and_save_model(self):
        logger.info("starting eval...")
        evalmodel = self.ema_model.ema if self.use_model_ema else self.model
        ap50_95, ap50, summary = self.exp.eval(
            evalmodel, self.evaluator, self.is_distributed
            evalmodel, self.evaluators, self.is_distributed
        )
        self.model.train()
        if self.rank == 0:
@@ -2,7 +2,7 @@
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.

from .data_augment import TrainTransform, ValTransform
from .data_augment import TrainTransform, ValTransform, ValTransformWithPseudo
from .data_prefetcher import DataPrefetcher
from .dataloading import DataLoader, get_yolox_datadir
from .datasets import *
@@ -297,3 +297,83 @@ class ValTransform:
    def __call__(self, img, res, input_size):
        img, _ = preproc(img, input_size, self.means, self.std, self.swap)
        return img, np.zeros((1, 5))


class ValTransformWithPseudo:
    """
    Defines the transformations that should be applied to test PIL image
    for input into the network.

    dimension -> tensorize -> color adj

    Arguments:
        resize (int): input dimension to SSD
        rgb_means ((int,int,int)): average RGB of the dataset
            (104, 117, 123)
        swap ((int,int,int)): final order of channels

    Returns:
        transform (transform): callable transform to be applied to test/val
        data
    """

    def __init__(self, rgb_means=None, std=None, swap=(2, 0, 1), max_labels=100):
        self.means = rgb_means
        self.swap = swap
        self.std = std
        self.max_labels = max_labels

    def __call__(self, image, targets, input_dim):
        boxes = targets[:, :4].copy()
        labels = targets[:, 4].copy()
        ids = targets[:, 5].copy()
        if len(boxes) == 0:
            targets = np.zeros((self.max_labels, 6), dtype=np.float32)
            image, r_o = preproc(image, input_dim, self.means, self.std)
            image = np.ascontiguousarray(image, dtype=np.float32)
            return image, targets

        image_o = image.copy()
        targets_o = targets.copy()
        height_o, width_o, _ = image_o.shape
        boxes_o = targets_o[:, :4]
        labels_o = targets_o[:, 4]
        ids_o = targets_o[:, 5]
        # bbox_o: [xyxy] to [c_x,c_y,w,h]
        boxes_o = xyxy2cxcywh(boxes_o)

        # image_t = _distort(image)
        image_t = image
        # image_t, boxes = _mirror(image_t, boxes)
        height, width, _ = image_t.shape
        image_t, r_ = preproc(image_t, input_dim, self.means, self.std)
        # boxes [xyxy] 2 [cx,cy,w,h]
        boxes = xyxy2cxcywh(boxes)
        boxes *= r_

        mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 1
        boxes_t = boxes[mask_b]
        labels_t = labels[mask_b]
        ids_t = ids[mask_b]

        if len(boxes_t) == 0:
            image_t, r_o = preproc(image_o, input_dim, self.means, self.std)
            boxes_o *= r_o
            boxes_t = boxes_o
            labels_t = labels_o
            ids_t = ids_o

        labels_t = np.expand_dims(labels_t, 1)
        ids_t = np.expand_dims(ids_t, 1)

        targets_t = np.hstack((labels_t, boxes_t, ids_t))
        padded_labels = np.zeros((self.max_labels, 6))
        padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[
            : self.max_labels
        ]
        padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
        image_t = np.ascontiguousarray(image_t, dtype=np.float32)
        return image_t, padded_labels
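A minimal usage sketch for the new transform (shapes assume the defaults above; the import works once the yolox.data __init__ change shown earlier is applied):

    import numpy as np
    from yolox.data import ValTransformWithPseudo

    t = ValTransformWithPseudo(rgb_means=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
    img = np.zeros((480, 640, 3), dtype=np.uint8)
    targets = np.array([[10., 20., 110., 220., 0., 7.]])  # [x1, y1, x2, y2, cls, track_id]
    img_t, padded = t(img, targets, (800, 1440))
    print(img_t.shape, padded.shape)  # (3, 800, 1440), (100, 6); rows are [cls, cx, cy, w, h, id]

Unlike ValTransform, which discards annotations at eval time, this variant keeps the (pseudo) targets so the meta-learner can take adaptation steps during tracking.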
@@ -14,12 +14,13 @@ class MOTDataset(Dataset):
    """

    def __init__(
        self,
        data_dir=None,
        json_file="train_half.json",
        name="train",
        img_size=(608, 1088),
        preproc=None,
            self,
            data_dir=None,
            json_file="train_half.json",
            name="train",
            img_size=(608, 1088),
            preproc=None,
            load_weak=False,
    ):
        """
        COCO dataset initialization. Annotation data are read into memory by COCO API.
@@ -45,6 +46,7 @@ class MOTDataset(Dataset):
        self.name = name
        self.img_size = img_size
        self.preproc = preproc
        self.load_weak = load_weak

    def __len__(self):
        return len(self.ids)

@@ -98,9 +100,31 @@ class MOTDataset(Dataset):
        img_file = os.path.join(
            self.data_dir, self.name, file_name
        )
        head_tail = os.path.split(img_file)
        # label_path = os.path.join(head_tail[0], head_tail[1].replace('.jpg', '.txt'))
        if self.load_weak:
            weak_label_path = os.path.join(head_tail[0], head_tail[1] + '_weak_yolox-x.txt')
            # load weak labels from weak_label_path; each row is
            # [cls, id, x_center, y_center, w, h], normalized by image size
            # (see tools/generate_weak_labels.py); ndmin=2 keeps the (N, 6)
            # shape when a frame has a single detection
            width = img_info[1]
            height = img_info[0]
            labels = np.loadtxt(weak_label_path, ndmin=2)
            res = np.ones_like(labels)
            # rescale to pixels column-wise (rows are detections)
            labels[:, 2] *= width
            labels[:, 4] *= width
            labels[:, 3] *= height
            labels[:, 5] *= height
            labels[:, 4] += labels[:, 2]
            labels[:, 5] += labels[:, 3]
            res[:, 0:4] = labels[:, -4:]
            res[:, 5] = labels[:, 1]
            # all are from class one
            # res[:, 4] = labels[:, 0]

        img = cv2.imread(img_file)
        # if img is None:
        #     print('img_file is None', img_file)
        if img is None:
            print('img_file is None', img_file)
        assert img is not None

        return img, res.copy(), img_info, np.array([id_])
@@ -192,7 +192,7 @@ class COCOEvaluator:
            info = time_info + "\n"

            # Evaluate the Dt (detection) json comparing with the ground truth
            # Evaluate the Dt (detection) json comparing with the ground truth
            if len(data_dict) > 0:
                cocoGt = self.dataloader.dataset.coco
                # TODO: since pycocotools can't process dict in py36, write data to json file.
@@ -210,8 +210,12 @@ class COCOEvaluator:
                from pycocotools import cocoeval as COCOeval
                logger.warning("Use standard COCOeval.")
            '''
            # from pycocotools.cocoeval import COCOeval
            from yolox.layers import COCOeval_opt as COCOeval
            # TODO: commenting this and trying to use pycocotools
            from pycocotools.cocoeval import COCOeval
            # from yolox.layers import COCOeval_opt as COCOeval
            cocoEval = COCOeval(cocoGt, cocoDt, annType[1])
            cocoEval.evaluate()
            cocoEval.accumulate()
@@ -34,7 +34,8 @@ def write_results(filename, results): | |||
if track_id < 0: | |||
continue | |||
x1, y1, w, h = tlwh | |||
line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1), h=round(h, 1), s=round(score, 2)) | |||
line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1), | |||
h=round(h, 1), s=round(score, 2)) | |||
f.write(line) | |||
logger.info('save results to {}'.format(filename)) | |||
@@ -47,7 +48,8 @@ def write_results_no_score(filename, results): | |||
if track_id < 0: | |||
continue | |||
x1, y1, w, h = tlwh | |||
line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1), h=round(h, 1)) | |||
line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1), | |||
h=round(h, 1)) | |||
f.write(line) | |||
logger.info('save results to {}'.format(filename)) | |||
@@ -59,7 +61,7 @@ class MOTEvaluator: | |||
""" | |||
def __init__( | |||
self, args, dataloader, img_size, confthre, nmsthre, num_classes): | |||
self, args, dataloader, img_size, confthre, nmsthre, num_classes): | |||
""" | |||
Args: | |||
dataloader (Dataloader): evaluate dataloader. | |||
@@ -77,14 +79,15 @@ class MOTEvaluator: | |||
self.args = args | |||
def evaluate( | |||
self, | |||
model, | |||
distributed=False, | |||
half=False, | |||
trt_file=None, | |||
decoder=None, | |||
test_size=None, | |||
result_folder=None | |||
self, | |||
model, | |||
distributed=False, | |||
half=False, | |||
trt_file=None, | |||
decoder=None, | |||
test_size=None, | |||
result_folder=None, | |||
adaptation_period=None, | |||
): | |||
""" | |||
COCO average precision (AP) Evaluation. Iterate inference on the test dataset | |||
@@ -100,11 +103,17 @@ class MOTEvaluator: | |||
ap50 (float) : COCO AP of IoU=50 | |||
summary (sr): summary info of evaluation. | |||
""" | |||
if adaptation_period is not None: | |||
logger.info('cloning model...') | |||
learner = model.clone() | |||
else: | |||
learner = model | |||
# TODO half to amp_test | |||
tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor | |||
model = model.eval() | |||
learner = learner.eval() | |||
if half: | |||
model = model.half() | |||
learner = learner.half() | |||
ids = [] | |||
data_list = [] | |||
results = [] | |||
@@ -117,74 +126,90 @@ class MOTEvaluator: | |||
if trt_file is not None: | |||
from torch2trt import TRTModule | |||
logger.info('Loading trt file') | |||
model_trt = TRTModule() | |||
model_trt.load_state_dict(torch.load(trt_file)) | |||
x = torch.ones(1, 3, test_size[0], test_size[1]).cuda() | |||
model(x) | |||
model = model_trt | |||
learner(x) | |||
learner = model_trt | |||
tracker = BYTETracker(self.args) | |||
ori_thresh = self.args.track_thresh | |||
for cur_iter, (imgs, _, info_imgs, ids) in enumerate( | |||
progress_bar(self.dataloader) | |||
): | |||
with torch.no_grad(): | |||
# init tracker | |||
frame_id = info_imgs[2].item() | |||
video_id = info_imgs[3].item() | |||
img_file_name = info_imgs[4] | |||
video_name = img_file_name[0].split('/')[0] | |||
if video_name == 'MOT17-05-FRCNN' or video_name == 'MOT17-06-FRCNN': | |||
self.args.track_buffer = 14 | |||
elif video_name == 'MOT17-13-FRCNN' or video_name == 'MOT17-14-FRCNN': | |||
self.args.track_buffer = 25 | |||
else: | |||
self.args.track_buffer = 30 | |||
if video_name == 'MOT17-01-FRCNN': | |||
self.args.track_thresh = 0.65 | |||
elif video_name == 'MOT17-06-FRCNN': | |||
self.args.track_thresh = 0.65 | |||
elif video_name == 'MOT17-12-FRCNN': | |||
self.args.track_thresh = 0.7 | |||
elif video_name == 'MOT17-14-FRCNN': | |||
self.args.track_thresh = 0.67 | |||
else: | |||
self.args.track_thresh = ori_thresh | |||
if video_name == 'MOT20-06' or video_name == 'MOT20-08': | |||
self.args.track_thresh = 0.3 | |||
else: | |||
self.args.track_thresh = ori_thresh | |||
if video_name not in video_names: | |||
video_names[video_id] = video_name | |||
if frame_id == 1: | |||
tracker = BYTETracker(self.args) | |||
if len(results) != 0: | |||
result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id - 1])) | |||
write_results(result_filename, results) | |||
results = [] | |||
imgs = imgs.type(tensor_type) | |||
for cur_iter, (imgs, targets, info_imgs, ids) in enumerate( | |||
progress_bar(self.dataloader) | |||
): | |||
if cur_iter % 100 == 0: | |||
logger.info('cur_iter: {}'.format(cur_iter)) | |||
# with torch.no_grad(): | |||
# init tracker | |||
# imgs = imgs.to(self.data_type) | |||
# targets = targets.to(self.data_type) | |||
frame_id = info_imgs[2].item() | |||
video_id = info_imgs[3].item() | |||
img_file_name = info_imgs[4] | |||
video_name = img_file_name[0].split('/')[0] | |||
if video_name == 'MOT17-05-FRCNN' or video_name == 'MOT17-06-FRCNN': | |||
self.args.track_buffer = 14 | |||
elif video_name == 'MOT17-13-FRCNN' or video_name == 'MOT17-14-FRCNN': | |||
self.args.track_buffer = 25 | |||
else: | |||
self.args.track_buffer = 30 | |||
if video_name == 'MOT17-01-FRCNN': | |||
self.args.track_thresh = 0.65 | |||
elif video_name == 'MOT17-06-FRCNN': | |||
self.args.track_thresh = 0.65 | |||
elif video_name == 'MOT17-12-FRCNN': | |||
self.args.track_thresh = 0.7 | |||
elif video_name == 'MOT17-14-FRCNN': | |||
self.args.track_thresh = 0.67 | |||
else: | |||
self.args.track_thresh = ori_thresh | |||
if video_name == 'MOT20-06' or video_name == 'MOT20-08': | |||
self.args.track_thresh = 0.3 | |||
else: | |||
self.args.track_thresh = ori_thresh | |||
if video_name not in video_names: | |||
video_names[video_id] = video_name | |||
if frame_id == 1: | |||
tracker = BYTETracker(self.args) | |||
if len(results) != 0: | |||
result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id - 1])) | |||
write_results(result_filename, results) | |||
results = [] | |||
imgs = imgs.type(tensor_type) | |||
# skip the the last iters since batchsize might be not enough for batch inference | |||
is_time_record = cur_iter < len(self.dataloader) - 1 | |||
if is_time_record: | |||
start = time.time() | |||
if adaptation_period is not None and cur_iter % adaptation_period == 0: | |||
learner.train() | |||
targets = targets.type(tensor_type) | |||
targets.requires_grad = False | |||
outputs = learner(imgs, targets) | |||
loss = outputs["total_loss"] | |||
learner.adapt(loss) | |||
learner.eval() | |||
# skip the the last iters since batchsize might be not enough for batch inference | |||
is_time_record = cur_iter < len(self.dataloader) - 1 | |||
if is_time_record: | |||
start = time.time() | |||
with torch.no_grad(): | |||
outputs = learner(imgs) | |||
outputs = model(imgs) | |||
if decoder is not None: | |||
outputs = decoder(outputs, dtype=outputs.type()) | |||
# print('outputs', outputs.shape) | |||
outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre) | |||
if decoder is not None: | |||
outputs = decoder(outputs, dtype=outputs.type()) | |||
print('outputs', outputs.shape) | |||
outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre) | |||
if is_time_record: | |||
infer_end = time_synchronized() | |||
inference_time += infer_end - start | |||
if is_time_record: | |||
infer_end = time_synchronized() | |||
inference_time += infer_end - start | |||
output_results = self.convert_to_coco_format(outputs, info_imgs, ids) | |||
data_list.extend(output_results) | |||
@@ -209,7 +234,7 @@ class MOTEvaluator: | |||
if is_time_record: | |||
track_end = time_synchronized() | |||
track_time += track_end - infer_end | |||
if cur_iter == len(self.dataloader) - 1: | |||
result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id])) | |||
write_results(result_filename, results) | |||
@@ -225,14 +250,14 @@ class MOTEvaluator: | |||
return eval_results | |||
def evaluate_sort( | |||
self, | |||
model, | |||
distributed=False, | |||
half=False, | |||
trt_file=None, | |||
decoder=None, | |||
test_size=None, | |||
result_folder=None | |||
self, | |||
model, | |||
distributed=False, | |||
half=False, | |||
trt_file=None, | |||
decoder=None, | |||
test_size=None, | |||
result_folder=None | |||
): | |||
""" | |||
COCO average precision (AP) Evaluation. Iterate inference on the test dataset | |||
@@ -272,12 +297,14 @@ class MOTEvaluator: | |||
x = torch.ones(1, 3, test_size[0], test_size[1]).cuda() | |||
model(x) | |||
model = model_trt | |||
tracker = Sort(self.args.track_thresh) | |||
for cur_iter, (imgs, _, info_imgs, ids) in enumerate( | |||
progress_bar(self.dataloader) | |||
progress_bar(self.dataloader) | |||
): | |||
if cur_iter % 250 == 0: | |||
logger.info('cur_iter: {}'.format(cur_iter)) | |||
with torch.no_grad(): | |||
# init tracker | |||
frame_id = info_imgs[2].item() | |||
@@ -306,7 +333,7 @@ class MOTEvaluator: | |||
outputs = decoder(outputs, dtype=outputs.type()) | |||
outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre) | |||
if is_time_record: | |||
infer_end = time_synchronized() | |||
inference_time += infer_end - start | |||
@@ -331,7 +358,7 @@ class MOTEvaluator: | |||
if is_time_record: | |||
track_end = time_synchronized() | |||
track_time += track_end - infer_end | |||
if cur_iter == len(self.dataloader) - 1: | |||
result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id])) | |||
write_results_no_score(result_filename, results) | |||
@@ -347,15 +374,15 @@ class MOTEvaluator: | |||
return eval_results | |||
    def evaluate_deepsort(
            self,
            model,
            distributed=False,
            half=False,
            trt_file=None,
            decoder=None,
            test_size=None,
            result_folder=None,
            model_folder=None
    ):
        """
        COCO average precision (AP) Evaluation. Iterate inference on the test dataset
@@ -395,11 +422,11 @@ class MOTEvaluator:
            x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()
            model(x)
            model = model_trt

        tracker = DeepSort(model_folder, min_confidence=self.args.track_thresh)

        for cur_iter, (imgs, _, info_imgs, ids) in enumerate(
                progress_bar(self.dataloader)
        ):
            with torch.no_grad():
                # init tracker
@@ -429,7 +456,7 @@ class MOTEvaluator:
                    outputs = decoder(outputs, dtype=outputs.type())
                outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)

                if is_time_record:
                    infer_end = time_synchronized()
                    inference_time += infer_end - start
@@ -454,7 +481,7 @@ class MOTEvaluator:
            if is_time_record:
                track_end = time_synchronized()
                track_time += track_end - infer_end

            if cur_iter == len(self.dataloader) - 1:
                result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id]))
                write_results_no_score(result_filename, results)
@@ -470,15 +497,15 @@ class MOTEvaluator:
        return eval_results
    def evaluate_motdt(
            self,
            model,
            distributed=False,
            half=False,
            trt_file=None,
            decoder=None,
            test_size=None,
            result_folder=None,
            model_folder=None
    ):
        """
        COCO average precision (AP) Evaluation. Iterate inference on the test dataset
@@ -518,10 +545,10 @@ class MOTEvaluator:
            x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()
            model(x)
            model = model_trt

        tracker = OnlineTracker(model_folder, min_cls_score=self.args.track_thresh)

        for cur_iter, (imgs, _, info_imgs, ids) in enumerate(
                progress_bar(self.dataloader)
        ):
            with torch.no_grad():
                # init tracker
@@ -551,7 +578,7 @@ class MOTEvaluator:
                    outputs = decoder(outputs, dtype=outputs.type())
                outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)

                if is_time_record:
                    infer_end = time_synchronized()
                    inference_time += infer_end - start
@@ -578,7 +605,7 @@ class MOTEvaluator:
            if is_time_record:
                track_end = time_synchronized()
                track_time += track_end - infer_end

            if cur_iter == len(self.dataloader) - 1:
                result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id]))
                write_results(result_filename, results)
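Note: evaluate_sort, evaluate_deepsort, and evaluate_motdt above share the same evaluation skeleton and differ only in how the tracker is constructed — Sort(self.args.track_thresh), DeepSort(model_folder, min_confidence=self.args.track_thresh), OnlineTracker(model_folder, min_cls_score=self.args.track_thresh) — and in whether confidence scores are written out (write_results for MOTDT vs. write_results_no_score for SORT/DeepSORT).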
@@ -596,7 +623,7 @@ class MOTEvaluator:
    def convert_to_coco_format(self, outputs, info_imgs, ids):
        data_list = []
        for (output, img_h, img_w, img_id) in zip(
                outputs, info_imgs[0], info_imgs[1], ids
        ):
            if output is None:
                continue
@@ -644,9 +671,9 @@ class MOTEvaluator:
            [
                "Average {} time: {:.2f} ms".format(k, v)
                for k, v in zip(
                    ["forward", "track", "inference"],
                    [a_infer_time, a_track_time, (a_infer_time + a_track_time)],
                )
            ]
        )
@@ -666,8 +693,9 @@ class MOTEvaluator:
            from pycocotools import cocoeval as COCOeval
            logger.warning("Use standard COCOeval.")
        '''
-       #from pycocotools.cocoeval import COCOeval
-       from yolox.layers import COCOeval_opt as COCOeval
+       # I changed it
+       from pycocotools.cocoeval import COCOeval
+       # from yolox.layers import COCOeval_opt as COCOeval
        cocoEval = COCOeval(cocoGt, cocoDt, annType[1])
        cocoEval.evaluate()
        cocoEval.accumulate()
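Note: COCOeval_opt (yolox.layers) is the compiled, C++-accelerated COCO evaluator; the swap above pins the pure-Python pycocotools implementation, which avoids building the YOLOX extension on the cluster at the cost of a slower evaluate/accumulate. The commented-out block above already shows the upstream fallback pattern, which would keep the fast path whenever the extension is available — a minimal sketch, not part of the diff:

try:
    from yolox.layers import COCOeval_opt as COCOeval  # compiled, faster evaluate/accumulate
except ImportError:
    from pycocotools.cocoeval import COCOeval  # pure-Python fallback, same interface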
@@ -24,7 +24,7 @@ class BaseMetaExp(metaclass=ABCMeta):
        self.seed = None
        # self.output_dir = "./YOLOX_outputs"
        self.output_dir = "./meta_experiments"
-       self.print_interval = 100
+       self.print_interval = 250
        self.eval_interval = 10

    @abstractmethod
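Note: print_interval is measured in iterations; raising it from 100 to 250 puts training logs on the same 250-iteration cadence as the `cur_iter % 250 == 0` progress logging added to mot_evaluator.py above, likely to keep long cluster-job logs manageable.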
@@ -62,7 +62,7 @@ class MetaExp(BaseMetaExp):
        # ----------------- Meta-learning ------------------ #
        self.first_order = True
-       self.inner_lr = 1e-5
+       self.inner_lr = 1e-6

    def get_model(self):
        from yolox.models import YOLOPAFPN, YOLOX, YOLOXHead
@@ -241,19 +241,31 @@ class MetaExp(BaseMetaExp):
            val_loaders.append(val_loader)
        return val_loaders

-   def get_evaluator(self, batch_size, is_distributed, testdev=False):
+   def get_evaluators(self, batch_size, is_distributed, testdev=False):
        from yolox.evaluators import COCOEvaluator

-       val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev)
-       evaluator = COCOEvaluator(
-           dataloader=val_loader,
-           img_size=self.test_size,
-           confthre=self.test_conf,
-           nmsthre=self.nmsthre,
-           num_classes=self.num_classes,
-           testdev=testdev,
-       )
-       return evaluator
-
-   def eval(self, model, evaluator, is_distributed, half=False):
-       return evaluator.evaluate(model, is_distributed, half)
+       val_loaders = self.get_eval_loaders(batch_size, is_distributed, testdev=testdev)
+       evaluators = []
+       for val_loader in val_loaders:
+           evaluator = COCOEvaluator(
+               dataloader=val_loader,
+               img_size=self.test_size,
+               confthre=self.test_conf,
+               nmsthre=self.nmsthre,
+               num_classes=self.num_classes,
+               testdev=testdev,
+           )
+           evaluators.append(evaluator)
+       return evaluators
+
+   def eval(self, model, evaluators, is_distributed, half=False):
+       ap50_95s = 0.0
+       ap50s = 0.0
+       summarys = ''
+       for evaluator in evaluators:
+           ap50_95, ap50, summary = evaluator.evaluate(model, is_distributed, half)
+           ap50_95s += ap50_95
+           ap50s += ap50
+           summarys += ("\n" + summary)
+       n = len(evaluators)
+       return (ap50_95s / n), (ap50s / n), summarys
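Note: eval now reports the mean AP50:95 and AP50 across one COCOEvaluator per validation loader (one per meta-task/sequence) and concatenates the per-evaluator summaries. A minimal usage sketch, assuming a trainer that already holds exp and model (call-site names are illustrative, not part of the diff):

# hypothetical call site inside a trainer's evaluation step
evaluators = exp.get_evaluators(batch_size=1, is_distributed=False)
ap50_95, ap50, summary = exp.eval(model, evaluators, is_distributed=False, half=True)
logger.info(summary)  # newline-separated COCO summaries, one per loader

Note that the division by len(evaluators) assumes at least one validation loader; an empty list would raise ZeroDivisionError.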
@@ -197,6 +197,9 @@ class YOLOXHead(nn.Module):
        if self.training:
            # logger.info("labels.shape:{}".format(labels.shape))
            # logger.info("torch.cat(outputs, 1).shape:{}".format(torch.cat(outputs, 1).shape))
+           # if torch.isnan(torch.cat(outputs, 1)).sum().item():
+           #     logger.info('There is Nan value in outputs {}'.format(torch.isnan(torch.cat(outputs, 1)).sum().item()))
            return self.get_losses(
                imgs,
                x_shifts,
@@ -397,22 +400,45 @@ class YOLOXHead(nn.Module):
        if self.use_l1:
            l1_targets = torch.cat(l1_targets, 0)

+       # TODO: check loss parts shapes
        num_fg = max(num_fg, 1)
+       # if bbox_preds.view(-1, 4)[fg_masks].shape != reg_targets.shape:
+       #     logger.info("some shape mismatch")
+       #     logger.info("bbox_preds.view(-1, 4)[fg_masks].shape {}".format(bbox_preds.view(-1, 4)[fg_masks].shape))
+       #     logger.info("reg_targets {}".format(reg_targets.shape))
+       #     logger.info("--------------------")
        loss_iou = (
            self.iou_loss(bbox_preds.view(-1, 4)[fg_masks], reg_targets)
        ).sum() / num_fg
+       # if obj_preds.view(-1, 1).shape != obj_targets.shape:
+       #     logger.info("some shape mismatch")
+       #     logger.info("obj_preds.view(-1, 1).shape {}".format(obj_preds.view(-1, 1).shape))
+       #     logger.info("obj_targets.shape {}".format(obj_targets.shape))
+       #     logger.info("--------------------")
        loss_obj = (
            self.bcewithlog_loss(obj_preds.view(-1, 1), obj_targets)
        ).sum() / num_fg
+       # if cls_preds.view(-1, self.num_classes)[fg_masks].shape != cls_targets.shape:
+       #     logger.info("some shape mismatch")
+       #     logger.info("cls_preds.view(-1, self.num_classes)[fg_masks].shape {}".format(
+       #         cls_preds.view(-1, self.num_classes)[fg_masks].shape))
+       #     logger.info("cls_targets.shape {}".format(cls_targets.shape))
+       #     logger.info("--------------------")
        loss_cls = (
            self.bcewithlog_loss(
                cls_preds.view(-1, self.num_classes)[fg_masks], cls_targets
            )
        ).sum() / num_fg
        if self.use_l1:
+           # if origin_preds.view(-1, 4)[fg_masks].shape != l1_targets.shape:
+           #     logger.info("some shape mismatch")
+           #     logger.info("origin_preds.view(-1, 4)[fg_masks].shape {}".format(
+           #         origin_preds.view(-1, 4)[fg_masks].shape))
+           #     logger.info("l1_targets.shape {}".format(l1_targets.shape))
+           #     logger.info("--------------------")
            loss_l1 = (
                self.l1_loss(origin_preds.view(-1, 4)[fg_masks], l1_targets)
            ).sum() / num_fg
@@ -457,7 +483,7 @@ class YOLOXHead(nn.Module):
        imgs,
        mode="gpu",
    ):
+       # TODO: check loss mismatches here
        if mode == "cpu":
            print("------------CPU Mode for This Batch-------------")
            gt_bboxes_per_image = gt_bboxes_per_image.cpu().float()
@@ -477,6 +503,11 @@ class YOLOXHead(nn.Module):
            num_gt,
            img_size
        )
+       # if torch.isnan(cls_preds).sum().item() or torch.isnan(obj_preds).sum().item() or torch.isnan(
+       #         bboxes_preds_per_image).sum().item():
+       #     logger.info("cls_preds is Nan {}".format(torch.isnan(cls_preds).sum().item()))
+       #     logger.info("obj_preds is Nan {}".format(torch.isnan(obj_preds).sum().item()))
+       #     logger.info("bboxes_preds_per_image is Nan {}".format(torch.isnan(bboxes_preds_per_image).sum().item()))

        bboxes_preds_per_image = bboxes_preds_per_image[fg_mask]
        cls_preds_ = cls_preds[batch_idx][fg_mask]
@@ -495,8 +526,10 @@ class YOLOXHead(nn.Module):
            .unsqueeze(1)
            .repeat(1, num_in_boxes_anchor, 1)
        )
        pair_wise_ious_loss = -torch.log(pair_wise_ious + 1e-8)
+       # if torch.isnan(pair_wise_ious_loss).sum().item():
+       #     logger.info("pair_wise_ious_loss is Nan {}".format(torch.isnan(pair_wise_ious_loss).sum().item()))

        if mode == "cpu":
            cls_preds_, obj_preds_ = cls_preds_.cpu(), obj_preds_.cpu()
@@ -505,6 +538,7 @@ class YOLOXHead(nn.Module):
            cls_preds_.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_()
            * obj_preds_.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_()
        )

        pair_wise_cls_loss = F.binary_cross_entropy(
            cls_preds_.sqrt_(), gt_cls_per_image, reduction="none"
        ).sum(-1)
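Note: the commented-out checks in this file are NaN probes around the SimOTA cost terms. The epsilon in -torch.log(pair_wise_ious + 1e-8) already guards against log(0), and cls_preds_.sqrt_() keeps the fused cls-obj score inside [0, 1] before F.binary_cross_entropy, which requires probabilities. If the probes are ever re-enabled, a small helper avoids repeating the isnan boilerplate — an illustrative sketch, not part of the diff:

def count_nan(name, tensor):
    # Count NaN entries in `tensor` and log them under `name`; returns the
    # count so callers can branch on it (e.g. skip a batch with a non-finite loss).
    n = torch.isnan(tensor).sum().item()
    if n:
        logger.info('{} has {} NaN values'.format(name, n))
    return n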