# Mahdi Abdollahpour, 22/12/2021, 02:26 PM, PyCharm, ByteTrack

import os
import time
from os import listdir
from os.path import isfile, join

import cv2
import numpy as np
import torch
from loguru import logger

from yolox.data.data_augment import ValTransform
# from yolox.data.datasets import COCO_CLASSES  (redefined locally below)
from yolox.exp import get_exp
from yolox.utils import fuse_model, postprocess, vis
from yolox import statics

COCO_CLASSES = (
    "person",
    "bicycle",
    "car",
    "motorcycle",
    "airplane",
    "bus",
    "train",
    "truck",
    "boat",
    "traffic light",
    "fire hydrant",
    "stop sign",
    "parking meter",
    "bench",
    "bird",
    "cat",
    "dog",
    "horse",
    "sheep",
    "cow",
    "elephant",
    "bear",
    "zebra",
    "giraffe",
    "backpack",
    "umbrella",
    "handbag",
    "tie",
    "suitcase",
    "frisbee",
    "skis",
    "snowboard",
    "sports ball",
    "kite",
    "baseball bat",
    "baseball glove",
    "skateboard",
    "surfboard",
    "tennis racket",
    "bottle",
    "wine glass",
    "cup",
    "fork",
    "knife",
    "spoon",
    "bowl",
    "banana",
    "apple",
    "sandwich",
    "orange",
    "broccoli",
    "carrot",
    "hot dog",
    "pizza",
    "donut",
    "cake",
    "chair",
    "couch",
    "potted plant",
    "bed",
    "dining table",
    "toilet",
    "tv",
    "laptop",
    "mouse",
    "remote",
    "keyboard",
    "cell phone",
    "microwave",
    "oven",
    "toaster",
    "sink",
    "refrigerator",
    "book",
    "clock",
    "vase",
    "scissors",
    "teddy bear",
    "hair drier",
    "toothbrush",
)

IMAGE_EXT = [".jpg", ".jpeg", ".webp", ".bmp", ".png"]  # currently unused

use_cuda = True

MOT = 'MOT20'
section = 'train'

root_dir = os.path.join(statics.DATA_PATH, MOT, section)

# Only MOT-relevant COCO categories are kept as weak labels.
classes = ['person', 'bicycle', 'car', 'motorcycle', 'truck', 'bus']
fuse = False
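
# Expected data layout under statics.DATA_PATH (MOT-style; sequence names
# vary per dataset):
#
#     <DATA_PATH>/MOT20/train/<sequence>/img1/000001.jpg
#
# Of the 80 COCO categories above, only those listed in `classes` survive
# filtering: e.g. COCO_CLASSES[0] ("person") is kept, while COCO_CLASSES[14]
# ("bird") is rejected in get_labels() below.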

def get_labels(bboxes, cls, scores, th, tw):
    """Convert kept detections into normalized YOLO-style label rows.

    Note: at the call site, ``th`` receives the image *width* and ``tw``
    the image *height*. ``scores`` is currently unused; confidence is
    already thresholded inside postprocess().
    """
    label_id = 0
    labels = []

    n, _ = bboxes.shape
    for i in range(n):
        if COCO_CLASSES[int(cls[i])] not in classes:
            continue
        if use_cuda:
            box = bboxes[i, :].detach().cpu().numpy()
        else:
            box = bboxes[i, :].detach().numpy()
        # Normalize the corner box, then shift (x, y) to the box centre.
        x = box[0] / th
        y = box[1] / tw
        w = (box[2] - box[0]) / th
        h = (box[3] - box[1]) / tw
        x += w / 2
        y += h / 2
        labels.append([0, label_id, x, y, w, h])
        label_id += 1
    return np.array(labels)
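
# A minimal, illustrative sanity check for get_labels (nothing calls this;
# the box, class, and frame size are made up for the example):
def _example_get_labels():
    boxes = torch.tensor([[100.0, 150.0, 300.0, 450.0]])  # x1, y1, x2, y2
    cls = torch.tensor([0.0])     # index 0 -> "person"
    scores = torch.tensor([0.9])  # accepted but unused
    # On a hypothetical 1920x1080 frame this returns, approximately,
    # [[0, 0, 0.1042, 0.2778, 0.1042, 0.2778]]  (class, id, x_c, y_c, w, h)
    return get_labels(boxes, cls, scores, 1920, 1080)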


class Predictor(object):
    def __init__(
            self,
            model,
            exp,
            cls_names=COCO_CLASSES,
            trt_file=None,
            decoder=None,
            device="cpu",
            fp16=False,
            legacy=False,
    ):
        self.model = model
        self.cls_names = cls_names
        self.decoder = decoder
        self.num_classes = exp.num_classes
        self.confthre = 0.1
        self.nmsthre = 0.3
        self.test_size = exp.test_size
        self.device = device
        self.fp16 = fp16
        self.preproc = ValTransform()
        # TensorRT (torch2trt) loading from the original YOLOX demo is
        # omitted here; trt_file is accepted but ignored.

    def inference(self, img):
        img_info = {"id": 0}
        if isinstance(img, str):
            img_info["file_name"] = os.path.basename(img)
            img = cv2.imread(img)
        else:
            img_info["file_name"] = None

        height, width = img.shape[:2]
        img_info["height"] = height
        img_info["width"] = width
        img_info["raw_img"] = img

        # Scale factor between the network input and the original frame.
        ratio = min(self.test_size[0] / img.shape[0], self.test_size[1] / img.shape[1])
        img_info["ratio"] = ratio

        img, _ = self.preproc(img, None, self.test_size)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.float()
        if self.device == "gpu":
            img = img.cuda()
            if self.fp16:
                img = img.half()  # to FP16

        with torch.no_grad():
            t0 = time.time()
            outputs = self.model(img)
            if self.decoder is not None:
                outputs = self.decoder(outputs, dtype=outputs.type())
            # Each kept detection is a row of
            # (x1, y1, x2, y2, obj_conf, cls_conf, cls_id).
            outputs = postprocess(
                outputs, self.num_classes, self.confthre, self.nmsthre
            )
            # logger.info("Infer time: {:.4f}s".format(time.time() - t0))
        # img.shape is (batch, channels, height, width).
        _, _, test_h, test_w = img.shape
        img_info['test_h'] = test_h
        img_info['test_w'] = test_w
        return outputs, img_info

    def visual(self, output, img_info, cls_conf=0.35):
        ratio = img_info["ratio"]
        img = img_info["raw_img"]
        if output is None:
            return img
        output = output.cpu()

        bboxes = output[:, 0:4]
        # Undo the resize applied during preprocessing.
        bboxes /= ratio

        cls = output[:, 6]
        scores = output[:, 4] * output[:, 5]

        vis_res = vis(img, bboxes, scores, cls, cls_conf, self.cls_names)
        return vis_res
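
# Typical usage of Predictor (illustrative sketch; the file name is a
# placeholder and a loaded checkpoint is assumed):
#
#     exp = get_exp(None, 'yolox-x')
#     model = exp.get_model().eval()
#     predictor = Predictor(model, exp, COCO_CLASSES, device='cpu')
#     outputs, info = predictor.inference('frame.jpg')
#     dets = outputs[0]                      # (N, 7) tensor, or None
#     boxes = dets[:, 0:4] / info['ratio']   # back to original pixel scale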


def image_demo(predictor, path):
    folders = [f for f in listdir(path)]

    for folder in folders:
        print(folder)
        images_folder = join(join(path, folder), 'img1')
        images = [f for f in listdir(images_folder) if isfile(join(images_folder, f))]
        images = [a for a in images if a.endswith('.jpg')]
        images.sort()

        for i, image_name in enumerate(images):
            # Progress heartbeat every 300 frames.
            if i % 300 == 0:
                print(folder, i)
            outputs, img_info = predictor.inference(join(images_folder, image_name))

            ratio = img_info["ratio"]
            output = outputs[0]
            if output is None:
                continue
            output = output.cpu()

            bboxes = output[:, 0:4]
            # Undo the resize applied during preprocessing.
            bboxes /= ratio

            cls = output[:, 6]
            scores = output[:, 4] * output[:, 5]
            labels0 = get_labels(bboxes, cls, scores, img_info["width"], img_info["height"])

            np.savetxt(join(images_folder, image_name + '_weak_' + model_name + '.txt'),
                       labels0, delimiter=' ')


def main(exp, ckpt_file):
    model = exp.get_model()

    if use_cuda:
        model = model.cuda()
        device = 'gpu'
    else:
        device = 'cpu'
    model.eval()

    logger.info("loading checkpoint")
    ckpt = torch.load(ckpt_file, map_location="cpu")
    # Load the model weights.
    model.load_state_dict(ckpt["model"])
    logger.info("loaded checkpoint done.")

    if fuse:
        logger.info("\tFusing model...")
        model = fuse_model(model)

    predictor = Predictor(
        model, exp, COCO_CLASSES, trt_file=None, decoder=None,
        device=device, fp16=False, legacy=False,
    )
    image_demo(predictor, root_dir)


model_name = 'yolox-x'

if __name__ == "__main__":
    # To pin a specific GPU, set CUDA_VISIBLE_DEVICES before launching.
    ckpt_file = '/home/abdollahpour.ce.sharif/yolox_x.pth'

    exp = get_exp(None, model_name)
    main(exp, ckpt_file)
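
# Run directly with no CLI arguments, assuming the checkpoint path above
# exists and statics.DATA_PATH points at the dataset root:
#
#     python weak_label_demo.py    # script name is a placeholder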