# Mahdi Abdollahpour, 22/12/2021, 02:26 PM, PyCharm, ByteTrack

import os
import time
from os import listdir
from os.path import isfile, join

import cv2
import numpy as np
import torch
from loguru import logger

from yolox.data.data_augment import ValTransform
# from yolox.data.datasets import COCO_CLASSES  (redefined locally below)
from yolox.exp import get_exp
from yolox.utils import fuse_model, postprocess, vis
from yolox import statics

COCO_CLASSES = (
    "person",
    "bicycle",
    "car",
    "motorcycle",
    "airplane",
    "bus",
    "train",
    "truck",
    "boat",
    "traffic light",
    "fire hydrant",
    "stop sign",
    "parking meter",
    "bench",
    "bird",
    "cat",
    "dog",
    "horse",
    "sheep",
    "cow",
    "elephant",
    "bear",
    "zebra",
    "giraffe",
    "backpack",
    "umbrella",
    "handbag",
    "tie",
    "suitcase",
    "frisbee",
    "skis",
    "snowboard",
    "sports ball",
    "kite",
    "baseball bat",
    "baseball glove",
    "skateboard",
    "surfboard",
    "tennis racket",
    "bottle",
    "wine glass",
    "cup",
    "fork",
    "knife",
    "spoon",
    "bowl",
    "banana",
    "apple",
    "sandwich",
    "orange",
    "broccoli",
    "carrot",
    "hot dog",
    "pizza",
    "donut",
    "cake",
    "chair",
    "couch",
    "potted plant",
    "bed",
    "dining table",
    "toilet",
    "tv",
    "laptop",
    "mouse",
    "remote",
    "keyboard",
    "cell phone",
    "microwave",
    "oven",
    "toaster",
    "sink",
    "refrigerator",
    "book",
    "clock",
    "vase",
    "scissors",
    "teddy bear",
    "hair drier",
    "toothbrush",
)

IMAGE_EXT = [".jpg", ".jpeg", ".webp", ".bmp", ".png"]  # currently unused

use_cuda = True

MOT = 'MOT20'
section = 'train'

root_dir = os.path.join(statics.DATA_PATH, MOT, section)

# Only MOT-relevant COCO categories are kept as weak labels.
classes = ['person', 'bicycle', 'car', 'motorcycle', 'truck', 'bus']
fuse = False
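
# Expected data layout under statics.DATA_PATH (MOT-style; sequence names
# vary per dataset):
#
#     <DATA_PATH>/MOT20/train/<sequence>/img1/000001.jpg
#
# Of the 80 COCO categories above, only those listed in `classes` survive
# filtering: e.g. COCO_CLASSES[0] ("person") is kept, while COCO_CLASSES[14]
# ("bird") is rejected in get_labels() below.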

def get_labels(bboxes, cls, scores, th, tw):
    """Convert kept detections into normalized YOLO-style label rows.

    Note: at the call site, ``th`` receives the image *width* and ``tw``
    the image *height*. ``scores`` is currently unused; confidence is
    already thresholded inside postprocess().
    """
    label_id = 0
    labels = []

    n, _ = bboxes.shape
    for i in range(n):
        if COCO_CLASSES[int(cls[i])] not in classes:
            continue
        if use_cuda:
            box = bboxes[i, :].detach().cpu().numpy()
        else:
            box = bboxes[i, :].detach().numpy()
        # Normalize the corner box, then shift (x, y) to the box centre.
        x = box[0] / th
        y = box[1] / tw
        w = (box[2] - box[0]) / th
        h = (box[3] - box[1]) / tw
        x += w / 2
        y += h / 2
        labels.append([0, label_id, x, y, w, h])
        label_id += 1
    return np.array(labels)
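
# A minimal, illustrative sanity check for get_labels (nothing calls this;
# the box, class, and frame size are made up for the example):
def _example_get_labels():
    boxes = torch.tensor([[100.0, 150.0, 300.0, 450.0]])  # x1, y1, x2, y2
    cls = torch.tensor([0.0])     # index 0 -> "person"
    scores = torch.tensor([0.9])  # accepted but unused
    # On a hypothetical 1920x1080 frame this returns, approximately,
    # [[0, 0, 0.1042, 0.2778, 0.1042, 0.2778]]  (class, id, x_c, y_c, w, h)
    return get_labels(boxes, cls, scores, 1920, 1080)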


class Predictor(object):
    def __init__(
            self,
            model,
            exp,
            cls_names=COCO_CLASSES,
            trt_file=None,
            decoder=None,
            device="cpu",
            fp16=False,
            legacy=False,
    ):
        self.model = model
        self.cls_names = cls_names
        self.decoder = decoder
        self.num_classes = exp.num_classes
        self.confthre = 0.1
        self.nmsthre = 0.3
        self.test_size = exp.test_size
        self.device = device
        self.fp16 = fp16
        self.preproc = ValTransform()
        # TensorRT (torch2trt) loading from the original YOLOX demo is
        # omitted here; trt_file is accepted but ignored.

    def inference(self, img):
        img_info = {"id": 0}
        if isinstance(img, str):
            img_info["file_name"] = os.path.basename(img)
            img = cv2.imread(img)
        else:
            img_info["file_name"] = None

        height, width = img.shape[:2]
        img_info["height"] = height
        img_info["width"] = width
        img_info["raw_img"] = img

        # Scale factor between the network input and the original frame.
        ratio = min(self.test_size[0] / img.shape[0], self.test_size[1] / img.shape[1])
        img_info["ratio"] = ratio

        img, _ = self.preproc(img, None, self.test_size)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.float()
        if self.device == "gpu":
            img = img.cuda()
            if self.fp16:
                img = img.half()  # to FP16

        with torch.no_grad():
            t0 = time.time()
            outputs = self.model(img)
            if self.decoder is not None:
                outputs = self.decoder(outputs, dtype=outputs.type())
            # Each kept detection is a row of
            # (x1, y1, x2, y2, obj_conf, cls_conf, cls_id).
            outputs = postprocess(
                outputs, self.num_classes, self.confthre, self.nmsthre
            )
            # logger.info("Infer time: {:.4f}s".format(time.time() - t0))
        # img.shape is (batch, channels, height, width).
        _, _, test_h, test_w = img.shape
        img_info['test_h'] = test_h
        img_info['test_w'] = test_w
        return outputs, img_info

    def visual(self, output, img_info, cls_conf=0.35):
        ratio = img_info["ratio"]
        img = img_info["raw_img"]
        if output is None:
            return img
        output = output.cpu()

        bboxes = output[:, 0:4]
        # Undo the resize applied during preprocessing.
        bboxes /= ratio

        cls = output[:, 6]
        scores = output[:, 4] * output[:, 5]

        vis_res = vis(img, bboxes, scores, cls, cls_conf, self.cls_names)
        return vis_res
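
# Typical usage of Predictor (illustrative sketch; the file name is a
# placeholder and a loaded checkpoint is assumed):
#
#     exp = get_exp(None, 'yolox-x')
#     model = exp.get_model().eval()
#     predictor = Predictor(model, exp, COCO_CLASSES, device='cpu')
#     outputs, info = predictor.inference('frame.jpg')
#     dets = outputs[0]                      # (N, 7) tensor, or None
#     boxes = dets[:, 0:4] / info['ratio']   # back to original pixel scale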


def image_demo(predictor, path):
    folders = [f for f in listdir(path)]

    for folder in folders:
        print(folder)
        images_folder = join(join(path, folder), 'img1')
        images = [f for f in listdir(images_folder) if isfile(join(images_folder, f))]
        images = [a for a in images if a.endswith('.jpg')]
        images.sort()

        for i, image_name in enumerate(images):
            # Progress heartbeat every 300 frames.
            if i % 300 == 0:
                print(folder, i)
            outputs, img_info = predictor.inference(join(images_folder, image_name))

            ratio = img_info["ratio"]
            output = outputs[0]
            if output is None:
                continue
            output = output.cpu()

            bboxes = output[:, 0:4]
            # Undo the resize applied during preprocessing.
            bboxes /= ratio

            cls = output[:, 6]
            scores = output[:, 4] * output[:, 5]
            labels0 = get_labels(bboxes, cls, scores, img_info["width"], img_info["height"])

            np.savetxt(join(images_folder, image_name + '_weak_' + model_name + '.txt'),
                       labels0, delimiter=' ')


def main(exp, ckpt_file):
    model = exp.get_model()

    if use_cuda:
        model = model.cuda()
        device = 'gpu'
    else:
        device = 'cpu'
    model.eval()

    logger.info("loading checkpoint")
    ckpt = torch.load(ckpt_file, map_location="cpu")
    # Load the model weights.
    model.load_state_dict(ckpt["model"])
    logger.info("loaded checkpoint done.")

    if fuse:
        logger.info("\tFusing model...")
        model = fuse_model(model)

    predictor = Predictor(
        model, exp, COCO_CLASSES, trt_file=None, decoder=None,
        device=device, fp16=False, legacy=False,
    )
    image_demo(predictor, root_dir)


model_name = 'yolox-x'

if __name__ == "__main__":
    # To pin a specific GPU, set CUDA_VISIBLE_DEVICES before launching.
    ckpt_file = '/home/abdollahpour.ce.sharif/yolox_x.pth'

    exp = get_exp(None, model_name)
    main(exp, ckpt_file)
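
# Run directly with no CLI arguments, assuming the checkpoint path above
# exists and statics.DATA_PATH points at the dataset root:
#
#     python weak_label_demo.py    # script name is a placeholder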