Meta Byte Track
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

data_augment.py 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. #!/usr/bin/env python3
  2. # -*- coding:utf-8 -*-
  3. # Copyright (c) Megvii, Inc. and its affiliates.
  4. """
  5. Data augmentation functionality. Passed as callable transformations to
  6. Dataset classes.
  7. The data augmentation procedures were interpreted from @weiliu89's SSD paper
  8. http://arxiv.org/abs/1512.02325
  9. """
  10. import cv2
  11. import numpy as np
  12. import torch
  13. from yolox.utils import xyxy2cxcywh
  14. import math
  15. import random
  16. def augment_hsv(img, hgain=0.015, sgain=0.7, vgain=0.4):
  17. r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
  18. hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
  19. dtype = img.dtype # uint8
  20. x = np.arange(0, 256, dtype=np.int16)
  21. lut_hue = ((x * r[0]) % 180).astype(dtype)
  22. lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
  23. lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
  24. img_hsv = cv2.merge(
  25. (cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))
  26. ).astype(dtype)
  27. cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed
  28. def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.2):
  29. # box1(4,n), box2(4,n)
  30. # Compute candidate boxes which include follwing 5 things:
  31. # box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
  32. w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
  33. w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
  34. ar = np.maximum(w2 / (h2 + 1e-16), h2 / (w2 + 1e-16)) # aspect ratio
  35. return (
  36. (w2 > wh_thr)
  37. & (h2 > wh_thr)
  38. & (w2 * h2 / (w1 * h1 + 1e-16) > area_thr)
  39. & (ar < ar_thr)
  40. ) # candidates
  41. def random_perspective(
  42. img,
  43. targets=(),
  44. degrees=10,
  45. translate=0.1,
  46. scale=0.1,
  47. shear=10,
  48. perspective=0.0,
  49. border=(0, 0),
  50. ):
  51. # targets = [cls, xyxy]
  52. height = img.shape[0] + border[0] * 2 # shape(h,w,c)
  53. width = img.shape[1] + border[1] * 2
  54. # Center
  55. C = np.eye(3)
  56. C[0, 2] = -img.shape[1] / 2 # x translation (pixels)
  57. C[1, 2] = -img.shape[0] / 2 # y translation (pixels)
  58. # Rotation and Scale
  59. R = np.eye(3)
  60. a = random.uniform(-degrees, degrees)
  61. # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
  62. s = random.uniform(scale[0], scale[1])
  63. # s = 2 ** random.uniform(-scale, scale)
  64. R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
  65. # Shear
  66. S = np.eye(3)
  67. S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
  68. S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
  69. # Translation
  70. T = np.eye(3)
  71. T[0, 2] = (
  72. random.uniform(0.5 - translate, 0.5 + translate) * width
  73. ) # x translation (pixels)
  74. T[1, 2] = (
  75. random.uniform(0.5 - translate, 0.5 + translate) * height
  76. ) # y translation (pixels)
  77. # Combined rotation matrix
  78. M = T @ S @ R @ C # order of operations (right to left) is IMPORTANT
  79. ###########################
  80. # For Aug out of Mosaic
  81. # s = 1.
  82. # M = np.eye(3)
  83. ###########################
  84. if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
  85. if perspective:
  86. img = cv2.warpPerspective(
  87. img, M, dsize=(width, height), borderValue=(114, 114, 114)
  88. )
  89. else: # affine
  90. img = cv2.warpAffine(
  91. img, M[:2], dsize=(width, height), borderValue=(114, 114, 114)
  92. )
  93. # Transform label coordinates
  94. n = len(targets)
  95. if n:
  96. # warp points
  97. xy = np.ones((n * 4, 3))
  98. xy[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
  99. n * 4, 2
  100. ) # x1y1, x2y2, x1y2, x2y1
  101. xy = xy @ M.T # transform
  102. if perspective:
  103. xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale
  104. else: # affine
  105. xy = xy[:, :2].reshape(n, 8)
  106. # create new boxes
  107. x = xy[:, [0, 2, 4, 6]]
  108. y = xy[:, [1, 3, 5, 7]]
  109. xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
  110. # clip boxes
  111. #xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
  112. #xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
  113. # filter candidates
  114. i = box_candidates(box1=targets[:, :4].T * s, box2=xy.T)
  115. targets = targets[i]
  116. targets[:, :4] = xy[i]
  117. targets = targets[targets[:, 0] < width]
  118. targets = targets[targets[:, 2] > 0]
  119. targets = targets[targets[:, 1] < height]
  120. targets = targets[targets[:, 3] > 0]
  121. return img, targets
  122. def _distort(image):
  123. def _convert(image, alpha=1, beta=0):
  124. tmp = image.astype(float) * alpha + beta
  125. tmp[tmp < 0] = 0
  126. tmp[tmp > 255] = 255
  127. image[:] = tmp
  128. image = image.copy()
  129. if random.randrange(2):
  130. _convert(image, beta=random.uniform(-32, 32))
  131. if random.randrange(2):
  132. _convert(image, alpha=random.uniform(0.5, 1.5))
  133. image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
  134. if random.randrange(2):
  135. tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
  136. tmp %= 180
  137. image[:, :, 0] = tmp
  138. if random.randrange(2):
  139. _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))
  140. image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
  141. return image
  142. def _mirror(image, boxes):
  143. _, width, _ = image.shape
  144. if random.randrange(2):
  145. image = image[:, ::-1]
  146. boxes = boxes.copy()
  147. boxes[:, 0::2] = width - boxes[:, 2::-2]
  148. return image, boxes
  149. def preproc(image, input_size, mean, std, swap=(2, 0, 1)):
  150. if len(image.shape) == 3:
  151. padded_img = np.ones((input_size[0], input_size[1], 3)) * 114.0
  152. else:
  153. padded_img = np.ones(input_size) * 114.0
  154. img = np.array(image)
  155. r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
  156. resized_img = cv2.resize(
  157. img,
  158. (int(img.shape[1] * r), int(img.shape[0] * r)),
  159. interpolation=cv2.INTER_LINEAR,
  160. ).astype(np.float32)
  161. padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img
  162. padded_img = padded_img[:, :, ::-1]
  163. padded_img /= 255.0
  164. if mean is not None:
  165. padded_img -= mean
  166. if std is not None:
  167. padded_img /= std
  168. padded_img = padded_img.transpose(swap)
  169. padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
  170. return padded_img, r
  171. class TrainTransform:
  172. def __init__(self, p=0.5, rgb_means=None, std=None, max_labels=100):
  173. self.means = rgb_means
  174. self.std = std
  175. self.p = p
  176. self.max_labels = max_labels
  177. def __call__(self, image, targets, input_dim):
  178. boxes = targets[:, :4].copy()
  179. labels = targets[:, 4].copy()
  180. ids = targets[:, 5].copy()
  181. if len(boxes) == 0:
  182. targets = np.zeros((self.max_labels, 6), dtype=np.float32)
  183. image, r_o = preproc(image, input_dim, self.means, self.std)
  184. image = np.ascontiguousarray(image, dtype=np.float32)
  185. return image, targets
  186. image_o = image.copy()
  187. targets_o = targets.copy()
  188. height_o, width_o, _ = image_o.shape
  189. boxes_o = targets_o[:, :4]
  190. labels_o = targets_o[:, 4]
  191. ids_o = targets_o[:, 5]
  192. # bbox_o: [xyxy] to [c_x,c_y,w,h]
  193. boxes_o = xyxy2cxcywh(boxes_o)
  194. image_t = _distort(image)
  195. image_t, boxes = _mirror(image_t, boxes)
  196. height, width, _ = image_t.shape
  197. image_t, r_ = preproc(image_t, input_dim, self.means, self.std)
  198. # boxes [xyxy] 2 [cx,cy,w,h]
  199. boxes = xyxy2cxcywh(boxes)
  200. boxes *= r_
  201. mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 1
  202. boxes_t = boxes[mask_b]
  203. labels_t = labels[mask_b]
  204. ids_t = ids[mask_b]
  205. if len(boxes_t) == 0:
  206. image_t, r_o = preproc(image_o, input_dim, self.means, self.std)
  207. boxes_o *= r_o
  208. boxes_t = boxes_o
  209. labels_t = labels_o
  210. ids_t = ids_o
  211. labels_t = np.expand_dims(labels_t, 1)
  212. ids_t = np.expand_dims(ids_t, 1)
  213. targets_t = np.hstack((labels_t, boxes_t, ids_t))
  214. padded_labels = np.zeros((self.max_labels, 6))
  215. padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[
  216. : self.max_labels
  217. ]
  218. padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
  219. image_t = np.ascontiguousarray(image_t, dtype=np.float32)
  220. return image_t, padded_labels
  221. class ValTransform:
  222. """
  223. Defines the transformations that should be applied to test PIL image
  224. for input into the network
  225. dimension -> tensorize -> color adj
  226. Arguments:
  227. resize (int): input dimension to SSD
  228. rgb_means ((int,int,int)): average RGB of the dataset
  229. (104,117,123)
  230. swap ((int,int,int)): final order of channels
  231. Returns:
  232. transform (transform) : callable transform to be applied to test/val
  233. data
  234. """
  235. def __init__(self, rgb_means=None, std=None, swap=(2, 0, 1)):
  236. self.means = rgb_means
  237. self.swap = swap
  238. self.std = std
  239. # assume input is cv2 img for now
  240. def __call__(self, img, res, input_size):
  241. img, _ = preproc(img, input_size, self.means, self.std, self.swap)
  242. return img, np.zeros((1, 5))
  243. class ValTransformWithPseudo:
  244. """
  245. Defines the transformations that should be applied to test PIL image
  246. for input into the network
  247. dimension -> tensorize -> color adj
  248. Arguments:
  249. resize (int): input dimension to SSD
  250. rgb_means ((int,int,int)): average RGB of the dataset
  251. (104,117,123)
  252. swap ((int,int,int)): final order of channels
  253. Returns:
  254. transform (transform) : callable transform to be applied to test/val
  255. data
  256. """
  257. def __init__(self, rgb_means=None, std=None, swap=(2, 0, 1), max_labels=150):
  258. self.means = rgb_means
  259. self.swap = swap
  260. self.std = std
  261. self.max_labels = max_labels
  262. def __call__(self, image, targets, input_dim):
  263. boxes = targets[:, :4].copy()
  264. labels = targets[:, 4].copy()
  265. ids = targets[:, 5].copy()
  266. if len(boxes) == 0:
  267. print('inside if 1')
  268. targets = np.zeros((self.max_labels, 6), dtype=np.float32)
  269. image, r_o = preproc(image, input_dim, self.means, self.std)
  270. image = np.ascontiguousarray(image, dtype=np.float32)
  271. return image, targets
  272. image_o = image.copy()
  273. targets_o = targets.copy()
  274. height_o, width_o, _ = image_o.shape
  275. boxes_o = targets_o[:, :4]
  276. labels_o = targets_o[:, 4]
  277. ids_o = targets_o[:, 5]
  278. # bbox_o: [xyxy] to [c_x,c_y,w,h]
  279. boxes_o = xyxy2cxcywh(boxes_o)
  280. # image_t = _distort(image)
  281. image_t = image
  282. # image_t, boxes = _mirror(image_t, boxes)
  283. height, width, _ = image_t.shape
  284. image_t, r_ = preproc(image_t, input_dim, self.means, self.std)
  285. # boxes [xyxy] 2 [cx,cy,w,h]
  286. boxes = xyxy2cxcywh(boxes)
  287. boxes *= r_
  288. mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 1
  289. boxes_t = boxes[mask_b]
  290. labels_t = labels[mask_b]
  291. ids_t = ids[mask_b]
  292. if len(boxes_t) == 0:
  293. print('inside if 2')
  294. image_t, r_o = preproc(image_o, input_dim, self.means, self.std)
  295. boxes_o *= r_o
  296. boxes_t = boxes_o
  297. labels_t = labels_o
  298. ids_t = ids_o
  299. labels_t = np.expand_dims(labels_t, 1)
  300. ids_t = np.expand_dims(ids_t, 1)
  301. targets_t = np.hstack((labels_t, boxes_t, ids_t))
  302. padded_labels = np.zeros((self.max_labels, 6))
  303. padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[
  304. : self.max_labels
  305. ]
  306. padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
  307. image_t = np.ascontiguousarray(image_t, dtype=np.float32)
  308. return image_t, padded_labels