#import torch
import cv2
import numpy as np
import types
from numpy import random
import cvtools
def intersect(box_a, box_b):
    """Return the intersection area of every box in ``box_a`` with ``box_b``.

    Args:
        box_a: 2-D array, one box per row, read as ``[:, :2]`` (min corner)
            and ``[:, 2:]`` (max corner).
        box_b: a single box, read as ``[:2]`` (min corner) and ``[2:]``
            (max corner).

    Returns:
        1-D array with one area per row of ``box_a``; 0 where the boxes
        do not overlap.
    """
    lower = np.maximum(box_a[:, :2], box_b[:2])
    upper = np.minimum(box_a[:, 2:], box_b[2:])
    # A negative extent means "no overlap" along that axis; clamp it to 0.
    extent = np.clip(upper - lower, a_min=0, a_max=np.inf)
    overlap_w, overlap_h = extent[:, 0], extent[:, 1]
    return overlap_w * overlap_h
# crop用到
def jaccard_numpy(box_a, box_b):
    """Compute the jaccard overlap (IoU) of several boxes against one box.

    The jaccard overlap is the intersection over union:
        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)

    Args:
        box_a: Multiple bounding boxes, Shape: [num_boxes, 4]
        box_b: Single bounding box, Shape: [4]

    Return:
        jaccard overlap, one value per row of ``box_a``. Shape: [num_boxes]
    """
    overlap = intersect(box_a, box_b)
    widths_a = box_a[:, 2] - box_a[:, 0]
    heights_a = box_a[:, 3] - box_a[:, 1]
    area_a = widths_a * heights_a  # one area per box in box_a
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])  # scalar
    total = area_a + area_b - overlap
    return overlap / total
class Compose(object):
    """Chain several augmentations and apply them in order.

    Args:
        transforms (List[Transform]): callables taking and returning
            ``(img, boxes, labels)``.

    Example:
        >>> augmentations.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.ToTensor(),
        >>> ])
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, boxes=None, labels=None):
        state = (img, boxes, labels)
        for step in self.transforms:
            # Each step must hand back exactly three values.
            img, boxes, labels = step(*state)
            state = (img, boxes, labels)
        return state
class Lambda(object):
    """Wrap a plain function so it can be used as a transform."""

    def __init__(self, lambd):
        # types.LambdaType is the ordinary function type, so ``def``
        # functions are accepted as well as lambdas.
        assert isinstance(lambd, types.LambdaType)
        self.lambd = lambd

    def __call__(self, img, boxes=None, labels=None):
        func = self.lambd
        return func(img, boxes, labels)
class ConvertFromInts(object):
    """Cast the image to ``np.float32``; boxes and labels pass through."""

    def __call__(self, image, boxes=None, labels=None):
        as_float = image.astype(np.float32)
        return as_float, boxes, labels
class SubtractMeans(object):
    """Subtract a fixed mean from the image.

    Args:
        mean: value(s) stored as a float32 array and broadcast against the
            image when subtracted.
    """

    def __init__(self, mean):
        self.mean = np.array(mean, dtype=np.float32)

    def __call__(self, image, boxes=None, labels=None):
        # astype makes a float32 copy, so the caller's image is not modified.
        centered = image.astype(np.float32)
        centered -= self.mean
        return centered.astype(np.float32), boxes, labels
class ToAbsoluteCoords(object):
    """Scale box coordinates by the image size.

    Columns 0 and 2 are multiplied by the image width, columns 1 and 3 by
    the image height. ``boxes`` is modified in place and also returned.
    """

    def __call__(self, image, boxes=None, labels=None):
        height, width, _ = image.shape
        boxes[:, [0, 2]] *= width
        boxes[:, [1, 3]] *= height
        return image, boxes, labels
class ToPercentCoords(object):
    """Divide box coordinates by the image size.

    Columns 0 and 2 are divided by the image width, columns 1 and 3 by the
    image height. ``boxes`` is modified in place and also returned.
    """

    def __call__(self, image, boxes=None, labels=None):
        height, width, _ = image.shape
        boxes[:, [0, 2]] /= width
        boxes[:, [1, 3]] /= height
        return image, boxes, labels
class Resize(object):
    """Resize the image to a ``size`` x ``size`` square with ``cv2.resize``.

    Boxes and labels are returned untouched.
    """

    def __init__(self, size=300):
        self.size = size

    def __call__(self, image, boxes=None, labels=None):
        target = (self.size, self.size)
        resized = cv2.resize(image, target)
        return resized, boxes, labels
class ResizeFilled(object):
    """Zero-pad the image to a square, then resize it to ``size`` x ``size``.

    The shorter side is padded on both ends so the picture stays centred
    and its aspect ratio is kept.

    Args:
        size: edge length of the square output image.
    """

    def __init__(self, size=300):
        self.size = size

    def __call__(self, image, boxes=None, labels=None):
        """Pad and resize ``image`` and remap ``boxes`` accordingly.

        Args:
            image: array of shape (h, w, c).
            boxes: optional array whose columns 0/2 and 1/3 are scaled by
                the original width / height before the padding offset is
                added, i.e. they are treated as fractions of the original
                image. Updated in place. May be None.
            labels: passed through unchanged.

        Returns:
            (image, boxes, labels)
        """
        h, w, _ = image.shape
        dim_diff = np.abs(h - w)
        # Upper (left) and lower (right) padding
        pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
        # Pad rows when the image is wider than tall, columns otherwise.
        if h <= w:
            pad = ((pad1, pad2), (0, 0), (0, 0))  # h, w, c
        else:
            pad = ((0, 0), (pad1, pad2), (0, 0))
        image = np.pad(image, pad, 'constant', constant_values=0.)
        padded_h, padded_w, _ = image.shape
        image = cv2.resize(image, (self.size, self.size))
        # ``boxes`` defaults to None; only remap when boxes were supplied.
        if boxes is not None:
            boxes[:, [0, 2]] = (boxes[:, [0, 2]] * w + pad[1][0]) / float(padded_w)
            boxes[:, [1, 3]] = (boxes[:, [1, 3]] * h + pad[0][0]) / float(padded_h)
        return image, boxes, labels
class RandomSaturation(object):
    """Randomly scale channel 1 of the image (saturation in an HSV image).

    With probability 1/2 the channel is multiplied in place by a factor
    drawn uniformly from ``[lower, upper]``.

    Args:
        lower: smallest scale factor, must be non-negative.
        upper: largest scale factor, must be >= ``lower``.
    """

    def __init__(self, lower=0.5, upper=1.5):
        self.lower = lower
        self.upper = upper
        assert self.upper >= self.lower, "saturation upper must be >= lower."
        assert self.lower >= 0, "saturation lower must be non-negative."

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            # In-place multiply: expects a float image.
            image[:, :, 1] *= random.uniform(self.lower, self.upper)
        return image, boxes, labels
class RandomHue(object):
    """Randomly shift channel 0 of the image (hue in an HSV image).

    With probability 1/2 a value drawn uniformly from ``[-delta, delta]``
    is added in place to channel 0; results above 360 or below 0 are
    wrapped by 360.

    Args:
        delta: maximum shift, must lie in [0, 360].
    """

    def __init__(self, delta=18.0):
        assert 0.0 <= delta <= 360.0
        self.delta = delta

    def __call__(self, image, boxes=None, labels=None):
        if not random.randint(2):
            return image, boxes, labels
        hue = image[:, :, 0]  # view: edits below write through to image
        hue += random.uniform(-self.delta, self.delta)
        hue[hue > 360.0] -= 360.0
        hue[hue < 0.0] += 360.0
        return image, boxes, labels
class RandomLightingNoise(object):
    """Randomly permute the image channels.

    With probability 1/2 one of the six orderings of the three channels
    is picked at random and applied to the last axis of the image.
    """

    def __init__(self):
        self.perms = ((0, 1, 2), (0, 2, 1),
                      (1, 0, 2), (1, 2, 0),
                      (2, 0, 1), (2, 1, 0))

    def __call__(self, image, boxes=None, labels=None):
        if not random.randint(2):
            return image, boxes, labels
        order = self.perms[random.randint(len(self.perms))]
        # Reorder channels along the last axis.
        return image[:, :, order], boxes, labels
class ConvertColor(object):
    """Convert the image between BGR and HSV with ``cv2.cvtColor``.

    Args:
        current: colour space of the incoming image ('BGR' or 'HSV').
        transform: colour space to convert to ('HSV' or 'BGR').

    Raises:
        NotImplementedError: on call, for any pair other than BGR->HSV
            or HSV->BGR.
    """

    def __init__(self, current='BGR', transform='HSV'):
        self.transform = transform
        self.current = current

    def __call__(self, image, boxes=None, labels=None):
        route = (self.current, self.transform)
        if route == ('BGR', 'HSV'):
            converted = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        elif route == ('HSV', 'BGR'):
            converted = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
        else:
            raise NotImplementedError
        return converted, boxes, labels
class RandomContrast(object):
    """Randomly scale all pixel values of the image.

    With probability 1/2 the whole image is multiplied in place by a factor
    drawn uniformly from ``[lower, upper]``. Expects a float image.

    Args:
        lower: smallest scale factor, must be non-negative.
        upper: largest scale factor, must be >= ``lower``.
    """

    def __init__(self, lower=0.5, upper=1.5):
        self.lower = lower
        self.upper = upper
        assert self.upper >= self.lower, "contrast upper must be >= lower."
        assert self.lower >= 0, "contrast lower must be non-negative."

    def __call__(self, image, boxes=None, labels=None):
        apply_scale = random.randint(2)
        if apply_scale:
            image *= random.uniform(self.lower, self.upper)
        return image, boxes, labels
# Randomly add one value to every pixel of the image.
# The value is drawn at random from [-delta, delta]; the default delta is 32.
class RandomBrightness(object):
    """Randomly shift the brightness of the image.

    With probability 1/2 a single value drawn uniformly from
    ``[-delta, delta]`` is added to every pixel and the result is clipped
    to [0, 255].

    The result always has the dtype of the input image, so the float image
    used by the rest of the pipeline stays float whichever branch is taken.
    Integer images are truncated when cast back. The input array itself is
    never modified.

    Args:
        delta: maximum absolute shift, must lie in [0, 255].
    """

    def __init__(self, delta=32):
        assert delta >= 0.0
        assert delta <= 255.0
        self.delta = delta

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            delta = random.uniform(-self.delta, self.delta)
            # Work in float64 so integer images neither overflow nor wrap,
            # then clip before casting back to the caller's dtype.
            shifted = np.clip(image.astype(np.float64) + delta, 0, 255)
            image = shifted.astype(image.dtype)
        return image, boxes, labels
class ToCV2Image(object):
    """Convert a channel-first tensor to a channel-last float32 array.

    The tensor is moved to the CPU, converted to NumPy, cast to float32 and
    its axes reordered from (0, 1, 2) to (1, 2, 0).
    """

    def __call__(self, tensor, boxes=None, labels=None):
        array = tensor.cpu().numpy().astype(np.float32)
        return np.transpose(array, (1, 2, 0)), boxes, labels
class ToTensor(object):
    """Convert a channel-last image array to a channel-first float32 tensor.

    The array is cast to float32, wrapped with ``torch.from_numpy`` and its
    axes permuted from (0, 1, 2) to (2, 0, 1).
    """

    def __call__(self, cvimage, boxes=None, labels=None):
        # Imported here because the module-level torch import is commented
        # out; without it this method fails with NameError.
        import torch

        tensor = torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1)
        return tensor, boxes, labels
# resize后,相当于局部放大了图像
# 此函数确保该图像块至少与一个 groundtruth box 有重叠,
# 至少一个 gt box 的中心位于该图像块中.
# 这样可以避免不包含明显的前景目标的图像块用于网络训练
class RandomSampleCrop(object):
    """Randomly crop a patch that still contains ground-truth boxes.

    Arguments:
        img (Image): the image being input during training
        boxes (Tensor): the original bounding boxes in pt form
        labels (Tensor): the class labels for each bbox
        mode (float tuple): the min and max jaccard overlaps
    Return:
        (img, boxes, classes)
            img (Image): the cropped image
            boxes (Tensor): the adjusted bounding boxes in pt form
            labels (Tensor): the class labels for each bbox
    """

    def __init__(self):
        self.sample_options = (
            # using entire original input image
            None,
            # sample a patch s.t. MIN jaccard w/ obj in .1,.3,.7,.9
            (0.1, None),
            (0.3, None),
            (0.7, None),
            (0.9, None),
            # randomly sample a patch
            (None, None),
        )

    def __call__(self, image, boxes=None, labels=None):
        # Without boxes there is nothing to constrain the crop against.
        if boxes is None or len(boxes) == 0:
            return image, boxes, labels

        height, width, _ = image.shape
        while True:
            # Pick a mode by index: np.random.choice would first try to turn
            # the ragged options tuple into an array, which recent NumPy
            # versions reject.
            mode = self.sample_options[random.randint(len(self.sample_options))]
            if mode is None:
                return image, boxes, labels

            min_iou, max_iou = mode
            if min_iou is None:
                min_iou = float('-inf')
            if max_iou is None:
                max_iou = float('inf')

            # max trials (50)
            for _ in range(50):
                w = random.uniform(0.3 * width, width)
                h = random.uniform(0.3 * height, height)

                # aspect ratio constraint b/t .5 & 2
                if h / w < 0.5 or h / w > 2:
                    continue

                # uniform(low, high): a single argument would be taken as
                # ``low`` with ``high`` left at 1.0.
                left = random.uniform(0, width - w)
                top = random.uniform(0, height - h)

                # convert to integer rect x1,y1,x2,y2
                rect = np.array([int(left), int(top), int(left + w), int(top + h)])

                # calculate IoU (jaccard overlap) b/t the cropped and gt boxes
                overlap = jaccard_numpy(boxes, rect)

                # Reject the patch when either bound is violated.
                if overlap.min() < min_iou or overlap.max() > max_iou:
                    continue

                # cut the crop from the image
                current_image = image[rect[1]:rect[3], rect[0]:rect[2], :]

                # keep only gt boxes whose center lies inside the patch
                centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
                # centers to the right of / below the patch's top-left corner
                m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])
                # centers to the left of / above the patch's bottom-right corner
                m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])
                mask = m1 * m2

                # have any valid boxes? try again if not
                if not mask.any():
                    continue

                current_boxes = boxes[mask, :].copy()
                current_labels = labels[mask]

                # Clip the boxes to the patch, then express them relative to
                # the patch's top-left corner.
                current_boxes[:, :2] = np.maximum(current_boxes[:, :2], rect[:2])
                current_boxes[:, :2] -= rect[:2]
                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], rect[2:])
                current_boxes[:, 2:] -= rect[:2]

                return current_image, current_boxes, current_labels
# resize后,相当于整体缩小了图像
class Expand(object):
    """Randomly place the image on a larger canvas filled with ``mean``.

    With probability 1/2 the inputs are returned unchanged. Otherwise a
    canvas 1x to 4x the image size is filled with ``mean``, the image is
    pasted at a random offset and a copy of ``boxes`` is shifted by that
    offset.

    Args:
        mean: fill value(s) for the canvas.
    """

    def __init__(self, mean):
        self.mean = mean

    def __call__(self, image, boxes, labels):
        if random.randint(2):
            return image, boxes, labels

        height, width, depth = image.shape
        ratio = random.uniform(1, 4)
        left = random.uniform(0, width * ratio - width)
        top = random.uniform(0, height * ratio - height)
        x0, y0 = int(left), int(top)

        canvas = np.empty(
            (int(height * ratio), int(width * ratio), depth),
            dtype=image.dtype)
        canvas[...] = self.mean
        canvas[y0:int(top + height), x0:int(left + width)] = image

        # Shift a copy so the caller's boxes are left untouched.
        shifted = boxes.copy()
        offset = (x0, y0)
        shifted[:, :2] += offset
        shifted[:, 2:] += offset
        return canvas, shifted, labels
class SwapChannels(object):
    """Reorder the channels (last axis) of an image.

    Args:
        swaps (int triple): final order of channels
            eg: (2, 1, 0)
    """

    def __init__(self, swaps):
        self.swaps = swaps

    def __call__(self, image):
        """
        Args:
            image: image indexed as ``[:, :, channel]``
        Return:
            the image with its channels reordered according to ``swaps``
        """
        order = self.swaps
        swapped = image[:, :, order]
        return swapped
class PhotometricDistort(object):
    """Apply random brightness, contrast, saturation and hue changes.

    Brightness is applied first. Then the BGR->HSV / saturation / hue /
    HSV->BGR sequence runs, with a random contrast step placed either
    before it or after it (chosen with probability 1/2 each).

    ``rand_light_noise`` is constructed but is not applied by ``__call__``.
    """

    def __init__(self):
        self.pd = [
            RandomContrast(),
            ConvertColor(transform='HSV'),
            RandomSaturation(),
            RandomHue(),
            ConvertColor(current='HSV', transform='BGR'),
            RandomContrast()
        ]
        self.rand_brightness = RandomBrightness(delta=32)
        self.rand_light_noise = RandomLightingNoise()

    def __call__(self, image, boxes, labels):
        # Work on a copy so the caller's image is not modified.
        im = image.copy()
        im, boxes, labels = self.rand_brightness(im, boxes, labels)
        # Contrast first (drop the trailing one) or last (drop the leading one).
        stages = self.pd[:-1] if random.randint(2) else self.pd[1:]
        im, boxes, labels = Compose(stages)(im, boxes, labels)
        return im, boxes, labels
class SSDAugmentation(object):
    """Full training-time augmentation pipeline.

    Args:
        size: edge length of the square output image.
        mean: per-channel mean used to fill expanded canvases and subtracted
            at the end. Ideally this would be derived from your own dataset;
            it appears to have little effect, so it has not been changed yet.
    """

    def __init__(self, size=300, mean=(104, 117, 123)):
        self.mean = mean
        self.size = size
        self._mirror = RandomMirror()
        self.augment = Compose([
            ConvertFromInts(),          # int -> np.float32
            ToAbsoluteCoords(),         # absolute coords
            PhotometricDistort(),       # photometric distortion
            Expand(self.mean),          # random canvas expansion
            RandomSampleCrop(),         # random crop
            self._mirror_with_labels,   # random mirror
            ToPercentCoords(),          # [0, 1] relative coords
            Resize(self.size),
            SubtractMeans(self.mean),
        ])

    def _mirror_with_labels(self, image, boxes, labels):
        """Adapt RandomMirror's (image, boxes) call to Compose's 3-tuple."""
        # RandomMirror takes and returns only (image, boxes); Compose passes
        # three arguments and unpacks three results, so labels are threaded
        # through here unchanged.
        image, boxes = self._mirror(image, boxes)
        return image, boxes, labels

    def __call__(self, img, boxes, labels):
        return self.augment(img, boxes, labels)
class RandomMirror(object):
    """Randomly mirror the image and its boxes.

    A flip code is drawn at random: 0 leaves the inputs alone, 1 mirrors
    horizontally, 2 mirrors vertically and 3 does both (equivalent to a
    180 degree rotation).

    Args:
        both: when False, code 3 (both mirrors at once) is never drawn.
    """

    def __init__(self, both=True):
        self.both = both

    def __call__(self, image, boxes):
        # Always hand back an ndarray so the return type is consistent.
        if not isinstance(boxes, np.ndarray):
            boxes = np.array(boxes)

        flip_code = random.randint(4)
        if not self.both:
            flip_code = random.randint(3)

        if flip_code in (1, 3):
            image, boxes = horizontal_mirror(image, boxes)
        if flip_code in (2, 3):
            image, boxes = vertical_mirror(image, boxes)
        return image, boxes
def horizontal_mirror(image, boxes):
    """Mirror horizontally (flipping around the y-axis).

    Args:
        image(np.ndarray): image of shape (h, w, c); not modified.
        boxes(np.ndarray): n x 4 (two corner points) or n x 8 (polygon);
            not modified.

    Returns:
        (flipped image, mirrored copy of boxes)

    Raises:
        NotImplementedError: if boxes have neither 4 nor 8 columns.
    """
    _, width, _ = image.shape
    # cv2.flip returns a new, memory-contiguous image (image[:, ::-1] would not).
    flipped = cv2.flip(image, 1)
    mirrored = boxes.copy()
    n_coords = mirrored.shape[1]
    # TODO: separate bbox and polygon handling
    if n_coords == 4:
        # Corner form: swap x1/x2 while mirroring so the first point stays
        # the top-left one.
        mirrored[:, [0, 2]] = width - boxes[:, [2, 0]]
    elif n_coords == 8:
        # Polygon form: keep the point order (adjacency) unchanged.
        mirrored[:, 0::2] = width - boxes[:, 0::2]
    else:
        raise NotImplementedError
    return flipped, mirrored
class RandomHorMirror(object):
    """Mirror horizontally (flipping around the y-axis)."""

    def __call__(self, image, boxes, r=True):
        """Mirror with probability 1/2, or always when ``r`` is false.

        ``boxes`` is converted to an ndarray if it is not one already.
        """
        if not isinstance(boxes, np.ndarray):
            boxes = np.array(boxes)  # keep the return type consistent
        # Short-circuit: no random draw happens when r is false.
        should_flip = (not r) or random.randint(2)
        if should_flip:
            image, boxes = horizontal_mirror(image, boxes)
        return image, boxes
def vertical_mirror(image, boxes):
    """Mirror vertically (flipping around the x-axis).

    Args:
        image(np.ndarray): image of shape (h, w, c); not modified.
        boxes(np.ndarray): n x 4 (two corner points) or n x 8 (polygon);
            not modified.

    Returns:
        (flipped image, mirrored copy of boxes)

    Raises:
        NotImplementedError: if boxes have neither 4 nor 8 columns.
    """
    height, _, _ = image.shape
    # cv2.flip returns a new, memory-contiguous image (image[::-1] would not).
    flipped = cv2.flip(image, 0)
    n_coords = boxes.shape[1]
    mirrored = boxes.copy()
    if n_coords == 4:
        # Corner form: swap y1/y2 while mirroring so the first point stays
        # the top-left one.
        mirrored[:, [1, 3]] = height - boxes[:, [3, 1]]
    elif n_coords == 8:
        # Polygon form: keep the point order unchanged.
        mirrored[:, 1::2] = height - boxes[:, 1::2]
    else:
        raise NotImplementedError
    return flipped, mirrored
class RandomVerMirror(object):
    """Mirror vertically (flipping around the x-axis)."""

    def __call__(self, image, boxes):
        """Mirror with probability 1/2.

        ``boxes`` is converted to an ndarray if it is not one already.
        """
        if not isinstance(boxes, np.ndarray):
            boxes = np.array(boxes)  # keep the return type consistent
        if not random.randint(2):
            return image, boxes
        image, boxes = vertical_mirror(image, boxes)
        return image, boxes
def rotate_90(img, bboxes=None):
    """Rotate the image, and optionally its boxes, 90 degrees clockwise.

    Args:
        img(np.ndarray): the format of opencv image, shape (h, w, c)
        bboxes(list or np.ndarray): supported format
            two points: x1y1x2y2,
            four points: x1y1x2y2x3y3x4y4

    Returns:
        The rotated image when ``bboxes`` is None, otherwise a tuple
        ``(rotated image, rotated boxes)``. The boxes are a new array; the
        input is not modified. Two-point boxes are returned with the
        top-left corner first.
    """
    h, _, _ = img.shape
    new_img = cv2.flip(cv2.transpose(img), 1)
    if bboxes is None:
        return new_img

    if not isinstance(bboxes, np.ndarray):
        bboxes = np.array(bboxes)
    else:
        bboxes = bboxes.copy()

    # For an image of height H, a clockwise quarter turn moves the old
    # origin to the top-right corner: (x, y) -> (H - y, x).
    old_x = bboxes[:, 0::2].copy()
    old_y = bboxes[:, 1::2].copy()
    bboxes[:, 0::2] = h - old_y
    bboxes[:, 1::2] = old_x

    if bboxes.shape[1] == 4:
        # The turn reverses the x order of the two corners; restore the
        # (x_min, y_min, x_max, y_max) layout used by the mirror helpers.
        lo = np.minimum(bboxes[:, :2], bboxes[:, 2:])
        hi = np.maximum(bboxes[:, :2], bboxes[:, 2:])
        bboxes[:, :2], bboxes[:, 2:] = lo, hi
    return new_img, bboxes
def rotate_270(img, bboxes=None):
    """Rotate the image, and optionally its boxes, 90 degrees counter-clockwise.

    Args:
        img(np.ndarray): the format of opencv image, shape (h, w, c)
        bboxes(list or np.ndarray): supported format
            two points: x1y1x2y2,
            four points: x1y1x2y2x3y3x4y4

    Returns:
        The rotated image when ``bboxes`` is None, otherwise a tuple
        ``(rotated image, rotated boxes)``. The boxes are a new array; the
        input is not modified. Two-point boxes are returned with the
        top-left corner first.
    """
    _, w, _ = img.shape
    new_img = cv2.flip(cv2.transpose(img), 0)
    if bboxes is None:
        return new_img

    if not isinstance(bboxes, np.ndarray):
        bboxes = np.array(bboxes)
    else:
        bboxes = bboxes.copy()

    # For an image of width W, a counter-clockwise quarter turn moves the
    # old origin to the bottom-left corner: (x, y) -> (y, W - x).
    old_x = bboxes[:, 0::2].copy()
    old_y = bboxes[:, 1::2].copy()
    bboxes[:, 0::2] = old_y
    bboxes[:, 1::2] = w - old_x

    if bboxes.shape[1] == 4:
        # The turn reverses the y order of the two corners; restore the
        # (x_min, y_min, x_max, y_max) layout used by the mirror helpers.
        lo = np.minimum(bboxes[:, :2], bboxes[:, 2:])
        hi = np.maximum(bboxes[:, :2], bboxes[:, 2:])
        bboxes[:, :2], bboxes[:, 2:] = lo, hi
    return new_img, bboxes
def rotate_180(img, bboxes=None):
    """Rotate the image, and optionally its boxes, by 180 degrees.

    Args:
        img(np.ndarray): the format of opencv image, shape (h, w, c)
        bboxes(list or np.ndarray): supported format
            two points: x1y1x2y2,
            four points: x1y1x2y2x3y3x4y4

    Returns:
        The rotated image when ``bboxes`` is None, otherwise a tuple
        ``(rotated image, rotated boxes)``. The boxes are a new array; the
        input is not modified. Two-point boxes are returned with the
        top-left corner first.
    """
    h, w, _ = img.shape
    new_img = cv2.flip(img, -1)
    if bboxes is None:
        return new_img

    if not isinstance(bboxes, np.ndarray):
        bboxes = np.array(bboxes)
    else:
        bboxes = bboxes.copy()

    # A half turn maps (x, y) -> (W - x, H - y).
    bboxes[:, 0::2] = w - bboxes[:, 0::2]
    bboxes[:, 1::2] = h - bboxes[:, 1::2]

    if bboxes.shape[1] == 4:
        # The turn swaps the two corners; restore the
        # (x_min, y_min, x_max, y_max) layout used by the mirror helpers.
        lo = np.minimum(bboxes[:, :2], bboxes[:, 2:])
        hi = np.maximum(bboxes[:, :2], bboxes[:, 2:])
        bboxes[:, :2], bboxes[:, 2:] = lo, hi
    return new_img, bboxes
class RandomRotate(object):
    """Randomly rotate by 0, 90, 180 or 270 degrees."""

    def __call__(self, image, boxes, rotate=None):
        """Rotate ``image`` and ``boxes``.

        Args:
            image: image passed to the selected rotation function.
            boxes: boxes; converted to an ndarray if not one already.
            rotate: one of 'rotate_90', 'rotate_180', 'rotate_270' to force
                that rotation. When falsy, a rotation (or none) is picked
                at random.

        Returns:
            (image, boxes)

        Raises:
            ValueError: if ``rotate`` is not a supported rotation name.
        """
        if not isinstance(boxes, np.ndarray):
            boxes = np.array(boxes)
        if not rotate:
            rotate = random.choice([None, 'rotate_90', 'rotate_180', 'rotate_270'])
        if rotate:
            # Explicit dispatch instead of eval(): a caller-supplied string
            # must never be executed as code.
            rotations = {
                'rotate_90': rotate_90,
                'rotate_180': rotate_180,
                'rotate_270': rotate_270,
            }
            if rotate not in rotations:
                raise ValueError('unsupported rotation: {!r}'.format(rotate))
            image, boxes = rotations[rotate](image, boxes)
        return image, boxes