引言
- 最近在做目标检测相关的一些项目,用到了目标检测数据集的增强部分
- 因为涉及到检测框要和图像做相同的操作,自己实现较为麻烦,用到了强大的imgaug库,可以较为轻松地实现框和图像的一起增强
- 但是有一点不足的是,涉及到图像旋转90|270度时,会自动补黑边
  - 后来发现imgaug有专门的旋转90度的函数Rot90(),只要设置keep_size=False,就不会补黑边了 -_-!
- 为了不让其补充黑边,想着自己做这两个角度的旋转
代码
import urllib
import cv2
import numpy as np
from matplotlib import pyplot as plt
def rotate_box(bbox, image_height, image_width, angle):
    """Rotate an axis-aligned bounding box clockwise by 90, 180 or 270 degrees.

    The rotation direction matches ``rotate_image`` in this file, which passes
    ``-angle`` to ``cv2.getRotationMatrix2D`` and therefore turns the image
    clockwise for a positive ``angle``.

    Args:
        bbox: Sequence whose first four items are ``x_min, y_min, x_max,
            y_max`` in pixel coordinates of the un-rotated image. Extra
            trailing items (e.g. a class label) are ignored.
        image_height: Height of the un-rotated image.
        image_width: Width of the un-rotated image.
        angle: Clockwise rotation in degrees; must be 90, 180 or 270.

    Returns:
        ``[x_min, y_min, x_max, y_max]`` of the rotated box as ints, expressed
        in the coordinates of the rotated image (whose width and height are
        swapped for 90 and 270 degrees).

    Raises:
        ValueError: If ``angle`` is not one of 90, 180, 270.
    """
    x_min, y_min, x_max, y_max = (float(v) for v in bbox[:4])
    corners = ((x_min, y_min), (x_max, y_max))

    if angle == 90:
        # Clockwise quarter turn: the top-left corner of the image becomes
        # the top-right corner, i.e. (x, y) -> (H - y, x).
        rotated = [(image_height - y, x) for x, y in corners]
    elif angle == 180:
        # Half turn: (x, y) -> (W - x, H - y).
        rotated = [(image_width - x, image_height - y) for x, y in corners]
    elif angle == 270:
        # Three quarter turns clockwise (= one counter-clockwise):
        # (x, y) -> (y, W - x).
        rotated = [(y, image_width - x) for x, y in corners]
    else:
        raise ValueError(f'The angle {angle} is not supported!')

    # The two rotated corners are still opposite corners of the box, but no
    # longer necessarily top-left / bottom-right, so re-order them.
    (x_a, y_a), (x_b, y_b) = rotated
    return [
        int(min(x_a, x_b)),
        int(min(y_a, y_b)),
        int(max(x_a, x_b)),
        int(max(y_a, y_b)),
    ]
def rotate_image(mat, angle):
    """Rotate an image about its centre, growing the canvas so nothing is cropped.

    ``angle`` is in degrees and is negated before being handed to
    ``cv2.getRotationMatrix2D``. The output canvas is sized to the bounding
    box of the rotated image, and the affine translation is shifted so the
    rotated content is centred on that canvas.

    Returns the warped image produced by ``cv2.warpAffine``.
    """
    src_h, src_w = mat.shape[:2]
    # OpenCV takes the centre as (x, y), i.e. (width, height) order.
    center_x, center_y = src_w / 2, src_h / 2

    transform = cv2.getRotationMatrix2D((center_x, center_y), -angle, 1.)

    # |cos| and |sin| of the rotation, read straight from the matrix.
    cos_mag = abs(transform[0, 0])
    sin_mag = abs(transform[0, 1])

    # Size of the axis-aligned box that encloses the rotated image.
    out_w = int(src_h * sin_mag + src_w * cos_mag)
    out_h = int(src_h * cos_mag + src_w * sin_mag)

    # Move the old centre onto the centre of the enlarged canvas.
    transform[0, 2] += out_w / 2 - center_x
    transform[1, 2] += out_h / 2 - center_y

    return cv2.warpAffine(mat, transform, (out_w, out_h))
TEXT_COLOR = (255, 255, 255)  # label text colour (white)


def visualize_bbox(img, bbox, class_name, color=(255, 0, 0), thickness=1):
    """Draw a single labelled bounding box on ``img`` and return the image.

    The drawing is done in place by OpenCV, so the array passed in is
    modified; the same array is returned for convenience.

    Args:
        img: Image array to draw on.
        bbox: Sequence whose first four items are ``x_min, y_min, x_max,
            y_max`` in pixels. Values are cast to ``int`` because OpenCV
            drawing functions require integer coordinates.
        class_name: Text written in a filled label just above the box.
        color: Colour of the box outline and of the label background.
        thickness: Line thickness of the box outline.

    Returns:
        ``img`` with the box and label drawn on it.
    """
    x_min, y_min, x_max, y_max = (int(v) for v in bbox[:4])
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.35

    # Box outline.
    cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color=color, thickness=thickness)

    # Filled label background sitting on top of the box, sized to the text.
    (text_width, text_height), _ = cv2.getTextSize(class_name, font, font_scale, 1)
    cv2.rectangle(
        img,
        (x_min, y_min - int(1.3 * text_height)),
        (x_min + text_width, y_min),
        color,
        -1,  # negative thickness = filled rectangle
    )

    cv2.putText(
        img,
        text=class_name,
        org=(x_min, y_min - int(0.3 * text_height)),
        fontFace=font,
        fontScale=font_scale,
        color=TEXT_COLOR,
        lineType=cv2.LINE_AA,
    )
    return img
if __name__ == '__main__':
    # Demo: download an image, draw a box, rotate both, and show the result.
    # A bare `import urllib` does not load the `request` submodule, so import
    # it explicitly before use.
    import urllib.request

    angle = 90
    url = 'https://upload.wikimedia.org/wikipedia/commons/8/8e/Yellow-headed_caracara_%28Milvago_chimachima%29_on_capybara_%28Hydrochoeris_hydrochaeris%29.JPG'

    # Close the HTTP response as soon as the bytes have been read.
    with urllib.request.urlopen(url) as resp:
        raw = np.asarray(bytearray(resp.read()), dtype="uint8")

    im = cv2.imdecode(raw, cv2.IMREAD_COLOR)
    if im is None:
        # cv2.imdecode signals failure by returning None instead of raising.
        raise ValueError(f'Could not decode an image from {url}')
    # OpenCV decodes to BGR while matplotlib displays RGB.
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

    box = [50, 150, 250, 200]
    # visualize_bbox draws in place, so the rotated image below also carries
    # this box.
    visualize_bbox(im, box, class_name='1')

    rot_im = rotate_image(im, angle)
    image_height, image_width = im.shape[:2]
    rot_box = rotate_box(box, image_height, image_width, angle)
    plot_im = visualize_bbox(rot_im, rot_box, class_name='1')

    plt.imshow(plot_im)
    plt.show()
参考资料
[1] 基于python的目标检测之数据集增强(旋转)