目录
1.水平翻转
(对图片和文本框进行水平翻转)
def horizontal_flip(self, im: np.ndarray, text_polys: np.ndarray) -> tuple:
    """
    Flip an image and its text polygons horizontally (left-right mirror).

    :param im: image array whose first two axes are (height, width); a trailing
        channel axis is optional, so 2-D grayscale images are accepted
    :param text_polys: polygon vertices indexed as [polygon, point, (x, y)]
    :return: tuple ``(flipped image, flipped polygons)``; ``text_polys`` itself
        is left untouched because the coordinates are updated on a copy
    """
    flip_text_polys = text_polys.copy()
    flip_im = cv2.flip(im, 1)
    # Only the width is needed. Indexing the shape (instead of unpacking three
    # values) keeps this working for images without a channel axis.
    w = flip_im.shape[1]
    # Skip the coordinate update when there are no polygons: an empty array may
    # be 1-D, and the 3-axis indexing below would raise IndexError on it.
    if flip_text_polys.size:
        flip_text_polys[:, :, 0] = w - flip_text_polys[:, :, 0]
    return flip_im, flip_text_polys
2.垂直翻转
(对图片和文本框进行竖直翻转)
def vertical_flip(self, im: np.ndarray, text_polys: np.ndarray) -> tuple:
    """
    Flip an image and its text polygons vertically (top-bottom mirror).

    :param im: image array whose first two axes are (height, width); a trailing
        channel axis is optional, so 2-D grayscale images are accepted
    :param text_polys: polygon vertices indexed as [polygon, point, (x, y)]
    :return: tuple ``(flipped image, flipped polygons)``; ``text_polys`` itself
        is left untouched because the coordinates are updated on a copy
    """
    flip_text_polys = text_polys.copy()
    flip_im = cv2.flip(im, 0)
    # Only the height is needed. Indexing the shape (instead of unpacking three
    # values) keeps this working for images without a channel axis.
    h = flip_im.shape[0]
    # Skip the coordinate update when there are no polygons: an empty array may
    # be 1-D, and the 3-axis indexing below would raise IndexError on it.
    if flip_text_polys.size:
        flip_text_polys[:, :, 1] = h - flip_text_polys[:, :, 1]
    return flip_im, flip_text_polys
3.随机旋转
(在给定的角度范围内随机采样一个角度,对图片和文本框进行旋转)
def random_rotate_img_bbox(self, img, text_polys, degrees: "numbers.Number | list | tuple | np.ndarray",
                           same_size=False):
    """
    Rotate an image and its text polygons by one randomly sampled angle.

    :param img: image array whose first two axes are (height, width)
    :param text_polys: polygon vertices indexed as [polygon, point, (x, y)];
        every polygon must have the same number of points
    :param degrees: either a non-negative number ``d`` (the angle is sampled
        uniformly between ``-d`` and ``d``) or a length-2 sequence
        ``(low, high)`` (the angle is sampled uniformly between them); degrees
    :param same_size: if True the output canvas has the input's width and
        height; otherwise the canvas is enlarged to the bounding box of the
        rotated image
    :return: tuple ``(rotated image, rotated polygons as a float32 array)``
    :raises ValueError: if ``degrees`` is a negative number, or a sequence whose
        length is not 2
    :raises TypeError: if ``degrees`` is not a number, list, tuple or ndarray
    """
    if isinstance(degrees, numbers.Number):
        if degrees < 0:
            raise ValueError("If degrees is a single number, it must be positive.")
        degrees = (-degrees, degrees)
    elif isinstance(degrees, (list, tuple, np.ndarray)):
        if len(degrees) != 2:
            raise ValueError("If degrees is a sequence, it must be of len 2.")
    else:
        # TypeError is the conventional signal for a wrong argument type and is
        # still caught by callers that handle the previous bare Exception.
        raise TypeError('degrees must in Number or list or tuple or np.ndarray')

    # ---------------------- rotate the image ----------------------
    w = img.shape[1]
    h = img.shape[0]
    angle = np.random.uniform(degrees[0], degrees[1])

    if same_size:
        nw = w
        nh = h
    else:
        # Degrees -> radians
        rangle = np.deg2rad(angle)
        # Width / height of the bounding box of the rotated image
        nw = abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)
        nh = abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)

    # Rotation about the centre of the output canvas
    rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, 1)
    # Offset that moves the source image centre onto the output canvas centre,
    # expressed in the rotated frame (the homogeneous 0 applies the linear
    # part of the matrix only)
    rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
    # Fold that offset into the translation column
    rot_mat[0, 2] += rot_move[0]
    rot_mat[1, 2] += rot_move[1]
    # Apply the affine transform
    out_size = (int(math.ceil(nw)), int(math.ceil(nh)))
    rot_img = cv2.warpAffine(img, rot_mat, out_size, flags=cv2.INTER_LANCZOS4)

    # ---------------------- correct the polygon coordinates ----------------------
    # rot_mat is now the final image transform; push every polygon vertex
    # through it in one matrix product. This handles polygons with any number
    # of points, not just four.
    polys = np.asarray(text_polys, dtype=np.float64)
    points = polys.reshape(-1, 2)
    homogeneous = np.hstack([points, np.ones((points.shape[0], 1))])
    rot_text_polys = (homogeneous @ rot_mat.T).reshape(polys.shape)
    return rot_img, rot_text_polys.astype(np.float32)
4. 改变图像的属性
亮度(brightness)、对比度(contrast)、饱和度(saturation)和色调(hue)
# Configure each property on its own: every transform below passes a single
# ColorJitter argument and leaves the others at their defaults.
# Randomly change the image brightness
brightness_change = transforms.ColorJitter(brightness=0.5)
# Randomly change the image hue
hue_change = transforms.ColorJitter(hue=0.5)
# Randomly change the image contrast
contrast_change = transforms.ColorJitter(contrast=0.5)