在京东的实习工作中遇到了一个业务场景,做下简单的描述:
对这样的图片,知道每个文字对应的bbox和label,需要将它裁剪出来,作为单条数据来组成训练集。
但是问题是: 比如“秀女坊商行专柜代购”都是倾斜的,没法直接裁剪。
解决思路: 根据bbox获得(轴对齐的)最小外接矩形,先将外接矩形裁剪出来,然后对得到的图片做仿射变换(旋转),把倾斜的文字转正,再根据原始bbox估算每个词条的宽度(即文字条的粗细),以旋转后图片的中点为中心、按该宽度再次裁剪,得到最终的结果。
原图如下:
结果如下:
代码如下:
import cv2
import os
import math
def getAngle(xmin, ymin, xmax, ymax):
    """Return the angle between a box's vertical side and its diagonal.

    The box is the axis-aligned rectangle spanned by ``(xmin, ymin)`` and
    ``(xmax, ymax)``. Only the absolute width and height are used, so the
    result does not depend on the order of the two corners.

    Args:
        xmin, ymin: One corner of the box.
        xmax, ymax: The opposite corner of the box.

    Returns:
        The angle in degrees, in the range [0, 90]. A box with zero height
        yields 90, a box with zero width yields 0, and a fully degenerate
        box (zero width and zero height) yields 0.
    """
    width = abs(xmax - xmin)
    height = abs(ymax - ymin)
    # The law-of-cosines expression acos((a^2 - b^2 - c^2) / (-2*b*c)) with
    # c = sqrt(a^2 + b^2) simplifies to acos(b / c) == atan2(a, b). atan2 is
    # defined for height == 0, where the division form raised
    # ZeroDivisionError.
    return math.degrees(math.atan2(width, height))
def clipRotate(img, x1, y1, x2, y2, x3, y3, x4, y4, scale=0.8, margin=2):
    """Crop a tilted quadrilateral out of ``img`` and return it as a strip.

    The axis-aligned bounding box of the four points is cropped, the crop is
    rotated about its centre, and a horizontal band centred on the middle row
    is cut out of the rotated crop.

    Args:
        img: Image array indexed as ``img[row, col]`` or
            ``img[row, col, channel]``.
        x1, y1, x2, y2, x3, y3, x4, y4: Integer pixel coordinates of the four
            corners. Presumably listed in order around the quadrilateral so
            that points 2 and 3 span the short side of the text line --
            TODO confirm against the annotation format.
        scale: Isotropic scale passed to ``cv2.getRotationMatrix2D``.
        margin: Extra rows kept above and below the estimated band.

    Returns:
        The cropped band as an image array with the same number of
        dimensions as ``img``.
    """
    xmin = min(x1, x2, x3, x4)
    ymin = min(y1, y2, y3, y4)
    xmax = max(x1, x2, x3, x4)
    ymax = max(y1, y2, y3, y4)
    angle = getAngle(xmin, ymin, xmax, ymax)

    # Crop the bounding box. The start indices are clamped at 0 because a
    # negative slice start counts from the end of the axis and would select
    # the wrong region. No trailing channel index is used, so single-channel
    # (2-D) images work as well as colour ones.
    patch = img[max(ymin, 0):ymax, max(xmin, 0):xmax]

    # Rotate the crop about its centre, filling uncovered pixels with white.
    # getAngle returns a value in [0, 90], so the rotation angle is never
    # positive.
    # NOTE(review): the tilt direction of the box is therefore not taken into
    # account -- confirm this is intended.
    rows, cols = patch.shape[:2]
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle - 90, scale)
    patch = cv2.warpAffine(patch, M, (cols, rows), borderValue=(255, 255, 255))

    # Estimate the band thickness as the distance between points 2 and 3 and
    # keep that many rows around the vertical centre of the rotated crop.
    dx = abs(x3 - x2)
    dy = abs(y3 - y2)
    thickness = math.sqrt(dx ** 2 + dy ** 2)
    top = int(rows // 2 - thickness // 2)
    bottom = int(rows // 2 + thickness // 2)

    # Clamp the upper bound: when the band is nearly as tall as the crop,
    # top - margin becomes negative and would wrap around.
    return patch[max(top - margin, 0):bottom + margin, :cols]
def _parse_bbox_line(line):
    """Split one annotation line into eight integer coordinates and a label.

    The line is expected to look like ``x1,y1,x2,y2,x3,y3,x4,y4,label``.
    Only the first eight commas are treated as separators, so the label may
    itself contain commas. Coordinates written as floats are truncated to int.

    Raises:
        ValueError: If the line has fewer than nine fields or a coordinate
            is not numeric.
    """
    fields = line.strip().split(',', 8)
    if len(fields) < 9:
        raise ValueError('expected 8 coordinates and a label, got: %r' % line)
    coords = tuple(int(float(value)) for value in fields[:8])
    return coords, fields[8]


def clip(img_path, bbox_path, save_path = None):
    """Cut every annotated text box out of an image and record its label.

    Each non-blank line of the annotation file yields one ``<index>.png``
    crop in the output directory, where ``<index>`` is the zero-based line
    number, plus one ``"<crop path> <label>"`` line appended to ``label.txt``
    in the same directory.

    Args:
        img_path: Path of the image to cut.
        bbox_path: Path of the UTF-8 annotation file, one
            ``x1,y1,x2,y2,x3,y3,x4,y4,label`` entry per line.
        save_path: Output directory. Defaults to ``gene_cut_res`` under the
            current working directory. It is created if missing.

    Raises:
        FileNotFoundError: If the image cannot be read.
        ValueError: If an annotation line is malformed.
    """
    out_dir = save_path or os.path.join('.', 'gene_cut_res')
    label_path = os.path.join(out_dir, 'label.txt')

    # utf-8-sig also accepts plain UTF-8 and strips a leading BOM, which
    # would otherwise end up glued to the first coordinate.
    with open(bbox_path, 'r', encoding='utf-8-sig') as f:
        bbox_infos = f.readlines()

    # cv2.imread signals failure by returning None instead of raising.
    img = cv2.imread(img_path, -1)
    if img is None:
        raise FileNotFoundError('cannot read image: ' + str(img_path))

    # cv2.imwrite does not create missing directories.
    os.makedirs(out_dir, exist_ok=True)

    with open(label_path, 'a', encoding='utf8') as lf:
        for i, info in enumerate(bbox_infos):
            if not info.strip():
                # Tolerate blank lines (e.g. a trailing newline in the file).
                continue
            (x1, y1, x2, y2, x3, y3, x4, y4), name = _parse_bbox_line(info)
            print(x1, y1, x2, y2, x3, y3, x4, y4)

            # Boxes whose first edge is tilted by more than 3 px need to be
            # rotated; the others are cropped directly using points 1 and 3
            # as opposite corners.
            if abs(y2 - y1) > 3:
                patch = clipRotate(img, x1, y1, x2, y2, x3, y3, x4, y4)
            else:
                patch = img[y1:y3, x1:x3]

            clip_save_path = os.path.join(out_dir, str(i) + '.png')
            print(clip_save_path, patch.shape)

            # Only record a label when the crop was actually written, so the
            # label file never points at a missing image.
            if patch.size == 0 or not cv2.imwrite(clip_save_path, patch):
                print('skipped: could not write', clip_save_path)
                continue
            lf.write(clip_save_path + ' ' + str(name) + '\n')
if __name__ == '__main__':
    # Paths used in the real cutting job:
    #   images: dataHub/dataHub/AliComp/images, gts: dataHub/dataHub/AliComp/gts
    # Raw strings keep the Windows backslashes literal; the previous literals
    # relied on invalid escape sequences such as '\m' and '\D', which newer
    # Python versions warn about. The resulting path values are unchanged.
    img_path = r'D:\maozan1\Desktop\JDWork\vscode\PDF\9023.jpg'
    bbox_path = r'D:\maozan1\Desktop\JDWork\vscode\PDF\9023.txt'
    # save = AliComp
    clip(img_path, bbox_path)