以MLT19数据集为例,希望把场景文本图片,根据四边形的标注剪裁出矩形的文字框并生成对应标注(本文还加入了分语种的功能)。
之前看到的很多方法思路是这样的:先生成四边形的最小外接矩形,确定旋转角度和中心点,根据角度旋转整张图片,然后算出旋转后的坐标。但这种方法的问题是:在旋转过程中,超出边缘的部分会被剪裁掉,导致图片不完整;而且四个点旋转后的坐标可能是负数,无法进行下一步的剪裁工作。
而如果补全图片,旋转后的坐标又不好计算。
所以本文的方法是:先生成四边形的最小外接矩形a(这时候很可能是一个斜着的矩形);按照a的四个顶点在水平和竖直方向上的最小、最大坐标对原图进行剪裁,得到一个横平竖直的矩形b(也就是a的外接矩形);再根据a确定旋转角度和中心点,先扩大画布、在不裁掉任何内容的情况下对b进行旋转,使b中的文字区域变为水平或竖直;最后计算文字区域旋转后的坐标(旋转后a的中心就是新画布的中心,所以只需要这个中心和a的长宽就能计算),并根据该坐标进行剪裁。
举个例子。
原图是这样的:
我们本次要剪裁的对象是电话右边的那个拉丁文。
矩形b是这样的:
旋转后是这样的:
剪裁后是这样的:
懒得写了,附上代码,自己悟吧。
import cv2
import numpy as np
import math
import os
def imagecrop(image, box):
    """Crop the axis-aligned bounding rectangle of ``box`` out of ``image``.

    Args:
        image: Array sliced as ``image[rows, cols]``.
        box: Iterable of points. Element 0 of each point is used along the
            second axis of ``image`` (columns) and element 1 along the first
            axis (rows).

    Returns:
        A tuple ``(crop, left, top)``: the cropped slice of ``image``,
        followed by the column offset and the row offset of the crop's
        top-left corner inside ``image``. Both offsets are clamped at 0.
    """
    cols = [point[0] for point in box]
    rows = [point[1] for point in box]
    # Clamp every bound at 0. A negative slice bound would be interpreted by
    # NumPy as "counted from the end" and silently select the wrong region;
    # bounds past the far edge need no clamping because slicing truncates them.
    top = max(min(rows), 0)
    left = max(min(cols), 0)
    bottom = max(max(rows), 0)
    right = max(max(cols), 0)
    return image[top:bottom, left:right], left, top
def write_label(lang, gt_label, img_name, i):
    """Append one ``<crop file name>\\t<transcription>`` record to a label file.

    The record goes to the label file that matches ``lang``; any language that
    is not listed falls through to the "Mixed" file (``f9``).

    Args:
        lang: Script/language name taken from the ground-truth line.
        gt_label: Transcription text. A trailing newline is added when it is
            missing so that consecutive records never share a line.
        img_name: Base name of the source image (no extension).
        i: Index of the text instance; the crop is named
            ``<img_name>_<i>.jpg``.
    """
    # f1..f9 are module-level file handles opened by the script before the
    # first call to this function.
    handles = {
        'Arabic': f1,
        'Bangla': f2,
        'Chinese': f3,
        'Hindi': f4,
        'Latin': f5,
        'Korean': f6,
        'Japanese': f7,
        # The dataset spells this label "Symbols". The original misspelling is
        # still accepted so existing callers keep working.
        'Symbols': f8,
        'Sysmbols': f8,
    }
    target = handles.get(lang, f9)
    if not gt_label.endswith("\n"):
        # The last line of a ground-truth file may lack a line terminator.
        gt_label = gt_label + "\n"
    target.write(img_name + "_" + str(i) + ".jpg\t" + gt_label)
# Input/output locations: source images, their ground-truth text files and the
# root directory that receives the cropped text regions.
img_path = './detection/imgs/'
gt_path = './detection/gt/'
save_path = './cropped_gm/'

# One label file per script; write_label() reads these handles as module-level
# globals. The transcriptions are non-ASCII, so the encoding is pinned instead
# of depending on the platform default.
f1 = open("Arabic_label.txt", "w", encoding="utf-8")
f2 = open("Bangla_label.txt", "w", encoding="utf-8")
f3 = open("Chinese_label.txt", "w", encoding="utf-8")
f4 = open("Hindi_label.txt", "w", encoding="utf-8")
f5 = open("Latin_label.txt", "w", encoding="utf-8")
f6 = open("Korean_label.txt", "w", encoding="utf-8")
f7 = open("Japanese_label.txt", "w", encoding="utf-8")
f8 = open("Sysmbols_label.txt", "w", encoding="utf-8")
f9 = open("Mixed_label.txt", "w", encoding="utf-8")
# Log of every image / text instance that could not be processed.
err = open("err.txt", "w", encoding="utf-8")

try:
    files = os.listdir(gt_path)
    files.sort()
    for ff in files:
        img_name = str(ff).split('.')[0]
        # "utf-8-sig" also accepts files that start with a byte-order mark,
        # which would otherwise make int() fail on the first coordinate.
        with open(gt_path + ff, "r", encoding="utf-8-sig") as gt_file:
            lines = gt_file.readlines()
        img = cv2.imread(img_path + img_name + ".jpg")
        if img is None:
            print("{} is destoryed.".format(img_name))
            err.write(img_name + " is destoryed.\n")
            continue
        # i is the index of the line inside the ground-truth file; skipped
        # lines still consume their index so crop names stay stable.
        for i, line in enumerate(lines):
            fields = line.rstrip("\n").split(",", 9)
            if len(fields) < 10:
                # Blank lines are ignored; truncated lines are reported.
                if line.strip():
                    print("{}_{} is malformed.".format(img_name, i))
                    err.write("{}_{} is malformed.\n".format(img_name, i))
                continue
            gt_lang = fields[8]
            gt_label = fields[9]
            #print(gt_label,gt_lang)
            if gt_lang == "None" or gt_label == "###":
                continue
            # Read the four corner points of the quadrilateral.
            x1, y1, x2, y2, x3, y3, x4, y4 = map(int, fields[0:8])
            if x1 == x2 == x3 == x4 or y1 == y2 == y3 == y4:
                print("{}_{} is not an image.".format(img_name, i))
                err.write("{}_{} is not an image.\n".format(img_name, i))
                continue
            # cv2.minAreaRect only accepts 32-bit integer or float points; the
            # NumPy default integer is 64-bit on most platforms.
            cnt = np.array([[x1, y1], [x2, y2], [x3, y3], [x4, y4]], dtype=np.int32)
            # Minimum-area bounding rectangle: (center, (width, height), angle).
            rect = cv2.minAreaRect(cnt)
            # The four corner points of that rectangle.
            box_origin = cv2.boxPoints(rect)
            # np.int0 was removed in NumPy 2.0; astype(np.intp) is the same cast.
            img_crop, minx, miny = imagecrop(img, box_origin.astype(np.intp))
            if img_crop.size == 0:
                # Rectangle lies outside the image; warpAffine would raise.
                print("{}_{} is empty at cropping step.".format(img_name, i))
                err.write("{}_{} is empty at cropping step.\n".format(img_name, i))
                continue
            # Rectangle center expressed in the coordinates of the crop.
            image_crop_center_x = rect[0][0] - minx
            image_crop_center_y = rect[0][1] - miny
            # Rotation matrix about the rectangle center.
            M = cv2.getRotationMatrix2D((image_crop_center_x, image_crop_center_y), rect[2], 1)
            abs_cos = abs(M[0, 0])
            abs_sin = abs(M[0, 1])
            # Canvas size that holds the whole rotated crop without clipping.
            bound_w = int(img_crop.shape[0] * abs_sin + img_crop.shape[1] * abs_cos)
            bound_h = int(img_crop.shape[0] * abs_cos + img_crop.shape[1] * abs_sin)
            # Shift so the rectangle center lands on the center of the canvas.
            M[0, 2] += bound_w / 2 - image_crop_center_x
            M[1, 2] += bound_h / 2 - image_crop_center_y
            #cv2.imshow("img_crop",img_crop)
            dst = cv2.warpAffine(img_crop, M, (bound_w, bound_h))
            #cv2.imshow("dst",dst)
            cen_dx = dst.shape[1] / 2
            cen_dy = dst.shape[0] / 2
            # The rectangle is centered on the canvas, so its extent follows
            # from the canvas center and the rectangle's width and height.
            x_lo = int(cen_dx - rect[1][0] / 2)
            x_hi = int(cen_dx + rect[1][0] / 2)
            y_lo = int(cen_dy - rect[1][1] / 2)
            y_hi = int(cen_dy + rect[1][1] / 2)
            box = [[x_lo, y_lo], [x_lo, y_hi], [x_hi, y_lo], [x_hi, y_hi]]
            img_rot = imagecrop(dst, np.array(box, dtype=np.intp))[0]
            if img_rot.size == 0:
                print("{}_{} is empty at cropping step.".format(img_name, i))
                err.write("{}_{} is empty at cropping step.\n".format(img_name, i))
                continue
            # Lay clearly vertical crops (height > 2 * width) on their side.
            if img_rot.shape[0] > img_rot.shape[1] * 2:
                img_rot = cv2.rotate(img_rot, cv2.ROTATE_90_CLOCKWISE)
            out_dir = save_path + gt_lang + '/'
            out_path = out_dir + img_name + '_' + str(i) + '.jpg'
            # cv2.imwrite does not create directories; it just returns False.
            os.makedirs(out_dir, exist_ok=True)
            try:
                saved = cv2.imwrite(out_path, img_rot)
            except cv2.error:
                saved = False
            if not saved:
                print("{}_{} could not be saved.".format(img_name, i))
                err.write("{}_{} could not be saved.\n".format(img_name, i))
                continue
            # Record the label only once the crop exists on disk, so a label
            # file never references a missing image.
            write_label(gt_lang, gt_label + "\n", img_name, i)
            print(out_path + ' saved.')
            #cv2.waitKey(0)
        #cv2.destroyAllWindows()
finally:
    # Close every output file even when processing stops with an exception.
    for handle in (f1, f2, f3, f4, f5, f6, f7, f8, f9, err):
        handle.close()