【可食用系列】按四边形标注从图像上剪裁出矩形文字标注

最新推荐文章于 2023-02-25 11:06:17 发布

vivigreeeen

最新推荐文章于 2023-02-25 11:06:17 发布

阅读量328

点赞数

分类专栏：可食用系列　文章标签：计算机视觉 css3 几何学

本文链接：https://blog.csdn.net/vivigreeeen/article/details/123257101

版权

可食用系列专栏收录该内容

2 篇文章 0 订阅

订阅专栏

以MLT19数据集为例，希望把场景文本图片，根据四边形的标注剪裁出矩形的文字框并生成对应标注（本文还加入了分语种的功能）。

之前看到的很多方法思路是这样的：先生成四边形的最小外接矩形，确定旋转角度和中心点，再根据角度旋转整张图片，最后算出旋转后的坐标。这种方法有两个问题：一是旋转过程中超出图像边缘的部分会被裁掉，导致图片不完整；二是四个点旋转后的坐标可能为负，无法进行下一步剪裁。

而如果补全图片，旋转后的坐标又不好计算。

所以本文的方法是：先生成四边形的最小外接矩形a（这时候很可能是一个斜着的矩形），再取a的四个顶点在x、y方向上的最小、最大坐标，对整张图片进行剪裁，得到一个横平竖直的矩形b（也就是a的外接矩形）。然后根据a确定旋转角度和中心点，在不裁掉任何内容的情况下对b进行旋转（旋转后的画布会相应放大），保证b中的文字区域是水平或竖直的。最后计算文字区域旋转后的坐标（只需要旋转中心和a的长宽就能算出），根据该坐标进行剪裁。

举个例子。

原图是这样的：

我们本次要剪裁的对象是电话右边的那个拉丁文。

矩形b是这样的：

旋转后是这样的：

剪裁后是这样的：

懒得写了，附上代码，自己悟吧。

import cv2
import numpy as np
import math
import os

def imagecrop(image,box):
    """Cut the axis-aligned bounding region of ``box`` out of ``image``.

    Each point in ``box`` is read as ``(column, row)``: ``point[0]`` indexes
    the second image axis and ``point[1]`` indexes the first.

    The lower bound on each axis is clamped to 0 when it is negative. The
    upper bound is the plain maximum and is used as an exclusive slice end,
    so it is NOT clamped (a negative maximum slices relative to the end of
    the axis, as ordinary Python/NumPy slicing does).

    Returns a 3-tuple ``(crop, column_start, row_start)`` where the two
    offsets are the (clamped) origin of the crop inside ``image``.
    """
    rows = [point[1] for point in box]
    cols = [point[0] for point in box]

    # Negative starts would wrap around when slicing, so floor them at 0.
    row_start = max(min(rows), 0)
    col_start = max(min(cols), 0)
    row_stop = max(rows)
    col_stop = max(cols)

    region = image[row_start:row_stop, col_start:col_stop]
    return region, col_start, row_start

def write_label(lang,  gt_label, img_name, i):
    """Append one ``<image file>\\t<transcription>`` record to a label file.

    The record is written as ``{img_name}_{i}.jpg\\t{gt_label}`` to the
    per-script label file selected by ``lang``. Any ``lang`` that is not
    listed below goes to the catch-all handle ``f9``.

    Args:
        lang: Script/language tag of the annotation (e.g. ``'Latin'``).
        gt_label: Transcription text. A trailing newline is added when it
            is missing, so every record ends up on its own line.
        img_name: Source image name without extension.
        i: Index of the annotation inside the source image.

    The handles ``f1`` .. ``f9`` are module-level globals; they are looked
    up when the function is called, so they only need to exist by then.
    """
    handles = {
        'Arabic': f1,
        'Bangla': f2,
        'Chinese': f3,
        'Hindi': f4,
        'Latin': f5,
        'Korean': f6,
        'Japanese': f7,
        # NOTE(review): the dataset tag is presumably spelled 'Symbols';
        # the original code only matched the misspelling 'Sysmbols', which
        # would send those records to the catch-all file. Both spellings
        # are accepted so existing callers keep working.
        'Symbols': f8,
        'Sysmbols': f8,
    }
    target = handles.get(lang, f9)

    # The last line of an annotation file may lack a line terminator; without
    # this the following record would be glued onto the same line.
    if not gt_label.endswith("\n"):
        gt_label += "\n"

    target.write(img_name + "_" + str(i) + ".jpg\t" + gt_label)


# Batch driver: for every annotation file in gt_path, load the matching
# image from img_path, cut out each annotated quadrilateral as an upright
# rectangle, save it under save_path/<script>/ and record its transcription
# in the per-script label file.
img_path = './detection/imgs/'
gt_path = './detection/gt/'
save_path = './cropped_gm/'

# The label handles are module-level globals because write_label() looks
# them up by name. UTF-8 is set explicitly so the multi-script
# transcriptions do not depend on the platform's default encoding.
f1 = open("Arabic_label.txt", "w", encoding="utf-8")
f2 = open("Bangla_label.txt", "w", encoding="utf-8")
f3 = open("Chinese_label.txt", "w", encoding="utf-8")
f4 = open("Hindi_label.txt", "w", encoding="utf-8")
f5 = open("Latin_label.txt", "w", encoding="utf-8")
f6 = open("Korean_label.txt", "w", encoding="utf-8")
f7 = open("Japanese_label.txt", "w", encoding="utf-8")
f8 = open("Sysmbols_label.txt", "w", encoding="utf-8")  # file name kept as-is
f9 = open("Mixed_label.txt", "w", encoding="utf-8")
err = open("err.txt", "w", encoding="utf-8")

try:
    files = os.listdir(gt_path)
    files.sort()

    for ff in files:
        img_name = str(ff).split('.')[0]
        # 'utf-8-sig' reads plain UTF-8 and also drops a leading BOM, which
        # would otherwise break int() on the first coordinate.
        with open(gt_path + ff, "r", encoding="utf-8-sig") as gt_file:
            lines = gt_file.readlines()

        img = cv2.imread(img_path + img_name + ".jpg")
        if img is None:
            print("{} is destoryed.".format(img_name))
            err.write("{} is destoryed.\n".format(img_name))
            continue

        # i is the index of the annotation line inside its file; it is part
        # of the output file name, so skipped lines still consume an index.
        for i, line in enumerate(lines):
            if not line.strip():
                continue  # blank line, nothing to crop

            # Expected layout: x1,y1,x2,y2,x3,y3,x4,y4,script,transcription.
            # maxsplit=9 keeps commas inside the transcription intact.
            fields = line.split(",", 9)
            if len(fields) < 10:
                print("{}_{} is malformed.".format(img_name, i))
                err.write("{}_{} is malformed.\n".format(img_name, i))
                continue
            gt_lang = fields[8]
            gt_label = fields[9]

            if gt_lang == "None" or gt_label == "###\n" or gt_label == "###":
                continue

            try:
                x1, y1, x2, y2, x3, y3, x4, y4 = map(int, fields[0:8])
            except ValueError:
                print("{}_{} is malformed.".format(img_name, i))
                err.write("{}_{} is malformed.\n".format(img_name, i))
                continue

            # All points on one vertical or horizontal line: zero-area region.
            if x1 == x2 == x3 == x4 or y1 == y2 == y3 == y4:
                print("{}_{} is not an image.".format(img_name, i))
                err.write("{}_{} is not an image.\n".format(img_name, i))
                continue

            cnt = np.array([[x1, y1], [x2, y2], [x3, y3], [x4, y4]])
            # rect = ((center_x, center_y), (width, height), angle)
            rect = cv2.minAreaRect(cnt)
            box_origin = cv2.boxPoints(rect)  # the four corners of rect

            # Step 1: axis-aligned crop that contains the (possibly tilted)
            # minimum-area rectangle.
            img_crop, minx, miny = imagecrop(img, box_origin.astype(np.intp))
            if img_crop.size == 0:
                print("{}_{} is empty at cropping step.".format(img_name, i))
                err.write("{}_{} is empty at cropping step.\n".format(img_name, i))
                continue

            # Step 2: rotate that crop about the rectangle centre, on a
            # canvas enlarged so that nothing is cut off by the rotation.
            image_crop_center_x = rect[0][0] - minx
            image_crop_center_y = rect[0][1] - miny
            M = cv2.getRotationMatrix2D(
                (image_crop_center_x, image_crop_center_y), rect[2], 1)
            abs_cos = abs(M[0, 0])
            abs_sin = abs(M[0, 1])
            bound_w = int(img_crop.shape[0] * abs_sin + img_crop.shape[1] * abs_cos)
            bound_h = int(img_crop.shape[0] * abs_cos + img_crop.shape[1] * abs_sin)
            # Shift so the rotation centre lands on the centre of the canvas.
            M[0, 2] += bound_w / 2 - image_crop_center_x
            M[1, 2] += bound_h / 2 - image_crop_center_y
            dst = cv2.warpAffine(img_crop, M, (bound_w, bound_h))

            # Step 3: cut a rect-sized window around the canvas centre.
            cen_dx = dst.shape[1] / 2
            cen_dy = dst.shape[0] / 2
            x_lo = int(cen_dx - rect[1][0] / 2)
            x_hi = int(cen_dx + rect[1][0] / 2)
            y_lo = int(cen_dy - rect[1][1] / 2)
            y_hi = int(cen_dy + rect[1][1] / 2)
            box = [[x_lo, y_lo], [x_lo, y_hi], [x_hi, y_lo], [x_hi, y_hi]]
            img_rot, _, _ = imagecrop(dst, np.array(box, dtype=np.intp))
            if img_rot.size == 0:
                print("{}_{} is empty at cropping step.".format(img_name, i))
                err.write("{}_{} is empty at cropping step.\n".format(img_name, i))
                continue

            # Crops much taller than wide are turned on their side.
            if img_rot.shape[0] > img_rot.shape[1] * 2:
                img_rot = cv2.rotate(img_rot, cv2.ROTATE_90_CLOCKWISE)

            out_dir = save_path + gt_lang + '/'
            os.makedirs(out_dir, exist_ok=True)
            out_path = out_dir + img_name + '_' + str(i) + '.jpg'
            try:
                saved = cv2.imwrite(out_path, img_rot)
            except cv2.error:
                print("{}_{} is empty at cropping step.".format(img_name, i))
                err.write("{}_{} is empty at cropping step.\n".format(img_name, i))
                continue
            if not saved:
                # imwrite reports some failures by returning False
                # instead of raising.
                print("{}_{} could not be written.".format(img_name, i))
                err.write("{}_{} could not be written.\n".format(img_name, i))
                continue

            print(out_path + ' saved.')
            # Only record a label once its image really exists on disk.
            write_label(gt_lang, gt_label, img_name, i)
finally:
    f1.close()
    f2.close()
    f3.close()
    f4.close()
    f5.close()
    f6.close()
    f7.close()
    f8.close()
    f9.close()
    err.close()

vivigreeeen

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
【可食用系列】按四边形标注从图像上剪裁出矩形文字标注

以MLT19数据集为例，希望把场景文本图片，根据四边形的标注剪裁出矩形的文字框并生成对应标注（本文还加入了分语种的功能）。之前看到的很多方法思路是这样的：先生成四边形的最小外接矩形，确定旋转角度和中心点，根据角度旋转整张图片，然后算出旋转后的坐标，但是这种方法的问题是，在旋转过程中，超出边缘的部分会被剪裁掉，导致图片不完整，且四个点对应的旋转后的坐标可能是负的，无法进行下一步剪裁工作。而如果补全图片，旋转后的坐标又不好计算。所以本文的方法是，先生成四边形的最小外接矩形a（这时候很可能是一个斜着的
复制链接

扫一扫