如何更高效地标注文字检测算法的label(二)

     在做自然场景下的文字检测算法时,第一步就是要标注文字位置的label,如果手动从头开始标注就太累了,所以我们小组总结出一种方法,试了试比较高效,因此分享出来。

    先使用一个基础的baseline算法,然后把真实的样本过一遍baseline的模型,每张样本图片会生成相应的boundingbox信息的txt文件,接着使用脚本将这些txt文件转换成标注软件能识别的xml格式(或者json),然后将这些xml格式的文件导入到标注软件,打开标注软件的时候,就可以看到boundingbox在图片上的编辑位置,最后就可以对这些方框进行编辑修改,正确的可以忽略,错误的进行调整。

    对于标注软件,我们使用过标注精灵助手和IphotoDraw两款软件,最后还是觉得IphotoDraw功能比较强大和方便,所以最后选择了IphotoDraw作为标注软件,下面的脚本将baseline模型跑出的boundingbox位置点信息txt文件,转成IphotoDraw能识别的xml格式文件:

代码如下:

# -*- coding: utf-8 -*-
"""
Created on Fri Feb  2 09:57:25 2018

@author: new
"""
import os
import math
import numpy as np

# 将txt中所有四边形角点生成对应xml文件,进行标注

def endWith(s,*endstring):
    """Tell whether ``s`` ends with at least one of the given suffixes.

    :param s: string to test.
    :param endstring: candidate suffixes, passed as separate positional
        arguments.
    :return: True as soon as one suffix matches; False when none matches,
        including when no suffix is supplied.
    """
    for suffix in endstring:
        if s.endswith(suffix):
            return True
    return False

def writeXml(xmlName,txtName,imageName):
    """Write an IphotoDraw annotation XML file from a detection txt file.

    Every line of ``txtName`` is parsed with ``get_coords``. Each line that
    yields a positive width and height becomes one ``<Shape Type="Rectangle">``
    element carrying the label as its text; all other lines are skipped.

    :param xmlName: path of the XML file to create (overwritten if present).
    :param txtName: path of the UTF-8 txt file holding one box per line.
    :param imageName: image file name stored in ``ExportImageSettings``.
    :return: None.
    :raises OSError: if either file cannot be opened.
    """
    # Local import: the file's top-level import block is left untouched.
    from xml.sax.saxutils import escape

    # Outline colours; ``num`` picks one by ``num % 3``. With num fixed at 3
    # every rectangle gets the first (green) entry.
    num = 3
    line_colors = (
        '<Color Alpha="255" R="26" G="170" B="66" />\n',
        '<Color Alpha="255" R="34" G="72" B="234" />\n',
        '<Color Alpha="255" R="255" G="0" B="0" />\n',
    )
    line_color = line_colors[int(float(num)) % 3]

    # Everything in a shape that does not depend on the individual box.
    # The two entries without a trailing newline are intentional: they keep
    # the emitted bytes unchanged.
    shape_settings = (
        '<Shape Type="Rectangle">\n',
        '<Settings>\n',
        '<MiscSettings GroupRendering="Unknown" />\n',
        '<Font Name="Arial" Size="4" Style="Regular">\n',
        '<Color Alpha="255" R="0" G="0" B="0" />\n',
        '</Font>\n',
        '<Line Width="1" Dash="Solid" Join="Round" OutlineType="Color" DashOffset="False" StartRoundCap="False" EndRoundCap="False">\n',
        line_color,
        '</Line>\n',
        '<Fill FillType="None">\n',
        '<Color Alpha="255" R="255" G="255" B="255" />\n',
        '<GradientSettings Type="Linear" Angle="0" HorizontalOffset="0" VerticalOffset="0" StartExtension="0" EndExtension="0" BoundaryResize="100">\n',
        '<StartingColor Alpha="255" R="0" G="0" B="0" />\n',
        '<EndingColor Alpha="255" R="255" G="255" B="255" />\n',
        '<Blend />',
        '</GradientSettings>',
        '<EmbeddedImage Align="Center" ImageFillType="Stretch" Alpha="255" FileName="">\n',
        '<StretchSettings Type="KeepOriginalSize" Align="Center" ZoomFactor="100">\n',
        '<Offset X="0" Y="0" />\n',
        '</StretchSettings>\n',
        '<TileSettings WrapMode="Tile">\n',
        '<Offset X="0" Y="0" />\n',
        '</TileSettings>\n',
        '<ImageOptions Rotation="0">\n',
        '<Flip HorizontalFlip="False" VerticalFlip="False" />\n',
        '</ImageOptions>\n',
        '<ImageData><![CDATA[]]></ImageData>\n',
        '</EmbeddedImage>\n',
        '</Fill>\n',
        '<TextEffect UseTextEffect="False" />\n',
        '<EffectSettings>\n',
        '<Shadow UseShadow="False" Angle="45" Offset="5" Size="100" BlurLevel="0">\n',
        '<Color Alpha="255" R="0" G="0" B="0" />\n',
        '</Shadow>\n',
        '<Glow UseGlow="False" BlurLevel="20" Thickness="8">\n',
        '<Color Alpha="255" R="29" G="199" B="244" />\n',
        '</Glow>\n',
        '<WavyLine UseWavyLine="False" WavePattern="CosineSmooth" Ridges="5" Height="20" VerticalFlip="False" OffsetAtStartPoint="0" OffsetAtEndPoint="0" />\n',
        '</EffectSettings>\n',
        '</Settings>\n',
        '<BlockText Align="Center" VerticalAlign="Middle" RightToLeft="Unknown">\n',
    )

    # Escape &, <, > and the double quote so an unusual file name cannot
    # break the attribute value.
    image_attr = escape(imageName, {'"': '&quot;'})

    # The input is opened first so that a missing txt file does not leave a
    # truncated XML behind; ``with`` closes both handles even on errors.
    with open(txtName,encoding='utf-8') as fopen, \
            open(xmlName,'w',encoding='utf-8') as xml_file:
        xml_file.writelines((
            '<!--Document-->\n',
            '<Document FileVersion="1.0">\n',
            '    <ExportImageSettings FileName="' + image_attr + '"/>\n',
            '    <Layers>\n',
            '    <Layer Name="Layer1" Visible="True" LockedShapesIndex="">\n',
            '        <Shapes>\n',
        ))

        for line in fopen:
            p,w,h,angle,label = get_coords(line)
            if not (w > 0 and h > 0):
                # Unparsable or degenerate box: nothing to draw.
                continue

            xml_file.writelines(shape_settings)
            xml_file.writelines((
                # Labels are free text; escape them so '&' or '<' in a
                # transcription keeps the document well-formed.
                '<Text>' + escape(label) + '</Text>\n',
                '<Margin Left="0" Top="0" Right="0" Bottom="0" />\n',
                '</BlockText>\n',
                '<Data IsRoundCorner="False" RoundCornerRadius="0" Rotation="' + str(angle) + '">\n',
                '<Extent X="' + str(p[0]) + '" Y="' + str(p[1])
                + '" Width="' + str(w) + '" Height="' + str(h) + '"/>\n',
                '</Data>\n',
                '</Shape>\n',
            ))

        xml_file.writelines((
            '</Shapes>\n',
            '</Layer>\n',
            ' </Layers>\n',
            '<Snapshots />\n',
            '</Document>',
        ))


def get_new_coord(center_coord,ori_coord,rotate_angle):
    """Rotate ``ori_coord`` about ``center_coord`` by ``rotate_angle`` degrees.

    With ``dx, dy`` the offset of the point from the centre and ``t`` the
    angle in radians, the result is::

        x_new = dx*cos(t) + dy*sin(t) + cx
        y_new = dy*cos(t) - dx*sin(t) + cy

    :param center_coord: indexable ``(cx, cy)`` pivot.
    :param ori_coord: indexable ``(x, y)`` point to rotate.
    :param rotate_angle: rotation angle in degrees.
    :return: tuple ``(x_new, y_new)``.
    """
    theta = (rotate_angle/180.)*math.pi
    dx = ori_coord[0]-center_coord[0]
    dy = ori_coord[1]-center_coord[1]
    x_new = dx*math.cos(theta)+dy*math.sin(theta)+center_coord[0]
    y_new = dy*math.cos(theta)-dx*math.sin(theta)+center_coord[1]
    return x_new,y_new


def get_rotation_coord(iphotodraw_result):
    """Expand an IphotoDraw-style rectangle into its four rotated corners.

    :param iphotodraw_result: indexable whose items 0..3 are read as
        ``x, y, width, height`` of the unrotated rectangle and whose last
        item is the rotation angle in degrees.
    :return: flat list ``[x1, y1, x2, y2, x3, y3, x4, y4]`` obtained by passing
        the corners ``(x, y)``, ``(x+w, y)``, ``(x+w, y+h)``, ``(x, y+h)``
        through ``cal_coord`` about the rectangle centre.
    """
    left, top = iphotodraw_result[0], iphotodraw_result[1]
    box_w, box_h = iphotodraw_result[2], iphotodraw_result[3]
    rotation = iphotodraw_result[-1]

    # Centre of the unrotated rectangle, used as the rotation pivot.
    pivot = (1/2*(left+left+box_w), 1/2*(top+top+box_h))

    corners = (
        (left, top),
        (left+box_w, top),
        (left+box_w, top+box_h),
        (left, top+box_h),
    )

    result = []
    for corner in corners:
        result.extend(cal_coord(pivot, corner, rotation))
    return result

def cal_coord(center_coord,ori_coord,angle):
    """Rotate ``ori_coord`` about ``center_coord`` by ``angle`` degrees.

    With ``dx, dy`` the offset of the point from the centre and ``t`` the
    angle in radians, the result is::

        out_x = cos(t)*dx - sin(t)*dy + cx
        out_y = sin(t)*dx + cos(t)*dy + cy

    :param center_coord: indexable ``(cx, cy)`` pivot.
    :param ori_coord: indexable ``(x, y)`` point to rotate.
    :param angle: rotation angle in degrees.
    :return: list ``[out_x, out_y]``.
    """
    radians = angle*math.pi/180
    pivot_x, pivot_y = center_coord[0], center_coord[1]
    offset_x = ori_coord[0]-pivot_x
    offset_y = ori_coord[1]-pivot_y
    rotated_x = math.cos(radians)*offset_x-math.sin(radians)*offset_y+pivot_x
    rotated_y = math.sin(radians)*offset_x+math.cos(radians)*offset_y+pivot_y
    return [rotated_x,rotated_y]

def coord_to_iphotodrawFormat(bbox):
    """Convert a quadrilateral into IphotoDraw's rotated-rectangle form.

    The edge from point 1 to point 2 defines the rotation angle and the width;
    the edge from point 2 to point 3 defines the height. The centre is the
    midpoint of points 1 and 3. Point 1 is then rotated back about that centre
    by the negative angle to get the origin of the unrotated rectangle.

    ``math.atan2`` is used for the angle, so a vertical first edge (equal x
    for points 1 and 2) no longer divides by zero, and an edge pointing left
    yields an angle beyond +/-90 degrees instead of being folded into the
    opposite direction.

    :param bbox: flat sequence ``[x1, y1, x2, y2, x3, y3, x4, y4]``; only the
        first six values are read.
    :return: ``((x, y), width, height, angle)`` with ``angle`` in degrees.
    """
    x1, y1 = bbox[0], bbox[1]
    x2, y2 = bbox[2], bbox[3]
    x3, y3 = bbox[4], bbox[5]

    # Direction of the first edge; atan2 copes with dx == 0 and keeps the
    # quadrant, unlike atan(dy/dx).
    theta = math.atan2(y2-y1, x2-x1)
    angle = theta*(180/math.pi)

    width = math.hypot(x2-x1, y2-y1)
    height = math.hypot(x3-x2, y3-y2)

    center_x = 1/2*(x1+x3)
    center_y = 1/2*(y1+y3)

    # Rotate point 1 about the centre by -theta (same formula as cal_coord,
    # applied directly in radians) to find the unrotated rectangle's origin.
    cos_t = math.cos(-theta)
    sin_t = math.sin(-theta)
    offset_x = x1-center_x
    offset_y = y1-center_y
    origin_x = cos_t*offset_x-sin_t*offset_y+center_x
    origin_y = sin_t*offset_x+cos_t*offset_y+center_y

    return (origin_x,origin_y),width,height,angle


def get_coords(line):
    """Parse one detection-result line into IphotoDraw rectangle parameters.

    The line is stripped and split on commas. The first eight fields are read
    as integer coordinates ``x1, y1, x2, y2, x3, y3, x4, y4`` and the last
    field is taken as the label. The coordinates are handed to
    ``coord_to_iphotodrawFormat``.

    NOTE(review): the original comment described point 1 as the bottom-right
    corner with points listed clockwise, while the conversion treats the
    point 1 -> point 2 edge as the rectangle's width direction. Confirm the
    point order against the detector's actual output.

    :param line: one text line of a detection result file.
    :return: ``(p, w, h, angle, label)`` where ``p`` is the ``(x, y)`` origin.
        For a line that cannot be parsed or converted, the placeholder
        ``((0, 0), 0, 0, 0, '#')`` is returned; callers recognise it by its
        zero width and height.
    """
    fields = line.strip().split(',')

    # Fewer than eight fields cannot hold four corner points.
    if len(fields) < 8:
        return (0,0), 0, 0, 0, '#'

    try:
        bbox = [int(value) for value in fields[:8]]
        p,w,h,angle = coord_to_iphotodrawFormat(bbox)
    except (ValueError, ZeroDivisionError):
        # ValueError: a coordinate field is not an integer literal.
        # ZeroDivisionError: the converter could not handle the geometry.
        # Only these data errors are treated as "skip this line"; anything
        # else (e.g. KeyboardInterrupt) propagates.
        return (0,0), 0, 0, 0, '#'

    label = fields[-1]
    return p, w, h, angle, label



def sortPoint(points,center):
    '''
    Order the four corners of a quadrilateral relative to a centre point.

    Points whose x is strictly greater than ``center[0]`` form the right
    group, points whose x is strictly smaller form the left group; a point
    with x equal to ``center[0]`` lands in neither. Within each group the
    point with the smaller y is "up" and the next one is "bottom".

    :param points: sequence of ``(x, y)`` points.
    :param center: indexable whose first item is the centre x.
    :return: ``left_up, left_bottom, right_bottom, right_up``.
    :raises IndexError: if either group holds fewer than two points.
    '''
    xs = np.array(points)[:, 0]

    def _side_sorted_by_y(mask):
        # Pick the points selected by ``mask`` and order them top to bottom.
        picked = [points[i] for i in np.where(mask)[0]]
        picked.sort(key=lambda pt: pt[1])
        return picked

    right_side = _side_sorted_by_y(xs > center[0])
    right_up = right_side[0]
    right_bottom = right_side[1]

    left_side = _side_sorted_by_y(xs < center[0])
    left_up = left_side[0]
    left_bottom = left_side[1]

    return left_up,left_bottom,right_bottom,right_up

import cv2

if __name__=='__main__':
    # Folder holding each <name>.txt detection result next to its <name>.jpg.
    basepath = r'xxx'
    # Folder that receives the re-saved images and the generated
    # <name>_data.xml files.
    out_path = r'xxx'

    if not os.path.exists(out_path):
        os.makedirs(out_path)

    txt_suffix = '.txt'
    for name in os.listdir(basepath):
        if not endWith(name, txt_suffix):
            continue
        print(name)

        # Strip only the trailing extension; str.replace would also rewrite
        # a '.txt' occurring earlier in the file name.
        stem = name[:-len(txt_suffix)]
        xmlName = stem + '_data.xml'
        imageName = stem + '.jpg'

        image_path = os.path.join(basepath, imageName)
        if not os.path.exists(image_path):
            # No matching image: nothing to annotate for this txt file.
            continue

        # Re-save the image into out_path so it sits next to its XML.
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable/corrupt file by returning
            # None; passing that to cv2.imwrite would raise.
            print('skip ' + imageName + ': image could not be read')
            continue
        cv2.imwrite(os.path.join(out_path, imageName), image)

        writeXml(os.path.join(out_path, xmlName),
                 os.path.join(basepath, name),
                 imageName)

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值