在做自然场景下的文字检测算法时,第一步就是要标注文字位置的label,如果手动从头开始标注就太累了,所以我们小组总结出一种方法,试了试比较高效,因此分享出来。
先使用一个基础的baseline算法,然后把真实的样本过一遍baseline的模型,每张样本图片会生成相应的包含bounding box信息的txt文件,接着使用脚本将这些txt文件转换成标注软件能识别的xml格式(或者json),然后将这些xml格式的文件导入到标注软件,打开标注软件的时候,就可以看到bounding box在图片上的位置,最后就可以对这些方框进行编辑修改,正确的可以忽略,错误的进行调整。
对于标注软件,我们使用过精灵标注助手和iPhotoDraw两款软件,最后还是觉得iPhotoDraw功能比较强大和方便,所以最后选择了iPhotoDraw作为标注软件,下面的代码将baseline模型输出的bounding box角点信息txt文件,转成iPhotoDraw能识别的xml格式文件:
代码如下:
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 2 09:57:25 2018
@author: new
"""
import os
import math
import numpy as np
# 将txt中所有四边形角点生成对应xml文件,进行标注
def endWith(s,*endstring):
    """Return True when ``s`` ends with at least one of the given suffixes.

    :param s: string to test.
    :param endstring: candidate suffixes, each passed to ``s.endswith``.
    :return: True if any suffix matches, otherwise False (also when no
        suffix is supplied).
    """
    return any(s.endswith(suffix) for suffix in endstring)
# Outline colours the original code selected with ``num % 3``.
_SHAPE_LINE_COLORS = (
    '<Color Alpha="255" R="26" G="170" B="66" />\n',
    '<Color Alpha="255" R="34" G="72" B="234" />\n',
    '<Color Alpha="255" R="255" G="0" B="0" />\n',
)

# Fixed part of a <Shape> element that precedes the outline colour.
_SHAPE_BEFORE_LINE_COLOR = (
    '<Shape Type="Rectangle">\n'
    '<Settings>\n'
    '<MiscSettings GroupRendering="Unknown" />\n'
    '<Font Name="Arial" Size="4" Style="Regular">\n'
    '<Color Alpha="255" R="0" G="0" B="0" />\n'
    '</Font>\n'
    '<Line Width="1" Dash="Solid" Join="Round" OutlineType="Color" DashOffset="False" StartRoundCap="False" EndRoundCap="False">\n'
)

# Fixed part of a <Shape> element between the outline colour and the label.
_SHAPE_BEFORE_TEXT = (
    '</Line>\n'
    '<Fill FillType="None">\n'
    '<Color Alpha="255" R="255" G="255" B="255" />\n'
    '<GradientSettings Type="Linear" Angle="0" HorizontalOffset="0" VerticalOffset="0" StartExtension="0" EndExtension="0" BoundaryResize="100">\n'
    '<StartingColor Alpha="255" R="0" G="0" B="0" />\n'
    '<EndingColor Alpha="255" R="255" G="255" B="255" />\n'
    '<Blend />'
    '</GradientSettings>'
    '<EmbeddedImage Align="Center" ImageFillType="Stretch" Alpha="255" FileName="">\n'
    '<StretchSettings Type="KeepOriginalSize" Align="Center" ZoomFactor="100">\n'
    '<Offset X="0" Y="0" />\n'
    '</StretchSettings>\n'
    '<TileSettings WrapMode="Tile">\n'
    '<Offset X="0" Y="0" />\n'
    '</TileSettings>\n'
    '<ImageOptions Rotation="0">\n'
    '<Flip HorizontalFlip="False" VerticalFlip="False" />\n'
    '</ImageOptions>\n'
    '<ImageData><![CDATA[]]></ImageData>\n'
    '</EmbeddedImage>\n'
    '</Fill>\n'
    '<TextEffect UseTextEffect="False" />\n'
    '<EffectSettings>\n'
    '<Shadow UseShadow="False" Angle="45" Offset="5" Size="100" BlurLevel="0">\n'
    '<Color Alpha="255" R="0" G="0" B="0" />\n'
    '</Shadow>\n'
    '<Glow UseGlow="False" BlurLevel="20" Thickness="8">\n'
    '<Color Alpha="255" R="29" G="199" B="244" />\n'
    '</Glow>\n'
    '<WavyLine UseWavyLine="False" WavePattern="CosineSmooth" Ridges="5" Height="20" VerticalFlip="False" OffsetAtStartPoint="0" OffsetAtEndPoint="0" />\n'
    '</EffectSettings>\n'
    '</Settings>\n'
    '<BlockText Align="Center" VerticalAlign="Middle" RightToLeft="Unknown">\n'
)


def _shape_xml(p, w, h, angle, text, line_color):
    """Return the <Shape> element text for one rotated rectangle.

    ``text`` must already be XML-escaped; ``line_color`` is one entry of
    ``_SHAPE_LINE_COLORS``.
    """
    return ''.join((
        _SHAPE_BEFORE_LINE_COLOR,
        line_color,
        _SHAPE_BEFORE_TEXT,
        '<Text>' + text + '</Text>\n',
        '<Margin Left="0" Top="0" Right="0" Bottom="0" />\n',
        '</BlockText>\n',
        '<Data IsRoundCorner="False" RoundCornerRadius="0" Rotation="' + str(angle) + '">\n',
        '<Extent X="' + str(p[0]) + '"',
        ' Y="' + str(p[1]) + '"',
        ' Width="' + str(w) + '"',
        ' Height="' + str(h) + '"',
        '/>\n',
        '</Data>\n',
        '</Shape>\n',
    ))


def writeXml(xmlName,txtName,imageName):
    """Write an annotation XML document with one rectangle per detected box.

    Every line of ``txtName`` is parsed with ``get_coords``; results whose
    width or height is not positive (including the fallback returned for
    unparsable lines) are skipped. Each remaining box becomes a
    ``<Shape Type="Rectangle">`` element carrying its label, rotation and
    extent.

    :param xmlName: path of the XML file to create (overwritten if present).
    :param txtName: path of the UTF-8 text file with one box per line.
    :param imageName: value stored in ``ExportImageSettings/@FileName``.
    """
    # Local import so the block does not depend on a file-level import.
    from xml.sax.saxutils import escape

    # Read the boxes first: if the txt file cannot be opened, no truncated
    # XML file is left behind.
    with open(txtName, encoding='utf-8') as txt_file:
        boxes = [get_coords(line) for line in txt_file]

    # The colour index is fixed at 3, i.e. the first (green) outline colour,
    # exactly as in the original hard-coded ``num = 3``.
    num = 3
    line_color = _SHAPE_LINE_COLORS[int(float(num)) % 3]

    with open(xmlName, 'w', encoding='utf-8') as xml_file:
        xml_file.write('<!--Document-->\n')
        xml_file.write('<Document FileVersion="1.0">\n')
        # Escape the attribute value so names containing &, < or " stay valid XML.
        xml_file.write(' <ExportImageSettings FileName="'
                       + escape(imageName, {'"': '&quot;'}) + '"/>\n')
        xml_file.write(' <Layers>\n')
        xml_file.write(' <Layer Name="Layer1" Visible="True" LockedShapesIndex="">\n')
        xml_file.write(' <Shapes>\n')
        for p, w, h, angle, label in boxes:
            if w > 0 and h > 0:
                # Labels may contain markup characters, so escape them before
                # embedding them in <Text>.
                xml_file.write(_shape_xml(p, w, h, angle, escape(label), line_color))
        xml_file.write('</Shapes>\n')
        xml_file.write('</Layer>\n')
        xml_file.write(' </Layers>\n')
        xml_file.write('<Snapshots />\n')
        xml_file.write('</Document>')
def get_new_coord(center_coord,ori_coord,rotate_angle):
    """Rotate ``ori_coord`` around ``center_coord`` by ``rotate_angle`` degrees.

    Uses x' = dx*cos(a) + dy*sin(a) + cx and y' = dy*cos(a) - dx*sin(a) + cy,
    where (dx, dy) is the offset of the point from the centre.

    :param center_coord: (x, y) of the rotation centre.
    :param ori_coord: (x, y) of the point to rotate.
    :param rotate_angle: rotation angle in degrees.
    :return: tuple ``(x_new, y_new)``.
    """
    theta = (rotate_angle/180.)*math.pi
    offset_x = ori_coord[0] - center_coord[0]
    offset_y = ori_coord[1] - center_coord[1]
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    x_new = offset_x*cos_t + offset_y*sin_t + center_coord[0]
    y_new = offset_y*cos_t - offset_x*sin_t + center_coord[1]
    return x_new, y_new
def get_rotation_coord(iphotodraw_result):
    """Return the four rotated corners of a rectangle as a flat list.

    The first four items of ``iphotodraw_result`` are read as x, y, width and
    height of the unrotated rectangle, and the last item as the rotation
    angle handed to ``cal_coord``. The corners (x, y), (x+w, y), (x+w, y+h),
    (x, y+h) are rotated around the rectangle centre.

    :param iphotodraw_result: sequence ``[x, y, width, height, ..., angle]``.
    :return: list ``[x1, y1, x2, y2, x3, y3, x4, y4]``.
    """
    left, top = iphotodraw_result[0], iphotodraw_result[1]
    box_w, box_h = iphotodraw_result[2], iphotodraw_result[3]
    rotation = iphotodraw_result[-1]

    pivot = (1/2*(left+left+box_w), 1/2*(top+top+box_h))
    right = left + box_w
    bottom = top + box_h

    result = []
    for corner in ((left, top), (right, top), (right, bottom), (left, bottom)):
        result.extend(cal_coord(pivot, corner, rotation))
    return result
def cal_coord(center_coord,ori_coord,angle):
    """Rotate ``ori_coord`` around ``center_coord`` by ``angle`` degrees.

    Uses x' = cos(a)*dx - sin(a)*dy + cx and y' = sin(a)*dx + cos(a)*dy + cy,
    where (dx, dy) is the offset of the point from the centre.

    :param center_coord: (x, y) of the rotation centre.
    :param ori_coord: (x, y) of the point to rotate.
    :param angle: rotation angle in degrees.
    :return: list ``[out_x, out_y]``.
    """
    theta = angle*math.pi/180
    cos_t, sin_t = math.cos(theta), math.sin(theta)
    offset_x = ori_coord[0] - center_coord[0]
    offset_y = ori_coord[1] - center_coord[1]
    rotated_x = cos_t*offset_x - sin_t*offset_y + center_coord[0]
    rotated_y = sin_t*offset_x + cos_t*offset_y + center_coord[1]
    return [rotated_x, rotated_y]
def coord_to_iphotodrawFormat(bbox):
    """Convert a quadrilateral into origin, width, height and rotation.

    The edge from point 1 to point 2 defines the rotation and the width; the
    edge from point 2 to point 3 defines the height. The returned origin is
    point 1 rotated back by the angle around the midpoint of points 1 and 3.

    The angle is computed with ``atan2`` rather than ``atan(dy/dx)``, so a
    vertical first edge no longer raises ``ZeroDivisionError`` and an edge
    pointing left keeps its true direction (otherwise the un-rotated origin
    would be the opposite corner of the rectangle).

    :param bbox: flat sequence ``[x1, y1, x2, y2, x3, y3, x4, y4]``; only the
        first six values are read.
    :return: ``((origin_x, origin_y), width, height, angle)`` with the angle
        in degrees.
    """
    edge_dx = bbox[2] - bbox[0]
    edge_dy = bbox[3] - bbox[1]
    angle = math.atan2(edge_dy, edge_dx)*(180/math.pi)
    width = math.hypot(edge_dx, edge_dy)
    height = math.hypot(bbox[4] - bbox[2], bbox[5] - bbox[3])
    # Midpoint of the diagonal between point 1 and point 3.
    center_coord = [1 / 2 * (bbox[0] + bbox[4]), 1 / 2 * (bbox[1] + bbox[5])]
    # Undo the rotation to get the corner of the axis-aligned rectangle.
    ori_coord = cal_coord(center_coord, [bbox[0], bbox[1]], -angle)
    return (ori_coord[0],ori_coord[1]),width,height,angle
def get_coords(line):
    """Parse one text-detection result line into rectangle parameters.

    The line is split on commas: the first eight fields are the integer
    coordinates ``x1,y1,...,x4,y4`` of the four corner points and the last
    field is used as the label. The original comment describes the points as
    listed clockwise; that ordering is not checked here.

    :param line: one line of the detection result file.
    :return: ``(p, w, h, angle, label)`` as produced by
        ``coord_to_iphotodrawFormat`` plus the label, or the fallback
        ``((0, 0), 0, 0, 0, '#')`` when the line has fewer than eight
        fields, a coordinate is not an integer, or the geometry cannot be
        computed.
    """
    fallback = ((0,0), 0, 0, 0, '#')
    fields = line.strip().split(',')
    if len(fields) < 8:
        return fallback
    label = fields[-1]
    try:
        bbox = [int(value) for value in fields[:8]]
        p,w,h,angle = coord_to_iphotodrawFormat(bbox)
    except (ValueError, ArithmeticError):
        # Only data problems (bad number, division by zero, overflow) are
        # treated as "no box"; anything else is a real error and propagates.
        return fallback
    return p, w, h, angle, label
def sortPoint(points,center):
    """Order the four corners of a quadrilateral.

    Points whose x is greater than ``center[0]`` form the right side and
    points whose x is smaller form the left side; within each side the point
    with the smaller y is the "up" corner. Points whose x equals
    ``center[0]`` belong to neither side.

    :param points: sequence of (x, y) points.
    :param center: (x, y) of the reference centre; only x is used.
    :return: ``(left_up, left_bottom, right_bottom, right_up)``.
    """
    x_values = np.array(points)[:, 0]

    def _side_sorted_by_y(mask):
        # Pick the original point objects selected by the mask, top first.
        chosen = [points[i] for i in np.where(mask)[0]]
        chosen.sort(key=lambda pt: pt[1])
        return chosen

    right_side = _side_sorted_by_y(x_values > center[0])
    right_up, right_bottom = right_side[0], right_side[1]
    left_side = _side_sorted_by_y(x_values < center[0])
    left_up, left_bottom = left_side[0], left_side[1]
    return left_up, left_bottom, right_bottom, right_up
import cv2
if __name__=='__main__':
    # basepath holds the detection result .txt files next to their .jpg
    # images; the generated XML files and the image copies go to out_path.
    basepath = r'xxx'
    out_path = r'xxx'
    os.makedirs(out_path, exist_ok=True)

    txt_suffix = '.txt'
    for name in os.listdir(basepath):
        if not endWith(name, txt_suffix):
            continue
        print(name)
        # Strip only the trailing suffix; str.replace would also rewrite a
        # '.txt' that appears in the middle of the file name.
        stem = name[:-len(txt_suffix)]
        imageName = stem + '.jpg'
        image_path = os.path.join(basepath, imageName)
        if not os.path.exists(image_path):
            continue
        # The image is decoded and re-encoded with OpenCV (as in the original
        # script) rather than copied byte-for-byte.
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable or corrupt file by returning None.
            print('skip unreadable image: ' + image_path)
            continue
        cv2.imwrite(os.path.join(out_path, imageName), image)
        xmlName = os.path.join(out_path, stem + '_data.xml')
        txtName = os.path.join(basepath, name)
        writeXml(xmlName, txtName, imageName)