OCR助手训练少于三个字符的训练方法-CSDN博客

本文链接：https://blog.csdn.net/m0_57218867/article/details/135814583

文章讲述了OCR助手在训练字符时遇到的挑战，尤其是抠图训练的不一致性可能导致识别问题。作者强调了多字符自动分割的优势，并提到了针对单个字符或少于三个字符的训练方法，即通过抠图和复制来扩展训练样本。

摘要由CSDN通过智能技术生成

OCR助手是我们常用的一个训练字符的工具。在训练字符的过程中，对于三个及以上的字符，它能够很快地自动完成分割和识别，帮助我们快速达到训练的目的。

不会改变原图像的大小（Image_size）

在使用抠图完成训练集训练时，往往因为抠图的大小，角度等各方面因素不统一，抠出来的图像对比原图像变形很多，这样训练好的分类器在使用的时候往往会因为和原图像差距过大，出现识别错误或者无法识别的现象，导致在实际的项目中需要实时识别时完全无法使用。

这个训练的样本就是采用抠图完成的，实际效果的话就只有在已经采集的本地图像中可以完全识别，在实时检测中或者其他客户端上识别的效果很差，已经训练过的字符部分也无法识别。

这是采用抠图方式完成训练的代码，一共分成三个部分：抠图、训练、测试生成的OMC文件。

抠图部分：

* Interactive cropping script.
* For every class label and every source image the user draws a rectangle
* around one character; the cropped patch is written as a PNG into the
* folder <classesPath>/<class>/. Drawing a (nearly) empty rectangle skips
* the current image for the current class.
* NOTE(review): the per-class target folders are assumed to exist already;
* nothing in this script creates them.

* Class labels
classes := ['0','1','2','3','4','5','6','7','8','9','.']

* Training-set paths
trainPath  := './train112/'
classesPath := 'E:/工作/数码管训练/类别训练集/'

* Collect all image files below the source folder
list_files ('E:/工作/数码管训练/Image', ['files','follow_links','recursive'], ImageFiles)
tuple_regexp_select (ImageFiles, ['\\.(tif|tiff|gif|bmp|jpg|jpeg|jp2|png|pcx|pgm|ppm|pbm|xwd|ima|hobj)$','ignore_case'], ImageFiles)

dev_close_window ()

* Open a window at one third of the size of the first image
read_image (Image, ImageFiles[0])
get_image_size (Image, Width, Height)
dev_open_window (0, 0, Width/3, Height/3, 'black', WindowHandle)

for I := 0 to |classes|-1 by 1
    * Running file index for the current class. It must be initialised
    * once per class (not once per image); otherwise every crop would be
    * saved as 0.png and overwrite the previous one.
    num := 0
    for J := 0 to |ImageFiles|-1 by 1
        read_image (Image1, ImageFiles[J])
        * Show the image the user is about to annotate
        dev_display (Image1)
        * Both status lines are passed as one tuple so that they are drawn
        * on separate lines instead of on top of each other
        disp_message (WindowHandle, ['当前类别是'+classes[I], '当前图片是'+J+'/'+|ImageFiles|], 'image', 12, 12, 'black', 'true')
        draw_rectangle1 (WindowHandle, Row1, Column1, Row2, Column2)
        gen_rectangle1 (Rectangle, Row1, Column1, Row2, Column2)
        area_center (Rectangle, Area, Row, Column)
        if (Area<5)
            * No character was framed in this image
            continue
        endif
        * A character was framed: cut it out of the image ...
        reduce_domain (Image1, Rectangle, ImageReduced)
        * ... and crop the result to the bounding box of the rectangle
        crop_domain (ImageReduced, ImagePart)
        * Save into the folder of the current class under a unique index
        temp := classesPath+classes[I]+'/'+num+'.png'
        write_image (ImagePart, 'png', 0, temp)
        num := num + 1

        stop ()
    endfor
endfor

创建类别训练集

训练部分



* Training script.
* For every class label, read all cropped sample images from the folder of
* that class, segment the bright character region, and append it with the
* class label to the training file train115.trf. Afterwards an MLP OCR
* classifier is created, trained on that file and written to train115.omc.
* Note: append_ocr_trainf appends to an existing file, so delete
* train115.trf before re-running to avoid duplicated samples.
* NOTE(review): every label in classes needs an existing sample folder
* below classesPath - confirm that one was created for 'P' as well.

* Class labels
classes := ['0','1','2','3','4','5','6','7','8','9','.','P']

* Training-set paths
trainPath  := './train115/'
classesPath := 'E:/工作/数码管训练/类别训练集/'

for I := 0 to |classes|-1 by 1
    * Folder of the current class. A separate variable is used so that
    * classesPath itself is not modified; re-assigning classesPath would
    * accumulate the labels ('.../0', '.../01', ...).
    ClassDir := classesPath+classes[I]

    * Read only the images of THIS class. Listing the root folder
    * recursively would label every sample with every class.
    list_files (ClassDir, ['files','follow_links','recursive'], ImageFiles)
    tuple_regexp_select (ImageFiles, ['\\.(tif|tiff|gif|bmp|jpg|jpeg|jp2|png|pcx|pgm|ppm|pbm|xwd|ima|hobj)$','ignore_case'], ImageFiles)

    for Index := 0 to |ImageFiles|-1 by 1
        read_image (Image, ImageFiles[Index])

        * Bright pixels are taken as character candidates
        threshold (Image, Regions, 217, 255)
        connection (Regions, ConnectedRegions)

        * First pass: drop small noise blobs
        select_shape (ConnectedRegions, SelectedRegions, 'area', 'and', 200, 9999999)
        union1 (SelectedRegions, RegionUnion)

        * Second pass: keep only components within the expected area range
        connection (RegionUnion, ConnectedRegions)
        select_shape (ConnectedRegions, SelectedRegions, 'area', 'and', 5000, 99999)
        union1 (SelectedRegions, RegionUnion)

        * Skip samples in which nothing survived the area filters, so no
        * empty region is written to the training file
        area_center (RegionUnion, Area, Row, Column)
        if (Area == 0)
            continue
        endif

        * Append the sample to the training file (.trf)
        append_ocr_trainf (RegionUnion, Image, classes[I], 'train115.trf')
        stop ()
    endfor
endfor

* Class names that actually occur in the training file
read_ocr_trainf_names ('train115.trf', CharacterNames, CharacterCount)
* Create the classifier
create_ocr_class_mlp (8, 10, 'constant', 'default', CharacterNames, 80, 'none', 10, 42, OCRHandle)
* Train it
trainf_ocr_class_mlp (OCRHandle, 'train115.trf', 200, 1, 0.01, Error, ErrorLog)

* Save the trained classifier
write_ocr_class_mlp (OCRHandle, 'train115.omc')

测试部分

这部分是为了验证生成的OMC文件是否能准确识别字符

* Test script: checks whether the generated OMC classifier reads the
* characters in the source images. For each image a fixed ROI is
* thresholded, the connected components are sorted in reading order and
* classified, and the result is displayed.
* NOTE(review): the classifier is read from an absolute path, whereas the
* training script writes 'train115.omc' relative to the working directory -
* make sure both refer to the same file.

* Image Acquisition 01: Code generated by Image Acquisition 01
list_files ('E:/工作/数码管训练/Image', ['files','follow_links','recursive'], ImageFiles)
tuple_regexp_select (ImageFiles, ['\\.(tif|tiff|gif|bmp|jpg|jpeg|jp2|png|pcx|pgm|ppm|pbm|xwd|ima|hobj)$','ignore_case'], ImageFiles)
read_ocr_class_mlp ('E:/工作/数码管训练/train115.omc', OCRHandle)
for Index := 0 to |ImageFiles| - 1 by 1
    read_image (Image, ImageFiles[Index])

    * Fixed region of interest containing the display digits
*     gen_rectangle1 (ROI_0, 363.244, 367.985, 708.025, 847.73)
    gen_rectangle1 (ROI_0, 377.035, 400.648, 714.921, 847.73)
    reduce_domain (Image, ROI_0, ImageReduced)

    * Segment the characters inside the ROI
    threshold (ImageReduced, Regions, 3, 255)
    connection (Regions, ConnectedRegions)

    * Sort the components in reading order and classify them
    sort_region (ConnectedRegions, SortedRegions, 'character', 'true', 'column')
    do_ocr_multi_class_mlp (SortedRegions, ImageReduced, OCRHandle, Class, Confidence)

    * Show the image and the recognised characters
    dev_get_window (WindowHandle)
    dev_display (Image)
    dev_set_color ('red')
    * Use the handle of the active window; a hard-coded numeric handle is
    * only valid in the session in which it was created
    set_display_font (WindowHandle, 56, 'mono', 'true', 'false')
    disp_message (WindowHandle, Class, 'window', 12, 12, 'black', 'true')

    stop ()
endfor

OCR助手训练部分

OCR助手训练的流程：

我们打开一个新的HALCON程序，点击左上方的助手，然后打开新的OCR，就会出现以下界面

我们按照以下界面的步骤操作，第一步加载自己需要训练的图片，第二步选择矩形或者旋转矩形画出自己要识别的字符的区域（一般都是选择矩形，如果字体倾斜的话，可以选择先在HALCON中处理好图像之后再用OCR进行训练）；第三步是在下面的输入文本框中输入希望读取的文本，注意，此部分只能处理至少三个字符，如下图：

两个字符的可以采用图片拼接的方法或者是抠图训练的方法来进行训练，图片拼接的方法我会写在后面

输入文本后，根据图片类型勾选设置，如字符背景，是否是结构化之类的，此部分配置完成后，点击第五步的快速应用设置，等待一下，简单易识别的字符OCR助手会快速分割识别完成的，如下图

这是OCR自动分割好的数据

然后点击字体，进入训练部分

点击保存，自定义TRF文件名字和存放位置

点击下面的学习输入文本，按照右边的提示框出现的字符输入正确的字符，输入完成后，点击开始训练，会提示未保存的数据要先进行保存，点击保存即可

保存完毕可以在下图红圈那个地方查看训练好的数据

在生成样本后，有时候样本数据会显得比较混乱，比如1的样本中有其他的字符，这样在识别时就会出现识别错误的问题，就需要选中其中错误的样本然后删除，或者粘贴到其他的样本中去，但是这种错误一般只发生在抠图训练中，采用OCR助手训练的样本不会出现此错误

样本太少的话可以点击此处生成变形样本，以对应不同的情况

可以选择自己想要的情况生成不同的样本

查看结果的精准度

后续测试生成的OMC和抠图部分的测试一样

也可以直接生成文本代码查找

* OCR 01: Code generated by OCR 01
* Overview: builds a manual text model, loads a trained MLP classifier,
* then for one image restricts processing to a rectangular ROI, segments
* the characters with find_text and classifies them with
* do_ocr_multi_class_mlp.
* OCR 01: 
* OCR 01: Prepare text model
* OCR 01: 
* NOTE(review): the numeric values below (character width/height, stroke
* width, ...) presumably come from the assistant's settings for the sample
* image - re-check them for other images.
create_text_model_reader ('manual', [], TextModel)
set_text_model_param (TextModel, 'manual_polarity', 'light_on_dark')
set_text_model_param (TextModel, 'manual_char_width', 94)
set_text_model_param (TextModel, 'manual_char_height', 164)
set_text_model_param (TextModel, 'manual_stroke_width', 16.2)
set_text_model_param (TextModel, 'manual_return_separators', 'false')
set_text_model_param (TextModel, 'manual_uppercase_only', 'true')
set_text_model_param (TextModel, 'manual_fragment_size_min', 100)
set_text_model_param (TextModel, 'manual_eliminate_border_blobs', 'true')
set_text_model_param (TextModel, 'manual_base_line_tolerance', 0.2)
set_text_model_param (TextModel, 'manual_max_line_num', 1)
* OCR 01: 
* OCR 01: Load classifier
* OCR 01: 
read_ocr_class_mlp ('E:/CSDN.omc', OcrHandle)
* OCR 01: ***************************************************************
* OCR 01: * The code which follows is to be executed once / image       *
* OCR 01: ***************************************************************
* OCR 01: 
* OCR 01: Load image
read_image (Image, 'E:/工作/数码管项目/Image/6.bmp')
* OCR 01: 
* OCR 01: Perform actual processing (once per ROI)
* OCR 01: Generate regions of interest
gen_rectangle1 (ROI_OCR_01_0, 385.377, 316.495, 715.699, 971.519)
* OCR 01: 
* OCR 01: Extract symbol regions (segmentation step)
* OCR 01: Only consider first channel for color images
access_channel (Image, TmpObj_Mono, 1)
reduce_domain (TmpObj_Mono, ROI_OCR_01_0, TmpObj_MonoReduced_OCR_01_0)
* OCR 01: 
* OCR 01: Orientation Correction (for consistent border and domain
* OCR 01: handling, this is always applied, even with no rotation)
* OCR 01: 
* The rotation part is the identity matrix here, so the composite
* transformation built below is a pure translation.
hom_mat2d_identity (TmpCtrl_MatrixIdentity)
* OCR 01: Apply transformation to image and domain
get_domain (TmpObj_MonoReduced_OCR_01_0, TmpObj_Domain)
* Remember the current 'clip_region' setting and switch it off while the
* regions are transformed; it is restored further below.
get_system ('clip_region', TmpCtrl_ClipRegion)
set_system ('clip_region', 'false')
* Expand the domain and take its bounding box; the upper-left corner
* defines the translation and the box size defines the cropped image size.
dilation_circle (TmpObj_Domain, TmpObj_DomainExpanded, 81)
affine_trans_region (TmpObj_DomainExpanded, TmpObj_DomainTransformedRaw, TmpCtrl_MatrixIdentity, 'true')
smallest_rectangle1 (TmpObj_DomainTransformedRaw, TmpCtrl_Row1, TmpCtrl_Col1, TmpCtrl_Row2, TmpCtrl_Col2)
hom_mat2d_translate (TmpCtrl_MatrixIdentity, -TmpCtrl_Row1, -TmpCtrl_Col1, TmpCtrl_MatrixTranslation)
hom_mat2d_compose (TmpCtrl_MatrixTranslation, TmpCtrl_MatrixIdentity, TmpCtrl_MatrixComposite)
affine_trans_region (TmpObj_Domain, TmpObj_DomainTransformed, TmpCtrl_MatrixComposite, 'true')
affine_trans_image (TmpObj_MonoReduced_OCR_01_0, TmpObj_ImageTransformed, TmpCtrl_MatrixComposite, 'constant', 'true')
* NOTE(review): the outputs of the next two calls (TmpObj_DomainExpanded,
* TmpObj_ImageTransformedExpanded) are not used by any later line of this
* listing.
dilation_circle (TmpObj_Domain, TmpObj_DomainExpanded, 81)
expand_domain_gray (TmpObj_ImageTransformed, TmpObj_ImageTransformedExpanded, 81)
reduce_domain (TmpObj_ImageTransformed, TmpObj_DomainTransformed, TmpObj_ImageTransformedReduced)
* Crop to the size of the expanded bounding box; the result replaces the
* ROI image variable used by the following steps.
crop_part (TmpObj_ImageTransformedReduced, TmpObj_MonoReduced_OCR_01_0, 0, 0, TmpCtrl_Col2-TmpCtrl_Col1+1, TmpCtrl_Row2-TmpCtrl_Row1+1)
* Restore the saved 'clip_region' setting
set_system ('clip_region', TmpCtrl_ClipRegion)
* Segment the text in the cropped ROI image using the manual text model
find_text (TmpObj_MonoReduced_OCR_01_0, TextModel, TmpCtrl_ResultHandle_OCR_01_0)
* OCR 01: 
* OCR 01: Read text (classification step)
* OCR 01: Gray values for reading must be dark on bright
invert_image (TmpObj_MonoReduced_OCR_01_0, TmpObj_MonoInverted_OCR_01_0)
* Fetch the segmented character regions from the find_text result
get_text_object (Symbols_OCR_01_0, TmpCtrl_ResultHandle_OCR_01_0, 'manual_all_lines')
dev_display (TmpObj_MonoInverted_OCR_01_0)
dev_set_draw ('fill')
dev_set_colored (3)
dev_display (Symbols_OCR_01_0)
* Classify every character region; the class names and confidences are
* returned in SymbolNames_OCR_01_0 and Confidences_OCR_01_0.
do_ocr_multi_class_mlp (Symbols_OCR_01_0, TmpObj_MonoInverted_OCR_01_0, OcrHandle, SymbolNames_OCR_01_0, Confidences_OCR_01_0)
* OCR 01: 
* OCR 01: Do something with the results

前面介绍了抠图训练方法和OCR助手训练的方法

现在开始介绍少于3个字符的训练方法。原理很简单，就是在图像中进行抠图复制的操作：比如图像中只有两个字符，再抠出一个字符并复制到旁边，图像中就有三个字符了，就可以用OCR助手对该图像进行训练了。

* Duplicate one character inside the image so that an image with fewer
* than three characters ends up with enough characters for the OCR
* assistant. The user frames a character with a rectangle; its gray values
* are copied to the area directly to the left of the rectangle.
read_image (Image, './Image/1.bmp')
dev_get_window (WindowHandle)
draw_rectangle1 (WindowHandle, Row1, Column1, Row2, Column2)
gen_rectangle1 (Rectangle, Row1, Column1, Row2, Column2)
reduce_domain (Image, Rectangle, ImageReduced)
* Pixel coordinates and gray values of the framed patch
get_region_points (Rectangle, Rows, Columns)
if (|Columns| == 0)
    throw ('No pixels selected: the drawn rectangle is empty')
endif
get_grayval (ImageReduced, Rows, Columns, Grayval)
* Shift by the full patch width in pixels. Column2-Column1 is one pixel
* short of the width, so the copy would overwrite the first column of the
* source patch.
Shift := max(Columns) - min(Columns) + 1
* The copy must stay inside the image; set_grayval cannot write to
* negative column coordinates
if (min(Columns) - Shift < 0)
    throw ('Not enough room to the left of the rectangle to paste the copy')
endif
* A single-value tuple is applied to every element of Columns, so no
* constant tuple of the same length has to be generated
set_grayval (Image, Rows, Columns - Shift, Grayval)
* Show the modified image
dev_display (Image)