yolov5之识别点选

liberty888

已于 2023-11-17 19:14:40 修改

阅读量247

点赞数

于 2023-07-19 14:42:54 首次发布

本文链接：https://blog.csdn.net/liberty888/article/details/131808351

版权

python 同时被 2 个专栏收录

5 篇文章 0 订阅

订阅专栏

ocr识别

3 篇文章 0 订阅

订阅专栏

文章描述了一个OCR识别系统的构建过程，包括使用EasyDL标注数据集，转换为VOC和V5格式，将数据集应用于Yolov5目标检测模型训练，之后对图片进行切割并使用单字识别模型，最后构建服务进行文字检测和识别。

摘要由CSDN通过智能技术生成

1.使用easydl标注数据集

具体操作查看官网就行

2.将数据集转换为v5格式

首先将easydl数据集转换为voc

import glob
import os
import random
import shutil
import math
# Copy every non-jpg file (the annotation XMLs) into the Annotations folder,
# randomly split the jpg images between the train and val folders by ratio,
# and write train.txt / val.txt, which list the paths of the copied images.

src_path = r'E:\ocr识别\jiangsu\images\labelimg'
dst_path = r'E:\ocr识别\jiangsu\voc'
dst_path_main = os.path.join(dst_path, 'VOC')
dst_path_xml = os.path.join(dst_path_main, 'Annotations')
dst_path_train = os.path.join(dst_path_main, 'train')
dst_path_val = os.path.join(dst_path_main, 'val')

# exist_ok=True replaces the separate "exists?" check and is safe to re-run.
for folder in (dst_path_main, dst_path_xml, dst_path_train, dst_path_val):
    os.makedirs(folder, exist_ok=True)

files = os.listdir(src_path)
for file in files:
    src_file = os.path.join(src_path, file)
    # Images are handled below. The extension is compared case-insensitively
    # so that "x.JPG" is not copied as if it were an annotation.
    if os.path.splitext(file)[1].lower() == '.jpg':
        continue
    # shutil.copyfile cannot copy directories, so skip anything that is not
    # a regular file instead of crashing on it.
    if not os.path.isfile(src_file):
        continue
    shutil.copyfile(src_file, os.path.join(dst_path_xml, file))

img_files = glob.glob(os.path.join(src_path, '*.jpg'))
print(type(img_files))
total_img_count = len(img_files)
val_rate = 0.2  # fraction of the images that goes to the validation set
val_img_count = int(total_img_count * val_rate)
print(val_img_count)
val_files = random.sample(img_files, val_img_count)
# Everything that was not sampled for validation is used for training.
# Filtering the original list keeps the glob order, so train.txt has a
# stable order for a given validation sample.
val_file_set = set(val_files)
train_files = [path for path in img_files if path not in val_file_set]
print(len(train_files))

# The context manager closes both list files even if a copy fails midway.
with open(os.path.join(dst_path_main, 'train.txt'), 'w') as f_train, \
        open(os.path.join(dst_path_main, 'val.txt'), 'w') as f_val:
    for file in val_files:
        dst_file = os.path.join(dst_path_val, os.path.basename(file))
        shutil.copyfile(file, dst_file)
        f_val.write(dst_file + '\n')

    for file in train_files:
        dst_file = os.path.join(dst_path_train, os.path.basename(file))
        shutil.copyfile(file, dst_file)
        f_train.write(dst_file + '\n')

再将voc转换为v5

import glob
import os
import pickle
import shutil
import xml.etree.ElementTree as ET
from os import listdir, getcwd
from os.path import join

# Sub-folders of main_dir to convert; each name is also used as the
# sub-folder name under out_dir.
dirs = ['train', 'val']
# Class names; the class id written to a label file is the index in this list.
classes = ['cn']

# Root of the VOC-style dataset (holds Annotations and the folders in dirs).
main_dir = r'E:\ocr识别\jiangsu\voc\VOC'
# Root folder that receives the converted YOLOv5 dataset.
out_dir = r'E:\ocr识别\jiangsu\voc\VOC\yolov5'


def getImagesInDir(dir_path):
    """Return a list with the paths of the ``.jpg`` files inside *dir_path*.

    The search pattern is ``dir_path + '/*.jpg'``; matches are returned in
    the order ``glob.glob`` produces them. A folder without matches (or one
    that does not exist) yields an empty list.
    """
    jpg_pattern = dir_path + '/*.jpg'
    return list(glob.glob(jpg_pattern))

def convert(size, box):
    """Turn a corner-style box into a normalized center/size box.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` of the box.

    Returns:
        ``(x, y, w, h)``: the box center and the box width/height, each
        multiplied by the reciprocal of the matching image dimension.

    Raises:
        ZeroDivisionError: if the image width or height is 0.
    """
    scale_x = 1. / (size[0])
    scale_y = 1. / (size[1])

    # The center is shifted by one pixel before scaling.
    # NOTE(review): presumably this maps 1-based VOC pixel coordinates to
    # 0-based ones - confirm it is wanted for this dataset.
    center_x = (box[0] + box[1]) / 2.0 - 1
    center_y = (box[2] + box[3]) / 2.0 - 1
    box_w = box[1] - box[0]
    box_h = box[3] - box[2]

    return (center_x * scale_x, center_y * scale_y,
            box_w * scale_x, box_h * scale_y)

def convert_annotation(dir_path, output_path, image_path):
    """Write the YOLO label file for one image from its VOC XML annotation.

    The annotation is looked up by the image's base name inside
    ``<main_dir>/Annotations``. The result is written to
    ``<output_path>/labels/<base name>.txt`` with one
    ``class_id x y w h`` line per kept object; the file is created even
    when no object is kept.

    Args:
        dir_path: Unused. Kept so existing callers keep working; the
            annotation folder is always derived from ``main_dir``.
        output_path: Folder in which the ``labels`` sub-folder is created.
        image_path: Path of the image whose annotation is converted; only
            its base name (without extension) is used.

    Raises:
        FileNotFoundError: if the XML file for the image does not exist.
        xml.etree.ElementTree.ParseError: if the XML is malformed.
    """
    basename_no_ext = os.path.splitext(os.path.basename(image_path))[0]

    xml_path = os.path.join(main_dir, 'Annotations', basename_no_ext + '.xml')
    out_label_path = os.path.join(output_path, 'labels')
    os.makedirs(out_label_path, exist_ok=True)

    # Hand the path to the parser instead of a text-mode file object: the
    # parser then honours the encoding declared in the XML rather than the
    # locale default, and no file handle is left open.
    root = ET.parse(xml_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    label_lines = []
    for obj in root.iter('object'):
        # A missing or empty <difficult> tag is treated as "not difficult"
        # instead of raising.
        difficult = int(obj.findtext('difficult') or 0)
        # Every object is labelled with the single class 'cn'; the <name>
        # stored in the XML is deliberately ignored.
        cls = 'cn'
        if cls not in classes or difficult == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        # convert() expects the order (xmin, xmax, ymin, ymax).
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
             float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w, h), b)
        label_lines.append(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

    # The context manager guarantees the label file is flushed and closed.
    with open(os.path.join(out_label_path, basename_no_ext + '.txt'), 'w') as out_file:
        out_file.writelines(label_lines)

# Convert every dataset split: list its images in "<split>.txt" next to the
# split folder, write one YOLO label file per image and copy the image into
# the YOLOv5 output tree.
for dir_path in dirs:
    full_dir_path = os.path.join(main_dir, dir_path)
    output_path = os.path.join(out_dir, dir_path)
    yolo_img_path = os.path.join(output_path, 'images')
    # Creating the images folder also creates output_path itself.
    os.makedirs(yolo_img_path, exist_ok=True)

    image_paths = getImagesInDir(full_dir_path)

    # The context manager closes the list file even if a conversion fails.
    with open(full_dir_path + '.txt', 'w') as list_file:
        for image_path in image_paths:
            list_file.write(image_path + '\n')
            convert_annotation(full_dir_path, output_path, image_path)
            src_img_name = os.path.basename(image_path)
            shutil.copy(image_path, os.path.join(yolo_img_path, src_img_name))

    print("Finished processing: " + dir_path)

3.将转换后的数据放到F:\yolov5-master\data目录下

4.在F:\yolov5-master\data目录下增加jiangsu.yaml

文件内容

train: F:\yolov5-master\data\yolov5\train

val: F:\yolov5-master\data\yolov5\val

nc: 1

names: ['cn']

5.执行目标检测训练

python train.py --img 320 --batch 1 --epochs 100 --data jiangsu.yaml --weights yolov5s.pt

6.切割图片

将图片进行切割，每部分包含一个单字

7.使用单字识别代码进行训练

对单字进行训练，生成模型

8.编写服务，调用模型进行测试

服务中，先使用目标检测模型获取每个单字的位置，然后根据位置切割图片，再使用单字识别模型获取每个单字的文本，这样每个单字的图片、位置和文本就都有了，最后根据入参返回相应的单字位置组合即可。

主要是提供一个思路。

liberty888

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

专栏目录