1.使用easydl标注数据集
具体操作查看官网就行
2.将数据集转换为YOLOv5格式
首先将EasyDL数据集转换为VOC格式
import glob
import os
import random
import shutil
import math
# Build a VOC-style dataset from the labelled export in src_path:
#   * every regular file that is not a .jpg image is copied to Annotations/
#   * the .jpg images are split at random into train/ and val/
#   * train.txt and val.txt list the paths of the copied images
src_path = r'E:\ocr识别\jiangsu\images\labelimg'
dst_path = r'E:\ocr识别\jiangsu\voc'
dst_path_main = os.path.join(dst_path, 'VOC')
dst_path_xml = os.path.join(dst_path_main, 'Annotations')
dst_path_train = os.path.join(dst_path_main, 'train')
dst_path_val = os.path.join(dst_path_main, 'val')

# exist_ok=True replaces the separate "exists?" checks and is safe to re-run.
for folder in (dst_path_main, dst_path_xml, dst_path_train, dst_path_val):
    os.makedirs(folder, exist_ok=True)

# Classify each entry exactly once so that an image can never end up in both
# the Annotations folder and the image split.
files = os.listdir(src_path)
img_files = []
for file in files:
    src_file = os.path.join(src_path, file)
    # shutil.copyfile cannot copy directories; ignore anything that is not a
    # regular file.
    if not os.path.isfile(src_file):
        continue
    # Compare the extension case-insensitively so "X.JPG" counts as an image.
    if os.path.splitext(file)[1].lower() == '.jpg':
        img_files.append(src_file)
    else:
        shutil.copyfile(src_file, os.path.join(dst_path_xml, file))

total_img_count = len(img_files)
val_rate = 0.2
val_img_count = int(total_img_count * val_rate)
print(val_img_count)
val_files = random.sample(img_files, val_img_count)
# Keep the listing order for the training set instead of the arbitrary order
# a set difference would produce.
val_lookup = set(val_files)
train_files = [path for path in img_files if path not in val_lookup]
print(len(train_files))

# The context manager closes both list files even if a copy fails part-way.
with open(os.path.join(dst_path_main, 'train.txt'), 'w') as f_train, \
        open(os.path.join(dst_path_main, 'val.txt'), 'w') as f_val:
    for list_file, target_dir, split_files in (
        (f_val, dst_path_val, val_files),
        (f_train, dst_path_train, train_files),
    ):
        for file in split_files:
            copied_path = os.path.join(target_dir, os.path.basename(file))
            shutil.copyfile(file, copied_path)
            list_file.write(copied_path + '\n')
再将VOC格式转换为YOLOv5格式
import glob
import os
import pickle
import shutil
import xml.etree.ElementTree as ET
from os import listdir, getcwd
from os.path import join
# Dataset splits to convert; each one is a sub-directory of main_dir.
dirs = ['train', 'val']
# Class names; the class id written to a label file is the index in this list.
classes = ['cn']
# Root of the VOC-style dataset (its Annotations folder holds the XML files).
main_dir = r'E:\ocr识别\jiangsu\voc\VOC'
# Root folder that receives the converted YOLOv5 dataset.
out_dir = r'E:\ocr识别\jiangsu\voc\VOC\yolov5'
def getImagesInDir(dir_path):
    """Return the paths of the ``.jpg`` files directly inside *dir_path*.

    Args:
        dir_path: Directory to search. Sub-directories are not visited.

    Returns:
        A sorted list of paths, each one *dir_path* joined with the name of a
        matching file. The list is empty when nothing matches.
    """
    # Escape the directory part so glob metacharacters in the path itself
    # (for example "[" or "*") are matched literally; only "*.jpg" is a pattern.
    pattern = os.path.join(glob.escape(dir_path), '*.jpg')
    # glob.glob already returns a list; sorting makes the order deterministic.
    return sorted(glob.glob(pattern))
def convert(size, box):
    """Turn a pixel box into a normalised centre/size tuple.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` of the box, in the same units as
            *size*.

    Returns:
        ``(x, y, w, h)``: the box centre and the box size, with the horizontal
        values scaled by ``1 / width`` and the vertical ones by ``1 / height``.

    Raises:
        ZeroDivisionError: If the width or the height is 0.
    """
    scale_x = 1. / size[0]
    scale_y = 1. / size[1]
    x_min, x_max = box[0], box[1]
    y_min, y_max = box[2], box[3]
    # NOTE(review): the "- 1" shifts the centre by one pixel; presumably it
    # compensates for 1-based VOC pixel coordinates -- confirm for this data.
    center_x = (x_min + x_max) / 2.0 - 1
    center_y = (y_min + y_max) / 2.0 - 1
    return (
        center_x * scale_x,
        center_y * scale_y,
        (x_max - x_min) * scale_x,
        (y_max - y_min) * scale_y,
    )
def convert_annotation(dir_path, output_path, image_path):
    """Convert the VOC XML annotation of one image into a YOLO label file.

    The annotation is read from ``<main_dir>/Annotations/<image stem>.xml``
    and the result is written to ``<output_path>/labels/<image stem>.txt``,
    one line per object: ``<class id> <x> <y> <w> <h>``, where the four
    numbers come from ``convert``.

    Args:
        dir_path: Unused; annotations are always looked up under the
            module-level ``main_dir``. Kept so existing callers keep working.
        output_path: Directory in which the ``labels`` folder is created.
        image_path: Path of the image; only its base name is used.

    Raises:
        FileNotFoundError: If the image has no matching XML annotation.
    """
    stem = os.path.splitext(os.path.basename(image_path))[0]
    xml_path = os.path.join(main_dir, 'Annotations', stem + '.xml')

    # Passing the path (not a text-mode handle) lets ElementTree decode the
    # file with the encoding declared in the XML itself, and leaves no open
    # handle behind.
    root = ET.parse(xml_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    label_lines = []
    for obj in root.iter('object'):
        # A missing or empty <difficult> tag is treated as 0 instead of
        # raising AttributeError.
        difficult_node = obj.find('difficult')
        has_flag = difficult_node is not None and difficult_node.text
        difficult = int(difficult_node.text) if has_flag else 0
        # Every box is labelled 'cn'; the <name> stored in the XML is ignored.
        cls = 'cn'
        if cls not in classes or difficult == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = tuple(
            float(xmlbox.find(tag).text)
            for tag in ('xmin', 'xmax', 'ymin', 'ymax')
        )
        bb = convert((w, h), b)
        label_lines.append(
            str(cls_id) + " " + " ".join(str(a) for a in bb) + '\n'
        )

    out_label_path = os.path.join(output_path, 'labels')
    os.makedirs(out_label_path, exist_ok=True)
    # The context manager guarantees the label file is flushed and closed.
    with open(os.path.join(out_label_path, stem + '.txt'), 'w') as out_file:
        out_file.writelines(label_lines)
# Convert every split listed in dirs:
#   * copy its images to <out_dir>/<split>/images
#   * write one label file per image to <out_dir>/<split>/labels
#   * (re)write <main_dir>/<split>.txt with the paths of the source images
for dir_path in dirs:
    full_dir_path = os.path.join(main_dir, dir_path)
    output_path = os.path.join(out_dir, dir_path)
    yolo_img_path = os.path.join(output_path, 'images')
    # Creating the images folder also creates output_path itself.
    os.makedirs(yolo_img_path, exist_ok=True)

    image_paths = getImagesInDir(full_dir_path)
    # The context manager closes the list file even if a conversion fails.
    with open(full_dir_path + '.txt', 'w') as list_file:
        for image_path in image_paths:
            list_file.write(image_path + '\n')
            convert_annotation(full_dir_path, output_path, image_path)
            src_img_name = os.path.basename(image_path)
            shutil.copy(image_path, os.path.join(yolo_img_path, src_img_name))
    print("Finished processing: " + dir_path)
3.将转换后的数据放到F:\yolov5-master\data目录下
4.在F:\yolov5-master\data目录下增加jiangsu.yaml
文件内容
train: F:\yolov5-master\data\yolov5\train
val: F:\yolov5-master\data\yolov5\val
nc: 1
names: ['cn']
5.执行目标检测训练
python train.py --img 320 --batch 1 --epochs 100 --data jiangsu.yaml --weights yolov5s.pt
6.切割图片
将图片进行切割,每部分包含一个单字
7.使用单字识别代码进行训练
对单字进行训练,生成模型
8.编写服务,调用模型进行测试
服务中,先使用目标检测模型,获取到每个单字的位置,然后根据位置切割图片,再使用单字模型获取到单字的文本,这样每个单字的图片文本就有了,根据入参返回相应的单字位置组合即可。
主要是提供一个思路。