1、前言
文字识别(OCR)已经被广泛地应用于各种场景,包括自然场景文字识别、车牌识别、票据识别等,是机器服务人类的重要场景之一。paddleocr是一个功能十分强大、业界领先的OCR工具库,能够解决OCR部署中的许多问题。本文将借助地平线X3派5TOPS的算力,详细说明把paddleocr中的模型部署到地平线X3派上的过程。本文中提到的文本检测模型与文本识别模型可参考https://developer.horizon.ai/forumDetail/118363917083902710。
paddleocr项目地址:https://github.com/PaddlePaddle/PaddleOCR
本项目测试地址:https://github.com/Rex-LK/ai_arm_learning
本项目模型以及源代码: https://pan.baidu.com/s/193hnW7jm5ELw6nEkaKnwsw?pwd=mjgt 提取码: mjgt
2、导出onnx
首先按照paddle官方文档安装环境,并在本地测试文本检测和文本识别,推理正常之后就可以导出onnx了。本文测试的模型链接为:
-
文本检测: https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar
-
文本识别: https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar (注意:2.2节的导出命令使用的模型目录是 en_PP-OCRv3_rec_infer,第4节代码中的字符表也只包含英文字符,请确保下载的识别模型与导出命令中的目录保持一致)
最近paddle对onnx的支持相较于之前有了很大的提升,可以通过下面的命令进行导出:
2.1、导出文本检测模型
# Step 1: convert the Paddle inference model of the text detector to ONNX
# (opset 11) and write it to inference/onnx/det.onnx.
paddle2onnx --model_dir inference/ch_PP-OCRv3_det_infer/ \
--model_filename inference.pdmodel \
--params_filename inference.pdiparams \
--save_file inference/onnx/det.onnx \
--enable_dev_version True \
--opset_version 11
# Step 2: run paddle2onnx.optimize on det.onnx and save the result as det_sim.onnx.
python -m paddle2onnx.optimize --input_model inference/onnx/det.onnx \
--output_model inference/onnx/det_sim.onnx
# Step 3: run the optimizer again with the input "x" pinned to the static
# shape 1x3x640x640; the result is det_static.onnx.
python -m paddle2onnx.optimize --input_model inference/onnx/det_sim.onnx \
--output_model inference/onnx/det_static.onnx \
--input_shape_dict "{'x':[1,3,640,640]}"
2.2、导出文本识别模型
# Step 1: convert the Paddle inference model of the text recognizer to ONNX
# (opset 11) and write it to inference/onnx/rec.onnx.
# Every continued line must end with a backslash, otherwise the shell treats
# the following option as a separate command.
paddle2onnx --model_dir inference/en_PP-OCRv3_rec_infer/ \
    --model_filename inference.pdmodel \
    --params_filename inference.pdiparams \
    --save_file inference/onnx/rec.onnx \
    --enable_dev_version True \
    --opset_version 11
# Step 2: run paddle2onnx.optimize on rec.onnx and save the result as rec_sim.onnx.
# No trailing backslash on the last line, so the next command stays separate.
python -m paddle2onnx.optimize --input_model inference/onnx/rec.onnx \
    --output_model inference/onnx/rec_sim.onnx
# Step 3: run the optimizer again with the input "x" pinned to the static
# shape 1x3x48x320; the result is rec_static.onnx.
python -m paddle2onnx.optimize --input_model inference/onnx/rec_sim.onnx \
    --output_model inference/onnx/rec_static.onnx \
    --input_shape_dict "{'x':[1,3,48,320]}"
2.3、修改ir_version
导出onnx后,直接使用AI工具链进行量化会报错,需要将onnx模型的ir_version设置为7
import onnx

# Rewrite both exported models in place with ir_version forced to 7, which is
# the value the quantization toolchain step described above requires.
for path in ('inference/onnx/det_static.onnx', 'inference/onnx/rec_static.onnx'):
    model = onnx.load(path)
    model.ir_version = 7
    onnx.save(model, path)
3、模型量化
3.1、文本检测模型量化配置文件
# Quantization config for the text detection model.
# Each key must be nested under its section; without the indentation every
# key becomes a top-level entry and the four sections are empty.
model_parameters:
  onnx_model: 'det_static.onnx'
  march: "bernoulli2"
  layer_out_dump: False
  log_level: 'debug'
  working_dir: 'model_output'
  output_model_file_prefix: 'det_static'
input_parameters:
  input_type_rt: 'nv12'
  input_layout_rt: 'NHWC'
  input_type_train: 'rgb'
  input_layout_train: 'NCHW'
  norm_type: 'data_mean_and_scale'
  # Same normalization as the onnx branch of det_model.predict:
  # mean = (0.485, 0.456, 0.406) * 255, scale = 1 / ((0.229, 0.224, 0.225) * 255).
  mean_value: 123.68 116.28 103.53
  scale_value: 0.0171 0.0175 0.0174
calibration_parameters:
  cal_data_dir: './calibration_data_rgb_f32'
  preprocess_on: False
  calibration_type: 'kl'
compiler_parameters:
  compile_mode: 'latency'
  debug: False
  core_num: 2
  optimize_level: 'O3'
3.2、文本识别模型配置文件
# Quantization config for the text recognition model.
# Each key must be nested under its section; without the indentation every
# key becomes a top-level entry and the four sections are empty.
model_parameters:
  onnx_model: '/home/rex/Desktop/paddle_2_x3/model/rec_static.onnx'
  output_model_file_prefix: 'rec_static'
  march: 'bernoulli2'
input_parameters:
  input_type_train: 'rgb'
  input_layout_train: 'NCHW'
  input_type_rt: 'nv12'
  norm_type: 'data_mean_and_scale'
  # Close to the onnx branch of rec_model.preprocess, (x / 255 - 0.5) / 0.5,
  # which is equivalent to (x - 127.5) * (1 / 127.5).
  mean_value: 128
  scale_value: 0.0078125
  input_layout_rt: 'NHWC'
calibration_parameters:
  cal_data_dir: './calibration_data_rgb_f32'
  calibration_type: 'max'
  max_percentile: 0.9999
compiler_parameters:
  compile_mode: 'latency'
  optimize_level: 'O3'
  debug: False
  core_num: 2
4、上板测试
本项目提供了两种推理方式:"onnx"和"x3",通过infer_type的值进行设置:当infer_type=="onnx"时,表示使用onnx推理;当infer_type=="x3"时,表示在x3上进行推理。代码如下:
import numpy as np
import cv2
import argparse
import pyclipper
import torch
from torch.autograd import Variable
import os
def get_hw(pro):
    """Return the ``(height, width)`` pair of a tensor description.

    Args:
        pro: object exposing ``layout`` (a string) and ``shape`` (an indexable
            sequence of dimensions).

    Returns:
        ``(shape[2], shape[3])`` when ``layout`` is ``"NCHW"``; for any other
        layout ``(shape[1], shape[2])``.
    """
    if pro.layout == "NCHW":
        return pro.shape[2], pro.shape[3]
    return pro.shape[1], pro.shape[2]
def bgr2nv12_opencv(image):
    """Convert a BGR image into a flat NV12 buffer.

    The image is first converted with ``cv2.COLOR_BGR2YUV_I420``; the luma
    part is copied unchanged and the two chroma planes that follow it are
    interleaved element by element.

    Args:
        image: array whose first two dimensions are height and width. Both
            must be even because the chroma planes are subsampled by two.

    Returns:
        1-D array of ``height * width * 3 // 2`` elements.

    Raises:
        ValueError: if the height or the width is odd.
    """
    height, width = image.shape[0], image.shape[1]
    if height % 2 or width % 2:
        # Fail early with a readable message instead of an obscure reshape /
        # OpenCV assertion error further down.
        raise ValueError(
            "NV12 conversion needs even dimensions, got {}x{}".format(width, height)
        )
    area = height * width
    yuv420p = cv2.cvtColor(image, cv2.COLOR_BGR2YUV_I420).reshape((area * 3 // 2,))
    y = yuv420p[:area]
    # The tail holds two planar chroma blocks of area // 4 samples each;
    # transposing (2, n) -> (n, 2) interleaves them sample by sample.
    uv_planar = yuv420p[area:].reshape((2, area // 4))
    uv_packed = uv_planar.transpose((1, 0)).reshape((area // 2,))
    nv12 = np.zeros_like(yuv420p)
    nv12[:area] = y
    nv12[area:] = uv_packed
    return nv12
def draw_bbox(img_path, result, color=(128, 240, 128), thickness=3):
    """Draw closed quadrilaterals on a copy of an image.

    Args:
        img_path: either an image array or a path string; a string is loaded
            with ``cv2.imread``.
        result: iterable of arrays, each holding at least four ``(x, y)``
            points; the first four are connected in order and closed.
        color: line color passed to ``cv2.line``.
        thickness: line thickness passed to ``cv2.line``.

    Returns:
        A new image with the boxes drawn; the input array is not modified.

    Raises:
        FileNotFoundError: if ``img_path`` is a string and the image cannot
            be read.
    """
    if isinstance(img_path, str):
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError("cannot read image: {}".format(img_path))
    else:
        image = img_path
    canvas = image.copy()
    for quad in result:
        quad = quad.astype(int)
        for i in range(4):
            # Connect corner i to the next one, wrapping 3 -> 0 to close the box.
            start = tuple(int(v) for v in quad[i])
            end = tuple(int(v) for v in quad[(i + 1) % 4])
            cv2.line(canvas, start, end, color, thickness)
    return canvas
class det_model:
    """Text detection model with two back ends.

    ``infer_type == "onnx"`` runs the model through ONNX Runtime on the CPU;
    ``infer_type == "x3"`` loads it with ``hobot_dnn.pyeasy_dnn`` and feeds it
    NV12 data.
    """

    def __init__(self, model_path, infer_type):
        """Load the model.

        Args:
            model_path: path of the model file for the selected back end.
            infer_type: ``"onnx"`` or ``"x3"``.

        Raises:
            ValueError: if ``infer_type`` is neither ``"onnx"`` nor ``"x3"``.
        """
        self.infer_type = infer_type
        if self.infer_type == "onnx":
            import onnxruntime
            self.model = onnxruntime.InferenceSession(model_path, providers=["CPUExecutionProvider"])
        elif self.infer_type == "x3":
            from hobot_dnn import pyeasy_dnn
            self.model = pyeasy_dnn.load(model_path)
            h, w = get_hw(self.model[0].inputs[0].properties)
            # cv2.resize expects (width, height).
            self.des_dim = (w, h)
        else:
            raise ValueError("unsupported infer_type: {!r}".format(infer_type))
        # Probability threshold used to binarize the model output.
        self.thr = 0.5
        # Tunable parameter controlling how far detected regions are expanded.
        self.ratio_prime = 2

    def predict(self, img, d_size = (640,640), min_area: int = 100):
        """Detect text regions in a BGR image.

        Args:
            img: BGR image array.
            d_size: ``(width, height)`` the image is resized to in onnx mode;
                in x3 mode the size read from the model is used instead.
            min_area: expanded polygons with a smaller contour area produce
                no box.

        Returns:
            ``(dilated_polys, boxes_list)``: the list of expanded polygons
            and an array with one 4-point min-area rectangle per polygon that
            passed the ``min_area`` filter. Coordinates refer to ``img``.
        """
        img0_h, img0_w = img.shape[:2]
        if self.infer_type == "onnx":
            mean = [0.485, 0.456, 0.406]
            std = [0.229, 0.224, 0.225]
            image_input = cv2.resize(img, d_size)
            image_input = image_input[..., ::-1]  # bgr -> rgb
            image_input = (image_input / 255.0 - mean) / std
            image_input = image_input.astype(np.float32)
            image_input = image_input.transpose(2, 0, 1)
            image_input = np.ascontiguousarray(image_input)
            image_input = image_input[None, ...]
            preds = self.model.run(["sigmoid_0.tmp_0"], {"x": image_input})[0]
        elif self.infer_type == "x3":
            image_input = cv2.resize(img, self.des_dim, interpolation=cv2.INTER_AREA)
            image_input = bgr2nv12_opencv(image_input)
            preds = self.model[0].forward(image_input)
            preds = preds[0].buffer
        else:
            raise ValueError("unsupported infer_type: {!r}".format(self.infer_type))

        # Binarize with the configured threshold and scale the mask back to
        # the size of the input image.
        preds = np.where(preds[0][0] > self.thr, 255, 0).astype(np.uint8)
        preds = cv2.resize(preds, (img0_w, img0_h))
        # findContours returns (contours, hierarchy) or (image, contours,
        # hierarchy) depending on the OpenCV major version; [-2] is the
        # contour list in both cases.
        contours = cv2.findContours(preds, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

        dilated_polys = []
        for poly in contours:
            poly = poly[:, 0, :]
            perimeter = cv2.arcLength(poly, True)
            if perimeter == 0:
                # A degenerate (single point) contour would divide by zero.
                continue
            D_prime = cv2.contourArea(poly) * self.ratio_prime / perimeter  # formula(10) in the thesis
            pco = pyclipper.PyclipperOffset()
            pco.AddPath(poly, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
            solution = pco.Execute(D_prime)
            # Check the count before building the array: several polygons of
            # different lengths cannot be stacked into one regular array.
            if len(solution) != 1:
                continue
            dilated_poly = np.array(solution)
            if dilated_poly.size == 0 or dilated_poly.dtype != np.int_:
                continue
            dilated_polys.append(dilated_poly)

        boxes_list = []
        for cnt in dilated_polys:
            if cv2.contourArea(cnt) < min_area:
                continue
            rect = cv2.minAreaRect(cnt)
            box = cv2.boxPoints(rect).astype(np.int_)
            boxes_list.append(box)
        boxes_list = np.array(boxes_list)
        return dilated_polys, boxes_list
class rec_model:
    """Text recognition model with two back ends.

    ``infer_type == "onnx"`` runs the model through ONNX Runtime on the CPU;
    ``infer_type == "x3"`` loads it with ``hobot_dnn.pyeasy_dnn`` and feeds it
    NV12 data.
    """

    def __init__(self, model_path, converter, infer_type):
        """Load the model.

        Args:
            model_path: path of the model file for the selected back end.
            converter: object whose ``decode(preds, preds_size, raw=...)``
                turns class indices into text.
            infer_type: ``"onnx"`` or ``"x3"``.

        Raises:
            ValueError: if ``infer_type`` is neither ``"onnx"`` nor ``"x3"``.
        """
        self.infer_type = infer_type
        if self.infer_type == "onnx":
            import onnxruntime
            self.model = onnxruntime.InferenceSession(model_path, providers=["CPUExecutionProvider"])
        elif self.infer_type == "x3":
            from hobot_dnn import pyeasy_dnn
            self.model = pyeasy_dnn.load(model_path)
            h, w = get_hw(self.model[0].inputs[0].properties)
            # cv2.resize expects (width, height).
            self.des_dim = (w, h)
        else:
            raise ValueError("unsupported infer_type: {!r}".format(infer_type))
        self.converter = converter

    def preprocess(self, image_o, image_d_size, imagenet_mean=(0.5,), imagenet_std=(0.5,)):
        """Turn a BGR crop into the input expected by the selected back end.

        Args:
            image_o: BGR image array.
            image_d_size: ``(width, height)`` used for resizing in onnx mode;
                in x3 mode the size read from the model is used instead.
            imagenet_mean: mean subtracted after scaling pixels to [0, 1]
                (onnx mode only).
            imagenet_std: divisor applied after the mean subtraction (onnx
                mode only).

        Returns:
            onnx mode: float32 array laid out as ``(1, channels, height, width)``.
            x3 mode: flat NV12 buffer produced by ``bgr2nv12_opencv``.
        """
        if self.infer_type == "onnx":
            image_input = cv2.resize(image_o, image_d_size)  # resize
            image_input = image_input[..., ::-1]  # bgr -> rgb
            image_input = (image_input / 255.0 - np.asarray(imagenet_mean)) / np.asarray(imagenet_std)  # normalize
            image_input = image_input.astype(np.float32)  # float64 -> float32
            image_input = image_input.transpose(2, 0, 1)  # HWC -> CHW
            image_input = np.ascontiguousarray(image_input)  # contiguous array memory
            image_input = image_input[None, ...]  # add the batch axis
            return image_input
        elif self.infer_type == "x3":
            image_input = cv2.resize(image_o, self.des_dim, interpolation=cv2.INTER_AREA)
            image_input = bgr2nv12_opencv(image_input)
            return image_input
        raise ValueError("unsupported infer_type: {!r}".format(self.infer_type))

    def predict(self, img):
        """Recognize the text in a BGR crop.

        Args:
            img: BGR image array containing one text region.

        Returns:
            ``(raw_pred, sim_pred)``: the results of ``converter.decode`` with
            ``raw=True`` and ``raw=False`` respectively.
        """
        image_input = self.preprocess(img, (320, 48))
        if self.infer_type == "onnx":
            preds = self.model.run(["softmax_2.tmp_0"], {"x": image_input})[0]
        elif self.infer_type == "x3":
            preds = self.model[0].forward(image_input)
            preds = preds[0].buffer
        preds = torch.from_numpy(preds)
        # Pick the best-scoring class index along axis 2.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        raw_pred = self.converter.decode(preds.data, preds_size.data, raw=True)
        sim_pred = self.converter.decode(preds.data, preds_size.data, raw=False)
        return raw_pred, sim_pred
class Run:
    """End-to-end OCR pipeline: detect text boxes, then recognize each box."""

    def __init__(self, args):
        """Build the detector and the recognizer from parsed arguments.

        Args:
            args: namespace providing ``det_model_path``, ``rec_model_path``,
                ``infer_type`` and ``output_folder``.
        """
        # Imported here so the class does not depend on a global that only
        # exists when this file is executed as a script.
        import utils

        self.args = args
        self.det_net = det_model(self.args.det_model_path, self.args.infer_type)
        # The alphabet must end with a space character.
        # Raw string: the backslash before "]" is a literal character, not an
        # escape sequence.
        alphabet = r"""0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~!"#$%&'()*+,-./ """
        converter = utils.strLabelConverter(alphabet)
        self.rec_net = rec_model(self.args.rec_model_path, converter, args.infer_type)

    def run(self, img0):
        """Run detection and recognition on one BGR image.

        Every recognized box is printed as ``raw => simplified``. When
        ``args.output_folder`` is set, ``contour.png`` (expanded polygons)
        and ``predict.jpg`` (boxes plus recognized text) are written there.

        Args:
            img0: BGR image array.
        """
        img_rec = img0.copy()
        img0_h, img0_w = img0.shape[:2]
        contours, boxes_list = self.det_net.predict(img0)
        for box in boxes_list:
            # Keep only the pixels inside the box, black out the rest.
            mask_t = np.zeros((img0_h, img0_w), dtype=np.uint8)
            cv2.fillPoly(mask_t, [box], (255), 8, 0)
            pick_img = cv2.bitwise_and(img0, img0, mask=mask_t)
            x, y, w, h = cv2.boundingRect(box)
            # Expanded boxes may stick out of the image; clamp the crop so a
            # negative start index cannot produce an empty or wrapped slice.
            x0, y0 = max(x, 0), max(y, 0)
            x1, y1 = min(x + w, img0_w), min(y + h, img0_h)
            if x1 <= x0 or y1 <= y0:
                continue
            rec_infer_img = pick_img[y0:y1, x0:x1, :]
            raw_pred, sim_pred = self.rec_net.predict(rec_infer_img)
            print('%-20s => %-20s' % (raw_pred, sim_pred))
            if self.args.output_folder:
                cv2.putText(img_rec, sim_pred, (x, y + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 1)
        if self.args.output_folder:
            os.makedirs(self.args.output_folder, exist_ok=True)
            # cv2.imwrite expects BGR, so draw on a plain copy of the input
            # instead of a channel-reversed view.
            imgc = img0.copy()
            cv2.drawContours(imgc, contours, -1, (22, 222, 22), 1, cv2.LINE_AA)
            cv2.imwrite(os.path.join(self.args.output_folder, 'contour.png'), imgc)
            img_draw = draw_bbox(img_rec, boxes_list)
            cv2.imwrite(os.path.join(self.args.output_folder, 'predict.jpg'), img_draw)
def _str2bool(value):
    """Parse a command-line boolean such as ``true``/``false``/``1``/``0``."""
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    if text in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (value,))


def init_args(argv=None):
    """Parse the command-line options of the OCR demo.

    Args:
        argv: optional list of argument strings; ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Returns:
        Namespace with ``infer_type``, ``det_model_path``, ``rec_model_path``,
        ``image_path``, ``show`` and ``output_folder``.
    """
    parser = argparse.ArgumentParser(description='paddleocr')
    parser.add_argument('--infer_type', choices=["onnx", "x3"], default='x3', type=str)
    parser.add_argument('--det_model_path', default='model/det_static.bin', type=str)
    parser.add_argument('--rec_model_path', default='model/rec_static.bin', type=str)
    parser.add_argument('--image_path', default='word.jpg', type=str, help='img path for predict')
    # type=bool would turn every non-empty string, including "False", into
    # True; parse the text explicitly instead.
    parser.add_argument('--show', default=True, type=_str2bool)
    parser.add_argument('--output_folder', default='./output', type=str, help='img path for output')
    args = parser.parse_args(argv)
    return args
if __name__ == '__main__':
    # Run looks up ``utils`` as a module-level name, so import it before the
    # pipeline is constructed.
    import utils
    args = init_args()
    img0 = cv2.imread(args.image_path)
    if img0 is None:
        # cv2.imread returns None instead of raising when the file is missing
        # or cannot be decoded.
        raise SystemExit("failed to read image: %s" % args.image_path)
    work = Run(args)
    work.run(img0)
下图为检测结果,模型在下面这张图片上的效果还是不错的,文本检测结果:
文本识别结果:
--m-----o----c---k---u---p-- --o---o--1- => mockup oo1
--b---i-l----l----b----o----a----r---d-- => billboard
5、总结
本文主要介绍了paddleocr模型在X3派上的部署流程,主要分为导出onnx、模型量化、模型部署三个部分,paddleocr模型库中有众多关于OCR的模型,了解了这个paddleocr在X3派上的部署方式后,就可以快乐的进行其他模型的部署了。