OpenCV视频学习笔记（10）-项目实战-文档扫描OCR识别

最新推荐文章于 2024-06-27 11:23:20 发布

长颈鹿与麋鹿

最新推荐文章于 2024-06-27 11:23:20 发布

阅读量474

点赞数

本文链接：https://blog.csdn.net/weixin_41802429/article/details/108604221

版权

十、项目实战-文档扫描OCR识别

实践步骤：
首先将包含代码的文件夹复制到opencv工程中；
打开scan.py；
配置图片参数：Run --> Edit Configurations --> scan --> Parameters，输入 --image ./images/receipt.jpg --> Apply
代码处理步骤：
（1）读取图片（图片拍摄角度任意）；
（2）边缘检测；
（3）轮廓检测；
（4）一系列图片变换操作（平移、旋转、翻转），把图片位置调正；
（5）字符识别，中文、英文、数字；
因为作者使用的opencv版本不是最新版的，为了顺利运行代码，这里切换一下用的Python版本，换成anaconda中配置的py3.6，因为里面用的opencv版本和作者的相同：
File --> Settings --> Project: opencv --> Python Interpreter --> 设置 --> Add --> Conda Environment --> Existing environment --> 选择已经存在的3.6版本即可；
# 导入工具包 (import the required packages)
import numpy as np
import argparse
import cv2

# Parse the command-line arguments: the path of the image to scan is required.
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
                help="Path to the image to be scanned")
# vars() turns the Namespace into a plain dict so it can be read as args["image"].
args = vars(ap.parse_args())

def order_points(pts):
    """Order four 2-D points as top-left, top-right, bottom-right, bottom-left.

    The ordering assumes image coordinates (x to the right, y downwards).

    Args:
        pts: Array-like of shape (4, 2) holding (x, y) corner coordinates
            in any order.

    Returns:
        A (4, 2) float32 array with rows ordered
        [top-left, top-right, bottom-right, bottom-left].
    """
    # Accept plain lists/tuples as well as NumPy arrays.
    pts = np.asarray(pts)

    # Four corner coordinates in total.
    rect = np.zeros((4, 2), dtype="float32")

    # x + y is smallest at the top-left corner and largest at the bottom-right.
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # y - x is smallest at the top-right corner and largest at the bottom-left.
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]

    return rect

def four_point_transform(image, pts):
    """Warp the quadrilateral described by ``pts`` to an upright rectangle.

    Args:
        image: Source image passed to ``cv2.warpPerspective``.
        pts: Four (x, y) corner points of the region, in any order; they are
            ordered with ``order_points`` before use.

    Returns:
        The perspective-corrected image, ``maxWidth`` x ``maxHeight`` pixels,
        where each dimension is the longer of the two opposite edges.
    """
    # Order the input corners: top-left, top-right, bottom-right, bottom-left.
    rect = order_points(pts)
    (tl, tr, br, bl) = rect

    # Output width: Euclidean length of the bottom and top edges. The region
    # is not guaranteed to be a rectangle, so keep the longer of the two.
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))

    # Output height: length of the right and left edges, again the longer one.
    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))

    # Destination corners, in the same order as ``rect``.
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype="float32")

    # 3x3 perspective matrix mapping the source corners onto the destination.
    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))

    # Return the warped result.
    return warped

def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping its aspect ratio.

    Args:
        image: Input image; its first two shape entries are (height, width).
        width: Target width in pixels. When given it takes precedence and
            ``height`` is ignored.
        height: Target height in pixels, used only when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        The resized image, or ``image`` itself (not a copy) when neither
        ``width`` nor ``height`` is given.
    """
    (h, w) = image.shape[:2]

    # Nothing requested: hand back the input unchanged.
    if width is None and height is None:
        return image

    if width is None:
        # Scale factor derived from the requested height.
        r = height / float(h)
        dim = (int(w * r), height)
    else:
        # Scale factor derived from the requested width.
        r = width / float(w)
        dim = (width, int(h * r))

    # cv2.resize expects the target size as (width, height).
    return cv2.resize(image, dim, interpolation=inter)

# Read the input image.
image = cv2.imread(args["image"])
if image is None:
    # cv2.imread signals an unreadable/missing file by returning None.
    raise SystemExit("Could not read image: {}".format(args["image"]))

# Processing happens on a copy resized to a height of 500 px; keep the scale
# factor so coordinates found there can be mapped back to the original image.
ratio = image.shape[0] / 500.0
orig = image.copy()

image = resize(orig, height=500)

# Pre-processing: grayscale, Gaussian blur to suppress noise, Canny edges.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (5, 5), 0)
edged = cv2.Canny(gray, 75, 200)

# Show the pre-processing result.
print("STEP 1: 边缘检测")
cv2.imshow("Image", image)   # resized input
cv2.imshow("Edged", edged)   # edge map
cv2.waitKey(0)
cv2.destroyAllWindows()
显示：

# Contour detection on a copy of the edge map.
# cv2.findContours returns (image, contours, hierarchy) on OpenCV 3 but
# (contours, hierarchy) on OpenCV 2 and 4; index -2 selects the contour list
# in every case, whereas the original [1] only worked on OpenCV 3.
cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
# Several contours are found; the document outline should be among the
# largest, so sort by area (descending) and keep the top five.
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

# Walk through the candidates and pick the first one that approximates to a
# quadrilateral.
screenCnt = None
for c in cnts:
    # Perimeter of the closed contour.
    peri = cv2.arcLength(c, True)
    # Polygon approximation: c is the input point set, the second argument
    # (epsilon) is the maximum distance between the original contour and its
    # approximation, here 2% of the perimeter; True marks the curve as closed.
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)

    # Four vertices: take this contour as the document outline.
    if len(approx) == 4:
        screenCnt = approx
        break

if screenCnt is None:
    # Without this check the code below would fail with a NameError.
    raise SystemExit("No four-point contour found; cannot locate the document outline")

# Show the detected outline.
print("STEP 2: 获取轮廓")
cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)
cv2.imshow("Outline", image)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Perspective transform: given the four source corners, four_point_transform
# derives the four target corners from the region's width/height and warps.
#   orig: the original (un-resized) image.
#   screenCnt.reshape(4, 2) * ratio: the contour was found on the resized
#   image, so scale its corners back to original-image coordinates.
warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)

# Binarise: grayscale, then a fixed threshold at 100.
warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
ref = cv2.threshold(warped, 100, 255, cv2.THRESH_BINARY)[1]
cv2.imwrite('scan.jpg', ref)

# Show the result.
print("STEP 3: 变换")
cv2.imshow("Original", resize(orig, height=650))
cv2.imshow("Scanned", resize(ref, height=650))
cv2.waitKey(0)
显示：

得到扫描完的结果后如何进行的OCR字符识别？
# 下载地址：https://digi.bib.uni-mannheim.de/tesseract/ （下载最新版本即可）
# 配置环境变量，如 E:\Program Files (x86)\Tesseract-OCR
# 在命令行运行 tesseract -v 进行测试
# 运行 tesseract XXX.png result 得到结果，把结果存到 result 中
# pip install pytesseract
# anaconda --> Lib --> site-packages --> pytesseract --> pytesseract.py，
# 将 tesseract_cmd 修改为绝对路径即可，如 E:\Program Files (x86)\Tesseract-OCR\tesseract.exe
from PIL import Image
import pytesseract
import cv2
import os

# Pre-processing mode applied before OCR: 'blur' or 'thresh'.
preprocess = 'blur'

# Read the scanned image produced by the scanning step.
image = cv2.imread('scan.jpg')
if image is None:
    # cv2.imread signals an unreadable/missing file by returning None.
    raise SystemExit("Could not read 'scan.jpg'; run the scanning step first")

# Convert to grayscale.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

if preprocess == "thresh":
    # Binarise with an Otsu-selected threshold.
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
elif preprocess == "blur":
    # Median filter to remove small noise.
    gray = cv2.medianBlur(gray, 3)

# Write the pre-processed image to a temporary file named after the process
# id, run OCR on it, and always delete the file afterwards.
filename = "{}.png".format(os.getpid())
cv2.imwrite(filename, gray)
try:
    # Close the image handle before the file is removed.
    with Image.open(filename) as ocr_input:
        text = pytesseract.image_to_string(ocr_input)
finally:
    os.remove(filename)

# Print the text recognised in the image.
print(text)

cv2.imshow("Image", image)
cv2.imshow("Output", gray)
cv2.waitKey(0)

长颈鹿与麋鹿

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
OpenCV视频学习笔记（10）-项目实战-文档扫描OCR识别

十、项目实战-文档扫描OCR识别实践步骤：首先将包含代码的文件夹复制到opencv工程中；打开sacn.py;配置图片参数：RUN–>Edit Configurations–>scan–>Parameters输入–image ./images/receipt.jpg–>apply代码处理步骤：（1）读取图片（图片拍摄角度任意）；（2）边缘检测；（3）轮廓检测；（4）一系列图片变换操作（平移、旋转、翻转），把图片位置调正；（5）字符识别，中文、英文、数字；因为作者
复制链接

扫一扫