opencv-ocr项目跟练

最新推荐文章于 2024-05-14 15:31:07 发布

德国鸡腿堡广door♂分店

最新推荐文章于 2024-05-14 15:31:07 发布

阅读量893

点赞数 21

文章标签： ocr opencv 计算机视觉 python

本文链接：https://blog.csdn.net/yybsts/article/details/136653588

版权

本文详细介绍了使用Python和OpenCV库进行图像处理，包括读取图像、边缘检测、轮廓检测、透视变换，以及使用OCR技术进行文本识别的过程。重点展示了如何通过透视变换对图像进行校正，以便于后续的文字识别。

摘要由CSDN通过智能技术生成

python语言编写

主程序块：

#模块导入
import cv2 as cv
import numpy as np

#调用其他程序
import resize
import point_trans
import test

如果你直接写 `import cv2`（不加 `as cv`），那么后面程序中所有的 `cv.` 都要改写成 `cv2.`。

读入图像，记录比例

image = cv.imread('key.jpg')
if image is None:
    # cv.imread reports an unreadable/missing file by returning None instead
    # of raising, which would otherwise fail later with an obscure error.
    raise FileNotFoundError("key.jpg could not be read")

# Detection runs on a copy shrunk to height 500, so remember the scale factor
# needed to map coordinates found there back onto the full-size image.
ratio = image.shape[0]/500.0
orig = image.copy()  # untouched full-resolution copy, used later for the warp

# Only the height is given; resize.resize derives the width from the aspect
# ratio, so a single scale factor is enough.
image = resize.resize(orig,height = 500)

resize.py在后面

预处理和边缘检测

# Preprocessing: convert BGR to grayscale, smooth with a 5x5 Gaussian kernel,
# then run Canny edge detection with thresholds 50 and 100.
gray = cv.GaussianBlur(cv.cvtColor(image, cv.COLOR_BGR2GRAY), (5, 5), 0)
edged = cv.Canny(gray, 50, 100)

print('step 1:边缘检测')
# Show the resized input next to its edge map and wait for a key press.
for window_name, frame in (('image', image), ('edged', edged)):
    cv.imshow(window_name, frame)
cv.waitKey(0)
cv.destroyAllWindows()

轮廓检测

# Contour detection on the edge map. Many contours are found; the page outline
# is assumed to be among the largest ones.
found = cv.findContours(edged.copy(), cv.RETR_LIST, cv.CHAIN_APPROX_SIMPLE)
# OpenCV 4.x (and 2.x) return (contours, hierarchy) while 3.x returns
# (image, contours, hierarchy); pick the contour list for either layout
# instead of hard-coding the index.
cnts = found[0] if len(found) == 2 else found[1]

# Keep the five contours with the largest area, biggest first.
cnts = sorted(cnts, key=cv.contourArea, reverse=True)[:5]

# Walk the candidates and take the first one that simplifies to a quadrilateral.
screenCnt = None
for c in cnts:
    peri = cv.arcLength(c, True)  # perimeter; True = treat the contour as closed
    # epsilon (max distance between the contour and its approximation) is set
    # as a fraction of the perimeter; larger values give coarser, more regular shapes.
    approx = cv.approxPolyDP(c, 0.02 * peri, True)

    if len(approx) == 4:
        screenCnt = approx
        break

if screenCnt is None:
    # Without this guard the script would die below with a NameError.
    raise RuntimeError("no four-corner contour found among the largest contours")

print('step 2:获取轮廓')
cv.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)  # -1 draws every contour in the list
cv.imshow('outline', image)
cv.waitKey(0)
cv.destroyAllWindows()

透视变换

# Perspective transform / rectification.
# screenCnt was found on the image resized to height 500; multiplying its
# corner coordinates by ratio maps them back onto the full-resolution image.
warped = point_trans.four_point_transform(orig,screenCnt.reshape(4,2)*ratio)
# orig is the copy of the image taken before resizing.
# Binarize the rectified image.
warped = cv.cvtColor(warped,cv.COLOR_BGR2GRAY)
ref = cv.threshold(warped,150,255,cv.THRESH_BINARY)[1]# threshold returns (retval, dst); [1] keeps only the thresholded image
cv.imwrite('scan.jpg',ref)  # saved to disk; test.function() reads this file back for OCR

print('step 3:变换')
#cv.imshow('original',resize.resize(orig,height=650))
cv.imshow('scanned',resize.resize(ref,height=650))
cv.waitKey(0)
cv.destroyAllWindows()

point_trans也是另外写的py文件

这里二值处理参数设置对后面OCR影响挺大的

OCR

# OCR step: delegate text recognition to the test module.
print('step 4:识别')
test.function()

test中写的是ocr实现代码

resize.py

import cv2 as cv

def resize(image, width=None, height=None, inter=cv.INTER_AREA):
    """Resize *image* to a target width or height, keeping its aspect ratio.

    Args:
        image: Image array; its first two shape entries are read as
            (height, width).
        width: Target width in pixels. When given, it takes precedence and
            the height is derived from it.
        height: Target height in pixels, used only when *width* is None.
        inter: Interpolation flag forwarded to ``cv.resize``.

    Returns:
        The resized image, or *image* itself when neither *width* nor
        *height* is given.
    """
    src_h, src_w = image.shape[:2]

    # Nothing requested: hand the input back untouched.
    if width is None and height is None:
        return image

    if width is not None:
        # Scale by the width ratio and derive the matching height.
        scale = width / float(src_w)
        target_size = (width, int(src_h * scale))
    else:
        # Scale by the height ratio and derive the matching width.
        scale = height / float(src_h)
        target_size = (int(src_w * scale), height)

    # cv.resize expects the size as (width, height).
    return cv.resize(image, target_size, interpolation=inter)

point_trans.py

import cv2 as cv
import numpy as np

def order_points(pts):
    """Order four corner points as top-left, top-right, bottom-right, bottom-left.

    Args:
        pts: Array-like of shape (4, 2) holding (x, y) coordinates.

    Returns:
        A (4, 2) float32 array ordered TL, TR, BR, BL.
    """
    pts = np.asarray(pts)

    # x + y is smallest for the top-left corner and largest for the bottom-right.
    s = pts.sum(axis=1)
    # y - x is smallest for the top-right corner and largest for the bottom-left.
    diff = np.diff(pts, axis=1)
    picks = [
        int(np.argmin(s)),
        int(np.argmin(diff)),
        int(np.argmax(s)),
        int(np.argmax(diff)),
    ]

    # Normal case: the heuristic selected four different points.
    if len(set(picks)) == 4:
        return pts[picks].astype("float32")

    # Ties in the sums/differences (e.g. a quadrilateral rotated by about 45
    # degrees) make the heuristic assign one point to two corners and drop
    # another. Fall back to sorting the points by angle around their centroid;
    # with y growing downward, increasing angle runs clockwise on screen, which
    # is the TL -> TR -> BR -> BL direction. Assumes a convex quadrilateral.
    center = pts.mean(axis=0)
    angles = np.arctan2(pts[:, 1] - center[1], pts[:, 0] - center[0])
    ring = pts[np.argsort(angles)]
    # Start the cycle at the point closest to the top-left (smallest x + y).
    start = int(np.argmin(ring.sum(axis=1)))
    return np.roll(ring, -start, axis=0).astype("float32")



def four_point_transform(image, pts):
    """Warp the quadrilateral described by *pts* into an upright rectangle.

    Args:
        image: Source image passed to ``cv.warpPerspective``.
        pts: Four (x, y) corner points; they are ordered with
            ``order_points`` before use.

    Returns:
        The warped image, sized to the longer of each pair of opposite sides.
    """

    def _side(p, q):
        # Euclidean distance between two (x, y) points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Output width: the longer of the bottom and top edges.
    out_w = max(int(_side(bottom_right, bottom_left)),
                int(_side(top_right, top_left)))
    # Output height: the longer of the right and left edges.
    out_h = max(int(_side(top_right, bottom_right)),
                int(_side(top_left, bottom_left)))

    # Destination corners in the same TL, TR, BR, BL order; the -1 keeps the
    # last pixel index inside an image of size (out_w, out_h).
    target = np.array(
        [
            [0, 0],
            [out_w - 1, 0],
            [out_w - 1, out_h - 1],
            [0, out_h - 1],
        ],
        dtype="float32",
    )

    # Compute the 3x3 perspective matrix from source to destination corners
    # and apply it.
    matrix = cv.getPerspectiveTransform(corners, target)
    return cv.warpPerspective(image, matrix, (out_w, out_h))

test.py

from PIL import Image
import pytesseract #需要安装tesseract，并配置环境变量,然后pip install pytesseract
#要去pytesseract里改动，具体自己查
import cv2 as cv
import os
def function():
    """Run Tesseract OCR on ``scan.jpg`` and print the recognised text.

    The image is converted to grayscale, optionally preprocessed, written to a
    temporary PNG named after the current process id, and passed to
    pytesseract. The temporary file is removed even if OCR fails.

    Raises:
        FileNotFoundError: If ``scan.jpg`` cannot be read.
        OSError: If the temporary PNG cannot be written.
    """
    # Change this value to compare preprocessing options: 'blur' or 'thresh'.
    preprocess = 'blur'

    image = cv.imread('scan.jpg')
    if image is None:
        # cv.imread returns None on failure instead of raising.
        raise FileNotFoundError("scan.jpg could not be read")
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)

    if preprocess == "thresh":
        # Otsu picks the threshold automatically, so the 0 passed here is ignored.
        gray = cv.threshold(gray, 0, 255, cv.THRESH_BINARY | cv.THRESH_OTSU)[1]
    elif preprocess == "blur":
        gray = cv.medianBlur(gray, 3)

    # Write the preprocessed image to a temporary file for Tesseract.
    filename = "{}.png".format(os.getpid())
    if not cv.imwrite(filename, gray):
        raise OSError("could not write temporary image {}".format(filename))

    try:
        # Close the PIL file handle before deleting the file; an open handle
        # blocks removal on some platforms.
        with Image.open(filename) as img:
            text = pytesseract.image_to_string(img)
        print(text)
    finally:
        os.remove(filename)

# cv.imshow('image',image)
# cv.imshow('output',gray)
# cv.waitKey(0)

blur输出：

pe

yeas

OLE F0GRS Med - UESTPORI.CT 06800
YOR POST RD WEST - (205) 227-6088

e ses BACON S$ me q9t
e 36S BACCHUS ME agot
Ld deb aera 15 WP ao
= 366 aelom 8 8e ae
. bani cer eg we
. FLouk ALAQNG MP OT: NE
. Caan ONST PULSE a mF 18 80+
* MEATY CREAM WP $39 F
‘ DMA REDUCI 4a
. EF CAND bb 6.00
. ICE COP caseEN = aoe

Docs pint on case

WHY ALBONE 8. oe

ones TAL 00, 1

thresh输出：

WHOLE
FOODS
CEE

WHOLE FOODS MARKET - UESTPORT,CT 06880
399 POST RO WEST ~ (203) 227-6858

365 BACON 1S NP
365 BACON LS NP
365 BACON LS NP
365 BACON iS NP
BROI# CHIC NE
FLOUR ALAND WP
CHKN BRST BWLSS SK NP
WEAVY CREAN NP
BALSNC REOUCT NP
GEEF ORND 86/15 NP
WUICE COF CASHEW U NP
BOCS PINT GRAAMIC NP
HAY ALMOND BUTER NP
enae TAX «00 BAL.

.
.
.
e
*
.
.
.
a
.
.

Sena