图片文字透视变换及ocr识别

最新推荐文章于 2025-02-14 10:56:43 发布

佐倉

最新推荐文章于 2025-02-14 10:56:43 发布

阅读量381

点赞数

分类专栏： python 图像处理

本文链接：https://blog.csdn.net/qq_38641985/article/details/115367115

版权

python 同时被 2 个专栏收录

194 篇文章

订阅专栏

图像处理

54 篇文章

订阅专栏

pytesseract 模块安装

https://digi.bib.uni-mannheim.de/tesseract/tesseract-ocr-w64-setup-v4.0.0-beta.1.20180414.exe
安装完成后，将 Tesseract 的安装目录添加到系统环境变量 PATH 中；另外还需执行 pip install pytesseract 安装对应的 Python 封装模块。

轮廓检测

def contours_img(image):
    """Outline the second-largest thresholded region of ``image`` and show it.

    The image is converted to grayscale and inverse-thresholded at 160, the
    contours of the resulting mask are extracted, and the contour with the
    second-largest area is drawn in green together with its bounding
    rectangle in blue.  ``image`` is drawn on in place, and both the mask and
    the annotated image are shown through ``display_img``.

    Args:
        image: BGR image; it is modified by the drawing calls.

    Raises:
        IndexError: if fewer than two contours are found.
    """
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    # THRESH_BINARY_INV: pixels above 160 become 0, everything else 255.
    threshold = cv.threshold(gray, 160, 255, cv.THRESH_BINARY_INV)[1]
    display_img(threshold)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
    # (contours, hierarchy); taking the last two items works on all of them.
    contour, hierarchy = cv.findContours(
        threshold.copy(), cv.RETR_CCOMP, cv.CHAIN_APPROX_SIMPLE)[-2:]
    import_contours = sorted(contour, key=cv.contourArea, reverse=True)[1]

    # drawContours expects a *list* of contours.  Passing the bare point
    # array makes every vertex its own one-point contour, so only dots are
    # drawn instead of the outline.
    cv.drawContours(image, [import_contours], -1, (0, 255, 0), 3)

    (gx, gy, gw, gh) = cv.boundingRect(import_contours)
    cv.rectangle(image, (gx, gy), (gx + gw, gy + gh), (255, 0, 0), 1)
    display_img(image)

在这里插入图片描述

轮廓近似

# Extract contours from `edged`, which is defined outside this snippet
# (presumably the edge map of `image` -- confirm against the full listing).
contour,hierarchy= cv.findContours(edged.copy(),cv.RETR_CCOMP,cv.CHAIN_APPROX_SIMPLE)
# Sort by area, largest first, and keep the second entry.
import_contours = sorted(contour,key=cv.contourArea,reverse=True)[1]
# Perimeter of the contour, treated as a closed curve.
peri = cv.arcLength(import_contours,True)
# Approximate the contour with a closed polygon; tolerance is 2% of the perimeter.
approx = cv.approxPolyDP(import_contours,0.02*peri,True)
# Draw the approximated polygon (colour (0,0,255), 1 px thick) and show the result.
cv.drawContours(image,[approx],-1,(0,0,255),1)
display_img(image)

在这里插入图片描述

透视变换

def four_point_transform(image, pts):
    """Warp the quadrilateral ``pts`` of ``image`` onto an axis-aligned rectangle.

    Args:
        image: source image passed to ``cv.warpPerspective``.
        pts: four corner points; they are put into top-left, top-right,
            bottom-right, bottom-left order by ``order_points``.

    Returns:
        The warped image.  Its width and height are the longer of each pair
        of opposite sides of the quadrilateral, truncated to integers.
    """
    def _side(p, q):
        # Euclidean length of the segment between p and q.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Output size from the input quadrilateral: bottom/top edges give the
    # width, right/left edges give the height.
    out_w = max(int(_side(bottom_right, bottom_left)),
                int(_side(top_right, top_left)))
    out_h = max(int(_side(top_right, bottom_right)),
                int(_side(top_left, bottom_left)))

    # Destination corners, in the same order as ``corners``.
    target = np.array(
        [[0, 0],
         [out_w - 1, 0],
         [out_w - 1, out_h - 1],
         [0, out_h - 1]],
        dtype="float32")

    # Compute the transform matrix and apply it.
    matrix = cv.getPerspectiveTransform(corners, target)
    return cv.warpPerspective(image, matrix, (out_w, out_h))

在这里插入图片描述

文字识别

from PIL import Image
import pytesseract
import cv2
import os

def get_pic_text(file_):
    """Run Tesseract OCR on an image file, print the text and return it.

    Args:
        file_: path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        The recognised text as returned by ``pytesseract.image_to_string``.
    """
    # Image.open keeps the underlying file open; the context manager makes
    # sure it is closed as soon as OCR has finished.
    with Image.open(file_) as img:
        text = pytesseract.image_to_string(img)
    print(text)
    return text
                    

def main():
    """Run OCR on the image file ``get.png``."""
    get_pic_text('get.png')

# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

在这里插入图片描述

完整代码

# -*- coding: utf-8 -*-

from PIL import Image
import pytesseract
import cv2 as cv
import os
import numpy as np

def display_img(img):
    """Show ``img`` in a window named 'image' and block until a key is pressed.

    All OpenCV windows are destroyed afterwards.
    """
    window = 'image'
    cv.imshow(window, img)
    cv.waitKey(0)  # 0 means: wait without timeout
    cv.destroyAllWindows()

def img_resize(image,width=None,height=None):
    """Resize ``image`` with ``cv.INTER_AREA`` interpolation.

    Args:
        image: array whose first two dimensions are (height, width).
        width: target width in pixels, or None.
        height: target height in pixels, or None.

    Returns:
        A new image.  If only one of ``width``/``height`` is given, the other
        side is scaled by the same factor (truncated to an integer).  If both
        are given, the image is resized to exactly ``(width, height)``.  If
        neither is given, an unchanged copy of ``image`` is returned.
    """
    # Nothing requested: previously this passed (None, None) to cv.resize,
    # which fails.  Return a copy so callers always get a fresh array.
    if width is None and height is None:
        return image.copy()

    h, w = image.shape[:2]
    if height is None:
        data = (width, int(width / w * h))
    elif width is None:
        data = (int(height / h * w), height)
    else:
        data = (width, height)
    return cv.resize(image, data, interpolation=cv.INTER_AREA)


def operate_img(file1):
    """Load an image from ``file1`` and resize it to a height of 1280 pixels.

    Args:
        file1: path of the image file, passed to ``cv.imread``.

    Returns:
        The image returned by ``img_resize(img, height=1280)``.

    Raises:
        IOError: if ``cv.imread`` could not read the file.
    """
    img = cv.imread(file1)
    # cv.imread reports failure by returning None instead of raising, which
    # would otherwise surface later as an unrelated AttributeError.
    if img is None:
        raise IOError("cannot read image: {!r}".format(file1))
    return img_resize(img, height=1280)
def contours_img(image):
    """Outline the second-largest thresholded region of ``image`` and show it.

    The image is converted to grayscale and inverse-thresholded at 160, the
    contours of the resulting mask are extracted, and the contour with the
    second-largest area is drawn in green together with its bounding
    rectangle in blue.  ``image`` is drawn on in place, and both the mask and
    the annotated image are shown through ``display_img``.

    Args:
        image: BGR image; it is modified by the drawing calls.

    Raises:
        IndexError: if fewer than two contours are found.
    """
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    # THRESH_BINARY_INV: pixels above 160 become 0, everything else 255.
    threshold = cv.threshold(gray, 160, 255, cv.THRESH_BINARY_INV)[1]
    display_img(threshold)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
    # (contours, hierarchy); taking the last two items works on all of them.
    contour, hierarchy = cv.findContours(
        threshold.copy(), cv.RETR_CCOMP, cv.CHAIN_APPROX_SIMPLE)[-2:]
    import_contours = sorted(contour, key=cv.contourArea, reverse=True)[1]

    # drawContours expects a *list* of contours.  Passing the bare point
    # array makes every vertex its own one-point contour, so only dots are
    # drawn instead of the outline.
    cv.drawContours(image, [import_contours], -1, (0, 255, 0), 3)

    (gx, gy, gw, gh) = cv.boundingRect(import_contours)
    cv.rectangle(image, (gx, gy), (gx + gw, gy + gh), (255, 0, 0), 1)
    display_img(image)

    
def order_points(pts):
    """Return the points of ``pts`` ordered as TL, TR, BR, BL in float32.

    The corners are picked by coordinate sums and differences: the smallest
    ``x + y`` is taken as top-left and the largest as bottom-right; the
    smallest ``y - x`` is taken as top-right and the largest as bottom-left.

    Args:
        pts: array of shape (4, 2) holding (x, y) points.

    Returns:
        A (4, 2) float32 array in the order described above.
    """
    coord_sum = pts.sum(axis=1)
    coord_diff = np.diff(pts, axis=1)  # second column minus first, per row

    picks = (
        np.argmin(coord_sum),   # top-left
        np.argmin(coord_diff),  # top-right
        np.argmax(coord_sum),   # bottom-right
        np.argmax(coord_diff),  # bottom-left
    )

    ordered = np.zeros((4, 2), dtype="float32")
    for slot, idx in enumerate(picks):
        ordered[slot] = pts[idx]
    return ordered

def four_point_transform(image, pts):
    """Warp the quadrilateral ``pts`` of ``image`` onto an axis-aligned rectangle.

    Args:
        image: source image passed to ``cv.warpPerspective``.
        pts: four corner points; they are put into top-left, top-right,
            bottom-right, bottom-left order by ``order_points``.

    Returns:
        The warped image.  Its width and height are the longer of each pair
        of opposite sides of the quadrilateral, truncated to integers.
    """
    def _side(p, q):
        # Euclidean length of the segment between p and q.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Output size from the input quadrilateral: bottom/top edges give the
    # width, right/left edges give the height.
    out_w = max(int(_side(bottom_right, bottom_left)),
                int(_side(top_right, top_left)))
    out_h = max(int(_side(top_right, bottom_right)),
                int(_side(top_left, bottom_left)))

    # Destination corners, in the same order as ``corners``.
    target = np.array(
        [[0, 0],
         [out_w - 1, 0],
         [out_w - 1, out_h - 1],
         [0, out_h - 1]],
        dtype="float32")

    # Compute the transform matrix and return the warped result.
    matrix = cv.getPerspectiveTransform(corners, target)
    return cv.warpPerspective(image, matrix, (out_w, out_h))


    
    
def contours_pic(img):
    """Find a four-sided outline in ``img``, rectify it and save it as get.png.

    Steps:
      1. Show the threshold-based contour preview via ``contours_img``.
      2. Blur a grayscale copy, run Canny edge detection and take the contour
         with the second-largest area.
      3. Approximate that contour with a polygon (tolerance 2% of its
         perimeter) and show it drawn on a copy of the input.
      4. Warp the quadrilateral to a rectangle with ``four_point_transform``,
         convert it to grayscale, show it and write it to ``get.png``.

    Previously a stray ``return`` right after step 1 made steps 2-4
    unreachable, so ``get.png`` was never produced.

    Args:
        img: BGR image.  It is not modified; all drawing happens on copies.

    Raises:
        IndexError: if fewer than two contours are found.
        ValueError: if the approximated polygon does not have four vertices.
    """
    # The preview draws on the image it receives, so give it its own copy.
    contours_img(img.copy())

    image = img.copy()
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    gray = cv.GaussianBlur(gray, (5, 5), 0)
    edged = cv.Canny(gray, 75, 200)

    # [-2:] keeps (contours, hierarchy) on OpenCV 3.x, which returns three
    # values, as well as on 2.x/4.x, which return two.
    contour, hierarchy = cv.findContours(
        edged.copy(), cv.RETR_CCOMP, cv.CHAIN_APPROX_SIMPLE)[-2:]
    import_contours = sorted(contour, key=cv.contourArea, reverse=True)[1]

    peri = cv.arcLength(import_contours, True)
    approx = cv.approxPolyDP(import_contours, 0.02 * peri, True)
    cv.drawContours(image, [approx], -1, (0, 0, 255), 1)
    display_img(image)

    # The perspective transform needs exactly four corners; fail with a
    # clear message instead of an opaque reshape error.
    if len(approx) != 4:
        raise ValueError(
            "expected a 4-point outline, got {} points".format(len(approx)))

    # Warp the untouched input rather than the annotated copy, so the
    # outline drawn above does not end up in the image saved for OCR.
    warped = four_point_transform(img, approx.reshape(4, 2))

    warped = cv.cvtColor(warped, cv.COLOR_BGR2GRAY)
    display_img(warped)
    cv.imwrite("get.png", warped)
  

def main():
    """Load ``images/page.jpg`` via ``operate_img`` and pass it to ``contours_pic``."""
    contours_pic(operate_img("images/page.jpg"))


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

参考：
https://blog.csdn.net/LaoYuanPython/article/details/114003964