python实现文档图像倾斜矫正，实现类似扫描仪功能

最新推荐文章于 2024-09-23 17:43:01 发布

Ai_Smith

最新推荐文章于 2024-09-23 17:43:01 发布

阅读量1.6w

点赞数 23

分类专栏：机器学习相关文章标签：文档提取文档扫描图像倾斜矫正

本文链接：https://blog.csdn.net/Ai_Smith/article/details/83617248

版权

机器学习相关专栏收录该内容

3 篇文章 0 订阅

订阅专栏

图片中文档提取与矫正，实现类似扫描仪功能

这几天看见一个软件，可以手机拍照一个文档，自动提取出文档后把歪曲的图像矫正，就好像扫描出来的一样，很有意思。作为图像处理爱好者，手痒忍不住自己试试（^ o ^）。废话不多说，直接上代码（我是代码搬运工…）:

第一个文档，scan.py

from transform import four_point_transform
import cv2, imutils
import imgEnhance

def preProcess(image):
    """Locate the document outline in *image*.

    The image is shrunk to a height of 500 px, converted to grayscale,
    blurred and passed through Canny edge detection. The five largest
    contours are then examined, largest first, and the first one that
    simplifies to exactly four vertices is taken as the document.

    Args:
        image: BGR image array as returned by ``cv2.imread``.

    Returns:
        A ``(contour, ratio)`` tuple. ``contour`` is the four-vertex polygon
        produced by ``cv2.approxPolyDP``, expressed in the coordinates of the
        *resized* image. ``ratio`` is ``original_height / 500.0``; multiply
        the contour by it to map the points back onto the original image.

    Raises:
        ValueError: if none of the candidate contours has four vertices.
    """
    ratio = image.shape[0] / 500.0
    image = imutils.resize(image, height=500)

    grayImage = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gaussImage = cv2.GaussianBlur(grayImage, (5, 5), 0)
    edgedImage = cv2.Canny(gaussImage, 75, 200)

    found = cv2.findContours(edgedImage.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    # OpenCV 2.4 and 4.x return (contours, hierarchy) while 3.x returns
    # (image, contours, hierarchy); pick the contour list by tuple length so
    # the code works on every version.
    cnts = found[0] if len(found) == 2 else found[1]
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

    for c in cnts:
        peri = cv2.arcLength(c, True)  # perimeter of the closed contour
        # Simplify the contour, tolerating a deviation of 2% of its perimeter.
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)

        if len(approx) == 4:
            return approx, ratio

    # Fail loudly instead of hitting an unbound local variable.
    raise ValueError("no four-cornered contour found; cannot locate the document")

if __name__ == "__main__":
    # Demo: find the document in image.jpg, rectify it, apply gamma
    # correction, and show the original next to the result.

    image = cv2.imread("image.jpg")
    if image is None:
        # cv2.imread signals a missing or unreadable file by returning None
        # rather than raising, so check explicitly.
        raise SystemExit("could not read image.jpg")

    screenCnt, ratio = preProcess(image)
    # The contour is in resized-image coordinates; scale it back to the
    # full-resolution image before warping.
    warped = four_point_transform(image, screenCnt.reshape(4, 2) * ratio)

    enhancer = imgEnhance.Enhancer()
    enhancedImg = enhancer.gamma(warped, 1.63)

    cv2.imshow("org", imutils.resize(image, height=500))
    cv2.imshow("gamma", imutils.resize(enhancedImg, height=500))
    cv2.waitKey(0)
    cv2.destroyAllWindows()

首先对图像做一系列预处理（缩放、灰度化、高斯模糊、Canny 边缘检测），然后提取文档四个顶角的坐标（假设我们的文档都是四边形的，存在四个顶角，这个假设通常还是成立的）。毫无疑问，主要工作在根据四个顶角做透视变换的函数上，这就上第二个文档。

第二个文档，transform.py


import numpy as np
import cv2

def order_points(pts):
    """Order four corner points as top-left, top-right, bottom-right, bottom-left.

    The primary rule uses coordinate sums and differences: the top-left
    corner has the smallest ``x + y``, the bottom-right the largest, the
    top-right the smallest ``y - x`` and the bottom-left the largest.

    For some quadrilaterals (for example a diamond) that rule selects the
    same point for two corners and drops another. In that case the points
    are ordered by their angle around the centroid instead, starting from
    the point with the smallest ``x + y``.

    Args:
        pts: four ``(x, y)`` points; anything that reshapes to ``(4, 2)``,
            including an OpenCV contour of shape ``(4, 1, 2)``.

    Returns:
        A ``float32`` array of shape ``(4, 2)`` holding the points in the
        order top-left, top-right, bottom-right, bottom-left.

    Raises:
        ValueError: if *pts* cannot be reshaped to ``(4, 2)``.
    """
    pts = np.asarray(pts).reshape(4, 2)

    s = pts.sum(axis=1)                     # x + y for every point
    diff = np.diff(pts, axis=1).ravel()     # y - x for every point

    order = [
        int(np.argmin(s)),      # top-left
        int(np.argmin(diff)),   # top-right
        int(np.argmax(s)),      # bottom-right
        int(np.argmax(diff)),   # bottom-left
    ]

    if len(set(order)) < 4:
        # The heuristic picked one point for several corners. Sort by angle
        # around the centroid instead; with the image y axis pointing down,
        # increasing angle runs tl -> tr -> br -> bl.
        centre = pts.mean(axis=0)
        angles = np.arctan2(pts[:, 1] - centre[1], pts[:, 0] - centre[0])
        by_angle = np.argsort(angles, kind="stable")
        start = int(np.flatnonzero(by_angle == order[0])[0])
        order = np.roll(by_angle, -start)

    return pts[order].astype("float32")

def four_point_transform(image, pts):
    """Warp the quadrilateral described by *pts* into an upright rectangle.

    Args:
        image: source image array.
        pts: the four corner points of the region to extract, in any order;
            they are sorted with ``order_points`` first.

    Returns:
        The perspective-corrected image. Its width is the longer of the top
        and bottom edges and its height the longer of the left and right
        edges, each truncated to an integer.
    """
    def _span(p, q):
        # Euclidean distance between two (x, y) points.
        return np.sqrt((p[0] - q[0]) ** 2 + (p[1] - q[1]) ** 2)

    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Output width: the longer of the top and bottom edges.
    out_w = max(int(_span(top_right, top_left)),
                int(_span(bottom_right, bottom_left)))
    # Output height: the longer of the right and left edges.
    out_h = max(int(_span(top_right, bottom_right)),
                int(_span(top_left, bottom_left)))

    # Destination corners in the same tl, tr, br, bl order as `corners`.
    right, bottom = out_w - 1, out_h - 1
    target = np.array(
        [[0, 0], [right, 0], [right, bottom], [0, bottom]],
        dtype="float32",
    )

    matrix = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, matrix, (out_w, out_h))

先做一个 order_points() 函数，搞清楚哪个是左上角、右上角、右下角、左下角，接着，计算下新图像的宽和高，然后做透视变换把文档“拉正”。作为追求极致的人，当然不能止步于此（^ _ ^），继续折腾，图像增强，把图片质量搞好点，毕竟手机拍的照片有时光线不太好。看文档三。

第三个文档，imgEnhance.py

from PIL import ImageEnhance
import numpy as np
import cv2

class Enhancer:
    """Collection of simple image-enhancement helpers.

    ``bright``, ``color``, ``contrast`` and ``sharp`` pass their image to the
    matching ``PIL.ImageEnhance`` class, so they expect a PIL image.
    ``gamma`` works on NumPy arrays instead (such as images loaded with
    OpenCV).
    """

    def bright(self, image, brightness):
        """Return *image* with its brightness scaled by *brightness*.

        Delegates to ``ImageEnhance.Brightness(image).enhance(brightness)``.
        """
        return ImageEnhance.Brightness(image).enhance(brightness)

    def color(self, image, color):
        """Return *image* with its colour balance scaled by *color*.

        Delegates to ``ImageEnhance.Color(image).enhance(color)``.
        """
        return ImageEnhance.Color(image).enhance(color)

    def contrast(self, image, contrast):
        """Return *image* with its contrast scaled by *contrast*.

        Delegates to ``ImageEnhance.Contrast(image).enhance(contrast)``.
        """
        return ImageEnhance.Contrast(image).enhance(contrast)

    def sharp(self, image, sharpness):
        """Return *image* with its sharpness scaled by *sharpness*.

        Delegates to ``ImageEnhance.Sharpness(image).enhance(sharpness)``.
        """
        return ImageEnhance.Sharpness(image).enhance(sharpness)

    def gamma(self, image, gamma):
        """Apply gamma correction to a NumPy image array.

        The array is divided by its maximum value and the result is raised
        to the power *gamma*.

        Args:
            image: NumPy array of pixel values.
            gamma: exponent applied to the normalised pixel values.

        Returns:
            A floating-point array with the same shape as *image*. The
            result is not rescaled to 0-255 or converted back to ``uint8``.
        """
        peak = float(np.max(image))
        # An all-zero image has peak 0; dividing by it would fill the result
        # with NaN. Dividing by 1 instead leaves the zeros untouched.
        scale = peak if peak != 0 else 1.0
        return np.power(image / scale, gamma)