使用OpenCV构建文档扫描仪仅需四个步骤:
- 步骤1:检测边缘
- 步骤2:使用图像轮廓找到扫描纸张的轮廓
- 步骤3:应用透视变换
- 步骤4:应用阈值,输出黑白图像
步骤一:检测边缘
使用OpenCV构建文档扫描仪程序的第一步就是执行边缘检测。建立项目docScanner.py,代码如下:
# Load the photo of the document. cv2.imread does not raise on failure; it
# returns None when the file is missing or cannot be decoded, so check it.
image = cv2.imread('./1.png')
if image is None:
    raise FileNotFoundError("cannot read image './1.png'")
# Ratio of the original height to the working height (500 px); used later to
# map the detected corner points back onto the full-resolution image.
ratio = image.shape[0] / 500.0
orig = image.copy()
# Resize to a height of 500 px to speed up the image processing.
image = imutils.resize(image, height=500)
# Convert to grayscale.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Gaussian blur to suppress high-frequency noise.
gray = cv2.GaussianBlur(gray, (5, 5), 0)
# Canny edge detection.
edged = cv2.Canny(gray, 75, 200)
# cv2.imshow('edged', edged)
步骤二:寻找轮廓
纸张是一个矩形,因此可以假设:图像中恰好具有四个顶点的最大轮廓就是纸张的轮廓。代码如下:
# Find the contours in the edge map and keep the five largest by area.
cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]
screenCnt = None
for c in cnts:
    peri = cv2.arcLength(c, True)
    # approxPolyDP fits a polygon to the contour; the approximation
    # tolerance is 2% of the contour perimeter.
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    # The first (i.e. largest) contour that reduces to four points is taken
    # to be the sheet of paper.
    if len(approx) == 4:
        screenCnt = approx
        break
# Without this check a failed search would pass None on to drawContours and
# to the later reshape, producing an unrelated-looking error.
if screenCnt is None:
    raise RuntimeError('no four-point contour found; cannot locate the paper')
print('step 2: Find Contours of paper')
cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)
# cv2.imshow('outline', image)
步骤三:应用透视变换
使用透视变换(cv2.getPerspectiveTransform)获得文档内容的鸟瞰图。可以查看专栏文章《使用python opencv透视变换getPerspectiveTransform获得图像的鸟瞰图》。
# Edge detection ran on the image resized to a height of 500 px; scale the
# corner points by ratio so the scan is performed on the original image
# rather than on the resized one.
pts = screenCnt.reshape(4, 2) * ratio
# Warp to a bird's-eye view.
warped = perTran(orig, pts)
print('step 3: apply Perspective Transform')
#cv2.imshow('warped', warped)
步骤四:应用阈值进行分割
为了使图像获得黑白的感觉,先将其转换为灰度图像并计算自适应阈值,然后根据阈值进行分割。
# Convert the warped image to grayscale.
warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
# The local algorithm lets you specify the block size (11 here); offset helps
# tune the image for a better result.
T = filters.threshold_local(warped, 11, offset=10, method='gaussian')
# Segment by the threshold: pixels above it become 255, all others 0.
warped = (warped > T).astype('uint8') * 255
print('step 4: apply threshold')
执行完整代码,结果如下:
完整源代码
#! /usr/bin/env python
# -*- coding: utf-8 -*-
from skimage import filters
import numpy as np
import imutils
import cv2
def order_points(pts):
    """Order corner points as top-left, top-right, bottom-right, bottom-left.

    The corners are picked in image coordinates (y grows downwards):
    the smallest ``x + y`` is the top-left, the largest ``x + y`` the
    bottom-right, the smallest ``y - x`` the top-right and the largest
    ``y - x`` the bottom-left.

    Args:
        pts: array-like of points, shape ``(N, 2)``; an OpenCV-style contour
            of shape ``(N, 1, 2)`` is accepted as well.

    Returns:
        A ``(4, 2)`` ``float32`` array holding the four selected points in
        the order described above.

    Raises:
        ValueError: if *pts* is empty or is not a list of 2-D points.
    """
    # Work in float64: accepts plain lists and, more importantly, keeps
    # ``y - x`` from wrapping around when the input dtype is unsigned.
    pts = np.asarray(pts, dtype='float64')
    # Contours come as (N, 1, 2); drop the singleton middle axis.
    if pts.ndim == 3 and pts.shape[1] == 1:
        pts = pts[:, 0, :]
    if pts.ndim != 2 or pts.shape[1] != 2 or pts.shape[0] == 0:
        raise ValueError(
            'pts must be a non-empty (N, 2) array of points, got shape '
            '{}'.format(pts.shape)
        )

    rect = np.zeros((4, 2), dtype='float32')

    # x + y: minimum is the top-left corner, maximum the bottom-right.
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # y - x: minimum is the top-right corner, maximum the bottom-left.
    diff = pts[:, 1] - pts[:, 0]
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]

    return rect
def perTran(image, pts):
    """Warp the quadrilateral given by *pts* in *image* to a rectangle.

    The points are first put in order with ``order_points``. The output
    size is the longer of the two horizontal edges by the longer of the
    two vertical edges, each length truncated to an integer.

    Args:
        image: source image passed to ``cv2.warpPerspective``.
        pts: the four corner points of the region to extract.

    Returns:
        The perspective-corrected image returned by
        ``cv2.warpPerspective``.
    """
    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    def _edge_length(p, q):
        # Euclidean distance between two corner points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    # Output width: the longer of the bottom and top edges.
    out_w = max(
        int(_edge_length(bottom_right, bottom_left)),
        int(_edge_length(top_right, top_left)),
    )
    # Output height: the longer of the right and left edges.
    out_h = max(
        int(_edge_length(top_right, bottom_right)),
        int(_edge_length(top_left, bottom_left)),
    )

    # Corners of the destination rectangle, in the same order as `corners`.
    target = np.array(
        [
            [0, 0],
            [out_w - 1, 0],
            [out_w - 1, out_h - 1],
            [0, out_h - 1],
        ],
        dtype='float32',
    )

    matrix = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, matrix, (out_w, out_h))
def main(path='./1.png'):
    """Scan the document photographed in *path* and display the result.

    Runs the four steps of the scanner: edge detection, search for a
    four-point contour, perspective transform of the original image, and
    local thresholding to a black-and-white image. The original and the
    scanned image are then shown in two windows until a key is pressed.

    Args:
        path: image file to scan; defaults to ``'./1.png'``.

    Raises:
        FileNotFoundError: if the image cannot be read.
        RuntimeError: if none of the five largest contours reduces to
            four points.
    """
    # cv2.imread signals failure by returning None rather than raising.
    image = cv2.imread(path)
    if image is None:
        raise FileNotFoundError('cannot read image {!r}'.format(path))

    # Ratio of the original height to the working height (500 px); used
    # below to map the detected corners back onto the original image.
    ratio = image.shape[0] / 500.0
    orig = image.copy()
    # Resize to a height of 500 px to speed up the image processing.
    image = imutils.resize(image, height=500)

    # Grayscale, then Gaussian blur to suppress high-frequency noise,
    # then Canny edge detection.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(gray, 75, 200)
    print('step 1: Edge Detection')

    # Find the contours and keep the five largest by area.
    cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

    screenCnt = None
    for c in cnts:
        peri = cv2.arcLength(c, True)
        # Polygon approximation with a tolerance of 2% of the perimeter.
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)
        # The first (largest) contour with four points is taken as the paper.
        if len(approx) == 4:
            screenCnt = approx
            break
    if screenCnt is None:
        raise RuntimeError(
            'no four-point contour found; cannot locate the paper'
        )
    print('step 2: Find Contours of paper')
    # Outline the detected sheet on the resized working image.
    cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)

    # The contour was found on the 500 px image; scale it by ratio so the
    # warp is applied to the original image, not the resized one.
    pts = screenCnt.reshape(4, 2) * ratio
    warped = perTran(orig, pts)
    print('step 3: apply Perspective Transform')

    warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
    # Local threshold with block size 11; offset helps tune the result.
    T = filters.threshold_local(warped, 11, offset=10, method='gaussian')
    # Pixels above the threshold become 255, all others 0.
    warped = (warped > T).astype('uint8') * 255
    print('step 4: apply threshold')

    cv2.imshow('Original', imutils.resize(orig, height=650))
    cv2.imshow('Scanned', imutils.resize(warped, height=650))
    cv2.waitKey(0)
    cv2.destroyAllWindows()
# Run the scanner only when this file is executed as a script.
if __name__ == '__main__':
    main()