如何使用 Python 和 OpenCV 在短短几步之内构建文档扫描仪

最新推荐文章于 2024-05-05 08:44:41 发布

Abc2149

最新推荐文章于 2024-05-05 08:44:41 发布

阅读量491

点赞数

分类专栏： OpenCv 文章标签： opencv python 计算机视觉

本文链接：https://blog.csdn.net/qq_36281080/article/details/103930080

版权

OpenCv 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

使用 OpenCV 构建文档扫描仪仅需四个步骤即可完成：

步骤1：检测边缘
步骤2：使用图像轮廓找到扫描纸张的轮廓
步骤3：应用透视变换
步骤4：阈值输出黑白图像

步骤1：检测边缘

使用opencv构建文档扫描仪程序的第一步就是执行边缘检测。建立项目docScanner.py，代码如下：

	image = cv2.imread('./1.png')
	# Ratio of the original image height to the resized height (500 px)
	ratio = image.shape[0] / 500.0
	orig = image.copy()
	# Resize the image to a height of 500 px to speed up processing
	image = imutils.resize(image, height=500)
	# Convert to grayscale
	gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	# Apply a Gaussian blur to suppress high-frequency noise
	gray = cv2.GaussianBlur(gray, (5, 5), 0)
	# Edge detection (Canny)
	edged = cv2.Canny(gray, 75, 200)
	# cv2.imshow('edged', edged)

步骤二：寻找轮廓

假设纸张是一个矩形，那么在面积最大的几个轮廓中，近似多边形恰好有四个顶点的那个轮廓就是纸张的轮廓。代码如下：

	# Find the contours in the edge map and keep the five with the largest area
	cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
	cnts = imutils.grab_contours(cnts)
	cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

	# NOTE(review): screenCnt stays None when no candidate approximates to four
	# points; the drawContours call below does not guard against that
	screenCnt = None
	for c in cnts:
		peri = cv2.arcLength(c, True)
		# approxPolyDP fits a polygon to the contour points;
		# the approximation tolerance is 2% of the contour perimeter
		approx = cv2.approxPolyDP(c , 0.02*peri, True)
		if len(approx) == 4:
			screenCnt = approx
			break

	print('step 2: Find Contours of paper')
	cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)
	#cv2.imshow('outline', image)

步骤三：应用透视变换

使用透视变换 cv2.getPerspectiveTransform 获得文档内容的鸟瞰图。详细说明可以查看专栏文章《使用python opencv透视变换getPerspectiveTransform获得图像的鸟瞰图》。

	# The contour was found on the 500 px-high image; scale its points by ratio
	# so that the original image is warped rather than the resized one
	pts = screenCnt.reshape(4, 2) * ratio
	# Warp to a bird's-eye view
	warped = perTran(orig, pts)
	print('step 3: apply Perspective Transform')
	#cv2.imshow('warped', warped)

步骤四：阈值进行分割

为了使图像获得黑白扫描件的效果，先将其转换为灰度图像，再计算局部自适应阈值，最后根据该阈值对图像进行二值化分割。

	warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
	# threshold_local takes a block size (11 here); the offset (10 here) is a
	# tuning value that helps adjust the result
	T = filters.threshold_local(warped, 11, offset=10, method='gaussian')
	# Binarize: pixels above the local threshold become 255, the rest 0
	warped = (warped > T).astype('uint8') * 255
	print('step 4: apply threshold')

执行完整代码，结果如下：
在这里插入图片描述

完整源代码

#! /usr/bin/env python
# -*- coding: utf-8 -*-

from skimage import filters
import numpy as np
import imutils
import cv2

def order_points(pts):
	"""Return the four points of *pts* ordered as TL, TR, BR, BL.

	The corner roles are picked from two per-point quantities:
	x + y is smallest at the top-left and largest at the bottom-right,
	while y - x is smallest at the top-right and largest at the bottom-left.

	pts is expected to be an array of four (x, y) rows. The result is a
	(4, 2) float32 array.
	"""
	coord_sum = pts.sum(axis=1)
	# np.diff along axis 1 yields (second column - first column), i.e. y - x
	coord_diff = np.diff(pts, axis=1)

	top_left = np.argmin(coord_sum)
	bottom_right = np.argmax(coord_sum)
	top_right = np.argmin(coord_diff)
	bottom_left = np.argmax(coord_diff)

	# Gather the chosen rows in output order into a float32 (4, 2) buffer
	ordered = np.empty((4, 2), dtype='float32')
	ordered[:] = pts[[top_left, top_right, bottom_right, bottom_left]]
	return ordered

def perTran(image, pts):
	"""Warp the quadrilateral *pts* of *image* to an upright rectangle.

	The corners are first ordered with order_points. The output width is the
	longer of the bottom and top edges and the output height is the longer of
	the right and left edges (each truncated to an int). Returns the image
	produced by cv2.warpPerspective with size (width, height).
	"""
	def _edge_len(p, q):
		# Euclidean distance between two corner points
		return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

	corners = order_points(pts)
	top_left, top_right, bottom_right, bottom_left = corners

	# Output width: longer of the bottom and top edges
	out_w = max(int(_edge_len(bottom_right, bottom_left)),
				int(_edge_len(top_right, top_left)))
	# Output height: longer of the right and left edges
	out_h = max(int(_edge_len(top_right, bottom_right)),
				int(_edge_len(top_left, bottom_left)))

	# Destination corners in the same TL, TR, BR, BL order
	target = np.array(
		[[0, 0], [out_w - 1, 0], [out_w - 1, out_h - 1], [0, out_h - 1]],
		dtype='float32')

	# Perspective matrix mapping the source corners onto the destination ones
	transform = cv2.getPerspectiveTransform(corners, target)
	return cv2.warpPerspective(image, transform, (out_w, out_h))

def main(image_path='./1.png'):
	"""Scan the document in *image_path* and show the original and the result.

	Steps: Canny edge detection on a 500 px-high copy, search for a
	four-vertex contour among the five largest contours, perspective-warp
	the original image to that quadrilateral, then binarize the result with
	a local (adaptive) threshold. Two windows are shown until a key is
	pressed.

	image_path: path of the image to scan; defaults to './1.png'.

	Raises:
		IOError: if the image cannot be read.
		RuntimeError: if no four-vertex contour is found.
	"""
	image = cv2.imread(image_path)
	if image is None:
		# cv2.imread reports failure by returning None rather than raising
		raise IOError('cannot read image: {}'.format(image_path))

	# Ratio of the original image height to the resized height (500 px)
	ratio = image.shape[0] / 500.0
	orig = image.copy()
	# Resize the image to a height of 500 px to speed up processing
	image = imutils.resize(image, height=500)
	# Convert to grayscale
	gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	# Apply a Gaussian blur to suppress high-frequency noise
	gray = cv2.GaussianBlur(gray, (5, 5), 0)
	# Edge detection (Canny)
	edged = cv2.Canny(gray, 75, 200)
	print('step 1: Edge Detection')

	# Find the contours and keep the five with the largest area
	cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
	cnts = imutils.grab_contours(cnts)
	cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

	screenCnt = None
	for c in cnts:
		peri = cv2.arcLength(c, True)
		# Fit a polygon to the contour; tolerance is 2% of the perimeter
		approx = cv2.approxPolyDP(c, 0.02 * peri, True)
		if len(approx) == 4:
			screenCnt = approx
			break

	if screenCnt is None:
		# Without a quadrilateral there is nothing to warp; fail with a clear
		# message instead of crashing later on a None contour
		raise RuntimeError('no 4-point contour found; cannot locate the document')

	print('step 2: Find Contours of paper')
	# Outline drawn on the resized image; only visible if that image is
	# displayed for debugging, e.g. cv2.imshow('outline', image)
	cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)

	# The contour was found on the 500 px-high image; scale its points by ratio
	# so that the original image is warped rather than the resized one
	pts = screenCnt.reshape(4, 2) * ratio
	# Warp to a bird's-eye view
	warped = perTran(orig, pts)
	print('step 3: apply Perspective Transform')

	warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
	# Local threshold with block size 11; the offset is a tuning value
	T = filters.threshold_local(warped, 11, offset=10, method='gaussian')
	# Binarize: pixels above the local threshold become 255, the rest 0
	warped = (warped > T).astype('uint8') * 255
	print('step 4: apply threshold')

	cv2.imshow('Original', imutils.resize(orig, height=650))
	cv2.imshow('Scanned', imutils.resize(warped, height=650))

	cv2.waitKey(0)
	cv2.destroyAllWindows()

# Run the scanner only when this file is executed as a script
if __name__ == "__main__":
	main()