之前有写一个关于全能扫描王的介绍。但是这个代码不太适用于我的数据集。所以微调了一下。
因为涉及保密,所以不能放出微调后的效果图。简单介绍一下:就是用这种方法对一个不规则的、缺了一块的图形进行了调整,最终实现了无论多边形是歪了还是缺了一块,都能找到它的最小外接矩形,然后把这一块抠出来就好了。这样可以降低我的目标检测 YOLO 模型的误检。
建议看懂代码后,根据自己图像的特点进行微调,或按需添加其他功能。
全能扫描王的实现可以参考这篇文章
代码如下:
# import the necessary packages
# from pyimagesearch.transform import four_point_transform
from skimage.filters import threshold_local
import numpy as np
import argparse
import cv2
import imutils
from glob import glob
def order_points(pts):
    """Order four corner points as top-left, top-right, bottom-right, bottom-left.

    The corners are first picked with the sum/difference heuristic:
    ``x + y`` is smallest at the top-left and largest at the bottom-right,
    while ``y - x`` is smallest at the top-right and largest at the
    bottom-left.  When ties make that heuristic select the same input point
    more than once (typical for a rectangle rotated by about 45 degrees),
    the points are instead ordered clockwise (in image coordinates, y
    pointing down) around their centroid, starting from the point with the
    smallest ``x + y``, so that no corner is lost.

    Args:
        pts: array-like of shape (N, 2) holding (x, y) coordinates; the
            tie fallback is only applied when N == 4.

    Returns:
        A (4, 2) float32 array of the ordered points.
    """
    pts = np.asarray(pts)

    sums = pts.sum(axis=1)
    diffs = np.diff(pts, axis=1).ravel()  # y - x for every point
    picks = [
        int(np.argmin(sums)),   # top-left
        int(np.argmin(diffs)),  # top-right
        int(np.argmax(sums)),   # bottom-right
        int(np.argmax(diffs)),  # bottom-left
    ]

    if len(pts) == 4 and len(set(picks)) < 4:
        # The heuristic reused a point, so one real corner would be dropped
        # and the resulting quadrilateral would be degenerate.  Sort by angle
        # around the centroid instead; ascending atan2 angle is clockwise
        # when the y axis points down.
        centre = pts.mean(axis=0)
        angles = np.arctan2(pts[:, 1] - centre[1], pts[:, 0] - centre[0])
        clockwise = np.argsort(angles, kind="stable")
        start = int(np.argmin(sums[clockwise]))
        picks = np.roll(clockwise, -start)

    rect = np.zeros((4, 2), dtype="float32")
    rect[:] = pts[picks]
    return rect
def four_point_transform(image, pts):
    """Warp the quadrilateral described by ``pts`` into an upright rectangle.

    Args:
        image: source image handed to ``cv2.warpPerspective``.
        pts: four (x, y) corner points, in any order; they are sorted with
            ``order_points`` before use.

    Returns:
        The warped image, whose width and height are the (truncated) lengths
        of the longer of each pair of opposite edges.
    """
    def _edge_length(p, q):
        # Euclidean distance between two (x, y) points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Output width: the longer of the bottom and top edges.
    out_width = max(
        int(_edge_length(bottom_right, bottom_left)),
        int(_edge_length(top_right, top_left)),
    )
    # Output height: the longer of the right and left edges.
    out_height = max(
        int(_edge_length(top_right, bottom_right)),
        int(_edge_length(top_left, bottom_left)),
    )

    # Destination corners in the same tl, tr, br, bl order as ``corners``.
    target = np.array(
        [
            [0, 0],
            [out_width - 1, 0],
            [out_width - 1, out_height - 1],
            [0, out_height - 1],
        ],
        dtype="float32",
    )

    matrix = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, matrix, (out_width, out_height))
def cut(path):
    """Crop an image to the minimum-area rectangle of its largest contour.

    The image is resized to a height of 500 px, thresholded, dilated,
    blurred and passed through Canny edge detection.  The contour with the
    largest area is taken, its minimum-area (possibly rotated) bounding
    rectangle is computed, scaled back to the original resolution and used
    to perspective-warp the full-size image.

    Args:
        path: path of the image file to load with ``cv2.imread``.

    Returns:
        The warped crop taken from the original-resolution image.

    Raises:
        OSError: if the image cannot be read.
        ValueError: if no contour is found in the edge map.
    """
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise OSError("cannot read image: {}".format(path))

    # Work on a copy resized to 500 px high (aspect ratio preserved) and keep
    # the scale factor so the detected box can be mapped back to the original.
    ratio = image.shape[0] / 500.0
    orig = image.copy()
    image = imutils.resize(image, height=500)

    # Binarise, dilate to close gaps in the shape, blur, then detect edges.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 60, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (21, 21))
    mask = cv2.dilate(binary, kernel)
    mask = cv2.GaussianBlur(mask, (5, 5), 0)
    edged = cv2.Canny(mask, 75, 200)
    print("STEP 1: Edge Detection")

    cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    if len(cnts) == 0:
        raise ValueError("no contours found in image: {}".format(path))

    # Only the largest contour is used, so a full sort is unnecessary.
    largest = max(cnts, key=cv2.contourArea)

    # Corners of the minimum-area rectangle, truncated to integer pixels.
    # np.intp replaces the np.int0 alias, which was removed in NumPy 2.0.
    box = cv2.boxPoints(cv2.minAreaRect(largest)).astype(np.intp)
    print("STEP 2: Find contours of paper")

    # Scale the box back to the original resolution before warping.
    warped = four_point_transform(orig, box.reshape(4, 2) * ratio)
    return warped
if __name__ == "__main__":
    import os

    # Build paths with os.path so the script works on both Windows and POSIX
    # (the hard-coded backslashes only worked on Windows).
    src_dir = os.path.join("data", "ori_image")
    out_dir = os.path.join("data", "cut_results")
    # cv2.imwrite does not create missing directories; it just returns False.
    os.makedirs(out_dir, exist_ok=True)

    for src_path in glob(os.path.join(src_dir, "*.jpg")):
        results = cut(src_path)
        out_path = os.path.join(out_dir, os.path.basename(src_path))
        if not cv2.imwrite(out_path, results):
            print("failed to write {}".format(out_path))