答题卡识别
很简单的一个任务!
目标是识别答题卡上每道题被涂黑的选项,并对照正确答案计算得分
经典两件套
import cv2
import numpy as np
def imshow(img):
    """Display ``img`` in a window named 'img' and block until a key is pressed.

    All HighGUI windows are destroyed once the key press is received.
    """
    window_name = 'img'
    cv2.imshow(window_name, img)
    # A delay of 0 makes waitKey block until any key is pressed.
    cv2.waitKey(0)
    cv2.destroyAllWindows()
自定义一下正确答案
# Answer key: one entry per question (row of bubbles). Each entry is the
# 0-based index of the correct option within its row, because it is compared
# against the option loop index j (0..4) when the sheet is graded.
answer = [2,3,1,3,4]
读取图片,转换成灰度图,高斯滤波去噪,二值化
# Load the photo of the answer sheet.
image_path = r'images\test_05.png'
img_rgb = cv2.imread(image_path)
if img_rgb is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising; fail here with a clear message rather than with
    # an obscure error inside cvtColor.
    raise FileNotFoundError('cannot read image: ' + image_path)
img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
# Light Gaussian blur (3x3 kernel, sigma 1) to suppress noise before thresholding.
img_gray = cv2.GaussianBlur(img_gray, (3, 3), 1)
# Otsu's method chooses the threshold automatically (the 0 passed as the
# threshold is ignored); THRESH_BINARY_INV inverts the result so that dark
# pixels become white (255).
img_binary = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
imshow(img_binary)
定位答题卡
# Find every contour in the binary image. cv2.CHAIN_APPROX_SIMPLE compresses
# straight segments and keeps only their end points.
# NOTE(review): taking element [0] assumes the OpenCV 4.x return value
# (contours, hierarchy) - confirm the installed version.
contours = cv2.findContours(img_binary.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[0]
# Locate the answer sheet first: walk the contours from the shortest to the
# longest perimeter.
contours = sorted(contours, key=lambda c: cv2.arcLength(c, True))
img_height, img_width = img_gray.shape[:2]
for contour in contours:
    x, y, w, h = cv2.boundingRect(contour)
    # The first contour whose bounding box spans more than 40% and less than
    # 95% of the image in BOTH directions is taken to be the answer sheet.
    # (The height upper bound must be checked against h, not w.)
    width_ok = 0.4 * img_width < w < 0.95 * img_width
    height_ok = 0.4 * img_height < h < 0.95 * img_height
    if width_ok and height_ok:
        break
else:
    # Without this, the script would silently continue with whatever contour
    # happened to be last (or hit a NameError when there are no contours).
    raise ValueError('no contour of plausible answer-sheet size was found')
img_1 = img_rgb.copy()
cv2.drawContours(img_1, [contour], -1, (255, 255, 0), 2)
imshow(img_1)
# Approximate the sheet outline with a polygon. The tolerance is 1% of the
# contour's perimeter; True marks the curve as closed.
epsilon = 0.01 * cv2.arcLength(contour, True)
contour = cv2.approxPolyDP(contour, epsilon, True)
透视变换,提取答题卡
# --- Perspective-correct the answer sheet and crop it out ---
# The polygon approximation is an (N, 1, 2) array; the warp below needs
# exactly four corners, so reject anything else with a clear message.
if len(contour) != 4:
    raise ValueError(
        'expected 4 corners for the answer sheet, got %d' % len(contour))
# Flatten to (4, 2) so each row is one (x, y) corner.
contour = contour.reshape(4, 2)
# The corners arrive in no particular order; store them as
# top-left, top-right, bottom-right, bottom-left. float32 is required by
# cv2.getPerspectiveTransform.
K_input = np.zeros((4, 2), dtype='float32')
# x + y is smallest at the top-left corner and largest at the bottom-right.
coord_sum = contour.sum(axis=1)
K_input[0] = contour[np.argmin(coord_sum)]
K_input[2] = contour[np.argmax(coord_sum)]
# y - x is smallest at the top-right corner and largest at the bottom-left.
coord_diff = np.diff(contour, axis=1)
K_input[1] = contour[np.argmin(coord_diff)]
K_input[3] = contour[np.argmax(coord_diff)]
(tl, tr, br, bl) = K_input
# Output size: the longer of the two opposite edges in each direction
# (bottom/top edge for the width, right/left edge for the height).
w = max(int(np.linalg.norm(br - bl)), int(np.linalg.norm(tr - tl)))
h = max(int(np.linalg.norm(tr - br)), int(np.linalg.norm(tl - bl)))
# Destination corners in the same order as K_input; the -1 keeps the
# coordinates inside a w x h image.
output = np.array([
    [0, 0],
    [w - 1, 0],
    [w - 1, h - 1],
    [0, h - 1],
], dtype='float32')
# K maps the four sheet corners in img_rgb onto the corners of the output
# image; (w, h) is the size of the warped image.
K = cv2.getPerspectiveTransform(K_input, output)
card_rgb = cv2.warpPerspective(img_rgb, K, (w, h))
imshow(card_rgb)
提取所有选项
# --- Extract the option bubbles from the rectified sheet ---
card_gray = cv2.GaussianBlur(cv2.cvtColor(card_rgb, cv2.COLOR_BGR2GRAY), (3, 3), 1)
# Otsu threshold, inverted so that dark marks become white.
card_binary = cv2.threshold(card_gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
# Outermost contours only; CHAIN_APPROX_SIMPLE keeps just segment end points.
contours = cv2.findContours(card_binary.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
# Show every contour that was found (green).
card_0 = card_rgb.copy()
cv2.drawContours(card_0, contours, -1, (0, 255, 0), 2)
imshow(card_0)


def _looks_like_bubble(cnt):
    """Return True if the bounding box is roughly square and wider than 20 px."""
    _, _, box_w, box_h = cv2.boundingRect(cnt)
    return abs(box_w / float(box_h) - 1) < 0.2 and box_w > 20


# Keep only the contours that pass the bubble filter and show them (cyan).
conts = [cnt for cnt in contours if _looks_like_bubble(cnt)]
card_1 = card_rgb.copy()
cv2.drawContours(card_1, conts, -1, (255, 255, 0), 2)
imshow(card_1)
按顺序读取选项,对答案
先按每个轮廓 y 坐标的均值从上到下排序,每 5 个轮廓为一组,对应一道题(一行)
再在每一行内按 x 坐标的均值从左到右排序,依次得到选项 A、B、C、D、E
# --- Read the bubbles row by row and find the marked one in each row ---
# conts is a list of contours, each an (n, 1, 2) array of (x, y) points.
# Sort them top-to-bottom by the mean y of their points.
conts = sorted(conts, key=lambda c: c.reshape(-1, 2).mean(axis=0)[1])
predict = []
# Every consecutive group of 5 contours is treated as one question (one row);
# a trailing incomplete group is ignored.
for i in range(len(conts) // 5):
    row_start = i * 5
    # Within the row, order the options left-to-right by their mean x.
    conts[row_start:row_start + 5] = sorted(
        conts[row_start:row_start + 5],
        key=lambda c: c.reshape(-1, 2).mean(axis=0)[0])
    # (white-pixel count, option index in the row, index into conts).
    # The fallback index points into the current row rather than at conts[0].
    number = (0, 0, row_start)
    for j in range(5):
        # Mask that is white only inside this option's contour
        # (thickness -1 fills the contour).
        mask = np.zeros_like(card_binary)
        cv2.drawContours(mask, [conts[row_start + j]], -1, 255, -1)
        # After the AND, only this option's pixels from card_binary remain.
        total = cv2.countNonZero(cv2.bitwise_and(card_binary, mask))
        # The option with the most white pixels is taken as the marked one.
        if total > number[0]:
            number = (total, j, row_start + j)
    predict.append(number)
correct = 0
# Debugging aid: to check the ordering, draw each contour of conts on a fresh
# copy of card_rgb and show it with imshow, one contour at a time.
计算分数 并显示
# --- Grade the sheet and draw the result ---
card_1 = card_rgb.copy()
correct = 0
# zip stops at the shorter sequence, so detecting more rows than there are
# answers can no longer raise IndexError.
for i, (ans, expected) in enumerate(zip(predict, answer)):
    # Red: the option that was detected as marked.
    cv2.drawContours(card_1, [conts[ans[2]]], -1, (0, 0, 255), 2)
    # Green: the correct option (drawn second, so it covers the red outline
    # when the marked option is the correct one).
    cv2.drawContours(card_1, [conts[expected + i * 5]], -1, (0, 255, 0), 2)
    if ans[1] == expected:
        correct += 1
# Score as an integer percentage of the number of questions in the answer key.
grade = int(correct / len(answer) * 100)
# Write the score onto the image.
cv2.putText(card_1, 'score:' + str(grade), (0, 50), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 0), 2)
imshow(card_1)
很简单的一个任务,很适合练手
可以自己保存图片试试
所有代码如下
import cv2
import numpy as np
def imshow(img):
    """Show ``img`` in the 'img' window, wait for any key, then close all windows."""
    title = 'img'
    cv2.imshow(title, img)
    # waitKey(0) blocks with no timeout until a key is pressed.
    cv2.waitKey(0)
    cv2.destroyAllWindows()
# Answer key: one 0-based option index per question (row of bubbles).
answer = [2, 3, 1, 3, 4]
# Load the photo of the answer sheet.
image_path = r'images\test_01.png'
img_rgb = cv2.imread(image_path)
if img_rgb is None:
    # cv2.imread returns None for a missing or unreadable file instead of
    # raising; stop here with a clear message.
    raise FileNotFoundError('cannot read image: ' + image_path)
img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
# Light Gaussian blur (3x3 kernel, sigma 1) to suppress noise before thresholding.
img_gray = cv2.GaussianBlur(img_gray, (3, 3), 1)
# Otsu's method chooses the threshold automatically; THRESH_BINARY_INV
# inverts the result so that dark pixels become white (255).
img_binary = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
imshow(img_binary)
# Find every contour in the binary image. cv2.CHAIN_APPROX_SIMPLE compresses
# straight segments and keeps only their end points.
# NOTE(review): taking element [0] assumes the OpenCV 4.x return value
# (contours, hierarchy) - confirm the installed version.
contours = cv2.findContours(img_binary.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[0]
# Walk the contours from the shortest to the longest perimeter.
contours = sorted(contours, key=lambda c: cv2.arcLength(c, True))
img_height, img_width = img_gray.shape[:2]
# Locate the answer sheet: the first contour whose bounding box spans more
# than 40% and less than 95% of the image in BOTH directions.
for contour in contours:
    x, y, w, h = cv2.boundingRect(contour)
    # The height upper bound must be checked against h, not w.
    width_ok = 0.4 * img_width < w < 0.95 * img_width
    height_ok = 0.4 * img_height < h < 0.95 * img_height
    if width_ok and height_ok:
        break
else:
    # Do not silently continue with whichever contour happened to be last.
    raise ValueError('no contour of plausible answer-sheet size was found')
img_1 = img_rgb.copy()
cv2.drawContours(img_1, [contour], -1, (255, 255, 0), 2)
# Approximate the sheet outline with a polygon. The tolerance is 1% of the
# contour's perimeter; True marks the curve as closed.
epsilon = 0.01 * cv2.arcLength(contour, True)
contour = cv2.approxPolyDP(contour, epsilon, True)
# --- Perspective-correct the answer sheet and crop it out ---
# The warp needs exactly four corners; reject any other polygon explicitly.
if len(contour) != 4:
    raise ValueError(
        'expected 4 corners for the answer sheet, got %d' % len(contour))
# The approximation is a (4, 1, 2) array; flatten to (4, 2), one (x, y) per row.
contour = contour.reshape(4, 2)
# Corners are stored as top-left [0], top-right [1], bottom-right [2],
# bottom-left [3]; float32 is required by cv2.getPerspectiveTransform.
K_input = np.zeros((4, 2), dtype='float32')
# x + y is smallest at the top-left corner and largest at the bottom-right.
coord_sum = contour.sum(axis=1)
K_input[0] = contour[np.argmin(coord_sum)]
K_input[2] = contour[np.argmax(coord_sum)]
# y - x is smallest at the top-right corner and largest at the bottom-left.
coord_diff = np.diff(contour, axis=1)
K_input[1] = contour[np.argmin(coord_diff)]
K_input[3] = contour[np.argmax(coord_diff)]
(tl, tr, br, bl) = K_input
# Output size: the longer of the bottom/top edges and of the right/left edges.
w = max(int(np.linalg.norm(br - bl)), int(np.linalg.norm(tr - tl)))
h = max(int(np.linalg.norm(tr - br)), int(np.linalg.norm(tl - bl)))
# Destination corners in the same order as K_input; the -1 keeps the
# coordinates inside a w x h image.
output = np.array([
    [0, 0],
    [w - 1, 0],
    [w - 1, h - 1],
    [0, h - 1],
], dtype='float32')
# K maps the four sheet corners in img_rgb onto the corners of the output
# image; (w, h) is the size of the warped image.
K = cv2.getPerspectiveTransform(K_input, output)
card_rgb = cv2.warpPerspective(img_rgb, K, (w, h))
imshow(card_rgb)
# --- Extract the option bubbles from the rectified sheet ---
card_gray = cv2.GaussianBlur(cv2.cvtColor(card_rgb, cv2.COLOR_BGR2GRAY), (3, 3), 1)
# Otsu threshold, inverted so that dark marks become white.
card_binary = cv2.threshold(card_gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
# Outermost contours only; CHAIN_APPROX_SIMPLE keeps just segment end points.
contours = cv2.findContours(card_binary.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
# Show every contour that was found (green).
card_0 = card_rgb.copy()
cv2.drawContours(card_0, contours, -1, (0, 255, 0), 2)
imshow(card_0)


def _looks_like_bubble(cnt):
    """Return True if the bounding box is roughly square and wider than 20 px."""
    _, _, box_w, box_h = cv2.boundingRect(cnt)
    return abs(box_w / float(box_h) - 1) < 0.2 and box_w > 20


# Keep only the contours that pass the bubble filter and show them (cyan).
conts = [cnt for cnt in contours if _looks_like_bubble(cnt)]
card_1 = card_rgb.copy()
cv2.drawContours(card_1, conts, -1, (255, 255, 0), 2)
imshow(card_1)
# --- Read the bubbles row by row and find the marked one in each row ---
# conts is a list of contours, each an (n, 1, 2) array of (x, y) points.
# Sort them top-to-bottom by the mean y of their points.
conts = sorted(conts, key=lambda c: c.reshape(-1, 2).mean(axis=0)[1])
predict = []
# Every consecutive group of 5 contours is treated as one question (one row);
# a trailing incomplete group is ignored.
for i in range(len(conts) // 5):
    row_start = i * 5
    # Within the row, order the options left-to-right by their mean x.
    conts[row_start:row_start + 5] = sorted(
        conts[row_start:row_start + 5],
        key=lambda c: c.reshape(-1, 2).mean(axis=0)[0])
    # (white-pixel count, option index in the row, index into conts).
    # The fallback index points into the current row rather than at conts[0].
    number = (0, 0, row_start)
    for j in range(5):
        # Mask that is white only inside this option's contour: the first -1
        # draws all contours in the list, the last -1 fills them.
        mask = np.zeros_like(card_binary)
        cv2.drawContours(mask, [conts[row_start + j]], -1, 255, -1)
        # After the AND, only this option's pixels from card_binary remain.
        total = cv2.countNonZero(cv2.bitwise_and(card_binary, mask))
        # The option with the most white pixels is taken as the marked one.
        if total > number[0]:
            number = (total, j, row_start + j)
    predict.append(number)
correct = 0
# Debugging aid: to check the ordering, draw each contour of conts on a fresh
# copy of card_rgb and show it with imshow, one contour at a time.
# --- Compute the score and display it ---
card_1 = card_rgb.copy()
correct = 0
# zip stops at the shorter sequence, so detecting more rows than there are
# answers can no longer raise IndexError.
for i, (ans, expected) in enumerate(zip(predict, answer)):
    # Red: the option that was detected as marked.
    cv2.drawContours(card_1, [conts[ans[2]]], -1, (0, 0, 255), 2)
    # Green: the correct option (drawn second, so it covers the red outline
    # when the marked option is the correct one).
    cv2.drawContours(card_1, [conts[expected + i * 5]], -1, (0, 255, 0), 2)
    if ans[1] == expected:
        correct += 1
# Score as an integer percentage of the number of questions in the answer key.
grade = int(correct / len(answer) * 100)
# Write the score onto the image.
cv2.putText(card_1, 'score:' + str(grade), (0, 50), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 0), 2)
imshow(card_1)