参考:https://github.com/makelove/OpenCV-Python-Tutorial
ch200_Extra_modules
Camera Calibration using ChArUco and Python
acruco_write_print_img.py
# -*- coding: utf-8 -*-
# @Time : 2017/7/27 15:39
# @Author : play4fun
# @File : acruco_write_print_img.py
# @Software: PyCharm
"""
acruco_write_print_img.py:
"""
import cv2
import numpy as np
# Number of chessboard squares along each side of the (square) ChArUco board.
num = 5
dictionary = cv2.aruco.getPredefinedDictionary(cv2.aruco.DICT_4X4_50)

# Square length 0.025 and marker length 0.0125 (same unit for both;
# presumably metres -- confirm against the printed board).
board = cv2.aruco.CharucoBoard_create(num, num, .025, .0125, dictionary)

# Render the board at 200 pixels per square.
side_px = 200 * num
img = board.draw((side_px, side_px))

# Dump the calibration board to a file; print it on paper or show it
# on a tablet for the calibration step.
cv2.imwrite(f'charuco_{num}x{num}.png', img)
aruco_test1.py
# -*- coding: utf-8 -*-
# @Time : 2017/7/27 15:38
# @Author : play4fun
# @File : aruco11.py
# @Software: PyCharm
"""
aruco11.py:
"""
import time, cv2
# import cv2.aruco as A
import numpy as np
dictionary = cv2.aruco.getPredefinedDictionary(cv2.aruco.DICT_4X4_50)
board = cv2.aruco.CharucoBoard_create(9, 9, .025, .0125, dictionary)
# Rendered board image; print it out before running the capture below.
img = board.draw((200 * 9, 200 * 9))
# cv2.imwrite('charuco.png', img)

# Start capturing images for calibration.
cap = cv2.VideoCapture(0)
allCorners = []
allIds = []
decimator = 0
imsize = None  # filled in once a frame has actually been read
try:
    for i in range(200):
        ret, frame = cap.read()
        if not ret or frame is None:
            # Camera missing or stream ended: stop instead of crashing
            # inside cvtColor on a None frame.
            print('failed to read a frame from the camera')
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # OpenCV takes the image size as (width, height), while
        # ndarray.shape is (rows, cols) -- reverse it.
        imsize = gray.shape[::-1]
        res = cv2.aruco.detectMarkers(gray, dictionary)
        if len(res[0]) > 0:
            print('len(res[0]):', len(res[0]))
            res2 = cv2.aruco.interpolateCornersCharuco(res[0], res[1], gray, board)
            # Keep every third frame that yields more than 3 board corners.
            if (res2[1] is not None and res2[2] is not None
                    and len(res2[1]) > 3 and decimator % 3 == 0):
                allCorners.append(res2[1])
                allIds.append(res2[2])
            cv2.aruco.drawDetectedMarkers(gray, res[0], res[1])
        cv2.imshow('frame', gray)
        key = cv2.waitKey(1)
        if key == ord('q'):
            break
        decimator += 1

    if not allCorners:
        print('no ChArUco corners were collected; skipping calibration')
    else:
        # Calibration fails for lots of reasons; report the failure instead
        # of silently swallowing every exception.
        try:
            # returns retval, cameraMatrix, distCoeffs, rvecs, tvecs
            cal = cv2.aruco.calibrateCameraCharuco(allCorners, allIds, board, imsize, None, None)
        except cv2.error as exc:
            print('calibration failed:', exc)
        else:
            print(cal)
            retval, cameraMatrix, distCoeffs, rvecs, tvecs = cal
            # Save the result so the camera need not be calibrated again.
            np.savez('calib.npz', mtx=cameraMatrix, dist=distCoeffs, rvecs=rvecs, tvecs=tvecs)
finally:
    # Release the camera and close the preview exactly once, on every path.
    cap.release()
    cv2.destroyAllWindows()
'''
retval, cameraMatrix, distCoeffs, rvecs, tvecs
(40.66987516955983, array([[ 1.51699257e+03, 0.00000000e+00, 8.53629301e+02],
[ 0.00000000e+00, 4.50213990e+02, 7.76441549e+02],
[ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00]]), array([[-0.11643503, -0.05270646, -0.02284758, 0.00088231, 0.01165172]]), [array([[ 0.6077912 ],
[-1.42834429],
[ 0.30243197]]), array([[ 0.6201644 ],
[-1.43661477],
[ 0.27205568]]), array([[ 0.62042201],
[-1.42419143],
[ 0.30919894]]), array([[ 0.6249636 ],
[-1.42055174],
[ 0.31823623]]), array([[ 0.51973251],
[-1.46121409],
[ 0.15266338]]), array([[ 0.62205772],
[-1.42727173],
[ 0.31075834]]), array([[ 2.02157844],
[-1.75270325],
[ 0.40542169]]), array([[ 2.01139176],
[-1.75092153],
[ 0.39164267]]), array([[ 2.02392487],
[-1.74645837],
[ 0.40174685]]), array([[ 2.01210339],
[-1.74882078],
[ 0.39050991]]), array([[ 0.87226228],
[-1.50949168],
[ 0.44230903]]), array([[ 0.87536626],
[-1.51319442],
[ 0.44075465]]), array([[ 0.88520206],
[-1.52335331],
[ 0.37352831]]), array([[ 0.07658779],
[ 0.58155207],
[ 1.24758432]])], [array([[ 0.10636704],
[-0.25196328],
[ 0.14970101]]), array([[ 0.10401605],
[-0.24774933],
[ 0.13947949]]), array([[ 0.10785286],
[-0.24809633],
[ 0.1447296 ]]), array([[ 0.10974635],
[-0.25258058],
[ 0.1578239 ]]), array([[ 0.07912493],
[-0.24214649],
[ 0.09198621]]), array([[ 0.10862882],
[-0.24818928],
[ 0.14389561]]), array([[ 0.1889342 ],
[-0.4414372 ],
[ 0.18697331]]), array([[ 0.18888891],
[-0.44753235],
[ 0.18719762]]), array([[ 0.18888745],
[-0.45388025],
[ 0.192372 ]]), array([[ 0.18836521],
[-0.43764759],
[ 0.18195927]]), array([[ 0.09873032],
[-0.44646521],
[ 0.09927093]]), array([[ 0.0997728 ],
[-0.43749325],
[ 0.09494866]]), array([[ 0.09320065],
[-0.44113935],
[ 0.08896787]]), array([[ 0.10408231],
[-0.5532671 ],
[ 0.17006767]])])
'''
calibrateCamera2.py
# -*- coding: utf-8 -*-
# @Time : 2017/7/27 18:04
# @Author : play4fun
# @File : calibrateCamera2.py
# @Software: PyCharm
"""
calibrateCamera2.py:
"""
import cv2
import numpy as np
def draw_axis(img, charuco_corners, charuco_ids, board):
    """Estimate the ChArUco board pose and draw its axes onto *img*.

    The camera matrix and distortion coefficients are read from
    ``./calib.npz`` (keys ``mtx`` and ``dist``). Nothing is drawn when
    pose estimation does not report success.

    Args:
        img: image to draw on (modified in place by ``drawAxis``).
        charuco_corners: interpolated ChArUco corner positions.
        charuco_ids: ids matching ``charuco_corners``.
        board: the ChArUco board the corners belong to.
    """
    # np.load on an .npz keeps the archive open; the context manager
    # closes it. Only the two arrays that are actually used are read.
    with np.load("./calib.npz") as vecs:
        mtx = vecs['mtx']
        dist = vecs['dist']
    ret, rvec, tvec = cv2.aruco.estimatePoseCharucoBoard(
        charuco_corners, charuco_ids, board, mtx, dist)
    if ret is True:
        # 0.1 is the drawn axis length (presumably in board units).
        cv2.aruco.drawAxis(img, mtx, dist, rvec, tvec, 0.1)
def get_image(camera):
    """Grab one frame from *camera* and return it.

    The success flag returned by ``camera.read()`` is discarded, so the
    caller receives whatever frame object the capture hands back.
    """
    _, frame = camera.read()
    return frame
def make_grayscale(img):
    """Return the result of converting *img* with ``cv2.COLOR_BGR2GRAY``."""
    return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
def main():
    """Live preview: detect ArUco markers and ChArUco diamonds, draw pose axes.

    Frames are read from camera 0 until ``q`` is pressed in the preview
    window or the camera stops delivering frames.
    """
    # NOTE(review): the original referenced `aruco_dict`, `mtx` and `dist`
    # without defining them. DICT_4X4_50 and ./calib.npz are what the
    # calibration script and draw_axis use -- confirm these are intended.
    aruco_dict = cv2.aruco.getPredefinedDictionary(cv2.aruco.DICT_4X4_50)
    with np.load("./calib.npz") as calib:
        mtx = calib['mtx']
        dist = calib['dist']
    # The board never changes, so build it once instead of once per frame.
    board = cv2.aruco.CharucoBoard_create(9, 6, 0.05, 0.03, aruco_dict)

    camera = cv2.VideoCapture(0)
    try:
        while True:
            img = get_image(camera)
            if img is None:
                print('failed to read a frame from the camera')
                break
            gray = make_grayscale(img)
            # The original passed the not-yet-assigned locals `corners` and
            # `ids` as extra arguments, raising UnboundLocalError.
            corners, ids, rejected = cv2.aruco.detectMarkers(gray, aruco_dict)
            cv2.aruco.drawDetectedMarkers(img, corners, ids)
            if ids is not None and corners is not None \
                    and len(ids) > 0 and len(ids) == len(corners):
                # 0.05 / 0.03 is the square-to-marker length ratio.
                diamond_corners, diamond_ids = \
                    cv2.aruco.detectCharucoDiamond(img, corners, ids,
                                                   0.05 / 0.03, cameraMatrix=mtx,
                                                   distCoeffs=dist)
                cv2.aruco.drawDetectedDiamonds(img, diamond_corners, diamond_ids)
                if diamond_corners is not None and diamond_ids is not None \
                        and len(diamond_corners) == len(diamond_ids):
                    count, char_corners, char_ids = \
                        cv2.aruco.interpolateCornersCharuco(diamond_corners,
                                                            diamond_ids, gray,
                                                            board)
                    if count >= 3:
                        draw_axis(img, char_corners, char_ids, board)
            cv2.imshow('calibration', img)
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
    finally:
        camera.release()
        cv2.destroyAllWindows()


if __name__ == '__main__':
    main()
cv-MSER区域检测
MSER_create1.py
# -*- coding: utf-8 -*-
# @Time : 2017/8/7 19:34
# @Author : play4fun
# @File : MSER_create1.py
# @Software: PyCharm
"""
MSER_create1.py:
https://stackoverflow.com/questions/40443988/python-opencv-ocr-image-segmentation
"""
import cv2
# Load the sample image and trim a 5-pixel border from every side.
img = cv2.imread('WQbGH.jpg')
img = img[5:-5, 5:-5, :]
mser = cv2.MSER_create()

# Resize the image (2x enlargement) so that MSER can work better.
src_h, src_w = img.shape[:2]
img2 = cv2.resize(img, (src_w * 2, src_h * 2))
gray = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
vis = img2.copy()

# regions[0] holds the detected point sets; outline each by its convex hull.
regions = mser.detectRegions(gray)
hulls = []
for points in regions[0]:
    hulls.append(cv2.convexHull(points.reshape(-1, 1, 2)))
cv2.polylines(vis, hulls, 1, (0, 255, 0))

# Display a copy shrunk back to the cropped size; save the enlarged overlay.
img3 = cv2.resize(vis, (src_w, src_h))
cv2.namedWindow('img', 0)
cv2.imshow('img', img3)
cv2.imwrite('mser-result.jpg', vis)
cv2.waitKey(0)
cv2.destroyAllWindows()
my01-OCR文字识别
使用-模板匹配-识别信用卡号码
matchTemplate_credit_card_num1.py
# -*- coding: utf-8 -*-
# @Time : 2017/8/6 11:32
# @Author : play4fun
# @File : matchTemplate_credit_card_num1.py
# @Software: PyCharm
"""
matchTemplate_credit_card_num1.py:
http://www.pyimagesearch.com/2017/07/17/credit-card-ocr-with-opencv-and-python/
python ocr_template_match.py --reference ocr_a_reference.png --image images/credit_card_04.png
Credit Card Type: Visa
Credit Card #: 4000123456789010
检测图像中信用卡的位置。
本地化四位数字,与信用卡上十六位数相关。
应用OCR来识别信用卡上的十六位数字。
识别信用卡类型(即Visa,万事达卡,美国运通等)。
"""
# import the necessary packages
from imutils import contours
import numpy as np
import argparse
import imutils
import cv2
# Parse the command line: both the card photo and the OCR-A reference
# sheet are mandatory.
ap = argparse.ArgumentParser()
for short_flag, long_flag, help_text in (
        ("-i", "--image", "path to input image"),
        ("-r", "--reference", "path to reference OCR-A image")):
    ap.add_argument(short_flag, long_flag, required=True, help=help_text)
args = vars(ap.parse_args())
# Credit card type lookup, keyed by the first digit of the card number.
FIRST_NUMBER = dict([
    ('0', 'None'),
    ("3", "American Express"),
    ("4", "Visa"),
    ("5", "MasterCard"),
    ("6", "Discover Card"),
])
# Load the reference OCR-A image from disk and convert it to grayscale.
ref_bgr = cv2.imread(args["reference"])
ref_gray = cv2.cvtColor(ref_bgr, cv2.COLOR_BGR2GRAY)
# Inverted binary threshold at 10, so that the digits appear as *white*
# on a *black* background.
_, ref = cv2.threshold(ref_gray, 10, 255, cv2.THRESH_BINARY_INV)
cv2.imshow('ref', ref)
cv2.waitKey(0)
'''
# find contours in the OCR-A image (i.e,. the outlines of the digits)
# sort them from left to right, and initialize a dictionary to map
# digit name to the ROI
# refCnts = cv2.findContours(ref.copy(), cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)#有问题
refCnts = cv2.findContours(ref.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# refCnts = refCnts[0] if imutils.is_cv2() else refCnts[1]
refCnts = refCnts[1]
print('len cnt:',len(refCnts))
refCnts = contours.sort_contours(refCnts, method="left-to-right")[0]#排列轮廓,没意义
print('sort_contours len cnt:',len(refCnts))
digits = {}
# 循环浏览轮廓,提取ROI并将其与相应的数字相关联
# loop over the OCR-A reference contours
for (i, c) in enumerate(refCnts):
# compute the bounding box for the digit, extract it, and resize
# it to a fixed size
(x, y, w, h) = cv2.boundingRect(c)
roi = ref[y:y + h, x:x + w]
roi = cv2.resize(roi, (57, 88))
cv2.imshow('roi', roi)
cv2.waitKey(500)
# update the digits dictionary, mapping the digit name to the ROI
digits[i] = roi
# 从参考图像中提取数字,并将其与相应的数字名称相关联
print('digits:',digits.keys())
'''
# Attempt 1: instead of contour detection, cut the reference sheet into ten
# equal-width vertical strips and use strip number x as the template for
# digit x.
digits = {}
rows, cols = ref.shape
per = int(cols / 10)
for x in range(10):
    left = x * per
    # Resize every strip to the fixed 57x88 template size.
    roi = cv2.resize(ref[:, left:left + per], (57, 88))
    cv2.imshow('roi', roi)
    cv2.waitKey(500)
    # Update the digits dictionary, mapping the digit name to the ROI.
    digits[x] = roi
# The digits extracted from the reference image are now associated with
# their digit names.
print('digits:', digits.keys())
# Initialize a pair of structuring kernels: a rectangular one (wider than
# it is tall) and a square one. A kernel is a small matrix slid across the
# image for operations such as blurring, sharpening or edge detection.
rectKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 3))
sqKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))

# Load the credit card photo, resize it, and convert it to grayscale.
image = cv2.imread(args["image"])
if image is None:
    # cv2.imread signals failure by returning None; stop with a clear
    # message instead of failing later inside imutils.resize.
    raise SystemExit("could not read input image: {}".format(args["image"]))
image = imutils.resize(image, width=300)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Apply a tophat (whitehat) morphological operator to find light
# regions against a dark background (i.e., the credit card numbers).
tophat = cv2.morphologyEx(gray, cv2.MORPH_TOPHAT, rectKernel)

# Compute the Scharr gradient of the tophat image (ksize=-1), then scale
# the result back into the range [0, 255].
gradX = cv2.Sobel(tophat, ddepth=cv2.CV_32F, dx=1, dy=0,
                  ksize=-1)
gradX = np.absolute(gradX)
(minVal, maxVal) = (np.min(gradX), np.max(gradX))
span = maxVal - minVal
if span > 0:
    gradX = (255 * ((gradX - minVal) / span))
else:
    # A perfectly flat gradient would divide by zero; it rescales to zeros.
    gradX = gradX - minVal
gradX = gradX.astype("uint8")

# Apply a closing operation using the rectangular kernel to help close
# gaps in between credit card number digits, then apply Otsu's
# thresholding method to binarize the image.
gradX = cv2.morphologyEx(gradX, cv2.MORPH_CLOSE, rectKernel)
thresh = cv2.threshold(gradX, 0, 255,
                       cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

# Apply a second closing operation to the binary image, again to help
# close gaps between credit card number regions.
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, sqKernel)
# Find contours in the thresholded image, then initialize the list of
# digit-group locations.
cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                        cv2.CHAIN_APPROX_SIMPLE)
# findContours returns (contours, hierarchy) on OpenCV 2.4 and 4.x but
# (image, contours, hierarchy) on 3.x; the contour list is always the
# second-to-last item. The old is_cv2() test picked the hierarchy on 4.x.
cnts = cnts[-2]
locs = []

# Loop over the contours.
for (i, c) in enumerate(cnts):
    # Compute the bounding box of the contour, then use the bounding box
    # coordinates to derive the aspect ratio.
    (x, y, w, h) = cv2.boundingRect(c)
    ar = w / float(h)

    # Credit cards use a fixed-size font with 4 groups of 4 digits, so
    # candidates can be pruned by shape: aspect ratio between 2.5 and 4.0
    # (wider than tall), width between 40 and 55 pixels and height between
    # 10 and 20 pixels.
    if 2.5 < ar < 4.0 and 40 < w < 55 and 10 < h < 20:
        # Append the bounding box of the digit group to the locations list.
        locs.append((x, y, w, h))
# Sort the digit-group locations from left to right, then initialize the
# list of classified digits.
locs = sorted(locs, key=lambda x: x[0])
output = []

# Loop over the groupings of digits.
for (i, (gX, gY, gW, gH)) in enumerate(locs):
    # Initialize the list of digits recognized in this group.
    groupOutput = []

    # Extract the group ROI from the grayscale image with 5 px of padding.
    # Clamp the start at 0: a negative start index would wrap around to
    # the far edge of the image and yield a wrong or empty slice.
    top = max(gY - 5, 0)
    left = max(gX - 5, 0)
    group = gray[top:gY + gH + 5, left:gX + gW + 5]
    # Otsu threshold to segment the digits from the card background.
    group = cv2.threshold(group, 0, 255,
                          cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    # Preview the thresholded group. (The original displayed element 0 of
    # the findContours result, which is an image only on OpenCV 3.x.)
    cv2.imshow('digitCnts', group)
    cv2.waitKey(1000)

    # Detect the contours of each individual digit in the group; [-2]
    # selects the contour list on every OpenCV version.
    digitCnts = cv2.findContours(group.copy(), cv2.RETR_EXTERNAL,
                                 cv2.CHAIN_APPROX_SIMPLE)[-2]
    # Sort the digit contours from left to right so the recognized digits
    # come out in reading order.
    digitCnts = sorted(digitCnts, key=lambda cnt: cv2.boundingRect(cnt)[0])

    # Loop over the digit contours.
    for c in digitCnts:
        # Compute the bounding box of the individual digit, extract the
        # digit, and resize it to the same fixed size as the reference
        # OCR-A templates.
        (x, y, w, h) = cv2.boundingRect(c)
        roi = group[y:y + h, x:x + w]
        roi = cv2.resize(roi, (57, 88))

        # Template-matching score against every reference digit.
        scores = []
        for (digit, digitROI) in digits.items():
            # Apply correlation-based template matching and keep the
            # maximum score.
            result = cv2.matchTemplate(roi, digitROI,
                                       cv2.TM_CCOEFF)
            (_, score, _, _) = cv2.minMaxLoc(result)
            scores.append(score)

        # The classification for the digit ROI is the reference digit
        # with the *largest* template matching score.
        groupOutput.append(str(np.argmax(scores)))

    # Draw the digit classifications around the group.
    cv2.rectangle(image, (gX - 5, gY - 5),
                  (gX + gW + 5, gY + gH + 5), (0, 0, 255), 2)
    cv2.putText(image, "".join(groupOutput), (gX, gY - 15),
                cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 0, 255), 2)

    # Update the output digits list.
    output.extend(groupOutput)

# Display the output credit card information to the screen. When nothing
# was recognized there is no first digit, so report the type as 'None'.
card_type = FIRST_NUMBER.get(output[0], 'None') if output else 'None'
print("Credit Card Type: {}".format(card_type))
print("Credit Card #: {}".format("".join(output)))
cv2.imshow("Image", image)  # TODO: results are not great yet; needs improvement
cv2.waitKey(0)
pytesseract
ocr.py
# -*- coding: utf-8 -*-
# @Time : 2017/7/15 下午6:13
# @Author : play4fun
# @File : ocr.py.py
# @Software: PyCharm
"""
ocr.py:
"""
# import the necessary packages
from PIL import Image
import pytesseract
import argparse
import cv2
import os
# Construct the argument parser and parse the arguments.
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
                help="path to input image to be OCR'd")
ap.add_argument("-p", "--preprocess", type=str, default="thresh",
                help="type of preprocessing to be done")
args = vars(ap.parse_args())

# Load the example image and convert it to grayscale.
image = cv2.imread(args["image"])
if image is None:
    # cv2.imread returns None on failure; fail with a readable message.
    raise SystemExit("could not read input image: {}".format(args["image"]))
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Check to see if we should apply thresholding to preprocess the image.
if args["preprocess"] == "thresh":
    gray = cv2.threshold(gray, 0, 255,
                         cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
# Otherwise check whether median blurring should be done to remove noise.
elif args["preprocess"] == "blur":
    gray = cv2.medianBlur(gray, 3)

# Write the grayscale image to disk as a temporary file (named after the
# process id) so we can apply OCR to it.
filename = "{}.png".format(os.getpid())
cv2.imwrite(filename, gray)

# Load the image as a PIL/Pillow image and apply OCR. The file handle is
# closed before removal, and the temporary file is deleted even when OCR
# raises.
try:
    with Image.open(filename) as pil_image:
        text = pytesseract.image_to_string(pil_image)
finally:
    os.remove(filename)
print(text)

# Show the output images.
cv2.imshow("Image", image)
cv2.imshow("Output", gray)
cv2.waitKey(0)
tesserocr
GetComponentImages-example.py
# -*- coding: utf-8 -*-
# @Time : 2017/8/21 14:49
# @Author : play4fun
# @File : GetComponentImages-example.py.py
# @Software: PyCharm
"""
GetComponentImages-example.py:
"""
from PIL import Image
from tesserocr import PyTessBaseAPI, RIL
# OCR each text-line component of the image separately and report its
# bounding box, mean confidence and recognized text.
with PyTessBaseAPI() as api:
    # image = Image.open('/usr/src/tesseract/testing/phototest.tif')
    image = Image.open('phototest.tif')  # original note: this image is problematic
    print(image.format, image.info, image.height, image.width)
    api.SetImage(image)
    boxes = api.GetComponentImages(RIL.TEXTLINE, True)
    print('Found {} textline image components.'.format(len(boxes)))
    for i, (im, box, _, _) in enumerate(boxes):
        # im is a PIL image object
        # box is a dict with x, y, w and h keys
        api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
        ocrResult = api.GetUTF8Text()
        conf = api.MeanTextConf()
        # Format the string *before* printing: the original called
        # .format() on the None returned by print(), which raises
        # AttributeError on Python 3.
        print(u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, "
              "confidence: {1}, text: {2}".format(i, conf, ocrResult, **box))
tesserocr_demo1.py
# -*-coding:utf8-*-#
__author__ = 'play4fun'
"""
create time:16/10/21 11:44
"""
from tesserocr import PyTessBaseAPI
# Sample images to run through Tesseract, one after another.
images = [
    '/Volumes/GF/Project/Python/Tesserocr/tesserocr/sample1.jpeg',
    '/Volumes/GF/Project/Python/Tesserocr/tesserocr/sample2.jpeg',
    '/Volumes/GF/Project/Python/Tesserocr/tesserocr/sample3.jpeg',
]

# api is automatically finalized when used in a with-statement (context
# manager); otherwise api.End() should be called explicitly once it is no
# longer needed.
with PyTessBaseAPI() as api:
    for image_path in images:
        api.SetImageFile(image_path)
        # Recognized text first, then the per-word confidences.
        recognized = api.GetUTF8Text()
        print('text:', recognized)
        print('-----')
        confidences = api.AllWordConfidences()
        print(confidences)
        print('-----')
tesserocr_demo2.py
# -*-coding:utf8-*-#
__author__ = 'play4fun'
"""
create time:16/10/21 11:47
"""
import tesserocr
from PIL import Image
# Print the tesseract-ocr version.
engine_version = tesserocr.tesseract_version()
print(engine_version)

# Print the tessdata path and the list of available languages.
language_info = tesserocr.get_languages()
print(language_info)

# OCR from an already opened PIL image ...
image = Image.open('sample.jpg')
text_from_image = tesserocr.image_to_text(image)
print(text_from_image)

# ... or directly from the file path.
text_from_file = tesserocr.file_to_text('sample.jpg')
print(text_from_file)
tesserocr_demo3.py
# -*-coding:utf8-*-#
__author__ = 'play4fun'
"""
create time:16/10/21 11:47
"""
# Orientation and script detection (OSD)
from PIL import Image
from tesserocr import PyTessBaseAPI, PSM
# Run layout analysis with orientation/script detection enabled, report
# the page orientation values, then print the recognized text.
with PyTessBaseAPI(psm=PSM.AUTO_OSD) as api:
    # Earlier attempts used "/usr/src/tesseract/testing/eurotext.tif"
    # (no such file) and "eurotext.tif".
    image = Image.open('phototest.tif')
    api.SetImage(image)
    api.Recognize()

    layout = api.AnalyseLayout()
    orientation, direction, order, deskew_angle = layout.Orientation()
    print("Orientation: {:d}".format(orientation))
    print("WritingDirection: {:d}".format(direction))
    print("TextlineOrder: {:d}".format(order))
    print("Deskew angle: {:.4f}".format(deskew_angle))

    ocrResult = api.GetUTF8Text()
    print('result:\n', ocrResult)
my02-视频-对象跟踪
tracker.py
'''
https://www.learnopencv.com/object-tracking-using-opencv-cpp-python/
'''
import cv2
import sys
if __name__ == '__main__':
    # Set up tracker. Any entry of tracker_types can be selected by index.
    tracker_types = ['BOOSTING', 'MIL', 'KCF', 'TLD', 'MEDIANFLOW', 'GOTURN']
    # tracker_type = tracker_types[2]
    tracker_type = tracker_types[0]
    # NOTE(review): cv2.Tracker_create may not exist in every OpenCV
    # release -- confirm against the installed version.
    tracker = cv2.Tracker_create(tracker_type)

    # Read video.
    video = cv2.VideoCapture("videos/chaplin.mp4")
    try:
        # Exit if video not opened.
        if not video.isOpened():
            print("Could not open video")
            sys.exit()

        # Read first frame.
        ok, frame = video.read()
        if not ok:
            print('Cannot read video file')
            sys.exit()

        # Define an initial bounding box.
        bbox = (287, 23, 86, 320)
        # Uncomment the line below to select a different bounding box.
        # bbox = cv2.selectROI(frame, False)

        # Initialize tracker with first frame and bounding box.
        ok = tracker.init(frame, bbox)

        while True:
            # Read a new frame.
            ok, frame = video.read()
            if not ok:
                break

            # Start timer.
            timer = cv2.getTickCount()

            # Update tracker.
            ok, bbox = tracker.update(frame)

            # Calculate frames per second (FPS); guard against a zero tick
            # delta so the division cannot fail.
            elapsed = cv2.getTickCount() - timer
            fps = cv2.getTickFrequency() / elapsed if elapsed else 0.0

            if ok:
                # Tracking success: draw the bounding box.
                p1 = (int(bbox[0]), int(bbox[1]))
                p2 = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
                cv2.rectangle(frame, p1, p2, (255, 0, 0), 2, 1)
            else:
                # Tracking failure.
                cv2.putText(frame, "Tracking failure detected", (100, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)

            # Display tracker type on frame.
            cv2.putText(frame, tracker_type + " Tracker", (100, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (50, 170, 50), 2)
            # Display FPS on frame.
            cv2.putText(frame, "FPS : " + str(int(fps)), (100, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (50, 170, 50), 2)
            # Display result.
            cv2.imshow("Tracking", frame)

            # Exit if ESC pressed.
            k = cv2.waitKey(1) & 0xff
            if k == 27:
                break
    finally:
        # Release the capture and close the window on every exit path.
        video.release()
        cv2.destroyAllWindows()
motiondetect.py
'''
http://www.technicdynamic.com/2017/08/28/python-motion-detection-with-opencv-simple/
'''
import cv2 # importing Python OpenCV
from datetime import datetime # importing datetime for naming files w/ timestamp
def diffImg(t0, t1, t2):
    """Combine the changes between three consecutive frames.

    Returns the bitwise AND of ``absdiff(t2, t1)`` and ``absdiff(t1, t0)``,
    i.e. only what differs in both frame-to-frame steps.
    """
    return cv2.bitwise_and(cv2.absdiff(t2, t1), cv2.absdiff(t1, t0))
# Motion-detection loop: compares three consecutive grayscale frames and
# writes a timestamped JPEG when the count of changed pixels exceeds
# `threshold`. Press 'q' in the window to stop.
# NOTE(review): indentation was lost in this listing. Which statements
# belong to the `while` / `if` bodies (in particular whether the second
# `timeCheck = ...` sits inside the `if`) must be confirmed against the
# original script before editing the code.
# NOTE(review): the capture `cam` is never released in the visible code.
threshold = 81500 # Threshold for triggering "motion detection"
cam = cv2.VideoCapture(0) # Lets initialize capture on webcam
winName = "Movement Indicator" # comment to hide window
cv2.namedWindow(winName) # comment to hide window
# Read three images first:
# (cam.read()[1] is the frame; the success flag at index 0 is ignored)
t_minus = cv2.cvtColor(cam.read()[1], cv2.COLOR_RGB2GRAY)
t = cv2.cvtColor(cam.read()[1], cv2.COLOR_RGB2GRAY)
t_plus = cv2.cvtColor(cam.read()[1], cv2.COLOR_RGB2GRAY)
# Lets use a time check so we only take 1 pic per sec
# (timeCheck holds the seconds field of the current time, e.g. '07s')
timeCheck = datetime.now().strftime('%Ss')
while True:
# Each iteration shows a freshly read frame, separate from the frames
# used for the difference computation.
cv2.imshow(winName, cam.read()[1]) # comment to hide window
# Trigger when enough pixels changed AND the seconds field has moved on
# since timeCheck was last updated.
if cv2.countNonZero(diffImg(t_minus, t, t_plus)) > threshold and timeCheck != datetime.now().strftime('%Ss'):
# Grab another frame and save it under a timestamped file name.
dimg = cam.read()[1]
cv2.imwrite(datetime.now().strftime('%Y%m%d_%Hh%Mm%Ss%f') + '.jpg', dimg)
timeCheck = datetime.now().strftime('%Ss')
# Read next image
# (slide the three-frame window forward by one frame)
t_minus = t
t = t_plus
t_plus = cv2.cvtColor(cam.read()[1], cv2.COLOR_RGB2GRAY)
# Poll the keyboard for 10 ms; 'q' closes the window and ends the loop.
key = cv2.waitKey(10)
if key == ord('q'):
cv2.destroyWindow(winName) # comment to hide window
break
my03-面向对象
检测线条和形状-几何形状.py
# -*- coding: utf-8 -*-
# @Time : 2017/7/27 11:55
# @Author : play4fun
# @File : 检测线条和形状-几何形状.py
# @Software: PyCharm
"""
检测线条和形状-几何形状.py:
https://stackoverflow.com/questions/31974843/detecting-lines-and-shapes-in-opencv-using-python
"""
import cv2
import numpy as np
class File(object):
    """An image file on disk, addressed by its path.

    Results are written below the relative ``output/`` directory.
    """

    def __init__(self, filename):
        # Path used by open() and save() when no other name is given.
        self.filename = filename

    def open(self, filename=None, mode='r'):
        """Return ``(image, handle)`` for *filename* (default: own path).

        ``image`` comes from ``cv2.imread``; ``handle`` is the file opened
        with the builtin ``open`` in *mode*. Closing the handle is left
        to the caller.
        """
        target = self.filename if filename is None else filename
        return cv2.imread(target), open(target, mode)

    def save(self, image=None, filename_override=None):
        """Write *image* to ``output/<name>`` and return cv2.imwrite's result.

        ``<name>`` is *filename_override* when given (truthy), otherwise
        the last ``/``-separated component of the stored path.
        """
        basename = self.filename.split('/')[-1]
        destination = "output/" + (filename_override or basename)
        return cv2.imwrite(destination, image)
class Image(object):
    """Thin wrapper around an image array offering a few OpenCV operations."""

    def __init__(self, image):
        # The wrapped image; lines() draws on it in place.
        self.image = image

    def grayscale(self):
        """Return the wrapped image converted with ``cv2.COLOR_BGR2GRAY``."""
        return cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)

    def edges(self):
        """Return the Canny edge map of the wrapped image (thresholds 0, 255)."""
        return cv2.Canny(self.image, 0, 255)

    def lines(self):
        """Detect line segments with ``HoughLinesP`` and draw them in place.

        Does nothing when no segment is found.
        """
        segments = cv2.HoughLinesP(self.image, 1, np.pi / 2, 6, None, 50, 10)
        if segments is None:
            # HoughLinesP reports "nothing found" as None.
            return
        # The result is (1, N, 4) on OpenCV 2.4 and (N, 1, 4) on 3.x+;
        # iterating lines[0] therefore drew only the first segment on newer
        # versions. Flattening to (N, 4) visits every segment on both.
        for x1, y1, x2, y2 in segments.reshape(-1, 4):
            # Plain ints: OpenCV point arguments may reject numpy scalars.
            cv2.line(self.image, (int(x1), int(y1)), (int(x2), int(y2)),
                     (0, 0, 255), 2)
if __name__ == '__main__':
    # Use distinct instance names: the original rebound `File` and `Image`
    # to instances, shadowing the classes for the rest of the module.
    source = File('images/a.png')
    loaded, handle = source.open()
    # open() also returns an open file handle; close it, it is not needed.
    handle.close()
    picture = Image(loaded)
    # Detect the segments on the grayscale version and save the result.
    picture.image = picture.grayscale()
    picture.lines()
    source.save(picture.image)
my04-Maze-Solver迷宫解密
aStar1.py
# -*- coding: utf-8 -*-
# @Time : 2017/7/30 18:16
# @Author : play4fun
# @File : aStar1.py.py
# @Software: PyCharm
"""
aStar1.py: 不行!??
"""
import sys
# from Queue import Queue
from multiprocessing import Queue
from PIL import Image
start = (400, 984)
end = (398, 25)
def iswhite(value):
    """Return True when *value* equals the white RGB triple, else False.

    The original fell off the end and returned None for non-white
    values; an explicit boolean is returned now (same truthiness).
    """
    return value == (255, 255, 255)
def getadjacent(n):
    """Return the four edge-adjacent neighbours of the point *n*.

    For ``n == (x, y)`` the result is, in this order:
    ``(x - 1, y)``, ``(x, y - 1)``, ``(x + 1, y)``, ``(x, y + 1)``.
    No bounds checking is done.
    """
    col, row = n
    offsets = ((-1, 0), (0, -1), (1, 0), (0, 1))
    return [(col + dx, row + dy) for dx, dy in offsets]
def BFS(start, end, pixels):
    """Breadth-first search from *start* to *end* across white pixels.

    Args:
        start: ``(x, y)`` tuple where the search begins.
        end: ``(x, y)`` tuple to reach.
        pixels: object indexable as ``pixels[x, y]`` for reading and
            writing pixel values. Visited white pixels are overwritten
            with ``(127, 127, 127)``, so it is modified in place.

    Returns:
        The list of ``(x, y)`` points from *start* to *end* inclusive, or
        None (after printing a message) when *end* cannot be reached.
    """
    from collections import deque

    # A plain in-process FIFO. The original used multiprocessing.Queue,
    # whose empty() may report True right after put() because items pass
    # through a background feeder thread, ending the search early.
    frontier = deque([start])
    # Predecessor map instead of copying a whole path list per queue entry.
    parents = {start: None}
    while frontier:
        pixel = frontier.popleft()
        if pixel == end:
            # Walk the predecessor chain back to the start.
            path = []
            while pixel is not None:
                path.append(pixel)
                pixel = parents[pixel]
            path.reverse()
            return path
        for adjacent in getadjacent(pixel):
            if adjacent in parents:
                continue
            x, y = adjacent
            if x < 0 or y < 0:
                # Negative coordinates are outside the image; do not let
                # them wrap around to the opposite edge.
                continue
            try:
                value = pixels[x, y]
            except IndexError:
                # Beyond the right/bottom edge.
                continue
            if iswhite(value):
                # Recolour so the pixel no longer tests as white, i.e. it
                # is marked as visited.
                pixels[x, y] = (127, 127, 127)
                parents[adjacent] = pixel
                frontier.append(adjacent)
    print("Queue has been exhausted. No answer was found.")
if __name__ == '__main__':
    # invoke: python mazesolver.py <mazefile> <outputfile>[.jpg|.png|etc.]
    if len(sys.argv) < 3:
        sys.exit('usage: python mazesolver.py <mazefile> <outputfile>')

    # Force RGB: iswhite() compares against a 3-tuple, so palette,
    # grayscale or RGBA pixels would never be recognized as white.
    base_img = Image.open(sys.argv[1]).convert('RGB')
    base_pixels = base_img.load()
    print(base_pixels)

    path = BFS(start, end, base_pixels)
    if path is None:
        print('path is None')
        # sys.exit rather than the interactive-only exit() helper.
        sys.exit(-1)
    print('path:', path)

    # BFS recoloured the pixels of base_img, so draw the path on a fresh
    # copy of the maze.
    path_img = Image.open(sys.argv[1]).convert('RGB')
    path_pixels = path_img.load()
    for position in path:
        x, y = position
        path_pixels[x, y] = (255, 0, 0)  # red
    path_img.save(sys.argv[2])
maze.py
'''
源文件是使用opencv2.4,
改成opencv3.2有点问题。
https://ishankgulati.github.io/posts/Maze-Solver/
'''
import cv2
import numpy as np
img = cv2.imread('SampleImages/1.png')
# img = cv2.imread('SampleImages/2.png')
# img = cv2.imread('SampleImages/3.jpg')  # does not work, needs changes
# img = cv2.imread('SampleImages/huge_maze.jpg')  # does not work, needs changes
if img is None:
    # cv2.imread returns None on failure; stop with a readable message.
    raise SystemExit('could not read SampleImages/1.png')
cv2.imshow('maze', img)
cv2.waitKey(0)

# Binary conversion: inverse thresholding gives a binary image with white
# walls on a black background.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
cv2.imshow('THRESH_BINARY_INV', thresh)
cv2.waitKey(0)

# Contours. findContours returns 2 values on OpenCV 2.4/4.x and 3 values
# on 3.x; the last two are always (contours, hierarchy).
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_NONE)[-2:]
print('len(contours):', len(contours))
if len(contours) < 2:
    # The steps below draw contour 0 and contour 1.
    raise SystemExit('expected at least two wall contours, found {}'.format(len(contours)))
# dc=cv2.drawContours(thresh, contours, 0, (255, 255, 255), -1)
# Mark the two contours with different colours: the first in white ...
dc = cv2.drawContours(thresh, contours, 0, (255, 255, 255), 5)
# ... and the second in black. TODO: the huge maze has len(contours) == 26.
dc = cv2.drawContours(dc, contours, 1, (0, 0, 0), 5)
cv2.imshow('drawContours', dc)
cv2.waitKey(0)

ret, thresh = cv2.threshold(dc, 240, 255, cv2.THRESH_BINARY)
# ret, thresh = cv2.threshold(thresh, 240, 255, cv2.THRESH_BINARY)
cv2.imshow('thresh2', thresh)
cv2.waitKey(0)

# Dilate.
# Dilation is one of the two basic operators of mathematical morphology,
# the other being erosion. On a binary image it gradually enlarges the
# boundaries of foreground (usually white) regions, so those regions grow
# and the holes inside them shrink.
ke = 10
# kernel = np.ones((19, 19), np.uint8)
kernel = np.ones((ke, ke), np.uint8)
dilation = cv2.dilate(thresh, kernel, iterations=1)
cv2.imshow('dilation', dilation)
cv2.waitKey(0)

# Erosion.
# Erosion is the second morphological operator. On a binary image it eats
# away the boundaries of foreground (usually white) regions, so those
# regions shrink and the holes inside them grow.
erosion = cv2.erode(dilation, kernel, iterations=1)
cv2.imshow('erosion', erosion)
cv2.waitKey(0)

# Find the difference between the two images.
diff = cv2.absdiff(dilation, erosion)
cv2.imshow('diff', diff)
cv2.waitKey(0)

# To show the solution on the original maze, split the maze into its
# b, g, r channels and build a mask by inverting the diff image.
b, g, r = cv2.split(img)
mask_inv = cv2.bitwise_not(diff)

# Mask out the green and red colour along the solved path, then merge the
# channels again: the solution ends up marked in blue.
r = cv2.bitwise_and(r, r, mask=mask_inv)
g = cv2.bitwise_and(g, g, mask=mask_inv)
res = cv2.merge((b, g, r))
cv2.imshow('Solved Maze', res)
cv2.imwrite('SampleImages/Solved-Maze-1.png', res)
cv2.waitKey(0)
cv2.destroyAllWindows()
my05-删除图像中的水印
参考:
https://stackoverflow.com/questions/32125281/removing-watermark-out-of-an-image-using-opencv
1.py
import cv2
import numpy as np
im2 = cv2.imread("YZeOg.jpg")
if im2 is None:
    # cv2.imread returns None on failure; stop with a readable message.
    raise SystemExit("could not read YZeOg.jpg")
cv2.namedWindow("x", cv2.WINDOW_FREERATIO)
cv2.imshow("x", im2)

# Converting it to gray.
# NOTE(review): imread yields BGR data, yet RGB2GRAY is used; kept as is
# because the thresholds below may have been tuned with it -- confirm.
gray = cv2.cvtColor(im2, cv2.COLOR_RGB2GRAY)

# Creating a new image that will hold the cropped ellipse.
ElipseImg = np.zeros([im2.shape[0], im2.shape[1]], np.uint8)

# Detecting the largest circle.
gray = cv2.GaussianBlur(gray, (5, 5), 0)
circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, gray.shape[0]/8, param1=100, param2=100, minRadius=100, maxRadius=0)
if circles is None:
    # HoughCircles reports "nothing found" as None.
    raise SystemExit("no circle detected in YZeOg.jpg")
circles = np.uint16(np.around(circles))
print(circles)

# Keep the circle with the largest radius below 1000. `best` replaces the
# original variable `id`, which shadowed the builtin.
measure = 0
best = None
for i in circles[0, :]:
    if measure < i[2] < 1000:
        measure = i[2]
        best = i
if best is None:
    raise SystemExit("no circle with a radius below 1000 was detected")

# Plain Python ints: OpenCV drawing calls may reject numpy scalars as
# point or radius arguments.
center = (int(best[0]), int(best[1]))
radius = int(best[2])
cv2.circle(im2, center=center, radius=3, color=(0, 255, 0), thickness=-1, lineType=8, shift=0)
cv2.circle(im2, center=center, radius=radius, color=(0, 255, 0), thickness=2, lineType=8, shift=0)
cv2.ellipse(ElipseImg, center, (radius, radius), 0, 0, 360, (255, 255, 255), -1, 8)
print("center: ", center, " radius: ", radius)

result = cv2.bitwise_and(gray, ElipseImg)
cv2.namedWindow("bitwise and", cv2.WINDOW_FREERATIO)
cv2.imshow("bitwise and", result)

# Trying to estimate the intensity of the circle for the thresholding:
# sample 30 px to the right of the centre (rows are y, columns are x),
# clamped to the last column so the lookup cannot leave the image.
sample_col = min(center[0] + 30, result.shape[1] - 1)
x = result[center[1], sample_col]
print(int(x))

# Thresholding the output image.
ret, ElipseImg = cv2.threshold(ElipseImg, int(x)-10, 255, cv2.THRESH_BINARY)
cv2.namedWindow("threshold", cv2.WINDOW_FREERATIO)
cv2.imshow("threshold", ElipseImg)

# Making bitwise_or.
res = cv2.bitwise_or(gray, ElipseImg)
cv2.namedWindow("bitwise or", cv2.WINDOW_FREERATIO)
cv2.imshow("bitwise or", res)
cv2.waitKey(0)
cv2.destroyAllWindows()
2.py
import cv2
import numpy as np
im = cv2.imread("YZeOg.jpg")
if im is None:
    # cv2.imread returns None on failure; stop with a readable message.
    raise SystemExit("could not read YZeOg.jpg")

# Converting it to gray.
gr = cv2.cvtColor(im, cv2.COLOR_RGB2GRAY)
cv2.namedWindow("gr", cv2.WINDOW_FREERATIO)
cv2.imshow("gr", gr)

# Approximate the background with closings/openings of growing
# elliptical kernels (3x3 up to 9x9).
bg = gr.copy()
for r in range(1, 5):
    kernel2 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2*r+1, 2*r+1))
    bg = cv2.morphologyEx(bg, cv2.MORPH_CLOSE, kernel2)
    bg = cv2.morphologyEx(bg, cv2.MORPH_OPEN, kernel2)
cv2.namedWindow("bg", cv2.WINDOW_FREERATIO)
cv2.imshow("bg", bg)

# difference = background - initial.
# cv2.subtract saturates at 0, whereas `bg - gr` on uint8 arrays wraps
# around (e.g. 3 - 5 becomes 254) and produces false bright pixels.
dif = cv2.subtract(bg, gr)
cv2.namedWindow("dif", cv2.WINDOW_FREERATIO)
cv2.imshow("dif", dif)

# Threshold the difference image so we get dark letters.
ret0, bw = cv2.threshold(dif, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
# Threshold the background image so we get the dark region.
ret, dark = cv2.threshold(bg, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)

# Extract the pixels in the dark region. Boolean-mask indexing visits the
# pixels in the same row-major order as the nested loops it replaces.
dark_mask = dark > 0
darkpix = gr[dark_mask]

if darkpix.size:
    # Threshold the dark region so we get the darker pixels inside it ...
    ret2, darkpix = cv2.threshold(darkpix, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    # ... and paste the extracted darker pixels back (threshold returns
    # an N x 1 array, hence the ravel).
    bw[dark_mask] = darkpix.ravel()

cv2.namedWindow("bw", cv2.WINDOW_FREERATIO)
cv2.imshow("bw", bw)
res = cv2.bitwise_or(gr, bw)
cv2.namedWindow("bitwise or", cv2.WINDOW_FREERATIO)
cv2.imshow("bitwise or", res)
cv2.waitKey(0)
cv2.destroyAllWindows()
my06-验证码识别
获取验证码-然后识别.py
# -*- coding: utf-8 -*-
# @Time : 2017/8/21 13:26
# @Author : play4fun
# @File : 获取验证码-然后识别.py
# @Software: PyCharm
"""
获取验证码-然后识别.py:
"""
from io import BytesIO
from PIL import Image
import requests
from tesserocr import PyTessBaseAPI
import tesserocr
import numpy as np
import cv2
user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'
headers = {'User-Agent': user_agent}
url = 'http://www.bjsuperpass.com/captcha.svl?d=1503144107405'

# Fetch the captcha image from the transit-card web site.
rs = requests.get(url, headers=headers, timeout=10)
print('获取公交一卡通网站的验证码',rs.status_code)
# TODO: capture the cookies

# Load the response bytes through BytesIO into PIL, NumPy and OpenCV.
print('用BytesIO导入到Image,Numpy,Opencv')
s1 = BytesIO(rs.content)  # img = Image.open(BytesIO(resp.read()))
img = Image.open(s1)
img = img.convert("RGB")
# PIL delivers RGB while OpenCV expects BGR; convert so the preview and
# the saved file show the true colours.
im = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
cv2.imshow('src', im)
cv2.waitKey(0)
cv2.imwrite('captcha.jpg', im)

# Recognize with the configured API object. The original set the
# whitelist on `ocr` but then called tesserocr.image_to_text(), which
# does not use that object, so the whitelist had no effect and `ocr` was
# never closed.
with PyTessBaseAPI() as ocr:
    # ocr.Init(".", "eng", tesseract.OEM_DEFAULT)
    ocr.SetVariable("tessedit_char_whitelist", "0123456789abcdefghijklmnopqrstuvwxyz")
    # ocr.SetPageSegMode(tesseract.PSM_AUTO)
    ocr.SetImage(img)
    print('验证码是', ocr.GetUTF8Text())
# TODO: send the cookies
deep-learning-opencv深度学习dnn
deep_learning_with_opencv.py
# USAGE
'''
python deep_learning_with_opencv.py --image images/jemma.png \
--prototxt bvlc_googlenet.prototxt \
--model bvlc_googlenet.caffemodel --labels synset_words.txt
python deep_learning_with_opencv.py --image images/traffic_light.png \
--prototxt bvlc_googlenet.prototxt \
--model bvlc_googlenet.caffemodel --labels synset_words.txt
python deep_learning_with_opencv.py --image images/eagle.png \
--prototxt bvlc_googlenet.prototxt \
--model bvlc_googlenet.caffemodel --labels synset_words.txt
'''
# import the necessary packages
import numpy as np
import argparse
import time
import cv2
# Classify a single image with a Caffe GoogLeNet model through cv2.dnn,
# print the top-5 predictions and show the image annotated with the best one.

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
                help="path to input image")
ap.add_argument("-p", "--prototxt", required=True,
                help="path to Caffe 'deploy' prototxt file")
ap.add_argument("-m", "--model", required=True,
                help="path to Caffe pre-trained model")
ap.add_argument("-l", "--labels", required=True,
                help="path to ImageNet labels (i.e., syn-sets)")
args = vars(ap.parse_args())

# load the input image from disk; cv2.imread returns None (it does not raise)
# when the file is missing or cannot be decoded, so check explicitly
image = cv2.imread(args["image"])
if image is None:
    raise FileNotFoundError("could not read image: {}".format(args["image"]))

# load the class labels from disk, closing the file as soon as it is read
with open(args["labels"]) as labels_file:
    rows = labels_file.read().strip().split("\n")
# each row is "<synset id> <name>[, <alias>...]"; keep only the first name
classes = [r[r.find(" ") + 1:].split(",")[0] for r in rows]

# our CNN requires fixed spatial dimensions for our input image(s)
# so we need to ensure it is resized to 224x224 pixels while
# performing mean subtraction (104, 117, 123) to normalize the input;
# after executing this command our "blob" now has the shape:
# (1, 3, 224, 224)
blob = cv2.dnn.blobFromImage(image, 1, (224, 224), (104, 117, 123))

# load our serialized model from disk
print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])

# set the blob as input to the network and perform a forward-pass to
# obtain our output classification, timing only the forward pass
net.setInput(blob)
start = time.time()
preds = net.forward()
end = time.time()
print("[INFO] classification took {:.5} seconds".format(end - start))

# sort the indexes of the probabilities in descending order (higher
# probability first) and grab the top-5 predictions
idxs = np.argsort(preds[0])[::-1][:5]

# loop over the top-5 predictions and display them
for (i, idx) in enumerate(idxs):
    # draw only the top prediction on the input image
    if i == 0:
        text = "Label: {}, {:.2f}%".format(classes[idx], preds[0][idx] * 100)
        cv2.putText(image, text, (5, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
    # display the predicted label + associated probability to the
    # console
    print("[INFO] {}. label: {}, probability: {:.5}".format(i + 1, classes[idx], preds[0][idx]))

# display the output image
cv2.imshow("Image", image)
cv2.waitKey(0)
OpenCV_Window_Management
opencv_windows_management.py
# -*- coding: utf-8 -*-
# @Time : 2017/7/18 下午10:34
# @Author : play4fun
# @File : opencv_windows_management.py
# @Software: PyCharm
"""
opencv_windows_management.py:
"""
import cv2, math
import tkinter as tk
class Window:
    """One named image registered for display, plus its layout weight.

    Attributes:
        name: window title the image is shown under.
        image: private copy of the image passed in (made with ``image.copy()``).
        weight: relative display weight supplied by the caller.
        shape: ``image.shape`` of the stored copy.
        hight_x: ``shape[0]`` (the row count for a NumPy/OpenCV image).
        lenght_y: ``shape[1]`` (the column count for a NumPy/OpenCV image).
    """

    def __init__(self, name, image, weight=1):
        # Copy so later changes to the caller's array do not affect what is shown.
        snapshot = image.copy()
        dims = snapshot.shape
        self.name = name
        self.image = snapshot
        self.weight = weight
        self.shape = dims
        # Attribute spellings are kept as-is; other code reads them by these names.
        self.hight_x, self.lenght_y = dims[0], dims[1]
class opencv_windows_management:
    """Arrange several named OpenCV windows side by side across the screen.

    Images are registered with :meth:`add` and drawn with :meth:`show`, which
    gives every image an equal share of the screen width (keeping its aspect
    ratio) and places the windows left to right along the top edge.
    """

    def __init__(self):
        # name -> Window, in registration order
        self.windows = dict()
        # Tk is used only to query the screen resolution.
        root = tk.Tk()
        try:
            # Hide the helper window so no empty Tk frame appears on screen.
            root.withdraw()
            screen_width = root.winfo_screenwidth()
            screen_height = root.winfo_screenheight()
            self.screen_size = (screen_width, screen_height)  # e.g. (1280, 800)
        finally:
            # quit() only stops a mainloop; destroy() actually tears the root
            # window down so it does not linger next to the OpenCV windows.
            root.destroy()

    def add(self, name, image, weight=1):
        """Register (or replace) the image shown in window ``name``.

        :param name: window title; re-using a name replaces its image.
        :param image: image to display; a copy is stored.
        :param weight: intended relative size (higher means larger). It is
            stored on the Window but not yet used by :meth:`show`.
        :return: None
        """
        cv2.namedWindow(name, flags=cv2.WINDOW_AUTOSIZE)
        self.windows[name] = Window(name, image, weight)

    def show(self):
        """Resize every registered image and show the windows in one row.

        Does nothing when no window has been registered.
        """
        lenw = len(self.windows)
        if lenw == 0:
            # Nothing to lay out; also avoids dividing by zero below.
            return
        # Equal horizontal share per window; never below 1 px so that
        # cv2.resize always receives a valid size.
        w_l = max(1, int(self.screen_size[0] / lenw))
        # TODO: honour Window.weight
        # TODO: wrap onto further rows instead of using a single row
        for i, (name, win) in enumerate(self.windows.items()):
            # Scale the height by the same factor as the width (keep aspect ratio).
            h_x = max(1, int(w_l / win.lenght_y * win.hight_x))
            img2 = cv2.resize(win.image, (w_l, h_x))
            cv2.moveWindow(name, w_l * i, 0)
            cv2.imshow(name, img2)
test_cvwm_images.py
# -*- coding: utf-8 -*-
# @Time : 2017/7/18 下午12:42
# @Author : play4fun
# @File : test_cvwm_images.py
# @Software: PyCharm
"""
test_cvwm_images.py:
# show 多张相片
"""
import cv2
import numpy as np
import os
import errno
from opencv_windows_management import opencv_windows_management
# Demo: show an image, its grayscale version and a binary threshold of it
# side by side using the window manager.
cvwm = opencv_windows_management()
path = '../../data/messi5.jpg'
if not os.path.exists(path):
    raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path)
img = cv2.imread(path, cv2.IMREAD_UNCHANGED)  # keeps the alpha channel when the file has one
if img is None:
    # imread returns None (it does not raise) when the file cannot be decoded.
    raise IOError('cannot decode image: ' + path)
print(img.shape)
cvwm.add('src', img)
# IMREAD_UNCHANGED may yield a single-channel, BGR or BGRA image; pick the
# matching conversion because COLOR_BGR2GRAY only accepts 3-channel input.
if img.ndim == 2:
    gray = img
elif img.shape[2] == 4:
    gray = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
else:
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cvwm.add('gray', gray)
_, thresh1 = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
cvwm.add('thresh1', thresh1)
cvwm.show()
cv2.waitKey(0)
# Close the windows once a key has been pressed.
cv2.destroyAllWindows()
test_cvwm_videos.py
# -*- coding: utf-8 -*-
# @Time : 2017/7/18 下午12:43
# @Author : play4fun
# @File : test_cvwm_videos.py
# @Software: PyCharm
"""
test_cvwm_videos.py:
"""
import cv2
from opencv_windows_management import opencv_windows_management
# Demo: show the mirrored webcam frame, its grayscale version and each detected
# face side by side using the window manager. Press "q" to quit.
cvwm = opencv_windows_management()
cap = cv2.VideoCapture(0)
# Request a 640x480 capture size (named constants instead of the ids 3 and 4).
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
cascade_path = '/usr/local/share/OpenCV/haarcascades/haarcascade_frontalface_default.xml'
try:
    face_cascade = cv2.CascadeClassifier(cascade_path)
    if face_cascade.empty():
        # A missing cascade file yields an empty classifier rather than an
        # error at construction time; fail with a clear message instead.
        raise IOError('cannot load cascade: ' + cascade_path)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera unplugged / stream ended): stop.
            break
        # Mirror horizontally.
        frame = cv2.flip(frame, flipCode=1)
        cvwm.add('frame', frame)
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        cvwm.add('gray', gray)
        # Face detection
        faces = face_cascade.detectMultiScale(gray, 1.3, 5)
        print("Detected ", len(faces), " face")
        for (x, y, w, h) in faces:
            # Every face reuses the 'face' window, so the last one detected
            # in this frame is the one displayed.
            face = gray[y:y + h, x:x + w]
            cvwm.add('face', face)
        cvwm.show()
        key = cv2.waitKey(delay=1)
        if key == ord("q"):
            break
finally:
    # When everything is done (or on error), release the capture and windows.
    cap.release()
    cv2.destroyAllWindows()