opencv快速入门
文章目录
准备
下载
pip install opencv-python
# 或者
conda install opencv
检查
# 在终端中输入python
python # 进入python环境
import cv2 # 检查有无报错异常
# 没有异常则环境准备就位
开始入门
导入opencv包
import cv2
读取图片
img = cv2.imread(filename[, flags])
filename
可以是绝对路径或者是相对路径
flags
可选项,参数有:
- cv2.IMREAD_GRAYSCALE or 0 读取灰度图像
- cv2.IMREAD_COLOR or 1 读取彩色图像(默认)(忽略alpha通道)
- cv2.IMREAD_UNCHANGED or -1 读取完整图片(包括alpha通道)
其中,alpha通道是8位灰度通道,256级记录图像透明度信息
另外,opencv读取彩色图片(RGB)是按BGR读取,如果需要按RGB显示,需要img=img[:,:,::-1]
来做调整
查看图像属性
img.shape
查看图像形状,通常彩色图像为(高,宽,3),灰度图像为(高,宽)
img.dtype
查看图像中元素在内存中占用空间的数据类型,通常为uint8或float32
split方法与merge方法
cv2.split()方法分别读取三个通道
b, g, r = cv2.split(img_NZ_bgr)
cv2.merge()方法合并三个通道
imgMerged = cv2.merge((b, g, r))
cvtColor方法
cv2.cvtColor()用来转换图像的色彩空间
语法格式为:dst = cv2.cvtColor( src, code )
BGR to RGB
img_NZ_rgb = cv2.cvtColor(img_NZ_bgr, cv2.COLOR_BGR2RGB)
BGR to HSV
img_hsv = cv2.cvtColor(img_NZ_bgr, cv2.COLOR_BGR2HSV)
HSV to RGB
img_NZ_rgb = cv2.cvtColor(img_NZ_merged, cv2.COLOR_HSV2RGB)
图片保存
cv2.imwrite( filename, img[, params] )
如:cv2.imwrite("New_Zealand_Lake_SAVED.png", img_NZ_bgr)
基本图像处理
包括获取并处理像素、缩放、裁剪、翻转等
读取图像
cb_img = cv2.imread("checkerboard_18x18.png", 0)
获取像素值
通过行、列索引获取像素值
print(cb_img[0, 0]) # 0
print(cb_img[0, 6]) # 255
修改像素值
通过行、列索引修改像素值
cb_img_copy = cb_img.copy()
cb_img_copy[2, 2] = 200
cb_img_copy[2, 3] = 200
cb_img_copy[3, 2] = 200
cb_img_copy[3, 3] = 200
裁剪
通过行、列索引实现裁剪
cropped_region = img_NZ_rgb[200:400, 300:600]
resize
resized_cropped_region_2x = cv2.resize(cropped_region, None, fx=2, fy=2)
扩大为原来的两倍
或者不按倍率,手动给定尺寸
resized_cropped_region = cv2.resize(cropped_region, dsize=(100,200), interpolation=cv2.INTER_AREA)
interpolation
为插值方法,还有cv2.INTER_NEAREST、cv2.INTER_LINEAR、cv2.INTER_CUBIC
等
翻转
dst = cv2.flip( src, flipCode )
src
为需要翻转的图像
flipCode
为1水平翻转,为0垂直翻转,为-1既水平又垂直
数据标注
不太关注,一笔带过了
-
画一条线
img = cv2.line(img, pt1, pt2, color[, thickness[, lineType[, shift]]])
cv2.line(imageLine, (200, 100), (400, 100), (0, 255, 255), thickness=5, lineType=cv2.LINE_AA);
-
画一个圆
img = cv2.circle(img, center, radius, color[, thickness[, lineType[, shift]]])
cv2.circle(imageCircle, (900,500), 100, (0, 0, 255), thickness=5, lineType=cv2.LINE_AA);
-
画一个长方形
img = cv2.rectangle(img, pt1, pt2, color[, thickness[, lineType[, shift]]])
cv2.rectangle(imageRectangle, (500, 100), (700, 600), (255, 0, 255), thickness=5, lineType=cv2.LINE_8)
-
添加一句话
img = cv2.putText(img, text, org, fontFace, fontScale, color[, thickness[, lineType[, bottomLeftOrigin]]])
cv2.putText(imageText, text, (200, 700), fontFace, fontScale, fontColor, fontThickness, cv2.LINE_AA);
图像增强
像素加减
# Constant matrix of 50s with the same shape as the image, used as the brightness offset.
matrix = np.ones(img_rgb.shape, dtype="uint8") * 50
img_rgb_brighter = cv2.add(img_rgb, matrix)# 255 is pure white; the closer to 255, the brighter
img_rgb_darker = cv2.subtract(img_rgb, matrix)# 0 is pure black; the closer to 0, the darker
像素乘法与对比度
matrix1 = np.ones(img_rgb.shape) * 0.8
matrix2 = np.ones(img_rgb.shape) * 1.2
img_rgb_darker = np.uint8(cv2.multiply(np.float64(img_rgb), matrix1))
img_rgb_brighter = np.uint8(cv2.multiply(np.float64(img_rgb), matrix2))
道理和加减类似,但np.uint8()直接转换不会像cv2.add()那样做饱和截断,超过255的值会溢出回绕,
所以提高对比度时应先使用np.clip()把结果截断到[0, 255]
再转换,实现如下
matrix1 = np.ones(img_rgb.shape) * 0.8
matrix2 = np.ones(img_rgb.shape) * 1.2
img_rgb_lower = np.uint8(cv2.multiply(np.float64(img_rgb), matrix1))
img_rgb_higher = np.uint8(np.clip(cv2.multiply(np.float64(img_rgb), matrix2), 0, 255))
阈值
阈值:retval, dst = cv2.threshold( src, thresh, maxval, type[, dst] )
自适应阈值:dst = cv2.adaptiveThreshold( src, maxValue, adaptiveMethod, thresholdType, blockSize, C[, dst] )
img_read = cv2.imread("building-windows.jpg", cv2.IMREAD_GRAYSCALE)
retval, img_thresh = cv2.threshold(img_read, 100, 255, cv2.THRESH_BINARY)
逐位计算
cv2.bitwise_and() # 与
dst = cv2.bitwise_and( src1, src2[, dst[, mask]] )
cv2.bitwise_or() # 或
cv2.bitwise_xor() # 异或
cv2.bitwise_not() # 非
👇result = cv2.bitwise_and(img_rec, img_cir, mask=None)
👇
👇result = cv2.bitwise_or(img_rec, img_cir, mask=None)
👇
👇result = cv2.bitwise_xor(img_rec, img_cir, mask=None)
👇
Application: Logo Manipulation
# Read the logo. OpenCV loads images as BGR, so convert to RGB for matplotlib.
img_bgr = cv2.imread("coca-cola-logo.png")
img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
plt.imshow(img_rgb)
print(img_rgb.shape)

# Image arrays are indexed (rows, cols, channels), i.e. (height, width, channels).
logo_h, logo_w = img_rgb.shape[:2]

# Read in image of color checkerboard background
img_background_bgr = cv2.imread("checkerboard_color.png")
img_background_rgb = cv2.cvtColor(img_background_bgr, cv2.COLOR_BGR2RGB)

# Set desired width (logo_w) and maintain image aspect ratio
aspect_ratio = logo_w / img_background_rgb.shape[1]
dim = (logo_w, int(img_background_rgb.shape[0] * aspect_ratio))

# Resize background image to the logo's width; cv2.resize expects dsize as (width, height).
# NOTE(review): the masked operations below presumably require the background and
# the logo to have identical sizes, i.e. equal aspect ratios - confirm for new assets.
img_background_rgb = cv2.resize(img_background_rgb, dim, interpolation=cv2.INTER_AREA)
plt.imshow(img_background_rgb)
print(img_background_rgb.shape)

img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)

# Apply global thresholding to create a binary mask of the logo
retval, img_mask = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY)
plt.imshow(img_mask, cmap="gray")
print(img_mask.shape)

# Create an inverse mask
img_mask_inv = cv2.bitwise_not(img_mask)
plt.imshow(img_mask_inv, cmap="gray")

# Create colorful background "behind" the logo lettering
img_background = cv2.bitwise_and(img_background_rgb, img_background_rgb, mask=img_mask)
plt.imshow(img_background)

# Isolate foreground (red from original image) using the inverse mask
img_foreground = cv2.bitwise_and(img_rgb, img_rgb, mask=img_mask_inv)
plt.imshow(img_foreground)

# Add the two previous results to obtain the final result
result = cv2.add(img_background, img_foreground)
plt.imshow(result)

# imwrite expects BGR channel order, so reverse the RGB channels before saving.
cv2.imwrite("logo_final.png", result[:, :, ::-1])
模版对齐
原理: 单应性(homography)变换
# Imports
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from zipfile import ZipFile
from urllib.request import urlretrieve
%matplotlib inline
# download dataset
def download_and_unzip(url, save_path):
    """Download a ZIP archive and extract it next to the downloaded file.

    Args:
        url: Location of the archive to fetch.
        save_path: Local path the archive is written to; its contents are
            extracted into the directory part of this path.

    Extraction errors are reported on stdout instead of being raised. An
    archive that is not a valid ZIP file is deleted again, so that a caller
    guarding the download with ``os.path.exists(save_path)`` retries on the
    next run instead of skipping the download forever.
    """
    # Local import keeps the block self-contained; the file only imports ZipFile.
    from zipfile import BadZipFile

    print("Downloading and extracting assests....", end="")

    # Downloading zip file using urllib package.
    urlretrieve(url, save_path)

    try:
        # Extracting zip file using the zipfile package.
        with ZipFile(save_path) as z:
            # Extract ZIP file contents in the same directory.
            z.extractall(os.path.split(save_path)[0])

        print("Done")

    except BadZipFile as e:
        print("\nInvalid file.", e)
        # Drop the corrupt download so it is not mistaken for a valid asset archive.
        os.remove(save_path)

    except Exception as e:
        print("\nInvalid file.", e)
# Remote archive with the notebook assets, and where it is stored locally.
URL = r"https://www.dropbox.com/s/zuwnn6rqe0f4zgh/opencv_bootcamp_assets_NB8.zip?dl=1"
asset_zip_path = os.path.join(os.getcwd(), "opencv_bootcamp_assets_NB8.zip")

# Fetch and unpack the archive only when it is not already in the working directory.
if os.path.exists(asset_zip_path) is False:
    download_and_unzip(URL, asset_zip_path)
# Read reference image
refFilename = "form.jpg"
print("Reading reference image:", refFilename)
im1 = cv2.imread(refFilename, cv2.IMREAD_COLOR)
im1 = cv2.cvtColor(im1, cv2.COLOR_BGR2RGB)

# Read image to be aligned
imFilename = "scanned-form.jpg"
print("Reading image to align:", imFilename)
im2 = cv2.imread(imFilename, cv2.IMREAD_COLOR)
im2 = cv2.cvtColor(im2, cv2.COLOR_BGR2RGB)

# Display Images
plt.figure(figsize=[20, 10])
plt.subplot(121)
plt.axis('off')
plt.imshow(im1)
plt.title("Original Form")
plt.subplot(122)
plt.axis('off')
plt.imshow(im2)
plt.title("Scanned Form")

# Convert images to grayscale.
# im1/im2 were converted to RGB above, so the RGB->GRAY code is required;
# BGR2GRAY would apply the red and blue luminance weights to the wrong channels.
im1_gray = cv2.cvtColor(im1, cv2.COLOR_RGB2GRAY)
im2_gray = cv2.cvtColor(im2, cv2.COLOR_RGB2GRAY)

# Detect ORB features and compute descriptors.
MAX_NUM_FEATURES = 500
orb = cv2.ORB_create(MAX_NUM_FEATURES)
keypoints1, descriptors1 = orb.detectAndCompute(im1_gray, None)
keypoints2, descriptors2 = orb.detectAndCompute(im2_gray, None)

# Display: draw the detected keypoints on copies of both images.
im1_display = cv2.drawKeypoints(im1, keypoints1, outImage=np.array([]),
                                color=(255, 0, 0), flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
im2_display = cv2.drawKeypoints(im2, keypoints2, outImage=np.array([]),
                                color=(255, 0, 0), flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

# Match features.
matcher = cv2.DescriptorMatcher_create(cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING)

# Converting to list for sorting as tuples are immutable objects.
matches = list(matcher.match(descriptors1, descriptors2, None))

# Sort matches by score (smaller distance first)
matches.sort(key=lambda x: x.distance, reverse=False)

# Remove not so good matches: keep only the best 10%
numGoodMatches = int(len(matches) * 0.1)
matches = matches[:numGoodMatches]

# Draw top matches
im_matches = cv2.drawMatches(im1, keypoints1, im2, keypoints2, matches, None)
plt.figure(figsize=[40, 10])
plt.imshow(im_matches)
plt.axis("off")
plt.title("Original Form")

# Extract location of good matches
points1 = np.zeros((len(matches), 2), dtype=np.float32)
points2 = np.zeros((len(matches), 2), dtype=np.float32)
for i, match in enumerate(matches):
    # queryIdx indexes the keypoints of the first image, trainIdx those of the second.
    points1[i, :] = keypoints1[match.queryIdx].pt
    points2[i, :] = keypoints2[match.trainIdx].pt

# Find homography mapping the scanned image (points2) onto the reference (points1)
h, mask = cv2.findHomography(points2, points1, cv2.RANSAC)

# Use homography to warp image into the reference image's frame
height, width, channels = im1.shape
im2_reg = cv2.warpPerspective(im2, h, (width, height))

# Display results
plt.figure(figsize=[20, 10])
plt.subplot(121)
plt.imshow(im1)
plt.axis("off")
plt.title("Original Form")
plt.subplot(122)
plt.imshow(im2_reg)
plt.axis("off")
plt.title("Scanned Form")
![模版对齐结果](assets/16903485341970.jpg)
人脸检测
import os
import cv2
import sys
from zipfile import ZipFile
from urllib.request import urlretrieve
# ========================-Downloading Assets-========================
def download_and_unzip(url, save_path):
    """Download a ZIP archive and extract it next to the downloaded file.

    Args:
        url: Location of the archive to fetch.
        save_path: Local path the archive is written to; its contents are
            extracted into the directory part of this path.

    Extraction errors are reported on stdout instead of being raised. An
    archive that is not a valid ZIP file is deleted again, so that a caller
    guarding the download with ``os.path.exists(save_path)`` retries on the
    next run instead of skipping the download forever.
    """
    # Local import keeps the block self-contained; the file only imports ZipFile.
    from zipfile import BadZipFile

    print("Downloading and extracting assests....", end="")

    # Downloading zip file using urllib package.
    urlretrieve(url, save_path)

    try:
        # Extracting zip file using the zipfile package.
        with ZipFile(save_path) as z:
            # Extract ZIP file contents in the same directory.
            z.extractall(os.path.split(save_path)[0])

        print("Done")

    except BadZipFile as e:
        print("\nInvalid file.", e)
        # Drop the corrupt download so it is not mistaken for a valid asset archive.
        os.remove(save_path)

    except Exception as e:
        print("\nInvalid file.", e)
# Remote archive with the model files, and where it is stored locally.
URL = r"https://www.dropbox.com/s/efitgt363ada95a/opencv_bootcamp_assets_12.zip?dl=1"
asset_zip_path = os.path.join(os.getcwd(), "opencv_bootcamp_assets_12.zip")

# Fetch and unpack the archive only when it is not already in the working directory.
if os.path.exists(asset_zip_path) is False:
    download_and_unzip(URL, asset_zip_path)
# ====================================================================
# Video source: first command-line argument if given, otherwise device 0.
s = sys.argv[1] if len(sys.argv) > 1 else 0

source = cv2.VideoCapture(s)

win_name = "Camera Preview"
cv2.namedWindow(win_name, cv2.WINDOW_NORMAL)

# Load the Caffe network from its prototxt definition and weights file.
net = cv2.dnn.readNetFromCaffe("deploy.prototxt", "res10_300x300_ssd_iter_140000_fp16.caffemodel")

# Model parameters
in_width, in_height = 300, 300
mean = [104, 117, 123]
conf_threshold = 0.7

# Process frames until ESC (key code 27) is pressed or the source runs out.
while cv2.waitKey(1) != 27:
    has_frame, frame = source.read()
    if not has_frame:
        break

    # Flip around the vertical axis (flipCode=1).
    frame = cv2.flip(frame, 1)
    frame_height, frame_width = frame.shape[:2]

    # Create a 4D blob from a frame and run the model on it.
    blob = cv2.dnn.blobFromImage(frame, 1.0, (in_width, in_height), mean, swapRB=False, crop=False)
    net.setInput(blob)
    detections = net.forward()

    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if not confidence > conf_threshold:
            continue

        # Entries 3..6 are scaled by the frame size to get the two box corners in pixels.
        box_x1 = int(detections[0, 0, i, 3] * frame_width)
        box_y1 = int(detections[0, 0, i, 4] * frame_height)
        box_x2 = int(detections[0, 0, i, 5] * frame_width)
        box_y2 = int(detections[0, 0, i, 6] * frame_height)
        cv2.rectangle(frame, (box_x1, box_y1), (box_x2, box_y2), (0, 255, 0))

        # Confidence label on a filled white rectangle sized to the text.
        label = "Confidence: %.4f" % confidence
        label_size, base_line = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        label_corner_a = (box_x1, box_y1 - label_size[1])
        label_corner_b = (box_x1 + label_size[0], box_y1 + base_line)
        cv2.rectangle(frame, label_corner_a, label_corner_b, (255, 255, 255), cv2.FILLED)
        cv2.putText(frame, label, (box_x1, box_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

    # Overlay the inference time reported by the network's profiler.
    t, _ = net.getPerfProfile()
    label = "Inference time: %.2f ms" % (t * 1000.0 / cv2.getTickFrequency())
    cv2.putText(frame, label, (0, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
    cv2.imshow(win_name, frame)

# Release the capture and close the preview window.
source.release()
cv2.destroyWindow(win_name)