opencv 快速入门

青铜念诗

已于 2023-07-26 14:28:04 修改

阅读量173

点赞数

文章标签： opencv 人工智能计算机视觉

于 2023-07-06 20:48:27 首次发布

本文链接：https://blog.csdn.net/CSDN_Ethan2086/article/details/131584938

版权

opencv快速入门

准备

下载

pip install opencv-python
# 或者
conda install opencv

检查


# 在终端中输入python
python # 进入python环境

import cv2 # 检查有无报错异常

# 没有异常则环境准备就位

开始入门

导入opencv包

import cv2

读取图片

img=cv2.imread(filename=""[, flags])

filename 可以是绝对路径或者是相对路径
flags 可选项，参数有：

cv2.IMREAD_GRAYSCALE or 0 读取灰度图像
cv2.IMREAD_COLOR or 1 读取彩色图像（默认）（忽略alpha通道）
cv2.IMREAD_UNCHANGED or -1 读取完整图片（包括alpha通道）
其中，alpha通道是8位灰度通道，256级记录图像透明度信息
另外，opencv读取彩色图片（RGB）是按BGR读取，如果需要按RGB显示，需要img=img[:,:,::-1]来做调整

查看图像属性

img.shape
查看图像形状，通常彩色图像为（高，宽，3），灰度图像为（高，宽）

img.dtype
查看图像中元素在内存中占用空间的数据类型，通常为uint8或float32

split方法与merge方法

cv2.split()方法分别读取三个通道

b, g, r = cv2.split(img_NZ_bgr)

cv2.merge()方法合并三个通道

imgMerged = cv2.merge((b, g, r))

在这里插入图片描述

cvtColor方法

cv2.cvtColor()用来转换图像的色彩空间

语法格式为：dst = cv2.cvtColor( src, code )

BGR to RGB

img_NZ_rgb = cv2.cvtColor(img_NZ_bgr, cv2.COLOR_BGR2RGB)

BGR to HSV

img_hsv = cv2.cvtColor(img_NZ_bgr, cv2.COLOR_BGR2HSV)

HSV to RGB

img_NZ_rgb = cv2.cvtColor(img_NZ_merged, cv2.COLOR_HSV2RGB)

图片保存

cv2.imwrite( filename, img[, params] )

如：cv2.imwrite("New_Zealand_Lake_SAVED.png", img_NZ_bgr)

基本图像处理

包括获取并处理像素、缩放、裁剪、翻转等

读取图像
cb_img = cv2.imread("checkerboard_18x18.png", 0)
在这里插入图片描述

获取像素值

通过行、列索引获取像素值
print(cb_img[0, 0]) # 0
print(cb_img[0, 6]) # 255

修改像素值

通过行、列索引修改像素值

cb_img_copy = cb_img.copy()
cb_img_copy[2, 2] = 200
cb_img_copy[2, 3] = 200
cb_img_copy[3, 2] = 200
cb_img_copy[3, 3] = 200

在这里插入图片描述

裁剪

通过行、列索引实现裁剪

cropped_region = img_NZ_rgb[200:400, 300:600]

在这里插入图片描述

resize

resized_cropped_region_2x = cv2.resize(cropped_region, None, fx=2, fy=2)
扩大为原来的两倍

或者不按倍率，手动给定尺寸
resized_cropped_region = cv2.resize(cropped_region, dsize=(100,200), interpolation=cv2.INTER_AREA)
interpolation为插值方法，还有cv2.INTER_NEAREST、cv2.INTER_LINEAR、cv2.INTER_CUBIC等
在这里插入图片描述

翻转

dst = cv2.flip( src, flipCode )
src为需要翻转的图像
flipCode为1水平翻转，为0垂直翻转，为-1既水平又垂直
在这里插入图片描述

数据标注

不太关注，一笔带过了

画一条线
img = cv2.line(img, pt1, pt2, color[, thickness[, lineType[, shift]]])
cv2.line(imageLine, (200, 100), (400, 100), (0, 255, 255), thickness=5, lineType=cv2.LINE_AA);
画一个圆
img = cv2.circle(img, center, radius, color[, thickness[, lineType[, shift]]])
cv2.circle(imageCircle, (900,500), 100, (0, 0, 255), thickness=5, lineType=cv2.LINE_AA);
画一个长方形
img = cv2.rectangle(img, pt1, pt2, color[, thickness[, lineType[, shift]]])
cv2.rectangle(imageRectangle, (500, 100), (700, 600), (255, 0, 255), thickness=5, lineType=cv2.LINE_8)
添加一句话
img = cv2.putText(img, text, org, fontFace, fontScale, color[, thickness[, lineType[, bottomLeftOrigin]]])
cv2.putText(imageText, text, (200, 700), fontFace, fontScale, fontColor, fontThickness, cv2.LINE_AA);

图像增强

像素加减

# Build a constant offset of 50 with the same shape as the image, then add /
# subtract it to brighten / darken every pixel.
matrix = np.ones(img_rgb.shape, dtype="uint8") * 50
img_rgb_brighter = cv2.add(img_rgb, matrix)# 255 is pure white: the closer to 255, the brighter
img_rgb_darker   = cv2.subtract(img_rgb, matrix)# 0 is pure black: the closer to 0, the darker

在这里插入图片描述

像素乘法与对比度

matrix1 = np.ones(img_rgb.shape) * 0.8
matrix2 = np.ones(img_rgb.shape) * 1.2

img_rgb_darker   = np.uint8(cv2.multiply(np.float64(img_rgb), matrix1))
img_rgb_brighter = np.uint8(cv2.multiply(np.float64(img_rgb), matrix2))

道理和加减一样；但要注意，乘以1.2后超过255的像素在直接转换为uint8时会溢出回绕，高亮区域会出现异常色块，因此下面用np.clip()先把结果限制在0~255之间
在这里插入图片描述

或者使用np.clip()实现

matrix1 = np.ones(img_rgb.shape) * 0.8
matrix2 = np.ones(img_rgb.shape) * 1.2

img_rgb_lower  = np.uint8(cv2.multiply(np.float64(img_rgb), matrix1))
img_rgb_higher = np.uint8(np.clip(cv2.multiply(np.float64(img_rgb), matrix2), 0, 255))

在这里插入图片描述

阈值

阈值retval, dst = cv2.threshold( src, thresh, maxval, type[, dst] )

自适应阈值dst = cv2.adaptiveThreshold( src, maxValue, adaptiveMethod, thresholdType, blockSize, C[, dst] )

img_read = cv2.imread("building-windows.jpg", cv2.IMREAD_GRAYSCALE)
retval, img_thresh = cv2.threshold(img_read, 100, 255, cv2.THRESH_BINARY)

在这里插入图片描述

逐位计算

cv2.bitwise_and() # 与 
    dst = cv2.bitwise_and( src1, src2[, dst[, mask]] )
cv2.bitwise_or()  # 或
cv2.bitwise_xor() # 异或
cv2.bitwise_not() # 非

在这里插入图片描述
👇result = cv2.bitwise_and(img_rec, img_cir, mask=None)👇

👇result = cv2.bitwise_or(img_rec, img_cir, mask=None)👇

👇result = cv2.bitwise_xor(img_rec, img_cir, mask=None)👇

Application: Logo Manipulation

# Load the logo. cv2.imread returns BGR, so convert to RGB for matplotlib.
img_bgr = cv2.imread("coca-cola-logo.png")
img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
plt.imshow(img_rgb)

print(img_rgb.shape)

# Image arrays are indexed (rows, columns, channels), so shape[0] is the
# height and shape[1] is the width.
logo_h = img_rgb.shape[0]
logo_w = img_rgb.shape[1]


# Read in image of color checkerboard background
img_background_bgr = cv2.imread("checkerboard_color.png")
img_background_rgb = cv2.cvtColor(img_background_bgr, cv2.COLOR_BGR2RGB)

# Set desired width (logo_w) and maintain image aspect ratio.
# cv2.resize takes its target size as (width, height).
aspect_ratio = logo_w / img_background_rgb.shape[1]
dim = (logo_w, int(img_background_rgb.shape[0] * aspect_ratio))

# Resize background image to the same width as the logo image
# NOTE(review): the mask= arguments below need the resized background to have
# the same height as the logo too, which holds only when both images share
# an aspect ratio -- confirm before using other inputs.
img_background_rgb = cv2.resize(img_background_rgb, dim, interpolation=cv2.INTER_AREA)

plt.imshow(img_background_rgb)
print(img_background_rgb.shape)


img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)

# Apply global thresholding to create a binary mask of the logo
retval, img_mask = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY)

plt.imshow(img_mask, cmap="gray")
print(img_mask.shape)


# Create an inverse mask
img_mask_inv = cv2.bitwise_not(img_mask)
plt.imshow(img_mask_inv, cmap="gray")


# Create colorful background "behind" the logo lettering
img_background = cv2.bitwise_and(img_background_rgb, img_background_rgb, mask=img_mask)
plt.imshow(img_background)



# Isolate foreground (red from original image) using the inverse mask
img_foreground = cv2.bitwise_and(img_rgb, img_rgb, mask=img_mask_inv)
plt.imshow(img_foreground)



# Add the two previous results to obtain the final result
result = cv2.add(img_background, img_foreground)
plt.imshow(result)
# cv2.imwrite expects BGR, so reverse the channel axis of the RGB result.
cv2.imwrite("logo_final.png", result[:, :, ::-1])

在这里插入图片描述

模版对齐

在这里插入图片描述

原理：单应性（Homography）

在这里插入图片描述

# Imports
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
    
from zipfile import ZipFile
from urllib.request import urlretrieve
    
%matplotlib inline

# download dataset
def download_and_unzip(url, save_path):
    """Download a ZIP archive and extract it next to where it is saved.

    Args:
        url: Location of the archive; passed straight to ``urlretrieve``.
        save_path: File path the archive is written to. Its contents are
            extracted into the directory part of this path.

    Extraction problems are reported on stdout instead of being raised.
    Errors raised by the download itself propagate to the caller.
    """
    # Local import: the file's import block only brings in ZipFile.
    from zipfile import BadZipFile

    print("Downloading and extracting assests....", end="")

    # Downloading zip file using urllib package.
    urlretrieve(url, save_path)

    try:
        # Extracting zip file using the zipfile package.
        with ZipFile(save_path) as z:
            # Extract ZIP file contents in the same directory.
            z.extractall(os.path.split(save_path)[0])
    except BadZipFile as e:
        # A corrupt archive left on disk would make an "already downloaded"
        # existence check skip the download on every later run, so drop it.
        os.remove(save_path)
        print("\nInvalid file.", e)
    except Exception as e:
        # Best effort: any other extraction failure is only reported.
        print("\nInvalid file.", e)
    else:
        print("Done")
        
# Location of the asset archive for the alignment example.
URL = r"https://www.dropbox.com/s/zuwnn6rqe0f4zgh/opencv_bootcamp_assets_NB8.zip?dl=1"

# The archive is stored in the current working directory.
asset_zip_path = os.path.join(os.getcwd(), "opencv_bootcamp_assets_NB8.zip")

# Download only if the asset ZIP does not exist yet.
if not os.path.exists(asset_zip_path):
    download_and_unzip(URL, asset_zip_path)

# Read the reference image and convert it from OpenCV's BGR order to RGB.
refFilename = "form.jpg"
print("Reading reference image:", refFilename)
im1 = cv2.imread(refFilename, cv2.IMREAD_COLOR)
im1 = cv2.cvtColor(im1, cv2.COLOR_BGR2RGB)

# Read the image to be aligned and convert it to RGB as well.
imFilename = "scanned-form.jpg"
print("Reading image to align:", imFilename)
im2 = cv2.imread(imFilename, cv2.IMREAD_COLOR)
im2 = cv2.cvtColor(im2, cv2.COLOR_BGR2RGB)

# Display both images side by side (1 row, 2 columns) with the axes hidden.
plt.figure(figsize=[20, 10]); 
plt.subplot(121); plt.axis('off'); plt.imshow(im1); plt.title("Original Form")
plt.subplot(122); plt.axis('off'); plt.imshow(im2); plt.title("Scanned Form")

在这里插入图片描述

# Convert images to grayscale.
# im1 and im2 were converted to RGB right after loading, so the RGB->gray
# code is required here; the BGR code would swap the red and blue weights.
im1_gray = cv2.cvtColor(im1, cv2.COLOR_RGB2GRAY)
im2_gray = cv2.cvtColor(im2, cv2.COLOR_RGB2GRAY)


# Detect ORB features and compute descriptors.
MAX_NUM_FEATURES = 500
orb = cv2.ORB_create(MAX_NUM_FEATURES)
keypoints1, descriptors1 = orb.detectAndCompute(im1_gray, None)
keypoints2, descriptors2 = orb.detectAndCompute(im2_gray, None)

# Draw the detected keypoints on copies of the color images for display.
im1_display = cv2.drawKeypoints(im1, keypoints1, outImage=np.array([]), 
                                color=(255, 0, 0), flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

im2_display = cv2.drawKeypoints(im2, keypoints2, outImage=np.array([]), 
                                color=(255, 0, 0), flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

# Match features with a brute-force Hamming-distance matcher.
matcher = cv2.DescriptorMatcher_create(cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING)

# Converting to list for sorting as tuples are immutable objects.
matches = list(matcher.match(descriptors1, descriptors2, None))

# Sort matches by score (smallest distance first).
matches.sort(key=lambda x: x.distance, reverse=False)

# Keep only the best 10% of the matches.
numGoodMatches = int(len(matches) * 0.1)
matches = matches[:numGoodMatches]

# Draw top matches
im_matches = cv2.drawMatches(im1, keypoints1, im2, keypoints2, matches, None)

plt.figure(figsize=[40, 10])
plt.imshow(im_matches);plt.axis("off");plt.title("Original Form")

在这里插入图片描述

# Extract the pixel locations of the retained matches: one (x, y) row per
# match, for the reference image (points1) and the scanned image (points2).
points1 = np.zeros((len(matches), 2), dtype=np.float32)
points2 = np.zeros((len(matches), 2), dtype=np.float32)

for i, match in enumerate(matches):
    # queryIdx indexes keypoints1 and trainIdx indexes keypoints2, matching
    # the argument order used when the matches were computed.
    points1[i, :] = keypoints1[match.queryIdx].pt
    points2[i, :] = keypoints2[match.trainIdx].pt

# Find the homography that maps points2 onto points1, using RANSAC.
h, mask = cv2.findHomography(points2, points1, cv2.RANSAC)

# Use the homography to warp im2 into an output the size of im1.
height, width, channels = im1.shape
im2_reg = cv2.warpPerspective(im2, h, (width, height))

# Display the reference image next to the warped (registered) scan.
plt.figure(figsize=[20, 10])
plt.subplot(121);plt.imshow(im1);    plt.axis("off");plt.title("Original Form")
plt.subplot(122);plt.imshow(im2_reg);plt.axis("off");plt.title("Scanned Form")

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-3Dizzpn7-1690352778107)(assets/16903485341970.jpg)]

人脸检测

import os
import cv2
import sys
from zipfile import ZipFile
from urllib.request import urlretrieve


# ========================-Downloading Assets-========================
def download_and_unzip(url, save_path):
    """Download a ZIP archive and extract it next to where it is saved.

    Args:
        url: Location of the archive; passed straight to ``urlretrieve``.
        save_path: File path the archive is written to. Its contents are
            extracted into the directory part of this path.

    Extraction problems are reported on stdout instead of being raised.
    Errors raised by the download itself propagate to the caller.
    """
    # Local import: the file's import block only brings in ZipFile.
    from zipfile import BadZipFile

    print("Downloading and extracting assests....", end="")

    # Downloading zip file using urllib package.
    urlretrieve(url, save_path)

    try:
        # Extracting zip file using the zipfile package.
        with ZipFile(save_path) as z:
            # Extract ZIP file contents in the same directory.
            z.extractall(os.path.split(save_path)[0])
    except BadZipFile as e:
        # A corrupt archive left on disk would make an "already downloaded"
        # existence check skip the download on every later run, so drop it.
        os.remove(save_path)
        print("\nInvalid file.", e)
    except Exception as e:
        # Best effort: any other extraction failure is only reported.
        print("\nInvalid file.", e)
    else:
        print("Done")


# Location of the asset archive for the face-detection example.
URL = r"https://www.dropbox.com/s/efitgt363ada95a/opencv_bootcamp_assets_12.zip?dl=1"

# The archive is stored in the current working directory.
asset_zip_path = os.path.join(os.getcwd(), f"opencv_bootcamp_assets_12.zip")

# Download only if the asset ZIP does not exist yet.
if not os.path.exists(asset_zip_path):
    download_and_unzip(URL, asset_zip_path)
# ====================================================================


# Video source: 0 by default, or the first command-line argument if given.
# NOTE(review): the argument stays a string, so passing "0" is presumably
# treated as a file name rather than a camera index -- confirm.
s = 0
if len(sys.argv) > 1:
    s = sys.argv[1]

source = cv2.VideoCapture(s)

win_name = "Camera Preview"
cv2.namedWindow(win_name, cv2.WINDOW_NORMAL)

# Load the Caffe network from its prototxt definition and weights file.
net = cv2.dnn.readNetFromCaffe("deploy.prototxt", "res10_300x300_ssd_iter_140000_fp16.caffemodel")
# Model parameters
in_width = 300
in_height = 300
mean = [104, 117, 123]
conf_threshold = 0.7

# Process frames until ESC (key code 27) is pressed or no frame is returned.
while cv2.waitKey(1) != 27:
    has_frame, frame = source.read()
    if not has_frame:
        break
    # Mirror the frame horizontally.
    frame = cv2.flip(frame, 1)
    frame_height = frame.shape[0]
    frame_width = frame.shape[1]

    # Create a 4D blob from a frame.
    blob = cv2.dnn.blobFromImage(frame, 1.0, (in_width, in_height), mean, swapRB=False, crop=False)
    # Run a model
    net.setInput(blob)
    detections = net.forward()

    # One candidate per index along axis 2; column 2 is read as the
    # confidence and columns 3..6 as box coordinates.
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > conf_threshold:
            # The coordinates are scaled by the frame size to get pixels.
            # NOTE(review): despite the names, these are presumably the
            # top-left and bottom-right corners in image coordinates -- confirm.
            x_left_bottom = int(detections[0, 0, i, 3] * frame_width)
            y_left_bottom = int(detections[0, 0, i, 4] * frame_height)
            x_right_top = int(detections[0, 0, i, 5] * frame_width)
            y_right_top = int(detections[0, 0, i, 6] * frame_height)

            # Outline the detection in green.
            cv2.rectangle(frame, (x_left_bottom, y_left_bottom), (x_right_top, y_right_top), (0, 255, 0))
            label = "Confidence: %.4f" % confidence
            label_size, base_line = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

            # Filled white box sized to the label, so the black text is readable.
            cv2.rectangle(
                frame,
                (x_left_bottom, y_left_bottom - label_size[1]),
                (x_left_bottom + label_size[0], y_left_bottom + base_line),
                (255, 255, 255),
                cv2.FILLED,
            )
            cv2.putText(frame, label, (x_left_bottom, y_left_bottom), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

    # Overlay the inference time, converted from ticks to milliseconds.
    t, _ = net.getPerfProfile()
    label = "Inference time: %.2f ms" % (t * 1000.0 / cv2.getTickFrequency())
    cv2.putText(frame, label, (0, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
    cv2.imshow(win_name, frame)

# Release the capture and close the preview window once the loop ends.
source.release()
cv2.destroyWindow(win_name)

青铜念诗

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
0
评论
opencv 快速入门

查看图像中元素在内存中占用空间的数据类型，通常为uint8或float32。查看图像形状，通常彩色图像为（高，宽，3），灰度图像为（高，宽）。cv2.cvtColor()用来转换图像的色彩空间。opencv读取彩色图片（RGB）是按BGR读取。为1水平翻转，为0垂直翻转，为-1既水平又垂直。cv2.split()方法分别读取三个通道。cv2.merge()方法合并三个通道。可以是绝对路径或者是相对路径。或者不按倍率，手动给定尺寸。通过行、列索引获取像素值。通过行、列索引修改像素值。通过行、列索引实现裁剪。
复制链接

扫一扫