OpenCV Image Stitching


Project Scenario:

Multiple partial images of a ship are captured from a fixed camera angle, and the goal is to stitch them into a complete image of the ship.
[Figure: partial ship images captured by the fixed camera]

Solution Process:

Stage 1:

At first I used OpenCV's built-in SIFT feature matching algorithm, with the result shown below.
[Figure: initial SIFT stitching result, dominated by background matches]

Why this happens: because the camera is fixed, the dominant feature matches land on the background rather than on the ship.

Based on that conclusion, the approach was:
1. When reading the files, run feature matching only on the ship's main body (note the cropped ROIs passed to stitch() in the code below)
[Figure: feature matching restricted to the ship body]
The following code is based on opencv-python==3.4.1.15.
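Note that cv2.xfeatures2d.SIFT_create() only exists in the contrib build of OpenCV, so you will likely also need the matching contrib package (the exact pin below is an assumption; any contrib build up to 3.4.2.16 should still expose SIFT without a patent restriction):

pip install opencv-contrib-python==3.4.1.15 imutils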

# -*- coding:utf-8 -*-
# @Time : 2021/1/12 19:35
# @Author : JulyLi
# @File : pingjie.py
# @Software: PyCharm

import numpy as np
import cv2 as cv
import imutils


class Stitcher:
    def __init__(self):
        self.isv3 = imutils.is_cv3()

    def stitch(self, imgs, ratio=0.75, reprojThresh=4.0, showMatches=False):
        print('A')
        (img2, img1) = imgs  # the second element will be warped onto the first
        # get the keypoints and descriptors of each image
        (kp1, des1) = self.detectAndDescribe(img1)
        (kp2, des2) = self.detectAndDescribe(img2)
        print(len(kp1), len(des1))
        print(len(kp2), len(des2))
        R = self.matchKeyPoints(kp1, kp2, des1, des2, ratio, reprojThresh)

        # if there are not enough good matches, R is None
        if R is None:
            return None
        (good, M, mask) = R
        print(M)
        # warp img1 with the homography M; the output canvas is (img1.width + img2.width) x img1.height
        result = cv.warpPerspective(img1, M, (img1.shape[1] + img2.shape[1], img1.shape[0]))
        # paste img2 into the left part of the result image
        result[0:img2.shape[0], 0:img2.shape[1]] = img2

        # optionally return a visualization of the matches
        if showMatches:
            vis = self.drawMatches1(img1, img2, kp1, kp2, good, mask)
            return (result, vis)

        return result

    def detectAndDescribe(self, img):
        print('B')
        gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

        # check whether we are running OpenCV 3.x
        if self.isv3:
            sift = cv.xfeatures2d.SIFT_create()
            (kps, des) = sift.detectAndCompute(gray, None)
        else:
            # OpenCV 2.4.x API: separate feature detector and descriptor extractor
            detector = cv.FeatureDetector_create('SIFT')
            kps = detector.detect(gray)
            extractor = cv.DescriptorExtractor_create('SIFT')
            (kps, des) = extractor.compute(gray, kps)

        # convert the KeyPoint objects to a NumPy array of (x, y) coordinates
        kps = np.float32([kp.pt for kp in kps])
        # return the keypoint coordinates and descriptors
        return (kps, des)

    def matchKeyPoints(self, kp1, kp2, des1, des2, ratio, reprojThresh):
        print('C')
        # brute-force matcher; SIFT descriptors are compared with the default L2 norm
        matcher = cv.DescriptorMatcher_create('BruteForce')
        # knnMatch returns the 2 nearest neighbors for each descriptor
        matches = matcher.knnMatch(des1, des2, 2)

        # keep matches that pass Lowe's ratio test
        good = []
        for m in matches:
            if len(m) == 2 and m[0].distance < ratio * m[1].distance:
                good.append((m[0].trainIdx, m[0].queryIdx))

        print(len(good))
        # a homography needs at least four point pairs; require more than four here
        if len(good) > 4:
            # gather the matched (x, y) coordinates
            src_pts = np.float32([kp1[i] for (_, i) in good])
            dst_pts = np.float32([kp2[i] for (i, _) in good])

            # estimate the homography between the two point sets with RANSAC
            (M, mask) = cv.findHomography(src_pts, dst_pts, cv.RANSAC, reprojThresh)

            # return the good matches, the homography, and the inlier mask
            return (good, M, mask)
        # otherwise there are not enough matches; return None
        return None

    def drawMatches(self, img1, img2, kp1, kp2, matches, mask, M):
        # NOTE: unused alternative drawer; it expects cv2.KeyPoint / cv2.DMatch
        # objects rather than the coordinate arrays produced above
        # get the height and width of the source image
        h, w = img1.shape[:2]
        # project the four corners of img1 into the target image with M
        pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0]]).reshape(-1, 1, 2)
        dst = cv.perspectiveTransform(pts, M)
        matchesMask = mask.ravel().tolist()

        draw_params = dict(matchColor=(0, 255, 0),
                           singlePointColor=None,
                           matchesMask=matchesMask,
                           flags=2)
        img = cv.drawMatches(img1, kp1, img2, kp2, matches, None, **draw_params)

        return img

    def drawMatches1(self, img1, img2, kp1, kp2, matches, mask):
        print('D')
        (hA, wA) = img1.shape[:2]
        (hB, wB) = img2.shape[:2]
        vis = np.zeros((max(hA, hB), wA + wB, 3), dtype='uint8')
        vis[0:hA, 0:wA] = img1
        vis[0:hB, wA:] = img2
        # draw a line for every inlier match (mask value 1)
        for ((trainIdx, queryIdx), s) in zip(matches, mask):
            if s == 1:
                ptA = (int(kp1[queryIdx][0]), int(kp1[queryIdx][1]))
                ptB = (int(kp2[trainIdx][0]) + wA, int(kp2[trainIdx][1]))
                cv.line(vis, ptA, ptB, (0, 255, 0), 1)

        return vis


def show():
    img1 = cv.imread('myplot1.jpg')
    img2 = cv.imread('myplot2.jpg')
    img1 = imutils.resize(img1, width=400)
    img2 = imutils.resize(img2, width=400)

    stitched = Stitcher()
    # match only on the ship body: crop both images to the same band of rows
    (result, vis) = stitched.stitch([img2[100:200, :], img1[100:200, :]], showMatches=True)

    cv.imshow('keyPoint Matches', vis)
    cv.imshow('Result', result)

    cv.waitKey(0)
    cv.destroyAllWindows()


if __name__ == '__main__':
    show()

2. Remove the black borders
[Figure: stitched image before black border removal]

# -*- coding:utf-8 -*-
# @Time : 2021/1/12 20:20
# @Author : JulyLi
# @File : 图像拼接后的黑边去除.py
# @Software: PyCharm

import cv2
import numpy as np
import datetime


def change_size(read_file):
    image = cv2.imread(read_file, 1)  # read the image; read_file is the path
    img = cv2.medianBlur(image, 5)  # median blur to suppress noise inside the black borders
    b = cv2.threshold(img, 15, 255, cv2.THRESH_BINARY)  # tune the threshold (15) to adjust the crop
    binary_image = b[1]  # binary image, still 3 channels
    binary_image = cv2.cvtColor(binary_image, cv2.COLOR_BGR2GRAY)  # reduce to a single channel
    print(binary_image.shape)

    indexes = np.where(binary_image == 255)  # coordinates of the white pixels (rows, cols)

    top = min(indexes[0])     # first non-black row
    bottom = max(indexes[0])  # last non-black row
    left = min(indexes[1])    # first non-black column
    right = max(indexes[1])   # last non-black column

    pre1_picture = image[top:bottom, left:right]  # crop the content region
    return pre1_picture


if __name__ == '__main__':
    source_file = r"temp/Result5.jpg"  # input image
    save_path = r"temp/Result6.jpg"  # cropped output image

    starttime = datetime.datetime.now()
    x = change_size(source_file)
    cv2.imwrite(save_path, x)
    print("cropping finished")
    endtime = datetime.datetime.now()  # record the end time
    print("total cropping time (s):", (endtime - starttime).seconds)

3. Repeat steps 1 and 2 (a sketch of this loop follows the figure below)
[Figure: result after repeating steps 1 and 2]
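For reference, here is a minimal sketch of what that loop can look like, assuming the Stitcher class from step 1, a list images of ROI-cropped frames, and a hypothetical in-memory helper crop_black_border that mirrors the cropping logic of change_size:

import cv2
import numpy as np

def crop_black_border(image, thresh=15):
    # in-memory version of change_size(): keep the bounding box of non-black pixels
    blurred = cv2.medianBlur(image, 5)
    gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, thresh, 255, cv2.THRESH_BINARY)
    rows, cols = np.where(binary == 255)
    return image[rows.min():rows.max(), cols.min():cols.max()]

stitcher = Stitcher()
result = images[0]
for nxt in images[1:]:
    stitched = stitcher.stitch([result, nxt])  # step 1: warp the next image onto the running result
    if stitched is None:  # not enough matches to estimate a homography
        break
    result = crop_black_border(stitched)  # step 2: remove the black border before the next round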

Stage 2

The results above clearly could not satisfy the project requirements, so the next step was to find a way to extract the image foreground. Fortunately, I found an expert's technical article that implements foreground segmentation.
Reposted CSDN article

Original technical article

Here is my adapted version of the code, which batch-extracts the foregrounds of all images in a folder:

# -*- coding:utf-8 -*-
# @Time : 2021/1/14 11:17
# @Author : JulyLi
# @File : 删除背景.py
# @Software: PyCharm

import os
# from google.colab import drive
#
# drive.mount('/content/drive')
# path = "/content/drive/My Drive/Colab Notebooks"
# os.chdir(path)
# os.listdir(path)

from torchvision import models
from PIL import Image
import matplotlib.pyplot as plt
import torch
import numpy as np
import cv2

# Apply the transformations needed
import torchvision.transforms as T


# Define the helper function
def decode_segmap(image, source, nc=21):
    label_colors = np.array([(0, 0, 0),  # 0=background
                             # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
                             (128, 0, 0), (0, 128, 0), (128, 128, 0), (128, 128, 128), (128, 0, 128),
                             # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
                             (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
                             # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
                             (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
                             # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
                             (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
    r = np.zeros_like(image).astype(np.uint8)
    g = np.zeros_like(image).astype(np.uint8)
    b = np.zeros_like(image).astype(np.uint8)
    for l in range(0, nc):
        idx = image == l
        r[idx] = label_colors[l, 0]
        g[idx] = label_colors[l, 1]
        b[idx] = label_colors[l, 2]
    rgb = np.stack([r, g, b], axis=2)
    # Load the foreground input image
    foreground = cv2.imread(source)
    print('foreground1')
    # print(foreground)
    # Change the color of foreground image to RGB
    # and resize image to match shape of R-band in RGB output map
    foreground = cv2.cvtColor(foreground, cv2.COLOR_BGR2RGB)
    foreground = cv2.resize(foreground, (r.shape[1], r.shape[0]))
    print('foreground2')
    # Create a background array to hold white pixels
    # with the same size as RGB output map
    background = 255 * np.ones_like(rgb).astype(np.uint8)
    # Convert uint8 to float
    foreground = foreground.astype(float)
    background = background.astype(float)
    # Create a binary mask of the RGB output map using the threshold value 0
    th, alpha = cv2.threshold(np.array(rgb), 0, 255, cv2.THRESH_BINARY)
    # Apply a slight blur to the mask to soften edges
    print('alpha1')
    # print(alpha)
    alpha = cv2.GaussianBlur(alpha, (7, 7), 0)
    print('alpha2')
    # print(alpha)
    # Normalize the alpha mask to keep intensity between 0 and 1
    alpha = alpha.astype(float) / 255
    print('alpha3')
    # print(alpha)
    # Multiply the foreground with the alpha matte
    foreground = cv2.multiply(alpha, foreground)
    # Multiply the background with ( 1 - alpha )
    background = cv2.multiply(1.0 - alpha, background)
    # Add the masked foreground and background
    outImage = cv2.add(foreground, background)
    # Return a normalized output image for display
    # return outImage.astype(int)
    return outImage / 255


# def segment(net, path, show_orig=True, dev='cuda'):
def segment(net, path, show_orig=True, dev='cpu'):
    img = Image.open(path)
    if show_orig:
        plt.imshow(img)
        plt.axis('off')
        plt.show()
    # Comment the Resize and CenterCrop for better inference results
    trf = T.Compose([T.Resize(450),
                     # T.CenterCrop(224),
                     T.ToTensor(),
                     T.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])])
    inp = trf(img).unsqueeze(0).to(dev)
    out = net.to(dev)(inp)['out']
    om = torch.argmax(out.squeeze(), dim=0).detach().cpu().numpy()
    rgb = decode_segmap(om, path)

    plt.imshow(rgb)
    plt.axis('off')
    # plt.show()
    # plt.savefig("./imgsave/1.png", pad_inches=0)


if __name__ == '__main__':
    img_dir = r'E:/opencv/Stitching/img/13'  # input directory
    save_dir = r'E:/opencv/Stitching/img/14/'  # output directory
    names = os.listdir(img_dir)
    # print(names)
    # print(save_dir + str(1) + ".png")
    dlab = models.segmentation.deeplabv3_resnet101(pretrained=True).eval()
    i = 0
    for name in names:
        img_path = os.path.join(img_dir, name)
        segment(dlab, img_path, show_orig=False)
        plt.savefig(save_dir + str(i) + ".png", bbox_inches='tight', pad_inches=0)
        print(save_dir + str(i) + ".png" + ' done')
        i += 1

    # dlab = models.segmentation.deeplabv3_resnet101(pretrained=1).eval()
    # segment(dlab, "01/5.jpg", show_orig=False)
    # plt.savefig(save_dir + str(1) + ".png", bbox_inches='tight', pad_inches=0)
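
One caveat with saving via matplotlib: plt.savefig re-renders the figure at the canvas DPI, so the saved PNG generally does not preserve the input resolution. If exact pixel dimensions matter, an alternative (a sketch, assuming segment() is modified to return the rgb array computed by decode_segmap) is to save the array directly:

rgb = segment(dlab, img_path, show_orig=False)  # assumes segment() now ends with "return rgb"
plt.imsave(save_dir + str(i) + ".png", np.clip(rgb, 0, 1))  # float values must lie in [0, 1]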

Note: in this example, whether the target is a person, a car, or a boat, all 3 channel values of the corresponding label color must be nonzero; otherwise some channel information is lost when the mask is binarized.
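A minimal sketch of why (a hypothetical one-pixel example, using the standard Pascal VOC boat color (0, 0, 128) that the code above deliberately replaces with (128, 128, 128)):

import numpy as np
import cv2

rgb = np.full((1, 1, 3), (0, 0, 128), dtype=np.uint8)  # one 'boat' pixel in the decoded map
_, alpha = cv2.threshold(rgb, 0, 255, cv2.THRESH_BINARY)
print(alpha)  # [[[  0   0 255]]] -- the R and G channels of the mask stay 0,
# so cv2.multiply(alpha / 255, foreground) would zero out those channels of the foreground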
After running the script, we obtain the ship's main body.
[Figure: extracted ship foreground]
Next, we call OpenCV's built-in Stitcher class to stitch the images. The following code is based on opencv-python==4.5.

# -*- coding:utf-8 -*-
# @Time : 2021/1/15 15:09
# @Author : JulyLi
# @File : test3.py
# @Software: PyCharm

import os
import cv2

import numpy as np

img_dir = './img/14'  # input directory
names = os.listdir(img_dir)

images = []
for name in names:
    img_path = os.path.join(img_dir, name)
    image = cv2.imread(img_path)
    # image = cv2.resize(image, (0, 0), fx=0.4, fy=0.4)
    images.append(image)

print(len(images))

stitcher = cv2.Stitcher_create()
status, stitched = stitcher.stitch(images)
# stitch() returns a status code; the result is only valid on success
if status != cv2.Stitcher_OK:
    raise SystemExit('stitching failed, status = ' + str(status))

cv2.imshow("res", stitched)
cv2.imwrite("res14.png",stitched)

cv2.waitKey(0)
cv2.destroyAllWindows()

The stitched result is shown below:
[Figure: result of cv2.Stitcher on the foreground images]
Note: try to ensure that adjacent images share plenty of feature points; otherwise stitching may produce an incorrect result or raise an error.
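
One option worth trying for this fixed-camera setup (cv2.Stitcher does accept a mode argument; whether SCANS helps on these images is an assumption): SCANS mode uses an affine motion model, which often suits a translating camera better than the default PANORAMA rotation model.

stitcher = cv2.Stitcher_create(cv2.Stitcher_SCANS)  # affine model instead of the default cv2.Stitcher_PANORAMA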

Removing the black borders from the stitched image

# -*- coding:utf-8 -*-
# @Time : 2021/1/16 17:48
# @Author : JulyLi
# @File : test5.py
# @Software: PyCharm

import os
import cv2

import numpy as np

img_dir = './img/17'  # input directory
names = os.listdir(img_dir)

images = []
for name in names:
    img_path = os.path.join(img_dir, name)
    image = cv2.imread(img_path)

    images.append(image)

stitcher = cv2.Stitcher_create()
status, stitched = stitcher.stitch(images)
if status != cv2.Stitcher_OK:
    raise SystemExit('stitching failed, status = ' + str(status))

# pad the border with black pixels, then threshold to get a mask of the content
stitched = cv2.copyMakeBorder(stitched, 10, 10, 10, 10, cv2.BORDER_CONSTANT, value=(0, 0, 0))
gray = cv2.cvtColor(stitched, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY)

# the largest external contour corresponds to the stitched content
cnts, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnt = max(cnts, key=cv2.contourArea)

# draw its filled bounding rectangle as a mask
mask = np.zeros(thresh.shape, dtype="uint8")
x, y, w, h = cv2.boundingRect(cnt)
cv2.rectangle(mask, (x, y), (x + w, y + h), 255, -1)

minRect = mask.copy()
sub = mask.copy()

# erode the rectangle until it no longer covers any black border pixels:
# loop until subtracting the threshold image from minRect leaves nothing,
# i.e. minRect lies entirely inside the stitched content
while cv2.countNonZero(sub) > 0:
    minRect = cv2.erode(minRect, None)
    sub = cv2.subtract(minRect, thresh)

# the bounding box of the eroded rectangle is the final crop region
cnts, hierarchy = cv2.findContours(minRect.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnt = max(cnts, key=cv2.contourArea)
x, y, w, h = cv2.boundingRect(cnt)

# extract the final panorama using the bounding box coordinates
stitched = stitched[y:y + h, x:x + w]

cv2.imshow("res", stitched)
cv2.imwrite('final.jpg', stitched)

cv2.waitKey(0)
cv2.destroyAllWindows()

[Figure: final panorama after black border removal]

Summary

That's all for today; I'll keep adding to this when I have time!
2021-01-16 15:32:28
First update: removing the black borders from the stitched image
2021-01-19 09:14:19
If this article was useful to you, feel free to follow, like, comment, and bookmark!
