【python, C++, CUDA】双线性插值(Bilinear Interpolation)实现

原创已于 2024-11-29 00:00:11 修改 · 1.5k 阅读

14 ·

CC 4.0 BY-SA版权

文章标签：

#python #c++ #图像处理

于 2024-11-28 23:59:57 首次发布

C++学习同时被 3 个专栏收录

45 篇文章

订阅专栏

Python

20 篇文章

订阅专栏

CUDA编程

5 篇文章

订阅专栏

文章目录

理论基础

双线性插值是一种更为平滑的图像缩放算法，它考虑了目标像素周围四个源像素的影响，并根据距离进行加权。其主要经过三次线性插值得到，如下为线性插值的公式，可以将插值点的值解释为两个点的距离加权，其中距离近的点权重大，距离远的点权重小。
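$$f(x) \approx \frac{x_2 - x}{x_2 - x_1}\, f(x_1) + \frac{x - x_1}{x_2 - x_1}\, f(x_2), \qquad x_1 \le x \le x_2$$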
已知四个点 $Q_{11}(x_1,y_1)$、$Q_{12}(x_1,y_2)$、$Q_{21}(x_2,y_1)$、$Q_{22}(x_2,y_2)$ 处的值，求它们所围矩形内任意一点 $P(x,y)$ 的值。

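先在 x 方向做两次线性插值，得到 $R_1(x,y_1)$、$R_2(x,y_2)$ 处的值：

$$f(R_1) \approx \frac{x_2-x}{x_2-x_1} f(Q_{11}) + \frac{x-x_1}{x_2-x_1} f(Q_{21}), \qquad f(R_2) \approx \frac{x_2-x}{x_2-x_1} f(Q_{12}) + \frac{x-x_1}{x_2-x_1} f(Q_{22})$$

再在 y 方向对 $R_1$、$R_2$ 做一次线性插值：

$$f(P) \approx \frac{y_2-y}{y_2-y_1} f(R_1) + \frac{y-y_1}{y_2-y_1} f(R_2)$$

代入整理得：

$$f(x,y) \approx \frac{f(Q_{11})(x_2-x)(y_2-y) + f(Q_{21})(x-x_1)(y_2-y) + f(Q_{12})(x_2-x)(y-y_1) + f(Q_{22})(x-x_1)(y-y_1)}{(x_2-x_1)(y_2-y_1)}$$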

不难发现，每个已知点的权重由它的对角点到点P的距离决定：权重正比于该对角点与点P之间的水平距离 × 垂直距离（即二者围成的矩形面积），因此离P越近的点权重越大。

当然，也可以简化为如下的加权公式，可以看到该点由四个点加权求和得到。
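$$f(x,y) \approx w_{11} f(Q_{11}) + w_{21} f(Q_{21}) + w_{12} f(Q_{12}) + w_{22} f(Q_{22})$$

其中 $w_{11} = \frac{(x_2-x)(y_2-y)}{(x_2-x_1)(y_2-y_1)}$，$w_{21} = \frac{(x-x_1)(y_2-y)}{(x_2-x_1)(y_2-y_1)}$，$w_{12} = \frac{(x_2-x)(y-y_1)}{(x_2-x_1)(y_2-y_1)}$，$w_{22} = \frac{(x-x_1)(y-y_1)}{(x_2-x_1)(y_2-y_1)}$，且四个权重之和为 1。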

最后将如下公式用于实际计算：
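对图像而言，四个已知点是相邻像素，即 $x_1 = i,\ x_2 = i+1,\ y_1 = j,\ y_2 = j+1$。记小数部分 $u = x - i$、$v = y - j$，则：

$$f(i+u,\ j+v) = (1-u)(1-v)\, f(i,j) + u(1-v)\, f(i+1,j) + (1-u)\,v\, f(i,j+1) + u\,v\, f(i+1,j+1)$$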

python实现

import numpy as np
import cv2


def four_pixels(src_i, src_j, src_img, src_row, src_col):
    """Bilinearly interpolate one value of a single-channel image.

    Args:
        src_i: Fractional row coordinate in the source image.
        src_j: Fractional column coordinate in the source image.
        src_img: 2-D array indexed as ``src_img[row, col]``.
        src_row: Number of rows in ``src_img``.
        src_col: Number of columns in ``src_img``.

    Returns:
        The interpolated value as a Python ``int``, rounded to the nearest
        integer.
    """
    i, j = int(src_i), int(src_j)  # integer parts of the coordinates
    u = src_i - i  # fractional part along the row axis
    v = src_j - j  # fractional part along the column axis

    # Neighbours one step further, clamped so they stay inside the image.
    src_i1 = min(i + 1, src_row - 1)
    src_j1 = min(j + 1, src_col - 1)

    value = ((1 - u) * (1 - v) * src_img[i, j] + u * (1 - v) * src_img[src_i1, j] +
             (1 - u) * v * src_img[i, src_j1] + u * v * src_img[src_i1, src_j1])

    # Round to nearest instead of truncating: floating-point error can leave
    # the blend a hair below the exact result (e.g. 254.9999... for a flat
    # 255 region), which plain int() would turn into an off-by-one value.
    return int(value + 0.5)


def bilinear_interpolation(src_img, des_shape):
    """Resize a multi-channel image with bilinear interpolation.

    Args:
        src_img: Source image as a 3-D array indexed ``[row, col, channel]``.
        des_shape: Shape of the result as ``(rows, cols, channels)``.

    Returns:
        A ``uint8`` array of shape ``des_shape``. Every channel that exists in
        both the source and the destination is interpolated with
        ``four_pixels``; any extra destination channels stay zero.
    """
    src_row, src_col = src_img.shape[:2]
    des_row, des_col = des_shape[:2]
    des_img = np.zeros(des_shape)

    # Nothing to fill; returning here also avoids dividing by zero below.
    if des_row == 0 or des_col == 0:
        return des_img.astype(np.uint8)

    # Destination-to-source scale factors are loop invariant.
    row_scale = src_row / des_row
    col_scale = src_col / des_col

    # Handle however many channels are available instead of assuming three.
    channels = min(src_img.shape[2], des_shape[2])
    planes = [src_img[:, :, c] for c in range(channels)]

    for i in range(des_row):
        src_i = float(i) * row_scale
        for j in range(des_col):
            src_j = float(j) * col_scale
            for c, plane in enumerate(planes):
                des_img[i, j, c] = four_pixels(src_i, src_j, plane, src_row, src_col)

    return des_img.astype(np.uint8)


def nearest_interpolation(src_img, des_shape):
    """Resize a multi-channel image by copying the mapped source pixel.

    Each destination pixel ``(i, j)`` takes the source pixel at
    ``(int(i * src_rows / des_rows), int(j * src_cols / des_cols))``.

    Args:
        src_img: Source image as a 3-D array indexed ``[row, col, channel]``.
        des_shape: Shape of the result as ``(rows, cols, channels)``.

    Returns:
        A ``uint8`` array of shape ``des_shape``.
    """
    src_row, src_col = src_img.shape[:-1]
    des_row, des_col = des_shape[:-1]
    des_img = np.zeros(des_shape)

    # Nothing to fill; returning here also avoids dividing by zero below.
    if des_row == 0 or des_col == 0:
        return des_img.astype(np.uint8)

    # Source index for every destination row / column, truncated toward zero
    # exactly like int() on the scalar product.
    rows = (np.arange(des_row, dtype=np.float64) * (src_row / des_row)).astype(np.intp)
    cols = (np.arange(des_col, dtype=np.float64) * (src_col / des_col)).astype(np.intp)

    # Integer-array indexing gathers all pixels at once instead of looping
    # over them in Python; the result has shape (des_row, des_col, channels).
    des_img[...] = src_img[rows[:, np.newaxis], cols[np.newaxis, :], :]

    return des_img.astype(np.uint8)

# Demo: load an image, resize it with both methods and show the results.
image = cv2.imread(r"xiaowu.jpeg", -1)
if image is None:
    # cv2.imread reports failure by returning None; stop with a clear message
    # instead of failing later on `image.shape`.
    raise FileNotFoundError("failed to read image: xiaowu.jpeg")

b_image = bilinear_interpolation(image, (300, 240, 3))
n_image = nearest_interpolation(image, (450, 360, 3))
print(image.shape)
print(b_image.shape)
print(n_image.shape)

cv2.imshow('src', image)
cv2.imshow('b image', b_image)
cv2.imshow('n image', n_image)
cv2.waitKey(0)

C++实现

#include <opencv2/opencv.hpp>

// Resizes an 8-bit, 3-channel image to newWidth x newHeight using bilinear
// interpolation.
//
// Destination pixel (j, i) is mapped to source coordinate
// (j * src.cols / newWidth, i * src.rows / newHeight); the four surrounding
// source pixels are blended with weights derived from the fractional parts.
//
// src       : non-empty CV_8UC3 image (checked with CV_Assert).
// newWidth  : width of the result in pixels.
// newHeight : height of the result in pixels.
// Returns a new image of the same type as src.
cv::Mat resizeBilinear(const cv::Mat& src, int newWidth, int newHeight) {
    // Pixels are accessed as cv::Vec3b below, so any other layout would be misread.
    CV_Assert(!src.empty() && src.type() == CV_8UC3);

    cv::Mat dst(newHeight, newWidth, src.type(), cv::Scalar(0,0,0)); // start from a black image
    if (dst.empty()) {
        return dst; // nothing to fill; also avoids dividing by a zero dimension
    }

    const double xRatio = static_cast<double>(src.cols) / newWidth;
    const double yRatio = static_cast<double>(src.rows) / newHeight;

    for (int i = 0; i < newHeight; ++i) {
        // Everything that depends only on the row is computed once per row.
        const double srcY = i * yRatio;
        const int y0 = static_cast<int>(std::floor(srcY));
        const int y1 = std::min(y0 + 1, src.rows - 1); // clamp to the last row
        const double dy = srcY - y0;

        for (int j = 0; j < newWidth; ++j) {
            const double srcX = j * xRatio;
            const int x0 = static_cast<int>(std::floor(srcX));
            const int x1 = std::min(x0 + 1, src.cols - 1); // clamp to the last column
            const double dx = srcX - x0;

            // The four neighbouring source pixels.
            const cv::Vec3b& p00 = src.at<cv::Vec3b>(y0, x0);
            const cv::Vec3b& p10 = src.at<cv::Vec3b>(y0, x1);
            const cv::Vec3b& p01 = src.at<cv::Vec3b>(y1, x0);
            const cv::Vec3b& p11 = src.at<cv::Vec3b>(y1, x1);

            // Bilinear weights; they sum to 1.
            const double w00 = (1 - dx) * (1 - dy);
            const double w10 = dx * (1 - dy);
            const double w01 = (1 - dx) * dy;
            const double w11 = dx * dy;

            // Blend in double precision and convert to uchar exactly once.
            // Writing `weight * cv::Vec3b` would saturate every term to uchar
            // before the sum, so four separate rounding errors would add up.
            cv::Vec3b& out = dst.at<cv::Vec3b>(i, j);
            for (int c = 0; c < 3; ++c) {
                out[c] = cv::saturate_cast<uchar>(w00 * p00[c] + w10 * p10[c] +
                                                  w01 * p01[c] + w11 * p11[c]);
            }
        }
    }

    return dst;
}

int main() {
    // 加载源图像
    cv::Mat src = cv::imread("path_to_image.jpg");
    if (src.empty()) {
        std::cerr << "Image load failed!" << std::endl;
        return -1;
    }

    // 设置目标图像的宽度和高度
    int newWidth = 800;
    int newHeight = 600;

    // 调用双线性插值resize函数
    cv::Mat resizedImage = resizeBilinear(src, newWidth, newHeight);

    // 显示结果图像
    cv::imshow("Resized Image", resizedImage);
    cv::waitKey(0);

    return 0;
}

CUDA实现

#include <opencv2/opencv.hpp>
#include <iostream>

// Bilinear resize kernel for a single-channel 8-bit image.
//
// Launch layout: a 2D grid of 2D blocks with one thread per destination pixel
// (x from the .x dimensions, y from the .y dimensions). Threads that fall
// outside dstWidth x dstHeight do nothing. No shared memory is used.
//
// src : srcWidth * srcHeight bytes, row-major, no padding between rows.
// dst : dstWidth * dstHeight bytes, row-major, no padding between rows.
// src and dst must not overlap (both are declared __restrict__).
__global__ void bilinearInterpolationKernel(const uchar* __restrict__ src, uchar* __restrict__ dst, int srcWidth, int srcHeight, int dstWidth, int dstHeight) {
    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;

    // The grid is rounded up to whole blocks, so edge threads may be out of range.
    if (x >= dstWidth || y >= dstHeight) {
        return;
    }

    // Single precision is sufficient for 8-bit data and avoids the much
    // slower double-precision path on most GPUs.
    const float srcX = x * (static_cast<float>(srcWidth) / dstWidth);
    const float srcY = y * (static_cast<float>(srcHeight) / dstHeight);

    // Top-left neighbour, clamped so rounding can never push it past the edge.
    const int x0 = min(static_cast<int>(floorf(srcX)), srcWidth - 1);
    const int y0 = min(static_cast<int>(floorf(srcY)), srcHeight - 1);
    // Bottom-right neighbour, clamped to the last column / row.
    const int x1 = min(x0 + 1, srcWidth - 1);
    const int y1 = min(y0 + 1, srcHeight - 1);

    const float dx = srcX - x0;
    const float dy = srcY - y0;

    // 64-bit offsets so very large images cannot overflow int arithmetic.
    const size_t row0 = static_cast<size_t>(y0) * srcWidth;
    const size_t row1 = static_cast<size_t>(y1) * srcWidth;

    const float p0 = src[row0 + x0];
    const float p1 = src[row0 + x1];
    const float p2 = src[row1 + x0];
    const float p3 = src[row1 + x1];

    const float value = (1.0f - dx) * (1.0f - dy) * p0 + dx * (1.0f - dy) * p1 +
                        (1.0f - dx) * dy * p2 + dx * dy * p3;

    // Round to nearest: a plain conversion truncates, so a result such as
    // 254.9999 (from floating-point error) would be stored as 254.
    dst[static_cast<size_t>(y) * dstWidth + x] = static_cast<uchar>(fminf(value + 0.5f, 255.0f));
}

// Resizes a single-channel 8-bit image on the GPU with bilinear interpolation.
//
// src       : non-empty CV_8UC1 image (checked with CV_Assert).
// newWidth  : width of the result in pixels.
// newHeight : height of the result in pixels.
// Returns a new CV_8UC1 image. Throws cv::Exception (via CV_Error) if any CUDA
// call or the kernel launch fails; device buffers are released in every case.
cv::Mat resizeBilinearCUDA(const cv::Mat& src, int newWidth, int newHeight) {
    // The kernel reads one byte per pixel, so other formats would be misinterpreted.
    CV_Assert(!src.empty() && src.type() == CV_8UC1);

    cv::Mat dst(newHeight, newWidth, src.type(), cv::Scalar(0,0,0));
    if (dst.empty()) {
        return dst;  // nothing to compute; also avoids launching an empty grid
    }

    // The upload copies one flat buffer, so the rows must be stored back to back
    // (a ROI view of a larger image is not).
    const cv::Mat packed = src.isContinuous() ? src : src.clone();
    const size_t srcBytes = packed.total() * packed.elemSize();
    const size_t dstBytes = dst.total() * dst.elemSize();

    uchar* d_src = nullptr;
    uchar* d_dst = nullptr;

    // Each step runs only if everything before it succeeded; the first error is kept.
    cudaError_t status = cudaMalloc(&d_src, srcBytes);
    if (status == cudaSuccess) {
        status = cudaMalloc(&d_dst, dstBytes);
    }
    if (status == cudaSuccess) {
        status = cudaMemcpy(d_src, packed.ptr(), srcBytes, cudaMemcpyHostToDevice);
    }
    if (status == cudaSuccess) {
        // One thread per destination pixel; the grid is rounded up to whole blocks.
        const dim3 blockSize(16, 16);
        const dim3 gridSize((newWidth + blockSize.x - 1) / blockSize.x, (newHeight + blockSize.y - 1) / blockSize.y);

        bilinearInterpolationKernel<<<gridSize, blockSize>>>(d_src, d_dst, packed.cols, packed.rows, newWidth, newHeight);

        // A kernel launch returns nothing; launch failures are reported here.
        status = cudaGetLastError();
    }
    if (status == cudaSuccess) {
        // Blocking copy issued after the kernel on the same (default) stream;
        // it also reports errors raised while the kernel was running.
        status = cudaMemcpy(dst.ptr(), d_dst, dstBytes, cudaMemcpyDeviceToHost);
    }

    // cudaFree accepts a null pointer, so this is safe after a partial failure.
    cudaFree(d_src);
    cudaFree(d_dst);

    if (status != cudaSuccess) {
        CV_Error(cv::Error::StsError, cudaGetErrorString(status));
    }

    return dst;
}

int main() {
    // Read the input with imread flag 0; an empty result is treated as a load failure.
    const cv::Mat source = cv::imread("path_to_image.jpg", 0);
    if (!source.empty()) {
        // Requested output size in pixels.
        const int targetWidth = 800;
        const int targetHeight = 600;

        // Resize on the GPU and display the result until a key is pressed.
        const cv::Mat scaled = resizeBilinearCUDA(source, targetWidth, targetHeight);
        cv::imshow("Resized Image", scaled);
        cv::waitKey(0);

        return 0;
    }

    std::cerr << "Image load failed!" << std::endl;
    return -1;
}

附：CUDA实现最近邻插值

这个原理很简单，就直接上代码了，需要的可以直接取用。

#include <iostream>
#include <cuda_runtime.h>

// Evaluates a CUDA runtime call exactly once. If the result is not
// cudaSuccess, prints the error string with file and line to std::cerr and
// terminates the program with EXIT_FAILURE.
//
// The argument is parenthesized so any expression can be passed, and the
// temporary has a macro-specific name so it cannot capture a caller variable
// (such as one named `error`) that appears inside the call expression.
#define CHECK_CUDA_ERROR(call) \
    do { \
        const cudaError_t checkCudaErrorStatus_ = (call); \
        if (checkCudaErrorStatus_ != cudaSuccess) { \
            std::cerr << "CUDA error: " << cudaGetErrorString(checkCudaErrorStatus_) << " at " << __FILE__ << ":" << __LINE__ << std::endl; \
            exit(EXIT_FAILURE); \
        } \
    } while (0)

// Nearest-neighbour resize kernel for a single-channel 8-bit image.
//
// Launch layout: a 2D grid of 2D blocks with one thread per output pixel
// (x from the .x dimensions, y from the .y dimensions). Threads that fall
// outside outputWidth x outputHeight do nothing. No shared memory is used.
//
// output : outputWidth * outputHeight bytes, row-major, no row padding.
// input  : inputWidth * inputHeight bytes, row-major, no row padding.
// output and input must not overlap (both are declared __restrict__).
__global__ void nearestNeighborInterpolationKernel(unsigned char* __restrict__ output, const unsigned char* __restrict__ input, int inputWidth, int inputHeight, int outputWidth, int outputHeight) {
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid is rounded up to whole blocks, so edge threads may be out of range.
    if (x >= outputWidth || y >= outputHeight) {
        return;
    }

    // Source pixel = floor(x * inputWidth / outputWidth), computed with exact
    // 64-bit integer arithmetic. The float form (x / outputWidth * inputWidth)
    // can land just below the intended integer and then truncate to the
    // previous pixel. The min() keeps the index in range regardless.
    const int inputX = min(static_cast<int>(static_cast<long long>(x) * inputWidth / outputWidth), inputWidth - 1);
    const int inputY = min(static_cast<int>(static_cast<long long>(y) * inputHeight / outputHeight), inputHeight - 1);

    // Copy the selected source pixel; 64-bit offsets avoid int overflow on
    // very large images.
    output[static_cast<size_t>(y) * outputWidth + x] = input[static_cast<size_t>(inputY) * inputWidth + inputX];
}

// Host wrapper: resizes a single-channel 8-bit image with nearest-neighbour
// sampling on the GPU.
//
// inputImage  : host buffer of inputWidth * inputHeight bytes (row-major).
// outputImage : host buffer of outputWidth * outputHeight bytes that receives
//               the result.
// Any CUDA failure is reported by CHECK_CUDA_ERROR, which terminates the program.
void nearestNeighborInterpolation(const unsigned char* inputImage, unsigned char* outputImage, int inputWidth, int inputHeight, int outputWidth, int outputHeight) {
    unsigned char* d_inputImage = nullptr;
    unsigned char* d_outputImage = nullptr;

    // Compute byte counts in size_t so width * height cannot overflow int.
    const size_t inputBytes = static_cast<size_t>(inputWidth) * inputHeight * sizeof(unsigned char);
    const size_t outputBytes = static_cast<size_t>(outputWidth) * outputHeight * sizeof(unsigned char);

    // Allocate device memory.
    CHECK_CUDA_ERROR(cudaMalloc(&d_inputImage, inputBytes));
    CHECK_CUDA_ERROR(cudaMalloc(&d_outputImage, outputBytes));

    // Upload the source image.
    CHECK_CUDA_ERROR(cudaMemcpy(d_inputImage, inputImage, inputBytes, cudaMemcpyHostToDevice));

    // One thread per output pixel; the grid is rounded up to whole blocks.
    dim3 blockSize(16, 16);
    dim3 gridSize((outputWidth + blockSize.x - 1) / blockSize.x, (outputHeight + blockSize.y - 1) / blockSize.y);

    nearestNeighborInterpolationKernel<<<gridSize, blockSize>>>(d_outputImage, d_inputImage, inputWidth, inputHeight, outputWidth, outputHeight);

    // A kernel launch returns nothing; launch failures (e.g. an invalid
    // configuration) are only visible through cudaGetLastError.
    CHECK_CUDA_ERROR(cudaGetLastError());

    // Wait for the kernel to finish and surface any error raised while it ran.
    CHECK_CUDA_ERROR(cudaDeviceSynchronize());

    // Download the result.
    CHECK_CUDA_ERROR(cudaMemcpy(outputImage, d_outputImage, outputBytes, cudaMemcpyDeviceToHost));

    // Release device memory.
    CHECK_CUDA_ERROR(cudaFree(d_inputImage));
    CHECK_CUDA_ERROR(cudaFree(d_outputImage));
}

int main() {
    // Example geometry: a 256x256 source resized to 512x512.
    const int srcW = 256;
    const int srcH = 256;
    const int dstW = 512;
    const int dstH = 512;

    // Host-side pixel buffers.
    unsigned char* srcPixels = new unsigned char[srcW * srcH];
    unsigned char* dstPixels = new unsigned char[dstW * dstH];

    // Fill the source with a horizontal gradient: the value depends only on
    // the column, so a single pass over the flat buffer is enough.
    for (int idx = 0; idx < srcW * srcH; ++idx) {
        const int col = idx % srcW;
        srcPixels[idx] = (unsigned char)((float)col / srcW * 255);
    }

    // Run the nearest-neighbour resize.
    nearestNeighborInterpolation(srcPixels, dstPixels, srcW, srcH, dstW, dstH);

    // Print the top-left 10x10 corner of the result as a quick check.
    const int previewSize = 10;
    for (int row = 0; row < previewSize; ++row) {
        const unsigned char* line = dstPixels + row * dstW;
        for (int col = 0; col < previewSize; ++col) {
            std::cout << (int)line[col] << " ";
        }
        std::cout << std::endl;
    }

    // Release the host buffers.
    delete[] srcPixels;
    delete[] dstPixels;

    return 0;
}