文章目录
1.简单描述OpenCV的Sobel算子;
OpenCV的Sobel算子是一种常用的边缘检测算法,用于检测图像中的水平和垂直边缘。
Sobel算子通过卷积操作来计算图像中每个像素的梯度值。它使用两个3×3的卷积核,一个用于检测水平边缘,另一个用于检测垂直边缘。这两个卷积核分别为:
-1 0 1 -1 -2 -1
-2 0 2 0 0 0
-1 0 1 1 2 1
在进行卷积时,将卷积核应用于图像的每个像素,计算其与相邻像素之间的差异。这些差异将被用来计算梯度大小和方向,从而检测出图像中的边缘。
Sobel算子是一种简单但有效的边缘检测方法,常用于图像处理和计算机视觉领域中的特征提取和对象识别任务。
2. Numpy 复现代码
复现的时候需要注意的点:
- Sobel其实就是简单的2个卷积操作,但是,正常卷积操作为了保持原图大小不变,是应该padding的。opencv官方也是针对3*3卷积的Sobel算子的原始图像进行了padding,我们为了效果一致也进行padding,
2.1 numpy的padding操作如下:
在 NumPy 中,可以使用 numpy.pad() 函数来进行数组的 padding。它的语法如下(opencv官方的对原图采用的reflect进行的对称反射填充):
numpy.pad(array, pad_width, mode, **kwargs)
其中,array 是要进行 padding 的数组,pad_width 是一个整数或整数序列,指定每个轴向上要填充的宽度,mode 指定 padding 的方式,常见的取值有:
- constant:用常数填充,需要指定填充值(constant_values)。
- edge:用数组的边缘值填充。
- reflect:用对称的方式填充,即从数组的边缘开始反射数组值。
- wrap:用数组的另一端的值填充,即用数组的首位相连的方式填充。
2.2 Numpy完整复现代码
我们这里为了方便之后写Cuda C代码,手动实现了reflect操作
import cv2
import numpy as np
def reflect(array):
ndim = array.shape[0]
img_expand = np.zeros((ndim + 2, ndim + 2), dtype=np.uint8)
ndims = img_expand.shape[0]
start = 0
end = ndims
# 1. 复制
img_expand[start + 1: end-1, start + 1: end-1] = array
img_expand[0] = img_expand[2]
img_expand[:, 0] = img_expand[:, 2]
img_expand[:, end - 1] = img_expand[:, end - 3]
img_expand[end - 1] = img_expand[end - 3]
return img_expand
def sobel(img):
# 定义 Sobel 算子
sobel_x = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]])
sobel_y = np.array([[-1, -2, -1], [0, 0, 0], [1, 2, 1]])
# 图像矩阵的行数和列数
rows, cols = img.shape
# 定义输出图像矩阵
output_x = np.zeros((rows, cols))
output_y = np.zeros((rows, cols))
output = np.zeros((rows, cols))
# 对图像进行边缘扩展
# img_expand = np.pad(img, ((1, 1), (1, 1)), 'reflect')
img_expand = reflect(img)
# 对每个像素点进行卷积计算
for i in range(rows):
for j in range(cols):
output_x[i, j] = np.sum(sobel_x * img_expand[i:i+3, j:j+3])
output_y[i, j] = np.sum(sobel_y * img_expand[i:i+3, j:j+3])
# 计算梯度幅值
# output = np.sqrt(output_x ** 2 + output_y ** 2)
return output_x, output_y
raw_img = cv2.imread("feijidongche2_4_64730_input_gray.bmp", cv2.IMREAD_COLOR)
gray_img = cv2.cvtColor(raw_img, cv2.COLOR_BGR2GRAY)
print(f"====================== verify: sobel x =================================")
sobel_x = cv2.Sobel(gray_img, -1, 1, 0, ksize=3)
sobel_self_wrtie_x, _ = sobel(gray_img)
sobel_self_wrtie_x = np.clip(sobel_self_wrtie_x, 0, 255)
print(sobel_x)
print("="*50)
print(sobel_self_wrtie_x)
isclose = np.isclose(sobel_self_wrtie_x, sobel_x, rtol=1e-9, atol=1e-9)
print("="*50)
print(sobel_self_wrtie_x[isclose==False])
print(f"====================== verify: sobel y =================================")
sobel_y = cv2.Sobel(gray_img, -1, 0, 1, ksize=3)
_, sobel_self_wrtie_y = sobel(gray_img)
sobel_self_wrtie_y = np.clip(sobel_self_wrtie_y, 0, 255)
print(sobel_y)
print("="*50)
print(sobel_self_wrtie_y)
isclose = np.isclose(sobel_self_wrtie_x, sobel_x, rtol=1e-9, atol=1e-9)
print("="*50)
print(sobel_self_wrtie_x[isclose==False])
代码执行结果:
====================== verify: sobel x =================================
[[ 0 0 12 ... 8 0 0]
[ 0 0 12 ... 9 0 0]
[ 0 24 21 ... 5 0 0]
...
[ 0 0 52 ... 0 0 0]
[ 0 0 0 ... 11 0 0]
[ 0 0 0 ... 26 0 0]]
==================================================
[[ 0. 0. 12. ... 8. 0. 0.]
[ 0. 0. 12. ... 9. 0. 0.]
[ 0. 24. 21. ... 5. 0. 0.]
...
[ 0. 0. 52. ... 0. 0. 0.]
[ 0. 0. 0. ... 11. 0. 0.]
[ 0. 0. 0. ... 26. 0. 0.]]
==================================================
[]
====================== verify: sobel y =================================
[[ 0 0 0 ... 0 0 0]
[ 0 0 6 ... 0 0 0]
[ 0 0 0 ... 0 3 12]
...
[84 67 18 ... 0 0 0]
[72 60 23 ... 15 49 68]
[ 0 0 0 ... 0 0 0]]
==================================================
[[ 0. 0. 0. ... 0. 0. 0.]
[ 0. 0. 6. ... 0. 0. 0.]
[ 0. 0. 0. ... 0. 3. 12.]
...
[84. 67. 18. ... 0. 0. 0.]
[72. 60. 23. ... 15. 49. 68.]
[ 0. 0. 0. ... 0. 0. 0.]]
==================================================
[] # 最后结果为空,证明我们复现的sobel和opencv的sobel算子效果一模一样
3.Cuda代码今日稍后追加;
3.1 kernel 代码:
#include "cuda_runtime.h"
typedef unsigned char uint8_t;
#define checkCudaKernel(...) \
__VA_ARGS__; \
do{cudaError_t cudaStatus = cudaPeekAtLastError(); \
if (cudaStatus != cudaSuccess){ \
printf("launch failed: %s\n", cudaGetErrorString(cudaStatus)); \
}} while(0);
static __device__ uint8_t cast(float value){
return value < 0 ? 0 : (value > 255 ? 255 : value);
}
static __global__ void sobelKernel(const unsigned char* imgIn, uint8_t* imgOut, const int width, const int height, const int channels, const bool xDirection)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= width || y >= height)
return;
if (x < width && y < height)
{
int sumX = 0, sumY = 0;
const int gx[3][3] = { {-1, 0, 1}, {-2, 0, 2}, {-1, 0, 1} };
const int gy[3][3] = { {-1, -2, -1}, {0, 0, 0}, {1, 2, 1} };
int xEdge, yEdge;
for (int k = 0; k < channels; ++k)
{
for (int j = -1; j <= 1; ++j)
{
for (int i = -1; i <= 1; ++i)
{
xEdge = i;
yEdge = j;
if (y == 0 && j == -1)
{
yEdge = 1;
}
if (x == 0 && i == -1)
{
xEdge = 1;
}
if (y == height - 1 && j == 1)
{
yEdge = -1;
}
if (x == width - 1 && i == 1)
{
xEdge = -1;
}
int idx = (y + yEdge) * width * channels + (x + xEdge) * channels + k;
if (x >= 0 && x < width && y >= 0 && y < height)
{
if (xDirection)
{
sumX += gx[j + 1][i + 1] * imgIn[idx];
}
else
{
sumY += gy[j + 1][i + 1] * imgIn[idx];
}
}
}
}
}
int idx = y * width * channels + x * channels;
if (xDirection)
{
imgOut[idx] = cast(sumX);
}
else
{
imgOut[idx] = cast(sumY);
}
}
return;
}
/**
* \brief: Sobel算子的gpu实现
* \param: src 输入的灰度图像的gpu数据
* \param: src_width 输入的灰度图像的宽度
* \param: src_height 输入的灰度图像的高度
* \param: dst 提取的边缘的信息, 需要传入一个与输入大小相同的gpu的空间的首地址
* \param: xDirection 是否选择 x 反向的, 如果选择X方向, 则返回的dst也为X方向的边缘信息
* \param: stream cuda任务流
*
* \retval: None
*/
void Sobel(
uint8_t* src, int src_width, int src_height,
uint8_t* dst, bool xDirection,
cudaStream_t stream)
{
// 32 * 32 = 1024; Pascal 架构和 Turing 架构的显卡一般最多为1个block能启动1024个thread;
// 多了则会报错, 核函数会启动失败
dim3 threadPerBlock(32, 32);
dim3 blockPerGrid((src_width + threadPerBlock.x - 1) / threadPerBlock.x,
(src_height + threadPerBlock.y - 1) / threadPerBlock.y);
checkCudaKernel(sobelKernel << <blockPerGrid, threadPerBlock, 0, stream >> > (
src, dst, src_width, src_height, 1, xDirection
));
}
3.2 C++调用 kernel 代码,并且验证效果代码:
#include <memory>
#include <iostream>
#include "opencv2/opencv.hpp"
#include <cassert>
#include "tools/ilogger.hpp"
#include "cuda_runtime.h"
#define Debug 0
#define checkCudaRuntime(call) check_runtime(call, #call, __LINE__, __FILE__)
bool check_runtime(cudaError_t e, const char* call, int line, const char *file){
if (e != cudaSuccess) {
printf("CUDA Runtime error %s # %s, code = %s [ %d ] in file %s:%d\n", call, cudaGetErrorString(e), cudaGetErrorName(e), e, file, line);
return false;
}
return true;
}
/**
* \brief: Sobel算子的gpu实现
* \param: src 输入的灰度图像的gpu数据
* \param: src_width 输入的灰度图像的宽度
* \param: src_height 输入的灰度图像的高度
* \param: dst 提取的边缘的信息, 需要传入一个与输入大小相同的gpu的空间的首地址
* \param: xDirection 是否选择 x 反向的, 如果选择X方向, 则返回的dst也为X方向的边缘信息
* \param: stream cuda任务流
*
* \retval: None
*/
void Sobel(
uint8_t* src, int src_width, int src_height,
uint8_t* dst, bool xDirection,
cudaStream_t stream);
void write_sobel_result(cv::Mat mat, std::string file_name)
{
std::ofstream ofs(file_name);
ofs.setf(std::ios::fixed, std::ios::floatfield); // 设定为 fixed 模式,以小数点表示浮点数
ofs.precision(3); // 设置精度 2
for (int i = 0; i < mat.rows; i++)
{
for (int j = 0; j < mat.cols; j++)
{
ofs << (int)mat.at<u_char>(i, j) << " ";
}
ofs << std::endl;
}
ofs.close();
}
int main()
{
cv::Mat img = cv::imread("workspace/1.jpg", cv::IMREAD_COLOR);
assert(!img.empty());
cv::Mat grayMat;
cv::cvtColor(img, grayMat, cv::COLOR_BGR2GRAY);
write_sobel_result(grayMat, "workspace/grayMat_1.txt");
// 1. Malloc -> Copy -> rgb
uint8_t* pbyImgBufferDev = nullptr; // 200 * 200 * 3
checkCudaRuntime(cudaMalloc(&pbyImgBufferDev, img.rows * img.cols * 3 * sizeof(uint8_t)));
checkCudaRuntime(cudaMemcpy(pbyImgBufferDev, img.data, img.rows * img.cols * 3 * sizeof(uint8_t), cudaMemcpyHostToDevice));
// BGR2GRAY test pass
uint8_t* pbyImgBufferGrayDev = nullptr;
uint8_t* pbyImgBufferGrayHost = nullptr;
pbyImgBufferGrayHost = new uint8_t[img.rows * img.cols]; // 200*200
checkCudaRuntime(cudaMalloc(&pbyImgBufferGrayDev, img.rows * img.cols * sizeof(uint8_t)));
// linestep width height
rgb_to_gray(pbyImgBufferDev, img.step.p[0], img.cols, img.rows, pbyImgBufferGrayDev, nullptr);
checkCudaRuntime(cudaDeviceSynchronize());
checkCudaRuntime(cudaMemcpy(pbyImgBufferGrayHost, pbyImgBufferGrayDev, img.rows * img.cols * sizeof(uint8_t), cudaMemcpyDeviceToHost));
cv::Mat img_gray(img.rows, img.cols, CV_8UC1, pbyImgBufferGrayHost);
#if Debug
write_sobel_result(img_gray, "workspace/grayMat_K1.txt");
cv::imwrite("workspace/1.bmp", img_gray);
#endif
// 保存opencv 的sobel 算子结果
#if 0
cv::Mat Sobel_x, Sobel_y;
cv::Sobel(img_gray, Sobel_x, -1, 1, 0, 3);
std::cout << Sobel_x.type() << std::endl;
write_sobel_result(Sobel_x, "Sobel_x_1.txt");
// cv::imwrite("workspace/1_sobel_x.bmp", Sobel_x);
cv::Sobel(img_gray, Sobel_y, -1, 0, 1, 3);
write_sobel_result(Sobel_y, "Sobel_y_1.txt");
// cv::Mat src;
exit(0);
#endif
typedef uint8_t test_type;
test_type* pbySobelX = nullptr;
checkCudaRuntime(cudaMalloc(&pbySobelX, img.rows * img.cols * sizeof(test_type)));
checkCudaRuntime(cudaMemset(pbySobelX, 0, img.rows * img.cols * sizeof(test_type)));
test_type* pbySobelY = nullptr;
checkCudaRuntime(cudaMalloc(&pbySobelY, img.rows * img.cols * sizeof(test_type)));
checkCudaRuntime(cudaMemset(pbySobelY, 0, img.rows * img.cols * sizeof(test_type)));
// 测试Sobel 算子的性能
while (0)
{
auto t0 = std::chrono::high_resolution_clock::now();
for (int i = 0; i < 45; ++i)
{
Sobel(pbyImgBufferGrayDev, img.size[1], img.size[0], pbySobelX, true, nullptr);
Sobel(pbyImgBufferGrayDev, img.size[1], img.size[0], pbySobelY, false, nullptr);
}
checkCudaRuntime(cudaDeviceSynchronize());
auto t1 = std::chrono::high_resolution_clock::now();
INFO("Sobel time: %f ms", std::chrono::duration_cast<std::chrono::microseconds>(t1 - t0).count() / 1000.0);
}
auto t0 = std::chrono::high_resolution_clock::now();
Sobel(pbyImgBufferGrayDev, img.size[1], img.size[0], pbySobelX, true, nullptr);
Sobel(pbyImgBufferGrayDev, img.size[1], img.size[0], pbySobelY, false, nullptr);
auto t1 = std::chrono::high_resolution_clock::now();
checkCudaRuntime(cudaDeviceSynchronize());
INFO("Sobel time: %f ms", std::chrono::duration_cast<std::chrono::microseconds>(t1 - t0).count() / 1000.0);
// 保存Nvidai Kernel 的sobel 算子结果
cv::Mat SobeX_mat(img.rows, img.cols, CV_8U);
cv::Mat SobeY_mat(img.rows, img.cols, CV_8U);
checkCudaRuntime(cudaMemcpy(SobeX_mat.data, pbySobelX, img.rows * img.cols * sizeof(test_type), cudaMemcpyDeviceToHost));
checkCudaRuntime(cudaMemcpy(SobeY_mat.data, pbySobelY, img.rows * img.cols * sizeof(test_type), cudaMemcpyDeviceToHost));
write_sobel_result(SobeX_mat, "Sobel_dev_x.txt");
write_sobel_result(SobeY_mat, "Sobel_dev_y.txt");
checkCudaRuntime(cudaFree(pbyImgBufferDev));
checkCudaRuntime(cudaFree(pbyImgBufferGrayDev));
delete [] pbyImgBufferGrayHost;
return 0;
}
PS:
如果你觉得我的代码有帮助到你,请关注一波,非常感谢。
使用本教程遇到问题无法解决的话,可以在私信联系我或者在本文章下方进行评论(一般24小时内回复),我会尽我所能帮助你解决问题。