自从进入电气工程领域,我阴差阳错地学习了CUDA,并在学习的过程中初步了解了并行计算的一些思路。
由于我的电脑空间有限,我准备把平时的一些CUDA编程练习记录下来,放到博客园上,这样既方便我
以后的学习,也可以为博客园的成长尽一份绵薄之力。接下来进入正题:
图像灰度转换cuda实现:
1 #include "cuda_runtime.h"
2 #include "device_launch_parameters.h"
3 #include <stdio.h>
4 #include<iostream>
5
6 #include <iostream>
7 #include <opencv2/core/core.hpp>
8 #include <opencv2/highgui/highgui.hpp>
9
10 #define CHANNELS 3
11
/*
 * Converts an interleaved 3-channel, 8-bit-per-channel image into a
 * single-channel 8-bit grey image using a weighted sum of the channels.
 *
 * Launch layout: 2D grid of 2D blocks. The thread's x index selects the
 * column and its y index selects the row, one thread per pixel. Threads that
 * fall outside width x height do nothing, so the grid may overshoot the image.
 *
 * pout   - output buffer, width*height bytes, row-major, one byte per pixel.
 * pin    - input buffer, width*height*CHANNELS bytes, row-major, with each
 *          pixel's channels stored in B, G, R order (the order cv::imread
 *          produces for colour images).
 * width  - image width in pixels.
 * height - image height in pixels.
 */
__global__ void colorToGreyScaleConversion(unsigned char *pout, unsigned char *pin, int width, int height) {
    int Col = blockIdx.x * blockDim.x + threadIdx.x;
    int Row = blockIdx.y * blockDim.y + threadIdx.y;

    if (Col < width && Row < height) {
        // Linear index of this pixel in the grey output.
        int greyoffset = Row * width + Col;
        // The colour input has CHANNELS bytes per pixel.
        int rgbOffset = greyoffset * CHANNELS;

        // Channel order is B, G, R: the red weight must be applied to the
        // third byte and the blue weight to the first.
        unsigned char b = pin[rgbOffset];
        unsigned char g = pin[rgbOffset + 1];
        unsigned char r = pin[rgbOffset + 2];

        // Luminosity-weighted sum, truncated to 8 bits.
        pout[greyoffset] = (unsigned char)(0.21f * r + 0.71f * g + 0.07f * b);
    }
}
27
using namespace cv;

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Loads a colour image, shows it, converts it to greyscale on the GPU with
// colorToGreyScaleConversion, and shows the result.
// Returns 0 on success and 1 if the image cannot be loaded, has an unexpected
// channel count, or any CUDA call fails.
int main(void) {

    // Read the input image.
    Mat img = imread("E:\\opencv\\lena512color.tiff");
    if (img.empty()) {
        // imread returns an empty Mat when the file is missing or unreadable.
        fprintf(stderr, "Failed to load image: E:\\opencv\\lena512color.tiff\n");
        return 1;
    }
    // The raw copy below treats img.data as one dense buffer.
    if (!img.isContinuous()) {
        img = img.clone();
    }

    // Create a window named "lena".
    namedWindow("lena");
    // Show the original image in that window.
    imshow("lena", img);
    // Wait up to 6000 ms (or until a key is pressed).
    waitKey(6000);

    const int imgheight = img.rows;
    const int imgwidth = img.cols;
    const int imgchannel = img.channels();

    // The kernel strides through the input with a fixed CHANNELS bytes per pixel.
    if (imgchannel != CHANNELS) {
        fprintf(stderr, "Expected %d channels but image has %d\n", CHANNELS, imgchannel);
        return 1;
    }

    Mat grayImage(imgheight, imgwidth, CV_8UC1, Scalar(0));

    // Compute byte counts in size_t so large images cannot overflow int.
    const size_t pixelCount = (size_t)imgheight * (size_t)imgwidth;
    const size_t inBytes = pixelCount * (size_t)imgchannel * sizeof(unsigned char);
    const size_t outBytes = pixelCount * sizeof(unsigned char);

    unsigned char *dev_pin = NULL;
    unsigned char *dev_pout = NULL;

    // Allocate device buffers and upload the colour image; stop at the first failure.
    bool ok = cudaSucceeded(cudaMalloc((void**)&dev_pin, inBytes), "cudaMalloc(dev_pin)")
        && cudaSucceeded(cudaMalloc((void**)&dev_pout, outBytes), "cudaMalloc(dev_pout)")
        && cudaSucceeded(cudaMemcpy(dev_pin, img.data, inBytes, cudaMemcpyHostToDevice),
                         "cudaMemcpy(host to device)");

    if (ok) {
        // 16x16 threads per block; ceil-divide so the grid covers every pixel.
        dim3 block(16, 16);
        dim3 grid((imgwidth - 1) / block.x + 1, (imgheight - 1) / block.y + 1);
        colorToGreyScaleConversion<<<grid, block>>>(dev_pout, dev_pin, imgwidth, imgheight);

        // A launch does not return a status itself; query it explicitly.
        // The blocking copy back then reports any error raised while the kernel ran.
        ok = cudaSucceeded(cudaGetLastError(), "kernel launch")
            && cudaSucceeded(cudaMemcpy(grayImage.data, dev_pout, outBytes, cudaMemcpyDeviceToHost),
                             "cudaMemcpy(device to host)");
    }

    // Release device memory on every path; cudaFree on a null pointer is a no-op.
    cudaFree(dev_pin);
    cudaFree(dev_pout);

    if (!ok) {
        return 1;
    }

    imshow("grayImage", grayImage);
    waitKey(3000);
    return 0;
}
值得注意的是:我是在VS2017平台上实现的,编程时调用了OpenCV的库函数,读者需要自行配置该环境。为了不让
大家在配置环境时耗费太长时间,我附上CSDN上一位大神的配置教程链接:https://blog.csdn.net/qq_41175905/article/details/80560429
转换前如下图:
转换后如下图:
另外,读者也可以用Matlab、OpenCV等的库函数实现灰度转换;比较发现,CUDA并行计算的速度要快一些。