1 openCV 简介及安装 + windows10 系统
-
从OpenCV官网下载OpenCV安装包,下载的时候尽量不要选择带*的版本,这些版本还在持续更新中,然后直接双击按照提示进行安装即可,记住安装的位置目录
-
-
环境变量-> Path->新建->将opencv安装目录下的bin文件所在路径复制到新建的环境变量里面,也就是对应的第4步,我的安装路径如下:
F:\Opencv\opencv\build\x64\vc15\bin
-》应用(选vc14还是vc15取决于你安装的Visual Studio版本:vc14对应VS2015,vc15对应VS2017)
-
配置VS环境-》项目属性》
-
包含目录:将两个路径添加进去
F:\Opencv\opencv\build\include\opencv2
和F:\Opencv\opencv\build\include
-
库目录:在下面路径里面
F:\Opencv\opencv\build\x64\vc15\lib
添加到库目录里面
点黄色的地方就可以进行编辑
- 链接器 -> 输入 -> 附加依赖项 -》将目录
F:\Opencv\opencv\build\x64\vc15\lib
下的 opencv_world410d.lib
添加进入环境
2 openCV使用
3 图像灰度化和边缘提取 CPU实现和GPU实现
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <opencv2/opencv.hpp>
#include <iostream>
#include<vector>
using namespace std;
using namespace cv;
#define TILE 16
const int KERNEL_RADIUS = 1; // Radius of the convolution mask; a radius of 1 gives a 3x3 mask.
// 3x3 Laplacian edge-detection mask kept in GPU constant memory.
// The coefficients use the same sign convention as the mask handed to filter2D on
// the CPU path (centre +8, neighbours -1) so both implementations compute the same response.
__constant__ int KERNEL[2 * KERNEL_RADIUS + 1][2 * KERNEL_RADIUS + 1] =
{
    { -1, -1, -1 },
    { -1,  8, -1 },
    { -1, -1, -1 }
};
// edge_kernel parameters: stride_input is the row stride of the input image,
// stride_output is the row stride of the output image.
/*
 * Convolves a single-channel 8-bit image with the mask stored in KERNEL.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel. Threads that fall
 * outside width x height exit without touching memory.
 *
 * input         - device pointer to the source image
 * output        - device pointer to the destination image
 * width, height - image size in pixels
 * stride_input  - bytes per input row
 * stride_output - bytes per output row
 *
 * The kernel only acts when stride_input / width == 1 (treated as grayscale).
 * Interior pixels receive the mask response clamped to [0, 255]; pixels closer
 * than KERNEL_RADIUS to the image border are written as 0 so the whole output
 * is defined.
 */
__global__ void edge_kernel(unsigned char* input, unsigned char* output, int width, int height, int stride_input, int stride_output)
{
    const int x = blockDim.x * blockIdx.x + threadIdx.x;
    const int y = blockDim.y * blockIdx.y + threadIdx.y;
    if (x >= width || y >= height)
        return;

    // stride / width == 1 means grayscale; 3 would mean an interleaved 3-channel image.
    const int channel = stride_input / width;
    if (channel != 1)
        return;

    unsigned char result = 0;
    const bool interior = x >= KERNEL_RADIUS && x < width - KERNEL_RADIUS &&
                          y >= KERNEL_RADIUS && y < height - KERNEL_RADIUS;
    if (interior)
    {
        // Centre the window on (x, y) and step rows by the real row stride so
        // no tap leaves the image.
        int acc = 0;
        for (int dy = -KERNEL_RADIUS; dy <= KERNEL_RADIUS; dy++)
        {
            for (int dx = -KERNEL_RADIUS; dx <= KERNEL_RADIUS; dx++)
            {
                acc += input[(y + dy) * stride_input + (x + dx)] * KERNEL[dy + KERNEL_RADIUS][dx + KERNEL_RADIUS];
            }
        }
        // Saturate before narrowing: the raw response can be negative or exceed 255.
        acc = acc < 0 ? 0 : (acc > 255 ? 255 : acc);
        result = static_cast<unsigned char>(acc);
    }
    output[y * stride_output + x] = result;
}
/*
 * Demo: edge extraction with a 3x3 mask on the CPU (cv::filter2D) and on the
 * GPU (edge_kernel). Prints the clock() ticks spent in each path and shows the
 * grayscale image and both results until ESC is pressed.
 *
 * Returns 0 on success, -1 if the image cannot be read or a CUDA call fails.
 */
int main()
{
    Mat frame = imread("I:\\ONE_grade\\SZ_code\\class\\CudaRuntime2\\bg.jpg");
    if (frame.empty())
    {
        // Without this check the cvtColor call below would fail on an empty image.
        cerr << "failed to read the input image" << endl;
        return -1;
    }
    cout << frame.cols << " " << frame.rows << " " << frame.step << ",static_cast<int>(frame.step) = " << static_cast<int>(frame.step) << endl;

    Mat grey, edge_gpu(frame.rows, frame.cols, CV_8UC1); // 8-bit, single channel
    Mat Edge_pic;

    //********************************** CPU: edge extraction with a mask ****************************
    cvtColor(frame, grey, COLOR_BGR2GRAY);
    clock_t time_start, time_end;
    time_start = clock();
    Mat mask = (Mat_<char>(3, 3) << -1, -1, -1, -1, 8, -1, -1, -1, -1);
    filter2D(grey, Edge_pic, grey.depth(), mask);
    time_end = clock();
    cout << " the cpu time = " << time_end - time_start << endl;

    //**************************** GPU: edge extraction with a mask ****************************
    // Reports a failed CUDA call and tells the caller whether to continue.
    auto cuda_ok = [](cudaError_t status, const char* what) -> bool {
        if (status != cudaSuccess)
            cerr << what << " failed: " << cudaGetErrorString(status) << endl;
        return status == cudaSuccess;
    };

    // Byte counts are computed in size_t so large images cannot overflow int.
    const size_t input_bytes = static_cast<size_t>(grey.step) * grey.rows;
    const size_t output_bytes = static_cast<size_t>(edge_gpu.step) * edge_gpu.rows;

    unsigned char* input = nullptr;
    unsigned char* output = nullptr;
    bool ok = cuda_ok(cudaMalloc((void**)&input, input_bytes), "cudaMalloc(input)")
        && cuda_ok(cudaMalloc((void**)&output, output_bytes), "cudaMalloc(output)")
        // Zero the output so pixels the kernel does not write are defined.
        && cuda_ok(cudaMemset(output, 0, output_bytes), "cudaMemset(output)")
        && cuda_ok(cudaMemcpy(input, grey.data, input_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(host to device)");

    time_start = clock();
    if (ok)
    {
        dim3 blockdim(TILE, TILE);
        // Ceil-division so that every pixel is covered by exactly one thread.
        dim3 griddim((edge_gpu.cols + TILE - 1) / TILE, (edge_gpu.rows + TILE - 1) / TILE);
        edge_kernel<<<griddim, blockdim>>>(input, output, grey.cols, grey.rows, static_cast<int>(grey.step), static_cast<int>(edge_gpu.step));
        // The blocking copy also waits for the kernel, so no extra synchronisation is needed.
        ok = cuda_ok(cudaGetLastError(), "edge_kernel launch")
            && cuda_ok(cudaMemcpy(edge_gpu.data, output, output_bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(device to host)");
    }
    time_end = clock();

    // cudaFree accepts a null pointer, so this is safe on every path.
    cudaFree(input);
    cudaFree(output);
    if (!ok)
        return -1;
    cout << " the GPU time = " << time_end - time_start << endl;

    namedWindow("grey", WINDOW_NORMAL); // resizable window
    imshow("grey", grey);
    namedWindow("cpu result", WINDOW_NORMAL);
    imshow("cpu result", Edge_pic);
    namedWindow("edge_gpu", WINDOW_NORMAL);
    imshow("edge_gpu", edge_gpu);

    // Keep the windows open until ESC (key code 27) is pressed.
    while (waitKey() != 27)
    {
    }
    return 0;
}
- 灰度图和BGR图在OpenCV里面的存储方式如下,BGR图是用连续3个值表示一个像素点,这3个值依次是B、G、R
- 灰度图
-
RGB
-
-
一些特殊情况下,每行末尾会补齐一些位置,补的那些位置不记录任何信息,它们可以是任意值,但不会影响图像
4 图像直方图均衡化和图像增强
5 BGR图转灰度图
(1)常用的openCV处理图像的命令
- 获取灰度图某点的像素值示例
(2)BGR图转灰度图,对比openCV自带库函数和GPU转换之间的速度
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <opencv2/opencv.hpp>
#include <iostream>
#include<vector>
using namespace std;
using namespace cv;
#define TILE 16
/*
 * Converts an 8-bit image to grayscale, one thread per pixel.
 *
 * Launch layout: 2D grid of 2D blocks covering width x height; threads outside
 * the image exit immediately.
 *
 * input         - device pointer to the source image
 * output        - device pointer to the single-channel destination image
 * width, height - image size in pixels
 * stride_input  - bytes per input row
 * stride_output - bytes per output row
 *
 * The channel count is derived as stride_input / width:
 *   1 -> the pixel is copied unchanged,
 *   3 -> the pixel is read as interleaved B, G, R and converted with the
 *        weights 0.114 / 0.587 / 0.299, rounded to the nearest integer.
 * Any other value leaves the output untouched.
 */
__global__ void bgr2grey_kernel(unsigned char *input, unsigned char *output, int width, int height, int stride_input, int stride_output)
{
    const int x = blockDim.x * blockIdx.x + threadIdx.x;
    const int y = blockDim.y * blockIdx.y + threadIdx.y;
    if (x >= width || y >= height)
        return;

    const int channel = stride_input / width;
    if (channel == 1)
    {
        // Address each image with its own row stride; the two may differ.
        output[y * stride_output + x] = input[y * stride_input + x];
    }
    else if (channel == 3)
    {
        const unsigned char* pixel = input + y * stride_input + x * 3;
        // Float literals keep the arithmetic in single precision.
        const float grey = pixel[0] * 0.114f + pixel[1] * 0.587f + pixel[2] * 0.299f;
        // Round to nearest: plain truncation can turn a value such as 99.99999 into 99.
        output[y * stride_output + x] = static_cast<unsigned char>(grey + 0.5f);
    }
}
/*
 * Demo: BGR to grayscale conversion with OpenCV's cvtColor and with
 * bgr2grey_kernel on the GPU. Prints the clock() ticks spent in each path and
 * shows both results until ESC is pressed.
 *
 * Returns 0 on success, -1 if the image cannot be read or a CUDA call fails.
 */
int main()
{
    Mat frame = imread("I:\\ONE_grade\\SZ_code\\class\\CudaRuntime2\\bg.jpg");
    if (frame.empty())
    {
        // Without this check the cvtColor call below would fail on an empty image.
        cerr << "failed to read the input image" << endl;
        return -1;
    }
    cout << frame.cols << " " << frame.rows << " " << frame.step << ",static_cast<int>(frame.step) = " << static_cast<int>(frame.step)<<endl;

    //--------------------------------------------------- CPU reference
    clock_t time_start, time_end;
    time_start = clock();
    Mat grey, grey_gpu(frame.rows, frame.cols, CV_8UC1); // 8-bit, single channel
    cvtColor(frame, grey, COLOR_BGR2GRAY);
    time_end = clock();
    cout << " the cpu time = " << time_end - time_start << endl;

    //--------------------------------------------------- GPU path
    // Reports a failed CUDA call and tells the caller whether to continue.
    auto cuda_ok = [](cudaError_t status, const char* what) -> bool {
        if (status != cudaSuccess)
            cerr << what << " failed: " << cudaGetErrorString(status) << endl;
        return status == cudaSuccess;
    };

    // Byte counts are computed in size_t so large images cannot overflow int.
    const size_t input_bytes = static_cast<size_t>(frame.step) * frame.rows;
    const size_t output_bytes = static_cast<size_t>(grey_gpu.step) * grey_gpu.rows;

    unsigned char* input = nullptr;
    unsigned char* output = nullptr;
    bool ok = cuda_ok(cudaMalloc((void**)&input, input_bytes), "cudaMalloc(input)")
        && cuda_ok(cudaMalloc((void**)&output, output_bytes), "cudaMalloc(output)")
        && cuda_ok(cudaMemcpy(input, frame.data, input_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(host to device)");

    time_start = clock();
    if (ok)
    {
        dim3 blockdim(TILE, TILE);
        // Ceil-division so that every pixel is covered by exactly one thread.
        dim3 griddim((grey_gpu.cols + TILE - 1) / TILE, (grey_gpu.rows + TILE - 1) / TILE);
        bgr2grey_kernel<<<griddim, blockdim>>>(input, output, frame.cols, frame.rows, static_cast<int>(frame.step), static_cast<int>(grey_gpu.step));
        // The blocking copy also waits for the kernel, so no extra synchronisation is needed.
        ok = cuda_ok(cudaGetLastError(), "bgr2grey_kernel launch")
            && cuda_ok(cudaMemcpy(grey_gpu.data, output, output_bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(device to host)");
    }
    time_end = clock();

    // cudaFree accepts a null pointer, so this is safe on every path.
    cudaFree(input);
    cudaFree(output);
    if (!ok)
        return -1;
    cout << " the GPU time = " << time_end - time_start << endl;

    namedWindow("grey", WINDOW_NORMAL); // resizable window
    imshow("grey", grey);
    namedWindow("grey_gpu", WINDOW_NORMAL);
    imshow("grey_gpu", grey_gpu);

    // Keep the windows open until ESC (key code 27) is pressed.
    while (waitKey() != 27)
    {
    }
    return 0;
}
6 图片的resize
7 图像直方图增强
- 有bug,待调试。。。。。。。!!!!!!!!!!!!!!!!!
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <opencv2/opencv.hpp>
#include <iostream>
#include<vector>
using namespace std;
using namespace cv;
#define TILE 16
/*
 * Step 1: accumulate a 256-bin histogram for every channel of an interleaved
 * 8-bit image.
 *
 * Launch layout: 2D blocks, grid.x / grid.y tiling the image, grid.z equal to
 * the number of channels (blockIdx.z selects the channel this block counts).
 * Dynamic shared memory: at least 256 * sizeof(int) bytes.
 *
 * input  - device pointer to the image
 * hist   - device array of channels * 256 ints; bins of channel c start at
 *          hist[c * 256]. It is only added to, so it must be zeroed before launch.
 * width, height - image size in pixels
 * stride - bytes per image row; the channel count is derived as stride / width
 */
__global__ void hist_cal_kernel(unsigned char* input, int* hist, int width, int height, int stride)
{
    const int x = blockDim.x * blockIdx.x + threadIdx.x;
    const int y = blockDim.y * blockIdx.y + threadIdx.y;
    // The channel comes from the block index along z; blockDim.z is the block
    // extent (1 for a 2D block) and would pin every block to the same channel.
    const int z = blockIdx.z;
    const int channels = stride / width;

    extern __shared__ int hist_S[];
    const int tid = threadIdx.y * blockDim.x + threadIdx.x;
    const int block_threads = blockDim.x * blockDim.y;

    // Strided loops make initialisation and flush correct for any block size,
    // not only for blocks of exactly 256 threads.
    for (int bin = tid; bin < 256; bin += block_threads)
        hist_S[bin] = 0;
    __syncthreads();

    if (x < width && y < height && z < channels)
    {
        const int value = input[y * stride + x * channels + z];
        atomicAdd(&hist_S[value], 1);
    }
    __syncthreads();

    // Merge this block's partial histogram into the global one, skipping empty bins.
    if (z < channels)
    {
        for (int bin = tid; bin < 256; bin += block_threads)
        {
            const int count = hist_S[bin];
            if (count != 0)
                atomicAdd(&hist[z * 256 + bin], count);
        }
    }
}
/*
 * Step 2: turn each channel's histogram into an equalisation lookup table, in place.
 *
 * Launch layout: one block per channel (blockIdx.x is the channel) with exactly
 * 256 threads per block, one thread per bin.
 *
 * hist - device array of channels * 256 ints. On entry it holds the per-bin
 *        counts; on exit hist[c * 256 + i] = floor(255 * cdf_c(i) / total_c),
 *        where cdf_c(i) is the number of samples with value <= i. A channel
 *        with no samples gets the identity table.
 */
__global__ void hist_sum(int *hist)
{
    const int tid = threadIdx.y * blockDim.x + threadIdx.x;
    const int x = blockIdx.x; // channel handled by this block

    __shared__ int hist_S[256];
    hist_S[tid] = hist[256 * x + tid]; // stage this channel's counts in shared memory
    __syncthreads();

    // Tree reduction in shared memory; afterwards hist_S[0] holds the channel total.
    // The step must be halved with ">>=" (a bare "s >> 1" never changes s), and
    // the partner element must come from shared memory, not from global hist.
    for (int s = 128; s > 0; s >>= 1)
    {
        if (tid < s)
        {
            hist_S[tid] += hist_S[tid + s];
        }
        __syncthreads();
    }

    // Thread 0 walks the bins in order to build the cumulative distribution.
    // Global hist still holds the raw counts here: the other threads read it
    // only before the first barrier.
    if (tid == 0)
    {
        const int total = hist_S[0];
        int sum = 0;
        for (int i = 0; i < 256; i++)
        {
            sum += hist[x * 256 + i];
            // 64-bit integer arithmetic is exact; float would lose precision
            // once 255 * sum exceeds 2^24. total == 0 would divide by zero.
            hist[x * 256 + i] = total > 0 ? (int)(255LL * sum / total) : i;
        }
    }
}
/*
 * Step 3: apply the per-channel lookup tables to an interleaved 8-bit image.
 *
 * Launch layout: 2D blocks, grid.x / grid.y tiling the image, grid.z equal to
 * the number of channels (blockIdx.z selects the channel this block maps).
 *
 * input, output - device pointers to the source and destination images, both
 *                 addressed with the same row stride
 * hist          - device array of channels * 256 ints; hist[c * 256 + v] is the
 *                 new value for a sample v of channel c
 * width, height - image size in pixels
 * stride        - bytes per image row; the channel count is derived as stride / width
 */
__global__ void hist_trans_kernel(unsigned char* input, unsigned char* output, int* hist, int width, int height, int stride)
{
    const int x = blockDim.x * blockIdx.x + threadIdx.x;
    const int y = blockDim.y * blockIdx.y + threadIdx.y;
    // The channel comes from the block index along z; blockDim.z is the block
    // extent (1 for a 2D block) and would leave the other channels unwritten.
    const int z = blockIdx.z;
    const int channels = stride / width;
    if (x >= width || y >= height || z >= channels)
        return;

    const int offset = y * stride + x * channels + z;
    output[offset] = hist[z * 256 + input[offset]];
}
/*
 * Demo: per-channel histogram equalisation of a colour image with OpenCV
 * (split / equalizeHist / merge) and with the three GPU kernels
 * hist_cal_kernel, hist_sum and hist_trans_kernel. Prints the clock() ticks
 * spent in each path and shows the original and both results until ESC is pressed.
 *
 * Returns 0 on success, -1 if the image cannot be read or a CUDA call fails.
 */
int main()
{
    Mat frame = imread("I:\\ONE_grade\\SZ_code\\class\\CudaRuntime2\\bg.jpg");
    if (frame.empty())
    {
        // Without this check the split call below would fail on an empty image.
        cerr << "failed to read the input image" << endl;
        return -1;
    }
    cout << frame.cols << " " << frame.rows << " " << frame.step << ",static_cast<int>(frame.step) = " << static_cast<int>(frame.step)<<endl;

    // ******************************** CPU: histogram equalisation with OpenCV ********************************
    Mat frame_hist_cpu, frame_hist_gpu(frame.rows, frame.cols, CV_8UC3);
    clock_t time_start, time_end;
    time_start = clock();
    vector<Mat> bgr_channels;
    split(frame, bgr_channels); // separate the channels
    for (int i = 0; i < frame.channels(); i++)
    {
        equalizeHist(bgr_channels[i], bgr_channels[i]);
    }
    merge(bgr_channels, frame_hist_cpu);
    time_end = clock();
    cout << " the CPU time = " << time_end - time_start << endl;

    // ******************************** GPU: histogram equalisation ********************************
    // Reports a failed CUDA call and tells the caller whether to continue.
    auto cuda_ok = [](cudaError_t status, const char* what) -> bool {
        if (status != cudaSuccess)
            cerr << what << " failed: " << cudaGetErrorString(status) << endl;
        return status == cudaSuccess;
    };

    // Byte counts are computed in size_t so large images cannot overflow int.
    const size_t input_bytes = static_cast<size_t>(frame.step) * frame.rows;
    const size_t output_bytes = static_cast<size_t>(frame_hist_gpu.step) * frame_hist_gpu.rows;
    const size_t hist_bytes = static_cast<size_t>(frame.channels()) * 256 * sizeof(int);

    unsigned char* input = nullptr;
    unsigned char* output = nullptr;
    int* hist = nullptr;
    bool ok = cuda_ok(cudaMalloc((void**)&input, input_bytes), "cudaMalloc(input)")
        && cuda_ok(cudaMalloc((void**)&output, output_bytes), "cudaMalloc(output)")
        && cuda_ok(cudaMalloc((void**)&hist, hist_bytes), "cudaMalloc(hist)")
        // The histogram kernel only adds to hist, so it has to start from zero.
        && cuda_ok(cudaMemset(hist, 0, hist_bytes), "cudaMemset(hist)")
        && cuda_ok(cudaMemcpy(input, frame.data, input_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(host to device)");

    time_start = clock();
    if (ok)
    {
        // TILE * TILE = 256 threads per block: one thread per histogram bin.
        dim3 blockdim(TILE, TILE);
        // Ceil-division in x / y covers every pixel; grid z enumerates the channels.
        dim3 griddim((frame_hist_gpu.cols + TILE - 1) / TILE, (frame_hist_gpu.rows + TILE - 1) / TILE, frame_hist_gpu.channels());

        // Third launch argument: dynamic shared memory for one 256-bin histogram per block.
        hist_cal_kernel<<<griddim, blockdim, 256 * sizeof(int)>>>(input, hist, frame.cols, frame.rows, static_cast<int>(frame.step));
        ok = cuda_ok(cudaGetLastError(), "hist_cal_kernel launch");
        if (ok)
        {
            hist_sum<<<frame.channels(), 256>>>(hist);
            ok = cuda_ok(cudaGetLastError(), "hist_sum launch");
        }
        if (ok)
        {
            hist_trans_kernel<<<griddim, blockdim>>>(input, output, hist, frame.cols, frame.rows, static_cast<int>(frame.step));
            // Copy the finished image back; the blocking copy also waits for the kernels.
            ok = cuda_ok(cudaGetLastError(), "hist_trans_kernel launch")
                && cuda_ok(cudaMemcpy(frame_hist_gpu.data, output, output_bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(device to host)");
        }
    }
    time_end = clock();

    // cudaFree accepts a null pointer, so this is safe on every path.
    cudaFree(input);
    cudaFree(output);
    cudaFree(hist);
    if (!ok)
        return -1;
    cout << " the GPU time = " << time_end - time_start << endl;

    namedWindow("原图", WINDOW_NORMAL); // resizable window
    imshow("原图", frame);
    namedWindow("CPU 直方图均衡化的图", WINDOW_NORMAL);
    imshow("CPU 直方图均衡化的图", frame_hist_cpu);
    namedWindow("GPU 直方图均衡化的图", WINDOW_NORMAL);
    imshow("GPU 直方图均衡化的图", frame_hist_gpu);

    // Keep the windows open until ESC (key code 27) is pressed.
    while (waitKey() != 27)
    {
    }
    return 0;
}
8 BGR转换成RGB
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <opencv2/opencv.hpp>
#include <iostream>
#include<vector>
using namespace std;
using namespace cv;
#define TILE 16
/*
 * Swaps the first and third channel of an interleaved 3-channel 8-bit image
 * (BGR -> RGB), one thread per pixel.
 *
 * Launch layout: 2D grid of 2D blocks covering width x height; threads outside
 * the image exit immediately.
 *
 * input         - device pointer to the source image
 * output        - device pointer to the destination image
 * width, height - image size in pixels
 * stride_input  - bytes per input row
 * stride_output - bytes per output row
 */
__global__ void bgrTOrgb_kernel(unsigned char* input, unsigned char* output, int width, int height, int stride_input, int stride_output)
{
    const int x = blockDim.x * blockIdx.x + threadIdx.x;
    const int y = blockDim.y * blockIdx.y + threadIdx.y;
    if (x >= width || y >= height)
        return;

    // Rows are addressed through their strides so padded rows are handled correctly.
    const unsigned char* src = input + y * stride_input + x * 3;
    unsigned char* dst = output + y * stride_output + x * 3;

    // Read all three samples before writing so the swap is also correct when
    // input and output refer to the same buffer.
    const unsigned char b = src[0];
    const unsigned char g = src[1];
    const unsigned char r = src[2];
    dst[0] = r;
    dst[1] = g;
    dst[2] = b;
}
/*
 * Demo: BGR to RGB channel swap with OpenCV's cvtColor and with
 * bgrTOrgb_kernel on the GPU. Prints the clock() ticks spent in each path and
 * shows the original and both results until ESC is pressed.
 *
 * Returns 0 on success, -1 if the image cannot be read or a CUDA call fails.
 */
int main()
{
    Mat frame = imread("I:\\ONE_grade\\SZ_code\\class\\CudaRuntime2\\bg.jpg");
    if (frame.empty())
    {
        // Without this check the cvtColor call below would fail on an empty image.
        cerr << "failed to read the input image" << endl;
        return -1;
    }
    cout << frame.cols << " " << frame.rows << " " << frame.step << ",static_cast<int>(frame.step) = " << static_cast<int>(frame.step) << endl;

    Mat pic_cpu, pic_gpu(frame.rows, frame.cols, CV_8UC3); // 8-bit, three channels

    //********************************** CPU: BGR to RGB ****************************
    clock_t time_start, time_end;
    time_start = clock();
    cvtColor(frame, pic_cpu, COLOR_BGR2RGB);
    time_end = clock();
    cout << " the cpu time = " << time_end - time_start << endl;

    //**************************** GPU: BGR to RGB ****************************
    // Reports a failed CUDA call and tells the caller whether to continue.
    auto cuda_ok = [](cudaError_t status, const char* what) -> bool {
        if (status != cudaSuccess)
            cerr << what << " failed: " << cudaGetErrorString(status) << endl;
        return status == cudaSuccess;
    };

    // Byte counts are computed in size_t so large images cannot overflow int.
    const size_t input_bytes = static_cast<size_t>(frame.step) * frame.rows;
    const size_t output_bytes = static_cast<size_t>(pic_gpu.step) * pic_gpu.rows;

    unsigned char* input = nullptr;
    unsigned char* output = nullptr;
    bool ok = cuda_ok(cudaMalloc((void**)&input, input_bytes), "cudaMalloc(input)")
        && cuda_ok(cudaMalloc((void**)&output, output_bytes), "cudaMalloc(output)")
        && cuda_ok(cudaMemcpy(input, frame.data, input_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(host to device)");

    time_start = clock();
    if (ok)
    {
        dim3 blockdim(TILE, TILE);
        // Ceil-division so that every pixel is covered by exactly one thread.
        dim3 griddim((pic_gpu.cols + TILE - 1) / TILE, (pic_gpu.rows + TILE - 1) / TILE);
        bgrTOrgb_kernel<<<griddim, blockdim>>>(input, output, pic_gpu.cols, pic_gpu.rows, static_cast<int>(frame.step), static_cast<int>(pic_gpu.step));
        // The blocking copy also waits for the kernel, so no extra synchronisation is needed.
        ok = cuda_ok(cudaGetLastError(), "bgrTOrgb_kernel launch")
            && cuda_ok(cudaMemcpy(pic_gpu.data, output, output_bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(device to host)");
    }
    time_end = clock();

    // cudaFree accepts a null pointer, so this is safe on every path.
    cudaFree(input);
    cudaFree(output);
    if (!ok)
        return -1;
    cout << " the GPU time = " << time_end - time_start << endl;

    namedWindow("原图", WINDOW_NORMAL); // resizable window
    imshow("原图", frame);
    namedWindow("cpu result", WINDOW_NORMAL);
    imshow("cpu result", pic_cpu);
    namedWindow("gpu result", WINDOW_NORMAL);
    imshow("gpu result", pic_gpu);

    // Keep the windows open until ESC (key code 27) is pressed.
    while (waitKey() != 27)
    {
    }
    return 0;
}