基于CUDA的图像放大快速实现

最新推荐文章于 2022-02-18 10:16:36 发布

北冥有鱼1989

最新推荐文章于 2022-02-18 10:16:36 发布

阅读量1.4k

点赞数 1

分类专栏：图像处理　文章标签：cuda、texture、image multiplication

本文链接：https://blog.csdn.net/wangkun7673/article/details/84948652

版权

图像处理专栏收录该内容

5 篇文章 0 订阅

订阅专栏

本文基于双线性插值方法对原图像进行放大，同时利用CUDA架构中纹理内存texture的硬件插值功能来加速整个过程。

图像内插法放大缩小图像的原理和matlab代码见链接：https://blog.csdn.net/Goldfish442/article/details/61933735

本文是基于C+CUDA C实现，具体代码如下:

//Includes, projects
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "cufft.h"
#include "texture_fetch_functions.h"

//Includes, system
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <windows.h>

// File-scope 2D texture reference over float texels; cudaReadModeElementType
// means fetches return the stored float values unchanged. main() binds it to
// a CUDA array and the Extension kernel samples it through tex2D().
texture<float, 2, cudaReadModeElementType> texRef2D;

/*
 * Resamples the image bound to texRef2D into dev_img, one thread per output pixel.
 *
 * dev_img : device buffer receiving the enlarged image, row-major, h rows of w floats
 * ratio   : magnification factor (output size / source size), applied to both axes
 * h, w    : height and width of the output image in pixels
 *
 * Launch layout: a 2D grid of 2D blocks in which x indexes columns and y indexes
 * rows. The grid may overshoot the image; surplus threads exit without writing.
 *
 * NOTE(review): the coordinates passed to tex2D assume texRef2D uses unnormalized
 * addressing. Whether the fetch is bilinear or nearest-neighbour depends on the
 * filter mode configured on the host before binding.
 */
__global__ void Extension(float* dev_img, float ratio, int h, int w)
{
	int x = threadIdx.x + blockDim.x * blockIdx.x;
	int y = threadIdx.y + blockDim.y * blockIdx.y;

	// Guard the grid tail: without it any launch that does not tile the
	// image exactly would write past the end of dev_img.
	if (x >= w || y >= h)
		return;

	// Map the output pixel back to (fractional) source pixel coordinates.
	float yy = (float)y / ratio;
	float xx = (float)x / ratio;

	// Texel centres sit at half-integer texture coordinates, hence the +0.5f.
	// The float literal keeps the arithmetic in single precision.
	dev_img[(size_t)y * w + x] = tex2D(texRef2D, xx + 0.5f, yy + 0.5f);
}

// Stops the program with file/line context when a CUDA runtime call fails.
// Without this, an early failure leaves a sticky error and every later call
// fails in a way that is hard to trace back.
#define CUDA_CHECK(call)                                                      \
	do {                                                                      \
		cudaError_t err_ = (call);                                            \
		if (err_ != cudaSuccess) {                                            \
			fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,     \
			        cudaGetErrorString(err_));                                \
			exit(EXIT_FAILURE);                                               \
		}                                                                     \
	} while (0)

/*
 * Enlarges N projection images from h1 x w1 to h2 x w2 floats.
 *
 * For every index i the file "<path_proj>%04d.img" is read as raw floats,
 * uploaded into the CUDA array bound to texRef2D, resampled on the GPU by the
 * Extension kernel and written to "<path_ext>%04d.img".
 *
 * Returns 0 when every image was processed, 1 when a host allocation or a
 * file operation failed. CUDA failures terminate the process via CUDA_CHECK.
 */
int main()
{
	const char* path_proj = "F:\\lvjian\\process\\proj\\proj";
	const char* path_ext = "F:\\lvjian\\process - 2048\\proj\\proj";
	char path_temp[MAX_PATH];

	const int h1 = 1024;
	const int w1 = 1024;
	const int h2 = 2048;
	const int w2 = 2048;

	const size_t srcCount = (size_t)h1 * w1;
	const size_t dstCount = (size_t)h2 * w2;

	// The kernel applies one factor to both axes, so this assumes
	// h2 / h1 == w2 / w1 (true for the sizes above).
	const float ratio = (float)h2 / (float)h1;
	const int N = 360;

	FILE* fp;
	float* pImg = (float*)malloc(sizeof(float) * srcCount);
	float* pExt = (float*)calloc(dstCount, sizeof(float));
	if (pImg == NULL || pExt == NULL)
	{
		fprintf(stderr, "Host memory allocation failed\n");
		free(pImg);
		free(pExt);
		return 1;
	}

	//Device memory allocation
	float* ext_img = NULL;
	CUDA_CHECK(cudaMalloc((void**)&ext_img, sizeof(float) * dstCount));
	CUDA_CHECK(cudaMemset(ext_img, 0, sizeof(float) * dstCount));

	cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
	cudaArray* cuArray = NULL;
	CUDA_CHECK(cudaMallocArray(&cuArray, &channelDesc, w1, h1));

	// A texture reference defaults to point (nearest-neighbour) sampling.
	// Linear filtering must be requested explicitly to get the hardware
	// bilinear interpolation this program is built around.
	texRef2D.normalized = 0;
	texRef2D.filterMode = cudaFilterModeLinear;
	texRef2D.addressMode[0] = cudaAddressModeClamp;
	texRef2D.addressMode[1] = cudaAddressModeClamp;
	CUDA_CHECK(cudaBindTextureToArray(&texRef2D, cuArray, &channelDesc));

	// x covers columns (width) and y covers rows (height); ceil-division keeps
	// the whole image covered even if the sizes stop being multiples of the block.
	dim3 threadPerBlock(16, 32);
	dim3 blockPerGrid((w2 + threadPerBlock.x - 1) / threadPerBlock.x,
	                  (h2 + threadPerBlock.y - 1) / threadPerBlock.y);

	int status = 0;
	for (int i = 0; i < N; i++)
	{
		sprintf(path_temp, "%s%04d.img", path_proj, i);
		fp = fopen(path_temp, "rb");
		if (fp == NULL)
		{
			fprintf(stderr, "Cannot open input file %s\n", path_temp);
			status = 1;
			break;
		}
		size_t nRead = fread(pImg, sizeof(float), srcCount, fp);
		fclose(fp);
		if (nRead != srcCount)
		{
			fprintf(stderr, "Short read on %s\n", path_temp);
			status = 1;
			break;
		}

		// Upload the source image into the array backing the texture.
		CUDA_CHECK(cudaMemcpy2DToArray(cuArray, 0, 0, pImg,
		                               sizeof(float) * w1, sizeof(float) * w1, h1,
		                               cudaMemcpyHostToDevice));

		Extension<<<blockPerGrid, threadPerBlock>>>(ext_img, ratio, h2, w2);
		CUDA_CHECK(cudaGetLastError());

		// Blocking copy: also surfaces any fault raised while the kernel ran.
		CUDA_CHECK(cudaMemcpy(pExt, ext_img, sizeof(float) * dstCount, cudaMemcpyDeviceToHost));

		sprintf(path_temp, "%s%04d.img", path_ext, i);
		fp = fopen(path_temp, "wb");
		if (fp == NULL)
		{
			fprintf(stderr, "Cannot open output file %s\n", path_temp);
			status = 1;
			break;
		}
		size_t nWritten = fwrite(pExt, sizeof(float), dstCount, fp);
		fclose(fp);
		if (nWritten != dstCount)
		{
			fprintf(stderr, "Short write on %s\n", path_temp);
			status = 1;
			break;
		}

		printf(".........processing proj%04d.img.........\n", i);
	}

	// Release the texture binding and every host/device allocation.
	CUDA_CHECK(cudaUnbindTexture(&texRef2D));
	CUDA_CHECK(cudaFreeArray(cuArray));
	CUDA_CHECK(cudaFree(ext_img));
	free(pImg);
	free(pExt);
	system("pause");
	return status;
}

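原始图像尺寸是1024×1024，放大后的图像尺寸是2048×2048。程序首先从磁盘读取原始图像，将其拷贝到与纹理引用绑定的CUDA数组中，然后在设备端调用Extension这个kernel函数，设置的grid和block大小分别是（128，64）和（16，32），每个线程负责计算一个输出像素。通过纹理内存的硬件级双线性插值来加速计算过程；需要注意的是，纹理引用默认采用最近邻（point）采样，只有将其filterMode设置为cudaFilterModeLinear后，tex2D才会返回双线性插值的结果。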