基于CUDA的图像放大快速实现

本文基于双线性插值方法对原图像进行放大,同时利用CUDA架构中纹理内存texture的硬件插值功能来加速整个过程。

图像内插法放大缩小图像的原理和matlab代码见链接:https://blog.csdn.net/Goldfish442/article/details/61933735

本文使用C和CUDA C实现,具体代码如下:

//Includes, projects
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "cufft.h"
#include "texture_fetch_functions.h"

//Includes, system
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <windows.h>

// File-scope 2D texture reference over float texels, read without type
// conversion (cudaReadModeElementType). main() binds it to a cudaArray holding
// the source image, and the Extension kernel samples it through tex2D().
texture<float, 2, cudaReadModeElementType> texRef2D;

/*
 * Extension: fills the enlarged image by sampling the source texture.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per output pixel
 * (x from the .x dimension, y from the .y dimension). The grid may overshoot
 * the image; threads outside the w x h output return without writing.
 *
 * dev_img  device buffer of at least w * h floats, row-major with row length w
 * ratio    magnification factor (output size / source size); must be non-zero
 * h, w     height and width of the output image in pixels
 *
 * Reads the file-scope texture reference texRef2D, which the host must have
 * bound before the launch. Whether the fetch is interpolated depends on the
 * filterMode the host configured on that texture.
 */
__global__ void Extension(float* dev_img, float ratio, int h, int w)
{
	const int x = threadIdx.x + blockDim.x * blockIdx.x;
	const int y = threadIdx.y + blockDim.y * blockIdx.y;

	// Guard the grid tail: without it, a grid larger than the image writes
	// past the end of dev_img.
	if (x >= w || y >= h)
		return;

	// Map the output pixel back into source-image coordinates.
	const float xx = (float)x / ratio;
	const float yy = (float)y / ratio;

	// +0.5f targets the texel centre (texel i is centred at i + 0.5 when the
	// texture uses unnormalized coordinates -- assumed here, it is the default).
	// Float literals avoid a silent promotion to double arithmetic.
	dev_img[(size_t)y * w + x] = tex2D(texRef2D, xx + 0.5f, yy + 0.5f);
}

// Aborts the program with a readable message when a CUDA runtime call fails.
static void checkCuda(cudaError_t err, const char* what)
{
	if (err != cudaSuccess)
	{
		fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(err));
		exit(EXIT_FAILURE);
	}
}

/*
 * Enlarges N projection images from h1 x w1 to h2 x w2.
 *
 * For every index i in [0, N) the raw float image "<path_proj>%04d.img" is
 * read, copied into the cudaArray bound to texRef2D, resampled on the GPU by
 * the Extension kernel, and written to "<path_ext>%04d.img".
 *
 * Returns 0 on success and 1 if a host allocation or file operation fails;
 * CUDA failures terminate the process through checkCuda().
 */
int main()
{
	const char* path_proj = "F:\\lvjian\\process\\proj\\proj";
	const char* path_ext = "F:\\lvjian\\process - 2048\\proj\\proj";
	char path_temp[MAX_PATH];

	const int h1 = 1024;
	const int w1 = 1024;
	const int h2 = 2048;
	const int w2 = 2048;

	const double ratio = (double)h2 / (double)h1;
	const int N = 360;

	const size_t srcCount = (size_t)h1 * w1;
	const size_t dstCount = (size_t)h2 * w2;

	int status = 0;
	FILE* fp = NULL;

	// calloc gives the zero-initialised output buffer the original obtained
	// with malloc + memset.
	float* pImg = (float*)malloc(sizeof(float) * srcCount);
	float* pExt = (float*)calloc(dstCount, sizeof(float));
	if (pImg == NULL || pExt == NULL)
	{
		fprintf(stderr, "Host memory allocation failed\n");
		free(pImg);
		free(pExt);
		return 1;
	}

	//Device memory allocation
	float* ext_img = NULL;
	checkCuda(cudaMalloc((void**)&ext_img, sizeof(float) * dstCount), "cudaMalloc");
	checkCuda(cudaMemset(ext_img, 0, sizeof(float) * dstCount), "cudaMemset");

	cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
	cudaArray* cuArray = NULL;
	checkCuda(cudaMallocArray(&cuArray, &channelDesc, w1, h1), "cudaMallocArray");

	// The default filter mode is cudaFilterModePoint (nearest texel). Linear
	// filtering must be requested explicitly to get hardware bilinear
	// interpolation. Unnormalized coordinates and clamp addressing are the
	// defaults; they are spelled out because the kernel relies on them.
	texRef2D.normalized = false;
	texRef2D.filterMode = cudaFilterModeLinear;
	texRef2D.addressMode[0] = cudaAddressModeClamp;
	texRef2D.addressMode[1] = cudaAddressModeClamp;
	checkCuda(cudaBindTextureToArray(&texRef2D, cuArray, &channelDesc), "cudaBindTextureToArray");

	// x spans the image width and y the height. Ceil-division keeps the whole
	// output covered if the sizes stop being multiples of the block shape
	// (the kernel must then ignore out-of-range threads); the current sizes
	// divide evenly.
	dim3 threadPerBlock(16, 32);
	dim3 blockPerGrid((w2 + threadPerBlock.x - 1) / threadPerBlock.x,
	                  (h2 + threadPerBlock.y - 1) / threadPerBlock.y);

	for (int i = 0; i < N; i++)
	{
		sprintf(path_temp, "%s%04d.img", path_proj, i);
		fp = fopen(path_temp, "rb");
		if (fp == NULL)
		{
			fprintf(stderr, "Cannot open input file %s\n", path_temp);
			status = 1;
			break;
		}
		const size_t nRead = fread(pImg, sizeof(float), srcCount, fp);
		fclose(fp);
		if (nRead != srcCount)
		{
			fprintf(stderr, "Short read from %s\n", path_temp);
			status = 1;
			break;
		}

		checkCuda(cudaMemcpyToArray(cuArray, 0, 0, pImg, sizeof(float) * srcCount, cudaMemcpyHostToDevice),
		          "cudaMemcpyToArray");

		Extension<<<blockPerGrid, threadPerBlock>>>(ext_img, (float)ratio, h2, w2);
		// Launch-configuration errors only surface through cudaGetLastError().
		checkCuda(cudaGetLastError(), "Extension launch");

		// Blocking copy: also waits for the kernel and reports execution errors.
		checkCuda(cudaMemcpy(pExt, ext_img, sizeof(float) * dstCount, cudaMemcpyDeviceToHost), "cudaMemcpy");

		sprintf(path_temp, "%s%04d.img", path_ext, i);
		fp = fopen(path_temp, "wb");
		if (fp == NULL)
		{
			fprintf(stderr, "Cannot open output file %s\n", path_temp);
			status = 1;
			break;
		}
		const size_t nWritten = fwrite(pExt, sizeof(float), dstCount, fp);
		fclose(fp);
		if (nWritten != dstCount)
		{
			fprintf(stderr, "Short write to %s\n", path_temp);
			status = 1;
			break;
		}

		printf(".........processing proj%04d.img.........\n", i);
	}

	// Release everything that was acquired, including the texture binding and
	// the cudaArray that the original code leaked.
	cudaUnbindTexture(&texRef2D);
	cudaFreeArray(cuArray);
	cudaFree(ext_img);
	free(pImg);
	free(pExt);
	system("pause");
	return status;
}

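原始图像尺寸是1024×1024,放大后的图像尺寸是2048×2048。程序首先从磁盘把原始图像读入主机内存,再拷贝到与纹理引用绑定的CUDA数组(cudaArray)中,然后在设备端调用Extension这个kernel函数,设置的grid和block大小分别是(128,64)和(16,32)。通过纹理内存的硬件级双线性插值来加速计算过程。注意:纹理引用的filterMode默认是cudaFilterModePoint(最近邻取样),要真正使用硬件双线性插值,需要在绑定纹理之前将其设置为cudaFilterModeLinear。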

  • 1
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值