本文基于双线性插值方法对原图像进行放大,同时利用CUDA架构中纹理内存texture的硬件插值功能来加速整个过程。
图像内插法放大缩小图像的原理和matlab代码见链接:https://blog.csdn.net/Goldfish442/article/details/61933735
本文基于C和CUDA C实现,具体代码如下:
//Includes, projects
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "cufft.h"
#include "texture_fetch_functions.h"
//Includes, system
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <windows.h>
// File-scope 2D texture reference over float texels, read back as raw element
// values (cudaReadModeElementType). main() binds it to a CUDA array holding the
// source image, and the Extension kernel samples it with tex2D().
texture<float, 2, cudaReadModeElementType> texRef2D;
/**
 * Resamples the image bound to texRef2D into a w x h output image.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per output pixel. The grid
 * may overshoot the image; threads outside the w x h area exit without writing.
 *
 * Each output pixel (x, y) is mapped back to source coordinates
 * (x / ratio, y / ratio) and fetched through the texture unit. Whether the
 * fetch is interpolated depends on the filter mode the host configured on
 * texRef2D before binding; the kernel itself does no interpolation arithmetic.
 *
 * @param dev_img  Device buffer of at least w * h floats, row-major, written here.
 * @param ratio    Output-to-input scale factor (e.g. 2.0f doubles each dimension).
 * @param h        Output image height in pixels.
 * @param w        Output image width in pixels (also the row stride of dev_img).
 */
__global__ void Extension(float* dev_img, float ratio, int h, int w)
{
    const int x = threadIdx.x + blockDim.x * blockIdx.x;
    const int y = threadIdx.y + blockDim.y * blockIdx.y;

    // Guard against grids that do not divide the image evenly: without this,
    // tail threads would write past the end of dev_img.
    if (x >= w || y >= h)
        return;

    const float xx = (float)x / ratio;
    const float yy = (float)y / ratio;

    // With unnormalized coordinates, texel i is centered at i + 0.5, so the
    // half-texel offset makes source coordinate xx address source pixel xx.
    // Float literals avoid promoting the arithmetic to double.
    dev_img[(size_t)y * w + x] = tex2D(texRef2D, xx + 0.5f, yy + 0.5f);
}
/* Reports a failed CUDA runtime call on stderr and terminates the program. */
static void checkCuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/**
 * Upscales a sequence of N raw float32 projection images from h1 x w1 to
 * h2 x w2 using the texture unit, and writes each result as a raw float32 file.
 *
 * Input files are "<path_proj>NNNN.img", output files "<path_ext>NNNN.img".
 * A frame whose input cannot be read, or whose output cannot be written, is
 * reported on stderr and skipped; any CUDA failure aborts the program.
 *
 * @return 0 when every frame was processed, EXIT_FAILURE if any was skipped.
 */
int main()
{
    const char* path_proj = "F:\\lvjian\\process\\proj\\proj";
    const char* path_ext = "F:\\lvjian\\process - 2048\\proj\\proj";
    char path_temp[MAX_PATH];

    const int h1 = 1024;
    const int w1 = 1024;
    const int h2 = 2048;
    const int w2 = 2048;
    const float ratio = (float)h2 / (float)h1;
    const int N = 360;

    const size_t srcCount = (size_t)h1 * w1;
    const size_t dstCount = (size_t)h2 * w2;
    int failedFrames = 0;

    // Host buffers. calloc zero-fills the output buffer.
    float* pImg = (float*)malloc(sizeof(float) * srcCount);
    float* pExt = (float*)calloc(dstCount, sizeof(float));
    if (pImg == NULL || pExt == NULL)
    {
        fprintf(stderr, "Host memory allocation failed\n");
        free(pImg);
        free(pExt);
        return EXIT_FAILURE;
    }

    // Device memory allocation
    float* ext_img = NULL;
    checkCuda(cudaMalloc((void**)&ext_img, sizeof(float) * dstCount), "cudaMalloc(ext_img)");
    checkCuda(cudaMemset(ext_img, 0, sizeof(float) * dstCount), "cudaMemset(ext_img)");

    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
    cudaArray* cuArray = NULL;
    checkCuda(cudaMallocArray(&cuArray, &channelDesc, w1, h1), "cudaMallocArray");

    // The default filter mode is point (nearest-neighbour) sampling. Linear
    // filtering must be requested explicitly to get hardware bilinear
    // interpolation. Clamp addressing keeps border fetches inside the image.
    texRef2D.normalized = false;
    texRef2D.filterMode = cudaFilterModeLinear;
    texRef2D.addressMode[0] = cudaAddressModeClamp;
    texRef2D.addressMode[1] = cudaAddressModeClamp;
    checkCuda(cudaBindTextureToArray(&texRef2D, cuArray, &channelDesc), "cudaBindTextureToArray");

    // grid.x covers the output width and grid.y the height; ceil-division so a
    // size that is not a multiple of the block shape is still fully covered.
    dim3 threadPerBlock(16, 32);
    dim3 blockPerGrid((w2 + threadPerBlock.x - 1) / threadPerBlock.x,
                      (h2 + threadPerBlock.y - 1) / threadPerBlock.y);

    for (int i = 0; i < N; i++)
    {
        // Load one source frame from disk.
        sprintf(path_temp, "%s%04d.img", path_proj, i);
        FILE* fp = fopen(path_temp, "rb");
        if (fp == NULL)
        {
            fprintf(stderr, "Cannot open input file %s\n", path_temp);
            failedFrames++;
            continue;
        }
        const size_t got = fread(pImg, sizeof(float), srcCount, fp);
        fclose(fp);
        if (got != srcCount)
        {
            fprintf(stderr, "Short read from %s (%lu of %lu floats)\n", path_temp,
                    (unsigned long)got, (unsigned long)srcCount);
            failedFrames++;
            continue;
        }

        // Upload the frame into the array backing the texture. Width and pitch
        // are given in bytes; the host rows are tightly packed.
        checkCuda(cudaMemcpy2DToArray(cuArray, 0, 0, pImg, sizeof(float) * w1,
                                      sizeof(float) * w1, h1, cudaMemcpyHostToDevice),
                  "cudaMemcpy2DToArray");

        Extension<<<blockPerGrid, threadPerBlock>>>(ext_img, ratio, h2, w2);
        checkCuda(cudaGetLastError(), "Extension kernel launch");

        // Blocking copy: also waits for the kernel and surfaces execution errors.
        checkCuda(cudaMemcpy(pExt, ext_img, sizeof(float) * dstCount, cudaMemcpyDeviceToHost),
                  "cudaMemcpy(device to host)");

        // Store the upscaled frame.
        sprintf(path_temp, "%s%04d.img", path_ext, i);
        fp = fopen(path_temp, "wb");
        if (fp == NULL)
        {
            fprintf(stderr, "Cannot open output file %s\n", path_temp);
            failedFrames++;
            continue;
        }
        const size_t put = fwrite(pExt, sizeof(float), dstCount, fp);
        if (fclose(fp) != 0 || put != dstCount)
        {
            fprintf(stderr, "Failed to write %s completely\n", path_temp);
            failedFrames++;
            continue;
        }

        printf(".........processing proj%04d.img.........\n", i);
    }

    // Release device resources, including the texture binding and its array.
    checkCuda(cudaUnbindTexture(&texRef2D), "cudaUnbindTexture");
    checkCuda(cudaFreeArray(cuArray), "cudaFreeArray");
    checkCuda(cudaFree(ext_img), "cudaFree(ext_img)");
    free(pImg);
    free(pExt);

    if (failedFrames > 0)
        fprintf(stderr, "%d of %d frames were skipped\n", failedFrames, N);

    system("pause");
    return failedFrames == 0 ? 0 : EXIT_FAILURE;
}
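原始图像尺寸是1024×1024,放大后的图像尺寸是2048×2048。程序首先从磁盘把原始图像读入主机内存,再将其拷贝到与纹理引用绑定的CUDA数组(cudaArray)中,然后在设备端调用Extension这个kernel函数,设置的grid和block大小分别是(128,64)和(16,32),每个线程负责计算放大图像中的一个像素。插值由纹理单元在硬件层面完成,从而加速整个计算过程。需要注意的是,只有把纹理的滤波模式设置为cudaFilterModeLinear才会得到双线性插值,默认的cudaFilterModePoint只做最近邻取样。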