部分cuda的三维cubic B样条插值代码如下:
#ifndef _CUDA_CUBIC3D_H_
#define _CUDA_CUBIC3D_H_
#include "bSplineKernel.cu"
//! Tricubic interpolated texture lookup, using unnormalized coordinates.
//! Straight forward implementation, using 64 nearest neighbour lookups.
//! @param tex 3D texture
//! @param coord unnormalized 3D texture coordinate
template<class T, enum cudaTextureReadMode mode>
__device__ float cubicTex3DSimple(texture<T, 3, mode> tex, float3 coord)
{
// transform the coordinate from [0,extent] to [-0.5, extent-0.5]
const float3 coord_grid = coord - 0.5f;
float3 index = floor(coord_grid);
const float3 fraction = coord_grid - index;
index = index + 0.5f; //move from [-0.5, extent-0.5] to [0, extent]
float result = 0.0f;
for (float z=-1; z < 2.5f; z++) //range [-1, 2]
{
float bsplineZ = bspline(z-fraction.z);
float w = index.z + z;
for (float y=-1; y < 2.5f; y++)
{
float bsplineYZ = bspline(y-fraction.y) * bsplineZ;
float v = index.y + y;
for (float x=-1; x < 2.5f; x++)
{float bsplineXYZ = bspline(x-fraction.x) * bsplineYZ;
float u = index.x + x;
result += bsplineXYZ * tex3D(tex, u, v, w);
}
}
}
return result;
}
//! Tricubic interpolated texture lookup, using unnormalized coordinates.
//! Fast implementation, using 8 trilinear lookups.
//! @param tex 3D texture
//! @param coord unnormalized 3D texture coordinate
template<class T, enum cudaTextureReadMode mode>
__device__ float cubicTex3D(texture<T, 3, mode> tex, float3 coord)
{
// shift the coordinate from [0,extent] to [-0.5, extent-0.5]
const float3 coord_grid = coord - 0.5f;
const float3 index = floor(coord_grid);
const float3 fraction = coord_grid - index;
float3 w0, w1, w2, w3;
//bspline_weights_1st_derivative_x(fraction, w0, w1, w2, w3);
//bspline_weights_1st_derivative_y(fraction, w0, w1, w2, w3);
//bspline_weights_1st_derivative_z(fraction, w0, w1, w2, w3);
bspline_weights(fraction, w0, w1, w2, w3);
const float3 g0 = w0 + w1;
const float3 g1 = w2 + w3;
const float3 h0 = (w1 / g0) - 0.5f + index; //h0 = w1/g0 - 1, move from [-0.5, extent-0.5] to [0, extent]
const float3 h1 = (w3 / g1) + 1.5f + index; //h1 = w3/g1 + 1, move from [-0.5, extent-0.5] to [0, extent]
// fetch the eight linear interpolations
// weighting and fetching is interleaved for performance and stability reasons
float tex000 = tex3D(tex, h0.x, h0.y, h0.z);
float tex100 = tex3D(tex, h1.x, h0.y, h0.z);
tex000 = g0.x * tex000 + g1.x * tex100; //weigh along the x-direction
float tex010 = tex3D(tex, h0.x, h1.y, h0.z);
float tex110 = tex3D(tex, h1.x, h1.y, h0.z);
tex010 = g0.x * tex010 + g1.x * tex110; //weigh along the x-direction
tex000 = g0.y * tex000 + g1.y * tex010; //weigh along the y-direction
float tex001 = tex3D(tex, h0.x, h0.y, h1.z);
float tex101 = tex3D(tex, h1.x, h0.y, h1.z);
tex001 = g0.x * tex001 + g1.x * tex101; //weigh along the x-direction
float tex011 = tex3D(tex, h0.x, h1.y, h1.z);
float tex111 = tex3D(tex, h1.x, h1.y, h1.z);
tex011 = g0.x * tex011 + g1.x * tex111; //weigh along the x-direction
tex001 = g0.y * tex001 + g1.y * tex011; //weigh along the y-direction
return (g0.z * tex000 + g1.z * tex001); //weigh along the z-direction
}
#endif
//name: bSplineKernel.cu
// Cubic B-spline function
inline __host__ __device__ float bspline(float t)
{
t = fabs(t);
const float a = 2.0f - t;
if (t < 1.0f) return 2.0f/3.0f - 0.5f*t*t*a;
else if (t < 2.0f) return a*a*a / 6.0f;
else return 0.0f;
}
template<class T> inline __device__ void bspline_weights(T fraction, T& w0, T& w1, T& w2, T& w3)
{
const T one_frac = 1.0f - fraction;
const T squared = fraction * fraction;
const T one_sqd = one_frac * one_frac;
w0 = 1.0f/6.0f * one_sqd * one_frac;
w1 = 2.0f/3.0f - 0.5f * squared * (2.0f-fraction);
w2 = 2.0f/3.0f - 0.5f * one_sqd * (2.0f-one_frac);
w3 = 1.0f/6.0f * squared * fraction;
}
#endif
参考文献:
[1] Philippe Thévenaz, Thierry Blu, Michael Unser.Interpolation Revisited. IEEE. 2000.
[2] Daniel Ruijters, Philippe Thévenaz. GPU Prefilter for Accurate Cubic B-spline Interpolation. 2010.
[3] Christian Sigg. Markus Hadwiger. Fast Third-Order Texture Filtering. 2005.
[4] Daniel Ruijters. Bart M. ter Haar Romeny. Paul Suetens. Efficient GPU-Based Texture
Interpolation using Uniform B-Splines. 2008.