1、最近邻插值
1)原理
最简单的插值方式:目标图像上(des_x, des_y)处的像素值,等于将该坐标转换到源图像坐标(src_x, src_y)后,距离其最近的坐标点的像素值。
2)源码
tensorflow实现最近邻的源码如下:
template <typename T, bool half_pixel_centers, bool align_corners>
struct ResizeNearestNeighbor<CPUDevice, T, half_pixel_centers, align_corners> {
bool operator()(const CPUDevice& d, typename TTypes<T, 4>::ConstTensor input,
const float height_scale, const float width_scale,
typename TTypes<T, 4>::Tensor output) {
typename BoolToScaler<half_pixel_centers>::Scaler scaler;
const Eigen::Index batch_size = input.dimension(0);
const Eigen::Index in_height = input.dimension(1);
const Eigen::Index in_width = input.dimension(2);
const Eigen::Index channels = input.dimension(3);
const Eigen::Index out_height = output.dimension(1);
const Eigen::Index out_width = output.dimension(2);
for (Eigen::Index b = 0; b < batch_size; ++b) {
for (Eigen::Index y = 0; y < out_height; ++y) {
Eigen::Index in_y = std::min(
(align_corners)
? static_cast<Eigen::Index>(roundf(scaler(y, height_scale)))
: static_cast<Eigen::Index>(floorf(scaler(y, height_scale))),
in_height - 1);
if (half_pixel_centers) {
in_y = std::max(static_cast<Eigen::Index>(0), in_y);
}
for (Eigen::Index x = 0; x < out_width; ++x) {
Eigen::Index in_x = std::min(
(align_corners)
? static_cast<Eigen::Index>(roundf(scaler(x, width_scale)))
: static_cast<Eigen::Index>(floorf(scaler(x, width_scale))),
in_width - 1);
if (half_pixel_centers) {
in_x = std::max(static_cast<Eigen::Index>(0), in_x);
}
std::copy_n(&input(b, in_y, in_x, 0), channels, &output(b, y, x, 0));
}
}
}
return true;
}
};
// Coordinate mapping used by nearest-neighbor resize in half-pixel-centers
// mode: the output index is shifted by 0.5 before being scaled.
struct HalfPixelScalerForNN {
  // Returns (x + 0.5) * scale.
  inline float operator()(const int x, const float scale) const {
    const float shifted = static_cast<float>(x) + 0.5f;
    return shifted * scale;
  }
};
// BoolToScaler<true>: the scaler type used when half_pixel_centers is true.
template <>
struct BoolToScaler<true> {
  using Scaler = HalfPixelScalerForNN;
};
// Legacy coordinate mapping: the output index is scaled directly, with no
// half-pixel offset.
struct LegacyScaler {
  LegacyScaler() {}
  // Returns x * scale.
  inline float operator()(const int x, const float scale) const {
    const float index = static_cast<float>(x);
    return index * scale;
  }
};
// BoolToScaler<false>: the scaler type used when half_pixel_centers is false.
template <>
struct BoolToScaler<false> {
  using Scaler = LegacyScaler;
};
3)tensorflow和pytorch结果:
输入的Tensor:
tensor([[[[1., 2., 3.],
[4., 5., 6.],
[7., 8., 9.]]]])
输出的结果:
torch结果:
[[1., 1., 2., 2., 3., 3.],
[1., 1., 2., 2., 3., 3.],
[4., 4., 5., 5., 6., 6.],
[4., 4., 5., 5., 6., 6.],
[7., 7., 8., 8., 9., 9.],
[7., 7., 8., 8., 9., 9.]]
tf结果:
tf.Tensor(
[[1. 1. 2. 2. 3. 3.]
[1. 1. 2. 2. 3. 3.]
[4. 4. 5. 5. 6. 6.]
[4. 4. 5. 5. 6. 6.]
[7. 7. 8. 8. 9. 9.]
[7. 7. 8. 8. 9. 9.]], shape=(6, 6), dtype=float32)
torch和tf最近邻的结果是相同的
4)优缺点:
优点:算法简单,易于实现,计算量小,运算速度较快
缺点:采用取最近点像素的值,造成像素的变化不连续,在目标图中边缘会产生锯齿
2、双线性插值
1)原理
双线性插值,又称为双线性内插。在数学上,双线性插值是有两个变量的插值函数的线性插值扩展,其核心思想是在两个方向分别进行一次线性插值。
先说一下线性插值:
已知数据 (x0, y0) 与 (x1, y1),要计算 [x0, x1] 区间内某一位置 x 在直线上的y值。
y值的计算是:以x到x0、x1的距离作为权重,对y0和y1做加权平均,即 $y = \frac{x_1 - x}{x_1 - x_0}\,y_0 + \frac{x - x_0}{x_1 - x_0}\,y_1$。双线性插值就是在x和y两个方向上分别做这样的插值。
双线性插值:
假设我们已知 Q11 = (x1, y1)、Q12 = (x1, y2), Q21 = (x2, y1) 以及 Q22 = (x2, y2) 四个点的值。我们想得到在点 P = (x, y) 的像素值。
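首先在 x 方向进行线性插值,得到(记 R1 = (x, y1),R2 = (x, y2)):
$$f(R_1) \approx \frac{x_2 - x}{x_2 - x_1} f(Q_{11}) + \frac{x - x_1}{x_2 - x_1} f(Q_{21})$$
$$f(R_2) \approx \frac{x_2 - x}{x_2 - x_1} f(Q_{12}) + \frac{x - x_1}{x_2 - x_1} f(Q_{22})$$
然后在 y 方向进行线性插值,得到:
$$f(P) \approx \frac{y_2 - y}{y_2 - y_1} f(R_1) + \frac{y - y_1}{y_2 - y_1} f(R_2)$$
上面三个公式联合起来,得到:
$$f(x, y) \approx \frac{f(Q_{11})(x_2 - x)(y_2 - y) + f(Q_{21})(x - x_1)(y_2 - y) + f(Q_{12})(x_2 - x)(y - y_1) + f(Q_{22})(x - x_1)(y - y_1)}{(x_2 - x_1)(y_2 - y_1)}$$
令x-x1=u,y2-y=v(四个点为相邻像素,即 x2-x1=1,y2-y1=1),上面的公式可以转换成:
$$f(x, y) \approx (1 - u)\,v\,f(Q_{11}) + u\,v\,f(Q_{21}) + (1 - u)(1 - v)\,f(Q_{12}) + u\,(1 - v)\,f(Q_{22})$$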
2)源码:
// CPU specialization of the bilinear resize functor.
//
// Parameters:
//   d                  - CPU device handle (not used in this body).
//   images             - 4-D input tensor; dimensions are read as
//                        (batch, height, width, channels).
//   height_scale,
//   width_scale        - factors passed to the scaler to map an output index
//                        to an input coordinate.
//   half_pixel_centers - selects HalfPixelScaler instead of LegacyScaler.
//   output             - 4-D float tensor that receives the result; its
//                        dimensions 1 and 2 define the output size.
//
// When the output size equals the input size the input is simply cast to
// float. Otherwise per-row and per-column interpolation entries are computed
// once and handed to resize_image.
template <typename T>
struct ResizeBilinear<CPUDevice, T> {
  void operator()(const CPUDevice& d, typename TTypes<T, 4>::ConstTensor images,
                  const float height_scale, const float width_scale,
                  bool half_pixel_centers,
                  typename TTypes<float, 4>::Tensor output) {
    const int batch_size = images.dimension(0);
    const int64 in_height = images.dimension(1);
    const int64 in_width = images.dimension(2);
    const int channels = images.dimension(3);

    const int64 out_height = output.dimension(1);
    const int64 out_width = output.dimension(2);

    // Handle no-op resizes efficiently.
    if (out_height == in_height && out_width == in_width) {
      output = images.template cast<float>();
      return;
    }

    // One entry per output row/column plus one trailing entry, which
    // compute_interpolation_weights sets to lower = upper = 0.
    std::vector<CachedInterpolation> ys(out_height + 1);
    std::vector<CachedInterpolation> xs(out_width + 1);

    // Compute the cached interpolation weights on the x and y dimensions.
    if (half_pixel_centers) {
      compute_interpolation_weights(HalfPixelScaler(), out_height, in_height,
                                    height_scale, ys.data());
      compute_interpolation_weights(HalfPixelScaler(), out_width, in_width,
                                    width_scale, xs.data());
    } else {
      compute_interpolation_weights(LegacyScaler(), out_height, in_height,
                                    height_scale, ys.data());
      compute_interpolation_weights(LegacyScaler(), out_width, in_width,
                                    width_scale, xs.data());
    }

    // Scale x interpolation weights to avoid a multiplication during
    // iteration. A range-for visits every entry (including the trailing one)
    // without comparing a signed index against the unsigned xs.size().
    for (CachedInterpolation& x_entry : xs) {
      x_entry.lower *= channels;
      x_entry.upper *= channels;
    }

    resize_image<T>(images, batch_size, in_height, in_width, out_height,
                    out_width, channels, xs, ys, output);
  }
};
template <typename Scaler>
inline void compute_interpolation_weights(const Scaler scaler,
const int64 out_size,
const int64 in_size,
const float scale,
CachedInterpolation* interpolation) {
interpolation[out_size].lower = 0;
interpolation[out_size].upper = 0;
for (int64 i = out_size - 1; i >= 0; --i) {
const float in = scaler(i, scale);
const float in_f = std::floor(in);
interpolation[i].lower =
std::max(static_cast<int64>(in_f), static_cast<int64>(0));
interpolation[i].upper =
std::min(static_cast<int64>(std::ceil(in)), in_size - 1);
interpolation[i].lerp = in - in_f;
}
}
// Half-pixel-centers coordinate mapping for bilinear resize.
struct HalfPixelScaler {
  HalfPixelScaler() {}
  // Returns (x + 0.5) * scale - 0.5.
  inline float operator()(const int x, const float scale) const {
    // The trailing 0.5 is subtracted because the existing bilinear sampling
    // code assumes pixels are in the old coordinate system.
    const float center = static_cast<float>(x) + 0.5f;
    return center * scale - 0.5f;
  }
};
// Legacy coordinate mapping: output index times scale, no half-pixel shift.
struct LegacyScaler {
  LegacyScaler() {}
  // Returns x * scale.
  inline float operator()(const int x, const float scale) const {
    const float out_index = static_cast<float>(x);
    return out_index * scale;
  }
};
3)tensorflow和pytorch结果:
torch:
align_corners=False
tensor([[[[1.0000, 1.2500, 1.7500, 2.2500, 2.7500, 3.0000],
[1.7500, 2.0000, 2.5000, 3.0000, 3.5000, 3.7500],
[3.2500, 3.5000, 4.0000, 4.5000, 5.0000, 5.2500],
[4.7500, 5.0000, 5.5000, 6.0000, 6.5000, 6.7500],
[6.2500, 6.5000, 7.0000, 7.5000, 8.0000, 8.2500],
[7.0000, 7.2500, 7.7500, 8.2500, 8.7500, 9.0000]]]])
align_corners=True
tensor([[[[1.0000, 1.4000, 1.8000, 2.2000, 2.6000, 3.0000],
[2.2000, 2.6000, 3.0000, 3.4000, 3.8000, 4.2000],
[3.4000, 3.8000, 4.2000, 4.6000, 5.0000, 5.4000],
[4.6000, 5.0000, 5.4000, 5.8000, 6.2000, 6.6000],
[5.8000, 6.2000, 6.6000, 7.0000, 7.4000, 7.8000],
[7.0000, 7.4000, 7.8000, 8.2000, 8.6000, 9.0000]]]])
tensorflow
align_corners=False
tf.Tensor(
[[1. 1.5 2. 2.5 3. 3. ]
[2.5 3. 3.5 4. 4.5 4.5]
[4. 4.5 5. 5.5 6. 6. ]
[5.5 6. 6.5 7. 7.5 7.5]
[7. 7.5 8. 8.5 9. 9. ]
[7. 7.5 8. 8.5 9. 9. ]], shape=(6, 6), dtype=float32)
align_corners=True
tf.Tensor(
[[1. 1.4 1.8 2.2 2.6 3. ]
[2.2 2.6 3. 3.4 3.8 4.2 ]
[3.4 3.8000002 4.2000003 4.6 5. 5.4 ]
[4.6000004 5. 5.4000006 5.8 6.2000003 6.6000004]
[5.8 6.2000003 6.6000004 7. 7.4 7.8 ]
[7. 7.4 7.8 8.2 8.6 9. ]], shape=(6, 6), dtype=float32)
从上面的结果可以看出,当align_corners=True时,tf和torch的结果一致(仅有浮点误差)。经实验,onnx2tf转换时,目前只支持align_corners=True。
4)优缺点:
缺点:相比最近邻,计算量稍大些,算法复杂些;双线性插值具有低通滤波器的性质, 导致缩放后图像的边缘变得较为模糊
优点:基本克服了最近邻插值像素值不连续的特点
3、双三次插值
1)原理
双三次插值算法是基于周围的16个像素点,通过计算16个像素点的权重,累积得到增加点的像素值的。
P点像素的计算公式如下:
$$f(x, y) = \sum_{i=0}^{3}\sum_{j=0}^{3} f(x_i, y_j)\, w_{ij}$$
其中f(xi, yj)表示坐标(xi, yj)的像素值,wij表示坐标(xi, yj)的权重。权重wij的计算公式如下:
wij = wxi *wyj
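其中wxi表示x轴方向的权重,wyj表示y方向的权重。wyj的计算公式如下(wxi同理,将y换成x即可):
$$w_{y_j} = W(y - y_j),\qquad W(d) = \begin{cases} (a + 2)|d|^3 - (a + 3)|d|^2 + 1, & |d| \le 1 \\ a|d|^3 - 5a|d|^2 + 8a|d| - 4a, & 1 < |d| < 2 \\ 0, & \text{其他} \end{cases}$$
其中a为双三次核的参数,常取-0.5或-0.75(下文opencv源码中取A = -0.75)。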
2)tensorflow和pytorch的结果:
torch
align_corners=False
tensor([[[[0.5781, 0.8750, 1.3516, 2.0156, 2.4922, 2.7891],
[1.4688, 1.7656, 2.2422, 2.9062, 3.3828, 3.6797],
[2.8984, 3.1953, 3.6719, 4.3359, 4.8125, 5.1094],
[4.8906, 5.1875, 5.6641, 6.3281, 6.8047, 7.1016],
[6.3203, 6.6172, 7.0938, 7.7578, 8.2344, 8.5312],
[7.2109, 7.5078, 7.9844, 8.6484, 9.1250, 9.4219]]]])
align_corners=True
tensor([[[[1.0000, 1.3160, 1.7280, 2.2720, 2.6840, 3.0000],
[1.9480, 2.2640, 2.6760, 3.2200, 3.6320, 3.9480],
[3.1840, 3.5000, 3.9120, 4.4560, 4.8680, 5.1840],
[4.8160, 5.1320, 5.5440, 6.0880, 6.5000, 6.8160],
[6.0520, 6.3680, 6.7800, 7.3240, 7.7360, 8.0520],
[7.0000, 7.3160, 7.7280, 8.2720, 8.6840, 9.0000]]]])
tensorflow
align_corners=False
tf.Tensor(
[[1. 1.40625 2. 2.59375 3. 3.09375]
[2.21875 2.625 3.21875 3.8125 4.21875 4.3125 ]
[4. 4.40625 5. 5.59375 6. 6.09375]
[5.78125 6.1875 6.78125 7.375 7.78125 7.875 ]
[7. 7.40625 8. 8.59375 9. 9.09375]
[7.28125 7.6875 8.28125 8.875 9.28125 9.375 ]], shape=(6, 6), dtype=float32)
align_corners=true
tf.Tensor(
[[1. 1.3163399 1.7277598 2.2722402 2.68366 3. ]
[1.9490197 2.2653594 2.6767793 3.2212596 3.6326795 3.9490197]
[3.1832795 3.499619 3.9110389 4.455519 4.8669395 5.1832795]
[4.8167205 5.1330605 5.54448 6.0889606 6.5003805 6.816721 ]
[6.0509806 6.36732 6.77874 7.3232203 7.7346406 8.050981 ]
[7. 7.3163395 7.72776 8.27224 8.68366 9. ]], shape=(6, 6), dtype=float32)
从上面的结果可以看出,当align_corners=True时,tf和torch的结果基本一致(仅在小数点后第三、四位有微小差异)。经实验,onnx2tf转换时不支持双三次插值。
3)、opencv源码
// Bicubic resize of matSrc into matDst1 with fixed-point weights.
//
// Each destination pixel is a weighted sum of a 4x4 source window. The four
// weights per axis are evaluated in float, scaled by 2048 and stored as
// short, so a product of one x weight and one y weight carries a factor of
// 2048 * 2048 = 2^22, which the final ">> 22" removes.
//
// Requires matSrc to have at least 4 rows and 4 columns: the window spans
// [sy-1, sy+2] x [sx-1, sx+2] after clamping.
// NOTE(review): pixels are accessed as cv::Vec3b, which presumably means a
// 3-channel 8-bit image - confirm against the caller.
for (int j = 0; j < matDst1.rows; ++j)
{
    // Source row coordinate for destination row j, split into an integer
    // row sy and a fractional offset fy.
    float fy = static_cast<float>((j + 0.5) * scale_y - 0.5);
    int sy = cvFloor(fy);
    fy -= sy;
    // Clamp so that rows sy-1 .. sy+2 stay inside the image. Unlike the x
    // direction below, fy is left unchanged when sy is clamped.
    sy = std::min(sy, matSrc.rows - 3);
    sy = std::max(1, sy);

    const float A = -0.75f;

    // Cubic kernel weights for rows sy-1, sy, sy+1, sy+2. The last one is
    // derived from the others so the four weights sum to 1.
    float coeffsY[4];
    coeffsY[0] = ((A*(fy + 1) - 5*A)*(fy + 1) + 8*A)*(fy + 1) - 4*A;
    coeffsY[1] = ((A + 2)*fy - (A + 3))*fy*fy + 1;
    coeffsY[2] = ((A + 2)*(1 - fy) - (A + 3))*(1 - fy)*(1 - fy) + 1;
    coeffsY[3] = 1.f - coeffsY[0] - coeffsY[1] - coeffsY[2];

    short cbufY[4];
    for (int n = 0; n < 4; ++n)
    {
        cbufY[n] = cv::saturate_cast<short>(coeffsY[n] * 2048);
    }

    for (int i = 0; i < matDst1.cols; ++i)
    {
        float fx = static_cast<float>((i + 0.5) * scale_x - 0.5);
        int sx = cvFloor(fx);
        fx -= sx;

        // Clamp so that columns sx-1 .. sx+2 stay inside the image; the
        // fractional offset is reset whenever the column is clamped.
        if (sx < 1) {
            fx = 0;
            sx = 1;
        }
        if (sx >= matSrc.cols - 3) {
            fx = 0;
            sx = matSrc.cols - 3;
        }

        float coeffsX[4];
        coeffsX[0] = ((A*(fx + 1) - 5*A)*(fx + 1) + 8*A)*(fx + 1) - 4*A;
        coeffsX[1] = ((A + 2)*fx - (A + 3))*fx*fx + 1;
        coeffsX[2] = ((A + 2)*(1 - fx) - (A + 3))*(1 - fx)*(1 - fx) + 1;
        coeffsX[3] = 1.f - coeffsX[0] - coeffsX[1] - coeffsX[2];

        short cbufX[4];
        for (int n = 0; n < 4; ++n)
        {
            cbufX[n] = cv::saturate_cast<short>(coeffsX[n] * 2048);
        }

        for (int k = 0; k < matSrc.channels(); ++k)
        {
            // Weighted sum over the 4x4 window in 2^22 fixed point.
            int acc = 0;
            for (int dx = 0; dx < 4; ++dx)
            {
                for (int dy = 0; dy < 4; ++dy)
                {
                    acc += matSrc.at<cv::Vec3b>(sy - 1 + dy, sx - 1 + dx)[k] * cbufX[dx] * cbufY[dy];
                }
            }
            // The kernel has negative lobes, so the sum can fall below 0 or
            // rise above 255. Saturate to [0, 255] instead of taking abs()
            // and letting the 8-bit store wrap around.
            matDst1.at<cv::Vec3b>(j, i)[k] = cv::saturate_cast<unsigned char>(acc >> 22);
        }
    }
}