图像插值

最新推荐文章于 2023-11-18 23:02:10 发布

努力向上的小白

最新推荐文章于 2023-11-18 23:02:10 发布

阅读量803

点赞数 1

文章标签：图像识别 c++ tensorflow 深度学习

本文链接：https://blog.csdn.net/weixin_42227520/article/details/108468772

版权

1、最近邻插值

1）原理

最简单的插值方式：先把目标图像上的点(des_x, des_y)按缩放比例映射到源图像坐标(src_x, src_y)，再取距离(src_x, src_y)最近的像素点的值，作为目标图像(des_x, des_y)处的像素值。

2）源码

tensorflow实现最近邻的源码如下：

template <typename T, bool half_pixel_centers, bool align_corners>
struct ResizeNearestNeighbor<CPUDevice, T, half_pixel_centers, align_corners> {
  bool operator()(const CPUDevice& d, typename TTypes<T, 4>::ConstTensor input,
                  const float height_scale, const float width_scale,
                  typename TTypes<T, 4>::Tensor output) {
    typename BoolToScaler<half_pixel_centers>::Scaler scaler;
    const Eigen::Index batch_size = input.dimension(0);
    const Eigen::Index in_height = input.dimension(1);
    const Eigen::Index in_width = input.dimension(2);
    const Eigen::Index channels = input.dimension(3);

    const Eigen::Index out_height = output.dimension(1);
    const Eigen::Index out_width = output.dimension(2);

    for (Eigen::Index b = 0; b < batch_size; ++b) {
      for (Eigen::Index y = 0; y < out_height; ++y) {
        Eigen::Index in_y = std::min(
            (align_corners)
                ? static_cast<Eigen::Index>(roundf(scaler(y, height_scale)))
                : static_cast<Eigen::Index>(floorf(scaler(y, height_scale))),
            in_height - 1);
        if (half_pixel_centers) {
          in_y = std::max(static_cast<Eigen::Index>(0), in_y);
        }
        for (Eigen::Index x = 0; x < out_width; ++x) {
          Eigen::Index in_x = std::min(
              (align_corners)
                  ? static_cast<Eigen::Index>(roundf(scaler(x, width_scale)))
                  : static_cast<Eigen::Index>(floorf(scaler(x, width_scale))),
              in_width - 1);
          if (half_pixel_centers) {
            in_x = std::max(static_cast<Eigen::Index>(0), in_x);
          }
          std::copy_n(&input(b, in_y, in_x, 0), channels, &output(b, y, x, 0));
        }
      }
    }
    return true;
  }
};


// Scaler used by nearest-neighbor resize in half-pixel mode: shifts the
// output index by half a pixel and then multiplies by `scale`.
struct HalfPixelScalerForNN {
  inline float operator()(const int x, const float scale) const {
    const float shifted = static_cast<float>(x) + 0.5f;
    return shifted * scale;
  }
};


// Compile-time selection: `true` picks the half-pixel nearest-neighbor scaler.
template <>
struct BoolToScaler<true> {
  using Scaler = HalfPixelScalerForNN;
};

// Legacy coordinate mapping: the source coordinate is simply the output index
// multiplied by `scale`, with no half-pixel offset.
struct LegacyScaler {
  LegacyScaler() {}
  inline float operator()(const int x, const float scale) const {
    const float index = static_cast<float>(x);
    return index * scale;
  }
};

// Compile-time selection: `false` picks the legacy (no half-pixel) scaler.
template <>
struct BoolToScaler<false> {
  using Scaler = LegacyScaler;
};

3）tensorflow和pytorch结果：

输入的Tensor：

tensor([[[[1., 2., 3.],
          [4., 5., 6.],
          [7., 8., 9.]]]])

输出的结果：

torch结果：
[[1., 1., 2., 2., 3., 3.],
 [1., 1., 2., 2., 3., 3.],
 [4., 4., 5., 5., 6., 6.],
 [4., 4., 5., 5., 6., 6.],
 [7., 7., 8., 8., 9., 9.],
 [7., 7., 8., 8., 9., 9.]]

tf结果：
tf.Tensor(
[[1. 1. 2. 2. 3. 3.]
 [1. 1. 2. 2. 3. 3.]
 [4. 4. 5. 5. 6. 6.]
 [4. 4. 5. 5. 6. 6.]
 [7. 7. 8. 8. 9. 9.]
 [7. 7. 8. 8. 9. 9.]], shape=(6, 6), dtype=float32)

torch和tf最近邻的结果是相同的

4）优缺点：

优点：算法简单，易于实现，计算量小，运算速度较快

缺点：采用取最近点像素的值，造成像素的变化不连续，在目标图中边缘会产生锯齿

2、双线性插值

1）原理

双线性插值，又称为双线性内插。在数学上，双线性插值是有两个变量的插值函数的线性插值扩展，其核心思想是在两个方向分别进行一次线性插值。

先说一下线性插值：

已知数据 (x0, y0) 与 (x1, y1)，要计算 [x0, x1] 区间内某一位置 x 在直线上的y值。

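y值的计算是：以 x 到 x0、x1 的距离作为权重，对 y0 和 y1 做加权求和（离哪个点越近，该点的权重越大）：

$$y = \frac{x_1 - x}{x_1 - x_0}\,y_0 + \frac{x - x_0}{x_1 - x_0}\,y_1$$

双线性插值就是在x和y两个方向上分别做这样的线性插值。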

双线性插值：

假设我们已知 Q11 = (x1, y1)、Q12 = (x1, y2), Q21 = (x2, y1) 以及 Q22 = (x2, y2) 四个点的值。我们想得到在点 P = (x, y) 的像素值。

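首先在 x 方向进行线性插值，记 R1 = (x, y1)、R2 = (x, y2)，得到：

$$f(R_1) \approx \frac{x_2 - x}{x_2 - x_1} f(Q_{11}) + \frac{x - x_1}{x_2 - x_1} f(Q_{21})$$

$$f(R_2) \approx \frac{x_2 - x}{x_2 - x_1} f(Q_{12}) + \frac{x - x_1}{x_2 - x_1} f(Q_{22})$$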

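然后在 y 方向进行线性插值（其中 R1 = (x, y1)、R2 = (x, y2) 是 x 方向插值得到的两个中间点），得到：

$$f(P) \approx \frac{y_2 - y}{y_2 - y_1} f(R_1) + \frac{y - y_1}{y_2 - y_1} f(R_2)$$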

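上面三个公式联合起来，得到：

$$f(x, y) \approx \frac{f(Q_{11})(x_2 - x)(y_2 - y) + f(Q_{21})(x - x_1)(y_2 - y) + f(Q_{12})(x_2 - x)(y - y_1) + f(Q_{22})(x - x_1)(y - y_1)}{(x_2 - x_1)(y_2 - y_1)}$$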

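对于图像，四个已知点是相邻像素，即 x2 - x1 = 1、y2 - y1 = 1。令 x - x1 = u，y2 - y = v，上面的公式可以转换成：

$$f(P) \approx (1 - u)\,v\,f(Q_{11}) + u\,v\,f(Q_{21}) + (1 - u)(1 - v)\,f(Q_{12}) + u\,(1 - v)\,f(Q_{22})$$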

2）源码：

// CPU specialization of the bilinear resize functor.
//
// Reads dimensions 0..3 of `images` as batch, height, width, channels and the
// output height/width from dimensions 1 and 2 of `output`. When the output
// size equals the input size the input is just cast to float. Otherwise the
// per-row and per-column interpolation entries are computed once (with the
// half-pixel or the legacy scaler, depending on `half_pixel_centers`) and
// handed to resize_image().
template <typename T>
struct ResizeBilinear<CPUDevice, T> {
  void operator()(const CPUDevice& d, typename TTypes<T, 4>::ConstTensor images,
                  const float height_scale, const float width_scale,
                  bool half_pixel_centers,
                  typename TTypes<float, 4>::Tensor output) {
    const int batch_size = images.dimension(0);
    const int64 in_height = images.dimension(1);
    const int64 in_width = images.dimension(2);
    const int channels = images.dimension(3);

    const int64 out_height = output.dimension(1);
    const int64 out_width = output.dimension(2);

    // Handle no-op resizes efficiently.
    if (out_height == in_height && out_width == in_width) {
      output = images.template cast<float>();
      return;
    }

    // One extra trailing entry per axis: compute_interpolation_weights()
    // writes a zeroed entry at index out_size.
    std::vector<CachedInterpolation> ys(out_height + 1);
    std::vector<CachedInterpolation> xs(out_width + 1);

    // Compute the cached interpolation weights on the x and y dimensions.
    if (half_pixel_centers) {
      compute_interpolation_weights(HalfPixelScaler(), out_height, in_height,
                                    height_scale, ys.data());
      compute_interpolation_weights(HalfPixelScaler(), out_width, in_width,
                                    width_scale, xs.data());
    } else {
      compute_interpolation_weights(LegacyScaler(), out_height, in_height,
                                    height_scale, ys.data());
      compute_interpolation_weights(LegacyScaler(), out_width, in_width,
                                    width_scale, xs.data());
    }

    // Scale x interpolation weights to avoid a multiplication during iteration.
    // Range-for avoids comparing a signed int index against the unsigned
    // xs.size() (and overflowing that int for very wide outputs).
    for (CachedInterpolation& x_entry : xs) {
      x_entry.lower *= channels;
      x_entry.upper *= channels;
    }

    resize_image<T>(images, batch_size, in_height, in_width, out_height,
                    out_width, channels, xs, ys, output);
  }
};

template <typename Scaler>
inline void compute_interpolation_weights(const Scaler scaler,
                                          const int64 out_size,
                                          const int64 in_size,
                                          const float scale,
                                          CachedInterpolation* interpolation) {
  interpolation[out_size].lower = 0;
  interpolation[out_size].upper = 0;
  for (int64 i = out_size - 1; i >= 0; --i) {
    const float in = scaler(i, scale);
    const float in_f = std::floor(in);
    interpolation[i].lower =
        std::max(static_cast<int64>(in_f), static_cast<int64>(0));
    interpolation[i].upper =
        std::min(static_cast<int64>(std::ceil(in)), in_size - 1);
    interpolation[i].lerp = in - in_f;
  }
}


// Half-pixel-center coordinate mapping for the bilinear resize path.
struct HalfPixelScaler {
  HalfPixelScaler() {}
  inline float operator()(const int x, const float scale) const {
    // The trailing -0.5 converts the result back to the old coordinate
    // system, which is what the existing bilinear sampling code assumes.
    const float index = static_cast<float>(x);
    return (index + 0.5f) * scale - 0.5f;
  }
};


// Legacy coordinate mapping for the bilinear resize path: output index times
// `scale`, without any half-pixel shift.
struct LegacyScaler {
  LegacyScaler() {}
  inline float operator()(const int x, const float scale) const {
    const float index = static_cast<float>(x);
    return index * scale;
  }
};

3）tensorflow和pytorch结果：

torch：

align_corners=False
tensor([[[[1.0000, 1.2500, 1.7500, 2.2500, 2.7500, 3.0000],
          [1.7500, 2.0000, 2.5000, 3.0000, 3.5000, 3.7500],
          [3.2500, 3.5000, 4.0000, 4.5000, 5.0000, 5.2500],
          [4.7500, 5.0000, 5.5000, 6.0000, 6.5000, 6.7500],
          [6.2500, 6.5000, 7.0000, 7.5000, 8.0000, 8.2500],
          [7.0000, 7.2500, 7.7500, 8.2500, 8.7500, 9.0000]]]])

align_corners=True
tensor([[[[1.0000, 1.4000, 1.8000, 2.2000, 2.6000, 3.0000],
          [2.2000, 2.6000, 3.0000, 3.4000, 3.8000, 4.2000],
          [3.4000, 3.8000, 4.2000, 4.6000, 5.0000, 5.4000],
          [4.6000, 5.0000, 5.4000, 5.8000, 6.2000, 6.6000],
          [5.8000, 6.2000, 6.6000, 7.0000, 7.4000, 7.8000],
          [7.0000, 7.4000, 7.8000, 8.2000, 8.6000, 9.0000]]]])

tensorflow

align_corners=False
 tf.Tensor(
[[1.  1.5 2.  2.5 3.  3. ]
 [2.5 3.  3.5 4.  4.5 4.5]
 [4.  4.5 5.  5.5 6.  6. ]
 [5.5 6.  6.5 7.  7.5 7.5]
 [7.  7.5 8.  8.5 9.  9. ]
 [7.  7.5 8.  8.5 9.  9. ]], shape=(6, 6), dtype=float32)

align_corners=True
 tf.Tensor(
[[1.        1.4       1.8       2.2       2.6       3.       ]
 [2.2       2.6       3.        3.4       3.8       4.2      ]
 [3.4       3.8000002 4.2000003 4.6       5.        5.4      ]
 [4.6000004 5.        5.4000006 5.8       6.2000003 6.6000004]
 [5.8       6.2000003 6.6000004 7.        7.4       7.8      ]
 [7.        7.4       7.8       8.2       8.6       9.       ]], shape=(6, 6), dtype=float32)

从上面的结果可以看出来，当align_corners=True时，tf和torch的结果是一样的。经过实验，onnx2tf的时候，目前只有align_corners=True是支持的。

4）优缺点：

缺点：相比最近邻，计算量稍大些，算法复杂些；双线性插值具有低通滤波器的性质, 导致缩放后图像的边缘变得较为模糊

优点：基本克服了最近邻插值像素值不连续的特点

3、双三次插值

1）原理

双三次插值算法是基于周围的16个像素点，通过计算16个像素点的权重，累积得到增加点的像素值的。

P点像素的计算公式如下：

$$f(P) = \sum_{i=0}^{3}\sum_{j=0}^{3} f(x_i, y_j)\, w_{ij}$$

其中f(xi, yj)表示坐标(xi, yj)的像素值，wij表示坐标(xi, yj)的权重。权重wij的计算公式如下：

wij = wxi *wyj

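其中wxi表示x轴方向的权重，wyj表示y方向的权重。wyj的计算公式如下（wxi同理）。设 d 为 P 点到第 j 个采样点在 y 方向上的距离，则 wyj = W(d)：

$$W(d) = \begin{cases} (a + 2)|d|^3 - (a + 3)|d|^2 + 1, & |d| \le 1 \\ a|d|^3 - 5a|d|^2 + 8a|d| - 4a, & 1 < |d| < 2 \\ 0, & \text{其他} \end{cases}$$

其中 a 为可调参数，下文opencv源码中取 a = -0.75。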

2）tensorflow和pytorch的结果：

torch

align_corners=False
tensor([[[[0.5781, 0.8750, 1.3516, 2.0156, 2.4922, 2.7891],
          [1.4688, 1.7656, 2.2422, 2.9062, 3.3828, 3.6797],
          [2.8984, 3.1953, 3.6719, 4.3359, 4.8125, 5.1094],
          [4.8906, 5.1875, 5.6641, 6.3281, 6.8047, 7.1016],
          [6.3203, 6.6172, 7.0938, 7.7578, 8.2344, 8.5312],
          [7.2109, 7.5078, 7.9844, 8.6484, 9.1250, 9.4219]]]])

align_corners=True
tensor([[[[1.0000, 1.3160, 1.7280, 2.2720, 2.6840, 3.0000],
          [1.9480, 2.2640, 2.6760, 3.2200, 3.6320, 3.9480],
          [3.1840, 3.5000, 3.9120, 4.4560, 4.8680, 5.1840],
          [4.8160, 5.1320, 5.5440, 6.0880, 6.5000, 6.8160],
          [6.0520, 6.3680, 6.7800, 7.3240, 7.7360, 8.0520],
          [7.0000, 7.3160, 7.7280, 8.2720, 8.6840, 9.0000]]]])

tensorflow

align_corners=False
 tf.Tensor(
[[1.      1.40625 2.      2.59375 3.      3.09375]
 [2.21875 2.625   3.21875 3.8125  4.21875 4.3125 ]
 [4.      4.40625 5.      5.59375 6.      6.09375]
 [5.78125 6.1875  6.78125 7.375   7.78125 7.875  ]
 [7.      7.40625 8.      8.59375 9.      9.09375]
 [7.28125 7.6875  8.28125 8.875   9.28125 9.375  ]], shape=(6, 6), dtype=float32)

align_corners=true
 tf.Tensor(
[[1.        1.3163399 1.7277598 2.2722402 2.68366   3.       ]
 [1.9490197 2.2653594 2.6767793 3.2212596 3.6326795 3.9490197]
 [3.1832795 3.499619  3.9110389 4.455519  4.8669395 5.1832795]
 [4.8167205 5.1330605 5.54448   6.0889606 6.5003805 6.816721 ]
 [6.0509806 6.36732   6.77874   7.3232203 7.7346406 8.050981 ]
 [7.        7.3163395 7.72776   8.27224   8.68366   9.       ]], shape=(6, 6), dtype=float32)

从上面的结果可以看出来，当align_corners=True时，tf和torch的结果基本一致（各元素的差异约在1e-3量级）。经过实验，onnx2tf的时候，对双三次插值不支持。

3)、opencv源码

// Bicubic resize of matSrc into matDst1 with fixed-point weights.
//
// For each destination pixel the centre is mapped back into source
// coordinates, four cubic-kernel weights are computed per axis, scaled by
// 2048 (2^11) and stored as shorts. Each channel is then the weighted sum of
// the 4x4 source neighbourhood, shifted right by 22 bits (2^11 * 2^11) to
// undo the scaling.
//
// NOTE(review): pixels are accessed as cv::Vec3b while the channel loop runs
// to matSrc.channels() — presumably both images are 8-bit, 3-channel; confirm.
// NOTE(review): the clamps below assume the source has at least 4 rows and 4
// columns; smaller images would index outside the 4x4 window — confirm.
for (int j = 0; j < matDst1.rows; ++j)
{
  // Source row coordinate of this destination row's centre.
  float fy = static_cast<float>((j + 0.5) * scale_y - 0.5);
  int sy = cvFloor(fy);
  fy -= sy;  // fractional offset from the base row
  // Keep the window rows sy-1 .. sy+2 inside the image.
  // NOTE(review): unlike the x axis below, fy is not reset to 0 when sy is
  // clamped — confirm this asymmetry is intended.
  sy = std::min(sy, matSrc.rows - 3);
  sy = std::max(1, sy);

  const float A = -0.75f;  // cubic kernel parameter

  // Kernel weights for rows sy-1, sy, sy+1, sy+2; the last one is derived so
  // that the four weights sum to 1.
  float coeffsY[4];
  coeffsY[0] = ((A*(fy + 1) - 5*A)*(fy + 1) + 8*A)*(fy + 1) - 4*A;
  coeffsY[1] = ((A + 2)*fy - (A + 3))*fy*fy + 1;
  coeffsY[2] = ((A + 2)*(1 - fy) - (A + 3))*(1 - fy)*(1 - fy) + 1;
  coeffsY[3] = 1.f - coeffsY[0] - coeffsY[1] - coeffsY[2];

  // Fixed-point copies of the row weights.
  short cbufY[4];
  for (int t = 0; t < 4; ++t)
  {
    cbufY[t] = cv::saturate_cast<short>(coeffsY[t] * 2048);
  }

  for (int i = 0; i < matDst1.cols; ++i)
  {
    // Source column coordinate of this destination column's centre.
    float fx = static_cast<float>((i + 0.5) * scale_x - 0.5);
    int sx = cvFloor(fx);
    fx -= sx;

    // Keep the window columns sx-1 .. sx+2 inside the image; a clamped
    // column uses a zero fractional offset.
    if (sx < 1) {
      fx = 0;
      sx = 1;
    }
    if (sx >= matSrc.cols - 3) {
      fx = 0;
      sx = matSrc.cols - 3;
    }

    // Kernel weights for columns sx-1, sx, sx+1, sx+2.
    float coeffsX[4];
    coeffsX[0] = ((A*(fx + 1) - 5*A)*(fx + 1) + 8*A)*(fx + 1) - 4*A;
    coeffsX[1] = ((A + 2)*fx - (A + 3))*fx*fx + 1;
    coeffsX[2] = ((A + 2)*(1 - fx) - (A + 3))*(1 - fx)*(1 - fx) + 1;
    coeffsX[3] = 1.f - coeffsX[0] - coeffsX[1] - coeffsX[2];

    // Fixed-point copies of the column weights.
    short cbufX[4];
    for (int t = 0; t < 4; ++t)
    {
      cbufX[t] = cv::saturate_cast<short>(coeffsX[t] * 2048);
    }

    for (int k = 0; k < matSrc.channels(); ++k)
    {
      // Weighted sum over the 4x4 neighbourhood, column by column.
      int acc = 0;
      for (int dx = 0; dx < 4; ++dx)
      {
        for (int dy = 0; dy < 4; ++dy)
        {
          acc += matSrc.at<cv::Vec3b>(sy - 1 + dy, sx - 1 + dx)[k] * cbufX[dx] * cbufY[dy];
        }
      }
      // The cubic kernel has negative lobes, so the sum can fall below 0 or
      // exceed 255. Saturate instead of abs(): abs() mirrored undershoot to a
      // positive value and overshoot wrapped around when stored in a uchar.
      matDst1.at<cv::Vec3b>(j, i)[k] = cv::saturate_cast<uchar>(acc >> 22);
    }
  }
}

努力向上的小白

关注

1
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
图像插值

1、最近邻插值1）原理最简单的插值方式，目标图像上(des_x,des_ y)处的像素值等于( (des_x, des_y)对应到源图像坐标为(src_x, xrc_y) )：距离(src_x, xrc_y)最近坐标点的像素值。2）源码tensorflow实现最近邻的源码如下：template <typename T, bool half_pixel_centers, bool align_corners>struct ResizeNearestNeighbor&lt..
复制链接

扫一扫