halide编程技术指南（连载三）

最新推荐文章于 2022-12-31 16:48:50 发布

Aoulun

最新推荐文章于 2022-12-31 16:48:50 发布

阅读量631

点赞数

分类专栏：深度学习

本文链接：https://blog.csdn.net/Aoulun/article/details/105265743

版权

深度学习专栏收录该内容

45 篇文章 5 订阅

订阅专栏

本文是halide编程指南的连载，已同步至公众号

第六章在任意域上实现函数

第七章多级管道

第六章在任意域上实现函数

本课演示如何在不从（0，0）开始的域上计算Func。

#include "Halide.h"
#include <stdio.h>
using namespace Halide;
// Lesson 6: realizing a Func over a rectangular domain that does not start at
// (0, 0). Returns 0 on success and -1 if any computed value is wrong.
// The trace output shown in the article is kept below as comments so that the
// listing compiles as written.
int main(int argc, char **argv) {
    // The previous lesson covered a lot, and complex multi-stage pipeline
    // scheduling lies ahead. As an interlude, consider something simple:
    // evaluating a function over a rectangular domain that does not start at
    // the origin.

    // Define our familiar gradient function.
    Func gradient("gradient");
    Var x("x"), y("y");
    gradient(x, y) = x + y;

    // Turn on tracing so we can see how it is being evaluated.
    gradient.trace_stores();

    // Previously we realized gradient like this:
    //     gradient.realize(8, 8);
    // That call does four things:
    // 1) Generates code that can evaluate gradient over an arbitrary rectangle.
    // 2) Allocates a new 8 x 8 image.
    // 3) Runs the generated code to evaluate gradient for all x, y from
    //    (0, 0) to (7, 7) and puts the result into the image.
    // 4) Returns the new image as the result of the realize call.

    // What if we are managing memory carefully and do not want Halide to
    // allocate a new image for us? We can call realize another way: hand it
    // an image we would like it to fill in. The following evaluates the Func
    // into an existing image:
    printf("Evaluating gradient from (0, 0) to (7, 7)\n");
    Buffer<int> result(8, 8);
    gradient.realize(result);

    // Trace output printed by the realize call above:
    // -----------------------------------------------------------------------
    //  > Begin pipeline gradient.0()
    //  > Store gradient.0(0, 0) = 0
    //  > Store gradient.0(1, 0) = 1
    //  > Store gradient.0(2, 0) = 2
    //  > Store gradient.0(3, 0) = 3
    //  > Store gradient.0(4, 0) = 4
    //  > Store gradient.0(5, 0) = 5
    //  > Store gradient.0(6, 0) = 6
    //  > Store gradient.0(7, 0) = 7
    //  > Store gradient.0(0, 1) = 1
    //  > Store gradient.0(1, 1) = 2
    //  > Store gradient.0(2, 1) = 3
    //  > Store gradient.0(3, 1) = 4
    //  > Store gradient.0(4, 1) = 5
    //  > Store gradient.0(5, 1) = 6
    //  > Store gradient.0(6, 1) = 7
    //  > Store gradient.0(7, 1) = 8
    //  > Store gradient.0(0, 2) = 2
    //  > Store gradient.0(1, 2) = 3
    //  > Store gradient.0(2, 2) = 4
    //  > Store gradient.0(3, 2) = 5
    //  > Store gradient.0(4, 2) = 6
    //  > Store gradient.0(5, 2) = 7
    //  > Store gradient.0(6, 2) = 8
    //  > Store gradient.0(7, 2) = 9
    //  > Store gradient.0(0, 3) = 3
    //  > Store gradient.0(1, 3) = 4
    //  > Store gradient.0(2, 3) = 5
    //  > Store gradient.0(3, 3) = 6
    //  > Store gradient.0(4, 3) = 7
    //  > Store gradient.0(5, 3) = 8
    //  > Store gradient.0(6, 3) = 9
    //  > Store gradient.0(7, 3) = 10
    //  > Store gradient.0(0, 4) = 4
    //  > Store gradient.0(1, 4) = 5
    //  > Store gradient.0(2, 4) = 6
    //  > Store gradient.0(3, 4) = 7
    //  > Store gradient.0(4, 4) = 8
    //  > Store gradient.0(5, 4) = 9
    //  > Store gradient.0(6, 4) = 10
    //  > Store gradient.0(7, 4) = 11
    //  > Store gradient.0(0, 5) = 5
    //  > Store gradient.0(1, 5) = 6
    //  > Store gradient.0(2, 5) = 7
    //  > Store gradient.0(3, 5) = 8
    //  > Store gradient.0(4, 5) = 9
    //  > Store gradient.0(5, 5) = 10
    //  > Store gradient.0(6, 5) = 11
    //  > Store gradient.0(7, 5) = 12
    //  > Store gradient.0(0, 6) = 6
    //  > Store gradient.0(1, 6) = 7
    //  > Store gradient.0(2, 6) = 8
    //  > Store gradient.0(3, 6) = 9
    //  > Store gradient.0(4, 6) = 10
    //  > Store gradient.0(5, 6) = 11
    //  > Store gradient.0(6, 6) = 12
    //  > Store gradient.0(7, 6) = 13
    //  > Store gradient.0(0, 7) = 7
    //  > Store gradient.0(1, 7) = 8
    //  > Store gradient.0(2, 7) = 9
    //  > Store gradient.0(3, 7) = 10
    //  > Store gradient.0(4, 7) = 11
    //  > Store gradient.0(5, 7) = 12
    //  > Store gradient.0(6, 7) = 13
    //  > Store gradient.0(7, 7) = 14
    //  > End pipeline gradient.0()
    // -----------------------------------------------------------------------

    // Check that it did what we expected:
    for (int y = 0; y < 8; y++) {
        for (int x = 0; x < 8; x++) {
            if (result(x, y) != x + y) {
                printf("Something went wrong!\n");
                return -1;
            }
        }
    }

    // Now evaluate gradient over a 5 x 7 rectangle that starts somewhere
    // else: position (100, 50). So x and y run from (100, 50) to (104, 56).

    // First create an image that represents that rectangle:
    Buffer<int> shifted(5, 7); // The constructor is given the size.
    shifted.set_min(100, 50); // Then it is told where the top-left corner is.

    printf("Evaluating gradient from (100, 50) to (104, 56)\n");

    // Note that this does not need to compile any new code, because when we
    // realized gradient the first time we generated code capable of
    // evaluating it over an arbitrary rectangle.
    gradient.realize(shifted);

    // Trace output printed by the realize call above:
    // -----------------------------------------------------------------------
    //  > Begin pipeline gradient.0()
    //  > Store gradient.0(100, 50) = 150
    //  > Store gradient.0(101, 50) = 151
    //  > Store gradient.0(102, 50) = 152
    //  > Store gradient.0(103, 50) = 153
    //  > Store gradient.0(104, 50) = 154
    //  > Store gradient.0(100, 51) = 151
    //  > Store gradient.0(101, 51) = 152
    //  > Store gradient.0(102, 51) = 153
    //  > Store gradient.0(103, 51) = 154
    //  > Store gradient.0(104, 51) = 155
    //  > Store gradient.0(100, 52) = 152
    //  > Store gradient.0(101, 52) = 153
    //  > Store gradient.0(102, 52) = 154
    //  > Store gradient.0(103, 52) = 155
    //  > Store gradient.0(104, 52) = 156
    //  > Store gradient.0(100, 53) = 153
    //  > Store gradient.0(101, 53) = 154
    //  > Store gradient.0(102, 53) = 155
    //  > Store gradient.0(103, 53) = 156
    //  > Store gradient.0(104, 53) = 157
    //  > Store gradient.0(100, 54) = 154
    //  > Store gradient.0(101, 54) = 155
    //  > Store gradient.0(102, 54) = 156
    //  > Store gradient.0(103, 54) = 157
    //  > Store gradient.0(104, 54) = 158
    //  > Store gradient.0(100, 55) = 155
    //  > Store gradient.0(101, 55) = 156
    //  > Store gradient.0(102, 55) = 157
    //  > Store gradient.0(103, 55) = 158
    //  > Store gradient.0(104, 55) = 159
    //  > Store gradient.0(100, 56) = 156
    //  > Store gradient.0(101, 56) = 157
    //  > Store gradient.0(102, 56) = 158
    //  > Store gradient.0(103, 56) = 159
    //  > Store gradient.0(104, 56) = 160
    //  > End pipeline gradient.0()
    // -----------------------------------------------------------------------

    // From C++, the image object is also accessed using coordinates that
    // start at (100, 50).
    for (int y = 50; y < 57; y++) {
        for (int x = 100; x < 105; x++) {
            if (shifted(x, y) != x + y) {
                printf("Something went wrong!\n");
                return -1;
            }
        }
    }
    // The image 'shifted' stores the value of the Func over a domain that
    // starts at (100, 50), so asking for shifted(0, 0) would in fact read
    // out of bounds and probably crash.

    // What if we want to evaluate the Func over some non-rectangular domain?
    // Too bad. Halide only does rectangles.

    printf("Success!\n");
    return 0;
}

第七章多级管道

#include "Halide.h"
#include <stdio.h>
using namespace Halide;
// 加载PNG的支持代码.
#include "halide_image_io.h"
using namespace Halide::Tools;

// Lesson 7: multi-stage pipelines. Builds the same horizontal-then-vertical
// blur twice: first realized over a domain shrunk by one pixel on every side,
// then with a clamp-to-edge boundary condition wrapped around the input.
// Each result is written to a PNG file; returns 0 at the end.
int main(int argc, char **argv) {
    // First declare some Vars to use below.
    Var x("x"), y("y"), c("c");

    // Now express a multi-stage pipeline that blurs an image first
    // horizontally, and then vertically.
    {
        // Take a color 8-bit input.
        Buffer<uint8_t> input = load_image("images/rgb.png");

        // Upgrade it to 16-bit, so we can do math without it overflowing.
        Func input_16("input_16");
        input_16(x, y, c) = cast<uint16_t>(input(x, y, c));

        // Blur it horizontally:
        Func blur_x("blur_x");
        blur_x(x, y, c) = (input_16(x-1, y, c) +
                           2 * input_16(x, y, c) +
                           input_16(x+1, y, c)) / 4;

        // Blur it vertically:
        Func blur_y("blur_y");
        blur_y(x, y, c) = (blur_x(x, y-1, c) +
                           2 * blur_x(x, y, c) +
                           blur_x(x, y+1, c)) / 4;

        // Convert back to 8-bit.
        Func output("output");
        output(x, y, c) = cast<uint8_t>(blur_y(x, y, c));

        // Each Func in this pipeline calls a previous one using the familiar
        // function-call syntax (operator() is overloaded on Func objects).
        // A Func may call any other Func that has already been given a
        // definition. This restriction prevents pipelines from containing
        // loops. Halide pipelines are always feed-forward graphs of Funcs.

        // Now let's realize it...
        // Buffer<uint8_t> result = output.realize({input.width(), input.height(), 3});

        // Except that the line above is not going to work. Uncomment it to
        // see what happens.

        // Realizing this pipeline over the same domain as the input image
        // requires reading pixels outside the input's bounds, because the
        // blur_x stage reaches outward horizontally and the blur_y stage
        // reaches outward vertically. Halide detects this by injecting a
        // piece of code at the top of the pipeline that computes the region
        // over which the input will be read. When the pipeline starts
        // running, it first runs this code, determines that the input would
        // be read out of bounds, and refuses to continue. There are no actual
        // bounds checks in the inner loop; that would be slow.
        // In other words: mind the image borders and avoid reading past them.

        // So what do we do? There are a few options. If we realize over a
        // domain shifted inward by one pixel, we will not be asking the
        // Halide routine to read out of bounds. We saw how to do this in the
        // previous lesson:
        Buffer<uint8_t> result(input.width()-2, input.height()-2, 3);
        result.set_min(1, 1);
        output.realize(result);

        // Save the result. It should look like a slightly blurry parrot, and
        // it should be two pixels narrower and two pixels shorter than the
        // input image (because of the border handling).
        save_image(result, "blurry_parrot_1.png");

        // This is usually the fastest way to deal with boundaries: don't
        // write code that reads out of bounds :) The next example is a more
        // general solution.
    }

    // The same pipeline, with a boundary condition on the input.
    {
        // Take a color 8-bit input.
        Buffer<uint8_t> input = load_image("images/rgb.png");

        // This time, wrap the input in a Func that prevents reading out of
        // bounds:
        Func clamped("clamped");

        // Define an expression that clamps x to lie within the range
        // [0, input.width()-1].
        Expr clamped_x = clamp(x, 0, input.width()-1);
        // clamp(x, a, b) is equivalent to max(min(x, b), a).

        // Similarly clamp y.
        Expr clamped_y = clamp(y, 0, input.height()-1);
        // Load from the input at the clamped coordinates. This means that no
        // matter how we evaluate the Func 'clamped', we never read out of
        // bounds on the input. This is a clamp-to-edge style boundary
        // condition, and it is the simplest boundary condition to express in
        // Halide.
        clamped(x, y, c) = input(clamped_x, clamped_y, c);

        // 'clamped' can be defined more concisely using a helper function
        // from the BoundaryConditions namespace, like so:
        // clamped = BoundaryConditions::repeat_edge(input);
        // These helpers are important to use for other boundary conditions,
        // because they are expressed in the way Halide can best understand
        // and optimize. Used correctly, they work as well as having no
        // boundary condition at all.

        // Upgrade it to 16-bit, so we can do math without it overflowing.
        // This time we refer to our new Func 'clamped' instead of referring
        // to the input image directly.
        Func input_16("input_16");
        input_16(x, y, c) = cast<uint16_t>(clamped(x, y, c));

        // The rest of the pipeline is the same...

        // Blur it horizontally:
        Func blur_x("blur_x");
        blur_x(x, y, c) = (input_16(x-1, y, c) +
                           2 * input_16(x, y, c) +
                           input_16(x+1, y, c)) / 4;

        // Blur it vertically:
        Func blur_y("blur_y");
        blur_y(x, y, c) = (blur_x(x, y-1, c) +
                           2 * blur_x(x, y, c) +
                           blur_x(x, y+1, c)) / 4;

        // Convert back to 8-bit.
        Func output("output");
        output(x, y, c) = cast<uint8_t>(blur_y(x, y, c));

        // This time it is safe to evaluate the output over the same domain
        // as the input, because we have a boundary condition.
        // The extents are passed as one brace-enclosed list (the
        // std::vector<int32_t> form of realize) rather than as three separate
        // integers. NOTE(review): the separate-integer overload appears to
        // have been removed from current Halide releases - confirm against
        // the Halide version in use.
        Buffer<uint8_t> result = output.realize({input.width(), input.height(), 3});

        // Save the result. It should look like a slightly blurry parrot, but
        // this time it is the same size as the input.
        save_image(result, "blurry_parrot_2.png");
    }

    printf("Success!\n");
    return 0;
}

Aoulun

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
halide编程技术指南（连载三）

本文是halide编程指南的连载，已同步至公众号目录第六章在任意域上实现函数第七章多级管道第六章在任意域上实现函数本课演示如何在不从（0，0）开始的域上计算Func。#include "Halide.h"#include <stdio.h>using namespace Halide;int main(int argc, char ...
复制链接

扫一扫