Halide学习笔记----Halide tutorial源码阅读9

本文链接：https://blog.csdn.net/luzhanbo207/article/details/78876942
Halide入门教程09

// Halide tutorial lesson 9: Multi-pass Funcs, update definitions, and reductions
// Halide入门第九课：多遍（multi-pass）函数、更新定义和约减

// On linux, you can compile and run it like so:
// g++ lesson_09*.cpp -g -std=c++11 -I ../include -I ../tools -L ../bin -lHalide `libpng-config --cflags --ldflags` -ljpeg -lpthread -ldl -fopenmp -o lesson_09
// LD_LIBRARY_PATH=../bin ./lesson_09

#include "Halide.h"
#include <stdio.h>

#ifdef __SSE2__
#include <emmintrin.h>
#endif

#include "clock.h"

using namespace Halide;

// Support code for loading pngs.
#include "halide_image_io.h"
using namespace Halide::Tools;

int main(int argc, char **argv) {
    // Declare some Vars to use below.
    Var x("x"), y("y");

    // Load a grayscale image to use as an input.
    Buffer<uint8_t> input = load_image("images/gray.png");

    // You can define a Func in multiple passes. Let's see a toy
    // example first.
    // 一个 Func 可以分多遍（多次）来定义，下面先看一个简单的示例。
    {
        // The first definition must be one like we have seen already
        // - a mapping from Vars to an Expr:
        Func f;
        f(x, y) = x + y;
        // We call this first definition the "pure" definition.
        // 我们称第一次定义为“纯”（pure）定义

        // But the later definitions can include computed expressions on
        // both sides. The simplest example is modifying a single point:
        // 随后的定义可以在等号两边都使用计算得到的表达式，最简单的例子是修改单个点的数值。
        f(3, 7) = 42;

        // We call these extra definitions "update" definitions, or
        // "reduction" definitions. A reduction definition is an
        // update definition that recursively refers back to the
        // function's current value at the same site:
        // 我们称这些额外的定义为“更新”定义或者“约减”定义。约减定义指的是在同一位置上
        // 递归地引用函数当前值的更新定义。
        f(x, y) = f(x, y) + 17;

        // If we confine our update to a single row, we can
        // recursively refer to values in the same column:
        // 如果把更新限制在某一行（这里 y 固定为 3），就可以递归引用同一列（相同 x）中其他行的值
        f(x, 3) = f(x, 0) * f(x, 10);

        // Similarly, if we confine our update to a single column, we
        // can recursively refer to other values in the same row.
        // 类似地，如果把更新限制在某一列（这里 x 固定为 0），就可以递归引用同一行（相同 y）中其他列的值
        f(0, y) = f(0, y) / f(3, y);

        // The general rule is: Each Var used in an update definition
        // must appear unadorned in the same position as in the pure
        // definition in all references to the function on the left-
        // and right-hand sides. So the following definitions are
        // legal updates:
        // 一般的规则是：更新定义中用到的每个 Var，在等号左右两边对该函数的所有引用中，
        // 都必须以不加修饰的形式出现在与纯定义相同的位置。如下是一些合法的更新：
        // 比如用到 x 的更新，x 必须原样出现在第一个参数位置；
        // 用到 y 的更新，y 必须原样出现在第二个参数位置。
        f(x, 17) = x + 8;
        f(0, y) = y * 8;
        f(x, x + 1) = x + 8;
        f(y/2, y) = f(0, y) * 17;

        // But these ones would cause an error:

        // f(x, 0) = f(x + 1, 0);
        // First argument to f on the right-hand-side must be 'x', not 'x + 1'.

        // f(y, y + 1) = y + 8;
        // Second argument to f on the left-hand-side must be 'y', not 'y + 1'.

        // f(y, x) = y - x;
        // Arguments to f on the left-hand-side are in the wrong places.

        // f(3, 4) = x + y;
        // Free variables appear on the right-hand-side but not the left-hand-side.

        // We'll realize this one just to make sure it compiles. The
        // second-to-last definition forces us to realize over a
        // domain that is taller than it is wide.
        f.realize(100, 101);

        // For each realization of f, each step runs in its entirety
        // before the next one begins. Let's trace the loads and
        // stores for a simpler example:
        // 每步的更新都独自完成之后，才进行下一步更新
        Func g("g");
        g(x, y) = x + y;   // Pure definition
        g(2, 1) = 42;      // First update definition
        g(x, 0) = g(x, 1); // Second update definition

        g.trace_loads();
        g.trace_stores();

        g.realize(4, 4);

        // See figures/lesson_09_update.gif for a visualization.

        // Reading the log, we see that each pass is applied in
        // turn. The equivalent C is:
        int result[4][4];
        // Pure definition
        for (int y = 0; y < 4; y++) {
            for (int x = 0; x < 4; x++) {
                result[y][x] = x + y;
            }
        }
        // First update definition
        result[1][2] = 42;
        // Second update definition
        for (int x = 0; x < 4; x++) {
            result[0][x] = result[1][x];
        }
    }

    // Putting update passes inside loops.
    {
        // Starting with this pure definition:
        Func f;
        f(x, y) = (x + y)/100.0f;

        // Say we want an update that squares the first fifty rows. We
        // could do this by adding 50 update definitions:

        // f(x, 0) = f(x, 0) * f(x, 0);
        // f(x, 1) = f(x, 1) * f(x, 1);
        // f(x, 2) = f(x, 2) * f(x, 2);
        // ...
        // f(x, 49) = f(x, 49) * f(x, 49);

        // Or equivalently using a compile-time loop in our C++:
        // for (int i = 0; i < 50; i++) {
        //   f(x, i) = f(x, i) * f(x, i);
        // }

        // But it's more manageable and more flexible to put the loop
        // in the generated code. We do this by defining a "reduction
        // domain" and using it inside an update definition:
        // 定义一个“约减域”（RDom）并在更新定义中使用它，让循环出现在生成的代码中，
        // 从而只在指定区域内进行更新
        RDom r(0, 50);
        f(x, r) = f(x, r) * f(x, r);
        Buffer<float> halide_result = f.realize(100, 100);

        // See figures/lesson_09_update_rdom.mp4 for a visualization.

        // The equivalent C is:
        float c_result[100][100];
        for (int y = 0; y < 100; y++) {
            for (int x = 0; x < 100; x++) {
                c_result[y][x] = (x + y)/100.0f;
            }
        }
        for (int x = 0; x < 100; x++) {
            for (int r = 0; r < 50; r++) {
                // The loop over the reduction domain occurs inside of
                // the loop over any pure variables used in the update
                // step:
                c_result[r][x] = c_result[r][x] * c_result[r][x];
            }
        }

        // Check the results match:
        for (int y = 0; y < 100; y++) {
            for (int x = 0; x < 100; x++) {
                if (fabs(halide_result(x, y) - c_result[y][x]) > 0.01f) {
                    printf("halide_result(%d, %d) = %f instead of %f\n",
                           x, y, halide_result(x, y), c_result[y][x]);
                    return -1;
                }
            }
        }
    }

    // Now we'll examine a real-world use for an update definition:
    // computing a histogram.
    {

        // Some operations on images can't be cleanly expressed as a pure
        // function from the output coordinates to the value stored
        // there. The classic example is computing a histogram. The
        // natural way to do it is to iterate over the input image,
        // updating histogram buckets. Here's how you do that in Halide:
        Func histogram("histogram");

        // Histogram buckets start as zero.
        histogram(x) = 0;

        // Define a multi-dimensional reduction domain over the input image:
        RDom r(0, input.width(), 0, input.height());

        // For every point in the reduction domain, increment the
        // histogram bucket corresponding to the intensity of the
        // input image at that point.
        histogram(input(r.x, r.y)) += 1;

        Buffer<int> halide_result = histogram.realize(256);

        // The equivalent C is:
        int c_result[256];
        for (int x = 0; x < 256; x++) {
            c_result[x] = 0;
        }
        for (int r_y = 0; r_y < input.height(); r_y++) {