// Halide入门教程09
// Halide tutorial lesson 9: Multi-pass Funcs, update definitions, and reductions
// Halide入门第九课: 多遍(multi-pass)函数、更新定义和约减
// On linux, you can compile and run it like so:
// g++ lesson_09*.cpp -g -std=c++11 -I ../include -I ../tools -L ../bin -lHalide `libpng-config --cflags --ldflags` -ljpeg -lpthread -ldl -fopenmp -o lesson_09
// LD_LIBRARY_PATH=../bin ./lesson_09
#include "Halide.h"
#include <stdio.h>
#ifdef __SSE2__
#include <emmintrin.h>
#endif
#include "clock.h"
using namespace Halide;
// Support code for loading pngs.
#include "halide_image_io.h"
using namespace Halide::Tools;
int main(int argc, char **argv) {
// Declare some Vars to use below.
Var x("x"), y("y");
// Load a grayscale image to use as an input.
Buffer<uint8_t> input = load_image("images/gray.png");
// You can define a Func in multiple passes. Let's see a toy
// example first.
// 通过多次来定义一个函数
{
// The first definition must be one like we have seen already
// - a mapping from Vars to an Expr:
Func f;
f(x, y) = x + y;
// We call this first definition the "pure" definition.
// 我们称第一次定义为pure(纯)定义
// But the later definitions can include computed expressions on
// both sides. The simplest example is modifying a single point:
// 随后的定义可以在等号两边使用计算表达式,最简单的例子是修改单个点的数值。
f(3, 7) = 42;
// We call these extra definitions "update" definitions, or
// "reduction" definitions. A reduction definition is an
// update definition that recursively refers back to the
// function's current value at the same site:
// 我们称这些额外的定义为“更新”定义或者“约减”定义。约减定义指的是递归地引用函数自身在同一位置上
// 当前值的更新定义。
f(x, y) = f(x, y) + 17;
// If we confine our update to a single row, we can
// recursively refer to values in the same column:
// 将更新限制在某一行(y固定)时,可以递归引用同一列中的其他值
f(x, 3) = f(x, 0) * f(x, 10);
// Similarly, if we confine our update to a single column, we
// can recursively refer to other values in the same row.
// 将更新限制在某一列(x固定)时,可以递归引用同一行中的其他值
f(0, y) = f(0, y) / f(3, y);
// The general rule is: Each Var used in an update definition
// must appear unadorned in the same position as in the pure
// definition in all references to the function on the left-
// and right-hand sides. So the following definitions are
// legal updates:
// 一般的规则是:更新定义中用到的每个Var,在等号两边对该函数的所有引用中,都必须朴素地(不加修饰地)出现在与纯定义相同的位置上。如下是一些合法的更新:
// 比如x方向的更新,x是朴素不变的
// y方向的更新,y是朴素不变的
f(x, 17) = x + 8;
f(0, y) = y * 8;
f(x, x + 1) = x + 8;
f(y/2, y) = f(0, y) * 17;
// But these ones would cause an error:
// f(x, 0) = f(x + 1, 0);
// First argument to f on the right-hand-side must be 'x', not 'x + 1'.
// f(y, y + 1) = y + 8;
// Second argument to f on the left-hand-side must be 'y', not 'y + 1'.
// f(y, x) = y - x;
// Arguments to f on the left-hand-side are in the wrong places.
// f(3, 4) = x + y;
// Free variables appear on the right-hand-side but not the left-hand-side.
// We'll realize this one just to make sure it compiles. The
// second-to-last definition forces us to realize over a
// domain that is taller than it is wide.
f.realize(100, 101);
// For each realization of f, each step runs in its entirety
// before the next one begins. Let's trace the loads and
// stores for a simpler example:
// 每步的更新都独自完成之后,才进行下一步更新
Func g("g");
g(x, y) = x + y; // Pure definition
g(2, 1) = 42; // First update definition
g(x, 0) = g(x, 1); // Second update definition
g.trace_loads();
g.trace_stores();
g.realize(4, 4);
// See figures/lesson_09_update.gif for a visualization.
// Reading the log, we see that each pass is applied in
// turn. The equivalent C is:
int result[4][4];
// Pure definition
for (int y = 0; y < 4; y++) {
for (int x = 0; x < 4; x++) {
result[y][x] = x + y;
}
}
// First update definition
result[1][2] = 42;
// Second update definition
for (int x = 0; x < 4; x++) {
result[0][x] = result[1][x];
}
}
// Putting update passes inside loops.
{
// Starting with this pure definition:
Func f;
f(x, y) = (x + y)/100.0f;
// Say we want an update that squares the first fifty rows. We
// could do this by adding 50 update definitions:
// f(x, 0) = f(x, 0) * f(x, 0);
// f(x, 1) = f(x, 1) * f(x, 1);
// f(x, 2) = f(x, 2) * f(x, 2);
// ...
// f(x, 49) = f(x, 49) * f(x, 49);
// Or equivalently using a compile-time loop in our C++:
// for (int i = 0; i < 50; i++) {
// f(x, i) = f(x, i) * f(x, i);
// }
// But it's more manageable and more flexible to put the loop
// in the generated code. We do this by defining a "reduction
// domain" and using it inside an update definition:
// 在指定区域进行更新
RDom r(0, 50);
f(x, r) = f(x, r) * f(x, r);
Buffer<float> halide_result = f.realize(100, 100);
// See figures/lesson_09_update_rdom.mp4 for a visualization.
// The equivalent C is:
float c_result[100][100];
for (int y = 0; y < 100; y++) {
for (int x = 0; x < 100; x++) {
c_result[y][x] = (x + y)/100.0f;
}
}
for (int x = 0; x < 100; x++) {
for (int r = 0; r < 50; r++) {
// The loop over the reduction domain occurs inside of
// the loop over any pure variables used in the update
// step:
c_result[r][x] = c_result[r][x] * c_result[r][x];
}
}
// Check the results match:
for (int y = 0; y < 100; y++) {
for (int x = 0; x < 100; x++) {
if (fabs(halide_result(x, y) - c_result[y][x]) > 0.01f) {
printf("halide_result(%d, %d) = %f instead of %f\n",
x, y, halide_result(x, y), c_result[y][x]);
return -1;
}
}
}
}
// Now we'll examine a real-world use for an update definition:
// computing a histogram.
{
// Some operations on images can't be cleanly expressed as a pure
// function from the output coordinates to the value stored
// there. The classic example is computing a histogram. The
// natural way to do it is to iterate over the input image,
// updating histogram buckets. Here's how you do that in Halide:
Func histogram("histogram");
// Histogram buckets start as zero.
histogram(x) = 0;
// Define a multi-dimensional reduction domain over the input image:
RDom r(0, input.width(), 0, input.height());
// For every point in the reduction domain, increment the
// histogram bucket corresponding to the intensity of the
// input image at that point.
histogram(input(r.x, r.y)) += 1;
Buffer<int> halide_result = histogram.realize(256);
// The equivalent C is:
int c_result[256];
for (int x = 0; x < 256; x++) {
c_result[x] = 0;
}
for (int r_y = 0; r_y < input.height(); r_y++) {