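// Halide tutorial lesson 5: vectorization, parallelization, tiling, and data blocking.

// This lesson shows how to control the order in which a function's pixel
// indices are evaluated, using techniques such as vectorization,
// parallelization, tiling, and blocking.

// On Linux, compile and run with:
// g++ lesson_05*.cpp -g -I ../include -L ../bin -lHalide -lpthread -ldl -o lesson_05 -std=c++11
// LD_LIBRARY_PATH=../bin ./lesson_05

#include "Halide.h"
#include <stdio.h>
#include <algorithm>
using namespace Halide;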
int main(int argc, char **argv) {
Var x("x"), y("y");
// First we observe the default ordering.
{
    // Baseline case: a 2-D gradient with no scheduling directives applied.
    Func gradient("gradient");
    gradient(x, y) = x + y;
    // Ask Halide to trace every store so the evaluation order shows up in
    // the program output.
    gradient.trace_stores();

    // By default pixels are visited in row-major order: the inner loop walks
    // along a row (x) and the outer loop steps from row to row (y).
    printf("Evaluating gradient row-major\n");
    Buffer<int> output = gradient.realize(4, 4);

    // The equivalent C is:
    printf("Equivalent C:\n");
    for (int y = 0; y < 4; y++) {
        for (int x = 0; x < 4; x++) {
            printf("Evaluating at x = %d, y = %d: %d\n", x, y, x + y);
        }
    }
    printf("\n\n");

    // Tracing makes it easy to see how a schedule behaves. Halide can also
    // print the loop nest that the current schedule will actually execute.
    printf("Pseudo-code for the schedule:\n");
    gradient.print_loop_nest();
    printf("\n");

    // Because we're using the default ordering, it should print:
    // compute gradient:
    //   for y:
    //     for x:
    //       gradient(...) = ...
}
// Reorder variables.
{
    // Same gradient as before, but traversed column-major by reordering the
    // loops.
    Func gradient("gradient_col_major");
    gradient(x, y) = x + y;
    gradient.trace_stores();

    // reorder() changes the order in which the function's variables are
    // traversed. The call below places y in the inner loop and moves x,
    // which was the inner loop by default, to the outer loop. In other
    // words y now varies faster than x: a column-major traversal.
    gradient.reorder(y, x);

    printf("Evaluating gradient column-major\n");
    Buffer<int> output = gradient.realize(4, 4);

    // The equivalent C has the x and y loops swapped relative to the
    // default ordering.
    printf("Equivalent C:\n");
    for (int x = 0; x < 4; x++) {
        for (int y = 0; y < 4; y++) {
            printf("Evaluating at x = %d, y = %d: %d\n", x, y, x + y);
        }
    }
    printf("\n");

    // Print the loop nest for this schedule, with the same header the other
    // sections of this lesson use.
    printf("Pseudo-code for the schedule:\n");
    gradient.print_loop_nest();
    printf("\n");
}
// Split a variable into two.
{
    // Same gradient once more, this time with the x loop split in two.
    Func gradient("gradient_split");
    gradient(x, y) = x + y;
    gradient.trace_stores();

    // Among the primitive scheduling operations, split is the most useful:
    // it breaks one large loop into an outer loop and an inner loop. Here
    // the loop over x becomes an outer loop over x_outer and an inner loop
    // over x_inner, with the inner loop having an extent of 2.
    const int inner_extent = 2;
    Var x_outer;
    Var x_inner;
    gradient.split(x, x_outer, x_inner, inner_extent);

    printf("Evaluating gradient with x split into x_outer and x_inner \n");
    const int width = 4;
    const int height = 4;
    Buffer<int> output = gradient.realize(width, height);

    // Hand-written C that visits the pixels in the same order.
    printf("Equivalent C:\n");
    for (int y = 0; y < 4; ++y) {
        for (int x_outer = 0; x_outer < 2; ++x_outer) {
            for (int x_inner = 0; x_inner < 2; ++x_inner) {
                // Rebuild the original x coordinate from the two split indices.
                const int x = x_outer * 2 + x_inner;
                printf("Evaluating at x = %d, y = %d: %d\n", x, y, x + y);
            }
        }
    }
    printf("\n");

    // Show the loop nest Halide generates for this schedule.
    printf("Pseudo-code for the schedule:\n");
    gradient.print_loop_nest();
    printf("\n");
}
// Fuse two variables into one.
{
Func gradient("gradient_fused");
gradient(x, y) = x + y;
// 和split相反的是fuse,它将两个变量融合成一个变量,fuse的重要性并没有split高。
Var fused;
gradient.fuse(x, y, fused);
printf("Evaluating gradient with x and y fused\n");
Buffer<int> output = gradient.reali