在BuildRelay编译Relay IR形式的模型时,会调用GraphExecutorCodegen::Codegen生成代码。该方法定义如下:
// Generate graph-executor code for a lowered Relay module.
//
// Pipeline: plan memory for `func`, lower the Relay IRModule to TIR via
// LowerTE, re-plan memory on the lowered "main", build the executor graph
// (JSON), then collect external modules, constants and per-target lowered
// functions into the returned LoweredOutput.
LoweredOutput Codegen(IRModule mod, relay::Function func, String mod_name) {
  mod_name_ = mod_name;
  VLOG_CONTEXT << "GraphExecutorCodegen";
  VLOG(1) << "compiling:" << std::endl << PrettyPrint(func);

  // TODO(mbs): Why plan memory and update workspace sizes before lowering?
  // Plan storage for the function's tensors.
  memory_plan_ = GraphPlanMemory(func);

  backend::FunctionInfo func_info;
  // defined() checks whether memory planning produced a result.
  if (memory_plan_.defined()) {
    // TODO(@electriclilies, @jroesch): remove UpdateMainWorkspaceSize
    // Update the module's workspace sizes from the fresh memory plan.
    func_info =
        relay::tec::UpdateMainWorkspaceSize(mod, config_, memory_plan_->expr_to_storage_info);
    // Attach the updated function info to the module as "main_func_info".
    mod = WithAttr(mod, "main_func_info", func_info);
  }

  // Lower the Relay IR module to tensor-expression (TIR) form.
  IRModule lowered_mod = tec::LowerTE(mod_name_, config_, [this](BaseFunc func) {
    // We need to maintain the constant map for external
    // functions so we pass this processing function which
    // allows us to process each function as we lower it.
    // (This is the body of the per-function lambda callback.)
    // If the function is handled by an external compiler (attr::kCompiler)...
    if (func->GetAttr<String>(attr::kCompiler).defined()) {
      // ...fold its bound constants into params_.
      // NOTE: this was garbled to `¶ms_` in transit; restored to `&params_`.
      UpdateConstants(func, &params_);
    }

    // TODO(@areusch, @jroesch): We should refactor this to
    // execute as a further pass, instead writing data to the
    // lowering process directly.
    // Refresh the per-function metadata as each function is lowered.
    tec::UpdateFunctionMetadata(func, this->function_metadata_);
  })(mod);

  Optional<backend::FunctionInfo> main_func_info =
      lowered_mod->GetAttr<backend::FunctionInfo>("main_func_info");
  // Record main's info under the canonical "__tvm_main__" symbol.
  function_metadata_.Set(runtime::symbol::tvm_module_main, main_func_info.value());

  // Look up the lowered "main" function in the lowered module.
  Function lowered_main_func = Downcast<Function>(lowered_mod->Lookup("main"));

  // Now that we have lowered all operators to TIR code, we can proceed with compilation.
  //
  // We need to unfortunately re-plan as the previous results have been invalidated by lowering
  // we will fix this in future refactors.
  memory_plan_ = GraphPlanMemory(lowered_main_func);

  // The graph planner also can not handle planning calls to global variables so we must remap.
  // First we convert all the parameters into input nodes.
  for (auto param : lowered_main_func->params) {
    auto node_ptr = GraphInputNode::make_node_ptr(param->name_hint(), GraphAttrs());
    var_map_[param.get()] = AddNode(node_ptr, param);
  }

  // Visit the body, emitting a graph node for every expression.
  heads_ = VisitExpr(lowered_main_func->body);

  // Serialize the executor graph to JSON.
  std::ostringstream os;
  dmlc::JSONWriter writer(&os);
  GetJSON(&writer);

  LoweredOutput ret;
  ret.graph_json = os.str();

  // Collect any runtime modules generated by external codegen.
  ret.external_mods =
      lowered_mod->GetAttr<Array<runtime::Module>>(tvm::attr::kExternalMods).value_or({});

  // Collect any constants extracted by external codegen.
  ret.params = std::unordered_map<std::string, tvm::runtime::NDArray>();
  Map<String, runtime::NDArray> const_name_to_constant =
      lowered_mod->GetAttr<Map<String, runtime::NDArray>>(tvm::attr::kConstNameToConstant)
          .value_or({});
  for (const auto& kv : const_name_to_constant) {
    VLOG(1) << "constant '" << kv.first << "' contributed by external codegen";
    ICHECK(ret.params.emplace(kv.first, kv.second).second);
  }

  // Collect any constants extracted during lowering.
  for (const auto& kv : params_) {
    VLOG(1) << "constant '" << kv.first << "' contributed by TECompiler";
    ICHECK(ret.params.emplace(kv.first, kv.second).second);
  }

  ret.function_metadata = std::move(function_metadata_);

  // This is the point where we separate the functions in the module by target.
  ret.lowered_funcs = tec::GetPerTargetModules(lowered_mod);
  ret.metadata =
      ExecutorCodegenMetadata({} /* inputs */, {} /* input_tensor_types */, {} /* outputs */,
                              {} /* output_tensor_types */, {} /* pools */, {} /* devices */,
                              runtime::kTvmExecutorGraph /* executor */, mod_name_ /* mod_name */,
                              "packed" /* interface_api */, Bool(false) /* unpacked_api */);
  return ret;
}
这里,我们暂时简单地把这个流程理解为以下几步:
1. 为模型函数规划内存,并据此更新模块的工作空间大小;
2. 将模型的relay ir低级化为张量表达形式;
3. 生成json形式并写文件;
4. 将模块分为device侧和host侧;
后面我们将逐一分析每个步骤具体做了什么。