在CompilerDriver的CompileMethod()方法中,可以看到调用了Compile()方法,而这个方法具体由QuickCompiler还是OptimizingCompiler来执行,取决于dex2oat的执行参数。本节主要分析--compiler-backend=Optimizing的情况,即调用OptimizingCompiler的Compile()方法。
OptimizingCompiler的实现源码位于art/compiler/optimizing/optimizing_compiler.cc。
OptimizingCompiler::Compile()
在这个方法中,首先会判断方法是否校验成功(并且不包含运行时抛出),成功的话继续调用TryCompile()方法;如果校验未通过,或者TryCompile()返回nullptr,则交给delegate_->Compile()进行编译,编译成功时记录kCompiledQuick。
// Entry point for compiling a single method.
//
// The optimizing path (TryCompile) is attempted only when the method verified
// without failures and its verified-method record has no runtime throw.
// Whenever that path is skipped or yields nullptr, the method is handed to
// `delegate_`, and a successful delegate compilation is recorded as
// kCompiledQuick. Returns the compiled method, or nullptr if both paths fail.
CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
                                            uint32_t access_flags,
                                            InvokeType invoke_type,
                                            uint16_t class_def_idx,
                                            uint32_t method_idx,
                                            jobject jclass_loader,
                                            const DexFile& dex_file) const {
  CompilerDriver* const driver = GetCompilerDriver();

  // Short-circuit evaluation matters: the verified-method lookup only happens
  // once the method is known to have verified without failures.
  const bool verified_cleanly =
      driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file) &&
      !driver->GetVerifiedMethod(&dex_file, method_idx)->HasRuntimeThrow();

  if (verified_cleanly) {
    CompiledMethod* const optimized = TryCompile(
        code_item, access_flags, invoke_type, class_def_idx, method_idx, jclass_loader, dex_file);
    if (optimized != nullptr) {
      return optimized;
    }
  } else {
    // Record why the optimizing path was not even attempted.
    MaybeRecordStat(driver->GetCompilerOptions().VerifyAtRuntime()
                        ? MethodCompilationStat::kNotCompiledVerifyAtRuntime
                        : MethodCompilationStat::kNotCompiledClassNotVerified);
  }

  // Fall back to the delegate compiler.
  CompiledMethod* const fallback = delegate_->Compile(
      code_item, access_flags, invoke_type, class_def_idx, method_idx, jclass_loader, dex_file);
  if (fallback != nullptr) {
    MaybeRecordStat(MethodCompilationStat::kCompiledQuick);
  }
  return fallback;
}
OptimizingCompiler::TryCompile()
TryCompile(),顾名思义,就是尝试编译:编译成功则返回编译结果,编译不成功则返回nullptr,由调用者Compile()改用其他方式编译。在这个方法中,会根据各种参数判断是否需要优化:
// Attempts to compile one method with the optimizing compiler.
//
// Returns the CompiledMethod produced by CompileOptimized() or
// CompileBaseline(). Returns nullptr when the method is rejected by one of the
// checks below (cannot optimize and baseline not selected, unsupported
// instruction set, pathological case, space filter, no code generator, graph
// construction failure, SSA construction failure) or when no compilation mode
// applies. Most rejection paths record a MethodCompilationStat first.
//
// `invoke_type` is not used by this function.
CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item,
uint32_t access_flags,
InvokeType invoke_type,
uint16_t class_def_idx,
uint32_t method_idx,
jobject class_loader,
const DexFile& dex_file) const {
UNUSED(invoke_type);
// The pretty-printed name is used for logging and for the `$opt$` test markers below.
std::string method_name = PrettyMethod(method_idx, dex_file);
MaybeRecordStat(MethodCompilationStat::kAttemptCompilation);
CompilerDriver* compiler_driver = GetCompilerDriver();
InstructionSet instruction_set = compiler_driver->GetInstructionSet();
// Always use the thumb2 assembler: some runtime functionality (like implicit stack
// overflow checks) assume thumb2.
if (instruction_set == kArm) {
instruction_set = kThumb2;
}
// `run_optimizations_` is set explicitly (either through a compiler filter
// or the debuggable flag). If it is set, we can run baseline. Otherwise, we
// fall back to Quick.
// In code terms: baseline is selected exactly when `run_optimizations_` is false.
bool should_use_baseline = !run_optimizations_;
bool can_optimize = CanOptimize(*code_item);
if (!can_optimize && !should_use_baseline) {
// We know we will not compile this method. Bail out before doing any work.
return nullptr;
}
// Do not attempt to compile on architectures we do not support.
if (!IsInstructionSetSupported(instruction_set)) {
MaybeRecordStat(MethodCompilationStat::kNotCompiledUnsupportedIsa);
return nullptr;
}
if (Compiler::IsPathologicalCase(*code_item, method_idx, dex_file)) {
MaybeRecordStat(MethodCompilationStat::kNotCompiledPathological);
return nullptr;
}
// Implementation of the space filter: do not compile a code item whose size in
// code units is bigger than 128.
static constexpr size_t kSpaceFilterOptimizingThreshold = 128;
const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions();
if ((compiler_options.GetCompilerFilter() == CompilerOptions::kSpace)
&& (code_item->insns_size_in_code_units_ > kSpaceFilterOptimizingThreshold)) {
MaybeRecordStat(MethodCompilationStat::kNotCompiledSpaceFilter);
return nullptr;
}
// Bundle the per-method inputs (class loader, class linker, dex file, code item,
// indices, access flags and the verified-method record) into one compilation unit.
DexCompilationUnit dex_compilation_unit(
nullptr, class_loader, art::Runtime::Current()->GetClassLinker(), dex_file, code_item,
class_def_idx, method_idx, access_flags,
compiler_driver->GetVerifiedMethod(&dex_file, method_idx));
// Arena backed by the runtime's arena pool; the graph below is allocated through it.
ArenaAllocator arena(Runtime::Current()->GetArenaPool());
之后构造一个HGraph对象
// NOTE(review): the graph is given compiler_driver->GetInstructionSet(), not the
// local `instruction_set` that may have been switched to kThumb2 above, while the
// code generator below receives the local value. Confirm this is intended.
HGraph* graph = new (&arena) HGraph(
&arena, dex_file, method_idx, compiler_driver->GetInstructionSet(),
compiler_driver->GetCompilerOptions().GetDebuggable());
// For testing purposes, we put a special marker on method names that should be compiled
// with this compiler. This makes sure we're not regressing.
// `shouldOptimize` additionally requires `run_optimizations_` to be true.
bool shouldCompile = method_name.find("$opt$") != std::string::npos;
bool shouldOptimize = method_name.find("$opt$reg$") != std::string::npos && run_optimizations_;
std::unique_ptr<CodeGenerator> codegen(
CodeGenerator::Create(graph,
instruction_set,
*compiler_driver->GetInstructionSetFeatures(),
compiler_driver->GetCompilerOptions()));
if (codegen.get() == nullptr) {
// A missing code generator is a hard failure for methods marked `$opt$`;
// otherwise it is recorded and the method is skipped.
CHECK(!shouldCompile) << "Could not find code generator for optimizing compiler";
MaybeRecordStat(MethodCompilationStat::kNotCompiledNoCodegen);
return nullptr;
}
// CFI generation in the assembler follows the GetGenerateDebugInfo() compiler option.
codegen->GetAssembler()->cfi().SetEnabled(
compiler_driver->GetCompilerOptions().GetGenerateDebugInfo());
PassInfoPrinter pass_info_printer(graph,
method_name.c_str(),
*codegen.get(),
visualizer_output_.get(),
compiler_driver);
构造一个HGraphBuilder对象,然后调用HGraphBuilder的BuildGraph方法构造这个图。
HGraphBuilder builder(graph,
&dex_compilation_unit,
&dex_compilation_unit,
&dex_file,
compiler_driver,
compilation_stats_.get());
VLOG(compiler) << "Building " << method_name;
// The extra scope ends the lifetime of `pass_info` right after graph building.
{
PassInfo pass_info(HGraphBuilder::kBuilderPassName, &pass_info_printer);
if (!builder.BuildGraph(*code_item)) {
// Failing to build the graph is only asserted against (in debug checks) for
// `$opt$` methods while compiling with the core image.
DCHECK(!(IsCompilingWithCoreImage() && shouldCompile))
<< "Could not build graph in optimizing compiler";
return nullptr;
}
}
尝试对HGraph构造SSA。
bool can_allocate_registers = RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set);
// Optimized path: requires optimizations to be enabled, the method to be
// optimizable, and register allocation to be possible for this graph.
if (run_optimizations_ && can_optimize && can_allocate_registers) {
VLOG(compiler) << "Optimizing " << method_name;
// The extra scope ends the lifetime of `pass_info` right after SSA construction.
{
PassInfo pass_info(SsaBuilder::kSsaBuilderPassName, &pass_info_printer);
if (!graph->TryBuildingSsa()) {
// We could not transform the graph to SSA, bailout.
LOG(INFO) << "Skipping compilation of " << method_name << ": it contains a non natural loop";
MaybeRecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA);
return nullptr;
}
}
return CompileOptimized(graph,
codegen.get(),
compiler_driver,
dex_file,
dex_compilation_unit,
&pass_info_printer);
// NOTE(review): this branch requires can_allocate_registers to be true, yet the
// fatal message says registers could not be allocated. Given the first condition
// failed and shouldOptimize implies run_optimizations_, it is reached only when
// can_optimize is false. Confirm the intended condition.
} else if (shouldOptimize && can_allocate_registers) {
LOG(FATAL) << "Could not allocate registers in optimizing compiler";
UNREACHABLE();
下面是不优化的情况:
} else if (should_use_baseline) {
VLOG(compiler) << "Compile baseline " << method_name;
// Record the first applicable reason the optimized path was not taken.
if (!run_optimizations_) {
MaybeRecordStat(MethodCompilationStat::kNotOptimizedDisabled);
} else if (!can_optimize) {
MaybeRecordStat(MethodCompilationStat::kNotOptimizedTryCatch);
} else if (!can_allocate_registers) {
MaybeRecordStat(MethodCompilationStat::kNotOptimizedRegisterAllocator);
}
return CompileBaseline(codegen.get(), compiler_driver, dex_compilation_unit);
} else {
// Neither the optimized nor the baseline path applies.
return nullptr;
}
}
从上边代码可以看到,优化的部分调用了CompileOptimized()方法。
OptimizingCompiler::CompileOptimized()
CompileOptimized()主要完成的是优化编译,这里包含一个很重要的方法——RunOptimizations(graph, compiler_driver, compilation_stats_.get(),dex_file, dex_compilation_unit, pass_info_printer, &handles);
优化编译过程首先在RunOptimizations()方法中完成15轮优化,之后调用AllocateRegisters(graph, codegen, pass_info_printer)完成最后一轮优化。
// Optimized compilation of an already-built graph.
//
// Steps, in order: run the optimization passes, allocate registers, emit code
// into a CodeVectorAllocator, build the source map (only when debug info is
// requested), build the stack maps, record kCompiledOptimized, and package
// everything into a CompiledMethod via SwapAllocCompiledMethod.
CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
                                                     CodeGenerator* codegen,
                                                     CompilerDriver* compiler_driver,
                                                     const DexFile& dex_file,
                                                     const DexCompilationUnit& dex_compilation_unit,
                                                     PassInfoPrinter* pass_info_printer) const {
  // Optimization passes followed by register allocation.
  StackHandleScopeCollection handle_scopes(Thread::Current());
  RunOptimizations(graph,
                   compiler_driver,
                   compilation_stats_.get(),
                   dex_file,
                   dex_compilation_unit,
                   pass_info_printer,
                   &handle_scopes);
  AllocateRegisters(graph, codegen, pass_info_printer);

  // Code emission.
  CodeVectorAllocator code_allocator;
  codegen->CompileOptimized(&code_allocator);

  // The source map stays empty unless debug info generation is enabled.
  DefaultSrcMap source_map;
  const bool wants_debug_info = compiler_driver->GetCompilerOptions().GetGenerateDebugInfo();
  if (wants_debug_info) {
    codegen->BuildSourceMap(&source_map);
  }

  std::vector<uint8_t> stack_maps;
  codegen->BuildStackMaps(&stack_maps);

  MaybeRecordStat(MethodCompilationStat::kCompiledOptimized);

  // Follow Quick's behavior and set the frame size to zero if it is
  // considered "empty" (see the definition of
  // art::CodeGenerator::HasEmptyFrame).
  const auto frame_size = codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize();

  return CompiledMethod::SwapAllocCompiledMethod(
      compiler_driver,
      codegen->GetInstructionSet(),
      ArrayRef<const uint8_t>(code_allocator.GetMemory()),
      frame_size,
      codegen->GetCoreSpillMask(),
      codegen->GetFpuSpillMask(),
      &source_map,
      ArrayRef<const uint8_t>(),  // mapping_table: left empty.
      ArrayRef<const uint8_t>(stack_maps),
      ArrayRef<const uint8_t>(),  // native_gc_map: left empty.
      ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
      ArrayRef<const LinkerPatch>());
}
static void RunOptimizations()
RunOptimizations()完成了一共15轮优化,分别为
- IntrinsicsRecognizer
- 第一次HConstantFolding
- 第一次InstructionSimplifier
- 第一次HDeadCodeElimination
- HInliner
- HBooleanSimplifier
- 第二次HConstantFolding
- SideEffectsAnalysis
- GVNOptimization
- LICM
- BoundsCheckElimination
- ReferenceTypePropagation
- 第二次InstructionSimplifier
- 第二次HDeadCodeElimination
- 第三次InstructionSimplifier
// Runs `length` optimization passes from `optimizations` in array order.
// For each pass: a PassInfo built from the pass name and `pass_info_printer`
// is alive while Run() executes and is destroyed before Check() is called.
static void RunOptimizations(HOptimization* optimizations[],
                             size_t length,
                             PassInfoPrinter* pass_info_printer) {
  HOptimization** const last = optimizations + length;
  for (HOptimization** cursor = optimizations; cursor != last; ++cursor) {
    HOptimization* const pass = *cursor;
    {
      // Scoped so that the PassInfo covers only the Run() call.
      PassInfo scoped_info(pass->GetPassName(), pass_info_printer);
      pass->Run();
    }
    pass->Check();
  }
}
// Builds the fixed pipeline of optimization passes for `graph` and runs it
// through the array-based RunOptimizations overload. The pass objects are
// constructed up front; the execution order is the order of the `pipeline`
// array, not the order of construction.
static void RunOptimizations(HGraph* graph,
                             CompilerDriver* driver,
                             OptimizingCompilerStats* stats,
                             const DexFile& dex_file,
                             const DexCompilationUnit& dex_compilation_unit,
                             PassInfoPrinter* pass_info_printer,
                             StackHandleScopeCollection* handles) {
  // Dead code elimination: one early pass, one late pass.
  HDeadCodeElimination initial_dce(
      graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName);
  HDeadCodeElimination final_dce(
      graph, stats, HDeadCodeElimination::kFinalDeadCodeEliminationPassName);

  HConstantFolding folding_initial(graph);
  InstructionSimplifier simplifier_initial(graph, stats);
  HBooleanSimplifier bool_simplifier(graph);
  HInliner inlining(graph, dex_compilation_unit, dex_compilation_unit, driver, stats);
  HConstantFolding folding_after_inlining(graph, "constant_folding_after_inlining");

  // GVN and LICM are both constructed from the side-effects analysis.
  SideEffectsAnalysis side_effects_analysis(graph);
  GVNOptimization global_value_numbering(graph, side_effects_analysis);
  LICM loop_invariant_motion(graph, side_effects_analysis);

  BoundsCheckElimination bounds_checks(graph);
  ReferenceTypePropagation reference_types(graph, dex_file, dex_compilation_unit, handles);
  InstructionSimplifier simplifier_after_types(
      graph, stats, "instruction_simplifier_after_types");
  InstructionSimplifier simplifier_before_codegen(
      graph, stats, "instruction_simplifier_before_codegen");
  IntrinsicsRecognizer intrinsics_recognizer(graph, dex_compilation_unit.GetDexFile(), driver);

  HOptimization* pipeline[] = {
    &intrinsics_recognizer,
    &folding_initial,
    &simplifier_initial,
    &initial_dce,
    &inlining,
    // BooleanSimplifier depends on the InstructionSimplifier removing redundant
    // suspend checks to recognize empty blocks.
    &bool_simplifier,
    &folding_after_inlining,
    &side_effects_analysis,
    &global_value_numbering,
    &loop_invariant_motion,
    &bounds_checks,
    &reference_types,
    &simplifier_after_types,
    &final_dce,
    // The codegen has a few assumptions that only the instruction simplifier can
    // satisfy. For example, the code generator does not expect to see a
    // HTypeConversion from a type to the same type.
    &simplifier_before_codegen,
  };

  RunOptimizations(pipeline, arraysize(pipeline), pass_info_printer);
}