【hotspot】执行字节码指令流程(4)：编译器C1/C2

最新推荐文章于 2024-03-29 08:00:58 发布

0x13

最新推荐文章于 2024-03-29 08:00:58 发布

阅读量434

点赞数

分类专栏： HotSpot JVM 源码分析文章标签： jvm java 算法

本文链接：https://blog.csdn.net/qq_34448345/article/details/130360801

版权

HotSpot JVM 源码分析专栏收录该内容

12 篇文章 4 订阅

订阅专栏

1.编译器初始化以及编译入口

首先看 JIT 的初始化，是在 JVM 初始化时 Threads::create_vm() 调用 CompileBroker::compilation_init() 创建编译器线程：

// Excerpt of VM startup; most of the body is elided in this listing.
// The visible part calls CompileBroker::compilation_init() when at least one
// of COMPILER1, COMPILER2 or SHARK is defined, and then returns JNI_OK.
jint Threads::create_vm(JavaVMInitArgs* args, bool* canTryAgain) {
  // ... (elided)
  // initialize compiler(s)
#if defined(COMPILER1) || defined(COMPILER2) || defined(SHARK)
  CompileBroker::compilation_init();
#endif
  // ... (elided)
  return JNI_OK;
}

编译器管理器 CompileBroker 负责编译器的初始化、管理等工作，根据注释 The broker for all compilation requests 就能看出它负责所有编译请求的处理，定义如下：

// Static-only broker for compilation requests (excerpt; other members are
// elided). Holds the compiler instances, the per-compiler task queues, the
// compiler threads and bookkeeping about the most recent compilation.
class CompileBroker: AllStatic {
  // ... (elided)
private:
  static bool _initialized; // set to true at the end of compilation_init()
  static volatile bool _should_block; // broker state flag (NOTE(review): exact semantics not visible in this excerpt)
  static volatile jint _should_compile_new_jobs; // marker telling whether compiling new jobs is currently stopped or enabled
  static AbstractCompiler* _compilers[2]; // fixed length 2; compilation_init() stores C1 at index 0 and C2 (or Shark) at index 1
  static volatile jint _compilation_id; // compilation id counter (presumably for normal compilations - confirm)
  static volatile jint _osr_compilation_id; // compilation id counter (presumably for OSR compilations - confirm)
  static int  _last_compile_type; // type of the last compilation; per the original annotation one of no_compile, normal_compile, osr_compile, native_compile
  static int  _last_compile_level; // level of the last compilation
  static char _last_method_compiled[name_buffer_length]; // name of the last compiled method
  static CompileQueue* _c2_compile_queue; // task queue consumed by the C2 compiler threads
  static CompileQueue* _c1_compile_queue; // task queue consumed by the C1 compiler threads
  static GrowableArray<CompilerThread*>* _compiler_threads; // all compiler threads created by init_compiler_threads()
};

调用 CompileBroker::compilation_init() 进行编译器初始化，会根据宏定义初始化 C1/C2 编译器，并初始化编译器线程和计数器等等。


void CompileBroker::compilation_init() {
  _last_method_compiled[0] = '\0';
  // 如果不使用编译,编译器保存在 _compilers 中
  if (!UseCompiler) {
    return;
  }
#ifndef SHARK
  // Set the interface to the current compiler(s).
  int c1_count = CompilationPolicy::policy()->compiler_count(CompLevel_simple);
  int c2_count = CompilationPolicy::policy()->compiler_count(CompLevel_full_optimization);
#ifdef COMPILER1 C1
  if (c1_count > 0) {
    _compilers[0] = new Compiler();
  }
#endif // COMPILER1
#ifdef COMPILER2 C2
  if (c2_count > 0) {
    _compilers[1] = new C2Compiler();
  }
#endif // COMPILER2
#else // SHARK SharkCompiler就是新的基于LLVM架构的编译器
  int c1_count = 0;
  int c2_count = 1;
  _compilers[1] = new SharkCompiler();
#endif // SHARK
  // 初始化CompilerThreads 和 CompileQueue
  init_compiler_threads(c1_count, c2_count);
  // totalTime performance counter is always created as it is required
  // by the implementation of java.lang.management.CompilationMBean.
  // 初始化性能统计相关计数器
  {
    EXCEPTION_MARK;
    _perf_total_compilation = PerfDataManager::create_counter(JAVA_CI, "totalTime",PerfData::U_Ticks, CHECK);
  }
  // 编译器编译一个Java方法会有很多步骤,下面是对这些步骤的性能统计
  if (UsePerfData) {
    EXCEPTION_MARK;
    // create the jvmstat performance counters
    _perf_osr_compilation = PerfDataManager::create_counter(SUN_CI, "osrTime",PerfData::U_Ticks, CHECK);
    _perf_standard_compilation = PerfDataManager::create_counter(SUN_CI, "standardTime",PerfData::U_Ticks, CHECK);
    _perf_total_bailout_count = PerfDataManager::create_counter(SUN_CI, "totalBailouts",PerfData::U_Events, CHECK);
    _perf_total_invalidated_count = PerfDataManager::create_counter(SUN_CI, "totalInvalidates",PerfData::U_Events, CHECK);
    _perf_total_compile_count = PerfDataManager::create_counter(SUN_CI, "totalCompiles",PerfData::U_Events, CHECK);
    _perf_total_osr_compile_count = PerfDataManager::create_counter(SUN_CI, "osrCompiles",PerfData::U_Events, CHECK);
    _perf_total_standard_compile_count = PerfDataManager::create_counter(SUN_CI, "standardCompiles",PerfData::U_Events, CHECK);
    _perf_sum_osr_bytes_compiled = PerfDataManager::create_counter(SUN_CI, "osrBytes",PerfData::U_Bytes, CHECK);
    _perf_sum_standard_bytes_compiled = PerfDataManager::create_counter(SUN_CI, "standardBytes",PerfData::U_Bytes, CHECK);
    _perf_sum_nmethod_size = PerfDataManager::create_counter(SUN_CI, "nmethodSize",PerfData::U_Bytes, CHECK);
    _perf_sum_nmethod_code_size = PerfDataManager::create_counter(SUN_CI, "nmethodCodeSize",PerfData::U_Bytes, CHECK);
    _perf_last_method = PerfDataManager::create_string_variable(SUN_CI, "lastMethod",CompilerCounters::cmname_buffer_length,"", CHECK);
    _perf_last_failed_method = PerfDataManager::create_string_variable(SUN_CI, "lastFailedMethod",CompilerCounters::cmname_buffer_length,"", CHECK);
    _perf_last_invalidated_method = PerfDataManager::create_string_variable(SUN_CI, "lastInvalidatedMethod",CompilerCounters::cmname_buffer_length,"", CHECK);
    _perf_last_compile_type = PerfDataManager::create_variable(SUN_CI, "lastType",PerfData::U_None,(jlong)CompileBroker::no_compile,CHECK);
    _perf_last_compile_size = PerfDataManager::create_variable(SUN_CI, "lastSize",PerfData::U_Bytes,(jlong)CompileBroker::no_compile,CHECK);
    _perf_last_failed_type = PerfDataManager::create_variable(SUN_CI, "lastFailedType",PerfData::U_None,(jlong)CompileBroker::no_compile,CHECK);
    _perf_last_invalidated_type = PerfDataManager::create_variable(SUN_CI, "lastInvalidatedType",PerfData::U_None,(jlong)CompileBroker::no_compile,CHECK);
  }
  _initialized = true;
}

接下来初始化编译队列，C2 CompileQueue、C1 CompileQueue，并创建 compiler_count 个编译线程：

CompileBroker::init_compiler_threads()

// Creates the compile queues and starts the compiler threads.
//
// c1_compiler_count / c2_compiler_count: number of threads to start for the
// C1 compiler (_compilers[0]) and the C2 compiler (_compilers[1]).
// A queue is only created for a compiler whose thread count is positive.
void CompileBroker::init_compiler_threads(int c1_compiler_count, int c2_compiler_count) {
  EXCEPTION_MARK;
#if !defined(ZERO) && !defined(SHARK)
  assert(c2_compiler_count > 0 || c1_compiler_count > 0, "No compilers?");
#endif // !ZERO && !SHARK
  // Set up the C2 queue and tell the C2 compiler how many threads it gets.
  if (c2_compiler_count > 0) {
    _c2_compile_queue  = new CompileQueue("C2 CompileQueue",  MethodCompileQueue_lock);
    _compilers[1]->set_num_compiler_threads(c2_compiler_count);
  }
  // Set up the C1 queue and tell the C1 compiler how many threads it gets.
  if (c1_compiler_count > 0) {
    _c1_compile_queue  = new CompileQueue("C1 CompileQueue",  MethodCompileQueue_lock);
    _compilers[0]->set_num_compiler_threads(c1_compiler_count);
  }
  // Total number of compiler threads.
  int compiler_count = c1_compiler_count + c2_compiler_count;
  _compiler_threads = new (ResourceObj::C_HEAP, mtCompiler) GrowableArray<CompilerThread*>(compiler_count, true);
  char name_buffer[256];
  // Start the C2 compiler threads (indices 0 .. c2_compiler_count-1).
  for (int i = 0; i < c2_compiler_count; i++) {
    sprintf(name_buffer, "C2 CompilerThread%d", i);
    CompilerCounters* counters = new CompilerCounters("compilerThread", i, CHECK);
    CompilerThread* new_thread = make_compiler_thread(name_buffer, _c2_compile_queue, counters, _compilers[1], CHECK);
    _compiler_threads->append(new_thread);
  }
  // Start the C1 compiler threads. The index continues after the C2 threads,
  // so the first C1 thread is numbered c2_compiler_count rather than 0.
  for (int i = c2_compiler_count; i < compiler_count; i++) {
    sprintf(name_buffer, "C1 CompilerThread%d", i);
    CompilerCounters* counters = new CompilerCounters("compilerThread", i, CHECK);
    CompilerThread* new_thread = make_compiler_thread(name_buffer, _c1_compile_queue, counters, _compilers[0], CHECK);
    _compiler_threads->append(new_thread);
  }
  // Publish the thread count as a perf constant when perf data is enabled.
  if (UsePerfData) {
    PerfDataManager::create_constant(SUN_CI, "threads", PerfData::U_Bytes, compiler_count, CHECK);
  }
}

CompileBroker::make_compiler_thread() 创建编译线程：

// Creates and starts one compiler thread.
//
// name:     thread name, also used for the java.lang.Thread object
// queue:    compile queue handed to the CompilerThread constructor
// counters: per-thread counters handed to the CompilerThread constructor
// comp:     compiler attached to the thread via set_compiler()
// Returns the started CompilerThread. The CHECK_0 call sites bail out of this
// function on the error paths those macros handle.
CompilerThread* CompileBroker::make_compiler_thread(const char* name, CompileQueue* queue, CompilerCounters* counters,AbstractCompiler* comp, TRAPS) {
  CompilerThread* compiler_thread = NULL;
  // Resolve the Klass for java.lang.Thread and allocate an instance of it.
  Klass* k = SystemDictionary::resolve_or_fail(vmSymbols::java_lang_Thread(),true, CHECK_0);
  instanceKlassHandle klass (THREAD, k);
  instanceHandle thread_oop = klass->allocate_instance_handle(CHECK_0);
  // Create a java.lang.String holding the thread name.
  Handle string = java_lang_String::create_from_str(name, CHECK_0);
  // Handle for the system thread group.
  Handle thread_group (THREAD,  Universe::system_thread_group());
  JavaValue result(T_VOID);
  // Run the Thread(ThreadGroup group, String name) constructor on thread_oop.
  JavaCalls::call_special(&result, thread_oop,klass,vmSymbols::object_initializer_name(),vmSymbols::threadgroup_string_void_signature(),thread_group,string,CHECK_0);
  {
    // Everything below happens while holding Threads_lock.
    MutexLocker mu(Threads_lock, THREAD);
    // Create the native CompilerThread.
    compiler_thread = new CompilerThread(queue, counters);
    // Without a thread object or an OS thread the VM cannot continue; exit.
    if (compiler_thread == NULL || compiler_thread->osthread() == NULL){
      vm_exit_during_initialization("java.lang.OutOfMemoryError","unable to create new native thread");
    }
    // Link the new compiler_thread with the Java Thread instance.
    java_lang_Thread::set_thread(thread_oop(), compiler_thread);
    java_lang_Thread::set_priority(thread_oop(), NearMaxPriority);
    // CompilerThreadPriority == -1 means "not set": derive the native
    // priority from CriticalPriority or NearMaxPriority instead.
    int native_prio = CompilerThreadPriority;
    if (native_prio == -1) {
      if (UseCriticalCompilerThreadPriority) {
        native_prio = os::java_to_os_priority[CriticalPriority];
      } else {
        native_prio = os::java_to_os_priority[NearMaxPriority];
      }
    }
    os::set_native_priority(compiler_thread, native_prio);
    java_lang_Thread::set_daemon(thread_oop());
    compiler_thread->set_threadObj(thread_oop());
    compiler_thread->set_compiler(comp);
    Threads::add(compiler_thread);
    Thread::start(compiler_thread);
  }
  os::yield(); // make sure that the compiler thread is started early (especially helpful on SOLARIS)
  return compiler_thread;
}

其中编译线程启动后会执行 CompileBroker::compiler_thread_loop() 方法，循环从编译队列中取出编译任务进行编译。

CompileBroker::compiler_thread_loop()

// Main loop of a compiler thread (excerpt; part of the body is elided).
// Repeatedly takes a CompileTask from this thread's queue and compiles it via
// invoke_compiler_on_method(), until compilation is disabled forever; then
// shuts down the compiler runtime for this thread.
void CompileBroker::compiler_thread_loop() {
  CompilerThread* thread = CompilerThread::current();
  CompileQueue* queue = thread->queue();
  // NOTE(review): per the original annotation, compilation allocates from an
  // Arena and this ResourceMark scopes those allocations - confirm.
  ResourceMark rm;
  // ... (elided)
  // Keep pulling tasks for as long as compilation has not been disabled
  // forever. NOTE(review): the original annotation says that only happens when
  // compiler initialization fails for lack of CodeCache memory - not visible
  // in this excerpt, confirm.
  while (!is_compilation_disabled_forever()) {
    HandleMark hm(thread);
    // React to a nearly full CodeCache before taking on more work.
    if (CodeCache::unallocated_capacity() < CodeCacheMinimumFreeSpace) {
      handle_full_code_cache();
    }
    // Fetch the next task. A NULL result means there is nothing to compile
    // right now (NOTE(review): the original annotation says get() waits up to
    // 5s when the queue is empty - confirm against CompileQueue::get()).
    CompileTask* task = queue->get();
    if (task == NULL) {
      continue;
    }
    // Give compiler threads an extra quanta.  They tend to be bursty and
    // this helps the compiler to finish up the job.
    if( CompilerThreadHintNoPreempt )
      os::hint_no_preempt();
    CompilerCounters* counters = ((CompilerThread*)thread)->counters();
    PerfTraceTimedEvent(counters->time_counter(), counters->compile_counter());
    // NOTE(review): per the original annotation, the CompileTaskWrapper
    // constructor makes `task` the current task of this thread - confirm.
    CompileTaskWrapper ctw(task);
    // Locker for the nmethod this task will produce; the task keeps a pointer
    // to it through set_code_handle().
    nmethodLocker result_handle;  // (handle for the nmethod produced by this task)
    task->set_code_handle(&result_handle);
    methodHandle method(thread, task->method());
    // Only compile methods that have no breakpoints set.
    if (method()->number_of_breakpoints() == 0) {
      // Compile, unless compilation has been switched off in the meantime.
      if ((UseCompiler || AlwaysCompileLoopMethods) && CompileBroker::should_compile_new_jobs()) {
        invoke_compiler_on_method(task);
      } else {
        method->clear_queued_for_compilation();
        task->set_failure_reason("compilation is disabled");
      }
    }
  }
  // Compilation has been disabled forever: shut down and let the thread exit.
  shutdown_compiler_runtime(thread->compiler(), thread);
}

到了这一步就进入了编译器的编译入口函数了，调试从这里进去即可，会根据编译策略获取对应的编译器，并调用该编译器的编译方法：

CompileBroker::invoke_compiler_on_method(task) => 编译任务执行入口

Compiler::compile_method() => 获取编译器，调用编译器的编译方法

代码片段如下：

// Fragment of the compile-task entry point. The declarations of task_level,
// ci_env, target and osr_bci are elided from this listing.
void CompileBroker::invoke_compiler_on_method(CompileTask* task) {

// Look up the compiler for the task's level and run the compilation.

AbstractCompiler *comp = compiler(task_level);

if (comp == NULL) {

// No compiler for this level: record the method as not compilable.
ci_env.record_method_not_compilable("no compiler", !TieredCompilation);

} else {

comp->compile_method(&ci_env, target, osr_bci);

}

// ... (rest elided)

}

最终调用不同的编译器：

其中C1用于客户端模式，需要快速响应用户请求，编译速度快，资源占用少，产出的代码性能适中。

C2用于服务端模式，适用于长期运行的服务端程序，在编译上花更多时间使用更激进的优化以提高全局性能。

2. C1 编译器

C1编译器流程分为三步：

build_hir() 构造hir，生成控制流图、基本块用于处理控制流、数据流。

emit_lir() 构造lir，为指令操作数分配虚拟寄存器，根据变量存活周期映射到物理寄存器。

emit_code_body() 生成机器代码，将编译好的机器码存储到内存缓冲区。

IR，也就是中间代码（Intermediate Representation，有时也称 Intermediate Code，IC），它是编译器中很重要的一种数据结构。编译器在做完前端工作以后，首先就是生成 IR，并在此基础上执行各种优化算法，最后再生成目标代码。基于这种从高到低的抽象层次，IR 可以归结为 HIR、MIR 和 LIR 三类。

HIR：基于源语言做一些分析和变换

MIR：独立于源语言和 CPU 架构做分析和优化

LIR：依赖于 CPU 架构做优化和代码生成

在 Compilation 构造函数中会进入 Compilation::compile_java_method() 这个就是 C1 编译器：

// Runs the three C1 phases for a Java method (excerpt; the beginning is
// elided): build the HIR, emit the LIR, then emit the code body.
// Returns the value of emit_code_body(); the bailout macros return
// no_frame_size instead.
int Compilation::compile_java_method() {
   // ... (elided)
  {
    // Phase 1: build the high-level IR.
    PhaseTraceTime timeit(_t_buildIR);
    build_hir();
  }
  if (BailoutAfterHIR) {
    BAILOUT_("Bailing out because of -XX:+BailoutAfterHIR", no_frame_size);
  }
  {
    // Phase 2: create the frame map from the HIR's lock and stack needs
    // (at least 4 stack slots), then emit the low-level IR.
    PhaseTraceTime timeit(_t_emit_lir);
    _frame_map = new FrameMap(method(), hir()->number_of_locks(), MAX2(4, hir()->max_stack()));
    emit_lir();
  }
  CHECK_BAILOUT_(no_frame_size);
  {
    // Phase 3: emit the machine code.
    PhaseTraceTime timeit(_t_codeemit);
    return emit_code_body();
  }
}

构造HIR：

这一步用于将字节码转换为高级中间表示HIR，HIR是由基本块构造的控制流图，内部是SSA形式的指令序列并进行一系列优化。对于编译器优化来说构造中间表示IR是第一步。C1的HIR类似《Advanced Compiler Design and Implementation》中的基于静态单赋值形式（SSA）的MIR。每个块的SSA以控制流图（CFG）的形式组合在一起。代码如下：

// Builds the HIR for the method being compiled and runs the HIR-level
// optimizations that are enabled by flags (excerpt; several parts are elided).
void Compilation::build_hir() {


  // ... (elided)
  // Construct the HIR.
  _hir = new IR(this, method(), osr_bci());
  if (UseC1Optimizations) {
    // Block-level optimizations (per the original annotation: conditional
    // expression elimination and block elimination).
    _hir->optimize_blocks();
  }
    // ... (elided)
   // Global value numbering.
  if (UseGlobalValueNumbering) {
    // No resource mark here! LoopInvariantCodeMotion can allocate ValueStack objects.
    int instructions = Instruction::number_of_instructions();
    GlobalValueNumbering gvn(_hir);
    // GVN must not have created any new instructions.
    assert(instructions == Instruction::number_of_instructions(),"shouldn't have created an instructions");
  }
    // ... (elided)
   // Range check elimination; skipped when the HIR has an OSR entry.
  if (RangeCheckElimination) {
    if (_hir->osr_entry() == NULL) {
      PhaseTraceTime timeit(_t_rangeCheckElimination);
      RangeCheckElimination::eliminate(_hir);
    }
  }
    // ... (elided)
    // Null check elimination.
  if (UseC1Optimizations) {
    NEEDS_CLEANUP
    PhaseTraceTime timeit(_t_optimize_null_checks);
    _hir->eliminate_null_checks();
  }
  // ... (elided)
}

其中构建控制流图CFG的流程是：

1. 找开始指令：找到所有块开始指令（leader），给所有leader指令建立基本块（这就是CFG的节点）。（识别leader指令有两种方法，一种是根据标号label，另一种是根据跳转、分支指令的目标。字节码没有标号，只能用后一种方法。）

2. 找结尾指令：找到所有结尾指令，并根据结尾指令添加基本块之间跳转的边（这就是CFG的边）。

Compilation::build_hir =>

IR::IR构造函数 =>

IRScope::IRScope构造函数 =>

IRScope::build_graph

接下来是将HIR高级表示转为LIR低级表示，同时将虚拟寄存器映射到物理寄存器，Compilation::emit_lir()：

// Lowers the HIR to LIR and runs linear-scan register allocation.
// Stores the allocator via set_allocator() and records its max_spills().
// Each CHECK_BAILOUT() leaves the function early once the compilation has
// bailed out.
void Compilation::emit_lir() {
  CHECK_BAILOUT();
  LIRGenerator gen(this, method());  {
    // Generate LIR by visiting the HIR in linear-scan order.
    PhaseTraceTime timeit(_t_lirGeneration);
    hir()->iterate_linear_scan_order(&gen);
  }
  CHECK_BAILOUT();  {
    PhaseTraceTime timeit(_t_linearScan);
    LinearScan* allocator = new LinearScan(hir(), &gen, frame_map());
    set_allocator(allocator);
    // Linear scan: assign physical registers to the LIR operands.
    allocator->do_linear_scan();
    CHECK_BAILOUT();
    _max_spills = allocator->max_spills();
  }
  // Flag-controlled stop after LIR: optionally print the LIR, then bail out.
  if (BailoutAfterLIR) {
    if (PrintLIR && !bailed_out()) {
      print_LIR(hir()->code());
    }
    bailout("Bailing out because of -XX:+BailoutAfterLIR");
  }
}

最后是生成机器代码并缓存，Compilation::emit_code_body()：

Compilation::emit_code_body()    =>    生成机器代码
lir_asm.emit_code(hir()->code())    =>    
LIR_Assembler::emit_block()    =>    
LIR_Assembler::emit_lir_list()    =>    
Compilation::install_code()    =>    代码安装
ciEnv::register_method()    =>    调用 set_code()，修改 entry_point，替换方法。
// Emits machine code for the LIR into the code buffer.
// Returns frame_map()->framesize() on success; the bailout macros return 0.
int Compilation::emit_code_body() {
  // Size the code buffer first; bail out when the requested size is not available.
  if (!setup_code_buffer(code(), allocator()->num_calls())) {
    BAILOUT_("size requested greater than avail code buffer size", 0);
  }
  code()->initialize_oop_recorder(env()->oop_recorder());
  // NOTE(review): the original annotation states that C1_MacroAssembler
  // derives from MacroAssembler - not visible in this excerpt.
  _masm = new C1_MacroAssembler(code());
  _masm->set_oop_recorder(env()->oop_recorder());
  // Translate the LIR of the HIR's blocks into machine code.
  LIR_Assembler lir_asm(this);
  lir_asm.emit_code(hir()->code());
  CHECK_BAILOUT_(0);
  emit_code_epilog(&lir_asm);
  CHECK_BAILOUT_(0);
  generate_exception_handler_table();
#ifndef PRODUCT
  if (PrintExceptionHandlers && Verbose) {
    exception_handler_table()->print();
  }
#endif /* PRODUCT */
  return frame_map()->framesize();
}

优化后的代码存储到CodeBuffer中。编译完成以后的代码对应着一个nmethod。在最后阶段install_code将进行栈上替换。编译过程中，可以通过 -XX:+PrintIR、-XX:+PrintLIRWithAssembly 输出每一个步骤的 HIR/LIR，通过 -XX:+PrintCFGToFile 参数生成控制流图 .cfg 文件。需要注意，在 Ubuntu 上 HotSpot 默认使用 C2 编译器，而 PrintCFGToFile 仅应用于 C1，所以默认情况下可能没有输出。解决方案是： java -XX:+PrintCFGToFile -XX:+TieredCompilation Test。

3.C2 编译器

C2编译器比较复杂，总体来说是字节码解析、构造理想图、机器无关优化、代码生成（指令选择/全局代码移动/指令调度/寄存器分配/窥孔优化/生成机器代码）、代码安装。整体架构如图：

C2即时编译器首先分析字节码并生成中间表示Ideal图，这是一种SSA(Static Single Assignment，静态单赋值)形式的中间表示，所有优化和代码生成都是基于它。

接着进行平台无关优化并生成平台相关的 MachNode图:最后进行平台相关优化，包括指令选择、代码重排、寄存器分配、窥孔优化，直至输出目标机器代码。

C2即时编译器采用了激进乐观的优化技术，并采用传统编译器许多的编译优化手段，如内联优化、全局值编号(GVN)、图着色的寄存器分配、BURS指令选择算法等。

C2即时编译器在构建中间表示过程中需要两次遍历热方法的字节码指令:第一遍用来确定基本块(Basic Block，BB)，同时判断出BB之间的前后关系:

第二遍才为每个BB块进行构建中间表示的 Ideal图。在构建中间表示过程中遵循必须先处理前驱的原则，这就可以充分利用到类型的传播信息。

在构建过程中，还进行不断的多次循环优化，如死循环消除、常数传播、冗余代码消除等。

最后阶段是进行指令选择，C2是基于确定有限状态机生成器（DFA）匹配最优的指令和操作数。

它一般通过指令的属性，如指令的访存代价、流水线结构等众多属性分析每种指令的优劣并作出最优匹配选择:

接着进行机器平台相关的优化，如寄存器分配、窥孔优化等，直至最后生成热方法的本地机器代码。关于编译器知识见《高级编译器设计与实现》。

IdealGraphVisualizer是研究C2的实用工具之一可以查看理想图的各个node。

总体上用到的一些传统编译器的技术：

* SSA形式的中间表现（intermediate representation）

* GVN（global value numbering）

* CSE（common sub-expression elimination）

* CCP（conditional constant propagation）

* Constant folding

* DCE（dead code elimination）

* Alias analysis

* LICM（loop-invariant code motion）

* Loop unrolling

* Loop peeling

* Escape analysis

* Scalar replacement / SRoA（Scalar Replacement of Aggregate）

* BURS（bottom-up rewrite system）

* Code Scheduling: Global Code Motion (GCM) / Local Code Motion (LCM)

* Graph-coloring register allocator

* Peephole optimization

代码入口：

C2Compiler::compile_method() =>

Compile::Compile() => 进入C2编译器

// C2 entry point: constructing a Compile object performs the whole compilation
// of `target` (an OSR compilation when is_osr_compilation() is true).
// Visible phases, in order:
//   1. parse the method through a CallGenerator and build the initial graph,
//   2. remove useless nodes (PhaseRemoveUseless),
//   3. call Inline_Warm() until it reports no successes, then Finish_Warm(),
//   4. Optimize(),
//   5. Code_Gen(),
//   6. env()->register_method(...) to hand the result to the ciEnv.
// Most phases are followed by a failing() check that returns early.
Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr_bci,
                  bool subsume_loads, bool do_escape_analysis, bool eliminate_boxing )
                : Phase(Compiler),
                  _env(ci_env),
                  _log(ci_env->log()),
                  _compile_id(ci_env->compile_id()),
                  _save_argument_registers(false),
                  _stub_name(NULL),
                  _stub_function(NULL),
                  _stub_entry_point(NULL),
                  _method(target),
                  _entry_bci(osr_bci),
                  _initial_gvn(NULL),
                  _for_igvn(NULL),
                  _warm_calls(NULL),
                  _subsume_loads(subsume_loads),
                  _do_escape_analysis(do_escape_analysis),
                  _eliminate_boxing(eliminate_boxing),
                  _failure_reason(NULL),
                  _code_buffer("Compile::Fill_buffer"),
                  _orig_pc_slot(0),
                  _orig_pc_slot_offset_in_bytes(0),
                  _has_method_handle_invokes(false),
                  _mach_constant_base_node(NULL),
                  _node_bundling_limit(0),
                  _node_bundling_base(NULL),
                  _java_calls(0),
                  _inner_loops(0),
                  _scratch_const_size(-1),
                  _in_scratch_emit_size(false),
                  _dead_node_list(comp_arena()),
                  _dead_node_count(0),
#ifndef PRODUCT
                  _trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")),
                  _in_dump_cnt(0),
                  _printer(IdealGraphPrinter::printer()),
#endif
                  _congraph(NULL),
                  _comp_arena(mtCompiler),
                  _node_arena(mtCompiler),
                  _old_arena(mtCompiler),
                  _Compile_types(mtCompiler),
                  _replay_inline_data(NULL),
                  _late_inlines(comp_arena(), 2, 0, NULL),
                  _string_late_inlines(comp_arena(), 2, 0, NULL),
                  _boxing_late_inlines(comp_arena(), 2, 0, NULL),
                  _late_inlines_pos(0),
                  _number_of_mh_late_inlines(0),
                  _inlining_progress(false),
                  _inlining_incrementally(false),
                  _print_inlining_list(NULL),
                  _print_inlining_idx(0),
                  _interpreter_frame_size(0),
                  _max_node_limit(MaxNodeLimit) {
  C = this;
  CompileWrapper cw(this);
#ifndef PRODUCT
  // Non-product builds only: timing output and assembly-printing setup.
  if (TimeCompiler2) {
    tty->print(" ");
    target->holder()->name()->print();
    tty->print(".");
    target->print_short_name();
    tty->print("  ");
  }
  TraceTime t1("Total compilation time", &_t_totalCompilation, TimeCompiler, TimeCompiler2);
  TraceTime t2(NULL, &_t_methodCompilation, TimeCompiler, false);
  bool print_opto_assembly = PrintOptoAssembly || _method->has_option("PrintOptoAssembly");
  if (!print_opto_assembly) {
    // Fall back to PrintOptoAssembly when PrintAssembly was requested but the
    // disassembler cannot decode.
    bool print_assembly = (PrintAssembly || _method->should_print_assembly());
    if (print_assembly && !Disassembler::can_decode()) {
      tty->print_cr("PrintAssembly request changed to PrintOptoAssembly");
      print_opto_assembly = true;
    }
  }
  set_print_assembly(print_opto_assembly);
  set_parsed_irreducible_loop(false);
  if (method()->has_option("ReplayInline")) {
    _replay_inline_data = ciReplay::load_inline_data(method(), entry_bci(), ci_env->comp_level());
  }
#endif
  set_print_inlining(PrintInlining || method()->has_option("PrintInlining") NOT_PRODUCT( || PrintOptoInlining));
  set_print_intrinsics(PrintIntrinsics || method()->has_option("PrintIntrinsics"));
  set_has_irreducible_loop(true); // conservative until build_loop_tree() reset it
  if (ProfileTraps RTM_OPT_ONLY( || UseRTMLocking )) {
    method()->ensure_method_data();
  }
  Init(::AliasLevel);
  print_compile_messages();
  _ilt = InlineTree::build_inline_tree_root();
  assert(num_alias_types() >= AliasIdxRaw, "");
#define MINIMUM_NODE_HASH  1023
  // Node list that Iterative GVN will start with
  Unique_Node_List for_igvn(comp_arena());
  set_for_igvn(&for_igvn);
  // Size the GVN hash table from the bytecode size, but never below
  // MINIMUM_NODE_HASH.
  uint estimated_size = method()->code_size()*4+64;
  estimated_size = (estimated_size < MINIMUM_NODE_HASH ? MINIMUM_NODE_HASH : estimated_size);
  PhaseGVN gvn(node_arena(), estimated_size);
  set_initial_gvn(&gvn);
  if (print_inlining() || print_intrinsics()) {
    _print_inlining_list = new (comp_arena())GrowableArray<PrintInliningBuffer>(comp_arena(), 1, 1, PrintInliningBuffer());
  }
  {
    // Parse phase: pick a CallGenerator and let it build the graph.
    TracePhase t3("parse", &_t_parser, true);
    initial_gvn()->transform_no_reclaim(top());
    CallGenerator* cg = NULL;
    if (is_osr_compilation()) {
      // OSR case: the start node uses the OSR domain.
      const TypeTuple *domain = StartOSRNode::osr_domain();
      const TypeTuple *range = TypeTuple::make_range(method()->signature());
      init_tf(TypeFunc::make(domain, range));
      StartNode* s = new (this) StartOSRNode(root(), domain);
      initial_gvn()->set_type_bottom(s);
      init_start(s);
      cg = CallGenerator::for_osr(method(), entry_bci());
    } else {
      // Normal case.
      init_tf(TypeFunc::make(method()));
      StartNode* s = new (this) StartNode(root(), tf()->domain());
      initial_gvn()->set_type_bottom(s);
      init_start(s);
      // Reference.get is looked up as an intrinsic when G1 is in use.
      if (method()->intrinsic_id() == vmIntrinsics::_Reference_get && UseG1GC) {
        cg = find_intrinsic(method(), false);
      }
      if (cg == NULL) {
        // Use the interpreter invocation count as the expected number of uses.
        float past_uses = method()->interpreter_invocation_count();
        float expected_uses = past_uses;
        cg = CallGenerator::for_inline(method(), expected_uses);
      }
    }
    if (failing())  return;
    if (cg == NULL) {
      record_method_not_compilable_all_tiers("cannot parse method");
      return;
    }
    JVMState* jvms = build_start_state(start(), tf());
    if ((jvms = cg->generate(jvms)) == NULL) {
      record_method_not_compilable("method parse failed");
      return;
    }
    GraphKit kit(jvms);
    if (!kit.stopped()) {
      return_values(kit.jvms());
    }
    if (kit.has_exceptions()) {
      rethrow_exceptions(kit.transfer_exceptions_into_jvms());
    }
    assert(IncrementalInline || (_late_inlines.length() == 0 && !has_mh_late_inlines()), "incremental inlining is off");
    if (_late_inlines.length() == 0 && !has_mh_late_inlines() && !failing() && has_stringbuilder()) {
      inline_string_calls(true);
    }
    if (failing())  return;
    print_method(PHASE_BEFORE_REMOVEUSELESS, 3);
    if (!failing()) {
      // Remove useless nodes left over from parsing.
      ResourceMark rm;
      PhaseRemoveUseless pru(initial_gvn(), &for_igvn);
    }
  }
  if (failing())  return;
  set_default_node_notes(NULL);
  // Keep calling Inline_Warm() until a round reports no successes.
  for (;;) {
    int successes = Inline_Warm();
    if (failing())  return;
    if (successes == 0)  break;
  }
  Finish_Warm();
#ifndef PRODUCT
  if (_printer) {
    _printer->print_inlining(this);
  }
#endif
  if (failing())  return;
  NOT_PRODUCT( verify_graph_edges(); )
  // Now optimize
  Optimize();
  if (failing())  return;
  NOT_PRODUCT( verify_graph_edges(); )
#ifndef PRODUCT
  if (PrintIdeal) {
    ttyLocker ttyl;  // keep the following output all in one block
    // This output goes directly to the tty, not the compiler log.
    // To enable tools to match it up with the compilation activity,
    // be sure to tag this tty output with the compile ID.
    if (xtty != NULL) {
      xtty->head("ideal compile_id='%d'%s", compile_id(),is_osr_compilation()    ? " compile_kind='osr'" :
                 "");
    }
    root()->dump(9999);
    if (xtty != NULL) {
      xtty->tail("ideal");
    }
  }
#endif
  NOT_PRODUCT( verify_barriers(); )
  if (method()->has_option("DumpReplay")) {
    env()->dump_replay_data(_compile_id);
  }
  if (method()->has_option("DumpInline") && (ilt() != NULL)) {
    env()->dump_inline_data(_compile_id);
  }
  // Reserve fixed stack slots for one address starting at _orig_pc_slot.
  _orig_pc_slot =  fixed_slots();
  int next_slot = _orig_pc_slot + (sizeof(address) / VMRegImpl::stack_slot_size);
  set_fixed_slots(next_slot);
  set_allowed_deopt_reasons();
  // Code generation
  Code_Gen();
  if (failing())  return;
  // Check if we want to skip execution of all compiled code.
  {
#ifndef PRODUCT
    if (OptoNoExecute) {
      record_method_not_compilable("+OptoNoExecute");  // Flag as failed
      return;
    }
    TracePhase t2("install_code", &_t_registerMethod, TimeCompiler);
#endif
    // Exactly one of the two entry offsets is set to _first_block_size,
    // depending on whether this is an OSR compilation; the other is 0.
    if (is_osr_compilation()) {
      _code_offsets.set_value(CodeOffsets::Verified_Entry, 0);
      _code_offsets.set_value(CodeOffsets::OSR_Entry, _first_block_size);
    } else {
      _code_offsets.set_value(CodeOffsets::Verified_Entry, _first_block_size);
      _code_offsets.set_value(CodeOffsets::OSR_Entry, 0);
    }
    // Register the generated machine code with the ciEnv.
    env()->register_method(_method, _entry_bci,
                           &_code_offsets,
                           _orig_pc_slot_offset_in_bytes,
                           code_buffer(),
                           frame_size_in_words(), _oop_map_set,
                           &_handler_table, &_inc_table,
                           compiler,
                           env()->comp_level(),
                           has_unsafe_access(),
                           SharedRuntime::is_wide_vector(max_vector_size()),
                           rtm_state()
                           );
    if (log() != NULL) // Print code cache state into compiler log
      log()->code_cache_state();
  }
}

4.代码缓存 CodeCache

在HotSpot VM 中，除了模板解释器外，有很多地方也会用到运行时机器代码生成技术，如广为人知的C1编译器产出、C2编译器产出、C2I/I2C 适配器代码片段、解释器到JNI适配器的代码片段等。

为了统一管理这些运行时生成的机器代码，HotSpotVM抽象出一个 CodeBlob体系，由CodeBlob作为基类表示所有运行时生成的机器代码，并衍生出五花八门的子类:

1 ) CompiledMethod:编译后的Java方法。
    a) nmethod:JIT编译后的Java方法。
    b)AOTCompiledMethod: AOT编译的方法。
2) RuntimeBlob:非编译后的代码片段。
    a) BufferBlob:解释器等使用的代码片段。AdapterBlob: C2I/I2C适配器代码片段。VtableBlob:虚表代码片段。MethodHandleAdapterBlob: MethodHandle代码片段。
    b) RuntimeStub:调用运行时方法的代码片段。
    c) SingletonBlob:单例代码片段。DeoptimizationBlob:退优化代码片段。ExceptionBlob:异常处理代码片段。SafepointBlob:错误指令异常处理代码片段。UncommonTrapBlob:打破编译器假设的稀有情况代码片段。

前面提到过C2I/I2C适配器代码片段，它们就存放在 AdapterBlob中。解释器到JNI的调用约定适配器代码片段和模板解释器一样，都存放在BufferBlob中。前面进行分类是为了区分代码片段的类型，而统一管理这些即时生成的机器代码片段的区域是CodeCache，由虚拟机将所有CodeBlob都放入 CodeCache。

0x13

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
打赏
0
评论
【hotspot】执行字节码指令流程(4)：编译器C1/C2

在HotSpot VM 中，除了模板解释器外，有很多地方也会用到运行时机器代码生成技术，如广为人知的C1编译器产出、C2编译器产出、C2I/I2C 适配器代码片段、解释器到JNI适配器的代码片段等。C2即时编译器采用了激进乐观的优化技术，并采用传统编译器许多的编译优化手段，如内联优化、全局值编码(GVN)、图着色的寄存器分配、BURS指令选择算法等。接着进行平台无关优化并生成平台相关的 MachNode图:最后进行平台相关优化，包括指令选择、代码重排、寄存器分配、窥孔优化，直至输出目标机器代码。
复制链接

扫一扫