I. Core Flow Overview
The core task of the JVM's template interpreter is to execute Java bytecode one instruction at a time, each through a pre-generated native code template. Its execution flow breaks down into the following key stages:
- Method entry construction: build the stack frame, pass arguments, handle synchronization locks, and so on.
- Bytecode dispatch: jump to the handler logic for each bytecode.
- Operand stack and local variable management: maintain the method's execution context.
- Method return and frame teardown: deliver the return value and restore the caller's frame.
The sections below walk through each stage against the OpenJDK 17 sources.
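Before diving into the generated assembly, it helps to hold a simple mental model of the interpreter's inner cycle: fetch a bytecode, advance the pointer, dispatch to a handler. The sketch below is a deliberately minimal switch-threaded loop in plain C++ — an illustration only; HotSpot's template interpreter instead executes pre-generated machine-code templates, as the following sections show:

```cpp
#include <cstdint>
#include <vector>

// Minimal switch-threaded bytecode loop -- a conceptual model only.
// HotSpot's template interpreter instead jumps through a table of
// pre-generated machine-code handlers (see dispatch_base below).
enum Bytecode : uint8_t { ICONST_1 = 0x04, IADD = 0x60, IRETURN = 0xAC };

int interpret(const uint8_t* bcp) {
  std::vector<int32_t> stack;           // operand stack
  for (;;) {
    uint8_t bc = *bcp++;                // fetch, then advance bcp
    switch (bc) {                       // dispatch
      case ICONST_1: stack.push_back(1); break;
      case IADD: {
        int32_t b = stack.back(); stack.pop_back();
        stack.back() += b;
        break;
      }
      case IRETURN: return stack.back();
      default: return -1;               // unsupported opcode
    }
  }
}
```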
II. Building the Method Call Entry
1. generate_call_stub: Generating the Call Stub
This is the core entry point for invoking a Java method from native code (e.g. the JNI and `JavaCalls` paths); it sets up the calling environment:
```cpp
address generate_call_stub(address& return_address) {
  // Set up the stub frame and reserve room for saved state
  __ enter();
  __ subptr(rsp, -rsp_after_call_off * wordSize);

  // Stash incoming arguments (Windows and Linux ABIs differ; see full source)
  __ movptr(parameters, c_rarg5);      // parameter array pointer
  __ movptr(entry_point, c_rarg4);     // method entry address

  // Save registers belonging to the calling function
  __ movptr(rbx_save, rbx);
  __ movptr(r12_save, r12);
  // ... save the remaining registers

  // Load the thread register and re-establish the heap base
  __ movptr(r15_thread, thread);
  __ reinit_heapbase();

  // Push the parameters onto the stack one at a time
  Label loop;
  __ movptr(c_rarg2, parameters);
  __ movl(c_rarg1, c_rarg3);           // parameter counter
  __ BIND(loop);
  __ movptr(rax, Address(c_rarg2, 0)); // load parameter
  __ addptr(c_rarg2, wordSize);        // advance to the next parameter
  __ decrementl(c_rarg1);              // decrement the counter
  __ push(rax);                        // push the parameter
  __ jcc(Assembler::notZero, loop);

  // Call the Java method (entry_point is its interpreter entry)
  __ movptr(c_rarg1, entry_point);
  __ call(c_rarg1);
  return_address = __ pc();

  // Store the result according to its type
  __ movptr(c_rarg0, result);
  Label is_long, is_float, is_double, exit;
  __ movl(c_rarg1, result_type);
  __ cmpl(c_rarg1, T_OBJECT);
  __ jcc(Assembler::equal, is_long);   // T_OBJECT is stored like T_LONG
  // ... handling for the remaining types
```
Key points:
- Register save/restore: the caller's callee-saved registers (`rbx`, `r12`–`r15`, ...) are stashed with `movptr` and restored after the call.
- Parameter passing: follows the platform ABI. Win64 passes only the first four arguments in `rcx, rdx, r8, r9`, so `parameters` and `entry_point` are stored from registers only on non-Windows builds (see the `#ifndef _WIN64` block in the full source).
- Exception check: in debug builds, `cmpptr` against the thread's pending-exception slot verifies the stub is not entered with an outstanding exception.

From the C++ side, this stub is invoked through a typed function pointer, sketched below.
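`JavaCalls::call_helper` enters the stub through the `CallStub` function-pointer type (as declared in JDK 17's `stubRoutines.hpp`). On SysV x86-64 the first six of its eight parameters arrive in `c_rarg0`–`c_rarg5` and the rest on the stack; Win64 has only four register arguments, which is why the stub reads `entry_point` and `parameters` differently there:

```cpp
// From hotspot/share/runtime/stubRoutines.hpp (JDK 17):
// the signature through which C++ code enters generated Java code.
typedef void (*CallStub)(
  address   link,               // JavaCallWrapper* (call_wrapper above)
  intptr_t* result,             // where the stub stores the return value
  BasicType result_type,        // T_INT, T_LONG, T_OBJECT, ...
  Method*   method,             // callee Method*
  address   entry_point,        // interpreter entry for the callee
  intptr_t* parameters,         // argument array
  int       size_of_parameters, // argument count in words
  TRAPS);                       // current thread
```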
III. The Bytecode Dispatch Mechanism
1. The Dispatch Table
Each bytecode maps to one handler address, and the tables are organized by top-of-stack state (`TosState`):
```cpp
void InterpreterMacroAssembler::dispatch_base(TosState state, address* table) {
  // Verify frame integrity (debug builds only)
  if (VerifyActivationFrameSize) { /* ... */ }

  // Safepoint poll: if the poll bit is set, dispatch through the
  // safepoint table instead of the active table
  Label no_safepoint, dispatch;
  if (generate_poll) {
    testb(Address(r15_thread, JavaThread::polling_word_offset()), poll_bit());
    jccb(Assembler::zero, no_safepoint);
    lea(rscratch1, ExternalAddress(safepoint_table));
    jmpb(dispatch);
  }
  bind(no_safepoint);
  lea(rscratch1, ExternalAddress(table));
  bind(dispatch);
  jmp(Address(rscratch1, rbx, Address::times_8)); // rbx holds the bytecode value
}
```
Dispatch table structure:
```cpp
// One dispatch table per TosState
address* Interpreter::dispatch_table(TosState state) {
  switch (state) {
    case vtos: return _active_table[vtos];
    case itos: return _active_table[itos];
    // ... remaining states
  }
}
```
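Conceptually, the active table is a two-dimensional array indexed first by `TosState` (what, if anything, is cached in a register at the top of the operand stack) and then by the bytecode value. A self-contained model with plain function pointers standing in for generated templates (names here are illustrative, not HotSpot's):

```cpp
#include <cstdint>
#include <cstdio>

// Conceptual model of the interpreter's dispatch tables:
// number_of_states x 256 handler addresses. In HotSpot each entry
// points at generated template code; plain function pointers here.
enum TosState { vtos = 0, itos, ltos, ftos, dtos, atos, number_of_states };
using Handler = void (*)(uint8_t bc);

static void unimplemented(uint8_t bc) { std::printf("bytecode 0x%02x\n", bc); }
static Handler active_table[number_of_states][256];

int main() {
  for (auto& row : active_table)
    for (auto& entry : row) entry = unimplemented;
  // Dispatch = one indirect jump through [state][bytecode]; no switch.
  active_table[itos][0x60](0x60);  // e.g. iadd with an int on top of stack
}
```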
2. dispatch_next: Dispatching Bytecodes One at a Time
```cpp
void InterpreterMacroAssembler::dispatch_next(TosState state, int step) {
  load_unsigned_byte(rbx, Address(_bcp_register, step));    // load the next bytecode
  increment(_bcp_register, step);                           // advance the bytecode pointer
  dispatch_base(state, Interpreter::dispatch_table(state)); // dispatch
}
```
Core logic (modeled as threaded dispatch in the sketch after this list):
- Load the bytecode: read the next instruction via `_bcp_register` (the bytecode pointer).
- Advance the pointer: `increment` moves `_bcp_register` forward.
- Dispatch: `dispatch_base` jumps to the matching handler through the table.
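The generated code is effectively threaded dispatch: there is no central loop, because every bytecode template ends by emitting `dispatch_next`, which jumps straight to the next handler. With the GCC/Clang labels-as-values extension, the same control flow can be modeled in C++ (a sketch, not HotSpot code; requires g++ or clang++):

```cpp
#include <cstdint>

// Threaded dispatch modeled with the GCC/Clang "labels as values"
// extension: each handler fetches and jumps to the next bytecode's
// handler itself, mirroring how every template ends in dispatch_next().
int run(const uint8_t* bcp) {
  void* table[256];
  for (void*& e : table) e = &&halt;  // default: stop on unknown opcode
  table[0x04] = &&iconst_1;           // iconst_1
  table[0x60] = &&iadd;               // iadd
  int32_t stack[16]; int sp = -1;

  goto *table[*bcp];                  // initial dispatch
iconst_1:
  stack[++sp] = 1;
  goto *table[*++bcp];                // fetch, advance, indirect jump
iadd:
  stack[sp - 1] += stack[sp]; --sp;
  goto *table[*++bcp];
halt:
  return sp >= 0 ? stack[sp] : 0;
}
```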
IV. Method Invocation and Virtual Dispatch
1. The invokevirtual Implementation
```cpp
void TemplateTable::invokevirtual(int byte_no) {
  prepare_invoke(byte_no, rbx, noreg, rcx, rdx); // resolve callee, load receiver/flags
  invokevirtual_helper(rbx, rcx, rdx);           // the actual dispatch logic
}

void invokevirtual_helper(Register index, Register recv, Register flags) {
  // Is this a final method? (vfinal bit set during resolution)
  __ testl(rax, (1 << ConstantPoolCacheEntry::is_vfinal_shift));
  __ jcc(Assembler::zero, notFinal);

  // Final method: call directly, no vtable lookup needed
  __ jump_from_interpreted(method, rax);

  __ bind(notFinal);
  // Virtual dispatch: find the target through the receiver's vtable
  __ load_klass(rax, recv);                     // load the receiver's klass
  __ lookup_virtual_method(rax, index, method); // look up the virtual method
  __ jump_from_interpreted(method, rdx);        // jump to the target
}
```
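Stripped of register allocation, the decision `invokevirtual_helper` encodes is small: after resolution, the constant pool cache entry's `f2` field holds either the target `Method*` itself (when the vfinal bit is set) or a vtable index. A standalone model with illustrative types and an assumed flag-bit position:

```cpp
#include <cstdint>
#include <vector>

// Standalone model of invokevirtual target selection (illustrative
// names, not HotSpot types). After resolution, the cache entry's f2
// is either the target method itself (vfinal) or a vtable index.
struct Method { const char* name; };
struct Klass  { std::vector<Method*> vtable; };
struct Oop    { Klass* klass; };

constexpr int is_vfinal_shift = 20;  // cpCache flag bit; value assumed here

Method* select_target(uintptr_t f2, uint32_t flags, Oop* receiver) {
  if (flags & (1u << is_vfinal_shift))
    return reinterpret_cast<Method*>(f2);  // monomorphic: call directly
  return receiver->klass->vtable[f2];      // per-receiver vtable lookup
}
```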
Virtual method lookup logic:
```cpp
void MacroAssembler::lookup_virtual_method(Register klass, Register index, Register method) {
  // Compute the address of the vtable entry
  Address vtable_entry_addr(klass, index, Address::times_ptr,
                            base + vtableEntry::method_offset_in_bytes());
  movptr(method, vtable_entry_addr); // load the Method*
}
```
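Because the vtable is laid out immediately after the `Klass` fields, the lookup is pure address arithmetic. The same computation as plain pointer math (a sketch; the offset parameter stands in for `Klass::vtable_start_offset()`):

```cpp
#include <cstddef>

// The vtable lookup as raw address arithmetic (illustrative model).
// In HotSpot: entry = klass + Klass::vtable_start_offset()
//                   + index * sizeof(vtableEntry),
// and the Method* sits at method_offset_in_bytes() inside the entry.
using Method = void;  // opaque in this sketch

Method* vtable_lookup(const char* klass_base, int index, size_t vtable_start_offset) {
  const char* entry = klass_base
                    + vtable_start_offset      // vtable follows the Klass fields
                    + index * sizeof(void*);   // one pointer-sized slot per entry
  return *reinterpret_cast<Method* const*>(entry);
}
```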
2. Synchronized Method Handling
Locking is handled at the method entry:
```cpp
address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
  if (synchronized) {
    lock_method(); // allocate a monitor slot and emit the lock acquisition
  }
  // ... the rest of the entry
}

void lock_method() {
  __ movl(rax, Address(rbx, Method::access_flags_offset()));
  __ testl(rax, JVM_ACC_SYNCHRONIZED);
  __ jcc(Assembler::zero, done);
  // emit the monitorenter-style lock acquisition (the receiver for
  // instance methods, the class mirror for static methods)
}
```
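In higher-level terms, the entry code merely tests the method's access flags and picks the object to lock: the receiver for instance methods, the class mirror for static ones. A standalone sketch using the flag values from the class-file specification:

```cpp
#include <cstdint>

// Access flag bits from the JVM class-file specification.
constexpr uint32_t JVM_ACC_STATIC       = 0x0008;
constexpr uint32_t JVM_ACC_SYNCHRONIZED = 0x0020;

struct Oop;  // opaque object reference for this sketch

// Illustrative model: pick the monitor object the way the interpreter
// entry does -- the receiver for instance methods, the class mirror
// for static methods -- then lock it.
Oop* monitor_object(uint32_t access_flags, Oop* receiver, Oop* class_mirror) {
  if (!(access_flags & JVM_ACC_SYNCHRONIZED)) return nullptr;  // nothing to lock
  return (access_flags & JVM_ACC_STATIC) ? class_mirror : receiver;
}
```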
V. Stack Frame Construction and Context Management
1. Frame Initialization
The interpreter frame is built in `generate_normal_entry`:
```cpp
// Allocate space for locals beyond the parameters
__ load_unsigned_short(rdx, size_of_locals);
__ subl(rdx, rcx);              // rdx = number of additional locals
Label loop;
__ bind(loop);
__ push((int)NULL_WORD);        // initialize each local slot to NULL
__ decrementl(rdx);
__ jcc(Assembler::greater, loop);

// Build the fixed frame header (return address, Method*, etc.)
generate_fixed_frame(false);
```
Fixed frame layout:
```
| Return Address   |
| Method*          |
| Constant Pool    |
| Local Variables  |
| Operand Stack    |
| Monitor Block    |
```
2. Debug Support (Custom Additions)
Debug-info printing inserted at the method entry (user-added code):
```cpp
// Save register state
__ push(rax);
__ push(rcx);
// ... save the remaining registers

// Call the debug function to print the method name
__ movptr(rdi, rbx); // pass the Method* as the first argument
__ call(CAST_FROM_FN_PTR(address, print_debug_info));

// Restore registers
__ pop(r15);
__ pop(r11);
// ... restore the remaining registers
```
Purpose: emits debug output via `os::write` (for example the "yym" marker), making it easy to trace the method call chain.
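The source dump below shows only the call site; `print_debug_info` itself is user-defined and not part of OpenJDK. A plausible shape for it, using real HotSpot utilities (`ResourceMark`, `tty->print_cr`, `Method::name_and_sig_as_C_string`) with a hypothetical parameter list mirroring the `rdi`/`rsi`/`r8`/`r9` loads at the call site:

```cpp
// Hypothetical sketch of the user-defined debug callback; the
// parameter list mirrors the rdi/rsi/r8/r9 loads in the entry code.
static void print_debug_info(Method* method, intptr_t* sp,
                             intptr_t param_words, intptr_t extra_locals) {
  ResourceMark rm;  // scopes the C-string allocation below
  tty->print_cr("yym enter %s: params=%ld extra_locals=%ld sp=" PTR_FORMAT,
                method->name_and_sig_as_C_string(),
                (long)param_words, (long)extra_locals, p2i(sp));
}
```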
VI. Performance Optimizations and Safety Mechanisms
1. Invocation Counters and JIT Compilation
```cpp
Label invocation_counter_overflow;
generate_counter_incr(&invocation_counter_overflow); // bump the invocation counter

// Counter overflow triggers compilation
__ bind(invocation_counter_overflow);
generate_counter_overflow(continue_after_compile);
```
Logic: once a method's invocation count exceeds the threshold, `InterpreterRuntime::frequency_counter_overflow` is invoked to request JIT compilation.
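The hot-method decision itself is simple bookkeeping. A minimal model (the real tiered-compilation policy also weighs backedge counts and several per-tier thresholds; `-XX:CompileThreshold` is the classic knob):

```cpp
#include <cstdint>

// Minimal model of invocation-counter-driven compilation. The real
// policy (tiered compilation) also tracks backedge counts and multiple
// per-tier thresholds; this sketch captures only the core idea.
struct MethodCounters {
  uint32_t invocation_count = 0;
};

constexpr uint32_t kCompileThreshold = 10000;  // cf. -XX:CompileThreshold

bool should_compile(MethodCounters& mc) {
  return ++mc.invocation_count >= kCompileThreshold;  // overflow -> compile request
}
```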
2. Safepoint Polling
A safepoint check is emitted before dispatch:
```cpp
testb(Address(r15_thread, JavaThread::polling_word_offset()), poll_bit());
jccb(Assembler::zero, no_safepoint);
lea(rscratch1, ExternalAddress(safepoint_table)); // divert to the safepoint table
```
Purpose: guarantees that threads respond promptly when a GC or deoptimization needs them to stop.
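The poll itself is a single test of a thread-local word that the VM arms when it needs all threads to stop. A standalone model of that handshake:

```cpp
#include <atomic>
#include <cstdint>

// Standalone model of thread-local safepoint polling. The VM sets the
// poll bit in each thread's polling word; the interpreter tests it
// before dispatch and diverts to safepoint handling when it is set.
struct ThreadModel {
  std::atomic<uintptr_t> polling_word{0};
};
constexpr uintptr_t kPollBit = 1;

inline void dispatch_with_poll(ThreadModel& t) {
  if (t.polling_word.load(std::memory_order_relaxed) & kPollBit) {
    // block here until the VM operation (GC, deopt, ...) completes
  }
  // ... normal dispatch through the active table
}
```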
VII. Summary
The OpenJDK interpreter's execution flow is implemented in carefully tuned generated assembly. Its core characteristics:
- Fast dispatch: dispatch tables reduce bytecode decoding to a single indirect jump.
- Precise frame management: local variables, the operand stack, and monitor locks are laid out and maintained strictly.
- Cooperation with the JIT: invocation counters trigger compilation of hot methods.
- Safety mechanisms: safepoint polls, exception checks, and related guards are integrated into the hot path.
The custom debug code added here (the "yym" prints) shows how to insert bespoke logic on the interpreter's critical paths, giving developers a flexible way to trace method calls.
## Source Code
```cpp
address generate_call_stub(address& return_address) {
assert((int)frame::entry_frame_after_call_words == -(int)rsp_after_call_off + 1 &&
(int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off,
"adjust this code");
StubCodeMark mark(this, "StubRoutines", "call_stub");
address start = __ pc();
// same as in generate_catch_exception()!
const Address rsp_after_call(rbp, rsp_after_call_off * wordSize);
const Address call_wrapper (rbp, call_wrapper_off * wordSize);
const Address result (rbp, result_off * wordSize);
const Address result_type (rbp, result_type_off * wordSize);
const Address method (rbp, method_off * wordSize);
const Address entry_point (rbp, entry_point_off * wordSize);
const Address parameters (rbp, parameters_off * wordSize);
const Address parameter_size(rbp, parameter_size_off * wordSize);
// same as in generate_catch_exception()!
const Address thread (rbp, thread_off * wordSize);
const Address r15_save(rbp, r15_off * wordSize);
const Address r14_save(rbp, r14_off * wordSize);
const Address r13_save(rbp, r13_off * wordSize);
const Address r12_save(rbp, r12_off * wordSize);
const Address rbx_save(rbp, rbx_off * wordSize);
// stub code
__ enter();
__ subptr(rsp, -rsp_after_call_off * wordSize);
// save register parameters
#ifndef _WIN64
__ movptr(parameters, c_rarg5); // parameters
__ movptr(entry_point, c_rarg4); // entry_point
#endif
__ movptr(method, c_rarg3); // method
__ movl(result_type, c_rarg2); // result type
__ movptr(result, c_rarg1); // result
__ movptr(call_wrapper, c_rarg0); // call wrapper
// save regs belonging to calling function
__ movptr(rbx_save, rbx);
__ movptr(r12_save, r12);
__ movptr(r13_save, r13);
__ movptr(r14_save, r14);
__ movptr(r15_save, r15);
#ifdef _WIN64
int last_reg = 15;
if (UseAVX > 2) {
last_reg = 31;
}
if (VM_Version::supports_evex()) {
for (int i = xmm_save_first; i <= last_reg; i++) {
__ vextractf32x4(xmm_save(i), as_XMMRegister(i), 0);
}
} else {
for (int i = xmm_save_first; i <= last_reg; i++) {
__ movdqu(xmm_save(i), as_XMMRegister(i));
}
}
const Address rdi_save(rbp, rdi_off * wordSize);
const Address rsi_save(rbp, rsi_off * wordSize);
__ movptr(rsi_save, rsi);
__ movptr(rdi_save, rdi);
#else
const Address mxcsr_save(rbp, mxcsr_off * wordSize);
{
Label skip_ldmx;
__ stmxcsr(mxcsr_save);
__ movl(rax, mxcsr_save);
__ andl(rax, MXCSR_MASK); // Only check control and mask bits
ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std());
__ cmp32(rax, mxcsr_std);
__ jcc(Assembler::equal, skip_ldmx);
__ ldmxcsr(mxcsr_std);
__ bind(skip_ldmx);
}
#endif
// Load up thread register
__ movptr(r15_thread, thread);
__ reinit_heapbase();
#ifdef ASSERT
// make sure we have no pending exceptions
{
Label L;
__ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
__ jcc(Assembler::equal, L);
__ stop("StubRoutines::call_stub: entered with pending exception");
__ bind(L);
}
#endif
// pass parameters if any
BLOCK_COMMENT("pass parameters if any");
Label parameters_done;
__ movl(c_rarg3, parameter_size);
__ testl(c_rarg3, c_rarg3);
__ jcc(Assembler::zero, parameters_done);
Label loop;
__ movptr(c_rarg2, parameters); // parameter pointer
__ movl(c_rarg1, c_rarg3); // parameter counter is in c_rarg1
__ BIND(loop);
__ movptr(rax, Address(c_rarg2, 0));// get parameter
__ addptr(c_rarg2, wordSize); // advance to next parameter
__ decrementl(c_rarg1); // decrement counter
__ push(rax); // pass parameter
__ jcc(Assembler::notZero, loop);
// call Java function
__ BIND(parameters_done);
__ movptr(rbx, method); // get Method*
// yym modification
// #ifdef DEBUG_PRINT_METHOD_NAME
// print the "yym" marker string
{
__ push(rax); // save registers
__ push(rdi);
__ push(rsi);
__ push(rdx);
__ movl(rax, 1); // scratch (leftover from a raw sys_write variant; unused by os::write)
__ movl(rdi, 1); // fd = stdout (1)
__ lea(rsi, ExternalAddress((address)"yym\n")); // address of the string
__ movl(rdx, 4); // length = 4 ("yym\n")
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::write))); // call os::write(fd, buf, len)
__ pop(rdx); // restore registers
__ pop(rsi);
__ pop(rdi);
__ pop(rax);
}
// #endif
__ movptr(c_rarg1, entry_point); // get entry_point
__ mov(r13, rsp); // set sender sp
BLOCK_COMMENT("call Java function");
__ call(c_rarg1);
BLOCK_COMMENT("call_stub_return_address:");
return_address = __ pc();
// store result depending on type (everything that is not
// T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
__ movptr(c_rarg0, result);
Label is_long, is_float, is_double, exit;
__ movl(c_rarg1, result_type);
__ cmpl(c_rarg1, T_OBJECT);
__ jcc(Assembler::equal, is_long);
__ cmpl(c_rarg1, T_LONG);
__ jcc(Assembler::equal, is_long);
__ cmpl(c_rarg1, T_FLOAT);
__ jcc(Assembler::equal, is_float);
__ cmpl(c_rarg1, T_DOUBLE);
__ jcc(Assembler::equal, is_double);
// handle T_INT case
__ movl(Address(c_rarg0, 0), rax);
__ BIND(exit);
// pop parameters
__ lea(rsp, rsp_after_call);
#ifdef ASSERT
// verify that threads correspond
{
Label L1, L2, L3;
__ cmpptr(r15_thread, thread);
__ jcc(Assembler::equal, L1);
__ stop("StubRoutines::call_stub: r15_thread is corrupted");
__ bind(L1);
__ get_thread(rbx);
__ cmpptr(r15_thread, thread);
__ jcc(Assembler::equal, L2);
__ stop("StubRoutines::call_stub: r15_thread is modified by call");
__ bind(L2);
__ cmpptr(r15_thread, rbx);
__ jcc(Assembler::equal, L3);
__ stop("StubRoutines::call_stub: threads must correspond");
__ bind(L3);
}
#endif
// restore regs belonging to calling function
#ifdef _WIN64
// emit the restores for xmm regs
if (VM_Version::supports_evex()) {
for (int i = xmm_save_first; i <= last_reg; i++) {
__ vinsertf32x4(as_XMMRegister(i), as_XMMRegister(i), xmm_save(i), 0);
}
} else {
for (int i = xmm_save_first; i <= last_reg; i++) {
__ movdqu(as_XMMRegister(i), xmm_save(i));
}
}
#endif
__ movptr(r15, r15_save);
__ movptr(r14, r14_save);
__ movptr(r13, r13_save);
__ movptr(r12, r12_save);
__ movptr(rbx, rbx_save);
#ifdef _WIN64
__ movptr(rdi, rdi_save);
__ movptr(rsi, rsi_save);
#else
__ ldmxcsr(mxcsr_save);
#endif
// restore rsp
__ addptr(rsp, -rsp_after_call_off * wordSize);
// return
__ vzeroupper();
__ pop(rbp);
__ ret(0);
// handle return types different from T_INT
__ BIND(is_long);
__ movq(Address(c_rarg0, 0), rax);
__ jmp(exit);
__ BIND(is_float);
__ movflt(Address(c_rarg0, 0), xmm0);
__ jmp(exit);
__ BIND(is_double);
__ movdbl(Address(c_rarg0, 0), xmm0);
__ jmp(exit);
return start;
}
void TemplateTable::invokevirtual_helper(Register index,
Register recv,
Register flags) {
// Uses temporary registers rax, rdx
assert_different_registers(index, recv, rax, rdx);
assert(index == rbx, "");
assert(recv == rcx, "");
// Test for an invoke of a final method
Label notFinal;
__ movl(rax, flags);
__ andl(rax, (1 << ConstantPoolCacheEntry::is_vfinal_shift));
__ jcc(Assembler::zero, notFinal);
const Register method = index; // method must be rbx
assert(method == rbx,
"Method* must be rbx for interpreter calling convention");
// do the call - the index is actually the method to call
// that is, f2 is a vtable index if !is_vfinal, else f2 is a Method*
// It's final, need a null check here!
__ null_check(recv);
// profile this call
__ profile_final_call(rax);
__ profile_arguments_type(rax, method, rbcp, true);
__ jump_from_interpreted(method, rax);
__ bind(notFinal);
// get receiver klass
__ null_check(recv, oopDesc::klass_offset_in_bytes());
Register tmp_load_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg);
__ load_klass(rax, recv, tmp_load_klass);
// profile this call
__ profile_virtual_call(rax, rlocals, rdx);
// get target Method* & entry point
__ lookup_virtual_method(rax, index, method);
__ profile_arguments_type(rdx, method, rbcp, true);
__ jump_from_interpreted(method, rdx);
}
// virtual method calling
void MacroAssembler::lookup_virtual_method(Register recv_klass,
RegisterOrConstant vtable_index,
Register method_result) {
const int base = in_bytes(Klass::vtable_start_offset());
assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
Address vtable_entry_addr(recv_klass,
vtable_index, Address::times_ptr,
base + vtableEntry::method_offset_in_bytes());
movptr(method_result, vtable_entry_addr);
}
// Jump to from_interpreted entry of a call unless single stepping is possible
// in this thread in which case we must call the i2i entry
void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) {
prepare_to_jump_from_interpreted();
if (JvmtiExport::can_post_interpreter_events()) {
Label run_compiled_code;
// JVMTI events, such as single-stepping, are implemented partly by avoiding running
// compiled code in threads for which the event is enabled. Check here for
// interp_only_mode if these events CAN be enabled.
// interp_only is an int, on little endian it is sufficient to test the byte only
// Is a cmpl faster?
LP64_ONLY(temp = r15_thread;)
NOT_LP64(get_thread(temp);)
cmpb(Address(temp, JavaThread::interp_only_mode_offset()), 0);
jccb(Assembler::zero, run_compiled_code);
jmp(Address(method, Method::interpreter_entry_offset()));
bind(run_compiled_code);
}
jmp(Address(method, Method::from_interpreted_offset()));
}
void TemplateTable::invokevirtual(int byte_no) {
transition(vtos, vtos);
assert(byte_no == f2_byte, "use this argument");
prepare_invoke(byte_no,
rbx, // method or vtable index
noreg, // unused itable index
rcx, rdx); // recv, flags
// rbx: index
// rcx: receiver
// rdx: flags
invokevirtual_helper(rbx, rcx, rdx);
}
void TemplateTable::resolve_cache_and_index(int byte_no,
Register cache,
Register index,
size_t index_size) {
const Register temp = rbx;
assert_different_registers(cache, index, temp);
Label L_clinit_barrier_slow;
Label resolved;
Bytecodes::Code code = bytecode();
switch (code) {
case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break;
case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break;
default: break;
}
assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
__ get_cache_and_index_and_bytecode_at_bcp(cache, index, temp, byte_no, 1, index_size);
__ cmpl(temp, code); // have we resolved this bytecode?
__ jcc(Assembler::equal, resolved);
// resolve first time through
// Class initialization barrier slow path lands here as well.
__ bind(L_clinit_barrier_slow);
// std::cout << "@@@@yym%%%%" << "method begin" << "----begin" << std::endl;
address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
__ movl(temp, code);
__ call_VM(noreg, entry, temp);
// std::cout << "@@@@yym%%%%" << "method end" << "----end" << std::endl;
// Update registers with resolved info
__ get_cache_and_index_at_bcp(cache, index, 1, index_size);
__ bind(resolved);
// Class initialization barrier for static methods
if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) {
const Register method = temp;
const Register klass = temp;
const Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
assert(thread != noreg, "x86_32 not supported");
__ load_resolved_method_at_index(byte_no, method, cache, index);
__ load_method_holder(klass, method);
__ clinit_barrier(klass, thread, NULL /*L_fast_path*/, &L_clinit_barrier_slow);
}
}
void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
Register method,
Register itable_index,
Register flags,
bool is_invokevirtual,
bool is_invokevfinal, /*unused*/
bool is_invokedynamic) {
// setup registers
const Register cache = rcx;
const Register index = rdx;
assert_different_registers(method, flags);
assert_different_registers(method, cache, index);
assert_different_registers(itable_index, flags);
assert_different_registers(itable_index, cache, index);
// determine constant pool cache field offsets
assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
ConstantPoolCacheEntry::flags_offset());
// access constant pool cache fields
const int index_offset = in_bytes(ConstantPoolCache::base_offset() +
ConstantPoolCacheEntry::f2_offset());
size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2));
resolve_cache_and_index(byte_no, cache, index, index_size);
__ load_resolved_method_at_index(byte_no, method, cache, index);
if (itable_index != noreg) {
// pick up itable or appendix index from f2 also:
__ movptr(itable_index, Address(cache, index, Address::times_ptr, index_offset));
}
__ movl(flags, Address(cache, index, Address::times_ptr, flags_offset));
}
void TemplateTable::prepare_invoke(int byte_no,
Register method, // linked method (or i-klass)
Register index, // itable index, MethodType, etc.
Register recv, // if caller wants to see it
Register flags // if caller wants to test it
) {
// determine flags
const Bytecodes::Code code = bytecode();
const bool is_invokeinterface = code == Bytecodes::_invokeinterface;
const bool is_invokedynamic = code == Bytecodes::_invokedynamic;
const bool is_invokehandle = code == Bytecodes::_invokehandle;
const bool is_invokevirtual = code == Bytecodes::_invokevirtual;
const bool is_invokespecial = code == Bytecodes::_invokespecial;
const bool load_receiver = (recv != noreg);
const bool save_flags = (flags != noreg);
assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal");
assert(flags == noreg || flags == rdx, "");
assert(recv == noreg || recv == rcx, "");
// setup registers & access constant pool cache
if (recv == noreg) recv = rcx;
if (flags == noreg) flags = rdx;
assert_different_registers(method, index, recv, flags);
// save 'interpreter return address'
__ save_bcp();
load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
// maybe push appendix to arguments (just before return address)
if (is_invokedynamic || is_invokehandle) {
Label L_no_push;
__ testl(flags, (1 << ConstantPoolCacheEntry::has_appendix_shift));
__ jcc(Assembler::zero, L_no_push);
// Push the appendix as a trailing parameter.
// This must be done before we get the receiver,
// since the parameter_size includes it.
__ push(rbx);
__ mov(rbx, index);
__ load_resolved_reference_at_index(index, rbx);
__ pop(rbx);
__ push(index); // push appendix (MethodType, CallSite, etc.)
__ bind(L_no_push);
}
// load receiver if needed (after appendix is pushed so parameter size is correct)
// Note: no return address pushed yet
if (load_receiver) {
__ movl(recv, flags);
__ andl(recv, ConstantPoolCacheEntry::parameter_size_mask);
const int no_return_pc_pushed_yet = -1; // argument slot correction before we push return address
const int receiver_is_at_end = -1; // back off one slot to get receiver
Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
__ movptr(recv, recv_addr);
__ verify_oop(recv);
}
if (save_flags) {
__ movl(rbcp, flags);
}
// compute return type
__ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
// Make sure we don't need to mask flags after the above shift
ConstantPoolCacheEntry::verify_tos_state_shift();
// load return address
{
const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code);
ExternalAddress table(table_addr);
LP64_ONLY(__ lea(rscratch1, table));
LP64_ONLY(__ movptr(flags, Address(rscratch1, flags, Address::times_ptr)));
NOT_LP64(__ movptr(flags, ArrayAddress(table, Address(noreg, flags, Address::times_ptr))));
}
// push return address
__ push(flags);
// Restore flags value from the constant pool cache, and restore rsi
// for later null checks. r13 is the bytecode pointer
if (save_flags) {
__ movl(flags, rbcp);
__ restore_bcp();
}
}
//
// Generic interpreted method entry to (asm) interpreter
//
address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
// determine code generation flags
bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;
// ebx: Method*
// rbcp: sender sp
address entry_point = __ pc();
const Address constMethod(rbx, Method::const_offset());
const Address access_flags(rbx, Method::access_flags_offset());
const Address size_of_parameters(rdx,
ConstMethod::size_of_parameters_offset());
const Address size_of_locals(rdx, ConstMethod::size_of_locals_offset());
// get parameter size (always needed)
__ movptr(rdx, constMethod);
__ load_unsigned_short(rcx, size_of_parameters);
// rbx: Method*
// rcx: size of parameters
// rbcp: sender_sp (could differ from sp+wordSize if we were called via c2i )
__ load_unsigned_short(rdx, size_of_locals); // get size of locals in words
__ subl(rdx, rcx); // rdx = no. of additional locals
// YYY
// __ incrementl(rdx);
// __ andl(rdx, -2);
// see if we've got enough room on the stack for locals plus overhead.
generate_stack_overflow_check();
// yym modification
// #ifdef DEBUG_PRINT_METHOD_NAME
// ---yym--- the print code was moved to after the stack-overflow check
{
// save register state
__ push(rax);
__ push(rcx);
__ push(rdx);
__ push(rdi);
__ push(rsi);
__ push(r8);
__ push(r9);
__ push(r10);
__ push(r11);
NOT_LP64(__ get_thread(r15_thread));
__ push(r15);
// set up the call arguments
__ movptr(rdi, rbx); // Method* as the first argument
__ lea(rsi, Address(rsp, 14*wordSize)); // address of the original rsp
__ mov(r8, rcx); // size of parameters
__ mov(r9, rdx); // number of additional locals
// call the debug-output function
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ::TemplateInterpreterGenerator::print_debug_info)));
// restore registers
__ pop(r15);
NOT_LP64(__ restore_thread(r15_thread));
__ pop(r11);
__ pop(r10);
__ pop(r9);
__ pop(r8);
__ pop(rsi);
__ pop(rdi);
__ pop(rdx);
__ pop(rcx);
__ pop(rax);
}
// #endif
// get return address
__ pop(rax);
// compute beginning of parameters
__ lea(rlocals, Address(rsp, rcx, Interpreter::stackElementScale(), -wordSize));
// rdx - # of additional locals
// allocate space for locals
// explicitly initialize locals
{
Label exit, loop;
__ testl(rdx, rdx);
__ jcc(Assembler::lessEqual, exit); // do nothing if rdx <= 0
__ bind(loop);
__ push((int) NULL_WORD); // initialize local variables
__ decrementl(rdx); // until everything initialized
__ jcc(Assembler::greater, loop);
__ bind(exit);
}
// initialize fixed part of activation frame
generate_fixed_frame(false);
// make sure method is not native & not abstract
#ifdef ASSERT
__ movl(rax, access_flags);
{
Label L;
__ testl(rax, JVM_ACC_NATIVE);
__ jcc(Assembler::zero, L);
__ stop("tried to execute native method as non-native");
__ bind(L);
}
{
Label L;
__ testl(rax, JVM_ACC_ABSTRACT);
__ jcc(Assembler::zero, L);
__ stop("tried to execute abstract method in interpreter");
__ bind(L);
}
#endif
// Since at this point in the method invocation the exception
// handler would try to exit the monitor of synchronized methods
// which hasn't been entered yet, we set the thread local variable
// _do_not_unlock_if_synchronized to true. The remove_activation
// will check this flag.
const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
NOT_LP64(__ get_thread(thread));
const Address do_not_unlock_if_synchronized(thread,
in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
__ movbool(do_not_unlock_if_synchronized, true);
__ profile_parameters_type(rax, rcx, rdx);
// increment invocation count & check for overflow
Label invocation_counter_overflow;
if (inc_counter) {
generate_counter_incr(&invocation_counter_overflow);
}
Label continue_after_compile;
__ bind(continue_after_compile);
// check for synchronized interpreted methods
bang_stack_shadow_pages(false);
// reset the _do_not_unlock_if_synchronized flag
NOT_LP64(__ get_thread(thread));
__ movbool(do_not_unlock_if_synchronized, false);
// check for synchronized methods
// Must happen AFTER invocation_counter check and stack overflow check,
// so method is not locked if overflows.
if (synchronized) {
// Allocate monitor and lock method
lock_method();
} else {
// no synchronization necessary
#ifdef ASSERT
{
Label L;
__ movl(rax, access_flags);
__ testl(rax, JVM_ACC_SYNCHRONIZED);
__ jcc(Assembler::zero, L);
__ stop("method needs synchronization");
__ bind(L);
}
#endif
}
// start execution
#ifdef ASSERT
{
Label L;
const Address monitor_block_top (rbp,
frame::interpreter_frame_monitor_block_top_offset * wordSize);
__ movptr(rax, monitor_block_top);
__ cmpptr(rax, rsp);
__ jcc(Assembler::equal, L);
__ stop("broken stack frame setup in interpreter");
__ bind(L);
}
#endif
// jvmti support
__ notify_method_entry();
__ dispatch_next(vtos);
// invocation counter overflow
if (inc_counter) {
// Handle overflow of counter and compile method
__ bind(invocation_counter_overflow);
generate_counter_overflow(continue_after_compile);
}
return entry_point;
}
void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) {
// load next bytecode (load before advancing _bcp_register to prevent AGI)
load_unsigned_byte(rbx, Address(_bcp_register, step));
// advance _bcp_register
increment(_bcp_register, step);
dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll);
}
void InterpreterMacroAssembler::dispatch_base(TosState state,
address* table,
bool verifyoop,
bool generate_poll) {
verify_FPU(1, state);
if (VerifyActivationFrameSize) {
Label L;
mov(rcx, rbp);
subptr(rcx, rsp);
int32_t min_frame_size =
(frame::link_offset - frame::interpreter_frame_initial_sp_offset) *
wordSize;
cmpptr(rcx, (int32_t)min_frame_size);
jcc(Assembler::greaterEqual, L);
stop("broken stack frame");
bind(L);
}
if (verifyoop) {
interp_verify_oop(rax, state);
}
address* const safepoint_table = Interpreter::safept_table(state);
#ifdef _LP64
Label no_safepoint, dispatch;
if (table != safepoint_table && generate_poll) {
NOT_PRODUCT(block_comment("Thread-local Safepoint poll"));
testb(Address(r15_thread, JavaThread::polling_word_offset()), SafepointMechanism::poll_bit());
jccb(Assembler::zero, no_safepoint);
lea(rscratch1, ExternalAddress((address)safepoint_table));
jmpb(dispatch);
}
bind(no_safepoint);
lea(rscratch1, ExternalAddress((address)table));
bind(dispatch);
jmp(Address(rscratch1, rbx, Address::times_8));
#else
Address index(noreg, rbx, Address::times_ptr);
if (table != safepoint_table && generate_poll) {
NOT_PRODUCT(block_comment("Thread-local Safepoint poll"));
Label no_safepoint;
const Register thread = rcx;
get_thread(thread);
testb(Address(thread, JavaThread::polling_word_offset()), SafepointMechanism::poll_bit());
jccb(Assembler::zero, no_safepoint);
ArrayAddress dispatch_addr(ExternalAddress((address)safepoint_table), index);
jump(dispatch_addr);
bind(no_safepoint);
}
{
ArrayAddress dispatch_addr(ExternalAddress((address)table), index);
jump(dispatch_addr);
}
#endif // _LP64
}
```