上一篇我们查看了HotSpot线程与桩(stub)的实现,其中桩代码的实现涉及到手工汇编(__ 前缀的代码),这些手工汇编最终都是通过HotSpot内部汇编器生成不同硬件架构对应的汇编指令机器码。虚拟机内部根据不同的cpu生成了各自对应的汇编指令以实现跨平台兼容。今天我们来看HotSpot模板解释器是如何结合汇编器将虚拟机性能大幅提升的。
一.汇编器
hotspot/src/share/vm/runtime/stubRoutines.cpp
BufferBlob是CodeBlob的一种,创建BufferBlob时会向CodeCache申请分配一块CodeHeap内存。CodeHeap内部通过HeapBlock空闲链表维护着对应的VirtualSpace范围内的内存。CodeBuffer内部维护着一组CodeSection,封装了一组指令存储查询操作。
// Generates the first batch of stub routines. Does nothing when _code1 has
// already been set (non-NULL).
void StubRoutines::initialize1() {
if (_code1 == NULL) {
// Create the BufferBlob; per the article, this requests a chunk of memory
// from the code cache (through an overloaded operator new).
_code1 = BufferBlob::create("StubRoutines (1)", code_size1);
// Wrap the blob in a CodeBuffer; per the article, the blob's start address
// and usable length become the start/end of the CodeBuffer's CodeSection.
CodeBuffer buffer(_code1);
// The call_stub generation described in the previous article also went
// through this call.
StubGenerator_generate(&buffer, false);
}
}
hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
// Forwards the CodeBuffer to the StubCodeGenerator base; the body is empty in
// this abridged excerpt, so 'all' is not used here.
StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {}
hotspot/src/share/vm/runtime/stubCodeGenerator.cpp
创建宏汇编器,传入CodeBuffer
// Creates the MacroAssembler that emits into the given CodeBuffer and keeps
// it in _masm. print_code is not used in this abridged excerpt.
StubCodeGenerator::StubCodeGenerator(CodeBuffer* code, bool print_code) {
_masm = new MacroAssembler(code );
}
hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp
宏汇编继承自Assembler
// MacroAssembler constructor: simply hands the CodeBuffer to the Assembler base.
MacroAssembler(CodeBuffer* code) : Assembler(code) {}
hotspot/src/cpu/x86/vm/assembler_x86.hpp
Assembler继承自抽象汇编
// Assembler constructor: simply hands the CodeBuffer to the AbstractAssembler base.
Assembler(CodeBuffer* code) : AbstractAssembler(code) {}
hotspot/src/share/vm/asm/assembler.cpp
抽象汇编持有指令CodeSection,CodeSection存储汇编后的指令
// Binds the assembler to the instruction section of the given CodeBuffer and
// picks up the buffer's oop recorder.
AbstractAssembler::AbstractAssembler(CodeBuffer* code) {
CodeSection* cs = code->insts(); // take the instruction CodeSection from the CodeBuffer
cs->clear_mark();
_code_section = cs; // keep the reference; emitted instructions go into this section
_oop_recorder= code->oop_recorder();
}
- AbstractAssembler提供了一组汇编指令数据保存获取操作,如emit_int8(),addr_at()…,Label伪指令等。
- Assembler提供了硬件相关的汇编指令集如mov(),lea(),…,Address封装内存寻址,Register封装寄存器。
- MacroAssembler进一步封装了更高级的操作,如load_klass(),call_VM()等高级操作。
二.模板表生成
hotspot/src/share/vm/interpreter/templateInterpreter.cpp
// Initializes the template interpreter: base interpreter, template table,
// generated interpreter code, and finally the active dispatch table.
void TemplateInterpreter::initialize() {
AbstractInterpreter::initialize(); // abstract interpreter
TemplateTable::initialize(); // template table
// generate interpreter
{ ResourceMark rm;
int code_size = InterpreterCodeSize;
// Create the StubQueue named "Interpreter" with code_size bytes; per the
// article, a BufferBlob is allocated for the queue and its stubs are of
// type InterpreterCodelet.
_code = new StubQueue(new InterpreterCodeletInterface, code_size, NULL,"Interpreter");
// Constructing the generator produces the method entry points and the
// bytecode entry points (method entry points were covered in the previous
// article).
TemplateInterpreterGenerator g(_code);
}
// Initialize the dispatch table: the generated bytecode entry points live in
// _normal_table, which is copied into _active_table.
_active_table = _normal_table;
}
hotspot/src/share/vm/interpreter/templateTable.cpp
初始化模板表,in,out 代表指令进出栈栈顶缓存数据类型,generator代表字节码指令对应的模板生成器函数,argument为generator入参。
void TemplateTable::initialize() {
// For better readability
const char _ = ' ';
const int ____ = 0;
const int ubcp = 1 << Template::uses_bcp_bit;
const int disp = 1 << Template::does_dispatch_bit;
const int clvm = 1 << Template::calls_vm_bit;
const int iswd = 1 << Template::wide_bit;
// interpr. templates
// Java spec bytecodes ubcp|disp|clvm|iswd in out generator argument
def(Bytecodes::_nop , ____|____|____|____, vtos, vtos, nop , _ );
def(Bytecodes::_aconst_null , ____|____|____|____, vtos, atos, aconst_null , _ );
def(Bytecodes::_iconst_m1 , ____|____|____|____, vtos, itos, iconst , -1 );
def(Bytecodes::_iconst_0 , ____|____|____|____, vtos, itos, iconst , 0 );
def(Bytecodes::_iconst_1 , ____|____|____|____, vtos, itos, iconst , 1 );
......
hotspot/src/share/vm/interpreter/templateTable.cpp
void (*gen)()是一个函数指针,指向模板指令生成器
// Overload for generators that take no argument. The filler parameter is not
// read in this excerpt; the generator is cast to Template::generator and
// registered through the other overload with argument 0.
void TemplateTable::def(Bytecodes::Code code, int flags, TosState in, TosState out, void (*gen)(), char filler) {
def(code, flags, in, out, (Template::generator)gen, 0);
}
// Overload for generators that take an int argument: fills in the template
// for the bytecode.
// NOTE(review): this excerpt is abridged - 't' is not declared here;
// presumably it is the Template looked up for 'code'. Confirm against the
// full source.
void TemplateTable::def(Bytecodes::Code code, int flags, TosState in, TosState out, void (*gen)(int arg), int arg) {
// set up the template
t->initialize(flags, in, out, gen, arg);
}
hotspot/src/share/vm/interpreter/templateTable.cpp
每条指令对应一个模板
void Template::initialize(int flags, TosState tos_in, TosState tos_out, generator gen, int arg) {
_flags = flags;
_tos_in = tos_in;
_tos_out = tos_out;
_gen = gen;
_arg = arg;
}
三.模板解释器
hotspot/src/share/vm/interpreter/templateInterpreterGenerator.cpp
// Passes the StubQueue to the AbstractInterpreterGenerator base and then
// immediately generates all interpreter code via generate_all().
TemplateInterpreterGenerator::TemplateInterpreterGenerator(StubQueue* _code): AbstractInterpreterGenerator(_code) {
generate_all();
}
hotspot/src/share/vm/interpreter/templateInterpreterGenerator.cpp
生成方法entry_point和指令码entry_point
// Generates the interpreter's entry points. Only the bytecode step is shown;
// the rest of the body is elided in this excerpt.
void TemplateInterpreterGenerator::generate_all() {
....
set_entry_points_for_all_bytes(); // generate the code for every bytecode template
......
}
hotspot/src/share/vm/interpreter/templateInterpreterGenerator.cpp
循环生成所有字节码指令的汇编指令
void TemplateInterpreterGenerator::set_entry_points_for_all_bytes() {
for (int i = 0; i < DispatchTable::length; i++) {
Bytecodes::Code code = (Bytecodes::Code)i;
if (Bytecodes::is_defined(code)) {
set_entry_points(code);
} else {
set_unimplemented(i);
}
}
}
hotspot/src/share/vm/interpreter/templateInterpreterGenerator.cpp
每条字节码指令的栈顶缓存是不确定的,因此有必要为每条字节码指令根据所有栈顶缓存类型生成一个在内存的入口地址,将这10个地址封装成EntryPoint,然后以code为key,EntryPoint为value存储到_normal_table,_normal_table稍后赋给派发表,当方法中的第一条字节码开始执行时将从派发表按code和栈顶缓存状态得到对应汇编指令在内存中的位置,然后执行它。
// Generates the code for one bytecode and records its entry points (one per
// top-of-stack cache state) in Interpreter::_normal_table, plus the wide
// entry point in Interpreter::_wentry_point.
void TemplateInterpreterGenerator::set_entry_points(Bytecodes::Code code) {
// Per the article: the CodeletMark creates an InterpreterCodelet that holds
// the bytecode and its generated assembly, and commits that codelet to the
// StubQueue when the CodeletMark is destroyed.
CodeletMark cm(_masm, Bytecodes::name(code), code);
// One entry address per top-of-stack cache state. The defaults below stay in
// place for every state the template does not provide an entry for.
address bep = _illegal_bytecode_sequence;
address zep = _illegal_bytecode_sequence;
address cep = _illegal_bytecode_sequence;
address sep = _illegal_bytecode_sequence;
address aep = _illegal_bytecode_sequence;
address iep = _illegal_bytecode_sequence;
address lep = _illegal_bytecode_sequence;
address fep = _illegal_bytecode_sequence;
address dep = _illegal_bytecode_sequence;
address vep = _unimplemented_bytecode;
address wep = _unimplemented_bytecode;
// code for short & wide version of bytecode
if (Bytecodes::is_defined(code)) {
Template* t = TemplateTable::template_for(code);
// zep is not passed here, so it keeps its default in the visible code.
set_short_entry_points(t, bep, cep, sep, aep, iep, lep, fep, dep, vep);
}
......
// bundle the entry points
EntryPoint entry(bep, zep, cep, sep, aep, iep, lep, fep, dep, vep);
// store them in _normal_table under this bytecode
Interpreter::_normal_table.set_entry(code, entry);
Interpreter::_wentry_point[code] = wep;
}
hotspot/src/share/vm/interpreter/templateInterpreterGenerator.cpp
根据栈顶缓存类型生成entry_point地址,生成汇编指令
// Emits the code for template t and fills in the entry-point addresses that
// match the template's incoming top-of-stack state (t->tos_in()). The address
// parameters are outputs; those not assigned here keep the caller's defaults.
void TemplateInterpreterGenerator::set_short_entry_points(Template* t, address& bep, address& cep, address& sep, address& aep, address& iep, address& lep, address& fep, address& dep, address& vep) {
switch (t->tos_in()) {
case btos:
case ztos:
case ctos:
case stos:
ShouldNotReachHere(); // btos/ctos/stos should use itos.
break;
// For a typed incoming state two entry points are recorded back to back:
// vep marks the current pc, then a pop of that type is emitted, and the typed
// entry point marks the pc right after the pop, where the template code and
// dispatch follow. Presumably the pop loads the value from the expression
// stack into the TOS cache so the vtos entry can fall through - confirm.
case atos: vep = __ pc(); __ pop(atos); aep = __ pc(); generate_and_dispatch(t); break;
case itos: vep = __ pc(); __ pop(itos); iep = __ pc(); generate_and_dispatch(t); break;
case ltos: vep = __ pc(); __ pop(ltos); lep = __ pc(); generate_and_dispatch(t); break;
case ftos: vep = __ pc(); __ pop(ftos); fep = __ pc(); generate_and_dispatch(t); break;
case dtos: vep = __ pc(); __ pop(dtos); dep = __ pc(); generate_and_dispatch(t); break;
// Templates that expect nothing cached are handled by a separate helper.
case vtos: set_vtos_entry_points(t, bep, cep, sep, aep, iep, lep, fep, dep, vep); break;
default : ShouldNotReachHere(); break;
}
}
hotspot/src/share/vm/interpreter/templateInterpreterGenerator.cpp
派发和生成字节码指令,在将对应的字节码汇编指令数据写入内存后接着在其后面写入"寻找并执行下一条指令" 的汇编指令数据,所有指令后面都会插入。这样当虚拟机执行完这条字节码对应的汇编指令后就不用返回,而是继续执行完"寻找并执行下一条指令"的汇编代码,然后跳到下一条指令处执行,具体来说就是根据当前线程的pc+1获取字节码然后从派发表取得对应的汇编指令地址,跳转执行,循环往复直到方法中的code执行完。这就是HotSpot的线索寻址,比switch…case效率高。
// Emits the template's code and, unless the template dispatches by itself,
// appends the "fetch and jump to the next bytecode" sequence after it.
// NOTE(review): in this abridged excerpt 'step' is never changed after being
// set to 0; the computation of the real step from the bytecode length appears
// to have been elided. Confirm against the full source.
void TemplateInterpreterGenerator::generate_and_dispatch(Template* t, TosState tos_out) {
int step = 0;
// generate template
t->generate(_masm);
// advance
if (t->does_dispatch()) {
#ifdef ASSERT
// make sure execution doesn't go beyond this point if code is broken
__ should_not_reach_here();
#endif // ASSERT
} else {
// append, right after the code just emitted, the logic that finds and
// executes the next instruction
__ dispatch_epilog(tos_out, step);
}
}
hotspot/src/share/vm/interpreter/templateTable.cpp
_gen和_arg均由def函数生成模板时定义
// Runs this template's generator. The template and the assembler are handed
// to the generator through TemplateTable's static _desc and _masm fields
// rather than as call arguments.
void Template::generate(InterpreterMacroAssembler* masm) {
// parameter passing
TemplateTable::_desc = this;
TemplateTable::_masm = masm;
// emit the assembly for this bytecode
_gen(_arg);
// flush the assembler (the article describes this as flushing the hardware cache)
masm->flush();
}
hotspot/src/cpu/x86/vm/templateTable_x86.cpp
以iconst字节码指令为例,iconst对应_gen,value对应_arg
// Generator for the iconst_<n> bytecodes: leaves the int constant 'value' in
// rax, going from an empty TOS cache (vtos) to an int in the cache (itos).
void TemplateTable::iconst(int value) {
  transition(vtos, itos);
  if (value != 0) {
    // general case: load the immediate
    __ movl(rax, value);
    return;
  }
  // zero is produced by xor-ing rax with itself instead of loading an immediate
  __ xorl(rax, rax);
}
hotspot/src/cpu/x86/vm/assembler_x86.cpp
取xorl指令来看
// Emits a register-to-register xorl. prefix_and_encode() is called first and
// its return value is deliberately discarded; the instruction bytes are then
// written by emit_arith with opcode 0x33 and base second byte 0xC0 (on x86,
// 0x33 is XOR r32, r/m32 and 0xC0 selects register-direct ModRM).
void Assembler::xorl(Register dst, Register src) {
(void) prefix_and_encode(dst->encoding(), src->encoding());
emit_arith(0x33, 0xC0, dst, src);
}
hotspot/src/cpu/x86/vm/assembler_x86.cpp
写入汇编代码
// Writes a two-byte register/register arithmetic instruction: first op1, then
// op2 merged with the encodings of both registers. Both opcodes must fit in
// a byte.
void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
assert(isByte(op1) && isByte(op2), "wrong opcode");
emit_int8(op1);
// '<<' binds tighter than '|': dst's encoding goes into bits 3 and up, src's
// encoding into the low bits.
emit_int8(op2 | encode(dst) << 3 | encode(src));
}
hotspot/src/share/vm/asm/assembler.hpp
// Emits one byte by forwarding it to the assembler's current code section.
void emit_int8( int8_t x) { code_section()->emit_int8( x); }
hotspot/src/share/vm/asm/codeBuffer.hpp
向CodeSection内存写入汇编指令数据,移动end()
// Stores the byte at the section's current end() and then advances end() by
// sizeof(int8_t).
void emit_int8 ( int8_t x) { *((int8_t*) end()) = x; set_end(end() + sizeof(int8_t)); }
hotspot/src/cpu/x86/vm/interp_masm_x86.cpp
生成跳转执行指令,step = 1
// Emits the dispatch that follows a bytecode's template code; on this
// platform it simply delegates to dispatch_next with the same state and step.
void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) {
dispatch_next(state, step);
}
hotspot/src/cpu/x86/vm/interp_masm_x86.cpp
生成"寻找并执行下一条指令"的汇编指令,上一篇生成方法固定调用点的函数generate_normal_entry里,在生成固定帧以后,插入了__ dispatch_next(vtos),就是这里的dispatch_next
// Emits the "fetch and execute the next bytecode" sequence: load the bytecode
// located 'step' bytes past the bytecode pointer, advance the pointer, and
// dispatch through the table for the given top-of-stack state.
void InterpreterMacroAssembler::dispatch_next(TosState state, int step) {
// load the next bytecode into rbx
load_unsigned_byte(rbx, Address(_bcp_register, step));
// advance the bytecode pointer register by 'step'
increment(_bcp_register, step);
// look up the dispatch table for this TOS state and jump to the entry
dispatch_base(state, Interpreter::dispatch_table(state));
}
相比于c解释执行,HotSpot通过将模板解释器与汇编器联合在一起工作,结合线索寻址将虚拟机的执行效率大幅提升。