nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
methodHandle method,
int compile_id,
BasicType* in_sig_bt,
VMRegPair* in_regs,
BasicType ret_type) {
if (method->is_method_handle_intrinsic()) {
vmIntrinsics::ID iid = method->intrinsic_id();
intptr_t start = (intptr_t)__ pc();
int vep_offset = ((intptr_t)__ pc()) - start;
gen_special_dispatch(masm,
method,
in_sig_bt,
in_regs);
int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period
__ flush();
int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually
return nmethod::new_native_nmethod(method,
compile_id,
masm->code(),
vep_offset,
frame_complete,
stack_slots / VMRegImpl::slots_per_word,
in_ByteSize(-1),
in_ByteSize(-1),
(OopMapSet*)NULL);
}
bool is_critical_native = true;
address native_func = method->critical_native_function();
if (native_func == NULL) {
native_func = method->native_function();
is_critical_native = false;
}
assert(native_func != NULL, "must have function");
OopMapSet *oop_maps = new OopMapSet();
intptr_t start = (intptr_t)__ pc();
const int total_in_args = method->size_of_parameters();
int total_c_args = total_in_args;
if (!is_critical_native) {
total_c_args += 1;
if (method->is_static()) {
total_c_args++;
}
} else {
for (int i = 0; i < total_in_args; i++) {
if (in_sig_bt[i] == T_ARRAY) {
total_c_args++;
}
}
}
BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
BasicType* in_elem_bt = NULL;
int argc = 0;
if (!is_critical_native) {
out_sig_bt[argc++] = T_ADDRESS;
if (method->is_static()) {
out_sig_bt[argc++] = T_OBJECT;
}
for (int i = 0; i < total_in_args ; i++ ) {
out_sig_bt[argc++] = in_sig_bt[i];
}
} else {
Thread* THREAD = Thread::current();
in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
SignatureStream ss(method->signature());
for (int i = 0; i < total_in_args ; i++ ) {
if (in_sig_bt[i] == T_ARRAY) {
out_sig_bt[argc++] = T_INT;
out_sig_bt[argc++] = T_ADDRESS;
Symbol* atype = ss.as_symbol(CHECK_NULL);
const char* at = atype->as_C_string();
if (strlen(at) == 2) {
assert(at[0] == '[', "must be");
switch (at[1]) {
case 'B': in_elem_bt[i] = T_BYTE; break;
case 'C': in_elem_bt[i] = T_CHAR; break;
case 'D': in_elem_bt[i] = T_DOUBLE; break;
case 'F': in_elem_bt[i] = T_FLOAT; break;
case 'I': in_elem_bt[i] = T_INT; break;
case 'J': in_elem_bt[i] = T_LONG; break;
case 'S': in_elem_bt[i] = T_SHORT; break;
case 'Z': in_elem_bt[i] = T_BOOLEAN; break;
default: ShouldNotReachHere();
}
}
} else {
out_sig_bt[argc++] = in_sig_bt[i];
in_elem_bt[i] = T_VOID;
}
if (in_sig_bt[i] != T_VOID) {
assert(in_sig_bt[i] == ss.type(), "must match");
ss.next();
}
}
}
int out_arg_slots;
out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
int total_save_slots = 6 * VMRegImpl::slots_per_word; // 6 arguments passed in registers
if (is_critical_native) {
int double_slots = 0;
int single_slots = 0;
for ( int i = 0; i < total_in_args; i++) {
if (in_regs[i].first()->is_Register()) {
const Register reg = in_regs[i].first()->as_Register();
switch (in_sig_bt[i]) {
case T_BOOLEAN:
case T_BYTE:
case T_SHORT:
case T_CHAR:
case T_INT: single_slots++; break;
case T_ARRAY: // specific to LP64 (7145024)
case T_LONG: double_slots++; break;
default: ShouldNotReachHere();
}
} else if (in_regs[i].first()->is_XMMRegister()) {
switch (in_sig_bt[i]) {
case T_FLOAT: single_slots++; break;
case T_DOUBLE: double_slots++; break;
default: ShouldNotReachHere();
}
} else if (in_regs[i].first()->is_FloatRegister()) {
ShouldNotReachHere();
}
}
total_save_slots = double_slots * 2 + single_slots;
if (double_slots != 0) {
stack_slots = round_to(stack_slots, 2);
}
}
int oop_handle_offset = stack_slots;
stack_slots += total_save_slots;
int klass_slot_offset = 0;
int klass_offset = -1;
int lock_slot_offset = 0;
bool is_static = false;
if (method->is_static()) {
klass_slot_offset = stack_slots;
stack_slots += VMRegImpl::slots_per_word;
klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
is_static = true;
}
if (method->is_synchronized()) {
lock_slot_offset = stack_slots;
stack_slots += VMRegImpl::slots_per_word;
}
stack_slots += 6;
stack_slots = round_to(stack_slots, StackAlignmentInSlots);
int stack_size = stack_slots * VMRegImpl::stack_slot_size;
const Register ic_reg = rax;
const Register receiver = j_rarg0;
Label hit;
Label exception_pending;
assert_different_registers(ic_reg, receiver, rscratch1);
__ verify_oop(receiver);
__ load_klass(rscratch1, receiver);
__ cmpq(ic_reg, rscratch1);
__ jcc(Assembler::equal, hit);
__ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
__ align(8);
__ bind(hit);
int vep_offset = ((intptr_t)__ pc()) - start;
if (UseStackBanging) {
__ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
} else {
__ fat_nop();
}
__ enter();
__ subptr(rsp, stack_size - 2*wordSize);
int frame_complete = ((intptr_t)__ pc()) - start;
if (UseRTMLocking) {
__ xabort(0);
}
#ifdef ASSERT
{
Label L;
__ mov(rax, rsp);
__ andptr(rax, -16); // must be 16 byte boundary (see amd64 ABI)
__ cmpptr(rax, rsp);
__ jcc(Assembler::equal, L);
__ stop("improperly aligned stack");
__ bind(L);
}
#endif /* ASSERT */
const Register oop_handle_reg = r14;
if (is_critical_native) {
check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args,
oop_handle_offset, oop_maps, in_regs, in_sig_bt);
}
int receiver_offset = -1;
OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
#ifdef ASSERT
bool reg_destroyed[RegisterImpl::number_of_registers];
bool freg_destroyed[XMMRegisterImpl::number_of_registers];
for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
reg_destroyed[r] = false;
}
for ( int f = 0 ; f < XMMRegisterImpl::number_of_registers ; f++ ) {
freg_destroyed[f] = false;
}
#endif /* ASSERT */
GrowableArray<int> arg_order(2 * total_in_args);
VMRegPair tmp_vmreg;
tmp_vmreg.set2(rbx->as_VMReg());
if (!is_critical_native) {
for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
arg_order.push(i);
arg_order.push(c_arg);
}
} else {
ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg);
}
int temploc = -1;
for (int ai = 0; ai < arg_order.length(); ai += 2) {
int i = arg_order.at(ai);
int c_arg = arg_order.at(ai + 1);
__ block_comment(err_msg("move %d -> %d", i, c_arg));
if (c_arg == -1) {
assert(is_critical_native, "should only be required for critical natives");
__ mov(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register());
in_regs[i] = tmp_vmreg;
temploc = i;
continue;
} else if (i == -1) {
assert(is_critical_native, "should only be required for critical natives");
assert(temploc != -1, "must be valid");
i = temploc;
temploc = -1;
}
#ifdef ASSERT
if (in_regs[i].first()->is_Register()) {
assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
} else if (in_regs[i].first()->is_XMMRegister()) {
assert(!freg_destroyed[in_regs[i].first()->as_XMMRegister()->encoding()], "destroyed reg!");
}
if (out_regs[c_arg].first()->is_Register()) {
reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
} else if (out_regs[c_arg].first()->is_XMMRegister()) {
freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true;
}
#endif /* ASSERT */
switch (in_sig_bt[i]) {
case T_ARRAY:
if (is_critical_native) {
unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
c_arg++;
#ifdef ASSERT
if (out_regs[c_arg].first()->is_Register()) {
reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
} else if (out_regs[c_arg].first()->is_XMMRegister()) {
freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true;
}
#endif
break;
}
case T_OBJECT:
assert(!is_critical_native, "no oop arguments");
object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
((i == 0) && (!is_static)),
&receiver_offset);
break;
case T_VOID:
break;
case T_FLOAT:
float_move(masm, in_regs[i], out_regs[c_arg]);
break;
case T_DOUBLE:
assert( i + 1 < total_in_args &&
in_sig_bt[i + 1] == T_VOID &&
out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
double_move(masm, in_regs[i], out_regs[c_arg]);
break;
case T_LONG :
long_move(masm, in_regs[i], out_regs[c_arg]);
break;
case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
default:
move32_64(masm, in_regs[i], out_regs[c_arg]);
}
}
int c_arg;
if (!is_critical_native) {
c_arg = total_c_args - total_in_args;
if (method->is_static()) {
__ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror()));
__ movptr(Address(rsp, klass_offset), oop_handle_reg);
map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
__ lea(oop_handle_reg, Address(rsp, klass_offset));
__ movptr(c_rarg1, oop_handle_reg);
c_arg--;
}
} else {
c_arg = 0;
}
intptr_t the_pc = (intptr_t) __ pc();
oop_maps->add_gc_map(the_pc - start, map);
__ set_last_Java_frame(rsp, noreg, (address)the_pc);
{
SkipIfEqual skip(masm, &DTraceMethodProbes, false);
save_args(masm, total_c_args, c_arg, out_regs);
__ mov_metadata(c_rarg1, method());
__ call_VM_leaf(
CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
r15_thread, c_rarg1);
restore_args(masm, total_c_args, c_arg, out_regs);
}
if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) {
save_args(masm, total_c_args, c_arg, out_regs);
__ mov_metadata(c_rarg1, method());
__ call_VM_leaf(
CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
r15_thread, c_rarg1);
restore_args(masm, total_c_args, c_arg, out_regs);
}
const Register swap_reg = rax; // Must use rax for cmpxchg instruction
const Register obj_reg = rbx; // Will contain the oop
const Register lock_reg = r13; // Address of compiler lock object (BasicLock)
const Register old_hdr = r13; // value of old header at unlock time
Label slow_path_lock;
Label lock_done;
if (method->is_synchronized()) {
assert(!is_critical_native, "unhandled");
const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
__ mov(oop_handle_reg, c_rarg1);
__ lea(lock_reg, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
__ movptr(obj_reg, Address(oop_handle_reg, 0));
if (UseBiasedLocking) {
__ biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch1, false, lock_done, &slow_path_lock);
}
__ movl(swap_reg, 1);
__ orptr(swap_reg, Address(obj_reg, 0));
__ movptr(Address(lock_reg, mark_word_offset), swap_reg);
if (os::is_MP()) {
__ lock();
}
__ cmpxchgptr(lock_reg, Address(obj_reg, 0));
__ jcc(Assembler::equal, lock_done);
__ subptr(swap_reg, rsp);
__ andptr(swap_reg, 3 - os::vm_page_size());
__ movptr(Address(lock_reg, mark_word_offset), swap_reg);
__ jcc(Assembler::notEqual, slow_path_lock);
__ bind(lock_done);
}
if (!is_critical_native) {
__ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset())));
}
__ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native);
__ call(RuntimeAddress(native_func));
__ restore_cpu_control_state_after_jni();
switch (ret_type) {
case T_BOOLEAN: __ c2bool(rax); break;
case T_CHAR : __ movzwl(rax, rax); break;
case T_BYTE : __ sign_extend_byte (rax); break;
case T_SHORT : __ sign_extend_short(rax); break;
case T_INT : /* nothing to do */ break;
case T_DOUBLE :
case T_FLOAT :
break;
case T_ARRAY: // Really a handle
case T_OBJECT: // Really a handle
break; // can't de-handlize until after safepoint check
case T_VOID: break;
case T_LONG: break;
default : ShouldNotReachHere();
}
__ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
if(os::is_MP()) {
if (UseMembar) {
__ membar(Assembler::Membar_mask_bits(
Assembler::LoadLoad | Assembler::LoadStore |
Assembler::StoreLoad | Assembler::StoreStore));
} else {
__ serialize_memory(r15_thread, rcx);
}
}
Label after_transition;
{
Label Continue;
__ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
SafepointSynchronize::_not_synchronized);
Label L;
__ jcc(Assembler::notEqual, L);
__ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0);
__ jcc(Assembler::equal, Continue);
__ bind(L);
save_native_result(masm, ret_type, stack_slots);
__ mov(c_rarg0, r15_thread);
__ mov(r12, rsp); // remember sp
__ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
__ andptr(rsp, -16); // align stack as required by ABI
if (!is_critical_native) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
}
__ mov(rsp, r12); // restore sp
__ reinit_heapbase();
restore_native_result(masm, ret_type, stack_slots);
if (is_critical_native) {
__ jmpb(after_transition);
}
__ bind(Continue);
}
__ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java);
__ bind(after_transition);
Label reguard;
Label reguard_done;
__ cmpl(Address(r15_thread, JavaThread::stack_guard_state_offset()), JavaThread::stack_guard_yellow_disabled);
__ jcc(Assembler::equal, reguard);
__ bind(reguard_done);
Label unlock_done;
Label slow_path_unlock;
if (method->is_synchronized()) {
__ movptr(obj_reg, Address(oop_handle_reg, 0));
Label done;
if (UseBiasedLocking) {
__ biased_locking_exit(obj_reg, old_hdr, done);
}
__ cmpptr(Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size), (int32_t)NULL_WORD);
__ jcc(Assembler::equal, done);
if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
save_native_result(masm, ret_type, stack_slots);
}
__ lea(rax, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
__ movptr(old_hdr, Address(rax, 0));
if (os::is_MP()) {
__ lock();
}
__ cmpxchgptr(old_hdr, Address(obj_reg, 0));
__ jcc(Assembler::notEqual, slow_path_unlock);
__ bind(unlock_done);
if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
restore_native_result(masm, ret_type, stack_slots);
}
__ bind(done);
}
{
SkipIfEqual skip(masm, &DTraceMethodProbes, false);
save_native_result(masm, ret_type, stack_slots);
__ mov_metadata(c_rarg1, method());
__ call_VM_leaf(
CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
r15_thread, c_rarg1);
restore_native_result(masm, ret_type, stack_slots);
}
__ reset_last_Java_frame(false);
if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
__ resolve_jobject(rax /* value */,
r15_thread /* thread */,
rcx /* tmp */);
}
if (!is_critical_native) {
__ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset()));
__ movl(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD);
}
__ leave();
if (!is_critical_native) {
__ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
__ jcc(Assembler::notEqual, exception_pending);
}
__ ret(0);
if (!is_critical_native) {
__ bind(exception_pending);
__ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
}
if (method->is_synchronized()) {
__ bind(slow_path_lock);
save_args(masm, total_c_args, c_arg, out_regs);
__ mov(c_rarg0, obj_reg);
__ mov(c_rarg1, lock_reg);
__ mov(c_rarg2, r15_thread);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3);
restore_args(masm, total_c_args, c_arg, out_regs);
#ifdef ASSERT
{ Label L;
__ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
__ jcc(Assembler::equal, L);
__ stop("no pending exception allowed on exit from monitorenter");
__ bind(L);
}
#endif
__ jmp(lock_done);
__ bind(slow_path_unlock);
if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
save_native_result(masm, ret_type, stack_slots);
}
__ lea(c_rarg1, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
__ mov(c_rarg0, obj_reg);
__ mov(r12, rsp); // remember sp
__ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
__ andptr(rsp, -16); // align stack as required by ABI
__ movptr(rbx, Address(r15_thread, in_bytes(Thread::pending_exception_offset())));
__ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)));
__ mov(rsp, r12); // restore sp
__ reinit_heapbase();
#ifdef ASSERT
{
Label L;
__ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD);
__ jcc(Assembler::equal, L);
__ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
__ bind(L);
}
#endif /* ASSERT */
__ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), rbx);
if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
restore_native_result(masm, ret_type, stack_slots);
}
__ jmp(unlock_done);
} // synchronized
__ bind(reguard);
save_native_result(masm, ret_type, stack_slots);
__ mov(r12, rsp); // remember sp
__ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
__ andptr(rsp, -16); // align stack as required by ABI
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)));
__ mov(rsp, r12); // restore sp
__ reinit_heapbase();
restore_native_result(masm, ret_type, stack_slots);
__ jmp(reguard_done);
__ flush();
nmethod *nm = nmethod::new_native_nmethod(method,
compile_id,
masm->code(),
vep_offset,
frame_complete,
stack_slots / VMRegImpl::slots_per_word,
(is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
oop_maps);
if (is_critical_native) {
nm->set_lazy_critical_native(true);
}
return nm;
}
#ifdef HAVE_DTRACE_H
static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
static bool offsets_initialized = false;
nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm,
methodHandle method) {
assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
if (!offsets_initialized) {
fp_offset[c_rarg0->as_VMReg()->value()] = -1 * wordSize;
fp_offset[c_rarg1->as_VMReg()->value()] = -2 * wordSize;
fp_offset[c_rarg2->as_VMReg()->value()] = -3 * wordSize;
fp_offset[c_rarg3->as_VMReg()->value()] = -4 * wordSize;
fp_offset[c_rarg4->as_VMReg()->value()] = -5 * wordSize;
fp_offset[c_rarg5->as_VMReg()->value()] = -6 * wordSize;
fp_offset[c_farg0->as_VMReg()->value()] = -7 * wordSize;
fp_offset[c_farg1->as_VMReg()->value()] = -8 * wordSize;
fp_offset[c_farg2->as_VMReg()->value()] = -9 * wordSize;
fp_offset[c_farg3->as_VMReg()->value()] = -10 * wordSize;
fp_offset[c_farg4->as_VMReg()->value()] = -11 * wordSize;
fp_offset[c_farg5->as_VMReg()->value()] = -12 * wordSize;
fp_offset[c_farg6->as_VMReg()->value()] = -13 * wordSize;
fp_offset[c_farg7->as_VMReg()->value()] = -14 * wordSize;
offsets_initialized = true;
}
int total_args_passed = method->size_of_parameters();
BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);
int i=0;
int total_strings = 0;
int first_arg_to_pass = 0;
int total_c_args = 0;
if( !method->is_static() ) {
in_sig_bt[i++] = T_OBJECT;
first_arg_to_pass = 1;
}
SignatureStream ss(method->signature());
for ( ; !ss.at_return_type(); ss.next()) {
BasicType bt = ss.type();
in_sig_bt[i++] = bt; // Collect remaining bits of signature
out_sig_bt[total_c_args++] = bt;
if( bt == T_OBJECT) {
Symbol* s = ss.as_symbol_or_null(); // symbol is created
if (s == vmSymbols::java_lang_String()) {
total_strings++;
out_sig_bt[total_c_args-1] = T_ADDRESS;
} else if (s == vmSymbols::java_lang_Boolean() ||
s == vmSymbols::java_lang_Character() ||
s == vmSymbols::java_lang_Byte() ||
s == vmSymbols::java_lang_Short() ||
s == vmSymbols::java_lang_Integer() ||
s == vmSymbols::java_lang_Float()) {
out_sig_bt[total_c_args-1] = T_INT;
} else if (s == vmSymbols::java_lang_Long() ||
s == vmSymbols::java_lang_Double()) {
out_sig_bt[total_c_args-1] = T_LONG;
out_sig_bt[total_c_args++] = T_VOID;
}
} else if ( bt == T_LONG || bt == T_DOUBLE ) {
in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots
out_sig_bt[total_c_args-1] = T_LONG;
out_sig_bt[total_c_args++] = T_VOID;
} else if ( bt == T_FLOAT) {
out_sig_bt[total_c_args-1] = T_INT;
}
}
assert(i==total_args_passed, "validly parsed signature");
int comp_args_on_stack;
comp_args_on_stack = SharedRuntime::java_calling_convention(
in_sig_bt, in_regs, total_args_passed, false);
int out_arg_slots;
out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
int* string_locs = NEW_RESOURCE_ARRAY(int, total_strings + 1);
for (i = 0; i < total_strings ; i++) {
string_locs[i] = stack_slots;
stack_slots += max_dtrace_string_size / VMRegImpl::stack_slot_size;
}
stack_slots += (Argument::n_int_register_parameters_c +
Argument::n_float_register_parameters_c) * 2;
stack_slots += 4;
stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);
int stack_size = stack_slots * VMRegImpl::stack_slot_size;
intptr_t start = (intptr_t)__ pc();
const Register ic_reg = rax;
const Register receiver = rcx;
Label hit;
Label exception_pending;
__ verify_oop(receiver);
__ cmpl(ic_reg, Address(receiver, oopDesc::klass_offset_in_bytes()));
__ jcc(Assembler::equal, hit);
__ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
__ align(8);
__ bind(hit);
int vep_offset = ((intptr_t)__ pc()) - start;
if (UseStackBanging) {
if (stack_size <= StackShadowPages*os::vm_page_size()) {
__ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
} else {
__ movl(rax, stack_size);
__ bang_stack_size(rax, rbx);
}
} else {
__ fat_nop();
}
assert(((uintptr_t)__ pc() - start - vep_offset) >= 5,
"valid size for make_non_entrant");
__ enter();
if (stack_size - 2*wordSize != 0) {
__ subq(rsp, stack_size - 2*wordSize);
}
int frame_complete = ((intptr_t)__ pc()) - start;
int c_arg, j_arg;
bool live[ConcreteRegisterImpl::number_of_registers];
live[j_rarg0->as_VMReg()->value()] = false;
live[j_rarg1->as_VMReg()->value()] = false;
live[j_rarg2->as_VMReg()->value()] = false;
live[j_rarg3->as_VMReg()->value()] = false;
live[j_rarg4->as_VMReg()->value()] = false;
live[j_rarg5->as_VMReg()->value()] = false;
live[j_farg0->as_VMReg()->value()] = false;
live[j_farg1->as_VMReg()->value()] = false;
live[j_farg2->as_VMReg()->value()] = false;
live[j_farg3->as_VMReg()->value()] = false;
live[j_farg4->as_VMReg()->value()] = false;
live[j_farg5->as_VMReg()->value()] = false;
live[j_farg6->as_VMReg()->value()] = false;
live[j_farg7->as_VMReg()->value()] = false;
bool rax_is_zero = false;
for (j_arg = first_arg_to_pass, c_arg = 0 ;
j_arg < total_args_passed ; j_arg++, c_arg++ ) {
VMRegPair src = in_regs[j_arg];
VMRegPair dst = out_regs[c_arg];
int src_reg = src.first()->is_reg() ?
src.first()->value() :
rsp->as_VMReg()->value();
bool useless = in_sig_bt[j_arg] == T_ARRAY ||
(in_sig_bt[j_arg] == T_OBJECT &&
out_sig_bt[c_arg] != T_INT &&
out_sig_bt[c_arg] != T_ADDRESS &&
out_sig_bt[c_arg] != T_LONG);
live[src_reg] = !useless;
if (dst.first()->is_stack()) {
live[src_reg] = false;
switch (in_sig_bt[j_arg]) {
case T_ARRAY:
case T_OBJECT:
{
Address stack_dst(rsp, reg2offset_out(dst.first()));
if (out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
Register in_reg = rax;
if ( src.first()->is_reg() ) {
in_reg = src.first()->as_Register();
} else {
__ movq(rax, Address(rbp, reg2offset_in(src.first())));
rax_is_zero = false;
}
Label skipUnbox;
__ movptr(Address(rsp, reg2offset_out(dst.first())),
(int32_t)NULL_WORD);
__ testq(in_reg, in_reg);
__ jcc(Assembler::zero, skipUnbox);
BasicType bt = out_sig_bt[c_arg];
int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
Address src1(in_reg, box_offset);
if ( bt == T_LONG ) {
__ movq(in_reg, src1);
__ movq(stack_dst, in_reg);
assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
++c_arg; // skip over T_VOID to keep the loop indices in sync
} else {
__ movl(in_reg, src1);
__ movl(stack_dst, in_reg);
}
__ bind(skipUnbox);
} else if (out_sig_bt[c_arg] != T_ADDRESS) {
if (!rax_is_zero) {
__ xorq(rax, rax);
rax_is_zero = true;
}
__ movq(stack_dst, rax);
}
}
break;
case T_VOID:
break;
case T_FLOAT:
float_move(masm, src, dst);
break;
case T_DOUBLE:
double_move(masm, src, dst);
break;
case T_LONG :
long_move(masm, src, dst);
break;
case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
default:
move32_64(masm, src, dst);
}
}
}
int sid = 0;
if (total_strings > 0 ) {
for (j_arg = first_arg_to_pass, c_arg = 0 ;
j_arg < total_args_passed ; j_arg++, c_arg++ ) {
VMRegPair src = in_regs[j_arg];
VMRegPair dst = out_regs[c_arg];
if (src.first()->is_reg()) {
Address src_tmp(rbp, fp_offset[src.first()->value()]);
if (out_sig_bt[c_arg] == T_ADDRESS) {
Address utf8_addr = Address(
rsp, string_locs[sid++] * VMRegImpl::stack_slot_size);
if (sid != 1) {
__ movq(utf8_addr, src.first()->as_Register());
}
} else if (dst.first()->is_reg()) {
if (in_sig_bt[j_arg] == T_FLOAT || in_sig_bt[j_arg] == T_DOUBLE) {
XMMRegister f = src.first()->as_XMMRegister();
if (in_sig_bt[j_arg] == T_FLOAT) {
__ movflt(src_tmp, f);
} else {
__ movdbl(src_tmp, f);
}
} else {
bool useless = in_sig_bt[j_arg] == T_ARRAY ||
(in_sig_bt[j_arg] == T_OBJECT &&
out_sig_bt[c_arg] != T_INT &&
out_sig_bt[c_arg] != T_LONG);
if (!useless) {
__ movq(src_tmp, src.first()->as_Register());
}
}
}
}
if (in_sig_bt[j_arg] == T_OBJECT && out_sig_bt[c_arg] == T_LONG) {
assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
++c_arg; // skip over T_VOID to keep the loop indices in sync
}
}
sid = 0;
for (j_arg = first_arg_to_pass, c_arg = 0 ;
j_arg < total_args_passed ; j_arg++, c_arg++ ) {
if (out_sig_bt[c_arg] == T_ADDRESS) {
Address utf8_addr = Address(
rsp, string_locs[sid++] * VMRegImpl::stack_slot_size);
VMReg src = in_regs[j_arg].first();
int src_off = src->is_reg() ?
fp_offset[src->value()] : reg2offset_in(src);
VMRegPair dst = out_regs[c_arg];
if (src->is_reg()) {
if (sid == 1) {
__ movq(c_rarg0, src->as_Register());
} else {
__ movq(c_rarg0, utf8_addr);
}
} else {
__ movq(c_rarg0, Address(rbp, reg2offset_in(src)));
}
Label done, convert;
__ testq(c_rarg0, c_rarg0);
__ jcc(Assembler::notEqual, convert);
if (dst.first()->is_reg()) {
__ movq(Address(rbp, src_off), c_rarg0);
} else {
__ movq(Address(rsp, reg2offset_out(dst.first())), c_rarg0);
}
__ jmp(done);
__ bind(convert);
__ lea(c_rarg1, utf8_addr);
if (dst.first()->is_reg()) {
__ movq(Address(rbp, src_off), c_rarg1);
} else {
__ movq(Address(rsp, reg2offset_out(dst.first())), c_rarg1);
}
__ call(RuntimeAddress(
CAST_FROM_FN_PTR(address, SharedRuntime::get_utf)));
__ bind(done);
}
if (in_sig_bt[j_arg] == T_OBJECT && out_sig_bt[c_arg] == T_LONG) {
assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
++c_arg; // skip over T_VOID to keep the loop indices in sync
}
}
live[c_rarg0->as_VMReg()->value()] = false;
live[c_rarg1->as_VMReg()->value()] = false;
live[c_rarg2->as_VMReg()->value()] = false;
live[c_rarg3->as_VMReg()->value()] = false;
live[c_rarg4->as_VMReg()->value()] = false;
live[c_rarg5->as_VMReg()->value()] = false;
live[c_farg0->as_VMReg()->value()] = false;
live[c_farg1->as_VMReg()->value()] = false;
live[c_farg2->as_VMReg()->value()] = false;
live[c_farg3->as_VMReg()->value()] = false;
live[c_farg4->as_VMReg()->value()] = false;
live[c_farg5->as_VMReg()->value()] = false;
live[c_farg6->as_VMReg()->value()] = false;
live[c_farg7->as_VMReg()->value()] = false;
}
rax_is_zero = false;
for (j_arg = first_arg_to_pass, c_arg = 0 ;
j_arg < total_args_passed ; j_arg++, c_arg++ ) {
VMRegPair src = in_regs[j_arg];
VMRegPair dst = out_regs[c_arg];
if (dst.first()->is_reg()) {
Register r = dst.first()->as_Register();
bool useless = in_sig_bt[j_arg] == T_ARRAY ||
(in_sig_bt[j_arg] == T_OBJECT &&
out_sig_bt[c_arg] != T_INT &&
out_sig_bt[c_arg] != T_ADDRESS &&
out_sig_bt[c_arg] != T_LONG);
if (live[dst.first()->value()]) {
if (src.first() != dst.first()) {
__ movq(Address(rbp, fp_offset[dst.first()->value()]), r);
}
}
if (src.first()->is_reg()) {
if (live[src.first()->value()] ) {
if (in_sig_bt[j_arg] == T_FLOAT) {
__ movdl(r, src.first()->as_XMMRegister());
} else if (in_sig_bt[j_arg] == T_DOUBLE) {
__ movdq(r, src.first()->as_XMMRegister());
} else if (r != src.first()->as_Register()) {
if (!useless) {
__ movq(r, src.first()->as_Register());
}
}
} else {
if (!useless) {
if (in_sig_bt[j_arg] == T_DOUBLE ||
in_sig_bt[j_arg] == T_LONG ||
in_sig_bt[j_arg] == T_OBJECT ) {
__ movq(r, Address(rbp, fp_offset[src.first()->value()]));
} else {
__ movl(r, Address(rbp, fp_offset[src.first()->value()]));
}
}
}
live[src.first()->value()] = false;
} else if (!useless) {
__ movq(r, Address(rbp, reg2offset_in(src.first())));
}
if (in_sig_bt[j_arg] == T_ARRAY || in_sig_bt[j_arg] == T_OBJECT ) {
if (out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
Label skip;
__ testq(r, r);
__ jcc(Assembler::equal, skip);
BasicType bt = out_sig_bt[c_arg];
int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
Address src1(r, box_offset);
if ( bt == T_LONG ) {
__ movq(r, src1);
} else {
__ movl(r, src1);
}
__ bind(skip);
} else if (out_sig_bt[c_arg] != T_ADDRESS) {
__ xorq(r, r);
}
}
live[dst.first()->value()] = false;
}
if (in_sig_bt[j_arg] == T_OBJECT && out_sig_bt[c_arg] == T_LONG) {
assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
++c_arg; // skip over T_VOID to keep the loop indices in sync
}
}
int patch_offset = ((intptr_t)__ pc()) - start;
__ nop();
__ leave();
__ ret(0);
__ flush();
nmethod *nm = nmethod::new_dtrace_nmethod(
method, masm->code(), vep_offset, patch_offset, frame_complete,
stack_slots / VMRegImpl::slots_per_word);
return nm;
}
#endif // HAVE_DTRACE_H
int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) {
return (callee_locals - callee_parameters) * Interpreter::stackElementWords;
}
uint SharedRuntime::out_preserve_stack_slots() {
return 0;
}
void SharedRuntime::generate_deopt_blob() {
ResourceMark rm;
CodeBuffer buffer("deopt_blob", 2048, 1024);
MacroAssembler* masm = new MacroAssembler(&buffer);
int frame_size_in_words;
OopMap* map = NULL;
OopMapSet *oop_maps = new OopMapSet();
address start = __ pc();
Label cont;
map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
__ movl(r14, Deoptimization::Unpack_deopt); // callee-saved
__ jmp(cont);
int reexecute_offset = __ pc() - start;
(void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
__ movl(r14, Deoptimization::Unpack_reexecute); // callee-saved
__ jmp(cont);
int exception_offset = __ pc() - start;
__ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx);
__ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax);
int exception_in_tls_offset = __ pc() - start;
__ push(0);
map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
__ movl(r14, Deoptimization::Unpack_exception); // callee-saved
__ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
__ movptr(Address(rbp, wordSize), rdx);
__ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
#ifdef ASSERT
__ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
__ verify_oop(rax);
Label no_pending_exception;
__ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
__ testptr(rax, rax);
__ jcc(Assembler::zero, no_pending_exception);
__ stop("must not have pending exception here");
__ bind(no_pending_exception);
#endif
__ bind(cont);
__ set_last_Java_frame(noreg, noreg, NULL);
#ifdef ASSERT
{ Label L;
__ cmpptr(Address(r15_thread,
JavaThread::last_Java_fp_offset()),
(int32_t)0);
__ jcc(Assembler::equal, L);
__ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
__ bind(L);
}
#endif // ASSERT
__ mov(c_rarg0, r15_thread);
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
oop_maps->add_gc_map(__ pc() - start, map);
__ reset_last_Java_frame(false);
__ mov(rdi, rax);
Label noException;
__ cmpl(r14, Deoptimization::Unpack_exception); // Was exception pending?
__ jcc(Assembler::notEqual, noException);
__ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
__ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
__ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int32_t)NULL_WORD);
__ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
__ verify_oop(rax);
__ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
__ movptr(Address(rsp, RegisterSaver::rdx_offset_in_bytes()), rdx);
__ bind(noException);
RegisterSaver::restore_result_registers(masm);
__ movl(rcx, Address(rdi, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
__ addptr(rsp, rcx);
__ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
#ifdef ASSERT
if (UseStackBanging) {
__ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
__ bang_stack_size(rbx, rcx);
}
#endif
__ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
__ addptr(rsp, wordSize);
__ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
__ movl(rdx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
const Register sender_sp = r8;
__ mov(sender_sp, rsp);
__ movl(rbx, Address(rdi,
Deoptimization::UnrollBlock::
caller_adjustment_offset_in_bytes()));
__ subptr(rsp, rbx);
Label loop;
__ bind(loop);
__ movptr(rbx, Address(rsi, 0)); // Load frame size
#ifdef CC_INTERP
__ subptr(rbx, 4*wordSize); // we'll push pc and ebp by hand and
#ifdef ASSERT
__ push(0xDEADDEAD); // Make a recognizable pattern
__ push(0xDEADDEAD);
#else /* ASSERT */
__ subptr(rsp, 2*wordSize); // skip the "static long no_param"
#endif /* ASSERT */
#else
__ subptr(rbx, 2*wordSize); // We'll push pc and ebp by hand
#endif // CC_INTERP
__ pushptr(Address(rcx, 0)); // Save return address
__ enter(); // Save old & set new ebp
__ subptr(rsp, rbx); // Prolog
#ifdef CC_INTERP
__ movptr(Address(rbp,
-(sizeof(BytecodeInterpreter)) + in_bytes(byte_offset_of(BytecodeInterpreter, _sender_sp))),
sender_sp); // Make it walkable
#else /* CC_INTERP */
__ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD );
__ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), sender_sp); // Make it walkable
#endif /* CC_INTERP */
__ mov(sender_sp, rsp); // Pass sender_sp to next frame
__ addptr(rsi, wordSize); // Bump array pointer (sizes)
__ addptr(rcx, wordSize); // Bump array pointer (pcs)
__ decrementl(rdx); // Decrement counter
__ jcc(Assembler::notZero, loop);
__ pushptr(Address(rcx, 0)); // Save final return address
__ enter(); // Save old & set new ebp
__ subptr(rsp, (frame_size_in_words - 2) * wordSize);
__ movdbl(Address(rsp, RegisterSaver::xmm0_offset_in_bytes()), xmm0);
__ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
address the_pc = __ pc();
__ set_last_Java_frame(noreg, rbp, the_pc);
__ andptr(rsp, -(StackAlignmentInBytes)); // Fix stack alignment as required by ABI
__ mov(c_rarg0, r15_thread);
__ movl(c_rarg1, r14); // second arg: exec_mode
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
__ movptr(rsp, Address(r15_thread, JavaThread::last_Java_sp_offset()));
oop_maps->add_gc_map(the_pc - start,
new OopMap( frame_size_in_words, 0 ));
__ reset_last_Java_frame(true);
__ movdbl(xmm0, Address(rsp, RegisterSaver::xmm0_offset_in_bytes()));
__ movptr(rax, Address(rsp, RegisterSaver::rax_offset_in_bytes()));
__ movptr(rdx, Address(rsp, RegisterSaver::rdx_offset_in_bytes()));
__ leave(); // Epilog
__ ret(0);
masm->flush();
_deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
_deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
}
#ifdef COMPILER2
void SharedRuntime::generate_uncommon_trap_blob() {
ResourceMark rm;
CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
MacroAssembler* masm = new MacroAssembler(&buffer);
assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
address start = __ pc();
if (UseRTMLocking) {
__ xabort(0);
}
__ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog!
__ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp);
__ movl(c_rarg1, j_rarg0);
__ set_last_Java_frame(noreg, noreg, NULL);
__ mov(c_rarg0, r15_thread);
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));
OopMapSet* oop_maps = new OopMapSet();
OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0);
oop_maps->add_gc_map(__ pc() - start, map);
__ reset_last_Java_frame(false);
__ mov(rdi, rax);
__ addptr(rsp, (SimpleRuntimeFrame::framesize - 2) << LogBytesPerInt); // Epilog!
__ movl(rcx, Address(rdi,
Deoptimization::UnrollBlock::
size_of_deoptimized_frame_offset_in_bytes()));
__ addptr(rsp, rcx);
__ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
#ifdef ASSERT
if (UseStackBanging) {
__ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
__ bang_stack_size(rbx, rcx);
}
#endif
__ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
__ addptr(rsp, wordSize);
__ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock:: frame_sizes_offset_in_bytes()));
__ movl(rdx, Address(rdi, Deoptimization::UnrollBlock:: number_of_frames_offset_in_bytes())); // (int)
const Register sender_sp = r8;
__ mov(sender_sp, rsp);
__ movl(rbx, Address(rdi, Deoptimization::UnrollBlock:: caller_adjustment_offset_in_bytes())); // (int)
__ subptr(rsp, rbx);
Label loop;
__ bind(loop);
__ movptr(rbx, Address(rsi, 0)); // Load frame size
__ subptr(rbx, 2 * wordSize); // We'll push pc and rbp by hand
__ pushptr(Address(rcx, 0)); // Save return address
__ enter(); // Save old & set new rbp
__ subptr(rsp, rbx); // Prolog
#ifdef CC_INTERP
__ movptr(Address(rbp,
-(sizeof(BytecodeInterpreter)) + in_bytes(byte_offset_of(BytecodeInterpreter, _sender_sp))),
sender_sp); // Make it walkable
#else // CC_INTERP
__ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize),
sender_sp); // Make it walkable
__ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD );
#endif // CC_INTERP
__ mov(sender_sp, rsp); // Pass sender_sp to next frame
__ addptr(rsi, wordSize); // Bump array pointer (sizes)
__ addptr(rcx, wordSize); // Bump array pointer (pcs)
__ decrementl(rdx); // Decrement counter
__ jcc(Assembler::notZero, loop);
__ pushptr(Address(rcx, 0)); // Save final return address
__ enter(); // Save old & set new rbp
__ subptr(rsp, (SimpleRuntimeFrame::framesize - 4) << LogBytesPerInt);
address the_pc = __ pc();
__ set_last_Java_frame(noreg, rbp, the_pc);
__ andptr(rsp, -(StackAlignmentInBytes)); // Align SP as required by ABI
__ mov(c_rarg0, r15_thread);
__ movl(c_rarg1, Deoptimization::Unpack_uncommon_trap);
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
__ reset_last_Java_frame(true);
__ leave(); // Epilog
__ ret(0);
masm->flush();
_uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps,
SimpleRuntimeFrame::framesize >> 1);
}
#endif // COMPILER2
SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
assert(StubRoutines::forward_exception_entry() != NULL,
"must be generated before");
ResourceMark rm;
OopMapSet *oop_maps = new OopMapSet();
OopMap* map;
CodeBuffer buffer("handler_blob", 2048, 1024);
MacroAssembler* masm = new MacroAssembler(&buffer);
address start = __ pc();
address call_pc = NULL;
int frame_size_in_words;
bool cause_return = (poll_type == POLL_AT_RETURN);
bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
if (UseRTMLocking) {
__ xabort(0);
}
if (!cause_return) {
__ push(rbx);
}
map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors);
__ set_last_Java_frame(noreg, noreg, NULL);
if (!cause_return) {
__ movptr(c_rarg0, Address(r15_thread, JavaThread::saved_exception_pc_offset()));
__ movptr(Address(rbp, wordSize), c_rarg0);
}
__ mov(c_rarg0, r15_thread);
__ call(RuntimeAddress(call_ptr));
oop_maps->add_gc_map( __ pc() - start, map);
Label noException;
__ reset_last_Java_frame(false);
__ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
__ jcc(Assembler::equal, noException);
RegisterSaver::restore_live_registers(masm, save_vectors);
__ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
__ bind(noException);
RegisterSaver::restore_live_registers(masm, save_vectors);
__ ret(0);
masm->flush();
return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
}
RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
ResourceMark rm;
CodeBuffer buffer(name, 1000, 512);
MacroAssembler* masm = new MacroAssembler(&buffer);
int frame_size_in_words;
OopMapSet *oop_maps = new OopMapSet();
OopMap* map = NULL;
int start = __ offset();
map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
int frame_complete = __ offset();
__ set_last_Java_frame(noreg, noreg, NULL);
__ mov(c_rarg0, r15_thread);
__ call(RuntimeAddress(destination));
oop_maps->add_gc_map( __ offset() - start, map);
__ reset_last_Java_frame(false);
Label pending;
__ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
__ jcc(Assembler::notEqual, pending);
__ get_vm_result_2(rbx, r15_thread);
__ movptr(Address(rsp, RegisterSaver::rbx_offset_in_bytes()), rbx);
__ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
RegisterSaver::restore_live_registers(masm);
__ jmp(rax);
__ bind(pending);
RegisterSaver::restore_live_registers(masm);
__ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD);
__ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
__ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
masm->flush();
return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
}
#ifndef _WINDOWS
#define ASM_SUBTRACT
#ifdef ASM_SUBTRACT
static unsigned long
sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
long i = 0, cnt = len;
unsigned long tmp;
asm volatile("clc; "
"0: ; "
"mov (%[b], %[i], 8), %[tmp]; "
"sbb %[tmp], (%[a], %[i], 8); "
"inc %[i]; dec %[cnt]; "
"jne 0b; "
"mov %[carry], %[tmp]; sbb $0, %[tmp]; "
: [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp)
: [a]"r"(a), [b]"r"(b), [carry]"r"(carry)
: "memory");
return tmp;
}
#else // ASM_SUBTRACT
typedef int __attribute__((mode(TI))) int128;
static unsigned long
sub(unsigned long a[], unsigned long b[], unsigned long carry, int len) {
int128 tmp = 0;
int i;
for (i = 0; i < len; i++) {
tmp += a[i];
tmp -= b[i];
a[i] = tmp;
tmp >>= 64;
assert(-1 <= tmp && tmp <= 0, "invariant");
}
return tmp + carry;
}
#endif // ! ASM_SUBTRACT
#define MACC(A, B, T0, T1, T2) \
do { \
unsigned long hi, lo; \
asm volatile("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4" \
: "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \
: "r"(A), "a"(B) : "cc"); \
} while(0)
#define MACC2(A, B, T0, T1, T2) \
do { \
unsigned long hi, lo; \
asm volatile("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4;" \
"add %%rax, %2; adc %%rdx, %3; adc $0, %4" \
: "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \
: "r"(A), "a"(B) : "cc"); \
} while(0)
static void __attribute__((noinline))
montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
unsigned long m[], unsigned long inv, int len) {
unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
int i;
assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
for (i = 0; i < len; i++) {
int j;
for (j = 0; j < i; j++) {
MACC(a[j], b[i-j], t0, t1, t2);
MACC(m[j], n[i-j], t0, t1, t2);
}
MACC(a[i], b[0], t0, t1, t2);
m[i] = t0 * inv;
MACC(m[i], n[0], t0, t1, t2);
assert(t0 == 0, "broken Montgomery multiply");
t0 = t1; t1 = t2; t2 = 0;
}
for (i = len; i < 2*len; i++) {
int j;
for (j = i-len+1; j < len; j++) {
MACC(a[j], b[i-j], t0, t1, t2);
MACC(m[j], n[i-j], t0, t1, t2);
}
m[i-len] = t0;
t0 = t1; t1 = t2; t2 = 0;
}
while (t0)
t0 = sub(m, n, t0, len);
}
static void __attribute__((noinline))
montgomery_square(unsigned long a[], unsigned long n[],
unsigned long m[], unsigned long inv, int len) {
unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
int i;
assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
for (i = 0; i < len; i++) {
int j;
int end = (i+1)/2;
for (j = 0; j < end; j++) {
MACC2(a[j], a[i-j], t0, t1, t2);
MACC(m[j], n[i-j], t0, t1, t2);
}
if ((i & 1) == 0) {
MACC(a[j], a[j], t0, t1, t2);
}
for (; j < i; j++) {
MACC(m[j], n[i-j], t0, t1, t2);
}
m[i] = t0 * inv;
MACC(m[i], n[0], t0, t1, t2);
assert(t0 == 0, "broken Montgomery square");
t0 = t1; t1 = t2; t2 = 0;
}
for (i = len; i < 2*len; i++) {
int start = i-len+1;
int end = start + (len - start)/2;
int j;
for (j = start; j < end; j++) {
MACC2(a[j], a[i-j], t0, t1, t2);
MACC(m[j], n[i-j], t0, t1, t2);
}
if ((i & 1) == 0) {
MACC(a[j], a[j], t0, t1, t2);
}
for (; j < len; j++) {
MACC(m[j], n[i-j], t0, t1, t2);
}
m[i-len] = t0;
t0 = t1; t1 = t2; t2 = 0;
}
while (t0)
t0 = sub(m, n, t0, len);
}
static unsigned long swap(unsigned long x) {
return (x << 32) | (x >> 32);
}
static void reverse_words(unsigned long *s, unsigned long *d, int len) {
d += len;
while(len-- > 0) {
d--;
s++;
}
}
#define MONTGOMERY_SQUARING_THRESHOLD 64
void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
jint len, jlong inv,
jint *m_ints) {
assert(len % 2 == 0, "array length in montgomery_multiply must be even");
int longwords = len/2;
int total_allocation = longwords * sizeof (unsigned long) * 4;
guarantee(total_allocation <= 8192, "must be");
unsigned long *scratch = (unsigned long *)alloca(total_allocation);
unsigned long
reverse_words((unsigned long *)a_ints, a, longwords);
reverse_words((unsigned long *)b_ints, b, longwords);
reverse_words((unsigned long *)n_ints, n, longwords);
::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
reverse_words(m, (unsigned long *)m_ints, longwords);
}
void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
jint len, jlong inv,
jint *m_ints) {
assert(len % 2 == 0, "array length in montgomery_square must be even");
int longwords = len/2;
int total_allocation = longwords * sizeof (unsigned long) * 3;
guarantee(total_allocation <= 8192, "must be");
unsigned long *scratch = (unsigned long *)alloca(total_allocation);
unsigned long
reverse_words((unsigned long *)a_ints, a, longwords);
reverse_words((unsigned long *)n_ints, n, longwords);
#ifndef SOLARIS
if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
#else
if (0) {
#endif
::montgomery_square(a, n, m, (unsigned long)inv, longwords);
} else {
::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
}
reverse_words(m, (unsigned long *)m_ints, longwords);
}
#endif // WINDOWS
#ifdef COMPILER2
void OptoRuntime::generate_exception_blob() {
assert(!OptoRuntime::is_callee_saved_register(RDX_num), "");
assert(!OptoRuntime::is_callee_saved_register(RAX_num), "");
assert(!OptoRuntime::is_callee_saved_register(RCX_num), "");
assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
ResourceMark rm;
CodeBuffer buffer("exception_blob", 2048, 1024);
MacroAssembler* masm = new MacroAssembler(&buffer);
address start = __ pc();
__ push(rdx);
__ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Prolog
__ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp);
__ movptr(Address(r15_thread, JavaThread::exception_oop_offset()),rax);
__ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx);
address the_pc = __ pc();
__ set_last_Java_frame(noreg, noreg, the_pc);
__ mov(c_rarg0, r15_thread);
__ andptr(rsp, -(StackAlignmentInBytes)); // Align stack
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)));
OopMapSet* oop_maps = new OopMapSet();
oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
__ reset_last_Java_frame(false);
__ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt));
__ addptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog
__ pop(rdx); // No need for exception pc anymore
__ mov(r8, rax);
__ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
__ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
#ifdef ASSERT
__ movptr(Address(r15_thread, JavaThread::exception_handler_pc_offset()), (int)NULL_WORD);
__ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int)NULL_WORD);
#endif
__ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int)NULL_WORD);
__ jmp(r8);
masm->flush();
_exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
}
#endif // COMPILER2
C:\hotspot-69087d08d473\src\cpu\x86\vm/stubGenerator_x86_32.cpp
#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "nativeInst_x86.hpp"
#include "oops/instanceOop.hpp"
#include "oops/method.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/thread.inline.hpp"
#include "utilities/top.hpp"
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
#define __ _masm->
#define a__ ((Assembler*)_masm)->
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions
const int FPU_CNTRL_WRD_MASK = 0xFFFF;
static address handle_unsafe_access() {
JavaThread* thread = JavaThread::current();
address pc = thread->saved_exception_pc();
address npc = Assembler::locate_next_instruction(pc);
thread->set_pending_unsafe_access_error();
return npc;
}
class StubGenerator: public StubCodeGenerator {
private:
#ifdef PRODUCT
#define inc_counter_np(counter) ((void)0)
#else
void inc_counter_np_(int& counter) {
__ incrementl(ExternalAddress((address)&counter));
}
#define inc_counter_np(counter) \
BLOCK_COMMENT("inc_counter " #counter); \
inc_counter_np_(counter);
#endif //PRODUCT
void inc_copy_counter_np(BasicType t) {
#ifndef PRODUCT
switch (t) {
case T_BYTE: inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); return;
case T_SHORT: inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); return;
case T_INT: inc_counter_np(SharedRuntime::_jint_array_copy_ctr); return;
case T_LONG: inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); return;
case T_OBJECT: inc_counter_np(SharedRuntime::_oop_array_copy_ctr); return;
}
ShouldNotReachHere();
#endif //PRODUCT
}
address generate_call_stub(address& return_address) {
StubCodeMark mark(this, "StubRoutines", "call_stub");
address start = __ pc();
assert(frame::entry_frame_call_wrapper_offset == 2, "adjust this code");
bool sse_save = false;
const Address rsp_after_call(rbp, -4 * wordSize); // same as in generate_catch_exception()!
const int locals_count_in_bytes (4*wordSize);
const Address mxcsr_save (rbp, -4 * wordSize);
const Address saved_rbx (rbp, -3 * wordSize);
const Address saved_rsi (rbp, -2 * wordSize);
const Address saved_rdi (rbp, -1 * wordSize);
const Address result (rbp, 3 * wordSize);
const Address result_type (rbp, 4 * wordSize);
const Address method (rbp, 5 * wordSize);
const Address entry_point (rbp, 6 * wordSize);
const Address parameters (rbp, 7 * wordSize);
const Address parameter_size(rbp, 8 * wordSize);
const Address thread (rbp, 9 * wordSize); // same as in generate_catch_exception()!
sse_save = UseSSE > 0;
__ enter();
__ movptr(rcx, parameter_size); // parameter counter
__ shlptr(rcx, Interpreter::logStackElementSize); // convert parameter count to bytes
__ addptr(rcx, locals_count_in_bytes); // reserve space for register saves
__ subptr(rsp, rcx);
__ andptr(rsp, -(StackAlignmentInBytes)); // Align stack
__ movptr(saved_rdi, rdi);
__ movptr(saved_rsi, rsi);
__ movptr(saved_rbx, rbx);
if (sse_save) {
Label skip_ldmx;
__ stmxcsr(mxcsr_save);
__ movl(rax, mxcsr_save);
__ andl(rax, MXCSR_MASK); // Only check control and mask bits
ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
__ cmp32(rax, mxcsr_std);
__ jcc(Assembler::equal, skip_ldmx);
__ ldmxcsr(mxcsr_std);
__ bind(skip_ldmx);
}
__ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
#ifdef ASSERT
{ Label L;
__ movptr(rcx, thread);
__ cmpptr(Address(rcx, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
__ jcc(Assembler::equal, L);
__ stop("StubRoutines::call_stub: entered with pending exception");
__ bind(L);
}
#endif
BLOCK_COMMENT("pass parameters if any");
Label parameters_done;
__ movl(rcx, parameter_size); // parameter counter
__ testl(rcx, rcx);
__ jcc(Assembler::zero, parameters_done);
Label loop;
__ movptr(rdx, parameters); // parameter pointer
__ xorptr(rbx, rbx);
__ BIND(loop);
__ movptr(rax, Address(rdx, rcx, Interpreter::stackElementScale(), -wordSize));
__ movptr(Address(rsp, rbx, Interpreter::stackElementScale(),
Interpreter::expr_offset_in_bytes(0)), rax); // store parameter
__ increment(rbx);
__ decrement(rcx);
__ jcc(Assembler::notZero, loop);
__ BIND(parameters_done);
__ movptr(rbx, method); // get Method*
__ movptr(rax, entry_point); // get entry_point
__ mov(rsi, rsp); // set sender sp
BLOCK_COMMENT("call Java function");
__ call(rax);
BLOCK_COMMENT("call_stub_return_address:");
return_address = __ pc();
#ifdef COMPILER2
{
Label L_skip;
if (UseSSE >= 2) {
__ verify_FPU(0, "call_stub_return");
} else {
for (int i = 1; i < 8; i++) {
__ ffree(i);
}
__ movl(rsi, result_type);
__ cmpl(rsi, T_DOUBLE);
__ jcc(Assembler::equal, L_skip);
if (UseSSE == 0) {
__ cmpl(rsi, T_FLOAT);
__ jcc(Assembler::equal, L_skip);
}
__ ffree(0);
}
__ BIND(L_skip);
}
#endif // COMPILER2
__ movptr(rdi, result);
Label is_long, is_float, is_double, exit;
__ movl(rsi, result_type);
__ cmpl(rsi, T_LONG);
__ jcc(Assembler::equal, is_long);
__ cmpl(rsi, T_FLOAT);
__ jcc(Assembler::equal, is_float);
__ cmpl(rsi, T_DOUBLE);
__ jcc(Assembler::equal, is_double);
__ movl(Address(rdi, 0), rax);
__ BIND(exit);
__ verify_FPU(0, "generate_call_stub");
__ lea(rsp, rsp_after_call);
if (sse_save) {
__ ldmxcsr(mxcsr_save);
}
__ movptr(rbx, saved_rbx);
__ movptr(rsi, saved_rsi);
__ movptr(rdi, saved_rdi);
__ addptr(rsp, 4*wordSize);
__ pop(rbp);
__ ret(0);
__ BIND(is_long);
__ movl(Address(rdi, 0 * wordSize), rax);
__ movl(Address(rdi, 1 * wordSize), rdx);
__ jmp(exit);
__ BIND(is_float);
if (UseSSE >= 1) {
__ movflt(Address(rdi, 0), xmm0);
} else {
__ fstp_s(Address(rdi, 0));
}
__ jmp(exit);
__ BIND(is_double);
if (UseSSE >= 2) {
__ movdbl(Address(rdi, 0), xmm0);
} else {
__ fstp_d(Address(rdi, 0));
}
__ jmp(exit);
return start;
}
address generate_catch_exception() {
StubCodeMark mark(this, "StubRoutines", "catch_exception");
const Address rsp_after_call(rbp, -4 * wordSize); // same as in generate_call_stub()!
const Address thread (rbp, 9 * wordSize); // same as in generate_call_stub()!
address start = __ pc();
__ movptr(rcx, thread);
#ifdef ASSERT
{ Label L;
__ get_thread(rbx);
__ cmpptr(rbx, rcx);
__ jcc(Assembler::equal, L);
__ stop("StubRoutines::catch_exception: threads must correspond");
__ bind(L);
}
#endif
__ verify_oop(rax);
__ movptr(Address(rcx, Thread::pending_exception_offset()), rax );
__ lea(Address(rcx, Thread::exception_file_offset ()),
ExternalAddress((address)__FILE__));
__ movl(Address(rcx, Thread::exception_line_offset ()), __LINE__ );
assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before");
__ jump(RuntimeAddress(StubRoutines::_call_stub_return_address));
return start;
}
address generate_forward_exception() {
StubCodeMark mark(this, "StubRoutines", "forward exception");
address start = __ pc();
const Register thread = rcx;
const Register exception_oop = rax;
const Register handler_addr = rbx;
const Register exception_pc = rdx;
#ifdef ASSERT
{ Label L;
__ get_thread(thread);
__ cmpptr(Address(thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
__ jcc(Assembler::notEqual, L);
__ stop("StubRoutines::forward exception: no pending exception (1)");
__ bind(L);
}
#endif
__ get_thread(thread);
__ movptr(exception_pc, Address(rsp, 0));
BLOCK_COMMENT("call exception_handler_for_return_address");
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, exception_pc);
__ mov(handler_addr, rax);
__ get_thread(thread);
__ pop(exception_pc);
__ movptr(exception_oop, Address(thread, Thread::pending_exception_offset()));
__ movptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD);
#ifdef ASSERT
{ Label L;
__ testptr(exception_oop, exception_oop);
__ jcc(Assembler::notEqual, L);
__ stop("StubRoutines::forward exception: no pending exception (2)");
__ bind(L);
}
#endif
__ verify_oop(exception_oop);
__ jmp(handler_addr);
return start;
}
address generate_atomic_xchg() {
StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
address start = __ pc();
__ push(rdx);
Address exchange(rsp, 2 * wordSize);
Address dest_addr(rsp, 3 * wordSize);
__ movl(rax, exchange);
__ movptr(rdx, dest_addr);
__ xchgl(rax, Address(rdx, 0));
__ pop(rdx);
__ ret(0);
return start;
}
address generate_verify_mxcsr() {
StubCodeMark mark(this, "StubRoutines", "verify_mxcsr");
address start = __ pc();
const Address mxcsr_save(rsp, 0);
if (CheckJNICalls && UseSSE > 0 ) {
Label ok_ret;
ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
__ push(rax);
__ subptr(rsp, wordSize); // allocate a temp location
__ stmxcsr(mxcsr_save);
__ movl(rax, mxcsr_save);
__ andl(rax, MXCSR_MASK);
__ cmp32(rax, mxcsr_std);
__ jcc(Assembler::equal, ok_ret);
__ warn("MXCSR changed by native JNI code.");
__ ldmxcsr(mxcsr_std);
__ bind(ok_ret);
__ addptr(rsp, wordSize);
__ pop(rax);
}
__ ret(0);
return start;
}
address generate_verify_fpu_cntrl_wrd() {
StubCodeMark mark(this, "StubRoutines", "verify_spcw");
address start = __ pc();
const Address fpu_cntrl_wrd_save(rsp, 0);
if (CheckJNICalls) {
Label ok_ret;
__ push(rax);
__ subptr(rsp, wordSize); // allocate a temp location
__ fnstcw(fpu_cntrl_wrd_save);
__ movl(rax, fpu_cntrl_wrd_save);
__ andl(rax, FPU_CNTRL_WRD_MASK);
ExternalAddress fpu_std(StubRoutines::addr_fpu_cntrl_wrd_std());
__ cmp32(rax, fpu_std);
__ jcc(Assembler::equal, ok_ret);
__ warn("Floating point control word changed by native JNI code.");
__ fldcw(fpu_std);
__ bind(ok_ret);
__ addptr(rsp, wordSize);
__ pop(rax);
}
__ ret(0);
return start;
}
address generate_d2i_wrapper(BasicType t, address fcn) {
StubCodeMark mark(this, "StubRoutines", "d2i_wrapper");
address start = __ pc();
enum layout { FPUState_off = 0,
rbp_off = FPUStateSizeInWords,
rdi_off,
rsi_off,
rcx_off,
rbx_off,
saved_argument_off,
saved_argument_off2, // 2nd half of double
framesize
};
assert(FPUStateSizeInWords == 27, "update stack layout");
__ subptr(rsp, wordSize * 2);
__ fstp_d(Address(rsp, 0));
__ push(rbx);
__ push(rcx);
__ push(rsi);
__ push(rdi);
__ push(rbp);
__ push_FPU_state();
__ fld_d(Address(rsp, saved_argument_off * wordSize));
__ subptr(rsp, wordSize*2);
__ fst_d(Address(rsp, 0));
__ empty_FPU_stack();
if (t == T_INT)
{ BLOCK_COMMENT("SharedRuntime::d2i"); }
else if (t == T_LONG)
{ BLOCK_COMMENT("SharedRuntime::d2l"); }
__ call_VM_leaf( fcn, 2 );
__ pop_FPU_state();
__ pop(rbp);
__ pop(rdi);
__ pop(rsi);
__ pop(rcx);
__ pop(rbx);
__ addptr(rsp, wordSize * 2);
__ ret(0);
return start;
}
address generate_handler_for_unsafe_access() {
StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
address start = __ pc();
__ push(0); // hole for return address-to-be
__ pusha(); // push registers
Address next_pc(rsp, RegisterImpl::number_of_registers * BytesPerWord);
BLOCK_COMMENT("call handle_unsafe_access");
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, handle_unsafe_access)));
__ movptr(next_pc, rax); // stuff next address
__ popa();
__ ret(0); // jump to next address
return start;
}
address generate_verify_oop() {
StubCodeMark mark(this, "StubRoutines", "verify_oop");
address start = __ pc();
Label exit, error;
__ pushf();
__ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
__ push(rdx); // save rdx
__ movptr(rax, Address(rsp, 4 * wordSize)); // get object
__ testptr(rax, rax);
__ jcc(Assembler::zero, exit); // if obj is NULL it is ok
const int oop_mask = Universe::verify_oop_mask();
const int oop_bits = Universe::verify_oop_bits();
__ mov(rdx, rax);
__ andptr(rdx, oop_mask);
__ cmpptr(rdx, oop_bits);
__ jcc(Assembler::notZero, error);
__ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass
__ testptr(rax, rax);
__ jcc(Assembler::zero, error); // if klass is NULL it is broken
__ bind(exit);
__ movptr(rax, Address(rsp, 5 * wordSize)); // get saved rax, back
__ pop(rdx); // restore rdx
__ popf(); // restore EFLAGS
__ ret(3 * wordSize); // pop arguments
__ bind(error);
__ movptr(rax, Address(rsp, 5 * wordSize)); // get saved rax, back
__ pop(rdx); // get saved rdx back
__ popf(); // get saved EFLAGS off stack -- will be ignored
__ pusha(); // push registers (eip = return address & msg are already pushed)
BLOCK_COMMENT("call MacroAssembler::debug");
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
__ popa();
__ ret(3 * wordSize); // pop arguments
return start;
}
void gen_write_ref_array_pre_barrier(Register start, Register count, bool uninitialized_target) {
assert_different_registers(start, count);
BarrierSet* bs = Universe::heap()->barrier_set();
switch (bs->kind()) {
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
if (!uninitialized_target) {
__ pusha(); // push registers
__ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre),
start, count);
__ popa();
}
break;
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
case BarrierSet::ModRef:
break;
default :
ShouldNotReachHere();
}
}
void gen_write_ref_array_post_barrier(Register start, Register count) {
BarrierSet* bs = Universe::heap()->barrier_set();
assert_different_registers(start, count);
switch (bs->kind()) {
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
{
__ pusha(); // push registers
__ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post),
start, count);
__ popa();
}
break;
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
{
CardTableModRefBS* ct = (CardTableModRefBS*)bs;
assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
Label L_loop;
const Register end = count; // elements count; end == start+count-1
assert_different_registers(start, end);
__ lea(end, Address(start, count, Address::times_ptr, -wordSize));
__ shrptr(start, CardTableModRefBS::card_shift);
__ shrptr(end, CardTableModRefBS::card_shift);
__ subptr(end, start); // end --> count
__ BIND(L_loop);
intptr_t disp = (intptr_t) ct->byte_map_base;
Address cardtable(start, count, Address::times_1, disp);
__ movb(cardtable, 0);
__ decrement(count);
__ jcc(Assembler::greaterEqual, L_loop);
}
break;
case BarrierSet::ModRef:
break;
default :
ShouldNotReachHere();
}
}
void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
assert( UseSSE >= 2, "supported cpu only" );
Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
__ jmpb(L_copy_64_bytes);
__ align(OptoLoopAlignment);
__ BIND(L_copy_64_bytes_loop);
if (UseUnalignedLoadStores) {
if (UseAVX >= 2) {
__ vmovdqu(xmm0, Address(from, 0));
__ vmovdqu(Address(from, to_from, Address::times_1, 0), xmm0);
__ vmovdqu(xmm1, Address(from, 32));
__ vmovdqu(Address(from, to_from, Address::times_1, 32), xmm1);
} else {
__ movdqu(xmm0, Address(from, 0));
__ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
__ movdqu(xmm1, Address(from, 16));
__ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
__ movdqu(xmm2, Address(from, 32));
__ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
__ movdqu(xmm3, Address(from, 48));
__ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
}
} else {
__ movq(xmm0, Address(from, 0));
__ movq(Address(from, to_from, Address::times_1, 0), xmm0);
__ movq(xmm1, Address(from, 8));
__ movq(Address(from, to_from, Address::times_1, 8), xmm1);
__ movq(xmm2, Address(from, 16));
__ movq(Address(from, to_from, Address::times_1, 16), xmm2);
__ movq(xmm3, Address(from, 24));
__ movq(Address(from, to_from, Address::times_1, 24), xmm3);
__ movq(xmm4, Address(from, 32));
__ movq(Address(from, to_from, Address::times_1, 32), xmm4);
__ movq(xmm5, Address(from, 40));
__ movq(Address(from, to_from, Address::times_1, 40), xmm5);
__ movq(xmm6, Address(from, 48));
__ movq(Address(from, to_from, Address::times_1, 48), xmm6);
__ movq(xmm7, Address(from, 56));
__ movq(Address(from, to_from, Address::times_1, 56), xmm7);
}
__ addl(from, 64);
__ BIND(L_copy_64_bytes);
__ subl(qword_count, 8);
__ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
if (UseUnalignedLoadStores && (UseAVX >= 2)) {
__ vpxor(xmm0, xmm0);
__ vpxor(xmm1, xmm1);
}
__ addl(qword_count, 8);
__ jccb(Assembler::zero, L_exit);
__ BIND(L_copy_8_bytes);
__ movq(xmm0, Address(from, 0));
__ movq(Address(from, to_from, Address::times_1), xmm0);
__ addl(from, 8);
__ decrement(qword_count);
__ jcc(Assembler::greater, L_copy_8_bytes);
__ BIND(L_exit);
}
void mmx_copy_forward(Register from, Register to_from, Register qword_count) {
assert( VM_Version::supports_mmx(), "supported cpu only" );
Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
__ jmpb(L_copy_64_bytes);
__ align(OptoLoopAlignment);
__ BIND(L_copy_64_bytes_loop);
__ movq(mmx0, Address(from, 0));
__ movq(mmx1, Address(from, 8));
__ movq(mmx2, Address(from, 16));
__ movq(Address(from, to_from, Address::times_1, 0), mmx0);
__ movq(mmx3, Address(from, 24));
__ movq(Address(from, to_from, Address::times_1, 8), mmx1);
__ movq(mmx4, Address(from, 32));
__ movq(Address(from, to_from, Address::times_1, 16), mmx2);
__ movq(mmx5, Address(from, 40));
__ movq(Address(from, to_from, Address::times_1, 24), mmx3);
__ movq(mmx6, Address(from, 48));
__ movq(Address(from, to_from, Address::times_1, 32), mmx4);
__ movq(mmx7, Address(from, 56));
__ movq(Address(from, to_from, Address::times_1, 40), mmx5);
__ movq(Address(from, to_from, Address::times_1, 48), mmx6);
__ movq(Address(from, to_from, Address::times_1, 56), mmx7);
__ addptr(from, 64);
__ BIND(L_copy_64_bytes);
__ subl(qword_count, 8);
__ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
__ addl(qword_count, 8);
__ jccb(Assembler::zero, L_exit);
__ BIND(L_copy_8_bytes);
__ movq(mmx0, Address(from, 0));
__ movq(Address(from, to_from, Address::times_1), mmx0);
__ addptr(from, 8);
__ decrement(qword_count);
__ jcc(Assembler::greater, L_copy_8_bytes);
__ BIND(L_exit);
__ emms();
}
address generate_disjoint_copy(BasicType t, bool aligned,
Address::ScaleFactor sf,
address* entry, const char *name,
bool dest_uninitialized = false) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte;
Label L_copy_2_bytes, L_copy_4_bytes, L_copy_64_bytes;
int shift = Address::times_ptr - sf;
const Register from = rsi; // source array address
const Register to = rdi; // destination array address
const Register count = rcx; // elements count
const Register to_from = to; // (to - from)
const Register saved_to = rdx; // saved destination array address
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ push(rsi);
__ push(rdi);
__ movptr(from , Address(rsp, 12+ 4));
__ movptr(to , Address(rsp, 12+ 8));
__ movl(count, Address(rsp, 12+ 12));
if (entry != NULL) {
BLOCK_COMMENT("Entry:");
}
if (t == T_OBJECT) {
__ testl(count, count);
__ jcc(Assembler::zero, L_0_count);
gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
__ mov(saved_to, to); // save 'to'
}
__ subptr(to, from); // to --> to_from
__ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
__ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
if (t == T_BYTE) {
__ testl(from, 1);
__ jccb(Assembler::zero, L_skip_align1);
__ movb(rax, Address(from, 0));
__ movb(Address(from, to_from, Address::times_1, 0), rax);
__ increment(from);
__ decrement(count);
__ BIND(L_skip_align1);
}
__ testl(from, 2);
__ jccb(Assembler::zero, L_skip_align2);
__ movw(rax, Address(from, 0));
__ movw(Address(from, to_from, Address::times_1, 0), rax);
__ addptr(from, 2);
__ subl(count, 1<<(shift-1));
__ BIND(L_skip_align2);
}
if (!VM_Version::supports_mmx()) {
__ mov(rax, count); // save 'count'
__ shrl(count, shift); // bytes count
__ addptr(to_from, from);// restore 'to'
__ rep_mov();
__ subptr(to_from, from);// restore 'to_from'
__ mov(count, rax); // restore 'count'
__ jmpb(L_copy_2_bytes); // all dwords were copied
} else {
if (!UseUnalignedLoadStores) {
__ testptr(from, 4);
__ jccb(Assembler::zero, L_copy_64_bytes);
__ movl(rax, Address(from, 0));
__ movl(Address(from, to_from, Address::times_1, 0), rax);
__ addptr(from, 4);
__ subl(count, 1<<shift);
}
__ BIND(L_copy_64_bytes);
__ mov(rax, count);
__ shrl(rax, shift+1); // 8 bytes chunk count
if (UseXMMForArrayCopy) {
xmm_copy_forward(from, to_from, rax);
} else {
mmx_copy_forward(from, to_from, rax);
}
}
__ BIND(L_copy_4_bytes);
__ testl(count, 1<<shift);
__ jccb(Assembler::zero, L_copy_2_bytes);
__ movl(rax, Address(from, 0));
__ movl(Address(from, to_from, Address::times_1, 0), rax);
if (t == T_BYTE || t == T_SHORT) {
__ addptr(from, 4);
__ BIND(L_copy_2_bytes);
__ testl(count, 1<<(shift-1));
__ jccb(Assembler::zero, L_copy_byte);
__ movw(rax, Address(from, 0));
__ movw(Address(from, to_from, Address::times_1, 0), rax);
if (t == T_BYTE) {
__ addptr(from, 2);
__ BIND(L_copy_byte);
__ testl(count, 1);
__ jccb(Assembler::zero, L_exit);
__ movb(rax, Address(from, 0));
__ movb(Address(from, to_from, Address::times_1, 0), rax);
__ BIND(L_exit);
} else {
__ BIND(L_copy_byte);
}
} else {
__ BIND(L_copy_2_bytes);
}
if (t == T_OBJECT) {
__ movl(count, Address(rsp, 12+12)); // reread 'count'
__ mov(to, saved_to); // restore 'to'
gen_write_ref_array_post_barrier(to, count);
__ BIND(L_0_count);
}
inc_copy_counter_np(t);
__ pop(rdi);
__ pop(rsi);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ xorptr(rax, rax); // return 0
__ ret(0);
return start;
}
address generate_fill(BasicType t, bool aligned, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
BLOCK_COMMENT("Entry:");
const Register to = rdi; // source array address
const Register value = rdx; // value
const Register count = rsi; // elements count
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ push(rsi);
__ push(rdi);
__ movptr(to , Address(rsp, 12+ 4));
__ movl(value, Address(rsp, 12+ 8));
__ movl(count, Address(rsp, 12+ 12));
__ generate_fill(t, aligned, to, value, count, rax, xmm0);
__ pop(rdi);
__ pop(rsi);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
address generate_conjoint_copy(BasicType t, bool aligned,
Address::ScaleFactor sf,
address nooverlap_target,
address* entry, const char *name,
bool dest_uninitialized = false) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte;
Label L_copy_2_bytes, L_copy_4_bytes, L_copy_8_bytes, L_copy_8_bytes_loop;
int shift = Address::times_ptr - sf;
const Register src = rax; // source array address
const Register dst = rdx; // destination array address
const Register from = rsi; // source array address
const Register to = rdi; // destination array address
const Register count = rcx; // elements count
const Register end = rax; // array end address
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ push(rsi);
__ push(rdi);
__ movptr(src , Address(rsp, 12+ 4)); // from
__ movptr(dst , Address(rsp, 12+ 8)); // to
__ movl2ptr(count, Address(rsp, 12+12)); // count
if (entry != NULL) {
BLOCK_COMMENT("Entry:");
}
__ mov(from, src);
__ mov(to , dst);
RuntimeAddress nooverlap(nooverlap_target);
__ cmpptr(dst, src);
__ lea(end, Address(src, count, sf, 0)); // src + count * elem_size
__ jump_cc(Assembler::belowEqual, nooverlap);
__ cmpptr(dst, end);
__ jump_cc(Assembler::aboveEqual, nooverlap);
if (t == T_OBJECT) {
__ testl(count, count);
__ jcc(Assembler::zero, L_0_count);
gen_write_ref_array_pre_barrier(dst, count, dest_uninitialized);
}
__ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
__ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
if (t == T_BYTE || t == T_SHORT) {
__ lea(end, Address(dst, count, sf, 0));
if (t == T_BYTE) {
__ testl(end, 1);
__ jccb(Assembler::zero, L_skip_align1);
__ decrement(count);
__ movb(rdx, Address(from, count, sf, 0));
__ movb(Address(to, count, sf, 0), rdx);
__ BIND(L_skip_align1);
}
__ testl(end, 2);
__ jccb(Assembler::zero, L_skip_align2);
__ subptr(count, 1<<(shift-1));
__ movw(rdx, Address(from, count, sf, 0));
__ movw(Address(to, count, sf, 0), rdx);
__ BIND(L_skip_align2);
__ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
__ jcc(Assembler::below, L_copy_4_bytes);
}
if (!VM_Version::supports_mmx()) {
__ std();
__ mov(rax, count); // Save 'count'
__ mov(rdx, to); // Save 'to'
__ lea(rsi, Address(from, count, sf, -4));
__ lea(rdi, Address(to , count, sf, -4));
__ shrptr(count, shift); // bytes count
__ rep_mov();
__ cld();
__ mov(count, rax); // restore 'count'
__ andl(count, (1<<shift)-1); // mask the number of rest elements
__ movptr(from, Address(rsp, 12+4)); // reread 'from'
__ mov(to, rdx); // restore 'to'
__ jmpb(L_copy_2_bytes); // all dword were copied
} else {
__ testptr(end, 4);
__ jccb(Assembler::zero, L_copy_8_bytes);
__ subl(count, 1<<shift);
__ movl(rdx, Address(from, count, sf, 0));
__ movl(Address(to, count, sf, 0), rdx);
__ jmpb(L_copy_8_bytes);
__ align(OptoLoopAlignment);
__ BIND(L_copy_8_bytes_loop);
if (UseXMMForArrayCopy) {
__ movq(xmm0, Address(from, count, sf, 0));
__ movq(Address(to, count, sf, 0), xmm0);
} else {
__ movq(mmx0, Address(from, count, sf, 0));
__ movq(Address(to, count, sf, 0), mmx0);
}
__ BIND(L_copy_8_bytes);
__ subl(count, 2<<shift);
__ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
__ addl(count, 2<<shift);
if (!UseXMMForArrayCopy) {
__ emms();
}
}
__ BIND(L_copy_4_bytes);
__ testl(count, 1<<shift);
__ jccb(Assembler::zero, L_copy_2_bytes);
__ movl(rdx, Address(from, count, sf, -4));
__ movl(Address(to, count, sf, -4), rdx);
if (t == T_BYTE || t == T_SHORT) {
__ subl(count, (1<<shift));
__ BIND(L_copy_2_bytes);
__ testl(count, 1<<(shift-1));
__ jccb(Assembler::zero, L_copy_byte);
__ movw(rdx, Address(from, count, sf, -2));
__ movw(Address(to, count, sf, -2), rdx);
if (t == T_BYTE) {
__ subl(count, 1<<(shift-1));
__ BIND(L_copy_byte);
__ testl(count, 1);
__ jccb(Assembler::zero, L_exit);
__ movb(rdx, Address(from, 0));
__ movb(Address(to, 0), rdx);
__ BIND(L_exit);
} else {
__ BIND(L_copy_byte);
}
} else {
__ BIND(L_copy_2_bytes);
}
if (t == T_OBJECT) {
__ movl2ptr(count, Address(rsp, 12+12)); // reread count
gen_write_ref_array_post_barrier(to, count);
__ BIND(L_0_count);
}
inc_copy_counter_np(t);
__ pop(rdi);
__ pop(rsi);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ xorptr(rax, rax); // return 0
__ ret(0);
return start;
}
address generate_disjoint_long_copy(address* entry, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
Label L_copy_8_bytes, L_copy_8_bytes_loop;
const Register from = rax; // source array address
const Register to = rdx; // destination array address
const Register count = rcx; // elements count
const Register to_from = rdx; // (to - from)
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ movptr(from , Address(rsp, 8+0)); // from
__ movptr(to , Address(rsp, 8+4)); // to
__ movl2ptr(count, Address(rsp, 8+8)); // count
BLOCK_COMMENT("Entry:");
__ subptr(to, from); // to --> to_from
if (VM_Version::supports_mmx()) {
if (UseXMMForArrayCopy) {
xmm_copy_forward(from, to_from, count);
} else {
mmx_copy_forward(from, to_from, count);
}
} else {
__ jmpb(L_copy_8_bytes);
__ align(OptoLoopAlignment);
__ BIND(L_copy_8_bytes_loop);
__ fild_d(Address(from, 0));
__ fistp_d(Address(from, to_from, Address::times_1));
__ addptr(from, 8);
__ BIND(L_copy_8_bytes);
__ decrement(count);
__ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
}
inc_copy_counter_np(T_LONG);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ xorptr(rax, rax); // return 0
__ ret(0);
return start;
}
address generate_conjoint_long_copy(address nooverlap_target,
address* entry, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
Label L_copy_8_bytes, L_copy_8_bytes_loop;
const Register from = rax; // source array address
const Register to = rdx; // destination array address
const Register count = rcx; // elements count
const Register end_from = rax; // source array end address
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ movptr(from , Address(rsp, 8+0)); // from
__ movptr(to , Address(rsp, 8+4)); // to
__ movl2ptr(count, Address(rsp, 8+8)); // count
BLOCK_COMMENT("Entry:");
__ cmpptr(to, from);
RuntimeAddress nooverlap(nooverlap_target);
__ jump_cc(Assembler::belowEqual, nooverlap);
__ lea(end_from, Address(from, count, Address::times_8, 0));
__ cmpptr(to, end_from);
__ movptr(from, Address(rsp, 8)); // from
__ jump_cc(Assembler::aboveEqual, nooverlap);
__ jmpb(L_copy_8_bytes);
__ align(OptoLoopAlignment);
__ BIND(L_copy_8_bytes_loop);
if (VM_Version::supports_mmx()) {
if (UseXMMForArrayCopy) {
__ movq(xmm0, Address(from, count, Address::times_8));
__ movq(Address(to, count, Address::times_8), xmm0);
} else {
__ movq(mmx0, Address(from, count, Address::times_8));
__ movq(Address(to, count, Address::times_8), mmx0);
}
} else {
__ fild_d(Address(from, count, Address::times_8));
__ fistp_d(Address(to, count, Address::times_8));
}
__ BIND(L_copy_8_bytes);
__ decrement(count);
__ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
__ emms();
}
inc_copy_counter_np(T_LONG);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ xorptr(rax, rax); // return 0
__ ret(0);
return start;
}
void generate_type_check(Register sub_klass,
Address& super_check_offset_addr,
Address& super_klass_addr,
Register temp,
Label* L_success, Label* L_failure) {
BLOCK_COMMENT("type_check:");
Label L_fallthrough;
#define LOCAL_JCC(assembler_con, label_ptr) \
if (label_ptr != NULL) __ jcc(assembler_con, *(label_ptr)); \
else __ jcc(assembler_con, L_fallthrough) /*omit semi*/
assert_different_registers(sub_klass, temp);
int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
__ cmpptr(sub_klass, super_klass_addr);
LOCAL_JCC(Assembler::equal, L_success);
__ movl2ptr(temp, super_check_offset_addr);
Address super_check_addr(sub_klass, temp, Address::times_1, 0);
__ movptr(temp, super_check_addr); // load displayed supertype
__ cmpptr(temp, super_klass_addr); // test the super type
LOCAL_JCC(Assembler::equal, L_success);
__ cmpl(super_check_offset_addr, sc_offset);
LOCAL_JCC(Assembler::notEqual, L_failure);
Register super_klass = temp;
__ movptr(super_klass, super_klass_addr);
__ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg,
L_success, L_failure);
__ bind(L_fallthrough);
if (L_success == NULL) { BLOCK_COMMENT("L_success:"); }
if (L_failure == NULL) { BLOCK_COMMENT("L_failure:"); }
#undef LOCAL_JCC
}
address generate_checkcast_copy(const char *name, address* entry, bool dest_uninitialized = false) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
Label L_load_element, L_store_element, L_do_card_marks, L_done;
const Register from = rax; // source array address
const Register to = rdx; // destination array address
const Register length = rcx; // elements count
const Register elem = rdi; // each oop copied
const Register elem_klass = rsi; // each elem._klass (sub_klass)
const Register temp = rbx; // lone remaining temp
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ push(rsi);
__ push(rdi);
__ push(rbx);
Address from_arg(rsp, 16+ 4); // from
Address to_arg(rsp, 16+ 8); // to
Address length_arg(rsp, 16+12); // elements count
Address ckoff_arg(rsp, 16+16); // super_check_offset
Address ckval_arg(rsp, 16+20); // super_klass
__ movptr(from, from_arg);
__ movptr(to, to_arg);
__ movl2ptr(length, length_arg);
if (entry != NULL) {
BLOCK_COMMENT("Entry:");
}
Address end_from_addr(from, length, Address::times_ptr, 0);
Address end_to_addr(to, length, Address::times_ptr, 0);
Register end_from = from; // re-use
Register end_to = to; // re-use
Register count = length; // re-use
Address from_element_addr(end_from, count, Address::times_ptr, 0);
Address to_element_addr(end_to, count, Address::times_ptr, 0);
Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes());
gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
__ lea(end_from, end_from_addr);
__ lea(end_to, end_to_addr);
assert(length == count, ""); // else fix next line:
__ negptr(count); // negate and test the length
__ jccb(Assembler::notZero, L_load_element);
__ xorptr(rax, rax); // return 0 on (trivial) success
__ jmp(L_done);
__ align(OptoLoopAlignment);
__ BIND(L_store_element);
__ movptr(to_element_addr, elem); // store the oop
__ increment(count); // increment the count toward zero
__ jccb(Assembler::zero, L_do_card_marks);
__ BIND(L_load_element);
__ movptr(elem, from_element_addr); // load the oop
__ testptr(elem, elem);
__ jccb(Assembler::zero, L_store_element);
__ movptr(elem_klass, elem_klass_addr); // query the object klass
generate_type_check(elem_klass, ckoff_arg, ckval_arg, temp,
&L_store_element, NULL);
assert_different_registers(to, count, rax);
Label L_post_barrier;
__ addl(count, length_arg); // transfers = (length - remaining)
__ movl2ptr(rax, count); // save the value
__ notptr(rax); // report (-1^K) to caller (does not affect flags)
__ jccb(Assembler::notZero, L_post_barrier);
__ jmp(L_done); // K == 0, nothing was copied, skip post barrier
__ BIND(L_do_card_marks);
__ xorptr(rax, rax); // return 0 on success
__ movl2ptr(count, length_arg);
__ BIND(L_post_barrier);
__ movptr(to, to_arg); // reload
gen_write_ref_array_post_barrier(to, count);
__ BIND(L_done);
__ pop(rbx);
__ pop(rdi);
__ pop(rsi);
inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
address generate_unsafe_copy(const char *name,
address byte_copy_entry,
address short_copy_entry,
address int_copy_entry,
address long_copy_entry) {
Label L_long_aligned, L_int_aligned, L_short_aligned;
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
const Register from = rax; // source array address
const Register to = rdx; // destination array address
const Register count = rcx; // elements count
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ push(rsi);
__ push(rdi);
Address from_arg(rsp, 12+ 4); // from
Address to_arg(rsp, 12+ 8); // to
Address count_arg(rsp, 12+12); // byte count
__ movptr(from , from_arg);
__ movptr(to , to_arg);
__ movl2ptr(count, count_arg);
inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);
const Register bits = rsi;
__ mov(bits, from);
__ orptr(bits, to);
__ orptr(bits, count);
__ testl(bits, BytesPerLong-1);
__ jccb(Assembler::zero, L_long_aligned);
__ testl(bits, BytesPerInt-1);
__ jccb(Assembler::zero, L_int_aligned);
__ testl(bits, BytesPerShort-1);
__ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry));
__ BIND(L_short_aligned);
__ shrptr(count, LogBytesPerShort); // size => short_count
__ movl(count_arg, count); // update 'count'
__ jump(RuntimeAddress(short_copy_entry));
__ BIND(L_int_aligned);
__ shrptr(count, LogBytesPerInt); // size => int_count
__ movl(count_arg, count); // update 'count'
__ jump(RuntimeAddress(int_copy_entry));
__ BIND(L_long_aligned);
__ shrptr(count, LogBytesPerLong); // size => qword_count
__ movl(count_arg, count); // update 'count'
__ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it.
__ pop(rsi);
__ jump(RuntimeAddress(long_copy_entry));
return start;
}
void arraycopy_range_checks(Register src,
Register src_pos,
Register dst,
Register dst_pos,
Address& length,
Label& L_failed) {
BLOCK_COMMENT("arraycopy_range_checks:");
const Register src_end = src_pos; // source array end position
const Register dst_end = dst_pos; // destination array end position
__ addl(src_end, length); // src_pos + length
__ addl(dst_end, length); // dst_pos + length
__ cmpl(src_end, Address(src, arrayOopDesc::length_offset_in_bytes()));
__ jcc(Assembler::above, L_failed);
__ cmpl(dst_end, Address(dst, arrayOopDesc::length_offset_in_bytes()));
__ jcc(Assembler::above, L_failed);
BLOCK_COMMENT("arraycopy_range_checks done");
}
address generate_generic_copy(const char *name,
address entry_jbyte_arraycopy,
address entry_jshort_arraycopy,
address entry_jint_arraycopy,
address entry_oop_arraycopy,
address entry_jlong_arraycopy,
address entry_checkcast_arraycopy) {
Label L_failed, L_failed_0, L_objArray;
{ int modulus = CodeEntryAlignment;
int target = modulus - 5; // 5 = sizeof jmp(L_failed)
int advance = target - (__ offset() % modulus);
if (advance < 0) advance += modulus;
if (advance > 0) __ nop(advance);
}
StubCodeMark mark(this, "StubRoutines", name);
__ BIND(L_failed_0);
__ jmp(L_failed);
assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed");
__ align(CodeEntryAlignment);
address start = __ pc();
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ push(rsi);
__ push(rdi);
inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
Address SRC (rsp, 12+ 4);
Address SRC_POS (rsp, 12+ 8);
Address DST (rsp, 12+12);
Address DST_POS (rsp, 12+16);
Address LENGTH (rsp, 12+20);
const Register src = rax; // source array oop
const Register src_pos = rsi;
const Register dst = rdx; // destination array oop
const Register dst_pos = rdi;
const Register length = rcx; // transfer count
__ movptr(src, SRC); // src oop
__ testptr(src, src);
__ jccb(Assembler::zero, L_failed_0);
__ movl2ptr(src_pos, SRC_POS); // src_pos
__ testl(src_pos, src_pos);
__ jccb(Assembler::negative, L_failed_0);
__ movptr(dst, DST); // dst oop
__ testptr(dst, dst);
__ jccb(Assembler::zero, L_failed_0);
__ movl2ptr(dst_pos, DST_POS); // dst_pos
__ testl(dst_pos, dst_pos);
__ jccb(Assembler::negative, L_failed_0);
__ movl2ptr(length, LENGTH); // length
__ testl(length, length);
__ jccb(Assembler::negative, L_failed_0);
Address src_klass_addr(src, oopDesc::klass_offset_in_bytes());
Address dst_klass_addr(dst, oopDesc::klass_offset_in_bytes());
const Register rcx_src_klass = rcx; // array klass
__ movptr(rcx_src_klass, Address(src, oopDesc::klass_offset_in_bytes()));
#ifdef ASSERT
BLOCK_COMMENT("assert klasses not null");
{ Label L1, L2;
__ testptr(rcx_src_klass, rcx_src_klass);
__ jccb(Assembler::notZero, L2); // it is broken if klass is NULL
__ bind(L1);
__ stop("broken null klass");
__ bind(L2);
__ cmpptr(dst_klass_addr, (int32_t)NULL_WORD);
__ jccb(Assembler::equal, L1); // this would be broken also
BLOCK_COMMENT("assert done");
}
#endif //ASSERT
int lh_offset = in_bytes(Klass::layout_helper_offset());
Address src_klass_lh_addr(rcx_src_klass, lh_offset);
jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
__ cmpl(src_klass_lh_addr, objArray_lh);
__ jcc(Assembler::equal, L_objArray);
__ cmpptr(rcx_src_klass, dst_klass_addr);
__ jccb(Assembler::notEqual, L_failed_0);
const Register rcx_lh = rcx; // layout helper
assert(rcx_lh == rcx_src_klass, "known alias");
__ movl(rcx_lh, src_klass_lh_addr);
__ cmpl(rcx_lh, Klass::_lh_neutral_value);
__ jcc(Assembler::greaterEqual, L_failed_0); // signed cmp
#ifdef ASSERT
{ Label L;
__ cmpl(rcx_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift));
__ jcc(Assembler::greaterEqual, L); // signed cmp
__ stop("must be a primitive array");
__ bind(L);
}
#endif
assert_different_registers(src, src_pos, dst, dst_pos, rcx_lh);
arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);
const Register rsi_offset = rsi; // array offset
const Register src_array = src; // src array offset
const Register dst_array = dst; // dst array offset
const Register rdi_elsize = rdi; // log2 element size
__ mov(rsi_offset, rcx_lh);
__ shrptr(rsi_offset, Klass::_lh_header_size_shift);
__ andptr(rsi_offset, Klass::_lh_header_size_mask); // array_offset
__ addptr(src_array, rsi_offset); // src array offset
__ addptr(dst_array, rsi_offset); // dst array offset
__ andptr(rcx_lh, Klass::_lh_log2_element_size_mask); // log2 elsize
const Register from = src; // source array address
const Register to = dst; // destination array address
const Register count = rcx; // elements count
#define FROM Address(rsp, 12+ 4)
#define TO Address(rsp, 12+ 8) // Not used now
#define COUNT Address(rsp, 12+12) // Only for oop arraycopy
BLOCK_COMMENT("scale indexes to element size");
__ movl2ptr(rsi, SRC_POS); // src_pos
__ shlptr(rsi); // src_pos << rcx (log2 elsize)
assert(src_array == from, "");
__ addptr(from, rsi); // from = src_array + SRC_POS << log2 elsize
__ movl2ptr(rdi, DST_POS); // dst_pos
__ shlptr(rdi); // dst_pos << rcx (log2 elsize)
assert(dst_array == to, "");
__ addptr(to, rdi); // to = dst_array + DST_POS << log2 elsize
__ movptr(FROM, from); // src_addr
__ mov(rdi_elsize, rcx_lh); // log2 elsize
__ movl2ptr(count, LENGTH); // elements count
BLOCK_COMMENT("choose copy loop based on element size");
__ cmpl(rdi_elsize, 0);
__ jump_cc(Assembler::equal, RuntimeAddress(entry_jbyte_arraycopy));
__ cmpl(rdi_elsize, LogBytesPerShort);
__ jump_cc(Assembler::equal, RuntimeAddress(entry_jshort_arraycopy));
__ cmpl(rdi_elsize, LogBytesPerInt);
__ jump_cc(Assembler::equal, RuntimeAddress(entry_jint_arraycopy));
#ifdef ASSERT
__ cmpl(rdi_elsize, LogBytesPerLong);
__ jccb(Assembler::notEqual, L_failed);
#endif
__ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it.
__ pop(rsi);
__ jump(RuntimeAddress(entry_jlong_arraycopy));
__ BIND(L_failed);
__ xorptr(rax, rax);
__ notptr(rax); // return -1
__ pop(rdi);
__ pop(rsi);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
__ BIND(L_objArray);
Label L_plain_copy, L_checkcast_copy;
__ cmpptr(rcx_src_klass, dst_klass_addr); // usual case is exact equality
__ jccb(Assembler::notEqual, L_checkcast_copy);
assert_different_registers(src, src_pos, dst, dst_pos, rcx_src_klass);
arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);
__ BIND(L_plain_copy);
__ movl2ptr(count, LENGTH); // elements count
__ movl2ptr(src_pos, SRC_POS); // reload src_pos
__ lea(from, Address(src, src_pos, Address::times_ptr,
arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr
__ movl2ptr(dst_pos, DST_POS); // reload dst_pos
__ lea(to, Address(dst, dst_pos, Address::times_ptr,
arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
__ movptr(FROM, from); // src_addr
__ movptr(TO, to); // dst_addr
__ movl(COUNT, count); // count
__ jump(RuntimeAddress(entry_oop_arraycopy));
__ BIND(L_checkcast_copy);
{
int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
int sco_offset = in_bytes(Klass::super_check_offset_offset());
Register rsi_dst_klass = rsi;
Register rdi_temp = rdi;
assert(rsi_dst_klass == src_pos, "expected alias w/ src_pos");
assert(rdi_temp == dst_pos, "expected alias w/ dst_pos");
Address dst_klass_lh_addr(rsi_dst_klass, lh_offset);
__ movptr(rsi_dst_klass, dst_klass_addr);
__ cmpl(dst_klass_lh_addr, objArray_lh);
__ jccb(Assembler::notEqual, L_failed);
__ movl2ptr(src_pos, SRC_POS); // reload rsi
arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);
__ push(rbx);
Register rbx_src_klass = rbx;
__ mov(rbx_src_klass, rcx_src_klass); // spill away from rcx
__ movptr(rsi_dst_klass, dst_klass_addr);
Address super_check_offset_addr(rsi_dst_klass, sco_offset);
Label L_fail_array_check;
generate_type_check(rbx_src_klass,
super_check_offset_addr, dst_klass_addr,
rdi_temp, NULL, &L_fail_array_check);
__ pop(rbx);
__ jmp(L_plain_copy);
__ BIND(L_fail_array_check);
Address from_arg(rsp, 16+ 4); // from
Address to_arg(rsp, 16+ 8); // to
Address length_arg(rsp, 16+12); // elements count
Address ckoff_arg(rsp, 16+16); // super_check_offset
Address ckval_arg(rsp, 16+20); // super_klass
Address SRC_POS_arg(rsp, 16+ 8);
Address DST_POS_arg(rsp, 16+16);
Address LENGTH_arg(rsp, 16+20);
__ movptr(rbx, Address(rsi_dst_klass, ek_offset));
__ movl2ptr(length, LENGTH_arg); // reload elements count
__ movl2ptr(src_pos, SRC_POS_arg); // reload src_pos
__ movl2ptr(dst_pos, DST_POS_arg); // reload dst_pos
__ movptr(ckval_arg, rbx); // destination element type
__ movl(rbx, Address(rbx, sco_offset));
__ movl(ckoff_arg, rbx); // corresponding class check offset
__ movl(length_arg, length); // outgoing length argument
__ lea(from, Address(src, src_pos, Address::times_ptr,
arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
__ movptr(from_arg, from);
__ lea(to, Address(dst, dst_pos, Address::times_ptr,
arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
__ movptr(to_arg, to);
__ jump(RuntimeAddress(entry_checkcast_arraycopy));
}
return start;
}
void generate_arraycopy_stubs() {
address entry;
address entry_jbyte_arraycopy;
address entry_jshort_arraycopy;
address entry_jint_arraycopy;
address entry_oop_arraycopy;
address entry_jlong_arraycopy;
address entry_checkcast_arraycopy;
StubRoutines::_arrayof_jbyte_disjoint_arraycopy =
generate_disjoint_copy(T_BYTE, true, Address::times_1, &entry,
"arrayof_jbyte_disjoint_arraycopy");
StubRoutines::_arrayof_jbyte_arraycopy =
generate_conjoint_copy(T_BYTE, true, Address::times_1, entry,
NULL, "arrayof_jbyte_arraycopy");
StubRoutines::_jbyte_disjoint_arraycopy =
generate_disjoint_copy(T_BYTE, false, Address::times_1, &entry,
"jbyte_disjoint_arraycopy");
StubRoutines::_jbyte_arraycopy =
generate_conjoint_copy(T_BYTE, false, Address::times_1, entry,
&entry_jbyte_arraycopy, "jbyte_arraycopy");
StubRoutines::_arrayof_jshort_disjoint_arraycopy =
generate_disjoint_copy(T_SHORT, true, Address::times_2, &entry,
"arrayof_jshort_disjoint_arraycopy");
StubRoutines::_arrayof_jshort_arraycopy =
generate_conjoint_copy(T_SHORT, true, Address::times_2, entry,
NULL, "arrayof_jshort_arraycopy");
StubRoutines::_jshort_disjoint_arraycopy =
generate_disjoint_copy(T_SHORT, false, Address::times_2, &entry,
"jshort_disjoint_arraycopy");
StubRoutines::_jshort_arraycopy =
generate_conjoint_copy(T_SHORT, false, Address::times_2, entry,
&entry_jshort_arraycopy, "jshort_arraycopy");
StubRoutines::_jint_disjoint_arraycopy =
generate_disjoint_copy(T_INT, true, Address::times_4, &entry,
"jint_disjoint_arraycopy");
StubRoutines::_jint_arraycopy =
generate_conjoint_copy(T_INT, true, Address::times_4, entry,
&entry_jint_arraycopy, "jint_arraycopy");
StubRoutines::_oop_disjoint_arraycopy =
generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry,
"oop_disjoint_arraycopy");
StubRoutines::_oop_arraycopy =
generate_conjoint_copy(T_OBJECT, true, Address::times_ptr, entry,
&entry_oop_arraycopy, "oop_arraycopy");
StubRoutines::_oop_disjoint_arraycopy_uninit =
generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry,
"oop_disjoint_arraycopy_uninit",
StubRoutines::_oop_arraycopy_uninit =
generate_conjoint_copy(T_OBJECT, true, Address::times_ptr, entry,
NULL, "oop_arraycopy_uninit",
StubRoutines::_jlong_disjoint_arraycopy =
generate_disjoint_long_copy(&entry, "jlong_disjoint_arraycopy");
StubRoutines::_jlong_arraycopy =
generate_conjoint_long_copy(entry, &entry_jlong_arraycopy,
"jlong_arraycopy");
StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy;
StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy;
StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit;
StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy;
StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy;
StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy;
StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit;
StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy;
StubRoutines::_checkcast_arraycopy =
generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
StubRoutines::_checkcast_arraycopy_uninit =
generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, /*dest_uninitialized*/true);
StubRoutines::_unsafe_arraycopy =
generate_unsafe_copy("unsafe_arraycopy",
entry_jbyte_arraycopy,
entry_jshort_arraycopy,
entry_jint_arraycopy,
entry_jlong_arraycopy);
StubRoutines::_generic_arraycopy =
generate_generic_copy("generic_arraycopy",
entry_jbyte_arraycopy,
entry_jshort_arraycopy,
entry_jint_arraycopy,
entry_oop_arraycopy,
entry_jlong_arraycopy,
entry_checkcast_arraycopy);
}
void generate_math_stubs() {
{
StubCodeMark mark(this, "StubRoutines", "log");
StubRoutines::_intrinsic_log = (double (*)(double)) __ pc();
__ fld_d(Address(rsp, 4));
__ flog();
__ ret(0);
}
{
StubCodeMark mark(this, "StubRoutines", "log10");
StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
__ fld_d(Address(rsp, 4));
__ flog10();
__ ret(0);
}
{
StubCodeMark mark(this, "StubRoutines", "sin");
StubRoutines::_intrinsic_sin = (double (*)(double)) __ pc();
__ fld_d(Address(rsp, 4));
__ trigfunc('s');
__ ret(0);
}
{
StubCodeMark mark(this, "StubRoutines", "cos");
StubRoutines::_intrinsic_cos = (double (*)(double)) __ pc();
__ fld_d(Address(rsp, 4));
__ trigfunc('c');
__ ret(0);
}
{
StubCodeMark mark(this, "StubRoutines", "tan");
StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc();
__ fld_d(Address(rsp, 4));
__ trigfunc('t');
__ ret(0);
}
{
StubCodeMark mark(this, "StubRoutines", "exp");
StubRoutines::_intrinsic_exp = (double (*)(double)) __ pc();
__ fld_d(Address(rsp, 4));
__ exp_with_fallback(0);
__ ret(0);
}
{
StubCodeMark mark(this, "StubRoutines", "pow");
StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc();
__ fld_d(Address(rsp, 12));
__ fld_d(Address(rsp, 4));
__ pow_with_fallback(0);
__ ret(0);
}
}
enum {AESBlockSize = 16};
address generate_key_shuffle_mask() {
__ align(16);
StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
address start = __ pc();
__ emit_data(0x00010203, relocInfo::none, 0 );
__ emit_data(0x04050607, relocInfo::none, 0 );
__ emit_data(0x08090a0b, relocInfo::none, 0 );
__ emit_data(0x0c0d0e0f, relocInfo::none, 0 );
return start;
}
void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
__ movdqu(xmmdst, Address(key, offset));
if (xmm_shuf_mask != NULL) {
__ pshufb(xmmdst, xmm_shuf_mask);
} else {
__ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
}
}
void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
load_key(xmmtmp, key, offset, xmm_shuf_mask);
__ aesenc(xmmdst, xmmtmp);
}
void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
load_key(xmmtmp, key, offset, xmm_shuf_mask);
__ aesdec(xmmdst, xmmtmp);
}
address generate_aescrypt_encryptBlock() {
assert(UseAES, "need AES instructions and misaligned SSE support");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
Label L_doLast;
address start = __ pc();
const Register from = rdx; // source array address
const Register to = rdx; // destination array address
const Register key = rcx; // key array address
const Register keylen = rax;
const Address from_param(rbp, 8+0);
const Address to_param (rbp, 8+4);
const Address key_param (rbp, 8+8);
const XMMRegister xmm_result = xmm0;
const XMMRegister xmm_key_shuf_mask = xmm1;
const XMMRegister xmm_temp1 = xmm2;
const XMMRegister xmm_temp2 = xmm3;
const XMMRegister xmm_temp3 = xmm4;
const XMMRegister xmm_temp4 = xmm5;
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ movptr(from, from_param);
__ movptr(key, key_param);
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
__ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
__ movptr(to, to_param);
load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
__ pxor(xmm_result, xmm_temp1);
load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
__ aesenc(xmm_result, xmm_temp1);
__ aesenc(xmm_result, xmm_temp2);
__ aesenc(xmm_result, xmm_temp3);
__ aesenc(xmm_result, xmm_temp4);
load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
__ aesenc(xmm_result, xmm_temp1);
__ aesenc(xmm_result, xmm_temp2);
__ aesenc(xmm_result, xmm_temp3);
__ aesenc(xmm_result, xmm_temp4);
load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
__ cmpl(keylen, 44);
__ jccb(Assembler::equal, L_doLast);
__ aesenc(xmm_result, xmm_temp1);
__ aesenc(xmm_result, xmm_temp2);
load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
__ cmpl(keylen, 52);
__ jccb(Assembler::equal, L_doLast);
__ aesenc(xmm_result, xmm_temp1);
__ aesenc(xmm_result, xmm_temp2);
load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
__ BIND(L_doLast);
__ aesenc(xmm_result, xmm_temp1);
__ aesenclast(xmm_result, xmm_temp2);
__ movdqu(Address(to, 0), xmm_result); // store the result
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
address generate_aescrypt_decryptBlock() {
assert(UseAES, "need AES instructions and misaligned SSE support");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
Label L_doLast;
address start = __ pc();
const Register from = rdx; // source array address
const Register to = rdx; // destination array address
const Register key = rcx; // key array address
const Register keylen = rax;
const Address from_param(rbp, 8+0);
const Address to_param (rbp, 8+4);
const Address key_param (rbp, 8+8);
const XMMRegister xmm_result = xmm0;
const XMMRegister xmm_key_shuf_mask = xmm1;
const XMMRegister xmm_temp1 = xmm2;
const XMMRegister xmm_temp2 = xmm3;
const XMMRegister xmm_temp3 = xmm4;
const XMMRegister xmm_temp4 = xmm5;
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ movptr(from, from_param);
__ movptr(key, key_param);
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
__ movdqu(xmm_result, Address(from, 0));
__ movptr(to, to_param);
load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
__ pxor (xmm_result, xmm_temp1);
__ aesdec(xmm_result, xmm_temp2);
__ aesdec(xmm_result, xmm_temp3);
__ aesdec(xmm_result, xmm_temp4);
load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
__ aesdec(xmm_result, xmm_temp1);
__ aesdec(xmm_result, xmm_temp2);
__ aesdec(xmm_result, xmm_temp3);
__ aesdec(xmm_result, xmm_temp4);
load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);
__ cmpl(keylen, 44);
__ jccb(Assembler::equal, L_doLast);
__ aesdec(xmm_result, xmm_temp1);
__ aesdec(xmm_result, xmm_temp2);
load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
__ cmpl(keylen, 52);
__ jccb(Assembler::equal, L_doLast);
__ aesdec(xmm_result, xmm_temp1);
__ aesdec(xmm_result, xmm_temp2);
load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
__ BIND(L_doLast);
__ aesdec(xmm_result, xmm_temp1);
__ aesdec(xmm_result, xmm_temp2);
__ aesdeclast(xmm_result, xmm_temp3);
__ movdqu(Address(to, 0), xmm_result); // store the result
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
void handleSOERegisters(bool saving) {
const int saveFrameSizeInBytes = 4 * wordSize;
const Address saved_rbx (rbp, -3 * wordSize);
const Address saved_rsi (rbp, -2 * wordSize);
const Address saved_rdi (rbp, -1 * wordSize);
if (saving) {
__ subptr(rsp, saveFrameSizeInBytes);
__ movptr(saved_rsi, rsi);
__ movptr(saved_rdi, rdi);
__ movptr(saved_rbx, rbx);
} else {
__ movptr(rsi, saved_rsi);
__ movptr(rdi, saved_rdi);
__ movptr(rbx, saved_rbx);
}
}
address generate_cipherBlockChaining_encryptAESCrypt() {
assert(UseAES, "need AES instructions and misaligned SSE support");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
address start = __ pc();
Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
const Register from = rsi; // source array address
const Register to = rdx; // destination array address
const Register key = rcx; // key array address
const Register rvec = rdi; // r byte array initialized from initvector array address
const Register len_reg = rbx; // src len (must be multiple of blocksize 16)
const Register pos = rax;
const XMMRegister xmm_result = xmm0;
const XMMRegister xmm_temp = xmm1;
const int XMM_REG_NUM_KEY_FIRST = 2;
const int XMM_REG_NUM_KEY_LAST = 7;
const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
__ enter(); // required for proper stackwalking of RuntimeStub frame
handleSOERegisters(true /*saving*/);
const Address from_param(rbp, 8+0);
const Address to_param (rbp, 8+4);
const Address key_param (rbp, 8+8);
const Address rvec_param (rbp, 8+12);
const Address len_param (rbp, 8+16);
__ movptr(from , from_param);
__ movptr(to , to_param);
__ movptr(key , key_param);
__ movptr(rvec , rvec_param);
__ movptr(len_reg , len_param);
const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
offset += 0x10;
}
__ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec
__ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
__ cmpl(rax, 44);
__ jcc(Assembler::notEqual, L_key_192_256);
__ movl(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_loopTop_128);
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
__ pxor (xmm_result, xmm_temp); // xor with the current r vector
__ pxor (xmm_result, xmm_key0); // do the aes rounds
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
__ aesenc(xmm_result, as_XMMRegister(rnum));
}
for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) {
aes_enc_key(xmm_result, xmm_temp, key, key_offset);
}
load_key(xmm_temp, key, 0xa0);
__ aesenclast(xmm_result, xmm_temp);
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
__ addptr(pos, AESBlockSize);
__ subptr(len_reg, AESBlockSize);
__ jcc(Assembler::notEqual, L_loopTop_128);
__ BIND(L_exit);
__ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object
handleSOERegisters(false /*restoring*/);
__ movptr(rax, len_param); // return length
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
__ BIND(L_key_192_256);
__ cmpl(rax, 52);
__ jcc(Assembler::notEqual, L_key_256);
__ movl(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_loopTop_192);
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
__ pxor (xmm_result, xmm_temp); // xor with the current r vector
__ pxor (xmm_result, xmm_key0); // do the aes rounds
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
__ aesenc(xmm_result, as_XMMRegister(rnum));
}
for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) {
aes_enc_key(xmm_result, xmm_temp, key, key_offset);
}
load_key(xmm_temp, key, 0xc0);
__ aesenclast(xmm_result, xmm_temp);
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
__ addptr(pos, AESBlockSize);
__ subptr(len_reg, AESBlockSize);
__ jcc(Assembler::notEqual, L_loopTop_192);
__ jmp(L_exit);
__ BIND(L_key_256);
__ movl(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_loopTop_256);
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
__ pxor (xmm_result, xmm_temp); // xor with the current r vector
__ pxor (xmm_result, xmm_key0); // do the aes rounds
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
__ aesenc(xmm_result, as_XMMRegister(rnum));
}
for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) {
aes_enc_key(xmm_result, xmm_temp, key, key_offset);
}
load_key(xmm_temp, key, 0xe0);
__ aesenclast(xmm_result, xmm_temp);
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
__ addptr(pos, AESBlockSize);
__ subptr(len_reg, AESBlockSize);
__ jcc(Assembler::notEqual, L_loopTop_256);
__ jmp(L_exit);
return start;
}
address generate_cipherBlockChaining_decryptAESCrypt() {
assert(UseAES, "need AES instructions and misaligned SSE support");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
address start = __ pc();
Label L_exit, L_key_192_256, L_key_256;
Label L_singleBlock_loopTop_128;
Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256;
const Register from = rsi; // source array address
const Register to = rdx; // destination array address
const Register key = rcx; // key array address
const Register rvec = rdi; // r byte array initialized from initvector array address
const Register len_reg = rbx; // src len (must be multiple of blocksize 16)
const Register pos = rax;
const XMMRegister xmm_result = xmm0;
const XMMRegister xmm_temp = xmm1;
const int XMM_REG_NUM_KEY_FIRST = 2;
const int XMM_REG_NUM_KEY_LAST = 7;
const int FIRST_NON_REG_KEY_offset = 0x70;
const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
__ enter(); // required for proper stackwalking of RuntimeStub frame
handleSOERegisters(true /*saving*/);
const Address from_param(rbp, 8+0);
const Address to_param (rbp, 8+4);
const Address key_param (rbp, 8+8);
const Address rvec_param (rbp, 8+12);
const Address len_param (rbp, 8+16);
__ movptr(from , from_param);
__ movptr(to , to_param);
__ movptr(key , key_param);
__ movptr(rvec , rvec_param);
__ movptr(len_reg , len_param);
const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
offset += 0x10;
}
const Register prev_block_cipher_ptr = rvec;
__ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
__ cmpl(rax, 44);
__ jcc(Assembler::notEqual, L_key_192_256);
__ movl(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_singleBlock_loopTop_128);
__ cmpptr(len_reg, 0); // any blocks left??
__ jcc(Assembler::equal, L_exit);
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
__ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
__ aesdec(xmm_result, as_XMMRegister(rnum));
}
for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xa0; key_offset += 0x10) { // 128-bit runs up to key offset a0
aes_dec_key(xmm_result, xmm_temp, key, key_offset);
}
load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0
__ aesdeclast(xmm_result, xmm_temp);
__ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
__ pxor (xmm_result, xmm_temp); // xor with the current r vector
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
__ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr
__ addptr(pos, AESBlockSize);
__ subptr(len_reg, AESBlockSize);
__ jmp(L_singleBlock_loopTop_128);
__ BIND(L_exit);
__ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
__ movptr(rvec , rvec_param); // restore this since used in loop
__ movdqu(Address(rvec, 0), xmm_temp); // final value of r stored in rvec of CipherBlockChaining object
handleSOERegisters(false /*restoring*/);
__ movptr(rax, len_param); // return length
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
__ BIND(L_key_192_256);
__ cmpl(rax, 52);
__ jcc(Assembler::notEqual, L_key_256);
__ movl(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_singleBlock_loopTop_192);
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
__ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
__ aesdec(xmm_result, as_XMMRegister(rnum));
}
for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xc0; key_offset += 0x10) { // 192-bit runs up to key offset c0
aes_dec_key(xmm_result, xmm_temp, key, key_offset);
}
load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0
__ aesdeclast(xmm_result, xmm_temp);
__ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
__ pxor (xmm_result, xmm_temp); // xor with the current r vector
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
__ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr
__ addptr(pos, AESBlockSize);
__ subptr(len_reg, AESBlockSize);
__ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
__ jmp(L_exit);
__ BIND(L_key_256);
__ movl(pos, 0);
__ align(OptoLoopAlignment);
__ BIND(L_singleBlock_loopTop_256);
__ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
__ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
__ aesdec(xmm_result, as_XMMRegister(rnum));
}
for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) { // 256-bit runs up to key offset e0
aes_dec_key(xmm_result, xmm_temp, key, key_offset);
}
load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0
__ aesdeclast(xmm_result, xmm_temp);
__ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
__ pxor (xmm_result, xmm_temp); // xor with the current r vector
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
__ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr
__ addptr(pos, AESBlockSize);
__ subptr(len_reg, AESBlockSize);
__ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
__ jmp(L_exit);
return start;
}
address generate_ghash_long_swap_mask() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
address start = __ pc();
__ emit_data(0x0b0a0908, relocInfo::none, 0);
__ emit_data(0x0f0e0d0c, relocInfo::none, 0);
__ emit_data(0x03020100, relocInfo::none, 0);
__ emit_data(0x07060504, relocInfo::none, 0);
return start;
}
address generate_ghash_byte_swap_mask() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
address start = __ pc();
__ emit_data(0x0c0d0e0f, relocInfo::none, 0);
__ emit_data(0x08090a0b, relocInfo::none, 0);
__ emit_data(0x04050607, relocInfo::none, 0);
__ emit_data(0x00010203, relocInfo::none, 0);
return start;
}
address generate_ghash_processBlocks() {
assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support");
__ align(CodeEntryAlignment);
Label L_ghash_loop, L_exit;
StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
address start = __ pc();
const Register state = rdi;
const Register subkeyH = rsi;
const Register data = rdx;
const Register blocks = rcx;
const Address state_param(rbp, 8+0);
const Address subkeyH_param(rbp, 8+4);
const Address data_param(rbp, 8+8);
const Address blocks_param(rbp, 8+12);
const XMMRegister xmm_temp0 = xmm0;
const XMMRegister xmm_temp1 = xmm1;
const XMMRegister xmm_temp2 = xmm2;
const XMMRegister xmm_temp3 = xmm3;
const XMMRegister xmm_temp4 = xmm4;
const XMMRegister xmm_temp5 = xmm5;
const XMMRegister xmm_temp6 = xmm6;
const XMMRegister xmm_temp7 = xmm7;
__ enter();
handleSOERegisters(true); // Save registers
__ movptr(state, state_param);
__ movptr(subkeyH, subkeyH_param);
__ movptr(data, data_param);
__ movptr(blocks, blocks_param);
__ movdqu(xmm_temp0, Address(state, 0));
__ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
__ movdqu(xmm_temp1, Address(subkeyH, 0));
__ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
__ BIND(L_ghash_loop);
__ movdqu(xmm_temp2, Address(data, 0));
__ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
__ pxor(xmm_temp0, xmm_temp2);
__ movdqu(xmm_temp3, xmm_temp0);
__ pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0
__ movdqu(xmm_temp4, xmm_temp0);
__ pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1
__ movdqu(xmm_temp5, xmm_temp0);
__ pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0
__ movdqu(xmm_temp6, xmm_temp0);
__ pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1
__ pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0
__ movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5
__ psrldq(xmm_temp4, 8); // shift by xmm4 64 bits to the right
__ pslldq(xmm_temp5, 8); // shift by xmm5 64 bits to the left
__ pxor(xmm_temp3, xmm_temp5);
__ pxor(xmm_temp6, xmm_temp4); // Register pair <xmm6:xmm3> holds the result
__ movdqu(xmm_temp7, xmm_temp3);
__ movdqu(xmm_temp4, xmm_temp6);
__ pslld (xmm_temp3, 1);
__ pslld(xmm_temp6, 1);
__ psrld(xmm_temp7, 31);
__ psrld(xmm_temp4, 31);
__ movdqu(xmm_temp5, xmm_temp7);
__ pslldq(xmm_temp4, 4);
__ pslldq(xmm_temp7, 4);
__ psrldq(xmm_temp5, 12);
__ por(xmm_temp3, xmm_temp7);
__ por(xmm_temp6, xmm_temp4);
__ por(xmm_temp6, xmm_temp5);
__ movdqu(xmm_temp7, xmm_temp3);
__ movdqu(xmm_temp4, xmm_temp3);
__ movdqu(xmm_temp5, xmm_temp3);
__ pslld(xmm_temp7, 31); // packed right shift shifting << 31
__ pslld(xmm_temp4, 30); // packed right shift shifting << 30
__ pslld(xmm_temp5, 25); // packed right shift shifting << 25
__ pxor(xmm_temp7, xmm_temp4); // xor the shifted versions
__ pxor(xmm_temp7, xmm_temp5);
__ movdqu(xmm_temp4, xmm_temp7);
__ pslldq(xmm_temp7, 12);
__ psrldq(xmm_temp4, 4);
__ pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete
__ movdqu(xmm_temp2, xmm_temp3);
__ movdqu(xmm_temp7, xmm_temp3);
__ movdqu(xmm_temp5, xmm_temp3);
__ psrld(xmm_temp2, 1); // packed left shifting >> 1
__ psrld(xmm_temp7, 2); // packed left shifting >> 2
__ psrld(xmm_temp5, 7); // packed left shifting >> 7
__ pxor(xmm_temp2, xmm_temp7); // xor the shifted versions
__ pxor(xmm_temp2, xmm_temp5);
__ pxor(xmm_temp2, xmm_temp4);
__ pxor(xmm_temp3, xmm_temp2);
__ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6
__ decrement(blocks);
__ jcc(Assembler::zero, L_exit);
__ movdqu(xmm_temp0, xmm_temp6);
__ addptr(data, 16);
__ jmp(L_ghash_loop);
__ BIND(L_exit);
__ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
__ movdqu(Address(state, 0), xmm_temp6); // store the result
handleSOERegisters(false); // restore registers
__ leave();
__ ret(0);
return start;
}
address generate_updateBytesCRC32() {
assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
address start = __ pc();
const Register crc = rdx; // crc
const Register buf = rsi; // source java byte array address
const Register len = rcx; // length
const Register table = rdi; // crc_table address (reuse register)
const Register tmp = rbx;
assert_different_registers(crc, buf, len, table, tmp, rax);
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ push(rsi);
__ push(rdi);
__ push(rbx);
Address crc_arg(rbp, 8 + 0);
Address buf_arg(rbp, 8 + 4);
Address len_arg(rbp, 8 + 8);
__ movl(crc, crc_arg);
__ movptr(buf, buf_arg);
__ movl(len, len_arg);
__ kernel_crc32(crc, buf, len, table, tmp);
__ movl(rax, crc);
__ pop(rbx);
__ pop(rdi);
__ pop(rsi);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
void generate_safefetch(const char* name, int size, address* entry,
address* fault_pc, address* continuation_pc) {
StubCodeMark mark(this, "StubRoutines", name);
__ movl(rax, Address(rsp, 0x8));
__ movl(rcx, Address(rsp, 0x4));
switch (size) {
case 4:
__ movl(rax, Address(rcx, 0));
break;
case 8:
Unimplemented();
break;
default:
ShouldNotReachHere();
}
__ ret(0);
}
public:
enum layout {
thread_off, // last_java_sp
arg1_off,
arg2_off,
rbp_off, // callee saved register
ret_pc,
framesize
};
private:
#undef __
#define __ masm->
address generate_throw_exception(const char* name, address runtime_entry,
Register arg1 = noreg, Register arg2 = noreg) {
int insts_size = 256;
int locs_size = 32;
CodeBuffer code(name, insts_size, locs_size);
OopMapSet* oop_maps = new OopMapSet();
MacroAssembler* masm = new MacroAssembler(&code);
address start = __ pc();
Register java_thread = rbx;
__ get_thread(java_thread);
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ subptr(rsp, (framesize-2) * wordSize); // prolog
int frame_complete = __ pc() - start;
__ movptr(Address(rsp, thread_off * wordSize), java_thread);
if (arg1 != noreg) {
__ movptr(Address(rsp, arg1_off * wordSize), arg1);
}
if (arg2 != noreg) {
assert(arg1 != noreg, "missing reg arg");
__ movptr(Address(rsp, arg2_off * wordSize), arg2);
}
__ set_last_Java_frame(java_thread, rsp, rbp, NULL);
BLOCK_COMMENT("call runtime_entry");
__ call(RuntimeAddress(runtime_entry));
OopMap* map = new OopMap(framesize, 0);
oop_maps->add_gc_map(__ pc() - start, map);
__ get_thread(java_thread);
__ reset_last_Java_frame(java_thread, true);
__ leave(); // required for proper stackwalking of RuntimeStub frame
#ifdef ASSERT
Label L;
__ cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
__ jcc(Assembler::notEqual, L);
__ should_not_reach_here();
__ bind(L);
#endif /* ASSERT */
__ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, framesize, oop_maps, false);
return stub->entry_point();
}
void create_control_words() {
StubRoutines::_fpu_cntrl_wrd_std = 0x027F;
StubRoutines::_fpu_cntrl_wrd_trunc = 0x0D7F;
StubRoutines::_fpu_cntrl_wrd_24 = 0x007F;
StubRoutines::_fpu_cntrl_wrd_64 = 0x037F;
StubRoutines::_mxcsr_std = 0x1F80;
StubRoutines::_fpu_subnormal_bias1[0]= 0x00000000; // 2^(-15360) == 0x03ff 8000 0000 0000 0000
StubRoutines::_fpu_subnormal_bias1[1]= 0x80000000;
StubRoutines::_fpu_subnormal_bias1[2]= 0x03ff;
StubRoutines::_fpu_subnormal_bias2[0]= 0x00000000; // 2^(+15360) == 0x7bff 8000 0000 0000 0000
StubRoutines::_fpu_subnormal_bias2[1]= 0x80000000;
StubRoutines::_fpu_subnormal_bias2[2]= 0x7bff;
}
void generate_initial() {
StubRoutines::_forward_exception_entry = generate_forward_exception();
StubRoutines::_call_stub_entry =
generate_call_stub(StubRoutines::_call_stub_return_address);
StubRoutines::_catch_exception_entry = generate_catch_exception();
StubRoutines::_atomic_xchg_entry = generate_atomic_xchg();
StubRoutines::_handler_for_unsafe_access_entry =
generate_handler_for_unsafe_access();
create_control_words();
StubRoutines::x86::_verify_mxcsr_entry = generate_verify_mxcsr();
StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = generate_verify_fpu_cntrl_wrd();
StubRoutines::_d2i_wrapper = generate_d2i_wrapper(T_INT,
CAST_FROM_FN_PTR(address, SharedRuntime::d2i));
StubRoutines::_d2l_wrapper = generate_d2i_wrapper(T_LONG,
CAST_FROM_FN_PTR(address, SharedRuntime::d2l));
StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
if (UseCRC32Intrinsics) {
StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table;
StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
}
}
void generate_all() {
StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError));
StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError));
StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call));
StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
generate_arraycopy_stubs();
generate_math_stubs();
if (UseAESIntrinsics) {
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
}
if (UseGHASHIntrinsics) {
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
}
generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
&StubRoutines::_safefetch32_fault_pc,
&StubRoutines::_safefetch32_continuation_pc);
StubRoutines::_safefetchN_entry = StubRoutines::_safefetch32_entry;
StubRoutines::_safefetchN_fault_pc = StubRoutines::_safefetch32_fault_pc;
StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc;
}
public:
StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
if (all) {
generate_all();
} else {
generate_initial();
}
}
}; // end class declaration
void StubGenerator_generate(CodeBuffer* code, bool all) {
StubGenerator g(code, all);
}
C:\hotspot-69087d08d473\src\cpu\x86\vm/stubGenerator_x86_64.cpp
#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "nativeInst_x86.hpp"
#include "oops/instanceOop.hpp"
#include "oops/method.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/thread.inline.hpp"
#include "utilities/top.hpp"
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
#define __ _masm->
#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
#define a__ ((Assembler*)_masm)->
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions
static address handle_unsafe_access() {
JavaThread* thread = JavaThread::current();
address pc = thread->saved_exception_pc();
address npc = Assembler::locate_next_instruction(pc);
thread->set_pending_unsafe_access_error();
return npc;
}
class StubGenerator: public StubCodeGenerator {
private:
#ifdef PRODUCT
#define inc_counter_np(counter) ((void)0)
#else
void inc_counter_np_(int& counter) {
__ incrementl(ExternalAddress((address)&counter));
}
#define inc_counter_np(counter) \
BLOCK_COMMENT("inc_counter " #counter); \
inc_counter_np_(counter);
#endif
enum call_stub_layout {
#ifdef _WIN64
xmm_save_first = 6, // save from xmm6
xmm_save_last = 15, // to xmm15
xmm_save_base = -9,
rsp_after_call_off = xmm_save_base - 2 * (xmm_save_last - xmm_save_first), // -27
r15_off = -7,
r14_off = -6,
r13_off = -5,
r12_off = -4,
rdi_off = -3,
rsi_off = -2,
rbx_off = -1,
rbp_off = 0,
retaddr_off = 1,
call_wrapper_off = 2,
result_off = 3,
result_type_off = 4,
method_off = 5,
entry_point_off = 6,
parameters_off = 7,
parameter_size_off = 8,
thread_off = 9
#else
rsp_after_call_off = -12,
mxcsr_off = rsp_after_call_off,
r15_off = -11,
r14_off = -10,
r13_off = -9,
r12_off = -8,
rbx_off = -7,
call_wrapper_off = -6,
result_off = -5,
result_type_off = -4,
method_off = -3,
entry_point_off = -2,
parameters_off = -1,
rbp_off = 0,
retaddr_off = 1,
parameter_size_off = 2,
thread_off = 3
#endif
};
#ifdef _WIN64
Address xmm_save(int reg) {
assert(reg >= xmm_save_first && reg <= xmm_save_last, "XMM register number out of range");
return Address(rbp, (xmm_save_base - (reg - xmm_save_first) * 2) * wordSize);
}
#endif
address generate_call_stub(address& return_address) {
assert((int)frame::entry_frame_after_call_words == -(int)rsp_after_call_off + 1 &&
(int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off,
"adjust this code");
StubCodeMark mark(this, "StubRoutines", "call_stub");
address start = __ pc();
const Address rsp_after_call(rbp, rsp_after_call_off * wordSize);
const Address call_wrapper (rbp, call_wrapper_off * wordSize);
const Address result (rbp, result_off * wordSize);
const Address result_type (rbp, result_type_off * wordSize);
const Address method (rbp, method_off * wordSize);
const Address entry_point (rbp, entry_point_off * wordSize);
const Address parameters (rbp, parameters_off * wordSize);
const Address parameter_size(rbp, parameter_size_off * wordSize);
const Address thread (rbp, thread_off * wordSize);
const Address r15_save(rbp, r15_off * wordSize);
const Address r14_save(rbp, r14_off * wordSize);
const Address r13_save(rbp, r13_off * wordSize);
const Address r12_save(rbp, r12_off * wordSize);
const Address rbx_save(rbp, rbx_off * wordSize);
__ enter();
__ subptr(rsp, -rsp_after_call_off * wordSize);
#ifndef _WIN64
__ movptr(parameters, c_rarg5); // parameters
__ movptr(entry_point, c_rarg4); // entry_point
#endif
__ movptr(method, c_rarg3); // method
__ movl(result_type, c_rarg2); // result type
__ movptr(result, c_rarg1); // result
__ movptr(call_wrapper, c_rarg0); // call wrapper
__ movptr(rbx_save, rbx);
__ movptr(r12_save, r12);
__ movptr(r13_save, r13);
__ movptr(r14_save, r14);
__ movptr(r15_save, r15);
#ifdef _WIN64
for (int i = 6; i <= 15; i++) {
__ movdqu(xmm_save(i), as_XMMRegister(i));
}
const Address rdi_save(rbp, rdi_off * wordSize);
const Address rsi_save(rbp, rsi_off * wordSize);
__ movptr(rsi_save, rsi);
__ movptr(rdi_save, rdi);
#else
const Address mxcsr_save(rbp, mxcsr_off * wordSize);
{
Label skip_ldmx;
__ stmxcsr(mxcsr_save);
__ movl(rax, mxcsr_save);
__ andl(rax, MXCSR_MASK); // Only check control and mask bits
ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
__ cmp32(rax, mxcsr_std);
__ jcc(Assembler::equal, skip_ldmx);
__ ldmxcsr(mxcsr_std);
__ bind(skip_ldmx);
}
#endif
__ movptr(r15_thread, thread);
__ reinit_heapbase();
#ifdef ASSERT
{
Label L;
__ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
__ jcc(Assembler::equal, L);
__ stop("StubRoutines::call_stub: entered with pending exception");
__ bind(L);
}
#endif
BLOCK_COMMENT("pass parameters if any");
Label parameters_done;
__ movl(c_rarg3, parameter_size);
__ testl(c_rarg3, c_rarg3);
__ jcc(Assembler::zero, parameters_done);
Label loop;
__ movptr(c_rarg2, parameters); // parameter pointer
__ movl(c_rarg1, c_rarg3); // parameter counter is in c_rarg1
__ BIND(loop);
__ movptr(rax, Address(c_rarg2, 0));// get parameter
__ addptr(c_rarg2, wordSize); // advance to next parameter
__ decrementl(c_rarg1); // decrement counter
__ push(rax); // pass parameter
__ jcc(Assembler::notZero, loop);
__ BIND(parameters_done);
__ movptr(rbx, method); // get Method*
__ movptr(c_rarg1, entry_point); // get entry_point
__ mov(r13, rsp); // set sender sp
BLOCK_COMMENT("call Java function");
__ call(c_rarg1);
BLOCK_COMMENT("call_stub_return_address:");
return_address = __ pc();
__ movptr(c_rarg0, result);
Label is_long, is_float, is_double, exit;
__ movl(c_rarg1, result_type);
__ cmpl(c_rarg1, T_OBJECT);
__ jcc(Assembler::equal, is_long);
__ cmpl(c_rarg1, T_LONG);
__ jcc(Assembler::equal, is_long);
__ cmpl(c_rarg1, T_FLOAT);
__ jcc(Assembler::equal, is_float);
__ cmpl(c_rarg1, T_DOUBLE);
__ jcc(Assembler::equal, is_double);
__ movl(Address(c_rarg0, 0), rax);
__ BIND(exit);
__ lea(rsp, rsp_after_call);
#ifdef ASSERT
{
Label L, S;
__ cmpptr(r15_thread, thread);
__ jcc(Assembler::notEqual, S);
__ get_thread(rbx);
__ cmpptr(r15_thread, rbx);
__ jcc(Assembler::equal, L);
__ bind(S);
__ jcc(Assembler::equal, L);
__ stop("StubRoutines::call_stub: threads must correspond");
__ bind(L);
}
#endif
#ifdef _WIN64
for (int i = 15; i >= 6; i--) {
__ movdqu(as_XMMRegister(i), xmm_save(i));
}
#endif
__ movptr(r15, r15_save);
__ movptr(r14, r14_save);
__ movptr(r13, r13_save);
__ movptr(r12, r12_save);
__ movptr(rbx, rbx_save);
#ifdef _WIN64
__ movptr(rdi, rdi_save);
__ movptr(rsi, rsi_save);
#else
__ ldmxcsr(mxcsr_save);
#endif
__ addptr(rsp, -rsp_after_call_off * wordSize);
__ pop(rbp);
__ ret(0);
__ BIND(is_long);
__ movq(Address(c_rarg0, 0), rax);
__ jmp(exit);
__ BIND(is_float);
__ movflt(Address(c_rarg0, 0), xmm0);
__ jmp(exit);
__ BIND(is_double);
__ movdbl(Address(c_rarg0, 0), xmm0);
__ jmp(exit);
return start;
}
address generate_catch_exception() {
StubCodeMark mark(this, "StubRoutines", "catch_exception");
address start = __ pc();
const Address rsp_after_call(rbp, rsp_after_call_off * wordSize);
const Address thread (rbp, thread_off * wordSize);
#ifdef ASSERT
{
Label L, S;
__ cmpptr(r15_thread, thread);
__ jcc(Assembler::notEqual, S);
__ get_thread(rbx);
__ cmpptr(r15_thread, rbx);
__ jcc(Assembler::equal, L);
__ bind(S);
__ stop("StubRoutines::catch_exception: threads must correspond");
__ bind(L);
}
#endif
__ verify_oop(rax);
__ movptr(Address(r15_thread, Thread::pending_exception_offset()), rax);
__ lea(rscratch1, ExternalAddress((address)__FILE__));
__ movptr(Address(r15_thread, Thread::exception_file_offset()), rscratch1);
__ movl(Address(r15_thread, Thread::exception_line_offset()), (int) __LINE__);
assert(StubRoutines::_call_stub_return_address != NULL,
"_call_stub_return_address must have been generated before");
__ jump(RuntimeAddress(StubRoutines::_call_stub_return_address));
return start;
}
address generate_forward_exception() {
StubCodeMark mark(this, "StubRoutines", "forward exception");
address start = __ pc();
#ifdef ASSERT
{
Label L;
__ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t) NULL);
__ jcc(Assembler::notEqual, L);
__ stop("StubRoutines::forward exception: no pending exception (1)");
__ bind(L);
}
#endif
__ movptr(c_rarg0, Address(rsp, 0));
BLOCK_COMMENT("call exception_handler_for_return_address");
__ call_VM_leaf(CAST_FROM_FN_PTR(address,
SharedRuntime::exception_handler_for_return_address),
r15_thread, c_rarg0);
__ mov(rbx, rax);
__ pop(rdx);
__ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
__ movptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
#ifdef ASSERT
{
Label L;
__ testptr(rax, rax);
__ jcc(Assembler::notEqual, L);
__ stop("StubRoutines::forward exception: no pending exception (2)");
__ bind(L);
}
#endif
__ verify_oop(rax);
__ jmp(rbx);
return start;
}
address generate_atomic_xchg() {
StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
address start = __ pc();
__ movl(rax, c_rarg0); // Copy to eax we need a return value anyhow
__ xchgl(rax, Address(c_rarg1, 0)); // automatic LOCK
__ ret(0);
return start;
}
address generate_atomic_xchg_ptr() {
StubCodeMark mark(this, "StubRoutines", "atomic_xchg_ptr");
address start = __ pc();
__ movptr(rax, c_rarg0); // Copy to eax we need a return value anyhow
__ xchgptr(rax, Address(c_rarg1, 0)); // automatic LOCK
__ ret(0);
return start;
}
address generate_atomic_cmpxchg() {
StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg");
address start = __ pc();
__ movl(rax, c_rarg2);
if ( os::is_MP() ) __ lock();
__ cmpxchgl(c_rarg0, Address(c_rarg1, 0));
__ ret(0);
return start;
}
address generate_atomic_cmpxchg_long() {
StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg_long");
address start = __ pc();
__ movq(rax, c_rarg2);
if ( os::is_MP() ) __ lock();
__ cmpxchgq(c_rarg0, Address(c_rarg1, 0));
__ ret(0);
return start;
}
address generate_atomic_add() {
StubCodeMark mark(this, "StubRoutines", "atomic_add");
address start = __ pc();
__ movl(rax, c_rarg0);
if ( os::is_MP() ) __ lock();
__ xaddl(Address(c_rarg1, 0), c_rarg0);
__ addl(rax, c_rarg0);
__ ret(0);
return start;
}
address generate_atomic_add_ptr() {
StubCodeMark mark(this, "StubRoutines", "atomic_add_ptr");
address start = __ pc();
__ movptr(rax, c_rarg0); // Copy to eax we need a return value anyhow
if ( os::is_MP() ) __ lock();
__ xaddptr(Address(c_rarg1, 0), c_rarg0);
__ addptr(rax, c_rarg0);
__ ret(0);
return start;
}
address generate_orderaccess_fence() {
StubCodeMark mark(this, "StubRoutines", "orderaccess_fence");
address start = __ pc();
__ membar(Assembler::StoreLoad);
__ ret(0);
return start;
}
address generate_get_previous_fp() {
StubCodeMark mark(this, "StubRoutines", "get_previous_fp");
const Address old_fp(rbp, 0);
const Address older_fp(rax, 0);
address start = __ pc();
__ enter();
__ movptr(rax, old_fp); // callers fp
__ movptr(rax, older_fp); // the frame for ps()
__ pop(rbp);
__ ret(0);
return start;
}
address generate_get_previous_sp() {
StubCodeMark mark(this, "StubRoutines", "get_previous_sp");
address start = __ pc();
__ movptr(rax, rsp);
__ addptr(rax, 8); // return address is at the top of the stack.
__ ret(0);
return start;
}
address generate_verify_mxcsr() {
StubCodeMark mark(this, "StubRoutines", "verify_mxcsr");
address start = __ pc();
const Address mxcsr_save(rsp, 0);
if (CheckJNICalls) {
Label ok_ret;
ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
__ push(rax);
__ subptr(rsp, wordSize); // allocate a temp location
__ stmxcsr(mxcsr_save);
__ movl(rax, mxcsr_save);
__ andl(rax, MXCSR_MASK); // Only check control and mask bits
__ cmp32(rax, mxcsr_std);
__ jcc(Assembler::equal, ok_ret);
__ warn("MXCSR changed by native JNI code, use -XX:+RestoreMXCSROnJNICall");
__ ldmxcsr(mxcsr_std);
__ bind(ok_ret);
__ addptr(rsp, wordSize);
__ pop(rax);
}
__ ret(0);
return start;
}
address generate_f2i_fixup() {
StubCodeMark mark(this, "StubRoutines", "f2i_fixup");
Address inout(rsp, 5 * wordSize); // return address + 4 saves
address start = __ pc();
Label L;
__ push(rax);
__ push(c_rarg3);
__ push(c_rarg2);
__ push(c_rarg1);
__ movl(rax, 0x7f800000);
__ xorl(c_rarg3, c_rarg3);
__ movl(c_rarg2, inout);
__ movl(c_rarg1, c_rarg2);
__ andl(c_rarg1, 0x7fffffff);
__ cmpl(rax, c_rarg1); // NaN? -> 0
__ jcc(Assembler::negative, L);
__ testl(c_rarg2, c_rarg2); // signed ? min_jint : max_jint
__ movl(c_rarg3, 0x80000000);
__ movl(rax, 0x7fffffff);
__ cmovl(Assembler::positive, c_rarg3, rax);
__ bind(L);
__ movptr(inout, c_rarg3);
__ pop(c_rarg1);
__ pop(c_rarg2);
__ pop(c_rarg3);
__ pop(rax);
__ ret(0);
return start;
}
address generate_f2l_fixup() {
StubCodeMark mark(this, "StubRoutines", "f2l_fixup");
Address inout(rsp, 5 * wordSize); // return address + 4 saves
address start = __ pc();
Label L;
__ push(rax);
__ push(c_rarg3);
__ push(c_rarg2);
__ push(c_rarg1);
__ movl(rax, 0x7f800000);
__ xorl(c_rarg3, c_rarg3);
__ movl(c_rarg2, inout);
__ movl(c_rarg1, c_rarg2);
__ andl(c_rarg1, 0x7fffffff);
__ cmpl(rax, c_rarg1); // NaN? -> 0
__ jcc(Assembler::negative, L);
__ testl(c_rarg2, c_rarg2); // signed ? min_jlong : max_jlong
__ mov64(c_rarg3, 0x8000000000000000);
__ mov64(rax, 0x7fffffffffffffff);
__ cmov(Assembler::positive, c_rarg3, rax);
__ bind(L);
__ movptr(inout, c_rarg3);
__ pop(c_rarg1);
__ pop(c_rarg2);
__ pop(c_rarg3);
__ pop(rax);
__ ret(0);
return start;
}
address generate_d2i_fixup() {
StubCodeMark mark(this, "StubRoutines", "d2i_fixup");
Address inout(rsp, 6 * wordSize); // return address + 5 saves
address start = __ pc();
Label L;
__ push(rax);
__ push(c_rarg3);
__ push(c_rarg2);
__ push(c_rarg1);
__ push(c_rarg0);
__ movl(rax, 0x7ff00000);
__ movq(c_rarg2, inout);
__ movl(c_rarg3, c_rarg2);
__ mov(c_rarg1, c_rarg2);
__ mov(c_rarg0, c_rarg2);
__ negl(c_rarg3);
__ shrptr(c_rarg1, 0x20);
__ orl(c_rarg3, c_rarg2);
__ andl(c_rarg1, 0x7fffffff);
__ xorl(c_rarg2, c_rarg2);
__ shrl(c_rarg3, 0x1f);
__ orl(c_rarg1, c_rarg3);
__ cmpl(rax, c_rarg1);
__ jcc(Assembler::negative, L); // NaN -> 0
__ testptr(c_rarg0, c_rarg0); // signed ? min_jint : max_jint
__ movl(c_rarg2, 0x80000000);
__ movl(rax, 0x7fffffff);
__ cmov(Assembler::positive, c_rarg2, rax);
__ bind(L);
__ movptr(inout, c_rarg2);
__ pop(c_rarg0);
__ pop(c_rarg1);
__ pop(c_rarg2);
__ pop(c_rarg3);
__ pop(rax);
__ ret(0);
return start;
}
address generate_d2l_fixup() {
StubCodeMark mark(this, "StubRoutines", "d2l_fixup");
Address inout(rsp, 6 * wordSize); // return address + 5 saves
address start = __ pc();
Label L;
__ push(rax);
__ push(c_rarg3);
__ push(c_rarg2);
__ push(c_rarg1);
__ push(c_rarg0);
__ movl(rax, 0x7ff00000);
__ movq(c_rarg2, inout);
__ movl(c_rarg3, c_rarg2);
__ mov(c_rarg1, c_rarg2);
__ mov(c_rarg0, c_rarg2);
__ negl(c_rarg3);
__ shrptr(c_rarg1, 0x20);
__ orl(c_rarg3, c_rarg2);
__ andl(c_rarg1, 0x7fffffff);
__ xorl(c_rarg2, c_rarg2);
__ shrl(c_rarg3, 0x1f);
__ orl(c_rarg1, c_rarg3);
__ cmpl(rax, c_rarg1);
__ jcc(Assembler::negative, L); // NaN -> 0
__ testq(c_rarg0, c_rarg0); // signed ? min_jlong : max_jlong
__ mov64(c_rarg2, 0x8000000000000000);
__ mov64(rax, 0x7fffffffffffffff);
__ cmovq(Assembler::positive, c_rarg2, rax);
__ bind(L);
__ movq(inout, c_rarg2);
__ pop(c_rarg0);
__ pop(c_rarg1);
__ pop(c_rarg2);
__ pop(c_rarg3);
__ pop(rax);
__ ret(0);
return start;
}
address generate_fp_mask(const char *stub_name, int64_t mask) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
address start = __ pc();
__ emit_data64( mask, relocInfo::none );
__ emit_data64( mask, relocInfo::none );
return start;
}
address generate_handler_for_unsafe_access() {
StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
address start = __ pc();
__ push(0); // hole for return address-to-be
__ pusha(); // push registers
Address next_pc(rsp, RegisterImpl::number_of_registers * BytesPerWord);
__ subptr(rsp, frame::arg_reg_save_area_bytes);
BLOCK_COMMENT("call handle_unsafe_access");
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, handle_unsafe_access)));
__ addptr(rsp, frame::arg_reg_save_area_bytes);
__ movptr(next_pc, rax); // stuff next address
__ popa();
__ ret(0); // jump to next address
return start;
}
address generate_verify_oop() {
StubCodeMark mark(this, "StubRoutines", "verify_oop");
address start = __ pc();
Label exit, error;
__ pushf();
__ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
__ push(r12);
__ push(c_rarg2);
__ push(c_rarg3);
enum {
oop_to_verify = 6 * wordSize,
saved_rax = 7 * wordSize,
saved_r10 = 8 * wordSize,
return_addr = 16 * wordSize,
error_msg = 17 * wordSize
};
__ movptr(rax, Address(rsp, oop_to_verify));
__ testptr(rax, rax);
__ jcc(Assembler::zero, exit); // if obj is NULL it is OK
__ movptr(c_rarg2, rax);
__ movptr(c_rarg3, (intptr_t) Universe::verify_oop_mask());
__ andptr(c_rarg2, c_rarg3);
__ movptr(c_rarg3, (intptr_t) Universe::verify_oop_bits());
__ cmpptr(c_rarg2, c_rarg3);
__ jcc(Assembler::notZero, error);
__ reinit_heapbase();
__ load_klass(rax, rax); // get klass
__ testptr(rax, rax);
__ jcc(Assembler::zero, error); // if klass is NULL it is broken
__ bind(exit);
__ movptr(rax, Address(rsp, saved_rax)); // get saved rax back
__ movptr(rscratch1, Address(rsp, saved_r10)); // get saved r10 back
__ pop(c_rarg3); // restore c_rarg3
__ pop(c_rarg2); // restore c_rarg2
__ pop(r12); // restore r12
__ popf(); // restore flags
__ ret(4 * wordSize); // pop caller saved stuff
__ bind(error);
__ movptr(rax, Address(rsp, saved_rax)); // get saved rax back
__ movptr(rscratch1, Address(rsp, saved_r10)); // get saved r10 back
__ pop(c_rarg3); // get saved c_rarg3 back
__ pop(c_rarg2); // get saved c_rarg2 back
__ pop(r12); // get saved r12 back
__ popf(); // get saved flags off stack --
__ pusha(); // push registers
__ movptr(c_rarg0, Address(rsp, error_msg)); // pass address of error message
__ movptr(c_rarg1, Address(rsp, return_addr)); // pass return address
__ movq(c_rarg2, rsp); // pass address of regs on stack
__ mov(r12, rsp); // remember rsp
__ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
__ andptr(rsp, -16); // align stack as required by ABI
BLOCK_COMMENT("call MacroAssembler::debug");
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
__ mov(rsp, r12); // restore rsp
__ popa(); // pop registers (includes r12)
__ ret(4 * wordSize); // pop caller saved stuff
return start;
}
void assert_clean_int(Register Rint, Register Rtmp) {
#ifdef ASSERT
Label L;
assert_different_registers(Rtmp, Rint);
__ movslq(Rtmp, Rint);
__ cmpq(Rtmp, Rint);
__ jcc(Assembler::equal, L);
__ stop("high 32-bits of int value are not 0");
__ bind(L);
#endif
}
void array_overlap_test(address no_overlap_target, Address::ScaleFactor sf) {
assert(no_overlap_target != NULL, "must be generated");
array_overlap_test(no_overlap_target, NULL, sf);
}
void array_overlap_test(Label& L_no_overlap, Address::ScaleFactor sf) {
array_overlap_test(NULL, &L_no_overlap, sf);
}
void array_overlap_test(address no_overlap_target, Label* NOLp, Address::ScaleFactor sf) {
const Register from = c_rarg0;
const Register to = c_rarg1;
const Register count = c_rarg2;
const Register end_from = rax;
__ cmpptr(to, from);
__ lea(end_from, Address(from, count, sf, 0));
if (NOLp == NULL) {
ExternalAddress no_overlap(no_overlap_target);
__ jump_cc(Assembler::belowEqual, no_overlap);
__ cmpptr(to, end_from);
__ jump_cc(Assembler::aboveEqual, no_overlap);
} else {
__ jcc(Assembler::belowEqual, (*NOLp));
__ cmpptr(to, end_from);
__ jcc(Assembler::aboveEqual, (*NOLp));
}
}
void setup_arg_regs(int nargs = 3) {
const Register saved_rdi = r9;
const Register saved_rsi = r10;
assert(nargs == 3 || nargs == 4, "else fix");
#ifdef _WIN64
assert(c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9,
"unexpected argument registers");
if (nargs >= 4)
__ mov(rax, r9); // r9 is also saved_rdi
__ movptr(saved_rdi, rdi);
__ movptr(saved_rsi, rsi);
__ mov(rdi, rcx); // c_rarg0
__ mov(rsi, rdx); // c_rarg1
__ mov(rdx, r8); // c_rarg2
if (nargs >= 4)
__ mov(rcx, rax); // c_rarg3 (via rax)
#else
assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
"unexpected argument registers");
#endif
}
void restore_arg_regs() {
const Register saved_rdi = r9;
const Register saved_rsi = r10;
#ifdef _WIN64
__ movptr(rdi, saved_rdi);
__ movptr(rsi, saved_rsi);
#endif
}
void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
BarrierSet* bs = Universe::heap()->barrier_set();
switch (bs->kind()) {
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
if (!dest_uninitialized) {
__ pusha(); // push registers
if (count == c_rarg0) {
if (addr == c_rarg1) {
__ xchgptr(c_rarg1, c_rarg0);
} else {
__ movptr(c_rarg1, count);
__ movptr(c_rarg0, addr);
}
} else {
__ movptr(c_rarg0, addr);
__ movptr(c_rarg1, count);
}
__ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
__ popa();
}
break;
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
case BarrierSet::ModRef:
break;
default:
ShouldNotReachHere();
}
}
void gen_write_ref_array_post_barrier(Register start, Register count, Register scratch) {
assert_different_registers(start, count, scratch);
BarrierSet* bs = Universe::heap()->barrier_set();
switch (bs->kind()) {
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
{
__ pusha(); // push registers (overkill)
if (c_rarg0 == count) { // On win64 c_rarg0 == rcx
assert_different_registers(c_rarg1, start);
__ mov(c_rarg1, count);
__ mov(c_rarg0, start);
} else {
assert_different_registers(c_rarg0, count);
__ mov(c_rarg0, start);
__ mov(c_rarg1, count);
}
__ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2);
__ popa();
}
break;
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
{
CardTableModRefBS* ct = (CardTableModRefBS*)bs;
assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
Label L_loop;
const Register end = count;
__ leaq(end, Address(start, count, TIMES_OOP, 0)); // end == start+count*oop_size
__ subptr(end, BytesPerHeapOop); // end - 1 to make inclusive
__ shrptr(start, CardTableModRefBS::card_shift);
__ shrptr(end, CardTableModRefBS::card_shift);
__ subptr(end, start); // end --> cards count
int64_t disp = (int64_t) ct->byte_map_base;
__ mov64(scratch, disp);
__ addptr(start, scratch);
__ BIND(L_loop);
__ movb(Address(start, count, Address::times_1), 0);
__ decrement(count);
__ jcc(Assembler::greaterEqual, L_loop);
}
break;
default:
ShouldNotReachHere();
}
}
void copy_bytes_forward(Register end_from, Register end_to,
Register qword_count, Register to,
Label& L_copy_bytes, Label& L_copy_8_bytes) {
DEBUG_ONLY(__ stop("enter at entry label, not here"));
Label L_loop;
__ align(OptoLoopAlignment);
if (UseUnalignedLoadStores) {
Label L_end;
__ BIND(L_loop);
if (UseAVX >= 2) {
__ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
__ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
__ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
__ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
} else {
__ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
__ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
__ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
__ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
__ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
__ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
__ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
__ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
}
__ BIND(L_copy_bytes);
__ addptr(qword_count, 8);
__ jcc(Assembler::lessEqual, L_loop);
__ subptr(qword_count, 4); // sub(8) and add(4)
__ jccb(Assembler::greater, L_end);
if (UseAVX >= 2) {
__ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
__ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
} else {
__ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
__ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
__ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
__ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
}
__ addptr(qword_count, 4);
__ BIND(L_end);
if (UseAVX >= 2) {
__ vpxor(xmm0, xmm0);
__ vpxor(xmm1, xmm1);
}
} else {
__ BIND(L_loop);
__ movq(to, Address(end_from, qword_count, Address::times_8, -24));
__ movq(Address(end_to, qword_count, Address::times_8, -24), to);
__ movq(to, Address(end_from, qword_count, Address::times_8, -16));
__ movq(Address(end_to, qword_count, Address::times_8, -16), to);
__ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
__ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
__ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
__ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
__ BIND(L_copy_bytes);
__ addptr(qword_count, 4);
__ jcc(Assembler::lessEqual, L_loop);
}
__ subptr(qword_count, 4);
__ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords
}
void copy_bytes_backward(Register from, Register dest,
Register qword_count, Register to,
Label& L_copy_bytes, Label& L_copy_8_bytes) {
DEBUG_ONLY(__ stop("enter at entry label, not here"));
Label L_loop;
__ align(OptoLoopAlignment);
if (UseUnalignedLoadStores) {
Label L_end;
__ BIND(L_loop);
if (UseAVX >= 2) {
__ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
__ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
__ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
__ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
} else {
__ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
__ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
__ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
__ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
__ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
__ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
__ movdqu(xmm3, Address(from, qword_count, Address::times_8, 0));
__ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm3);
}
__ BIND(L_copy_bytes);
__ subptr(qword_count, 8);
__ jcc(Assembler::greaterEqual, L_loop);
__ addptr(qword_count, 4); // add(8) and sub(4)
__ jccb(Assembler::less, L_end);
if (UseAVX >= 2) {
__ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0));
__ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0);
} else {
__ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
__ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
__ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
__ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
}
__ subptr(qword_count, 4);
__ BIND(L_end);
if (UseAVX >= 2) {
__ vpxor(xmm0, xmm0);
__ vpxor(xmm1, xmm1);
}
} else {
__ BIND(L_loop);
__ movq(to, Address(from, qword_count, Address::times_8, 24));
__ movq(Address(dest, qword_count, Address::times_8, 24), to);
__ movq(to, Address(from, qword_count, Address::times_8, 16));
__ movq(Address(dest, qword_count, Address::times_8, 16), to);
__ movq(to, Address(from, qword_count, Address::times_8, 8));
__ movq(Address(dest, qword_count, Address::times_8, 8), to);
__ movq(to, Address(from, qword_count, Address::times_8, 0));
__ movq(Address(dest, qword_count, Address::times_8, 0), to);
__ BIND(L_copy_bytes);
__ subptr(qword_count, 4);
__ jcc(Assembler::greaterEqual, L_loop);
}
__ addptr(qword_count, 4);
__ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords
}
address generate_disjoint_byte_copy(bool aligned, address* entry, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
Label L_copy_byte, L_exit;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register count = rdx; // elements count
const Register byte_count = rcx;
const Register qword_count = count;
const Register end_from = from; // source array end address
const Register end_to = to; // destination array end address
__ enter(); // required for proper stackwalking of RuntimeStub frame
assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
if (entry != NULL) {
BLOCK_COMMENT("Entry:");
}
setup_arg_regs(); // from => rdi, to => rsi, count => rdx
__ movptr(byte_count, count);
__ shrptr(count, 3); // count => qword_count
__ lea(end_from, Address(from, qword_count, Address::times_8, -8));
__ lea(end_to, Address(to, qword_count, Address::times_8, -8));
__ negptr(qword_count); // make the count negative
__ jmp(L_copy_bytes);
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
__ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
__ increment(qword_count);
__ jcc(Assembler::notZero, L_copy_8_bytes);
__ BIND(L_copy_4_bytes);
__ testl(byte_count, 4);
__ jccb(Assembler::zero, L_copy_2_bytes);
__ movl(rax, Address(end_from, 8));
__ movl(Address(end_to, 8), rax);
__ addptr(end_from, 4);
__ addptr(end_to, 4);
__ BIND(L_copy_2_bytes);
__ testl(byte_count, 2);
__ jccb(Assembler::zero, L_copy_byte);
__ movw(rax, Address(end_from, 8));
__ movw(Address(end_to, 8), rax);
__ addptr(end_from, 2);
__ addptr(end_to, 2);
__ BIND(L_copy_byte);
__ testl(byte_count, 1);
__ jccb(Assembler::zero, L_exit);
__ movb(rax, Address(end_from, 8));
__ movb(Address(end_to, 8), rax);
__ BIND(L_exit);
restore_arg_regs();
inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
__ jmp(L_copy_4_bytes);
return start;
}
address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
address* entry, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register count = rdx; // elements count
const Register byte_count = rcx;
const Register qword_count = count;
__ enter(); // required for proper stackwalking of RuntimeStub frame
assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
if (entry != NULL) {
BLOCK_COMMENT("Entry:");
}
array_overlap_test(nooverlap_target, Address::times_1);
setup_arg_regs(); // from => rdi, to => rsi, count => rdx
__ movptr(byte_count, count);
__ shrptr(count, 3); // count => qword_count
__ testl(byte_count, 1);
__ jcc(Assembler::zero, L_copy_2_bytes);
__ movb(rax, Address(from, byte_count, Address::times_1, -1));
__ movb(Address(to, byte_count, Address::times_1, -1), rax);
__ decrement(byte_count); // Adjust for possible trailing word
__ BIND(L_copy_2_bytes);
__ testl(byte_count, 2);
__ jcc(Assembler::zero, L_copy_4_bytes);
__ movw(rax, Address(from, byte_count, Address::times_1, -2));
__ movw(Address(to, byte_count, Address::times_1, -2), rax);
__ BIND(L_copy_4_bytes);
__ testl(byte_count, 4);
__ jcc(Assembler::zero, L_copy_bytes);
__ movl(rax, Address(from, qword_count, Address::times_8));
__ movl(Address(to, qword_count, Address::times_8), rax);
__ jmp(L_copy_bytes);
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(from, qword_count, Address::times_8, -8));
__ movq(Address(to, qword_count, Address::times_8, -8), rax);
__ decrement(qword_count);
__ jcc(Assembler::notZero, L_copy_8_bytes);
restore_arg_regs();
inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
restore_arg_regs();
inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
address generate_disjoint_short_copy(bool aligned, address *entry, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes,L_copy_2_bytes,L_exit;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register count = rdx; // elements count
const Register word_count = rcx;
const Register qword_count = count;
const Register end_from = from; // source array end address
const Register end_to = to; // destination array end address
__ enter(); // required for proper stackwalking of RuntimeStub frame
assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
if (entry != NULL) {
BLOCK_COMMENT("Entry:");
}
setup_arg_regs(); // from => rdi, to => rsi, count => rdx
__ movptr(word_count, count);
__ shrptr(count, 2); // count => qword_count
__ lea(end_from, Address(from, qword_count, Address::times_8, -8));
__ lea(end_to, Address(to, qword_count, Address::times_8, -8));
__ negptr(qword_count);
__ jmp(L_copy_bytes);
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
__ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
__ increment(qword_count);
__ jcc(Assembler::notZero, L_copy_8_bytes);
__ BIND(L_copy_4_bytes);
__ testl(word_count, 2);
__ jccb(Assembler::zero, L_copy_2_bytes);
__ movl(rax, Address(end_from, 8));
__ movl(Address(end_to, 8), rax);
__ addptr(end_from, 4);
__ addptr(end_to, 4);
__ BIND(L_copy_2_bytes);
__ testl(word_count, 1);
__ jccb(Assembler::zero, L_exit);
__ movw(rax, Address(end_from, 8));
__ movw(Address(end_to, 8), rax);
__ BIND(L_exit);
restore_arg_regs();
inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
__ jmp(L_copy_4_bytes);
return start;
}
address generate_fill(BasicType t, bool aligned, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
BLOCK_COMMENT("Entry:");
const Register to = c_rarg0; // source array address
const Register value = c_rarg1; // value
const Register count = c_rarg2; // elements count
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ generate_fill(t, aligned, to, value, count, rax, xmm0);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
address *entry, const char *name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register count = rdx; // elements count
const Register word_count = rcx;
const Register qword_count = count;
__ enter(); // required for proper stackwalking of RuntimeStub frame
assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
if (entry != NULL) {
BLOCK_COMMENT("Entry:");
}
array_overlap_test(nooverlap_target, Address::times_2);
setup_arg_regs(); // from => rdi, to => rsi, count => rdx
__ movptr(word_count, count);
__ shrptr(count, 2); // count => qword_count
__ testl(word_count, 1);
__ jccb(Assembler::zero, L_copy_4_bytes);
__ movw(rax, Address(from, word_count, Address::times_2, -2));
__ movw(Address(to, word_count, Address::times_2, -2), rax);
__ BIND(L_copy_4_bytes);
__ testl(word_count, 2);
__ jcc(Assembler::zero, L_copy_bytes);
__ movl(rax, Address(from, qword_count, Address::times_8));
__ movl(Address(to, qword_count, Address::times_8), rax);
__ jmp(L_copy_bytes);
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(from, qword_count, Address::times_8, -8));
__ movq(Address(to, qword_count, Address::times_8, -8), rax);
__ decrement(qword_count);
__ jcc(Assembler::notZero, L_copy_8_bytes);
restore_arg_regs();
inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
restore_arg_regs();
inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry,
const char *name, bool dest_uninitialized = false) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register count = rdx; // elements count
const Register dword_count = rcx;
const Register qword_count = count;
const Register end_from = from; // source array end address
const Register end_to = to; // destination array end address
const Register saved_to = r11; // saved destination array address
__ enter(); // required for proper stackwalking of RuntimeStub frame
assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
if (entry != NULL) {
BLOCK_COMMENT("Entry:");
}
setup_arg_regs(); // from => rdi, to => rsi, count => rdx
if (is_oop) {
__ movq(saved_to, to);
gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
}
__ movptr(dword_count, count);
__ shrptr(count, 1); // count => qword_count
__ lea(end_from, Address(from, qword_count, Address::times_8, -8));
__ lea(end_to, Address(to, qword_count, Address::times_8, -8));
__ negptr(qword_count);
__ jmp(L_copy_bytes);
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
__ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
__ increment(qword_count);
__ jcc(Assembler::notZero, L_copy_8_bytes);
__ BIND(L_copy_4_bytes);
__ testl(dword_count, 1); // Only byte test since the value is 0 or 1
__ jccb(Assembler::zero, L_exit);
__ movl(rax, Address(end_from, 8));
__ movl(Address(end_to, 8), rax);
__ BIND(L_exit);
if (is_oop) {
gen_write_ref_array_post_barrier(saved_to, dword_count, rax);
}
restore_arg_regs();
inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
__ jmp(L_copy_4_bytes);
return start;
}
address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
address *entry, const char *name,
bool dest_uninitialized = false) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
Label L_copy_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register count = rdx; // elements count
const Register dword_count = rcx;
const Register qword_count = count;
__ enter(); // required for proper stackwalking of RuntimeStub frame
assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
if (entry != NULL) {
BLOCK_COMMENT("Entry:");
}
array_overlap_test(nooverlap_target, Address::times_4);
setup_arg_regs(); // from => rdi, to => rsi, count => rdx
if (is_oop) {
gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
}
assert_clean_int(count, rax); // Make sure 'count' is clean int.
__ movptr(dword_count, count);
__ shrptr(count, 1); // count => qword_count
__ testl(dword_count, 1);
__ jcc(Assembler::zero, L_copy_bytes);
__ movl(rax, Address(from, dword_count, Address::times_4, -4));
__ movl(Address(to, dword_count, Address::times_4, -4), rax);
__ jmp(L_copy_bytes);
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(from, qword_count, Address::times_8, -8));
__ movq(Address(to, qword_count, Address::times_8, -8), rax);
__ decrement(qword_count);
__ jcc(Assembler::notZero, L_copy_8_bytes);
if (is_oop) {
__ jmp(L_exit);
}
restore_arg_regs();
inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
__ BIND(L_exit);
if (is_oop) {
gen_write_ref_array_post_barrier(to, dword_count, rax);
}
restore_arg_regs();
inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
const char *name, bool dest_uninitialized = false) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
Label L_copy_bytes, L_copy_8_bytes, L_exit;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register qword_count = rdx; // elements count
const Register end_from = from; // source array end address
const Register end_to = rcx; // destination array end address
const Register saved_to = to;
const Register saved_count = r11;
__ enter(); // required for proper stackwalking of RuntimeStub frame
assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
if (entry != NULL) {
BLOCK_COMMENT("Entry:");
}
setup_arg_regs(); // from => rdi, to => rsi, count => rdx
if (is_oop) {
__ movptr(saved_count, qword_count);
gen_write_ref_array_pre_barrier(to, qword_count, dest_uninitialized);
}
__ lea(end_from, Address(from, qword_count, Address::times_8, -8));
__ lea(end_to, Address(to, qword_count, Address::times_8, -8));
__ negptr(qword_count);
__ jmp(L_copy_bytes);
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
__ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
__ increment(qword_count);
__ jcc(Assembler::notZero, L_copy_8_bytes);
if (is_oop) {
__ jmp(L_exit);
} else {
restore_arg_regs();
inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
}
copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
if (is_oop) {
__ BIND(L_exit);
gen_write_ref_array_post_barrier(saved_to, saved_count, rax);
}
restore_arg_regs();
if (is_oop) {
inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
} else {
inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
}
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
address generate_conjoint_long_oop_copy(bool aligned, bool is_oop,
address nooverlap_target, address *entry,
const char *name, bool dest_uninitialized = false) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
Label L_copy_bytes, L_copy_8_bytes, L_exit;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register qword_count = rdx; // elements count
const Register saved_count = rcx;
__ enter(); // required for proper stackwalking of RuntimeStub frame
assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
if (entry != NULL) {
BLOCK_COMMENT("Entry:");
}
array_overlap_test(nooverlap_target, Address::times_8);
setup_arg_regs(); // from => rdi, to => rsi, count => rdx
if (is_oop) {
__ movptr(saved_count, qword_count);
gen_write_ref_array_pre_barrier(to, saved_count, dest_uninitialized);
}
__ jmp(L_copy_bytes);
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(from, qword_count, Address::times_8, -8));
__ movq(Address(to, qword_count, Address::times_8, -8), rax);
__ decrement(qword_count);
__ jcc(Assembler::notZero, L_copy_8_bytes);
if (is_oop) {
__ jmp(L_exit);
} else {
restore_arg_regs();
inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
}
copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
if (is_oop) {
__ BIND(L_exit);
gen_write_ref_array_post_barrier(to, saved_count, rax);
}
restore_arg_regs();
if (is_oop) {
inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
} else {
inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
}
__ xorptr(rax, rax); // return 0
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
void generate_type_check(Register sub_klass,
Register super_check_offset,
Register super_klass,
Label& L_success) {
assert_different_registers(sub_klass, super_check_offset, super_klass);
BLOCK_COMMENT("type_check:");
Label L_miss;
__ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL,
super_check_offset);
__ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL);
__ BIND(L_miss);
}
address generate_checkcast_copy(const char *name, address *entry,
bool dest_uninitialized = false) {
Label L_load_element, L_store_element, L_do_card_marks, L_done;
const Register from = rdi; // source array address
const Register to = rsi; // destination array address
const Register length = rdx; // elements count
const Register ckoff = rcx; // super_check_offset
const Register ckval = r8; // super_klass
const Register end_from = from; // source array end address
const Register end_to = r13; // destination array end address
const Register count = rdx; // -(count_remaining)
const Register r14_length = r14; // saved copy of length
const Register rax_oop = rax; // actual oop copied
const Register r11_klass = r11; // oop._klass
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
__ enter(); // required for proper stackwalking of RuntimeStub frame
#ifdef ASSERT
{ Label L;
array_overlap_test(L, TIMES_OOP);
__ stop("checkcast_copy within a single array");
__ bind(L);
}
#endif //ASSERT
setup_arg_regs(4); // from => rdi, to => rsi, length => rdx
#ifdef _WIN64
__ movptr(ckval, Address(rsp, 6 * wordSize));
#endif
if (entry != NULL) {
BLOCK_COMMENT("Entry:");
}
enum {
saved_r13_offset,
saved_r14_offset,
saved_rbp_offset
};
__ subptr(rsp, saved_rbp_offset * wordSize);
__ movptr(Address(rsp, saved_r13_offset * wordSize), r13);
__ movptr(Address(rsp, saved_r14_offset * wordSize), r14);
assert_clean_int(length, rax);
assert_clean_int(ckoff, rax);
#ifdef ASSERT
BLOCK_COMMENT("assert consistent ckoff/ckval");
{ Label L;
int sco_offset = in_bytes(Klass::super_check_offset_offset());
__ cmpl(ckoff, Address(ckval, sco_offset));
__ jcc(Assembler::equal, L);
__ stop("super_check_offset inconsistent");
__ bind(L);
}
#endif //ASSERT
Address end_from_addr(from, length, TIMES_OOP, 0);
Address end_to_addr(to, length, TIMES_OOP, 0);
Address from_element_addr(end_from, count, TIMES_OOP, 0);
Address to_element_addr(end_to, count, TIMES_OOP, 0);
gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
__ lea(end_from, end_from_addr);
__ lea(end_to, end_to_addr);
__ movptr(r14_length, length); // save a copy of the length
assert(length == count, ""); // else fix next line:
__ negptr(count); // negate and test the length
__ jcc(Assembler::notZero, L_load_element);
__ xorptr(rax, rax); // return 0 on (trivial) success
__ jmp(L_done);
__ align(OptoLoopAlignment);
__ BIND(L_store_element);
__ store_heap_oop(to_element_addr, rax_oop); // store the oop
__ increment(count); // increment the count toward zero
__ jcc(Assembler::zero, L_do_card_marks);
__ BIND(L_load_element);
__ load_heap_oop(rax_oop, from_element_addr); // load the oop
__ testptr(rax_oop, rax_oop);
__ jcc(Assembler::zero, L_store_element);
__ load_klass(r11_klass, rax_oop);// query the object klass
generate_type_check(r11_klass, ckoff, ckval, L_store_element);
assert_different_registers(rax, r14_length, count, to, end_to, rcx, rscratch1);
Label L_post_barrier;
__ addptr(r14_length, count); // K = (original - remaining) oops
__ movptr(rax, r14_length); // save the value
__ notptr(rax); // report (-1^K) to caller (does not affect flags)
__ jccb(Assembler::notZero, L_post_barrier);
__ jmp(L_done); // K == 0, nothing was copied, skip post barrier
__ BIND(L_do_card_marks);
__ xorptr(rax, rax); // return 0 on success
__ BIND(L_post_barrier);
gen_write_ref_array_post_barrier(to, r14_length, rscratch1);
__ BIND(L_done);
__ movptr(r13, Address(rsp, saved_r13_offset * wordSize));
__ movptr(r14, Address(rsp, saved_r14_offset * wordSize));
restore_arg_regs();
inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); // Update counter after rscratch1 is free
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
cccccccccc10
最新推荐文章于 2024-07-26 12:28:02 发布