// Ensure a high score for immediate-use spill copies so they get a color
if( n->is_SpillCopy()
&& lrgs(r).is_singledef() // MultiDef live range can still split
&& n->outcnt() == 1 // and use must be in this block
&& _cfg.get_block_for_node(n->unique_out()) == block) {
// All single-use MachSpillCopy(s) that immediately precede their
// use must color early. If a longer live range steals their
// color, the spill copy will split and may push another spill copy
// further away resulting in an infinite spill-split-retry cycle.
// Assigning a zero area results in a high score() and a good
// location in the simplify list.
//
Node *single_use = n->unique_out();
assert(block->find_node(single_use) >= j, "Use must be later in block");
// Use can be earlier in block if it is a Phi, but then I should be a MultiDef
// Find first non SpillCopy 'm' that follows the current instruction
// (j - 1) is index for current instruction 'n'
Node *m = n;
for (uint i = j; i <= last_inst && m->is_SpillCopy(); ++i) {
m = block->get_node(i);
}
if (m == single_use) {
lrgs(r)._area = 0.0;
}
}
// Remove from live-out set
if( liveout.remove(r) ) {
// Adjust register pressure.
// Capture last hi-to-lo pressure transition
lower_pressure(&lrgs(r), j - 1, block, pressure, hrp_index);
assert( pressure[0] == count_int_pressure (&liveout), "" );
assert( pressure[1] == count_float_pressure(&liveout), "" );
}
// Copies do not define a new value and so do not interfere.
// Remove the copies source from the liveout set before interfering.
uint idx = n->is_Copy();
if (idx) {
uint x = _lrg_map.live_range_id(n->in(idx));
if (liveout.remove(x)) {
lrgs(x)._area -= cost;
// Adjust register pressure.
lower_pressure(&lrgs(x), j - 1, block, pressure, hrp_index);
assert( pressure[0] == count_int_pressure (&liveout), "" );
assert( pressure[1] == count_float_pressure(&liveout), "" );
}
}
} // End of if live or not
// Interfere with everything live. If the defined value must
// go in a particular register, just remove that register from
// all conflicting parties and avoid the interference.
// Make exclusions for rematerializable defs. Since rematerializable
// DEFs are not bound but the live range is, some uses must be bound.
// If we spill live range 'r', it can rematerialize at each use site
// according to its bindings.
const RegMask &rmask = lrgs(r).mask();
if( lrgs(r).is_bound() && !(n->rematerialize()) && rmask.is_NotEmpty() ) {
// Check for common case
int r_size = lrgs(r).num_regs();
OptoReg::Name r_reg = (r_size == 1) ? rmask.find_first_elem() : OptoReg::Physical;
// Smear odd bits
IndexSetIterator elements(&liveout);
uint l;
while ((l = elements.next()) != 0) {
LRG &lrg = lrgs(l);
// If 'l' must spill already, do not further hack his bits.
// He'll get some interferences and be forced to spill later.
if( lrg._must_spill ) continue;
// Remove bound register(s) from 'l's choices
RegMask old = lrg.mask();
uint old_size = lrg.mask_size();
// Remove the bits from LRG 'r' from LRG 'l' so 'l' no
// longer interferes with 'r'. If 'l' requires aligned
// adjacent pairs, subtract out bit pairs.
assert(!lrg._is_vector || !lrg._fat_proj, "sanity");
if (lrg.num_regs() > 1 && !lrg._fat_proj) {
RegMask r2mask = rmask;
// Leave only aligned set of bits.
r2mask.smear_to_sets(lrg.num_regs());
// It includes vector case.
lrg.SUBTRACT( r2mask );
lrg.compute_set_mask_size();
} else if( r_size != 1 ) { // fat proj
lrg.SUBTRACT( rmask );
lrg.compute_set_mask_size();
} else { // Common case: size 1 bound removal
if( lrg.mask().Member(r_reg) ) {
lrg.Remove(r_reg);
lrg.set_mask_size(lrg.mask().is_AllStack() ? LRG::AllStack_size : old_size - 1);
}
}
// If 'l' goes completely dry, it must spill.
if( lrg.not_free() ) {
// Give 'l' some kind of reasonable mask, so he picks up
// interferences (and will spill later).
lrg.set_mask( old );
lrg.set_mask_size(old_size);
must_spill++;
lrg._must_spill = 1;
lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
}
}
} // End of if bound
// Now interference with everything that is live and has
// compatible register sets.
interfere_with_live(r,&liveout);
} // End of if normal register-allocated value
// Area remaining in the block
inst_count--;
cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);
// Make all inputs live
if( !n->is_Phi() ) { // Phi function uses come from prior block
JVMState* jvms = n->jvms();
uint debug_start = jvms ? jvms->debug_start() : 999999;
// Start loop at 1 (skip control edge) for most Nodes.
// SCMemProj's might be the sole use of a StoreLConditional.
// While StoreLConditionals set memory (the SCMemProj use)
// they also def flags; if that flag def is unused the
// allocator sees a flag-setting instruction with no use of
// the flags and assumes it's dead. This keeps the (useless)
// flag-setting behavior alive while also keeping the (useful)
// memory update effect.
for (uint k = ((n->Opcode() == Op_SCMemProj) ? 0:1); k < n->req(); k++) {
Node *def = n->in(k);
uint x = _lrg_map.live_range_id(def);
if (!x) {
continue;
}
LRG &lrg = lrgs(x);
// No use-side cost for spilling debug info
if (k < debug_start) {
// A USE costs twice block frequency (once for the Load, once
// for a Load-delay). Rematerialized uses only cost once.
lrg._cost += (def->rematerialize() ? block->_freq : (block->_freq + block->_freq));
}
// It is live now
if (liveout.insert(x)) {
// Newly live things assumed live from here to top of block
lrg._area += cost;
// Adjust register pressure
if (lrg.mask().is_UP() && lrg.mask_size()) {
if (lrg._is_float || lrg._is_vector) {
pressure[1] += lrg.reg_pressure();
if (pressure[1] > block->_freg_pressure) {
block->_freg_pressure = pressure[1];
}
} else if( lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) {
pressure[0] += lrg.reg_pressure();
if (pressure[0] > block->_reg_pressure) {
block->_reg_pressure = pressure[0];
}
}
}
assert( pressure[0] == count_int_pressure (&liveout), "" );
assert( pressure[1] == count_float_pressure(&liveout), "" );
}
assert(!(lrg._area < 0.0), "negative spill area" );
}
}
} // End of reverse pass over all instructions in block
// If we run off the top of the block with high pressure and
// never see a hi-to-low pressure transition, just record that
// the whole block is high pressure.
if (pressure[0] > (uint)INTPRESSURE) {
hrp_index[0] = 0;
if (pressure[0] > block->_reg_pressure) {
block->_reg_pressure = pressure[0];
}
}
if (pressure[1] > (uint)FLOATPRESSURE) {
hrp_index[1] = 0;
if (pressure[1] > block->_freg_pressure) {
block->_freg_pressure = pressure[1];
}
}
// Compute high pressure index; avoid landing in the middle of projnodes
j = hrp_index[0];
if (j < block->number_of_nodes() && j < block->end_idx() + 1) {
Node* cur = block->get_node(j);
while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) {
j--;
cur = block->get_node(j);
}
}
block->_ihrp_index = j;
j = hrp_index[1];
if (j < block->number_of_nodes() && j < block->end_idx() + 1) {
Node* cur = block->get_node(j);
while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) {
j--;
cur = block->get_node(j);
}
}
block->_fhrp_index = j;
#ifndef PRODUCT
// Gather Register Pressure Statistics
if( PrintOptoStatistics ) {
if (block->_reg_pressure > (uint)INTPRESSURE || block->_freg_pressure > (uint)FLOATPRESSURE) {
_high_pressure++;
} else {
_low_pressure++;
}
}
#endif
} // End of for all blocks
return must_spill;
}
C:\hotspot-69087d08d473\src\share\vm/opto/ifnode.cpp
/*
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/addnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/connode.hpp"
#include "opto/loopnode.hpp"
#include "opto/phaseX.hpp"
#include "opto/runtime.hpp"
#include "opto/subnode.hpp"
// Portions of code courtesy of Clifford Click
// Optimization - Graph Style
extern int explicit_null_checks_elided;
//=============================================================================
//------------------------------Value------------------------------------------
// Compute the tuple type describing which arms of this If can be reached,
// based on the types the phase has recorded for the control and test inputs.
const Type *IfNode::Value( PhaseTransform *phase ) const {
  // A missing control input, or one typed TOP, makes the whole node TOP.
  if (in(0) == NULL || phase->type(in(0)) == Type::TOP) {
    return Type::TOP;
  }

  const Type* test_type = phase->type(in(1));
  if (test_type == Type::TOP) {
    // Undefined test value: neither arm is reachable.
    return TypeTuple::IFNEITHER;
  }
  if (test_type == TypeInt::ZERO) {
    // Test is known to be zero (false): only the false arm is reachable.
    return TypeTuple::IFFALSE;
  }
  if (test_type == TypeInt::ONE) {
    // Test is known to be one (true): only the true arm is reachable.
    return TypeTuple::IFTRUE;
  }

  // Anything else must be the generic boolean type; no progress possible.
  assert( test_type == TypeInt::BOOL, "expected boolean type" );
  return TypeTuple::IFBOTH;
}
// Output register mask of an If: always the shared empty mask.
const RegMask &IfNode::out_RegMask() const {
return RegMask::Empty;
}
//------------------------------split_if---------------------------------------
// Look for places where we merge constants, then test on the merged value.
// If the IF test will be constant folded on the path with the constant, we
// win by splitting the IF to before the merge point.
//
// iff  - the If whose test is Bool(Cmp(Phi, constant)).
// igvn - iterative GVN used to register, transform and remove nodes.
// Returns NULL when the pattern is rejected.  Otherwise the Region/Phi/If
// are replaced by two Region/If pairs whose arms are re-merged, and a fresh
// ConINode is returned as a stand-in for the (now dead) original iff.
static Node* split_if(IfNode *iff, PhaseIterGVN *igvn) {
// I could be a lot more general here, but I'm trying to squeeze this
// in before the Christmas '98 break so I'm gonna be kinda restrictive
// on the patterns I accept. CNC
// Look for a compare of a constant and a merged value
Node *i1 = iff->in(1);
if( !i1->is_Bool() ) return NULL;
BoolNode *b = i1->as_Bool();
Node *cmp = b->in(1);
if( !cmp->is_Cmp() ) return NULL;
i1 = cmp->in(1);
if( i1 == NULL || !i1->is_Phi() ) return NULL;
PhiNode *phi = i1->as_Phi();
if( phi->is_copy() ) return NULL;
Node *con2 = cmp->in(2);
if( !con2->is_Con() ) return NULL;
// See that the merge point contains some constants
// On exit from the loop, con1 is the first Phi input that is either a
// constant or a pointer whose type is known NotNull (if any was found).
Node *con1=NULL;
uint i4;
for( i4 = 1; i4 < phi->req(); i4++ ) {
con1 = phi->in(i4);
if( !con1 ) return NULL; // Do not optimize partially collapsed merges
if( con1->is_Con() ) break; // Found a constant
// Also allow null-vs-not-null checks
const TypePtr *tp = igvn->type(con1)->isa_ptr();
if( tp && tp->_ptr == TypePtr::NotNull )
break;
}
if( i4 >= phi->req() ) return NULL; // Found no constants
igvn->C->set_has_split_ifs(true); // Has chance for split-if
// Make sure that the compare can be constant folded away
// A throw-away clone of the compare is evaluated with con1 substituted for
// the Phi; only its computed type 't' is kept.
Node *cmp2 = cmp->clone();
cmp2->set_req(1,con1);
cmp2->set_req(2,con2);
const Type *t = cmp2->Value(igvn);
// This compare is dead, so whack it!
igvn->remove_dead_node(cmp2);
if( !t->singleton() ) return NULL;
// No intervening control, like a simple Call
Node *r = iff->in(0);
if( !r->is_Region() ) return NULL;
if( phi->region() != r ) return NULL;
// No other users of the cmp/bool
if (b->outcnt() != 1 || cmp->outcnt() != 1) {
//tty->print_cr("many users of cmp/bool");
return NULL;
}
// Make sure we can determine where all the uses of merged values go
for (DUIterator_Fast jmax, j = r->fast_outs(jmax); j < jmax; j++) {
Node* u = r->fast_out(j);
if( u == r ) continue;
if( u == iff ) continue;
if( u->outcnt() == 0 ) continue; // use is dead & ignorable
if( !u->is_Phi() ) {
/*
if( u->is_Start() ) {
tty->print_cr("Region has inlined start use");
} else {
tty->print_cr("Region has odd use");
u->dump(2);
}*/
return NULL;
}
if( u != phi ) {
// CNC - do not allow any other merged value
//tty->print_cr("Merging another value");
//u->dump(2);
return NULL;
}
// Make sure we can account for all Phi uses
for (DUIterator_Fast kmax, k = u->fast_outs(kmax); k < kmax; k++) {
Node* v = u->fast_out(k); // User of the phi
// CNC - Allow only really simple patterns.
// In particular I disallow AddP of the Phi, a fairly common pattern
if( v == cmp ) continue; // The compare is OK
if( (v->is_ConstraintCast()) &&
v->in(0)->in(0) == iff )
continue; // CastPP/II of the IfNode is OK
// Disabled following code because I cannot tell if exactly one
// path dominates without a real dominator check. CNC 9/9/1999
//uint vop = v->Opcode();
//if( vop == Op_Phi ) { // Phi from another merge point might be OK
// Node *r = v->in(0); // Get controlling point
// if( !r ) return NULL; // Degraded to a copy
// // Find exactly one path in (either True or False doms, but not IFF)
// int cnt = 0;
// for( uint i = 1; i < r->req(); i++ )
// if( r->in(i) && r->in(i)->in(0) == iff )
// cnt++;
// if( cnt == 1 ) continue; // Exactly one of True or False guards Phi
//}
// Any other user of the Phi rejects the transformation: the is_Call() test
// below only wraps disabled diagnostic output, and the 'return NULL' that
// follows it is unconditional.
if( !v->is_Call() ) {
/*
if( v->Opcode() == Op_AddP ) {
tty->print_cr("Phi has AddP use");
} else if( v->Opcode() == Op_CastPP ) {
tty->print_cr("Phi has CastPP use");
} else if( v->Opcode() == Op_CastII ) {
tty->print_cr("Phi has CastII use");
} else {
tty->print_cr("Phi has use I cant be bothered with");
}
*/
}
return NULL;
/* CNC - Cut out all the fancy acceptance tests
// Can we clone this use when doing the transformation?
// If all uses are from Phis at this merge or constants, then YES.
if( !v->in(0) && v != cmp ) {
tty->print_cr("Phi has free-floating use");
v->dump(2);
return NULL;
}
for( uint l = 1; l < v->req(); l++ ) {
if( (!v->in(l)->is_Phi() || v->in(l)->in(0) != r) &&
!v->in(l)->is_Con() ) {
tty->print_cr("Phi has use");
v->dump(2);
return NULL;
} // End of if Phi-use input is neither Phi nor Constant
} // End of for all inputs to Phi-use
*/
} // End of for all uses of Phi
} // End of for all uses of Region
// Only do this if the IF node is in a sane state
if (iff->outcnt() != 2)
return NULL;
// Got a hit! Do the Mondo Hack!
//
//ABC a1c def ghi B 1 e h A C a c d f g i
// R - Phi - Phi - Phi Rc - Phi - Phi - Phi Rx - Phi - Phi - Phi
// cmp - 2 cmp - 2 cmp - 2
// bool bool_c bool_x
// if if_c if_x
// T F T F T F
// ..s.. ..t .. ..s.. ..t.. ..s.. ..t..
//
// Split the paths coming into the merge point into 2 separate groups of
// merges. On the left will be all the paths feeding constants into the
// Cmp's Phi. On the right will be the remaining paths. The Cmp's Phi
// will fold up into a constant; this will let the Cmp fold up as well as
// all the control flow. Below the original IF we have 2 control
// dependent regions, 's' and 't'. Now we will merge the two paths
// just prior to 's' and 't' from the two IFs. At least 1 path (and quite
// likely 2 or more) will promptly constant fold away.
PhaseGVN *phase = igvn;
// Make a region merging constants and a region merging the rest
// req_c counts the Region paths on which the Phi input is exactly con1;
// predicate_proj remembers the (single) path for which find_predicate()
// returned a non-NULL projection.
uint req_c = 0;
Node* predicate_proj = NULL;
for (uint ii = 1; ii < r->req(); ii++) {
if (phi->in(ii) == con1) {
req_c++;
}
Node* proj = PhaseIdealLoop::find_predicate(r->in(ii));
if (proj != NULL) {
assert(predicate_proj == NULL, "only one predicate entry expected");
predicate_proj = proj;
}
}
// If all the defs of the phi are the same constant, we already have the desired end state.
// Skip the split that would create empty phi and region nodes.
if((r->req() - req_c) == 1) {
return NULL;
}
Node* predicate_c = NULL;
Node* predicate_x = NULL;
bool counted_loop = r->is_CountedLoop();
// region_c/phi_c collect the con1 paths (phi_c is simply con1);
// region_x/phi_x collect every other path.
Node *region_c = new (igvn->C) RegionNode(req_c + 1);
Node *phi_c = con1;
uint len = r->req();
Node *region_x = new (igvn->C) RegionNode(len - req_c);
Node *phi_x = PhiNode::make_blank(region_x, phi);
for (uint i = 1, i_c = 1, i_x = 1; i < len; i++) {
if (phi->in(i) == con1) {
region_c->init_req( i_c++, r ->in(i) );
if (r->in(i) == predicate_proj)
predicate_c = predicate_proj;
} else {
region_x->init_req( i_x, r ->in(i) );
phi_x ->init_req( i_x++, phi->in(i) );
if (r->in(i) == predicate_proj)
predicate_x = predicate_proj;
}
}
// A predicate is only carried along when its path is the sole input of the
// new region it landed in.
if (predicate_c != NULL && (req_c > 1)) {
assert(predicate_x == NULL, "only one predicate entry expected");
predicate_c = NULL; // Do not clone predicate below merge point
}
if (predicate_x != NULL && ((len - req_c) > 2)) {
assert(predicate_c == NULL, "only one predicate entry expected");
predicate_x = NULL; // Do not clone predicate below merge point
}
// Register the new RegionNodes but do not transform them. Cannot
// transform until the entire Region/Phi conglomerate has been hacked
// as a single huge transform.
igvn->register_new_node_with_optimizer( region_c );
igvn->register_new_node_with_optimizer( region_x );
// Prevent the untimely death of phi_x. Currently he has no uses. He is
// about to get one. If this only use goes away, then phi_x will look dead.
// However, he will be picking up some more uses down below.
Node *hook = new (igvn->C) Node(4);
hook->init_req(0, phi_x);
hook->init_req(1, phi_c);
phi_x = phase->transform( phi_x );
// Make the compare
// The constant side needs no real compare: its result type 't' was computed
// above, so a constant of that type stands in for it.
Node *cmp_c = phase->makecon(t);
Node *cmp_x = cmp->clone();
cmp_x->set_req(1,phi_x);
cmp_x->set_req(2,con2);
cmp_x = phase->transform(cmp_x);
// Make the bool
Node *b_c = phase->transform(new (igvn->C) BoolNode(cmp_c,b->_test._test));
Node *b_x = phase->transform(new (igvn->C) BoolNode(cmp_x,b->_test._test));
// Make the IfNode
// Both new Ifs reuse the original's _prob and _fcnt and are also attached
// to the hook node.
IfNode *iff_c = new (igvn->C) IfNode(region_c,b_c,iff->_prob,iff->_fcnt);
igvn->set_type_bottom(iff_c);
igvn->_worklist.push(iff_c);
hook->init_req(2, iff_c);
IfNode *iff_x = new (igvn->C) IfNode(region_x,b_x,iff->_prob, iff->_fcnt);
igvn->set_type_bottom(iff_x);
igvn->_worklist.push(iff_x);
hook->init_req(3, iff_x);
// Make the true/false arms
Node *iff_c_t = phase->transform(new (igvn->C) IfTrueNode (iff_c));
Node *iff_c_f = phase->transform(new (igvn->C) IfFalseNode(iff_c));
if (predicate_c != NULL) {
assert(predicate_x == NULL, "only one predicate entry expected");
// Clone loop predicates to each path
iff_c_t = igvn->clone_loop_predicates(predicate_c, iff_c_t, !counted_loop);
iff_c_f = igvn->clone_loop_predicates(predicate_c, iff_c_f, !counted_loop);
}
Node *iff_x_t = phase->transform(new (igvn->C) IfTrueNode (iff_x));
Node *iff_x_f = phase->transform(new (igvn->C) IfFalseNode(iff_x));
if (predicate_x != NULL) {
assert(predicate_c == NULL, "only one predicate entry expected");
// Clone loop predicates to each path
iff_x_t = igvn->clone_loop_predicates(predicate_x, iff_x_t, !counted_loop);
iff_x_f = igvn->clone_loop_predicates(predicate_x, iff_x_f, !counted_loop);
}
// Merge the TRUE paths
Node *region_s = new (igvn->C) RegionNode(3);
igvn->_worklist.push(region_s);
region_s->init_req(1, iff_c_t);
region_s->init_req(2, iff_x_t);
igvn->register_new_node_with_optimizer( region_s );
// Merge the FALSE paths
Node *region_f = new (igvn->C) RegionNode(3);
igvn->_worklist.push(region_f);
region_f->init_req(1, iff_c_f);
region_f->init_req(2, iff_x_f);
igvn->register_new_node_with_optimizer( region_f );
igvn->hash_delete(cmp);// Remove soon-to-be-dead node from hash table.
cmp->set_req(1,NULL); // Whack the inputs to cmp because it will be dead
cmp->set_req(2,NULL);
// Check for all uses of the Phi and give them a new home.
// The 'cmp' got cloned, but CastPP/IIs need to be moved.
Node *phi_s = NULL; // do not construct unless needed
Node *phi_f = NULL; // do not construct unless needed
for (DUIterator_Last i2min, i2 = phi->last_outs(i2min); i2 >= i2min; --i2) {
Node* v = phi->last_out(i2);// User of the phi
igvn->rehash_node_delayed(v); // Have to fixup other Phi users
uint vop = v->Opcode();
Node *proj = NULL;
if( vop == Op_Phi ) { // Remote merge point
Node *r = v->in(0);
for (uint i3 = 1; i3 < r->req(); i3++)
if (r->in(i3) && r->in(i3)->in(0) == iff) {
proj = r->in(i3);
break;
}
} else if( v->is_ConstraintCast() ) {
proj = v->in(0); // Controlling projection
} else {
assert( 0, "do not know how to handle this guy" );
}
// NOTE(review): 'proj' is dereferenced below without a NULL check. It stays
// NULL if the assert above is compiled out, or if the Op_Phi search finds no
// input under 'iff' -- presumably excluded by the acceptance tests; confirm.
Node *proj_path_data, *proj_path_ctrl;
if( proj->Opcode() == Op_IfTrue ) {
if( phi_s == NULL ) {
// Only construct phi_s if needed, otherwise provides
// interfering use.
phi_s = PhiNode::make_blank(region_s,phi);
phi_s->init_req( 1, phi_c );
phi_s->init_req( 2, phi_x );
hook->add_req(phi_s);
phi_s = phase->transform(phi_s);
}
proj_path_data = phi_s;
proj_path_ctrl = region_s;
} else {
if( phi_f == NULL ) {
// Only construct phi_f if needed, otherwise provides
// interfering use.
phi_f = PhiNode::make_blank(region_f,phi);
phi_f->init_req( 1, phi_c );
phi_f->init_req( 2, phi_x );
hook->add_req(phi_f);
phi_f = phase->transform(phi_f);
}
proj_path_data = phi_f;
proj_path_ctrl = region_f;
}
// Fixup 'v' for the split
if( vop == Op_Phi ) { // Remote merge point
uint i;
for( i = 1; i < v->req(); i++ )
if( v->in(i) == phi )
break;
v->set_req(i, proj_path_data );
} else if( v->is_ConstraintCast() ) {
v->set_req(0, proj_path_ctrl );
v->set_req(1, proj_path_data );
} else
ShouldNotReachHere();
}
// Now replace the original iff's True/False with region_s/region_f.
// This makes the original iff go dead.
for (DUIterator_Last i3min, i3 = iff->last_outs(i3min); i3 >= i3min; --i3) {
Node* p = iff->last_out(i3);
assert( p->Opcode() == Op_IfTrue || p->Opcode() == Op_IfFalse, "" );
Node *u = (p->Opcode() == Op_IfTrue) ? region_s : region_f;
// Replace p with u
igvn->add_users_to_worklist(p);
for (DUIterator_Last lmin, l = p->last_outs(lmin); l >= lmin;) {
Node* x = p->last_out(l);
igvn->hash_delete(x);
uint uses_found = 0;
for( uint j = 0; j < x->req(); j++ ) {
if( x->in(j) == p ) {
x->set_req(j, u);
uses_found++;
}
}
l -= uses_found; // we deleted 1 or more copies of this edge
}
igvn->remove_dead_node(p);
}
// Force the original merge dead
igvn->hash_delete(r);
// First, remove region's dead users.
for (DUIterator_Last lmin, l = r->last_outs(lmin); l >= lmin;) {
Node* u = r->last_out(l);
if( u == r ) {
r->set_req(0, NULL);
} else {
assert(u->outcnt() == 0, "only dead users");
igvn->remove_dead_node(u);
}
l -= 1;
}
igvn->remove_dead_node(r);
// Now remove the bogus extra edges used to keep things alive
igvn->remove_dead_node( hook );
// Must return either the original node (now dead) or a new node
// (Do not return a top here, since that would break the uniqueness of top.)
return new (igvn->C) ConINode(TypeInt::ZERO);
}
//------------------------------is_range_check---------------------------------
// Decide whether this If is an array range check, i.e. one of
//   result 1:  If (Bool[<]  CmpU(x, LoadRange))
//   result 2:  If (Bool[<=] CmpU(LoadRange, x))   (operands swapped)
// whose projection proj_out(result == 2) leads, possibly through one Region,
// to an uncommon trap with reason Reason_range_check.
// Returns 0 (leaving the out-parameters untouched) for anything else.
// On success 'range' receives the LoadRange and x is split into
// 'index' + 'offset'; 'index' is NULL when x is a non-negative constant.
int IfNode::is_range_check(Node* &range, Node* &index, jint &offset) {
  if (outcnt() != 2) return 0;

  Node* test = in(1);
  if (test == NULL || !test->is_Bool()) return 0;
  BoolNode* bol = test->as_Bool();

  Node* cmp = bol->in(1);
  if (cmp == NULL || cmp->Opcode() != Op_CmpU) return 0;

  // Only '<' (as written) and '<=' (operands swapped) are recognized.
  if (bol->_test._test != BoolTest::lt && bol->_test._test != BoolTest::le) {
    return 0;
  }
  const bool swapped = (bol->_test._test == BoolTest::le);
  const int flip_test = swapped ? 2 : 1;
  Node* l = cmp->in(swapped ? 2 : 1);   // the value being checked
  Node* r = cmp->in(swapped ? 1 : 2);   // the candidate array length

  if (l->is_top()) return 0;            // Top input means dead test
  if (r->Opcode() != Op_LoadRange) return 0;

  // Insist on an uncommon trap along the out-of-bounds path; otherwise this
  // may be user code that merely optimized into the shape of a range check.
  Node* trap_ctrl = proj_out(swapped);
  if (trap_ctrl == NULL) return 0;
  Node* trap_user = trap_ctrl->unique_ctrl_out();
  if (trap_user == NULL) return 0;
  if (trap_user->is_Region()) {
    // The trap may sit behind a merge point; step through it when the Region
    // itself has a unique control user.
    Node* below = trap_user->unique_ctrl_out();
    if (below != NULL) {
      trap_ctrl = trap_user;
      trap_user = below;
    }
  }
  if (trap_user->in(0) != trap_ctrl || !trap_user->is_CallStaticJava()) {
    return 0;                           // sorry, no cigar
  }
  int req = trap_user->as_CallStaticJava()->uncommon_trap_request();
  if (Deoptimization::trap_request_reason(req) !=
      Deoptimization::Reason_range_check) {
    return 0;                           // a trap, but not a range-check trap
  }

  // Decompose the checked value into index + constant offset.
  Node* ind = l;
  jint off = 0;
  if (l->is_top()) {
    return 0;                           // same test as above, kept as-is
  }
  if (l->Opcode() == Op_AddI) {
    // Take whichever AddI input yields a non-zero int constant as the offset;
    // if neither does, 'off' ends up 0 and the whole AddI is the index.
    off = l->in(1)->find_int_con(0);
    if (off != 0) {
      ind = l->in(2);
    } else {
      off = l->in(2)->find_int_con(0);
      if (off != 0) {
        ind = l->in(1);
      }
    }
  } else {
    off = l->find_int_con(-1);
    if (off >= 0) {
      ind = NULL;                       // constant offset with no variable index
    } else {
      off = 0;                          // variable index with no constant offset
                                        // (or dead negative index)
    }
  }

  // Hand back all the values.
  index = ind;
  offset = off;
  range = r;
  return flip_test;
}
//------------------------------adjust_check-----------------------------------
// Adjust (widen) a prior range check.
// 'proj' is a projection of the If to adjust.  A new unsigned compare of
// (index + off_lo) -- or just the constant off_lo when index is NULL --
// against 'range' is built, operand order chosen by 'flip' (1: value first,
// otherwise range first), and installed as that If's test unless it is the
// compare already in place.
static void adjust_check(Node* proj, Node* range, Node* index,
                         int flip, jint off_lo, PhaseIterGVN* igvn) {
  PhaseGVN *gvn = igvn;

  // Take the existing check apart.
  Node* old_if = proj->in(0);
  Node* old_test = old_if->in(1);
  if (old_test->is_top()) {
    return;                     // In case a partially dead range check appears
  }
  // bail (or bomb[ASSERT/DEBUG]) if NOT projection-->IfNode-->BoolNode
  DEBUG_ONLY( if( !old_test->is_Bool() ) { proj->dump(3); fatal("Expect projection-->IfNode-->BoolNode"); } )
  if (!old_test->is_Bool()) {
    return;
  }
  Node* old_cmp = old_test->in(1);

  // Build the value to be checked.
  Node* checked = gvn->intcon(off_lo);
  if (index != NULL) {
    if (off_lo != 0) {
      checked = gvn->transform(new (gvn->C) AddINode( index, checked ));
    } else {
      checked = index;          // no offset: the bare index is the value
    }
  }

  // Build the compare with the requested operand order.
  Node* fresh_cmp;
  if (flip == 1) {
    fresh_cmp = new (gvn->C) CmpUNode( checked, range );
  } else {
    fresh_cmp = new (gvn->C) CmpUNode( range, checked );
  }
  fresh_cmp = gvn->transform(fresh_cmp);

  // Nothing to do if the existing check already uses this compare.
  if (fresh_cmp == old_cmp) {
    return;
  }

  // Otherwise swap in a Bool with the same test over the new compare.
  Node* fresh_test = gvn->transform( new (gvn->C) BoolNode( fresh_cmp, old_test->as_Bool()->_test._test ) );
  igvn->rehash_node_delayed( old_if );
  old_if->set_req_X( 1, fresh_test, igvn );
}
//------------------------------up_one_dom-------------------------------------
// Take one step up the dominator tree from 'curr'.  Returns NULL at the root
// or at a true (complex) merge; loop headers are stepped through via in(1)
// and small If-diamonds are skipped to the If that heads them.
// With linear_only set, any node that is its own control input ends the walk.
Node* IfNode::up_one_dom(Node *curr, bool linear_only) {
  Node* above = curr->in(0);
  if (above == NULL) {
    // Found a Region degraded to a copy?  Skip thru it.
    return curr->nonnull_req();
  }
  if (above != curr) {
    return above;               // Normal walk up one step
  }

  // From here on 'above' is 'curr' itself (a node that is its own control).
  // Use linear_only if we are still parsing, since we cannot
  // trust the regions to be fully filled in.
  if (linear_only || above->is_Root()) {
    return NULL;
  }

  // Hit a Region.  Loop headers are skipped through their in(1) input.
  if (above->is_Loop()) {
    return above->in(1);
  }

  // Check for small diamonds: a 2-path merge whose two inputs each have a
  // control input.  Anything else is a true merge and ends the search.
  if (above->req() != 3) return NULL;
  Node* left = above->in(1);
  if (left == NULL) return NULL;
  Node* right = above->in(2);
  if (right == NULL) return NULL;
  Node* left_up = left->in(0);
  if (left_up == NULL) return NULL;
  Node* right_up = right->in(0);
  if (right_up == NULL) return NULL;

  // Handle a slow-path call on either arm: step from the call to its control
  // input and, when that exists, one more step up.
  if (left_up->is_Call()) {
    left_up = left_up->in(0);
    if (left_up != NULL) {
      left_up = left_up->in(0);
    }
  }
  if (right_up->is_Call()) {
    right_up = right_up->in(0);
    if (right_up != NULL) {
      right_up = right_up->in(0);
    }
  }

  // Both arms must meet at the same If (Regions not degraded to a copy).
  if (left_up != NULL && left_up == right_up && left_up->is_If()) {
    return left_up;             // Skip around diamonds
  }

  // Give up the search at true merges
  return NULL;                  // Dead loop?  Or hit root?
}
//------------------------------filtered_int_type--------------------------------
// Return a possibly more restrictive type for val based on condition control flow for an if.
//
// gvn     - supplies the type of the compare's second input.
// val     - the value being refined; it must be in(1) of the Cmp feeding the If.
// if_proj - an IfTrue/IfFalse projection; for IfFalse the test is negated.
// Returns NULL when if_proj does not hang off If(Bool(Cmp(val, x))) with x of
// int type, or when the test is 'ne' (nothing can be concluded).  Otherwise
// returns the int range implied for val, built from the range of x.
const TypeInt* IfNode::filtered_int_type(PhaseGVN* gvn, Node *val, Node* if_proj) {
  assert(if_proj &&
         (if_proj->Opcode() == Op_IfTrue || if_proj->Opcode() == Op_IfFalse), "expecting an if projection");

  // Walk projection -> If -> Bool -> Cmp, bailing out on any other shape.
  Node* ctrl = if_proj->in(0);
  if (ctrl == NULL || !ctrl->is_If()) {
    return NULL;
  }
  IfNode* iff = ctrl->as_If();

  Node* test = iff->in(1);
  if (test == NULL || !test->is_Bool()) {
    return NULL;
  }
  BoolNode* bol = test->as_Bool();

  Node* cmp_in = bol->in(1);
  if (cmp_in == NULL || !cmp_in->is_Cmp()) {
    return NULL;
  }
  const CmpNode* cmp = cmp_in->as_Cmp();
  if (cmp->in(1) != val) {
    return NULL;
  }

  const TypeInt* cmp2_t = gvn->type(cmp->in(2))->isa_int();
  if (cmp2_t == NULL) {
    return NULL;
  }

  jint lo = cmp2_t->_lo;
  jint hi = cmp2_t->_hi;
  // On the false projection the condition that holds is the negated test.
  BoolTest::mask msk = if_proj->Opcode() == Op_IfTrue ? bol->_test._test : bol->_test.negate();
  switch (msk) {
  case BoolTest::ne:
    // Can't refine type
    return NULL;
  case BoolTest::eq:
    return cmp2_t;
  case BoolTest::lt:
    lo = TypeInt::INT->_lo;
    // val < x implies val <= hi - 1.  Compare against the smallest int
    // explicitly instead of testing 'hi - 1 < hi': that expression overflows
    // (undefined behavior) exactly in the case it is meant to detect.
    if (hi != TypeInt::INT->_lo) {
      hi = hi - 1;
    }
    break;
  case BoolTest::le:
    lo = TypeInt::INT->_lo;
    break;
  case BoolTest::gt:
    // val > x implies val >= lo + 1; same overflow consideration as for 'lt',
    // this time against the largest int.
    if (lo != TypeInt::INT->_hi) {
      lo = lo + 1;
    }
    hi = TypeInt::INT->_hi;
    break;
  case BoolTest::ge:
    // lo unchanged
    hi = TypeInt::INT->_hi;
    break;
  default:
    // Any other mask leaves both bounds as taken from cmp2_t.
    break;
  }
  const TypeInt* rtn_t = TypeInt::make(lo, hi, cmp2_t->_widen);
  return rtn_t;
}
//------------------------------fold_compares----------------------------
// See if a pair of CmpIs can be converted into a CmpU. In some cases
// the direction of this if is determined by the preceding if so it
// can be eliminated entirely. Given an if testing (CmpI n c) check
// for an immediately control dependent if that is testing (CmpI n c2)
// and has one projection leading to this if and the other projection
// leading to a region that merges one of this if's control
// projections.
//
// If
// / |
// / |
// / |
// If |
// /\ |
// / \ |
// / \ |
// / Region
//
// Returns 'this' after replacing its test input (in(1)) when a fold was
// performed, NULL when the pattern does not match.
// NOTE(review): in(1) is dereferenced without a NULL/is_Bool check and
// phase->is_IterGVN() is used without a NULL check -- presumably guaranteed
// by the caller; confirm.
Node* IfNode::fold_compares(PhaseGVN* phase) {
if (Opcode() != Op_If) return NULL;
Node* this_cmp = in(1)->in(1);
if (this_cmp != NULL && this_cmp->Opcode() == Op_CmpI &&
this_cmp->in(2)->is_Con() && this_cmp->in(2) != phase->C->top()) {
Node* ctrl = in(0);
BoolNode* this_bool = in(1)->as_Bool();
Node* n = this_cmp->in(1);
// NOTE(review): 'hi' (and 'lo' / 'dom_cmp' further down) are not read again
// in this function; only the get_int()/in() calls themselves remain.
int hi = this_cmp->in(2)->get_int();
// The control input must be the sole-use projection of a two-way If that
// tests a CmpI of the same value 'n' against a (non-top) constant.
if (ctrl != NULL && ctrl->is_Proj() && ctrl->outcnt() == 1 &&
ctrl->in(0)->is_If() &&
ctrl->in(0)->outcnt() == 2 &&
ctrl->in(0)->in(1)->is_Bool() &&
ctrl->in(0)->in(1)->in(1)->Opcode() == Op_CmpI &&
ctrl->in(0)->in(1)->in(1)->in(2)->is_Con() &&
ctrl->in(0)->in(1)->in(1)->in(2) != phase->C->top() &&
ctrl->in(0)->in(1)->in(1)->in(1) == n) {
IfNode* dom_iff = ctrl->in(0)->as_If();
Node* otherproj = dom_iff->proj_out(!ctrl->as_Proj()->_con);
if (otherproj->outcnt() == 1 && otherproj->unique_out()->is_Region() &&
this_bool->_test._test != BoolTest::ne && this_bool->_test._test != BoolTest::eq) {
// Identify which proj goes to the region and which continues on
// 'success' is this If's projection whose only use is that region;
// 'fail' is the other projection.
RegionNode* region = otherproj->unique_out()->as_Region();
Node* success = NULL;
Node* fail = NULL;
for (int i = 0; i < 2; i++) {
Node* proj = proj_out(i);
if (success == NULL && proj->outcnt() == 1 && proj->unique_out() == region) {
success = proj;
} else if (fail == NULL) {
fail = proj;
} else {
success = fail = NULL;
}
}
if (success != NULL && fail != NULL && !region->has_phi()) {
int lo = dom_iff->in(1)->in(1)->in(2)->get_int();
BoolNode* dom_bool = dom_iff->in(1)->as_Bool();
Node* dom_cmp = dom_bool->in(1);
// failtype: range of 'n' implied by taking both 'ctrl' (out of the
// dominating If) and 'fail' (out of this If); NULL if either test
// gives no refinement.
const TypeInt* failtype = filtered_int_type(phase, n, ctrl);
if (failtype != NULL) {
const TypeInt* type2 = filtered_int_type(phase, n, fail);
if (type2 != NULL) {
failtype = failtype->join(type2)->is_int();
} else {
failtype = NULL;
}
}
if (failtype != NULL &&
dom_bool->_test._test != BoolTest::ne && dom_bool->_test._test != BoolTest::eq) {
// NOTE(review): 'bound' is computed in int arithmetic before the
// max_jint/min_jint guards below are evaluated; confirm hi - lo + 1
// cannot overflow for wide ranges.
int bound = failtype->_hi - failtype->_lo + 1;
if (failtype->_hi != max_jint && failtype->_lo != min_jint && bound > 1) {
// Merge the two compares into a single unsigned compare by building (CmpU (n - lo) hi)
BoolTest::mask cond = fail->as_Proj()->_con ? BoolTest::lt : BoolTest::ge;
Node* adjusted = phase->transform(new (phase->C) SubINode(n, phase->intcon(failtype->_lo)));
Node* newcmp = phase->transform(new (phase->C) CmpUNode(adjusted, phase->intcon(bound)));
Node* newbool = phase->transform(new (phase->C) BoolNode(newcmp, cond));
// The dominating If's test becomes the constant matching ctrl's
// projection number (presumably so it always takes the path leading
// here), and this If takes over the combined unsigned test.
phase->is_IterGVN()->replace_input_of(dom_iff, 1, phase->intcon(ctrl->as_Proj()->_con));
phase->hash_delete(this);
set_req(1, newbool);
return this;
}
if (failtype->_lo > failtype->_hi) {
// previous if determines the result of this if so
// replace Bool with constant
phase->hash_delete(this);
set_req(1, phase->intcon(success->as_Proj()->_con));
return this;
}
}
}
}
}
}
return NULL;
}
//------------------------------remove_useless_bool----------------------------
// Detect an If whose condition re-tests a boolean that was itself produced
// by a diamond, e.g.
//   if( (x < y ? true : false) ) { ... }
// and rewire the If to test the diamond's own condition (or its negation):
//   if( x < y ) { ... }
// Returns 'iff' when the test input was replaced, NULL when the pattern does
// not match.
static Node *remove_useless_bool(IfNode *iff, PhaseGVN *phase) {
  Node *test_in = iff->in(1);
  if (!test_in->is_Bool()) {
    return NULL;
  }
  BoolNode *outer_bool = test_in->as_Bool();
  Node *outer_cmp = outer_bool->in(1);
  if (outer_cmp->Opcode() != Op_CmpI) {
    return NULL;
  }

  // The compare must be against the constant 0 or 1.
  const Type *rhs_type = phase->type(outer_cmp->in(2));
  const bool rhs_is_zero = (rhs_type == TypeInt::ZERO);
  if (!rhs_is_zero && rhs_type != TypeInt::ONE) {
    return NULL;
  }

  // The compared value must be a boolean-typed Phi at a merge point.
  Node *lhs = outer_cmp->in(1);
  if (!lhs->is_Phi()) {
    return NULL;
  }
  PhiNode *phi = lhs->as_Phi();
  if (phase->type(phi) != TypeInt::BOOL) {
    return NULL;
  }

  // The Phi must sit on a diamond; a non-zero result names the true path.
  const int true_path = phi->is_diamond_phi();
  if (true_path == 0) {
    return NULL;
  }

  // The diamond's If must not be 'iff' itself. That should only happen for
  // dead control flow since it requires an illegal cycle.
  if (phi->in(0)->in(1)->in(0) == iff) {
    return NULL;
  }

  // phi->region->if_proj->ifnode->bool
  BoolNode *inner_bool = phi->in(0)->in(1)->in(0)->in(1)->as_Bool();

  // Work out whether the inner test has to be negated before it can stand
  // in for the outer one. Each of the following facts toggles the sense.
  const bool outer_is_ne = (outer_bool->_test._test == BoolTest::ne);
  if (!outer_is_ne && outer_bool->_test._test != BoolTest::eq) {
    return NULL;
  }
  bool invert = outer_is_ne;
  if (rhs_is_zero) {
    invert = !invert;
  }

  const Type *first_t  = phase->type(phi->in(1));
  const Type *second_t = phase->type(phi->in(2));
  if (first_t == TypeInt::ZERO) {
    // Phi(0,1)
    if (second_t != TypeInt::ONE) {
      return NULL;
    }
    invert = !invert;
  } else if (first_t != TypeInt::ONE || second_t != TypeInt::ZERO) {
    // Anything other than Phi(1,0) is not a plain boolean diamond.
    return NULL;
  }

  if (true_path == 2) {
    invert = !invert;
  }

  Node *replacement = inner_bool;
  if (invert) {
    replacement = phase->transform(inner_bool->negate(phase));
  }
  assert(replacement != iff->in(1), "must make progress");
  iff->set_req(1, replacement);

  // Intervening diamond probably goes dead
  phase->C->set_major_progress();
  return iff;
}
// Forward declaration; the definition appears later in this file.
static IfNode* idealize_test(PhaseGVN* phase, IfNode* iff);
// Record of one dominating range check found while scanning up the
// dominator chain in IfNode::Ideal.
struct RangeCheck {
// Control node recorded for the check (IfNode::Ideal stores 'prev_dom' here).
Node* ctl;
// Constant offset reported by is_range_check() for that check.
jint off;
};
//------------------------------Ideal------------------------------------------
// Return a node which is more "ideal" than the current node. Strip out
// control copies
//
// Order of attempts, each returning as soon as it makes progress:
//   1. remove_dead_region()
//   2. idealize_test()       - canonicalize the Bool
//   3. split_if()
//   4. remove_useless_bool()
//   5. search the dominator chain for a test that makes this one redundant:
//      either a matching range check (possibly widening up to three prior
//      checks via adjust_check), or an If with an identical test input.
// When step 5 succeeds the If is removed through dominated_by() and a fresh
// ConINode is returned in its place. Returns NULL when nothing changed.
// Steps 2-5 are skipped when 'can_reshape' is false.
Node *IfNode::Ideal(PhaseGVN *phase, bool can_reshape) {
if (remove_dead_region(phase, can_reshape)) return this;
// No Def-Use info?
if (!can_reshape) return NULL;
PhaseIterGVN *igvn = phase->is_IterGVN();
// Don't bother trying to transform a dead if
if (in(0)->is_top()) return NULL;
// Don't bother trying to transform an if with a dead test
if (in(1)->is_top()) return NULL;
// Another variation of a dead test
if (in(1)->is_Con()) return NULL;
// Another variation of a dead if
if (outcnt() < 2) return NULL;
// Canonicalize the test.
Node* idt_if = idealize_test(phase, this);
if (idt_if != NULL) return idt_if;
// Try to split the IF
Node *s = split_if(this, igvn);
if (s != NULL) return s;
// Check for people making a useless boolean: things like
// if( (x < y ? true : false) ) { ... }
// Replace with if( x < y ) { ... }
Node *bol2 = remove_useless_bool(this, phase);
if( bol2 ) return bol2;
// Setup to scan up the CFG looking for a dominating test
// 'dom' is the candidate being inspected; 'prev_dom' is the node visited just
// before it on the way up.
Node *dom = in(0);
Node *prev_dom = this;
// Check for range-check vs other kinds of tests
Node *index1, *range1;
jint offset1;
int flip1 = is_range_check(range1, index1, offset1);
if( flip1 ) {
// Try to remove extra range checks. All 'up_one_dom' gives up at merges
// so all checks we inspect post-dominate the top-most check we find.
// If we are going to fail the current check and we reach the top check
// then we are guaranteed to fail, so just start interpreting there.
// We 'expand' the top 3 range checks to include all post-dominating
// checks.
// The top 3 range checks seen
const int NRC =3;
// Circular buffer: slot (nb_checks % NRC) is overwritten, so after the scan
// it holds the NRC most recently found (i.e. top-most) matching checks.
RangeCheck prev_checks[NRC];
int nb_checks = 0;
// Low and high offsets seen so far
jint off_lo = offset1;
jint off_hi = offset1;
bool found_immediate_dominator = false;
// Scan for the top checks and collect range of offsets
for (int dist = 0; dist < 999; dist++) { // Range-Check scan limit
if (dom->Opcode() == Op_If && // Dominator is a plain If?
prev_dom->in(0) == dom) { // One path of test does dominate?
if (dom == this) return NULL; // dead loop
// See if this is a range check
Node *index2, *range2;
jint offset2;
int flip2 = dom->as_If()->is_range_check(range2, index2, offset2);
// See if this is a _matching_ range check, checking against
// the same array bounds.
if (flip2 == flip1 && range2 == range1 && index2 == index1 &&
dom->outcnt() == 2) {
if (nb_checks == 0 && dom->in(1) == in(1)) {
// Found an immediately dominating test at the same offset.
// This kind of back-to-back test can be eliminated locally,
// and there is no need to search further for dominating tests.
assert(offset2 == offset1, "Same test but different offsets");
found_immediate_dominator = true;
break;
}
// Gather expanded bounds
off_lo = MIN2(off_lo,offset2);
off_hi = MAX2(off_hi,offset2);
// Record top NRC range checks
prev_checks[nb_checks%NRC].ctl = prev_dom;
prev_checks[nb_checks%NRC].off = offset2;
nb_checks++;
}
}
prev_dom = dom;
dom = up_one_dom(dom);
if (!dom) break;
}
// When an immediate dominator was found, 'prev_dom' still refers to the node
// visited just below it and is used unchanged by dominated_by() further down.
if (!found_immediate_dominator) {
// Attempt to widen the dominating range check to cover some later
// ones. Since range checks "fail" by uncommon-trapping to the
// interpreter, widening a check can make us speculatively enter
// the interpreter. If we see range-check deopt's, do not widen!
if (!phase->C->allow_range_check_smearing()) return NULL;
// Didn't find prior covering check, so cannot remove anything.
if (nb_checks == 0) {
return NULL;
}
// Constant indices only need to check the upper bound.
// Non-constant indices must check both low and high.
// chk0 indexes the last check recorded, i.e. the top-most one found.
int chk0 = (nb_checks - 1) % NRC;
if (index1) {
if (nb_checks == 1) {
return NULL;
} else {
// If the top range check's constant is the min or max of
// all constants we widen the next one to cover the whole
// range of constants.
RangeCheck rc0 = prev_checks[chk0];
int chk1 = (nb_checks - 2) % NRC;
RangeCheck rc1 = prev_checks[chk1];
if (rc0.off == off_lo) {
adjust_check(rc1.ctl, range1, index1, flip1, off_hi, igvn);
prev_dom = rc1.ctl;
} else if (rc0.off == off_hi) {
adjust_check(rc1.ctl, range1, index1, flip1, off_lo, igvn);
prev_dom = rc1.ctl;
} else {
// If the top test's constant is not the min or max of all
// constants, we need 3 range checks. We must leave the
// top test unchanged because widening it would allow the
// accesses it protects to successfully read/write out of
// bounds.
if (nb_checks == 2) {
return NULL;
}
int chk2 = (nb_checks - 3) % NRC;
RangeCheck rc2 = prev_checks[chk2];
// The top range check a+i covers interval: -a <= i < length-a
// The second range check b+i covers interval: -b <= i < length-b
if (rc1.off <= rc0.off) {
// if b <= a, we change the second range check to:
// -min_of_all_constants <= i < length-min_of_all_constants
// Together top and second range checks now cover:
// -min_of_all_constants <= i < length-a
// which is more restrictive than -b <= i < length-b:
// -b <= -min_of_all_constants <= i < length-a <= length-b
// The third check is then changed to:
// -max_of_all_constants <= i < length-max_of_all_constants
// so 2nd and 3rd checks restrict allowed values of i to:
// -min_of_all_constants <= i < length-max_of_all_constants
adjust_check(rc1.ctl, range1, index1, flip1, off_lo, igvn);
adjust_check(rc2.ctl, range1, index1, flip1, off_hi, igvn);
} else {
// if b > a, we change the second range check to:
// -max_of_all_constants <= i < length-max_of_all_constants
// Together top and second range checks now cover:
// -a <= i < length-max_of_all_constants
// which is more restrictive than -b <= i < length-b:
// -b < -a <= i < length-max_of_all_constants <= length-b
// The third check is then changed to (the code passes off_lo, the minimum):
// -min_of_all_constants <= i < length-min_of_all_constants
// so 2nd and 3rd checks restrict allowed values of i to:
// -min_of_all_constants <= i < length-max_of_all_constants
adjust_check(rc1.ctl, range1, index1, flip1, off_hi, igvn);
adjust_check(rc2.ctl, range1, index1, flip1, off_lo, igvn);
}
prev_dom = rc2.ctl;
}
}
} else {
RangeCheck rc0 = prev_checks[chk0];
// 'Widen' the offset of the 1st and only covering check
adjust_check(rc0.ctl, range1, index1, flip1, off_hi, igvn);
// Test is now covered by prior checks, dominate it out
prev_dom = rc0.ctl;
}
}
} else { // Scan for an equivalent test
Node *cmp;
int dist = 0; // Cutoff limit for search
int op = Opcode();
if( op == Op_If &&
(cmp=in(1)->in(1))->Opcode() == Op_CmpP ) {
if( cmp->in(2) != NULL && // make sure cmp is not already dead
cmp->in(2)->bottom_type() == TypePtr::NULL_PTR ) {
dist = 64; // Limit for null-pointer scans
} else {
dist = 4; // Do not bother for random pointer tests
}
} else {
dist = 4; // Limit for random junky scans
}
// Normal equivalent-test check.
if( !dom ) return NULL; // Dead loop?
Node* result = fold_compares(phase);
if (result != NULL) {
return result;
}
// Search up the dominator tree for an If with an identical test
while( dom->Opcode() != op || // Not same opcode?
dom->in(1) != in(1) || // Not same input 1?
(req() == 3 && dom->in(2) != in(2)) || // Not same input 2?
prev_dom->in(0) != dom ) { // One path of test does not dominate?
if( dist < 0 ) return NULL;
dist--;
prev_dom = dom;
dom = up_one_dom( dom );
if( !dom ) return NULL;
}
// Check that we did not follow a loop back to ourselves
if( this == dom )
return NULL;
// 'dist' can only exceed 2 here when it started at 64 (the null-pointer
// limit above) and the match was found before most of the budget was used.
if( dist > 2 ) // Add to count of NULL checks elided
explicit_null_checks_elided++;
} // End of Else scan for an equivalent test
// Hit! Remove this IF
#ifndef PRODUCT
if( TraceIterativeGVN ) {
tty->print(" Removing IfNode: "); this->dump();
}
if( VerifyOpto && !phase->allow_progress() ) {
// Found an equivalent dominating test,
// we can not guarantee reaching a fix-point for these during iterativeGVN
// since intervening nodes may not change.
return NULL;
}
#endif
// Replace dominated IfNode
dominated_by( prev_dom, igvn );
// Must return either the original node (now dead) or a new node
// (Do not return a top here, since that would break the uniqueness of top.)
return new (phase->C) ConINode(TypeInt::ZERO);
}
//------------------------------dominated_by-----------------------------------
// Remove this If because 'prev_dom' (a projection of a dominating test)
// already decides its outcome. Users of the projection whose kind matches
// 'prev_dom' are rerouted to live control; users of the other projection are
// rerouted to TOP. Both projections and then the If itself are removed
// through 'igvn'.
void IfNode::dominated_by( Node *prev_dom, PhaseIterGVN *igvn ) {
igvn->hash_delete(this); // Remove self to prevent spurious V-N
Node *idom = in(0);
// Need opcode to decide which way 'this' test goes
int prev_op = prev_dom->Opcode();
Node *top = igvn->C->top(); // Shortcut to top
// Loop predicates may have depending checks which should not
// be skipped. For example, range check predicate has two checks
// for lower and upper bounds.
// NOTE(review): the NULL test on the next line runs only after
// proj_out(...)->as_Proj() has already been evaluated on the same value;
// confirm whether proj_out() can return NULL here.
ProjNode* unc_proj = proj_out(1 - prev_dom->as_Proj()->_con)->as_Proj();
if ((unc_proj != NULL) && (unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate))) {
// Dependent data nodes are then pinned at this If's own control input
// rather than at the more distant dominating projection.
prev_dom = idom;
}
// Now walk the current IfNode's projections.
// Loop ends when 'this' has no more uses.
for (DUIterator_Last imin, i = last_outs(imin); i >= imin; --i) {
Node *ifp = last_out(i); // Get IfTrue/IfFalse
igvn->add_users_to_worklist(ifp);
// Check which projection it is and set target.
// Data-target is either the dominating projection of the same type
// or TOP if the dominating projection is of opposite type.
// Data-target will be used as the new control edge for the non-CFG
// nodes like Casts and Loads.
Node *data_target = (ifp->Opcode() == prev_op) ? prev_dom : top;
// Control-target is just the If's immediate dominator or TOP.
Node *ctrl_target = (ifp->Opcode() == prev_op) ? idom : top;
// For each child of an IfTrue/IfFalse projection, reroute.
// Loop ends when projection has no more uses.
for (DUIterator_Last jmin, j = ifp->last_outs(jmin); j >= jmin; --j) {
Node* s = ifp->last_out(j); // Get child of IfTrue/IfFalse
if( !s->depends_only_on_test() ) {
// Find the control input matching this def-use edge.
// For Regions it may not be in slot 0.
uint l;
for( l = 0; s->in(l) != ifp; l++ ) { }
igvn->replace_input_of(s, l, ctrl_target);
} else { // Else, for nodes that depend only on the test,
igvn->replace_input_of(s, 0, data_target); // Move child to data-target
}
} // End for each child of a projection
igvn->remove_dead_node(ifp);
} // End for each IfTrue/IfFalse child of If
// Kill the IfNode
igvn->remove_dead_node(this);
}
//------------------------------Identity---------------------------------------
// When the If's type shows the false path cannot be taken (IFTRUE) or that
// neither path can (IFNEITHER), this projection is equivalent to the If's
// control input.
Node *IfTrueNode::Identity( PhaseTransform *phase ) {
  const TypeTuple *if_type = phase->type(in(0))->is_tuple();
  if (if_type == TypeTuple::IFNEITHER || if_type == TypeTuple::IFTRUE) {
    // Can only go this way (or no way at all): collapse to the IfNode control
    return in(0)->in(0);
  }
  // The other path is still possible: no progress
  return this;
}
//------------------------------dump_spec--------------------------------------
// Non-product builds only: print this If's _prob and _fcnt fields to 'st'
// in the form "P=<prob>, C=<fcnt>".
#ifndef PRODUCT
void IfNode::dump_spec(outputStream *st) const {
st->print("P=%f, C=%f",_prob,_fcnt);
}
#endif
//------------------------------idealize_test----------------------------------
// Try to canonicalize tests better. Peek at the Cmp/Bool/If sequence and
// come up with a canonical sequence. Bools getting 'eq', 'gt' and 'ge' forms
// converted to 'ne', 'le' and 'lt' forms. IfTrue/IfFalse get swapped as
// needed.
//
// Returns the replacement IfNode (a new node, or an existing equivalent one
// found in the hash table) when the test was flipped, NULL when 'iff' is
// left untouched.
static IfNode* idealize_test(PhaseGVN* phase, IfNode* iff) {
assert(iff->in(0) != NULL, "If must be live");
if (iff->outcnt() != 2) return NULL; // Malformed projections.
Node* old_if_f = iff->proj_out(false);
Node* old_if_t = iff->proj_out(true);
// CountedLoopEnds want the back-control test to be TRUE, regardless of
// whether they are testing a 'gt' or 'lt' condition. The 'gt' condition
// happens in count-down loops
if (iff->is_CountedLoopEnd()) return NULL;
if (!iff->in(1)->is_Bool()) return NULL; // Happens for partially optimized IF tests
BoolNode *b = iff->in(1)->as_Bool();
BoolTest bt = b->_test;
// Test already in good order?
if( bt.is_canonical() )
return NULL;
// Flip test to be canonical. Requires flipping the IfFalse/IfTrue and
// cloning the IfNode.
Node* new_b = phase->transform( new (phase->C) BoolNode(b->in(1), bt.negate()) );
// transform() may hand back something other than a Bool; give up then.
if( !new_b->is_Bool() ) return NULL;
b = new_b->as_Bool();
PhaseIterGVN *igvn = phase->is_IterGVN();
assert( igvn, "Test is not canonical in parser?" );
// The IF node never really changes, but it needs to be cloned
// The test is negated, so the taken-probability becomes 1.0 - _prob.
iff = new (phase->C) IfNode( iff->in(0), b, 1.0-iff->_prob, iff->_fcnt);
Node *prior = igvn->hash_find_insert(iff);
if( prior ) {
// An equivalent If already exists: drop the clone and reuse that one.
igvn->remove_dead_node(iff);
iff = (IfNode*)prior;
} else {
// Cannot call transform on it just yet
igvn->set_type_bottom(iff);
}
igvn->_worklist.push(iff);
// Now handle projections. Cloning not required.
Node* new_if_f = (Node*)(new (phase->C) IfFalseNode( iff ));
Node* new_if_t = (Node*)(new (phase->C) IfTrueNode ( iff ));
igvn->register_new_node_with_optimizer(new_if_f);
igvn->register_new_node_with_optimizer(new_if_t);
// Flip test, so flip trailing control
// (old false projection -> new true projection, and vice versa)
igvn->replace_node(old_if_f, new_if_t);
igvn->replace_node(old_if_t, new_if_f);
// Progress
return iff;
}
//------------------------------Identity---------------------------------------
// When the If's type shows the true path cannot be taken (IFFALSE) or that
// neither path can (IFNEITHER), this projection is equivalent to the If's
// control input.
Node *IfFalseNode::Identity( PhaseTransform *phase ) {
  const TypeTuple *if_type = phase->type(in(0))->is_tuple();
  if (if_type == TypeTuple::IFNEITHER || if_type == TypeTuple::IFFALSE) {
    // Can only go this way (or no way at all): collapse to the IfNode control
    return in(0)->in(0);
  }
  // The other path is still possible: no progress
  return this;
}
C:\hotspot-69087d08d473\src\share\vm/opto/indexSet.cpp
/*
* Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/chaitin.hpp"
#include "opto/compile.hpp"
#include "opto/indexSet.hpp"
#include "opto/regmask.hpp"
// This file defines the IndexSet class, a set of sparse integer indices.
// This data structure is used by the compiler in its liveness analysis and
// during register allocation. It also defines an iterator for this class.
//-------------------------------- Initializations ------------------------------
// Shared placeholder block. Every slot of a set's _blocks array that holds no
// elements points at this object (see initialize() and free_block()).
IndexSet::BitBlock IndexSet::_empty_block = IndexSet::BitBlock();
#ifdef ASSERT
// Initialize statistics counters
// _alloc_new: blocks added to the free list by populate_free_list()
julong IndexSet::_alloc_new = 0;
// _alloc_total: number of alloc_block() calls
julong IndexSet::_alloc_total = 0;
// The next three are accumulated by tally_iteration_statistics()
julong IndexSet::_total_bits = 0;
julong IndexSet::_total_used_blocks = 0;
julong IndexSet::_total_unused_blocks = 0;
// Per set, or all sets operation tracing
// Source of the _serial_number given to each set as it is initialized
int IndexSet::_serial_count = 1;
#endif
// What is the first set bit in a 5 bit integer?
// Indexed by the 5-bit value; the entry is the position (0-4) of its lowest
// set bit. The entry for 0, which has no set bit, is 0.
const byte IndexSetIterator::_first_bit[32] = {
0, 0, 1, 0,
2, 0, 1, 0,
3, 0, 1, 0,
2, 0, 1, 0,
4, 0, 1, 0,
2, 0, 1, 0,
3, 0, 1, 0,
2, 0, 1, 0
};
// What is the second set bit in a 5 bit integer?
// Indexed by the 5-bit value; the entry is the position (1-4) of its
// second-lowest set bit. Values with fewer than two set bits hold 5.
const byte IndexSetIterator::_second_bit[32] = {
5, 5, 5, 1,
5, 2, 2, 1,
5, 3, 3, 1,
3, 2, 2, 1,
5, 4, 4, 1,
4, 2, 2, 1,
4, 3, 3, 1,
3, 2, 2, 1
};
// I tried implementing the IndexSetIterator with a window_size of 8 and
// didn't seem to get a noticeable speedup. I am leaving in the tables
// in case we want to switch back.
/*const byte IndexSetIterator::_first_bit[256] = {
8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
const byte IndexSetIterator::_second_bit[256] = {
8, 8, 8, 1, 8, 2, 2, 1, 8, 3, 3, 1, 3, 2, 2, 1,
8, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
8, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
8, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1,
6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
8, 7, 7, 1, 7, 2, 2, 1, 7, 3, 3, 1, 3, 2, 2, 1,
7, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
7, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
7, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1,
6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1
};*/
//---------------------------- IndexSet::populate_free_list() -----------------------------
// Populate the free BitBlock list with a batch of BitBlocks. The BitBlocks
// are 32 bit aligned.
void IndexSet::populate_free_list() {
Compile *compile = Compile::current();
BitBlock *free = (BitBlock*)compile->indexSet_free_block_list();
char *mem = (char*)arena()->Amalloc_4(sizeof(BitBlock) *
bitblock_alloc_chunk_size + 32);
// Align the pointer to a 32 bit boundary.
BitBlock *new_blocks = (BitBlock*)(((uintptr_t)mem + 32) & ~0x001F);
// Add the new blocks to the free list.
for (int i = 0; i < bitblock_alloc_chunk_size; i++) {
new_blocks->set_next(free);
free = new_blocks;
new_blocks++;
}
compile->set_indexSet_free_block_list(free);
#ifdef ASSERT
if (CollectIndexSetStatistics) {
inc_stat_counter(&_alloc_new, bitblock_alloc_chunk_size);
}
#endif
}
//---------------------------- IndexSet::alloc_block() ------------------------
// Pop one BitBlock off the compile-wide free list, refilling the list first
// if it is empty. The returned block has been cleared.
IndexSet::BitBlock *IndexSet::alloc_block() {
#ifdef ASSERT
  if (CollectIndexSetStatistics) {
    inc_stat_counter(&_alloc_total, 1);
  }
#endif
  Compile *comp = Compile::current();
  BitBlock *head = (BitBlock*)comp->indexSet_free_block_list();
  if (head == NULL) {
    // Nothing available: prime the list and re-read its head.
    populate_free_list();
    head = (BitBlock*)comp->indexSet_free_block_list();
  }

  // Unlink the head before wiping it.
  comp->set_indexSet_free_block_list(head->next());
  head->clear();
  return head;
}
//---------------------------- IndexSet::alloc_block_containing() -------------
// Obtain a cleared BitBlock and install it in the _blocks slot that covers
// 'element'. Returns the newly installed block.
IndexSet::BitBlock *IndexSet::alloc_block_containing(uint element) {
  BitBlock *fresh = alloc_block();
  _blocks[get_block_index(element)] = fresh;
  return fresh;
}
//---------------------------- IndexSet::free_block() -------------------------
// Return the BitBlock in slot 'i' to the compile-wide free list and make the
// slot refer to the shared empty block again.
void IndexSet::free_block(uint i) {
  debug_only(check_watch("free block", i));
  assert(i < _max_blocks, "block index too large");

  BitBlock *released = _blocks[i];
  assert(released != &_empty_block, "cannot free the empty block");

  // Push the block onto the front of the free list.
  Compile *comp = Compile::current();
  released->set_next((IndexSet::BitBlock*)comp->indexSet_free_block_list());
  comp->set_indexSet_free_block_list(released);

  set_block(i, &_empty_block);
}
//------------------------------lrg_union--------------------------------------
// Compute the union of all elements of one and two which interfere with
// the RegMask mask. If the degree of the union reaches or exceeds
// fail_degree, the union bails out. The underlying set is cleared before
// the union is performed.
//
// lr1, lr2    - live range ids whose neighbor sets are merged into this set
// fail_degree - bail-out threshold for the accumulated register degree
// ifg         - interference graph supplying neighbor sets and live ranges
// mask        - only neighbors whose mask overlaps this one are kept
// Returns the accumulated register degree. On an early bail-out this set
// holds only the neighbors inserted so far.
uint IndexSet::lrg_union(uint lr1, uint lr2,
const uint fail_degree,
const PhaseIFG *ifg,
const RegMask &mask ) {
IndexSet *one = ifg->neighbors(lr1);
IndexSet *two = ifg->neighbors(lr2);
LRG &lrg1 = ifg->lrgs(lr1);
LRG &lrg2 = ifg->lrgs(lr2);
#ifdef ASSERT
assert(_max_elements == one->_max_elements, "max element mismatch");
check_watch("union destination");
one->check_watch("union source");
two->check_watch("union source");
#endif
// Compute the degree of the combined live-range. The combined
// live-range has the union of the original live-ranges' neighbors set as
// well as the neighbors of all intermediate copies, minus those neighbors
// that can not use the intersected allowed-register-set.
// Copy the larger set. Insert the smaller set into the larger.
// NOTE(review): only the set pointers are exchanged here; lrg1/lrg2 are not.
// After a swap the first loop below pairs lr2's neighbors with lrg1 and the
// second pairs lr1's neighbors with lrg2. Confirm this is intended.
if (two->count() > one->count()) {
IndexSet *temp = one;
one = two;
two = temp;
}
clear();
// Used to compute degree of register-only interferences. Infinite-stack
// neighbors do not alter colorability, as they can always color to some
// other color. (A variant of the Briggs assertion)
uint reg_degree = 0;
uint element;
// Load up the combined interference set with the neighbors of one
// (next() returns 0 once the iterator is exhausted)
IndexSetIterator elements(one);
while ((element = elements.next()) != 0) {
LRG &lrg = ifg->lrgs(element);
if (mask.overlap(lrg.mask())) {
// The set was just cleared, so every insert here adds a new member.
insert(element);
if( !lrg.mask().is_AllStack() ) {
reg_degree += lrg1.compute_degree(lrg);
if( reg_degree >= fail_degree ) return reg_degree;
} else {
// !!!!! Danger! No update to reg_degree despite having a neighbor.
// A variant of the Briggs assertion.
// Not needed if I simplify during coalesce, ala George/Appel.
assert( lrg.lo_degree(), "" );
}
}
}
// Add neighbors of two as well
IndexSetIterator elements2(two);
while ((element = elements2.next()) != 0) {
LRG &lrg = ifg->lrgs(element);
if (mask.overlap(lrg.mask())) {
// Only count neighbors that were not already added from 'one'.
if (insert(element)) {
if( !lrg.mask().is_AllStack() ) {
reg_degree += lrg2.compute_degree(lrg);
if( reg_degree >= fail_degree ) return reg_degree;
} else {
// !!!!! Danger! No update to reg_degree despite having a neighbor.
// A variant of the Briggs assertion.
// Not needed if I simplify during coalesce, ala George/Appel.
assert( lrg.lo_degree(), "" );
}
}
}
}
return reg_degree;
}
//---------------------------- IndexSet() -----------------------------
// A deep copy constructor. This is used when you need a scratch copy of this set.
// The block table is sized like the source's; every non-empty block of the
// source is duplicated into a freshly allocated block, while empty slots
// keep pointing at the shared empty block.
IndexSet::IndexSet (IndexSet *set) {
#ifdef ASSERT
  _serial_number = _serial_count++;
  set->check_watch("copied", _serial_number);
  check_watch("initialized by copy", set->_serial_number);
  _max_elements = set->_max_elements;
#endif
  _count = set->_count;
  _max_blocks = set->_max_blocks;
  if (_max_blocks <= preallocated_block_list_size) {
    // Small sets use the block table embedded in the object.
    _blocks = _preallocated_block_list;
  } else {
    // The table is an array of BitBlock*, so size it by its element type.
    _blocks =
      (IndexSet::BitBlock**) arena()->Amalloc_4(sizeof(IndexSet::BitBlock*) * _max_blocks);
  }
  for (uint i = 0; i < _max_blocks; i++) {
    BitBlock *block = set->_blocks[i];
    if (block == &_empty_block) {
      set_block(i, &_empty_block);
    } else {
      BitBlock *new_block = alloc_block();
      memcpy(new_block->words(), block->words(), sizeof(uint32) * words_per_block);
      set_block(i, new_block);
    }
  }
}
//---------------------------- IndexSet::initialize() -----------------------------
// Prepare an IndexSet for use: the set starts empty, with room for
// 'max_elements' bits. A block table larger than the preallocated one is
// obtained from arena().
void IndexSet::initialize(uint max_elements) {
#ifdef ASSERT
  _serial_number = _serial_count++;
  check_watch("initialized", max_elements);
  _max_elements = max_elements;
#endif
  _count = 0;
  // Number of blocks needed to cover max_elements bits, rounded up.
  _max_blocks = (max_elements + bits_per_block - 1) / bits_per_block;
  if (_max_blocks <= preallocated_block_list_size) {
    _blocks = _preallocated_block_list;
  } else {
    // The table is an array of BitBlock*, so size it by its element type.
    _blocks = (IndexSet::BitBlock**) arena()->Amalloc_4(sizeof(IndexSet::BitBlock*) * _max_blocks);
  }
  // Every slot starts out referring to the shared empty block.
  for (uint i = 0; i < _max_blocks; i++) {
    set_block(i, &_empty_block);
  }
}
//---------------------------- IndexSet::initialize()------------------------------
// Prepare an IndexSet for use. If it needs to allocate its _blocks array, it does
// so from the Arena passed as a parameter. BitBlock allocation is still done from
// the static Arena which was set with reset_memory().
void IndexSet::initialize(uint max_elements, Arena *arena) {
#ifdef ASSERT
  _serial_number = _serial_count++;
  check_watch("initialized2", max_elements);
  _max_elements = max_elements;
#endif // ASSERT
  _count = 0;
  // Number of blocks needed to cover max_elements bits, rounded up.
  _max_blocks = (max_elements + bits_per_block - 1) / bits_per_block;
  if (_max_blocks <= preallocated_block_list_size) {
    _blocks = _preallocated_block_list;
  } else {
    // The table is an array of BitBlock*, so size it by its element type.
    _blocks = (IndexSet::BitBlock**) arena->Amalloc_4(sizeof(IndexSet::BitBlock*) * _max_blocks);
  }
  // Every slot starts out referring to the shared empty block.
  for (uint i = 0; i < _max_blocks; i++) {
    set_block(i, &_empty_block);
  }
}
//---------------------------- IndexSet::swap() -----------------------------
// Exchange the contents of this set with 'set': block pointers slot by slot,
// then the element counts.
void IndexSet::swap(IndexSet *set) {
#ifdef ASSERT
  assert(_max_elements == set->_max_elements, "must have same universe size to swap");
  check_watch("swap", set->_serial_number);
  set->check_watch("swap", _serial_number);
#endif

  for (uint bi = 0; bi < _max_blocks; ++bi) {
    BitBlock *mine   = _blocks[bi];
    BitBlock *theirs = set->_blocks[bi];
    set_block(bi, theirs);
    set->set_block(bi, mine);
  }

  const uint my_count = _count;
  _count = set->_count;
  set->_count = my_count;
}
//---------------------------- IndexSet::dump() -----------------------------
// Print this set. Used for debugging.
#ifndef PRODUCT
void IndexSet::dump() const {
IndexSetIterator elements(this);
tty->print("{");
uint i;
while ((i = elements.next()) != 0) {
tty->print("L%d ", i);
}
tty->print_cr("}");
}
#endif
#ifdef ASSERT
//---------------------------- IndexSet::tally_iteration_statistics() -----------------------------
// Update block/bit counts to reflect that this set has been iterated over.
void IndexSet::tally_iteration_statistics() const {
inc_stat_counter(&_total_bits, count());
for (uint i = 0; i < _max_blocks; i++) {
if (_blocks[i] != &_empty_block) {
inc_stat_counter(&_total_used_blocks, 1);
} else {
inc_stat_counter(&_total_unused_blocks, 1);
}
}
}
//---------------------------- IndexSet::print_statistics() -----------------------------
// Print statistics about IndexSet usage.
void IndexSet::print_statistics() {
julong total_blocks = _total_used_blocks + _total_unused_blocks;
tty->print_cr ("Accumulated IndexSet usage statistics:");
tty->print_cr ("--------------------------------------");
tty->print_cr (" Iteration:");
tty->print_cr (" blocks visited: " UINT64_FORMAT, total_blocks);
tty->print_cr (" blocks empty: %4.2f%%", 100.0*(double)_total_unused_blocks/total_blocks);
tty->print_cr (" bit density (bits/used blocks): %4.2f", (double)_total_bits/_total_used_blocks);
tty->print_cr (" bit density (bits/all blocks): %4.2f", (double)_total_bits/total_blocks);
tty->print_cr (" Allocation:");
tty->print_cr (" blocks allocated: " UINT64_FORMAT, _alloc_new);
tty->print_cr (" blocks used/reused: " UINT64_FORMAT, _alloc_total);
}
//---------------------------- IndexSet::verify() -----------------------------
// Expensive test of IndexSet sanity. Ensure that the count agrees with the
// number of bits in the blocks. Make sure the iterator is seeing all elements
// of the set. Meant for use during development.
void IndexSet::verify() const {
assert(!member(0), "zero cannot be a member");
uint count = 0;
uint i;
for (i = 1; i < _max_elements; i++) {
if (member(i)) {
count++;
assert(count <= _count, "_count is messed up");
}
}
IndexSetIterator elements(this);
count = 0;
while ((i = elements.next()) != 0) {
count++;
assert(member(i), "returned a non member");
assert(count <= _count, "iterator returned wrong number of elements");
}
}
#endif
//---------------------------- IndexSetIterator() -----------------------------
// Build an iterator over a mutable set. Because the set is remembered in
// _set, blocks found to contain no bits while iterating are handed back to
// the free list and replaced by the empty block.
IndexSetIterator::IndexSetIterator(IndexSet *set) {
#ifdef ASSERT
  if (CollectIndexSetStatistics) {
    set->tally_iteration_statistics();
  }
  set->check_watch("traversed", set->count());
#endif

  // Common to both cases: no word loaded yet, and the word cursor sits past
  // the end of a block so the first next() moves on to a block.
  _current   = 0;
  _next_word = IndexSet::words_per_block;

  if (set->is_empty()) {
    // Block cursor already at the limit: iteration ends immediately.
    _next_block = 1;
    _max_blocks = 1;
    // _value, _words, _blocks and _set are deliberately left unset; they
    // are not needed when iterating over an empty set.
    return;
  }

  _value      = 0;
  _next_block = 0;
  _max_blocks = set->_max_blocks;
  _words      = NULL;
  _blocks     = set->_blocks;
  _set        = set;
}
//---------------------------- IndexSetIterator(const) -----------------------------
// Build an iterator over a constant set. _set stays NULL, so the set is never
// modified while iterating.
IndexSetIterator::IndexSetIterator(const IndexSet *set) {
#ifdef ASSERT
  if (CollectIndexSetStatistics) {
    set->tally_iteration_statistics();
  }
  // We don't call check_watch from here to avoid bad recursion.
  // set->check_watch("traversed const", set->count());
#endif

  // Common to both cases: no word loaded yet, and the word cursor sits past
  // the end of a block so the first next() moves on to a block.
  _current   = 0;
  _next_word = IndexSet::words_per_block;

  if (set->is_empty()) {
    // Block cursor already at the limit: iteration ends immediately.
    _next_block = 1;
    _max_blocks = 1;
    // _value, _words, _blocks and _set are deliberately left unset; they
    // are not needed when iterating over an empty set.
    return;
  }

  _value      = 0;
  _next_block = 0;
  _max_blocks = set->_max_blocks;
  _words      = NULL;
  _blocks     = set->_blocks;
  _set        = NULL;
}
//---------------------------- IndexSetIterator::advance_and_next() -----------------------------
// Advance to the next non-empty word in the set being iterated over. Return the next element
// if there is one. If we are done, return 0. This method is called from the next() method
// when it gets done with a word.
uint IndexSetIterator::advance_and_next() {
// See if there is another non-empty word in the current block.
for (uint wi = _next_word; wi < (unsigned)IndexSet::words_per_block; wi++) {
if (_words[wi] != 0) {
// Found a non-empty word.
// _next_block was set to one past the current block when that block was
// entered (see below), hence the '- 1' to get the current block's base.
_value = ((_next_block - 1) * IndexSet::bits_per_block) + (wi * IndexSet::bits_per_word);
_current = _words[wi];
_next_word = wi+1;
return next();
}
}
// We ran out of words in the current block. Advance to next non-empty block.
for (uint bi = _next_block; bi < _max_blocks; bi++) {
if (_blocks[bi] != &IndexSet::_empty_block) {
// Found a non-empty block.
_words = _blocks[bi]->words();
for (uint wi = 0; wi < (unsigned)IndexSet::words_per_block; wi++) {
if (_words[wi] != 0) {
// Found a non-empty word.
_value = (bi * IndexSet::bits_per_block) + (wi * IndexSet::bits_per_word);
_current = _words[wi];
_next_block = bi+1;
_next_word = wi+1;
return next();
}
}
// All of the words in the block were empty. Replace
// the block with the empty block.
// (_set is NULL when iterating over a const set, so nothing is freed then.)
if (_set) {
_set->free_block(bi);
}
}
}
// These assignments make redundant calls to next on a finished iterator
// faster. Probably not necessary.
_next_block = _max_blocks;
_next_word = IndexSet::words_per_block;
// No more words.
return 0;
}
C:\hotspot-69087d08d473\src\share\vm/opto/indexSet.hpp
/*
* Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_OPTO_INDEXSET_HPP
#define SHARE_VM_OPTO_INDEXSET_HPP
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "opto/compile.hpp"
#include "opto/regmask.hpp"
// This file defines the IndexSet class, a set of sparse integer indices.
// This data structure is used by the compiler in its liveness analysis and
// during register allocation.
//-------------------------------- class IndexSet ----------------------------
// An IndexSet is a piece-wise bitvector. At the top level, we have an array
// of pointers to bitvector chunks called BitBlocks. Each BitBlock has a fixed
// size and is allocated from a shared free list. The bits which are set in
// each BitBlock correspond to the elements of the set.
class IndexSet : public ResourceObj {
  friend class IndexSetIterator;

 public:
  // When we allocate an IndexSet, it starts off with an array of top level block
  // pointers of a set length.  This size is intended to be large enough for the
  // majority of IndexSets.  In the cases when this size is not large enough,
  // a separately allocated array is used.

  // The length of the preallocated top level block array
  enum { preallocated_block_list_size = 16 };

  // Elements of an IndexSet get decomposed into three fields.  The highest order
  // bits are the block index, which tells which high level block holds the element.
  // Within that block, the word index indicates which word holds the element.
  // Finally, the bit index determines which single bit within that word indicates
  // membership of the element in the set.

  // The lengths of the index bitfields
  enum { bit_index_length = 5,
         word_index_length = 3,
         block_index_length = 8 // not used
  };

  // Derived constants used for manipulating the index bitfields
  enum {
    bit_index_offset = 0, // not used
    word_index_offset = bit_index_length,
    block_index_offset = bit_index_length + word_index_length,
    bits_per_word = 1 << bit_index_length,
    words_per_block = 1 << word_index_length,
    bits_per_block = bits_per_word * words_per_block,
    bit_index_mask = right_n_bits(bit_index_length),
    word_index_mask = right_n_bits(word_index_length)
  };

  // These routines are used for extracting the block, word, and bit index
  // from an element.
  static uint get_block_index(uint element) {
    return element >> block_index_offset;
  }
  static uint get_word_index(uint element) {
    return mask_bits(element >> word_index_offset,word_index_mask);
  }
  static uint get_bit_index(uint element) {
    return mask_bits(element,bit_index_mask);
  }

  //------------------------------ class BitBlock ----------------------------
  // The BitBlock class is a segment of a bitvector set.
  class BitBlock : public ResourceObj {
    friend class IndexSetIterator;
    friend class IndexSet;

   private:
    // All of BitBlock's fields and methods are declared private.  We limit
    // access to IndexSet and IndexSetIterator.

    // A BitBlock is composed of some number of 32 bit words.  When a BitBlock
    // is not in use by any IndexSet, it is stored on a free list.  The next field
    // is used by IndexSet to maintain this free list.
    union {
      uint32 _words[words_per_block];
      BitBlock *_next;
    } _data;

    // accessors
    uint32 *words() { return _data._words; }
    void set_next(BitBlock *next) { _data._next = next; }
    BitBlock *next() { return _data._next; }

    // Operations.  A BitBlock supports four simple operations,
    // clear(), member(), insert(), and remove().  These methods do
    // not assume that the block index has been masked out.
    //
    // The single-bit masks below are built in uint32 rather than int:
    // bit_index can be as large as 31, and left-shifting a signed 1 into the
    // sign bit is not well defined before C++20.

    // Zero every word of the block.
    void clear() {
      memset(words(), 0, sizeof(uint32) * words_per_block);
    }

    // Return true if the bit for element is set in this block.
    bool member(uint element) {
      uint word_index = IndexSet::get_word_index(element);
      uint bit_index = IndexSet::get_bit_index(element);
      uint32 bit = ((uint32)1 << bit_index);
      return ((words()[word_index] & bit) != 0);
    }

    // Set the bit for element.  Returns true if the bit was ALREADY set
    // before the call.
    bool insert(uint element) {
      uint word_index = IndexSet::get_word_index(element);
      uint bit_index = IndexSet::get_bit_index(element);
      uint32 bit = ((uint32)1 << bit_index);
      uint32 before = words()[word_index];
      words()[word_index] = before | bit;
      return ((before & bit) != 0);
    }

    // Clear the bit for element.  Returns true if the bit was set before
    // the call.
    bool remove(uint element) {
      uint word_index = IndexSet::get_word_index(element);
      uint bit_index = IndexSet::get_bit_index(element);
      uint32 bit = ((uint32)1 << bit_index);
      uint32 before = words()[word_index];
      words()[word_index] = before & ~bit;
      return ((before & bit) != 0);
    }
  };

  //-------------------------- BitBlock allocation ---------------------------
 private:
  // All IndexSets share an arena from which they allocate BitBlocks.  Unused
  // BitBlocks are placed on a free list.

  // The number of BitBlocks to allocate at a time
  enum { bitblock_alloc_chunk_size = 50 };

  // The arena BitBlocks come from: the one registered on the current compile.
  static Arena *arena() { return Compile::current()->indexSet_arena(); }

  static void populate_free_list();

 public:
  // Invalidate the current free BitBlock list and begin allocation
  // from a new arena.  It is essential that this method is called whenever
  // the Arena being used for BitBlock allocation is reset.
  static void reset_memory(Compile* compile, Arena *arena) {
    compile->set_indexSet_free_block_list(NULL);
    compile->set_indexSet_arena(arena);

    // This should probably be done in a static initializer
    _empty_block.clear();
  }

 private:
  friend class BitBlock;
  // A distinguished BitBlock which always remains empty.  When a new IndexSet is
  // created, all of its top level BitBlock pointers are initialized to point to
  // this.
  static BitBlock _empty_block;

  //-------------------------- Members ------------------------------------------

  // The number of elements in the set
  uint _count;

  // Our top level array of bitvector segments
  BitBlock **_blocks;

  BitBlock *_preallocated_block_list[preallocated_block_list_size];

  // The number of top level array entries in use
  uint _max_blocks;

  // Our assertions need to know the maximum number allowed in the set
#ifdef ASSERT
  uint _max_elements;
#endif

  // The next IndexSet on the free list (not used at same time as count)
  IndexSet *_next;

 public:
  //-------------------------- Free list operations ------------------------------
  // Individual IndexSets can be placed on a free list.  This is done in PhaseLive.

  // Return the next IndexSet on the free list.
  IndexSet *next() {
#ifdef ASSERT
    if( VerifyOpto ) {
      check_watch("removed from free list?", ((_next == NULL) ? 0 : _next->_serial_number));
    }
#endif
    return _next;
  }

  // Link this IndexSet in front of next on the free list.
  void set_next(IndexSet *next) {
#ifdef ASSERT
    if( VerifyOpto ) {
      check_watch("put on free list?", ((next == NULL) ? 0 : next->_serial_number));
    }
#endif
    _next = next;
  }

 private:
  //-------------------------- Utility methods -----------------------------------

  // Get the block which holds element
  BitBlock *get_block_containing(uint element) const {
    assert(element < _max_elements, "element out of bounds");
    return _blocks[get_block_index(element)];
  }

  // Set a block in the top level array
  void set_block(uint index, BitBlock *block) {
#ifdef ASSERT
    if( VerifyOpto )
      check_watch("set block", index);
#endif
    _blocks[index] = block;
  }

  // Get a BitBlock from the free list
  BitBlock *alloc_block();

  // Get a BitBlock from the free list and place it in the top level array
  BitBlock *alloc_block_containing(uint element);

  // Free a block from the top level array, placing it on the free BitBlock list
  void free_block(uint i);

 public:
  //-------------------------- Primitive set operations --------------------------

  // Remove every element: reset the count and free each top level block
  // that is not already the shared empty block.
  void clear() {
#ifdef ASSERT
    if( VerifyOpto )
      check_watch("clear");
#endif
    _count = 0;
    for (uint i = 0; i < _max_blocks; i++) {
      BitBlock *block = _blocks[i];
      if (block != &_empty_block) {
        free_block(i);
      }
    }
  }

  uint count() const { return _count; }

  bool is_empty() const { return _count == 0; }

  bool member(uint element) const {
    return get_block_containing(element)->member(element);
  }

  // Add element to the set.  Returns true if the element was newly added,
  // false if it was already present.  Element 0 is never inserted (the
  // iterator uses 0 as its "done" value), and false is returned for it.
  bool insert(uint element) {
#ifdef ASSERT
    if( VerifyOpto )
      check_watch("insert", element);
#endif
    if (element == 0) {
      return false;
    }
    BitBlock *block = get_block_containing(element);
    if (block == &_empty_block) {
      // Never write into the shared empty block; get a private one first.
      block = alloc_block_containing(element);
    }
    bool present = block->insert(element);
    if (!present) {
      _count++;
    }
    return !present;
  }

  // Remove element from the set.  Returns true if the element was present.
  bool remove(uint element) {
#ifdef ASSERT
    if( VerifyOpto )
      check_watch("remove", element);
#endif

    BitBlock *block = get_block_containing(element);
    bool present = block->remove(element);
    if (present) {
      _count--;
    }
    return present;
  }

  //-------------------------- Compound set operations ------------------------
  // Compute the union of all elements of one and two which interfere
  // with the RegMask mask.  If the degree of the union exceeds
  // fail_degree, the union bails out.  The underlying set is
  // cleared before the union is performed.
  uint lrg_union(uint lr1, uint lr2,
                 const uint fail_degree,
                 const class PhaseIFG *ifg,
                 const RegMask &mask);

  //------------------------- Construction, initialization -----------------------

  IndexSet() {}

  // This constructor is used for making a deep copy of an IndexSet.
  IndexSet(IndexSet *set);

  // Perform initialization on an IndexSet
  void initialize(uint max_element);

  // Initialize an IndexSet.  If the top level BitBlock array needs to be
  // allocated, do it from the proffered arena.  BitBlocks are still allocated
  // from the static Arena member.
  void initialize(uint max_element, Arena *arena);

  // Exchange two sets
  void swap(IndexSet *set);

  //-------------------------- Debugging and statistics --------------------------

#ifndef PRODUCT
  // Output an IndexSet for debugging
  void dump() const;
#endif

#ifdef ASSERT
  void tally_iteration_statistics() const;

  // BitBlock allocation statistics
  static julong _alloc_new;
  static julong _alloc_total;

  // Block density statistics
  static julong _total_bits;
  static julong _total_used_blocks;
  static julong _total_unused_blocks;

  // Sanity tests
  void verify() const;

  static int _serial_count;
  int _serial_number;

  // Check to see if the serial number of the current set is the one we're tracing.
  // If it is, print a message.  An IndexSetWatch value of -1 traces every set;
  // 0 disables tracing.
  void check_watch(const char *operation, uint operand) const {
    if (IndexSetWatch != 0) {
      if (IndexSetWatch == -1 || _serial_number == IndexSetWatch) {
        tty->print_cr("IndexSet %d : %s ( %d )", _serial_number, operation, operand);
      }
    }
  }
  void check_watch(const char *operation) const {
    if (IndexSetWatch != 0) {
      if (IndexSetWatch == -1 || _serial_number == IndexSetWatch) {
        tty->print_cr("IndexSet %d : %s", _serial_number, operation);
      }
    }
  }

 public:
  static void print_statistics();

#endif
};
//-------------------------------- class IndexSetIterator --------------------
// Walks the elements of an IndexSet.  next() yields one element per call and
// yields 0 once the set is exhausted.
class IndexSetIterator VALUE_OBJ_CLASS_SPEC {
  friend class IndexSet;

 public:
  // A word is scanned window_size bits at a time.
  enum { window_size = 5,
         window_mask = right_n_bits(window_size),
         table_size  = (1 << window_size) };

  // Lookup table indexed by a window_size-bit value: the first set bit.
  static const byte _first_bit[table_size];

  // Lookup table indexed by a window_size-bit value: the second set bit.
  static const byte _second_bit[table_size];

 private:
  // Remaining (shifted) bits of the word under inspection
  uint32 _current;

  // Element number corresponding to bit 0 of _current
  uint _value;

  // Index of the word to inspect after the current one
  uint _next_word;

  // Words of the block currently being walked
  uint32 *_words;

  // Index of the block to inspect after the current one
  uint _next_block;

  // Top level block array of the set being iterated
  IndexSet::BitBlock **_blocks;

  // Number of entries in _blocks
  uint _max_blocks;

  // Set when the iterator was created from a non-const set; in that case
  // non-canonical empty blocks are replaced with the _empty_block pointer.
  // When _set is NULL no replacement is done.
  IndexSet *_set;

  // Slow path of next(): move on to the next non-empty word and return the
  // next element of the set.
  uint advance_and_next();

 public:
  // An iterator built from a constant set does not canonicalize
  // empty blocks.
  IndexSetIterator(IndexSet *set);
  IndexSetIterator(const IndexSet *set);

  // Return the next element of the set.  Return 0 when done.
  uint next() {
    uint word = _current;
    if (word == 0) {
      // Nothing left in this word; go find another one.
      return advance_and_next();
    }

    // Skip over windows that contain no set bit.
    uint base = _value;
    while (mask_bits(word,window_mask) == 0) {
      word >>= window_size;
      base += window_size;
    }

    // The first table gives the element to report, the second gives how far
    // to shift so the following call resumes from the right position.
    const uint window = mask_bits(word,window_mask);
    const uint skip = _second_bit[window];
    _current = word >> skip;
    _value = base + skip;
    return base + _first_bit[window];
  }
};
#endif // SHARE_VM_OPTO_INDEXSET_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/lcm.cpp
/*
* Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/block.hpp"
#include "opto/c2compiler.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/machnode.hpp"
#include "opto/runtime.hpp"
#if defined AD_MD_HPP
# include AD_MD_HPP
#elif defined TARGET_ARCH_MODEL_x86_32
# include "adfiles/ad_x86_32.hpp"
#elif defined TARGET_ARCH_MODEL_x86_64
# include "adfiles/ad_x86_64.hpp"
#elif defined TARGET_ARCH_MODEL_aarch64
# include "adfiles/ad_aarch64.hpp"
#elif defined TARGET_ARCH_MODEL_sparc
# include "adfiles/ad_sparc.hpp"
#elif defined TARGET_ARCH_MODEL_zero
# include "adfiles/ad_zero.hpp"
#elif defined TARGET_ARCH_MODEL_ppc_64
# include "adfiles/ad_ppc_64.hpp"
#endif
// Optimization - Graph Style
// Returns true when val is a not-null-decoded compressed oop, i.e. a value
// that ends up pointing into the base of the heap if it represents NULL.
static bool accesses_heap_base_zone(Node *val) {
  // A non-NULL narrow oop base implies UseCompressedOops.
  if (Universe::narrow_oop_base() == NULL) {
    return false;
  }
  if (val == NULL || !val->is_Mach()) {
    return false;
  }

  // This assumes all Decodes with TypePtr::NotNull are matched to nodes that
  // decode NULL to point to the heap base (Decode_NN).
  if (val->as_Mach()->ideal_Opcode() == Op_DecodeN &&
      val->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull) {
    return true;
  }

  // Must recognize load operation with Decode matched in memory operand.
  // We should not reach here except for PPC/AIX, as os::zero_page_read_protected()
  // returns true everywhere else. On PPC, no such memory operands
  // exist, therefore we did not yet implement a check for such operands.
  NOT_AIX(Unimplemented());
  return false;
}
// Decides whether a read through val still needs an explicit null check,
// or whether the read itself can serve as an implicit one.
static bool needs_explicit_null_check_for_read(Node *val) {
  // An implicit check works in either of two situations, tested in order:
  //  - the page at address 0 is read protected (on some OSes, e.g. AIX, it
  //    is only write protected, so only Store operations trap there), or
  //  - the read hits the base zone page of a heap-based compressed heap
  //    and that page is protected.
  const bool read_traps_on_null =
      os::zero_page_read_protected() ||
      (accesses_heap_base_zone(val) &&
       Universe::narrow_oop_use_implicit_null_checks());
  return !read_traps_on_null;
}
//------------------------------implicit_null_check----------------------------
// Detect implicit-null-check opportunities. Basically, find NULL checks
// with suitable memory ops nearby. Use the memory op to do the NULL check.
// I can generate a memory op if there is not one nearby.
//
// Parameters:
//   block           - the block whose last instruction is the MachIf doing the
//                     explicit null test.
//   proj            - the control projection for the not-null case.
//   val             - the pointer being checked for nullness, or a
//                     decodeHeapOop_not_null node if it did not fold into the
//                     address. In the latter case the low bit of the pointer
//                     is set as a tag and is stripped below.
//   allowed_reasons - bit set indexed by Deoptimization::DeoptReason; the
//                     uncommon trap on the null path must use one of these
//                     reasons for the transformation to go ahead.
//
// On success the chosen memory op is moved to the end of 'block' and the
// MachIf is replaced by a MachNullCheckNode that uses it. If no suitable
// memory op is found, the function returns without changing anything.
void PhaseCFG::implicit_null_check(Block* block, Node *proj, Node *val, int allowed_reasons) {
// Assume if null check need for 0 offset then always needed
// Intel solaris doesn't support any null checks yet and no
// mechanism exists (yet) to set the switches at an os_cpu level
if( !ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(0)) return;
// Make sure the ptr-is-null path appears to be uncommon!
float f = block->end()->as_MachIf()->_prob;
if( proj->Opcode() == Op_IfTrue ) f = 1.0f - f;
if( f > PROB_UNLIKELY_MAG(4) ) return;
uint bidx = 0; // Capture index of value into memop
bool was_store; // Memory op is a store op
// Get the successor block for if the test ptr is non-null
Block* not_null_block; // this one goes with the proj
Block* null_block;
if (block->get_node(block->number_of_nodes()-1) == proj) {
null_block = block->_succs[0];
not_null_block = block->_succs[1];
} else {
assert(block->get_node(block->number_of_nodes()-2) == proj, "proj is one or the other");
not_null_block = block->_succs[0];
null_block = block->_succs[1];
}
// Skip over blocks that contain nothing but a goto.
while (null_block->is_Empty() == Block::empty_with_goto) {
null_block = null_block->_succs[0];
}
// Search the exception block for an uncommon trap.
// (See Parse::do_if and Parse::do_ifnull for the reason
// we need an uncommon trap. Briefly, we need a way to
// detect failure of this optimization, as in 6366351.)
{
bool found_trap = false;
for (uint i1 = 0; i1 < null_block->number_of_nodes(); i1++) {
Node* nn = null_block->get_node(i1);
if (nn->is_MachCall() &&
nn->as_MachCall()->entry_point() == SharedRuntime::uncommon_trap_blob()->entry_point()) {
const Type* trtype = nn->in(TypeFunc::Parms)->bottom_type();
if (trtype->isa_int() && trtype->is_int()->is_con()) {
jint tr_con = trtype->is_int()->get_con();
Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(tr_con);
Deoptimization::DeoptAction action = Deoptimization::trap_request_action(tr_con);
assert((int)reason < (int)BitsPerInt, "recode bit map");
if (is_set_nth_bit(allowed_reasons, (int) reason)
&& action != Deoptimization::Action_none) {
// This uncommon trap is sure to recompile, eventually.
// When that happens, C->too_many_traps will prevent
// this transformation from happening again.
found_trap = true;
}
}
// Only the first uncommon trap call in the block is examined.
break;
}
}
if (!found_trap) {
// We did not find an uncommon trap.
return;
}
}
// Check for decodeHeapOop_not_null node which did not fold into address.
// The caller tags that case by setting the low bit of 'val'; strip the tag
// to recover the real Node pointer.
bool is_decoden = ((intptr_t)val) & 1;
val = (Node*)(((intptr_t)val) & ~1);
assert(!is_decoden || (val->in(0) == NULL) && val->is_Mach() &&
(val->as_Mach()->ideal_Opcode() == Op_DecodeN), "sanity");
// Search the successor block for a load or store whose base value is also
// the tested value. There may be several.
// NOTE(review): 'out' is allocated here but never read in this function.
Node_List *out = new Node_List(Thread::current()->resource_area());
MachNode *best = NULL; // Best found so far
for (DUIterator i = val->outs(); val->has_out(i); i++) {
Node *m = val->out(i);
if( !m->is_Mach() ) continue;
MachNode *mach = m->as_Mach();
was_store = false;
int iop = mach->ideal_Opcode();
switch( iop ) {
case Op_LoadB:
case Op_LoadUB:
case Op_LoadUS:
case Op_LoadD:
case Op_LoadF:
case Op_LoadI:
case Op_LoadL:
case Op_LoadP:
case Op_LoadN:
case Op_LoadS:
case Op_LoadKlass:
case Op_LoadNKlass:
case Op_LoadRange:
case Op_LoadD_unaligned:
case Op_LoadL_unaligned:
assert(mach->in(2) == val, "should be address");
break;
case Op_StoreB:
case Op_StoreC:
case Op_StoreCM:
case Op_StoreD:
case Op_StoreF:
case Op_StoreI:
case Op_StoreL:
case Op_StoreP:
case Op_StoreN:
case Op_StoreNKlass:
was_store = true; // Memory op is a store op
// Stores will have their address in slot 2 (memory in slot 1).
// If the value being null-checked is in another slot, it means we
// are storing the checked value, which does NOT check the value!
if( mach->in(2) != val ) continue;
break; // Found a memory op?
case Op_StrComp:
case Op_StrEquals:
case Op_StrIndexOf:
case Op_AryEq:
case Op_EncodeISOArray:
// Not a legit memory op for implicit null check regardless of
// embedded loads
continue;
default: // Also check for embedded loads
if( !mach->needs_anti_dependence_check() )
continue; // Not a memory op; skip it
if( must_clone[iop] ) {
// Do not move nodes which produce flags because
// RA will try to clone it to place near branch and
// it will cause recompilation, see clone_node().
continue;
}
{
// Check that value is used in memory address in
// instructions with embedded load (CmpP val1,(val2+off)).
Node* base;
Node* index;
const MachOper* oper = mach->memory_inputs(base, index);
if (oper == NULL || oper == (MachOper*)-1) {
continue; // Not a memory op; skip it
}
if (val == base ||
val == index && val->bottom_type()->isa_narrowoop()) {
break; // Found it
} else {
continue; // Skip it
}
}
break;
}
// On some OSes (AIX) the page at address 0 is only write protected.
// If so, only Store operations will trap.
// But a read accessing the base of a heap-based compressed heap will trap.
if (!was_store && needs_explicit_null_check_for_read(val)) {
continue;
}
// Check that node's control edge is not-null block's head or dominates it,
// otherwise we can't hoist it because there are other control dependencies.
Node* ctrl = mach->in(0);
if (ctrl != NULL && !(ctrl == not_null_block->head() ||
get_block_for_node(ctrl)->dominates(not_null_block))) {
continue;
}
// check if the offset is not too high for implicit exception
{
intptr_t offset = 0;
const TypePtr *adr_type = NULL; // Do not need this return value here
const Node* base = mach->get_base_and_disp(offset, adr_type);
if (base == NULL || base == NodeSentinel) {
// Narrow oop address doesn't have base, only index
if( val->bottom_type()->isa_narrowoop() &&
MacroAssembler::needs_explicit_null_check(offset) )
continue; // Give up if offset is beyond page size
// cannot reason about it; is probably not implicit null exception
} else {
const TypePtr* tptr;
if (UseCompressedOops && (Universe::narrow_oop_shift() == 0 ||
Universe::narrow_klass_shift() == 0)) {
// 32-bits narrow oop can be the base of address expressions
tptr = base->get_ptr_type();
} else {
// only regular oops are expected here
tptr = base->bottom_type()->is_ptr();
}
// Give up if offset is not a compile-time constant
if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot )
continue;
offset += tptr->_offset; // correct if base is offset
if( MacroAssembler::needs_explicit_null_check(offset) )
continue; // Give up if reference is beyond 4K page size
}
}
// Check ctrl input to see if the null-check dominates the memory op
Block *cb = get_block_for_node(mach);
cb = cb->_idom; // Always hoist at least 1 block
if( !was_store ) { // Stores can be hoisted only one block
while( cb->_dom_depth > (block->_dom_depth + 1))
cb = cb->_idom; // Hoist loads as far as we want
// The non-null-block should dominate the memory op, too. Live
// range spilling will insert a spill in the non-null-block if it
// needs to spill the memory op for an implicit null check.
if (cb->_dom_depth == (block->_dom_depth + 1)) {
if (cb != not_null_block) continue;
cb = cb->_idom;
}
}
if( cb != block ) continue;
// Found a memory user; see if it can be hoisted to check-block
uint vidx = 0; // Capture index of value into memop
uint j;
// Walk the inputs from last to first; the loop only runs to completion
// (j == 0) if every input checked is defined in a block dominating 'block'.
for( j = mach->req()-1; j > 0; j-- ) {
if( mach->in(j) == val ) {
vidx = j;
// Ignore DecodeN val which could be hoisted to where needed.
if( is_decoden ) continue;
}
// Block of memory-op input
Block *inb = get_block_for_node(mach->in(j));
Block *b = block; // Start from null check
while( b != inb && b->_dom_depth > inb->_dom_depth )
b = b->_idom; // search upwards for input
// See if input dominates null check
if( b != inb )
break;
}
if( j > 0 )
continue;
Block *mb = get_block_for_node(mach);
// Hoisting stores requires more checks for the anti-dependence case.
// Give up hoisting if we have to move the store past any load.
if( was_store ) {
Block *b = mb; // Start searching here for a local load
// mach use (faulting) trying to hoist
// n might be blocker to hoisting
while( b != block ) {
uint k;
for( k = 1; k < b->number_of_nodes(); k++ ) {
Node *n = b->get_node(k);
if( n->needs_anti_dependence_check() &&
n->in(LoadNode::Memory) == mach->in(StoreNode::Memory) )
break; // Found anti-dependent load
}
if( k < b->number_of_nodes() )
break; // Found anti-dependent load
// Make sure control does not do a merge (would have to check allpaths)
if( b->num_preds() != 2 ) break;
b = get_block_for_node(b->pred(1)); // Move up to predecessor block
}
if( b != block ) continue;
}
// Make sure this memory op is not already being used for a NullCheck
Node *e = mb->end();
if( e->is_MachNullCheck() && e->in(1) == mach )
continue; // Already being used as a NULL check
// Found a candidate! Pick one with least dom depth - the highest
// in the dom tree should be closest to the null check.
if (best == NULL || get_block_for_node(mach)->_dom_depth < get_block_for_node(best)->_dom_depth) {
best = mach;
bidx = vidx;
}
}
// No candidate!
if (best == NULL) {
return;
}
// ---- Found an implicit null check
// Global counter of implicit null checks created (defined elsewhere).
extern int implicit_null_checks;
implicit_null_checks++;
if( is_decoden ) {
// Check if we need to hoist decodeHeapOop_not_null first.
Block *valb = get_block_for_node(val);
if( block != valb && block->_dom_depth < valb->_dom_depth ) {
// Hoist it up to the end of the test block.
valb->find_remove(val);
block->add_inst(val);
map_node_to_block(val, block);
// DecodeN on x86 may kill flags. Check for flag-killing projections
// that also need to be hoisted.
for (DUIterator_Fast jmax, j = val->fast_outs(jmax); j < jmax; j++) {
Node* n = val->fast_out(j);
if( n->is_MachProj() ) {
get_block_for_node(n)->find_remove(n);
block->add_inst(n);
map_node_to_block(n, block);
}
}
}
}
// Hoist the memory candidate up to the end of the test block.
Block *old_block = get_block_for_node(best);
old_block->find_remove(best);
block->add_inst(best);
map_node_to_block(best, block);
// Move the control dependence if it is pinned to not-null block.
// Don't change it in other cases: NULL or dominating control.
if (best->in(0) == not_null_block->head()) {
// Set it to control edge of null check.
best->set_req(0, proj->in(0)->in(0));
}
// Check for flag-killing projections that also need to be hoisted
// Should be DU safe because no edge updates.
for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) {
Node* n = best->fast_out(j);
if( n->is_MachProj() ) {
get_block_for_node(n)->find_remove(n);
block->add_inst(n);
map_node_to_block(n, block);
}
}
// proj==Op_True --> ne test; proj==Op_False --> eq test.
// One of two graph shapes got matched:
// (IfTrue (If (Bool NE (CmpP ptr NULL))))
// (IfFalse (If (Bool EQ (CmpP ptr NULL))))
// NULL checks are always branch-if-eq. If we see a IfTrue projection
// then we are replacing a 'ne' test with a 'eq' NULL check test.
// We need to flip the projections to keep the same semantics.
if( proj->Opcode() == Op_IfTrue ) {
// Swap order of projections in basic block to swap branch targets
Node *tmp1 = block->get_node(block->end_idx()+1);
Node *tmp2 = block->get_node(block->end_idx()+2);
block->map_node(tmp2, block->end_idx()+1);
block->map_node(tmp1, block->end_idx()+2);
// Exchange the users of the two projections via a temporary node,
// which is destroyed again once the swap is done.
Node *tmp = new (C) Node(C->top()); // Use not NULL input
tmp1->replace_by(tmp);
tmp2->replace_by(tmp1);
tmp->replace_by(tmp2);
tmp->destruct();
}
// Remove the existing null check; use a new implicit null check instead.
// Since schedule-local needs precise def-use info, we need to correct
// it as well.
Node *old_tst = proj->in(0);
MachNode *nul_chk = new (C) MachNullCheckNode(old_tst->in(0),best,bidx);
block->map_node(nul_chk, block->end_idx());
map_node_to_block(nul_chk, block);
// Redirect users of old_test to nul_chk
for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2)
old_tst->last_out(i2)->set_req(0, nul_chk);
// Clean-up any dead code
for (uint i3 = 0; i3 < old_tst->req(); i3++) {
Node* in = old_tst->in(i3);
old_tst->set_req(i3, NULL);
if (in->outcnt() == 0) {
// Remove dead input node
in->disconnect_inputs(NULL, C);
block->find_remove(in);
}
}
latency_from_uses(nul_chk);
latency_from_uses(best);
// insert anti-dependences to defs in this block
if (! best->needs_anti_dependence_check()) {
for (uint k = 1; k < block->number_of_nodes(); k++) {
Node *n = block->get_node(k);
if (n->needs_anti_dependence_check() &&
n->in(LoadNode::Memory) == best->in(StoreNode::Memory)) {
// Found anti-dependent load
insert_anti_dependences(block, n);
}
}
}
}
//------------------------------select-----------------------------------------
// Select a nice fellow from the worklist to schedule next. If there is only
// one choice, then use it. Projections take top priority for correctness
// reasons - if I see a projection, then it is next. There are a number of
// other special cases, for instructions that consume condition codes, et al.
// These are chosen immediately. Some instructions are required to immediately
// precede the last instruction in the block, and these are taken last. Of the
// remaining cases (most), choose the instruction with the greatest latency
// (that is, the most number of pseudo-cycles required to the end of the
// routine). If there is a tie, choose the instruction with the most inputs.
//
// Parameters:
//   block      - block being scheduled; its end node drives the "take last" rules
//   worklist   - ready nodes; the chosen node is removed from it before returning
//   ready_cnt  - per-node counters indexed by Node::_idx, consulted for the
//                users of condition-code producers
//   next_call, sched_slot - not referenced anywhere in this body
// Returns the node that should be scheduled next.
Node* PhaseCFG::select(Block* block, Node_List &worklist, GrowableArray<int> &ready_cnt, VectorSet &next_call, uint sched_slot) {
// If only a single entry on the stack, use it
uint cnt = worklist.size();
if (cnt == 1) {
Node *n = worklist[0];
worklist.map(0,worklist.pop());
return n;
}
// Best candidate seen so far. Candidates are ranked by 'choice' first, then
// by 'latency', then by 'score'; 'idx' remembers where the winner sits.
uint choice = 0; // Bigger is most important
uint latency = 0; // Bigger is scheduled first
uint score = 0; // Bigger is better
int idx = -1; // Index in worklist
int cand_cnt = 0; // Candidate count
for( uint i=0; i<cnt; i++ ) { // Inspect entire worklist
// Order in worklist is used to break ties.
// See caller for how this is used to delay scheduling
// of induction variable increments to after the other
// uses of the phi are scheduled.
Node *n = worklist[i]; // Get Node on worklist
// Ideal opcode of a Mach node; 0 for anything that is not a Mach node
int iop = n->is_Mach() ? n->as_Mach()->ideal_Opcode() : 0;
if( n->is_Proj() || // Projections always win
n->Opcode()== Op_Con || // So does constant 'Top'
iop == Op_CreateEx || // Create-exception must start block
iop == Op_CheckCastPP
) {
// Immediate winner: fill slot i with the popped last entry and return
worklist.map(i,worklist.pop());
return n;
}
// The three 'continue's below skip nodes that must stay next to the block
// end; they are never recorded as a candidate in this pass.
// Final call in a block must be adjacent to 'catch'
Node *e = block->end();
if( e->is_Catch() && e->in(0)->in(0) == n )
continue;
// Memory op for an implicit null check has to be at the end of the block
if( e->is_MachNullCheck() && e->in(1) == n )
continue;
// Schedule IV increment last.
if (e->is_Mach() && e->as_Mach()->ideal_Opcode() == Op_CountedLoopEnd &&
e->in(1)->in(1) == n && n->is_iteratively_computed())
continue;
// Default priority class; lowered to 1 or raised to 3 by the checks below
uint n_choice = 2;
// See if this instruction is consumed by a branch. If so, then (as the
// branch is the last instruction in the basic block) force it to the
// end of the basic block
if ( must_clone[iop] ) {
// See if any use is a branch
bool found_machif = false;
for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
Node* use = n->fast_out(j);
// The use is a conditional branch, make them adjacent
if (use->is_MachIf() && get_block_for_node(use) == block) {
found_machif = true;
break;
}
// More than this instruction pending for successor to be ready,
// don't choose this if other opportunities are ready
if (ready_cnt.at(use->_idx) > 1)
n_choice = 1;
}
// loop terminated, prefer not to use this instruction
if (found_machif)
continue;
}
// See if this has a predecessor that is "must_clone", i.e. sets the
// condition code. If so, choose this first
for (uint j = 0; j < n->req() ; j++) {
Node *inn = n->in(j);
if (inn) {
if (inn->is_Mach() && must_clone[inn->as_Mach()->ideal_Opcode()] ) {
n_choice = 3;
break;
}
}
}
// MachTemps should be scheduled last so they are near their uses
// (this overrides any priority computed above)
if (n->is_MachTemp()) {
n_choice = 1;
}
uint n_latency = get_latency_for_node(n);
uint n_score = n->req(); // Many inputs get high score to break ties
// Keep best latency found
cand_cnt++;
// Take 'n' if it is in a strictly better priority class. Within the same
// class, StressLCM replaces the latency/score comparison with
// Compile::randomized_select(cand_cnt); otherwise higher latency wins and
// higher score breaks latency ties. Strict comparisons mean the earlier
// worklist entry is kept on a full tie.
if (choice < n_choice ||
(choice == n_choice &&
((StressLCM && Compile::randomized_select(cand_cnt)) ||
(!StressLCM &&
(latency < n_latency ||
(latency == n_latency &&
(score < n_score))))))) {
choice = n_choice;
latency = n_latency;
score = n_score;
idx = i; // Also keep index in worklist
}
} // End of for all ready nodes in worklist
// idx stays -1 only if every worklist entry was skipped by a 'continue'
assert(idx >= 0, "index should be set");
Node *n = worklist[(uint)idx]; // Get the winner
worklist.map((uint)idx, worklist.pop()); // Compress worklist
return n;
}
//------------------------------set_next_call----------------------------------
// Flag 'n' in 'next_call', then recurse into every input of 'n' (all len()
// slots are walked, not just the req() ones) that is placed in 'block'.
// A node for which test_set() already answers true is not revisited.
void PhaseCFG::set_next_call(Block* block, Node* n, VectorSet& next_call) {
  if (next_call.test_set(n->_idx)) {
    return;                             // seen before; nothing more to do
  }
  for (uint slot = 0; slot < n->len(); slot++) {
    Node* input = n->in(slot);
    // Empty slots are skipped; every node in this block that precedes the
    // call must be visited.
    if (input != NULL && get_block_for_node(input) == block) {
      set_next_call(block, input, next_call);
    }
  }
}
//------------------------------needed_for_next_call---------------------------
// Set the flag 'next_call' for each Node that is needed for the next call to
// be scheduled. The flag is meant to bias scheduling so that Nodes needed for
// the next subroutine call get priority, i.e. things NOT needed for the next
// call move after it, which avoids carrying lots of values live across a call.
//
// The "next call" is the first user of 'this_call' that is a MachCall placed
// in 'block' (and is not 'this_call' itself). If there is none, 'next_call'
// is left untouched.
void PhaseCFG::needed_for_next_call(Block* block, Node* this_call, VectorSet& next_call) {
  Node* following_call = NULL;
  for (DUIterator_Fast omax, o = this_call->fast_outs(omax); o < omax; o++) {
    Node* user = this_call->fast_out(o);
    if (get_block_for_node(user) != block) {
      continue;                         // not a local-block user
    }
    if (user == this_call) {
      continue;                         // self-start node
    }
    if (!user->is_MachCall()) {
      continue;
    }
    following_call = user;
    break;
  }
  // No next call (e.g., block end is near): nothing to mark
  if (following_call != NULL) {
    // Mark the call and, transitively, its block-local inputs
    set_next_call(block, following_call, next_call);
  }
}
//------------------------------add_call_kills-------------------------------------
// Helper that fills in the kill mask (proj->_rout) for a call.
// Every register below _last_Mach_Reg that is not in 'regs' (i.e. not already
// defined by the call) is added when its 'save_policy' entry is 'C' or 'A',
// or is 'E' while 'exclude_soe' is set.
static void add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe) {
  for (OptoReg::Name reg = OptoReg::Name(0); reg < _last_Mach_Reg; reg = OptoReg::add(reg, 1)) {
    if (regs.Member(reg)) {
      continue;                         // already defined by the call
    }
    const char policy = save_policy[reg];
    // Save-on-call register?
    const bool killed = (policy == 'C') ||
                        (policy == 'A') ||
                        (policy == 'E' && exclude_soe);
    if (killed) {
      proj->_rout.Insert(reg);
    }
  }
}
//------------------------------sched_call-------------------------------------
// Schedule everything that has to sit directly after the call 'mcall':
// all of its projection users are placed at consecutive slots starting at
// 'node_cnt', their block-local users are released onto 'worklist' once their
// ready count reaches zero, and a new fat projection carrying the call's kill
// mask is inserted after them.
// Returns the next free slot index in 'block'.
uint PhaseCFG::sched_call(Block* block, uint node_cnt, Node_List& worklist, GrowableArray<int>& ready_cnt, MachCallNode* mcall, VectorSet& next_call) {
// Registers defined by the call's projections (plus the frame pointer below)
RegMask regs;
// Schedule all the users of the call right now. All the users are
// projection Nodes, so they must be scheduled next to the call.
// Collect all the defined registers.
for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
Node* n = mcall->fast_out(i);
assert( n->is_MachProj(), "" );
// The call was the projection's only outstanding input, so it is ready now
int n_cnt = ready_cnt.at(n->_idx)-1;
ready_cnt.at_put(n->_idx, n_cnt);
assert( n_cnt == 0, "" );
// Schedule next to call
block->map_node(n, node_cnt++);
// Collect defined registers
regs.OR(n->out_RegMask());
// Check for scheduling the next control-definer
if( n->bottom_type() == Type::CONTROL )
// Warm up next pile of heuristic bits
needed_for_next_call(block, n, next_call);
// Children of projections are now all ready
for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
Node* m = n->fast_out(j); // Get user
// Users in other blocks and Phis do not take part in local scheduling
if(get_block_for_node(m) != block) {
continue;
}
if( m->is_Phi() ) continue;
int m_cnt = ready_cnt.at(m->_idx)-1;
ready_cnt.at_put(m->_idx, m_cnt);
if( m_cnt == 0 )
worklist.push(m);
}
}
// Act as if the call defines the Frame Pointer.
// Certainly the FP is alive and well after the call.
regs.Insert(_matcher.c_frame_pointer());
// Set all registers killed and not already defined by the call.
uint r_cnt = mcall->tf()->range()->cnt();
int op = mcall->ideal_Opcode();
// The fat projection starts with an empty mask; it is filled in below
MachProjNode *proj = new (C) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
map_node_to_block(proj, block);
block->insert_node(proj, node_cnt++);
// Select the right register save policy.
const char *save_policy = NULL;
switch (op) {
case Op_CallRuntime:
case Op_CallLeaf:
case Op_CallLeafNoFP:
// Calling C code so use C calling convention
save_policy = _matcher._c_reg_save_policy;
break;
case Op_CallStaticJava:
case Op_CallDynamicJava:
// Calling Java code so use Java calling convention
save_policy = _matcher._register_save_policy;
break;
default:
ShouldNotReachHere();
}
// When using CallRuntime mark SOE registers as killed by the call
// so values that could show up in the RegisterMap aren't live in a
// callee saved register since the register wouldn't know where to
// find them. CallLeaf and CallLeafNoFP are ok because they can't
// have debug info on them. Strictly speaking this only needs to be
// done for oops since idealreg2debugmask takes care of debug info
// references but there is no way to handle oops differently than other
// pointers as far as the kill mask goes.
bool exclude_soe = op == Op_CallRuntime;
// If the call is a MethodHandle invoke, we need to exclude the
// register which is used to save the SP value over MH invokes from
// the mask. Otherwise this register could be used for
// deoptimization information.
if (op == Op_CallStaticJava) {
MachCallStaticJavaNode* mcallstaticjava = (MachCallStaticJavaNode*) mcall;
if (mcallstaticjava->_method_handle_invoke)
proj->_rout.OR(Matcher::method_handle_invoke_SP_save_mask());
}
// Add every policy-selected register that the call does not itself define
add_call_kills(proj, regs, save_policy, exclude_soe);
return node_cnt;
}
//------------------------------schedule_local---------------------------------
// Topological sort within a block. Someday become a real scheduler.
//
// Parameters:
//   block     - block whose node list is reordered in place
//   ready_cnt - per-node counters indexed by Node::_idx; filled in here with
//               the number of block-local inputs and decremented as inputs
//               get scheduled
//   next_call - heuristic bit set maintained via needed_for_next_call()
// Returns true when every node up to the block end was scheduled. Returns
// false otherwise; in that case a retry without subsumed loads is recorded on
// the Compile object if that has not been tried yet.
bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call) {
// Already "sorted" are the block start Node (as the first entry), and
// the block-ending Node and any trailing control projections. We leave
// these alone. PhiNodes and ParmNodes are made to follow the block start
// Node. Everything else gets topo-sorted.
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print_cr("# --- schedule_local B%d, before: ---", block->_pre_order);
for (uint i = 0;i < block->number_of_nodes(); i++) {
tty->print("# ");
block->get_node(i)->fast_dump();
}
tty->print_cr("#");
}
#endif
// RootNode is already sorted
if (block->number_of_nodes() == 1) {
return true;
}
// Move PhiNodes and ParmNodes from 1 to cnt up to the start
uint node_cnt = block->end_idx();
// Next free slot at the front of the block; slot 0 holds the block head
uint phi_cnt = 1;
uint i;
for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
Node *n = block->get_node(i);
if( n->is_Phi() || // Found a PhiNode or ParmNode
(n->is_Proj() && n->in(0) == block->head()) ) {
// Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt
block->map_node(block->get_node(phi_cnt), i);
block->map_node(n, phi_cnt++); // swap Phi/Parm up front
} else { // All others
// Count block-local inputs to 'n'
uint cnt = n->len(); // Input count
uint local = 0;
for( uint j=0; j<cnt; j++ ) {
Node *m = n->in(j);
if( m && get_block_for_node(m) == block && !m->is_top() )
local++; // One more block-local input
}
ready_cnt.at_put(n->_idx, local); // Count em up
#ifdef ASSERT
if( UseConcMarkSweepGC || UseG1GC ) {
if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) {
// Check the precedence edges
for (uint prec = n->req(); prec < n->len(); prec++) {
Node* oop_store = n->in(prec);
if (oop_store != NULL) {
assert(get_block_for_node(oop_store)->_dom_depth <= block->_dom_depth, "oop_store must dominate card-mark");
}
}
}
}
#endif
// A few node types require changing a required edge to a precedence edge
// before allocation.
if( n->is_Mach() && n->req() > TypeFunc::Parms &&
(n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire ||
n->as_Mach()->ideal_Opcode() == Op_MemBarVolatile) ) {
// MemBarAcquire could be created without Precedent edge.
// del_req() replaces the specified edge with the last input edge
// and then removes the last edge. If the specified edge > number of
// edges the last edge will be moved outside of the input edges array
// and the edge will be lost. This is why this code should be
// executed only when Precedent (== TypeFunc::Parms) edge is present.
Node *x = n->in(TypeFunc::Parms);
if (x != NULL && get_block_for_node(x) == block && n->find_prec_edge(x) != -1) {
// Old edge to node within same block will get removed, but no precedence
// edge will get added because it already exists. Update ready count.
int cnt = ready_cnt.at(n->_idx);
assert(cnt > 1, err_msg("MemBar node %d must not get ready here", n->_idx));
ready_cnt.at_put(n->_idx, cnt-1);
}
n->del_req(TypeFunc::Parms);
n->add_prec(x);
}
}
}
// 'i' now equals node_cnt: the block end and whatever follows it
for(uint i2=i; i2< block->number_of_nodes(); i2++ ) // Trailing guys get zapped count
ready_cnt.at_put(block->get_node(i2)->_idx, 0);
// All the prescheduled guys do not hold back internal nodes
uint i3;
for(i3 = 0; i3<phi_cnt; i3++ ) { // For all pre-scheduled
Node *n = block->get_node(i3); // Get pre-scheduled
for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
Node* m = n->fast_out(j);
if (get_block_for_node(m) == block) { // Local-block user
int m_cnt = ready_cnt.at(m->_idx)-1;
ready_cnt.at_put(m->_idx, m_cnt); // Fix ready count
}
}
}
// Ready induction-variable increments are parked here and appended to the
// worklist after everything else
Node_List delay;
// Make a worklist
Node_List worklist;
for(uint i4=i3; i4<node_cnt; i4++ ) { // Put ready guys on worklist
Node *m = block->get_node(i4);
if( !ready_cnt.at(m->_idx) ) { // Zero ready count?
if (m->is_iteratively_computed()) {
// Push induction variable increments last to allow other uses
// of the phi to be scheduled first. The select() method breaks
// ties in scheduling by worklist order.
delay.push(m);
} else if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_CreateEx) {
// Force the CreateEx to the top of the list so it's processed
// first and ends up at the start of the block.
worklist.insert(0, m);
} else {
worklist.push(m); // Then on to worklist!
}
}
}
while (delay.size()) {
Node* d = delay.pop();
worklist.push(d);
}
// Warm up the 'next_call' heuristic bits
needed_for_next_call(block, block->head(), next_call);
#ifndef PRODUCT
if (trace_opto_pipelining()) {
for (uint j=0; j< block->number_of_nodes(); j++) {
Node *n = block->get_node(j);
int idx = n->_idx;
tty->print("# ready cnt:%3d ", ready_cnt.at(idx));
tty->print("latency:%3d ", get_latency_for_node(n));
tty->print("%4d: %s\n", idx, n->Name());
}
}
#endif
// Nodes whose _idx is at or beyond this bound have no ready_cnt entry;
// the scheduling loop below skips them (see the "new node" check)
uint max_idx = (uint)ready_cnt.length();
// Pull from worklist and schedule
while( worklist.size() ) { // Worklist is not empty
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print("# ready list:");
for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
Node *n = worklist[i]; // Get Node on worklist
tty->print(" %d", n->_idx);
}
tty->cr();
}
#endif
// Select and pop a ready guy from worklist
Node* n = select(block, worklist, ready_cnt, next_call, phi_cnt);
block->map_node(n, phi_cnt++); // Schedule him next
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print("# select %d: %s", n->_idx, n->Name());
tty->print(", latency:%d", get_latency_for_node(n));
n->dump();
if (Verbose) {
tty->print("# ready list:");
for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
Node *n = worklist[i]; // Get Node on worklist
tty->print(" %d", n->_idx);
}
tty->cr();
}
}
#endif
// Calls are handled by sched_call(), which schedules the call's projections,
// releases their users and returns the next free slot
if( n->is_MachCall() ) {
MachCallNode *mcall = n->as_MachCall();
phi_cnt = sched_call(block, phi_cnt, worklist, ready_cnt, mcall, next_call);
continue;
}
// A non-call Mach node that reports has_call() gets a fat projection
// holding its kill mask, built with the C register save policy
if (n->is_Mach() && n->as_Mach()->has_call()) {
RegMask regs;
regs.Insert(_matcher.c_frame_pointer());
regs.OR(n->out_RegMask());
MachProjNode *proj = new (C) MachProjNode( n, 1, RegMask::Empty, MachProjNode::fat_proj );
map_node_to_block(proj, block);
block->insert_node(proj, phi_cnt++);
add_call_kills(proj, regs, _matcher._c_reg_save_policy, false);
}
// Children are now all ready
for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) {
Node* m = n->fast_out(i5); // Get user
if (get_block_for_node(m) != block) {
continue;
}
if( m->is_Phi() ) continue;
if (m->_idx >= max_idx) { // new node, skip it
assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types");
continue;
}
int m_cnt = ready_cnt.at(m->_idx)-1;
ready_cnt.at_put(m->_idx, m_cnt);
if( m_cnt == 0 )
worklist.push(m);
}
}
// The worklist drained; success means the schedule reached the block end
if( phi_cnt != block->end_idx() ) {
// did not schedule all. Retry, Bailout, or Die
if (C->subsume_loads() == true && !C->failing()) {
// Retry with subsume_loads == false
// If this is the first failure, the sentinel string will "stick"
// to the Compile object, and the C2Compiler will see it and retry.
C->record_failure(C2Compiler::retry_no_subsuming_loads());
} else {
assert(false, "graph should be schedulable");
}
// assert( phi_cnt == end_idx(), "did not schedule all" );
return false;
}
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print_cr("#");
tty->print_cr("# after schedule_local");
for (uint i = 0;i < block->number_of_nodes();i++) {
tty->print("# ");
block->get_node(i)->fast_dump();
}
tty->cr();
}
#endif
return true;
}
//--------------------------catch_cleanup_fix_all_inputs-----------------------
// Redirect every input edge of 'use' that points at 'old_def' so that it
// points at 'new_def' instead. Required edges (slot < req()) are rewritten in
// place; precedence edges are removed and re-added.
static void catch_cleanup_fix_all_inputs(Node *use, Node *old_def, Node *new_def) {
  uint slot = 0;
  while (slot < use->len()) {
    if (use->in(slot) != old_def) {
      slot++;
      continue;
    }
    if (slot < use->req()) {
      use->set_req(slot, new_def);
      slot++;
    } else {
      use->rm_prec(slot);
      use->add_prec(new_def);
      // Do not advance: this slot is examined again on the next pass, since
      // its content may have changed after the remove/add pair.
    }
  }
}
//------------------------------catch_cleanup_find_cloned_def------------------
// Find the node that a use in 'use_blk' should reference in place of 'def',
// which lives in 'def_blk' and has been cloned at index 'n_clone_idx' in each
// successor of 'def_blk'. The result is either such a clone or a Phi (an
// existing matching one, or a newly inserted one) that merges the clones.
Node* PhaseCFG::catch_cleanup_find_cloned_def(Block *use_blk, Node *def, Block *def_blk, int n_clone_idx) {
assert( use_blk != def_blk, "Inter-block cleanup only");
// The use is some block below the Catch. Find and return the clone of the def
// that dominates the use. If there is no clone in a dominating block, then
// create a phi for the def in a dominating block.
// Find which successor block dominates this use. The successor
// blocks must all be single-entry (from the Catch only; I will have
// split blocks to make this so), hence they all dominate.
// Walk up the dominator tree until one level below def_blk
while( use_blk->_dom_depth > def_blk->_dom_depth+1 )
use_blk = use_blk->_idom;
// Find the successor
Node *fixup = NULL;
uint j;
for( j = 0; j < def_blk->_num_succs; j++ )
if( use_blk == def_blk->_succs[j] )
break;
if( j == def_blk->_num_succs ) {
// Block at same level in dom-tree is not a successor. It needs a
// PhiNode, the PhiNode uses from the def and ITS uses need fixup.
Node_Array inputs = new Node_List(Thread::current()->resource_area());
// Resolve the def for each predecessor recursively; inputs[k] lines up
// with phi input k
for(uint k = 1; k < use_blk->num_preds(); k++) {
Block* block = get_block_for_node(use_blk->pred(k));
inputs.map(k, catch_cleanup_find_cloned_def(block, def, def_blk, n_clone_idx));
}
// Check to see if the use_blk already has an identical phi inserted.
// If it exists, it will be at the first position since all uses of a
// def are processed together.
Node *phi = use_blk->get_node(1);
if( phi->is_Phi() ) {
fixup = phi;
for (uint k = 1; k < use_blk->num_preds(); k++) {
if (phi->in(k) != inputs[k]) {
// Not a match
fixup = NULL;
break;
}
}
}
// If an existing PhiNode was not found, make a new one.
if (fixup == NULL) {
Node *new_phi = PhiNode::make(use_blk->head(), def);
// Index 1 is where the lookup above expects to find it next time
use_blk->insert_node(new_phi, 1);
map_node_to_block(new_phi, use_blk);
for (uint k = 1; k < use_blk->num_preds(); k++) {
new_phi->set_req(k, inputs[k]);
}
fixup = new_phi;
}
} else {
// Found the use just below the Catch. Make it use the clone.
fixup = use_blk->get_node(n_clone_idx);
}
return fixup;
}
//--------------------------catch_cleanup_intra_block--------------------------
// Fix all input edges in use that reference "def". The use is in the same
// block as the def and both have been cloned in each successor block.
// 'beg' is the index in 'blk' where the cloned region starts and
// 'n_clone_idx' is the index of def's clone inside each successor block.
static void catch_cleanup_intra_block(Node *use, Node *def, Block *blk, int beg, int n_clone_idx) {
  // The clone of 'use' sits at the same distance from the start of the cloned
  // region as 'use' does in 'blk', shifted by one for the successor's head.
  const uint use_pos   = blk->find_node(use);
  const uint clone_pos = (use_pos - beg) + 1;
  for (uint s = 0; s < blk->_num_succs; s++) {
    Block* succ      = blk->_succs[s];
    Node*  use_clone = succ->get_node(clone_pos);
    assert( use_clone->Opcode() == use->Opcode(), "" );
    // Point the cloned use at the cloned def that lives in the same successor
    Node* def_clone = succ->get_node(n_clone_idx);
    catch_cleanup_fix_all_inputs(use_clone, def, def_clone);
  }
}
//------------------------------catch_cleanup_inter_block---------------------
// Fix all input edges in use that reference "def". The use is in a different
// block than the def. Nothing is done when 'use_blk' is NULL.
void PhaseCFG::catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Block *def_blk, int n_clone_idx) {
  // A missing block can happen if the use is a precedence edge
  if (use_blk != NULL) {
    // Look up (or create) the replacement definition visible from use_blk,
    // then retarget the edges of 'use'.
    Node* replacement = catch_cleanup_find_cloned_def(use_blk, def, def_blk, n_clone_idx);
    catch_cleanup_fix_all_inputs(use, def, replacement);
  }
}
//------------------------------call_catch_cleanup-----------------------------
// If we inserted any instructions between a Call and his CatchNode,
// clone the instructions on all paths below the Catch.
// Steps: locate the inserted range [beg, end) in 'block', clone it into every
// successor, rewire all users of the originals to the clones (or to phis),
// delete the originals, then drop dead clones and restore CreateEx to the top
// of each successor. Does nothing unless the block ends in a Catch.
void PhaseCFG::call_catch_cleanup(Block* block) {
// End of region to clone
uint end = block->end_idx();
if( !block->get_node(end)->is_Catch() ) return;
// Start of region to clone
// Walk backwards from the Catch until the node just before 'beg' is a
// MachProj hanging off a MachCall
uint beg = end;
while(!block->get_node(beg-1)->is_MachProj() ||
!block->get_node(beg-1)->in(0)->is_MachCall() ) {
beg--;
assert(beg > 0,"Catch cleanup walking beyond block boundary");
}
// Range of inserted instructions is [beg, end)
if( beg == end ) return;
// Clone along all Catch output paths. Clone area between the 'beg' and
// 'end' indices.
for( uint i = 0; i < block->_num_succs; i++ ) {
Block *sb = block->_succs[i];
// Clone the entire area; ignoring the edge fixup for now.
// Iterating backwards while always inserting at index 1 keeps the clones
// in original order: the clone of node 'beg+k' lands at index k+1 in sb
for( uint j = end; j > beg; j-- ) {
Node *clone = block->get_node(j-1)->clone();
sb->insert_node(clone, 1);
map_node_to_block(clone, sb);
if (clone->needs_anti_dependence_check()) {
insert_anti_dependences(sb, clone);
}
}
}
// Fixup edges. Check the def-use info per cloned Node
for(uint i2 = beg; i2 < end; i2++ ) {
uint n_clone_idx = i2-beg+1; // Index of clone of n in each successor block
Node *n = block->get_node(i2); // Node that got cloned
// Need DU safe iterator because of edge manipulation in calls.
// Snapshot the users first, then work from the snapshot
Unique_Node_List *out = new Unique_Node_List(Thread::current()->resource_area());
for (DUIterator_Fast j1max, j1 = n->fast_outs(j1max); j1 < j1max; j1++) {
out->push(n->fast_out(j1));
}
uint max = out->size();
for (uint j = 0; j < max; j++) {// For all users
Node *use = out->pop();
Block *buse = get_block_for_node(use);
if( use->is_Phi() ) {
// For a Phi, input k is resolved starting from the block of the k-th
// predecessor of the Phi's block
for( uint k = 1; k < use->req(); k++ )
if( use->in(k) == n ) {
Block* b = get_block_for_node(buse->pred(k));
Node *fixup = catch_cleanup_find_cloned_def(b, n, block, n_clone_idx);
use->set_req(k, fixup);
}
} else {
if (block == buse) {
catch_cleanup_intra_block(use, n, block, beg, n_clone_idx);
} else {
catch_cleanup_inter_block(use, buse, n, block, n_clone_idx);
}
}
} // End for all users
} // End of for all Nodes in cloned area
// Remove the now-dead cloned ops
// Always operate on index 'beg': each removal shifts the next node into it
for(uint i3 = beg; i3 < end; i3++ ) {
block->get_node(beg)->disconnect_inputs(NULL, C);
block->remove_node(beg);
}
// If the successor blocks have a CreateEx node, move it back to the top
for(uint i4 = 0; i4 < block->_num_succs; i4++ ) {
Block *sb = block->_succs[i4];
// Number of clones still present in sb; they occupy indices 1..new_cnt
uint new_cnt = end - beg;
// Remove any newly created, but dead, nodes.
for( uint j = new_cnt; j > 0; j-- ) {
Node *n = sb->get_node(j);
if (n->outcnt() == 0 &&
(!n->is_Proj() || n->as_Proj()->in(0)->outcnt() == 1) ){
n->disconnect_inputs(NULL, C);
sb->remove_node(j);
new_cnt--;
}
}
// If any newly created nodes remain, move the CreateEx node to the top
if (new_cnt > 0) {
// First node after the surviving clones
Node *cex = sb->get_node(1+new_cnt);
if( cex->is_Mach() && cex->as_Mach()->ideal_Opcode() == Op_CreateEx ) {
sb->remove_node(1+new_cnt);
sb->insert_node(cex, 1);
}
}
}
}
C:\hotspot-69087d08d473\src\share\vm/opto/library_call.cpp
/*
* Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "classfile/systemDictionary.hpp"
#include "classfile/vmSymbols.hpp"
#include "compiler/compileBroker.hpp"
#include "compiler/compileLog.hpp"
#include "jfr/support/jfrIntrinsics.hpp"
#include "oops/objArrayKlass.hpp"
#include "opto/addnode.hpp"
#include "opto/callGenerator.hpp"
#include "opto/cfgnode.hpp"
#include "opto/connode.hpp"
#include "opto/idealKit.hpp"
#include "opto/mathexactnode.hpp"
#include "opto/mulnode.hpp"
#include "opto/parse.hpp"
#include "opto/runtime.hpp"
#include "opto/subnode.hpp"
#include "prims/nativeLookup.hpp"
#include "runtime/sharedRuntime.hpp"
#include "utilities/macros.hpp"
// Call generator for a library intrinsic. It records which intrinsic is
// meant, whether the call site is virtual, whether the intrinsic performs
// virtual dispatch, and how many predicates guard it. generate() and
// generate_predicate() are only declared here; their definitions are not
// part of this class body.
class LibraryIntrinsic : public InlineCallGenerator {
// Extend the set of intrinsics known to the runtime:
public:
private:
bool _is_virtual;
bool _does_virtual_dispatch;
// Both predicate fields are stored as int8_t; the constructor narrows its
// int argument accordingly.
int8_t _predicates_count; // Intrinsic is predicated by several conditions
int8_t _last_predicate; // Last generated predicate
vmIntrinsics::ID _intrinsic_id;
public:
// m: method handed to the InlineCallGenerator base; id: intrinsic identifier.
// _last_predicate starts out as -1.
LibraryIntrinsic(ciMethod* m, bool is_virtual, int predicates_count, bool does_virtual_dispatch, vmIntrinsics::ID id)
: InlineCallGenerator(m),
_is_virtual(is_virtual),
_does_virtual_dispatch(does_virtual_dispatch),
_predicates_count((int8_t)predicates_count),
_last_predicate((int8_t)-1),
_intrinsic_id(id)
{
}
// Simple queries over the state captured at construction
virtual bool is_intrinsic() const { return true; }
virtual bool is_virtual() const { return _is_virtual; }
// Predicated means at least one predicate was requested
virtual bool is_predicated() const { return _predicates_count > 0; }
virtual int predicates_count() const { return _predicates_count; }
virtual bool does_virtual_dispatch() const { return _does_virtual_dispatch; }
virtual JVMState* generate(JVMState* jvms);
virtual Node* generate_predicate(JVMState* jvms, int predicate);
vmIntrinsics::ID intrinsic_id() const { return _intrinsic_id; }
};
// Local helper class for LibraryIntrinsic:
class LibraryCallKit : public GraphKit {
private:
LibraryIntrinsic* _intrinsic; // the library intrinsic being called
Node* _result; // the result node, if any
int _reexecute_sp; // the stack pointer when bytecode needs to be reexecuted
const TypeOopPtr* sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type, bool is_native_ptr = false);
public:
LibraryCallKit(JVMState* jvms, LibraryIntrinsic* intrinsic)
: GraphKit(jvms),
_intrinsic(intrinsic),
_result(NULL)
{
// Check if this is a root compile. In that case we don't have a caller.
if (!jvms->has_method()) {
_reexecute_sp = sp();
} else {
// Find out how many arguments the interpreter needs when deoptimizing
// and save the stack pointer value so it can be used by uncommon_trap.
// We find the argument count by looking at the declared signature.
bool ignored_will_link;
ciSignature* declared_signature = NULL;
ciMethod* ignored_callee = caller()->get_method_at_bci(bci(), ignored_will_link, &declared_signature);
const int nargs = declared_signature->arg_size_for_bc(caller()->java_code_at_bci(bci()));
_reexecute_sp = sp() + nargs; // "push" arguments back on stack
}
}
virtual LibraryCallKit* is_LibraryCallKit() const { return (LibraryCallKit*)this; }
ciMethod* caller() const { return jvms()->method(); }
int bci() const { return jvms()->bci(); }
LibraryIntrinsic* intrinsic() const { return _intrinsic; }
vmIntrinsics::ID intrinsic_id() const { return _intrinsic->intrinsic_id(); }
ciMethod* callee() const { return _intrinsic->method(); }
bool try_to_inline(int predicate);
Node* try_to_predicate(int predicate);
void push_result() {
// Push the result onto the stack.
if (!stopped() && result() != NULL) {
BasicType bt = result()->bottom_type()->basic_type();
push_node(bt, result());
}
}
private:
void fatal_unexpected_iid(vmIntrinsics::ID iid) {
fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)));
}
void set_result(Node* n) { assert(_result == NULL, "only set once"); _result = n; }
void set_result(RegionNode* region, PhiNode* value);
Node* result() { return _result; }
virtual int reexecute_sp() { return _reexecute_sp; }
// Helper functions to inline natives
Node* generate_guard(Node* test, RegionNode* region, float true_prob);
Node* generate_slow_guard(Node* test, RegionNode* region);
Node* generate_fair_guard(Node* test, RegionNode* region);
Node* generate_negative_guard(Node* index, RegionNode* region,
// resulting CastII of index:
Node* *pos_index = NULL);
Node* generate_nonpositive_guard(Node* index, bool never_negative,
// resulting CastII of index:
Node* *pos_index = NULL);
Node* generate_limit_guard(Node* offset, Node* subseq_length,
Node* array_length,
RegionNode* region);
Node* generate_current_thread(Node* &tls_output);
address basictype2arraycopy(BasicType t, Node *src_offset, Node *dest_offset,
bool disjoint_bases, const char* &name, bool dest_uninitialized);
Node* load_mirror_from_klass(Node* klass);
Node* load_klass_from_mirror_common(Node* mirror, bool never_see_null,
RegionNode* region, int null_path,
int offset);
Node* load_klass_from_mirror(Node* mirror, bool never_see_null,
RegionNode* region, int null_path) {
int offset = java_lang_Class::klass_offset_in_bytes();
return load_klass_from_mirror_common(mirror, never_see_null,
region, null_path,
offset);
}
Node* load_array_klass_from_mirror(Node* mirror, bool never_see_null,
RegionNode* region, int null_path) {
int offset = java_lang_Class::array_klass_offset_in_bytes();
return load_klass_from_mirror_common(mirror, never_see_null,
region, null_path,
offset);
}
Node* generate_access_flags_guard(Node* kls,
int modifier_mask, int modifier_bits,
RegionNode* region);
Node* generate_interface_guard(Node* kls, RegionNode* region);
Node* generate_array_guard(Node* kls, RegionNode* region) {
return generate_array_guard_common(kls, region, false, false);
}
Node* generate_non_array_guard(Node* kls, RegionNode* region) {
return generate_array_guard_common(kls, region, false, true);
}
Node* generate_objArray_guard(Node* kls, RegionNode* region) {
return generate_array_guard_common(kls, region, true, false);
}
Node* generate_non_objArray_guard(Node* kls, RegionNode* region) {
return generate_array_guard_common(kls, region, true, true);
}
Node* generate_array_guard_common(Node* kls, RegionNode* region,
bool obj_array, bool not_array);
Node* generate_virtual_guard(Node* obj_klass, RegionNode* slow_region);
CallJavaNode* generate_method_call(vmIntrinsics::ID method_id,
bool is_virtual = false, bool is_static = false);
CallJavaNode* generate_method_call_static(vmIntrinsics::ID method_id) {
return generate_method_call(method_id, false, true);
}
CallJavaNode* generate_method_call_virtual(vmIntrinsics::ID method_id) {
return generate_method_call(method_id, true, false);
}
Node * load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static);
Node* make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2);
Node* make_string_method_node(int opcode, Node* str1, Node* str2);
bool inline_string_compareTo();
bool inline_string_indexOf();
Node* string_indexOf(Node* string_object, ciTypeArray* target_array, jint offset, jint cache_i, jint md2_i);
bool inline_string_equals();
Node* round_double_node(Node* n);
bool runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName);
bool inline_math_native(vmIntrinsics::ID id);
bool inline_trig(vmIntrinsics::ID id);
bool inline_math(vmIntrinsics::ID id);
template <typename OverflowOp>
bool inline_math_overflow(Node* arg1, Node* arg2);
void inline_math_mathExact(Node* math, Node* test);
bool inline_math_addExactI(bool is_increment);
bool inline_math_addExactL(bool is_increment);
bool inline_math_multiplyExactI();
bool inline_math_multiplyExactL();
bool inline_math_negateExactI();
bool inline_math_negateExactL();
bool inline_math_subtractExactI(bool is_decrement);
bool inline_math_subtractExactL(bool is_decrement);
bool inline_exp();
bool inline_pow();
Node* finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName);
bool inline_min_max(vmIntrinsics::ID id);
Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y);
// This returns Type::AnyPtr, RawPtr, or OopPtr.
int classify_unsafe_addr(Node* &base, Node* &offset);
Node* make_unsafe_address(Node* base, Node* offset);
// Helper for inline_unsafe_access.
// Generates the guards that check whether the result of
// Unsafe.getObject should be recorded in an SATB log buffer.
void insert_pre_barrier(Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar);
bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile, bool is_unaligned);
bool inline_unsafe_prefetch(bool is_native_ptr, bool is_store, bool is_static);
static bool klass_needs_init_guard(Node* kls);
bool inline_unsafe_allocate();
bool inline_unsafe_copyMemory();
bool inline_native_currentThread();
#ifdef JFR_HAVE_INTRINSICS
bool inline_native_classID();
bool inline_native_getEventWriter();
#endif
bool inline_native_time_funcs(address method, const char* funcName);
bool inline_native_isInterrupted();
bool inline_native_Class_query(vmIntrinsics::ID id);
bool inline_native_subtype_check();
bool inline_native_newArray();
bool inline_native_getLength();
bool inline_array_copyOf(bool is_copyOfRange);
bool inline_array_equals();
void copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array, bool card_mark);
bool inline_native_clone(bool is_virtual);
bool inline_native_Reflection_getCallerClass();
// Helper function for inlining native object hash method
bool inline_native_hashcode(bool is_virtual, bool is_static);
bool inline_native_getClass();
// Helper functions for inlining arraycopy
bool inline_arraycopy();
void generate_arraycopy(const TypePtr* adr_type,
BasicType basic_elem_type,
Node* src, Node* src_offset,
Node* dest, Node* dest_offset,
Node* copy_length,
bool disjoint_bases = false,
bool length_never_negative = false,
RegionNode* slow_region = NULL);
AllocateArrayNode* tightly_coupled_allocation(Node* ptr,
RegionNode* slow_region);
void generate_clear_array(const TypePtr* adr_type,
Node* dest,
BasicType basic_elem_type,
Node* slice_off,
Node* slice_len,
Node* slice_end);
bool generate_block_arraycopy(const TypePtr* adr_type,
BasicType basic_elem_type,
AllocateNode* alloc,
Node* src, Node* src_offset,
Node* dest, Node* dest_offset,
Node* dest_size, bool dest_uninitialized);
void generate_slow_arraycopy(const TypePtr* adr_type,
Node* src, Node* src_offset,
Node* dest, Node* dest_offset,
Node* copy_length, bool dest_uninitialized);
Node* generate_checkcast_arraycopy(const TypePtr* adr_type,
Node* dest_elem_klass,
Node* src, Node* src_offset,
Node* dest, Node* dest_offset,
Node* copy_length, bool dest_uninitialized);
Node* generate_generic_arraycopy(const TypePtr* adr_type,
Node* src, Node* src_offset,
Node* dest, Node* dest_offset,
Node* copy_length, bool dest_uninitialized);
void generate_unchecked_arraycopy(const TypePtr* adr_type,
BasicType basic_elem_type,
bool disjoint_bases,
Node* src, Node* src_offset,
Node* dest, Node* dest_offset,
Node* copy_length, bool dest_uninitialized);
typedef enum { LS_xadd, LS_xchg, LS_cmpxchg } LoadStoreKind;
bool inline_unsafe_load_store(BasicType type, LoadStoreKind kind);
bool inline_unsafe_ordered_store(BasicType type);
bool inline_unsafe_fence(vmIntrinsics::ID id);
bool inline_fp_conversions(vmIntrinsics::ID id);
bool inline_number_methods(vmIntrinsics::ID id);
bool inline_reference_get();
bool inline_aescrypt_Block(vmIntrinsics::ID id);
bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id);
Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
bool inline_ghash_processBlocks();
bool inline_sha_implCompress(vmIntrinsics::ID id);
bool inline_digestBase_implCompressMB(int predicate);
bool inline_sha_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass_SHA,
bool long_state, address stubAddr, const char *stubName,
Node* src_start, Node* ofs, Node* limit);
Node* get_state_from_sha_object(Node *sha_object);
Node* get_state_from_sha5_object(Node *sha_object);
Node* inline_digestBase_implCompressMB_predicate(int predicate);
bool inline_encodeISOArray();
bool inline_updateCRC32();
bool inline_updateBytesCRC32();
bool inline_updateByteBufferCRC32();
bool inline_multiplyToLen();
bool inline_squareToLen();
bool inline_mulAdd();
bool inline_montgomeryMultiply();
bool inline_montgomerySquare();
bool inline_profileBoolean();
};
//------------------------intrinsic_name_in_list--------------------------
// Returns true if 'name' is one complete entry of 'list', where entries are
// separated by commas, spaces or newlines.  Returns false when either
// argument is NULL or empty.  Whole entries are compared (instead of using
// strstr) so that listing a longer name such as _getCharVolatile does not
// also match the shorter name _getChar that is merely a substring of it.
static bool intrinsic_name_in_list(const char* list, const char* name) {
  if (list == NULL || name == NULL || name[0] == '\0') {
    return false;
  }
  const size_t name_len = strlen(name);
  const char* p = list;
  while (*p != '\0') {
    // Skip any run of separators in front of the next entry.
    while (*p == ',' || *p == ' ' || *p == '\n') {
      p++;
    }
    // Scan to the end of the entry.
    const char* entry = p;
    while (*p != '\0' && *p != ',' && *p != ' ' && *p != '\n') {
      p++;
    }
    if ((size_t)(p - entry) == name_len && strncmp(entry, name, name_len) == 0) {
      return true;
    }
  }
  return false;
}

//---------------------------make_vm_intrinsic----------------------------
// Decides whether the intrinsic method 'm' may be expanded inline and, if so,
// returns a new LibraryIntrinsic call generator for it.  Returns NULL when
// the intrinsic is declined: disabled by name, method not loaded, virtual
// call on an intrinsic without a virtual flavor, switched off by one of the
// controlling flags, or not supported by the matcher.
//
//   m          - the callee; its intrinsic id must not be vmIntrinsics::_none.
//   is_virtual - true for a virtual call site; only _hashCode and _clone are
//                accepted in that case.
CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
  vmIntrinsics::ID id = m->intrinsic_id();
  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");

  ccstr disable_intr = NULL;
  const char* intrinsic_name = vmIntrinsics::name_at(id);

  // Check the global flag first, then the per-method "DisableIntrinsic"
  // option.  Names are matched as whole list entries, not as substrings.
  if (intrinsic_name_in_list(DisableIntrinsic, intrinsic_name) ||
      (method_has_option_value("DisableIntrinsic", disable_intr)
       && intrinsic_name_in_list(disable_intr, intrinsic_name))) {
    // disabled by a user request on the command line:
    // example: -XX:DisableIntrinsic=_hashCode,_getClass
    return NULL;
  }

  if (!m->is_loaded()) {
    // do not attempt to inline unloaded methods
    return NULL;
  }

  // Only a few intrinsics implement a virtual dispatch.
  // They are expensive calls which are also frequently overridden.
  if (is_virtual) {
    switch (id) {
    case vmIntrinsics::_hashCode:
    case vmIntrinsics::_clone:
      // OK, Object.hashCode and Object.clone intrinsics come in both flavors
      break;
    default:
      return NULL;
    }
  }

  // -XX:-InlineNatives disables nearly all intrinsics:
  if (!InlineNatives) {
    switch (id) {
    case vmIntrinsics::_indexOf:
    case vmIntrinsics::_compareTo:
    case vmIntrinsics::_equals:
    case vmIntrinsics::_equalsC:
    case vmIntrinsics::_getAndAddInt:
    case vmIntrinsics::_getAndAddLong:
    case vmIntrinsics::_getAndSetInt:
    case vmIntrinsics::_getAndSetLong:
    case vmIntrinsics::_getAndSetObject:
    case vmIntrinsics::_loadFence:
    case vmIntrinsics::_storeFence:
    case vmIntrinsics::_fullFence:
      break;  // InlineNatives does not control any of the intrinsics listed above
    case vmIntrinsics::_Reference_get:
      break;  // InlineNatives does not control Reference.get
    default:
      return NULL;
    }
  }

  // Both values are handed to the LibraryIntrinsic constructor at the end.
  int predicates = 0;
  bool does_virtual_dispatch = false;

  // Per-intrinsic checks: controlling flags and matcher support.
  switch (id) {
  case vmIntrinsics::_compareTo:
    if (!SpecialStringCompareTo) return NULL;
    if (!Matcher::match_rule_supported(Op_StrComp)) return NULL;
    break;
  case vmIntrinsics::_indexOf:
    if (!SpecialStringIndexOf) return NULL;
    break;
  case vmIntrinsics::_equals:
    if (!SpecialStringEquals) return NULL;
    if (!Matcher::match_rule_supported(Op_StrEquals)) return NULL;
    break;
  case vmIntrinsics::_equalsC:
    if (!SpecialArraysEquals) return NULL;
    if (!Matcher::match_rule_supported(Op_AryEq)) return NULL;
    break;
  case vmIntrinsics::_arraycopy:
    if (!InlineArrayCopy) return NULL;
    break;
  case vmIntrinsics::_copyMemory:
    if (StubRoutines::unsafe_arraycopy() == NULL) return NULL;
    if (!InlineArrayCopy) return NULL;
    break;
  case vmIntrinsics::_hashCode:
    if (!InlineObjectHash) return NULL;
    does_virtual_dispatch = true;
    break;
  case vmIntrinsics::_clone:
    does_virtual_dispatch = true;
    // fall through: clone is subject to the same flag checks as copyOf
  case vmIntrinsics::_copyOf:
  case vmIntrinsics::_copyOfRange:
    if (!InlineObjectCopy) return NULL;
    // These also use the arraycopy intrinsic mechanism:
    if (!InlineArrayCopy) return NULL;
    break;
  case vmIntrinsics::_encodeISOArray:
    if (!SpecialEncodeISOArray) return NULL;
    if (!Matcher::match_rule_supported(Op_EncodeISOArray)) return NULL;
    break;
  case vmIntrinsics::_checkIndex:
    // We do not intrinsify this. The optimizer does fine with it.
    return NULL;

  case vmIntrinsics::_getCallerClass:
    if (!UseNewReflection) return NULL;
    if (!InlineReflectionGetCallerClass) return NULL;
    if (SystemDictionary::reflect_CallerSensitive_klass() == NULL) return NULL;
    break;

  case vmIntrinsics::_bitCount_i:
    if (!Matcher::match_rule_supported(Op_PopCountI)) return NULL;
    break;
  case vmIntrinsics::_bitCount_l:
    if (!Matcher::match_rule_supported(Op_PopCountL)) return NULL;
    break;
  case vmIntrinsics::_numberOfLeadingZeros_i:
    if (!Matcher::match_rule_supported(Op_CountLeadingZerosI)) return NULL;
    break;
  case vmIntrinsics::_numberOfLeadingZeros_l:
    if (!Matcher::match_rule_supported(Op_CountLeadingZerosL)) return NULL;
    break;
  case vmIntrinsics::_numberOfTrailingZeros_i:
    if (!Matcher::match_rule_supported(Op_CountTrailingZerosI)) return NULL;
    break;
  case vmIntrinsics::_numberOfTrailingZeros_l:
    if (!Matcher::match_rule_supported(Op_CountTrailingZerosL)) return NULL;
    break;
  case vmIntrinsics::_reverseBytes_c:
    if (!Matcher::match_rule_supported(Op_ReverseBytesUS)) return NULL;
    break;
  case vmIntrinsics::_reverseBytes_s:
    if (!Matcher::match_rule_supported(Op_ReverseBytesS)) return NULL;
    break;
  case vmIntrinsics::_reverseBytes_i:
    if (!Matcher::match_rule_supported(Op_ReverseBytesI)) return NULL;
    break;
  case vmIntrinsics::_reverseBytes_l:
    if (!Matcher::match_rule_supported(Op_ReverseBytesL)) return NULL;
    break;

  case vmIntrinsics::_Reference_get:
    // Use the intrinsic version of Reference.get() so that the value in
    // the referent field can be registered by the G1 pre-barrier code.
    // Also add memory barrier to prevent commoning reads from this field
    // across safepoint since GC can change its value.
    break;

  case vmIntrinsics::_compareAndSwapObject:
#ifdef _LP64
    if (!UseCompressedOops && !Matcher::match_rule_supported(Op_CompareAndSwapP)) return NULL;
#endif
    break;
  case vmIntrinsics::_compareAndSwapLong:
    if (!Matcher::match_rule_supported(Op_CompareAndSwapL)) return NULL;
    break;
  case vmIntrinsics::_getAndAddInt:
    if (!Matcher::match_rule_supported(Op_GetAndAddI)) return NULL;
    break;
  case vmIntrinsics::_getAndAddLong:
    if (!Matcher::match_rule_supported(Op_GetAndAddL)) return NULL;
    break;
  case vmIntrinsics::_getAndSetInt:
    if (!Matcher::match_rule_supported(Op_GetAndSetI)) return NULL;
    break;
  case vmIntrinsics::_getAndSetLong:
    if (!Matcher::match_rule_supported(Op_GetAndSetL)) return NULL;
    break;
  case vmIntrinsics::_getAndSetObject:
    // The required match rule depends on the pointer representation in use.
#ifdef _LP64
    if (!UseCompressedOops && !Matcher::match_rule_supported(Op_GetAndSetP)) return NULL;
    if (UseCompressedOops && !Matcher::match_rule_supported(Op_GetAndSetN)) return NULL;
    break;
#else
    if (!Matcher::match_rule_supported(Op_GetAndSetP)) return NULL;
    break;
#endif

  case vmIntrinsics::_aescrypt_encryptBlock:
  case vmIntrinsics::_aescrypt_decryptBlock:
    if (!UseAESIntrinsics) return NULL;
    break;
  case vmIntrinsics::_multiplyToLen:
    if (!UseMultiplyToLenIntrinsic) return NULL;
    break;
  case vmIntrinsics::_squareToLen:
    if (!UseSquareToLenIntrinsic) return NULL;
    break;
  case vmIntrinsics::_mulAdd:
    if (!UseMulAddIntrinsic) return NULL;
    break;
  case vmIntrinsics::_montgomeryMultiply:
    if (!UseMontgomeryMultiplyIntrinsic) return NULL;
    break;
  case vmIntrinsics::_montgomerySquare:
    if (!UseMontgomerySquareIntrinsic) return NULL;
    break;
  case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
  case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
    if (!UseAESIntrinsics) return NULL;
    // these two require the predicated logic
    predicates = 1;
    break;
  case vmIntrinsics::_sha_implCompress:
    if (!UseSHA1Intrinsics) return NULL;
    break;
  case vmIntrinsics::_sha2_implCompress:
    if (!UseSHA256Intrinsics) return NULL;
    break;
  case vmIntrinsics::_sha5_implCompress:
    if (!UseSHA512Intrinsics) return NULL;
    break;
  case vmIntrinsics::_digestBase_implCompressMB:
    if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) return NULL;
    // NOTE(review): presumably one predicate per SHA flavor checked above -- confirm.
    predicates = 3;
    break;
  case vmIntrinsics::_ghash_processBlocks:
    if (!UseGHASHIntrinsics) return NULL;
    break;
  case vmIntrinsics::_updateCRC32:
  case vmIntrinsics::_updateBytesCRC32:
  case vmIntrinsics::_updateByteBufferCRC32:
    if (!UseCRC32Intrinsics) return NULL;
    break;

  // Math.*Exact: each needs the matching overflow match rule and
  // UseMathExactIntrinsics.  negateExact is checked against the
  // subtract-overflow rule.
  case vmIntrinsics::_incrementExactI:
  case vmIntrinsics::_addExactI:
    if (!Matcher::match_rule_supported(Op_OverflowAddI) || !UseMathExactIntrinsics) return NULL;
    break;
  case vmIntrinsics::_incrementExactL:
  case vmIntrinsics::_addExactL:
    if (!Matcher::match_rule_supported(Op_OverflowAddL) || !UseMathExactIntrinsics) return NULL;
    break;
  case vmIntrinsics::_decrementExactI:
  case vmIntrinsics::_subtractExactI:
    if (!Matcher::match_rule_supported(Op_OverflowSubI) || !UseMathExactIntrinsics) return NULL;
    break;
  case vmIntrinsics::_decrementExactL:
  case vmIntrinsics::_subtractExactL:
    if (!Matcher::match_rule_supported(Op_OverflowSubL) || !UseMathExactIntrinsics) return NULL;
    break;
  case vmIntrinsics::_negateExactI:
    if (!Matcher::match_rule_supported(Op_OverflowSubI) || !UseMathExactIntrinsics) return NULL;
    break;
  case vmIntrinsics::_negateExactL:
    if (!Matcher::match_rule_supported(Op_OverflowSubL) || !UseMathExactIntrinsics) return NULL;
    break;
  case vmIntrinsics::_multiplyExactI:
    if (!Matcher::match_rule_supported(Op_OverflowMulI) || !UseMathExactIntrinsics) return NULL;
    break;
  case vmIntrinsics::_multiplyExactL:
    if (!Matcher::match_rule_supported(Op_OverflowMulL) || !UseMathExactIntrinsics) return NULL;
    break;

  default:
    assert(id <= vmIntrinsics::LAST_COMPILER_INLINE, "caller responsibility");
    assert(id != vmIntrinsics::_Object_init && id != vmIntrinsics::_invoke, "enum out of order?");
    break;
  }

  // -XX:-InlineClassNatives disables natives from the Class class.
  // The flag applies to all reflective calls, notably Array.newArray
  // (visible to Java programmers as Array.newInstance).
  if (m->holder()->name() == ciSymbol::java_lang_Class() ||
      m->holder()->name() == ciSymbol::java_lang_reflect_Array()) {
    if (!InlineClassNatives) return NULL;
  }

  // -XX:-InlineThreadNatives disables natives from the Thread class.
  if (m->holder()->name() == ciSymbol::java_lang_Thread()) {
    if (!InlineThreadNatives) return NULL;
  }

  // -XX:-InlineMathNatives disables natives from the Math,Float and Double classes.
  if (m->holder()->name() == ciSymbol::java_lang_Math() ||
      m->holder()->name() == ciSymbol::java_lang_Float() ||
      m->holder()->name() == ciSymbol::java_lang_Double()) {
    if (!InlineMathNatives) return NULL;
  }

  // -XX:-InlineUnsafeOps disables natives from the Unsafe class.
  if (m->holder()->name() == ciSymbol::sun_misc_Unsafe()) {
    if (!InlineUnsafeOps) return NULL;
  }

  return new LibraryIntrinsic(m, is_virtual, predicates, does_virtual_dispatch, (vmIntrinsics::ID) id);
}
//----------------------register_library_intrinsics-----------------------
// Initialize this file's data structures, for each Compile instance.
// The body is currently empty: no per-Compile state is set up here.
void Compile::register_library_intrinsics() {
// Nothing to do here.
}
// Attempts to expand this intrinsic inline at the call described by 'jvms'.
// On success the result is pushed onto the stack and the JVM state produced
// by kit.transfer_exceptions_into_jvms() is returned.  If the expansion bails
// out, the failure is reported/recorded and NULL is returned.
JVMState* LibraryIntrinsic::generate(JVMState* jvms) {
  LibraryCallKit kit(jvms, this);
  Compile* C = kit.C;
  // Snapshot of the node counter; the delta is reported in the compile log.
  int nodes_before = C->unique();
#ifndef PRODUCT
  if ((C->print_intrinsics() || C->print_inlining()) && Verbose) {
    char name_buf[1000];
    const char* short_name = vmIntrinsics::short_name_as_C_string(intrinsic_id(), name_buf, sizeof(name_buf));
    tty->print_cr("Intrinsic %s", short_name);
  }
#endif
  ciMethod* target = kit.callee();
  const int call_bci = kit.bci();

  // Try to inline the intrinsic; handle the bail-out case first.
  if (!kit.try_to_inline(_last_predicate)) {
    if (C->print_intrinsics() || C->print_inlining()) {
      if (!jvms->has_method()) {
        // Root compile
        tty->print("Did not generate intrinsic %s%s at bci:%d in",
                   vmIntrinsics::name_at(intrinsic_id()),
                   (is_virtual() ? " (virtual)" : ""), call_bci);
      } else {
        // Not a root compile.
        const char* failure_msg;
        if (is_virtual()) {
          failure_msg = "failed to inline (intrinsic, virtual)";
        } else {
          failure_msg = "failed to inline (intrinsic)";
        }
        C->print_inlining(target, jvms->depth() - 1, call_bci, failure_msg);
      }
    }
    C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
    return NULL;
  }

  // The intrinsic was expanded: report it, record statistics and log it.
  if (C->print_intrinsics() || C->print_inlining()) {
    const char* success_msg;
    if (is_virtual()) {
      success_msg = "(intrinsic, virtual)";
    } else {
      success_msg = "(intrinsic)";
    }
    C->print_inlining(target, jvms->depth() - 1, call_bci, success_msg);
  }
  C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked);
  if (C->log()) {
    C->log()->elem("intrinsic id='%s'%s nodes='%d'",
                   vmIntrinsics::name_at(intrinsic_id()),
                   (is_virtual() ? " virtual='1'" : ""),
                   C->unique() - nodes_before);
  }
  // Push the result from the inlined method onto the stack.
  kit.push_result();
  return kit.transfer_exceptions_into_jvms();
}
// Builds the guard for predicate number 'predicate' of this intrinsic and
// remembers that number in _last_predicate.  Returns the slow-path control
// produced by kit.try_to_predicate() (which may be NULL if the check folds),
// or NULL after reporting a failure when the kit is failing.
Node* LibraryIntrinsic::generate_predicate(JVMState* jvms, int predicate) {
  LibraryCallKit kit(jvms, this);
  Compile* C = kit.C;
  // Snapshot of the node counter; the delta is reported in the compile log.
  int nodes_before = C->unique();
  _last_predicate = predicate;
#ifndef PRODUCT
  assert(is_predicated() && predicate < predicates_count(), "sanity");
  if ((C->print_intrinsics() || C->print_inlining()) && Verbose) {
    char name_buf[1000];
    const char* short_name = vmIntrinsics::short_name_as_C_string(intrinsic_id(), name_buf, sizeof(name_buf));
    tty->print_cr("Predicate for intrinsic %s", short_name);
  }
#endif
  ciMethod* target = kit.callee();
  const int call_bci = kit.bci();

  Node* slow_path = kit.try_to_predicate(predicate);

  // Handle the bail-out case first.
  if (kit.failing()) {
    if (C->print_intrinsics() || C->print_inlining()) {
      if (!jvms->has_method()) {
        // Root compile
        C->print_inlining_stream()->print("Did not generate predicate for intrinsic %s%s at bci:%d in",
                                          vmIntrinsics::name_at(intrinsic_id()),
                                          (is_virtual() ? " (virtual)" : ""), call_bci);
      } else {
        // Not a root compile.
        const char* failure_msg = "failed to generate predicate for intrinsic";
        C->print_inlining(kit.callee(), jvms->depth() - 1, call_bci, failure_msg);
      }
    }
    C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
    return NULL;
  }

  // The predicate was generated: report it, record statistics and log it.
  if (C->print_intrinsics() || C->print_inlining()) {
    const char* success_msg;
    if (is_virtual()) {
      success_msg = "(intrinsic, virtual, predicate)";
    } else {
      success_msg = "(intrinsic, predicate)";
    }
    C->print_inlining(target, jvms->depth() - 1, call_bci, success_msg);
  }
  C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked);
  if (C->log()) {
    C->log()->elem("predicate_intrinsic id='%s'%s nodes='%d'",
                   vmIntrinsics::name_at(intrinsic_id()),
                   (is_virtual() ? " virtual='1'" : ""),
                   C->unique() - nodes_before);
  }
  return slow_path; // Could be NULL if the check folds.
}
// Dispatches on intrinsic_id() to the inline_* helper that expands this
// intrinsic and returns that helper's result.  Returns false for an id that
// has no case below.  LibraryIntrinsic::generate treats a true result as a
// successful expansion and a false result as a bail-out.
// 'predicate' is forwarded only to inline_digestBase_implCompressMB.
bool LibraryCallKit::try_to_inline(int predicate) {
// Handle symbolic names for otherwise undistinguished boolean switches:
// each constant is true, so call sites pass the name itself for "yes" and
// its negation (e.g. !is_store) for "no".
const bool is_store = true;
const bool is_native_ptr = true;
const bool is_static = true;
const bool is_volatile = true;
if (!jvms()->has_method()) {
// Root JVMState has a null method.
assert(map()->memory()->Opcode() == Op_Parm, "");
// Insert the memory aliasing node
set_all_memory(reset_memory());
}
assert(merged_memory(), "");
switch (intrinsic_id()) {
// Object hash code and getClass.
case vmIntrinsics::_hashCode: return inline_native_hashcode(intrinsic()->is_virtual(), !is_static);
case vmIntrinsics::_identityHashCode: return inline_native_hashcode(/*!virtual*/ false, is_static);
case vmIntrinsics::_getClass: return inline_native_getClass();
// Floating-point math routines share one helper keyed by the intrinsic id.
case vmIntrinsics::_dsin:
case vmIntrinsics::_dcos:
case vmIntrinsics::_dtan:
case vmIntrinsics::_dabs:
case vmIntrinsics::_datan2:
case vmIntrinsics::_dsqrt:
case vmIntrinsics::_dexp:
case vmIntrinsics::_dlog:
case vmIntrinsics::_dlog10:
case vmIntrinsics::_dpow: return inline_math_native(intrinsic_id());
case vmIntrinsics::_min:
case vmIntrinsics::_max: return inline_min_max(intrinsic_id());
// Exact arithmetic: increment/decrement reuse the add/subtract helpers
// with the boolean argument set to true.
case vmIntrinsics::_addExactI: return inline_math_addExactI(false /* add */);
case vmIntrinsics::_addExactL: return inline_math_addExactL(false /* add */);
case vmIntrinsics::_decrementExactI: return inline_math_subtractExactI(true /* decrement */);
case vmIntrinsics::_decrementExactL: return inline_math_subtractExactL(true /* decrement */);
case vmIntrinsics::_incrementExactI: return inline_math_addExactI(true /* increment */);
case vmIntrinsics::_incrementExactL: return inline_math_addExactL(true /* increment */);
case vmIntrinsics::_multiplyExactI: return inline_math_multiplyExactI();
case vmIntrinsics::_multiplyExactL: return inline_math_multiplyExactL();
case vmIntrinsics::_negateExactI: return inline_math_negateExactI();
case vmIntrinsics::_negateExactL: return inline_math_negateExactL();
case vmIntrinsics::_subtractExactI: return inline_math_subtractExactI(false /* subtract */);
case vmIntrinsics::_subtractExactL: return inline_math_subtractExactL(false /* subtract */);
case vmIntrinsics::_arraycopy: return inline_arraycopy();
// String intrinsics.
case vmIntrinsics::_compareTo: return inline_string_compareTo();
case vmIntrinsics::_indexOf: return inline_string_indexOf();
case vmIntrinsics::_equals: return inline_string_equals();
// Unsafe get/put, non-native pointer, non-volatile.  The final argument is
// the is_unaligned parameter of inline_unsafe_access; it is false for every
// case in this switch.
case vmIntrinsics::_getObject: return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, !is_volatile, false);
case vmIntrinsics::_getBoolean: return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, !is_volatile, false);
case vmIntrinsics::_getByte: return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE, !is_volatile, false);
case vmIntrinsics::_getShort: return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, !is_volatile, false);
case vmIntrinsics::_getChar: return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, !is_volatile, false);
case vmIntrinsics::_getInt: return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, !is_volatile, false);
case vmIntrinsics::_getLong: return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, !is_volatile, false);
case vmIntrinsics::_getFloat: return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, !is_volatile, false);
case vmIntrinsics::_getDouble: return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, !is_volatile, false);
case vmIntrinsics::_putObject: return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, !is_volatile, false);
case vmIntrinsics::_putBoolean: return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, !is_volatile, false);
case vmIntrinsics::_putByte: return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, !is_volatile, false);
case vmIntrinsics::_putShort: return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, !is_volatile, false);
case vmIntrinsics::_putChar: return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, !is_volatile, false);
case vmIntrinsics::_putInt: return inline_unsafe_access(!is_native_ptr, is_store, T_INT, !is_volatile, false);
case vmIntrinsics::_putLong: return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, !is_volatile, false);
case vmIntrinsics::_putFloat: return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, !is_volatile, false);
case vmIntrinsics::_putDouble: return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, !is_volatile, false);
// Unsafe get/put through a native pointer (the *_raw ids), non-volatile.
case vmIntrinsics::_getByte_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_BYTE, !is_volatile, false);
case vmIntrinsics::_getShort_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_SHORT, !is_volatile, false);
case vmIntrinsics::_getChar_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_CHAR, !is_volatile, false);
case vmIntrinsics::_getInt_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_INT, !is_volatile, false);
case vmIntrinsics::_getLong_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_LONG, !is_volatile, false);
case vmIntrinsics::_getFloat_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_FLOAT, !is_volatile, false);
case vmIntrinsics::_getDouble_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_DOUBLE, !is_volatile, false);
case vmIntrinsics::_getAddress_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_ADDRESS, !is_volatile, false);
case vmIntrinsics::_putByte_raw: return inline_unsafe_access( is_native_ptr, is_store, T_BYTE, !is_volatile, false);
case vmIntrinsics::_putShort_raw: return inline_unsafe_access( is_native_ptr, is_store, T_SHORT, !is_volatile, false);
case vmIntrinsics::_putChar_raw: return inline_unsafe_access( is_native_ptr, is_store, T_CHAR, !is_volatile, false);
case vmIntrinsics::_putInt_raw: return inline_unsafe_access( is_native_ptr, is_store, T_INT, !is_volatile, false);
case vmIntrinsics::_putLong_raw: return inline_unsafe_access( is_native_ptr, is_store, T_LONG, !is_volatile, false);
case vmIntrinsics::_putFloat_raw: return inline_unsafe_access( is_native_ptr, is_store, T_FLOAT, !is_volatile, false);
case vmIntrinsics::_putDouble_raw: return inline_unsafe_access( is_native_ptr, is_store, T_DOUBLE, !is_volatile, false);
case vmIntrinsics::_putAddress_raw: return inline_unsafe_access( is_native_ptr, is_store, T_ADDRESS, !is_volatile, false);
// Unsafe volatile get/put, non-native pointer.
case vmIntrinsics::_getObjectVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, is_volatile, false);
case vmIntrinsics::_getBooleanVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, is_volatile, false);
case vmIntrinsics::_getByteVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE, is_volatile, false);
case vmIntrinsics::_getShortVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, is_volatile, false);
case vmIntrinsics::_getCharVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, is_volatile, false);
case vmIntrinsics::_getIntVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, is_volatile, false);
case vmIntrinsics::_getLongVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, is_volatile, false);
case vmIntrinsics::_getFloatVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, is_volatile, false);
case vmIntrinsics::_getDoubleVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, is_volatile, false);
case vmIntrinsics::_putObjectVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, is_volatile, false);
case vmIntrinsics::_putBooleanVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, is_volatile, false);
case vmIntrinsics::_putByteVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, is_volatile, false);
case vmIntrinsics::_putShortVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, is_volatile, false);
case vmIntrinsics::_putCharVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, is_volatile, false);
case vmIntrinsics::_putIntVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_INT, is_volatile, false);
case vmIntrinsics::_putLongVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, is_volatile, false);
case vmIntrinsics::_putFloatVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, is_volatile, false);
case vmIntrinsics::_putDoubleVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, is_volatile, false);
// Unsafe prefetch, in instance and static variants.
case vmIntrinsics::_prefetchRead: return inline_unsafe_prefetch(!is_native_ptr, !is_store, !is_static);
case vmIntrinsics::_prefetchWrite: return inline_unsafe_prefetch(!is_native_ptr, is_store, !is_static);
case vmIntrinsics::_prefetchReadStatic: return inline_unsafe_prefetch(!is_native_ptr, !is_store, is_static);
case vmIntrinsics::_prefetchWriteStatic: return inline_unsafe_prefetch(!is_native_ptr, is_store, is_static);
// Unsafe compare-and-swap, ordered stores, and get-and-add / get-and-set.
case vmIntrinsics::_compareAndSwapObject: return inline_unsafe_load_store(T_OBJECT, LS_cmpxchg);
case vmIntrinsics::_compareAndSwapInt: return inline_unsafe_load_store(T_INT, LS_cmpxchg);
case vmIntrinsics::_compareAndSwapLong: return inline_unsafe_load_store(T_LONG, LS_cmpxchg);
case vmIntrinsics::_putOrderedObject: return inline_unsafe_ordered_store(T_OBJECT);
case vmIntrinsics::_putOrderedInt: return inline_unsafe_ordered_store(T_INT);
case vmIntrinsics::_putOrderedLong: return inline_unsafe_ordered_store(T_LONG);
case vmIntrinsics::_getAndAddInt: return inline_unsafe_load_store(T_INT, LS_xadd);
case vmIntrinsics::_getAndAddLong: return inline_unsafe_load_store(T_LONG, LS_xadd);
case vmIntrinsics::_getAndSetInt: return inline_unsafe_load_store(T_INT, LS_xchg);
case vmIntrinsics::_getAndSetLong: return inline_unsafe_load_store(T_LONG, LS_xchg);
case vmIntrinsics::_getAndSetObject: return inline_unsafe_load_store(T_OBJECT, LS_xchg);
case vmIntrinsics::_loadFence:
case vmIntrinsics::_storeFence:
case vmIntrinsics::_fullFence: return inline_unsafe_fence(intrinsic_id());
// Thread and time natives.
case vmIntrinsics::_currentThread: return inline_native_currentThread();
case vmIntrinsics::_isInterrupted: return inline_native_isInterrupted();
#ifdef JFR_HAVE_INTRINSICS
case vmIntrinsics::_counterTime: return inline_native_time_funcs(CAST_FROM_FN_PTR(address, JFR_TIME_FUNCTION), "counterTime");
case vmIntrinsics::_getClassId: return inline_native_classID();
case vmIntrinsics::_getEventWriter: return inline_native_getEventWriter();
#endif
case vmIntrinsics::_currentTimeMillis: return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeMillis), "currentTimeMillis");
case vmIntrinsics::_nanoTime: return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeNanos), "nanoTime");
// Allocation, memory copy, array and clone intrinsics.
case vmIntrinsics::_allocateInstance: return inline_unsafe_allocate();
case vmIntrinsics::_copyMemory: return inline_unsafe_copyMemory();
case vmIntrinsics::_newArray: return inline_native_newArray();
case vmIntrinsics::_getLength: return inline_native_getLength();
case vmIntrinsics::_copyOf: return inline_array_copyOf(false);
case vmIntrinsics::_copyOfRange: return inline_array_copyOf(true);
case vmIntrinsics::_equalsC: return inline_array_equals();
case vmIntrinsics::_clone: return inline_native_clone(intrinsic()->is_virtual());
// Class queries.
case vmIntrinsics::_isAssignableFrom: return inline_native_subtype_check();
case vmIntrinsics::_isInstance:
case vmIntrinsics::_getModifiers:
case vmIntrinsics::_isInterface:
case vmIntrinsics::_isArray:
case vmIntrinsics::_isPrimitive:
case vmIntrinsics::_getSuperclass:
case vmIntrinsics::_getComponentType:
case vmIntrinsics::_getClassAccessFlags: return inline_native_Class_query(intrinsic_id());
// Float/double bit conversions.
case vmIntrinsics::_floatToRawIntBits:
case vmIntrinsics::_floatToIntBits:
case vmIntrinsics::_intBitsToFloat:
case vmIntrinsics::_doubleToRawLongBits:
case vmIntrinsics::_doubleToLongBits:
case vmIntrinsics::_longBitsToDouble: return inline_fp_conversions(intrinsic_id());
// Integer/Long bit-manipulation methods.
case vmIntrinsics::_numberOfLeadingZeros_i:
case vmIntrinsics::_numberOfLeadingZeros_l:
case vmIntrinsics::_numberOfTrailingZeros_i:
case vmIntrinsics::_numberOfTrailingZeros_l:
case vmIntrinsics::_bitCount_i:
case vmIntrinsics::_bitCount_l:
case vmIntrinsics::_reverseBytes_i:
case vmIntrinsics::_reverseBytes_l:
case vmIntrinsics::_reverseBytes_s:
case vmIntrinsics::_reverseBytes_c: return inline_number_methods(intrinsic_id());
case vmIntrinsics::_getCallerClass: return inline_native_Reflection_getCallerClass();
case vmIntrinsics::_Reference_get: return inline_reference_get();
// Crypto, digest, big-number and CRC32 intrinsics.
case vmIntrinsics::_aescrypt_encryptBlock:
case vmIntrinsics::_aescrypt_decryptBlock: return inline_aescrypt_Block(intrinsic_id());
case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
return inline_cipherBlockChaining_AESCrypt(intrinsic_id());
case vmIntrinsics::_sha_implCompress:
case vmIntrinsics::_sha2_implCompress:
case vmIntrinsics::_sha5_implCompress:
return inline_sha_implCompress(intrinsic_id());
case vmIntrinsics::_digestBase_implCompressMB:
// The only case that consumes the 'predicate' argument.
return inline_digestBase_implCompressMB(predicate);
case vmIntrinsics::_multiplyToLen:
return inline_multiplyToLen();
case vmIntrinsics::_squareToLen:
return inline_squareToLen();
case vmIntrinsics::_mulAdd:
return inline_mulAdd();
case vmIntrinsics::_montgomeryMultiply:
return inline_montgomeryMultiply();
case vmIntrinsics::_montgomerySquare:
return inline_montgomerySquare();
case vmIntrinsics::_ghash_processBlocks:
return inline_ghash_processBlocks();
case vmIntrinsics::_encodeISOArray:
return inline_encodeISOArray();
case vmIntrinsics::_updateCRC32:
return inline_updateCRC32();
case vmIntrinsics::_updateBytesCRC32:
return inline_updateBytesCRC32();
case vmIntrinsics::_updateByteBufferCRC32:
return inline_updateByteBufferCRC32();
case vmIntrinsics::_profileBoolean:
return inline_profileBoolean();
default:
// If you get here, it may be that someone has added a new intrinsic
// to the list in vmSymbols.hpp without implementing it here.
#ifndef PRODUCT
if ((PrintMiscellaneous && (Verbose || WizardMode)) || PrintOpto) {
tty->print_cr("*** Warning: Unimplemented intrinsic %s(%d)",
vmIntrinsics::name_at(intrinsic_id()), intrinsic_id());
}
#endif
// Report "not inlined" so the caller takes its bail-out path.
return false;
}
}
// Dispatches on intrinsic_id() to the helper that builds the guard for a
// predicated intrinsic and returns that helper's result.  For an id without a
// predicate helper, the current control is returned as the slow path and
// control is set to top so that no fast path remains.
// 'predicate' is forwarded only to inline_digestBase_implCompressMB_predicate.
Node* LibraryCallKit::try_to_predicate(int predicate) {
  const bool root_compile = !jvms()->has_method();
  if (root_compile) {
    // Root JVMState has a null method.
    assert(map()->memory()->Opcode() == Op_Parm, "");
    // Insert the memory aliasing node
    set_all_memory(reset_memory());
  }
  assert(merged_memory(), "");

  switch (intrinsic_id()) {
  case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
    return inline_cipherBlockChaining_AESCrypt_predicate(false);
  case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
    return inline_cipherBlockChaining_AESCrypt_predicate(true);
  case vmIntrinsics::_digestBase_implCompressMB:
    return inline_digestBase_implCompressMB_predicate(predicate);
  default:
    break;
  }

  // If you get here, it may be that someone has added a new intrinsic
  // to the list in vmSymbols.hpp without implementing it here.
#ifndef PRODUCT
  if ((PrintMiscellaneous && (Verbose || WizardMode)) || PrintOpto) {
    tty->print_cr("*** Warning: Unimplemented predicate for intrinsic %s(%d)",
                  vmIntrinsics::name_at(intrinsic_id()), intrinsic_id());
  }
#endif
  // Hand the incoming control back as the slow path.
  Node* slow_path = control();
  set_control(top()); // No fast path intrinsic
  return slow_path;
}
//------------------------------set_result-------------------------------
// Finishing helper for intrinsics that end in a merge point: records the
// region for IGVN, makes the transformed region the current control, and
// installs the transformed phi as the intrinsic's result.
void LibraryCallKit::set_result(RegionNode* region, PhiNode* value) {
  record_for_igvn(region);

  Node* merged_ctl = _gvn.transform(region);
  set_control(merged_ctl);

  Node* merged_val = _gvn.transform(value);
  set_result(merged_val);

  assert(value->type()->basic_type() == result()->bottom_type()->basic_type(), "sanity");
}
//------------------------------generate_guard---------------------------
// Builds the If/projection pair for a guarded fast/slow graph shape.
//   test      - condition that, when true, selects the slow path
//               (the slow path is expected to be the uncommon one)
//   region    - if non-NULL, the slow projection is appended to it
//   true_prob - probability that 'test' is true
// On return GraphKit::control() is the fast path whenever a guard was
// actually emitted.  The return value is the slow-path control; it is never
// 'top'.  NULL is returned when the kit is already stopped or when the slow
// path can be shown to be unreachable.
Node* LibraryCallKit::generate_guard(Node* test, RegionNode* region, float true_prob) {
  // Already short circuited, or the test is the constant 'false':
  // in either case no guard needs to be built.
  if (stopped() || _gvn.type(test) == TypeInt::ZERO) {
    return NULL;
  }

  // Emit the branch; its true projection is the slow path.
  IfNode* branch    = create_and_map_if(control(), test, true_prob, COUNT_UNKNOWN);
  Node*   slow_proj = _gvn.transform(new (C) IfTrueNode(branch));
  if (slow_proj == top()) {
    // The slow branch folded away, so it can never be taken.
    return NULL;
  }

  if (region != NULL) {
    region->add_req(slow_proj);
  }

  // Continue parsing on the fast (false) projection.
  Node* fast_proj = _gvn.transform(new (C) IfFalseNode(branch));
  set_control(fast_proj);

  return slow_proj;
}
// generate_guard() with the slow path weighted as PROB_UNLIKELY_MAG(3).
inline Node* LibraryCallKit::generate_slow_guard(Node* test, RegionNode* region) {
  Node* slow_ctl = generate_guard(test, region, PROB_UNLIKELY_MAG(3));
  return slow_ctl;
}
// generate_guard() with the slow path weighted as PROB_FAIR.
inline Node* LibraryCallKit::generate_fair_guard(Node* test, RegionNode* region) {
  Node* slow_ctl = generate_guard(test, region, PROB_FAIR);
  return slow_ctl;
}
// Guard against a negative 'index'.  The slow path (index < 0) is returned
// and, if 'region' is non-NULL, appended to it by generate_guard().
// When a guard is emitted and 'pos_index' is non-NULL, *pos_index receives
// a copy of 'index' cast to TypeInt::POS and pinned on the fast path.
// Returns NULL if the kit is stopped, if the type of 'index' already proves
// it non-negative, or if generate_guard() returns NULL.
inline Node* LibraryCallKit::generate_negative_guard(Node* index, RegionNode* region,
                                                     Node* *pos_index) {
  if (stopped()) {
    return NULL;                // already stopped
  }
  const bool known_non_negative = _gvn.type(index)->higher_equal(TypeInt::POS);  // [0,maxint]
  if (known_non_negative) {
    return NULL;                // index is already adequately typed
  }

  Node* cmp_zero    = _gvn.transform(new (C) CmpINode(index, intcon(0)));
  Node* is_lt_zero  = _gvn.transform(new (C) BoolNode(cmp_zero, BoolTest::lt));
  Node* negative_ctl = generate_guard(is_lt_zero, region, PROB_MIN);

  if (negative_ctl == NULL || pos_index == NULL) {
    return negative_ctl;
  }

  // Emulate effect of Parse::adjust_map_after_if.
  Node* narrowed = new (C) CastIINode(index, TypeInt::POS);
  narrowed->set_req(0, control());
  *pos_index = _gvn.transform(narrowed);

  return negative_ctl;
}
// Guard against 'index' being zero or negative.  When 'never_negative' is
// true only equality with zero is tested, otherwise the test is index <= 0.
// The slow-path control is returned; it is not attached to any region.
// When a guard is emitted and 'pos_index' is non-NULL, *pos_index receives
// a copy of 'index' cast to TypeInt::POS1 and pinned on the fast path.
// Returns NULL if the kit is stopped, if the type of 'index' already proves
// it strictly positive, or if generate_guard() returns NULL.
inline Node* LibraryCallKit::generate_nonpositive_guard(Node* index, bool never_negative,
                                                        Node* *pos_index) {
  if (stopped()) {
    return NULL;                // already stopped
  }
  const bool known_positive = _gvn.type(index)->higher_equal(TypeInt::POS1);  // [1,maxint]
  if (known_positive) {
    return NULL;                // index is already adequately typed
  }

  Node* cmp_zero = _gvn.transform(new (C) CmpINode(index, intcon(0)));

  BoolTest::mask relation;
  if (never_negative) {
    relation = BoolTest::eq;
  } else {
    relation = BoolTest::le;
  }

  Node* is_nonpositive  = _gvn.transform(new (C) BoolNode(cmp_zero, relation));
  Node* nonpositive_ctl = generate_guard(is_nonpositive, NULL, PROB_MIN);

  if (nonpositive_ctl == NULL || pos_index == NULL) {
    return nonpositive_ctl;
  }

  // Emulate effect of Parse::adjust_map_after_if.
  Node* narrowed = new (C) CastIINode(index, TypeInt::POS1);
  narrowed->set_req(0, control());
  *pos_index = _gvn.transform(narrowed);

  return nonpositive_ctl;
}
// Guard that (offset + subseq_length) is a valid limit index for an array
// of 'array_length' elements, i.e. lies in [0..array_length].
// Two equivalent formulations of the check exist:
//   A.  (offset + copyLength)  unsigned<=  arrayLength
//   B.  offset  <=  (arrayLength - copyLength)
// Both assume that every value involved, apart from the sum and the
// difference themselves, is already known to be non-negative.
// Plan A stays correct even if the sum overflows because offset and
// copyLength are both very large positive values.
//
// Plan B is less direct, but cannot overflow at all: the difference of two
// non-negative ints is always representable.  Java code that performs the
// same check by hand usually follows Plan B.
// This implementation currently uses Plan A.
//
// Returns the slow-path control (limit exceeded), which generate_guard()
// also appends to 'region' when that is non-NULL, or NULL when no guard was
// needed or the kit is already stopped.
inline Node* LibraryCallKit::generate_limit_guard(Node* offset,
                                                  Node* subseq_length,
                                                  Node* array_length,
                                                  RegionNode* region) {
  if (stopped()) {
    return NULL;                // already stopped
  }

  const bool offset_is_zero = (_gvn.type(offset) == TypeInt::ZERO);
  if (offset_is_zero && subseq_length->eqv_uncast(array_length)) {
    return NULL;                // common case of whole-array copy
  }

  // end_index = subseq_length (+ offset, unless the offset is known zero)
  Node* end_index = offset_is_zero
                      ? subseq_length
                      : _gvn.transform(new (C) AddINode(subseq_length, offset));

  // Slow path when array_length <u end_index.
  Node* cmp_len   = _gvn.transform(new (C) CmpUNode(array_length, end_index));
  Node* too_small = _gvn.transform(new (C) BoolNode(cmp_len, BoolTest::lt));

  return generate_guard(too_small, region, PROB_MIN);
}
//--------------------------generate_current_thread--------------------
// Emits a ThreadLocalNode plus a load from it at
// JavaThread::threadObj_offset().  The ThreadLocalNode is handed back
// through 'tls_output'; the loaded value, typed as a non-null instance of
// the Thread klass, is the return value.
Node* LibraryCallKit::generate_current_thread(Node* &tls_output) {
  ciKlass*    klass_of_thread = env()->Thread_klass();
  const Type* non_null_thread =
      TypeOopPtr::make_from_klass(klass_of_thread)->cast_to_ptr_type(TypePtr::NotNull);

  Node* tls = _gvn.transform(new (C) ThreadLocalNode());

  // Address of the threadObj field; the base is top() because the thread
  // pointer is not an oop.
  Node* field_adr = basic_plus_adr(top()/*!oop*/, tls, in_bytes(JavaThread::threadObj_offset()));
  Node* java_thread = make_load(NULL, field_adr, non_null_thread, T_OBJECT, MemNode::unordered);

  tls_output = tls;
  return java_thread;
}
//------------------------------make_string_method_node------------------------
// Builds and transforms the node for a String intrinsic.  In this overload
// str1 and str2 are String object nodes; their value/offset/length fields
// are loaded here.  'opcode' must be one of Op_StrIndexOf, Op_StrComp or
// Op_StrEquals; anything else hits ShouldNotReachHere().
//
Node* LibraryCallKit::make_string_method_node(int opcode, Node* str1, Node* str2) {
  Node* no_ctrl = NULL;

  // First string: address of its first character, and its length.
  Node* value1  = load_String_value(no_ctrl, str1);
  Node* offset1 = load_String_offset(no_ctrl, str1);
  Node* start1  = array_element_address(value1, offset1, T_CHAR);
  Node* length1 = load_String_length(no_ctrl, str1);

  // Second string: address of its first character.  Its length is loaded
  // only by the operations that consume it.
  Node* value2  = load_String_value(no_ctrl, str2);
  Node* offset2 = load_String_offset(no_ctrl, str2);
  Node* start2  = array_element_address(value2, offset2, T_CHAR);

  Node* str_node = NULL;
  if (opcode == Op_StrIndexOf || opcode == Op_StrComp) {
    Node* length2 = load_String_length(no_ctrl, str2);
    if (opcode == Op_StrIndexOf) {
      str_node = new (C) StrIndexOfNode(control(), memory(TypeAryPtr::CHARS),
                                        start1, length1, start2, length2);
    } else {
      str_node = new (C) StrCompNode(control(), memory(TypeAryPtr::CHARS),
                                     start1, length1, start2, length2);
    }
  } else if (opcode == Op_StrEquals) {
    str_node = new (C) StrEqualsNode(control(), memory(TypeAryPtr::CHARS),
                                     start1, start2, length1);
  } else {
    ShouldNotReachHere();
    return NULL;
  }

  // All these intrinsics have checks.
  C->set_has_split_ifs(true);   // Has chance for split-if optimization

  return _gvn.transform(str_node);
}
// Builds and transforms the node for a String intrinsic.  In this overload
// str1_start and str2_start already point at char[] data, and cnt1 / cnt2
// are Int nodes holding the lengths of the two strings.  'opcode' must be
// one of Op_StrIndexOf, Op_StrComp or Op_StrEquals; anything else hits
// ShouldNotReachHere().  Op_StrEquals uses cnt1 only.
//
Node* LibraryCallKit::make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2) {
  Node* str_node = NULL;

  if (opcode == Op_StrIndexOf) {
    str_node = new (C) StrIndexOfNode(control(), memory(TypeAryPtr::CHARS),
                                      str1_start, cnt1, str2_start, cnt2);
  } else if (opcode == Op_StrComp) {
    str_node = new (C) StrCompNode(control(), memory(TypeAryPtr::CHARS),
                                   str1_start, cnt1, str2_start, cnt2);
  } else if (opcode == Op_StrEquals) {
    str_node = new (C) StrEqualsNode(control(), memory(TypeAryPtr::CHARS),
                                     str1_start, str2_start, cnt1);
  } else {
    ShouldNotReachHere();
    return NULL;
  }

  // All these intrinsics have checks.
  C->set_has_split_ifs(true);   // Has chance for split-if optimization

  return _gvn.transform(str_node);
}
//------------------------------inline_string_compareTo------------------------
// public int java.lang.String.compareTo(String anotherString);
bool LibraryCallKit::inline_string_compareTo() {
Node* receiver = null_check(argument(0));
Node* arg = null_check(argument(1));
if (stopped()) {
return true;
}
set_result(make_string_method_node(Op_StrComp, receiver, arg));
return true;
}
//------------------------------inline_string_equals------------------------
// Intrinsic expansion for String.equals().  Builds a region/phi merge whose
// path slots are filled as follows:
//   slot 1 - result of the StrEquals node (both are Strings of equal length)
//   slot 2 - receiver and argument are the same reference -> 1
//   slot 3 - argument fails the instanceof String test    -> 0
//   slot 4 - the two string lengths differ                -> 0
// A slot is only filled when its guard was actually emitted.
// Always returns true.
bool LibraryCallKit::inline_string_equals() {
Node* receiver = null_check_receiver();
// NOTE: Do not null check argument for String.equals() because spec
// allows to specify NULL as argument.
// (The local 'argument' shadows the accessor, hence the explicit this->.)
Node* argument = this->argument(1);
if (stopped()) {
return true;
}
// paths (plus control) merge
RegionNode* region = new (C) RegionNode(5);
Node* phi = new (C) PhiNode(region, TypeInt::BOOL);
// does source == target string?
Node* cmp = _gvn.transform(new (C) CmpPNode(receiver, argument));
Node* bol = _gvn.transform(new (C) BoolNode(cmp, BoolTest::eq));
// No region is passed, so the guard's control is wired into slot 2 by hand.
Node* if_eq = generate_slow_guard(bol, NULL);
if (if_eq != NULL) {
// receiver == argument
phi->init_req(2, intcon(1));
region->init_req(2, if_eq);
}
// get String klass for instanceOf
ciInstanceKlass* klass = env()->String_klass();
if (!stopped()) {
Node* inst = gen_instanceof(argument, makecon(TypeKlassPtr::make(klass)));
// Guard is taken when the instanceof result is not 1.
Node* cmp = _gvn.transform(new (C) CmpINode(inst, intcon(1)));
Node* bol = _gvn.transform(new (C) BoolNode(cmp, BoolTest::ne));
Node* inst_false = generate_guard(bol, NULL, PROB_MIN);
//instanceOf == true, fallthrough
if (inst_false != NULL) {
phi->init_req(3, intcon(0));
region->init_req(3, inst_false);
}
}
if (!stopped()) {
const TypeOopPtr* string_type = TypeOopPtr::make_from_klass(klass);
// Properly cast the argument to String
argument = _gvn.transform(new (C) CheckCastPPNode(control(), argument, string_type));
// This path is taken only when argument's type is String:NotNull.
argument = cast_not_null(argument, false);
Node* no_ctrl = NULL;
// Get start addr of receiver
Node* receiver_val = load_String_value(no_ctrl, receiver);
Node* receiver_offset = load_String_offset(no_ctrl, receiver);
Node* receiver_start = array_element_address(receiver_val, receiver_offset, T_CHAR);
// Get length of receiver
Node* receiver_cnt = load_String_length(no_ctrl, receiver);
// Get start addr of argument
Node* argument_val = load_String_value(no_ctrl, argument);
Node* argument_offset = load_String_offset(no_ctrl, argument);
Node* argument_start = array_element_address(argument_val, argument_offset, T_CHAR);
// Get length of argument
Node* argument_cnt = load_String_length(no_ctrl, argument);
// Check for receiver count != argument count
Node* cmp = _gvn.transform(new(C) CmpINode(receiver_cnt, argument_cnt));
Node* bol = _gvn.transform(new(C) BoolNode(cmp, BoolTest::ne));
Node* if_ne = generate_slow_guard(bol, NULL);
if (if_ne != NULL) {
phi->init_req(4, intcon(0));
region->init_req(4, if_ne);
}
// Check for count == 0 is done by assembler code for StrEquals.
if (!stopped()) {
// Lengths are known equal on this path; compare the character data.
Node* equals = make_string_method_node(Op_StrEquals, receiver_start, receiver_cnt, argument_start, argument_cnt);
phi->init_req(1, equals);
region->init_req(1, control());
}
}
// post merge
set_control(_gvn.transform(region));
record_for_igvn(region);
set_result(_gvn.transform(phi));
return true;
}
//------------------------------inline_array_equals----------------------------
bool LibraryCallKit::inline_array_equals() {
Node* arg1 = argument(0);
Node* arg2 = argument(1);
set_result(_gvn.transform(new (C) AryEqNode(control(), memory(TypeAryPtr::CHARS), arg1, arg2)));
return true;
}
// Java version of String.indexOf(constant string)
// class StringDecl {
// StringDecl(char[] ca) {
// offset = 0;
// count = ca.length;
// value = ca;
// }
// int offset;
// int count;
// char[] value;
// }
//
// static int string_indexOf_J(StringDecl string_object, char[] target_object,
// int targetOffset, int cache_i, int md2) {
// int cache = cache_i;
// int sourceOffset = string_object.offset;
// int sourceCount = string_object.count;
// int targetCount = target_object.length;
//
// int targetCountLess1 = targetCount - 1;
// int sourceEnd = sourceOffset + sourceCount - targetCountLess1;
//
// char[] source = string_object.value;
// char[] target = target_object;
// int lastChar = target[targetCountLess1];
//
// outer_loop:
// for (int i = sourceOffset; i < sourceEnd; ) {
// int src = source[i + targetCountLess1];
// if (src == lastChar) {
// // With random strings and a 4-character alphabet,
// // reverse matching at this point sets up 0.8% fewer
// // frames, but (paradoxically) makes 0.3% more probes.
// // Since those probes are nearer the lastChar probe,
// // there may be a net D$ win with reverse matching.
// // But, reversing loop inhibits unroll of inner loop
// // for unknown reason. So, does running outer loop from
// // (sourceOffset - targetCountLess1) to (sourceOffset + sourceCount)
// for (int j = 0; j < targetCountLess1; j++) {
// if (target[targetOffset + j] != source[i+j]) {
// if ((cache & (1 << source[i+j])) == 0) {
// if (md2 < j+1) {
// i += j+1;
// continue outer_loop;
// }
// }
// i += md2;
// continue outer_loop;
// }
// }
// return i - sourceOffset;
// }
// if ((cache & (1 << src)) == 0) {
// i += targetCountLess1;
// } // using "i += targetCount;" and an "else i++;" causes a jump to jump.
// i++;
// }
// return -1;
// }
//------------------------------string_indexOf------------------------
// Builds, with IdealKit, the search loop for indexOf against a constant
// pattern.
//   string_object  - String node that is searched
//   target_array   - constant char array holding the pattern
//   targetOffset_i - start offset of the pattern inside target_array
//   cache_i        - bit mask, materialized as an int constant and tested
//                    against (1 << char) to pick the skip distance
//   md2_i          - skip distance, materialized as an int constant
// Returns the node holding the loop result: initialized to -1 and set to
// (i - sourceOffset) when a match is found.
Node* LibraryCallKit::string_indexOf(Node* string_object, ciTypeArray* target_array, jint targetOffset_i,
jint cache_i, jint md2_i) {
Node* no_ctrl = NULL;
float likely = PROB_LIKELY(0.9);
float unlikely = PROB_UNLIKELY(0.9);
const int nargs = 0; // no arguments to push back for uncommon trap in predicate
// Fields of the String being searched.
Node* source = load_String_value(no_ctrl, string_object);
Node* sourceOffset = load_String_offset(no_ctrl, string_object);
Node* sourceCount = load_String_length(no_ctrl, string_object);
// The pattern array as a constant oop.
Node* target = _gvn.transform( makecon(TypeOopPtr::make_from_constant(target_array, true)));
jint target_length = target_array->length();
const TypeAry* target_array_type = TypeAry::make(TypeInt::CHAR, TypeInt::make(0, target_length, Type::WidenMin));
const TypeAryPtr* target_type = TypeAryPtr::make(TypePtr::BotPTR, target_array_type, target_array->klass(), true, Type::OffsetBot);
// String.value field is known to be @Stable.
if (UseImplicitStableValues) {
target = cast_array_to_stable(target, target_type);
}
IdealKit kit(this, false, true);
// Shorthand: every "__ foo(...)" below is a call on the IdealKit.
#define __ kit.
Node* zero = __ ConI(0);
Node* one = __ ConI(1);
Node* cache = __ ConI(cache_i);
Node* md2 = __ ConI(md2_i);
Node* lastChar = __ ConI(target_array->char_at(target_length - 1));
// NOTE(review): targetCount is not referenced again in this function.
Node* targetCount = __ ConI(target_length);
Node* targetCountLess1 = __ ConI(target_length - 1);
Node* targetOffset = __ ConI(targetOffset_i);
// sourceEnd = sourceOffset + sourceCount - (target_length - 1)
Node* sourceEnd = __ SubI(__ AddI(sourceOffset, sourceCount), targetCountLess1);
IdealVariable rtn(kit), i(kit), j(kit); __ declarations_done();
Node* outer_loop = __ make_label(2 /* goto */);
Node* return_ = __ make_label(1);
// Default result when no match is found.
__ set(rtn,__ ConI(-1));
// Outer loop: i runs from sourceOffset while i < sourceEnd.
__ loop(this, nargs, i, sourceOffset, BoolTest::lt, sourceEnd); {
Node* i2 = __ AddI(__ value(i), targetCountLess1);
// pin to prohibit loading of "next iteration" value which may SEGV (rare)
Node* src = load_array_element(__ ctrl(), source, i2, TypeAryPtr::CHARS);
// Does the source char aligned with the pattern's last char match it?
__ if_then(src, BoolTest::eq, lastChar, unlikely); {
// Inner loop: compare the first target_length - 1 characters.
__ loop(this, nargs, j, zero, BoolTest::lt, targetCountLess1); {
Node* tpj = __ AddI(targetOffset, __ value(j));
Node* targ = load_array_element(no_ctrl, target, tpj, target_type);
Node* ipj = __ AddI(__ value(i), __ value(j));
Node* src2 = load_array_element(no_ctrl, source, ipj, TypeAryPtr::CHARS);
// Mismatch: advance i by j+1 or by md2 and restart the outer loop.
__ if_then(targ, BoolTest::ne, src2); {
__ if_then(__ AndI(cache, __ LShiftI(one, src2)), BoolTest::eq, zero); {
__ if_then(md2, BoolTest::lt, __ AddI(__ value(j), one)); {
__ increment(i, __ AddI(__ value(j), one));
__ goto_(outer_loop);
} __ end_if(); __ dead(j);
}__ end_if(); __ dead(j);
__ increment(i, md2);
__ goto_(outer_loop);
}__ end_if();
__ increment(j, one);
}__ end_loop(); __ dead(j);
// Inner loop ran to completion: match found at i.
__ set(rtn, __ SubI(__ value(i), sourceOffset)); __ dead(i);
__ goto_(return_);
}__ end_if();
// If the cache bit selected by src is clear, skip an extra target_length - 1.
__ if_then(__ AndI(cache, __ LShiftI(one, src)), BoolTest::eq, zero, likely); {
__ increment(i, targetCountLess1);
}__ end_if();
__ increment(i, one);
__ bind(outer_loop);
}__ end_loop(); __ dead(i);
__ bind(return_);
// Final sync IdealKit and GraphKit.
final_sync(kit);
Node* result = __ value(rtn);
#undef __
C->set_has_loops(true);
return result;
}
//------------------------------inline_string_indexOf------------------------
// Intrinsic expansion for indexOf on Strings.  Two strategies:
//  - If the matcher has a rule for Op_StrIndexOf and UseSSE42Intrinsics is
//    set, emit a StrIndexOf node behind two guards (pattern longer than the
//    source -> -1, empty pattern -> 0).
//  - Otherwise, only when the argument is a constant String with offset 0
//    and count == array length, precompute 'cache' and 'md2' from the
//    pattern and build the loop via string_indexOf().
// Returns false when the intrinsic is not applied; returns true once it has
// been applied, including the case where a null check left the kit stopped.
bool LibraryCallKit::inline_string_indexOf() {
Node* receiver = argument(0);
Node* arg = argument(1);
Node* result;
// Disable the use of pcmpestri until it can be guaranteed that
// the load doesn't cross into the uncommitted space.
if (Matcher::has_match_rule(Op_StrIndexOf) &&
UseSSE42Intrinsics) {
// Generate SSE4.2 version of indexOf
// We currently only have match rules that use SSE4.2
receiver = null_check(receiver);
arg = null_check(arg);
if (stopped()) {
return true;
}
// NOTE(review): str_klass and string_type are not referenced again in this branch.
ciInstanceKlass* str_klass = env()->String_klass();
const TypeOopPtr* string_type = TypeOopPtr::make_from_klass(str_klass);
// Make the merge point
// Slot 1: StrIndexOf result, slot 2: pattern too long, slot 3: empty pattern.
RegionNode* result_rgn = new (C) RegionNode(4);
Node* result_phi = new (C) PhiNode(result_rgn, TypeInt::INT);
Node* no_ctrl = NULL;
// Get start addr of source string
Node* source = load_String_value(no_ctrl, receiver);
Node* source_offset = load_String_offset(no_ctrl, receiver);
Node* source_start = array_element_address(source, source_offset, T_CHAR);
// Get length of source string
Node* source_cnt = load_String_length(no_ctrl, receiver);
// Get start addr of substring
Node* substr = load_String_value(no_ctrl, arg);
Node* substr_offset = load_String_offset(no_ctrl, arg);
Node* substr_start = array_element_address(substr, substr_offset, T_CHAR);
// Get length of substring
Node* substr_cnt = load_String_length(no_ctrl, arg);
// Check for substr count > string count
Node* cmp = _gvn.transform(new(C) CmpINode(substr_cnt, source_cnt));
Node* bol = _gvn.transform(new(C) BoolNode(cmp, BoolTest::gt));
Node* if_gt = generate_slow_guard(bol, NULL);
if (if_gt != NULL) {
// Pattern longer than the source: result is -1.
result_phi->init_req(2, intcon(-1));
result_rgn->init_req(2, if_gt);
}
if (!stopped()) {
// Check for substr count == 0
cmp = _gvn.transform(new(C) CmpINode(substr_cnt, intcon(0)));
bol = _gvn.transform(new(C) BoolNode(cmp, BoolTest::eq));
Node* if_zero = generate_slow_guard(bol, NULL);
if (if_zero != NULL) {
// Empty pattern: result is 0.
result_phi->init_req(3, intcon(0));
result_rgn->init_req(3, if_zero);
}
}
if (!stopped()) {
result = make_string_method_node(Op_StrIndexOf, source_start, source_cnt, substr_start, substr_cnt);
result_phi->init_req(1, result);
result_rgn->init_req(1, control());
}
set_control(_gvn.transform(result_rgn));
record_for_igvn(result_rgn);
result = _gvn.transform(result_phi);
} else { // Use LibraryCallKit::string_indexOf
// don't intrinsify if argument isn't a constant string.
if (!arg->is_Con()) {
return false;
}
const TypeOopPtr* str_type = _gvn.type(arg)->isa_oopptr();
if (str_type == NULL) {
return false;
}
ciInstanceKlass* klass = env()->String_klass();
ciObject* str_const = str_type->const_oop();
// The constant must exist and be exactly a String instance.
if (str_const == NULL || str_const->klass() != klass) {
return false;
}
ciInstance* str = str_const->as_instance();
assert(str != NULL, "must be instance");
ciObject* v = str->field_value_by_offset(java_lang_String::value_offset_in_bytes()).as_object();
ciTypeArray* pat = v->as_type_array(); // pattern (argument) character array
// o = offset of the pattern within 'pat', c = pattern length.
int o;
int c;
if (java_lang_String::has_offset_field()) {
o = str->field_value_by_offset(java_lang_String::offset_offset_in_bytes()).as_int();
c = str->field_value_by_offset(java_lang_String::count_offset_in_bytes()).as_int();
} else {
o = 0;
c = pat->length();
}
// constant strings have no offset and count == length which
// simplifies the resulting code somewhat so lets optimize for that.
if (o != 0 || c != pat->length()) {
return false;
}
receiver = null_check(receiver, T_OBJECT);
// NOTE: No null check on the argument is needed since it's a constant String oop.
if (stopped()) {
return true;
}
// The null string as a pattern always returns 0 (match at beginning of string)
if (c == 0) {
set_result(intcon(0));
return true;
}
// Generate default indexOf
jchar lastChar = pat->char_at(o + (c - 1));
// cache: one bit per pattern character (all but the last), indexed by the
// low bits of the character value.
int cache = 0;
int i;
for (i = 0; i < c - 1; i++) {
assert(i < pat->length(), "out of range");
cache |= (1 << (pat->char_at(o + i) & (sizeof(cache) * BitsPerByte - 1)));
}
// md2: distance from the last earlier occurrence of lastChar to the end of
// the pattern, or the full pattern length if lastChar occurs nowhere else.
int md2 = c;
for (i = 0; i < c - 1; i++) {
assert(i < pat->length(), "out of range");
if (pat->char_at(o + i) == lastChar) {
md2 = (c - 1) - i;
}
}
result = string_indexOf(receiver, pat, o, cache, md2);
}
set_result(result);
return true;
}
//--------------------------round_double_node--------------------------------
// Returns 'n' wrapped in a transformed RoundDoubleNode when
// Matcher::strict_fp_requires_explicit_rounding is set and UseSSE <= 1;
// otherwise returns 'n' untouched.
Node* LibraryCallKit::round_double_node(Node* n) {
  const bool needs_rounding = Matcher::strict_fp_requires_explicit_rounding && UseSSE <= 1;
  if (!needs_rounding) {
    return n;
  }
  return _gvn.transform(new (C) RoundDoubleNode(0, n));
}
//------------------------------inline_math-----------------------------------
// Single-node expansion of:
//   public static double Math.abs(double)
//   public static double Math.sqrt(double)
//   public static double Math.log(double)
//   public static double Math.log10(double)
// The (possibly rounded) argument feeds the node chosen by 'id'; any other
// id is reported through fatal_unexpected_iid().  Always returns true.
bool LibraryCallKit::inline_math(vmIntrinsics::ID id) {
  Node* input     = round_double_node(argument(0));
  Node* math_node = NULL;

  if (id == vmIntrinsics::_dabs) {
    math_node = new (C) AbsDNode(input);
  } else if (id == vmIntrinsics::_dsqrt) {
    math_node = new (C) SqrtDNode(C, control(), input);
  } else if (id == vmIntrinsics::_dlog) {
    math_node = new (C) LogDNode(C, control(), input);
  } else if (id == vmIntrinsics::_dlog10) {
    math_node = new (C) Log10DNode(C, control(), input);
  } else {
    fatal_unexpected_iid(id);
  }

  set_result(_gvn.transform(math_node));
  return true;
}
//------------------------------inline_trig----------------------------------
// Inline sin/cos/tan instructions, if possible. If rounding is required, do
// argument reduction which will turn into a fast/slow diamond.
//   id - one of vmIntrinsics::_dsin, _dcos, _dtan; anything else is reported
//        through fatal_unexpected_iid().
// Without explicit rounding the result is simply the SinD/CosD/TanD node.
// With it, that node is the fast-path value, and a leaf runtime call
// (SharedRuntime::dsin/dcos/dtan) is the slow-path value, selected when
// PI/4 < abs(arg).  Always returns true.
bool LibraryCallKit::inline_trig(vmIntrinsics::ID id) {
Node* arg = round_double_node(argument(0));
Node* n = NULL;
switch (id) {
case vmIntrinsics::_dsin: n = new (C) SinDNode(C, control(), arg); break;
case vmIntrinsics::_dcos: n = new (C) CosDNode(C, control(), arg); break;
case vmIntrinsics::_dtan: n = new (C) TanDNode(C, control(), arg); break;
default: fatal_unexpected_iid(id); break;
}
n = _gvn.transform(n);
// Rounding required? Check for argument reduction!
if (Matcher::strict_fp_requires_explicit_rounding) {
static const double pi_4 = 0.7853981633974483;
// NOTE(review): neg_pi_4 is not referenced in the code below.
static const double neg_pi_4 = -0.7853981633974483;
// pi/2 in 80-bit extended precision
// static const unsigned char pi_2_bits_x[] = {0x35,0xc2,0x68,0x21,0xa2,0xda,0x0f,0xc9,0xff,0x3f,0x00,0x00,0x00,0x00,0x00,0x00};
// -pi/2 in 80-bit extended precision
// static const unsigned char neg_pi_2_bits_x[] = {0x35,0xc2,0x68,0x21,0xa2,0xda,0x0f,0xc9,0xff,0xbf,0x00,0x00,0x00,0x00,0x00,0x00};
// Cutoff value for using this argument reduction technique
//static const double pi_2_minus_epsilon = 1.564660403643354;
//static const double neg_pi_2_plus_epsilon = -1.564660403643354;
// Pseudocode for sin:
// if (x <= Math.PI / 4.0) {
// if (x >= -Math.PI / 4.0) return fsin(x);
// if (x >= -Math.PI / 2.0) return -fcos(x + Math.PI / 2.0);
// } else {
// if (x <= Math.PI / 2.0) return fcos(x - Math.PI / 2.0);
// }
// return StrictMath.sin(x);
// Pseudocode for cos:
// if (x <= Math.PI / 4.0) {
// if (x >= -Math.PI / 4.0) return fcos(x);
// if (x >= -Math.PI / 2.0) return fsin(x + Math.PI / 2.0);
// } else {
// if (x <= Math.PI / 2.0) return -fsin(x - Math.PI / 2.0);
// }
// return StrictMath.cos(x);
// Actually, sticking in an 80-bit Intel value into C2 will be tough; it
// requires a special machine instruction to load it. Instead we'll try
// the 'easy' case. If we really need the extra range +/- PI/2 we'll
// probably do the math inside the SIN encoding.
// Make the merge point
// Slot 1 receives the slow (runtime call) path, slot 2 the fast path value.
RegionNode* r = new (C) RegionNode(3);
Node* phi = new (C) PhiNode(r, Type::DOUBLE);
// Flatten arg so we need only 1 test
Node *abs = _gvn.transform(new (C) AbsDNode(arg));
// Node for PI/4 constant
Node *pi4 = makecon(TypeD::make(pi_4));
// Check PI/4 : abs(arg)
Node *cmp = _gvn.transform(new (C) CmpDNode(pi4,abs));
// Check: If PI/4 < abs(arg) then go slow
Node *bol = _gvn.transform(new (C) BoolNode( cmp, BoolTest::lt ));
// Branch either way
IfNode *iff = create_and_xform_if(control(),bol, PROB_STATIC_FREQUENT, COUNT_UNKNOWN);
// NOTE(review): opt_iff presumably attaches the fast projection to r and
// returns the slow-path control -- confirm against its definition.
set_control(opt_iff(r,iff));
// Set fast path result
phi->init_req(2, n);
// Slow path - non-blocking leaf call
Node* call = NULL;
// No default case: an unexpected id was already reported by the switch above.
switch (id) {
case vmIntrinsics::_dsin:
call = make_runtime_call(RC_LEAF, OptoRuntime::Math_D_D_Type(),
CAST_FROM_FN_PTR(address, SharedRuntime::dsin),
"Sin", NULL, arg, top());
break;
case vmIntrinsics::_dcos:
call = make_runtime_call(RC_LEAF, OptoRuntime::Math_D_D_Type(),
CAST_FROM_FN_PTR(address, SharedRuntime::dcos),
"Cos", NULL, arg, top());
break;
case vmIntrinsics::_dtan:
call = make_runtime_call(RC_LEAF, OptoRuntime::Math_D_D_Type(),
CAST_FROM_FN_PTR(address, SharedRuntime::dtan),
"Tan", NULL, arg, top());
break;
}
assert(control()->in(0) == call, "");
// The call's value is its projection at TypeFunc::Parms.
Node* slow_result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms));
r->init_req(1, control());
phi->init_req(1, slow_result);
// Post-merge
set_control(_gvn.transform(r));
record_for_igvn(r);
n = _gvn.transform(phi);
C->set_has_split_ifs(true); // Has chance for split-if optimization
}
set_result(n);
return true;
}
// Post-processing shared by the pow and exp intrinsics: handles the case in
// which the inlined instruction produced NaN.
//   result    - node computed by the inlined pow/exp instruction
//   x, y      - operands passed on to the runtime call; y may be NULL, in
//               which case only x (and its top() half) is passed
//   call_type - TypeFunc of the runtime entry
//   funcAddr  - address of the runtime entry
//   funcName  - name given to the runtime call
// If not too many Reason_intrinsic traps have been taken, a NaN result leads
// to an uncommon trap and 'result' is returned as is.  Otherwise a NaN
// result is recomputed by a leaf runtime call and the returned node is a phi
// merging 'result' with the call's value ('result' itself is returned when
// the slow projection folds to top).
Node* LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName) {
//-------------------
//result=(result.isNaN())? funcAddr():result;
// Check: If isNaN() by checking result!=result? then either trap
// or go to runtime
Node* cmpisnan = _gvn.transform(new (C) CmpDNode(result, result));
// Build the boolean node
// True when result == result, i.e. the result is a number (not NaN).
Node* bolisnum = _gvn.transform(new (C) BoolNode(cmpisnan, BoolTest::eq));
if (!too_many_traps(Deoptimization::Reason_intrinsic)) {
{ BuildCutout unless(this, bolisnum, PROB_STATIC_FREQUENT);
// The pow or exp intrinsic returned a NaN, which requires a call
// to the runtime. Recompile with the runtime call.
uncommon_trap(Deoptimization::Reason_intrinsic,
Deoptimization::Action_make_not_entrant);
}
return result;
} else {
// If this inlining ever returned NaN in the past, we compile a call
// to the runtime to properly handle corner cases
IfNode* iff = create_and_xform_if(control(), bolisnum, PROB_STATIC_FREQUENT, COUNT_UNKNOWN);
// False projection: result is NaN (slow).  True projection: result is a number (fast).
Node* if_slow = _gvn.transform(new (C) IfFalseNode(iff));
Node* if_fast = _gvn.transform(new (C) IfTrueNode(iff));
if (!if_slow->is_top()) {
// Slot 1: fast path with the inlined result; slot 2: runtime call value.
RegionNode* result_region = new (C) RegionNode(3);
PhiNode* result_val = new (C) PhiNode(result_region, Type::DOUBLE);
result_region->init_req(1, if_fast);
result_val->init_req(1, result);
set_control(if_slow);
const TypePtr* no_memory_effects = NULL;
// Each double operand is followed by top(); the y slots are NULL when y is absent.
Node* rt = make_runtime_call(RC_LEAF, call_type, funcAddr, funcName,
no_memory_effects,
x, top(), y, y ? top() : NULL);
Node* value = _gvn.transform(new (C) ProjNode(rt, TypeFunc::Parms+0));
#ifdef ASSERT
Node* value_top = _gvn.transform(new (C) ProjNode(rt, TypeFunc::Parms+1));
assert(value_top == top(), "second value must be top");
#endif
result_region->init_req(2, control());
result_val->init_req(2, value);
set_control(_gvn.transform(result_region));
return _gvn.transform(result_val);
} else {
// The NaN path is unreachable; the inlined result stands.
return result;
}
}
}
//------------------------------inline_exp-------------------------------------
// Inline exp instructions, if possible. The Intel hardware only misses
// really odd corner cases (+/- Infinity). Just uncommon-trap them.
bool LibraryCallKit::inline_exp() {
Node* arg = round_double_node(argument(0));
Node* n = _gvn.transform(new (C) ExpDNode(C, control(), arg));
n = finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
set_result(n);
C->set_has_split_ifs(true); // Has chance for split-if optimization
return true;
}
//------------------------------inline_pow-------------------------------------
// Inline power instructions, if possible.
bool LibraryCallKit::inline_pow() {
// Pseudocode for pow
// if (y == 2) {
// return x * x;
// } else {
// if (x <= 0.0) {
// long longy = (long)y;
// if ((double)longy == y) { // if y is long
// if (y + 1 == y) longy = 0; // huge number: even
// result = ((1&longy) == 0)?-DPow(abs(x), y):DPow(abs(x), y);
// } else {
// result = NaN;
// }
// } else {
// result = DPow(x,y);
// }
// if (result != result)? {
// result = uncommon_trap() or runtime_call();
// }
// return result;
// }
Node* x = round_double_node(argument(0));
Node* y = round_double_node(argument(2));
Node* result = NULL;
Node* const_two_node = makecon(TypeD::make(2.0));
Node* cmp_node = _gvn.transform(new (C) CmpDNode(y, const_two_node));
Node* bool_node = _gvn.transform(new (C) BoolNode(cmp_node, BoolTest::eq));
IfNode* if_node = create_and_xform_if(control(), bool_node, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
Node* if_true = _gvn.transform(new (C) IfTrueNode(if_node));
Node* if_false = _gvn.transform(new (C) IfFalseNode(if_node));
RegionNode* region_node = new (C) RegionNode(3);
region_node->init_req(1, if_true);
Node* phi_node = new (C) PhiNode(region_node, Type::DOUBLE);
// special case for x^y where y == 2, we can convert it to x * x
phi_node->init_req(1, _gvn.transform(new (C) MulDNode(x, x)));
// set control to if_false since we will now process the false branch
set_control(if_false);
if (!too_many_traps(Deoptimization::Reason_intrinsic)) {
// Short form: skip the fancy tests and just check for NaN result.
result = _gvn.transform(new (C) PowDNode(C, control(), x, y));
} else {
// If this inlining ever returned NaN in the past, include all
// checks + call to the runtime.
// Set the merge point for If node with condition of (x <= 0.0)
// There are four possible paths to region node and phi node
RegionNode *r = new (C) RegionNode(4);
Node *phi = new (C) PhiNode(r, Type::DOUBLE);
// Build the first if node: if (x <= 0.0)
// Node for 0 constant
Node *zeronode = makecon(TypeD::ZERO);
// Check x:0
Node *cmp = _gvn.transform(new (C) CmpDNode(x, zeronode));
// Check: If (x<=0) then go complex path
Node *bol1 = _gvn.transform(new (C) BoolNode( cmp, BoolTest::le ));
// Branch either way
IfNode *if1 = create_and_xform_if(control(),bol1, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
// Fast path taken; set region slot 3
Node *fast_taken = _gvn.transform(new (C) IfFalseNode(if1));
r->init_req(3,fast_taken); // Capture fast-control
// Fast path not-taken, i.e. slow path
Node *complex_path = _gvn.transform(new (C) IfTrueNode(if1));
// Set fast path result
Node *fast_result = _gvn.transform(new (C) PowDNode(C, control(), x, y));
phi->init_req(3, fast_result);
// Complex path
// Build the second if node (if y is long)
// Node for (long)y
Node *longy = _gvn.transform(new (C) ConvD2LNode(y));
// Node for (double)((long) y)
Node *doublelongy= _gvn.transform(new (C) ConvL2DNode(longy));
// Check (double)((long) y) : y
Node *cmplongy= _gvn.transform(new (C) CmpDNode(doublelongy, y));
// Check if (y isn't long) then go to slow path
Node *bol2 = _gvn.transform(new (C) BoolNode( cmplongy, BoolTest::ne ));
// Branch either way
IfNode *if2 = create_and_xform_if(complex_path,bol2, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
Node* ylong_path = _gvn.transform(new (C) IfFalseNode(if2));
Node *slow_path = _gvn.transform(new (C) IfTrueNode(if2));
// Calculate DPow(abs(x), y)*(1 & (long)y)
// Node for constant 1
Node *conone = longcon(1);
// 1& (long)y
Node *signnode= _gvn.transform(new (C) AndLNode(conone, longy));
// A huge number is always even. Detect a huge number by checking
// if y + 1 == y and set integer to be tested for parity to 0.
// Required for corner case:
// (long)9.223372036854776E18 = max_jlong
// (double)(long)9.223372036854776E18 = 9.223372036854776E18
// max_jlong is odd but 9.223372036854776E18 is even
Node* yplus1 = _gvn.transform(new (C) AddDNode(y, makecon(TypeD::make(1))));
Node *cmpyplus1= _gvn.transform(new (C) CmpDNode(yplus1, y));
Node *bolyplus1 = _gvn.transform(new (C) BoolNode( cmpyplus1, BoolTest::eq ));
Node* correctedsign = NULL;
if (ConditionalMoveLimit != 0) {
correctedsign = _gvn.transform( CMoveNode::make(C, NULL, bolyplus1, signnode, longcon(0), TypeLong::LONG));
} else {
IfNode *ifyplus1 = create_and_xform_if(ylong_path,bolyplus1, PROB_FAIR, COUNT_UNKNOWN);
RegionNode *r = new (C) RegionNode(3);
Node *phi = new (C) PhiNode(r, TypeLong::LONG);
r->init_req(1, _gvn.transform(new (C) IfFalseNode(ifyplus1)));
r->init_req(2, _gvn.transform(new (C) IfTrueNode(ifyplus1)));
phi->init_req(1, signnode);
phi->init_req(2, longcon(0));
correctedsign = _gvn.transform(phi);
ylong_path = _gvn.transform(r);
record_for_igvn(r);
}
// zero node
Node *conzero = longcon(0);
// Check (1&(long)y)==0?
Node *cmpeq1 = _gvn.transform(new (C) CmpLNode(correctedsign, conzero));
// Check if (1&(long)y)!=0?, if so the result is negative
Node *bol3 = _gvn.transform(new (C) BoolNode( cmpeq1, BoolTest::ne ));
// abs(x)
Node *absx=_gvn.transform(new (C) AbsDNode(x));
// abs(x)^y
Node *absxpowy = _gvn.transform(new (C) PowDNode(C, control(), absx, y));
// -abs(x)^y
Node *negabsxpowy = _gvn.transform(new (C) NegDNode (absxpowy));
// (1&(long)y)==1?-DPow(abs(x), y):DPow(abs(x), y)
Node *signresult = NULL;
if (ConditionalMoveLimit != 0) {
signresult = _gvn.transform( CMoveNode::make(C, NULL, bol3, absxpowy, negabsxpowy, Type::DOUBLE));
} else {
IfNode *ifyeven = create_and_xform_if(ylong_path,bol3, PROB_FAIR, COUNT_UNKNOWN);
RegionNode *r = new (C) RegionNode(3);
Node *phi = new (C) PhiNode(r, Type::DOUBLE);
r->init_req(1, _gvn.transform(new (C) IfFalseNode(ifyeven)));
r->init_req(2, _gvn.transform(new (C) IfTrueNode(ifyeven)));
phi->init_req(1, absxpowy);
phi->init_req(2, negabsxpowy);
signresult = _gvn.transform(phi);
ylong_path = _gvn.transform(r);
record_for_igvn(r);
}
// Set complex path fast result
r->init_req(2, ylong_path);
phi->init_req(2, signresult);
static const jlong nan_bits = CONST64(0x7ff8000000000000);
Node *slow_result = makecon(TypeD::make(*(double*)&nan_bits)); // return NaN
r->init_req(1,slow_path);
phi->init_req(1,slow_result);
// Post merge
set_control(_gvn.transform(r));
record_for_igvn(r);
result = _gvn.transform(phi);
}
result = finish_pow_exp(result, x, y, OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW");
// control from finish_pow_exp is now input to the region node
region_node->set_req(2, control());
// the result from finish_pow_exp is now input to the phi node
phi_node->init_req(2, result);
set_control(_gvn.transform(region_node));
record_for_igvn(region_node);
set_result(_gvn.transform(phi_node));
C->set_has_split_ifs(true); // Has chance for split-if optimization
return true;
}
//------------------------------runtime_math-----------------------------
bool LibraryCallKit::runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName) {
assert(call_type == OptoRuntime::Math_DD_D_Type() || call_type == OptoRuntime::Math_D_D_Type(),
"must be (DD)D or (D)D type");
// Inputs
Node* a = round_double_node(argument(0));
Node* b = (call_type == OptoRuntime::Math_DD_D_Type()) ? round_double_node(argument(2)) : NULL;
const TypePtr* no_memory_effects = NULL;
Node* trig = make_runtime_call(RC_LEAF, call_type, funcAddr, funcName,
no_memory_effects,
a, top(), b, b ? top() : NULL);
Node* value = _gvn.transform(new (C) ProjNode(trig, TypeFunc::Parms+0));
#ifdef ASSERT
Node* value_top = _gvn.transform(new (C) ProjNode(trig, TypeFunc::Parms+1));
assert(value_top == top(), "second value must be top");
#endif
set_result(value);
return true;
}
//------------------------------inline_math_native-----------------------------
// Dispatch a java.lang.Math intrinsic.  When the matcher has a rule for the
// corresponding ideal opcode the operation is expanded inline; otherwise most
// cases fall back to a runtime call, while sqrt and abs simply decline
// (return false).  atan2 is never intrinsified here.
bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
#define FN_PTR(f) CAST_FROM_FN_PTR(address, f)
  switch (id) {
  // Not every platform has match rules for these; use the shared runtime
  // routine when the rule is missing.
  case vmIntrinsics::_dcos:
    if (Matcher::has_match_rule(Op_CosD)) {
      return inline_trig(id);
    }
    return runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dcos), "COS");

  case vmIntrinsics::_dsin:
    if (Matcher::has_match_rule(Op_SinD)) {
      return inline_trig(id);
    }
    return runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dsin), "SIN");

  case vmIntrinsics::_dtan:
    if (Matcher::has_match_rule(Op_TanD)) {
      return inline_trig(id);
    }
    return runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dtan), "TAN");

  case vmIntrinsics::_dlog:
    if (Matcher::has_match_rule(Op_LogD)) {
      return inline_math(id);
    }
    return runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog), "LOG");

  case vmIntrinsics::_dlog10:
    if (Matcher::has_match_rule(Op_Log10D)) {
      return inline_math(id);
    }
    return runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog10), "LOG10");

  // No runtime fallback for these two: without the rule, do not intrinsify.
  case vmIntrinsics::_dsqrt:
    if (!Matcher::match_rule_supported(Op_SqrtD)) {
      return false;
    }
    return inline_math(id);

  case vmIntrinsics::_dabs:
    if (!Matcher::has_match_rule(Op_AbsD)) {
      return false;
    }
    return inline_math(id);

  case vmIntrinsics::_dexp:
    if (Matcher::has_match_rule(Op_ExpD)) {
      return inline_exp();
    }
    return runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dexp), "EXP");

  case vmIntrinsics::_dpow:
    if (Matcher::has_match_rule(Op_PowD)) {
      return inline_pow();
    }
    return runtime_math(OptoRuntime::Math_DD_D_Type(), FN_PTR(SharedRuntime::dpow), "POW");

  // Not yet correctly implemented as an intrinsic.
  case vmIntrinsics::_datan2:
    return false;

  default:
    fatal_unexpected_iid(id);
    return false;
  }
#undef FN_PTR
}
// Returns true when 'n' is one of the node shapes treated as a "simple name":
// a node with a single required input (a constant), a Type node whose type is
// a singleton, a projection (parameter or return value), or a Phi (a local of
// some sort).
static bool is_simple_name(Node* n) {
  if (n->req() == 1) {
    return true;                         // constant
  }
  if (n->is_Type() && n->as_Type()->type()->singleton()) {
    return true;                         // type node pinned to one value
  }
  if (n->is_Proj()) {
    return true;                         // parameter or return value
  }
  return n->is_Phi();                    // local of some sort
}
//----------------------------inline_min_max-----------------------------------
// Intrinsic entry point for min/max: the result is whatever generate_min_max
// builds for the values in argument slots 0 and 1.  Always returns true.
bool LibraryCallKit::inline_min_max(vmIntrinsics::ID id) {
  Node* lhs = argument(0);
  Node* rhs = argument(1);
  Node* min_or_max = generate_min_max(id, lhs, rhs);
  set_result(min_or_max);
  return true;
}
// Shared tail of the *Exact intrinsics.  'math' is the already-built
// arithmetic node and 'test' the node whose overflow condition is tested.
// Emits a branch on BoolTest::overflow: the taken (overflow) side ends in an
// uncommon trap with re-execution requested, the other side continues with
// 'math' as the intrinsic's result.
void LibraryCallKit::inline_math_mathExact(Node* math, Node *test) {
  Node* did_overflow = _gvn.transform(new (C) BoolNode(test, BoolTest::overflow));
  IfNode* overflow_branch =
      create_and_map_if(control(), did_overflow, PROB_UNLIKELY_MAG(3), COUNT_UNKNOWN);
  Node* no_overflow_ctrl = _gvn.transform(new (C) IfFalseNode(overflow_branch));
  Node* overflow_ctrl    = _gvn.transform(new (C) IfTrueNode(overflow_branch));

  {
    // Overflow side: the JVM state and reexecute flag are restored when
    // this scope is left.
    PreserveJVMState saved_jvms(this);
    PreserveReexecuteState saved_reexecute(this);
    jvms()->set_should_reexecute(true);

    set_control(overflow_ctrl);
    set_i_o(i_o());

    uncommon_trap(Deoptimization::Reason_intrinsic,
                  Deoptimization::Action_none);
  }

  // Non-overflow side carries on with the computed value.
  set_control(no_overflow_ctrl);
  set_result(math);
}
// Build the arithmetic node (OverflowOp::MathOp) and the matching overflow
// check node (OverflowOp) for the two operands, then let
// inline_math_mathExact wire up the trap and the result.  Always returns true.
template <typename OverflowOp>
bool LibraryCallKit::inline_math_overflow(Node* arg1, Node* arg2) {
  typedef typename OverflowOp::MathOp ArithmeticOp;

  ArithmeticOp* raw_arithmetic = new(C) ArithmeticOp(arg1, arg2);
  Node* arithmetic     = _gvn.transform(raw_arithmetic);
  Node* overflow_check = _gvn.transform(new(C) OverflowOp(arg1, arg2));

  inline_math_mathExact(arithmetic, overflow_check);
  return true;
}
bool LibraryCallKit::inline_math_addExactI(bool is_increment) {
return inline_math_overflow<OverflowAddINode>(argument(0), is_increment ? intcon(1) : argument(1));
}
bool LibraryCallKit::inline_math_addExactL(bool is_increment) {
return inline_math_overflow<OverflowAddLNode>(argument(0), is_increment ? longcon(1) : argument(2));
}
bool LibraryCallKit::inline_math_subtractExactI(bool is_decrement) {
return inline_math_overflow<OverflowSubINode>(argument(0), is_decrement ? intcon(1) : argument(1));
}
bool LibraryCallKit::inline_math_subtractExactL(bool is_decrement) {
return inline_math_overflow<OverflowSubLNode>(argument(0), is_decrement ? longcon(1) : argument(2));
}
bool LibraryCallKit::inline_math_negateExactI() {
return inline_math_overflow<OverflowSubINode>(intcon(0), argument(0));
}
bool LibraryCallKit::inline_math_negateExactL() {
return inline_math_overflow<OverflowSubLNode>(longcon(0), argument(0));
}
bool LibraryCallKit::inline_math_multiplyExactI() {
return inline_math_overflow<OverflowMulINode>(argument(0), argument(1));
}
bool LibraryCallKit::inline_math_multiplyExactL() {
return inline_math_overflow<OverflowMulLNode>(argument(0), argument(2));
}
// Build the value of min(x0, y0) or max(x0, y0); 'id' selects max when it is
// vmIntrinsics::_max and min otherwise.
//
// Outline:
//   1. If both inputs are the same node, that node is the answer.
//   2. If either input does not have an int type, top() is returned.
//   3. Look among the users of the (normalized) operands for an existing
//      compare of the two values, then walk up the dominator chain (bounded
//      depth) for an If that tests that compare.  When one is found, the
//      answer is simply one of the two inputs.
//   4. Otherwise build a CMove, preferring a Bool that already exists on one
//      of the found compares so the new test can be shared with prior tests.
//
// Returns the node holding the result (one of the inputs, top(), or the
// transformed CMove).
Node*
LibraryCallKit::generate_min_max(vmIntrinsics::ID id, Node* x0, Node* y0) {
// These are the candidate return value:
Node* xvalue = x0;
Node* yvalue = y0;
if (xvalue == yvalue) {
return xvalue;
}
bool want_max = (id == vmIntrinsics::_max);
const TypeInt* txvalue = _gvn.type(xvalue)->isa_int();
const TypeInt* tyvalue = _gvn.type(yvalue)->isa_int();
// Both inputs must be int-typed; otherwise give back top().
if (txvalue == NULL || tyvalue == NULL) return top();
// This is not really necessary, but it is consistent with a
// hypothetical MaxINode::Value method:
int widen = MAX2(txvalue->_widen, tyvalue->_widen);
// %%% This folding logic should (ideally) be in a different place.
// Some should be inside IfNode, and there to be a more reliable
// transformation of ?: style patterns into cmoves. We also want
// more powerful optimizations around cmove and min/max.
// Try to find a dominating comparison of these guys.
// It can simplify the index computation for Arrays.copyOf
// and similar uses of System.arraycopy.
// First, compute the normalized version of CmpI(x, y).
int cmp_op = Op_CmpI;
Node* xkey = xvalue;
Node* ykey = yvalue;
Node* ideal_cmpxy = _gvn.transform(new(C) CmpINode(xkey, ykey));
if (ideal_cmpxy->is_Cmp()) {
// E.g., if we have CmpI(length - offset, count),
// it might idealize to CmpI(length, count + offset)
cmp_op = ideal_cmpxy->Opcode();
xkey = ideal_cmpxy->in(1);
ykey = ideal_cmpxy->in(2);
}
// Start by locating any relevant comparisons.
// Scan the users of whichever key has fewer of them.
Node* start_from = (xkey->outcnt() < ykey->outcnt()) ? xkey : ykey;
Node* cmpxy = NULL;
Node* cmpyx = NULL;
for (DUIterator_Fast kmax, k = start_from->fast_outs(kmax); k < kmax; k++) {
Node* cmp = start_from->fast_out(k);
if (cmp->outcnt() > 0 && // must have prior uses
cmp->in(0) == NULL && // must be context-independent
cmp->Opcode() == cmp_op) { // right kind of compare
if (cmp->in(1) == xkey && cmp->in(2) == ykey) cmpxy = cmp;
if (cmp->in(1) == ykey && cmp->in(2) == xkey) cmpyx = cmp;
}
}
const int NCMPS = 2;
Node* cmps[NCMPS] = { cmpxy, cmpyx };
int cmpn;
for (cmpn = 0; cmpn < NCMPS; cmpn++) {
if (cmps[cmpn] != NULL) break; // find a result
}
// cmpn < NCMPS means at least one existing compare was found above.
if (cmpn < NCMPS) {
// Look for a dominating test that tells us the min and max.
int depth = 0; // Limit search depth for speed
Node* dom = control();
for (; dom != NULL; dom = IfNode::up_one_dom(dom, true)) {
if (++depth >= 100) break;
Node* ifproj = dom;
if (!ifproj->is_Proj()) continue;
Node* iff = ifproj->in(0);
if (!iff->is_If()) continue;
Node* bol = iff->in(1);
if (!bol->is_Bool()) continue;
Node* cmp = bol->in(1);
if (cmp == NULL) continue;
// Only tests on one of the compares found above are of interest.
for (cmpn = 0; cmpn < NCMPS; cmpn++)
if (cmps[cmpn] == cmp) break;
if (cmpn == NCMPS) continue;
// Rephrase the test as a statement about 'x btest y': negate it when we
// are on the false projection, commute it when the compare is (y, x).
BoolTest::mask btest = bol->as_Bool()->_test._test;
if (ifproj->is_IfFalse()) btest = BoolTest(btest).negate();
if (cmp->in(1) == ykey) btest = BoolTest(btest).commute();
// At this point, we know that 'x btest y' is true.
// Tests not listed in the switch return nothing, so the dominator walk continues.
switch (btest) {
case BoolTest::eq:
// They are proven equal, so we can collapse the min/max.
// Either value is the answer. Choose the simpler.
if (is_simple_name(yvalue) && !is_simple_name(xvalue))
return yvalue;
return xvalue;
case BoolTest::lt: // x < y
case BoolTest::le: // x <= y
return (want_max ? yvalue : xvalue);
case BoolTest::gt: // x > y
case BoolTest::ge: // x >= y
return (want_max ? xvalue : yvalue);
}
}
}
// We failed to find a dominating test.
// Let's pick a test that might GVN with prior tests.
// Among the Bool users of the found compares (eq/ne excluded), keep the one
// with the most uses.
Node* best_bol = NULL;
BoolTest::mask best_btest = BoolTest::illegal;
for (cmpn = 0; cmpn < NCMPS; cmpn++) {
Node* cmp = cmps[cmpn];
if (cmp == NULL) continue;
for (DUIterator_Fast jmax, j = cmp->fast_outs(jmax); j < jmax; j++) {
Node* bol = cmp->fast_out(j);
if (!bol->is_Bool()) continue;
BoolTest::mask btest = bol->as_Bool()->_test._test;
if (btest == BoolTest::eq || btest == BoolTest::ne) continue;
if (cmp->in(1) == ykey) btest = BoolTest(btest).commute();
if (bol->outcnt() > (best_bol == NULL ? 0 : best_bol->outcnt())) {
best_bol = bol->as_Bool();
best_btest = btest;
}
}
}
Node* answer_if_true = NULL;
Node* answer_if_false = NULL;
switch (best_btest) {
default:
// No reusable Bool was found: create an 'lt' test on the x:y compare
// (using the freshly transformed compare if no existing one was found),
// then share the lt/le handling below.
if (cmpxy == NULL)
cmpxy = ideal_cmpxy;
best_bol = _gvn.transform(new(C) BoolNode(cmpxy, BoolTest::lt));
// and fall through:
case BoolTest::lt: // x < y
case BoolTest::le: // x <= y
answer_if_true = (want_max ? yvalue : xvalue);
answer_if_false = (want_max ? xvalue : yvalue);
break;
case BoolTest::gt: // x > y
case BoolTest::ge: // x >= y
answer_if_true = (want_max ? xvalue : yvalue);
answer_if_false = (want_max ? yvalue : xvalue);
break;
}
// Compute the int range of the result from the ranges of the two inputs.
jint hi, lo;
if (want_max) {
// We can sharpen the minimum.
hi = MAX2(txvalue->_hi, tyvalue->_hi);
lo = MAX2(txvalue->_lo, tyvalue->_lo);
} else {
// We can sharpen the maximum.
hi = MIN2(txvalue->_hi, tyvalue->_hi);
lo = MIN2(txvalue->_lo, tyvalue->_lo);
}
// Use a flow-free graph structure, to avoid creating excess control edges
// which could hinder other optimizations.
// Since Math.min/max is often used with arraycopy, we want
// tightly_coupled_allocation to be able to see beyond min/max expressions.
// Note the argument order: the false answer is passed before the true answer.
Node* cmov = CMoveNode::make(C, NULL, best_bol,
answer_if_false, answer_if_true,
TypeInt::make(lo, hi, widen));
return _gvn.transform(cmov);
/*
// This is not as desirable as it may seem, since Min and Max
// nodes do not have a full set of optimizations.
// And they would interfere, anyway, with 'if' optimizations
// and with CMoveI canonical forms.
switch (id) {
case vmIntrinsics::_min:
result_val = _gvn.transform(new (C, 3) MinINode(x,y)); break;
case vmIntrinsics::_max:
result_val = _gvn.transform(new (C, 3) MaxINode(x,y)); break;
default:
ShouldNotReachHere();
}
*/
}
// Classify the (base, offset) pair of an unsafe access as Type::RawPtr,
// Type::OopPtr or Type::AnyPtr.  'base' and 'offset' are in/out: for a
// null base the pair is rewritten to (CastX2P(offset), 0) and the access is
// classified as raw.
inline int
LibraryCallKit::classify_unsafe_addr(Node* &base, Node* &offset) {
  // A missing base node is treated like a constant-null base.
  const TypePtr* base_type =
      (base != NULL) ? _gvn.type(base)->isa_ptr() : TypePtr::NULL_PTR;

  if (base_type == NULL) {
    // Unknown type.
    return Type::AnyPtr;
  }

  if (base_type == TypePtr::NULL_PTR) {
    // NULL+long form: the long itself is the address, so switch to a rawptr.
    base   = _gvn.transform(new (C) CastX2PNode(offset));
    offset = MakeConX(0);
    return Type::RawPtr;
  }

  if (base_type->base() == Type::RawPtr) {
    return Type::RawPtr;
  }

  if (!base_type->isa_oopptr()) {
    // No information:
    return Type::AnyPtr;
  }

  // From here on the base is some kind of oop pointer.
  if (base_type->ptr() == TypePtr::NotNull) {
    // Base is never null => always a heap address.
    return Type::OopPtr;
  }

  // Offset is small => always a heap address.
  const TypeX* offset_type = _gvn.type(offset)->isa_intptr_t();
  const bool offset_is_small =
      offset_type != NULL &&
      base_type->offset() == 0 &&     // (should always be?)
      offset_type->_lo >= 0 &&
      !MacroAssembler::needs_explicit_null_check(offset_type->_hi);
  if (offset_is_small) {
    return Type::OopPtr;
  }

  // Otherwise, it might either be oop+off or NULL+addr.
  return Type::AnyPtr;
}
// Build the address node for an unsafe access.  classify_unsafe_addr may
// rewrite 'base' and 'offset' (they are local copies here); a raw
// classification uses top() as the base of the address expression.
inline Node* LibraryCallKit::make_unsafe_address(Node* base, Node* offset) {
  const bool is_raw = (classify_unsafe_addr(base, offset) == Type::RawPtr);
  if (!is_raw) {
    return basic_plus_adr(base, offset);
  }
  return basic_plus_adr(top(), base, offset);
}
//--------------------------inline_number_methods-----------------------------
// inline int Integer.numberOfLeadingZeros(int)
// inline int Long.numberOfLeadingZeros(long)
//
// inline int Integer.numberOfTrailingZeros(int)
// inline int Long.numberOfTrailingZeros(long)
//
// inline int Integer.bitCount(int)
// inline int Long.bitCount(long)
//
// inline char Character.reverseBytes(char)
// inline short Short.reverseBytes(short)
// inline int Integer.reverseBytes(int)
// inline long Long.reverseBytes(long)
// Map a bit-counting or byte-reversing intrinsic onto the single ideal node
// that implements it, applied to the value in argument slot 0.  Any other id
// is reported via fatal_unexpected_iid.  Always returns true.
bool LibraryCallKit::inline_number_methods(vmIntrinsics::ID id) {
  Node* operand = argument(0);
  Node* op_node = NULL;

  switch (id) {
  case vmIntrinsics::_numberOfLeadingZeros_i:
    op_node = new (C) CountLeadingZerosINode(operand);
    break;
  case vmIntrinsics::_numberOfLeadingZeros_l:
    op_node = new (C) CountLeadingZerosLNode(operand);
    break;
  case vmIntrinsics::_numberOfTrailingZeros_i:
    op_node = new (C) CountTrailingZerosINode(operand);
    break;
  case vmIntrinsics::_numberOfTrailingZeros_l:
    op_node = new (C) CountTrailingZerosLNode(operand);
    break;
  case vmIntrinsics::_bitCount_i:
    op_node = new (C) PopCountINode(operand);
    break;
  case vmIntrinsics::_bitCount_l:
    op_node = new (C) PopCountLNode(operand);
    break;
  case vmIntrinsics::_reverseBytes_c:
    op_node = new (C) ReverseBytesUSNode(0, operand);
    break;
  case vmIntrinsics::_reverseBytes_s:
    op_node = new (C) ReverseBytesSNode(0, operand);
    break;
  case vmIntrinsics::_reverseBytes_i:
    op_node = new (C) ReverseBytesINode(0, operand);
    break;
  case vmIntrinsics::_reverseBytes_l:
    op_node = new (C) ReverseBytesLNode(0, operand);
    break;
  default:
    fatal_unexpected_iid(id);
    break;
  }

  set_result(_gvn.transform(op_node));
  return true;
}
//----------------------------inline_unsafe_access----------------------------
// Basic type that appears in the Java signature where a native address is
// passed or returned.  The signature checks in inline_unsafe_access compare
// against it and then treat the value as T_ADDRESS for getAddress/putAddress.
const static BasicType T_ADDRESS_HOLDER = T_LONG;
// Helper that guards and inserts a pre-barrier.
//
//   base_oop     - object the unsafe load was made from
//   offset       - offset of the load within base_oop
//   pre_val      - the loaded value, handed to pre_barrier() as its pre_val
//   need_mem_bar - when true, a MemBarCPUOrder is also emitted after the
//                  pre-barrier on the guarded path
//
// Nothing is emitted when compile-time filtering shows the access cannot be
// a read of the Reference referent field; otherwise the barrier is wrapped
// in runtime checks of the offset and of the type of base_oop.
void LibraryCallKit::insert_pre_barrier(Node* base_oop, Node* offset,
Node* pre_val, bool need_mem_bar) {
// We could be accessing the referent field of a reference object. If so, when G1
// is enabled, we need to log the value in the referent field in an SATB buffer.
// This routine performs some compile time filters and generates suitable
// runtime filters that guard the pre-barrier code.
// Also add memory barrier for non volatile load from the referent field
// to prevent commoning of loads across safepoint.
if (!UseG1GC && !need_mem_bar)
return;
// Some compile time checks.
// If offset is a constant, is it java_lang_ref_Reference::referent_offset?
const TypeX* otype = offset->find_intptr_t_type();
if (otype != NULL && otype->is_con() &&
otype->get_con() != java_lang_ref_Reference::referent_offset) {
// Constant offset but not the referent_offset so just return
return;
}
// We only need to generate the runtime guards for instances.
const TypeOopPtr* btype = base_oop->bottom_type()->isa_oopptr();
if (btype != NULL) {
if (btype->isa_aryptr()) {
// Array type so nothing to do
return;
}
const TypeInstPtr* itype = btype->isa_instptr();
if (itype != NULL) {
// Can the klass of base_oop be statically determined to be
// _not_ a sub-class of Reference and _not_ Object?
ciKlass* klass = itype->klass();
if ( klass->is_loaded() &&
!klass->is_subtype_of(env()->Reference_klass()) &&
!env()->Object_klass()->is_subtype_of(klass)) {
return;
}
}
}
// The compile time filters did not reject base_oop/offset so
// we need to generate the following runtime filters
//
// if (offset == java_lang_ref_Reference::referent_offset) {
// if (instance_of(base, java.lang.ref.Reference)) {
// pre_barrier(_, pre_val, ...);
// }
// }
// NOTE(review): 'likely' is not referenced anywhere below; only 'unlikely' is used.
float likely = PROB_LIKELY( 0.999);
float unlikely = PROB_UNLIKELY(0.999);
IdealKit ideal(this);
#define __ ideal.
Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset);
__ if_then(offset, BoolTest::eq, referent_off, unlikely); {
// Update graphKit memory and control from IdealKit.
sync_kit(ideal);
Node* ref_klass_con = makecon(TypeKlassPtr::make(env()->Reference_klass()));
Node* is_instof = gen_instanceof(base_oop, ref_klass_con);
// Update IdealKit memory and control from graphKit.
__ sync_kit(this);
Node* one = __ ConI(1);
// is_instof == 0 if base_oop == NULL
__ if_then(is_instof, BoolTest::eq, one, unlikely); {
// Update graphKit from IdealKit.
sync_kit(ideal);
// Use the pre-barrier to record the value in the referent field
pre_barrier(false /* do_load */,
__ ctrl(),
NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */,
pre_val /* pre_val */,
T_OBJECT);
if (need_mem_bar) {
// Add memory barrier to prevent commoning reads from this field
// across safepoint since GC can change its value.
insert_mem_bar(Op_MemBarCPUOrder);
}
// Update IdealKit from graphKit.
__ sync_kit(this);
} __ end_if(); // is_instof == 1
} __ end_if(); // offset == referent_offset
// Final sync IdealKit and GraphKit.
final_sync(ideal);
#undef __
}
// Interpret Unsafe.fieldOffset cookies correctly:
// declared here and defined elsewhere.  The unsafe intrinsics in this file
// only use it in asserts, to check that a cookie value maps to the same
// byte offset ("fieldOffset must be byte-scaled").
extern jlong Unsafe_field_offset_to_byte_offset(jlong field_offset);
// Try to derive a more precise oop type for the value of an unsafe object
// access.  The candidate klass is the declared type of the field named by
// 'alias_type' (if any), overridden by the element klass when 'adr_type'
// is an array address at or beyond the object-array base offset.
// Returns the type built from that klass, or NULL when no candidate was
// found or the candidate klass is not loaded.
const TypeOopPtr* LibraryCallKit::sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type, bool is_native_ptr) {
  ciKlass* candidate = NULL;

  // Instance field with an object type?
  if (alias_type->field() != NULL) {
    assert(!is_native_ptr, "native pointer op cannot use a java address");
    if (alias_type->field()->type()->is_klass()) {
      candidate = alias_type->field()->type()->as_klass();
    }
  }

  // Element of an array of oops?
  if (adr_type->isa_aryptr() &&
      adr_type->offset() >= objArrayOopDesc::base_offset_in_bytes()) {
    const TypeOopPtr* element_type = adr_type->is_aryptr()->elem()->isa_oopptr();
    if (element_type != NULL) {
      candidate = element_type->klass();
    }
  }

  // The candidate class might be unloaded if there is no class loader
  // constraint in place; in that case nothing can be sharpened.
  if (candidate == NULL || !candidate->is_loaded()) {
    return NULL;
  }

  const TypeOopPtr* sharpened = TypeOopPtr::make_from_klass(candidate);
#ifndef PRODUCT
  if (C->print_intrinsics() || C->print_inlining()) {
    tty->print(" from base type: ");
    adr_type->dump();
    tty->cr();
    tty->print(" sharpened value: ");
    sharpened->dump();
    tty->cr();
  }
#endif
  return sharpened;
}
// Intrinsify an Unsafe get/put of a value of basic type 'type'.
//
//   is_native_ptr - the address is a single long (argument slot 1) instead of
//                   an (object base, long offset) pair in slots 1 and 2
//   is_store      - put (true) or get (false); the stored value is read from
//                   argument slot 4 (base+offset form) or 3 (native form)
//   type          - basic type of the value being loaded or stored
//   is_volatile   - surround the access with the volatile memory barriers
//   unaligned     - passed through to make_load / store_to_memory
//
// Returns false (not intrinsified) when the callee is static, for an object
// access whose base is the null constant, for addresses that alias the
// klass or array-length slots, and for accesses that mix object and
// non-object types.  Returns true otherwise, including when the receiver
// null check leaves no live control.
bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile, bool unaligned) {
if (callee()->is_static()) return false; // caller must have the capability!
assert(type != T_OBJECT || !unaligned, "unaligned access not supported with object type");
#ifndef PRODUCT
{
ResourceMark rm;
// Check the signatures.
ciSignature* sig = callee()->signature();
#ifdef ASSERT
if (!is_store) {
// Object getObject(Object base, int/long offset), etc.
BasicType rtype = sig->return_type()->basic_type();
if (rtype == T_ADDRESS_HOLDER && callee()->name() == ciSymbol::getAddress_name())
rtype = T_ADDRESS; // it is really a C void*
assert(rtype == type, "getter must return the expected value");
if (!is_native_ptr) {
assert(sig->count() == 2, "oop getter has 2 arguments");
assert(sig->type_at(0)->basic_type() == T_OBJECT, "getter base is object");
assert(sig->type_at(1)->basic_type() == T_LONG, "getter offset is correct");
} else {
assert(sig->count() == 1, "native getter has 1 argument");
assert(sig->type_at(0)->basic_type() == T_LONG, "getter base is long");
}
} else {
// void putObject(Object base, int/long offset, Object x), etc.
assert(sig->return_type()->basic_type() == T_VOID, "putter must not return a value");
if (!is_native_ptr) {
assert(sig->count() == 3, "oop putter has 3 arguments");
assert(sig->type_at(0)->basic_type() == T_OBJECT, "putter base is object");
assert(sig->type_at(1)->basic_type() == T_LONG, "putter offset is correct");
} else {
assert(sig->count() == 2, "native putter has 2 arguments");
assert(sig->type_at(0)->basic_type() == T_LONG, "putter base is long");
}
BasicType vtype = sig->type_at(sig->count()-1)->basic_type();
if (vtype == T_ADDRESS_HOLDER && callee()->name() == ciSymbol::putAddress_name())
vtype = T_ADDRESS; // it is really a C void*
assert(vtype == type, "putter must accept the expected value");
}
#endif // ASSERT
}
#endif //PRODUCT
C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe".
Node* receiver = argument(0); // type: oop
// Build address expression. See the code in inline_unsafe_prefetch.
Node* adr;
Node* heap_base_oop = top();
Node* offset = top();
Node* val;
// The base is either a Java object or a value produced by Unsafe.staticFieldBase
// (In the native-pointer form, slot 1 is re-read below as the long address.)
Node* base = argument(1); // type: oop
if (!is_native_ptr) {
// The offset is a value produced by Unsafe.staticFieldOffset or Unsafe.objectFieldOffset
offset = argument(2); // type: long
// We currently rely on the cookies produced by Unsafe.xxxFieldOffset
// to be plain byte offsets, which are also the same as those accepted
// by oopDesc::field_base.
assert(Unsafe_field_offset_to_byte_offset(11) == 11,
"fieldOffset must be byte-scaled");
// 32-bit machines ignore the high half!
offset = ConvL2X(offset);
adr = make_unsafe_address(base, offset);
heap_base_oop = base;
val = is_store ? argument(4) : NULL;
} else {
Node* ptr = argument(1); // type: long
ptr = ConvL2X(ptr); // adjust Java long to machine word
adr = make_unsafe_address(NULL, ptr);
val = is_store ? argument(3) : NULL;
}
if ((_gvn.type(base)->isa_ptr() == TypePtr::NULL_PTR) && type == T_OBJECT) {
return false; // off-heap oop accesses are not supported
}
const TypePtr *adr_type = _gvn.type(adr)->isa_ptr();
// Try to categorize the address.
Compile::AliasType* alias_type = C->alias_type(adr_type);
assert(alias_type->index() != Compile::AliasIdxBot, "no bare pointers here");
if (alias_type->adr_type() == TypeInstPtr::KLASS ||
alias_type->adr_type() == TypeAryPtr::RANGE) {
return false; // not supported
}
// 'mismatched' records that the alias category's basic type differs from
// the type this access was requested with.
bool mismatched = false;
BasicType bt = alias_type->basic_type();
if (bt != T_ILLEGAL) {
assert(alias_type->adr_type()->is_oopptr(), "should be on-heap access");
if (bt == T_BYTE && adr_type->isa_aryptr()) {
// Alias type doesn't differentiate between byte[] and boolean[]).
// Use address type to get the element type.
bt = adr_type->is_aryptr()->elem()->array_element_basic_type();
}
if (bt == T_ARRAY || bt == T_NARROWOOP) {
// accessing an array field with getObject is not a mismatch
bt = T_OBJECT;
}
if ((bt == T_OBJECT) != (type == T_OBJECT)) {
// Don't intrinsify mismatched object accesses
return false;
}
mismatched = (bt != type);
}
assert(!mismatched || alias_type->adr_type()->is_oopptr(), "off-heap access can't be mismatched");
// First guess at the value type.
const Type *value_type = Type::get_const_basic_type(type);
// We will need memory barriers unless we can determine a unique
// alias category for this reference. (Note: If for some reason
// the barriers get omitted and the unsafe reference begins to "pollute"
// the alias analysis of the rest of the graph, either Compile::can_alias
// or Compile::must_alias will throw a diagnostic assert.)
bool need_mem_bar = (alias_type->adr_type() == TypeOopPtr::BOTTOM);
// If we are reading the value of the referent field of a Reference
// object (either by using Unsafe directly or through reflection)
// then, if G1 is enabled, we need to record the referent in an
// SATB log buffer using the pre-barrier mechanism.
// Also we need to add memory barrier to prevent commoning reads
// from this field across safepoint since GC can change its value.
bool need_read_barrier = !is_native_ptr && !is_store &&
offset != top() && heap_base_oop != top();
if (!is_store && type == T_OBJECT) {
const TypeOopPtr* tjp = sharpen_unsafe_type(alias_type, adr_type, is_native_ptr);
if (tjp != NULL) {
value_type = tjp;
}
}
receiver = null_check(receiver);
if (stopped()) {
return true;
}
// Heap pointers get a null-check from the interpreter,
// as a courtesy. However, this is not guaranteed by Unsafe,
// and it is not possible to fully distinguish unintended nulls
// from intended ones in this API.
Node* load = NULL;
Node* store = NULL;
Node* leading_membar = NULL;
if (is_volatile) {
// We need to emit leading and trailing CPU membars (see below) in
// addition to memory membars when is_volatile. This is a little
// too strong, but avoids the need to insert per-alias-type
// volatile membars (for stores; compare Parse::do_put_xxx), which
// we cannot do effectively here because we probably only have a
// rough approximation of type.
need_mem_bar = true;
// For Stores, place a memory ordering barrier now.
if (is_store) {
leading_membar = insert_mem_bar(Op_MemBarRelease);
} else {
if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
leading_membar = insert_mem_bar(Op_MemBarVolatile);
}
}
}
// Memory barrier to prevent normal and 'unsafe' accesses from
// bypassing each other. Happens after null checks, so the
// exception paths do not take memory state from the memory barrier,
// so there's no problems making a strong assert about mixing users
// of safe & unsafe memory. Otherwise fails in a CTW of rt.jar
// around 5701, class sun/reflect/UnsafeBooleanFieldAccessorImpl.
if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);
if (!is_store) {
MemNode::MemOrd mo = is_volatile ? MemNode::acquire : MemNode::unordered;
// To be valid, unsafe loads may depend on other conditions than
// the one that guards them: pin the Load node
load = make_load(control(), adr, value_type, type, adr_type, mo, LoadNode::Pinned, is_volatile, unaligned, mismatched);
// load value
switch (type) {
case T_BOOLEAN:
case T_CHAR:
case T_BYTE:
case T_SHORT:
case T_INT:
case T_LONG:
case T_FLOAT:
case T_DOUBLE:
break;
case T_OBJECT:
if (need_read_barrier) {
// The last argument asks insert_pre_barrier for its own MemBarCPUOrder
// only when this access is not already getting membars.
insert_pre_barrier(heap_base_oop, offset, load, !(is_volatile || need_mem_bar));
}
break;
case T_ADDRESS:
// Cast to an int type.
load = _gvn.transform(new (C) CastP2XNode(NULL, load));
load = ConvX2UL(load);
break;
default:
fatal(err_msg_res("unexpected type %d: %s", type, type2name(type)));
break;
}
// The load node has the control of the preceding MemBarCPUOrder. All
// following nodes will have the control of the MemBarCPUOrder inserted at
// the end of this method. So, pushing the load onto the stack at a later
// point is fine.
set_result(load);
} else {
// place effect of store into memory
switch (type) {
case T_DOUBLE:
val = dstore_rounding(val);
break;
case T_ADDRESS:
// Repackage the long as a pointer.
val = ConvL2X(val);
val = _gvn.transform(new (C) CastX2PNode(val));
break;
}
MemNode::MemOrd mo = is_volatile ? MemNode::release : MemNode::unordered;
if (type == T_OBJECT ) {
store = store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo, mismatched);
} else {
store = store_to_memory(control(), adr, val, type, adr_type, mo, is_volatile, unaligned, mismatched);
}
}
// Trailing barriers for volatile accesses: an acquire after a load; a
// volatile membar after a store (paired with the leading release) unless
// support_IRIW_for_not_multiple_copy_atomic_cpu is set.
if (is_volatile) {
if (!is_store) {
Node* mb = insert_mem_bar(Op_MemBarAcquire, load);
mb->as_MemBar()->set_trailing_load();
} else {
if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
Node* mb = insert_mem_bar(Op_MemBarVolatile, store);
MemBarNode::set_store_pair(leading_membar->as_MemBar(), mb->as_MemBar());
}
}
}
if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);
return true;
}
//----------------------------inline_unsafe_prefetch----------------------------
bool LibraryCallKit::inline_unsafe_prefetch(bool is_native_ptr, bool is_store, bool is_static) {
#ifndef PRODUCT
{
ResourceMark rm;
// Check the signatures.
ciSignature* sig = callee()->signature();
#ifdef ASSERT
// Object getObject(Object base, int/long offset), etc.
BasicType rtype = sig->return_type()->basic_type();
if (!is_native_ptr) {
assert(sig->count() == 2, "oop prefetch has 2 arguments");
assert(sig->type_at(0)->basic_type() == T_OBJECT, "prefetch base is object");
assert(sig->type_at(1)->basic_type() == T_LONG, "prefetcha offset is correct");
} else {
assert(sig->count() == 1, "native prefetch has 1 argument");
assert(sig->type_at(0)->basic_type() == T_LONG, "prefetch base is long");
}
#endif // ASSERT
}
#endif // !PRODUCT
C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe".
const int idx = is_static ? 0 : 1;
if (!is_static) {
null_check_receiver();
if (stopped()) {
return true;
}
}
// Build address expression. See the code in inline_unsafe_access.
Node *adr;
if (!is_native_ptr) {
// The base is either a Java object or a value produced by Unsafe.staticFieldBase
Node* base = argument(idx + 0); // type: oop
// The offset is a value produced by Unsafe.staticFieldOffset or Unsafe.objectFieldOffset
Node* offset = argument(idx + 1); // type: long
// We currently rely on the cookies produced by Unsafe.xxxFieldOffset
// to be plain byte offsets, which are also the same as those accepted
// by oopDesc::field_base.
assert(Unsafe_field_offset_to_byte_offset(11) == 11,
"fieldOffset must be byte-scaled");
// 32-bit machines ignore the high half!
offset = ConvL2X(offset);
adr = make_unsafe_address(base, offset);
} else {
Node* ptr = argument(idx + 0); // type: long
ptr = ConvL2X(ptr); // adjust Java long to machine word
adr = make_unsafe_address(NULL, ptr);
}
// Generate the read or write prefetch
Node *prefetch;
if (is_store) {
prefetch = new (C) PrefetchWriteNode(i_o(), adr);
} else {
prefetch = new (C) PrefetchReadNode(i_o(), adr);
}
prefetch->init_req(0, control());
set_i_o(_gvn.transform(prefetch));
return true;
}
//----------------------------inline_unsafe_load_store----------------------------
// This method serves a couple of different customers (depending on LoadStoreKind):
//
// LS_cmpxchg:
// public final native boolean compareAndSwapObject(Object o, long offset, Object expected, Object x);
// public final native boolean compareAndSwapInt( Object o, long offset, int expected, int x);
// public final native boolean compareAndSwapLong( Object o, long offset, long expected, long x);
//
// LS_xadd:
// public int getAndAddInt( Object o, long offset, int delta)
// public long getAndAddLong(Object o, long offset, long delta)
//
// LS_xchg:
// int getAndSet(Object o, long offset, int newValue)
// long getAndSet(Object o, long offset, long newValue)
// Object getAndSet(Object o, long offset, Object newValue)
//
// 'type' is the BasicType of the value operated on (T_INT, T_LONG or T_OBJECT;
// anything else hits fatal()), 'kind' selects the operation.
//
// Returns false when the call is not intrinsified: the callee is static, or
// the alias type of the address says the target is an oop while 'type' is
// not (or vice versa).  Otherwise returns true, either after building the
// LoadStore node bracketed by memory barriers and publishing it via
// set_result(), or early when the receiver null check leaves the path dead.
bool LibraryCallKit::inline_unsafe_load_store(BasicType type, LoadStoreKind kind) {
// This basic scheme here is the same as inline_unsafe_access, but
// differs in enough details that combining them would make the code
// overly confusing. (This is a true fact! I originally combined
// them, but even I was confused by it!) As much code/comments as
// possible are retained from inline_unsafe_access though to make
// the correspondences clearer. - dl
if (callee()->is_static()) return false; // caller must have the capability!
#ifndef PRODUCT
// rtype is only declared in non-PRODUCT builds; besides the signature checks
// below it is read by the result-size assert at the end of this method.
BasicType rtype;
{
ResourceMark rm;
// Check the signatures.
ciSignature* sig = callee()->signature();
rtype = sig->return_type()->basic_type();
if (kind == LS_xadd || kind == LS_xchg) {
// Check the signatures.
#ifdef ASSERT
assert(rtype == type, "get and set must return the expected type");
assert(sig->count() == 3, "get and set has 3 arguments");
assert(sig->type_at(0)->basic_type() == T_OBJECT, "get and set base is object");
assert(sig->type_at(1)->basic_type() == T_LONG, "get and set offset is long");
assert(sig->type_at(2)->basic_type() == type, "get and set must take expected type as new value/delta");
#endif // ASSERT
} else if (kind == LS_cmpxchg) {
// Check the signatures.
#ifdef ASSERT
assert(rtype == T_BOOLEAN, "CAS must return boolean");
assert(sig->count() == 4, "CAS has 4 arguments");
assert(sig->type_at(0)->basic_type() == T_OBJECT, "CAS base is object");
assert(sig->type_at(1)->basic_type() == T_LONG, "CAS offset is long");
#endif // ASSERT
} else {
ShouldNotReachHere();
}
}
#endif //PRODUCT
C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe".
// Get arguments:
Node* receiver = NULL;
Node* base = NULL;
Node* offset = NULL;
Node* oldval = NULL;
Node* newval = NULL;
// Slot numbering: slot 3 is skipped after the long offset at slot 2, so the
// next value sits at slot 4.  For CAS, newval follows oldval at slot 5, or at
// slot 6 when 'type' itself needs two slots (type2size[type] == 2).
if (kind == LS_cmpxchg) {
const bool two_slot_type = type2size[type] == 2;
receiver = argument(0); // type: oop
base = argument(1); // type: oop
offset = argument(2); // type: long
oldval = argument(4); // type: oop, int, or long
newval = argument(two_slot_type ? 6 : 5); // type: oop, int, or long
} else if (kind == LS_xadd || kind == LS_xchg){
receiver = argument(0); // type: oop
base = argument(1); // type: oop
offset = argument(2); // type: long
oldval = NULL;
newval = argument(4); // type: oop, int, or long
}
// Build field offset expression.
// We currently rely on the cookies produced by Unsafe.xxxFieldOffset
// to be plain byte offsets, which are also the same as those accepted
// by oopDesc::field_base.
assert(Unsafe_field_offset_to_byte_offset(11) == 11, "fieldOffset must be byte-scaled");
// 32-bit machines ignore the high half of long offsets
offset = ConvL2X(offset);
Node* adr = make_unsafe_address(base, offset);
const TypePtr *adr_type = _gvn.type(adr)->isa_ptr();
// Consult the alias class of the address: when its basic type is known
// (not T_ILLEGAL), an oop-vs-primitive disagreement with 'type' makes us
// bail out.  This happens before the receiver null check and before any
// membar is inserted.
Compile::AliasType* alias_type = C->alias_type(adr_type);
BasicType bt = alias_type->basic_type();
if (bt != T_ILLEGAL &&
((bt == T_OBJECT || bt == T_ARRAY) != (type == T_OBJECT))) {
// Don't intrinsify mismatched object accesses.
return false;
}
// For CAS, unlike inline_unsafe_access, there seems no point in
// trying to refine types. Just use the coarse types here.
assert(alias_type->index() != Compile::AliasIdxBot, "no bare pointers here");
const Type *value_type = Type::get_const_basic_type(type);
// Only an object exchange returns an oop, so only that case replaces the
// coarse value type with whatever sharpen_unsafe_type() can provide.
if (kind == LS_xchg && type == T_OBJECT) {
const TypeOopPtr* tjp = sharpen_unsafe_type(alias_type, adr_type);
if (tjp != NULL) {
value_type = tjp;
}
}
// Null check receiver.
receiver = null_check(receiver);
if (stopped()) {
return true;
}
int alias_idx = C->get_alias_index(adr_type);
// Memory-model-wise, a LoadStore acts like a little synchronized
// block, so needs barriers on each side. These don't translate
// into actual barriers on most machines, but we still need rest of
// compiler to respect ordering.
// The leading MemBarRelease is remembered so that it can be paired with the
// trailing MemBarAcquire once the access has been built.
Node* leading_membar = insert_mem_bar(Op_MemBarRelease);
insert_mem_bar(Op_MemBarCPUOrder);
// 4984716: MemBars must be inserted before this
// memory node in order to avoid a false
// dependency which will confuse the scheduler.
Node *mem = memory(alias_idx);
// For now, we handle only those cases that actually exist: ints,
// longs, and Object. Adding others should be straightforward.
Node* load_store = NULL;
switch(type) {
case T_INT:
if (kind == LS_xadd) {
load_store = _gvn.transform(new (C) GetAndAddINode(control(), mem, adr, newval, adr_type));
} else if (kind == LS_xchg) {
load_store = _gvn.transform(new (C) GetAndSetINode(control(), mem, adr, newval, adr_type));
} else if (kind == LS_cmpxchg) {
load_store = _gvn.transform(new (C) CompareAndSwapINode(control(), mem, adr, newval, oldval));
} else {
ShouldNotReachHere();
}
break;
case T_LONG:
if (kind == LS_xadd) {
load_store = _gvn.transform(new (C) GetAndAddLNode(control(), mem, adr, newval, adr_type));
} else if (kind == LS_xchg) {
load_store = _gvn.transform(new (C) GetAndSetLNode(control(), mem, adr, newval, adr_type));
} else if (kind == LS_cmpxchg) {
load_store = _gvn.transform(new (C) CompareAndSwapLNode(control(), mem, adr, newval, oldval));
} else {
ShouldNotReachHere();
}
break;
case T_OBJECT:
// Transformation of a value which could be NULL pointer (CastPP #NULL)
// could be delayed during Parse (for example, in adjust_map_after_if()).
// Execute transformation here to avoid barrier generation in such case.
if (_gvn.type(newval) == TypePtr::NULL_PTR)
newval = _gvn.makecon(TypePtr::NULL_PTR);
// Reference stores need a store barrier.
if (kind == LS_xchg) {
// If pre-barrier must execute before the oop store, old value will require do_load here.
if (!can_move_pre_barrier()) {
pre_barrier(true /* do_load*/,
control(), base, adr, alias_idx, newval, value_type->make_oopptr(),
NULL /* pre_val*/,
T_OBJECT);
} // Else move pre_barrier to use load_store value, see below.
} else if (kind == LS_cmpxchg) {
// Same as for newval above:
if (_gvn.type(oldval) == TypePtr::NULL_PTR) {
oldval = _gvn.makecon(TypePtr::NULL_PTR);
}
// The only known value which might get overwritten is oldval.
pre_barrier(false /* do_load */,
control(), NULL, NULL, max_juint, NULL, NULL,
oldval /* pre_val */,
T_OBJECT);
} else {
ShouldNotReachHere();
}
#ifdef _LP64
// When the address points to a narrow oop, encode the operands and use the
// narrow-oop (N) node flavors.
if (adr->bottom_type()->is_ptr_to_narrowoop()) {
Node *newval_enc = _gvn.transform(new (C) EncodePNode(newval, newval->bottom_type()->make_narrowoop()));
if (kind == LS_xchg) {
load_store = _gvn.transform(new (C) GetAndSetNNode(control(), mem, adr,
newval_enc, adr_type, value_type->make_narrowoop()));
} else {
assert(kind == LS_cmpxchg, "wrong LoadStore operation");
Node *oldval_enc = _gvn.transform(new (C) EncodePNode(oldval, oldval->bottom_type()->make_narrowoop()));
load_store = _gvn.transform(new (C) CompareAndSwapNNode(control(), mem, adr,
newval_enc, oldval_enc));
}
} else
#endif
{
if (kind == LS_xchg) {
load_store = _gvn.transform(new (C) GetAndSetPNode(control(), mem, adr, newval, adr_type, value_type->is_oopptr()));
} else {
assert(kind == LS_cmpxchg, "wrong LoadStore operation");
load_store = _gvn.transform(new (C) CompareAndSwapPNode(control(), mem, adr, newval, oldval));
}
}
// The post barrier is given the (un-encoded) newval and the LoadStore node.
post_barrier(control(), load_store, base, adr, alias_idx, newval, T_OBJECT, true);
break;
default:
fatal(err_msg_res("unexpected type %d: %s", type, type2name(type)));
break;
}
// SCMemProjNodes represent the memory state of a LoadStore. Their
// main role is to prevent LoadStore nodes from being optimized away
// when their results aren't used.
Node* proj = _gvn.transform(new (C) SCMemProjNode(load_store));
set_memory(proj, alias_idx);
// Remember the LoadStore node itself: load_store may be replaced by a DecodeN
// of it below, but the trailing MemBarAcquire is attached to the raw access.
Node* access = load_store;
if (type == T_OBJECT && kind == LS_xchg) {
#ifdef _LP64
if (adr->bottom_type()->is_ptr_to_narrowoop()) {
load_store = _gvn.transform(new (C) DecodeNNode(load_store, load_store->get_ptr_type()));
}
#endif
if (can_move_pre_barrier()) {
// Don't need to load pre_val. The old value is returned by load_store.
// The pre_barrier can execute after the xchg as long as no safepoint
// gets inserted between them.
pre_barrier(false /* do_load */,
control(), NULL, NULL, max_juint, NULL, NULL,
load_store /* pre_val */,
T_OBJECT);
}
}
// Add the trailing membar surrounding the access
insert_mem_bar(Op_MemBarCPUOrder);
Node* mb = insert_mem_bar(Op_MemBarAcquire, access);
// Record the leading release / trailing acquire membars as a pair.
MemBarNode::set_load_store_pair(leading_membar->as_MemBar(), mb->as_MemBar());
// Debug-only check that the result occupies as many slots as the callee's
// declared return type.
assert(type2size[load_store->bottom_type()->basic_type()] == type2size[rtype], "result type should match");
set_result(load_store);
return true;
}
//----------------------------inline_unsafe_ordered_store----------------------
// public native void sun.misc.Unsafe.putOrderedObject(Object o, long offset, Object x);
// public native void sun.misc.Unsafe.putOrderedInt(Object o, long offset, int x);
// public native void sun.misc.Unsafe.putOrderedLong(Object o, long offset, long x);
//
// 'type' is the BasicType of the stored value.  Returns false (no intrinsic)
// for a static callee; otherwise returns true, including when the receiver
// null check leaves the current path dead.
bool LibraryCallKit::inline_unsafe_ordered_store(BasicType type) {
  // Yet another flavor of inline_unsafe_access.  What sets it apart is that a
  // store-store ("release") barrier is always emitted and the store is
  // requested to be atomic (which only matters for "long").
  if (callee()->is_static()) {
    return false;  // caller must have the capability!
  }

#ifndef PRODUCT
  {
    ResourceMark rm;
    // Sanity-check the callee's signature.
    ciSignature* sig = callee()->signature();
#ifdef ASSERT
    BasicType rtype = sig->return_type()->basic_type();
    assert(rtype == T_VOID, "must return void");
    assert(sig->count() == 3, "has 3 arguments");
    assert(sig->type_at(0)->basic_type() == T_OBJECT, "base is object");
    assert(sig->type_at(1)->basic_type() == T_LONG, "offset is long");
#endif // ASSERT
  }
#endif //PRODUCT

  C->set_has_unsafe_access(true);  // Mark eventual nmethod as "unsafe".

  // Fetch every argument before the receiver null check runs.
  Node* receiver    = argument(0);  // type: oop
  Node* base        = argument(1);  // type: oop
  Node* byte_offset = argument(2);  // type: long
  Node* new_value   = argument(4);  // type: oop, int, or long

  // Null check receiver.
  receiver = null_check(receiver);
  if (stopped()) {
    return true;
  }

  // Build field offset expression.
  assert(Unsafe_field_offset_to_byte_offset(11) == 11, "fieldOffset must be byte-scaled");
  // 32-bit machines ignore the high half of long offsets
  byte_offset = ConvL2X(byte_offset);
  Node* address = make_unsafe_address(base, byte_offset);
  const TypePtr* address_type = _gvn.type(address)->isa_ptr();
  // The next two results are not consumed below; the calls are kept as in the
  // original sequence.
  const Type* value_type = Type::get_const_basic_type(type);
  Compile::AliasType* alias_type = C->alias_type(address_type);

  insert_mem_bar(Op_MemBarRelease);
  insert_mem_bar(Op_MemBarCPUOrder);

  // Ensure that the store is atomic for longs:
  const bool require_atomic_access = true;
  if (type != T_OBJECT) {
    store_to_memory(control(), address, new_value, type, address_type, MemNode::release, require_atomic_access);
  } else {
    // Reference stores need a store barrier.
    store_oop_to_unknown(control(), base, address, address_type, new_value, type, MemNode::release);
  }

  insert_mem_bar(Op_MemBarCPUOrder);
  return true;
}
// Intrinsic for the fence operations identified by 'id' (_loadFence,
// _storeFence, _fullFence).  Returns true once the matching membar has been
// inserted; any other id is reported via fatal_unexpected_iid() and yields
// false.
bool LibraryCallKit::inline_unsafe_fence(vmIntrinsics::ID id) {
  // Whatever the flavor, first pin earlier loads/stores so they cannot sink
  // below the fence; the acquire, release, or volatile membar follows.
  insert_mem_bar(Op_MemBarCPUOrder);

  switch (id) {
    case vmIntrinsics::_loadFence:
      insert_mem_bar(Op_LoadFence);
      break;
    case vmIntrinsics::_storeFence:
      insert_mem_bar(Op_StoreFence);
      break;
    case vmIntrinsics::_fullFence:
      insert_mem_bar(Op_MemBarVolatile);
      break;
    default:
      fatal_unexpected_iid(id);
      return false;
  }
  return true;
}
// Returns true when a class-initialization guard is required for 'kls'.
// The guard can only be skipped when 'kls' is a constant node whose type is a
// klass pointer and the instance klass behind it reports is_initialized().
bool LibraryCallKit::klass_needs_init_guard(Node* kls) {
  // Only a constant node is inspected further; everything else keeps the guard.
  const TypeKlassPtr* const_klass =
      kls->is_Con() ? kls->bottom_type()->isa_klassptr() : NULL;
  if (const_klass == NULL) {
    return true;
  }

  // don't need a guard for a klass that is already initialized
  ciInstanceKlass* instance_klass = const_klass->klass()->as_instance_klass();
  if (instance_klass->is_initialized()) {
    return false;
  }
  return true;
}
//----------------------------inline_unsafe_allocate---------------------------
// public native Object sun.misc.Unsafe.allocateInstance(Class<?> cls);
bool LibraryCallKit::inline_unsafe_allocate() {
if (callee()->is_static()) return false; // caller must have the capability!
null_check_receiver(); // null-check, then ignore
Node* cls = null_check(argument(1));
if (stopped()) return true;
Node* kls = load_klass_from_mirror(cls, false, NULL, 0);
kls = null_check(kls);
if (stopped()) return true; // argument was like int.class
Node* test = NULL;
if (LibraryCallKit::klass_needs_init_guard(kls)) {
// Note: The argument might still be an illegal value like
// Serializable.class or Object[].class. The runtime will handle it.
// But we must make an explicit check for initialization.
Node* insp = basic_plus_adr(kls, in_bytes(InstanceKlass::init_state_offset()));
// Use T_BOOLEAN for InstanceKlass::_init_state so the compiler
// can generate code to load it as unsigned byte.
Node* inst = make_load(NULL, insp, TypeInt::UBYTE, T_BOOLEAN, MemNode::unordered);
Node* bits = intcon(InstanceKlass::fully_initialized);
test = _gvn.transform(new (C) SubINode(inst, bits));
// The 'test' is non-zero if we need to take a slow path.
}
Node* obj = new_instance(kls, test);
set_result(obj);
return true;
}
#ifdef JFR_HAVE_INTRINSICS
/*
* oop -> myklass
* myklass->trace_id |= USED
* return myklass->trace_id & ~0x3
*/
bool LibraryCallKit::inline_native_classID() {
Node* cls = null_check(argument(0), T_OBJECT);
Node* kls = load_klass_from_mirror(cls, false, NULL, 0);
kls = null_check(kls, T_OBJECT);
ByteSize offset = KLASS_TRACE_ID_OFFSET;
Node* insp = basic_plus_adr(kls, in_bytes(offset));
Node* tvalue = make_load(NULL, insp, TypeLong::LONG, T_LONG, MemNode::unordered);
Node* clsused = longcon(0x01l); // set the class bit
Node* orl = _gvn.transform(new (C) OrLNode(tvalue, clsused));
const TypePtr *adr_type = _gvn.type(insp)->isa_ptr();
store_to_memory(control(), insp, orl, T_LONG, adr_type, MemNode::unordered);
#ifdef TRACE_ID_META_BITS
Node* mbits = longcon(~TRACE_ID_META_BITS);
tvalue = _gvn.transform(new (C) AndLNode(tvalue, mbits));
#endif
#ifdef TRACE_ID_SHIFT
Node* cbits = intcon(TRACE_ID_SHIFT);
tvalue = _gvn.transform(new (C) URShiftLNode(tvalue, cbits));
#endif
set_result(tvalue);
return true;
}
bool LibraryCallKit::inline_native_getEventWriter() {
Node* tls_ptr = _gvn.transform(new (C) ThreadLocalNode());
Node* jobj_ptr = basic_plus_adr(top(), tls_ptr,
in_bytes(THREAD_LOCAL_WRITER_OFFSET_JFR)
);
Node* jobj = make_load(control(), jobj_ptr, TypeRawPtr::BOTTOM, T_ADDRESS, MemNode::unordered);
Node* jobj_cmp_null = _gvn.transform( new (C) CmpPNode(jobj, null()) );
Node* test_jobj_eq_null = _gvn.transform( new (C) BoolNode(jobj_cmp_null, BoolTest::eq) );
IfNode* iff_jobj_null =
create_and_map_if(control(), test_jobj_eq_null, PROB_MIN, COUNT_UNKNOWN);
enum { _normal_path = 1,
_null_path = 2,
PATH_LIMIT };
RegionNode* result_rgn = new (C) RegionNode(PATH_LIMIT);
PhiNode* result_val = new (C) PhiNode(result_rgn, TypePtr::BOTTOM);
Node* jobj_is_null = _gvn.transform(new (C) IfTrueNode(iff_jobj_null));
result_rgn->init_req(_null_path, jobj_is_null);
result_val->init_req(_null_path, null());
Node* jobj_is_not_null = _gvn.transform(new (C) IfFalseNode(iff_jobj_null));
result_rgn->init_req(_normal_path, jobj_is_not_null);
Node* res = make_load(jobj_is_not_null, jobj, TypeInstPtr::NOTNULL, T_OBJECT, MemNode::unordered);
result_val->init_req(_normal_path, res);
set_result(result_rgn, result_val);
return true;
}
#endif // JFR_HAVE_INTRINSICS
//------------------------inline_native_time_funcs--------------
// Shared intrinsic for System.currentTimeMillis() and System.nanoTime(),
// which have the same type and signature.  Emits a leaf runtime call to
// 'funcAddr' (reported under 'funcName') and uses the call's first result
// projection as the intrinsic's value.  Always returns true.
bool LibraryCallKit::inline_native_time_funcs(address funcAddr, const char* funcName) {
  const TypePtr* no_memory_effects = NULL;
  Node* call = make_runtime_call(RC_LEAF, OptoRuntime::void_long_Type(),
                                 funcAddr, funcName, no_memory_effects);
  Node* result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms+0));
#ifdef ASSERT
  // The projection following the result is expected to be top.
  Node* result_top = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms+1));
  assert(result_top == top(), "second value must be top");
#endif
  set_result(result);
  return true;
}
//------------------------inline_native_currentThread------------------
bool LibraryCallKit::inline_native_currentThread() {
Node* junk = NULL;
set_result(generate_current_thread(junk));
return true;
}
//------------------------inline_native_isInterrupted------------------
// private native boolean java.lang.Thread.isInterrupted(boolean ClearInterrupted);
bool LibraryCallKit::inline_native_isInterrupted() {
// Add a fast path to t.isInterrupted(clear_int):
// (t == Thread.current() &&
// (!TLS._osthread._interrupted || WINDOWS_ONLY(false) NOT_WINDOWS(!clear_int)))
// ? TLS._osthread._interrupted : /*slow path:*/ t.isInterrupted(clear_int)
// So, in the common case that the interrupt bit is false,
// we avoid making a call into the VM. Even if the interrupt bit
// is true, if the clear_int argument is false, we avoid the VM call.
// However, if the receiver is not currentThread, we must call the VM,
// because there must be some locking done around the operation.
// We only go to the fast case code if we pass two guards.
// Paths which do not pass are accumulated in the slow_region.
enum {
no_int_result_path = 1, // t == Thread.current() && !TLS._osthread._interrupted
no_clear_result_path = 2, // t == Thread.current() && TLS._osthread._interrupted && !clear_int
slow_result_path = 3, // slow path: t.isInterrupted(clear_int)
PATH_LIMIT
};
// Ensure that it's not possible to move the load of TLS._osthread._interrupted flag
// out of the function.
insert_mem_bar(Op_MemBarCPUOrder);
RegionNode* result_rgn = new (C) RegionNode(PATH_LIMIT);
PhiNode* result_val = new (C) PhiNode(result_rgn, TypeInt::BOOL);
RegionNode* slow_region = new (C) RegionNode(1);
record_for_igvn(slow_region);
// (a) Receiving thread must be the current thread.
Node* rec_thr = argument(0);
Node* tls_ptr = NULL;
Node* cur_thr = generate_current_thread(tls_ptr);
Node* cmp_thr = _gvn.transform(new (C) CmpPNode(cur_thr, rec_thr));
Node* bol_thr = _gvn.transform(new (C) BoolNode(cmp_thr, BoolTest::ne));
generate_slow_guard(bol_thr, slow_region);
// (b) Interrupt bit on TLS must be false.
Node* p = basic_plus_adr(top()/*!oop*/, tls_ptr, in_bytes(JavaThread::osthread_offset()));
Node* osthread = make_load(NULL, p, TypeRawPtr::NOTNULL, T_ADDRESS, MemNode::unordered);
p = basic_plus_adr(top()/*!oop*/, osthread, in_bytes(OSThread::interrupted_offset()));
// Set the control input on the field _interrupted read to prevent it floating up.
Node* int_bit = make_load(control(), p, TypeInt::BOOL, T_INT, MemNode::unordered);
Node* cmp_bit = _gvn.transform(new (C) CmpINode(int_bit, intcon(0)));
Node* bol_bit = _gvn.transform(new (C) BoolNode(cmp_bit, BoolTest::ne));
IfNode* iff_bit = create_and_map_if(control(), bol_bit, PROB_UNLIKELY_MAG(3), COUNT_UNKNOWN);
// First fast path: if (!TLS._interrupted) return false;
Node* false_bit = _gvn.transform(new (C) IfFalseNode(iff_bit));
result_rgn->init_req(no_int_result_path, false_bit);
result_val->init_req(no_int_result_path, intcon(0));
// drop through to next case
set_control( _gvn.transform(new (C) IfTrueNode(iff_bit)));
#ifndef TARGET_OS_FAMILY_windows
// (c) Or, if interrupt bit is set and clear_int is false, use 2nd fast path.
Node* clr_arg = argument(1);
Node* cmp_arg = _gvn.transform(new (C) CmpINode(clr_arg, intcon(0)));
Node* bol_arg = _gvn.transform(new (C) BoolNode(cmp_arg, BoolTest::ne));
IfNode* iff_arg = create_and_map_if(control(), bol_arg, PROB_FAIR, COUNT_UNKNOWN);
// Second fast path: ... else if (!clear_int) return true;
Node* false_arg = _gvn.transform(new (C) IfFalseNode(iff_arg));
result_rgn->init_req(no_clear_result_path, false_arg);
result_val->init_req(no_clear_result_path, intcon(1));
// drop through to next case
set_control( _gvn.transform(new (C) IfTrueNode(iff_arg)));
#else
// To return true on Windows you must read the _interrupted field
// and check the event state, i.e. take the slow path.
#endif // TARGET_OS_FAMILY_windows
// (d) Otherwise, go to the slow path.
slow_region->add_req(control());
set_control( _gvn.transform(slow_region));
if (stopped()) {
// There is no slow path.
result_rgn->init_req(slow_result_path, top());
result_val->init_req(slow_result_path, top());
} else {
// non-virtual because it is a private non-static
CallJavaNode* slow_call = generate_method_call(vmIntrinsics::_isInterrupted);
Node* slow_val = set_results_for_java_call(slow_call);
// this->control() comes from set_results_for_java_call
Node* fast_io = slow_call->in(TypeFunc::I_O);
Node* fast_mem = slow_call->in(TypeFunc::Memory);
ssssssssss66
最新推荐文章于 2024-07-24 08:48:18 发布