//---------------------------catch_call_exceptions-----------------------------
// Put Catch and CatchProj nodes behind a just-created call.
// Send their caught exceptions to the proper handler.
// This may be used after a call to the rethrow VM stub,
// when unloaded exception classes need to be processed.
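// Illustrative sketch of the subgraph this builds (not generated output;
// projection numbering follows the code below, where projection 0 is the
// normal fall-through and projections 1..len correspond to the collected
// handler bcis):
//
//            call
//              |
//        Catch(ctrl, i_o)
//       /        |         \
//  CatchProj#0  CatchProj#1 ... CatchProj#len
//  (fall thru)  (bcis[0])       (bcis[len-1])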
void Parse::catch_call_exceptions(ciExceptionHandlerStream& handlers) {
// Exceptions are delivered through this channel:
Node* i_o = this->i_o();
// Add a CatchNode.
GrowableArray<int>* bcis = new (C->node_arena()) GrowableArray<int>(C->node_arena(), 8, 0, -1);
GrowableArray<const Type*>* extypes = new (C->node_arena()) GrowableArray<const Type*>(C->node_arena(), 8, 0, NULL);
GrowableArray<int>* saw_unloaded = new (C->node_arena()) GrowableArray<int>(C->node_arena(), 8, 0, 0);
for (; !handlers.is_done(); handlers.next()) {
ciExceptionHandler* h = handlers.handler();
int h_bci = h->handler_bci();
ciInstanceKlass* h_klass = h->is_catch_all() ? env()->Throwable_klass() : h->catch_klass();
// Do not introduce unloaded exception types into the graph:
if (!h_klass->is_loaded()) {
if (saw_unloaded->contains(h_bci)) {
/* We've already seen an unloaded exception with h_bci,
so don't duplicate. Duplication will cause the CatchNode to be
unnecessarily large. See 4713716. */
continue;
} else {
saw_unloaded->append(h_bci);
}
}
const Type* h_extype = TypeOopPtr::make_from_klass(h_klass);
// (We use make_from_klass because it respects UseUniqueSubclasses.)
h_extype = h_extype->join(TypeInstPtr::NOTNULL);
assert(!h_extype->empty(), "sanity");
// Note: It's OK if the BCIs repeat themselves.
bcis->append(h_bci);
extypes->append(h_extype);
}
int len = bcis->length();
CatchNode *cn = new (C) CatchNode(control(), i_o, len+1);
Node *catch_ = _gvn.transform(cn);
// now branch with the exception state to each of the (potential)
// handlers
for(int i=0; i < len; i++) {
// Setup JVM state to enter the handler.
PreserveJVMState pjvms(this);
// Locals are just copied from before the call.
// Get control from the CatchNode.
int handler_bci = bcis->at(i);
Node* ctrl = _gvn.transform( new (C) CatchProjNode(catch_, i+1,handler_bci));
// This handler cannot happen?
if (ctrl == top()) continue;
set_control(ctrl);
// Create exception oop
const TypeInstPtr* extype = extypes->at(i)->is_instptr();
Node *ex_oop = _gvn.transform(new (C) CreateExNode(extypes->at(i), ctrl, i_o));
// Handle unloaded exception classes.
if (saw_unloaded->contains(handler_bci)) {
// An unloaded exception type is coming here. Do an uncommon trap.
#ifndef PRODUCT
// We do not expect the same handler bci to take both cold unloaded
// and hot loaded exceptions. But, watch for it.
if ((Verbose || WizardMode) && extype->is_loaded()) {
tty->print("Warning: Handler @%d takes mixed loaded/unloaded exceptions in ", bci());
method()->print_name(); tty->cr();
} else if (PrintOpto && (Verbose || WizardMode)) {
tty->print("Bailing out on unloaded exception type ");
extype->klass()->print_name();
tty->print(" at bci:%d in ", bci());
method()->print_name(); tty->cr();
}
#endif
// Emit an uncommon trap instead of processing the block.
set_bci(handler_bci);
push_ex_oop(ex_oop);
uncommon_trap(Deoptimization::Reason_unloaded,
Deoptimization::Action_reinterpret,
extype->klass(), "!loaded exception");
set_bci(iter().cur_bci()); // put it back
continue;
}
// go to the exception handler
if (handler_bci < 0) { // merge with corresponding rethrow node
throw_to_exit(make_exception_state(ex_oop));
} else { // Else jump to the corresponding handler
push_ex_oop(ex_oop); // Clear stack and push just the oop.
merge_exception(handler_bci);
}
}
// The first CatchProj is for the normal return.
// (Note: If this is a call to rethrow_Java, this node goes dead.)
set_control(_gvn.transform( new (C) CatchProjNode(catch_, CatchProjNode::fall_through_index, CatchProjNode::no_handler_bci)));
}
//----------------------------catch_inline_exceptions--------------------------
// Handle all exceptions thrown by an inlined method or individual bytecode.
// Common case 1: we have no handler, so all exceptions merge right into
// the rethrow case.
// Case 2: we have some handlers, with loaded exception klasses that have
// no subklasses. We do a Deutsch-Shiffman style type-check on the incoming
// exception oop and branch to the handler directly.
// Case 3: We have some handlers with subklasses or are not loaded at
// compile-time. We have to call the runtime to resolve the exception.
// So we insert a RethrowCall and all the logic that goes with it.
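// For case 2 the emitted checks are conceptually equivalent to the following
// pseudo-code (illustrative only; gen_subtype_check() degenerates to a plain
// klass pointer compare when the catch klass is an exact leaf type):
//
//   Klass* ex_k = ex_oop->klass();
//   if (ex_k == CatchKlass1)  goto handler1;   // pointer compare, no vcall
//   if (ex_k == CatchKlass2)  goto handler2;
//   rethrow(ex_oop);                           // fell off the handler table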
void Parse::catch_inline_exceptions(SafePointNode* ex_map) {
// Caller is responsible for saving away the map for normal control flow!
assert(stopped(), "call set_map(NULL) first");
assert(method()->has_exception_handlers(), "don't come here w/o work to do");
Node* ex_node = saved_ex_oop(ex_map);
if (ex_node == top()) {
// No action needed.
return;
}
const TypeInstPtr* ex_type = _gvn.type(ex_node)->isa_instptr();
NOT_PRODUCT(if (ex_type==NULL) tty->print_cr("*** Exception not InstPtr"));
if (ex_type == NULL)
ex_type = TypeOopPtr::make_from_klass(env()->Throwable_klass())->is_instptr();
// determine potential exception handlers
ciExceptionHandlerStream handlers(method(), bci(),
ex_type->klass()->as_instance_klass(),
ex_type->klass_is_exact());
// Start executing from the given throw state. (Keep its stack, for now.)
// Get the exception oop as known at compile time.
ex_node = use_exception_state(ex_map);
// Get the exception oop klass from its header
Node* ex_klass_node = NULL;
if (has_ex_handler() && !ex_type->klass_is_exact()) {
Node* p = basic_plus_adr( ex_node, ex_node, oopDesc::klass_offset_in_bytes());
ex_klass_node = _gvn.transform(LoadKlassNode::make(_gvn, NULL, immutable_memory(), p, TypeInstPtr::KLASS, TypeKlassPtr::OBJECT));
// Compute the exception klass a little more cleverly.
// The obvious solution is to simply do a LoadKlass from the 'ex_node'.
// However, if the ex_node is a PhiNode, I'm going to do a LoadKlass for
// each arm of the Phi. If I know something clever about the exceptions
// I'm loading the class from, I can replace the LoadKlass with the
// klass constant for the exception oop.
if (ex_node->is_Phi()) {
ex_klass_node = new (C) PhiNode(ex_node->in(0), TypeKlassPtr::OBJECT);
for (uint i = 1; i < ex_node->req(); i++) {
Node* ex_in = ex_node->in(i);
if (ex_in == top() || ex_in == NULL) {
// This path was not taken.
ex_klass_node->init_req(i, top());
continue;
}
Node* p = basic_plus_adr(ex_in, ex_in, oopDesc::klass_offset_in_bytes());
Node* k = _gvn.transform(LoadKlassNode::make(_gvn, NULL, immutable_memory(), p, TypeInstPtr::KLASS, TypeKlassPtr::OBJECT));
ex_klass_node->init_req( i, k );
}
_gvn.set_type(ex_klass_node, TypeKlassPtr::OBJECT);
}
}
// Scan the exception table for applicable handlers.
// If none, we can call rethrow() and be done!
// If precise (loaded with no subklasses), insert a D.S. style
// pointer compare to the correct handler and loop back.
// If imprecise, switch to the Rethrow VM-call style handling.
int remaining = handlers.count_remaining();
// iterate through all entries sequentially
for (;!handlers.is_done(); handlers.next()) {
ciExceptionHandler* handler = handlers.handler();
if (handler->is_rethrow()) {
// If we fell off the end of the table without finding an imprecise
// exception klass (and without finding a generic handler) then we
// know this exception is not handled in this method. We just rethrow
// the exception into the caller.
throw_to_exit(make_exception_state(ex_node));
return;
}
// exception handler bci range covers throw_bci => investigate further
int handler_bci = handler->handler_bci();
if (remaining == 1) {
push_ex_oop(ex_node); // Push exception oop for handler
#ifndef PRODUCT
if (PrintOpto && WizardMode) {
tty->print_cr(" Catching every inline exception bci:%d -> handler_bci:%d", bci(), handler_bci);
}
#endif
merge_exception(handler_bci); // jump to handler
return; // No more handling to be done here!
}
// Get the handler's klass
ciInstanceKlass* klass = handler->catch_klass();
if (!klass->is_loaded()) { // klass is not loaded?
// fall through into catch_call_exceptions which will emit a
// handler with an uncommon trap.
break;
}
if (klass->is_interface()) // should not happen, but...
break; // bail out
// Check the type of the exception against the catch type
const TypeKlassPtr *tk = TypeKlassPtr::make(klass);
Node* con = _gvn.makecon(tk);
Node* not_subtype_ctrl = gen_subtype_check(ex_klass_node, con);
if (!stopped()) {
PreserveJVMState pjvms(this);
const TypeInstPtr* tinst = TypeOopPtr::make_from_klass_unique(klass)->cast_to_ptr_type(TypePtr::NotNull)->is_instptr();
assert(klass->has_subklass() || tinst->klass_is_exact(), "lost exactness");
Node* ex_oop = _gvn.transform(new (C) CheckCastPPNode(control(), ex_node, tinst));
push_ex_oop(ex_oop); // Push exception oop for handler
#ifndef PRODUCT
if (PrintOpto && WizardMode) {
tty->print(" Catching inline exception bci:%d -> handler_bci:%d -- ", bci(), handler_bci);
klass->print_name();
tty->cr();
}
#endif
merge_exception(handler_bci);
}
set_control(not_subtype_ctrl);
// Come here if exception does not match handler.
// Carry on with more handler checks.
--remaining;
}
assert(!stopped(), "you should return if you finish the chain");
// Oops, need to call into the VM to resolve the klasses at runtime.
// Note: This call must not deoptimize, since it is not a real call at this bci!
kill_dead_locals();
make_runtime_call(RC_NO_LEAF | RC_MUST_THROW,
OptoRuntime::rethrow_Type(),
OptoRuntime::rethrow_stub(),
NULL, NULL,
ex_node);
// Rethrow is a pure call, no side effects, only a result.
// The result cannot be allocated, so we use I_O
// Catch exceptions from the rethrow
catch_call_exceptions(handlers);
}
// (Note: Moved add_debug_info into GraphKit::add_safepoint_edges.)
#ifndef PRODUCT
void Parse::count_compiled_calls(bool at_method_entry, bool is_inline) {
if( CountCompiledCalls ) {
if( at_method_entry ) {
// bump invocation counter if top method (for statistics)
if (CountCompiledCalls && depth() == 1) {
const TypePtr* addr_type = TypeMetadataPtr::make(method());
Node* adr1 = makecon(addr_type);
Node* adr2 = basic_plus_adr(adr1, adr1, in_bytes(Method::compiled_invocation_counter_offset()));
increment_counter(adr2);
}
} else if (is_inline) {
switch (bc()) {
case Bytecodes::_invokevirtual: increment_counter(SharedRuntime::nof_inlined_calls_addr()); break;
case Bytecodes::_invokeinterface: increment_counter(SharedRuntime::nof_inlined_interface_calls_addr()); break;
case Bytecodes::_invokestatic:
case Bytecodes::_invokedynamic:
case Bytecodes::_invokespecial: increment_counter(SharedRuntime::nof_inlined_static_calls_addr()); break;
default: fatal("unexpected call bytecode");
}
} else {
switch (bc()) {
case Bytecodes::_invokevirtual: increment_counter(SharedRuntime::nof_normal_calls_addr()); break;
case Bytecodes::_invokeinterface: increment_counter(SharedRuntime::nof_interface_calls_addr()); break;
case Bytecodes::_invokestatic:
case Bytecodes::_invokedynamic:
case Bytecodes::_invokespecial: increment_counter(SharedRuntime::nof_static_calls_addr()); break;
default: fatal("unexpected call bytecode");
}
}
}
}
#endif //PRODUCT
ciMethod* Compile::optimize_virtual_call(ciMethod* caller, int bci, ciInstanceKlass* klass,
ciKlass* holder, ciMethod* callee,
const TypeOopPtr* receiver_type, bool is_virtual,
bool& call_does_dispatch, int& vtable_index,
bool check_access) {
// Set default values for out-parameters.
call_does_dispatch = true;
vtable_index = Method::invalid_vtable_index;
// Choose call strategy.
ciMethod* optimized_virtual_method = optimize_inlining(caller, bci, klass, callee,
receiver_type, check_access);
// Has the call been sufficiently improved such that it is no longer virtual?
if (optimized_virtual_method != NULL) {
callee = optimized_virtual_method;
call_does_dispatch = false;
} else if (!UseInlineCaches && is_virtual && callee->is_loaded()) {
// We can make a vtable call at this site
vtable_index = callee->resolve_vtable_index(caller->holder(), holder);
}
return callee;
}
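// Summary of the out-parameters above (restating the logic, no extra policy):
// either call_does_dispatch comes back false and the returned ciMethod is the
// statically bound target, or it stays true and vtable_index is either a real
// index (a vtable call is possible here) or Method::invalid_vtable_index
// (fall back to an inline cache).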
// Identify possible target method and inlining style
ciMethod* Compile::optimize_inlining(ciMethod* caller, int bci, ciInstanceKlass* klass,
ciMethod* callee, const TypeOopPtr* receiver_type,
bool check_access) {
// only use for virtual or interface calls
// If it is obviously final, do not bother to call find_monomorphic_target,
// because the class hierarchy checks are not needed, and may fail due to
// incompletely loaded classes. Since we do our own class loading checks
// in this module, we may confidently bind to any method.
if (callee->can_be_statically_bound()) {
return callee;
}
// Attempt to improve the receiver
bool actual_receiver_is_exact = false;
ciInstanceKlass* actual_receiver = klass;
if (receiver_type != NULL) {
// Array methods are all inherited from Object, and are monomorphic.
// finalize() call on array is not allowed.
if (receiver_type->isa_aryptr() &&
callee->holder() == env()->Object_klass() &&
callee->name() != ciSymbol::finalize_method_name()) {
return callee;
}
// All other interesting cases are instance klasses.
if (!receiver_type->isa_instptr()) {
return NULL;
}
ciInstanceKlass *ikl = receiver_type->klass()->as_instance_klass();
if (ikl->is_loaded() && ikl->is_initialized() && !ikl->is_interface() &&
(ikl == actual_receiver || ikl->is_subtype_of(actual_receiver))) {
// ikl is the same as or a better type than the original actual_receiver,
// e.g. static receiver from bytecodes.
actual_receiver = ikl;
// Is the actual_receiver exact?
actual_receiver_is_exact = receiver_type->klass_is_exact();
}
}
ciInstanceKlass* calling_klass = caller->holder();
ciMethod* cha_monomorphic_target = callee->find_monomorphic_target(calling_klass, klass, actual_receiver, check_access);
if (cha_monomorphic_target != NULL) {
assert(!cha_monomorphic_target->is_abstract(), "");
// Look at the method-receiver type. Does it add "too much information"?
ciKlass* mr_klass = cha_monomorphic_target->holder();
const Type* mr_type = TypeInstPtr::make(TypePtr::BotPTR, mr_klass);
if (receiver_type == NULL || !receiver_type->higher_equal(mr_type)) {
// Calling this method would include an implicit cast to its holder.
// %%% Not yet implemented. Would throw minor asserts at present.
// %%% The most common wins are already gained by +UseUniqueSubclasses.
// To fix, put the higher_equal check at the call of this routine,
// and add a CheckCastPP to the receiver.
if (TraceDependencies) {
tty->print_cr("found unique CHA method, but could not cast up");
tty->print(" method = ");
cha_monomorphic_target->print();
tty->cr();
}
if (log() != NULL) {
log()->elem("missed_CHA_opportunity klass='%d' method='%d'",
log()->identify(klass),
log()->identify(cha_monomorphic_target));
}
cha_monomorphic_target = NULL;
}
}
if (cha_monomorphic_target != NULL) {
// Hardwiring a virtual.
// If we inlined because CHA revealed only a single target method,
// then we are dependent on that target method not getting overridden
// by dynamic class loading. Be sure to test the "static" receiver
// dest_method here, as opposed to the actual receiver, which may
// falsely lead us to believe that the receiver is final or private.
dependencies()->assert_unique_concrete_method(actual_receiver, cha_monomorphic_target);
return cha_monomorphic_target;
}
// If the type is exact, we can still bind the method w/o a vcall.
// (This case comes after CHA so we can see how much extra work it does.)
if (actual_receiver_is_exact) {
// In case of evolution, there is a dependence on every inlined method, since each
// such method can be changed when its class is redefined.
ciMethod* exact_method = callee->resolve_invoke(calling_klass, actual_receiver);
if (exact_method != NULL) {
#ifndef PRODUCT
if (PrintOpto) {
tty->print(" Calling method via exact type @%d --- ", bci);
exact_method->print_name();
tty->cr();
}
#endif
return exact_method;
}
}
return NULL;
}
C:\hotspot-69087d08d473\src\share\vm/opto/domgraph.cpp
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "libadt/vectset.hpp"
#include "memory/allocation.hpp"
#include "opto/block.hpp"
#include "opto/machnode.hpp"
#include "opto/phaseX.hpp"
#include "opto/rootnode.hpp"
// Portions of code courtesy of Clifford Click
// A data structure that holds all the information needed to find dominators.
struct Tarjan {
Block *_block; // Basic block for this info
uint _semi; // Semi-dominators
uint _size; // Used for faster LINK and EVAL
Tarjan *_parent; // Parent in DFS
Tarjan *_label; // Used for LINK and EVAL
Tarjan *_ancestor; // Used for LINK and EVAL
Tarjan *_child; // Used for faster LINK and EVAL
Tarjan *_dom; // Parent in dominator tree (immediate dom)
Tarjan *_bucket; // Set of vertices with given semidominator
Tarjan *_dom_child; // Child in dominator tree
Tarjan *_dom_next; // Next in dominator tree
// Fast union-find work
void COMPRESS();
Tarjan *EVAL(void);
void LINK( Tarjan *w, Tarjan *tarjan0 );
void setdepth( uint size );
};
// Compute the dominator tree of the CFG. The CFG must already have been
// constructed. This is the Lengauer & Tarjan O(E-alpha(E,V)) algorithm.
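// A brief summary of the steps as implemented below (the "Step n" comments
// in the body refer to Lengauer & Tarjan's 1979 paper; this is orientation
// only, not a full description of the algorithm):
//   Step 1: number all blocks by DFS and record DFS parents (do_DFS).
//   Step 2: for each vertex w in reverse DFS order, compute the
//           semidominator semi(w) by minimizing, over all predecessors v,
//           the semi of EVAL(v).
//   Step 3: bucket w on its semidominator and implicitly compute the
//           immediate dominators of the bucketed vertices.
//   Step 4: a final forward pass fixes up the dominators that were deferred
//           (w->_dom = w->_dom->_dom when semi(w) was not the idom).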
void PhaseCFG::build_dominator_tree() {
// Pre-grow the blocks array, prior to the ResourceMark kicking in
_blocks.map(number_of_blocks(), 0);
ResourceMark rm;
// Setup mappings from my Graph to Tarjan's stuff and back
// Note: Tarjan uses 1-based arrays
Tarjan* tarjan = NEW_RESOURCE_ARRAY(Tarjan, number_of_blocks() + 1);
// Tarjan's algorithm, almost verbatim:
// Step 1:
uint dfsnum = do_DFS(tarjan, number_of_blocks());
if (dfsnum - 1 != number_of_blocks()) { // Check for unreachable loops!
// If the returned dfsnum does not match the number of blocks, then we
// must have some unreachable loops. These can be made at any time by
// IterGVN. They are cleaned up by CCP or the loop opts, but the last
// IterGVN can always make more that are not cleaned up. Highly unlikely
// except in ZKM.jar, where endless irreducible loops cause the loop opts
// to not get run.
//
// Having found unreachable loops, we have made a bad RPO _block layout.
// We can re-run the above DFS pass with the correct number of blocks,
// and hack the Tarjan algorithm below to be robust in the presence of
// such dead loops (as was done for the NTarjan code farther below).
// Since this situation is so unlikely, instead I've decided to bail out.
// CNC 7/24/2001
C->record_method_not_compilable("unreachable loop");
return;
}
_blocks._cnt = number_of_blocks();
// Tarjan uses 1-based arrays, so initialize the unused slot 0 as a sentinel
tarjan[0]._size = tarjan[0]._semi = 0;
tarjan[0]._label = &tarjan[0];
for (uint i = number_of_blocks(); i >= 2; i--) { // For all vertices in DFS order
Tarjan *w = &tarjan[i]; // Get vertex from DFS
// Step 2:
Node *whead = w->_block->head();
for (uint j = 1; j < whead->req(); j++) {
Block* b = get_block_for_node(whead->in(j));
Tarjan *vx = &tarjan[b->_pre_order];
Tarjan *u = vx->EVAL();
if( u->_semi < w->_semi )
w->_semi = u->_semi;
}
// w is added to a bucket here, and only here.
// Thus w is in at most one bucket and the sum of all bucket sizes is O(n).
// Thus bucket can be a linked list.
// Thus we do not need a small integer name for each Block.
w->_bucket = tarjan[w->_semi]._bucket;
tarjan[w->_semi]._bucket = w;
w->_parent->LINK( w, &tarjan[0] );
// Step 3:
for( Tarjan *vx = w->_parent->_bucket; vx; vx = vx->_bucket ) {
Tarjan *u = vx->EVAL();
vx->_dom = (u->_semi < vx->_semi) ? u : w->_parent;
}
}
// Step 4:
for (uint i = 2; i <= number_of_blocks(); i++) {
Tarjan *w = &tarjan[i];
if( w->_dom != &tarjan[w->_semi] )
w->_dom = w->_dom->_dom;
w->_dom_next = w->_dom_child = NULL; // Initialize for building tree later
}
// No immediate dominator for the root
Tarjan *w = &tarjan[get_root_block()->_pre_order];
w->_dom = NULL;
w->_dom_next = w->_dom_child = NULL; // Initialize for building tree later
// Convert the dominator tree array into my kind of graph
for(uint i = 1; i <= number_of_blocks(); i++){ // For all Tarjan vertices
Tarjan *t = &tarjan[i]; // Handy access
Tarjan *tdom = t->_dom; // Handy access to immediate dominator
if( tdom ) { // Root has no immediate dominator
t->_block->_idom = tdom->_block; // Set immediate dominator
t->_dom_next = tdom->_dom_child; // Make me a sibling of parent's child
tdom->_dom_child = t; // Make me a child of my parent
} else
t->_block->_idom = NULL; // Root
}
w->setdepth(number_of_blocks() + 1); // Set depth in dominator tree
}
class Block_Stack {
private:
struct Block_Descr {
Block *block; // Block
int index; // Index of block's successor pushed on stack
int freq_idx; // Index of block's most frequent successor
};
Block_Descr *_stack_top;
Block_Descr *_stack_max;
Block_Descr *_stack;
Tarjan *_tarjan;
uint most_frequent_successor( Block *b );
public:
Block_Stack(Tarjan *tarjan, int size) : _tarjan(tarjan) {
_stack = NEW_RESOURCE_ARRAY(Block_Descr, size);
_stack_max = _stack + size;
_stack_top = _stack - 1; // stack is empty
}
void push(uint pre_order, Block *b) {
Tarjan *t = &_tarjan[pre_order]; // Fast local access
b->_pre_order = pre_order; // Flag as visited
t->_block = b; // Save actual block
t->_semi = pre_order; // Block to DFS map
t->_label = t; // DFS to vertex map
t->_ancestor = NULL; // Fast LINK & EVAL setup
t->_child = &_tarjan[0]; // Sentinel
t->_size = 1;
t->_bucket = NULL;
if (pre_order == 1)
t->_parent = NULL; // first block doesn't have parent
else {
// Save parent (current top block on stack) in DFS
t->_parent = &_tarjan[_stack_top->block->_pre_order];
}
// Now put this block on stack
++_stack_top;
assert(_stack_top < _stack_max, ""); // assert if the stack has to grow
_stack_top->block = b;
_stack_top->index = -1;
// Find the index into b->succs[] array of the most frequent successor.
_stack_top->freq_idx = most_frequent_successor(b); // freq_idx >= 0
}
Block* pop() { Block* b = _stack_top->block; _stack_top--; return b; }
bool is_nonempty() { return (_stack_top >= _stack); }
bool last_successor() { return (_stack_top->index == _stack_top->freq_idx); }
Block* next_successor() {
int i = _stack_top->index;
i++;
if (i == _stack_top->freq_idx) i++;
if (i >= (int)(_stack_top->block->_num_succs)) {
i = _stack_top->freq_idx; // process most frequent successor last
}
_stack_top->index = i;
return _stack_top->block->_succs[ i ];
}
};
// Find the index into the b->succs[] array of the most frequent successor.
uint Block_Stack::most_frequent_successor( Block *b ) {
uint freq_idx = 0;
int eidx = b->end_idx();
Node *n = b->get_node(eidx);
int op = n->is_Mach() ? n->as_Mach()->ideal_Opcode() : n->Opcode();
switch( op ) {
case Op_CountedLoopEnd:
case Op_If: { // Split frequency amongst children
float prob = n->as_MachIf()->_prob;
// Is succ[0] the TRUE branch or the FALSE branch?
if( b->get_node(eidx+1)->Opcode() == Op_IfFalse )
prob = 1.0f - prob;
freq_idx = prob < PROB_FAIR; // freq=1 for succ[0] < 0.5 prob
break;
}
case Op_Catch: // Split frequency amongst children
for( freq_idx = 0; freq_idx < b->_num_succs; freq_idx++ )
if( b->get_node(eidx+1+freq_idx)->as_CatchProj()->_con == CatchProjNode::fall_through_index )
break;
// Handle case of no fall-thru (e.g., check-cast MUST throw an exception)
if( freq_idx == b->_num_succs ) freq_idx = 0;
break;
// Currently there is no support for finding out the most
// frequent successor for jumps, so let's just make it the first one
case Op_Jump:
case Op_Root:
case Op_Goto:
case Op_NeverBranch:
freq_idx = 0; // fall thru
break;
case Op_TailCall:
case Op_TailJump:
case Op_Return:
case Op_Halt:
case Op_Rethrow:
break;
default:
ShouldNotReachHere();
}
return freq_idx;
}
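// Worked example (illustrative): for an If with _prob == 0.7 whose projection
// at eidx+1 is IfTrue, prob stays 0.7, (prob < PROB_FAIR) is false and
// freq_idx == 0; had that projection been IfFalse, prob would flip to 0.3 and
// freq_idx would become 1. The DFS driver below visits the most frequent
// successor last (see next_successor/last_successor above) so that it lands
// immediately after its predecessor in the reverse post-order block layout.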
// Perform DFS search. Setup 'vertex' as DFS to vertex mapping. Setup
// 'semi' as vertex to DFS mapping. Set 'parent' to DFS parent.
uint PhaseCFG::do_DFS(Tarjan *tarjan, uint rpo_counter) {
Block* root_block = get_root_block();
uint pre_order = 1;
// Allocate stack of size number_of_blocks() + 1 to avoid frequent realloc
Block_Stack bstack(tarjan, number_of_blocks() + 1);
// Push on stack the state for the first block
bstack.push(pre_order, root_block);
++pre_order;
while (bstack.is_nonempty()) {
if (!bstack.last_successor()) {
// Walk over all successors in pre-order (DFS).
Block* next_block = bstack.next_successor();
if (next_block->_pre_order == 0) { // Check for no-pre-order, not-visited
// Push on stack the state of successor
bstack.push(pre_order, next_block);
++pre_order;
}
}
else {
// Build a reverse post-order in the CFG _blocks array
Block *stack_top = bstack.pop();
stack_top->_rpo = --rpo_counter;
_blocks.map(stack_top->_rpo, stack_top);
}
}
return pre_order;
}
void Tarjan::COMPRESS()
{
assert( _ancestor != 0, "" );
if( _ancestor->_ancestor != 0 ) {
_ancestor->COMPRESS( );
if( _ancestor->_label->_semi < _label->_semi )
_label = _ancestor->_label;
_ancestor = _ancestor->_ancestor;
}
}
Tarjan *Tarjan::EVAL() {
if( !_ancestor ) return _label;
COMPRESS();
return (_ancestor->_label->_semi >= _label->_semi) ? _label : _ancestor->_label;
}
void Tarjan::LINK( Tarjan *w, Tarjan *tarjan0 ) {
Tarjan *s = w;
while( w->_label->_semi < s->_child->_label->_semi ) {
if( s->_size + s->_child->_child->_size >= (s->_child->_size << 1) ) {
s->_child->_ancestor = s;
s->_child = s->_child->_child;
} else {
s->_child->_size = s->_size;
s = s->_ancestor = s->_child;
}
}
s->_label = w->_label;
_size += w->_size;
if( _size < (w->_size << 1) ) {
Tarjan *tmp = s; s = _child; _child = tmp;
}
while( s != tarjan0 ) {
s->_ancestor = this;
s = s->_child;
}
}
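// EVAL and LINK above implement the "sophisticated" union-find of Lengauer &
// Tarjan: EVAL returns the vertex of minimal semidominator on the path from
// this vertex toward its forest root (COMPRESS does the path compression),
// while LINK keeps the virtual trees balanced through the _size/_child
// bookkeeping. A minimal sketch of the simpler, unbalanced variant of the
// same idea (illustrative names, not HotSpot API):
//
//   struct UF { UF* _ancestor; UF* _label; uint _semi; };
//   static void uf_compress(UF* x) {
//     if (x->_ancestor->_ancestor != NULL) {
//       uf_compress(x->_ancestor);                  // flatten the chain
//       if (x->_ancestor->_label->_semi < x->_label->_semi)
//         x->_label = x->_ancestor->_label;         // fold best label down
//       x->_ancestor = x->_ancestor->_ancestor;
//     }
//   }
//   static UF* uf_eval(UF* x) {                     // min-semi on path to root
//     if (x->_ancestor == NULL) return x;
//     uf_compress(x);
//     return x->_label;
//   }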
void Tarjan::setdepth( uint stack_size ) {
Tarjan **top = NEW_RESOURCE_ARRAY(Tarjan*, stack_size);
Tarjan **next = top;
Tarjan **last;
uint depth = 0;
*top = this;
++top;
do {
// next level
++depth;
last = top;
do {
// Set current depth for all tarjans on this level
Tarjan *t = *next; // next tarjan from stack
++next;
do {
t->_block->_dom_depth = depth; // Set depth in dominator tree
Tarjan *dom_child = t->_dom_child;
t = t->_dom_next; // next tarjan
if (dom_child != NULL) {
*top = dom_child; // save child on stack
++top;
}
} while (t != NULL);
} while (next < last);
} while (last < top);
}
// Compute dominators on the Sea of Nodes form
// A data structure that holds all the information needed to find dominators.
struct NTarjan {
Node *_control; // Control node associated with this info
uint _semi; // Semi-dominators
uint _size; // Used for faster LINK and EVAL
NTarjan *_parent; // Parent in DFS
NTarjan *_label; // Used for LINK and EVAL
NTarjan *_ancestor; // Used for LINK and EVAL
NTarjan *_child; // Used for faster LINK and EVAL
NTarjan *_dom; // Parent in dominator tree (immediate dom)
NTarjan *_bucket; // Set of vertices with given semidominator
NTarjan *_dom_child; // Child in dominator tree
NTarjan *_dom_next; // Next in dominator tree
// Perform DFS search.
// Setup 'vertex' as DFS to vertex mapping.
// Setup 'semi' as vertex to DFS mapping.
// Set 'parent' to DFS parent.
static int DFS( NTarjan *ntarjan, VectorSet &visited, PhaseIdealLoop *pil, uint *dfsorder );
void setdepth( uint size, uint *dom_depth );
// Fast union-find work
void COMPRESS();
NTarjan *EVAL(void);
void LINK( NTarjan *w, NTarjan *ntarjan0 );
#ifndef PRODUCT
void dump(int offset) const;
#endif
};
// Compute the dominator tree of the sea of nodes. This version walks all CFG
// nodes (using the is_CFG() call) and places them in a dominator tree. Thus,
// it needs a count of the CFG nodes for the mapping table. This is the
// Lengauer & Tarjan O(E-alpha(E,V)) algorithm.
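// The code below mirrors PhaseCFG::build_dominator_tree() above, with two
// differences worth noting: the DFS walks raw CFG nodes (is_CFG()) rather
// than Blocks, so a dfsorder[] side table maps node _idx to DFS number, and
// Step 2/3 tolerate dead (unreachable) loops by pruning their Region/Phi
// inputs on the fly instead of bailing out of the compilation.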
void PhaseIdealLoop::Dominators() {
ResourceMark rm;
// Setup mappings from my Graph to Tarjan's stuff and back
// Note: Tarjan uses 1-based arrays
NTarjan *ntarjan = NEW_RESOURCE_ARRAY(NTarjan,C->unique()+1);
// Initialize _control field for fast reference
int i;
for( i= C->unique()-1; i>=0; i-- )
ntarjan[i]._control = NULL;
// Store the DFS order for the main loop
uint *dfsorder = NEW_RESOURCE_ARRAY(uint,C->unique()+1);
memset(dfsorder, max_uint, (C->unique()+1) * sizeof(uint));
// Tarjan's algorithm, almost verbatim:
// Step 1:
VectorSet visited(Thread::current()->resource_area());
int dfsnum = NTarjan::DFS( ntarjan, visited, this, dfsorder);
// Tarjan uses 1-based arrays, so initialize the unused slot 0 as a sentinel
ntarjan[0]._size = ntarjan[0]._semi = 0;
ntarjan[0]._label = &ntarjan[0];
for( i = dfsnum-1; i>1; i-- ) { // For all nodes in reverse DFS order
NTarjan *w = &ntarjan[i]; // Get Node from DFS
assert(w->_control != NULL,"bad DFS walk");
// Step 2:
Node *whead = w->_control;
for( uint j=0; j < whead->req(); j++ ) { // For each predecessor
if( whead->in(j) == NULL || !whead->in(j)->is_CFG() )
continue; // Only process control nodes
uint b = dfsorder[whead->in(j)->_idx];
if(b == max_uint) continue;
NTarjan *vx = &ntarjan[b];
NTarjan *u = vx->EVAL();
if( u->_semi < w->_semi )
w->_semi = u->_semi;
}
// w is added to a bucket here, and only here.
// Thus w is in at most one bucket and the sum of all bucket sizes is O(n).
// Thus bucket can be a linked list.
w->_bucket = ntarjan[w->_semi]._bucket;
ntarjan[w->_semi]._bucket = w;
w->_parent->LINK( w, &ntarjan[0] );
// Step 3:
for( NTarjan *vx = w->_parent->_bucket; vx; vx = vx->_bucket ) {
NTarjan *u = vx->EVAL();
vx->_dom = (u->_semi < vx->_semi) ? u : w->_parent;
}
// Cleanup any unreachable loops now. Unreachable loops are loops that
// flow into the main graph (and hence into ROOT) but are not reachable
// from above. Such code is dead, but requires a global pass to detect
// it; this global pass was the 'build_loop_tree' pass run just prior.
if( !_verify_only && whead->is_Region() ) {
for( uint i = 1; i < whead->req(); i++ ) {
if (!has_node(whead->in(i))) {
// Kill dead input path
assert( !visited.test(whead->in(i)->_idx),
"input with no loop must be dead" );
_igvn.delete_input_of(whead, i);
for (DUIterator_Fast jmax, j = whead->fast_outs(jmax); j < jmax; j++) {
Node* p = whead->fast_out(j);
if( p->is_Phi() ) {
_igvn.delete_input_of(p, i);
}
}
i--; // Rerun same iteration
} // End of if dead input path
} // End of for all input paths
} // End of if whead is a Region
} // End of for all Nodes in reverse DFS order
// Step 4:
for( i=2; i < dfsnum; i++ ) { // DFS order
NTarjan *w = &ntarjan[i];
assert(w->_control != NULL,"Bad DFS walk");
if( w->_dom != &ntarjan[w->_semi] )
w->_dom = w->_dom->_dom;
w->_dom_next = w->_dom_child = NULL; // Initialize for building tree later
}
// No immediate dominator for the root
NTarjan *w = &ntarjan[dfsorder[C->root()->_idx]];
w->_dom = NULL;
w->_parent = NULL;
w->_dom_next = w->_dom_child = NULL; // Initialize for building tree later
// Convert the dominator tree array into my kind of graph
for( i=1; i<dfsnum; i++ ) { // For all Tarjan vertices
NTarjan *t = &ntarjan[i]; // Handy access
assert(t->_control != NULL,"Bad DFS walk");
NTarjan *tdom = t->_dom; // Handy access to immediate dominator
if( tdom ) { // Root has no immediate dominator
_idom[t->_control->_idx] = tdom->_control; // Set immediate dominator
t->_dom_next = tdom->_dom_child; // Make me a sibling of parent's child
tdom->_dom_child = t; // Make me a child of my parent
} else
_idom[C->root()->_idx] = NULL; // Root
}
w->setdepth( C->unique()+1, _dom_depth ); // Set depth in dominator tree
// Pick up the 'top' node as well
_idom [C->top()->_idx] = C->root();
_dom_depth[C->top()->_idx] = 1;
// Debug Print of Dominator tree
if( PrintDominators ) {
#ifndef PRODUCT
w->dump(0);
#endif
}
}
// Perform DFS search. Setup 'vertex' as DFS to vertex mapping. Setup
// 'semi' as vertex to DFS mapping. Set 'parent' to DFS parent.
int NTarjan::DFS( NTarjan *ntarjan, VectorSet &visited, PhaseIdealLoop *pil, uint *dfsorder) {
// Allocate stack of size C->live_nodes()/8 to avoid frequent realloc
GrowableArray <Node *> dfstack(pil->C->live_nodes() >> 3);
Node *b = pil->C->root();
int dfsnum = 1;
dfsorder[b->_idx] = dfsnum; // Cache parent's dfsnum for a later use
dfstack.push(b);
while (dfstack.is_nonempty()) {
b = dfstack.pop();
if( !visited.test_set(b->_idx) ) { // Test node and flag it as visited
NTarjan *w = &ntarjan[dfsnum];
// Only fully process control nodes
w->_control = b; // Save actual node
// Use parent's cached dfsnum to identify "Parent in DFS"
w->_parent = &ntarjan[dfsorder[b->_idx]];
dfsorder[b->_idx] = dfsnum; // Save DFS order info
w->_semi = dfsnum; // Node to DFS map
w->_label = w; // DFS to vertex map
w->_ancestor = NULL; // Fast LINK & EVAL setup
w->_child = &ntarjan[0]; // Sentinel
w->_size = 1;
w->_bucket = NULL;
// Need DEF-USE info for this pass
for ( int i = b->outcnt(); i-- > 0; ) { // Put on stack backwards
Node* s = b->raw_out(i); // Get a use
// CFG nodes only and not dead stuff
if( s->is_CFG() && pil->has_node(s) && !visited.test(s->_idx) ) {
dfsorder[s->_idx] = dfsnum; // Cache parent's dfsnum for a later use
dfstack.push(s);
}
}
dfsnum++; // update after parent's dfsnum has been cached.
}
}
return dfsnum;
}
void NTarjan::COMPRESS()
{
assert( _ancestor != 0, "" );
if( _ancestor->_ancestor != 0 ) {
_ancestor->COMPRESS( );
if( _ancestor->_label->_semi < _label->_semi )
_label = _ancestor->_label;
_ancestor = _ancestor->_ancestor;
}
}
NTarjan *NTarjan::EVAL() {
if( !_ancestor ) return _label;
COMPRESS();
return (_ancestor->_label->_semi >= _label->_semi) ? _label : _ancestor->_label;
}
void NTarjan::LINK( NTarjan *w, NTarjan *ntarjan0 ) {
NTarjan *s = w;
while( w->_label->_semi < s->_child->_label->_semi ) {
if( s->_size + s->_child->_child->_size >= (s->_child->_size << 1) ) {
s->_child->_ancestor = s;
s->_child = s->_child->_child;
} else {
s->_child->_size = s->_size;
s = s->_ancestor = s->_child;
}
}
s->_label = w->_label;
_size += w->_size;
if( _size < (w->_size << 1) ) {
NTarjan *tmp = s; s = _child; _child = tmp;
}
while( s != ntarjan0 ) {
s->_ancestor = this;
s = s->_child;
}
}
void NTarjan::setdepth( uint stack_size, uint *dom_depth ) {
NTarjan **top = NEW_RESOURCE_ARRAY(NTarjan*, stack_size);
NTarjan **next = top;
NTarjan **last;
uint depth = 0;
*top = this;
++top;
do {
// next level
++depth;
last = top;
do {
// Set current depth for all tarjans on this level
NTarjan *t = *next; // next tarjan from stack
++next;
do {
dom_depth[t->_control->_idx] = depth; // Set depth in dominator tree
NTarjan *dom_child = t->_dom_child;
t = t->_dom_next; // next tarjan
if (dom_child != NULL) {
*top = dom_child; // save child on stack
++top;
}
} while (t != NULL);
} while (next < last);
} while (last < top);
}
#ifndef PRODUCT
void NTarjan::dump(int offset) const {
// Dump the data from this node
int i;
for(i = offset; i >0; i--) // Use indenting for tree structure
tty->print(" ");
tty->print("Dominator Node: ");
_control->dump(); // Control node for this dom node
tty->print("\n");
for(i = offset; i >0; i--) // Use indenting for tree structure
tty->print(" ");
tty->print("semi:%d, size:%d\n",_semi, _size);
for(i = offset; i >0; i--) // Use indenting for tree structure
tty->print(" ");
tty->print("DFS Parent: ");
if(_parent != NULL)
_parent->_control->dump(); // Parent in DFS
tty->print("\n");
for(i = offset; i >0; i--) // Use indenting for tree structure
tty->print(" ");
tty->print("Dom Parent: ");
if(_dom != NULL)
_dom->_control->dump(); // Parent in Dominator Tree
tty->print("\n");
// Recurse over remaining tree
if( _dom_child ) _dom_child->dump(offset+2); // Children in dominator tree
if( _dom_next ) _dom_next ->dump(offset ); // Siblings in dominator tree
}
#endif
C:\hotspot-69087d08d473\src\share\vm/opto/escape.cpp
/*
* Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "ci/bcEscapeAnalyzer.hpp"
#include "compiler/compileLog.hpp"
#include "libadt/vectset.hpp"
#include "memory/allocation.hpp"
#include "opto/c2compiler.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/compile.hpp"
#include "opto/escape.hpp"
#include "opto/phaseX.hpp"
#include "opto/rootnode.hpp"
ConnectionGraph::ConnectionGraph(Compile * C, PhaseIterGVN *igvn) :
_nodes(C->comp_arena(), C->unique(), C->unique(), NULL),
_in_worklist(C->comp_arena()),
_next_pidx(0),
_collecting(true),
_verify(false),
_compile(C),
_igvn(igvn),
_node_map(C->comp_arena()) {
// Add unknown java object.
add_java_object(C->top(), PointsToNode::GlobalEscape);
phantom_obj = ptnode_adr(C->top()->_idx)->as_JavaObject();
// Add ConP(#NULL) and ConN(#NULL) nodes.
Node* oop_null = igvn->zerocon(T_OBJECT);
assert(oop_null->_idx < nodes_size(), "should be created already");
add_java_object(oop_null, PointsToNode::NoEscape);
null_obj = ptnode_adr(oop_null->_idx)->as_JavaObject();
if (UseCompressedOops) {
Node* noop_null = igvn->zerocon(T_NARROWOOP);
assert(noop_null->_idx < nodes_size(), "should be created already");
map_ideal_node(noop_null, null_obj);
}
_pcmp_neq = NULL; // Should be initialized
_pcmp_eq = NULL;
}
bool ConnectionGraph::has_candidates(Compile *C) {
// EA brings benefits only when the code has allocations and/or locks which
// are represented by ideal Macro nodes.
int cnt = C->macro_count();
for (int i = 0; i < cnt; i++) {
Node *n = C->macro_node(i);
if (n->is_Allocate())
return true;
if (n->is_Lock()) {
Node* obj = n->as_Lock()->obj_node()->uncast();
if (!(obj->is_Parm() || obj->is_Con()))
return true;
}
if (n->is_CallStaticJava() &&
n->as_CallStaticJava()->is_boxing_method()) {
return true;
}
}
return false;
}
void ConnectionGraph::do_analysis(Compile *C, PhaseIterGVN *igvn) {
Compile::TracePhase t2("escapeAnalysis", &Phase::_t_escapeAnalysis, true);
ResourceMark rm;
// Add ConP#NULL and ConN#NULL nodes before ConnectionGraph construction
// to create space for them in ConnectionGraph::_nodes[].
Node* oop_null = igvn->zerocon(T_OBJECT);
Node* noop_null = igvn->zerocon(T_NARROWOOP);
ConnectionGraph* congraph = new(C->comp_arena()) ConnectionGraph(C, igvn);
// Perform escape analysis
if (congraph->compute_escape()) {
// There are non escaping objects.
C->set_congraph(congraph);
}
// Cleanup.
if (oop_null->outcnt() == 0)
igvn->hash_delete(oop_null);
if (noop_null->outcnt() == 0)
igvn->hash_delete(noop_null);
}
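// compute_escape() below proceeds in the numbered phases marked in its body
// (restated here for orientation only):
//   1. Walk the ideal graph from the root, creating PointsTo nodes and simple
//      edges (add_node_to_connection_graph, then add_final_edges for the
//      delayed nodes).
//   2. Propagate references to all java objects through the graph until a
//      fixed point (complete_connection_graph).
//   3. Adjust the scalar_replaceable state of non-escaping allocations and
//      collect candidates on alloc_worklist.
//   4. Use the escape states to optimize pointer compares and
//      MemBarStoreStore nodes (optimize_ideal_graph).
//   5. Give scalar replaceable allocations unique memory slices
//      (split_unique_types).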
bool ConnectionGraph::compute_escape() {
Compile* C = _compile;
PhaseGVN* igvn = _igvn;
// Worklists used by EA.
Unique_Node_List delayed_worklist;
GrowableArray<Node*> alloc_worklist;
GrowableArray<Node*> ptr_cmp_worklist;
GrowableArray<Node*> storestore_worklist;
GrowableArray<PointsToNode*> ptnodes_worklist;
GrowableArray<JavaObjectNode*> java_objects_worklist;
GrowableArray<JavaObjectNode*> non_escaped_worklist;
GrowableArray<FieldNode*> oop_fields_worklist;
DEBUG_ONLY( GrowableArray<Node*> addp_worklist; )
{ Compile::TracePhase t3("connectionGraph", &Phase::_t_connectionGraph, true);
// 1. Populate Connection Graph (CG) with PointsTo nodes.
ideal_nodes.map(C->live_nodes(), NULL); // preallocate space
// Initialize worklist
if (C->root() != NULL) {
ideal_nodes.push(C->root());
}
// Processed ideal nodes are unique on ideal_nodes list
// but several ideal nodes are mapped to the phantom_obj.
// To avoid duplicated entries on the following worklists
// add the phantom_obj only once to them.
ptnodes_worklist.append(phantom_obj);
java_objects_worklist.append(phantom_obj);
for( uint next = 0; next < ideal_nodes.size(); ++next ) {
Node* n = ideal_nodes.at(next);
// Create PointsTo nodes and add them to Connection Graph. Called
// only once per ideal node since ideal_nodes is Unique_Node list.
add_node_to_connection_graph(n, &delayed_worklist);
PointsToNode* ptn = ptnode_adr(n->_idx);
if (ptn != NULL && ptn != phantom_obj) {
ptnodes_worklist.append(ptn);
if (ptn->is_JavaObject()) {
java_objects_worklist.append(ptn->as_JavaObject());
if ((n->is_Allocate() || n->is_CallStaticJava()) &&
(ptn->escape_state() < PointsToNode::GlobalEscape)) {
// Only the results of allocations and java static calls are interesting.
non_escaped_worklist.append(ptn->as_JavaObject());
}
} else if (ptn->is_Field() && ptn->as_Field()->is_oop()) {
oop_fields_worklist.append(ptn->as_Field());
}
}
if (n->is_MergeMem()) {
// Collect all MergeMem nodes to add memory slices for
// scalar replaceable objects in split_unique_types().
_mergemem_worklist.append(n->as_MergeMem());
} else if (OptimizePtrCompare && n->is_Cmp() &&
(n->Opcode() == Op_CmpP || n->Opcode() == Op_CmpN)) {
// Collect compare pointers nodes.
ptr_cmp_worklist.append(n);
} else if (n->is_MemBarStoreStore()) {
// Collect all MemBarStoreStore nodes so that depending on the
// escape status of the associated Allocate node some of them
// may be eliminated.
storestore_worklist.append(n);
} else if (n->is_MemBar() && (n->Opcode() == Op_MemBarRelease) &&
(n->req() > MemBarNode::Precedent)) {
record_for_optimizer(n);
#ifdef ASSERT
} else if (n->is_AddP()) {
// Collect address nodes for graph verification.
addp_worklist.append(n);
#endif
}
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node* m = n->fast_out(i); // Get user
ideal_nodes.push(m);
}
}
if (non_escaped_worklist.length() == 0) {
_collecting = false;
return false; // Nothing to do.
}
// Add final simple edges to graph.
while(delayed_worklist.size() > 0) {
Node* n = delayed_worklist.pop();
add_final_edges(n);
}
int ptnodes_length = ptnodes_worklist.length();
#ifdef ASSERT
if (VerifyConnectionGraph) {
// Verify that no new simple edges could be created and all
// local vars have edges.
_verify = true;
for (int next = 0; next < ptnodes_length; ++next) {
PointsToNode* ptn = ptnodes_worklist.at(next);
add_final_edges(ptn->ideal_node());
if (ptn->is_LocalVar() && ptn->edge_count() == 0) {
ptn->dump();
assert(ptn->as_LocalVar()->edge_count() > 0, "sanity");
}
}
_verify = false;
}
#endif
// Bytecode analyzer BCEscapeAnalyzer, used for Call nodes
// processing, calls to CI to resolve symbols (types, fields, methods)
// referenced in bytecode. During symbol resolution VM may throw
// an exception which CI cleans and converts to compilation failure.
if (C->failing()) return false;
// 2. Finish Graph construction by propagating references to all
// java objects through graph.
if (!complete_connection_graph(ptnodes_worklist, non_escaped_worklist,
java_objects_worklist, oop_fields_worklist)) {
// All objects escaped or hit time or iterations limits.
_collecting = false;
return false;
}
// 3. Adjust scalar_replaceable state of nonescaping objects and push
// scalar replaceable allocations on alloc_worklist for processing
// in split_unique_types().
int non_escaped_length = non_escaped_worklist.length();
for (int next = 0; next < non_escaped_length; next++) {
JavaObjectNode* ptn = non_escaped_worklist.at(next);
bool noescape = (ptn->escape_state() == PointsToNode::NoEscape);
Node* n = ptn->ideal_node();
if (n->is_Allocate()) {
n->as_Allocate()->_is_non_escaping = noescape;
}
if (n->is_CallStaticJava()) {
n->as_CallStaticJava()->_is_non_escaping = noescape;
}
if (noescape && ptn->scalar_replaceable()) {
adjust_scalar_replaceable_state(ptn);
if (ptn->scalar_replaceable()) {
alloc_worklist.append(ptn->ideal_node());
}
}
}
#ifdef ASSERT
if (VerifyConnectionGraph) {
// Verify that graph is complete - no new edges could be added or needed.
verify_connection_graph(ptnodes_worklist, non_escaped_worklist,
java_objects_worklist, addp_worklist);
}
assert(C->unique() == nodes_size(), "no new ideal nodes should be added during ConnectionGraph build");
assert(null_obj->escape_state() == PointsToNode::NoEscape &&
null_obj->edge_count() == 0 &&
!null_obj->arraycopy_src() &&
!null_obj->arraycopy_dst(), "sanity");
#endif
_collecting = false;
} // TracePhase t3("connectionGraph")
// 4. Optimize ideal graph based on EA information.
bool has_non_escaping_obj = (non_escaped_worklist.length() > 0);
if (has_non_escaping_obj) {
optimize_ideal_graph(ptr_cmp_worklist, storestore_worklist);
}
#ifndef PRODUCT
if (PrintEscapeAnalysis) {
dump(ptnodes_worklist); // Dump ConnectionGraph
}
#endif
bool has_scalar_replaceable_candidates = (alloc_worklist.length() > 0);
#ifdef ASSERT
if (VerifyConnectionGraph) {
int alloc_length = alloc_worklist.length();
for (int next = 0; next < alloc_length; ++next) {
Node* n = alloc_worklist.at(next);
PointsToNode* ptn = ptnode_adr(n->_idx);
assert(ptn->escape_state() == PointsToNode::NoEscape && ptn->scalar_replaceable(), "sanity");
}
}
#endif
// 5. Separate memory graph for scalar replaceable allocations.
if (has_scalar_replaceable_candidates &&
C->AliasLevel() >= 3 && EliminateAllocations) {
// Now use the escape information to create unique types for
// scalar replaceable objects.
split_unique_types(alloc_worklist);
if (C->failing()) return false;
C->print_method(PHASE_AFTER_EA, 2);
#ifdef ASSERT
} else if (Verbose && (PrintEscapeAnalysis || PrintEliminateAllocations)) {
tty->print("=== No allocations eliminated for ");
C->method()->print_short_name();
if(!EliminateAllocations) {
tty->print(" since EliminateAllocations is off ===");
} else if(!has_scalar_replaceable_candidates) {
tty->print(" since there are no scalar replaceable candidates ===");
} else if(C->AliasLevel() < 3) {
tty->print(" since AliasLevel < 3 ===");
}
tty->cr();
#endif
}
return has_non_escaping_obj;
}
// Utility function for nodes that load an object
void ConnectionGraph::add_objload_to_connection_graph(Node *n, Unique_Node_List *delayed_worklist) {
// Using isa_ptr() instead of isa_oopptr() for LoadP and Phi because
// ThreadLocal has RawPtr type.
const Type* t = _igvn->type(n);
if (t->make_ptr() != NULL) {
Node* adr = n->in(MemNode::Address);
#ifdef ASSERT
if (!adr->is_AddP()) {
assert(_igvn->type(adr)->isa_rawptr(), "sanity");
} else {
assert((ptnode_adr(adr->_idx) == NULL ||
ptnode_adr(adr->_idx)->as_Field()->is_oop()), "sanity");
}
#endif
add_local_var_and_edge(n, PointsToNode::NoEscape,
adr, delayed_worklist);
}
}
// Populate Connection Graph with PointsTo nodes and create simple
// connection graph edges.
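// Illustrative example (simplified, not generated output): for code shaped
// like "A p = new A(); p.f = q;" this pass creates a JavaObject node for the
// Allocate call, a Field node (at the offset of f) for the AddP computing
// the address of p.f, and LocalVar nodes for the pointer copies
// (CheckCastPP/Proj); add_final_edges() later adds the base edge from the
// Field to the allocation and a points-to edge from the Field to q's node.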
void ConnectionGraph::add_node_to_connection_graph(Node *n, Unique_Node_List *delayed_worklist) {
assert(!_verify, "this method should not be called for verification");
PhaseGVN* igvn = _igvn;
uint n_idx = n->_idx;
PointsToNode* n_ptn = ptnode_adr(n_idx);
if (n_ptn != NULL)
return; // No need to redefine PointsTo node during first iteration.
if (n->is_Call()) {
// Arguments to allocation and locking don't escape.
if (n->is_AbstractLock()) {
// Put Lock and Unlock nodes on IGVN worklist to process them during
// first IGVN optimization when escape information is still available.
record_for_optimizer(n);
} else if (n->is_Allocate()) {
add_call_node(n->as_Call());
record_for_optimizer(n);
} else {
if (n->is_CallStaticJava()) {
const char* name = n->as_CallStaticJava()->_name;
if (name != NULL && strcmp(name, "uncommon_trap") == 0)
return; // Skip uncommon traps
}
// Don't mark as processed since call's arguments have to be processed.
delayed_worklist->push(n);
// Check if a call returns an object.
if ((n->as_Call()->returns_pointer() &&
n->as_Call()->proj_out(TypeFunc::Parms) != NULL) ||
(n->is_CallStaticJava() &&
n->as_CallStaticJava()->is_boxing_method())) {
add_call_node(n->as_Call());
}
}
return;
}
// Put this check here to process call arguments since some call nodes
// point to phantom_obj.
if (n_ptn == phantom_obj || n_ptn == null_obj)
return; // Skip predefined nodes.
int opcode = n->Opcode();
switch (opcode) {
case Op_AddP: {
Node* base = get_addp_base(n);
PointsToNode* ptn_base = ptnode_adr(base->_idx);
// Field nodes are created for all field types. They are used in
// adjust_scalar_replaceable_state() and split_unique_types().
// Note, non-oop fields will have only base edges in Connection
// Graph because such fields are not used for oop loads and stores.
int offset = address_offset(n, igvn);
add_field(n, PointsToNode::NoEscape, offset);
if (ptn_base == NULL) {
delayed_worklist->push(n); // Process it later.
} else {
n_ptn = ptnode_adr(n_idx);
add_base(n_ptn->as_Field(), ptn_base);
}
break;
}
case Op_CastX2P: {
map_ideal_node(n, phantom_obj);
break;
}
case Op_CastPP:
case Op_CheckCastPP:
case Op_EncodeP:
case Op_DecodeN:
case Op_EncodePKlass:
case Op_DecodeNKlass: {
add_local_var_and_edge(n, PointsToNode::NoEscape,
n->in(1), delayed_worklist);
break;
}
case Op_CMoveP: {
add_local_var(n, PointsToNode::NoEscape);
// Do not add edges during the first iteration because some inputs
// may not be defined yet.
delayed_worklist->push(n);
break;
}
case Op_ConP:
case Op_ConN:
case Op_ConNKlass: {
// assume all oop constants globally escape except for null
PointsToNode::EscapeState es;
const Type* t = igvn->type(n);
if (t == TypePtr::NULL_PTR || t == TypeNarrowOop::NULL_PTR) {
es = PointsToNode::NoEscape;
} else {
es = PointsToNode::GlobalEscape;
}
add_java_object(n, es);
break;
}
case Op_CreateEx: {
// assume that all exception objects globally escape
map_ideal_node(n, phantom_obj);
break;
}
case Op_LoadKlass:
case Op_LoadNKlass: {
// Unknown class is loaded
map_ideal_node(n, phantom_obj);
break;
}
case Op_LoadP:
case Op_LoadN:
case Op_LoadPLocked: {
add_objload_to_connection_graph(n, delayed_worklist);
break;
}
case Op_Parm: {
map_ideal_node(n, phantom_obj);
break;
}
case Op_PartialSubtypeCheck: {
// Produces Null or notNull and is used only in CmpP, so
// phantom_obj could be used.
map_ideal_node(n, phantom_obj); // Result is unknown
break;
}
case Op_Phi: {
// Using isa_ptr() instead of isa_oopptr() for LoadP and Phi because
// ThreadLocal has RawPtr type.
const Type* t = n->as_Phi()->type();
if (t->make_ptr() != NULL) {
add_local_var(n, PointsToNode::NoEscape);
// Do not add edges during the first iteration because some inputs
// may not be defined yet.
delayed_worklist->push(n);
}
break;
}
case Op_Proj: {
// we are only interested in the oop result projection from a call
if (n->as_Proj()->_con == TypeFunc::Parms && n->in(0)->is_Call() &&
n->in(0)->as_Call()->returns_pointer()) {
add_local_var_and_edge(n, PointsToNode::NoEscape,
n->in(0), delayed_worklist);
}
break;
}
case Op_Rethrow: // Exception object escapes
case Op_Return: {
if (n->req() > TypeFunc::Parms &&
igvn->type(n->in(TypeFunc::Parms))->isa_oopptr()) {
// Treat Return value as LocalVar with GlobalEscape escape state.
add_local_var_and_edge(n, PointsToNode::GlobalEscape,
n->in(TypeFunc::Parms), delayed_worklist);
}
break;
}
case Op_GetAndSetP:
case Op_GetAndSetN: {
add_objload_to_connection_graph(n, delayed_worklist);
// fallthrough
}
case Op_StoreP:
case Op_StoreN:
case Op_StoreNKlass:
case Op_StorePConditional:
case Op_CompareAndSwapP:
case Op_CompareAndSwapN: {
Node* adr = n->in(MemNode::Address);
const Type *adr_type = igvn->type(adr);
adr_type = adr_type->make_ptr();
if (adr_type == NULL) {
break; // skip dead nodes
}
if (adr_type->isa_oopptr() ||
(opcode == Op_StoreP || opcode == Op_StoreN || opcode == Op_StoreNKlass) &&
(adr_type == TypeRawPtr::NOTNULL &&
adr->in(AddPNode::Address)->is_Proj() &&
adr->in(AddPNode::Address)->in(0)->is_Allocate())) {
delayed_worklist->push(n); // Process it later.
#ifdef ASSERT
assert(adr->is_AddP(), "expecting an AddP");
if (adr_type == TypeRawPtr::NOTNULL) {
// Verify a raw address for a store captured by Initialize node.
int offs = (int)igvn->find_intptr_t_con(adr->in(AddPNode::Offset), Type::OffsetBot);
assert(offs != Type::OffsetBot, "offset must be a constant");
}
#endif
} else {
// Ignore copying of the displaced header to the BoxNode (OSR compilation).
if (adr->is_BoxLock())
break;
// Stored value escapes in unsafe access.
if ((opcode == Op_StoreP) && (adr_type == TypeRawPtr::BOTTOM)) {
// Pointer stores in G1 barriers look like unsafe access.
// Ignore such stores to be able to scalar replace non-escaping
// allocations.
if (UseG1GC && adr->is_AddP()) {
Node* base = get_addp_base(adr);
if (base->Opcode() == Op_LoadP &&
base->in(MemNode::Address)->is_AddP()) {
adr = base->in(MemNode::Address);
Node* tls = get_addp_base(adr);
if (tls->Opcode() == Op_ThreadLocal) {
int offs = (int)igvn->find_intptr_t_con(adr->in(AddPNode::Offset), Type::OffsetBot);
if (offs == in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_buf())) {
break; // G1 pre barrier previous oop value store.
}
if (offs == in_bytes(JavaThread::dirty_card_queue_offset() +
PtrQueue::byte_offset_of_buf())) {
break; // G1 post barrier card address store.
}
}
}
}
delayed_worklist->push(n); // Process unsafe access later.
break;
}
#ifdef ASSERT
n->dump(1);
assert(false, "not unsafe or G1 barrier raw StoreP");
#endif
}
break;
}
case Op_AryEq:
case Op_StrComp:
case Op_StrEquals:
case Op_StrIndexOf:
case Op_EncodeISOArray: {
add_local_var(n, PointsToNode::ArgEscape);
delayed_worklist->push(n); // Process it later.
break;
}
case Op_ThreadLocal: {
add_java_object(n, PointsToNode::ArgEscape);
break;
}
default:
; // Do nothing for nodes not related to EA.
}
return;
}
#ifdef ASSERT
#define ELSE_FAIL(name) \
/* Should not be called for non-pointer type. */ \
n->dump(1); \
assert(false, name); \
break;
#else
#define ELSE_FAIL(name) \
break;
#endif
// Add final simple edges to graph.
void ConnectionGraph::add_final_edges(Node *n) {
PointsToNode* n_ptn = ptnode_adr(n->_idx);
#ifdef ASSERT
if (_verify && n_ptn->is_JavaObject())
return; // This method does not change graph for JavaObject.
#endif
if (n->is_Call()) {
process_call_arguments(n->as_Call());
return;
}
assert(n->is_Store() || n->is_LoadStore() ||
(n_ptn != NULL) && (n_ptn->ideal_node() != NULL),
"node should be registered already");
int opcode = n->Opcode();
switch (opcode) {
case Op_AddP: {
Node* base = get_addp_base(n);
PointsToNode* ptn_base = ptnode_adr(base->_idx);
assert(ptn_base != NULL, "field's base should be registered");
add_base(n_ptn->as_Field(), ptn_base);
break;
}
case Op_CastPP:
case Op_CheckCastPP:
case Op_EncodeP:
case Op_DecodeN:
case Op_EncodePKlass:
case Op_DecodeNKlass: {
add_local_var_and_edge(n, PointsToNode::NoEscape,
n->in(1), NULL);
break;
}
case Op_CMoveP: {
for (uint i = CMoveNode::IfFalse; i < n->req(); i++) {
Node* in = n->in(i);
if (in == NULL)
continue; // ignore NULL
Node* uncast_in = in->uncast();
if (uncast_in->is_top() || uncast_in == n)
continue; // ignore top or inputs which go back this node
PointsToNode* ptn = ptnode_adr(in->_idx);
assert(ptn != NULL, "node should be registered");
add_edge(n_ptn, ptn);
}
break;
}
case Op_LoadP:
case Op_LoadN:
case Op_LoadPLocked: {
// Using isa_ptr() instead of isa_oopptr() for LoadP and Phi because
// ThreadLocal has RawPtr type.
const Type* t = _igvn->type(n);
if (t->make_ptr() != NULL) {
Node* adr = n->in(MemNode::Address);
add_local_var_and_edge(n, PointsToNode::NoEscape, adr, NULL);
break;
}
ELSE_FAIL("Op_LoadP");
}
case Op_Phi: {
// Using isa_ptr() instead of isa_oopptr() for LoadP and Phi because
// ThreadLocal has RawPtr type.
const Type* t = n->as_Phi()->type();
if (t->make_ptr() != NULL) {
for (uint i = 1; i < n->req(); i++) {
Node* in = n->in(i);
if (in == NULL)
continue; // ignore NULL
Node* uncast_in = in->uncast();
if (uncast_in->is_top() || uncast_in == n)
continue; // ignore top or inputs which go back this node
PointsToNode* ptn = ptnode_adr(in->_idx);
assert(ptn != NULL, "node should be registered");
add_edge(n_ptn, ptn);
}
break;
}
ELSE_FAIL("Op_Phi");
}
case Op_Proj: {
// we are only interested in the oop result projection from a call
if (n->as_Proj()->_con == TypeFunc::Parms && n->in(0)->is_Call() &&
n->in(0)->as_Call()->returns_pointer()) {
add_local_var_and_edge(n, PointsToNode::NoEscape, n->in(0), NULL);
break;
}
ELSE_FAIL("Op_Proj");
}
case Op_Rethrow: // Exception object escapes
case Op_Return: {
if (n->req() > TypeFunc::Parms &&
_igvn->type(n->in(TypeFunc::Parms))->isa_oopptr()) {
// Treat Return value as LocalVar with GlobalEscape escape state.
add_local_var_and_edge(n, PointsToNode::GlobalEscape,
n->in(TypeFunc::Parms), NULL);
break;
}
ELSE_FAIL("Op_Return");
}
case Op_StoreP:
case Op_StoreN:
case Op_StoreNKlass:
case Op_StorePConditional:
case Op_CompareAndSwapP:
case Op_CompareAndSwapN:
case Op_GetAndSetP:
case Op_GetAndSetN: {
Node* adr = n->in(MemNode::Address);
const Type *adr_type = _igvn->type(adr);
adr_type = adr_type->make_ptr();
#ifdef ASSERT
if (adr_type == NULL) {
n->dump(1);
assert(adr_type != NULL, "dead node should not be on list");
break;
}
#endif
if (opcode == Op_GetAndSetP || opcode == Op_GetAndSetN) {
add_local_var_and_edge(n, PointsToNode::NoEscape, adr, NULL);
}
if (adr_type->isa_oopptr() ||
((opcode == Op_StoreP || opcode == Op_StoreN || opcode == Op_StoreNKlass) &&
(adr_type == TypeRawPtr::NOTNULL &&
adr->in(AddPNode::Address)->is_Proj() &&
adr->in(AddPNode::Address)->in(0)->is_Allocate()))) {
// Point Address to Value
PointsToNode* adr_ptn = ptnode_adr(adr->_idx);
assert(adr_ptn != NULL &&
adr_ptn->as_Field()->is_oop(), "node should be registered");
Node *val = n->in(MemNode::ValueIn);
PointsToNode* ptn = ptnode_adr(val->_idx);
assert(ptn != NULL, "node should be registered");
add_edge(adr_ptn, ptn);
break;
} else if ((opcode == Op_StoreP) && (adr_type == TypeRawPtr::BOTTOM)) {
// Stored value escapes in unsafe access.
Node *val = n->in(MemNode::ValueIn);
PointsToNode* ptn = ptnode_adr(val->_idx);
assert(ptn != NULL, "node should be registered");
set_escape_state(ptn, PointsToNode::GlobalEscape);
// Add edge to object for unsafe access with offset.
PointsToNode* adr_ptn = ptnode_adr(adr->_idx);
assert(adr_ptn != NULL, "node should be registered");
if (adr_ptn->is_Field()) {
assert(adr_ptn->as_Field()->is_oop(), "should be oop field");
add_edge(adr_ptn, ptn);
}
break;
}
ELSE_FAIL("Op_StoreP");
}
case Op_AryEq:
case Op_StrComp:
case Op_StrEquals:
case Op_StrIndexOf:
case Op_EncodeISOArray: {
// char[] arrays passed to string intrinsics do not escape, but
// they are not scalar replaceable. Adjust escape state for them.
// Start from in(2) edge since in(1) is memory edge.
for (uint i = 2; i < n->req(); i++) {
Node* adr = n->in(i);
const Type* at = _igvn->type(adr);
if (!adr->is_top() && at->isa_ptr()) {
assert(at == Type::TOP || at == TypePtr::NULL_PTR ||
at->isa_ptr() != NULL, "expecting a pointer");
if (adr->is_AddP()) {
adr = get_addp_base(adr);
}
PointsToNode* ptn = ptnode_adr(adr->_idx);
assert(ptn != NULL, "node should be registered");
add_edge(n_ptn, ptn);
}
}
break;
}
default: {
// This method should be called only for EA-specific nodes which may
// have missed some edges when they were created.
#ifdef ASSERT
n->dump(1);
#endif
guarantee(false, "unknown node");
}
}
return;
}
void ConnectionGraph::add_call_node(CallNode* call) {
assert(call->returns_pointer(), "only for call which returns pointer");
uint call_idx = call->_idx;
if (call->is_Allocate()) {
Node* k = call->in(AllocateNode::KlassNode);
const TypeKlassPtr* kt = k->bottom_type()->isa_klassptr();
assert(kt != NULL, "TypeKlassPtr required.");
ciKlass* cik = kt->klass();
PointsToNode::EscapeState es = PointsToNode::NoEscape;
bool scalar_replaceable = true;
if (call->is_AllocateArray()) {
if (!cik->is_array_klass()) { // StressReflectiveCode
es = PointsToNode::GlobalEscape;
} else {
int length = call->in(AllocateNode::ALength)->find_int_con(-1);
if (length < 0 || length > EliminateAllocationArraySizeLimit) {
// Not scalar replaceable if the length is not constant or too big.
scalar_replaceable = false;
}
}
} else { // Allocate instance
if (cik->is_subclass_of(_compile->env()->Thread_klass()) ||
cik->is_subclass_of(_compile->env()->Reference_klass()) ||
!cik->is_instance_klass() || // StressReflectiveCode
cik->as_instance_klass()->has_finalizer()) {
es = PointsToNode::GlobalEscape;
}
}
add_java_object(call, es);
PointsToNode* ptn = ptnode_adr(call_idx);
if (!scalar_replaceable && ptn->scalar_replaceable()) {
ptn->set_scalar_replaceable(false);
}
} else if (call->is_CallStaticJava()) {
// Call nodes could be different types:
//
// 1. CallDynamicJavaNode (what happened during call is unknown):
//
// - mapped to GlobalEscape JavaObject node if oop is returned;
//
// - all oop arguments are escaping globally;
//
// 2. CallStaticJavaNode (execute bytecode analysis if possible):
//
// - the same as CallDynamicJavaNode if bytecode analysis can't be done;
//
// - mapped to GlobalEscape JavaObject node if unknown oop is returned;
// - mapped to NoEscape JavaObject node if non-escaping object allocated
// during call is returned;
// - mapped to ArgEscape LocalVar node pointing to object arguments
// which are returned and do not escape during the call;
//
// - oop arguments' escaping status is determined by bytecode analysis;
//
// For a static call, we know exactly what method is being called.
// Use bytecode estimator to record whether the call's return value escapes.
ciMethod* meth = call->as_CallJava()->method();
if (meth == NULL) {
const char* name = call->as_CallStaticJava()->_name;
assert(strncmp(name, "_multianewarray", 15) == 0, "TODO: add failed case check");
// Returns a newly allocated unescaped object.
add_java_object(call, PointsToNode::NoEscape);
ptnode_adr(call_idx)->set_scalar_replaceable(false);
} else if (meth->is_boxing_method()) {
// Returns boxing object
PointsToNode::EscapeState es;
vmIntrinsics::ID intr = meth->intrinsic_id();
if (intr == vmIntrinsics::_floatValue || intr == vmIntrinsics::_doubleValue) {
// It does not escape because the object is always freshly allocated.
es = PointsToNode::NoEscape;
} else {
// It escapes globally because the object could be loaded from a cache.
es = PointsToNode::GlobalEscape;
}
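// For example (a sketch based on typical JDK sources, not verified here),
// Integer.valueOf(int) may return a shared object from its cache:
//   public static Integer valueOf(int i) {
//     if (i >= IntegerCache.low && i <= IntegerCache.high)
//       return IntegerCache.cache[i + (-IntegerCache.low)];
//     return new Integer(i);
//   }
// A box returned from such a cache is shared between callers, so it must be
// treated as GlobalEscape; only float/double boxes are always freshly allocated.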
add_java_object(call, es);
} else {
BCEscapeAnalyzer* call_analyzer = meth->get_bcea();
call_analyzer->copy_dependencies(_compile->dependencies());
if (call_analyzer->is_return_allocated()) {
// Returns a newly allocated unescaped object; simply
// update dependency information.
// Mark it as NoEscape so that objects referenced by
// its fields will be marked as NoEscape at least.
add_java_object(call, PointsToNode::NoEscape);
ptnode_adr(call_idx)->set_scalar_replaceable(false);
} else {
// Determine whether any arguments are returned.
const TypeTuple* d = call->tf()->domain();
bool ret_arg = false;
for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
if (d->field_at(i)->isa_ptr() != NULL &&
call_analyzer->is_arg_returned(i - TypeFunc::Parms)) {
ret_arg = true;
break;
}
}
if (ret_arg) {
add_local_var(call, PointsToNode::ArgEscape);
} else {
// Returns unknown object.
map_ideal_node(call, phantom_obj);
}
}
}
} else {
// Another type of call; assume the worst case:
// returned value is unknown and globally escapes.
assert(call->Opcode() == Op_CallDynamicJava, "add failed case check");
map_ideal_node(call, phantom_obj);
}
}
void ConnectionGraph::process_call_arguments(CallNode *call) {
bool is_arraycopy = false;
switch (call->Opcode()) {
#ifdef ASSERT
case Op_Allocate:
case Op_AllocateArray:
case Op_Lock:
case Op_Unlock:
assert(false, "should be done already");
break;
#endif
case Op_CallLeafNoFP:
is_arraycopy = (call->as_CallLeaf()->_name != NULL &&
strstr(call->as_CallLeaf()->_name, "arraycopy") != 0);
// fall through
case Op_CallLeaf: {
// Stub calls: objects do not escape, but they are not scalar replaceable.
// Adjust escape state for outgoing arguments.
const TypeTuple * d = call->tf()->domain();
bool src_has_oops = false;
for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
const Type* at = d->field_at(i);
Node *arg = call->in(i);
const Type *aat = _igvn->type(arg);
if (arg->is_top() || !at->isa_ptr() || !aat->isa_ptr())
continue;
if (arg->is_AddP()) {
//
// The inline_native_clone() case when the arraycopy stub is called
// after the allocation before Initialize and CheckCastPP nodes.
// Or normal arraycopy for object arrays case.
//
// Set AddP's base (Allocate) as not scalar replaceable since
// pointer to the base (with offset) is passed as argument.
//
arg = get_addp_base(arg);
}
PointsToNode* arg_ptn = ptnode_adr(arg->_idx);
assert(arg_ptn != NULL, "should be registered");
PointsToNode::EscapeState arg_esc = arg_ptn->escape_state();
if (is_arraycopy || arg_esc < PointsToNode::ArgEscape) {
assert(aat == Type::TOP || aat == TypePtr::NULL_PTR ||
aat->isa_ptr() != NULL, "expecting a pointer");
bool arg_has_oops = aat->isa_oopptr() &&
(aat->isa_oopptr()->klass() == NULL || aat->isa_instptr() ||
(aat->isa_aryptr() && aat->isa_aryptr()->klass()->is_obj_array_klass()));
if (i == TypeFunc::Parms) {
src_has_oops = arg_has_oops;
}
//
// src or dst could be j.l.Object when other is basic type array:
//
// arraycopy(char[],0,Object*,0,size);
// arraycopy(Object*,0,char[],0,size);
//
// Don't add edges in such cases.
//
bool arg_is_arraycopy_dest = src_has_oops && is_arraycopy &&
arg_has_oops && (i > TypeFunc::Parms);
#ifdef ASSERT
if (!(is_arraycopy ||
(call->as_CallLeaf()->_name != NULL &&
(strcmp(call->as_CallLeaf()->_name, "g1_wb_pre") == 0 ||
strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ||
strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32") == 0 ||
strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 ||
strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0 ||
strcmp(call->as_CallLeaf()->_name, "ghash_processBlocks") == 0 ||
strcmp(call->as_CallLeaf()->_name, "sha1_implCompress") == 0 ||
strcmp(call->as_CallLeaf()->_name, "sha1_implCompressMB") == 0 ||
strcmp(call->as_CallLeaf()->_name, "sha256_implCompress") == 0 ||
strcmp(call->as_CallLeaf()->_name, "sha256_implCompressMB") == 0 ||
strcmp(call->as_CallLeaf()->_name, "sha512_implCompress") == 0 ||
strcmp(call->as_CallLeaf()->_name, "sha512_implCompressMB") == 0 ||
strcmp(call->as_CallLeaf()->_name, "multiplyToLen") == 0 ||
strcmp(call->as_CallLeaf()->_name, "squareToLen") == 0 ||
strcmp(call->as_CallLeaf()->_name, "mulAdd") == 0 ||
strcmp(call->as_CallLeaf()->_name, "montgomery_multiply") == 0 ||
strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0)
))) {
call->dump();
fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name));
}
#endif
// Always process arraycopy's destination object since
// we need to add all possible edges to references in
// source object.
if (arg_esc >= PointsToNode::ArgEscape &&
!arg_is_arraycopy_dest) {
continue;
}
set_escape_state(arg_ptn, PointsToNode::ArgEscape);
if (arg_is_arraycopy_dest) {
Node* src = call->in(TypeFunc::Parms);
if (src->is_AddP()) {
src = get_addp_base(src);
}
PointsToNode* src_ptn = ptnode_adr(src->_idx);
assert(src_ptn != NULL, "should be registered");
if (arg_ptn != src_ptn) {
// Special arraycopy edge:
// A destination object's field can't have the source object
// as base since the objects' escape states are not related.
// Only the escape state of the destination object's fields affects
// the escape state of fields in the source object.
add_arraycopy(call, PointsToNode::ArgEscape, src_ptn, arg_ptn);
}
}
}
}
break;
}
case Op_CallStaticJava: {
// For a static call, we know exactly what method is being called.
// Use bytecode estimator to record the call's escape effects.
#ifdef ASSERT
const char* name = call->as_CallStaticJava()->_name;
assert((name == NULL || strcmp(name, "uncommon_trap") != 0), "normal calls only");
#endif
ciMethod* meth = call->as_CallJava()->method();
if ((meth != NULL) && meth->is_boxing_method()) {
break; // Boxing methods do not modify any oops.
}
BCEscapeAnalyzer* call_analyzer = (meth != NULL) ? meth->get_bcea() : NULL;
// fall-through if not a Java method or no analyzer information
if (call_analyzer != NULL) {
PointsToNode* call_ptn = ptnode_adr(call->_idx);
const TypeTuple* d = call->tf()->domain();
for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
const Type* at = d->field_at(i);
int k = i - TypeFunc::Parms;
Node* arg = call->in(i);
PointsToNode* arg_ptn = ptnode_adr(arg->_idx);
if (at->isa_ptr() != NULL &&
call_analyzer->is_arg_returned(k)) {
// The call returns arguments.
if (call_ptn != NULL) { // Is call's result used?
assert(call_ptn->is_LocalVar(), "node should be registered");
assert(arg_ptn != NULL, "node should be registered");
add_edge(call_ptn, arg_ptn);
}
}
if (at->isa_oopptr() != NULL &&
arg_ptn->escape_state() < PointsToNode::GlobalEscape) {
if (!call_analyzer->is_arg_stack(k)) {
// The argument globally escapes.
set_escape_state(arg_ptn, PointsToNode::GlobalEscape);
} else {
set_escape_state(arg_ptn, PointsToNode::ArgEscape);
if (!call_analyzer->is_arg_local(k)) {
// The argument itself doesn't escape, but any fields might
set_fields_escape_state(arg_ptn, PointsToNode::GlobalEscape);
}
}
}
}
if (call_ptn != NULL && call_ptn->is_LocalVar()) {
// The call returns arguments.
assert(call_ptn->edge_count() > 0, "sanity");
if (!call_analyzer->is_return_local()) {
// It may also return an unknown object.
add_edge(call_ptn, phantom_obj);
}
}
break;
}
}
default: {
// Fall through here if this is not a Java method, there is no analyzer
// information, or it is some other type of call; assume the worst case:
// all arguments globally escape.
const TypeTuple* d = call->tf()->domain();
for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
const Type* at = d->field_at(i);
if (at->isa_oopptr() != NULL) {
Node* arg = call->in(i);
if (arg->is_AddP()) {
arg = get_addp_base(arg);
}
assert(ptnode_adr(arg->_idx) != NULL, "should be defined already");
set_escape_state(ptnode_adr(arg->_idx), PointsToNode::GlobalEscape);
}
}
}
}
}
// Finish Graph construction.
bool ConnectionGraph::complete_connection_graph(
GrowableArray<PointsToNode*>& ptnodes_worklist,
GrowableArray<JavaObjectNode*>& non_escaped_worklist,
GrowableArray<JavaObjectNode*>& java_objects_worklist,
GrowableArray<FieldNode*>& oop_fields_worklist) {
// Normally only 1-3 passes are needed to build the Connection Graph, depending
// on graph complexity. Observed 8 passes in jvm2008 compiler.compiler.
// Set the limit to 20 to catch the situation when something went wrong and
// bail out of Escape Analysis.
// Also limit build time to 20 sec (60 in debug VM) via the EscapeAnalysisTimeout flag.
#define CG_BUILD_ITER_LIMIT 20
// Propagate GlobalEscape and ArgEscape escape states and check that
// we still have non-escaping objects. The method pushes onto _worklist
// Field nodes which reference phantom_object.
if (!find_non_escaped_objects(ptnodes_worklist, non_escaped_worklist)) {
return false; // Nothing to do.
}
// Now propagate references to all JavaObject nodes.
int java_objects_length = java_objects_worklist.length();
elapsedTimer time;
bool timeout = false;
int new_edges = 1;
int iterations = 0;
do {
while ((new_edges > 0) &&
(iterations++ < CG_BUILD_ITER_LIMIT)) {
double start_time = time.seconds();
time.start();
new_edges = 0;
// Propagate references to phantom_object for nodes pushed on _worklist
// by find_non_escaped_objects() and find_field_value().
new_edges += add_java_object_edges(phantom_obj, false);
for (int next = 0; next < java_objects_length; ++next) {
JavaObjectNode* ptn = java_objects_worklist.at(next);
new_edges += add_java_object_edges(ptn, true);
#define SAMPLE_SIZE 4
if ((next % SAMPLE_SIZE) == 0) {
// Every SAMPLE_SIZE iterations estimate how much time it will take
// to complete graph construction.
time.stop();
// Poll for requests from the shutdown mechanism to quiesce the compiler
// because Connection Graph construction may take a long time.
CompileBroker::maybe_block();
double stop_time = time.seconds();
double time_per_iter = (stop_time - start_time) / (double)SAMPLE_SIZE;
double time_until_end = time_per_iter * (double)(java_objects_length - next);
if ((start_time + time_until_end) >= EscapeAnalysisTimeout) {
timeout = true;
break; // Timeout
}
start_time = stop_time;
time.start();
}
#undef SAMPLE_SIZE
}
if (timeout) break;
if (new_edges > 0) {
// Update escape states on each iteration if graph was updated.
if (!find_non_escaped_objects(ptnodes_worklist, non_escaped_worklist)) {
return false; // Nothing to do.
}
}
time.stop();
if (time.seconds() >= EscapeAnalysisTimeout) {
timeout = true;
break;
}
}
if ((iterations < CG_BUILD_ITER_LIMIT) && !timeout) {
time.start();
// Find fields which have unknown value.
int fields_length = oop_fields_worklist.length();
for (int next = 0; next < fields_length; next++) {
FieldNode* field = oop_fields_worklist.at(next);
if (field->edge_count() == 0) {
new_edges += find_field_value(field);
// This code may have added new edges to phantom_object.
// Another cycle is needed to propagate references to phantom_object.
}
}
time.stop();
if (time.seconds() >= EscapeAnalysisTimeout) {
timeout = true;
break;
}
} else {
new_edges = 0; // Bailout
}
} while (new_edges > 0);
// Bailout if passed limits.
if ((iterations >= CG_BUILD_ITER_LIMIT) || timeout) {
Compile* C = _compile;
if (C->log() != NULL) {
C->log()->begin_elem("connectionGraph_bailout reason='reached ");
C->log()->text("%s", timeout ? "time" : "iterations");
C->log()->end_elem(" limit'");
}
assert(ExitEscapeAnalysisOnTimeout, err_msg_res("infinite EA connection graph build (%f sec, %d iterations) with %d nodes and worklist size %d",
time.seconds(), iterations, nodes_size(), ptnodes_worklist.length()));
// Possible infinite build_connection_graph loop,
// bailout (no changes to ideal graph were made).
return false;
}
#ifdef ASSERT
if (Verbose && PrintEscapeAnalysis) {
tty->print_cr("EA: %d iterations to build connection graph with %d nodes and worklist size %d",
iterations, nodes_size(), ptnodes_worklist.length());
}
#endif
#undef CG_BUILD_ITER_LIMIT
// Find fields initialized by NULL for non-escaping Allocations.
int non_escaped_length = non_escaped_worklist.length();
for (int next = 0; next < non_escaped_length; next++) {
JavaObjectNode* ptn = non_escaped_worklist.at(next);
PointsToNode::EscapeState es = ptn->escape_state();
assert(es <= PointsToNode::ArgEscape, "sanity");
if (es == PointsToNode::NoEscape) {
if (find_init_values(ptn, null_obj, _igvn) > 0) {
// Adding references to NULL object does not change escape states
// since it does not escape. Also no fields are added to NULL object.
add_java_object_edges(null_obj, false);
}
}
Node* n = ptn->ideal_node();
if (n->is_Allocate()) {
// The object allocated by this Allocate node will never be
// seen by another thread. Mark it so that when it is
// expanded no MemBarStoreStore is added.
InitializeNode* ini = n->as_Allocate()->initialization();
if (ini != NULL)
ini->set_does_not_escape();
}
}
return true; // Finished graph construction.
}
// Propagate GlobalEscape and ArgEscape escape states to all nodes
// and check that we still have non-escaping java objects.
bool ConnectionGraph::find_non_escaped_objects(GrowableArray<PointsToNode*>& ptnodes_worklist,
GrowableArray<JavaObjectNode*>& non_escaped_worklist) {
GrowableArray<PointsToNode*> escape_worklist;
// First, put all nodes with GlobalEscape and ArgEscape states on worklist.
int ptnodes_length = ptnodes_worklist.length();
for (int next = 0; next < ptnodes_length; ++next) {
PointsToNode* ptn = ptnodes_worklist.at(next);
if (ptn->escape_state() >= PointsToNode::ArgEscape ||
ptn->fields_escape_state() >= PointsToNode::ArgEscape) {
escape_worklist.push(ptn);
}
}
// Set escape states to referenced nodes (edges list).
while (escape_worklist.length() > 0) {
PointsToNode* ptn = escape_worklist.pop();
PointsToNode::EscapeState es = ptn->escape_state();
PointsToNode::EscapeState field_es = ptn->fields_escape_state();
if (ptn->is_Field() && ptn->as_Field()->is_oop() &&
es >= PointsToNode::ArgEscape) {
// GlobalEscape or ArgEscape state of field means it has unknown value.
if (add_edge(ptn, phantom_obj)) {
// New edge was added
add_field_uses_to_worklist(ptn->as_Field());
}
}
for (EdgeIterator i(ptn); i.has_next(); i.next()) {
PointsToNode* e = i.get();
if (e->is_Arraycopy()) {
assert(ptn->arraycopy_dst(), "sanity");
// Propagate only fields escape state through arraycopy edge.
if (e->fields_escape_state() < field_es) {
set_fields_escape_state(e, field_es);
escape_worklist.push(e);
}
} else if (es >= field_es) {
// fields_escape_state is also set to 'es' if it is less than 'es'.
if (e->escape_state() < es) {
set_escape_state(e, es);
escape_worklist.push(e);
}
} else {
// Propagate field escape state.
bool es_changed = false;
if (e->fields_escape_state() < field_es) {
set_fields_escape_state(e, field_es);
es_changed = true;
}
if ((e->escape_state() < field_es) &&
e->is_Field() && ptn->is_JavaObject() &&
e->as_Field()->is_oop()) {
// Change escape state of referenced fields.
set_escape_state(e, field_es);
es_changed = true;
} else if (e->escape_state() < es) {
set_escape_state(e, es);
es_changed = true;
}
if (es_changed) {
escape_worklist.push(e);
}
}
}
}
// Remove escaped objects from non_escaped list.
for (int next = non_escaped_worklist.length()-1; next >= 0 ; --next) {
JavaObjectNode* ptn = non_escaped_worklist.at(next);
if (ptn->escape_state() >= PointsToNode::GlobalEscape) {
non_escaped_worklist.delete_at(next);
}
if (ptn->escape_state() == PointsToNode::NoEscape) {
// Find fields in non-escaped allocations which have unknown value.
find_init_values(ptn, phantom_obj, NULL);
}
}
return (non_escaped_worklist.length() > 0);
}
// Add all references to JavaObject node by walking over all uses.
int ConnectionGraph::add_java_object_edges(JavaObjectNode* jobj, bool populate_worklist) {
int new_edges = 0;
if (populate_worklist) {
// Populate _worklist by uses of jobj's uses.
for (UseIterator i(jobj); i.has_next(); i.next()) {
PointsToNode* use = i.get();
if (use->is_Arraycopy())
continue;
add_uses_to_worklist(use);
if (use->is_Field() && use->as_Field()->is_oop()) {
// Put on worklist all field's uses (loads) and
// related field nodes (same base and offset).
add_field_uses_to_worklist(use->as_Field());
}
}
}
for (int l = 0; l < _worklist.length(); l++) {
PointsToNode* use = _worklist.at(l);
if (PointsToNode::is_base_use(use)) {
// Add reference from jobj to field and from field to jobj (field's base).
use = PointsToNode::get_use_node(use)->as_Field();
if (add_base(use->as_Field(), jobj)) {
new_edges++;
}
continue;
}
assert(!use->is_JavaObject(), "sanity");
if (use->is_Arraycopy()) {
if (jobj == null_obj) // NULL object does not have field edges
continue;
// Add an edge from the Arraycopy node to arraycopy's source java object
if (add_edge(use, jobj)) {
jobj->set_arraycopy_src();
new_edges++;
}
// and stop here.
continue;
}
if (!add_edge(use, jobj))
continue; // No new edge added, there was such edge already.
new_edges++;
if (use->is_LocalVar()) {
add_uses_to_worklist(use);
if (use->arraycopy_dst()) {
for (EdgeIterator i(use); i.has_next(); i.next()) {
PointsToNode* e = i.get();
if (e->is_Arraycopy()) {
if (jobj == null_obj) // NULL object does not have field edges
continue;
// Add edge from arraycopy's destination java object to Arraycopy node.
if (add_edge(jobj, e)) {
new_edges++;
jobj->set_arraycopy_dst();
}
}
}
}
} else {
// Added a new edge to a Field node, i.e. jobj is a value stored in the field.
// Put on worklist all field's uses (loads) and
// related field nodes (same base and offset).
add_field_uses_to_worklist(use->as_Field());
}
}
_worklist.clear();
_in_worklist.Reset();
return new_edges;
}
// Put on worklist all related field nodes.
void ConnectionGraph::add_field_uses_to_worklist(FieldNode* field) {
assert(field->is_oop(), "sanity");
int offset = field->offset();
add_uses_to_worklist(field);
// Loop over all bases of this field and push on worklist Field nodes
// with the same offset and base (since they may reference the same field).
for (BaseIterator i(field); i.has_next(); i.next()) {
PointsToNode* base = i.get();
add_fields_to_worklist(field, base);
// Check if the base was the source object of an arraycopy and go over arraycopy's
// destination objects since values stored to a field of the source object are
// accessible by uses (loads) of fields of destination objects.
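// For example (hypothetical Java source):
//   Object[] src = ...; Object[] dst = ...;
//   src[0] = x;                          // store to a field of the source
//   System.arraycopy(src, 0, dst, 0, n);
//   Object y = dst[0];                   // load from the destination sees x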
if (base->arraycopy_src()) {
for (UseIterator j(base); j.has_next(); j.next()) {
PointsToNode* arycp = j.get();
if (arycp->is_Arraycopy()) {
for (UseIterator k(arycp); k.has_next(); k.next()) {
PointsToNode* abase = k.get();
if (abase->arraycopy_dst() && abase != base) {
// Look for the same arraycopy reference.
add_fields_to_worklist(field, abase);
}
}
}
}
}
}
}
// Put on worklist all related field nodes.
void ConnectionGraph::add_fields_to_worklist(FieldNode* field, PointsToNode* base) {
int offset = field->offset();
if (base->is_LocalVar()) {
for (UseIterator j(base); j.has_next(); j.next()) {
PointsToNode* f = j.get();
if (PointsToNode::is_base_use(f)) { // Field
f = PointsToNode::get_use_node(f);
if (f == field || !f->as_Field()->is_oop())
continue;
int offs = f->as_Field()->offset();
if (offs == offset || offset == Type::OffsetBot || offs == Type::OffsetBot) {
add_to_worklist(f);
}
}
}
} else {
assert(base->is_JavaObject(), "sanity");
if (// Skip phantom_object since it is only used to indicate that
// this field's content globally escapes.
(base != phantom_obj) &&
// NULL object node does not have fields.
(base != null_obj)) {
for (EdgeIterator i(base); i.has_next(); i.next()) {
PointsToNode* f = i.get();
// Skip arraycopy edge since store to destination object field
// does not update value in source object field.
if (f->is_Arraycopy()) {
assert(base->arraycopy_dst(), "sanity");
continue;
}
if (f == field || !f->as_Field()->is_oop())
continue;
int offs = f->as_Field()->offset();
if (offs == offset || offset == Type::OffsetBot || offs == Type::OffsetBot) {
add_to_worklist(f);
}
}
}
}
}
// Find fields which have unknown value.
int ConnectionGraph::find_field_value(FieldNode* field) {
// Escaped fields should have init value already.
assert(field->escape_state() == PointsToNode::NoEscape, "sanity");
int new_edges = 0;
for (BaseIterator i(field); i.has_next(); i.next()) {
PointsToNode* base = i.get();
if (base->is_JavaObject()) {
// Skip Allocate's fields which will be processed later.
if (base->ideal_node()->is_Allocate())
return 0;
assert(base == null_obj, "only NULL ptr base expected here");
}
}
if (add_edge(field, phantom_obj)) {
// New edge was added
new_edges++;
add_field_uses_to_worklist(field);
}
return new_edges;
}
// Find fields initializing values for allocations.
int ConnectionGraph::find_init_values(JavaObjectNode* pta, PointsToNode* init_val, PhaseTransform* phase) {
assert(pta->escape_state() == PointsToNode::NoEscape, "Not escaped Allocate nodes only");
int new_edges = 0;
Node* alloc = pta->ideal_node();
if (init_val == phantom_obj) {
// Do nothing for Allocate nodes since their field values are "known".
if (alloc->is_Allocate())
return 0;
assert(alloc->as_CallStaticJava(), "sanity");
#ifdef ASSERT
if (alloc->as_CallStaticJava()->method() == NULL) {
const char* name = alloc->as_CallStaticJava()->_name;
assert(strncmp(name, "_multianewarray", 15) == 0, "sanity");
}
#endif
// Non-escaped allocations returned from Java or runtime calls have
// unknown values in their fields.
for (EdgeIterator i(pta); i.has_next(); i.next()) {
PointsToNode* field = i.get();
if (field->is_Field() && field->as_Field()->is_oop()) {
if (add_edge(field, phantom_obj)) {
// New edge was added
new_edges++;
add_field_uses_to_worklist(field->as_Field());
}
}
}
return new_edges;
}
assert(init_val == null_obj, "sanity");
// Do nothing for Call nodes since their field values are unknown.
if (!alloc->is_Allocate())
return 0;
InitializeNode* ini = alloc->as_Allocate()->initialization();
Compile* C = _compile;
bool visited_bottom_offset = false;
GrowableArray<int> offsets_worklist;
// Check if an oop field's initializing value is recorded and add
// a corresponding NULL as the field's value if it is not recorded.
// The Connection Graph does not record a default initialization by NULL
// captured by the Initialize node.
//
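// For example (hypothetical Java source):
//   Point p = new Point();      // p.next is never written explicitly
//   if (p.next == null) { ... } // must observe the default NULL value
//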
for (EdgeIterator i(pta); i.has_next(); i.next()) {
PointsToNode* field = i.get(); // Field (AddP)
if (!field->is_Field() || !field->as_Field()->is_oop())
continue; // Not oop field
int offset = field->as_Field()->offset();
if (offset == Type::OffsetBot) {
if (!visited_bottom_offset) {
// OffsetBot is used to reference an array's element, so
// always add a reference to NULL to all Field nodes since we don't
// know which element is referenced.
if (add_edge(field, null_obj)) {
// New edge was added
new_edges++;
add_field_uses_to_worklist(field->as_Field());
visited_bottom_offset = true;
}
}
} else {
// Check only oop fields.
const Type* adr_type = field->ideal_node()->as_AddP()->bottom_type();
if (adr_type->isa_rawptr()) {
#ifdef ASSERT
// Raw pointers are used for initializing stores, so skip this field
// since its value should be recorded already.
Node* base = get_addp_base(field->ideal_node());
assert(adr_type->isa_rawptr() && base->is_Proj() &&
(base->in(0) == alloc),"unexpected pointer type");
#endif
continue;
}
if (!offsets_worklist.contains(offset)) {
offsets_worklist.append(offset);
Node* value = NULL;
if (ini != NULL) {
// StoreP::memory_type() == T_ADDRESS
BasicType ft = UseCompressedOops ? T_NARROWOOP : T_ADDRESS;
Node* store = ini->find_captured_store(offset, type2aelembytes(ft, true), phase);
// Make sure the initializing store has the same type as this AddP.
// This AddP may reference a non-existing field because it is on a
// dead branch of a bimorphic call which has not been eliminated yet.
if (store != NULL && store->is_Store() &&
store->as_Store()->memory_type() == ft) {
value = store->in(MemNode::ValueIn);
#ifdef ASSERT
if (VerifyConnectionGraph) {
// Verify that AddP already points to all objects the value points to.
PointsToNode* val = ptnode_adr(value->_idx);
assert((val != NULL), "should be processed already");
PointsToNode* missed_obj = NULL;
if (val->is_JavaObject()) {
if (!field->points_to(val->as_JavaObject())) {
missed_obj = val;
}
} else {
if (!val->is_LocalVar() || (val->edge_count() == 0)) {
tty->print_cr("----------init store has invalid value -----");
store->dump();
val->dump();
assert(val->is_LocalVar() && (val->edge_count() > 0), "should be processed already");
}
for (EdgeIterator j(val); j.has_next(); j.next()) {
PointsToNode* obj = j.get();
if (obj->is_JavaObject()) {
if (!field->points_to(obj->as_JavaObject())) {
missed_obj = obj;
break;
}
}
}
}
if (missed_obj != NULL) {
tty->print_cr("----------field---------------------------------");
field->dump();
tty->print_cr("----------missed referernce to object-----------");
missed_obj->dump();
tty->print_cr("----------object referernced by init store -----");
store->dump();
val->dump();
assert(!field->points_to(missed_obj->as_JavaObject()), "missed JavaObject reference");
}
}
#endif
} else {
// There could be initializing stores which follow allocation.
// For example, a volatile field store is not collected
// by Initialize node.
//
// Need to check for dependent loads to separate such stores from
// stores which follow loads. For now, add the initial value NULL so
// that the pointer comparison optimization works correctly.
}
}
if (value == NULL) {
// A field's initializing value was not recorded. Add NULL.
if (add_edge(field, null_obj)) {
// New edge was added
new_edges++;
add_field_uses_to_worklist(field->as_Field());
}
}
}
}
}
return new_edges;
}
// Adjust scalar_replaceable state after Connection Graph is built.
void ConnectionGraph::adjust_scalar_replaceable_state(JavaObjectNode* jobj) {
// Search for non-escaping objects which are not scalar replaceable
// and mark them to propagate the state to referenced objects.
// 1. An object is not scalar replaceable if the field into which it is
// stored has unknown offset (stored into unknown element of an array).
//
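// For example (hypothetical Java source):
//   Object[] arr = ...;
//   arr[i] = new Point();   // unknown index i => the field offset is OffsetBot
//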
for (UseIterator i(jobj); i.has_next(); i.next()) {
PointsToNode* use = i.get();
assert(!use->is_Arraycopy(), "sanity");
if (use->is_Field()) {
FieldNode* field = use->as_Field();
assert(field->is_oop() && field->scalar_replaceable() &&
field->fields_escape_state() == PointsToNode::NoEscape, "sanity");
if (field->offset() == Type::OffsetBot) {
jobj->set_scalar_replaceable(false);
return;
}
// 2. An object is not scalar replaceable if the field into which it is
// stored has multiple bases, one of which is null.
if (field->base_count() > 1) {
for (BaseIterator i(field); i.has_next(); i.next()) {
PointsToNode* base = i.get();
if (base == null_obj) {
jobj->set_scalar_replaceable(false);
return;
}
}
}
}
assert(use->is_Field() || use->is_LocalVar(), "sanity");
// 3. An object is not scalar replaceable if it is merged with other objects.
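// For example (hypothetical Java source):
//   Point p = cond ? new Point() : new Point(); // two allocations merged by a Phi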
for (EdgeIterator j(use); j.has_next(); j.next()) {
PointsToNode* ptn = j.get();
if (ptn->is_JavaObject() && ptn != jobj) {
// Mark all objects.
jobj->set_scalar_replaceable(false);
ptn->set_scalar_replaceable(false);
}
}
if (!jobj->scalar_replaceable()) {
return;
}
}
for (EdgeIterator j(jobj); j.has_next(); j.next()) {
// Non-escaping object node should point only to field nodes.
FieldNode* field = j.get()->as_Field();
int offset = field->as_Field()->offset();
// 4. An object is not scalar replaceable if it has a field with unknown
// offset (array's element is accessed in loop).
if (offset == Type::OffsetBot) {
jobj->set_scalar_replaceable(false);
return;
}
// 5. Currently an object is not scalar replaceable if a LoadStore node
// accesses its field since the field value is unknown after it.
//
Node* n = field->ideal_node();
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
if (n->fast_out(i)->is_LoadStore()) {
jobj->set_scalar_replaceable(false);
return;
}
}
// 6. Or the address may point to more than one object. This may produce
// a false positive result (marked not scalar replaceable)
// since the flow-insensitive escape analysis can't separate
// the case when stores overwrite the field's value from the case
// when stores happened on different control branches.
//
// Note: it will disable scalar replacement in some cases:
//
// Point p[] = new Point[1];
// p[0] = new Point(); // Will not be scalar replaced
//
// but it will save us from incorrect optimizations in cases like the next one:
//
// Point p[] = new Point[1];
// if ( x ) p[0] = new Point(); // Will not be scalar replaced
//
if (field->base_count() > 1) {
for (BaseIterator i(field); i.has_next(); i.next()) {
PointsToNode* base = i.get();
// Don't take into account LocalVar nodes which
// may point to only one object, which should also be
// this field's base by now.
if (base->is_JavaObject() && base != jobj) {
// Mark all bases.
jobj->set_scalar_replaceable(false);
base->set_scalar_replaceable(false);
}
}
}
}
}
#ifdef ASSERT
void ConnectionGraph::verify_connection_graph(
GrowableArray<PointsToNode*>& ptnodes_worklist,
GrowableArray<JavaObjectNode*>& non_escaped_worklist,
GrowableArray<JavaObjectNode*>& java_objects_worklist,
GrowableArray<Node*>& addp_worklist) {
// Verify that graph is complete - no new edges could be added.
int java_objects_length = java_objects_worklist.length();
int non_escaped_length = non_escaped_worklist.length();
int new_edges = 0;
for (int next = 0; next < java_objects_length; ++next) {
JavaObjectNode* ptn = java_objects_worklist.at(next);
new_edges += add_java_object_edges(ptn, true);
}
assert(new_edges == 0, "graph was not complete");
// Verify that escape state is final.
int length = non_escaped_worklist.length();
find_non_escaped_objects(ptnodes_worklist, non_escaped_worklist);
assert((non_escaped_length == non_escaped_worklist.length()) &&
(non_escaped_length == length) &&
(_worklist.length() == 0), "escape state was not final");
// Verify fields information.
int addp_length = addp_worklist.length();
for (int next = 0; next < addp_length; ++next ) {
Node* n = addp_worklist.at(next);
FieldNode* field = ptnode_adr(n->_idx)->as_Field();
if (field->is_oop()) {
// Verify that field has all bases
Node* base = get_addp_base(n);
PointsToNode* ptn = ptnode_adr(base->_idx);
if (ptn->is_JavaObject()) {
assert(field->has_base(ptn->as_JavaObject()), "sanity");
} else {
assert(ptn->is_LocalVar(), "sanity");
for (EdgeIterator i(ptn); i.has_next(); i.next()) {
PointsToNode* e = i.get();
if (e->is_JavaObject()) {
assert(field->has_base(e->as_JavaObject()), "sanity");
}
}
}
// Verify that all fields have initializing values.
if (field->edge_count() == 0) {
tty->print_cr("----------field does not have references----------");
field->dump();
for (BaseIterator i(field); i.has_next(); i.next()) {
PointsToNode* base = i.get();
tty->print_cr("----------field has next base---------------------");
base->dump();
if (base->is_JavaObject() && (base != phantom_obj) && (base != null_obj)) {
tty->print_cr("----------base has fields-------------------------");
for (EdgeIterator j(base); j.has_next(); j.next()) {
j.get()->dump();
}
tty->print_cr("----------base has references---------------------");
for (UseIterator j(base); j.has_next(); j.next()) {
j.get()->dump();
}
}
}
for (UseIterator i(field); i.has_next(); i.next()) {
i.get()->dump();
}
assert(field->edge_count() > 0, "sanity");
}
}
}
}
#endif
// Optimize ideal graph.
void ConnectionGraph::optimize_ideal_graph(GrowableArray<Node*>& ptr_cmp_worklist,
GrowableArray<Node*>& storestore_worklist) {
Compile* C = _compile;
PhaseIterGVN* igvn = _igvn;
if (EliminateLocks) {
// Mark locks before changing ideal graph.
int cnt = C->macro_count();
for( int i=0; i < cnt; i++ ) {
Node *n = C->macro_node(i);
if (n->is_AbstractLock()) { // Lock and Unlock nodes
AbstractLockNode* alock = n->as_AbstractLock();
if (!alock->is_non_esc_obj()) {
if (not_global_escape(alock->obj_node())) {
assert(!alock->is_eliminated() || alock->is_coarsened(), "sanity");
// The lock could be marked eliminated by the lock coarsening
// code during the first IGVN before EA. Replace the coarsened flag
// so that all associated locks/unlocks are eliminated.
#ifdef ASSERT
alock->log_lock_optimization(C, "eliminate_lock_set_non_esc3");
#endif
alock->set_non_esc_obj();
}
}
}
}
}
if (OptimizePtrCompare) {
// Add ConI(#CC_GT) and ConI(#CC_EQ).
_pcmp_neq = igvn->makecon(TypeInt::CC_GT);
_pcmp_eq = igvn->makecon(TypeInt::CC_EQ);
// Optimize objects compare.
while (ptr_cmp_worklist.length() != 0) {
Node *n = ptr_cmp_worklist.pop();
Node *res = optimize_ptr_compare(n);
if (res != NULL) {
#ifndef PRODUCT
if (PrintOptimizePtrCompare) {
tty->print_cr("++++ Replaced: %d %s(%d,%d) --> %s", n->_idx, (n->Opcode() == Op_CmpP ? "CmpP" : "CmpN"), n->in(1)->_idx, n->in(2)->_idx, (res == _pcmp_eq ? "EQ" : "NotEQ"));
if (Verbose) {
n->dump(1);
}
}
#endif
igvn->replace_node(n, res);
}
}
// cleanup
if (_pcmp_neq->outcnt() == 0)
igvn->hash_delete(_pcmp_neq);
if (_pcmp_eq->outcnt() == 0)
igvn->hash_delete(_pcmp_eq);
}
// For MemBarStoreStore nodes added in library_call.cpp, check
// escape status of associated AllocateNode and optimize out
// MemBarStoreStore node if the allocated object never escapes.
while (storestore_worklist.length() != 0) {
Node *n = storestore_worklist.pop();
MemBarStoreStoreNode *storestore = n->as_MemBarStoreStore();
Node *alloc = storestore->in(MemBarNode::Precedent)->in(0);
assert (alloc->is_Allocate(), "storestore should point to AllocateNode");
if (not_global_escape(alloc)) {
MemBarNode* mb = MemBarNode::make(C, Op_MemBarCPUOrder, Compile::AliasIdxBot);
mb->init_req(TypeFunc::Memory, storestore->in(TypeFunc::Memory));
mb->init_req(TypeFunc::Control, storestore->in(TypeFunc::Control));
igvn->register_new_node_with_optimizer(mb);
igvn->replace_node(storestore, mb);
}
}
}
// Optimize objects compare.
Node* ConnectionGraph::optimize_ptr_compare(Node* n) {
assert(OptimizePtrCompare, "sanity");
PointsToNode* ptn1 = ptnode_adr(n->in(1)->_idx);
PointsToNode* ptn2 = ptnode_adr(n->in(2)->_idx);
JavaObjectNode* jobj1 = unique_java_object(n->in(1));
JavaObjectNode* jobj2 = unique_java_object(n->in(2));
assert(ptn1->is_JavaObject() || ptn1->is_LocalVar(), "sanity");
assert(ptn2->is_JavaObject() || ptn2->is_LocalVar(), "sanity");
// Check simple cases first.
if (jobj1 != NULL) {
if (jobj1->escape_state() == PointsToNode::NoEscape) {
if (jobj1 == jobj2) {
// Comparing the same non-escaping object.
return _pcmp_eq;
}
Node* obj = jobj1->ideal_node();
// Comparing a non-escaping allocation.
if ((obj->is_Allocate() || obj->is_CallStaticJava()) &&
!ptn2->points_to(jobj1)) {
return _pcmp_neq; // This includes nullness check.
}
}
}
if (jobj2 != NULL) {
if (jobj2->escape_state() == PointsToNode::NoEscape) {
Node* obj = jobj2->ideal_node();
// Comparing a non-escaping allocation.
if ((obj->is_Allocate() || obj->is_CallStaticJava()) &&
!ptn1->points_to(jobj2)) {
return _pcmp_neq; // This includes nullness check.
}
}
}
if (jobj1 != NULL && jobj1 != phantom_obj &&
jobj2 != NULL && jobj2 != phantom_obj &&
jobj1->ideal_node()->is_Con() &&
jobj2->ideal_node()->is_Con()) {
// Klass or String constants comparison. Need to be careful with
// compressed pointers - compare types of ConN and ConP instead of nodes.
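// For example (hypothetical Java source), a comparison of two String
// constants can be folded here because both inputs are ConP/ConN constants:
//   if ("a" == "b") { ... }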
const Type* t1 = jobj1->ideal_node()->get_ptr_type();
const Type* t2 = jobj2->ideal_node()->get_ptr_type();
if (t1->make_ptr() == t2->make_ptr()) {
return _pcmp_eq;
} else {
return _pcmp_neq;
}
}
if (ptn1->meet(ptn2)) {
return NULL; // Sets are not disjoint
}
// Sets are disjoint.
bool set1_has_unknown_ptr = ptn1->points_to(phantom_obj);
bool set2_has_unknown_ptr = ptn2->points_to(phantom_obj);
bool set1_has_null_ptr = ptn1->points_to(null_obj);
bool set2_has_null_ptr = ptn2->points_to(null_obj);
if ((set1_has_unknown_ptr && set2_has_null_ptr) ||
(set2_has_unknown_ptr && set1_has_null_ptr)) {
// Check nullness of unknown object.
return NULL;
}
// Disjointness by itself is not sufficient since
// alias analysis is not complete for escaped objects.
// Disjoint sets are definitely unrelated only when
// at least one set contains only non-escaping allocations.
if (!set1_has_unknown_ptr && !set1_has_null_ptr) {
if (ptn1->non_escaping_allocation()) {
return _pcmp_neq;
}
}
if (!set2_has_unknown_ptr && !set2_has_null_ptr) {
if (ptn2->non_escaping_allocation()) {
return _pcmp_neq;
}
}
return NULL;
}
// Connection Graph construction functions.
void ConnectionGraph::add_local_var(Node *n, PointsToNode::EscapeState es) {
PointsToNode* ptadr = _nodes.at(n->_idx);
if (ptadr != NULL) {
assert(ptadr->is_LocalVar() && ptadr->ideal_node() == n, "sanity");
return;
}
Compile* C = _compile;
ptadr = new (C->comp_arena()) LocalVarNode(this, n, es);
_nodes.at_put(n->_idx, ptadr);
}
void ConnectionGraph::add_java_object(Node *n, PointsToNode::EscapeState es) {
PointsToNode* ptadr = _nodes.at(n->_idx);
if (ptadr != NULL) {
assert(ptadr->is_JavaObject() && ptadr->ideal_node() == n, "sanity");
return;
}
Compile* C = _compile;
ptadr = new (C->comp_arena()) JavaObjectNode(this, n, es);
_nodes.at_put(n->_idx, ptadr);
}
void ConnectionGraph::add_field(Node *n, PointsToNode::EscapeState es, int offset) {
PointsToNode* ptadr = _nodes.at(n->_idx);
if (ptadr != NULL) {
assert(ptadr->is_Field() && ptadr->ideal_node() == n, "sanity");
return;
}
bool unsafe = false;
bool is_oop = is_oop_field(n, offset, &unsafe);
if (unsafe) {
es = PointsToNode::GlobalEscape;
}
Compile* C = _compile;
FieldNode* field = new (C->comp_arena()) FieldNode(this, n, es, offset, is_oop);
_nodes.at_put(n->_idx, field);
}
void ConnectionGraph::add_arraycopy(Node *n, PointsToNode::EscapeState es,
PointsToNode* src, PointsToNode* dst) {
assert(!src->is_Field() && !dst->is_Field(), "only for JavaObject and LocalVar");
assert((src != null_obj) && (dst != null_obj), "not for ConP NULL");
PointsToNode* ptadr = _nodes.at(n->_idx);
if (ptadr != NULL) {
assert(ptadr->is_Arraycopy() && ptadr->ideal_node() == n, "sanity");
return;
}
Compile* C = _compile;
ptadr = new (C->comp_arena()) ArraycopyNode(this, n, es);
_nodes.at_put(n->_idx, ptadr);
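// Sketch of the edges added below: the Arraycopy node points to the source
// object and the destination object points to the Arraycopy node, i.e.
//
//   dst -> ArraycopyNode -> src
//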
// Add edge from arraycopy node to source object.
(void)add_edge(ptadr, src);
src->set_arraycopy_src();
// Add edge from destination object to arraycopy node.
(void)add_edge(dst, ptadr);
dst->set_arraycopy_dst();
}
bool ConnectionGraph::is_oop_field(Node* n, int offset, bool* unsafe) {
const Type* adr_type = n->as_AddP()->bottom_type();
BasicType bt = T_INT;
if (offset == Type::OffsetBot) {
// Check only oop fields.
if (!adr_type->isa_aryptr() ||
(adr_type->isa_aryptr()->klass() == NULL) ||
adr_type->isa_aryptr()->klass()->is_obj_array_klass()) {
// OffsetBot is used to reference array's element. Ignore first AddP.
if (find_second_addp(n, n->in(AddPNode::Base)) == NULL) {
bt = T_OBJECT;
}
}
} else if (offset != oopDesc::klass_offset_in_bytes()) {
if (adr_type->isa_instptr()) {
ciField* field = _compile->alias_type(adr_type->isa_instptr())->field();
if (field != NULL) {
bt = field->layout_type();
} else {
// Check for unsafe oop field access
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
int opcode = n->fast_out(i)->Opcode();
if (opcode == Op_StoreP || opcode == Op_StoreN ||
opcode == Op_LoadP || opcode == Op_LoadN ||
opcode == Op_GetAndSetP || opcode == Op_GetAndSetN ||
opcode == Op_CompareAndSwapP || opcode == Op_CompareAndSwapN) {
bt = T_OBJECT;
(*unsafe) = true;
break;
}
}
}
} else if (adr_type->isa_aryptr()) {
if (offset == arrayOopDesc::length_offset_in_bytes()) {
// Ignore array length load.
} else if (find_second_addp(n, n->in(AddPNode::Base)) != NULL) {
// Ignore first AddP.
} else {
const Type* elemtype = adr_type->isa_aryptr()->elem();
bt = elemtype->array_element_basic_type();
}
} else if (adr_type->isa_rawptr() || adr_type->isa_klassptr()) {
// Allocation initialization, ThreadLocal field access, unsafe access
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
int opcode = n->fast_out(i)->Opcode();
if (opcode == Op_StoreP || opcode == Op_StoreN ||
opcode == Op_LoadP || opcode == Op_LoadN ||
opcode == Op_GetAndSetP || opcode == Op_GetAndSetN ||
opcode == Op_CompareAndSwapP || opcode == Op_CompareAndSwapN) {
bt = T_OBJECT;
break;
}
}
}
}
return (bt == T_OBJECT || bt == T_NARROWOOP || bt == T_ARRAY);
}
// Returns unique pointed java object or NULL.
JavaObjectNode* ConnectionGraph::unique_java_object(Node *n) {
assert(!_collecting, "should not call when contructed graph");
// If the node was created after the escape computation we can't answer.
uint idx = n->_idx;
if (idx >= nodes_size()) {
return NULL;
}
PointsToNode* ptn = ptnode_adr(idx);
if (ptn == NULL) {
return NULL;
}
if (ptn->is_JavaObject()) {
return ptn->as_JavaObject();
}
assert(ptn->is_LocalVar(), "sanity");
// Check all java objects it points to.
JavaObjectNode* jobj = NULL;
for (EdgeIterator i(ptn); i.has_next(); i.next()) {
PointsToNode* e = i.get();
if (e->is_JavaObject()) {
if (jobj == NULL) {
jobj = e->as_JavaObject();
} else if (jobj != e) {
return NULL;
}
}
}
return jobj;
}
// Return true if this node points only to non-escaping allocations.
bool PointsToNode::non_escaping_allocation() {
if (is_JavaObject()) {
Node* n = ideal_node();
if (n->is_Allocate() || n->is_CallStaticJava()) {
return (escape_state() == PointsToNode::NoEscape);
} else {
return false;
}
}
assert(is_LocalVar(), "sanity");
// Check all java objects it points to.
for (EdgeIterator i(this); i.has_next(); i.next()) {
PointsToNode* e = i.get();
if (e->is_JavaObject()) {
Node* n = e->ideal_node();
if ((e->escape_state() != PointsToNode::NoEscape) ||
!(n->is_Allocate() || n->is_CallStaticJava())) {
return false;
}
}
}
return true;
}
// Return true if we know the node does not escape globally.
bool ConnectionGraph::not_global_escape(Node *n) {
assert(!_collecting, "should not call during graph construction");
// If the node was created after the escape computation we can't answer.
uint idx = n->_idx;
if (idx >= nodes_size()) {
return false;
}
PointsToNode* ptn = ptnode_adr(idx);
if (ptn == NULL) {
return false; // not in congraph (e.g. ConI)
}
PointsToNode::EscapeState es = ptn->escape_state();
// If we have already computed a value, return it.
if (es >= PointsToNode::GlobalEscape)
return false;
if (ptn->is_JavaObject()) {
return true; // (es < PointsToNode::GlobalEscape);
}
assert(ptn->is_LocalVar(), "sanity");
// Check all java objects it points to.
for (EdgeIterator i(ptn); i.has_next(); i.next()) {
if (i.get()->escape_state() >= PointsToNode::GlobalEscape)
return false;
}
return true;
}
// Helper functions
// Return true if this node is the specified node or points to it.
bool PointsToNode::points_to(JavaObjectNode* ptn) const {
if (is_JavaObject()) {
return (this == ptn);
}
assert(is_LocalVar() || is_Field(), "sanity");
for (EdgeIterator i(this); i.has_next(); i.next()) {
if (i.get() == ptn)
return true;
}
return false;
}
// Return true if the points-to sets of the two nodes intersect.
bool PointsToNode::meet(PointsToNode* ptn) {
if (this == ptn) {
return true;
} else if (ptn->is_JavaObject()) {
return this->points_to(ptn->as_JavaObject());
} else if (this->is_JavaObject()) {
return ptn->points_to(this->as_JavaObject());
}
assert(this->is_LocalVar() && ptn->is_LocalVar(), "sanity");
int ptn_count = ptn->edge_count();
for (EdgeIterator i(this); i.has_next(); i.next()) {
PointsToNode* this_e = i.get();
for (int j = 0; j < ptn_count; j++) {
if (this_e == ptn->edge(j))
return true;
}
}
return false;
}
#ifdef ASSERT
// Return true if the given java object is among this field's bases.
bool FieldNode::has_base(JavaObjectNode* jobj) const {
for (BaseIterator i(this); i.has_next(); i.next()) {
if (i.get() == jobj)
return true;
}
return false;
}
#endif
int ConnectionGraph::address_offset(Node* adr, PhaseTransform *phase) {
const Type *adr_type = phase->type(adr);
if (adr->is_AddP() && adr_type->isa_oopptr() == NULL &&
adr->in(AddPNode::Address)->is_Proj() &&
adr->in(AddPNode::Address)->in(0)->is_Allocate()) {
// We are computing a raw address for a store captured by an Initialize;
// compute an appropriate address type. AddP cases #3 and #5 (see below).
int offs = (int)phase->find_intptr_t_con(adr->in(AddPNode::Offset), Type::OffsetBot);
assert(offs != Type::OffsetBot ||
adr->in(AddPNode::Address)->in(0)->is_AllocateArray(),
"offset must be a constant or it is initialization of array");
return offs;
}
const TypePtr *t_ptr = adr_type->isa_ptr();
assert(t_ptr != NULL, "must be a pointer type");
return t_ptr->offset();
}
Node* ConnectionGraph::get_addp_base(Node *addp) {
assert(addp->is_AddP(), "must be AddP");
//
// AddP cases for Base and Address inputs:
// case #1. Direct object's field reference:
// Allocate
// |
// Proj #5 ( oop result )
// |
// CheckCastPP (cast to instance type)
// | |
// AddP ( base == address )
//
// case #2. Indirect object's field reference:
// Phi
// |
// CastPP (cast to instance type)
// | |
// AddP ( base == address )
//
// case #3. Raw object's field reference for Initialize node:
// Allocate
// |
// Proj #5 ( oop result )
// top |
// \ |
// AddP ( base == top )
//
// case #4. Array's element reference:
// {CheckCastPP | CastPP}
// | | |
// | AddP ( array's element offset )
// | |
// AddP ( array's offset )
//
// case #5. Raw object's field reference for arraycopy stub call:
// The inline_native_clone() case when the arraycopy stub is called
// after the allocation before Initialize and CheckCastPP nodes.
// Allocate
// |
// Proj #5 ( oop result )
// | |
// AddP ( base == address )
//
// case #6. Constant Pool, ThreadLocal, CastX2P or
// Raw object's field reference:
// {ConP, ThreadLocal, CastX2P, raw Load}
// top |
// \ |
// AddP ( base == top )
//
// case #7. Klass's field reference.
// LoadKlass
// | |
// AddP ( base == address )
//
// case #8. narrow Klass's field reference.
// LoadNKlass
// |
// DecodeN
// | |
// AddP ( base == address )
//
Node *base = addp->in(AddPNode::Base);
if (base->uncast()->is_top()) { // The AddP case #3 and #6.
base = addp->in(AddPNode::Address);
while (base->is_AddP()) {
// Case #6 (unsafe access) may have several chained AddP nodes.
assert(base->in(AddPNode::Base)->uncast()->is_top(), "expected unsafe access address only");
base = base->in(AddPNode::Address);
}
Node* uncast_base = base->uncast();
int opcode = uncast_base->Opcode();
assert(opcode == Op_ConP || opcode == Op_ThreadLocal ||
opcode == Op_CastX2P || uncast_base->is_DecodeNarrowPtr() ||
(uncast_base->is_Mem() && (uncast_base->bottom_type()->isa_rawptr() != NULL)) ||
(uncast_base->is_Proj() && uncast_base->in(0)->is_Allocate()), "sanity");
}
return base;
}
Node* ConnectionGraph::find_second_addp(Node* addp, Node* n) {
assert(addp->is_AddP() && addp->outcnt() > 0, "Don't process dead nodes");
Node* addp2 = addp->raw_out(0);
if (addp->outcnt() == 1 && addp2->is_AddP() &&
addp2->in(AddPNode::Base) == n &&
addp2->in(AddPNode::Address) == addp) {
assert(addp->in(AddPNode::Base) == n, "expecting the same base");
//
// Find the array's offset to push it on the worklist first and
// as a result process the array's element offset first (pushed second)
// to avoid a CastPP for the array's offset.
// Otherwise the inserted CastPP (LocalVar) will point to what
// the AddP (Field) points to. Which would be wrong since
// the algorithm expects the CastPP to point to the same thing
// as AddP's base CheckCastPP (LocalVar).
//
// ArrayAllocation
// |
// CheckCastPP
// |
// memProj (from ArrayAllocation CheckCastPP)
// | ||
// | || Int (element index)
// | || | ConI (log(element size))
// | || | /
// | || LShift
// | || /
// | AddP (array's element offset)
// | |
// | | ConI (array's offset: #12(32-bits) or #24(64-bits))
// | / /
// AddP (array's offset)
// |
// Load/Store (memory operation on array's element)
//
return addp2;
}
return NULL;
}
//
// Adjust the type and inputs of an AddP which computes the
// address of a field of an instance
//
bool ConnectionGraph::split_AddP(Node *addp, Node *base) {
PhaseGVN* igvn = _igvn;
const TypeOopPtr *base_t = igvn->type(base)->isa_oopptr();
assert(base_t != NULL && base_t->is_known_instance(), "expecting instance oopptr");
const TypeOopPtr *t = igvn->type(addp)->isa_oopptr();
if (t == NULL) {
// We are computing a raw address for a store captured by an Initialize;
// compute an appropriate address type (cases #3 and #5).
assert(igvn->type(addp) == TypeRawPtr::NOTNULL, "must be raw pointer");
assert(addp->in(AddPNode::Address)->is_Proj(), "base of raw address must be result projection from allocation");
intptr_t offs = (int)igvn->find_intptr_t_con(addp->in(AddPNode::Offset), Type::OffsetBot);
assert(offs != Type::OffsetBot, "offset must be a constant");
t = base_t->add_offset(offs)->is_oopptr();
}
int inst_id = base_t->instance_id();
assert(!t->is_known_instance() || t->instance_id() == inst_id,
"old type must be non-instance or match new type");
// The type 't' could be a subclass of 'base_t'.
// As a result t->offset() could be larger than base_t's size and it will
// cause a failure in add_offset() with narrow oops since the TypeOopPtr()
// constructor verifies correctness of the offset.
//
// It could happen on a subclass's branch (from the type profiling
// inlining) which was not eliminated during parsing since the exactness
// of the allocation type was not propagated to the subclass type check.
//
// Or the type 't' could be unrelated to 'base_t' at all.
// It could happen when the CHA type is different from the MDO type on a dead path
// (for example, from an instanceof check) which is not collapsed during parsing.
//
// Do nothing for such AddP node and don't process its users since
// this code branch will go away.
//
if (!t->is_known_instance() &&
!base_t->klass()->is_subtype_of(t->klass())) {
return false; // bail out
}
const TypeOopPtr *tinst = base_t->add_offset(t->offset())->is_oopptr();
// Do NOT remove the next line: ensure a new alias index is allocated
// for the instance type. Note: C++ will not remove it since the call
// has side effect.
int alias_idx = _compile->get_alias_index(tinst);
igvn->set_type(addp, tinst);
// record the allocation in the node map
set_map(addp, get_map(base->_idx));
// Set addp's Base and Address to 'base'.
Node *abase = addp->in(AddPNode::Base);
Node *adr = addp->in(AddPNode::Address);
if (adr->is_Proj() && adr->in(0)->is_Allocate() &&
adr->in(0)->_idx == (uint)inst_id) {
// Skip AddP cases #3 and #5.
} else {
assert(!abase->is_top(), "sanity"); // AddP case #3
if (abase != base) {
igvn->hash_delete(addp);
addp->set_req(AddPNode::Base, base);
if (abase == adr) {
addp->set_req(AddPNode::Address, base);
} else {
// AddP case #4 (adr is array's element offset AddP node)
#ifdef ASSERT
const TypeOopPtr *atype = igvn->type(adr)->isa_oopptr();
assert(adr->is_AddP() && atype != NULL &&
atype->instance_id() == inst_id, "array's element offset should be processed first");
#endif
}
igvn->hash_insert(addp);
}
}
// Put on IGVN worklist since at least addp's type was changed above.
record_for_optimizer(addp);
return true;
}
//
// Create a new version of orig_phi if necessary. Returns either the newly
// created phi or an existing phi. Sets create_new to indicate whether a new
// phi was created. Cache the last newly created phi in the node map.
//
PhiNode *ConnectionGraph::create_split_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *> &orig_phi_worklist, bool &new_created) {
Compile *C = _compile;
PhaseGVN* igvn = _igvn;
new_created = false;
int phi_alias_idx = C->get_alias_index(orig_phi->adr_type());
// nothing to do if orig_phi is bottom memory or matches alias_idx
if (phi_alias_idx == alias_idx) {
return orig_phi;
}
// Have we recently created a Phi for this alias index?
PhiNode *result = get_map_phi(orig_phi->_idx);
if (result != NULL && C->get_alias_index(result->adr_type()) == alias_idx) {
return result;
}
// Previous check may fail when the same wide memory Phi was split into Phis
// for different memory slices. Search all Phis for this region.
if (result != NULL) {
Node* region = orig_phi->in(0);
for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
Node* phi = region->fast_out(i);
if (phi->is_Phi() &&
C->get_alias_index(phi->as_Phi()->adr_type()) == alias_idx) {
assert(phi->_idx >= nodes_size(), "only new Phi per instance memory slice");
return phi->as_Phi();
}
}
}
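// Bail out if creating more split Phis could push the compilation over the node limit.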
if (C->live_nodes() + 2*NodeLimitFudgeFactor > C->max_node_limit()) {
if (C->do_escape_analysis() == true && !C->failing()) {
// Retry compilation without escape analysis.
// If this is the first failure, the sentinel string will "stick"
// to the Compile object, and the C2Compiler will see it and retry.
C->record_failure(C2Compiler::retry_no_escape_analysis());
}
return NULL;
}
orig_phi_worklist.append_if_missing(orig_phi);
const TypePtr *atype = C->get_adr_type(alias_idx);
result = PhiNode::make(orig_phi->in(0), NULL, Type::MEMORY, atype);
C->copy_node_notes_to(result, orig_phi);
igvn->set_type(result, result->bottom_type());
record_for_optimizer(result);
set_map(orig_phi, result);
new_created = true;
return result;
}
//
// Return a new version of Memory Phi "orig_phi" with the inputs having the
// specified alias index.
//
PhiNode *ConnectionGraph::split_memory_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *> &orig_phi_worklist) {
assert(alias_idx != Compile::AliasIdxBot, "can't split out bottom memory");
Compile *C = _compile;
PhaseGVN* igvn = _igvn;
bool new_phi_created;
PhiNode *result = create_split_phi(orig_phi, alias_idx, orig_phi_worklist, new_phi_created);
if (!new_phi_created) {
return result;
}
GrowableArray<PhiNode *> phi_list;
GrowableArray<uint> cur_input;
PhiNode *phi = orig_phi;
uint idx = 1;
bool finished = false;
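// Iterate instead of recursing: when a nested Phi needs its own split, the
// current Phi and input index are pushed on phi_list/cur_input and resumed later.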
while(!finished) {
while (idx < phi->req()) {
Node *mem = find_inst_mem(phi->in(idx), alias_idx, orig_phi_worklist);
if (mem != NULL && mem->is_Phi()) {
PhiNode *newphi = create_split_phi(mem->as_Phi(), alias_idx, orig_phi_worklist, new_phi_created);
if (new_phi_created) {
// Found a Phi for which we created a new split; push the current one on the
// worklist and begin processing the new one.
phi_list.push(phi);
cur_input.push(idx);
phi = mem->as_Phi();
result = newphi;
idx = 1;
continue;
} else {
mem = newphi;
}
}
if (C->failing()) {
return NULL;
}
result->set_req(idx++, mem);
}
#ifdef ASSERT
// verify that the new Phi has an input for each input of the original
assert( phi->req() == result->req(), "must have same number of inputs.");
assert( result->in(0) != NULL && result->in(0) == phi->in(0), "regions must match");
#endif
// Check if all new phi's inputs have specified alias index.
// Otherwise use old phi.
for (uint i = 1; i < phi->req(); i++) {
Node* in = result->in(i);
assert((phi->in(i) == NULL) == (in == NULL), "inputs must correspond.");
}
// we have finished processing a Phi, see if there are any more to do
finished = (phi_list.length() == 0 );
if (!finished) {
phi = phi_list.pop();
idx = cur_input.pop();
PhiNode *prev_result = get_map_phi(phi->_idx);
prev_result->set_req(idx++, result);
result = prev_result;
}
}
return result;
}
//
// The next methods are derived from methods in MemNode.
//
Node* ConnectionGraph::step_through_mergemem(MergeMemNode *mmem, int alias_idx, const TypeOopPtr *toop) {
Node *mem = mmem;
// TypeOopPtr::NOTNULL+any is an OOP with unknown offset - generally
// means an array I have not precisely typed yet. Do not do any
// alias stuff with it any time soon.
if (toop->base() != Type::AnyPtr &&
!(toop->klass() != NULL &&
toop->klass()->is_java_lang_Object() &&
toop->offset() == Type::OffsetBot)) {
mem = mmem->memory_at(alias_idx);
// Update input if it is progress over what we have now
}
return mem;
}
//
// Move memory users to their memory slices.
//
void ConnectionGraph::move_inst_mem(Node* n, GrowableArray<PhiNode *> &orig_phis) {
Compile* C = _compile;
PhaseGVN* igvn = _igvn;
const TypePtr* tp = igvn->type(n->in(MemNode::Address))->isa_ptr();
assert(tp != NULL, "ptr type");
int alias_idx = C->get_alias_index(tp);
int general_idx = C->get_general_index(alias_idx);
// Move users first
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node* use = n->fast_out(i);
if (use->is_MergeMem()) {
MergeMemNode* mmem = use->as_MergeMem();
assert(n == mmem->memory_at(alias_idx), "should be on instance memory slice");
if (n != mmem->memory_at(general_idx) || alias_idx == general_idx) {
continue; // Nothing to do
}
// Replace previous general reference to mem node.
uint orig_uniq = C->unique();
Node* m = find_inst_mem(n, general_idx, orig_phis);
assert(orig_uniq == C->unique(), "no new nodes");
mmem->set_memory_at(general_idx, m);
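// Replacing the general slice removed one of n's uses; adjust the DU iteration bounds.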
--imax;
--i;
} else if (use->is_MemBar()) {
assert(!use->is_Initialize(), "initializing stores should not be moved");
if (use->req() > MemBarNode::Precedent &&
use->in(MemBarNode::Precedent) == n) {
// Don't move related membars.
record_for_optimizer(use);
continue;
}
tp = use->as_MemBar()->adr_type()->isa_ptr();
if ((tp != NULL && C->get_alias_index(tp) == alias_idx) ||
alias_idx == general_idx) {
continue; // Nothing to do
}
// Move to general memory slice.
uint orig_uniq = C->unique();
Node* m = find_inst_mem(n, general_idx, orig_phis);
assert(orig_uniq == C->unique(), "no new nodes");
igvn->hash_delete(use);
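// replace_edge() returns the number of edges replaced; each replaced edge
// removes a use of 'n', so shrink the iteration bound accordingly.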
imax -= use->replace_edge(n, m);
igvn->hash_insert(use);
record_for_optimizer(use);
--i;
#ifdef ASSERT
} else if (use->is_Mem()) {
if (use->Opcode() == Op_StoreCM && use->in(MemNode::OopStore) == n) {
// Don't move related cardmark.
continue;
}
// Memory nodes should have new memory input.
tp = igvn->type(use->in(MemNode::Address))->isa_ptr();
assert(tp != NULL, "ptr type");
int idx = C->get_alias_index(tp);
assert(get_map(use->_idx) != NULL || idx == alias_idx,
"Following memory nodes should have new memory input or be on the same memory slice");
} else if (use->is_Phi()) {
// Phi nodes should be split and moved already.
tp = use->as_Phi()->adr_type()->isa_ptr();
assert(tp != NULL, "ptr type");
int idx = C->get_alias_index(tp);
assert(idx == alias_idx, "Following Phi nodes should be on the same memory slice");
} else {
use->dump();
assert(false, "should not be here");
#endif
}
}
}
//
// Search the memory chain of "mem" to find a MemNode whose address
// has the specified alias index.
//
Node* ConnectionGraph::find_inst_mem(Node *orig_mem, int alias_idx, GrowableArray<PhiNode *> &orig_phis) {
if (orig_mem == NULL)
return orig_mem;
Compile* C = _compile;
PhaseGVN* igvn = _igvn;
const TypeOopPtr *toop = C->get_adr_type(alias_idx)->isa_oopptr();
bool is_instance = (toop != NULL) && toop->is_known_instance();
Node *start_mem = C->start()->proj_out(TypeFunc::Memory);
Node *prev = NULL;
Node *result = orig_mem;
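// Walk up the memory chain, stopping at a sentinel (start memory or the
// instance's allocation/initialization), at a node on the right slice, or
// when no more progress is made (prev == result).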
while (prev != result) {
prev = result;
if (result == start_mem)
break; // hit one of our sentinels
if (result->is_Mem()) {
const Type *at = igvn->type(result->in(MemNode::Address));
if (at == Type::TOP)
break; // Dead
assert (at->isa_ptr() != NULL, "pointer type required.");
int idx = C->get_alias_index(at->is_ptr());
if (idx == alias_idx)
break; // Found
if (!is_instance && (at->isa_oopptr() == NULL ||
!at->is_oopptr()->is_known_instance())) {
break; // Do not skip store to general memory slice.
}
result = result->in(MemNode::Memory);
}
if (!is_instance)
continue; // don't search further for non-instance types
// skip over a call which does not affect this memory slice
if (result->is_Proj() && result->as_Proj()->_con == TypeFunc::Memory) {
Node *proj_in = result->in(0);
if (proj_in->is_Allocate() && proj_in->_idx == (uint)toop->instance_id()) {
break; // hit one of our sentinels
} else if (proj_in->is_Call()) {
CallNode *call = proj_in->as_Call();
if (!call->may_modify(toop, igvn)) {
result = call->in(TypeFunc::Memory);
}
} else if (proj_in->is_Initialize()) {
AllocateNode* alloc = proj_in->as_Initialize()->allocation();
// Stop if this is the initialization for the object instance which
// contains this memory slice; otherwise skip over it.
if (alloc == NULL || alloc->_idx != (uint)toop->instance_id()) {
result = proj_in->in(TypeFunc::Memory);
}
} else if (proj_in->is_MemBar()) {
result = proj_in->in(TypeFunc::Memory);
}
} else if (result->is_MergeMem()) {
MergeMemNode *mmem = result->as_MergeMem();
result = step_through_mergemem(mmem, alias_idx, toop);
if (result == mmem->base_memory()) {
// Didn't find instance memory, search through general slice recursively.
result = mmem->memory_at(C->get_general_index(alias_idx));
result = find_inst_mem(result, alias_idx, orig_phis);
if (C->failing()) {
return NULL;
}
mmem->set_memory_at(alias_idx, result);
}
} else if (result->is_Phi() &&
C->get_alias_index(result->as_Phi()->adr_type()) != alias_idx) {
Node *un = result->as_Phi()->unique_input(igvn);
if (un != NULL) {
orig_phis.append_if_missing(result->as_Phi());
result = un;
} else {
break;
}
} else if (result->is_ClearArray()) {
if (!ClearArrayNode::step_through(&result, (uint)toop->instance_id(), igvn)) {
// Cannot bypass the initialization of the instance
// we are looking for.
break;
}
// Otherwise skip it (the call updated 'result' value).
} else if (result->Opcode() == Op_SCMemProj) {
Node* mem = result->in(0);
Node* adr = NULL;
if (mem->is_LoadStore()) {
adr = mem->in(MemNode::Address);
} else {
assert(mem->Opcode() == Op_EncodeISOArray, "sanity");
adr = mem->in(3); // Memory edge corresponds to destination array
}
const Type *at = igvn->type(adr);
if (at != Type::TOP) {
assert (at->isa_ptr() != NULL, "pointer type required.");
int idx = C->get_alias_index(at->is_ptr());
assert(idx != alias_idx, "Object is not scalar replaceable if a LoadStore node accesses its field");
break;
}
result = mem->in(MemNode::Memory);
}
}
if (result->is_Phi()) {
PhiNode *mphi = result->as_Phi();
assert(mphi->bottom_type() == Type::MEMORY, "memory phi required");
const TypePtr *t = mphi->adr_type();
if (!is_instance) {
// Push all non-instance Phis on the orig_phis worklist to update inputs
// during Phase 4 if needed.
orig_phis.append_if_missing(mphi);
} else if (C->get_alias_index(t) != alias_idx) {
// Create a new Phi with the specified alias index type.
result = split_memory_phi(mphi, alias_idx, orig_phis);
}
}
// The result is either a MemNode, a PhiNode or an InitializeNode.
return result;
}
//
// Convert the types of unescaped objects to instance types where possible,
// propagate the new type information through the graph, and update memory
// edges and MergeMem inputs to reflect the new type.
//
// We start with allocations (and calls which may be allocations) on alloc_worklist.
// The processing is done in 4 phases:
//
// Phase 1: Process possible allocations from alloc_worklist. Create instance
// types for the CheckCastPP for allocations where possible.
// Propagate the new types through users as follows:
// casts and Phi: push users on alloc_worklist
// AddP: cast Base and Address inputs to the instance type
// push any AddP users on alloc_worklist and push any memnode
// users onto memnode_worklist.
// Phase 2: Process MemNode's from memnode_worklist. Compute the new address type and
// search the Memory chain for a store with the appropriate
// address type. If a Phi is found, create a new version with
// the appropriate memory slices from each of the Phi inputs.
// For stores, process the users as follows:
// MemNode: push on memnode_worklist
// MergeMem: push on mergemem_worklist
// Phase 3: Process MergeMem nodes from mergemem_worklist. Walk each memory slice
// moving the first node encountered of each instance type to
// the input corresponding to its alias index.
// Phase 4: Update the inputs of non-instance memory Phis and the Memory input of memnodes.
//
// In the following example, the CheckCastPP nodes are the cast of allocation
// results and the allocation of node 29 is unescaped and eligible to be an
// instance type.
//
// We start with:
//
// 7 Parm #memory
// 10 ConI "12"
// 19 CheckCastPP "Foo"
// 20 AddP _ 19 19 10 Foo+12 alias_index=4
// 29 CheckCastPP "Foo"
// 30 AddP _ 29 29 10 Foo+12 alias_index=4
//
// 40 StoreP 25 7 20 ... alias_index=4
// 50 StoreP 35 40 30 ... alias_index=4
// 60 StoreP 45 50 20 ... alias_index=4
// 70 LoadP _ 60 30 ... alias_index=4
// 80 Phi 75 50 60 Memory alias_index=4
// 90 LoadP _ 80 30 ... alias_index=4
// 100 LoadP _ 80 20 ... alias_index=4
//
//
// Phase 1 creates an instance type for node 29 assigning it an instance id of 24
// and creating a new alias index for node 30. This gives:
//
// 7 Parm #memory
// 10 ConI "12"
// 19 CheckCastPP "Foo"
// 20 AddP _ 19 19 10 Foo+12 alias_index=4
// 29 CheckCastPP "Foo" iid=24
// 30 AddP _ 29 29 10 Foo+12 alias_index=6 iid=24
//
// 40 StoreP 25 7 20 ... alias_index=4
// 50 StoreP 35 40 30 ... alias_index=6
// 60 StoreP 45 50 20 ... alias_index=4
// 70 LoadP _ 60 30 ... alias_index=6
// 80 Phi 75 50 60 Memory alias_index=4
// 90 LoadP _ 80 30 ... alias_index=6
// 100 LoadP _ 80 20 ... alias_index=4
//
// In phase 2, new memory inputs are computed for the loads and stores,
// and a new version of the phi is created. In phase 4, the inputs to
// node 80 are updated and then the memory nodes are updated with the
// values computed in phase 2. This results in:
//
// 7 Parm #memory
// 10 ConI "12"
// 19 CheckCastPP "Foo"
// 20 AddP _ 19 19 10 Foo+12 alias_index=4
// 29 CheckCastPP "Foo" iid=24
// 30 AddP _ 29 29 10 Foo+12 alias_index=6 iid=24
//
// 40 StoreP 25 7 20 ... alias_index=4
// 50 StoreP 35 7 30 ... alias_index=6
// 60 StoreP 45 40 20 ... alias_index=4
// 70 LoadP _ 50 30 ... alias_index=6
// 80 Phi 75 40 60 Memory alias_index=4
// 120 Phi 75 50 50 Memory alias_index=6
// 90 LoadP _ 120 30 ... alias_index=6
// 100 LoadP _ 80 20 ... alias_index=4
//
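// For intuition only: the IR above roughly corresponds to a hypothetical Java
// fragment of the form
//
//   Foo a = getFoo();      // node 19, not a local allocation
//   Foo b = new Foo();     // node 29, proven non-escaping (iid=24)
//   a.f = ...; b.f = ...;  // StoreP nodes 40/60 vs. 50
//   ... = b.f; ... = a.f;  // LoadP nodes 70/90 vs. 100
//
// Only the accesses to 'b' are moved to the new alias index (6), so loads of
// 'b.f' no longer have to be ordered with respect to stores into 'a.f'.
//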
void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist) {
GrowableArray<Node *> memnode_worklist;
GrowableArray<PhiNode *> orig_phis;
PhaseIterGVN *igvn = _igvn;
uint new_index_start = (uint) _compile->num_alias_types();
Arena* arena = Thread::current()->resource_area();
VectorSet visited(arena);
ideal_nodes.clear(); // Reset for use with set_map/get_map.
uint unique_old = _compile->unique();
// Phase 1: Process possible allocations from alloc_worklist.
// Create instance types for the CheckCastPP for allocations where possible.
//
// (Note: don't forget to change the order of the second AddP node on
// the alloc_worklist if the order of the worklist processing is changed,
// see the comment in find_second_addp().)
//
while (alloc_worklist.length() != 0) {
Node *n = alloc_worklist.pop();
uint ni = n->_idx;
if (n->is_Call()) {
CallNode *alloc = n->as_Call();
// copy escape information to call node
PointsToNode* ptn = ptnode_adr(alloc->_idx);
PointsToNode::EscapeState es = ptn->escape_state();
// We have an allocation or call which returns a Java object,
// see if it is unescaped.
if (es != PointsToNode::NoEscape || !ptn->scalar_replaceable())
continue;
// Find CheckCastPP for the allocate or for the return value of a call
n = alloc->result_cast();
if (n == NULL) { // No uses except Initialize node
if (alloc->is_Allocate()) {
// Set the scalar_replaceable flag for allocation
// so it could be eliminated if it has no uses.
alloc->as_Allocate()->_is_scalar_replaceable = true;
}
if (alloc->is_CallStaticJava()) {
// Set the scalar_replaceable flag for boxing method
// so it could be eliminated if it has no uses.
alloc->as_CallStaticJava()->_is_scalar_replaceable = true;
}
continue;
}
if (!n->is_CheckCastPP()) { // not unique CheckCastPP.
assert(!alloc->is_Allocate(), "allocation should have unique type");
continue;
}
// The inline code for Object.clone() casts the allocation result to
// java.lang.Object and then to the actual type of the allocated
// object. Detect this case and use the second cast.
// Also detect j.l.reflect.Array.newInstance(jobject, jint) case when
// the allocation result is cast to java.lang.Object and then
// to the actual Array type.
if (alloc->is_Allocate() && n->as_Type()->type() == TypeInstPtr::NOTNULL
&& (alloc->is_AllocateArray() ||
igvn->type(alloc->in(AllocateNode::KlassNode)) != TypeKlassPtr::OBJECT)) {
Node *cast2 = NULL;
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node *use = n->fast_out(i);
if (use->is_CheckCastPP()) {
cast2 = use;
break;
}
}
if (cast2 != NULL) {
n = cast2;
} else {
// Not scalar replaceable if the allocation type is statically unknown
// (reflection allocation): the object can't be restored during
// deoptimization without a precise type.
continue;
}
}
const TypeOopPtr *t = igvn->type(n)->isa_oopptr();
if (t == NULL)
continue; // not a TypeOopPtr
if (!t->klass_is_exact())
continue; // not a unique type
if (alloc->is_Allocate()) {
// Set the scalar_replaceable flag for allocation
// so it could be eliminated.
alloc->as_Allocate()->_is_scalar_replaceable = true;
}
if (alloc->is_CallStaticJava()) {
// Set the scalar_replaceable flag for boxing method
// so it could be eliminated.
alloc->as_CallStaticJava()->_is_scalar_replaceable = true;
}
set_escape_state(ptnode_adr(n->_idx), es); // CheckCastPP escape state
// in order for an object to be scalar-replaceable, it must be:
// - a direct allocation (not a call returning an object)
// - non-escaping
// - eligible to be a unique type
// - not determined to be ineligible by escape analysis
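// Remember the mapping between the allocation and its CheckCastPP in both
// directions so later phases can recover one from the other via get_map().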
set_map(alloc, n);
set_map(n, alloc);
const TypeOopPtr* tinst = t->cast_to_instance_id(ni);
igvn->hash_delete(n);
igvn->set_type(n, tinst);
n->raise_bottom_type(tinst);
igvn->hash_insert(n);
record_for_optimizer(n);
if (alloc->is_Allocate() && (t->isa_instptr() || t->isa_aryptr())) {
// First, put on the worklist all Field edges from the Connection Graph,
// which is more accurate than putting the immediate users from the Ideal Graph.
for (EdgeIterator e(ptn); e.has_next(); e.next()) {
PointsToNode* tgt = e.get();
Node* use = tgt->ideal_node();
assert(tgt->is_Field() && use->is_AddP(),
"only AddP nodes are Field edges in CG");
if (use->outcnt() > 0) { // Don't process dead nodes
Node* addp2 = find_second_addp(use, use->in(AddPNode::Base));
if (addp2 != NULL) {
assert(alloc->is_AllocateArray(),"array allocation was expected");
alloc_worklist.append_if_missing(addp2);
}
alloc_worklist.append_if_missing(use);
}
}
// An allocation may have an Initialize which has raw stores. Scan
// the users of the raw allocation result and push AddP users
// on alloc_worklist.
Node *raw_result = alloc->proj_out(TypeFunc::Parms);
assert (raw_result != NULL, "must have an allocation result");
for (DUIterator_Fast imax, i = raw_result->fast_outs(imax); i < imax; i++) {
Node *use = raw_result->fast_out(i);
if (use->is_AddP() && use->outcnt() > 0) { // Don't process dead nodes
Node* addp2 = find_second_addp(use, raw_result);
if (addp2 != NULL) {
assert(alloc->is_AllocateArray(),"array allocation was expected");
alloc_worklist.append_if_missing(addp2);
}
alloc_worklist.append_if_missing(use);
} else if (use->is_MemBar()) {
memnode_worklist.append_if_missing(use);
}
}
}
} else if (n->is_AddP()) {
JavaObjectNode* jobj = unique_java_object(get_addp_base(n));
if (jobj == NULL || jobj == phantom_obj) {
#ifdef ASSERT
ptnode_adr(get_addp_base(n)->_idx)->dump();
ptnode_adr(n->_idx)->dump();
assert(jobj != NULL && jobj != phantom_obj, "escaped allocation");
#endif
_compile->record_failure(C2Compiler::retry_no_escape_analysis());
return;
}
Node *base = get_map(jobj->idx()); // CheckCastPP node
if (!split_AddP(n, base)) continue; // wrong type from dead path
} else if (n->is_Phi() ||
n->is_CheckCastPP() ||
n->is_EncodeP() ||
n->is_DecodeN() ||
(n->is_ConstraintCast() && n->Opcode() == Op_CastPP)) {
if (visited.test_set(n->_idx)) {
assert(n->is_Phi(), "loops only through Phi's");
continue; // already processed
}
JavaObjectNode* jobj = unique_java_object(n);
if (jobj == NULL || jobj == phantom_obj) {
#ifdef ASSERT
ptnode_adr(n->_idx)->dump();
assert(jobj != NULL && jobj != phantom_obj, "escaped allocation");
#endif
_compile->record_failure(C2Compiler::retry_no_escape_analysis());
return;
} else {
Node *val = get_map(jobj->idx()); // CheckCastPP node
TypeNode *tn = n->as_Type();
const TypeOopPtr* tinst = igvn->type(val)->isa_oopptr();
assert(tinst != NULL && tinst->is_known_instance() &&
tinst->instance_id() == jobj->idx() , "instance type expected.");
const Type *tn_type = igvn->type(tn);
const TypeOopPtr *tn_t;
if (tn_type->isa_narrowoop()) {
tn_t = tn_type->make_ptr()->isa_oopptr();
} else {
tn_t = tn_type->isa_oopptr();
}
if (tn_t != NULL && tinst->klass()->is_subtype_of(tn_t->klass())) {
if (tn_type->isa_narrowoop()) {
tn_type = tinst->make_narrowoop();
} else {
tn_type = tinst;
}
igvn->hash_delete(tn);
igvn->set_type(tn, tn_type);
tn->set_type(tn_type);
igvn->hash_insert(tn);
record_for_optimizer(n);
} else {
assert(tn_type == TypePtr::NULL_PTR ||
(tn_t != NULL && !tinst->klass()->is_subtype_of(tn_t->klass())),
"unexpected type");
continue; // Skip dead path with different type
}
}
} else {
debug_only(n->dump();)
assert(false, "EA: unexpected node");
continue;
}
// push allocation's users on appropriate worklist
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node *use = n->fast_out(i);
if(use->is_Mem() && use->in(MemNode::Address) == n) {
// Load/store to instance's field
memnode_worklist.append_if_missing(use);
} else if (use->is_MemBar()) {
if (use->in(TypeFunc::Memory) == n) { // Ignore precedent edge
memnode_worklist.append_if_missing(use);
}
} else if (use->is_AddP() && use->outcnt() > 0) { // No dead nodes
Node* addp2 = find_second_addp(use, n);
if (addp2 != NULL) {
alloc_worklist.append_if_missing(addp2);
}
alloc_worklist.append_if_missing(use);
} else if (use->is_Phi() ||
use->is_CheckCastPP() ||
use->is_EncodeNarrowPtr() ||
use->is_DecodeNarrowPtr() ||
(use->is_ConstraintCast() && use->Opcode() == Op_CastPP)) {
alloc_worklist.append_if_missing(use);
#ifdef ASSERT
} else if (use->is_Mem()) {
assert(use->in(MemNode::Address) != n, "EA: missing allocation reference path");
} else if (use->is_MergeMem()) {
assert(_mergemem_worklist.contains(use->as_MergeMem()), "EA: missing MergeMem node in the worklist");
} else if (use->is_SafePoint()) {
// Look for MergeMem nodes for calls which reference unique allocation
// (through CheckCastPP nodes) even for debug info.
Node* m = use->in(TypeFunc::Memory);
if (m->is_MergeMem()) {
assert(_mergemem_worklist.contains(m->as_MergeMem()), "EA: missing MergeMem node in the worklist");
}
} else if (use->Opcode() == Op_EncodeISOArray) {
if (use->in(MemNode::Memory) == n || use->in(3) == n) {
// EncodeISOArray overwrites destination array
memnode_worklist.append_if_missing(use);
}
} else {
uint op = use->Opcode();
if (!(op == Op_CmpP || op == Op_Conv2B ||
op == Op_CastP2X || op == Op_StoreCM ||
op == Op_FastLock || op == Op_AryEq || op == Op_StrComp ||
op == Op_StrEquals || op == Op_StrIndexOf)) {
n->dump();
use->dump();
assert(false, "EA: missing allocation reference path");
}
#endif
}
}
}
// New alias types were created in split_AddP().
uint new_index_end = (uint) _compile->num_alias_types();
assert(unique_old == _compile->unique(), "there should be no new ideal nodes after Phase 1");
// Phase 2: Process MemNode's from memnode_worklist. Compute new address types and
// new values for the Memory inputs (the Memory inputs are not
// actually updated until phase 4).
if (memnode_worklist.length() == 0)
return; // nothing to do
while (memnode_worklist.length() != 0) {
Node *n = memnode_worklist.pop();
if (visited.test_set(n->_idx))
continue;
if (n->is_Phi() || n->is_ClearArray()) {
// we don't need to do anything, but the users must be pushed
} else if (n->is_MemBar()) { // Initialize, MemBar nodes
// we don't need to do anything, but the users must be pushed
n = n->as_MemBar()->proj_out(TypeFunc::Memory);
if (n == NULL)
continue;
} else if (n->Opcode() == Op_EncodeISOArray) {
// get the memory projection
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node *use = n->fast_out(i);
if (use->Opcode() == Op_SCMemProj) {
n = use;
break;
}
}
assert(n->Opcode() == Op_SCMemProj, "memory projection required");
} else {
assert(n->is_Mem(), "memory node required.");
Node *addr = n->in(MemNode::Address);
const Type *addr_t = igvn->type(addr);
if (addr_t == Type::TOP)
continue;
assert (addr_t->isa_ptr() != NULL, "pointer type required.");
int alias_idx = _compile->get_alias_index(addr_t->is_ptr());
assert ((uint)alias_idx < new_index_end, "wrong alias index");
Node *mem = find_inst_mem(n->in(MemNode::Memory), alias_idx, orig_phis);
if (_compile->failing()) {
return;
}
if (mem != n->in(MemNode::Memory)) {
// We delay the memory edge update since we need the old one in
// the MergeMem code below when instance memory slices are separated.
set_map(n, mem);
}
if (n->is_Load()) {
continue; // don't push users
} else if (n->is_LoadStore()) {
// get the memory projection
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node *use = n->fast_out(i);
if (use->Opcode() == Op_SCMemProj) {
n = use;
break;
}
}
assert(n->Opcode() == Op_SCMemProj, "memory projection required");
}
}
// push user on appropriate worklist
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node *use = n->fast_out(i);
if (use->is_Phi() || use->is_ClearArray()) {
memnode_worklist.append_if_missing(use);
} else if (use->is_Mem() && use->in(MemNode::Memory) == n) {
if (use->Opcode() == Op_StoreCM) // Ignore cardmark stores
continue;
memnode_worklist.append_if_missing(use);
} else if (use->is_MemBar()) {
if (use->in(TypeFunc::Memory) == n) { // Ignore precedent edge
memnode_worklist.append_if_missing(use);
}
#ifdef ASSERT
} else if(use->is_Mem()) {
assert(use->in(MemNode::Memory) != n, "EA: missing memory path");
} else if (use->is_MergeMem()) {
assert(_mergemem_worklist.contains(use->as_MergeMem()), "EA: missing MergeMem node in the worklist");
} else if (use->Opcode() == Op_EncodeISOArray) {
if (use->in(MemNode::Memory) == n || use->in(3) == n) {
// EncodeISOArray overwrites destination array
memnode_worklist.append_if_missing(use);
}
} else {
uint op = use->Opcode();
if (!(op == Op_StoreCM ||
(op == Op_CallLeaf && use->as_CallLeaf()->_name != NULL &&
strcmp(use->as_CallLeaf()->_name, "g1_wb_pre") == 0) ||
op == Op_AryEq || op == Op_StrComp ||
op == Op_StrEquals || op == Op_StrIndexOf)) {
n->dump();
use->dump();
assert(false, "EA: missing memory path");
}
#endif
}
}
}
// Phase 3: Process MergeMem nodes from mergemem_worklist.
// Walk each memory slice moving the first node encountered of each
// instance type to the input corresponding to its alias index.
uint length = _mergemem_worklist.length();
for( uint next = 0; next < length; ++next ) {
MergeMemNode* nmm = _mergemem_worklist.at(next);
assert(!visited.test_set(nmm->_idx), "should not be visited before");
// Note: we don't want to use MergeMemStream here because we only want to
// scan inputs which exist at the start, not ones we add during processing.
// Note 2: MergeMem may already contain instance memory slices added
// during the find_inst_mem() call when memory nodes were processed above.
igvn->hash_delete(nmm);
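// Only scan the memory slices that existed before type splitting; inputs for
// the new instance alias indexes are filled in below.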
uint nslices = MIN2(nmm->req(), new_index_start);
for (uint i = Compile::AliasIdxRaw+1; i < nslices; i++) {
Node* mem = nmm->in(i);
Node* cur = NULL;
if (mem == NULL || mem->is_top())
continue;
// First, update mergemem by moving memory nodes to corresponding slices
// if their type became more precise since this mergemem was created.
while (mem->is_Mem()) {
const Type *at = igvn->type(mem->in(MemNode::Address));
if (at != Type::TOP) {
assert (at->isa_ptr() != NULL, "pointer type required.");
uint idx = (uint)_compile->get_alias_index(at->is_ptr());
if (idx == i) {
if (cur == NULL)
cur = mem;
} else {
if (idx >= nmm->req() || nmm->is_empty_memory(nmm->in(idx))) {
nmm->set_memory_at(idx, mem);
}
}
}
mem = mem->in(MemNode::Memory);
}
nmm->set_memory_at(i, (cur != NULL) ? cur : mem);
// Find any instance of the current type if we haven't already encountered
// a memory slice of the instance along the memory chain.
for (uint ni = new_index_start; ni < new_index_end; ni++) {
if((uint)_compile->get_general_index(ni) == i) {
Node *m = (ni >= nmm->req()) ? nmm->empty_memory() : nmm->in(ni);
if (nmm->is_empty_memory(m)) {
Node* result = find_inst_mem(mem, ni, orig_phis);
if (_compile->failing()) {
return;
}
nmm->set_memory_at(ni, result);
}
}
}
}
// Find the rest of the instance values
for (uint ni = new_index_start; ni < new_index_end; ni++) {
const TypeOopPtr *tinst = _compile->get_adr_type(ni)->isa_oopptr();
Node* result = step_through_mergemem(nmm, ni, tinst);
if (result == nmm->base_memory()) {
// Didn't find instance memory, search through general slice recursively.
result = nmm->memory_at(_compile->get_general_index(ni));
result = find_inst_mem(result, ni, orig_phis);
if (_compile->failing()) {
return;
}
nmm->set_memory_at(ni, result);
}
}
igvn->hash_insert(nmm);
record_for_optimizer(nmm);
}
// Phase 4: Update the inputs of non-instance memory Phis and
// the Memory input of memnodes
// First update the inputs of any non-instance Phi's from
// which we split out an instance Phi. Note we don't have
// to recursively process Phi's encountered on the input memory
// chains as is done in split_memory_phi() since they will
// also be processed here.
for (int j = 0; j < orig_phis.length(); j++) {
PhiNode *phi = orig_phis.at(j);
int alias_idx = _compile->get_alias_index(phi->adr_type());
igvn->hash_delete(phi);
for (uint i = 1; i < phi->req(); i++) {
Node *mem = phi->in(i);
Node *new_mem = find_inst_mem(mem, alias_idx, orig_phis);
if (_compile->failing()) {
return;
}
if (mem != new_mem) {
phi->set_req(i, new_mem);
}
}
igvn->hash_insert(phi);
record_for_optimizer(phi);
}
// Update the memory inputs of MemNodes with the value we computed
// in Phase 2 and move stores' memory users to the corresponding memory slices.
// Disable memory split verification code until the fix for 6984348.
// Currently it produces false negative results since it does not cover all cases.
#if 0 // ifdef ASSERT
visited.Reset();
Node_Stack old_mems(arena, _compile->unique() >> 2);
#endif
for (uint i = 0; i < ideal_nodes.size(); i++) {
Node* n = ideal_nodes.at(i);
Node* nmem = get_map(n->_idx);
assert(nmem != NULL, "sanity");
if (n->is_Mem()) {
#if 0 // ifdef ASSERT
Node* old_mem = n->in(MemNode::Memory);
if (!visited.test_set(old_mem->_idx)) {
old_mems.push(old_mem, old_mem->outcnt());
}
#endif
assert(n->in(MemNode::Memory) != nmem, "sanity");
if (!n->is_Load()) {
// Move memory users of a store first.
move_inst_mem(n, orig_phis);
}
// Now update memory input
igvn->hash_delete(n);
n->set_req(MemNode::Memory, nmem);
igvn->hash_insert(n);
record_for_optimizer(n);
} else {
assert(n->is_Allocate() || n->is_CheckCastPP() ||
n->is_AddP() || n->is_Phi(), "unknown node used for set_map()");
}
}
#if 0 // ifdef ASSERT
// Verify that memory was split correctly
while (old_mems.is_nonempty()) {
Node* old_mem = old_mems.node();
uint old_cnt = old_mems.index();
old_mems.pop();
assert(old_cnt == old_mem->outcnt(), "old mem could be lost");
}
#endif
}
#ifndef PRODUCT
static const char *node_type_names[] = {
"UnknownType",
"JavaObject",
"LocalVar",
"Field",
"Arraycopy"
};
static const char *esc_names[] = {
"UnknownEscape",
"NoEscape",
"ArgEscape",
"GlobalEscape"
};
void PointsToNode::dump(bool print_state) const {
NodeType nt = node_type();
tty->print("%s ", node_type_names[(int) nt]);
if (print_state) {
EscapeState es = escape_state();
EscapeState fields_es = fields_escape_state();
tty->print("%s(%s) ", esc_names[(int)es], esc_names[(int)fields_es]);
if (nt == PointsToNode::JavaObject && !this->scalar_replaceable())
tty->print("NSR ");
}
if (is_Field()) {
FieldNode* f = (FieldNode*)this;
if (f->is_oop())
tty->print("oop ");
if (f->offset() > 0)
tty->print("+%d ", f->offset());
tty->print("(");
for (BaseIterator i(f); i.has_next(); i.next()) {
PointsToNode* b = i.get();
tty->print(" %d%s", b->idx(),(b->is_JavaObject() ? "P" : ""));
}
tty->print(" )");
}
tty->print("[");
for (EdgeIterator i(this); i.has_next(); i.next()) {
PointsToNode* e = i.get();
tty->print(" %d%s%s", e->idx(),(e->is_JavaObject() ? "P" : (e->is_Field() ? "F" : "")), e->is_Arraycopy() ? "cp" : "");
}
tty->print(" [");
for (UseIterator i(this); i.has_next(); i.next()) {
PointsToNode* u = i.get();
bool is_base = false;
if (PointsToNode::is_base_use(u)) {
is_base = true;
u = PointsToNode::get_use_node(u)->as_Field();
}
tty->print(" %d%s%s", u->idx(), is_base ? "b" : "", u->is_Arraycopy() ? "cp" : "");
}
tty->print(" ]] ");
if (_node == NULL)
tty->print_cr("<null>");
else
_node->dump();
}
void ConnectionGraph::dump(GrowableArray<PointsToNode*>& ptnodes_worklist) {
bool first = true;
int ptnodes_length = ptnodes_worklist.length();
for (int i = 0; i < ptnodes_length; i++) {
PointsToNode *ptn = ptnodes_worklist.at(i);
if (ptn == NULL || !ptn->is_JavaObject())
continue;
PointsToNode::EscapeState es = ptn->escape_state();
if ((es != PointsToNode::NoEscape) && !Verbose) {
continue;
}
Node* n = ptn->ideal_node();
if (n->is_Allocate() || (n->is_CallStaticJava() &&
n->as_CallStaticJava()->is_boxing_method())) {
if (first) {
tty->cr();
tty->print("======== Connection graph for ");
_compile->method()->print_short_name();
tty->cr();
first = false;
}
ptn->dump();
// Print all locals and fields which reference this allocation
for (UseIterator j(ptn); j.has_next(); j.next()) {
PointsToNode* use = j.get();
if (use->is_LocalVar()) {
use->dump(Verbose);
} else if (Verbose) {
use->dump();
}
}
tty->cr();
}
}
}
#endif
C:\hotspot-69087d08d473\src\share\vm/opto/escape.hpp
/*
* Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_OPTO_ESCAPE_HPP
#define SHARE_VM_OPTO_ESCAPE_HPP
#include "opto/addnode.hpp"
#include "opto/node.hpp"
#include "utilities/growableArray.hpp"
//
// Adaptation for C2 of the escape analysis algorithm described in:
//
// [Choi99] Jong-Deok Choi, Manish Gupta, Mauricio Serrano,
// Vugranam C. Sreedhar, Sam Midkiff,
// "Escape Analysis for Java", Proceedings of the ACM SIGPLAN
// OOPSLA Conference, November 1999
//
// The flow-insensitive analysis described in the paper has been implemented.
//
// The analysis requires construction of a "connection graph" (CG) for
// the method being analyzed. The nodes of the connection graph are:
//
// - Java objects (JO)
// - Local variables (LV)
// - Fields of an object (OF), these also include array elements
//
// The CG contains 3 types of edges:
//
// - PointsTo (-P>) {LV, OF} to JO
// - Deferred (-D>) from {LV, OF} to {LV, OF}
// - Field (-F>) from JO to OF
//
// The following utility function is used by the algorithm:
//
// PointsTo(n) - n is any CG node, it returns the set of JO that n could
// point to.
//
// The algorithm describes how to construct the connection graph
// in the following 4 cases:
//
// Case Edges Created
//
// (1) p = new T() LV -P> JO
// (2) p = q LV -D> LV
// (3) p.f = q JO -F> OF, OF -D> LV
// (4) p = q.f JO -F> OF, LV -D> OF
//
// In all these cases, p and q are local variables. For static field
// references, we can construct a local variable containing a reference
// to the static memory.
//
// C2 does not have local variables. However, for the purposes of constructing
// the connection graph, the following IR nodes are treated as local variables:
// Phi (pointer values)
// LoadP, LoadN
// Proj#5 (value returned from callnodes including allocations)
// CheckCastPP, CastPP
//
// The LoadP, Proj and CheckCastPP behave like variables assigned to only once.
// Only a Phi can have multiple assignments. Each input to a Phi is treated
// as an assignment to it.
//
// The following node types are JavaObject:
//
// phantom_object (general globally escaped object)
// Allocate
// AllocateArray
// Parm (for incoming arguments)
// CastX2P ("unsafe" operations)
// CreateEx
// ConP
// LoadKlass
// ThreadLocal
// CallStaticJava (which returns Object)
//
// AddP nodes are fields.
//
// After building the graph, a pass is made over the nodes, deleting deferred
// nodes and copying the edges from the target of the deferred edge to the
// source. This results in a graph with no deferred edges, only:
//
// LV -P> JO
// OF -P> JO (the object whose oop is stored in the field)
// JO -F> OF
//
// Then, for each node which is GlobalEscape, anything it could point to
// is marked GlobalEscape. Finally, for any node marked ArgEscape, anything
// it could point to is marked ArgEscape.
//
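// As a purely illustrative (hypothetical) example, a fragment such as
//
//   p = new T();   // case (1): LV(p) -P> JO
//   q = p;         // case (2): LV(q) -D> LV(p)
//   q.f = r;       // case (3): JO -F> OF(f), OF(f) -D> LV(r)
//   s = q.f;       // case (4): JO -F> OF(f), LV(s) -D> OF(f)
//
// builds the edges shown in the table above. After deferred edges are removed,
// q points to the same JO as p, and s points to whatever r points to.
//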
class Compile;
class Node;
class CallNode;
class PhiNode;
class PhaseTransform;
class PointsToNode;
class Type;
class TypePtr;
class VectorSet;
class JavaObjectNode;
class LocalVarNode;
class FieldNode;
class ArraycopyNode;
class ConnectionGraph;
// ConnectionGraph nodes
class PointsToNode : public ResourceObj {
GrowableArray<PointsToNode*> _edges; // List of nodes this node points to
GrowableArray<PointsToNode*> _uses; // List of nodes which point to this node
const u1 _type; // NodeType
u1 _flags; // NodeFlags
u1 _escape; // EscapeState of object
u1 _fields_escape; // EscapeState of object's fields
Node* const _node; // Ideal node corresponding to this PointsTo node.
const int _idx; // Cached ideal node's _idx
const uint _pidx; // Index of this node
public:
typedef enum {
UnknownType = 0,
JavaObject = 1,
LocalVar = 2,
Field = 3,
Arraycopy = 4
} NodeType;
typedef enum {
UnknownEscape = 0,
NoEscape = 1, // An object does not escape the method or thread and it is
// not passed to a call. It could be replaced with scalars.
ArgEscape = 2, // An object does not escape the method or thread but it is
// passed as an argument to a call or referenced by an argument
// and it does not escape during the call.
GlobalEscape = 3 // An object escapes the method or thread.
} EscapeState;
typedef enum {
ScalarReplaceable = 1, // A non-escaping object could be replaced with scalars
PointsToUnknown = 2, // Has edge to phantom_object
ArraycopySrc = 4, // Has edge from Arraycopy node
ArraycopyDst = 8 // Has edge to Arraycopy node
} NodeFlags;
inline PointsToNode(ConnectionGraph* CG, Node* n, EscapeState es, NodeType type);
uint pidx() const { return _pidx; }
Node* ideal_node() const { return _node; }
int idx() const { return _idx; }
bool is_JavaObject() const { return _type == (u1)JavaObject; }
bool is_LocalVar() const { return _type == (u1)LocalVar; }
bool is_Field() const { return _type == (u1)Field; }
bool is_Arraycopy() const { return _type == (u1)Arraycopy; }
JavaObjectNode* as_JavaObject() { assert(is_JavaObject(),""); return (JavaObjectNode*)this; }
LocalVarNode* as_LocalVar() { assert(is_LocalVar(),""); return (LocalVarNode*)this; }
FieldNode* as_Field() { assert(is_Field(),""); return (FieldNode*)this; }
ArraycopyNode* as_Arraycopy() { assert(is_Arraycopy(),""); return (ArraycopyNode*)this; }
EscapeState escape_state() const { return (EscapeState)_escape; }
void set_escape_state(EscapeState state) { _escape = (u1)state; }
EscapeState fields_escape_state() const { return (EscapeState)_fields_escape; }
void set_fields_escape_state(EscapeState state) { _fields_escape = (u1)state; }
bool has_unknown_ptr() const { return (_flags & PointsToUnknown) != 0; }
void set_has_unknown_ptr() { _flags |= PointsToUnknown; }
bool arraycopy_src() const { return (_flags & ArraycopySrc) != 0; }
void set_arraycopy_src() { _flags |= ArraycopySrc; }
bool arraycopy_dst() const { return (_flags & ArraycopyDst) != 0; }
void set_arraycopy_dst() { _flags |= ArraycopyDst; }
bool scalar_replaceable() const { return (_flags & ScalarReplaceable) != 0;}
void set_scalar_replaceable(bool v) {
if (v)
_flags |= ScalarReplaceable;
else
_flags &= ~ScalarReplaceable;
}
int edge_count() const { return _edges.length(); }
PointsToNode* edge(int e) const { return _edges.at(e); }
bool add_edge(PointsToNode* edge) { return _edges.append_if_missing(edge); }
int use_count() const { return _uses.length(); }
PointsToNode* use(int e) const { return _uses.at(e); }
bool add_use(PointsToNode* use) { return _uses.append_if_missing(use); }
// Mark base edge use to distinguish from stored value edge.
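// The tag is the low bit of the PointsToNode*; this relies on node addresses
// being at least 2-byte aligned so that bit is otherwise always zero.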
bool add_base_use(FieldNode* use) { return _uses.append_if_missing((PointsToNode*)((intptr_t)use + 1)); }
static bool is_base_use(PointsToNode* use) { return (((intptr_t)use) & 1); }
static PointsToNode* get_use_node(PointsToNode* use) { return (PointsToNode*)(((intptr_t)use) & ~1); }
// Return true if this node points to the specified node or to nodes it points to.
bool points_to(JavaObjectNode* ptn) const;
// Return true if this node points only to non-escaping allocations.
bool non_escaping_allocation();
// Return true if one node points to another.
bool meet(PointsToNode* ptn);
#ifndef PRODUCT
NodeType node_type() const { return (NodeType)_type;}
void dump(bool print_state=true) const;
#endif
};
class LocalVarNode: public PointsToNode {
public:
LocalVarNode(ConnectionGraph *CG, Node* n, EscapeState es):
PointsToNode(CG, n, es, LocalVar) {}
};
class JavaObjectNode: public PointsToNode {
public:
JavaObjectNode(ConnectionGraph *CG, Node* n, EscapeState es):
PointsToNode(CG, n, es, JavaObject) {
if (es > NoEscape)
set_scalar_replaceable(false);
}
};
class FieldNode: public PointsToNode {
GrowableArray<PointsToNode*> _bases; // List of JavaObject nodes which point to this node
const int _offset; // Field's offset.
const bool _is_oop; // Field points to object
bool _has_unknown_base; // Has phantom_object base
public:
FieldNode(ConnectionGraph *CG, Node* n, EscapeState es, int offs, bool is_oop):
PointsToNode(CG, n, es, Field),
_offset(offs), _is_oop(is_oop),
_has_unknown_base(false) {}
int offset() const { return _offset;}
bool is_oop() const { return _is_oop;}
bool has_unknown_base() const { return _has_unknown_base; }
void set_has_unknown_base() { _has_unknown_base = true; }
int base_count() const { return _bases.length(); }
PointsToNode* base(int e) const { return _bases.at(e); }
bool add_base(PointsToNode* base) { return _bases.append_if_missing(base); }
#ifdef ASSERT
// Return true if bases points to this java object.
bool has_base(JavaObjectNode* ptn) const;
#endif
};
class ArraycopyNode: public PointsToNode {
public:
ArraycopyNode(ConnectionGraph *CG, Node* n, EscapeState es):
PointsToNode(CG, n, es, Arraycopy) {}
};
// Iterators for PointsTo node's edges:
// for (EdgeIterator i(n); i.has_next(); i.next()) {
// PointsToNode* u = i.get();
class PointsToIterator: public StackObj {
protected:
const PointsToNode* node;
const int cnt;
int i;
public:
inline PointsToIterator(const PointsToNode* n, int cnt) : node(n), cnt(cnt), i(0) { }
inline bool has_next() const { return i < cnt; }
inline void next() { i++; }
PointsToNode* get() const { ShouldNotCallThis(); return NULL; }
};
class EdgeIterator: public PointsToIterator {
public:
inline EdgeIterator(const PointsToNode* n) : PointsToIterator(n, n->edge_count()) { }
inline PointsToNode* get() const { return node->edge(i); }
};
class UseIterator: public PointsToIterator {
public:
inline UseIterator(const PointsToNode* n) : PointsToIterator(n, n->use_count()) { }
inline PointsToNode* get() const { return node->use(i); }
};
class BaseIterator: public PointsToIterator {
public:
inline BaseIterator(const FieldNode* n) : PointsToIterator(n, n->base_count()) { }
inline PointsToNode* get() const { return ((PointsToNode*)node)->as_Field()->base(i); }
};
class ConnectionGraph: public ResourceObj {
friend class PointsToNode;
private:
GrowableArray<PointsToNode*> _nodes; // Map from ideal nodes to
// ConnectionGraph nodes.
GrowableArray<PointsToNode*> _worklist; // Nodes to be processed
VectorSet _in_worklist;
uint _next_pidx;
bool _collecting; // Indicates whether escape information
// is still being collected. If false,
// no new nodes will be processed.
bool _verify; // verify graph
JavaObjectNode* phantom_obj; // Unknown object
JavaObjectNode* null_obj;
Node* _pcmp_neq; // ConI(#CC_GT)
Node* _pcmp_eq; // ConI(#CC_EQ)
Compile* _compile; // Compile object for current compilation
PhaseIterGVN* _igvn; // Value numbering
Unique_Node_List ideal_nodes; // Used by CG construction and types splitting.
// Address of an element in _nodes. Used when the element is to be modified
PointsToNode* ptnode_adr(int idx) const {
// There should be no new ideal nodes during ConnectionGraph construction;
// GrowableArray::at() will assert otherwise.
return _nodes.at(idx);
}
uint nodes_size() const { return _nodes.length(); }
uint next_pidx() { return _next_pidx++; }
// Add nodes to ConnectionGraph.
void add_local_var(Node* n, PointsToNode::EscapeState es);
void add_java_object(Node* n, PointsToNode::EscapeState es);
void add_field(Node* n, PointsToNode::EscapeState es, int offset);
void add_arraycopy(Node* n, PointsToNode::EscapeState es, PointsToNode* src, PointsToNode* dst);
// Compute the escape state for arguments to a call.
void process_call_arguments(CallNode *call);
// Add PointsToNode node corresponding to a call
void add_call_node(CallNode* call);
// Map ideal node to existing PointsTo node (usually phantom_object).
void map_ideal_node(Node *n, PointsToNode* ptn) {
assert(ptn != NULL, "only existing PointsTo node");
_nodes.at_put(n->_idx, ptn);
}
// Utility function for nodes that load an object
void add_objload_to_connection_graph(Node *n, Unique_Node_List *delayed_worklist);
// Create PointsToNode node and add it to Connection Graph.
void add_node_to_connection_graph(Node *n, Unique_Node_List *delayed_worklist);
// Add final simple edges to graph.
void add_final_edges(Node *n);
// Finish Graph construction.
bool complete_connection_graph(GrowableArray<PointsToNode*>& ptnodes_worklist,
GrowableArray<JavaObjectNode*>& non_escaped_worklist,
GrowableArray<JavaObjectNode*>& java_objects_worklist,
GrowableArray<FieldNode*>& oop_fields_worklist);
#ifdef ASSERT
void verify_connection_graph(GrowableArray<PointsToNode*>& ptnodes_worklist,
GrowableArray<JavaObjectNode*>& non_escaped_worklist,
GrowableArray<JavaObjectNode*>& java_objects_worklist,
GrowableArray<Node*>& addp_worklist);
#endif
// Add all references to this JavaObject node.
int add_java_object_edges(JavaObjectNode* jobj, bool populate_worklist);
// Put node on worklist if it is (or was) not there.
inline void add_to_worklist(PointsToNode* pt) {
PointsToNode* ptf = pt;
uint pidx_bias = 0;
if (PointsToNode::is_base_use(pt)) {
// Create a separate entry in _in_worklist for a marked base edge
// because _worklist may have an entry for a normal edge pointing
// to the same node. To separate them use _next_pidx as bias.
ptf = PointsToNode::get_use_node(pt)->as_Field();
pidx_bias = _next_pidx;
}
if (!_in_worklist.test_set(ptf->pidx() + pidx_bias)) {
_worklist.append(pt);
}
}
// Put on worklist all uses of this node.
inline void add_uses_to_worklist(PointsToNode* pt) {
for (UseIterator i(pt); i.has_next(); i.next()) {
add_to_worklist(i.get());
}
}
// Put on worklist all field's uses and related field nodes.
void add_field_uses_to_worklist(FieldNode* field);
// Put on worklist all related field nodes.
void add_fields_to_worklist(FieldNode* field, PointsToNode* base);
// Find fields which have unknown value.
int find_field_value(FieldNode* field);
// Find fields initializing values for allocations.
int find_init_values(JavaObjectNode* ptn, PointsToNode* init_val, PhaseTransform* phase);
// Set the escape state of an object and its fields.
void set_escape_state(PointsToNode* ptn, PointsToNode::EscapeState esc) {
// Don't change non-escaping state of NULL pointer.
if (ptn != null_obj) {
if (ptn->escape_state() < esc)
ptn->set_escape_state(esc);
if (ptn->fields_escape_state() < esc)
ptn->set_fields_escape_state(esc);
}
}
void set_fields_escape_state(PointsToNode* ptn, PointsToNode::EscapeState esc) {
// Don't change non-escaping state of NULL pointer.
if (ptn != null_obj) {
if (ptn->fields_escape_state() < esc)
ptn->set_fields_escape_state(esc);
}
}
// Propagate GlobalEscape and ArgEscape escape states to all nodes
// and check that we still have non-escaping java objects.
bool find_non_escaped_objects(GrowableArray<PointsToNode*>& ptnodes_worklist,
GrowableArray<JavaObjectNode*>& non_escaped_worklist);
// Adjust scalar_replaceable state after Connection Graph is built.
void adjust_scalar_replaceable_state(JavaObjectNode* jobj);
// Optimize ideal graph.
void optimize_ideal_graph(GrowableArray<Node*>& ptr_cmp_worklist,
GrowableArray<Node*>& storestore_worklist);
// Optimize objects compare.
Node* optimize_ptr_compare(Node* n);
// Returns unique corresponding java object or NULL.
JavaObjectNode* unique_java_object(Node *n);
// Add an edge of the specified type pointing to the specified target.
bool add_edge(PointsToNode* from, PointsToNode* to) {
assert(!from->is_Field() || from->as_Field()->is_oop(), "sanity");
if (to == phantom_obj) {
if (from->has_unknown_ptr()) {
return false; // already points to phantom_obj
}
from->set_has_unknown_ptr();
}
bool is_new = from->add_edge(to);
assert(to != phantom_obj || is_new, "sanity");
if (is_new) { // New edge?
assert(!_verify, "graph is incomplete");
is_new = to->add_use(from);
assert(is_new, "use should be also new");
}
return is_new;
}
// Add an edge from Field node to its base and back.
bool add_base(FieldNode* from, PointsToNode* to) {
assert(!to->is_Arraycopy(), "sanity");
if (to == phantom_obj) {
if (from->has_unknown_base()) {
return false; // already has phantom_obj base
}
from->set_has_unknown_base();
}
bool is_new = from->add_base(to);
assert(to != phantom_obj || is_new, "sanity");
if (is_new) { // New edge?
assert(!_verify, "graph is incomplete");
if (to == null_obj)
return is_new; // Don't add fields to NULL pointer.
if (to->is_JavaObject()) {
is_new = to->add_edge(from);
} else {
is_new = to->add_base_use(from);
}
assert(is_new, "use should be also new");
}
return is_new;
}
// Add LocalVar node and edge if possible
void add_local_var_and_edge(Node* n, PointsToNode::EscapeState es, Node* to,
Unique_Node_List *delayed_worklist) {
PointsToNode* ptn = ptnode_adr(to->_idx);
if (delayed_worklist != NULL) { // First iteration of CG construction
add_local_var(n, es);
if (ptn == NULL) {
delayed_worklist->push(n);
return; // Process it later.
}
} else {
assert(ptn != NULL, "node should be registered");
}
add_edge(ptnode_adr(n->_idx), ptn);
}
// Helper functions
bool is_oop_field(Node* n, int offset, bool* unsafe);
static Node* get_addp_base(Node *addp);
static Node* find_second_addp(Node* addp, Node* n);
// offset of a field reference
int address_offset(Node* adr, PhaseTransform *phase);
// Propagate unique types created for unescaped allocated objects
// through the graph
void split_unique_types(GrowableArray<Node *> &alloc_worklist);
// Helper methods for unique types split.
bool split_AddP(Node *addp, Node *base);
PhiNode *create_split_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *> &orig_phi_worklist, bool &new_created);
PhiNode *split_memory_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *> &orig_phi_worklist);
void move_inst_mem(Node* n, GrowableArray<PhiNode *> &orig_phis);
Node* find_inst_mem(Node* mem, int alias_idx,GrowableArray<PhiNode *> &orig_phi_worklist);
Node* step_through_mergemem(MergeMemNode *mmem, int alias_idx, const TypeOopPtr *toop);
GrowableArray<MergeMemNode*> _mergemem_worklist; // List of all MergeMem nodes
Node_Array _node_map; // used for bookkeeping during type splitting
// Used for the following purposes:
// Memory Phi - most recent unique Phi split out
// from this Phi
// MemNode - new memory input for this node
// CheckCastPP - allocation that this is a cast of
// allocation - CheckCastPP of the allocation
// manage entries in _node_map
void set_map(Node* from, Node* to) {
ideal_nodes.push(from);
_node_map.map(from->_idx, to);
}
Node* get_map(int idx) { return _node_map[idx]; }
PhiNode* get_map_phi(int idx) {
Node* phi = _node_map[idx];
return (phi == NULL) ? NULL : phi->as_Phi();
}
// Notify optimizer that a node has been modified
void record_for_optimizer(Node *n) {
_igvn->_worklist.push(n);
_igvn->add_users_to_worklist(n);
}
// Compute the escape information
bool compute_escape();
public:
ConnectionGraph(Compile *C, PhaseIterGVN *igvn);
// Check for non-escaping candidates
static bool has_candidates(Compile *C);
// Perform escape analysis
static void do_analysis(Compile *C, PhaseIterGVN *igvn);
bool not_global_escape(Node *n);
#ifndef PRODUCT
void dump(GrowableArray<PointsToNode*>& ptnodes_worklist);
#endif
};
inline PointsToNode::PointsToNode(ConnectionGraph *CG, Node* n, EscapeState es, NodeType type):
_edges(CG->_compile->comp_arena(), 2, 0, NULL),
_uses (CG->_compile->comp_arena(), 2, 0, NULL),
_node(n),
_idx(n->_idx),
_pidx(CG->next_pidx()),
_type((u1)type),
_escape((u1)es),
_fields_escape((u1)es),
_flags(ScalarReplaceable) {
assert(n != NULL && es != UnknownEscape, "sanity");
}
#endif // SHARE_VM_OPTO_ESCAPE_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/gcm.cpp
/*
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "libadt/vectset.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/block.hpp"
#include "opto/c2compiler.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/machnode.hpp"
#include "opto/opcodes.hpp"
#include "opto/phaseX.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "runtime/deoptimization.hpp"
#if defined AD_MD_HPP
# include AD_MD_HPP
#elif defined TARGET_ARCH_MODEL_x86_32
# include "adfiles/ad_x86_32.hpp"
#elif defined TARGET_ARCH_MODEL_x86_64
# include "adfiles/ad_x86_64.hpp"
#elif defined TARGET_ARCH_MODEL_aarch64
# include "adfiles/ad_aarch64.hpp"
#elif defined TARGET_ARCH_MODEL_sparc
# include "adfiles/ad_sparc.hpp"
#elif defined TARGET_ARCH_MODEL_zero
# include "adfiles/ad_zero.hpp"
#elif defined TARGET_ARCH_MODEL_ppc_64
# include "adfiles/ad_ppc_64.hpp"
#endif
// Portions of code courtesy of Clifford Click
// Optimization - Graph Style
// To avoid float value underflow
#define MIN_BLOCK_FREQUENCY 1.e-35f
//----------------------------schedule_node_into_block-------------------------
// Insert node n into block b. Look for projections of n and make sure they
// are in b also.
void PhaseCFG::schedule_node_into_block( Node *n, Block *b ) {
// Set basic block of n, Add n to b,
map_node_to_block(n, b);
b->add_inst(n);
// After Matching, nearly any old Node may have projections trailing it.
// These are usually machine-dependent flags. In any case, they might
// float to another block below this one. Move them up.
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node* use = n->fast_out(i);
if (use->is_Proj()) {
Block* buse = get_block_for_node(use);
if (buse != b) { // In wrong block?
if (buse != NULL) {
buse->find_remove(use); // Remove from wrong block
}
map_node_to_block(use, b);
b->add_inst(use);
}
}
}
}
//----------------------------replace_block_proj_ctrl-------------------------
// Nodes that have is_block_proj() nodes as their control need to use
// the appropriate Region for their actual block as their control since
// the projection will be in a predecessor block.
void PhaseCFG::replace_block_proj_ctrl( Node *n ) {
const Node *in0 = n->in(0);
assert(in0 != NULL, "Only control-dependent");
const Node *p = in0->is_block_proj();
if (p != NULL && p != n) { // Control from a block projection?
assert(!n->pinned() || n->is_MachConstantBase(), "only pinned MachConstantBase node is expected here");
// Find trailing Region
Block *pb = get_block_for_node(in0); // Block-projection already has basic block
uint j = 0;
if (pb->_num_succs != 1) { // More than 1 successor?
// Search for successor
uint max = pb->number_of_nodes();
assert( max > 1, "" );
uint start = max - pb->_num_succs;
// Find which output path belongs to projection
for (j = start; j < max; j++) {
if( pb->get_node(j) == in0 )
break;
}
assert( j < max, "must find" );
// Change control to match head of successor basic block
j -= start;
}
n->set_req(0, pb->_succs[j]->head());
}
}
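// Illustrative example (not part of the original comments): if block B1
// ends in an If whose IfTrue projection feeds block B2, a node whose
// control input is that IfTrue projection is redirected here to use B2's
// head as its control, because the projection itself sits at the tail of
// the predecessor block B1.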
//------------------------------schedule_pinned_nodes--------------------------
// Set the basic block for Nodes pinned into blocks
void PhaseCFG::schedule_pinned_nodes(VectorSet &visited) {
// Allocate node stack of size C->live_nodes()+8 to avoid frequent realloc
GrowableArray <Node *> spstack(C->live_nodes() + 8);
spstack.push(_root);
while (spstack.is_nonempty()) {
Node* node = spstack.pop();
if (!visited.test_set(node->_idx)) { // Test node and flag it as visited
if (node->pinned() && !has_block(node)) { // Pinned? Nail it down!
assert(node->in(0), "pinned Node must have Control");
// Before setting block replace block_proj control edge
replace_block_proj_ctrl(node);
Node* input = node->in(0);
while (!input->is_block_start()) {
input = input->in(0);
}
Block* block = get_block_for_node(input); // Basic block of controlling input
schedule_node_into_block(node, block);
}
// process all inputs that are non NULL
for (int i = node->req() - 1; i >= 0; --i) {
if (node->in(i) != NULL) {
spstack.push(node->in(i));
}
}
}
}
}
#ifdef ASSERT
// Assert that new input b2 is dominated by all previous inputs.
// Check this by seeing that it is dominated by b1, the deepest
// input observed before b2.
static void assert_dom(Block* b1, Block* b2, Node* n, const PhaseCFG* cfg) {
if (b1 == NULL) return;
assert(b1->_dom_depth < b2->_dom_depth, "sanity");
Block* tmp = b2;
while (tmp != b1 && tmp != NULL) {
tmp = tmp->_idom;
}
if (tmp != b1) {
// Detected an unschedulable graph. Print some nice stuff and die.
tty->print_cr("!!! Unschedulable graph !!!");
for (uint j=0; j<n->len(); j++) { // For all inputs
Node* inn = n->in(j); // Get input
if (inn == NULL) continue; // Ignore NULL, missing inputs
Block* inb = cfg->get_block_for_node(inn);
tty->print("B%d idom=B%d depth=%2d ",inb->_pre_order,
inb->_idom ? inb->_idom->_pre_order : 0, inb->_dom_depth);
inn->dump();
}
tty->print("Failing node: ");
n->dump();
assert(false, "unscheduable graph");
}
}
#endif
static Block* find_deepest_input(Node* n, const PhaseCFG* cfg) {
// Find the last input dominated by all other inputs.
Block* deepb = NULL; // Deepest block so far
int deepb_dom_depth = 0;
for (uint k = 0; k < n->len(); k++) { // For all inputs
Node* inn = n->in(k); // Get input
if (inn == NULL) continue; // Ignore NULL, missing inputs
Block* inb = cfg->get_block_for_node(inn);
assert(inb != NULL, "must already have scheduled this input");
if (deepb_dom_depth < (int) inb->_dom_depth) {
// The new inb must be dominated by the previous deepb.
// The various inputs must be linearly ordered in the dom
// tree, or else there will not be a unique deepest block.
DEBUG_ONLY(assert_dom(deepb, inb, n, cfg));
deepb = inb; // Save deepest block
deepb_dom_depth = deepb->_dom_depth;
}
}
assert(deepb != NULL, "must be at least one input to n");
return deepb;
}
//------------------------------schedule_early---------------------------------
// Find the earliest Block any instruction can be placed in. Some instructions
// are pinned into Blocks. Unpinned instructions can appear in last block in
// which all their inputs occur.
bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) {
// Allocate stack with enough space to avoid frequent realloc
Node_Stack nstack(roots.Size() + 8);
// _root will be processed among C->top() inputs
roots.push(C->top());
visited.set(C->top()->_idx);
while (roots.size() != 0) {
// Use local variables parent_node and input_index to cache the values
// at the top of the stack.
Node* parent_node = roots.pop();
uint input_index = 0;
while (true) {
if (input_index == 0) {
// Fixup some control. Constants without control get attached
// to root and nodes that use is_block_proj() nodes should be attached
// to the region that starts their block.
const Node* control_input = parent_node->in(0);
if (control_input != NULL) {
replace_block_proj_ctrl(parent_node);
} else {
// Is a constant with NO inputs?
if (parent_node->req() == 1) {
parent_node->set_req(0, _root);
}
}
}
// First, visit all inputs and force them to get a block. If an
// input is already in a block we quit following inputs (to avoid
// cycles). Instead we put that Node on a worklist to be handled
// later (since ITS inputs may not have a block yet).
// Assume all n's inputs will be processed
bool done = true;
while (input_index < parent_node->len()) {
Node* in = parent_node->in(input_index++);
if (in == NULL) {
continue;
}
int is_visited = visited.test_set(in->_idx);
if (!has_block(in)) {
if (is_visited) {
assert(false, "graph should be schedulable");
return false;
}
// Save parent node and next input's index.
nstack.push(parent_node, input_index);
// Process current input now.
parent_node = in;
input_index = 0;
// Not all n's inputs processed.
done = false;
break;
} else if (!is_visited) {
// Visit this guy later, using worklist
roots.push(in);
}
}
if (done) {
// All of n's inputs have been processed, complete post-processing.
// Some instructions are pinned into a block. These include Region,
// Phi, Start, Return, and other control-dependent instructions and
// any projections which depend on them.
if (!parent_node->pinned()) {
// Set earliest legal block.
Block* earliest_block = find_deepest_input(parent_node, this);
map_node_to_block(parent_node, earliest_block);
} else {
assert(get_block_for_node(parent_node) == get_block_for_node(parent_node->in(0)), "Pinned Node should be at the same block as its control edge");
}
if (nstack.is_empty()) {
// Finished all nodes on stack.
// Process next node on the worklist 'roots'.
break;
}
// Get saved parent node and next input's index.
parent_node = nstack.node();
input_index = nstack.index();
nstack.pop();
}
}
}
return true;
}
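// Worked example (illustrative): an unpinned AddI whose inputs are defined
// in blocks B2 and B5, with B2 dominating B5, is mapped to B5 -- the
// deepest block (dominator-tree-wise) among its inputs, as returned by
// find_deepest_input() above.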
//------------------------------dom_lca----------------------------------------
// Find least common ancestor in dominator tree
// LCA is a current notion of LCA, to be raised above 'this'.
// As a convenient boundary condition, return 'this' if LCA is NULL.
// Find the LCA of those two nodes.
Block* Block::dom_lca(Block* LCA) {
if (LCA == NULL || LCA == this) return this;
Block* anc = this;
while (anc->_dom_depth > LCA->_dom_depth)
anc = anc->_idom; // Walk up till anc is as high as LCA
while (LCA->_dom_depth > anc->_dom_depth)
LCA = LCA->_idom; // Walk up till LCA is as high as anc
while (LCA != anc) { // Walk both up till they are the same
LCA = LCA->_idom;
anc = anc->_idom;
}
return LCA;
}
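// Example (illustrative): with idom chains B1 -> B2 -> B4 and B1 -> B3,
// B4->dom_lca(B3) first walks B4 up to B2 to equalize dominator depth with
// B3, then walks both pointers up in lock step until they meet at B1.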
//--------------------------raise_LCA_above_use--------------------------------
// We are placing a definition, and have been given a def->use edge.
// The definition must dominate the use, so move the LCA upward in the
// dominator tree to dominate the use. If the use is a phi, adjust
// the LCA only with the phi input paths which actually use this def.
static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, const PhaseCFG* cfg) {
Block* buse = cfg->get_block_for_node(use);
if (buse == NULL) return LCA; // Unused killing Projs have no use block
if (!use->is_Phi()) return buse->dom_lca(LCA);
uint pmax = use->req(); // Number of Phi inputs
// Why doesn't this loop just break after finding the matching input to
// the Phi? Well... it's like this. I do not have true def-use/use-def
// chains. Means I cannot distinguish, from the def-use direction, which
// of many use-defs lead from the same use to the same def. That is, this
// Phi might have several uses of the same def. Each use appears in a
// different predecessor block. But when I enter here, I cannot distinguish
// which use-def edge I should find the predecessor block for. So I find
// them all. Means I do a little extra work if a Phi uses the same value
// more than once.
for (uint j=1; j<pmax; j++) { // For all inputs
if (use->in(j) == def) { // Found matching input?
Block* pred = cfg->get_block_for_node(buse->pred(j));
LCA = pred->dom_lca(LCA);
}
}
return LCA;
}
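// Example (illustrative): if 'def' flows into input 2 of a Phi merging in
// block B7, the LCA is raised only above B7's predecessor on path 2 (the
// block that actually supplies that Phi input), not above B7 itself.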
//----------------------------raise_LCA_above_marks----------------------------
// Return a new LCA that dominates LCA and any of its marked predecessors.
// Search all my parents up to 'early' (exclusive), looking for predecessors
// which are marked with the given index. Return the LCA (in the dom tree)
// of all marked blocks. If there are none marked, return the original
// LCA.
static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark, Block* early, const PhaseCFG* cfg) {
Block_List worklist;
worklist.push(LCA);
while (worklist.size() > 0) {
Block* mid = worklist.pop();
if (mid == early) continue; // stop searching here
// Test and set the visited bit.
if (mid->raise_LCA_visited() == mark) continue; // already visited
// Don't process the current LCA, otherwise the search may terminate early
if (mid != LCA && mid->raise_LCA_mark() == mark) {
// Raise the LCA.
LCA = mid->dom_lca(LCA);
if (LCA == early) break; // stop searching everywhere
assert(early->dominates(LCA), "early is high enough");
// Resume searching at that point, skipping intermediate levels.
worklist.push(LCA);
if (LCA == mid)
continue; // Don't mark as visited to avoid early termination.
} else {
// Keep searching through this block's predecessors.
for (uint j = 1, jmax = mid->num_preds(); j < jmax; j++) {
Block* mid_parent = cfg->get_block_for_node(mid->pred(j));
worklist.push(mid_parent);
}
}
mid->set_raise_LCA_visited(mark);
}
return LCA;
}
//--------------------------memory_early_block--------------------------------
// This is a variation of find_deepest_input, the heart of schedule_early.
// Find the "early" block for a load, if we considered only memory and
// address inputs, that is, if other data inputs were ignored.
//
// Because a subset of edges are considered, the resulting block will
// be earlier (at a shallower dom_depth) than the true schedule_early
// point of the node. We compute this earlier block as a more permissive
// site for anti-dependency insertion, but only if subsume_loads is enabled.
static Block* memory_early_block(Node* load, Block* early, const PhaseCFG* cfg) {
Node* base;
Node* index;
Node* store = load->in(MemNode::Memory);
load->as_Mach()->memory_inputs(base, index);
assert(base != NodeSentinel && index != NodeSentinel,
"unexpected base/index inputs");
Node* mem_inputs[4];
int mem_inputs_length = 0;
if (base != NULL) mem_inputs[mem_inputs_length++] = base;
if (index != NULL) mem_inputs[mem_inputs_length++] = index;
if (store != NULL) mem_inputs[mem_inputs_length++] = store;
// In the comparison below, add one to account for the control input,
// which may be null, but always takes up a spot in the in array.
if (mem_inputs_length + 1 < (int) load->req()) {
// This "load" has more inputs than just the memory, base and index inputs.
// For purposes of checking anti-dependences, we need to start
// from the early block of only the address portion of the instruction,
// and ignore other blocks that may have factored into the wider
// schedule_early calculation.
if (load->in(0) != NULL) mem_inputs[mem_inputs_length++] = load->in(0);
Block* deepb = NULL; // Deepest block so far
int deepb_dom_depth = 0;
for (int i = 0; i < mem_inputs_length; i++) {
Block* inb = cfg->get_block_for_node(mem_inputs[i]);
if (deepb_dom_depth < (int) inb->_dom_depth) {
// The new inb must be dominated by the previous deepb.
// The various inputs must be linearly ordered in the dom
// tree, or else there will not be a unique deepest block.
DEBUG_ONLY(assert_dom(deepb, inb, load, cfg));
deepb = inb; // Save deepest block
deepb_dom_depth = deepb->_dom_depth;
}
}
early = deepb;
}
return early;
}
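// Example (illustrative): a subsumed load that picked up extra data inputs
// defined deep in the dominator tree would get that deep block from
// schedule_early; this routine recomputes 'early' from only the memory,
// address and control inputs, yielding a shallower block and therefore a
// wider region in which anti-dependent stores are searched for.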
//--------------------------insert_anti_dependences---------------------------
// A load may need to witness memory that nearby stores can overwrite.
// For each nearby store, either insert an "anti-dependence" edge
// from the load to the store, or else move LCA upward to force the
// load to (eventually) be scheduled in a block above the store.
//
// Do not add edges to stores on distinct control-flow paths;
// only add edges to stores which might interfere.
//
// Return the (updated) LCA. There will not be any possibly interfering
// store between the load's "early block" and the updated LCA.
// Any stores in the updated LCA will have new precedence edges
// back to the load. The caller is expected to schedule the load
// in the LCA, in which case the precedence edges will make LCM
// preserve anti-dependences. The caller may also hoist the load
// above the LCA, if it is not the early block.
Block* PhaseCFG::insert_anti_dependences(Block* LCA, Node* load, bool verify) {
assert(load->needs_anti_dependence_check(), "must be a load of some sort");
assert(LCA != NULL, "");
DEBUG_ONLY(Block* LCA_orig = LCA);
// Compute the alias index. Loads and stores with different alias indices
// do not need anti-dependence edges.
uint load_alias_idx = C->get_alias_index(load->adr_type());
#ifdef ASSERT
if (load_alias_idx == Compile::AliasIdxBot && C->AliasLevel() > 0 &&
(PrintOpto || VerifyAliases ||
PrintMiscellaneous && (WizardMode || Verbose))) {
// Load nodes should not consume all of memory.
// Reporting a bottom type indicates a bug in adlc.
// If some particular type of node validly consumes all of memory,
// sharpen the preceding "if" to exclude it, so we can catch bugs here.
tty->print_cr("*** Possible Anti-Dependence Bug: Load consumes all of memory.");
load->dump(2);
if (VerifyAliases) assert(load_alias_idx != Compile::AliasIdxBot, "");
}
#endif
assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrComp),
"String compare is only known 'load' that does not conflict with any stores");
assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrEquals),
"String equals is a 'load' that does not conflict with any stores");
assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrIndexOf),
"String indexOf is a 'load' that does not conflict with any stores");
assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_AryEq),
"Arrays equals is a 'load' that do not conflict with any stores");
if (!C->alias_type(load_alias_idx)->is_rewritable()) {
// It is impossible to spoil this load by putting stores before it,
// because we know that the stores will never update the value
// which 'load' must witness.
return LCA;
}
node_idx_t load_index = load->_idx;
// Note the earliest legal placement of 'load', as determined
// by the unique point in the dom tree where all memory effects
// and other inputs are first available. (Computed by schedule_early.)
// For normal loads, 'early' is the shallowest place (dom graph wise)
// to look for anti-deps between this load and any store.
Block* early = get_block_for_node(load);
// If we are subsuming loads, compute an "early" block that only considers
// memory or address inputs. This block may be different than the
// schedule_early block in that it could be at an even shallower depth in the
// dominator tree, and allow for a broader discovery of anti-dependences.
if (C->subsume_loads()) {
early = memory_early_block(load, early, this);
}
ResourceArea *area = Thread::current()->resource_area();
Node_List worklist_mem(area); // prior memory state to store
Node_List worklist_store(area); // possible-def to explore
Node_List worklist_visited(area); // visited mergemem nodes
Node_List non_early_stores(area); // all relevant stores outside of early
bool must_raise_LCA = false;
#ifdef TRACK_PHI_INPUTS
// %%% This extra checking fails because MergeMem nodes are not GVNed.
// Provide "phi_inputs" to check if every input to a PhiNode is from the
// original memory state. Such a PhiNode should not prevent the
// load from sinking. For such a block, set_raise_LCA_mark
// may be overly conservative.
// Mechanism: count inputs seen for each Phi encountered in worklist_store.
DEBUG_ONLY(GrowableArray<uint> phi_inputs(area, C->unique(),0,0));
#endif
// 'load' uses some memory state; look for users of the same state.
// Recurse through MergeMem nodes to the stores that use them.
// Each of these stores is a possible definition of memory
// that 'load' needs to use. We need to force 'load'
// to occur before each such store. When the store is in
// the same block as 'load', we insert an anti-dependence
// edge load->store.
// The relevant stores "nearby" the load consist of a tree rooted
// at initial_mem, with internal nodes of type MergeMem.
// Therefore, the branches visited by the worklist are of this form:
// initial_mem -> (MergeMem ->)* store
// The anti-dependence constraints apply only to the fringe of this tree.
Node* initial_mem = load->in(MemNode::Memory);
worklist_store.push(initial_mem);
worklist_visited.push(initial_mem);
worklist_mem.push(NULL);
while (worklist_store.size() > 0) {
// Examine a nearby store to see if it might interfere with our load.
Node* mem = worklist_mem.pop();
Node* store = worklist_store.pop();
uint op = store->Opcode();
// MergeMems do not directly have anti-deps.
// Treat them as internal nodes in a forward tree of memory states,
// the leaves of which are each a 'possible-def'.
if (store == initial_mem // root (exclusive) of tree we are searching
|| op == Op_MergeMem // internal node of tree we are searching
) {
mem = store; // It's not a possibly interfering store.
if (store == initial_mem)
initial_mem = NULL; // only process initial memory once
for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
store = mem->fast_out(i);
if (store->is_MergeMem()) {
// Be sure we don't get into combinatorial problems.
// (Allow phis to be repeated; they can merge two relevant states.)
uint j = worklist_visited.size();
for (; j > 0; j--) {
if (worklist_visited.at(j-1) == store) break;
}
if (j > 0) continue; // already on work list; do not repeat
worklist_visited.push(store);
}
worklist_mem.push(mem);
worklist_store.push(store);
}
continue;
}
if (op == Op_MachProj || op == Op_Catch) continue;
if (store->needs_anti_dependence_check()) continue; // not really a store
// Compute the alias index. Loads and stores with different alias
// indices do not need anti-dependence edges. Wide MemBar's are
// anti-dependent on everything (except immutable memories).
const TypePtr* adr_type = store->adr_type();
if (!C->can_alias(adr_type, load_alias_idx)) continue;
// Most slow-path runtime calls do NOT modify Java memory, but
// they can block and so write Raw memory.
if (store->is_Mach()) {
MachNode* mstore = store->as_Mach();
if (load_alias_idx != Compile::AliasIdxRaw) {
// Check for call into the runtime using the Java calling
// convention (and from there into a wrapper); it has no
// _method. Can't do this optimization for Native calls because
// they CAN write to Java memory.
if (mstore->ideal_Opcode() == Op_CallStaticJava) {
assert(mstore->is_MachSafePoint(), "");
MachSafePointNode* ms = (MachSafePointNode*) mstore;
assert(ms->is_MachCallJava(), "");
MachCallJavaNode* mcj = (MachCallJavaNode*) ms;
if (mcj->_method == NULL) {
// These runtime calls do not write to Java visible memory
// (other than Raw) and so do not require anti-dependence edges.
continue;
}
}
// Same for SafePoints: they read/write Raw but only read otherwise.
// This is basically a workaround for SafePoints only defining control
// instead of control + memory.
if (mstore->ideal_Opcode() == Op_SafePoint)
continue;
} else {
// Some raw memory, such as the load of "top" at an allocation,
// can be control dependent on the previous safepoint. See
// comments in GraphKit::allocate_heap() about control input.
// Inserting an anti-dep between such a safepoint and a use
// creates a cycle, and will cause a subsequent failure in
// local scheduling. (BugId 4919904)
// (%%% How can a control input be a safepoint and not a projection??)
if (mstore->ideal_Opcode() == Op_SafePoint && load->in(0) == mstore)
continue;
}
}
// Identify a block that the current load must be above,
// or else observe that 'store' is all the way up in the
// earliest legal block for 'load'. In the latter case,
// immediately insert an anti-dependence edge.
Block* store_block = get_block_for_node(store);
assert(store_block != NULL, "unused killing projections skipped above");
if (store->is_Phi()) {
// 'load' uses memory which is one (or more) of the Phi's inputs.
// It must be scheduled not before the Phi, but rather before
// each of the relevant Phi inputs.
//
// Instead of finding the LCA of all inputs to a Phi that match 'mem',
// we mark each corresponding predecessor block and do a combined
// hoisting operation later (raise_LCA_above_marks).
//
// Do not assert(store_block != early, "Phi merging memory after access")
// PhiNode may be at start of block 'early' with backedge to 'early'
DEBUG_ONLY(bool found_match = false);
for (uint j = PhiNode::Input, jmax = store->req(); j < jmax; j++) {
if (store->in(j) == mem) { // Found matching input?
DEBUG_ONLY(found_match = true);
Block* pred_block = get_block_for_node(store_block->pred(j));
if (pred_block != early) {
// If any predecessor of the Phi matches the load's "early block",
// we do not need a precedence edge between the Phi and 'load'
// since the load will be forced into a block preceding the Phi.
pred_block->set_raise_LCA_mark(load_index);
assert(!LCA_orig->dominates(pred_block) ||
early->dominates(pred_block), "early is high enough");
must_raise_LCA = true;
} else {
// anti-dependent upon PHI pinned below 'early', no edge needed
LCA = early; // but can not schedule below 'early'
}
}
}
assert(found_match, "no worklist bug");
#ifdef TRACK_PHI_INPUTS
#ifdef ASSERT
// This assert asks about correct handling of PhiNodes, which may not
// have all input edges directly from 'mem'. See BugId 4621264
int num_mem_inputs = phi_inputs.at_grow(store->_idx,0) + 1;
// Increment by exactly one even if there are multiple copies of 'mem'
// coming into the phi, because we will run this block several times
// if there are several copies of 'mem'. (That's how DU iterators work.)
phi_inputs.at_put(store->_idx, num_mem_inputs);
assert(PhiNode::Input + num_mem_inputs < store->req(),
"Expect at least one phi input will not be from original memory state");
#endif //ASSERT
#endif //TRACK_PHI_INPUTS
} else if (store_block != early) {
// 'store' is between the current LCA and earliest possible block.
// Label its block, and decide later on how to raise the LCA
// to include the effect on LCA of this store.
// If this store's block gets chosen as the raised LCA, we
// will find him on the non_early_stores list and stick him
// with a precedence edge.
// (But, don't bother if LCA is already raised all the way.)
if (LCA != early) {
store_block->set_raise_LCA_mark(load_index);
must_raise_LCA = true;
non_early_stores.push(store);
}
} else {
// Found a possibly-interfering store in the load's 'early' block.
// This means 'load' cannot sink at all in the dominator tree.
// Add an anti-dep edge, and squeeze 'load' into the highest block.
assert(store != load->in(0), "dependence cycle found");
if (verify) {
assert(store->find_edge(load) != -1, "missing precedence edge");
} else {
store->add_prec(load);
}
LCA = early;
// This turns off the process of gathering non_early_stores.
}
}
// (Worklist is now empty; all nearby stores have been visited.)
// Finished if 'load' must be scheduled in its 'early' block.
// If we found any stores there, they have already been given
// precedence edges.
if (LCA == early) return LCA;
// We get here only if there are no possibly-interfering stores
// in the load's 'early' block. Move LCA up above all predecessors
// which contain stores we have noted.
//
// The raised LCA block can be a home to such interfering stores,
// but its predecessors must not contain any such stores.
//
// The raised LCA will be a lower bound for placing the load,
// preventing the load from sinking past any block containing
// a store that may invalidate the memory state required by 'load'.
if (must_raise_LCA)
LCA = raise_LCA_above_marks(LCA, load->_idx, early, this);
if (LCA == early) return LCA;
// Insert anti-dependence edges from 'load' to each store
// in the non-early LCA block.
// Mine the non_early_stores list for such stores.
if (LCA->raise_LCA_mark() == load_index) {
while (non_early_stores.size() > 0) {
Node* store = non_early_stores.pop();
Block* store_block = get_block_for_node(store);
if (store_block == LCA) {
// add anti_dependence from store to load in its own block
assert(store != load->in(0), "dependence cycle found");
if (verify) {
assert(store->find_edge(load) != -1, "missing precedence edge");
} else {
store->add_prec(load);
}
} else {
assert(store_block->raise_LCA_mark() == load_index, "block was marked");
// Any other stores we found must be either inside the new LCA
// or else outside the original LCA. In the latter case, they
// did not interfere with any use of 'load'.
assert(LCA->dominates(store_block)
|| !LCA_orig->dominates(store_block), "no stray stores");
}
}
}
// Return the highest block containing stores; any stores
// within that block have been given anti-dependence edges.
return LCA;
}
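// Recap of the store cases handled above (illustrative, not part of the
// original comments):
//  - memory Phi:             mark the predecessor blocks that feed the
//                            matching inputs; raise the LCA above them later.
//  - store outside 'early':  mark its block, remember it in non_early_stores,
//                            and raise the LCA lazily via raise_LCA_above_marks.
//  - store inside 'early':   give the store a precedence edge on the load
//                            immediately and pin the LCA to 'early'.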
// This class is used to iterate backwards over the nodes in the graph.
class Node_Backward_Iterator {
private:
Node_Backward_Iterator();
public:
// Constructor for the iterator
Node_Backward_Iterator(Node *root, VectorSet &visited, Node_List &stack, PhaseCFG &cfg);
// Return the next node in the backward traversal, or NULL when done
Node *next();
private:
VectorSet &_visited;
Node_List &_stack;
PhaseCFG &_cfg;
};
// Constructor for the Node_Backward_Iterator
Node_Backward_Iterator::Node_Backward_Iterator( Node *root, VectorSet &visited, Node_List &stack, PhaseCFG &cfg)
: _visited(visited), _stack(stack), _cfg(cfg) {
// The stack should contain exactly the root
stack.clear();
stack.push(root);
// Clear the visited bits
visited.Clear();
}
// Advance the Node_Backward_Iterator to the next node
Node *Node_Backward_Iterator::next() {
// If the _stack is empty, then just return NULL: finished.
if ( !_stack.size() )
return NULL;
// '_stack' is emulating a real _stack. The 'visit-all-users' loop has been
// made stateless, so I do not need to record the index 'i' on my _stack.
// Instead I visit all users each time, scanning for unvisited users.
// I visit unvisited not-anti-dependence users first, then anti-dependent
// children next.
Node *self = _stack.pop();
// I cycle here when I am entering a deeper level of recursion.
// The key variable 'self' was set prior to jumping here.
while( 1 ) {
_visited.set(self->_idx);
// Now schedule all uses as late as possible.
const Node* src = self->is_Proj() ? self->in(0) : self;
uint src_rpo = _cfg.get_block_for_node(src)->_rpo;
// Schedule all nodes in a post-order visit
Node *unvisited = NULL; // Unvisited anti-dependent Node, if any
// Scan for unvisited nodes
for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) {
// For all uses, schedule late
Node* n = self->fast_out(i); // Use
// Skip already visited children
if ( _visited.test(n->_idx) )
continue;
// do not traverse backward control edges
Node *use = n->is_Proj() ? n->in(0) : n;
uint use_rpo = _cfg.get_block_for_node(use)->_rpo;
if ( use_rpo < src_rpo )
continue;
// Phi nodes always precede uses in a basic block
if ( use_rpo == src_rpo && use->is_Phi() )
continue;
unvisited = n; // Found unvisited
// Check for possible-anti-dependent
if( !n->needs_anti_dependence_check() )
break; // Not visited, not anti-dep; schedule it NOW
}
// Did I find an unvisited not-anti-dependent Node?
if ( !unvisited )
break; // All done with children; post-visit 'self'
// Visit the unvisited Node. Contains the obvious push to
// indicate I'm entering a deeper level of recursion. I push the
// old state onto the _stack and set a new state and loop (recurse).
_stack.push(self);
self = unvisited;
} // End recursion loop
return self;
}
//------------------------------ComputeLatenciesBackwards----------------------
// Compute the latency of all the instructions.
void PhaseCFG::compute_latencies_backwards(VectorSet &visited, Node_List &stack) {
#ifndef PRODUCT
if (trace_opto_pipelining())
tty->print("\n#---- ComputeLatenciesBackwards ----\n");
#endif
Node_Backward_Iterator iter((Node *)_root, visited, stack, *this);
Node *n;
// Walk over all the nodes from last to first
while (n = iter.next()) {
// Set the latency for the definitions of this instruction
partial_latency_of_defs(n);
}
} // end ComputeLatenciesBackwards
//------------------------------partial_latency_of_defs------------------------
// Compute the latency impact of this node on all defs. This computes
// a number that increases as we approach the beginning of the routine.
void PhaseCFG::partial_latency_of_defs(Node *n) {
// Set the latency for this instruction
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print("# latency_to_inputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n));
dump();
}
#endif
if (n->is_Proj()) {
n = n->in(0);
}
if (n->is_Root()) {
return;
}
uint nlen = n->len();
uint use_latency = get_latency_for_node(n);
uint use_pre_order = get_block_for_node(n)->_pre_order;
for (uint j = 0; j < nlen; j++) {
Node *def = n->in(j);
if (!def || def == n) {
continue;
}
// Walk backwards thru projections
if (def->is_Proj()) {
def = def->in(0);
}
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print("# in(%2d): ", j);
def->dump();
}
#endif
// If the defining block is not known, assume it is ok
Block *def_block = get_block_for_node(def);
uint def_pre_order = def_block ? def_block->_pre_order : 0;
if ((use_pre_order < def_pre_order) || (use_pre_order == def_pre_order && n->is_Phi())) {
continue;
}
uint delta_latency = n->latency(j);
uint current_latency = delta_latency + use_latency;
if (get_latency_for_node(def) < current_latency) {
set_latency_for_node(def, current_latency);
}
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", use_latency, j, delta_latency, current_latency, def->_idx, get_latency_for_node(def));
}
#endif
}
}
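// Numeric example (illustrative): if a use currently has latency 7 and the
// edge to its j-th input contributes latency 3, that input's latency is
// raised to at least 7 + 3 = 10; values grow toward the beginning of the
// routine, as noted in the header comment above.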
//------------------------------latency_from_use-------------------------------
// Compute the latency of a specific use
int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) {
// If self-reference, return no latency
if (use == n || use->is_Root()) {
return 0;
}
uint def_pre_order = get_block_for_node(def)->_pre_order;
uint latency = 0;
// If the use is not a projection, then it is simple...
if (!use->is_Proj()) {
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print("# out(): ");
use->dump();
}
#endif
uint use_pre_order = get_block_for_node(use)->_pre_order;
if (use_pre_order < def_pre_order)
return 0;
if (use_pre_order == def_pre_order && use->is_Phi())
return 0;
uint nlen = use->len();
uint nl = get_latency_for_node(use);
for ( uint j=0; j<nlen; j++ ) {
if (use->in(j) == n) {
// Change this if we want local latencies
uint ul = use->latency(j);
uint l = ul + nl;
if (latency < l) latency = l;
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print_cr("# %d + edge_latency(%d) == %d -> %d, latency = %d",
nl, j, ul, l, latency);
}
#endif
}
}
} else {
// This is a projection, just grab the latency of the use(s)
for (DUIterator_Fast jmax, j = use->fast_outs(jmax); j < jmax; j++) {
uint l = latency_from_use(use, def, use->fast_out(j));
if (latency < l) latency = l;
}
}
return latency;
}
//------------------------------latency_from_uses------------------------------
// Compute the latency of this instruction relative to all of its uses.
// This computes a number that increases as we approach the beginning of the
// routine.
void PhaseCFG::latency_from_uses(Node *n) {
// Set the latency for this instruction
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print("# latency_from_outputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n));
dump();
}
#endif
uint latency=0;
const Node *def = n->is_Proj() ? n->in(0): n;
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
uint l = latency_from_use(n, def, n->fast_out(i));
if (latency < l) latency = l;
}
set_latency_for_node(n, latency);
}
//------------------------------hoist_to_cheaper_block-------------------------
// Pick a block for node self, between early and LCA, that is a cheaper
// alternative to LCA.
Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) {
const double delta = 1+PROB_UNLIKELY_MAG(4);
Block* least = LCA;
double least_freq = least->_freq;
uint target = get_latency_for_node(self);
uint start_latency = get_latency_for_node(LCA->head());
uint end_latency = get_latency_for_node(LCA->get_node(LCA->end_idx()));
bool in_latency = (target <= start_latency);
const Block* root_block = get_block_for_node(_root);
// Turn off latency scheduling if scheduling is just plain off
if (!C->do_scheduling())
in_latency = true;
// Do not hoist (to cover latency) instructions which target a
// single register. Hoisting stretches the live range of the
// single register and may force spilling.
MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL;
if (mach && mach->out_RegMask().is_bound1() && mach->out_RegMask().is_NotEmpty())
in_latency = true;
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print("# Find cheaper block for latency %d: ", get_latency_for_node(self));
self->dump();
tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
LCA->_pre_order,
LCA->head()->_idx,
start_latency,
LCA->get_node(LCA->end_idx())->_idx,
end_latency,
least_freq);
}
#endif
int cand_cnt = 0; // number of candidates tried
// Walk up the dominator tree from LCA (Lowest common ancestor) to
// the earliest legal location. Capture the least execution frequency.
while (LCA != early) {
LCA = LCA->_idom; // Follow up the dominator tree
if (LCA == NULL) {
// Bailout without retry
assert(false, "graph should be schedulable");
C->record_method_not_compilable("late schedule failed: LCA == NULL");
return least;
}
// Don't hoist machine instructions to the root basic block
if (mach && LCA == root_block)
break;
uint start_lat = get_latency_for_node(LCA->head());
uint end_idx = LCA->end_idx();
uint end_lat = get_latency_for_node(LCA->get_node(end_idx));
double LCA_freq = LCA->_freq;
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
LCA->_pre_order, LCA->head()->_idx, start_lat, end_idx, end_lat, LCA_freq);
}
#endif
cand_cnt++;
if (LCA_freq < least_freq || // Better Frequency
(StressGCM && Compile::randomized_select(cand_cnt)) || // Should be randomly accepted in stress mode
(!StressGCM && // Otherwise, choose with latency
!in_latency && // No block containing latency
LCA_freq < least_freq * delta && // No worse frequency
target >= end_lat && // within latency range
!self->is_iteratively_computed() ) // But don't hoist IV increments
// because they may end up above other uses of their phi forcing
// their result register to be different from their input.
) {
least = LCA; // Found cheaper block
least_freq = LCA_freq;
start_latency = start_lat;
end_latency = end_lat;
if (target <= start_lat)
in_latency = true;
}
}
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print_cr("# Choose block B%d with start latency=%d and freq=%g",
least->_pre_order, start_latency, least_freq);
}
#endif
// See if the latency needs to be updated
if (target < end_latency) {
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print_cr("# Change latency for [%4d] from %d to %d", self->_idx, target, end_latency);
}
#endif
set_latency_for_node(self, end_latency);
partial_latency_of_defs(self);
}
return least;
}
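// Numeric example (illustrative): with delta = 1 + PROB_UNLIKELY_MAG(4), a
// dominating candidate block wins outright if its frequency is strictly
// lower than the best seen so far; otherwise it can still win on latency,
// provided its frequency is below least_freq * delta and the node's target
// latency satisfies target >= the block's end latency.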
//------------------------------schedule_late-----------------------------------
// Now schedule all codes as LATE as possible. This is the LCA in the
// dominator tree of all USES of a value. Pick the block with the least
// loop nesting depth that is lowest in the dominator tree.
extern const char must_clone[];
void PhaseCFG::schedule_late(VectorSet &visited, Node_List &stack) {
#ifndef PRODUCT
if (trace_opto_pipelining())
tty->print("\n#---- schedule_late ----\n");
#endif
Node_Backward_Iterator iter((Node *)_root, visited, stack, *this);
Node *self;
// Walk over all the nodes from last to first
while (self = iter.next()) {
Block* early = get_block_for_node(self); // Earliest legal placement
if (self->is_top()) {
// Top node goes in bb #2 with other constants.
// It must be special-cased, because it has no out edges.
early->add_inst(self);
continue;
}
// No uses, just terminate
if (self->outcnt() == 0) {
assert(self->is_MachProj(), "sanity");
continue; // Must be a dead machine projection
}
// If node is pinned in the block, then no scheduling can be done.
if( self->pinned() ) // Pinned in block?
continue;
MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL;
if (mach) {
switch (mach->ideal_Opcode()) {
case Op_CreateEx:
// Don't move exception creation
early->add_inst(self);
continue;
break;
case Op_CheckCastPP:
// Don't move CheckCastPP nodes away from their input, if the input
// is a rawptr (5071820).
Node *def = self->in(1);
if (def != NULL && def->bottom_type()->base() == Type::RawPtr) {
early->add_inst(self);
#ifdef ASSERT
_raw_oops.push(def);
#endif
continue;
}
break;
}
}
// Gather LCA of all uses
Block *LCA = NULL;
{
for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) {
// For all uses, find LCA
Node* use = self->fast_out(i);
LCA = raise_LCA_above_use(LCA, use, self, this);
}
} // (Hide defs of imax, i from rest of block.)
// Place temps in the block of their use. This isn't a
// requirement for correctness but it reduces useless
// interference between temps and other nodes.
if (mach != NULL && mach->is_MachTemp()) {
map_node_to_block(self, LCA);
LCA->add_inst(self);
continue;
}
// Check if 'self' could be anti-dependent on memory
if (self->needs_anti_dependence_check()) {
// Hoist LCA above possible-defs and insert anti-dependences to
// defs in new LCA block.
LCA = insert_anti_dependences(LCA, self);
}
if (early->_dom_depth > LCA->_dom_depth) {
// Somehow the LCA has moved above the earliest legal point.
// (One way this can happen is via memory_early_block.)
if (C->subsume_loads() == true && !C->failing()) {
// Retry with subsume_loads == false
// If this is the first failure, the sentinel string will "stick"
// to the Compile object, and the C2Compiler will see it and retry.
C->record_failure(C2Compiler::retry_no_subsuming_loads());
} else {
// Bailout without retry when (early->_dom_depth > LCA->_dom_depth)
assert(false, "graph should be schedulable");
C->record_method_not_compilable("late schedule failed: incorrect graph");
}
return;
}
// If there is no opportunity to hoist, then we're done.
// In stress mode, try to hoist even the single operations.
bool try_to_hoist = StressGCM || (LCA != early);
// Must clone guys stay next to use; no hoisting allowed.
// Also cannot hoist guys that alter memory or are otherwise not
// allocatable (hoisting can make a value live longer, leading to
// anti and output dependency problems which are normally resolved
// by the register allocator giving everyone a different register).
if (mach != NULL && must_clone[mach->ideal_Opcode()])
try_to_hoist = false;
Block* late = NULL;
if (try_to_hoist) {
// Now find the block with the least execution frequency.
// Start at the latest schedule and work up to the earliest schedule
// in the dominator tree. Thus the Node will dominate all its uses.
late = hoist_to_cheaper_block(LCA, early, self);
} else {
// Just use the LCA of the uses.
late = LCA;
}
// Put the node into target block
schedule_node_into_block(self, late);
#ifdef ASSERT
if (self->needs_anti_dependence_check()) {
// since precedence edges are only inserted when we're sure they
// are needed make sure that after placement in a block we don't
// need any new precedence edges.
verify_anti_dependences(late, self);
}
#endif
} // Loop until all nodes have been visited
} // end ScheduleLate
//------------------------------GlobalCodeMotion-------------------------------
void PhaseCFG::global_code_motion() {
ResourceMark rm;
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print("\n---- Start GlobalCodeMotion ----\n");
}
#endif
// Initialize the node to block mapping for things on the proj_list
for (uint i = 0; i < _matcher.number_of_projections(); i++) {
unmap_node_from_block(_matcher.get_projection(i));
}
// Set the basic block for Nodes pinned into blocks
Arena* arena = Thread::current()->resource_area();
VectorSet visited(arena);
schedule_pinned_nodes(visited);
// Find the earliest Block any instruction can be placed in. Some
// instructions are pinned into Blocks. Unpinned instructions can
// appear in last block in which all their inputs occur.
visited.Clear();
Node_List stack(arena);
// Pre-grow the list
stack.map((C->live_nodes() >> 1) + 16, NULL);
if (!schedule_early(visited, stack)) {
// Bailout without retry
C->record_method_not_compilable("early schedule failed");
return;
}
// Build Def-Use edges.
// Compute the latency information (via backwards walk) for all the
// instructions in the graph
_node_latency = new GrowableArray<uint>(); // resource_area allocation
if (C->do_scheduling()) {
compute_latencies_backwards(visited, stack);
}
// Now schedule all codes as LATE as possible. This is the LCA in the
// dominator tree of all USES of a value. Pick the block with the least
// loop nesting depth that is lowest in the dominator tree.
// ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() )
schedule_late(visited, stack);
if (C->failing()) {
return;
}
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print("\n---- Detect implicit null checks ----\n");
}
#endif
// Detect implicit-null-check opportunities. Basically, find NULL checks
// with suitable memory ops nearby. Use the memory op to do the NULL check.
// I can generate a memory op if there is not one nearby.
if (C->is_method_compilation()) {
// By reversing the loop direction we get a very minor gain on mpegaudio.
// Feel free to revert to a forward loop for clarity.
// for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) {
for (int i = _matcher._null_check_tests.size() - 2; i >= 0; i -= 2) {
Node* proj = _matcher._null_check_tests[i];
Node* val = _matcher._null_check_tests[i + 1];
Block* block = get_block_for_node(proj);
implicit_null_check(block, proj, val, C->allowed_deopt_reasons());
// The implicit_null_check will only perform the transformation
// if the null branch is truly uncommon, *and* it leads to an
// uncommon trap. Combined with the too_many_traps guards
// above, this prevents SEGV storms reported in 6366351,
// by recompiling offending methods without this optimization.
}
}
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print("\n---- Start Local Scheduling ----\n");
}
#endif
// Schedule locally. Right now a simple topological sort.
// Later, do a real latency aware scheduler.
GrowableArray<int> ready_cnt(C->unique(), C->unique(), -1);
visited.Clear();
for (uint i = 0; i < number_of_blocks(); i++) {
Block* block = get_block(i);
if (!schedule_local(block, ready_cnt, visited)) {
if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
C->record_method_not_compilable("local schedule failed");
}
return;
}
}
// If we inserted any instructions between a Call and its CatchNode,
// clone the instructions on all paths below the Catch.
for (uint i = 0; i < number_of_blocks(); i++) {
Block* block = get_block(i);
call_catch_cleanup(block);
}
#ifndef PRODUCT
if (trace_opto_pipelining()) {
tty->print("\n---- After GlobalCodeMotion ----\n");
for (uint i = 0; i < number_of_blocks(); i++) {
Block* block = get_block(i);
block->dump();
}
}
#endif
// Dead.
_node_latency = (GrowableArray<uint> *)((intptr_t)0xdeadbeef);
}
bool PhaseCFG::do_global_code_motion() {
build_dominator_tree();
if (C->failing()) {
return false;
}
NOT_PRODUCT( C->verify_graph_edges(); )
estimate_block_frequency();
global_code_motion();
if (C->failing()) {
return false;
}
return true;
}
//------------------------------Estimate_Block_Frequency-----------------------
// Estimate block frequencies based on IfNode probabilities.
void PhaseCFG::estimate_block_frequency() {
// Force conditional branches leading to uncommon traps to be unlikely,
// not because we get to the uncommon_trap with less relative frequency,
// but because an uncommon_trap typically causes a deopt, so we only get
// there once.
if (C->do_freq_based_layout()) {
Block_List worklist;
Block* root_blk = get_block(0);
for (uint i = 1; i < root_blk->num_preds(); i++) {
Block *pb = get_block_for_node(root_blk->pred(i));
if (pb->has_uncommon_code()) {
worklist.push(pb);
}
}
while (worklist.size() > 0) {
Block* uct = worklist.pop();
if (uct == get_root_block()) {
continue;
}
for (uint i = 1; i < uct->num_preds(); i++) {
Block *pb = get_block_for_node(uct->pred(i));
if (pb->_num_succs == 1) {
worklist.push(pb);
} else if (pb->num_fall_throughs() == 2) {
pb->update_uncommon_branch(uct);
}
}
}
}
// Create the loop tree and calculate loop depth.
_root_loop = create_loop_tree();
_root_loop->compute_loop_depth(0);
// Compute block frequency of each block, relative to a single loop entry.
_root_loop->compute_freq();
// Adjust all frequencies to be relative to a single method entry
_root_loop->_freq = 1.0;
_root_loop->scale_freq();
// Save outermost loop frequency for LRG frequency threshold
_outer_loop_frequency = _root_loop->outer_loop_freq();
// force paths ending at uncommon traps to be infrequent
if (!C->do_freq_based_layout()) {
Block_List worklist;
Block* root_blk = get_block(0);
for (uint i = 1; i < root_blk->num_preds(); i++) {
Block *pb = get_block_for_node(root_blk->pred(i));
if (pb->has_uncommon_code()) {
worklist.push(pb);
}
}
while (worklist.size() > 0) {
Block* uct = worklist.pop();
uct->_freq = PROB_MIN;
for (uint i = 1; i < uct->num_preds(); i++) {
Block *pb = get_block_for_node(uct->pred(i));
if (pb->_num_succs == 1 && pb->_freq > PROB_MIN) {
worklist.push(pb);
}
}
}
}
#ifdef ASSERT
for (uint i = 0; i < number_of_blocks(); i++) {
Block* b = get_block(i);
assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency");
}
#endif
#ifndef PRODUCT
if (PrintCFGBlockFreq) {
tty->print_cr("CFG Block Frequencies");
_root_loop->dump_tree();
if (Verbose) {
tty->print_cr("PhaseCFG dump");
dump();
tty->print_cr("Node dump");
_root->dump(99999);
}
}
#endif
}
//----------------------------create_loop_tree--------------------------------
// Create a loop tree from the CFG
CFGLoop* PhaseCFG::create_loop_tree() {
#ifdef ASSERT
assert(get_block(0) == get_root_block(), "first block should be root block");
for (uint i = 0; i < number_of_blocks(); i++) {
Block* block = get_block(i);
// Check that _loop field are clear...we could clear them if not.
assert(block->_loop == NULL, "clear _loop expected");
// Sanity check that the RPO numbering is reflected in the _blocks array.
// It doesn't have to be for the loop tree to be built, but if it is not,
// then the blocks have been reordered since dom graph building, which
// calls the RPO numbering into question.
assert(block->_rpo == i, "unexpected reverse post order number");
}
#endif
int idct = 0;
CFGLoop* root_loop = new CFGLoop(idct++);
Block_List worklist;
// Assign blocks to loops
for(uint i = number_of_blocks() - 1; i > 0; i-- ) { // skip Root block
Block* block = get_block(i);
if (block->head()->is_Loop()) {
Block* loop_head = block;
assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
Node* tail_n = loop_head->pred(LoopNode::LoopBackControl);
Block* tail = get_block_for_node(tail_n);
// Defensively filter out Loop nodes for non-single-entry loops.
// For all reasonable loops, the head occurs before the tail in RPO.
if (i <= tail->_rpo) {
// The tail and (recursive) predecessors of the tail
// are made members of a new loop.
assert(worklist.size() == 0, "nonempty worklist");
CFGLoop* nloop = new CFGLoop(idct++);
assert(loop_head->_loop == NULL, "just checking");
loop_head->_loop = nloop;
// Add to nloop so push_pred() will skip over inner loops
nloop->add_member(loop_head);
nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, this);
while (worklist.size() > 0) {
Block* member = worklist.pop();
if (member != loop_head) {
for (uint j = 1; j < member->num_preds(); j++) {
nloop->push_pred(member, j, worklist, this);
}
}
}
}
}
}
// Create a member list for each loop consisting
// of both blocks and (immediate child) loops.
for (uint i = 0; i < number_of_blocks(); i++) {
Block* block = get_block(i);
CFGLoop* lp = block->_loop;
if (lp == NULL) {
// Not assigned to a loop. Add it to the method's pseudo loop.
block->_loop = root_loop;
lp = root_loop;
}
if (lp == root_loop || block != lp->head()) { // loop heads are already members
lp->add_member(block);
}
if (lp != root_loop) {
if (lp->parent() == NULL) {
// Not a nested loop. Make it a child of the method's pseudo loop.
root_loop->add_nested_loop(lp);
}
if (block == lp->head()) {
// Add nested loop to member list of parent loop.
lp->parent()->add_member(lp);
}
}
}
return root_loop;
}
//------------------------------push_pred--------------------------------------
void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, PhaseCFG* cfg) {
Node* pred_n = blk->pred(i);
Block* pred = cfg->get_block_for_node(pred_n);
CFGLoop *pred_loop = pred->_loop;
if (pred_loop == NULL) {
// Filter out blocks for non-single-entry loops.
// For all reasonable loops, the head occurs before the tail in RPO.
if (pred->_rpo > head()->_rpo) {
pred->_loop = this;
worklist.push(pred);
}
} else if (pred_loop != this) {
// Nested loop.
while (pred_loop->_parent != NULL && pred_loop->_parent != this) {
pred_loop = pred_loop->_parent;
}
// Make pred's loop be a child
if (pred_loop->_parent == NULL) {
add_nested_loop(pred_loop);
// Continue with loop entry predecessor.
Block* pred_head = pred_loop->head();
assert(pred_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
assert(pred_head != head(), "loop head in only one loop");
push_pred(pred_head, LoopNode::EntryControl, worklist, cfg);
} else {
assert(pred_loop->_parent == this && _parent == NULL, "just checking");
}
}
}
//------------------------------add_nested_loop--------------------------------
// Make cl a child of the current loop in the loop tree.
void CFGLoop::add_nested_loop(CFGLoop* cl) {
assert(_parent == NULL, "no parent yet");
assert(cl != this, "not my own parent");
cl->_parent = this;
CFGLoop* ch = _child;
if (ch == NULL) {
_child = cl;
} else {
while (ch->_sibling != NULL) { ch = ch->_sibling; }
ch->_sibling = cl;
}
}
//------------------------------compute_loop_depth-----------------------------
// Store the loop depth in each CFGLoop object.
// Recursively walk the children to do the same for them.
void CFGLoop::compute_loop_depth(int depth) {
_depth = depth;
CFGLoop* ch = _child;
while (ch != NULL) {
ch->compute_loop_depth(depth + 1);
ch = ch->_sibling;
}
}
//------------------------------compute_freq-----------------------------------
// Compute the frequency of each block and loop, relative to a single entry
// into the dominating loop head.
void CFGLoop::compute_freq() {
// Bottom up traversal of loop tree (visit inner loops first.)
// Set loop head frequency to 1.0, then transitively
// compute frequency for all successors in the loop,
// as well as for each exit edge. Inner loops are
// treated as single blocks with loop exit targets
// as the successor blocks.
// Nested loops first
CFGLoop* ch = _child;
while (ch != NULL) {
ch->compute_freq();
ch = ch->_sibling;
}
assert (_members.length() > 0, "no empty loops");
Block* hd = head();
hd->_freq = 1.0f;
for (int i = 0; i < _members.length(); i++) {
CFGElement* s = _members.at(i);
float freq = s->_freq;
if (s->is_block()) {
Block* b = s->as_Block();
for (uint j = 0; j < b->_num_succs; j++) {
Block* sb = b->_succs[j];
update_succ_freq(sb, freq * b->succ_prob(j));
}
} else {
CFGLoop* lp = s->as_CFGLoop();
assert(lp->_parent == this, "immediate child");
for (int k = 0; k < lp->_exits.length(); k++) {
Block* eb = lp->_exits.at(k).get_target();
float prob = lp->_exits.at(k).get_prob();
update_succ_freq(eb, freq * prob);
}
}
}
// For all loops other than the outer, "method" loop,
// sum and normalize the exit probability. The "method" loop
// should keep the initial exit probability of 1, so that
// inner blocks do not get erroneously scaled.
if (_depth != 0) {
// Total the exit probabilities for this loop.
float exits_sum = 0.0f;
for (int i = 0; i < _exits.length(); i++) {
exits_sum += _exits.at(i).get_prob();
}
// Normalize the exit probabilities. Until now, the
// probabilities estimate the possibility of exit per
// a single loop iteration; afterward, they estimate
// the probability of exit per loop entry.
for (int i = 0; i < _exits.length(); i++) {
Block* et = _exits.at(i).get_target();
float new_prob = 0.0f;
if (_exits.at(i).get_prob() > 0.0f) {
new_prob = _exits.at(i).get_prob() / exits_sum;
}
BlockProbPair bpp(et, new_prob);
_exits.at_put(i, bpp);
}
// Save the total, but guard against unreasonable probability,
// as the value is used to estimate the loop trip count.
// An infinite trip count would blur relative block
// frequencies.
if (exits_sum > 1.0f) exits_sum = 1.0;
if (exits_sum < PROB_MIN) exits_sum = PROB_MIN;
_exit_prob = exits_sum;
}
}
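// Numeric example (illustrative): a loop with two exits whose per-iteration
// probabilities are 0.05 and 0.15 ends up with per-entry probabilities
// 0.25 and 0.75 after normalization, and _exit_prob = 0.2, which downstream
// code uses to estimate the trip count (roughly 1 / 0.2 = 5 iterations).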
//------------------------------succ_prob-------------------------------------
// Determine the probability of reaching successor 'i' from the receiver block.
float Block::succ_prob(uint i) {
int eidx = end_idx();
Node *n = get_node(eidx); // Get ending Node
int op = n->Opcode();
if (n->is_Mach()) {
if (n->is_MachNullCheck()) {
// Can only reach here if called after lcm. The original Op_If is gone,
// so we attempt to infer the probability from one or both of the
// successor blocks.
assert(_num_succs == 2, "expecting 2 successors of a null check");
// If either successor has only one predecessor, then the
// probability estimate can be derived using the
// relative frequency of the successor and this block.
if (_succs[i]->num_preds() == 2) {
return _succs[i]->_freq / _freq;
} else if (_succs[1-i]->num_preds() == 2) {
return 1 - (_succs[1-i]->_freq / _freq);
} else {
// Estimate using both successor frequencies
float freq = _succs[i]->_freq;
return freq / (freq + _succs[1-i]->_freq);
}
}
op = n->as_Mach()->ideal_Opcode();
}
// Switch on branch type
switch( op ) {
case Op_CountedLoopEnd:
case Op_If: {
assert (i < 2, "just checking");
// Conditionals pass on only part of their frequency
float prob = n->as_MachIf()->_prob;
assert(prob >= 0.0 && prob <= 1.0, "out of range probability");
// If succ[i] is the FALSE branch, invert path info
if( get_node(i + eidx + 1)->Opcode() == Op_IfFalse ) {
return 1.0f - prob; // not taken
} else {
return prob; // taken
}
}
case Op_Jump:
// Divide the frequency between all successors evenly
return 1.0f/_num_succs;
case Op_Catch: {
const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj();
if (ci->_con == CatchProjNode::fall_through_index) {
// Fall-thru path gets the lion's share.
return 1.0f - PROB_UNLIKELY_MAG(5)*_num_succs;
} else {
// Presume exceptional paths are equally unlikely
return PROB_UNLIKELY_MAG(5);
}
}
case Op_Root:
case Op_Goto:
// Pass frequency straight thru to target
return 1.0f;
case Op_NeverBranch:
return 0.0f;
case Op_TailCall:
case Op_TailJump:
case Op_Return:
case Op_Halt:
case Op_Rethrow:
// Do not push out freq to root block
return 0.0f;
default:
ShouldNotReachHere();
}
return 0.0f;
}
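//--------------- illustrative sketch: null-check probability -----------------
// Standalone program (not part of the HotSpot sources) mirroring the
// MachNullCheck case above: once the original If is gone, the branch
// probability is inferred from the relative frequencies of this block and
// its successors. All numbers below are made up for the example.
#include <cstdio>

// this_freq:      frequency of the block ending in the null check
// succ_freq[2]:   frequencies of its two successors
// single_pred[i]: true if successor i has this block as its only predecessor
static float infer_succ_prob(int i, float this_freq,
                             const float succ_freq[2], const bool single_pred[2]) {
  if (single_pred[i]) {
    return succ_freq[i] / this_freq;               // all of succ[i]'s freq comes from here
  } else if (single_pred[1 - i]) {
    return 1.0f - (succ_freq[1 - i] / this_freq);  // complement of the exact share
  }
  return succ_freq[i] / (succ_freq[i] + succ_freq[1 - i]);  // fall back to the ratio
}

int main() {
  const float succ_freq[2]   = {90.0f, 10.0f};
  const bool  single_pred[2] = {true, false};
  printf("p(succ 0) = %.2f\n", infer_succ_prob(0, 100.0f, succ_freq, single_pred));
  return 0;
}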
//------------------------------num_fall_throughs-----------------------------
// Return the number of fall-through candidates for a block
int Block::num_fall_throughs() {
int eidx = end_idx();
Node *n = get_node(eidx); // Get ending Node
int op = n->Opcode();
if (n->is_Mach()) {
if (n->is_MachNullCheck()) {
// In theory, either side can fall through; for simplicity's sake,
// let's say only the false branch can for now.
return 1;
}
op = n->as_Mach()->ideal_Opcode();
}
// Switch on branch type
switch( op ) {
case Op_CountedLoopEnd:
case Op_If:
return 2;
case Op_Root:
case Op_Goto:
return 1;
case Op_Catch: {
for (uint i = 0; i < _num_succs; i++) {
const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj();
if (ci->_con == CatchProjNode::fall_through_index) {
return 1;
}
}
return 0;
}
case Op_Jump:
case Op_NeverBranch:
case Op_TailCall:
case Op_TailJump:
case Op_Return:
case Op_Halt:
case Op_Rethrow:
return 0;
default:
ShouldNotReachHere();
}
return 0;
}
//------------------------------succ_fall_through-----------------------------
// Return true if a specific successor could be fall-through target.
bool Block::succ_fall_through(uint i) {
int eidx = end_idx();
Node *n = get_node(eidx); // Get ending Node
int op = n->Opcode();
if (n->is_Mach()) {
if (n->is_MachNullCheck()) {
// In theory, either side can fall through; for simplicity's sake,
// let's say only the false branch can for now.
return get_node(i + eidx + 1)->Opcode() == Op_IfFalse;
}
op = n->as_Mach()->ideal_Opcode();
}
// Switch on branch type
switch( op ) {
case Op_CountedLoopEnd:
case Op_If:
case Op_Root:
case Op_Goto:
return true;
case Op_Catch: {
const CatchProjNode *ci = get_node(i + eidx + 1)->as_CatchProj();
return ci->_con == CatchProjNode::fall_through_index;
}
case Op_Jump:
case Op_NeverBranch:
case Op_TailCall:
case Op_TailJump:
case Op_Return:
case Op_Halt:
case Op_Rethrow:
return false;
default:
ShouldNotReachHere();
}
return false;
}
//------------------------------update_uncommon_branch------------------------
// Update the probability of a two-branch to be uncommon
void Block::update_uncommon_branch(Block* ub) {
int eidx = end_idx();
Node *n = get_node(eidx); // Get ending Node
int op = n->as_Mach()->ideal_Opcode();
assert(op == Op_CountedLoopEnd || op == Op_If, "must be an If");
assert(num_fall_throughs() == 2, "must be a two way branch block");
// Which successor is ub?
uint s;
for (s = 0; s <_num_succs; s++) {
if (_succs[s] == ub) break;
}
assert(s < 2, "uncommon successor must be found");
// If ub is the true path, make the probability small; else
// ub is the false path, so make the probability large.
bool invert = (get_node(s + eidx + 1)->Opcode() == Op_IfFalse);
// Get existing probability
float p = n->as_MachIf()->_prob;
if (invert) p = 1.0f - p;
if (p > PROB_MIN) {
p = PROB_MIN;
}
if (invert) p = 1.0f - p;
n->as_MachIf()->_prob = p;
}
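//--------------- illustrative sketch: uncommon branch clamp ------------------
// Standalone program (not part of the HotSpot sources) showing the
// flip-and-clamp done by update_uncommon_branch() above. The stored
// probability refers to the true projection, so when the uncommon successor
// sits on the false projection we clamp its complement instead. kProbMin is
// a stand-in value for PROB_MIN.
#include <cstdio>

static float clamp_uncommon(float prob_true, bool uncommon_is_false_branch) {
  const float kProbMin = 1e-6f;               // stand-in for PROB_MIN
  float p = uncommon_is_false_branch ? 1.0f - prob_true : prob_true;
  if (p > kProbMin) p = kProbMin;             // make the uncommon path very unlikely
  return uncommon_is_false_branch ? 1.0f - p : p;
}

int main() {
  // A 50/50 branch whose false projection leads to the uncommon block:
  // the stored (true-branch) probability is pushed up to ~1 - kProbMin.
  printf("%f\n", clamp_uncommon(0.5f, true));
  return 0;
}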
//------------------------------update_succ_freq-------------------------------
// Update the appropriate frequency associated with block 'b', a successor of
// a block in this loop.
void CFGLoop::update_succ_freq(Block* b, float freq) {
if (b->_loop == this) {
if (b == head()) {
// back branch within the loop
// Do nothing now; the loop-carried frequency will be
// adjusted later in scale_freq().
} else {
// simple branch within the loop
b->_freq += freq;
}
} else if (!in_loop_nest(b)) {
// branch is exit from this loop
BlockProbPair bpp(b, freq);
_exits.append(bpp);
} else {
// branch into nested loop
CFGLoop* ch = b->_loop;
ch->_freq += freq;
}
}
//------------------------------in_loop_nest-----------------------------------
// Determine if block b is in the receiver's loop nest.
bool CFGLoop::in_loop_nest(Block* b) {
int depth = _depth;
CFGLoop* b_loop = b->_loop;
int b_depth = b_loop->_depth;
if (depth == b_depth) {
return true;
}
while (b_depth > depth) {
b_loop = b_loop->_parent;
b_depth = b_loop->_depth;
}
return b_loop == this;
}
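//--------------- illustrative sketch: loop-nest query ------------------------
// Standalone program (not part of the HotSpot sources) showing the parent
// walk used by in_loop_nest() above on a tiny hypothetical loop tree.
// SimpleLoop is a made-up stand-in for CFGLoop with just a depth and parent.
#include <cstdio>

struct SimpleLoop {
  int depth;
  SimpleLoop* parent;
};

// Matches the logic above: equal depth answers true immediately; otherwise
// walk the deeper loop up to the same depth and compare identities.
static bool in_loop_nest(const SimpleLoop* receiver, const SimpleLoop* b_loop) {
  if (receiver->depth == b_loop->depth) return true;
  while (b_loop->depth > receiver->depth) b_loop = b_loop->parent;
  return b_loop == receiver;
}

int main() {
  SimpleLoop method = {0, nullptr};
  SimpleLoop outer  = {1, &method};
  SimpleLoop inner  = {2, &outer};
  printf("%d\n", in_loop_nest(&outer, &inner));  // 1: inner is inside outer's nest
  return 0;
}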
//------------------------------scale_freq-------------------------------------
// Scale frequency of loops and blocks by trip counts from outer loops
// Do a top-down traversal of the loop tree (visit outer loops first).
void CFGLoop::scale_freq() {
float loop_freq = _freq * trip_count();
_freq = loop_freq;
for (int i = 0; i < _members.length(); i++) {
CFGElement* s = _members.at(i);
float block_freq = s->_freq * loop_freq;
if (g_isnan(block_freq) || block_freq < MIN_BLOCK_FREQUENCY)
block_freq = MIN_BLOCK_FREQUENCY;
s->_freq = block_freq;
}
CFGLoop* ch = _child;
while (ch != NULL) {
ch->scale_freq();
ch = ch->_sibling;
}
}
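//--------------- illustrative sketch: frequency scaling ----------------------
// Standalone program (not part of the HotSpot sources) showing the scaling
// performed by scale_freq() above: the loop's entry frequency times its trip
// count gives the loop frequency, and each member's per-iteration frequency
// is multiplied by it (with a floor). kMinBlockFreq is a stand-in for
// MIN_BLOCK_FREQUENCY, chosen for the example.
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const float kMinBlockFreq   = 1e-35f;  // stand-in for MIN_BLOCK_FREQUENCY
  const float loop_entry_freq = 1.0f;    // how often the loop is entered
  const float trip_count      = 10.0f;   // estimated iterations per entry
  const float loop_freq       = loop_entry_freq * trip_count;

  // Member frequencies are relative to one iteration; scaling by loop_freq
  // turns them into absolute frequencies (10, 5 and 2.5 here).
  std::vector<float> member_freq = {1.0f, 0.5f, 0.25f};
  for (float& f : member_freq) {
    float scaled = f * loop_freq;
    if (std::isnan(scaled) || scaled < kMinBlockFreq) scaled = kMinBlockFreq;
    f = scaled;
  }
  printf("%g %g %g\n", member_freq[0], member_freq[1], member_freq[2]);
  return 0;
}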
// Frequency of outer loop
float CFGLoop::outer_loop_freq() const {
if (_child != NULL) {
return _child->_freq;
}
return _freq;
}
#ifndef PRODUCT
//------------------------------dump_tree--------------------------------------
void CFGLoop::dump_tree() const {
dump();
if (_child != NULL) _child->dump_tree();
if (_sibling != NULL) _sibling->dump_tree();
}
//------------------------------dump-------------------------------------------
void CFGLoop::dump() const {
for (int i = 0; i < _depth; i++) tty->print(" ");
tty->print("%s: %d trip_count: %6.0f freq: %6.0f\n",
_depth == 0 ? "Method" : "Loop", _id, trip_count(), _freq);
for (int i = 0; i < _depth; i++) tty->print(" ");
tty->print(" members:");
int k = 0;
for (int i = 0; i < _members.length(); i++) {
if (k++ >= 6) {
tty->print("\n ");
for (int j = 0; j < _depth+1; j++) tty->print(" ");
k = 0;
}
CFGElement *s = _members.at(i);
if (s->is_block()) {
Block *b = s->as_Block();
tty->print(" B%d(%6.3f)", b->_pre_order, b->_freq);
} else {
CFGLoop* lp = s->as_CFGLoop();
tty->print(" L%d(%6.3f)", lp->_id, lp->_freq);
}
}
tty->print("\n");
for (int i = 0; i < _depth; i++) tty->print(" ");
tty->print(" exits: ");
k = 0;
for (int i = 0; i < _exits.length(); i++) {
if (k++ >= 7) {
tty->print("\n ");
for (int j = 0; j < _depth+1; j++) tty->print(" ");
k = 0;
}
Block *blk = _exits.at(i).get_target();
float prob = _exits.at(i).get_prob();
tty->print(" ->%d@%d%%", blk->_pre_order, (int)(prob*100));
}
tty->print("\n");
}
#endif
C:\hotspot-69087d08d473\src\share\vm/opto/generateOptoStub.cpp
/*
* Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "opto/addnode.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/compile.hpp"
#include "opto/connode.hpp"
#include "opto/locknode.hpp"
#include "opto/memnode.hpp"
#include "opto/mulnode.hpp"
#include "opto/node.hpp"
#include "opto/parse.hpp"
#include "opto/phaseX.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/type.hpp"
//--------------------gen_stub-------------------------------
void GraphKit::gen_stub(address C_function,
const char *name,
int is_fancy_jump,
bool pass_tls,
bool return_pc) {
ResourceMark rm;
const TypeTuple *jdomain = C->tf()->domain();
const TypeTuple *jrange = C->tf()->range();
// The procedure start
StartNode* start = new (C) StartNode(root(), jdomain);
_gvn.set_type_bottom(start);
// Make a map, with JVM state
uint parm_cnt = jdomain->cnt();
uint max_map = MAX2(2*parm_cnt+1, jrange->cnt());
// %%% SynchronizationEntryBCI is redundant; use InvocationEntryBci in interfaces
assert(SynchronizationEntryBCI == InvocationEntryBci, "");
JVMState* jvms = new (C) JVMState(0);
jvms->set_bci(InvocationEntryBci);
jvms->set_monoff(max_map);
jvms->set_scloff(max_map);
jvms->set_endoff(max_map);
{
SafePointNode *map = new (C) SafePointNode( max_map, jvms );
jvms->set_map(map);
set_jvms(jvms);
assert(map == this->map(), "kit.map is set");
}
// Make up the parameters
uint i;
for( i = 0; i < parm_cnt; i++ )
map()->init_req(i, _gvn.transform(new (C) ParmNode(start, i)));
for( ; i<map()->req(); i++ )
map()->init_req(i, top()); // For nicer debugging
// GraphKit requires memory to be a MergeMemNode:
set_all_memory(map()->memory());
// Get base of thread-local storage area
Node* thread = _gvn.transform( new (C) ThreadLocalNode() );
const int NoAlias = Compile::AliasIdxBot;
Node* adr_last_Java_pc = basic_plus_adr(top(),
thread,
in_bytes(JavaThread::frame_anchor_offset()) +
in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
#if defined(SPARC)
Node* adr_flags = basic_plus_adr(top(),
thread,
in_bytes(JavaThread::frame_anchor_offset()) +
in_bytes(JavaFrameAnchor::flags_offset()));
#endif /* defined(SPARC) */
// Drop in the last_Java_sp. last_Java_fp is not touched.
// Always do this after the other "last_Java_frame" fields are set, since
// as soon as last_Java_sp != NULL, has_last_Java_frame is true and
// users will look at the other fields.
//
Node *adr_sp = basic_plus_adr(top(), thread, in_bytes(JavaThread::last_Java_sp_offset()));
Node *last_sp = basic_plus_adr(top(), frameptr(), (intptr_t) STACK_BIAS);
store_to_memory(NULL, adr_sp, last_sp, T_ADDRESS, NoAlias, MemNode::unordered);
// Set _thread_in_native
// The order of stores into TLS is critical! Setting _thread_in_native MUST
// be last, because a GC is allowed at any time after setting it and the GC
// will require last_Java_pc and last_Java_sp.
//-----------------------------
// Compute signature for C call. Varies from the Java signature!
const Type **fields = TypeTuple::fields(2*parm_cnt+2);
uint cnt = TypeFunc::Parms;
// The C routine gets the base of thread-local storage passed in as an
// extra argument. Not all calls need it, but it's cheap to add here.
for (uint pcnt = cnt; pcnt < parm_cnt; pcnt++, cnt++) {
// Convert ints to longs if required.
if (CCallingConventionRequiresIntsAsLongs && jdomain->field_at(pcnt)->isa_int()) {
fields[cnt++] = TypeLong::LONG;
fields[cnt] = Type::HALF; // must add an additional half for a long
} else {
fields[cnt] = jdomain->field_at(pcnt);
}
}
fields[cnt++] = TypeRawPtr::BOTTOM; // Thread-local storage
// Also pass in the caller's PC, if asked for.
if( return_pc )
fields[cnt++] = TypeRawPtr::BOTTOM; // Return PC
const TypeTuple* domain = TypeTuple::make(cnt,fields);
// The C routine we are about to call cannot return an oop; it can block on
// exit and a GC will trash the oop while it sits in C-land. Instead, we
// return the oop through TLS for runtime calls.
// Also, C routines returning integer subword values leave the high
// order bits dirty; these must be cleaned up by explicit sign extension.
const Type* retval = (jrange->cnt() == TypeFunc::Parms) ? Type::TOP : jrange->field_at(TypeFunc::Parms);
// Make a private copy of jrange->fields();
const Type **rfields = TypeTuple::fields(jrange->cnt() - TypeFunc::Parms);
// Fixup oop returns
int retval_ptr = retval->isa_oop_ptr();
if( retval_ptr ) {
assert( pass_tls, "Oop must be returned thru TLS" );
// Fancy-jumps return address; others return void
rfields[TypeFunc::Parms] = is_fancy_jump ? TypeRawPtr::BOTTOM : Type::TOP;
} else if( retval->isa_int() ) { // Returning any integer subtype?
// "Fatten" byte, char & short return types to 'int' to show that
// the native C code can return values with junk high order bits.
// We'll sign-extend it below.
rfields[TypeFunc::Parms] = TypeInt::INT; // It's "dirty" and needs sign-ext
} else if( jrange->cnt() >= TypeFunc::Parms+1 ) { // Else copy other types
rfields[TypeFunc::Parms] = jrange->field_at(TypeFunc::Parms);
if( jrange->cnt() == TypeFunc::Parms+2 )
rfields[TypeFunc::Parms+1] = jrange->field_at(TypeFunc::Parms+1);
}
const TypeTuple* range = TypeTuple::make(jrange->cnt(),rfields);
// Final C signature
const TypeFunc *c_sig = TypeFunc::make(domain,range);
//-----------------------------
// Make the call node
CallRuntimeNode *call = new (C)
CallRuntimeNode(c_sig, C_function, name, TypePtr::BOTTOM);
//-----------------------------
// Fix-up the debug info for the call
call->set_jvms( new (C) JVMState(0) );
call->jvms()->set_bci(0);
call->jvms()->set_offsets(cnt);
// Set fixed predefined input arguments
cnt = 0;
for (i = 0; i < TypeFunc::Parms; i++)
call->init_req(cnt++, map()->in(i));
// A little too aggressive on the parm copy; return address is not an input
call->set_req(TypeFunc::ReturnAdr, top());
for (; i < parm_cnt; i++) { // Regular input arguments
// Convert ints to longs if required.
if (CCallingConventionRequiresIntsAsLongs && jdomain->field_at(i)->isa_int()) {
Node* int_as_long = _gvn.transform(new (C) ConvI2LNode(map()->in(i)));
call->init_req(cnt++, int_as_long); // long
call->init_req(cnt++, top()); // half
} else {
call->init_req(cnt++, map()->in(i));
}
}
call->init_req( cnt++, thread );
if( return_pc ) // Return PC, if asked for
call->init_req( cnt++, returnadr() );
_gvn.transform_no_reclaim(call);
//-----------------------------
// Now set up the return results
set_control( _gvn.transform( new (C) ProjNode(call,TypeFunc::Control)) );
set_i_o( _gvn.transform( new (C) ProjNode(call,TypeFunc::I_O )) );
set_all_memory_call(call);
if (range->cnt() > TypeFunc::Parms) {
Node* retnode = _gvn.transform( new (C) ProjNode(call,TypeFunc::Parms) );
// C-land is allowed to return sub-word values. Convert to integer type.
assert( retval != Type::TOP, "" );
if (retval == TypeInt::BOOL) {
retnode = _gvn.transform( new (C) AndINode(retnode, intcon(0xFF)) );
} else if (retval == TypeInt::CHAR) {
retnode = _gvn.transform( new (C) AndINode(retnode, intcon(0xFFFF)) );
} else if (retval == TypeInt::BYTE) {
retnode = _gvn.transform( new (C) LShiftINode(retnode, intcon(24)) );
retnode = _gvn.transform( new (C) RShiftINode(retnode, intcon(24)) );
} else if (retval == TypeInt::SHORT) {
retnode = _gvn.transform( new (C) LShiftINode(retnode, intcon(16)) );
retnode = _gvn.transform( new (C) RShiftINode(retnode, intcon(16)) );
}
map()->set_req( TypeFunc::Parms, retnode );
}
//-----------------------------
// Clear last_Java_sp
store_to_memory(NULL, adr_sp, null(), T_ADDRESS, NoAlias, MemNode::unordered);
// Clear last_Java_pc and (optionally) _flags
store_to_memory(NULL, adr_last_Java_pc, null(), T_ADDRESS, NoAlias, MemNode::unordered);
#if defined(SPARC)
store_to_memory(NULL, adr_flags, intcon(0), T_INT, NoAlias, MemNode::unordered);
#endif /* defined(SPARC) */
#if (defined(IA64) && !defined(AIX))
Node* adr_last_Java_fp = basic_plus_adr(top(), thread, in_bytes(JavaThread::last_Java_fp_offset()));
store_to_memory(NULL, adr_last_Java_fp, null(), T_ADDRESS, NoAlias, MemNode::unordered);
#endif
// For is-fancy-jump, the C-return value is also the branch target
Node* target = map()->in(TypeFunc::Parms);
// Runtime call returning oop in TLS? Fetch it out
if( pass_tls ) {
Node* adr = basic_plus_adr(top(), thread, in_bytes(JavaThread::vm_result_offset()));
Node* vm_result = make_load(NULL, adr, TypeOopPtr::BOTTOM, T_OBJECT, NoAlias, MemNode::unordered);
map()->set_req(TypeFunc::Parms, vm_result); // vm_result passed as result
// clear thread-local storage (TLS)
store_to_memory(NULL, adr, null(), T_ADDRESS, NoAlias, MemNode::unordered);
}
//-----------------------------
// check exception
Node* adr = basic_plus_adr(top(), thread, in_bytes(Thread::pending_exception_offset()));
Node* pending = make_load(NULL, adr, TypeOopPtr::BOTTOM, T_OBJECT, NoAlias, MemNode::unordered);
Node* exit_memory = reset_memory();
Node* cmp = _gvn.transform( new (C) CmpPNode(pending, null()) );
Node* bo = _gvn.transform( new (C) BoolNode(cmp, BoolTest::ne) );
IfNode *iff = create_and_map_if(control(), bo, PROB_MIN, COUNT_UNKNOWN);
Node* if_null = _gvn.transform( new (C) IfFalseNode(iff) );
Node* if_not_null = _gvn.transform( new (C) IfTrueNode(iff) );
assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
Node *exc_target = makecon(TypeRawPtr::make( StubRoutines::forward_exception_entry() ));
Node *to_exc = new (C) TailCallNode(if_not_null,
i_o(),
exit_memory,
frameptr(),
returnadr(),
exc_target, null());
root()->add_req(_gvn.transform(to_exc)); // bind to root to keep live
C->init_start(start);
//-----------------------------
// If this is a normal subroutine return, issue the return and be done.
Node *ret = NULL;
switch( is_fancy_jump ) {
case 0: // Make a return instruction
// Return to caller, free any space for return address
ret = new (C) ReturnNode(TypeFunc::Parms, if_null,
i_o(),
exit_memory,
frameptr(),
returnadr());
if (C->tf()->range()->cnt() > TypeFunc::Parms)
ret->add_req( map()->in(TypeFunc::Parms) );
break;
case 1: // This is a fancy tail-call jump. Jump to computed address.
// Jump to new callee; leave old return address alone.
ret = new (C) TailCallNode(if_null,
i_o(),
exit_memory,
frameptr(),
returnadr(),
target, map()->in(TypeFunc::Parms));
break;
case 2: // Pop return address & jump
// Throw away old return address; jump to new computed address
//assert(C_function == CAST_FROM_FN_PTR(address, OptoRuntime::rethrow_C), "fancy_jump==2 only for rethrow");
ret = new (C) TailJumpNode(if_null,
i_o(),
exit_memory,
frameptr(),
target, map()->in(TypeFunc::Parms));
break;
default:
ShouldNotReachHere();
}
root()->add_req(_gvn.transform(ret));
}
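//--------------- illustrative sketch: sub-word return cleanup ----------------
// Standalone program (not part of the HotSpot sources) showing the bit
// manipulations gen_stub() emits above for sub-word C return values: bool and
// char are zero-extended with an AND mask, byte and short are sign-extended
// with a left shift followed by an arithmetic right shift. The input value is
// made up; the casts assume the usual two's-complement arithmetic shift.
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t dirty = 0x12345680u;  // low byte is 0x80; the high bits are junk

  int32_t as_bool  = (int32_t)(dirty & 0xFFu);      // like the AndI with 0xFF
  int32_t as_char  = (int32_t)(dirty & 0xFFFFu);    // like the AndI with 0xFFFF
  int32_t as_byte  = (int32_t)(dirty << 24) >> 24;  // like LShiftI 24 + RShiftI 24
  int32_t as_short = (int32_t)(dirty << 16) >> 16;  // like LShiftI 16 + RShiftI 16

  printf("bool=%d char=%d byte=%d short=%d\n", as_bool, as_char, as_byte, as_short);
  return 0;
}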
C:\hotspot-69087d08d473\src\share\vm/opto/graphKit.cpp
/*
* Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/