class Matcher;
class Node;
class RegionNode;
class TypeNode;
class PhiNode;
class GotoNode;
class MultiNode;
class MultiBranchNode;
class IfNode;
class PCTableNode;
class JumpNode;
class CatchNode;
class NeverBranchNode;
class ProjNode;
class CProjNode;
class IfTrueNode;
class IfFalseNode;
class CatchProjNode;
class JProjNode;
class JumpProjNode;
class SCMemProjNode;
class PhaseIdealLoop;
//------------------------------RegionNode-------------------------------------
// The class of RegionNodes, which can be mapped to basic blocks in the
// program. Their inputs point to Control sources. PhiNodes (described
// below) have an input point to a RegionNode. Merged data inputs to PhiNodes
// correspond 1-to-1 with RegionNode inputs. The zero input of a PhiNode is
// the RegionNode, and the zero input of the RegionNode is itself.
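// A minimal sketch of this correspondence (hypothetical names), for a
// two-way merge:
//   region->in(0) == region (self)       phi->in(0) == region
//   region->in(1) == ctrl_from_path_1    phi->in(1) == value_from_path_1
//   region->in(2) == ctrl_from_path_2    phi->in(2) == value_from_path_2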
class RegionNode : public Node {
public:
// Node layout (parallels PhiNode):
enum { Region, // Generally points to self.
Control // Control arcs are [1..len)
};
RegionNode( uint required ) : Node(required) {
init_class_id(Class_Region);
init_req(0,this);
}
Node* is_copy() const {
const Node* r = _in[Region];
if (r == NULL)
return nonnull_req();
return NULL; // not a copy!
}
PhiNode* has_phi() const; // returns an arbitrary phi user, or NULL
PhiNode* has_unique_phi() const; // returns the unique phi user, or NULL
// Is this region node unreachable from root?
bool is_unreachable_region(PhaseGVN *phase) const;
virtual int Opcode() const;
virtual bool pinned() const { return (const Node *)in(0) == this; }
virtual bool is_CFG () const { return true; }
virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
virtual bool depends_only_on_test() const { return false; }
virtual const Type *bottom_type() const { return Type::CONTROL; }
virtual const Type *Value( PhaseTransform *phase ) const;
virtual Node *Identity( PhaseTransform *phase );
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
virtual const RegMask &out_RegMask() const;
bool try_clean_mem_phi(PhaseGVN *phase);
};
//------------------------------JProjNode--------------------------------------
// jump projection for node that produces multiple control-flow paths
class JProjNode : public ProjNode {
public:
JProjNode( Node* ctrl, uint idx ) : ProjNode(ctrl,idx) {}
virtual int Opcode() const;
virtual bool is_CFG() const { return true; }
virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
virtual const Node* is_block_proj() const { return in(0); }
virtual const RegMask& out_RegMask() const;
virtual uint ideal_reg() const { return 0; }
};
//------------------------------PhiNode----------------------------------------
// PhiNodes merge values from different Control paths. Slot 0 points to the
// controlling RegionNode. Other slots map 1-for-1 with incoming control flow
// paths to the RegionNode. For speed reasons (to avoid another pass) we
// can turn PhiNodes into copies in-place by NULL'ing out their RegionNode
// input in slot 0.
class PhiNode : public TypeNode {
const TypePtr* const _adr_type; // non-null only for Type::MEMORY nodes.
// The following fields are only used for data PhiNodes to indicate
// that the PhiNode represents the value of a known instance field.
int _inst_mem_id; // Instance memory id (node index of the memory Phi)
const int _inst_id; // Instance id of the memory slice.
const int _inst_index; // Alias index of the instance memory slice.
// Array element references have the same alias_idx but different offset.
const int _inst_offset; // Offset of the instance memory slice.
// Size is bigger to hold the _adr_type field.
virtual uint hash() const; // Check the type
virtual uint cmp( const Node &n ) const;
virtual uint size_of() const { return sizeof(*this); }
// Determine if CMoveNode::is_cmove_id can be used at this join point.
Node* is_cmove_id(PhaseTransform* phase, int true_path);
public:
// Node layout (parallels RegionNode):
enum { Region, // Control input is the Phi's region.
Input // Input values are [1..len)
};
PhiNode( Node *r, const Type *t, const TypePtr* at = NULL,
const int imid = -1,
const int iid = TypeOopPtr::InstanceTop,
const int iidx = Compile::AliasIdxTop,
const int ioffs = Type::OffsetTop )
: TypeNode(t,r->req()),
_adr_type(at),
_inst_mem_id(imid),
_inst_id(iid),
_inst_index(iidx),
_inst_offset(ioffs)
{
init_class_id(Class_Phi);
init_req(0, r);
verify_adr_type();
}
// create a new phi with in edges matching r and set (initially) to x
static PhiNode* make( Node* r, Node* x );
// extra type arguments override the new phi's bottom_type and adr_type
static PhiNode* make( Node* r, Node* x, const Type *t, const TypePtr* at = NULL );
// create a new phi with narrowed memory type
PhiNode* slice_memory(const TypePtr* adr_type) const;
PhiNode* split_out_instance(const TypePtr* at, PhaseIterGVN *igvn) const;
// like make(r, x), but does not initialize the in edges to x
static PhiNode* make_blank( Node* r, Node* x );
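// Usage sketch (hypothetical values): PhiNode::make(r, x) yields a Phi with
// r->req() edges, slot 0 pointing at r and every data slot initialized to x;
// callers then refine individual paths, e.g. phi->set_req(2, value_on_path_2).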
// Accessors
RegionNode* region() const { Node* r = in(Region); assert(!r || r->is_Region(), ""); return (RegionNode*)r; }
Node* is_copy() const {
// The node is a real phi if _in[0] is a Region node.
DEBUG_ONLY(const Node* r = _in[Region];)
assert(r != NULL && r->is_Region(), "Not valid control");
return NULL; // not a copy!
}
bool is_tripcount() const;
// Determine a unique non-trivial input, if any.
// Ignore casts if it helps. Return NULL on failure.
Node* unique_input(PhaseTransform *phase);
// Check for a simple dead loop.
enum LoopSafety { Safe = 0, Unsafe, UnsafeLoop };
LoopSafety simple_data_loop_check(Node *in) const;
// Is it an unsafe data loop? It becomes a dead loop if this phi node is removed.
bool is_unsafe_data_reference(Node *in) const;
int is_diamond_phi(bool check_control_only = false) const;
virtual int Opcode() const;
virtual bool pinned() const { return in(0) != 0; }
virtual const TypePtr *adr_type() const { verify_adr_type(true); return _adr_type; }
void set_inst_mem_id(int inst_mem_id) { _inst_mem_id = inst_mem_id; }
const int inst_mem_id() const { return _inst_mem_id; }
const int inst_id() const { return _inst_id; }
const int inst_index() const { return _inst_index; }
const int inst_offset() const { return _inst_offset; }
bool is_same_inst_field(const Type* tp, int mem_id, int id, int index, int offset) {
return type()->basic_type() == tp->basic_type() &&
inst_mem_id() == mem_id &&
inst_id() == id &&
inst_index() == index &&
inst_offset() == offset &&
type()->higher_equal(tp);
}
virtual const Type *Value( PhaseTransform *phase ) const;
virtual Node *Identity( PhaseTransform *phase );
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
virtual const RegMask &out_RegMask() const;
virtual const RegMask &in_RegMask(uint) const;
#ifndef PRODUCT
virtual void dump_spec(outputStream *st) const;
#endif
#ifdef ASSERT
void verify_adr_type(VectorSet& visited, const TypePtr* at) const;
void verify_adr_type(bool recursive = false) const;
#else //ASSERT
void verify_adr_type(bool recursive = false) const {}
#endif //ASSERT
};
//------------------------------GotoNode---------------------------------------
// GotoNodes perform direct branches.
class GotoNode : public Node {
public:
GotoNode( Node *control ) : Node(control) {}
virtual int Opcode() const;
virtual bool pinned() const { return true; }
virtual bool is_CFG() const { return true; }
virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
virtual const Node *is_block_proj() const { return this; }
virtual bool depends_only_on_test() const { return false; }
virtual const Type *bottom_type() const { return Type::CONTROL; }
virtual const Type *Value( PhaseTransform *phase ) const;
virtual Node *Identity( PhaseTransform *phase );
virtual const RegMask &out_RegMask() const;
};
//------------------------------CProjNode--------------------------------------
// control projection for node that produces multiple control-flow paths
class CProjNode : public ProjNode {
public:
CProjNode( Node *ctrl, uint idx ) : ProjNode(ctrl,idx) {}
virtual int Opcode() const;
virtual bool is_CFG() const { return true; }
virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
virtual const Node *is_block_proj() const { return in(0); }
virtual const RegMask &out_RegMask() const;
virtual uint ideal_reg() const { return 0; }
};
//---------------------------MultiBranchNode-----------------------------------
// This class defines a MultiBranchNode, a MultiNode which yields multiple
// control values. These are distinguished from other types of MultiNodes
// which yield multiple values, but control is always and only projection #0.
class MultiBranchNode : public MultiNode {
public:
MultiBranchNode( uint required ) : MultiNode(required) {
init_class_id(Class_MultiBranch);
}
// returns required number of users to be well formed.
virtual int required_outcnt() const = 0;
};
//------------------------------IfNode-----------------------------------------
// Output selected Control, based on a boolean test
class IfNode : public MultiBranchNode {
// Size is bigger to hold the probability field. However, _prob does not
// change the semantics so it does not appear in the hash & cmp functions.
virtual uint size_of() const { return sizeof(*this); }
public:
// Degrees of branch prediction probability by order of magnitude:
// PROB_UNLIKELY_MAG(N) is a 1 in 1eN chance.
// PROB_LIKELY_MAG(N) is 1 - PROB_UNLIKELY_MAG(N).
#define PROB_UNLIKELY_MAG(N) (1e- ## N ## f)
#define PROB_LIKELY_MAG(N) (1.0f-PROB_UNLIKELY_MAG(N))
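// For example, PROB_UNLIKELY_MAG(6) token-pastes to (1e-6f), and
// PROB_LIKELY_MAG(6) expands to (1.0f - 1e-6f), i.e. roughly 0.999999f.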
// Maximum and minimum branch prediction probabilities
// 1 in 1,000,000 (magnitude 6)
//
// Although PROB_NEVER == PROB_MIN and PROB_ALWAYS == PROB_MAX
// they are used to distinguish different situations:
//
// The name PROB_MAX (PROB_MIN) is for probabilities which correspond to
// very likely (unlikely) but with a concrete possibility of a rare
// contrary case. These constants would be used for pinning
// measurements, and as measures for assertions that have high
// confidence, but some evidence of occasional failure.
//
// The name PROB_ALWAYS (PROB_NEVER) is to stand for situations for which
// there is no evidence at all that the contrary case has ever occurred.
#define PROB_NEVER PROB_UNLIKELY_MAG(6)
#define PROB_ALWAYS PROB_LIKELY_MAG(6)
#define PROB_MIN PROB_UNLIKELY_MAG(6)
#define PROB_MAX PROB_LIKELY_MAG(6)
// Static branch prediction probabilities
// 1 in 10 (magnitude 1)
#define PROB_STATIC_INFREQUENT PROB_UNLIKELY_MAG(1)
#define PROB_STATIC_FREQUENT PROB_LIKELY_MAG(1)
// Fair probability 50/50
#define PROB_FAIR (0.5f)
// Unknown probability sentinel
#define PROB_UNKNOWN (-1.0f)
// Probability "constructors", to distinguish as a probability any manifest
// constant without a name
#define PROB_LIKELY(x) ((float) (x))
#define PROB_UNLIKELY(x) (1.0f - (float)(x))
// Other probabilities in use, but without a unique name, are documented
// here for lack of a better place:
//
// 1 in 1000 probabilities (magnitude 3):
// threshold for converting to conditional move
// likelihood of null check failure if a null HAS been seen before
// likelihood of slow path taken in library calls
//
// 1 in 10,000 probabilities (magnitude 4):
// threshold for making an uncommon trap probability more extreme
// threshold for making a null check implicit
// likelihood of needing a gc if eden top moves during an allocation
// likelihood of a predicted call failure
//
// 1 in 100,000 probabilities (magnitude 5):
// threshold for ignoring counts when estimating path frequency
// likelihood of FP clipping failure
// likelihood of catching an exception from a try block
// likelihood of null check failure if a null has NOT been seen before
//
// Magic manifest probabilities such as 0.83, 0.7, ... can be found in
// gen_subtype_check() and catch_inline_exceptions().
float _prob; // Probability of true path being taken.
float _fcnt; // Frequency counter
IfNode( Node *control, Node *b, float p, float fcnt )
: MultiBranchNode(2), _prob(p), _fcnt(fcnt) {
init_class_id(Class_If);
init_req(0,control);
init_req(1,b);
}
virtual int Opcode() const;
virtual bool pinned() const { return true; }
virtual const Type *bottom_type() const { return TypeTuple::IFBOTH; }
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
virtual const Type *Value( PhaseTransform *phase ) const;
virtual int required_outcnt() const { return 2; }
virtual const RegMask &out_RegMask() const;
void dominated_by(Node* prev_dom, PhaseIterGVN* igvn);
int is_range_check(Node* &range, Node* &index, jint &offset);
Node* fold_compares(PhaseGVN* phase);
static Node* up_one_dom(Node* curr, bool linear_only = false);
// Takes the type of val and filters it through the test represented
// by if_proj and returns a more refined type if one is produced.
// Returns NULL if it couldn't improve the type.
static const TypeInt* filtered_int_type(PhaseGVN* phase, Node* val, Node* if_proj);
#ifndef PRODUCT
virtual void dump_spec(outputStream *st) const;
#endif
};
class IfTrueNode : public CProjNode {
public:
IfTrueNode( IfNode *ifnode ) : CProjNode(ifnode,1) {
init_class_id(Class_IfTrue);
}
virtual int Opcode() const;
virtual Node *Identity( PhaseTransform *phase );
};
class IfFalseNode : public CProjNode {
public:
IfFalseNode( IfNode *ifnode ) : CProjNode(ifnode,0) {
init_class_id(Class_IfFalse);
}
virtual int Opcode() const;
virtual Node *Identity( PhaseTransform *phase );
};
//------------------------------PCTableNode------------------------------------
// Build an indirect branch table. Given a control and a table index,
// control is passed to the Projection matching the table index. Used to
// implement switch statements and exception-handling capabilities.
// Undefined behavior if passed-in index is not inside the table.
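// Rough sketch (illustrative): a three-way switch lowers to a JumpNode (a
// PCTableNode subclass, below) fed by control and the switch index, with one
// JumpProjNode per target; projection #k receives control when the index
// selects entry k.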
class PCTableNode : public MultiBranchNode {
virtual uint hash() const; // Target count; table size
virtual uint cmp( const Node &n ) const;
virtual uint size_of() const { return sizeof(*this); }
public:
const uint _size; // Number of targets
PCTableNode( Node *ctrl, Node *idx, uint size ) : MultiBranchNode(2), _size(size) {
init_class_id(Class_PCTable);
init_req(0, ctrl);
init_req(1, idx);
}
virtual int Opcode() const;
virtual const Type *Value( PhaseTransform *phase ) const;
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
virtual const Type *bottom_type() const;
virtual bool pinned() const { return true; }
virtual int required_outcnt() const { return _size; }
};
//------------------------------JumpNode---------------------------------------
// Indirect branch. Uses PCTable above to implement a switch statement.
// It emits as a table load and local branch.
class JumpNode : public PCTableNode {
public:
JumpNode( Node* control, Node* switch_val, uint size) : PCTableNode(control, switch_val, size) {
init_class_id(Class_Jump);
}
virtual int Opcode() const;
virtual const RegMask& out_RegMask() const;
virtual const Node* is_block_proj() const { return this; }
};
class JumpProjNode : public JProjNode {
virtual uint hash() const;
virtual uint cmp( const Node &n ) const;
virtual uint size_of() const { return sizeof(*this); }
private:
const int _dest_bci;
const uint _proj_no;
const int _switch_val;
public:
JumpProjNode(Node* jumpnode, uint proj_no, int dest_bci, int switch_val)
: JProjNode(jumpnode, proj_no), _dest_bci(dest_bci), _proj_no(proj_no), _switch_val(switch_val) {
init_class_id(Class_JumpProj);
}
virtual int Opcode() const;
virtual const Type* bottom_type() const { return Type::CONTROL; }
int dest_bci() const { return _dest_bci; }
int switch_val() const { return _switch_val; }
uint proj_no() const { return _proj_no; }
#ifndef PRODUCT
virtual void dump_spec(outputStream *st) const;
#endif
};
//------------------------------CatchNode--------------------------------------
// Helper node to fork exceptions. "Catch" catches any exceptions thrown by
// a just-prior call. Looks like a PCTableNode but emits no code - just the
// table. The table lookup and branch are implemented by RethrowNode.
class CatchNode : public PCTableNode {
public:
CatchNode( Node *ctrl, Node *idx, uint size ) : PCTableNode(ctrl,idx,size){
init_class_id(Class_Catch);
}
virtual int Opcode() const;
virtual const Type *Value( PhaseTransform *phase ) const;
};
// CatchProjNode controls which exception handler is targeted after a call.
// It is passed in the bci of the target handler, or no_handler_bci in case
// the projection doesn't lead to an exception handler.
class CatchProjNode : public CProjNode {
virtual uint hash() const;
virtual uint cmp( const Node &n ) const;
virtual uint size_of() const { return sizeof(*this); }
private:
const int _handler_bci;
public:
enum {
fall_through_index = 0, // the fall through projection index
catch_all_index = 1, // the projection index for catch-alls
no_handler_bci = -1 // the bci for fall through or catch-all projs
};
CatchProjNode(Node* catchnode, uint proj_no, int handler_bci)
: CProjNode(catchnode, proj_no), _handler_bci(handler_bci) {
init_class_id(Class_CatchProj);
assert(proj_no != fall_through_index || handler_bci < 0, "fall through case must have bci < 0");
}
virtual int Opcode() const;
virtual Node *Identity( PhaseTransform *phase );
virtual const Type *bottom_type() const { return Type::CONTROL; }
int handler_bci() const { return _handler_bci; }
bool is_handler_proj() const { return _handler_bci >= 0; }
#ifndef PRODUCT
virtual void dump_spec(outputStream *st) const;
#endif
};
//---------------------------------CreateExNode--------------------------------
// Helper node to create the exception coming back from a call
class CreateExNode : public TypeNode {
public:
CreateExNode(const Type* t, Node* control, Node* i_o) : TypeNode(t, 2) {
init_req(0, control);
init_req(1, i_o);
}
virtual int Opcode() const;
virtual Node *Identity( PhaseTransform *phase );
virtual bool pinned() const { return true; }
uint match_edge(uint idx) const { return 0; }
virtual uint ideal_reg() const { return Op_RegP; }
};
//------------------------------NeverBranchNode-------------------------------
// The never-taken branch. Used to give the appearance of exiting infinite
// loops to those algorithms that like all paths to be reachable. Encodes
// as nothing (emits no code).
class NeverBranchNode : public MultiBranchNode {
public:
NeverBranchNode( Node *ctrl ) : MultiBranchNode(1) { init_req(0,ctrl); }
virtual int Opcode() const;
virtual bool pinned() const { return true; };
virtual const Type *bottom_type() const { return TypeTuple::IFBOTH; }
virtual const Type *Value( PhaseTransform *phase ) const;
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
virtual int required_outcnt() const { return 2; }
virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { }
virtual uint size(PhaseRegAlloc *ra_) const { return 0; }
#ifndef PRODUCT
virtual void format( PhaseRegAlloc *, outputStream *st ) const;
#endif
};
#endif // SHARE_VM_OPTO_CFGNODE_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/chaitin.cpp
/*
* Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "compiler/compileLog.hpp"
#include "compiler/oopMap.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/addnode.hpp"
#include "opto/block.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/chaitin.hpp"
#include "opto/coalesce.hpp"
#include "opto/connode.hpp"
#include "opto/idealGraphPrinter.hpp"
#include "opto/indexSet.hpp"
#include "opto/machnode.hpp"
#include "opto/memnode.hpp"
#include "opto/opcodes.hpp"
#include "opto/rootnode.hpp"
#ifndef PRODUCT
void LRG::dump() const {
ttyLocker ttyl;
tty->print("%d ",num_regs());
_mask.dump();
if( _msize_valid ) {
if( mask_size() == compute_mask_size() ) tty->print(", #%d ",_mask_size);
else tty->print(", #!!!_%d_vs_%d ",_mask_size,_mask.Size());
} else {
tty->print(", #?(%d) ",_mask.Size());
}
tty->print("EffDeg: ");
if( _degree_valid ) tty->print( "%d ", _eff_degree );
else tty->print("? ");
if( is_multidef() ) {
tty->print("MultiDef ");
if (_defs != NULL) {
tty->print("(");
for (int i = 0; i < _defs->length(); i++) {
tty->print("N%d ", _defs->at(i)->_idx);
}
tty->print(") ");
}
}
else if( _def == 0 ) tty->print("Dead ");
else tty->print("Def: N%d ",_def->_idx);
tty->print("Cost:%4.2g Area:%4.2g Score:%4.2g ",_cost,_area, score());
// Flags
if( _is_oop ) tty->print("Oop ");
if( _is_float ) tty->print("Float ");
if( _is_vector ) tty->print("Vector ");
if( _was_spilled1 ) tty->print("Spilled ");
if( _was_spilled2 ) tty->print("Spilled2 ");
if( _direct_conflict ) tty->print("Direct_conflict ");
if( _fat_proj ) tty->print("Fat ");
if( _was_lo ) tty->print("Lo ");
if( _has_copy ) tty->print("Copy ");
if( _at_risk ) tty->print("Risk ");
if( _must_spill ) tty->print("Must_spill ");
if( _is_bound ) tty->print("Bound ");
if( _msize_valid ) {
if( _degree_valid && lo_degree() ) tty->print("Trivial ");
}
tty->cr();
}
#endif
// Compute score from cost and area. Low score is best to spill.
static double raw_score( double cost, double area ) {
return cost - (area*RegisterCostAreaRatio) * 1.52588e-5;
}
double LRG::score() const {
// Scale _area by RegisterCostAreaRatio/64K then subtract from cost.
// Bigger area lowers the score, encouraging spilling of this live range.
// Bigger cost raises the score, discouraging spilling of this live range.
// (Note: 1/65536 is the magic constant below; I don't trust the C optimizer
// to turn a divide by a constant into a multiply by the reciprocal.)
double score = raw_score( _cost, _area);
// Account for area. Basically, LRGs covering large areas are better
// to spill because more other LRGs get freed up.
if( _area == 0.0 ) // No area? Then no progress to spill
return 1e35;
if( _was_spilled2 ) // If spilled once before, we are unlikely
return score + 1e30; // to make progress again.
if( _cost >= _area*3.0 ) // Tiny area relative to cost
return score + 1e17; // Probably no progress to spill
if( (_cost+_cost) >= _area*3.0 ) // Small area relative to cost
return score + 1e10; // Likely no progress to spill
return score;
}
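// Worked example (hypothetical numbers): if RegisterCostAreaRatio were 65536,
// the area scale factor would be 65536 * 1.52588e-5 ~= 1.0, so a live range
// with _cost == 4 and _area == 100 would get raw_score ~= 4 - 100 == -96.
// Assuming it was not spilled before, none of the penalty terms above apply
// (4 < 300 and 8 < 300), so its low score makes it a prime spill candidate.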
#define NUMBUCKS 3
// Straight out of Tarjan's union-find algorithm
uint LiveRangeMap::find_compress(uint lrg) {
uint cur = lrg;
uint next = _uf_map.at(cur);
while (next != cur) { // Scan chain of equivalences
assert( next < cur, "always union smaller");
cur = next; // until find a fixed-point
next = _uf_map.at(cur);
}
// Core of union-find algorithm: update chain of
// equivalences to be equal to the root.
while (lrg != next) {
uint tmp = _uf_map.at(lrg);
_uf_map.at_put(lrg, next);
lrg = tmp;
}
return lrg;
}
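// Trace on a hypothetical map {1->1, 2->1, 3->2, 4->3}: find_compress(4)
// walks 4->3->2->1 to the self-mapped root 1, then rewrites the chain so the
// map becomes {1->1, 2->1, 3->1, 4->1}; subsequent lookups resolve in one step.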
// Reset the Union-Find map to identity
void LiveRangeMap::reset_uf_map(uint max_lrg_id) {
_max_lrg_id = max_lrg_id;
// Force the Union-Find mapping to be at least this large
_uf_map.at_put_grow(_max_lrg_id, 0);
// Initialize it to be the ID mapping.
for (uint i = 0; i < _max_lrg_id; ++i) {
_uf_map.at_put(i, i);
}
}
// Make all Nodes map directly to their final live range; no need for
// the Union-Find mapping after this call.
void LiveRangeMap::compress_uf_map_for_nodes() {
// For all Nodes, compress mapping
uint unique = _names.length();
for (uint i = 0; i < unique; ++i) {
uint lrg = _names.at(i);
uint compressed_lrg = find(lrg);
if (lrg != compressed_lrg) {
_names.at_put(i, compressed_lrg);
}
}
}
// Like Find above, but no path compress, so bad asymptotic behavior
uint LiveRangeMap::find_const(uint lrg) const {
if (!lrg) {
return lrg; // Ignore the zero LRG
}
// Off the end? This happens during debugging dumps when brand new live
// ranges exist that have not yet been registered with the allocator.
if (lrg >= _max_lrg_id) {
return lrg;
}
uint next = _uf_map.at(lrg);
while (next != lrg) { // Scan chain of equivalences
assert(next < lrg, "always union smaller");
lrg = next; // until find a fixed-point
next = _uf_map.at(lrg);
}
return next;
}
PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher)
: PhaseRegAlloc(unique, cfg, matcher,
#ifndef PRODUCT
print_chaitin_statistics
#else
NULL
#endif
)
, _lrg_map(Thread::current()->resource_area(), unique)
, _live(0)
, _spilled_once(Thread::current()->resource_area())
, _spilled_twice(Thread::current()->resource_area())
, _lo_degree(0), _lo_stk_degree(0), _hi_degree(0), _simplified(0)
, _oldphi(unique)
#ifndef PRODUCT
, _trace_spilling(TraceSpilling || C->method_has_option("TraceSpilling"))
#endif
{
NOT_PRODUCT( Compile::TracePhase t3("ctorChaitin", &_t_ctorChaitin, TimeCompiler); )
_high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg.get_outer_loop_frequency());
// Build a list of basic blocks, sorted by frequency
_blks = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks());
// Experiment with sorting strategies to speed compilation
double cutoff = BLOCK_FREQUENCY(1.0); // Cutoff for high frequency bucket
Block **buckets[NUMBUCKS]; // Array of buckets
uint buckcnt[NUMBUCKS]; // Array of bucket counters
double buckval[NUMBUCKS]; // Array of bucket value cutoffs
for (uint i = 0; i < NUMBUCKS; i++) {
buckets[i] = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks());
buckcnt[i] = 0;
// Bump by three orders of magnitude each time
cutoff *= 0.001;
buckval[i] = cutoff;
for (uint j = 0; j < _cfg.number_of_blocks(); j++) {
buckets[i][j] = NULL;
}
}
// Sort blocks into buckets
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
for (uint j = 0; j < NUMBUCKS; j++) {
if ((j == NUMBUCKS - 1) || (_cfg.get_block(i)->_freq > buckval[j])) {
// Assign block to end of list for appropriate bucket
buckets[j][buckcnt[j]++] = _cfg.get_block(i);
break; // kick out of inner loop
}
}
}
// Dump buckets into final block array
uint blkcnt = 0;
for (uint i = 0; i < NUMBUCKS; i++) {
for (uint j = 0; j < buckcnt[i]; j++) {
_blks[blkcnt++] = buckets[i][j];
}
}
assert(blkcnt == _cfg.number_of_blocks(), "Block array not totally filled");
}
// union 2 sets together.
void PhaseChaitin::Union( const Node *src_n, const Node *dst_n ) {
uint src = _lrg_map.find(src_n);
uint dst = _lrg_map.find(dst_n);
assert(src, "");
assert(dst, "");
assert(src < _lrg_map.max_lrg_id(), "oob");
assert(dst < _lrg_map.max_lrg_id(), "oob");
assert(src < dst, "always union smaller");
_lrg_map.uf_map(dst, src);
}
void PhaseChaitin::new_lrg(const Node *x, uint lrg) {
// Make the Node->LRG mapping
_lrg_map.extend(x->_idx,lrg);
// Make the Union-Find mapping an identity function
_lrg_map.uf_extend(lrg, lrg);
}
int PhaseChaitin::clone_projs(Block* b, uint idx, Node* orig, Node* copy, uint& max_lrg_id) {
assert(b->find_node(copy) == (idx - 1), "incorrect insert index for copy kill projections");
DEBUG_ONLY( Block* borig = _cfg.get_block_for_node(orig); )
int found_projs = 0;
uint cnt = orig->outcnt();
for (uint i = 0; i < cnt; i++) {
Node* proj = orig->raw_out(i);
if (proj->is_MachProj()) {
assert(proj->outcnt() == 0, "only kill projections are expected here");
assert(_cfg.get_block_for_node(proj) == borig, "incorrect block for kill projections");
found_projs++;
// Copy kill projections after the cloned node
Node* kills = proj->clone();
kills->set_req(0, copy);
b->insert_node(kills, idx++);
_cfg.map_node_to_block(kills, b);
new_lrg(kills, max_lrg_id++);
}
}
return found_projs;
}
// Renumber the live ranges to compact them. Makes the IFG smaller.
void PhaseChaitin::compact() {
// Currently the _uf_map contains a series of short chains which are headed
// by a self-cycle. All the chains run from big numbers to little numbers.
// The Find() call chases the chains & shortens them for the next Find call.
// We are going to change this structure slightly. Numbers above a moving
// wave 'i' are unchanged. Numbers below 'j' point directly to their
// compacted live range with no further chaining. There are no chains or
// cycles below 'i', so the Find call no longer works.
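// For example (hypothetical ids): if the self-mapped roots are 1, 3 and 5,
// they are renumbered to 1, 2 and 3; non-root entries are rewritten to their
// root's new number, and the Node->LRG names below are remapped to match.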
uint j=1;
uint i;
for (i = 1; i < _lrg_map.max_lrg_id(); i++) {
uint lr = _lrg_map.uf_live_range_id(i);
// Ignore unallocated live ranges
if (!lr) {
continue;
}
assert(lr <= i, "");
_lrg_map.uf_map(i, ( lr == i ) ? j++ : _lrg_map.uf_live_range_id(lr));
}
// Now change the Node->LR mapping to reflect the compacted names
uint unique = _lrg_map.size();
for (i = 0; i < unique; i++) {
uint lrg_id = _lrg_map.live_range_id(i);
_lrg_map.map(i, _lrg_map.uf_live_range_id(lrg_id));
}
// Reset the Union-Find mapping
_lrg_map.reset_uf_map(j);
}
void PhaseChaitin::Register_Allocate() {
// Above the OLD FP (and in registers) are the incoming arguments. Stack
// slots in this area are called "arg_slots". Above the NEW FP (and in
// registers) is the outgoing argument area; above that is the spill/temp
// area. These are all "frame_slots". Arg_slots start at the zero
// stack_slots and count up to the known arg_size. Frame_slots start at
// the stack_slot #arg_size and go up. After allocation I map stack
// slots to actual offsets. Stack-slots in the arg_slot area are biased
// by the frame_size; stack-slots in the frame_slot area are biased by 0.
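// In slot terms (summarizing the layout above, illustrative only):
//   arg_slots   = [0, arg_size)    incoming arguments, above the OLD FP
//   frame_slots = [arg_size, ...)  outgoing args, then spill/temps, above the NEW FP
// After allocation, arg_slot offsets are biased by frame_size; frame_slots by 0.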
_trip_cnt = 0;
_alternate = 0;
_matcher._allocation_started = true;
ResourceArea split_arena(mtCompiler); // Arena for Split local resources
ResourceArea live_arena(mtCompiler); // Arena for liveness & IFG info
ResourceMark rm(&live_arena);
// Need live-ness for the IFG; need the IFG for coalescing. If the
// liveness is JUST for coalescing, then I can get some mileage by renaming
// all copy-related live ranges low and then using the max copy-related
// live range as a cut-off for LIVE and the IFG. In other words, I can
// build a subset of LIVE and IFG just for copies.
PhaseLive live(_cfg, _lrg_map.names(), &live_arena);
// Need IFG for coalescing and coloring
PhaseIFG ifg(&live_arena);
_ifg = &ifg;
// Come out of SSA world to the Named world. Assign (virtual) registers to
// Nodes. Use the same register for all inputs and the output of PhiNodes
// - effectively ending SSA form. This requires either coalescing live
// ranges or inserting copies. For the moment, we insert "virtual copies"
// - we pretend there is a copy prior to each Phi in predecessor blocks.
// We will attempt to coalesce such "virtual copies" before we manifest
// them for real.
de_ssa();
#ifdef ASSERT
// Verify the graph before RA.
verify(&live_arena);
#endif
{
NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
_live = NULL; // Mark live as being not available
rm.reset_to_mark(); // Reclaim working storage
IndexSet::reset_memory(C, &live_arena);
ifg.init(_lrg_map.max_lrg_id()); // Empty IFG
gather_lrg_masks( false ); // Collect LRG masks
live.compute(_lrg_map.max_lrg_id()); // Compute liveness
_live = &live; // Mark LIVE as being available
}
// Base pointers are currently "used" by instructions which define new
// derived pointers. This makes base pointers live up to where the
// derived pointer is made, but not beyond. Really, they need to be live
// across any GC point where the derived value is live. So this code looks
// at all the GC points, and "stretches" the live range of any base pointer
// to the GC point.
if (stretch_base_pointer_live_ranges(&live_arena)) {
NOT_PRODUCT(Compile::TracePhase t3("computeLive (sbplr)", &_t_computeLive, TimeCompiler);)
// Since some live range stretched, I need to recompute live
_live = NULL;
rm.reset_to_mark(); // Reclaim working storage
IndexSet::reset_memory(C, &live_arena);
ifg.init(_lrg_map.max_lrg_id());
gather_lrg_masks(false);
live.compute(_lrg_map.max_lrg_id());
_live = &live;
}
// Create the interference graph using virtual copies
build_ifg_virtual(); // Include stack slots this time
// Aggressive (but pessimistic) copy coalescing.
// This pass works on virtual copies. Any virtual copies which are not
// coalesced get manifested as actual copies
{
// The IFG is/was triangular. I am 'squaring it up' so Union can run
// faster. Union requires a 'for all' operation which is slow on the
// triangular adjacency matrix (quick reminder: the IFG is 'sparse' -
// meaning I can visit all the Nodes neighbors less than a Node in time
// O(# of neighbors), but I have to visit all the Nodes greater than a
// given Node and search them for an instance, i.e., time O(#MaxLRG)).
_ifg->SquareUp();
PhaseAggressiveCoalesce coalesce(*this);
coalesce.coalesce_driver();
// Insert un-coalesced copies. Visit all Phis. Where inputs to a Phi do
// not match the Phi itself, insert a copy.
coalesce.insert_copies(_matcher);
if (C->failing()) {
return;
}
}
// After aggressive coalesce, attempt a first cut at coloring.
// To color, we need the IFG and for that we need LIVE.
{
NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
_live = NULL;
rm.reset_to_mark(); // Reclaim working storage
IndexSet::reset_memory(C, &live_arena);
ifg.init(_lrg_map.max_lrg_id());
gather_lrg_masks( true );
live.compute(_lrg_map.max_lrg_id());
_live = &live;
}
// Build physical interference graph
uint must_spill = 0;
must_spill = build_ifg_physical(&live_arena);
// If we have a guaranteed spill, might as well spill now
if (must_spill) {
if(!_lrg_map.max_lrg_id()) {
return;
}
// Bail out if unique gets too large (ie - unique > MaxNodeLimit)
C->check_node_count(10*must_spill, "out of nodes before split");
if (C->failing()) {
return;
}
uint new_max_lrg_id = Split(_lrg_map.max_lrg_id(), &split_arena); // Split spilling LRG everywhere
_lrg_map.set_max_lrg_id(new_max_lrg_id);
// Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
// or we failed to split
C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after physical split");
if (C->failing()) {
return;
}
NOT_PRODUCT(C->verify_graph_edges();)
compact(); // Compact LRGs; return new lower max lrg
{
NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
_live = NULL;
rm.reset_to_mark(); // Reclaim working storage
IndexSet::reset_memory(C, &live_arena);
ifg.init(_lrg_map.max_lrg_id()); // Build a new interference graph
gather_lrg_masks( true ); // Collect intersect mask
live.compute(_lrg_map.max_lrg_id()); // Compute LIVE
_live = &live;
}
build_ifg_physical(&live_arena);
_ifg->SquareUp();
_ifg->Compute_Effective_Degree();
// Only do conservative coalescing if requested
if (OptoCoalesce) {
// Conservative (and pessimistic) copy coalescing of those spills
PhaseConservativeCoalesce coalesce(*this);
// If max live ranges greater than cutoff, don't color the stack.
// This cutoff can be larger than below since it is only done once.
coalesce.coalesce_driver();
}
_lrg_map.compress_uf_map_for_nodes();
#ifdef ASSERT
verify(&live_arena, true);
#endif
} else {
ifg.SquareUp();
ifg.Compute_Effective_Degree();
#ifdef ASSERT
set_was_low();
#endif
}
// Prepare for Simplify & Select
cache_lrg_info(); // Count degree of LRGs
// Simplify the InterFerence Graph by removing LRGs of low degree.
// LRGs of low degree are trivially colorable.
Simplify();
// Select colors by re-inserting LRGs back into the IFG in reverse order.
// Return whether or not something spills.
uint spills = Select( );
// If we spill, split and recycle the entire thing
while( spills ) {
if( _trip_cnt++ > 24 ) {
DEBUG_ONLY( dump_for_spill_split_recycle(); )
if( _trip_cnt > 27 ) {
C->record_method_not_compilable("failed spill-split-recycle sanity check");
return;
}
}
if (!_lrg_map.max_lrg_id()) {
return;
}
uint new_max_lrg_id = Split(_lrg_map.max_lrg_id(), &split_arena); // Split spilling LRG everywhere
_lrg_map.set_max_lrg_id(new_max_lrg_id);
// Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
C->check_node_count(2 * NodeLimitFudgeFactor, "out of nodes after split");
if (C->failing()) {
return;
}
compact(); // Compact LRGs; return new lower max lrg
// Nuke the live-ness and interference graph and LiveRanGe info
{
NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
_live = NULL;
rm.reset_to_mark(); // Reclaim working storage
IndexSet::reset_memory(C, &live_arena);
ifg.init(_lrg_map.max_lrg_id());
// Create LiveRanGe array.
// Intersect register masks for all USEs and DEFs
gather_lrg_masks(true);
live.compute(_lrg_map.max_lrg_id());
_live = &live;
}
must_spill = build_ifg_physical(&live_arena);
_ifg->SquareUp();
_ifg->Compute_Effective_Degree();
// Only do conservative coalescing if requested
if (OptoCoalesce) {
// Conservative (and pessimistic) copy coalescing
PhaseConservativeCoalesce coalesce(*this);
// Checking for few live ranges determines how aggressive the coalescing is.
coalesce.coalesce_driver();
}
_lrg_map.compress_uf_map_for_nodes();
#ifdef ASSERT
verify(&live_arena, true);
#endif
cache_lrg_info(); // Count degree of LRGs
// Simplify the InterFerence Graph by removing LRGs of low degree.
// LRGs of low degree are trivially colorable.
Simplify();
// Select colors by re-inserting LRGs back into the IFG in reverse order.
// Return whether or not something spills.
spills = Select();
}
// Count number of Simplify-Select trips per coloring success.
_allocator_attempts += _trip_cnt + 1;
_allocator_successes += 1;
// Peephole remove copies
post_allocate_copy_removal();
// Merge multidefs if multiple defs representing the same value are used in a single block.
merge_multidefs();
#ifdef ASSERT
// Verify the graph after RA.
verify(&live_arena);
#endif
// max_reg is past the largest *register* used.
// Convert that to a frame_slot number.
if (_max_reg <= _matcher._new_SP) {
_framesize = C->out_preserve_stack_slots();
}
else {
_framesize = _max_reg -_matcher._new_SP;
}
assert((int)(_matcher._new_SP+_framesize) >= (int)_matcher._out_arg_limit, "framesize must be large enough");
// This frame must preserve the required fp alignment
_framesize = round_to(_framesize, Matcher::stack_alignment_in_slots());
assert( _framesize >= 0 && _framesize <= 1000000, "sanity check" );
#ifndef PRODUCT
_total_framesize += _framesize;
if ((int)_framesize > _max_framesize) {
_max_framesize = _framesize;
}
#endif
// Convert CISC spills
fixup_spills();
// Log regalloc results
CompileLog* log = Compile::current()->log();
if (log != NULL) {
log->elem("regalloc attempts='%d' success='%d'", _trip_cnt, !C->failing());
}
if (C->failing()) {
return;
}
NOT_PRODUCT(C->verify_graph_edges();)
// Move important info out of the live_arena to longer lasting storage.
alloc_node_regs(_lrg_map.size());
for (uint i=0; i < _lrg_map.size(); i++) {
if (_lrg_map.live_range_id(i)) { // Live range associated with Node?
LRG &lrg = lrgs(_lrg_map.live_range_id(i));
if (!lrg.alive()) {
set_bad(i);
} else if (lrg.num_regs() == 1) {
set1(i, lrg.reg());
} else { // Must be a register-set
if (!lrg._fat_proj) { // Must be aligned adjacent register set
// Live ranges record the highest register in their mask.
// We want the low register for the AD file writer's convenience.
OptoReg::Name hi = lrg.reg(); // Get hi register
OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo
// We have to use pair [lo,lo+1] even for wide vectors because
// the rest of code generation works only with pairs. It is safe
// since for registers encoding only 'lo' is used.
// Second reg from pair is used in ScheduleAndBundle on SPARC where
// vector max size is 8 which corresponds to registers pair.
// It is also used in BuildOopMaps but oop operations are not
// vectorized.
set2(i, lo);
} else { // Misaligned; extract 2 bits
OptoReg::Name hi = lrg.reg(); // Get hi register
lrg.Remove(hi); // Yank from mask
int lo = lrg.mask().find_first_elem(); // Find lo
set_pair(i, hi, lo);
}
}
if( lrg._is_oop ) _node_oops.set(i);
} else {
set_bad(i);
}
}
// Done!
_live = NULL;
_ifg = NULL;
C->set_indexSet_arena(NULL); // ResourceArea is at end of scope
}
void PhaseChaitin::de_ssa() {
// Set initial Names for all Nodes. Most Nodes get the virtual register
// number. A few get the ZERO live range number. These do not
// get allocated, but instead rely on correct scheduling to ensure that
// only one instance is live at a time.
uint lr_counter = 1;
for( uint i = 0; i < _cfg.number_of_blocks(); i++ ) {
Block* block = _cfg.get_block(i);
uint cnt = block->number_of_nodes();
// Handle all the normal Nodes in the block
for( uint j = 0; j < cnt; j++ ) {
Node *n = block->get_node(j);
// Pre-color to the zero live range, or pick virtual register
const RegMask &rm = n->out_RegMask();
_lrg_map.map(n->_idx, rm.is_NotEmpty() ? lr_counter++ : 0);
}
}
// Reset the Union-Find mapping to be identity
_lrg_map.reset_uf_map(lr_counter);
}
// Gather LiveRanGe information, including register masks. Modification of
// cisc spillable in_RegMasks should not be done before AggressiveCoalesce.
void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
// Nail down the frame pointer live range
uint fp_lrg = _lrg_map.live_range_id(_cfg.get_root_node()->in(1)->in(TypeFunc::FramePtr));
lrgs(fp_lrg)._cost += 1e12; // Cost is infinite
// For all blocks
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
Block* block = _cfg.get_block(i);
// For all instructions
for (uint j = 1; j < block->number_of_nodes(); j++) {
Node* n = block->get_node(j);
uint input_edge_start = 1; // Skip control for most nodes
if (n->is_Mach()) {
input_edge_start = n->as_Mach()->oper_input_base();
}
uint idx = n->is_Copy();
// Get virtual register number, same as LiveRanGe index
uint vreg = _lrg_map.live_range_id(n);
LRG& lrg = lrgs(vreg);
if (vreg) { // No vreg means un-allocable (e.g. memory)
// Collect has-copy bit
if (idx) {
lrg._has_copy = 1;
uint clidx = _lrg_map.live_range_id(n->in(idx));
LRG& copy_src = lrgs(clidx);
copy_src._has_copy = 1;
}
// Check for float-vs-int live range (used in register-pressure
// calculations)
const Type *n_type = n->bottom_type();
if (n_type->is_floatingpoint()) {
lrg._is_float = 1;
}
// Check for twice prior spilling. Once prior spilling might have
// spilled 'soft', 2nd prior spill should have spilled 'hard' and
// further spilling is unlikely to make progress.
if (_spilled_once.test(n->_idx)) {
lrg._was_spilled1 = 1;
if (_spilled_twice.test(n->_idx)) {
lrg._was_spilled2 = 1;
}
}
#ifndef PRODUCT
if (trace_spilling() && lrg._def != NULL) {
// collect defs for MultiDef printing
if (lrg._defs == NULL) {
lrg._defs = new (_ifg->_arena) GrowableArray<Node*>(_ifg->_arena, 2, 0, NULL);
lrg._defs->append(lrg._def);
}
lrg._defs->append(n);
}
#endif
// Check for a single def LRG; these can spill nicely
// via rematerialization. Flag as NULL for no def found
// yet, 'n' for a single def, or NodeSentinel for many defs.
lrg._def = lrg._def ? NodeSentinel : n;
// Limit result register mask to acceptable registers
const RegMask &rm = n->out_RegMask();
lrg.AND( rm );
uint ireg = n->ideal_reg();
assert( !n->bottom_type()->isa_oop_ptr() || ireg == Op_RegP,
"oops must be in Op_RegP's" );
// Check for vector live range (only if vector register is used).
// On SPARC a vector uses RegD which could be misaligned, so it is not
// processed as a vector in RA.
if (RegMask::is_vector(ireg))
lrg._is_vector = 1;
assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL,
"vector must be in vector registers");
// Check for bound register masks
const RegMask &lrgmask = lrg.mask();
if (lrgmask.is_bound(ireg)) {
lrg._is_bound = 1;
}
// Check for maximum frequency value
if (lrg._maxfreq < block->_freq) {
lrg._maxfreq = block->_freq;
}
// Check for oop-iness, or long/double
// Check for multi-kill projection
switch (ireg) {
case MachProjNode::fat_proj:
// Fat projections have size equal to number of registers killed
lrg.set_num_regs(rm.Size());
lrg.set_reg_pressure(lrg.num_regs());
lrg._fat_proj = 1;
lrg._is_bound = 1;
break;
case Op_RegP:
#ifdef _LP64
lrg.set_num_regs(2); // Size is 2 stack words
#else
lrg.set_num_regs(1); // Size is 1 stack word
#endif
// Register pressure is tracked relative to the maximum values
// suggested for that platform, INTPRESSURE and FLOATPRESSURE,
// and relative to other types which compete for the same regs.
//
// The following table contains suggested values based on the
// architectures as defined in each .ad file.
// INTPRESSURE and FLOATPRESSURE may be tuned differently for
// compile-speed or performance.
// Note1:
// SPARC and SPARCV9 reg_pressures are at 2 instead of 1
// since .ad registers are defined as high and low halves.
// These reg_pressure values remain compatible with the code
// in is_high_pressure() which relates get_invalid_mask_size(),
// Block::_reg_pressure and INTPRESSURE, FLOATPRESSURE.
// Note2:
// SPARC -d32 has 24 registers available for integral values,
// but only 10 of these are safe for 64-bit longs.
// Using set_reg_pressure(2) for both int and long means
// the allocator will believe it can fit 26 longs into
// registers. Using 2 for longs and 1 for ints means the
// allocator will attempt to put 52 integers into registers.
// The settings below limit this problem to methods with
// many long values which are being run on 32-bit SPARC.
//
// ---------------------- reg_pressure ----------------------
// Each entry is reg_pressure_per_value,number_of_regs
//          RegL  RegI  RegFlags  RegF  RegD  INTPRESSURE  FLOATPRESSURE
// IA32       2     1      1        1     1        6             6
// IA64       1     1      1        1     1       50            41
// SPARC      2     2      2        2     2       48 (24)       52 (26)
// SPARCV9    2     2      2        2     2       48 (24)       52 (26)
// AMD64      1     1      1        1     1       14            15
// -----------------------------------------------------------
#if defined(SPARC)
lrg.set_reg_pressure(2); // use for v9 as well
#else
lrg.set_reg_pressure(1); // normally one value per register
#endif
if( n_type->isa_oop_ptr() ) {
lrg._is_oop = 1;
}
break;
case Op_RegL: // Check for long or double
case Op_RegD:
lrg.set_num_regs(2);
// Define platform specific register pressure
#if defined(SPARC) || defined(ARM32)
lrg.set_reg_pressure(2);
#elif defined(IA32)
if( ireg == Op_RegL ) {
lrg.set_reg_pressure(2);
} else {
lrg.set_reg_pressure(1);
}
#else
lrg.set_reg_pressure(1); // normally one value per register
#endif
// If this def of a double forces a mis-aligned double,
// flag as '_fat_proj' - really flag as allowing misalignment
// AND changes how we count interferences. A mis-aligned
// double can interfere with TWO aligned pairs, or effectively
// FOUR registers!
if (rm.is_misaligned_pair()) {
lrg._fat_proj = 1;
lrg._is_bound = 1;
}
break;
case Op_RegF:
case Op_RegI:
case Op_RegN:
case Op_RegFlags:
case 0: // not an ideal register
lrg.set_num_regs(1);
#ifdef SPARC
lrg.set_reg_pressure(2);
#else
lrg.set_reg_pressure(1);
#endif
break;
case Op_VecS:
assert(Matcher::vector_size_supported(T_BYTE,4), "sanity");
assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity");
lrg.set_num_regs(RegMask::SlotsPerVecS);
lrg.set_reg_pressure(1);
break;
case Op_VecD:
assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecD), "sanity");
assert(RegMask::num_registers(Op_VecD) == RegMask::SlotsPerVecD, "sanity");
assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecD), "vector should be aligned");
lrg.set_num_regs(RegMask::SlotsPerVecD);
lrg.set_reg_pressure(1);
break;
case Op_VecX:
assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecX), "sanity");
assert(RegMask::num_registers(Op_VecX) == RegMask::SlotsPerVecX, "sanity");
assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecX), "vector should be aligned");
lrg.set_num_regs(RegMask::SlotsPerVecX);
lrg.set_reg_pressure(1);
break;
case Op_VecY:
assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecY), "sanity");
assert(RegMask::num_registers(Op_VecY) == RegMask::SlotsPerVecY, "sanity");
assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecY), "vector should be aligned");
lrg.set_num_regs(RegMask::SlotsPerVecY);
lrg.set_reg_pressure(1);
break;
default:
ShouldNotReachHere();
}
}
// Now do the same for inputs
uint cnt = n->req();
// Setup for CISC SPILLING
uint inp = (uint)AdlcVMDeps::Not_cisc_spillable;
if( UseCISCSpill && after_aggressive ) {
inp = n->cisc_operand();
if( inp != (uint)AdlcVMDeps::Not_cisc_spillable )
// Convert operand number to edge index number
inp = n->as_Mach()->operand_index(inp);
}
// Prepare register mask for each input
for( uint k = input_edge_start; k < cnt; k++ ) {
uint vreg = _lrg_map.live_range_id(n->in(k));
if (!vreg) {
continue;
}
// If this instruction is CISC Spillable, add the flags
// bit to its appropriate input
if( UseCISCSpill && after_aggressive && inp == k ) {
#ifndef PRODUCT
if( TraceCISCSpill ) {
tty->print(" use_cisc_RegMask: ");
n->dump();
}
#endif
n->as_Mach()->use_cisc_RegMask();
}
LRG &lrg = lrgs(vreg);
// // Testing for floating point code shape
// Node *test = n->in(k);
// if( test->is_Mach() ) {
// MachNode *m = test->as_Mach();
// int op = m->ideal_Opcode();
// if (n->is_Call() && (op == Op_AddF || op == Op_MulF) ) {
// int zzz = 1;
// }
// }
// Limit the input's register mask to acceptable registers.
// Do not limit registers from uncommon uses before
// AggressiveCoalesce. This effectively pre-virtual-splits
// around uncommon uses of common defs.
const RegMask &rm = n->in_RegMask(k);
if (!after_aggressive && _cfg.get_block_for_node(n->in(k))->_freq > 1000 * block->_freq) {
// Since we are BEFORE aggressive coalesce, leave the register
// mask untrimmed by the call. This encourages more coalescing.
// Later, AFTER aggressive, this live range will have to spill
// but the spiller handles slow-path calls very nicely.
} else {
lrg.AND( rm );
}
// Check for bound register masks
const RegMask &lrgmask = lrg.mask();
uint kreg = n->in(k)->ideal_reg();
bool is_vect = RegMask::is_vector(kreg);
assert(n->in(k)->bottom_type()->isa_vect() == NULL ||
is_vect || kreg == Op_RegD || kreg == Op_RegL,
"vector must be in vector registers");
if (lrgmask.is_bound(kreg))
lrg._is_bound = 1;
// If this use of a double forces a mis-aligned double,
// flag as '_fat_proj' - really flag as allowing misalignment
// AND changes how we count interferences. A mis-aligned
// double can interfere with TWO aligned pairs, or effectively
// FOUR registers!
#ifdef ASSERT
if (is_vect) {
assert(lrgmask.is_aligned_sets(lrg.num_regs()), "vector should be aligned");
assert(!lrg._fat_proj, "sanity");
assert(RegMask::num_registers(kreg) == lrg.num_regs(), "sanity");
}
#endif
if (!is_vect && lrg.num_regs() == 2 && !lrg._fat_proj && rm.is_misaligned_pair()) {
lrg._fat_proj = 1;
lrg._is_bound = 1;
}
// if the LRG is an unaligned pair, we will have to spill
// so clear the LRG's register mask if it is not already spilled
if (!is_vect && !n->is_SpillCopy() &&
(lrg._def == NULL || lrg.is_multidef() || !lrg._def->is_SpillCopy()) &&
lrgmask.is_misaligned_pair()) {
lrg.Clear();
}
// Check for maximum frequency value
if (lrg._maxfreq < block->_freq) {
lrg._maxfreq = block->_freq;
}
} // End for all allocated inputs
} // end for all instructions
} // end for all blocks
// Final per-liverange setup
for (uint i2 = 0; i2 < _lrg_map.max_lrg_id(); i2++) {
LRG &lrg = lrgs(i2);
assert(!lrg._is_vector || !lrg._fat_proj, "sanity");
if (lrg.num_regs() > 1 && !lrg._fat_proj) {
lrg.clear_to_sets();
}
lrg.compute_set_mask_size();
if (lrg.not_free()) { // Handle case where we lose from the start
lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
lrg._direct_conflict = 1;
}
lrg.set_degree(0); // no neighbors in IFG yet
}
}
// Set the was-lo-degree bit. Conservative coalescing should not change the
// colorability of the graph. If any live range was of low-degree before
// coalescing, it should Simplify. This call sets the was-lo-degree bit.
// The bit is checked in Simplify.
void PhaseChaitin::set_was_low() {
#ifdef ASSERT
for (uint i = 1; i < _lrg_map.max_lrg_id(); i++) {
int size = lrgs(i).num_regs();
uint old_was_lo = lrgs(i)._was_lo;
lrgs(i)._was_lo = 0;
if( lrgs(i).lo_degree() ) {
lrgs(i)._was_lo = 1; // Trivially of low degree
} else { // Else check Briggs' assertion
// Briggs' observation is that the lo-degree neighbors of a
// hi-degree live range will not interfere with the color choices
// of said hi-degree live range. The Simplify reverse-stack-coloring
// order takes care of the details. Hence you do not have to count
// low-degree neighbors when determining if this guy colors.
int briggs_degree = 0;
IndexSet *s = _ifg->neighbors(i);
IndexSetIterator elements(s);
uint lidx;
while((lidx = elements.next()) != 0) {
if( !lrgs(lidx).lo_degree() )
briggs_degree += MAX2(size,lrgs(lidx).num_regs());
}
if( briggs_degree < lrgs(i).degrees_of_freedom() )
lrgs(i)._was_lo = 1; // Low degree via Briggs' assertion
}
assert(old_was_lo <= lrgs(i)._was_lo, "_was_lo may not decrease");
}
#endif
}
#define REGISTER_CONSTRAINED 16
// Compute cost/area ratio, in case we spill. Build the lo-degree list.
void PhaseChaitin::cache_lrg_info( ) {
for (uint i = 1; i < _lrg_map.max_lrg_id(); i++) {
LRG &lrg = lrgs(i);
// Check for being of low degree: means we can be trivially colored.
// Low degree, dead or must-spill guys just get to simplify right away
if( lrg.lo_degree() ||
!lrg.alive() ||
lrg._must_spill ) {
// Split low degree list into those guys that must get a
// register and those that can go to register or stack.
// The idea is LRGs that can go register or stack color first when
// they have a good chance of getting a register. The register-only
// lo-degree live ranges always get a register.
OptoReg::Name hi_reg = lrg.mask().find_last_elem();
if( OptoReg::is_stack(hi_reg)) { // Can go to stack?
lrg._next = _lo_stk_degree;
_lo_stk_degree = i;
} else {
lrg._next = _lo_degree;
_lo_degree = i;
}
} else { // Else high degree
lrgs(_hi_degree)._prev = i;
lrg._next = _hi_degree;
lrg._prev = 0;
_hi_degree = i;
}
}
}
// Simplify the IFG by removing LRGs of low degree that have NO copies
void PhaseChaitin::Pre_Simplify( ) {
// Warm up the lo-degree no-copy list
int lo_no_copy = 0;
for (uint i = 1; i < _lrg_map.max_lrg_id(); i++) {
if ((lrgs(i).lo_degree() && !lrgs(i)._has_copy) ||
!lrgs(i).alive() ||
lrgs(i)._must_spill) {
lrgs(i)._next = lo_no_copy;
lo_no_copy = i;
}
}
while( lo_no_copy ) {
uint lo = lo_no_copy;
lo_no_copy = lrgs(lo)._next;
int size = lrgs(lo).num_regs();
// Put the simplified guy on the simplified list.
lrgs(lo)._next = _simplified;
_simplified = lo;
// Yank this guy from the IFG.
IndexSet *adj = _ifg->remove_node( lo );
// If any neighbors' degrees fall below their number of
// allowed registers, then put that neighbor on the low degree
// list. Note that 'degree' can only fall and 'numregs' is
// unchanged by this action. Thus the two are equal at most once,
// so LRGs hit the lo-degree worklists at most once.
IndexSetIterator elements(adj);
uint neighbor;
while ((neighbor = elements.next()) != 0) {
LRG *n = &lrgs(neighbor);
assert( _ifg->effective_degree(neighbor) == n->degree(), "" );
// Check for just becoming of-low-degree
if( n->just_lo_degree() && !n->_has_copy ) {
assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice");
// Put on lo-degree list
n->_next = lo_no_copy;
lo_no_copy = neighbor;
}
}
} // End of while lo-degree no_copy worklist not empty
// No more lo-degree no-copy live ranges to simplify
}
// Simplify the IFG by removing LRGs of low degree.
void PhaseChaitin::Simplify( ) {
while( 1 ) { // Repeat till it is all simplified
// May want to explore simplifying lo_degree before _lo_stk_degree.
// This might result in more spills coloring into registers during
// Select().
while( _lo_degree || _lo_stk_degree ) {
// If possible, pull from lo_stk first
uint lo;
if( _lo_degree ) {
lo = _lo_degree;
_lo_degree = lrgs(lo)._next;
} else {
lo = _lo_stk_degree;
_lo_stk_degree = lrgs(lo)._next;
}
// Put the simplified guy on the simplified list.
lrgs(lo)._next = _simplified;
_simplified = lo;
// If this guy is "at risk" then mark his current neighbors
if( lrgs(lo)._at_risk ) {
IndexSetIterator elements(_ifg->neighbors(lo));
uint datum;
while ((datum = elements.next()) != 0) {
lrgs(datum)._risk_bias = lo;
}
}
// Yank this guy from the IFG.
IndexSet *adj = _ifg->remove_node( lo );
// If any neighbors' degrees fall below their number of
// allowed registers, then put that neighbor on the low degree
// list. Note that 'degree' can only fall and 'numregs' is
// unchanged by this action. Thus the two are equal at most once,
// so LRGs hit the lo-degree worklist at most once.
IndexSetIterator elements(adj);
uint neighbor;
while ((neighbor = elements.next()) != 0) {
LRG *n = &lrgs(neighbor);
#ifdef ASSERT
if( VerifyOpto || VerifyRegisterAllocator ) {
assert( _ifg->effective_degree(neighbor) == n->degree(), "" );
}
#endif
// Check for just becoming of-low-degree just counting registers.
// _must_spill live ranges are already on the low degree list.
if( n->just_lo_degree() && !n->_must_spill ) {
assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice");
// Pull from hi-degree list
uint prev = n->_prev;
uint next = n->_next;
if( prev ) lrgs(prev)._next = next;
else _hi_degree = next;
lrgs(next)._prev = prev;
n->_next = _lo_degree;
_lo_degree = neighbor;
}
}
} // End of while lo-degree/lo_stk_degree worklist not empty
// Check for got everything: is hi-degree list empty?
if( !_hi_degree ) break;
// Time to pick a potential spill guy
uint lo_score = _hi_degree;
double score = lrgs(lo_score).score();
double area = lrgs(lo_score)._area;
double cost = lrgs(lo_score)._cost;
bool bound = lrgs(lo_score)._is_bound;
// Find cheapest guy
debug_only( int lo_no_simplify=0; );
for( uint i = _hi_degree; i; i = lrgs(i)._next ) {
assert( !(*_ifg->_yanked)[i], "" );
// It's just vaguely possible to move hi-degree to lo-degree without
// going through a just-lo-degree stage: If you remove a double from
// a float live range its degree will drop by 2 and you can skip the
// just-lo-degree stage. It's very rare (shows up after 5000+ methods
// in -Xcomp of Java2Demo). So just choose this guy to simplify next.
if( lrgs(i).lo_degree() ) {
lo_score = i;
break;
}
debug_only( if( lrgs(i)._was_lo ) lo_no_simplify=i; );
double iscore = lrgs(i).score();
double iarea = lrgs(i)._area;
double icost = lrgs(i)._cost;
bool ibound = lrgs(i)._is_bound;
// Compare cost/area of i vs cost/area of lo_score. Smaller cost/area
// wins. Ties happen because all live ranges in question have spilled
// a few times before and the spill-score adds a huge number which
// washes out the low order bits. We are choosing the lesser of 2
// evils; in this case pick the largest area to spill.
// Ties also happen when live ranges are defined and used only inside
// one block, in which case their area is 0 and their score is set to
// max. In that case choose a bound live range over an unbound one to
// free registers, or the one with the smaller cost to spill.
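// Illustrative sketch (numbers are made up, not from any real compile):
// with the current best at score=100, area=5.0, cost=8.0, bound=false,
// a candidate with iscore=90 wins outright on score; a candidate with
// iscore=100, iarea=5.0, icost=8.0, ibound=true wins on the
// bound-over-unbound tie-break; and a candidate with iscore=100,
// iarea=5.0, icost=6.0, ibound=false wins on the cheaper spill cost.
// Note the area tie-break (iarea > area) only applies when the current
// best has already spilled twice (_was_spilled2).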
if( iscore < score ||
(iscore == score && iarea > area && lrgs(lo_score)._was_spilled2) ||
(iscore == score && iarea == area &&
( (ibound && !bound) || (ibound == bound && (icost < cost)) )) ) {
lo_score = i;
score = iscore;
area = iarea;
cost = icost;
bound = ibound;
}
}
LRG *lo_lrg = &lrgs(lo_score);
// The live range we choose for spilling is either hi-degree, or very
// rarely it can be low-degree. If we choose a hi-degree live range
// there better not be any lo-degree choices.
assert( lo_lrg->lo_degree() || !lo_no_simplify, "Live range was lo-degree before coalesce; should simplify" );
// Pull from hi-degree list
uint prev = lo_lrg->_prev;
uint next = lo_lrg->_next;
if( prev ) lrgs(prev)._next = next;
else _hi_degree = next;
lrgs(next)._prev = prev;
// Jam him on the lo-degree list, despite his high degree.
// Maybe he'll get a color, and maybe he'll spill.
// Only Select() will know.
lrgs(lo_score)._at_risk = true;
_lo_degree = lo_score;
lo_lrg->_next = 0;
} // End of while not simplified everything
}
// Is 'reg' register legal for 'lrg'?
static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) {
if (reg >= chunk && reg < (chunk + RegMask::CHUNK_SIZE) &&
lrg.mask().Member(OptoReg::add(reg,-chunk))) {
// The RA uses an OptoReg that represents the highest element of a register set.
// For example, vectorX (128-bit) on x86 uses the [XMM,XMMb,XMMc,XMMd] set,
// in which XMMd is used by the RA to represent such vectors. A double value
// uses [XMM,XMMb] pairs and XMMb is used by the RA for it.
// The register mask uses the largest set of bits among overlapping register sets.
// On x86 with AVX it uses 8 bits for each XMM register set.
//
// The 'lrg' already has a cleared-to-sets register mask (done in Select()
// before calling choose_color()). Passing the mask.Member(reg) check above
// indicates that the size (num_regs) of the 'reg' set is less than or equal
// to the 'lrg' set size.
// For set size 1, any register which is a member of the 'lrg' mask is legal.
if (lrg.num_regs()==1)
return true;
// For larger sets only an aligned register with the same set size is legal.
int mask = lrg.num_regs()-1;
if ((reg&mask) == mask)
return true;
}
return false;
}
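// Illustrative arithmetic for the aligned-set check in is_legal_reg() above
// (register numbers are hypothetical): the RA names a set by its highest
// element, so for num_regs == 2 the mask is 1 and only odd reg numbers pass,
// e.g. reg == 5 names the aligned pair {4,5} and (5 & 1) == 1, while
// reg == 4 is rejected because a pair ending at 4 would straddle an
// alignment boundary. For num_regs == 4 the mask is 3, so reg == 7
// (set {4,5,6,7}) passes and reg == 6 does not.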
// Choose a color using the biasing heuristic
OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
// Check for "at_risk" LRG's
uint risk_lrg = _lrg_map.find(lrg._risk_bias);
if( risk_lrg != 0 ) {
// Walk the colored neighbors of the "at_risk" candidate
// Choose a color which is both legal and already taken by a neighbor
// of the "at_risk" candidate in order to improve the chances of the
// "at_risk" candidate of coloring
IndexSetIterator elements(_ifg->neighbors(risk_lrg));
uint datum;
while ((datum = elements.next()) != 0) {
OptoReg::Name reg = lrgs(datum).reg();
// If this LRG's register is legal for us, choose it
if (is_legal_reg(lrg, reg, chunk))
return reg;
}
}
uint copy_lrg = _lrg_map.find(lrg._copy_bias);
if( copy_lrg != 0 ) {
// If the copy-biased LRG already has a color,
if( !(*(_ifg->_yanked))[copy_lrg] ) {
OptoReg::Name reg = lrgs(copy_lrg).reg();
// and that color is legal for this LRG,
if (is_legal_reg(lrg, reg, chunk))
return reg;
} else if( chunk == 0 ) {
// Choose a color which is also legal for the copy-biased LRG
RegMask tempmask = lrg.mask();
tempmask.AND(lrgs(copy_lrg).mask());
tempmask.clear_to_sets(lrg.num_regs());
OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs());
if (OptoReg::is_valid(reg))
return reg;
}
}
// If no bias info exists, just go with the register selection ordering
if (lrg._is_vector || lrg.num_regs() == 2) {
// Find an aligned set
return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk);
}
// CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate
// copy removal to remove many more copies, by preventing a just-assigned
// register from being repeatedly assigned.
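// Illustrative sketch (register names are hypothetical): if the mask is
// {RAX,RCX,RDX}, one call returns the first element (RAX) and the next call
// removes RAX, finds the second element (RCX), re-inserts RAX, and returns
// RCX instead. Two back-to-back definitions therefore tend to land in
// different registers, which gives post-allocation copy removal more freedom.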
OptoReg::Name reg = lrg.mask().find_first_elem();
if( (++_alternate & 1) && OptoReg::is_valid(reg) ) {
// This 'Remove; find; Insert' idiom is an expensive way to find the
// SECOND element in the mask.
lrg.Remove(reg);
OptoReg::Name reg2 = lrg.mask().find_first_elem();
lrg.Insert(reg);
if( OptoReg::is_reg(reg2))
reg = reg2;
}
return OptoReg::add( reg, chunk );
}
// Choose a color in the current chunk
OptoReg::Name PhaseChaitin::choose_color( LRG &lrg, int chunk ) {
assert( C->in_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP-1)), "must not allocate stack0 (inside preserve area)");
assert(C->out_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP+0)), "must not allocate stack0 (inside preserve area)");
if( lrg.num_regs() == 1 || // Common Case
!lrg._fat_proj ) // Aligned+adjacent pairs ok
// Use a heuristic to "bias" the color choice
return bias_color(lrg, chunk);
assert(!lrg._is_vector, "should be not vector here" );
assert( lrg.num_regs() >= 2, "dead live ranges do not color" );
// Fat-proj case or misaligned double argument.
assert(lrg.compute_mask_size() == lrg.num_regs() ||
lrg.num_regs() == 2,"fat projs exactly color" );
assert( !chunk, "always color in 1st chunk" );
// Return the highest element in the set.
return lrg.mask().find_last_elem();
}
// Select colors by re-inserting LRGs back into the IFG. LRGs are re-inserted
// in reverse order of removal. As long as nothing of hi-degree was yanked,
// everything going back is guaranteed a color. Select that color. If some
// hi-degree LRG cannot get a color then we record that we must spill.
uint PhaseChaitin::Select( ) {
uint spill_reg = LRG::SPILL_REG;
_max_reg = OptoReg::Name(0); // Past max register used
while( _simplified ) {
// Pull next LRG from the simplified list - in reverse order of removal
uint lidx = _simplified;
LRG *lrg = &lrgs(lidx);
_simplified = lrg->_next;
#ifndef PRODUCT
if (trace_spilling()) {
ttyLocker ttyl;
tty->print_cr("L%d selecting degree %d degrees_of_freedom %d", lidx, lrg->degree(),
lrg->degrees_of_freedom());
lrg->dump();
}
#endif
// Re-insert into the IFG
_ifg->re_insert(lidx);
if( !lrg->alive() ) continue;
// capture allstackedness flag before mask is hacked
const int is_allstack = lrg->mask().is_AllStack();
// Yeah, yeah, yeah, I know, I know. I can refactor this
// to avoid the GOTO, although the refactored code will not
// be much clearer. We arrive here IFF we have a stack-based
// live range that cannot color in the current chunk, and it
// has to move into the next free stack chunk.
int chunk = 0; // Current chunk is first chunk
retry_next_chunk:
// Remove neighbor colors
IndexSet *s = _ifg->neighbors(lidx);
debug_only(RegMask orig_mask = lrg->mask();)
IndexSetIterator elements(s);
uint neighbor;
while ((neighbor = elements.next()) != 0) {
// Note that neighbor might be a spill_reg. In this case, exclusion
// of its color will be a no-op, since the spill_reg chunk is in outer
// space. Also, if neighbor is in a different chunk, this exclusion
// will be a no-op. (Later on, if lrg runs out of possible colors in
// its chunk, a new chunk of color may be tried, in which case
// examination of neighbors is started again, at retry_next_chunk.)
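// Illustrative example: while coloring in chunk 0 the window is
// [0, RegMask::CHUNK_SIZE). A neighbor already pushed to the next stack
// chunk has nreg >= RegMask::CHUNK_SIZE, and a spilling neighbor carries a
// pseudo register >= LRG::SPILL_REG; both fall outside the window, so the
// SUBTRACT below is skipped and only same-chunk colors are excluded.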
LRG &nlrg = lrgs(neighbor);
OptoReg::Name nreg = nlrg.reg();
// Only subtract masks in the same chunk
if( nreg >= chunk && nreg < chunk + RegMask::CHUNK_SIZE ) {
#ifndef PRODUCT
uint size = lrg->mask().Size();
RegMask rm = lrg->mask();
#endif
lrg->SUBTRACT(nlrg.mask());
#ifndef PRODUCT
if (trace_spilling() && lrg->mask().Size() != size) {
ttyLocker ttyl;
tty->print("L%d ", lidx);
rm.dump();
tty->print(" intersected L%d ", neighbor);
nlrg.mask().dump();
tty->print(" removed ");
rm.SUBTRACT(lrg->mask());
rm.dump();
tty->print(" leaving ");
lrg->mask().dump();
tty->cr();
}
#endif
}
}
//assert(is_allstack == lrg->mask().is_AllStack(), "nbrs must not change AllStackedness");
// Aligned pairs need aligned masks
assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
if (lrg->num_regs() > 1 && !lrg->_fat_proj) {
lrg->clear_to_sets();
}
// Check if a color is available and if so pick the color
OptoReg::Name reg = choose_color( *lrg, chunk );
#ifdef SPARC
debug_only(lrg->compute_set_mask_size());
assert(lrg->num_regs() < 2 || lrg->is_bound() || is_even(reg-1), "allocate all doubles aligned");
#endif
//---------------
// If we fail to color and the AllStack flag is set, trigger
// a chunk-rollover event
if(!OptoReg::is_valid(OptoReg::add(reg,-chunk)) && is_allstack) {
// Bump register mask up to next stack chunk
chunk += RegMask::CHUNK_SIZE;
lrg->Set_All();
goto retry_next_chunk;
}
//---------------
// Did we get a color?
else if( OptoReg::is_valid(reg)) {
#ifndef PRODUCT
RegMask avail_rm = lrg->mask();
#endif
// Record selected register
lrg->set_reg(reg);
if( reg >= _max_reg ) // Compute max register limit
_max_reg = OptoReg::add(reg,1);
// Fold reg back into normal space
reg = OptoReg::add(reg,-chunk);
// If the live range is not bound, then we actually had some choices
// to make. In this case, the mask has more bits in it than the colors
// chosen. Restrict the mask to just what was picked.
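// Illustrative example (register numbers are hypothetical): for a pair
// (n_regs == 2) whose chosen color folds back to reg == 9, the code below
// clears the mask and leaves exactly {8, 9} with mask_size == 2, i.e. the
// high register plus its lower partner.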
int n_regs = lrg->num_regs();
assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
if (n_regs == 1 || !lrg->_fat_proj) {
assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecY, "sanity");
lrg->Clear(); // Clear the mask
lrg->Insert(reg); // Set regmask to match selected reg
// For vectors and pairs, also insert the low bit of the pair
for (int i = 1; i < n_regs; i++)
lrg->Insert(OptoReg::add(reg,-i));
lrg->set_mask_size(n_regs);
} else { // Else fatproj
// mask must be equal to fatproj bits, by definition
}
#ifndef PRODUCT
if (trace_spilling()) {
ttyLocker ttyl;
tty->print("L%d selected ", lidx);
lrg->mask().dump();
tty->print(" from ");
avail_rm.dump();
tty->cr();
}
#endif
// Note that reg is the highest-numbered register in the newly-bound mask.
} // end color available case
//---------------
// Live range is live and no colors available
else {
assert( lrg->alive(), "" );
assert( !lrg->_fat_proj || lrg->is_multidef() ||
lrg->_def->outcnt() > 0, "fat_proj cannot spill");
assert( !orig_mask.is_AllStack(), "All Stack does not spill" );
// Assign the special spillreg register
lrg->set_reg(OptoReg::Name(spill_reg++));
// Do not empty the regmask; leave mask_size lying around
// for use during Spilling
#ifndef PRODUCT
if( trace_spilling() ) {
ttyLocker ttyl;
tty->print("L%d spilling with neighbors: ", lidx);
s->dump();
debug_only(tty->print(" original mask: "));
debug_only(orig_mask.dump());
dump_lrg(lidx);
}
#endif
} // end spill case
}
return spill_reg-LRG::SPILL_REG; // Return number of spills
}
// Copy 'was_spilled'-edness from the source Node to the dst Node.
void PhaseChaitin::copy_was_spilled( Node *src, Node *dst ) {
if( _spilled_once.test(src->_idx) ) {
_spilled_once.set(dst->_idx);
lrgs(_lrg_map.find(dst))._was_spilled1 = 1;
if( _spilled_twice.test(src->_idx) ) {
_spilled_twice.set(dst->_idx);
lrgs(_lrg_map.find(dst))._was_spilled2 = 1;
}
}
}
// Set the 'spilled_once' or 'spilled_twice' flag on a node.
void PhaseChaitin::set_was_spilled( Node *n ) {
if( _spilled_once.test_set(n->_idx) )
_spilled_twice.set(n->_idx);
}
// Convert Ideal spill instructions into proper FramePtr + offset Loads and
// Stores. Use-def chains are NOT preserved, but Node->LRG->reg maps are.
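// Illustrative sketch (x86-flavored, instruction names are hypothetical):
// if an input of an addI instruction ends up on the stack after allocation,
// cisc_version() rewrites "addI rax, rbx" into the memory-operand form
// "addI rax, [FramePtr + #stk_offset]", with the frame pointer wired in as
// the base register below.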
void PhaseChaitin::fixup_spills() {
// This function does only cisc spill work.
if( !UseCISCSpill ) return;
NOT_PRODUCT( Compile::TracePhase t3("fixupSpills", &_t_fixupSpills, TimeCompiler); )
// Grab the Frame Pointer
Node *fp = _cfg.get_root_block()->head()->in(1)->in(TypeFunc::FramePtr);
// For all blocks
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
Block* block = _cfg.get_block(i);
// For all instructions in block
uint last_inst = block->end_idx();
for (uint j = 1; j <= last_inst; j++) {
Node* n = block->get_node(j);
// Dead instruction???
assert( n->outcnt() != 0 || // Nothing dead after post alloc
C->top() == n || // Or the random TOP node
n->is_Proj(), // Or a fat-proj kill node
"No dead instructions after post-alloc" );
int inp = n->cisc_operand();
if( inp != AdlcVMDeps::Not_cisc_spillable ) {
// Convert operand number to edge index number
MachNode *mach = n->as_Mach();
inp = mach->operand_index(inp);
Node *src = n->in(inp); // Value to load or store
LRG &lrg_cisc = lrgs(_lrg_map.find_const(src));
OptoReg::Name src_reg = lrg_cisc.reg();
// Doubles record the HIGH register of an adjacent pair.
src_reg = OptoReg::add(src_reg,1-lrg_cisc.num_regs());
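// Illustrative example (numbers are hypothetical): for a double, num_regs
// is 2 and the LRG records the high half of the pair, say src_reg == 11;
// adding 1 - 2 == -1 yields 10, the low half, which names the base slot
// used for the stack offset below.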
if( OptoReg::is_stack(src_reg) ) { // If input is on stack
// This is a CISC Spill, get stack offset and construct new node
#ifndef PRODUCT
if( TraceCISCSpill ) {
tty->print(" reg-instr: ");
n->dump();
}
#endif
int stk_offset = reg2offset(src_reg);
// Bailout if we might exceed node limit when spilling this instruction
C->check_node_count(0, "out of nodes fixing spills");
if (C->failing()) return;
// Transform node
MachNode *cisc = mach->cisc_version(stk_offset, C)->as_Mach();
cisc->set_req(inp,fp); // Base register is frame pointer
if( cisc->oper_input_base() > 1 && mach->oper_input_base() <= 1 ) {
assert( cisc->oper_input_base() == 2, "Only adding one edge");
cisc->ins_req(1,src); // Requires a memory edge
}
block->map_node(cisc, j); // Insert into basic block
n->subsume_by(cisc, C); // Correct graph
//
++_used_cisc_instructions;
#ifndef PRODUCT
if( TraceCISCSpill ) {
tty->print(" cisc-instr: ");
cisc->dump();
}
#endif
} else {
#ifndef PRODUCT
if( TraceCISCSpill ) {
tty->print(" using reg-instr: ");
n->dump();
}
#endif
++_unused_cisc_instructions; // input can be on stack
}
}
} // End of for all instructions
} // End of for all blocks
}
// Helper to stretch above; recursively discover the base Node for a
// given derived Node. Easy for AddP-related machine nodes, but needs
// to be recursive for derived Phis.
Node *PhaseChaitin::find_base_for_derived( Node **derived_base_map, Node *derived, uint &maxlrg ) {
// See if already computed; if so return it
if( derived_base_map[derived->_idx] )
return derived_base_map[derived->_idx];
// See if this happens to be a base.
// NOTE: we use TypePtr instead of TypeOopPtr because we can have
// pointers derived from NULL! These are always along paths that
// can't happen at run-time but the optimizer cannot deduce it so
// we have to handle it gracefully.
assert(!derived->bottom_type()->isa_narrowoop() ||
derived->bottom_type()->make_ptr()->is_ptr()->_offset == 0, "sanity");
const TypePtr *tj = derived->bottom_type()->isa_ptr();
// If it's an OOP with a non-zero offset, then it is derived.
if( tj == NULL || tj->_offset == 0 ) {
derived_base_map[derived->_idx] = derived;
return derived;
}
// Derived is NULL+offset? Base is NULL!
if( derived->is_Con() ) {
Node *base = _matcher.mach_null();
assert(base != NULL, "sanity");
if (base->in(0) == NULL) {
// Initialize it once and make it shared:
// set control to _root and place it into Start block
// (where top() node is placed).
base->init_req(0, _cfg.get_root_node());
Block *startb = _cfg.get_block_for_node(C->top());
uint node_pos = startb->find_node(C->top());
startb->insert_node(base, node_pos);
_cfg.map_node_to_block(base, startb);
assert(_lrg_map.live_range_id(base) == 0, "should not have LRG yet");
// The loadConP0 might have projection nodes depending on architecture
// Add the projection nodes to the CFG
for (DUIterator_Fast imax, i = base->fast_outs(imax); i < imax; i++) {
Node* use = base->fast_out(i);
if (use->is_MachProj()) {
startb->insert_node(use, ++node_pos);
_cfg.map_node_to_block(use, startb);
new_lrg(use, maxlrg++);
}
}
}
if (_lrg_map.live_range_id(base) == 0) {
new_lrg(base, maxlrg++);
}
assert(base->in(0) == _cfg.get_root_node() && _cfg.get_block_for_node(base) == _cfg.get_block_for_node(C->top()), "base NULL should be shared");
derived_base_map[derived->_idx] = base;
return base;
}
// Check for AddP-related opcodes
if (!derived->is_Phi()) {
assert(derived->as_Mach()->ideal_Opcode() == Op_AddP, err_msg_res("but is: %s", derived->Name()));
Node *base = derived->in(AddPNode::Base);
derived_base_map[derived->_idx] = base;
return base;
}
// Recursively find bases for Phis.
// First check to see if we can avoid a base Phi here.
Node *base = find_base_for_derived( derived_base_map, derived->in(1),maxlrg);
uint i;
for( i = 2; i < derived->req(); i++ )
if( base != find_base_for_derived( derived_base_map,derived->in(i),maxlrg))
break;
// Went to the end without finding any different bases?
if( i == derived->req() ) { // No need for a base Phi here
derived_base_map[derived->_idx] = base;
return base;
}
// Now we see we need a base-Phi here to merge the bases
const Type *t = base->bottom_type();
base = new (C) PhiNode( derived->in(0), t );
for( i = 1; i < derived->req(); i++ ) {
base->init_req(i, find_base_for_derived(derived_base_map, derived->in(i), maxlrg));
t = t->meet(base->in(i)->bottom_type());
}
base->as_Phi()->set_type(t);
// Search the current block for an existing base-Phi
Block *b = _cfg.get_block_for_node(derived);
for( i = 1; i <= b->end_idx(); i++ ) {// Search for matching Phi
Node *phi = b->get_node(i);
if( !phi->is_Phi() ) { // Found end of Phis with no match?
b->insert_node(base, i); // Must insert created Phi here as base
_cfg.map_node_to_block(base, b);
new_lrg(base,maxlrg++);
break;
}
// See if Phi matches.
uint j;
for( j = 1; j < base->req(); j++ )
if( phi->in(j) != base->in(j) &&
!(phi->in(j)->is_Con() && base->in(j)->is_Con()) ) // allow different NULLs
break;
if( j == base->req() ) { // All inputs match?
base = phi; // Then use existing 'phi' and drop 'base'
break;
}
}
// Cache info for later passes
derived_base_map[derived->_idx] = base;
return base;
}
// At each Safepoint, insert extra debug edges for each pair of derived value/
// base pointer that is live across the Safepoint for oopmap building. The
// edge pairs get added in after sfpt->jvmtail()->oopoff(), but are in the
// required edge set.
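// Illustrative example: if a derived pointer computed as base + #16 (an
// AddP-shaped machine node) is live across a safepoint, the code below
// appends the (derived, base) pair to the safepoint's inputs, so the oopmap
// builder can relocate the derived value when GC moves the base object.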
bool PhaseChaitin::stretch_base_pointer_live_ranges(ResourceArea *a) {
int must_recompute_live = false;
uint maxlrg = _lrg_map.max_lrg_id();
Node **derived_base_map = (Node**)a->Amalloc(sizeof(Node*)*C->unique());
memset( derived_base_map, 0, sizeof(Node*)*C->unique() );
// For all blocks in RPO do...
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
Block* block = _cfg.get_block(i);
// Note use of deep-copy constructor. I cannot hammer the original
// liveout bits, because they are needed by the following coalesce pass.
IndexSet liveout(_live->live(block));
for (uint j = block->end_idx() + 1; j > 1; j--) {
Node* n = block->get_node(j - 1);
// Pre-split compares of loop-phis. Loop-phis form a cycle we would
// like to see in the same register. Compare uses the loop-phi and so
// extends its live range BUT cannot be part of the cycle. If this
// extended live range overlaps with the update of the loop-phi value
// we need both alive at the same time -- which requires at least 1
// copy. But because Intel has only 2-address instructions we end up with
// at least 2 copies, one before the loop-phi update instruction and
// one after. Instead we split the input to the compare just after the
// phi.
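// Illustrative sketch: for phi = Phi(loop, init, incr) feeding
// CmpI(phi, limit), a MachSpillCopy of phi is inserted right after the phi
// and the compare is rewired to use the copy, so the compare no longer
// stretches the phi's own live range across the update of phi.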
if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_CmpI ) {
Node *phi = n->in(1);
if( phi->is_Phi() && phi->as_Phi()->region()->is_Loop() ) {
Block *phi_block = _cfg.get_block_for_node(phi);
if (_cfg.get_block_for_node(phi_block->pred(2)) == block) {
const RegMask *mask = C->matcher()->idealreg2spillmask[Op_RegI];
Node *spill = new (C) MachSpillCopyNode( phi, *mask, *mask );
insert_proj( phi_block, 1, spill, maxlrg++ );
n->set_req(1,spill);
must_recompute_live = true;
}
}
}
// Get value being defined
uint lidx = _lrg_map.live_range_id(n);
// Ignore the occasional brand-new live range
if (lidx && lidx < _lrg_map.max_lrg_id()) {
// Remove from live-out set
liveout.remove(lidx);
// Copies do not define a new value and so do not interfere.
// Remove the copy's source from the liveout set before interfering.
uint idx = n->is_Copy();
if (idx) {
liveout.remove(_lrg_map.live_range_id(n->in(idx)));
}
}
// Found a safepoint?
JVMState *jvms = n->jvms();
if( jvms ) {
// Now scan for a live derived pointer
IndexSetIterator elements(&liveout);
uint neighbor;
while ((neighbor = elements.next()) != 0) {
// Find reaching DEF for base and derived values
// This works because we are still in SSA during this call.
Node *derived = lrgs(neighbor)._def;
const TypePtr *tj = derived->bottom_type()->isa_ptr();
assert(!derived->bottom_type()->isa_narrowoop() ||
derived->bottom_type()->make_ptr()->is_ptr()->_offset == 0, "sanity");
// If it's an OOP with a non-zero offset, then it is derived.
if( tj && tj->_offset != 0 && tj->isa_oop_ptr() ) {
Node *base = find_base_for_derived(derived_base_map, derived, maxlrg);
assert(base->_idx < _lrg_map.size(), "");
// Add reaching DEFs of derived pointer and base pointer as a
// pair of inputs
n->add_req(derived);
n->add_req(base);
// See if the base pointer is already live to this point.
// Since I'm working on the SSA form, live-ness amounts to
// reaching def's. So if I find the base's live range then
// I know the base's def reaches here.
if ((_lrg_map.live_range_id(base) >= _lrg_map.max_lrg_id() || // (Brand new base (hence not live) or
!liveout.member(_lrg_map.live_range_id(base))) && // not live) AND
(_lrg_map.live_range_id(base) > 0) && // not a constant
_cfg.get_block_for_node(base) != block) { // base not def'd in blk)
// Base pointer is not currently live. Since I stretched
// the base pointer to here and it crosses basic-block
// boundaries, the global live info is now incorrect.
// Recompute live.
must_recompute_live = true;
} // End of if base pointer is not live to debug info
}
} // End of scan all live data for derived ptrs crossing GC point
} // End of if found a GC point
// Make all inputs live
if (!n->is_Phi()) { // Phi function uses come from prior block
for (uint k = 1; k < n->req(); k++) {
uint lidx = _lrg_map.live_range_id(n->in(k));
if (lidx < _lrg_map.max_lrg_id()) {
liveout.insert(lidx);
}
}
}
} // End of forall instructions in block
liveout.clear(); // Free the memory used by liveout.
} // End of forall blocks
_lrg_map.set_max_lrg_id(maxlrg);
// If I created a new live range I need to recompute live
if (maxlrg != _ifg->_maxlrg) {
must_recompute_live = true;
}
return must_recompute_live != 0;
}
// Extend the node to LRG mapping
void PhaseChaitin::add_reference(const Node *node, const Node *old_node) {
_lrg_map.extend(node->_idx, _lrg_map.live_range_id(old_node));
}
#ifndef PRODUCT
void PhaseChaitin::dump(const Node *n) const {
uint r = (n->_idx < _lrg_map.size()) ? _lrg_map.find_const(n) : 0;
tty->print("L%d",r);
if (r && n->Opcode() != Op_Phi) {
if( _node_regs ) { // Got a post-allocation copy of allocation?
tty->print("[");
OptoReg::Name second = get_reg_second(n);
if( OptoReg::is_valid(second) ) {
if( OptoReg::is_reg(second) )
tty->print("%s:",Matcher::regName[second]);
else
tty->print("%s+%d:",OptoReg::regname(OptoReg::c_frame_pointer), reg2offset_unchecked(second));
}
OptoReg::Name first = get_reg_first(n);
if( OptoReg::is_reg(first) )
tty->print("%s]",Matcher::regName[first]);
else
tty->print("%s+%d]",OptoReg::regname(OptoReg::c_frame_pointer), reg2offset_unchecked(first));
} else
n->out_RegMask().dump();
}
tty->print("/N%d\t",n->_idx);
tty->print("%s === ", n->Name());
uint k;
for (k = 0; k < n->req(); k++) {
Node *m = n->in(k);
if (!m) {
tty->print("_ ");
}
else {
uint r = (m->_idx < _lrg_map.size()) ? _lrg_map.find_const(m) : 0;
tty->print("L%d",r);
// Data MultiNodes can have projections with no real registers.
// Don't die while dumping them.
int op = n->Opcode();
if( r && op != Op_Phi && op != Op_Proj && op != Op_SCMemProj) {
if( _node_regs ) {
tty->print("[");
OptoReg::Name second = get_reg_second(n->in(k));
if( OptoReg::is_valid(second) ) {
if( OptoReg::is_reg(second) )
tty->print("%s:",Matcher::regName[second]);
else
tty->print("%s+%d:",OptoReg::regname(OptoReg::c_frame_pointer),
reg2offset_unchecked(second));
}
OptoReg::Name first = get_reg_first(n->in(k));
if( OptoReg::is_reg(first) )
tty->print("%s]",Matcher::regName[first]);
else
tty->print("%s+%d]",OptoReg::regname(OptoReg::c_frame_pointer),
reg2offset_unchecked(first));
} else
n->in_RegMask(k).dump();
}
tty->print("/N%d ",m->_idx);
}
}
if( k < n->len() && n->in(k) ) tty->print("| ");
for( ; k < n->len(); k++ ) {
Node *m = n->in(k);
if(!m) {
break;
}
uint r = (m->_idx < _lrg_map.size()) ? _lrg_map.find_const(m) : 0;
tty->print("L%d",r);
tty->print("/N%d ",m->_idx);
}
if( n->is_Mach() ) n->as_Mach()->dump_spec(tty);
else n->dump_spec(tty);
if( _spilled_once.test(n->_idx ) ) {
tty->print(" Spill_1");
if( _spilled_twice.test(n->_idx ) )
tty->print(" Spill_2");
}
tty->print("\n");
}
void PhaseChaitin::dump(const Block *b) const {
b->dump_head(&_cfg);
// For all instructions
for( uint j = 0; j < b->number_of_nodes(); j++ )
dump(b->get_node(j));
// Print live-out info at end of block
if( _live ) {
tty->print("Liveout: ");
IndexSet *live = _live->live(b);
IndexSetIterator elements(live);
tty->print("{");
uint i;
while ((i = elements.next()) != 0) {
tty->print("L%d ", _lrg_map.find_const(i));
}
tty->print_cr("}");
}
tty->print("\n");
}
void PhaseChaitin::dump() const {
tty->print( "--- Chaitin -- argsize: %d framesize: %d ---\n",
_matcher._new_SP, _framesize );
// For all blocks
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
dump(_cfg.get_block(i));
}
// End of per-block dump
tty->print("\n");
if (!_ifg) {
tty->print("(No IFG.)\n");
return;
}
// Dump LRG array
tty->print("--- Live RanGe Array ---\n");
for (uint i2 = 1; i2 < _lrg_map.max_lrg_id(); i2++) {
tty->print("L%d: ",i2);
if (i2 < _ifg->_maxlrg) {
lrgs(i2).dump();
}
else {
tty->print_cr("new LRG");
}
}
tty->cr();
// Dump lo-degree list
tty->print("Lo degree: ");
for(uint i3 = _lo_degree; i3; i3 = lrgs(i3)._next )
tty->print("L%d ",i3);
tty->cr();
// Dump lo-stk-degree list
tty->print("Lo stk degree: ");
for(uint i4 = _lo_stk_degree; i4; i4 = lrgs(i4)._next )
tty->print("L%d ",i4);
tty->cr();
// Dump hi-degree list
tty->print("Hi degree: ");
for(uint i5 = _hi_degree; i5; i5 = lrgs(i5)._next )
tty->print("L%d ",i5);
tty->cr();
}
void PhaseChaitin::dump_degree_lists() const {
// Dump lo-degree list
tty->print("Lo degree: ");
for( uint i = _lo_degree; i; i = lrgs(i)._next )
tty->print("L%d ",i);
tty->cr();
// Dump lo-stk-degree list
tty->print("Lo stk degree: ");
for(uint i2 = _lo_stk_degree; i2; i2 = lrgs(i2)._next )
tty->print("L%d ",i2);
tty->cr();
// Dump hi-degree list
tty->print("Hi degree: ");
for(uint i3 = _hi_degree; i3; i3 = lrgs(i3)._next )
tty->print("L%d ",i3);
tty->cr();
}
void PhaseChaitin::dump_simplified() const {
tty->print("Simplified: ");
for( uint i = _simplified; i; i = lrgs(i)._next )
tty->print("L%d ",i);
tty->cr();
}
static char *print_reg( OptoReg::Name reg, const PhaseChaitin *pc, char *buf ) {
if ((int)reg < 0)
sprintf(buf, "<OptoReg::%d>", (int)reg);
else if (OptoReg::is_reg(reg))
strcpy(buf, Matcher::regName[reg]);
else
sprintf(buf,"%s + #%d",OptoReg::regname(OptoReg::c_frame_pointer),
pc->reg2offset(reg));
return buf+strlen(buf);
}
// Dump a register name into a buffer. Be intelligent if we get called
// before allocation is complete.
char *PhaseChaitin::dump_register( const Node *n, char *buf ) const {
if( this == NULL ) { // Not got anything?
sprintf(buf,"N%d",n->_idx); // Then use Node index
} else if( _node_regs ) {
// Post allocation, use direct mappings, no LRG info available
print_reg( get_reg_first(n), this, buf );
} else {
uint lidx = _lrg_map.find_const(n); // Grab LRG number
if( !_ifg ) {
sprintf(buf,"L%d",lidx); // No register binding yet
} else if( !lidx ) { // Special, not allocated value
strcpy(buf,"Special");
} else {
if (lrgs(lidx)._is_vector) {
if (lrgs(lidx).mask().is_bound_set(lrgs(lidx).num_regs()))
print_reg( lrgs(lidx).reg(), this, buf ); // a bound machine register
else
sprintf(buf,"L%d",lidx); // No register binding yet
} else if( (lrgs(lidx).num_regs() == 1)
? lrgs(lidx).mask().is_bound1()
: lrgs(lidx).mask().is_bound_pair() ) {
// Hah! We have a bound machine register
print_reg( lrgs(lidx).reg(), this, buf );
} else {
sprintf(buf,"L%d",lidx); // No register binding yet
}
}
}
return buf+strlen(buf);
}
void PhaseChaitin::dump_for_spill_split_recycle() const {
if( WizardMode && (PrintCompilation || PrintOpto) ) {
// Display which live ranges need to be split and the allocator's state
tty->print_cr("Graph-Coloring Iteration %d will split the following live ranges", _trip_cnt);
for (uint bidx = 1; bidx < _lrg_map.max_lrg_id(); bidx++) {
if( lrgs(bidx).alive() && lrgs(bidx).reg() >= LRG::SPILL_REG ) {
tty->print("L%d: ", bidx);
lrgs(bidx).dump();
}
}
tty->cr();
dump();
}
}
void PhaseChaitin::dump_frame() const {
const char *fp = OptoReg::regname(OptoReg::c_frame_pointer);
const TypeTuple *domain = C->tf()->domain();
const int argcnt = domain->cnt() - TypeFunc::Parms;
// Incoming arguments in registers dump
for( int k = 0; k < argcnt; k++ ) {
OptoReg::Name parmreg = _matcher._parm_regs[k].first();
if( OptoReg::is_reg(parmreg)) {
const char *reg_name = OptoReg::regname(parmreg);
tty->print("#r%3.3d %s", parmreg, reg_name);
parmreg = _matcher._parm_regs[k].second();
if( OptoReg::is_reg(parmreg)) {
tty->print(":%s", OptoReg::regname(parmreg));
}
tty->print(" : parm %d: ", k);
domain->field_at(k + TypeFunc::Parms)->dump();
tty->cr();
}
}
// Check for un-owned padding above incoming args
OptoReg::Name reg = _matcher._new_SP;
if( reg > _matcher._in_arg_limit ) {
reg = OptoReg::add(reg, -1);
tty->print_cr("#r%3.3d %s+%2d: pad0, owned by CALLER", reg, fp, reg2offset_unchecked(reg));
}
// Incoming argument area dump
OptoReg::Name begin_in_arg = OptoReg::add(_matcher._old_SP,C->out_preserve_stack_slots());
while( reg > begin_in_arg ) {
reg = OptoReg::add(reg, -1);
tty->print("#r%3.3d %s+%2d: ",reg,fp,reg2offset_unchecked(reg));
int j;
for( j = 0; j < argcnt; j++) {
if( _matcher._parm_regs[j].first() == reg ||
_matcher._parm_regs[j].second() == reg ) {
tty->print("parm %d: ",j);
domain->field_at(j + TypeFunc::Parms)->dump();
tty->cr();
break;
}
}
if( j >= argcnt )
tty->print_cr("HOLE, owned by SELF");
}
// Old outgoing preserve area
while( reg > _matcher._old_SP ) {
reg = OptoReg::add(reg, -1);
tty->print_cr("#r%3.3d %s+%2d: old out preserve",reg,fp,reg2offset_unchecked(reg));
}
// Old SP
tty->print_cr("# -- Old %s -- Framesize: %d --",fp,
reg2offset_unchecked(OptoReg::add(_matcher._old_SP,-1)) - reg2offset_unchecked(_matcher._new_SP)+jintSize);
// Preserve area dump
int fixed_slots = C->fixed_slots();
OptoReg::Name begin_in_preserve = OptoReg::add(_matcher._old_SP, -(int)C->in_preserve_stack_slots());
OptoReg::Name return_addr = _matcher.return_addr();
reg = OptoReg::add(reg, -1);
while (OptoReg::is_stack(reg)) {
tty->print("#r%3.3d %s+%2d: ",reg,fp,reg2offset_unchecked(reg));
if (return_addr == reg) {
tty->print_cr("return address");
} else if (reg >= begin_in_preserve) {
// Preserved slots are present on x86
if (return_addr == OptoReg::add(reg, VMRegImpl::slots_per_word))
tty->print_cr("saved fp register");
else if (return_addr == OptoReg::add(reg, 2*VMRegImpl::slots_per_word) &&
VerifyStackAtCalls)
tty->print_cr("0xBADB100D +VerifyStackAtCalls");
else
tty->print_cr("in_preserve");
} else if ((int)OptoReg::reg2stack(reg) < fixed_slots) {
tty->print_cr("Fixed slot %d", OptoReg::reg2stack(reg));
} else {
tty->print_cr("pad2, stack alignment");
}
reg = OptoReg::add(reg, -1);
}
// Spill area dump
reg = OptoReg::add(_matcher._new_SP, _framesize );
while( reg > _matcher._out_arg_limit ) {
reg = OptoReg::add(reg, -1);
tty->print_cr("#r%3.3d %s+%2d: spill",reg,fp,reg2offset_unchecked(reg));
}
// Outgoing argument area dump
while( reg > OptoReg::add(_matcher._new_SP, C->out_preserve_stack_slots()) ) {
reg = OptoReg::add(reg, -1);
tty->print_cr("#r%3.3d %s+%2d: outgoing argument",reg,fp,reg2offset_unchecked(reg));
}
// Outgoing new preserve area
while( reg > _matcher._new_SP ) {
reg = OptoReg::add(reg, -1);
tty->print_cr("#r%3.3d %s+%2d: new out preserve",reg,fp,reg2offset_unchecked(reg));
}
tty->print_cr("#");
}
void PhaseChaitin::dump_bb( uint pre_order ) const {
tty->print_cr("---dump of B%d---",pre_order);
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
Block* block = _cfg.get_block(i);
if (block->_pre_order == pre_order) {
dump(block);
}
}
}
void PhaseChaitin::dump_lrg( uint lidx, bool defs_only ) const {
tty->print_cr("---dump of L%d---",lidx);
if (_ifg) {
if (lidx >= _lrg_map.max_lrg_id()) {
tty->print("Attempt to print live range index beyond max live range.\n");
return;
}
tty->print("L%d: ",lidx);
if (lidx < _ifg->_maxlrg) {
lrgs(lidx).dump();
} else {
tty->print_cr("new LRG");
}
}
if( _ifg && lidx < _ifg->_maxlrg) {
tty->print("Neighbors: %d - ", _ifg->neighbor_cnt(lidx));
_ifg->neighbors(lidx)->dump();
tty->cr();
}
// For all blocks
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
Block* block = _cfg.get_block(i);
int dump_once = 0;
// For all instructions
for( uint j = 0; j < block->number_of_nodes(); j++ ) {
Node *n = block->get_node(j);
if (_lrg_map.find_const(n) == lidx) {
if (!dump_once++) {
tty->cr();
block->dump_head(&_cfg);
}
dump(n);
continue;
}
if (!defs_only) {
uint cnt = n->req();
for( uint k = 1; k < cnt; k++ ) {
Node *m = n->in(k);
if (!m) {
continue; // be robust in the dumper
}
if (_lrg_map.find_const(m) == lidx) {
if (!dump_once++) {
tty->cr();
block->dump_head(&_cfg);
}
dump(n);
}
}
}
}
} // End of per-block dump
tty->cr();
}
#endif // not PRODUCT
int PhaseChaitin::_final_loads = 0;
int PhaseChaitin::_final_stores = 0;
int PhaseChaitin::_final_memoves= 0;
int PhaseChaitin::_final_copies = 0;
double PhaseChaitin::_final_load_cost = 0;
double PhaseChaitin::_final_store_cost = 0;
double PhaseChaitin::_final_memove_cost= 0;
double PhaseChaitin::_final_copy_cost = 0;
int PhaseChaitin::_conserv_coalesce = 0;
int PhaseChaitin::_conserv_coalesce_pair = 0;
int PhaseChaitin::_conserv_coalesce_trie = 0;
int PhaseChaitin::_conserv_coalesce_quad = 0;
int PhaseChaitin::_post_alloc = 0;
int PhaseChaitin::_lost_opp_pp_coalesce = 0;
int PhaseChaitin::_lost_opp_cflow_coalesce = 0;
int PhaseChaitin::_used_cisc_instructions = 0;
int PhaseChaitin::_unused_cisc_instructions = 0;
int PhaseChaitin::_allocator_attempts = 0;
int PhaseChaitin::_allocator_successes = 0;
#ifndef PRODUCT
uint PhaseChaitin::_high_pressure = 0;
uint PhaseChaitin::_low_pressure = 0;
void PhaseChaitin::print_chaitin_statistics() {
tty->print_cr("Inserted %d spill loads, %d spill stores, %d mem-mem moves and %d copies.", _final_loads, _final_stores, _final_memoves, _final_copies);
tty->print_cr("Total load cost= %6.0f, store cost = %6.0f, mem-mem cost = %5.2f, copy cost = %5.0f.", _final_load_cost, _final_store_cost, _final_memove_cost, _final_copy_cost);
tty->print_cr("Adjusted spill cost = %7.0f.",
_final_load_cost*4.0 + _final_store_cost * 2.0 +
_final_copy_cost*1.0 + _final_memove_cost*12.0);
tty->print("Conservatively coalesced %d copies, %d pairs",
_conserv_coalesce, _conserv_coalesce_pair);
if( _conserv_coalesce_trie || _conserv_coalesce_quad )
tty->print(", %d tries, %d quads", _conserv_coalesce_trie, _conserv_coalesce_quad);
tty->print_cr(", %d post alloc.", _post_alloc);
if( _lost_opp_pp_coalesce || _lost_opp_cflow_coalesce )
tty->print_cr("Lost coalesce opportunity, %d private-private, and %d cflow interfered.",
_lost_opp_pp_coalesce, _lost_opp_cflow_coalesce );
if( _used_cisc_instructions || _unused_cisc_instructions )
tty->print_cr("Used cisc instruction %d, remained in register %d",
_used_cisc_instructions, _unused_cisc_instructions);
if( _allocator_successes != 0 )
tty->print_cr("Average allocation trips %f", (float)_allocator_attempts/(float)_allocator_successes);
tty->print_cr("High Pressure Blocks = %d, Low Pressure Blocks = %d", _high_pressure, _low_pressure);
}
#endif // not PRODUCT
C:\hotspot-69087d08d473\src\share\vm/opto/chaitin.hpp
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_OPTO_CHAITIN_HPP
#define SHARE_VM_OPTO_CHAITIN_HPP
#include "code/vmreg.hpp"
#include "libadt/port.hpp"
#include "memory/resourceArea.hpp"
#include "opto/connode.hpp"
#include "opto/live.hpp"
#include "opto/matcher.hpp"
#include "opto/phase.hpp"
#include "opto/regalloc.hpp"
#include "opto/regmask.hpp"
class LoopTree;
class MachCallNode;
class MachSafePointNode;
class Matcher;
class PhaseCFG;
class PhaseLive;
class PhaseRegAlloc;
class PhaseChaitin;
#define OPTO_DEBUG_SPLIT_FREQ BLOCK_FREQUENCY(0.001)
#define OPTO_LRG_HIGH_FREQ BLOCK_FREQUENCY(0.25)
//------------------------------LRG--------------------------------------------
// Live-RanGe structure.
class LRG : public ResourceObj {
friend class VMStructs;
public:
static const uint AllStack_size = 0xFFFFF; // This mask size is used to tell that the mask of this LRG supports stack positions
enum { SPILL_REG=29999 }; // Register number of a spilled LRG
double _cost; // 2 for loads/1 for stores times block freq
double _area; // Sum of all simultaneously live values
double score() const; // Compute score from cost and area
double _maxfreq; // Maximum frequency of any def or use
Node *_def; // Check for multi-def live ranges
#ifndef PRODUCT
GrowableArray<Node*>* _defs;
#endif
uint _risk_bias; // Index of LRG which we want to avoid color
uint _copy_bias; // Index of LRG which we want to share color
uint _next; // Index of next LRG in linked list
uint _prev; // Index of prev LRG in linked list
private:
uint _reg; // Chosen register; undefined if mask is plural
public:
// Return chosen register for this LRG. Error if the LRG is not bound to
// a single register.
OptoReg::Name reg() const { return OptoReg::Name(_reg); }
void set_reg( OptoReg::Name r ) { _reg = r; }
private:
uint _eff_degree; // Effective degree: Sum of neighbors' _num_regs
public:
int degree() const { assert( _degree_valid , "" ); return _eff_degree; }
// Degree starts not valid and any change to the IFG neighbor
// set makes it not valid.
void set_degree( uint degree ) {
_eff_degree = degree;
debug_only(_degree_valid = 1;)
assert(!_mask.is_AllStack() || (_mask.is_AllStack() && lo_degree()), "_eff_degree can't be bigger than AllStack_size - _num_regs if the mask supports stack registers");
}
// Made a change that hammered degree
void invalid_degree() { debug_only(_degree_valid=0;) }
// Incrementally modify degree. If it was correct, it should remain correct
void inc_degree( uint mod ) {
_eff_degree += mod;
assert(!_mask.is_AllStack() || (_mask.is_AllStack() && lo_degree()), "_eff_degree can't be bigger than AllStack_size - _num_regs if the mask supports stack registers");
}
// Compute the degree between 2 live ranges
int compute_degree( LRG &l ) const;
private:
RegMask _mask; // Allowed registers for this LRG
uint _mask_size; // cache of _mask.Size();
public:
int compute_mask_size() const { return _mask.is_AllStack() ? AllStack_size : _mask.Size(); }
void set_mask_size( int size ) {
assert((size == (int)AllStack_size) || (size == (int)_mask.Size()), "");
_mask_size = size;
#ifdef ASSERT
_msize_valid=1;
if (_is_vector) {
assert(!_fat_proj, "sanity");
_mask.verify_sets(_num_regs);
} else if (_num_regs == 2 && !_fat_proj) {
_mask.verify_pairs();
}
#endif
}
void compute_set_mask_size() { set_mask_size(compute_mask_size()); }
int mask_size() const { assert( _msize_valid, "mask size not valid" );
return _mask_size; }
// Get the last mask size computed, even if it does not match the
// count of bits in the current mask.
int get_invalid_mask_size() const { return _mask_size; }
const RegMask &mask() const { return _mask; }
void set_mask( const RegMask &rm ) { _mask = rm; debug_only(_msize_valid=0;)}
void AND( const RegMask &rm ) { _mask.AND(rm); debug_only(_msize_valid=0;)}
void SUBTRACT( const RegMask &rm ) { _mask.SUBTRACT(rm); debug_only(_msize_valid=0;)}
void Clear() { _mask.Clear() ; debug_only(_msize_valid=1); _mask_size = 0; }
void Set_All() { _mask.Set_All(); debug_only(_msize_valid=1); _mask_size = RegMask::CHUNK_SIZE; }
void Insert( OptoReg::Name reg ) { _mask.Insert(reg); debug_only(_msize_valid=0;) }
void Remove( OptoReg::Name reg ) { _mask.Remove(reg); debug_only(_msize_valid=0;) }
void clear_to_pairs() { _mask.clear_to_pairs(); debug_only(_msize_valid=0;) }
void clear_to_sets() { _mask.clear_to_sets(_num_regs); debug_only(_msize_valid=0;) }
// Number of registers this live range uses when it colors
private:
uint8 _num_regs; // 2 for Longs and Doubles, 1 for all else
// except _num_regs is kill count for fat_proj
public:
int num_regs() const { return _num_regs; }
void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; }
private:
// Number of physical registers this live range uses when it colors
// Architecture and register-set dependent
uint8 _reg_pressure;
public:
void set_reg_pressure(int i) { _reg_pressure = i; }
int reg_pressure() const { return _reg_pressure; }
// How much 'wiggle room' does this live range have?
// How many color choices can it make (scaled by _num_regs)?
int degrees_of_freedom() const { return mask_size() - _num_regs; }
// Bound LRGs have ZERO degrees of freedom. We also count
// must_spill as bound.
bool is_bound () const { return _is_bound; }
// Negative degrees-of-freedom; even with no neighbors this
// live range must spill.
bool not_free() const { return degrees_of_freedom() < 0; }
// Is this live range of "low-degree"? Trivially colorable?
bool lo_degree () const { return degree() <= degrees_of_freedom(); }
// Is this live range just barely "low-degree"? Trivially colorable?
bool just_lo_degree () const { return degree() == degrees_of_freedom(); }
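// Illustrative example: an LRG with mask_size() == 6 and _num_regs == 1 has
// 5 degrees of freedom; it is lo_degree() (trivially colorable) whenever its
// effective degree is <= 5, and just_lo_degree() exactly when it equals 5.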
uint _is_oop:1, // Live-range holds an oop
_is_float:1, // True if in float registers
_is_vector:1, // True if in vector registers
_was_spilled1:1, // True if prior spilling on def
_was_spilled2:1, // True if twice prior spilling on def
_is_bound:1, // live range starts life with no
// degrees of freedom.
_direct_conflict:1, // True if def and use registers in conflict
_must_spill:1, // live range has lost all degrees of freedom
// If _fat_proj is set, live range does NOT require aligned, adjacent
// registers and has NO interferences.
// If _fat_proj is clear, live range requires num_regs() to be a power of
// 2, and it requires registers to form an aligned, adjacent set.
_fat_proj:1, //
_was_lo:1, // Was lo-degree prior to coalesce
_msize_valid:1, // _mask_size cache valid
_degree_valid:1, // _degree cache valid
_has_copy:1, // Adjacent to some copy instruction
_at_risk:1; // Simplify says this guy is at risk to spill
// Alive if non-zero, dead if zero
bool alive() const { return _def != NULL; }
bool is_multidef() const { return _def == NodeSentinel; }
bool is_singledef() const { return _def != NodeSentinel; }
#ifndef PRODUCT
void dump( ) const;
#endif
};
//------------------------------IFG--------------------------------------------
// InterFerence Graph
// An undirected graph implementation. Created with a fixed number of
// vertices. Edges can be added & tested. Vertices can be removed, then
// added back later with all edges intact. Can add edges between one vertex
// and a list of other vertices. Can union vertices (and their edges)
// together. The IFG needs to be really really fast, and also fairly
// abstract! It needs abstraction so I can fiddle with the implementation to
// get even more speed.
class PhaseIFG : public Phase {
friend class VMStructs;
// Current implementation: a triangular adjacency list.
// Array of adjacency-lists, indexed by live-range number
IndexSet *_adjs;
// Assertion bit for proper use of Squaring
bool _is_square;
// Live range structure goes here
LRG *_lrgs; // Array of LRG structures
public:
// Largest live-range number
uint _maxlrg;
Arena *_arena;
// Keep track of inserted and deleted Nodes
VectorSet *_yanked;
PhaseIFG( Arena *arena );
void init( uint maxlrg );
// Add edge between a and b. Returns true if actually added.
int add_edge( uint a, uint b );
// Add edge between a and everything in the vector
void add_vector( uint a, IndexSet *vec );
// Test for edge existence
int test_edge( uint a, uint b ) const;
// Square-up matrix for faster Union
void SquareUp();
// Return number of LRG neighbors
uint neighbor_cnt( uint a ) const { return _adjs[a].count(); }
// Union edges of b into a on Squared-up matrix
void Union( uint a, uint b );
// Test for edge in Squared-up matrix
int test_edge_sq( uint a, uint b ) const;
// Yank a Node and all connected edges from the IFG. Be prepared to
// re-insert the yanked Node in reverse order of yanking. Return a
// list of neighbors (edges) yanked.
IndexSet *remove_node( uint a );
// Reinsert a yanked Node
void re_insert( uint a );
// Return set of neighbors
IndexSet *neighbors( uint a ) const { return &_adjs[a]; }
#ifndef PRODUCT
// Dump the IFG
void dump() const;
void stats() const;
void verify( const PhaseChaitin * ) const;
#endif
//--------------- Live Range Accessors
LRG &lrgs(uint idx) const { assert(idx < _maxlrg, "oob"); return _lrgs[idx]; }
// Compute and set effective degree. Might be folded into SquareUp().
void Compute_Effective_Degree();
// Compute effective degree as the sum of neighbors' _sizes.
int effective_degree( uint lidx ) const;
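// Illustrative example: a live range whose IFG neighbors are a double
// (2 registers) and two ints (1 register each) has an effective degree of
// 4, not a plain neighbor count of 3.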
};
// The LiveRangeMap class is responsible for storing node to live range id mapping.
// Each node is mapped to a live range id (a virtual register). Nodes that are
// not considered for register allocation are given live range id 0.
class LiveRangeMap VALUE_OBJ_CLASS_SPEC {
private:
uint _max_lrg_id;
// Union-find map. Declared as a short for speed.
// Indexed by live-range number, it returns the compacted live-range number
LRG_List _uf_map;
// Map from Nodes to live ranges
LRG_List _names;
// Straight out of Tarjan's union-find algorithm
uint find_compress(const Node *node) {
uint lrg_id = find_compress(_names.at(node->_idx));
_names.at_put(node->_idx, lrg_id);
return lrg_id;
}
uint find_compress(uint lrg);
public:
const LRG_List& names() {
return _names;
}
uint max_lrg_id() const {
return _max_lrg_id;
}
void set_max_lrg_id(uint max_lrg_id) {
_max_lrg_id = max_lrg_id;
}
uint size() const {
return _names.length();
}
uint live_range_id(uint idx) const {
return _names.at(idx);
}
uint live_range_id(const Node *node) const {
return _names.at(node->_idx);
}
uint uf_live_range_id(uint lrg_id) const {
return _uf_map.at(lrg_id);
}
void map(uint idx, uint lrg_id) {
_names.at_put(idx, lrg_id);
}
void uf_map(uint dst_lrg_id, uint src_lrg_id) {
_uf_map.at_put(dst_lrg_id, src_lrg_id);
}
void extend(uint idx, uint lrg_id) {
_names.at_put_grow(idx, lrg_id);
}
void uf_extend(uint dst_lrg_id, uint src_lrg_id) {
_uf_map.at_put_grow(dst_lrg_id, src_lrg_id);
}
LiveRangeMap(Arena* arena, uint unique)
: _names(arena, unique, unique, 0)
, _uf_map(arena, unique, unique, 0)
, _max_lrg_id(0) {}
uint find_id( const Node *n ) {
uint retval = live_range_id(n);
assert(retval == find(n),"Invalid node to lidx mapping");
return retval;
}
// Reset the Union-Find map to identity
void reset_uf_map(uint max_lrg_id);
// Make all Nodes map directly to their final live range; no need for
// the Union-Find mapping after this call.
void compress_uf_map_for_nodes();
uint find(uint lidx) {
uint uf_lidx = _uf_map.at(lidx);
return (uf_lidx == lidx) ? uf_lidx : find_compress(lidx);
}
// Convert a Node into a Live Range Index - a lidx
uint find(const Node *node) {
uint lidx = live_range_id(node);
uint uf_lidx = _uf_map.at(lidx);
return (uf_lidx == lidx) ? uf_lidx : find_compress(node);
}
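// Illustrative example: if _uf_map holds 7 -> 5, 5 -> 2 and 2 -> 2, then
// find(7) returns 2, and path compression (per the Tarjan-style
// find_compress above) rewrites entries along the chain to point directly
// at 2, making later lookups effectively constant time.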
// Like Find above, but no path compress, so bad asymptotic behavior
uint find_const(uint lrg) const;
// Like Find above, but no path compress, so bad asymptotic behavior
uint find_const(const Node *node) const {
if(node->_idx >= (uint)_names.length()) {
return 0; // not mapped, usual for debug dump
}
return find_const(_names.at(node->_idx));
}
};
//------------------------------Chaitin----------------------------------------
// Briggs-Chaitin style allocation, mostly.
class PhaseChaitin : public PhaseRegAlloc {
friend class VMStructs;
int _trip_cnt;
int _alternate;
LRG &lrgs(uint idx) const { return _ifg->lrgs(idx); }
PhaseLive *_live; // Liveness, used in the interference graph
PhaseIFG *_ifg; // Interference graph (for original chunk)
Node_List **_lrg_nodes; // Array of node; lists for lrgs which spill
VectorSet _spilled_once; // Nodes that have been spilled
VectorSet _spilled_twice; // Nodes that have been spilled twice
// Combine the Live Range Indices for these 2 Nodes into a single live
// range. Future requests for any Node in either live range will
// return the live range index for the combined live range.
void Union( const Node *src, const Node *dst );
void new_lrg( const Node *x, uint lrg );
// Compact live ranges, removing unused ones. Return new maxlrg.
void compact();
uint _lo_degree; // Head of lo-degree LRGs list
uint _lo_stk_degree; // Head of lo-stk-degree LRGs list
uint _hi_degree; // Head of hi-degree LRGs list
uint _simplified; // Linked list head of simplified LRGs
// Helper functions for Split()
uint split_DEF( Node *def, Block *b, int loc, uint max, Node **Reachblock, Node **debug_defs, GrowableArray<uint> splits, int slidx );
uint split_USE( Node *def, Block *b, Node *use, uint useidx, uint max, bool def_down, bool cisc_sp, GrowableArray<uint> splits, int slidx );
//------------------------------clone_projs------------------------------------
// After cloning some rematerialized instruction, clone any MachProj's that
// follow it. Example: Intel zero is XOR, kills flags. Sparc FP constants
// use G3 as an address temp.
int clone_projs(Block* b, uint idx, Node* orig, Node* copy, uint& max_lrg_id);
int clone_projs(Block* b, uint idx, Node* orig, Node* copy, LiveRangeMap& lrg_map) {
uint max_lrg_id = lrg_map.max_lrg_id();
int found_projs = clone_projs(b, idx, orig, copy, max_lrg_id);
if (found_projs > 0) {
// max_lrg_id is updated during call above
lrg_map.set_max_lrg_id(max_lrg_id);
}
return found_projs;
}
Node *split_Rematerialize(Node *def, Block *b, uint insidx, uint &maxlrg, GrowableArray<uint> splits,
int slidx, uint *lrg2reach, Node **Reachblock, bool walkThru);
// True if lidx is used before any real register is def'd in the block
bool prompt_use( Block *b, uint lidx );
Node *get_spillcopy_wide( Node *def, Node *use, uint uidx );
// Insert the spill at chosen location. Skip over any intervening Proj's or
// Phis. Skip over a CatchNode and projs, inserting in the fall-through block
// instead. Update high-pressure indices. Create a new live range.
void insert_proj( Block *b, uint i, Node *spill, uint maxlrg );
bool is_high_pressure( Block *b, LRG *lrg, uint insidx );
uint _oldphi; // Node index which separates pre-allocation nodes
Block **_blks; // Array of blocks sorted by frequency for coalescing
float _high_frequency_lrg; // Frequency at which LRG will be spilled for debug info
#ifndef PRODUCT
bool _trace_spilling;
#endif
public:
PhaseChaitin( uint unique, PhaseCFG &cfg, Matcher &matcher );
~PhaseChaitin() {}
LiveRangeMap _lrg_map;
// Do all the real work of allocate
void Register_Allocate();
float high_frequency_lrg() const { return _high_frequency_lrg; }
#ifndef PRODUCT
bool trace_spilling() const { return _trace_spilling; }
#endif
private:
// De-SSA the world. Assign registers to Nodes. Use the same register for
// all inputs to a PhiNode, effectively coalescing live ranges. Insert
// copies as needed.
void de_ssa();
// Add edge between reg and everything in the vector.
// Same as _ifg->add_vector(reg,live) EXCEPT use the RegMask
// information to trim the set of interferences. Return the
// count of edges added.
void interfere_with_live( uint reg, IndexSet *live );
// Count register pressure for asserts
uint count_int_pressure( IndexSet *liveout );
uint count_float_pressure( IndexSet *liveout );
// Build the interference graph using virtual registers only.
// Used for aggressive coalescing.
void build_ifg_virtual( );
// Build the interference graph using physical registers when available.
// That is, if 2 live ranges are simultaneously alive but in their
// acceptable register sets do not overlap, then they do not interfere.
uint build_ifg_physical( ResourceArea *a );
// Gather LiveRanGe information, including register masks and base pointer/
// derived pointer relationships.
void gather_lrg_masks( bool mod_cisc_masks );
// Force the bases of derived pointers to be alive at GC points.
bool stretch_base_pointer_live_ranges( ResourceArea *a );
// Helper to stretch above; recursively discover the base Node for
// a given derived Node. Easy for AddP-related machine nodes, but
// needs to be recursive for derived Phis.
Node *find_base_for_derived( Node **derived_base_map, Node *derived, uint &maxlrg );
// Set the was-lo-degree bit. Conservative coalescing should not change the
// colorability of the graph. If any live range was of low-degree before
// coalescing, it should Simplify. This call sets the was-lo-degree bit.
void set_was_low();
// Split live-ranges that must spill due to register conflicts (as opposed
// to capacity spills). Typically these are things def'd in a register
// and used on the stack or vice-versa.
void pre_spill();
// Init LRG caching of degree, numregs. Init lo_degree list.
void cache_lrg_info( );
// Simplify the IFG by removing LRGs of low degree with no copies
void Pre_Simplify();
// Simplify the IFG by removing LRGs of low degree
void Simplify();
// Select colors by re-inserting edges into the IFG.
// Return TRUE if any spills occurred.
uint Select( );
// Helper function for select which allows biased coloring
OptoReg::Name choose_color( LRG &lrg, int chunk );
// Helper function which implements biasing heuristic
OptoReg::Name bias_color( LRG &lrg, int chunk );
// Split uncolorable live ranges
// Return new number of live ranges
uint Split(uint maxlrg, ResourceArea* split_arena);
// Copy 'was_spilled'-edness from one Node to another.
void copy_was_spilled( Node *src, Node *dst );
// Set the 'spilled_once' or 'spilled_twice' flag on a node.
void set_was_spilled( Node *n );
// Convert ideal spill-nodes into machine loads & stores
// Sets C->failing() when fixup spills could not complete (node limit exceeded).
void fixup_spills();
// Post-Allocation peephole copy removal
void post_allocate_copy_removal();
Node *skip_copies( Node *c );
// Replace the old node with the current live version of that value
// and yank the old value if it's dead.
int replace_and_yank_if_dead( Node *old, OptoReg::Name nreg,
Block *current_block, Node_List& value, Node_List& regnd ) {
Node* v = regnd[nreg];
assert(v->outcnt() != 0, "no dead values");
old->replace_by(v);
return yank_if_dead(old, current_block, &value, &regnd);
}
int yank_if_dead( Node *old, Block *current_block, Node_List *value, Node_List *regnd ) {
return yank_if_dead_recurse(old, old, current_block, value, regnd);
}
int yank_if_dead_recurse(Node *old, Node *orig_old, Block *current_block,
Node_List *value, Node_List *regnd);
int yank( Node *old, Block *current_block, Node_List *value, Node_List *regnd );
int elide_copy( Node *n, int k, Block *current_block, Node_List &value, Node_List &regnd, bool can_change_regs );
int use_prior_register( Node *copy, uint idx, Node *def, Block *current_block, Node_List &value, Node_List &regnd );
bool may_be_copy_of_callee( Node *def ) const;
// If nreg already contains the same constant as val then eliminate it
bool eliminate_copy_of_constant(Node* val, Node* n,
Block *current_block, Node_List& value, Node_List &regnd,
OptoReg::Name nreg, OptoReg::Name nreg2);
// Extend the node to LRG mapping
void add_reference( const Node *node, const Node *old_node);
// Record the first use of a def in the block for a register.
class RegDefUse {
Node* _def;
Node* _first_use;
public:
RegDefUse() : _def(NULL), _first_use(NULL) { }
Node* def() const { return _def; }
Node* first_use() const { return _first_use; }
void update(Node* def, Node* use) {
if (_def != def) {
_def = def;
_first_use = use;
}
}
void clear() {
_def = NULL;
_first_use = NULL;
}
};
typedef GrowableArray<RegDefUse> RegToDefUseMap;
int possibly_merge_multidef(Node *n, uint k, Block *block, RegToDefUseMap& reg2defuse);
// Merge nodes that are a part of a multidef lrg and produce the same value within a block.
void merge_multidefs();
private:
static int _final_loads, _final_stores, _final_copies, _final_memoves;
static double _final_load_cost, _final_store_cost, _final_copy_cost, _final_memove_cost;
static int _conserv_coalesce, _conserv_coalesce_pair;
static int _conserv_coalesce_trie, _conserv_coalesce_quad;
static int _post_alloc;
static int _lost_opp_pp_coalesce, _lost_opp_cflow_coalesce;
static int _used_cisc_instructions, _unused_cisc_instructions;
static int _allocator_attempts, _allocator_successes;
#ifndef PRODUCT
static uint _high_pressure, _low_pressure;
void dump() const;
void dump( const Node *n ) const;
void dump( const Block * b ) const;
void dump_degree_lists() const;
void dump_simplified() const;
void dump_lrg( uint lidx, bool defs_only) const;
void dump_lrg( uint lidx) const {
// dump defs and uses by default
dump_lrg(lidx, false);
}
void dump_bb( uint pre_order ) const;
// Verify that base pointers and derived pointers are still sane
void verify_base_ptrs( ResourceArea *a ) const;
void verify( ResourceArea *a, bool verify_ifg = false ) const;
void dump_for_spill_split_recycle() const;
public:
void dump_frame() const;
char *dump_register( const Node *n, char *buf ) const;
private:
static void print_chaitin_statistics();
#endif
friend class PhaseCoalesce;
friend class PhaseAggressiveCoalesce;
friend class PhaseConservativeCoalesce;
};
#endif // SHARE_VM_OPTO_CHAITIN_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/classes.cpp
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "opto/addnode.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/connode.hpp"
#include "opto/divnode.hpp"
#include "opto/locknode.hpp"
#include "opto/loopnode.hpp"
#include "opto/machnode.hpp"
#include "opto/memnode.hpp"
#include "opto/mathexactnode.hpp"
#include "opto/mulnode.hpp"
#include "opto/multnode.hpp"
#include "opto/node.hpp"
#include "opto/rootnode.hpp"
#include "opto/subnode.hpp"
#include "opto/vectornode.hpp"
// ----------------------------------------------------------------------------
// Build a table of virtual functions to map from Nodes to dense integer
// opcode names.
int Node::Opcode() const { return Op_Node; }
#define macro(x) int x##Node::Opcode() const { return Op_##x; }
#include "classes.hpp"
#undef macro
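// As an illustration of the table above (one entry, not an exhaustive list):
// the classes.hpp entry macro(AddI) expands to
//   int AddINode::Opcode() const { return Op_AddI; }
// so every concrete Node subclass gets a virtual Opcode() returning its
// dense opcode constant.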
C:\hotspot-69087d08d473\src\share\vm/opto/classes.hpp
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
// The giant table of Node classes.
// One entry per class, sorted by class name.
macro(AbsD)
macro(AbsF)
macro(AbsI)
macro(AddD)
macro(AddF)
macro(AddI)
macro(AddL)
macro(AddP)
macro(Allocate)
macro(AllocateArray)
macro(AndI)
macro(AndL)
macro(AryEq)
macro(AtanD)
macro(Binary)
macro(Bool)
macro(BoxLock)
macro(ReverseBytesI)
macro(ReverseBytesL)
macro(ReverseBytesUS)
macro(ReverseBytesS)
macro(CProj)
macro(CallDynamicJava)
macro(CallJava)
macro(CallLeaf)
macro(CallLeafNoFP)
macro(CallRuntime)
macro(CallStaticJava)
macro(CastII)
macro(CastX2P)
macro(CastP2X)
macro(CastPP)
macro(Catch)
macro(CatchProj)
macro(CheckCastPP)
macro(ClearArray)
macro(ConstraintCast)
macro(CMoveD)
macro(CMoveF)
macro(CMoveI)
macro(CMoveL)
macro(CMoveP)
macro(CMoveN)
macro(CmpN)
macro(CmpD)
macro(CmpD3)
macro(CmpF)
macro(CmpF3)
macro(CmpI)
macro(CmpL)
macro(CmpL3)
macro(CmpLTMask)
macro(CmpP)
macro(CmpU)
macro(CmpUL)
macro(CompareAndSwapI)
macro(CompareAndSwapL)
macro(CompareAndSwapP)
macro(CompareAndSwapN)
macro(GetAndAddI)
macro(GetAndAddL)
macro(GetAndSetI)
macro(GetAndSetL)
macro(GetAndSetP)
macro(GetAndSetN)
macro(Con)
macro(ConN)
macro(ConNKlass)
macro(ConD)
macro(ConF)
macro(ConI)
macro(ConL)
macro(ConP)
macro(Conv2B)
macro(ConvD2F)
macro(ConvD2I)
macro(ConvD2L)
macro(ConvF2D)
macro(ConvF2I)
macro(ConvF2L)
macro(ConvI2D)
macro(ConvI2F)
macro(ConvI2L)
macro(ConvL2D)
macro(ConvL2F)
macro(ConvL2I)
macro(CosD)
macro(CountedLoop)
macro(CountedLoopEnd)
macro(CountLeadingZerosI)
macro(CountLeadingZerosL)
macro(CountTrailingZerosI)
macro(CountTrailingZerosL)
macro(CreateEx)
macro(DecodeN)
macro(DecodeNKlass)
macro(DivD)
macro(DivF)
macro(DivI)
macro(DivL)
macro(DivMod)
macro(DivModI)
macro(DivModL)
macro(EncodeISOArray)
macro(EncodeP)
macro(EncodePKlass)
macro(ExpD)
macro(FastLock)
macro(FastUnlock)
macro(Goto)
macro(Halt)
macro(If)
macro(IfFalse)
macro(IfTrue)
macro(Initialize)
macro(JProj)
macro(Jump)
macro(JumpProj)
macro(LShiftI)
macro(LShiftL)
macro(LoadB)
macro(LoadUB)
macro(LoadUS)
macro(LoadD)
macro(LoadD_unaligned)
macro(LoadF)
macro(LoadI)
macro(LoadKlass)
macro(LoadNKlass)
macro(LoadL)
macro(LoadL_unaligned)
macro(LoadPLocked)
macro(LoadP)
macro(LoadN)
macro(LoadRange)
macro(LoadS)
macro(Lock)
macro(LogD)
macro(Log10D)
macro(Loop)
macro(LoopLimit)
macro(Mach)
macro(MachProj)
macro(MaxI)
macro(MemBarAcquire)
macro(LoadFence)
macro(MemBarAcquireLock)
macro(MemBarCPUOrder)
macro(MemBarRelease)
macro(StoreFence)
macro(MemBarReleaseLock)
macro(MemBarVolatile)
macro(MemBarStoreStore)
macro(MergeMem)
macro(MinI)
macro(ModD)
macro(ModF)
macro(ModI)
macro(ModL)
macro(MoveI2F)
macro(MoveF2I)
macro(MoveL2D)
macro(MoveD2L)
macro(MulD)
macro(MulF)
macro(MulHiL)
macro(MulI)
macro(MulL)
macro(Multi)
macro(NegD)
macro(NegF)
macro(NeverBranch)
macro(Opaque1)
macro(Opaque2)
macro(Opaque3)
macro(ProfileBoolean)
macro(OrI)
macro(OrL)
macro(OverflowAddI)
macro(OverflowSubI)
macro(OverflowMulI)
macro(OverflowAddL)
macro(OverflowSubL)
macro(OverflowMulL)
macro(PCTable)
macro(Parm)
macro(PartialSubtypeCheck)
macro(Phi)
macro(PopCountI)
macro(PopCountL)
macro(PowD)
macro(PrefetchAllocation)
macro(PrefetchRead)
macro(PrefetchWrite)
macro(Proj)
macro(RShiftI)
macro(RShiftL)
macro(Region)
macro(Rethrow)
macro(Return)
macro(Root)
macro(RoundDouble)
macro(RoundFloat)
macro(SafePoint)
macro(SafePointScalarObject)
macro(SCMemProj)
macro(SinD)
macro(SqrtD)
macro(Start)
macro(StartOSR)
macro(StoreB)
macro(StoreC)
macro(StoreCM)
macro(StorePConditional)
macro(StoreIConditional)
macro(StoreLConditional)
macro(StoreD)
macro(StoreF)
macro(StoreI)
macro(StoreL)
macro(StoreP)
macro(StoreN)
macro(StoreNKlass)
macro(StrComp)
macro(StrEquals)
macro(StrIndexOf)
macro(SubD)
macro(SubF)
macro(SubI)
macro(SubL)
macro(TailCall)
macro(TailJump)
macro(TanD)
macro(ThreadLocal)
macro(Unlock)
macro(URShiftI)
macro(URShiftL)
macro(XorI)
macro(XorL)
macro(Vector)
macro(AddVB)
macro(AddVS)
macro(AddVI)
macro(AddVL)
macro(AddVF)
macro(AddVD)
macro(SubVB)
macro(SubVS)
macro(SubVI)
macro(SubVL)
macro(SubVF)
macro(SubVD)
macro(MulVS)
macro(MulVI)
macro(MulVF)
macro(MulVD)
macro(DivVF)
macro(DivVD)
macro(LShiftCntV)
macro(RShiftCntV)
macro(LShiftVB)
macro(LShiftVS)
macro(LShiftVI)
macro(LShiftVL)
macro(RShiftVB)
macro(RShiftVS)
macro(RShiftVI)
macro(RShiftVL)
macro(URShiftVB)
macro(URShiftVS)
macro(URShiftVI)
macro(URShiftVL)
macro(AndV)
macro(OrV)
macro(XorV)
macro(LoadVector)
macro(StoreVector)
macro(Pack)
macro(PackB)
macro(PackS)
macro(PackI)
macro(PackL)
macro(PackF)
macro(PackD)
macro(Pack2L)
macro(Pack2D)
macro(ReplicateB)
macro(ReplicateS)
macro(ReplicateI)
macro(ReplicateL)
macro(ReplicateF)
macro(ReplicateD)
macro(Extract)
macro(ExtractB)
macro(ExtractUB)
macro(ExtractC)
macro(ExtractS)
macro(ExtractI)
macro(ExtractL)
macro(ExtractF)
macro(ExtractD)
C:\hotspot-69087d08d473\src\share\vm/opto/coalesce.cpp
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/block.hpp"
#include "opto/c2compiler.hpp"
#include "opto/cfgnode.hpp"
#include "opto/chaitin.hpp"
#include "opto/coalesce.hpp"
#include "opto/connode.hpp"
#include "opto/indexSet.hpp"
#include "opto/machnode.hpp"
#include "opto/matcher.hpp"
#include "opto/regmask.hpp"
#ifndef PRODUCT
void PhaseCoalesce::dump(Node *n) const {
// Being a const function means I cannot use 'Find'
uint r = _phc._lrg_map.find(n);
tty->print("L%d/N%d ",r,n->_idx);
}
void PhaseCoalesce::dump() const {
// I know I have a block layout now, so I can print blocks in a loop
for( uint i=0; i<_phc._cfg.number_of_blocks(); i++ ) {
uint j;
Block* b = _phc._cfg.get_block(i);
// Print a nice block header
tty->print("B%d: ",b->_pre_order);
for( j=1; j<b->num_preds(); j++ )
tty->print("B%d ", _phc._cfg.get_block_for_node(b->pred(j))->_pre_order);
tty->print("-> ");
for( j=0; j<b->_num_succs; j++ )
tty->print("B%d ",b->_succs[j]->_pre_order);
tty->print(" IDom: B%d/#%d\n", b->_idom ? b->_idom->_pre_order : 0, b->_dom_depth);
uint cnt = b->number_of_nodes();
for( j=0; j<cnt; j++ ) {
Node *n = b->get_node(j);
dump( n );
tty->print("\t%s\t",n->Name());
// Dump the inputs
uint k; // Exit value of loop
for( k=0; k<n->req(); k++ ) // For all required inputs
if( n->in(k) ) dump( n->in(k) );
else tty->print("_ ");
int any_prec = 0;
for( ; k<n->len(); k++ ) // For all precedence inputs
if( n->in(k) ) {
if( !any_prec++ ) tty->print(" |");
dump( n->in(k) );
}
// Dump node-specific info
n->dump_spec(tty);
tty->print("\n");
}
tty->print("\n");
}
}
#endif
// Combine the live ranges def'd by these 2 Nodes. N2 is an input to N1.
void PhaseCoalesce::combine_these_two(Node *n1, Node *n2) {
uint lr1 = _phc._lrg_map.find(n1);
uint lr2 = _phc._lrg_map.find(n2);
if( lr1 != lr2 && // Different live ranges already AND
!_phc._ifg->test_edge_sq( lr1, lr2 ) ) { // Do not interfere
LRG *lrg1 = &_phc.lrgs(lr1);
LRG *lrg2 = &_phc.lrgs(lr2);
// Not an oop->int cast; oop->oop, int->int, AND int->oop are OK.
// Now, why is int->oop OK? We end up declaring a raw-pointer as an oop
// and in general that's a bad thing. However, int->oop conversions only
// happen at GC points, so the lifetime of the misclassified raw-pointer
// is from the CheckCastPP (that converts it to an oop) backwards up
// through a merge point and into the slow-path call, and around the
// diamond up to the heap-top check and back down into the slow-path call.
// The misclassified raw pointer is NOT live across the slow-path call,
// and so does not appear in any GC info, so the fact that it is
// misclassified is OK.
if( (lrg1->_is_oop || !lrg2->_is_oop) && // not an oop->int cast AND
// Compatible final mask
lrg1->mask().overlap( lrg2->mask() ) ) {
// Merge larger into smaller.
if( lr1 > lr2 ) {
uint tmp = lr1; lr1 = lr2; lr2 = tmp;
Node *n = n1; n1 = n2; n2 = n;
LRG *ltmp = lrg1; lrg1 = lrg2; lrg2 = ltmp;
}
// Union lr2 into lr1
_phc.Union( n1, n2 );
if (lrg1->_maxfreq < lrg2->_maxfreq)
lrg1->_maxfreq = lrg2->_maxfreq;
// Merge in the IFG
_phc._ifg->Union( lr1, lr2 );
// Combine register restrictions
lrg1->AND(lrg2->mask());
}
}
}
// Copy coalescing
void PhaseCoalesce::coalesce_driver() {
verify();
// Coalesce from high frequency to low
for (uint i = 0; i < _phc._cfg.number_of_blocks(); i++) {
coalesce(_phc._blks[i]);
}
}
// I am inserting copies to come out of SSA form. In the general case, I am
// doing a parallel renaming. I'm in the Named world now, so I can't do a
// general parallel renaming. All the copies now use "names" (live-ranges)
// to carry values instead of the explicit use-def chains. Suppose I need to
// insert 2 copies into the same block. They copy L161->L128 and L128->L132.
// If I insert them in the wrong order then L128 will get clobbered before it
// can get used by the second copy. This cannot happen in the SSA model;
// direct use-def chains get me the right value. It DOES happen in the named
// model so I have to handle the reordering of copies.
//
// In general, I need to topo-sort the placed copies to avoid conflicts.
// It's possible to have a closed cycle of copies (e.g., recirculating the same
// values around a loop). In this case I need a temp to break the cycle.
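// A minimal sketch of that cycle case (live-range numbers are illustrative,
// reusing those from the example above): suppose one block needs the two
// parallel copies L128 <- L132 and L132 <- L128.  Whichever copy is placed
// first clobbers a value the other copy still needs, so the code below saves
// the doomed source in a fresh temp before its kill, giving the order
//   T    <- L128     (inserted at the kill of L128)
//   L128 <- L132
//   L132 <- T        (the original copy, now reading the temp)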
void PhaseAggressiveCoalesce::insert_copy_with_overlap( Block *b, Node *copy, uint dst_name, uint src_name ) {
// Scan backwards for the locations of the last use of the dst_name.
// I am about to clobber the dst_name, so the copy must be inserted
// after the last use. Last use is really first-use on a backwards scan.
uint i = b->end_idx()-1;
while(1) {
Node *n = b->get_node(i);
// Check for end of virtual copies; this is also the end of the
// parallel renaming effort.
if (n->_idx < _unique) {
break;
}
uint idx = n->is_Copy();
assert( idx || n->is_Con() || n->is_MachProj(), "Only copies during parallel renaming" );
if (idx && _phc._lrg_map.find(n->in(idx)) == dst_name) {
break;
}
i--;
}
uint last_use_idx = i;
// Also search for any kill of src_name that exits the block.
// Since the copy uses src_name, I have to come before any kill.
uint kill_src_idx = b->end_idx();
// There can be only 1 kill that exits any block and that is
// the last kill. Thus it is the first kill on a backwards scan.
i = b->end_idx()-1;
while (1) {
Node *n = b->get_node(i);
// Check for end of virtual copies; this is also the end of the
// parallel renaming effort.
if (n->_idx < _unique) {
break;
}
assert( n->is_Copy() || n->is_Con() || n->is_MachProj(), "Only copies during parallel renaming" );
if (_phc._lrg_map.find(n) == src_name) {
kill_src_idx = i;
break;
}
i--;
}
// Need a temp? Last use of dst comes after the kill of src?
if (last_use_idx >= kill_src_idx) {
// Need to break a cycle with a temp
uint idx = copy->is_Copy();
Node *tmp = copy->clone();
uint max_lrg_id = _phc._lrg_map.max_lrg_id();
_phc.new_lrg(tmp, max_lrg_id);
_phc._lrg_map.set_max_lrg_id(max_lrg_id + 1);
// Insert new temp between copy and source
tmp ->set_req(idx,copy->in(idx));
copy->set_req(idx,tmp);
// Save source in temp early, before source is killed
b->insert_node(tmp, kill_src_idx);
_phc._cfg.map_node_to_block(tmp, b);
last_use_idx++;
}
// Insert just after last use
b->insert_node(copy, last_use_idx + 1);
}
void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
// We do LRG compression and fix the liveout data only here since the other
// place in Split() is guarded by the assert which we never hit.
_phc._lrg_map.compress_uf_map_for_nodes();
// Fix block's liveout data for compressed live ranges.
for (uint lrg = 1; lrg < _phc._lrg_map.max_lrg_id(); lrg++) {
uint compressed_lrg = _phc._lrg_map.find(lrg);
if (lrg != compressed_lrg) {
for (uint bidx = 0; bidx < _phc._cfg.number_of_blocks(); bidx++) {
IndexSet *liveout = _phc._live->live(_phc._cfg.get_block(bidx));
if (liveout->member(lrg)) {
liveout->remove(lrg);
liveout->insert(compressed_lrg);
}
}
}
}
// All new nodes added are actual copies to replace virtual copies.
// Nodes with index less than '_unique' are original, non-virtual Nodes.
_unique = C->unique();
for (uint i = 0; i < _phc._cfg.number_of_blocks(); i++) {
C->check_node_count(NodeLimitFudgeFactor, "out of nodes in coalesce");
if (C->failing()) return;
Block *b = _phc._cfg.get_block(i);
uint cnt = b->num_preds(); // Number of inputs to the Phi
for( uint l = 1; l<b->number_of_nodes(); l++ ) {
Node *n = b->get_node(l);
// Do not use removed-copies, use copied value instead
uint ncnt = n->req();
for( uint k = 1; k<ncnt; k++ ) {
Node *copy = n->in(k);
uint cidx = copy->is_Copy();
if( cidx ) {
Node *def = copy->in(cidx);
if (_phc._lrg_map.find(copy) == _phc._lrg_map.find(def)) {
n->set_req(k, def);
}
}
}
// Remove any explicit copies that get coalesced.
uint cidx = n->is_Copy();
if( cidx ) {
Node *def = n->in(cidx);
if (_phc._lrg_map.find(n) == _phc._lrg_map.find(def)) {
n->replace_by(def);
n->set_req(cidx,NULL);
b->remove_node(l);
l--;
continue;
}
}
if (n->is_Phi()) {
// Get the chosen name for the Phi
uint phi_name = _phc._lrg_map.find(n);
// Ignore the pre-allocated specials
if (!phi_name) {
continue;
}
// Check for mismatch inputs to Phi
for (uint j = 1; j < cnt; j++) {
Node *m = n->in(j);
uint src_name = _phc._lrg_map.find(m);
if (src_name != phi_name) {
Block *pred = _phc._cfg.get_block_for_node(b->pred(j));
Node *copy;
assert(!m->is_Con() || m->is_Mach(), "all Con must be Mach");
// Rematerialize constants instead of copying them.
// We do this only for immediate constants; we avoid constant table loads
// because they would unsafely extend the live range of the constant table base.
if (m->is_Mach() && m->as_Mach()->is_Con() && !m->as_Mach()->is_MachConstant() &&
m->as_Mach()->rematerialize()) {
copy = m->clone();
// Insert the copy in the predecessor basic block
pred->add_inst(copy);
// Copy any flags as well
_phc.clone_projs(pred, pred->end_idx(), m, copy, _phc._lrg_map);
} else {
int ireg = m->ideal_reg();
if (ireg == 0 || ireg == Op_RegFlags) {
if (C->subsume_loads()) {
C->record_failure(C2Compiler::retry_no_subsuming_loads());
} else {
assert(false, err_msg("attempted to spill a non-spillable item: %d: %s, ireg = %d",
m->_idx, m->Name(), ireg));
C->record_method_not_compilable("attempted to spill a non-spillable item");
}
return;
}
const RegMask *rm = C->matcher()->idealreg2spillmask[ireg];
copy = new (C) MachSpillCopyNode(m, *rm, *rm);
// Find a good place to insert. Kinda tricky, use a subroutine
insert_copy_with_overlap(pred,copy,phi_name,src_name);
}
// Insert the copy in the use-def chain
n->set_req(j, copy);
_phc._cfg.map_node_to_block(copy, pred);
// Extend ("register allocate") the names array for the copy.
_phc._lrg_map.extend(copy->_idx, phi_name);
} // End of if Phi names do not match
} // End of for all inputs to Phi
} else { // End of if Phi
// Now check for 2-address instructions
uint idx;
if( n->is_Mach() && (idx=n->as_Mach()->two_adr()) ) {
// Get the chosen name for the Node
uint name = _phc._lrg_map.find(n);
assert (name, "no 2-address specials");
// Check for name mis-match on the 2-address input
Node *m = n->in(idx);
if (_phc._lrg_map.find(m) != name) {
Node *copy;
assert(!m->is_Con() || m->is_Mach(), "all Con must be Mach");
// At this point it is unsafe to extend live ranges (6550579).
// Rematerialize only constants as we do for Phi above.
if (m->is_Mach() && m->as_Mach()->is_Con() && !m->as_Mach()->is_MachConstant() &&
m->as_Mach()->rematerialize()) {
copy = m->clone();
// Insert the copy in the basic block, just before us
b->insert_node(copy, l++);
l += _phc.clone_projs(b, l, m, copy, _phc._lrg_map);
} else {
int ireg = m->ideal_reg();
if (ireg == 0 || ireg == Op_RegFlags) {
assert(false, err_msg("attempted to spill a non-spillable item: %d: %s, ireg = %d",
m->_idx, m->Name(), ireg));
C->record_method_not_compilable("attempted to spill a non-spillable item");
return;
}
const RegMask *rm = C->matcher()->idealreg2spillmask[ireg];
copy = new (C) MachSpillCopyNode(m, *rm, *rm);
// Insert the copy in the basic block, just before us
b->insert_node(copy, l++);
}
// Insert the copy in the use-def chain
n->set_req(idx, copy);
// Extend ("register allocate") the names array for the copy.
_phc._lrg_map.extend(copy->_idx, name);
_phc._cfg.map_node_to_block(copy, b);
}
} // End of is two-adr
// Insert a copy at a debug use for a lrg which has high frequency
if (b->_freq < OPTO_DEBUG_SPLIT_FREQ || _phc._cfg.is_uncommon(b)) {
// Walk the debug inputs to the node and check for lrg freq
JVMState* jvms = n->jvms();
uint debug_start = jvms ? jvms->debug_start() : 999999;
uint debug_end = jvms ? jvms->debug_end() : 999999;
for(uint inpidx = debug_start; inpidx < debug_end; inpidx++) {
// Do not split monitors; they are only needed for debug table
// entries and need no code.
if (jvms->is_monitor_use(inpidx)) {
continue;
}
Node *inp = n->in(inpidx);
uint nidx = _phc._lrg_map.live_range_id(inp);
LRG &lrg = lrgs(nidx);
// If this lrg has a high frequency use/def
if( lrg._maxfreq >= _phc.high_frequency_lrg() ) {
// If the live range is also live out of this block (like it
// would be for a fast/slow idiom), the normal spill mechanism
// does an excellent job. If it is not live out of this block
// (like it would be for debug info to uncommon trap) splitting
// the live range now allows a better allocation in the high
// frequency blocks.
// Build_IFG_virtual has converted the live sets to
// live-IN info, not live-OUT info.
uint k;
for( k=0; k < b->_num_succs; k++ )
if( _phc._live->live(b->_succs[k])->member( nidx ) )
break; // Live in to some successor block?
if( k < b->_num_succs )
continue; // Live out; do not pre-split
// Split the lrg at this use
int ireg = inp->ideal_reg();
if (ireg == 0 || ireg == Op_RegFlags) {
assert(false, err_msg("attempted to spill a non-spillable item: %d: %s, ireg = %d",
inp->_idx, inp->Name(), ireg));
C->record_method_not_compilable("attempted to spill a non-spillable item");
return;
}
const RegMask *rm = C->matcher()->idealreg2spillmask[ireg];
Node *copy = new (C) MachSpillCopyNode( inp, *rm, *rm );
// Insert the copy in the use-def chain
n->set_req(inpidx, copy );
// Insert the copy in the basic block, just before us
b->insert_node(copy, l++);
// Extend ("register allocate") the names array for the copy.
uint max_lrg_id = _phc._lrg_map.max_lrg_id();
_phc.new_lrg(copy, max_lrg_id);
_phc._lrg_map.set_max_lrg_id(max_lrg_id + 1);
_phc._cfg.map_node_to_block(copy, b);
//tty->print_cr("Split a debug use in Aggressive Coalesce");
} // End of if high frequency use/def
} // End of for all debug inputs
} // End of if low frequency safepoint
} // End of if Phi
} // End of for all instructions
} // End of for all blocks
}
// Aggressive (but pessimistic) copy coalescing of a single block
// The following coalesce pass represents a single round of aggressive
// pessimistic coalesce. "Aggressive" means no attempt to preserve
// colorability when coalescing. This occasionally means more spills, but
// it also means fewer rounds of coalescing for better code - and that means
// faster compiles.
// "Pessimistic" means we do not hit the fixed point in one pass (and we are
// reaching for the least fixed point to boot). This is typically solved
// with a few more rounds of coalescing, but the compiler must run fast. We
// could optimistically coalesce everything touching PhiNodes together
// into one big live range, then check for self-interference. Everywhere
// the live range interferes with self it would have to be split. Finding
// the right split points can be done with some heuristics (based on
// expected frequency of edges in the live range). In short, it's a real
// research problem and the timeline is too short to allow such research.
// Further thoughts: (1) build the LR in a pass, (2) find self-interference
// in another pass, (3) per each self-conflict, split, (4) split by finding
// the low-cost cut (min-cut) of the LR, (5) edges in the LR are weighted
// according to the GCM algorithm (or just exec freq on CFG edges).
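// A brief note on what "aggressive" buys here: combine_these_two() above only
// checks that the two live ranges do not already interfere and that their
// register masks overlap; it never asks whether the merged range will still be
// colorable.  Contrast this with the conservative pass later in this file,
// whose copy_copy() bails out when the merged degree reaches the size of the
// merged register mask.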
void PhaseAggressiveCoalesce::coalesce( Block *b ) {
// Copies are still "virtual" - meaning we have not made them explicit
// copies. Instead, Phi functions of successor blocks have mis-matched
// live-ranges. If I fail to coalesce, I'll have to insert a copy to line
// up the live-ranges. Check for Phis in successor blocks.
uint i;
for( i=0; i<b->_num_succs; i++ ) {
Block *bs = b->_succs[i];
// Find index of 'b' in 'bs' predecessors
uint j=1;
while (_phc._cfg.get_block_for_node(bs->pred(j)) != b) {
j++;
}
// Visit all the Phis in successor block
for( uint k = 1; k<bs->number_of_nodes(); k++ ) {
Node *n = bs->get_node(k);
if( !n->is_Phi() ) break;
combine_these_two( n, n->in(j) );
}
} // End of for all successor blocks
// Check _this_ block for 2-address instructions and copies.
uint cnt = b->end_idx();
for( i = 1; i<cnt; i++ ) {
Node *n = b->get_node(i);
uint idx;
// 2-address instructions have a virtual Copy matching their input
// to their output
if (n->is_Mach() && (idx = n->as_Mach()->two_adr())) {
MachNode *mach = n->as_Mach();
combine_these_two(mach, mach->in(idx));
}
} // End of for all instructions in block
}
PhaseConservativeCoalesce::PhaseConservativeCoalesce(PhaseChaitin &chaitin) : PhaseCoalesce(chaitin) {
_ulr.initialize(_phc._lrg_map.max_lrg_id());
}
void PhaseConservativeCoalesce::verify() {
#ifdef ASSERT
_phc.set_was_low();
#endif
}
void PhaseConservativeCoalesce::union_helper( Node *lr1_node, Node *lr2_node, uint lr1, uint lr2, Node *src_def, Node *dst_copy, Node *src_copy, Block *b, uint bindex ) {
// Join live ranges. Merge larger into smaller. Union lr2 into lr1 in the
// union-find tree
_phc.Union( lr1_node, lr2_node );
// Single-def live range ONLY if both live ranges are single-def.
// If both are single def, then src_def powers one live range
// and dst_copy powers the other. After merging, src_def powers
// the combined live range.
lrgs(lr1)._def = (lrgs(lr1).is_multidef() ||
lrgs(lr2).is_multidef() )
? NodeSentinel : src_def;
lrgs(lr2)._def = NULL; // No def for lrg 2
lrgs(lr2).Clear(); // Force empty mask for LRG 2
//lrgs(lr2)._size = 0; // Live-range 2 goes dead
lrgs(lr1)._is_oop |= lrgs(lr2)._is_oop;
lrgs(lr2)._is_oop = 0; // In particular, not an oop for GC info
if (lrgs(lr1)._maxfreq < lrgs(lr2)._maxfreq)
lrgs(lr1)._maxfreq = lrgs(lr2)._maxfreq;
// Copy original value instead. Intermediate copies go dead, and
// the dst_copy becomes useless.
int didx = dst_copy->is_Copy();
dst_copy->set_req( didx, src_def );
// Add copy to free list
// _phc.free_spillcopy(b->_nodes[bindex]);
assert( b->get_node(bindex) == dst_copy, "" );
dst_copy->replace_by( dst_copy->in(didx) );
dst_copy->set_req( didx, NULL);
b->remove_node(bindex);
if( bindex < b->_ihrp_index ) b->_ihrp_index--;
if( bindex < b->_fhrp_index ) b->_fhrp_index--;
// Stretched lr1; add it to liveness of intermediate blocks
Block *b2 = _phc._cfg.get_block_for_node(src_copy);
while( b != b2 ) {
b = _phc._cfg.get_block_for_node(b->pred(1));
_phc._live->live(b)->insert(lr1);
}
}
// Factored code from copy_copy that computes extra interferences from
// lengthening a live range by double-coalescing.
uint PhaseConservativeCoalesce::compute_separating_interferences(Node *dst_copy, Node *src_copy, Block *b, uint bindex, RegMask &rm, uint reg_degree, uint rm_size, uint lr1, uint lr2 ) {
assert(!lrgs(lr1)._fat_proj, "cannot coalesce fat_proj");
assert(!lrgs(lr2)._fat_proj, "cannot coalesce fat_proj");
Node *prev_copy = dst_copy->in(dst_copy->is_Copy());
Block *b2 = b;
uint bindex2 = bindex;
while( 1 ) {
// Find previous instruction
bindex2--; // Chain backwards 1 instruction
while( bindex2 == 0 ) { // At block start, find prior block
assert( b2->num_preds() == 2, "cannot double coalesce across c-flow" );
b2 = _phc._cfg.get_block_for_node(b2->pred(1));
bindex2 = b2->end_idx()-1;
}
// Get prior instruction
assert(bindex2 < b2->number_of_nodes(), "index out of bounds");
Node *x = b2->get_node(bindex2);
if( x == prev_copy ) { // Previous copy in copy chain?
if( prev_copy == src_copy)// Found end of chain and all interferences
break; // So break out of loop
// Else work back one in copy chain
prev_copy = prev_copy->in(prev_copy->is_Copy());
} else { // Else collect interferences
uint lidx = _phc._lrg_map.find(x);
// Found another def of live-range being stretched?
if(lidx == lr1) {
return max_juint;
}
if(lidx == lr2) {
return max_juint;
}
// If we attempt to coalesce across a bound def
if( lrgs(lidx).is_bound() ) {
// Do not let the coalesced LRG expect to get the bound color
rm.SUBTRACT( lrgs(lidx).mask() );
// Recompute rm_size
rm_size = rm.Size();
//if( rm._flags ) rm_size += 1000000;
if( reg_degree >= rm_size ) return max_juint;
}
if( rm.overlap(lrgs(lidx).mask()) ) {
// Insert lidx into union LRG; returns TRUE if actually inserted
if( _ulr.insert(lidx) ) {
// Infinite-stack neighbors do not alter colorability, as they
// can always color to some other color.
if( !lrgs(lidx).mask().is_AllStack() ) {
// If this coalesce will make any new neighbor uncolorable,
// do not coalesce.
if( lrgs(lidx).just_lo_degree() )
return max_juint;
// Bump our degree
if( ++reg_degree >= rm_size )
return max_juint;
} // End of if not infinite-stack neighbor
} // End of if actually inserted
} // End of if live range overlaps
} // End of else collect interferences for 1 node
} // End of while forever, scan back for interferences
return reg_degree;
}
void PhaseConservativeCoalesce::update_ifg(uint lr1, uint lr2, IndexSet *n_lr1, IndexSet *n_lr2) {
// Some original neighbors of lr1 might have gone away
// because the constrained register mask prevented them.
// Remove lr1 from such neighbors.
IndexSetIterator one(n_lr1);
uint neighbor;
LRG &lrg1 = lrgs(lr1);
while ((neighbor = one.next()) != 0)
if( !_ulr.member(neighbor) )
if( _phc._ifg->neighbors(neighbor)->remove(lr1) )
lrgs(neighbor).inc_degree( -lrg1.compute_degree(lrgs(neighbor)) );
// lr2 is now called (coalesced into) lr1.
// Remove lr2 from the IFG.
IndexSetIterator two(n_lr2);
LRG &lrg2 = lrgs(lr2);
while ((neighbor = two.next()) != 0)
if( _phc._ifg->neighbors(neighbor)->remove(lr2) )
lrgs(neighbor).inc_degree( -lrg2.compute_degree(lrgs(neighbor)) );
// Some neighbors of intermediate copies now interfere with the
// combined live range.
IndexSetIterator three(&_ulr);
while ((neighbor = three.next()) != 0)
if( _phc._ifg->neighbors(neighbor)->insert(lr1) )
lrgs(neighbor).inc_degree( lrg1.compute_degree(lrgs(neighbor)) );
}
static void record_bias( const PhaseIFG *ifg, int lr1, int lr2 ) {
// Tag copy bias here
if( !ifg->lrgs(lr1)._copy_bias )
ifg->lrgs(lr1)._copy_bias = lr2;
if( !ifg->lrgs(lr2)._copy_bias )
ifg->lrgs(lr2)._copy_bias = lr1;
}
// See if I can coalesce a series of multiple copies together. I need the
// final dest copy and the original src copy. They can be the same Node.
// Compute the compatible register masks.
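// A minimal sketch of the chain case (node names here are illustrative):
// if a straight line of blocks contains
//   src_copy = SpillCopy(src_def)
//   mid      = SpillCopy(src_copy)
//   dst_copy = SpillCopy(mid)
// then copy_copy(dst_copy, src_copy, ...) tries to merge dst_copy's live
// range with src_def's, walking back through the intermediate copies in
// compute_separating_interferences() to pick up any interferences the
// lengthened live range would acquire.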
bool PhaseConservativeCoalesce::copy_copy(Node *dst_copy, Node *src_copy, Block *b, uint bindex) {
if (!dst_copy->is_SpillCopy()) {
return false;
}
if (!src_copy->is_SpillCopy()) {
return false;
}
Node *src_def = src_copy->in(src_copy->is_Copy());
uint lr1 = _phc._lrg_map.find(dst_copy);
uint lr2 = _phc._lrg_map.find(src_def);
// Same live ranges already?
if (lr1 == lr2) {
return false;
}
// Interfere?
if (_phc._ifg->test_edge_sq(lr1, lr2)) {
return false;
}
// Not an oop->int cast; oop->oop, int->int, AND int->oop are OK.
if (!lrgs(lr1)._is_oop && lrgs(lr2)._is_oop) { // not an oop->int cast
return false;
}
// Coalescing between an aligned live range and a mis-aligned live range?
// No, no! Alignment changes how we count degree.
if (lrgs(lr1)._fat_proj != lrgs(lr2)._fat_proj) {
return false;
}
// Sort; use smaller live-range number
Node *lr1_node = dst_copy;
Node *lr2_node = src_def;
if (lr1 > lr2) {
uint tmp = lr1; lr1 = lr2; lr2 = tmp;
lr1_node = src_def; lr2_node = dst_copy;
}
// Check for compatibility of the 2 live ranges by
// intersecting their allowed register sets.
RegMask rm = lrgs(lr1).mask();
rm.AND(lrgs(lr2).mask());
// Number of bits free
uint rm_size = rm.Size();
if (UseFPUForSpilling && rm.is_AllStack() ) {
// Don't coalesce when frequency difference is large
Block *dst_b = _phc._cfg.get_block_for_node(dst_copy);
Block *src_def_b = _phc._cfg.get_block_for_node(src_def);
if (src_def_b->_freq > 10*dst_b->_freq )
return false;
}
// If we can use any stack slot, then effective size is infinite
if( rm.is_AllStack() ) rm_size += 1000000;
// Incompatible masks, no way to coalesce
if( rm_size == 0 ) return false;
// Another early bail-out test is when we are double-coalescing and the
// 2 copies are separated by some control flow.
if( dst_copy != src_copy ) {
Block *src_b = _phc._cfg.get_block_for_node(src_copy);
Block *b2 = b;
while( b2 != src_b ) {
if( b2->num_preds() > 2 ){// Found merge-point
_phc._lost_opp_cflow_coalesce++;
// extra record_bias commented out because Chris believes it is not
// productive. Since we can record only 1 bias, we want to choose one
// that stands a chance of working and this one probably does not.
//record_bias( _phc._lrgs, lr1, lr2 );
return false; // Too hard to find all interferences
}
b2 = _phc._cfg.get_block_for_node(b2->pred(1));
}
}
// Union the two interference sets together into '_ulr'
uint reg_degree = _ulr.lrg_union( lr1, lr2, rm_size, _phc._ifg, rm );
if( reg_degree >= rm_size ) {
record_bias( _phc._ifg, lr1, lr2 );
return false;
}
// Now I need to compute all the interferences between dst_copy and
// src_copy. I'm not willing to visit the entire interference graph, so
// I limit my search to things in dst_copy's block or in a straight
// line of previous blocks. I give up at merge points or when I get
// more interferences than my degree. I can stop when I find src_copy.
if( dst_copy != src_copy ) {
reg_degree = compute_separating_interferences(dst_copy, src_copy, b, bindex, rm, rm_size, reg_degree, lr1, lr2 );
if( reg_degree == max_juint ) {
record_bias( _phc._ifg, lr1, lr2 );
return false;
}
} // End of if dst_copy & src_copy are different
// ---- THE COMBINED LRG IS COLORABLE ----
// YEAH - Now coalesce this copy away
assert( lrgs(lr1).num_regs() == lrgs(lr2).num_regs(), "" );
IndexSet *n_lr1 = _phc._ifg->neighbors(lr1);
IndexSet *n_lr2 = _phc._ifg->neighbors(lr2);
// Update the interference graph
update_ifg(lr1, lr2, n_lr1, n_lr2);
_ulr.remove(lr1);
// Uncomment the following code to trace Coalescing in great detail.
//
//if (false) {
// tty->cr();
// tty->print_cr("#######################################");
// tty->print_cr("union %d and %d", lr1, lr2);
// n_lr1->dump();
// n_lr2->dump();
// tty->print_cr("resulting set is");
// _ulr.dump();
//}
// Replace n_lr1 with the new combined live range. _ulr will use
// n_lr1's old memory on the next iteration. n_lr2 is cleared to
// send its internal memory to the free list.
_ulr.swap(n_lr1);
_ulr.clear();
n_lr2->clear();
lrgs(lr1).set_degree( _phc._ifg->effective_degree(lr1) );
lrgs(lr2).set_degree( 0 );
// Join live ranges. Merge larger into smaller. Union lr2 into lr1 in the
// union-find tree
union_helper( lr1_node, lr2_node, lr1, lr2, src_def, dst_copy, src_copy, b, bindex );
// Combine register restrictions
lrgs(lr1).set_mask(rm);
lrgs(lr1).compute_set_mask_size();
lrgs(lr1)._cost += lrgs(lr2)._cost;
lrgs(lr1)._area += lrgs(lr2)._area;
// While it's uncommon to successfully coalesce live ranges that started out
// being not-lo-degree, it can happen. In any case the combined coalesced
// live range better Simplify nicely.
lrgs(lr1)._was_lo = 1;
// kinda expensive to do all the time
//tty->print_cr("warning: slow verify happening");
//_phc._ifg->verify( &_phc );
return true;
}
// Conservative (but pessimistic) copy coalescing of a single block
void PhaseConservativeCoalesce::coalesce( Block *b ) {
// Bail out on infrequent blocks
if (_phc._cfg.is_uncommon(b)) {
return;
}
// Check this block for copies.
for( uint i = 1; i<b->end_idx(); i++ ) {
// Check for actual copies on inputs. Coalesce a copy into its
// input if use and copy's input are compatible.
Node *copy1 = b->get_node(i);
uint idx1 = copy1->is_Copy();
if( !idx1 ) continue; // Not a copy
if( copy_copy(copy1,copy1,b,i) ) {
i--; // Retry, same location in block
PhaseChaitin::_conserv_coalesce++; // Collect stats on success
continue;
}
}
}
C:\hotspot-69087d08d473\src\share\vm/opto/coalesce.hpp
/*
* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_OPTO_COALESCE_HPP
#define SHARE_VM_OPTO_COALESCE_HPP
#include "opto/phase.hpp"
class LoopTree;
class LRG;
class Matcher;
class PhaseIFG;
class PhaseCFG;
//------------------------------PhaseCoalesce----------------------------------
class PhaseCoalesce : public Phase {
protected:
PhaseChaitin &_phc;
public:
// Coalesce copies
PhaseCoalesce(PhaseChaitin &phc)
: Phase(Coalesce)
, _phc(phc) {}
virtual void verify() = 0;
// Coalesce copies
void coalesce_driver();
// Coalesce copies in this block
virtual void coalesce(Block *b) = 0;
// Attempt to coalesce live ranges defined by these 2
void combine_these_two(Node *n1, Node *n2);
LRG &lrgs(uint lidx) { return _phc.lrgs(lidx); }
#ifndef PRODUCT
// Dump internal name
void dump(Node *n) const;
// Dump whole shebang
void dump() const;
#endif
};
//------------------------------PhaseAggressiveCoalesce------------------------
// Aggressive, pessimistic coalescing of copies. Aggressive means ignore graph
// colorability; perhaps coalescing to the point of forcing a spill.
// Pessimistic means we cannot coalesce if 2 live ranges interfere. This
// implies we do not hit a fixed point right away.
class PhaseAggressiveCoalesce : public PhaseCoalesce {
uint _unique;
public:
// Coalesce copies
PhaseAggressiveCoalesce( PhaseChaitin &chaitin ) : PhaseCoalesce(chaitin) {}
virtual void verify() { };
// Aggressively coalesce copies in this block
virtual void coalesce( Block *b );
// Where I fail to coalesce, manifest virtual copies as the Real Thing
void insert_copies( Matcher &matcher );
// Copy insertion needs some smarts in case live ranges overlap
void insert_copy_with_overlap( Block *b, Node *copy, uint dst_name, uint src_name );
};
//------------------------------PhaseConservativeCoalesce----------------------
// Conservative, pessimistic coalescing of copies. Conservative means do not
// coalesce if the resultant live range will be uncolorable. Pessimistic
// means we cannot coalesce if 2 live ranges interfere. This implies we do
// not hit a fixed point right away.
class PhaseConservativeCoalesce : public PhaseCoalesce {
IndexSet _ulr; // Union live range interferences
public:
// Coalesce copies
PhaseConservativeCoalesce( PhaseChaitin &chaitin );
virtual void verify();
// Conservatively coalesce copies in this block
virtual void coalesce( Block *b );
// Coalesce this chain of copies away
bool copy_copy( Node *dst_copy, Node *src_copy, Block *b, uint bindex );
void union_helper( Node *lr1_node, Node *lr2_node, uint lr1, uint lr2, Node *src_def, Node *dst_copy, Node *src_copy, Block *b, uint bindex );
uint compute_separating_interferences(Node *dst_copy, Node *src_copy, Block *b, uint bindex, RegMask &rm, uint rm_size, uint reg_degree, uint lr1, uint lr2);
void update_ifg(uint lr1, uint lr2, IndexSet *n_lr1, IndexSet *n_lr2);
};
#endif // SHARE_VM_OPTO_COALESCE_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/compile.cpp
/*
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "ci/ciReplay.hpp"
#include "classfile/systemDictionary.hpp"
#include "code/exceptionHandlerTable.hpp"
#include "code/nmethod.hpp"
#include "compiler/compileLog.hpp"
#include "compiler/disassembler.hpp"
#include "compiler/oopMap.hpp"
#include "jfr/jfrEvents.hpp"
#include "opto/addnode.hpp"
#include "opto/block.hpp"
#include "opto/c2compiler.hpp"
#include "opto/callGenerator.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/chaitin.hpp"
#include "opto/compile.hpp"
#include "opto/connode.hpp"
#include "opto/divnode.hpp"
#include "opto/escape.hpp"
#include "opto/idealGraphPrinter.hpp"
#include "opto/loopnode.hpp"
#include "opto/machnode.hpp"
#include "opto/macro.hpp"
#include "opto/matcher.hpp"
#include "opto/mathexactnode.hpp"
#include "opto/memnode.hpp"
#include "opto/mulnode.hpp"
#include "opto/node.hpp"
#include "opto/opcodes.hpp"
#include "opto/output.hpp"
#include "opto/parse.hpp"
#include "opto/phaseX.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/stringopts.hpp"
#include "opto/type.hpp"
#include "opto/vectornode.hpp"
#include "runtime/arguments.hpp"
#include "runtime/signature.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/timer.hpp"
#include "utilities/copy.hpp"
#if defined AD_MD_HPP
# include AD_MD_HPP
#elif defined TARGET_ARCH_MODEL_x86_32
# include "adfiles/ad_x86_32.hpp"
#elif defined TARGET_ARCH_MODEL_x86_64
# include "adfiles/ad_x86_64.hpp"
#elif defined TARGET_ARCH_MODEL_aarch64
# include "adfiles/ad_aarch64.hpp"
#elif defined TARGET_ARCH_MODEL_sparc
# include "adfiles/ad_sparc.hpp"
#elif defined TARGET_ARCH_MODEL_zero
# include "adfiles/ad_zero.hpp"
#elif defined TARGET_ARCH_MODEL_ppc_64
# include "adfiles/ad_ppc_64.hpp"
#endif
// -------------------- Compile::mach_constant_base_node -----------------------
// Constant table base node singleton.
MachConstantBaseNode* Compile::mach_constant_base_node() {
if (_mach_constant_base_node == NULL) {
_mach_constant_base_node = new (C) MachConstantBaseNode();
_mach_constant_base_node->add_req(C->root());
}
return _mach_constant_base_node;
}
/// Support for intrinsics.
// Return the index at which m must be inserted (or already exists).
// The sort order is by the address of the ciMethod, with is_virtual as minor key.
int Compile::intrinsic_insertion_index(ciMethod* m, bool is_virtual) {
#ifdef ASSERT
for (int i = 1; i < _intrinsics->length(); i++) {
CallGenerator* cg1 = _intrinsics->at(i-1);
CallGenerator* cg2 = _intrinsics->at(i);
assert(cg1->method() != cg2->method()
? cg1->method() < cg2->method()
: cg1->is_virtual() < cg2->is_virtual(),
"compiler intrinsics list must stay sorted");
}
#endif
// Binary search sorted list, in decreasing intervals [lo, hi].
int lo = 0, hi = _intrinsics->length()-1;
while (lo <= hi) {
int mid = (uint)(hi + lo) / 2;
ciMethod* mid_m = _intrinsics->at(mid)->method();
if (m < mid_m) {
hi = mid-1;
} else if (m > mid_m) {
lo = mid+1;
} else {
// look at minor sort key
bool mid_virt = _intrinsics->at(mid)->is_virtual();
if (is_virtual < mid_virt) {
hi = mid-1;
} else if (is_virtual > mid_virt) {
lo = mid+1;
} else {
return mid; // exact match
}
}
}
return lo; // inexact match
}
void Compile::register_intrinsic(CallGenerator* cg) {
if (_intrinsics == NULL) {
_intrinsics = new (comp_arena())GrowableArray<CallGenerator*>(comp_arena(), 60, 0, NULL);
}
// This code is stolen from ciObjectFactory::insert.
// Really, GrowableArray should have methods for
// insert_at, remove_at, and binary_search.
int len = _intrinsics->length();
int index = intrinsic_insertion_index(cg->method(), cg->is_virtual());
if (index == len) {
_intrinsics->append(cg);
} else {
#ifdef ASSERT
CallGenerator* oldcg = _intrinsics->at(index);
assert(oldcg->method() != cg->method() || oldcg->is_virtual() != cg->is_virtual(), "don't register twice");
#endif
_intrinsics->append(_intrinsics->at(len-1));
int pos;
for (pos = len-2; pos >= index; pos--) {
_intrinsics->at_put(pos+1,_intrinsics->at(pos));
}
_intrinsics->at_put(index, cg);
}
assert(find_intrinsic(cg->method(), cg->is_virtual()) == cg, "registration worked");
}
CallGenerator* Compile::find_intrinsic(ciMethod* m, bool is_virtual) {
assert(m->is_loaded(), "don't try this on unloaded methods");
if (_intrinsics != NULL) {
int index = intrinsic_insertion_index(m, is_virtual);
if (index < _intrinsics->length()
&& _intrinsics->at(index)->method() == m
&& _intrinsics->at(index)->is_virtual() == is_virtual) {
return _intrinsics->at(index);
}
}
// Lazily create intrinsics for intrinsic IDs well-known in the runtime.
if (m->intrinsic_id() != vmIntrinsics::_none &&
m->intrinsic_id() <= vmIntrinsics::LAST_COMPILER_INLINE) {
CallGenerator* cg = make_vm_intrinsic(m, is_virtual);
if (cg != NULL) {
// Save it for next time:
register_intrinsic(cg);
return cg;
} else {
gather_intrinsic_statistics(m->intrinsic_id(), is_virtual, _intrinsic_disabled);
}
}
return NULL;
}
// Compile:: register_library_intrinsics and make_vm_intrinsic are defined
// in library_call.cpp.
#ifndef PRODUCT
// statistics gathering...
juint Compile::_intrinsic_hist_count[vmIntrinsics::ID_LIMIT] = {0};
jubyte Compile::_intrinsic_hist_flags[vmIntrinsics::ID_LIMIT] = {0};
bool Compile::gather_intrinsic_statistics(vmIntrinsics::ID id, bool is_virtual, int flags) {
assert(id > vmIntrinsics::_none && id < vmIntrinsics::ID_LIMIT, "oob");
int oflags = _intrinsic_hist_flags[id];
assert(flags != 0, "what happened?");
if (is_virtual) {
flags |= _intrinsic_virtual;
}
bool changed = (flags != oflags);
if ((flags & _intrinsic_worked) != 0) {
juint count = (_intrinsic_hist_count[id] += 1);
if (count == 1) {
changed = true; // first time
}
// increment the overall count also:
_intrinsic_hist_count[vmIntrinsics::_none] += 1;
}
if (changed) {
if (((oflags ^ flags) & _intrinsic_virtual) != 0) {
// Something changed about the intrinsic's virtuality.
if ((flags & _intrinsic_virtual) != 0) {
// This is the first use of this intrinsic as a virtual call.
if (oflags != 0) {
// We already saw it as a non-virtual, so note both cases.
flags |= _intrinsic_both;
}
} else if ((oflags & _intrinsic_both) == 0) {
// This is the first use of this intrinsic as a non-virtual
flags |= _intrinsic_both;
}
}
_intrinsic_hist_flags[id] = (jubyte) (oflags | flags);
}
// update the overall flags also:
_intrinsic_hist_flags[vmIntrinsics::_none] |= (jubyte) flags;
return changed;
}
static char* format_flags(int flags, char* buf) {
buf[0] = 0;
if ((flags & Compile::_intrinsic_worked) != 0) strcat(buf, ",worked");
if ((flags & Compile::_intrinsic_failed) != 0) strcat(buf, ",failed");
if ((flags & Compile::_intrinsic_disabled) != 0) strcat(buf, ",disabled");
if ((flags & Compile::_intrinsic_virtual) != 0) strcat(buf, ",virtual");
if ((flags & Compile::_intrinsic_both) != 0) strcat(buf, ",nonvirtual");
if (buf[0] == 0) strcat(buf, ",");
assert(buf[0] == ',', "must be");
return &buf[1];
}
void Compile::print_intrinsic_statistics() {
char flagsbuf[100];
ttyLocker ttyl;
if (xtty != NULL) xtty->head("statistics type='intrinsic'");
tty->print_cr("Compiler intrinsic usage:");
juint total = _intrinsic_hist_count[vmIntrinsics::_none];
if (total == 0) total = 1; // avoid div0 in case of no successes
#define PRINT_STAT_LINE(name, c, f) \
tty->print_cr(" %4d (%4.1f%%) %s (%s)", (int)(c), ((c) * 100.0) / total, name, f);
for (int index = 1 + (int)vmIntrinsics::_none; index < (int)vmIntrinsics::ID_LIMIT; index++) {
vmIntrinsics::ID id = (vmIntrinsics::ID) index;
int flags = _intrinsic_hist_flags[id];
juint count = _intrinsic_hist_count[id];
if ((flags | count) != 0) {
PRINT_STAT_LINE(vmIntrinsics::name_at(id), count, format_flags(flags, flagsbuf));
}
}
PRINT_STAT_LINE("total", total, format_flags(_intrinsic_hist_flags[vmIntrinsics::_none], flagsbuf));
if (xtty != NULL) xtty->tail("statistics");
}
void Compile::print_statistics() {
{ ttyLocker ttyl;
if (xtty != NULL) xtty->head("statistics type='opto'");
Parse::print_statistics();
PhaseCCP::print_statistics();
PhaseRegAlloc::print_statistics();
Scheduling::print_statistics();
PhasePeephole::print_statistics();
PhaseIdealLoop::print_statistics();
if (xtty != NULL) xtty->tail("statistics");
}
if (_intrinsic_hist_flags[vmIntrinsics::_none] != 0) {
// put this under its own <statistics> element.
print_intrinsic_statistics();
}
}
#endif //PRODUCT
// Support for bundling info
Bundle* Compile::node_bundling(const Node *n) {
assert(valid_bundle_info(n), "oob");
return &_node_bundling_base[n->_idx];
}
bool Compile::valid_bundle_info(const Node *n) {
return (_node_bundling_limit > n->_idx);
}
void Compile::gvn_replace_by(Node* n, Node* nn) {
for (DUIterator_Last imin, i = n->last_outs(imin); i >= imin; ) {
Node* use = n->last_out(i);
bool is_in_table = initial_gvn()->hash_delete(use);
uint uses_found = 0;
for (uint j = 0; j < use->len(); j++) {
if (use->in(j) == n) {
if (j < use->req())
use->set_req(j, nn);
else
use->set_prec(j, nn);
uses_found++;
}
}
if (is_in_table) {
// reinsert into table
initial_gvn()->hash_find_insert(use);
}
record_for_igvn(use);
i -= uses_found; // we deleted 1 or more copies of this edge
}
}
static inline bool not_a_node(const Node* n) {
if (n == NULL) return true;
if (((intptr_t)n & 1) != 0) return true; // uninitialized, etc.
if (*(address*)n == badAddress) return true; // kill by Node::destruct
return false;
}
// Identify all nodes that are reachable from below, i.e., useful.
// Use a breadth-first pass that records state in a Unique_Node_List;
// recursive traversal is slower.
void Compile::identify_useful_nodes(Unique_Node_List &useful) {
int estimated_worklist_size = live_nodes();
useful.map( estimated_worklist_size, NULL ); // preallocate space
// Initialize worklist
if (root() != NULL) { useful.push(root()); }
// If 'top' is cached, declare it useful to preserve cached node
if( cached_top_node() ) { useful.push(cached_top_node()); }
// Push all useful nodes onto the list, breadth-first
for( uint next = 0; next < useful.size(); ++next ) {
assert( next < unique(), "Unique useful nodes < total nodes");
Node *n = useful.at(next);
uint max = n->len();
for( uint i = 0; i < max; ++i ) {
Node *m = n->in(i);
if (not_a_node(m)) continue;
useful.push(m);
}
}
}
// Update dead_node_list with any missing dead nodes using the useful
// list. Consider all non-useful nodes to be useless, i.e., dead nodes.
void Compile::update_dead_node_list(Unique_Node_List &useful) {
uint max_idx = unique();
VectorSet& useful_node_set = useful.member_set();
for (uint node_idx = 0; node_idx < max_idx; node_idx++) {
// If node with index node_idx is not in useful set,
// mark it as dead in dead node list.
if (! useful_node_set.test(node_idx) ) {
record_dead_node(node_idx);
}
}
}
void Compile::remove_useless_late_inlines(GrowableArray<CallGenerator*>* inlines, Unique_Node_List &useful) {
int shift = 0;
for (int i = 0; i < inlines->length(); i++) {
CallGenerator* cg = inlines->at(i);
CallNode* call = cg->call_node();
if (shift > 0) {
inlines->at_put(i-shift, cg);
}
if (!useful.member(call)) {
shift++;
}
}
inlines->trunc_to(inlines->length()-shift);
}
// Disconnect all useless nodes by disconnecting those at the boundary.
void Compile::remove_useless_nodes(Unique_Node_List &useful) {
uint next = 0;
while (next < useful.size()) {
Node *n = useful.at(next++);
if (n->is_SafePoint()) {
// We're done with a parsing phase. Replaced nodes are not valid
// beyond that point.
n->as_SafePoint()->delete_replaced_nodes();
}
// Use raw traversal of out edges since this code removes out edges
int max = n->outcnt();
for (int j = 0; j < max; ++j) {
Node* child = n->raw_out(j);
if (! useful.member(child)) {
assert(!child->is_top() || child != top(),
"If top is cached in Compile object it is in useful list");
// Only need to remove this out-edge to the useless node
n->raw_del_out(j);
--j;
--max;
}
}
if (n->outcnt() == 1 && n->has_special_unique_user()) {
record_for_igvn(n->unique_out());
}
}
// Remove useless macro and predicate opaq nodes
for (int i = C->macro_count()-1; i >= 0; i--) {
Node* n = C->macro_node(i);
if (!useful.member(n)) {
remove_macro_node(n);
}
}
// Remove useless CastII nodes with range check dependency
for (int i = range_check_cast_count() - 1; i >= 0; i--) {
Node* cast = range_check_cast_node(i);
if (!useful.member(cast)) {
remove_range_check_cast(cast);
}
}
// Remove useless expensive nodes
for (int i = C->expensive_count()-1; i >= 0; i--) {
Node* n = C->expensive_node(i);
if (!useful.member(n)) {
remove_expensive_node(n);
}
}
// clean up the late inline lists
remove_useless_late_inlines(&_string_late_inlines, useful);
remove_useless_late_inlines(&_boxing_late_inlines, useful);
remove_useless_late_inlines(&_late_inlines, useful);
debug_only(verify_graph_edges(true/*check for no_dead_code*/);)
}
//------------------------------frame_size_in_words-----------------------------
// frame_slots in units of words
int Compile::frame_size_in_words() const {
// shift is 0 in LP32 and 1 in LP64
const int shift = (LogBytesPerWord - LogBytesPerInt);
int words = _frame_slots >> shift;
assert( words << shift == _frame_slots, "frame size must be properly aligned in LP64" );
return words;
}
// To bang the stack of this compiled method we use the stack size
// that the interpreter would need in case of a deoptimization. This
// removes the need to bang the stack in the deoptimization blob which
// in turn simplifies stack overflow handling.
int Compile::bang_size_in_bytes() const {
return MAX2(_interpreter_frame_size, frame_size_in_bytes());
}
// ============================================================================
//------------------------------CompileWrapper---------------------------------
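// RAII helper: the constructor installs this Compile as the active
// compilation in the current ciEnv and initializes per-compile state;
// the destructor frees the scratch buffer blob and clears the ciEnv's
// compiler data.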
class CompileWrapper : public StackObj {
Compile *const _compile;
public:
CompileWrapper(Compile* compile);
~CompileWrapper();
};
CompileWrapper::CompileWrapper(Compile* compile) : _compile(compile) {
// the Compile* pointer is stored in the current ciEnv:
ciEnv* env = compile->env();
assert(env == ciEnv::current(), "must already be a ciEnv active");
assert(env->compiler_data() == NULL, "compile already active?");
env->set_compiler_data(compile);
assert(compile == Compile::current(), "sanity");
compile->set_type_dict(NULL);
compile->set_type_hwm(NULL);
compile->set_type_last_size(0);
compile->set_last_tf(NULL, NULL);
compile->set_indexSet_arena(NULL);
compile->set_indexSet_free_block_list(NULL);
compile->init_type_arena();
Type::Initialize(compile);
_compile->set_scratch_buffer_blob(NULL);
_compile->begin_method();
}
CompileWrapper::~CompileWrapper() {
_compile->end_method();
if (_compile->scratch_buffer_blob() != NULL)
BufferBlob::free(_compile->scratch_buffer_blob());
_compile->env()->set_compiler_data(NULL);
}
//----------------------------print_compile_messages---------------------------
void Compile::print_compile_messages() {
#ifndef PRODUCT
// Check if recompiling
if (_subsume_loads == false && PrintOpto) {
// Recompiling without allowing machine instructions to subsume loads
tty->print_cr("*********************************************************");
tty->print_cr("** Bailout: Recompile without subsuming loads **");
tty->print_cr("*********************************************************");
}
if (_do_escape_analysis != DoEscapeAnalysis && PrintOpto) {
// Recompiling without escape analysis
tty->print_cr("*********************************************************");
tty->print_cr("** Bailout: Recompile without escape analysis **");
tty->print_cr("*********************************************************");
}
if (_eliminate_boxing != EliminateAutoBox && PrintOpto) {
// Recompiling without boxing elimination
tty->print_cr("*********************************************************");
tty->print_cr("** Bailout: Recompile without boxing elimination **");
tty->print_cr("*********************************************************");
}
if (env()->break_at_compile()) {
// Open the debugger when compiling this method.
tty->print("### Breaking when compiling: ");
method()->print_short_name();
tty->cr();
BREAKPOINT;
}
if( PrintOpto ) {
if (is_osr_compilation()) {
tty->print("[OSR]%3d", _compile_id);
} else {
tty->print("%3d", _compile_id);
}
}
#endif
}
//-----------------------init_scratch_buffer_blob------------------------------
// Construct a temporary BufferBlob and cache it for this compile.
void Compile::init_scratch_buffer_blob(int const_size) {
// If there is already a scratch buffer blob allocated and the
// constant section is big enough, use it. Otherwise free the
// current and allocate a new one.
BufferBlob* blob = scratch_buffer_blob();
if ((blob != NULL) && (const_size <= _scratch_const_size)) {
// Use the current blob.
} else {
if (blob != NULL) {
BufferBlob::free(blob);
}
ResourceMark rm;
_scratch_const_size = const_size;
int size = (MAX_inst_size + MAX_stubs_size + _scratch_const_size);
blob = BufferBlob::create("Compile::scratch_buffer", size);
// Record the buffer blob for next time.
set_scratch_buffer_blob(blob);
// Have we run out of code space?
if (scratch_buffer_blob() == NULL) {
// Let CompileBroker disable further compilations.
record_failure("Not enough space for scratch buffer in CodeCache");
return;
}
}
// Initialize the relocation buffers
relocInfo* locs_buf = (relocInfo*) blob->content_end() - MAX_locs_size;
set_scratch_locs_memory(locs_buf);
}
//-----------------------scratch_emit_size-------------------------------------
// Helper function that computes size by emitting code
uint Compile::scratch_emit_size(const Node* n) {
// Start scratch_emit_size section.
set_in_scratch_emit_size(true);
// Emit into a trash buffer and count bytes emitted.
// This is a pretty expensive way to compute a size,
// but it works well enough if seldom used.
// All common fixed-size instructions are given a size
// method by the AD file.
// Note that the scratch buffer blob and locs memory are
// allocated at the beginning of the compile task, and
// may be shared by several calls to scratch_emit_size.
// The allocation of the scratch buffer blob is particularly
// expensive, since it has to grab the code cache lock.
BufferBlob* blob = this->scratch_buffer_blob();
assert(blob != NULL, "Initialize BufferBlob at start");
assert(blob->size() > MAX_inst_size, "sanity");
relocInfo* locs_buf = scratch_locs_memory();
address blob_begin = blob->content_begin();
address blob_end = (address)locs_buf;
assert(blob->content_contains(blob_end), "sanity");
CodeBuffer buf(blob_begin, blob_end - blob_begin);
buf.initialize_consts_size(_scratch_const_size);
buf.initialize_stubs_size(MAX_stubs_size);
assert(locs_buf != NULL, "sanity");
int lsize = MAX_locs_size / 3;
buf.consts()->initialize_shared_locs(&locs_buf[lsize * 0], lsize);
buf.insts()->initialize_shared_locs( &locs_buf[lsize * 1], lsize);
buf.stubs()->initialize_shared_locs( &locs_buf[lsize * 2], lsize);
// Do the emission.
Label fakeL; // Fake label for branch instructions.
Label* saveL = NULL;
uint save_bnum = 0;
bool is_branch = n->is_MachBranch();
if (is_branch) {
MacroAssembler masm(&buf);
masm.bind(fakeL);
n->as_MachBranch()->save_label(&saveL, &save_bnum);
n->as_MachBranch()->label_set(&fakeL, 0);
}
n->emit(buf, this->regalloc());
// Emitting into the scratch buffer should not fail
assert (!failing(), err_msg_res("Must not have pending failure. Reason is: %s", failure_reason()));
if (is_branch) // Restore label.
n->as_MachBranch()->label_set(saveL, save_bnum);
// End scratch_emit_size section.
set_in_scratch_emit_size(false);
return buf.insts_size();
}
// ============================================================================
//------------------------------Compile standard-------------------------------
debug_only( int Compile::_debug_idx = 100000; )
// Compile a method. entry_bci is -1 for normal compilations and indicates
// the continuation bci for on stack replacement.
Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr_bci,
bool subsume_loads, bool do_escape_analysis, bool eliminate_boxing )
: Phase(Compiler),
_env(ci_env),
_log(ci_env->log()),
_compile_id(ci_env->compile_id()),
_save_argument_registers(false),
_stub_name(NULL),
_stub_function(NULL),
_stub_entry_point(NULL),
_method(target),
_entry_bci(osr_bci),
_initial_gvn(NULL),
_for_igvn(NULL),
_warm_calls(NULL),
_subsume_loads(subsume_loads),
_do_escape_analysis(do_escape_analysis),
_eliminate_boxing(eliminate_boxing),
_failure_reason(NULL),
_code_buffer("Compile::Fill_buffer"),
_orig_pc_slot(0),
_orig_pc_slot_offset_in_bytes(0),
_has_method_handle_invokes(false),
_mach_constant_base_node(NULL),
_node_bundling_limit(0),
_node_bundling_base(NULL),
_java_calls(0),
_inner_loops(0),
_scratch_const_size(-1),
_in_scratch_emit_size(false),
_dead_node_list(comp_arena()),
_dead_node_count(0),
#ifndef PRODUCT
_trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")),
_in_dump_cnt(0),
_printer(IdealGraphPrinter::printer()),
#endif
_congraph(NULL),
_comp_arena(mtCompiler),
_node_arena(mtCompiler),
_old_arena(mtCompiler),
_Compile_types(mtCompiler),
_replay_inline_data(NULL),
_late_inlines(comp_arena(), 2, 0, NULL),
_string_late_inlines(comp_arena(), 2, 0, NULL),
_boxing_late_inlines(comp_arena(), 2, 0, NULL),
_late_inlines_pos(0),
_number_of_mh_late_inlines(0),
_inlining_progress(false),
_inlining_incrementally(false),
_print_inlining_list(NULL),
_print_inlining_idx(0),
_interpreter_frame_size(0),
_max_node_limit(MaxNodeLimit) {
C = this;
CompileWrapper cw(this);
#ifndef PRODUCT
if (TimeCompiler2) {
tty->print(" ");
target->holder()->name()->print();
tty->print(".");
target->print_short_name();
tty->print(" ");
}
TraceTime t1("Total compilation time", &_t_totalCompilation, TimeCompiler, TimeCompiler2);
TraceTime t2(NULL, &_t_methodCompilation, TimeCompiler, false);
bool print_opto_assembly = PrintOptoAssembly || _method->has_option("PrintOptoAssembly");
if (!print_opto_assembly) {
bool print_assembly = (PrintAssembly || _method->should_print_assembly());
if (print_assembly && !Disassembler::can_decode()) {
tty->print_cr("PrintAssembly request changed to PrintOptoAssembly");
print_opto_assembly = true;
}
}
set_print_assembly(print_opto_assembly);
set_parsed_irreducible_loop(false);
if (method()->has_option("ReplayInline")) {
_replay_inline_data = ciReplay::load_inline_data(method(), entry_bci(), ci_env->comp_level());
}
#endif
set_print_inlining(PrintInlining || method()->has_option("PrintInlining") NOT_PRODUCT( || PrintOptoInlining));
set_print_intrinsics(PrintIntrinsics || method()->has_option("PrintIntrinsics"));
set_has_irreducible_loop(true); // conservative until build_loop_tree() resets it
if (ProfileTraps RTM_OPT_ONLY( || UseRTMLocking )) {
// Make sure the method being compiled gets its own MDO,
// so we can at least track the decompile_count().
// Need MDO to record RTM code generation state.
method()->ensure_method_data();
}
Init(::AliasLevel);
print_compile_messages();
_ilt = InlineTree::build_inline_tree_root();
// Even if NO memory addresses are used, MergeMem nodes must have at least 1 slice
assert(num_alias_types() >= AliasIdxRaw, "");
#define MINIMUM_NODE_HASH 1023
// Node list that Iterative GVN will start with
Unique_Node_List for_igvn(comp_arena());
set_for_igvn(&for_igvn);
// GVN that will be run immediately on new nodes
uint estimated_size = method()->code_size()*4+64;
estimated_size = (estimated_size < MINIMUM_NODE_HASH ? MINIMUM_NODE_HASH : estimated_size);
PhaseGVN gvn(node_arena(), estimated_size);
set_initial_gvn(&gvn);
if (print_inlining() || print_intrinsics()) {
_print_inlining_list = new (comp_arena())GrowableArray<PrintInliningBuffer>(comp_arena(), 1, 1, PrintInliningBuffer());
}
{ // Scope for timing the parser
TracePhase t3("parse", &_t_parser, true);
// Put top into the hash table ASAP.
initial_gvn()->transform_no_reclaim(top());
// Set up tf(), start(), and find a CallGenerator.
CallGenerator* cg = NULL;
if (is_osr_compilation()) {
const TypeTuple *domain = StartOSRNode::osr_domain();
const TypeTuple *range = TypeTuple::make_range(method()->signature());
init_tf(TypeFunc::make(domain, range));
StartNode* s = new (this) StartOSRNode(root(), domain);
initial_gvn()->set_type_bottom(s);
init_start(s);
cg = CallGenerator::for_osr(method(), entry_bci());
} else {
// Normal case.
init_tf(TypeFunc::make(method()));
StartNode* s = new (this) StartNode(root(), tf()->domain());
initial_gvn()->set_type_bottom(s);
init_start(s);
if (method()->intrinsic_id() == vmIntrinsics::_Reference_get && UseG1GC) {
// With java.lang.ref.Reference.get() we must go through the
// intrinsic when G1 is enabled - even when get() is the root
// method of the compile - so that, if necessary, the value in
// the referent field of the reference object gets recorded by
// the pre-barrier code.
// Specifically, if G1 is enabled, the value in the referent
// field is recorded by the G1 SATB pre barrier. This will
// result in the referent being marked live and the reference
// object removed from the list of discovered references during
// reference processing.
cg = find_intrinsic(method(), false);
}
if (cg == NULL) {
float past_uses = method()->interpreter_invocation_count();
float expected_uses = past_uses;
cg = CallGenerator::for_inline(method(), expected_uses);
}
}
if (failing()) return;
if (cg == NULL) {
record_method_not_compilable_all_tiers("cannot parse method");
return;
}
JVMState* jvms = build_start_state(start(), tf());
if ((jvms = cg->generate(jvms)) == NULL) {
if (!failure_reason_is(C2Compiler::retry_class_loading_during_parsing())) {
record_method_not_compilable("method parse failed");
}
return;
}
GraphKit kit(jvms);
if (!kit.stopped()) {
// Accept return values, and transfer control we know not where.
// This is done by a special, unique ReturnNode bound to root.
return_values(kit.jvms());
}
if (kit.has_exceptions()) {
// Any exceptions that escape from this call must be rethrown
// to whatever caller is dynamically above us on the stack.
// This is done by a special, unique RethrowNode bound to root.
rethrow_exceptions(kit.transfer_exceptions_into_jvms());
}
assert(IncrementalInline || (_late_inlines.length() == 0 && !has_mh_late_inlines()), "incremental inlining is off");
if (_late_inlines.length() == 0 && !has_mh_late_inlines() && !failing() && has_stringbuilder()) {
inline_string_calls(true);
}
if (failing()) return;
print_method(PHASE_BEFORE_REMOVEUSELESS, 3);
// Remove clutter produced by parsing.
if (!failing()) {
ResourceMark rm;
PhaseRemoveUseless pru(initial_gvn(), &for_igvn);
}
}
// Note: Large methods are capped off in do_one_bytecode().
if (failing()) return;
// After parsing, node notes are no longer automagic.
// They must be propagated by register_new_node_with_optimizer(),
// clone(), or the like.
set_default_node_notes(NULL);
for (;;) {
int successes = Inline_Warm();
if (failing()) return;
if (successes == 0) break;
}
// Drain the list.
Finish_Warm();
#ifndef PRODUCT
if (_printer) {
_printer->print_inlining(this);
}
#endif
if (failing()) return;
NOT_PRODUCT( verify_graph_edges(); )
// Now optimize
Optimize();
if (failing()) return;
NOT_PRODUCT( verify_graph_edges(); )
#ifndef PRODUCT
if (PrintIdeal) {
ttyLocker ttyl; // keep the following output all in one block
// This output goes directly to the tty, not the compiler log.
// To enable tools to match it up with the compilation activity,
// be sure to tag this tty output with the compile ID.
if (xtty != NULL) {
xtty->head("ideal compile_id='%d'%s", compile_id(),
is_osr_compilation() ? " compile_kind='osr'" :
"");
}
root()->dump(9999);
if (xtty != NULL) {
xtty->tail("ideal");
}
}
#endif
NOT_PRODUCT( verify_barriers(); )
// Dump compilation data to replay it.
if (method()->has_option("DumpReplay")) {
env()->dump_replay_data(_compile_id);
}
if (method()->has_option("DumpInline") && (ilt() != NULL)) {
env()->dump_inline_data(_compile_id);
}
// Now that we know the size of all the monitors we can add a fixed slot
// for the original deopt pc.
_orig_pc_slot = fixed_slots();
int next_slot = _orig_pc_slot + (sizeof(address) / VMRegImpl::stack_slot_size);
set_fixed_slots(next_slot);
// Compute when to use implicit null checks. Used by matching trap based
// nodes and NullCheck optimization.
set_allowed_deopt_reasons();
// Now generate code
Code_Gen();
if (failing()) return;
// Check if we want to skip execution of all compiled code.
{
#ifndef PRODUCT
if (OptoNoExecute) {
record_method_not_compilable("+OptoNoExecute"); // Flag as failed
return;
}
TracePhase t2("install_code", &_t_registerMethod, TimeCompiler);
#endif
if (is_osr_compilation()) {
_code_offsets.set_value(CodeOffsets::Verified_Entry, 0);
_code_offsets.set_value(CodeOffsets::OSR_Entry, _first_block_size);
} else {
_code_offsets.set_value(CodeOffsets::Verified_Entry, _first_block_size);
_code_offsets.set_value(CodeOffsets::OSR_Entry, 0);
}
env()->register_method(_method, _entry_bci,
&_code_offsets,
_orig_pc_slot_offset_in_bytes,
code_buffer(),
frame_size_in_words(), _oop_map_set,
&_handler_table, &_inc_table,
compiler,
env()->comp_level(),
has_unsafe_access(),
SharedRuntime::is_wide_vector(max_vector_size()),
rtm_state()
);
if (log() != NULL) // Print code cache state into compiler log
log()->code_cache_state();
}
}
//------------------------------Compile----------------------------------------
// Compile a runtime stub
Compile::Compile( ciEnv* ci_env,
TypeFunc_generator generator,
address stub_function,
const char *stub_name,
int is_fancy_jump,
bool pass_tls,
bool save_arg_registers,
bool return_pc )
: Phase(Compiler),
_env(ci_env),
_log(ci_env->log()),
_compile_id(0),
_save_argument_registers(save_arg_registers),
_method(NULL),
_stub_name(stub_name),
_stub_function(stub_function),
_stub_entry_point(NULL),
_entry_bci(InvocationEntryBci),
_initial_gvn(NULL),
_for_igvn(NULL),
_warm_calls(NULL),
_orig_pc_slot(0),
_orig_pc_slot_offset_in_bytes(0),
_subsume_loads(true),
_do_escape_analysis(false),
_eliminate_boxing(false),
_failure_reason(NULL),
_code_buffer("Compile::Fill_buffer"),
_has_method_handle_invokes(false),
_mach_constant_base_node(NULL),
_node_bundling_limit(0),
_node_bundling_base(NULL),
_java_calls(0),
_inner_loops(0),
#ifndef PRODUCT
_trace_opto_output(TraceOptoOutput),
_in_dump_cnt(0),
_printer(NULL),
#endif
_comp_arena(mtCompiler),
_node_arena(mtCompiler),
_old_arena(mtCompiler),
_Compile_types(mtCompiler),
_dead_node_list(comp_arena()),
_dead_node_count(0),
_congraph(NULL),
_replay_inline_data(NULL),
_number_of_mh_late_inlines(0),
_inlining_progress(false),
_inlining_incrementally(false),
_print_inlining_list(NULL),
_print_inlining_idx(0),
_allowed_reasons(0),
_interpreter_frame_size(0),
_max_node_limit(MaxNodeLimit) {
C = this;
#ifndef PRODUCT
TraceTime t1(NULL, &_t_totalCompilation, TimeCompiler, false);
TraceTime t2(NULL, &_t_stubCompilation, TimeCompiler, false);
set_print_assembly(PrintFrameConverterAssembly);
set_parsed_irreducible_loop(false);
#endif
set_has_irreducible_loop(false); // no loops
CompileWrapper cw(this);
Init(/*AliasLevel=*/ 0);
init_tf((*generator)());
{
// The following is a dummy for the sake of GraphKit::gen_stub
Unique_Node_List for_igvn(comp_arena());
set_for_igvn(&for_igvn); // not used, but some GraphKit guys push on this
PhaseGVN gvn(Thread::current()->resource_area(),255);
set_initial_gvn(&gvn); // not significant, but GraphKit guys use it pervasively
gvn.transform_no_reclaim(top());
GraphKit kit;
kit.gen_stub(stub_function, stub_name, is_fancy_jump, pass_tls, return_pc);
}
NOT_PRODUCT( verify_graph_edges(); )
Code_Gen();
if (failing()) return;
// Entry point will be accessed using compile->stub_entry_point();
if (code_buffer() == NULL) {
Matcher::soft_match_failure();
} else {
if (PrintAssembly && (WizardMode || Verbose))
tty->print_cr("### Stub::%s", stub_name);
if (!failing()) {
assert(_fixed_slots == 0, "no fixed slots used for runtime stubs");
// Make the NMethod
// For now we mark the frame as never safe for profile stackwalking
RuntimeStub *rs = RuntimeStub::new_runtime_stub(stub_name,
code_buffer(),
CodeOffsets::frame_never_safe,
// _code_offsets.value(CodeOffsets::Frame_Complete),
frame_size_in_words(),
_oop_map_set,
save_arg_registers);
assert(rs != NULL && rs->is_runtime_stub(), "sanity check");
_stub_entry_point = rs->entry_point();
}
}
}
//------------------------------Init-------------------------------------------
// Prepare for a single compilation
void Compile::Init(int aliaslevel) {
_unique = 0;
_regalloc = NULL;
_tf = NULL; // filled in later
_top = NULL; // cached later
_matcher = NULL; // filled in later
_cfg = NULL; // filled in later
set_24_bit_selection_and_mode(Use24BitFP, false);
_node_note_array = NULL;
_default_node_notes = NULL;
_immutable_memory = NULL; // filled in at first inquiry
// Globally visible Nodes
// First set TOP to NULL to give safe behavior during creation of RootNode
set_cached_top_node(NULL);
set_root(new (this) RootNode());
// Now that you have a Root to point to, create the real TOP
set_cached_top_node( new (this) ConNode(Type::TOP) );
set_recent_alloc(NULL, NULL);
// Create Debug Information Recorder to record scopes, oopmaps, etc.
env()->set_oop_recorder(new OopRecorder(env()->arena()));
env()->set_debug_info(new DebugInformationRecorder(env()->oop_recorder()));
env()->set_dependencies(new Dependencies(env()));
_fixed_slots = 0;
set_has_split_ifs(false);
set_has_loops(has_method() && method()->has_loops()); // first approximation
set_has_stringbuilder(false);
set_has_boxed_value(false);
_trap_can_recompile = false; // no traps emitted yet
_major_progress = true; // start out assuming good things will happen
set_has_unsafe_access(false);
set_max_vector_size(0);
Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist));
set_decompile_count(0);
set_do_freq_based_layout(BlockLayoutByFrequency || method_has_option("BlockLayoutByFrequency"));
set_num_loop_opts(LoopOptsCount);
set_do_inlining(Inline);
set_max_inline_size(MaxInlineSize);
set_freq_inline_size(FreqInlineSize);
set_do_scheduling(OptoScheduling);
set_do_count_invocations(false);
set_do_method_data_update(false);
set_rtm_state(NoRTM); // No RTM lock eliding by default
method_has_option_value("MaxNodeLimit", _max_node_limit);
#if INCLUDE_RTM_OPT
if (UseRTMLocking && has_method() && (method()->method_data_or_null() != NULL)) {
int rtm_state = method()->method_data()->rtm_state();
if (method_has_option("NoRTMLockEliding") || ((rtm_state & NoRTM) != 0)) {
// Don't generate RTM lock eliding code.
set_rtm_state(NoRTM);
} else if (method_has_option("UseRTMLockEliding") || ((rtm_state & UseRTM) != 0) || !UseRTMDeopt) {
// Generate RTM lock eliding code without abort ratio calculation code.
set_rtm_state(UseRTM);
} else if (UseRTMDeopt) {
// Generate RTM lock eliding code and include abort ratio calculation
// code if UseRTMDeopt is on.
set_rtm_state(ProfileRTM);
}
}
#endif
if (debug_info()->recording_non_safepoints()) {
set_node_note_array(new(comp_arena()) GrowableArray<Node_Notes*>
(comp_arena(), 8, 0, NULL));
set_default_node_notes(Node_Notes::make(this));
}
// // -- Initialize types before each compile --
// // Update cached type information
// if( _method && _method->constants() )
// Type::update_loaded_types(_method, _method->constants());
// Init alias_type map.
if (!_do_escape_analysis && aliaslevel == 3)
aliaslevel = 2; // No unique types without escape analysis
_AliasLevel = aliaslevel;
const int grow_ats = 16;
_max_alias_types = grow_ats;
_alias_types = NEW_ARENA_ARRAY(comp_arena(), AliasType*, grow_ats);
AliasType* ats = NEW_ARENA_ARRAY(comp_arena(), AliasType, grow_ats);
Copy::zero_to_bytes(ats, sizeof(AliasType)*grow_ats);
{
for (int i = 0; i < grow_ats; i++) _alias_types[i] = &ats[i];
}
// Initialize the first few types.
_alias_types[AliasIdxTop]->Init(AliasIdxTop, NULL);
_alias_types[AliasIdxBot]->Init(AliasIdxBot, TypePtr::BOTTOM);
_alias_types[AliasIdxRaw]->Init(AliasIdxRaw, TypeRawPtr::BOTTOM);
_num_alias_types = AliasIdxRaw+1;
// Zero out the alias type cache.
Copy::zero_to_bytes(_alias_cache, sizeof(_alias_cache));
// A NULL adr_type hits in the cache right away. Preload the right answer.
probe_alias_cache(NULL)->_index = AliasIdxTop;
_intrinsics = NULL;
_macro_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL);
_predicate_opaqs = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL);
_expensive_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL);
_range_check_casts = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL);
register_library_intrinsics();
#ifdef ASSERT
_type_verify_symmetry = true;
#endif
}
//---------------------------init_start----------------------------------------
// Install the StartNode on this compile object.
void Compile::init_start(StartNode* s) {
if (failing())
return; // already failing
assert(s == start(), "");
}
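// The StartNode is not cached; it is located by scanning the outputs of
// the root node.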
StartNode* Compile::start() const {
assert(!failing(), "");
for (DUIterator_Fast imax, i = root()->fast_outs(imax); i < imax; i++) {
Node* start = root()->fast_out(i);
if( start->is_Start() )
return start->as_Start();
}
fatal("Did not find Start node!");
return NULL;
}
//-------------------------------immutable_memory-------------------------------------
// Access immutable memory
Node* Compile::immutable_memory() {
if (_immutable_memory != NULL) {
return _immutable_memory;
}
StartNode* s = start();
for (DUIterator_Fast imax, i = s->fast_outs(imax); true; i++) {
Node *p = s->fast_out(i);
if (p != s && p->as_Proj()->_con == TypeFunc::Memory) {
_immutable_memory = p;
return _immutable_memory;
}
}
ShouldNotReachHere();
return NULL;
}
//----------------------set_cached_top_node------------------------------------
// Install the cached top node, and make sure Node::is_top works correctly.
void Compile::set_cached_top_node(Node* tn) {
if (tn != NULL) verify_top(tn);
Node* old_top = _top;
_top = tn;
// Calling Node::setup_is_top gives the nodes a chance to adjust
// their _out arrays.
if (_top != NULL) _top->setup_is_top();
if (old_top != NULL) old_top->setup_is_top();
assert(_top == NULL || top()->is_top(), "");
}
#ifdef ASSERT
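// Count live nodes by collecting the useful set with a graph walk.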
uint Compile::count_live_nodes_by_graph_walk() {
Unique_Node_List useful(comp_arena());
// Get useful node list by walking the graph.
identify_useful_nodes(useful);
return useful.size();
}
void Compile::print_missing_nodes() {
// Return if CompileLog is NULL and PrintIdealNodeCount is false.
if ((_log == NULL) && (! PrintIdealNodeCount)) {
return;
}
// This is an expensive function. It is executed only when the user
// specifies the VerifyIdealNodeCount option, and is therefore aware of
// the additional work needed: identifying reachable nodes by walking
// the flow graph and finding the missing ones using _dead_node_list.
Unique_Node_List useful(comp_arena());
// Get useful node list by walking the graph.
identify_useful_nodes(useful);
uint l_nodes = C->live_nodes();
uint l_nodes_by_walk = useful.size();
if (l_nodes != l_nodes_by_walk) {
if (_log != NULL) {
_log->begin_head("mismatched_nodes count='%d'", abs((int) (l_nodes - l_nodes_by_walk)));
_log->stamp();
_log->end_head();
}
VectorSet& useful_member_set = useful.member_set();
int last_idx = l_nodes_by_walk;
for (int i = 0; i < last_idx; i++) {
if (useful_member_set.test(i)) {
if (_dead_node_list.test(i)) {
if (_log != NULL) {
_log->elem("mismatched_node_info node_idx='%d' type='both live and dead'", i);
}
if (PrintIdealNodeCount) {
// Print the log message to tty
tty->print_cr("mismatched_node idx='%d' both live and dead'", i);
useful.at(i)->dump();
}
}
}
else if (! _dead_node_list.test(i)) {
if (_log != NULL) {
_log->elem("mismatched_node_info node_idx='%d' type='neither live nor dead'", i);
}
if (PrintIdealNodeCount) {
// Print the log message to tty
tty->print_cr("mismatched_node idx='%d' type='neither live nor dead'", i);
}
}
}
if (_log != NULL) {
_log->tail("mismatched_nodes");
}
}
}
#endif
#ifndef PRODUCT
void Compile::verify_top(Node* tn) const {
if (tn != NULL) {
assert(tn->is_Con(), "top node must be a constant");
assert(((ConNode*)tn)->type() == Type::TOP, "top node must have correct type");
assert(tn->in(0) != NULL, "must have live top node");
}
}
#endif
//-------------------Managing Per-Node Debug & Profile Info-------------------
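// Grow the node-notes array by appending zero-filled blocks of
// _node_notes_block_size entries; the array grows by at least its
// current length.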
void Compile::grow_node_notes(GrowableArray<Node_Notes*>* arr, int grow_by) {
guarantee(arr != NULL, "");
int num_blocks = arr->length();
if (grow_by < num_blocks) grow_by = num_blocks;
int num_notes = grow_by * _node_notes_block_size;
Node_Notes* notes = NEW_ARENA_ARRAY(node_arena(), Node_Notes, num_notes);
Copy::zero_to_bytes(notes, num_notes * sizeof(Node_Notes));
while (num_notes > 0) {
arr->append(notes);
notes += _node_notes_block_size;
num_notes -= _node_notes_block_size;
}
assert(num_notes == 0, "exact multiple, please");
}
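// Copy debug notes from source to dest. Pre-existing notes on dest are
// preserved: when both nodes have notes, they are merged with dest's
// values taking precedence.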
bool Compile::copy_node_notes_to(Node* dest, Node* source) {
if (source == NULL || dest == NULL) return false;
if (dest->is_Con())
return false; // Do not push debug info onto constants.
#ifdef ASSERT
// Leave a bread crumb trail pointing to the original node:
if (dest != NULL && dest != source && dest->debug_orig() == NULL) {
dest->set_debug_orig(source);
}
#endif
if (node_note_array() == NULL)
return false; // Not collecting any notes now.
// This is a copy onto a pre-existing node, which may already have notes.
// If both nodes have notes, do not overwrite any pre-existing notes.
Node_Notes* source_notes = node_notes_at(source->_idx);
if (source_notes == NULL || source_notes->is_clear()) return false;
Node_Notes* dest_notes = node_notes_at(dest->_idx);
if (dest_notes == NULL || dest_notes->is_clear()) {
return set_node_notes_at(dest->_idx, source_notes);
}
Node_Notes merged_notes = (*source_notes);
// The order of operations here ensures that dest notes will win...
merged_notes.update_from(dest_notes);
return set_node_notes_at(dest->_idx, &merged_notes);
}
//--------------------------allow_range_check_smearing-------------------------
// Gating condition for coalescing similar range checks.
// Sometimes we try 'speculatively' replacing a series of range checks by a
// single covering check that is at least as strong as any of them.
// If the optimization succeeds, the simplified (strengthened) range check
// will always succeed. If it fails, we will deopt, and then give up
// on the optimization.
bool Compile::allow_range_check_smearing() const {
// If this method has already thrown a range-check,
// assume it was because we already tried range smearing
// and it failed.
uint already_trapped = trap_count(Deoptimization::Reason_range_check);
return !already_trapped;
}
//------------------------------flatten_alias_type-----------------------------
const TypePtr *Compile::flatten_alias_type( const TypePtr *tj ) const {
int offset = tj->offset();
TypePtr::PTR ptr = tj->ptr();
// Known instance (scalarizable allocation) alias only with itself.
bool is_known_inst = tj->isa_oopptr() != NULL &&
tj->is_oopptr()->is_known_instance();
// Process weird unsafe references.
if (offset == Type::OffsetBot && (tj->isa_instptr() /*|| tj->isa_klassptr()*/)) {
assert(InlineUnsafeOps, "indeterminate pointers come only from unsafe ops");
assert(!is_known_inst, "scalarizable allocation should not have unsafe references");
tj = TypeOopPtr::BOTTOM;
ptr = tj->ptr();
offset = tj->offset();
}
// Array pointers need some flattening
const TypeAryPtr *ta = tj->isa_aryptr();
if (ta && ta->is_stable()) {
// Erase stability property for alias analysis.
tj = ta = ta->cast_to_stable(false);
}
if( ta && is_known_inst ) {
if ( offset != Type::OffsetBot &&
offset > arrayOopDesc::length_offset_in_bytes() ) {
offset = Type::OffsetBot; // Flatten constant access into array body only
tj = ta = TypeAryPtr::make(ptr, ta->ary(), ta->klass(), true, offset, ta->instance_id());
}
} else if( ta && _AliasLevel >= 2 ) {
// For arrays indexed by constant indices, we flatten the alias
// space to include all of the array body. Only the header, klass
// and array length can be accessed un-aliased.
if( offset != Type::OffsetBot ) {
if( ta->const_oop() ) { // MethodData* or Method*
offset = Type::OffsetBot; // Flatten constant access into array body
tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),ta->ary(),ta->klass(),false,offset);
} else if( offset == arrayOopDesc::length_offset_in_bytes() ) {
// range is OK as-is.
tj = ta = TypeAryPtr::RANGE;
} else if( offset == oopDesc::klass_offset_in_bytes() ) {
tj = TypeInstPtr::KLASS; // all klass loads look alike
ta = TypeAryPtr::RANGE; // generic ignored junk
ptr = TypePtr::BotPTR;
} else if( offset == oopDesc::mark_offset_in_bytes() ) {
tj = TypeInstPtr::MARK;
ta = TypeAryPtr::RANGE; // generic ignored junk
ptr = TypePtr::BotPTR;
} else { // Random constant offset into array body
offset = Type::OffsetBot; // Flatten constant access into array body
tj = ta = TypeAryPtr::make(ptr,ta->ary(),ta->klass(),false,offset);
}
}
// Arrays of fixed size alias with arrays of unknown size.
if (ta->size() != TypeInt::POS) {
const TypeAry *tary = TypeAry::make(ta->elem(), TypeInt::POS);
tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,ta->klass(),false,offset);
}
// Arrays of known objects become arrays of unknown objects.
if (ta->elem()->isa_narrowoop() && ta->elem() != TypeNarrowOop::BOTTOM) {
const TypeAry *tary = TypeAry::make(TypeNarrowOop::BOTTOM, ta->size());
tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset);
}
if (ta->elem()->isa_oopptr() && ta->elem() != TypeInstPtr::BOTTOM) {
const TypeAry *tary = TypeAry::make(TypeInstPtr::BOTTOM, ta->size());
tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset);
}
// Arrays of bytes and of booleans both use 'bastore' and 'baload' so
// cannot be distinguished by bytecode alone.
if (ta->elem() == TypeInt::BOOL) {
const TypeAry *tary = TypeAry::make(TypeInt::BYTE, ta->size());
ciKlass* aklass = ciTypeArrayKlass::make(T_BYTE);
tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,aklass,false,offset);
}
// During the 2nd round of IterGVN, NotNull castings are removed.
// Make sure the Bottom and NotNull variants alias the same.
// Also, make sure exact and non-exact variants alias the same.
if (ptr == TypePtr::NotNull || ta->klass_is_exact() || ta->speculative() != NULL) {
tj = ta = TypeAryPtr::make(TypePtr::BotPTR,ta->ary(),ta->klass(),false,offset);
}
}
// Oop pointers need some flattening
const TypeInstPtr *to = tj->isa_instptr();
if( to && _AliasLevel >= 2 && to != TypeOopPtr::BOTTOM ) {
ciInstanceKlass *k = to->klass()->as_instance_klass();
if( ptr == TypePtr::Constant ) {
if (to->klass() != ciEnv::current()->Class_klass() ||
offset < k->size_helper() * wordSize) {
// No constant oop pointers (such as Strings); they alias with
// unknown strings.
assert(!is_known_inst, "not scalarizable allocation");
tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset);
}
} else if( is_known_inst ) {
tj = to; // Keep NotNull and klass_is_exact for instance type
} else if( ptr == TypePtr::NotNull || to->klass_is_exact() ) {
// During the 2nd round of IterGVN, NotNull castings are removed.
// Make sure the Bottom and NotNull variants alias the same.
// Also, make sure exact and non-exact variants alias the same.
tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset);
}
if (to->speculative() != NULL) {
tj = to = TypeInstPtr::make(to->ptr(),to->klass(),to->klass_is_exact(),to->const_oop(),to->offset(), to->instance_id());
}
// Canonicalize the holder of this field
if (offset >= 0 && offset < instanceOopDesc::base_offset_in_bytes()) {
// First handle header references such as a LoadKlassNode, even if the
// object's klass is unloaded at compile time (4965979).
if (!is_known_inst) { // Do it only for non-instance types
tj = to = TypeInstPtr::make(TypePtr::BotPTR, env()->Object_klass(), false, NULL, offset);
}
} else if (offset < 0 || offset >= k->size_helper() * wordSize) {
// Static fields are in the space above the normal instance
// fields in the java.lang.Class instance.
if (to->klass() != ciEnv::current()->Class_klass()) {
to = NULL;
tj = TypeOopPtr::BOTTOM;
offset = tj->offset();
}
} else {
ciInstanceKlass *canonical_holder = k->get_canonical_holder(offset);
if (!k->equals(canonical_holder) || tj->offset() != offset) {
if( is_known_inst ) {
tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, true, NULL, offset, to->instance_id());
} else {
tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, false, NULL, offset);
}
}
}
}
// Klass pointers to object array klasses need some flattening
const TypeKlassPtr *tk = tj->isa_klassptr();
if( tk ) {
// If we are referencing a field within a Klass, we need
// to assume the worst case of an Object. Both exact and
// inexact types must flatten to the same alias class so
// use NotNull as the PTR.
if ( offset == Type::OffsetBot || (offset >= 0 && (size_t)offset < sizeof(Klass)) ) {
tj = tk = TypeKlassPtr::make(TypePtr::NotNull,
TypeKlassPtr::OBJECT->klass(),
offset);
}
ciKlass* klass = tk->klass();
if( klass->is_obj_array_klass() ) {
ciKlass* k = TypeAryPtr::OOPS->klass();
if( !k || !k->is_loaded() ) // Only fails for some -Xcomp runs
k = TypeInstPtr::BOTTOM->klass();
tj = tk = TypeKlassPtr::make( TypePtr::NotNull, k, offset );
}
// Check for precise loads from the primary supertype array and force them
// to the supertype cache alias index. Check for generic array loads from
// the primary supertype array and also force them to the supertype cache
// alias index. Since the same load can reach both, we need to merge
// these 2 disparate memories into the same alias class. Since the
// primary supertype array is read-only, there's no chance of confusion
// where we bypass an array load and an array store.
int primary_supers_offset = in_bytes(Klass::primary_supers_offset());
if (offset == Type::OffsetBot ||
(offset >= primary_supers_offset &&
offset < (int)(primary_supers_offset + Klass::primary_super_limit() * wordSize)) ||
offset == (int)in_bytes(Klass::secondary_super_cache_offset())) {
offset = in_bytes(Klass::secondary_super_cache_offset());
tj = tk = TypeKlassPtr::make( TypePtr::NotNull, tk->klass(), offset );
}
}
// Flatten all Raw pointers together.
if (tj->base() == Type::RawPtr)
tj = TypeRawPtr::BOTTOM;
if (tj->base() == Type::AnyPtr)
tj = TypePtr::BOTTOM; // An error, which the caller must check for.
// Flatten all to bottom for now
switch( _AliasLevel ) {
case 0:
tj = TypePtr::BOTTOM;
break;
case 1: // Flatten to: oop, static, field or array
switch (tj->base()) {
//case Type::AryPtr: tj = TypeAryPtr::RANGE; break;
case Type::RawPtr: tj = TypeRawPtr::BOTTOM; break;
case Type::AryPtr: // do not distinguish arrays at all
case Type::InstPtr: tj = TypeInstPtr::BOTTOM; break;
case Type::KlassPtr: tj = TypeKlassPtr::OBJECT; break;
case Type::AnyPtr: tj = TypePtr::BOTTOM; break; // caller checks it
default: ShouldNotReachHere();
}
break;
case 2: // No collapsing at level 2; keep all splits
case 3: // No collapsing at level 3; keep all splits
break;
default:
Unimplemented();
}
offset = tj->offset();
assert( offset != Type::OffsetTop, "Offset has fallen from constant" );
assert( (offset != Type::OffsetBot && tj->base() != Type::AryPtr) ||
(offset == Type::OffsetBot && tj->base() == Type::AryPtr) ||
(offset == Type::OffsetBot && tj == TypeOopPtr::BOTTOM) ||
(offset == Type::OffsetBot && tj == TypePtr::BOTTOM) ||
(offset == oopDesc::mark_offset_in_bytes() && tj->base() == Type::AryPtr) ||
(offset == oopDesc::klass_offset_in_bytes() && tj->base() == Type::AryPtr) ||
(offset == arrayOopDesc::length_offset_in_bytes() && tj->base() == Type::AryPtr) ,
"For oops, klasses, raw offset must be constant; for arrays the offset is never known" );
assert( tj->ptr() != TypePtr::TopPTR &&
tj->ptr() != TypePtr::AnyNull &&
tj->ptr() != TypePtr::Null, "No imprecise addresses" );
// assert( tj->ptr() != TypePtr::Constant ||
// tj->base() == Type::RawPtr ||
// tj->base() == Type::KlassPtr, "No constant oop addresses" );
return tj;
}
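// Initialize a fresh alias type entry; for known-instance types, also
// record the alias index of the corresponding general (non-instance)
// memory slice in _general_index.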
void Compile::AliasType::Init(int i, const TypePtr* at) {
_index = i;
_adr_type = at;
_field = NULL;
_element = NULL;
_is_rewritable = true; // default
const TypeOopPtr *atoop = (at != NULL) ? at->isa_oopptr() : NULL;
if (atoop != NULL && atoop->is_known_instance()) {
const TypeOopPtr *gt = atoop->cast_to_instance_id(TypeOopPtr::InstanceBot);
_general_index = Compile::current()->get_alias_index(gt);
} else {
_general_index = 0;
}
}
BasicType Compile::AliasType::basic_type() const {
if (element() != NULL) {
const Type* element = adr_type()->is_aryptr()->elem();
return element->isa_narrowoop() ? T_OBJECT : element->array_element_basic_type();
} else if (field() != NULL) {
return field()->layout_type();
} else {
return T_ILLEGAL; // unknown
}
}
//---------------------------------print_on------------------------------------
#ifndef PRODUCT
void Compile::AliasType::print_on(outputStream* st) {
if (index() < 10)
st->print("@ <%d> ", index());
else st->print("@ <%d>", index());
st->print(is_rewritable() ? " " : " RO");
int offset = adr_type()->offset();
if (offset == Type::OffsetBot)
st->print(" +any");
else st->print(" +%-3d", offset);
st->print(" in ");
adr_type()->dump_on(st);
const TypeOopPtr* tjp = adr_type()->isa_oopptr();
if (field() != NULL && tjp) {
if (tjp->klass() != field()->holder() ||
tjp->offset() != field()->offset_in_bytes()) {
st->print(" != ");
field()->print();
st->print(" ***");
}
}
}
void print_alias_types() {
Compile* C = Compile::current();
tty->print_cr("--- Alias types, AliasIdxBot .. %d", C->num_alias_types()-1);
for (int idx = Compile::AliasIdxBot; idx < C->num_alias_types(); idx++) {
C->alias_type(idx)->print_on(tty);
tty->cr();
}
}
#endif
//----------------------------probe_alias_cache--------------------------------
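// Hash the address-type pointer into the fixed-size, direct-mapped alias cache.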
Compile::AliasCacheEntry* Compile::probe_alias_cache(const TypePtr* adr_type) {
intptr_t key = (intptr_t) adr_type;
key ^= key >> logAliasCacheSize;
return &_alias_cache[key & right_n_bits(logAliasCacheSize)];
}
//-----------------------------grow_alias_types--------------------------------
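// Double the capacity of the alias type table, reallocating the pointer
// array and zero-filling the newly added entries.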
void Compile::grow_alias_types() {
const int old_ats = _max_alias_types; // how many before?
const int new_ats = old_ats; // how many more?
const int grow_ats = old_ats+new_ats; // how many now?
_max_alias_types = grow_ats;
_alias_types = REALLOC_ARENA_ARRAY(comp_arena(), AliasType*, _alias_types, old_ats, grow_ats);
AliasType* ats = NEW_ARENA_ARRAY(comp_arena(), AliasType, new_ats);
Copy::zero_to_bytes(ats, sizeof(AliasType)*new_ats);
for (int i = 0; i < new_ats; i++) _alias_types[old_ats+i] = &ats[i];
}
//--------------------------------find_alias_type------------------------------
Compile::AliasType* Compile::find_alias_type(const TypePtr* adr_type, bool no_create, ciField* original_field) {
if (_AliasLevel == 0)
return alias_type(AliasIdxBot);
AliasCacheEntry* ace = probe_alias_cache(adr_type);
if (ace->_adr_type == adr_type) {
return alias_type(ace->_index);
}
// Handle special cases.
if (adr_type == NULL) return alias_type(AliasIdxTop);
if (adr_type == TypePtr::BOTTOM) return alias_type(AliasIdxBot);
// Do it the slow way.
const TypePtr* flat = flatten_alias_type(adr_type);
#ifdef ASSERT
{
ResourceMark rm;
assert(flat == flatten_alias_type(flat),
err_msg("not idempotent: adr_type = %s; flat = %s => %s", Type::str(adr_type),
Type::str(flat), Type::str(flatten_alias_type(flat))));
assert(flat != TypePtr::BOTTOM,
err_msg("cannot alias-analyze an untyped ptr: adr_type = %s", Type::str(adr_type)));
if (flat->isa_oopptr() && !flat->isa_klassptr()) {
const TypeOopPtr* foop = flat->is_oopptr();
// Scalarizable allocations have exact klass always.
bool exact = !foop->klass_is_exact() || foop->is_known_instance();
const TypePtr* xoop = foop->cast_to_exactness(exact)->is_ptr();
assert(foop == flatten_alias_type(xoop),
err_msg("exactness must not affect alias type: foop = %s; xoop = %s",
Type::str(foop), Type::str(xoop)));
}
}
#endif
int idx = AliasIdxTop;
for (int i = 0; i < num_alias_types(); i++) {
if (alias_type(i)->adr_type() == flat) {
idx = i;
break;
}
}
if (idx == AliasIdxTop) {
if (no_create) return NULL;
// Grow the array if necessary.
if (_num_alias_types == _max_alias_types) grow_alias_types();
// Add a new alias type.
idx = _num_alias_types++;
_alias_types[idx]->Init(idx, flat);
if (flat == TypeInstPtr::KLASS) alias_type(idx)->set_rewritable(false);
if (flat == TypeAryPtr::RANGE) alias_type(idx)->set_rewritable(false);
if (flat->isa_instptr()) {
if (flat->offset() == java_lang_Class::klass_offset_in_bytes()
&& flat->is_instptr()->klass() == env()->Class_klass())
alias_type(idx)->set_rewritable(false);
}
if (flat->isa_aryptr()) {
#ifdef ASSERT
const int header_size_min = arrayOopDesc::base_offset_in_bytes(T_BYTE);
// (T_BYTE has the weakest alignment and size restrictions...)
assert(flat->offset() < header_size_min, "array body reference must be OffsetBot");
#endif
if (flat->offset() == TypePtr::OffsetBot) {
alias_type(idx)->set_element(flat->is_aryptr()->elem());
}
}
if (flat->isa_klassptr()) {
if (flat->offset() == in_bytes(Klass::super_check_offset_offset()))
alias_type(idx)->set_rewritable(false);
if (flat->offset() == in_bytes(Klass::modifier_flags_offset()))
alias_type(idx)->set_rewritable(false);
if (flat->offset() == in_bytes(Klass::access_flags_offset()))
alias_type(idx)->set_rewritable(false);
if (flat->offset() == in_bytes(Klass::java_mirror_offset()))
alias_type(idx)->set_rewritable(false);
}
// %%% (We would like to finalize JavaThread::threadObj_offset(),
// but the base pointer type is not distinctive enough to identify
// references into JavaThread.)
// Check for final fields.
const TypeInstPtr* tinst = flat->isa_instptr();
if (tinst && tinst->offset() >= instanceOopDesc::base_offset_in_bytes()) {
ciField* field;
if (tinst->const_oop() != NULL &&
tinst->klass() == ciEnv::current()->Class_klass() &&
tinst->offset() >= (tinst->klass()->as_instance_klass()->size_helper() * wordSize)) {
// static field
ciInstanceKlass* k = tinst->const_oop()->as_instance()->java_lang_Class_klass()->as_instance_klass();
field = k->get_field_by_offset(tinst->offset(), true);
} else {
ciInstanceKlass *k = tinst->klass()->as_instance_klass();
field = k->get_field_by_offset(tinst->offset(), false);
}
assert(field == NULL ||
original_field == NULL ||
(field->holder() == original_field->holder() &&
field->offset() == original_field->offset() &&
field->is_static() == original_field->is_static()), "wrong field?");
// Set field() and is_rewritable() attributes.
if (field != NULL) alias_type(idx)->set_field(field);
}
}
// Fill the cache for next time.
ace->_adr_type = adr_type;
ace->_index = idx;
assert(alias_type(adr_type) == alias_type(idx), "type must be installed");
// Might as well try to fill the cache for the flattened version, too.
AliasCacheEntry* face = probe_alias_cache(flat);
if (face->_adr_type == NULL) {
face->_adr_type = flat;
face->_index = idx;
assert(alias_type(flat) == alias_type(idx), "flat type must work too");
}
return alias_type(idx);
}
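// Find (or create) the alias type for a field, using the field's holder
// and byte offset; static fields are addressed through the holder's
// java mirror.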
Compile::AliasType* Compile::alias_type(ciField* field) {
const TypeOopPtr* t;
if (field->is_static())
t = TypeInstPtr::make(field->holder()->java_mirror());
else
t = TypeOopPtr::make_from_klass_raw(field->holder());
AliasType* atp = alias_type(t->add_offset(field->offset_in_bytes()), field);
assert((field->is_final() || field->is_stable()) == !atp->is_rewritable(), "must get the rewritable bits correct");
return atp;
}
//------------------------------have_alias_type--------------------------------
bool Compile::have_alias_type(const TypePtr* adr_type) {
AliasCacheEntry* ace = probe_alias_cache(adr_type);
if (ace->_adr_type == adr_type) {
return true;
}
// Handle special cases.
if (adr_type == NULL) return true;
if (adr_type == TypePtr::BOTTOM) return true;
return find_alias_type(adr_type, true, NULL) != NULL;
}
//-----------------------------must_alias--------------------------------------
// True if all values of the given address type are in the given alias category.
bool Compile::must_alias(const TypePtr* adr_type, int alias_idx) {
if (alias_idx == AliasIdxBot) return true; // the universal category
if (adr_type == NULL) return true; // NULL serves as TypePtr::TOP
if (alias_idx == AliasIdxTop) return false; // the empty category
if (adr_type->base() == Type::AnyPtr) return false; // TypePtr::BOTTOM or its twins
// the only remaining possible overlap is identity
int adr_idx = get_alias_index(adr_type);
assert(adr_idx != AliasIdxBot && adr_idx != AliasIdxTop, "");
assert(adr_idx == alias_idx ||
(alias_type(alias_idx)->adr_type() != TypeOopPtr::BOTTOM
&& adr_type != TypeOopPtr::BOTTOM),
"should not be testing for overlap with an unsafe pointer");
return adr_idx == alias_idx;
}
//------------------------------can_alias--------------------------------------
// True if any values of the given address type are in the given alias category.
bool Compile::can_alias(const TypePtr* adr_type, int alias_idx) {
if (alias_idx == AliasIdxTop) return false; // the empty category
if (adr_type == NULL) return false; // NULL serves as TypePtr::TOP
if (alias_idx == AliasIdxBot) return true; // the universal category
if (adr_type->base() == Type::AnyPtr) return true; // TypePtr::BOTTOM or its twins
// the only remaining possible overlap is identity
int adr_idx = get_alias_index(adr_type);
assert(adr_idx != AliasIdxBot && adr_idx != AliasIdxTop, "");
return adr_idx == alias_idx;
}
//---------------------------pop_warm_call-------------------------------------
WarmCallInfo* Compile::pop_warm_call() {
WarmCallInfo* wci = _warm_calls;
if (wci != NULL) _warm_calls = wci->remove_from(wci);
return wci;
}
//----------------------------Inline_Warm--------------------------------------
int Compile::Inline_Warm() {
// If there is room, try to inline some more warm call sites.
// %%% Do a graph index compaction pass when we think we're out of space?
if (!InlineWarmCalls) return 0;
int calls_made_hot = 0;
int room_to_grow = NodeCountInliningCutoff - unique();
int amount_to_grow = MIN2(room_to_grow, (int)NodeCountInliningStep);
int amount_grown = 0;
WarmCallInfo* call;
while (amount_to_grow > 0 && (call = pop_warm_call()) != NULL) {
int est_size = (int)call->size();
if (est_size > (room_to_grow - amount_grown)) {
// This one won't fit anyway. Get rid of it.
call->make_cold();
continue;
}
call->make_hot();
calls_made_hot++;
amount_grown += est_size;
amount_to_grow -= est_size;
}
if (calls_made_hot > 0) set_major_progress();
return calls_made_hot;
}
//----------------------------Finish_Warm--------------------------------------
void Compile::Finish_Warm() {
if (!InlineWarmCalls) return;
if (failing()) return;
if (warm_calls() == NULL) return;
// Clean up loose ends, if we are out of space for inlining.
WarmCallInfo* call;
while ((call = pop_warm_call()) != NULL) {
call->make_cold();
}
}
//---------------------cleanup_loop_predicates-----------------------
// Remove the opaque nodes that protect the predicates so that all unused
// checks and uncommon_traps will be eliminated from the ideal graph
void Compile::cleanup_loop_predicates(PhaseIterGVN &igvn) {
if (predicate_count()==0) return;
for (int i = predicate_count(); i > 0; i--) {
Node * n = predicate_opaque1_node(i-1);
assert(n->Opcode() == Op_Opaque1, "must be");
igvn.replace_node(n, n->in(1));
}
assert(predicate_count()==0, "should be clean!");
}
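// Track a range-check dependent CastII so that remove_range_check_casts()
// can later strip it from the graph.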
void Compile::add_range_check_cast(Node* n) {
assert(n->isa_CastII()->has_range_check(), "CastII should have range check dependency");
assert(!_range_check_casts->contains(n), "duplicate entry in range check casts");
_range_check_casts->append(n);
}
// Remove all range check dependent CastIINodes.
void Compile::remove_range_check_casts(PhaseIterGVN &igvn) {
for (int i = range_check_cast_count(); i > 0; i--) {
Node* cast = range_check_cast_node(i-1);
assert(cast->isa_CastII()->has_range_check(), "CastII should have range check dependency");
igvn.replace_node(cast, cast->in(1));
}
assert(range_check_cast_count() == 0, "should be empty");
}
// StringOpts and late inlining of string methods
void Compile::inline_string_calls(bool parse_time) {
{
// remove useless nodes to make the usage analysis simpler
ResourceMark rm;
PhaseRemoveUseless pru(initial_gvn(), for_igvn());
}
{
ResourceMark rm;
print_method(PHASE_BEFORE_STRINGOPTS, 3);
PhaseStringOpts pso(initial_gvn(), for_igvn());
print_method(PHASE_AFTER_STRINGOPTS, 3);
}
// now inline anything that we skipped the first time around
if (!parse_time) {
_late_inlines_pos = _late_inlines.length();
}
while (_string_late_inlines.length() > 0) {
CallGenerator* cg = _string_late_inlines.pop();
cg->do_late_inline();
if (failing()) return;
}
_string_late_inlines.trunc_to(0);
}
// Late inlining of boxing methods
void Compile::inline_boxing_calls(PhaseIterGVN& igvn) {
if (_boxing_late_inlines.length() > 0) {
assert(has_boxed_value(), "inconsistent");
PhaseGVN* gvn = initial_gvn();
set_inlining_incrementally(true);
assert( igvn._worklist.size() == 0, "should be done with igvn" );
for_igvn()->clear();
gvn->replace_with(&igvn);
_late_inlines_pos = _late_inlines.length();
while (_boxing_late_inlines.length() > 0) {
CallGenerator* cg = _boxing_late_inlines.pop();
cg->do_late_inline();
if (failing()) return;
}
_boxing_late_inlines.trunc_to(0);
{
ResourceMark rm;
PhaseRemoveUseless pru(gvn, for_igvn());
}
igvn = PhaseIterGVN(gvn);
igvn.optimize();
set_inlining_progress(false);
set_inlining_incrementally(false);
}
}
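// Do one pass of incremental inlining: inline late-inline candidates until
// one reports progress, drop the processed entries from the list, remove
// useless nodes, and rebuild IGVN from the updated GVN.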
void Compile::inline_incrementally_one(PhaseIterGVN& igvn) {
assert(IncrementalInline, "incremental inlining should be on");
PhaseGVN* gvn = initial_gvn();
set_inlining_progress(false);
for_igvn()->clear();
gvn->replace_with(&igvn);
int i = 0;
for (; i <_late_inlines.length() && !inlining_progress(); i++) {
CallGenerator* cg = _late_inlines.at(i);
_late_inlines_pos = i+1;
cg->do_late_inline();
if (failing()) return;
}
int j = 0;
for (; i < _late_inlines.length(); i++, j++) {
_late_inlines.at_put(j, _late_inlines.at(i));
}
_late_inlines.trunc_to(j);
{
ResourceMark rm;
PhaseRemoveUseless pru(gvn, for_igvn());
}
igvn = PhaseIterGVN(gvn);
}
// Perform incremental inlining until the bound on the number of live nodes is reached
void Compile::inline_incrementally(PhaseIterGVN& igvn) {
PhaseGVN* gvn = initial_gvn();
set_inlining_incrementally(true);
set_inlining_progress(true);
uint low_live_nodes = 0;
while(inlining_progress() && _late_inlines.length() > 0) {
if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {
if (low_live_nodes < (uint)LiveNodeCountInliningCutoff * 8 / 10) {
// PhaseIdealLoop is expensive, so we only try it once we are
// running out of live node budget, and we only try it again if the
// previous pass helped get the number of nodes down significantly.
PhaseIdealLoop ideal_loop( igvn, false, true );
if (failing()) return;
low_live_nodes = live_nodes();
_major_progress = true;
}
if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {
break;
}
}
inline_incrementally_one(igvn);
if (failing()) return;
igvn.optimize();
if (failing()) return;
}
assert( igvn._worklist.size() == 0, "should be done with igvn" );
if (_string_late_inlines.length() > 0) {
assert(has_stringbuilder(), "inconsistent");
for_igvn()->clear();
initial_gvn()->replace_with(&igvn);
inline_string_calls(false);
if (failing()) return;
{
ResourceMark rm;
PhaseRemoveUseless pru(initial_gvn(), for_igvn());
}
igvn = PhaseIterGVN(gvn);
igvn.optimize();
}
set_inlining_incrementally(false);
}
// Remove edges from "root" to each SafePoint at a backward branch.
// They were inserted during parsing (see add_safepoint()) to make
// infinite loops without calls or exceptions visible to root, i.e.,
// useful.
void Compile::remove_root_to_sfpts_edges(PhaseIterGVN& igvn) {
Node *r = root();
if (r != NULL) {
for (uint i = r->req(); i < r->len(); ++i) {
Node *n = r->in(i);
if (n != NULL && n->is_SafePoint()) {
r->rm_prec(i);
if (n->outcnt() == 0) {
igvn.remove_dead_node(n);
}
--i;
}
}
}
}
//------------------------------Optimize---------------------------------------
// Given a graph, optimize it.
void Compile::Optimize() {
TracePhase t1("optimizer", &_t_optimizer, true);
#ifndef PRODUCT
if (env()->break_at_compile()) {
BREAKPOINT;
}
#endif
ResourceMark rm;
int loop_opts_cnt;
NOT_PRODUCT( verify_graph_edges(); )
print_method(PHASE_AFTER_PARSING);
{
// Iterative Global Value Numbering, including ideal transforms
// Initialize IterGVN with types and values from parse-time GVN
PhaseIterGVN igvn(initial_gvn());
{
NOT_PRODUCT( TracePhase t2("iterGVN", &_t_iterGVN, TimeCompiler); )
igvn.optimize();
}
print_method(PHASE_ITER_GVN1, 2);
if (failing()) return;
{
NOT_PRODUCT( TracePhase t2("incrementalInline", &_t_incrInline, TimeCompiler); )
inline_incrementally(igvn);
}
print_method(PHASE_INCREMENTAL_INLINE, 2);
if (failing()) return;
if (eliminate_boxing()) {
NOT_PRODUCT( TracePhase t2("incrementalInline", &_t_incrInline, TimeCompiler); )
// Inline valueOf() methods now.
inline_boxing_calls(igvn);
if (AlwaysIncrementalInline) {
inline_incrementally(igvn);
}
print_method(PHASE_INCREMENTAL_BOXING_INLINE, 2);
if (failing()) return;
}
// Now that all inlining is over, cut edge from root to loop
// safepoints
remove_root_to_sfpts_edges(igvn);
// Remove the speculative part of types and clean up the graph from
// the extra CastPP nodes whose only purpose is to carry them. Do
// that early so that optimizations are not disrupted by the extra
// CastPP nodes.
remove_speculative_types(igvn);
// No more new expensive nodes will be added to the list from here
// so keep only the actual candidates for optimizations.
cleanup_expensive_nodes(igvn);
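// Optionally renumber nodes so live nodes get dense indices once a
// significant number of dead nodes has accumulated.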
if (!failing() && RenumberLiveNodes && live_nodes() + NodeLimitFudgeFactor < unique()) {
NOT_PRODUCT(Compile::TracePhase t2("", &_t_renumberLive, TimeCompiler);)
initial_gvn()->replace_with(&igvn);
for_igvn()->clear();
Unique_Node_List new_worklist(C->comp_arena());
{
ResourceMark rm;
PhaseRenumberLive prl = PhaseRenumberLive(initial_gvn(), for_igvn(), &new_worklist);
}
set_for_igvn(&new_worklist);
igvn = PhaseIterGVN(initial_gvn());
igvn.optimize();
}
// Perform escape analysis
if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) {
if (has_loops()) {
// Cleanup graph (remove dead nodes).
TracePhase t2("idealLoop", &_t_idealLoop, true);
PhaseIdealLoop ideal_loop( igvn, false, true );
if (major_progress()) print_method(PHASE_PHASEIDEAL_BEFORE_EA, 2);
if (failing()) return;
}
ConnectionGraph::do_analysis(this, &igvn);
if (failing()) return;
// Optimize out field loads from scalar replaceable allocations.
igvn.optimize();
print_method(PHASE_ITER_GVN_AFTER_EA, 2);
if (failing()) return;
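// With escape information available, try to eliminate allocations and
// locks that do not escape (scalar replacement, lock elision).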
if (congraph() != NULL && macro_count() > 0) {
NOT_PRODUCT( TracePhase t2("macroEliminate", &_t_macroEliminate, TimeCompiler); )
PhaseMacroExpand mexp(igvn);
mexp.eliminate_macro_nodes();
igvn.set_delay_transform(false);
igvn.optimize();
print_method(PHASE_ITER_GVN_AFTER_ELIMINATION, 2);
if (failing()) return;
}
}
// Loop transforms on the ideal graph. Range Check Elimination,
// peeling, unrolling, etc.
// Set loop opts counter
loop_opts_cnt = num_loop_opts();
if((loop_opts_cnt > 0) && (has_loops() || has_split_ifs())) {
{
TracePhase t2("idealLoop", &_t_idealLoop, true);
PhaseIdealLoop ideal_loop( igvn, true );
loop_opts_cnt--;
if (major_progress()) print_method(PHASE_PHASEIDEALLOOP1, 2);
if (failing()) return;
}
// Loop opts pass if partial peeling occurred in previous pass
if(PartialPeelLoop && major_progress() && (loop_opts_cnt > 0)) {
TracePhase t3("idealLoop", &_t_idealLoop, true);
PhaseIdealLoop ideal_loop( igvn, false );
loop_opts_cnt--;
if (major_progress()) print_method(PHASE_PHASEIDEALLOOP2, 2);
if (failing()) return;
}
// Loop opts pass for loop-unrolling before CCP
if(major_progress() && (loop_opts_cnt > 0)) {
TracePhase t4("idealLoop", &_t_idealLoop, true);
PhaseIdealLoop ideal_loop( igvn, false );
loop_opts_cnt--;
if (major_progress()) print_method(PHASE_PHASEIDEALLOOP3, 2);
}
if (!failing()) {
// Verify that last round of loop opts produced a valid graph
NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); )
PhaseIdealLoop::verify(igvn);
}
}
if (failing()) return;
// Conditional Constant Propagation;
PhaseCCP ccp( &igvn );
assert( true, "Break here to ccp.dump_nodes_and_types(_root,999,1)");
{
TracePhase t2("ccp", &_t_ccp, true);
ccp.do_transform();
}
print_method(PHASE_CPP1, 2);
assert( true, "Break here to ccp.dump_old2new_map()");
// Iterative Global Value Numbering, including ideal transforms
{
NOT_PRODUCT( TracePhase t2("iterGVN2", &_t_iterGVN2, TimeCompiler); )
igvn = ccp;
igvn.optimize();
}
print_method(PHASE_ITER_GVN2, 2);
if (failing()) return;
// Loop transforms on the ideal graph. Range Check Elimination,
// peeling, unrolling, etc.
if(loop_opts_cnt > 0) {
debug_only( int cnt = 0; );
while(major_progress() && (loop_opts_cnt > 0)) {
TracePhase t2("idealLoop", &_t_idealLoop, true);
assert( cnt++ < 40, "infinite cycle in loop optimization" );
PhaseIdealLoop ideal_loop( igvn, true);
loop_opts_cnt--;
if (major_progress()) print_method(PHASE_PHASEIDEALLOOP_ITERATIONS, 2);
if (failing()) return;
}
}
{
// Verify that all previous optimizations produced a valid graph
// at least to this point, even if no loop optimizations were done.
NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); )
PhaseIdealLoop::verify(igvn);
}
if (range_check_cast_count() > 0) {
// No more loop optimizations. Remove all range check dependent CastIINodes.
C->remove_range_check_casts(igvn);
igvn.optimize();
}
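// Expand macro nodes (e.g. allocations and locks) into the concrete
// subgraphs that will later be matched to machine instructions.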
{
NOT_PRODUCT( TracePhase t2("macroExpand", &_t_macroExpand, TimeCompiler); )
PhaseMacroExpand mex(igvn);
if (mex.expand_macro_nodes()) {
assert(failing(), "must bail out w/ explicit message");
return;
}
}
} // (End scope of igvn; run destructor if necessary for asserts.)
dump_inlining();
// A method with only infinite loops has no edges entering loops from root
{
NOT_PRODUCT( TracePhase t2("graphReshape", &_t_graphReshaping, TimeCompiler); )
if (final_graph_reshaping()) {
assert(failing(), "must bail out w/ explicit message");
return;
}
}
print_method(PHASE_OPTIMIZE_FINISHED, 2);
}
//------------------------------Code_Gen---------------------------------------
// Given a graph, generate code for it
void Compile::Code_Gen() {
if (failing()) {
return;
}
// Perform instruction selection. You might think we could reclaim Matcher
// memory PDQ, but actually the Matcher is used in generating spill code.
// Internals of the Matcher (including some VectorSets) must remain live
// for a while - thus I cannot reclaim Matcher memory lest a VectorSet usage
// set a bit in reclaimed memory.
// In debug mode can dump m._nodes.dump() for mapping of ideal to machine
// nodes. Mapping is only valid at the root of each matched subtree.
NOT_PRODUCT( verify_graph_edges(); )
Matcher matcher;
_matcher = &matcher;
{
TracePhase t2("matcher", &_t_matcher, true);
matcher.match();
}
// In debug mode can dump m._nodes.dump() for mapping of ideal to machine
// nodes. Mapping is only valid at the root of each matched subtree.
NOT_PRODUCT( verify_graph_edges(); )
// If you have too many nodes, or if matching has failed, bail out
check_node_count(0, "out of nodes matching instructions");
if (failing()) {
return;
}
// Build a proper-looking CFG
PhaseCFG cfg(node_arena(), root(), matcher);
_cfg = &cfg;
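// Global code motion: place each node into a basic block and order
// the instructions within the blocks.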
{
NOT_PRODUCT( TracePhase t2("scheduler", &_t_scheduler, TimeCompiler); )
bool success = cfg.do_global_code_motion();
if (!success) {
return;
}
print_method(PHASE_GLOBAL_CODE_MOTION, 2);
NOT_PRODUCT( verify_graph_edges(); )
debug_only( cfg.verify(); )
}
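// Chaitin-style graph-coloring register allocation.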
PhaseChaitin regalloc(unique(), cfg, matcher);
_regalloc = ®alloc;
{
TracePhase t2("regalloc", &_t_registerAllocation, true);
// Perform register allocation. After Chaitin, use-def chains are
// no longer accurate (at spill code) and so must be ignored.
// Node->LRG->reg mappings are still accurate.
_regalloc->Register_Allocate();
// Bail out if the allocator builds too many nodes
if (failing()) {
return;
}
}
// Prior to register allocation we kept empty basic blocks in case the
// allocator needed a place to spill. After register allocation we
// are not adding any new instructions. If any basic block is empty, we
// can now safely remove it.
{
NOT_PRODUCT( TracePhase t2("blockOrdering", &_t_blockOrdering, TimeCompiler); )
cfg.remove_empty_blocks();
if (do_freq_based_layout()) {
PhaseBlockLayout layout(cfg);
} else {
cfg.set_loop_alignment();
}
cfg.fixup_flow();
}
// Apply peephole optimizations
if( OptoPeephole ) {
NOT_PRODUCT( TracePhase t2("peephole", &_t_peephole, TimeCompiler); )
PhasePeephole peep( _regalloc, cfg);
peep.do_transform();
}
// Do late expand if CPU requires this.
if (Matcher::require_postalloc_expand) {
NOT_PRODUCT(TracePhase t2c("postalloc_expand", &_t_postalloc_expand, true));
cfg.postalloc_expand(_regalloc);
}
// Convert Nodes to instruction bits in a buffer
{
// %%%% workspace merge brought two timers together for one job
TracePhase t2a("output", &_t_output, true);
NOT_PRODUCT( TraceTime t2b(NULL, &_t_codeGeneration, TimeCompiler, false); )
Output();
}
print_method(PHASE_FINAL_CODE);
// He's dead, Jim.
_cfg = (PhaseCFG*)((intptr_t)0xdeadbeef);
_regalloc = (PhaseChaitin*)((intptr_t)0xdeadbeef);
}
//------------------------------dump_asm---------------------------------------
// Dump formatted assembly
#ifndef PRODUCT
void Compile::dump_asm(int *pcs, uint pc_limit) {
bool cut_short = false;
tty->print_cr("#");
tty->print("# "); _tf->dump(); tty->cr();
tty->print_cr("#");
// For all blocks
int pc = 0x0; // Program counter
char starts_bundle = ' ';
_regalloc->dump_frame();
Node *n = NULL;
for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
if (VMThread::should_terminate()) {
cut_short = true;
break;
}
Block* block = _cfg->get_block(i);
if (block->is_connector() && !Verbose) {
continue;
}
n = block->head();
if (pcs && n->_idx < pc_limit) {
tty->print("%3.3x ", pcs[n->_idx]);
} else {
tty->print(" ");
}
block->dump_head(_cfg);
if (block->is_connector()) {
tty->print_cr(" # Empty connector block");
} else if (block->num_preds() == 2 && block->pred(1)->is_CatchProj() && block->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) {
tty->print_cr(" # Block is sole successor of call");
}
// For all instructions
Node *delay = NULL;
for (uint j = 0; j < block->number_of_nodes(); j++) {
if (VMThread::should_terminate()) {
cut_short = true;
break;
}
n = block->get_node(j);
if (valid_bundle_info(n)) {
Bundle* bundle = node_bundling(n);
if (bundle->used_in_unconditional_delay()) {
delay = n;
continue;
}
if (bundle->starts_bundle()) {
starts_bundle = '+';
}
}
if (WizardMode) {
n->dump();
}
if( !n->is_Region() && // Don't print in the Assembly
!n->is_Phi() && // a few noisily useless nodes
!n->is_Proj() &&
!n->is_MachTemp() &&
!n->is_SafePointScalarObject() &&
!n->is_Catch() && // Would be nice to print exception table targets
!n->is_MergeMem() && // Not very interesting
!n->is_top() && // Debug info table constants
!(n->is_Con() && !n->is_Mach()) // Debug info table constants
) {
if (pcs && n->_idx < pc_limit)
tty->print("%3.3x", pcs[n->_idx]);
else
tty->print(" ");
tty->print(" %c ", starts_bundle);
starts_bundle = ' ';
tty->print("\t");
n->format(_regalloc, tty);
tty->cr();
}
// If we have an instruction with a delay slot, and have seen a delay,
// then back up and print it
if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) {
assert(delay != NULL, "no unconditional delay instruction");
if (WizardMode) delay->dump();
if (node_bundling(delay)->starts_bundle())
starts_bundle = '+';
if (pcs && n->_idx < pc_limit)
tty->print("%3.3x", pcs[n->_idx]);
else
tty->print(" ");
tty->print(" %c ", starts_bundle);
starts_bundle = ' ';
tty->print("\t");
delay->format(_regalloc, tty);
tty->cr();
delay = NULL;
}
// Dump the exception table as well
if( n->is_Catch() && (Verbose || WizardMode) ) {
// Print the exception table for this offset
_handler_table.print_subtable_for(pc);
}
}
if (pcs && n->_idx < pc_limit)
tty->print_cr("%3.3x", pcs[n->_idx]);
else
tty->cr();
assert(cut_short || delay == NULL, "no unconditional delay branch");
} // End of per-block dump
tty->cr();
if (cut_short) tty->print_cr("*** disassembly is cut short ***");
}
#endif
//------------------------------Final_Reshape_Counts---------------------------
// This class defines counters to help identify when a method
// may/must be executed using hardware with only 24-bit precision.
struct Final_Reshape_Counts : public StackObj {
int _call_count; // count non-inlined 'common' calls
int _float_count; // count float ops requiring 24-bit precision
int _double_count; // count double ops requiring more precision
int _java_call_count; // count non-inlined 'java' calls
int _inner_loop_count; // count loops which need alignment
VectorSet _visited; // Visitation flags
Node_List _tests; // Set of IfNodes & PCTableNodes
Final_Reshape_Counts() :
_call_count(0), _float_count(0), _double_count(0),
_java_call_count(0), _inner_loop_count(0),
_visited( Thread::current()->resource_area() ) { }
void inc_call_count () { _call_count ++; }
void inc_float_count () { _float_count ++; }
void inc_double_count() { _double_count++; }
void inc_java_call_count() { _java_call_count++; }
void inc_inner_loop_count() { _inner_loop_count++; }
int get_call_count () const { return _call_count ; }
int get_float_count () const { return _float_count ; }
int get_double_count() const { return _double_count; }
int get_java_call_count() const { return _java_call_count; }
int get_inner_loop_count() const { return _inner_loop_count; }
};
#ifdef ASSERT
static bool oop_offset_is_sane(const TypeInstPtr* tp) {
ciInstanceKlass *k = tp->klass()->as_instance_klass();
// Make sure the offset goes inside the instance layout.
return k->contains_field_offset(tp->offset());
// Note that OffsetBot and OffsetTop are very negative.
}
#endif
// Eliminate trivially redundant StoreCMs and accumulate their
// precedence edges.
void Compile::eliminate_redundant_card_marks(Node* n) {
assert(n->Opcode() == Op_StoreCM, "expected StoreCM");
if (n->in(MemNode::Address)->outcnt() > 1) {
// There are multiple users of the same address so it might be
// possible to eliminate some of the StoreCMs
Node* mem = n->in(MemNode::Memory);
Node* adr = n->in(MemNode::Address);
Node* val = n->in(MemNode::ValueIn);
Node* prev = n;
bool done = false;
// Walk the chain of StoreCMs eliminating ones that match. As
// long as it's a chain of single users then the optimization is
// safe. Eliminating partially redundant StoreCMs would require
// cloning copies down the other paths.
while (mem->Opcode() == Op_StoreCM && mem->outcnt() == 1 && !done) {
if (adr == mem->in(MemNode::Address) &&
val == mem->in(MemNode::ValueIn)) {
// redundant StoreCM
if (mem->req() > MemNode::OopStore) {
// Hasn't been processed by this code yet.
n->add_prec(mem->in(MemNode::OopStore));
} else {
// Already converted to precedence edge
for (uint i = mem->req(); i < mem->len(); i++) {
// Accumulate any precedence edges
if (mem->in(i) != NULL) {
n->add_prec(mem->in(i));
}
}
// Everything above this point has been processed.
done = true;
}
// Eliminate the previous StoreCM
prev->set_req(MemNode::Memory, mem->in(MemNode::Memory));
assert(mem->outcnt() == 0, "should be dead");
mem->disconnect_inputs(NULL, this);
} else {
prev = mem;
}
mem = prev->in(MemNode::Memory);
}
}
}
//------------------------------final_graph_reshaping_impl----------------------
// Implement items 1-5 from final_graph_reshaping below.
void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {
if ( n->outcnt() == 0 ) return; // dead node
uint nop = n->Opcode();
// Check for 2-input instruction with "last use" on right input.
// Swap to left input. Implements item (2).
if( n->req() == 3 && // two-input instruction
n->in(1)->outcnt() > 1 && // left use is NOT a last use
(!n->in(1)->is_Phi() || n->in(1)->in(2) != n) && // it is not data loop
n->in(2)->outcnt() == 1 && // right use IS a last use
!n->in(2)->is_Con() ) { // right use is not a constant
// Check for commutative opcode
switch( nop ) {
case Op_AddI: case Op_AddF: case Op_AddD: case Op_AddL:
case Op_MaxI: case Op_MinI:
case Op_MulI: case Op_MulF: case Op_MulD: case Op_MulL:
case Op_AndL: case Op_XorL: case Op_OrL:
case Op_AndI: case Op_XorI: case Op_OrI: {
// Move "last use" input to left by swapping inputs
n->swap_edges(1, 2);
break;
}
default:
break;
}
}