ssssssss60


// Forward declarations.  The indentation of the Node classes sketches the
// inheritance tree declared further down in this file (for example PhiNode
// derives from TypeNode, IfNode from MultiBranchNode, JumpProjNode from
// JProjNode).
class Matcher;
class Node;
class   RegionNode;
class   TypeNode;
class     PhiNode;
class   GotoNode;
class   MultiNode;
class     MultiBranchNode;
class       IfNode;
class       PCTableNode;
class         JumpNode;
class         CatchNode;
class       NeverBranchNode;
class   ProjNode;
class     CProjNode;
class       IfTrueNode;
class       IfFalseNode;
class       CatchProjNode;
class     JProjNode;
class       JumpProjNode;
class     SCMemProjNode;
class PhaseIdealLoop;

//------------------------------RegionNode-------------------------------------
// The class of RegionNodes, which can be mapped to basic blocks in the
// program.  Their inputs point to Control sources.  PhiNodes (described
// below) have an input pointing to a RegionNode.  Merged data inputs to
// PhiNodes correspond 1-to-1 with RegionNode inputs.  The zero input of a
// PhiNode is the RegionNode, and the zero input of the RegionNode is itself.
class RegionNode : public Node {
public:
  // Input-slot layout; intentionally the same shape as PhiNode's.
  enum {
    Region,   // Slot 0: generally refers back to this node.
    Control   // Slots [1..len): the incoming control arcs.
  };

  // Creates a region with 'required' input slots and wires slot 0 to the
  // node itself.
  RegionNode(uint required) : Node(required) {
    init_class_id(Class_Region);
    init_req(0, this);
  }

  // A region whose slot 0 has been cleared yields nonnull_req(); any other
  // region yields NULL, meaning "not a copy".
  Node* is_copy() const {
    if (_in[Region] != NULL) {
      return NULL;  // not a copy!
    }
    return nonnull_req();
  }

  PhiNode* has_phi() const;         // Some phi user of this region, or NULL.
  PhiNode* has_unique_phi() const;  // The one and only phi user, or NULL.
  // Reports whether this region node is unreachable from root.
  bool is_unreachable_region(PhaseGVN* phase) const;

  virtual int Opcode() const;
  // Pinned exactly while slot 0 still refers to this node.
  virtual bool pinned() const { return this == (const Node*)in(0); }
  virtual bool is_CFG() const { return true; }
  virtual uint hash() const { return NO_HASH; }  // CFG nodes are never hashed
  virtual bool depends_only_on_test() const { return false; }
  virtual const Type* bottom_type() const { return Type::CONTROL; }
  virtual const Type* Value(PhaseTransform* phase) const;
  virtual Node* Identity(PhaseTransform* phase);
  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
  virtual const RegMask& out_RegMask() const;
  bool try_clean_mem_phi(PhaseGVN* phase);
};

//------------------------------JProjNode--------------------------------------
// jump projection for node that produces multiple control-flow paths
// A ProjNode that is itself a CFG node: it is never hashed, reports its
// input 0 as the block projection, and has ideal register 0.
class JProjNode : public ProjNode {
 public:
  // ctrl is the node being projected from; idx is the projection index
  // handed to ProjNode.
  JProjNode( Node* ctrl, uint idx ) : ProjNode(ctrl,idx) {}
  virtual int Opcode() const;
  virtual bool  is_CFG() const { return true; }
  virtual uint  hash() const { return NO_HASH; }  // CFG nodes do not hash
  // The block projection is the node this projection hangs off (input 0).
  virtual const Node* is_block_proj() const { return in(0); }
  virtual const RegMask& out_RegMask() const;
  virtual uint  ideal_reg() const { return 0; }
};

//------------------------------PhiNode----------------------------------------
// PhiNodes merge values from different Control paths.  Slot 0 points to the
// controlling RegionNode.  Other slots map 1-for-1 with incoming control flow
// paths to the RegionNode.  For speed reasons (to avoid another pass) we
// can turn PhiNodes into copies in-place by NULL'ing out their RegionNode
// input in slot 0.
class PhiNode : public TypeNode {
  const TypePtr* const _adr_type; // non-null only for Type::MEMORY nodes.
  // The following fields are only used for data PhiNodes to indicate
  // that the PhiNode represents the value of a known instance field.
        int _inst_mem_id; // Instance memory id (node index of the memory Phi)
  const int _inst_id;     // Instance id of the memory slice.
  const int _inst_index;  // Alias index of the instance memory slice.
  // Array elements references have the same alias_idx but different offset.
  const int _inst_offset; // Offset of the instance memory slice.
  // Size is bigger to hold the _adr_type field.
  virtual uint hash() const;    // Check the type
  virtual uint cmp( const Node &n ) const;
  virtual uint size_of() const { return sizeof(*this); }

  // Determine if CMoveNode::is_cmove_id can be used at this join point.
  Node* is_cmove_id(PhaseTransform* phase, int true_path);

public:
  // Node layout (parallels RegionNode):
  enum { Region,                // Control input is the Phi's region.
         Input                  // Input values are [1..len)
  };

  // Builds a phi of type 't' controlled by region 'r'.  The phi gets as many
  // input slots as 'r' has (r->req()); slot 0 is set to 'r'.  'at' is the
  // address type (defaults to NULL); the remaining arguments initialize the
  // instance-field bookkeeping fields and default to their "top"/-1 values.
  PhiNode( Node *r, const Type *t, const TypePtr* at = NULL,
           const int imid = -1,
           const int iid = TypeOopPtr::InstanceTop,
           const int iidx = Compile::AliasIdxTop,
           const int ioffs = Type::OffsetTop )
    : TypeNode(t,r->req()),
      _adr_type(at),
      _inst_mem_id(imid),
      _inst_id(iid),
      _inst_index(iidx),
      _inst_offset(ioffs)
  {
    init_class_id(Class_Phi);
    init_req(0, r);
    verify_adr_type();
  }
  // create a new phi with in edges matching r and set (initially) to x
  static PhiNode* make( Node* r, Node* x );
  // extra type arguments override the new phi's bottom_type and adr_type
  static PhiNode* make( Node* r, Node* x, const Type *t, const TypePtr* at = NULL );
  // create a new phi with narrowed memory type
  PhiNode* slice_memory(const TypePtr* adr_type) const;
  PhiNode* split_out_instance(const TypePtr* at, PhaseIterGVN *igvn) const;
  // like make(r, x), but does not initialize the in edges to x
  static PhiNode* make_blank( Node* r, Node* x );

  // Accessors
  // Slot 0 viewed as a RegionNode; may be NULL.  In debug builds a non-NULL
  // slot 0 is asserted to be a Region.
  RegionNode* region() const { Node* r = in(Region); assert(!r || r->is_Region(), ""); return (RegionNode*)r; }

  // Always answers NULL ("not a copy").  Debug builds additionally assert
  // that slot 0 holds a Region.
  Node* is_copy() const {
    // The node is a real phi if _in[0] is a Region node.
    DEBUG_ONLY(const Node* r = _in[Region];)
    assert(r != NULL && r->is_Region(), "Not valid control");
    return NULL;  // not a copy!
  }

  bool is_tripcount() const;

  // Determine a unique non-trivial input, if any.
  // Ignore casts if it helps.  Return NULL on failure.
  Node* unique_input(PhaseTransform *phase);

  // Check for a simple dead loop.
  enum LoopSafety { Safe = 0, Unsafe, UnsafeLoop };
  LoopSafety simple_data_loop_check(Node *in) const;
  // Is it unsafe data loop? It becomes a dead loop if this phi node removed.
  bool is_unsafe_data_reference(Node *in) const;
  int  is_diamond_phi(bool check_control_only = false) const;
  virtual int Opcode() const;
  // Pinned for as long as slot 0 is non-NULL.
  virtual bool pinned() const { return in(0) != 0; }
  virtual const TypePtr *adr_type() const { verify_adr_type(true); return _adr_type; }

  // Instance-field bookkeeping accessors.  They return plain 'int' by value:
  // a top-level 'const' on a by-value return type is ignored by the language
  // and only provokes -Wignored-qualifiers.
  void set_inst_mem_id(int inst_mem_id) { _inst_mem_id = inst_mem_id; }
  int inst_mem_id() const { return _inst_mem_id; }
  int inst_id()     const { return _inst_id; }
  int inst_index()  const { return _inst_index; }
  int inst_offset() const { return _inst_offset; }
  // True when this phi's basic type equals tp's, all four instance ids match
  // the given values, and this phi's type is higher_equal to tp.
  bool is_same_inst_field(const Type* tp, int mem_id, int id, int index, int offset) {
    return type()->basic_type() == tp->basic_type() &&
           inst_mem_id() == mem_id &&
           inst_id()     == id     &&
           inst_index()  == index  &&
           inst_offset() == offset &&
           type()->higher_equal(tp);
  }

  virtual const Type *Value( PhaseTransform *phase ) const;
  virtual Node *Identity( PhaseTransform *phase );
  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
  virtual const RegMask &out_RegMask() const;
  virtual const RegMask &in_RegMask(uint) const;
#ifndef PRODUCT
  virtual void dump_spec(outputStream *st) const;
#endif
  // Address-type verification is only implemented under ASSERT; other builds
  // get an empty inline body.
#ifdef ASSERT
  void verify_adr_type(VectorSet& visited, const TypePtr* at) const;
  void verify_adr_type(bool recursive = false) const;
#else //ASSERT
  void verify_adr_type(bool recursive = false) const {}
#endif //ASSERT
};

//------------------------------GotoNode---------------------------------------
// GotoNodes perform direct branches.
// A control-only node with a single control input.  It is always pinned,
// is a CFG node, is never hashed, and acts as its own block projection.
class GotoNode : public Node {
public:
  // 'control' becomes the node's only input.
  GotoNode( Node *control ) : Node(control) {}
  virtual int Opcode() const;
  virtual bool pinned() const { return true; }
  virtual bool  is_CFG() const { return true; }
  virtual uint hash() const { return NO_HASH; }  // CFG nodes do not hash
  // A Goto ends its block itself, so it is its own block projection.
  virtual const Node *is_block_proj() const { return this; }
  virtual bool depends_only_on_test() const { return false; }
  virtual const Type *bottom_type() const { return Type::CONTROL; }
  virtual const Type *Value( PhaseTransform *phase ) const;
  virtual Node *Identity( PhaseTransform *phase );
  virtual const RegMask &out_RegMask() const;
};

//------------------------------CProjNode--------------------------------------
// control projection for node that produces multiple control-flow paths
// A ProjNode that is itself a CFG node: it is never hashed, reports its
// input 0 as the block projection, and has ideal register 0.
class CProjNode : public ProjNode {
public:
  // ctrl is the node being projected from; idx is the projection index
  // handed to ProjNode.
  CProjNode( Node *ctrl, uint idx ) : ProjNode(ctrl,idx) {}
  virtual int Opcode() const;
  virtual bool  is_CFG() const { return true; }
  virtual uint hash() const { return NO_HASH; }  // CFG nodes do not hash
  // The block projection is the node this projection hangs off (input 0).
  virtual const Node *is_block_proj() const { return in(0); }
  virtual const RegMask &out_RegMask() const;
  virtual uint ideal_reg() const { return 0; }
};

//---------------------------MultiBranchNode-----------------------------------
// This class defines a MultiBranchNode, a MultiNode which yields multiple
// control values. These are distinguished from other types of MultiNodes
// which yield multiple values, but control is always and only projection #0.
// Abstract base: required_outcnt() is pure virtual, so every concrete
// subclass states how many users it needs.
class MultiBranchNode : public MultiNode {
public:
  // 'required' is forwarded to MultiNode as the number of input slots.
  MultiBranchNode( uint required ) : MultiNode(required) {
    init_class_id(Class_MultiBranch);
  }
  // returns required number of users to be well formed.
  virtual int required_outcnt() const = 0;
};

//------------------------------IfNode-----------------------------------------
// Output selected Control, based on a boolean test
// A two-input MultiBranchNode: slot 0 is the control input and slot 1 the
// boolean test.  It requires exactly two users (required_outcnt() == 2).
class IfNode : public MultiBranchNode {
  // Size is bigger to hold the probability field.  However, _prob does not
  // change the semantics so it does not appear in the hash & cmp functions.
  virtual uint size_of() const { return sizeof(*this); }
public:

  // Degrees of branch prediction probability by order of magnitude:
  // PROB_UNLIKELY_MAG(N) is a 1 in 1eN chance.
  // PROB_LIKELY_MAG(N) is 1 - PROB_UNLIKELY_MAG(N)
  // (The macro pastes N into a float literal: PROB_UNLIKELY_MAG(6) is 1e-6f.)
#define PROB_UNLIKELY_MAG(N)    (1e- ## N ## f)
#define PROB_LIKELY_MAG(N)      (1.0f-PROB_UNLIKELY_MAG(N))

  // Maximum and minimum branch prediction probabilities
  // 1 in 1,000,000 (magnitude 6)
  //
  // Although PROB_NEVER == PROB_MIN and PROB_ALWAYS == PROB_MAX
  // they are used to distinguish different situations:
  //
  // The name PROB_MAX (PROB_MIN) is for probabilities which correspond to
  // very likely (unlikely) but with a concrete possibility of a rare
  // contrary case.  These constants would be used for pinning
  // measurements, and as measures for assertions that have high
  // confidence, but some evidence of occasional failure.
  //
  // The name PROB_ALWAYS (PROB_NEVER) is to stand for situations for which
  // there is no evidence at all that the contrary case has ever occurred.

#define PROB_NEVER              PROB_UNLIKELY_MAG(6)
#define PROB_ALWAYS             PROB_LIKELY_MAG(6)

#define PROB_MIN                PROB_UNLIKELY_MAG(6)
#define PROB_MAX                PROB_LIKELY_MAG(6)

  // Static branch prediction probabilities
  // 1 in 10 (magnitude 1)
#define PROB_STATIC_INFREQUENT  PROB_UNLIKELY_MAG(1)
#define PROB_STATIC_FREQUENT    PROB_LIKELY_MAG(1)

  // Fair probability 50/50
#define PROB_FAIR               (0.5f)

  // Unknown probability sentinel
#define PROB_UNKNOWN            (-1.0f)

  // Probability "constructors", to distinguish as a probability any manifest
  // constant without a name
#define PROB_LIKELY(x)          ((float) (x))
#define PROB_UNLIKELY(x)        (1.0f - (float)(x))

  // Other probabilities in use, but without a unique name, are documented
  // here for lack of a better place:
  //
  // 1 in 1000 probabilities (magnitude 3):
  //     threshold for converting to conditional move
  //     likelihood of null check failure if a null HAS been seen before
  //     likelihood of slow path taken in library calls
  //
  // 1 in 10,000 probabilities (magnitude 4):
  //     threshold for making an uncommon trap probability more extreme
  //     threshold for making a null check implicit
  //     likelihood of needing a gc if eden top moves during an allocation
  //     likelihood of a predicted call failure
  //
  // 1 in 100,000 probabilities (magnitude 5):
  //     threshold for ignoring counts when estimating path frequency
  //     likelihood of FP clipping failure
  //     likelihood of catching an exception from a try block
  //     likelihood of null check failure if a null has NOT been seen before
  //
  // Magic manifest probabilities such as 0.83, 0.7, ... can be found in
  // gen_subtype_check() and catch_inline_exceptions().

  float _prob;                  // Probability of true path being taken.
  float _fcnt;                  // Frequency counter
  // control goes to slot 0, the test 'b' to slot 1; p and fcnt initialize
  // _prob and _fcnt.
  IfNode( Node *control, Node *b, float p, float fcnt )
    : MultiBranchNode(2), _prob(p), _fcnt(fcnt) {
    init_class_id(Class_If);
    init_req(0,control);
    init_req(1,b);
  }
  virtual int Opcode() const;
  virtual bool pinned() const { return true; }
  virtual const Type *bottom_type() const { return TypeTuple::IFBOTH; }
  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
  virtual const Type *Value( PhaseTransform *phase ) const;
  virtual int required_outcnt() const { return 2; }
  virtual const RegMask &out_RegMask() const;
  void dominated_by(Node* prev_dom, PhaseIterGVN* igvn);
  int is_range_check(Node* &range, Node* &index, jint &offset);
  Node* fold_compares(PhaseGVN* phase);
  static Node* up_one_dom(Node* curr, bool linear_only = false);

  // Takes the type of val and filters it through the test represented
  // by if_proj and returns a more refined type if one is produced.
  // Returns NULL if it couldn't improve the type.
  static const TypeInt* filtered_int_type(PhaseGVN* phase, Node* val, Node* if_proj);

#ifndef PRODUCT
  virtual void dump_spec(outputStream *st) const;
#endif
};

// Control projection number 1 of an IfNode.
class IfTrueNode : public CProjNode {
public:
  // Hangs off 'ifnode' with projection index 1.
  IfTrueNode( IfNode *ifnode ) : CProjNode(ifnode,1) {
    init_class_id(Class_IfTrue);
  }
  virtual int Opcode() const;
  virtual Node *Identity( PhaseTransform *phase );
};

// Control projection number 0 of an IfNode.
class IfFalseNode : public CProjNode {
public:
  // Hangs off 'ifnode' with projection index 0.
  IfFalseNode( IfNode *ifnode ) : CProjNode(ifnode,0) {
    init_class_id(Class_IfFalse);
  }
  virtual int Opcode() const;
  virtual Node *Identity( PhaseTransform *phase );
};


//------------------------------PCTableNode------------------------------------
// Build an indirect branch table.  Given a control and a table index,
// control is passed to the Projection matching the table index.  Used to
// implement switch statements and exception-handling capabilities.
// Undefined behavior if passed-in index is not inside the table.
// Two inputs: slot 0 is control, slot 1 is the table index.  The number of
// users required for a well-formed node equals the table size.
class PCTableNode : public MultiBranchNode {
  virtual uint hash() const;    // Target count; table size
  virtual uint cmp( const Node &n ) const;
  virtual uint size_of() const { return sizeof(*this); }

public:
  const uint _size;             // Number of targets

  // ctrl goes to slot 0, idx to slot 1; size is stored as the target count.
  PCTableNode( Node *ctrl, Node *idx, uint size ) : MultiBranchNode(2), _size(size) {
    init_class_id(Class_PCTable);
    init_req(0, ctrl);
    init_req(1, idx);
  }
  virtual int Opcode() const;
  virtual const Type *Value( PhaseTransform *phase ) const;
  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
  virtual const Type *bottom_type() const;
  virtual bool pinned() const { return true; }
  // One user is required per table target.
  virtual int required_outcnt() const { return _size; }
};

//------------------------------JumpNode---------------------------------------
// Indirect branch.  Uses PCTable above to implement a switch statement.
// It emits as a table load and local branch.
// A PCTableNode that acts as its own block projection.
class JumpNode : public PCTableNode {
public:
  // control, switch_val and size are forwarded unchanged to PCTableNode as
  // its control input, table index and target count.
  JumpNode( Node* control, Node* switch_val, uint size) : PCTableNode(control, switch_val, size) {
    init_class_id(Class_Jump);
  }
  virtual int   Opcode() const;
  virtual const RegMask& out_RegMask() const;
  virtual const Node* is_block_proj() const { return this; }
};

// A JProjNode carrying three extra immutable values: a destination bci, its
// own projection number, and a switch value.  bottom_type() is CONTROL.
class JumpProjNode : public JProjNode {
  virtual uint hash() const;
  virtual uint cmp( const Node &n ) const;
  virtual uint size_of() const { return sizeof(*this); }

 private:
  const int  _dest_bci;     // Returned by dest_bci().
  const uint _proj_no;      // Copy of the projection index given to JProjNode.
  const int  _switch_val;   // Returned by switch_val().
 public:
  // proj_no is both passed to JProjNode as the projection index and kept
  // locally in _proj_no.
  JumpProjNode(Node* jumpnode, uint proj_no, int dest_bci, int switch_val)
    : JProjNode(jumpnode, proj_no), _dest_bci(dest_bci), _proj_no(proj_no), _switch_val(switch_val) {
    init_class_id(Class_JumpProj);
  }

  virtual int Opcode() const;
  virtual const Type* bottom_type() const { return Type::CONTROL; }
  int  dest_bci()    const { return _dest_bci; }
  int  switch_val()  const { return _switch_val; }
  uint proj_no()     const { return _proj_no; }
#ifndef PRODUCT
  virtual void dump_spec(outputStream *st) const;
#endif
};

//------------------------------CatchNode--------------------------------------
// Helper node to fork exceptions.  "Catch" catches any exceptions thrown by
// a just-prior call.  Looks like a PCTableNode but emits no code - just the
// table.  The table lookup and branch is implemented by RethrowNode.
// A PCTableNode subclass that supplies its own Opcode() and Value().
class CatchNode : public PCTableNode {
public:
  // ctrl, idx and size are forwarded unchanged to PCTableNode.
  CatchNode( Node *ctrl, Node *idx, uint size ) : PCTableNode(ctrl,idx,size){
    init_class_id(Class_Catch);
  }
  virtual int Opcode() const;
  virtual const Type *Value( PhaseTransform *phase ) const;
};

// CatchProjNode controls which exception handler is targeted after a call.
// It is passed in the bci of the target handler, or no_handler_bci in case
// the projection doesn't lead to an exception handler.
// A CProjNode that additionally remembers a handler bci.  A negative bci
// means the projection is not a handler projection (see is_handler_proj()).
class CatchProjNode : public CProjNode {
  virtual uint hash() const;
  virtual uint cmp( const Node &n ) const;
  virtual uint size_of() const { return sizeof(*this); }

private:
  const int _handler_bci;   // Returned by handler_bci(); >= 0 for handlers.

public:
  enum {
    fall_through_index =  0,      // the fall through projection index
    catch_all_index    =  1,      // the projection index for catch-alls
    no_handler_bci     = -1       // the bci for fall through or catch-all projs
  };

  // proj_no is the projection index handed to CProjNode.  Debug builds
  // assert that the fall-through projection is given a negative bci.
  CatchProjNode(Node* catchnode, uint proj_no, int handler_bci)
    : CProjNode(catchnode, proj_no), _handler_bci(handler_bci) {
    init_class_id(Class_CatchProj);
    assert(proj_no != fall_through_index || handler_bci < 0, "fall through case must have bci < 0");
  }

  virtual int Opcode() const;
  virtual Node *Identity( PhaseTransform *phase );
  virtual const Type *bottom_type() const { return Type::CONTROL; }
  int  handler_bci() const        { return _handler_bci; }
  // True when the stored bci is non-negative.
  bool is_handler_proj() const    { return _handler_bci >= 0; }
#ifndef PRODUCT
  virtual void dump_spec(outputStream *st) const;
#endif
};


//---------------------------------CreateExNode--------------------------------
// Helper node to create the exception coming back from a call
// A pinned two-input TypeNode: slot 0 is control, slot 1 is i_o.  Its ideal
// register is Op_RegP and match_edge() answers 0 for every index.
class CreateExNode : public TypeNode {
public:
  // t is the node's type; control and i_o fill slots 0 and 1.
  CreateExNode(const Type* t, Node* control, Node* i_o) : TypeNode(t, 2) {
    init_req(0, control);
    init_req(1, i_o);
  }
  virtual int Opcode() const;
  virtual Node *Identity( PhaseTransform *phase );
  virtual bool pinned() const { return true; }
  // Answers 0 regardless of idx.
  uint match_edge(uint idx) const { return 0; }
  virtual uint ideal_reg() const { return Op_RegP; }
};

//------------------------------NeverBranchNode-------------------------------
// The never-taken branch.  Used to give the appearance of exiting infinite
// loops to those algorithms that like all paths to be reachable.  Encodes
// empty.
// A pinned MultiBranchNode with a single control input in slot 0.  Its
// bottom type is TypeTuple::IFBOTH and it requires two users.  emit() has an
// empty body and size() is 0.
class NeverBranchNode : public MultiBranchNode {
public:
  // ctrl becomes the only input (slot 0).
  NeverBranchNode( Node *ctrl ) : MultiBranchNode(1) { init_req(0,ctrl); }
  virtual int Opcode() const;
  virtual bool pinned() const { return true; }
  virtual const Type *bottom_type() const { return TypeTuple::IFBOTH; }
  virtual const Type *Value( PhaseTransform *phase ) const;
  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
  virtual int required_outcnt() const { return 2; }
  // Nothing is emitted for this node, hence the empty body and zero size.
  virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { }
  virtual uint size(PhaseRegAlloc *ra_) const { return 0; }
#ifndef PRODUCT
  virtual void format( PhaseRegAlloc *, outputStream *st ) const;
#endif
};

#endif // SHARE_VM_OPTO_CFGNODE_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/chaitin.cpp
/*
 * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "compiler/compileLog.hpp"
#include "compiler/oopMap.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/addnode.hpp"
#include "opto/block.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/chaitin.hpp"
#include "opto/coalesce.hpp"
#include "opto/connode.hpp"
#include "opto/idealGraphPrinter.hpp"
#include "opto/indexSet.hpp"
#include "opto/machnode.hpp"
#include "opto/memnode.hpp"
#include "opto/opcodes.hpp"
#include "opto/rootnode.hpp"

#ifndef PRODUCT
// Prints a one-line summary of this live range to tty: register count,
// register mask, mask size, effective degree, defining node(s), cost/area/
// score, and one word for every flag that is set.  Holds a ttyLocker for
// the duration of the call.
void LRG::dump() const {
  ttyLocker ttyl;

  // Register count, then the mask itself.
  tty->print("%d ",num_regs());
  _mask.dump();

  // Mask size.  When the cached size is marked valid it is printed, with a
  // "!!!" marker if mask_size() and compute_mask_size() disagree; otherwise
  // the size reported by the mask is printed after a '?'.
  if (!_msize_valid) {
    tty->print(", #?(%d) ",_mask.Size());
  } else if (mask_size() == compute_mask_size()) {
    tty->print(", #%d ",_mask_size);
  } else {
    tty->print(", #!!!_%d_vs_%d ",_mask_size,_mask.Size());
  }

  // Effective degree, or '?' when it is not valid.
  tty->print("EffDeg: ");
  if (_degree_valid) {
    tty->print( "%d ", _eff_degree );
  } else {
    tty->print("? ");
  }

  // Defining node(s).
  if (!is_multidef()) {
    if (_def == 0) {
      tty->print("Dead ");
    } else {
      tty->print("Def: N%d ",_def->_idx);
    }
  } else {
    tty->print("MultiDef ");
    if (_defs != NULL) {
      tty->print("(");
      for (int d = 0; d < _defs->length(); d++) {
        tty->print("N%d ", _defs->at(d)->_idx);
      }
      tty->print(") ");
    }
  }

  tty->print("Cost:%4.2g Area:%4.2g Score:%4.2g ",_cost,_area, score());

  // Flags
  if (_is_oop)          { tty->print("Oop "); }
  if (_is_float)        { tty->print("Float "); }
  if (_is_vector)       { tty->print("Vector "); }
  if (_was_spilled1)    { tty->print("Spilled "); }
  if (_was_spilled2)    { tty->print("Spilled2 "); }
  if (_direct_conflict) { tty->print("Direct_conflict "); }
  if (_fat_proj)        { tty->print("Fat "); }
  if (_was_lo)          { tty->print("Lo "); }
  if (_has_copy)        { tty->print("Copy "); }
  if (_at_risk)         { tty->print("Risk "); }

  if (_must_spill)      { tty->print("Must_spill "); }
  if (_is_bound)        { tty->print("Bound "); }
  // lo_degree() is only consulted when both the mask size and the degree
  // are valid.
  if (_msize_valid && _degree_valid && lo_degree()) {
    tty->print("Trivial ");
  }

  tty->cr();
}
#endif

// Compute score from cost and area.  Low score is best to spill.
// Returns cost minus the area scaled by RegisterCostAreaRatio and by the
// constant 1.52588e-5 (approximately 1/65536).  A larger area therefore
// lowers the result; a larger cost raises it.
static double raw_score( double cost, double area ) {
  return cost - (area*RegisterCostAreaRatio) * 1.52588e-5;
}

double LRG::score() const {
  // Scale _area by RegisterCostAreaRatio/64K then subtract from cost.
  // Bigger area lowers score, encourages spilling this live range.
  // Bigger cost raise score, prevents spilling this live range.
  // (Note: 1/65536 is the magic constant below; I dont trust the C optimizer
  // to turn a divide by a constant into a multiply by the reciprical).
  double score = raw_score( _cost, _area);

  // Account for area.  Basically, LRGs covering large areas are better
  // to spill because more other LRGs get freed up.
  if( _area == 0.0 )            // No area?  Then no progress to spill
    return 1e35;

  if( _was_spilled2 )           // If spilled once before, we are unlikely
    return score + 1e30;        // to make progress again.

  if( _cost >= _area*3.0 )      // Tiny area relative to cost
    return score + 1e17;        // Probably no progress to spill

  if( (_cost+_cost) >= _area*3.0 ) // Small area relative to cost
    return score + 1e10;        // Likely no progress to spill

  return score;
}

#define NUMBUCKS 3

// Straight out of Tarjan's union-find algorithm
// Union-find lookup with path compression.  Follows _uf_map from 'lrg' until
// it reaches an entry that maps to itself (the root), then rewrites every
// entry visited on the way so that it maps straight to that root.  Returns
// the root.
uint LiveRangeMap::find_compress(uint lrg) {
  // Pass 1: chase the chain of equivalences to its fixed point.
  uint cur = lrg;
  uint next;
  for (next = _uf_map.at(cur); next != cur; next = _uf_map.at(cur)) {
    assert( next < cur, "always union smaller");
    cur = next;
  }

  // Pass 2: walk the same chain again and point each link at the root.
  // 'next' holds the root at this point.
  for (uint link; lrg != next; lrg = link) {
    link = _uf_map.at(lrg);
    _uf_map.at_put(lrg, next);
  }
  return lrg;
}

// Reset the Union-Find map to identity
// Records 'max_lrg_id' as the new maximum live range id and turns the
// union-find map back into the identity mapping for every id below it.
void LiveRangeMap::reset_uf_map(uint max_lrg_id) {
  _max_lrg_id = max_lrg_id;

  // Writing at index _max_lrg_id makes sure the map is at least that large.
  _uf_map.at_put_grow(_max_lrg_id, 0);

  // Every id below the maximum maps to itself.
  uint id = 0;
  while (id < _max_lrg_id) {
    _uf_map.at_put(id, id);
    ++id;
  }
}

// Make all Nodes map directly to their final live range; no need for
// the Union-Find mapping after this call.
void LiveRangeMap::compress_uf_map_for_nodes() {
  // For all Nodes, compress mapping
  uint unique = _names.length();
  for (uint i = 0; i < unique; ++i) {
    uint lrg = _names.at(i);
    uint compressed_lrg = find(lrg);
    if (lrg != compressed_lrg) {
      _names.at_put(i, compressed_lrg);
    }
  }
}

// Like Find above, but no path compress, so bad asymptotic behavior
// Read-only variant of the union-find lookup: follows _uf_map to the root
// without compressing the path, so repeated lookups stay slow.
// The zero live range is returned unchanged.  So is any id at or beyond
// _max_lrg_id, which happens during debugging dumps when brand new live
// ranges exist that the allocator has not been told about yet.
uint LiveRangeMap::find_const(uint lrg) const {
  if (lrg == 0 || lrg >= _max_lrg_id) {
    return lrg;
  }

  // Chase the chain of equivalences until an entry maps to itself.
  uint next;
  for (next = _uf_map.at(lrg); next != lrg; next = _uf_map.at(lrg)) {
    assert(next < lrg, "always union smaller");
    lrg = next;
  }
  return next;
}

// Sets up the allocator state and fills _blks with every block of the CFG,
// grouped into NUMBUCKS frequency buckets.  Buckets are emitted from the
// highest-frequency one to the lowest; within a bucket the blocks keep
// their CFG order.
PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher)
  : PhaseRegAlloc(unique, cfg, matcher,
#ifndef PRODUCT
       print_chaitin_statistics
#else
       NULL
#endif
       )
  , _lrg_map(Thread::current()->resource_area(), unique)
  , _live(0)
  , _spilled_once(Thread::current()->resource_area())
  , _spilled_twice(Thread::current()->resource_area())
  , _lo_degree(0), _lo_stk_degree(0), _hi_degree(0), _simplified(0)
  , _oldphi(unique)
#ifndef PRODUCT
  , _trace_spilling(TraceSpilling || C->method_has_option("TraceSpilling"))
#endif
{
  NOT_PRODUCT( Compile::TracePhase t3("ctorChaitin", &_t_ctorChaitin, TimeCompiler); )

  _high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg.get_outer_loop_frequency());

  // Destination for the frequency-grouped block list.
  _blks = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks());

  // Bucket storage.  Each bucket is sized to hold every block, since any
  // bucket might end up receiving all of them.
  double  cutoff = BLOCK_FREQUENCY(1.0); // Running cutoff, lowered per bucket
  Block **buckets[NUMBUCKS];             // Per-bucket block lists
  uint    buckcnt[NUMBUCKS];             // Per-bucket fill counts
  double  buckval[NUMBUCKS];             // Per-bucket frequency cutoffs
  for (uint b = 0; b < NUMBUCKS; b++) {
    buckets[b] = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks());
    buckcnt[b] = 0;
    // Each bucket's cutoff is three orders of magnitude below the previous.
    cutoff *= 0.001;
    buckval[b] = cutoff;
    uint slot = 0;
    while (slot < _cfg.number_of_blocks()) {
      buckets[b][slot] = NULL;
      slot++;
    }
  }

  // Distribute the blocks.  A block lands in the first bucket whose cutoff
  // its frequency exceeds; the last bucket takes whatever is left.
  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
    uint j = 0;
    while (j < NUMBUCKS - 1 && !(_cfg.get_block(i)->_freq > buckval[j])) {
      j++;
    }
    buckets[j][buckcnt[j]++] = _cfg.get_block(i);
  }

  // Concatenate the buckets into the final block array.
  uint blkcnt = 0;
  for (uint b = 0; b < NUMBUCKS; b++) {
    Block **bucket = buckets[b];
    for (uint k = 0; k < buckcnt[b]; k++) {
      _blks[blkcnt++] = bucket[k];
    }
  }

  assert(blkcnt == _cfg.number_of_blocks(), "Block array not totally filled");
}

// union 2 sets together.
// Merges the live range of dst_n into the live range of src_n: after the
// call the union-find map sends dst's live range id to src's.  Debug builds
// assert that both ids are non-zero, below max_lrg_id(), and that src is the
// smaller of the two.
void PhaseChaitin::Union( const Node *src_n, const Node *dst_n ) {
  // Resolve both nodes to their current live range ids.
  uint src = _lrg_map.find(src_n);
  uint dst = _lrg_map.find(dst_n);
  assert(src, "");
  assert(dst, "");
  assert(src < _lrg_map.max_lrg_id(), "oob");
  assert(dst < _lrg_map.max_lrg_id(), "oob");
  // The larger id is always mapped onto the smaller one.
  assert(src < dst, "always union smaller");
  _lrg_map.uf_map(dst, src);
}

// Registers live range id 'lrg' for node 'x': the node's index is mapped to
// 'lrg', and 'lrg' is entered into the union-find map as mapping to itself.
void PhaseChaitin::new_lrg(const Node *x, uint lrg) {
  // Make the Node->LRG mapping
  _lrg_map.extend(x->_idx,lrg);
  // Make the Union-Find mapping an identity function
  _lrg_map.uf_extend(lrg, lrg);
}


// Clones every MachProj output of 'orig' and attaches the clone to 'copy'.
//   b          - block that receives the clones
//   idx        - insertion index in 'b' for the first clone; debug builds
//                assert that 'copy' sits at idx - 1
//   orig       - node whose MachProj outputs are duplicated
//   copy       - node that becomes input 0 of each clone
//   max_lrg_id - in/out: each clone is given the current value as its live
//                range id, and the value is then incremented
// Returns the number of projections that were cloned.
int PhaseChaitin::clone_projs(Block* b, uint idx, Node* orig, Node* copy, uint& max_lrg_id) {
  assert(b->find_node(copy) == (idx - 1), "incorrect insert index for copy kill projections");
  DEBUG_ONLY( Block* borig = _cfg.get_block_for_node(orig); )
  int found_projs = 0;
  const uint cnt = orig->outcnt();
  for (uint i = 0; i < cnt; i++) {
    Node* proj = orig->raw_out(i);
    if (!proj->is_MachProj()) {
      continue;   // only MachProj outputs are duplicated
    }
    assert(proj->outcnt() == 0, "only kill projections are expected here");
    assert(_cfg.get_block_for_node(proj) == borig, "incorrect block for kill projections");
    found_projs++;

    // Place the cloned projection right after the cloned node (and after
    // any clones inserted before it).
    Node* kills = proj->clone();
    kills->set_req(0, copy);
    b->insert_node(kills, idx++);
    _cfg.map_node_to_block(kills, b);
    new_lrg(kills, max_lrg_id++);
  }
  return found_projs;
}

// Renumber the live ranges to compact them.  Makes the IFG smaller.
void PhaseChaitin::compact() {
  // Current the _uf_map contains a series of short chains which are headed
  // by a self-cycle.  All the chains run from big numbers to little numbers.
  // The Find() call chases the chains & shortens them for the next Find call.
  // We are going to change this structure slightly.  Numbers above a moving
  // wave 'i' are unchanged.  Numbers below 'j' point directly to their
  // compacted live range with no further chaining.  There are no chains or
  // cycles below 'i', so the Find call no longer works.
  uint j=1;
  uint i;
  for (i = 1; i < _lrg_map.max_lrg_id(); i++) {
    uint lr = _lrg_map.uf_live_range_id(i);
    // Ignore unallocated live ranges
    if (!lr) {
      continue;
    }
    assert(lr <= i, "");
    _lrg_map.uf_map(i, ( lr == i ) ? j++ : _lrg_map.uf_live_range_id(lr));
  }
  // Now change the Node->LR mapping to reflect the compacted names
  uint unique = _lrg_map.size();
  for (i = 0; i < unique; i++) {
    uint lrg_id = _lrg_map.live_range_id(i);
    _lrg_map.map(i, _lrg_map.uf_live_range_id(lrg_id));
  }

  // Reset the Union-Find mapping
  _lrg_map.reset_uf_map(j);
}

// Driver for the graph-coloring register allocator.  The visible steps are:
//   1. leave SSA form (de_ssa) and compute liveness;
//   2. stretch base-pointer live ranges, recomputing liveness if any changed;
//   3. build the virtual-copy IFG and run aggressive coalescing, inserting
//      real copies for whatever did not coalesce;
//   4. build the physical IFG; if a spill is already guaranteed, Split and
//      rebuild right away;
//   5. Simplify + Select, and while Select reports spills: Split, compact,
//      rebuild liveness/IFG, optionally coalesce conservatively, and retry;
//   6. post-allocation cleanup, frame size computation, CISC spill fixup and
//      copying the chosen registers from the live ranges to per-node storage.
// Returns early whenever C->failing() is set, when there are no live ranges
// to split, or when the spill-split-recycle loop exceeds its trip limit.
void PhaseChaitin::Register_Allocate() {

  // Above the OLD FP (and in registers) are the incoming arguments.  Stack
  // slots in this area are called "arg_slots".  Above the NEW FP (and in
  // registers) is the outgoing argument area; above that is the spill/temp
  // area.  These are all "frame_slots".  Arg_slots start at the zero
  // stack_slots and count up to the known arg_size.  Frame_slots start at
  // the stack_slot #arg_size and go up.  After allocation I map stack
  // slots to actual offsets.  Stack-slots in the arg_slot area are biased
  // by the frame_size; stack-slots in the frame_slot area are biased by 0.

  _trip_cnt = 0;
  _alternate = 0;
  _matcher._allocation_started = true;

  ResourceArea split_arena(mtCompiler);     // Arena for Split local resources
  ResourceArea live_arena(mtCompiler);      // Arena for liveness & IFG info
  ResourceMark rm(&live_arena);

  // Need live-ness for the IFG; need the IFG for coalescing.  If the
  // liveness is JUST for coalescing, then I can get some mileage by renaming
  // all copy-related live ranges low and then using the max copy-related
  // live range as a cut-off for LIVE and the IFG.  In other words, I can
  // build a subset of LIVE and IFG just for copies.
  PhaseLive live(_cfg, _lrg_map.names(), &live_arena);

  // Need IFG for coalescing and coloring
  PhaseIFG ifg(&live_arena);
  _ifg = &ifg;

  // Come out of SSA world to the Named world.  Assign (virtual) registers to
  // Nodes.  Use the same register for all inputs and the output of PhiNodes
  // - effectively ending SSA form.  This requires either coalescing live
  // ranges or inserting copies.  For the moment, we insert "virtual copies"
  // - we pretend there is a copy prior to each Phi in predecessor blocks.
  // We will attempt to coalesce such "virtual copies" before we manifest
  // them for real.
  de_ssa();

#ifdef ASSERT
  // Verify the graph before RA.
  verify(&live_arena);
#endif

  // First liveness computation.  The same reset / init / gather / compute
  // sequence is repeated below every time the live ranges change.
  {
    NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
    _live = NULL;                 // Mark live as being not available
    rm.reset_to_mark();           // Reclaim working storage
    IndexSet::reset_memory(C, &live_arena);
    ifg.init(_lrg_map.max_lrg_id()); // Empty IFG
    gather_lrg_masks( false );    // Collect LRG masks
    live.compute(_lrg_map.max_lrg_id()); // Compute liveness
    _live = &live;                // Mark LIVE as being available
  }

  // Base pointers are currently "used" by instructions which define new
  // derived pointers.  This makes base pointers live up to the where the
  // derived pointer is made, but not beyond.  Really, they need to be live
  // across any GC point where the derived value is live.  So this code looks
  // at all the GC points, and "stretches" the live range of any base pointer
  // to the GC point.
  if (stretch_base_pointer_live_ranges(&live_arena)) {
    NOT_PRODUCT(Compile::TracePhase t3("computeLive (sbplr)", &_t_computeLive, TimeCompiler);)
    // Since some live range stretched, I need to recompute live
    _live = NULL;
    rm.reset_to_mark();         // Reclaim working storage
    IndexSet::reset_memory(C, &live_arena);
    ifg.init(_lrg_map.max_lrg_id());
    gather_lrg_masks(false);
    live.compute(_lrg_map.max_lrg_id());
    _live = &live;
  }
  // Create the interference graph using virtual copies
  build_ifg_virtual();  // Include stack slots this time

  // Aggressive (but pessimistic) copy coalescing.
  // This pass works on virtual copies.  Any virtual copies which are not
  // coalesced get manifested as actual copies
  {
    // The IFG is/was triangular.  I am 'squaring it up' so Union can run
    // faster.  Union requires a 'for all' operation which is slow on the
    // triangular adjacency matrix (quick reminder: the IFG is 'sparse' -
    // meaning I can visit all the Nodes neighbors less than a Node in time
    // O(# of neighbors), but I have to visit all the Nodes greater than a
    // given Node and search them for an instance, i.e., time O(#MaxLRG)).
    _ifg->SquareUp();

    PhaseAggressiveCoalesce coalesce(*this);
    coalesce.coalesce_driver();
    // Insert un-coalesced copies.  Visit all Phis.  Where inputs to a Phi do
    // not match the Phi itself, insert a copy.
    coalesce.insert_copies(_matcher);
    // Bail out if the compilation has been marked as failing.
    if (C->failing()) {
      return;
    }
  }

  // After aggressive coalesce, attempt a first cut at coloring.
  // To color, we need the IFG and for that we need LIVE.
  // From here on gather_lrg_masks is called with after_aggressive == true.
  {
    NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
    _live = NULL;
    rm.reset_to_mark();           // Reclaim working storage
    IndexSet::reset_memory(C, &live_arena);
    ifg.init(_lrg_map.max_lrg_id());
    gather_lrg_masks( true );
    live.compute(_lrg_map.max_lrg_id());
    _live = &live;
  }

  // Build physical interference graph
  uint must_spill = 0;
  must_spill = build_ifg_physical(&live_arena);
  // If we have a guaranteed spill, might as well spill now
  if (must_spill) {
    // Nothing to split if there are no live ranges at all.
    if(!_lrg_map.max_lrg_id()) {
      return;
    }
    // Bail out if unique gets too large (ie - unique > MaxNodeLimit)
    C->check_node_count(10*must_spill, "out of nodes before split");
    if (C->failing()) {
      return;
    }

    uint new_max_lrg_id = Split(_lrg_map.max_lrg_id(), &split_arena);  // Split spilling LRG everywhere
    _lrg_map.set_max_lrg_id(new_max_lrg_id);
    // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
    // or we failed to split
    C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after physical split");
    if (C->failing()) {
      return;
    }

    NOT_PRODUCT(C->verify_graph_edges();)

    compact();                  // Compact LRGs: renumber the live ranges densely

    {
      NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
      _live = NULL;
      rm.reset_to_mark();         // Reclaim working storage
      IndexSet::reset_memory(C, &live_arena);
      ifg.init(_lrg_map.max_lrg_id()); // Build a new interference graph
      gather_lrg_masks( true );   // Collect intersect mask
      live.compute(_lrg_map.max_lrg_id()); // Compute LIVE
      _live = &live;
    }
    // The must-spill count returned here is deliberately not captured.
    build_ifg_physical(&live_arena);
    _ifg->SquareUp();
    _ifg->Compute_Effective_Degree();
    // Only do conservative coalescing if requested
    if (OptoCoalesce) {
      // Conservative (and pessimistic) copy coalescing of those spills
      PhaseConservativeCoalesce coalesce(*this);
      // If max live ranges greater than cutoff, don't color the stack.
      // This cutoff can be larger than below since it is only done once.
      // NOTE(review): no cutoff test is visible at this call site; the two
      // lines above may be stale - confirm.
      coalesce.coalesce_driver();
    }
    _lrg_map.compress_uf_map_for_nodes();

#ifdef ASSERT
    verify(&live_arena, true);
#endif
  } else {
    // No guaranteed spill: finish the IFG built above and go color it.
    ifg.SquareUp();
    ifg.Compute_Effective_Degree();
#ifdef ASSERT
    set_was_low();
#endif
  }

  // Prepare for Simplify & Select
  cache_lrg_info();           // Count degree of LRGs

  // Simplify the InterFerence Graph by removing LRGs of low degree.
  // LRGs of low degree are trivially colorable.
  Simplify();

  // Select colors by re-inserting LRGs back into the IFG in reverse order.
  // Return whether or not something spills.
  uint spills = Select( );

  // If we spill, split and recycle the entire thing
  while( spills ) {
    // Sanity limit on the number of spill-split-recycle trips: once the
    // counter has passed 24, debug builds dump diagnostics on every trip,
    // and once it has passed 27 the method is recorded as not compilable.
    if( _trip_cnt++ > 24 ) {
      DEBUG_ONLY( dump_for_spill_split_recycle(); )
      if( _trip_cnt > 27 ) {
        C->record_method_not_compilable("failed spill-split-recycle sanity check");
        return;
      }
    }

    // Nothing to split if there are no live ranges at all.
    if (!_lrg_map.max_lrg_id()) {
      return;
    }
    uint new_max_lrg_id = Split(_lrg_map.max_lrg_id(), &split_arena);  // Split spilling LRG everywhere
    _lrg_map.set_max_lrg_id(new_max_lrg_id);
    // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
    C->check_node_count(2 * NodeLimitFudgeFactor, "out of nodes after split");
    if (C->failing()) {
      return;
    }

    compact(); // Compact LRGs: renumber the live ranges densely

    // Nuke the live-ness and interference graph and LiveRanGe info
    {
      NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
      _live = NULL;
      rm.reset_to_mark();         // Reclaim working storage
      IndexSet::reset_memory(C, &live_arena);
      ifg.init(_lrg_map.max_lrg_id());

      // Create LiveRanGe array.
      // Intersect register masks for all USEs and DEFs
      gather_lrg_masks(true);
      live.compute(_lrg_map.max_lrg_id());
      _live = &live;
    }
    must_spill = build_ifg_physical(&live_arena);
    _ifg->SquareUp();
    _ifg->Compute_Effective_Degree();

    // Only do conservative coalescing if requested
    if (OptoCoalesce) {
      // Conservative (and pessimistic) copy coalescing
      PhaseConservativeCoalesce coalesce(*this);
      // Check for few live ranges determines how aggressive coalesce is.
      coalesce.coalesce_driver();
    }
    _lrg_map.compress_uf_map_for_nodes();
#ifdef ASSERT
    verify(&live_arena, true);
#endif
    cache_lrg_info();           // Count degree of LRGs

    // Simplify the InterFerence Graph by removing LRGs of low degree.
    // LRGs of low degree are trivially colorable.
    Simplify();

    // Select colors by re-inserting LRGs back into the IFG in reverse order.
    // Return whether or not something spills.
    spills = Select();
  }

  // Count number of Simplify-Select trips per coloring success.
  _allocator_attempts += _trip_cnt + 1;
  _allocator_successes += 1;

  // Peephole remove copies
  post_allocate_copy_removal();

  // Merge multidefs if multiple defs representing the same value are used in a single block.
  merge_multidefs();

#ifdef ASSERT
  // Verify the graph after RA.
  verify(&live_arena);
#endif

  // max_reg is past the largest *register* used.
  // Convert that to a frame_slot number.
  if (_max_reg <= _matcher._new_SP) {
    // Nothing was allocated above the new SP: use the preserved-slot count.
    _framesize = C->out_preserve_stack_slots();
  }
  else {
    _framesize = _max_reg -_matcher._new_SP;
  }
  assert((int)(_matcher._new_SP+_framesize) >= (int)_matcher._out_arg_limit, "framesize must be large enough");

  // This frame must preserve the required fp alignment
  _framesize = round_to(_framesize, Matcher::stack_alignment_in_slots());
  assert( _framesize >= 0 && _framesize <= 1000000, "sanity check" );
#ifndef PRODUCT
  // Non-product statistics: accumulated and maximum frame size.
  _total_framesize += _framesize;
  if ((int)_framesize > _max_framesize) {
    _max_framesize = _framesize;
  }
#endif

  // Convert CISC spills
  fixup_spills();

  // Log regalloc results
  CompileLog* log = Compile::current()->log();
  if (log != NULL) {
    log->elem("regalloc attempts='%d' success='%d'", _trip_cnt, !C->failing());
  }

  if (C->failing()) {
    return;
  }

  NOT_PRODUCT(C->verify_graph_edges();)

  // Move important info out of the live_arena to longer lasting storage.
  // For every node index, record the register(s) of its live range, or mark
  // it bad when it has no live range or the live range is not alive.
  alloc_node_regs(_lrg_map.size());
  for (uint i=0; i < _lrg_map.size(); i++) {
    if (_lrg_map.live_range_id(i)) { // Live range associated with Node?
      LRG &lrg = lrgs(_lrg_map.live_range_id(i));
      if (!lrg.alive()) {
        set_bad(i);
      } else if (lrg.num_regs() == 1) {
        set1(i, lrg.reg());
      } else {                  // Must be a register-set
        if (!lrg._fat_proj) {   // Must be aligned adjacent register set
          // Live ranges record the highest register in their mask.
          // We want the low register for the AD file writer's convenience.
          OptoReg::Name hi = lrg.reg(); // Get hi register
          OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo
          // We have to use pair [lo,lo+1] even for wide vectors because
          // the rest of code generation works only with pairs. It is safe
          // since for registers encoding only 'lo' is used.
          // Second reg from pair is used in ScheduleAndBundle on SPARC where
          // vector max size is 8 which corresponds to registers pair.
          // It is also used in BuildOopMaps but oop operations are not
          // vectorized.
          set2(i, lo);
        } else {                // Misaligned; extract 2 bits
          OptoReg::Name hi = lrg.reg(); // Get hi register
          lrg.Remove(hi);       // Yank from mask
          int lo = lrg.mask().find_first_elem(); // Find lo
          set_pair(i, hi, lo);
        }
      }
      // Remember which nodes hold oops.
      if( lrg._is_oop ) _node_oops.set(i);
    } else {
      set_bad(i);
    }
  }

  // Done!
  _live = NULL;
  _ifg = NULL;
  C->set_indexSet_arena(NULL);  // ResourceArea is at end of scope
}

void PhaseChaitin::de_ssa() {
  // Set initial Names for all Nodes.  Most Nodes get the virtual register
  // number.  A few get the ZERO live range number.  These do not
  // get allocated, but instead rely on correct scheduling to ensure that
  // only one instance is simultaneously live at a time.
  uint lr_counter = 1;
  for( uint i = 0; i < _cfg.number_of_blocks(); i++ ) {
    Block* block = _cfg.get_block(i);
    uint cnt = block->number_of_nodes();

    // Handle all the normal Nodes in the block
    for( uint j = 0; j < cnt; j++ ) {
      Node *n = block->get_node(j);
      // Pre-color to the zero live range, or pick virtual register
      const RegMask &rm = n->out_RegMask();
      _lrg_map.map(n->_idx, rm.is_NotEmpty() ? lr_counter++ : 0);
    }
  }

  // Reset the Union-Find mapping to be identity
  _lrg_map.reset_uf_map(lr_counter);
}


// Gather LiveRanGe information, including register masks.  Modification of
// cisc spillable in_RegMasks should not be done before AggressiveCoalesce.
//
// Walks every instruction of every block (starting at index 1 in each
// block).  For the value an instruction defines, and for each of its inputs,
// the register mask of the corresponding live range is intersected with the
// mask the instruction allows, and per-live-range flags are updated
// (copy, float, vector, oop, bound, fat-proj, spilled-before, register
// count, register pressure, maximum block frequency).  A final pass over
// all live ranges computes mask sizes and flags those that have no register
// left.
//
// after_aggressive: false for the passes run before aggressive coalescing,
//   true afterwards.  When true (and UseCISCSpill is set) the CISC-spillable
//   input of an instruction is switched to its cisc register mask.  When
//   false, uses in blocks that are far less frequent than the defining block
//   do not trim the live range's mask.
void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {

  // Nail down the frame pointer live range
  uint fp_lrg = _lrg_map.live_range_id(_cfg.get_root_node()->in(1)->in(TypeFunc::FramePtr));
  lrgs(fp_lrg)._cost += 1e12;   // Cost is infinite

  // For all blocks
  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
    Block* block = _cfg.get_block(i);

    // For all instructions
    for (uint j = 1; j < block->number_of_nodes(); j++) {
      Node* n = block->get_node(j);
      uint input_edge_start =1; // Skip the control input on most nodes
      if (n->is_Mach()) {
        // Mach nodes start their operand inputs at oper_input_base()
        input_edge_start = n->as_Mach()->oper_input_base();
      }
      // Non-zero when 'n' is a copy; then it is the index of the copied input
      uint idx = n->is_Copy();

      // Get virtual register number, same as LiveRanGe index
      uint vreg = _lrg_map.live_range_id(n);
      // Note the reference is taken before vreg is tested; it is only used
      // inside the 'if (vreg)' below.
      LRG& lrg = lrgs(vreg);
      if (vreg) {              // No vreg means un-allocable (e.g. memory)

        // Collect has-copy bit
        if (idx) {
          // Both the copy's own live range and the live range of its source
          // are flagged.
          lrg._has_copy = 1;
          uint clidx = _lrg_map.live_range_id(n->in(idx));
          LRG& copy_src = lrgs(clidx);
          copy_src._has_copy = 1;
        }

        // Check for float-vs-int live range (used in register-pressure
        // calculations)
        const Type *n_type = n->bottom_type();
        if (n_type->is_floatingpoint()) {
          lrg._is_float = 1;
        }

        // Check for twice prior spilling.  Once prior spilling might have
        // spilled 'soft', 2nd prior spill should have spilled 'hard' and
        // further spilling is unlikely to make progress.
        if (_spilled_once.test(n->_idx)) {
          lrg._was_spilled1 = 1;
          if (_spilled_twice.test(n->_idx)) {
            lrg._was_spilled2 = 1;
          }
        }

#ifndef PRODUCT
        // Only reached for the second and later defs of a live range
        // (lrg._def is already set).
        if (trace_spilling() && lrg._def != NULL) {
          // collect defs for MultiDef printing
          if (lrg._defs == NULL) {
            lrg._defs = new (_ifg->_arena) GrowableArray<Node*>(_ifg->_arena, 2, 0, NULL);
            lrg._defs->append(lrg._def);
          }
          lrg._defs->append(n);
        }
#endif

        // Check for a single def LRG; these can spill nicely
        // via rematerialization.  Flag as NULL for no def found
        // yet, or 'n' for single def or NodeSentinel for many defs.
        lrg._def = lrg._def ? NodeSentinel : n;

        // Limit result register mask to acceptable registers
        const RegMask &rm = n->out_RegMask();
        lrg.AND( rm );

        uint ireg = n->ideal_reg();
        assert( !n->bottom_type()->isa_oop_ptr() || ireg == Op_RegP,
                "oops must be in Op_RegP's" );

        // Check for vector live range (only if vector register is used).
        // On SPARC vector uses RegD which could be misaligned so it is not
        // processed as vector in RA.
        if (RegMask::is_vector(ireg))
          lrg._is_vector = 1;
        assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL,
               "vector must be in vector registers");

        // Check for bound register masks
        const RegMask &lrgmask = lrg.mask();
        if (lrgmask.is_bound(ireg)) {
          lrg._is_bound = 1;
        }

        // Check for maximum frequency value
        if (lrg._maxfreq < block->_freq) {
          lrg._maxfreq = block->_freq;
        }

        // Check for oop-iness, or long/double
        // Check for multi-kill projection
        // Each case sets the number of registers the live range occupies and
        // its register pressure.
        switch (ireg) {
        case MachProjNode::fat_proj:
          // Fat projections have size equal to number of registers killed
          lrg.set_num_regs(rm.Size());
          lrg.set_reg_pressure(lrg.num_regs());
          lrg._fat_proj = 1;
          lrg._is_bound = 1;
          break;
        case Op_RegP:
#ifdef _LP64
          lrg.set_num_regs(2);  // Size is 2 stack words
#else
          lrg.set_num_regs(1);  // Size is 1 stack word
#endif
          // Register pressure is tracked relative to the maximum values
          // suggested for that platform, INTPRESSURE and FLOATPRESSURE,
          // and relative to other types which compete for the same regs.
          //
          // The following table contains suggested values based on the
          // architectures as defined in each .ad file.
          // INTPRESSURE and FLOATPRESSURE may be tuned differently for
          // compile-speed or performance.
          // Note1:
          // SPARC and SPARCV9 reg_pressures are at 2 instead of 1
          // since .ad registers are defined as high and low halves.
          // These reg_pressure values remain compatible with the code
          // in is_high_pressure() which relates get_invalid_mask_size(),
          // Block::_reg_pressure and INTPRESSURE, FLOATPRESSURE.
          // Note2:
          // SPARC -d32 has 24 registers available for integral values,
          // but only 10 of these are safe for 64-bit longs.
          // Using set_reg_pressure(2) for both int and long means
          // the allocator will believe it can fit 26 longs into
          // registers.  Using 2 for longs and 1 for ints means the
          // allocator will attempt to put 52 integers into registers.
          // The settings below limit this problem to methods with
          // many long values which are being run on 32-bit SPARC.
          //
          // ------------------- reg_pressure --------------------
          // Each entry is reg_pressure_per_value,number_of_regs
          //         RegL  RegI  RegFlags   RegF RegD    INTPRESSURE  FLOATPRESSURE
          // IA32     2     1     1          1    1          6           6
          // IA64     1     1     1          1    1         50          41
          // SPARC    2     2     2          2    2         48 (24)     52 (26)
          // SPARCV9  2     2     2          2    2         48 (24)     52 (26)
          // AMD64    1     1     1          1    1         14          15
          // -----------------------------------------------------
#if defined(SPARC)
          lrg.set_reg_pressure(2);  // use for v9 as well
#else
          lrg.set_reg_pressure(1);  // normally one value per register
#endif
          if( n_type->isa_oop_ptr() ) {
            lrg._is_oop = 1;
          }
          break;
        case Op_RegL:           // Check for long or double
        case Op_RegD:
          lrg.set_num_regs(2);
          // Define platform specific register pressure
#if defined(SPARC) || defined(ARM32)
          lrg.set_reg_pressure(2);
#elif defined(IA32)
          if( ireg == Op_RegL ) {
            lrg.set_reg_pressure(2);
          } else {
            lrg.set_reg_pressure(1);
          }
#else
          lrg.set_reg_pressure(1);  // normally one value per register
#endif
          // If this def of a double forces a mis-aligned double,
          // flag as '_fat_proj' - really flag as allowing misalignment
          // AND changes how we count interferences.  A mis-aligned
          // double can interfere with TWO aligned pairs, or effectively
          // FOUR registers!
          if (rm.is_misaligned_pair()) {
            lrg._fat_proj = 1;
            lrg._is_bound = 1;
          }
          break;
        case Op_RegF:
        case Op_RegI:
        case Op_RegN:
        case Op_RegFlags:
        case 0:                 // not an ideal register
          lrg.set_num_regs(1);
#ifdef SPARC
          lrg.set_reg_pressure(2);
#else
          lrg.set_reg_pressure(1);
#endif
          break;
        case Op_VecS:
          assert(Matcher::vector_size_supported(T_BYTE,4), "sanity");
          assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity");
          lrg.set_num_regs(RegMask::SlotsPerVecS);
          lrg.set_reg_pressure(1);
          break;
        case Op_VecD:
          assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecD), "sanity");
          assert(RegMask::num_registers(Op_VecD) == RegMask::SlotsPerVecD, "sanity");
          assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecD), "vector should be aligned");
          lrg.set_num_regs(RegMask::SlotsPerVecD);
          lrg.set_reg_pressure(1);
          break;
        case Op_VecX:
          assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecX), "sanity");
          assert(RegMask::num_registers(Op_VecX) == RegMask::SlotsPerVecX, "sanity");
          assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecX), "vector should be aligned");
          lrg.set_num_regs(RegMask::SlotsPerVecX);
          lrg.set_reg_pressure(1);
          break;
        case Op_VecY:
          assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecY), "sanity");
          assert(RegMask::num_registers(Op_VecY) == RegMask::SlotsPerVecY, "sanity");
          assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecY), "vector should be aligned");
          lrg.set_num_regs(RegMask::SlotsPerVecY);
          lrg.set_reg_pressure(1);
          break;
        default:
          ShouldNotReachHere();
        }
      }

      // Now do the same for inputs
      uint cnt = n->req();
      // Setup for CISC SPILLING
      // 'inp' ends up as the input edge index of the CISC-spillable operand,
      // or stays Not_cisc_spillable.
      uint inp = (uint)AdlcVMDeps::Not_cisc_spillable;
      if( UseCISCSpill && after_aggressive ) {
        inp = n->cisc_operand();
        if( inp != (uint)AdlcVMDeps::Not_cisc_spillable )
          // Convert operand number to edge index number
          inp = n->as_Mach()->operand_index(inp);
      }
      // Prepare register mask for each input
      for( uint k = input_edge_start; k < cnt; k++ ) {
        uint vreg = _lrg_map.live_range_id(n->in(k));
        if (!vreg) {
          continue;             // Input has no live range: nothing to constrain
        }

        // If this instruction is CISC Spillable, add the flags
        // bit to its appropriate input
        if( UseCISCSpill && after_aggressive && inp == k ) {
#ifndef PRODUCT
          if( TraceCISCSpill ) {
            tty->print("  use_cisc_RegMask: ");
            n->dump();
          }
#endif
          n->as_Mach()->use_cisc_RegMask();
        }

        // Live range of the input value (shadows the def's 'lrg' above)
        LRG &lrg = lrgs(vreg);
        // // Testing for floating point code shape
        // Node *test = n->in(k);
        // if( test->is_Mach() ) {
        //   MachNode *m = test->as_Mach();
        //   int  op = m->ideal_Opcode();
        //   if (n->is_Call() && (op == Op_AddF || op == Op_MulF) ) {
        //     int zzz = 1;
        //   }
        // }

        // Limit result register mask to acceptable registers.
        // Do not limit registers from uncommon uses before
        // AggressiveCoalesce.  This effectively pre-virtual-splits
        // around uncommon uses of common defs.
        // "Uncommon" here means the defining block is more than 1000 times
        // as frequent as the block of this use.
        const RegMask &rm = n->in_RegMask(k);
        if (!after_aggressive && _cfg.get_block_for_node(n->in(k))->_freq > 1000 * block->_freq) {
          // Since we are BEFORE aggressive coalesce, leave the register
          // mask untrimmed by the call.  This encourages more coalescing.
          // Later, AFTER aggressive, this live range will have to spill
          // but the spiller handles slow-path calls very nicely.
        } else {
          lrg.AND( rm );
        }

        // Check for bound register masks
        const RegMask &lrgmask = lrg.mask();
        uint kreg = n->in(k)->ideal_reg();
        bool is_vect = RegMask::is_vector(kreg);
        assert(n->in(k)->bottom_type()->isa_vect() == NULL ||
               is_vect || kreg == Op_RegD || kreg == Op_RegL,
               "vector must be in vector registers");
        if (lrgmask.is_bound(kreg))
          lrg._is_bound = 1;

        // If this use of a double forces a mis-aligned double,
        // flag as '_fat_proj' - really flag as allowing misalignment
        // AND changes how we count interferences.  A mis-aligned
        // double can interfere with TWO aligned pairs, or effectively
        // FOUR registers!
#ifdef ASSERT
        // Vector live ranges must stay aligned and are never fat-proj.
        if (is_vect) {
          assert(lrgmask.is_aligned_sets(lrg.num_regs()), "vector should be aligned");
          assert(!lrg._fat_proj, "sanity");
          assert(RegMask::num_registers(kreg) == lrg.num_regs(), "sanity");
        }
#endif
        if (!is_vect && lrg.num_regs() == 2 && !lrg._fat_proj && rm.is_misaligned_pair()) {
          lrg._fat_proj = 1;
          lrg._is_bound = 1;
        }
        // if the LRG is an unaligned pair, we will have to spill
        // so clear the LRG's register mask if it is not already spilled
        // ("already spilled" here: the use or the single def is a SpillCopy)
        if (!is_vect && !n->is_SpillCopy() &&
            (lrg._def == NULL || lrg.is_multidef() || !lrg._def->is_SpillCopy()) &&
            lrgmask.is_misaligned_pair()) {
          lrg.Clear();
        }

        // Check for maximum frequency value
        if (lrg._maxfreq < block->_freq) {
          lrg._maxfreq = block->_freq;
        }

      } // End for all allocated inputs
    } // end for all instructions
  } // end for all blocks

  // Final per-liverange setup
  for (uint i2 = 0; i2 < _lrg_map.max_lrg_id(); i2++) {
    LRG &lrg = lrgs(i2);
    assert(!lrg._is_vector || !lrg._fat_proj, "sanity");
    // Multi-register live ranges that are not fat-proj have their mask
    // reduced to register sets.
    if (lrg.num_regs() > 1 && !lrg._fat_proj) {
      lrg.clear_to_sets();
    }
    lrg.compute_set_mask_size();
    if (lrg.not_free()) {      // Handle case where we lose from the start
      lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
      lrg._direct_conflict = 1;
    }
    lrg.set_degree(0);          // no neighbors in IFG yet
  }
}

// Set the was-lo-degree bit.  Conservative coalescing should not change the
// colorability of the graph: any live range that was of low degree before
// coalescing should Simplify.  This call records that fact in the _was_lo
// bit, which is checked in Simplify.  The body is compiled only under ASSERT.
void PhaseChaitin::set_was_low() {
#ifdef ASSERT
  for (uint lrg_idx = 1; lrg_idx < _lrg_map.max_lrg_id(); lrg_idx++) {
    LRG& cur = lrgs(lrg_idx);
    const int cur_regs = cur.num_regs();
    const uint prev_was_lo = cur._was_lo;
    cur._was_lo = 0;
    if (!cur.lo_degree()) {
      // Not trivially low: check the Brigg's assertion.
      // Brigg's observation is that the lo-degree neighbors of a hi-degree
      // live range will not interfere with the color choices of said
      // hi-degree live range.  The Simplify reverse-stack-coloring order
      // takes care of the details.  Hence low-degree neighbors need not be
      // counted when determining if this guy colors.
      int briggs_degree = 0;
      IndexSetIterator nbrs(_ifg->neighbors(lrg_idx));
      for (uint nbr = nbrs.next(); nbr != 0; nbr = nbrs.next()) {
        LRG& other = lrgs(nbr);
        if (other.lo_degree()) {
          continue;             // Low-degree neighbors do not count
        }
        briggs_degree += MAX2(cur_regs, other.num_regs());
      }
      if (briggs_degree < cur.degrees_of_freedom()) {
        cur._was_lo = 1;        // Low degree via the briggs assertion
      }
    } else {
      cur._was_lo = 1;          // Trivially of low degree
    }
    assert(prev_was_lo <= cur._was_lo, "_was_lo may not decrease");
  }
#endif
}

#define REGISTER_CONSTRAINED 16

// Compute cost/area ratio, in case we spill.  Build the lo-degree list.
void PhaseChaitin::cache_lrg_info( ) {

  for (uint i = 1; i < _lrg_map.max_lrg_id(); i++) {
    LRG &lrg = lrgs(i);

    // Check for being of low degree: means we can be trivially colored.
    // Low degree, dead or must-spill guys just get to simplify right away
    if( lrg.lo_degree() ||
       !lrg.alive() ||
        lrg._must_spill ) {
      // Split low degree list into those guys that must get a
      // register and those that can go to register or stack.
      // The idea is LRGs that can go register or stack color first when
      // they have a good chance of getting a register.  The register-only
      // lo-degree live ranges always get a register.
      OptoReg::Name hi_reg = lrg.mask().find_last_elem();
      if( OptoReg::is_stack(hi_reg)) { // Can go to stack?
        lrg._next = _lo_stk_degree;
        _lo_stk_degree = i;
      } else {
        lrg._next = _lo_degree;
        _lo_degree = i;
      }
    } else {                    // Else high degree
      lrgs(_hi_degree)._prev = i;
      lrg._next = _hi_degree;
      lrg._prev = 0;
      _hi_degree = i;
    }
  }
}

// Simplify the IFG by removing LRGs of low degree that have NO copies
void PhaseChaitin::Pre_Simplify( ) {

  // Warm up the lo-degree no-copy list
  int lo_no_copy = 0;
  for (uint i = 1; i < _lrg_map.max_lrg_id(); i++) {
    if ((lrgs(i).lo_degree() && !lrgs(i)._has_copy) ||
        !lrgs(i).alive() ||
        lrgs(i)._must_spill) {
      lrgs(i)._next = lo_no_copy;
      lo_no_copy = i;
    }
  }

  while( lo_no_copy ) {
    uint lo = lo_no_copy;
    lo_no_copy = lrgs(lo)._next;
    int size = lrgs(lo).num_regs();

    // Put the simplified guy on the simplified list.
    lrgs(lo)._next = _simplified;
    _simplified = lo;

    // Yank this guy from the IFG.
    IndexSet *adj = _ifg->remove_node( lo );

    // If any neighbors' degrees fall below their number of
    // allowed registers, then put that neighbor on the low degree
    // list.  Note that 'degree' can only fall and 'numregs' is
    // unchanged by this action.  Thus the two are equal at most once,
    // so LRGs hit the lo-degree worklists at most once.
    IndexSetIterator elements(adj);
    uint neighbor;
    while ((neighbor = elements.next()) != 0) {
      LRG *n = &lrgs(neighbor);
      assert( _ifg->effective_degree(neighbor) == n->degree(), "" );

      // Check for just becoming of-low-degree
      if( n->just_lo_degree() && !n->_has_copy ) {
        assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice");
        // Put on lo-degree list
        n->_next = lo_no_copy;
        lo_no_copy = neighbor;
      }
    }
  } // End of while lo-degree no_copy worklist not empty

  // No more lo-degree no-copy live ranges to simplify
}

// Simplify the IFG by removing LRGs of low degree.
//
// Drains the _lo_degree and _lo_stk_degree worklists: each live range pulled
// off is pushed onto the _simplified list and yanked from the interference
// graph, and any neighbor that just became low-degree is moved from the
// hi-degree list to the lo-degree list.  When both worklists are empty but
// the hi-degree list is not, one hi-degree live range is picked as a
// potential spill candidate, marked _at_risk and placed on the lo-degree
// list so that draining can continue.  Returns once the hi-degree list is
// empty.
void PhaseChaitin::Simplify( ) {

  while( 1 ) {                  // Repeat till simplified it all
    // May want to explore simplifying lo_degree before _lo_stk_degree.
    // This might result in more spills coloring into registers during
    // Select().
    while( _lo_degree || _lo_stk_degree ) {
      // Pull from the lo-degree list while it is non-empty; only fall back
      // to the lo-stk-degree list once the lo-degree list is exhausted.
      uint lo;
      if( _lo_degree ) {
        lo = _lo_degree;
        _lo_degree = lrgs(lo)._next;
      } else {
        lo = _lo_stk_degree;
        _lo_stk_degree = lrgs(lo)._next;
      }

      // Put the simplified guy on the simplified list.
      // (The list is a stack threaded through _next, so it ends up in
      // reverse order of removal.)
      lrgs(lo)._next = _simplified;
      _simplified = lo;
      // If this guy is "at risk" then mark his current neighbors
      // by recording him as their _risk_bias.
      if( lrgs(lo)._at_risk ) {
        IndexSetIterator elements(_ifg->neighbors(lo));
        uint datum;
        while ((datum = elements.next()) != 0) {
          lrgs(datum)._risk_bias = lo;
        }
      }

      // Yank this guy from the IFG.
      // 'adj' is the set of neighbors that gets walked below.
      IndexSet *adj = _ifg->remove_node( lo );

      // If any neighbors' degrees fall below their number of
      // allowed registers, then put that neighbor on the low degree
      // list.  Note that 'degree' can only fall and 'numregs' is
      // unchanged by this action.  Thus the two are equal at most once,
      // so LRGs hit the lo-degree worklist at most once.
      IndexSetIterator elements(adj);
      uint neighbor;
      while ((neighbor = elements.next()) != 0) {
        LRG *n = &lrgs(neighbor);
#ifdef ASSERT
        // Expensive consistency check; only done when verification is on.
        if( VerifyOpto || VerifyRegisterAllocator ) {
          assert( _ifg->effective_degree(neighbor) == n->degree(), "" );
        }
#endif

        // Check for just becoming of-low-degree just counting registers.
        // _must_spill live ranges are already on the low degree list.
        if( n->just_lo_degree() && !n->_must_spill ) {
          assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice");
          // Pull from hi-degree list
          // (doubly linked through _prev/_next; a 'prev' of 0 means the
          // neighbor is the list head).
          uint prev = n->_prev;
          uint next = n->_next;
          if( prev ) lrgs(prev)._next = next;
          else _hi_degree = next;
          // NOTE(review): executed even when 'next' is 0, i.e. this also
          // writes lrgs(0)._prev - presumably LRG 0 is a harmless sentinel.
          lrgs(next)._prev = prev;
          // Push onto the front of the lo-degree list
          n->_next = _lo_degree;
          _lo_degree = neighbor;
        }
      }
    } // End of while lo-degree/lo_stk_degree worklist not empty

    // Check for got everything: is hi-degree list empty?
    if( !_hi_degree ) break;

    // Time to pick a potential spill guy
    // Start with the head of the hi-degree list as the current best choice.
    uint lo_score = _hi_degree;
    double score = lrgs(lo_score).score();
    double area = lrgs(lo_score)._area;
    double cost = lrgs(lo_score)._cost;
    bool bound = lrgs(lo_score)._is_bound;

    // Find cheapest guy
    // (lo_no_simplify remembers, in debug builds only, a hi-degree live range
    // whose _was_lo flag is set; it feeds the assert after the loop.)
    debug_only( int lo_no_simplify=0; );
    for( uint i = _hi_degree; i; i = lrgs(i)._next ) {
      assert( !(*_ifg->_yanked)[i], "" );
      // It's just vaguely possible to move hi-degree to lo-degree without
      // going through a just-lo-degree stage: If you remove a double from
      // a float live range it's degree will drop by 2 and you can skip the
      // just-lo-degree stage.  It's very rare (shows up after 5000+ methods
      // in -Xcomp of Java2Demo).  So just choose this guy to simplify next.
      if( lrgs(i).lo_degree() ) {
        lo_score = i;
        break;
      }
      debug_only( if( lrgs(i)._was_lo ) lo_no_simplify=i; );
      double iscore = lrgs(i).score();
      double iarea = lrgs(i)._area;
      double icost = lrgs(i)._cost;
      bool ibound = lrgs(i)._is_bound;

      // Compare cost/area of i vs cost/area of lo_score.  Smaller cost/area
      // wins.  Ties happen because all live ranges in question have spilled
      // a few times before and the spill-score adds a huge number which
      // washes out the low order bits.  We are choosing the lesser of 2
      // evils; in this case pick largest area to spill.
      // Ties also happen when live ranges are defined and used only inside
      // one block. In which case their area is 0 and score set to max.
      // In such case choose bound live range over unbound to free registers
      // or with smaller cost to spill.
      if( iscore < score ||
          (iscore == score && iarea > area && lrgs(lo_score)._was_spilled2) ||
          (iscore == score && iarea == area &&
           ( (ibound && !bound) || ibound == bound && (icost < cost) )) ) {
        lo_score = i;
        score = iscore;
        area = iarea;
        cost = icost;
        bound = ibound;
      }
    }
    LRG *lo_lrg = &lrgs(lo_score);
    // The live range we choose for spilling is either hi-degree, or very
    // rarely it can be low-degree.  If we choose a hi-degree live range
    // there better not be any lo-degree choices.
    assert( lo_lrg->lo_degree() || !lo_no_simplify, "Live range was lo-degree before coalesce; should simplify" );

    // Pull from hi-degree list
    // (same unlink sequence as used for neighbors above).
    uint prev = lo_lrg->_prev;
    uint next = lo_lrg->_next;
    if( prev ) lrgs(prev)._next = next;
    else _hi_degree = next;
    lrgs(next)._prev = prev;
    // Jam him on the lo-degree list, despite his high degree.
    // Maybe he'll get a color, and maybe he'll spill.
    // Only Select() will know.
    // The lo-degree list is empty at this point (the inner loop drained it),
    // so the candidate becomes its only element.
    lrgs(lo_score)._at_risk = true;
    _lo_degree = lo_score;
    lo_lrg->_next = 0;

  } // End of while not simplified everything

}

// Is 'reg' register legal for 'lrg'?
// A register is legal when it lies inside the chunk being colored, is a
// member of the live range's mask once rebased by 'chunk', and (for live
// ranges needing more than one register) has all low 'num_regs()-1' bits set.
static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) {
  // Registers outside of [chunk, chunk + CHUNK_SIZE) never qualify.
  if (reg < chunk || reg >= (chunk + RegMask::CHUNK_SIZE)) {
    return false;
  }
  // The chunk-relative register has to be allowed by the mask.
  if (!lrg.mask().Member(OptoReg::add(reg, -chunk))) {
    return false;
  }

  // RA uses OptoReg which represent the highest element of a registers set.
  // For example, vectorX (128bit) on x86 uses [XMM,XMMb,XMMc,XMMd] set
  // in which XMMd is used by RA to represent such vectors. A double value
  // uses [XMM,XMMb] pairs and XMMb is used by RA for it.
  // The register mask uses largest bits set of overlapping register sets.
  // On x86 with AVX it uses 8 bits for each XMM registers set.
  //
  // The 'lrg' already has cleared-to-set register mask (done in Select()
  // before calling choose_color()). Passing the Member() check above
  // indicates that the size (num_regs) of 'reg' set is less or equal to
  // 'lrg' set size.
  const int set_size = lrg.num_regs();

  // For set size 1 any register which is member of 'lrg' mask is legal.
  if (set_size == 1) {
    return true;
  }

  // For larger sets only an aligned register with the same set size is legal.
  const int align_bits = set_size - 1;
  return (reg & align_bits) == align_bits;
}

// Choose a color using the biasing heuristic.
// Preferences are tried in order: a color already used by a neighbor of the
// "at_risk" bias live range, a color matching the copy-bias live range, the
// first aligned set (vectors and pairs), and finally the first or second
// element of the mask, alternating between calls.
OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {

  // Preference 1: "at_risk" bias.
  // Walk the colored neighbors of the "at_risk" candidate and pick a color
  // which is both legal for us and already taken by one of them.  Reusing
  // such a color improves the chances of the "at_risk" candidate coloring.
  const uint at_risk_idx = _lrg_map.find(lrg._risk_bias);
  if (at_risk_idx != 0) {
    IndexSetIterator risk_neighbors(_ifg->neighbors(at_risk_idx));
    for (uint nbr = risk_neighbors.next(); nbr != 0; nbr = risk_neighbors.next()) {
      const OptoReg::Name nbr_reg = lrgs(nbr).reg();
      if (is_legal_reg(lrg, nbr_reg, chunk)) {
        return nbr_reg;
      }
    }
  }

  // Preference 2: copy bias.
  const uint copy_idx = _lrg_map.find(lrg._copy_bias);
  if (copy_idx != 0) {
    if (!(*(_ifg->_yanked))[copy_idx]) {
      // The copy partner is not yanked, so it has a color: take the same
      // one if it is legal for us.
      const OptoReg::Name partner_reg = lrgs(copy_idx).reg();
      if (is_legal_reg(lrg, partner_reg, chunk)) {
        return partner_reg;
      }
    } else if (chunk == 0) {
      // The partner has no color yet: choose one that is legal for both of
      // us by intersecting the two masks.
      RegMask shared = lrg.mask();
      shared.AND(lrgs(copy_idx).mask());
      shared.clear_to_sets(lrg.num_regs());
      const OptoReg::Name shared_reg = shared.find_first_set(lrg.num_regs());
      if (OptoReg::is_valid(shared_reg)) {
        return shared_reg;
      }
    }
  }

  // No usable bias info: fall back to the register selection ordering.
  if (lrg._is_vector || lrg.num_regs() == 2) {
    // Vectors and pairs need an aligned set
    const OptoReg::Name aligned = lrg.mask().find_first_set(lrg.num_regs());
    return OptoReg::add(aligned, chunk);
  }

  // CNC - Fun hack.  Alternate 1st and 2nd selection.  Enables post-allocate
  // copy removal to remove many more copies, by preventing a just-assigned
  // register from being repeatedly assigned.
  OptoReg::Name pick = lrg.mask().find_first_elem();
  const bool try_second = ((++_alternate & 1) != 0);
  if (try_second && OptoReg::is_valid(pick)) {
    // Temporarily drop the first element to expose the SECOND element of
    // the mask, then put the first one back.  Expensive, but simple.
    lrg.Remove(pick);
    const OptoReg::Name runner_up = lrg.mask().find_first_elem();
    lrg.Insert(pick);
    if (OptoReg::is_reg(runner_up)) {
      pick = runner_up;
    }
  }
  return OptoReg::add(pick, chunk);
}

// Choose a color in the current chunk.
// Single-register live ranges and everything that is not a fat-proj are
// handed to bias_color(); the remaining fat-proj case takes the highest
// element of the mask.
OptoReg::Name PhaseChaitin::choose_color( LRG &lrg, int chunk ) {
  assert( C->in_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP-1)), "must not allocate stack0 (inside preserve area)");
  assert(C->out_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP+0)), "must not allocate stack0 (inside preserve area)");

  // Common case (one register), or aligned+adjacent pairs/sets: let the
  // heuristic "bias" the color choice.
  const bool use_bias_heuristic = (lrg.num_regs() == 1) || !lrg._fat_proj;
  if (use_bias_heuristic) {
    return bias_color(lrg, chunk);
  }

  // From here on: fat-proj case or misaligned double argument.
  assert(!lrg._is_vector, "should be not vector here" );
  assert( lrg.num_regs() >= 2, "dead live ranges do not color" );
  assert(lrg.compute_mask_size() == lrg.num_regs() ||
         lrg.num_regs() == 2,"fat projs exactly color" );
  assert( !chunk, "always color in 1st chunk" );

  // The color reported is the highest element in the set.
  const OptoReg::Name highest = lrg.mask().find_last_elem();
  return highest;
}

// Select colors by re-inserting LRGs back into the IFG.  LRGs are re-inserted
// in reverse order of removal.  As long as nothing of hi-degree was yanked,
// everything going back is guaranteed a color.  Select that color.  If some
// hi-degree LRG cannot get a color then we record that we must spill.
//
// Side effects: empties the _simplified list, records the chosen register in
// each live range (set_reg), narrows the mask of colored non-fat-proj live
// ranges to the selected registers and updates _max_reg.
// Returns the number of live ranges that were given a spill register.
uint PhaseChaitin::Select( ) {
  // Next spill register to hand out; advanced once per spilled live range.
  uint spill_reg = LRG::SPILL_REG;
  _max_reg = OptoReg::Name(0);  // Past max register used
  while( _simplified ) {
    // Pull next LRG from the simplified list - in reverse order of removal
    uint lidx = _simplified;
    LRG *lrg = &lrgs(lidx);
    _simplified = lrg->_next;


#ifndef PRODUCT
    if (trace_spilling()) {
      ttyLocker ttyl;
      tty->print_cr("L%d selecting degree %d degrees_of_freedom %d", lidx, lrg->degree(),
                    lrg->degrees_of_freedom());
      lrg->dump();
    }
#endif

    // Re-insert into the IFG
    _ifg->re_insert(lidx);
    // Live ranges that are not alive are re-inserted but get no color.
    if( !lrg->alive() ) continue;
    // capture allstackedness flag before mask is hacked
    const int is_allstack = lrg->mask().is_AllStack();

    // Yeah, yeah, yeah, I know, I know.  I can refactor this
    // to avoid the GOTO, although the refactored code will not
    // be much clearer.  We arrive here IFF we have a stack-based
    // live range that cannot color in the current chunk, and it
    // has to move into the next free stack chunk.
    int chunk = 0;              // Current chunk is first chunk
    retry_next_chunk:

    // Remove neighbor colors
    IndexSet *s = _ifg->neighbors(lidx);

    // Debug-only snapshot of the mask, used by the spill-case assert and
    // trace output below.
    debug_only(RegMask orig_mask = lrg->mask();)
    IndexSetIterator elements(s);
    uint neighbor;
    while ((neighbor = elements.next()) != 0) {
      // Note that neighbor might be a spill_reg.  In this case, exclusion
      // of its color will be a no-op, since the spill_reg chunk is in outer
      // space.  Also, if neighbor is in a different chunk, this exclusion
      // will be a no-op.  (Later on, if lrg runs out of possible colors in
      // its chunk, a new chunk of color may be tried, in which case
      // examination of neighbors is started again, at retry_next_chunk.)
      LRG &nlrg = lrgs(neighbor);
      OptoReg::Name nreg = nlrg.reg();
      // Only subtract masks in the same chunk
      if( nreg >= chunk && nreg < chunk + RegMask::CHUNK_SIZE ) {
#ifndef PRODUCT
        // Remember size and contents of the mask so the trace output below
        // can show exactly what this neighbor removed.
        uint size = lrg->mask().Size();
        RegMask rm = lrg->mask();
#endif
        lrg->SUBTRACT(nlrg.mask());
#ifndef PRODUCT
        if (trace_spilling() && lrg->mask().Size() != size) {
          ttyLocker ttyl;
          tty->print("L%d ", lidx);
          rm.dump();
          tty->print(" intersected L%d ", neighbor);
          nlrg.mask().dump();
          tty->print(" removed ");
          rm.SUBTRACT(lrg->mask());
          rm.dump();
          tty->print(" leaving ");
          lrg->mask().dump();
          tty->cr();
        }
#endif
      }
    }
    //assert(is_allstack == lrg->mask().is_AllStack(), "nbrs must not change AllStackedness");
    // Aligned pairs need aligned masks
    assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
    if (lrg->num_regs() > 1 && !lrg->_fat_proj) {
      lrg->clear_to_sets();
    }

    // Check if a color is available and if so pick the color
    OptoReg::Name reg = choose_color( *lrg, chunk );
#ifdef SPARC
    debug_only(lrg->compute_set_mask_size());
    assert(lrg->num_regs() < 2 || lrg->is_bound() || is_even(reg-1), "allocate all doubles aligned");
#endif

    //---------------
    // If we fail to color and the AllStack flag is set, trigger
    // a chunk-rollover event
    if(!OptoReg::is_valid(OptoReg::add(reg,-chunk)) && is_allstack) {
      // Bump register mask up to next stack chunk
      chunk += RegMask::CHUNK_SIZE;
      lrg->Set_All();

      // Redo the neighbor subtraction and color choice for the new chunk.
      goto retry_next_chunk;
    }

    //---------------
    // Did we get a color?
    else if( OptoReg::is_valid(reg)) {
#ifndef PRODUCT
      // Mask as it was when the color was picked, for the trace output.
      RegMask avail_rm = lrg->mask();
#endif

      // Record selected register
      // (still chunk-relative-plus-chunk, i.e. in the global numbering).
      lrg->set_reg(reg);

      if( reg >= _max_reg )     // Compute max register limit
        _max_reg = OptoReg::add(reg,1);
      // Fold reg back into normal space
      reg = OptoReg::add(reg,-chunk);

      // If the live range is not bound, then we actually had some choices
      // to make.  In this case, the mask has more bits in it than the colors
      // chosen.  Restrict the mask to just what was picked.
      int n_regs = lrg->num_regs();
      assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
      if (n_regs == 1 || !lrg->_fat_proj) {
        assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecY, "sanity");
        lrg->Clear();           // Clear the mask
        lrg->Insert(reg);       // Set regmask to match selected reg
        // For vectors and pairs, also insert the low bit of the pair
        // ('reg' is the highest register of the set, so the others are
        // reg-1 .. reg-(n_regs-1)).
        for (int i = 1; i < n_regs; i++)
          lrg->Insert(OptoReg::add(reg,-i));
        lrg->set_mask_size(n_regs);
      } else {                  // Else fatproj
        // mask must be equal to fatproj bits, by definition
      }
#ifndef PRODUCT
      if (trace_spilling()) {
        ttyLocker ttyl;
        tty->print("L%d selected ", lidx);
        lrg->mask().dump();
        tty->print(" from ");
        avail_rm.dump();
        tty->cr();
      }
#endif
      // Note that reg is the highest-numbered register in the newly-bound mask.
    } // end color available case

    //---------------
    // Live range is live and no colors available
    else {
      assert( lrg->alive(), "" );
      assert( !lrg->_fat_proj || lrg->is_multidef() ||
              lrg->_def->outcnt() > 0, "fat_proj cannot spill");
      assert( !orig_mask.is_AllStack(), "All Stack does not spill" );

      // Assign the special spillreg register
      // Each spilled live range gets its own, increasing spill register.
      lrg->set_reg(OptoReg::Name(spill_reg++));
      // Do not empty the regmask; leave mask_size lying around
      // for use during Spilling
#ifndef PRODUCT
      if( trace_spilling() ) {
        ttyLocker ttyl;
        tty->print("L%d spilling with neighbors: ", lidx);
        s->dump();
        debug_only(tty->print(" original mask: "));
        debug_only(orig_mask.dump());
        dump_lrg(lidx);
      }
#endif
    } // end spill case

  }

  return spill_reg-LRG::SPILL_REG;      // Return number of spills
}

// Copy 'was_spilled'-edness from the source Node to the dst Node.
// Both the per-node bit sets (_spilled_once/_spilled_twice) and the matching
// flags on dst's live range are updated.
void PhaseChaitin::copy_was_spilled( Node *src, Node *dst ) {
  // Nothing to propagate unless the source was spilled at least once.
  if (!_spilled_once.test(src->_idx)) {
    return;
  }
  _spilled_once.set(dst->_idx);
  lrgs(_lrg_map.find(dst))._was_spilled1 = 1;

  // The 'twice' state is only consulted for sources already spilled once.
  if (!_spilled_twice.test(src->_idx)) {
    return;
  }
  _spilled_twice.set(dst->_idx);
  lrgs(_lrg_map.find(dst))._was_spilled2 = 1;
}

// Set the 'spilled_once' or 'spilled_twice' flag on a node.
// The 'twice' bit is set when test_set() on the 'once' set reports non-zero
// (presumably meaning the node had already been marked as spilled once).
void PhaseChaitin::set_was_spilled( Node *n ) {
  const uint node_idx = n->_idx;
  if (_spilled_once.test_set(node_idx)) {
    _spilled_twice.set(node_idx);
  }
}

// Convert Ideal spill instructions into proper FramePtr + offset Loads and
// Stores.  Use-def chains are NOT preserved, but Node->LRG->reg maps are.
//
// Walks every instruction of every block.  For each node that reports a
// cisc-spillable operand whose input was allocated to a stack slot, the node
// is replaced (in the block and in the graph) by its cisc_version() that
// addresses the slot off the frame pointer.  Counts both outcomes in
// _used_cisc_instructions / _unused_cisc_instructions.  Returns early,
// leaving later nodes untouched, if the compile is failing after the node
// count check.
void PhaseChaitin::fixup_spills() {
  // This function does only cisc spill work.
  if( !UseCISCSpill ) return;

  NOT_PRODUCT( Compile::TracePhase t3("fixupSpills", &_t_fixupSpills, TimeCompiler); )

  // Grab the Frame Pointer
  Node *fp = _cfg.get_root_block()->head()->in(1)->in(TypeFunc::FramePtr);

  // For all blocks
  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
    Block* block = _cfg.get_block(i);

    // For all instructions in block
    // (index 0, the block head, is skipped; end_idx() is included).
    uint last_inst = block->end_idx();
    for (uint j = 1; j <= last_inst; j++) {
      Node* n = block->get_node(j);

      // Dead instruction???
      assert( n->outcnt() != 0 ||// Nothing dead after post alloc
              C->top() == n ||  // Or the random TOP node
              n->is_Proj(),     // Or a fat-proj kill node
              "No dead instructions after post-alloc" );

      // Operand number of the cisc-spillable operand, or Not_cisc_spillable.
      int inp = n->cisc_operand();
      if( inp != AdlcVMDeps::Not_cisc_spillable ) {
        // Convert operand number to edge index number
        MachNode *mach = n->as_Mach();
        inp = mach->operand_index(inp);
        Node *src = n->in(inp);   // Value to load or store
        LRG &lrg_cisc = lrgs(_lrg_map.find_const(src));
        OptoReg::Name src_reg = lrg_cisc.reg();
        // Doubles record the HIGH register of an adjacent pair.
        // Step back to the lowest register of the set.
        src_reg = OptoReg::add(src_reg,1-lrg_cisc.num_regs());
        if( OptoReg::is_stack(src_reg) ) { // If input is on stack
          // This is a CISC Spill, get stack offset and construct new node
#ifndef PRODUCT
          if( TraceCISCSpill ) {
            tty->print("    reg-instr:  ");
            n->dump();
          }
#endif
          int stk_offset = reg2offset(src_reg);
          // Bailout if we might exceed node limit when spilling this instruction
          C->check_node_count(0, "out of nodes fixing spills");
          if (C->failing())  return;
          // Transform node
          MachNode *cisc = mach->cisc_version(stk_offset, C)->as_Mach();
          cisc->set_req(inp,fp);          // Base register is frame pointer
          // The cisc form may have one more leading input than the original
          // (a memory edge); if so, insert 'src' there.
          if( cisc->oper_input_base() > 1 && mach->oper_input_base() <= 1 ) {
            assert( cisc->oper_input_base() == 2, "Only adding one edge");
            cisc->ins_req(1,src);         // Requires a memory edge
          }
          block->map_node(cisc, j);          // Insert into basic block
          n->subsume_by(cisc, C); // Correct graph
          //
          ++_used_cisc_instructions;
#ifndef PRODUCT
          if( TraceCISCSpill ) {
            tty->print("    cisc-instr: ");
            cisc->dump();
          }
#endif
        } else {
          // Operand ended up in a register: keep the register form.
#ifndef PRODUCT
          if( TraceCISCSpill ) {
            tty->print("    using reg-instr: ");
            n->dump();
          }
#endif
          ++_unused_cisc_instructions;    // input can be on stack
        }
      }

    } // End of for all instructions

  } // End of for all blocks
}

// Helper for stretch_base_pointer_live_ranges() (defined below); recursively
// discover the base Node for a given derived Node.  Easy for AddP-related
// machine nodes, but needs to be recursive for derived Phis.
//
//   derived_base_map - cache indexed by Node::_idx; a non-NULL entry is the
//                      already computed base for that node.  Every result
//                      computed here is stored back into it.
//   derived          - node whose base is wanted.
//   maxlrg           - in/out: next free live range id; post-incremented for
//                      every live range created here via new_lrg().
//
// Returns the base node.  May insert new nodes (a shared NULL base, its
// MachProj users, or a base Phi) into blocks as a side effect.
Node *PhaseChaitin::find_base_for_derived( Node **derived_base_map, Node *derived, uint &maxlrg ) {
  // See if already computed; if so return it
  if( derived_base_map[derived->_idx] )
    return derived_base_map[derived->_idx];

  // See if this happens to be a base.
  // NOTE: we use TypePtr instead of TypeOopPtr because we can have
  // pointers derived from NULL!  These are always along paths that
  // can't happen at run-time but the optimizer cannot deduce it so
  // we have to handle it gracefully.
  assert(!derived->bottom_type()->isa_narrowoop() ||
          derived->bottom_type()->make_ptr()->is_ptr()->_offset == 0, "sanity");
  const TypePtr *tj = derived->bottom_type()->isa_ptr();
  // If its an OOP with a non-zero offset, then it is derived.
  // Otherwise (not a pointer, or zero offset) the node is its own base.
  if( tj == NULL || tj->_offset == 0 ) {
    derived_base_map[derived->_idx] = derived;
    return derived;
  }
  // Derived is NULL+offset?  Base is NULL!
  if( derived->is_Con() ) {
    Node *base = _matcher.mach_null();
    assert(base != NULL, "sanity");
    if (base->in(0) == NULL) {
      // Initialize it once and make it shared:
      // set control to _root and place it into Start block
      // (where top() node is placed).
      base->init_req(0, _cfg.get_root_node());
      Block *startb = _cfg.get_block_for_node(C->top());
      uint node_pos = startb->find_node(C->top());
      startb->insert_node(base, node_pos);
      _cfg.map_node_to_block(base, startb);
      assert(_lrg_map.live_range_id(base) == 0, "should not have LRG yet");

      // The loadConP0 might have projection nodes depending on architecture
      // Add the projection nodes to the CFG
      // Each projection is inserted after the previously inserted node and
      // gets its own new live range.
      for (DUIterator_Fast imax, i = base->fast_outs(imax); i < imax; i++) {
        Node* use = base->fast_out(i);
        if (use->is_MachProj()) {
          startb->insert_node(use, ++node_pos);
          _cfg.map_node_to_block(use, startb);
          new_lrg(use, maxlrg++);
        }
      }
    }
    // Give the shared NULL base a live range if it does not have one yet.
    if (_lrg_map.live_range_id(base) == 0) {
      new_lrg(base, maxlrg++);
    }
    assert(base->in(0) == _cfg.get_root_node() && _cfg.get_block_for_node(base) == _cfg.get_block_for_node(C->top()), "base NULL should be shared");
    derived_base_map[derived->_idx] = base;
    return base;
  }

  // Check for AddP-related opcodes
  // Anything that is not a Phi is expected to be an AddP-like machine node
  // whose base is directly available as an input.
  if (!derived->is_Phi()) {
    assert(derived->as_Mach()->ideal_Opcode() == Op_AddP, err_msg_res("but is: %s", derived->Name()));
    Node *base = derived->in(AddPNode::Base);
    derived_base_map[derived->_idx] = base;
    return base;
  }

  // Recursively find bases for Phis.
  // First check to see if we can avoid a base Phi here.
  // That is the case when every input of the derived Phi has the same base.
  Node *base = find_base_for_derived( derived_base_map, derived->in(1),maxlrg);
  uint i;
  for( i = 2; i < derived->req(); i++ )
    if( base != find_base_for_derived( derived_base_map,derived->in(i),maxlrg))
      break;
  // Went to the end without finding any different bases?
  if( i == derived->req() ) {   // No need for a base Phi here
    derived_base_map[derived->_idx] = base;
    return base;
  }

  // Now we see we need a base-Phi here to merge the bases
  // It shares the derived Phi's region; its type is the meet of the types
  // of all input bases.
  const Type *t = base->bottom_type();
  base = new (C) PhiNode( derived->in(0), t );
  for( i = 1; i < derived->req(); i++ ) {
    base->init_req(i, find_base_for_derived(derived_base_map, derived->in(i), maxlrg));
    t = t->meet(base->in(i)->bottom_type());
  }
  base->as_Phi()->set_type(t);

  // Search the current block for an existing base-Phi
  Block *b = _cfg.get_block_for_node(derived);
  for( i = 1; i <= b->end_idx(); i++ ) {// Search for matching Phi
    Node *phi = b->get_node(i);
    if( !phi->is_Phi() ) {      // Found end of Phis with no match?
      b->insert_node(base,  i); // Must insert created Phi here as base
      _cfg.map_node_to_block(base, b);
      new_lrg(base,maxlrg++);
      break;
    }
    // See if Phi matches.
    // Inputs match when they are the same node or both are constants.
    uint j;
    for( j = 1; j < base->req(); j++ )
      if( phi->in(j) != base->in(j) &&
          !(phi->in(j)->is_Con() && base->in(j)->is_Con()) ) // allow different NULLs
        break;
    if( j == base->req() ) {    // All inputs match?
      base = phi;               // Then use existing 'phi' and drop 'base'
      break;
    }
  }


  // Cache info for later passes
  derived_base_map[derived->_idx] = base;
  return base;
}

// At each Safepoint, insert extra debug edges for each pair of derived value/
// base pointer that is live across the Safepoint for oopmap building.  The
// edge pairs get added in after sfpt->jvmtail()->oopoff(), but are in the
// required edge set.
//
// Also pre-splits the input of integer compares of loop-phis (see the comment
// in the loop body).
//
//   a - resource area used to allocate the temporary derived->base cache.
//
// Updates the maximum live range id in _lrg_map to account for live ranges
// created here.  Returns true if the live information has to be recomputed.
bool PhaseChaitin::stretch_base_pointer_live_ranges(ResourceArea *a) {
  // Kept as an int and converted to bool on return.
  int must_recompute_live = false;
  uint maxlrg = _lrg_map.max_lrg_id();
  // Cache of already discovered bases, indexed by Node::_idx and zeroed so
  // that every entry starts out as "not computed".
  Node **derived_base_map = (Node**)a->Amalloc(sizeof(Node*)*C->unique());
  memset( derived_base_map, 0, sizeof(Node*)*C->unique() );

  // For all blocks in RPO do...
  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
    Block* block = _cfg.get_block(i);
    // Note use of deep-copy constructor.  I cannot hammer the original
    // liveout bits, because they are needed by the following coalesce pass.
    IndexSet liveout(_live->live(block));

    // Walk the block backwards, from the node at end_idx() down to index 1,
    // keeping 'liveout' equal to the set live just after the current node.
    for (uint j = block->end_idx() + 1; j > 1; j--) {
      Node* n = block->get_node(j - 1);

      // Pre-split compares of loop-phis.  Loop-phis form a cycle we would
      // like to see in the same register.  Compare uses the loop-phi and so
      // extends its live range BUT cannot be part of the cycle.  If this
      // extended live range overlaps with the update of the loop-phi value
      // we need both alive at the same time -- which requires at least 1
      // copy.  But because Intel has only 2-address registers we end up with
      // at least 2 copies, one before the loop-phi update instruction and
      // one after.  Instead we split the input to the compare just after the
      // phi.
      if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_CmpI ) {
        Node *phi = n->in(1);
        if( phi->is_Phi() && phi->as_Phi()->region()->is_Loop() ) {
          Block *phi_block = _cfg.get_block_for_node(phi);
          // Only split when the compare sits in the block feeding the phi
          // block's second predecessor edge (presumably the loop back edge).
          if (_cfg.get_block_for_node(phi_block->pred(2)) == block) {
            const RegMask *mask = C->matcher()->idealreg2spillmask[Op_RegI];
            Node *spill = new (C) MachSpillCopyNode( phi, *mask, *mask );
            insert_proj( phi_block, 1, spill, maxlrg++ );
            n->set_req(1,spill);
            must_recompute_live = true;
          }
        }
      }

      // Get value being defined
      uint lidx = _lrg_map.live_range_id(n);
      // Ignore the occasional brand-new live range
      if (lidx && lidx < _lrg_map.max_lrg_id()) {
        // Remove from live-out set
        liveout.remove(lidx);

        // Copies do not define a new value and so do not interfere.
        // Remove the copies source from the liveout set before interfering.
        uint idx = n->is_Copy();
        if (idx) {
          liveout.remove(_lrg_map.live_range_id(n->in(idx)));
        }
      }

      // Found a safepoint?
      // (Any node carrying a JVMState is treated as one.)
      JVMState *jvms = n->jvms();
      if( jvms ) {
        // Now scan for a live derived pointer
        IndexSetIterator elements(&liveout);
        uint neighbor;
        while ((neighbor = elements.next()) != 0) {
          // Find reaching DEF for base and derived values
          // This works because we are still in SSA during this call.
          Node *derived = lrgs(neighbor)._def;
          const TypePtr *tj = derived->bottom_type()->isa_ptr();
          assert(!derived->bottom_type()->isa_narrowoop() ||
                  derived->bottom_type()->make_ptr()->is_ptr()->_offset == 0, "sanity");
          // If its an OOP with a non-zero offset, then it is derived.
          if( tj && tj->_offset != 0 && tj->isa_oop_ptr() ) {
            Node *base = find_base_for_derived(derived_base_map, derived, maxlrg);
            assert(base->_idx < _lrg_map.size(), "");
            // Add reaching DEFs of derived pointer and base pointer as a
            // pair of inputs
            n->add_req(derived);
            n->add_req(base);

            // See if the base pointer is already live to this point.
            // Since I'm working on the SSA form, live-ness amounts to
            // reaching def's.  So if I find the base's live range then
            // I know the base's def reaches here.
            if ((_lrg_map.live_range_id(base) >= _lrg_map.max_lrg_id() || // (Brand new base (hence not live) or
                 !liveout.member(_lrg_map.live_range_id(base))) && // not live) AND
                 (_lrg_map.live_range_id(base) > 0) && // not a constant
                 _cfg.get_block_for_node(base) != block) { // base not def'd in blk)
              // Base pointer is not currently live.  Since I stretched
              // the base pointer to here and it crosses basic-block
              // boundaries, the global live info is now incorrect.
              // Recompute live.
              must_recompute_live = true;
            } // End of if base pointer is not live to debug info
          }
        } // End of scan all live data for derived ptrs crossing GC point
      } // End of if found a GC point

      // Make all inputs live
      if (!n->is_Phi()) {      // Phi function uses come from prior block
        for (uint k = 1; k < n->req(); k++) {
          // Note: this 'lidx' shadows the one declared above for 'n'.
          // Live ranges created during this pass (id >= max_lrg_id) are
          // not tracked in 'liveout'.
          uint lidx = _lrg_map.live_range_id(n->in(k));
          if (lidx < _lrg_map.max_lrg_id()) {
            liveout.insert(lidx);
          }
        }
      }

    } // End of forall instructions in block
    liveout.clear();  // Free the memory used by liveout.

  } // End of forall blocks
  // Publish the live ranges created above.
  _lrg_map.set_max_lrg_id(maxlrg);

  // If I created a new live range I need to recompute live
  if (maxlrg != _ifg->_maxlrg) {
    must_recompute_live = true;
  }

  return must_recompute_live != 0;
}

// Extend the node to LRG mapping

void PhaseChaitin::add_reference(const Node *node, const Node *old_node) {
  _lrg_map.extend(node->_idx, _lrg_map.live_range_id(old_node));
}

#ifndef PRODUCT
void PhaseChaitin::dump(const Node *n) const {
  uint r = (n->_idx < _lrg_map.size()) ? _lrg_map.find_const(n) : 0;
  tty->print("L%d",r);
  if (r && n->Opcode() != Op_Phi) {
    if( _node_regs ) {          // Got a post-allocation copy of allocation?
      tty->print("[");
      OptoReg::Name second = get_reg_second(n);
      if( OptoReg::is_valid(second) ) {
        if( OptoReg::is_reg(second) )
          tty->print("%s:",Matcher::regName[second]);
        else
          tty->print("%s+%d:",OptoReg::regname(OptoReg::c_frame_pointer), reg2offset_unchecked(second));
      }
      OptoReg::Name first = get_reg_first(n);
      if( OptoReg::is_reg(first) )
        tty->print("%s]",Matcher::regName[first]);
      else
         tty->print("%s+%d]",OptoReg::regname(OptoReg::c_frame_pointer), reg2offset_unchecked(first));
    } else
    n->out_RegMask().dump();
  }
  tty->print("/N%d\t",n->_idx);
  tty->print("%s === ", n->Name());
  uint k;
  for (k = 0; k < n->req(); k++) {
    Node *m = n->in(k);
    if (!m) {
      tty->print("_ ");
    }
    else {
      uint r = (m->_idx < _lrg_map.size()) ? _lrg_map.find_const(m) : 0;
      tty->print("L%d",r);
      // Data MultiNode's can have projections with no real registers.
      // Don't die while dumping them.
      int op = n->Opcode();
      if( r && op != Op_Phi && op != Op_Proj && op != Op_SCMemProj) {
        if( _node_regs ) {
          tty->print("[");
          OptoReg::Name second = get_reg_second(n->in(k));
          if( OptoReg::is_valid(second) ) {
            if( OptoReg::is_reg(second) )
              tty->print("%s:",Matcher::regName[second]);
            else
              tty->print("%s+%d:",OptoReg::regname(OptoReg::c_frame_pointer),
                         reg2offset_unchecked(second));
          }
          OptoReg::Name first = get_reg_first(n->in(k));
          if( OptoReg::is_reg(first) )
            tty->print("%s]",Matcher::regName[first]);
          else
            tty->print("%s+%d]",OptoReg::regname(OptoReg::c_frame_pointer),
                       reg2offset_unchecked(first));
        } else
          n->in_RegMask(k).dump();
      }
      tty->print("/N%d ",m->_idx);
    }
  }
  if( k < n->len() && n->in(k) ) tty->print("| ");
  for( ; k < n->len(); k++ ) {
    Node *m = n->in(k);
    if(!m) {
      break;
    }
    uint r = (m->_idx < _lrg_map.size()) ? _lrg_map.find_const(m) : 0;
    tty->print("L%d",r);
    tty->print("/N%d ",m->_idx);
  }
  if( n->is_Mach() ) n->as_Mach()->dump_spec(tty);
  else n->dump_spec(tty);
  if( _spilled_once.test(n->_idx ) ) {
    tty->print(" Spill_1");
    if( _spilled_twice.test(n->_idx ) )
      tty->print(" Spill_2");
  }
  tty->print("\n");
}

void PhaseChaitin::dump(const Block *b) const {
  b->dump_head(&_cfg);

  // For all instructions
  for( uint j = 0; j < b->number_of_nodes(); j++ )
    dump(b->get_node(j));
  // Print live-out info at end of block
  if( _live ) {
    tty->print("Liveout: ");
    IndexSet *live = _live->live(b);
    IndexSetIterator elements(live);
    tty->print("{");
    uint i;
    while ((i = elements.next()) != 0) {
      tty->print("L%d ", _lrg_map.find_const(i));
    }
    tty->print_cr("}");
  }
  tty->print("\n");
}

void PhaseChaitin::dump() const {
  tty->print( "--- Chaitin -- argsize: %d  framesize: %d ---\n",
              _matcher._new_SP, _framesize );

  // For all blocks
  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
    dump(_cfg.get_block(i));
  }
  // End of per-block dump
  tty->print("\n");

  if (!_ifg) {
    tty->print("(No IFG.)\n");
    return;
  }

  // Dump LRG array
  tty->print("--- Live RanGe Array ---\n");
  for (uint i2 = 1; i2 < _lrg_map.max_lrg_id(); i2++) {
    tty->print("L%d: ",i2);
    if (i2 < _ifg->_maxlrg) {
      lrgs(i2).dump();
    }
    else {
      tty->print_cr("new LRG");
    }
  }
  tty->cr();

  // Dump lo-degree list
  tty->print("Lo degree: ");
  for(uint i3 = _lo_degree; i3; i3 = lrgs(i3)._next )
    tty->print("L%d ",i3);
  tty->cr();

  // Dump lo-stk-degree list
  tty->print("Lo stk degree: ");
  for(uint i4 = _lo_stk_degree; i4; i4 = lrgs(i4)._next )
    tty->print("L%d ",i4);
  tty->cr();

  // Dump lo-degree list
  tty->print("Hi degree: ");
  for(uint i5 = _hi_degree; i5; i5 = lrgs(i5)._next )
    tty->print("L%d ",i5);
  tty->cr();
}

void PhaseChaitin::dump_degree_lists() const {
  // Dump lo-degree list
  tty->print("Lo degree: ");
  for( uint i = _lo_degree; i; i = lrgs(i)._next )
    tty->print("L%d ",i);
  tty->cr();

  // Dump lo-stk-degree list
  tty->print("Lo stk degree: ");
  for(uint i2 = _lo_stk_degree; i2; i2 = lrgs(i2)._next )
    tty->print("L%d ",i2);
  tty->cr();

  // Dump lo-degree list
  tty->print("Hi degree: ");
  for(uint i3 = _hi_degree; i3; i3 = lrgs(i3)._next )
    tty->print("L%d ",i3);
  tty->cr();
}

void PhaseChaitin::dump_simplified() const {
  tty->print("Simplified: ");
  for( uint i = _simplified; i; i = lrgs(i)._next )
    tty->print("L%d ",i);
  tty->cr();
}

// Format a printable name for 'reg' into 'buf' and return a pointer to the
// terminating NUL so callers can keep appending.
//   - negative values are printed as "<OptoReg::N>"
//   - machine registers use their Matcher::regName entry
//   - anything else is printed as an offset from the frame pointer, computed
//     by pc->reg2offset()
// NOTE(review): 'buf' carries no length; the caller must supply a buffer
// large enough for the longest name.
static char *print_reg( OptoReg::Name reg, const PhaseChaitin *pc, char *buf ) {
  const int raw = (int)reg;
  if (raw < 0) {
    sprintf(buf, "<OptoReg::%d>", raw);
  } else if (!OptoReg::is_reg(reg)) {
    const char *frame_ptr = OptoReg::regname(OptoReg::c_frame_pointer);
    sprintf(buf, "%s + #%d", frame_ptr, pc->reg2offset(reg));
  } else {
    strcpy(buf, Matcher::regName[reg]);
  }
  return buf + strlen(buf);
}

// Dump a register name into a buffer.  Be intelligent if we get called
// before allocation is complete.
char *PhaseChaitin::dump_register( const Node *n, char *buf  ) const {
  if( this == NULL ) {          // Not got anything?
    sprintf(buf,"N%d",n->_idx); // Then use Node index
  } else if( _node_regs ) {
    // Post allocation, use direct mappings, no LRG info available
    print_reg( get_reg_first(n), this, buf );
  } else {
    uint lidx = _lrg_map.find_const(n); // Grab LRG number
    if( !_ifg ) {
      sprintf(buf,"L%d",lidx);  // No register binding yet
    } else if( !lidx ) {        // Special, not allocated value
      strcpy(buf,"Special");
    } else {
      if (lrgs(lidx)._is_vector) {
        if (lrgs(lidx).mask().is_bound_set(lrgs(lidx).num_regs()))
          print_reg( lrgs(lidx).reg(), this, buf ); // a bound machine register
        else
          sprintf(buf,"L%d",lidx); // No register binding yet
      } else if( (lrgs(lidx).num_regs() == 1)
                 ? lrgs(lidx).mask().is_bound1()
                 : lrgs(lidx).mask().is_bound_pair() ) {
        // Hah!  We have a bound machine register
        print_reg( lrgs(lidx).reg(), this, buf );
      } else {
        sprintf(buf,"L%d",lidx); // No register binding yet
      }
    }
  }
  return buf+strlen(buf);
}

void PhaseChaitin::dump_for_spill_split_recycle() const {
  if( WizardMode && (PrintCompilation || PrintOpto) ) {
    // Display which live ranges need to be split and the allocator's state
    tty->print_cr("Graph-Coloring Iteration %d will split the following live ranges", _trip_cnt);
    for (uint bidx = 1; bidx < _lrg_map.max_lrg_id(); bidx++) {
      if( lrgs(bidx).alive() && lrgs(bidx).reg() >= LRG::SPILL_REG ) {
        tty->print("L%d: ", bidx);
        lrgs(bidx).dump();
      }
    }
    tty->cr();
    dump();
  }
}

// Print a map of the frame to tty: the incoming arguments passed in
// registers first, then one line per stack slot for each frame area in turn.
// 'reg' acts as a cursor that is stepped down one slot at a time with
// OptoReg::add(reg, -1); each section below continues from where the
// previous one stopped, except the spill area, which restarts the cursor.
void PhaseChaitin::dump_frame() const {
  const char *fp = OptoReg::regname(OptoReg::c_frame_pointer);
  const TypeTuple *domain = C->tf()->domain();
  // Number of real parameters: fields at and after TypeFunc::Parms
  const int        argcnt = domain->cnt() - TypeFunc::Parms;

  // Incoming arguments in registers dump
  for( int k = 0; k < argcnt; k++ ) {
    OptoReg::Name parmreg = _matcher._parm_regs[k].first();
    if( OptoReg::is_reg(parmreg))  {
      const char *reg_name = OptoReg::regname(parmreg);
      tty->print("#r%3.3d %s", parmreg, reg_name);
      // Also print the second half of the pair when it is a register too
      parmreg = _matcher._parm_regs[k].second();
      if( OptoReg::is_reg(parmreg))  {
        tty->print(":%s", OptoReg::regname(parmreg));
      }
      tty->print("   : parm %d: ", k);
      domain->field_at(k + TypeFunc::Parms)->dump();
      tty->cr();
    }
  }

  // Check for un-owned padding above incoming args
  OptoReg::Name reg = _matcher._new_SP;
  if( reg > _matcher._in_arg_limit ) {
    reg = OptoReg::add(reg, -1);
    tty->print_cr("#r%3.3d %s+%2d: pad0, owned by CALLER", reg, fp, reg2offset_unchecked(reg));
  }

  // Incoming argument area dump
  OptoReg::Name begin_in_arg = OptoReg::add(_matcher._old_SP,C->out_preserve_stack_slots());
  while( reg > begin_in_arg ) {
    reg = OptoReg::add(reg, -1);
    tty->print("#r%3.3d %s+%2d: ",reg,fp,reg2offset_unchecked(reg));
    // Look for a parameter whose first or second half lives in this slot
    int j;
    for( j = 0; j < argcnt; j++) {
      if( _matcher._parm_regs[j].first() == reg ||
          _matcher._parm_regs[j].second() == reg ) {
        tty->print("parm %d: ",j);
        domain->field_at(j + TypeFunc::Parms)->dump();
        tty->cr();
        break;
      }
    }
    // Loop ran to completion: no parameter occupies this slot
    if( j >= argcnt )
      tty->print_cr("HOLE, owned by SELF");
  }

  // Old outgoing preserve area
  while( reg > _matcher._old_SP ) {
    reg = OptoReg::add(reg, -1);
    tty->print_cr("#r%3.3d %s+%2d: old out preserve",reg,fp,reg2offset_unchecked(reg));
  }

  // Old SP
  tty->print_cr("# -- Old %s -- Framesize: %d --",fp,
    reg2offset_unchecked(OptoReg::add(_matcher._old_SP,-1)) - reg2offset_unchecked(_matcher._new_SP)+jintSize);

  // Preserve area dump
  int fixed_slots = C->fixed_slots();
  OptoReg::Name begin_in_preserve = OptoReg::add(_matcher._old_SP, -(int)C->in_preserve_stack_slots());
  OptoReg::Name return_addr = _matcher.return_addr();

  // Keep stepping down for as long as the cursor still names a stack slot,
  // classifying each slot as it goes.
  reg = OptoReg::add(reg, -1);
  while (OptoReg::is_stack(reg)) {
    tty->print("#r%3.3d %s+%2d: ",reg,fp,reg2offset_unchecked(reg));
    if (return_addr == reg) {
      tty->print_cr("return address");
    } else if (reg >= begin_in_preserve) {
      // Preserved slots are present on x86
      if (return_addr == OptoReg::add(reg, VMRegImpl::slots_per_word))
        tty->print_cr("saved fp register");
      else if (return_addr == OptoReg::add(reg, 2*VMRegImpl::slots_per_word) &&
               VerifyStackAtCalls)
        tty->print_cr("0xBADB100D   +VerifyStackAtCalls");
      else
        tty->print_cr("in_preserve");
    } else if ((int)OptoReg::reg2stack(reg) < fixed_slots) {
      tty->print_cr("Fixed slot %d", OptoReg::reg2stack(reg));
    } else {
      tty->print_cr("pad2, stack alignment");
    }
    reg = OptoReg::add(reg, -1);
  }

  // Spill area dump
  // The cursor is restarted here at _new_SP + _framesize.
  reg = OptoReg::add(_matcher._new_SP, _framesize );
  while( reg > _matcher._out_arg_limit ) {
    reg = OptoReg::add(reg, -1);
    tty->print_cr("#r%3.3d %s+%2d: spill",reg,fp,reg2offset_unchecked(reg));
  }

  // Outgoing argument area dump
  while( reg > OptoReg::add(_matcher._new_SP, C->out_preserve_stack_slots()) ) {
    reg = OptoReg::add(reg, -1);
    tty->print_cr("#r%3.3d %s+%2d: outgoing argument",reg,fp,reg2offset_unchecked(reg));
  }

  // Outgoing new preserve area
  while( reg > _matcher._new_SP ) {
    reg = OptoReg::add(reg, -1);
    tty->print_cr("#r%3.3d %s+%2d: new out preserve",reg,fp,reg2offset_unchecked(reg));
  }
  tty->print_cr("#");
}

// Dump every block whose _pre_order equals 'pre_order', preceded by a
// banner line.
void PhaseChaitin::dump_bb( uint pre_order ) const {
  tty->print_cr("---dump of B%d---",pre_order);
  for (uint bi = 0; bi < _cfg.number_of_blocks(); bi++) {
    Block* candidate = _cfg.get_block(bi);
    if (candidate->_pre_order != pre_order) {
      continue;
    }
    dump(candidate);
  }
}

// Dump everything known about live range 'lidx': its LRG entry and IFG
// neighbors (when an IFG exists), then every node that maps to the live
// range.  Unless 'defs_only' is set, nodes that take the live range as an
// input are dumped as well.  A block's header is printed once, just before
// the first matching node of that block.
void PhaseChaitin::dump_lrg( uint lidx, bool defs_only ) const {
  tty->print_cr("---dump of L%d---",lidx);

  if (_ifg != NULL) {
    if (lidx >= _lrg_map.max_lrg_id()) {
      tty->print("Attempt to print live range index beyond max live range.\n");
      return;
    }
    tty->print("L%d: ",lidx);
    if (lidx >= _ifg->_maxlrg) {
      // Beyond what the IFG holds: no LRG entry and no neighbor set to show
      tty->print_cr("new LRG");
    } else {
      lrgs(lidx).dump();
      tty->print("Neighbors: %d - ", _ifg->neighbor_cnt(lidx));
      _ifg->neighbors(lidx)->dump();
      tty->cr();
    }
  }

  // For all blocks
  for (uint bi = 0; bi < _cfg.number_of_blocks(); bi++) {
    Block* blk = _cfg.get_block(bi);
    bool head_printed = false;

    // For all instructions
    for (uint ni = 0; ni < blk->number_of_nodes(); ni++) {
      Node *inst = blk->get_node(ni);

      // The node itself maps to the requested live range
      if (_lrg_map.find_const(inst) == lidx) {
        if (!head_printed) {
          head_printed = true;
          tty->cr();
          blk->dump_head(&_cfg);
        }
        dump(inst);
        continue;
      }

      if (defs_only) {
        continue;
      }

      // Otherwise look for required inputs that map to the live range; the
      // node is dumped once per matching input.
      const uint req_cnt = inst->req();
      for (uint k = 1; k < req_cnt; k++) {
        Node *input = inst->in(k);
        if (input == NULL) {
          continue;  // be robust in the dumper
        }
        if (_lrg_map.find_const(input) != lidx) {
          continue;
        }
        if (!head_printed) {
          head_printed = true;
          tty->cr();
          blk->dump_head(&_cfg);
        }
        dump(inst);
      }
    }
  } // End of per-block dump
  tty->cr();
}
#endif // not PRODUCT

// Definitions of PhaseChaitin's static statistics counters, all starting at
// zero.  Their values are reported by print_chaitin_statistics() in
// non-PRODUCT builds.

// Spill code counts and their associated costs
int PhaseChaitin::_final_loads  = 0;
int PhaseChaitin::_final_stores = 0;
int PhaseChaitin::_final_memoves= 0;
int PhaseChaitin::_final_copies = 0;
double PhaseChaitin::_final_load_cost  = 0;
double PhaseChaitin::_final_store_cost = 0;
double PhaseChaitin::_final_memove_cost= 0;
double PhaseChaitin::_final_copy_cost  = 0;
// Coalescing counts, including lost opportunities
int PhaseChaitin::_conserv_coalesce = 0;
int PhaseChaitin::_conserv_coalesce_pair = 0;
int PhaseChaitin::_conserv_coalesce_trie = 0;
int PhaseChaitin::_conserv_coalesce_quad = 0;
int PhaseChaitin::_post_alloc = 0;
int PhaseChaitin::_lost_opp_pp_coalesce = 0;
int PhaseChaitin::_lost_opp_cflow_coalesce = 0;
// CISC instruction usage
int PhaseChaitin::_used_cisc_instructions   = 0;
int PhaseChaitin::_unused_cisc_instructions = 0;
// Attempts vs. successes; their ratio is printed as "Average allocation trips"
int PhaseChaitin::_allocator_attempts       = 0;
int PhaseChaitin::_allocator_successes      = 0;

#ifndef PRODUCT
// Non-PRODUCT only: block counts that print_chaitin_statistics() reports as
// "High Pressure Blocks" and "Low Pressure Blocks".
uint PhaseChaitin::_high_pressure           = 0;
uint PhaseChaitin::_low_pressure            = 0;

// Print the static allocator statistics to tty.  Optional sections (tries
// and quads, lost coalesce opportunities, cisc usage, average trips) appear
// only when the counters behind them are non-zero.
void PhaseChaitin::print_chaitin_statistics() {
  // Spill code inserted and its cost
  tty->print_cr("Inserted %d spill loads, %d spill stores, %d mem-mem moves and %d copies.",
                _final_loads, _final_stores, _final_memoves, _final_copies);
  tty->print_cr("Total load cost= %6.0f, store cost = %6.0f, mem-mem cost = %5.2f, copy cost = %5.0f.",
                _final_load_cost, _final_store_cost, _final_memove_cost, _final_copy_cost);

  // Weighted total: loads x4, stores x2, copies x1, mem-mem moves x12
  const double adjusted_cost = _final_load_cost*4.0 + _final_store_cost*2.0 +
                               _final_copy_cost*1.0 + _final_memove_cost*12.0;
  tty->print_cr("Adjusted spill cost = %7.0f.", adjusted_cost);

  // Coalescing summary, assembled on a single output line
  tty->print("Conservatively coalesced %d copies, %d pairs",
             _conserv_coalesce, _conserv_coalesce_pair);
  if (_conserv_coalesce_trie != 0 || _conserv_coalesce_quad != 0) {
    tty->print(", %d tries, %d quads", _conserv_coalesce_trie, _conserv_coalesce_quad);
  }
  tty->print_cr(", %d post alloc.", _post_alloc);

  if (_lost_opp_pp_coalesce != 0 || _lost_opp_cflow_coalesce != 0) {
    tty->print_cr("Lost coalesce opportunity, %d private-private, and %d cflow interfered.",
                  _lost_opp_pp_coalesce, _lost_opp_cflow_coalesce);
  }

  if (_used_cisc_instructions != 0 || _unused_cisc_instructions != 0) {
    tty->print_cr("Used cisc instruction  %d,  remained in register %d",
                  _used_cisc_instructions, _unused_cisc_instructions);
  }

  // Skipped when nothing succeeded, which also avoids dividing by zero
  if (_allocator_successes != 0) {
    tty->print_cr("Average allocation trips %f", (float)_allocator_attempts/(float)_allocator_successes);
  }

  tty->print_cr("High Pressure Blocks = %d, Low Pressure Blocks = %d", _high_pressure, _low_pressure);
}
#endif // not PRODUCT
C:\hotspot-69087d08d473\src\share\vm/opto/chaitin.hpp
/*
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_VM_OPTO_CHAITIN_HPP
#define SHARE_VM_OPTO_CHAITIN_HPP

#include "code/vmreg.hpp"
#include "libadt/port.hpp"
#include "memory/resourceArea.hpp"
#include "opto/connode.hpp"
#include "opto/live.hpp"
#include "opto/matcher.hpp"
#include "opto/phase.hpp"
#include "opto/regalloc.hpp"
#include "opto/regmask.hpp"

class LoopTree;
class MachCallNode;
class MachSafePointNode;
class Matcher;
class PhaseCFG;
class PhaseLive;
class PhaseRegAlloc;
class   PhaseChaitin;

#define OPTO_DEBUG_SPLIT_FREQ  BLOCK_FREQUENCY(0.001)
#define OPTO_LRG_HIGH_FREQ     BLOCK_FREQUENCY(0.25)

//------------------------------LRG--------------------------------------------
// Live-RanGe structure.  Holds the per-live-range state declared below:
// cost/area statistics, the defining node, bias and list links, the chosen
// register, the effective degree, the allowed register mask together with a
// cached size, and a set of one-bit flags.
class LRG : public ResourceObj {
  friend class VMStructs;
public:
  static const uint AllStack_size = 0xFFFFF; // This mask size is used to tell that the mask of this LRG supports stack positions
  enum { SPILL_REG=29999 };     // Register number of a spilled LRG

  double _cost;                 // 2 for loads/1 for stores times block freq
  double _area;                 // Sum of all simultaneously live values
  double score() const;         // Compute score from cost and area
  double _maxfreq;              // Maximum frequency of any def or use

  Node *_def;                   // Check for multi-def live ranges
#ifndef PRODUCT
  GrowableArray<Node*>* _defs;
#endif

  uint _risk_bias;              // Index of LRG which we want to avoid color
  uint _copy_bias;              // Index of LRG which we want to share color

  uint _next;                   // Index of next LRG in linked list
  uint _prev;                   // Index of prev LRG in linked list
private:
  uint _reg;                    // Chosen register; undefined if mask is plural
public:
  // Return chosen register for this LRG.  Error if the LRG is not bound to
  // a single register.
  OptoReg::Name reg() const { return OptoReg::Name(_reg); }
  void set_reg( OptoReg::Name r ) { _reg = r; }

private:
  uint _eff_degree;             // Effective degree: Sum of neighbors _num_regs
public:
  // Return the effective degree; asserts that it is currently valid.
  int degree() const { assert( _degree_valid , "" ); return _eff_degree; }
  // Degree starts not valid and any change to the IFG neighbor
  // set makes it not valid.
  void set_degree( uint degree ) {
    _eff_degree = degree;
    debug_only(_degree_valid = 1;)
    assert(!_mask.is_AllStack() || (_mask.is_AllStack() && lo_degree()), "_eff_degree can't be bigger than AllStack_size - _num_regs if the mask supports stack registers");
  }
  // Made a change that hammered degree
  void invalid_degree() { debug_only(_degree_valid=0;) }
  // Incrementally modify degree.  If it was correct, it should remain correct
  void inc_degree( uint mod ) {
    _eff_degree += mod;
    assert(!_mask.is_AllStack() || (_mask.is_AllStack() && lo_degree()), "_eff_degree can't be bigger than AllStack_size - _num_regs if the mask supports stack registers");
  }
  // Compute the degree between 2 live ranges
  int compute_degree( LRG &l ) const;

private:
  RegMask _mask;                // Allowed registers for this LRG
  uint _mask_size;              // cache of _mask.Size();
public:
  // Size of the current mask; an AllStack mask reports AllStack_size instead
  // of its bit count.
  int compute_mask_size() const { return _mask.is_AllStack() ? AllStack_size : _mask.Size(); }
  // Store 'size' in the cache.  ASSERT builds also mark the cache valid and
  // verify the mask's shape: register sets for vectors, pairs for two-register
  // live ranges that are not fat projections.
  void set_mask_size( int size ) {
    assert((size == (int)AllStack_size) || (size == (int)_mask.Size()), "");
    _mask_size = size;
#ifdef ASSERT
    _msize_valid=1;
    if (_is_vector) {
      assert(!_fat_proj, "sanity");
      _mask.verify_sets(_num_regs);
    } else if (_num_regs == 2 && !_fat_proj) {
      _mask.verify_pairs();
    }
#endif
  }
  void compute_set_mask_size() { set_mask_size(compute_mask_size()); }
  int mask_size() const { assert( _msize_valid, "mask size not valid" );
                          return _mask_size; }
  // Get the last mask size computed, even if it does not match the
  // count of bits in the current mask.
  int get_invalid_mask_size() const { return _mask_size; }
  const RegMask &mask() const { return _mask; }
  // Mask mutators.  All of them leave the cached size marked invalid (in
  // debug builds) except Clear() and Set_All(), which set the cache directly.
  void set_mask( const RegMask &rm ) { _mask = rm; debug_only(_msize_valid=0;)}
  void AND( const RegMask &rm ) { _mask.AND(rm); debug_only(_msize_valid=0;)}
  void SUBTRACT( const RegMask &rm ) { _mask.SUBTRACT(rm); debug_only(_msize_valid=0;)}
  void Clear()   { _mask.Clear()  ; debug_only(_msize_valid=1); _mask_size = 0; }
  void Set_All() { _mask.Set_All(); debug_only(_msize_valid=1); _mask_size = RegMask::CHUNK_SIZE; }
  void Insert( OptoReg::Name reg ) { _mask.Insert(reg);  debug_only(_msize_valid=0;) }
  void Remove( OptoReg::Name reg ) { _mask.Remove(reg);  debug_only(_msize_valid=0;) }
  void clear_to_pairs() { _mask.clear_to_pairs(); debug_only(_msize_valid=0;) }
  void clear_to_sets()  { _mask.clear_to_sets(_num_regs); debug_only(_msize_valid=0;) }

  // Number of registers this live range uses when it colors
private:
  uint8 _num_regs;              // 2 for Longs and Doubles, 1 for all else
                                // except _num_regs is kill count for fat_proj
public:
  int num_regs() const { return _num_regs; }
  // May only set the count once, or re-set it to the same value
  void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; }

private:
  // Number of physical registers this live range uses when it colors
  // Architecture and register-set dependent
  uint8 _reg_pressure;
public:
  void set_reg_pressure(int i)  { _reg_pressure = i; }
  int      reg_pressure() const { return _reg_pressure; }

  // How much 'wiggle room' does this live range have?
  // How many color choices can it make (scaled by _num_regs)?
  int degrees_of_freedom() const { return mask_size() - _num_regs; }
  // Bound LRGs have ZERO degrees of freedom.  We also count
  // must_spill as bound.
  bool is_bound  () const { return _is_bound; }
  // Negative degrees-of-freedom; even with no neighbors this
  // live range must spill.
  bool not_free() const { return degrees_of_freedom() <  0; }
  // Is this live range of "low-degree"?  Trivially colorable?
  bool lo_degree () const { return degree() <= degrees_of_freedom(); }
  // Is this live range just barely "low-degree"?  Trivially colorable?
  bool just_lo_degree () const { return degree() == degrees_of_freedom(); }

  uint   _is_oop:1,             // Live-range holds an oop
         _is_float:1,           // True if in float registers
         _is_vector:1,          // True if in vector registers
         _was_spilled1:1,       // True if prior spilling on def
         _was_spilled2:1,       // True if twice prior spilling on def
         _is_bound:1,           // live range starts life with no
                                // degrees of freedom.
         _direct_conflict:1,    // True if def and use registers in conflict
         _must_spill:1,         // live range has lost all degrees of freedom
    // If _fat_proj is set, live range does NOT require aligned, adjacent
    // registers and has NO interferences.
    // If _fat_proj is clear, live range requires num_regs() to be a power of
    // 2, and it requires registers to form an aligned, adjacent set.
         _fat_proj:1,           //
         _was_lo:1,             // Was lo-degree prior to coalesce
         _msize_valid:1,        // _mask_size cache valid
         _degree_valid:1,       // _eff_degree cache valid
         _has_copy:1,           // Adjacent to some copy instruction
         _at_risk:1;            // Simplify says this guy is at risk to spill


  // Alive if _def is non-NULL, dead if it is NULL
  bool alive() const { return _def != NULL; }
  // A _def equal to NodeSentinel marks a multi-def live range; any other
  // value (including NULL) counts as single-def.
  bool is_multidef() const { return _def == NodeSentinel; }
  bool is_singledef() const { return _def != NodeSentinel; }

#ifndef PRODUCT
  void dump( ) const;
#endif
};

//------------------------------IFG--------------------------------------------
//                         InterFerence Graph
// An undirected graph implementation.  Created with a fixed number of
// vertices.  Edges can be added & tested.  Vertices can be removed, then
// added back later with all edges intact.  Can add edges between one vertex
// and a list of other vertices.  Can union vertices (and their edges)
// together.  The IFG needs to be really really fast, and also fairly
// abstract!  It needs abstraction so I can fiddle with the implementation to
// get even more speed.
class PhaseIFG : public Phase {
  friend class VMStructs;
  // Current implementation: a triangular adjacency list.

  // Array of adjacency-lists, indexed by live-range number
  IndexSet *_adjs;

  // Assertion bit for proper use of Squaring
  bool _is_square;

  // Live range structure goes here
  LRG *_lrgs;                   // Array of LRG structures

public:
  // Largest live-range number
  uint _maxlrg;

  Arena *_arena;

  // Keep track of inserted and deleted Nodes
  VectorSet *_yanked;

  PhaseIFG( Arena *arena );
  void init( uint maxlrg );

  // Add edge between a and b.  Returns true if actually added.
  int add_edge( uint a, uint b );

  // Add edge between a and everything in the vector
  void add_vector( uint a, IndexSet *vec );

  // Test for edge existence
  int test_edge( uint a, uint b ) const;

  // Square-up matrix for faster Union
  void SquareUp();

  // Return number of LRG neighbors
  uint neighbor_cnt( uint a ) const { return _adjs[a].count(); }
  // Union edges of b into a on Squared-up matrix
  void Union( uint a, uint b );
  // Test for edge in Squared-up matrix
  int test_edge_sq( uint a, uint b ) const;
  // Yank a Node and all connected edges from the IFG.  Be prepared to
  // re-insert the yanked Node in reverse order of yanking.  Return a
  // list of neighbors (edges) yanked.
  IndexSet *remove_node( uint a );
  // Reinsert a yanked Node
  void re_insert( uint a );
  // Return set of neighbors
  IndexSet *neighbors( uint a ) const { return &_adjs[a]; }

#ifndef PRODUCT
  // Dump the IFG
  void dump() const;
  void stats() const;
  void verify( const PhaseChaitin * ) const;
#endif

  //--------------- Live Range Accessors
  // Return the LRG for live range 'idx'; asserts that idx < _maxlrg.
  LRG &lrgs(uint idx) const { assert(idx < _maxlrg, "oob"); return _lrgs[idx]; }

  // Compute and set effective degree.  Might be folded into SquareUp().
  void Compute_Effective_Degree();

  // Compute effective degree as the sum of neighbors' _sizes.
  int effective_degree( uint lidx ) const;
};

// The LiveRangeMap class is responsible for storing node to live range id mapping.
// Each node is mapped to a live range id (a virtual register). Nodes that are
// not considered for register allocation are given live range id 0.
//
// Two tables are kept: _names maps a node index to a live range id, and
// _uf_map is a union-find table over live range ids.  The find() variants
// resolve an id through _uf_map; find_const() does the same without path
// compression so it can be used from const (e.g. dumping) code.
class LiveRangeMap VALUE_OBJ_CLASS_SPEC {

private:

  uint _max_lrg_id;

  // Union-find map.  Declared as a short for speed.
  // Indexed by live-range number, it returns the compacted live-range number
  // NOTE(review): the "short" remark may predate the LRG_List type -- confirm.
  LRG_List _uf_map;

  // Map from Nodes to live ranges
  LRG_List _names;

  // Straight out of Tarjan's union-find algorithm
  // Resolves the node's live range and writes the result back into _names.
  uint find_compress(const Node *node) {
    uint lrg_id = find_compress(_names.at(node->_idx));
    _names.at_put(node->_idx, lrg_id);
    return lrg_id;
  }

  uint find_compress(uint lrg);

public:

  // Read-only view of the node-index to live-range table
  const LRG_List& names() {
    return _names;
  }

  uint max_lrg_id() const {
    return _max_lrg_id;
  }

  void set_max_lrg_id(uint max_lrg_id) {
    _max_lrg_id = max_lrg_id;
  }

  // Number of entries in the node table
  uint size() const {
    return _names.length();
  }

  // Raw lookups in _names: no union-find resolution is applied
  uint live_range_id(uint idx) const {
    return _names.at(idx);
  }

  uint live_range_id(const Node *node) const {
    return _names.at(node->_idx);
  }

  // Raw lookup in the union-find table (a single step, not a full find)
  uint uf_live_range_id(uint lrg_id) const {
    return _uf_map.at(lrg_id);
  }

  // Overwrite an existing entry of _names / _uf_map
  void map(uint idx, uint lrg_id) {
    _names.at_put(idx, lrg_id);
  }

  void uf_map(uint dst_lrg_id, uint src_lrg_id) {
    _uf_map.at_put(dst_lrg_id, src_lrg_id);
  }

  // Like map()/uf_map(), but grow the table when the index is past its end
  void extend(uint idx, uint lrg_id) {
    _names.at_put_grow(idx, lrg_id);
  }

  void uf_extend(uint dst_lrg_id, uint src_lrg_id) {
    _uf_map.at_put_grow(dst_lrg_id, src_lrg_id);
  }

  // Both tables start with 'unique' entries, all 0; the max live range id
  // starts at 0.  The initializers are listed in member declaration order,
  // which is the order C++ runs them in regardless of how they are written.
  LiveRangeMap(Arena* arena, uint unique)
  : _max_lrg_id(0)
  , _uf_map(arena, unique, unique, 0)
  , _names(arena, unique, unique, 0) {}

  // Return the id stored for the node; asserts that it already equals the
  // fully resolved id, i.e. that no union-find lookup is still needed.
  uint find_id( const Node *n ) {
    uint retval = live_range_id(n);
    assert(retval == find(n),"Invalid node to lidx mapping");
    return retval;
  }

  // Reset the Union-Find map to identity
  void reset_uf_map(uint max_lrg_id);

  // Make all Nodes map directly to their final live range; no need for
  // the Union-Find mapping after this call.
  void compress_uf_map_for_nodes();

  // Resolve a live range id.  The out-of-line find_compress() is only
  // called when the id does not map to itself.
  uint find(uint lidx) {
    uint uf_lidx = _uf_map.at(lidx);
    return (uf_lidx == lidx) ? uf_lidx : find_compress(lidx);
  }

  // Convert a Node into a Live Range Index - a lidx
  uint find(const Node *node) {
    uint lidx = live_range_id(node);
    uint uf_lidx = _uf_map.at(lidx);
    return (uf_lidx == lidx) ? uf_lidx : find_compress(node);
  }

  // Like Find above, but no path compress, so bad asymptotic behavior
  uint find_const(uint lrg) const;

  // Like Find above, but no path compress, so bad asymptotic behavior
  uint find_const(const Node *node) const {
    if(node->_idx >= (uint)_names.length()) {
      return 0; // not mapped, usual for debug dump
    }
    return find_const(_names.at(node->_idx));
  }
};

//------------------------------Chaitin----------------------------------------
// Briggs-Chaitin style allocation, mostly.
class PhaseChaitin : public PhaseRegAlloc {
  friend class VMStructs;

  int _trip_cnt;
  int _alternate;

  LRG &lrgs(uint idx) const { return _ifg->lrgs(idx); }
  PhaseLive *_live;             // Liveness, used in the interference graph
  PhaseIFG *_ifg;               // Interference graph (for original chunk)
  Node_List **_lrg_nodes;       // Array of node; lists for lrgs which spill
  VectorSet _spilled_once;      // Nodes that have been spilled
  VectorSet _spilled_twice;     // Nodes that have been spilled twice

  // Combine the Live Range Indices for these 2 Nodes into a single live
  // range.  Future requests for any Node in either live range will
  // return the live range index for the combined live range.
  void Union( const Node *src, const Node *dst );

  void new_lrg( const Node *x, uint lrg );

  // Compact live ranges, removing unused ones.  Return new maxlrg.
  void compact();

  uint _lo_degree;              // Head of lo-degree LRGs list
  uint _lo_stk_degree;          // Head of lo-stk-degree LRGs list
  uint _hi_degree;              // Head of hi-degree LRGs list
  uint _simplified;             // Linked list head of simplified LRGs

  // Helper functions for Split()
  uint split_DEF( Node *def, Block *b, int loc, uint max, Node **Reachblock, Node **debug_defs, GrowableArray<uint> splits, int slidx );
  uint split_USE( Node *def, Block *b, Node *use, uint useidx, uint max, bool def_down, bool cisc_sp, GrowableArray<uint> splits, int slidx );

  //------------------------------clone_projs------------------------------------
  // After cloning some rematerialized instruction, clone any MachProj's that
  // follow it.  Example: Intel zero is XOR, kills flags.  Sparc FP constants
  // use G3 as an address temp.
  int clone_projs(Block* b, uint idx, Node* orig, Node* copy, uint& max_lrg_id);

  int clone_projs(Block* b, uint idx, Node* orig, Node* copy, LiveRangeMap& lrg_map) {
    uint max_lrg_id = lrg_map.max_lrg_id();
    int found_projs = clone_projs(b, idx, orig, copy, max_lrg_id);
    if (found_projs > 0) {
      // max_lrg_id is updated during call above
      lrg_map.set_max_lrg_id(max_lrg_id);
    }
    return found_projs;
  }

  Node *split_Rematerialize(Node *def, Block *b, uint insidx, uint &maxlrg, GrowableArray<uint> splits,
                            int slidx, uint *lrg2reach, Node **Reachblock, bool walkThru);
  // True if lidx is used before any real register is def'd in the block
  bool prompt_use( Block *b, uint lidx );
  Node *get_spillcopy_wide( Node *def, Node *use, uint uidx );
  // Insert the spill at chosen location.  Skip over any intervening Proj's or
  // Phis.  Skip over a CatchNode and projs, inserting in the fall-through block
  // instead.  Update high-pressure indices.  Create a new live range.
  void insert_proj( Block *b, uint i, Node *spill, uint maxlrg );

  bool is_high_pressure( Block *b, LRG *lrg, uint insidx );

  uint _oldphi;                 // Node index which separates pre-allocation nodes

  Block **_blks;                // Array of blocks sorted by frequency for coalescing

  float _high_frequency_lrg;    // Frequency at which LRG will be spilled for debug info

#ifndef PRODUCT
  bool _trace_spilling;
#endif

public:
  PhaseChaitin( uint unique, PhaseCFG &cfg, Matcher &matcher );
  ~PhaseChaitin() {}

  LiveRangeMap _lrg_map;

  // Do all the real work of allocate
  void Register_Allocate();

  float high_frequency_lrg() const { return _high_frequency_lrg; }

#ifndef PRODUCT
  bool trace_spilling() const { return _trace_spilling; }
#endif

private:
  // De-SSA the world.  Assign registers to Nodes.  Use the same register for
  // all inputs to a PhiNode, effectively coalescing live ranges.  Insert
  // copies as needed.
  void de_ssa();

  // Add edge between reg and everything in the vector.
  // Same as _ifg->add_vector(reg,live) EXCEPT use the RegMask
  // information to trim the set of interferences.
  // NOTE(review): this comment used to promise "the count of edges added",
  // but the function is declared void, so no count is returned to callers.
  void interfere_with_live( uint reg, IndexSet *live );
  // Count register pressure for asserts
  uint count_int_pressure( IndexSet *liveout );
  uint count_float_pressure( IndexSet *liveout );

  // Build the interference graph using virtual registers only.
  // Used for aggressive coalescing.
  void build_ifg_virtual( );

  // Build the interference graph using physical registers when available.
  // That is, if 2 live ranges are simultaneously alive but in their
  // acceptable register sets do not overlap, then they do not interfere.
  uint build_ifg_physical( ResourceArea *a );

  // Gather LiveRanGe information, including register masks and base pointer/
  // derived pointer relationships.
  void gather_lrg_masks( bool mod_cisc_masks );

  // Force the bases of derived pointers to be alive at GC points.
  bool stretch_base_pointer_live_ranges( ResourceArea *a );
  // Helper to stretch above; recursively discover the base Node for
  // a given derived Node.  Easy for AddP-related machine nodes, but
  // needs to be recursive for derived Phis.
  Node *find_base_for_derived( Node **derived_base_map, Node *derived, uint &maxlrg );

  // Set the was-lo-degree bit.  Conservative coalescing should not change the
  // colorability of the graph.  If any live range was of low-degree before
  // coalescing, it should Simplify.  This call sets the was-lo-degree bit.
  void set_was_low();

  // Split live-ranges that must spill due to register conflicts (as opposed
  // to capacity spills).  Typically these are things def'd in a register
  // and used on the stack or vice-versa.
  void pre_spill();

  // Init LRG caching of degree, numregs.  Init lo_degree list.
  void cache_lrg_info( );

  // Simplify the IFG by removing LRGs of low degree with no copies
  void Pre_Simplify();

  // Simplify the IFG by removing LRGs of low degree
  void Simplify();

  // Select colors by re-inserting edges into the IFG.
  // Return TRUE if any spills occurred.
  // NOTE(review): the declared return type is uint rather than bool;
  // presumably a non-zero result means spills occurred -- confirm against the
  // definition before relying on the exact value.
  uint Select( );
  // Helper function for select which allows biased coloring
  OptoReg::Name choose_color( LRG &lrg, int chunk );
  // Helper function which implements biasing heuristic
  OptoReg::Name bias_color( LRG &lrg, int chunk );

  // Split uncolorable live ranges
  // Return new number of live ranges
  uint Split(uint maxlrg, ResourceArea* split_arena);

  // Copy 'was_spilled'-edness from one Node to another.
  void copy_was_spilled( Node *src, Node *dst );
  // Set the 'spilled_once' or 'spilled_twice' flag on a node.
  void set_was_spilled( Node *n );

  // Convert ideal spill-nodes into machine loads & stores
  // Set C->failing when fixup spills could not complete, node limit exceeded.
  void fixup_spills();

  // Post-Allocation peephole copy removal
  void post_allocate_copy_removal();
  Node *skip_copies( Node *c );
  // Swap 'old' for the node currently recorded for register 'nreg' in regnd
  // (the live version of that value), then yank 'old' if it has become dead.
  // Returns the result of the yank step.
  int replace_and_yank_if_dead( Node *old, OptoReg::Name nreg,
                                Block *current_block, Node_List& value, Node_List& regnd ) {
    Node* v = regnd[nreg];
    assert(v->outcnt() != 0, "no dead values");
    old->replace_by(v);
    // Same call yank_if_dead() makes: 'old' serves as both the node under
    // examination and the original node.
    return yank_if_dead_recurse(old, old, current_block, &value, &regnd);
  }

  // Forwards to yank_if_dead_recurse(), passing 'old' both as the node to
  // examine and as orig_old, and returns that call's result unchanged.
  int yank_if_dead( Node *old, Block *current_block, Node_List *value, Node_List *regnd ) {
    return yank_if_dead_recurse(old, old, current_block, value, regnd);
  }
  int yank_if_dead_recurse(Node *old, Node *orig_old, Block *current_block,
                           Node_List *value, Node_List *regnd);
  int yank( Node *old, Block *current_block, Node_List *value, Node_List *regnd );
  int elide_copy( Node *n, int k, Block *current_block, Node_List &value, Node_List &regnd, bool can_change_regs );
  int use_prior_register( Node *copy, uint idx, Node *def, Block *current_block, Node_List &value, Node_List &regnd );
  bool may_be_copy_of_callee( Node *def ) const;

  // If nreg already contains the same constant as val then eliminate it
  bool eliminate_copy_of_constant(Node* val, Node* n,
                                  Block *current_block, Node_List& value, Node_List &regnd,
                                  OptoReg::Name nreg, OptoReg::Name nreg2);
  // Extend the node to LRG mapping
  void add_reference( const Node *node, const Node *old_node);

  // Per-register record pairing a def with the first use seen for it in the
  // current block.  Both fields start out (and are reset to) NULL.
  class RegDefUse {
    Node* _def;        // def currently tracked for the register
    Node* _first_use;  // use that was supplied when _def was recorded
  public:
    RegDefUse() : _def(NULL), _first_use(NULL) { }

    Node* def() const {
      return _def;
    }
    Node* first_use() const {
      return _first_use;
    }

    // Start tracking a new def/use pair.  A call that names the def already
    // being tracked is ignored, so the earliest recorded use is preserved.
    void update(Node* def, Node* use) {
      if (_def == def) {
        return;
      }
      _def = def;
      _first_use = use;
    }

    // Forget the tracked def and its first use.
    void clear() {
      _first_use = NULL;
      _def = NULL;
    }
  };
  typedef GrowableArray<RegDefUse> RegToDefUseMap;
  int possibly_merge_multidef(Node *n, uint k, Block *block, RegToDefUseMap& reg2defuse);

  // Merge nodes that are a part of a multidef lrg and produce the same value within a block.
  void merge_multidefs();

private:

  static int _final_loads, _final_stores, _final_copies, _final_memoves;
  static double _final_load_cost, _final_store_cost, _final_copy_cost, _final_memove_cost;
  static int _conserv_coalesce, _conserv_coalesce_pair;
  static int _conserv_coalesce_trie, _conserv_coalesce_quad;
  static int _post_alloc;
  static int _lost_opp_pp_coalesce, _lost_opp_cflow_coalesce;
  static int _used_cisc_instructions, _unused_cisc_instructions;
  static int _allocator_attempts, _allocator_successes;

#ifndef PRODUCT
  static uint _high_pressure, _low_pressure;

  void dump() const;
  void dump( const Node *n ) const;
  void dump( const Block * b ) const;
  void dump_degree_lists() const;
  void dump_simplified() const;
  void dump_lrg( uint lidx, bool defs_only) const;
  // Single-argument convenience form: forwards to the two-argument overload
  // with defs_only == false.
  void dump_lrg( uint lidx) const {
    // dump defs and uses by default
    dump_lrg(lidx, false);
  }
  void dump_bb( uint pre_order ) const;

  // Verify that base pointers and derived pointers are still sane
  void verify_base_ptrs( ResourceArea *a ) const;

  void verify( ResourceArea *a, bool verify_ifg = false ) const;

  void dump_for_spill_split_recycle() const;

public:
  void dump_frame() const;
  char *dump_register( const Node *n, char *buf  ) const;
private:
  static void print_chaitin_statistics();
#endif
  friend class PhaseCoalesce;
  friend class PhaseAggressiveCoalesce;
  friend class PhaseConservativeCoalesce;
};

#endif // SHARE_VM_OPTO_CHAITIN_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/classes.cpp
/*
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "opto/addnode.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/connode.hpp"
#include "opto/divnode.hpp"
#include "opto/locknode.hpp"
#include "opto/loopnode.hpp"
#include "opto/machnode.hpp"
#include "opto/memnode.hpp"
#include "opto/mathexactnode.hpp"
#include "opto/mulnode.hpp"
#include "opto/multnode.hpp"
#include "opto/node.hpp"
#include "opto/rootnode.hpp"
#include "opto/subnode.hpp"
#include "opto/vectornode.hpp"

// ----------------------------------------------------------------------------
// Generate the Opcode() functions that map each Node class to its dense
// integer opcode.  The base class is written out by hand; every macro(Foo)
// entry pulled in from classes.hpp expands to
//   int FooNode::Opcode() const { return Op_Foo; }
int Node::Opcode() const { return Op_Node; }
#define macro(name)                    \
  int name##Node::Opcode() const {     \
    return Op_##name;                  \
  }
#include "classes.hpp"
#undef macro
C:\hotspot-69087d08d473\src\share\vm/opto/classes.hpp
/*
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

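// The giant table of Node classes.
// One entry per class, kept in roughly alphabetical order by class name
// (several groups, e.g. the ReverseBytes*, GetAndAdd*/GetAndSet* and vector
// nodes, sit out of strict order).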

macro(AbsD)
macro(AbsF)
macro(AbsI)
macro(AddD)
macro(AddF)
macro(AddI)
macro(AddL)
macro(AddP)
macro(Allocate)
macro(AllocateArray)
macro(AndI)
macro(AndL)
macro(AryEq)
macro(AtanD)
macro(Binary)
macro(Bool)
macro(BoxLock)
macro(ReverseBytesI)
macro(ReverseBytesL)
macro(ReverseBytesUS)
macro(ReverseBytesS)
macro(CProj)
macro(CallDynamicJava)
macro(CallJava)
macro(CallLeaf)
macro(CallLeafNoFP)
macro(CallRuntime)
macro(CallStaticJava)
macro(CastII)
macro(CastX2P)
macro(CastP2X)
macro(CastPP)
macro(Catch)
macro(CatchProj)
macro(CheckCastPP)
macro(ClearArray)
macro(ConstraintCast)
macro(CMoveD)
macro(CMoveF)
macro(CMoveI)
macro(CMoveL)
macro(CMoveP)
macro(CMoveN)
macro(CmpN)
macro(CmpD)
macro(CmpD3)
macro(CmpF)
macro(CmpF3)
macro(CmpI)
macro(CmpL)
macro(CmpL3)
macro(CmpLTMask)
macro(CmpP)
macro(CmpU)
macro(CmpUL)
macro(CompareAndSwapI)
macro(CompareAndSwapL)
macro(CompareAndSwapP)
macro(CompareAndSwapN)
macro(GetAndAddI)
macro(GetAndAddL)
macro(GetAndSetI)
macro(GetAndSetL)
macro(GetAndSetP)
macro(GetAndSetN)
macro(Con)
macro(ConN)
macro(ConNKlass)
macro(ConD)
macro(ConF)
macro(ConI)
macro(ConL)
macro(ConP)
macro(Conv2B)
macro(ConvD2F)
macro(ConvD2I)
macro(ConvD2L)
macro(ConvF2D)
macro(ConvF2I)
macro(ConvF2L)
macro(ConvI2D)
macro(ConvI2F)
macro(ConvI2L)
macro(ConvL2D)
macro(ConvL2F)
macro(ConvL2I)
macro(CosD)
macro(CountedLoop)
macro(CountedLoopEnd)
macro(CountLeadingZerosI)
macro(CountLeadingZerosL)
macro(CountTrailingZerosI)
macro(CountTrailingZerosL)
macro(CreateEx)
macro(DecodeN)
macro(DecodeNKlass)
macro(DivD)
macro(DivF)
macro(DivI)
macro(DivL)
macro(DivMod)
macro(DivModI)
macro(DivModL)
macro(EncodeISOArray)
macro(EncodeP)
macro(EncodePKlass)
macro(ExpD)
macro(FastLock)
macro(FastUnlock)
macro(Goto)
macro(Halt)
macro(If)
macro(IfFalse)
macro(IfTrue)
macro(Initialize)
macro(JProj)
macro(Jump)
macro(JumpProj)
macro(LShiftI)
macro(LShiftL)
macro(LoadB)
macro(LoadUB)
macro(LoadUS)
macro(LoadD)
macro(LoadD_unaligned)
macro(LoadF)
macro(LoadI)
macro(LoadKlass)
macro(LoadNKlass)
macro(LoadL)
macro(LoadL_unaligned)
macro(LoadPLocked)
macro(LoadP)
macro(LoadN)
macro(LoadRange)
macro(LoadS)
macro(Lock)
macro(LogD)
macro(Log10D)
macro(Loop)
macro(LoopLimit)
macro(Mach)
macro(MachProj)
macro(MaxI)
macro(MemBarAcquire)
macro(LoadFence)
macro(MemBarAcquireLock)
macro(MemBarCPUOrder)
macro(MemBarRelease)
macro(StoreFence)
macro(MemBarReleaseLock)
macro(MemBarVolatile)
macro(MemBarStoreStore)
macro(MergeMem)
macro(MinI)
macro(ModD)
macro(ModF)
macro(ModI)
macro(ModL)
macro(MoveI2F)
macro(MoveF2I)
macro(MoveL2D)
macro(MoveD2L)
macro(MulD)
macro(MulF)
macro(MulHiL)
macro(MulI)
macro(MulL)
macro(Multi)
macro(NegD)
macro(NegF)
macro(NeverBranch)
macro(Opaque1)
macro(Opaque2)
macro(Opaque3)
macro(ProfileBoolean)
macro(OrI)
macro(OrL)
macro(OverflowAddI)
macro(OverflowSubI)
macro(OverflowMulI)
macro(OverflowAddL)
macro(OverflowSubL)
macro(OverflowMulL)
macro(PCTable)
macro(Parm)
macro(PartialSubtypeCheck)
macro(Phi)
macro(PopCountI)
macro(PopCountL)
macro(PowD)
macro(PrefetchAllocation)
macro(PrefetchRead)
macro(PrefetchWrite)
macro(Proj)
macro(RShiftI)
macro(RShiftL)
macro(Region)
macro(Rethrow)
macro(Return)
macro(Root)
macro(RoundDouble)
macro(RoundFloat)
macro(SafePoint)
macro(SafePointScalarObject)
macro(SCMemProj)
macro(SinD)
macro(SqrtD)
macro(Start)
macro(StartOSR)
macro(StoreB)
macro(StoreC)
macro(StoreCM)
macro(StorePConditional)
macro(StoreIConditional)
macro(StoreLConditional)
macro(StoreD)
macro(StoreF)
macro(StoreI)
macro(StoreL)
macro(StoreP)
macro(StoreN)
macro(StoreNKlass)
macro(StrComp)
macro(StrEquals)
macro(StrIndexOf)
macro(SubD)
macro(SubF)
macro(SubI)
macro(SubL)
macro(TailCall)
macro(TailJump)
macro(TanD)
macro(ThreadLocal)
macro(Unlock)
macro(URShiftI)
macro(URShiftL)
macro(XorI)
macro(XorL)
macro(Vector)
macro(AddVB)
macro(AddVS)
macro(AddVI)
macro(AddVL)
macro(AddVF)
macro(AddVD)
macro(SubVB)
macro(SubVS)
macro(SubVI)
macro(SubVL)
macro(SubVF)
macro(SubVD)
macro(MulVS)
macro(MulVI)
macro(MulVF)
macro(MulVD)
macro(DivVF)
macro(DivVD)
macro(LShiftCntV)
macro(RShiftCntV)
macro(LShiftVB)
macro(LShiftVS)
macro(LShiftVI)
macro(LShiftVL)
macro(RShiftVB)
macro(RShiftVS)
macro(RShiftVI)
macro(RShiftVL)
macro(URShiftVB)
macro(URShiftVS)
macro(URShiftVI)
macro(URShiftVL)
macro(AndV)
macro(OrV)
macro(XorV)
macro(LoadVector)
macro(StoreVector)
macro(Pack)
macro(PackB)
macro(PackS)
macro(PackI)
macro(PackL)
macro(PackF)
macro(PackD)
macro(Pack2L)
macro(Pack2D)
macro(ReplicateB)
macro(ReplicateS)
macro(ReplicateI)
macro(ReplicateL)
macro(ReplicateF)
macro(ReplicateD)
macro(Extract)
macro(ExtractB)
macro(ExtractUB)
macro(ExtractC)
macro(ExtractS)
macro(ExtractI)
macro(ExtractL)
macro(ExtractF)
macro(ExtractD)
C:\hotspot-69087d08d473\src\share\vm/opto/coalesce.cpp
/*
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/block.hpp"
#include "opto/c2compiler.hpp"
#include "opto/cfgnode.hpp"
#include "opto/chaitin.hpp"
#include "opto/coalesce.hpp"
#include "opto/connode.hpp"
#include "opto/indexSet.hpp"
#include "opto/machnode.hpp"
#include "opto/matcher.hpp"
#include "opto/regmask.hpp"

#ifndef PRODUCT
// Print one node as "L<live range>/N<node index> ".
void PhaseCoalesce::dump(Node *n) const {
  // Look the live range up through the allocator's map directly; 'Find' is
  // not usable from a const member function.
  const uint lrg_id = _phc._lrg_map.find(n);
  tty->print("L%d/N%d ", lrg_id, n->_idx);
}

void PhaseCoalesce::dump() const {
  // I know I have a block layout now, so I can print blocks in a loop
  for( uint i=0; i<_phc._cfg.number_of_blocks(); i++ ) {
    uint j;
    Block* b = _phc._cfg.get_block(i);
    // Print a nice block header
    tty->print("B%d: ",b->_pre_order);
    for( j=1; j<b->num_preds(); j++ )
      tty->print("B%d ", _phc._cfg.get_block_for_node(b->pred(j))->_pre_order);
    tty->print("-> ");
    for( j=0; j<b->_num_succs; j++ )
      tty->print("B%d ",b->_succs[j]->_pre_order);
    tty->print(" IDom: B%d/#%d\n", b->_idom ? b->_idom->_pre_order : 0, b->_dom_depth);
    uint cnt = b->number_of_nodes();
    for( j=0; j<cnt; j++ ) {
      Node *n = b->get_node(j);
      dump( n );
      tty->print("\t%s\t",n->Name());

      // Dump the inputs
      uint k;                   // Exit value of loop
      for( k=0; k<n->req(); k++ ) // For all required inputs
        if( n->in(k) ) dump( n->in(k) );
        else tty->print("_ ");
      int any_prec = 0;
      for( ; k<n->len(); k++ )          // For all precedence inputs
        if( n->in(k) ) {
          if( !any_prec++ ) tty->print(" |");
          dump( n->in(k) );
        }

      // Dump node-specific info
      n->dump_spec(tty);
      tty->print("\n");

    }
    tty->print("\n");
  }
}
#endif

// Try to merge the live ranges def'd by n1 and n2, where n2 is an input to
// n1.  Nothing happens when the two already share a live range, when they
// interfere, when the merge would be an oop->int cast, or when their register
// masks do not overlap.
void PhaseCoalesce::combine_these_two(Node *n1, Node *n2) {
  uint lr1 = _phc._lrg_map.find(n1);
  uint lr2 = _phc._lrg_map.find(n2);

  // Same live range already: nothing to combine.
  if (lr1 == lr2) {
    return;
  }
  // Interfering live ranges must stay separate.
  if (_phc._ifg->test_edge_sq(lr1, lr2)) {
    return;
  }

  LRG *lrg1 = &_phc.lrgs(lr1);
  LRG *lrg2 = &_phc.lrgs(lr2);

  // Reject an oop->int cast; oop->oop, int->int, AND int->oop are OK.
  //
  // Why is int->oop acceptable?  It declares a raw pointer to be an oop,
  // which in general is a bad thing.  But int->oop conversions only happen at
  // GC points, so the misclassified raw pointer lives from the CheckCastPP
  // (which converts it to an oop) backwards up through a merge point and into
  // the slow-path call, and around the diamond up to the heap-top check and
  // back down into the slow-path call.  It is NOT live across the slow-path
  // call, so it never shows up in any GC info and the misclassification is
  // harmless.
  if (!lrg1->_is_oop && lrg2->_is_oop) {
    return;
  }
  // The final register masks have to be compatible.
  if (!lrg1->mask().overlap(lrg2->mask())) {
    return;
  }

  // Merge larger into smaller: arrange for lr1/n1/lrg1 to be the
  // lower-numbered live range, which survives the union.
  if (lr1 > lr2) {
    const uint lr_hi   = lr1;   lr1  = lr2;   lr2  = lr_hi;
    Node*      n_hi    = n1;    n1   = n2;    n2   = n_hi;
    LRG*       lrg_hi  = lrg1;  lrg1 = lrg2;  lrg2 = lrg_hi;
  }

  // Union lr2 into lr1.
  _phc.Union( n1, n2 );
  // The survivor carries the larger of the two max frequencies.
  if (lrg2->_maxfreq > lrg1->_maxfreq) {
    lrg1->_maxfreq = lrg2->_maxfreq;
  }
  // Mirror the merge in the interference graph.
  _phc._ifg->Union( lr1, lr2 );
  // Intersect the register restrictions.
  lrg1->AND(lrg2->mask());
}

// Copy coalescing
void PhaseCoalesce::coalesce_driver() {
  verify();
  // Coalesce from high frequency to low
  for (uint i = 0; i < _phc._cfg.number_of_blocks(); i++) {
    coalesce(_phc._blks[i]);
  }
}

// I am inserting copies to come out of SSA form.  In the general case, I am
// doing a parallel renaming.  I'm in the Named world now, so I can't do a
// general parallel renaming.  All the copies now use  "names" (live-ranges)
// to carry values instead of the explicit use-def chains.  Suppose I need to
// insert 2 copies into the same block.  They copy L161->L128 and L128->L132.
// If I insert them in the wrong order then L128 will get clobbered before it
// can get used by the second copy.  This cannot happen in the SSA model;
// direct use-def chains get me the right value.  It DOES happen in the named
// model so I have to handle the reordering of copies.
//
// In general, I need to topo-sort the placed copies to avoid conflicts.
// It's possible to have a closed cycle of copies (e.g., recirculating the same
// values around a loop).  In this case I need a temp to break the cycle.
void PhaseAggressiveCoalesce::insert_copy_with_overlap( Block *b, Node *copy, uint dst_name, uint src_name ) {

  // Pass 1: walk backwards from the end of the block looking for the last use
  // of dst_name (the first one met on a backwards scan).  The new copy
  // clobbers dst_name, so it has to be placed after that use.
  uint last_use_idx = b->end_idx()-1;
  for (;; last_use_idx--) {
    Node *n = b->get_node(last_use_idx);
    // Nodes below _unique are not part of the parallel renaming; stop there.
    if (n->_idx < _unique) {
      break;
    }
    uint idx = n->is_Copy();
    assert( idx || n->is_Con() || n->is_MachProj(), "Only copies during parallel renaming" );
    if (idx && _phc._lrg_map.find(n->in(idx)) == dst_name) {
      break;
    }
  }

  // Pass 2: look for a kill of src_name that exits the block.  The copy reads
  // src_name, so it has to come before any such kill.  Only the last kill can
  // exit the block, and that is the first one met on a backwards scan.
  uint kill_src_idx = b->end_idx();
  for (uint scan = b->end_idx()-1; ; scan--) {
    Node *n = b->get_node(scan);
    // Same stopping rule as in pass 1.
    if (n->_idx < _unique) {
      break;
    }
    assert( n->is_Copy() || n->is_Con() || n->is_MachProj(), "Only copies during parallel renaming" );
    if (_phc._lrg_map.find(n) == src_name) {
      kill_src_idx = scan;
      break;
    }
  }

  // By default the copy goes right after the last use of dst_name.
  uint insert_at = last_use_idx + 1;

  // If that last use does not come before the kill of src_name, the two
  // constraints conflict and a temp is needed to break the cycle.
  if (last_use_idx >= kill_src_idx) {
    uint idx = copy->is_Copy();
    Node *saved_src = copy->clone();
    const uint fresh_lrg = _phc._lrg_map.max_lrg_id();
    _phc.new_lrg(saved_src, fresh_lrg);
    _phc._lrg_map.set_max_lrg_id(fresh_lrg + 1);

    // Thread the temp between the copy and its source.
    saved_src->set_req(idx, copy->in(idx));
    copy->set_req(idx, saved_src);
    // Capture the source in the temp early, before the source is killed.
    b->insert_node(saved_src, kill_src_idx);
    _phc._cfg.map_node_to_block(saved_src, b);
    // The temp went in at or before the last use, shifting it down one slot.
    insert_at++;
  }

  b->insert_node(copy, insert_at);
}

// Turn the copies that coalescing left "virtual" into real nodes.
//
// Steps, in order:
//  1. Compress the live-range union-find map and rewrite each block's live
//     set so that it names the compressed live range.
//  2. Record C->unique() in _unique; nodes with a smaller index are the
//     original, non-virtual nodes.
//  3. For every node of every block (starting at index 1):
//     - bypass inputs that are copies sharing a live range with their source,
//       and remove such copies from the block;
//     - for a Phi, give every input whose live range differs from the Phi's
//       a copy (or a clone of a rematerializable constant) placed in the
//       matching predecessor block;
//     - for a two-address Mach node, do the same for the two-address input,
//       inserting just before the node;
//     - in low-frequency or uncommon blocks, split a high-frequency live
//       range at a debug use when it is not live into any successor.
//
// Returns early when the compile is failing, or when a value that would need
// a spill copy reports ideal_reg() of 0 or Op_RegFlags.  The 'matcher'
// parameter is not referenced in this body; C->matcher() is used instead.
void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
  // We do LRGs compressing and fix a liveout data only here since the other
  // place in Split() is guarded by the assert which we never hit.
  _phc._lrg_map.compress_uf_map_for_nodes();
  // Fix block's liveout data for compressed live ranges.
  for (uint lrg = 1; lrg < _phc._lrg_map.max_lrg_id(); lrg++) {
    uint compressed_lrg = _phc._lrg_map.find(lrg);
    if (lrg != compressed_lrg) {
      for (uint bidx = 0; bidx < _phc._cfg.number_of_blocks(); bidx++) {
        IndexSet *liveout = _phc._live->live(_phc._cfg.get_block(bidx));
        if (liveout->member(lrg)) {
          liveout->remove(lrg);
          liveout->insert(compressed_lrg);
        }
      }
    }
  }

  // All new nodes added are actual copies to replace virtual copies.
  // Nodes with index less than '_unique' are original, non-virtual Nodes.
  _unique = C->unique();

  for (uint i = 0; i < _phc._cfg.number_of_blocks(); i++) {
    // Bail out of the whole pass once the node budget check fails the compile.
    C->check_node_count(NodeLimitFudgeFactor, "out of nodes in coalesce");
    if (C->failing()) return;
    Block *b = _phc._cfg.get_block(i);
    uint cnt = b->num_preds();  // Number of inputs to the Phi

    // 'l' is adjusted inside the loop whenever nodes are removed from or
    // inserted into the block, so that it keeps tracking 'n'.
    for( uint l = 1; l<b->number_of_nodes(); l++ ) {
      Node *n = b->get_node(l);

      // Do not use removed-copies, use copied value instead
      uint ncnt = n->req();
      for( uint k = 1; k<ncnt; k++ ) {
        Node *copy = n->in(k);
        uint cidx = copy->is_Copy();
        if( cidx ) {
          Node *def = copy->in(cidx);
          if (_phc._lrg_map.find(copy) == _phc._lrg_map.find(def)) {
            n->set_req(k, def);
          }
        }
      }

      // Remove any explicit copies that get coalesced.
      uint cidx = n->is_Copy();
      if( cidx ) {
        Node *def = n->in(cidx);
        if (_phc._lrg_map.find(n) == _phc._lrg_map.find(def)) {
          n->replace_by(def);
          n->set_req(cidx,NULL);
          b->remove_node(l);
          // Step back so the node that slid into slot 'l' is visited next.
          l--;
          continue;
        }
      }

      if (n->is_Phi()) {
        // Get the chosen name for the Phi
        uint phi_name = _phc._lrg_map.find(n);
        // Ignore the pre-allocated specials
        if (!phi_name) {
          continue;
        }
        // Check for mismatch inputs to Phi
        for (uint j = 1; j < cnt; j++) {
          Node *m = n->in(j);
          uint src_name = _phc._lrg_map.find(m);
          if (src_name != phi_name) {
            Block *pred = _phc._cfg.get_block_for_node(b->pred(j));
            Node *copy;
            assert(!m->is_Con() || m->is_Mach(), "all Con must be Mach");
            // Rematerialize constants instead of copying them.
            // We do this only for immediate constants, we avoid constant table loads
            // because that will unsafely extend the live range of the constant table base.
            if (m->is_Mach() && m->as_Mach()->is_Con() && !m->as_Mach()->is_MachConstant() &&
                m->as_Mach()->rematerialize()) {
              copy = m->clone();
              // Insert the copy in the predecessor basic block
              pred->add_inst(copy);
              // Copy any flags as well
              _phc.clone_projs(pred, pred->end_idx(), m, copy, _phc._lrg_map);
            } else {
              int ireg = m->ideal_reg();
              if (ireg == 0 || ireg == Op_RegFlags) {
                // Not spillable: request a retry without subsumed loads when
                // they are enabled, otherwise give up on compiling the method.
                if (C->subsume_loads()) {
                  C->record_failure(C2Compiler::retry_no_subsuming_loads());
                } else {
                  assert(false, err_msg("attempted to spill a non-spillable item: %d: %s, ireg = %d",
                                        m->_idx, m->Name(), ireg));
                  C->record_method_not_compilable("attempted to spill a non-spillable item");
                }
                return;
              }
              const RegMask *rm = C->matcher()->idealreg2spillmask[ireg];
              copy = new (C) MachSpillCopyNode(m, *rm, *rm);
              // Find a good place to insert.  Kinda tricky, use a subroutine
              insert_copy_with_overlap(pred,copy,phi_name,src_name);
            }
            // Insert the copy in the use-def chain
            n->set_req(j, copy);
            _phc._cfg.map_node_to_block(copy, pred);
            // Extend ("register allocate") the names array for the copy.
            _phc._lrg_map.extend(copy->_idx, phi_name);
          } // End of if Phi names do not match
        } // End of for all inputs to Phi
      } else { // End of if Phi

        // Now check for 2-address instructions
        uint idx;
        if( n->is_Mach() && (idx=n->as_Mach()->two_adr()) ) {
          // Get the chosen name for the Node
          uint name = _phc._lrg_map.find(n);
          assert (name, "no 2-address specials");
          // Check for name mis-match on the 2-address input
          Node *m = n->in(idx);
          if (_phc._lrg_map.find(m) != name) {
            Node *copy;
            assert(!m->is_Con() || m->is_Mach(), "all Con must be Mach");
            // At this point it is unsafe to extend live ranges (6550579).
            // Rematerialize only constants as we do for Phi above.
            if (m->is_Mach() && m->as_Mach()->is_Con() && !m->as_Mach()->is_MachConstant() &&
                m->as_Mach()->rematerialize()) {
              copy = m->clone();
              // Insert the copy in the basic block, just before us
              b->insert_node(copy, l++);
              // Skip past whatever projections were cloned along with it.
              l += _phc.clone_projs(b, l, m, copy, _phc._lrg_map);
            } else {
              int ireg = m->ideal_reg();
              if (ireg == 0 || ireg == Op_RegFlags) {
                assert(false, err_msg("attempted to spill a non-spillable item: %d: %s, ireg = %d",
                                      m->_idx, m->Name(), ireg));
                C->record_method_not_compilable("attempted to spill a non-spillable item");
                return;
              }
              const RegMask *rm = C->matcher()->idealreg2spillmask[ireg];
              copy = new (C) MachSpillCopyNode(m, *rm, *rm);
              // Insert the copy in the basic block, just before us
              b->insert_node(copy, l++);
            }
            // Insert the copy in the use-def chain
            n->set_req(idx, copy);
            // Extend ("register allocate") the names array for the copy.
            _phc._lrg_map.extend(copy->_idx, name);
            _phc._cfg.map_node_to_block(copy, b);
          }

        } // End of is two-adr

        // Insert a copy at a debug use for a lrg which has high frequency
        if (b->_freq < OPTO_DEBUG_SPLIT_FREQ || _phc._cfg.is_uncommon(b)) {
          // Walk the debug inputs to the node and check for lrg freq
          JVMState* jvms = n->jvms();
          // Without a JVMState both bounds are the same sentinel, so the loop
          // below does not execute (and 'jvms' is never dereferenced).
          uint debug_start = jvms ? jvms->debug_start() : 999999;
          uint debug_end   = jvms ? jvms->debug_end()   : 999999;
          for(uint inpidx = debug_start; inpidx < debug_end; inpidx++) {
            // Do not split monitors; they are only needed for debug table
            // entries and need no code.
            if (jvms->is_monitor_use(inpidx)) {
              continue;
            }
            Node *inp = n->in(inpidx);
            uint nidx = _phc._lrg_map.live_range_id(inp);
            LRG &lrg = lrgs(nidx);

            // If this lrg has a high frequency use/def
            if( lrg._maxfreq >= _phc.high_frequency_lrg() ) {
              // If the live range is also live out of this block (like it
              // would be for a fast/slow idiom), the normal spill mechanism
              // does an excellent job.  If it is not live out of this block
              // (like it would be for debug info to uncommon trap) splitting
              // the live range now allows a better allocation in the high
              // frequency blocks.
              //   Build_IFG_virtual has converted the live sets to
              // live-IN info, not live-OUT info.
              uint k;
              for( k=0; k < b->_num_succs; k++ )
                if( _phc._live->live(b->_succs[k])->member( nidx ) )
                  break;      // Live in to some successor block?
              if( k < b->_num_succs )
                continue;     // Live out; do not pre-split
              // Split the lrg at this use
              int ireg = inp->ideal_reg();
              if (ireg == 0 || ireg == Op_RegFlags) {
                assert(false, err_msg("attempted to spill a non-spillable item: %d: %s, ireg = %d",
                                      inp->_idx, inp->Name(), ireg));
                C->record_method_not_compilable("attempted to spill a non-spillable item");
                return;
              }
              const RegMask *rm = C->matcher()->idealreg2spillmask[ireg];
              Node *copy = new (C) MachSpillCopyNode( inp, *rm, *rm );
              // Insert the copy in the use-def chain
              n->set_req(inpidx, copy );
              // Insert the copy in the basic block, just before us
              b->insert_node(copy,  l++);
              // Extend ("register allocate") the names array for the copy.
              // Unlike the Phi and two-address cases, this copy gets a brand
              // new live range instead of an existing name.
              uint max_lrg_id = _phc._lrg_map.max_lrg_id();
              _phc.new_lrg(copy, max_lrg_id);
              _phc._lrg_map.set_max_lrg_id(max_lrg_id + 1);
              _phc._cfg.map_node_to_block(copy, b);
              //tty->print_cr("Split a debug use in Aggressive Coalesce");
            }  // End of if high frequency use/def
          }  // End of for all debug inputs
        }  // End of if low frequency safepoint

      } // End of if Phi

    } // End of for all instructions
  } // End of for all blocks
}


// Aggressive (but pessimistic) copy coalescing of a single block

// The following coalesce pass represents a single round of aggressive
// pessimistic coalesce.  "Aggressive" means no attempt to preserve
// colorability when coalescing.  This occasionally means more spills, but
// it also means fewer rounds of coalescing for better code - and that means
// faster compiles.

// "Pessimistic" means we do not hit the fixed point in one pass (and we are
// reaching for the least fixed point to boot).  This is typically solved
// with a few more rounds of coalescing, but the compiler must run fast.  We
// could optimistically coalesce everything touching PhiNodes together
// into one big live range, then check for self-interference.  Everywhere
// the live range interferes with self it would have to be split.  Finding
// the right split points can be done with some heuristics (based on
// expected frequency of edges in the live range).  In short, it's a real
// research problem and the timeline is too short to allow such research.
// Further thoughts: (1) build the LR in a pass, (2) find self-interference
// in another pass, (3) per each self-conflict, split, (4) split by finding
// the low-cost cut (min-cut) of the LR, (5) edges in the LR are weighted
// according to the GCM algorithm (or just exec freq on CFG edges).

void PhaseAggressiveCoalesce::coalesce( Block *b ) {
  // Step 1: copies are still "virtual" at this point.  A Phi in a successor
  // block whose input along our edge lives in a different live range stands
  // for such a copy, so try to merge each Phi with the input it receives
  // from 'b'.  Whatever fails to merge here needs a real copy later.
  for (uint s = 0; s < b->_num_succs; s++) {
    Block *succ = b->_succs[s];

    // Scan the successor's predecessors (numbered from 1) for the slot
    // that leads back to 'b'.
    uint pred_idx = 1;
    for (; _phc._cfg.get_block_for_node(succ->pred(pred_idx)) != b; pred_idx++) { }

    // Walk the successor's nodes from index 1 and stop at the first node
    // that is not a Phi.
    uint k = 1;
    while (k < succ->number_of_nodes()) {
      Node *phi = succ->get_node(k);
      if (!phi->is_Phi()) {
        break;
      }
      combine_these_two(phi, phi->in(pred_idx));
      k++;
    }
  }

  // Step 2: within 'b' itself, a 2-address Mach instruction implies a
  // virtual copy from one of its inputs to its result; try to merge them.
  const uint last = b->end_idx();
  for (uint i = 1; i < last; i++) {
    Node *n = b->get_node(i);
    if (!n->is_Mach()) {
      continue;
    }
    MachNode *mach = n->as_Mach();
    uint two_adr_idx = mach->two_adr();
    if (two_adr_idx == 0) {
      continue;                 // Not a 2-address instruction
    }
    combine_these_two(mach, mach->in(two_adr_idx));
  }
}

// Set up the conservative coalescer: the scratch set '_ulr' is initialized
// with the allocator's current maximum live range id.
PhaseConservativeCoalesce::PhaseConservativeCoalesce(PhaseChaitin &chaitin)
  : PhaseCoalesce(chaitin)
{
  const uint lrg_limit = _phc._lrg_map.max_lrg_id();
  _ulr.initialize(lrg_limit);
}

// Verification hook.  When ASSERT is defined this calls
// PhaseChaitin::set_was_low() on the owning allocator; otherwise it does
// nothing.
void PhaseConservativeCoalesce::verify() {
#ifdef ASSERT
  _phc.set_was_low();
#endif
}

// Merge live range lr2 into lr1 and delete the now-useless dst_copy.
//   lr1_node/lr2_node - nodes handed to the union-find merge
//   lr1/lr2           - the surviving and the absorbed live range ids
//   src_def           - value that feeds the merged live range
//   dst_copy          - copy being removed; must be node 'bindex' of 'b'
//   src_copy          - first copy of the chain; its block bounds the
//                       liveness update at the end
void PhaseConservativeCoalesce::union_helper( Node *lr1_node, Node *lr2_node, uint lr1, uint lr2, Node *src_def, Node *dst_copy, Node *src_copy, Block *b, uint bindex ) {
  // Union lr2 into lr1 in the union-find tree.
  _phc.Union(lr1_node, lr2_node);

  LRG &survivor = lrgs(lr1);
  LRG &absorbed = lrgs(lr2);

  // The merged range stays single-def only when both inputs were
  // single-def; in that case src_def is the one remaining definition.
  if (survivor.is_multidef() || absorbed.is_multidef()) {
    survivor._def = NodeSentinel;
  } else {
    survivor._def = src_def;
  }
  absorbed._def = NULL;         // Absorbed range has no def any more
  absorbed.Clear();             // ...and an empty register mask

  // Oop-ness is inherited by the survivor; the absorbed range must not
  // show up as an oop in GC info.
  survivor._is_oop |= absorbed._is_oop;
  absorbed._is_oop = 0;

  // Keep the larger of the two maximum frequencies.
  if (survivor._maxfreq < absorbed._maxfreq) {
    survivor._maxfreq = absorbed._maxfreq;
  }

  // Point dst_copy directly at the original value, redirect all of its
  // users to that value, then unhook it and drop it from the block.
  int copy_in = dst_copy->is_Copy();
  dst_copy->set_req(copy_in, src_def);
  assert( b->get_node(bindex) == dst_copy, "" );
  dst_copy->replace_by(dst_copy->in(copy_in));
  dst_copy->set_req(copy_in, NULL);
  b->remove_node(bindex);

  // A node was removed in front of these markers; shift them down.
  if (bindex < b->_ihrp_index) {
    b->_ihrp_index--;
  }
  if (bindex < b->_fhrp_index) {
    b->_fhrp_index--;
  }

  // lr1 now stretches back to src_copy's block: mark it live in every block
  // on the pred(1) chain from 'b' back to (and including) that block.
  Block *src_block = _phc._cfg.get_block_for_node(src_copy);
  for (Block *cur = b; cur != src_block; ) {
    cur = _phc._cfg.get_block_for_node(cur->pred(1));
    _phc._live->live(cur)->insert(lr1);
  }
}

// Factored code from copy_copy that computes extra interferences from
// lengthening a live range by double-coalescing.
//
// Starting just before dst_copy (node 'bindex' of block 'b'), scan backwards
// instruction by instruction, following pred(1) at block boundaries, until
// src_copy is reached.  Every node seen that is not part of the copy chain
// is a potential new neighbor of the combined live range.
//
//   rm         - register mask of the combined live range; masks of bound
//                live ranges crossed on the way are subtracted IN PLACE
//   rm_size    - number of registers available in 'rm'
//   reg_degree - degree of the combined live range computed so far
//   lr1, lr2   - the two live ranges being merged
//
// Returns the updated degree, or max_juint if the coalesce must be
// abandoned (another def of lr1/lr2 lies in between, or the combined live
// range or one of its new neighbors would stop being trivially colorable).
//
// NOTE: rm_size precedes reg_degree, matching the declaration in
// coalesce.hpp and the call in copy_copy.  Both are plain uints, so a swap
// compiles silently -- keep declaration, definition and call in sync.
uint PhaseConservativeCoalesce::compute_separating_interferences(Node *dst_copy, Node *src_copy, Block *b, uint bindex, RegMask &rm, uint rm_size, uint reg_degree, uint lr1, uint lr2 ) {

  assert(!lrgs(lr1)._fat_proj, "cannot coalesce fat_proj");
  assert(!lrgs(lr2)._fat_proj, "cannot coalesce fat_proj");
  Node *prev_copy = dst_copy->in(dst_copy->is_Copy());
  Block *b2 = b;
  uint bindex2 = bindex;
  while( 1 ) {
    // Find previous instruction
    bindex2--;                  // Chain backwards 1 instruction
    while( bindex2 == 0 ) {     // At block start, find prior block
      assert( b2->num_preds() == 2, "cannot double coalesce across c-flow" );
      b2 = _phc._cfg.get_block_for_node(b2->pred(1));
      bindex2 = b2->end_idx()-1;
    }
    // Get prior instruction
    assert(bindex2 < b2->number_of_nodes(), "index out of bounds");
    Node *x = b2->get_node(bindex2);
    if( x == prev_copy ) {      // Previous copy in copy chain?
      if( prev_copy == src_copy)// Found end of chain and all interferences
        break;                  // So break out of loop
      // Else work back one in copy chain
      prev_copy = prev_copy->in(prev_copy->is_Copy());
    } else {                    // Else collect interferences
      uint lidx = _phc._lrg_map.find(x);
      // Found another def of either live range being stretched?
      if( lidx == lr1 || lidx == lr2 ) {
        return max_juint;
      }

      // If we attempt to coalesce across a bound def
      if( lrgs(lidx).is_bound() ) {
        // Do not let the coalesced LRG expect to get the bound color
        rm.SUBTRACT( lrgs(lidx).mask() );
        // Recompute rm_size
        rm_size = rm.Size();
        //if( rm._flags ) rm_size += 1000000;
        if( reg_degree >= rm_size ) return max_juint;
      }
      if( rm.overlap(lrgs(lidx).mask()) ) {
        // Insert lidx into union LRG; returns TRUE if actually inserted
        if( _ulr.insert(lidx) ) {
          // Infinite-stack neighbors do not alter colorability, as they
          // can always color to some other color.
          if( !lrgs(lidx).mask().is_AllStack() ) {
            // If this coalesce will make any new neighbor uncolorable,
            // do not coalesce.
            if( lrgs(lidx).just_lo_degree() )
              return max_juint;
            // Bump our degree
            if( ++reg_degree >= rm_size )
              return max_juint;
          } // End of if not infinite-stack neighbor
        } // End of if actually inserted
      } // End of if live range overlaps
    } // End of else collect interferences for 1 node
  } // End of while forever, scan back for interferences
  return reg_degree;
}

// Bring the interference graph in line with "lr2 has been merged into lr1".
// 'n_lr1' and 'n_lr2' are the neighbor sets of lr1 and lr2 before the merge;
// '_ulr' holds the neighbor set of the combined live range.
void PhaseConservativeCoalesce::update_ifg(uint lr1, uint lr2, IndexSet *n_lr1, IndexSet *n_lr2) {
  uint nbr;

  // (1) Old neighbors of lr1 that are not in the combined set (the
  //     constrained register mask ruled them out) forget about lr1.
  IndexSetIterator old_lr1(n_lr1);
  LRG &lrg1 = lrgs(lr1);
  while ((nbr = old_lr1.next()) != 0) {
    if (_ulr.member(nbr)) {
      continue;
    }
    if (_phc._ifg->neighbors(nbr)->remove(lr1)) {
      lrgs(nbr).inc_degree( -lrg1.compute_degree(lrgs(nbr)) );
    }
  }

  // (2) lr2 is now known as lr1: take lr2 out of every neighbor's set.
  IndexSetIterator old_lr2(n_lr2);
  LRG &lrg2 = lrgs(lr2);
  while ((nbr = old_lr2.next()) != 0) {
    if (_phc._ifg->neighbors(nbr)->remove(lr2)) {
      lrgs(nbr).inc_degree( -lrg2.compute_degree(lrgs(nbr)) );
    }
  }

  // (3) Everything in the combined set interferes with lr1; add the edge
  //     (and the degree) wherever it is not recorded yet.
  IndexSetIterator combined(&_ulr);
  while ((nbr = combined.next()) != 0) {
    if (_phc._ifg->neighbors(nbr)->insert(lr1)) {
      lrgs(nbr).inc_degree( lrg1.compute_degree(lrgs(nbr)) );
    }
  }
}

static void record_bias( const PhaseIFG *ifg, int lr1, int lr2 ) {
  // Tag copy bias here
  if( !ifg->lrgs(lr1)._copy_bias )
    ifg->lrgs(lr1)._copy_bias = lr2;
  if( !ifg->lrgs(lr2)._copy_bias )
    ifg->lrgs(lr2)._copy_bias = lr1;
}

// See if I can coalesce a series of multiple copies together.  I need the
// final dest copy and the original src copy.  They can be the same Node.
// Compute the compatible register masks.
//
//   dst_copy - last copy of the chain; expected to be node 'bindex' of
//              block 'b' (union_helper asserts this before removing it)
//   src_copy - first copy of the chain; the value it copies becomes the
//              value of the merged live range
//
// Returns true if the two live ranges were merged and dst_copy was removed
// from the block; returns false as soon as any of the checks below rejects
// the merge.  When the merge is rejected for colorability reasons a copy
// bias is recorded between the two live ranges instead.
bool PhaseConservativeCoalesce::copy_copy(Node *dst_copy, Node *src_copy, Block *b, uint bindex) {

  // Only spill copies are candidates.
  if (!dst_copy->is_SpillCopy()) {
    return false;
  }
  if (!src_copy->is_SpillCopy()) {
    return false;
  }
  // lr1 is the live range of the chain's result, lr2 the live range of the
  // value entering the chain.
  Node *src_def = src_copy->in(src_copy->is_Copy());
  uint lr1 = _phc._lrg_map.find(dst_copy);
  uint lr2 = _phc._lrg_map.find(src_def);

  // Same live ranges already?
  if (lr1 == lr2) {
    return false;
  }

  // Interfere?
  if (_phc._ifg->test_edge_sq(lr1, lr2)) {
    return false;
  }

  // Not an oop->int cast; oop->oop, int->int, AND int->oop are OK.
  if (!lrgs(lr1)._is_oop && lrgs(lr2)._is_oop) { // not an oop->int cast
    return false;
  }

  // Coalescing between an aligned live range and a mis-aligned live range?
  // No, no!  Alignment changes how we count degree.
  if (lrgs(lr1)._fat_proj != lrgs(lr2)._fat_proj) {
    return false;
  }

  // Sort; use smaller live-range number.  From here on lr1 is the smaller
  // id (the survivor) and lr1_node/lr2_node are the matching nodes.
  Node *lr1_node = dst_copy;
  Node *lr2_node = src_def;
  if (lr1 > lr2) {
    uint tmp = lr1; lr1 = lr2; lr2 = tmp;
    lr1_node = src_def;  lr2_node = dst_copy;
  }

  // Check for compatibility of the 2 live ranges by
  // intersecting their allowed register sets.
  RegMask rm = lrgs(lr1).mask();
  rm.AND(lrgs(lr2).mask());
  // Number of bits free
  uint rm_size = rm.Size();

  if (UseFPUForSpilling && rm.is_AllStack() ) {
    // Don't coalesce when frequency difference is large
    Block *dst_b = _phc._cfg.get_block_for_node(dst_copy);
    Block *src_def_b = _phc._cfg.get_block_for_node(src_def);
    if (src_def_b->_freq > 10*dst_b->_freq )
      return false;
  }

  // If we can use any stack slot, then effective size is infinite
  if( rm.is_AllStack() ) rm_size += 1000000;
  // Incompatible masks, no way to coalesce
  if( rm_size == 0 ) return false;

  // Another early bail-out test is when we are double-coalescing and the
  // 2 copies are separated by some control flow.
  if( dst_copy != src_copy ) {
    Block *src_b = _phc._cfg.get_block_for_node(src_copy);
    Block *b2 = b;
    // Walk the pred(1) chain from dst_copy's block back to src_copy's block.
    while( b2 != src_b ) {
      if( b2->num_preds() > 2 ){// Found merge-point
        _phc._lost_opp_cflow_coalesce++;
        // extra record_bias commented out because Chris believes it is not
        // productive.  Since we can record only 1 bias, we want to choose one
        // that stands a chance of working and this one probably does not.
        //record_bias( _phc._lrgs, lr1, lr2 );
        return false;           // Too hard to find all interferences
      }
      b2 = _phc._cfg.get_block_for_node(b2->pred(1));
    }
  }

  // Union the two interference sets together into '_ulr'
  uint reg_degree = _ulr.lrg_union( lr1, lr2, rm_size, _phc._ifg, rm );

  // Combined degree already too high for the combined mask: give up, but
  // remember the pair as a copy bias.
  if( reg_degree >= rm_size ) {
    record_bias( _phc._ifg, lr1, lr2 );
    return false;
  }

  // Now I need to compute all the interferences between dst_copy and
  // src_copy.  I'm not willing to visit the entire interference graph, so
  // I limit my search to things in dst_copy's block or in a straight
  // line of previous blocks.  I give up at merge points or when I get
  // more interferences than my degree.  I can stop when I find src_copy.
  if( dst_copy != src_copy ) {
    // Arguments follow the order declared in coalesce.hpp: rm_size first,
    // then reg_degree.  max_juint signals "do not coalesce".
    reg_degree = compute_separating_interferences(dst_copy, src_copy, b, bindex, rm, rm_size, reg_degree, lr1, lr2 );
    if( reg_degree == max_juint ) {
      record_bias( _phc._ifg, lr1, lr2 );
      return false;
    }
  } // End of if dst_copy & src_copy are different


  // ---- THE COMBINED LRG IS COLORABLE ----

  // YEAH - Now coalesce this copy away
  assert( lrgs(lr1).num_regs() == lrgs(lr2).num_regs(),   "" );

  IndexSet *n_lr1 = _phc._ifg->neighbors(lr1);
  IndexSet *n_lr2 = _phc._ifg->neighbors(lr2);

  // Update the interference graph
  update_ifg(lr1, lr2, n_lr1, n_lr2);

  // The combined set must not list lr1 as its own neighbor.
  _ulr.remove(lr1);

  // Uncomment the following code to trace Coalescing in great detail.
  //
  //if (false) {
  //  tty->cr();
  //  tty->print_cr("#######################################");
  //  tty->print_cr("union %d and %d", lr1, lr2);
  //  n_lr1->dump();
  //  n_lr2->dump();
  //  tty->print_cr("resulting set is");
  //  _ulr.dump();
  //}

  // Replace n_lr1 with the new combined live range.  _ulr will use
  // n_lr1's old memory on the next iteration.  n_lr2 is cleared to
  // send its internal memory to the free list.
  _ulr.swap(n_lr1);
  _ulr.clear();
  n_lr2->clear();

  // Recompute lr1's degree from the updated graph; lr2 no longer exists.
  lrgs(lr1).set_degree( _phc._ifg->effective_degree(lr1) );
  lrgs(lr2).set_degree( 0 );

  // Join live ranges.  Merge larger into smaller.  Union lr2 into lr1 in the
  // union-find tree
  union_helper( lr1_node, lr2_node, lr1, lr2, src_def, dst_copy, src_copy, b, bindex );
  // Combine register restrictions
  lrgs(lr1).set_mask(rm);
  lrgs(lr1).compute_set_mask_size();
  // The survivor inherits the absorbed range's cost and area.
  lrgs(lr1)._cost += lrgs(lr2)._cost;
  lrgs(lr1)._area += lrgs(lr2)._area;

  // While it's uncommon to successfully coalesce live ranges that started out
  // being not-lo-degree, it can happen.  In any case the combined coalesced
  // live range better Simplify nicely.
  lrgs(lr1)._was_lo = 1;

  // kinda expensive to do all the time
  //tty->print_cr("warning: slow verify happening");
  //_phc._ifg->verify( &_phc );
  return true;
}

// Conservative (but pessimistic) copy coalescing of a single block.
// Blocks the CFG reports as uncommon are skipped entirely.  In all other
// blocks every copy node between index 1 and end_idx() is offered to
// copy_copy() as a one-element copy chain.
void PhaseConservativeCoalesce::coalesce( Block *b ) {
  if (_phc._cfg.is_uncommon(b)) {
    return;                     // Bail out on infrequent blocks
  }

  uint i = 1;
  while (i < b->end_idx()) {
    Node *candidate = b->get_node(i);
    // Only actual copies qualify; a successful coalesce removes the copy
    // from the block, so the same index is examined again.
    if (candidate->is_Copy() != 0 && copy_copy(candidate, candidate, b, i)) {
      PhaseChaitin::_conserv_coalesce++;  // Collect stats on success
      continue;
    }
    i++;
  }
}
C:\hotspot-69087d08d473\src\share\vm/opto/coalesce.hpp
/*
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_VM_OPTO_COALESCE_HPP
#define SHARE_VM_OPTO_COALESCE_HPP

#include "opto/phase.hpp"

class LoopTree;
class LRG;
class Matcher;
class PhaseIFG;
class PhaseCFG;

//------------------------------PhaseCoalesce----------------------------------
// Abstract base class of the copy-coalescing phases.  It holds a reference to
// the PhaseChaitin it works for and declares the per-block coalesce() hook
// that the concrete strategies implement.
class PhaseCoalesce : public Phase {
protected:
  PhaseChaitin &_phc;           // The register allocator being served

public:
  // Construct a Coalesce phase bound to the given allocator
  PhaseCoalesce(PhaseChaitin &phc)
  : Phase(Coalesce)
  , _phc(phc) {}

  // Strategy-specific verification hook
  virtual void verify() = 0;

  // Coalesce copies (defined out of line; presumably drives coalesce() over
  // the blocks -- confirm in coalesce.cpp)
  void coalesce_driver();

  // Coalesce copies in this block
  virtual void coalesce(Block *b) = 0;

  // Attempt to coalesce live ranges defined by these 2
  void combine_these_two(Node *n1, Node *n2);

  // Shorthand for the allocator's live range with index 'lidx'
  LRG &lrgs(uint lidx) { return _phc.lrgs(lidx); }
#ifndef PRODUCT
  // Dump internal name
  void dump(Node *n) const;
  // Dump whole shebang
  void dump() const;
#endif
};

//------------------------------PhaseAggressiveCoalesce------------------------
// Aggressive, pessimistic copy coalescing.  Aggressive means ignore graph
// colorability; perhaps coalescing to the point of forcing a spill.
// Pessimistic means we cannot coalesce if 2 live ranges interfere.  This
// implies we do not hit a fixed point right away.
class PhaseAggressiveCoalesce : public PhaseCoalesce {
  // Zero-initialized by the constructor so it never holds an indeterminate
  // value before the phase assigns it.
  uint _unique;
public:
  // Coalesce copies
  PhaseAggressiveCoalesce( PhaseChaitin &chaitin ) : PhaseCoalesce(chaitin), _unique(0) {}

  // Nothing to verify for the aggressive strategy
  virtual void verify() { }

  // Aggressively coalesce copies in this block
  virtual void coalesce( Block *b );

  // Where I fail to coalesce, manifest virtual copies as the Real Thing
  void insert_copies( Matcher &matcher );

  // Copy insertion needs some smarts in case live ranges overlap
  void insert_copy_with_overlap( Block *b, Node *copy, uint dst_name, uint src_name );
};


//------------------------------PhaseConservativeCoalesce----------------------
// Conservative, pessimistic copy coalescing.  Conservative means do not
// coalesce if the resultant live range will be uncolorable.  Pessimistic
// means we cannot coalesce if 2 live ranges interfere.  This implies we do
// not hit a fixed point right away.
class PhaseConservativeCoalesce : public PhaseCoalesce {
  IndexSet _ulr;               // Union live range interferences
public:
  // Coalesce copies
  PhaseConservativeCoalesce( PhaseChaitin &chaitin );

  // Verification hook for the conservative strategy
  virtual void verify();

  // Conservatively coalesce copies in this block
  virtual void coalesce( Block *b );

  // Coalesce this chain of copies away.  Returns true if the chain from
  // src_copy to dst_copy (node 'bindex' of block 'b') was coalesced.
  bool copy_copy( Node *dst_copy, Node *src_copy, Block *b, uint bindex );

  // Merge live range lr2 into lr1 and remove dst_copy from block 'b'
  void union_helper( Node *lr1_node, Node *lr2_node, uint lr1, uint lr2, Node *src_def, Node *dst_copy, Node *src_copy, Block *b, uint bindex );

  // Collect the interferences picked up between src_copy and dst_copy.
  // Note the argument order: rm_size comes before reg_degree.
  uint compute_separating_interferences(Node *dst_copy, Node *src_copy, Block *b, uint bindex, RegMask &rm, uint rm_size, uint reg_degree, uint lr1, uint lr2);

  // Update the interference graph after lr2 has been merged into lr1
  void update_ifg(uint lr1, uint lr2, IndexSet *n_lr1, IndexSet *n_lr2);
};

#endif // SHARE_VM_OPTO_COALESCE_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/compile.cpp
/*
 * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "ci/ciReplay.hpp"
#include "classfile/systemDictionary.hpp"
#include "code/exceptionHandlerTable.hpp"
#include "code/nmethod.hpp"
#include "compiler/compileLog.hpp"
#include "compiler/disassembler.hpp"
#include "compiler/oopMap.hpp"
#include "jfr/jfrEvents.hpp"
#include "opto/addnode.hpp"
#include "opto/block.hpp"
#include "opto/c2compiler.hpp"
#include "opto/callGenerator.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/chaitin.hpp"
#include "opto/compile.hpp"
#include "opto/connode.hpp"
#include "opto/divnode.hpp"
#include "opto/escape.hpp"
#include "opto/idealGraphPrinter.hpp"
#include "opto/loopnode.hpp"
#include "opto/machnode.hpp"
#include "opto/macro.hpp"
#include "opto/matcher.hpp"
#include "opto/mathexactnode.hpp"
#include "opto/memnode.hpp"
#include "opto/mulnode.hpp"
#include "opto/node.hpp"
#include "opto/opcodes.hpp"
#include "opto/output.hpp"
#include "opto/parse.hpp"
#include "opto/phaseX.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/stringopts.hpp"
#include "opto/type.hpp"
#include "opto/vectornode.hpp"
#include "runtime/arguments.hpp"
#include "runtime/signature.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/timer.hpp"
#include "utilities/copy.hpp"
#if defined AD_MD_HPP
# include AD_MD_HPP
#elif defined TARGET_ARCH_MODEL_x86_32
# include "adfiles/ad_x86_32.hpp"
#elif defined TARGET_ARCH_MODEL_x86_64
# include "adfiles/ad_x86_64.hpp"
#elif defined TARGET_ARCH_MODEL_aarch64
# include "adfiles/ad_aarch64.hpp"
#elif defined TARGET_ARCH_MODEL_sparc
# include "adfiles/ad_sparc.hpp"
#elif defined TARGET_ARCH_MODEL_zero
# include "adfiles/ad_zero.hpp"
#elif defined TARGET_ARCH_MODEL_ppc_64
# include "adfiles/ad_ppc_64.hpp"
#endif

// -------------------- Compile::mach_constant_base_node -----------------------
// Constant table base node singleton: created on first request with the
// root node as its only required input, and returned unchanged afterwards.
MachConstantBaseNode* Compile::mach_constant_base_node() {
  if (_mach_constant_base_node != NULL) {
    return _mach_constant_base_node;      // Already created
  }
  _mach_constant_base_node = new (C) MachConstantBaseNode();
  _mach_constant_base_node->add_req(C->root());
  return _mach_constant_base_node;
}


/// Support for intrinsics.

// Return the index at which m must be inserted (or already exists).
// The sort order is by the address of the ciMethod, with is_virtual as minor key.
// The caller must make sure _intrinsics has been allocated.
int Compile::intrinsic_insertion_index(ciMethod* m, bool is_virtual) {
#ifdef ASSERT
  // Debug builds re-check that the list is strictly sorted.
  for (int i = 1; i < _intrinsics->length(); i++) {
    CallGenerator* prev = _intrinsics->at(i-1);
    CallGenerator* curr = _intrinsics->at(i);
    assert(prev->method() != curr->method()
           ? prev->method()     < curr->method()
           : prev->is_virtual() < curr->is_virtual(),
           "compiler intrinsics list must stay sorted");
  }
#endif
  // Binary search over the closed interval [lo, hi], which shrinks on
  // every step.
  int lo = 0;
  int hi = _intrinsics->length() - 1;
  while (lo <= hi) {
    const int mid = (uint)(hi + lo) / 2;
    CallGenerator* probe = _intrinsics->at(mid);
    ciMethod* probe_m = probe->method();

    // Major key: method address.
    if (m < probe_m) {
      hi = mid - 1;
      continue;
    }
    if (m > probe_m) {
      lo = mid + 1;
      continue;
    }

    // Same method: decide on the minor key.
    const bool probe_virt = probe->is_virtual();
    if (is_virtual == probe_virt) {
      return mid;               // exact match
    }
    if (is_virtual < probe_virt) {
      hi = mid - 1;
    } else {
      lo = mid + 1;
    }
  }
  return lo;                    // inexact match: insertion point
}

// Insert 'cg' into the sorted intrinsics list, creating the list on first
// use.  Registering the same (method, is_virtual) pair twice is a bug and is
// caught by an assert in debug builds.
void Compile::register_intrinsic(CallGenerator* cg) {
  if (_intrinsics == NULL) {
    _intrinsics = new (comp_arena())GrowableArray<CallGenerator*>(comp_arena(), 60, 0, NULL);
  }
  // This code is stolen from ciObjectFactory::insert.
  // Really, GrowableArray should have methods for
  // insert_at, remove_at, and binary_search.
  const int old_len = _intrinsics->length();
  const int slot = intrinsic_insertion_index(cg->method(), cg->is_virtual());

  if (slot == old_len) {
    // Sorts after everything already present.
    _intrinsics->append(cg);
  } else {
#ifdef ASSERT
    CallGenerator* oldcg = _intrinsics->at(slot);
    assert(oldcg->method() != cg->method() || oldcg->is_virtual() != cg->is_virtual(), "don't register twice");
#endif
    // Grow by one (duplicating the last element), slide the elements in
    // [slot, old_len-1) up by one position, then drop 'cg' into the gap.
    _intrinsics->append(_intrinsics->at(old_len - 1));
    for (int dst = old_len - 1; dst > slot; dst--) {
      _intrinsics->at_put(dst, _intrinsics->at(dst - 1));
    }
    _intrinsics->at_put(slot, cg);
  }
  assert(find_intrinsic(cg->method(), cg->is_virtual()) == cg, "registration worked");
}

// Look up the intrinsic call generator for (m, is_virtual).  A registered
// generator is returned directly.  Otherwise, for intrinsic ids in the range
// (_none, LAST_COMPILER_INLINE], one is created via make_vm_intrinsic() and
// registered for next time; if creation fails the miss is recorded as
// "disabled" in the statistics.  Returns NULL when no generator is available.
CallGenerator* Compile::find_intrinsic(ciMethod* m, bool is_virtual) {
  assert(m->is_loaded(), "don't try this on unloaded methods");

  if (_intrinsics != NULL) {
    const int slot = intrinsic_insertion_index(m, is_virtual);
    if (slot < _intrinsics->length()) {
      CallGenerator* candidate = _intrinsics->at(slot);
      if (candidate->method() == m && candidate->is_virtual() == is_virtual) {
        return candidate;
      }
    }
  }

  // Lazily create intrinsics for intrinsic IDs well-known in the runtime.
  const vmIntrinsics::ID id = m->intrinsic_id();
  if (id == vmIntrinsics::_none || id > vmIntrinsics::LAST_COMPILER_INLINE) {
    return NULL;
  }

  CallGenerator* cg = make_vm_intrinsic(m, is_virtual);
  if (cg == NULL) {
    gather_intrinsic_statistics(id, is_virtual, _intrinsic_disabled);
    return NULL;
  }
  // Save it for next time:
  register_intrinsic(cg);
  return cg;
}

// Compile:: register_library_intrinsics and make_vm_intrinsic are defined
// in library_call.cpp.


#ifndef PRODUCT
// statistics gathering...

// Per-intrinsic histogram data, indexed by vmIntrinsics::ID: a success
// counter and the set of _intrinsic_* flags seen so far.  The slot at
// vmIntrinsics::_none holds the overall totals (see
// gather_intrinsic_statistics).
juint  Compile::_intrinsic_hist_count[vmIntrinsics::ID_LIMIT] = {0};
jubyte Compile::_intrinsic_hist_flags[vmIntrinsics::ID_LIMIT] = {0};

// Record one observation of intrinsic 'id' in the histogram.
//   id         - must lie strictly between _none and ID_LIMIT
//   is_virtual - adds _intrinsic_virtual to 'flags'
//   flags      - non-zero combination of _intrinsic_* bits
// Returns true if the effective flags differ from the flags stored so far,
// or if this is the first counted success for 'id'.
bool Compile::gather_intrinsic_statistics(vmIntrinsics::ID id, bool is_virtual, int flags) {
  assert(id > vmIntrinsics::_none && id < vmIntrinsics::ID_LIMIT, "oob");
  const int prev = _intrinsic_hist_flags[id];
  assert(flags != 0, "what happened?");

  if (is_virtual) {
    flags |= _intrinsic_virtual;
  }
  bool changed = (prev != flags);

  if ((flags & _intrinsic_worked) != 0) {
    _intrinsic_hist_count[id] += 1;
    if (_intrinsic_hist_count[id] == 1) {
      changed = true;           // first time
    }
    // increment the overall count also:
    _intrinsic_hist_count[vmIntrinsics::_none] += 1;
  }

  if (changed) {
    // Did the virtual bit flip relative to what was stored?
    const bool virtuality_flipped = ((prev ^ flags) & _intrinsic_virtual) != 0;
    if (virtuality_flipped) {
      const bool now_virtual = (flags & _intrinsic_virtual) != 0;
      // Virtual now: note both cases if something was recorded before.
      // Non-virtual now: note both cases unless that is already recorded.
      const bool mark_both = now_virtual ? (prev != 0)
                                         : ((prev & _intrinsic_both) == 0);
      if (mark_both) {
        flags |= _intrinsic_both;
      }
    }
    _intrinsic_hist_flags[id] = (jubyte) (prev | flags);
  }

  // update the overall flags also:
  _intrinsic_hist_flags[vmIntrinsics::_none] |= (jubyte) flags;
  return changed;
}

// Render the _intrinsic_* bits set in 'flags' as comma-separated words into
// 'buf' and return a pointer just past the leading comma.  If no known bit
// is set the result is the empty string.  'buf' must be large enough for all
// labels; no bounds checking is done.
static char* format_flags(int flags, char* buf) {
  const int bits[] = {
    Compile::_intrinsic_worked,
    Compile::_intrinsic_failed,
    Compile::_intrinsic_disabled,
    Compile::_intrinsic_virtual,
    Compile::_intrinsic_both
  };
  const char* const labels[] = {
    ",worked",
    ",failed",
    ",disabled",
    ",virtual",
    ",nonvirtual"
  };

  buf[0] = 0;
  for (int i = 0; i < 5; i++) {
    if ((flags & bits[i]) != 0) {
      strcat(buf, labels[i]);
    }
  }
  // Guarantee a leading comma so that skipping one character is always valid.
  if (buf[0] == 0) {
    strcat(buf, ",");
  }
  assert(buf[0] == ',', "must be");
  return buf + 1;
}

void Compile::print_intrinsic_statistics() {
  char flagsbuf[100];
  ttyLocker ttyl;
  if (xtty != NULL)  xtty->head("statistics type='intrinsic'");
  tty->print_cr("Compiler intrinsic usage:");
  juint total = _intrinsic_hist_count[vmIntrinsics::_none];
  if (total == 0)  total = 1;  // avoid div0 in case of no successes
  #define PRINT_STAT_LINE(name, c, f) \
    tty->print_cr("  %4d (%4.1f%%) %s (%s)", (int)(c), ((c) * 100.0) / total, name, f);
  for (int index = 1 + (int)vmIntrinsics::_none; index < (int)vmIntrinsics::ID_LIMIT; index++) {
    vmIntrinsics::ID id = (vmIntrinsics::ID) index;
    int   flags = _intrinsic_hist_flags[id];
    juint count = _intrinsic_hist_count[id];
    if ((flags | count) != 0) {
      PRINT_STAT_LINE(vmIntrinsics::name_at(id), count, format_flags(flags, flagsbuf));
    }
  }
  PRINT_STAT_LINE("total", total, format_flags(_intrinsic_hist_flags[vmIntrinsics::_none], flagsbuf));
  if (xtty != NULL)  xtty->tail("statistics");
}

// Print the statistics of the individual optimizer phases and, if any
// intrinsic flags were recorded, the intrinsic statistics as well.
void Compile::print_statistics() {
  {
    // The tty lock is held only for the duration of this scope.
    ttyLocker ttyl;
    if (xtty != NULL) {
      xtty->head("statistics type='opto'");
    }
    Parse::print_statistics();
    PhaseCCP::print_statistics();
    PhaseRegAlloc::print_statistics();
    Scheduling::print_statistics();
    PhasePeephole::print_statistics();
    PhaseIdealLoop::print_statistics();
    if (xtty != NULL) {
      xtty->tail("statistics");
    }
  }

  if (_intrinsic_hist_flags[vmIntrinsics::_none] == 0) {
    return;                     // no intrinsic activity recorded
  }
  // put this under its own <statistics> element.
  print_intrinsic_statistics();
}
#endif //PRODUCT

// Support for bundling info: return the Bundle entry stored for node 'n'.
// The node's index must be covered by the bundling table (asserted).
Bundle* Compile::node_bundling(const Node *n) {
  assert(valid_bundle_info(n), "oob");
  const uint idx = n->_idx;
  return &_node_bundling_base[idx];
}

// True if the index of 'n' lies below the bundling table limit.
bool Compile::valid_bundle_info(const Node *n) {
  return n->_idx < _node_bundling_limit;
}


// Redirect every use of 'n' to 'nn'.  Each user is taken out of the initial
// GVN hash table while its edges are rewritten, put back afterwards if it
// had been in the table, and queued for IGVN.
void Compile::gvn_replace_by(Node* n, Node* nn) {
  for (DUIterator_Last imin, i = n->last_outs(imin); i >= imin; ) {
    Node* user = n->last_out(i);
    // Edges take part in hashing, so unhash before touching them.
    const bool was_hashed = initial_gvn()->hash_delete(user);

    uint rewired = 0;
    for (uint slot = 0; slot < user->len(); slot++) {
      if (user->in(slot) != n) {
        continue;
      }
      // Slots below req() are required inputs, the rest precedence edges.
      if (slot < user->req()) {
        user->set_req(slot, nn);
      } else {
        user->set_prec(slot, nn);
      }
      rewired++;
    }

    if (was_hashed) {
      // reinsert into table
      initial_gvn()->hash_find_insert(user);
    }
    record_for_igvn(user);
    i -= rewired;    // we deleted 1 or more copies of this edge
  }
}


// True if 'n' cannot be a usable Node pointer: it is NULL, has its low bit
// set (uninitialized, etc.), or its first word equals badAddress (killed by
// Node::destruct).  The checks short-circuit in that order, so the pointer
// is only dereferenced after the first two have passed.
static inline bool not_a_node(const Node* n) {
  return n == NULL
      || ((intptr_t)n & 1) != 0
      || *(address*)n == badAddress;
}

// Identify all nodes that are reachable from below, useful.
// Use breadth-first pass that records state in a Unique_Node_List,
// recursive traversal is slower.
void Compile::identify_useful_nodes(Unique_Node_List &useful) {
  // Preallocate space for roughly the number of live nodes.
  int estimated_worklist_size = live_nodes();
  useful.map( estimated_worklist_size, NULL );

  // Seed the worklist with the root and, to keep the cached node alive,
  // with the cached 'top' if there is one.
  if (root() != NULL) {
    useful.push(root());
  }
  if (cached_top_node()) {
    useful.push(cached_top_node());
  }

  // The list doubles as the breadth-first queue: it grows while it is
  // being scanned.
  uint cursor = 0;
  while (cursor < useful.size()) {
    assert( cursor < unique(), "Unique useful nodes < total nodes");
    Node *cur = useful.at(cursor);
    const uint in_cnt = cur->len();
    for (uint e = 0; e < in_cnt; e++) {
      Node *input = cur->in(e);
      if (!not_a_node(input)) {
        useful.push(input);
      }
    }
    ++cursor;
  }
}

// Update dead_node_list with any missing dead nodes using useful
// list. Consider all non-useful nodes to be useless i.e., dead nodes.
void Compile::update_dead_node_list(Unique_Node_List &useful) {
  const uint limit = unique();
  VectorSet& live_set = useful.member_set();

  for (uint idx = 0; idx < limit; idx++) {
    if (live_set.test(idx)) {
      continue;                 // useful node, nothing to record
    }
    // Not in the useful set: mark it as dead in the dead node list.
    record_dead_node(idx);
  }
}

// Compact 'inlines' in place, keeping (in their original order) only the
// call generators whose call node is a member of 'useful', then truncate the
// array to the number of kept entries.
void Compile::remove_useless_late_inlines(GrowableArray<CallGenerator*>* inlines, Unique_Node_List &useful) {
  int kept = 0;   // number of surviving entries so far == next write position
  for (int read = 0; read < inlines->length(); read++) {
    CallGenerator* generator = inlines->at(read);
    CallNode* call = generator->call_node();
    // Once at least one entry has been dropped, slide this one down.
    if (kept != read) {
      inlines->at_put(kept, generator);
    }
    // Only a useful entry advances the write position; a useless one will be
    // overwritten by the next entry or cut off by the truncation below.
    if (useful.member(call)) {
      kept++;
    }
  }
  inlines->trunc_to(kept);
}

// Disconnect all useless nodes by disconnecting those at the boundary.
// For every node on 'useful', out-edges that lead to nodes which are not
// members of 'useful' are deleted. Afterwards the compile-wide side lists
// (macro nodes, range check casts, expensive nodes and the three late-inline
// lists) are pruned of entries that are not members of 'useful'.
void Compile::remove_useless_nodes(Unique_Node_List &useful) {
  uint next = 0;
  while (next < useful.size()) {
    Node *n = useful.at(next++);
    if (n->is_SafePoint()) {
      // We're done with a parsing phase. Replaced nodes are not valid
      // beyond that point.
      n->as_SafePoint()->delete_replaced_nodes();
    }
    // Use raw traversal of out edges since this code removes out edges
    int max = n->outcnt();
    for (int j = 0; j < max; ++j) {
      Node* child = n->raw_out(j);
      if (! useful.member(child)) {
        assert(!child->is_top() || child != top(),
               "If top is cached in Compile object it is in useful list");
        // Only need to remove this out-edge to the useless node
        n->raw_del_out(j);
        // Slot j is re-examined on the next iteration and the bound shrinks
        // by one, since an out-edge was just deleted.
        --j;
        --max;
      }
    }
    // A node left with a single, special user: queue that user for IGVN.
    if (n->outcnt() == 1 && n->has_special_unique_user()) {
      record_for_igvn(n->unique_out());
    }
  }
  // Remove useless macro and predicate opaq nodes
  // (iterates backwards so removals do not disturb indices still to visit)
  for (int i = C->macro_count()-1; i >= 0; i--) {
    Node* n = C->macro_node(i);
    if (!useful.member(n)) {
      remove_macro_node(n);
    }
  }
  // Remove useless CastII nodes with range check dependency
  for (int i = range_check_cast_count() - 1; i >= 0; i--) {
    Node* cast = range_check_cast_node(i);
    if (!useful.member(cast)) {
      remove_range_check_cast(cast);
    }
  }
  // Remove useless expensive node
  for (int i = C->expensive_count()-1; i >= 0; i--) {
    Node* n = C->expensive_node(i);
    if (!useful.member(n)) {
      remove_expensive_node(n);
    }
  }
  // clean up the late inline lists
  remove_useless_late_inlines(&_string_late_inlines, useful);
  remove_useless_late_inlines(&_boxing_late_inlines, useful);
  remove_useless_late_inlines(&_late_inlines, useful);
  // Debug builds only: re-verify the graph edges after the pruning.
  debug_only(verify_graph_edges(true/*check for no_dead_code*/);)
}

//------------------------------frame_size_in_words-----------------------------
// Returns the frame size, held in _frame_slots, converted to a word count.
int Compile::frame_size_in_words() const {
  // Log2 of the number of slots per word: 0 in LP32 and 1 in LP64.
  const int slots_per_word_log2 = (LogBytesPerWord - LogBytesPerInt);
  const int word_count = _frame_slots >> slots_per_word_log2;
  // Shifting back must reproduce the slot count, i.e. no bits were dropped.
  assert( word_count << slots_per_word_log2 == _frame_slots, "frame size must be properly aligned in LP64" );
  return word_count;
}

// Number of bytes of stack to bang for this compiled method.
// The stack size the interpreter would need in case of a deoptimization is
// taken into account, which removes the need to bang the stack in the
// deoptimization blob and in turn simplifies stack overflow handling.
// The result is the larger of that size and the compiled frame size.
int Compile::bang_size_in_bytes() const {
  return MAX2(frame_size_in_bytes(), _interpreter_frame_size);
}

// ============================================================================
//------------------------------CompileWrapper---------------------------------
// Stack-allocated helper that brackets one compilation.
// The constructor stores the Compile* as the current ciEnv's compiler data,
// resets per-compile type state and calls begin_method(); the destructor
// calls end_method(), frees the scratch buffer blob if one exists and clears
// the ciEnv's compiler data again.
class CompileWrapper : public StackObj {
  Compile *const _compile;   // the compilation being bracketed
 public:
  CompileWrapper(Compile* compile);

  ~CompileWrapper();
};

// Registers 'compile' as the active compilation of the current ciEnv and
// resets the per-compile type and index-set state before begin_method().
CompileWrapper::CompileWrapper(Compile* compile) : _compile(compile) {
  // The Compile* pointer is stored in the current ciEnv.
  ciEnv* const active_env = _compile->env();
  assert(active_env == ciEnv::current(), "must already be a ciEnv active");
  assert(active_env->compiler_data() == NULL, "compile already active?");
  active_env->set_compiler_data(_compile);
  assert(_compile == Compile::current(), "sanity");

  // Clear type-system bookkeeping left over on the Compile object.
  _compile->set_type_dict(NULL);
  _compile->set_type_hwm(NULL);
  _compile->set_type_last_size(0);
  _compile->set_last_tf(NULL, NULL);

  // Clear index-set allocation state.
  _compile->set_indexSet_arena(NULL);
  _compile->set_indexSet_free_block_list(NULL);

  // Set up the type arena and the Type tables for this compile.
  _compile->init_type_arena();
  Type::Initialize(_compile);

  // No scratch buffer yet; the destructor frees one if it gets allocated.
  _compile->set_scratch_buffer_blob(NULL);
  _compile->begin_method();
}
// Ends the compilation: calls end_method(), releases the scratch buffer blob
// when one was allocated, and detaches the Compile from its ciEnv.
CompileWrapper::~CompileWrapper() {
  _compile->end_method();

  BufferBlob* const scratch = _compile->scratch_buffer_blob();
  if (scratch != NULL) {
    BufferBlob::free(scratch);
  }

  _compile->env()->set_compiler_data(NULL);
}


//----------------------------print_compile_messages---------------------------
// Non-product builds only: print diagnostic banners describing how this
// compile differs from the default settings, optionally break into the
// debugger, and print the compile id. Does nothing in product builds.
void Compile::print_compile_messages() {
#ifndef PRODUCT
  if (PrintOpto) {
    // Each banner reports a recompilation with one optimization turned off.
    if (!_subsume_loads) {
      // Recompiling without allowing machine instructions to subsume loads
      tty->print_cr("*********************************************************");
      tty->print_cr("** Bailout: Recompile without subsuming loads          **");
      tty->print_cr("*********************************************************");
    }
    if (_do_escape_analysis != DoEscapeAnalysis) {
      // Recompiling without escape analysis
      tty->print_cr("*********************************************************");
      tty->print_cr("** Bailout: Recompile without escape analysis          **");
      tty->print_cr("*********************************************************");
    }
    if (_eliminate_boxing != EliminateAutoBox) {
      // Recompiling without boxing elimination
      tty->print_cr("*********************************************************");
      tty->print_cr("** Bailout: Recompile without boxing elimination       **");
      tty->print_cr("*********************************************************");
    }
  }

  if (env()->break_at_compile()) {
    // Open the debugger when compiling this method.
    tty->print("### Breaking when compiling: ");
    method()->print_short_name();
    tty->cr();
    BREAKPOINT;
  }

  if (PrintOpto) {
    // Tag the output with the compile id; OSR compiles get a prefix.
    if (!is_osr_compilation()) {
      tty->print("%3d", _compile_id);
    } else {
      tty->print("[OSR]%3d", _compile_id);
    }
  }
#endif
}


//-----------------------init_scratch_buffer_blob------------------------------
// Construct a temporary BufferBlob and cache it for this compile.
void Compile::init_scratch_buffer_blob(int const_size) {
  // If there is already a scratch buffer blob allocated and the
  // constant section is big enough, use it.  Otherwise free the
  // current and allocate a new one.
  BufferBlob* blob = scratch_buffer_blob();
  if ((blob != NULL) && (const_size <= _scratch_const_size)) {
    // Use the current blob.
  } else {
    if (blob != NULL) {
      BufferBlob::free(blob);
    }

    ResourceMark rm;
    _scratch_const_size = const_size;
    int size = (MAX_inst_size + MAX_stubs_size + _scratch_const_size);
    blob = BufferBlob::create("Compile::scratch_buffer", size);
    // Record the buffer blob for next time.
    set_scratch_buffer_blob(blob);
    // Have we run out of code space?
    if (scratch_buffer_blob() == NULL) {
      // Let CompilerBroker disable further compilations.
      record_failure("Not enough space for scratch buffer in CodeCache");
      return;
    }
  }

  // Initialize the relocation buffers
  relocInfo* locs_buf = (relocInfo*) blob->content_end() - MAX_locs_size;
  set_scratch_locs_memory(locs_buf);
}


//-----------------------scratch_emit_size-------------------------------------
// Helper function that computes size by emitting code
//
// n       - the node whose encoding size is wanted; it is emitted into the
//           scratch buffer via n->emit().
// returns - buf.insts_size() of the scratch CodeBuffer after the emission.
//
// The in_scratch_emit_size flag is set for the duration of the call. For a
// MachBranch node the label is temporarily replaced by a fake one bound in
// the scratch buffer and restored afterwards.
uint Compile::scratch_emit_size(const Node* n) {
  // Start scratch_emit_size section.
  set_in_scratch_emit_size(true);

  // Emit into a trash buffer and count bytes emitted.
  // This is a pretty expensive way to compute a size,
  // but it works well enough if seldom used.
  // All common fixed-size instructions are given a size
  // method by the AD file.
  // Note that the scratch buffer blob and locs memory are
  // allocated at the beginning of the compile task, and
  // may be shared by several calls to scratch_emit_size.
  // The allocation of the scratch buffer blob is particularly
  // expensive, since it has to grab the code cache lock.
  BufferBlob* blob = this->scratch_buffer_blob();
  assert(blob != NULL, "Initialize BufferBlob at start");
  assert(blob->size() > MAX_inst_size, "sanity");
  relocInfo* locs_buf = scratch_locs_memory();
  // The code area runs from the start of the blob's content up to the
  // beginning of the relocation memory.
  address blob_begin = blob->content_begin();
  address blob_end   = (address)locs_buf;
  assert(blob->content_contains(blob_end), "sanity");
  CodeBuffer buf(blob_begin, blob_end - blob_begin);
  buf.initialize_consts_size(_scratch_const_size);
  buf.initialize_stubs_size(MAX_stubs_size);
  assert(locs_buf != NULL, "sanity");
  // Split the relocation memory into three equal parts, one per section.
  int lsize = MAX_locs_size / 3;
  buf.consts()->initialize_shared_locs(&locs_buf[lsize * 0], lsize);
  buf.insts()->initialize_shared_locs( &locs_buf[lsize * 1], lsize);
  buf.stubs()->initialize_shared_locs( &locs_buf[lsize * 2], lsize);

  // Do the emission.

  Label fakeL; // Fake label for branch instructions.
  Label*   saveL = NULL;
  uint save_bnum = 0;
  bool is_branch = n->is_MachBranch();
  if (is_branch) {
    // Bind the fake label in the scratch buffer, remember the branch's real
    // label and block number, and point the branch at the fake label.
    MacroAssembler masm(&buf);
    masm.bind(fakeL);
    n->as_MachBranch()->save_label(&saveL, &save_bnum);
    n->as_MachBranch()->label_set(&fakeL, 0);
  }
  n->emit(buf, this->regalloc());

  // Emitting into the scratch buffer should not fail
  assert (!failing(), err_msg_res("Must not have pending failure. Reason is: %s", failure_reason()));

  if (is_branch) // Restore label.
    n->as_MachBranch()->label_set(saveL, save_bnum);

  // End scratch_emit_size section.
  set_in_scratch_emit_size(false);

  return buf.insts_size();
}


// ============================================================================
//------------------------------Compile standard-------------------------------
debug_only( int Compile::_debug_idx = 100000; )

// Compile a method.  entry_bci is -1 for normal compilations and indicates
// the continuation bci for on stack replacement.
//
// The whole compilation runs inside this constructor: parse the method into
// a graph, remove useless nodes, process warm-call inlining, Optimize(),
// Code_Gen(), and finally register the generated method with the ciEnv.
// Every stage is followed by a failing() check; on failure the constructor
// returns early and nothing is registered.
//
// ci_env              - compilation environment (log, compile id, registration)
// compiler            - passed through to env()->register_method()
// target              - the method to compile
// osr_bci             - stored as _entry_bci
// subsume_loads, do_escape_analysis, eliminate_boxing
//                     - stored in the members of the same name


Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr_bci,
                  bool subsume_loads, bool do_escape_analysis, bool eliminate_boxing )
                : Phase(Compiler),
                  _env(ci_env),
                  _log(ci_env->log()),
                  _compile_id(ci_env->compile_id()),
                  _save_argument_registers(false),
                  _stub_name(NULL),
                  _stub_function(NULL),
                  _stub_entry_point(NULL),
                  _method(target),
                  _entry_bci(osr_bci),
                  _initial_gvn(NULL),
                  _for_igvn(NULL),
                  _warm_calls(NULL),
                  _subsume_loads(subsume_loads),
                  _do_escape_analysis(do_escape_analysis),
                  _eliminate_boxing(eliminate_boxing),
                  _failure_reason(NULL),
                  _code_buffer("Compile::Fill_buffer"),
                  _orig_pc_slot(0),
                  _orig_pc_slot_offset_in_bytes(0),
                  _has_method_handle_invokes(false),
                  _mach_constant_base_node(NULL),
                  _node_bundling_limit(0),
                  _node_bundling_base(NULL),
                  _java_calls(0),
                  _inner_loops(0),
                  _scratch_const_size(-1),
                  _in_scratch_emit_size(false),
                  _dead_node_list(comp_arena()),
                  _dead_node_count(0),
#ifndef PRODUCT
                  _trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")),
                  _in_dump_cnt(0),
                  _printer(IdealGraphPrinter::printer()),
#endif
                  _congraph(NULL),
                  _comp_arena(mtCompiler),
                  _node_arena(mtCompiler),
                  _old_arena(mtCompiler),
                  _Compile_types(mtCompiler),
                  _replay_inline_data(NULL),
                  _late_inlines(comp_arena(), 2, 0, NULL),
                  _string_late_inlines(comp_arena(), 2, 0, NULL),
                  _boxing_late_inlines(comp_arena(), 2, 0, NULL),
                  _late_inlines_pos(0),
                  _number_of_mh_late_inlines(0),
                  _inlining_progress(false),
                  _inlining_incrementally(false),
                  _print_inlining_list(NULL),
                  _print_inlining_idx(0),
                  _interpreter_frame_size(0),
                  _max_node_limit(MaxNodeLimit) {
  C = this;

  // Registers this compile with the ciEnv for the lifetime of the
  // constructor body (see CompileWrapper).
  CompileWrapper cw(this);
#ifndef PRODUCT
  if (TimeCompiler2) {
    tty->print(" ");
    target->holder()->name()->print();
    tty->print(".");
    target->print_short_name();
    tty->print("  ");
  }
  TraceTime t1("Total compilation time", &_t_totalCompilation, TimeCompiler, TimeCompiler2);
  TraceTime t2(NULL, &_t_methodCompilation, TimeCompiler, false);
  bool print_opto_assembly = PrintOptoAssembly || _method->has_option("PrintOptoAssembly");
  if (!print_opto_assembly) {
    // Fall back to opto assembly when assembly printing was requested but
    // the disassembler cannot decode.
    bool print_assembly = (PrintAssembly || _method->should_print_assembly());
    if (print_assembly && !Disassembler::can_decode()) {
      tty->print_cr("PrintAssembly request changed to PrintOptoAssembly");
      print_opto_assembly = true;
    }
  }
  set_print_assembly(print_opto_assembly);
  set_parsed_irreducible_loop(false);

  if (method()->has_option("ReplayInline")) {
    _replay_inline_data = ciReplay::load_inline_data(method(), entry_bci(), ci_env->comp_level());
  }
#endif
  set_print_inlining(PrintInlining || method()->has_option("PrintInlining") NOT_PRODUCT( || PrintOptoInlining));
  set_print_intrinsics(PrintIntrinsics || method()->has_option("PrintIntrinsics"));
  set_has_irreducible_loop(true); // conservative until build_loop_tree() reset it

  if (ProfileTraps RTM_OPT_ONLY( || UseRTMLocking )) {
    // Make sure the method being compiled gets its own MDO,
    // so we can at least track the decompile_count().
    // Need MDO to record RTM code generation state.
    method()->ensure_method_data();
  }

  Init(::AliasLevel);


  print_compile_messages();

  _ilt = InlineTree::build_inline_tree_root();

  // Even if NO memory addresses are used, MergeMem nodes must have at least 1 slice
  assert(num_alias_types() >= AliasIdxRaw, "");

#define MINIMUM_NODE_HASH  1023
  // Node list that Iterative GVN will start with
  Unique_Node_List for_igvn(comp_arena());
  set_for_igvn(&for_igvn);

  // GVN that will be run immediately on new nodes
  // The size estimate is derived from the bytecode size, with a lower bound
  // of MINIMUM_NODE_HASH.
  uint estimated_size = method()->code_size()*4+64;
  estimated_size = (estimated_size < MINIMUM_NODE_HASH ? MINIMUM_NODE_HASH : estimated_size);
  PhaseGVN gvn(node_arena(), estimated_size);
  set_initial_gvn(&gvn);

  if (print_inlining() || print_intrinsics()) {
    _print_inlining_list = new (comp_arena())GrowableArray<PrintInliningBuffer>(comp_arena(), 1, 1, PrintInliningBuffer());
  }
  { // Scope for timing the parser
    TracePhase t3("parse", &_t_parser, true);

    // Put top into the hash table ASAP.
    initial_gvn()->transform_no_reclaim(top());

    // Set up tf(), start(), and find a CallGenerator.
    CallGenerator* cg = NULL;
    if (is_osr_compilation()) {
      const TypeTuple *domain = StartOSRNode::osr_domain();
      const TypeTuple *range = TypeTuple::make_range(method()->signature());
      init_tf(TypeFunc::make(domain, range));
      StartNode* s = new (this) StartOSRNode(root(), domain);
      initial_gvn()->set_type_bottom(s);
      init_start(s);
      cg = CallGenerator::for_osr(method(), entry_bci());
    } else {
      // Normal case.
      init_tf(TypeFunc::make(method()));
      StartNode* s = new (this) StartNode(root(), tf()->domain());
      initial_gvn()->set_type_bottom(s);
      init_start(s);
      if (method()->intrinsic_id() == vmIntrinsics::_Reference_get && UseG1GC) {
        // With java.lang.ref.reference.get() we must go through the
        // intrinsic when G1 is enabled - even when get() is the root
        // method of the compile - so that, if necessary, the value in
        // the referent field of the reference object gets recorded by
        // the pre-barrier code.
        // Specifically, if G1 is enabled, the value in the referent
        // field is recorded by the G1 SATB pre barrier. This will
        // result in the referent being marked live and the reference
        // object removed from the list of discovered references during
        // reference processing.
        cg = find_intrinsic(method(), false);
      }
      if (cg == NULL) {
        // No intrinsic was chosen: parse the method itself, using the
        // interpreter invocation count as the expected number of uses.
        float past_uses = method()->interpreter_invocation_count();
        float expected_uses = past_uses;
        cg = CallGenerator::for_inline(method(), expected_uses);
      }
    }
    if (failing())  return;
    if (cg == NULL) {
      record_method_not_compilable_all_tiers("cannot parse method");
      return;
    }
    JVMState* jvms = build_start_state(start(), tf());
    if ((jvms = cg->generate(jvms)) == NULL) {
      // A failure reason asking for a retry after class loading is left
      // as it is; any other failure is recorded as a parse failure.
      if (!failure_reason_is(C2Compiler::retry_class_loading_during_parsing())) {
        record_method_not_compilable("method parse failed");
      }
      return;
    }
    GraphKit kit(jvms);

    if (!kit.stopped()) {
      // Accept return values, and transfer control we know not where.
      // This is done by a special, unique ReturnNode bound to root.
      return_values(kit.jvms());
    }

    if (kit.has_exceptions()) {
      // Any exceptions that escape from this call must be rethrown
      // to whatever caller is dynamically above us on the stack.
      // This is done by a special, unique RethrowNode bound to root.
      rethrow_exceptions(kit.transfer_exceptions_into_jvms());
    }

    assert(IncrementalInline || (_late_inlines.length() == 0 && !has_mh_late_inlines()), "incremental inlining is off");

    // String calls are inlined here only when no other late inlines are
    // pending.
    if (_late_inlines.length() == 0 && !has_mh_late_inlines() && !failing() && has_stringbuilder()) {
      inline_string_calls(true);
    }

    if (failing())  return;

    print_method(PHASE_BEFORE_REMOVEUSELESS, 3);

    // Remove clutter produced by parsing.
    if (!failing()) {
      ResourceMark rm;
      PhaseRemoveUseless pru(initial_gvn(), &for_igvn);
    }
  }

  // Note:  Large methods are capped off in do_one_bytecode().
  if (failing())  return;

  // After parsing, node notes are no longer automagic.
  // They must be propagated by register_new_node_with_optimizer(),
  // clone(), or the like.
  set_default_node_notes(NULL);

  // Repeat warm-call inlining until a pass reports no successes.
  for (;;) {
    int successes = Inline_Warm();
    if (failing())  return;
    if (successes == 0)  break;
  }

  // Drain the list.
  Finish_Warm();
#ifndef PRODUCT
  if (_printer) {
    _printer->print_inlining(this);
  }
#endif

  if (failing())  return;
  NOT_PRODUCT( verify_graph_edges(); )

  // Now optimize
  Optimize();
  if (failing())  return;
  NOT_PRODUCT( verify_graph_edges(); )

#ifndef PRODUCT
  if (PrintIdeal) {
    ttyLocker ttyl;  // keep the following output all in one block
    // This output goes directly to the tty, not the compiler log.
    // To enable tools to match it up with the compilation activity,
    // be sure to tag this tty output with the compile ID.
    if (xtty != NULL) {
      xtty->head("ideal compile_id='%d'%s", compile_id(),
                 is_osr_compilation()    ? " compile_kind='osr'" :
                 "");
    }
    root()->dump(9999);
    if (xtty != NULL) {
      xtty->tail("ideal");
    }
  }
#endif

  NOT_PRODUCT( verify_barriers(); )

  // Dump compilation data to replay it.
  if (method()->has_option("DumpReplay")) {
    env()->dump_replay_data(_compile_id);
  }
  if (method()->has_option("DumpInline") && (ilt() != NULL)) {
    env()->dump_inline_data(_compile_id);
  }

  // Now that we know the size of all the monitors we can add a fixed slot
  // for the original deopt pc.

  // The new slot takes the current fixed_slots() value; fixed_slots is then
  // advanced by the number of stack slots one address occupies.
  _orig_pc_slot =  fixed_slots();
  int next_slot = _orig_pc_slot + (sizeof(address) / VMRegImpl::stack_slot_size);
  set_fixed_slots(next_slot);

  // Compute when to use implicit null checks. Used by matching trap based
  // nodes and NullCheck optimization.
  set_allowed_deopt_reasons();

  // Now generate code
  Code_Gen();
  if (failing())  return;

  // Check if we want to skip execution of all compiled code.
  {
#ifndef PRODUCT
    if (OptoNoExecute) {
      record_method_not_compilable("+OptoNoExecute");  // Flag as failed
      return;
    }
    TracePhase t2("install_code", &_t_registerMethod, TimeCompiler);
#endif

    // Record the entry offsets: the entry kind that matches this compile
    // is set to _first_block_size, the other one to 0.
    if (is_osr_compilation()) {
      _code_offsets.set_value(CodeOffsets::Verified_Entry, 0);
      _code_offsets.set_value(CodeOffsets::OSR_Entry, _first_block_size);
    } else {
      _code_offsets.set_value(CodeOffsets::Verified_Entry, _first_block_size);
      _code_offsets.set_value(CodeOffsets::OSR_Entry, 0);
    }

    env()->register_method(_method, _entry_bci,
                           &_code_offsets,
                           _orig_pc_slot_offset_in_bytes,
                           code_buffer(),
                           frame_size_in_words(), _oop_map_set,
                           &_handler_table, &_inc_table,
                           compiler,
                           env()->comp_level(),
                           has_unsafe_access(),
                           SharedRuntime::is_wide_vector(max_vector_size()),
                           rtm_state()
                           );

    if (log() != NULL) // Print code cache state into compiler log
      log()->code_cache_state();
  }
}

//------------------------------Compile----------------------------------------
// Compile a runtime stub
//
// Builds the stub graph with GraphKit::gen_stub(), runs Code_Gen() and, on
// success, creates a RuntimeStub whose entry point is stored in
// _stub_entry_point. No method is associated with this compile
// (_method is NULL, _compile_id is 0).
//
// ci_env             - compilation environment
// generator          - called once to produce the stub's TypeFunc
// stub_function      - passed to gen_stub(); stored in _stub_function
// stub_name          - passed to gen_stub() and new_runtime_stub()
// is_fancy_jump, pass_tls, return_pc
//                    - forwarded unchanged to gen_stub()
// save_arg_registers - stored in _save_argument_registers and forwarded to
//                      new_runtime_stub()
Compile::Compile( ciEnv* ci_env,
                  TypeFunc_generator generator,
                  address stub_function,
                  const char *stub_name,
                  int is_fancy_jump,
                  bool pass_tls,
                  bool save_arg_registers,
                  bool return_pc )
  : Phase(Compiler),
    _env(ci_env),
    _log(ci_env->log()),
    _compile_id(0),
    _save_argument_registers(save_arg_registers),
    _method(NULL),
    _stub_name(stub_name),
    _stub_function(stub_function),
    _stub_entry_point(NULL),
    _entry_bci(InvocationEntryBci),
    _initial_gvn(NULL),
    _for_igvn(NULL),
    _warm_calls(NULL),
    _orig_pc_slot(0),
    _orig_pc_slot_offset_in_bytes(0),
    _subsume_loads(true),
    _do_escape_analysis(false),
    _eliminate_boxing(false),
    _failure_reason(NULL),
    _code_buffer("Compile::Fill_buffer"),
    _has_method_handle_invokes(false),
    _mach_constant_base_node(NULL),
    _node_bundling_limit(0),
    _node_bundling_base(NULL),
    _java_calls(0),
    _inner_loops(0),
#ifndef PRODUCT
    _trace_opto_output(TraceOptoOutput),
    _in_dump_cnt(0),
    _printer(NULL),
#endif
    _comp_arena(mtCompiler),
    _node_arena(mtCompiler),
    _old_arena(mtCompiler),
    _Compile_types(mtCompiler),
    _dead_node_list(comp_arena()),
    _dead_node_count(0),
    _congraph(NULL),
    _replay_inline_data(NULL),
    _number_of_mh_late_inlines(0),
    _inlining_progress(false),
    _inlining_incrementally(false),
    _print_inlining_list(NULL),
    _print_inlining_idx(0),
    _allowed_reasons(0),
    _interpreter_frame_size(0),
    _max_node_limit(MaxNodeLimit) {
  C = this;

#ifndef PRODUCT
  TraceTime t1(NULL, &_t_totalCompilation, TimeCompiler, false);
  TraceTime t2(NULL, &_t_stubCompilation, TimeCompiler, false);
  set_print_assembly(PrintFrameConverterAssembly);
  set_parsed_irreducible_loop(false);
#endif
  set_has_irreducible_loop(false); // no loops

  // Registers this compile with the ciEnv for the lifetime of the
  // constructor body (see CompileWrapper).
  CompileWrapper cw(this);
  Init(/*AliasLevel=*/ 0);
  init_tf((*generator)());

  {
    // The following is a dummy for the sake of GraphKit::gen_stub
    // Both for_igvn and gvn are locals of this scope, so they go away again
    // once the stub graph has been built.
    Unique_Node_List for_igvn(comp_arena());
    set_for_igvn(&for_igvn);  // not used, but some GraphKit guys push on this
    PhaseGVN gvn(Thread::current()->resource_area(),255);
    set_initial_gvn(&gvn);    // not significant, but GraphKit guys use it pervasively
    gvn.transform_no_reclaim(top());

    GraphKit kit;
    kit.gen_stub(stub_function, stub_name, is_fancy_jump, pass_tls, return_pc);
  }

  NOT_PRODUCT( verify_graph_edges(); )
  Code_Gen();
  if (failing())  return;


  // Entry point will be accessed using compile->stub_entry_point();
  if (code_buffer() == NULL) {
    Matcher::soft_match_failure();
  } else {
    if (PrintAssembly && (WizardMode || Verbose))
      tty->print_cr("### Stub::%s", stub_name);

    if (!failing()) {
      assert(_fixed_slots == 0, "no fixed slots used for runtime stubs");

      // Make the NMethod
      // For now we mark the frame as never safe for profile stackwalking
      RuntimeStub *rs = RuntimeStub::new_runtime_stub(stub_name,
                                                      code_buffer(),
                                                      CodeOffsets::frame_never_safe,
                                                      // _code_offsets.value(CodeOffsets::Frame_Complete),
                                                      frame_size_in_words(),
                                                      _oop_map_set,
                                                      save_arg_registers);
      assert(rs != NULL && rs->is_runtime_stub(), "sanity check");

      _stub_entry_point = rs->entry_point();
    }
  }
}

//------------------------------Init-------------------------------------------
// Prepare for a single compilation
//
// Resets the per-compile state of this object: node counter, cached pointers,
// the root and top nodes, the ciEnv's recorders, assorted flags taken from
// VM options, the alias type table and cache, and the node side lists.
//
// aliaslevel - requested alias analysis level; stored in _AliasLevel after
//              being lowered from 3 to 2 when escape analysis is off.
void Compile::Init(int aliaslevel) {
  _unique  = 0;
  _regalloc = NULL;

  _tf      = NULL;  // filled in later
  _top     = NULL;  // cached later
  _matcher = NULL;  // filled in later
  _cfg     = NULL;  // filled in later

  set_24_bit_selection_and_mode(Use24BitFP, false);

  _node_note_array = NULL;
  _default_node_notes = NULL;

  _immutable_memory = NULL; // filled in at first inquiry

  // Globally visible Nodes
  // First set TOP to NULL to give safe behavior during creation of RootNode
  set_cached_top_node(NULL);
  set_root(new (this) RootNode());
  // Now that you have a Root to point to, create the real TOP
  set_cached_top_node( new (this) ConNode(Type::TOP) );
  set_recent_alloc(NULL, NULL);

  // Create Debug Information Recorder to record scopes, oopmaps, etc.
  env()->set_oop_recorder(new OopRecorder(env()->arena()));
  env()->set_debug_info(new DebugInformationRecorder(env()->oop_recorder()));
  env()->set_dependencies(new Dependencies(env()));

  _fixed_slots = 0;
  set_has_split_ifs(false);
  set_has_loops(has_method() && method()->has_loops()); // first approximation
  set_has_stringbuilder(false);
  set_has_boxed_value(false);
  _trap_can_recompile = false;  // no traps emitted yet
  _major_progress = true; // start out assuming good things will happen
  set_has_unsafe_access(false);
  set_max_vector_size(0);
  Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist));
  set_decompile_count(0);

  // Copy compilation policy from the VM options into this compile.
  set_do_freq_based_layout(BlockLayoutByFrequency || method_has_option("BlockLayoutByFrequency"));
  set_num_loop_opts(LoopOptsCount);
  set_do_inlining(Inline);
  set_max_inline_size(MaxInlineSize);
  set_freq_inline_size(FreqInlineSize);
  set_do_scheduling(OptoScheduling);
  set_do_count_invocations(false);
  set_do_method_data_update(false);
  set_rtm_state(NoRTM); // No RTM lock eliding by default
  // NOTE(review): presumably overrides _max_node_limit when the method
  // carries a "MaxNodeLimit" option - confirm in method_has_option_value().
  method_has_option_value("MaxNodeLimit", _max_node_limit);
#if INCLUDE_RTM_OPT
  if (UseRTMLocking && has_method() && (method()->method_data_or_null() != NULL)) {
    int rtm_state = method()->method_data()->rtm_state();
    if (method_has_option("NoRTMLockEliding") || ((rtm_state & NoRTM) != 0)) {
      // Don't generate RTM lock eliding code.
      set_rtm_state(NoRTM);
    } else if (method_has_option("UseRTMLockEliding") || ((rtm_state & UseRTM) != 0) || !UseRTMDeopt) {
      // Generate RTM lock eliding code without abort ratio calculation code.
      set_rtm_state(UseRTM);
    } else if (UseRTMDeopt) {
      // Generate RTM lock eliding code and include abort ratio calculation
      // code if UseRTMDeopt is on.
      set_rtm_state(ProfileRTM);
    }
  }
#endif
  // Node notes are only set up when non-safepoint debug info is recorded.
  if (debug_info()->recording_non_safepoints()) {
    set_node_note_array(new(comp_arena()) GrowableArray<Node_Notes*>
                        (comp_arena(), 8, 0, NULL));
    set_default_node_notes(Node_Notes::make(this));
  }

  // // -- Initialize types before each compile --
  // // Update cached type information
  // if( _method && _method->constants() )
  //   Type::update_loaded_types(_method, _method->constants());

  // Init alias_type map.
  if (!_do_escape_analysis && aliaslevel == 3)
    aliaslevel = 2;  // No unique types without escape analysis
  _AliasLevel = aliaslevel;
  // Start with grow_ats zeroed AliasType entries; _alias_types[i] points at
  // the i-th entry of that array.
  const int grow_ats = 16;
  _max_alias_types = grow_ats;
  _alias_types   = NEW_ARENA_ARRAY(comp_arena(), AliasType*, grow_ats);
  AliasType* ats = NEW_ARENA_ARRAY(comp_arena(), AliasType,  grow_ats);
  Copy::zero_to_bytes(ats, sizeof(AliasType)*grow_ats);
  {
    for (int i = 0; i < grow_ats; i++)  _alias_types[i] = &ats[i];
  }
  // Initialize the first few types.
  _alias_types[AliasIdxTop]->Init(AliasIdxTop, NULL);
  _alias_types[AliasIdxBot]->Init(AliasIdxBot, TypePtr::BOTTOM);
  _alias_types[AliasIdxRaw]->Init(AliasIdxRaw, TypeRawPtr::BOTTOM);
  _num_alias_types = AliasIdxRaw+1;
  // Zero out the alias type cache.
  Copy::zero_to_bytes(_alias_cache, sizeof(_alias_cache));
  // A NULL adr_type hits in the cache right away.  Preload the right answer.
  probe_alias_cache(NULL)->_index = AliasIdxTop;

  // Fresh, empty side lists for this compile.
  _intrinsics = NULL;
  _macro_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
  _predicate_opaqs = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
  _expensive_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
  _range_check_casts = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
  register_library_intrinsics();
#ifdef ASSERT
  _type_verify_symmetry = true;
#endif
}

//---------------------------init_start----------------------------------------
// Check that 's' is the StartNode of this compile object. Nothing is stored:
// unless the compile is already failing, debug builds assert that start()
// finds 's'.
void Compile::init_start(StartNode* s) {
  if (!failing()) {
    assert(s == start(), "");
  }
}

// Returns the StartNode of this compile, located by scanning the users of
// the root node. Reports a fatal error if the root has no Start user.
StartNode* Compile::start() const {
  assert(!failing(), "");
  RootNode* const root_node = root();
  for (DUIterator_Fast imax, i = root_node->fast_outs(imax); i < imax; i++) {
    Node* user = root_node->fast_out(i);
    if (!user->is_Start()) {
      continue;
    }
    return user->as_Start();
  }
  fatal("Did not find Start node!");
  return NULL;
}

//-------------------------------immutable_memory-------------------------------------
// Access immutable memory.
// The answer is cached in _immutable_memory; on the first inquiry it is found
// by scanning the users of the StartNode for the projection whose _con is
// TypeFunc::Memory.
Node* Compile::immutable_memory() {
  if (_immutable_memory != NULL) {
    return _immutable_memory;   // already looked up
  }
  StartNode* const start_node = start();
  // The loop has no bound of its own: it ends only by returning the match.
  for (DUIterator_Fast imax, i = start_node->fast_outs(imax); true; i++) {
    Node* user = start_node->fast_out(i);
    if (user == start_node) {
      continue;   // ignore a use of the start node by itself
    }
    if (user->as_Proj()->_con != TypeFunc::Memory) {
      continue;   // some other projection
    }
    _immutable_memory = user;
    return _immutable_memory;
  }
  ShouldNotReachHere();
  return NULL;
}

//----------------------set_cached_top_node------------------------------------
// Install 'tn' (which may be NULL) as the cached top node, and make sure
// Node::is_top works correctly for both the new and the previous top.
void Compile::set_cached_top_node(Node* tn) {
  if (tn != NULL) {
    verify_top(tn);
  }

  Node* const previous_top = _top;
  _top = tn;

  // Calling Node::setup_is_top gives each node the chance to adjust its
  // _out array: first the newly installed top, then the one it replaced.
  if (tn != NULL) {
    tn->setup_is_top();
  }
  if (previous_top != NULL) {
    previous_top->setup_is_top();
  }
  assert(_top == NULL || top()->is_top(), "");
}

#ifdef ASSERT
// Returns the number of nodes found by identify_useful_nodes(), i.e. the
// live node count as determined by actually walking the graph.
uint Compile::count_live_nodes_by_graph_walk() {
  Unique_Node_List reachable(comp_arena());
  identify_useful_nodes(reachable);
  return reachable.size();
}

void Compile::print_missing_nodes() {

  // Return if CompileLog is NULL and PrintIdealNodeCount is false.
  if ((_log == NULL) && (! PrintIdealNodeCount)) {
    return;
  }

  // This is an expensive function. It is executed only when the user
  // specifies VerifyIdealNodeCount option or otherwise knows the
  // additional work that needs to be done to identify reachable nodes
  // by walking the flow graph and find the missing ones using
  // _dead_node_list.

  Unique_Node_List useful(comp_arena());
  // Get useful node list by walking the graph.
  identify_useful_nodes(useful);

  uint l_nodes = C->live_nodes();
  uint l_nodes_by_walk = useful.size();

  if (l_nodes != l_nodes_by_walk) {
    if (_log != NULL) {
      _log->begin_head("mismatched_nodes count='%d'", abs((int) (l_nodes - l_nodes_by_walk)));
      _log->stamp();
      _log->end_head();
    }
    VectorSet& useful_member_set = useful.member_set();
    int last_idx = l_nodes_by_walk;
    for (int i = 0; i < last_idx; i++) {
      if (useful_member_set.test(i)) {
        if (_dead_node_list.test(i)) {
          if (_log != NULL) {
            _log->elem("mismatched_node_info node_idx='%d' type='both live and dead'", i);
          }
          if (PrintIdealNodeCount) {
            // Print the log message to tty
              tty->print_cr("mismatched_node idx='%d' both live and dead'", i);
              useful.at(i)->dump();
          }
        }
      }
      else if (! _dead_node_list.test(i)) {
        if (_log != NULL) {
          _log->elem("mismatched_node_info node_idx='%d' type='neither live nor dead'", i);
        }
        if (PrintIdealNodeCount) {
          // Print the log message to tty
          tty->print_cr("mismatched_node idx='%d' type='neither live nor dead'", i);
        }
      }
    }
    if (_log != NULL) {
      _log->tail("mismatched_nodes");
    }
  }
}
#endif

#ifndef PRODUCT
// Debug-only sanity checks for a candidate top node; a NULL node is accepted.
void Compile::verify_top(Node* tn) const {
  if (tn == NULL) {
    return;
  }
  assert(tn->is_Con(), "top node must be a constant");
  assert(((ConNode*)tn)->type() == Type::TOP, "top node must have correct type");
  assert(tn->in(0) != NULL, "must have live top node");
}
#endif


///-------------------Managing Per-Node Debug & Profile Info-------------------

// Append freshly zeroed blocks of Node_Notes to arr.  At least as many blocks
// as arr already holds are added, even if grow_by asks for fewer.
void Compile::grow_node_notes(GrowableArray<Node_Notes*>* arr, int grow_by) {
  guarantee(arr != NULL, "");

  const int existing_blocks = arr->length();
  const int blocks_to_add   = (grow_by < existing_blocks) ? existing_blocks : grow_by;
  const int total_notes     = blocks_to_add * _node_notes_block_size;

  // A single zero-filled allocation from the node arena backs all new blocks.
  Node_Notes* block = NEW_ARENA_ARRAY(node_arena(), Node_Notes, total_notes);
  Copy::zero_to_bytes(block, total_notes * sizeof(Node_Notes));

  // Hand the allocation out to the array one block at a time.
  int remaining = total_notes;
  for (; remaining > 0; remaining -= _node_notes_block_size) {
    arr->append(block);
    block += _node_notes_block_size;
  }
  assert(remaining == 0, "exact multiple, please");
}

// Copy the notes of source onto dest without overwriting notes that dest
// already carries.  Returns false when nothing is copied (NULL argument,
// dest is a constant, notes are not being collected, or source has no notes);
// otherwise returns the result of set_node_notes_at().
bool Compile::copy_node_notes_to(Node* dest, Node* source) {
  if (dest == NULL || source == NULL) {
    return false;
  }
  if (dest->is_Con()) {
    // Do not push debug info onto constants.
    return false;
  }

#ifdef ASSERT
  // Leave a bread crumb trail pointing to the original node:
  if (dest != source && dest->debug_orig() == NULL) {
    dest->set_debug_orig(source);
  }
#endif

  if (node_note_array() == NULL) {
    // Not collecting any notes now.
    return false;
  }

  Node_Notes* const from = node_notes_at(source->_idx);
  if (from == NULL || from->is_clear()) {
    return false;
  }

  // dest is a pre-existing node and may already have notes of its own.
  Node_Notes* const to = node_notes_at(dest->_idx);
  const bool dest_has_notes = (to != NULL) && !to->is_clear();
  if (dest_has_notes) {
    // Start from the source notes, then apply dest's on top so that the
    // notes dest already had take precedence.
    Node_Notes merged = *from;
    merged.update_from(to);
    return set_node_notes_at(dest->_idx, &merged);
  }
  return set_node_notes_at(dest->_idx, from);
}


//--------------------------allow_range_check_smearing-------------------------
// Gating condition for coalescing similar range checks.
// Sometimes we try 'speculatively' replacing a series of range checks by a
// single covering check that is at least as strong as any of them.
// If the optimization succeeds, the simplified (strengthened) range check
// will always succeed.  If it fails, we will deopt, and then give up
// on the optimization.
bool Compile::allow_range_check_smearing() const {
  // If this method has already thrown a range-check,
  // assume it was because we already tried range smearing
  // and it failed.
  uint already_trapped = trap_count(Deoptimization::Reason_range_check);
  return !already_trapped;
}


//------------------------------flatten_alias_type-----------------------------
// Canonicalize ("flatten") an address type so that address types which are
// meant to share an alias class map to the same TypePtr.  The steps are
// applied in sequence to tj: unsafe instance references, arrays, instance
// pointers, klass pointers, raw pointers, and finally a collapse that depends
// on _AliasLevel (0: everything becomes TypePtr::BOTTOM; 1: coarse
// categories only; 2 and 3: keep the splits computed by the earlier steps).
// An AnyPtr input yields TypePtr::BOTTOM, which the caller must check for.
const TypePtr *Compile::flatten_alias_type( const TypePtr *tj ) const {
  // Working copies of the offset and pointer kind; several branches below
  // update them before rebuilding tj.
  int offset = tj->offset();
  TypePtr::PTR ptr = tj->ptr();

  // Known instance (scalarizable allocation) alias only with itself.
  bool is_known_inst = tj->isa_oopptr() != NULL &&
                       tj->is_oopptr()->is_known_instance();

  // Process weird unsafe references.
  if (offset == Type::OffsetBot && (tj->isa_instptr() /*|| tj->isa_klassptr()*/)) {
    assert(InlineUnsafeOps, "indeterminate pointers come only from unsafe ops");
    assert(!is_known_inst, "scalarizable allocation should not have unsafe references");
    tj = TypeOopPtr::BOTTOM;
    ptr = tj->ptr();
    offset = tj->offset();
  }

  // Array pointers need some flattening
  const TypeAryPtr *ta = tj->isa_aryptr();
  if (ta && ta->is_stable()) {
    // Erase stability property for alias analysis.
    tj = ta = ta->cast_to_stable(false);
  }
  if( ta && is_known_inst ) {
    // Known-instance arrays: only offsets beyond the length field are merged
    // into a single OffsetBot slice; the instance id is preserved.
    if ( offset != Type::OffsetBot &&
         offset > arrayOopDesc::length_offset_in_bytes() ) {
      offset = Type::OffsetBot; // Flatten constant access into array body only
      tj = ta = TypeAryPtr::make(ptr, ta->ary(), ta->klass(), true, offset, ta->instance_id());
    }
  } else if( ta && _AliasLevel >= 2 ) {
    // For arrays indexed by constant indices, we flatten the alias
    // space to include all of the array body.  Only the header, klass
    // and array length can be accessed un-aliased.
    if( offset != Type::OffsetBot ) {
      if( ta->const_oop() ) { // MethodData* or Method*
        offset = Type::OffsetBot;   // Flatten constant access into array body
        tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),ta->ary(),ta->klass(),false,offset);
      } else if( offset == arrayOopDesc::length_offset_in_bytes() ) {
        // range is OK as-is.
        tj = ta = TypeAryPtr::RANGE;
      } else if( offset == oopDesc::klass_offset_in_bytes() ) {
        tj = TypeInstPtr::KLASS; // all klass loads look alike
        ta = TypeAryPtr::RANGE; // generic ignored junk
        ptr = TypePtr::BotPTR;
      } else if( offset == oopDesc::mark_offset_in_bytes() ) {
        tj = TypeInstPtr::MARK;
        ta = TypeAryPtr::RANGE; // generic ignored junk
        ptr = TypePtr::BotPTR;
      } else {                  // Random constant offset into array body
        offset = Type::OffsetBot;   // Flatten constant access into array body
        tj = ta = TypeAryPtr::make(ptr,ta->ary(),ta->klass(),false,offset);
      }
    }
    // Arrays of fixed size alias with arrays of unknown size.
    if (ta->size() != TypeInt::POS) {
      const TypeAry *tary = TypeAry::make(ta->elem(), TypeInt::POS);
      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,ta->klass(),false,offset);
    }
    // Arrays of known objects become arrays of unknown objects.
    if (ta->elem()->isa_narrowoop() && ta->elem() != TypeNarrowOop::BOTTOM) {
      const TypeAry *tary = TypeAry::make(TypeNarrowOop::BOTTOM, ta->size());
      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset);
    }
    if (ta->elem()->isa_oopptr() && ta->elem() != TypeInstPtr::BOTTOM) {
      const TypeAry *tary = TypeAry::make(TypeInstPtr::BOTTOM, ta->size());
      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset);
    }
    // Arrays of bytes and of booleans both use 'bastore' and 'baload' so
    // cannot be distinguished by bytecode alone.
    if (ta->elem() == TypeInt::BOOL) {
      const TypeAry *tary = TypeAry::make(TypeInt::BYTE, ta->size());
      ciKlass* aklass = ciTypeArrayKlass::make(T_BYTE);
      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,aklass,false,offset);
    }
    // During the 2nd round of IterGVN, NotNull castings are removed.
    // Make sure the Bottom and NotNull variants alias the same.
    // Also, make sure exact and non-exact variants alias the same.
    if (ptr == TypePtr::NotNull || ta->klass_is_exact() || ta->speculative() != NULL) {
      tj = ta = TypeAryPtr::make(TypePtr::BotPTR,ta->ary(),ta->klass(),false,offset);
    }
  }

  // Oop pointers need some flattening
  const TypeInstPtr *to = tj->isa_instptr();
  if( to && _AliasLevel >= 2 && to != TypeOopPtr::BOTTOM ) {
    ciInstanceKlass *k = to->klass()->as_instance_klass();
    if( ptr == TypePtr::Constant ) {
      // Constant oops are generalized unless they are java.lang.Class
      // constants addressed at or beyond the instance size.
      if (to->klass() != ciEnv::current()->Class_klass() ||
          offset < k->size_helper() * wordSize) {
        // No constant oop pointers (such as Strings); they alias with
        // unknown strings.
        assert(!is_known_inst, "not scalarizable allocation");
        tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset);
      }
    } else if( is_known_inst ) {
      tj = to; // Keep NotNull and klass_is_exact for instance type
    } else if( ptr == TypePtr::NotNull || to->klass_is_exact() ) {
      // During the 2nd round of IterGVN, NotNull castings are removed.
      // Make sure the Bottom and NotNull variants alias the same.
      // Also, make sure exact and non-exact variants alias the same.
      tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset);
    }
    // Rebuild the type from its non-speculative components so that the
    // speculative part is not carried into the flattened type.
    if (to->speculative() != NULL) {
      tj = to = TypeInstPtr::make(to->ptr(),to->klass(),to->klass_is_exact(),to->const_oop(),to->offset(), to->instance_id());
    }
    // Canonicalize the holder of this field
    if (offset >= 0 && offset < instanceOopDesc::base_offset_in_bytes()) {
      // First handle header references such as a LoadKlassNode, even if the
      // object's klass is unloaded at compile time (4965979).
      if (!is_known_inst) { // Do it only for non-instance types
        tj = to = TypeInstPtr::make(TypePtr::BotPTR, env()->Object_klass(), false, NULL, offset);
      }
    } else if (offset < 0 || offset >= k->size_helper() * wordSize) {
      // Static fields are in the space above the normal instance
      // fields in the java.lang.Class instance.
      if (to->klass() != ciEnv::current()->Class_klass()) {
        // Out-of-range offset on a non-Class instance: fall back to the
        // generic oop type.
        to = NULL;
        tj = TypeOopPtr::BOTTOM;
        offset = tj->offset();
      }
    } else {
      // Ordinary instance field: key the alias class on the canonical holder
      // reported by the klass for this offset.
      ciInstanceKlass *canonical_holder = k->get_canonical_holder(offset);
      if (!k->equals(canonical_holder) || tj->offset() != offset) {
        if( is_known_inst ) {
          tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, true, NULL, offset, to->instance_id());
        } else {
          tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, false, NULL, offset);
        }
      }
    }
  }

  // Klass pointers to object array klasses need some flattening
  const TypeKlassPtr *tk = tj->isa_klassptr();
  if( tk ) {
    // If we are referencing a field within a Klass, we need
    // to assume the worst case of an Object.  Both exact and
    // inexact types must flatten to the same alias class so
    // use NotNull as the PTR.
    if ( offset == Type::OffsetBot || (offset >= 0 && (size_t)offset < sizeof(Klass)) ) {

      tj = tk = TypeKlassPtr::make(TypePtr::NotNull,
                                   TypeKlassPtr::OBJECT->klass(),
                                   offset);
    }

    ciKlass* klass = tk->klass();
    if( klass->is_obj_array_klass() ) {
      // All object-array klasses share the klass of TypeAryPtr::OOPS, or the
      // klass of TypeInstPtr::BOTTOM when that one is unavailable.
      ciKlass* k = TypeAryPtr::OOPS->klass();
      if( !k || !k->is_loaded() )                  // Only fails for some -Xcomp runs
        k = TypeInstPtr::BOTTOM->klass();
      tj = tk = TypeKlassPtr::make( TypePtr::NotNull, k, offset );
    }

    // Check for precise loads from the primary supertype array and force them
    // to the supertype cache alias index.  Check for generic array loads from
    // the primary supertype array and also force them to the supertype cache
    // alias index.  Since the same load can reach both, we need to merge
    // these 2 disparate memories into the same alias class.  Since the
    // primary supertype array is read-only, there's no chance of confusion
    // where we bypass an array load and an array store.
    int primary_supers_offset = in_bytes(Klass::primary_supers_offset());
    if (offset == Type::OffsetBot ||
        (offset >= primary_supers_offset &&
         offset < (int)(primary_supers_offset + Klass::primary_super_limit() * wordSize)) ||
        offset == (int)in_bytes(Klass::secondary_super_cache_offset())) {
      offset = in_bytes(Klass::secondary_super_cache_offset());
      tj = tk = TypeKlassPtr::make( TypePtr::NotNull, tk->klass(), offset );
    }
  }

  // Flatten all Raw pointers together.
  if (tj->base() == Type::RawPtr)
    tj = TypeRawPtr::BOTTOM;

  if (tj->base() == Type::AnyPtr)
    tj = TypePtr::BOTTOM;      // An error, which the caller must check for.

  // Flatten all to bottom for now
  switch( _AliasLevel ) {
  case 0:
    tj = TypePtr::BOTTOM;
    break;
  case 1:                       // Flatten to: oop, static, field or array
    switch (tj->base()) {
    //case Type::AryPtr: tj = TypeAryPtr::RANGE;    break;
    case Type::RawPtr:   tj = TypeRawPtr::BOTTOM;   break;
    case Type::AryPtr:   // do not distinguish arrays at all
    case Type::InstPtr:  tj = TypeInstPtr::BOTTOM;  break;
    case Type::KlassPtr: tj = TypeKlassPtr::OBJECT; break;
    case Type::AnyPtr:   tj = TypePtr::BOTTOM;      break;  // caller checks it
    default: ShouldNotReachHere();
    }
    break;
  case 2:                       // No collapsing at level 2; keep all splits
  case 3:                       // No collapsing at level 3; keep all splits
    break;
  default:
    Unimplemented();
  }

  // Re-read the offset from the final type before checking the result.
  offset = tj->offset();
  assert( offset != Type::OffsetTop, "Offset has fallen from constant" );

  // Sanity check on the offset/type pairing: non-array types carry a known
  // offset (except the two BOTTOM types), while array types carry either
  // OffsetBot or one of the mark, klass and length header offsets.
  assert( (offset != Type::OffsetBot && tj->base() != Type::AryPtr) ||
          (offset == Type::OffsetBot && tj->base() == Type::AryPtr) ||
          (offset == Type::OffsetBot && tj == TypeOopPtr::BOTTOM) ||
          (offset == Type::OffsetBot && tj == TypePtr::BOTTOM) ||
          (offset == oopDesc::mark_offset_in_bytes() && tj->base() == Type::AryPtr) ||
          (offset == oopDesc::klass_offset_in_bytes() && tj->base() == Type::AryPtr) ||
          (offset == arrayOopDesc::length_offset_in_bytes() && tj->base() == Type::AryPtr)  ,
          "For oops, klasses, raw offset must be constant; for arrays the offset is never known" );
  assert( tj->ptr() != TypePtr::TopPTR &&
          tj->ptr() != TypePtr::AnyNull &&
          tj->ptr() != TypePtr::Null, "No imprecise addresses" );
//    assert( tj->ptr() != TypePtr::Constant ||
//            tj->base() == Type::RawPtr ||
//            tj->base() == Type::KlassPtr, "No constant oop addresses" );

  return tj;
}

void Compile::AliasType::Init(int i, const TypePtr* at) {
  _index = i;
  _adr_type = at;
  _field = NULL;
  _element = NULL;
  _is_rewritable = true; // default
  const TypeOopPtr *atoop = (at != NULL) ? at->isa_oopptr() : NULL;
  if (atoop != NULL && atoop->is_known_instance()) {
    const TypeOopPtr *gt = atoop->cast_to_instance_id(TypeOopPtr::InstanceBot);
    _general_index = Compile::current()->get_alias_index(gt);
  } else {
    _general_index = 0;
  }
}

// BasicType of the memory this alias type describes: derived from the array
// element type when element() is set, else from the field's layout type,
// else T_ILLEGAL.
BasicType Compile::AliasType::basic_type() const {
  if (element() != NULL) {
    const Type* elem_type = adr_type()->is_aryptr()->elem();
    if (elem_type->isa_narrowoop()) {
      return T_OBJECT;
    }
    return elem_type->array_element_basic_type();
  }
  if (field() != NULL) {
    return field()->layout_type();
  }
  return T_ILLEGAL; // unknown
}

//---------------------------------print_on------------------------------------
#ifndef PRODUCT
// Print a one-line description of this alias type: index, read-only marker,
// offset and address type, plus a "***" warning when the recorded field does
// not match the address type's klass and offset.
void Compile::AliasType::print_on(outputStream* st) {
  // Single-digit indices get a trailing blank.
  if (index() >= 10) {
    st->print("@ <%d>",  index());
  } else {
    st->print("@ <%d> ", index());
  }
  st->print(is_rewritable() ? "   " : " RO");

  const int off = adr_type()->offset();
  if (off != Type::OffsetBot) {
    st->print(" +%-3d", off);
  } else {
    st->print(" +any");
  }

  st->print(" in ");
  adr_type()->dump_on(st);

  // The consistency check only applies to oop address types with a field.
  const TypeOopPtr* oop_type = adr_type()->isa_oopptr();
  if (field() == NULL || oop_type == NULL) {
    return;
  }
  if (oop_type->klass()  == field()->holder() &&
      oop_type->offset() == field()->offset_in_bytes()) {
    return;
  }
  st->print(" != ");
  field()->print();
  st->print(" ***");
}

void print_alias_types() {
  Compile* C = Compile::current();
  tty->print_cr("--- Alias types, AliasIdxBot .. %d", C->num_alias_types()-1);
  for (int idx = Compile::AliasIdxBot; idx < C->num_alias_types(); idx++) {
    C->alias_type(idx)->print_on(tty);
    tty->cr();
  }
}
#endif


//----------------------------probe_alias_cache--------------------------------
// Return the alias cache slot selected by hashing the address type pointer.
// The caller must compare the slot's _adr_type to detect a hit.
Compile::AliasCacheEntry* Compile::probe_alias_cache(const TypePtr* adr_type) {
  const intptr_t raw   = (intptr_t) adr_type;
  // Fold the higher bits into the low logAliasCacheSize bits before masking.
  const intptr_t mixed = raw ^ (raw >> logAliasCacheSize);
  const intptr_t slot  = mixed & right_n_bits(logAliasCacheSize);
  return &_alias_cache[slot];
}


//-----------------------------grow_alias_types--------------------------------
void Compile::grow_alias_types() {
  const int old_ats  = _max_alias_types; // how many before?
  const int new_ats  = old_ats;          // how many more?
  const int grow_ats = old_ats+new_ats;  // how many now?
  _max_alias_types = grow_ats;
  _alias_types =  REALLOC_ARENA_ARRAY(comp_arena(), AliasType*, _alias_types, old_ats, grow_ats);
  AliasType* ats =    NEW_ARENA_ARRAY(comp_arena(), AliasType, new_ats);
  Copy::zero_to_bytes(ats, sizeof(AliasType)*new_ats);
  for (int i = 0; i < new_ats; i++)  _alias_types[old_ats+i] = &ats[i];
}


//--------------------------------find_alias_type------------------------------
// Return the AliasType for adr_type.  The alias cache is consulted first;
// on a miss the type is flattened and the alias table is searched linearly.
// If no entry exists, a new one is appended and initialized, unless
// no_create is set, in which case NULL is returned.  original_field is only
// used in an assert that cross-checks the field looked up by offset.
Compile::AliasType* Compile::find_alias_type(const TypePtr* adr_type, bool no_create, ciField* original_field) {
  // With alias analysis off, everything maps to the universal category.
  if (_AliasLevel == 0)
    return alias_type(AliasIdxBot);

  // Fast path: exact pointer match in the cache.
  AliasCacheEntry* ace = probe_alias_cache(adr_type);
  if (ace->_adr_type == adr_type) {
    return alias_type(ace->_index);
  }

  // Handle special cases.
  if (adr_type == NULL)             return alias_type(AliasIdxTop);
  if (adr_type == TypePtr::BOTTOM)  return alias_type(AliasIdxBot);

  // Do it the slow way.
  const TypePtr* flat = flatten_alias_type(adr_type);

#ifdef ASSERT
  {
    // Debug-only checks: flattening must be idempotent, must not produce
    // TypePtr::BOTTOM here, and must not depend on klass exactness.
    ResourceMark rm;
    assert(flat == flatten_alias_type(flat),
           err_msg("not idempotent: adr_type = %s; flat = %s => %s", Type::str(adr_type),
                   Type::str(flat), Type::str(flatten_alias_type(flat))));
    assert(flat != TypePtr::BOTTOM,
           err_msg("cannot alias-analyze an untyped ptr: adr_type = %s", Type::str(adr_type)));
    if (flat->isa_oopptr() && !flat->isa_klassptr()) {
      const TypeOopPtr* foop = flat->is_oopptr();
      // Scalarizable allocations have exact klass always.
      bool exact = !foop->klass_is_exact() || foop->is_known_instance();
      const TypePtr* xoop = foop->cast_to_exactness(exact)->is_ptr();
      assert(foop == flatten_alias_type(xoop),
             err_msg("exactness must not affect alias type: foop = %s; xoop = %s",
                     Type::str(foop), Type::str(xoop)));
    }
  }
#endif

  // Linear search of the existing alias types for the flattened type;
  // idx stays AliasIdxTop when none matches.
  int idx = AliasIdxTop;
  for (int i = 0; i < num_alias_types(); i++) {
    if (alias_type(i)->adr_type() == flat) {
      idx = i;
      break;
    }
  }

  if (idx == AliasIdxTop) {
    if (no_create)  return NULL;
    // Grow the array if necessary.
    if (_num_alias_types == _max_alias_types)  grow_alias_types();
    // Add a new alias type.
    idx = _num_alias_types++;
    _alias_types[idx]->Init(idx, flat);
    // Mark the slices that are treated as read-only.
    if (flat == TypeInstPtr::KLASS)  alias_type(idx)->set_rewritable(false);
    if (flat == TypeAryPtr::RANGE)   alias_type(idx)->set_rewritable(false);
    if (flat->isa_instptr()) {
      if (flat->offset() == java_lang_Class::klass_offset_in_bytes()
          && flat->is_instptr()->klass() == env()->Class_klass())
        alias_type(idx)->set_rewritable(false);
    }
    if (flat->isa_aryptr()) {
#ifdef ASSERT
      const int header_size_min  = arrayOopDesc::base_offset_in_bytes(T_BYTE);
      // (T_BYTE has the weakest alignment and size restrictions...)
      assert(flat->offset() < header_size_min, "array body reference must be OffsetBot");
#endif
      // Only the array-body slice (OffsetBot) records an element type.
      if (flat->offset() == TypePtr::OffsetBot) {
        alias_type(idx)->set_element(flat->is_aryptr()->elem());
      }
    }
    if (flat->isa_klassptr()) {
      // These Klass fields are marked as not rewritable.
      if (flat->offset() == in_bytes(Klass::super_check_offset_offset()))
        alias_type(idx)->set_rewritable(false);
      if (flat->offset() == in_bytes(Klass::modifier_flags_offset()))
        alias_type(idx)->set_rewritable(false);
      if (flat->offset() == in_bytes(Klass::access_flags_offset()))
        alias_type(idx)->set_rewritable(false);
      if (flat->offset() == in_bytes(Klass::java_mirror_offset()))
        alias_type(idx)->set_rewritable(false);
    }
    // %%% (We would like to finalize JavaThread::threadObj_offset(),
    // but the base pointer type is not distinctive enough to identify
    // references into JavaThread.)

    // Check for final fields.
    const TypeInstPtr* tinst = flat->isa_instptr();
    if (tinst && tinst->offset() >= instanceOopDesc::base_offset_in_bytes()) {
      ciField* field;
      if (tinst->const_oop() != NULL &&
          tinst->klass() == ciEnv::current()->Class_klass() &&
          tinst->offset() >= (tinst->klass()->as_instance_klass()->size_helper() * wordSize)) {
        // static field
        ciInstanceKlass* k = tinst->const_oop()->as_instance()->java_lang_Class_klass()->as_instance_klass();
        field = k->get_field_by_offset(tinst->offset(), true);
      } else {
        // instance field
        ciInstanceKlass *k = tinst->klass()->as_instance_klass();
        field = k->get_field_by_offset(tinst->offset(), false);
      }
      assert(field == NULL ||
             original_field == NULL ||
             (field->holder() == original_field->holder() &&
              field->offset() == original_field->offset() &&
              field->is_static() == original_field->is_static()), "wrong field?");
      // Set field() and is_rewritable() attributes.
      if (field != NULL)  alias_type(idx)->set_field(field);
    }
  }

  // Fill the cache for next time.
  ace->_adr_type = adr_type;
  ace->_index    = idx;
  assert(alias_type(adr_type) == alias_type(idx),  "type must be installed");

  // Might as well try to fill the cache for the flattened version, too.
  // (Only an empty slot is used; an occupied one is left alone.)
  AliasCacheEntry* face = probe_alias_cache(flat);
  if (face->_adr_type == NULL) {
    face->_adr_type = flat;
    face->_index    = idx;
    assert(alias_type(flat) == alias_type(idx), "flat type must work too");
  }

  return alias_type(idx);
}


// Return the alias type for a field.  A static field is addressed relative to
// the java mirror of its holder, an instance field relative to an instance of
// the holder klass.
Compile::AliasType* Compile::alias_type(ciField* field) {
  const TypeOopPtr* base_type;
  if (!field->is_static()) {
    base_type = TypeOopPtr::make_from_klass_raw(field->holder());
  } else {
    base_type = TypeInstPtr::make(field->holder()->java_mirror());
  }
  const TypePtr* field_adr = base_type->add_offset(field->offset_in_bytes());
  AliasType* result = alias_type(field_adr, field);
  assert((field->is_final() || field->is_stable()) == !result->is_rewritable(), "must get the rewritable bits correct");
  return result;
}


//------------------------------have_alias_type--------------------------------
bool Compile::have_alias_type(const TypePtr* adr_type) {
  AliasCacheEntry* ace = probe_alias_cache(adr_type);
  if (ace->_adr_type == adr_type) {
    return true;
  }

  // Handle special cases.
  if (adr_type == NULL)             return true;
  if (adr_type == TypePtr::BOTTOM)  return true;

  return find_alias_type(adr_type, true, NULL) != NULL;
}

//-----------------------------must_alias--------------------------------------
// True if all values of the given address type are in the given alias category.
bool Compile::must_alias(const TypePtr* adr_type, int alias_idx) {
  // Trivially true: the universal category contains everything, and a NULL
  // address type stands for TypePtr::TOP.
  if (alias_idx == AliasIdxBot || adr_type == NULL) {
    return true;
  }
  // Trivially false: the empty category, or TypePtr::BOTTOM and its twins.
  if (alias_idx == AliasIdxTop || adr_type->base() == Type::AnyPtr) {
    return false;
  }

  // the only remaining possible overlap is identity
  const int idx_of_adr = get_alias_index(adr_type);
  assert(idx_of_adr != AliasIdxBot && idx_of_adr != AliasIdxTop, "");
  assert(idx_of_adr == alias_idx ||
         (alias_type(alias_idx)->adr_type() != TypeOopPtr::BOTTOM
          && adr_type                       != TypeOopPtr::BOTTOM),
         "should not be testing for overlap with an unsafe pointer");
  return idx_of_adr == alias_idx;
}

//------------------------------can_alias--------------------------------------
// True if any values of the given address type are in the given alias category.
bool Compile::can_alias(const TypePtr* adr_type, int alias_idx) {
  // Trivially false: the empty category, or a NULL address type (which
  // stands for TypePtr::TOP).
  if (alias_idx == AliasIdxTop || adr_type == NULL) {
    return false;
  }
  // Trivially true: the universal category, or TypePtr::BOTTOM and its twins.
  if (alias_idx == AliasIdxBot || adr_type->base() == Type::AnyPtr) {
    return true;
  }

  // the only remaining possible overlap is identity
  const int idx_of_adr = get_alias_index(adr_type);
  assert(idx_of_adr != AliasIdxBot && idx_of_adr != AliasIdxTop, "");
  return idx_of_adr == alias_idx;
}



//---------------------------pop_warm_call-------------------------------------
// Detach and return the head of the warm call list, or NULL if it is empty.
WarmCallInfo* Compile::pop_warm_call() {
  WarmCallInfo* const head = _warm_calls;
  if (head == NULL) {
    return NULL;
  }
  _warm_calls = head->remove_from(head);
  return head;
}

//----------------------------Inline_Warm--------------------------------------
int Compile::Inline_Warm() {
  // If there is room, try to inline some more warm call sites.
  // %%% Do a graph index compaction pass when we think we're out of space?
  if (!InlineWarmCalls)  return 0;

  int calls_made_hot = 0;
  int room_to_grow   = NodeCountInliningCutoff - unique();
  int amount_to_grow = MIN2(room_to_grow, (int)NodeCountInliningStep);
  int amount_grown   = 0;
  WarmCallInfo* call;
  while (amount_to_grow > 0 && (call = pop_warm_call()) != NULL) {
    int est_size = (int)call->size();
    if (est_size > (room_to_grow - amount_grown)) {
      // This one won't fit anyway.  Get rid of it.
      call->make_cold();
      continue;
    }
    call->make_hot();
    calls_made_hot++;
    amount_grown   += est_size;
    amount_to_grow -= est_size;
  }

  if (calls_made_hot > 0)  set_major_progress();
  return calls_made_hot;
}


//----------------------------Finish_Warm--------------------------------------
void Compile::Finish_Warm() {
  if (!InlineWarmCalls)  return;
  if (failing())  return;
  if (warm_calls() == NULL)  return;

  // Clean up loose ends, if we are out of space for inlining.
  WarmCallInfo* call;
  while ((call = pop_warm_call()) != NULL) {
    call->make_cold();
  }
}

//---------------------cleanup_loop_predicates-----------------------
// Remove the opaque nodes that protect the predicates so that all unused
// checks and uncommon_traps will be eliminated from the ideal graph
void Compile::cleanup_loop_predicates(PhaseIterGVN &igvn) {
  if (predicate_count() == 0) {
    return;
  }
  // Walk the recorded predicate Opaque1 nodes from last to first and replace
  // each one by its input.  The assert below expects the list to be empty
  // once all of them have been replaced.
  for (int idx = predicate_count() - 1; idx >= 0; idx--) {
    Node* opaque = predicate_opaque1_node(idx);
    assert(opaque->Opcode() == Op_Opaque1, "must be");
    igvn.replace_node(opaque, opaque->in(1));
  }
  assert(predicate_count()==0, "should be clean!");
}

// Record a range check dependent CastII node.  The node must not already be
// in the list.
void Compile::add_range_check_cast(Node* n) {
  assert(n->isa_CastII()->has_range_check(),
         "CastII should have range check dependency");
  assert(!_range_check_casts->contains(n),
         "duplicate entry in range check casts");
  _range_check_casts->append(n);
}

// Remove all range check dependent CastIINodes.
void Compile::remove_range_check_casts(PhaseIterGVN &igvn) {
  // Walk the recorded casts from last to first and replace each one by its
  // input.  The assert below expects the list to be empty afterwards.
  for (int idx = range_check_cast_count() - 1; idx >= 0; idx--) {
    Node* rc_cast = range_check_cast_node(idx);
    assert(rc_cast->isa_CastII()->has_range_check(), "CastII should have range check dependency");
    igvn.replace_node(rc_cast, rc_cast->in(1));
  }
  assert(range_check_cast_count() == 0, "should be empty");
}

// StringOpts and late inlining of string methods
void Compile::inline_string_calls(bool parse_time) {
  {
    // remove useless nodes to make the usage analysis simpler
    ResourceMark rm;
    PhaseRemoveUseless pru(initial_gvn(), for_igvn());
  }

  {
    ResourceMark rm;
    print_method(PHASE_BEFORE_STRINGOPTS, 3);
    PhaseStringOpts pso(initial_gvn(), for_igvn());
    print_method(PHASE_AFTER_STRINGOPTS, 3);
  }

  // now inline anything that we skipped the first time around
  if (!parse_time) {
    _late_inlines_pos = _late_inlines.length();
  }

  while (_string_late_inlines.length() > 0) {
    CallGenerator* cg = _string_late_inlines.pop();
    cg->do_late_inline();
    if (failing())  return;
  }
  _string_late_inlines.trunc_to(0);
}

// Late inlining of boxing methods
void Compile::inline_boxing_calls(PhaseIterGVN& igvn) {
  // Nothing to do without pending boxing late inlines.
  if (_boxing_late_inlines.length() <= 0) {
    return;
  }
  assert(has_boxed_value(), "inconsistent");

  PhaseGVN* gvn = initial_gvn();
  set_inlining_incrementally(true);

  // Hand the IGVN state back to the parse-time GVN before inlining.
  assert( igvn._worklist.size() == 0, "should be done with igvn" );
  for_igvn()->clear();
  gvn->replace_with(&igvn);

  _late_inlines_pos = _late_inlines.length();

  while (_boxing_late_inlines.length() > 0) {
    CallGenerator* pending = _boxing_late_inlines.pop();
    pending->do_late_inline();
    if (failing()) {
      return;
    }
  }
  _boxing_late_inlines.trunc_to(0);

  {
    ResourceMark rm;
    PhaseRemoveUseless pru(gvn, for_igvn());
  }

  // Rebuild the IGVN from the updated GVN and optimize again.
  igvn = PhaseIterGVN(gvn);
  igvn.optimize();

  set_inlining_progress(false);
  set_inlining_incrementally(false);
}

// Run one round of late inlining: process pending late inlines in order until
// one reports inlining progress (or the list is exhausted), drop the processed
// entries from the list, remove useless nodes and rebuild igvn.
void Compile::inline_incrementally_one(PhaseIterGVN& igvn) {
  assert(IncrementalInline, "incremental inlining should be on");
  PhaseGVN* gvn = initial_gvn();

  set_inlining_progress(false);
  for_igvn()->clear();
  gvn->replace_with(&igvn);

  // Process entries until progress is reported.  The list length is re-read
  // on every iteration.
  int next = 0;
  while (next < _late_inlines.length() && !inlining_progress()) {
    CallGenerator* pending = _late_inlines.at(next);
    _late_inlines_pos = next + 1;
    pending->do_late_inline();
    if (failing()) {
      return;
    }
    next++;
  }

  // Slide the unprocessed entries to the front and cut off the rest.
  int kept = 0;
  while (next < _late_inlines.length()) {
    _late_inlines.at_put(kept, _late_inlines.at(next));
    next++;
    kept++;
  }
  _late_inlines.trunc_to(kept);

  {
    ResourceMark rm;
    PhaseRemoveUseless pru(gvn, for_igvn());
  }

  igvn = PhaseIterGVN(gvn);
}

// Perform incremental inlining until bound on number of live nodes is reached
void Compile::inline_incrementally(PhaseIterGVN& igvn) {
  PhaseGVN* gvn = initial_gvn();

  set_inlining_incrementally(true);
  set_inlining_progress(true);

  // Live node count observed after the most recent PhaseIdealLoop run
  // (0 until it has run once).
  uint low_live_nodes = 0;

  while (inlining_progress() && _late_inlines.length() > 0) {

    const bool over_budget = live_nodes() > (uint)LiveNodeCountInliningCutoff;
    if (over_budget) {
      // PhaseIdealLoop is expensive, so it is only tried once we are out of
      // live nodes, and only tried again if the previous run brought the
      // live node count below 80% of the cutoff.
      if (low_live_nodes < (uint)LiveNodeCountInliningCutoff * 8 / 10) {
        PhaseIdealLoop ideal_loop( igvn, false, true );
        if (failing()) {
          return;
        }
        low_live_nodes = live_nodes();
        _major_progress = true;
      }

      // Still over the cutoff: stop inlining.
      if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {
        break;
      }
    }

    inline_incrementally_one(igvn);
    if (failing()) {
      return;
    }

    igvn.optimize();
    if (failing()) {
      return;
    }
  }

  assert( igvn._worklist.size() == 0, "should be done with igvn" );

  // Handle the string late inlines that are still pending.
  if (_string_late_inlines.length() > 0) {
    assert(has_stringbuilder(), "inconsistent");
    for_igvn()->clear();
    initial_gvn()->replace_with(&igvn);

    inline_string_calls(false);
    if (failing()) {
      return;
    }

    {
      ResourceMark rm;
      PhaseRemoveUseless pru(initial_gvn(), for_igvn());
    }

    igvn = PhaseIterGVN(gvn);
    igvn.optimize();
  }

  set_inlining_incrementally(false);
}


// Drop the precedence edges that run from "root" to each SafePoint at a
// backward branch.  Those edges were inserted during parsing (see
// add_safepoint()) so that infinite loops without calls or exceptions stay
// visible to root, i.e., useful.  A SafePoint left without any users after
// its edge is removed is handed to igvn as a dead node.
void Compile::remove_root_to_sfpts_edges(PhaseIterGVN& igvn) {
  Node* root_node = root();
  if (root_node == NULL) {
    return;
  }

  // Precedence edges occupy the slots [req(), len()).
  uint slot = root_node->req();
  while (slot < root_node->len()) {
    Node* target = root_node->in(slot);
    if (target == NULL || !target->is_SafePoint()) {
      ++slot;
      continue;
    }

    root_node->rm_prec(slot);
    if (target->outcnt() == 0) {
      igvn.remove_dead_node(target);
    }
    // Do not advance: the same slot is examined again after a removal.
  }
}

//------------------------------Optimize---------------------------------------
// Given a graph, optimize it.
//
// Runs the ideal-graph passes in a fixed order: iterative GVN, incremental
// inlining (plus boxing-call inlining when eliminate_boxing() is set),
// removal of root-to-safepoint edges and of speculative types, cleanup of
// expensive nodes, optional live-node renumbering, escape analysis with
// macro-node elimination, loop optimizations, conditional constant
// propagation, a second iterative GVN, further loop optimizations, removal
// of range-check casts, macro expansion and, once igvn is out of scope,
// final_graph_reshaping().  The function returns early at the points where
// failing() is checked and reports true.
void Compile::Optimize() {
  TracePhase t1("optimizer", &_t_optimizer, true);

#ifndef PRODUCT
  if (env()->break_at_compile()) {
    BREAKPOINT;
  }

#endif

  ResourceMark rm;
  // Number of loop-optimization passes still allowed; assigned from
  // num_loop_opts() further down and decremented after each counted pass.
  int          loop_opts_cnt;

  NOT_PRODUCT( verify_graph_edges(); )

  print_method(PHASE_AFTER_PARSING);

 {
  // Iterative Global Value Numbering, including ideal transforms
  // Initialize IterGVN with types and values from parse-time GVN
  PhaseIterGVN igvn(initial_gvn());
  {
    NOT_PRODUCT( TracePhase t2("iterGVN", &_t_iterGVN, TimeCompiler); )
    igvn.optimize();
  }

  print_method(PHASE_ITER_GVN1, 2);

  if (failing())  return;

  {
    NOT_PRODUCT( TracePhase t2("incrementalInline", &_t_incrInline, TimeCompiler); )
    inline_incrementally(igvn);
  }

  print_method(PHASE_INCREMENTAL_INLINE, 2);

  if (failing())  return;

  if (eliminate_boxing()) {
    NOT_PRODUCT( TracePhase t2("incrementalInline", &_t_incrInline, TimeCompiler); )
    // Inline valueOf() methods now.
    inline_boxing_calls(igvn);

    // Optionally run another round of incremental inlining after the
    // boxing calls have been inlined.
    if (AlwaysIncrementalInline) {
      inline_incrementally(igvn);
    }

    print_method(PHASE_INCREMENTAL_BOXING_INLINE, 2);

    if (failing())  return;
  }

  // Now that all inlining is over, cut edge from root to loop
  // safepoints
  remove_root_to_sfpts_edges(igvn);

  // Remove the speculative part of types and clean up the graph from
  // the extra CastPP nodes whose only purpose is to carry them. Do
  // that early so that optimizations are not disrupted by the extra
  // CastPP nodes.
  remove_speculative_types(igvn);

  // No more new expensive nodes will be added to the list from here
  // so keep only the actual candidates for optimizations.
  cleanup_expensive_nodes(igvn);

  // Renumber live nodes only when the flag is on and unique() exceeds
  // live_nodes() by more than NodeLimitFudgeFactor.
  if (!failing() && RenumberLiveNodes && live_nodes() + NodeLimitFudgeFactor < unique()) {
    NOT_PRODUCT(Compile::TracePhase t2("", &_t_renumberLive, TimeCompiler);)
    initial_gvn()->replace_with(&igvn);
    for_igvn()->clear();
    Unique_Node_List new_worklist(C->comp_arena());
    {
      // Note: this ResourceMark shadows the function-level 'rm' for the
      // duration of this inner scope only.
      ResourceMark rm;
      PhaseRenumberLive prl = PhaseRenumberLive(initial_gvn(), for_igvn(), &new_worklist);
    }
    // Install the new worklist, then rebuild igvn from the parse-time GVN
    // and rerun it.
    set_for_igvn(&new_worklist);
    igvn = PhaseIterGVN(initial_gvn());
    igvn.optimize();
  }

  // Perform escape analysis
  if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) {
    if (has_loops()) {
      // Cleanup graph (remove dead nodes).
      // This PhaseIdealLoop pass is not charged against loop_opts_cnt.
      TracePhase t2("idealLoop", &_t_idealLoop, true);
      PhaseIdealLoop ideal_loop( igvn, false, true );
      if (major_progress()) print_method(PHASE_PHASEIDEAL_BEFORE_EA, 2);
      if (failing())  return;
    }
    ConnectionGraph::do_analysis(this, &igvn);

    if (failing())  return;

    // Optimize out fields loads from scalar replaceable allocations.
    igvn.optimize();
    print_method(PHASE_ITER_GVN_AFTER_EA, 2);

    if (failing())  return;

    // Eliminate macro nodes only if a connection graph exists and there
    // are macro nodes to look at.
    if (congraph() != NULL && macro_count() > 0) {
      NOT_PRODUCT( TracePhase t2("macroEliminate", &_t_macroEliminate, TimeCompiler); )
      PhaseMacroExpand mexp(igvn);
      mexp.eliminate_macro_nodes();
      igvn.set_delay_transform(false);

      igvn.optimize();
      print_method(PHASE_ITER_GVN_AFTER_ELIMINATION, 2);

      if (failing())  return;
    }
  }

  // Loop transforms on the ideal graph.  Range Check Elimination,
  // peeling, unrolling, etc.

  // Set loop opts counter
  loop_opts_cnt = num_loop_opts();
  if((loop_opts_cnt > 0) && (has_loops() || has_split_ifs())) {
    {
      TracePhase t2("idealLoop", &_t_idealLoop, true);
      PhaseIdealLoop ideal_loop( igvn, true );
      loop_opts_cnt--;
      if (major_progress()) print_method(PHASE_PHASEIDEALLOOP1, 2);
      if (failing())  return;
    }
    // Loop opts pass if partial peeling occurred in previous pass
    if(PartialPeelLoop && major_progress() && (loop_opts_cnt > 0)) {
      TracePhase t3("idealLoop", &_t_idealLoop, true);
      PhaseIdealLoop ideal_loop( igvn, false );
      loop_opts_cnt--;
      if (major_progress()) print_method(PHASE_PHASEIDEALLOOP2, 2);
      if (failing())  return;
    }
    // Loop opts pass for loop-unrolling before CCP
    // (This pass has no failing() check of its own; a failure is caught
    // by the !failing() guard and the failing() return that follow.)
    if(major_progress() && (loop_opts_cnt > 0)) {
      TracePhase t4("idealLoop", &_t_idealLoop, true);
      PhaseIdealLoop ideal_loop( igvn, false );
      loop_opts_cnt--;
      if (major_progress()) print_method(PHASE_PHASEIDEALLOOP3, 2);
    }
    if (!failing()) {
      // Verify that last round of loop opts produced a valid graph
      NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); )
      PhaseIdealLoop::verify(igvn);
    }
  }
  if (failing())  return;

  // Conditional Constant Propagation;
  PhaseCCP ccp( &igvn );
  // assert(true, ...) can never fire; the message marks a spot for setting
  // a debugger breakpoint.
  assert( true, "Break here to ccp.dump_nodes_and_types(_root,999,1)");
  {
    TracePhase t2("ccp", &_t_ccp, true);
    ccp.do_transform();
  }
  print_method(PHASE_CPP1, 2);

  assert( true, "Break here to ccp.dump_old2new_map()");

  // Iterative Global Value Numbering, including ideal transforms
  {
    NOT_PRODUCT( TracePhase t2("iterGVN2", &_t_iterGVN2, TimeCompiler); )
    // Re-seed igvn from the CCP phase before optimizing again.
    igvn = ccp;
    igvn.optimize();
  }

  print_method(PHASE_ITER_GVN2, 2);

  if (failing())  return;

  // Loop transforms on the ideal graph.  Range Check Elimination,
  // peeling, unrolling, etc.
  // Keep running loop opts while major progress is reported and the pass
  // budget is not used up; the debug-only counter guards against cycling.
  if(loop_opts_cnt > 0) {
    debug_only( int cnt = 0; );
    while(major_progress() && (loop_opts_cnt > 0)) {
      TracePhase t2("idealLoop", &_t_idealLoop, true);
      assert( cnt++ < 40, "infinite cycle in loop optimization" );
      PhaseIdealLoop ideal_loop( igvn, true);
      loop_opts_cnt--;
      if (major_progress()) print_method(PHASE_PHASEIDEALLOOP_ITERATIONS, 2);
      if (failing())  return;
    }
  }

  {
    // Verify that all previous optimizations produced a valid graph
    // at least to this point, even if no loop optimizations were done.
    NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); )
    PhaseIdealLoop::verify(igvn);
  }

  if (range_check_cast_count() > 0) {
    // No more loop optimizations. Remove all range check dependent CastIINodes.
    C->remove_range_check_casts(igvn);
    igvn.optimize();
  }

  {
    NOT_PRODUCT( TracePhase t2("macroExpand", &_t_macroExpand, TimeCompiler); )
    PhaseMacroExpand  mex(igvn);
    // A true result from expand_macro_nodes() is treated as a bailout; the
    // assert checks that a failure has been recorded in that case.
    if (mex.expand_macro_nodes()) {
      assert(failing(), "must bail out w/ explicit message");
      return;
    }
  }

 } // (End scope of igvn; run destructor if necessary for asserts.)

  dump_inlining();
  // A method with only infinite loops has no edges entering loops from root
  {
    NOT_PRODUCT( TracePhase t2("graphReshape", &_t_graphReshaping, TimeCompiler); )
    // As with macro expansion, a true result means the compilation bailed out.
    if (final_graph_reshaping()) {
      assert(failing(), "must bail out w/ explicit message");
      return;
    }
  }

  print_method(PHASE_OPTIMIZE_FINISHED, 2);
}


//------------------------------Code_Gen---------------------------------------
// Given a graph, generate code for it
//
// Does nothing if the compilation is already failing.  Otherwise runs, in
// order: instruction selection (Matcher), CFG construction with global code
// motion, register allocation, empty-block removal and block layout,
// optional peephole optimization, optional post-allocation expansion, and
// finally Output().  The Matcher, PhaseCFG and PhaseChaitin objects are
// locals of this function that are published through _matcher, _cfg and
// _regalloc while it runs.
//
// NOTE(review): on the normal exit path _cfg and _regalloc are overwritten
// with a 0xdeadbeef marker, but _matcher is not, and the early-return paths
// leave all three pointing at locals that are about to be destroyed.
// Confirm that no caller dereferences them after Code_Gen() returns.
void Compile::Code_Gen() {
  if (failing()) {
    return;
  }

  // Perform instruction selection.  You might think we could reclaim Matcher
  // memory PDQ, but actually the Matcher is used in generating spill code.
  // Internals of the Matcher (including some VectorSets) must remain live
  // for awhile - thus I cannot reclaim Matcher memory lest a VectorSet usage
  // set a bit in reclaimed memory.

  // In debug mode can dump m._nodes.dump() for mapping of ideal to machine
  // nodes.  Mapping is only valid at the root of each matched subtree.
  NOT_PRODUCT( verify_graph_edges(); )

  Matcher matcher;
  _matcher = &matcher;
  {
    TracePhase t2("matcher", &_t_matcher, true);
    matcher.match();
  }
  // In debug mode can dump m._nodes.dump() for mapping of ideal to machine
  // nodes.  Mapping is only valid at the root of each matched subtree.
  NOT_PRODUCT( verify_graph_edges(); )

  // If you have too many nodes, or if matching has failed, bail out
  check_node_count(0, "out of nodes matching instructions");
  if (failing()) {
    return;
  }

  // Build a proper-looking CFG
  PhaseCFG cfg(node_arena(), root(), matcher);
  _cfg = &cfg;
  {
    NOT_PRODUCT( TracePhase t2("scheduler", &_t_scheduler, TimeCompiler); )
    // Give up on code generation if global code motion reports failure.
    bool success = cfg.do_global_code_motion();
    if (!success) {
      return;
    }

    print_method(PHASE_GLOBAL_CODE_MOTION, 2);
    NOT_PRODUCT( verify_graph_edges(); )
    debug_only( cfg.verify(); )
  }

  PhaseChaitin regalloc(unique(), cfg, matcher);
  _regalloc = &regalloc;
  {
    TracePhase t2("regalloc", &_t_registerAllocation, true);
    // Perform register allocation.  After Chaitin, use-def chains are
    // no longer accurate (at spill code) and so must be ignored.
    // Node->LRG->reg mappings are still accurate.
    _regalloc->Register_Allocate();

    // Bail out if the allocator builds too many nodes
    if (failing()) {
      return;
    }
  }

  // Prior to register allocation we kept empty basic blocks in case the
  // the allocator needed a place to spill.  After register allocation we
  // are not adding any new instructions.  If any basic block is empty, we
  // can now safely remove it.
  {
    NOT_PRODUCT( TracePhase t2("blockOrdering", &_t_blockOrdering, TimeCompiler); )
    cfg.remove_empty_blocks();
    // Either run the frequency-based block layout phase (all of its work
    // happens while 'layout' is constructed and destroyed) or just set
    // loop alignment.
    if (do_freq_based_layout()) {
      PhaseBlockLayout layout(cfg);
    } else {
      cfg.set_loop_alignment();
    }
    cfg.fixup_flow();
  }

  // Apply peephole optimizations
  if( OptoPeephole ) {
    NOT_PRODUCT( TracePhase t2("peephole", &_t_peephole, TimeCompiler); )
    PhasePeephole peep( _regalloc, cfg);
    peep.do_transform();
  }

  // Do late expand if CPU requires this.
  if (Matcher::require_postalloc_expand) {
    NOT_PRODUCT(TracePhase t2c("postalloc_expand", &_t_postalloc_expand, true));
    cfg.postalloc_expand(_regalloc);
  }

  // Convert Nodes to instruction bits in a buffer
  {
    // %%%% workspace merge brought two timers together for one job
    TracePhase t2a("output", &_t_output, true);
    NOT_PRODUCT( TraceTime t2b(NULL, &_t_codeGeneration, TimeCompiler, false); )
    Output();
  }

  print_method(PHASE_FINAL_CODE);

  // He's dead, Jim.
  // Replace the pointers to the soon-to-be-destroyed locals with a
  // recognizable marker value.
  _cfg     = (PhaseCFG*)((intptr_t)0xdeadbeef);
  _regalloc = (PhaseChaitin*)((intptr_t)0xdeadbeef);
}


//------------------------------dump_asm---------------------------------------
// Dump formatted assembly
#ifndef PRODUCT
// Print a formatted listing of the method to tty, block by block and node
// by node, using _cfg for the block structure and _regalloc for formatting.
//
//   pcs      - optional array indexed by node _idx; when non-NULL, the entry
//              for a node is printed in hex in front of its line
//              (presumably the node's code offset -- confirm with callers).
//   pc_limit - entries of 'pcs' are only read for nodes with _idx < pc_limit.
//
// The dump stops early, and says so, if VMThread::should_terminate()
// becomes true while printing.
void Compile::dump_asm(int *pcs, uint pc_limit) {
  bool cut_short = false;
  tty->print_cr("#");
  tty->print("#  ");  _tf->dump();  tty->cr();
  tty->print_cr("#");

  // For all blocks
  // NOTE(review): 'pc' is never modified in this function, so the exception
  // subtable lookup further down is always made with 0.
  int pc = 0x0;                 // Program counter
  // '+' is printed in front of the next instruction that starts a bundle,
  // a blank otherwise; reset to blank after each printed instruction.
  char starts_bundle = ' ';
  _regalloc->dump_frame();

  Node *n = NULL;
  for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
    if (VMThread::should_terminate()) {
      cut_short = true;
      break;
    }
    Block* block = _cfg->get_block(i);
    // Connector blocks are only shown in Verbose mode.
    if (block->is_connector() && !Verbose) {
      continue;
    }
    n = block->head();
    if (pcs && n->_idx < pc_limit) {
      tty->print("%3.3x   ", pcs[n->_idx]);
    } else {
      tty->print("      ");
    }
    block->dump_head(_cfg);
    if (block->is_connector()) {
      tty->print_cr("        # Empty connector block");
    } else if (block->num_preds() == 2 && block->pred(1)->is_CatchProj() && block->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) {
      tty->print_cr("        # Block is sole successor of call");
    }

    // For all instructions
    // 'delay' remembers a node that sits in an unconditional delay slot
    // until the instruction that uses the slot has been printed.
    Node *delay = NULL;
    for (uint j = 0; j < block->number_of_nodes(); j++) {
      // This break only leaves the inner loop; the outer loop re-checks
      // should_terminate() at the top of its next iteration.
      if (VMThread::should_terminate()) {
        cut_short = true;
        break;
      }
      n = block->get_node(j);
      if (valid_bundle_info(n)) {
        Bundle* bundle = node_bundling(n);
        if (bundle->used_in_unconditional_delay()) {
          // Defer printing; it is emitted after the owning instruction.
          delay = n;
          continue;
        }
        if (bundle->starts_bundle()) {
          starts_bundle = '+';
        }
      }

      if (WizardMode) {
        n->dump();
      }

      if( !n->is_Region() &&    // Dont print in the Assembly
          !n->is_Phi() &&       // a few noisely useless nodes
          !n->is_Proj() &&
          !n->is_MachTemp() &&
          !n->is_SafePointScalarObject() &&
          !n->is_Catch() &&     // Would be nice to print exception table targets
          !n->is_MergeMem() &&  // Not very interesting
          !n->is_top() &&       // Debug info table constants
          !(n->is_Con() && !n->is_Mach())// Debug info table constants
          ) {
        if (pcs && n->_idx < pc_limit)
          tty->print("%3.3x", pcs[n->_idx]);
        else
          tty->print("   ");
        tty->print(" %c ", starts_bundle);
        starts_bundle = ' ';
        tty->print("\t");
        n->format(_regalloc, tty);
        tty->cr();
      }

      // If we have an instruction with a delay slot, and have seen a delay,
      // then back up and print it
      if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) {
        assert(delay != NULL, "no unconditional delay instruction");
        if (WizardMode) delay->dump();

        if (node_bundling(delay)->starts_bundle())
          starts_bundle = '+';
        // NOTE(review): the value printed for the delay instruction is
        // looked up with n->_idx (the instruction owning the slot), not
        // with delay->_idx.
        if (pcs && n->_idx < pc_limit)
          tty->print("%3.3x", pcs[n->_idx]);
        else
          tty->print("   ");
        tty->print(" %c ", starts_bundle);
        starts_bundle = ' ';
        tty->print("\t");
        delay->format(_regalloc, tty);
        tty->cr();
        delay = NULL;
      }

      // Dump the exception table as well
      if( n->is_Catch() && (Verbose || WizardMode) ) {
        // Print the exception table for this offset
        _handler_table.print_subtable_for(pc);
      }
    }

    // Close the block with the 'pcs' entry of the last node visited, or
    // with an empty line when no 'pcs' entry is available.
    if (pcs && n->_idx < pc_limit)
      tty->print_cr("%3.3x", pcs[n->_idx]);
    else
      tty->cr();

    // A pending delay node is only acceptable if the dump was interrupted.
    assert(cut_short || delay == NULL, "no unconditional delay branch");

  } // End of per-block dump
  tty->cr();

  if (cut_short)  tty->print_cr("*** disassembly is cut short ***");
}
#endif

//------------------------------Final_Reshape_Counts---------------------------
// Counters and scratch sets used during final graph reshaping.  The
// counters help identify when a method may/must be executed using hardware
// with only 24-bit precision.
struct Final_Reshape_Counts : public StackObj {
  // Counters; every one starts at zero.
  int  _call_count;             // non-inlined 'common' calls
  int  _float_count;            // float ops requiring 24-bit precision
  int  _double_count;           // double ops requiring more precision
  int  _java_call_count;        // non-inlined 'java' calls
  int  _inner_loop_count;       // loops which need alignment

  // Scratch sets.
  VectorSet _visited;           // Visitation flags
  Node_List _tests;             // Set of IfNodes & PCTableNodes

  Final_Reshape_Counts()
    : _call_count(0),
      _float_count(0),
      _double_count(0),
      _java_call_count(0),
      _inner_loop_count(0),
      _visited(Thread::current()->resource_area()) {
  }

  // 'common' calls
  void inc_call_count() { ++_call_count; }
  int  get_call_count() const { return _call_count; }

  // float ops
  void inc_float_count() { ++_float_count; }
  int  get_float_count() const { return _float_count; }

  // double ops
  void inc_double_count() { ++_double_count; }
  int  get_double_count() const { return _double_count; }

  // 'java' calls
  void inc_java_call_count() { ++_java_call_count; }
  int  get_java_call_count() const { return _java_call_count; }

  // inner loops
  void inc_inner_loop_count() { ++_inner_loop_count; }
  int  get_inner_loop_count() const { return _inner_loop_count; }
};

#ifdef ASSERT
static bool oop_offset_is_sane(const TypeInstPtr* tp) {
  ciInstanceKlass *k = tp->klass()->as_instance_klass();
  // Make sure the offset goes inside the instance layout.
  return k->contains_field_offset(tp->offset());
  // Note that OffsetBot and OffsetTop are very negative.
}
#endif

// Eliminate trivially redundant StoreCMs found on the memory chain above
// 'n' and accumulate their precedence edges on 'n'.
void Compile::eliminate_redundant_card_marks(Node* n) {
  assert(n->Opcode() == Op_StoreCM, "expected StoreCM");
  if (n->in(MemNode::Address)->outcnt() <= 1) {
    // The address has a single user, so there is nothing to eliminate.
    return;
  }

  // The address has several users, so some StoreCMs may be redundant.
  Node* cur = n->in(MemNode::Memory);
  Node* const card_adr = n->in(MemNode::Address);
  Node* const card_val = n->in(MemNode::ValueIn);
  Node* keeper = n;             // last node on the chain that was kept
  bool hit_processed = false;   // set once an already-converted StoreCM is met

  // Walk up the chain of StoreCMs, dropping the ones that match.  The
  // optimization is safe as long as the chain consists of single users;
  // eliminating partially redundant StoreCMs would require cloning copies
  // down the other paths.
  while (cur->Opcode() == Op_StoreCM && cur->outcnt() == 1 && !hit_processed) {
    const bool same_card = (card_adr == cur->in(MemNode::Address)) &&
                           (card_val == cur->in(MemNode::ValueIn));
    if (!same_card) {
      keeper = cur;
    } else {
      // 'cur' is a redundant StoreCM.
      if (cur->req() > MemNode::OopStore) {
        // Not yet processed by this code: take over its OopStore input.
        n->add_prec(cur->in(MemNode::OopStore));
      } else {
        // Already converted: copy over its non-NULL precedence edges.
        for (uint e = cur->req(); e < cur->len(); e++) {
          Node* edge = cur->in(e);
          if (edge != NULL) {
            n->add_prec(edge);
          }
        }
        // Everything above this point has been processed.
        hit_processed = true;
      }
      // Unlink the redundant StoreCM from the chain.
      keeper->set_req(MemNode::Memory, cur->in(MemNode::Memory));
      assert(cur->outcnt() == 0, "should be dead");
      cur->disconnect_inputs(NULL, this);
    }
    cur = keeper->in(MemNode::Memory);
  }
}

//------------------------------final_graph_reshaping_impl----------------------
// Implement items 1-5 from final_graph_reshaping below.
void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {

  if ( n->outcnt() == 0 ) return; // dead node
  uint nop = n->Opcode();

  // Check for 2-input instruction with "last use" on right input.
  // Swap to left input.  Implements item (2).
  if( n->req() == 3 &&          // two-input instruction
      n->in(1)->outcnt() > 1 && // left use is NOT a last use
      (!n->in(1)->is_Phi() || n->in(1)->in(2) != n) && // it is not data loop
      n->in(2)->outcnt() == 1 &&// right use IS a last use
      !n->in(2)->is_Con() ) {   // right use is not a constant
    // Check for commutative opcode
    switch( nop ) {
    case Op_AddI:  case Op_AddF:  case Op_AddD:  case Op_AddL:
    case Op_MaxI:  case Op_MinI:
    case Op_MulI:  case Op_MulF:  case Op_MulD:  case Op_MulL:
    case Op_AndL:  case Op_XorL:  case Op_OrL:
    case Op_AndI:  case Op_XorI:  case Op_OrI: {
      // Move "last use" input to left by swapping inputs
      n->swap_edges(1, 2);
      break;
    }
    default:
      break;
    }
  }

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值