// If n is a TypeNode, capture any more-precise type permanently into the Node
if (t != n->bottom_type()) {
hash_delete(n); // changing bottom type may force a rehash
n->raise_bottom_type(t);
_worklist.push(n); // n re-enters the hash table via the worklist
}
// Idealize graph using DU info. Must clone() into new-space.
// DU info is generally used to show profitability, progress or safety
// (but generally not needed for correctness).
Node *nn = n->Ideal_DU_postCCP(this);
// TEMPORARY fix to ensure that 2nd GVN pass eliminates NULL checks
switch( n->Opcode() ) {
case Op_FastLock: // Revisit FastLocks for lock coarsening
case Op_If:
case Op_CountedLoopEnd:
case Op_Region:
case Op_Loop:
case Op_CountedLoop:
case Op_Conv2B:
case Op_Opaque1:
case Op_Opaque2:
_worklist.push(n);
break;
default:
break;
}
if( nn ) {
_worklist.push(n);
// Put users of 'n' onto worklist for second igvn transform
add_users_to_worklist(n);
return nn;
}
return n;
}
//---------------------------------saturate------------------------------------
const Type* PhaseCCP::saturate(const Type* new_type, const Type* old_type,
const Type* limit_type) const {
const Type* wide_type = new_type->widen(old_type, limit_type);
if (wide_type != new_type) { // did we widen?
// If so, we may have widened beyond the limit type. Clip it back down.
new_type = wide_type->filter(limit_type);
}
return new_type;
}
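// Illustrative sketch (not part of the original source), assuming TypeInt ranges:
//   old_type   = int:[0..1]     (recorded on the previous visit of the phi)
//   new_type   = int:[0..2]     (freshly computed by PhiNode::Value)
//   limit_type = int:[0..10]    (e.g. the phi's bottom type)
// widen() may jump well past [0..2] (eventually giving up at TypeInt::INT), and
// filter(limit_type) then clips the result back inside [0..10], so the optimistic
// CCP pass converges in a few steps instead of one step per possible value.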
//------------------------------print_statistics-------------------------------
#ifndef PRODUCT
void PhaseCCP::print_statistics() {
tty->print_cr("CCP: %d constants found: %d", _total_invokes, _total_constants);
}
#endif
//=============================================================================
#ifndef PRODUCT
uint PhasePeephole::_total_peepholes = 0;
#endif
//------------------------------PhasePeephole----------------------------------
// Peephole optimization pass over register-allocated basic blocks
PhasePeephole::PhasePeephole( PhaseRegAlloc *regalloc, PhaseCFG &cfg )
: PhaseTransform(Peephole), _regalloc(regalloc), _cfg(cfg) {
NOT_PRODUCT( clear_peepholes(); )
}
#ifndef PRODUCT
//------------------------------~PhasePeephole---------------------------------
PhasePeephole::~PhasePeephole() {
_total_peepholes += count_peepholes();
}
#endif
//------------------------------transform--------------------------------------
Node *PhasePeephole::transform( Node *n ) {
ShouldNotCallThis();
return NULL;
}
//------------------------------do_transform-----------------------------------
void PhasePeephole::do_transform() {
bool method_name_not_printed = true;
// Examine each basic block
for (uint block_number = 1; block_number < _cfg.number_of_blocks(); ++block_number) {
Block* block = _cfg.get_block(block_number);
bool block_not_printed = true;
// and each instruction within a block
uint end_index = block->number_of_nodes();
// block->end_idx() not valid after PhaseRegAlloc
for( uint instruction_index = 1; instruction_index < end_index; ++instruction_index ) {
Node *n = block->get_node(instruction_index);
if( n->is_Mach() ) {
MachNode *m = n->as_Mach();
int deleted_count = 0;
// check for peephole opportunities
MachNode *m2 = m->peephole( block, instruction_index, _regalloc, deleted_count, C );
if( m2 != NULL ) {
#ifndef PRODUCT
if( PrintOptoPeephole ) {
// Print method, first time only
if( C->method() && method_name_not_printed ) {
C->method()->print_short_name(); tty->cr();
method_name_not_printed = false;
}
// Print this block
if( Verbose && block_not_printed) {
tty->print_cr("in block");
block->dump();
block_not_printed = false;
}
// Print instructions being deleted
for( int i = (deleted_count - 1); i >= 0; --i ) {
block->get_node(instruction_index-i)->as_Mach()->format(_regalloc); tty->cr();
}
tty->print_cr("replaced with");
// Print new instruction
m2->format(_regalloc);
tty->print("\n\n");
}
#endif
// Remove old nodes from basic block and update instruction_index
// (old nodes still exist and may have edges pointing to them
// as register allocation info is stored in the allocator using
// the node index to live range mappings.)
uint safe_instruction_index = (instruction_index - deleted_count);
for( ; (instruction_index > safe_instruction_index); --instruction_index ) {
block->remove_node( instruction_index );
}
// install new node after safe_instruction_index
block->insert_node(m2, safe_instruction_index + 1);
end_index = block->number_of_nodes() - 1; // Recompute new block size
NOT_PRODUCT( inc_peepholes(); )
}
}
}
}
}
//------------------------------print_statistics-------------------------------
#ifndef PRODUCT
void PhasePeephole::print_statistics() {
tty->print_cr("Peephole: peephole rules applied: %d", _total_peepholes);
}
#endif
//=============================================================================
//------------------------------set_req_X--------------------------------------
void Node::set_req_X( uint i, Node *n, PhaseIterGVN *igvn ) {
assert( is_not_dead(n), "can not use dead node");
assert( igvn->hash_find(this) != this, "Need to remove from hash before changing edges" );
Node *old = in(i);
set_req(i, n);
// old goes dead?
if( old ) {
switch (old->outcnt()) {
case 0:
// Put into the worklist to kill later. We do not kill it now because the
// recursive kill will delete the current node (this) if dead-loop exists
if (!old->is_top())
igvn->_worklist.push( old );
break;
case 1:
if( old->is_Store() || old->has_special_unique_user() )
igvn->add_users_to_worklist( old );
break;
case 2:
if( old->is_Store() )
igvn->add_users_to_worklist( old );
if( old->Opcode() == Op_Region )
igvn->_worklist.push(old);
break;
case 3:
if( old->Opcode() == Op_Region ) {
igvn->_worklist.push(old);
igvn->add_users_to_worklist( old );
}
break;
default:
break;
}
}
}
//-------------------------------replace_by-----------------------------------
// Using def-use info, replace one node for another. Follow the def-use info
// to all users of the OLD node. Then make all uses point to the NEW node.
void Node::replace_by(Node *new_node) {
assert(!is_top(), "top node has no DU info");
for (DUIterator_Last imin, i = last_outs(imin); i >= imin; ) {
Node* use = last_out(i);
uint uses_found = 0;
for (uint j = 0; j < use->len(); j++) {
if (use->in(j) == this) {
if (j < use->req())
use->set_req(j, new_node);
else use->set_prec(j, new_node);
uses_found++;
}
}
i -= uses_found; // we deleted 1 or more copies of this edge
}
}
//=============================================================================
//-----------------------------------------------------------------------------
void Type_Array::grow( uint i ) {
if( !_max ) {
_max = 1;
_types = (const Type**)_a->Amalloc( _max * sizeof(Type*) );
_types[0] = NULL;
}
uint old = _max;
while( i >= _max ) _max <<= 1; // Double to fit
_types = (const Type**)_a->Arealloc( _types, old*sizeof(Type*),_max*sizeof(Type*));
memset( &_types[old], 0, (_max-old)*sizeof(Type*) );
}
//------------------------------dump-------------------------------------------
#ifndef PRODUCT
void Type_Array::dump() const {
uint max = Size();
for( uint i = 0; i < max; i++ ) {
if( _types[i] != NULL ) {
tty->print(" %d\t== ", i); _types[i]->dump(); tty->cr();
}
}
}
#endif
C:\hotspot-69087d08d473\src\share\vm/opto/phaseX.hpp
/*
* Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_OPTO_PHASEX_HPP
#define SHARE_VM_OPTO_PHASEX_HPP
#include "libadt/dict.hpp"
#include "libadt/vectset.hpp"
#include "memory/resourceArea.hpp"
#include "opto/memnode.hpp"
#include "opto/node.hpp"
#include "opto/phase.hpp"
#include "opto/type.hpp"
class Compile;
class ConINode;
class ConLNode;
class Node;
class Type;
class PhaseTransform;
class PhaseGVN;
class PhaseIterGVN;
class PhaseCCP;
class PhasePeephole;
class PhaseRegAlloc;
//-----------------------------------------------------------------------------
// Expandable closed hash-table of nodes, initialized to NULL.
// Note that the constructor just zeros things
// Storage is reclaimed when the Arena's lifetime is over.
class NodeHash : public StackObj {
protected:
Arena *_a; // Arena to allocate in
uint _max; // Size of table (power of 2)
uint _inserts; // For grow and debug, count of hash_inserts
uint _insert_limit; // 'grow' when _inserts reaches _insert_limit
Node **_table; // Hash table of Node pointers
Node *_sentinel; // Replaces deleted entries in hash table
public:
NodeHash(uint est_max_size);
NodeHash(Arena *arena, uint est_max_size);
NodeHash(NodeHash *use_this_state);
#ifdef ASSERT
~NodeHash(); // Unlock all nodes upon destruction of table.
void operator=(const NodeHash&); // Unlock all nodes upon replacement of table.
#endif
Node *hash_find(const Node*);// Find an equivalent version in hash table
Node *hash_find_insert(Node*);// If not in table insert else return found node
void hash_insert(Node*); // Insert into hash table
bool hash_delete(const Node*);// Replace with _sentinel in hash table
void check_grow() {
_inserts++;
if( _inserts == _insert_limit ) { grow(); }
assert( _inserts <= _insert_limit, "hash table overflow");
assert( _inserts < _max, "hash table overflow" );
}
static uint round_up(uint); // Round up to nearest power of 2
void grow(); // Grow _table to next power of 2 and rehash
// Return 75% of _max, rounded up.
uint insert_limit() const { return _max - (_max>>2); }
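// Illustrative example (not in the original source): for _max == 32 this gives
// 32 - 8 == 24 inserts before grow() is triggered.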
void clear(); // Set all entries to NULL, keep storage.
// Size of hash table
uint size() const { return _max; }
// Return Node* at index in table
Node *at(uint table_index) {
assert(table_index < _max, "Must be within table");
return _table[table_index];
}
void remove_useless_nodes(VectorSet &useful); // replace with sentinel
void replace_with(NodeHash* nh);
void check_no_speculative_types(); // Check no speculative part for type nodes in table
Node *sentinel() { return _sentinel; }
#ifndef PRODUCT
Node *find_index(uint idx); // For debugging
void dump(); // For debugging, dump statistics
#endif
uint _grows; // For debugging, count of table grow()s
uint _look_probes; // For debugging, count of hash probes
uint _lookup_hits; // For debugging, count of hash_finds
uint _lookup_misses; // For debugging, count of hash_finds
uint _insert_probes; // For debugging, count of hash probes
uint _delete_probes; // For debugging, count of hash probes for deletes
uint _delete_hits; // For debugging, count of hash probes for deletes
uint _delete_misses; // For debugging, count of hash probes for deletes
uint _total_inserts; // For debugging, total inserts into hash table
uint _total_insert_probes; // For debugging, total probes while inserting
};
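// A minimal usage sketch (illustrative only, not part of the original source):
// the typical value-numbering pattern is to probe for an existing equivalent
// node before committing a freshly built one, e.g.
//   Node* hit = table.hash_find_insert(n);  // returns the hit, or NULL if n was inserted
//   if (hit != NULL && hit != n) {
//     // an equivalent node already exists; use 'hit' and discard 'n'
//   }
// hash_delete() must be called before changing the edges of a node that is
// already in the table, since a node's hash depends on its inputs.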
//-----------------------------------------------------------------------------
// Map dense integer indices to Types. Uses classic doubling-array trick.
// Abstractly provides an infinite array of Type*'s, initialized to NULL.
// Note that the constructor just zeros things, and since I use Arena
// allocation I do not need a destructor to reclaim storage.
// Despite the general name, this class is customized for use by PhaseTransform.
class Type_Array : public StackObj {
Arena *_a; // Arena to allocate in
uint _max;
const Type **_types;
void grow( uint i ); // Grow array node to fit
const Type *operator[] ( uint i ) const // Lookup, or NULL for not mapped
{ return (i<_max) ? _types[i] : (Type*)NULL; }
friend class PhaseTransform;
public:
Type_Array(Arena *a) : _a(a), _max(0), _types(0) {}
Type_Array(Type_Array *ta) : _a(ta->_a), _max(ta->_max), _types(ta->_types) { }
const Type *fast_lookup(uint i) const{assert(i<_max,"oob");return _types[i];}
// Extend the mapping: index i maps to Type *n.
void map( uint i, const Type *n ) { if( i>=_max ) grow(i); _types[i] = n; }
uint Size() const { return _max; }
#ifndef PRODUCT
void dump() const;
#endif
};
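// Illustrative sketch (not in the original source): map() grows the doubling
// array on demand, so callers can treat it as an infinite NULL-initialized
// array indexed by node _idx, e.g.
//   Type_Array types(arena);                   // 'arena' is a hypothetical Arena*
//   types.map(n->_idx, TypeInt::INT);          // grows storage as needed
//   const Type* t = (n->_idx < types.Size()) ? types.fast_lookup(n->_idx) : NULL;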
//------------------------------PhaseRemoveUseless-----------------------------
// Remove useless nodes from GVN hash-table, worklist, and graph
class PhaseRemoveUseless : public Phase {
protected:
Unique_Node_List _useful; // Nodes reachable from root
// list is allocated from current resource area
public:
PhaseRemoveUseless(PhaseGVN *gvn, Unique_Node_List *worklist, PhaseNumber phase_num = Remove_Useless);
Unique_Node_List *get_useful() { return &_useful; }
};
//------------------------------PhaseRenumber----------------------------------
// Phase that first performs a PhaseRemoveUseless, then it renumbers compiler
// structures accordingly.
class PhaseRenumberLive : public PhaseRemoveUseless {
public:
PhaseRenumberLive(PhaseGVN* gvn,
Unique_Node_List* worklist, Unique_Node_List* new_worklist,
PhaseNumber phase_num = Remove_Useless_And_Renumber_Live);
};
//------------------------------PhaseTransform---------------------------------
// Phases that analyze, then transform. Constructing the Phase object does any
// global or slow analysis. The results are cached later for a fast
// transformation pass. When the Phase object is deleted the cached analysis
// results are deleted.
class PhaseTransform : public Phase {
protected:
Arena* _arena;
Node_List _nodes; // Map old node indices to new nodes.
Type_Array _types; // Map old node indices to Types.
// ConNode caches:
enum { _icon_min = -1 * HeapWordSize,
_icon_max = 16 * HeapWordSize,
_lcon_min = _icon_min,
_lcon_max = _icon_max,
_zcon_max = (uint)T_CONFLICT
};
ConINode* _icons[_icon_max - _icon_min + 1]; // cached jint constant nodes
ConLNode* _lcons[_lcon_max - _lcon_min + 1]; // cached jlong constant nodes
ConNode* _zcons[_zcon_max + 1]; // cached is_zero_type nodes
void init_con_caches();
// Support both int and long caches because either might be an intptr_t,
// so they show up frequently in address computations.
public:
PhaseTransform( PhaseNumber pnum );
PhaseTransform( Arena *arena, PhaseNumber pnum );
PhaseTransform( PhaseTransform *phase, PhaseNumber pnum );
Arena* arena() { return _arena; }
Type_Array& types() { return _types; }
void replace_types(Type_Array new_types) {
_types = new_types;
}
// _nodes is used in varying ways by subclasses, which define local accessors
uint nodes_size() {
return _nodes.size();
}
public:
// Get a previously recorded type for the node n.
// This type must already have been recorded.
// If you want the type of a very new (untransformed) node,
// you must use type_or_null, and test the result for NULL.
const Type* type(const Node* n) const {
assert(n != NULL, "must not be null");
const Type* t = _types.fast_lookup(n->_idx);
assert(t != NULL, "must set before get");
return t;
}
// Get a previously recorded type for the node n,
// or else return NULL if there is none.
const Type* type_or_null(const Node* n) const {
return _types.fast_lookup(n->_idx);
}
// Record a type for a node.
void set_type(const Node* n, const Type *t) {
assert(t != NULL, "type must not be null");
_types.map(n->_idx, t);
}
// Record an initial type for a node, the node's bottom type.
void set_type_bottom(const Node* n) {
// Use this for initialization when bottom_type() (or better) is not handy.
// Usually the initialization should be to n->Value(this) instead,
// or a hand-optimized value like Type::MEMORY or Type::CONTROL.
assert(_types[n->_idx] == NULL, "must set the initial type just once");
_types.map(n->_idx, n->bottom_type());
}
// Make sure the types array is big enough to record a size for the node n.
// (In product builds, we never want to do range checks on the types array!)
void ensure_type_or_null(const Node* n) {
if (n->_idx >= _types.Size())
_types.map(n->_idx, NULL); // Grow the types array as needed.
}
// Utility functions:
const TypeInt* find_int_type( Node* n);
const TypeLong* find_long_type(Node* n);
jint find_int_con( Node* n, jint value_if_unknown) {
const TypeInt* t = find_int_type(n);
return (t != NULL && t->is_con()) ? t->get_con() : value_if_unknown;
}
jlong find_long_con(Node* n, jlong value_if_unknown) {
const TypeLong* t = find_long_type(n);
return (t != NULL && t->is_con()) ? t->get_con() : value_if_unknown;
}
// Make an idealized constant, i.e., one of ConINode, ConPNode, ConFNode, etc.
// Same as transform(ConNode::make(t)).
ConNode* makecon(const Type* t);
virtual ConNode* uncached_makecon(const Type* t) // override in PhaseValues
{ ShouldNotCallThis(); return NULL; }
// Fast int or long constant. Same as TypeInt::make(i) or TypeLong::make(l).
ConINode* intcon(jint i);
ConLNode* longcon(jlong l);
// Fast zero or null constant. Same as makecon(Type::get_zero_type(bt)).
ConNode* zerocon(BasicType bt);
// Return a node which computes the same function as this node, but
// in a faster or cheaper fashion.
virtual Node *transform( Node *n ) = 0;
// Return whether two Nodes are equivalent.
// Must not be recursive, since the recursive version is built from this.
// For pessimistic optimizations this is simply pointer equivalence.
bool eqv(const Node* n1, const Node* n2) const { return n1 == n2; }
// For pessimistic passes, the return type must monotonically narrow.
// For optimistic passes, the return type must monotonically widen.
// It is possible to get into a "death march" in either type of pass,
// where the types are continually moving but it will take 2**31 or
// more steps to converge. This doesn't happen on most normal loops.
//
// Here is an example of a deadly loop for an optimistic pass, along
// with a partial trace of inferred types:
// x = phi(0,x'); L: x' = x+1; if (x' >= 0) goto L;
// 0 1 join([0..max], 1)
// [0..1] [1..2] join([0..max], [1..2])
// [0..2] [1..3] join([0..max], [1..3])
// ... ... ...
// [0..max] [min]u[1..max] join([0..max], [min..max])
// [0..max] ==> fixpoint
// We would have proven, the hard way, that the iteration space is all
// non-negative ints, with the loop terminating due to 32-bit overflow.
//
// Here is the corresponding example for a pessimistic pass:
// x = phi(0,x'); L: x' = x-1; if (x' >= 0) goto L;
// int int join([0..max], int)
// [0..max] [-1..max-1] join([0..max], [-1..max-1])
// [0..max-1] [-1..max-2] join([0..max], [-1..max-2])
// ... ... ...
// [0..1] [-1..0] join([0..max], [-1..0])
// 0 -1 join([0..max], -1)
// 0 == fixpoint
// We would have proven, the hard way, that the iteration space is {0}.
// (Usually, other optimizations will make the "if (x >= 0)" fold up
// before we get into trouble. But not always.)
//
// It's a pleasant thing to observe that the pessimistic pass
// will make short work of the optimistic pass's deadly loop,
// and vice versa. That is a good example of the complementary
// purposes of the CCP (optimistic) vs. GVN (pessimistic) phases.
//
// In any case, only widen or narrow a few times before going to the
// correct flavor of top or bottom.
//
// This call only needs to be made once as the data flows around any
// given cycle. We do it at Phis, and nowhere else.
// The types presented are the new type of a phi (computed by PhiNode::Value)
// and the previously computed type, last time the phi was visited.
//
// The third argument is upper limit for the saturated value,
// if the phase wishes to widen the new_type.
// If the phase is narrowing, the old type provides a lower limit.
// Caller guarantees that old_type and new_type are no higher than limit_type.
virtual const Type* saturate(const Type* new_type, const Type* old_type,
const Type* limit_type) const
{ ShouldNotCallThis(); return NULL; }
virtual PhaseIterGVN *is_IterGVN() { return 0; }
#ifndef PRODUCT
void dump_old2new_map() const;
void dump_new( uint new_lidx ) const;
void dump_types() const;
void dump_nodes_and_types(const Node *root, uint depth, bool only_ctrl = true);
void dump_nodes_and_types_recur( const Node *n, uint depth, bool only_ctrl, VectorSet &visited);
uint _count_progress; // For profiling, count transforms that make progress
void set_progress() { ++_count_progress; assert( allow_progress(),"No progress allowed during verification"); }
void clear_progress() { _count_progress = 0; }
uint made_progress() const { return _count_progress; }
uint _count_transforms; // For profiling, count transforms performed
void set_transforms() { ++_count_transforms; }
void clear_transforms() { _count_transforms = 0; }
uint made_transforms() const{ return _count_transforms; }
bool _allow_progress; // progress not allowed during verification pass
void set_allow_progress(bool allow) { _allow_progress = allow; }
bool allow_progress() { return _allow_progress; }
#endif
};
//------------------------------PhaseValues------------------------------------
// Phase infrastructure to support values
class PhaseValues : public PhaseTransform {
protected:
NodeHash _table; // Hash table for value-numbering
public:
PhaseValues( Arena *arena, uint est_max_size );
PhaseValues( PhaseValues *pt );
PhaseValues( PhaseValues *ptv, const char *dummy );
NOT_PRODUCT( ~PhaseValues(); )
virtual PhaseIterGVN *is_IterGVN() { return 0; }
// Some Ideal and other transforms delete --> modify --> insert values
bool hash_delete(Node *n) { return _table.hash_delete(n); }
void hash_insert(Node *n) { _table.hash_insert(n); }
Node *hash_find_insert(Node *n){ return _table.hash_find_insert(n); }
Node *hash_find(const Node *n) { return _table.hash_find(n); }
// Used after parsing to eliminate values that are no longer in program
void remove_useless_nodes(VectorSet &useful) {
_table.remove_useless_nodes(useful);
// this may invalidate cached cons so reset the cache
init_con_caches();
}
virtual ConNode* uncached_makecon(const Type* t); // override from PhaseTransform
virtual const Type* saturate(const Type* new_type, const Type* old_type,
const Type* limit_type) const
{ return new_type; }
#ifndef PRODUCT
uint _count_new_values; // For profiling, count new values produced
void inc_new_values() { ++_count_new_values; }
void clear_new_values() { _count_new_values = 0; }
uint made_new_values() const { return _count_new_values; }
#endif
};
//------------------------------PhaseGVN---------------------------------------
// Phase for performing local, pessimistic GVN-style optimizations.
class PhaseGVN : public PhaseValues {
public:
PhaseGVN( Arena *arena, uint est_max_size ) : PhaseValues( arena, est_max_size ) {}
PhaseGVN( PhaseGVN *gvn ) : PhaseValues( gvn ) {}
PhaseGVN( PhaseGVN *gvn, const char *dummy ) : PhaseValues( gvn, dummy ) {}
// Return a node which computes the same function as this node, but
// in a faster or cheaper fashion.
Node *transform( Node *n );
Node *transform_no_reclaim( Node *n );
void replace_with(PhaseGVN* gvn) {
_table.replace_with(&gvn->_table);
_types = gvn->_types;
}
// Check for a simple dead loop when a data node references itself.
DEBUG_ONLY(void dead_loop_check(Node *n);)
};
//------------------------------PhaseIterGVN-----------------------------------
// Phase for iteratively performing local, pessimistic GVN-style optimizations.
// and ideal transformations on the graph.
class PhaseIterGVN : public PhaseGVN {
private:
bool _delay_transform; // When true simply register the node when calling transform
// instead of actually optimizing it
// Idealize old Node 'n' with respect to its inputs and its value
virtual Node *transform_old( Node *a_node );
// Subsume users of node 'old' into node 'nn'
void subsume_node( Node *old, Node *nn );
Node_Stack _stack; // Stack used to avoid recursion
protected:
// Idealize new Node 'n' with respect to its inputs and its value
virtual Node *transform( Node *a_node );
// Warm up hash table, type table and initial worklist
void init_worklist( Node *a_root );
virtual const Type* saturate(const Type* new_type, const Type* old_type,
const Type* limit_type) const;
// Usually returns new_type. Returns old_type if new_type is only a slight
// improvement, such that it would take many (>>10) steps to reach 2**32.
public:
PhaseIterGVN( PhaseIterGVN *igvn ); // Used by CCP constructor
PhaseIterGVN( PhaseGVN *gvn ); // Used after Parser
PhaseIterGVN( PhaseIterGVN *igvn, const char *dummy ); // Used after +VerifyOpto
virtual PhaseIterGVN *is_IterGVN() { return this; }
Unique_Node_List _worklist; // Iterative worklist
// Given def-use info and an initial worklist, apply Node::Ideal,
// Node::Value, Node::Identity, hash-based value numbering, Node::Ideal_DU
// and dominator info to a fixed point.
void optimize();
// Register a new node with the iter GVN pass without transforming it.
// Used when we need to restructure a Region/Phi area and all the Regions
// and Phis need to complete this one big transform before any other
// transforms can be triggered on the region.
// Optional 'orig' is an earlier version of this node.
// It is significant only for debugging and profiling.
Node* register_new_node_with_optimizer(Node* n, Node* orig = NULL);
// Kill a globally dead Node. All uses are also globally dead and are
// aggressively trimmed.
void remove_globally_dead_node( Node *dead );
// Kill all inputs to a dead node, recursively making more dead nodes.
// The Node must be dead locally, i.e., have no uses.
void remove_dead_node( Node *dead ) {
assert(dead->outcnt() == 0 && !dead->is_top(), "node must be dead");
remove_globally_dead_node(dead);
}
// Add users of 'n' to worklist
void add_users_to_worklist0( Node *n );
void add_users_to_worklist ( Node *n );
// Replace old node with new one.
void replace_node( Node *old, Node *nn ) {
add_users_to_worklist(old);
hash_delete(old); // Yank from hash before hacking edges
subsume_node(old, nn);
}
// Delayed node rehash: remove a node from the hash table and rehash it during
// next optimizing pass
void rehash_node_delayed(Node* n) {
hash_delete(n);
_worklist.push(n);
}
// Replace ith edge of "n" with "in"
void replace_input_of(Node* n, int i, Node* in) {
rehash_node_delayed(n);
n->set_req(i, in);
}
// Delete ith edge of "n"
void delete_input_of(Node* n, int i) {
rehash_node_delayed(n);
n->del_req(i);
}
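// Illustrative usage sketch (not part of the original source): a node that may
// already be value-numbered must not have its edges changed while it sits in
// the hash table, so callers rewire through the helpers above, e.g.
//   igvn->replace_input_of(use, 2, new_in);   // hash_delete + set_req + worklist push
// rather than calling use->set_req(2, new_in) directly.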
bool delay_transform() const { return _delay_transform; }
void set_delay_transform(bool delay) {
_delay_transform = delay;
}
// Clone loop predicates. Defined in loopTransform.cpp.
Node* clone_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check);
// Create a new if below new_entry for the predicate to be cloned
ProjNode* create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry,
Deoptimization::DeoptReason reason);
void remove_speculative_types();
void check_no_speculative_types() {
_table.check_no_speculative_types();
}
#ifndef PRODUCT
protected:
// Sub-quadratic implementation of VerifyIterativeGVN.
julong _verify_counter;
julong _verify_full_passes;
enum { _verify_window_size = 30 };
Node* _verify_window[_verify_window_size];
void verify_step(Node* n);
#endif
};
//------------------------------PhaseCCP---------------------------------------
// Phase for performing global Conditional Constant Propagation.
// Should be replaced with combined CCP & GVN someday.
class PhaseCCP : public PhaseIterGVN {
// Non-recursive. Use analysis to transform single Node.
virtual Node *transform_once( Node *n );
public:
PhaseCCP( PhaseIterGVN *igvn ); // Compute conditional constants
NOT_PRODUCT( ~PhaseCCP(); )
// Worklist algorithm identifies constants
void analyze();
// Recursive traversal of the program. Uses the analysis results to modify the program.
virtual Node *transform( Node *n );
// Do any transformation after analysis
void do_transform();
virtual const Type* saturate(const Type* new_type, const Type* old_type,
const Type* limit_type) const;
// Returns new_type->widen(old_type), which increments the widen bits until
// giving up with TypeInt::INT or TypeLong::LONG.
// Result is clipped to limit_type if necessary.
#ifndef PRODUCT
static uint _total_invokes; // For profiling, count invocations
void inc_invokes() { ++PhaseCCP::_total_invokes; }
static uint _total_constants; // For profiling, count constants found
uint _count_constants;
void clear_constants() { _count_constants = 0; }
void inc_constants() { ++_count_constants; }
uint count_constants() const { return _count_constants; }
static void print_statistics();
#endif
};
//------------------------------PhasePeephole----------------------------------
// Phase for performing peephole optimizations on register allocated basic blocks.
class PhasePeephole : public PhaseTransform {
PhaseRegAlloc *_regalloc;
PhaseCFG &_cfg;
// Recursive traversal of program. Pure function is unused in this phase
virtual Node *transform( Node *n );
public:
PhasePeephole( PhaseRegAlloc *regalloc, PhaseCFG &cfg );
NOT_PRODUCT( ~PhasePeephole(); )
// Do any transformation after analysis
void do_transform();
#ifndef PRODUCT
static uint _total_peepholes; // For profiling, count peephole rules applied
uint _count_peepholes;
void clear_peepholes() { _count_peepholes = 0; }
void inc_peepholes() { ++_count_peepholes; }
uint count_peepholes() const { return _count_peepholes; }
static void print_statistics();
#endif
};
#endif // SHARE_VM_OPTO_PHASEX_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/postaloc.cpp
/*
* Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/chaitin.hpp"
#include "opto/machnode.hpp"
// See if this register (or pairs, or vector) already contains the value.
static bool register_contains_value(Node* val, OptoReg::Name reg, int n_regs,
Node_List& value) {
for (int i = 0; i < n_regs; i++) {
OptoReg::Name nreg = OptoReg::add(reg,-i);
if (value[nreg] != val)
return false;
}
return true;
}
//---------------------------may_be_copy_of_callee-----------------------------
// Check to see if we can possibly be a copy of a callee-save value.
bool PhaseChaitin::may_be_copy_of_callee( Node *def ) const {
// Short circuit if there are no callee save registers
if (_matcher.number_of_saved_registers() == 0) return false;
// Expect only a spill-down and reload on exit for callee-save spills.
// Chains of copies cannot be deep.
// 5008997 - This is wishful thinking. Register allocator seems to
// be splitting live ranges for callee save registers to such
// an extent that in large methods the chains can be very long
// (50+). The conservative answer is to return true if we don't
// know as this prevents optimizations from occurring.
const int limit = 60;
int i;
for( i=0; i < limit; i++ ) {
if( def->is_Proj() && def->in(0)->is_Start() &&
_matcher.is_save_on_entry(lrgs(_lrg_map.live_range_id(def)).reg()))
return true; // Direct use of callee-save proj
if( def->is_Copy() ) // Copies carry value through
def = def->in(def->is_Copy());
else if( def->is_Phi() ) // Phis can merge it from any direction
def = def->in(1);
else
break;
guarantee(def != NULL, "must not resurrect dead copy");
}
// If we reached the limit and didn't find a callee-save proj
// then this may still be a copy of a callee-save value, so we return
// true as the conservative answer. If we exited the loop early, we
// must have discovered that it was not a callee-save copy, else we
// would already have returned.
return i == limit;
}
//------------------------------yank-----------------------------------
// Helper function for yank_if_dead
int PhaseChaitin::yank( Node *old, Block *current_block, Node_List *value, Node_List *regnd ) {
int blk_adjust=0;
Block *oldb = _cfg.get_block_for_node(old);
oldb->find_remove(old);
// Count 1 if deleting an instruction from the current block
if (oldb == current_block) {
blk_adjust++;
}
_cfg.unmap_node_from_block(old);
OptoReg::Name old_reg = lrgs(_lrg_map.live_range_id(old)).reg();
if( regnd && (*regnd)[old_reg]==old ) { // Instruction is currently available?
value->map(old_reg,NULL); // Yank from value/regnd maps
regnd->map(old_reg,NULL); // This register's value is now unknown
}
return blk_adjust;
}
#ifdef ASSERT
static bool expected_yanked_node(Node *old, Node *orig_old) {
// This code expects only the following original nodes:
// - load from constant table node, which may have the following data input nodes:
//   MachConstantBase, MachTemp, MachSpillCopy
// - Phi nodes that are considered Junk
// - load constant node, which may have the following data input nodes:
//   MachTemp, MachSpillCopy
// - MachSpillCopy
// - MachProj and Copy dead nodes
if (old->is_MachSpillCopy()) {
return true;
} else if (old->is_Con()) {
return true;
} else if (old->is_MachProj()) { // Dead kills projection of Con node
return (old == orig_old);
} else if (old->is_Copy()) { // Dead copy of a callee-save value
return (old == orig_old);
} else if (old->is_MachTemp()) {
return orig_old->is_Con();
} else if (old->is_Phi()) { // Junk phi's
return true;
} else if (old->is_MachConstantBase()) {
return (orig_old->is_Con() && orig_old->is_MachConstant());
}
return false;
}
#endif
//------------------------------yank_if_dead-----------------------------------
// Removed edges from 'old'. Yank if dead. Return adjustment counts to
// iterators in the current block.
int PhaseChaitin::yank_if_dead_recurse(Node *old, Node *orig_old, Block *current_block,
Node_List *value, Node_List *regnd) {
int blk_adjust=0;
if (old->outcnt() == 0 && old != C->top()) {
#ifdef ASSERT
if (!expected_yanked_node(old, orig_old)) {
tty->print_cr("==============================================");
tty->print_cr("orig_old:");
orig_old->dump();
tty->print_cr("old:");
old->dump();
assert(false, "unexpected yanked node");
}
if (old->is_Con())
orig_old = old; // Reset to satisfy expected nodes checks.
#endif
blk_adjust += yank(old, current_block, value, regnd);
for (uint i = 1; i < old->req(); i++) {
Node* n = old->in(i);
if (n != NULL) {
old->set_req(i, NULL);
blk_adjust += yank_if_dead_recurse(n, orig_old, current_block, value, regnd);
}
}
// Disconnect control and remove precedence edges if any exist
old->disconnect_inputs(NULL, C);
}
return blk_adjust;
}
//------------------------------use_prior_register-----------------------------
// Use the prior value instead of the current value, in an effort to make
// the current value go dead. Return block iterator adjustment, in case
// we yank some instructions from this block.
int PhaseChaitin::use_prior_register( Node *n, uint idx, Node *def, Block *current_block, Node_List &value, Node_List ®nd ) {
// No effect?
if( def == n->in(idx) ) return 0;
// Def is currently dead and can be removed? Do not resurrect
if( def->outcnt() == 0 ) return 0;
// Not every pair of physical registers is assignment compatible,
// e.g. on sparc floating point registers are not assignable to integer
// registers.
const LRG &def_lrg = lrgs(_lrg_map.live_range_id(def));
OptoReg::Name def_reg = def_lrg.reg();
const RegMask &use_mask = n->in_RegMask(idx);
bool can_use = ( RegMask::can_represent(def_reg) ? (use_mask.Member(def_reg) != 0)
: (use_mask.is_AllStack() != 0));
if (!RegMask::is_vector(def->ideal_reg())) {
// Check for a copy to or from a misaligned pair.
// It is a workaround for sparc's misaligned pairs.
can_use = can_use && !use_mask.is_misaligned_pair() && !def_lrg.mask().is_misaligned_pair();
}
if (!can_use)
return 0;
// Capture the old def in case it goes dead...
Node *old = n->in(idx);
// Save-on-call copies can only be elided if the entire copy chain can go
// away, lest we get the same callee-save value alive in 2 locations at
// once. We check for the obvious trivial case here. Although it can
// sometimes be elided with cooperation outside our scope, here we will just
// miss the opportunity. :-(
if( may_be_copy_of_callee(def) ) {
if( old->outcnt() > 1 ) return 0; // We're not the last user
int idx = old->is_Copy();
assert( idx, "chain of copies being removed" );
Node *old2 = old->in(idx); // Chain of copies
if( old2->outcnt() > 1 ) return 0; // old is not the last user
int idx2 = old2->is_Copy();
if( !idx2 ) return 0; // Not a chain of 2 copies
if( def != old2->in(idx2) ) return 0; // Chain of exactly 2 copies
}
// Use the new def
n->set_req(idx,def);
_post_alloc++;
// Is old def now dead? We successfully yanked a copy?
return yank_if_dead(old,current_block,&value,®nd);
}
//------------------------------skip_copies------------------------------------
// Skip through any number of copies (that don't mod oop-i-ness)
Node *PhaseChaitin::skip_copies( Node *c ) {
int idx = c->is_Copy();
uint is_oop = lrgs(_lrg_map.live_range_id(c))._is_oop;
while (idx != 0) {
guarantee(c->in(idx) != NULL, "must not resurrect dead copy");
if (lrgs(_lrg_map.live_range_id(c->in(idx)))._is_oop != is_oop) {
break; // casting copy, not the same value
}
c = c->in(idx);
idx = c->is_Copy();
}
return c;
}
//------------------------------elide_copy-------------------------------------
// Remove (bypass) copies along Node n, edge k.
int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &value, Node_List ®nd, bool can_change_regs ) {
int blk_adjust = 0;
uint nk_idx = _lrg_map.live_range_id(n->in(k));
OptoReg::Name nk_reg = lrgs(nk_idx).reg();
// Remove obvious same-register copies
Node *x = n->in(k);
int idx;
while( (idx=x->is_Copy()) != 0 ) {
Node *copy = x->in(idx);
guarantee(copy != NULL, "must not resurrect dead copy");
if(lrgs(_lrg_map.live_range_id(copy)).reg() != nk_reg) {
break;
}
blk_adjust += use_prior_register(n,k,copy,current_block,value,regnd);
if (n->in(k) != copy) {
break; // Failed for some cutout?
}
x = copy; // Progress, try again
}
// Phis and 2-address instructions cannot change registers so easily - their
// outputs must match their input.
if( !can_change_regs )
return blk_adjust; // Only check stupid copies!
// Loop backedges won't have a value-mapping yet
if( &value == NULL ) return blk_adjust;
// Skip through all copies to the _value_ being used. Do not change from
// int to pointer. This attempts to jump through a chain of copies, where
// intermediate copies might be illegal, i.e., value is stored down to stack
// then reloaded BUT survives in a register the whole way.
Node *val = skip_copies(n->in(k));
if (val == x) return blk_adjust; // No progress?
int n_regs = RegMask::num_registers(val->ideal_reg());
uint val_idx = _lrg_map.live_range_id(val);
OptoReg::Name val_reg = lrgs(val_idx).reg();
// See if it happens to already be in the correct register!
// (either Phi's direct register, or the common case of the name
// never-clobbered original-def register)
if (register_contains_value(val, val_reg, n_regs, value)) {
blk_adjust += use_prior_register(n,k,regnd[val_reg],current_block,value,regnd);
if( n->in(k) == regnd[val_reg] ) // Success! Quit trying
return blk_adjust;
}
// See if we can skip the copy by changing registers. Don't change from
// using a register to using the stack unless we know we can remove a
// copy-load. Otherwise we might end up making a pile of Intel cisc-spill
// ops reading from memory instead of just loading once and using the
// register.
// Also handle duplicate copies here.
const Type *t = val->is_Con() ? val->bottom_type() : NULL;
// Scan all registers to see if this value is around already
for( uint reg = 0; reg < (uint)_max_reg; reg++ ) {
if (reg == (uint)nk_reg) {
// Found ourselves so check if there is only one user of this
// copy and keep on searching for a better copy if so.
bool ignore_self = true;
x = n->in(k);
DUIterator_Fast imax, i = x->fast_outs(imax);
Node* first = x->fast_out(i); i++;
while (i < imax && ignore_self) {
Node* use = x->fast_out(i); i++;
if (use != first) ignore_self = false;
}
if (ignore_self) continue;
}
Node *vv = value[reg];
if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set
uint last = (n_regs-1); // Looking for the last part of a set
if ((reg&last) != last) continue; // Wrong part of a set
if (!register_contains_value(vv, reg, n_regs, value)) continue; // Different value
}
if( vv == val || // Got a direct hit?
(t && vv && vv->bottom_type() == t && vv->is_Mach() &&
vv->as_Mach()->rule() == val->as_Mach()->rule()) ) { // Or same constant?
assert( !n->is_Phi(), "cannot change registers at a Phi so easily" );
if( OptoReg::is_stack(nk_reg) || // CISC-loading from stack OR
OptoReg::is_reg(reg) || // turning into a register use OR
regnd[reg]->outcnt()==1 ) { // last use of a spill-load turns into a CISC use
blk_adjust += use_prior_register(n,k,regnd[reg],current_block,value,regnd);
if( n->in(k) == regnd[reg] ) // Success! Quit trying
return blk_adjust;
} // End of if not degrading to a stack
} // End of if found value in another register
} // End of scan all machine registers
return blk_adjust;
}
//
// Check if nreg already contains the constant value val. Normal copy
// elimination doesn't work on constants because multiple nodes can
// represent the same constant, so the type and rule of the MachNode
// must be checked to ensure equivalence.
//
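// Illustrative sketch (not in the original source): two distinct MachNodes that
// load the same constant (same rule, same bottom_type) may be allocated to the
// same register; once the first is recorded in value[nreg], the second is
// redundant and its uses can be redirected to the first, e.g.
//   n1: loadConI R, #0    // value[R] = n1
//   n2: loadConI R, #0    // same rule and type -> uses of n2 moved to n1
// The instruction and register names above are hypothetical.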
bool PhaseChaitin::eliminate_copy_of_constant(Node* val, Node* n,
Block *current_block,
Node_List& value, Node_List& regnd,
OptoReg::Name nreg, OptoReg::Name nreg2) {
if (value[nreg] != val && val->is_Con() &&
value[nreg] != NULL && value[nreg]->is_Con() &&
(nreg2 == OptoReg::Bad || value[nreg] == value[nreg2]) &&
value[nreg]->bottom_type() == val->bottom_type() &&
value[nreg]->as_Mach()->rule() == val->as_Mach()->rule()) {
// This code assumes that two MachNodes representing constants
// which have the same rule and the same bottom type will produce
// identical effects into a register. This seems like it must be
// objectively true unless there are hidden inputs to the nodes
// but if that were to change this code would need to be updated.
// Since they are equivalent the second one is redundant and can
// be removed.
//
// n will be replaced with the old value but n might have
// kill projections associated with it so remove them now so that
// yank_if_dead will be able to eliminate the copy once the uses
// have been transferred to the old value.
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node* use = n->fast_out(i);
if (use->is_Proj() && use->outcnt() == 0) {
// Kill projections have no users and one input
use->set_req(0, C->top());
yank_if_dead(use, current_block, &value, ®nd);
--i; --imax;
}
}
_post_alloc++;
return true;
}
return false;
}
// The algorithm works as follows:
// We traverse the block top to bottom. possibly_merge_multidef() is invoked for every input edge k
// of the instruction n. We check to see if the input is a multidef lrg. If it is, we record the fact that we've
// seen a definition (coming in as an input) and add that fact to the reg2defuse array. The array maps registers to their
// current reaching definitions (we track only multidefs though). With each definition we also associate the first
// instruction we saw use it. If we encounter a def (an input) that is part of the same lrg but differs from the
// previously seen def, we merge the two with a MachMerge node and redirect all the uses we've seen so far to the
// merge. After that we keep replacing new defs in the same lrg with the merge node as they are encountered and
// keep adding these defs to the merge's inputs.
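// Illustrative sketch (not part of the original source): for a multidef live
// range allocated to register R, with two reaching defs d1 and d2 feeding later
// uses in the same block, the rewrite is roughly
//   before:  u1(d1)  u2(d1)  u3(d2)
//   after :  m = MachMerge(d1, d2);  u1(m)  u2(m)  u3(m)
// where the MachMerge node is inserted before the first recorded use and gains
// an extra input for each further def of the same live range that is seen.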
void PhaseChaitin::merge_multidefs() {
NOT_PRODUCT( Compile::TracePhase t3("mergeMultidefs", &_t_mergeMultidefs, TimeCompiler); )
ResourceMark rm;
// Keep track of the defs seen in registers and collect their uses in the block.
RegToDefUseMap reg2defuse(_max_reg, _max_reg, RegDefUse());
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
Block* block = _cfg.get_block(i);
for (uint j = 1; j < block->number_of_nodes(); j++) {
Node* n = block->get_node(j);
if (n->is_Phi()) continue;
for (uint k = 1; k < n->req(); k++) {
j += possibly_merge_multidef(n, k, block, reg2defuse);
}
// Null out the value produced by the instruction itself, since we're only interested in defs
// implicitly defined by the uses. We are actually interested in tracking only redefinitions
// of the multidef lrgs in the same register. For that matter it's enough to track changes in
// the base register only and ignore other effects of multi-register lrgs and fat projections.
// It is also ok to ignore defs coming from singledefs. After an implicit overwrite by one of
// those our register is guaranteed to be used by another lrg and we won't attempt to merge it.
uint lrg = _lrg_map.live_range_id(n);
if (lrg > 0 && lrgs(lrg).is_multidef()) {
OptoReg::Name reg = lrgs(lrg).reg();
reg2defuse.at(reg).clear();
}
}
// Clear reg->def->use tracking for the next block
for (int j = 0; j < reg2defuse.length(); j++) {
reg2defuse.at(j).clear();
}
}
}
int PhaseChaitin::possibly_merge_multidef(Node *n, uint k, Block *block, RegToDefUseMap& reg2defuse) {
int blk_adjust = 0;
uint lrg = _lrg_map.live_range_id(n->in(k));
if (lrg > 0 && lrgs(lrg).is_multidef()) {
OptoReg::Name reg = lrgs(lrg).reg();
Node* def = reg2defuse.at(reg).def();
if (def != NULL && lrg == _lrg_map.live_range_id(def) && def != n->in(k)) {
// Same lrg but different node, we have to merge.
MachMergeNode* merge;
if (def->is_MachMerge()) { // is it already a merge?
merge = def->as_MachMerge();
} else {
merge = new (C) MachMergeNode(def);
// Insert the merge node into the block before the first use.
uint use_index = block->find_node(reg2defuse.at(reg).first_use());
block->insert_node(merge, use_index++);
_cfg.map_node_to_block(merge, block);
// Let the allocator know about the new node, use the same lrg
_lrg_map.extend(merge->_idx, lrg);
blk_adjust++;
// Fixup all the uses (there is at least one) that happened between the first
// use and before the current one.
for (; use_index < block->number_of_nodes(); use_index++) {
Node* use = block->get_node(use_index);
if (use == n) {
break;
}
use->replace_edge(def, merge);
}
}
if (merge->find_edge(n->in(k)) == -1) {
merge->add_req(n->in(k));
}
n->set_req(k, merge);
}
// update the uses
reg2defuse.at(reg).update(n->in(k), n);
}
return blk_adjust;
}
//------------------------------post_allocate_copy_removal---------------------
// Post-Allocation peephole copy removal. We do this in 1 pass over the
// basic blocks. We maintain a mapping of registers to Nodes (an array of
// Nodes indexed by machine register or stack slot number). NULL means that a
// register is not mapped to any Node. We can (want to have!) have several
// registers map to the same Node. We walk forward over the instructions
// updating the mapping as we go. At merge points we force a NULL if we have
// to merge 2 different Nodes into the same register. Phi functions will give
// us a new Node if there is a proper value merging. Since the blocks are
// arranged in some RPO, we will visit all parent blocks before visiting any
// successor blocks (except at loops).
//
// If we find a Copy we look to see if the Copy's source register is a stack
// slot and that value has already been loaded into some machine register; if
// so we use machine register directly. This turns a Load into a reg-reg
// Move. We also look for reloads of identical constants.
//
// When we see a use from a reg-reg Copy, we will attempt to use the copy's
// source directly and make the copy go dead.
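// Illustrative sketch (not part of the original source): with 'value'/'regnd'
// mapping each register to the node last known to define it, a reload whose
// value is already live in a register can be bypassed, e.g.
//   value[R1] = v     // v was spilled earlier but is still live in R1
//   n: load  R2, [spill slot of v]
// Since the map shows v available in R1, uses of n can be fed from R1 instead,
// turning the load into a reg-reg move or letting n go dead. Register names
// here are hypothetical.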
void PhaseChaitin::post_allocate_copy_removal() {
NOT_PRODUCT( Compile::TracePhase t3("postAllocCopyRemoval", &_t_postAllocCopyRemoval, TimeCompiler); )
ResourceMark rm;
// Need a mapping from basic blocks to Node_Lists. We need a Node_List to
// map from register number to value-producing Node.
Node_List **blk2value = NEW_RESOURCE_ARRAY( Node_List *, _cfg.number_of_blocks() + 1);
memset(blk2value, 0, sizeof(Node_List*) * (_cfg.number_of_blocks() + 1));
// Need a mapping from basic blocks to Node_Lists. We need a Node_List to
// map from register number to register-defining Node.
Node_List **blk2regnd = NEW_RESOURCE_ARRAY( Node_List *, _cfg.number_of_blocks() + 1);
memset(blk2regnd, 0, sizeof(Node_List*) * (_cfg.number_of_blocks() + 1));
// We keep unused Node_Lists on a free_list to avoid wasting
// memory.
GrowableArray<Node_List*> free_list = GrowableArray<Node_List*>(16);
// For all blocks
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
uint j;
Block* block = _cfg.get_block(i);
// Count of Phis in block
uint phi_dex;
for (phi_dex = 1; phi_dex < block->number_of_nodes(); phi_dex++) {
Node* phi = block->get_node(phi_dex);
if (!phi->is_Phi()) {
break;
}
}
// If any predecessor has not been visited, we do not know the state
// of registers at the start. Check for this, while updating copies
// along Phi input edges
bool missing_some_inputs = false;
Block *freed = NULL;
for (j = 1; j < block->num_preds(); j++) {
Block* pb = _cfg.get_block_for_node(block->pred(j));
// Remove copies along phi edges
for (uint k = 1; k < phi_dex; k++) {
elide_copy(block->get_node(k), j, block, *blk2value[pb->_pre_order], *blk2regnd[pb->_pre_order], false);
}
if (blk2value[pb->_pre_order]) { // Have a mapping on this edge?
// See if this predecessor's mappings have been used by everybody
// who wants them. If so, free 'em.
uint k;
for (k = 0; k < pb->_num_succs; k++) {
Block* pbsucc = pb->_succs[k];
if (!blk2value[pbsucc->_pre_order] && pbsucc != block) {
break; // Found a future user
}
}
if (k >= pb->_num_succs) { // No more uses, free!
freed = pb; // Record last block freed
free_list.push(blk2value[pb->_pre_order]);
free_list.push(blk2regnd[pb->_pre_order]);
}
} else { // This block has unvisited (loopback) inputs
missing_some_inputs = true;
}
}
// Extract Node_List mappings. If 'freed' is non-zero, we just popped
// 'freed's blocks off the list
Node_List ®nd = *(free_list.is_empty() ? new Node_List() : free_list.pop());
Node_List &value = *(free_list.is_empty() ? new Node_List() : free_list.pop());
assert( !freed || blk2value[freed->_pre_order] == &value, "" );
value.map(_max_reg,NULL);
regnd.map(_max_reg,NULL);
// Set mappings as OUR mappings
blk2value[block->_pre_order] = &value;
blk2regnd[block->_pre_order] = ®nd;
// Initialize value & regnd for this block
if (missing_some_inputs) {
// Some predecessor has not yet been visited; zap map to empty
for (uint k = 0; k < (uint)_max_reg; k++) {
value.map(k,NULL);
regnd.map(k,NULL);
}
} else {
if( !freed ) { // Didn't get a freebie prior block
// Must clone some data
freed = _cfg.get_block_for_node(block->pred(1));
Node_List &f_value = *blk2value[freed->_pre_order];
Node_List &f_regnd = *blk2regnd[freed->_pre_order];
for( uint k = 0; k < (uint)_max_reg; k++ ) {
value.map(k,f_value[k]);
regnd.map(k,f_regnd[k]);
}
}
// Merge all inputs together, setting to NULL any conflicts.
for (j = 1; j < block->num_preds(); j++) {
Block* pb = _cfg.get_block_for_node(block->pred(j));
if (pb == freed) {
continue; // Did self already via freelist
}
Node_List &p_regnd = *blk2regnd[pb->_pre_order];
for( uint k = 0; k < (uint)_max_reg; k++ ) {
if( regnd[k] != p_regnd[k] ) { // Conflict on reaching defs?
value.map(k,NULL); // Then no value handy
regnd.map(k,NULL);
}
}
}
}
// For all Phi's
for (j = 1; j < phi_dex; j++) {
uint k;
Node *phi = block->get_node(j);
uint pidx = _lrg_map.live_range_id(phi);
OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg();
// Remove copies remaining on edges. Check for junk phi.
Node *u = NULL;
for (k = 1; k < phi->req(); k++) {
Node *x = phi->in(k);
if( phi != x && u != x ) // Found a different input
u = u ? NodeSentinel : x; // Capture unique input, or NodeSentinel for 2nd input
}
if (u != NodeSentinel) { // Junk Phi. Remove
phi->replace_by(u);
j -= yank_if_dead(phi, block, &value, ®nd);
phi_dex--;
continue;
}
// Note that if value[pidx] exists, then we merged no new values here
// and the phi is useless. This can happen even with the above phi
// removal for complex flows. I cannot keep the better known value here
// because locally the phi appears to define a new merged value. If I
// keep the better value then a copy of the phi, being unable to use the
// global flow analysis, can't "peek through" the phi to the original
// reaching value and so will act like it's defining a new value. This
// can lead to situations where some uses are from the old and some from
// the new values. Not illegal by itself but throws the over-strong
// assert in scheduling.
if( pidx ) {
value.map(preg,phi);
regnd.map(preg,phi);
int n_regs = RegMask::num_registers(phi->ideal_reg());
for (int l = 1; l < n_regs; l++) {
OptoReg::Name preg_lo = OptoReg::add(preg,-l);
value.map(preg_lo,phi);
regnd.map(preg_lo,phi);
}
}
}
// For all remaining instructions
for (j = phi_dex; j < block->number_of_nodes(); j++) {
Node* n = block->get_node(j);
if(n->outcnt() == 0 && // Dead?
n != C->top() && // (ignore TOP, it has no du info)
!n->is_Proj() ) { // fat-proj kills
j -= yank_if_dead(n, block, &value, ®nd);
continue;
}
// Improve reaching-def info. Occasionally post-alloc's liveness gives
// up (at loop backedges, because we aren't doing a full flow pass).
// The presence of a live use essentially asserts that the use's def is
// alive and well at the use (or else the allocator fubar'd). Take
// advantage of this info to set a reaching def for the use-reg.
uint k;
for (k = 1; k < n->req(); k++) {
Node *def = n->in(k); // n->in(k) is a USE; def is the DEF for this USE
guarantee(def != NULL, "no disconnected nodes at this point");
uint useidx = _lrg_map.live_range_id(def); // useidx is the live range index for this USE
if( useidx ) {
OptoReg::Name ureg = lrgs(useidx).reg();
if( !value[ureg] ) {
int idx; // Skip occasional useless copy
while( (idx=def->is_Copy()) != 0 &&
def->in(idx) != NULL && // NULL should not happen
ureg == lrgs(_lrg_map.live_range_id(def->in(idx))).reg())
def = def->in(idx);
Node *valdef = skip_copies(def); // tighten up val through non-useless copies
value.map(ureg,valdef); // record improved reaching-def info
regnd.map(ureg, def);
// Record other half of doubles
uint def_ideal_reg = def->ideal_reg();
int n_regs = RegMask::num_registers(def_ideal_reg);
for (int l = 1; l < n_regs; l++) {
OptoReg::Name ureg_lo = OptoReg::add(ureg,-l);
if (!value[ureg_lo] &&
(!RegMask::can_represent(ureg_lo) ||
lrgs(useidx).mask().Member(ureg_lo))) { // Nearly always adjacent
value.map(ureg_lo,valdef); // record improved reaching-def info
regnd.map(ureg_lo, def);
}
}
}
}
}
const uint two_adr = n->is_Mach() ? n->as_Mach()->two_adr() : 0;
// Remove copies along input edges
for (k = 1; k < n->req(); k++) {
j -= elide_copy(n, k, block, value, regnd, two_adr != k);
}
// Unallocated Nodes define no registers
uint lidx = _lrg_map.live_range_id(n);
if (!lidx) {
continue;
}
// Update the register defined by this instruction
OptoReg::Name nreg = lrgs(lidx).reg();
// Skip through all copies to the _value_ being defined.
// Do not change from int to pointer
Node *val = skip_copies(n);
// Clear out a dead definition before starting so that the
// elimination code doesn't have to guard against it. The
// definition could in fact be a kill projection with a count of
// 0 which is safe but since those are uninteresting for copy
// elimination just delete them as well.
if (regnd[nreg] != NULL && regnd[nreg]->outcnt() == 0) {
regnd.map(nreg, NULL);
value.map(nreg, NULL);
}
uint n_ideal_reg = n->ideal_reg();
int n_regs = RegMask::num_registers(n_ideal_reg);
if (n_regs == 1) {
// If Node 'n' does not change the value mapped by the register,
// then 'n' is a useless copy. Do not update the register->node
// mapping so 'n' will go dead.
if( value[nreg] != val ) {
if (eliminate_copy_of_constant(val, n, block, value, regnd, nreg, OptoReg::Bad)) {
j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
} else {
// Update the mapping: record new Node defined by the register
regnd.map(nreg,n);
// Update mapping for defined *value*, which is the defined
// Node after skipping all copies.
value.map(nreg,val);
}
} else if( !may_be_copy_of_callee(n) ) {
assert(n->is_Copy(), "");
j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
}
} else if (RegMask::is_vector(n_ideal_reg)) {
// If Node 'n' does not change the value mapped by the register,
// then 'n' is a useless copy. Do not update the register->node
// mapping so 'n' will go dead.
if (!register_contains_value(val, nreg, n_regs, value)) {
// Update the mapping: record new Node defined by the register
regnd.map(nreg,n);
// Update mapping for defined *value*, which is the defined
// Node after skipping all copies.
value.map(nreg,val);
for (int l = 1; l < n_regs; l++) {
OptoReg::Name nreg_lo = OptoReg::add(nreg,-l);
regnd.map(nreg_lo, n );
value.map(nreg_lo,val);
}
} else if (n->is_Copy()) {
// Note: a vector can't be a constant and can't be a copy of a callee save.
j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
}
} else {
// If the value occupies a register pair, record same info
// in both registers.
OptoReg::Name nreg_lo = OptoReg::add(nreg,-1);
if( RegMask::can_represent(nreg_lo) && // Either a spill slot, or
!lrgs(lidx).mask().Member(nreg_lo) ) { // Nearly always adjacent
// Sparc occasionally has non-adjacent pairs.
// Find the actual other value
RegMask tmp = lrgs(lidx).mask();
tmp.Remove(nreg);
nreg_lo = tmp.find_first_elem();
}
if (value[nreg] != val || value[nreg_lo] != val) {
if (eliminate_copy_of_constant(val, n, block, value, regnd, nreg, nreg_lo)) {
j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
} else {
regnd.map(nreg , n );
regnd.map(nreg_lo, n );
value.map(nreg ,val);
value.map(nreg_lo,val);
}
} else if (!may_be_copy_of_callee(n)) {
assert(n->is_Copy(), "");
j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
}
}
// Fat projections kill many registers
if( n_ideal_reg == MachProjNode::fat_proj ) {
RegMask rm = n->out_RegMask();
// wow, what an expensive iterator...
nreg = rm.find_first_elem();
while( OptoReg::is_valid(nreg)) {
rm.Remove(nreg);
value.map(nreg,n);
regnd.map(nreg,n);
nreg = rm.find_first_elem();
}
}
} // End of for all instructions in the block
} // End for all blocks
}
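// --- Illustrative sketch (not part of the original sources) ---
// The elimination loop above leans on two register-indexed maps: 'value'
// holds the value reaching a register with copies skipped, and 'regnd' holds
// the node that last defined it; a copy whose target register already carries
// the copied value is useless. A minimal toy version of that test, with
// ToyNode and the toy_* helpers being invented names for illustration only:
struct ToyNode { ToyNode* copy_of; };                // copy_of != NULL marks a copy node
static ToyNode* toy_skip_copies(ToyNode* n) {
  while (n != NULL && n->copy_of != NULL)            // chase copies back to the real def
    n = n->copy_of;
  return n;
}
static bool toy_copy_is_useless(ToyNode* copy, int reg, ToyNode** value) {
  // Redundant if the register already holds the value the copy would produce.
  return value[reg] != NULL && value[reg] == toy_skip_copies(copy);
}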
C:\hotspot-69087d08d473\src\share\vm/opto/regalloc.cpp
/*
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "opto/regalloc.hpp"
static const int NodeRegsOverflowSize = 200;
void (*PhaseRegAlloc::_alloc_statistics[MAX_REG_ALLOCATORS])();
int PhaseRegAlloc::_num_allocators = 0;
#ifndef PRODUCT
int PhaseRegAlloc::_total_framesize = 0;
int PhaseRegAlloc::_max_framesize = 0;
#endif
PhaseRegAlloc::PhaseRegAlloc( uint unique, PhaseCFG &cfg,
Matcher &matcher,
void (*pr_stats)() ):
Phase(Register_Allocation), _cfg(cfg), _matcher(matcher),
_node_oops(Thread::current()->resource_area()),
_node_regs(0),
_node_regs_max_index(0),
_framesize(0xdeadbeef)
{
int i;
for (i=0; i < _num_allocators; i++) {
if (_alloc_statistics[i] == pr_stats)
return;
}
assert((_num_allocators + 1) < MAX_REG_ALLOCATORS, "too many register allocators");
_alloc_statistics[_num_allocators++] = pr_stats;
}
//------------------------------reg2offset-------------------------------------
int PhaseRegAlloc::reg2offset_unchecked( OptoReg::Name reg ) const {
// Slots below _max_in_arg_stack_reg are offset by the entire frame.
// Slots above _max_in_arg_stack_reg are frame_slots and are not offset.
int slot = (reg < _matcher._new_SP)
? reg - OptoReg::stack0() + _framesize
: reg - _matcher._new_SP;
// Note: We use the direct formula (reg - SharedInfo::stack0) instead of
// OptoReg::reg2stack(reg), in order to avoid asserts in the latter
// function. This routine must remain unchecked, so that dump_frame()
// can do its work undisturbed.
// %%% not really clear why reg2stack would assert here
return slot*VMRegImpl::stack_slot_size;
}
int PhaseRegAlloc::reg2offset( OptoReg::Name reg ) const {
// Not allowed in the out-preserve area.
// In-preserve area is allowed so Intel can fetch the return pc out.
assert( reg < _matcher._old_SP ||
(reg >= OptoReg::add(_matcher._old_SP,C->out_preserve_stack_slots()) &&
reg < _matcher._in_arg_limit) ||
reg >= OptoReg::add(_matcher._new_SP, C->out_preserve_stack_slots()) ||
// Allow return_addr in the out-preserve area.
reg == _matcher.return_addr(),
"register allocated in a preserve area" );
return reg2offset_unchecked( reg );
}
//------------------------------offset2reg-------------------------------------
OptoReg::Name PhaseRegAlloc::offset2reg(int stk_offset) const {
int slot = stk_offset / jintSize;
int reg = (slot < (int) _framesize)
? slot + _matcher._new_SP
: OptoReg::stack2reg(slot) - _framesize;
assert(stk_offset == reg2offset((OptoReg::Name) reg),
"offset2reg does not invert properly");
return (OptoReg::Name) reg;
}
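// Worked example for the two conversions above (illustrative numbers only,
// assuming 4-byte stack slots and a frame of more than 3 slots): a frame slot
// 3 above _new_SP gives slot = 3 and reg2offset = 3 * 4 = 12 bytes;
// offset2reg(12) computes slot = 12 / jintSize = 3, which is below _framesize,
// so it returns 3 + _new_SP again and the round trip holds. Registers below
// _new_SP (incoming-argument slots) instead land at reg - stack0() + _framesize
// slots, i.e. past the entire frame.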
//------------------------------set_oop----------------------------------------
void PhaseRegAlloc::set_oop( const Node *n, bool is_an_oop ) {
if( is_an_oop ) {
_node_oops.set(n->_idx);
}
}
//------------------------------is_oop-----------------------------------------
bool PhaseRegAlloc::is_oop( const Node *n ) const {
return _node_oops.test(n->_idx) != 0;
}
// Allocate _node_regs table with at least "size" elements
void PhaseRegAlloc::alloc_node_regs(int size) {
_node_regs_max_index = size + (size >> 1) + NodeRegsOverflowSize;
_node_regs = NEW_RESOURCE_ARRAY( OptoRegPair, _node_regs_max_index );
// We assume our caller will fill in all elements up to size-1, so
// only the extra space we allocate is initialized here.
for( uint i = size; i < _node_regs_max_index; ++i )
_node_regs[i].set_bad();
}
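// Sizing note (derived from the code above): for size == 1000 the table gets
// 1000 + 500 + 200 = 1700 entries; the extra 50% plus NodeRegsOverflowSize is
// headroom, presumably for nodes created after register allocation, and only
// that extra tail is initialized to "bad" here.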
#ifndef PRODUCT
void
PhaseRegAlloc::print_statistics() {
tty->print_cr("Total frameslots = %d, Max frameslots = %d", _total_framesize, _max_framesize);
int i;
for (i=0; i < _num_allocators; i++) {
_alloc_statistics[i]();
}
}
#endif
C:\hotspot-69087d08d473\src\share\vm/opto/regalloc.hpp
/*
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_OPTO_REGALLOC_HPP
#define SHARE_VM_OPTO_REGALLOC_HPP
#include "code/vmreg.hpp"
#include "opto/block.hpp"
#include "opto/matcher.hpp"
#include "opto/phase.hpp"
class Node;
class Matcher;
class PhaseCFG;
#define MAX_REG_ALLOCATORS 10
//------------------------------PhaseRegAlloc------------------------------------
// Abstract register allocator
class PhaseRegAlloc : public Phase {
friend class VMStructs;
static void (*_alloc_statistics[MAX_REG_ALLOCATORS])();
static int _num_allocators;
protected:
OptoRegPair *_node_regs;
uint _node_regs_max_index;
VectorSet _node_oops; // Mapping from node indices to oopiness
void alloc_node_regs(int size); // allocate _node_regs table with at least "size" elements
PhaseRegAlloc( uint unique, PhaseCFG &cfg, Matcher &matcher,
void (*pr_stats)());
public:
PhaseCFG &_cfg; // Control flow graph
uint _framesize; // Size of frame in stack-slots. not counting preserve area
OptoReg::Name _max_reg; // Past largest register seen
Matcher &_matcher; // Convert Ideal to MachNodes
uint node_regs_max_index() const { return _node_regs_max_index; }
// Get the register associated with the Node
OptoReg::Name get_reg_first( const Node *n ) const {
debug_only( if( n->_idx >= _node_regs_max_index ) n->dump(); );
assert( n->_idx < _node_regs_max_index, "Exceeded _node_regs array");
return _node_regs[n->_idx].first();
}
OptoReg::Name get_reg_second( const Node *n ) const {
debug_only( if( n->_idx >= _node_regs_max_index ) n->dump(); );
assert( n->_idx < _node_regs_max_index, "Exceeded _node_regs array");
return _node_regs[n->_idx].second();
}
// Do all the real work of allocate
virtual void Register_Allocate() = 0;
// notify the register allocator that "node" is a new reference
// to the value produced by "old_node"
virtual void add_reference( const Node *node, const Node *old_node) = 0;
// Set the register associated with a new Node
void set_bad( uint idx ) {
assert( idx < _node_regs_max_index, "Exceeded _node_regs array");
_node_regs[idx].set_bad();
}
void set1( uint idx, OptoReg::Name reg ) {
assert( idx < _node_regs_max_index, "Exceeded _node_regs array");
_node_regs[idx].set1(reg);
}
void set2( uint idx, OptoReg::Name reg ) {
assert( idx < _node_regs_max_index, "Exceeded _node_regs array");
_node_regs[idx].set2(reg);
}
void set_pair( uint idx, OptoReg::Name hi, OptoReg::Name lo ) {
assert( idx < _node_regs_max_index, "Exceeded _node_regs array");
_node_regs[idx].set_pair(hi, lo);
}
void set_ptr( uint idx, OptoReg::Name reg ) {
assert( idx < _node_regs_max_index, "Exceeded _node_regs array");
_node_regs[idx].set_ptr(reg);
}
// Set and query if a node produces an oop
void set_oop( const Node *n, bool );
bool is_oop( const Node *n ) const;
// Convert a register number to a stack offset
int reg2offset ( OptoReg::Name reg ) const;
int reg2offset_unchecked( OptoReg::Name reg ) const;
// Convert a stack offset to a register number
OptoReg::Name offset2reg( int stk_offset ) const;
// Get the register encoding associated with the Node
int get_encode(const Node *n) const {
assert( n->_idx < _node_regs_max_index, "Exceeded _node_regs array");
OptoReg::Name first = _node_regs[n->_idx].first();
OptoReg::Name second = _node_regs[n->_idx].second();
assert( !OptoReg::is_valid(second) || second == first+1, "" );
assert(OptoReg::is_reg(first), "out of range");
return Matcher::_regEncode[first];
}
#ifndef PRODUCT
static int _total_framesize;
static int _max_framesize;
virtual void dump_frame() const = 0;
virtual char *dump_register( const Node *n, char *buf ) const = 0;
static void print_statistics();
#endif
};
#endif // SHARE_VM_OPTO_REGALLOC_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/regmask.cpp
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "opto/compile.hpp"
#include "opto/regmask.hpp"
#if defined AD_MD_HPP
# include AD_MD_HPP
#elif defined TARGET_ARCH_MODEL_x86_32
# include "adfiles/ad_x86_32.hpp"
#elif defined TARGET_ARCH_MODEL_x86_64
# include "adfiles/ad_x86_64.hpp"
#elif defined TARGET_ARCH_MODEL_aarch64
# include "adfiles/ad_aarch64.hpp"
#elif defined TARGET_ARCH_MODEL_sparc
# include "adfiles/ad_sparc.hpp"
#elif defined TARGET_ARCH_MODEL_zero
# include "adfiles/ad_zero.hpp"
#elif defined TARGET_ARCH_MODEL_ppc_64
# include "adfiles/ad_ppc_64.hpp"
#endif
#define RM_SIZE _RM_SIZE /* a constant private to the class RegMask */
//-------------Non-zero bit search methods used by RegMask---------------------
// Find lowest 1, or return 32 if empty
int find_lowest_bit( uint32 mask ) {
int n = 0;
if( (mask & 0xffff) == 0 ) {
mask >>= 16;
n += 16;
}
if( (mask & 0xff) == 0 ) {
mask >>= 8;
n += 8;
}
if( (mask & 0xf) == 0 ) {
mask >>= 4;
n += 4;
}
if( (mask & 0x3) == 0 ) {
mask >>= 2;
n += 2;
}
if( (mask & 0x1) == 0 ) {
mask >>= 1;
n += 1;
}
if( mask == 0 ) {
n = 32;
}
return n;
}
// Find highest 1, or return 32 if empty
int find_hihghest_bit( uint32 mask ) {
int n = 0;
if( mask > 0xffff ) {
mask >>= 16;
n += 16;
}
if( mask > 0xff ) {
mask >>= 8;
n += 8;
}
if( mask > 0xf ) {
mask >>= 4;
n += 4;
}
if( mask > 0x3 ) {
mask >>= 2;
n += 2;
}
if( mask > 0x1 ) {
mask >>= 1;
n += 1;
}
if( mask == 0 ) {
n = 32;
}
return n;
}
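// --- Illustrative sketch (not part of the original file) ---
// The two helpers above are unrolled binary searches over the word. A
// straight-line loop version, written here purely as a cross-check of the
// intended behavior (the toy_* names are invented for illustration):
static int toy_lowest_bit(unsigned int mask) {
  for (int n = 0; n < 32; n++)
    if (mask & (1u << n)) return n;                  // first set bit from the bottom
  return 32;                                         // empty mask
}
static int toy_highest_bit(unsigned int mask) {
  for (int n = 31; n >= 0; n--)
    if (mask & (1u << n)) return n;                  // first set bit from the top
  return 32;                                         // empty mask
}
// For example, mask == 0x30 yields lowest bit 4 and highest bit 5 in both the
// loop versions and the binary-search versions above.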
//------------------------------dump-------------------------------------------
#ifndef PRODUCT
void OptoReg::dump(int r, outputStream *st) {
switch (r) {
case Special: st->print("r---"); break;
case Bad: st->print("rBAD"); break;
default:
if (r < _last_Mach_Reg) st->print("%s", Matcher::regName[r]);
else st->print("rS%d",r);
break;
}
}
#endif
//=============================================================================
const RegMask RegMask::Empty(
# define BODY(I) 0,
FORALL_BODY
# undef BODY
0
);
//=============================================================================
bool RegMask::is_vector(uint ireg) {
return (ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY);
}
int RegMask::num_registers(uint ireg) {
switch(ireg) {
case Op_VecY:
return 8;
case Op_VecX:
return 4;
case Op_VecD:
case Op_RegD:
case Op_RegL:
#ifdef _LP64
case Op_RegP:
#endif
return 2;
}
// Op_VecS and the rest ideal registers.
return 1;
}
//------------------------------find_first_pair--------------------------------
// Find the lowest-numbered register pair in the mask. Return the
// HIGHEST register number in the pair, or BAD if no pairs.
OptoReg::Name RegMask::find_first_pair() const {
verify_pairs();
for( int i = 0; i < RM_SIZE; i++ ) {
if( _A[i] ) { // Found some bits
int bit = _A[i] & -_A[i]; // Extract low bit
// Convert to bit number, return hi bit in pair
return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(bit)+1);
}
}
return OptoReg::Bad;
}
//------------------------------ClearToPairs-----------------------------------
// Clear out partial bits; leave only bit pairs
void RegMask::clear_to_pairs() {
for( int i = 0; i < RM_SIZE; i++ ) {
int bits = _A[i];
bits &= ((bits & 0x55555555)<<1); // 1 hi-bit set for each pair
bits |= (bits>>1); // Smear 1 hi-bit into a pair
_A[i] = bits;
}
verify_pairs();
}
//------------------------------SmearToPairs-----------------------------------
// Smear out partial bits; leave only bit pairs
void RegMask::smear_to_pairs() {
for( int i = 0; i < RM_SIZE; i++ ) {
int bits = _A[i];
bits |= ((bits & 0x55555555)<<1); // Smear lo bit hi per pair
bits |= ((bits & 0xAAAAAAAA)>>1); // Smear hi bit lo per pair
_A[i] = bits;
}
verify_pairs();
}
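// Worked example on one mask word (derived from the two loops above):
// bits = 0xE has the lone bit 1 plus the aligned pair {2,3}.
//   clear_to_pairs : 0xE -> 0xC  (the stray bit 1 is dropped)
//   smear_to_pairs : 0xE -> 0xF  (bit 1 is widened to the full pair {0,1})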
//------------------------------is_aligned_pairs-------------------------------
bool RegMask::is_aligned_pairs() const {
// Assert that the register mask contains only bit pairs.
for( int i = 0; i < RM_SIZE; i++ ) {
int bits = _A[i];
while( bits ) { // Check bits for pairing
int bit = bits & -bits; // Extract low bit
// A low bit at an odd position means the pair is mis-aligned.
if( (bit & 0x55555555) == 0 ) return false;
bits -= bit; // Remove bit from mask
// Check for aligned adjacent bit
if( (bits & (bit<<1)) == 0 ) return false;
bits -= (bit<<1); // Remove the other half of the pair
}
}
return true;
}
//------------------------------is_bound1--------------------------------------
// Return TRUE if the mask contains a single bit
int RegMask::is_bound1() const {
if( is_AllStack() ) return false;
int bit = -1; // Set to hold the one bit allowed
for( int i = 0; i < RM_SIZE; i++ ) {
if( _A[i] ) { // Found some bits
if( bit != -1 ) return false; // Already had bits, so fail
bit = _A[i] & -_A[i]; // Extract 1 bit from mask
if( bit != _A[i] ) return false; // Found many bits, so fail
}
}
// True for both the empty mask and for a single bit
return true;
}
//------------------------------is_bound2--------------------------------------
// Return TRUE if the mask contains an adjacent pair of bits and no other bits.
int RegMask::is_bound_pair() const {
if( is_AllStack() ) return false;
int bit = -1; // Set to hold the one bit allowed
for( int i = 0; i < RM_SIZE; i++ ) {
if( _A[i] ) { // Found some bits
if( bit != -1 ) return false; // Already had bits, so fail
bit = _A[i] & -(_A[i]); // Extract 1 bit from mask
if( (bit << 1) != 0 ) { // Bit pair stays in same word?
if( (bit | (bit<<1)) != _A[i] )
return false; // Require adjacent bit pair and no more bits
} else { // Else it's a split-pair case
if( bit != _A[i] ) return false; // Found many bits, so fail
i++; // Skip iteration forward
if( i >= RM_SIZE || _A[i] != 1 )
return false; // Require 1 lo bit in next word
}
}
}
// True for both the empty mask and for a bit pair
return true;
}
static int low_bits[3] = { 0x55555555, 0x11111111, 0x01010101 };
//------------------------------find_first_set---------------------------------
// Find the lowest-numbered register set in the mask. Return the
// HIGHEST register number in the set, or BAD if no sets.
// Works also for size 1.
OptoReg::Name RegMask::find_first_set(const int size) const {
verify_sets(size);
for (int i = 0; i < RM_SIZE; i++) {
if (_A[i]) { // Found some bits
int bit = _A[i] & -_A[i]; // Extract low bit
// Convert to bit number, return hi bit in pair
return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(bit)+(size-1));
}
}
return OptoReg::Bad;
}
//------------------------------clear_to_sets----------------------------------
// Clear out partial bits; leave only aligned adjacent bit sets of the given size
void RegMask::clear_to_sets(const int size) {
if (size == 1) return;
assert(2 <= size && size <= 8, "update low bits table");
assert(is_power_of_2(size), "sanity");
int low_bits_mask = low_bits[size>>2];
for (int i = 0; i < RM_SIZE; i++) {
int bits = _A[i];
int sets = (bits & low_bits_mask);
for (int j = 1; j < size; j++) {
sets = (bits & (sets<<1)); // filter bits which produce whole sets
}
sets |= (sets>>1); // Smear 1 hi-bit into a set
if (size > 2) {
sets |= (sets>>2); // Smear 2 hi-bits into a set
if (size > 4) {
sets |= (sets>>4); // Smear 4 hi-bits into a set
}
}
_A[i] = sets;
}
verify_sets(size);
}
//------------------------------smear_to_sets----------------------------------
// Smear out partial bits to aligned adjacent bit sets
void RegMask::smear_to_sets(const int size) {
if (size == 1) return;
assert(2 <= size && size <= 8, "update low bits table");
assert(is_power_of_2(size), "sanity");
int low_bits_mask = low_bits[size>>2];
for (int i = 0; i < RM_SIZE; i++) {
int bits = _A[i];
int sets = 0;
for (int j = 0; j < size; j++) {
sets |= (bits & low_bits_mask); // collect partial bits
bits = bits>>1;
}
sets |= (sets<<1); // Smear 1 lo-bit into a set
if (size > 2) {
sets |= (sets<<2); // Smear 2 lo-bits into a set
if (size > 4) {
sets |= (sets<<4); // Smear 4 lo-bits into a set
}
}
_A[i] = sets;
}
verify_sets(size);
}
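// Note on the low_bits table used above: low_bits[size>>2] selects
//   size 2 -> 0x55555555 (every 2nd bit, the legal start positions of a pair)
//   size 4 -> 0x11111111 (every 4th bit)
//   size 8 -> 0x01010101 (every 8th bit)
// Worked example for clear_to_sets(4) on one word: 0x0F0 (an aligned set at
// bits 4-7) is kept unchanged, while 0x1E0 (bits 5-8, straddling a 4-bit
// boundary) is cleared to 0.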
//------------------------------is_aligned_set--------------------------------
bool RegMask::is_aligned_sets(const int size) const {
if (size == 1) return true;
assert(2 <= size && size <= 8, "update low bits table");
assert(is_power_of_2(size), "sanity");
int low_bits_mask = low_bits[size>>2];
// Assert that the register mask contains only bit sets.
for (int i = 0; i < RM_SIZE; i++) {
int bits = _A[i];
while (bits) { // Check bits for pairing
int bit = bits & -bits; // Extract low bit
// A low bit that is not aligned to the set size means the set is mis-aligned.
if ((bit & low_bits_mask) == 0) return false;
// Do extra work since (bit << size) may overflow.
int hi_bit = bit << (size-1); // high bit
int set = hi_bit + ((hi_bit-1) & ~(bit-1));
// Check for aligned adjacent bits in this set
if ((bits & set) != set) return false;
bits -= set; // Remove this set
}
}
return true;
}
//------------------------------is_bound_set-----------------------------------
// Return TRUE if the mask contains one adjacent set of bits and no other bits.
// Works also for size 1.
int RegMask::is_bound_set(const int size) const {
if( is_AllStack() ) return false;
assert(1 <= size && size <= 8, "update low bits table");
int bit = -1; // Set to hold the one bit allowed
for (int i = 0; i < RM_SIZE; i++) {
if (_A[i] ) { // Found some bits
if (bit != -1)
return false; // Already had bits, so fail
bit = _A[i] & -_A[i]; // Extract low bit from mask
int hi_bit = bit << (size-1); // high bit
if (hi_bit != 0) { // Bit set stays in same word?
int set = hi_bit + ((hi_bit-1) & ~(bit-1));
if (set != _A[i])
return false; // Require adjacent bit set and no more bits
} else { // Else it's a split-set case
if (((-1) & ~(bit-1)) != _A[i])
return false; // Found many bits, so fail
i++; // Skip iteration forward and check high part
// The lower 24 bits should be 0 since this is a split case and size <= 8.
int set = bit>>24;
set = set & -set; // Remove sign extension.
set = (((set << size) - 1) >> 8);
if (i >= RM_SIZE || _A[i] != set)
return false; // Require expected low bits in next word
}
}
}
// True for both the empty mask and for a bit set
return true;
}
//------------------------------is_UP------------------------------------------
// UP means register only, Register plus stack, or stack only is DOWN
bool RegMask::is_UP() const {
// Quick common case check for DOWN (any stack slot is legal)
if( is_AllStack() )
return false;
// Slower check for any stack bits set (also DOWN)
if( overlap(Matcher::STACK_ONLY_mask) )
return false;
// Not DOWN, so must be UP
return true;
}
//------------------------------Size-------------------------------------------
// Compute size of register mask in bits
uint RegMask::Size() const {
extern uint8 bitsInByte[256];
uint sum = 0;
for( int i = 0; i < RM_SIZE; i++ )
sum +=
bitsInByte[(_A[i]>>24) & 0xff] +
bitsInByte[(_A[i]>>16) & 0xff] +
bitsInByte[(_A[i]>> 8) & 0xff] +
bitsInByte[ _A[i] & 0xff];
return sum;
}
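// --- Illustrative sketch (not part of the original file) ---
// Size() is a table-driven popcount: each word is split into four bytes and
// the per-byte bit counts are summed. A self-contained version of the same
// idea (the table is rebuilt locally because bitsInByte lives elsewhere;
// toy_popcount32 is an invented name):
static unsigned int toy_popcount32(unsigned int w) {
  static unsigned char counts[256];                  // counts[i] == number of 1 bits in i
  static bool inited = false;
  if (!inited) {
    for (int i = 1; i < 256; i++)
      counts[i] = (unsigned char)((i & 1) + counts[i >> 1]);
    inited = true;
  }
  return counts[w & 0xff] + counts[(w >> 8) & 0xff] +
         counts[(w >> 16) & 0xff] + counts[(w >> 24) & 0xff];
}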
#ifndef PRODUCT
//------------------------------print------------------------------------------
void RegMask::dump(outputStream *st) const {
st->print("[");
RegMask rm = *this; // Structure copy into local temp
OptoReg::Name start = rm.find_first_elem(); // Get a register
if (OptoReg::is_valid(start)) { // Check for empty mask
rm.Remove(start); // Yank from mask
OptoReg::dump(start, st); // Print register
OptoReg::Name last = start;
// Now I have printed an initial register.
// Print adjacent registers as "rX-rZ" instead of "rX,rY,rZ".
// Begin looping over the remaining registers.
while (1) { //
OptoReg::Name reg = rm.find_first_elem(); // Get a register
if (!OptoReg::is_valid(reg))
break; // Empty mask, end loop
rm.Remove(reg); // Yank from mask
if (last+1 == reg) { // See if they are adjacent
// Adjacent registers just collect into long runs, no printing.
last = reg;
} else { // Ending some kind of run
if (start == last) { // 1-register run; no special printing
} else if (start+1 == last) {
st->print(","); // 2-register run; print as "rX,rY"
OptoReg::dump(last, st);
} else { // Multi-register run; print as "rX-rZ"
st->print("-");
OptoReg::dump(last, st);
}
st->print(","); // Seperate start of new run
start = last = reg; // Start a new register run
OptoReg::dump(start, st); // Print register
} // End of if ending a register run or not
} // End of while regmask not empty
if (start == last) { // 1-register run; no special printing
} else if (start+1 == last) {
st->print(","); // 2-register run; print as "rX,rY"
OptoReg::dump(last, st);
} else { // Multi-register run; print as "rX-rZ"
st->print("-");
OptoReg::dump(last, st);
}
if (rm.is_AllStack()) st->print("...");
}
st->print("]");
}
#endif
C:\hotspot-69087d08d473\src\share\vm/opto/regmask.hpp
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_OPTO_REGMASK_HPP
#define SHARE_VM_OPTO_REGMASK_HPP
#include "code/vmreg.hpp"
#include "libadt/port.hpp"
#include "opto/optoreg.hpp"
#if defined ADGLOBALS_MD_HPP
# include ADGLOBALS_MD_HPP
#elif defined TARGET_ARCH_MODEL_x86_32
# include "adfiles/adGlobals_x86_32.hpp"
#elif defined TARGET_ARCH_MODEL_x86_64
# include "adfiles/adGlobals_x86_64.hpp"
#elif defined TARGET_ARCH_MODEL_aarch64
# include "adfiles/adGlobals_aarch64.hpp"
#elif defined TARGET_ARCH_MODEL_sparc
# include "adfiles/adGlobals_sparc.hpp"
#elif defined TARGET_ARCH_MODEL_zero
# include "adfiles/adGlobals_zero.hpp"
#elif defined TARGET_ARCH_MODEL_ppc_64
# include "adfiles/adGlobals_ppc_64.hpp"
#endif
// Some fun naming (textual) substitutions:
//
// RegMask::get_low_elem() ==> RegMask::find_first_elem()
// RegMask::Special ==> RegMask::Empty
// RegMask::_flags ==> RegMask::is_AllStack()
// RegMask::operator<<=() ==> RegMask::Insert()
// RegMask::operator>>=() ==> RegMask::Remove()
// RegMask::Union() ==> RegMask::OR
// RegMask::Inter() ==> RegMask::AND
//
// OptoRegister::RegName ==> OptoReg::Name
//
// OptoReg::stack0() ==> _last_Mach_Reg or ZERO in core version
//
// numregs in chaitin ==> proper degree in chaitin
//-------------Non-zero bit search methods used by RegMask---------------------
// Find lowest 1, or return 32 if empty
int find_lowest_bit( uint32 mask );
// Find highest 1, or return 32 if empty
int find_hihghest_bit( uint32 mask );
//------------------------------RegMask----------------------------------------
// The ADL file describes how to print the machine-specific registers, as well
// as any notion of register classes. We provide a register mask, which is
// just a collection of Register numbers.
// The ADLC defines 2 macros, RM_SIZE and FORALL_BODY.
// RM_SIZE is the size of a register mask in words.
// FORALL_BODY replicates a BODY macro once per word in the register mask.
// The usage is somewhat clumsy and limited to the regmask.[h,c]pp files.
// However, it means the ADLC can redefine the unroll macro and all loops
// over register masks will be unrolled by the correct amount.
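// For illustration only: if RM_SIZE were 4, the ADLC output would amount to
//   #define FORALL_BODY BODY(0) BODY(1) BODY(2) BODY(3)
// so a use site such as
//   #define BODY(I) _A[I] = 0;
//   FORALL_BODY
//   #undef BODY
// expands to "_A[0] = 0; _A[1] = 0; _A[2] = 0; _A[3] = 0;", i.e. the loop over
// mask words is unrolled at preprocessing time.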
class RegMask VALUE_OBJ_CLASS_SPEC {
union {
double _dummy_force_double_alignment[RM_SIZE>>1];
// Array of Register Mask bits. This array is large enough to cover
// all the machine registers and all parameters that need to be passed
// on the stack (stack registers) up to some interesting limit. Methods
// that need more parameters will NOT be compiled. On Intel, the limit
// is something like 90+ parameters.
int _A[RM_SIZE];
};
enum {
_WordBits = BitsPerInt,
_LogWordBits = LogBitsPerInt,
_RM_SIZE = RM_SIZE // local constant, imported, then hidden by #undef
};
public:
enum { CHUNK_SIZE = RM_SIZE*_WordBits };
// SlotsPerLong is 2, since slots are 32 bits and longs are 64 bits.
// Also, consider the maximum alignment size for a normally allocated
// value. Since we allocate register pairs but not register quads (at
// present), this alignment is SlotsPerLong (== 2). A normally
// aligned allocated register is either a single register, or a pair
// of adjacent registers, the lower-numbered being even.
// See also is_aligned_pairs() below, and the padding added before
// Matcher::_new_SP to keep allocated pairs aligned properly.
// If we ever go to quad-word allocations, SlotsPerQuad will become
// the controlling alignment constraint. Note that this alignment
// requirement is internal to the allocator, and independent of any
// particular platform.
enum { SlotsPerLong = 2,
SlotsPerVecS = 1,
SlotsPerVecD = 2,
SlotsPerVecX = 4,
SlotsPerVecY = 8 };
// A constructor only used by the ADLC output. All mask fields are filled
// in directly. Calls to this look something like RM(1,2,3,4);
RegMask(
# define BODY(I) int a##I,
FORALL_BODY
# undef BODY
int dummy = 0 ) {
# define BODY(I) _A[I] = a##I;
FORALL_BODY
# undef BODY
}
// Handy copying constructor
RegMask( RegMask *rm ) {
# define BODY(I) _A[I] = rm->_A[I];
FORALL_BODY
# undef BODY
}
// Construct an empty mask
RegMask( ) { Clear(); }
// Construct a mask with a single bit
RegMask( OptoReg::Name reg ) { Clear(); Insert(reg); }
// Check for register being in mask
int Member( OptoReg::Name reg ) const {
assert( reg < CHUNK_SIZE, "" );
return _A[reg>>_LogWordBits] & (1<<(reg&(_WordBits-1)));
}
// The last bit in the register mask indicates that the mask should repeat
// indefinitely with ONE bits. Returns TRUE if mask is infinite or
// unbounded in size. Returns FALSE if mask is finite size.
int is_AllStack() const { return _A[RM_SIZE-1] >> (_WordBits-1); }
// Work around an -xO3 optimization problem in WS6U1. The old way:
// void set_AllStack() { _A[RM_SIZE-1] |= (1<<(_WordBits-1)); }
// will cause _A[RM_SIZE-1] to be clobbered, not updated when set_AllStack()
// follows an Insert() loop, like the one found in init_spill_mask(). Using
// Insert() instead works because the index into _A is computed instead of
// constant. See bug 4665841.
void set_AllStack() { Insert(OptoReg::Name(CHUNK_SIZE-1)); }
// Test for being a not-empty mask.
int is_NotEmpty( ) const {
int tmp = 0;
# define BODY(I) tmp |= _A[I];
FORALL_BODY
# undef BODY
return tmp;
}
// Find lowest-numbered register from mask, or BAD if mask is empty.
OptoReg::Name find_first_elem() const {
int base, bits;
# define BODY(I) if( (bits = _A[I]) != 0 ) base = I<<_LogWordBits; else
FORALL_BODY
# undef BODY
{ base = OptoReg::Bad; bits = 1<<0; }
return OptoReg::Name(base + find_lowest_bit(bits));
}
// Get highest-numbered register from mask, or BAD if mask is empty.
OptoReg::Name find_last_elem() const {
int base, bits;
# define BODY(I) if( (bits = _A[RM_SIZE-1-I]) != 0 ) base = (RM_SIZE-1-I)<<_LogWordBits; else
FORALL_BODY
# undef BODY
{ base = OptoReg::Bad; bits = 1<<0; }
return OptoReg::Name(base + find_hihghest_bit(bits));
}
// Find the lowest-numbered register pair in the mask. Return the
// HIGHEST register number in the pair, or BAD if no pairs.
// Assert that the mask contains only bit pairs.
OptoReg::Name find_first_pair() const;
// Clear out partial bits; leave only aligned adjacent bit pairs.
void clear_to_pairs();
// Smear out partial bits; leave only aligned adjacent bit pairs.
void smear_to_pairs();
// Verify that the mask contains only aligned adjacent bit pairs
void verify_pairs() const { assert( is_aligned_pairs(), "mask is not aligned, adjacent pairs" ); }
// Test that the mask contains only aligned adjacent bit pairs
bool is_aligned_pairs() const;
// mask is a pair of misaligned registers
bool is_misaligned_pair() const { return Size()==2 && !is_aligned_pairs(); }
// Test for single register
int is_bound1() const;
// Test for a single adjacent pair
int is_bound_pair() const;
// Test for a single adjacent set of ideal register's size.
int is_bound(uint ireg) const {
if (is_vector(ireg)) {
if (is_bound_set(num_registers(ireg)))
return true;
} else if (is_bound1() || is_bound_pair()) {
return true;
}
return false;
}
// Find the lowest-numbered register set in the mask. Return the
// HIGHEST register number in the set, or BAD if no sets.
// Assert that the mask contains only bit sets.
OptoReg::Name find_first_set(const int size) const;
// Clear out partial bits; leave only aligned adjacent bit sets of size.
void clear_to_sets(const int size);
// Smear out partial bits to aligned adjacent bit sets.
void smear_to_sets(const int size);
// Verify that the mask contains only aligned adjacent bit sets
void verify_sets(int size) const { assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); }
// Test that the mask contains only aligned adjacent bit sets
bool is_aligned_sets(const int size) const;
// mask is a set of misaligned registers
bool is_misaligned_set(int size) const { return (int)Size()==size && !is_aligned_sets(size);}
// Test for a single adjacent set
int is_bound_set(const int size) const;
static bool is_vector(uint ireg);
static int num_registers(uint ireg);
// Fast overlap test. Non-zero if any registers in common.
int overlap( const RegMask &rm ) const {
return
# define BODY(I) (_A[I] & rm._A[I]) |
FORALL_BODY
# undef BODY
0 ;
}
// Special test for register pressure based splitting
// UP means register only, Register plus stack, or stack only is DOWN
bool is_UP() const;
// Clear a register mask
void Clear( ) {
# define BODY(I) _A[I] = 0;
FORALL_BODY
# undef BODY
}
// Fill a register mask with 1's
void Set_All( ) {
# define BODY(I) _A[I] = -1;
FORALL_BODY
# undef BODY
}
// Insert register into mask
void Insert( OptoReg::Name reg ) {
assert( reg < CHUNK_SIZE, "" );
_A[reg>>_LogWordBits] |= (1<<(reg&(_WordBits-1)));
}
// Remove register from mask
void Remove( OptoReg::Name reg ) {
assert( reg < CHUNK_SIZE, "" );
_A[reg>>_LogWordBits] &= ~(1<<(reg&(_WordBits-1)));
}
// OR 'rm' into 'this'
void OR( const RegMask &rm ) {
# define BODY(I) this->_A[I] |= rm._A[I];
FORALL_BODY
# undef BODY
}
// AND 'rm' into 'this'
void AND( const RegMask &rm ) {
# define BODY(I) this->_A[I] &= rm._A[I];
FORALL_BODY
# undef BODY
}
// Subtract 'rm' from 'this'
void SUBTRACT( const RegMask &rm ) {
# define BODY(I) _A[I] &= ~rm._A[I];
FORALL_BODY
# undef BODY
}
// Compute size of register mask: number of bits
uint Size() const;
#ifndef PRODUCT
void print() const { dump(); }
void dump(outputStream *st = tty) const; // Print a mask
#endif
static const RegMask Empty; // Common empty mask
static bool can_represent(OptoReg::Name reg) {
// NOTE: -1 in the computation reflects the use of the last
// bit of the regmask as an infinite-stack flag.
return (int)reg < (int)(CHUNK_SIZE-1);
}
static bool can_represent_arg(OptoReg::Name reg) {
// NOTE: -SlotsPerVecY in computation reflects the need
// to keep mask aligned for largest value (VecY).
return (int)reg < (int)(CHUNK_SIZE-SlotsPerVecY);
}
};
// Do not use this constant directly in client code!
#undef RM_SIZE
#endif // SHARE_VM_OPTO_REGMASK_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/reg_split.cpp
/*
* Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "libadt/vectset.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/addnode.hpp"
#include "opto/c2compiler.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/chaitin.hpp"
#include "opto/loopnode.hpp"
#include "opto/machnode.hpp"
//------------------------------Split--------------------------------------
// Walk the graph in RPO and for each lrg which spills, propagate reaching
// definitions. During propagation, split the live range around regions of
// High Register Pressure (HRP). If a Def is in a region of Low Register
// Pressure (LRP), it will not get spilled until we encounter a region of
// HRP between it and one of its uses. We will spill at the transition
// point between LRP and HRP. Uses in the HRP region will use the spilled
// Def. The first Use outside the HRP region will generate a SpillCopy to
// hoist the live range back up into a register, and all subsequent uses
// will use that new Def until another HRP region is encountered. Defs in
// HRP regions will get trailing SpillCopies to push the LRG down into the
// stack immediately.
//
// As a side effect, unlink from (hence make dead) coalesced copies.
//
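// Schematic illustration of the policy described above (one live range,
// left to right through the program):
//
//   DEF --(LRP: stays in a register)--> [ HRP region: spill at entry, uses
//   inside read the stack copy ] --> the first USE past the region gets a
//   SpillCopy that hoists the value back into a register --> later uses in
//   LRP code consume that new register def, until the next HRP region.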
static const char out_of_nodes[] = "out of nodes during split";
//------------------------------get_spillcopy_wide-----------------------------
// Get a SpillCopy node with wide-enough masks. Use the 'wide-mask', the
// wide ideal-register spill-mask if possible. If the 'wide-mask' does
// not cover the input (or output), use the input (or output) mask instead.
Node *PhaseChaitin::get_spillcopy_wide( Node *def, Node *use, uint uidx ) {
// If ideal reg doesn't exist we've got a bad schedule happening
// that is forcing us to spill something that isn't spillable.
// Bail rather than abort
uint ireg = def->ideal_reg();
if( ireg == 0 || ireg == Op_RegFlags ) {
assert(false, "attempted to spill a non-spillable item");
C->record_method_not_compilable("attempted to spill a non-spillable item");
return NULL;
}
if (C->check_node_count(NodeLimitFudgeFactor, out_of_nodes)) {
return NULL;
}
const RegMask *i_mask = &def->out_RegMask();
const RegMask *w_mask = C->matcher()->idealreg2spillmask[ireg];
const RegMask *o_mask = use ? &use->in_RegMask(uidx) : w_mask;
const RegMask *w_i_mask = w_mask->overlap( *i_mask ) ? w_mask : i_mask;
const RegMask *w_o_mask;
int num_regs = RegMask::num_registers(ireg);
bool is_vect = RegMask::is_vector(ireg);
if( w_mask->overlap( *o_mask ) && // Overlap AND
((num_regs == 1) // Single use or aligned
|| is_vect // or vector
|| (!is_vect && o_mask->is_aligned_pairs())) ) {
assert(!is_vect || o_mask->is_aligned_sets(num_regs), "vectors are aligned");
// Don't come here for mis-aligned doubles
w_o_mask = w_mask;
} else { // wide ideal mask does not overlap with o_mask
// Mis-aligned doubles come here and XMM->FPR moves on x86.
w_o_mask = o_mask; // Must target desired registers
// Does the ideal-reg-mask overlap with o_mask? I.e., can I use
// a reg-reg move or do I need a trip across register classes
// (and thus through memory)?
if( !C->matcher()->idealreg2regmask[ireg]->overlap( *o_mask) && o_mask->is_UP() )
// Here we assume a trip through memory is required.
w_i_mask = &C->FIRST_STACK_mask();
}
return new (C) MachSpillCopyNode( def, *w_i_mask, *w_o_mask );
}
//------------------------------insert_proj------------------------------------
// Insert the spill at chosen location. Skip over any intervening Proj's or
// Phis. Skip over a CatchNode and projs, inserting in the fall-through block
// instead. Update high-pressure indices. Create a new live range.
void PhaseChaitin::insert_proj( Block *b, uint i, Node *spill, uint maxlrg ) {
// Skip intervening ProjNodes. Do not insert between a ProjNode and
// its definer.
while( i < b->number_of_nodes() &&
(b->get_node(i)->is_Proj() ||
b->get_node(i)->is_Phi() ) )
i++;
// Do not insert between a call and his Catch
if( b->get_node(i)->is_Catch() ) {
// Put the instruction at the top of the fall-thru block.
// Find the fall-thru projection
while( 1 ) {
const CatchProjNode *cp = b->get_node(++i)->as_CatchProj();
if( cp->_con == CatchProjNode::fall_through_index )
break;
}
int sidx = i - b->end_idx()-1;
b = b->_succs[sidx]; // Switch to successor block
i = 1; // Right at start of block
}
b->insert_node(spill, i); // Insert node in block
_cfg.map_node_to_block(spill, b); // Update node->block mapping to reflect
// Adjust the point where we go hi-pressure
if( i <= b->_ihrp_index ) b->_ihrp_index++;
if( i <= b->_fhrp_index ) b->_fhrp_index++;
// Assign a new Live Range Number to the SpillCopy and grow
// the node->live range mapping.
new_lrg(spill,maxlrg);
}
//------------------------------split_DEF--------------------------------------
// There are four categories of Split; UP/DOWN x DEF/USE
// Only three of these really occur as DOWN/USE will always color
// Any Split with a DEF cannot CISC-Spill now. Thus we need
// two helper routines, one for Split DEFS (insert after instruction),
// one for Split USES (insert before instruction). DEF insertion
// happens inside Split, where the Leaveblock array is updated.
uint PhaseChaitin::split_DEF( Node *def, Block *b, int loc, uint maxlrg, Node **Reachblock, Node **debug_defs, GrowableArray<uint> splits, int slidx ) {
#ifdef ASSERT
// Increment the counter for this lrg
splits.at_put(slidx, splits.at(slidx)+1);
#endif
// If we are spilling the memory op for an implicit null check, at the
// null check location (ie - null check is in HRP block) we need to do
// the null-check first, then spill-down in the following block.
// (The implicit_null_check function ensures the use is also dominated
// by the branch-not-taken block.)
Node *be = b->end();
if( be->is_MachNullCheck() && be->in(1) == def && def == b->get_node(loc)) {
// Spill goes in the branch-not-taken block
b = b->_succs[b->get_node(b->end_idx()+1)->Opcode() == Op_IfTrue];
loc = 0; // Just past the Region
}
assert( loc >= 0, "must insert past block head" );
// Get a def-side SpillCopy
Node *spill = get_spillcopy_wide(def,NULL,0);
// Did we fail to split?, then bail
if (!spill) {
return 0;
}
// Insert the spill at chosen location
insert_proj( b, loc+1, spill, maxlrg++);
// Insert new node into Reaches array
Reachblock[slidx] = spill;
// Update debug list of reaching down definitions by adding this one
debug_defs[slidx] = spill;
// return updated count of live ranges
return maxlrg;
}
//------------------------------split_USE--------------------------------------
// Splits at uses can involve redeffing the LRG, so no CISC Spilling there.
// Debug uses want to know if def is already stack enabled.
uint PhaseChaitin::split_USE( Node *def, Block *b, Node *use, uint useidx, uint maxlrg, bool def_down, bool cisc_sp, GrowableArray<uint> splits, int slidx ) {
#ifdef ASSERT
// Increment the counter for this lrg
splits.at_put(slidx, splits.at(slidx)+1);
#endif
// Some setup stuff for handling debug node uses
JVMState* jvms = use->jvms();
uint debug_start = jvms ? jvms->debug_start() : 999999;
uint debug_end = jvms ? jvms->debug_end() : 999999;
//-------------------------------------------
// Check for use of debug info
if (useidx >= debug_start && useidx < debug_end) {
// Actually it's perfectly legal for constant debug info to appear
// just unlikely. In this case the optimizer left a ConI of a 4
// as both inputs to a Phi with only a debug use. It's a single-def
// live range of a rematerializable value. The live range spills,
// rematerializes and now the ConI directly feeds into the debug info.
// assert(!def->is_Con(), "constant debug info already constructed directly");
// Special split handling for Debug Info
// If DEF is DOWN, just hook the edge and return
// If DEF is UP, Split it DOWN for this USE.
if( def->is_Mach() ) {
if( def_down ) {
// DEF is DOWN, so connect USE directly to the DEF
use->set_req(useidx, def);
} else {
// Block and index where the use occurs.
Block *b = _cfg.get_block_for_node(use);
// Put the clone just prior to use
int bindex = b->find_node(use);
// DEF is UP, so must copy it DOWN and hook in USE
// Insert SpillCopy before the USE, which uses DEF as its input,
// and defs a new live range, which is used by this node.
Node *spill = get_spillcopy_wide(def,use,useidx);
// did we fail to split?
if (!spill) {
// Bail
return 0;
}
// insert into basic block
insert_proj( b, bindex, spill, maxlrg++ );
// Use the new split
use->set_req(useidx,spill);
}
// No further split handling needed for this use
return maxlrg;
} // End special splitting for debug info live range
} // If debug info
// CISC-SPILLING
// Finally, check to see if USE is CISC-Spillable, and if so,
// gather_lrg_masks will add the flags bit to its mask, and
// no use side copy is needed. This frees up the live range
// register choices without causing copy coalescing, etc.
if( UseCISCSpill && cisc_sp ) {
int inp = use->cisc_operand();
if( inp != AdlcVMDeps::Not_cisc_spillable )
// Convert operand number to edge index number
inp = use->as_Mach()->operand_index(inp);
if( inp == (int)useidx ) {
use->set_req(useidx, def);
#ifndef PRODUCT
if( TraceCISCSpill ) {
tty->print(" set_split: ");
use->dump();
}
#endif
return maxlrg;
}
}
//-------------------------------------------
// Insert a Copy before the use
// Block and index where the use occurs.
int bindex;
// Phi input spill-copys belong at the end of the prior block
if( use->is_Phi() ) {
b = _cfg.get_block_for_node(b->pred(useidx));
bindex = b->end_idx();
} else {
// Put the clone just prior to use
bindex = b->find_node(use);
}
Node *spill = get_spillcopy_wide( def, use, useidx );
if( !spill ) return 0; // Bailed out
// Insert SpillCopy before the USE, which uses the reaching DEF as
// its input, and defs a new live range, which is used by this node.
insert_proj( b, bindex, spill, maxlrg++ );
// Use the spill/clone
use->set_req(useidx,spill);
// return updated live range count
return maxlrg;
}
//------------------------------clone_node----------------------------
// Clone node with anti dependence check.
Node* clone_node(Node* def, Block *b, Compile* C) {
if (def->needs_anti_dependence_check()) {
#ifdef ASSERT
if (Verbose) {
tty->print_cr("RA attempts to clone node with anti_dependence:");
def->dump(-1); tty->cr();
tty->print_cr("into block:");
b->dump();
}
#endif
if (C->subsume_loads() == true && !C->failing()) {
// Retry with subsume_loads == false
// If this is the first failure, the sentinel string will "stick"
// to the Compile object, and the C2Compiler will see it and retry.
C->record_failure(C2Compiler::retry_no_subsuming_loads());
} else {
// Bailout without retry
C->record_method_not_compilable("RA Split failed: attempt to clone node with anti_dependence");
}
return 0;
}
return def->clone();
}
//------------------------------split_Rematerialize----------------------------
// Clone a local copy of the def.
Node *PhaseChaitin::split_Rematerialize( Node *def, Block *b, uint insidx, uint &maxlrg, GrowableArray<uint> splits, int slidx, uint *lrg2reach, Node **Reachblock, bool walkThru ) {
// The input live ranges will be stretched to the site of the new
// instruction. They might be stretched past a def and will thus
// have the old and new values of the same live range alive at the
// same time - a definite no-no. Split out private copies of
// the inputs.
if( def->req() > 1 ) {
for( uint i = 1; i < def->req(); i++ ) {
Node *in = def->in(i);
uint lidx = _lrg_map.live_range_id(in);
// We do not need this for live ranges that are only defined once.
// However, this is not true for spill copies that are added in this
// Split() pass, since they might get coalesced later on in this pass.
if (lidx < _lrg_map.max_lrg_id() && lrgs(lidx).is_singledef()) {
continue;
}
Block *b_def = _cfg.get_block_for_node(def);
int idx_def = b_def->find_node(def);
Node *in_spill = get_spillcopy_wide( in, def, i );
if( !in_spill ) return 0; // Bailed out
insert_proj(b_def,idx_def,in_spill,maxlrg++);
if( b_def == b )
insidx++;
def->set_req(i,in_spill);
}
}
Node *spill = clone_node(def, b, C);
if (spill == NULL || C->check_node_count(NodeLimitFudgeFactor, out_of_nodes)) {
// Check when generating nodes
return 0;
}
// See if any inputs are currently being spilled, and take the
// latest copy of spilled inputs.
if( spill->req() > 1 ) {
for( uint i = 1; i < spill->req(); i++ ) {
Node *in = spill->in(i);
uint lidx = _lrg_map.find_id(in);
// Walk backwards thru spill copy node intermediates
if (walkThru) {
while (in->is_SpillCopy() && lidx >= _lrg_map.max_lrg_id()) {
in = in->in(1);
lidx = _lrg_map.find_id(in);
}
if (lidx < _lrg_map.max_lrg_id() && lrgs(lidx).is_multidef()) {
// walkThru found a multidef LRG, which is unsafe to use, so
// just keep the original def used in the clone.
in = spill->in(i);
lidx = _lrg_map.find_id(in);
}
}
if (lidx < _lrg_map.max_lrg_id() && lrgs(lidx).reg() >= LRG::SPILL_REG) {
Node *rdef = Reachblock[lrg2reach[lidx]];
if (rdef) {
spill->set_req(i, rdef);
}
}
}
}
assert( spill->out_RegMask().is_UP(), "rematerialize to a reg" );
// Rematerialized op is def->spilled+1
set_was_spilled(spill);
if( _spilled_once.test(def->_idx) )
set_was_spilled(spill);
insert_proj( b, insidx, spill, maxlrg++ );
#ifdef ASSERT
// Increment the counter for this lrg
splits.at_put(slidx, splits.at(slidx)+1);
#endif
// See if the cloned def kills any flags, and copy those kills as well
uint i = insidx+1;
int found_projs = clone_projs( b, i, def, spill, maxlrg);
if (found_projs > 0) {
// Adjust the point where we go hi-pressure
if (i <= b->_ihrp_index) {
b->_ihrp_index += found_projs;
}
if (i <= b->_fhrp_index) {
b->_fhrp_index += found_projs;
}
}
return spill;
}
//------------------------------is_high_pressure-------------------------------
// Function to compute whether or not this live range is "high pressure"
// in this block - whether it spills eagerly or not.
bool PhaseChaitin::is_high_pressure( Block *b, LRG *lrg, uint insidx ) {
if( lrg->_was_spilled1 ) return true;
// Forced spilling due to conflict? Then split only at binding uses
// or defs, not for supposed capacity problems.
// CNC - Turned off 7/8/99, causes too much spilling
// if( lrg->_is_bound ) return false;
// Use float pressure numbers for vectors.
bool is_float_or_vector = lrg->_is_float || lrg->_is_vector;
// Not yet reached the high-pressure cutoff point, so low pressure
uint hrp_idx = is_float_or_vector ? b->_fhrp_index : b->_ihrp_index;
if( insidx < hrp_idx ) return false;
// Register pressure for the block as a whole depends on reg class
int block_pres = is_float_or_vector ? b->_freg_pressure : b->_reg_pressure;
// Bound live ranges will split at the binding points first;
// Intermediate splits should assume the live range's register set
// got "freed up" and that num_regs will become INT_PRESSURE.
int bound_pres = is_float_or_vector ? FLOATPRESSURE : INTPRESSURE;
// Effective register pressure limit.
int lrg_pres = (lrg->get_invalid_mask_size() > lrg->num_regs())
? (lrg->get_invalid_mask_size() >> (lrg->num_regs()-1)) : bound_pres;
// High pressure if block pressure requires more register freedom
// than live range has.
return block_pres >= lrg_pres;
}
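// Mechanical example of the test above (numbers are illustrative only):
// with num_regs() == 2 and get_invalid_mask_size() == 12, lrg_pres becomes
// 12 >> 1 == 6, so the block is considered high pressure once its register
// pressure for the relevant class reaches 6.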
//------------------------------prompt_use---------------------------------
// True if lidx is used before any real register is def'd in the block
bool PhaseChaitin::prompt_use( Block *b, uint lidx ) {
if (lrgs(lidx)._was_spilled2) {
return false;
}
// Scan block for 1st use.
for( uint i = 1; i <= b->end_idx(); i++ ) {
Node *n = b->get_node(i);
// Ignore PHI use, these can be up or down
if (n->is_Phi()) {
continue;
}
for (uint j = 1; j < n->req(); j++) {
if (_lrg_map.find_id(n->in(j)) == lidx) {
return true; // Found 1st use!
}
}
if (n->out_RegMask().is_NotEmpty()) {
return false;
}
}
return false;
}
//------------------------------Split--------------------------------------
//----------Split Routine----------
// ***** NEW SPLITTING HEURISTIC *****
// DEFS: If the DEF is in a High Register Pressure(HRP) Block, split there.
// Else, no split unless there is a HRP block between a DEF and
// one of its uses, and then split at the HRP block.
//
// USES: If USE is in HRP, split at use to leave main LRG on stack.
// Else, hoist LRG back up to register only (ie - split is also DEF)
// We will compute a new maxlrg as we go
uint PhaseChaitin::Split(uint maxlrg, ResourceArea* split_arena) {
NOT_PRODUCT( Compile::TracePhase t3("regAllocSplit", &_t_regAllocSplit, TimeCompiler); )
// Free thread local resources used by this method on exit.
ResourceMark rm(split_arena);
uint bidx, pidx, slidx, insidx, inpidx, twoidx;
uint non_phi = 1, spill_cnt = 0;
Node *n1, *n2, *n3;
Node_List *defs,*phis;
bool *UPblock;
bool u1, u2, u3;
Block *b, *pred;
PhiNode *phi;
GrowableArray<uint> lidxs(split_arena, maxlrg, 0, 0);
// Array of counters to count splits per live range
GrowableArray<uint> splits(split_arena, maxlrg, 0, 0);
#define NEW_SPLIT_ARRAY(type, size)\
(type*) split_arena->allocate_bytes((size) * sizeof(type))
//----------Setup Code----------
// Create a convenient mapping from lrg numbers to reaches/leaves indices
uint *lrg2reach = NEW_SPLIT_ARRAY(uint, maxlrg);
// Keep track of DEFS & Phis for later passes
defs = new Node_List();
phis = new Node_List();
// Gather info on which LRG's are spilling, and build maps
for (bidx = 1; bidx < maxlrg; bidx++) {
if (lrgs(bidx).alive() && lrgs(bidx).reg() >= LRG::SPILL_REG) {
assert(!lrgs(bidx).mask().is_AllStack(),"AllStack should color");
lrg2reach[bidx] = spill_cnt;
spill_cnt++;
lidxs.append(bidx);
#ifdef ASSERT
// Initialize the split counts to zero
splits.append(0);
#endif
#ifndef PRODUCT
if( PrintOpto && WizardMode && lrgs(bidx)._was_spilled1 )
tty->print_cr("Warning, 2nd spill of L%d",bidx);
#endif
}
}
// Create side arrays for propagating reaching defs info.
// Each block needs a node pointer for each spilling live range for the
// Def which is live into the block. Phi nodes handle multiple input
// Defs by querying the output of their predecessor blocks and resolving
// them to a single Def at the phi. The pointer is updated for each
// Def in the block, and then becomes the output for the block when
// processing of the block is complete. We also need to track whether
// a Def is UP or DOWN. UP means that it should get a register (ie -
// it is always in LRP regions), and DOWN means that it is probably
// on the stack (ie - it crosses HRP regions).
Node ***Reaches = NEW_SPLIT_ARRAY( Node**, _cfg.number_of_blocks() + 1);
bool **UP = NEW_SPLIT_ARRAY( bool*, _cfg.number_of_blocks() + 1);
Node **debug_defs = NEW_SPLIT_ARRAY( Node*, spill_cnt );
VectorSet **UP_entry= NEW_SPLIT_ARRAY( VectorSet*, spill_cnt );
// Initialize Reaches & UP
for (bidx = 0; bidx < _cfg.number_of_blocks() + 1; bidx++) {
Reaches[bidx] = NEW_SPLIT_ARRAY( Node*, spill_cnt );
UP[bidx] = NEW_SPLIT_ARRAY( bool, spill_cnt );
Node **Reachblock = Reaches[bidx];
bool *UPblock = UP[bidx];
for( slidx = 0; slidx < spill_cnt; slidx++ ) {
UPblock[slidx] = true; // Assume they start in registers
Reachblock[slidx] = NULL; // Assume that no def is present
}
}
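// Layout recap for the side arrays just built (restating the block comment
// above them): Reaches[b->_pre_order][slidx] holds the node supplying the
// spilling live range lidxs.at(slidx) on exit from block b, and
// UP[b->_pre_order][slidx] records whether that value is expected in a
// register (UP) or on the stack (DOWN) at that point.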
#undef NEW_SPLIT_ARRAY
// Initialize to array of empty vectorsets
for( slidx = 0; slidx < spill_cnt; slidx++ )
UP_entry[slidx] = new VectorSet(split_arena);
//----------PASS 1----------
//----------Propagation & Node Insertion Code----------
// Walk the Blocks in RPO for DEF & USE info
for( bidx = 0; bidx < _cfg.number_of_blocks(); bidx++ ) {
if (C->check_node_count(spill_cnt, out_of_nodes)) {
return 0;
}
b = _cfg.get_block(bidx);
// Reaches & UP arrays for this block
Node** Reachblock = Reaches[b->_pre_order];
UPblock = UP[b->_pre_order];
// Reset counter of start of non-Phi nodes in block
non_phi = 1;
//----------Block Entry Handling----------
// Check for need to insert a new phi
// Cycle through this block's predecessors, collecting Reaches
// info for each spilled LRG. If they are identical, no phi is
// needed. If they differ, check for a phi, and insert if missing,
// or update edges if present. Set current block's Reaches set to
// be either the phi's or the reaching def, as appropriate.
// If no Phi is needed, check if the LRG needs to spill on entry
// to the block due to HRP.
for( slidx = 0; slidx < spill_cnt; slidx++ ) {
// Grab the live range number
uint lidx = lidxs.at(slidx);
// Do not bother splitting or putting in Phis for single-def
// rematerialized live ranges. This happens a lot to constants
// with long live ranges.
if( lrgs(lidx).is_singledef() &&
lrgs(lidx)._def->rematerialize() ) {
// reset the Reaches & UP entries
Reachblock[slidx] = lrgs(lidx)._def;
UPblock[slidx] = true;
// Record following instruction in case 'n' rematerializes and
// kills flags
Block *pred1 = _cfg.get_block_for_node(b->pred(1));
continue;
}
// Initialize needs_phi and needs_split
bool needs_phi = false;
bool needs_split = false;
bool has_phi = false;
// Walk the predecessor blocks to check inputs for that live range
// Grab predecessor block header
n1 = b->pred(1);
// Grab the appropriate reaching def info for inpidx
pred = _cfg.get_block_for_node(n1);
pidx = pred->_pre_order;
Node **Ltmp = Reaches[pidx];
bool *Utmp = UP[pidx];
n1 = Ltmp[slidx];
u1 = Utmp[slidx];
// Initialize node for saving type info
n3 = n1;
u3 = u1;
// Compare inputs to see if a Phi is needed
for( inpidx = 2; inpidx < b->num_preds(); inpidx++ ) {
// Grab predecessor block headers
n2 = b->pred(inpidx);
// Grab the appropriate reaching def info for inpidx
pred = _cfg.get_block_for_node(n2);
pidx = pred->_pre_order;
Ltmp = Reaches[pidx];
Utmp = UP[pidx];
n2 = Ltmp[slidx];
u2 = Utmp[slidx];
// For each LRG, decide if a phi is necessary
if( n1 != n2 ) {
needs_phi = true;
}
// See if the phi has mismatched inputs, UP vs. DOWN
if( n1 && n2 && (u1 != u2) ) {
needs_split = true;
}
// Move n2/u2 to n1/u1 for next iteration
n1 = n2;
u1 = u2;
// Preserve a non-NULL predecessor for later type referencing
if( (n3 == NULL) && (n2 != NULL) ){
n3 = n2;
u3 = u2;
}
} // End for all potential Phi inputs
// check block for appropriate phinode & update edges
for( insidx = 1; insidx <= b->end_idx(); insidx++ ) {
n1 = b->get_node(insidx);
// bail if this is not a phi
phi = n1->is_Phi() ? n1->as_Phi() : NULL;
if( phi == NULL ) {
// Keep track of index of first non-PhiNode instruction in block
non_phi = insidx;
// break out of the for loop as we have handled all phi nodes
break;
}
// must be looking at a phi
if (_lrg_map.find_id(n1) == lidxs.at(slidx)) {
// found the necessary phi
needs_phi = false;
has_phi = true;
// initialize the Reaches entry for this LRG
Reachblock[slidx] = phi;
break;
} // end if found correct phi
} // end for all phi's
// If a phi is needed or one already exists, handle it
if( needs_phi || has_phi ) {
// add new phinode if one not already found
if( needs_phi ) {
// create a new phi node and insert it into the block
// type is taken from left over pointer to a predecessor
assert(n3,"No non-NULL reaching DEF for a Phi");
phi = new (C) PhiNode(b->head(), n3->bottom_type());
// initialize the Reaches entry for this LRG
Reachblock[slidx] = phi;
// add node to block & node_to_block mapping
insert_proj(b, insidx++, phi, maxlrg++);
non_phi++;
// Reset new phi's mapping to be the spilling live range
_lrg_map.map(phi->_idx, lidx);
assert(_lrg_map.find_id(phi) == lidx, "Bad update on Union-Find mapping");
} // end if not found correct phi
// Here you have either found or created the Phi, so record it
assert(phi != NULL,"Must have a Phi Node here");
phis->push(phi);
// PhiNodes should either force the LRG UP or DOWN depending
// on its inputs and the register pressure in the Phi's block.
UPblock[slidx] = true; // Assume new DEF is UP
// If entering a high-pressure area with no immediate use,
// assume Phi is DOWN
if( is_high_pressure( b, &lrgs(lidx), b->end_idx()) && !prompt_use(b,lidx) )
UPblock[slidx] = false;
// If we are not split up/down and all inputs are down, then we
// are down
if( !needs_split && !u3 )
UPblock[slidx] = false;
} // end if phi is needed
// Do not need a phi, so grab the reaching DEF
else {
// Grab predecessor block header
n1 = b->pred(1);
// Grab the appropriate reaching def info for this predecessor
pred = _cfg.get_block_for_node(n1);
pidx = pred->_pre_order;
Node **Ltmp = Reaches[pidx];
bool *Utmp = UP[pidx];
// reset the Reaches & UP entries
Reachblock[slidx] = Ltmp[slidx];
UPblock[slidx] = Utmp[slidx];
} // end else no Phi is needed
} // end for all spilling live ranges
// DEBUG
#ifndef PRODUCT
if(trace_spilling()) {
tty->print("/`\nBlock %d: ", b->_pre_order);
tty->print("Reaching Definitions after Phi handling\n");
for( uint x = 0; x < spill_cnt; x++ ) {
tty->print("Spill Idx %d: UP %d: Node\n",x,UPblock[x]);
if( Reachblock[x] )
Reachblock[x]->dump();
else
tty->print("Undefined\n");
}
}
#endif
//----------Non-Phi Node Splitting----------
// Since phi-nodes have now been handled, the Reachblock array for this
// block is initialized with the correct starting value for the defs which
// reach non-phi instructions in this block. Thus, process non-phi
// instructions normally, inserting SpillCopy nodes for all spill
// locations.
// Memoize any DOWN reaching definitions for use as DEBUG info
for( insidx = 0; insidx < spill_cnt; insidx++ ) {
debug_defs[insidx] = (UPblock[insidx]) ? NULL : Reachblock[insidx];
if( UPblock[insidx] ) // Memoize UP decision at block start
UP_entry[insidx]->set( b->_pre_order );
}
//----------Walk Instructions in the Block and Split----------
// For all non-phi instructions in the block
for( insidx = 1; insidx <= b->end_idx(); insidx++ ) {
Node *n = b->get_node(insidx);
// Find the defining Node's live range index
uint defidx = _lrg_map.find_id(n);
uint cnt = n->req();
if (n->is_Phi()) {
// Skip phi nodes after removing dead copies.
if (defidx < _lrg_map.max_lrg_id()) {
// Check for useless Phis. These appear if we spill, then
// coalesce away copies. Don't touch Phis in spilling live
// ranges; they are busy getting modified in this pass.
if( lrgs(defidx).reg() < LRG::SPILL_REG ) {
uint i;
Node *u = NULL;
// Look for the Phi merging 2 unique inputs
for( i = 1; i < cnt; i++ ) {
// Ignore repeats and self
if( n->in(i) != u && n->in(i) != n ) {
// Found a unique input
if( u != NULL ) // If it's the 2nd, bail out
break;
u = n->in(i); // Else record it
}
}
assert( u, "at least 1 valid input expected" );
if (i >= cnt) { // Found one unique input
assert(_lrg_map.find_id(n) == _lrg_map.find_id(u), "should be the same lrg");
n->replace_by(u); // Then replace with unique input
n->disconnect_inputs(NULL, C);
b->remove_node(insidx);
insidx--;
b->_ihrp_index--;
b->_fhrp_index--;
}
}
}
continue;
}
assert( insidx > b->_ihrp_index ||
(b->_reg_pressure < (uint)INTPRESSURE) ||
b->_ihrp_index > 4000000 ||
b->_ihrp_index >= b->end_idx() ||
!b->get_node(b->_ihrp_index)->is_Proj(), "" );
assert( insidx > b->_fhrp_index ||
(b->_freg_pressure < (uint)FLOATPRESSURE) ||
b->_fhrp_index > 4000000 ||
b->_fhrp_index >= b->end_idx() ||
!b->get_node(b->_fhrp_index)->is_Proj(), "" );
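// _ihrp_index/_fhrp_index mark where integer/float register pressure first
// goes high in this block; an index at or past end_idx() means the block
// never reaches high pressure for that register class.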
// ********** Handle Crossing HRP Boundary **********
if( (insidx == b->_ihrp_index) || (insidx == b->_fhrp_index) ) {
for( slidx = 0; slidx < spill_cnt; slidx++ ) {
// Check for need to split at HRP boundary - split if UP
n1 = Reachblock[slidx];
// bail out if no reaching DEF
if( n1 == NULL ) continue;
// bail out if live range is 'isolated' around inner loop
uint lidx = lidxs.at(slidx);
// If live range is currently UP
if( UPblock[slidx] ) {
// set location to insert spills at
// SPLIT DOWN HERE - NO CISC SPILL
if( is_high_pressure( b, &lrgs(lidx), insidx ) &&
!n1->rematerialize() ) {
// If there is already a valid stack definition available, use it
if( debug_defs[slidx] != NULL ) {
Reachblock[slidx] = debug_defs[slidx];
}
else {
// Insert point is just past last use or def in the block
int insert_point = insidx-1;
while( insert_point > 0 ) {
Node *n = b->get_node(insert_point);
// Hit top of block? Quit going backwards
if (n->is_Phi()) {
break;
}
// Found a def? Better split after it.
if (_lrg_map.live_range_id(n) == lidx) {
break;
}
// Look for a use
uint i;
for( i = 1; i < n->req(); i++ ) {
if (_lrg_map.live_range_id(n->in(i)) == lidx) {
break;
}
}
// Found a use? Better split after it.
if (i < n->req()) {
break;
}
insert_point--;
}
uint orig_eidx = b->end_idx();
maxlrg = split_DEF( n1, b, insert_point, maxlrg, Reachblock, debug_defs, splits, slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
// Spill of NULL check mem op goes into the following block.
if (b->end_idx() > orig_eidx) {
insidx++;
}
}
// This is a new DEF, so update UP
UPblock[slidx] = false;
#ifndef PRODUCT
// DEBUG
if( trace_spilling() ) {
tty->print("\nNew Split DOWN DEF of Spill Idx ");
tty->print("%d, UP %d:\n",slidx,false);
n1->dump();
}
#endif
}
} // end if LRG is UP
} // end for all spilling live ranges
assert( b->get_node(insidx) == n, "got insidx set incorrectly" );
} // end if crossing HRP Boundary
// If the LRG index is out of bounds, then this is a new spillcopy; skip it.
if (defidx >= _lrg_map.max_lrg_id()) {
continue;
}
LRG &deflrg = lrgs(defidx);
uint copyidx = n->is_Copy();
// Remove coalesced copy from CFG
if (copyidx && defidx == _lrg_map.live_range_id(n->in(copyidx))) {
n->replace_by( n->in(copyidx) );
n->set_req( copyidx, NULL );
b->remove_node(insidx--);
b->_ihrp_index--; // Adjust the point where we go hi-pressure
b->_fhrp_index--;
continue;
}
#define DERIVED 0
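// Past 'oopoff', debug inputs come in (derived, base) pairs: an even
// (inpidx - oopoff) offset is the derived pointer, the following input is its base.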
// ********** Handle USES **********
bool nullcheck = false;
// Implicit null checks never use the spilled value
if( n->is_MachNullCheck() )
nullcheck = true;
if( !nullcheck ) {
// Search all inputs for a Spill-USE
JVMState* jvms = n->jvms();
uint oopoff = jvms ? jvms->oopoff() : cnt;
uint old_last = cnt - 1;
for( inpidx = 1; inpidx < cnt; inpidx++ ) {
// Derived/base pairs may be added to our inputs during this loop.
// If inpidx > old_last, then one of these new inputs is being
// handled. Skip the derived part of the pair, but process
// the base like any other input.
if (inpidx > old_last && ((inpidx - oopoff) & 1) == DERIVED) {
continue; // skip derived_debug added below
}
// Get lidx of input
uint useidx = _lrg_map.find_id(n->in(inpidx));
// Not a brand-new split, and it is a spill use
if (useidx < _lrg_map.max_lrg_id() && lrgs(useidx).reg() >= LRG::SPILL_REG) {
// Check for valid reaching DEF
slidx = lrg2reach[useidx];
Node *def = Reachblock[slidx];
assert( def != NULL, "Using Undefined Value in Split()\n");
// (+++) %%%% remove this in favor of pre-pass in matcher.cpp
// monitor references do not care where they live, so just hook
if ( jvms && jvms->is_monitor_use(inpidx) ) {
// The effect of this clone is to drop the node out of the block,
// so that the allocator does not see it anymore, and therefore
// does not attempt to assign it a register.
def = clone_node(def, b, C);
if (def == NULL || C->check_node_count(NodeLimitFudgeFactor, out_of_nodes)) {
return 0;
}
_lrg_map.extend(def->_idx, 0);
_cfg.map_node_to_block(def, b);
n->set_req(inpidx, def);
continue;
}
// Rematerializable? Then clone def at use site instead
// of store/load
if( def->rematerialize() ) {
int old_size = b->number_of_nodes();
def = split_Rematerialize( def, b, insidx, maxlrg, splits, slidx, lrg2reach, Reachblock, true );
if( !def ) return 0; // Bail out
insidx += b->number_of_nodes()-old_size;
}
MachNode *mach = n->is_Mach() ? n->as_Mach() : NULL;
// Base pointers and oopmap references do not care where they live.
if ((inpidx >= oopoff) ||
(mach && mach->ideal_Opcode() == Op_AddP && inpidx == AddPNode::Base)) {
if (def->rematerialize() && lrgs(useidx)._was_spilled2) {
// This def has been rematerialized a couple of times without
// progress. It doesn't care if it lives UP or DOWN, so
// spill it down now.
maxlrg = split_USE(def,b,n,inpidx,maxlrg,false,false,splits,slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
insidx++; // Reset iterator to skip USE side split
} else {
// Just hook the def edge
n->set_req(inpidx, def);
}
if (inpidx >= oopoff) {
// After oopoff, we have derived/base pairs. We must mention all
// derived pointers here as derived/base pairs for GC. If the
// derived value is spilling and we have a copy both in Reachblock
// (called here 'def') and debug_defs[slidx] we need to mention
// both in derived/base pairs or kill one.
Node *derived_debug = debug_defs[slidx];
if( ((inpidx - oopoff) & 1) == DERIVED && // derived vs base?
mach && mach->ideal_Opcode() != Op_Halt &&
derived_debug != NULL &&
derived_debug != def ) { // Actual 2nd value appears
// We have already set 'def' as a derived value.
// Also set debug_defs[slidx] as a derived value.
uint k;
for( k = oopoff; k < cnt; k += 2 )
if( n->in(k) == derived_debug )
break; // Found an instance of debug derived
if( k == cnt ) {// No instance of debug_defs[slidx]
// Add a derived/base pair to cover the debug info.
// We have to process the added base later since it is not
// handled yet at this point but skip derived part.
assert(((n->req() - oopoff) & 1) == DERIVED,
"must match skip condition above");
n->add_req( derived_debug ); // this will be skipped above
n->add_req( n->in(inpidx+1) ); // this will be processed
// Increment cnt to handle added input edges on
// subsequent iterations.
cnt += 2;
}
}
}
continue;
}
// Special logic for DEBUG info
if( jvms && b->_freq > BLOCK_FREQUENCY(0.5) ) {
uint debug_start = jvms->debug_start();
// If this is debug info use & there is a reaching DOWN def
if ((debug_start <= inpidx) && (debug_defs[slidx] != NULL)) {
assert(inpidx < oopoff, "handle only debug info here");
// Just hook it in & move on
n->set_req(inpidx, debug_defs[slidx]);
// (Note that this can make two sides of a split live at the
// same time: The debug def on stack, and another def in a
// register. The GC needs to know about both of them, but any
// derived pointers after oopoff will refer to only one of the
// two defs and the GC would therefore miss the other. Thus
// this hack is only allowed for debug info which is Java state
// and therefore never a derived pointer.)
continue;
}
}
// Grab register mask info
const RegMask &dmask = def->out_RegMask();
const RegMask &umask = n->in_RegMask(inpidx);
bool is_vect = RegMask::is_vector(def->ideal_reg());
assert(inpidx < oopoff, "cannot use-split oop map info");
bool dup = UPblock[slidx];
bool uup = umask.is_UP();
// Need special logic to handle bound USES. Insert a split at this
// bound use if we can't rematerialize the def, or if we need the
// split to form a misaligned pair.
if( !umask.is_AllStack() &&
(int)umask.Size() <= lrgs(useidx).num_regs() &&
(!def->rematerialize() ||
(!is_vect && umask.is_misaligned_pair()))) {
// These need a Split regardless of overlap or pressure
// SPLIT - NO DEF - NO CISC SPILL
maxlrg = split_USE(def,b,n,inpidx,maxlrg,dup,false, splits,slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
insidx++; // Reset iterator to skip USE side split
continue;
}
if (UseFPUForSpilling && n->is_MachCall() && !uup && !dup ) {
// The use at the call can force the def down so insert
// a split before the use to allow the def more freedom.
maxlrg = split_USE(def,b,n,inpidx,maxlrg,dup,false, splits,slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
insidx++; // Reset iterator to skip USE side split
continue;
}
// Here is the logic chart which describes USE Splitting:
// 0 = false or DOWN, 1 = true or UP
//
// Overlap | DEF | USE | Action
//-------------------------------------------------------
// 0 | 0 | 0 | Copy - mem -> mem
// 0 | 0 | 1 | Split-UP - Check HRP
// 0 | 1 | 0 | Split-DOWN - Debug Info?
// 0 | 1 | 1 | Copy - reg -> reg
// 1 | 0 | 0 | Reset Input Edge (no Split)
// 1 | 0 | 1 | Split-UP - Check HRP
// 1 | 1 | 0 | Split-DOWN - Debug Info?
// 1 | 1 | 1 | Reset Input Edge (no Split)
//
// So, if (dup == uup), then overlap test determines action,
// with true being no split, and false being copy. Else,
// if DEF is DOWN, Split-UP, and check HRP to decide on
// resetting DEF. Finally if DEF is UP, Split-DOWN, with
// special handling for Debug Info.
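// Here 'Overlap' is dmask vs. umask register-mask overlap, DEF is 'dup'
// (the current UP/DOWN placement of the reaching def), and USE is 'uup'
// (whether the use's register mask wants the value UP).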
if( dup == uup ) {
if( dmask.overlap(umask) ) {
// Both are either up or down, and there is overlap, No Split
n->set_req(inpidx, def);
}
else { // Both are either up or down, and there is no overlap
if( dup ) { // If UP, reg->reg copy
// COPY ACROSS HERE - NO DEF - NO CISC SPILL
maxlrg = split_USE(def,b,n,inpidx,maxlrg,false,false, splits,slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
insidx++; // Reset iterator to skip USE side split
}
else { // DOWN, mem->mem copy
// COPY UP & DOWN HERE - NO DEF - NO CISC SPILL
// First Split-UP to move value into Register
uint def_ideal = def->ideal_reg();
const RegMask* tmp_rm = Matcher::idealreg2regmask[def_ideal];
Node *spill = new (C) MachSpillCopyNode(def, dmask, *tmp_rm);
insert_proj( b, insidx, spill, maxlrg );
// Then Split-DOWN as if previous Split was DEF
maxlrg = split_USE(spill,b,n,inpidx,maxlrg,false,false, splits,slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
insidx += 2; // Reset iterator to skip USE side splits
}
} // End else no overlap
} // End if dup == uup
// dup != uup, so check dup for direction of Split
else {
if( dup ) { // If UP, Split-DOWN and check Debug Info
// If this node is already a SpillCopy, just patch the edge
// except the case of spilling to stack.
if( n->is_SpillCopy() ) {
RegMask tmp_rm(umask);
tmp_rm.SUBTRACT(Matcher::STACK_ONLY_mask);
if( dmask.overlap(tmp_rm) ) {
if( def != n->in(inpidx) ) {
n->set_req(inpidx, def);
}
continue;
}
}
// COPY DOWN HERE - NO DEF - NO CISC SPILL
maxlrg = split_USE(def,b,n,inpidx,maxlrg,false,false, splits,slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
insidx++; // Reset iterator to skip USE side split
// Check for debug-info split. Capture it for later
// debug splits of the same value
if (jvms && jvms->debug_start() <= inpidx && inpidx < oopoff)
debug_defs[slidx] = n->in(inpidx);
}
else { // DOWN, Split-UP and check register pressure
if( is_high_pressure( b, &lrgs(useidx), insidx ) ) {
// COPY UP HERE - NO DEF - CISC SPILL
maxlrg = split_USE(def,b,n,inpidx,maxlrg,true,true, splits,slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
insidx++; // Reset iterator to skip USE side split
} else { // LRP
// COPY UP HERE - WITH DEF - NO CISC SPILL
maxlrg = split_USE(def,b,n,inpidx,maxlrg,true,false, splits,slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
// Flag this lift-up in a low-pressure block as
// already-spilled, so if it spills again it will
// spill hard (instead of not spilling hard and
// coalescing away).
set_was_spilled(n->in(inpidx));
// Since this is a new DEF, update Reachblock & UP
Reachblock[slidx] = n->in(inpidx);
UPblock[slidx] = true;
insidx++; // Reset iterator to skip USE side split
}
} // End else DOWN
} // End dup != uup
} // End if Spill USE
} // End For All Inputs
} // End If not nullcheck
// ********** Handle DEFS **********
// DEFS either Split DOWN in HRP regions or when the LRG is bound, or
// just reset the Reaches info in LRP regions. DEFS must always update
// UP info.
if( deflrg.reg() >= LRG::SPILL_REG ) { // Spilled?
uint slidx = lrg2reach[defidx];
// Add to defs list for later assignment of new live range number
defs->push(n);
// Set a flag on the Node indicating it has already spilled.
// Only do it for capacity spills, not conflict spills.
if( !deflrg._direct_conflict )
set_was_spilled(n);
assert(!n->is_Phi(),"Cannot insert Phi into DEFS list");
// Grab UP info for DEF
const RegMask &dmask = n->out_RegMask();
bool defup = dmask.is_UP();
uint ireg = n->ideal_reg();
bool is_vect = RegMask::is_vector(ireg);
// Only split at Def if this is a HRP block or bound (and spilled once)
if( !n->rematerialize() &&
(((dmask.is_bound(ireg) || (!is_vect && dmask.is_misaligned_pair())) &&
(deflrg._direct_conflict || deflrg._must_spill)) ||
// Check for LRG being up in a register and we are inside a high
// pressure area. Spill it down immediately.
(defup && is_high_pressure(b,&deflrg,insidx) && !n->is_SpillCopy())) ) {
assert( !n->rematerialize(), "" );
// Do a split at the def site.
maxlrg = split_DEF( n, b, insidx, maxlrg, Reachblock, debug_defs, splits, slidx );
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
// Split DEF's Down
UPblock[slidx] = 0;
#ifndef PRODUCT
// DEBUG
if( trace_spilling() ) {
tty->print("\nNew Split DOWN DEF of Spill Idx ");
tty->print("%d, UP %d:\n",slidx,false);
n->dump();
}
#endif
}
else { // Neither bound nor HRP, must be LRP
// otherwise, just record the def
Reachblock[slidx] = n;
// UP should come from the outRegmask() of the DEF
UPblock[slidx] = defup;
// Update debug list of reaching down definitions, kill if DEF is UP
debug_defs[slidx] = defup ? NULL : n;
#ifndef PRODUCT
// DEBUG
if( trace_spilling() ) {
tty->print("\nNew DEF of Spill Idx ");
tty->print("%d, UP %d:\n",slidx,defup);
n->dump();
}
#endif
} // End else LRP
} // End if spill def
// ********** Split Left Over Mem-Mem Moves **********
// Check for mem-mem copies and split them now. Do not do this
// to copies about to be spilled; they will be Split shortly.
if (copyidx) {
Node *use = n->in(copyidx);
uint useidx = _lrg_map.find_id(use);
if (useidx < _lrg_map.max_lrg_id() && // This is not a new split
OptoReg::is_stack(deflrg.reg()) &&
deflrg.reg() < LRG::SPILL_REG ) { // And DEF is from stack
LRG &uselrg = lrgs(useidx);
if( OptoReg::is_stack(uselrg.reg()) &&
uselrg.reg() < LRG::SPILL_REG && // USE is from stack
deflrg.reg() != uselrg.reg() ) { // Not trivially removed
uint def_ideal_reg = n->bottom_type()->ideal_reg();
const RegMask &def_rm = *Matcher::idealreg2regmask[def_ideal_reg];
const RegMask &use_rm = n->in_RegMask(copyidx);
if( def_rm.overlap(use_rm) && n->is_SpillCopy() ) { // Bug 4707800, 'n' may be a storeSSL
if (C->check_node_count(NodeLimitFudgeFactor, out_of_nodes)) { // Check when generating nodes
return 0;
}
Node *spill = new (C) MachSpillCopyNode(use,use_rm,def_rm);
n->set_req(copyidx,spill);
n->as_MachSpillCopy()->set_in_RegMask(def_rm);
// Put the spill just before the copy
insert_proj( b, insidx++, spill, maxlrg++ );
}
}
}
}
} // End For All Instructions in Block - Non-PHI Pass
// Check if each LRG is live out of this block so as not to propagate
// beyond the last use of a LRG.
for( slidx = 0; slidx < spill_cnt; slidx++ ) {
uint defidx = lidxs.at(slidx);
IndexSet *liveout = _live->live(b);
if( !liveout->member(defidx) ) {
#ifdef ASSERT
// The index defidx is not live. Check the liveout array to ensure that
// it contains no members which compress to defidx. Finding such an
// instance may be a case to add liveout adjustment in compress_uf_map().
// See 5063219.
uint member;
IndexSetIterator isi(liveout);
while ((member = isi.next()) != 0) {
assert(defidx != _lrg_map.find_const(member), "Live out member has not been compressed");
}
#endif
Reachblock[slidx] = NULL;
} else {
assert(Reachblock[slidx] != NULL,"No reaching definition for liveout value");
}
}
#ifndef PRODUCT
if( trace_spilling() )
b->dump();
#endif
} // End For All Blocks
//----------PASS 2----------
// Reset all DEF live range numbers here
for( insidx = 0; insidx < defs->size(); insidx++ ) {
// Grab the def
n1 = defs->at(insidx);
// Set new lidx for DEF
new_lrg(n1, maxlrg++);
}
//----------Phi Node Splitting----------
// Clean up a phi here, and assign a new live range number
// Cycle through this block's predecessors, collecting Reaches
// info for each spilled LRG and update edges.
// Walk the phis list to patch inputs, split phis, and name phis
uint lrgs_before_phi_split = maxlrg;
for( insidx = 0; insidx < phis->size(); insidx++ ) {
Node *phi = phis->at(insidx);
assert(phi->is_Phi(),"This list must only contain Phi Nodes");
Block *b = _cfg.get_block_for_node(phi);
// Grab the live range number
uint lidx = _lrg_map.find_id(phi);
uint slidx = lrg2reach[lidx];
// Update node to lidx map
new_lrg(phi, maxlrg++);
// Get PASS1's up/down decision for the block.
int phi_up = !!UP_entry[slidx]->test(b->_pre_order);
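// ('!!' normalizes the set-membership test to exactly 0 or 1.)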
// Force down if double-spilling live range
if( lrgs(lidx)._was_spilled1 )
phi_up = false;
// When splitting a Phi we can split it normally or "inverted".
// An inverted split makes the splits target the opposite of the
// Phi's UP/DOWN sense; the Phi is then followed by a final def-side
// split to invert back. This changes which blocks the spill code
// ends up in.
// Walk the predecessor blocks and assign the reaching def to the Phi.
// Split Phi nodes by placing USE side splits wherever the reaching
// DEF has the wrong UP/DOWN value.
for( uint i = 1; i < b->num_preds(); i++ ) {
// Get predecessor block pre-order number
Block *pred = _cfg.get_block_for_node(b->pred(i));
pidx = pred->_pre_order;
// Grab reaching def
Node *def = Reaches[pidx][slidx];
Node** Reachblock = Reaches[pidx];
assert( def, "must have reaching def" );
// If input up/down sense and reg-pressure DISagree
if (def->rematerialize()) {
// Place the rematerialized node above any MSCs created during
// phi node splitting. end_idx points at the insertion point
// so look at the node before it.
int insert = pred->end_idx();
while (insert >= 1 &&
pred->get_node(insert - 1)->is_SpillCopy() &&
_lrg_map.find(pred->get_node(insert - 1)) >= lrgs_before_phi_split) {
insert--;
}
def = split_Rematerialize(def, pred, insert, maxlrg, splits, slidx, lrg2reach, Reachblock, false);
if (!def) {
return 0; // Bail out
}
}
// Update the Phi's input edge array
phi->set_req(i,def);
// Grab the UP/DOWN sense for the input
u1 = UP[pidx][slidx];
if( u1 != (phi_up != 0)) {
maxlrg = split_USE(def, b, phi, i, maxlrg, !u1, false, splits,slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
}
} // End for all inputs to the Phi
} // End for all Phi Nodes
// Update _maxlrg to save Union asserts
_lrg_map.set_max_lrg_id(maxlrg);
//----------PASS 3----------
// Pass over all Phi's to union the live ranges
for( insidx = 0; insidx < phis->size(); insidx++ ) {
Node *phi = phis->at(insidx);
assert(phi->is_Phi(),"This list must only contain Phi Nodes");
// Walk all inputs to Phi and Union input live range with Phi live range
for( uint i = 1; i < phi->req(); i++ ) {
// Grab the input node
Node *n = phi->in(i);
assert(n, "node should exist");
uint lidx = _lrg_map.find(n);
uint pidx = _lrg_map.find(phi);
if (lidx < pidx) {
Union(n, phi);
}
else if(lidx > pidx) {
Union(phi, n);
}
} // End for all inputs to the Phi Node
} // End for all Phi Nodes
// Now union all two address instructions
for (insidx = 0; insidx < defs->size(); insidx++) {
// Grab the def
n1 = defs->at(insidx);
// Set new lidx for DEF & handle 2-addr instructions
if (n1->is_Mach() && ((twoidx = n1->as_Mach()->two_adr()) != 0)) {
assert(_lrg_map.find(n1->in(twoidx)) < maxlrg,"Assigning bad live range index");
// Union the input and output live ranges
uint lr1 = _lrg_map.find(n1);
uint lr2 = _lrg_map.find(n1->in(twoidx));
if (lr1 < lr2) {
Union(n1, n1->in(twoidx));
}
else if (lr1 > lr2) {
Union(n1->in(twoidx), n1);
}
} // End if two address
} // End for all defs
// DEBUG
#ifdef ASSERT
// Validate all live range index assignments
for (bidx = 0; bidx < _cfg.number_of_blocks(); bidx++) {
b = _cfg.get_block(bidx);
for (insidx = 0; insidx <= b->end_idx(); insidx++) {
Node *n = b->get_node(insidx);
uint defidx = _lrg_map.find(n);
assert(defidx < _lrg_map.max_lrg_id(), "Bad live range index in Split");
assert(defidx < maxlrg,"Bad live range index in Split");
}
}
// Issue a warning if splitting made no progress
int noprogress = 0;
for (slidx = 0; slidx < spill_cnt; slidx++) {
if (PrintOpto && WizardMode && splits.at(slidx) == 0) {
tty->print_cr("Failed to split live range %d", lidxs.at(slidx));
//BREAKPOINT;
}
else {
noprogress++;
}
}
if(!noprogress) {
tty->print_cr("Failed to make progress in Split");
//BREAKPOINT;
}
#endif
// Return updated count of live ranges
return maxlrg;
}
C:\hotspot-69087d08d473\src\share\vm/opto/replacednodes.cpp
/*
* Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "opto/cfgnode.hpp"
#include "opto/phaseX.hpp"
#include "opto/replacednodes.hpp"
void ReplacedNodes::allocate_if_necessary() {
if (_replaced_nodes == NULL) {
_replaced_nodes = new GrowableArray<ReplacedNode>();
}
}
bool ReplacedNodes::is_empty() const {
return _replaced_nodes == NULL || _replaced_nodes->length() == 0;
}
bool ReplacedNodes::has_node(const ReplacedNode& r) const {
return _replaced_nodes->find(r) != -1;
}
bool ReplacedNodes::has_target_node(Node* n) const {
for (int i = 0; i < _replaced_nodes->length(); i++) {
if (_replaced_nodes->at(i).improved() == n) {
return true;
}
}
return false;
}
// Record replaced node if not seen before
void ReplacedNodes::record(Node* initial, Node* improved) {
allocate_if_necessary();
ReplacedNode r(initial, improved);
if (!has_node(r)) {
_replaced_nodes->push(r);
}
}
// Copy replaced nodes from one map to another. idx is used to
// identify nodes that are too new to be of interest in the target
// node list.
void ReplacedNodes::transfer_from(const ReplacedNodes& other, uint idx) {
if (other.is_empty()) {
return;
}
allocate_if_necessary();
for (int i = 0; i < other._replaced_nodes->length(); i++) {
ReplacedNode replaced = other._replaced_nodes->at(i);
// Only transfer the nodes that can actually be useful
if (!has_node(replaced) && (replaced.initial()->_idx < idx || has_target_node(replaced.initial()))) {
_replaced_nodes->push(replaced);
}
}
}
void ReplacedNodes::clone() {
if (_replaced_nodes != NULL) {
GrowableArray<ReplacedNode>* replaced_nodes_clone = new GrowableArray<ReplacedNode>();
replaced_nodes_clone->appendAll(_replaced_nodes);
_replaced_nodes = replaced_nodes_clone;
}
}
void ReplacedNodes::reset() {
if (_replaced_nodes != NULL) {
_replaced_nodes->clear();
}
}
// Perform node replacement (used when returning to the caller)
void ReplacedNodes::apply(Node* n, uint idx) {
if (is_empty()) {
return;
}
for (int i = 0; i < _replaced_nodes->length(); i++) {
ReplacedNode replaced = _replaced_nodes->at(i);
// Only apply if improved node was created in a callee to avoid
// issues with irreducible loops in the caller
if (replaced.improved()->_idx >= idx) {
n->replace_edge(replaced.initial(), replaced.improved());
}
}
}
static void enqueue_use(Node* n, Node* use, Unique_Node_List& work) {
if (use->is_Phi()) {
Node* r = use->in(0);
assert(r->is_Region(), "Phi should have Region");
for (uint i = 1; i < use->req(); i++) {
if (use->in(i) == n) {
work.push(r->in(i));
}
}
} else {
work.push(use);
}
}
// Perform node replacement following late inlining
void ReplacedNodes::apply(Compile* C, Node* ctl) {
// ctl is the control on exit of the method that was late inlined
if (is_empty()) {
return;
}
for (int i = 0; i < _replaced_nodes->length(); i++) {
ReplacedNode replaced = _replaced_nodes->at(i);
Node* initial = replaced.initial();
Node* improved = replaced.improved();
assert (ctl != NULL && !ctl->is_top(), "replaced node should have actual control");
ResourceMark rm;
Unique_Node_List work;
// Go over all the uses of the node that is considered for replacement...
for (DUIterator j = initial->outs(); initial->has_out(j); j++) {
Node* use = initial->out(j);
if (use == improved || use->outcnt() == 0) {
continue;
}
work.clear();
enqueue_use(initial, use, work);
bool replace = true;
// Check that this use is dominated by ctl. Go ahead with the
// replacement if it is.
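// The check walks the idom chain (IfNode::up_one_dom) from the use's
// control up toward ctl, giving up after a bounded number of steps;
// floating nodes without a control edge are expanded into their uses instead.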
while (work.size() != 0 && replace) {
Node* n = work.pop();
if (use->outcnt() == 0) {
continue;
}
if (n->is_CFG() || (n->in(0) != NULL && !n->in(0)->is_top())) {
int depth = 0;
Node *m = n;
if (!n->is_CFG()) {
n = n->in(0);
}
assert(n->is_CFG(), "should be CFG now");
while(n != ctl) {
n = IfNode::up_one_dom(n);
depth++;
// limit search depth
if (depth >= 100 || n == NULL) {
replace = false;
break;
}
}
} else {
for (DUIterator k = n->outs(); n->has_out(k); k++) {
enqueue_use(n, n->out(k), work);
}
}
}
if (replace) {
bool is_in_table = C->initial_gvn()->hash_delete(use);
int replaced = use->replace_edge(initial, improved);
if (is_in_table) {
C->initial_gvn()->hash_find_insert(use);
}
C->record_for_igvn(use);
assert(replaced > 0, "inconsistent");
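// replace_edge() removed this use from initial's out-list, so step the
// iterator back to avoid skipping the next use.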
--j;
}
}
}
}
void ReplacedNodes::dump(outputStream *st) const {
if (!is_empty()) {
st->print("replaced nodes: ");
for (int i = 0; i < _replaced_nodes->length(); i++) {
st->print("%d->%d", _replaced_nodes->at(i).initial()->_idx, _replaced_nodes->at(i).improved()->_idx);
if (i < _replaced_nodes->length()-1) {
st->print(",");
}
}
}
}
// Merge two lists of replaced nodes at a point where control flow paths merge
void ReplacedNodes::merge_with(const ReplacedNodes& other) {
if (is_empty()) {
return;
}
if (other.is_empty()) {
reset();
return;
}
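// Keep only the entries that are also present in 'other' (set intersection),
// compacting the surviving entries in place.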
int shift = 0;
int len = _replaced_nodes->length();
for (int i = 0; i < len; i++) {
if (!other.has_node(_replaced_nodes->at(i))) {
shift++;
} else if (shift > 0) {
_replaced_nodes->at_put(i-shift, _replaced_nodes->at(i));
}
}
if (shift > 0) {
_replaced_nodes->trunc_to(len - shift);
}
}
C:\hotspot-69087d08d473\src\share\vm/opto/replacednodes.hpp
/*
* Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_OPTO_REPLACEDNODES_HPP
#define SHARE_VM_OPTO_REPLACEDNODES_HPP
#include "opto/connode.hpp"
// During parsing, when a node is "improved",
// GraphKit::replace_in_map() is called to update the current map so
// that the improved node is used from that point
// on. GraphKit::replace_in_map() doesn't operate on the callers maps
// and so some optimization opportunities may be lost. The
// ReplacedNodes class addresses that problem.
//
// A ReplacedNodes object is a list of pairs of nodes. Every
// SafePointNode carries a ReplacedNodes object. Every time
// GraphKit::replace_in_map() is called, a new pair of nodes is pushed
// on the list of replaced nodes. When control flow paths merge, their
// replaced nodes are also merged. When parsing exits a method to
// return to a caller, the replaced nodes on the exit path are used to
// update the caller's map.
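//
// Typical flow (sketch): record(initial, improved) each time
// GraphKit::replace_in_map() improves a node; merge_with() where control
// flow paths meet; apply() on method exit to rewrite the caller's uses of
// 'initial' to 'improved'.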
class ReplacedNodes VALUE_OBJ_CLASS_SPEC {
private:
class ReplacedNode VALUE_OBJ_CLASS_SPEC {
private:
Node* _initial;
Node* _improved;
public:
ReplacedNode() : _initial(NULL), _improved(NULL) {}
ReplacedNode(Node* initial, Node* improved) : _initial(initial), _improved(improved) {}
Node* initial() const { return _initial; }
Node* improved() const { return _improved; }
bool operator==(const ReplacedNode& other) {
return _initial == other._initial && _improved == other._improved;
}
};
GrowableArray<ReplacedNode>* _replaced_nodes;
void allocate_if_necessary();
bool has_node(const ReplacedNode& r) const;
bool has_target_node(Node* n) const;
public:
ReplacedNodes()
: _replaced_nodes(NULL) {}
void clone();
void record(Node* initial, Node* improved);
void transfer_from(const ReplacedNodes& other, uint idx);
void reset();
void apply(Node* n, uint idx);
void merge_with(const ReplacedNodes& other);
bool is_empty() const;
void dump(outputStream *st) const;
void apply(Compile* C, Node* ctl);
};
#endif // SHARE_VM_OPTO_REPLACEDNODES_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/rootnode.cpp
/*
* Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/phaseX.hpp"
#include "opto/regmask.hpp"
#include "opto/rootnode.hpp"
#include "opto/subnode.hpp"
#include "opto/type.hpp"
//------------------------------Ideal------------------------------------------
// Remove dead inputs
Node *RootNode::Ideal(PhaseGVN *phase, bool can_reshape) {
for( uint i = 1; i < req(); i++ ) { // For all inputs
// Check for and remove dead inputs
if( phase->type(in(i)) == Type::TOP ) {
del_req(i--); // Delete TOP inputs
}
}
// I used to do tail-splitting in the Ideal graph here, but it does not
// work. The tail-splitting forces values live into the Return to be
// ready at a point which dominates the split returns. This forces Stores
// to be hoisted high. The "proper" fix would be to split Stores down
// each path, but this makes the split unprofitable. If we want to do this
// optimization, it needs to be done after allocation so we can count all
// the instructions needing to be cloned in the cost metric.
// There used to be a spoof here for caffeine marks which completely
// eliminated very simple self-recursions, but it's not worth it.
// Deep inlining of self-calls gets nearly all of the same benefits.
// If we want to get the rest of the win later, we should pattern match
// simple recursive call trees to closed-form solutions.
return NULL; // No further opportunities exposed
}
//=============================================================================
HaltNode::HaltNode( Node *ctrl, Node *frameptr ) : Node(TypeFunc::Parms) {
Node* top = Compile::current()->top();
init_req(TypeFunc::Control, ctrl );
init_req(TypeFunc::I_O, top);
init_req(TypeFunc::Memory, top);
init_req(TypeFunc::FramePtr, frameptr );
init_req(TypeFunc::ReturnAdr,top);
}
const Type *HaltNode::bottom_type() const { return Type::BOTTOM; }
//------------------------------Ideal------------------------------------------
Node *HaltNode::Ideal(PhaseGVN *phase, bool can_reshape) {
return remove_dead_region(phase, can_reshape) ? this : NULL;
}
//------------------------------Value------------------------------------------
const Type *HaltNode::Value( PhaseTransform *phase ) const {
return ( phase->type(in(TypeFunc::Control)) == Type::TOP)
? Type::TOP
: Type::BOTTOM;
}
const RegMask &HaltNode::out_RegMask() const {
return RegMask::Empty;
}
C:\hotspot-69087d08d473\src\share\vm/opto/rootnode.hpp
/*
* Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_OPTO_ROOTNODE_HPP
#define SHARE_VM_OPTO_ROOTNODE_HPP
#include "opto/loopnode.hpp"
//------------------------------RootNode---------------------------------------
// The one-and-only before-all-else and after-all-else RootNode. The RootNode
// represents what happens if the user runs the whole program repeatedly. The
// RootNode produces the initial values of I/O and memory for the program or
// procedure start.
class RootNode : public LoopNode {
public:
RootNode( ) : LoopNode(0,0) {
init_class_id(Class_Root);
del_req(2);
del_req(1);
}
virtual int Opcode() const;
virtual const Node *is_block_proj() const { return this; }
virtual const Type *bottom_type() const { return Type::BOTTOM; }
virtual Node *Identity( PhaseTransform *phase ) { return this; }
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
virtual const Type *Value( PhaseTransform *phase ) const { return Type::BOTTOM; }
};
//------------------------------HaltNode---------------------------------------
// Throw an exception & die
class HaltNode : public Node {
public:
HaltNode( Node *ctrl, Node *frameptr );
virtual int Opcode() const;
virtual bool pinned() const { return true; };
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
virtual const Type *Value( PhaseTransform *phase ) const;
virtual const Type *bottom_type() const;
virtual bool is_CFG() const { return true; }
virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
virtual bool depends_only_on_test() const { return false; }
virtual const Node *is_block_proj() const { return this; }
virtual const RegMask &out_RegMask() const;
virtual uint ideal_reg() const { return NotAMachineReg; }
virtual uint match_edge(uint idx) const { return 0; }
};
#endif // SHARE_VM_OPTO_ROOTNODE_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/runtime.cpp
/*
* Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "classfile/systemDictionary.hpp"
#include "classfile/vmSymbols.hpp"
#include "code/compiledIC.hpp"
#include "code/icBuffer.hpp"
#include "code/nmethod.hpp"
#include "code/pcDesc.hpp"
#include "code/scopeDesc.hpp"
#include "code/vtableStubs.hpp"
#include "compiler/compileBroker.hpp"
#include "compiler/compilerOracle.hpp"
#include "compiler/oopMap.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#include "gc_interface/collectedHeap.hpp"
#include "interpreter/bytecode.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/linkResolver.hpp"
#include "memory/barrierSet.hpp"
#include "memory/gcLocker.inline.hpp"
#include "memory/oopFactory.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "opto/addnode.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/connode.hpp"
#include "opto/graphKit.hpp"
#include "opto/machnode.hpp"
#include "opto/matcher.hpp"
#include "opto/memnode.hpp"
#include "opto/mulnode.hpp"
#include "opto/runtime.hpp"
#include "opto/subnode.hpp"
#include "runtime/fprofiler.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/javaCalls.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/signature.hpp"
#include "runtime/threadCritical.hpp"
#include "runtime/vframe.hpp"
#include "runtime/vframeArray.hpp"
#include "runtime/vframe_hp.hpp"
#include "utilities/copy.hpp"
#include "utilities/preserveException.hpp"
#if defined AD_MD_HPP
# include AD_MD_HPP
#elif defined TARGET_ARCH_MODEL_x86_32
# include "adfiles/ad_x86_32.hpp"
#elif defined TARGET_ARCH_MODEL_x86_64
# include "adfiles/ad_x86_64.hpp"
#elif defined TARGET_ARCH_MODEL_aarch64
# include "adfiles/ad_aarch64.hpp"
#elif defined TARGET_ARCH_MODEL_sparc
# include "adfiles/ad_sparc.hpp"
#elif defined TARGET_ARCH_MODEL_zero
# include "adfiles/ad_zero.hpp"
#elif defined TARGET_ARCH_MODEL_ppc_64
# include "adfiles/ad_ppc_64.hpp"
#endif
// For debugging purposes:
// To force FullGCALot inside a runtime function, add the following two lines
//
// Universe::release_fullgc_alot_dummy();
// MarkSweep::invoke(0, "Debugging");
//
// At command line specify the parameters: -XX:+FullGCALot -XX:FullGCALotStart=100000000
// GHASH block processing
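// Builds the call signature (domain/range TypeTuple) for the ghash_processBlocks
// stub: (state, subkeyH, data, blocks) -> void.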
const TypeFunc* OptoRuntime::ghash_processBlocks_Type() {
int argcnt = 4;
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // state
fields[argp++] = TypePtr::NOTNULL; // subkeyH
fields[argp++] = TypePtr::NOTNULL; // data
fields[argp++] = TypeInt::INT; // blocks
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// result type needed
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = NULL; // void
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
// Compiled code entry points
address OptoRuntime::_new_instance_Java = NULL;
address OptoRuntime::_new_array_Java = NULL;
address OptoRuntime::_new_array_nozero_Java = NULL;
address OptoRuntime::_multianewarray2_Java = NULL;
address OptoRuntime::_multianewarray3_Java = NULL;
address OptoRuntime::_multianewarray4_Java = NULL;
address OptoRuntime::_multianewarray5_Java = NULL;
address OptoRuntime::_multianewarrayN_Java = NULL;
address OptoRuntime::_g1_wb_pre_Java = NULL;
address OptoRuntime::_g1_wb_post_Java = NULL;
address OptoRuntime::_vtable_must_compile_Java = NULL;
address OptoRuntime::_complete_monitor_locking_Java = NULL;
address OptoRuntime::_rethrow_Java = NULL;
address OptoRuntime::_slow_arraycopy_Java = NULL;
address OptoRuntime::_register_finalizer_Java = NULL;
# ifdef ENABLE_ZAP_DEAD_LOCALS
address OptoRuntime::_zap_dead_Java_locals_Java = NULL;
address OptoRuntime::_zap_dead_native_locals_Java = NULL;
# endif
ExceptionBlob* OptoRuntime::_exception_blob;
// This should be called in an assertion at the start of OptoRuntime routines
// which are entered from compiled code (all of them)
#ifdef ASSERT
static bool check_compiled_frame(JavaThread* thread) {
assert(thread->last_frame().is_runtime_frame(), "cannot call runtime directly from compiled code");
RegisterMap map(thread, false);
frame caller = thread->last_frame().sender(&map);
assert(caller.is_compiled_frame(), "not being called from compiled like code");
return true;
}
#endif // ASSERT
#define gen(env, var, type_func_gen, c_func, fancy_jump, pass_tls, save_arg_regs, return_pc) \
var = generate_stub(env, type_func_gen, CAST_FROM_FN_PTR(address, c_func), #var, fancy_jump, pass_tls, save_arg_regs, return_pc); \
if (var == NULL) { return false; }
bool OptoRuntime::generate(ciEnv* env) {
generate_exception_blob();
// Note: tls: Means fetching the return oop out of the thread-local storage
//
// variable/name type-function-gen , runtime method ,fncy_jp, tls,save_args,retpc
// -------------------------------------------------------------------------------------------------------------------------------
gen(env, _new_instance_Java , new_instance_Type , new_instance_C , 0 , true , false, false);
gen(env, _new_array_Java , new_array_Type , new_array_C , 0 , true , false, false);
gen(env, _new_array_nozero_Java , new_array_Type , new_array_nozero_C , 0 , true , false, false);
gen(env, _multianewarray2_Java , multianewarray2_Type , multianewarray2_C , 0 , true , false, false);
gen(env, _multianewarray3_Java , multianewarray3_Type , multianewarray3_C , 0 , true , false, false);
gen(env, _multianewarray4_Java , multianewarray4_Type , multianewarray4_C , 0 , true , false, false);
gen(env, _multianewarray5_Java , multianewarray5_Type , multianewarray5_C , 0 , true , false, false);
gen(env, _multianewarrayN_Java , multianewarrayN_Type , multianewarrayN_C , 0 , true , false, false);
gen(env, _g1_wb_pre_Java , g1_wb_pre_Type , SharedRuntime::g1_wb_pre , 0 , false, false, false);
gen(env, _g1_wb_post_Java , g1_wb_post_Type , SharedRuntime::g1_wb_post , 0 , false, false, false);
gen(env, _complete_monitor_locking_Java , complete_monitor_enter_Type , SharedRuntime::complete_monitor_locking_C, 0, false, false, false);
gen(env, _rethrow_Java , rethrow_Type , rethrow_C , 2 , true , false, true );
gen(env, _slow_arraycopy_Java , slow_arraycopy_Type , SharedRuntime::slow_arraycopy_C , 0 , false, false, false);
gen(env, _register_finalizer_Java , register_finalizer_Type , register_finalizer , 0 , false, false, false);
# ifdef ENABLE_ZAP_DEAD_LOCALS
gen(env, _zap_dead_Java_locals_Java , zap_dead_locals_Type , zap_dead_Java_locals_C , 0 , false, true , false );
gen(env, _zap_dead_native_locals_Java , zap_dead_locals_Type , zap_dead_native_locals_C , 0 , false, true , false );
# endif
return true;
}
#undef gen
// Helper method to do generation of RunTimeStub's
address OptoRuntime::generate_stub( ciEnv* env,
TypeFunc_generator gen, address C_function,
const char *name, int is_fancy_jump,
bool pass_tls,
bool save_argument_registers,
bool return_pc ) {
ResourceMark rm;
Compile C( env, gen, C_function, name, is_fancy_jump, pass_tls, save_argument_registers, return_pc );
return C.stub_entry_point();
}
const char* OptoRuntime::stub_name(address entry) {
#ifndef PRODUCT
CodeBlob* cb = CodeCache::find_blob(entry);
RuntimeStub* rs =(RuntimeStub *)cb;
assert(rs != NULL && rs->is_runtime_stub(), "not a runtime stub");
return rs->name();
#else
// Fast implementation for product mode (maybe it should be inlined too)
return "runtime stub";
#endif
}
//=============================================================================
// Opto compiler runtime routines
//=============================================================================
//=============================allocation======================================
// We failed the fast-path allocation. Now we need to do a scavenge or GC
// and try allocation again.
void OptoRuntime::new_store_pre_barrier(JavaThread* thread) {
// After any safepoint, just before going back to compiled code,
// we inform the GC that we will be doing initializing writes to
// this object in the future without emitting card-marks, so
// GC may take any compensating steps.
// NOTE: Keep this code consistent with GraphKit::store_barrier.
oop new_obj = thread->vm_result();
if (new_obj == NULL) return;
assert(Universe::heap()->can_elide_tlab_store_barriers(),
"compiler must check this first");
// GC may decide to give back a safer copy of new_obj.
new_obj = Universe::heap()->new_store_pre_barrier(thread, new_obj);
thread->set_vm_result(new_obj);
}
// object allocation
JRT_BLOCK_ENTRY(void, OptoRuntime::new_instance_C(Klass* klass, JavaThread* thread))
JRT_BLOCK;
#ifndef PRODUCT
SharedRuntime::_new_instance_ctr++; // new instance requires GC
#endif
assert(check_compiled_frame(thread), "incorrect caller");
// These checks are cheap to make and support reflective allocation.
int lh = klass->layout_helper();
if (Klass::layout_helper_needs_slow_path(lh) || !InstanceKlass::cast(klass)->is_initialized()) {
Handle holder(THREAD, klass->klass_holder()); // keep the klass alive
klass->check_valid_for_instantiation(false, THREAD);
if (!HAS_PENDING_EXCEPTION) {
InstanceKlass::cast(klass)->initialize(THREAD);
}
}
if (!HAS_PENDING_EXCEPTION) {
// Scavenge and allocate an instance.
Handle holder(THREAD, klass->klass_holder()); // keep the klass alive
oop result = InstanceKlass::cast(klass)->allocate_instance(THREAD);
thread->set_vm_result(result);
// Pass oops back through thread local storage. Our apparent type to Java
// is that we return an oop, but we can block on exit from this routine and
// a GC can trash the oop in C's return register. The generated stub will
// fetch the oop from TLS after any possible GC.
}
deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
JRT_BLOCK_END;
if (GraphKit::use_ReduceInitialCardMarks()) {
// inform GC that we won't do card marks for initializing writes.
new_store_pre_barrier(thread);
}
JRT_END
// array allocation
JRT_BLOCK_ENTRY(void, OptoRuntime::new_array_C(Klass* array_type, int len, JavaThread *thread))
JRT_BLOCK;
#ifndef PRODUCT
SharedRuntime::_new_array_ctr++; // new array requires GC
#endif
assert(check_compiled_frame(thread), "incorrect caller");
// Scavenge and allocate an instance.
oop result;
if (array_type->oop_is_typeArray()) {
// The oopFactory likes to work with the element type.
// (We could bypass the oopFactory, since it doesn't add much value.)
BasicType elem_type = TypeArrayKlass::cast(array_type)->element_type();
result = oopFactory::new_typeArray(elem_type, len, THREAD);
} else {
// Although the oopFactory likes to work with the elem_type,
// the compiler prefers the array_type, since it must already have
// that latter value in hand for the fast path.
Handle holder(THREAD, array_type->klass_holder()); // keep the array klass alive
Klass* elem_type = ObjArrayKlass::cast(array_type)->element_klass();
result = oopFactory::new_objArray(elem_type, len, THREAD);
}
// Pass oops back through thread local storage. Our apparent type to Java
// is that we return an oop, but we can block on exit from this routine and
// a GC can trash the oop in C's return register. The generated stub will
// fetch the oop from TLS after any possible GC.
deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
thread->set_vm_result(result);
JRT_BLOCK_END;
if (GraphKit::use_ReduceInitialCardMarks()) {
// inform GC that we won't do card marks for initializing writes.
new_store_pre_barrier(thread);
}
JRT_END
// array allocation without zeroing
JRT_BLOCK_ENTRY(void, OptoRuntime::new_array_nozero_C(Klass* array_type, int len, JavaThread *thread))
JRT_BLOCK;
#ifndef PRODUCT
SharedRuntime::_new_array_ctr++; // new array requires GC
#endif
assert(check_compiled_frame(thread), "incorrect caller");
// Scavenge and allocate an instance.
oop result;
assert(array_type->oop_is_typeArray(), "should be called only for type array");
// The oopFactory likes to work with the element type.
BasicType elem_type = TypeArrayKlass::cast(array_type)->element_type();
result = oopFactory::new_typeArray_nozero(elem_type, len, THREAD);
// Pass oops back through thread local storage. Our apparent type to Java
// is that we return an oop, but we can block on exit from this routine and
// a GC can trash the oop in C's return register. The generated stub will
// fetch the oop from TLS after any possible GC.
deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
thread->set_vm_result(result);
JRT_BLOCK_END;
if (GraphKit::use_ReduceInitialCardMarks()) {
// inform GC that we won't do card marks for initializing writes.
new_store_pre_barrier(thread);
}
oop result = thread->vm_result();
if ((len > 0) && (result != NULL) &&
is_deoptimized_caller_frame(thread)) {
// Zero array here if the caller is deoptimized.
int size = ((typeArrayOop)result)->object_size();
BasicType elem_type = TypeArrayKlass::cast(array_type)->element_type();
const size_t hs = arrayOopDesc::header_size(elem_type);
// Align to next 8 bytes to avoid trashing the array's length.
const size_t aligned_hs = align_object_offset(hs);
HeapWord* obj = (HeapWord*)result;
if (aligned_hs > hs) {
Copy::zero_to_words(obj+hs, aligned_hs-hs);
}
// Optimized zeroing.
Copy::fill_to_aligned_words(obj+aligned_hs, size-aligned_hs);
}
JRT_END
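// Worked note on the zeroing above (explanatory, assuming the usual array
// layout): Copy::fill_to_aligned_words requires an 8-byte-aligned start, so
// the header size is rounded *up* to aligned_hs and any payload words in the
// gap [hs, aligned_hs) are cleared separately with zero_to_words. Rounding
// down instead would make the aligned fill start inside the header and could
// overwrite the array length, which is exactly what the alignment guards
// against.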
// Note: multianewarray for one dimension is handled inline by GraphKit::new_array.
// multianewarray for 2 dimensions
JRT_ENTRY(void, OptoRuntime::multianewarray2_C(Klass* elem_type, int len1, int len2, JavaThread *thread))
#ifndef PRODUCT
SharedRuntime::_multi2_ctr++; // multianewarray for 2 dimensions
#endif
assert(check_compiled_frame(thread), "incorrect caller");
assert(elem_type->is_klass(), "not a class");
jint dims[2];
dims[0] = len1;
dims[1] = len2;
Handle holder(THREAD, elem_type->klass_holder()); // keep the klass alive
oop obj = ArrayKlass::cast(elem_type)->multi_allocate(2, dims, THREAD);
deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
thread->set_vm_result(obj);
JRT_END
// multianewarray for 3 dimensions
JRT_ENTRY(void, OptoRuntime::multianewarray3_C(Klass* elem_type, int len1, int len2, int len3, JavaThread *thread))
#ifndef PRODUCT
SharedRuntime::_multi3_ctr++; // multianewarray for 3 dimensions
#endif
assert(check_compiled_frame(thread), "incorrect caller");
assert(elem_type->is_klass(), "not a class");
jint dims[3];
dims[0] = len1;
dims[1] = len2;
dims[2] = len3;
Handle holder(THREAD, elem_type->klass_holder()); // keep the klass alive
oop obj = ArrayKlass::cast(elem_type)->multi_allocate(3, dims, THREAD);
deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
thread->set_vm_result(obj);
JRT_END
// multianewarray for 4 dimensions
JRT_ENTRY(void, OptoRuntime::multianewarray4_C(Klass* elem_type, int len1, int len2, int len3, int len4, JavaThread *thread))
#ifndef PRODUCT
SharedRuntime::_multi4_ctr++; // multianewarray for 4 dimensions
#endif
assert(check_compiled_frame(thread), "incorrect caller");
assert(elem_type->is_klass(), "not a class");
jint dims[4];
dims[0] = len1;
dims[1] = len2;
dims[2] = len3;
dims[3] = len4;
Handle holder(THREAD, elem_type->klass_holder()); // keep the klass alive
oop obj = ArrayKlass::cast(elem_type)->multi_allocate(4, dims, THREAD);
deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
thread->set_vm_result(obj);
JRT_END
// multianewarray for 5 dimensions
JRT_ENTRY(void, OptoRuntime::multianewarray5_C(Klass* elem_type, int len1, int len2, int len3, int len4, int len5, JavaThread *thread))
#ifndef PRODUCT
SharedRuntime::_multi5_ctr++; // multianewarray for 5 dimensions
#endif
assert(check_compiled_frame(thread), "incorrect caller");
assert(elem_type->is_klass(), "not a class");
jint dims[5];
dims[0] = len1;
dims[1] = len2;
dims[2] = len3;
dims[3] = len4;
dims[4] = len5;
Handle holder(THREAD, elem_type->klass_holder()); // keep the klass alive
oop obj = ArrayKlass::cast(elem_type)->multi_allocate(5, dims, THREAD);
deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
thread->set_vm_result(obj);
JRT_END
JRT_ENTRY(void, OptoRuntime::multianewarrayN_C(Klass* elem_type, arrayOopDesc* dims, JavaThread *thread))
assert(check_compiled_frame(thread), "incorrect caller");
assert(elem_type->is_klass(), "not a class");
assert(oop(dims)->is_typeArray(), "not an array");
ResourceMark rm;
jint len = dims->length();
assert(len > 0, "Dimensions array should contain data");
jint *j_dims = typeArrayOop(dims)->int_at_addr(0);
jint *c_dims = NEW_RESOURCE_ARRAY(jint, len);
Copy::conjoint_jints_atomic(j_dims, c_dims, len);
Handle holder(THREAD, elem_type->klass_holder()); // keep the klass alive
oop obj = ArrayKlass::cast(elem_type)->multi_allocate(len, c_dims, THREAD);
deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
thread->set_vm_result(obj);
JRT_END
const TypeFunc *OptoRuntime::new_instance_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Klass to be allocated
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
// create result type (range)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Returned oop
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc *OptoRuntime::athrow_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Exception oop to be thrown
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
// create result type (range)
fields = TypeTuple::fields(0);
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc *OptoRuntime::new_array_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // element klass
fields[TypeFunc::Parms+1] = TypeInt::INT; // array size
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
// create result type (range)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Returned oop
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc *OptoRuntime::multianewarray_Type(int ndim) {
// create input type (domain)
const int nargs = ndim + 1;
const Type **fields = TypeTuple::fields(nargs);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // element klass
for( int i = 1; i < nargs; i++ )
fields[TypeFunc::Parms + i] = TypeInt::INT; // array size
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+nargs, fields);
// create result type (range)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Returned oop
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
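// For illustration (derived from the loop above, not generated code):
// multianewarray_Type(2) describes the signature used by multianewarray2_C,
// conceptually
//
//   // domain: { Parms+0: klass, Parms+1: len1 (int), Parms+2: len2 (int) }
//   // range : { Parms+0: raw pointer to the newly allocated array }
//
// and each additional dimension simply appends one more TypeInt::INT slot.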
const TypeFunc *OptoRuntime::multianewarray2_Type() {
return multianewarray_Type(2);
}
const TypeFunc *OptoRuntime::multianewarray3_Type() {
return multianewarray_Type(3);
}
const TypeFunc *OptoRuntime::multianewarray4_Type() {
return multianewarray_Type(4);
}
const TypeFunc *OptoRuntime::multianewarray5_Type() {
return multianewarray_Type(5);
}
const TypeFunc *OptoRuntime::multianewarrayN_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // element klass
fields[TypeFunc::Parms+1] = TypeInstPtr::NOTNULL; // array of dim sizes
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
// create result type (range)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Returned oop
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc *OptoRuntime::g1_wb_pre_Type() {
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value
fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
// create result type (range)
fields = TypeTuple::fields(0);
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc *OptoRuntime::g1_wb_post_Type() {
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Card addr
fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
// create result type (range)
fields = TypeTuple::fields(0);
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc *OptoRuntime::uncommon_trap_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(1);
// trap_request integer (encodes the deoptimization reason and action)
fields[TypeFunc::Parms+0] = TypeInt::INT;
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
// create result type (range)
fields = TypeTuple::fields(0);
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
return TypeFunc::make(domain, range);
}
# ifdef ENABLE_ZAP_DEAD_LOCALS
// Type used for stub generation for zap_dead_locals.
// No inputs or outputs
const TypeFunc *OptoRuntime::zap_dead_locals_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(0);
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms,fields);
// create result type (range)
fields = TypeTuple::fields(0);
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms,fields);
return TypeFunc::make(domain,range);
}
# endif
//-----------------------------------------------------------------------------
// Monitor Handling
const TypeFunc *OptoRuntime::complete_monitor_enter_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Object to be Locked
fields[TypeFunc::Parms+1] = TypeRawPtr::BOTTOM; // Address of stack location for lock
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2,fields);
// create result type (range)
fields = TypeTuple::fields(0);
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
return TypeFunc::make(domain,range);
}
//-----------------------------------------------------------------------------
const TypeFunc *OptoRuntime::complete_monitor_exit_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Object to be Locked
fields[TypeFunc::Parms+1] = TypeRawPtr::BOTTOM; // Address of stack location for lock
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2,fields);
// create result type (range)
fields = TypeTuple::fields(0);
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
return TypeFunc::make(domain,range);
}
const TypeFunc* OptoRuntime::flush_windows_Type() {
// create input type (domain)
const Type** fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = NULL; // void
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms, fields);
// create result type
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = NULL; // void
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc* OptoRuntime::l2f_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeLong::LONG;
fields[TypeFunc::Parms+1] = Type::HALF;
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
// create result type (range)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = Type::FLOAT;
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc* OptoRuntime::modf_Type() {
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = Type::FLOAT;
fields[TypeFunc::Parms+1] = Type::FLOAT;
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
// create result type (range)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = Type::FLOAT;
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc *OptoRuntime::Math_D_D_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(2);
// double argument (the Type::HALF below is its second slot)
fields[TypeFunc::Parms+0] = Type::DOUBLE;
fields[TypeFunc::Parms+1] = Type::HALF;
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
// create result type (range)
fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = Type::DOUBLE;
fields[TypeFunc::Parms+1] = Type::HALF;
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+2, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc* OptoRuntime::Math_DD_D_Type() {
const Type **fields = TypeTuple::fields(4);
fields[TypeFunc::Parms+0] = Type::DOUBLE;
fields[TypeFunc::Parms+1] = Type::HALF;
fields[TypeFunc::Parms+2] = Type::DOUBLE;
fields[TypeFunc::Parms+3] = Type::HALF;
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+4, fields);
// create result type (range)
fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = Type::DOUBLE;
fields[TypeFunc::Parms+1] = Type::HALF;
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+2, fields);
return TypeFunc::make(domain, range);
}
//-------------- currentTimeMillis, currentTimeNanos, etc
const TypeFunc* OptoRuntime::void_long_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(0);
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+0, fields);
// create result type (range)
fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeLong::LONG;
fields[TypeFunc::Parms+1] = Type::HALF;
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+2, fields);
return TypeFunc::make(domain, range);
}
// arraycopy stub variations:
enum ArrayCopyType {
ac_fast, // void(ptr, ptr, size_t)
ac_checkcast, // int(ptr, ptr, size_t, size_t, ptr)
ac_slow, // void(ptr, int, ptr, int, int)
ac_generic // int(ptr, int, ptr, int, int)
};
static const TypeFunc* make_arraycopy_Type(ArrayCopyType act) {
// create input type (domain)
int num_args = (act == ac_fast ? 3 : 5);
int num_size_args = (act == ac_fast ? 1 : act == ac_checkcast ? 2 : 0);
int argcnt = num_args;
LP64_ONLY(argcnt += num_size_args); // halfwords for lengths
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // src
if (num_size_args == 0) {
fields[argp++] = TypeInt::INT; // src_pos
}
fields[argp++] = TypePtr::NOTNULL; // dest
if (num_size_args == 0) {
fields[argp++] = TypeInt::INT; // dest_pos
fields[argp++] = TypeInt::INT; // length
}
while (num_size_args-- > 0) {
fields[argp++] = TypeX_X; // size in whatevers (size_t)
LP64_ONLY(fields[argp++] = Type::HALF); // other half of long length
}
if (act == ac_checkcast) {
fields[argp++] = TypePtr::NOTNULL; // super_klass
}
assert(argp == TypeFunc::Parms+argcnt, "correct decoding of act");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// create result type if needed
int retcnt = (act == ac_checkcast || act == ac_generic ? 1 : 0);
fields = TypeTuple::fields(1);
if (retcnt == 0)
fields[TypeFunc::Parms+0] = NULL; // void
else
fields[TypeFunc::Parms+0] = TypeInt::INT; // status result, if needed
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+retcnt, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc* OptoRuntime::fast_arraycopy_Type() {
// This signature is simple: Two base pointers and a size_t.
return make_arraycopy_Type(ac_fast);
}
const TypeFunc* OptoRuntime::checkcast_arraycopy_Type() {
// An extension of fast_arraycopy_Type which adds type checking.
return make_arraycopy_Type(ac_checkcast);
}
const TypeFunc* OptoRuntime::slow_arraycopy_Type() {
// This signature is exactly the same as System.arraycopy.
// There are no intptr_t (int/long) arguments.
return make_arraycopy_Type(ac_slow);
}
const TypeFunc* OptoRuntime::generic_arraycopy_Type() {
// This signature is like System.arraycopy, except that it returns status.
return make_arraycopy_Type(ac_generic);
}
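// Summary of the four variants above (restating the ArrayCopyType comments):
//
//   fast_arraycopy_Type()      void (ptr src, ptr dst, size_t words)
//   checkcast_arraycopy_Type() int  (ptr src, ptr dst, size_t, size_t, ptr super_klass)
//   slow_arraycopy_Type()      void (oop src, int src_pos, oop dst, int dst_pos, int length)
//   generic_arraycopy_Type()   int  (oop src, int src_pos, oop dst, int dst_pos, int length)
//
// On LP64 each size_t argument additionally carries a Type::HALF slot, as
// built in make_arraycopy_Type.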
const TypeFunc* OptoRuntime::array_fill_Type() {
const Type** fields;
int argp = TypeFunc::Parms;
if (CCallingConventionRequiresIntsAsLongs) {
// create input type (domain): pointer, int, size_t
fields = TypeTuple::fields(3 LP64_ONLY( + 2));
fields[argp++] = TypePtr::NOTNULL;
fields[argp++] = TypeLong::LONG;
fields[argp++] = Type::HALF;
} else {
// create input type (domain): pointer, int, size_t
fields = TypeTuple::fields(3 LP64_ONLY( + 1));
fields[argp++] = TypePtr::NOTNULL;
fields[argp++] = TypeInt::INT;
}
fields[argp++] = TypeX_X; // size in whatevers (size_t)
LP64_ONLY(fields[argp++] = Type::HALF); // other half of long length
const TypeTuple *domain = TypeTuple::make(argp, fields);
// create result type
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = NULL; // void
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
// for aescrypt encrypt/decrypt operations, just three pointers returning void (length is constant)
const TypeFunc* OptoRuntime::aescrypt_block_Type() {
// create input type (domain)
int num_args = 3;
if (Matcher::pass_original_key_for_aes()) {
num_args = 4;
}
int argcnt = num_args;
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // src
fields[argp++] = TypePtr::NOTNULL; // dest
fields[argp++] = TypePtr::NOTNULL; // k array
if (Matcher::pass_original_key_for_aes()) {
fields[argp++] = TypePtr::NOTNULL; // original k array
}
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// no result type needed
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = NULL; // void
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
/**
* int updateBytesCRC32(int crc, byte* b, int len)
*/
const TypeFunc* OptoRuntime::updateBytesCRC32_Type() {
// create input type (domain)
int num_args = 3;
int argcnt = num_args;
if (CCallingConventionRequiresIntsAsLongs) {
argcnt += 2;
}
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
if (CCallingConventionRequiresIntsAsLongs) {
fields[argp++] = TypeLong::LONG; // crc
fields[argp++] = Type::HALF;
fields[argp++] = TypePtr::NOTNULL; // src
fields[argp++] = TypeLong::LONG; // len
fields[argp++] = Type::HALF;
} else {
fields[argp++] = TypeInt::INT; // crc
fields[argp++] = TypePtr::NOTNULL; // src
fields[argp++] = TypeInt::INT; // len
}
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// result type needed
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeInt::INT; // crc result
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
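// Note on the TypeLong::LONG / Type::HALF pairs used above (and in several
// signatures below): Opto represents every long and double as two adjacent
// tuple slots, the value plus a Type::HALF placeholder for its second half.
// When CCallingConventionRequiresIntsAsLongs widens an int argument such as
// 'crc' or 'len' to a long, the signature must therefore reserve both slots,
// e.g.
//
//   fields[argp++] = TypeLong::LONG; // widened int value
//   fields[argp++] = Type::HALF;     // second half of the two-slot long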
// for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning int
const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
// create input type (domain)
int num_args = 5;
if (Matcher::pass_original_key_for_aes()) {
num_args = 6;
}
int argcnt = num_args;
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // src
fields[argp++] = TypePtr::NOTNULL; // dest
fields[argp++] = TypePtr::NOTNULL; // k array
fields[argp++] = TypePtr::NOTNULL; // r array
fields[argp++] = TypeInt::INT; // src len
if (Matcher::pass_original_key_for_aes()) {
fields[argp++] = TypePtr::NOTNULL; // original k array
}
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// returning cipher len (int)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeInt::INT;
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
/*
* void implCompress(byte[] buf, int ofs)
*/
const TypeFunc* OptoRuntime::sha_implCompress_Type() {
// create input type (domain)
int num_args = 2;
int argcnt = num_args;
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // buf
fields[argp++] = TypePtr::NOTNULL; // state
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// no result type needed
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = NULL; // void
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
/*
* int implCompressMultiBlock(byte[] b, int ofs, int limit)
*/
const TypeFunc* OptoRuntime::digestBase_implCompressMB_Type() {
// create input type (domain)
int num_args = 4;
int argcnt = num_args;
if(CCallingConventionRequiresIntsAsLongs) {
argcnt += 2;
}
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
if(CCallingConventionRequiresIntsAsLongs) {
fields[argp++] = TypePtr::NOTNULL; // buf
fields[argp++] = TypePtr::NOTNULL; // state
fields[argp++] = TypeLong::LONG; // ofs
fields[argp++] = Type::HALF;
fields[argp++] = TypeLong::LONG; // limit
fields[argp++] = Type::HALF;
} else {
fields[argp++] = TypePtr::NOTNULL; // buf
fields[argp++] = TypePtr::NOTNULL; // state
fields[argp++] = TypeInt::INT; // ofs
fields[argp++] = TypeInt::INT; // limit
}
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// returning ofs (int)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeInt::INT; // ofs
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc* OptoRuntime::multiplyToLen_Type() {
// create input type (domain)
int num_args = 6;
int argcnt = num_args;
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // x
fields[argp++] = TypeInt::INT; // xlen
fields[argp++] = TypePtr::NOTNULL; // y
fields[argp++] = TypeInt::INT; // ylen
fields[argp++] = TypePtr::NOTNULL; // z
fields[argp++] = TypeInt::INT; // zlen
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// no result type needed
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = NULL;
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc* OptoRuntime::squareToLen_Type() {
// create input type (domain)
int num_args = 4;
int argcnt = num_args;
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // x
fields[argp++] = TypeInt::INT; // len
fields[argp++] = TypePtr::NOTNULL; // z
fields[argp++] = TypeInt::INT; // zlen
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// no result type needed
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = NULL;
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
// for mulAdd calls, 2 pointers and 3 ints, returning int
const TypeFunc* OptoRuntime::mulAdd_Type() {
// create input type (domain)
int num_args = 5;
int argcnt = num_args;
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // out
fields[argp++] = TypePtr::NOTNULL; // in
fields[argp++] = TypeInt::INT; // offset
fields[argp++] = TypeInt::INT; // len
fields[argp++] = TypeInt::INT; // k
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// returning carry (int)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeInt::INT;
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc* OptoRuntime::montgomeryMultiply_Type() {
// create input type (domain)
int num_args = 7;
int argcnt = num_args;
if (CCallingConventionRequiresIntsAsLongs) {
argcnt++; // additional placeholder
}
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // a
fields[argp++] = TypePtr::NOTNULL; // b
fields[argp++] = TypePtr::NOTNULL; // n
if (CCallingConventionRequiresIntsAsLongs) {
fields[argp++] = TypeLong::LONG; // len
fields[argp++] = TypeLong::HALF; // placeholder
} else {
fields[argp++] = TypeInt::INT; // len
}
fields[argp++] = TypeLong::LONG; // inv
fields[argp++] = Type::HALF;
fields[argp++] = TypePtr::NOTNULL; // result
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// result type needed
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypePtr::NOTNULL;
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc* OptoRuntime::montgomerySquare_Type() {
// create input type (domain)
int num_args = 6;
int argcnt = num_args;
if (CCallingConventionRequiresIntsAsLongs) {
argcnt++; // additional placeholder
}
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // a
fields[argp++] = TypePtr::NOTNULL; // n
if (CCallingConventionRequiresIntsAsLongs) {
fields[argp++] = TypeLong::LONG; // len
fields[argp++] = TypeLong::HALF; // placeholder
} else {
fields[argp++] = TypeInt::INT; // len
}
fields[argp++] = TypeLong::LONG; // inv
fields[argp++] = Type::HALF;
fields[argp++] = TypePtr::NOTNULL; // result
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// result type needed
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypePtr::NOTNULL;
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
//------------- Interpreter state access for on stack replacement
const TypeFunc* OptoRuntime::osr_end_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeRawPtr::BOTTOM; // OSR temp buf
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
// create result type
fields = TypeTuple::fields(1);
// fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // locked oop
fields[TypeFunc::Parms+0] = NULL; // void
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
//-------------- methodData update helpers
const TypeFunc* OptoRuntime::profile_receiver_type_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeAryPtr::NOTNULL; // methodData pointer
fields[TypeFunc::Parms+1] = TypeInstPtr::BOTTOM; // receiver oop
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
// create result type
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = NULL; // void
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain,range);
}
JRT_LEAF(void, OptoRuntime::profile_receiver_type_C(DataLayout* data, oopDesc* receiver))
if (receiver == NULL) return;
Klass* receiver_klass = receiver->klass();
intptr_t* mdp = ((intptr_t*)(data)) + DataLayout::header_size_in_cells();
int empty_row = -1; // free row, if any is encountered
// ReceiverTypeData* vc = new ReceiverTypeData(mdp);
for (uint row = 0; row < ReceiverTypeData::row_limit(); row++) {
// if (vc->receiver(row) == receiver_klass)
int receiver_off = ReceiverTypeData::receiver_cell_index(row);
intptr_t row_recv = *(mdp + receiver_off);
if (row_recv == (intptr_t) receiver_klass) {
// vc->set_receiver_count(row, vc->receiver_count(row) + DataLayout::counter_increment);
int count_off = ReceiverTypeData::receiver_count_cell_index(row);
*(mdp + count_off) += DataLayout::counter_increment;
return;
} else if (row_recv == 0) {
// else if (vc->receiver(row) == NULL)
empty_row = (int) row;
}
}
if (empty_row != -1) {
int receiver_off = ReceiverTypeData::receiver_cell_index(empty_row);
// vc->set_receiver(empty_row, receiver_klass);
*(mdp + receiver_off) = (intptr_t) receiver_klass;
// vc->set_receiver_count(empty_row, DataLayout::counter_increment);
int count_off = ReceiverTypeData::receiver_count_cell_index(empty_row);
*(mdp + count_off) = DataLayout::counter_increment;
} else {
// Receiver did not match any saved receiver and there is no empty row for it.
// Increment total counter to indicate polymorphic case.
intptr_t* count_p = (intptr_t*)(((byte*)(data)) + in_bytes(CounterData::count_offset()));
*count_p += DataLayout::counter_increment;
}
JRT_END
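// Illustrative view of the raw cell layout walked above (assumed; it mirrors
// the commented-out ReceiverTypeData accessor calls): each receiver row owns
// two cells relative to 'mdp',
//
//   mdp[ReceiverTypeData::receiver_cell_index(row)]       // recorded Klass*, 0 if the row is free
//   mdp[ReceiverTypeData::receiver_count_cell_index(row)] // hit count, bumped by counter_increment
//
// and the overflow/polymorphic case falls back to the plain CounterData count.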
//-------------------------------------------------------------------------------------
// register policy
bool OptoRuntime::is_callee_saved_register(MachRegisterNumbers reg) {
assert(reg >= 0 && reg < _last_Mach_Reg, "must be a machine register");
switch (register_save_policy[reg]) {
case 'C': return false; //SOC
case 'E': return true ; //SOE
case 'N': return false; //NS
case 'A': return false; //AS
}
ShouldNotReachHere();
return false;
}
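// Illustrative use (hypothetical caller, not in this file):
//
//   if (OptoRuntime::is_callee_saved_register(reg)) {
//     // save-on-entry: the callee preserves 'reg' across the call
//   } else {
//     // SOC / NS / AS: treat the value in 'reg' as killed by the call
//   }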
//-----------------------------------------------------------------------
// Exceptions
//
static void trace_exception(oop exception_oop, address exception_pc, const char* msg) PRODUCT_RETURN;
// This method is an entry point that is always called from a C++ method, not
// directly from compiled code. Compiled code calls handle_exception_C (below), which in turn enters this helper.
// We can't allow async exception to be installed during exception processing.
JRT_ENTRY_NO_ASYNC(address, OptoRuntime::handle_exception_C_helper(JavaThread* thread, nmethod* &nm))
// Do not confuse exception_oop with pending_exception. The exception_oop
// is only used to pass arguments into the method. Not for general
// exception handling. DO NOT CHANGE IT to use pending_exception, since
// the runtime stubs checks this on exit.
assert(thread->exception_oop() != NULL, "exception oop is found");
address handler_address = NULL;
Handle exception(thread, thread->exception_oop());
address pc = thread->exception_pc();
// Clear out the exception oop and pc since looking up an
// exception handler can cause class loading, which might throw an
// exception and those fields are expected to be clear during
// normal bytecode execution.
thread->clear_exception_oop_and_pc();
if (TraceExceptions) {
trace_exception(exception(), pc, "");
}
// for AbortVMOnException flag
NOT_PRODUCT(Exceptions::debug_check_abort(exception));
#ifdef ASSERT
if (!(exception->is_a(SystemDictionary::Throwable_klass()))) {
// should throw an exception here
ShouldNotReachHere();
}
#endif
// new exception handling: this method is entered only from adapters
// exceptions from compiled java methods are handled in compiled code
// using rethrow node
nm = CodeCache::find_nmethod(pc);
assert(nm != NULL, "No NMethod found");
if (nm->is_native_method()) {
fatal("Native method should not have path to exception handling");
} else {
// we are switching to the old paradigm: search for the exception handler in caller_frame
// instead of in the exception handler of caller_frame.sender()
if (JvmtiExport::can_post_on_exceptions()) {
// "Full-speed catching" is not necessary here,
// since we're notifying the VM on every catch.
// Force deoptimization and the rest of the lookup
// will be fine.
deoptimize_caller_frame(thread);
}
// Check the stack guard pages. If enabled, look for handler in this frame;
// otherwise, forcibly unwind the frame.
//
// 4826555: use default current sp for reguard_stack instead of &nm: it's more accurate.
bool force_unwind = !thread->reguard_stack();
bool deopting = false;
if (nm->is_deopt_pc(pc)) {
deopting = true;
RegisterMap map(thread, false);
frame deoptee = thread->last_frame().sender(&map);
assert(deoptee.is_deoptimized_frame(), "must be deopted");
// Adjust the pc back to the original throwing pc
pc = deoptee.pc();
}
// If we are forcing an unwind because of stack overflow then deopt is
// irrelevant since we are throwing the frame away anyway.
if (deopting && !force_unwind) {
handler_address = SharedRuntime::deopt_blob()->unpack_with_exception();
} else {
handler_address =
force_unwind ? NULL : nm->handler_for_exception_and_pc(exception, pc);
if (handler_address == NULL) {
bool recursive_exception = false;
handler_address = SharedRuntime::compute_compiled_exc_handler(nm, pc, exception, force_unwind, true, recursive_exception);
assert (handler_address != NULL, "must have compiled handler");
// Update the exception cache only when the unwind was not forced
// and there didn't happen another exception during the computation of the
// compiled exception handler. Checking for exception oop equality is not
// sufficient because some exceptions are pre-allocated and reused.
if (!force_unwind && !recursive_exception) {
nm->add_handler_for_exception_and_pc(exception,pc,handler_address);
}
} else {
#ifdef ASSERT
bool recursive_exception = false;
address computed_address = SharedRuntime::compute_compiled_exc_handler(nm, pc, exception, force_unwind, true, recursive_exception);
assert(recursive_exception || (handler_address == computed_address), err_msg("Handler address inconsistency: " PTR_FORMAT " != " PTR_FORMAT,
p2i(handler_address), p2i(computed_address)));
#endif
}
}
thread->set_exception_pc(pc);
thread->set_exception_handler_pc(handler_address);
// Check if the exception PC is a MethodHandle call site.
thread->set_is_method_handle_return(nm->is_method_handle_return(pc));
}
// Restore correct return pc. Was saved above.
thread->set_exception_oop(exception());
return handler_address;
JRT_END
// We are entering here from exception_blob
// If there is a compiled exception handler in this method, we will continue there;
// otherwise we will unwind the stack and continue at the caller of top frame method
// Note we enter without the usual JRT wrapper. We will call a helper routine that
// will do the normal VM entry. We do it this way so that we can see if the nmethod
// we looked up the handler for has been deoptimized in the meantime. If it has been
// we must not use the handler and instead return the deopt blob.
address OptoRuntime::handle_exception_C(JavaThread* thread) {
//
// We are in Java not VM and in debug mode we have a NoHandleMark
//
#ifndef PRODUCT
SharedRuntime::_find_handler_ctr++; // find exception handler
#endif
debug_only(NoHandleMark __hm;)
nmethod* nm = NULL;
address handler_address = NULL;
{
// Enter the VM
ResetNoHandleMark rnhm;
handler_address = handle_exception_C_helper(thread, nm);
}
// Back in java: Use no oops, DON'T safepoint
// Now check to see if the handler we are returning is in a now
// deoptimized frame
if (nm != NULL) {
RegisterMap map(thread, false);
frame caller = thread->last_frame().sender(&map);
#ifdef ASSERT
assert(caller.is_compiled_frame(), "must be");
#endif // ASSERT
if (caller.is_deoptimized_frame()) {
handler_address = SharedRuntime::deopt_blob()->unpack_with_exception();
}
}
return handler_address;
}
//------------------------------rethrow----------------------------------------
// We get here after compiled code has executed a 'RethrowNode'. The callee
// is either throwing or rethrowing an exception. The callee-save registers
// have been restored, synchronized objects have been unlocked and the callee
// stack frame has been removed. The return address was passed in.
// Exception oop is passed as the 1st argument. This routine is then called
// from the stub. On exit, we know where to jump in the caller's code.
// After this C code exits, the stub will pop its frame and end in a jump
// (instead of a return). We enter the caller's default handler.
//
// This must be JRT_LEAF:
// - caller will not change its state as we cannot block on exit,
// therefore raw_exception_handler_for_return_address is all it takes
// to handle deoptimized blobs
//
// However, there needs to be a safepoint check in the middle! So compiled
// safepoints are completely watertight.
//
// Thus, it cannot be a leaf since it contains the No_GC_Verifier.
//
// *THIS IS NOT RECOMMENDED PROGRAMMING STYLE*
//
address OptoRuntime::rethrow_C(oopDesc* exception, JavaThread* thread, address ret_pc) {
#ifndef PRODUCT
SharedRuntime::_rethrow_ctr++; // count rethrows
#endif
assert (exception != NULL, "should have thrown a NullPointerException");
#ifdef ASSERT
if (!(exception->is_a(SystemDictionary::Throwable_klass()))) {
// should throw an exception here
ShouldNotReachHere();
}
#endif
thread->set_vm_result(exception);
// Frame not compiled (handles deoptimization blob)
return SharedRuntime::raw_exception_handler_for_return_address(thread, ret_pc);
}
const TypeFunc *OptoRuntime::rethrow_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Exception oop
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1,fields);
// create result type (range)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Exception oop
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
void OptoRuntime::deoptimize_caller_frame(JavaThread *thread, bool doit) {
// Deoptimize the caller before continuing, as the compiled
// exception handler table may not be valid.
if (!StressCompiledExceptionHandlers && doit) {
deoptimize_caller_frame(thread);
}
}
void OptoRuntime::deoptimize_caller_frame(JavaThread *thread) {
// Called from within the owner thread, so no need for safepoint
RegisterMap reg_map(thread);
frame stub_frame = thread->last_frame();
assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check");
frame caller_frame = stub_frame.sender(&reg_map);
// Deoptimize the caller frame.
Deoptimization::deoptimize_frame(thread, caller_frame.id());
}
bool OptoRuntime::is_deoptimized_caller_frame(JavaThread *thread) {
// Called from within the owner thread, so no need for safepoint
RegisterMap reg_map(thread);
frame stub_frame = thread->last_frame();
assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check");
frame caller_frame = stub_frame.sender(&reg_map);
return caller_frame.is_deoptimized_frame();
}
const TypeFunc *OptoRuntime::register_finalizer_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // oop; Receiver
// // The JavaThread* is passed to each routine as the last argument
// fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // JavaThread *; Executing thread
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1,fields);
// create result type (range)
fields = TypeTuple::fields(0);
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
return TypeFunc::make(domain,range);
}
//-----------------------------------------------------------------------------
// Dtrace support. entry and exit probes have the same signature
const TypeFunc *OptoRuntime::dtrace_method_entry_exit_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeRawPtr::BOTTOM; // Thread-local storage
fields[TypeFunc::Parms+1] = TypeMetadataPtr::BOTTOM; // Method*; Method we are entering
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2,fields);
// create result type (range)
fields = TypeTuple::fields(0);
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
return TypeFunc::make(domain,range);
}
const TypeFunc *OptoRuntime::dtrace_object_alloc_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeRawPtr::BOTTOM; // Thread-local storage
fields[TypeFunc::Parms+1] = TypeInstPtr::NOTNULL; // oop; newly allocated object
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2,fields);
// create result type (range)
fields = TypeTuple::fields(0);
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
return TypeFunc::make(domain,range);
}
JRT_ENTRY_NO_ASYNC(void, OptoRuntime::register_finalizer(oopDesc* obj, JavaThread* thread))
assert(obj->is_oop(), "must be a valid oop");
assert(obj->klass()->has_finalizer(), "shouldn't be here otherwise");
InstanceKlass::register_finalizer(instanceOop(obj), CHECK);
JRT_END
//-----------------------------------------------------------------------------
NamedCounter * volatile OptoRuntime::_named_counters = NULL;
//
// dump the collected NamedCounters.
//
void OptoRuntime::print_named_counters() {
int total_lock_count = 0;
int eliminated_lock_count = 0;
NamedCounter* c = _named_counters;
while (c) {
if (c->tag() == NamedCounter::LockCounter || c->tag() == NamedCounter::EliminatedLockCounter) {
int count = c->count();
if (count > 0) {
bool eliminated = c->tag() == NamedCounter::EliminatedLockCounter;
if (Verbose) {
tty->print_cr("%d %s%s", count, c->name(), eliminated ? " (eliminated)" : "");
}
total_lock_count += count;
if (eliminated) {
eliminated_lock_count += count;
}
}
} else if (c->tag() == NamedCounter::BiasedLockingCounter) {
BiasedLockingCounters* blc = ((BiasedLockingNamedCounter*)c)->counters();
if (blc->nonzero()) {
tty->print_cr("%s", c->name());
blc->print_on(tty);
}
#if INCLUDE_RTM_OPT
} else if (c->tag() == NamedCounter::RTMLockingCounter) {
RTMLockingCounters* rlc = ((RTMLockingNamedCounter*)c)->counters();
if (rlc->nonzero()) {
tty->print_cr("%s", c->name());
rlc->print_on(tty);
}
#endif
}
c = c->next();
}
if (total_lock_count > 0) {
tty->print_cr("dynamic locks: %d", total_lock_count);
if (eliminated_lock_count) {
tty->print_cr("eliminated locks: %d (%d%%)", eliminated_lock_count,
(int)(eliminated_lock_count * 100.0 / total_lock_count));
}
}
}
//
// Allocate a new NamedCounter. The JVMState is used to generate the
// name which consists of method@line for the inlining tree.
//
NamedCounter* OptoRuntime::new_named_counter(JVMState* youngest_jvms, NamedCounter::CounterTag tag) {
int max_depth = youngest_jvms->depth();
// Visit scopes from youngest to oldest.
bool first = true;
stringStream st;
for (int depth = max_depth; depth >= 1; depth--) {
JVMState* jvms = youngest_jvms->of_depth(depth);
ciMethod* m = jvms->has_method() ? jvms->method() : NULL;
if (!first) {
st.print(" ");
} else {
first = false;
}
int bci = jvms->bci();
if (bci < 0) bci = 0;
st.print("%s.%s@%d", m->holder()->name()->as_utf8(), m->name()->as_utf8(), bci);
// To print linenumbers instead of bci use: m->line_number_from_bci(bci)
}
NamedCounter* c;
if (tag == NamedCounter::BiasedLockingCounter) {
c = new BiasedLockingNamedCounter(strdup(st.as_string()));
} else if (tag == NamedCounter::RTMLockingCounter) {
c = new RTMLockingNamedCounter(strdup(st.as_string()));
} else {
c = new NamedCounter(strdup(st.as_string()), tag);
}
// atomically add the new counter to the head of the list. We only
// add counters so this is safe.
NamedCounter* head;
do {
c->set_next(NULL);
head = _named_counters;
c->set_next(head);
} while (Atomic::cmpxchg_ptr(c, &_named_counters, head) != head);
return c;
}
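// Example of the counter name built above (hypothetical methods and bcis):
// for a lock in Bar.bar() inlined into Foo.foo(), the youngest-to-oldest scope
// walk produces something like
//
//   "pkg.Bar.bar@7 pkg.Foo.foo@42"
//
// i.e. one "holder.method@bci" entry per inlining level, innermost scope first.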
//-----------------------------------------------------------------------------
// Non-product code
#ifndef PRODUCT
int trace_exception_counter = 0;
static void trace_exception(oop exception_oop, address exception_pc, const char* msg) {
ttyLocker ttyl;
trace_exception_counter++;
tty->print("%d [Exception (%s): ", trace_exception_counter, msg);
exception_oop->print_value();
tty->print(" in ");
CodeBlob* blob = CodeCache::find_blob(exception_pc);
if (blob->is_nmethod()) {
nmethod* nm = blob->as_nmethod_or_null();
nm->method()->print_value();
} else if (blob->is_runtime_stub()) {
tty->print("<runtime-stub>");
} else {
tty->print("<unknown>");
}
tty->print(" at " INTPTR_FORMAT, p2i(exception_pc));
tty->print_cr("]");
}
#endif // PRODUCT
# ifdef ENABLE_ZAP_DEAD_LOCALS
// Called from call sites in compiled code with oop maps (actually safepoints)
// Zaps dead locals in first java frame.
// Is entry because may need to lock to generate oop maps
// Currently, only used for compiler frames, but someday may be used
// for interpreter frames, too.
int OptoRuntime::ZapDeadCompiledLocals_count = 0;
// avoid pointers to member funcs with these helpers
static bool is_java_frame( frame* f) { return f->is_java_frame(); }
static bool is_native_frame(frame* f) { return f->is_native_frame(); }
void OptoRuntime::zap_dead_java_or_native_locals(JavaThread* thread,
bool (*is_this_the_right_frame_to_zap)(frame*)) {
assert(JavaThread::current() == thread, "is this needed?");
if ( !ZapDeadCompiledLocals ) return;
bool skip = false;
if ( ZapDeadCompiledLocalsFirst == 0 ) ; // nothing special
else if ( ZapDeadCompiledLocalsFirst > ZapDeadCompiledLocals_count ) skip = true;
else if ( ZapDeadCompiledLocalsFirst == ZapDeadCompiledLocals_count )
warning("starting zapping after skipping");
if ( ZapDeadCompiledLocalsLast == -1 ) ; // nothing special
else if ( ZapDeadCompiledLocalsLast < ZapDeadCompiledLocals_count ) skip = true;
else if ( ZapDeadCompiledLocalsLast == ZapDeadCompiledLocals_count )
warning("about to zap last zap");
++ZapDeadCompiledLocals_count; // counts skipped zaps, too
if ( skip ) return;
// find java frame and zap it
for (StackFrameStream sfs(thread); !sfs.is_done(); sfs.next()) {
if (is_this_the_right_frame_to_zap(sfs.current()) ) {
sfs.current()->zap_dead_locals(thread, sfs.register_map());
return;
}
}
warning("no frame found to zap in zap_dead_Java_locals_C");
}
JRT_LEAF(void, OptoRuntime::zap_dead_Java_locals_C(JavaThread* thread))
zap_dead_java_or_native_locals(thread, is_java_frame);
JRT_END
// The following does not work because for one thing, the
// thread state is wrong; it expects java, but it is native.
// Also, the invariants in a native stub are different and
// I'm not sure it is safe to have a MachCallRuntimeDirectNode
// in there.
// So for now, we do not zap in native stubs.
JRT_LEAF(void, OptoRuntime::zap_dead_native_locals_C(JavaThread* thread))
zap_dead_java_or_native_locals(thread, is_native_frame);
JRT_END
# endif
C:\hotspot-69087d08d473\src\share\vm/opto/runtime.hpp
/*
* Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_OPTO_RUNTIME_HPP
#define SHARE_VM_OPTO_RUNTIME_HPP
#include "code/codeBlob.hpp"
#include "opto/machnode.hpp"
#include "opto/type.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/rtmLocking.hpp"
#include "runtime/deoptimization.hpp"
#include "runtime/vframe.hpp"
//------------------------------OptoRuntime------------------------------------
// Opto compiler runtime routines
//
// These are all generated from Ideal graphs. They are called with the
// Java calling convention. Internally they call C++. They are made once at
// startup time and Opto compiles calls to them later.
// Things are broken up into quads: the signature they will be called with,
// the address of the generated code, the corresponding C++ code and an
// nmethod.
// The signature (returned by "xxx_Type()") is used at startup time by the
// Generator to make the generated code "xxx_Java". Opto compiles calls
// to the generated code "xxx_Java". When the compiled code gets executed,
// it calls the C++ code "xxx_C". The generated nmethod is saved in the
// CodeCache. Exception handlers use the nmethod to get the callee-save
// register OopMaps.
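// Worked illustration of the naming scheme above, using the slow-path
// instance allocation declared below (the wiring description is a sketch):
//
//   OptoRuntime::new_instance_Type()  -- signature the stub is generated from
//   OptoRuntime::_new_instance_Java   -- entry point of the generated stub
//   OptoRuntime::new_instance_C(...)  -- C++ slow path the stub calls into
//   (plus the CodeBlob/nmethod holding the stub and its OopMaps)
//
// Opto emits calls to new_instance_Java(); at run time the stub transitions
// into the VM, runs new_instance_C, and picks the result back up from
// thread-local storage (see runtime.cpp).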
class CallInfo;
//
// NamedCounters are tagged counters which can be used for profiling
// code in various ways. Currently they are used by the lock coarsening code
//
class NamedCounter : public CHeapObj<mtCompiler> {
public:
enum CounterTag {
NoTag,
LockCounter,
EliminatedLockCounter,
BiasedLockingCounter,
RTMLockingCounter
};
private:
const char * _name;
int _count;
CounterTag _tag;
NamedCounter* _next;
public:
NamedCounter(const char *n, CounterTag tag = NoTag):
_name(n),
_count(0),
_next(NULL),
_tag(tag) {}
const char * name() const { return _name; }
int count() const { return _count; }
address addr() { return (address)&_count; }
CounterTag tag() const { return _tag; }
void set_tag(CounterTag tag) { _tag = tag; }
NamedCounter* next() const { return _next; }
void set_next(NamedCounter* next) {
assert(_next == NULL || next == NULL, "already set");
_next = next;
}
};
class BiasedLockingNamedCounter : public NamedCounter {
private:
BiasedLockingCounters _counters;
public:
BiasedLockingNamedCounter(const char *n) :
NamedCounter(n, BiasedLockingCounter), _counters() {}
BiasedLockingCounters* counters() { return &_counters; }
};
class RTMLockingNamedCounter : public NamedCounter {
private:
RTMLockingCounters _counters;
public:
RTMLockingNamedCounter(const char *n) :
NamedCounter(n, RTMLockingCounter), _counters() {}
RTMLockingCounters* counters() { return &_counters; }
};
typedef const TypeFunc*(*TypeFunc_generator)();
class OptoRuntime : public AllStatic {
friend class Matcher; // allow access to stub names
private:
// define stubs
static address generate_stub(ciEnv* ci_env, TypeFunc_generator gen, address C_function, const char *name, int is_fancy_jump, bool pass_tls, bool save_arguments, bool return_pc);
// References to generated stubs
static address _new_instance_Java;
static address _new_array_Java;
static address _new_array_nozero_Java;
static address _multianewarray2_Java;
static address _multianewarray3_Java;
static address _multianewarray4_Java;
static address _multianewarray5_Java;
static address _multianewarrayN_Java;
static address _g1_wb_pre_Java;
static address _g1_wb_post_Java;
static address _vtable_must_compile_Java;
static address _complete_monitor_locking_Java;
static address _rethrow_Java;
static address _slow_arraycopy_Java;
static address _register_finalizer_Java;
# ifdef ENABLE_ZAP_DEAD_LOCALS
static address _zap_dead_Java_locals_Java;
static address _zap_dead_native_locals_Java;
# endif
//
// Implementation of runtime methods
// =================================
// Allocate storage for a Java instance.
static void new_instance_C(Klass* instance_klass, JavaThread *thread);
// Allocate storage for an objArray or typeArray
static void new_array_C(Klass* array_klass, int len, JavaThread *thread);
static void new_array_nozero_C(Klass* array_klass, int len, JavaThread *thread);
// Post-slow-path-allocation, pre-initializing-stores step for
// implementing ReduceInitialCardMarks
static void new_store_pre_barrier(JavaThread* thread);
// Allocate storage for multi-dimensional arrays
// Note: needs to be fixed for arbitrary number of dimensions
static void multianewarray2_C(Klass* klass, int len1, int len2, JavaThread *thread);
static void multianewarray3_C(Klass* klass, int len1, int len2, int len3, JavaThread *thread);
static void multianewarray4_C(Klass* klass, int len1, int len2, int len3, int len4, JavaThread *thread);
static void multianewarray5_C(Klass* klass, int len1, int len2, int len3, int len4, int len5, JavaThread *thread);
static void multianewarrayN_C(Klass* klass, arrayOopDesc* dims, JavaThread *thread);
static void g1_wb_pre_C(oopDesc* orig, JavaThread* thread);
static void g1_wb_post_C(void* card_addr, JavaThread* thread);
public:
// Slow-path Locking and Unlocking
static void complete_monitor_locking_C(oopDesc* obj, BasicLock* lock, JavaThread* thread);
static void complete_monitor_unlocking_C(oopDesc* obj, BasicLock* lock);
private:
// Implicit exception support
static void throw_null_exception_C(JavaThread* thread);
// Exception handling
static address handle_exception_C (JavaThread* thread);
static address handle_exception_C_helper(JavaThread* thread, nmethod*& nm);
static address rethrow_C (oopDesc* exception, JavaThread *thread, address return_pc );
static void deoptimize_caller_frame (JavaThread *thread);
static void deoptimize_caller_frame (JavaThread *thread, bool doit);
static bool is_deoptimized_caller_frame (JavaThread *thread);
// CodeBlob support
// ===================================================================
static ExceptionBlob* _exception_blob;
static void generate_exception_blob();
static void register_finalizer(oopDesc* obj, JavaThread* thread);
// zapping dead locals, either from Java frames or from native frames
# ifdef ENABLE_ZAP_DEAD_LOCALS
static void zap_dead_Java_locals_C( JavaThread* thread);
static void zap_dead_native_locals_C( JavaThread* thread);
static void zap_dead_java_or_native_locals( JavaThread*, bool (*)(frame*));
public:
static int ZapDeadCompiledLocals_count;
# endif
public:
static bool is_callee_saved_register(MachRegisterNumbers reg);
// One time only generate runtime code stubs. Returns true
// when runtime stubs have been generated successfully and
// false otherwise.
static bool generate(ciEnv* env);
// Returns the name of a stub
static const char* stub_name(address entry);
// access to runtime stubs entry points for java code
static address new_instance_Java() { return _new_instance_Java; }
static address new_array_Java() { return _new_array_Java; }
static address new_array_nozero_Java() { return _new_array_nozero_Java; }
static address multianewarray2_Java() { return _multianewarray2_Java; }
static address multianewarray3_Java() { return _multianewarray3_Java; }
static address multianewarray4_Java() { return _multianewarray4_Java; }
static address multianewarray5_Java() { return _multianewarray5_Java; }
static address multianewarrayN_Java() { return _multianewarrayN_Java; }
static address g1_wb_pre_Java() { return _g1_wb_pre_Java; }
static address g1_wb_post_Java() { return _g1_wb_post_Java; }
static address vtable_must_compile_stub() { return _vtable_must_compile_Java; }
static address complete_monitor_locking_Java() { return _complete_monitor_locking_Java; }
static address slow_arraycopy_Java() { return _slow_arraycopy_Java; }
static address register_finalizer_Java() { return _register_finalizer_Java; }
# ifdef ENABLE_ZAP_DEAD_LOCALS
static address zap_dead_locals_stub(bool is_native) { return is_native
? _zap_dead_native_locals_Java
: _zap_dead_Java_locals_Java; }
static MachNode* node_to_call_zap_dead_locals(Node* n, int block_num, bool is_native);
# endif
static ExceptionBlob* exception_blob() { return _exception_blob; }
// Leaf routines helping with method data update
static void profile_receiver_type_C(DataLayout* data, oopDesc* receiver);
// Implicit exception support
static void throw_div0_exception_C (JavaThread* thread);
static void throw_stack_overflow_error_C(JavaThread* thread);
// Exception handling
static address rethrow_stub() { return _rethrow_Java; }
// Type functions
// ======================================================
static const TypeFunc* new_instance_Type(); // object allocation (slow case)
static const TypeFunc* new_array_Type (); // [a]newarray (slow case)
static const TypeFunc* multianewarray_Type(int ndim); // multianewarray
static const TypeFunc* multianewarray2_Type(); // multianewarray
static const TypeFunc* multianewarray3_Type(); // multianewarray
static const TypeFunc* multianewarray4_Type(); // multianewarray
static const TypeFunc* multianewarray5_Type(); // multianewarray
static const TypeFunc* multianewarrayN_Type(); // multianewarray
static const TypeFunc* g1_wb_pre_Type();
static const TypeFunc* g1_wb_post_Type();
static const TypeFunc* complete_monitor_enter_Type();
static const TypeFunc* complete_monitor_exit_Type();
static const TypeFunc* uncommon_trap_Type();
static const TypeFunc* athrow_Type();
static const TypeFunc* rethrow_Type();
static const TypeFunc* Math_D_D_Type(); // sin,cos & friends
static const TypeFunc* Math_DD_D_Type(); // mod,pow & friends
static const TypeFunc* modf_Type();
static const TypeFunc* l2f_Type();
static const TypeFunc* void_long_Type();
static const TypeFunc* flush_windows_Type();
// arraycopy routine types
static const TypeFunc* fast_arraycopy_Type(); // bit-blasters
static const TypeFunc* checkcast_arraycopy_Type();
static const TypeFunc* generic_arraycopy_Type();
static const TypeFunc* slow_arraycopy_Type(); // the full routine
static const TypeFunc* array_fill_Type();
static const TypeFunc* aescrypt_block_Type();
static const TypeFunc* cipherBlockChaining_aescrypt_Type();
static const TypeFunc* sha_implCompress_Type();
static const TypeFunc* digestBase_implCompressMB_Type();
static const TypeFunc* multiplyToLen_Type();
static const TypeFunc* squareToLen_Type();
static const TypeFunc* mulAdd_Type();
static const TypeFunc* montgomeryMultiply_Type();
static const TypeFunc* montgomerySquare_Type();
static const TypeFunc* ghash_processBlocks_Type();
static const TypeFunc* updateBytesCRC32_Type();
// leaf on stack replacement interpreter accessor types
static const TypeFunc* osr_end_Type();
// leaf methodData routine types
static const TypeFunc* profile_receiver_type_Type();
// leaf on stack replacement interpreter accessor types
static const TypeFunc* fetch_int_Type();
static const TypeFunc* fetch_long_Type();
static const TypeFunc* fetch_float_Type();
static const TypeFunc* fetch_double_Type();
static const TypeFunc* fetch_oop_Type();
static const TypeFunc* fetch_monitor_Type();
static const TypeFunc* register_finalizer_Type();
// Dtrace support
static const TypeFunc* dtrace_method_entry_exit_Type();
static const TypeFunc* dtrace_object_alloc_Type();
# ifdef ENABLE_ZAP_DEAD_LOCALS
static const TypeFunc* zap_dead_locals_Type();
# endif
private:
static NamedCounter * volatile _named_counters;
public:
// helper function which creates a named counter labeled with the
// calling method and bci taken from the JVMState, if they are available
static NamedCounter* new_named_counter(JVMState* jvms, NamedCounter::CounterTag tag);
// dumps all the named counters
static void print_named_counters();
};
#endif // SHARE_VM_OPTO_RUNTIME_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/split_if.cpp
/*
* Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/callnode.hpp"
#include "opto/connode.hpp"
#include "opto/loopnode.hpp"
//------------------------------split_thru_region------------------------------
// Split Node 'n' through merge point.
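// Illustrative sketch (not part of the original source): before the split,
// 'n' is a CFG node hanging below the Region that merges paths P1..Pk.
// Afterwards each path Pi carries its own clone of 'n' (any input whose
// control was the Region is rewired to that path's corresponding input),
// and the clones are re-merged by a fresh RegionNode returned to the caller.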
Node *PhaseIdealLoop::split_thru_region( Node *n, Node *region ) {
uint wins = 0;
assert( n->is_CFG(), "" );
assert( region->is_Region(), "" );
Node *r = new (C) RegionNode( region->req() );
IdealLoopTree *loop = get_loop( n );
for( uint i = 1; i < region->req(); i++ ) {
Node *x = n->clone();
Node *in0 = n->in(0);
if( in0->in(0) == region ) x->set_req( 0, in0->in(i) );
for( uint j = 1; j < n->req(); j++ ) {
Node *in = n->in(j);
if( get_ctrl(in) == region )
x->set_req( j, in->in(i) );
}
_igvn.register_new_node_with_optimizer(x);
set_loop(x, loop);
set_idom(x, x->in(0), dom_depth(x->in(0))+1);
r->init_req(i, x);
}
// Record region
r->set_req(0,region); // Not a TRUE RegionNode
_igvn.register_new_node_with_optimizer(r);
set_loop(r, loop);
if( !loop->_child )
loop->_body.push(r);
return r;
}
//------------------------------split_up---------------------------------------
// Split block-local op up through the phis to empty the current block
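// Outline (descriptive note, not in the original source): inputs are split
// recursively first; a block-local Cmp is special-cased (cloned down to its
// users unless it forms a simple single-use Cmp/Bool/CMove chain); any other
// block-local node is cloned once per predecessor of 'blk1' and the clones
// are merged by a new Phi which then replaces 'n' everywhere.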
bool PhaseIdealLoop::split_up( Node *n, Node *blk1, Node *blk2 ) {
if( n->is_CFG() ) {
assert( n->in(0) != blk1, "Lousy candidate for split-if" );
return false;
}
if( get_ctrl(n) != blk1 && get_ctrl(n) != blk2 )
return false; // Not block local
if( n->is_Phi() ) return false; // Local PHIs are expected
// Recursively split-up inputs
for (uint i = 1; i < n->req(); i++) {
if( split_up( n->in(i), blk1, blk2 ) ) {
// Got split recursively and self went dead?
if (n->outcnt() == 0)
_igvn.remove_dead_node(n);
return true;
}
}
// Check for needing to clone-up a compare. Can't do that, it forces
// another (nested) split-if transform. Instead, clone it "down".
if( n->is_Cmp() ) {
assert(get_ctrl(n) == blk2 || get_ctrl(n) == blk1, "must be in block with IF");
// Check for simple Cmp/Bool/CMove which we can clone-up. Cmp/Bool/CMove
// sequence can have no other users and it must all reside in the split-if
// block. Non-simple Cmp/Bool/CMove sequences are 'cloned-down' below -
// private, per-use versions of the Cmp and Bool are made. These sink to
// the CMove block. If the CMove is in the split-if block, then in the
// next iteration this will become a simple Cmp/Bool/CMove set to clone-up.
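// For example (illustrative only): if this Cmp also feeds a Bool used by an
// If in some other block, the chain cannot be hoisted as a unit; instead the
// loops below give each Bool/If user a private copy of the Bool and the Cmp,
// so each copy can later sink to its single use.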
Node *bol, *cmov;
if( !(n->outcnt() == 1 && n->unique_out()->is_Bool() &&
(bol = n->unique_out()->as_Bool()) &&
(get_ctrl(bol) == blk1 ||
get_ctrl(bol) == blk2) &&
bol->outcnt() == 1 &&
bol->unique_out()->is_CMove() &&
(cmov = bol->unique_out()->as_CMove()) &&
(get_ctrl(cmov) == blk1 ||
get_ctrl(cmov) == blk2) ) ) {
// Must clone down
#ifndef PRODUCT
if( PrintOpto && VerifyLoopOptimizations ) {
tty->print("Cloning down: ");
n->dump();
}
#endif
// Clone down any block-local BoolNode uses of this CmpNode
for (DUIterator i = n->outs(); n->has_out(i); i++) {
Node* bol = n->out(i);
assert( bol->is_Bool(), "" );
if (bol->outcnt() == 1) {
Node* use = bol->unique_out();
Node *use_c = use->is_If() ? use->in(0) : get_ctrl(use);
if (use_c == blk1 || use_c == blk2) {
continue;
}
}
if (get_ctrl(bol) == blk1 || get_ctrl(bol) == blk2) {
// Recursively sink any BoolNode
#ifndef PRODUCT
if( PrintOpto && VerifyLoopOptimizations ) {
tty->print("Cloning down: ");
bol->dump();
}
#endif
for (DUIterator_Last jmin, j = bol->last_outs(jmin); j >= jmin; --j) {
// Uses are either IfNodes or CMoves
Node* iff = bol->last_out(j);
assert( iff->in(1) == bol, "" );
// Get control block of either the CMove or the If input
Node *iff_ctrl = iff->is_If() ? iff->in(0) : get_ctrl(iff);
Node *x = bol->clone();
register_new_node(x, iff_ctrl);
_igvn.replace_input_of(iff, 1, x);
}
_igvn.remove_dead_node( bol );
--i;
}
}
// Clone down this CmpNode
for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; --j) {
Node* bol = n->last_out(j);
assert( bol->in(1) == n, "" );
Node *x = n->clone();
register_new_node(x, get_ctrl(bol));
_igvn.replace_input_of(bol, 1, x);
}
_igvn.remove_dead_node( n );
return true;
}
}
// See if we are splitting up a Store. Any anti-dep loads must go up as
// well. An anti-dep load might be in the wrong block, because in this
// particular layout/schedule we ignored anti-deps and allowed memory to
// be alive twice. This only works if we do the same operations on
// anti-dep loads as we do to their killing stores.
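// Concretely (descriptive note): when the Store's memory input is the Phi
// merging at this block, every Load on the same alias class hanging off that
// Phi is re-pinned to 'blk1' with set_ctrl(), so it is split up alongside
// the Store that may overwrite its memory.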
if( n->is_Store() && n->in(MemNode::Memory)->in(0) == n->in(0) ) {
// Get store's memory slice
int alias_idx = C->get_alias_index(_igvn.type(n->in(MemNode::Address))->is_ptr());
// Get memory-phi anti-dep loads will be using
Node *memphi = n->in(MemNode::Memory);
assert( memphi->is_Phi(), "" );
// Hoist any anti-dep load to the splitting block;
// it will then "split-up".
for (DUIterator_Fast imax,i = memphi->fast_outs(imax); i < imax; i++) {
Node *load = memphi->fast_out(i);
if( load->is_Load() && alias_idx == C->get_alias_index(_igvn.type(load->in(MemNode::Address))->is_ptr()) )
set_ctrl(load,blk1);
}
}
// Found some other Node; must clone it up
#ifndef PRODUCT
if( PrintOpto && VerifyLoopOptimizations ) {
tty->print("Cloning up: ");
n->dump();
}
#endif
// ConvI2L may have type information on it which becomes invalid if
// it moves up in the graph, so widen the type of any clones to
// TypeLong::INT when pushing them up.
const Type* rtype = NULL;
if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::INT) {
rtype = TypeLong::INT;
}
// Now actually split-up this guy. One copy per control path merging.
Node *phi = PhiNode::make_blank(blk1, n);
for( uint j = 1; j < blk1->req(); j++ ) {
Node *x = n->clone();
// Widen the type of the ConvI2L when pushing up.
if (rtype != NULL) x->as_Type()->set_type(rtype);
if( n->in(0) && n->in(0) == blk1 )
x->set_req( 0, blk1->in(j) );
for( uint i = 1; i < n->req(); i++ ) {
Node *m = n->in(i);
if( get_ctrl(m) == blk1 ) {
assert( m->in(0) == blk1, "" );
x->set_req( i, m->in(j) );
}
}
register_new_node( x, blk1->in(j) );
phi->init_req( j, x );
}
// Announce phi to optimizer
register_new_node(phi, blk1);
// Remove cloned-up value from optimizer; use phi instead
_igvn.replace_node( n, phi );
// (There used to be a self-recursive call to split_up() here,
// but it is not needed. All necessary forward walking is done
// by do_split_if() below.)
return true;
}
//------------------------------register_new_node------------------------------
void PhaseIdealLoop::register_new_node( Node *n, Node *blk ) {
assert(!n->is_CFG(), "must be data node");
_igvn.register_new_node_with_optimizer(n);
set_ctrl(n, blk);
IdealLoopTree *loop = get_loop(blk);
if( !loop->_child )
loop->_body.push(n);
}
//------------------------------small_cache------------------------------------
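// Maps a block already visited by the dominator walk in spinup() to the
// replacement def computed for it; a later walk over the same idom chain
// stops at the first cached block, giving cheap path compression.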
struct small_cache : public Dict {
small_cache() : Dict( cmpkey, hashptr ) {}
Node *probe( Node *use_blk ) { return (Node*)((*this)[use_blk]); }
void lru_insert( Node *use_blk, Node *new_def ) { Insert(use_blk,new_def); }
};
//------------------------------spinup-----------------------------------------
// "Spin up" the dominator tree, starting at the use site and stopping when we
// find the post-dominating point.
// We must be at the merge point which post-dominates 'new_false' and
// 'new_true'. Figure out which edges into the RegionNode eventually lead up
// to false and which to true. Put in a PhiNode to merge values; plug in
// the appropriate false-arm or true-arm values. If some path leads to the
// original IF, then insert a Phi recursively.
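// Rough shape of the walk (illustrative note): a first pass climbs idom(n)
// until it reaches 'iff_dom', remembering the last block visited. That block
// tells us whether the path entered through new_false, new_true, or some
// deeper merge point (which gets a recursive Phi). A second pass over the
// same chain then fills the cache so later uses on this path stop early.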
Node *PhaseIdealLoop::spinup( Node *iff_dom, Node *new_false, Node *new_true, Node *use_blk, Node *def, small_cache *cache ) {
if (use_blk->is_top()) // Handle dead uses
return use_blk;
Node *prior_n = (Node*)((intptr_t)0xdeadbeef);
Node *n = use_blk; // Get path input
assert( use_blk != iff_dom, "" );
// Here's the "spinup" the dominator tree loop. Do a cache-check
// along the way, in case we've come this way before.
while( n != iff_dom ) { // Found post-dominating point?
prior_n = n;
n = idom(n); // Search higher
Node *s = cache->probe( prior_n ); // Check cache
if( s ) return s; // Cache hit!
}
Node *phi_post;
if( prior_n == new_false || prior_n == new_true ) {
phi_post = def->clone();
phi_post->set_req(0, prior_n );
register_new_node(phi_post, prior_n);
} else {
// This method handles both control uses (looking for Regions) and data
// uses (looking for Phis). If looking for a control use, then we need
// to insert a Region instead of a Phi; however Regions always exist
// previously (the hash_find_insert below would always hit) so we can
// return the existing Region.
if( def->is_CFG() ) {
phi_post = prior_n; // If looking for CFG, return prior
} else {
assert( def->is_Phi(), "" );
assert( prior_n->is_Region(), "must be a post-dominating merge point" );
// Need a Phi here
phi_post = PhiNode::make_blank(prior_n, def);
// Search for both true and false on all paths till find one.
for( uint i = 1; i < phi_post->req(); i++ ) // For all paths
phi_post->init_req( i, spinup( iff_dom, new_false, new_true, prior_n->in(i), def, cache ) );
Node *t = _igvn.hash_find_insert(phi_post);
if( t ) { // See if we already have this one
// phi_post will not be used, so kill it
_igvn.remove_dead_node(phi_post);
phi_post->destruct();
phi_post = t;
} else {
register_new_node( phi_post, prior_n );
}
}
}
// Update cache everywhere
prior_n = (Node*)((intptr_t)0xdeadbeef); // Reset IDOM walk
n = use_blk; // Get path input
// Spin-up the idom tree again, basically doing path-compression.
// Insert cache entries along the way, so that if we ever hit this
// point in the IDOM tree again we'll stop immediately on a cache hit.
while( n != iff_dom ) { // Found post-dominating point?
prior_n = n;
n = idom(n); // Search higher
cache->lru_insert( prior_n, phi_post ); // Fill cache
} // End of while not gone high enough
return phi_post;
}
//------------------------------find_use_block---------------------------------
// Find the block a USE is in. Normally USE's are in the same block as the
// using instruction. For Phi-USE's, the USE is in the predecessor block
// along the corresponding path.
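// Example (descriptive note): if 'use' is a Phi whose j-th input is 'def',
// the effective use block is use->in(0)->in(j), i.e. the predecessor block
// feeding that Phi input, not the block holding the Phi itself.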
Node *PhaseIdealLoop::find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true ) {
// CFG uses are their own block
if( use->is_CFG() )
return use;
if( use->is_Phi() ) { // Phi uses in prior block
// Grab the first Phi use; there may be many.
// Each will be handled as a separate iteration of
// the "while( phi->outcnt() )" loop.
uint j;
for( j = 1; j < use->req(); j++ )
if( use->in(j) == def )
break;
assert( j < use->req(), "def should be among use's inputs" );
return use->in(0)->in(j);
}
// Normal (non-phi) use
Node *use_blk = get_ctrl(use);
// Some uses are directly attached to the old (and going away)
// false and true branches.
if( use_blk == old_false ) {
use_blk = new_false;
set_ctrl(use, new_false);
}
if( use_blk == old_true ) {
use_blk = new_true;
set_ctrl(use, new_true);
}
if (use_blk == NULL) { // He's dead, Jim
_igvn.replace_node(use, C->top());
}
return use_blk;
}
//------------------------------handle_use-------------------------------------
// Handle uses of the merge point. Basically, split-if makes the merge point
// go away so all uses of the merge point must go away as well. Most block
// local uses have already been split-up, through the merge point. Uses from
// far below the merge point can't always be split up (e.g., phi-uses are
// pinned) and it makes too much stuff live. Instead we use a path-based
// solution to move uses down.
//
// If the use is along the pre-split-CFG true branch, then the new use will
// be from the post-split-CFG true merge point. Vice-versa for the false
// path. Some uses will be along both paths; then we sink the use to the
// post-dominating location; we may need to insert a Phi there.
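// (Illustrative note: a use whose block sits under only the old true arm is
// rewired to 'new_true', one under only the false arm to 'new_false', and a
// use below the point where the two arms remerge gets a Phi built by
// spinup() at that post-dominating merge.)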
void PhaseIdealLoop::handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true ) {
Node *use_blk = find_use_block(use,def,old_false,new_false,old_true,new_true);
if( !use_blk ) return; // He's dead, Jim
// Walk up the dominator tree until I hit either the old IfFalse, the old
// IfTrue or the old If. Insert Phis where needed.
Node *new_def = spinup( region_dom, new_false, new_true, use_blk, def, cache );
// Found where this USE goes. Re-point him.
uint i;
for( i = 0; i < use->req(); i++ )
if( use->in(i) == def )
break;
assert( i < use->req(), "def should be among use's inputs" );
_igvn.replace_input_of(use, i, new_def);
}