

//=============================================================================
uint TypeNode::size_of() const { return sizeof(*this); }
#ifndef PRODUCT
void TypeNode::dump_spec(outputStream *st) const {
  if( !Verbose && !WizardMode ) {
    // standard dump does this in Verbose and WizardMode
    st->print(" #"); _type->dump_on(st);
  }
}
#endif
uint TypeNode::hash() const {
  return Node::hash() + _type->hash();
}
uint TypeNode::cmp( const Node &n ) const
{ return !Type::cmp( _type, ((TypeNode&)n)._type ); }
const Type *TypeNode::bottom_type() const { return _type; }
const Type *TypeNode::Value( PhaseTransform * ) const { return _type; }

//------------------------------ideal_reg--------------------------------------
uint TypeNode::ideal_reg() const {
  return _type->ideal_reg();
}
// C:\hotspot-69087d08d473\src\share\vm/opto/node.hpp
/*
 * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_VM_OPTO_NODE_HPP
#define SHARE_VM_OPTO_NODE_HPP

#include "libadt/port.hpp"
#include "libadt/vectset.hpp"
#include "opto/compile.hpp"
#include "opto/type.hpp"

// Portions of code courtesy of Clifford Click

// Optimization - Graph Style


class AbstractLockNode;
class AddNode;
class AddPNode;
class AliasInfo;
class AllocateArrayNode;
class AllocateNode;
class Block;
class BoolNode;
class BoxLockNode;
class CMoveNode;
class CallDynamicJavaNode;
class CallJavaNode;
class CallLeafNode;
class CallNode;
class CallRuntimeNode;
class CallStaticJavaNode;
class CatchNode;
class CatchProjNode;
class CheckCastPPNode;
class CastIINode;
class ClearArrayNode;
class CmpNode;
class CodeBuffer;
class ConstraintCastNode;
class ConNode;
class CountedLoopNode;
class CountedLoopEndNode;
class DecodeNarrowPtrNode;
class DecodeNNode;
class DecodeNKlassNode;
class EncodeNarrowPtrNode;
class EncodePNode;
class EncodePKlassNode;
class FastLockNode;
class FastUnlockNode;
class IfNode;
class IfFalseNode;
class IfTrueNode;
class InitializeNode;
class JVMState;
class JumpNode;
class JumpProjNode;
class LoadNode;
class LoadStoreNode;
class LockNode;
class LoopNode;
class MachBranchNode;
class MachCallDynamicJavaNode;
class MachCallJavaNode;
class MachCallLeafNode;
class MachCallNode;
class MachCallRuntimeNode;
class MachCallStaticJavaNode;
class MachConstantBaseNode;
class MachConstantNode;
class MachGotoNode;
class MachIfNode;
class MachNode;
class MachNullCheckNode;
class MachProjNode;
class MachReturnNode;
class MachSafePointNode;
class MachSpillCopyNode;
class MachTempNode;
class MachMergeNode;
class Matcher;
class MemBarNode;
class MemBarStoreStoreNode;
class MemNode;
class MergeMemNode;
class MulNode;
class MultiNode;
class MultiBranchNode;
class NeverBranchNode;
class Node;
class Node_Array;
class Node_List;
class Node_Stack;
class NullCheckNode;
class OopMap;
class ParmNode;
class PCTableNode;
class PhaseCCP;
class PhaseGVN;
class PhaseIterGVN;
class PhaseRegAlloc;
class PhaseTransform;
class PhaseValues;
class PhiNode;
class Pipeline;
class ProjNode;
class RegMask;
class RegionNode;
class RootNode;
class SafePointNode;
class SafePointScalarObjectNode;
class StartNode;
class State;
class StoreNode;
class SubNode;
class Type;
class TypeNode;
class UnlockNode;
class VectorNode;
class LoadVectorNode;
class StoreVectorNode;
class VectorSet;
typedef void (*NFunc)(Node&,void*);
extern "C" {
  typedef int (*C_sort_func_t)(const void *, const void *);
}

// The type of all node counts and indexes.
// It must hold at least 16 bits, but must also be fast to load and store.
// This type, if less than 32 bits, could limit the number of possible nodes.
// (To make this type platform-specific, move to globalDefinitions_xxx.hpp.)
typedef unsigned int node_idx_t;


#ifndef OPTO_DU_ITERATOR_ASSERT
#ifdef ASSERT
#define OPTO_DU_ITERATOR_ASSERT 1
#else
#define OPTO_DU_ITERATOR_ASSERT 0
#endif
#endif //OPTO_DU_ITERATOR_ASSERT

#if OPTO_DU_ITERATOR_ASSERT
class DUIterator;
class DUIterator_Fast;
class DUIterator_Last;
#else
typedef uint   DUIterator;
typedef Node** DUIterator_Fast;
typedef Node** DUIterator_Last;
#endif

// Node Sentinel
#define NodeSentinel (Node*)-1

// Unknown count frequency
#define COUNT_UNKNOWN (-1.0f)

//------------------------------Node-------------------------------------------
// Nodes define actions in the program.  They create values, which have types.
// They are both vertices in a directed graph and program primitives.  Nodes
// are labeled; the label is the "opcode", the primitive function in the lambda
// calculus sense that gives meaning to the Node.  Node inputs are ordered (so
// that "a-b" is different from "b-a").  The inputs to a Node are the inputs to
// the Node's function.  These inputs also define a Type equation for the Node.
// Solving these Type equations amounts to doing dataflow analysis.
// Control and data are uniformly represented in the graph.  Finally, Nodes
// have a unique dense integer index which is used to index into side arrays
// whenever I have phase-specific information.
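//
// A hedged illustration of the side-array idiom mentioned above, using the
// Node_Array helper declared later in this file:
//   Node_Array map(C->node_arena());   // phase-specific side mapping
//   map.map(n->_idx, replacement);     // keyed by the dense node index
//   Node* prev = map[n->_idx];         // reads back NULL until mapped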

class Node {
  friend class VMStructs;

  // Lots of restrictions on cloning Nodes
  Node(const Node&);            // not defined; linker error to use these
  Node &operator=(const Node &rhs);

public:
  friend class Compile;
  #if OPTO_DU_ITERATOR_ASSERT
  friend class DUIterator_Common;
  friend class DUIterator;
  friend class DUIterator_Fast;
  friend class DUIterator_Last;
  #endif

  // Because Nodes come and go, I define an Arena of Node structures to pull
  // from.  This should allow fast access to node creation & deletion.  This
  // field is a local cache of a value defined in some "program fragment" of
  // which these Nodes are just a part.

  // New operator that takes a Compile pointer; this will eventually
  // be the "new" New operator.
  inline void* operator new( size_t x, Compile* C) throw() {
    Node* n = (Node*)C->node_arena()->Amalloc_D(x);
#ifdef ASSERT
    n->_in = (Node**)n; // magic cookie for assertion check
#endif
    n->_out = (Node**)C;
    return (void*)n;
  }

  // Delete is a NOP
  void operator delete( void *ptr ) {}
  // Fancy destructor; eagerly attempt to reclaim Node numberings and storage
  void destruct();

  // Create a new Node.  Required is the number of inputs required for
  // semantic correctness.
  Node( uint required );

  // Create a new Node with given input edges.
  // These versions require use of the Compile-pointer "new" defined above.
  // E.g.  new (C) FooNode( C, NULL, left, right );
  Node( Node *n0 );
  Node( Node *n0, Node *n1 );
  Node( Node *n0, Node *n1, Node *n2 );
  Node( Node *n0, Node *n1, Node *n2, Node *n3 );
  Node( Node *n0, Node *n1, Node *n2, Node *n3, Node *n4 );
  Node( Node *n0, Node *n1, Node *n2, Node *n3, Node *n4, Node *n5 );
  Node( Node *n0, Node *n1, Node *n2, Node *n3,
            Node *n4, Node *n5, Node *n6 );

  // Clone an inherited Node given only the base Node type.
  Node* clone() const;

  // Clone a Node, immediately supplying one or two new edges.
  // The first and second arguments, if non-null, replace in(1) and in(2),
  // respectively.
  Node* clone_with_data_edge(Node* in1, Node* in2 = NULL) const {
    Node* nn = clone();
    if (in1 != NULL)  nn->set_req(1, in1);
    if (in2 != NULL)  nn->set_req(2, in2);
    return nn;
  }

private:
  // Shared setup for the above constructors.
  // Handles all interactions with Compile::current.
  // Puts initial values in all Node fields except _idx.
  // Returns the initial value for _idx, which cannot
  // be initialized by assignment.
  inline int Init(int req, Compile* C);

//----------------- input edge handling
protected:
  friend class PhaseCFG;        // Access to address of _in array elements
  Node **_in;                   // Array of use-def references to Nodes
  Node **_out;                  // Array of def-use references to Nodes

  // Input edges are split into two categories.  Required edges are required
  // for semantic correctness; order is important and NULLs are allowed.
  // Precedence edges are used to help determine execution order and are
  // added, e.g., for scheduling purposes.  They are unordered and not
  // duplicated; they have no embedded NULLs.  Edges from 0 to _cnt-1
  // are required, from _cnt to _max-1 are precedence edges.
  node_idx_t _cnt;              // Total number of required Node inputs.

  node_idx_t _max;              // Actual length of input array.
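
  // A hedged sketch of walking each category (the same loop shape used by
  // find_prec_edge() below):
  //   for (uint i = 0;        i < n->req(); i++)  { ... n->in(i) ... }  // required
  //   for (uint i = n->req(); i < n->len(); i++)  { ... n->in(i) ... }  // precedence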

  // Output edges are an unordered list of def-use edges which exactly
  // correspond to required input edges which point from other nodes
  // to this one.  Thus the count of the output edges is the number of
  // users of this node.
  node_idx_t _outcnt;           // Total number of Node outputs.

  node_idx_t _outmax;           // Actual length of output array.

  // Grow the actual input array to the next larger power-of-2 bigger than len.
  void grow( uint len );
  // Grow the output array to the next larger power-of-2 bigger than len.
  void out_grow( uint len );

 public:
  // Each Node is assigned a unique small/dense number.  This number is used
  // to index into auxiliary arrays of data and bit vectors.
  // The field _idx is declared constant to defend against inadvertent assignments,
  // since it is used by clients as a naked field. However, the field's value can be
  // changed using the set_idx() method.
  //
  // The PhaseRenumberLive phase renumbers nodes based on liveness information.
  // Therefore, it updates the value of the _idx field. The parse-time _idx is
  // preserved in _parse_idx.
  const node_idx_t _idx;
  DEBUG_ONLY(const node_idx_t _parse_idx;)

  // Get the (read-only) number of input edges
  uint req() const { return _cnt; }
  uint len() const { return _max; }
  // Get the (read-only) number of output edges
  uint outcnt() const { return _outcnt; }

#if OPTO_DU_ITERATOR_ASSERT
  // Iterate over the out-edges of this node.  Deletions are illegal.
  inline DUIterator outs() const;
  // Use this when the out array might have changed to suppress asserts.
  inline DUIterator& refresh_out_pos(DUIterator& i) const;
  // Does the node have an out at this position?  (Used for iteration.)
  inline bool has_out(DUIterator& i) const;
  inline Node*    out(DUIterator& i) const;
  // Iterate over the out-edges of this node.  All changes are illegal.
  inline DUIterator_Fast fast_outs(DUIterator_Fast& max) const;
  inline Node*    fast_out(DUIterator_Fast& i) const;
  // Iterate over the out-edges of this node, deleting one at a time.
  inline DUIterator_Last last_outs(DUIterator_Last& min) const;
  inline Node*    last_out(DUIterator_Last& i) const;
  // The inline bodies of all these methods are after the iterator definitions.
#else
  // Iterate over the out-edges of this node.  Deletions are illegal.
  // This iteration uses integral indexes, to decouple from array reallocations.
  DUIterator outs() const  { return 0; }
  // Use this when the out array might have changed to suppress asserts.
  DUIterator refresh_out_pos(DUIterator i) const { return i; }

  // Reference to the i'th output Node.  Error if out of bounds.
  Node*    out(DUIterator i) const { assert(i < _outcnt, "oob"); return _out[i]; }
  // Does the node have an out at this position?  (Used for iteration.)
  bool has_out(DUIterator i) const { return i < _outcnt; }

  // Iterate over the out-edges of this node.  All changes are illegal.
  // This iteration uses a pointer internal to the out array.
  DUIterator_Fast fast_outs(DUIterator_Fast& max) const {
    Node** out = _out;
    // Assign a limit pointer to the reference argument:
    max = out + (ptrdiff_t)_outcnt;
    // Return the base pointer:
    return out;
  }
  Node*    fast_out(DUIterator_Fast i) const  { return *i; }
  // Iterate over the out-edges of this node, deleting one at a time.
  // This iteration uses a pointer internal to the out array.
  DUIterator_Last last_outs(DUIterator_Last& min) const {
    Node** out = _out;
    // Assign a limit pointer to the reference argument:
    min = out;
    // Return the pointer to the start of the iteration:
    return out + (ptrdiff_t)_outcnt - 1;
  }
  Node*    last_out(DUIterator_Last i) const  { return *i; }
#endif

  // Reference to the i'th input Node.  Error if out of bounds.
  Node* in(uint i) const { assert(i < _max, err_msg_res("oob: i=%d, _max=%d", i, _max)); return _in[i]; }
  // Reference to the i'th input Node.  NULL if out of bounds.
  Node* lookup(uint i) const { return ((i < _max) ? _in[i] : NULL); }
  // Reference to the i'th output Node.  Error if out of bounds.
  // Use this accessor sparingly.  We are trying to use iterators instead.
  Node* raw_out(uint i) const { assert(i < _outcnt,"oob"); return _out[i]; }
  // Return the unique out edge.
  Node* unique_out() const { assert(_outcnt==1,"not unique"); return _out[0]; }
  // Delete out edge at position 'i' by moving last out edge to position 'i'
  void  raw_del_out(uint i) {
    assert(i < _outcnt,"oob");
    assert(_outcnt > 0,"oob");
    #if OPTO_DU_ITERATOR_ASSERT
    // Record that a change happened here.
    debug_only(_last_del = _out[i]; ++_del_tick);
    #endif
    _out[i] = _out[--_outcnt];
    // Smash the old edge so it can't be used accidentally.
    debug_only(_out[_outcnt] = (Node *)(uintptr_t)0xdeadbeef);
  }

#ifdef ASSERT
  bool is_dead() const;
#define is_not_dead(n) ((n) == NULL || !VerifyIterativeGVN || !((n)->is_dead()))
#endif
  // Check whether node has become unreachable
  bool is_unreachable(PhaseIterGVN &igvn) const;

  // Set a required input edge, also updates corresponding output edge
  void add_req( Node *n ); // Append a NEW required input
  void add_req( Node *n0, Node *n1 ) {
    add_req(n0); add_req(n1); }
  void add_req( Node *n0, Node *n1, Node *n2 ) {
    add_req(n0); add_req(n1); add_req(n2); }
  void add_req_batch( Node* n, uint m ); // Append m NEW required inputs (all n).
  void del_req( uint idx ); // Delete required edge & compact
  void del_req_ordered( uint idx ); // Delete required edge & compact with preserved order
  void ins_req( uint i, Node *n ); // Insert a NEW required input
  void set_req( uint i, Node *n ) {
    assert( is_not_dead(n), "can not use dead node");
    assert( i < _cnt, err_msg_res("oob: i=%d, _cnt=%d", i, _cnt));
    assert( !VerifyHashTableKeys || _hash_lock == 0,
            "remove node from hash table before modifying it");
    Node** p = &_in[i];    // cache this._in, across the del_out call
    if (*p != NULL)  (*p)->del_out((Node *)this);
    (*p) = n;
    if (n != NULL)      n->add_out((Node *)this);
  }
  // Light version of set_req() to init inputs after node creation.
  void init_req( uint i, Node *n ) {
    assert( i == 0 && this == n ||
            is_not_dead(n), "can not use dead node");
    assert( i < _cnt, "oob");
    assert( !VerifyHashTableKeys || _hash_lock == 0,
            "remove node from hash table before modifying it");
    assert( _in[i] == NULL, "sanity");
    _in[i] = n;
    if (n != NULL)      n->add_out((Node *)this);
  }
  // Find first occurrence of n among my edges:
  int find_edge(Node* n);
  int find_prec_edge(Node* n) {
    for (uint i = req(); i < len(); i++) {
      if (_in[i] == n) return i;
      if (_in[i] == NULL) {
        DEBUG_ONLY( while ((++i) < len()) assert(_in[i] == NULL, "Gap in prec edges!"); )
        break;
      }
    }
    return -1;
  }
  int replace_edge(Node* old, Node* neww);
  int replace_edges_in_range(Node* old, Node* neww, int start, int end);
  // NULL out all inputs to eliminate incoming Def-Use edges.
  // Return the number of edges between 'n' and 'this'
  int  disconnect_inputs(Node *n, Compile *c);

  // Quickly, return true if and only if I am Compile::current()->top().
  bool is_top() const {
    assert((this == (Node*) Compile::current()->top()) == (_out == NULL), "");
    return (_out == NULL);
  }
  // Reaffirm invariants for is_top.  (Only from Compile::set_cached_top_node.)
  void setup_is_top();

  // Strip away casting.  (It is depth-limited.)
  Node* uncast() const;
  // Return whether two Nodes are equivalent, after stripping casting.
  bool eqv_uncast(const Node* n) const {
    return (this->uncast() == n->uncast());
  }

  // Find an out (use) of the current node with the given opcode.
  Node* find_out_with(int opcode);

private:
  static Node* uncast_helper(const Node* n);

  // Add an output edge to the end of the list
  void add_out( Node *n ) {
    if (is_top())  return;
    if( _outcnt == _outmax ) out_grow(_outcnt);
    _out[_outcnt++] = n;
  }
  // Delete an output edge
  void del_out( Node *n ) {
    if (is_top())  return;
    Node** outp = &_out[_outcnt];
    // Find and remove n
    do {
      assert(outp > _out, "Missing Def-Use edge");
    } while (*--outp != n);
    *outp = _out[--_outcnt];
    // Smash the old edge so it can't be used accidentally.
    debug_only(_out[_outcnt] = (Node *)(uintptr_t)0xdeadbeef);
    // Record that a change happened here.
    #if OPTO_DU_ITERATOR_ASSERT
    debug_only(_last_del = n; ++_del_tick);
    #endif
  }
  // Close gap after removing edge.
  void close_prec_gap_at(uint gap) {
    assert(_cnt <= gap && gap < _max, "no valid prec edge");
    uint i = gap;
    Node *last = NULL;
    for (; i < _max-1; ++i) {
      Node *next = _in[i+1];
      if (next == NULL) break;
      last = next;
    }
    _in[gap] = last; // Move last slot to empty one.
    _in[i] = NULL;   // NULL out last slot.
  }

public:
  // Globally replace this node by a given new node, updating all uses.
  void replace_by(Node* new_node);
  // Globally replace this node by a given new node, updating all uses
  // and cutting input edges of old node.
  void subsume_by(Node* new_node, Compile* c) {
    replace_by(new_node);
    disconnect_inputs(NULL, c);
  }
  void set_req_X( uint i, Node *n, PhaseIterGVN *igvn );
  // Find the one non-null required input.  RegionNode only
  Node *nonnull_req() const;
  // Add or remove precedence edges
  void add_prec( Node *n );
  void rm_prec( uint i );

  // Note: prec(i) will not necessarily point to n if edge already exists.
  void set_prec( uint i, Node *n ) {
    assert(i < _max, err_msg("oob: i=%d, _max=%d", i, _max));
    assert(is_not_dead(n), "can not use dead node");
    assert(i >= _cnt, "not a precedence edge");
    // Avoid spec violation: duplicated prec edge.
    if (_in[i] == n) return;
    if (n == NULL || find_prec_edge(n) != -1) {
      rm_prec(i);
      return;
    }
    if (_in[i] != NULL) _in[i]->del_out((Node *)this);
    _in[i] = n;
    if (n != NULL) n->add_out((Node *)this);
  }

  // Set this node's index, used by cisc_version to replace current node
  void set_idx(uint new_idx) {
    const node_idx_t* ref = &_idx;
    *(node_idx_t*)ref = new_idx;
  }
  // Swap input edge order.  (Edge indexes i1 and i2 are usually 1 and 2.)
  void swap_edges(uint i1, uint i2) {
    debug_only(uint check_hash = (VerifyHashTableKeys && _hash_lock) ? hash() : NO_HASH);
    // Def-Use info is unchanged
    Node* n1 = in(i1);
    Node* n2 = in(i2);
    _in[i1] = n2;
    _in[i2] = n1;
    // If this node is in the hash table, make sure it doesn't need a rehash.
    assert(check_hash == NO_HASH || check_hash == hash(), "edge swap must preserve hash code");
  }

  // Iterators over input Nodes for a Node X are written as:
  // for( i = 0; i < X.req(); i++ ) ... X.in(i) ...
  // NOTE: Required edges can contain embedded NULL pointers.

//----------------- Other Node Properties

  // Generate class id for some ideal nodes to avoid virtual query
  // methods is_<Node>().
  // The class id is the set of bits corresponding to the node class and all
  // its super classes, so that queries for super classes are also valid.
  // Subclasses of the same super class are assigned different bits
  // (the third parameter in the macro DEFINE_CLASS_ID).
  // Classes with deeper hierarchy are declared first.
  // Classes with the same hierarchy depth are sorted by usage frequency.
  //
  // The query method masks off the bits of subclasses
  // and then compares the result with the class id
  // (see the macro DEFINE_CLASS_QUERY below).
  //
  //  Class_MachCall=30, ClassMask_MachCall=31
  // 12               8               4               0
  //  0   0   0   0   0   0   0   0   1   1   1   1   0
  //                                  |   |   |   |
  //                                  |   |   |   Bit_Mach=2
  //                                  |   |   Bit_MachReturn=4
  //                                  |   Bit_MachSafePoint=8
  //                                  Bit_MachCall=16
  //
  //  Class_CountedLoop=56, ClassMask_CountedLoop=63
  // 12               8               4               0
  //  0   0   0   0   0   0   0   1   1   1   0   0   0
  //                              |   |   |
  //                              |   |   Bit_Region=8
  //                              |   Bit_Loop=16
  //                              Bit_CountedLoop=32

  #define DEFINE_CLASS_ID(cl, supcl, subn) \
  Bit_##cl = (Class_##supcl == 0) ? 1 << subn : (Bit_##supcl) << (1 + subn) , \
  Class_##cl = Class_##supcl + Bit_##cl , \
  ClassMask_##cl = ((Bit_##cl << 1) - 1) ,
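
  // For example, expanding DEFINE_CLASS_ID(MachCall, MachSafePoint, 0) with the
  // values shown in the first diagram above (Class_MachSafePoint = 2+4+8 = 14,
  // Bit_MachSafePoint = 8):
  //   Bit_MachCall       = Bit_MachSafePoint << (1 + 0)       = 16
  //   Class_MachCall     = Class_MachSafePoint + Bit_MachCall = 30
  //   ClassMask_MachCall = (Bit_MachCall << 1) - 1            = 31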

  // This enum is used only for C2 ideal and mach nodes with is_<node>() methods
  // so that its values fit into 16 bits.
  enum NodeClasses {
    Bit_Node   = 0x0000,
    Class_Node = 0x0000,
    ClassMask_Node = 0xFFFF,

    DEFINE_CLASS_ID(Multi, Node, 0)
      DEFINE_CLASS_ID(SafePoint, Multi, 0)
        DEFINE_CLASS_ID(Call,      SafePoint, 0)
          DEFINE_CLASS_ID(CallJava,         Call, 0)
            DEFINE_CLASS_ID(CallStaticJava,   CallJava, 0)
            DEFINE_CLASS_ID(CallDynamicJava,  CallJava, 1)
          DEFINE_CLASS_ID(CallRuntime,      Call, 1)
            DEFINE_CLASS_ID(CallLeaf,         CallRuntime, 0)
          DEFINE_CLASS_ID(Allocate,         Call, 2)
            DEFINE_CLASS_ID(AllocateArray,    Allocate, 0)
          DEFINE_CLASS_ID(AbstractLock,     Call, 3)
            DEFINE_CLASS_ID(Lock,             AbstractLock, 0)
            DEFINE_CLASS_ID(Unlock,           AbstractLock, 1)
      DEFINE_CLASS_ID(MultiBranch, Multi, 1)
        DEFINE_CLASS_ID(PCTable,     MultiBranch, 0)
          DEFINE_CLASS_ID(Catch,       PCTable, 0)
          DEFINE_CLASS_ID(Jump,        PCTable, 1)
        DEFINE_CLASS_ID(If,          MultiBranch, 1)
          DEFINE_CLASS_ID(CountedLoopEnd, If, 0)
        DEFINE_CLASS_ID(NeverBranch, MultiBranch, 2)
      DEFINE_CLASS_ID(Start,       Multi, 2)
      DEFINE_CLASS_ID(MemBar,      Multi, 3)
        DEFINE_CLASS_ID(Initialize,       MemBar, 0)
        DEFINE_CLASS_ID(MemBarStoreStore, MemBar, 1)

    DEFINE_CLASS_ID(Mach,  Node, 1)
      DEFINE_CLASS_ID(MachReturn, Mach, 0)
        DEFINE_CLASS_ID(MachSafePoint, MachReturn, 0)
          DEFINE_CLASS_ID(MachCall, MachSafePoint, 0)
            DEFINE_CLASS_ID(MachCallJava,         MachCall, 0)
              DEFINE_CLASS_ID(MachCallStaticJava,   MachCallJava, 0)
              DEFINE_CLASS_ID(MachCallDynamicJava,  MachCallJava, 1)
            DEFINE_CLASS_ID(MachCallRuntime,      MachCall, 1)
              DEFINE_CLASS_ID(MachCallLeaf,         MachCallRuntime, 0)
      DEFINE_CLASS_ID(MachBranch, Mach, 1)
        DEFINE_CLASS_ID(MachIf,         MachBranch, 0)
        DEFINE_CLASS_ID(MachGoto,       MachBranch, 1)
        DEFINE_CLASS_ID(MachNullCheck,  MachBranch, 2)
      DEFINE_CLASS_ID(MachSpillCopy,    Mach, 2)
      DEFINE_CLASS_ID(MachTemp,         Mach, 3)
      DEFINE_CLASS_ID(MachConstantBase, Mach, 4)
      DEFINE_CLASS_ID(MachConstant,     Mach, 5)
      DEFINE_CLASS_ID(MachMerge,        Mach, 6)

    DEFINE_CLASS_ID(Type,  Node, 2)
      DEFINE_CLASS_ID(Phi,   Type, 0)
      DEFINE_CLASS_ID(ConstraintCast, Type, 1)
        DEFINE_CLASS_ID(CastII, ConstraintCast, 0)
      DEFINE_CLASS_ID(CheckCastPP, Type, 2)
      DEFINE_CLASS_ID(CMove, Type, 3)
      DEFINE_CLASS_ID(SafePointScalarObject, Type, 4)
      DEFINE_CLASS_ID(DecodeNarrowPtr, Type, 5)
        DEFINE_CLASS_ID(DecodeN, DecodeNarrowPtr, 0)
        DEFINE_CLASS_ID(DecodeNKlass, DecodeNarrowPtr, 1)
      DEFINE_CLASS_ID(EncodeNarrowPtr, Type, 6)
        DEFINE_CLASS_ID(EncodeP, EncodeNarrowPtr, 0)
        DEFINE_CLASS_ID(EncodePKlass, EncodeNarrowPtr, 1)

    DEFINE_CLASS_ID(Proj,  Node, 3)
      DEFINE_CLASS_ID(CatchProj, Proj, 0)
      DEFINE_CLASS_ID(JumpProj,  Proj, 1)
      DEFINE_CLASS_ID(IfTrue,    Proj, 2)
      DEFINE_CLASS_ID(IfFalse,   Proj, 3)
      DEFINE_CLASS_ID(Parm,      Proj, 4)
      DEFINE_CLASS_ID(MachProj,  Proj, 5)

    DEFINE_CLASS_ID(Mem,   Node, 4)
      DEFINE_CLASS_ID(Load,  Mem, 0)
        DEFINE_CLASS_ID(LoadVector,  Load, 0)
      DEFINE_CLASS_ID(Store, Mem, 1)
        DEFINE_CLASS_ID(StoreVector, Store, 0)
      DEFINE_CLASS_ID(LoadStore, Mem, 2)

    DEFINE_CLASS_ID(Region, Node, 5)
      DEFINE_CLASS_ID(Loop, Region, 0)
        DEFINE_CLASS_ID(Root,        Loop, 0)
        DEFINE_CLASS_ID(CountedLoop, Loop, 1)

    DEFINE_CLASS_ID(Sub,   Node, 6)
      DEFINE_CLASS_ID(Cmp,   Sub, 0)
        DEFINE_CLASS_ID(FastLock,   Cmp, 0)
        DEFINE_CLASS_ID(FastUnlock, Cmp, 1)

    DEFINE_CLASS_ID(MergeMem, Node, 7)
    DEFINE_CLASS_ID(Bool,     Node, 8)
    DEFINE_CLASS_ID(AddP,     Node, 9)
    DEFINE_CLASS_ID(BoxLock,  Node, 10)
    DEFINE_CLASS_ID(Add,      Node, 11)
    DEFINE_CLASS_ID(Mul,      Node, 12)
    DEFINE_CLASS_ID(Vector,   Node, 13)
    DEFINE_CLASS_ID(ClearArray, Node, 14)

    _max_classes  = ClassMask_ClearArray
  };
  #undef DEFINE_CLASS_ID

  // Flags are sorted by usage frequency.
  enum NodeFlags {
    Flag_is_Copy                     = 0x01, // should be first bit to avoid shift
    Flag_rematerialize               = Flag_is_Copy << 1,
    Flag_needs_anti_dependence_check = Flag_rematerialize << 1,
    Flag_is_macro                    = Flag_needs_anti_dependence_check << 1,
    Flag_is_Con                      = Flag_is_macro << 1,
    Flag_is_cisc_alternate           = Flag_is_Con << 1,
    Flag_is_dead_loop_safe           = Flag_is_cisc_alternate << 1,
    Flag_may_be_short_branch         = Flag_is_dead_loop_safe << 1,
    Flag_avoid_back_to_back_before   = Flag_may_be_short_branch << 1,
    Flag_avoid_back_to_back_after    = Flag_avoid_back_to_back_before << 1,
    Flag_has_call                    = Flag_avoid_back_to_back_after << 1,
    Flag_is_expensive                = Flag_has_call << 1,
    _max_flags = (Flag_is_expensive << 1) - 1 // allow flags combination
  };

private:
  jushort _class_id;
  jushort _flags;

protected:
  // These methods should be called from constructors only.
  void init_class_id(jushort c) {
    assert(c <= _max_classes, "invalid node class");
    _class_id = c; // cast out const
  }
  void init_flags(jushort fl) {
    assert(fl <= _max_flags, "invalid node flag");
    _flags |= fl;
  }
  void clear_flag(jushort fl) {
    assert(fl <= _max_flags, "invalid node flag");
    _flags &= ~fl;
  }

public:
  const jushort class_id() const { return _class_id; }

  const jushort flags() const { return _flags; }

  // Return a dense integer opcode number
  virtual int Opcode() const;

  // Virtual inherited Node size
  virtual uint size_of() const;

  // Other interesting Node properties
  #define DEFINE_CLASS_QUERY(type)                           \
  bool is_##type() const {                                   \
    return ((_class_id & ClassMask_##type) == Class_##type); \
  }                                                          \
  type##Node *as_##type() const {                            \
    assert(is_##type(), "invalid node class");               \
    return (type##Node*)this;                                \
  }                                                          \
  type##Node* isa_##type() const {                           \
    return (is_##type()) ? as_##type() : NULL;               \
  }
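
  // A hedged usage sketch of the three generated queries, for some Node* n:
  //   if (n->is_Call())  ...          // subclass-aware class test
  //   CallNode* c = n->as_Call();     // checked cast (asserts is_Call())
  //   CallNode* m = n->isa_Call();    // returns NULL if n is not a Call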

  DEFINE_CLASS_QUERY(AbstractLock)
  DEFINE_CLASS_QUERY(Add)
  DEFINE_CLASS_QUERY(AddP)
  DEFINE_CLASS_QUERY(Allocate)
  DEFINE_CLASS_QUERY(AllocateArray)
  DEFINE_CLASS_QUERY(Bool)
  DEFINE_CLASS_QUERY(BoxLock)
  DEFINE_CLASS_QUERY(Call)
  DEFINE_CLASS_QUERY(CallDynamicJava)
  DEFINE_CLASS_QUERY(CallJava)
  DEFINE_CLASS_QUERY(CallLeaf)
  DEFINE_CLASS_QUERY(CallRuntime)
  DEFINE_CLASS_QUERY(CallStaticJava)
  DEFINE_CLASS_QUERY(Catch)
  DEFINE_CLASS_QUERY(CatchProj)
  DEFINE_CLASS_QUERY(CheckCastPP)
  DEFINE_CLASS_QUERY(CastII)
  DEFINE_CLASS_QUERY(ConstraintCast)
  DEFINE_CLASS_QUERY(ClearArray)
  DEFINE_CLASS_QUERY(CMove)
  DEFINE_CLASS_QUERY(Cmp)
  DEFINE_CLASS_QUERY(CountedLoop)
  DEFINE_CLASS_QUERY(CountedLoopEnd)
  DEFINE_CLASS_QUERY(DecodeNarrowPtr)
  DEFINE_CLASS_QUERY(DecodeN)
  DEFINE_CLASS_QUERY(DecodeNKlass)
  DEFINE_CLASS_QUERY(EncodeNarrowPtr)
  DEFINE_CLASS_QUERY(EncodeP)
  DEFINE_CLASS_QUERY(EncodePKlass)
  DEFINE_CLASS_QUERY(FastLock)
  DEFINE_CLASS_QUERY(FastUnlock)
  DEFINE_CLASS_QUERY(If)
  DEFINE_CLASS_QUERY(IfFalse)
  DEFINE_CLASS_QUERY(IfTrue)
  DEFINE_CLASS_QUERY(Initialize)
  DEFINE_CLASS_QUERY(Jump)
  DEFINE_CLASS_QUERY(JumpProj)
  DEFINE_CLASS_QUERY(Load)
  DEFINE_CLASS_QUERY(LoadStore)
  DEFINE_CLASS_QUERY(Lock)
  DEFINE_CLASS_QUERY(Loop)
  DEFINE_CLASS_QUERY(Mach)
  DEFINE_CLASS_QUERY(MachBranch)
  DEFINE_CLASS_QUERY(MachCall)
  DEFINE_CLASS_QUERY(MachCallDynamicJava)
  DEFINE_CLASS_QUERY(MachCallJava)
  DEFINE_CLASS_QUERY(MachCallLeaf)
  DEFINE_CLASS_QUERY(MachCallRuntime)
  DEFINE_CLASS_QUERY(MachCallStaticJava)
  DEFINE_CLASS_QUERY(MachConstantBase)
  DEFINE_CLASS_QUERY(MachConstant)
  DEFINE_CLASS_QUERY(MachGoto)
  DEFINE_CLASS_QUERY(MachIf)
  DEFINE_CLASS_QUERY(MachNullCheck)
  DEFINE_CLASS_QUERY(MachProj)
  DEFINE_CLASS_QUERY(MachReturn)
  DEFINE_CLASS_QUERY(MachSafePoint)
  DEFINE_CLASS_QUERY(MachSpillCopy)
  DEFINE_CLASS_QUERY(MachTemp)
  DEFINE_CLASS_QUERY(MachMerge)
  DEFINE_CLASS_QUERY(Mem)
  DEFINE_CLASS_QUERY(MemBar)
  DEFINE_CLASS_QUERY(MemBarStoreStore)
  DEFINE_CLASS_QUERY(MergeMem)
  DEFINE_CLASS_QUERY(Mul)
  DEFINE_CLASS_QUERY(Multi)
  DEFINE_CLASS_QUERY(MultiBranch)
  DEFINE_CLASS_QUERY(Parm)
  DEFINE_CLASS_QUERY(PCTable)
  DEFINE_CLASS_QUERY(Phi)
  DEFINE_CLASS_QUERY(Proj)
  DEFINE_CLASS_QUERY(Region)
  DEFINE_CLASS_QUERY(Root)
  DEFINE_CLASS_QUERY(SafePoint)
  DEFINE_CLASS_QUERY(SafePointScalarObject)
  DEFINE_CLASS_QUERY(Start)
  DEFINE_CLASS_QUERY(Store)
  DEFINE_CLASS_QUERY(Sub)
  DEFINE_CLASS_QUERY(Type)
  DEFINE_CLASS_QUERY(Vector)
  DEFINE_CLASS_QUERY(LoadVector)
  DEFINE_CLASS_QUERY(StoreVector)
  DEFINE_CLASS_QUERY(Unlock)

  #undef DEFINE_CLASS_QUERY

  // duplicate of is_MachSpillCopy()
  bool is_SpillCopy () const {
    return ((_class_id & ClassMask_MachSpillCopy) == Class_MachSpillCopy);
  }

  bool is_Con () const { return (_flags & Flag_is_Con) != 0; }
  // A data node which is safe to leave in a dead loop during IGVN optimization.
  bool is_dead_loop_safe() const {
    return is_Phi() || (is_Proj() && in(0) == NULL) ||
           ((_flags & (Flag_is_dead_loop_safe | Flag_is_Con)) != 0 &&
            (!is_Proj() || !in(0)->is_Allocate()));
  }

  // is_Copy() returns copied edge index (0 or 1)
  uint is_Copy() const { return (_flags & Flag_is_Copy); }

  virtual bool is_CFG() const { return false; }

  // If this node is control-dependent on a test, can it be
  // rerouted to a dominating equivalent test?  This is usually
  // true of non-CFG nodes, but can be false for operations which
  // depend for their correct sequencing on more than one test.
  // (In that case, hoisting to a dominating test may silently
  // skip some other important test.)
  virtual bool depends_only_on_test() const { assert(!is_CFG(), ""); return true; };

  // When building basic blocks, I need to have a notion of block beginning
  // Nodes, next block selector Nodes (block enders), and next block
  // projections.  These calls need to work on their machine equivalents.  The
  // Ideal beginning Nodes are RootNode, RegionNode and StartNode.
  bool is_block_start() const {
    if ( is_Region() )
      return this == (const Node*)in(0);
    else
      return is_Start();
  }

  // The Ideal control projection Nodes are IfTrue/IfFalse, JumpProjNode, Root,
  // Goto and Return.  This call also returns the block ending Node.
  virtual const Node *is_block_proj() const;

  // The node is a "macro" node which needs to be expanded before matching
  bool is_macro() const { return (_flags & Flag_is_macro) != 0; }
  // The node is expensive: the best control is set during loop opts
  bool is_expensive() const { return (_flags & Flag_is_expensive) != 0 && in(0) != NULL; }

//----------------- Optimization

  // Get the worst-case Type output for this Node.
  virtual const class Type *bottom_type() const;

  // If we find a better type for a node, try to record it permanently.
  // Be sure to do the hash_delete game in the "rehash" variant.
  void raise_bottom_type(const Type* new_type);

  // Get the address type with which this node uses and/or defs memory,
  // or NULL if none.  The address type is conservatively wide.
  // Returns non-null for calls, membars, loads, stores, etc.
  // Returns TypePtr::BOTTOM if the node touches memory "broadly".
  virtual const class TypePtr *adr_type() const { return NULL; }

  // Return an existing node which computes the same function as this node.
  // The optimistic combined algorithm requires this to return a Node which
  // is a small number of steps away (e.g., one of my inputs).
  virtual Node *Identity( PhaseTransform *phase );

  // Return the set of values this Node can take on at runtime.
  virtual const Type *Value( PhaseTransform *phase ) const;

  // Return a node which is more "ideal" than the current node.
  // The invariants on this call are subtle.  If in doubt, read the
  // treatise in node.cpp above the default implementation AND TEST WITH
  // +VerifyIterativeGVN!
  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);

  // Some nodes have specific Ideal subgraph transformations only if they are
  // unique users of specific nodes. Such nodes should be put on IGVN worklist
  // for the transformations to happen.
  bool has_special_unique_user() const;

  // Skip chains of Proj and CatchProj nodes.  Check for Null and Top.
  Node* find_exact_control(Node* ctrl);

  // Check if 'this' node dominates or is equal to 'sub'.
  bool dominates(Node* sub, Node_List &nlist);

protected:
  bool remove_dead_region(PhaseGVN *phase, bool can_reshape);
public:

  // Idealize graph, using DU info.  Done after constant propagation
  virtual Node *Ideal_DU_postCCP( PhaseCCP *ccp );

  // See if there is valid pipeline info
  static  const Pipeline *pipeline_class();
  virtual const Pipeline *pipeline() const;

  // Compute the latency from the def to this instruction of the ith input node
  uint latency(uint i);

  // Hash & compare functions, for pessimistic value numbering

  // If the hash function returns the special sentinel value NO_HASH,
  // the node is guaranteed never to compare equal to any other node.
  // If we accidentally generate a hash with value NO_HASH the node
  // won't go into the table and we'll lose a little optimization.
  enum { NO_HASH = 0 };
  virtual uint hash() const;
  virtual uint cmp( const Node &n ) const;

  // Operation appears to be iteratively computed (such as an induction variable).
  // It is possible for this operation to return false for a loop-varying
  // value, if it appears (by local graph inspection) to be computed by a simple conditional.
  bool is_iteratively_computed();

  // Determine if a node is a counted-loop induction variable.
  // The method is defined in loopnode.cpp.
  const Node* is_loop_iv() const;

  // Return a node with opcode "opc" and same inputs as "this" if one can
  // be found; otherwise return NULL.
  Node* find_similar(int opc);

  // Return the unique control output, or NULL if there is none or more than one.
  Node* unique_ctrl_out();

//----------------- Code Generation

  // Ideal register class for Matching.  Zero means unmatched instruction
  // (these are cloned instead of converted to machine nodes).
  virtual uint ideal_reg() const;

  static const uint NotAMachineReg;   // must be > max. machine register

  // Do we Match on this edge index or not?  Generally false for Control
  // and true for everything else.  Weird for calls & returns.
  virtual uint match_edge(uint idx) const;

  // Register class output is returned in
  virtual const RegMask &out_RegMask() const;
  // Register class input is expected in
  virtual const RegMask &in_RegMask(uint) const;
  // Should we clone rather than spill this instruction?
  bool rematerialize() const;

  // Return JVM State Object if this Node carries debug info, or NULL otherwise
  virtual JVMState* jvms() const;

  // Print as assembly
  virtual void format( PhaseRegAlloc *, outputStream* st = tty ) const;
  // Emit bytes starting at parameter 'ptr'
  // Bump 'ptr' by the number of output bytes
  virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
  // Size of instruction in bytes
  virtual uint size(PhaseRegAlloc *ra_) const;

  // Convenience function to extract an integer constant from a node.
  // If it is not an integer constant (either Con, CastII, or Mach),
  // return value_if_unknown.
  jint find_int_con(jint value_if_unknown) const {
    const TypeInt* t = find_int_type();
    return (t != NULL && t->is_con()) ? t->get_con() : value_if_unknown;
  }
  // Return the constant, knowing it is an integer constant already
  jint get_int() const {
    const TypeInt* t = find_int_type();
    guarantee(t != NULL, "must be con");
    return t->get_con();
  }
  // Here's where the work is done.  Can produce non-constant int types too.
  const TypeInt* find_int_type() const;
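
  // A hedged example (the input index 2 is purely illustrative):
  //   jint con = n->in(2)->find_int_con(-1);   // -1 is the "unknown" sentinel
  //   if (con != -1) { /* fold using the constant */ }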

  // Same thing for long (and intptr_t, via type.hpp):
  jlong get_long() const {
    const TypeLong* t = find_long_type();
    guarantee(t != NULL, "must be con");
    return t->get_con();
  }
  jlong find_long_con(jlong value_if_unknown) const {
    const TypeLong* t = find_long_type();
    return (t != NULL && t->is_con()) ? t->get_con() : value_if_unknown;
  }
  const TypeLong* find_long_type() const;

  const TypePtr* get_ptr_type() const;

  // These guys are called by code generated by ADLC:
  intptr_t get_ptr() const;
  intptr_t get_narrowcon() const;
  jdouble getd() const;
  jfloat getf() const;

  // Nodes which are pinned into basic blocks
  virtual bool pinned() const { return false; }

  // Nodes which use memory without consuming it, hence need antidependences
  // More specifically, needs_anti_dependence_check returns true iff the node
  // (a) does a load, and (b) does not perform a store (except perhaps to a
  // stack slot or some other unaliased location).
  bool needs_anti_dependence_check() const;

  // Return which operand this instruction may cisc-spill. In other words,
  // return operand position that can convert from reg to memory access
  virtual int cisc_operand() const { return AdlcVMDeps::Not_cisc_spillable; }
  bool is_cisc_alternate() const { return (_flags & Flag_is_cisc_alternate) != 0; }

//----------------- Graph walking
public:
  // Walk and apply member functions recursively.
  // Supplied (this) pointer is root.
  void walk(NFunc pre, NFunc post, void *env);
  static void nop(Node &, void*); // Dummy empty function
  static void packregion( Node &n, void* );
private:
  void walk_(NFunc pre, NFunc post, void *env, VectorSet &visited);

//----------------- Printing, etc
public:
#ifndef PRODUCT
  Node* find(int idx) const;         // Search the graph for the given idx.
  Node* find_ctrl(int idx) const;    // Search control ancestors for the given idx.
  void dump() const { dump("\n"); }  // Print this node.
  void dump(const char* suffix, outputStream *st = tty) const;// Print this node.
  void dump(int depth) const;        // Print this node, recursively to depth d
  void dump_ctrl(int depth) const;   // Print control nodes, to depth d
  virtual void dump_req(outputStream *st = tty) const;     // Print required-edge info
  virtual void dump_prec(outputStream *st = tty) const;    // Print precedence-edge info
  virtual void dump_out(outputStream *st = tty) const;     // Print the output edge info
  virtual void dump_spec(outputStream *st) const {}; // Print per-node info
  void verify_edges(Unique_Node_List &visited); // Verify bi-directional edges
  void verify() const;               // Check Def-Use info for my subgraph
  static void verify_recur(const Node *n, int verify_depth, VectorSet &old_space, VectorSet &new_space);

  // This call defines a class-unique string used to identify class instances
  virtual const char *Name() const;

  void dump_format(PhaseRegAlloc *ra) const; // debug access to MachNode::format(...)
  // RegMask Print Functions
  void dump_in_regmask(int idx) { in_RegMask(idx).dump(); }
  void dump_out_regmask() { out_RegMask().dump(); }
  static bool in_dump() { return Compile::current()->_in_dump_cnt > 0; }
  void fast_dump() const {
    tty->print("%4d: %-17s", _idx, Name());
    for (uint i = 0; i < len(); i++)
      if (in(i))
        tty->print(" %4d", in(i)->_idx);
      else
        tty->print(" NULL");
    tty->print("\n");
  }
#endif
#ifdef ASSERT
  void verify_construction();
  bool verify_jvms(const JVMState* jvms) const;
  int  _debug_idx;                     // Unique value assigned to every node.
  int   debug_idx() const              { return _debug_idx; }
  void  set_debug_idx( int debug_idx ) { _debug_idx = debug_idx; }

  Node* _debug_orig;                   // Original version of this, if any.
  Node*  debug_orig() const            { return _debug_orig; }
  void   set_debug_orig(Node* orig);   // _debug_orig = orig

  int        _hash_lock;               // Barrier to modifications of nodes in the hash table
  void  enter_hash_lock() { ++_hash_lock; assert(_hash_lock < 99, "in too many hash tables?"); }
  void   exit_hash_lock() { --_hash_lock; assert(_hash_lock >= 0, "mispaired hash locks"); }

  static void init_NodeProperty();

  #if OPTO_DU_ITERATOR_ASSERT
  const Node* _last_del;               // The last deleted node.
  uint        _del_tick;               // Bumped when a deletion happens.
  #endif
#endif
};

//-----------------------------------------------------------------------------
// Iterators over DU info, and associated Node functions.

#if OPTO_DU_ITERATOR_ASSERT

// Common code for assertion checking on DU iterators.
class DUIterator_Common VALUE_OBJ_CLASS_SPEC {
#ifdef ASSERT
 protected:
  bool         _vdui;               // cached value of VerifyDUIterators
  const Node*  _node;               // the node containing the _out array
  uint         _outcnt;             // cached node->_outcnt
  uint         _del_tick;           // cached node->_del_tick
  Node*        _last;               // last value produced by the iterator

  void sample(const Node* node);    // used by c'tor to set up for verifies
  void verify(const Node* node, bool at_end_ok = false);
  void verify_resync();
  void reset(const DUIterator_Common& that);

// The VDUI_ONLY macro protects code conditionalized on VerifyDUIterators
  #define I_VDUI_ONLY(i,x) { if ((i)._vdui) { x; } }
#else
  #define I_VDUI_ONLY(i,x) { }
#endif //ASSERT
};

#define VDUI_ONLY(x)     I_VDUI_ONLY(*this, x)

// Default DU iterator.  Allows appends onto the out array.
// Allows deletion from the out array only at the current point.
// Usage:
//  for (DUIterator i = x->outs(); x->has_out(i); i++) {
//    Node* y = x->out(i);
//    ...
//  }
// Compiles in product mode to an unsigned integer index, which indexes
// into a repeatedly reloaded base pointer of x->_out.  The loop predicate
// also reloads x->_outcnt.  If you delete, you must perform "--i" just
// before continuing the loop.  You must delete only the last-produced
// edge.  You must delete only a single copy of the last-produced edge,
// or else you must delete all copies at once (the first time the edge
// is produced by the iterator).
class DUIterator : public DUIterator_Common {
  friend class Node;

  // This is the index which provides the product-mode behavior.
  // Whatever the product-mode version of the system does to the
  // DUI index is done to this index.  All other fields in
  // this class are used only for assertion checking.
  uint         _idx;

  #ifdef ASSERT
  uint         _refresh_tick;    // Records the refresh activity.

  void sample(const Node* node); // Initialize _refresh_tick etc.
  void verify(const Node* node, bool at_end_ok = false);
  void verify_increment();       // Verify an increment operation.
  void verify_resync();          // Verify that we can back up over a deletion.
  void verify_finish();          // Verify that the loop terminated properly.
  void refresh();                // Resample verification info.
  void reset(const DUIterator& that);  // Resample after assignment.
  #endif

  DUIterator(const Node* node, int dummy_to_avoid_conversion)
    { _idx = 0;                         debug_only(sample(node)); }

 public:
  // initialize to garbage; clear _vdui to disable asserts
  DUIterator()
    { /*initialize to garbage*/         debug_only(_vdui = false); }

  void operator++(int dummy_to_specify_postfix_op)
    { _idx++;                           VDUI_ONLY(verify_increment()); }

  void operator--()
    { VDUI_ONLY(verify_resync());       --_idx; }

  ~DUIterator()
    { VDUI_ONLY(verify_finish()); }

  void operator=(const DUIterator& that)
    { _idx = that._idx;                 debug_only(reset(that)); }
};

DUIterator Node::outs() const
  { return DUIterator(this, 0); }
DUIterator& Node::refresh_out_pos(DUIterator& i) const
  { I_VDUI_ONLY(i, i.refresh());        return i; }
bool Node::has_out(DUIterator& i) const
  { I_VDUI_ONLY(i, i.verify(this,true));return i._idx < _outcnt; }
Node*    Node::out(DUIterator& i) const
  { I_VDUI_ONLY(i, i.verify(this));     return debug_only(i._last=) _out[i._idx]; }


// Faster DU iterator.  Disallows insertions into the out array.
// Allows deletion from the out array only at the current point.
// Usage:
//  for (DUIterator_Fast imax, i = x->fast_outs(imax); i < imax; i++) {
//    Node* y = x->fast_out(i);
//    ...
//  }
// Compiles in product mode to raw Node** pointer arithmetic, with
// no reloading of pointers from the original node x.  If you delete,
// you must perform "--i; --imax" just before continuing the loop.
// If you delete multiple copies of the same edge, you must decrement
// imax, but not i, multiple times:  "--i, imax -= num_edges".
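//
// A hedged deletion sketch following those rules, assuming a replacement
// node z should take over x's role as an input of each user y:
//  for (DUIterator_Fast imax, i = x->fast_outs(imax); i < imax; i++) {
//    Node* y = x->fast_out(i);
//    int num = y->replace_edge(x, z);    // removes num out-edges of x
//    if (num > 0) { --i; imax -= num; }  // resync iterator and limit
//  }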
class DUIterator_Fast : public DUIterator_Common {
  friend class Node;
  friend class DUIterator_Last;

  // This is the pointer which provides the product-mode behavior.
  // Whatever the product-mode version of the system does to the
  // DUI pointer is done to this pointer.  All other fields in
  // this class are used only for assertion checking.
  Node**       _outp;

  #ifdef ASSERT
  void verify(const Node* node, bool at_end_ok = false);
  void verify_limit();
  void verify_resync();
  void verify_relimit(uint n);
  void reset(const DUIterator_Fast& that);
  #endif

  // Note:  offset must be signed, since -1 is sometimes passed
  DUIterator_Fast(const Node* node, ptrdiff_t offset)
    { _outp = node->_out + offset;      debug_only(sample(node)); }

 public:
  // initialize to garbage; clear _vdui to disable asserts
  DUIterator_Fast()
    { /*initialize to garbage*/         debug_only(_vdui = false); }

  void operator++(int dummy_to_specify_postfix_op)
    { _outp++;                          VDUI_ONLY(verify(_node, true)); }

  void operator--()
    { VDUI_ONLY(verify_resync());       --_outp; }

  void operator-=(uint n)   // applied to the limit only
    { _outp -= n;           VDUI_ONLY(verify_relimit(n));  }

  bool operator<(DUIterator_Fast& limit) {
    I_VDUI_ONLY(*this, this->verify(_node, true));
    I_VDUI_ONLY(limit, limit.verify_limit());
    return _outp < limit._outp;
  }

  void operator=(const DUIterator_Fast& that)
    { _outp = that._outp;               debug_only(reset(that)); }
};

DUIterator_Fast Node::fast_outs(DUIterator_Fast& imax) const {
  // Assign a limit pointer to the reference argument:
  imax = DUIterator_Fast(this, (ptrdiff_t)_outcnt);
  // Return the base pointer:
  return DUIterator_Fast(this, 0);
}
Node* Node::fast_out(DUIterator_Fast& i) const {
  I_VDUI_ONLY(i, i.verify(this));
  return debug_only(i._last=) *i._outp;
}


// Faster DU iterator.  Requires each successive edge to be removed.
// Does not allow insertion of any edges.
// Usage:
//  for (DUIterator_Last imin, i = x->last_outs(imin); i >= imin; i -= num_edges) {
//    Node* y = x->last_out(i);
//    ...
//  }
// Compiles in product mode to raw Node** pointer arithmetic, with
// no reloading of pointers from the original node x.
class DUIterator_Last : private DUIterator_Fast {
  friend class Node;

  #ifdef ASSERT
  void verify(const Node* node, bool at_end_ok = false);
  void verify_limit();
  void verify_step(uint num_edges);
  #endif

  // Note:  offset must be signed, since -1 is sometimes passed
  DUIterator_Last(const Node* node, ptrdiff_t offset)
    : DUIterator_Fast(node, offset) { }

  void operator++(int dummy_to_specify_postfix_op) {} // do not use
  void operator<(int)                              {} // do not use

 public:
  // initialize to garbage; the base class clears _vdui to disable asserts
  DUIterator_Last() { }

  void operator--()
    { _outp--;              VDUI_ONLY(verify_step(1));  }

  void operator-=(uint n)
    { _outp -= n;           VDUI_ONLY(verify_step(n));  }

  bool operator>=(DUIterator_Last& limit) {
    I_VDUI_ONLY(*this, this->verify(_node, true));
    I_VDUI_ONLY(limit, limit.verify_limit());
    return _outp >= limit._outp;
  }

  void operator=(const DUIterator_Last& that)
    { DUIterator_Fast::operator=(that); }
};

DUIterator_Last Node::last_outs(DUIterator_Last& imin) const {
  // Assign a limit pointer to the reference argument:
  imin = DUIterator_Last(this, 0);
  // Return the initial pointer:
  return DUIterator_Last(this, (ptrdiff_t)_outcnt - 1);
}
Node* Node::last_out(DUIterator_Last& i) const {
  I_VDUI_ONLY(i, i.verify(this));
  return debug_only(i._last=) *i._outp;
}

#endif //OPTO_DU_ITERATOR_ASSERT

#undef I_VDUI_ONLY
#undef VDUI_ONLY

// An Iterator that truly follows the iterator pattern.  Doesn't
// support deletion but could be made to.
//
//   for (SimpleDUIterator i(n); i.has_next(); i.next()) {
//     Node* m = i.get();
//     ...
//   }
class SimpleDUIterator : public StackObj {
 private:
  Node* node;
  DUIterator_Fast i;
  DUIterator_Fast imax;
 public:
  SimpleDUIterator(Node* n): node(n), i(n->fast_outs(imax)) {}
  bool has_next() { return i < imax; }
  void next() { i++; }
  Node* get() { return node->fast_out(i); }
};


//-----------------------------------------------------------------------------
// Map dense integer indices to Nodes.  Uses classic doubling-array trick.
// Abstractly provides an infinite array of Node*'s, initialized to NULL.
// Note that the constructor just zeros things, and since I use Arena
// allocation I do not need a destructor to reclaim storage.
class Node_Array : public ResourceObj {
  friend class VMStructs;
protected:
  Arena *_a;                    // Arena to allocate in
  uint   _max;
  Node **_nodes;
  void   grow( uint i );        // Grow array to fit element i
public:
  Node_Array(Arena *a) : _a(a), _max(OptoNodeListSize) {
    _nodes = NEW_ARENA_ARRAY( a, Node *, OptoNodeListSize );
    for( int i = 0; i < OptoNodeListSize; i++ ) {
      _nodes[i] = NULL;
    }
  }

  Node_Array(Node_Array *na) : _a(na->_a), _max(na->_max), _nodes(na->_nodes) {}
  Node *operator[] ( uint i ) const // Lookup, or NULL for not mapped
  { return (i<_max) ? _nodes[i] : (Node*)NULL; }
  Node *at( uint i ) const { assert(i<_max,"oob"); return _nodes[i]; }
  Node **adr() { return _nodes; }
  // Extend the mapping: index i maps to Node *n.
  void map( uint i, Node *n ) { if( i>=_max ) grow(i); _nodes[i] = n; }
  void insert( uint i, Node *n );
  void remove( uint i );        // Remove, preserving order
  void sort( C_sort_func_t func);
  void reset( Arena *new_a );   // Zap mapping to empty; reclaim storage
  void clear();                 // Set all entries to NULL, keep storage
  uint Size() const { return _max; }
  void dump() const;
};

class Node_List : public Node_Array {
  friend class VMStructs;
  uint _cnt;
public:
  Node_List() : Node_Array(Thread::current()->resource_area()), _cnt(0) {}
  Node_List(Arena *a) : Node_Array(a), _cnt(0) {}
  bool contains(const Node* n) const {
    for (uint e = 0; e < size(); e++) {
      if (at(e) == n) return true;
    }
    return false;
  }
  void insert( uint i, Node *n ) { Node_Array::insert(i,n); _cnt++; }
  void remove( uint i ) { Node_Array::remove(i); _cnt--; }
  void push( Node *b ) { map(_cnt++,b); }
  void yank( Node *n );         // Find and remove
  Node *pop() { return _nodes[--_cnt]; }
  Node *rpop() { Node *b = _nodes[0]; _nodes[0]=_nodes[--_cnt]; return b;}
  void clear() { _cnt = 0; Node_Array::clear(); } // retain storage
  uint size() const { return _cnt; }
  void dump() const;
  void dump_simple() const;
};

//------------------------------Unique_Node_List-------------------------------
class Unique_Node_List : public Node_List {
  friend class VMStructs;
  VectorSet _in_worklist;
  uint _clock_index;            // Index in list where to pop from next
public:
  Unique_Node_List() : Node_List(), _in_worklist(Thread::current()->resource_area()), _clock_index(0) {}
  Unique_Node_List(Arena *a) : Node_List(a), _in_worklist(a), _clock_index(0) {}

  void remove( Node *n );
  bool member( Node *n ) { return _in_worklist.test(n->_idx) != 0; }
  VectorSet &member_set(){ return _in_worklist; }

  void push( Node *b ) {
    if( !_in_worklist.test_set(b->_idx) )
      Node_List::push(b);
  }
  Node *pop() {
    if( _clock_index >= size() ) _clock_index = 0;
    Node *b = at(_clock_index);
    map( _clock_index, Node_List::pop());
    if (size() != 0) _clock_index++; // Always start from 0
    _in_worklist >>= b->_idx;
    return b;
  }
  Node *remove( uint i ) {
    Node *b = Node_List::at(i);
    _in_worklist >>= b->_idx;
    map(i,Node_List::pop());
    return b;
  }
  void yank( Node *n ) { _in_worklist >>= n->_idx; Node_List::yank(n); }
  void  clear() {
    _in_worklist.Clear();        // Discards storage but grows automatically
    Node_List::clear();
    _clock_index = 0;
  }

  // Used after parsing to remove useless nodes before Iterative GVN
  void remove_useless_nodes(VectorSet &useful);

#ifndef PRODUCT
  void print_set() const { _in_worklist.print(); }
#endif
};
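
// A hedged worklist sketch: the duplicate-filtering push makes the usual
// transitive-closure loop safe to write directly:
//   Unique_Node_List worklist;
//   worklist.push(root);
//   for (uint k = 0; k < worklist.size(); k++) {   // size() may grow as we go
//     Node* n = worklist.at(k);
//     for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++)
//       worklist.push(n->fast_out(i));             // no-op if already pushed
//   }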

// Inline definition of Compile::record_for_igvn must be deferred to this point.
inline void Compile::record_for_igvn(Node* n) {
  _for_igvn->push(n);
}

//------------------------------Node_Stack-------------------------------------
class Node_Stack {
  friend class VMStructs;
protected:
  struct INode {
    Node *node; // Processed node
    uint  indx; // Index of next node's child
  };
  INode *_inode_top; // tos, stack grows up
  INode *_inode_max; // End of _inodes == _inodes + _max
  INode *_inodes;    // Array storage for the stack
  Arena *_a;         // Arena to allocate in
  void grow();
public:
  Node_Stack(int size) {
    size_t max = (size > OptoNodeListSize) ? size : OptoNodeListSize;
    _a = Thread::current()->resource_area();
    _inodes = NEW_ARENA_ARRAY( _a, INode, max );
    _inode_max = _inodes + max;
    _inode_top = _inodes - 1; // stack is empty
  }

  Node_Stack(Arena *a, int size) : _a(a) {
    size_t max = (size > OptoNodeListSize) ? size : OptoNodeListSize;
    _inodes = NEW_ARENA_ARRAY( _a, INode, max );
    _inode_max = _inodes + max;
    _inode_top = _inodes - 1; // stack is empty
  }

  void pop() {
    assert(_inode_top >= _inodes, "node stack underflow");
    --_inode_top;
  }
  void push(Node *n, uint i) {
    ++_inode_top;
    if (_inode_top >= _inode_max) grow();
    INode *top = _inode_top; // optimization
    top->node = n;
    top->indx = i;
  }
  Node *node() const {
    return _inode_top->node;
  }
  Node* node_at(uint i) const {
    assert(_inodes + i <= _inode_top, "in range");
    return _inodes[i].node;
  }
  uint index() const {
    return _inode_top->indx;
  }
  uint index_at(uint i) const {
    assert(_inodes + i <= _inode_top, "in range");
    return _inodes[i].indx;
  }
  void set_node(Node *n) {
    _inode_top->node = n;
  }
  void set_index(uint i) {
    _inode_top->indx = i;
  }
  uint size_max() const { return (uint)pointer_delta(_inode_max, _inodes,  sizeof(INode)); } // Max size
  uint size() const { return (uint)pointer_delta((_inode_top+1), _inodes,  sizeof(INode)); } // Current size
  bool is_nonempty() const { return (_inode_top >= _inodes); }
  bool is_empty() const { return (_inode_top < _inodes); }
  void clear() { _inode_top = _inodes - 1; } // retain storage

  // Node_Stack is used to map nodes.
  Node* find(uint idx) const;
};
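
// The usual depth-first walk (an editor's sketch): the per-entry indx field
// remembers which input of the node to resume with after returning.
//   Node_Stack stack(16);
//   stack.push(root, 0);
//   while (stack.is_nonempty()) {
//     Node* n = stack.node();
//     uint  i = stack.index();
//     if (i < n->req()) {
//       stack.set_index(i + 1);
//       Node* in = n->in(i);
//       if (in != NULL) stack.push(in, 0);   // descend into the i-th input
//     } else {
//       stack.pop();                         // all inputs of n are done
//     }
//   }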


//-----------------------------Node_Notes--------------------------------------
// Debugging or profiling annotations loosely and sparsely associated
// with some nodes.  See Compile::node_notes_at for the accessor.
class Node_Notes VALUE_OBJ_CLASS_SPEC {
  friend class VMStructs;
  JVMState* _jvms;

public:
  Node_Notes(JVMState* jvms = NULL) {
    _jvms = jvms;
  }

  JVMState* jvms()            { return _jvms; }
  void  set_jvms(JVMState* x) {        _jvms = x; }

  // True if there is nothing here.
  bool is_clear() {
    return (_jvms == NULL);
  }

  // Make there be nothing here.
  void clear() {
    _jvms = NULL;
  }

  // Make a new, clean node notes.
  static Node_Notes* make(Compile* C) {
    Node_Notes* nn = NEW_ARENA_ARRAY(C->comp_arena(), Node_Notes, 1);
    nn->clear();
    return nn;
  }

  Node_Notes* clone(Compile* C) {
    Node_Notes* nn = NEW_ARENA_ARRAY(C->comp_arena(), Node_Notes, 1);
    (*nn) = (*this);
    return nn;
  }

  // Absorb any information from source.
  bool update_from(Node_Notes* source) {
    bool changed = false;
    if (source != NULL) {
      if (source->jvms() != NULL) {
        set_jvms(source->jvms());
        changed = true;
      }
    }
    return changed;
  }
};

// Inlined accessors for Compile::node_notes that require the preceding class:
inline Node_Notes*
Compile::locate_node_notes(GrowableArray<Node_Notes*>* arr,
                           int idx, bool can_grow) {
  assert(idx >= 0, "oob");
  int block_idx = (idx >> _log2_node_notes_block_size);
  int grow_by = (block_idx - (arr == NULL? 0: arr->length()));
  if (grow_by >= 0) {
    if (!can_grow)  return NULL;
    grow_node_notes(arr, grow_by + 1);
  }
  // (Every element of arr is a sub-array of length _node_notes_block_size.)
  return arr->at(block_idx) + (idx & (_node_notes_block_size-1));
}
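
// Worked example (editor's illustration, assuming a block size of 256, i.e.
// _log2_node_notes_block_size == 8): idx == 1000 selects sub-array
// block_idx == (1000 >> 8) == 3 and entry (1000 & 255) == 232 within it.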

inline bool
Compile::set_node_notes_at(int idx, Node_Notes* value) {
  if (value == NULL || value->is_clear())
    return false;  // nothing to write => write nothing
  Node_Notes* loc = locate_node_notes(_node_note_array, idx, true);
  assert(loc != NULL, "");
  return loc->update_from(value);
}


//------------------------------TypeNode---------------------------------------
// Node with a Type constant.
class TypeNode : public Node {
protected:
  virtual uint hash() const;    // Check the type
  virtual uint cmp( const Node &n ) const;
  virtual uint size_of() const; // Size is bigger
  const Type* const _type;
public:
  void set_type(const Type* t) {
    assert(t != NULL, "sanity");
    debug_only(uint check_hash = (VerifyHashTableKeys && _hash_lock) ? hash() : NO_HASH);
    *(const Type**)&_type = t;   // cast away const-ness
    // If this node is in the hash table, make sure it doesn't need a rehash.
    assert(check_hash == NO_HASH || check_hash == hash(), "type change must preserve hash code");
  }
  const Type* type() const { assert(_type != NULL, "sanity"); return _type; };
  TypeNode( const Type *t, uint required ) : Node(required), _type(t) {
    init_class_id(Class_Type);
  }
  virtual const Type *Value( PhaseTransform *phase ) const;
  virtual const Type *bottom_type() const;
  virtual       uint  ideal_reg() const;
#ifndef PRODUCT
  virtual void dump_spec(outputStream *st) const;
#endif
};

#endif // SHARE_VM_OPTO_NODE_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/opcodes.cpp
/*
 * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// no precompiled headers

// ----------------------------------------------------------------------------
// Build a table of class names as strings.  Used both for debugging printouts
// and in the ADL machine descriptions.
#define macro(x) #x,
const char *NodeClassNames[] = {
  "Node",
  "Set",
  "RegN",
  "RegI",
  "RegP",
  "RegF",
  "RegD",
  "RegL",
  "RegFlags",
  "VecS",
  "VecD",
  "VecX",
  "VecY",
  "_last_machine_leaf",
#include "classes.hpp"
  "_last_class_name",
};
#undef macro
C:\hotspot-69087d08d473\src\share\vm/opto/opcodes.hpp
/*
 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_VM_OPTO_OPCODES_HPP
#define SHARE_VM_OPTO_OPCODES_HPP

// Build a big enum of class names to give them dense integer indices
#define macro(x) Op_##x,
enum Opcodes {
  Op_Node = 0,
  macro(Set)                    // Instruction selection match rule
  macro(RegN)                   // Machine narrow oop register
  macro(RegI)                   // Machine integer register
  macro(RegP)                   // Machine pointer register
  macro(RegF)                   // Machine float   register
  macro(RegD)                   // Machine double  register
  macro(RegL)                   // Machine long    register
  macro(VecS)                   // Machine vectors register
  macro(VecD)                   // Machine vectord register
  macro(VecX)                   // Machine vectorx register
  macro(VecY)                   // Machine vectory register
  macro(RegFlags)               // Machine flags   register
  _last_machine_leaf,           // Split between regular opcodes and machine
#include "classes.hpp"
  _last_opcode
};
#undef macro

// Table of names, indexed by Opcode
extern const char *NodeClassNames[];

#endif // SHARE_VM_OPTO_OPCODES_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/optoreg.hpp
/*
 * Copyright (c) 2006, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_VM_OPTO_OPTOREG_HPP
#define SHARE_VM_OPTO_OPTOREG_HPP

//------------------------------OptoReg----------------------------------------
// We eventually need Registers for the Real World.  Registers are essentially
// non-SSA names.  A Register is represented as a number.  Non-regular values
// (e.g., Control, Memory, I/O) use the Special register.  The actual machine
// registers (as described in the ADL file for a machine) start at zero.
// Stack-slots (spill locations) start at the next Chunk past the last machine
// register.
//
// Note that stack spill-slots are treated as a very large register set.
// They have all the correct properties for a Register: not aliased (unique
// named).  There is some simple mapping from a stack-slot register number
// to the actual location on the stack; this mapping depends on the calling
// conventions and is described in the ADL.
//
// Note that Name is not an enum. The C++ standard defines the range of an
// enum as the range of the smallest bit-field that can represent all of the
// enumerators declared in the enum, and the result of assigning a value
// outside that range to an enum is undefined. OptoReg::Name is therefore
// typedef'ed as int, because it needs to be able to represent spill-slots.
//
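// For instance (editor's illustration): had Name been declared as
//   "enum Name { Special = -2, Bad = -1 };"
// its guaranteed range would cover only a couple of bits, so storing a large
// spill-slot number such as stack2reg(500) in it would be undefined behavior.
//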
class OptoReg VALUE_OBJ_CLASS_SPEC {

 friend class C2Compiler;
 public:
  typedef int Name;
  enum {
    // Chunk 0
    Physical = AdlcVMDeps::Physical, // Start of physical regs
    // A few oddballs at the edge of the world
    Special = -2,               // All special (not allocated) values
    Bad = -1                    // Not a register
  };

 private:

 static const VMReg opto2vm[REG_COUNT];
 static Name vm2opto[ConcreteRegisterImpl::number_of_registers];

 public:

  // Stack pointer register
  static OptoReg::Name c_frame_pointer;



  // Increment a register number.  As in:
  //    "for ( OptoReg::Name i; i=Control; i = add(i,1) ) ..."
  static Name add( Name x, int y ) { return Name(x+y); }

  // (We would like to have an operator+ for RegName, but it is not
  // a class, so this would be illegal in C++.)

  static void dump(int, outputStream *st = tty);

  // Get the stack slot number of an OptoReg::Name
  static unsigned int reg2stack( OptoReg::Name r) {
    assert( r >= stack0(), " must be");
    return r - stack0();
  }

  // convert a stack slot number into an OptoReg::Name
  static OptoReg::Name stack2reg( int idx) {
    return Name(stack0() + idx);
  }

  static bool is_stack(Name n) {
    return n >= stack0();
  }

  static bool is_valid(Name n) {
    return (n != Bad);
  }

  static bool is_reg(Name n) {
    return  is_valid(n) && !is_stack(n);
  }

  static VMReg as_VMReg(OptoReg::Name n) {
    if (is_reg(n)) {
      // Must use table, it'd be nice if Bad was indexable...
      return opto2vm[n];
    } else {
      assert(!is_stack(n), "must un-warp");
      return VMRegImpl::Bad();
    }
  }

  // Can un-warp a stack slot or convert a register or Bad
  static VMReg as_VMReg(OptoReg::Name n, int frame_size, int arg_count) {
    if (is_reg(n)) {
      // Must use table, it'd be nice if Bad was indexable...
      return opto2vm[n];
    } else if (is_stack(n)) {
      int stack_slot = reg2stack(n);
      if (stack_slot < arg_count) {
        return VMRegImpl::stack2reg(stack_slot + frame_size);
      }
      return VMRegImpl::stack2reg(stack_slot - arg_count);
      // return VMRegImpl::stack2reg(reg2stack(OptoReg::add(n, -arg_count)));
    } else {
      return VMRegImpl::Bad();
    }
  }

  static OptoReg::Name as_OptoReg(VMReg r) {
    if (r->is_stack()) {
      assert(false, "must warp");
      return stack2reg(r->reg2stack());
    } else if (r->is_valid()) {
      // Must use table, it'd be nice if Bad was indexable...
      return vm2opto[r->value()];
    } else {
      return Bad;
    }
  }

  static OptoReg::Name stack0() {
    return VMRegImpl::stack0->value();
  }

  static const char* regname(OptoReg::Name n) {
    return as_VMReg(n)->name();
  }

};
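
// Conversion sketch (an illustration, not original code; assumes the opto2vm
// and vm2opto tables have been initialized):
//   OptoReg::Name r = 0;                      // first physical register
//   VMReg v = OptoReg::as_VMReg(r);           // chunk-0 table lookup
//   assert(OptoReg::as_OptoReg(v) == r, "round trip");
//   OptoReg::Name s = OptoReg::stack2reg(3);  // 4th spill slot
//   assert(OptoReg::is_stack(s) && OptoReg::reg2stack(s) == 3, "");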

//---------------------------OptoRegPair-------------------------------------------
// Pairs of 32-bit registers for the allocator.
// This is a very similar class to VMRegPair. C2 only interfaces with VMRegPair
// via the calling convention code, which is shared between the compilers.
// Since C2 uses OptoRegs for register allocation, it is more efficient to use
// OptoRegPair internally for nodes that can contain a pair of OptoRegs rather
// than use VMRegPair and continually convert back and forth. So normally
// C2 will take in a VMRegPair from the calling convention code and immediately
// convert it to an OptoRegPair and stay in the OptoReg world. The only
// conversion between OptoRegs and VMRegs is for debug info and oopMaps. This
// is not a high-bandwidth spot, so it is not an issue.
// Note that one other consequence of staying in the OptoReg world with OptoRegPairs
// is that there are "physical" OptoRegs that are not representable in the VMReg
// world, notably flags. [ But by design there is "space" in the VMReg world
// for such registers; they just may not be concrete. ] So if we were to use
// VMRegPair, the VMReg world would have to have a representation for these
// registers so that an OptoReg->VMReg->OptoReg round trip would reproduce the
// original OptoReg. As it stands, converting a flag (condition code) to a VMReg
// yields VMRegImpl::Bad, and converting that back returns OptoReg::Bad, losing
// the identity of the OptoReg.

class OptoRegPair {
  friend class VMStructs;
private:
  short _second;
  short _first;
public:
  void set_bad (                   ) { _second = OptoReg::Bad; _first = OptoReg::Bad; }
  void set1    ( OptoReg::Name n  ) { _second = OptoReg::Bad; _first = n; }
  void set2    ( OptoReg::Name n  ) { _second = n + 1;       _first = n; }
  void set_pair( OptoReg::Name second, OptoReg::Name first    ) { _second= second;    _first= first; }
  void set_ptr ( OptoReg::Name ptr ) {
#ifdef _LP64
    _second = ptr+1;
#else
    _second = OptoReg::Bad;
#endif
    _first = ptr;
  }

  OptoReg::Name second() const { return _second; }
  OptoReg::Name first() const { return _first; }
  OptoRegPair(OptoReg::Name second, OptoReg::Name first) {  _second = second; _first = first; }
  OptoRegPair(OptoReg::Name f) { _second = OptoReg::Bad; _first = f; }
  OptoRegPair() { _second = OptoReg::Bad; _first = OptoReg::Bad; }
};
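
// Usage sketch (illustration only): set2() records an adjacent register pair
// for a two-slot value, while set_ptr() adapts to the pointer width.
//   OptoRegPair p;
//   p.set2(4);       // first() == 4, second() == 5
//   p.set_ptr(4);    // second() is 5 on LP64, OptoReg::Bad on 32-bit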

#endif // SHARE_VM_OPTO_OPTOREG_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/output.cpp
/*
 * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.inline.hpp"
#include "code/compiledIC.hpp"
#include "code/debugInfo.hpp"
#include "code/debugInfoRec.hpp"
#include "compiler/compileBroker.hpp"
#include "compiler/disassembler.hpp"
#include "compiler/oopMap.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/locknode.hpp"
#include "opto/machnode.hpp"
#include "opto/output.hpp"
#include "opto/regalloc.hpp"
#include "opto/runtime.hpp"
#include "opto/subnode.hpp"
#include "opto/type.hpp"
#include "runtime/handles.inline.hpp"
#include "utilities/xmlstream.hpp"

#ifndef PRODUCT
#define DEBUG_ARG(x) , x
#else
#define DEBUG_ARG(x)
#endif

// Convert Nodes to instruction bits and pass off to the VM
void Compile::Output() {
  // The RootNode generates no code; its block must still be empty here.
  assert( _cfg->get_root_block()->number_of_nodes() == 0, "" );

  // The number of new nodes (mostly MachNop) is proportional to
  // the number of java calls and inner loops which are aligned.
  if ( C->check_node_count((NodeLimitFudgeFactor + C->java_calls()*3 +
                            C->inner_loops()*(OptoLoopAlignment-1)),
                           "out of nodes before code generation" ) ) {
    return;
  }
  // Make sure I can find the Start Node
  Block *entry = _cfg->get_block(1);
  Block *broot = _cfg->get_root_block();

  const StartNode *start = entry->head()->as_Start();

  // Replace StartNode with prolog
  MachPrologNode *prolog = new (this) MachPrologNode();
  entry->map_node(prolog, 0);
  _cfg->map_node_to_block(prolog, entry);
  _cfg->unmap_node_from_block(start); // start is no longer in any block

  // Virtual methods need an unverified entry point

  if( is_osr_compilation() ) {
    if( PoisonOSREntry ) {
      // TODO: Should use a ShouldNotReachHereNode...
      _cfg->insert( broot, 0, new (this) MachBreakpointNode() );
    }
  } else {
    if( _method && !_method->flags().is_static() ) {
      // Insert unvalidated entry point
      _cfg->insert( broot, 0, new (this) MachUEPNode() );
    }

  }


  // Break before main entry point
  if( (_method && _method->break_at_execute())
#ifndef PRODUCT
    ||(OptoBreakpoint && is_method_compilation())
    ||(OptoBreakpointOSR && is_osr_compilation())
    ||(OptoBreakpointC2R && !_method)
#endif
    ) {
    // checking for _method means that OptoBreakpoint does not apply to
    // runtime stubs or frame converters
    _cfg->insert( entry, 1, new (this) MachBreakpointNode() );
  }

  // Insert epilogs before every return
  for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
    Block* block = _cfg->get_block(i);
    if (!block->is_connector() && block->non_connector_successor(0) == _cfg->get_root_block()) { // Found a program exit point?
      Node* m = block->end();
      if (m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt) {
        MachEpilogNode* epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return);
        block->add_inst(epilog);
        _cfg->map_node_to_block(epilog, block);
      }
    }
  }

# ifdef ENABLE_ZAP_DEAD_LOCALS
  if (ZapDeadCompiledLocals) {
    Insert_zap_nodes();
  }
# endif

  uint* blk_starts = NEW_RESOURCE_ARRAY(uint, _cfg->number_of_blocks() + 1);
  blk_starts[0] = 0;

  // Initialize code buffer and process short branches.
  CodeBuffer* cb = init_buffer(blk_starts);

  if (cb == NULL || failing()) {
    return;
  }

  ScheduleAndBundle();

#ifndef PRODUCT
  if (trace_opto_output()) {
    tty->print("\n---- After ScheduleAndBundle ----\n");
    for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
      tty->print("\nBB#%03d:\n", i);
      Block* block = _cfg->get_block(i);
      for (uint j = 0; j < block->number_of_nodes(); j++) {
        Node* n = block->get_node(j);
        OptoReg::Name reg = _regalloc->get_reg_first(n);
        tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : "");
        n->dump();
      }
    }
  }
#endif

  if (failing()) {
    return;
  }

  BuildOopMaps();

  if (failing())  {
    return;
  }

  fill_buffer(cb, blk_starts);
}

bool Compile::need_stack_bang(int frame_size_in_bytes) const {
  // Determine if we need to generate a stack overflow check.
  // Do it if the method is not a stub function and
  // has java calls or has frame size > vm_page_size/8.
  // The debug VM checks that deoptimization doesn't trigger an
  // unexpected stack overflow (compiled method stack banging should
  // guarantee it doesn't happen) so we always need the stack bang in
  // a debug VM.
  return (UseStackBanging && stub_function() == NULL &&
          (has_java_calls() || frame_size_in_bytes > os::vm_page_size()>>3
           DEBUG_ONLY(|| true)));
}

bool Compile::need_register_stack_bang() const {
  // Determine if we need to generate a register stack overflow check.
  // This is only used on architectures which have split register
  // and memory stacks (ie. IA64).
  // Bang if the method is not a stub function and has java calls
  return (stub_function() == NULL && has_java_calls());
}

# ifdef ENABLE_ZAP_DEAD_LOCALS


// In order to catch compiler oop-map bugs, we have implemented
// a debugging mode called ZapDeadCompiledLocals.
// This mode causes the compiler to insert a call to a runtime routine,
// "zap_dead_locals", right before each place in compiled code
// that could potentially be a gc-point (i.e., a safepoint or oop map point).
// The runtime routine checks that locations mapped as oops are really
// oops, that locations mapped as values do not look like oops,
// and that locations mapped as dead are not used later
// (by zapping them to an invalid address).

int Compile::_CompiledZap_count = 0;

void Compile::Insert_zap_nodes() {
  bool skip = false;


  // Dink with static counts because code without the extra
  // runtime calls is MUCH faster for debugging purposes

       if ( CompileZapFirst  ==  0  ) ; // nothing special
  else if ( CompileZapFirst  >  CompiledZap_count() )  skip = true;
  else if ( CompileZapFirst  == CompiledZap_count() )
    warning("starting zap compilation after skipping");

       if ( CompileZapLast  ==  -1  ) ; // nothing special
  else if ( CompileZapLast  <   CompiledZap_count() )  skip = true;
  else if ( CompileZapLast  ==  CompiledZap_count() )
    warning("about to compile last zap");

  ++_CompiledZap_count; // counts skipped zaps, too

  if ( skip )  return;


  if ( _method == NULL )
    return; // no safepoints/oopmaps emitted for calls in stubs, so we don't care

  // Insert call to zap runtime stub before every node with an oop map
  for( uint i=0; i<_cfg->number_of_blocks(); i++ ) {
    Block *b = _cfg->get_block(i);
    for ( uint j = 0;  j < b->number_of_nodes();  ++j ) {
      Node *n = b->get_node(j);

      // Determine whether we should insert a zap-a-lot node in the output.
      // We do that for all nodes that have oopmap info, except for calls
      // to allocation.  Calls to allocation pass in the old top-of-eden pointer
      // and expect the C code to reset it.  Hence, there can be no safepoints between
      // the inlined-allocation and the call to new_Java, etc.
      // We also cannot zap monitor calls, as they must hold the microlock
      // during the call to Zap, which also wants to grab the microlock.
      bool insert = n->is_MachSafePoint() && (n->as_MachSafePoint()->oop_map() != NULL);
      if ( insert ) { // it is MachSafePoint
        if ( !n->is_MachCall() ) {
          insert = false;
        } else if ( n->is_MachCall() ) {
          MachCallNode* call = n->as_MachCall();
          if (call->entry_point() == OptoRuntime::new_instance_Java() ||
              call->entry_point() == OptoRuntime::new_array_Java() ||
              call->entry_point() == OptoRuntime::multianewarray2_Java() ||
              call->entry_point() == OptoRuntime::multianewarray3_Java() ||
              call->entry_point() == OptoRuntime::multianewarray4_Java() ||
              call->entry_point() == OptoRuntime::multianewarray5_Java() ||
              call->entry_point() == OptoRuntime::slow_arraycopy_Java() ||
              call->entry_point() == OptoRuntime::complete_monitor_locking_Java()
              ) {
            insert = false;
          }
        }
        if (insert) {
          Node *zap = call_zap_node(n->as_MachSafePoint(), i);
          b->insert_node(zap, j);
          _cfg->map_node_to_block(zap, b);
          ++j;
        }
      }
    }
  }
}


Node* Compile::call_zap_node(MachSafePointNode* node_to_check, int block_no) {
  const TypeFunc *tf = OptoRuntime::zap_dead_locals_Type();
  CallStaticJavaNode* ideal_node =
    new (this) CallStaticJavaNode( tf,
         OptoRuntime::zap_dead_locals_stub(_method->flags().is_native()),
                       "call zap dead locals stub", 0, TypePtr::BOTTOM);
  // We need to copy the OopMap from the site we're zapping at.
  // We have to make a copy, because the zap site might not be
  // a call site, and zap_dead is a call site.
  OopMap* clone = node_to_check->oop_map()->deep_copy();

  // Add the cloned OopMap to the zap node
  ideal_node->set_oop_map(clone);
  return _matcher->match_sfpt(ideal_node);
}

bool Compile::is_node_getting_a_safepoint( Node* n) {
  // This code duplicates the logic prior to the call of add_safepoint
  // below in this file.
  if( n->is_MachSafePoint() ) return true;
  return false;
}

# endif // ENABLE_ZAP_DEAD_LOCALS

// Compute the size of first NumberOfLoopInstrToAlign instructions at the top
// of a loop. When aligning a loop we need to provide enough instructions
// in the cpu's fetch buffer to feed the decoders. The loop alignment can be
// avoided if we have enough instructions in the fetch buffer at the head of a loop.
// By default, the size is set to 999999 by Block's constructor so that
// a loop will be aligned if the size is not reset here.
//
// Note: Mach instructions could contain several HW instructions
// so the size is estimated only.
//
void Compile::compute_loop_first_inst_sizes() {
  // The next condition is used to gate the loop alignment optimization.
  // Don't align a loop if there are enough instructions at the head of the loop
  // or the alignment padding is larger than MaxLoopPad. By default, MaxLoopPad
  // is equal to OptoLoopAlignment-1 except on new Intel cpus, where it is
  // equal to 11 bytes, which is the largest address NOP instruction.
  if (MaxLoopPad < OptoLoopAlignment - 1) {
    uint last_block = _cfg->number_of_blocks() - 1;
    for (uint i = 1; i <= last_block; i++) {
      Block* block = _cfg->get_block(i);
      // Check the first loop's block which requires an alignment.
      if (block->loop_alignment() > (uint)relocInfo::addr_unit()) {
        uint sum_size = 0;
        uint inst_cnt = NumberOfLoopInstrToAlign;
        inst_cnt = block->compute_first_inst_size(sum_size, inst_cnt, _regalloc);

        // Check subsequent fallthrough blocks if the loop's first
        // block(s) does not have enough instructions.
        Block *nb = block;
        while(inst_cnt > 0 &&
              i < last_block &&
              !_cfg->get_block(i + 1)->has_loop_alignment() &&
              !nb->has_successor(block)) {
          i++;
          nb = _cfg->get_block(i);
          inst_cnt  = nb->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
        } // while( inst_cnt > 0 && i < last_block  )

        block->set_first_inst_size(sum_size);
      } // if( b->head()->is_Loop() )
    } // for( i <= last_block )
  } // if( MaxLoopPad < OptoLoopAlignment-1 )
}

// The architecture description provides short branch variants for some long
// branch instructions. Replace eligible long branches with short branches.
void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size) {
  // Compute size of each block, method size, and relocation information size
  uint nblocks  = _cfg->number_of_blocks();

  uint*      jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks);
  uint*      jmp_size   = NEW_RESOURCE_ARRAY(uint,nblocks);
  int*       jmp_nidx   = NEW_RESOURCE_ARRAY(int ,nblocks);

  // Collect worst case block paddings
  int* block_worst_case_pad = NEW_RESOURCE_ARRAY(int, nblocks);
  memset(block_worst_case_pad, 0, nblocks * sizeof(int));

  DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,nblocks); )
  DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,nblocks); )

  bool has_short_branch_candidate = false;

  // Initialize the sizes to 0
  code_size  = 0;          // Size in bytes of generated code
  stub_size  = 0;          // Size in bytes of all stub entries
  // Size in bytes of all relocation entries, including those in local stubs.
  // Start with 2-bytes of reloc info for the unvalidated entry point
  reloc_size = 1;          // Number of relocation entries

  // Make three passes.  The first computes pessimistic blk_starts,
  // relative jmp_offset and reloc_size information.  The second performs
  // short branch substitution using the pessimistic sizing.  The
  // third inserts nops where needed.

  // Step one, perform a pessimistic sizing pass.
  uint last_call_adr = max_uint;
  uint last_avoid_back_to_back_adr = max_uint;
  uint nop_size = (new (this) MachNopNode())->size(_regalloc);
  for (uint i = 0; i < nblocks; i++) { // For all blocks
    Block* block = _cfg->get_block(i);

    // During short branch replacement, we store the relative (to blk_starts)
    // offset of jump in jmp_offset, rather than the absolute offset of jump.
    // This is so that we do not need to recompute sizes of all nodes when
    // we compute correct blk_starts in our next sizing pass.
    jmp_offset[i] = 0;
    jmp_size[i]   = 0;
    jmp_nidx[i]   = -1;
    DEBUG_ONLY( jmp_target[i] = 0; )
    DEBUG_ONLY( jmp_rule[i]   = 0; )

    // Sum all instruction sizes to compute block size
    uint last_inst = block->number_of_nodes();
    uint blk_size = 0;
    for (uint j = 0; j < last_inst; j++) {
      Node* nj = block->get_node(j);
      // Handle machine instruction nodes
      if (nj->is_Mach()) {
        MachNode *mach = nj->as_Mach();
        blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding
        reloc_size += mach->reloc();
        if (mach->is_MachCall()) {
          // add size information for trampoline stub
          // class CallStubImpl is platform-specific and defined in the *.ad files.
          stub_size  += CallStubImpl::size_call_trampoline();
          reloc_size += CallStubImpl::reloc_call_trampoline();

          MachCallNode *mcall = mach->as_MachCall();
          // This destination address is NOT PC-relative

          mcall->method_set((intptr_t)mcall->entry_point());

          if (mcall->is_MachCallJava() && mcall->as_MachCallJava()->_method) {
            stub_size  += CompiledStaticCall::to_interp_stub_size();
            reloc_size += CompiledStaticCall::reloc_to_interp_stub();
          }
        } else if (mach->is_MachSafePoint()) {
          // If call/safepoint are adjacent, account for possible
          // nop to disambiguate the two safepoints.
          // ScheduleAndBundle() can rearrange nodes in a block,
          // check for all offsets inside this block.
          if (last_call_adr >= blk_starts[i]) {
            blk_size += nop_size;
          }
        }
        if (mach->avoid_back_to_back(MachNode::AVOID_BEFORE)) {
          // Nop is inserted between "avoid back to back" instructions.
          // ScheduleAndBundle() can rearrange nodes in a block,
          // check for all offsets inside this block.
          if (last_avoid_back_to_back_adr >= blk_starts[i]) {
            blk_size += nop_size;
          }
        }
        if (mach->may_be_short_branch()) {
          if (!nj->is_MachBranch()) {
#ifndef PRODUCT
            nj->dump(3);
#endif
            Unimplemented();
          }
          assert(jmp_nidx[i] == -1, "block should have only one branch");
          jmp_offset[i] = blk_size;
          jmp_size[i]   = nj->size(_regalloc);
          jmp_nidx[i]   = j;
          has_short_branch_candidate = true;
        }
      }
      blk_size += nj->size(_regalloc);
      // Remember end of call offset
      if (nj->is_MachCall() && !nj->is_MachCallLeaf()) {
        last_call_adr = blk_starts[i]+blk_size;
      }
      // Remember end of avoid_back_to_back offset
      if (nj->is_Mach() && nj->as_Mach()->avoid_back_to_back(MachNode::AVOID_AFTER)) {
        last_avoid_back_to_back_adr = blk_starts[i]+blk_size;
      }
    }

    // When the next block starts a loop, we may insert pad NOP
    // instructions.  Since we cannot know our future alignment,
    // assume the worst.
    if (i < nblocks - 1) {
      Block* nb = _cfg->get_block(i + 1);
      int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit();
      if (max_loop_pad > 0) {
        assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), "");
        // Adjust last_call_adr and/or last_avoid_back_to_back_adr.
        // If either is the last instruction in this block, bump by
        // max_loop_pad in lock-step with blk_size, so sizing
        // calculations in subsequent blocks can still conservatively
        // detect that it may be the last instruction in this block.
        if (last_call_adr == blk_starts[i]+blk_size) {
          last_call_adr += max_loop_pad;
        }
        if (last_avoid_back_to_back_adr == blk_starts[i]+blk_size) {
          last_avoid_back_to_back_adr += max_loop_pad;
        }
        blk_size += max_loop_pad;
        block_worst_case_pad[i + 1] = max_loop_pad;
      }
    }

    // Save block size; update total method size
    blk_starts[i+1] = blk_starts[i]+blk_size;
  }

  // Step two, replace eligible long jumps.
  bool progress = true;
  uint last_may_be_short_branch_adr = max_uint;
  while (has_short_branch_candidate && progress) {
    progress = false;
    has_short_branch_candidate = false;
    int adjust_block_start = 0;
    for (uint i = 0; i < nblocks; i++) {
      Block* block = _cfg->get_block(i);
      int idx = jmp_nidx[i];
      MachNode* mach = (idx == -1) ? NULL: block->get_node(idx)->as_Mach();
      if (mach != NULL && mach->may_be_short_branch()) {
#ifdef ASSERT
        assert(jmp_size[i] > 0 && mach->is_MachBranch(), "sanity");
        int j;
        // Find the branch; ignore trailing NOPs.
        for (j = block->number_of_nodes()-1; j>=0; j--) {
          Node* n = block->get_node(j);
          if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con)
            break;
        }
        assert(j >= 0 && j == idx && block->get_node(j) == (Node*)mach, "sanity");
#endif
        int br_size = jmp_size[i];
        int br_offs = blk_starts[i] + jmp_offset[i];

        // This requires the TRUE branch target to be in succs[0]
        uint bnum = block->non_connector_successor(0)->_pre_order;
        int offset = blk_starts[bnum] - br_offs;
        if (bnum > i) { // adjust following block's offset
          offset -= adjust_block_start;
        }

        // This block can be a loop header, account for the padding
        // in the previous block.
        int block_padding = block_worst_case_pad[i];
        assert(i == 0 || block_padding == 0 || br_offs >= block_padding, "Should have at least a padding on top");
        // In the following code a nop could be inserted before
        // the branch which will increase the backward distance.
        bool needs_padding = ((uint)(br_offs - block_padding) == last_may_be_short_branch_adr);
        assert(!needs_padding || jmp_offset[i] == 0, "padding only branches at the beginning of block");

        if (needs_padding && offset <= 0)
          offset -= nop_size;

        if (_matcher->is_short_branch_offset(mach->rule(), br_size, offset)) {
          // We've got a winner.  Replace this branch.
          MachNode* replacement = mach->as_MachBranch()->short_branch_version(this);

          // Update the jmp_size.
          int new_size = replacement->size(_regalloc);
          int diff     = br_size - new_size;
          assert(diff >= (int)nop_size, "short_branch size should be smaller");
          // Conservatively take into account padding between
          // avoid_back_to_back branches. Previous branch could be
          // converted into avoid_back_to_back branch during next
          // rounds.
          if (needs_padding && replacement->avoid_back_to_back(MachNode::AVOID_BEFORE)) {
            jmp_offset[i] += nop_size;
            diff -= nop_size;
          }
          adjust_block_start += diff;
          block->map_node(replacement, idx);
          mach->subsume_by(replacement, C);
          mach = replacement;
          progress = true;

          jmp_size[i] = new_size;
          DEBUG_ONLY( jmp_target[i] = bnum; );
          DEBUG_ONLY( jmp_rule[i] = mach->rule(); );
        } else {
          // The jump distance is not short, try again during next iteration.
          has_short_branch_candidate = true;
        }
      } // (mach->may_be_short_branch())
      if (mach != NULL && (mach->may_be_short_branch() ||
                           mach->avoid_back_to_back(MachNode::AVOID_AFTER))) {
        last_may_be_short_branch_adr = blk_starts[i] + jmp_offset[i] + jmp_size[i];
      }
      blk_starts[i+1] -= adjust_block_start;
    }
  }

#ifdef ASSERT
  for (uint i = 0; i < nblocks; i++) { // For all blocks
    if (jmp_target[i] != 0) {
      int br_size = jmp_size[i];
      int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_offset[i]);
      if (!_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset)) {
        tty->print_cr("target (%d) - jmp_offset(%d) = offset (%d), jump_size(%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_offset[i], offset, br_size, i, jmp_target[i]);
      }
      assert(_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset), "Displacement too large for short jmp");
    }
  }
#endif

  // Step 3, compute the offsets of all blocks, will be done in fill_buffer()
  // after ScheduleAndBundle().

  // ------------------
  // Compute size for code buffer
  code_size = blk_starts[nblocks];

  // Relocation records
  reloc_size += 1;              // Reloc entry for exception handler

  // Adjust reloc_size to the number of relocation records.
  // Min is 2 bytes, max is probably 6 or 8, with a tax of up to 25% for
  // a relocation index.
  // The CodeBuffer will expand the locs array if this estimate is too low.
  reloc_size *= 10 / sizeof(relocInfo);
}

//------------------------------FillLocArray-----------------------------------
// Create a bit of debug info and append it to the array.  The mapping is from
// Java local or expression stack to constant, register or stack-slot.  For
// doubles, insert 2 mappings and return 1 (to tell the caller that the next
// entry has been taken care of and caller should skip it).
static LocationValue *new_loc_value( PhaseRegAlloc *ra, OptoReg::Name regnum, Location::Type l_type ) {
  // This should never have accepted Bad before
  assert(OptoReg::is_valid(regnum), "location must be valid");
  return (OptoReg::is_reg(regnum))
    ? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) )
    : new LocationValue(Location::new_stk_loc(l_type,  ra->reg2offset(regnum)));
}
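
// For example (a sketch; `ra` and `regnum` would come from the register
// allocator): an oop that landed in a machine register is described by
//   new_loc_value(ra, regnum, Location::oop)
// as a register location, while the same oop spilled to the stack becomes a
// stack-slot location computed through ra->reg2offset(regnum).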


ObjectValue*
Compile::sv_for_node_id(GrowableArray<ScopeValue*> *objs, int id) {
  for (int i = 0; i < objs->length(); i++) {
    assert(objs->at(i)->is_object(), "corrupt object cache");
    ObjectValue* sv = (ObjectValue*) objs->at(i);
    if (sv->id() == id) {
      return sv;
    }
  }
  // Otherwise..
  return NULL;
}

void Compile::set_sv_for_object_node(GrowableArray<ScopeValue*> *objs,
                                     ObjectValue* sv ) {
  assert(sv_for_node_id(objs, sv->id()) == NULL, "Precondition");
  objs->append(sv);
}


void Compile::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local,
                            GrowableArray<ScopeValue*> *array,
                            GrowableArray<ScopeValue*> *objs ) {
  assert( local, "use _top instead of null" );
  if (array->length() != idx) {
    assert(array->length() == idx + 1, "Unexpected array count");
    // Old functionality:
    //   return
    // New functionality:
    //   Assert if the local is not top. In product mode let the new node
    //   override the old entry.
    assert(local == top(), "LocArray collision");
    if (local == top()) {
      return;
    }
    array->pop();
  }
  const Type *t = local->bottom_type();

  // Is it a safepoint scalar object node?
  if (local->is_SafePointScalarObject()) {
    SafePointScalarObjectNode* spobj = local->as_SafePointScalarObject();

    ObjectValue* sv = Compile::sv_for_node_id(objs, spobj->_idx);
    if (sv == NULL) {
      ciKlass* cik = t->is_oopptr()->klass();
      assert(cik->is_instance_klass() ||
             cik->is_array_klass(), "Not supported allocation.");
      sv = new ObjectValue(spobj->_idx,
                           new ConstantOopWriteValue(cik->java_mirror()->constant_encoding()));
      Compile::set_sv_for_object_node(objs, sv);

      uint first_ind = spobj->first_index(sfpt->jvms());
      for (uint i = 0; i < spobj->n_fields(); i++) {
        Node* fld_node = sfpt->in(first_ind+i);
        (void)FillLocArray(sv->field_values()->length(), sfpt, fld_node, sv->field_values(), objs);
      }
    }
    array->append(sv);
    return;
  }

  // Grab the register number for the local
  OptoReg::Name regnum = _regalloc->get_reg_first(local);
  if( OptoReg::is_valid(regnum) ) {// Got a register/stack?
    // Record the double as two float registers.
    // The register mask for such a value always specifies two adjacent
    // float registers, with the lower register number even.
    // Normally, the allocation of high and low words to these registers
    // is irrelevant, because nearly all operations on register pairs
    // (e.g., StoreD) treat them as a single unit.
    // Here, we assume in addition that the words in these two registers
    // stored "naturally" (by operations like StoreD and double stores
    // within the interpreter) such that the lower-numbered register
    // is written to the lower memory address.  This may seem like
    // a machine dependency, but it is not--it is a requirement on
    // the author of the <arch>.ad file to ensure that, for every
    // even/odd double-register pair to which a double may be allocated,
    // the word in the even single-register is stored to the first
    // memory word.  (Note that register numbers are completely
    // arbitrary, and are not tied to any machine-level encodings.)
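    // Concretely (editor's illustration): if a double is allocated to the
    // even/odd pair (F2,F3), the even register F2 holds the word at the
    // lower memory address, so the debug info below can describe the value
    // by its starting register alone.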
#ifdef _LP64
    if( t->base() == Type::DoubleBot || t->base() == Type::DoubleCon ) {
      array->append(new ConstantIntValue(0));
      array->append(new_loc_value( _regalloc, regnum, Location::dbl ));
    } else if ( t->base() == Type::Long ) {
      array->append(new ConstantIntValue(0));
      array->append(new_loc_value( _regalloc, regnum, Location::lng ));
    } else if ( t->base() == Type::RawPtr ) {
      // jsr/ret return address, which must be restored into the full-width
      // 64-bit stack slot.
      array->append(new_loc_value( _regalloc, regnum, Location::lng ));
    }
#else //_LP64
#ifdef SPARC
    if (t->base() == Type::Long && OptoReg::is_reg(regnum)) {
      // For SPARC we have to swap high and low words for
      // long values stored in a single-register (g0-g7).
      array->append(new_loc_value( _regalloc,              regnum   , Location::normal ));
      array->append(new_loc_value( _regalloc, OptoReg::add(regnum,1), Location::normal ));
    } else
#endif //SPARC
    if( t->base() == Type::DoubleBot || t->base() == Type::DoubleCon || t->base() == Type::Long ) {
      // Repack the double/long as two jints.
      // The convention the interpreter uses is that the second local
      // holds the first raw word of the native double representation.
      // This is actually reasonable, since locals and stack arrays
      // grow downwards in all implementations.
      // (If, on some machine, the interpreter's Java locals or stack
      // were to grow upwards, the embedded doubles would be word-swapped.)
      array->append(new_loc_value( _regalloc, OptoReg::add(regnum,1), Location::normal ));
      array->append(new_loc_value( _regalloc,              regnum   , Location::normal ));
    }
#endif //_LP64
    else if( (t->base() == Type::FloatBot || t->base() == Type::FloatCon) &&
               OptoReg::is_reg(regnum) ) {
      array->append(new_loc_value( _regalloc, regnum, Matcher::float_in_double()
                                   ? Location::float_in_dbl : Location::normal ));
    } else if( t->base() == Type::Int && OptoReg::is_reg(regnum) ) {
      array->append(new_loc_value( _regalloc, regnum, Matcher::int_in_long
                                   ? Location::int_in_long : Location::normal ));
    } else if( t->base() == Type::NarrowOop ) {
      array->append(new_loc_value( _regalloc, regnum, Location::narrowoop ));
    } else {
      array->append(new_loc_value( _regalloc, regnum, _regalloc->is_oop(local) ? Location::oop : Location::normal ));
    }
    return;
  }

  // No register.  It must be constant data.
  switch (t->base()) {
  case Type::Half:              // Second half of a double
    ShouldNotReachHere();       // Caller should skip 2nd halves
    break;
  case Type::AnyPtr:
    array->append(new ConstantOopWriteValue(NULL));
    break;
  case Type::AryPtr:
  case Type::InstPtr:          // fall through
    array->append(new ConstantOopWriteValue(t->isa_oopptr()->const_oop()->constant_encoding()));
    break;
  case Type::NarrowOop:
    if (t == TypeNarrowOop::NULL_PTR) {
      array->append(new ConstantOopWriteValue(NULL));
    } else {
      array->append(new ConstantOopWriteValue(t->make_ptr()->isa_oopptr()->const_oop()->constant_encoding()));
    }
    break;
  case Type::Int:
    array->append(new ConstantIntValue(t->is_int()->get_con()));
    break;
  case Type::RawPtr:
    // A return address (T_ADDRESS).
    assert((intptr_t)t->is_ptr()->get_con() < (intptr_t)0x10000, "must be a valid BCI");
#ifdef _LP64
    // Must be restored to the full-width 64-bit stack slot.
    array->append(new ConstantLongValue(t->is_ptr()->get_con()));
#else
    array->append(new ConstantIntValue(t->is_ptr()->get_con()));
#endif
    break;
  case Type::FloatCon: {
    float f = t->is_float_constant()->getf();
    array->append(new ConstantIntValue(jint_cast(f)));
    break;
  }
  case Type::DoubleCon: {
    jdouble d = t->is_double_constant()->getd();
#ifdef _LP64
    array->append(new ConstantIntValue(0));
    array->append(new ConstantDoubleValue(d));
#else
    // Repack the double as two jints.
    // The convention the interpreter uses is that the second local
    // holds the first raw word of the native double representation.
    // This is actually reasonable, since locals and stack arrays
    // grow downwards in all implementations.
    // (If, on some machine, the interpreter's Java locals or stack
    // were to grow upwards, the embedded doubles would be word-swapped.)
    jlong_accessor acc;
    acc.long_value = jlong_cast(d);
    array->append(new ConstantIntValue(acc.words[1]));
    array->append(new ConstantIntValue(acc.words[0]));
#endif
    break;
  }
  case Type::Long: {
    jlong d = t->is_long()->get_con();
#ifdef _LP64
    array->append(new ConstantIntValue(0));
    array->append(new ConstantLongValue(d));
#else
    // Repack the long as two jints.
    // The convention the interpreter uses is that the second local
    // holds the first raw word of the native long representation.
    // This is actually reasonable, since locals and stack arrays
    // grow downwards in all implementations.
    // (If, on some machine, the interpreter's Java locals or stack
    // were to grow upwards, the embedded longs would be word-swapped.)
    jlong_accessor acc;
    acc.long_value = d;
    array->append(new ConstantIntValue(acc.words[1]));
    array->append(new ConstantIntValue(acc.words[0]));
#endif
    break;
  }
  case Type::Top:               // Add an illegal value here
    array->append(new LocationValue(Location()));
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}

// Determine if this node starts a bundle
bool Compile::starts_bundle(const Node *n) const {
  return (_node_bundling_limit > n->_idx &&
          _node_bundling_base[n->_idx].starts_bundle());
}

//--------------------------Process_OopMap_Node--------------------------------
void Compile::Process_OopMap_Node(MachNode *mach, int current_offset) {

  // Handle special safepoint nodes for synchronization
  MachSafePointNode *sfn   = mach->as_MachSafePoint();
  MachCallNode      *mcall;

#ifdef ENABLE_ZAP_DEAD_LOCALS
  assert( is_node_getting_a_safepoint(mach),  "logic does not match; false negative");
#endif

  int safepoint_pc_offset = current_offset;
  bool is_method_handle_invoke = false;
  bool return_oop = false;

  // Add the safepoint in the DebugInfoRecorder
  if( !mach->is_MachCall() ) {
    mcall = NULL;
    debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map);
  } else {
    mcall = mach->as_MachCall();

    // Is the call a MethodHandle call?
    if (mcall->is_MachCallJava()) {
      if (mcall->as_MachCallJava()->_method_handle_invoke) {
        assert(has_method_handle_invokes(), "must have been set during call generation");
        is_method_handle_invoke = true;
      }
    }

    // Check if a call returns an object.
    if (mcall->returns_pointer()) {
      return_oop = true;
    }
    safepoint_pc_offset += mcall->ret_addr_offset();
    debug_info()->add_safepoint(safepoint_pc_offset, mcall->_oop_map);
  }

  // Loop over the JVMState list to add scope information
  // Do not skip safepoints with a NULL method, they need monitor info
  JVMState* youngest_jvms = sfn->jvms();
  int max_depth = youngest_jvms->depth();

  // Allocate the object pool for scalar-replaced objects -- the map from
  // small-integer keys (which can be recorded in the local and ostack
  // arrays) to descriptions of the object state.
  GrowableArray<ScopeValue*> *objs = new GrowableArray<ScopeValue*>();

  // Visit scopes from oldest to youngest.
  for (int depth = 1; depth <= max_depth; depth++) {
    JVMState* jvms = youngest_jvms->of_depth(depth);
    int idx;
    ciMethod* method = jvms->has_method() ? jvms->method() : NULL;
    // Safepoints that do not have method() set only provide oop-map and monitor info
    // to support GC; these do not support deoptimization.
    int num_locs = (method == NULL) ? 0 : jvms->loc_size();
    int num_exps = (method == NULL) ? 0 : jvms->stk_size();
    int num_mon  = jvms->nof_monitors();
    assert(method == NULL || jvms->bci() < 0 || num_locs == method->max_locals(),
           "JVMS local count must match that of the method");

    // Add Local and Expression Stack Information

    // Insert locals into the locarray
    GrowableArray<ScopeValue*> *locarray = new GrowableArray<ScopeValue*>(num_locs);
    for( idx = 0; idx < num_locs; idx++ ) {
      FillLocArray( idx, sfn, sfn->local(jvms, idx), locarray, objs );
    }

    // Insert expression stack entries into the exparray
    GrowableArray<ScopeValue*> *exparray = new GrowableArray<ScopeValue*>(num_exps);
    for( idx = 0; idx < num_exps; idx++ ) {
      FillLocArray( idx,  sfn, sfn->stack(jvms, idx), exparray, objs );
    }

    // Add in mappings of the monitors
    assert( !method ||
            !method->is_synchronized() ||
            method->is_native() ||
            num_mon > 0 ||
            !GenerateSynchronizationCode,
            "monitors must always exist for synchronized methods");

    // Build the growable array of MonitorValues for the monitors
    GrowableArray<MonitorValue*> *monarray = new GrowableArray<MonitorValue*>(num_mon);

    // Loop over monitors and insert into array
    for (idx = 0; idx < num_mon; idx++) {
      // Grab the node that defines this monitor
      Node* box_node = sfn->monitor_box(jvms, idx);
      Node* obj_node = sfn->monitor_obj(jvms, idx);

      // Create ScopeValue for object
      ScopeValue *scval = NULL;

      if (obj_node->is_SafePointScalarObject()) {
        SafePointScalarObjectNode* spobj = obj_node->as_SafePointScalarObject();
        scval = Compile::sv_for_node_id(objs, spobj->_idx);
        if (scval == NULL) {
          const Type *t = spobj->bottom_type();
          ciKlass* cik = t->is_oopptr()->klass();
          assert(cik->is_instance_klass() ||
                 cik->is_array_klass(), "Not supported allocation.");
          ObjectValue* sv = new ObjectValue(spobj->_idx,
                                            new ConstantOopWriteValue(cik->java_mirror()->constant_encoding()));
          Compile::set_sv_for_object_node(objs, sv);

          uint first_ind = spobj->first_index(youngest_jvms);
          for (uint i = 0; i < spobj->n_fields(); i++) {
            Node* fld_node = sfn->in(first_ind+i);
            (void)FillLocArray(sv->field_values()->length(), sfn, fld_node, sv->field_values(), objs);
          }
          scval = sv;
        }
      } else if (!obj_node->is_Con()) {
        OptoReg::Name obj_reg = _regalloc->get_reg_first(obj_node);
        if( obj_node->bottom_type()->base() == Type::NarrowOop ) {
          scval = new_loc_value( _regalloc, obj_reg, Location::narrowoop );
        } else {
          scval = new_loc_value( _regalloc, obj_reg, Location::oop );
        }
      } else {
        const TypePtr *tp = obj_node->get_ptr_type();
        scval = new ConstantOopWriteValue(tp->is_oopptr()->const_oop()->constant_encoding());
      }

      OptoReg::Name box_reg = BoxLockNode::reg(box_node);
      Location basic_lock = Location::new_stk_loc(Location::normal,_regalloc->reg2offset(box_reg));
      bool eliminated = (box_node->is_BoxLock() && box_node->as_BoxLock()->is_eliminated());
      monarray->append(new MonitorValue(scval, basic_lock, eliminated));
    }

    // We dump the object pool first, since deoptimization reads it in first.
    debug_info()->dump_object_pool(objs);

    // Build first class objects to pass to scope
    DebugToken *locvals = debug_info()->create_scope_values(locarray);
    DebugToken *expvals = debug_info()->create_scope_values(exparray);
    DebugToken *monvals = debug_info()->create_monitor_values(monarray);

    // Make method available for all Safepoints
    ciMethod* scope_method = method ? method : _method;
    // Describe the scope here
    assert(jvms->bci() >= InvocationEntryBci && jvms->bci() <= 0x10000, "must be a valid or entry BCI");
    assert(!jvms->should_reexecute() || depth == max_depth, "reexecute allowed only for the youngest");
    // Now we can describe the scope.
    debug_info()->describe_scope(safepoint_pc_offset, scope_method, jvms->bci(), jvms->should_reexecute(), is_method_handle_invoke, return_oop, locvals, expvals, monvals);
  } // End jvms loop

  // Mark the end of the scope set.
  debug_info()->end_safepoint(safepoint_pc_offset);
}



// A simplified version of Process_OopMap_Node, to handle non-safepoints.
class NonSafepointEmitter {
  Compile*  C;
  JVMState* _pending_jvms;
  int       _pending_offset;

  void emit_non_safepoint();

 public:
  NonSafepointEmitter(Compile* compile) {
    this->C = compile;
    _pending_jvms = NULL;
    _pending_offset = 0;
  }

  void observe_instruction(Node* n, int pc_offset) {
    if (!C->debug_info()->recording_non_safepoints())  return;

    Node_Notes* nn = C->node_notes_at(n->_idx);
    if (nn == NULL || nn->jvms() == NULL)  return;
    if (_pending_jvms != NULL &&
        _pending_jvms->same_calls_as(nn->jvms())) {
      // Repeated JVMS?  Stretch it up here.
      _pending_offset = pc_offset;
    } else {
      if (_pending_jvms != NULL &&
          _pending_offset < pc_offset) {
        emit_non_safepoint();
      }
      _pending_jvms = NULL;
      if (pc_offset > C->debug_info()->last_pc_offset()) {
        // This is the only way _pending_jvms can become non-NULL:
        _pending_jvms = nn->jvms();
        _pending_offset = pc_offset;
      }
    }
  }

  // Stay out of the way of real safepoints:
  void observe_safepoint(JVMState* jvms, int pc_offset) {
    if (_pending_jvms != NULL &&
        !_pending_jvms->same_calls_as(jvms) &&
        _pending_offset < pc_offset) {
      emit_non_safepoint();
    }
    _pending_jvms = NULL;
  }

  void flush_at_end() {
    if (_pending_jvms != NULL) {
      emit_non_safepoint();
    }
    _pending_jvms = NULL;
  }
};

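// Emit the pending non-safepoint record: describe every scope of the pending
// JVMState, from the oldest caller to the youngest, at the recorded pc
// offset, then close the record.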
void NonSafepointEmitter::emit_non_safepoint() {
  JVMState* youngest_jvms = _pending_jvms;
  int       pc_offset     = _pending_offset;

  // Clear it now:
  _pending_jvms = NULL;

  DebugInformationRecorder* debug_info = C->debug_info();
  assert(debug_info->recording_non_safepoints(), "sanity");

  debug_info->add_non_safepoint(pc_offset);
  int max_depth = youngest_jvms->depth();

  // Visit scopes from oldest to youngest.
  for (int depth = 1; depth <= max_depth; depth++) {
    JVMState* jvms = youngest_jvms->of_depth(depth);
    ciMethod* method = jvms->has_method() ? jvms->method() : NULL;
    assert(!jvms->should_reexecute() || depth==max_depth, "reexecute allowed only for the youngest");
    debug_info->describe_scope(pc_offset, method, jvms->bci(), jvms->should_reexecute());
  }

  // Mark the end of the scope set.
  debug_info->end_non_safepoint(pc_offset);
}

//------------------------------init_buffer------------------------------------
CodeBuffer* Compile::init_buffer(uint* blk_starts) {

  // Set the initially allocated size
  int  code_req   = initial_code_capacity;
  int  locs_req   = initial_locs_capacity;
  int  stub_req   = TraceJumps ? initial_stub_capacity * 10 : initial_stub_capacity;
  int  const_req  = initial_const_capacity;

  int  pad_req    = NativeCall::instruction_size;
  // The extra spacing after the code is necessary on some platforms.
  // Sometimes we need to patch in a jump after the last instruction,
  // if the nmethod has been deoptimized.  (See 4932387, 4894843.)

  // Compute the byte offset where we can store the deopt pc.
  if (fixed_slots() != 0) {
    _orig_pc_slot_offset_in_bytes = _regalloc->reg2offset(OptoReg::stack2reg(_orig_pc_slot));
  }

  // Compute prolog code size
  _method_size = 0;
  _frame_slots = OptoReg::reg2stack(_matcher->_old_SP)+_regalloc->_framesize;
#if defined(IA64) && !defined(AIX)
  if (save_argument_registers()) {
    // 4815101: this is a stub with implicit and unknown precision fp args.
    // The usual spill mechanism can only generate stfd's in this case, which
    // doesn't work if the fp reg to spill contains a single-precision denorm.
    // Instead, we hack around the normal spill mechanism using stfspill's and
    // ldffill's in the MachProlog and MachEpilog emit methods.  We allocate
    // space here for the fp arg regs (f8-f15) we're going to thusly spill.
    //
    // If we ever implement 16-byte 'registers' == stack slots, we can
    // get rid of this hack and have SpillCopy generate stfspill/ldffill
    // instead of stfd/stfs/ldfd/ldfs.
    _frame_slots += 8*(16/BytesPerInt);
  }
#endif
  assert(_frame_slots >= 0 && _frame_slots < 1000000, "sanity check");

  if (has_mach_constant_base_node()) {
    uint add_size = 0;
    // Fill the constant table.
    // Note:  This must happen before shorten_branches.
    for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
      Block* b = _cfg->get_block(i);

      for (uint j = 0; j < b->number_of_nodes(); j++) {
        Node* n = b->get_node(j);

        // If the node is a MachConstantNode, evaluate it so that its
        // constant is entered into the constant table.
        if (n->is_MachConstant()) {
          MachConstantNode* machcon = n->as_MachConstant();
          machcon->eval_constant(C);
        } else if (n->is_Mach()) {
          // On Power there are more nodes that issue constants.
          add_size += (n->as_Mach()->ins_num_consts() * 8);
        }
      }
    }

    // Calculate the offsets of the constants and the size of the
    // constant table (including the padding to the next section).
    constant_table().calculate_offsets_and_size();
    const_req = constant_table().size() + add_size;
  }

  // Initialize the space for the BufferBlob used to find and verify
  // instruction size in MachNode::emit_size()
  init_scratch_buffer_blob(const_req);
  if (failing())  return NULL; // Out of memory

  // Pre-compute the length of blocks and replace
  // long branches with short ones if the machine supports it.
  shorten_branches(blk_starts, code_req, locs_req, stub_req);

  // nmethod and CodeBuffer count stubs & constants as part of method's code.
  // class HandlerImpl is platform-specific and defined in the *.ad files.
  int exception_handler_req = HandlerImpl::size_exception_handler() + MAX_stubs_size; // add marginal slop for handler
  int deopt_handler_req     = HandlerImpl::size_deopt_handler()     + MAX_stubs_size; // add marginal slop for handler
  stub_req += MAX_stubs_size;   // ensure per-stub margin
  code_req += MAX_inst_size;    // ensure per-instruction margin

  if (StressCodeBuffers)
    code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10;  // force expansion

  int total_req =
    const_req +
    code_req +
    pad_req +
    stub_req +
    exception_handler_req +
    deopt_handler_req;               // deopt handler

  if (has_method_handle_invokes())
    total_req += deopt_handler_req;  // deopt MH handler

  CodeBuffer* cb = code_buffer();
  cb->initialize(total_req, locs_req);

  // Have we run out of code space?
  if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
    C->record_failure("CodeCache is full");
    return NULL;
  }
  // Configure the code buffer.
  cb->initialize_consts_size(const_req);
  cb->initialize_stubs_size(stub_req);
  cb->initialize_oop_recorder(env()->oop_recorder());

  // fill in the nop array for bundling computations
  MachNode *_nop_list[Bundle::_nop_count];
  Bundle::initialize_nops(_nop_list, this);

  return cb;
}

//------------------------------fill_buffer------------------------------------
void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
  // blk_starts[] contains offsets calculated during short branch processing;
  // these offsets must not increase during the following steps.

  // Compute the size of first NumberOfLoopInstrToAlign instructions at head
  // of a loop. It is used to determine the padding for loop alignment.
  compute_loop_first_inst_sizes();

  // Create oopmap set.
  _oop_map_set = new OopMapSet();

  // !!!!! This preserves old handling of oopmaps for now
  debug_info()->set_oopmaps(_oop_map_set);

  uint nblocks  = _cfg->number_of_blocks();
  // Count and start of implicit null check instructions
  uint inct_cnt = 0;
  uint *inct_starts = NEW_RESOURCE_ARRAY(uint, nblocks+1);

  // Count and start of calls
  uint *call_returns = NEW_RESOURCE_ARRAY(uint, nblocks+1);

  uint  return_offset = 0;
  int nop_size = (new (this) MachNopNode())->size(_regalloc);

  int previous_offset = 0;
  int current_offset  = 0;
  int last_call_offset = -1;
  int last_avoid_back_to_back_offset = -1;
#ifdef ASSERT
  uint* jmp_target = NEW_RESOURCE_ARRAY(uint,nblocks);
  uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks);
  uint* jmp_size   = NEW_RESOURCE_ARRAY(uint,nblocks);
  uint* jmp_rule   = NEW_RESOURCE_ARRAY(uint,nblocks);
#endif

  // Create an array of node offsets for the assembly listing, if printing is enabled
#ifndef PRODUCT
  int *node_offsets      = NULL;
  uint node_offset_limit = unique();

  if (print_assembly())
    node_offsets         = NEW_RESOURCE_ARRAY(int, node_offset_limit);
#endif

  NonSafepointEmitter non_safepoints(this);  // emit non-safepoints lazily

  // Emit the constant table.
  if (has_mach_constant_base_node()) {
    constant_table().emit(*cb);
  }

  // Create an array of labels, one for each basic block
  Label *blk_labels = NEW_RESOURCE_ARRAY(Label, nblocks+1);
  for (uint i=0; i <= nblocks; i++) {
    blk_labels[i].init();
  }

  // ------------------
  // Now fill in the code buffer
  Node *delay_slot = NULL;
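  // delay_slot holds an instruction that the bundler placed in the delay
  // slot of an upcoming branch; it is emitted when that branch is reached.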

  for (uint i = 0; i < nblocks; i++) {
    Block* block = _cfg->get_block(i);
    Node* head = block->head();

    // If this block needs to start aligned (i.e, can be reached other
    // than by falling-thru from the previous block), then force the
    // start of a new bundle.
    if (Pipeline::requires_bundling() && starts_bundle(head)) {
      cb->flush_bundle(true);
    }

#ifdef ASSERT
    if (!block->is_connector()) {
      stringStream st;
      block->dump_head(_cfg, &st);
      MacroAssembler(cb).block_comment(st.as_string());
    }
    jmp_target[i] = 0;
    jmp_offset[i] = 0;
    jmp_size[i]   = 0;
    jmp_rule[i]   = 0;
#endif
    int blk_offset = current_offset;

    // Define the label at the beginning of the basic block
    MacroAssembler(cb).bind(blk_labels[block->_pre_order]);

    uint last_inst = block->number_of_nodes();

    // Emit block normally, except for last instruction.
    // Emit means "dump code bits into code buffer".
    for (uint j = 0; j<last_inst; j++) {

      // Get the node
      Node* n = block->get_node(j);

      // See if delay slots are supported
      if (valid_bundle_info(n) &&
          node_bundling(n)->used_in_unconditional_delay()) {
        assert(delay_slot == NULL, "no use of delay slot node");
        assert(n->size(_regalloc) == Pipeline::instr_unit_size(), "delay slot instruction wrong size");

        delay_slot = n;
        continue;
      }

      // If this starts a new instruction group, then flush the current one
      // (but allow split bundles)
      if (Pipeline::requires_bundling() && starts_bundle(n))
        cb->flush_bundle(false);

      // The following logic is duplicated in the code ifdeffed for
      // ENABLE_ZAP_DEAD_LOCALS which appears above in this file.  It
      // should be factored out.  Or maybe dispersed to the nodes?

      // Special handling for SafePoint/Call Nodes
      bool is_mcall = false;
      if (n->is_Mach()) {
        MachNode *mach = n->as_Mach();
        is_mcall = n->is_MachCall();
        bool is_sfn = n->is_MachSafePoint();

        // If this requires all previous instructions be flushed, then do so
        if (is_sfn || is_mcall || mach->alignment_required() != 1) {
          cb->flush_bundle(true);
          current_offset = cb->insts_size();
        }

        // Padding may be needed again, since a previous instruction
        // could have been moved into the delay slot.

        // align the instruction if necessary
        int padding = mach->compute_padding(current_offset);
        // Make sure safepoint node for polling is distinct from a call's
        // return by adding a nop if needed.
        if (is_sfn && !is_mcall && padding == 0 && current_offset == last_call_offset) {
          padding = nop_size;
        }
        if (padding == 0 && mach->avoid_back_to_back(MachNode::AVOID_BEFORE) &&
            current_offset == last_avoid_back_to_back_offset) {
          // Keep certain instructions from being placed back to back.
          padding = nop_size;
        }

        if(padding > 0) {
          assert((padding % nop_size) == 0, "padding is not a multiple of NOP size");
          int nops_cnt = padding / nop_size;
          MachNode *nop = new (this) MachNopNode(nops_cnt);
          block->insert_node(nop, j++);
          last_inst++;
          _cfg->map_node_to_block(nop, block);
          nop->emit(*cb, _regalloc);
          cb->flush_bundle(true);
          current_offset = cb->insts_size();
        }

        // Remember the start of the last call in a basic block
        if (is_mcall) {
          MachCallNode *mcall = mach->as_MachCall();

          // This destination address is NOT PC-relative
          mcall->method_set((intptr_t)mcall->entry_point());

          // Save the return address
          call_returns[block->_pre_order] = current_offset + mcall->ret_addr_offset();

          if (mcall->is_MachCallLeaf()) {
            is_mcall = false;
            is_sfn = false;
          }
        }

        // sfn is valid whenever mcall is valid, because of inheritance
        if (is_sfn || is_mcall) {

          // Handle special safepoint nodes for synchronization
          if (!is_mcall) {
            MachSafePointNode *sfn = mach->as_MachSafePoint();
            // !!!!! Stubs only need an oopmap right now, so bail out
            if (sfn->jvms()->method() == NULL) {
              // Write the oopmap directly to the code blob??!!
#             ifdef ENABLE_ZAP_DEAD_LOCALS
              assert( !is_node_getting_a_safepoint(sfn),  "logic does not match; false positive");
#             endif
              continue;
            }
          } // End synchronization

          non_safepoints.observe_safepoint(mach->as_MachSafePoint()->jvms(),
                                           current_offset);
          Process_OopMap_Node(mach, current_offset);
        } // End if safepoint

        // If this is a null check, then add the start of the previous instruction to the list
        else if( mach->is_MachNullCheck() ) {
          inct_starts[inct_cnt++] = previous_offset;
        }

        // If this is a branch, then fill in the label with the target BB's label
        else if (mach->is_MachBranch()) {
          // This requires the TRUE branch target be in succs[0]
          uint block_num = block->non_connector_successor(0)->_pre_order;

          // Try to replace a long branch if the delay slot is not used;
          // this mostly helps back branches, since a forward branch's
          // distance is not finalized yet.
          bool delay_slot_is_used = valid_bundle_info(n) &&
                                    node_bundling(n)->use_unconditional_delay();
          if (!delay_slot_is_used && mach->may_be_short_branch()) {
            assert(delay_slot == NULL, "not expecting delay slot node");
            int br_size = n->size(_regalloc);
            int offset = blk_starts[block_num] - current_offset;
            if (block_num >= i) {
              // Current and following block's offset are not
              // finalized yet, adjust distance by the difference
              // between calculated and final offsets of current block.
              offset -= (blk_starts[i] - blk_offset);
            }
            // In the following code a nop could be inserted before
            // the branch which will increase the backward distance.
            bool needs_padding = (current_offset == last_avoid_back_to_back_offset);
            if (needs_padding && offset <= 0)
              offset -= nop_size;

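            // Illustrative example (values are hypothetical): a back branch
            // at current_offset 120 targeting a block that starts at offset
            // 64 gives offset = 64 - 120 = -56; if a nop may still be
            // inserted before the branch, the reach is tightened by nop_size
            // before asking the matcher whether a short form covers it.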
            if (_matcher->is_short_branch_offset(mach->rule(), br_size, offset)) {
              // We've got a winner.  Replace this branch.
              MachNode* replacement = mach->as_MachBranch()->short_branch_version(this);

              // Update the jmp_size.
              int new_size = replacement->size(_regalloc);
              assert((br_size - new_size) >= (int)nop_size, "short_branch size should be smaller");
              // Insert padding between avoid_back_to_back branches.
              if (needs_padding && replacement->avoid_back_to_back(MachNode::AVOID_BEFORE)) {
                MachNode *nop = new (this) MachNopNode();
                block->insert_node(nop, j++);
                _cfg->map_node_to_block(nop, block);
                last_inst++;
                nop->emit(*cb, _regalloc);
                cb->flush_bundle(true);
                current_offset = cb->insts_size();
              }
#ifdef ASSERT
              jmp_target[i] = block_num;
              jmp_offset[i] = current_offset - blk_offset;
              jmp_size[i]   = new_size;
              jmp_rule[i]   = mach->rule();
#endif
              block->map_node(replacement, j);
              mach->subsume_by(replacement, C);
              n    = replacement;
              mach = replacement;
            }
          }
          mach->as_MachBranch()->label_set( &blk_labels[block_num], block_num );
        } else if (mach->ideal_Opcode() == Op_Jump) {
          for (uint h = 0; h < block->_num_succs; h++) {
            Block* succs_block = block->_succs[h];
            for (uint j = 1; j < succs_block->num_preds(); j++) {
              Node* jpn = succs_block->pred(j);
              if (jpn->is_JumpProj() && jpn->in(0) == mach) {
                uint block_num = succs_block->non_connector()->_pre_order;
                Label *blkLabel = &blk_labels[block_num];
                mach->add_case_label(jpn->as_JumpProj()->proj_no(), blkLabel);
              }
            }
          }
        }
#ifdef ASSERT
        // Check that oop-store precedes the card-mark
        else if (mach->ideal_Opcode() == Op_StoreCM) {
          uint storeCM_idx = j;
          int count = 0;
          for (uint prec = mach->req(); prec < mach->len(); prec++) {
            Node *oop_store = mach->in(prec);  // Precedence edge
            if (oop_store == NULL) continue;
            count++;
            uint i4;
            for (i4 = 0; i4 < last_inst; ++i4) {
              if (block->get_node(i4) == oop_store) {
                break;
              }
            }
            // Note: This test can provide a false failure if other precedence
            // edges have been added to the storeCMNode.
            assert(i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store");
          }
          assert(count > 0, "storeCM expects at least one precedence edge");
        }
#endif
        else if (!n->is_Proj()) {
          // Remember the beginning of the previous instruction, in case
          // it's followed by a flag-kill and a null-check.  Happens on
          // Intel all the time, with add-to-memory kind of opcodes.
          previous_offset = current_offset;
        }

        // Not an else-if!
        // If this is a trap-based cmp, then add its offset to the list.
        if (mach->is_TrapBasedCheckNode()) {
          inct_starts[inct_cnt++] = current_offset;
        }
      }

      // Verify that there is sufficient space remaining
      cb->insts()->maybe_expand_to_ensure_remaining(MAX_inst_size);
      if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
        C->record_failure("CodeCache is full");
        return;
      }

      // Save the offset for the listing
#ifndef PRODUCT
      if (node_offsets && n->_idx < node_offset_limit)
        node_offsets[n->_idx] = cb->insts_size();
#endif

      // "Normal" instruction case
      DEBUG_ONLY( uint instr_offset = cb->insts_size(); )
      n->emit(*cb, _regalloc);
      current_offset  = cb->insts_size();

      // Above we only verified that there is enough space in the instruction section.
      // However, the instruction may emit stubs that cause code buffer expansion.
      // Bail out here if expansion failed due to a lack of code cache space.
      if (failing()) {
        return;
      }

#ifdef ASSERT
      uint n_size = n->size(_regalloc);
      if (n_size < (current_offset-instr_offset)) {
        MachNode* mach = n->as_Mach();
        n->dump();
        mach->dump_format(_regalloc, tty);
        tty->print_cr(" n_size (%d), current_offset (%d), instr_offset (%d)", n_size, current_offset, instr_offset);
        Disassembler::decode(cb->insts_begin() + instr_offset, cb->insts_begin() + current_offset + 1, tty);
        tty->print_cr(" ------------------- ");
        BufferBlob* blob = this->scratch_buffer_blob();
        address blob_begin = blob->content_begin();
        Disassembler::decode(blob_begin, blob_begin + n_size + 1, tty);
        assert(false, "wrong size of mach node");
      }
#endif
      non_safepoints.observe_instruction(n, current_offset);

      // mcall is the last "call" that can be a safepoint.  Record it so we
      // can see if a poll will directly follow it, in which case we'll need
      // a pad to make the PcDesc sites unique (see 5010568).  This can be
      // slightly inaccurate but conservative in the case that the return
      // address is not actually at current_offset; that is a small price to pay.

      if (is_mcall) {
        last_call_offset = current_offset;
      }

      if (n->is_Mach() && n->as_Mach()->avoid_back_to_back(MachNode::AVOID_AFTER)) {
        // Keep certain instructions from being placed back to back.
        last_avoid_back_to_back_offset = current_offset;
      }

      // See if this instruction has a delay slot
      if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) {
        assert(delay_slot != NULL, "expecting delay slot node");

        // Back up 1 instruction
        cb->set_insts_end(cb->insts_end() - Pipeline::instr_unit_size());

        // Save the offset for the listing
#ifndef PRODUCT
        if (node_offsets && delay_slot->_idx < node_offset_limit)
          node_offsets[delay_slot->_idx] = cb->insts_size();
#endif

        // Support a SafePoint in the delay slot
        if (delay_slot->is_MachSafePoint()) {
          MachNode *mach = delay_slot->as_Mach();
          // !!!!! Stubs only need an oopmap right now, so bail out
          if (!mach->is_MachCall() && mach->as_MachSafePoint()->jvms()->method() == NULL) {
            // Write the oopmap directly to the code blob??!!
#           ifdef ENABLE_ZAP_DEAD_LOCALS
            assert( !is_node_getting_a_safepoint(mach),  "logic does not match; false positive");
#           endif
            delay_slot = NULL;
            continue;
          }

          int adjusted_offset = current_offset - Pipeline::instr_unit_size();
          non_safepoints.observe_safepoint(mach->as_MachSafePoint()->jvms(),
                                           adjusted_offset);
          // Generate an OopMap entry
          Process_OopMap_Node(mach, adjusted_offset);
        }

        // Insert the delay slot instruction
        delay_slot->emit(*cb, _regalloc);

        // Don't reuse it
        delay_slot = NULL;
      }

    } // End for all instructions in block

    // If the next block is the top of a loop, pad this block out to align
    // the loop top a little. Helps prevent pipe stalls at loop back branches.
    if (i < nblocks-1) {
      Block *nb = _cfg->get_block(i + 1);
      int padding = nb->alignment_padding(current_offset);
      if( padding > 0 ) {
        MachNode *nop = new (this) MachNopNode(padding / nop_size);
        block->insert_node(nop, block->number_of_nodes());
        _cfg->map_node_to_block(nop, block);
        nop->emit(*cb, _regalloc);
        current_offset = cb->insts_size();
      }
    }
    // Verify that the distance computed earlier for forward short branches
    // is still valid.
    guarantee((int)(blk_starts[i+1] - blk_starts[i]) >= (current_offset - blk_offset), "shouldn't increase block size");

    // Save new block start offset
    blk_starts[i] = blk_offset;
  } // End of for all blocks
  blk_starts[nblocks] = current_offset;

  non_safepoints.flush_at_end();

  // Offset too large?
  if (failing())  return;

  // Define a pseudo-label at the end of the code
  MacroAssembler(cb).bind( blk_labels[nblocks] );

  // Compute the size of the first block
  _first_block_size = blk_labels[1].loc_pos() - blk_labels[0].loc_pos();

  assert(cb->insts_size() < 500000, "method is unreasonably large");

#ifdef ASSERT
  for (uint i = 0; i < nblocks; i++) { // For all blocks
    if (jmp_target[i] != 0) {
      int br_size = jmp_size[i];
      int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_offset[i]);
      if (!_matcher->is_short_branch_offset(jmp_rule[i], br_size, offset)) {
        tty->print_cr("target (%d) - jmp_offset(%d) = offset (%d), jump_size(%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_offset[i], offset, br_size, i, jmp_target[i]);
        assert(false, "Displacement too large for short jmp");
      }
    }
  }
#endif

#ifndef PRODUCT
  // Information on the size of the method, without the extraneous code
  Scheduling::increment_method_size(cb->insts_size());
#endif

  // ------------------
  // Fill in exception table entries.
  FillExceptionTables(inct_cnt, call_returns, inct_starts, blk_labels);

  // Only java methods have exception handlers and deopt handlers
  // class HandlerImpl is platform-specific and defined in the *.ad files.
  if (_method) {
    // Emit the exception handler code.
    _code_offsets.set_value(CodeOffsets::Exceptions, HandlerImpl::emit_exception_handler(*cb));
    if (failing()) {
      return; // CodeBuffer::expand failed
    }
    // Emit the deopt handler code.
    _code_offsets.set_value(CodeOffsets::Deopt, HandlerImpl::emit_deopt_handler(*cb));

    // Emit the MethodHandle deopt handler code (if required).
    if (has_method_handle_invokes() && !failing()) {
      // We can use the same code as for the normal deopt handler, we
      // just need a different entry point address.
      _code_offsets.set_value(CodeOffsets::DeoptMH, HandlerImpl::emit_deopt_handler(*cb));
    }
  }

  // One last check for failed CodeBuffer::expand:
  if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
    C->record_failure("CodeCache is full");
    return;
  }

#ifndef PRODUCT
  // Dump the assembly code, including basic-block numbers
  if (print_assembly()) {
    ttyLocker ttyl;  // keep the following output all in one block
    if (!VMThread::should_terminate()) {  // test this under the tty lock
      // This output goes directly to the tty, not the compiler log.
      // To enable tools to match it up with the compilation activity,
      // be sure to tag this tty output with the compile ID.
      if (xtty != NULL) {
        xtty->head("opto_assembly compile_id='%d'%s", compile_id(),
                   is_osr_compilation()    ? " compile_kind='osr'" :
                   "");
      }
      if (method() != NULL) {
        method()->print_metadata();
      } else if (stub_name() != NULL) {
        tty->print_cr("Generating RuntimeStub - %s", stub_name());
      }
      dump_asm(node_offsets, node_offset_limit);
      if (xtty != NULL) {
        xtty->tail("opto_assembly");
      }
    }
  }
#endif

}

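//--------------------------FillExceptionTables--------------------------------
// Build the exception tables from the per-block label positions: each Catch
// node yields a handler-table subtable keyed by its call's return offset,
// while MachNullCheck and trap-based check nodes yield implicit-exception
// entries mapping the faulting pc to the continuation block.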
void Compile::FillExceptionTables(uint cnt, uint *call_returns, uint *inct_starts, Label *blk_labels) {
  _inc_table.set_size(cnt);

  uint inct_cnt = 0;
  for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
    Block* block = _cfg->get_block(i);
    Node *n = NULL;
    int j;

    // Find the branch; ignore trailing NOPs.
    for (j = block->number_of_nodes() - 1; j >= 0; j--) {
      n = block->get_node(j);
      if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con) {
        break;
      }
    }

    // If we didn't find anything, continue
    if (j < 0) {
      continue;
    }

    // Compute ExceptionHandlerTable subtable entry and add it
    // (skip empty blocks)
    if (n->is_Catch()) {

      // Get the offset of the return from the call
      uint call_return = call_returns[block->_pre_order];
#ifdef ASSERT
      assert( call_return > 0, "no call seen for this basic block" );
      while (block->get_node(--j)->is_MachProj()) ;
      assert(block->get_node(j)->is_MachCall(), "CatchProj must follow call");
#endif
      // The last instruction is a CatchNode; find its CatchProjNodes.
      int nof_succs = block->_num_succs;
      // allocate space
      GrowableArray<intptr_t> handler_bcis(nof_succs);
      GrowableArray<intptr_t> handler_pcos(nof_succs);
      // iterate through all successors
      for (int j = 0; j < nof_succs; j++) {
        Block* s = block->_succs[j];
        bool found_p = false;
        for (uint k = 1; k < s->num_preds(); k++) {
          Node* pk = s->pred(k);
          if (pk->is_CatchProj() && pk->in(0) == n) {
            const CatchProjNode* p = pk->as_CatchProj();
            found_p = true;
            // add the corresponding handler bci & pco information
            if (p->_con != CatchProjNode::fall_through_index) {
              // p leads to an exception handler (and is not fall through)
              assert(s == _cfg->get_block(s->_pre_order), "bad numbering");
              // no duplicates, please
              if (!handler_bcis.contains(p->handler_bci())) {
                uint block_num = s->non_connector()->_pre_order;
                handler_bcis.append(p->handler_bci());
                handler_pcos.append(blk_labels[block_num].loc_pos());
              }
            }
          }
        }
        assert(found_p, "no matching predecessor found");
        // Note:  Due to empty block removal, one block may have
        // several CatchProj inputs, from the same Catch.
      }

      // Set the offset of the return from the call
      _handler_table.add_subtable(call_return, &handler_bcis, NULL, &handler_pcos);
      continue;
    }

    // Handle implicit null exception table updates
    if (n->is_MachNullCheck()) {
      uint block_num = block->non_connector_successor(0)->_pre_order;
      _inc_table.append(inct_starts[inct_cnt++], blk_labels[block_num].loc_pos());
      continue;
    }
    // Handle implicit exception table updates: trap instructions.
    if (n->is_Mach() && n->as_Mach()->is_TrapBasedCheckNode()) {
      uint block_num = block->non_connector_successor(0)->_pre_order;
      _inc_table.append(inct_starts[inct_cnt++], blk_labels[block_num].loc_pos());
      continue;
    }
  } // End of for all blocks fill in exception table entries
}

// Static Variables
#ifndef PRODUCT
uint Scheduling::_total_nop_size = 0;
uint Scheduling::_total_method_size = 0;
uint Scheduling::_total_branches = 0;
uint Scheduling::_total_unconditional_delays = 0;
uint Scheduling::_total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+1];
#endif

// Initializer for class Scheduling

Scheduling::Scheduling(Arena *arena, Compile &compile)
  : _arena(arena),
    _cfg(compile.cfg()),
    _regalloc(compile.regalloc()),
    _reg_node(arena),
    _bundle_instr_count(0),
    _bundle_cycle_number(0),
    _scheduled(arena),
    _available(arena),
    _next_node(NULL),
    _bundle_use(0, 0, resource_count, &_bundle_use_elements[0]),
    _pinch_free_list(arena)
#ifndef PRODUCT
  , _branches(0)
  , _unconditional_delays(0)
#endif
{
  // Create a MachNopNode
  _nop = new (&compile) MachNopNode();

  // Now that the nops are in the array, save the count
  // (but allow entries for the nops)
  _node_bundling_limit = compile.unique();
  uint node_max = _regalloc->node_regs_max_index();

  compile.set_node_bundling_limit(_node_bundling_limit);

  // This one is persistent within the Compile class
  _node_bundling_base = NEW_ARENA_ARRAY(compile.comp_arena(), Bundle, node_max);

  // Allocate space for fixed-size arrays
  _node_latency    = NEW_ARENA_ARRAY(arena, unsigned short, node_max);
  _uses            = NEW_ARENA_ARRAY(arena, short,          node_max);
  _current_latency = NEW_ARENA_ARRAY(arena, unsigned short, node_max);

  // Clear the arrays
  memset(_node_bundling_base, 0, node_max * sizeof(Bundle));
  memset(_node_latency,       0, node_max * sizeof(unsigned short));
  memset(_uses,               0, node_max * sizeof(short));
  memset(_current_latency,    0, node_max * sizeof(unsigned short));

  // Clear the bundling information
  memcpy(_bundle_use_elements, Pipeline_Use::elaborated_elements, sizeof(Pipeline_Use::elaborated_elements));

  // Get the last node
  Block* block = _cfg->get_block(_cfg->number_of_blocks() - 1);

  _next_node = block->get_node(block->number_of_nodes() - 1);
}

#ifndef PRODUCT
// Scheduling destructor
Scheduling::~Scheduling() {
  _total_branches             += _branches;
  _total_unconditional_delays += _unconditional_delays;
}
#endif

// Step ahead "i" cycles
void Scheduling::step(uint i) {

  Bundle *bundle = node_bundling(_next_node);
  bundle->set_starts_bundle();

  // Update the bundle record, but leave the flags information alone
  if (_bundle_instr_count > 0) {
    bundle->set_instr_count(_bundle_instr_count);
    bundle->set_resources_used(_bundle_use.resourcesUsed());
  }

  // Update the state information
  _bundle_instr_count = 0;
  _bundle_cycle_number += i;
  _bundle_use.step(i);
}

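// Advance to a fresh bundle and fully reset the pipeline resource state;
// used when the following block cannot continue the current bundle.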
void Scheduling::step_and_clear() {
  Bundle *bundle = node_bundling(_next_node);
  bundle->set_starts_bundle();

  // Update the bundle record
  if (_bundle_instr_count > 0) {
    bundle->set_instr_count(_bundle_instr_count);
    bundle->set_resources_used(_bundle_use.resourcesUsed());

    _bundle_cycle_number += 1;
  }

  // Clear the bundling information
  _bundle_instr_count = 0;
  _bundle_use.reset();

  memcpy(_bundle_use_elements,
    Pipeline_Use::elaborated_elements,
    sizeof(Pipeline_Use::elaborated_elements));
}

// Perform instruction scheduling and bundling over the sequence of
// instructions in backwards order.
void Compile::ScheduleAndBundle() {

  // Don't optimize this if it isn't a method
  if (!_method)
    return;

  // Don't optimize this if scheduling is disabled
  if (!do_scheduling())
    return;

  // Scheduling code works only with pairs (8 bytes) maximum.
  if (max_vector_size() > 8)
    return;

  NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); )

  // Create a data structure for all the scheduling information
  Scheduling scheduling(Thread::current()->resource_area(), *this);

  // Walk backwards over each basic block, computing the needed alignment
  scheduling.DoScheduling();
}

// Compute the latency of all the instructions.  This is fairly simple,
// because we already have a legal ordering.  Walk over the instructions
// from first to last, and compute the latency of the instruction based
// on the latency of the preceding instruction(s).
void Scheduling::ComputeLocalLatenciesForward(const Block *bb) {
#ifndef PRODUCT
  if (_cfg->C->trace_opto_output())
    tty->print("# -> ComputeLocalLatenciesForward\n");
#endif

  // Walk over all the schedulable instructions
  for( uint j=_bb_start; j < _bb_end; j++ ) {

    // This is a kludge, forcing all latency calculations to start at 1.
    // Used to allow latency 0 to force an instruction to the beginning
    // of the bb
    uint latency = 1;
    Node *use = bb->get_node(j);
    uint nlen = use->len();

    // Walk over all the inputs
    for ( uint k=0; k < nlen; k++ ) {
      Node *def = use->in(k);
      if (!def)
        continue;

      uint l = _node_latency[def->_idx] + use->latency(k);
      if (latency < l)
        latency = l;
    }

    _node_latency[use->_idx] = latency;

#ifndef PRODUCT
    if (_cfg->C->trace_opto_output()) {
      tty->print("# latency %4d: ", latency);
      use->dump();
    }
#endif
  }

#ifndef PRODUCT
  if (_cfg->C->trace_opto_output())
    tty->print("# <- ComputeLocalLatenciesForward\n");
#endif

} // end ComputeLocalLatenciesForward

// See if this node fits into the present instruction bundle
bool Scheduling::NodeFitsInBundle(Node *n) {
  uint n_idx = n->_idx;

  // If this is the unconditional delay instruction, then it fits
  if (n == _unconditional_delay_slot) {
#ifndef PRODUCT
    if (_cfg->C->trace_opto_output())
      tty->print("#     NodeFitsInBundle [%4d]: TRUE; is in unconditional delay slot\n", n->_idx);
#endif
    return (true);
  }

  // If the node cannot be scheduled this cycle, skip it
  if (_current_latency[n_idx] > _bundle_cycle_number) {
#ifndef PRODUCT
    if (_cfg->C->trace_opto_output())
      tty->print("#     NodeFitsInBundle [%4d]: FALSE; latency %4d > %d\n",
        n->_idx, _current_latency[n_idx], _bundle_cycle_number);
#endif
    return (false);
  }

  const Pipeline *node_pipeline = n->pipeline();

  uint instruction_count = node_pipeline->instructionCount();
  if (node_pipeline->mayHaveNoCode() && n->size(_regalloc) == 0)
    instruction_count = 0;
  else if (node_pipeline->hasBranchDelay() && !_unconditional_delay_slot)
    instruction_count++;

  if (_bundle_instr_count + instruction_count > Pipeline::_max_instrs_per_cycle) {
#ifndef PRODUCT
    if (_cfg->C->trace_opto_output())
      tty->print("#     NodeFitsInBundle [%4d]: FALSE; too many instructions: %d > %d\n",
        n->_idx, _bundle_instr_count + instruction_count, Pipeline::_max_instrs_per_cycle);
#endif
    return (false);
  }

  // Don't allow non-machine nodes to be handled this way
  if (!n->is_Mach() && instruction_count == 0)
    return (false);

  // See if there is any overlap
  uint delay = _bundle_use.full_latency(0, node_pipeline->resourceUse());

  if (delay > 0) {
#ifndef PRODUCT
    if (_cfg->C->trace_opto_output())
      tty->print("#     NodeFitsInBundle [%4d]: FALSE; functional units overlap\n", n_idx);
#endif
    return false;
  }

#ifndef PRODUCT
  if (_cfg->C->trace_opto_output())
    tty->print("#     NodeFitsInBundle [%4d]:  TRUE\n", n_idx);
#endif

  return true;
}

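// Pick the next node to place in the current bundle: prefer the first
// available non-projection node that fits; if nothing fits, fall back to
// the highest-priority (head-of-list) entry.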
Node * Scheduling::ChooseNodeToBundle() {
  uint siz = _available.size();

  if (siz == 0) {

#ifndef PRODUCT
    if (_cfg->C->trace_opto_output())
      tty->print("#   ChooseNodeToBundle: NULL\n");
#endif
    return (NULL);
  }

  // Fast path, if only 1 instruction in the bundle
  if (siz == 1) {
#ifndef PRODUCT
    if (_cfg->C->trace_opto_output()) {
      tty->print("#   ChooseNodeToBundle (only 1): ");
      _available[0]->dump();
    }
#endif
    return (_available[0]);
  }

  // Don't bother, if the bundle is already full
  if (_bundle_instr_count < Pipeline::_max_instrs_per_cycle) {
    for ( uint i = 0; i < siz; i++ ) {
      Node *n = _available[i];

      // Skip projections, we'll handle them another way
      if (n->is_Proj())
        continue;

      // This presupposes that instructions are inserted into the
      // available list in legality order; i.e., instructions that
      // must be issued first are at the head of the list.
      if (NodeFitsInBundle(n)) {
#ifndef PRODUCT
        if (_cfg->C->trace_opto_output()) {
          tty->print("#   ChooseNodeToBundle: ");
          n->dump();
        }
#endif
        return (n);
      }
    }
  }

  // Nothing fits in this bundle; choose the highest-priority node
#ifndef PRODUCT
  if (_cfg->C->trace_opto_output()) {
    tty->print("#   ChooseNodeToBundle: ");
    _available[0]->dump();
  }
#endif

  return _available[0];
}

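// Insert node n into the available list, which is kept sorted by increasing
// latency.  A compare that feeds the already-scheduled branch is moved
// ahead of other entries with the same latency.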
void Scheduling::AddNodeToAvailableList(Node *n) {
  assert( !n->is_Proj(), "projections never directly made available" );
#ifndef PRODUCT
  if (_cfg->C->trace_opto_output()) {
    tty->print("#   AddNodeToAvailableList: ");
    n->dump();
  }
#endif

  int latency = _current_latency[n->_idx];

  // Insert in latency order (insertion sort)
  uint i;
  for ( i=0; i < _available.size(); i++ )
    if (_current_latency[_available[i]->_idx] > latency)
      break;

  // Special check for compares following branches
  if( n->is_Mach() && _scheduled.size() > 0 ) {
    int op = n->as_Mach()->ideal_Opcode();
    Node *last = _scheduled[0];
    if( last->is_MachIf() && last->in(1) == n &&
        ( op == Op_CmpI ||
          op == Op_CmpU ||
          op == Op_CmpUL ||
          op == Op_CmpP ||
          op == Op_CmpF ||
          op == Op_CmpD ||
          op == Op_CmpL ) ) {

      // Recalculate position, moving to front of same latency
      for ( i=0 ; i < _available.size(); i++ )
        if (_current_latency[_available[i]->_idx] >= latency)
          break;
    }
  }

  // Insert the node in the available list
  _available.insert(i, n);

#ifndef PRODUCT
  if (_cfg->C->trace_opto_output())
    dump_available();
#endif
}

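// Walk the inputs of n, propagating latency through block-local defs and
// decrementing their use counts; a def whose count drops to zero becomes
// available for scheduling.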
void Scheduling::DecrementUseCounts(Node *n, const Block *bb) {
  for ( uint i=0; i < n->len(); i++ ) {
    Node *def = n->in(i);
    if (!def) continue;
    if( def->is_Proj() )        // If this is a machine projection, then
      def = def->in(0);         // propagate usage thru to the base instruction

    if(_cfg->get_block_for_node(def) != bb) { // Ignore if not block-local
      continue;
    }

    // Compute the latency
    uint l = _bundle_cycle_number + n->latency(i);
    if (_current_latency[def->_idx] < l)
      _current_latency[def->_idx] = l;

    // If the def has no remaining uses in this block, it becomes available
    if ((--_uses[def->_idx]) == 0)
      AddNodeToAvailableList(def);
  }
}

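// Commit node n to the schedule: remove it from the available list, try to
// fill a branch delay slot, step the bundle state as needed to satisfy
// latency and resource constraints, then push n (and any trailing
// projections) onto the scheduled list.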
void Scheduling::AddNodeToBundle(Node *n, const Block *bb) {
#ifndef PRODUCT
  if (_cfg->C->trace_opto_output()) {
    tty->print("#   AddNodeToBundle: ");
    n->dump();
  }
#endif

  // Remove this from the available list
  uint i;
  for (i = 0; i < _available.size(); i++)
    if (_available[i] == n)
      break;
  assert(i < _available.size(), "entry in _available list not found");
  _available.remove(i);

  // See if this fits in the current bundle
  const Pipeline *node_pipeline = n->pipeline();
  const Pipeline_Use& node_usage = node_pipeline->resourceUse();

  // Check for instructions to be placed in the delay slot. We
  // do this before we actually schedule the current instruction,
  // because the delay slot follows the current instruction.
  if (Pipeline::_branch_has_delay_slot &&
      node_pipeline->hasBranchDelay() &&
      !_unconditional_delay_slot) {

    uint siz = _available.size();

    // Conditional branches can support an instruction that
    // is unconditionally executed and not dependent on the
    // branch, OR a conditionally executed instruction if
    // the branch is taken.  In practice, this means that
    // the first instruction at the branch target is
    // copied into the delay slot, and the branch goes to
    // the instruction after that at the branch target.
    if ( n->is_MachBranch() ) {

      assert( !n->is_MachNullCheck(), "should not look for delay slot for Null Check" );
      assert( !n->is_Catch(),         "should not look for delay slot for Catch" );

#ifndef PRODUCT
      _branches++;
#endif

      // Look for an instruction on the available list that is
      // not dependent on the branch
      for (uint i = 0; i < siz; i++) {
        Node *d = _available[i];
        const Pipeline *avail_pipeline = d->pipeline();

        // Don't allow safepoints in the branch shadow, that will
        // cause a number of difficulties
        if ( avail_pipeline->instructionCount() == 1 &&
            !avail_pipeline->hasMultipleBundles() &&
            !avail_pipeline->hasBranchDelay() &&
            Pipeline::instr_has_unit_size() &&
            d->size(_regalloc) == Pipeline::instr_unit_size() &&
            NodeFitsInBundle(d) &&
            !node_bundling(d)->used_in_delay()) {

          if (d->is_Mach() && !d->is_MachSafePoint()) {
            // A node that fits in the delay slot was found, so we need to
            // set the appropriate bits in the bundle pipeline information so
            // that it correctly indicates resource usage.  Later, when we
            // attempt to add this instruction to the bundle, we will skip
            // setting the resource usage.
            _unconditional_delay_slot = d;
            node_bundling(n)->set_use_unconditional_delay();
            node_bundling(d)->set_used_in_unconditional_delay();
            _bundle_use.add_usage(avail_pipeline->resourceUse());
            _current_latency[d->_idx] = _bundle_cycle_number;
            _next_node = d;
            ++_bundle_instr_count;
#ifndef PRODUCT
            _unconditional_delays++;
#endif
            break;
          }
        }
      }
    }

    // No delay slot, add a nop to the usage
    if (!_unconditional_delay_slot) {
      // See if adding an instruction in the delay slot will overflow
      // the bundle.
      if (!NodeFitsInBundle(_nop)) {
#ifndef PRODUCT
        if (_cfg->C->trace_opto_output())
          tty->print("#  *** STEP(1 instruction for delay slot) ***\n");
#endif
        step(1);
      }

      _bundle_use.add_usage(_nop->pipeline()->resourceUse());
      _next_node = _nop;
      ++_bundle_instr_count;
    }

    // See if the instruction in the delay slot requires
    // stepping to a new bundle.
    if (!NodeFitsInBundle(n)) {
#ifndef PRODUCT
      if (_cfg->C->trace_opto_output())
        tty->print("#  *** STEP(branch won't fit) ***\n");
#endif
      // Update the state information
      _bundle_instr_count = 0;
      _bundle_cycle_number += 1;
      _bundle_use.step(1);
    }
  }

  // Get the number of instructions
  uint instruction_count = node_pipeline->instructionCount();
  if (node_pipeline->mayHaveNoCode() && n->size(_regalloc) == 0)
    instruction_count = 0;

  // Compute the latency information
  uint delay = 0;

  if (instruction_count > 0 || !node_pipeline->mayHaveNoCode()) {
    int relative_latency = _current_latency[n->_idx] - _bundle_cycle_number;
    if (relative_latency < 0)
      relative_latency = 0;

    delay = _bundle_use.full_latency(relative_latency, node_usage);

    // Does not fit in this bundle, start a new one
    if (delay > 0) {
      step(delay);

#ifndef PRODUCT
      if (_cfg->C->trace_opto_output())
        tty->print("#  *** STEP(%d) ***\n", delay);
#endif
    }
  }

  // If this was placed in the delay slot, ignore it
  if (n != _unconditional_delay_slot) {

    if (delay == 0) {
      if (node_pipeline->hasMultipleBundles()) {
#ifndef PRODUCT
        if (_cfg->C->trace_opto_output())
          tty->print("#  *** STEP(multiple instructions) ***\n");
#endif
        step(1);
      }

      else if (instruction_count + _bundle_instr_count > Pipeline::_max_instrs_per_cycle) {
#ifndef PRODUCT
        if (_cfg->C->trace_opto_output())
          tty->print("#  *** STEP(%d >= %d instructions) ***\n",
            instruction_count + _bundle_instr_count,
            Pipeline::_max_instrs_per_cycle);
#endif
        step(1);
      }
    }

    if (node_pipeline->hasBranchDelay() && !_unconditional_delay_slot)
      _bundle_instr_count++;

    // Set the node's latency
    _current_latency[n->_idx] = _bundle_cycle_number;

    // Now merge the functional unit information
    if (instruction_count > 0 || !node_pipeline->mayHaveNoCode())
      _bundle_use.add_usage(node_usage);

    // Increment the number of instructions in this bundle
    _bundle_instr_count += instruction_count;

    // Remember this node for later
    if (n->is_Mach())
      _next_node = n;
  }

  // It's possible to have a BoxLock in the graph and in the _bbs mapping but
  // not in the bb->_nodes array.  This happens for debug-info-only BoxLocks.
  // 'Schedule' them (basically ignore in the schedule) but do not insert them
  // into the block.  All other scheduled nodes get put in the schedule here.
  int op = n->Opcode();
  if( (op == Op_Node && n->req() == 0) || // anti-dependence node OR
      (op != Op_Node &&         // Not an unused antidependence node and
       // not an unallocated boxlock
       (OptoReg::is_valid(_regalloc->get_reg_first(n)) || op != Op_BoxLock)) ) {

    // Push any trailing projections
    if( bb->get_node(bb->number_of_nodes()-1) != n ) {
      for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
        Node *foi = n->fast_out(i);
        if( foi->is_Proj() )
          _scheduled.push(foi);
      }
    }

    // Put the instruction in the schedule list
    _scheduled.push(n);
  }

#ifndef PRODUCT
  if (_cfg->C->trace_opto_output())
    dump_available();
#endif

  // Walk all the definitions, decrementing use counts, and
  // if a definition has a 0 use count, place it in the available list.
  DecrementUseCounts(n,bb);
}

// This method sets the use count within a basic block.  We will ignore all
// uses outside the current basic block.  As we are doing a backwards walk,
// any node we reach that has a use count of 0 may be scheduled.  This also
// avoids the problem of cyclic references from phi nodes, as long as phi
// nodes are at the front of the basic block.  This method also initializes
// the available list to the set of instructions that have no uses within this
// basic block.
void Scheduling::ComputeUseCount(const Block *bb) {
#ifndef PRODUCT
  if (_cfg->C->trace_opto_output())
    tty->print("# -> ComputeUseCount\n");
#endif

  // Clear the list of available and scheduled instructions, just in case
  _available.clear();
  _scheduled.clear();

  // No delay slot specified
  _unconditional_delay_slot = NULL;

#ifdef ASSERT
  for( uint i=0; i < bb->number_of_nodes(); i++ )
    assert( _uses[bb->get_node(i)->_idx] == 0, "_use array not clean" );
#endif

  // Force the _uses count to never go to zero for unschedulable pieces
  // of the block.
  for( uint k = 0; k < _bb_start; k++ )
    _uses[bb->get_node(k)->_idx] = 1;
  for( uint l = _bb_end; l < bb->number_of_nodes(); l++ )
    _uses[bb->get_node(l)->_idx] = 1;

  // Iterate backwards over the instructions in the block.  Don't count the
  // branch projections at end or the block header instructions.
  for( uint j = _bb_end-1; j >= _bb_start; j-- ) {
    Node *n = bb->get_node(j);
    if( n->is_Proj() ) continue; // Projections handled another way

    // Account for all uses
    for ( uint k = 0; k < n->len(); k++ ) {
      Node *inp = n->in(k);
      if (!inp) continue;
      assert(inp != n, "no cycles allowed" );
      if (_cfg->get_block_for_node(inp) == bb) { // Block-local use?
        if (inp->is_Proj()) { // Skip through Proj's
          inp = inp->in(0);
        }
        ++_uses[inp->_idx];     // Count 1 block-local use
      }
    }

    // If this instruction has a 0 use count, then it is available
    if (!_uses[n->_idx]) {
      _current_latency[n->_idx] = _bundle_cycle_number;
      AddNodeToAvailableList(n);
    }

#ifndef PRODUCT
    if (_cfg->C->trace_opto_output()) {
      tty->print("#   uses: %3d: ", _uses[n->_idx]);
      n->dump();
    }
#endif
  }

#ifndef PRODUCT
  if (_cfg->C->trace_opto_output())
    tty->print("# <- ComputeUseCount\n");
#endif
}

// This routine performs scheduling on each basic block in reverse order,
// using instruction latencies and taking into account function unit
// availability.
void Scheduling::DoScheduling() {
#ifndef PRODUCT
  if (_cfg->C->trace_opto_output())
    tty->print("# -> DoScheduling\n");
#endif

  Block *succ_bb = NULL;
  Block *bb;

  // Walk over all the basic blocks in reverse order
  for (int i = _cfg->number_of_blocks() - 1; i >= 0; succ_bb = bb, i--) {
    bb = _cfg->get_block(i);

#ifndef PRODUCT
    if (_cfg->C->trace_opto_output()) {
      tty->print("#  Schedule BB#%03d (initial)\n", i);
      for (uint j = 0; j < bb->number_of_nodes(); j++) {
        bb->get_node(j)->dump();
      }
    }
#endif

    // On the head node, skip processing
    if (bb == _cfg->get_root_block()) {
      continue;
    }

    // Skip empty, connector blocks
    if (bb->is_connector())
      continue;

    // If the following block is not the sole successor of
    // this one, then reset the pipeline information
    if (bb->_num_succs != 1 || bb->non_connector_successor(0) != succ_bb) {
#ifndef PRODUCT
      if (_cfg->C->trace_opto_output()) {
        tty->print("*** bundle start of next BB, node %d, for %d instructions\n",
                   _next_node->_idx, _bundle_instr_count);
      }
#endif
      step_and_clear();
    }

    // Leave untouched the starting instruction, any Phis, a CreateEx node
    // or Top.  bb->get_node(_bb_start) is the first schedulable instruction.
    _bb_end = bb->number_of_nodes()-1;
    for( _bb_start=1; _bb_start <= _bb_end; _bb_start++ ) {
      Node *n = bb->get_node(_bb_start);
      // Things not matched, like Phi nodes and ProjNodes, don't get scheduled.
      // Also, MachIdealNodes do not get scheduled.
      if( !n->is_Mach() ) continue;     // Skip non-machine nodes
      MachNode *mach = n->as_Mach();
      int iop = mach->ideal_Opcode();
      if( iop == Op_CreateEx ) continue; // CreateEx is pinned
      if( iop == Op_Con ) continue;      // Do not schedule Top
      if( iop == Op_Node &&     // Do not schedule PhiNodes, ProjNodes
          mach->pipeline() == MachNode::pipeline_class() &&
          !n->is_SpillCopy() && !n->is_MachMerge() )  // Breakpoints, Prolog, etc
        continue;
      break;                    // Funny loop structure to be sure...
    }
    // Compute last "interesting" instruction in block - last instruction we
    // might schedule.  _bb_end points just after last schedulable inst.  We
    // normally schedule conditional branches (despite them being forced last
    // in the block), because they have delay slots we can fill.  Calls all
    // have their delay slots filled in the template expansions, so we don't
    // bother scheduling them.
    Node *last = bb->get_node(_bb_end);
    // Ignore trailing NOPs.
    while (_bb_end > 0 && last->is_Mach() &&
           last->as_Mach()->ideal_Opcode() == Op_Con) {
      last = bb->get_node(--_bb_end);
    }
    assert(!last->is_Mach() || last->as_Mach()->ideal_Opcode() != Op_Con, "");
    if( last->is_Catch() ||
       // Exclude unreachable path case when Halt node is in a separate block.
       (_bb_end > 1 && last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) {
      // There must be a prior call.  Skip it.
      while( !bb->get_node(--_bb_end)->is_MachCall() ) {
        assert( bb->get_node(_bb_end)->is_MachProj(), "skipping projections after expected call" );
      }
    } else if( last->is_MachNullCheck() ) {
      // Back up so the last null-checked memory instruction is
      // outside the schedulable range. Skip over the nullcheck,
      // projection, and the memory nodes.
      Node *mem = last->in(1);
      do {
        _bb_end--;
      } while (mem != bb->get_node(_bb_end));
    } else {
      // Set _bb_end to point after last schedulable inst.
      _bb_end++;
    }

    assert( _bb_start <= _bb_end, "inverted block ends" );

    // Compute the register antidependencies for the basic block
    ComputeRegisterAntidependencies(bb);
    if (_cfg->C->failing())  return;  // too many D-U pinch points

    // Compute intra-bb latencies for the nodes
    ComputeLocalLatenciesForward(bb);

    // Compute the usage within the block, and set the list of all nodes
    // in the block that have no uses within the block.
    ComputeUseCount(bb);

    // Schedule the remaining instructions in the block
    while ( _available.size() > 0 ) {
      Node *n = ChooseNodeToBundle();
      guarantee(n != NULL, "no nodes available");
      AddNodeToBundle(n,bb);
    }

    assert( _scheduled.size() == _bb_end - _bb_start, "wrong number of instructions" );
#ifdef ASSERT
    for( uint l = _bb_start; l < _bb_end; l++ ) {
      Node *n = bb->get_node(l);
      uint m;
      for( m = 0; m < _bb_end-_bb_start; m++ )
        if( _scheduled[m] == n )
          break;
      assert( m < _bb_end-_bb_start, "instruction missing in schedule" );
    }
#endif

    // Now copy the instructions (in reverse order) back to the block
    for ( uint k = _bb_start; k < _bb_end; k++ )
      bb->map_node(_scheduled[_bb_end-k-1], k);

#ifndef PRODUCT
    if (_cfg->C->trace_opto_output()) {
      tty->print("#  Schedule BB#%03d (final)\n", i);
      uint current = 0;
      for (uint j = 0; j < bb->number_of_nodes(); j++) {
        Node *n = bb->get_node(j);
        if( valid_bundle_info(n) ) {
          Bundle *bundle = node_bundling(n);
          if (bundle->instr_count() > 0 || bundle->flags() > 0) {
            tty->print("*** Bundle: ");
            bundle->dump();
          }
          n->dump();
        }
      }
    }
#endif
#ifdef ASSERT
    verify_good_schedule(bb, "after block local scheduling");
#endif
  }

#ifndef PRODUCT
  if (_cfg->C->trace_opto_output())
    tty->print("# <- DoScheduling\n");
#endif

  // Record final node-bundling array location
  _regalloc->C->set_node_bundling_base(_node_bundling_base);

} // end DoScheduling

// Verify that no live-range used in the block is killed in the block by a
// wrong DEF.  This doesn't verify live-ranges that span blocks.

// Check for edge existence.  Used to avoid adding redundant precedence edges.
static bool edge_from_to( Node *from, Node *to ) {
  for( uint i=0; i<from->len(); i++ )
    if( from->in(i) == to )
      return true;
  return false;
}

#ifdef ASSERT
void Scheduling::verify_do_def( Node *n, OptoReg::Name def, const char *msg ) {
  // Check for bad kills
  if( OptoReg::is_valid(def) ) { // Ignore stores & control flow
    Node *prior_use = _reg_node[def];
    if( prior_use && !edge_from_to(prior_use,n) ) {
      tty->print("%s = ",OptoReg::as_VMReg(def)->name());
      n->dump();
      tty->print_cr("...");
      prior_use->dump();
      assert(edge_from_to(prior_use,n),msg);
    }
    _reg_node.map(def,NULL); // Kill live USEs
  }
}

void Scheduling::verify_good_schedule( Block *b, const char *msg ) {

  // Zap to something reasonable for the verify code
  _reg_node.clear();

  // Walk over the block backwards.  Check to make sure each DEF doesn't
  // kill a live value (other than the one it's supposed to).  Add each
  // USE to the live set.
  for( uint i = b->number_of_nodes()-1; i >= _bb_start; i-- ) {
    Node *n = b->get_node(i);
    int n_op = n->Opcode();
    if( n_op == Op_MachProj && n->ideal_reg() == MachProjNode::fat_proj ) {
      // Fat-proj kills a slew of registers
      RegMask rm = n->out_RegMask();// Make local copy
      while( rm.is_NotEmpty() ) {
        OptoReg::Name kill = rm.find_first_elem();
        rm.Remove(kill);
        verify_do_def( n, kill, msg );
      }
    } else if( n_op != Op_Node ) { // Avoid brand new antidependence nodes
      // Get DEF'd registers the normal way
      verify_do_def( n, _regalloc->get_reg_first(n), msg );
      verify_do_def( n, _regalloc->get_reg_second(n), msg );
    }

    // Now make all USEs live
    for( uint i=1; i<n->req(); i++ ) {
      Node *def = n->in(i);
      assert(def != 0, "input edge required");
      OptoReg::Name reg_lo = _regalloc->get_reg_first(def);
      OptoReg::Name reg_hi = _regalloc->get_reg_second(def);
      if( OptoReg::is_valid(reg_lo) ) {
        assert(!_reg_node[reg_lo] || edge_from_to(_reg_node[reg_lo],def), msg);
        _reg_node.map(reg_lo,n);
      }
      if( OptoReg::is_valid(reg_hi) ) {
        assert(!_reg_node[reg_hi] || edge_from_to(_reg_node[reg_hi],def), msg);
        _reg_node.map(reg_hi,n);
      }
    }

  }

  // Zap to something reasonable for the Antidependence code
  _reg_node.clear();
}
#endif

// Conditionally add precedence edges.  Avoid putting edges on Projs.
static void add_prec_edge_from_to( Node *from, Node *to ) {
  if( from->is_Proj() ) {       // Put precedence edge on Proj's input
    assert( from->req() == 1 && (from->len() == 1 || from->in(1)==0), "no precedence edges on projections" );
    from = from->in(0);
  }
  if( from != to &&             // No cycles (for things like LD L0,[L0+4] )
      !edge_from_to( from, to ) ) // Avoid duplicate edge
    from->add_prec(to);
}

void Scheduling::anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def ) {
  if( !OptoReg::is_valid(def_reg) ) // Ignore stores & control flow
    return;

  Node *pinch = _reg_node[def_reg]; // Get pinch point
  if ((pinch == NULL) || _cfg->get_block_for_node(pinch) != b || // No pinch-point yet?
      is_def ) {    // Check for a true def (not a kill)
    _reg_node.map(def_reg,def); // Record def/kill as the optimistic pinch-point
    return;
  }

  Node *kill = def;             // Rename 'def' to more descriptive 'kill'
  debug_only( def = (Node*)((intptr_t)0xdeadbeef); )

  // After some number of kills there _may_ be a later def
  Node *later_def = NULL;

  // Finding a kill requires a real pinch-point.
  // Check for not already having a pinch-point.
  // Pinch points are Op_Node's.
  if( pinch->Opcode() != Op_Node ) { // Or later-def/kill as pinch-point?
    later_def = pinch;            // Must be def/kill as optimistic pinch-point
    if ( _pinch_free_list.size() > 0) {
      pinch = _pinch_free_list.pop();
    } else {
      pinch = new (_cfg->C) Node(1); // Pinch point to-be
    }
    if (pinch->_idx >= _regalloc->node_regs_max_index()) {
      _cfg->C->record_method_not_compilable("too many D-U pinch points");
      return;
    }
    _cfg->map_node_to_block(pinch, b);      // Pretend it's valid in this block (lazy init)
    _reg_node.map(def_reg,pinch); // Record pinch-point
    //_regalloc->set_bad(pinch->_idx); // Already initialized this way.
    if( later_def->outcnt() == 0 || later_def->ideal_reg() == MachProjNode::fat_proj ) { // Distinguish def from kill
      pinch->init_req(0, _cfg->C->top());     // set not NULL for the next call
      add_prec_edge_from_to(later_def,pinch); // Add edge from kill to pinch
      later_def = NULL;           // and no later def
    }
    pinch->set_req(0,later_def);  // Hook later def so we can find it
  } else {                        // Else have valid pinch point
    if( pinch->in(0) )            // If there is a later-def
      later_def = pinch->in(0);   // Get it
  }

  // Add output-dependence edge from later def to kill
  if( later_def )               // If there is some original def
    add_prec_edge_from_to(later_def,kill); // Add edge from def to kill

  // See if current kill is also a use, and so is forced to be the pinch-point.
  if( pinch->Opcode() == Op_Node ) {
    Node *uses = kill->is_Proj() ? kill->in(0) : kill;
    for( uint i=1; i<uses->req(); i++ ) {
      if( _regalloc->get_reg_first(uses->in(i)) == def_reg ||
          _regalloc->get_reg_second(uses->in(i)) == def_reg ) {
        // Yes, found a use/kill pinch-point
        pinch->set_req(0,NULL);  // yank pointer to later-def
        pinch->replace_by(kill); // Move anti-dep edges up
        pinch = kill;
        _reg_node.map(def_reg,pinch);
        return;
      }
    }
  }

  // Add edge from kill to pinch-point
  add_prec_edge_from_to(kill,pinch);
}
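
// (A hedged trace of anti_do_def's cases for one register R, with
// hypothetical nodes: the first def/kill of R seen on the backwards pass is
// simply recorded in _reg_node; a second kill promotes that entry to a
// fresh Op_Node pinch point and hangs both kills off it via precedence
// edges; and a kill that also uses R is itself forced to become the pinch
// point.)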

void Scheduling::anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ) {
  if( !OptoReg::is_valid(use_reg) ) // Ignore stores & control flow
    return;
  Node *pinch = _reg_node[use_reg]; // Get pinch point
  // Check for no later def_reg/kill in block
  if ((pinch != NULL) && _cfg->get_block_for_node(pinch) == b &&
      // Use has to be block-local as well
      _cfg->get_block_for_node(use) == b) {
    if( pinch->Opcode() == Op_Node && // Real pinch-point (not optimistic?)
        pinch->req() == 1 ) {   // pinch not yet in block?
      pinch->del_req(0);        // yank pointer to later-def, also set flag
      // Insert the pinch-point in the block just after the last use
      b->insert_node(pinch, b->find_node(use) + 1);
      _bb_end++;                // Increase size scheduled region in block
    }

    add_prec_edge_from_to(pinch,use);
  }
}

// We insert antidependences between the reads and following write of
// allocated registers to prevent illegal code motion. Hopefully, the
// number of added references should be fairly small, especially as we
// are only adding references within the current basic block.
void Scheduling::ComputeRegisterAntidependencies(Block *b) {

#ifdef ASSERT
  verify_good_schedule(b,"before block local scheduling");
#endif

  // A valid schedule, for each register independently, is an endless cycle
  // of: a def, then some uses (connected to the def by true dependencies),
  // then some kills (defs with no uses), finally the cycle repeats with a new
  // def.  The uses are allowed to float relative to each other, as are the
  // kills.  No use is allowed to slide past a kill (or def).  This requires
  // antidependencies between all uses of a single def and all kills that
  // follow, up to the next def.  More edges are redundant, because later defs
  // & kills are already serialized with true or antidependencies.  To keep
  // the edge count down, we add a 'pinch point' node if there's more than
  // one use or more than one kill/def.

  // We add dependencies in one bottom-up pass.

  // For each instruction we handle its DEFs/KILLs, then its USEs.

  // For each DEF/KILL, we check to see if there's a prior DEF/KILL for this
  // register.  If not, we record the DEF/KILL in _reg_node, the
  // register-to-def mapping.  If there is a prior DEF/KILL, we insert a
  // "pinch point", a new Node that's in the graph but not in the block.
  // We put edges from the prior and current DEF/KILLs to the pinch point.
  // We put the pinch point in _reg_node.  If there's already a pinch point
  // we merely add an edge from the current DEF/KILL to the pinch point.

  // After doing the DEF/KILLs, we handle USEs.  For each used register, we
  // put an edge from the pinch point to the USE.

  // To be expedient, the _reg_node array is pre-allocated for the whole
  // compilation.  _reg_node is lazily initialized; it either contains a NULL,
  // or a valid def/kill/pinch-point, or a leftover node from some prior
  // block.  Leftover node from some prior block is treated like a NULL (no
  // prior def, so no anti-dependence needed).  Valid def is distinguished by
  // it being in the current block.
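  //
  // A hedged end-to-end sketch (hypothetical register R and node names):
  // for a block containing, in program order,
  //     n1: R = ...         (def)
  //     n2: ... = R         (use)
  //     n3: call            (kills R)
  // the backwards pass first records n3 in _reg_node[R]; then anti_do_use
  // adds a single precedence edge making n2 precede n3 directly, since one
  // use and one kill need no pinch point; finally n1's def overwrites
  // _reg_node[R].  Only multiple uses and kills of R would introduce an
  // Op_Node pinch between them, as in the diagram further below.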
  bool fat_proj_seen = false;
  uint last_safept = _bb_end-1;
  Node* end_node         = (_bb_end-1 >= _bb_start) ? b->get_node(last_safept) : NULL;
  Node* last_safept_node = end_node;
  for( uint i = _bb_end-1; i >= _bb_start; i-- ) {
    Node *n = b->get_node(i);
    int is_def = n->outcnt();   // def if some uses prior to adding precedence edges
    if( n->is_MachProj() && n->ideal_reg() == MachProjNode::fat_proj ) {
      // Fat-proj kills a slew of registers
      // This can add edges to 'n' and obscure whether or not it was a def,
      // hence the is_def flag.
      fat_proj_seen = true;
      RegMask rm = n->out_RegMask();// Make local copy
      while( rm.is_NotEmpty() ) {
        OptoReg::Name kill = rm.find_first_elem();
        rm.Remove(kill);
        anti_do_def( b, n, kill, is_def );
      }
    } else {
      // Get DEF'd registers the normal way
      anti_do_def( b, n, _regalloc->get_reg_first(n), is_def );
      anti_do_def( b, n, _regalloc->get_reg_second(n), is_def );
    }

    // Kill projections on a branch should appear to occur on the
    // branch, not afterwards, so grab the masks from the projections
    // and process them.
    if (n->is_MachBranch() || (n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_Jump)) {
      for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
        Node* use = n->fast_out(i);
        if (use->is_Proj()) {
          RegMask rm = use->out_RegMask();// Make local copy
          while( rm.is_NotEmpty() ) {
            OptoReg::Name kill = rm.find_first_elem();
            rm.Remove(kill);
            anti_do_def( b, n, kill, false );
          }
        }
      }
    }

    // Check each register used by this instruction for a following DEF/KILL
    // that must occur afterward and requires an anti-dependence edge.
    for( uint j=0; j<n->req(); j++ ) {
      Node *def = n->in(j);
      if( def ) {
        assert( !def->is_MachProj() || def->ideal_reg() != MachProjNode::fat_proj, "" );
        anti_do_use( b, n, _regalloc->get_reg_first(def) );
        anti_do_use( b, n, _regalloc->get_reg_second(def) );
      }
    }
    // Do not allow defs of new derived values to float above GC
    // points unless the base is definitely available at the GC point.

    Node *m = b->get_node(i);

    // Add precedence edge from following safepoint to use of derived pointer
    if( last_safept_node != end_node &&
        m != last_safept_node) {
      for (uint k = 1; k < m->req(); k++) {
        const Type *t = m->in(k)->bottom_type();
        if( t->isa_oop_ptr() &&
            t->is_ptr()->offset() != 0 ) {
          last_safept_node->add_prec( m );
          break;
        }
      }
    }

    if( n->jvms() ) {           // Precedence edge from derived to safept
      // Check if last_safept_node was moved by pinch-point insertion in anti_do_use()
      if( b->get_node(last_safept) != last_safept_node ) {
        last_safept = b->find_node(last_safept_node);
      }
      for( uint j=last_safept; j > i; j-- ) {
        Node *mach = b->get_node(j);
        if( mach->is_Mach() && mach->as_Mach()->ideal_Opcode() == Op_AddP )
          mach->add_prec( n );
      }
      last_safept = i;
      last_safept_node = m;
    }
  }

  if (fat_proj_seen) {
    // Garbage collect pinch nodes that were not consumed.
    // They are usually created by a fat kill MachProj for a call.
    garbage_collect_pinch_nodes();
  }
}

// Garbage collect pinch nodes for reuse by other blocks.
//
// The block scheduler's insertion of anti-dependence
// edges creates many pinch nodes when the block contains
// 2 or more Calls.  A pinch node is used to prevent a
// combinatorial explosion of edges.  If a set of kills for a
// register is anti-dependent on a set of uses (or defs), rather
// than adding an edge in the graph between each pair of kill
// and use (or def), a pinch is inserted between them:
//
//            use1   use2  use3
//                \   |   /
//                 \  |  /
//                  pinch
//                 /  |  \
//                /   |   \
//            kill1 kill2 kill3
//
// One pinch node is created per register killed when
// the second call is encountered during a backwards pass
// over the block.  Most of these pinch nodes are never
// wired into the graph because the register is never
// used or def'ed in the block.
//
void Scheduling::garbage_collect_pinch_nodes() {
#ifndef PRODUCT
    if (_cfg->C->trace_opto_output()) tty->print("Reclaimed pinch nodes:");
#endif
    int trace_cnt = 0;
    for (uint k = 0; k < _reg_node.Size(); k++) {
      Node* pinch = _reg_node[k];
      if ((pinch != NULL) && pinch->Opcode() == Op_Node &&
          // no precedence input edges
          (pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) {
        cleanup_pinch(pinch);
        _pinch_free_list.push(pinch);
        _reg_node.map(k, NULL);
#ifndef PRODUCT
        if (_cfg->C->trace_opto_output()) {
          trace_cnt++;
          if (trace_cnt > 40) {
            tty->print("\n");
            trace_cnt = 0;
          }
          tty->print(" %d", pinch->_idx);
        }
#endif
      }
    }
#ifndef PRODUCT
    if (_cfg->C->trace_opto_output()) tty->print("\n");
#endif
}

// Clean up a pinch node for reuse.
void Scheduling::cleanup_pinch( Node *pinch ) {
  assert (pinch && pinch->Opcode() == Op_Node && pinch->req() == 1, "just checking");

  for (DUIterator_Last imin, i = pinch->last_outs(imin); i >= imin; ) {
    Node* use = pinch->last_out(i);
    uint uses_found = 0;
    for (uint j = use->req(); j < use->len(); j++) {
      if (use->in(j) == pinch) {
        use->rm_prec(j);
        uses_found++;
      }
    }
    assert(uses_found > 0, "must be a precedence edge");
    i -= uses_found;    // we deleted 1 or more copies of this edge
  }
  // May have a later_def entry
  pinch->set_req(0, NULL);
}

#ifndef PRODUCT

void Scheduling::dump_available() const {
  tty->print("#Availist  ");
  for (uint i = 0; i < _available.size(); i++)
    tty->print(" N%d/l%d", _available[i]->_idx,_current_latency[_available[i]->_idx]);
  tty->cr();
}

// Print Scheduling Statistics
void Scheduling::print_statistics() {
  // Print the size added by nops for bundling
  tty->print("Nops added %d bytes to total of %d bytes",
    _total_nop_size, _total_method_size);
  if (_total_method_size > 0)
    tty->print(", for %.2f%%",
      ((double)_total_nop_size) / ((double) _total_method_size) * 100.0);
  tty->print("\n");

  // Print the number of branch shadows filled
  if (Pipeline::_branch_has_delay_slot) {
    tty->print("Of %d branches, %d had unconditional delay slots filled",
      _total_branches, _total_unconditional_delays);
    if (_total_branches > 0)
      tty->print(", for %.2f%%",
        ((double)_total_unconditional_delays) / ((double)_total_branches) * 100.0);
    tty->print("\n");
  }

  uint total_instructions = 0, total_bundles = 0;

  for (uint i = 1; i <= Pipeline::_max_instrs_per_cycle; i++) {
    uint bundle_count   = _total_instructions_per_bundle[i];
    total_instructions += bundle_count * i;
    total_bundles      += bundle_count;
  }

  if (total_bundles > 0)
    tty->print("Average ILP (excluding nops) is %.2f\n",
      ((double)total_instructions) / ((double)total_bundles));
}
#endif
C:\hotspot-69087d08d473\src\share\vm/opto/output.hpp
/*
 * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_VM_OPTO_OUTPUT_HPP
#define SHARE_VM_OPTO_OUTPUT_HPP

#include "opto/block.hpp"
#include "opto/node.hpp"
#if defined AD_MD_HPP
# include AD_MD_HPP
#elif defined TARGET_ARCH_MODEL_x86_32
# include "adfiles/ad_x86_32.hpp"
#elif defined TARGET_ARCH_MODEL_x86_64
# include "adfiles/ad_x86_64.hpp"
#elif defined TARGET_ARCH_MODEL_aarch64
# include "adfiles/ad_aarch64.hpp"
#elif defined TARGET_ARCH_MODEL_sparc
# include "adfiles/ad_sparc.hpp"
#elif defined TARGET_ARCH_MODEL_zero
# include "adfiles/ad_zero.hpp"
#elif defined TARGET_ARCH_MODEL_ppc_64
# include "adfiles/ad_ppc_64.hpp"
#endif

class Arena;
class Bundle;
class Block;
class Block_Array;
class Node;
class Node_Array;
class Node_List;
class PhaseCFG;
class PhaseChaitin;
class Pipeline_Use_Element;
class Pipeline_Use;

#ifndef PRODUCT
#define DEBUG_ARG(x) , x
#else
#define DEBUG_ARG(x)
#endif

// Define the initial sizes for allocation of the resizable code buffer
enum {
  initial_code_capacity  =  16 * 1024,
  initial_stub_capacity  =   4 * 1024,
  initial_const_capacity =   4 * 1024,
  initial_locs_capacity  =   3 * 1024
};

//------------------------------Scheduling----------------------------------
// This class contains all the information necessary to implement instruction
// scheduling and bundling.
class Scheduling {

private:
  // Arena to use
  Arena *_arena;

  // Control-Flow Graph info
  PhaseCFG *_cfg;

  // Register Allocation info
  PhaseRegAlloc *_regalloc;

  // Number of nodes in the method
  uint _node_bundling_limit;

  // List of scheduled nodes. Generated in reverse order
  Node_List _scheduled;

  // List of nodes currently available for choosing for scheduling
  Node_List _available;

  // For each instruction beginning a bundle, the number of following
  // nodes to be bundled with it.
  Bundle *_node_bundling_base;

  // Mapping from register to Node
  Node_List _reg_node;

  // Free list for pinch nodes.
  Node_List _pinch_free_list;

  // Latency from the beginning of the containing basic block (base 1)
  // for each node.
  unsigned short *_node_latency;

  // Number of uses of this node within the containing basic block.
  short *_uses;

  // Schedulable portion of current block.  Skips Region/Phi/CreateEx up
  // front, branch+proj at end.  Also skips Catch/CProj (same as
  // branch-at-end), plus just-prior exception-throwing call.
  uint _bb_start, _bb_end;

  // Latency from the end of the basic block as scheduled
  unsigned short *_current_latency;

  // Remember the next node
  Node *_next_node;

  // Use this for an unconditional branch delay slot
  Node *_unconditional_delay_slot;

  // Pointer to a Nop
  MachNopNode *_nop;

  // Length of the current bundle, in instructions
  uint _bundle_instr_count;

  // Current Cycle number, for computing latencies and bundling
  uint _bundle_cycle_number;

  // Bundle information
  Pipeline_Use_Element _bundle_use_elements[resource_count];
  Pipeline_Use         _bundle_use;

  // Dump the available list
  void dump_available() const;

public:
  Scheduling(Arena *arena, Compile &compile);

  // Destructor
  NOT_PRODUCT( ~Scheduling(); )

  // Step ahead "i" cycles
  void step(uint i);

  // Step ahead 1 cycle, and clear the bundle state (for example,
  // at a branch target)
  void step_and_clear();

  Bundle* node_bundling(const Node *n) {
    assert(valid_bundle_info(n), "oob");
    return (&_node_bundling_base[n->_idx]);
  }

  bool valid_bundle_info(const Node *n) const {
    return (_node_bundling_limit > n->_idx);
  }

  bool starts_bundle(const Node *n) const {
    return (_node_bundling_limit > n->_idx && _node_bundling_base[n->_idx].starts_bundle());
  }
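
  // (Typical guarded use, as in the trace code in output.cpp:
  //   if (valid_bundle_info(n)) { Bundle *bundle = node_bundling(n); ... })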

  // Do the scheduling
  void DoScheduling();

  // Compute the local latencies walking forward over the list of
  // nodes for a basic block
  void ComputeLocalLatenciesForward(const Block *bb);

  // Compute the register antidependencies within a basic block
  void ComputeRegisterAntidependencies(Block *bb);
  void verify_do_def( Node *n, OptoReg::Name def, const char *msg );
  void verify_good_schedule( Block *b, const char *msg );
  void anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def );
  void anti_do_use( Block *b, Node *use, OptoReg::Name use_reg );

  // Add a node to the current bundle
  void AddNodeToBundle(Node *n, const Block *bb);

  // Add a node to the list of available nodes
  void AddNodeToAvailableList(Node *n);

  // Compute the local use count for the nodes in a block, and compute
  // the list of instructions with no uses in the block as available
  void ComputeUseCount(const Block *bb);

  // Choose an instruction from the available list to add to the bundle
  Node * ChooseNodeToBundle();

  // See if this Node fits into the currently accumulating bundle
  bool NodeFitsInBundle(Node *n);

  // Decrement the use count for a node
  void DecrementUseCounts(Node *n, const Block *bb);

  // Garbage collect pinch nodes for reuse by other blocks.
  void garbage_collect_pinch_nodes();
  // Clean up a pinch node for reuse (helper for above).
  void cleanup_pinch( Node *pinch );

  // Information for statistics gathering
#ifndef PRODUCT
private:
  // Gather information on size of nops relative to total
  uint _branches, _unconditional_delays;

  static uint _total_nop_size, _total_method_size;
  static uint _total_branches, _total_unconditional_delays;
  static uint _total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+1];

public:
  static void print_statistics();

  static void increment_instructions_per_bundle(uint i) {
    _total_instructions_per_bundle[i]++;
  }

  static void increment_nop_size(uint s) {
    _total_nop_size += s;
  }

  static void increment_method_size(uint s) {
    _total_method_size += s;
  }
#endif

};

#endif // SHARE_VM_OPTO_OUTPUT_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/parse.hpp
/*
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_VM_OPTO_PARSE_HPP
#define SHARE_VM_OPTO_PARSE_HPP

#include "ci/ciMethodData.hpp"
#include "ci/ciTypeFlow.hpp"
#include "compiler/methodLiveness.hpp"
#include "libadt/vectset.hpp"
#include "oops/generateOopMap.hpp"
#include "opto/graphKit.hpp"
#include "opto/subnode.hpp"

class BytecodeParseHistogram;
class InlineTree;
class Parse;
class SwitchRange;


//------------------------------InlineTree-------------------------------------
class InlineTree : public ResourceObj {
  friend class VMStructs;

  Compile*    C;                  // cache
  JVMState*   _caller_jvms;       // state of caller
  ciMethod*   _method;            // method being called by the caller_jvms
  InlineTree* _caller_tree;
  uint        _count_inline_bcs;  // Accumulated count of inlined bytecodes
  // Call-site count / interpreter invocation count, scaled recursively.
  // Always between 0.0 and 1.0.  Represents the percentage of the method's
  // total execution time used at this call site.
  const float _site_invoke_ratio;
  const int   _max_inline_level;  // the maximum inline level for this sub-tree (may be adjusted)
  float compute_callee_frequency( int caller_bci ) const;

  GrowableArray<InlineTree*> _subtrees;

  void print_impl(outputStream* st, int indent) const PRODUCT_RETURN;
  const char* _msg;
protected:
  InlineTree(Compile* C,
             const InlineTree* caller_tree,
             ciMethod* callee_method,
             JVMState* caller_jvms,
             int caller_bci,
             float site_invoke_ratio,
             int max_inline_level);
  InlineTree *build_inline_tree_for_callee(ciMethod* callee_method,
                                           JVMState* caller_jvms,
                                           int caller_bci);
  bool        try_to_inline(ciMethod* callee_method,
                            ciMethod* caller_method,
                            int caller_bci,
                            JVMState* jvms,
                            ciCallProfile& profile,
                            WarmCallInfo* wci_result,
                            bool& should_delay);
  bool        should_inline(ciMethod* callee_method,
                            ciMethod* caller_method,
                            int caller_bci,
                            ciCallProfile& profile,
                            WarmCallInfo* wci_result);
  bool        should_not_inline(ciMethod* callee_method,
                                ciMethod* caller_method,
                                JVMState* jvms,
                                WarmCallInfo* wci_result);
  void        print_inlining(ciMethod* callee_method, int caller_bci,
                             ciMethod* caller_method, bool success) const;

  InlineTree* caller_tree()       const { return _caller_tree;  }
  InlineTree* callee_at(int bci, ciMethod* m) const;
  int         inline_level()      const { return stack_depth(); }
  int         stack_depth()       const { return _caller_jvms ? _caller_jvms->depth() : 0; }
  const char* msg()               const { return _msg; }
  void        set_msg(const char* msg)  { _msg = msg; }
public:
  static const char* check_can_parse(ciMethod* callee);

  static InlineTree* build_inline_tree_root();
  static InlineTree* find_subtree_from_root(InlineTree* root, JVMState* jvms, ciMethod* callee);

  // For temporary (stack-allocated, stateless) InlineTrees ("ilts"):
  InlineTree(Compile* c, ciMethod* callee_method, JVMState* caller_jvms, float site_invoke_ratio, int max_inline_level);

  // InlineTree enum
  enum InlineStyle {
    Inline_do_not_inline             =   0, //
    Inline_cha_is_monomorphic        =   1, //
    Inline_type_profile_monomorphic  =   2  //
  };

  // See if it is OK to inline.
  // The receiver is the inline tree for the caller.
  //
  // The result is a temperature indication.  If it is hot or cold,
  // inlining is immediate or undesirable.  Otherwise, the info block
  // returned is newly allocated and may be enqueued.
  //
  // If the method is inlinable, a new inline subtree is created on the fly,
  // and may be accessed by find_subtree_from_root.
  // The call_method is the dest_method for a special or static invocation.
  // The call_method is an optimized virtual method candidate otherwise.
  WarmCallInfo* ok_to_inline(ciMethod *call_method, JVMState* caller_jvms, ciCallProfile& profile, WarmCallInfo* wci, bool& should_delay);

  // Information about inlined method
  JVMState*   caller_jvms()       const { return _caller_jvms; }
  ciMethod   *method()            const { return _method; }
  int         caller_bci()        const { return _caller_jvms ? _caller_jvms->bci() : InvocationEntryBci; }
  uint        count_inline_bcs()  const { return _count_inline_bcs; }
  float       site_invoke_ratio() const { return _site_invoke_ratio; }

#ifndef PRODUCT
private:
  uint        _count_inlines;     // Count of inlined methods
public:
  // Debug information collected during parse
  uint        count_inlines()     const { return _count_inlines; }
#endif
  GrowableArray<InlineTree*> subtrees() { return _subtrees; }

  void print_value_on(outputStream* st) const PRODUCT_RETURN;

  bool        _forced_inline;     // Inlining was forced by CompilerOracle, ciReplay or annotation
  bool        forced_inline()     const { return _forced_inline; }
  // Count number of nodes in this subtree
  int         count() const;
  // Dump inlining replay data to the stream.
  void dump_replay_data(outputStream* out);
};


//-----------------------------------------------------------------------------
//------------------------------Parse------------------------------------------
// Parse bytecodes, build a Graph
class Parse : public GraphKit {
 public:
  // Per-block information needed by the parser:
  class Block {
   private:
    ciTypeFlow::Block* _flow;
    int                _pred_count;     // how many predecessors in CFG?
    int                _preds_parsed;   // how many of these have been parsed?
    uint               _count;          // how many times executed?  Currently only set by _goto's
    bool               _is_parsed;      // has this block been parsed yet?
    bool               _is_handler;     // is this block an exception handler?
    bool               _has_merged_backedge; // does this block have merged backedge?
    SafePointNode*     _start_map;      // all values flowing into this block
    MethodLivenessResult _live_locals;  // lazily initialized liveness bitmap

    int                _num_successors; // Includes only normal control flow.
    int                _all_successors; // Include exception paths also.
    Block**            _successors;

    // Use init_node/init_graph to initialize Blocks.
    // Block() : _live_locals((uintptr_t*)NULL,0) { ShouldNotReachHere(); }
    Block() : _live_locals(NULL,0) { ShouldNotReachHere(); }

   public:

    // Set up the block data structure itself.
    void init_node(Parse* outer, int po);
    // Set up the block's relations to other blocks.
    void init_graph(Parse* outer);

    ciTypeFlow::Block* flow() const        { return _flow; }
    int pred_count() const                 { return _pred_count; }
    int preds_parsed() const               { return _preds_parsed; }
    bool is_parsed() const                 { return _is_parsed; }
    bool is_handler() const                { return _is_handler; }
    void set_count( uint x )               { _count = x; }
    uint count() const                     { return _count; }

    SafePointNode* start_map() const       { assert(is_merged(),"");   return _start_map; }
    void set_start_map(SafePointNode* m)   { assert(!is_merged(), ""); _start_map = m; }

    // True after any predecessor flows control into this block
    bool is_merged() const                 { return _start_map != NULL; }

#ifdef ASSERT
    // True after backedge predecessor flows control into this block
    bool has_merged_backedge() const       { return _has_merged_backedge; }
    void mark_merged_backedge(Block* pred) {
      assert(is_SEL_head(), "should be loop head");
      if (pred != NULL && is_SEL_backedge(pred)) {
        assert(is_parsed(), "block should be parsed before merging backedges");
        _has_merged_backedge = true;
      }
    }
#endif

    // True when all non-exception predecessors have been parsed.
    bool is_ready() const                  { return preds_parsed() == pred_count(); }

    int num_successors() const             { return _num_successors; }
    int all_successors() const             { return _all_successors; }
    Block* successor_at(int i) const {
      assert((uint)i < (uint)all_successors(), "");
      return _successors[i];
    }
    Block* successor_for_bci(int bci);

    int start() const                      { return flow()->start(); }
    int limit() const                      { return flow()->limit(); }
    int rpo() const                        { return flow()->rpo(); }
    int start_sp() const                   { return flow()->stack_size(); }

    bool is_loop_head() const              { return flow()->is_loop_head(); }
    bool is_SEL_head() const               { return flow()->is_single_entry_loop_head(); }
    bool is_SEL_backedge(Block* pred) const{ return is_SEL_head() && pred->rpo() >= rpo(); }
    bool is_invariant_local(uint i) const  {
      const JVMState* jvms = start_map()->jvms();
      if (!jvms->is_loc(i) || flow()->outer()->has_irreducible_entry()) return false;
      return flow()->is_invariant_local(i - jvms->locoff());
    }
    bool can_elide_SEL_phi(uint i) const  { assert(is_SEL_head(),""); return is_invariant_local(i); }

    const Type* peek(int off=0) const      { return stack_type_at(start_sp() - (off+1)); }

    const Type* stack_type_at(int i) const;
    const Type* local_type_at(int i) const;
    static const Type* get_type(ciType* t) { return Type::get_typeflow_type(t); }

    bool has_trap_at(int bci) const        { return flow()->has_trap() && flow()->trap_bci() == bci; }

    // Call this just before parsing a block.
    void mark_parsed() {
      assert(!_is_parsed, "must parse each block exactly once");
      _is_parsed = true;
    }

    // Return the phi/region input index for the "current" pred,
    // and bump the pred number.  For historical reasons these index
    // numbers are handed out in descending order.  The last index is
    // always PhiNode::Input (i.e., 1).  The value returned is known
    // as a "path number" because it distinguishes by which path we are
    // entering the block.
    int next_path_num() {
      assert(preds_parsed() < pred_count(), "too many preds?");
      return pred_count() - _preds_parsed++;
    }
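
    // (Worked example: with pred_count() == 3, successive calls return
    // 3, then 2, then 1 == PhiNode::Input.)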

    // Add a previously unaccounted predecessor to this block.
    // This operates by increasing the size of the block's region
    // and all its phi nodes (if any).  The value returned is a
    // path number ("pnum").
    int add_new_path();

    // Initialize me by recording the parser's map.  My own map must be NULL.
    void record_state(Parse* outer);
  };

#ifndef PRODUCT
  // BytecodeParseHistogram collects number of bytecodes parsed, nodes constructed, and transformations.
  class BytecodeParseHistogram : public ResourceObj {
   private:
    enum BPHType {
      BPH_transforms,
      BPH_values
    };
    static bool _initialized;
    static uint _bytecodes_parsed [Bytecodes::number_of_codes];
    static uint _nodes_constructed[Bytecodes::number_of_codes];
    static uint _nodes_transformed[Bytecodes::number_of_codes];
    static uint _new_values       [Bytecodes::number_of_codes];

    Bytecodes::Code _initial_bytecode;
    int             _initial_node_count;
    int             _initial_transforms;
    int             _initial_values;

    Parse     *_parser;
    Compile   *_compiler;

    // Initialization
    static void reset();

    // Return info being collected, select with global flag 'BytecodeParseInfo'
    int current_count(BPHType info_selector);

   public:
    BytecodeParseHistogram(Parse *p, Compile *c);
    static bool initialized();

    // Record info when starting to parse one bytecode
    void set_initial_state( Bytecodes::Code bc );
    // Record results of parsing one bytecode
    void record_change();

    // Profile printing
    static void print(float cutoff = 0.01F); // cutoff in percent
  };

  public:
    // Record work done during parsing
    BytecodeParseHistogram* _parse_histogram;
    void set_parse_histogram(BytecodeParseHistogram *bph) { _parse_histogram = bph; }
    BytecodeParseHistogram* parse_histogram()      { return _parse_histogram; }
#endif

 private:
  friend class Block;

  // Variables which characterize this compilation as a whole:

  JVMState*     _caller;        // JVMS which carries incoming args & state.
  float         _expected_uses; // expected number of calls to this code
  float         _prof_factor;   // discount applied to my profile counts
  int           _depth;         // Inline tree depth, for debug printouts
  const TypeFunc*_tf;           // My kind of function type
  int           _entry_bci;     // the osr bci or InvocationEntryBci

  ciTypeFlow*   _flow;          // Results of previous flow pass.
  Block*        _blocks;        // Array of basic-block structs.
  int           _block_count;   // Number of elements in _blocks.

  GraphKit      _exits;         // Record all normal returns and throws here.
  bool          _wrote_final;   // Did we write a final field?
  bool          _wrote_volatile;     // Did we write a volatile field?
  bool          _count_invocations;  // update and test invocation counter
  bool          _method_data_update; // update method data oop
  Node*         _alloc_with_final;   // An allocation node with final field

  // Variables which track Java semantics during bytecode parsing:

  Block*            _block;     // block currently getting parsed
  ciBytecodeStream  _iter;      // stream of this method's bytecodes

  int           _blocks_merged; // Progress meter: state merges from BB preds
  int           _blocks_parsed; // Progress meter: BBs actually parsed

  const FastLockNode* _synch_lock; // FastLockNode for synchronized method

#ifndef PRODUCT
  int _max_switch_depth;        // Debugging SwitchRanges.
  int _est_switch_depth;        // Debugging SwitchRanges.
#endif

  bool         _first_return;                  // true if return is the first to be parsed
  bool         _replaced_nodes_for_exceptions; // needs processing of replaced nodes in exception paths?
  uint         _new_idx;                       // any node with _idx above this value was new during this parse. Used to trim the replaced nodes list.

 public:
  // Constructor
  Parse(JVMState* caller, ciMethod* parse_method, float expected_uses);

  virtual Parse* is_Parse() const { return (Parse*)this; }

  // Accessors.
  JVMState*     caller()        const { return _caller; }
  float         expected_uses() const { return _expected_uses; }
  float         prof_factor()   const { return _prof_factor; }
  int           depth()         const { return _depth; }
  const TypeFunc* tf()          const { return _tf; }
  //            entry_bci()     -- see osr_bci, etc.

  ciTypeFlow*   flow()          const { return _flow; }
  //            blocks()        -- see rpo_at, start_block, etc.
  int           block_count()   const { return _block_count; }

  GraphKit&     exits()               { return _exits; }
  bool          wrote_final() const   { return _wrote_final; }
  void      set_wrote_final(bool z)   { _wrote_final = z; }
  bool          wrote_volatile() const { return _wrote_volatile; }
  void      set_wrote_volatile(bool z) { _wrote_volatile = z; }
  bool          count_invocations() const  { return _count_invocations; }
  bool          method_data_update() const { return _method_data_update; }
  Node*    alloc_with_final() const   { return _alloc_with_final; }
  void set_alloc_with_final(Node* n)  {
    assert((_alloc_with_final == NULL) || (_alloc_with_final == n), "different init objects?");
    _alloc_with_final = n;
  }

  Block*             block()    const { return _block; }
  ciBytecodeStream&  iter()           { return _iter; }
  Bytecodes::Code    bc()       const { return _iter.cur_bc(); }

  void set_block(Block* b)            { _block = b; }

  // Derived accessors:
  bool is_normal_parse() const  { return _entry_bci == InvocationEntryBci; }
  bool is_osr_parse() const     { return _entry_bci != InvocationEntryBci; }
  int osr_bci() const           { assert(is_osr_parse(),""); return _entry_bci; }

  void set_parse_bci(int bci);

  // Must this parse be aborted?
  bool failing()                { return C->failing(); }

  Block* rpo_at(int rpo) {
    assert(0 <= rpo && rpo < _block_count, "oob");
    return &_blocks[rpo];
  }
  Block* start_block() {
    return rpo_at(flow()->start_block()->rpo());
  }
  // Can return NULL if the flow pass did not complete a block.
  Block* successor_for_bci(int bci) {
    return block()->successor_for_bci(bci);
  }

 private:
  // Create a JVMS & map for the initial state of this method.
  SafePointNode* create_entry_map();

  // OSR helpers
  Node *fetch_interpreter_state(int index, BasicType bt, Node *local_addrs, Node *local_addrs_base);
  Node* check_interpreter_type(Node* l, const Type* type, SafePointNode* &bad_type_exit);
  void  load_interpreter_state(Node* osr_buf);

  // Functions for managing basic blocks:
  void init_blocks();
  void load_state_from(Block* b);
  void store_state_to(Block* b) { b->record_state(this); }

  // Parse all the basic blocks.
  void do_all_blocks();

  // Parse the current basic block
  void do_one_block();

  // Raise an error if we get a bad ciTypeFlow CFG.
  void handle_missing_successor(int bci);

  // first actions (before BCI 0)
  void do_method_entry();

  // implementation of monitorenter/monitorexit
  void do_monitor_enter();
  void do_monitor_exit();

  // Eagerly create phis throughout the state, to cope with back edges.
  void ensure_phis_everywhere();

  // Merge the current mapping into the basic block starting at bci
  void merge(          int target_bci);
  // Same as plain merge, except that it allocates a new path number.
  void merge_new_path( int target_bci);
  // Merge the current mapping into an exception handler.
  void merge_exception(int target_bci);
  // Helper: Merge the current mapping into the given basic block
  void merge_common(Block* target, int pnum);
  // Helper functions for merging individual cells.
  PhiNode *ensure_phi(       int idx, bool nocreate = false);
  PhiNode *ensure_memory_phi(int idx, bool nocreate = false);
  // Helper to merge the current memory state into the given basic block
  void merge_memory_edges(MergeMemNode* n, int pnum, bool nophi);

  // Parse this bytecode, and alter the Parsers JVM->Node mapping
  void do_one_bytecode();

  // helper function to generate array store check
  void array_store_check();
  // Helper function to generate array load
  void array_load(BasicType etype);
  // Helper function to generate array store
  void array_store(BasicType etype);
  // Helper function to compute array addressing
  Node* array_addressing(BasicType type, int vals, const Type* *result2=NULL);

  void rtm_deopt();

  // Pass current map to exits
  void return_current(Node* value);

  // Register finalizers on return from Object.<init>
  void call_register_finalizer();

  // Insert a compiler safepoint into the graph
  void add_safepoint();

  // Insert a compiler safepoint into the graph, if there is a back-branch.
  void maybe_add_safepoint(int target_bci) {
    if (UseLoopSafepoints && target_bci <= bci()) {
      add_safepoint();
    }
  }

  // Note:  Intrinsic generation routines may be found in library_call.cpp.

  // Helper function to setup Ideal Call nodes
  void do_call();

  // Helper function to uncommon-trap or bailout for non-compilable call-sites
  bool can_not_compile_call_site(ciMethod *dest_method, ciInstanceKlass *klass);

  // Helper function to setup for type-profile based inlining
  bool prepare_type_profile_inline(ciInstanceKlass* prof_klass, ciMethod* prof_method);

  // Helper functions for type checking bytecodes:
  void  do_checkcast();
  void  do_instanceof();

  // Helper functions for shifting & arithmetic
  void modf();
  void modd();
  void l2f();

  void do_irem();

  // implementation of _get* and _put* bytecodes
  void do_getstatic() { do_field_access(true,  false); }
  void do_getfield () { do_field_access(true,  true); }
  void do_putstatic() { do_field_access(false, false); }
  void do_putfield () { do_field_access(false, true); }

  // common code for making initial checks and forming addresses
  void do_field_access(bool is_get, bool is_field);
  bool static_field_ok_in_clinit(ciField *field, ciMethod *method);

  // common code for actually performing the load or store
  void do_get_xxx(Node* obj, ciField* field, bool is_field);
  void do_put_xxx(Node* obj, ciField* field, bool is_field);

  // loading from a constant field or the constant pool
  // returns false if push failed (non-perm field constants only, not ldcs)
  bool push_constant(ciConstant con, bool require_constant = false, bool is_autobox_cache = false, const Type* basic_type = NULL);

  // implementation of object creation bytecodes
  void emit_guard_for_new(ciInstanceKlass* klass);
  void do_new();
  void do_newarray(BasicType elemtype);
  void do_anewarray();
  void do_multianewarray();
  Node* expand_multianewarray(ciArrayKlass* array_klass, Node* *lengths, int ndimensions, int nargs);

  // implementation of jsr/ret
  void do_jsr();
  void do_ret();

  float   dynamic_branch_prediction(float &cnt, BoolTest::mask btest, Node* test);
  float   branch_prediction(float &cnt, BoolTest::mask btest, int target_bci, Node* test);
  bool    seems_never_taken(float prob) const;
  bool    path_is_suitable_for_uncommon_trap(float prob) const;
  bool    seems_stable_comparison() const;

  void    do_ifnull(BoolTest::mask btest, Node* c);
  void    do_if(BoolTest::mask btest, Node* c);
  int     repush_if_args();
  void    adjust_map_after_if(BoolTest::mask btest, Node* c, float prob,
                              Block* path, Block* other_path);
  void    sharpen_type_after_if(BoolTest::mask btest,
                                Node* con, const Type* tcon,
                                Node* val, const Type* tval);
  IfNode* jump_if_fork_int(Node* a, Node* b, BoolTest::mask mask);
  Node*   jump_if_join(Node* iffalse, Node* iftrue);
  void    jump_if_true_fork(IfNode *ifNode, int dest_bci_if_true, int prof_table_index);
  void    jump_if_false_fork(IfNode *ifNode, int dest_bci_if_false, int prof_table_index);
  void    jump_if_always_fork(int dest_bci_if_true, int prof_table_index);

  friend class SwitchRange;
  void    do_tableswitch();
  void    do_lookupswitch();
  void    jump_switch_ranges(Node* a, SwitchRange* lo, SwitchRange* hi, int depth = 0);
  bool    create_jump_tables(Node* a, SwitchRange* lo, SwitchRange* hi);

  // helper functions for methodData style profiling
  void test_counter_against_threshold(Node* cnt, int limit);
  void increment_and_test_invocation_counter(int limit);
  void test_for_osr_md_counter_at(ciMethodData* md, ciProfileData* data, ByteSize offset, int limit);
  Node* method_data_addressing(ciMethodData* md, ciProfileData* data, ByteSize offset, Node* idx = NULL, uint stride = 0);
  void increment_md_counter_at(ciMethodData* md, ciProfileData* data, ByteSize offset, Node* idx = NULL, uint stride = 0);
  void set_md_flag_at(ciMethodData* md, ciProfileData* data, int flag_constant);

  void profile_method_entry();
  void profile_taken_branch(int target_bci, bool force_update = false);
  void profile_not_taken_branch(bool force_update = false);
  void profile_call(Node* receiver);
  void profile_generic_call();
  void profile_receiver_type(Node* receiver);
  void profile_ret(int target_bci);
  void profile_null_checkcast();
  void profile_switch_case(int table_index);

  // helper function for call statistics
  void count_compiled_calls(bool at_method_entry, bool is_inline) PRODUCT_RETURN;

  Node_Notes* make_node_notes(Node_Notes* caller_nn);

  // Helper functions for handling normal and abnormal exits.
  void build_exits();

  // Fix up all exceptional control flow exiting a single bytecode.
  void do_exceptions();

  // Fix up all exiting control flow at the end of the parse.
  void do_exits();

  // Add Catch/CatchProjs
  // The call is either a Java call or the VM's rethrow stub
  void catch_call_exceptions(ciExceptionHandlerStream&);

  // Handle all exceptions thrown by the inlined method.
  // Also handles exceptions for individual bytecodes.
  void catch_inline_exceptions(SafePointNode* ex_map);

  // Merge the given map into correct exceptional exit state.
  // Assumes that there is no applicable local handler.
  void throw_to_exit(SafePointNode* ex_map);

  // Use speculative type to optimize CmpP node
  Node* optimize_cmp_with_klass(Node* c);

 public:
#ifndef PRODUCT
  // Handle PrintOpto, etc.
  void show_parse_info();
  void dump_map_adr_mem() const;
  static void print_statistics(); // Print some performance counters
  void dump();
  void dump_bci(int bci);
#endif
};

#endif // SHARE_VM_OPTO_PARSE_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/parse1.cpp
/*
 * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "compiler/compileLog.hpp"
#include "interpreter/linkResolver.hpp"
#include "oops/method.hpp"
#include "opto/addnode.hpp"
#include "opto/c2compiler.hpp"
#include "opto/idealGraphPrinter.hpp"
#include "opto/locknode.hpp"
#include "opto/memnode.hpp"
#include "opto/parse.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "runtime/arguments.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "utilities/copy.hpp"

// Static array so we can figure out which bytecodes stop us from compiling
// the most. Some of the non-static variables are needed in bytecodeInfo.cpp
// and eventually should be encapsulated in a proper class (gri 8/18/98).

int nodes_created              = 0;
int methods_parsed             = 0;
int methods_seen               = 0;
int blocks_parsed              = 0;
int blocks_seen                = 0;

int explicit_null_checks_inserted = 0;
int explicit_null_checks_elided   = 0;
int all_null_checks_found         = 0;
int implicit_null_checks          = 0;
int implicit_null_throws          = 0;

int reclaim_idx  = 0;
int reclaim_in   = 0;
int reclaim_node = 0;

#ifndef PRODUCT
bool Parse::BytecodeParseHistogram::_initialized = false;
uint Parse::BytecodeParseHistogram::_bytecodes_parsed [Bytecodes::number_of_codes];
uint Parse::BytecodeParseHistogram::_nodes_constructed[Bytecodes::number_of_codes];
uint Parse::BytecodeParseHistogram::_nodes_transformed[Bytecodes::number_of_codes];
uint Parse::BytecodeParseHistogram::_new_values       [Bytecodes::number_of_codes];
#endif

//------------------------------print_statistics-------------------------------
#ifndef PRODUCT
void Parse::print_statistics() {
  tty->print_cr("--- Compiler Statistics ---");
  tty->print("Methods seen: %d  Methods parsed: %d", methods_seen, methods_parsed);
  tty->print("  Nodes created: %d", nodes_created);
  tty->cr();
  if (methods_seen != methods_parsed)
    tty->print_cr("Reasons for parse failures (NOT cumulative):");
  tty->print_cr("Blocks parsed: %d  Blocks seen: %d", blocks_parsed, blocks_seen);

  if( explicit_null_checks_inserted )
    tty->print_cr("%d original NULL checks - %d elided (%2d%%); optimizer leaves %d,", explicit_null_checks_inserted, explicit_null_checks_elided, (100*explicit_null_checks_elided)/explicit_null_checks_inserted, all_null_checks_found);
  if( all_null_checks_found )
    tty->print_cr("%d made implicit (%2d%%)", implicit_null_checks,
                  (100*implicit_null_checks)/all_null_checks_found);
  if( implicit_null_throws )
    tty->print_cr("%d implicit null exceptions at runtime",
                  implicit_null_throws);

  if( PrintParseStatistics && BytecodeParseHistogram::initialized() ) {
    BytecodeParseHistogram::print();
  }
}
#endif

//------------------------------ON STACK REPLACEMENT---------------------------

// Construct a node which can be used to get incoming state for
// on stack replacement.
Node *Parse::fetch_interpreter_state(int index,
                                     BasicType bt,
                                     Node *local_addrs,
                                     Node *local_addrs_base) {
  Node *mem = memory(Compile::AliasIdxRaw);
  Node *adr = basic_plus_adr( local_addrs_base, local_addrs, -index*wordSize );
  Node *ctl = control();

  // Very similar to LoadNode::make, except we handle un-aligned longs and
  // doubles on Sparc.  Intel can handle them just fine directly.
  Node *l = NULL;
  switch (bt) {                // Signature is flattened
  case T_INT:     l = new (C) LoadINode(ctl, mem, adr, TypeRawPtr::BOTTOM, TypeInt::INT,        MemNode::unordered); break;
  case T_FLOAT:   l = new (C) LoadFNode(ctl, mem, adr, TypeRawPtr::BOTTOM, Type::FLOAT,         MemNode::unordered); break;
  case T_ADDRESS: l = new (C) LoadPNode(ctl, mem, adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM,  MemNode::unordered); break;
  case T_OBJECT:  l = new (C) LoadPNode(ctl, mem, adr, TypeRawPtr::BOTTOM, TypeInstPtr::BOTTOM, MemNode::unordered); break;
  case T_LONG:
  case T_DOUBLE: {
    // Since arguments are in reverse order, the argument address 'adr'
    // refers to the back half of the long/double.  Recompute adr.
    adr = basic_plus_adr(local_addrs_base, local_addrs, -(index+1)*wordSize);
    if (Matcher::misaligned_doubles_ok) {
      l = (bt == T_DOUBLE)
        ? (Node*)new (C) LoadDNode(ctl, mem, adr, TypeRawPtr::BOTTOM, Type::DOUBLE, MemNode::unordered)
        : (Node*)new (C) LoadLNode(ctl, mem, adr, TypeRawPtr::BOTTOM, TypeLong::LONG, MemNode::unordered);
    } else {
      l = (bt == T_DOUBLE)
        ? (Node*)new (C) LoadD_unalignedNode(ctl, mem, adr, TypeRawPtr::BOTTOM, MemNode::unordered)
        : (Node*)new (C) LoadL_unalignedNode(ctl, mem, adr, TypeRawPtr::BOTTOM, MemNode::unordered);
    }
    break;
  }
  default: ShouldNotReachHere();
  }
  return _gvn.transform(l);
}
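
// (Illustrative call, as used for the OSR monitors in load_interpreter_state
// below:
//   Node *lock_object = fetch_interpreter_state(index*2, T_OBJECT, monitors_addr, osr_buf);
// the negative offset computed above walks backwards from monitors_addr
// through the reversed interpreter slots.)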

// Helper routine to prevent the interpreter from handing
// unexpected typestate to an OSR method.
// The Node l is a value newly dug out of the interpreter frame.
// The type is the type predicted by ciTypeFlow.  Note that it is
// not a general type, but can only come from Type::get_typeflow_type.
// The safepoint is a map which will feed an uncommon trap.
Node* Parse::check_interpreter_type(Node* l, const Type* type,
                                    SafePointNode* &bad_type_exit) {

  const TypeOopPtr* tp = type->isa_oopptr();

  // TypeFlow may assert null-ness if a type appears unloaded.
  if (type == TypePtr::NULL_PTR ||
      (tp != NULL && !tp->klass()->is_loaded())) {
    // Value must be null, not a real oop.
    Node* chk = _gvn.transform( new (C) CmpPNode(l, null()) );
    Node* tst = _gvn.transform( new (C) BoolNode(chk, BoolTest::eq) );
    IfNode* iff = create_and_map_if(control(), tst, PROB_MAX, COUNT_UNKNOWN);
    set_control(_gvn.transform( new (C) IfTrueNode(iff) ));
    Node* bad_type = _gvn.transform( new (C) IfFalseNode(iff) );
    bad_type_exit->control()->add_req(bad_type);
    l = null();
  }

  // Typeflow can also cut off paths from the CFG, based on
  // types which appear unloaded, or call sites which appear unlinked.
  // When paths are cut off, values at later merge points can rise
  // toward more specific classes.  Make sure these specific classes
  // are still in effect.
  if (tp != NULL && tp->klass() != C->env()->Object_klass()) {
    // TypeFlow asserted a specific object type.  Value must have that type.
    Node* bad_type_ctrl = NULL;
    l = gen_checkcast(l, makecon(TypeKlassPtr::make(tp->klass())), &bad_type_ctrl);
    bad_type_exit->control()->add_req(bad_type_ctrl);
  }

  BasicType bt_l = _gvn.type(l)->basic_type();
  BasicType bt_t = type->basic_type();
  assert(_gvn.type(l)->higher_equal(type), "must constrain OSR typestate");
  return l;
}

// Helper routine which sets up elements of the initial parser map when
// performing a parse for on stack replacement.  Add values into map.
// The only parameter contains the address of the interpreter's arguments.
void Parse::load_interpreter_state(Node* osr_buf) {
  int index;
  int max_locals = jvms()->loc_size();
  int max_stack  = jvms()->stk_size();


  // Mismatch between method and jvms can occur since map briefly held
  // an OSR entry state (which takes up one RawPtr word).
  assert(max_locals == method()->max_locals(), "sanity");
  assert(max_stack  >= method()->max_stack(),  "sanity");
  assert((int)jvms()->endoff() == TypeFunc::Parms + max_locals + max_stack, "sanity");
  assert((int)jvms()->endoff() == (int)map()->req(), "sanity");

  // Find the start block.
  Block* osr_block = start_block();
  assert(osr_block->start() == osr_bci(), "sanity");

  // Set initial BCI.
  set_parse_bci(osr_block->start());

  // Set initial stack depth.
  set_sp(osr_block->start_sp());

  // Check bailouts.  We currently do not perform on stack replacement
  // of loops in catch blocks or loops which branch with a non-empty stack.
  if (sp() != 0) {
    C->record_method_not_compilable("OSR starts with non-empty stack");
    return;
  }
  // Do not OSR inside finally clauses:
  if (osr_block->has_trap_at(osr_block->start())) {
    C->record_method_not_compilable("OSR starts with an immediate trap");
    return;
  }

  // Commute monitors from interpreter frame to compiler frame.
  assert(jvms()->monitor_depth() == 0, "should be no active locks at beginning of osr");
  int mcnt = osr_block->flow()->monitor_count();
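  // Assumed layout of the OSR temp buffer, inferred from the indexing below:
  // max_locals local words come first, followed by mcnt two-word monitor
  // entries (locked object and displaced header interleaved).  monitors_addr
  // points at the last word of the monitor area, and fetch_interpreter_state
  // indexes downward from there.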
  Node *monitors_addr = basic_plus_adr(osr_buf, osr_buf, (max_locals+mcnt*2-1)*wordSize);
  for (index = 0; index < mcnt; index++) {
    // Make a BoxLockNode for the monitor.
    Node *box = _gvn.transform(new (C) BoxLockNode(next_monitor()));


    // Displaced headers and locked objects are interleaved in the
    // temp OSR buffer.  We only copy the locked objects out here.
    // Fetch the locked object from the OSR temp buffer and copy to our fastlock node.
    Node *lock_object = fetch_interpreter_state(index*2, T_OBJECT, monitors_addr, osr_buf);
    // Try to copy the displaced header to the BoxNode
    Node *displaced_hdr = fetch_interpreter_state((index*2) + 1, T_ADDRESS, monitors_addr, osr_buf);


    store_to_memory(control(), box, displaced_hdr, T_ADDRESS, Compile::AliasIdxRaw, MemNode::unordered);

    // Build a bogus FastLockNode (no code will be generated) and push the
    // monitor into our debug info.
    const FastLockNode *flock = _gvn.transform(new (C) FastLockNode( 0, lock_object, box ))->as_FastLock();
    map()->push_monitor(flock);

    // If the lock is our method synchronization lock, tuck it away in
    // _synch_lock for return and rethrow exit paths.
    if (index == 0 && method()->is_synchronized()) {
      _synch_lock = flock;
    }
  }

  // Use the raw liveness computation to make sure that unexpected
  // values don't propagate into the OSR frame.
  MethodLivenessResult live_locals = method()->liveness_at_bci(osr_bci());
  if (!live_locals.is_valid()) {
    // Degenerate or breakpointed method.
    C->record_method_not_compilable("OSR in empty or breakpointed method");
    return;
  }

  // Extract the needed locals from the interpreter frame.
  Node *locals_addr = basic_plus_adr(osr_buf, osr_buf, (max_locals-1)*wordSize);

  // find all the locals that the interpreter thinks contain live oops
  const BitMap live_oops = method()->live_local_oops_at_bci(osr_bci());
  for (index = 0; index < max_locals; index++) {

    if (!live_locals.at(index)) {
      continue;
    }

    const Type *type = osr_block->local_type_at(index);

    if (type->isa_oopptr() != NULL) {

      // 6403625: Verify that the interpreter oopMap also thinks the oop is
      // live; otherwise we might load a stale oop if MethodLiveness disagrees
      // with the interpreter's result. If the interpreter says it is dead,
      // we agree by making the value go to top.
      //

      if (!live_oops.at(index)) {
        if (C->log() != NULL) {
          C->log()->elem("OSR_mismatch local_index='%d'",index);
        }
        set_local(index, null());
        // and ignore it for the loads
        continue;
      }
    }

    // Filter out TOP, HALF, and BOTTOM.  (Cf. ensure_phi.)
    if (type == Type::TOP || type == Type::HALF) {
      continue;
    }
    // If the type falls to bottom, then this must be a local that
    // is mixing ints and oops or some such.  Forcing it to top
    // makes it go dead.
    if (type == Type::BOTTOM) {
      continue;
    }
    // Construct code to access the appropriate local.
    BasicType bt = type->basic_type();
    if (type == TypePtr::NULL_PTR) {
      // Ptr types are mixed together with T_ADDRESS but NULL is
      // really for T_OBJECT types so correct it.
      bt = T_OBJECT;
    }
    Node *value = fetch_interpreter_state(index, bt, locals_addr, osr_buf);
    set_local(index, value);
  }

  // Extract the needed stack entries from the interpreter frame.
  for (index = 0; index < sp(); index++) {
    const Type *type = osr_block->stack_type_at(index);
    if (type != Type::TOP) {
      // Currently the compiler bails out when attempting to on stack replace
      // at a bci with a non-empty stack.  We should not reach here.
      ShouldNotReachHere();
    }
  }

  // End the OSR migration
  make_runtime_call(RC_LEAF, OptoRuntime::osr_end_Type(),
                    CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_end),
                    "OSR_migration_end", TypeRawPtr::BOTTOM,
                    osr_buf);

  // Now that the interpreter state is loaded, make sure it will match
  // at execution time what the compiler is expecting now:
  SafePointNode* bad_type_exit = clone_map();
  bad_type_exit->set_control(new (C) RegionNode(1));

  assert(osr_block->flow()->jsrs()->size() == 0, "should be no jsrs live at osr point");
  for (index = 0; index < max_locals; index++) {
    if (stopped())  break;
    Node* l = local(index);
    if (l->is_top())  continue;  // nothing here
    const Type *type = osr_block->local_type_at(index);
    if (type->isa_oopptr() != NULL) {
      if (!live_oops.at(index)) {
        // skip type check for dead oops
        continue;
      }
    }
    if (osr_block->flow()->local_type_at(index)->is_return_address()) {
      // In our current system it's illegal for jsr addresses to be
      // live into an OSR entry point because the compiler performs
      // inlining of jsrs.  ciTypeFlow has a bailout that detects this
      // case and aborts the compile if addresses are live into an OSR
      // entry point.  Because of that we can assume that any address
      // locals at the OSR entry point are dead.  Method liveness
      // isn't precise enough to figure out that they are dead in all
      // cases, so simply skip checking address locals altogether. Any
      // type check is guaranteed to fail since the
      // interpreter type is the result of a load which might have any
      // value and the expected type is a constant.
      continue;
    }
    set_local(index, check_interpreter_type(l, type, bad_type_exit));
  }

  for (index = 0; index < sp(); index++) {
    if (stopped())  break;
    Node* l = stack(index);
    if (l->is_top())  continue;  // nothing here
    const Type *type = osr_block->stack_type_at(index);
    set_stack(index, check_interpreter_type(l, type, bad_type_exit));
  }

  if (bad_type_exit->control()->req() > 1) {
    // Build an uncommon trap here, if any inputs can be unexpected.
    bad_type_exit->set_control(_gvn.transform( bad_type_exit->control() ));
    record_for_igvn(bad_type_exit->control());
    SafePointNode* types_are_good = map();
    set_map(bad_type_exit);
    // The unexpected type happens because a new edge is active
    // in the CFG, which typeflow had previously ignored.
    // E.g., Object x = coldAtFirst() && notReached()? "str": new Integer(123).
    // This x will be typed as Integer if notReached is not yet linked.
    // It could also happen due to a problem in ciTypeFlow analysis.
    uncommon_trap(Deoptimization::Reason_constraint,
                  Deoptimization::Action_reinterpret);
    set_map(types_are_good);
  }
}

//------------------------------Parse------------------------------------------
// Main parser constructor.
Parse::Parse(JVMState* caller, ciMethod* parse_method, float expected_uses)
  : _exits(caller)
{
  // Init some variables
  _caller = caller;
  _method = parse_method;
  _expected_uses = expected_uses;
  _depth = 1 + (caller->has_method() ? caller->depth() : 0);
  _wrote_final = false;
  _wrote_volatile = false;
  _alloc_with_final = NULL;
  _entry_bci = InvocationEntryBci;
  _tf = NULL;
  _block = NULL;
  _first_return = true;
  _replaced_nodes_for_exceptions = false;
  _new_idx = C->unique();
  debug_only(_block_count = -1);
  debug_only(_blocks = (Block*)-1);
#ifndef PRODUCT
  if (PrintCompilation || PrintOpto) {
    // Make sure I have an inline tree, so I can print messages about it.
    JVMState* ilt_caller = is_osr_parse() ? caller->caller() : caller;
    InlineTree::find_subtree_from_root(C->ilt(), ilt_caller, parse_method);
  }
  _max_switch_depth = 0;
  _est_switch_depth = 0;
#endif

  _tf = TypeFunc::make(method());
  _iter.reset_to_method(method());
  _flow = method()->get_flow_analysis();
  if (_flow->failing()) {
    C->record_method_not_compilable_all_tiers(_flow->failure_reason());
  }

#ifndef PRODUCT
  if (_flow->has_irreducible_entry()) {
    C->set_parsed_irreducible_loop(true);
  }
#endif

  if (_expected_uses <= 0) {
    _prof_factor = 1;
  } else {
    float prof_total = parse_method->interpreter_invocation_count();
    if (prof_total <= _expected_uses) {
      _prof_factor = 1;
    } else {
      _prof_factor = _expected_uses / prof_total;
    }
  }
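  // Worked example (illustrative): if this call site is expected to execute
  // 100 times but the interpreter profiled 10,000 invocations, _prof_factor
  // becomes 100/10000 = 0.01, scaling profile counts down to this caller's
  // estimated share.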

  CompileLog* log = C->log();
  if (log != NULL) {
    log->begin_head("parse method='%d' uses='%g'",
                    log->identify(parse_method), expected_uses);
    if (depth() == 1 && C->is_osr_compilation()) {
      log->print(" osr_bci='%d'", C->entry_bci());
    }
    log->stamp();
    log->end_head();
  }

  // Accumulate deoptimization counts.
  // (The range_check and store_check counts are checked elsewhere.)
  ciMethodData* md = method()->method_data();
  for (uint reason = 0; reason < md->trap_reason_limit(); reason++) {
    uint md_count = md->trap_count(reason);
    if (md_count != 0) {
      if (md_count == md->trap_count_limit())
        md_count += md->overflow_trap_count();
      uint total_count = C->trap_count(reason);
      uint old_count   = total_count;
      total_count += md_count;
      // Saturate the add if it overflows.
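      // Worked example (illustrative): with old_count == 0xFFFFFFF0 and
      // md_count == 0x20, the uint sum wraps to 0x10 < old_count, so the
      // total is clamped to (uint)-1 below.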
      if (total_count < old_count || total_count < md_count)
        total_count = (uint)-1;
      C->set_trap_count(reason, total_count);
      if (log != NULL)
        log->elem("observe trap='%s' count='%d' total='%d'",
                  Deoptimization::trap_reason_name(reason),
                  md_count, total_count);
    }
  }
  // Accumulate total sum of decompilations, also.
  C->set_decompile_count(C->decompile_count() + md->decompile_count());

  _count_invocations = C->do_count_invocations();
  _method_data_update = C->do_method_data_update();

  if (log != NULL && method()->has_exception_handlers()) {
    log->elem("observe that='has_exception_handlers'");
  }

  assert(method()->can_be_compiled(),       "Can not parse this method, cutout earlier");
  assert(method()->has_balanced_monitors(), "Can not parse unbalanced monitors, cutout earlier");

  // Always register dependence if JVMTI is enabled, because
  // either breakpoint setting or hotswapping of methods may
  // cause deoptimization.
  if (C->env()->jvmti_can_hotswap_or_post_breakpoint()) {
    C->dependencies()->assert_evol_method(method());
  }

  methods_seen++;

  // Do some special top-level things.
  if (depth() == 1 && C->is_osr_compilation()) {
    _entry_bci = C->entry_bci();
    _flow = method()->get_osr_flow_analysis(osr_bci());
    if (_flow->failing()) {
      C->record_method_not_compilable(_flow->failure_reason());
#ifndef PRODUCT
      if (PrintOpto && (Verbose || WizardMode)) {
        tty->print_cr("OSR @%d type flow bailout: %s", _entry_bci, _flow->failure_reason());
        if (Verbose) {
          method()->print();
          method()->print_codes();
          _flow->print();
        }
      }
#endif
    }
    _tf = C->tf();     // the OSR entry type is different
  }

#ifdef ASSERT
  if (depth() == 1) {
    assert(C->is_osr_compilation() == this->is_osr_parse(), "OSR in sync");
    if (C->tf() != tf()) {
      MutexLockerEx ml(Compile_lock, Mutex::_no_safepoint_check_flag);
      assert(C->env()->system_dictionary_modification_counter_changed(),
             "Must invalidate if TypeFuncs differ");
    }
  } else {
    assert(!this->is_osr_parse(), "no recursive OSR");
  }
#endif

  methods_parsed++;
#ifndef PRODUCT
  // add method size here to guarantee that inlined methods are added too
  if (TimeCompiler)
    _total_bytes_compiled += method()->code_size();

  show_parse_info();
#endif

  if (failing()) {
    if (log)  log->done("parse");
    return;
  }

  gvn().set_type(root(), root()->bottom_type());
  gvn().transform(top());

  // Import the results of the ciTypeFlow.
  init_blocks();

  // Merge point for all normal exits
  build_exits();

  // Setup the initial JVM state map.
  SafePointNode* entry_map = create_entry_map();

  // Check for bailouts during map initialization
  if (failing() || entry_map == NULL) {
    if (log)  log->done("parse");
    return;
  }

  Node_Notes* caller_nn = C->default_node_notes();
  // Collect debug info for inlined calls unless -XX:-DebugInlinedCalls.
  if (DebugInlinedCalls || depth() == 1) {
    C->set_default_node_notes(make_node_notes(caller_nn));
  }

  if (is_osr_parse()) {
    Node* osr_buf = entry_map->in(TypeFunc::Parms+0);
    entry_map->set_req(TypeFunc::Parms+0, top());
    set_map(entry_map);
    load_interpreter_state(osr_buf);
  } else {
    set_map(entry_map);
    do_method_entry();
  }

  if (depth() == 1 && !failing()) {
    // Add check to deoptimize the nmethod if RTM state was changed
    rtm_deopt();
  }

  // Check for bailouts during method entry or RTM state check setup.
  if (failing()) {
    if (log)  log->done("parse");
    C->set_default_node_notes(caller_nn);
    return;
  }

  entry_map = map();  // capture any changes performed by method setup code
  assert(jvms()->endoff() == map()->req(), "map matches JVMS layout");

  // We begin parsing as if we have just encountered a jump to the
  // method entry.
  Block* entry_block = start_block();
  assert(entry_block->start() == (is_osr_parse() ? osr_bci() : 0), "");
  set_map_clone(entry_map);
  merge_common(entry_block, entry_block->next_path_num());

#ifndef PRODUCT
  BytecodeParseHistogram *parse_histogram_obj = new (C->env()->arena()) BytecodeParseHistogram(this, C);
  set_parse_histogram( parse_histogram_obj );
#endif

  // Parse all the basic blocks.
  do_all_blocks();

  C->set_default_node_notes(caller_nn);

  // Check for bailouts during conversion to graph
  if (failing()) {
    if (log)  log->done("parse");
    return;
  }

  // Fix up all exiting control flow.
  set_map(entry_map);
  do_exits();

  if (log)  log->done("parse nodes='%d' live='%d' memory='" SIZE_FORMAT "'",
                      C->unique(), C->live_nodes(), C->node_arena()->used());
}

//---------------------------do_all_blocks-------------------------------------
void Parse::do_all_blocks() {
  bool has_irreducible = flow()->has_irreducible_entry();

  // Walk over all blocks in Reverse Post-Order.
  while (true) {
    bool progress = false;
    for (int rpo = 0; rpo < block_count(); rpo++) {
      Block* block = rpo_at(rpo);

      if (block->is_parsed()) continue;

      if (!block->is_merged()) {
        // Dead block, no state reaches this block
        continue;
      }

      // Prepare to parse this block.
      load_state_from(block);

      if (stopped()) {
        // Block is dead.
        continue;
      }

      blocks_parsed++;

      progress = true;
      if (block->is_loop_head() || block->is_handler() || (has_irreducible && !block->is_ready())) {
        // Not all preds have been parsed.  We must build phis everywhere.
        // (Note that dead locals do not get phis built, ever.)
        ensure_phis_everywhere();

        if (block->is_SEL_head() &&
            (UseLoopPredicate || LoopLimitCheck)) {
          // Add predicate to single entry (not irreducible) loop head.
          assert(!block->has_merged_backedge(), "only entry paths should be merged for now");
          // Need correct bci for predicate.
          // It is fine to set it here since do_one_block() will set it anyway.
          set_parse_bci(block->start());
          add_predicate();
          // Add new region for back branches.
          int edges = block->pred_count() - block->preds_parsed() + 1; // +1 for original region
          RegionNode *r = new (C) RegionNode(edges+1);
          _gvn.set_type(r, Type::CONTROL);
          record_for_igvn(r);
          r->init_req(edges, control());
          set_control(r);
          // Add new phis.
          ensure_phis_everywhere();
        }

        // Leave behind an undisturbed copy of the map, for future merges.
        set_map(clone_map());
      }

      if (control()->is_Region() && !block->is_loop_head() && !has_irreducible && !block->is_handler()) {
        // In the absence of irreducible loops, the Region and Phis
        // associated with a merge that doesn't involve a backedge can
        // be simplified now since the RPO parsing order guarantees
        // that any path which was supposed to reach here has already
        // been parsed or must be dead.
        Node* c = control();
        Node* result = _gvn.transform_no_reclaim(control());
        if (c != result && TraceOptoParse) {
          tty->print_cr("Block #%d replace %d with %d", block->rpo(), c->_idx, result->_idx);
        }
        if (result != top()) {
          record_for_igvn(result);
        }
      }

      // Parse the block.
      do_one_block();

      // Check for bailouts.
      if (failing())  return;
    }

    // With irreducible loops, multiple passes might be necessary to parse everything.
    if (!has_irreducible || !progress) {
      break;
    }
  }

  blocks_seen += block_count();

#ifndef PRODUCT
  // Make sure there are no half-processed blocks remaining.
  // Every remaining unprocessed block is dead and may be ignored now.
  for (int rpo = 0; rpo < block_count(); rpo++) {
    Block* block = rpo_at(rpo);
    if (!block->is_parsed()) {
      if (TraceOptoParse) {
        tty->print_cr("Skipped dead block %d at bci:%d", rpo, block->start());
      }
      assert(!block->is_merged(), "no half-processed blocks");
    }
  }
#endif
}
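
// Illustrative shape of the walk above (simplified sketch, an assumption,
// not a verbatim restatement): without irreducible entries a single RPO
// pass suffices; with them, the outer loop iterates to a fixed point:
//
//   do {
//     progress = false;
//     for (each block in RPO order)
//       if (block is merged && !parsed) { parse it; progress = true; }
//   } while (has_irreducible && progress);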

static Node* mask_int_value(Node* v, BasicType bt, PhaseGVN* gvn) {
  Compile* C = gvn->C;
  switch (bt) {
  case T_BYTE:
    v = gvn->transform(new (C) LShiftINode(v, gvn->intcon(24)));
    v = gvn->transform(new (C) RShiftINode(v, gvn->intcon(24)));
    break;
  case T_SHORT:
    v = gvn->transform(new (C) LShiftINode(v, gvn->intcon(16)));
    v = gvn->transform(new (C) RShiftINode(v, gvn->intcon(16)));
    break;
  case T_CHAR:
    v = gvn->transform(new (C) AndINode(v, gvn->intcon(0xFFFF)));
    break;
  case T_BOOLEAN:
    v = gvn->transform(new (C) AndINode(v, gvn->intcon(0x1)));
    break;
  default:
    break;                    // int-sized and other types need no masking
  }
  return v;
}
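
// Standalone sketch (assumption, not HotSpot code) of the masking idiom in
// mask_int_value() above: for T_BYTE the left/right shift pair sign-extends
// bit 7 of a 32-bit value, matching a narrowing (jbyte) cast.  The unsigned
// left shift avoids signed-overflow pitfalls; the right shift relies on the
// usual arithmetic shift for signed int.
static int mask_byte_example(int v) {
  return (int)((unsigned)v << 24) >> 24;  // e.g. 0x1FF -> -1, 0x7F -> 127
}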

//-------------------------------build_exits----------------------------------
// Build normal and exceptional exit merge points.
void Parse::build_exits() {
  // make a clone of caller to prevent sharing of side-effects
  _exits.set_map(_exits.clone_map());
  _exits.clean_stack(_exits.sp());
  _exits.sync_jvms();

  RegionNode* region = new (C) RegionNode(1);
  record_for_igvn(region);
  gvn().set_type_bottom(region);
  _exits.set_control(region);

  // Note:  iophi and memphi are not transformed until do_exits.
  Node* iophi  = new (C) PhiNode(region, Type::ABIO);
  Node* memphi = new (C) PhiNode(region, Type::MEMORY, TypePtr::BOTTOM);
  gvn().set_type_bottom(iophi);
  gvn().set_type_bottom(memphi);
  _exits.set_i_o(iophi);
  _exits.set_all_memory(memphi);

  // Add a return value to the exit state.  (Do not push it yet.)
  if (tf()->range()->cnt() > TypeFunc::Parms) {
    const Type* ret_type = tf()->range()->field_at(TypeFunc::Parms);
    if (ret_type->isa_int()) {
      BasicType ret_bt = method()->return_type()->basic_type();
      if (ret_bt == T_BOOLEAN ||
          ret_bt == T_CHAR ||
          ret_bt == T_BYTE ||
          ret_bt == T_SHORT) {
        ret_type = TypeInt::INT;
      }
    }

    // Don't "bind" an unloaded return klass to the ret_phi. If the klass
    // becomes loaded during the subsequent parsing, the loaded and unloaded
    // types will not join when we transform and push in do_exits().
    const TypeOopPtr* ret_oop_type = ret_type->isa_oopptr();
    if (ret_oop_type && !ret_oop_type->klass()->is_loaded()) {
      ret_type = TypeOopPtr::BOTTOM;
    }
    int         ret_size = type2size[ret_type->basic_type()];
    Node*       ret_phi  = new (C) PhiNode(region, ret_type);
    gvn().set_type_bottom(ret_phi);
    _exits.ensure_stack(ret_size);
    assert((int)(tf()->range()->cnt() - TypeFunc::Parms) == ret_size, "good tf range");
    assert(method()->return_type()->size() == ret_size, "tf agrees w/ method");
    _exits.set_argument(0, ret_phi);  // here is where the parser finds it
    // Note:  ret_phi is not yet pushed, until do_exits.
  }
}


//----------------------------build_start_state-------------------------------
// Construct a state which contains only the incoming arguments from an
// unknown caller.  The method & bci will be NULL & InvocationEntryBci.
JVMState* Compile::build_start_state(StartNode* start, const TypeFunc* tf) {
  int        arg_size = tf->domain()->cnt();
  int        max_size = MAX2(arg_size, (int)tf->range()->cnt());
  JVMState*  jvms     = new (this) JVMState(max_size - TypeFunc::Parms);
  SafePointNode* map  = new (this) SafePointNode(max_size, NULL);
  record_for_igvn(map);
  assert(arg_size == TypeFunc::Parms + (is_osr_compilation() ? 1 : method()->arg_size()), "correct arg_size");
  Node_Notes* old_nn = default_node_notes();
  if (old_nn != NULL && has_method()) {
    Node_Notes* entry_nn = old_nn->clone(this);
    JVMState* entry_jvms = new(this) JVMState(method(), old_nn->jvms());
    entry_jvms->set_offsets(0);
    entry_jvms->set_bci(entry_bci());
    entry_nn->set_jvms(entry_jvms);
    set_default_node_notes(entry_nn);
  }
  uint i;
  for (i = 0; i < (uint)arg_size; i++) {
    Node* parm = initial_gvn()->transform(new (this) ParmNode(start, i));
    map->init_req(i, parm);
    // Record all these guys for later GVN.
    record_for_igvn(parm);
  }
  for (; i < map->req(); i++) {
    map->init_req(i, top());
  }
  assert(jvms->argoff() == TypeFunc::Parms, "parser gets arguments here");
  set_default_node_notes(old_nn);
  map->set_jvms(jvms);
  jvms->set_map(map);
  return jvms;
}
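
// Assumed map layout built above (in this code base TypeFunc::Parms == 5):
// req slots 0..4 hold the control, I/O, memory, frame-pointer and
// return-address ParmNodes; slots 5..arg_size-1 hold the incoming Java
// arguments (or the single OSR buffer pointer for an OSR compile); any
// remaining slots up to max_size are filled with top().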

//-----------------------------make_node_notes---------------------------------
Node_Notes* Parse::make_node_notes(Node_Notes* caller_nn) {
  if (caller_nn == NULL)  return NULL;
  Node_Notes* nn = caller_nn->clone(C);
  JVMState* caller_jvms = nn->jvms();
  JVMState* jvms = new (C) JVMState(method(), caller_jvms);
  jvms->set_offsets(0);
  jvms->set_bci(_entry_bci);
  nn->set_jvms(jvms);
  return nn;
}


//--------------------------return_values--------------------------------------
void Compile::return_values(JVMState* jvms) {
  GraphKit kit(jvms);
  Node* ret = new (this) ReturnNode(TypeFunc::Parms,
                             kit.control(),
                             kit.i_o(),
                             kit.reset_memory(),
                             kit.frameptr(),
                             kit.returnadr());
  // Add zero or one return value
  int ret_size = tf()->range()->cnt() - TypeFunc::Parms;
  if (ret_size > 0) {
    kit.inc_sp(-ret_size);  // pop the return value(s)
    kit.sync_jvms();
    ret->add_req(kit.argument(0));
    // Note:  The second dummy edge is not needed by a ReturnNode.
  }
  // bind it to root
  root()->add_req(ret);
  record_for_igvn(ret);
  initial_gvn()->transform_no_reclaim(ret);
}

//------------------------rethrow_exceptions-----------------------------------
// Bind all exception states in the list into a single RethrowNode.
void Compile::rethrow_exceptions(JVMState* jvms) {
  GraphKit kit(jvms);
  if (!kit.has_exceptions())  return;  // nothing to generate
  // Load my combined exception state into the kit, with all phis transformed:
  SafePointNode* ex_map = kit.combine_and_pop_all_exception_states();
  Node* ex_oop = kit.use_exception_state(ex_map);
  RethrowNode* exit = new (this) RethrowNode(kit.control(),
                                      kit.i_o(), kit.reset_memory(),
                                      kit.frameptr(), kit.returnadr(),
                                      // like a return but with exception input
                                      ex_oop);
  // bind to root
  root()->add_req(exit);
  record_for_igvn(exit);
  initial_gvn()->transform_no_reclaim(exit);
}

//---------------------------do_exceptions-------------------------------------
// Process exceptions arising from the current bytecode.
// Send caught exceptions to the proper handler within this method.
// Unhandled exceptions feed into _exit.
void Parse::do_exceptions() {
  if (!has_exceptions())  return;

  if (failing()) {
    // Pop them all off and throw them away.
    while (pop_exception_state() != NULL) ;
    return;
  }

  PreserveJVMState pjvms(this, false);

  SafePointNode* ex_map;
  while ((ex_map = pop_exception_state()) != NULL) {
    if (!method()->has_exception_handlers()) {
      // Common case:  Transfer control outward.
      // Doing it this early allows the exceptions to common up
      // even between adjacent method calls.
      throw_to_exit(ex_map);
    } else {
      // Have to look at the exception first.
      assert(stopped(), "catch_inline_exceptions trashes the map");
      catch_inline_exceptions(ex_map);
      stop_and_kill_map();      // we used up this exception state; kill it
    }
  }

  // We now return to our regularly scheduled program:
}

//---------------------------throw_to_exit-------------------------------------
// Merge the given map into an exception exit from this method.
// The exception exit will handle any unlocking of receiver.
// The ex_oop must be saved within the ex_map, unlike merge_exception.
void Parse::throw_to_exit(SafePointNode* ex_map) {
  // Pop the JVMS to (a copy of) the caller.
  GraphKit caller;
  caller.set_map_clone(_caller->map());
  caller.set_bci(_caller->bci());
  caller.set_sp(_caller->sp());
  // Copy out the standard machine state:
  for (uint i = 0; i < TypeFunc::Parms; i++) {
    caller.map()->set_req(i, ex_map->in(i));
  }
  if (ex_map->has_replaced_nodes()) {
    _replaced_nodes_for_exceptions = true;
  }
  caller.map()->transfer_replaced_nodes_from(ex_map, _new_idx);
  // ...and the exception:
  Node*          ex_oop        = saved_ex_oop(ex_map);
  SafePointNode* caller_ex_map = caller.make_exception_state(ex_oop);
  // Finally, collect the new exception state in my exits:
  _exits.add_exception_state(caller_ex_map);
}

//------------------------------do_exits---------------------------------------
void Parse::do_exits() {
  set_parse_bci(InvocationEntryBci);

  // Now peephole on the return bits
  Node* region = _exits.control();
  _exits.set_control(gvn().transform(region));

  Node* iophi = _exits.i_o();
  _exits.set_i_o(gvn().transform(iophi));

  // On PPC64, also add MemBarRelease for constructors which write
  // volatile fields. As support_IRIW_for_not_multiple_copy_atomic_cpu
  // is set on PPC64, no sync instruction is issued after volatile
  // stores. We want to guarantee the same behaviour as on platforms
  // with total store order, although this is not required by the Java
  // memory model. So as with finals, we add a barrier here.
  if (wrote_final() PPC64_ONLY(|| (wrote_volatile() && method()->is_initializer()))) {
    // This method (which must be a constructor by the rules of Java)
    // wrote a final.  The effects of all initializations must be
    // committed to memory before any code after the constructor
    // publishes the reference to the newly constructed object.
    // Rather than wait for the publication, we simply block the
    // writes here.  Rather than put a barrier on only those writes
    // which are required to complete, we force all writes to complete.
    //
    // "All bets are off" unless the first publication occurs after a
    // normal return from the constructor.  We do not attempt to detect
    // such unusual early publications.  But no barrier is needed on
    // exceptional returns, since they cannot publish normally.
    //
    _exits.insert_mem_bar(Op_MemBarRelease, alloc_with_final());
#ifndef PRODUCT
    if (PrintOpto && (Verbose || WizardMode)) {
      method()->print_name();
      tty->print_cr(" writes finals and needs a memory barrier");
    }
#endif
  }

  for (MergeMemStream mms(_exits.merged_memory()); mms.next_non_empty(); ) {
    // transform each slice of the original memphi:
    mms.set_memory(_gvn.transform(mms.memory()));
  }

  if (tf()->range()->cnt() > TypeFunc::Parms) {
    const Type* ret_type = tf()->range()->field_at(TypeFunc::Parms);
    Node*       ret_phi  = _gvn.transform( _exits.argument(0) );
    if (!_exits.control()->is_top() && _gvn.type(ret_phi)->empty()) {
      // In case of concurrent class loading, the type we set for the
      // ret_phi in build_exits() may have been too optimistic and the
      // ret_phi may be top now.
      // Otherwise, we've encountered an error and have to mark the method as
      // not compilable. Just using an assertion instead would be dangerous
      // as this could lead to an infinite compile loop in non-debug builds.
      {
        MutexLockerEx ml(Compile_lock, Mutex::_no_safepoint_check_flag);
        if (C->env()->system_dictionary_modification_counter_changed()) {
          C->record_failure(C2Compiler::retry_class_loading_during_parsing());
        } else {
          C->record_method_not_compilable("Can't determine return type.");
        }
      }
      return;
    }
    if (ret_type->isa_int()) {
      BasicType ret_bt = method()->return_type()->basic_type();
      ret_phi = mask_int_value(ret_phi, ret_bt, &_gvn);
    }
    _exits.push_node(ret_type->basic_type(), ret_phi);
  }

  // Note:  Logic for creating and optimizing the ReturnNode is in Compile.

  // Unlock along the exceptional paths.
  // This is done late so that we can common up equivalent exceptions
  // (e.g., null checks) arising from multiple points within this method.
  // See GraphKit::add_exception_state, which performs the commoning.
  bool do_synch = method()->is_synchronized() && GenerateSynchronizationCode;

  // Record exit from a method if compiled while DTrace is turned on.
  if (do_synch || C->env()->dtrace_method_probes() || _replaced_nodes_for_exceptions) {
    // First move the exception list out of _exits:
    GraphKit kit(_exits.transfer_exceptions_into_jvms());
    SafePointNode* normal_map = kit.map();  // keep this guy safe
    // Now re-collect the exceptions into _exits:
    SafePointNode* ex_map;
    while ((ex_map = kit.pop_exception_state()) != NULL) {
      Node* ex_oop = kit.use_exception_state(ex_map);
      // Force the exiting JVM state to have this method at InvocationEntryBci.
      // The exiting JVM state is otherwise a copy of the calling JVMS.
      JVMState* caller = kit.jvms();
      JVMState* ex_jvms = caller->clone_shallow(C);
      ex_jvms->set_map(kit.clone_map());
      ex_jvms->map()->set_jvms(ex_jvms);
      ex_jvms->set_bci(InvocationEntryBci);
      kit.set_jvms(ex_jvms);
      if (do_synch) {
        // Add on the synchronized-method box/object combo
        kit.map()->push_monitor(_synch_lock);
        // Unlock!
        kit.shared_unlock(_synch_lock->box_node(), _synch_lock->obj_node());
      }
      if (C->env()->dtrace_method_probes()) {
        kit.make_dtrace_method_exit(method());
      }
      if (_replaced_nodes_for_exceptions) {
        kit.map()->apply_replaced_nodes(_new_idx);
      }
      // Done with exception-path processing.
      ex_map = kit.make_exception_state(ex_oop);
      assert(ex_jvms->same_calls_as(ex_map->jvms()), "sanity");
      // Pop the last vestige of this method:
      ex_map->set_jvms(caller->clone_shallow(C));
      ex_map->jvms()->set_map(ex_map);
      _exits.push_exception_state(ex_map);
    }
    assert(_exits.map() == normal_map, "keep the same return state");
  }

  {
    // Capture very early exceptions (receiver null checks) from caller JVMS
    GraphKit caller(_caller);
    SafePointNode* ex_map;
    while ((ex_map = caller.pop_exception_state()) != NULL) {
      _exits.add_exception_state(ex_map);
    }
  }
  _exits.map()->apply_replaced_nodes(_new_idx);
}

//-----------------------------create_entry_map-------------------------------
// Initialize our parser map to contain the types at method entry.
// For OSR, the map contains a single RawPtr parameter.
// Initial monitor locking for sync. methods is performed by do_method_entry.
SafePointNode* Parse::create_entry_map() {
  // Check for really stupid bail-out cases.
  uint len = TypeFunc::Parms + method()->max_locals() + method()->max_stack();
  if (len >= 32760) {
    C->record_method_not_compilable_all_tiers("too many local variables");
    return NULL;
  }

  // clear current replaced nodes that are of no use from here on (map was cloned in build_exits).
  _caller->map()->delete_replaced_nodes();

  // If this is an inlined method, we may have to do a receiver null check.
  if (_caller->has_method() && is_normal_parse() && !method()->is_static()) {
    GraphKit kit(_caller);
    kit.null_check_receiver_before_call(method());
    _caller = kit.transfer_exceptions_into_jvms();
    if (kit.stopped()) {
      _exits.add_exception_states_from(_caller);
      _exits.set_jvms(_caller);
      return NULL;
    }
  }

  assert(method() != NULL, "parser must have a method");

  // Create an initial safepoint to hold JVM state during parsing
  JVMState* jvms = new (C) JVMState(method(), _caller->has_method() ? _caller : NULL);
  set_map(new (C) SafePointNode(len, jvms));
  jvms->set_map(map());
  record_for_igvn(map());
  assert(jvms->endoff() == len, "correct jvms sizing");

  SafePointNode* inmap = _caller->map();
  assert(inmap != NULL, "must have inmap");
  // In case of null check on receiver above
  map()->transfer_replaced_nodes_from(inmap, _new_idx);

  uint i;

  // Pass thru the predefined input parameters.
  for (i = 0; i < TypeFunc::Parms; i++) {
    map()->init_req(i, inmap->in(i));
  }
