

          // Ensure a high score for immediate-use spill copies so they get a color
          if( n->is_SpillCopy()
              && lrgs(r).is_singledef()        // MultiDef live range can still split
              && n->outcnt() == 1              // and use must be in this block
              && _cfg.get_block_for_node(n->unique_out()) == block) {
            // All single-use MachSpillCopy(s) that immediately precede their
            // use must color early.  If a longer live range steals their
            // color, the spill copy will split and may push another spill copy
            // further away resulting in an infinite spill-split-retry cycle.
            // Assigning a zero area results in a high score() and a good
            // location in the simplify list.
            //

            Node *single_use = n->unique_out();
            assert(block->find_node(single_use) >= j, "Use must be later in block");
            // Use can be earlier in block if it is a Phi, but then I should be a MultiDef

            // Find first non SpillCopy 'm' that follows the current instruction
            // (j - 1) is index for current instruction 'n'
            Node *m = n;
            for (uint i = j; i <= last_inst && m->is_SpillCopy(); ++i) {
              m = block->get_node(i);
            }
            if (m == single_use) {
              lrgs(r)._area = 0.0;
            }
          }

          // Remove from live-out set
          if( liveout.remove(r) ) {
            // Adjust register pressure.
            // Capture last hi-to-lo pressure transition
            lower_pressure(&lrgs(r), j - 1, block, pressure, hrp_index);
            assert( pressure[0] == count_int_pressure  (&liveout), "" );
            assert( pressure[1] == count_float_pressure(&liveout), "" );
          }

          // Copies do not define a new value and so do not interfere.
          // Remove the copy's source from the liveout set before interfering.
          uint idx = n->is_Copy();
          if (idx) {
            uint x = _lrg_map.live_range_id(n->in(idx));
            if (liveout.remove(x)) {
              lrgs(x)._area -= cost;
              // Adjust register pressure.
              lower_pressure(&lrgs(x), j - 1, block, pressure, hrp_index);
              assert( pressure[0] == count_int_pressure  (&liveout), "" );
              assert( pressure[1] == count_float_pressure(&liveout), "" );
            }
          }
        } // End of if live or not

        // Interfere with everything live.  If the defined value must
        // go in a particular register, just remove that register from
        // all conflicting parties and avoid the interference.

        // Make exclusions for rematerializable defs.  Since rematerializable
        // DEFs are not bound but the live range is, some uses must be bound.
        // If we spill live range 'r', it can rematerialize at each use site
        // according to its bindings.
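        // E.g., when the def is bound to a single register (say RAX on x86), that one
        // bit is removed from the mask of every live range in 'liveout' below, avoiding
        // an explicit interference with 'r' over that register.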
        const RegMask &rmask = lrgs(r).mask();
        if( lrgs(r).is_bound() && !(n->rematerialize()) && rmask.is_NotEmpty() ) {
          // Check for common case
          int r_size = lrgs(r).num_regs();
          OptoReg::Name r_reg = (r_size == 1) ? rmask.find_first_elem() : OptoReg::Physical;
          // Smear odd bits
          IndexSetIterator elements(&liveout);
          uint l;
          while ((l = elements.next()) != 0) {
            LRG &lrg = lrgs(l);
            // If 'l' must spill already, do not further hack his bits.
            // He'll get some interferences and be forced to spill later.
            if( lrg._must_spill ) continue;
            // Remove bound register(s) from 'l's choices
            RegMask old = lrg.mask();
            uint old_size = lrg.mask_size();
            // Remove the bits from LRG 'r' from LRG 'l' so 'l' no
            // longer interferes with 'r'.  If 'l' requires aligned
            // adjacent pairs, subtract out bit pairs.
            assert(!lrg._is_vector || !lrg._fat_proj, "sanity");
            if (lrg.num_regs() > 1 && !lrg._fat_proj) {
              RegMask r2mask = rmask;
              // Leave only aligned set of bits.
              r2mask.smear_to_sets(lrg.num_regs());
              // It includes vector case.
              lrg.SUBTRACT( r2mask );
              lrg.compute_set_mask_size();
            } else if( r_size != 1 ) { // fat proj
              lrg.SUBTRACT( rmask );
              lrg.compute_set_mask_size();
            } else {            // Common case: size 1 bound removal
              if( lrg.mask().Member(r_reg) ) {
                lrg.Remove(r_reg);
                lrg.set_mask_size(lrg.mask().is_AllStack() ? LRG::AllStack_size : old_size - 1);
              }
            }
            // If 'l' goes completely dry, it must spill.
            if( lrg.not_free() ) {
              // Give 'l' some kind of reasonable mask, so he picks up
              // interferences (and will spill later).
              lrg.set_mask( old );
              lrg.set_mask_size(old_size);
              must_spill++;
              lrg._must_spill = 1;
              lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
            }
          }
        } // End of if bound

        // Now interference with everything that is live and has
        // compatible register sets.
        interfere_with_live(r,&liveout);

      } // End of if normal register-allocated value

      // Area remaining in the block
      inst_count--;
      cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);

      // Make all inputs live
      if( !n->is_Phi() ) {      // Phi function uses come from prior block
        JVMState* jvms = n->jvms();
        uint debug_start = jvms ? jvms->debug_start() : 999999;
        // Start loop at 1 (skip control edge) for most Nodes.
        // SCMemProj's might be the sole use of a StoreLConditional.
        // While StoreLConditionals set memory (the SCMemProj use)
        // they also def flags; if that flag def is unused the
        // allocator sees a flag-setting instruction with no use of
        // the flags and assumes it's dead.  This keeps the (useless)
        // flag-setting behavior alive while also keeping the (useful)
        // memory update effect.
        for (uint k = ((n->Opcode() == Op_SCMemProj) ? 0:1); k < n->req(); k++) {
          Node *def = n->in(k);
          uint x = _lrg_map.live_range_id(def);
          if (!x) {
            continue;
          }
          LRG &lrg = lrgs(x);
          // No use-side cost for spilling debug info
          if (k < debug_start) {
            // A USE costs twice block frequency (once for the Load, once
            // for a Load-delay).  Rematerialized uses only cost once.
            lrg._cost += (def->rematerialize() ? block->_freq : (block->_freq + block->_freq));
          }
          // It is live now
          if (liveout.insert(x)) {
            // Newly live things assumed live from here to top of block
            lrg._area += cost;
            // Adjust register pressure
            if (lrg.mask().is_UP() && lrg.mask_size()) {
              if (lrg._is_float || lrg._is_vector) {
                pressure[1] += lrg.reg_pressure();
                if (pressure[1] > block->_freg_pressure)  {
                  block->_freg_pressure = pressure[1];
                }
              } else if( lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) {
                pressure[0] += lrg.reg_pressure();
                if (pressure[0] > block->_reg_pressure) {
                  block->_reg_pressure = pressure[0];
                }
              }
            }
            assert( pressure[0] == count_int_pressure  (&liveout), "" );
            assert( pressure[1] == count_float_pressure(&liveout), "" );
          }
          assert(!(lrg._area < 0.0), "negative spill area" );
        }
      }
    } // End of reverse pass over all instructions in block

    // If we run off the top of the block with high pressure and
    // never see a hi-to-low pressure transition, just record that
    // the whole block is high pressure.
    if (pressure[0] > (uint)INTPRESSURE) {
      hrp_index[0] = 0;
      if (pressure[0] > block->_reg_pressure) {
        block->_reg_pressure = pressure[0];
      }
    }
    if (pressure[1] > (uint)FLOATPRESSURE) {
      hrp_index[1] = 0;
      if (pressure[1] > block->_freg_pressure) {
        block->_freg_pressure = pressure[1];
      }
    }

    // Compute high pressure indices; avoid landing in the middle of projnodes
    j = hrp_index[0];
    if (j < block->number_of_nodes() && j < block->end_idx() + 1) {
      Node* cur = block->get_node(j);
      while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) {
        j--;
        cur = block->get_node(j);
      }
    }
    block->_ihrp_index = j;
    j = hrp_index[1];
    if (j < block->number_of_nodes() && j < block->end_idx() + 1) {
      Node* cur = block->get_node(j);
      while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) {
        j--;
        cur = block->get_node(j);
      }
    }
    block->_fhrp_index = j;

#ifndef PRODUCT
    // Gather Register Pressure Statistics
    if( PrintOptoStatistics ) {
      if (block->_reg_pressure > (uint)INTPRESSURE || block->_freg_pressure > (uint)FLOATPRESSURE) {
        _high_pressure++;
      } else {
        _low_pressure++;
      }
    }
#endif
  } // End of for all blocks

  return must_spill;
}
C:\hotspot-69087d08d473\src\share\vm/opto/ifnode.cpp
/*
 * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/addnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/connode.hpp"
#include "opto/loopnode.hpp"
#include "opto/phaseX.hpp"
#include "opto/runtime.hpp"
#include "opto/subnode.hpp"

// Portions of code courtesy of Clifford Click

// Optimization - Graph Style


extern int explicit_null_checks_elided;

//=============================================================================
//------------------------------Value------------------------------------------
// Return a tuple for whichever arm of the IF is reachable
const Type *IfNode::Value( PhaseTransform *phase ) const {
  if( !in(0) ) return Type::TOP;
  if( phase->type(in(0)) == Type::TOP )
    return Type::TOP;
  const Type *t = phase->type(in(1));
  if( t == Type::TOP )          // data is undefined
    return TypeTuple::IFNEITHER; // unreachable altogether
  if( t == TypeInt::ZERO )      // zero, or false
    return TypeTuple::IFFALSE;  // only false branch is reachable
  if( t == TypeInt::ONE )       // 1, or true
    return TypeTuple::IFTRUE;   // only true branch is reachable
  assert( t == TypeInt::BOOL, "expected boolean type" );

  return TypeTuple::IFBOTH;     // No progress
}

const RegMask &IfNode::out_RegMask() const {
  return RegMask::Empty;
}

//------------------------------split_if---------------------------------------
// Look for places where we merge constants, then test on the merged value.
// If the IF test will be constant folded on the path with the constant, we
// win by splitting the IF to before the merge point.
static Node* split_if(IfNode *iff, PhaseIterGVN *igvn) {
  // I could be a lot more general here, but I'm trying to squeeze this
  // in before the Christmas '98 break so I'm gonna be kinda restrictive
  // on the patterns I accept.  CNC

  // Look for a compare of a constant and a merged value
  Node *i1 = iff->in(1);
  if( !i1->is_Bool() ) return NULL;
  BoolNode *b = i1->as_Bool();
  Node *cmp = b->in(1);
  if( !cmp->is_Cmp() ) return NULL;
  i1 = cmp->in(1);
  if( i1 == NULL || !i1->is_Phi() ) return NULL;
  PhiNode *phi = i1->as_Phi();
  if( phi->is_copy() ) return NULL;
  Node *con2 = cmp->in(2);
  if( !con2->is_Con() ) return NULL;
  // See that the merge point contains some constants
  Node *con1=NULL;
  uint i4;
  for( i4 = 1; i4 < phi->req(); i4++ ) {
    con1 = phi->in(i4);
    if( !con1 ) return NULL;    // Do not optimize partially collapsed merges
    if( con1->is_Con() ) break; // Found a constant
    // Also allow null-vs-not-null checks
    const TypePtr *tp = igvn->type(con1)->isa_ptr();
    if( tp && tp->_ptr == TypePtr::NotNull )
      break;
  }
  if( i4 >= phi->req() ) return NULL; // Found no constants

  igvn->C->set_has_split_ifs(true); // Has chance for split-if

  // Make sure that the compare can be constant folded away
  Node *cmp2 = cmp->clone();
  cmp2->set_req(1,con1);
  cmp2->set_req(2,con2);
  const Type *t = cmp2->Value(igvn);
  // This compare is dead, so whack it!
  igvn->remove_dead_node(cmp2);
  if( !t->singleton() ) return NULL;

  // No intervening control, like a simple Call
  Node *r = iff->in(0);
  if( !r->is_Region() ) return NULL;
  if( phi->region() != r ) return NULL;
  // No other users of the cmp/bool
  if (b->outcnt() != 1 || cmp->outcnt() != 1) {
    //tty->print_cr("many users of cmp/bool");
    return NULL;
  }

  // Make sure we can determine where all the uses of merged values go
  for (DUIterator_Fast jmax, j = r->fast_outs(jmax); j < jmax; j++) {
    Node* u = r->fast_out(j);
    if( u == r ) continue;
    if( u == iff ) continue;
    if( u->outcnt() == 0 ) continue; // use is dead & ignorable
    if( !u->is_Phi() ) {
      /*
      if( u->is_Start() ) {
        tty->print_cr("Region has inlined start use");
      } else {
        tty->print_cr("Region has odd use");
        u->dump(2);
      }*/
      return NULL;
    }
    if( u != phi ) {
      // CNC - do not allow any other merged value
      //tty->print_cr("Merging another value");
      //u->dump(2);
      return NULL;
    }
    // Make sure we can account for all Phi uses
    for (DUIterator_Fast kmax, k = u->fast_outs(kmax); k < kmax; k++) {
      Node* v = u->fast_out(k); // User of the phi
      // CNC - Allow only really simple patterns.
      // In particular I disallow AddP of the Phi, a fairly common pattern
      if( v == cmp ) continue;  // The compare is OK
      if( (v->is_ConstraintCast()) &&
          v->in(0)->in(0) == iff )
        continue;               // CastPP/II of the IfNode is OK
      // Disabled following code because I cannot tell if exactly one
      // path dominates without a real dominator check. CNC 9/9/1999
      //uint vop = v->Opcode();
      //if( vop == Op_Phi ) {     // Phi from another merge point might be OK
      //  Node *r = v->in(0);     // Get controlling point
      //  if( !r ) return NULL;   // Degraded to a copy
      //  // Find exactly one path in (either True or False doms, but not IFF)
      //  int cnt = 0;
      //  for( uint i = 1; i < r->req(); i++ )
      //    if( r->in(i) && r->in(i)->in(0) == iff )
      //      cnt++;
      //  if( cnt == 1 ) continue; // Exactly one of True or False guards Phi
      //}
      if( !v->is_Call() ) {
        /*
        if( v->Opcode() == Op_AddP ) {
          tty->print_cr("Phi has AddP use");
        } else if( v->Opcode() == Op_CastPP ) {
          tty->print_cr("Phi has CastPP use");
        } else if( v->Opcode() == Op_CastII ) {
          tty->print_cr("Phi has CastII use");
        } else {
          tty->print_cr("Phi has use I cant be bothered with");
        }
        */
      }
      return NULL;

      /* CNC - Cut out all the fancy acceptance tests
      // Can we clone this use when doing the transformation?
      // If all uses are from Phis at this merge or constants, then YES.
      if( !v->in(0) && v != cmp ) {
        tty->print_cr("Phi has free-floating use");
        v->dump(2);
        return NULL;
      }
      for( uint l = 1; l < v->req(); l++ ) {
        if( (!v->in(l)->is_Phi() || v->in(l)->in(0) != r) &&
            !v->in(l)->is_Con() ) {
          tty->print_cr("Phi has use");
          v->dump(2);
          return NULL;
        } // End of if Phi-use input is neither Phi nor Constant
      } // End of for all inputs to Phi-use
      */
    } // End of for all uses of Phi
  } // End of for all uses of Region

  // Only do this if the IF node is in a sane state
  if (iff->outcnt() != 2)
    return NULL;

  // Got a hit!  Do the Mondo Hack!
  //
  //ABC  a1c   def   ghi            B     1     e     h   A C   a c   d f   g i
  // R - Phi - Phi - Phi            Rc - Phi - Phi - Phi   Rx - Phi - Phi - Phi
  //     cmp - 2                         cmp - 2               cmp - 2
  //       bool                            bool_c                bool_x
  //       if                               if_c                  if_x
  //      T  F                              T  F                  T  F
  // ..s..    ..t ..                   ..s..    ..t..        ..s..    ..t..
  //
  // Split the paths coming into the merge point into 2 separate groups of
  // merges.  On the left will be all the paths feeding constants into the
  // Cmp's Phi.  On the right will be the remaining paths.  The Cmp's Phi
  // will fold up into a constant; this will let the Cmp fold up as well as
  // all the control flow.  Below the original IF we have 2 control
  // dependent regions, 's' and 't'.  Now we will merge the two paths
  // just prior to 's' and 't' from the two IFs.  At least 1 path (and quite
  // likely 2 or more) will promptly constant fold away.
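  // E.g., a Phi merging {0, x} tested by (Phi != 0): the paths feeding 0 flow into
  // iff_c, whose compare (cmp_c) is already a constant and folds away, while the
  // remaining paths keep a real (x != 0) test under iff_x.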
  PhaseGVN *phase = igvn;

  // Make a region merging constants and a region merging the rest
  uint req_c = 0;
  Node* predicate_proj = NULL;
  for (uint ii = 1; ii < r->req(); ii++) {
    if (phi->in(ii) == con1) {
      req_c++;
    }
    Node* proj = PhaseIdealLoop::find_predicate(r->in(ii));
    if (proj != NULL) {
      assert(predicate_proj == NULL, "only one predicate entry expected");
      predicate_proj = proj;
    }
  }

  // If all the defs of the phi are the same constant, we already have the desired end state.
  // Skip the split that would create empty phi and region nodes.
  if((r->req() - req_c) == 1) {
    return NULL;
  }

  Node* predicate_c = NULL;
  Node* predicate_x = NULL;
  bool counted_loop = r->is_CountedLoop();

  Node *region_c = new (igvn->C) RegionNode(req_c + 1);
  Node *phi_c    = con1;
  uint  len      = r->req();
  Node *region_x = new (igvn->C) RegionNode(len - req_c);
  Node *phi_x    = PhiNode::make_blank(region_x, phi);
  for (uint i = 1, i_c = 1, i_x = 1; i < len; i++) {
    if (phi->in(i) == con1) {
      region_c->init_req( i_c++, r  ->in(i) );
      if (r->in(i) == predicate_proj)
        predicate_c = predicate_proj;
    } else {
      region_x->init_req( i_x,   r  ->in(i) );
      phi_x   ->init_req( i_x++, phi->in(i) );
      if (r->in(i) == predicate_proj)
        predicate_x = predicate_proj;
    }
  }
  if (predicate_c != NULL && (req_c > 1)) {
    assert(predicate_x == NULL, "only one predicate entry expected");
    predicate_c = NULL; // Do not clone predicate below merge point
  }
  if (predicate_x != NULL && ((len - req_c) > 2)) {
    assert(predicate_c == NULL, "only one predicate entry expected");
    predicate_x = NULL; // Do not clone predicate below merge point
  }

  // Register the new RegionNodes but do not transform them.  Cannot
  // transform until the entire Region/Phi conglomerate has been hacked
  // as a single huge transform.
  igvn->register_new_node_with_optimizer( region_c );
  igvn->register_new_node_with_optimizer( region_x );
  // Prevent the untimely death of phi_x.  Currently he has no uses.  He is
  // about to get one.  If this only use goes away, then phi_x will look dead.
  // However, he will be picking up some more uses down below.
  Node *hook = new (igvn->C) Node(4);
  hook->init_req(0, phi_x);
  hook->init_req(1, phi_c);
  phi_x = phase->transform( phi_x );

  // Make the compare
  Node *cmp_c = phase->makecon(t);
  Node *cmp_x = cmp->clone();
  cmp_x->set_req(1,phi_x);
  cmp_x->set_req(2,con2);
  cmp_x = phase->transform(cmp_x);
  // Make the bool
  Node *b_c = phase->transform(new (igvn->C) BoolNode(cmp_c,b->_test._test));
  Node *b_x = phase->transform(new (igvn->C) BoolNode(cmp_x,b->_test._test));
  // Make the IfNode
  IfNode *iff_c = new (igvn->C) IfNode(region_c,b_c,iff->_prob,iff->_fcnt);
  igvn->set_type_bottom(iff_c);
  igvn->_worklist.push(iff_c);
  hook->init_req(2, iff_c);

  IfNode *iff_x = new (igvn->C) IfNode(region_x,b_x,iff->_prob, iff->_fcnt);
  igvn->set_type_bottom(iff_x);
  igvn->_worklist.push(iff_x);
  hook->init_req(3, iff_x);

  // Make the true/false arms
  Node *iff_c_t = phase->transform(new (igvn->C) IfTrueNode (iff_c));
  Node *iff_c_f = phase->transform(new (igvn->C) IfFalseNode(iff_c));
  if (predicate_c != NULL) {
    assert(predicate_x == NULL, "only one predicate entry expected");
    // Clone loop predicates to each path
    iff_c_t = igvn->clone_loop_predicates(predicate_c, iff_c_t, !counted_loop);
    iff_c_f = igvn->clone_loop_predicates(predicate_c, iff_c_f, !counted_loop);
  }
  Node *iff_x_t = phase->transform(new (igvn->C) IfTrueNode (iff_x));
  Node *iff_x_f = phase->transform(new (igvn->C) IfFalseNode(iff_x));
  if (predicate_x != NULL) {
    assert(predicate_c == NULL, "only one predicate entry expected");
    // Clone loop predicates to each path
    iff_x_t = igvn->clone_loop_predicates(predicate_x, iff_x_t, !counted_loop);
    iff_x_f = igvn->clone_loop_predicates(predicate_x, iff_x_f, !counted_loop);
  }

  // Merge the TRUE paths
  Node *region_s = new (igvn->C) RegionNode(3);
  igvn->_worklist.push(region_s);
  region_s->init_req(1, iff_c_t);
  region_s->init_req(2, iff_x_t);
  igvn->register_new_node_with_optimizer( region_s );

  // Merge the FALSE paths
  Node *region_f = new (igvn->C) RegionNode(3);
  igvn->_worklist.push(region_f);
  region_f->init_req(1, iff_c_f);
  region_f->init_req(2, iff_x_f);
  igvn->register_new_node_with_optimizer( region_f );

  igvn->hash_delete(cmp);// Remove soon-to-be-dead node from hash table.
  cmp->set_req(1,NULL);  // Whack the inputs to cmp because it will be dead
  cmp->set_req(2,NULL);
  // Check for all uses of the Phi and give them a new home.
  // The 'cmp' got cloned, but CastPP/IIs need to be moved.
  Node *phi_s = NULL;     // do not construct unless needed
  Node *phi_f = NULL;     // do not construct unless needed
  for (DUIterator_Last i2min, i2 = phi->last_outs(i2min); i2 >= i2min; --i2) {
    Node* v = phi->last_out(i2);// User of the phi
    igvn->rehash_node_delayed(v); // Have to fixup other Phi users
    uint vop = v->Opcode();
    Node *proj = NULL;
    if( vop == Op_Phi ) {       // Remote merge point
      Node *r = v->in(0);
      for (uint i3 = 1; i3 < r->req(); i3++)
        if (r->in(i3) && r->in(i3)->in(0) == iff) {
          proj = r->in(i3);
          break;
        }
    } else if( v->is_ConstraintCast() ) {
      proj = v->in(0);          // Controlling projection
    } else {
      assert( 0, "do not know how to handle this guy" );
    }

    Node *proj_path_data, *proj_path_ctrl;
    if( proj->Opcode() == Op_IfTrue ) {
      if( phi_s == NULL ) {
        // Only construct phi_s if needed, otherwise provides
        // interfering use.
        phi_s = PhiNode::make_blank(region_s,phi);
        phi_s->init_req( 1, phi_c );
        phi_s->init_req( 2, phi_x );
        hook->add_req(phi_s);
        phi_s = phase->transform(phi_s);
      }
      proj_path_data = phi_s;
      proj_path_ctrl = region_s;
    } else {
      if( phi_f == NULL ) {
        // Only construct phi_f if needed, otherwise provides
        // interfering use.
        phi_f = PhiNode::make_blank(region_f,phi);
        phi_f->init_req( 1, phi_c );
        phi_f->init_req( 2, phi_x );
        hook->add_req(phi_f);
        phi_f = phase->transform(phi_f);
      }
      proj_path_data = phi_f;
      proj_path_ctrl = region_f;
    }

    // Fixup 'v' for the split
    if( vop == Op_Phi ) {       // Remote merge point
      uint i;
      for( i = 1; i < v->req(); i++ )
        if( v->in(i) == phi )
          break;
      v->set_req(i, proj_path_data );
    } else if( v->is_ConstraintCast() ) {
      v->set_req(0, proj_path_ctrl );
      v->set_req(1, proj_path_data );
    } else
      ShouldNotReachHere();
  }

  // Now replace the original iff's True/False with region_s/region_f.
  // This makes the original iff go dead.
  for (DUIterator_Last i3min, i3 = iff->last_outs(i3min); i3 >= i3min; --i3) {
    Node* p = iff->last_out(i3);
    assert( p->Opcode() == Op_IfTrue || p->Opcode() == Op_IfFalse, "" );
    Node *u = (p->Opcode() == Op_IfTrue) ? region_s : region_f;
    // Replace p with u
    igvn->add_users_to_worklist(p);
    for (DUIterator_Last lmin, l = p->last_outs(lmin); l >= lmin;) {
      Node* x = p->last_out(l);
      igvn->hash_delete(x);
      uint uses_found = 0;
      for( uint j = 0; j < x->req(); j++ ) {
        if( x->in(j) == p ) {
          x->set_req(j, u);
          uses_found++;
        }
      }
      l -= uses_found;    // we deleted 1 or more copies of this edge
    }
    igvn->remove_dead_node(p);
  }

  // Force the original merge dead
  igvn->hash_delete(r);
  // First, remove region's dead users.
  for (DUIterator_Last lmin, l = r->last_outs(lmin); l >= lmin;) {
    Node* u = r->last_out(l);
    if( u == r ) {
      r->set_req(0, NULL);
    } else {
      assert(u->outcnt() == 0, "only dead users");
      igvn->remove_dead_node(u);
    }
    l -= 1;
  }
  igvn->remove_dead_node(r);

  // Now remove the bogus extra edges used to keep things alive
  igvn->remove_dead_node( hook );

  // Must return either the original node (now dead) or a new node
  // (Do not return a top here, since that would break the uniqueness of top.)
  return new (igvn->C) ConINode(TypeInt::ZERO);
}

//------------------------------is_range_check---------------------------------
// Return 0 if not a range check.  Return 1 if a range check and set index and
// offset.  Return 2 if we had to negate the test.  Index is NULL if the check
// is versus a constant.
int IfNode::is_range_check(Node* &range, Node* &index, jint &offset) {
  if (outcnt() != 2) {
    return 0;
  }
  Node* b = in(1);
  if (b == NULL || !b->is_Bool())  return 0;
  BoolNode* bn = b->as_Bool();
  Node* cmp = bn->in(1);
  if (cmp == NULL)  return 0;
  if (cmp->Opcode() != Op_CmpU)  return 0;

  Node* l = cmp->in(1);
  Node* r = cmp->in(2);
  int flip_test = 1;
  if (bn->_test._test == BoolTest::le) {
    l = cmp->in(2);
    r = cmp->in(1);
    flip_test = 2;
  } else if (bn->_test._test != BoolTest::lt) {
    return 0;
  }
  if (l->is_top())  return 0;   // Top input means dead test
  if (r->Opcode() != Op_LoadRange)  return 0;

  // We have recognized one of these forms:
  //  Flip 1:  If (Bool[<] CmpU(l, LoadRange)) ...
  //  Flip 2:  If (Bool[<=] CmpU(LoadRange, l)) ...

  // Make sure it's a real range check by requiring an uncommon trap
  // along the OOB path.  Otherwise, it's possible that the user wrote
  // something which optimized to look like a range check but behaves
  // in some other way.
  Node* iftrap = proj_out(flip_test == 2 ? true : false);
  bool found_trap = false;
  if (iftrap != NULL) {
    Node* u = iftrap->unique_ctrl_out();
    if (u != NULL) {
      // It could be a merge point (Region) for uncommon trap.
      if (u->is_Region()) {
        Node* c = u->unique_ctrl_out();
        if (c != NULL) {
          iftrap = u;
          u = c;
        }
      }
      if (u->in(0) == iftrap && u->is_CallStaticJava()) {
        int req = u->as_CallStaticJava()->uncommon_trap_request();
        if (Deoptimization::trap_request_reason(req) ==
            Deoptimization::Reason_range_check) {
          found_trap = true;
        }
      }
    }
  }
  if (!found_trap)  return 0;   // sorry, no cigar

  // Look for index+offset form
  Node* ind = l;
  jint  off = 0;
  if (l->is_top()) {
    return 0;
  } else if (l->Opcode() == Op_AddI) {
    if ((off = l->in(1)->find_int_con(0)) != 0) {
      ind = l->in(2);
    } else if ((off = l->in(2)->find_int_con(0)) != 0) {
      ind = l->in(1);
    }
  } else if ((off = l->find_int_con(-1)) >= 0) {
    // constant offset with no variable index
    ind = NULL;
  } else {
    // variable index with no constant offset (or dead negative index)
    off = 0;
  }

  // Return all the values:
  index  = ind;
  offset = off;
  range  = r;
  return flip_test;
}

//------------------------------adjust_check-----------------------------------
// Adjust (widen) a prior range check
static void adjust_check(Node* proj, Node* range, Node* index,
                         int flip, jint off_lo, PhaseIterGVN* igvn) {
  PhaseGVN *gvn = igvn;
  // Break apart the old check
  Node *iff = proj->in(0);
  Node *bol = iff->in(1);
  if( bol->is_top() ) return;   // In case a partially dead range check appears
  // bail (or bomb[ASSERT/DEBUG]) if NOT projection-->IfNode-->BoolNode
  DEBUG_ONLY( if( !bol->is_Bool() ) { proj->dump(3); fatal("Expect projection-->IfNode-->BoolNode"); } )
  if( !bol->is_Bool() ) return;

  Node *cmp = bol->in(1);
  // Compute a new check
  Node *new_add = gvn->intcon(off_lo);
  if( index ) {
    new_add = off_lo ? gvn->transform(new (gvn->C) AddINode( index, new_add )) : index;
  }
  Node *new_cmp = (flip == 1)
    ? new (gvn->C) CmpUNode( new_add, range )
    : new (gvn->C) CmpUNode( range, new_add );
  new_cmp = gvn->transform(new_cmp);
  // See if no need to adjust the existing check
  if( new_cmp == cmp ) return;
  // Else, adjust existing check
  Node *new_bol = gvn->transform( new (gvn->C) BoolNode( new_cmp, bol->as_Bool()->_test._test ) );
  igvn->rehash_node_delayed( iff );
  iff->set_req_X( 1, new_bol, igvn );
}

//------------------------------up_one_dom-------------------------------------
// Walk up the dominator tree one step.  Return NULL at root or true
// complex merges.  Skips through small diamonds.
Node* IfNode::up_one_dom(Node *curr, bool linear_only) {
  Node *dom = curr->in(0);
  if( !dom )                    // Found a Region degraded to a copy?
    return curr->nonnull_req(); // Skip thru it

  if( curr != dom )             // Normal walk up one step?
    return dom;

  // Use linear_only if we are still parsing, since we cannot
  // trust the regions to be fully filled in.
  if (linear_only)
    return NULL;

  if( dom->is_Root() )
    return NULL;

  // Else hit a Region.  Check for a loop header
  if( dom->is_Loop() )
    return dom->in(1);          // Skip up thru loops

  // Check for small diamonds
  Node *din1, *din2, *din3, *din4;
  if( dom->req() == 3 &&        // 2-path merge point
      (din1 = dom ->in(1)) &&   // Left  path exists
      (din2 = dom ->in(2)) &&   // Right path exists
      (din3 = din1->in(0)) &&   // Left  path up one
      (din4 = din2->in(0)) ) {  // Right path up one
    if( din3->is_Call() &&      // Handle a slow-path call on either arm
        (din3 = din3->in(0)) )
      din3 = din3->in(0);
    if( din4->is_Call() &&      // Handle a slow-path call on either arm
        (din4 = din4->in(0)) )
      din4 = din4->in(0);
    if (din3 != NULL && din3 == din4 && din3->is_If()) // Regions not degraded to a copy
      return din3;              // Skip around diamonds
  }

  // Give up the search at true merges
  return NULL;                  // Dead loop?  Or hit root?
}


//------------------------------filtered_int_type--------------------------------
// Return a possibly more restrictive type for val based on condition control flow for an if
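// E.g., on the IfTrue projection of a (val < limit) test where limit has type
// int:[0..10], val can be narrowed to [min_jint..9].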
const TypeInt* IfNode::filtered_int_type(PhaseGVN* gvn, Node *val, Node* if_proj) {
  assert(if_proj &&
         (if_proj->Opcode() == Op_IfTrue || if_proj->Opcode() == Op_IfFalse), "expecting an if projection");
  if (if_proj->in(0) && if_proj->in(0)->is_If()) {
    IfNode* iff = if_proj->in(0)->as_If();
    if (iff->in(1) && iff->in(1)->is_Bool()) {
      BoolNode* bol = iff->in(1)->as_Bool();
      if (bol->in(1) && bol->in(1)->is_Cmp()) {
        const CmpNode* cmp  = bol->in(1)->as_Cmp();
        if (cmp->in(1) == val) {
          const TypeInt* cmp2_t = gvn->type(cmp->in(2))->isa_int();
          if (cmp2_t != NULL) {
            jint lo = cmp2_t->_lo;
            jint hi = cmp2_t->_hi;
            BoolTest::mask msk = if_proj->Opcode() == Op_IfTrue ? bol->_test._test : bol->_test.negate();
            switch (msk) {
            case BoolTest::ne:
              // Can't refine type
              return NULL;
            case BoolTest::eq:
              return cmp2_t;
            case BoolTest::lt:
              lo = TypeInt::INT->_lo;
              if (hi - 1 < hi) {
                hi = hi - 1;
              }
              break;
            case BoolTest::le:
              lo = TypeInt::INT->_lo;
              break;
            case BoolTest::gt:
              if (lo + 1 > lo) {
                lo = lo + 1;
              }
              hi = TypeInt::INT->_hi;
              break;
            case BoolTest::ge:
              // lo unchanged
              hi = TypeInt::INT->_hi;
              break;
            }
            const TypeInt* rtn_t = TypeInt::make(lo, hi, cmp2_t->_widen);
            return rtn_t;
          }
        }
      }
    }
  }
  return NULL;
}

//------------------------------fold_compares----------------------------
// See if a pair of CmpIs can be converted into a CmpU.  In some cases
// the direction of this if is determined by the preceding if so it
// can be eliminated entirely.  Given an if testing (CmpI n c) check
// for an immediately control dependent if that is testing (CmpI n c2)
// and has one projection leading to this if and the other projection
// leading to a region that merges one of this if's control
// projections.
//
//                   If
//                  / |
//                 /  |
//                /   |
//              If    |
//              /\    |
//             /  \   |
//            /    \  |
//           /    Region
//
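// In the successful case below, the two signed compares collapse, in effect, into a
// single unsigned test of whether n lies in the failing interval [lo, hi], built as
// (CmpU (SubI n lo) (hi - lo + 1)).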
Node* IfNode::fold_compares(PhaseGVN* phase) {
  if (Opcode() != Op_If) return NULL;

  Node* this_cmp = in(1)->in(1);
  if (this_cmp != NULL && this_cmp->Opcode() == Op_CmpI &&
      this_cmp->in(2)->is_Con() && this_cmp->in(2) != phase->C->top()) {
    Node* ctrl = in(0);
    BoolNode* this_bool = in(1)->as_Bool();
    Node* n = this_cmp->in(1);
    int hi = this_cmp->in(2)->get_int();
    if (ctrl != NULL && ctrl->is_Proj() && ctrl->outcnt() == 1 &&
        ctrl->in(0)->is_If() &&
        ctrl->in(0)->outcnt() == 2 &&
        ctrl->in(0)->in(1)->is_Bool() &&
        ctrl->in(0)->in(1)->in(1)->Opcode() == Op_CmpI &&
        ctrl->in(0)->in(1)->in(1)->in(2)->is_Con() &&
        ctrl->in(0)->in(1)->in(1)->in(2) != phase->C->top() &&
        ctrl->in(0)->in(1)->in(1)->in(1) == n) {
      IfNode* dom_iff = ctrl->in(0)->as_If();
      Node* otherproj = dom_iff->proj_out(!ctrl->as_Proj()->_con);
      if (otherproj->outcnt() == 1 && otherproj->unique_out()->is_Region() &&
          this_bool->_test._test != BoolTest::ne && this_bool->_test._test != BoolTest::eq) {
        // Identify which proj goes to the region and which continues on
        RegionNode* region = otherproj->unique_out()->as_Region();
        Node* success = NULL;
        Node* fail = NULL;
        for (int i = 0; i < 2; i++) {
          Node* proj = proj_out(i);
          if (success == NULL && proj->outcnt() == 1 && proj->unique_out() == region) {
            success = proj;
          } else if (fail == NULL) {
            fail = proj;
          } else {
            success = fail = NULL;
          }
        }
        if (success != NULL && fail != NULL && !region->has_phi()) {
          int lo = dom_iff->in(1)->in(1)->in(2)->get_int();
          BoolNode* dom_bool = dom_iff->in(1)->as_Bool();
          Node* dom_cmp =  dom_bool->in(1);
          const TypeInt* failtype  = filtered_int_type(phase, n, ctrl);
          if (failtype != NULL) {
            const TypeInt* type2 = filtered_int_type(phase, n, fail);
            if (type2 != NULL) {
              failtype = failtype->join(type2)->is_int();
            } else {
              failtype = NULL;
            }
          }

          if (failtype != NULL &&
              dom_bool->_test._test != BoolTest::ne && dom_bool->_test._test != BoolTest::eq) {
            int bound = failtype->_hi - failtype->_lo + 1;
            if (failtype->_hi != max_jint && failtype->_lo != min_jint && bound > 1) {
              // Merge the two compares into a single unsigned compare by building  (CmpU (n - lo) hi)
              BoolTest::mask cond = fail->as_Proj()->_con ? BoolTest::lt : BoolTest::ge;
              Node* adjusted = phase->transform(new (phase->C) SubINode(n, phase->intcon(failtype->_lo)));
              Node* newcmp = phase->transform(new (phase->C) CmpUNode(adjusted, phase->intcon(bound)));
              Node* newbool = phase->transform(new (phase->C) BoolNode(newcmp, cond));
              phase->is_IterGVN()->replace_input_of(dom_iff, 1, phase->intcon(ctrl->as_Proj()->_con));
              phase->hash_delete(this);
              set_req(1, newbool);
              return this;
            }
            if (failtype->_lo > failtype->_hi) {
              // previous if determines the result of this if so
              // replace Bool with constant
              phase->hash_delete(this);
              set_req(1, phase->intcon(success->as_Proj()->_con));
              return this;
            }
          }
        }
      }
    }
  }
  return NULL;
}

//------------------------------remove_useless_bool----------------------------
// Check for people making a useless boolean: things like
// if( (x < y ? true : false) ) { ... }
// Replace with if( x < y ) { ... }
static Node *remove_useless_bool(IfNode *iff, PhaseGVN *phase) {
  Node *i1 = iff->in(1);
  if( !i1->is_Bool() ) return NULL;
  BoolNode *bol = i1->as_Bool();

  Node *cmp = bol->in(1);
  if( cmp->Opcode() != Op_CmpI ) return NULL;

  // Must be comparing against a bool
  const Type *cmp2_t = phase->type( cmp->in(2) );
  if( cmp2_t != TypeInt::ZERO &&
      cmp2_t != TypeInt::ONE )
    return NULL;

  // Find a prior merge point merging the boolean
  i1 = cmp->in(1);
  if( !i1->is_Phi() ) return NULL;
  PhiNode *phi = i1->as_Phi();
  if( phase->type( phi ) != TypeInt::BOOL )
    return NULL;

  // Check for diamond pattern
  int true_path = phi->is_diamond_phi();
  if( true_path == 0 ) return NULL;

  // Make sure that iff and the control of the phi are different. This
  // should really only happen for dead control flow since it requires
  // an illegal cycle.
  if (phi->in(0)->in(1)->in(0) == iff) return NULL;

  // phi->region->if_proj->ifnode->bool->cmp
  BoolNode *bol2 = phi->in(0)->in(1)->in(0)->in(1)->as_Bool();

  // Now get the 'sense' of the test correct so we can plug in
  // either iff2->in(1) or its complement.
  int flip = 0;
  if( bol->_test._test == BoolTest::ne ) flip = 1-flip;
  else if( bol->_test._test != BoolTest::eq ) return NULL;
  if( cmp2_t == TypeInt::ZERO ) flip = 1-flip;

  const Type *phi1_t = phase->type( phi->in(1) );
  const Type *phi2_t = phase->type( phi->in(2) );
  // Check for Phi(0,1) and flip
  if( phi1_t == TypeInt::ZERO ) {
    if( phi2_t != TypeInt::ONE ) return NULL;
    flip = 1-flip;
  } else {
    // Check for Phi(1,0)
    if( phi1_t != TypeInt::ONE  ) return NULL;
    if( phi2_t != TypeInt::ZERO ) return NULL;
  }
  if( true_path == 2 ) {
    flip = 1-flip;
  }

  Node* new_bol = (flip ? phase->transform( bol2->negate(phase) ) : bol2);
  assert(new_bol != iff->in(1), "must make progress");
  iff->set_req(1, new_bol);
  // Intervening diamond probably goes dead
  phase->C->set_major_progress();
  return iff;
}

static IfNode* idealize_test(PhaseGVN* phase, IfNode* iff);

struct RangeCheck {
  Node* ctl;
  jint off;
};

//------------------------------Ideal------------------------------------------
// Return a node which is more "ideal" than the current node.  Strip out
// control copies
Node *IfNode::Ideal(PhaseGVN *phase, bool can_reshape) {
  if (remove_dead_region(phase, can_reshape))  return this;
  // No Def-Use info?
  if (!can_reshape)  return NULL;
  PhaseIterGVN *igvn = phase->is_IterGVN();

  // Don't bother trying to transform a dead if
  if (in(0)->is_top())  return NULL;
  // Don't bother trying to transform an if with a dead test
  if (in(1)->is_top())  return NULL;
  // Another variation of a dead test
  if (in(1)->is_Con())  return NULL;
  // Another variation of a dead if
  if (outcnt() < 2)  return NULL;

  // Canonicalize the test.
  Node* idt_if = idealize_test(phase, this);
  if (idt_if != NULL)  return idt_if;

  // Try to split the IF
  Node *s = split_if(this, igvn);
  if (s != NULL)  return s;

  // Check for people making a useless boolean: things like
  // if( (x < y ? true : false) ) { ... }
  // Replace with if( x < y ) { ... }
  Node *bol2 = remove_useless_bool(this, phase);
  if( bol2 ) return bol2;

  // Setup to scan up the CFG looking for a dominating test
  Node *dom = in(0);
  Node *prev_dom = this;

  // Check for range-check vs other kinds of tests
  Node *index1, *range1;
  jint offset1;
  int flip1 = is_range_check(range1, index1, offset1);
  if( flip1 ) {
    // Try to remove extra range checks.  Since 'up_one_dom' gives up at merges,
    // all checks we inspect post-dominate the top-most check we find.
    // If we are going to fail the current check and we reach the top check
    // then we are guaranteed to fail, so just start interpreting there.
    // We 'expand' the top 3 range checks to include all post-dominating
    // checks.

    // The top 3 range checks seen
    const int NRC = 3;
    RangeCheck prev_checks[NRC];
    int nb_checks = 0;

    // Low and high offsets seen so far
    jint off_lo = offset1;
    jint off_hi = offset1;

    bool found_immediate_dominator = false;

    // Scan for the top checks and collect range of offsets
    for (int dist = 0; dist < 999; dist++) { // Range-Check scan limit
      if (dom->Opcode() == Op_If &&  // An If?
          prev_dom->in(0) == dom) { // One path of test does dominate?
        if (dom == this) return NULL; // dead loop
        // See if this is a range check
        Node *index2, *range2;
        jint offset2;
        int flip2 = dom->as_If()->is_range_check(range2, index2, offset2);
        // See if this is a _matching_ range check, checking against
        // the same array bounds.
        if (flip2 == flip1 && range2 == range1 && index2 == index1 &&
            dom->outcnt() == 2) {
          if (nb_checks == 0 && dom->in(1) == in(1)) {
            // Found an immediately dominating test at the same offset.
            // This kind of back-to-back test can be eliminated locally,
            // and there is no need to search further for dominating tests.
            assert(offset2 == offset1, "Same test but different offsets");
            found_immediate_dominator = true;
            break;
          }
          // Gather expanded bounds
          off_lo = MIN2(off_lo,offset2);
          off_hi = MAX2(off_hi,offset2);
          // Record top NRC range checks
          prev_checks[nb_checks%NRC].ctl = prev_dom;
          prev_checks[nb_checks%NRC].off = offset2;
          nb_checks++;
        }
      }
      prev_dom = dom;
      dom = up_one_dom(dom);
      if (!dom) break;
    }

    if (!found_immediate_dominator) {
      // Attempt to widen the dominating range check to cover some later
      // ones.  Since range checks "fail" by uncommon-trapping to the
      // interpreter, widening a check can make us speculatively enter
      // the interpreter.  If we see range-check deopt's, do not widen!
      if (!phase->C->allow_range_check_smearing())  return NULL;

      // Didn't find prior covering check, so cannot remove anything.
      if (nb_checks == 0) {
        return NULL;
      }
      // Constant indices only need to check the upper bound.
      // Non-constant indices must check both low and high.
      int chk0 = (nb_checks - 1) % NRC;
      if (index1) {
        if (nb_checks == 1) {
          return NULL;
        } else {
          // If the top range check's constant is the min or max of
          // all constants we widen the next one to cover the whole
          // range of constants.
          RangeCheck rc0 = prev_checks[chk0];
          int chk1 = (nb_checks - 2) % NRC;
          RangeCheck rc1 = prev_checks[chk1];
          if (rc0.off == off_lo) {
            adjust_check(rc1.ctl, range1, index1, flip1, off_hi, igvn);
            prev_dom = rc1.ctl;
          } else if (rc0.off == off_hi) {
            adjust_check(rc1.ctl, range1, index1, flip1, off_lo, igvn);
            prev_dom = rc1.ctl;
          } else {
            // If the top test's constant is not the min or max of all
            // constants, we need 3 range checks. We must leave the
            // top test unchanged because widening it would allow the
            // accesses it protects to successfully read/write out of
            // bounds.
            if (nb_checks == 2) {
              return NULL;
            }
            int chk2 = (nb_checks - 3) % NRC;
            RangeCheck rc2 = prev_checks[chk2];
            // The top range check a+i covers interval: -a <= i < length-a
            // The second range check b+i covers interval: -b <= i < length-b
            if (rc1.off <= rc0.off) {
              // if b <= a, we change the second range check to:
              // -min_of_all_constants <= i < length-min_of_all_constants
              // Together top and second range checks now cover:
              // -min_of_all_constants <= i < length-a
              // which is more restrictive than -b <= i < length-b:
              // -b <= -min_of_all_constants <= i < length-a <= length-b
              // The third check is then changed to:
              // -max_of_all_constants <= i < length-max_of_all_constants
              // so 2nd and 3rd checks restrict allowed values of i to:
              // -min_of_all_constants <= i < length-max_of_all_constants
              adjust_check(rc1.ctl, range1, index1, flip1, off_lo, igvn);
              adjust_check(rc2.ctl, range1, index1, flip1, off_hi, igvn);
            } else {
              // if b > a, we change the second range check to:
              // -max_of_all_constants <= i < length-max_of_all_constants
              // Together top and second range checks now cover:
              // -a <= i < length-max_of_all_constants
              // which is more restrictive than -b <= i < length-b:
              // -b < -a <= i < length-max_of_all_constants <= length-b
              // The third check is then changed to:
              // -max_of_all_constants <= i < length-max_of_all_constants
              // so 2nd and 3rd checks restrict allowed values of i to:
              // -min_of_all_constants <= i < length-max_of_all_constants
              adjust_check(rc1.ctl, range1, index1, flip1, off_hi, igvn);
              adjust_check(rc2.ctl, range1, index1, flip1, off_lo, igvn);
            }
            prev_dom = rc2.ctl;
          }
        }
      } else {
        RangeCheck rc0 = prev_checks[chk0];
        // 'Widen' the offset of the 1st and only covering check
        adjust_check(rc0.ctl, range1, index1, flip1, off_hi, igvn);
        // Test is now covered by prior checks, dominate it out
        prev_dom = rc0.ctl;
      }
    }

  } else {                      // Scan for an equivalent test

    Node *cmp;
    int dist = 0;               // Cutoff limit for search
    int op = Opcode();
    if( op == Op_If &&
        (cmp=in(1)->in(1))->Opcode() == Op_CmpP ) {
      if( cmp->in(2) != NULL && // make sure cmp is not already dead
          cmp->in(2)->bottom_type() == TypePtr::NULL_PTR ) {
        dist = 64;              // Limit for null-pointer scans
      } else {
        dist = 4;               // Do not bother for random pointer tests
      }
    } else {
      dist = 4;                 // Limit for random junky scans
    }

    // Normal equivalent-test check.
    if( !dom ) return NULL;     // Dead loop?

    Node* result = fold_compares(phase);
    if (result != NULL) {
      return result;
    }

    // Search up the dominator tree for an If with an identical test
    while( dom->Opcode() != op    ||  // Not same opcode?
           dom->in(1)    != in(1) ||  // Not same input 1?
           (req() == 3 && dom->in(2) != in(2)) || // Not same input 2?
           prev_dom->in(0) != dom ) { // One path of test does not dominate?
      if( dist < 0 ) return NULL;

      dist--;
      prev_dom = dom;
      dom = up_one_dom( dom );
      if( !dom ) return NULL;
    }

    // Check that we did not follow a loop back to ourselves
    if( this == dom )
      return NULL;

    if( dist > 2 )              // Add to count of NULL checks elided
      explicit_null_checks_elided++;

  } // End of Else scan for an equivalent test

  // Hit!  Remove this IF
#ifndef PRODUCT
  if( TraceIterativeGVN ) {
    tty->print("   Removing IfNode: "); this->dump();
  }
  if( VerifyOpto && !phase->allow_progress() ) {
    // Found an equivalent dominating test;
    // we cannot guarantee reaching a fix-point for these during iterative GVN
    // since intervening nodes may not change.
    return NULL;
  }
#endif

  // Replace dominated IfNode
  dominated_by( prev_dom, igvn );

  // Must return either the original node (now dead) or a new node
  // (Do not return a top here, since that would break the uniqueness of top.)
  return new (phase->C) ConINode(TypeInt::ZERO);
}

//------------------------------dominated_by-----------------------------------
void IfNode::dominated_by( Node *prev_dom, PhaseIterGVN *igvn ) {
  igvn->hash_delete(this);      // Remove self to prevent spurious V-N
  Node *idom = in(0);
  // Need opcode to decide which way 'this' test goes
  int prev_op = prev_dom->Opcode();
  Node *top = igvn->C->top(); // Shortcut to top

  // Loop predicates may have depending checks which should not
  // be skipped. For example, range check predicate has two checks
  // for lower and upper bounds.
  ProjNode* unc_proj = proj_out(1 - prev_dom->as_Proj()->_con)->as_Proj();
  if ((unc_proj != NULL) && (unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate))) {
    prev_dom = idom;
  }

  // Now walk the current IfNode's projections.
  // Loop ends when 'this' has no more uses.
  for (DUIterator_Last imin, i = last_outs(imin); i >= imin; --i) {
    Node *ifp = last_out(i);     // Get IfTrue/IfFalse
    igvn->add_users_to_worklist(ifp);
    // Check which projection it is and set target.
    // Data-target is either the dominating projection of the same type
    // or TOP if the dominating projection is of opposite type.
    // Data-target will be used as the new control edge for the non-CFG
    // nodes like Casts and Loads.
    Node *data_target = (ifp->Opcode() == prev_op) ? prev_dom : top;
    // Control-target is just the If's immediate dominator or TOP.
    Node *ctrl_target = (ifp->Opcode() == prev_op) ?     idom : top;

    // For each child of an IfTrue/IfFalse projection, reroute.
    // Loop ends when projection has no more uses.
    for (DUIterator_Last jmin, j = ifp->last_outs(jmin); j >= jmin; --j) {
      Node* s = ifp->last_out(j);   // Get child of IfTrue/IfFalse
      if( !s->depends_only_on_test() ) {
        // Find the control input matching this def-use edge.
        // For Regions it may not be in slot 0.
        uint l;
        for( l = 0; s->in(l) != ifp; l++ ) { }
        igvn->replace_input_of(s, l, ctrl_target);
      } else {                      // Else, for control producers,
        igvn->replace_input_of(s, 0, data_target); // Move child to data-target
      }
    } // End for each child of a projection

    igvn->remove_dead_node(ifp);
  } // End for each IfTrue/IfFalse child of If

  // Kill the IfNode
  igvn->remove_dead_node(this);
}

//------------------------------Identity---------------------------------------
// If the test is constant & we match, then we are the input Control
Node *IfTrueNode::Identity( PhaseTransform *phase ) {
  // Can only optimize if cannot go the other way
  const TypeTuple *t = phase->type(in(0))->is_tuple();
  return ( t == TypeTuple::IFNEITHER || t == TypeTuple::IFTRUE )
    ? in(0)->in(0)              // IfNode control
    : this;                     // no progress
}

//------------------------------dump_spec--------------------------------------
#ifndef PRODUCT
void IfNode::dump_spec(outputStream *st) const {
  st->print("P=%f, C=%f",_prob,_fcnt);
}
#endif

//------------------------------idealize_test----------------------------------
// Try to canonicalize tests better.  Peek at the Cmp/Bool/If sequence and
// come up with a canonical sequence.  Bools with 'eq', 'gt' and 'ge' forms are
// converted to 'ne', 'le' and 'lt' forms.  IfTrue/IfFalse get swapped as
// needed.
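// E.g., a Bool testing 'eq' is rebuilt with the negated 'ne' test and the
// IfTrue/IfFalse projections are swapped, so later phases see only canonical tests.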
static IfNode* idealize_test(PhaseGVN* phase, IfNode* iff) {
  assert(iff->in(0) != NULL, "If must be live");

  if (iff->outcnt() != 2)  return NULL; // Malformed projections.
  Node* old_if_f = iff->proj_out(false);
  Node* old_if_t = iff->proj_out(true);

  // CountedLoopEnds want the back-control test to be TRUE, regardless of
  // whether they are testing a 'gt' or 'lt' condition.  The 'gt' condition
  // happens in count-down loops
  if (iff->is_CountedLoopEnd())  return NULL;
  if (!iff->in(1)->is_Bool())  return NULL; // Happens for partially optimized IF tests
  BoolNode *b = iff->in(1)->as_Bool();
  BoolTest bt = b->_test;
  // Test already in good order?
  if( bt.is_canonical() )
    return NULL;

  // Flip test to be canonical.  Requires flipping the IfFalse/IfTrue and
  // cloning the IfNode.
  Node* new_b = phase->transform( new (phase->C) BoolNode(b->in(1), bt.negate()) );
  if( !new_b->is_Bool() ) return NULL;
  b = new_b->as_Bool();

  PhaseIterGVN *igvn = phase->is_IterGVN();
  assert( igvn, "Test is not canonical in parser?" );

  // The IF node never really changes, but it needs to be cloned
  iff = new (phase->C) IfNode( iff->in(0), b, 1.0-iff->_prob, iff->_fcnt);

  Node *prior = igvn->hash_find_insert(iff);
  if( prior ) {
    igvn->remove_dead_node(iff);
    iff = (IfNode*)prior;
  } else {
    // Cannot call transform on it just yet
    igvn->set_type_bottom(iff);
  }
  igvn->_worklist.push(iff);

  // Now handle projections.  Cloning not required.
  Node* new_if_f = (Node*)(new (phase->C) IfFalseNode( iff ));
  Node* new_if_t = (Node*)(new (phase->C) IfTrueNode ( iff ));

  igvn->register_new_node_with_optimizer(new_if_f);
  igvn->register_new_node_with_optimizer(new_if_t);
  // Flip test, so flip trailing control
  igvn->replace_node(old_if_f, new_if_t);
  igvn->replace_node(old_if_t, new_if_f);

  // Progress
  return iff;
}

//------------------------------Identity---------------------------------------
// If the test is constant & we match, then we are the input Control
Node *IfFalseNode::Identity( PhaseTransform *phase ) {
  // Can only optimize if cannot go the other way
  const TypeTuple *t = phase->type(in(0))->is_tuple();
  return ( t == TypeTuple::IFNEITHER || t == TypeTuple::IFFALSE )
    ? in(0)->in(0)              // IfNode control
    : this;                     // no progress
}
C:\hotspot-69087d08d473\src\share\vm/opto/indexSet.cpp
/*
 * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/chaitin.hpp"
#include "opto/compile.hpp"
#include "opto/indexSet.hpp"
#include "opto/regmask.hpp"

// This file defines the IndexSet class, a set of sparse integer indices.
// This data structure is used by the compiler in its liveness analysis and
// during register allocation.  It also defines an iterator for this class.

//-------------------------------- Initializations ------------------------------

IndexSet::BitBlock  IndexSet::_empty_block     = IndexSet::BitBlock();

#ifdef ASSERT
// Initialize statistics counters
julong IndexSet::_alloc_new = 0;
julong IndexSet::_alloc_total = 0;

julong IndexSet::_total_bits = 0;
julong IndexSet::_total_used_blocks = 0;
julong IndexSet::_total_unused_blocks = 0;

// Per-set or all-sets operation tracing
int IndexSet::_serial_count = 1;
#endif

// What is the first set bit in a 5 bit integer?
const byte IndexSetIterator::_first_bit[32] = {
  0, 0, 1, 0,
  2, 0, 1, 0,
  3, 0, 1, 0,
  2, 0, 1, 0,
  4, 0, 1, 0,
  2, 0, 1, 0,
  3, 0, 1, 0,
  2, 0, 1, 0
};

// What is the second set bit in a 5 bit integer?
const byte IndexSetIterator::_second_bit[32] = {
  5, 5, 5, 1,
  5, 2, 2, 1,
  5, 3, 3, 1,
  3, 2, 2, 1,
  5, 4, 4, 1,
  4, 2, 2, 1,
  4, 3, 3, 1,
  3, 2, 2, 1
};
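
// A worked example (illustration only): for the 5 bit window value
// 0b01100 (== 12), bits 2 and 3 are set, so _first_bit[12] == 2 and
// _second_bit[12] == 3.  For a single-bit value such as 0b00100 (== 4),
// _first_bit[4] == 2 and _second_bit[4] == 5 (the window size), which
// pushes the iterator past the current window.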

// I tried implementing the IndexSetIterator with a window_size of 8 and
// didn't seem to get a noticeable speedup.  I am leaving in the tables
// in case we want to switch back.

/*const byte IndexSetIterator::_first_bit[256] = {
  8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};

const byte IndexSetIterator::_second_bit[256] = {
  8, 8, 8, 1, 8, 2, 2, 1, 8, 3, 3, 1, 3, 2, 2, 1,
  8, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
  8, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
  5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
  8, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1,
  6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
  6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
  5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
  8, 7, 7, 1, 7, 2, 2, 1, 7, 3, 3, 1, 3, 2, 2, 1,
  7, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
  7, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
  5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
  7, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1,
  6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
  6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
  5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1
};*/

//---------------------------- IndexSet::populate_free_list() -----------------------------
// Populate the free BitBlock list with a batch of BitBlocks.  The BitBlocks
// are 32 byte aligned.
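// For example (illustrative arithmetic only): if Amalloc_4 returns
// mem == 0x1007, then ((0x1007 + 32) & ~0x1F) == 0x1020, which is
// 32 byte aligned and still lies within the extra 32 bytes of slop.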

void IndexSet::populate_free_list() {
  Compile *compile = Compile::current();
  BitBlock *free = (BitBlock*)compile->indexSet_free_block_list();

  char *mem = (char*)arena()->Amalloc_4(sizeof(BitBlock) *
                                        bitblock_alloc_chunk_size + 32);

  // Align the pointer to a 32 byte boundary.
  BitBlock *new_blocks = (BitBlock*)(((uintptr_t)mem + 32) & ~0x001F);

  // Add the new blocks to the free list.
  for (int i = 0; i < bitblock_alloc_chunk_size; i++) {
    new_blocks->set_next(free);
    free = new_blocks;
    new_blocks++;
  }

  compile->set_indexSet_free_block_list(free);

#ifdef ASSERT
  if (CollectIndexSetStatistics) {
    inc_stat_counter(&_alloc_new, bitblock_alloc_chunk_size);
  }
#endif
}


//---------------------------- IndexSet::alloc_block() ------------------------
// Allocate a BitBlock from the free list.  If the free list is empty,
// prime it.

IndexSet::BitBlock *IndexSet::alloc_block() {
#ifdef ASSERT
  if (CollectIndexSetStatistics) {
    inc_stat_counter(&_alloc_total, 1);
  }
#endif
  Compile *compile = Compile::current();
  BitBlock* free_list = (BitBlock*)compile->indexSet_free_block_list();
  if (free_list == NULL) {
    populate_free_list();
    free_list = (BitBlock*)compile->indexSet_free_block_list();
  }
  BitBlock *block = free_list;
  compile->set_indexSet_free_block_list(block->next());

  block->clear();
  return block;
}

//---------------------------- IndexSet::alloc_block_containing() -------------
// Allocate a new BitBlock and put it into the position in the _blocks array
// corresponding to element.

IndexSet::BitBlock *IndexSet::alloc_block_containing(uint element) {
  BitBlock *block = alloc_block();
  uint bi = get_block_index(element);
  _blocks[bi] = block;
  return block;
}

//---------------------------- IndexSet::free_block() -------------------------
// Add a BitBlock to the free list.

void IndexSet::free_block(uint i) {
  debug_only(check_watch("free block", i));
  assert(i < _max_blocks, "block index too large");
  BitBlock *block = _blocks[i];
  assert(block != &_empty_block, "cannot free the empty block");
  block->set_next((IndexSet::BitBlock*)Compile::current()->indexSet_free_block_list());
  Compile::current()->set_indexSet_free_block_list(block);
  set_block(i,&_empty_block);
}

//------------------------------lrg_union--------------------------------------
// Compute the union of all elements of one and two which interfere with
// the RegMask mask.  If the degree of the union exceeds
// fail_degree, the union bails out.  The underlying set is cleared before
// the union is performed.

uint IndexSet::lrg_union(uint lr1, uint lr2,
                         const uint fail_degree,
                         const PhaseIFG *ifg,
                         const RegMask &mask ) {
  IndexSet *one = ifg->neighbors(lr1);
  IndexSet *two = ifg->neighbors(lr2);
  LRG &lrg1 = ifg->lrgs(lr1);
  LRG &lrg2 = ifg->lrgs(lr2);
#ifdef ASSERT
  assert(_max_elements == one->_max_elements, "max element mismatch");
  check_watch("union destination");
  one->check_watch("union source");
  two->check_watch("union source");
#endif

  // Compute the degree of the combined live-range.  The combined
  // live-range has the union of the original live-ranges' neighbors set as
  // well as the neighbors of all intermediate copies, minus those neighbors
  // that can not use the intersected allowed-register-set.

  // Copy the larger set.  Insert the smaller set into the larger.
  if (two->count() > one->count()) {
    IndexSet *temp = one;
    one = two;
    two = temp;
  }

  clear();

  // Used to compute degree of register-only interferences.  Infinite-stack
  // neighbors do not alter colorability, as they can always color to some
  // other color.  (A variant of the Briggs assertion)
  uint reg_degree = 0;

  uint element;
  // Load up the combined interference set with the neighbors of one
  IndexSetIterator elements(one);
  while ((element = elements.next()) != 0) {
    LRG &lrg = ifg->lrgs(element);
    if (mask.overlap(lrg.mask())) {
      insert(element);
      if( !lrg.mask().is_AllStack() ) {
        reg_degree += lrg1.compute_degree(lrg);
        if( reg_degree >= fail_degree ) return reg_degree;
      } else {
        // !!!!! Danger!  No update to reg_degree despite having a neighbor.
        // A variant of the Briggs assertion.
        // Not needed if I simplify during coalesce, ala George/Appel.
        assert( lrg.lo_degree(), "" );
      }
    }
  }
  // Add neighbors of two as well
  IndexSetIterator elements2(two);
  while ((element = elements2.next()) != 0) {
    LRG &lrg = ifg->lrgs(element);
    if (mask.overlap(lrg.mask())) {
      if (insert(element)) {
        if( !lrg.mask().is_AllStack() ) {
          reg_degree += lrg2.compute_degree(lrg);
          if( reg_degree >= fail_degree ) return reg_degree;
        } else {
          // !!!!! Danger!  No update to reg_degree despite having a neighbor.
          // A variant of the Briggs assertion.
          // Not needed if I simplify during coalesce, ala George/Appel.
          assert( lrg.lo_degree(), "" );
        }
      }
    }
  }

  return reg_degree;
}

//---------------------------- IndexSet() -----------------------------
// A deep copy constructor.  This is used when you need a scratch copy of this set.

IndexSet::IndexSet (IndexSet *set) {
#ifdef ASSERT
  _serial_number = _serial_count++;
  set->check_watch("copied", _serial_number);
  check_watch("initialized by copy", set->_serial_number);
  _max_elements = set->_max_elements;
#endif
  _count = set->_count;
  _max_blocks = set->_max_blocks;
  if (_max_blocks <= preallocated_block_list_size) {
    _blocks = _preallocated_block_list;
  } else {
    _blocks =
      (IndexSet::BitBlock**) arena()->Amalloc_4(sizeof(IndexSet::BitBlock**) * _max_blocks);
  }
  for (uint i = 0; i < _max_blocks; i++) {
    BitBlock *block = set->_blocks[i];
    if (block == &_empty_block) {
      set_block(i, &_empty_block);
    } else {
      BitBlock *new_block = alloc_block();
      memcpy(new_block->words(), block->words(), sizeof(uint32) * words_per_block);
      set_block(i, new_block);
    }
  }
}

//---------------------------- IndexSet::initialize() -----------------------------
// Prepare an IndexSet for use.

void IndexSet::initialize(uint max_elements) {
#ifdef ASSERT
  _serial_number = _serial_count++;
  check_watch("initialized", max_elements);
  _max_elements = max_elements;
#endif
  _count = 0;
  _max_blocks = (max_elements + bits_per_block - 1) / bits_per_block;

  if (_max_blocks <= preallocated_block_list_size) {
    _blocks = _preallocated_block_list;
  } else {
    _blocks = (IndexSet::BitBlock**) arena()->Amalloc_4(sizeof(IndexSet::BitBlock**) * _max_blocks);
  }
  for (uint i = 0; i < _max_blocks; i++) {
    set_block(i, &_empty_block);
  }
}

//---------------------------- IndexSet::initialize()------------------------------
// Prepare an IndexSet for use.  If it needs to allocate its _blocks array, it does
// so from the Arena passed as a parameter.  BitBlock allocation is still done from
// the static Arena which was set with reset_memory().

void IndexSet::initialize(uint max_elements, Arena *arena) {
#ifdef ASSERT
  _serial_number = _serial_count++;
  check_watch("initialized2", max_elements);
  _max_elements = max_elements;
#endif // ASSERT
  _count = 0;
  _max_blocks = (max_elements + bits_per_block - 1) / bits_per_block;

  if (_max_blocks <= preallocated_block_list_size) {
    _blocks = _preallocated_block_list;
  } else {
    _blocks = (IndexSet::BitBlock**) arena->Amalloc_4(sizeof(IndexSet::BitBlock**) * _max_blocks);
  }
  for (uint i = 0; i < _max_blocks; i++) {
    set_block(i, &_empty_block);
  }
}

//---------------------------- IndexSet::swap() -----------------------------
// Exchange two IndexSets.

void IndexSet::swap(IndexSet *set) {
#ifdef ASSERT
  assert(_max_elements == set->_max_elements, "must have same universe size to swap");
  check_watch("swap", set->_serial_number);
  set->check_watch("swap", _serial_number);
#endif

  for (uint i = 0; i < _max_blocks; i++) {
    BitBlock *temp = _blocks[i];
    set_block(i, set->_blocks[i]);
    set->set_block(i, temp);
  }
  uint temp = _count;
  _count = set->_count;
  set->_count = temp;
}

//---------------------------- IndexSet::dump() -----------------------------
// Print this set.  Used for debugging.

#ifndef PRODUCT
void IndexSet::dump() const {
  IndexSetIterator elements(this);

  tty->print("{");
  uint i;
  while ((i = elements.next()) != 0) {
    tty->print("L%d ", i);
  }
  tty->print_cr("}");
}
#endif

#ifdef ASSERT
//---------------------------- IndexSet::tally_iteration_statistics() -----------------------------
// Update block/bit counts to reflect that this set has been iterated over.

void IndexSet::tally_iteration_statistics() const {
  inc_stat_counter(&_total_bits, count());

  for (uint i = 0; i < _max_blocks; i++) {
    if (_blocks[i] != &_empty_block) {
      inc_stat_counter(&_total_used_blocks, 1);
    } else {
      inc_stat_counter(&_total_unused_blocks, 1);
    }
  }
}

//---------------------------- IndexSet::print_statistics() -----------------------------
// Print statistics about IndexSet usage.

void IndexSet::print_statistics() {
  julong total_blocks = _total_used_blocks + _total_unused_blocks;
  tty->print_cr ("Accumulated IndexSet usage statistics:");
  tty->print_cr ("--------------------------------------");
  tty->print_cr ("  Iteration:");
  tty->print_cr ("    blocks visited: " UINT64_FORMAT, total_blocks);
  tty->print_cr ("    blocks empty: %4.2f%%", 100.0*(double)_total_unused_blocks/total_blocks);
  tty->print_cr ("    bit density (bits/used blocks): %4.2f", (double)_total_bits/_total_used_blocks);
  tty->print_cr ("    bit density (bits/all blocks): %4.2f", (double)_total_bits/total_blocks);
  tty->print_cr ("  Allocation:");
  tty->print_cr ("    blocks allocated: " UINT64_FORMAT, _alloc_new);
  tty->print_cr ("    blocks used/reused: " UINT64_FORMAT, _alloc_total);
}

//---------------------------- IndexSet::verify() -----------------------------
// Expensive test of IndexSet sanity.  Ensure that the count agrees with the
// number of bits in the blocks.  Make sure the iterator is seeing all elements
// of the set.  Meant for use during development.

void IndexSet::verify() const {
  assert(!member(0), "zero cannot be a member");
  uint count = 0;
  uint i;
  for (i = 1; i < _max_elements; i++) {
    if (member(i)) {
      count++;
      assert(count <= _count, "_count is messed up");
    }
  }

  IndexSetIterator elements(this);
  count = 0;
  while ((i = elements.next()) != 0) {
    count++;
    assert(member(i), "returned a non member");
    assert(count <= _count, "iterator returned wrong number of elements");
  }
}
#endif

//---------------------------- IndexSetIterator() -----------------------------
// Create an iterator for a set.  If empty blocks are detected when iterating
// over the set, these blocks are replaced.

IndexSetIterator::IndexSetIterator(IndexSet *set) {
#ifdef ASSERT
  if (CollectIndexSetStatistics) {
    set->tally_iteration_statistics();
  }
  set->check_watch("traversed", set->count());
#endif
  if (set->is_empty()) {
    _current = 0;
    _next_word = IndexSet::words_per_block;
    _next_block = 1;
    _max_blocks = 1;

    // We don't need the following values when we iterate over an empty set.
    // The commented out code is left here to document that the omission
    // is intentional.
    //
    //_value = 0;
    //_words = NULL;
    //_blocks = NULL;
    //_set = NULL;
  } else {
    _current = 0;
    _value = 0;
    _next_block = 0;
    _next_word = IndexSet::words_per_block;

    _max_blocks = set->_max_blocks;
    _words = NULL;
    _blocks = set->_blocks;
    _set = set;
  }
}

//---------------------------- IndexSetIterator(const) -----------------------------
// Iterate over a constant IndexSet.

IndexSetIterator::IndexSetIterator(const IndexSet *set) {
#ifdef ASSERT
  if (CollectIndexSetStatistics) {
    set->tally_iteration_statistics();
  }
  // We don't call check_watch from here to avoid bad recursion.
  //   set->check_watch("traversed const", set->count());
#endif
  if (set->is_empty()) {
    _current = 0;
    _next_word = IndexSet::words_per_block;
    _next_block = 1;
    _max_blocks = 1;

    // We don't need the following values when we iterate over an empty set.
    // The commented out code is left here to document that the omission
    // is intentional.
    //
    //_value = 0;
    //_words = NULL;
    //_blocks = NULL;
    //_set = NULL;
  } else {
    _current = 0;
    _value = 0;
    _next_block = 0;
    _next_word = IndexSet::words_per_block;

    _max_blocks = set->_max_blocks;
    _words = NULL;
    _blocks = set->_blocks;
    _set = NULL;
  }
}

//---------------------------- IndexSetIterator::advance_and_next() -----------------------------
// Advance to the next non-empty word in the set being iterated over.  Return the next element
// if there is one.  If we are done, return 0.  This method is called from the next() method
// when it gets done with a word.

uint IndexSetIterator::advance_and_next() {
  // See if there is another non-empty word in the current block.
  for (uint wi = _next_word; wi < (unsigned)IndexSet::words_per_block; wi++) {
    if (_words[wi] != 0) {
      // Found a non-empty word.
      _value = ((_next_block - 1) * IndexSet::bits_per_block) + (wi * IndexSet::bits_per_word);
      _current = _words[wi];

      _next_word = wi+1;

      return next();
    }
  }

  // We ran out of words in the current block.  Advance to next non-empty block.
  for (uint bi = _next_block; bi < _max_blocks; bi++) {
    if (_blocks[bi] != &IndexSet::_empty_block) {
      // Found a non-empty block.

      _words = _blocks[bi]->words();
      for (uint wi = 0; wi < (unsigned)IndexSet::words_per_block; wi++) {
        if (_words[wi] != 0) {
          // Found a non-empty word.
          _value = (bi * IndexSet::bits_per_block) + (wi * IndexSet::bits_per_word);
          _current = _words[wi];

          _next_block = bi+1;
          _next_word = wi+1;

          return next();
        }
      }

      // All of the words in the block were empty.  Replace
      // the block with the empty block.
      if (_set) {
        _set->free_block(bi);
      }
    }
  }

  // These assignments make redundant calls to next on a finished iterator
  // faster.  Probably not necessary.
  _next_block = _max_blocks;
  _next_word = IndexSet::words_per_block;

  // No more words.
  return 0;
}
C:\hotspot-69087d08d473\src\share\vm/opto/indexSet.hpp
/*
 * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_VM_OPTO_INDEXSET_HPP
#define SHARE_VM_OPTO_INDEXSET_HPP

#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
#include "opto/compile.hpp"
#include "opto/regmask.hpp"

// This file defines the IndexSet class, a set of sparse integer indices.
// This data structure is used by the compiler in its liveness analysis and
// during register allocation.

//-------------------------------- class IndexSet ----------------------------
// An IndexSet is a piece-wise bitvector.  At the top level, we have an array
// of pointers to bitvector chunks called BitBlocks.  Each BitBlock has a fixed
// size and is allocated from a shared free list.  The bits which are set in
// each BitBlock correspond to the elements of the set.
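//
// A minimal usage sketch (illustrative only, not part of the original header;
// the universe size "max_lrg_id" is a made-up name, and the compiler is
// assumed to have already set up the IndexSet arena):
//
//   IndexSet live;
//   live.initialize(max_lrg_id);   // size of the element universe
//   live.insert(42);               // add element 42 (element 0 is reserved)
//   if (live.member(42)) { ... }   // membership test
//   live.remove(42);               // drop the element again
//   uint n = live.count();         // number of elements currently in the set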

class IndexSet : public ResourceObj {
 friend class IndexSetIterator;

 public:
  // When we allocate an IndexSet, it starts off with an array of top level block
  // pointers of a fixed length.  This size is intended to be large enough for the
  // majority of IndexSets.  In the cases when this size is not large enough,
  // a separately allocated array is used.

  // The length of the preallocated top level block array
  enum { preallocated_block_list_size = 16 };

  // Elements of an IndexSet are decomposed into three fields.  The highest order
  // bits are the block index, which tells which high level block holds the element.
  // Within that block, the word index indicates which word holds the element.
  // Finally, the bit index determines which single bit within that word indicates
  // membership of the element in the set.
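  //
  // A worked example (for illustration): with the constants below
  // (bits_per_word == 32, words_per_block == 8, bits_per_block == 256),
  // element 1234 has block index 1234 >> 8 == 4, word index
  // (1234 >> 5) & 7 == 6, and bit index 1234 & 31 == 18, and indeed
  // 4*256 + 6*32 + 18 == 1234.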

  // The lengths of the index bitfields
  enum { bit_index_length = 5,
         word_index_length = 3,
         block_index_length = 8 // not used
  };

  // Derived constants used for manipulating the index bitfields
  enum {
         bit_index_offset = 0, // not used
         word_index_offset = bit_index_length,
         block_index_offset = bit_index_length + word_index_length,

         bits_per_word = 1 << bit_index_length,
         words_per_block = 1 << word_index_length,
         bits_per_block = bits_per_word * words_per_block,

         bit_index_mask = right_n_bits(bit_index_length),
         word_index_mask = right_n_bits(word_index_length)
  };

  // These routines are used for extracting the block, word, and bit index
  // from an element.
  static uint get_block_index(uint element) {
    return element >> block_index_offset;
  }
  static uint get_word_index(uint element) {
    return mask_bits(element >> word_index_offset,word_index_mask);
  }
  static uint get_bit_index(uint element) {
    return mask_bits(element,bit_index_mask);
  }

  //------------------------------ class BitBlock ----------------------------
  // The BitBlock class is a segment of a bitvector set.

  class BitBlock : public ResourceObj {
   friend class IndexSetIterator;
   friend class IndexSet;

   private:
    // All of BitBlock's fields and methods are declared private.  We limit
    // access to IndexSet and IndexSetIterator.

    // A BitBlock is composed of some number of 32 bit words.  When a BitBlock
    // is not in use by any IndexSet, it is stored on a free list.  The next field
    // is used by IndexSet to maintain this free list.

    union {
      uint32 _words[words_per_block];
      BitBlock *_next;
    } _data;

    // accessors
    uint32 *words() { return _data._words; }
    void set_next(BitBlock *next) { _data._next = next; }
    BitBlock *next() { return _data._next; }

    // Operations.  A BitBlock supports four simple operations,
    // clear(), member(), insert(), and remove().  These methods do
    // not assume that the block index has been masked out.

    void clear() {
      memset(words(), 0, sizeof(uint32) * words_per_block);
    }

    bool member(uint element) {
      uint word_index = IndexSet::get_word_index(element);
      uint bit_index = IndexSet::get_bit_index(element);

      return ((words()[word_index] & (uint32)(0x1 << bit_index)) != 0);
    }

    bool insert(uint element) {
      uint word_index = IndexSet::get_word_index(element);
      uint bit_index = IndexSet::get_bit_index(element);

      uint32 bit = (0x1 << bit_index);
      uint32 before = words()[word_index];
      words()[word_index] = before | bit;
      return ((before & bit) != 0);
    }

    bool remove(uint element) {
      uint word_index = IndexSet::get_word_index(element);
      uint bit_index = IndexSet::get_bit_index(element);

      uint32 bit = (0x1 << bit_index);
      uint32 before = words()[word_index];
      words()[word_index] = before & ~bit;
      return ((before & bit) != 0);
    }
  };

  //-------------------------- BitBlock allocation ---------------------------
 private:

  // All IndexSets share an arena from which they allocate BitBlocks.  Unused
  // BitBlocks are placed on a free list.

  // The number of BitBlocks to allocate at a time
  enum { bitblock_alloc_chunk_size = 50 };

  static Arena *arena() { return Compile::current()->indexSet_arena(); }

  static void populate_free_list();

 public:

  // Invalidate the current free BitBlock list and begin allocation
  // from a new arena.  It is essential that this method is called whenever
  // the Arena being used for BitBlock allocation is reset.
  static void reset_memory(Compile* compile, Arena *arena) {
    compile->set_indexSet_free_block_list(NULL);
    compile->set_indexSet_arena(arena);

   // This should probably be done in a static initializer
   _empty_block.clear();
  }

 private:
  friend class BitBlock;
  // A distinguished BitBlock which always remains empty.  When a new IndexSet is
  // created, all of its top level BitBlock pointers are initialized to point to
  // this.
  static BitBlock _empty_block;

  //-------------------------- Members ------------------------------------------

  // The number of elements in the set
  uint      _count;

  // Our top level array of bitvector segments
  BitBlock **_blocks;

  BitBlock  *_preallocated_block_list[preallocated_block_list_size];

  // The number of top level array entries in use
  uint       _max_blocks;

  // Our assertions need to know the maximum number allowed in the set
#ifdef ASSERT
  uint       _max_elements;
#endif

  // The next IndexSet on the free list (not used at same time as count)
  IndexSet *_next;

 public:
  //-------------------------- Free list operations ------------------------------
  // Individual IndexSets can be placed on a free list.  This is done in PhaseLive.

  IndexSet *next() {
#ifdef ASSERT
    if( VerifyOpto ) {
      check_watch("removed from free list?", ((_next == NULL) ? 0 : _next->_serial_number));
    }
#endif
    return _next;
  }

  void set_next(IndexSet *next) {
#ifdef ASSERT
    if( VerifyOpto ) {
      check_watch("put on free list?", ((next == NULL) ? 0 : next->_serial_number));
    }
#endif
    _next = next;
  }

 private:
  //-------------------------- Utility methods -----------------------------------

  // Get the block which holds element
  BitBlock *get_block_containing(uint element) const {
    assert(element < _max_elements, "element out of bounds");
    return _blocks[get_block_index(element)];
  }

  // Set a block in the top level array
  void set_block(uint index, BitBlock *block) {
#ifdef ASSERT
    if( VerifyOpto )
      check_watch("set block", index);
#endif
    _blocks[index] = block;
  }

  // Get a BitBlock from the free list
  BitBlock *alloc_block();

  // Get a BitBlock from the free list and place it in the top level array
  BitBlock *alloc_block_containing(uint element);

  // Free a block from the top level array, placing it on the free BitBlock list
  void free_block(uint i);

 public:
  //-------------------------- Primitive set operations --------------------------

  void clear() {
#ifdef ASSERT
    if( VerifyOpto )
      check_watch("clear");
#endif
    _count = 0;
    for (uint i = 0; i < _max_blocks; i++) {
      BitBlock *block = _blocks[i];
      if (block != &_empty_block) {
        free_block(i);
      }
    }
  }

  uint count() const { return _count; }

  bool is_empty() const { return _count == 0; }

  bool member(uint element) const {
    return get_block_containing(element)->member(element);
  }

  bool insert(uint element) {
#ifdef ASSERT
    if( VerifyOpto )
      check_watch("insert", element);
#endif
    if (element == 0) {
      return 0;
    }
    BitBlock *block = get_block_containing(element);
    if (block == &_empty_block) {
      block = alloc_block_containing(element);
    }
    bool present = block->insert(element);
    if (!present) {
      _count++;
    }
    return !present;
  }

  bool remove(uint element) {
#ifdef ASSERT
    if( VerifyOpto )
      check_watch("remove", element);
#endif

    BitBlock *block = get_block_containing(element);
    bool present = block->remove(element);
    if (present) {
      _count--;
    }
    return present;
  }

  //-------------------------- Compound set operations ------------------------
  // Compute the union of all elements of one and two which interfere
  // with the RegMask mask.  If the degree of the union exceeds
  // fail_degree, the union bails out.  The underlying set is
  // cleared before the union is performed.
  uint lrg_union(uint lr1, uint lr2,
                 const uint fail_degree,
                 const class PhaseIFG *ifg,
                 const RegMask &mask);


  //------------------------- Construction, initialization -----------------------

  IndexSet() {}

  // This constructor is used for making a deep copy of an IndexSet.
  IndexSet(IndexSet *set);

  // Perform initialization on an IndexSet
  void initialize(uint max_element);

  // Initialize an IndexSet.  If the top level BitBlock array needs to be
  // allocated, do it from the proffered arena.  BitBlocks are still allocated
  // from the static Arena member.
  void initialize(uint max_element, Arena *arena);

  // Exchange two sets
  void swap(IndexSet *set);

  //-------------------------- Debugging and statistics --------------------------

#ifndef PRODUCT
  // Output an IndexSet for debugging
  void dump() const;
#endif

#ifdef ASSERT
  void tally_iteration_statistics() const;

  // BitBlock allocation statistics
  static julong _alloc_new;
  static julong _alloc_total;

  // Block density statistics
  static julong _total_bits;
  static julong _total_used_blocks;
  static julong _total_unused_blocks;

  // Sanity tests
  void verify() const;

  static int _serial_count;
  int        _serial_number;

  // Check to see if the serial number of the current set is the one we're tracing.
  // If it is, print a message.
  void check_watch(const char *operation, uint operand) const {
    if (IndexSetWatch != 0) {
      if (IndexSetWatch == -1 || _serial_number == IndexSetWatch) {
        tty->print_cr("IndexSet %d : %s ( %d )", _serial_number, operation, operand);
      }
    }
  }
  void check_watch(const char *operation) const {
    if (IndexSetWatch != 0) {
      if (IndexSetWatch == -1 || _serial_number == IndexSetWatch) {
        tty->print_cr("IndexSet %d : %s", _serial_number, operation);
      }
    }
  }

 public:
  static void print_statistics();

#endif
};


//-------------------------------- class IndexSetIterator --------------------
// An iterator for IndexSets.

class IndexSetIterator VALUE_OBJ_CLASS_SPEC {
 friend class IndexSet;

 public:

  // We walk over the bits in a word in chunks of size window_size.
  enum { window_size = 5,
         window_mask = right_n_bits(window_size),
         table_size  = (1 << window_size) };

  // For an integer of length window_size, what is the first set bit?
  static const byte _first_bit[table_size];

  // For an integer of length window_size, what is the second set bit?
  static const byte _second_bit[table_size];

 private:
  // The current word we are inspecting
  uint32                _current;

  // What element number are we currently on?
  uint                  _value;

  // The index of the next word we will inspect
  uint                  _next_word;

  // A pointer to the contents of the current block
  uint32               *_words;

  // The index of the next block we will inspect
  uint                  _next_block;

  // A pointer to the blocks in our set
  IndexSet::BitBlock **_blocks;

  // The number of blocks in the set
  uint                  _max_blocks;

  // If the iterator was created from a non-const set, we replace
  // non-canonical empty blocks with the _empty_block pointer.  If
  // _set is NULL, we do no replacement.
  IndexSet            *_set;

  // Advance to the next non-empty word and return the next
  // element in the set.
  uint advance_and_next();


 public:

  // If an iterator is built from a constant set then empty blocks
  // are not canonicalized.
  IndexSetIterator(IndexSet *set);
  IndexSetIterator(const IndexSet *set);

  // Return the next element of the set.  Return 0 when done.
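  //
  // A worked trace (illustration only): suppose _current == 0xC (bits 2 and 3
  // set) and _value == 64.  The low window is 0b01100, so this call returns
  // 64 + _first_bit[12] == 66 and advances by _second_bit[12] == 3, leaving
  // _current == 0x1 and _value == 67.  The next call returns
  // 67 + _first_bit[1] == 67 and shifts the window empty, so the call after
  // that falls through to advance_and_next().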
  uint next() {
    uint current = _current;
    if (current != 0) {
      uint value = _value;
      while (mask_bits(current,window_mask) == 0) {
        current >>= window_size;
        value += window_size;
      }

      uint advance = _second_bit[mask_bits(current,window_mask)];
      _current = current >> advance;
      _value = value + advance;
      return value + _first_bit[mask_bits(current,window_mask)];
    } else {
      return advance_and_next();
    }
  }
};

#endif // SHARE_VM_OPTO_INDEXSET_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/lcm.cpp
/*
 * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/block.hpp"
#include "opto/c2compiler.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/machnode.hpp"
#include "opto/runtime.hpp"
#if defined AD_MD_HPP
# include AD_MD_HPP
#elif defined TARGET_ARCH_MODEL_x86_32
# include "adfiles/ad_x86_32.hpp"
#elif defined TARGET_ARCH_MODEL_x86_64
# include "adfiles/ad_x86_64.hpp"
#elif defined TARGET_ARCH_MODEL_aarch64
# include "adfiles/ad_aarch64.hpp"
#elif defined TARGET_ARCH_MODEL_sparc
# include "adfiles/ad_sparc.hpp"
#elif defined TARGET_ARCH_MODEL_zero
# include "adfiles/ad_zero.hpp"
#elif defined TARGET_ARCH_MODEL_ppc_64
# include "adfiles/ad_ppc_64.hpp"
#endif

// Optimization - Graph Style

// Check whether val is a not-null-decoded compressed oop,
// i.e. whether it will access the base of the heap if it represents NULL.
static bool accesses_heap_base_zone(Node *val) {
  if (Universe::narrow_oop_base() != NULL) { // Implies UseCompressedOops.
    if (val && val->is_Mach()) {
      if (val->as_Mach()->ideal_Opcode() == Op_DecodeN) {
        // This assumes all Decodes with TypePtr::NotNull are matched to nodes that
        // decode NULL to point to the heap base (Decode_NN).
        if (val->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull) {
          return true;
        }
      }
      // Must recognize load operation with Decode matched in memory operand.
      // We should not reach here except for PPC/AIX, as os::zero_page_read_protected()
      // returns true everywhere else. On PPC, no such memory operands
      // exist, therefore we did not yet implement a check for such operands.
      NOT_AIX(Unimplemented());
    }
  }
  return false;
}

static bool needs_explicit_null_check_for_read(Node *val) {
  // On some OSes (AIX) the page at address 0 is only write protected.
  // If so, only Store operations will trap.
  if (os::zero_page_read_protected()) {
    return false;  // Implicit null check will work.
  }
  // Also a read accessing the base of a heap-based compressed heap will trap.
  if (accesses_heap_base_zone(val) &&                    // Hits the base zone page.
      Universe::narrow_oop_use_implicit_null_checks()) { // Base zone page is protected.
    return false;
  }

  return true;
}

//------------------------------implicit_null_check----------------------------
// Detect implicit-null-check opportunities.  Basically, find NULL checks
// with suitable memory ops nearby.  Use the memory op to do the NULL check.
// I can generate a memory op if there is not one nearby.
// The proj is the control projection for the not-null case.
// The val is the pointer being checked for nullness or
// decodeHeapOop_not_null node if it did not fold into address.
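// As a sketch (not from the original sources), the transformation turns
//
//   if (p == NULL) goto null_path;   // explicit test; null_path ends in an
//   x = p->field;                    // uncommon trap
//
// into
//
//   x = p->field;                    // load doubles as the NULL check: it
//                                    // faults when p is NULL and the signal
//                                    // handler routes control to the null path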
void PhaseCFG::implicit_null_check(Block* block, Node *proj, Node *val, int allowed_reasons) {
  // Assume that if a null check is needed for offset 0 then it is always needed.
  // Intel Solaris doesn't support any null checks yet and no
  // mechanism exists (yet) to set the switches at an os_cpu level
  if( !ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(0)) return;

  // Make sure the ptr-is-null path appears to be uncommon!
  float f = block->end()->as_MachIf()->_prob;
  if( proj->Opcode() == Op_IfTrue ) f = 1.0f - f;
  if( f > PROB_UNLIKELY_MAG(4) ) return;

  uint bidx = 0;                // Capture index of value into memop
  bool was_store;               // Memory op is a store op

  // Get the successor block for if the test ptr is non-null
  Block* not_null_block;  // this one goes with the proj
  Block* null_block;
  if (block->get_node(block->number_of_nodes()-1) == proj) {
    null_block     = block->_succs[0];
    not_null_block = block->_succs[1];
  } else {
    assert(block->get_node(block->number_of_nodes()-2) == proj, "proj is one or the other");
    not_null_block = block->_succs[0];
    null_block     = block->_succs[1];
  }
  while (null_block->is_Empty() == Block::empty_with_goto) {
    null_block     = null_block->_succs[0];
  }

  // Search the exception block for an uncommon trap.
  // (See Parse::do_if and Parse::do_ifnull for the reason
  // we need an uncommon trap.  Briefly, we need a way to
  // detect failure of this optimization, as in 6366351.)
  {
    bool found_trap = false;
    for (uint i1 = 0; i1 < null_block->number_of_nodes(); i1++) {
      Node* nn = null_block->get_node(i1);
      if (nn->is_MachCall() &&
          nn->as_MachCall()->entry_point() == SharedRuntime::uncommon_trap_blob()->entry_point()) {
        const Type* trtype = nn->in(TypeFunc::Parms)->bottom_type();
        if (trtype->isa_int() && trtype->is_int()->is_con()) {
          jint tr_con = trtype->is_int()->get_con();
          Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(tr_con);
          Deoptimization::DeoptAction action = Deoptimization::trap_request_action(tr_con);
          assert((int)reason < (int)BitsPerInt, "recode bit map");
          if (is_set_nth_bit(allowed_reasons, (int) reason)
              && action != Deoptimization::Action_none) {
            // This uncommon trap is sure to recompile, eventually.
            // When that happens, C->too_many_traps will prevent
            // this transformation from happening again.
            found_trap = true;
          }
        }
        break;
      }
    }
    if (!found_trap) {
      // We did not find an uncommon trap.
      return;
    }
  }

  // Check for decodeHeapOop_not_null node which did not fold into address
  bool is_decoden = ((intptr_t)val) & 1;
  val = (Node*)(((intptr_t)val) & ~1);

  assert(!is_decoden || (val->in(0) == NULL) && val->is_Mach() &&
         (val->as_Mach()->ideal_Opcode() == Op_DecodeN), "sanity");

  // Search the successor block for a load or store whose base value is also
  // the tested value.  There may be several.
  Node_List *out = new Node_List(Thread::current()->resource_area());
  MachNode *best = NULL;        // Best found so far
  for (DUIterator i = val->outs(); val->has_out(i); i++) {
    Node *m = val->out(i);
    if( !m->is_Mach() ) continue;
    MachNode *mach = m->as_Mach();
    was_store = false;
    int iop = mach->ideal_Opcode();
    switch( iop ) {
    case Op_LoadB:
    case Op_LoadUB:
    case Op_LoadUS:
    case Op_LoadD:
    case Op_LoadF:
    case Op_LoadI:
    case Op_LoadL:
    case Op_LoadP:
    case Op_LoadN:
    case Op_LoadS:
    case Op_LoadKlass:
    case Op_LoadNKlass:
    case Op_LoadRange:
    case Op_LoadD_unaligned:
    case Op_LoadL_unaligned:
      assert(mach->in(2) == val, "should be address");
      break;
    case Op_StoreB:
    case Op_StoreC:
    case Op_StoreCM:
    case Op_StoreD:
    case Op_StoreF:
    case Op_StoreI:
    case Op_StoreL:
    case Op_StoreP:
    case Op_StoreN:
    case Op_StoreNKlass:
      was_store = true;         // Memory op is a store op
      // Stores will have their address in slot 2 (memory in slot 1).
      // If the value being null-checked is in another slot, it means we
      // are storing the checked value, which does NOT check the value!
      if( mach->in(2) != val ) continue;
      break;                    // Found a memory op?
    case Op_StrComp:
    case Op_StrEquals:
    case Op_StrIndexOf:
    case Op_AryEq:
    case Op_EncodeISOArray:
      // Not a legit memory op for implicit null check regardless of
      // embedded loads
      continue;
    default:                    // Also check for embedded loads
      if( !mach->needs_anti_dependence_check() )
        continue;               // Not a memory op; skip it
      if( must_clone[iop] ) {
        // Do not move nodes which produce flags because
        // RA will try to clone it to place near branch and
        // it will cause recompilation, see clone_node().
        continue;
      }
      {
        // Check that value is used in memory address in
        // instructions with embedded load (CmpP val1,(val2+off)).
        Node* base;
        Node* index;
        const MachOper* oper = mach->memory_inputs(base, index);
        if (oper == NULL || oper == (MachOper*)-1) {
          continue;             // Not a memory op; skip it
        }
        if (val == base ||
            (val == index && val->bottom_type()->isa_narrowoop())) {
          break;                // Found it
        } else {
          continue;             // Skip it
        }
      }
      break;
    }

    // On some OSes (AIX) the page at address 0 is only write protected.
    // If so, only Store operations will trap.
    // But a read accessing the base of a heap-based compressed heap will trap.
    if (!was_store && needs_explicit_null_check_for_read(val)) {
      continue;
    }

    // Check that node's control edge is not-null block's head or dominates it,
    // otherwise we can't hoist it because there are other control dependencies.
    Node* ctrl = mach->in(0);
    if (ctrl != NULL && !(ctrl == not_null_block->head() ||
        get_block_for_node(ctrl)->dominates(not_null_block))) {
      continue;
    }

    // check if the offset is not too high for implicit exception
    {
      intptr_t offset = 0;
      const TypePtr *adr_type = NULL;  // Do not need this return value here
      const Node* base = mach->get_base_and_disp(offset, adr_type);
      if (base == NULL || base == NodeSentinel) {
        // Narrow oop address doesn't have base, only index
        if( val->bottom_type()->isa_narrowoop() &&
            MacroAssembler::needs_explicit_null_check(offset) )
          continue;             // Give up if offset is beyond page size
        // cannot reason about it; it is probably not an implicit null exception
      } else {
        const TypePtr* tptr;
        if (UseCompressedOops && (Universe::narrow_oop_shift() == 0 ||
                                  Universe::narrow_klass_shift() == 0)) {
          // 32-bits narrow oop can be the base of address expressions
          tptr = base->get_ptr_type();
        } else {
          // only regular oops are expected here
          tptr = base->bottom_type()->is_ptr();
        }
        // Give up if offset is not a compile-time constant
        if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot )
          continue;
        offset += tptr->_offset; // adjust if base is itself offset
        if( MacroAssembler::needs_explicit_null_check(offset) )
          continue;             // Give up if reference is beyond 4K page size
      }
    }

    // Check ctrl input to see if the null-check dominates the memory op
    Block *cb = get_block_for_node(mach);
    cb = cb->_idom;             // Always hoist at least 1 block
    if( !was_store ) {          // Stores can be hoisted only one block
      while( cb->_dom_depth > (block->_dom_depth + 1))
        cb = cb->_idom;         // Hoist loads as far as we want
      // The non-null-block should dominate the memory op, too. Live
      // range spilling will insert a spill in the non-null-block if it
      // needs to spill the memory op for an implicit null check.
      if (cb->_dom_depth == (block->_dom_depth + 1)) {
        if (cb != not_null_block) continue;
        cb = cb->_idom;
      }
    }
    if( cb != block ) continue;

    // Found a memory user; see if it can be hoisted to check-block
    uint vidx = 0;              // Capture index of value into memop
    uint j;
    for( j = mach->req()-1; j > 0; j-- ) {
      if( mach->in(j) == val ) {
        vidx = j;
        // Ignore DecodeN val which could be hoisted to where needed.
        if( is_decoden ) continue;
      }
      // Block of memory-op input
      Block *inb = get_block_for_node(mach->in(j));
      Block *b = block;          // Start from nul check
      while( b != inb && b->_dom_depth > inb->_dom_depth )
        b = b->_idom;           // search upwards for input
      // See if input dominates null check
      if( b != inb )
        break;
    }
    if( j > 0 )
      continue;
    Block *mb = get_block_for_node(mach);
    // Hoisting stores requires more checks for the anti-dependence case.
    // Give up hoisting if we have to move the store past any load.
    if( was_store ) {
      Block *b = mb;            // Start searching here for a local load
      // 'mach' is the faulting use we are trying to hoist;
      // an anti-dependent load 'n' found below would block hoisting
      while( b != block ) {
        uint k;
        for( k = 1; k < b->number_of_nodes(); k++ ) {
          Node *n = b->get_node(k);
          if( n->needs_anti_dependence_check() &&
              n->in(LoadNode::Memory) == mach->in(StoreNode::Memory) )
            break;              // Found anti-dependent load
        }
        if( k < b->number_of_nodes() )
          break;                // Found anti-dependent load
        // Make sure control does not do a merge (would have to check allpaths)
        if( b->num_preds() != 2 ) break;
        b = get_block_for_node(b->pred(1)); // Move up to predecessor block
      }
      if( b != block ) continue;
    }

    // Make sure this memory op is not already being used for a NullCheck
    Node *e = mb->end();
    if( e->is_MachNullCheck() && e->in(1) == mach )
      continue;                 // Already being used as a NULL check

    // Found a candidate!  Pick one with least dom depth - the highest
    // in the dom tree should be closest to the null check.
    if (best == NULL || get_block_for_node(mach)->_dom_depth < get_block_for_node(best)->_dom_depth) {
      best = mach;
      bidx = vidx;
    }
  }
  // No candidate!
  if (best == NULL) {
    return;
  }

  // ---- Found an implicit null check
  extern int implicit_null_checks;
  implicit_null_checks++;

  if( is_decoden ) {
    // Check if we need to hoist decodeHeapOop_not_null first.
    Block *valb = get_block_for_node(val);
    if( block != valb && block->_dom_depth < valb->_dom_depth ) {
      // Hoist it up to the end of the test block.
      valb->find_remove(val);
      block->add_inst(val);
      map_node_to_block(val, block);
      // DecodeN on x86 may kill flags. Check for flag-killing projections
      // that also need to be hoisted.
      for (DUIterator_Fast jmax, j = val->fast_outs(jmax); j < jmax; j++) {
        Node* n = val->fast_out(j);
        if( n->is_MachProj() ) {
          get_block_for_node(n)->find_remove(n);
          block->add_inst(n);
          map_node_to_block(n, block);
        }
      }
    }
  }
  // Hoist the memory candidate up to the end of the test block.
  Block *old_block = get_block_for_node(best);
  old_block->find_remove(best);
  block->add_inst(best);
  map_node_to_block(best, block);

  // Move the control dependence if it is pinned to not-null block.
  // Don't change it in other cases: NULL or dominating control.
  if (best->in(0) == not_null_block->head()) {
    // Set it to control edge of null check.
    best->set_req(0, proj->in(0)->in(0));
  }

  // Check for flag-killing projections that also need to be hoisted
  // Should be DU safe because no edge updates.
  for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) {
    Node* n = best->fast_out(j);
    if( n->is_MachProj() ) {
      get_block_for_node(n)->find_remove(n);
      block->add_inst(n);
      map_node_to_block(n, block);
    }
  }

  // proj==Op_IfTrue --> ne test; proj==Op_IfFalse --> eq test.
  // One of two graph shapes got matched:
  //   (IfTrue  (If (Bool NE (CmpP ptr NULL))))
  //   (IfFalse (If (Bool EQ (CmpP ptr NULL))))
  // NULL checks are always branch-if-eq.  If we see an IfTrue projection
  // then we are replacing a 'ne' test with an 'eq' NULL check test.
  // We need to flip the projections to keep the same semantics.
  if( proj->Opcode() == Op_IfTrue ) {
    // Swap order of projections in basic block to swap branch targets
    Node *tmp1 = block->get_node(block->end_idx()+1);
    Node *tmp2 = block->get_node(block->end_idx()+2);
    block->map_node(tmp2, block->end_idx()+1);
    block->map_node(tmp1, block->end_idx()+2);
    Node *tmp = new (C) Node(C->top()); // Use non-NULL input
    tmp1->replace_by(tmp);
    tmp2->replace_by(tmp1);
    tmp->replace_by(tmp2);
    tmp->destruct();
  }

  // Remove the existing null check; use a new implicit null check instead.
  // Since schedule-local needs precise def-use info, we need to correct
  // it as well.
  Node *old_tst = proj->in(0);
  MachNode *nul_chk = new (C) MachNullCheckNode(old_tst->in(0),best,bidx);
  block->map_node(nul_chk, block->end_idx());
  map_node_to_block(nul_chk, block);
  // Redirect users of old_tst to nul_chk
  for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2)
    old_tst->last_out(i2)->set_req(0, nul_chk);
  // Clean-up any dead code
  for (uint i3 = 0; i3 < old_tst->req(); i3++) {
    Node* in = old_tst->in(i3);
    old_tst->set_req(i3, NULL);
    if (in->outcnt() == 0) {
      // Remove dead input node
      in->disconnect_inputs(NULL, C);
      block->find_remove(in);
    }
  }

  latency_from_uses(nul_chk);
  latency_from_uses(best);

  // insert anti-dependences to defs in this block
  if (! best->needs_anti_dependence_check()) {
    for (uint k = 1; k < block->number_of_nodes(); k++) {
      Node *n = block->get_node(k);
      if (n->needs_anti_dependence_check() &&
          n->in(LoadNode::Memory) == best->in(StoreNode::Memory)) {
        // Found anti-dependent load
        insert_anti_dependences(block, n);
      }
    }
  }
}


//------------------------------select-----------------------------------------
// Select a nice fellow from the worklist to schedule next. If there is only
// one choice, then use it. Projections take top priority for correctness
// reasons - if I see a projection, then it is next.  There are a number of
// other special cases, for instructions that consume condition codes, et al.
// These are chosen immediately. Some instructions are required to immediately
// precede the last instruction in the block, and these are taken last. Of the
// remaining cases (most), choose the instruction with the greatest latency
// (that is, the greatest number of pseudo-cycles required to reach the end of
// the routine). If there is a tie, choose the instruction with the most inputs.
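// For example (summarizing the cases handled below): a Proj, Con, CreateEx or
// CheckCastPP on the worklist is returned immediately; a node consumed by a
// branch in this block is passed over for now and a MachTemp gets low
// priority; among the rest, the node with the greatest latency wins and ties
// are broken by the number of inputs (req()).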
Node* PhaseCFG::select(Block* block, Node_List &worklist, GrowableArray<int> &ready_cnt, VectorSet &next_call, uint sched_slot) {

  // If only a single entry on the stack, use it
  uint cnt = worklist.size();
  if (cnt == 1) {
    Node *n = worklist[0];
    worklist.map(0,worklist.pop());
    return n;
  }

  uint choice  = 0; // Bigger is most important
  uint latency = 0; // Bigger is scheduled first
  uint score   = 0; // Bigger is better
  int idx = -1;     // Index in worklist
  int cand_cnt = 0; // Candidate count

  for( uint i=0; i<cnt; i++ ) { // Inspect entire worklist
    // Order in worklist is used to break ties.
    // See caller for how this is used to delay scheduling
    // of induction variable increments to after the other
    // uses of the phi are scheduled.
    Node *n = worklist[i];      // Get Node on worklist

    int iop = n->is_Mach() ? n->as_Mach()->ideal_Opcode() : 0;
    if( n->is_Proj() ||         // Projections always win
        n->Opcode()== Op_Con || // So does constant 'Top'
        iop == Op_CreateEx ||   // Create-exception must start block
        iop == Op_CheckCastPP
        ) {
      worklist.map(i,worklist.pop());
      return n;
    }

    // Final call in a block must be adjacent to 'catch'
    Node *e = block->end();
    if( e->is_Catch() && e->in(0)->in(0) == n )
      continue;

    // Memory op for an implicit null check has to be at the end of the block
    if( e->is_MachNullCheck() && e->in(1) == n )
      continue;

    // Schedule IV increment last.
    if (e->is_Mach() && e->as_Mach()->ideal_Opcode() == Op_CountedLoopEnd &&
        e->in(1)->in(1) == n && n->is_iteratively_computed())
      continue;

    uint n_choice  = 2;

    // See if this instruction is consumed by a branch. If so, then (as the
    // branch is the last instruction in the basic block) force it to the
    // end of the basic block
    if ( must_clone[iop] ) {
      // See if any use is a branch
      bool found_machif = false;

      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
        Node* use = n->fast_out(j);

        // The use is a conditional branch, make them adjacent
        if (use->is_MachIf() && get_block_for_node(use) == block) {
          found_machif = true;
          break;
        }

        // The successor is still waiting on more than this instruction to be
        // ready; don't choose this one if other opportunities are ready
        if (ready_cnt.at(use->_idx) > 1)
          n_choice = 1;
      }

      // loop terminated, prefer not to use this instruction
      if (found_machif)
        continue;
    }

    // See if this has a predecessor that is "must_clone", i.e. sets the
    // condition code. If so, choose this first
    for (uint j = 0; j < n->req() ; j++) {
      Node *inn = n->in(j);
      if (inn) {
        if (inn->is_Mach() && must_clone[inn->as_Mach()->ideal_Opcode()] ) {
          n_choice = 3;
          break;
        }
      }
    }

    // MachTemps should be scheduled last so they are near their uses
    if (n->is_MachTemp()) {
      n_choice = 1;
    }

    uint n_latency = get_latency_for_node(n);
    uint n_score   = n->req();   // Many inputs get high score to break ties

    // Keep best latency found
    cand_cnt++;
    if (choice < n_choice ||
        (choice == n_choice &&
         ((StressLCM && Compile::randomized_select(cand_cnt)) ||
          (!StressLCM &&
           (latency < n_latency ||
            (latency == n_latency &&
             (score < n_score))))))) {
      choice  = n_choice;
      latency = n_latency;
      score   = n_score;
      idx     = i;               // Also keep index in worklist
    }
  } // End of for all ready nodes in worklist

  assert(idx >= 0, "index should be set");
  Node *n = worklist[(uint)idx];      // Get the winner

  worklist.map((uint)idx, worklist.pop());     // Compress worklist
  return n;
}


//------------------------------set_next_call----------------------------------
void PhaseCFG::set_next_call(Block* block, Node* n, VectorSet& next_call) {
  if( next_call.test_set(n->_idx) ) return;
  for( uint i=0; i<n->len(); i++ ) {
    Node *m = n->in(i);
    if( !m ) continue;  // must see all nodes in block that precede call
    if (get_block_for_node(m) == block) {
      set_next_call(block, m, next_call);
    }
  }
}

//------------------------------needed_for_next_call---------------------------
// Set the flag 'next_call' for each Node that is needed for the next call to
// be scheduled.  This flag lets me bias scheduling so Nodes needed for the
// next subroutine call get priority - basically it moves things NOT needed
// for the next call till after the call.  This prevents me from trying to
// carry lots of stuff live across a call.
void PhaseCFG::needed_for_next_call(Block* block, Node* this_call, VectorSet& next_call) {
  // Find the next control-defining Node in this block
  Node* call = NULL;
  for (DUIterator_Fast imax, i = this_call->fast_outs(imax); i < imax; i++) {
    Node* m = this_call->fast_out(i);
    if (get_block_for_node(m) == block && // Local-block user
        m != this_call &&       // Not self-start node
        m->is_MachCall()) {
      call = m;
      break;
    }
  }
  if (call == NULL)  return;    // No next call (e.g., block end is near)
  // Set next-call for all inputs to this call
  set_next_call(block, call, next_call);
}

//------------------------------add_call_kills-------------------------------------
// helper function that adds caller save registers to MachProjNode
static void add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe) {
  // Fill in the kill mask for the call
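  // The save_policy characters presumably mirror the ADLC register save types:
  // 'C' (save-on-call, i.e. caller-save), 'A' (always-save), 'E' (save-on-entry,
  // i.e. callee-save), 'N' (no-save).  Caller-save and always-save registers not
  // already defined by the call are killed here; callee-save registers are
  // killed only when exclude_soe is set.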
  for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) {
    if( !regs.Member(r) ) {     // Not already defined by the call
      // Save-on-call register?
      if ((save_policy[r] == 'C') ||
          (save_policy[r] == 'A') ||
          ((save_policy[r] == 'E') && exclude_soe)) {
        proj->_rout.Insert(r);
      }
    }
  }
}


//------------------------------sched_call-------------------------------------
uint PhaseCFG::sched_call(Block* block, uint node_cnt, Node_List& worklist, GrowableArray<int>& ready_cnt, MachCallNode* mcall, VectorSet& next_call) {
  RegMask regs;

  // Schedule all the users of the call right now.  All the users are
  // projection Nodes, so they must be scheduled next to the call.
  // Collect all the defined registers.
  for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
    Node* n = mcall->fast_out(i);
    assert( n->is_MachProj(), "" );
    int n_cnt = ready_cnt.at(n->_idx)-1;
    ready_cnt.at_put(n->_idx, n_cnt);
    assert( n_cnt == 0, "" );
    // Schedule next to call
    block->map_node(n, node_cnt++);
    // Collect defined registers
    regs.OR(n->out_RegMask());
    // Check for scheduling the next control-definer
    if( n->bottom_type() == Type::CONTROL )
      // Warm up next pile of heuristic bits
      needed_for_next_call(block, n, next_call);

    // Children of projections are now all ready
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j); // Get user
      if(get_block_for_node(m) != block) {
        continue;
      }
      if( m->is_Phi() ) continue;
      int m_cnt = ready_cnt.at(m->_idx)-1;
      ready_cnt.at_put(m->_idx, m_cnt);
      if( m_cnt == 0 )
        worklist.push(m);
    }

  }

  // Act as if the call defines the Frame Pointer.
  // Certainly the FP is alive and well after the call.
  regs.Insert(_matcher.c_frame_pointer());

  // Set all registers killed and not already defined by the call.
  uint r_cnt = mcall->tf()->range()->cnt();
  int op = mcall->ideal_Opcode();
  MachProjNode *proj = new (C) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
  map_node_to_block(proj, block);
  block->insert_node(proj, node_cnt++);
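  // This fat_proj models the call's kill set: add_call_kills() below fills its
  // RegMask with every register the call may clobber, so live ranges cannot be
  // allocated to those registers across the call.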

  // Select the right register save policy.
  const char *save_policy = NULL;
  switch (op) {
    case Op_CallRuntime:
    case Op_CallLeaf:
    case Op_CallLeafNoFP:
      // Calling C code so use C calling convention
      save_policy = _matcher._c_reg_save_policy;
      break;

    case Op_CallStaticJava:
    case Op_CallDynamicJava:
      // Calling Java code so use Java calling convention
      save_policy = _matcher._register_save_policy;
      break;

    default:
      ShouldNotReachHere();
  }

  // When using CallRuntime mark SOE registers as killed by the call
  // so values that could show up in the RegisterMap aren't live in a
  // callee saved register since the RegisterMap wouldn't know where to
  // find them.  CallLeaf and CallLeafNoFP are ok because they can't
  // have debug info on them.  Strictly speaking this only needs to be
  // done for oops since idealreg2debugmask takes care of debug info
  // references, but there is no way to handle oops differently than other
  // pointers as far as the kill mask goes.
  bool exclude_soe = op == Op_CallRuntime;

  // If the call is a MethodHandle invoke, we need to exclude the
  // register which is used to save the SP value over MH invokes from
  // the mask.  Otherwise this register could be used for
  // deoptimization information.
  if (op == Op_CallStaticJava) {
    MachCallStaticJavaNode* mcallstaticjava = (MachCallStaticJavaNode*) mcall;
    if (mcallstaticjava->_method_handle_invoke)
      proj->_rout.OR(Matcher::method_handle_invoke_SP_save_mask());
  }

  add_call_kills(proj, regs, save_policy, exclude_soe);

  return node_cnt;
}


//------------------------------schedule_local---------------------------------
// Topological sort within a block.  Someday become a real scheduler.
bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call) {
  // Already "sorted" are the block start Node (as the first entry), and
  // the block-ending Node and any trailing control projections.  We leave
  // these alone.  PhiNodes and ParmNodes are made to follow the block start
  // Node.  Everything else gets topo-sorted.

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print_cr("# --- schedule_local B%d, before: ---", block->_pre_order);
      for (uint i = 0;i < block->number_of_nodes(); i++) {
        tty->print("# ");
        block->get_node(i)->fast_dump();
      }
      tty->print_cr("#");
    }
#endif

  // RootNode is already sorted
  if (block->number_of_nodes() == 1) {
    return true;
  }

  // Move PhiNodes and ParmNodes from 1 to cnt up to the start
  uint node_cnt = block->end_idx();
  uint phi_cnt = 1;
  uint i;
  for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
    Node *n = block->get_node(i);
    if( n->is_Phi() ||          // Found a PhiNode or ParmNode
        (n->is_Proj()  && n->in(0) == block->head()) ) {
      // Move guy at 'phi_cnt' out to slot 'i'; makes a hole at phi_cnt
      block->map_node(block->get_node(phi_cnt), i);
      block->map_node(n, phi_cnt++);  // swap Phi/Parm up front
    } else {                    // All others
      // Count block-local inputs to 'n'
      uint cnt = n->len();      // Input count
      uint local = 0;
      for( uint j=0; j<cnt; j++ ) {
        Node *m = n->in(j);
        if( m && get_block_for_node(m) == block && !m->is_top() )
          local++;              // One more block-local input
      }
      ready_cnt.at_put(n->_idx, local); // Count em up

#ifdef ASSERT
      if( UseConcMarkSweepGC || UseG1GC ) {
        if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) {
          // Check the precedence edges
          for (uint prec = n->req(); prec < n->len(); prec++) {
            Node* oop_store = n->in(prec);
            if (oop_store != NULL) {
              assert(get_block_for_node(oop_store)->_dom_depth <= block->_dom_depth, "oop_store must dominate card-mark");
            }
          }
        }
      }
#endif

      // A few node types require changing a required edge to a precedence edge
      // before allocation.
      if( n->is_Mach() && n->req() > TypeFunc::Parms &&
          (n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire ||
           n->as_Mach()->ideal_Opcode() == Op_MemBarVolatile) ) {
        // MemBarAcquire could be created without Precedent edge.
        // del_req() replaces the specified edge with the last input edge
        // and then removes the last edge. If the specified edge > number of
        // edges the last edge will be moved outside of the input edges array
        // and the edge will be lost. This is why this code should be
        // executed only when Precedent (== TypeFunc::Parms) edge is present.
        Node *x = n->in(TypeFunc::Parms);
        if (x != NULL && get_block_for_node(x) == block && n->find_prec_edge(x) != -1) {
          // Old edge to node within same block will get removed, but no precedence
          // edge will get added because it already exists. Update ready count.
          int cnt = ready_cnt.at(n->_idx);
          assert(cnt > 1, err_msg("MemBar node %d must not get ready here", n->_idx));
          ready_cnt.at_put(n->_idx, cnt-1);
        }
        n->del_req(TypeFunc::Parms);
        n->add_prec(x);
      }
    }
  }
  for(uint i2=i; i2< block->number_of_nodes(); i2++ ) // Trailing guys get zapped count
    ready_cnt.at_put(block->get_node(i2)->_idx, 0);

  // All the prescheduled guys do not hold back internal nodes
  uint i3;
  for(i3 = 0; i3<phi_cnt; i3++ ) {  // For all pre-scheduled
    Node *n = block->get_node(i3);       // Get pre-scheduled
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j);
      if (get_block_for_node(m) == block) { // Local-block user
        int m_cnt = ready_cnt.at(m->_idx)-1;
        ready_cnt.at_put(m->_idx, m_cnt);   // Fix ready count
      }
    }
  }
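  // At this point ready_cnt[n] counts, for each not-yet-scheduled node in the
  // block, how many of its block-local inputs are still unscheduled; a node is
  // pushed onto the worklist below once that count reaches zero.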

  Node_List delay;
  // Make a worklist
  Node_List worklist;
  for(uint i4=i3; i4<node_cnt; i4++ ) {    // Put ready guys on worklist
    Node *m = block->get_node(i4);
    if( !ready_cnt.at(m->_idx) ) {   // Zero ready count?
      if (m->is_iteratively_computed()) {
        // Push induction variable increments last to allow other uses
        // of the phi to be scheduled first. The select() method breaks
        // ties in scheduling by worklist order.
        delay.push(m);
      } else if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_CreateEx) {
        // Force the CreateEx to the top of the list so it's processed
        // first and ends up at the start of the block.
        worklist.insert(0, m);
      } else {
        worklist.push(m);         // Then on to worklist!
      }
    }
  }
  while (delay.size()) {
    Node* d = delay.pop();
    worklist.push(d);
  }

  // Warm up the 'next_call' heuristic bits
  needed_for_next_call(block, block->head(), next_call);

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      for (uint j=0; j< block->number_of_nodes(); j++) {
        Node     *n = block->get_node(j);
        int     idx = n->_idx;
        tty->print("#   ready cnt:%3d  ", ready_cnt.at(idx));
        tty->print("latency:%3d  ", get_latency_for_node(n));
        tty->print("%4d: %s\n", idx, n->Name());
      }
    }
#endif

  uint max_idx = (uint)ready_cnt.length();
  // Pull from worklist and schedule
  while( worklist.size() ) {    // While the worklist is not empty

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print("#   ready list:");
      for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
        Node *n = worklist[i];      // Get Node on worklist
        tty->print(" %d", n->_idx);
      }
      tty->cr();
    }
#endif

    // Select and pop a ready guy from worklist
    Node* n = select(block, worklist, ready_cnt, next_call, phi_cnt);
    block->map_node(n, phi_cnt++);    // Schedule him next

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print("#    select %d: %s", n->_idx, n->Name());
      tty->print(", latency:%d", get_latency_for_node(n));
      n->dump();
      if (Verbose) {
        tty->print("#   ready list:");
        for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
          Node *n = worklist[i];      // Get Node on worklist
          tty->print(" %d", n->_idx);
        }
        tty->cr();
      }
    }

#endif
    if( n->is_MachCall() ) {
      MachCallNode *mcall = n->as_MachCall();
      phi_cnt = sched_call(block, phi_cnt, worklist, ready_cnt, mcall, next_call);
      continue;
    }

    if (n->is_Mach() && n->as_Mach()->has_call()) {
      RegMask regs;
      regs.Insert(_matcher.c_frame_pointer());
      regs.OR(n->out_RegMask());

      MachProjNode *proj = new (C) MachProjNode( n, 1, RegMask::Empty, MachProjNode::fat_proj );
      map_node_to_block(proj, block);
      block->insert_node(proj, phi_cnt++);

      add_call_kills(proj, regs, _matcher._c_reg_save_policy, false);
    }

    // Children are now all ready
    for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) {
      Node* m = n->fast_out(i5); // Get user
      if (get_block_for_node(m) != block) {
        continue;
      }
      if( m->is_Phi() ) continue;
      if (m->_idx >= max_idx) { // new node, skip it
        assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types");
        continue;
      }
      int m_cnt = ready_cnt.at(m->_idx)-1;
      ready_cnt.at_put(m->_idx, m_cnt);
      if( m_cnt == 0 )
        worklist.push(m);
    }
  }

  if( phi_cnt != block->end_idx() ) {
    // did not schedule all.  Retry, Bailout, or Die
    if (C->subsume_loads() == true && !C->failing()) {
      // Retry with subsume_loads == false
      // If this is the first failure, the sentinel string will "stick"
      // to the Compile object, and the C2Compiler will see it and retry.
      C->record_failure(C2Compiler::retry_no_subsuming_loads());
    } else {
      assert(false, "graph should be schedulable");
    }
    // assert( phi_cnt == end_idx(), "did not schedule all" );
    return false;
  }

#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print_cr("#");
    tty->print_cr("# after schedule_local");
    for (uint i = 0;i < block->number_of_nodes();i++) {
      tty->print("# ");
      block->get_node(i)->fast_dump();
    }
    tty->cr();
  }
#endif


  return true;
}

//--------------------------catch_cleanup_fix_all_inputs-----------------------
static void catch_cleanup_fix_all_inputs(Node *use, Node *old_def, Node *new_def) {
  for (uint l = 0; l < use->len(); l++) {
    if (use->in(l) == old_def) {
      if (l < use->req()) {
        use->set_req(l, new_def);
      } else {
        use->rm_prec(l);
        use->add_prec(new_def);
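        // rm_prec() just moved the last precedence edge into slot 'l', so back
        // up and re-examine this slot on the next pass of the loop.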
        l--;
      }
    }
  }
}

//------------------------------catch_cleanup_find_cloned_def------------------
Node* PhaseCFG::catch_cleanup_find_cloned_def(Block *use_blk, Node *def, Block *def_blk, int n_clone_idx) {
  assert( use_blk != def_blk, "Inter-block cleanup only");

  // The use is some block below the Catch.  Find and return the clone of the def
  // that dominates the use. If there is no clone in a dominating block, then
  // create a phi for the def in a dominating block.

  // Find which successor block dominates this use.  The successor
  // blocks must all be single-entry (from the Catch only; I will have
  // split blocks to make this so), hence they all dominate.
  while( use_blk->_dom_depth > def_blk->_dom_depth+1 )
    use_blk = use_blk->_idom;
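  // use_blk now sits at dom-depth def_blk->_dom_depth+1, i.e. at the level of
  // the blocks hanging immediately below the Catch.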

  // Find the successor
  Node *fixup = NULL;

  uint j;
  for( j = 0; j < def_blk->_num_succs; j++ )
    if( use_blk == def_blk->_succs[j] )
      break;

  if( j == def_blk->_num_succs ) {
    // Block at same level in dom-tree is not a successor.  It needs a
    // PhiNode; the PhiNode's inputs come from clones of the def, and ITS uses need fixup.
    Node_Array inputs = new Node_List(Thread::current()->resource_area());
    for(uint k = 1; k < use_blk->num_preds(); k++) {
      Block* block = get_block_for_node(use_blk->pred(k));
      inputs.map(k, catch_cleanup_find_cloned_def(block, def, def_blk, n_clone_idx));
    }

    // Check to see if the use_blk already has an identical phi inserted.
    // If it exists, it will be at the first position since all uses of a
    // def are processed together.
    Node *phi = use_blk->get_node(1);
    if( phi->is_Phi() ) {
      fixup = phi;
      for (uint k = 1; k < use_blk->num_preds(); k++) {
        if (phi->in(k) != inputs[k]) {
          // Not a match
          fixup = NULL;
          break;
        }
      }
    }

    // If an existing PhiNode was not found, make a new one.
    if (fixup == NULL) {
      Node *new_phi = PhiNode::make(use_blk->head(), def);
      use_blk->insert_node(new_phi, 1);
      map_node_to_block(new_phi, use_blk);
      for (uint k = 1; k < use_blk->num_preds(); k++) {
        new_phi->set_req(k, inputs[k]);
      }
      fixup = new_phi;
    }

  } else {
    // Found the use just below the Catch.  Make it use the clone.
    fixup = use_blk->get_node(n_clone_idx);
  }

  return fixup;
}

//--------------------------catch_cleanup_intra_block--------------------------
// Fix all input edges in use that reference "def".  The use is in the same
// block as the def and both have been cloned in each successor block.
static void catch_cleanup_intra_block(Node *use, Node *def, Block *blk, int beg, int n_clone_idx) {

  // Both the use and def have been cloned. For each successor block,
  // get the clone of the use, and make its input the clone of the def
  // found in that block.

  uint use_idx = blk->find_node(use);
  uint offset_idx = use_idx - beg;
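  // call_catch_cleanup() inserted the clones of region [beg,end) at positions
  // 1..(end-beg) of each successor block, in the original order, so the clone
  // of the node at index 'use_idx' lives at index offset_idx+1 there
  // (e.g. with beg == 5, the node at block index 7 is cloned to successor index 3).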
  for( uint k = 0; k < blk->_num_succs; k++ ) {
    // Get clone in each successor block
    Block *sb = blk->_succs[k];
    Node *clone = sb->get_node(offset_idx+1);
    assert( clone->Opcode() == use->Opcode(), "" );

    // Make use-clone reference the def-clone
    catch_cleanup_fix_all_inputs(clone, def, sb->get_node(n_clone_idx));
  }
}

//------------------------------catch_cleanup_inter_block---------------------
// Fix all input edges in use that reference "def".  The use is in a different
// block than the def.
void PhaseCFG::catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Block *def_blk, int n_clone_idx) {
  if( !use_blk ) return;        // Can happen if the use is a precedence edge

  Node *new_def = catch_cleanup_find_cloned_def(use_blk, def, def_blk, n_clone_idx);
  catch_cleanup_fix_all_inputs(use, def, new_def);
}

//------------------------------call_catch_cleanup-----------------------------
// If we inserted any instructions between a Call and his CatchNode,
// clone the instructions on all paths below the Catch.
void PhaseCFG::call_catch_cleanup(Block* block) {

  // End of region to clone
  uint end = block->end_idx();
  if( !block->get_node(end)->is_Catch() ) return;
  // Start of region to clone
  uint beg = end;
  while(!block->get_node(beg-1)->is_MachProj() ||
        !block->get_node(beg-1)->in(0)->is_MachCall() ) {
    beg--;
    assert(beg > 0,"Catch cleanup walking beyond block boundary");
  }
  // Range of inserted instructions is [beg, end)
  if( beg == end ) return;

  // Clone along all Catch output paths.  Clone area between the 'beg' and
  // 'end' indices.
  for( uint i = 0; i < block->_num_succs; i++ ) {
    Block *sb = block->_succs[i];
    // Clone the entire area; ignoring the edge fixup for now.
    for( uint j = end; j > beg; j-- ) {
      Node *clone = block->get_node(j-1)->clone();
      sb->insert_node(clone, 1);
      map_node_to_block(clone, sb);
      if (clone->needs_anti_dependence_check()) {
        insert_anti_dependences(sb, clone);
      }
    }
  }


  // Fixup edges.  Check the def-use info per cloned Node
  for(uint i2 = beg; i2 < end; i2++ ) {
    uint n_clone_idx = i2-beg+1; // Index of clone of n in each successor block
    Node *n = block->get_node(i2);        // Node that got cloned
    // Need DU safe iterator because of edge manipulation in calls.
    Unique_Node_List *out = new Unique_Node_List(Thread::current()->resource_area());
    for (DUIterator_Fast j1max, j1 = n->fast_outs(j1max); j1 < j1max; j1++) {
      out->push(n->fast_out(j1));
    }
    uint max = out->size();
    for (uint j = 0; j < max; j++) {// For all users
      Node *use = out->pop();
      Block *buse = get_block_for_node(use);
      if( use->is_Phi() ) {
        for( uint k = 1; k < use->req(); k++ )
          if( use->in(k) == n ) {
            Block* b = get_block_for_node(buse->pred(k));
            Node *fixup = catch_cleanup_find_cloned_def(b, n, block, n_clone_idx);
            use->set_req(k, fixup);
          }
      } else {
        if (block == buse) {
          catch_cleanup_intra_block(use, n, block, beg, n_clone_idx);
        } else {
          catch_cleanup_inter_block(use, buse, n, block, n_clone_idx);
        }
      }
    } // End for all users

  } // End of for all Nodes in cloned area

  // Remove the now-dead cloned ops
  for(uint i3 = beg; i3 < end; i3++ ) {
    block->get_node(beg)->disconnect_inputs(NULL, C);
    block->remove_node(beg);
  }

  // If the successor blocks have a CreateEx node, move it back to the top
  for(uint i4 = 0; i4 < block->_num_succs; i4++ ) {
    Block *sb = block->_succs[i4];
    uint new_cnt = end - beg;
    // Remove any newly created, but dead, nodes.
    for( uint j = new_cnt; j > 0; j-- ) {
      Node *n = sb->get_node(j);
      if (n->outcnt() == 0 &&
          (!n->is_Proj() || n->as_Proj()->in(0)->outcnt() == 1) ){
        n->disconnect_inputs(NULL, C);
        sb->remove_node(j);
        new_cnt--;
      }
    }
    // If any newly created nodes remain, move the CreateEx node to the top
    if (new_cnt > 0) {
      Node *cex = sb->get_node(1+new_cnt);
      if( cex->is_Mach() && cex->as_Mach()->ideal_Opcode() == Op_CreateEx ) {
        sb->remove_node(1+new_cnt);
        sb->insert_node(cex, 1);
      }
    }
  }
}
C:\hotspot-69087d08d473\src\share\vm/opto/library_call.cpp
/*
 * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/systemDictionary.hpp"
#include "classfile/vmSymbols.hpp"
#include "compiler/compileBroker.hpp"
#include "compiler/compileLog.hpp"
#include "jfr/support/jfrIntrinsics.hpp"
#include "oops/objArrayKlass.hpp"
#include "opto/addnode.hpp"
#include "opto/callGenerator.hpp"
#include "opto/cfgnode.hpp"
#include "opto/connode.hpp"
#include "opto/idealKit.hpp"
#include "opto/mathexactnode.hpp"
#include "opto/mulnode.hpp"
#include "opto/parse.hpp"
#include "opto/runtime.hpp"
#include "opto/subnode.hpp"
#include "prims/nativeLookup.hpp"
#include "runtime/sharedRuntime.hpp"
#include "utilities/macros.hpp"

class LibraryIntrinsic : public InlineCallGenerator {
  // Extend the set of intrinsics known to the runtime:
 public:
 private:
  bool             _is_virtual;
  bool             _does_virtual_dispatch;
  int8_t           _predicates_count;  // Intrinsic is predicated by several conditions
  int8_t           _last_predicate; // Last generated predicate
  vmIntrinsics::ID _intrinsic_id;

 public:
  LibraryIntrinsic(ciMethod* m, bool is_virtual, int predicates_count, bool does_virtual_dispatch, vmIntrinsics::ID id)
    : InlineCallGenerator(m),
      _is_virtual(is_virtual),
      _does_virtual_dispatch(does_virtual_dispatch),
      _predicates_count((int8_t)predicates_count),
      _last_predicate((int8_t)-1),
      _intrinsic_id(id)
  {
  }
  virtual bool is_intrinsic() const { return true; }
  virtual bool is_virtual()   const { return _is_virtual; }
  virtual bool is_predicated() const { return _predicates_count > 0; }
  virtual int  predicates_count() const { return _predicates_count; }
  virtual bool does_virtual_dispatch()   const { return _does_virtual_dispatch; }
  virtual JVMState* generate(JVMState* jvms);
  virtual Node* generate_predicate(JVMState* jvms, int predicate);
  vmIntrinsics::ID intrinsic_id() const { return _intrinsic_id; }
};
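
// Note on predicated intrinsics: when _predicates_count > 0, the call
// generator machinery is expected to invoke generate_predicate() once per
// predicate index to emit a runtime check (returning the slow-path control),
// recording the index in _last_predicate so that generate() can expand the
// matching intrinsic variant (inferred from the fields above and the
// generate*/try_to_inline code below).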


// Local helper class for LibraryIntrinsic:
class LibraryCallKit : public GraphKit {
 private:
  LibraryIntrinsic* _intrinsic;     // the library intrinsic being called
  Node*             _result;        // the result node, if any
  int               _reexecute_sp;  // the stack pointer when bytecode needs to be reexecuted

  const TypeOopPtr* sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type, bool is_native_ptr = false);

 public:
  LibraryCallKit(JVMState* jvms, LibraryIntrinsic* intrinsic)
    : GraphKit(jvms),
      _intrinsic(intrinsic),
      _result(NULL)
  {
    // Check if this is a root compile.  In that case we don't have a caller.
    if (!jvms->has_method()) {
      _reexecute_sp = sp();
    } else {
      // Find out how many arguments the interpreter needs when deoptimizing
      // and save the stack pointer value so it can be used by uncommon_trap.
      // We find the argument count by looking at the declared signature.
      bool ignored_will_link;
      ciSignature* declared_signature = NULL;
      ciMethod* ignored_callee = caller()->get_method_at_bci(bci(), ignored_will_link, &declared_signature);
      const int nargs = declared_signature->arg_size_for_bc(caller()->java_code_at_bci(bci()));
      _reexecute_sp = sp() + nargs;  // "push" arguments back on stack
    }
  }

  virtual LibraryCallKit* is_LibraryCallKit() const { return (LibraryCallKit*)this; }

  ciMethod*         caller()    const    { return jvms()->method(); }
  int               bci()       const    { return jvms()->bci(); }
  LibraryIntrinsic* intrinsic() const    { return _intrinsic; }
  vmIntrinsics::ID  intrinsic_id() const { return _intrinsic->intrinsic_id(); }
  ciMethod*         callee()    const    { return _intrinsic->method(); }

  bool  try_to_inline(int predicate);
  Node* try_to_predicate(int predicate);

  void push_result() {
    // Push the result onto the stack.
    if (!stopped() && result() != NULL) {
      BasicType bt = result()->bottom_type()->basic_type();
      push_node(bt, result());
    }
  }

 private:
  void fatal_unexpected_iid(vmIntrinsics::ID iid) {
    fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)));
  }

  void  set_result(Node* n) { assert(_result == NULL, "only set once"); _result = n; }
  void  set_result(RegionNode* region, PhiNode* value);
  Node*     result() { return _result; }

  virtual int reexecute_sp() { return _reexecute_sp; }

  // Helper functions to inline natives
  Node* generate_guard(Node* test, RegionNode* region, float true_prob);
  Node* generate_slow_guard(Node* test, RegionNode* region);
  Node* generate_fair_guard(Node* test, RegionNode* region);
  Node* generate_negative_guard(Node* index, RegionNode* region,
                                // resulting CastII of index:
                                Node* *pos_index = NULL);
  Node* generate_nonpositive_guard(Node* index, bool never_negative,
                                   // resulting CastII of index:
                                   Node* *pos_index = NULL);
  Node* generate_limit_guard(Node* offset, Node* subseq_length,
                             Node* array_length,
                             RegionNode* region);
  Node* generate_current_thread(Node* &tls_output);
  address basictype2arraycopy(BasicType t, Node *src_offset, Node *dest_offset,
                              bool disjoint_bases, const char* &name, bool dest_uninitialized);
  Node* load_mirror_from_klass(Node* klass);
  Node* load_klass_from_mirror_common(Node* mirror, bool never_see_null,
                                      RegionNode* region, int null_path,
                                      int offset);
  Node* load_klass_from_mirror(Node* mirror, bool never_see_null,
                               RegionNode* region, int null_path) {
    int offset = java_lang_Class::klass_offset_in_bytes();
    return load_klass_from_mirror_common(mirror, never_see_null,
                                         region, null_path,
                                         offset);
  }
  Node* load_array_klass_from_mirror(Node* mirror, bool never_see_null,
                                     RegionNode* region, int null_path) {
    int offset = java_lang_Class::array_klass_offset_in_bytes();
    return load_klass_from_mirror_common(mirror, never_see_null,
                                         region, null_path,
                                         offset);
  }
  Node* generate_access_flags_guard(Node* kls,
                                    int modifier_mask, int modifier_bits,
                                    RegionNode* region);
  Node* generate_interface_guard(Node* kls, RegionNode* region);
  Node* generate_array_guard(Node* kls, RegionNode* region) {
    return generate_array_guard_common(kls, region, false, false);
  }
  Node* generate_non_array_guard(Node* kls, RegionNode* region) {
    return generate_array_guard_common(kls, region, false, true);
  }
  Node* generate_objArray_guard(Node* kls, RegionNode* region) {
    return generate_array_guard_common(kls, region, true, false);
  }
  Node* generate_non_objArray_guard(Node* kls, RegionNode* region) {
    return generate_array_guard_common(kls, region, true, true);
  }
  Node* generate_array_guard_common(Node* kls, RegionNode* region,
                                    bool obj_array, bool not_array);
  Node* generate_virtual_guard(Node* obj_klass, RegionNode* slow_region);
  CallJavaNode* generate_method_call(vmIntrinsics::ID method_id,
                                     bool is_virtual = false, bool is_static = false);
  CallJavaNode* generate_method_call_static(vmIntrinsics::ID method_id) {
    return generate_method_call(method_id, false, true);
  }
  CallJavaNode* generate_method_call_virtual(vmIntrinsics::ID method_id) {
    return generate_method_call(method_id, true, false);
  }
  Node * load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static);

  Node* make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2);
  Node* make_string_method_node(int opcode, Node* str1, Node* str2);
  bool inline_string_compareTo();
  bool inline_string_indexOf();
  Node* string_indexOf(Node* string_object, ciTypeArray* target_array, jint offset, jint cache_i, jint md2_i);
  bool inline_string_equals();
  Node* round_double_node(Node* n);
  bool runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName);
  bool inline_math_native(vmIntrinsics::ID id);
  bool inline_trig(vmIntrinsics::ID id);
  bool inline_math(vmIntrinsics::ID id);
  template <typename OverflowOp>
  bool inline_math_overflow(Node* arg1, Node* arg2);
  void inline_math_mathExact(Node* math, Node* test);
  bool inline_math_addExactI(bool is_increment);
  bool inline_math_addExactL(bool is_increment);
  bool inline_math_multiplyExactI();
  bool inline_math_multiplyExactL();
  bool inline_math_negateExactI();
  bool inline_math_negateExactL();
  bool inline_math_subtractExactI(bool is_decrement);
  bool inline_math_subtractExactL(bool is_decrement);
  bool inline_exp();
  bool inline_pow();
  Node* finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName);
  bool inline_min_max(vmIntrinsics::ID id);
  Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y);
  // This returns Type::AnyPtr, RawPtr, or OopPtr.
  int classify_unsafe_addr(Node* &base, Node* &offset);
  Node* make_unsafe_address(Node* base, Node* offset);
  // Helper for inline_unsafe_access.
  // Generates the guards that check whether the result of
  // Unsafe.getObject should be recorded in an SATB log buffer.
  void insert_pre_barrier(Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar);
  bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile, bool is_unaligned);
  bool inline_unsafe_prefetch(bool is_native_ptr, bool is_store, bool is_static);
  static bool klass_needs_init_guard(Node* kls);
  bool inline_unsafe_allocate();
  bool inline_unsafe_copyMemory();
  bool inline_native_currentThread();
#ifdef JFR_HAVE_INTRINSICS
  bool inline_native_classID();
  bool inline_native_getEventWriter();
#endif
  bool inline_native_time_funcs(address method, const char* funcName);
  bool inline_native_isInterrupted();
  bool inline_native_Class_query(vmIntrinsics::ID id);
  bool inline_native_subtype_check();

  bool inline_native_newArray();
  bool inline_native_getLength();
  bool inline_array_copyOf(bool is_copyOfRange);
  bool inline_array_equals();
  void copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array, bool card_mark);
  bool inline_native_clone(bool is_virtual);
  bool inline_native_Reflection_getCallerClass();
  // Helper function for inlining native object hash method
  bool inline_native_hashcode(bool is_virtual, bool is_static);
  bool inline_native_getClass();

  // Helper functions for inlining arraycopy
  bool inline_arraycopy();
  void generate_arraycopy(const TypePtr* adr_type,
                          BasicType basic_elem_type,
                          Node* src,  Node* src_offset,
                          Node* dest, Node* dest_offset,
                          Node* copy_length,
                          bool disjoint_bases = false,
                          bool length_never_negative = false,
                          RegionNode* slow_region = NULL);
  AllocateArrayNode* tightly_coupled_allocation(Node* ptr,
                                                RegionNode* slow_region);
  void generate_clear_array(const TypePtr* adr_type,
                            Node* dest,
                            BasicType basic_elem_type,
                            Node* slice_off,
                            Node* slice_len,
                            Node* slice_end);
  bool generate_block_arraycopy(const TypePtr* adr_type,
                                BasicType basic_elem_type,
                                AllocateNode* alloc,
                                Node* src,  Node* src_offset,
                                Node* dest, Node* dest_offset,
                                Node* dest_size, bool dest_uninitialized);
  void generate_slow_arraycopy(const TypePtr* adr_type,
                               Node* src,  Node* src_offset,
                               Node* dest, Node* dest_offset,
                               Node* copy_length, bool dest_uninitialized);
  Node* generate_checkcast_arraycopy(const TypePtr* adr_type,
                                     Node* dest_elem_klass,
                                     Node* src,  Node* src_offset,
                                     Node* dest, Node* dest_offset,
                                     Node* copy_length, bool dest_uninitialized);
  Node* generate_generic_arraycopy(const TypePtr* adr_type,
                                   Node* src,  Node* src_offset,
                                   Node* dest, Node* dest_offset,
                                   Node* copy_length, bool dest_uninitialized);
  void generate_unchecked_arraycopy(const TypePtr* adr_type,
                                    BasicType basic_elem_type,
                                    bool disjoint_bases,
                                    Node* src,  Node* src_offset,
                                    Node* dest, Node* dest_offset,
                                    Node* copy_length, bool dest_uninitialized);
  typedef enum { LS_xadd, LS_xchg, LS_cmpxchg } LoadStoreKind;
  bool inline_unsafe_load_store(BasicType type,  LoadStoreKind kind);
  bool inline_unsafe_ordered_store(BasicType type);
  bool inline_unsafe_fence(vmIntrinsics::ID id);
  bool inline_fp_conversions(vmIntrinsics::ID id);
  bool inline_number_methods(vmIntrinsics::ID id);
  bool inline_reference_get();
  bool inline_aescrypt_Block(vmIntrinsics::ID id);
  bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id);
  Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
  Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
  Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
  bool inline_ghash_processBlocks();
  bool inline_sha_implCompress(vmIntrinsics::ID id);
  bool inline_digestBase_implCompressMB(int predicate);
  bool inline_sha_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass_SHA,
                                 bool long_state, address stubAddr, const char *stubName,
                                 Node* src_start, Node* ofs, Node* limit);
  Node* get_state_from_sha_object(Node *sha_object);
  Node* get_state_from_sha5_object(Node *sha_object);
  Node* inline_digestBase_implCompressMB_predicate(int predicate);
  bool inline_encodeISOArray();
  bool inline_updateCRC32();
  bool inline_updateBytesCRC32();
  bool inline_updateByteBufferCRC32();
  bool inline_multiplyToLen();
  bool inline_squareToLen();
  bool inline_mulAdd();
  bool inline_montgomeryMultiply();
  bool inline_montgomerySquare();

  bool inline_profileBoolean();
};


//---------------------------make_vm_intrinsic----------------------------
CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
  vmIntrinsics::ID id = m->intrinsic_id();
  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");

  ccstr disable_intr = NULL;

  if ((DisableIntrinsic[0] != '\0'
       && strstr(DisableIntrinsic, vmIntrinsics::name_at(id)) != NULL) ||
      (method_has_option_value("DisableIntrinsic", disable_intr)
       && strstr(disable_intr, vmIntrinsics::name_at(id)) != NULL)) {
    // disabled by a user request on the command line:
    // example: -XX:DisableIntrinsic=_hashCode,_getClass
    return NULL;
  }

  if (!m->is_loaded()) {
    // do not attempt to inline unloaded methods
    return NULL;
  }

  // Only a few intrinsics implement a virtual dispatch.
  // They are expensive calls which are also frequently overridden.
  if (is_virtual) {
    switch (id) {
    case vmIntrinsics::_hashCode:
    case vmIntrinsics::_clone:
      // OK, Object.hashCode and Object.clone intrinsics come in both flavors
      break;
    default:
      return NULL;
    }
  }

  // -XX:-InlineNatives disables nearly all intrinsics:
  if (!InlineNatives) {
    switch (id) {
    case vmIntrinsics::_indexOf:
    case vmIntrinsics::_compareTo:
    case vmIntrinsics::_equals:
    case vmIntrinsics::_equalsC:
    case vmIntrinsics::_getAndAddInt:
    case vmIntrinsics::_getAndAddLong:
    case vmIntrinsics::_getAndSetInt:
    case vmIntrinsics::_getAndSetLong:
    case vmIntrinsics::_getAndSetObject:
    case vmIntrinsics::_loadFence:
    case vmIntrinsics::_storeFence:
    case vmIntrinsics::_fullFence:
      break;  // InlineNatives does not control String.compareTo
    case vmIntrinsics::_Reference_get:
      break;  // InlineNatives does not control Reference.get
    default:
      return NULL;
    }
  }

  int predicates = 0;
  bool does_virtual_dispatch = false;

  switch (id) {
  case vmIntrinsics::_compareTo:
    if (!SpecialStringCompareTo)  return NULL;
    if (!Matcher::match_rule_supported(Op_StrComp))  return NULL;
    break;
  case vmIntrinsics::_indexOf:
    if (!SpecialStringIndexOf)  return NULL;
    break;
  case vmIntrinsics::_equals:
    if (!SpecialStringEquals)  return NULL;
    if (!Matcher::match_rule_supported(Op_StrEquals))  return NULL;
    break;
  case vmIntrinsics::_equalsC:
    if (!SpecialArraysEquals)  return NULL;
    if (!Matcher::match_rule_supported(Op_AryEq))  return NULL;
    break;
  case vmIntrinsics::_arraycopy:
    if (!InlineArrayCopy)  return NULL;
    break;
  case vmIntrinsics::_copyMemory:
    if (StubRoutines::unsafe_arraycopy() == NULL)  return NULL;
    if (!InlineArrayCopy)  return NULL;
    break;
  case vmIntrinsics::_hashCode:
    if (!InlineObjectHash)  return NULL;
    does_virtual_dispatch = true;
    break;
  case vmIntrinsics::_clone:
    does_virtual_dispatch = true;
  case vmIntrinsics::_copyOf:
  case vmIntrinsics::_copyOfRange:
    if (!InlineObjectCopy)  return NULL;
    // These also use the arraycopy intrinsic mechanism:
    if (!InlineArrayCopy)  return NULL;
    break;
  case vmIntrinsics::_encodeISOArray:
    if (!SpecialEncodeISOArray)  return NULL;
    if (!Matcher::match_rule_supported(Op_EncodeISOArray))  return NULL;
    break;
  case vmIntrinsics::_checkIndex:
    // We do not intrinsify this.  The optimizer does fine with it.
    return NULL;

  case vmIntrinsics::_getCallerClass:
    if (!UseNewReflection)  return NULL;
    if (!InlineReflectionGetCallerClass)  return NULL;
    if (SystemDictionary::reflect_CallerSensitive_klass() == NULL)  return NULL;
    break;

  case vmIntrinsics::_bitCount_i:
    if (!Matcher::match_rule_supported(Op_PopCountI)) return NULL;
    break;

  case vmIntrinsics::_bitCount_l:
    if (!Matcher::match_rule_supported(Op_PopCountL)) return NULL;
    break;

  case vmIntrinsics::_numberOfLeadingZeros_i:
    if (!Matcher::match_rule_supported(Op_CountLeadingZerosI)) return NULL;
    break;

  case vmIntrinsics::_numberOfLeadingZeros_l:
    if (!Matcher::match_rule_supported(Op_CountLeadingZerosL)) return NULL;
    break;

  case vmIntrinsics::_numberOfTrailingZeros_i:
    if (!Matcher::match_rule_supported(Op_CountTrailingZerosI)) return NULL;
    break;

  case vmIntrinsics::_numberOfTrailingZeros_l:
    if (!Matcher::match_rule_supported(Op_CountTrailingZerosL)) return NULL;
    break;

  case vmIntrinsics::_reverseBytes_c:
    if (!Matcher::match_rule_supported(Op_ReverseBytesUS)) return NULL;
    break;
  case vmIntrinsics::_reverseBytes_s:
    if (!Matcher::match_rule_supported(Op_ReverseBytesS))  return NULL;
    break;
  case vmIntrinsics::_reverseBytes_i:
    if (!Matcher::match_rule_supported(Op_ReverseBytesI))  return NULL;
    break;
  case vmIntrinsics::_reverseBytes_l:
    if (!Matcher::match_rule_supported(Op_ReverseBytesL))  return NULL;
    break;

  case vmIntrinsics::_Reference_get:
    // Use the intrinsic version of Reference.get() so that the value in
    // the referent field can be registered by the G1 pre-barrier code.
    // Also add a memory barrier to prevent commoning reads from this field
    // across safepoints, since the GC can change its value.
    break;

  case vmIntrinsics::_compareAndSwapObject:
#ifdef _LP64
    if (!UseCompressedOops && !Matcher::match_rule_supported(Op_CompareAndSwapP)) return NULL;
#endif
    break;

  case vmIntrinsics::_compareAndSwapLong:
    if (!Matcher::match_rule_supported(Op_CompareAndSwapL)) return NULL;
    break;

  case vmIntrinsics::_getAndAddInt:
    if (!Matcher::match_rule_supported(Op_GetAndAddI)) return NULL;
    break;

  case vmIntrinsics::_getAndAddLong:
    if (!Matcher::match_rule_supported(Op_GetAndAddL)) return NULL;
    break;

  case vmIntrinsics::_getAndSetInt:
    if (!Matcher::match_rule_supported(Op_GetAndSetI)) return NULL;
    break;

  case vmIntrinsics::_getAndSetLong:
    if (!Matcher::match_rule_supported(Op_GetAndSetL)) return NULL;
    break;

  case vmIntrinsics::_getAndSetObject:
#ifdef _LP64
    if (!UseCompressedOops && !Matcher::match_rule_supported(Op_GetAndSetP)) return NULL;
    if (UseCompressedOops && !Matcher::match_rule_supported(Op_GetAndSetN)) return NULL;
    break;
#else
    if (!Matcher::match_rule_supported(Op_GetAndSetP)) return NULL;
    break;
#endif

  case vmIntrinsics::_aescrypt_encryptBlock:
  case vmIntrinsics::_aescrypt_decryptBlock:
    if (!UseAESIntrinsics) return NULL;
    break;

  case vmIntrinsics::_multiplyToLen:
    if (!UseMultiplyToLenIntrinsic) return NULL;
    break;

  case vmIntrinsics::_squareToLen:
    if (!UseSquareToLenIntrinsic) return NULL;
    break;

  case vmIntrinsics::_mulAdd:
    if (!UseMulAddIntrinsic) return NULL;
    break;

  case vmIntrinsics::_montgomeryMultiply:
    if (!UseMontgomeryMultiplyIntrinsic) return NULL;
    break;
  case vmIntrinsics::_montgomerySquare:
    if (!UseMontgomerySquareIntrinsic) return NULL;
    break;

  case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
  case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
    if (!UseAESIntrinsics) return NULL;
    // these two require the predicated logic
    predicates = 1;
    break;

  case vmIntrinsics::_sha_implCompress:
    if (!UseSHA1Intrinsics) return NULL;
    break;

  case vmIntrinsics::_sha2_implCompress:
    if (!UseSHA256Intrinsics) return NULL;
    break;

  case vmIntrinsics::_sha5_implCompress:
    if (!UseSHA512Intrinsics) return NULL;
    break;

  case vmIntrinsics::_digestBase_implCompressMB:
    if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) return NULL;
    predicates = 3;
    break;

  case vmIntrinsics::_ghash_processBlocks:
    if (!UseGHASHIntrinsics) return NULL;
    break;

  case vmIntrinsics::_updateCRC32:
  case vmIntrinsics::_updateBytesCRC32:
  case vmIntrinsics::_updateByteBufferCRC32:
    if (!UseCRC32Intrinsics) return NULL;
    break;

  case vmIntrinsics::_incrementExactI:
  case vmIntrinsics::_addExactI:
    if (!Matcher::match_rule_supported(Op_OverflowAddI) || !UseMathExactIntrinsics) return NULL;
    break;
  case vmIntrinsics::_incrementExactL:
  case vmIntrinsics::_addExactL:
    if (!Matcher::match_rule_supported(Op_OverflowAddL) || !UseMathExactIntrinsics) return NULL;
    break;
  case vmIntrinsics::_decrementExactI:
  case vmIntrinsics::_subtractExactI:
    if (!Matcher::match_rule_supported(Op_OverflowSubI) || !UseMathExactIntrinsics) return NULL;
    break;
  case vmIntrinsics::_decrementExactL:
  case vmIntrinsics::_subtractExactL:
    if (!Matcher::match_rule_supported(Op_OverflowSubL) || !UseMathExactIntrinsics) return NULL;
    break;
  case vmIntrinsics::_negateExactI:
    if (!Matcher::match_rule_supported(Op_OverflowSubI) || !UseMathExactIntrinsics) return NULL;
    break;
  case vmIntrinsics::_negateExactL:
    if (!Matcher::match_rule_supported(Op_OverflowSubL) || !UseMathExactIntrinsics) return NULL;
    break;
  case vmIntrinsics::_multiplyExactI:
    if (!Matcher::match_rule_supported(Op_OverflowMulI) || !UseMathExactIntrinsics) return NULL;
    break;
  case vmIntrinsics::_multiplyExactL:
    if (!Matcher::match_rule_supported(Op_OverflowMulL) || !UseMathExactIntrinsics) return NULL;
    break;

  default:
    assert(id <= vmIntrinsics::LAST_COMPILER_INLINE, "caller responsibility");
    assert(id != vmIntrinsics::_Object_init && id != vmIntrinsics::_invoke, "enum out of order?");
    break;
  }

  // -XX:-InlineClassNatives disables natives from the Class class.
  // The flag applies to all reflective calls, notably Array.newArray
  // (visible to Java programmers as Array.newInstance).
  if (m->holder()->name() == ciSymbol::java_lang_Class() ||
      m->holder()->name() == ciSymbol::java_lang_reflect_Array()) {
    if (!InlineClassNatives)  return NULL;
  }

  // -XX:-InlineThreadNatives disables natives from the Thread class.
  if (m->holder()->name() == ciSymbol::java_lang_Thread()) {
    if (!InlineThreadNatives)  return NULL;
  }

  // -XX:-InlineMathNatives disables natives from the Math,Float and Double classes.
  if (m->holder()->name() == ciSymbol::java_lang_Math() ||
      m->holder()->name() == ciSymbol::java_lang_Float() ||
      m->holder()->name() == ciSymbol::java_lang_Double()) {
    if (!InlineMathNatives)  return NULL;
  }

  // -XX:-InlineUnsafeOps disables natives from the Unsafe class.
  if (m->holder()->name() == ciSymbol::sun_misc_Unsafe()) {
    if (!InlineUnsafeOps)  return NULL;
  }

  return new LibraryIntrinsic(m, is_virtual, predicates, does_virtual_dispatch, (vmIntrinsics::ID) id);
}

//----------------------register_library_intrinsics-----------------------
// Initialize this file's data structures, for each Compile instance.
void Compile::register_library_intrinsics() {
  // Nothing to do here.
}

JVMState* LibraryIntrinsic::generate(JVMState* jvms) {
  LibraryCallKit kit(jvms, this);
  Compile* C = kit.C;
  int nodes = C->unique();
#ifndef PRODUCT
  if ((C->print_intrinsics() || C->print_inlining()) && Verbose) {
    char buf[1000];
    const char* str = vmIntrinsics::short_name_as_C_string(intrinsic_id(), buf, sizeof(buf));
    tty->print_cr("Intrinsic %s", str);
  }
#endif
  ciMethod* callee = kit.callee();
  const int bci    = kit.bci();

  // Try to inline the intrinsic.
  if (kit.try_to_inline(_last_predicate)) {
    if (C->print_intrinsics() || C->print_inlining()) {
      C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
    }
    C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked);
    if (C->log()) {
      C->log()->elem("intrinsic id='%s'%s nodes='%d'",
                     vmIntrinsics::name_at(intrinsic_id()),
                     (is_virtual() ? " virtual='1'" : ""),
                     C->unique() - nodes);
    }
    // Push the result from the inlined method onto the stack.
    kit.push_result();
    return kit.transfer_exceptions_into_jvms();
  }

  // The intrinsic bailed out
  if (C->print_intrinsics() || C->print_inlining()) {
    if (jvms->has_method()) {
      // Not a root compile.
      const char* msg = is_virtual() ? "failed to inline (intrinsic, virtual)" : "failed to inline (intrinsic)";
      C->print_inlining(callee, jvms->depth() - 1, bci, msg);
    } else {
      // Root compile
      tty->print("Did not generate intrinsic %s%s at bci:%d in",
               vmIntrinsics::name_at(intrinsic_id()),
               (is_virtual() ? " (virtual)" : ""), bci);
    }
  }
  C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
  return NULL;
}

Node* LibraryIntrinsic::generate_predicate(JVMState* jvms, int predicate) {
  LibraryCallKit kit(jvms, this);
  Compile* C = kit.C;
  int nodes = C->unique();
  _last_predicate = predicate;
#ifndef PRODUCT
  assert(is_predicated() && predicate < predicates_count(), "sanity");
  if ((C->print_intrinsics() || C->print_inlining()) && Verbose) {
    char buf[1000];
    const char* str = vmIntrinsics::short_name_as_C_string(intrinsic_id(), buf, sizeof(buf));
    tty->print_cr("Predicate for intrinsic %s", str);
  }
#endif
  ciMethod* callee = kit.callee();
  const int bci    = kit.bci();

  Node* slow_ctl = kit.try_to_predicate(predicate);
  if (!kit.failing()) {
    if (C->print_intrinsics() || C->print_inlining()) {
      C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual, predicate)" : "(intrinsic, predicate)");
    }
    C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked);
    if (C->log()) {
      C->log()->elem("predicate_intrinsic id='%s'%s nodes='%d'",
                     vmIntrinsics::name_at(intrinsic_id()),
                     (is_virtual() ? " virtual='1'" : ""),
                     C->unique() - nodes);
    }
    return slow_ctl; // Could be NULL if the check folds.
  }

  // The intrinsic bailed out
  if (C->print_intrinsics() || C->print_inlining()) {
    if (jvms->has_method()) {
      // Not a root compile.
      const char* msg = "failed to generate predicate for intrinsic";
      C->print_inlining(kit.callee(), jvms->depth() - 1, bci, msg);
    } else {
      // Root compile
      C->print_inlining_stream()->print("Did not generate predicate for intrinsic %s%s at bci:%d in",
                                        vmIntrinsics::name_at(intrinsic_id()),
                                        (is_virtual() ? " (virtual)" : ""), bci);
    }
  }
  C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
  return NULL;
}

bool LibraryCallKit::try_to_inline(int predicate) {
  // Handle symbolic names for otherwise undistinguished boolean switches:
  const bool is_store       = true;
  const bool is_native_ptr  = true;
  const bool is_static      = true;
  const bool is_volatile    = true;

  if (!jvms()->has_method()) {
    // Root JVMState has a null method.
    assert(map()->memory()->Opcode() == Op_Parm, "");
    // Insert the memory aliasing node
    set_all_memory(reset_memory());
  }
  assert(merged_memory(), "");


  switch (intrinsic_id()) {
  case vmIntrinsics::_hashCode:                 return inline_native_hashcode(intrinsic()->is_virtual(), !is_static);
  case vmIntrinsics::_identityHashCode:         return inline_native_hashcode(/*!virtual*/ false,         is_static);
  case vmIntrinsics::_getClass:                 return inline_native_getClass();

  case vmIntrinsics::_dsin:
  case vmIntrinsics::_dcos:
  case vmIntrinsics::_dtan:
  case vmIntrinsics::_dabs:
  case vmIntrinsics::_datan2:
  case vmIntrinsics::_dsqrt:
  case vmIntrinsics::_dexp:
  case vmIntrinsics::_dlog:
  case vmIntrinsics::_dlog10:
  case vmIntrinsics::_dpow:                     return inline_math_native(intrinsic_id());

  case vmIntrinsics::_min:
  case vmIntrinsics::_max:                      return inline_min_max(intrinsic_id());

  case vmIntrinsics::_addExactI:                return inline_math_addExactI(false /* add */);
  case vmIntrinsics::_addExactL:                return inline_math_addExactL(false /* add */);
  case vmIntrinsics::_decrementExactI:          return inline_math_subtractExactI(true /* decrement */);
  case vmIntrinsics::_decrementExactL:          return inline_math_subtractExactL(true /* decrement */);
  case vmIntrinsics::_incrementExactI:          return inline_math_addExactI(true /* increment */);
  case vmIntrinsics::_incrementExactL:          return inline_math_addExactL(true /* increment */);
  case vmIntrinsics::_multiplyExactI:           return inline_math_multiplyExactI();
  case vmIntrinsics::_multiplyExactL:           return inline_math_multiplyExactL();
  case vmIntrinsics::_negateExactI:             return inline_math_negateExactI();
  case vmIntrinsics::_negateExactL:             return inline_math_negateExactL();
  case vmIntrinsics::_subtractExactI:           return inline_math_subtractExactI(false /* subtract */);
  case vmIntrinsics::_subtractExactL:           return inline_math_subtractExactL(false /* subtract */);

  case vmIntrinsics::_arraycopy:                return inline_arraycopy();

  case vmIntrinsics::_compareTo:                return inline_string_compareTo();
  case vmIntrinsics::_indexOf:                  return inline_string_indexOf();
  case vmIntrinsics::_equals:                   return inline_string_equals();

  case vmIntrinsics::_getObject:                return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT,  !is_volatile, false);
  case vmIntrinsics::_getBoolean:               return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, !is_volatile, false);
  case vmIntrinsics::_getByte:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE,    !is_volatile, false);
  case vmIntrinsics::_getShort:                 return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,   !is_volatile, false);
  case vmIntrinsics::_getChar:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,    !is_volatile, false);
  case vmIntrinsics::_getInt:                   return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,     !is_volatile, false);
  case vmIntrinsics::_getLong:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,    !is_volatile, false);
  case vmIntrinsics::_getFloat:                 return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT,   !is_volatile, false);
  case vmIntrinsics::_getDouble:                return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE,  !is_volatile, false);

  case vmIntrinsics::_putObject:                return inline_unsafe_access(!is_native_ptr,  is_store, T_OBJECT,  !is_volatile, false);
  case vmIntrinsics::_putBoolean:               return inline_unsafe_access(!is_native_ptr,  is_store, T_BOOLEAN, !is_volatile, false);
  case vmIntrinsics::_putByte:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_BYTE,    !is_volatile, false);
  case vmIntrinsics::_putShort:                 return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,   !is_volatile, false);
  case vmIntrinsics::_putChar:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,    !is_volatile, false);
  case vmIntrinsics::_putInt:                   return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,     !is_volatile, false);
  case vmIntrinsics::_putLong:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,    !is_volatile, false);
  case vmIntrinsics::_putFloat:                 return inline_unsafe_access(!is_native_ptr,  is_store, T_FLOAT,   !is_volatile, false);
  case vmIntrinsics::_putDouble:                return inline_unsafe_access(!is_native_ptr,  is_store, T_DOUBLE,  !is_volatile, false);

  case vmIntrinsics::_getByte_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_BYTE,    !is_volatile, false);
  case vmIntrinsics::_getShort_raw:             return inline_unsafe_access( is_native_ptr, !is_store, T_SHORT,   !is_volatile, false);
  case vmIntrinsics::_getChar_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_CHAR,    !is_volatile, false);
  case vmIntrinsics::_getInt_raw:               return inline_unsafe_access( is_native_ptr, !is_store, T_INT,     !is_volatile, false);
  case vmIntrinsics::_getLong_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_LONG,    !is_volatile, false);
  case vmIntrinsics::_getFloat_raw:             return inline_unsafe_access( is_native_ptr, !is_store, T_FLOAT,   !is_volatile, false);
  case vmIntrinsics::_getDouble_raw:            return inline_unsafe_access( is_native_ptr, !is_store, T_DOUBLE,  !is_volatile, false);
  case vmIntrinsics::_getAddress_raw:           return inline_unsafe_access( is_native_ptr, !is_store, T_ADDRESS, !is_volatile, false);

  case vmIntrinsics::_putByte_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_BYTE,    !is_volatile, false);
  case vmIntrinsics::_putShort_raw:             return inline_unsafe_access( is_native_ptr,  is_store, T_SHORT,   !is_volatile, false);
  case vmIntrinsics::_putChar_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_CHAR,    !is_volatile, false);
  case vmIntrinsics::_putInt_raw:               return inline_unsafe_access( is_native_ptr,  is_store, T_INT,     !is_volatile, false);
  case vmIntrinsics::_putLong_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_LONG,    !is_volatile, false);
  case vmIntrinsics::_putFloat_raw:             return inline_unsafe_access( is_native_ptr,  is_store, T_FLOAT,   !is_volatile, false);
  case vmIntrinsics::_putDouble_raw:            return inline_unsafe_access( is_native_ptr,  is_store, T_DOUBLE,  !is_volatile, false);
  case vmIntrinsics::_putAddress_raw:           return inline_unsafe_access( is_native_ptr,  is_store, T_ADDRESS, !is_volatile, false);

  case vmIntrinsics::_getObjectVolatile:        return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT,   is_volatile, false);
  case vmIntrinsics::_getBooleanVolatile:       return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN,  is_volatile, false);
  case vmIntrinsics::_getByteVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE,     is_volatile, false);
  case vmIntrinsics::_getShortVolatile:         return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,    is_volatile, false);
  case vmIntrinsics::_getCharVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,     is_volatile, false);
  case vmIntrinsics::_getIntVolatile:           return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,      is_volatile, false);
  case vmIntrinsics::_getLongVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,     is_volatile, false);
  case vmIntrinsics::_getFloatVolatile:         return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT,    is_volatile, false);
  case vmIntrinsics::_getDoubleVolatile:        return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE,   is_volatile, false);

  case vmIntrinsics::_putObjectVolatile:        return inline_unsafe_access(!is_native_ptr,  is_store, T_OBJECT,   is_volatile, false);
  case vmIntrinsics::_putBooleanVolatile:       return inline_unsafe_access(!is_native_ptr,  is_store, T_BOOLEAN,  is_volatile, false);
  case vmIntrinsics::_putByteVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_BYTE,     is_volatile, false);
  case vmIntrinsics::_putShortVolatile:         return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,    is_volatile, false);
  case vmIntrinsics::_putCharVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,     is_volatile, false);
  case vmIntrinsics::_putIntVolatile:           return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,      is_volatile, false);
  case vmIntrinsics::_putLongVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,     is_volatile, false);
  case vmIntrinsics::_putFloatVolatile:         return inline_unsafe_access(!is_native_ptr,  is_store, T_FLOAT,    is_volatile, false);
  case vmIntrinsics::_putDoubleVolatile:        return inline_unsafe_access(!is_native_ptr,  is_store, T_DOUBLE,   is_volatile, false);

  case vmIntrinsics::_prefetchRead:             return inline_unsafe_prefetch(!is_native_ptr, !is_store, !is_static);
  case vmIntrinsics::_prefetchWrite:            return inline_unsafe_prefetch(!is_native_ptr,  is_store, !is_static);
  case vmIntrinsics::_prefetchReadStatic:       return inline_unsafe_prefetch(!is_native_ptr, !is_store,  is_static);
  case vmIntrinsics::_prefetchWriteStatic:      return inline_unsafe_prefetch(!is_native_ptr,  is_store,  is_static);

  case vmIntrinsics::_compareAndSwapObject:     return inline_unsafe_load_store(T_OBJECT, LS_cmpxchg);
  case vmIntrinsics::_compareAndSwapInt:        return inline_unsafe_load_store(T_INT,    LS_cmpxchg);
  case vmIntrinsics::_compareAndSwapLong:       return inline_unsafe_load_store(T_LONG,   LS_cmpxchg);

  case vmIntrinsics::_putOrderedObject:         return inline_unsafe_ordered_store(T_OBJECT);
  case vmIntrinsics::_putOrderedInt:            return inline_unsafe_ordered_store(T_INT);
  case vmIntrinsics::_putOrderedLong:           return inline_unsafe_ordered_store(T_LONG);

  case vmIntrinsics::_getAndAddInt:             return inline_unsafe_load_store(T_INT,    LS_xadd);
  case vmIntrinsics::_getAndAddLong:            return inline_unsafe_load_store(T_LONG,   LS_xadd);
  case vmIntrinsics::_getAndSetInt:             return inline_unsafe_load_store(T_INT,    LS_xchg);
  case vmIntrinsics::_getAndSetLong:            return inline_unsafe_load_store(T_LONG,   LS_xchg);
  case vmIntrinsics::_getAndSetObject:          return inline_unsafe_load_store(T_OBJECT, LS_xchg);

  case vmIntrinsics::_loadFence:
  case vmIntrinsics::_storeFence:
  case vmIntrinsics::_fullFence:                return inline_unsafe_fence(intrinsic_id());

  case vmIntrinsics::_currentThread:            return inline_native_currentThread();
  case vmIntrinsics::_isInterrupted:            return inline_native_isInterrupted();

#ifdef JFR_HAVE_INTRINSICS
  case vmIntrinsics::_counterTime:              return inline_native_time_funcs(CAST_FROM_FN_PTR(address, JFR_TIME_FUNCTION), "counterTime");
  case vmIntrinsics::_getClassId:               return inline_native_classID();
  case vmIntrinsics::_getEventWriter:           return inline_native_getEventWriter();
#endif
  case vmIntrinsics::_currentTimeMillis:        return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeMillis), "currentTimeMillis");
  case vmIntrinsics::_nanoTime:                 return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeNanos), "nanoTime");
  case vmIntrinsics::_allocateInstance:         return inline_unsafe_allocate();
  case vmIntrinsics::_copyMemory:               return inline_unsafe_copyMemory();
  case vmIntrinsics::_newArray:                 return inline_native_newArray();
  case vmIntrinsics::_getLength:                return inline_native_getLength();
  case vmIntrinsics::_copyOf:                   return inline_array_copyOf(false);
  case vmIntrinsics::_copyOfRange:              return inline_array_copyOf(true);
  case vmIntrinsics::_equalsC:                  return inline_array_equals();
  case vmIntrinsics::_clone:                    return inline_native_clone(intrinsic()->is_virtual());

  case vmIntrinsics::_isAssignableFrom:         return inline_native_subtype_check();

  case vmIntrinsics::_isInstance:
  case vmIntrinsics::_getModifiers:
  case vmIntrinsics::_isInterface:
  case vmIntrinsics::_isArray:
  case vmIntrinsics::_isPrimitive:
  case vmIntrinsics::_getSuperclass:
  case vmIntrinsics::_getComponentType:
  case vmIntrinsics::_getClassAccessFlags:      return inline_native_Class_query(intrinsic_id());

  case vmIntrinsics::_floatToRawIntBits:
  case vmIntrinsics::_floatToIntBits:
  case vmIntrinsics::_intBitsToFloat:
  case vmIntrinsics::_doubleToRawLongBits:
  case vmIntrinsics::_doubleToLongBits:
  case vmIntrinsics::_longBitsToDouble:         return inline_fp_conversions(intrinsic_id());

  case vmIntrinsics::_numberOfLeadingZeros_i:
  case vmIntrinsics::_numberOfLeadingZeros_l:
  case vmIntrinsics::_numberOfTrailingZeros_i:
  case vmIntrinsics::_numberOfTrailingZeros_l:
  case vmIntrinsics::_bitCount_i:
  case vmIntrinsics::_bitCount_l:
  case vmIntrinsics::_reverseBytes_i:
  case vmIntrinsics::_reverseBytes_l:
  case vmIntrinsics::_reverseBytes_s:
  case vmIntrinsics::_reverseBytes_c:           return inline_number_methods(intrinsic_id());

  case vmIntrinsics::_getCallerClass:           return inline_native_Reflection_getCallerClass();

  case vmIntrinsics::_Reference_get:            return inline_reference_get();

  case vmIntrinsics::_aescrypt_encryptBlock:
  case vmIntrinsics::_aescrypt_decryptBlock:    return inline_aescrypt_Block(intrinsic_id());

  case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
  case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
    return inline_cipherBlockChaining_AESCrypt(intrinsic_id());

  case vmIntrinsics::_sha_implCompress:
  case vmIntrinsics::_sha2_implCompress:
  case vmIntrinsics::_sha5_implCompress:
    return inline_sha_implCompress(intrinsic_id());

  case vmIntrinsics::_digestBase_implCompressMB:
    return inline_digestBase_implCompressMB(predicate);

  case vmIntrinsics::_multiplyToLen:
    return inline_multiplyToLen();

  case vmIntrinsics::_squareToLen:
    return inline_squareToLen();

  case vmIntrinsics::_mulAdd:
    return inline_mulAdd();

  case vmIntrinsics::_montgomeryMultiply:
    return inline_montgomeryMultiply();
  case vmIntrinsics::_montgomerySquare:
    return inline_montgomerySquare();

  case vmIntrinsics::_ghash_processBlocks:
    return inline_ghash_processBlocks();

  case vmIntrinsics::_encodeISOArray:
    return inline_encodeISOArray();

  case vmIntrinsics::_updateCRC32:
    return inline_updateCRC32();
  case vmIntrinsics::_updateBytesCRC32:
    return inline_updateBytesCRC32();
  case vmIntrinsics::_updateByteBufferCRC32:
    return inline_updateByteBufferCRC32();

  case vmIntrinsics::_profileBoolean:
    return inline_profileBoolean();

  default:
    // If you get here, it may be that someone has added a new intrinsic
    // to the list in vmSymbols.hpp without implementing it here.
#ifndef PRODUCT
    if ((PrintMiscellaneous && (Verbose || WizardMode)) || PrintOpto) {
      tty->print_cr("*** Warning: Unimplemented intrinsic %s(%d)",
                    vmIntrinsics::name_at(intrinsic_id()), intrinsic_id());
    }
#endif
    return false;
  }
}

Node* LibraryCallKit::try_to_predicate(int predicate) {
  if (!jvms()->has_method()) {
    // Root JVMState has a null method.
    assert(map()->memory()->Opcode() == Op_Parm, "");
    // Insert the memory aliasing node
    set_all_memory(reset_memory());
  }
  assert(merged_memory(), "");

  switch (intrinsic_id()) {
  case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
    return inline_cipherBlockChaining_AESCrypt_predicate(false);
  case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
    return inline_cipherBlockChaining_AESCrypt_predicate(true);
  case vmIntrinsics::_digestBase_implCompressMB:
    return inline_digestBase_implCompressMB_predicate(predicate);

  default:
    // If you get here, it may be that someone has added a new intrinsic
    // to the list in vmSymbols.hpp without implementing it here.
#ifndef PRODUCT
    if ((PrintMiscellaneous && (Verbose || WizardMode)) || PrintOpto) {
      tty->print_cr("*** Warning: Unimplemented predicate for intrinsic %s(%d)",
                    vmIntrinsics::name_at(intrinsic_id()), intrinsic_id());
    }
#endif
    Node* slow_ctl = control();
    set_control(top()); // No fast path intrinsic
    return slow_ctl;
  }
}

//------------------------------set_result-------------------------------
// Helper function for finishing intrinsics.
void LibraryCallKit::set_result(RegionNode* region, PhiNode* value) {
  record_for_igvn(region);
  set_control(_gvn.transform(region));
  set_result( _gvn.transform(value));
  assert(value->type()->basic_type() == result()->bottom_type()->basic_type(), "sanity");
}

//------------------------------generate_guard---------------------------
// Helper function for generating guarded fast-slow graph structures.
// The given 'test', if true, guards a slow path.  If the test fails
// then a fast path can be taken.  (We generally hope it fails.)
// In all cases, GraphKit::control() is updated to the fast path.
// The returned value represents the control for the slow path.
// The return value is never 'top'; it is either a valid control
// or NULL if it is obvious that the slow path can never be taken.
// Also, if region and the slow control are not NULL, the slow edge
// is appended to the region.
Node* LibraryCallKit::generate_guard(Node* test, RegionNode* region, float true_prob) {
  if (stopped()) {
    // Already short circuited.
    return NULL;
  }

  // Build an if node and its projections.
  // If test is true we take the slow path, which we assume is uncommon.
  if (_gvn.type(test) == TypeInt::ZERO) {
    // The slow branch is never taken.  No need to build this guard.
    return NULL;
  }

  IfNode* iff = create_and_map_if(control(), test, true_prob, COUNT_UNKNOWN);

  Node* if_slow = _gvn.transform(new (C) IfTrueNode(iff));
  if (if_slow == top()) {
    // The slow branch is never taken.  No need to build this guard.
    return NULL;
  }

  if (region != NULL)
    region->add_req(if_slow);

  Node* if_fast = _gvn.transform(new (C) IfFalseNode(iff));
  set_control(if_fast);

  return if_slow;
}
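// Typical use (a sketch; see inline_string_equals below for a real example):
//   Node* if_slow = generate_slow_guard(bol, NULL);
//   if (if_slow != NULL) {
//     phi->init_req(2, slow_value);    // value produced on the slow path
//     region->init_req(2, if_slow);    // wire the slow edge into the merge
//   }
//   // control() now continues down the fast path.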

inline Node* LibraryCallKit::generate_slow_guard(Node* test, RegionNode* region) {
  return generate_guard(test, region, PROB_UNLIKELY_MAG(3));
}
inline Node* LibraryCallKit::generate_fair_guard(Node* test, RegionNode* region) {
  return generate_guard(test, region, PROB_FAIR);
}

inline Node* LibraryCallKit::generate_negative_guard(Node* index, RegionNode* region,
                                                     Node* *pos_index) {
  if (stopped())
    return NULL;                // already stopped
  if (_gvn.type(index)->higher_equal(TypeInt::POS)) // [0,maxint]
    return NULL;                // index is already adequately typed
  Node* cmp_lt = _gvn.transform(new (C) CmpINode(index, intcon(0)));
  Node* bol_lt = _gvn.transform(new (C) BoolNode(cmp_lt, BoolTest::lt));
  Node* is_neg = generate_guard(bol_lt, region, PROB_MIN);
  if (is_neg != NULL && pos_index != NULL) {
    // Emulate effect of Parse::adjust_map_after_if.
    Node* ccast = new (C) CastIINode(index, TypeInt::POS);
    ccast->set_req(0, control());
    (*pos_index) = _gvn.transform(ccast);
  }
  return is_neg;
}
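
// If the caller already knows the value can never be negative, the
// non-positive test below degenerates to a simple equality check against
// zero; that is what the never_negative flag selects (BoolTest::eq).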

inline Node* LibraryCallKit::generate_nonpositive_guard(Node* index, bool never_negative,
                                                        Node* *pos_index) {
  if (stopped())
    return NULL;                // already stopped
  if (_gvn.type(index)->higher_equal(TypeInt::POS1)) // [1,maxint]
    return NULL;                // index is already adequately typed
  Node* cmp_le = _gvn.transform(new (C) CmpINode(index, intcon(0)));
  BoolTest::mask le_or_eq = (never_negative ? BoolTest::eq : BoolTest::le);
  Node* bol_le = _gvn.transform(new (C) BoolNode(cmp_le, le_or_eq));
  Node* is_notp = generate_guard(bol_le, NULL, PROB_MIN);
  if (is_notp != NULL && pos_index != NULL) {
    // Emulate effect of Parse::adjust_map_after_if.
    Node* ccast = new (C) CastIINode(index, TypeInt::POS1);
    ccast->set_req(0, control());
    (*pos_index) = _gvn.transform(ccast);
  }
  return is_notp;
}

// Make sure that 'position' is a valid limit index, in [0..length].
// There are two equivalent plans for checking this:
//   A. (offset + copyLength)  unsigned<=  arrayLength
//   B. offset  <=  (arrayLength - copyLength)
// We require that all of the values above, except for the sum and
// difference, are already known to be non-negative.
// Plan A is robust in the face of overflow, if offset and copyLength
// are both hugely positive.
//
// Plan B is less direct and intuitive, but it does not overflow at
// all, since the difference of two non-negatives is always
// representable.  Whenever Java methods must perform the equivalent
// check they generally use Plan B instead of Plan A.
// For the moment we use Plan A.
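// Illustrative overflow case for Plan A (hypothetical values):
//   offset = 0x40000000, copyLength = 0x40000000, arrayLength = 10.
//   The 32-bit sum wraps to 0x80000000, which compared as an unsigned value
//   is far larger than arrayLength, so the guard still rejects the
//   out-of-range request.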
inline Node* LibraryCallKit::generate_limit_guard(Node* offset,
                                                  Node* subseq_length,
                                                  Node* array_length,
                                                  RegionNode* region) {
  if (stopped())
    return NULL;                // already stopped
  bool zero_offset = _gvn.type(offset) == TypeInt::ZERO;
  if (zero_offset && subseq_length->eqv_uncast(array_length))
    return NULL;                // common case of whole-array copy
  Node* last = subseq_length;
  if (!zero_offset)             // last += offset
    last = _gvn.transform(new (C) AddINode(last, offset));
  Node* cmp_lt = _gvn.transform(new (C) CmpUNode(array_length, last));
  Node* bol_lt = _gvn.transform(new (C) BoolNode(cmp_lt, BoolTest::lt));
  Node* is_over = generate_guard(bol_lt, region, PROB_MIN);
  return is_over;
}


//--------------------------generate_current_thread--------------------
Node* LibraryCallKit::generate_current_thread(Node* &tls_output) {
  ciKlass*    thread_klass = env()->Thread_klass();
  const Type* thread_type  = TypeOopPtr::make_from_klass(thread_klass)->cast_to_ptr_type(TypePtr::NotNull);
  Node* thread = _gvn.transform(new (C) ThreadLocalNode());
  Node* p = basic_plus_adr(top()/*!oop*/, thread, in_bytes(JavaThread::threadObj_offset()));
  Node* threadObj = make_load(NULL, p, thread_type, T_OBJECT, MemNode::unordered);
  tls_output = thread;
  return threadObj;
}


//------------------------------make_string_method_node------------------------
// Helper method for String intrinsic functions. This version is called
// with str1 and str2 pointing to String object nodes.
//
Node* LibraryCallKit::make_string_method_node(int opcode, Node* str1, Node* str2) {
  Node* no_ctrl = NULL;

  // Get start addr of string
  Node* str1_value   = load_String_value(no_ctrl, str1);
  Node* str1_offset  = load_String_offset(no_ctrl, str1);
  Node* str1_start   = array_element_address(str1_value, str1_offset, T_CHAR);

  // Get length of string 1
  Node* str1_len  = load_String_length(no_ctrl, str1);

  Node* str2_value   = load_String_value(no_ctrl, str2);
  Node* str2_offset  = load_String_offset(no_ctrl, str2);
  Node* str2_start   = array_element_address(str2_value, str2_offset, T_CHAR);

  Node* str2_len = NULL;
  Node* result = NULL;

  switch (opcode) {
  case Op_StrIndexOf:
    // Get length of string 2
    str2_len = load_String_length(no_ctrl, str2);

    result = new (C) StrIndexOfNode(control(), memory(TypeAryPtr::CHARS),
                                 str1_start, str1_len, str2_start, str2_len);
    break;
  case Op_StrComp:
    // Get length of string 2
    str2_len = load_String_length(no_ctrl, str2);

    result = new (C) StrCompNode(control(), memory(TypeAryPtr::CHARS),
                                 str1_start, str1_len, str2_start, str2_len);
    break;
  case Op_StrEquals:
    result = new (C) StrEqualsNode(control(), memory(TypeAryPtr::CHARS),
                               str1_start, str2_start, str1_len);
    break;
  default:
    ShouldNotReachHere();
    return NULL;
  }

  // All these intrinsics have checks.
  C->set_has_split_ifs(true); // Has chance for split-if optimization

  return _gvn.transform(result);
}

// Helper method for String intrinsic functions. This version is called
// with str1 and str2 pointing to char[] nodes, with cnt1 and cnt2 pointing
// to Int nodes containing the lengths of str1 and str2.
//
Node* LibraryCallKit::make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2) {
  Node* result = NULL;
  switch (opcode) {
  case Op_StrIndexOf:
    result = new (C) StrIndexOfNode(control(), memory(TypeAryPtr::CHARS),
                                 str1_start, cnt1, str2_start, cnt2);
    break;
  case Op_StrComp:
    result = new (C) StrCompNode(control(), memory(TypeAryPtr::CHARS),
                                 str1_start, cnt1, str2_start, cnt2);
    break;
  case Op_StrEquals:
    result = new (C) StrEqualsNode(control(), memory(TypeAryPtr::CHARS),
                                 str1_start, str2_start, cnt1);
    break;
  default:
    ShouldNotReachHere();
    return NULL;
  }

  // All these intrinsics have checks.
  C->set_has_split_ifs(true); // Has chance for split-if optimization

  return _gvn.transform(result);
}

//------------------------------inline_string_compareTo------------------------
// public int java.lang.String.compareTo(String anotherString);
bool LibraryCallKit::inline_string_compareTo() {
  Node* receiver = null_check(argument(0));
  Node* arg      = null_check(argument(1));
  if (stopped()) {
    return true;
  }
  set_result(make_string_method_node(Op_StrComp, receiver, arg));
  return true;
}

//------------------------------inline_string_equals------------------------
bool LibraryCallKit::inline_string_equals() {
  Node* receiver = null_check_receiver();
  // NOTE: Do not null check the argument for String.equals() because the
  // spec allows null as the argument.
  Node* argument = this->argument(1);
  if (stopped()) {
    return true;
  }

  // paths (plus control) merge
  RegionNode* region = new (C) RegionNode(5);
  Node* phi = new (C) PhiNode(region, TypeInt::BOOL);

  // does source == target string?
  Node* cmp = _gvn.transform(new (C) CmpPNode(receiver, argument));
  Node* bol = _gvn.transform(new (C) BoolNode(cmp, BoolTest::eq));

  Node* if_eq = generate_slow_guard(bol, NULL);
  if (if_eq != NULL) {
    // receiver == argument
    phi->init_req(2, intcon(1));
    region->init_req(2, if_eq);
  }

  // get String klass for instanceOf
  ciInstanceKlass* klass = env()->String_klass();

  if (!stopped()) {
    Node* inst = gen_instanceof(argument, makecon(TypeKlassPtr::make(klass)));
    Node* cmp  = _gvn.transform(new (C) CmpINode(inst, intcon(1)));
    Node* bol  = _gvn.transform(new (C) BoolNode(cmp, BoolTest::ne));

    Node* inst_false = generate_guard(bol, NULL, PROB_MIN);
    //instanceOf == true, fallthrough

    if (inst_false != NULL) {
      phi->init_req(3, intcon(0));
      region->init_req(3, inst_false);
    }
  }

  if (!stopped()) {
    const TypeOopPtr* string_type = TypeOopPtr::make_from_klass(klass);

    // Properly cast the argument to String
    argument = _gvn.transform(new (C) CheckCastPPNode(control(), argument, string_type));
    // This path is taken only when the argument's type is String:NotNull.
    argument = cast_not_null(argument, false);

    Node* no_ctrl = NULL;

    // Get start addr of receiver
    Node* receiver_val    = load_String_value(no_ctrl, receiver);
    Node* receiver_offset = load_String_offset(no_ctrl, receiver);
    Node* receiver_start = array_element_address(receiver_val, receiver_offset, T_CHAR);

    // Get length of receiver
    Node* receiver_cnt  = load_String_length(no_ctrl, receiver);

    // Get start addr of argument
    Node* argument_val    = load_String_value(no_ctrl, argument);
    Node* argument_offset = load_String_offset(no_ctrl, argument);
    Node* argument_start = array_element_address(argument_val, argument_offset, T_CHAR);

    // Get length of argument
    Node* argument_cnt  = load_String_length(no_ctrl, argument);

    // Check for receiver count != argument count
    Node* cmp = _gvn.transform(new(C) CmpINode(receiver_cnt, argument_cnt));
    Node* bol = _gvn.transform(new(C) BoolNode(cmp, BoolTest::ne));
    Node* if_ne = generate_slow_guard(bol, NULL);
    if (if_ne != NULL) {
      phi->init_req(4, intcon(0));
      region->init_req(4, if_ne);
    }

    // The check for count == 0 is done by the assembler code for StrEquals.

    if (!stopped()) {
      Node* equals = make_string_method_node(Op_StrEquals, receiver_start, receiver_cnt, argument_start, argument_cnt);
      phi->init_req(1, equals);
      region->init_req(1, control());
    }
  }

  // post merge
  set_control(_gvn.transform(region));
  record_for_igvn(region);

  set_result(_gvn.transform(phi));
  return true;
}

//------------------------------inline_array_equals----------------------------
bool LibraryCallKit::inline_array_equals() {
  Node* arg1 = argument(0);
  Node* arg2 = argument(1);
  set_result(_gvn.transform(new (C) AryEqNode(control(), memory(TypeAryPtr::CHARS), arg1, arg2)));
  return true;
}

// Java version of String.indexOf(constant string)
// class StringDecl {
//   StringDecl(char[] ca) {
//     offset = 0;
//     count = ca.length;
//     value = ca;
//   }
//   int offset;
//   int count;
//   char[] value;
// }
//
// static int string_indexOf_J(StringDecl string_object, char[] target_object,
//                             int targetOffset, int cache_i, int md2) {
//   int cache = cache_i;
//   int sourceOffset = string_object.offset;
//   int sourceCount = string_object.count;
//   int targetCount = target_object.length;
//
//   int targetCountLess1 = targetCount - 1;
//   int sourceEnd = sourceOffset + sourceCount - targetCountLess1;
//
//   char[] source = string_object.value;
//   char[] target = target_object;
//   int lastChar = target[targetCountLess1];
//
//  outer_loop:
//   for (int i = sourceOffset; i < sourceEnd; ) {
//     int src = source[i + targetCountLess1];
//     if (src == lastChar) {
//       // With random strings and a 4-character alphabet,
//       // reverse matching at this point sets up 0.8% fewer
//       // frames, but (paradoxically) makes 0.3% more probes.
//       // Since those probes are nearer the lastChar probe,
//       // there may be a net D$ win with reverse matching.
//       // But reversing the loop inhibits unrolling of the inner loop
//       // for an unknown reason, and so does running the outer loop from
//       // (sourceOffset - targetCountLess1) to (sourceOffset + sourceCount).
//       for (int j = 0; j < targetCountLess1; j++) {
//         if (target[targetOffset + j] != source[i+j]) {
//           if ((cache & (1 << source[i+j])) == 0) {
//             if (md2 < j+1) {
//               i += j+1;
//               continue outer_loop;
//             }
//           }
//           i += md2;
//           continue outer_loop;
//         }
//       }
//       return i - sourceOffset;
//     }
//     if ((cache & (1 << src)) == 0) {
//       i += targetCountLess1;
//     } // using "i += targetCount;" and an "else i++;" causes a jump to a jump.
//     i++;
//   }
//   return -1;
// }

//------------------------------string_indexOf------------------------
Node* LibraryCallKit::string_indexOf(Node* string_object, ciTypeArray* target_array, jint targetOffset_i,
                                     jint cache_i, jint md2_i) {

  Node* no_ctrl  = NULL;
  float likely   = PROB_LIKELY(0.9);
  float unlikely = PROB_UNLIKELY(0.9);

  const int nargs = 0; // no arguments to push back for uncommon trap in predicate

  Node* source        = load_String_value(no_ctrl, string_object);
  Node* sourceOffset  = load_String_offset(no_ctrl, string_object);
  Node* sourceCount   = load_String_length(no_ctrl, string_object);

  Node* target = _gvn.transform( makecon(TypeOopPtr::make_from_constant(target_array, true)));
  jint target_length = target_array->length();
  const TypeAry* target_array_type = TypeAry::make(TypeInt::CHAR, TypeInt::make(0, target_length, Type::WidenMin));
  const TypeAryPtr* target_type = TypeAryPtr::make(TypePtr::BotPTR, target_array_type, target_array->klass(), true, Type::OffsetBot);

  // String.value field is known to be @Stable.
  if (UseImplicitStableValues) {
    target = cast_array_to_stable(target, target_type);
  }

  IdealKit kit(this, false, true);
#define __ kit.
  Node* zero             = __ ConI(0);
  Node* one              = __ ConI(1);
  Node* cache            = __ ConI(cache_i);
  Node* md2              = __ ConI(md2_i);
  Node* lastChar         = __ ConI(target_array->char_at(target_length - 1));
  Node* targetCount      = __ ConI(target_length);
  Node* targetCountLess1 = __ ConI(target_length - 1);
  Node* targetOffset     = __ ConI(targetOffset_i);
  Node* sourceEnd        = __ SubI(__ AddI(sourceOffset, sourceCount), targetCountLess1);

  IdealVariable rtn(kit), i(kit), j(kit); __ declarations_done();
  Node* outer_loop = __ make_label(2 /* goto */);
  Node* return_    = __ make_label(1);

  __ set(rtn,__ ConI(-1));
  __ loop(this, nargs, i, sourceOffset, BoolTest::lt, sourceEnd); {
       Node* i2  = __ AddI(__ value(i), targetCountLess1);
       // pin to prohibit loading of "next iteration" value which may SEGV (rare)
       Node* src = load_array_element(__ ctrl(), source, i2, TypeAryPtr::CHARS);
       __ if_then(src, BoolTest::eq, lastChar, unlikely); {
         __ loop(this, nargs, j, zero, BoolTest::lt, targetCountLess1); {
              Node* tpj = __ AddI(targetOffset, __ value(j));
              Node* targ = load_array_element(no_ctrl, target, tpj, target_type);
              Node* ipj  = __ AddI(__ value(i), __ value(j));
              Node* src2 = load_array_element(no_ctrl, source, ipj, TypeAryPtr::CHARS);
              __ if_then(targ, BoolTest::ne, src2); {
                __ if_then(__ AndI(cache, __ LShiftI(one, src2)), BoolTest::eq, zero); {
                  __ if_then(md2, BoolTest::lt, __ AddI(__ value(j), one)); {
                    __ increment(i, __ AddI(__ value(j), one));
                    __ goto_(outer_loop);
                  } __ end_if(); __ dead(j);
                }__ end_if(); __ dead(j);
                __ increment(i, md2);
                __ goto_(outer_loop);
              }__ end_if();
              __ increment(j, one);
         }__ end_loop(); __ dead(j);
         __ set(rtn, __ SubI(__ value(i), sourceOffset)); __ dead(i);
         __ goto_(return_);
       }__ end_if();
       __ if_then(__ AndI(cache, __ LShiftI(one, src)), BoolTest::eq, zero, likely); {
         __ increment(i, targetCountLess1);
       }__ end_if();
       __ increment(i, one);
       __ bind(outer_loop);
  }__ end_loop(); __ dead(i);
  __ bind(return_);

  // Final sync IdealKit and GraphKit.
  final_sync(kit);
  Node* result = __ value(rtn);
#undef __
  C->set_has_loops(true);
  return result;
}

//------------------------------inline_string_indexOf------------------------
bool LibraryCallKit::inline_string_indexOf() {
  Node* receiver = argument(0);
  Node* arg      = argument(1);

  Node* result;
  // Disable the use of pcmpestri until it can be guaranteed that
  // the load doesn't cross into the uncommitted space.
  if (Matcher::has_match_rule(Op_StrIndexOf) &&
      UseSSE42Intrinsics) {
    // Generate SSE4.2 version of indexOf
    // We currently only have match rules that use SSE4.2

    receiver = null_check(receiver);
    arg      = null_check(arg);
    if (stopped()) {
      return true;
    }

    ciInstanceKlass* str_klass = env()->String_klass();
    const TypeOopPtr* string_type = TypeOopPtr::make_from_klass(str_klass);

    // Make the merge point
    RegionNode* result_rgn = new (C) RegionNode(4);
    Node*       result_phi = new (C) PhiNode(result_rgn, TypeInt::INT);
    Node* no_ctrl  = NULL;

    // Get start addr of source string
    Node* source = load_String_value(no_ctrl, receiver);
    Node* source_offset = load_String_offset(no_ctrl, receiver);
    Node* source_start = array_element_address(source, source_offset, T_CHAR);

    // Get length of source string
    Node* source_cnt  = load_String_length(no_ctrl, receiver);

    // Get start addr of substring
    Node* substr = load_String_value(no_ctrl, arg);
    Node* substr_offset = load_String_offset(no_ctrl, arg);
    Node* substr_start = array_element_address(substr, substr_offset, T_CHAR);

    // Get length of substring
    Node* substr_cnt  = load_String_length(no_ctrl, arg);

    // Check for substr count > string count
    Node* cmp = _gvn.transform(new(C) CmpINode(substr_cnt, source_cnt));
    Node* bol = _gvn.transform(new(C) BoolNode(cmp, BoolTest::gt));
    Node* if_gt = generate_slow_guard(bol, NULL);
    if (if_gt != NULL) {
      result_phi->init_req(2, intcon(-1));
      result_rgn->init_req(2, if_gt);
    }

    if (!stopped()) {
      // Check for substr count == 0
      cmp = _gvn.transform(new(C) CmpINode(substr_cnt, intcon(0)));
      bol = _gvn.transform(new(C) BoolNode(cmp, BoolTest::eq));
      Node* if_zero = generate_slow_guard(bol, NULL);
      if (if_zero != NULL) {
        result_phi->init_req(3, intcon(0));
        result_rgn->init_req(3, if_zero);
      }
    }

    if (!stopped()) {
      result = make_string_method_node(Op_StrIndexOf, source_start, source_cnt, substr_start, substr_cnt);
      result_phi->init_req(1, result);
      result_rgn->init_req(1, control());
    }
    set_control(_gvn.transform(result_rgn));
    record_for_igvn(result_rgn);
    result = _gvn.transform(result_phi);

  } else { // Use LibraryCallKit::string_indexOf
    // Don't intrinsify if the argument isn't a constant string.
    if (!arg->is_Con()) {
     return false;
    }
    const TypeOopPtr* str_type = _gvn.type(arg)->isa_oopptr();
    if (str_type == NULL) {
      return false;
    }
    ciInstanceKlass* klass = env()->String_klass();
    ciObject* str_const = str_type->const_oop();
    if (str_const == NULL || str_const->klass() != klass) {
      return false;
    }
    ciInstance* str = str_const->as_instance();
    assert(str != NULL, "must be instance");

    ciObject* v = str->field_value_by_offset(java_lang_String::value_offset_in_bytes()).as_object();
    ciTypeArray* pat = v->as_type_array(); // pattern (argument) character array

    int o;
    int c;
    if (java_lang_String::has_offset_field()) {
      o = str->field_value_by_offset(java_lang_String::offset_offset_in_bytes()).as_int();
      c = str->field_value_by_offset(java_lang_String::count_offset_in_bytes()).as_int();
    } else {
      o = 0;
      c = pat->length();
    }

    // Constant strings have no offset and count == length, which
    // simplifies the resulting code somewhat, so let's optimize for that.
    if (o != 0 || c != pat->length()) {
     return false;
    }

    receiver = null_check(receiver, T_OBJECT);
    // NOTE: No null check on the argument is needed since it's a constant String oop.
    if (stopped()) {
      return true;
    }

    // An empty pattern string always returns 0 (match at beginning of string).
    if (c == 0) {
      set_result(intcon(0));
      return true;
    }

    // Generate default indexOf
    jchar lastChar = pat->char_at(o + (c - 1));
    int cache = 0;
    int i;
    for (i = 0; i < c - 1; i++) {
      assert(i < pat->length(), "out of range");
      cache |= (1 << (pat->char_at(o + i) & (sizeof(cache) * BitsPerByte - 1)));
    }

    int md2 = c;
    for (i = 0; i < c - 1; i++) {
      assert(i < pat->length(), "out of range");
      if (pat->char_at(o + i) == lastChar) {
        md2 = (c - 1) - i;
      }
    }
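    // Illustrative example (not generated code): for the pattern "abcab",
    // lastChar is 'b', the cache bitmask has bits set for 'a', 'b' and 'c'
    // (characters taken modulo 32), and md2 == 3, i.e. the distance from the
    // previous occurrence of lastChar (index 1) to the final index (4).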

    result = string_indexOf(receiver, pat, o, cache, md2);
  }
  set_result(result);
  return true;
}

//--------------------------round_double_node--------------------------------
// Round a double node if necessary.
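// (With x87 floating point, i.e. UseSSE <= 1, intermediate double results can
// carry extra precision/range in the 80-bit registers; the RoundDouble node
// forces the value back to strict 64-bit double semantics.)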
Node* LibraryCallKit::round_double_node(Node* n) {
  if (Matcher::strict_fp_requires_explicit_rounding && UseSSE <= 1)
    n = _gvn.transform(new (C) RoundDoubleNode(0, n));
  return n;
}

//------------------------------inline_math-----------------------------------
// public static double Math.abs(double)
// public static double Math.sqrt(double)
// public static double Math.log(double)
// public static double Math.log10(double)
bool LibraryCallKit::inline_math(vmIntrinsics::ID id) {
  Node* arg = round_double_node(argument(0));
  Node* n = NULL;
  switch (id) {
  case vmIntrinsics::_dabs:   n = new (C) AbsDNode(                arg);  break;
  case vmIntrinsics::_dsqrt:  n = new (C) SqrtDNode(C, control(),  arg);  break;
  case vmIntrinsics::_dlog:   n = new (C) LogDNode(C, control(),   arg);  break;
  case vmIntrinsics::_dlog10: n = new (C) Log10DNode(C, control(), arg);  break;
  default:  fatal_unexpected_iid(id);  break;
  }
  set_result(_gvn.transform(n));
  return true;
}

//------------------------------inline_trig----------------------------------
// Inline sin/cos/tan instructions, if possible.  If rounding is required, do
// argument reduction which will turn into a fast/slow diamond.
bool LibraryCallKit::inline_trig(vmIntrinsics::ID id) {
  Node* arg = round_double_node(argument(0));
  Node* n = NULL;

  switch (id) {
  case vmIntrinsics::_dsin:  n = new (C) SinDNode(C, control(), arg);  break;
  case vmIntrinsics::_dcos:  n = new (C) CosDNode(C, control(), arg);  break;
  case vmIntrinsics::_dtan:  n = new (C) TanDNode(C, control(), arg);  break;
  default:  fatal_unexpected_iid(id);  break;
  }
  n = _gvn.transform(n);

  // Rounding required?  Check for argument reduction!
  if (Matcher::strict_fp_requires_explicit_rounding) {
    static const double     pi_4 =  0.7853981633974483;
    static const double neg_pi_4 = -0.7853981633974483;
    // pi/2 in 80-bit extended precision
    // static const unsigned char pi_2_bits_x[] = {0x35,0xc2,0x68,0x21,0xa2,0xda,0x0f,0xc9,0xff,0x3f,0x00,0x00,0x00,0x00,0x00,0x00};
    // -pi/2 in 80-bit extended precision
    // static const unsigned char neg_pi_2_bits_x[] = {0x35,0xc2,0x68,0x21,0xa2,0xda,0x0f,0xc9,0xff,0xbf,0x00,0x00,0x00,0x00,0x00,0x00};
    // Cutoff value for using this argument reduction technique
    //static const double    pi_2_minus_epsilon =  1.564660403643354;
    //static const double neg_pi_2_plus_epsilon = -1.564660403643354;

    // Pseudocode for sin:
    // if (x <= Math.PI / 4.0) {
    //   if (x >= -Math.PI / 4.0) return  fsin(x);
    //   if (x >= -Math.PI / 2.0) return -fcos(x + Math.PI / 2.0);
    // } else {
    //   if (x <=  Math.PI / 2.0) return  fcos(x - Math.PI / 2.0);
    // }
    // return StrictMath.sin(x);

    // Pseudocode for cos:
    // if (x <= Math.PI / 4.0) {
    //   if (x >= -Math.PI / 4.0) return  fcos(x);
    //   if (x >= -Math.PI / 2.0) return  fsin(x + Math.PI / 2.0);
    // } else {
    //   if (x <=  Math.PI / 2.0) return -fsin(x - Math.PI / 2.0);
    // }
    // return StrictMath.cos(x);

    // Actually, sticking an 80-bit Intel value into C2 will be tough; it
    // requires a special machine instruction to load it.  Instead we'll try
    // the 'easy' case.  If we really need the extra range +/- PI/2 we'll
    // probably do the math inside the SIN encoding.

    // Make the merge point
    RegionNode* r = new (C) RegionNode(3);
    Node* phi = new (C) PhiNode(r, Type::DOUBLE);

    // Flatten arg so we need only 1 test
    Node *abs = _gvn.transform(new (C) AbsDNode(arg));
    // Node for PI/4 constant
    Node *pi4 = makecon(TypeD::make(pi_4));
    // Check PI/4 : abs(arg)
    Node *cmp = _gvn.transform(new (C) CmpDNode(pi4,abs));
    // Check: If PI/4 < abs(arg) then go slow
    Node *bol = _gvn.transform(new (C) BoolNode( cmp, BoolTest::lt ));
    // Branch either way
    IfNode *iff = create_and_xform_if(control(),bol, PROB_STATIC_FREQUENT, COUNT_UNKNOWN);
    set_control(opt_iff(r,iff));

    // Set fast path result
    phi->init_req(2, n);

    // Slow path - non-blocking leaf call
    Node* call = NULL;
    switch (id) {
    case vmIntrinsics::_dsin:
      call = make_runtime_call(RC_LEAF, OptoRuntime::Math_D_D_Type(),
                               CAST_FROM_FN_PTR(address, SharedRuntime::dsin),
                               "Sin", NULL, arg, top());
      break;
    case vmIntrinsics::_dcos:
      call = make_runtime_call(RC_LEAF, OptoRuntime::Math_D_D_Type(),
                               CAST_FROM_FN_PTR(address, SharedRuntime::dcos),
                               "Cos", NULL, arg, top());
      break;
    case vmIntrinsics::_dtan:
      call = make_runtime_call(RC_LEAF, OptoRuntime::Math_D_D_Type(),
                               CAST_FROM_FN_PTR(address, SharedRuntime::dtan),
                               "Tan", NULL, arg, top());
      break;
    }
    assert(control()->in(0) == call, "");
    Node* slow_result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms));
    r->init_req(1, control());
    phi->init_req(1, slow_result);

    // Post-merge
    set_control(_gvn.transform(r));
    record_for_igvn(r);
    n = _gvn.transform(phi);

    C->set_has_split_ifs(true); // Has chance for split-if optimization
  }
  set_result(n);
  return true;
}

Node* LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName) {
  //-------------------
  //result=(result.isNaN())? funcAddr():result;
  // Check for NaN (result != result); if so, either trap
  // or go to the runtime.
  Node* cmpisnan = _gvn.transform(new (C) CmpDNode(result, result));
  // Build the boolean node
  Node* bolisnum = _gvn.transform(new (C) BoolNode(cmpisnan, BoolTest::eq));

  if (!too_many_traps(Deoptimization::Reason_intrinsic)) {
    { BuildCutout unless(this, bolisnum, PROB_STATIC_FREQUENT);
      // The pow or exp intrinsic returned a NaN, which requires a call
      // to the runtime.  Recompile with the runtime call.
      uncommon_trap(Deoptimization::Reason_intrinsic,
                    Deoptimization::Action_make_not_entrant);
    }
    return result;
  } else {
    // If this inlining ever returned NaN in the past, we compile a call
    // to the runtime to properly handle corner cases

    IfNode* iff = create_and_xform_if(control(), bolisnum, PROB_STATIC_FREQUENT, COUNT_UNKNOWN);
    Node* if_slow = _gvn.transform(new (C) IfFalseNode(iff));
    Node* if_fast = _gvn.transform(new (C) IfTrueNode(iff));

    if (!if_slow->is_top()) {
      RegionNode* result_region = new (C) RegionNode(3);
      PhiNode*    result_val = new (C) PhiNode(result_region, Type::DOUBLE);

      result_region->init_req(1, if_fast);
      result_val->init_req(1, result);

      set_control(if_slow);

      const TypePtr* no_memory_effects = NULL;
      Node* rt = make_runtime_call(RC_LEAF, call_type, funcAddr, funcName,
                                   no_memory_effects,
                                   x, top(), y, y ? top() : NULL);
      Node* value = _gvn.transform(new (C) ProjNode(rt, TypeFunc::Parms+0));
#ifdef ASSERT
      Node* value_top = _gvn.transform(new (C) ProjNode(rt, TypeFunc::Parms+1));
      assert(value_top == top(), "second value must be top");
#endif

      result_region->init_req(2, control());
      result_val->init_req(2, value);
      set_control(_gvn.transform(result_region));
      return _gvn.transform(result_val);
    } else {
      return result;
    }
  }
}

//------------------------------inline_exp-------------------------------------
// Inline exp instructions, if possible.  The Intel hardware only misses
// really odd corner cases (+/- Infinity).  Just uncommon-trap them.
bool LibraryCallKit::inline_exp() {
  Node* arg = round_double_node(argument(0));
  Node* n   = _gvn.transform(new (C) ExpDNode(C, control(), arg));

  n = finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
  set_result(n);

  C->set_has_split_ifs(true); // Has chance for split-if optimization
  return true;
}

//------------------------------inline_pow-------------------------------------
// Inline power instructions, if possible.
bool LibraryCallKit::inline_pow() {
  // Pseudocode for pow
  // if (y == 2) {
  //   return x * x;
  // } else {
  //   if (x <= 0.0) {
  //     long longy = (long)y;
  //     if ((double)longy == y) { // if y is long
  //       if (y + 1 == y) longy = 0; // huge number: even
  //       result = ((1&longy) == 0)?-DPow(abs(x), y):DPow(abs(x), y);
  //     } else {
  //       result = NaN;
  //     }
  //   } else {
  //     result = DPow(x,y);
  //   }
  //   if (result != result)?  {
  //     result = uncommon_trap() or runtime_call();
  //   }
  //   return result;
  // }

  Node* x = round_double_node(argument(0));
  Node* y = round_double_node(argument(2));

  Node* result = NULL;

  Node*   const_two_node = makecon(TypeD::make(2.0));
  Node*   cmp_node       = _gvn.transform(new (C) CmpDNode(y, const_two_node));
  Node*   bool_node      = _gvn.transform(new (C) BoolNode(cmp_node, BoolTest::eq));
  IfNode* if_node        = create_and_xform_if(control(), bool_node, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
  Node*   if_true        = _gvn.transform(new (C) IfTrueNode(if_node));
  Node*   if_false       = _gvn.transform(new (C) IfFalseNode(if_node));

  RegionNode* region_node = new (C) RegionNode(3);
  region_node->init_req(1, if_true);

  Node* phi_node = new (C) PhiNode(region_node, Type::DOUBLE);
  // special case for x^y where y == 2, we can convert it to x * x
  phi_node->init_req(1, _gvn.transform(new (C) MulDNode(x, x)));

  // set control to if_false since we will now process the false branch
  set_control(if_false);

  if (!too_many_traps(Deoptimization::Reason_intrinsic)) {
    // Short form: skip the fancy tests and just check for NaN result.
    result = _gvn.transform(new (C) PowDNode(C, control(), x, y));
  } else {
    // If this inlining ever returned NaN in the past, include all
    // checks + call to the runtime.

    // Set the merge point for the If node with condition (x <= 0.0).
    // There are four possible paths to the region and phi nodes.
    RegionNode *r = new (C) RegionNode(4);
    Node *phi = new (C) PhiNode(r, Type::DOUBLE);

    // Build the first if node: if (x <= 0.0)
    // Node for 0 constant
    Node *zeronode = makecon(TypeD::ZERO);
    // Check x:0
    Node *cmp = _gvn.transform(new (C) CmpDNode(x, zeronode));
    // Check: If (x<=0) then go complex path
    Node *bol1 = _gvn.transform(new (C) BoolNode( cmp, BoolTest::le ));
    // Branch either way
    IfNode *if1 = create_and_xform_if(control(),bol1, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
    // Fast path taken; set region slot 3
    Node *fast_taken = _gvn.transform(new (C) IfFalseNode(if1));
    r->init_req(3,fast_taken); // Capture fast-control

    // Fast path not-taken, i.e. slow path
    Node *complex_path = _gvn.transform(new (C) IfTrueNode(if1));

    // Set fast path result
    Node *fast_result = _gvn.transform(new (C) PowDNode(C, control(), x, y));
    phi->init_req(3, fast_result);

    // Complex path
    // Build the second if node (if y is long)
    // Node for (long)y
    Node *longy = _gvn.transform(new (C) ConvD2LNode(y));
    // Node for (double)((long) y)
    Node *doublelongy= _gvn.transform(new (C) ConvL2DNode(longy));
    // Check (double)((long) y) : y
    Node *cmplongy= _gvn.transform(new (C) CmpDNode(doublelongy, y));
    // If y isn't a long, go to the slow path

    Node *bol2 = _gvn.transform(new (C) BoolNode( cmplongy, BoolTest::ne ));
    // Branch either way
    IfNode *if2 = create_and_xform_if(complex_path,bol2, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
    Node* ylong_path = _gvn.transform(new (C) IfFalseNode(if2));

    Node *slow_path = _gvn.transform(new (C) IfTrueNode(if2));

    // Calculate DPow(abs(x), y)*(1 & (long)y)
    // Node for constant 1
    Node *conone = longcon(1);
    // 1& (long)y
    Node *signnode= _gvn.transform(new (C) AndLNode(conone, longy));

    // A huge number is always even. Detect a huge number by checking
    // if y + 1 == y and set the integer to be tested for parity to 0.
    // Required for corner case:
    // (long)9.223372036854776E18 = max_jlong
    // (double)(long)9.223372036854776E18 = 9.223372036854776E18
    // max_jlong is odd but 9.223372036854776E18 is even
    Node* yplus1 = _gvn.transform(new (C) AddDNode(y, makecon(TypeD::make(1))));
    Node *cmpyplus1= _gvn.transform(new (C) CmpDNode(yplus1, y));
    Node *bolyplus1 = _gvn.transform(new (C) BoolNode( cmpyplus1, BoolTest::eq ));
    Node* correctedsign = NULL;
    if (ConditionalMoveLimit != 0) {
      correctedsign = _gvn.transform( CMoveNode::make(C, NULL, bolyplus1, signnode, longcon(0), TypeLong::LONG));
    } else {
      IfNode *ifyplus1 = create_and_xform_if(ylong_path,bolyplus1, PROB_FAIR, COUNT_UNKNOWN);
      RegionNode *r = new (C) RegionNode(3);
      Node *phi = new (C) PhiNode(r, TypeLong::LONG);
      r->init_req(1, _gvn.transform(new (C) IfFalseNode(ifyplus1)));
      r->init_req(2, _gvn.transform(new (C) IfTrueNode(ifyplus1)));
      phi->init_req(1, signnode);
      phi->init_req(2, longcon(0));
      correctedsign = _gvn.transform(phi);
      ylong_path = _gvn.transform(r);
      record_for_igvn(r);
    }

    // zero node
    Node *conzero = longcon(0);
    // Check (1&(long)y)==0?
    Node *cmpeq1 = _gvn.transform(new (C) CmpLNode(correctedsign, conzero));
    // Check whether (1&(long)y) != 0; if so the result is negative
    Node *bol3 = _gvn.transform(new (C) BoolNode( cmpeq1, BoolTest::ne ));
    // abs(x)
    Node *absx=_gvn.transform(new (C) AbsDNode(x));
    // abs(x)^y
    Node *absxpowy = _gvn.transform(new (C) PowDNode(C, control(), absx, y));
    // -abs(x)^y
    Node *negabsxpowy = _gvn.transform(new (C) NegDNode (absxpowy));
    // (1&(long)y)==1?-DPow(abs(x), y):DPow(abs(x), y)
    Node *signresult = NULL;
    if (ConditionalMoveLimit != 0) {
      signresult = _gvn.transform( CMoveNode::make(C, NULL, bol3, absxpowy, negabsxpowy, Type::DOUBLE));
    } else {
      IfNode *ifyeven = create_and_xform_if(ylong_path,bol3, PROB_FAIR, COUNT_UNKNOWN);
      RegionNode *r = new (C) RegionNode(3);
      Node *phi = new (C) PhiNode(r, Type::DOUBLE);
      r->init_req(1, _gvn.transform(new (C) IfFalseNode(ifyeven)));
      r->init_req(2, _gvn.transform(new (C) IfTrueNode(ifyeven)));
      phi->init_req(1, absxpowy);
      phi->init_req(2, negabsxpowy);
      signresult = _gvn.transform(phi);
      ylong_path = _gvn.transform(r);
      record_for_igvn(r);
    }
    // Set complex path fast result
    r->init_req(2, ylong_path);
    phi->init_req(2, signresult);

    static const jlong nan_bits = CONST64(0x7ff8000000000000);
    Node *slow_result = makecon(TypeD::make(*(double*)&nan_bits)); // return NaN
    r->init_req(1,slow_path);
    phi->init_req(1,slow_result);

    // Post merge
    set_control(_gvn.transform(r));
    record_for_igvn(r);
    result = _gvn.transform(phi);
  }

  result = finish_pow_exp(result, x, y, OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW");

  // control from finish_pow_exp is now input to the region node
  region_node->set_req(2, control());
  // the result from finish_pow_exp is now input to the phi node
  phi_node->init_req(2, result);
  set_control(_gvn.transform(region_node));
  record_for_igvn(region_node);
  set_result(_gvn.transform(phi_node));

  C->set_has_split_ifs(true); // Has chance for split-if optimization
  return true;
}

//------------------------------runtime_math-----------------------------
bool LibraryCallKit::runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName) {
  assert(call_type == OptoRuntime::Math_DD_D_Type() || call_type == OptoRuntime::Math_D_D_Type(),
         "must be (DD)D or (D)D type");

  // Inputs
  Node* a = round_double_node(argument(0));
  Node* b = (call_type == OptoRuntime::Math_DD_D_Type()) ? round_double_node(argument(2)) : NULL;

  const TypePtr* no_memory_effects = NULL;
  Node* trig = make_runtime_call(RC_LEAF, call_type, funcAddr, funcName,
                                 no_memory_effects,
                                 a, top(), b, b ? top() : NULL);
  Node* value = _gvn.transform(new (C) ProjNode(trig, TypeFunc::Parms+0));
#ifdef ASSERT
  Node* value_top = _gvn.transform(new (C) ProjNode(trig, TypeFunc::Parms+1));
  assert(value_top == top(), "second value must be top");
#endif

  set_result(value);
  return true;
}

//------------------------------inline_math_native-----------------------------
bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
#define FN_PTR(f) CAST_FROM_FN_PTR(address, f)
  switch (id) {
    // These intrinsics are not properly supported on all hardware
  case vmIntrinsics::_dcos:   return Matcher::has_match_rule(Op_CosD)   ? inline_trig(id) :
    runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dcos),   "COS");
  case vmIntrinsics::_dsin:   return Matcher::has_match_rule(Op_SinD)   ? inline_trig(id) :
    runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dsin),   "SIN");
  case vmIntrinsics::_dtan:   return Matcher::has_match_rule(Op_TanD)   ? inline_trig(id) :
    runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dtan),   "TAN");

  case vmIntrinsics::_dlog:   return Matcher::has_match_rule(Op_LogD)   ? inline_math(id) :
    runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog),   "LOG");
  case vmIntrinsics::_dlog10: return Matcher::has_match_rule(Op_Log10D) ? inline_math(id) :
    runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog10), "LOG10");

    // These intrinsics are supported on all hardware
  case vmIntrinsics::_dsqrt:  return Matcher::match_rule_supported(Op_SqrtD) ? inline_math(id) : false;
  case vmIntrinsics::_dabs:   return Matcher::has_match_rule(Op_AbsD)   ? inline_math(id) : false;

  case vmIntrinsics::_dexp:   return Matcher::has_match_rule(Op_ExpD)   ? inline_exp()    :
    runtime_math(OptoRuntime::Math_D_D_Type(),  FN_PTR(SharedRuntime::dexp),  "EXP");
  case vmIntrinsics::_dpow:   return Matcher::has_match_rule(Op_PowD)   ? inline_pow()    :
    runtime_math(OptoRuntime::Math_DD_D_Type(), FN_PTR(SharedRuntime::dpow),  "POW");
#undef FN_PTR

   // These intrinsics are not yet correctly implemented
  case vmIntrinsics::_datan2:
    return false;

  default:
    fatal_unexpected_iid(id);
    return false;
  }
}

static bool is_simple_name(Node* n) {
  return (n->req() == 1         // constant
          || (n->is_Type() && n->as_Type()->type()->singleton())
          || n->is_Proj()       // parameter or return value
          || n->is_Phi()        // local of some sort
          );
}

//----------------------------inline_min_max-----------------------------------
bool LibraryCallKit::inline_min_max(vmIntrinsics::ID id) {
  set_result(generate_min_max(id, argument(0), argument(1)));
  return true;
}

void LibraryCallKit::inline_math_mathExact(Node* math, Node *test) {
  Node* bol = _gvn.transform( new (C) BoolNode(test, BoolTest::overflow) );
  IfNode* check = create_and_map_if(control(), bol, PROB_UNLIKELY_MAG(3), COUNT_UNKNOWN);
  Node* fast_path = _gvn.transform( new (C) IfFalseNode(check));
  Node* slow_path = _gvn.transform( new (C) IfTrueNode(check) );

  {
    PreserveJVMState pjvms(this);
    PreserveReexecuteState preexecs(this);
    jvms()->set_should_reexecute(true);

    set_control(slow_path);
    set_i_o(i_o());

    uncommon_trap(Deoptimization::Reason_intrinsic,
                  Deoptimization::Action_none);
  }

  set_control(fast_path);
  set_result(math);
}

template <typename OverflowOp>
bool LibraryCallKit::inline_math_overflow(Node* arg1, Node* arg2) {
  typedef typename OverflowOp::MathOp MathOp;

  MathOp* mathOp = new(C) MathOp(arg1, arg2);
  Node* operation = _gvn.transform( mathOp );
  Node* ofcheck = _gvn.transform( new(C) OverflowOp(arg1, arg2) );
  inline_math_mathExact(operation, ofcheck);
  return true;
}
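// The generated shape is roughly equivalent to the following Java-level
// sketch (illustrative only):
//   int r = x + y;                      // the MathOp node
//   if (overflow(x, y)) deoptimize();   // the OverflowOp node, guarded by an uncommon trap
//   return r;
// Note that long arguments occupy two JVM argument slots, which is why the
// *ExactL variants below fetch their second operand with argument(2).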

bool LibraryCallKit::inline_math_addExactI(bool is_increment) {
  return inline_math_overflow<OverflowAddINode>(argument(0), is_increment ? intcon(1) : argument(1));
}

bool LibraryCallKit::inline_math_addExactL(bool is_increment) {
  return inline_math_overflow<OverflowAddLNode>(argument(0), is_increment ? longcon(1) : argument(2));
}

bool LibraryCallKit::inline_math_subtractExactI(bool is_decrement) {
  return inline_math_overflow<OverflowSubINode>(argument(0), is_decrement ? intcon(1) : argument(1));
}

bool LibraryCallKit::inline_math_subtractExactL(bool is_decrement) {
  return inline_math_overflow<OverflowSubLNode>(argument(0), is_decrement ? longcon(1) : argument(2));
}

bool LibraryCallKit::inline_math_negateExactI() {
  return inline_math_overflow<OverflowSubINode>(intcon(0), argument(0));
}

bool LibraryCallKit::inline_math_negateExactL() {
  return inline_math_overflow<OverflowSubLNode>(longcon(0), argument(0));
}

bool LibraryCallKit::inline_math_multiplyExactI() {
  return inline_math_overflow<OverflowMulINode>(argument(0), argument(1));
}

bool LibraryCallKit::inline_math_multiplyExactL() {
  return inline_math_overflow<OverflowMulLNode>(argument(0), argument(2));
}

Node*
LibraryCallKit::generate_min_max(vmIntrinsics::ID id, Node* x0, Node* y0) {
  // These are the candidate return values:
  Node* xvalue = x0;
  Node* yvalue = y0;

  if (xvalue == yvalue) {
    return xvalue;
  }

  bool want_max = (id == vmIntrinsics::_max);

  const TypeInt* txvalue = _gvn.type(xvalue)->isa_int();
  const TypeInt* tyvalue = _gvn.type(yvalue)->isa_int();
  if (txvalue == NULL || tyvalue == NULL)  return top();
  // This is not really necessary, but it is consistent with a
  // hypothetical MaxINode::Value method:
  int widen = MAX2(txvalue->_widen, tyvalue->_widen);

  // %%% This folding logic should (ideally) be in a different place.
  // Some should be inside IfNode, where there should be a more reliable
  // transformation of ?: style patterns into cmoves.  We also want
  // more powerful optimizations around cmove and min/max.

  // Try to find a dominating comparison of these guys.
  // It can simplify the index computation for Arrays.copyOf
  // and similar uses of System.arraycopy.
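  // Illustrative example (not from the original comments): in code shaped like
  //   if (x <= y) { ... Math.min(x, y) ... }
  // the dominating 'x <= y' test already proves the answer, so min(x, y)
  // collapses to x on that path without emitting a CMove.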
  // First, compute the normalized version of CmpI(x, y).
  int   cmp_op = Op_CmpI;
  Node* xkey = xvalue;
  Node* ykey = yvalue;
  Node* ideal_cmpxy = _gvn.transform(new(C) CmpINode(xkey, ykey));
  if (ideal_cmpxy->is_Cmp()) {
    // E.g., if we have CmpI(length - offset, count),
    // it might idealize to CmpI(length, count + offset)
    cmp_op = ideal_cmpxy->Opcode();
    xkey = ideal_cmpxy->in(1);
    ykey = ideal_cmpxy->in(2);
  }

  // Start by locating any relevant comparisons.
  Node* start_from = (xkey->outcnt() < ykey->outcnt()) ? xkey : ykey;
  Node* cmpxy = NULL;
  Node* cmpyx = NULL;
  for (DUIterator_Fast kmax, k = start_from->fast_outs(kmax); k < kmax; k++) {
    Node* cmp = start_from->fast_out(k);
    if (cmp->outcnt() > 0 &&            // must have prior uses
        cmp->in(0) == NULL &&           // must be context-independent
        cmp->Opcode() == cmp_op) {      // right kind of compare
      if (cmp->in(1) == xkey && cmp->in(2) == ykey)  cmpxy = cmp;
      if (cmp->in(1) == ykey && cmp->in(2) == xkey)  cmpyx = cmp;
    }
  }

  const int NCMPS = 2;
  Node* cmps[NCMPS] = { cmpxy, cmpyx };
  int cmpn;
  for (cmpn = 0; cmpn < NCMPS; cmpn++) {
    if (cmps[cmpn] != NULL)  break;     // find a result
  }
  if (cmpn < NCMPS) {
    // Look for a dominating test that tells us the min and max.
    int depth = 0;                // Limit search depth for speed
    Node* dom = control();
    for (; dom != NULL; dom = IfNode::up_one_dom(dom, true)) {
      if (++depth >= 100)  break;
      Node* ifproj = dom;
      if (!ifproj->is_Proj())  continue;
      Node* iff = ifproj->in(0);
      if (!iff->is_If())  continue;
      Node* bol = iff->in(1);
      if (!bol->is_Bool())  continue;
      Node* cmp = bol->in(1);
      if (cmp == NULL)  continue;
      for (cmpn = 0; cmpn < NCMPS; cmpn++)
        if (cmps[cmpn] == cmp)  break;
      if (cmpn == NCMPS)  continue;
      BoolTest::mask btest = bol->as_Bool()->_test._test;
      if (ifproj->is_IfFalse())  btest = BoolTest(btest).negate();
      if (cmp->in(1) == ykey)    btest = BoolTest(btest).commute();
      // At this point, we know that 'x btest y' is true.
      switch (btest) {
      case BoolTest::eq:
        // They are proven equal, so we can collapse the min/max.
        // Either value is the answer.  Choose the simpler.
        if (is_simple_name(yvalue) && !is_simple_name(xvalue))
          return yvalue;
        return xvalue;
      case BoolTest::lt:          // x < y
      case BoolTest::le:          // x <= y
        return (want_max ? yvalue : xvalue);
      case BoolTest::gt:          // x > y
      case BoolTest::ge:          // x >= y
        return (want_max ? xvalue : yvalue);
      }
    }
  }

  // We failed to find a dominating test.
  // Let's pick a test that might GVN with prior tests.
  Node*          best_bol   = NULL;
  BoolTest::mask best_btest = BoolTest::illegal;
  for (cmpn = 0; cmpn < NCMPS; cmpn++) {
    Node* cmp = cmps[cmpn];
    if (cmp == NULL)  continue;
    for (DUIterator_Fast jmax, j = cmp->fast_outs(jmax); j < jmax; j++) {
      Node* bol = cmp->fast_out(j);
      if (!bol->is_Bool())  continue;
      BoolTest::mask btest = bol->as_Bool()->_test._test;
      if (btest == BoolTest::eq || btest == BoolTest::ne)  continue;
      if (cmp->in(1) == ykey)   btest = BoolTest(btest).commute();
      if (bol->outcnt() > (best_bol == NULL ? 0 : best_bol->outcnt())) {
        best_bol   = bol->as_Bool();
        best_btest = btest;
      }
    }
  }

  Node* answer_if_true  = NULL;
  Node* answer_if_false = NULL;
  switch (best_btest) {
  default:
    if (cmpxy == NULL)
      cmpxy = ideal_cmpxy;
    best_bol = _gvn.transform(new(C) BoolNode(cmpxy, BoolTest::lt));
    // and fall through:
  case BoolTest::lt:          // x < y
  case BoolTest::le:          // x <= y
    answer_if_true  = (want_max ? yvalue : xvalue);
    answer_if_false = (want_max ? xvalue : yvalue);
    break;
  case BoolTest::gt:          // x > y
  case BoolTest::ge:          // x >= y
    answer_if_true  = (want_max ? xvalue : yvalue);
    answer_if_false = (want_max ? yvalue : xvalue);
    break;
  }

  jint hi, lo;
  if (want_max) {
    // We can sharpen the minimum.
    hi = MAX2(txvalue->_hi, tyvalue->_hi);
    lo = MAX2(txvalue->_lo, tyvalue->_lo);
  } else {
    // We can sharpen the maximum.
    hi = MIN2(txvalue->_hi, tyvalue->_hi);
    lo = MIN2(txvalue->_lo, tyvalue->_lo);
  }

  // Use a flow-free graph structure, to avoid creating excess control edges
  // which could hinder other optimizations.
  // Since Math.min/max is often used with arraycopy, we want
  // tightly_coupled_allocation to be able to see beyond min/max expressions.
  Node* cmov = CMoveNode::make(C, NULL, best_bol,
                               answer_if_false, answer_if_true,
                               TypeInt::make(lo, hi, widen));

  return _gvn.transform(cmov);

  /*
  // This is not as desirable as it may seem, since Min and Max
  // nodes do not have a full set of optimizations.
  // And they would interfere, anyway, with 'if' optimizations
  // and with CMoveI canonical forms.
  switch (id) {
  case vmIntrinsics::_min:
    result_val = _gvn.transform(new (C, 3) MinINode(x,y)); break;
  case vmIntrinsics::_max:
    result_val = _gvn.transform(new (C, 3) MaxINode(x,y)); break;
  default:
    ShouldNotReachHere();
  }
  */
}

inline int
LibraryCallKit::classify_unsafe_addr(Node* &base, Node* &offset) {
  const TypePtr* base_type = TypePtr::NULL_PTR;
  if (base != NULL)  base_type = _gvn.type(base)->isa_ptr();
  if (base_type == NULL) {
    // Unknown type.
    return Type::AnyPtr;
  } else if (base_type == TypePtr::NULL_PTR) {
    // Since this is a NULL+long form, we have to switch to a rawptr.
    base   = _gvn.transform(new (C) CastX2PNode(offset));
    offset = MakeConX(0);
    return Type::RawPtr;
  } else if (base_type->base() == Type::RawPtr) {
    return Type::RawPtr;
  } else if (base_type->isa_oopptr()) {
    // Base is never null => always a heap address.
    if (base_type->ptr() == TypePtr::NotNull) {
      return Type::OopPtr;
    }
    // Offset is small => always a heap address.
    const TypeX* offset_type = _gvn.type(offset)->isa_intptr_t();
    if (offset_type != NULL &&
        base_type->offset() == 0 &&     // (should always be?)
        offset_type->_lo >= 0 &&
        !MacroAssembler::needs_explicit_null_check(offset_type->_hi)) {
      return Type::OopPtr;
    }
    // Otherwise, it might either be oop+off or NULL+addr.
    return Type::AnyPtr;
  } else {
    // No information:
    return Type::AnyPtr;
  }
}

inline Node* LibraryCallKit::make_unsafe_address(Node* base, Node* offset) {
  int kind = classify_unsafe_addr(base, offset);
  if (kind == Type::RawPtr) {
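    // Off-heap (raw) addresses get TOP as the oop base: there is no containing
    // object for the GC to keep track of.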
    return basic_plus_adr(top(), base, offset);
  } else {
    return basic_plus_adr(base, offset);
  }
}

//--------------------------inline_number_methods-----------------------------
// inline int     Integer.numberOfLeadingZeros(int)
// inline int        Long.numberOfLeadingZeros(long)
//
// inline int     Integer.numberOfTrailingZeros(int)
// inline int        Long.numberOfTrailingZeros(long)
//
// inline int     Integer.bitCount(int)
// inline int        Long.bitCount(long)
//
// inline char  Character.reverseBytes(char)
// inline short     Short.reverseBytes(short)
// inline int     Integer.reverseBytes(int)
// inline long       Long.reverseBytes(long)
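// (Illustrative note: where the matcher supports the corresponding rule, each
// of these becomes a single node, e.g. Integer.numberOfLeadingZeros maps to
// CountLeadingZerosI, which typically matches to one machine instruction such
// as lzcnt/clz on platforms that provide it.)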
bool LibraryCallKit::inline_number_methods(vmIntrinsics::ID id) {
  Node* arg = argument(0);
  Node* n = NULL;
  switch (id) {
  case vmIntrinsics::_numberOfLeadingZeros_i:   n = new (C) CountLeadingZerosINode( arg);  break;
  case vmIntrinsics::_numberOfLeadingZeros_l:   n = new (C) CountLeadingZerosLNode( arg);  break;
  case vmIntrinsics::_numberOfTrailingZeros_i:  n = new (C) CountTrailingZerosINode(arg);  break;
  case vmIntrinsics::_numberOfTrailingZeros_l:  n = new (C) CountTrailingZerosLNode(arg);  break;
  case vmIntrinsics::_bitCount_i:               n = new (C) PopCountINode(          arg);  break;
  case vmIntrinsics::_bitCount_l:               n = new (C) PopCountLNode(          arg);  break;
  case vmIntrinsics::_reverseBytes_c:           n = new (C) ReverseBytesUSNode(0,   arg);  break;
  case vmIntrinsics::_reverseBytes_s:           n = new (C) ReverseBytesSNode( 0,   arg);  break;
  case vmIntrinsics::_reverseBytes_i:           n = new (C) ReverseBytesINode( 0,   arg);  break;
  case vmIntrinsics::_reverseBytes_l:           n = new (C) ReverseBytesLNode( 0,   arg);  break;
  default:  fatal_unexpected_iid(id);  break;
  }
  set_result(_gvn.transform(n));
  return true;
}

//----------------------------inline_unsafe_access----------------------------

const static BasicType T_ADDRESS_HOLDER = T_LONG;

// Helper that guards and inserts a pre-barrier.
void LibraryCallKit::insert_pre_barrier(Node* base_oop, Node* offset,
                                        Node* pre_val, bool need_mem_bar) {
  // We could be accessing the referent field of a reference object. If so, when G1
  // is enabled, we need to log the value in the referent field in an SATB buffer.
  // This routine performs some compile time filters and generates suitable
  // runtime filters that guard the pre-barrier code.
  // Also add a memory barrier for non-volatile loads from the referent field
  // to prevent commoning of loads across safepoints.
  if (!UseG1GC && !need_mem_bar)
    return;

  // Some compile time checks.

  // If offset is a constant, is it java_lang_ref_Reference::_reference_offset?
  const TypeX* otype = offset->find_intptr_t_type();
  if (otype != NULL && otype->is_con() &&
      otype->get_con() != java_lang_ref_Reference::referent_offset) {
    // Constant offset but not the reference_offset so just return
    return;
  }

  // We only need to generate the runtime guards for instances.
  const TypeOopPtr* btype = base_oop->bottom_type()->isa_oopptr();
  if (btype != NULL) {
    if (btype->isa_aryptr()) {
      // Array type so nothing to do
      return;
    }

    const TypeInstPtr* itype = btype->isa_instptr();
    if (itype != NULL) {
      // Can the klass of base_oop be statically determined to be
      // _not_ a sub-class of Reference and _not_ Object?
      ciKlass* klass = itype->klass();
      if ( klass->is_loaded() &&
          !klass->is_subtype_of(env()->Reference_klass()) &&
          !env()->Object_klass()->is_subtype_of(klass)) {
        return;
      }
    }
  }

  // The compile time filters did not reject base_oop/offset so
  // we need to generate the following runtime filters
  //
  // if (offset == java_lang_ref_Reference::_reference_offset) {
  //   if (instance_of(base, java.lang.ref.Reference)) {
  //     pre_barrier(_, pre_val, ...);
  //   }
  // }

  float likely   = PROB_LIKELY(  0.999);
  float unlikely = PROB_UNLIKELY(0.999);

  IdealKit ideal(this);
#define __ ideal.

  Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset);

  __ if_then(offset, BoolTest::eq, referent_off, unlikely); {
      // Update graphKit memory and control from IdealKit.
      sync_kit(ideal);

      Node* ref_klass_con = makecon(TypeKlassPtr::make(env()->Reference_klass()));
      Node* is_instof = gen_instanceof(base_oop, ref_klass_con);

      // Update IdealKit memory and control from graphKit.
      __ sync_kit(this);

      Node* one = __ ConI(1);
      // is_instof == 0 if base_oop == NULL
      __ if_then(is_instof, BoolTest::eq, one, unlikely); {

        // Update graphKit from IdealKit.
        sync_kit(ideal);

        // Use the pre-barrier to record the value in the referent field
        pre_barrier(false /* do_load */,
                    __ ctrl(),
                    NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */,
                    pre_val /* pre_val */,
                    T_OBJECT);
        if (need_mem_bar) {
          // Add memory barrier to prevent commoning reads from this field
          // across safepoint since GC can change its value.
          insert_mem_bar(Op_MemBarCPUOrder);
        }
        // Update IdealKit from graphKit.
        __ sync_kit(this);

      } __ end_if(); // _ref_type != ref_none
  } __ end_if(); // offset == referent_offset

  // Final sync IdealKit and GraphKit.
  final_sync(ideal);
#undef __
}


// Interpret Unsafe.fieldOffset cookies correctly:
extern jlong Unsafe_field_offset_to_byte_offset(jlong field_offset);

const TypeOopPtr* LibraryCallKit::sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type, bool is_native_ptr) {
  // Attempt to infer a sharper value type from the offset and base type.
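  // (For example, an Unsafe.getObject whose address is an element of a
  // String[] can be sharpened from plain Object to String.)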
  ciKlass* sharpened_klass = NULL;

  // See if it is an instance field, with an object type.
  if (alias_type->field() != NULL) {
    assert(!is_native_ptr, "native pointer op cannot use a java address");
    if (alias_type->field()->type()->is_klass()) {
      sharpened_klass = alias_type->field()->type()->as_klass();
    }
  }

  // See if it is a narrow oop array.
  if (adr_type->isa_aryptr()) {
    if (adr_type->offset() >= objArrayOopDesc::base_offset_in_bytes()) {
      const TypeOopPtr *elem_type = adr_type->is_aryptr()->elem()->isa_oopptr();
      if (elem_type != NULL) {
        sharpened_klass = elem_type->klass();
      }
    }
  }

  // The sharpened class might be unloaded if there is no class loader
  // constraint in place.
  if (sharpened_klass != NULL && sharpened_klass->is_loaded()) {
    const TypeOopPtr* tjp = TypeOopPtr::make_from_klass(sharpened_klass);

#ifndef PRODUCT
    if (C->print_intrinsics() || C->print_inlining()) {
      tty->print("  from base type:  ");  adr_type->dump(); tty->cr();
      tty->print("  sharpened value: ");  tjp->dump();      tty->cr();
    }
#endif
    // Sharpen the value type.
    return tjp;
  }
  return NULL;
}

bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile, bool unaligned) {
  if (callee()->is_static())  return false;  // caller must have the capability!
  assert(type != T_OBJECT || !unaligned, "unaligned access not supported with object type");

#ifndef PRODUCT
  {
    ResourceMark rm;
    // Check the signatures.
    ciSignature* sig = callee()->signature();
#ifdef ASSERT
    if (!is_store) {
      // Object getObject(Object base, int/long offset), etc.
      BasicType rtype = sig->return_type()->basic_type();
      if (rtype == T_ADDRESS_HOLDER && callee()->name() == ciSymbol::getAddress_name())
          rtype = T_ADDRESS;  // it is really a C void*
      assert(rtype == type, "getter must return the expected value");
      if (!is_native_ptr) {
        assert(sig->count() == 2, "oop getter has 2 arguments");
        assert(sig->type_at(0)->basic_type() == T_OBJECT, "getter base is object");
        assert(sig->type_at(1)->basic_type() == T_LONG, "getter offset is correct");
      } else {
        assert(sig->count() == 1, "native getter has 1 argument");
        assert(sig->type_at(0)->basic_type() == T_LONG, "getter base is long");
      }
    } else {
      // void putObject(Object base, int/long offset, Object x), etc.
      assert(sig->return_type()->basic_type() == T_VOID, "putter must not return a value");
      if (!is_native_ptr) {
        assert(sig->count() == 3, "oop putter has 3 arguments");
        assert(sig->type_at(0)->basic_type() == T_OBJECT, "putter base is object");
        assert(sig->type_at(1)->basic_type() == T_LONG, "putter offset is correct");
      } else {
        assert(sig->count() == 2, "native putter has 2 arguments");
        assert(sig->type_at(0)->basic_type() == T_LONG, "putter base is long");
      }
      BasicType vtype = sig->type_at(sig->count()-1)->basic_type();
      if (vtype == T_ADDRESS_HOLDER && callee()->name() == ciSymbol::putAddress_name())
        vtype = T_ADDRESS;  // it is really a C void*
      assert(vtype == type, "putter must accept the expected value");
    }
#endif // ASSERT
 }
#endif //PRODUCT

  C->set_has_unsafe_access(true);  // Mark eventual nmethod as "unsafe".

  Node* receiver = argument(0);  // type: oop

  // Build address expression.  See the code in inline_unsafe_prefetch.
  Node* adr;
  Node* heap_base_oop = top();
  Node* offset = top();
  Node* val;

  // The base is either a Java object or a value produced by Unsafe.staticFieldBase
  Node* base = argument(1);  // type: oop

  if (!is_native_ptr) {
    // The offset is a value produced by Unsafe.staticFieldOffset or Unsafe.objectFieldOffset
    offset = argument(2);  // type: long
    // We currently rely on the cookies produced by Unsafe.xxxFieldOffset
    // to be plain byte offsets, which are also the same as those accepted
    // by oopDesc::field_base.
    assert(Unsafe_field_offset_to_byte_offset(11) == 11,
           "fieldOffset must be byte-scaled");
    // 32-bit machines ignore the high half!
    offset = ConvL2X(offset);
    adr = make_unsafe_address(base, offset);
    heap_base_oop = base;
    val = is_store ? argument(4) : NULL;
  } else {
    Node* ptr = argument(1);  // type: long
    ptr = ConvL2X(ptr);  // adjust Java long to machine word
    adr = make_unsafe_address(NULL, ptr);
    val = is_store ? argument(3) : NULL;
  }

  if ((_gvn.type(base)->isa_ptr() == TypePtr::NULL_PTR) && type == T_OBJECT) {
    return false; // off-heap oop accesses are not supported
  }

  const TypePtr *adr_type = _gvn.type(adr)->isa_ptr();

  // Try to categorize the address.
  Compile::AliasType* alias_type = C->alias_type(adr_type);
  assert(alias_type->index() != Compile::AliasIdxBot, "no bare pointers here");

  if (alias_type->adr_type() == TypeInstPtr::KLASS ||
      alias_type->adr_type() == TypeAryPtr::RANGE) {
    return false; // not supported
  }

  bool mismatched = false;
  BasicType bt = alias_type->basic_type();
  if (bt != T_ILLEGAL) {
    assert(alias_type->adr_type()->is_oopptr(), "should be on-heap access");
    if (bt == T_BYTE && adr_type->isa_aryptr()) {
      // Alias type doesn't differentiate between byte[] and boolean[].
      // Use address type to get the element type.
      bt = adr_type->is_aryptr()->elem()->array_element_basic_type();
    }
    if (bt == T_ARRAY || bt == T_NARROWOOP) {
      // accessing an array field with getObject is not a mismatch
      bt = T_OBJECT;
    }
    if ((bt == T_OBJECT) != (type == T_OBJECT)) {
      // Don't intrinsify mismatched object accesses
      return false;
    }
    mismatched = (bt != type);
  }

  assert(!mismatched || alias_type->adr_type()->is_oopptr(), "off-heap access can't be mismatched");

  // First guess at the value type.
  const Type *value_type = Type::get_const_basic_type(type);

  // We will need memory barriers unless we can determine a unique
  // alias category for this reference.  (Note:  If for some reason
  // the barriers get omitted and the unsafe reference begins to "pollute"
  // the alias analysis of the rest of the graph, either Compile::can_alias
  // or Compile::must_alias will throw a diagnostic assert.)
  bool need_mem_bar = (alias_type->adr_type() == TypeOopPtr::BOTTOM);

  // If we are reading the value of the referent field of a Reference
  // object (either by using Unsafe directly or through reflection)
  // then, if G1 is enabled, we need to record the referent in an
  // SATB log buffer using the pre-barrier mechanism.
  // Also we need to add memory barrier to prevent commoning reads
  // from this field across safepoint since GC can change its value.
  bool need_read_barrier = !is_native_ptr && !is_store &&
                           offset != top() && heap_base_oop != top();

  if (!is_store && type == T_OBJECT) {
    const TypeOopPtr* tjp = sharpen_unsafe_type(alias_type, adr_type, is_native_ptr);
    if (tjp != NULL) {
      value_type = tjp;
    }
  }

  receiver = null_check(receiver);
  if (stopped()) {
    return true;
  }
  // Heap pointers get a null-check from the interpreter,
  // as a courtesy.  However, this is not guaranteed by Unsafe,
  // and it is not possible to fully distinguish unintended nulls
  // from intended ones in this API.

  Node* load = NULL;
  Node* store = NULL;
  Node* leading_membar = NULL;
  if (is_volatile) {
    // We need to emit leading and trailing CPU membars (see below) in
    // addition to memory membars when is_volatile. This is a little
    // too strong, but avoids the need to insert per-alias-type
    // volatile membars (for stores; compare Parse::do_put_xxx), which
    // we cannot do effectively here because we probably only have a
    // rough approximation of type.
    need_mem_bar = true;
    // For Stores, place a memory ordering barrier now.
    if (is_store) {
      leading_membar = insert_mem_bar(Op_MemBarRelease);
    } else {
      if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
        leading_membar = insert_mem_bar(Op_MemBarVolatile);
      }
    }
  }
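  // Taken together with the trailing barriers emitted further down, this
  // brackets a volatile access roughly as follows (non-IRIW case, illustrative):
  //   store: MemBarRelease; MemBarCPUOrder; Store; MemBarVolatile; MemBarCPUOrder
  //   load:  MemBarCPUOrder; Load; MemBarAcquire; MemBarCPUOrder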

  // Memory barrier to prevent normal and 'unsafe' accesses from
  // bypassing each other.  Happens after null checks, so the
  // exception paths do not take memory state from the memory barrier,
  // so there's no problem making a strong assert about mixing users
  // of safe & unsafe memory.  Otherwise fails in a CTW of rt.jar
  // around 5701, class sun/reflect/UnsafeBooleanFieldAccessorImpl.
  if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);

  if (!is_store) {
    MemNode::MemOrd mo = is_volatile ? MemNode::acquire : MemNode::unordered;
    // To be valid, unsafe loads may depend on other conditions than
    // the one that guards them: pin the Load node
    load = make_load(control(), adr, value_type, type, adr_type, mo, LoadNode::Pinned, is_volatile, unaligned, mismatched);
    // load value
    switch (type) {
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
    case T_LONG:
    case T_FLOAT:
    case T_DOUBLE:
      break;
    case T_OBJECT:
      if (need_read_barrier) {
        insert_pre_barrier(heap_base_oop, offset, load, !(is_volatile || need_mem_bar));
      }
      break;
    case T_ADDRESS:
      // Cast to an int type.
      load = _gvn.transform(new (C) CastP2XNode(NULL, load));
      load = ConvX2UL(load);
      break;
    default:
      fatal(err_msg_res("unexpected type %d: %s", type, type2name(type)));
      break;
    }
    // The load node has the control of the preceding MemBarCPUOrder.  All
    // following nodes will have the control of the MemBarCPUOrder inserted at
    // the end of this method.  So, pushing the load onto the stack at a later
    // point is fine.
    set_result(load);
  } else {
    // place effect of store into memory
    switch (type) {
    case T_DOUBLE:
      val = dstore_rounding(val);
      break;
    case T_ADDRESS:
      // Repackage the long as a pointer.
      val = ConvL2X(val);
      val = _gvn.transform(new (C) CastX2PNode(val));
      break;
    }

    MemNode::MemOrd mo = is_volatile ? MemNode::release : MemNode::unordered;
    if (type == T_OBJECT ) {
      store = store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo, mismatched);
    } else {
      store = store_to_memory(control(), adr, val, type, adr_type, mo, is_volatile, unaligned, mismatched);
    }
  }

  if (is_volatile) {
    if (!is_store) {
      Node* mb = insert_mem_bar(Op_MemBarAcquire, load);
      mb->as_MemBar()->set_trailing_load();
    } else {
      if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
        Node* mb = insert_mem_bar(Op_MemBarVolatile, store);
        MemBarNode::set_store_pair(leading_membar->as_MemBar(), mb->as_MemBar());
      }
    }
  }

  if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);

  return true;
}

//----------------------------inline_unsafe_prefetch----------------------------

bool LibraryCallKit::inline_unsafe_prefetch(bool is_native_ptr, bool is_store, bool is_static) {
#ifndef PRODUCT
  {
    ResourceMark rm;
    // Check the signatures.
    ciSignature* sig = callee()->signature();
#ifdef ASSERT
    // Object getObject(Object base, int/long offset), etc.
    BasicType rtype = sig->return_type()->basic_type();
    if (!is_native_ptr) {
      assert(sig->count() == 2, "oop prefetch has 2 arguments");
      assert(sig->type_at(0)->basic_type() == T_OBJECT, "prefetch base is object");
      assert(sig->type_at(1)->basic_type() == T_LONG, "prefetch offset is correct");
    } else {
      assert(sig->count() == 1, "native prefetch has 1 argument");
      assert(sig->type_at(0)->basic_type() == T_LONG, "prefetch base is long");
    }
#endif // ASSERT
  }
#endif // !PRODUCT

  C->set_has_unsafe_access(true);  // Mark eventual nmethod as "unsafe".

  const int idx = is_static ? 0 : 1;
  if (!is_static) {
    null_check_receiver();
    if (stopped()) {
      return true;
    }
  }

  // Build address expression.  See the code in inline_unsafe_access.
  Node *adr;
  if (!is_native_ptr) {
    // The base is either a Java object or a value produced by Unsafe.staticFieldBase
    Node* base   = argument(idx + 0);  // type: oop
    // The offset is a value produced by Unsafe.staticFieldOffset or Unsafe.objectFieldOffset
    Node* offset = argument(idx + 1);  // type: long
    // We currently rely on the cookies produced by Unsafe.xxxFieldOffset
    // to be plain byte offsets, which are also the same as those accepted
    // by oopDesc::field_base.
    assert(Unsafe_field_offset_to_byte_offset(11) == 11,
           "fieldOffset must be byte-scaled");
    // 32-bit machines ignore the high half!
    offset = ConvL2X(offset);
    adr = make_unsafe_address(base, offset);
  } else {
    Node* ptr = argument(idx + 0);  // type: long
    ptr = ConvL2X(ptr);  // adjust Java long to machine word
    adr = make_unsafe_address(NULL, ptr);
  }

  // Generate the read or write prefetch
  Node *prefetch;
  if (is_store) {
    prefetch = new (C) PrefetchWriteNode(i_o(), adr);
  } else {
    prefetch = new (C) PrefetchReadNode(i_o(), adr);
  }
  prefetch->init_req(0, control());
  set_i_o(_gvn.transform(prefetch));

  return true;
}

//----------------------------inline_unsafe_load_store----------------------------
// This method serves a couple of different customers (depending on LoadStoreKind):
//
// LS_cmpxchg:
//   public final native boolean compareAndSwapObject(Object o, long offset, Object expected, Object x);
//   public final native boolean compareAndSwapInt(   Object o, long offset, int    expected, int    x);
//   public final native boolean compareAndSwapLong(  Object o, long offset, long   expected, long   x);
//
// LS_xadd:
//   public int  getAndAddInt( Object o, long offset, int  delta)
//   public long getAndAddLong(Object o, long offset, long delta)
//
// LS_xchg:
//   int    getAndSet(Object o, long offset, int    newValue)
//   long   getAndSet(Object o, long offset, long   newValue)
//   Object getAndSet(Object o, long offset, Object newValue)
//
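// (Illustrative note: in JDK 8 the java.util.concurrent atomics bottom out in
// these Unsafe calls, e.g. AtomicInteger.getAndIncrement() calls
// getAndAddInt(this, valueOffset, 1), which this intrinsic turns into a single
// GetAndAddI node instead of a native call.)
//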
bool LibraryCallKit::inline_unsafe_load_store(BasicType type, LoadStoreKind kind) {
  // This basic scheme here is the same as inline_unsafe_access, but
  // differs in enough details that combining them would make the code
  // overly confusing.  (This is a true fact! I originally combined
  // them, but even I was confused by it!) As much code/comments as
  // possible are retained from inline_unsafe_access though to make
  // the correspondences clearer. - dl

  if (callee()->is_static())  return false;  // caller must have the capability!

#ifndef PRODUCT
  BasicType rtype;
  {
    ResourceMark rm;
    // Check the signatures.
    ciSignature* sig = callee()->signature();
    rtype = sig->return_type()->basic_type();
    if (kind == LS_xadd || kind == LS_xchg) {
      // Check the signatures.
#ifdef ASSERT
      assert(rtype == type, "get and set must return the expected type");
      assert(sig->count() == 3, "get and set has 3 arguments");
      assert(sig->type_at(0)->basic_type() == T_OBJECT, "get and set base is object");
      assert(sig->type_at(1)->basic_type() == T_LONG, "get and set offset is long");
      assert(sig->type_at(2)->basic_type() == type, "get and set must take expected type as new value/delta");
#endif // ASSERT
    } else if (kind == LS_cmpxchg) {
      // Check the signatures.
#ifdef ASSERT
      assert(rtype == T_BOOLEAN, "CAS must return boolean");
      assert(sig->count() == 4, "CAS has 4 arguments");
      assert(sig->type_at(0)->basic_type() == T_OBJECT, "CAS base is object");
      assert(sig->type_at(1)->basic_type() == T_LONG, "CAS offset is long");
#endif // ASSERT
    } else {
      ShouldNotReachHere();
    }
  }
#endif //PRODUCT

  C->set_has_unsafe_access(true);  // Mark eventual nmethod as "unsafe".

  // Get arguments:
  Node* receiver = NULL;
  Node* base     = NULL;
  Node* offset   = NULL;
  Node* oldval   = NULL;
  Node* newval   = NULL;
  if (kind == LS_cmpxchg) {
    const bool two_slot_type = type2size[type] == 2;
    receiver = argument(0);  // type: oop
    base     = argument(1);  // type: oop
    offset   = argument(2);  // type: long
    oldval   = argument(4);  // type: oop, int, or long
    newval   = argument(two_slot_type ? 6 : 5);  // type: oop, int, or long
  } else if (kind == LS_xadd || kind == LS_xchg){
    receiver = argument(0);  // type: oop
    base     = argument(1);  // type: oop
    offset   = argument(2);  // type: long
    oldval   = NULL;
    newval   = argument(4);  // type: oop, int, or long
  }

  // Build field offset expression.
  // We currently rely on the cookies produced by Unsafe.xxxFieldOffset
  // to be plain byte offsets, which are also the same as those accepted
  // by oopDesc::field_base.
  assert(Unsafe_field_offset_to_byte_offset(11) == 11, "fieldOffset must be byte-scaled");
  // 32-bit machines ignore the high half of long offsets
  offset = ConvL2X(offset);
  Node* adr = make_unsafe_address(base, offset);
  const TypePtr *adr_type = _gvn.type(adr)->isa_ptr();

  Compile::AliasType* alias_type = C->alias_type(adr_type);
  BasicType bt = alias_type->basic_type();
  if (bt != T_ILLEGAL &&
      ((bt == T_OBJECT || bt == T_ARRAY) != (type == T_OBJECT))) {
    // Don't intrinsify mismatched object accesses.
    return false;
  }

  // For CAS, unlike inline_unsafe_access, there seems no point in
  // trying to refine types. Just use the coarse types here.
  assert(alias_type->index() != Compile::AliasIdxBot, "no bare pointers here");
  const Type *value_type = Type::get_const_basic_type(type);

  if (kind == LS_xchg && type == T_OBJECT) {
    const TypeOopPtr* tjp = sharpen_unsafe_type(alias_type, adr_type);
    if (tjp != NULL) {
      value_type = tjp;
    }
  }

  // Null check receiver.
  receiver = null_check(receiver);
  if (stopped()) {
    return true;
  }

  int alias_idx = C->get_alias_index(adr_type);

  // Memory-model-wise, a LoadStore acts like a little synchronized
  // block, so needs barriers on each side.  These don't translate
  // into actual barriers on most machines, but we still need rest of
  // compiler to respect ordering.
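  // The leading MemBarRelease + MemBarCPUOrder emitted just below are matched
  // by a trailing MemBarCPUOrder + MemBarAcquire pair at the end of this
  // method, so the LoadStore itself sits inside a release/acquire bracket.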

  Node* leading_membar = insert_mem_bar(Op_MemBarRelease);
  insert_mem_bar(Op_MemBarCPUOrder);

  // 4984716: MemBars must be inserted before this
  //          memory node in order to avoid a false
  //          dependency which will confuse the scheduler.
  Node *mem = memory(alias_idx);

  // For now, we handle only those cases that actually exist: ints,
  // longs, and Object. Adding others should be straightforward.
  Node* load_store = NULL;
  switch(type) {
  case T_INT:
    if (kind == LS_xadd) {
      load_store = _gvn.transform(new (C) GetAndAddINode(control(), mem, adr, newval, adr_type));
    } else if (kind == LS_xchg) {
      load_store = _gvn.transform(new (C) GetAndSetINode(control(), mem, adr, newval, adr_type));
    } else if (kind == LS_cmpxchg) {
      load_store = _gvn.transform(new (C) CompareAndSwapINode(control(), mem, adr, newval, oldval));
    } else {
      ShouldNotReachHere();
    }
    break;
  case T_LONG:
    if (kind == LS_xadd) {
      load_store = _gvn.transform(new (C) GetAndAddLNode(control(), mem, adr, newval, adr_type));
    } else if (kind == LS_xchg) {
      load_store = _gvn.transform(new (C) GetAndSetLNode(control(), mem, adr, newval, adr_type));
    } else if (kind == LS_cmpxchg) {
      load_store = _gvn.transform(new (C) CompareAndSwapLNode(control(), mem, adr, newval, oldval));
    } else {
      ShouldNotReachHere();
    }
    break;
  case T_OBJECT:
    // Transformation of a value which could be NULL pointer (CastPP #NULL)
    // could be delayed during Parse (for example, in adjust_map_after_if()).
    // Execute transformation here to avoid barrier generation in such case.
    if (_gvn.type(newval) == TypePtr::NULL_PTR)
      newval = _gvn.makecon(TypePtr::NULL_PTR);

    // Reference stores need a store barrier.
    if (kind == LS_xchg) {
      // If pre-barrier must execute before the oop store, old value will require do_load here.
      if (!can_move_pre_barrier()) {
        pre_barrier(true /* do_load*/,
                    control(), base, adr, alias_idx, newval, value_type->make_oopptr(),
                    NULL /* pre_val*/,
                    T_OBJECT);
      } // Else move pre_barrier to use load_store value, see below.
    } else if (kind == LS_cmpxchg) {
      // Same as for newval above:
      if (_gvn.type(oldval) == TypePtr::NULL_PTR) {
        oldval = _gvn.makecon(TypePtr::NULL_PTR);
      }
      // The only known value which might get overwritten is oldval.
      pre_barrier(false /* do_load */,
                  control(), NULL, NULL, max_juint, NULL, NULL,
                  oldval /* pre_val */,
                  T_OBJECT);
    } else {
      ShouldNotReachHere();
    }

#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node *newval_enc = _gvn.transform(new (C) EncodePNode(newval, newval->bottom_type()->make_narrowoop()));
      if (kind == LS_xchg) {
        load_store = _gvn.transform(new (C) GetAndSetNNode(control(), mem, adr,
                                                           newval_enc, adr_type, value_type->make_narrowoop()));
      } else {
        assert(kind == LS_cmpxchg, "wrong LoadStore operation");
        Node *oldval_enc = _gvn.transform(new (C) EncodePNode(oldval, oldval->bottom_type()->make_narrowoop()));
        load_store = _gvn.transform(new (C) CompareAndSwapNNode(control(), mem, adr,
                                                                newval_enc, oldval_enc));
      }
    } else
#endif
    {
      if (kind == LS_xchg) {
        load_store = _gvn.transform(new (C) GetAndSetPNode(control(), mem, adr, newval, adr_type, value_type->is_oopptr()));
      } else {
        assert(kind == LS_cmpxchg, "wrong LoadStore operation");
        load_store = _gvn.transform(new (C) CompareAndSwapPNode(control(), mem, adr, newval, oldval));
      }
    }
    post_barrier(control(), load_store, base, adr, alias_idx, newval, T_OBJECT, true);
    break;
  default:
    fatal(err_msg_res("unexpected type %d: %s", type, type2name(type)));
    break;
  }

  // SCMemProjNodes represent the memory state of a LoadStore. Their
  // main role is to prevent LoadStore nodes from being optimized away
  // when their results aren't used.
  Node* proj = _gvn.transform(new (C) SCMemProjNode(load_store));
  set_memory(proj, alias_idx);

  Node* access = load_store;

  if (type == T_OBJECT && kind == LS_xchg) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      load_store = _gvn.transform(new (C) DecodeNNode(load_store, load_store->get_ptr_type()));
    }
#endif
    if (can_move_pre_barrier()) {
      // Don't need to load pre_val. The old value is returned by load_store.
      // The pre_barrier can execute after the xchg as long as no safepoint
      // gets inserted between them.
      pre_barrier(false /* do_load */,
                  control(), NULL, NULL, max_juint, NULL, NULL,
                  load_store /* pre_val */,
                  T_OBJECT);
    }
  }

  // Add the trailing membar surrounding the access
  insert_mem_bar(Op_MemBarCPUOrder);
  Node* mb = insert_mem_bar(Op_MemBarAcquire, access);
  MemBarNode::set_load_store_pair(leading_membar->as_MemBar(), mb->as_MemBar());

  assert(type2size[load_store->bottom_type()->basic_type()] == type2size[rtype], "result type should match");
  set_result(load_store);
  return true;
}

//----------------------------inline_unsafe_ordered_store----------------------
// public native void sun.misc.Unsafe.putOrderedObject(Object o, long offset, Object x);
// public native void sun.misc.Unsafe.putOrderedInt(Object o, long offset, int x);
// public native void sun.misc.Unsafe.putOrderedLong(Object o, long offset, long x);
bool LibraryCallKit::inline_unsafe_ordered_store(BasicType type) {
  // This is another variant of inline_unsafe_access, differing in
  // that it always issues store-store ("release") barrier and ensures
  // store-atomicity (which only matters for "long").
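  // (For example, in JDK 8 AtomicInteger.lazySet() reaches this intrinsic via
  // Unsafe.putOrderedInt.)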

  if (callee()->is_static())  return false;  // caller must have the capability!

#ifndef PRODUCT
  {
    ResourceMark rm;
    // Check the signatures.
    ciSignature* sig = callee()->signature();
#ifdef ASSERT
    BasicType rtype = sig->return_type()->basic_type();
    assert(rtype == T_VOID, "must return void");
    assert(sig->count() == 3, "has 3 arguments");
    assert(sig->type_at(0)->basic_type() == T_OBJECT, "base is object");
    assert(sig->type_at(1)->basic_type() == T_LONG, "offset is long");
#endif // ASSERT
  }
#endif //PRODUCT

  C->set_has_unsafe_access(true);  // Mark eventual nmethod as "unsafe".

  // Get arguments:
  Node* receiver = argument(0);  // type: oop
  Node* base     = argument(1);  // type: oop
  Node* offset   = argument(2);  // type: long
  Node* val      = argument(4);  // type: oop, int, or long

  // Null check receiver.
  receiver = null_check(receiver);
  if (stopped()) {
    return true;
  }

  // Build field offset expression.
  assert(Unsafe_field_offset_to_byte_offset(11) == 11, "fieldOffset must be byte-scaled");
  // 32-bit machines ignore the high half of long offsets
  offset = ConvL2X(offset);
  Node* adr = make_unsafe_address(base, offset);
  const TypePtr *adr_type = _gvn.type(adr)->isa_ptr();
  const Type *value_type = Type::get_const_basic_type(type);
  Compile::AliasType* alias_type = C->alias_type(adr_type);

  insert_mem_bar(Op_MemBarRelease);
  insert_mem_bar(Op_MemBarCPUOrder);
  // Ensure that the store is atomic for longs:
  const bool require_atomic_access = true;
  Node* store;
  if (type == T_OBJECT) { // reference stores need a store barrier.
    store = store_oop_to_unknown(control(), base, adr, adr_type, val, type, MemNode::release);
  } else {
    store = store_to_memory(control(), adr, val, type, adr_type, MemNode::release, require_atomic_access);
  }
  insert_mem_bar(Op_MemBarCPUOrder);
  return true;
}

bool LibraryCallKit::inline_unsafe_fence(vmIntrinsics::ID id) {
  // Regardless of form, don't allow previous ld/st to move down,
  // then issue acquire, release, or volatile mem_bar.
  insert_mem_bar(Op_MemBarCPUOrder);
  switch(id) {
    case vmIntrinsics::_loadFence:
      insert_mem_bar(Op_LoadFence);
      return true;
    case vmIntrinsics::_storeFence:
      insert_mem_bar(Op_StoreFence);
      return true;
    case vmIntrinsics::_fullFence:
      insert_mem_bar(Op_MemBarVolatile);
      return true;
    default:
      fatal_unexpected_iid(id);
      return false;
  }
}

bool LibraryCallKit::klass_needs_init_guard(Node* kls) {
  if (!kls->is_Con()) {
    return true;
  }
  const TypeKlassPtr* klsptr = kls->bottom_type()->isa_klassptr();
  if (klsptr == NULL) {
    return true;
  }
  ciInstanceKlass* ik = klsptr->klass()->as_instance_klass();
  // don't need a guard for a klass that is already initialized
  return !ik->is_initialized();
}

//----------------------------inline_unsafe_allocate---------------------------
// public native Object sun.misc.Unsafe.allocateInstance(Class<?> cls);
bool LibraryCallKit::inline_unsafe_allocate() {
  if (callee()->is_static())  return false;  // caller must have the capability!

  null_check_receiver();  // null-check, then ignore
  Node* cls = null_check(argument(1));
  if (stopped())  return true;

  Node* kls = load_klass_from_mirror(cls, false, NULL, 0);
  kls = null_check(kls);
  if (stopped())  return true;  // argument was like int.class

  Node* test = NULL;
  if (LibraryCallKit::klass_needs_init_guard(kls)) {
    // Note:  The argument might still be an illegal value like
    // Serializable.class or Object[].class.   The runtime will handle it.
    // But we must make an explicit check for initialization.
    Node* insp = basic_plus_adr(kls, in_bytes(InstanceKlass::init_state_offset()));
    // Use T_BOOLEAN for InstanceKlass::_init_state so the compiler
    // can generate code to load it as unsigned byte.
    Node* inst = make_load(NULL, insp, TypeInt::UBYTE, T_BOOLEAN, MemNode::unordered);
    Node* bits = intcon(InstanceKlass::fully_initialized);
    test = _gvn.transform(new (C) SubINode(inst, bits));
    // The 'test' is non-zero if we need to take a slow path.
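    // new_instance() folds this extra test into its slow-path guard, so an
    // uninitialized class falls back to the runtime allocation path, which
    // ensures the class gets initialized before the instance is created.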
  }

  Node* obj = new_instance(kls, test);
  set_result(obj);
  return true;
}

#ifdef JFR_HAVE_INTRINSICS
/*
 * oop -> myklass
 * myklass->trace_id |= USED
 * return myklass->trace_id & ~0x3
 */
bool LibraryCallKit::inline_native_classID() {
  Node* cls = null_check(argument(0), T_OBJECT);
  Node* kls = load_klass_from_mirror(cls, false, NULL, 0);
  kls = null_check(kls, T_OBJECT);

  ByteSize offset = KLASS_TRACE_ID_OFFSET;
  Node* insp = basic_plus_adr(kls, in_bytes(offset));
  Node* tvalue = make_load(NULL, insp, TypeLong::LONG, T_LONG, MemNode::unordered);

  Node* clsused = longcon(0x01l); // set the class bit
  Node* orl = _gvn.transform(new (C) OrLNode(tvalue, clsused));
  const TypePtr *adr_type = _gvn.type(insp)->isa_ptr();
  store_to_memory(control(), insp, orl, T_LONG, adr_type, MemNode::unordered);

#ifdef TRACE_ID_META_BITS
  Node* mbits = longcon(~TRACE_ID_META_BITS);
  tvalue = _gvn.transform(new (C) AndLNode(tvalue, mbits));
#endif
#ifdef TRACE_ID_SHIFT
  Node* cbits = intcon(TRACE_ID_SHIFT);
  tvalue = _gvn.transform(new (C) URShiftLNode(tvalue, cbits));
#endif

  set_result(tvalue);
  return true;
}

bool LibraryCallKit::inline_native_getEventWriter() {
  Node* tls_ptr = _gvn.transform(new (C) ThreadLocalNode());

  Node* jobj_ptr = basic_plus_adr(top(), tls_ptr,
                                  in_bytes(THREAD_LOCAL_WRITER_OFFSET_JFR)
                                  );

  Node* jobj = make_load(control(), jobj_ptr, TypeRawPtr::BOTTOM, T_ADDRESS, MemNode::unordered);

  Node* jobj_cmp_null = _gvn.transform( new (C) CmpPNode(jobj, null()) );
  Node* test_jobj_eq_null  = _gvn.transform( new (C) BoolNode(jobj_cmp_null, BoolTest::eq) );

  IfNode* iff_jobj_null =
    create_and_map_if(control(), test_jobj_eq_null, PROB_MIN, COUNT_UNKNOWN);

  enum { _normal_path = 1,
         _null_path = 2,
         PATH_LIMIT };

  RegionNode* result_rgn = new (C) RegionNode(PATH_LIMIT);
  PhiNode*    result_val = new (C) PhiNode(result_rgn, TypePtr::BOTTOM);

  Node* jobj_is_null = _gvn.transform(new (C) IfTrueNode(iff_jobj_null));
  result_rgn->init_req(_null_path, jobj_is_null);
  result_val->init_req(_null_path, null());

  Node* jobj_is_not_null = _gvn.transform(new (C) IfFalseNode(iff_jobj_null));
  result_rgn->init_req(_normal_path, jobj_is_not_null);

  Node* res = make_load(jobj_is_not_null, jobj, TypeInstPtr::NOTNULL, T_OBJECT, MemNode::unordered);
  result_val->init_req(_normal_path, res);

  set_result(result_rgn, result_val);

  return true;
}
#endif // JFR_HAVE_INTRINSICS

//------------------------inline_native_time_funcs--------------
// inline code for System.currentTimeMillis() and System.nanoTime()
// these have the same type and signature
bool LibraryCallKit::inline_native_time_funcs(address funcAddr, const char* funcName) {
  const TypeFunc* tf = OptoRuntime::void_long_Type();
  const TypePtr* no_memory_effects = NULL;
  Node* time = make_runtime_call(RC_LEAF, tf, funcAddr, funcName, no_memory_effects);
  Node* value = _gvn.transform(new (C) ProjNode(time, TypeFunc::Parms+0));
#ifdef ASSERT
  Node* value_top = _gvn.transform(new (C) ProjNode(time, TypeFunc::Parms+1));
  assert(value_top == top(), "second value must be top");
#endif
  set_result(value);
  return true;
}

//------------------------inline_native_currentThread------------------
bool LibraryCallKit::inline_native_currentThread() {
  Node* junk = NULL;
  set_result(generate_current_thread(junk));
  return true;
}

//------------------------inline_native_isInterrupted------------------
// private native boolean java.lang.Thread.isInterrupted(boolean ClearInterrupted);
bool LibraryCallKit::inline_native_isInterrupted() {
  // Add a fast path to t.isInterrupted(clear_int):
  //   (t == Thread.current() &&
  //    (!TLS._osthread._interrupted || WINDOWS_ONLY(false) NOT_WINDOWS(!clear_int)))
  //   ? TLS._osthread._interrupted : /*slow path:*/ t.isInterrupted(clear_int)
  // So, in the common case that the interrupt bit is false,
  // we avoid making a call into the VM.  Even if the interrupt bit
  // is true, if the clear_int argument is false, we avoid the VM call.
  // However, if the receiver is not currentThread, we must call the VM,
  // because there must be some locking done around the operation.

  // We only go to the fast case code if we pass two guards.
  // Paths which do not pass are accumulated in the slow_region.

  enum {
    no_int_result_path   = 1, // t == Thread.current() && !TLS._osthread._interrupted
    no_clear_result_path = 2, // t == Thread.current() &&  TLS._osthread._interrupted && !clear_int
    slow_result_path     = 3, // slow path: t.isInterrupted(clear_int)
    PATH_LIMIT
  };

  // Ensure that it's not possible to move the load of TLS._osthread._interrupted flag
  // out of the function.
  insert_mem_bar(Op_MemBarCPUOrder);

  RegionNode* result_rgn = new (C) RegionNode(PATH_LIMIT);
  PhiNode*    result_val = new (C) PhiNode(result_rgn, TypeInt::BOOL);

  RegionNode* slow_region = new (C) RegionNode(1);
  record_for_igvn(slow_region);

  // (a) Receiving thread must be the current thread.
  Node* rec_thr = argument(0);
  Node* tls_ptr = NULL;
  Node* cur_thr = generate_current_thread(tls_ptr);
  Node* cmp_thr = _gvn.transform(new (C) CmpPNode(cur_thr, rec_thr));
  Node* bol_thr = _gvn.transform(new (C) BoolNode(cmp_thr, BoolTest::ne));

  generate_slow_guard(bol_thr, slow_region);

  // (b) Interrupt bit on TLS must be false.
  Node* p = basic_plus_adr(top()/*!oop*/, tls_ptr, in_bytes(JavaThread::osthread_offset()));
  Node* osthread = make_load(NULL, p, TypeRawPtr::NOTNULL, T_ADDRESS, MemNode::unordered);
  p = basic_plus_adr(top()/*!oop*/, osthread, in_bytes(OSThread::interrupted_offset()));

  // Set the control input on the field _interrupted read to prevent it floating up.
  Node* int_bit = make_load(control(), p, TypeInt::BOOL, T_INT, MemNode::unordered);
  Node* cmp_bit = _gvn.transform(new (C) CmpINode(int_bit, intcon(0)));
  Node* bol_bit = _gvn.transform(new (C) BoolNode(cmp_bit, BoolTest::ne));

  IfNode* iff_bit = create_and_map_if(control(), bol_bit, PROB_UNLIKELY_MAG(3), COUNT_UNKNOWN);

  // First fast path:  if (!TLS._interrupted) return false;
  Node* false_bit = _gvn.transform(new (C) IfFalseNode(iff_bit));
  result_rgn->init_req(no_int_result_path, false_bit);
  result_val->init_req(no_int_result_path, intcon(0));

  // drop through to next case
  set_control( _gvn.transform(new (C) IfTrueNode(iff_bit)));

#ifndef TARGET_OS_FAMILY_windows
  // (c) Or, if interrupt bit is set and clear_int is false, use 2nd fast path.
  Node* clr_arg = argument(1);
  Node* cmp_arg = _gvn.transform(new (C) CmpINode(clr_arg, intcon(0)));
  Node* bol_arg = _gvn.transform(new (C) BoolNode(cmp_arg, BoolTest::ne));
  IfNode* iff_arg = create_and_map_if(control(), bol_arg, PROB_FAIR, COUNT_UNKNOWN);

  // Second fast path:  ... else if (!clear_int) return true;
  Node* false_arg = _gvn.transform(new (C) IfFalseNode(iff_arg));
  result_rgn->init_req(no_clear_result_path, false_arg);
  result_val->init_req(no_clear_result_path, intcon(1));

  // drop through to next case
  set_control( _gvn.transform(new (C) IfTrueNode(iff_arg)));
#else
  // To return true on Windows you must read the _interrupted field
  // and check the event state, i.e. take the slow path.
#endif // TARGET_OS_FAMILY_windows

  // (d) Otherwise, go to the slow path.
  slow_region->add_req(control());
  set_control( _gvn.transform(slow_region));

  if (stopped()) {
    // There is no slow path.
    result_rgn->init_req(slow_result_path, top());
    result_val->init_req(slow_result_path, top());
  } else {
    // non-virtual because it is a private non-static
    CallJavaNode* slow_call = generate_method_call(vmIntrinsics::_isInterrupted);

    Node* slow_val = set_results_for_java_call(slow_call);
    // this->control() comes from set_results_for_java_call

    Node* fast_io  = slow_call->in(TypeFunc::I_O);
    Node* fast_mem = slow_call->in(TypeFunc::Memory);
