sssssssss75

最新推荐文章于 2024-07-24 08:48:18 发布
luosha112
最新推荐文章于 2024-07-24 08:48:18 发布
阅读量656
点赞数
分类专栏： jvm源码文章标签： jvm
本文链接：https://blog.csdn.net/aa111sadsa/article/details/134131904
版权
jvm源码专栏收录该内容
18 篇文章 0 订阅
订阅专栏

//------------------------------do_split_if------------------------------------
// Found an If getting its condition-code input from a Phi in the same block.
// Split thru the Region.
void PhaseIdealLoop::do_split_if( Node *iff ) {
#ifndef PRODUCT
  if( PrintOpto && VerifyLoopOptimizations )
    tty->print_cr("Split-if");
  if (TraceLoopOpts) {
    tty->print_cr("SplitIf");
  }
#endif
  C->set_major_progress();
  Node *region = iff->in(0);
  Node *region_dom = idom(region);

  // We are going to clone this test (and the control flow with it) up through
  // the incoming merge point.  We need to empty the current basic block.
  // Clone any instructions which must be in this block up through the merge
  // point.
  DUIterator i, j;
  bool progress = true;
  while (progress) {
    progress = false;
    for (i = region->outs(); region->has_out(i); i++) {
      Node* n = region->out(i);
      if( n == region ) continue;
      // The IF to be split is OK.
      if( n == iff ) continue;
      if( !n->is_Phi() ) {      // Found pinned memory op or such
        if (split_up(n, region, iff)) {
          i = region->refresh_out_pos(i);
          progress = true;
        }
        continue;
      }
      assert( n->in(0) == region, "" );

      // Recursively split up all users of a Phi
      for (j = n->outs(); n->has_out(j); j++) {
        Node* m = n->out(j);
        // If m is dead, throw it away, and declare progress
        if (_nodes[m->_idx] == NULL) {
          _igvn.remove_dead_node(m);
          // fall through
        }
        else if (m != iff && split_up(m, region, iff)) {
          // fall through
        } else {
          continue;
        }
        // Something unpredictable changed.
        // Tell the iterators to refresh themselves, and rerun the loop.
        i = region->refresh_out_pos(i);
        j = region->refresh_out_pos(j);
        progress = true;
      }
    }
  }

  // Now we have no instructions in the block containing the IF.
  // Split the IF.
  Node *new_iff = split_thru_region( iff, region );

  // Replace both uses of 'new_iff' with Regions merging True/False
  // paths.  This makes 'new_iff' go dead.
  Node *old_false = NULL, *old_true = NULL;
  Node *new_false = NULL, *new_true = NULL;
  for (DUIterator_Last j2min, j2 = iff->last_outs(j2min); j2 >= j2min; --j2) {
    Node *ifp = iff->last_out(j2);
    assert( ifp->Opcode() == Op_IfFalse || ifp->Opcode() == Op_IfTrue, "" );
    ifp->set_req(0, new_iff);
    Node *ifpx = split_thru_region( ifp, region );

    // Replace 'If' projection of a Region with a Region of
    // 'If' projections.
    ifpx->set_req(0, ifpx);       // A TRUE RegionNode

    // Setup dominator info
    set_idom(ifpx, region_dom, dom_depth(region_dom) + 1);

    // Check for splitting loop tails
    if( get_loop(iff)->tail() == ifp )
      get_loop(iff)->_tail = ifpx;

    // Replace in the graph with lazy-update mechanism
    new_iff->set_req(0, new_iff); // hook self so it does not go dead
    lazy_replace(ifp, ifpx);
    new_iff->set_req(0, region);

    // Record bits for later xforms
    if( ifp->Opcode() == Op_IfFalse ) {
      old_false = ifp;
      new_false = ifpx;
    } else {
      old_true = ifp;
      new_true = ifpx;
    }
  }
  _igvn.remove_dead_node(new_iff);
  // Lazy replace IDOM info with the region's dominator
  lazy_replace( iff, region_dom );

  // Now make the original merge point go dead, by handling all its uses.
  small_cache region_cache;
  // Preload some control flow in region-cache
  region_cache.lru_insert( new_false, new_false );
  region_cache.lru_insert( new_true , new_true  );
  // Now handle all uses of the splitting block
  for (DUIterator k = region->outs(); region->has_out(k); k++) {
    Node* phi = region->out(k);
    if (!phi->in(0)) {         // Dead phi?  Remove it
      _igvn.remove_dead_node(phi);
    } else if (phi == region) { // Found the self-reference
      continue;                 // No roll-back of DUIterator
    } else if (phi->is_Phi()) { // Expected common case: Phi hanging off of Region
      assert(phi->in(0) == region, "Inconsistent graph");
      // Need a per-def cache.  Phi represents a def, so make a cache
      small_cache phi_cache;

      // Inspect all Phi uses to make the Phi go dead
      for (DUIterator_Last lmin, l = phi->last_outs(lmin); l >= lmin; --l) {
        Node* use = phi->last_out(l);
        // Compute the new DEF for this USE.  New DEF depends on the path
        // taken from the original DEF to the USE.  The new DEF may be some
        // collection of PHI's merging values from different paths.  The Phis
        // inserted depend only on the location of the USE.  We use a
        // 2-element cache to handle multiple uses from the same block.
        handle_use(use, phi, &phi_cache, region_dom, new_false, new_true, old_false, old_true);
      } // End of while phi has uses
      // Remove the dead Phi
      _igvn.remove_dead_node( phi );
    } else {
      assert(phi->in(0) == region, "Inconsistent graph");
      // Random memory op guarded by Region.  Compute new DEF for USE.
      handle_use(phi, region, &region_cache, region_dom, new_false, new_true, old_false, old_true);
    }
    // Every path above deletes a use of the region, except for the region
    // self-cycle (which is needed by handle_use calling find_use_block
    // calling get_ctrl calling get_ctrl_no_update looking for dead
    // regions).  So roll back the DUIterator innards.
    --k;
  } // End of while merge point has phis

  assert(region->outcnt() == 1, "Only self reference should remain"); // Just Self on the Region
  region->set_req(0, NULL);       // Break the self-cycle

  // Any leftover bits in the splitting block must not have depended on local
  // Phi inputs (these have already been split-up).  Hence it's safe to hoist
  // these guys to the dominating point.
  lazy_replace( region, region_dom );
#ifndef PRODUCT
  if( VerifyLoopOptimizations ) verify();
#endif
}
C:\hotspot-69087d08d473\src\share\vm/opto/stringopts.cpp
/*
 * Copyright (c) 2009, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "compiler/compileLog.hpp"
#include "opto/addnode.hpp"
#include "opto/callGenerator.hpp"
#include "opto/callnode.hpp"
#include "opto/divnode.hpp"
#include "opto/graphKit.hpp"
#include "opto/idealKit.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/stringopts.hpp"
#include "opto/subnode.hpp"

#define __ kit.

class StringConcat : public ResourceObj {
 private:
  PhaseStringOpts*    _stringopts;
  Node*               _string_alloc;
  AllocateNode*       _begin;          // The allocation the begins the pattern
  CallStaticJavaNode* _end;            // The final call of the pattern.  Will either be
                                       // SB.toString or or String.<init>(SB.toString)
  bool                _multiple;       // indicates this is a fusion of two or more
                                       // separate StringBuilders

  Node*               _arguments;      // The list of arguments to be concatenated
  GrowableArray<int>  _mode;           // into a String along with a mode flag
                                       // indicating how to treat the value.
  Node_List           _constructors;   // List of constructors (many in case of stacked concat)
  Node_List           _control;        // List of control nodes that will be deleted
  Node_List           _uncommon_traps; // Uncommon traps that needs to be rewritten
                                       // to restart at the initial JVMState.

 public:
  // Mode for converting arguments to Strings
  enum {
    StringMode,
    IntMode,
    CharMode,
    StringNullCheckMode
  };

  StringConcat(PhaseStringOpts* stringopts, CallStaticJavaNode* end):
    _end(end),
    _begin(NULL),
    _multiple(false),
    _string_alloc(NULL),
    _stringopts(stringopts) {
    _arguments = new (_stringopts->C) Node(1);
    _arguments->del_req(0);
  }

  bool validate_mem_flow();
  bool validate_control_flow();

  void merge_add() {
#if 0
    // XXX This is place holder code for reusing an existing String
    // allocation but the logic for checking the state safety is
    // probably inadequate at the moment.
    CallProjections endprojs;
    sc->end()->extract_projections(&endprojs, false);
    if (endprojs.resproj != NULL) {
      for (SimpleDUIterator i(endprojs.resproj); i.has_next(); i.next()) {
        CallStaticJavaNode *use = i.get()->isa_CallStaticJava();
        if (use != NULL && use->method() != NULL &&
            use->method()->intrinsic_id() == vmIntrinsics::_String_String &&
            use->in(TypeFunc::Parms + 1) == endprojs.resproj) {
          // Found useless new String(sb.toString()) so reuse the newly allocated String
          // when creating the result instead of allocating a new one.
          sc->set_string_alloc(use->in(TypeFunc::Parms));
          sc->set_end(use);
        }
      }
    }
#endif
  }

  StringConcat* merge(StringConcat* other, Node* arg);

  void set_allocation(AllocateNode* alloc) {
    _begin = alloc;
  }

  void append(Node* value, int mode) {
    _arguments->add_req(value);
    _mode.append(mode);
  }
  void push(Node* value, int mode) {
    _arguments->ins_req(0, value);
    _mode.insert_before(0, mode);
  }

  void push_string(Node* value) {
    push(value, StringMode);
  }
  void push_string_null_check(Node* value) {
    push(value, StringNullCheckMode);
  }
  void push_int(Node* value) {
    push(value, IntMode);
  }
  void push_char(Node* value) {
    push(value, CharMode);
  }

  static bool is_SB_toString(Node* call) {
    if (call->is_CallStaticJava()) {
      CallStaticJavaNode* csj = call->as_CallStaticJava();
      ciMethod* m = csj->method();
      if (m != NULL &&
          (m->intrinsic_id() == vmIntrinsics::_StringBuilder_toString ||
           m->intrinsic_id() == vmIntrinsics::_StringBuffer_toString)) {
        return true;
      }
    }
    return false;
  }

  static Node* skip_string_null_check(Node* value) {
    // Look for a diamond shaped Null check of toString() result
    // (could be code from String.valueOf()):
    // (Proj == NULL) ? "null":"CastPP(Proj)#NotNULL
    if (value->is_Phi()) {
      int true_path = value->as_Phi()->is_diamond_phi();
      if (true_path != 0) {
        // phi->region->if_proj->ifnode->bool
        BoolNode* b = value->in(0)->in(1)->in(0)->in(1)->as_Bool();
        Node* cmp = b->in(1);
        Node* v1 = cmp->in(1);
        Node* v2 = cmp->in(2);
        // Null check of the return of toString which can simply be skipped.
        if (b->_test._test == BoolTest::ne &&
            v2->bottom_type() == TypePtr::NULL_PTR &&
            value->in(true_path)->Opcode() == Op_CastPP &&
            value->in(true_path)->in(1) == v1 &&
            v1->is_Proj() && is_SB_toString(v1->in(0))) {
          return v1;
        }
      }
    }
    return value;
  }

  Node* argument(int i) {
    return _arguments->in(i);
  }
  Node* argument_uncast(int i) {
    Node* arg = argument(i);
    int amode = mode(i);
    if (amode == StringConcat::StringMode ||
        amode == StringConcat::StringNullCheckMode) {
      arg = skip_string_null_check(arg);
    }
    return arg;
  }
  void set_argument(int i, Node* value) {
    _arguments->set_req(i, value);
  }
  int num_arguments() {
    return _mode.length();
  }
  int mode(int i) {
    return _mode.at(i);
  }
  void add_control(Node* ctrl) {
    assert(!_control.contains(ctrl), "only push once");
    _control.push(ctrl);
  }
  void add_constructor(Node* init) {
    assert(!_constructors.contains(init), "only push once");
    _constructors.push(init);
  }
  CallStaticJavaNode* end() { return _end; }
  AllocateNode* begin() { return _begin; }
  Node* string_alloc() { return _string_alloc; }

  void eliminate_unneeded_control();
  void eliminate_initialize(InitializeNode* init);
  void eliminate_call(CallNode* call);

  void maybe_log_transform() {
    CompileLog* log = _stringopts->C->log();
    if (log != NULL) {
      log->head("replace_string_concat arguments='%d' string_alloc='%d' multiple='%d'",
                num_arguments(),
                _string_alloc != NULL,
                _multiple);
      JVMState* p = _begin->jvms();
      while (p != NULL) {
        log->elem("jvms bci='%d' method='%d'", p->bci(), log->identify(p->method()));
        p = p->caller();
      }
      log->tail("replace_string_concat");
    }
  }

  void convert_uncommon_traps(GraphKit& kit, const JVMState* jvms) {
    for (uint u = 0; u < _uncommon_traps.size(); u++) {
      Node* uct = _uncommon_traps.at(u);

      // Build a new call using the jvms state of the allocate
      address call_addr = SharedRuntime::uncommon_trap_blob()->entry_point();
      const TypeFunc* call_type = OptoRuntime::uncommon_trap_Type();
      const TypePtr* no_memory_effects = NULL;
      Compile* C = _stringopts->C;
      CallStaticJavaNode* call = new (C) CallStaticJavaNode(call_type, call_addr, "uncommon_trap",
                                                            jvms->bci(), no_memory_effects);
      for (int e = 0; e < TypeFunc::Parms; e++) {
        call->init_req(e, uct->in(e));
      }
      // Set the trap request to record intrinsic failure if this trap
      // is taken too many times.  Ideally we would handle then traps by
      // doing the original bookkeeping in the MDO so that if it caused
      // the code to be thrown out we could still recompile and use the
      // optimization.  Failing the uncommon traps doesn't really mean
      // that the optimization is a bad idea but there's no other way to
      // do the MDO updates currently.
      int trap_request = Deoptimization::make_trap_request(Deoptimization::Reason_intrinsic,
                                                           Deoptimization::Action_make_not_entrant);
      call->init_req(TypeFunc::Parms, __ intcon(trap_request));
      kit.add_safepoint_edges(call);

      _stringopts->gvn()->transform(call);
      C->gvn_replace_by(uct, call);
      uct->disconnect_inputs(NULL, C);
    }
  }

  void cleanup() {
    // disconnect the hook node
    _arguments->disconnect_inputs(NULL, _stringopts->C);
  }
};


void StringConcat::eliminate_unneeded_control() {
  for (uint i = 0; i < _control.size(); i++) {
    Node* n = _control.at(i);
    if (n->is_Allocate()) {
      eliminate_initialize(n->as_Allocate()->initialization());
    }
    if (n->is_Call()) {
      if (n != _end) {
        eliminate_call(n->as_Call());
      }
    } else if (n->is_IfTrue()) {
      Compile* C = _stringopts->C;
      C->gvn_replace_by(n, n->in(0)->in(0));
      // get rid of the other projection
      C->gvn_replace_by(n->in(0)->as_If()->proj_out(false), C->top());
    }
  }
}


StringConcat* StringConcat::merge(StringConcat* other, Node* arg) {
  StringConcat* result = new StringConcat(_stringopts, _end);
  for (uint x = 0; x < _control.size(); x++) {
    Node* n = _control.at(x);
    if (n->is_Call()) {
      result->_control.push(n);
    }
  }
  for (uint x = 0; x < other->_control.size(); x++) {
    Node* n = other->_control.at(x);
    if (n->is_Call()) {
      result->_control.push(n);
    }
  }
  assert(result->_control.contains(other->_end), "what?");
  assert(result->_control.contains(_begin), "what?");
  for (int x = 0; x < num_arguments(); x++) {
    Node* argx = argument_uncast(x);
    if (argx == arg) {
      // replace the toString result with the all the arguments that
      // made up the other StringConcat
      for (int y = 0; y < other->num_arguments(); y++) {
        result->append(other->argument(y), other->mode(y));
      }
    } else {
      result->append(argx, mode(x));
    }
  }
  result->set_allocation(other->_begin);
  for (uint i = 0; i < _constructors.size(); i++) {
    result->add_constructor(_constructors.at(i));
  }
  for (uint i = 0; i < other->_constructors.size(); i++) {
    result->add_constructor(other->_constructors.at(i));
  }
  result->_multiple = true;
  return result;
}


void StringConcat::eliminate_call(CallNode* call) {
  Compile* C = _stringopts->C;
  CallProjections projs;
  call->extract_projections(&projs, false);
  if (projs.fallthrough_catchproj != NULL) {
    C->gvn_replace_by(projs.fallthrough_catchproj, call->in(TypeFunc::Control));
  }
  if (projs.fallthrough_memproj != NULL) {
    C->gvn_replace_by(projs.fallthrough_memproj, call->in(TypeFunc::Memory));
  }
  if (projs.catchall_memproj != NULL) {
    C->gvn_replace_by(projs.catchall_memproj, C->top());
  }
  if (projs.fallthrough_ioproj != NULL) {
    C->gvn_replace_by(projs.fallthrough_ioproj, call->in(TypeFunc::I_O));
  }
  if (projs.catchall_ioproj != NULL) {
    C->gvn_replace_by(projs.catchall_ioproj, C->top());
  }
  if (projs.catchall_catchproj != NULL) {
    // EA can't cope with the partially collapsed graph this
    // creates so put it on the worklist to be collapsed later.
    for (SimpleDUIterator i(projs.catchall_catchproj); i.has_next(); i.next()) {
      Node *use = i.get();
      int opc = use->Opcode();
      if (opc == Op_CreateEx || opc == Op_Region) {
        _stringopts->record_dead_node(use);
      }
    }
    C->gvn_replace_by(projs.catchall_catchproj, C->top());
  }
  if (projs.resproj != NULL) {
    C->gvn_replace_by(projs.resproj, C->top());
  }
  C->gvn_replace_by(call, C->top());
}

void StringConcat::eliminate_initialize(InitializeNode* init) {
  Compile* C = _stringopts->C;

  // Eliminate Initialize node.
  assert(init->outcnt() <= 2, "only a control and memory projection expected");
  assert(init->req() <= InitializeNode::RawStores, "no pending inits");
  Node *ctrl_proj = init->proj_out(TypeFunc::Control);
  if (ctrl_proj != NULL) {
    C->gvn_replace_by(ctrl_proj, init->in(TypeFunc::Control));
  }
  Node *mem_proj = init->proj_out(TypeFunc::Memory);
  if (mem_proj != NULL) {
    Node *mem = init->in(TypeFunc::Memory);
    C->gvn_replace_by(mem_proj, mem);
  }
  C->gvn_replace_by(init, C->top());
  init->disconnect_inputs(NULL, C);
}

Node_List PhaseStringOpts::collect_toString_calls() {
  Node_List string_calls;
  Node_List worklist;

  _visited.Clear();

  // Prime the worklist
  for (uint i = 1; i < C->root()->len(); i++) {
    Node* n = C->root()->in(i);
    if (n != NULL && !_visited.test_set(n->_idx)) {
      worklist.push(n);
    }
  }

  while (worklist.size() > 0) {
    Node* ctrl = worklist.pop();
    if (StringConcat::is_SB_toString(ctrl)) {
      CallStaticJavaNode* csj = ctrl->as_CallStaticJava();
      string_calls.push(csj);
    }
    if (ctrl->in(0) != NULL && !_visited.test_set(ctrl->in(0)->_idx)) {
      worklist.push(ctrl->in(0));
    }
    if (ctrl->is_Region()) {
      for (uint i = 1; i < ctrl->len(); i++) {
        if (ctrl->in(i) != NULL && !_visited.test_set(ctrl->in(i)->_idx)) {
          worklist.push(ctrl->in(i));
        }
      }
    }
  }
  return string_calls;
}


StringConcat* PhaseStringOpts::build_candidate(CallStaticJavaNode* call) {
  ciMethod* m = call->method();
  ciSymbol* string_sig;
  ciSymbol* int_sig;
  ciSymbol* char_sig;
  if (m->holder() == C->env()->StringBuilder_klass()) {
    string_sig = ciSymbol::String_StringBuilder_signature();
    int_sig = ciSymbol::int_StringBuilder_signature();
    char_sig = ciSymbol::char_StringBuilder_signature();
  } else if (m->holder() == C->env()->StringBuffer_klass()) {
    string_sig = ciSymbol::String_StringBuffer_signature();
    int_sig = ciSymbol::int_StringBuffer_signature();
    char_sig = ciSymbol::char_StringBuffer_signature();
  } else {
    return NULL;
  }
#ifndef PRODUCT
  if (PrintOptimizeStringConcat) {
    tty->print("considering toString call in ");
    call->jvms()->dump_spec(tty); tty->cr();
  }
#endif

  StringConcat* sc = new StringConcat(this, call);

  AllocateNode* alloc = NULL;
  InitializeNode* init = NULL;

  // possible opportunity for StringBuilder fusion
  CallStaticJavaNode* cnode = call;
  while (cnode) {
    Node* recv = cnode->in(TypeFunc::Parms)->uncast();
    if (recv->is_Proj()) {
      recv = recv->in(0);
    }
    cnode = recv->isa_CallStaticJava();
    if (cnode == NULL) {
      alloc = recv->isa_Allocate();
      if (alloc == NULL) {
        break;
      }
      // Find the constructor call
      Node* result = alloc->result_cast();
      if (result == NULL || !result->is_CheckCastPP() || alloc->in(TypeFunc::Memory)->is_top()) {
        // strange looking allocation
#ifndef PRODUCT
        if (PrintOptimizeStringConcat) {
          tty->print("giving up because allocation looks strange ");
          alloc->jvms()->dump_spec(tty); tty->cr();
        }
#endif
        break;
      }
      Node* constructor = NULL;
      for (SimpleDUIterator i(result); i.has_next(); i.next()) {
        CallStaticJavaNode *use = i.get()->isa_CallStaticJava();
        if (use != NULL &&
            use->method() != NULL &&
            !use->method()->is_static() &&
            use->method()->name() == ciSymbol::object_initializer_name() &&
            use->method()->holder() == m->holder()) {
          // Matched the constructor.
          ciSymbol* sig = use->method()->signature()->as_symbol();
          if (sig == ciSymbol::void_method_signature() ||
              sig == ciSymbol::int_void_signature() ||
              sig == ciSymbol::string_void_signature()) {
            if (sig == ciSymbol::string_void_signature()) {
              // StringBuilder(String) so pick this up as the first argument
              assert(use->in(TypeFunc::Parms + 1) != NULL, "what?");
              const Type* type = _gvn->type(use->in(TypeFunc::Parms + 1));
              if (type == TypePtr::NULL_PTR) {
                // StringBuilder(null) throws exception.
#ifndef PRODUCT
                if (PrintOptimizeStringConcat) {
                  tty->print("giving up because StringBuilder(null) throws exception");
                  alloc->jvms()->dump_spec(tty); tty->cr();
                }
#endif
                return NULL;
              }
              // StringBuilder(str) argument needs null check.
              sc->push_string_null_check(use->in(TypeFunc::Parms + 1));
            }
            // The int variant takes an initial size for the backing
            // array so just treat it like the void version.
            constructor = use;
          } else {
#ifndef PRODUCT
            if (PrintOptimizeStringConcat) {
              tty->print("unexpected constructor signature: %s", sig->as_utf8());
            }
#endif
          }
          break;
        }
      }
      if (constructor == NULL) {
        // couldn't find constructor
#ifndef PRODUCT
        if (PrintOptimizeStringConcat) {
          tty->print("giving up because couldn't find constructor ");
          alloc->jvms()->dump_spec(tty); tty->cr();
        }
#endif
        break;
      }

      // Walked all the way back and found the constructor call so see
      // if this call converted into a direct string concatenation.
      sc->add_control(call);
      sc->add_control(constructor);
      sc->add_control(alloc);
      sc->set_allocation(alloc);
      sc->add_constructor(constructor);
      if (sc->validate_control_flow() && sc->validate_mem_flow()) {
        return sc;
      } else {
        return NULL;
      }
    } else if (cnode->method() == NULL) {
      break;
    } else if (!cnode->method()->is_static() &&
               cnode->method()->holder() == m->holder() &&
               cnode->method()->name() == ciSymbol::append_name() &&
               (cnode->method()->signature()->as_symbol() == string_sig ||
                cnode->method()->signature()->as_symbol() == char_sig ||
                cnode->method()->signature()->as_symbol() == int_sig)) {
      sc->add_control(cnode);
      Node* arg = cnode->in(TypeFunc::Parms + 1);
      if (cnode->method()->signature()->as_symbol() == int_sig) {
        sc->push_int(arg);
      } else if (cnode->method()->signature()->as_symbol() == char_sig) {
        sc->push_char(arg);
      } else {
        if (arg->is_Proj() && arg->in(0)->is_CallStaticJava()) {
          CallStaticJavaNode* csj = arg->in(0)->as_CallStaticJava();
          if (csj->method() != NULL &&
              csj->method()->intrinsic_id() == vmIntrinsics::_Integer_toString &&
              arg->outcnt() == 1) {
            // _control is the list of StringBuilder calls nodes which
            // will be replaced by new String code after this optimization.
            // Integer::toString() call is not part of StringBuilder calls
            // chain. It could be eliminated only if its result is used
            // only by this SB calls chain.
            // Another limitation: it should be used only once because
            // it is unknown that it is used only by this SB calls chain
            // until all related SB calls nodes are collected.
            assert(arg->unique_out() == cnode, "sanity");
            sc->add_control(csj);
            sc->push_int(csj->in(TypeFunc::Parms));
            continue;
          }
        }
        sc->push_string(arg);
      }
      continue;
    } else {
      // some unhandled signature
#ifndef PRODUCT
      if (PrintOptimizeStringConcat) {
        tty->print("giving up because encountered unexpected signature ");
        cnode->tf()->dump(); tty->cr();
        cnode->in(TypeFunc::Parms + 1)->dump();
      }
#endif
      break;
    }
  }
  return NULL;
}


PhaseStringOpts::PhaseStringOpts(PhaseGVN* gvn, Unique_Node_List*):
  Phase(StringOpts),
  _gvn(gvn),
  _visited(Thread::current()->resource_area()) {

  assert(OptimizeStringConcat, "shouldn't be here");

  size_table_field = C->env()->Integer_klass()->get_field_by_name(ciSymbol::make("sizeTable"),
                                                                  ciSymbol::make("[I"), true);
  if (size_table_field == NULL) {
    // Something wrong so give up.
    assert(false, "why can't we find Integer.sizeTable?");
    return;
  }

  // Collect the types needed to talk about the various slices of memory
  char_adr_idx = C->get_alias_index(TypeAryPtr::CHARS);

  // For each locally allocated StringBuffer see if the usages can be
  // collapsed into a single String construction.

  // Run through the list of allocation looking for SB.toString to see
  // if it's possible to fuse the usage of the SB into a single String
  // construction.
  GrowableArray<StringConcat*> concats;
  Node_List toStrings = collect_toString_calls();
  while (toStrings.size() > 0) {
    StringConcat* sc = build_candidate(toStrings.pop()->as_CallStaticJava());
    if (sc != NULL) {
      concats.push(sc);
    }
  }

  // try to coalesce separate concats
 restart:
  for (int c = 0; c < concats.length(); c++) {
    StringConcat* sc = concats.at(c);
    for (int i = 0; i < sc->num_arguments(); i++) {
      Node* arg = sc->argument_uncast(i);
      if (arg->is_Proj() && StringConcat::is_SB_toString(arg->in(0))) {
        CallStaticJavaNode* csj = arg->in(0)->as_CallStaticJava();
        for (int o = 0; o < concats.length(); o++) {
          if (c == o) continue;
          StringConcat* other = concats.at(o);
          if (other->end() == csj) {
#ifndef PRODUCT
            if (PrintOptimizeStringConcat) {
              tty->print_cr("considering stacked concats");
            }
#endif

            StringConcat* merged = sc->merge(other, arg);
            if (merged->validate_control_flow() && merged->validate_mem_flow()) {
#ifndef PRODUCT
              if (PrintOptimizeStringConcat) {
                tty->print_cr("stacking would succeed");
              }
#endif
              if (c < o) {
                concats.remove_at(o);
                concats.at_put(c, merged);
              } else {
                concats.remove_at(c);
                concats.at_put(o, merged);
              }
              goto restart;
            } else {
#ifndef PRODUCT
              if (PrintOptimizeStringConcat) {
                tty->print_cr("stacking would fail");
              }
#endif
            }
          }
        }
      }
    }
  }


  for (int c = 0; c < concats.length(); c++) {
    StringConcat* sc = concats.at(c);
    replace_string_concat(sc);
  }

  remove_dead_nodes();
}

void PhaseStringOpts::record_dead_node(Node* dead) {
  dead_worklist.push(dead);
}

void PhaseStringOpts::remove_dead_nodes() {
  // Delete any dead nodes to make things clean enough that escape
  // analysis doesn't get unhappy.
  while (dead_worklist.size() > 0) {
    Node* use = dead_worklist.pop();
    int opc = use->Opcode();
    switch (opc) {
      case Op_Region: {
        uint i = 1;
        for (i = 1; i < use->req(); i++) {
          if (use->in(i) != C->top()) {
            break;
          }
        }
        if (i >= use->req()) {
          for (SimpleDUIterator i(use); i.has_next(); i.next()) {
            Node* m = i.get();
            if (m->is_Phi()) {
              dead_worklist.push(m);
            }
          }
          C->gvn_replace_by(use, C->top());
        }
        break;
      }
      case Op_AddP:
      case Op_CreateEx: {
        // Recurisvely clean up references to CreateEx so EA doesn't
        // get unhappy about the partially collapsed graph.
        for (SimpleDUIterator i(use); i.has_next(); i.next()) {
          Node* m = i.get();
          if (m->is_AddP()) {
            dead_worklist.push(m);
          }
        }
        C->gvn_replace_by(use, C->top());
        break;
      }
      case Op_Phi:
        if (use->in(0) == C->top()) {
          C->gvn_replace_by(use, C->top());
        }
        break;
    }
  }
}


bool StringConcat::validate_mem_flow() {
  Compile* C = _stringopts->C;

  for (uint i = 0; i < _control.size(); i++) {
#ifndef PRODUCT
    Node_List path;
#endif
    Node* curr = _control.at(i);
    if (curr->is_Call() && curr != _begin) { // For all calls except the first allocation
      // Now here's the main invariant in our case:
      // For memory between the constructor, and appends, and toString we should only see bottom memory,
      // produced by the previous call we know about.
      if (!_constructors.contains(curr)) {
        NOT_PRODUCT(path.push(curr);)
        Node* mem = curr->in(TypeFunc::Memory);
        assert(mem != NULL, "calls should have memory edge");
        assert(!mem->is_Phi(), "should be handled by control flow validation");
        NOT_PRODUCT(path.push(mem);)
        while (mem->is_MergeMem()) {
          for (uint i = 1; i < mem->req(); i++) {
            if (i != Compile::AliasIdxBot && mem->in(i) != C->top()) {
#ifndef PRODUCT
              if (PrintOptimizeStringConcat) {
                tty->print("fusion has incorrect memory flow (side effects) for ");
                _begin->jvms()->dump_spec(tty); tty->cr();
                path.dump();
              }
#endif
              return false;
            }
          }
          // skip through a potential MergeMem chain, linked through Bot
          mem = mem->in(Compile::AliasIdxBot);
          NOT_PRODUCT(path.push(mem);)
        }
        // now let it fall through, and see if we have a projection
        if (mem->is_Proj()) {
          // Should point to a previous known call
          Node *prev = mem->in(0);
          NOT_PRODUCT(path.push(prev);)
          if (!prev->is_Call() || !_control.contains(prev)) {
#ifndef PRODUCT
            if (PrintOptimizeStringConcat) {
              tty->print("fusion has incorrect memory flow (unknown call) for ");
              _begin->jvms()->dump_spec(tty); tty->cr();
              path.dump();
            }
#endif
            return false;
          }
        } else {
          assert(mem->is_Store() || mem->is_LoadStore(), err_msg_res("unexpected node type: %s", mem->Name()));
#ifndef PRODUCT
          if (PrintOptimizeStringConcat) {
            tty->print("fusion has incorrect memory flow (unexpected source) for ");
            _begin->jvms()->dump_spec(tty); tty->cr();
            path.dump();
          }
#endif
          return false;
        }
      } else {
        // For memory that feeds into constructors it's more complicated.
        // However the advantage is that any side effect that happens between the Allocate/Initialize and
        // the constructor will have to be control-dependent on Initialize.
        // So we actually don't have to do anything, since it's going to be caught by the control flow
        // analysis.
#ifdef ASSERT
        // Do a quick verification of the control pattern between the constructor and the initialize node
        assert(curr->is_Call(), "constructor should be a call");
        // Go up the control starting from the constructor call
        Node* ctrl = curr->in(0);
        IfNode* iff = NULL;
        RegionNode* copy = NULL;

        while (true) {
          // skip known check patterns
          if (ctrl->is_Region()) {
            if (ctrl->as_Region()->is_copy()) {
              copy = ctrl->as_Region();
              ctrl = copy->is_copy();
            } else { // a cast
              assert(ctrl->req() == 3 &&
                     ctrl->in(1) != NULL && ctrl->in(1)->is_Proj() &&
                     ctrl->in(2) != NULL && ctrl->in(2)->is_Proj() &&
                     ctrl->in(1)->in(0) == ctrl->in(2)->in(0) &&
                     ctrl->in(1)->in(0) != NULL && ctrl->in(1)->in(0)->is_If(),
                     "must be a simple diamond");
              Node* true_proj = ctrl->in(1)->is_IfTrue() ? ctrl->in(1) : ctrl->in(2);
              for (SimpleDUIterator i(true_proj); i.has_next(); i.next()) {
                Node* use = i.get();
                assert(use == ctrl || use->is_ConstraintCast(),
                       err_msg_res("unexpected user: %s", use->Name()));
              }

              iff = ctrl->in(1)->in(0)->as_If();
              ctrl = iff->in(0);
            }
          } else if (ctrl->is_IfTrue()) { // null checks, class checks
            iff = ctrl->in(0)->as_If();
            assert(iff->is_If(), "must be if");
            // Verify that the other arm is an uncommon trap
            Node* otherproj = iff->proj_out(1 - ctrl->as_Proj()->_con);
            CallStaticJavaNode* call = otherproj->unique_out()->isa_CallStaticJava();
            assert(strcmp(call->_name, "uncommon_trap") == 0, "must be uncommond trap");
            ctrl = iff->in(0);
          } else {
            break;
          }
        }

        assert(ctrl->is_Proj(), "must be a projection");
        assert(ctrl->in(0)->is_Initialize(), "should be initialize");
        for (SimpleDUIterator i(ctrl); i.has_next(); i.next()) {
          Node* use = i.get();
          assert(use == copy || use == iff || use == curr || use->is_CheckCastPP() || use->is_Load(),
                 err_msg_res("unexpected user: %s", use->Name()));
        }
#endif // ASSERT
      }
    }
  }

#ifndef PRODUCT
  if (PrintOptimizeStringConcat) {
    tty->print("fusion has correct memory flow for ");
    _begin->jvms()->dump_spec(tty); tty->cr();
    tty->cr();
  }
#endif
  return true;
}

bool StringConcat::validate_control_flow() {
  // We found all the calls and arguments now lets see if it's
  // safe to transform the graph as we would expect.

  // Check to see if this resulted in too many uncommon traps previously
  if (Compile::current()->too_many_traps(_begin->jvms()->method(), _begin->jvms()->bci(),
                        Deoptimization::Reason_intrinsic)) {
    return false;
  }

  // Walk backwards over the control flow from toString to the
  // allocation and make sure all the control flow is ok.  This
  // means it's either going to be eliminated once the calls are
  // removed or it can safely be transformed into an uncommon
  // trap.

  int null_check_count = 0;
  Unique_Node_List ctrl_path;

  assert(_control.contains(_begin), "missing");
  assert(_control.contains(_end), "missing");

  // Collect the nodes that we know about and will eliminate into ctrl_path
  for (uint i = 0; i < _control.size(); i++) {
    // Push the call and it's control projection
    Node* n = _control.at(i);
    if (n->is_Allocate()) {
      AllocateNode* an = n->as_Allocate();
      InitializeNode* init = an->initialization();
      ctrl_path.push(init);
      ctrl_path.push(init->as_Multi()->proj_out(0));
    }
    if (n->is_Call()) {
      CallNode* cn = n->as_Call();
      ctrl_path.push(cn);
      ctrl_path.push(cn->proj_out(0));
      ctrl_path.push(cn->proj_out(0)->unique_out());
      Node* catchproj = cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0);
      if (catchproj != NULL) {
        ctrl_path.push(catchproj);
      }
    } else {
      ShouldNotReachHere();
    }
  }

  // Skip backwards through the control checking for unexpected control flow
  Node* ptr = _end;
  bool fail = false;
  while (ptr != _begin) {
    if (ptr->is_Call() && ctrl_path.member(ptr)) {
      ptr = ptr->in(0);
    } else if (ptr->is_CatchProj() && ctrl_path.member(ptr)) {
      ptr = ptr->in(0)->in(0)->in(0);
      assert(ctrl_path.member(ptr), "should be a known piece of control");
    } else if (ptr->is_IfTrue()) {
      IfNode* iff = ptr->in(0)->as_If();
      BoolNode* b = iff->in(1)->isa_Bool();

      if (b == NULL) {
        fail = true;
        break;
      }

      Node* cmp = b->in(1);
      Node* v1 = cmp->in(1);
      Node* v2 = cmp->in(2);
      Node* otherproj = iff->proj_out(1 - ptr->as_Proj()->_con);

      // Null check of the return of append which can simply be eliminated
      if (b->_test._test == BoolTest::ne &&
          v2->bottom_type() == TypePtr::NULL_PTR &&
          v1->is_Proj() && ctrl_path.member(v1->in(0))) {
        // NULL check of the return value of the append
        null_check_count++;
        if (otherproj->outcnt() == 1) {
          CallStaticJavaNode* call = otherproj->unique_out()->isa_CallStaticJava();
          if (call != NULL && call->_name != NULL && strcmp(call->_name, "uncommon_trap") == 0) {
            ctrl_path.push(call);
          }
        }
        _control.push(ptr);
        ptr = ptr->in(0)->in(0);
        continue;
      }

      // A test which leads to an uncommon trap which should be safe.
      // Later this trap will be converted into a trap that restarts
      // at the beginning.
      if (otherproj->outcnt() == 1) {
        CallStaticJavaNode* call = otherproj->unique_out()->isa_CallStaticJava();
        if (call != NULL && call->_name != NULL && strcmp(call->_name, "uncommon_trap") == 0) {
          // control flow leads to uct so should be ok
          _uncommon_traps.push(call);
          ctrl_path.push(call);
          ptr = ptr->in(0)->in(0);
          continue;
        }
      }

#ifndef PRODUCT
      // Some unexpected control flow we don't know how to handle.
      if (PrintOptimizeStringConcat) {
        tty->print_cr("failing with unknown test");
        b->dump();
        cmp->dump();
        v1->dump();
        v2->dump();
        tty->cr();
      }
#endif
      fail = true;
      break;
    } else if (ptr->is_Proj() && ptr->in(0)->is_Initialize()) {
      ptr = ptr->in(0)->in(0);
    } else if (ptr->is_Region()) {
      Node* copy = ptr->as_Region()->is_copy();
      if (copy != NULL) {
        ptr = copy;
        continue;
      }
      if (ptr->req() == 3 &&
          ptr->in(1) != NULL && ptr->in(1)->is_Proj() &&
          ptr->in(2) != NULL && ptr->in(2)->is_Proj() &&
          ptr->in(1)->in(0) == ptr->in(2)->in(0) &&
          ptr->in(1)->in(0) != NULL && ptr->in(1)->in(0)->is_If()) {
        // Simple diamond.
        // XXX should check for possibly merging stores.  simple data merges are ok.
        // The IGVN will make this simple diamond go away when it
        // transforms the Region. Make sure it sees it.
        Compile::current()->record_for_igvn(ptr);
        ptr = ptr->in(1)->in(0)->in(0);
        continue;
      }
#ifndef PRODUCT
      if (PrintOptimizeStringConcat) {
        tty->print_cr("fusion would fail for region");
        _begin->dump();
        ptr->dump(2);
      }
#endif
      fail = true;
      break;
    } else {
      // other unknown control
      if (!fail) {
#ifndef PRODUCT
        if (PrintOptimizeStringConcat) {
          tty->print_cr("fusion would fail for");
          _begin->dump();
        }
#endif
        fail = true;
      }
#ifndef PRODUCT
      if (PrintOptimizeStringConcat) {
        ptr->dump();
      }
#endif
      ptr = ptr->in(0);
    }
  }
#ifndef PRODUCT
  if (PrintOptimizeStringConcat && fail) {
    tty->cr();
  }
#endif
  if (fail) return !fail;

  // Validate that all these results produced are contained within
  // this cluster of objects.  First collect all the results produced
  // by calls in the region.
  _stringopts->_visited.Clear();
  Node_List worklist;
  Node* final_result = _end->proj_out(TypeFunc::Parms);
  for (uint i = 0; i < _control.size(); i++) {
    CallNode* cnode = _control.at(i)->isa_Call();
    if (cnode != NULL) {
      _stringopts->_visited.test_set(cnode->_idx);
    }
    Node* result = cnode != NULL ? cnode->proj_out(TypeFunc::Parms) : NULL;
    if (result != NULL && result != final_result) {
      worklist.push(result);
    }
  }

  Node* last_result = NULL;
  while (worklist.size() > 0) {
    Node* result = worklist.pop();
    if (_stringopts->_visited.test_set(result->_idx))
      continue;
    for (SimpleDUIterator i(result); i.has_next(); i.next()) {
      Node *use = i.get();
      if (ctrl_path.member(use)) {
        // already checked this
        continue;
      }
      int opc = use->Opcode();
      if (opc == Op_CmpP || opc == Op_Node) {
        ctrl_path.push(use);
        continue;
      }
      if (opc == Op_CastPP || opc == Op_CheckCastPP) {
        for (SimpleDUIterator j(use); j.has_next(); j.next()) {
          worklist.push(j.get());
        }
        worklist.push(use->in(1));
        ctrl_path.push(use);
        continue;
      }
#ifndef PRODUCT
      if (PrintOptimizeStringConcat) {
        if (result != last_result) {
          last_result = result;
          tty->print_cr("extra uses for result:");
          last_result->dump();
        }
        use->dump();
      }
#endif
      fail = true;
      break;
    }
  }

#ifndef PRODUCT
  if (PrintOptimizeStringConcat && !fail) {
    ttyLocker ttyl;
    tty->cr();
    tty->print("fusion has correct control flow (%d %d) for ", null_check_count, _uncommon_traps.size());
    _begin->jvms()->dump_spec(tty); tty->cr();
    for (int i = 0; i < num_arguments(); i++) {
      argument(i)->dump();
    }
    _control.dump();
    tty->cr();
  }
#endif

  return !fail;
}

Node* PhaseStringOpts::fetch_static_field(GraphKit& kit, ciField* field) {
  const TypeInstPtr* mirror_type = TypeInstPtr::make(field->holder()->java_mirror());
  Node* klass_node = __ makecon(mirror_type);
  BasicType bt = field->layout_type();
  ciType* field_klass = field->type();

  const Type *type;
  if( bt == T_OBJECT ) {
    if (!field->type()->is_loaded()) {
      type = TypeInstPtr::BOTTOM;
    } else if (field->is_constant()) {
      // This can happen if the constant oop is non-perm.
      ciObject* con = field->constant_value().as_object();
      // Do not "join" in the previous type; it doesn't add value,
      // and may yield a vacuous result if the field is of interface type.
      type = TypeOopPtr::make_from_constant(con, true)->isa_oopptr();
      assert(type != NULL, "field singleton type must be consistent");
      return __ makecon(type);
    } else {
      type = TypeOopPtr::make_from_klass(field_klass->as_klass());
    }
  } else {
    type = Type::get_const_basic_type(bt);
  }

  return kit.make_load(NULL, kit.basic_plus_adr(klass_node, field->offset_in_bytes()),
                       type, T_OBJECT,
                       C->get_alias_index(mirror_type->add_offset(field->offset_in_bytes())),
                       MemNode::unordered);
}

Node* PhaseStringOpts::int_stringSize(GraphKit& kit, Node* arg) {
  RegionNode *final_merge = new (C) RegionNode(3);
  kit.gvn().set_type(final_merge, Type::CONTROL);
  Node* final_size = new (C) PhiNode(final_merge, TypeInt::INT);
  kit.gvn().set_type(final_size, TypeInt::INT);

  IfNode* iff = kit.create_and_map_if(kit.control(),
                                      __ Bool(__ CmpI(arg, __ intcon(0x80000000)), BoolTest::ne),
                                      PROB_FAIR, COUNT_UNKNOWN);
  Node* is_min = __ IfFalse(iff);
  final_merge->init_req(1, is_min);
  final_size->init_req(1, __ intcon(11));

  kit.set_control(__ IfTrue(iff));
  if (kit.stopped()) {
    final_merge->init_req(2, C->top());
    final_size->init_req(2, C->top());
  } else {

    // int size = (i < 0) ? stringSize(-i) + 1 : stringSize(i);
    RegionNode *r = new (C) RegionNode(3);
    kit.gvn().set_type(r, Type::CONTROL);
    Node *phi = new (C) PhiNode(r, TypeInt::INT);
    kit.gvn().set_type(phi, TypeInt::INT);
    Node *size = new (C) PhiNode(r, TypeInt::INT);
    kit.gvn().set_type(size, TypeInt::INT);
    Node* chk = __ CmpI(arg, __ intcon(0));
    Node* p = __ Bool(chk, BoolTest::lt);
    IfNode* iff = kit.create_and_map_if(kit.control(), p, PROB_FAIR, COUNT_UNKNOWN);
    Node* lessthan = __ IfTrue(iff);
    Node* greaterequal = __ IfFalse(iff);
    r->init_req(1, lessthan);
    phi->init_req(1, __ SubI(__ intcon(0), arg));
    size->init_req(1, __ intcon(1));
    r->init_req(2, greaterequal);
    phi->init_req(2, arg);
    size->init_req(2, __ intcon(0));
    kit.set_control(r);
    C->record_for_igvn(r);
    C->record_for_igvn(phi);
    C->record_for_igvn(size);

    // for (int i=0; ; i++)
    //   if (x <= sizeTable[i])
    //     return i+1;

    // Add loop predicate first.
    kit.add_predicate();

    RegionNode *loop = new (C) RegionNode(3);
    loop->init_req(1, kit.control());
    kit.gvn().set_type(loop, Type::CONTROL);

    Node *index = new (C) PhiNode(loop, TypeInt::INT);
    index->init_req(1, __ intcon(0));
    kit.gvn().set_type(index, TypeInt::INT);
    kit.set_control(loop);
    Node* sizeTable = fetch_static_field(kit, size_table_field);

    Node* value = kit.load_array_element(NULL, sizeTable, index, TypeAryPtr::INTS);
    C->record_for_igvn(value);
    Node* limit = __ CmpI(phi, value);
    Node* limitb = __ Bool(limit, BoolTest::le);
    IfNode* iff2 = kit.create_and_map_if(kit.control(), limitb, PROB_MIN, COUNT_UNKNOWN);
    Node* lessEqual = __ IfTrue(iff2);
    Node* greater = __ IfFalse(iff2);

    loop->init_req(2, greater);
    index->init_req(2, __ AddI(index, __ intcon(1)));

    kit.set_control(lessEqual);
    C->record_for_igvn(loop);
    C->record_for_igvn(index);

    final_merge->init_req(2, kit.control());
    final_size->init_req(2, __ AddI(__ AddI(index, size), __ intcon(1)));
  }

  kit.set_control(final_merge);
  C->record_for_igvn(final_merge);
  C->record_for_igvn(final_size);

  return final_size;
}

void PhaseStringOpts::int_getChars(GraphKit& kit, Node* arg, Node* char_array, Node* start, Node* end) {
  RegionNode *final_merge = new (C) RegionNode(4);
  kit.gvn().set_type(final_merge, Type::CONTROL);
  Node *final_mem = PhiNode::make(final_merge, kit.memory(char_adr_idx), Type::MEMORY, TypeAryPtr::CHARS);
  kit.gvn().set_type(final_mem, Type::MEMORY);

  // need to handle Integer.MIN_VALUE specially because negating doesn't make it positive
  {
    // i == MIN_VALUE
    IfNode* iff = kit.create_and_map_if(kit.control(),
                                        __ Bool(__ CmpI(arg, __ intcon(0x80000000)), BoolTest::ne),
                                        PROB_FAIR, COUNT_UNKNOWN);

    Node* old_mem = kit.memory(char_adr_idx);

    kit.set_control(__ IfFalse(iff));
    if (kit.stopped()) {
      // Statically not equal to MIN_VALUE so this path is dead
      final_merge->init_req(3, kit.control());
    } else {
      copy_string(kit, __ makecon(TypeInstPtr::make(C->env()->the_min_jint_string())),
                  char_array, start);
      final_merge->init_req(3, kit.control());
      final_mem->init_req(3, kit.memory(char_adr_idx));
    }

    kit.set_control(__ IfTrue(iff));
    kit.set_memory(old_mem, char_adr_idx);
  }


  // Simplified version of Integer.getChars

  // int q, r;
  // int charPos = index;
  Node* charPos = end;

  // char sign = 0;

  Node* i = arg;
  Node* sign = __ intcon(0);

  // if (i < 0) {
  //     sign = '-';
  //     i = -i;
  // }
  {
    IfNode* iff = kit.create_and_map_if(kit.control(),
                                        __ Bool(__ CmpI(arg, __ intcon(0)), BoolTest::lt),
                                        PROB_FAIR, COUNT_UNKNOWN);

    RegionNode *merge = new (C) RegionNode(3);
    kit.gvn().set_type(merge, Type::CONTROL);
    i = new (C) PhiNode(merge, TypeInt::INT);
    kit.gvn().set_type(i, TypeInt::INT);
    sign = new (C) PhiNode(merge, TypeInt::INT);
    kit.gvn().set_type(sign, TypeInt::INT);

    merge->init_req(1, __ IfTrue(iff));
    i->init_req(1, __ SubI(__ intcon(0), arg));
    sign->init_req(1, __ intcon('-'));
    merge->init_req(2, __ IfFalse(iff));
    i->init_req(2, arg);
    sign->init_req(2, __ intcon(0));

    kit.set_control(merge);

    C->record_for_igvn(merge);
    C->record_for_igvn(i);
    C->record_for_igvn(sign);
  }

  // for (;;) {
  //     q = i / 10;
  //     r = i - ((q << 3) + (q << 1));  // r = i-(q*10) ...
  //     buf [--charPos] = digits [r];
  //     i = q;
  //     if (i == 0) break;
  // }

  {
    // Add loop predicate first.
    kit.add_predicate();

    RegionNode *head = new (C) RegionNode(3);
    head->init_req(1, kit.control());
    kit.gvn().set_type(head, Type::CONTROL);
    Node *i_phi = new (C) PhiNode(head, TypeInt::INT);
    i_phi->init_req(1, i);
    kit.gvn().set_type(i_phi, TypeInt::INT);
    charPos = PhiNode::make(head, charPos);
    kit.gvn().set_type(charPos, TypeInt::INT);
    Node *mem = PhiNode::make(head, kit.memory(char_adr_idx), Type::MEMORY, TypeAryPtr::CHARS);
    kit.gvn().set_type(mem, Type::MEMORY);
    kit.set_control(head);
    kit.set_memory(mem, char_adr_idx);

    Node* q = __ DivI(NULL, i_phi, __ intcon(10));
    Node* r = __ SubI(i_phi, __ AddI(__ LShiftI(q, __ intcon(3)),
                                     __ LShiftI(q, __ intcon(1))));
    Node* m1 = __ SubI(charPos, __ intcon(1));
    Node* ch = __ AddI(r, __ intcon('0'));

    Node* st = __ store_to_memory(kit.control(), kit.array_element_address(char_array, m1, T_CHAR),
                                  ch, T_CHAR, char_adr_idx, MemNode::unordered);


    IfNode* iff = kit.create_and_map_if(head, __ Bool(__ CmpI(q, __ intcon(0)), BoolTest::ne),
                                        PROB_FAIR, COUNT_UNKNOWN);
    Node* ne = __ IfTrue(iff);
    Node* eq = __ IfFalse(iff);

    head->init_req(2, ne);
    mem->init_req(2, st);
    i_phi->init_req(2, q);
    charPos->init_req(2, m1);

    charPos = m1;

    kit.set_control(eq);
    kit.set_memory(st, char_adr_idx);

    C->record_for_igvn(head);
    C->record_for_igvn(mem);
    C->record_for_igvn(i_phi);
    C->record_for_igvn(charPos);
  }

  {
    // if (sign != 0) {
    //     buf [--charPos] = sign;
    // }
    IfNode* iff = kit.create_and_map_if(kit.control(),
                                        __ Bool(__ CmpI(sign, __ intcon(0)), BoolTest::ne),
                                        PROB_FAIR, COUNT_UNKNOWN);

    final_merge->init_req(2, __ IfFalse(iff));
    final_mem->init_req(2, kit.memory(char_adr_idx));

    kit.set_control(__ IfTrue(iff));
    if (kit.stopped()) {
      final_merge->init_req(1, C->top());
      final_mem->init_req(1, C->top());
    } else {
      Node* m1 = __ SubI(charPos, __ intcon(1));
      Node* st = __ store_to_memory(kit.control(), kit.array_element_address(char_array, m1, T_CHAR),
                                    sign, T_CHAR, char_adr_idx, MemNode::unordered);

      final_merge->init_req(1, kit.control());
      final_mem->init_req(1, st);
    }

    kit.set_control(final_merge);
    kit.set_memory(final_mem, char_adr_idx);

    C->record_for_igvn(final_merge);
    C->record_for_igvn(final_mem);
  }
}


Node* PhaseStringOpts::copy_string(GraphKit& kit, Node* str, Node* char_array, Node* start) {
  Node* string = str;
  Node* offset = kit.load_String_offset(kit.control(), string);
  Node* count  = kit.load_String_length(kit.control(), string);
  Node* value  = kit.load_String_value (kit.control(), string);

  // copy the contents
  if (offset->is_Con() && count->is_Con() && value->is_Con() && count->get_int() < unroll_string_copy_length) {
    // For small constant strings just emit individual stores.
    // A length of 6 seems like a good space/speed tradeof.
    int c = count->get_int();
    int o = offset->get_int();
    const TypeOopPtr* t = kit.gvn().type(value)->isa_oopptr();
    ciTypeArray* value_array = t->const_oop()->as_type_array();
    for (int e = 0; e < c; e++) {
      __ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR),
                         __ intcon(value_array->char_at(o + e)), T_CHAR, char_adr_idx,
                         MemNode::unordered);
      start = __ AddI(start, __ intcon(1));
    }
  } else {
    Node* src_ptr = kit.array_element_address(value, offset, T_CHAR);
    Node* dst_ptr = kit.array_element_address(char_array, start, T_CHAR);
    Node* c = count;
    Node* extra = NULL;
#ifdef _LP64
    c = __ ConvI2L(c);
    extra = C->top();
#endif
    Node* call = kit.make_runtime_call(GraphKit::RC_LEAF|GraphKit::RC_NO_FP,
                                       OptoRuntime::fast_arraycopy_Type(),
                                       CAST_FROM_FN_PTR(address, StubRoutines::jshort_disjoint_arraycopy()),
                                       "jshort_disjoint_arraycopy", TypeAryPtr::CHARS,
                                       src_ptr, dst_ptr, c, extra);
    start = __ AddI(start, count);
  }
  return start;
}


void PhaseStringOpts::replace_string_concat(StringConcat* sc) {
  // Log a little info about the transformation
  sc->maybe_log_transform();

  // pull the JVMState of the allocation into a SafePointNode to serve as
  // as a shim for the insertion of the new code.
  JVMState* jvms     = sc->begin()->jvms()->clone_shallow(C);
  uint size = sc->begin()->req();
  SafePointNode* map = new (C) SafePointNode(size, jvms);

  // copy the control and memory state from the final call into our
  // new starting state.  This allows any preceeding tests to feed
  // into the new section of code.
  for (uint i1 = 0; i1 < TypeFunc::Parms; i1++) {
    map->init_req(i1, sc->end()->in(i1));
  }
  // blow away old allocation arguments
  for (uint i1 = TypeFunc::Parms; i1 < jvms->debug_start(); i1++) {
    map->init_req(i1, C->top());
  }
  // Copy the rest of the inputs for the JVMState
  for (uint i1 = jvms->debug_start(); i1 < sc->begin()->req(); i1++) {
    map->init_req(i1, sc->begin()->in(i1));
  }
  // Make sure the memory state is a MergeMem for parsing.
  if (!map->in(TypeFunc::Memory)->is_MergeMem()) {
    map->set_req(TypeFunc::Memory, MergeMemNode::make(C, map->in(TypeFunc::Memory)));
  }

  jvms->set_map(map);
  map->ensure_stack(jvms, jvms->method()->max_stack());


  // disconnect all the old StringBuilder calls from the graph
  sc->eliminate_unneeded_control();

  // At this point all the old work has been completely removed from
  // the graph and the saved JVMState exists at the point where the
  // final toString call used to be.
  GraphKit kit(jvms);

  // There may be uncommon traps which are still using the
  // intermediate states and these need to be rewritten to point at
  // the JVMState at the beginning of the transformation.
  sc->convert_uncommon_traps(kit, jvms);

  // Now insert the logic to compute the size of the string followed
  // by all the logic to construct array and resulting string.

  Node* null_string = __ makecon(TypeInstPtr::make(C->env()->the_null_string()));

  // Create a region for the overflow checks to merge into.
  int args = MAX2(sc->num_arguments(), 1);
  RegionNode* overflow = new (C) RegionNode(args);
  kit.gvn().set_type(overflow, Type::CONTROL);

  // Create a hook node to hold onto the individual sizes since they
  // are need for the copying phase.
  Node* string_sizes = new (C) Node(args);

  Node* length = __ intcon(0);
  for (int argi = 0; argi < sc->num_arguments(); argi++) {
    Node* arg = sc->argument(argi);
    switch (sc->mode(argi)) {
      case StringConcat::IntMode: {
        Node* string_size = int_stringSize(kit, arg);

        // accumulate total
        length = __ AddI(length, string_size);

        // Cache this value for the use by int_toString
        string_sizes->init_req(argi, string_size);
        break;
      }
      case StringConcat::StringNullCheckMode: {
        const Type* type = kit.gvn().type(arg);
        assert(type != TypePtr::NULL_PTR, "missing check");
        if (!type->higher_equal(TypeInstPtr::NOTNULL)) {
          // Null check with uncommont trap since
          // StringBuilder(null) throws exception.
          // Use special uncommon trap instead of
          // calling normal do_null_check().
          Node* p = __ Bool(__ CmpP(arg, kit.null()), BoolTest::ne);
          IfNode* iff = kit.create_and_map_if(kit.control(), p, PROB_MIN, COUNT_UNKNOWN);
          overflow->add_req(__ IfFalse(iff));
          Node* notnull = __ IfTrue(iff);
          kit.set_control(notnull); // set control for the cast_not_null
          arg = kit.cast_not_null(arg, false);
          sc->set_argument(argi, arg);
        }
        assert(kit.gvn().type(arg)->higher_equal(TypeInstPtr::NOTNULL), "sanity");
        // Fallthrough to add string length.
      }
      case StringConcat::StringMode: {
        const Type* type = kit.gvn().type(arg);
        Node* count = NULL;
        if (type == TypePtr::NULL_PTR) {
          // replace the argument with the null checked version
          arg = null_string;
          sc->set_argument(argi, arg);
          count = kit.load_String_length(kit.control(), arg);
        } else if (!type->higher_equal(TypeInstPtr::NOTNULL)) {
          // s = s != null ? s : "null";
          // length = length + (s.count - s.offset);
          RegionNode *r = new (C) RegionNode(3);
          kit.gvn().set_type(r, Type::CONTROL);
          Node *phi = new (C) PhiNode(r, type);
          kit.gvn().set_type(phi, phi->bottom_type());
          Node* p = __ Bool(__ CmpP(arg, kit.null()), BoolTest::ne);
          IfNode* iff = kit.create_and_map_if(kit.control(), p, PROB_MIN, COUNT_UNKNOWN);
          Node* notnull = __ IfTrue(iff);
          Node* isnull =  __ IfFalse(iff);
          kit.set_control(notnull); // set control for the cast_not_null
          r->init_req(1, notnull);
          phi->init_req(1, kit.cast_not_null(arg, false));
          r->init_req(2, isnull);
          phi->init_req(2, null_string);
          kit.set_control(r);
          C->record_for_igvn(r);
          C->record_for_igvn(phi);
          // replace the argument with the null checked version
          arg = phi;
          sc->set_argument(argi, arg);
          count = kit.load_String_length(kit.control(), arg);
        } else {
          // A corresponding nullcheck will be connected during IGVN MemNode::Ideal_common_DU_postCCP
          // kit.control might be a different test, that can be hoisted above the actual nullcheck
          // in case, that the control input is not null, Ideal_common_DU_postCCP will not look for a nullcheck.
          count = kit.load_String_length(NULL, arg);
        }
        length = __ AddI(length, count);
        string_sizes->init_req(argi, NULL);
        break;
      }
      case StringConcat::CharMode: {
        // one character only
        length = __ AddI(length, __ intcon(1));
        break;
      }
      default:
        ShouldNotReachHere();
    }
    if (argi > 0) {
      // Check that the sum hasn't overflowed
      IfNode* iff = kit.create_and_map_if(kit.control(),
                                          __ Bool(__ CmpI(length, __ intcon(0)), BoolTest::lt),
                                          PROB_MIN, COUNT_UNKNOWN);
      kit.set_control(__ IfFalse(iff));
      overflow->set_req(argi, __ IfTrue(iff));
    }
  }

  {
    // Hook
    PreserveJVMState pjvms(&kit);
    kit.set_control(overflow);
    C->record_for_igvn(overflow);
    kit.uncommon_trap(Deoptimization::Reason_intrinsic,
                      Deoptimization::Action_make_not_entrant);
  }

  Node* result;
  if (!kit.stopped()) {

    // length now contains the number of characters needed for the
    // char[] so create a new AllocateArray for the char[]
    Node* char_array = NULL;
    {
      PreserveReexecuteState preexecs(&kit);
      // The original jvms is for an allocation of either a String or
      // StringBuffer so no stack adjustment is necessary for proper
      // reexecution.  If we deoptimize in the slow path the bytecode
      // will be reexecuted and the char[] allocation will be thrown away.
      kit.jvms()->set_should_reexecute(true);
      char_array = kit.new_array(__ makecon(TypeKlassPtr::make(ciTypeArrayKlass::make(T_CHAR))),
                                 length, 1);
    }

    // Mark the allocation so that zeroing is skipped since the code
    // below will overwrite the entire array
    AllocateArrayNode* char_alloc = AllocateArrayNode::Ideal_array_allocation(char_array, _gvn);
    char_alloc->maybe_set_complete(_gvn);

    // Now copy the string representations into the final char[]
    Node* start = __ intcon(0);
    for (int argi = 0; argi < sc->num_arguments(); argi++) {
      Node* arg = sc->argument(argi);
      switch (sc->mode(argi)) {
        case StringConcat::IntMode: {
          Node* end = __ AddI(start, string_sizes->in(argi));
          // getChars words backwards so pass the ending point as well as the start
          int_getChars(kit, arg, char_array, start, end);
          start = end;
          break;
        }
        case StringConcat::StringNullCheckMode:
        case StringConcat::StringMode: {
          start = copy_string(kit, arg, char_array, start);
          break;
        }
        case StringConcat::CharMode: {
          __ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR),
                             arg, T_CHAR, char_adr_idx, MemNode::unordered);
          start = __ AddI(start, __ intcon(1));
          break;
        }
        default:
          ShouldNotReachHere();
      }
    }

    // If we're not reusing an existing String allocation then allocate one here.
    result = sc->string_alloc();
    if (result == NULL) {
      PreserveReexecuteState preexecs(&kit);
      // The original jvms is for an allocation of either a String or
      // StringBuffer so no stack adjustment is necessary for proper
      // reexecution.
      kit.jvms()->set_should_reexecute(true);
      result = kit.new_instance(__ makecon(TypeKlassPtr::make(C->env()->String_klass())));
    }

    // Intialize the string
    if (java_lang_String::has_offset_field()) {
      kit.store_String_offset(kit.control(), result, __ intcon(0));
      kit.store_String_length(kit.control(), result, length);
    }
    kit.store_String_value(kit.control(), result, char_array);

    // The value field is final. Emit a barrier here to ensure that the effect
    // of the initialization is committed to memory before any code publishes
    // a reference to the newly constructed object (see Parse::do_exits()).
    assert(AllocateNode::Ideal_allocation(result, _gvn) != NULL, "should be newly allocated");
    kit.insert_mem_bar(Op_MemBarRelease, result);
  } else {
    result = C->top();
  }
  // hook up the outgoing control and result
  kit.replace_call(sc->end(), result);

  // Unhook any hook nodes
  string_sizes->disconnect_inputs(NULL, C);
  sc->cleanup();
}
C:\hotspot-69087d08d473\src\share\vm/opto/stringopts.hpp
/*
 * Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_VM_OPTO_STRINGOPTS_HPP
#define SHARE_VM_OPTO_STRINGOPTS_HPP

#include "opto/node.hpp"
#include "opto/phaseX.hpp"

class StringConcat;

class PhaseStringOpts : public Phase {
  friend class StringConcat;

 private:
  PhaseGVN* _gvn;

  // List of dead nodes to clean up aggressively at the end
  Unique_Node_List dead_worklist;

  // Memory slices needed for code gen
  int char_adr_idx;

  // Integer.sizeTable - used for int to String conversion
  ciField* size_table_field;

  // A set for use by various stages
  VectorSet _visited;

  // Collect a list of all SB.toString calls
  Node_List collect_toString_calls();

  // Examine the use of the SB alloc to see if it can be replace with
  // a single string construction.
  StringConcat* build_candidate(CallStaticJavaNode* call);

  // Replace all the SB calls in concat with an optimization String allocation
  void replace_string_concat(StringConcat* concat);

  // Load the value of a static field, performing any constant folding.
  Node* fetch_static_field(GraphKit& kit, ciField* field);

  // Compute the number of characters required to represent the int value
  Node* int_stringSize(GraphKit& kit, Node* value);

  // Copy the characters representing value into char_array starting at start
  void int_getChars(GraphKit& kit, Node* value, Node* char_array, Node* start, Node* end);

  // Copy of the contents of the String str into char_array starting at index start.
  Node* copy_string(GraphKit& kit, Node* str, Node* char_array, Node* start);

  // Clean up any leftover nodes
  void record_dead_node(Node* node);
  void remove_dead_nodes();

  PhaseGVN* gvn() { return _gvn; }

  enum {
    // max length of constant string copy unrolling in copy_string
    unroll_string_copy_length = 6
  };

 public:
  PhaseStringOpts(PhaseGVN* gvn, Unique_Node_List* worklist);
};

#endif // SHARE_VM_OPTO_STRINGOPTS_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/subnode.cpp
/*
 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "compiler/compileLog.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/addnode.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/connode.hpp"
#include "opto/loopnode.hpp"
#include "opto/matcher.hpp"
#include "opto/mulnode.hpp"
#include "opto/opcodes.hpp"
#include "opto/phaseX.hpp"
#include "opto/subnode.hpp"
#include "runtime/sharedRuntime.hpp"

// Portions of code courtesy of Clifford Click

// Optimization - Graph Style

#include "math.h"

//=============================================================================
//------------------------------Identity---------------------------------------
// If right input is a constant 0, return the left input.
Node *SubNode::Identity( PhaseTransform *phase ) {
  assert(in(1) != this, "Must already have called Value");
  assert(in(2) != this, "Must already have called Value");

  // Remove double negation
  const Type *zero = add_id();
  if( phase->type( in(1) )->higher_equal( zero ) &&
      in(2)->Opcode() == Opcode() &&
      phase->type( in(2)->in(1) )->higher_equal( zero ) ) {
    return in(2)->in(2);
  }

  // Convert "(X+Y) - Y" into X and "(X+Y) - X" into Y
  if( in(1)->Opcode() == Op_AddI ) {
    if( phase->eqv(in(1)->in(2),in(2)) )
      return in(1)->in(1);
    if (phase->eqv(in(1)->in(1),in(2)))
      return in(1)->in(2);

    // Also catch: "(X + Opaque2(Y)) - Y".  In this case, 'Y' is a loop-varying
    // trip counter and X is likely to be loop-invariant (that's how O2 Nodes
    // are originally used, although the optimizer sometimes jiggers things).
    // This folding through an O2 removes a loop-exit use of a loop-varying
    // value and generally lowers register pressure in and around the loop.
    if( in(1)->in(2)->Opcode() == Op_Opaque2 &&
        phase->eqv(in(1)->in(2)->in(1),in(2)) )
      return in(1)->in(1);
  }

  return ( phase->type( in(2) )->higher_equal( zero ) ) ? in(1) : this;
}

//------------------------------Value------------------------------------------
// A subtract node differences it's two inputs.
const Type* SubNode::Value_common(PhaseTransform *phase) const {
  const Node* in1 = in(1);
  const Node* in2 = in(2);
  // Either input is TOP ==> the result is TOP
  const Type* t1 = (in1 == this) ? Type::TOP : phase->type(in1);
  if( t1 == Type::TOP ) return Type::TOP;
  const Type* t2 = (in2 == this) ? Type::TOP : phase->type(in2);
  if( t2 == Type::TOP ) return Type::TOP;

  // Not correct for SubFnode and AddFNode (must check for infinity)
  // Equal?  Subtract is zero
  if (in1->eqv_uncast(in2))  return add_id();

  // Either input is BOTTOM ==> the result is the local BOTTOM
  if( t1 == Type::BOTTOM || t2 == Type::BOTTOM )
    return bottom_type();

  return NULL;
}

const Type* SubNode::Value(PhaseTransform *phase) const {
  const Type* t = Value_common(phase);
  if (t != NULL) {
    return t;
  }
  const Type* t1 = phase->type(in(1));
  const Type* t2 = phase->type(in(2));
  return sub(t1,t2);            // Local flavor of type subtraction

}

//=============================================================================

//------------------------------Helper function--------------------------------
static bool ok_to_convert(Node* inc, Node* iv) {
    // Do not collapse (x+c0)-y if "+" is a loop increment, because the
    // "-" is loop invariant and collapsing extends the live-range of "x"
    // to overlap with the "+", forcing another register to be used in
    // the loop.
    // This test will be clearer with '&&' (apply DeMorgan's rule)
    // but I like the early cutouts that happen here.
    const PhiNode *phi;
    if( ( !inc->in(1)->is_Phi() ||
          !(phi=inc->in(1)->as_Phi()) ||
          phi->is_copy() ||
          !phi->region()->is_CountedLoop() ||
          inc != phi->region()->as_CountedLoop()->incr() )
       &&
        // Do not collapse (x+c0)-iv if "iv" is a loop induction variable,
        // because "x" maybe invariant.
        ( !iv->is_loop_iv() )
      ) {
      return true;
    } else {
      return false;
    }
}
//------------------------------Ideal------------------------------------------
Node *SubINode::Ideal(PhaseGVN *phase, bool can_reshape){
  Node *in1 = in(1);
  Node *in2 = in(2);
  uint op1 = in1->Opcode();
  uint op2 = in2->Opcode();

#ifdef ASSERT
  // Check for dead loop
  if( phase->eqv( in1, this ) || phase->eqv( in2, this ) ||
      ( op1 == Op_AddI || op1 == Op_SubI ) &&
      ( phase->eqv( in1->in(1), this ) || phase->eqv( in1->in(2), this ) ||
        phase->eqv( in1->in(1), in1  ) || phase->eqv( in1->in(2), in1 ) ) )
    assert(false, "dead loop in SubINode::Ideal");
#endif

  const Type *t2 = phase->type( in2 );
  if( t2 == Type::TOP ) return NULL;
  // Convert "x-c0" into "x+ -c0".
  if( t2->base() == Type::Int ){        // Might be bottom or top...
    const TypeInt *i = t2->is_int();
    if( i->is_con() )
      return new (phase->C) AddINode(in1, phase->intcon(-i->get_con()));
  }

  // Convert "(x+c0) - y" into (x-y) + c0"
  // Do not collapse (x+c0)-y if "+" is a loop increment or
  // if "y" is a loop induction variable.
  if( op1 == Op_AddI && ok_to_convert(in1, in2) ) {
    const Type *tadd = phase->type( in1->in(2) );
    if( tadd->singleton() && tadd != Type::TOP ) {
      Node *sub2 = phase->transform( new (phase->C) SubINode( in1->in(1), in2 ));
      return new (phase->C) AddINode( sub2, in1->in(2) );
    }
  }


  // Convert "x - (y+c0)" into "(x-y) - c0"
  // Need the same check as in above optimization but reversed.
  if (op2 == Op_AddI && ok_to_convert(in2, in1)) {
    Node* in21 = in2->in(1);
    Node* in22 = in2->in(2);
    const TypeInt* tcon = phase->type(in22)->isa_int();
    if (tcon != NULL && tcon->is_con()) {
      Node* sub2 = phase->transform( new (phase->C) SubINode(in1, in21) );
      Node* neg_c0 = phase->intcon(- tcon->get_con());
      return new (phase->C) AddINode(sub2, neg_c0);
    }
  }

  const Type *t1 = phase->type( in1 );
  if( t1 == Type::TOP ) return NULL;

#ifdef ASSERT
  // Check for dead loop
  if( ( op2 == Op_AddI || op2 == Op_SubI ) &&
      ( phase->eqv( in2->in(1), this ) || phase->eqv( in2->in(2), this ) ||
        phase->eqv( in2->in(1), in2  ) || phase->eqv( in2->in(2), in2  ) ) )
    assert(false, "dead loop in SubINode::Ideal");
#endif

  // Convert "x - (x+y)" into "-y"
  if( op2 == Op_AddI &&
      phase->eqv( in1, in2->in(1) ) )
    return new (phase->C) SubINode( phase->intcon(0),in2->in(2));
  // Convert "(x-y) - x" into "-y"
  if( op1 == Op_SubI &&
      phase->eqv( in1->in(1), in2 ) )
    return new (phase->C) SubINode( phase->intcon(0),in1->in(2));
  // Convert "x - (y+x)" into "-y"
  if( op2 == Op_AddI &&
      phase->eqv( in1, in2->in(2) ) )
    return new (phase->C) SubINode( phase->intcon(0),in2->in(1));

  // Convert "0 - (x-y)" into "y-x"
  if( t1 == TypeInt::ZERO && op2 == Op_SubI )
    return new (phase->C) SubINode( in2->in(2), in2->in(1) );

  // Convert "0 - (x+con)" into "-con-x"
  jint con;
  if( t1 == TypeInt::ZERO && op2 == Op_AddI &&
      (con = in2->in(2)->find_int_con(0)) != 0 )
    return new (phase->C) SubINode( phase->intcon(-con), in2->in(1) );

  // Convert "(X+A) - (X+B)" into "A - B"
  if( op1 == Op_AddI && op2 == Op_AddI && in1->in(1) == in2->in(1) )
    return new (phase->C) SubINode( in1->in(2), in2->in(2) );

  // Convert "(A+X) - (B+X)" into "A - B"
  if( op1 == Op_AddI && op2 == Op_AddI && in1->in(2) == in2->in(2) )
    return new (phase->C) SubINode( in1->in(1), in2->in(1) );

  // Convert "(A+X) - (X+B)" into "A - B"
  if( op1 == Op_AddI && op2 == Op_AddI && in1->in(2) == in2->in(1) )
    return new (phase->C) SubINode( in1->in(1), in2->in(2) );

  // Convert "(X+A) - (B+X)" into "A - B"
  if( op1 == Op_AddI && op2 == Op_AddI && in1->in(1) == in2->in(2) )
    return new (phase->C) SubINode( in1->in(2), in2->in(1) );

  // Convert "A-(B-C)" into (A+C)-B", since add is commutative and generally
  // nicer to optimize than subtract.
  if( op2 == Op_SubI && in2->outcnt() == 1) {
    Node *add1 = phase->transform( new (phase->C) AddINode( in1, in2->in(2) ) );
    return new (phase->C) SubINode( add1, in2->in(1) );
  }

  return NULL;
}

//------------------------------sub--------------------------------------------
// A subtract node differences it's two inputs.
const Type *SubINode::sub( const Type *t1, const Type *t2 ) const {
  const TypeInt *r0 = t1->is_int(); // Handy access
  const TypeInt *r1 = t2->is_int();
  int32 lo = java_subtract(r0->_lo, r1->_hi);
  int32 hi = java_subtract(r0->_hi, r1->_lo);

  // We next check for 32-bit overflow.
  // If that happens, we just assume all integers are possible.
  if( (((r0->_lo ^ r1->_hi) >= 0) ||    // lo ends have same signs OR
       ((r0->_lo ^      lo) >= 0)) &&   // lo results have same signs AND
      (((r0->_hi ^ r1->_lo) >= 0) ||    // hi ends have same signs OR
       ((r0->_hi ^      hi) >= 0)) )    // hi results have same signs
    return TypeInt::make(lo,hi,MAX2(r0->_widen,r1->_widen));
  else                          // Overflow; assume all integers
    return TypeInt::INT;
}

//=============================================================================
//------------------------------Ideal------------------------------------------
Node *SubLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
  Node *in1 = in(1);
  Node *in2 = in(2);
  uint op1 = in1->Opcode();
  uint op2 = in2->Opcode();

#ifdef ASSERT
  // Check for dead loop
  if( phase->eqv( in1, this ) || phase->eqv( in2, this ) ||
      ( op1 == Op_AddL || op1 == Op_SubL ) &&
      ( phase->eqv( in1->in(1), this ) || phase->eqv( in1->in(2), this ) ||
        phase->eqv( in1->in(1), in1  ) || phase->eqv( in1->in(2), in1  ) ) )
    assert(false, "dead loop in SubLNode::Ideal");
#endif

  if( phase->type( in2 ) == Type::TOP ) return NULL;
  const TypeLong *i = phase->type( in2 )->isa_long();
  // Convert "x-c0" into "x+ -c0".
  if( i &&                      // Might be bottom or top...
      i->is_con() )
    return new (phase->C) AddLNode(in1, phase->longcon(-i->get_con()));

  // Convert "(x+c0) - y" into (x-y) + c0"
  // Do not collapse (x+c0)-y if "+" is a loop increment or
  // if "y" is a loop induction variable.
  if( op1 == Op_AddL && ok_to_convert(in1, in2) ) {
    Node *in11 = in1->in(1);
    const Type *tadd = phase->type( in1->in(2) );
    if( tadd->singleton() && tadd != Type::TOP ) {
      Node *sub2 = phase->transform( new (phase->C) SubLNode( in11, in2 ));
      return new (phase->C) AddLNode( sub2, in1->in(2) );
    }
  }

  // Convert "x - (y+c0)" into "(x-y) - c0"
  // Need the same check as in above optimization but reversed.
  if (op2 == Op_AddL && ok_to_convert(in2, in1)) {
    Node* in21 = in2->in(1);
    Node* in22 = in2->in(2);
    const TypeLong* tcon = phase->type(in22)->isa_long();
    if (tcon != NULL && tcon->is_con()) {
      Node* sub2 = phase->transform( new (phase->C) SubLNode(in1, in21) );
      Node* neg_c0 = phase->longcon(- tcon->get_con());
      return new (phase->C) AddLNode(sub2, neg_c0);
    }
  }

  const Type *t1 = phase->type( in1 );
  if( t1 == Type::TOP ) return NULL;

#ifdef ASSERT
  // Check for dead loop
  if( ( op2 == Op_AddL || op2 == Op_SubL ) &&
      ( phase->eqv( in2->in(1), this ) || phase->eqv( in2->in(2), this ) ||
        phase->eqv( in2->in(1), in2  ) || phase->eqv( in2->in(2), in2  ) ) )
    assert(false, "dead loop in SubLNode::Ideal");
#endif

  // Convert "x - (x+y)" into "-y"
  if( op2 == Op_AddL &&
      phase->eqv( in1, in2->in(1) ) )
    return new (phase->C) SubLNode( phase->makecon(TypeLong::ZERO), in2->in(2));
  // Convert "x - (y+x)" into "-y"
  if( op2 == Op_AddL &&
      phase->eqv( in1, in2->in(2) ) )
    return new (phase->C) SubLNode( phase->makecon(TypeLong::ZERO),in2->in(1));

  // Convert "0 - (x-y)" into "y-x"
  if( phase->type( in1 ) == TypeLong::ZERO && op2 == Op_SubL )
    return new (phase->C) SubLNode( in2->in(2), in2->in(1) );

  // Convert "(X+A) - (X+B)" into "A - B"
  if( op1 == Op_AddL && op2 == Op_AddL && in1->in(1) == in2->in(1) )
    return new (phase->C) SubLNode( in1->in(2), in2->in(2) );

  // Convert "(A+X) - (B+X)" into "A - B"
  if( op1 == Op_AddL && op2 == Op_AddL && in1->in(2) == in2->in(2) )
    return new (phase->C) SubLNode( in1->in(1), in2->in(1) );

  // Convert "A-(B-C)" into (A+C)-B"
  if( op2 == Op_SubL && in2->outcnt() == 1) {
    Node *add1 = phase->transform( new (phase->C) AddLNode( in1, in2->in(2) ) );
    return new (phase->C) SubLNode( add1, in2->in(1) );
  }

  return NULL;
}

//------------------------------sub--------------------------------------------
// A subtract node differences it's two inputs.
const Type *SubLNode::sub( const Type *t1, const Type *t2 ) const {
  const TypeLong *r0 = t1->is_long(); // Handy access
  const TypeLong *r1 = t2->is_long();
  jlong lo = java_subtract(r0->_lo, r1->_hi);
  jlong hi = java_subtract(r0->_hi, r1->_lo);

  // We next check for 32-bit overflow.
  // If that happens, we just assume all integers are possible.
  if( (((r0->_lo ^ r1->_hi) >= 0) ||    // lo ends have same signs OR
       ((r0->_lo ^      lo) >= 0)) &&   // lo results have same signs AND
      (((r0->_hi ^ r1->_lo) >= 0) ||    // hi ends have same signs OR
       ((r0->_hi ^      hi) >= 0)) )    // hi results have same signs
    return TypeLong::make(lo,hi,MAX2(r0->_widen,r1->_widen));
  else                          // Overflow; assume all integers
    return TypeLong::LONG;
}

//=============================================================================
//------------------------------Value------------------------------------------
// A subtract node differences its two inputs.
const Type *SubFPNode::Value( PhaseTransform *phase ) const {
  const Node* in1 = in(1);
  const Node* in2 = in(2);
  // Either input is TOP ==> the result is TOP
  const Type* t1 = (in1 == this) ? Type::TOP : phase->type(in1);
  if( t1 == Type::TOP ) return Type::TOP;
  const Type* t2 = (in2 == this) ? Type::TOP : phase->type(in2);
  if( t2 == Type::TOP ) return Type::TOP;

  // if both operands are infinity of same sign, the result is NaN; do
  // not replace with zero
  if( (t1->is_finite() && t2->is_finite()) ) {
    if( phase->eqv(in1, in2) ) return add_id();
  }

  // Either input is BOTTOM ==> the result is the local BOTTOM
  const Type *bot = bottom_type();
  if( (t1 == bot) || (t2 == bot) ||
      (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
    return bot;

  return sub(t1,t2);            // Local flavor of type subtraction
}


//=============================================================================
//------------------------------Ideal------------------------------------------
Node *SubFNode::Ideal(PhaseGVN *phase, bool can_reshape) {
  const Type *t2 = phase->type( in(2) );
  // Convert "x-c0" into "x+ -c0".
  if( t2->base() == Type::FloatCon ) {  // Might be bottom or top...
    // return new (phase->C, 3) AddFNode(in(1), phase->makecon( TypeF::make(-t2->getf()) ) );
  }

  // Not associative because of boundary conditions (infinity)
  if( IdealizedNumerics && !phase->C->method()->is_strict() ) {
    // Convert "x - (x+y)" into "-y"
    if( in(2)->is_Add() &&
        phase->eqv(in(1),in(2)->in(1) ) )
      return new (phase->C) SubFNode( phase->makecon(TypeF::ZERO),in(2)->in(2));
  }

  // Cannot replace 0.0-X with -X because a 'fsub' bytecode computes
  // 0.0-0.0 as +0.0, while a 'fneg' bytecode computes -0.0.
  //if( phase->type(in(1)) == TypeF::ZERO )
  //return new (phase->C, 2) NegFNode(in(2));

  return NULL;
}

//------------------------------sub--------------------------------------------
// A subtract node differences its two inputs.
const Type *SubFNode::sub( const Type *t1, const Type *t2 ) const {
  // no folding if one of operands is infinity or NaN, do not do constant folding
  if( g_isfinite(t1->getf()) && g_isfinite(t2->getf()) ) {
    return TypeF::make( t1->getf() - t2->getf() );
  }
  else if( g_isnan(t1->getf()) ) {
    return t1;
  }
  else if( g_isnan(t2->getf()) ) {
    return t2;
  }
  else {
    return Type::FLOAT;
  }
}

//=============================================================================
//------------------------------Ideal------------------------------------------
Node *SubDNode::Ideal(PhaseGVN *phase, bool can_reshape){
  const Type *t2 = phase->type( in(2) );
  // Convert "x-c0" into "x+ -c0".
  if( t2->base() == Type::DoubleCon ) { // Might be bottom or top...
    // return new (phase->C, 3) AddDNode(in(1), phase->makecon( TypeD::make(-t2->getd()) ) );
  }

  // Not associative because of boundary conditions (infinity)
  if( IdealizedNumerics && !phase->C->method()->is_strict() ) {
    // Convert "x - (x+y)" into "-y"
    if( in(2)->is_Add() &&
        phase->eqv(in(1),in(2)->in(1) ) )
      return new (phase->C) SubDNode( phase->makecon(TypeD::ZERO),in(2)->in(2));
  }

  // Cannot replace 0.0-X with -X because a 'dsub' bytecode computes
  // 0.0-0.0 as +0.0, while a 'dneg' bytecode computes -0.0.
  //if( phase->type(in(1)) == TypeD::ZERO )
  //return new (phase->C, 2) NegDNode(in(2));

  return NULL;
}

//------------------------------sub--------------------------------------------
// A subtract node differences its two inputs.
const Type *SubDNode::sub( const Type *t1, const Type *t2 ) const {
  // no folding if one of operands is infinity or NaN, do not do constant folding
  if( g_isfinite(t1->getd()) && g_isfinite(t2->getd()) ) {
    return TypeD::make( t1->getd() - t2->getd() );
  }
  else if( g_isnan(t1->getd()) ) {
    return t1;
  }
  else if( g_isnan(t2->getd()) ) {
    return t2;
  }
  else {
    return Type::DOUBLE;
  }
}

//=============================================================================
//------------------------------Idealize---------------------------------------
// Unlike SubNodes, compare must still flatten return value to the
// range -1, 0, 1.
// And optimizations like those for (X + Y) - X fail if overflow happens.
Node *CmpNode::Identity( PhaseTransform *phase ) {
  return this;
}

//=============================================================================
//------------------------------cmp--------------------------------------------
// Simplify a CmpI (compare 2 integers) node, based on local information.
// If both inputs are constants, compare them.
const Type *CmpINode::sub( const Type *t1, const Type *t2 ) const {
  const TypeInt *r0 = t1->is_int(); // Handy access
  const TypeInt *r1 = t2->is_int();

  if( r0->_hi < r1->_lo )       // Range is always low?
    return TypeInt::CC_LT;
  else if( r0->_lo > r1->_hi )  // Range is always high?
    return TypeInt::CC_GT;

  else if( r0->is_con() && r1->is_con() ) { // comparing constants?
    assert(r0->get_con() == r1->get_con(), "must be equal");
    return TypeInt::CC_EQ;      // Equal results.
  } else if( r0->_hi == r1->_lo ) // Range is never high?
    return TypeInt::CC_LE;
  else if( r0->_lo == r1->_hi ) // Range is never low?
    return TypeInt::CC_GE;
  return TypeInt::CC;           // else use worst case results
}

// Simplify a CmpU (compare 2 integers) node, based on local information.
// If both inputs are constants, compare them.
const Type *CmpUNode::sub( const Type *t1, const Type *t2 ) const {
  assert(!t1->isa_ptr(), "obsolete usage of CmpU");

  // comparing two unsigned ints
  const TypeInt *r0 = t1->is_int();   // Handy access
  const TypeInt *r1 = t2->is_int();

  // Current installed version
  // Compare ranges for non-overlap
  juint lo0 = r0->_lo;
  juint hi0 = r0->_hi;
  juint lo1 = r1->_lo;
  juint hi1 = r1->_hi;

  // If either one has both negative and positive values,
  // it therefore contains both 0 and -1, and since [0..-1] is the
  // full unsigned range, the type must act as an unsigned bottom.
  bool bot0 = ((jint)(lo0 ^ hi0) < 0);
  bool bot1 = ((jint)(lo1 ^ hi1) < 0);

  if (bot0 || bot1) {
    // All unsigned values are LE -1 and GE 0.
    if (lo0 == 0 && hi0 == 0) {
      return TypeInt::CC_LE;            //   0 <= bot
    } else if ((jint)lo0 == -1 && (jint)hi0 == -1) {
      return TypeInt::CC_GE;            // -1 >= bot
    } else if (lo1 == 0 && hi1 == 0) {
      return TypeInt::CC_GE;            // bot >= 0
    } else if ((jint)lo1 == -1 && (jint)hi1 == -1) {
      return TypeInt::CC_LE;            // bot <= -1
    }
  } else {
    // We can use ranges of the form [lo..hi] if signs are the same.
    assert(lo0 <= hi0 && lo1 <= hi1, "unsigned ranges are valid");
    // results are reversed, '-' > '+' for unsigned compare
    if (hi0 < lo1) {
      return TypeInt::CC_LT;            // smaller
    } else if (lo0 > hi1) {
      return TypeInt::CC_GT;            // greater
    } else if (hi0 == lo1 && lo0 == hi1) {
      return TypeInt::CC_EQ;            // Equal results
    } else if (lo0 >= hi1) {
      return TypeInt::CC_GE;
    } else if (hi0 <= lo1) {
      // Check for special case in Hashtable::get.  (See below.)
      if ((jint)lo0 >= 0 && (jint)lo1 >= 0 && is_index_range_check())
        return TypeInt::CC_LT;
      return TypeInt::CC_LE;
    }
  }
  // Check for special case in Hashtable::get - the hash index is
  // mod'ed to the table size so the following range check is useless.
  // Check for: (X Mod Y) CmpU Y, where the mod result and Y both have
  // to be positive.
  // (This is a gross hack, since the sub method never
  // looks at the structure of the node in any other case.)
  if ((jint)lo0 >= 0 && (jint)lo1 >= 0 && is_index_range_check())
    return TypeInt::CC_LT;
  return TypeInt::CC;                   // else use worst case results
}

const Type* CmpUNode::Value(PhaseTransform *phase) const {
  const Type* t = SubNode::Value_common(phase);
  if (t != NULL) {
    return t;
  }
  const Node* in1 = in(1);
  const Node* in2 = in(2);
  const Type* t1 = phase->type(in1);
  const Type* t2 = phase->type(in2);
  assert(t1->isa_int(), "CmpU has only Int type inputs");
  if (t2 == TypeInt::INT) { // Compare to bottom?
    return bottom_type();
  }
  uint in1_op = in1->Opcode();
  if (in1_op == Op_AddI || in1_op == Op_SubI) {
    // The problem rise when result of AddI(SubI) may overflow
    // signed integer value. Let say the input type is
    // [256, maxint] then +128 will create 2 ranges due to
    // overflow: [minint, minint+127] and [384, maxint].
    // But C2 type system keep only 1 type range and as result
    // it use general [minint, maxint] for this case which we
    // can't optimize.
    //
    // Make 2 separate type ranges based on types of AddI(SubI) inputs
    // and compare results of their compare. If results are the same
    // CmpU node can be optimized.
    const Node* in11 = in1->in(1);
    const Node* in12 = in1->in(2);
    const Type* t11 = (in11 == in1) ? Type::TOP : phase->type(in11);
    const Type* t12 = (in12 == in1) ? Type::TOP : phase->type(in12);
    // Skip cases when input types are top or bottom.
    if ((t11 != Type::TOP) && (t11 != TypeInt::INT) &&
        (t12 != Type::TOP) && (t12 != TypeInt::INT)) {
      const TypeInt *r0 = t11->is_int();
      const TypeInt *r1 = t12->is_int();
      jlong lo_r0 = r0->_lo;
      jlong hi_r0 = r0->_hi;
      jlong lo_r1 = r1->_lo;
      jlong hi_r1 = r1->_hi;
      if (in1_op == Op_SubI) {
        jlong tmp = hi_r1;
        hi_r1 = -lo_r1;
        lo_r1 = -tmp;
        // Note, for substructing [minint,x] type range
        // long arithmetic provides correct overflow answer.
        // The confusion come from the fact that in 32-bit
        // -minint == minint but in 64-bit -minint == maxint+1.
      }
      jlong lo_long = lo_r0 + lo_r1;
      jlong hi_long = hi_r0 + hi_r1;
      int lo_tr1 = min_jint;
      int hi_tr1 = (int)hi_long;
      int lo_tr2 = (int)lo_long;
      int hi_tr2 = max_jint;
      bool underflow = lo_long != (jlong)lo_tr2;
      bool overflow  = hi_long != (jlong)hi_tr1;
      // Use sub(t1, t2) when there is no overflow (one type range)
      // or when both overflow and underflow (too complex).
      if ((underflow != overflow) && (hi_tr1 < lo_tr2)) {
        // Overflow only on one boundary, compare 2 separate type ranges.
        int w = MAX2(r0->_widen, r1->_widen); // _widen does not matter here
        const TypeInt* tr1 = TypeInt::make(lo_tr1, hi_tr1, w);
        const TypeInt* tr2 = TypeInt::make(lo_tr2, hi_tr2, w);
        const Type* cmp1 = sub(tr1, t2);
        const Type* cmp2 = sub(tr2, t2);
        if (cmp1 == cmp2) {
          return cmp1; // Hit!
        }
      }
    }
  }

  return sub(t1, t2);            // Local flavor of type subtraction
}

bool CmpUNode::is_index_range_check() const {
  // Check for the "(X ModI Y) CmpU Y" shape
  return (in(1)->Opcode() == Op_ModI &&
          in(1)->in(2)->eqv_uncast(in(2)));
}

//------------------------------Idealize---------------------------------------
Node *CmpINode::Ideal( PhaseGVN *phase, bool can_reshape ) {
  if (phase->type(in(2))->higher_equal(TypeInt::ZERO)) {
    switch (in(1)->Opcode()) {
    case Op_CmpL3:              // Collapse a CmpL3/CmpI into a CmpL
      return new (phase->C) CmpLNode(in(1)->in(1),in(1)->in(2));
    case Op_CmpF3:              // Collapse a CmpF3/CmpI into a CmpF
      return new (phase->C) CmpFNode(in(1)->in(1),in(1)->in(2));
    case Op_CmpD3:              // Collapse a CmpD3/CmpI into a CmpD
      return new (phase->C) CmpDNode(in(1)->in(1),in(1)->in(2));
    //case Op_SubI:
      // If (x - y) cannot overflow, then ((x - y) <?> 0)
      // can be turned into (x <?> y).
      // This is handled (with more general cases) by Ideal_sub_algebra.
    }
  }
  return NULL;                  // No change
}


//=============================================================================
// Simplify a CmpL (compare 2 longs ) node, based on local information.
// If both inputs are constants, compare them.
const Type *CmpLNode::sub( const Type *t1, const Type *t2 ) const {
  const TypeLong *r0 = t1->is_long(); // Handy access
  const TypeLong *r1 = t2->is_long();

  if( r0->_hi < r1->_lo )       // Range is always low?
    return TypeInt::CC_LT;
  else if( r0->_lo > r1->_hi )  // Range is always high?
    return TypeInt::CC_GT;

  else if( r0->is_con() && r1->is_con() ) { // comparing constants?
    assert(r0->get_con() == r1->get_con(), "must be equal");
    return TypeInt::CC_EQ;      // Equal results.
  } else if( r0->_hi == r1->_lo ) // Range is never high?
    return TypeInt::CC_LE;
  else if( r0->_lo == r1->_hi ) // Range is never low?
    return TypeInt::CC_GE;
  return TypeInt::CC;           // else use worst case results
}


// Simplify a CmpUL (compare 2 unsigned longs) node, based on local information.
// If both inputs are constants, compare them.
const Type* CmpULNode::sub(const Type* t1, const Type* t2) const {
  assert(!t1->isa_ptr(), "obsolete usage of CmpUL");

  // comparing two unsigned longs
  const TypeLong* r0 = t1->is_long();   // Handy access
  const TypeLong* r1 = t2->is_long();

  // Current installed version
  // Compare ranges for non-overlap
  julong lo0 = r0->_lo;
  julong hi0 = r0->_hi;
  julong lo1 = r1->_lo;
  julong hi1 = r1->_hi;

  // If either one has both negative and positive values,
  // it therefore contains both 0 and -1, and since [0..-1] is the
  // full unsigned range, the type must act as an unsigned bottom.
  bool bot0 = ((jlong)(lo0 ^ hi0) < 0);
  bool bot1 = ((jlong)(lo1 ^ hi1) < 0);

  if (bot0 || bot1) {
    // All unsigned values are LE -1 and GE 0.
    if (lo0 == 0 && hi0 == 0) {
      return TypeInt::CC_LE;            //   0 <= bot
    } else if ((jlong)lo0 == -1 && (jlong)hi0 == -1) {
      return TypeInt::CC_GE;            // -1 >= bot
    } else if (lo1 == 0 && hi1 == 0) {
      return TypeInt::CC_GE;            // bot >= 0
    } else if ((jlong)lo1 == -1 && (jlong)hi1 == -1) {
      return TypeInt::CC_LE;            // bot <= -1
    }
  } else {
    // We can use ranges of the form [lo..hi] if signs are the same.
    assert(lo0 <= hi0 && lo1 <= hi1, "unsigned ranges are valid");
    // results are reversed, '-' > '+' for unsigned compare
    if (hi0 < lo1) {
      return TypeInt::CC_LT;            // smaller
    } else if (lo0 > hi1) {
      return TypeInt::CC_GT;            // greater
    } else if (hi0 == lo1 && lo0 == hi1) {
      return TypeInt::CC_EQ;            // Equal results
    } else if (lo0 >= hi1) {
      return TypeInt::CC_GE;
    } else if (hi0 <= lo1) {
      return TypeInt::CC_LE;
    }
  }

  return TypeInt::CC;                   // else use worst case results
}

//=============================================================================
//------------------------------sub--------------------------------------------
// Simplify an CmpP (compare 2 pointers) node, based on local information.
// If both inputs are constants, compare them.
const Type *CmpPNode::sub( const Type *t1, const Type *t2 ) const {
  const TypePtr *r0 = t1->is_ptr(); // Handy access
  const TypePtr *r1 = t2->is_ptr();

  // Undefined inputs makes for an undefined result
  if( TypePtr::above_centerline(r0->_ptr) ||
      TypePtr::above_centerline(r1->_ptr) )
    return Type::TOP;

  if (r0 == r1 && r0->singleton()) {
    // Equal pointer constants (klasses, nulls, etc.)
    return TypeInt::CC_EQ;
  }

  // See if it is 2 unrelated classes.
  const TypeOopPtr* p0 = r0->isa_oopptr();
  const TypeOopPtr* p1 = r1->isa_oopptr();
  if (p0 && p1) {
    Node* in1 = in(1)->uncast();
    Node* in2 = in(2)->uncast();
    AllocateNode* alloc1 = AllocateNode::Ideal_allocation(in1, NULL);
    AllocateNode* alloc2 = AllocateNode::Ideal_allocation(in2, NULL);
    if (MemNode::detect_ptr_independence(in1, alloc1, in2, alloc2, NULL)) {
      return TypeInt::CC_GT;  // different pointers
    }
    ciKlass* klass0 = p0->klass();
    bool    xklass0 = p0->klass_is_exact();
    ciKlass* klass1 = p1->klass();
    bool    xklass1 = p1->klass_is_exact();
    int kps = (p0->isa_klassptr()?1:0) + (p1->isa_klassptr()?1:0);
    if (klass0 && klass1 &&
        kps != 1 &&             // both or neither are klass pointers
        klass0->is_loaded() && !klass0->is_interface() && // do not trust interfaces
        klass1->is_loaded() && !klass1->is_interface() &&
        (!klass0->is_obj_array_klass() ||
         !klass0->as_obj_array_klass()->base_element_klass()->is_interface()) &&
        (!klass1->is_obj_array_klass() ||
         !klass1->as_obj_array_klass()->base_element_klass()->is_interface())) {
      bool unrelated_classes = false;
      // See if neither subclasses the other, or if the class on top
      // is precise.  In either of these cases, the compare is known
      // to fail if at least one of the pointers is provably not null.
      if (klass0->equals(klass1)) {  // if types are unequal but klasses are equal
        // Do nothing; we know nothing for imprecise types
      } else if (klass0->is_subtype_of(klass1)) {
        // If klass1's type is PRECISE, then classes are unrelated.
        unrelated_classes = xklass1;
      } else if (klass1->is_subtype_of(klass0)) {
        // If klass0's type is PRECISE, then classes are unrelated.
        unrelated_classes = xklass0;
      } else {                  // Neither subtypes the other
        unrelated_classes = true;
      }
      if (unrelated_classes) {
        // The oops classes are known to be unrelated. If the joined PTRs of
        // two oops is not Null and not Bottom, then we are sure that one
        // of the two oops is non-null, and the comparison will always fail.
        TypePtr::PTR jp = r0->join_ptr(r1->_ptr);
        if (jp != TypePtr::Null && jp != TypePtr::BotPTR) {
          return TypeInt::CC_GT;
        }
      }
    }
  }

  // Known constants can be compared exactly
  // Null can be distinguished from any NotNull pointers
  // Unknown inputs makes an unknown result
  if( r0->singleton() ) {
    intptr_t bits0 = r0->get_con();
    if( r1->singleton() )
      return bits0 == r1->get_con() ? TypeInt::CC_EQ : TypeInt::CC_GT;
    return ( r1->_ptr == TypePtr::NotNull && bits0==0 ) ? TypeInt::CC_GT : TypeInt::CC;
  } else if( r1->singleton() ) {
    intptr_t bits1 = r1->get_con();
    return ( r0->_ptr == TypePtr::NotNull && bits1==0 ) ? TypeInt::CC_GT : TypeInt::CC;
  } else
    return TypeInt::CC;
}

static inline Node* isa_java_mirror_load(PhaseGVN* phase, Node* n) {
  // Return the klass node for
  //   LoadP(AddP(foo:Klass, #java_mirror))
  //   or NULL if not matching.
  if (n->Opcode() != Op_LoadP) return NULL;

  const TypeInstPtr* tp = phase->type(n)->isa_instptr();
  if (!tp || tp->klass() != phase->C->env()->Class_klass()) return NULL;

  Node* adr = n->in(MemNode::Address);
  intptr_t off = 0;
  Node* k = AddPNode::Ideal_base_and_offset(adr, phase, off);
  if (k == NULL)  return NULL;
  const TypeKlassPtr* tkp = phase->type(k)->isa_klassptr();
  if (!tkp || off != in_bytes(Klass::java_mirror_offset())) return NULL;

  // We've found the klass node of a Java mirror load.
  return k;
}

static inline Node* isa_const_java_mirror(PhaseGVN* phase, Node* n) {
  // for ConP(Foo.class) return ConP(Foo.klass)
  // otherwise return NULL
  if (!n->is_Con()) return NULL;

  const TypeInstPtr* tp = phase->type(n)->isa_instptr();
  if (!tp) return NULL;

  ciType* mirror_type = tp->java_mirror_type();
  // TypeInstPtr::java_mirror_type() returns non-NULL for compile-
  // time Class constants only.
  if (!mirror_type) return NULL;

  // x.getClass() == int.class can never be true (for all primitive types)
  // Return a ConP(NULL) node for this case.
  if (mirror_type->is_classless()) {
    return phase->makecon(TypePtr::NULL_PTR);
  }

  // return the ConP(Foo.klass)
  assert(mirror_type->is_klass(), "mirror_type should represent a Klass*");
  return phase->makecon(TypeKlassPtr::make(mirror_type->as_klass()));
}

//------------------------------Ideal------------------------------------------
// Normalize comparisons between Java mirror loads to compare the klass instead.
//
// Also check for the case of comparing an unknown klass loaded from the primary
// super-type array vs a known klass with no subtypes.  This amounts to
// checking to see an unknown klass subtypes a known klass with no subtypes;
// this only happens on an exact match.  We can shorten this test by 1 load.
Node *CmpPNode::Ideal( PhaseGVN *phase, bool can_reshape ) {
  // Normalize comparisons between Java mirrors into comparisons of the low-
  // level klass, where a dependent load could be shortened.
  //
  // The new pattern has a nice effect of matching the same pattern used in the
  // fast path of instanceof/checkcast/Class.isInstance(), which allows
  // redundant exact type check be optimized away by GVN.
  // For example, in
  //   if (x.getClass() == Foo.class) {
  //     Foo foo = (Foo) x;
  //     // ... use a ...
  //   }
  // a CmpPNode could be shared between if_acmpne and checkcast
  {
    Node* k1 = isa_java_mirror_load(phase, in(1));
    Node* k2 = isa_java_mirror_load(phase, in(2));
    Node* conk2 = isa_const_java_mirror(phase, in(2));

    if (k1 && (k2 || conk2)) {
      Node* lhs = k1;
      Node* rhs = (k2 != NULL) ? k2 : conk2;
      this->set_req(1, lhs);
      this->set_req(2, rhs);
      return this;
    }
  }

  // Constant pointer on right?
  const TypeKlassPtr* t2 = phase->type(in(2))->isa_klassptr();
  if (t2 == NULL || !t2->klass_is_exact())
    return NULL;
  // Get the constant klass we are comparing to.
  ciKlass* superklass = t2->klass();

  // Now check for LoadKlass on left.
  Node* ldk1 = in(1);
  if (ldk1->is_DecodeNKlass()) {
    ldk1 = ldk1->in(1);
    if (ldk1->Opcode() != Op_LoadNKlass )
      return NULL;
  } else if (ldk1->Opcode() != Op_LoadKlass )
    return NULL;
  // Take apart the address of the LoadKlass:
  Node* adr1 = ldk1->in(MemNode::Address);
  intptr_t con2 = 0;
  Node* ldk2 = AddPNode::Ideal_base_and_offset(adr1, phase, con2);
  if (ldk2 == NULL)
    return NULL;
  if (con2 == oopDesc::klass_offset_in_bytes()) {
    // We are inspecting an object's concrete class.
    // Short-circuit the check if the query is abstract.
    if (superklass->is_interface() ||
        superklass->is_abstract()) {
      // Make it come out always false:
      this->set_req(2, phase->makecon(TypePtr::NULL_PTR));
      return this;
    }
  }

  // Check for a LoadKlass from primary supertype array.
  // Any nested loadklass from loadklass+con must be from the p.s. array.
  if (ldk2->is_DecodeNKlass()) {
    // Keep ldk2 as DecodeN since it could be used in CmpP below.
    if (ldk2->in(1)->Opcode() != Op_LoadNKlass )
      return NULL;
  } else if (ldk2->Opcode() != Op_LoadKlass)
    return NULL;

  // Verify that we understand the situation
  if (con2 != (intptr_t) superklass->super_check_offset())
    return NULL;                // Might be element-klass loading from array klass

  // If 'superklass' has no subklasses and is not an interface, then we are
  // assured that the only input which will pass the type check is
  // 'superklass' itself.
  //
  // We could be more liberal here, and allow the optimization on interfaces
  // which have a single implementor.  This would require us to increase the
  // expressiveness of the add_dependency() mechanism.
  // %%% Do this after we fix TypeOopPtr:  Deps are expressive enough now.

  // Object arrays must have their base element have no subtypes
  while (superklass->is_obj_array_klass()) {
    ciType* elem = superklass->as_obj_array_klass()->element_type();
    superklass = elem->as_klass();
  }
  if (superklass->is_instance_klass()) {
    ciInstanceKlass* ik = superklass->as_instance_klass();
    if (ik->has_subklass() || ik->is_interface())  return NULL;
    // Add a dependency if there is a chance that a subclass will be added later.
    if (!ik->is_final()) {
      phase->C->dependencies()->assert_leaf_type(ik);
    }
  }

  // Bypass the dependent load, and compare directly
  this->set_req(1,ldk2);

  return this;
}

//=============================================================================
//------------------------------sub--------------------------------------------
// Simplify an CmpN (compare 2 pointers) node, based on local information.
// If both inputs are constants, compare them.
const Type *CmpNNode::sub( const Type *t1, const Type *t2 ) const {
  const TypePtr *r0 = t1->make_ptr(); // Handy access
  const TypePtr *r1 = t2->make_ptr();

  // Undefined inputs makes for an undefined result
  if ((r0 == NULL) || (r1 == NULL) ||
      TypePtr::above_centerline(r0->_ptr) ||
      TypePtr::above_centerline(r1->_ptr)) {
    return Type::TOP;
  }
  if (r0 == r1 && r0->singleton()) {
    // Equal pointer constants (klasses, nulls, etc.)
    return TypeInt::CC_EQ;
  }

  // See if it is 2 unrelated classes.
  const TypeOopPtr* p0 = r0->isa_oopptr();
  const TypeOopPtr* p1 = r1->isa_oopptr();
  if (p0 && p1) {
    ciKlass* klass0 = p0->klass();
    bool    xklass0 = p0->klass_is_exact();
    ciKlass* klass1 = p1->klass();
    bool    xklass1 = p1->klass_is_exact();
    int kps = (p0->isa_klassptr()?1:0) + (p1->isa_klassptr()?1:0);
    if (klass0 && klass1 &&
        kps != 1 &&             // both or neither are klass pointers
        !klass0->is_interface() && // do not trust interfaces
        !klass1->is_interface()) {
      bool unrelated_classes = false;
      // See if neither subclasses the other, or if the class on top
      // is precise.  In either of these cases, the compare is known
      // to fail if at least one of the pointers is provably not null.
      if (klass0->equals(klass1)) { // if types are unequal but klasses are equal
        // Do nothing; we know nothing for imprecise types
      } else if (klass0->is_subtype_of(klass1)) {
        // If klass1's type is PRECISE, then classes are unrelated.
        unrelated_classes = xklass1;
      } else if (klass1->is_subtype_of(klass0)) {
        // If klass0's type is PRECISE, then classes are unrelated.
        unrelated_classes = xklass0;
      } else {                  // Neither subtypes the other
        unrelated_classes = true;
      }
      if (unrelated_classes) {
        // The oops classes are known to be unrelated. If the joined PTRs of
        // two oops is not Null and not Bottom, then we are sure that one
        // of the two oops is non-null, and the comparison will always fail.
        TypePtr::PTR jp = r0->join_ptr(r1->_ptr);
        if (jp != TypePtr::Null && jp != TypePtr::BotPTR) {
          return TypeInt::CC_GT;
        }
      }
    }
  }

  // Known constants can be compared exactly
  // Null can be distinguished from any NotNull pointers
  // Unknown inputs makes an unknown result
  if( r0->singleton() ) {
    intptr_t bits0 = r0->get_con();
    if( r1->singleton() )
      return bits0 == r1->get_con() ? TypeInt::CC_EQ : TypeInt::CC_GT;
    return ( r1->_ptr == TypePtr::NotNull && bits0==0 ) ? TypeInt::CC_GT : TypeInt::CC;
  } else if( r1->singleton() ) {
    intptr_t bits1 = r1->get_con();
    return ( r0->_ptr == TypePtr::NotNull && bits1==0 ) ? TypeInt::CC_GT : TypeInt::CC;
  } else
    return TypeInt::CC;
}

//------------------------------Ideal------------------------------------------
Node *CmpNNode::Ideal( PhaseGVN *phase, bool can_reshape ) {
  return NULL;
}

//=============================================================================
//------------------------------Value------------------------------------------
// Simplify an CmpF (compare 2 floats ) node, based on local information.
// If both inputs are constants, compare them.
const Type *CmpFNode::Value( PhaseTransform *phase ) const {
  const Node* in1 = in(1);
  const Node* in2 = in(2);
  // Either input is TOP ==> the result is TOP
  const Type* t1 = (in1 == this) ? Type::TOP : phase->type(in1);
  if( t1 == Type::TOP ) return Type::TOP;
  const Type* t2 = (in2 == this) ? Type::TOP : phase->type(in2);
  if( t2 == Type::TOP ) return Type::TOP;

  // Not constants?  Don't know squat - even if they are the same
  // value!  If they are NaN's they compare to LT instead of EQ.
  const TypeF *tf1 = t1->isa_float_constant();
  const TypeF *tf2 = t2->isa_float_constant();
  if( !tf1 || !tf2 ) return TypeInt::CC;

  // This implements the Java bytecode fcmpl, so unordered returns -1.
  if( tf1->is_nan() || tf2->is_nan() )
    return TypeInt::CC_LT;

  if( tf1->_f < tf2->_f ) return TypeInt::CC_LT;
  if( tf1->_f > tf2->_f ) return TypeInt::CC_GT;
  assert( tf1->_f == tf2->_f, "do not understand FP behavior" );
  return TypeInt::CC_EQ;
}


//=============================================================================
//------------------------------Value------------------------------------------
// Simplify an CmpD (compare 2 doubles ) node, based on local information.
// If both inputs are constants, compare them.
const Type *CmpDNode::Value( PhaseTransform *phase ) const {
  const Node* in1 = in(1);
  const Node* in2 = in(2);
  // Either input is TOP ==> the result is TOP
  const Type* t1 = (in1 == this) ? Type::TOP : phase->type(in1);
  if( t1 == Type::TOP ) return Type::TOP;
  const Type* t2 = (in2 == this) ? Type::TOP : phase->type(in2);
  if( t2 == Type::TOP ) return Type::TOP;

  // Not constants?  Don't know squat - even if they are the same
  // value!  If they are NaN's they compare to LT instead of EQ.
  const TypeD *td1 = t1->isa_double_constant();
  const TypeD *td2 = t2->isa_double_constant();
  if( !td1 || !td2 ) return TypeInt::CC;

  // This implements the Java bytecode dcmpl, so unordered returns -1.
  if( td1->is_nan() || td2->is_nan() )
    return TypeInt::CC_LT;

  if( td1->_d < td2->_d ) return TypeInt::CC_LT;
  if( td1->_d > td2->_d ) return TypeInt::CC_GT;
  assert( td1->_d == td2->_d, "do not understand FP behavior" );
  return TypeInt::CC_EQ;
}

//------------------------------Ideal------------------------------------------
Node *CmpDNode::Ideal(PhaseGVN *phase, bool can_reshape){
  // Check if we can change this to a CmpF and remove a ConvD2F operation.
  // Change  (CMPD (F2D (float)) (ConD value))
  // To      (CMPF      (float)  (ConF value))
  // Valid when 'value' does not lose precision as a float.
  // Benefits: eliminates conversion, does not require 24-bit mode

  // NaNs prevent commuting operands.  This transform works regardless of the
  // order of ConD and ConvF2D inputs by preserving the original order.
  int idx_f2d = 1;              // ConvF2D on left side?
  if( in(idx_f2d)->Opcode() != Op_ConvF2D )
    idx_f2d = 2;                // No, swap to check for reversed args
  int idx_con = 3-idx_f2d;      // Check for the constant on other input

  if( ConvertCmpD2CmpF &&
      in(idx_f2d)->Opcode() == Op_ConvF2D &&
      in(idx_con)->Opcode() == Op_ConD ) {
    const TypeD *t2 = in(idx_con)->bottom_type()->is_double_constant();
    double t2_value_as_double = t2->_d;
    float  t2_value_as_float  = (float)t2_value_as_double;
    if( t2_value_as_double == (double)t2_value_as_float ) {
      // Test value can be represented as a float
      // Eliminate the conversion to double and create new comparison
      Node *new_in1 = in(idx_f2d)->in(1);
      Node *new_in2 = phase->makecon( TypeF::make(t2_value_as_float) );
      if( idx_f2d != 1 ) {      // Must flip args to match original order
        Node *tmp = new_in1;
        new_in1 = new_in2;
        new_in2 = tmp;
      }
      CmpFNode *new_cmp = (Opcode() == Op_CmpD3)
        ? new (phase->C) CmpF3Node( new_in1, new_in2 )
        : new (phase->C) CmpFNode ( new_in1, new_in2 ) ;
      return new_cmp;           // Changed to CmpFNode
    }
    // Testing value required the precision of a double
  }
  return NULL;                  // No change
}


//=============================================================================
//------------------------------cc2logical-------------------------------------
// Convert a condition code type to a logical type
const Type *BoolTest::cc2logical( const Type *CC ) const {
  if( CC == Type::TOP ) return Type::TOP;
  if( CC->base() != Type::Int ) return TypeInt::BOOL; // Bottom or worse
  const TypeInt *ti = CC->is_int();
  if( ti->is_con() ) {          // Only 1 kind of condition codes set?
    // Match low order 2 bits
    int tmp = ((ti->get_con()&3) == (_test&3)) ? 1 : 0;
    if( _test & 4 ) tmp = 1-tmp;     // Optionally complement result
    return TypeInt::make(tmp);       // Boolean result
  }

  if( CC == TypeInt::CC_GE ) {
    if( _test == ge ) return TypeInt::ONE;
    if( _test == lt ) return TypeInt::ZERO;
  }
  if( CC == TypeInt::CC_LE ) {
    if( _test == le ) return TypeInt::ONE;
    if( _test == gt ) return TypeInt::ZERO;
  }

  return TypeInt::BOOL;
}

//------------------------------dump_spec-------------------------------------
// Print special per-node info
void BoolTest::dump_on(outputStream *st) const {
  const char *msg[] = {"eq","gt","of","lt","ne","le","nof","ge"};
  st->print("%s", msg[_test]);
}

//=============================================================================
uint BoolNode::hash() const { return (Node::hash() << 3)|(_test._test+1); }
uint BoolNode::size_of() const { return sizeof(BoolNode); }

//------------------------------operator==-------------------------------------
uint BoolNode::cmp( const Node &n ) const {
  const BoolNode *b = (const BoolNode *)&n; // Cast up
  return (_test._test == b->_test._test);
}

//-------------------------------make_predicate--------------------------------
Node* BoolNode::make_predicate(Node* test_value, PhaseGVN* phase) {
  if (test_value->is_Con())   return test_value;
  if (test_value->is_Bool())  return test_value;
  Compile* C = phase->C;
  if (test_value->is_CMove() &&
      test_value->in(CMoveNode::Condition)->is_Bool()) {
    BoolNode*   bol   = test_value->in(CMoveNode::Condition)->as_Bool();
    const Type* ftype = phase->type(test_value->in(CMoveNode::IfFalse));
    const Type* ttype = phase->type(test_value->in(CMoveNode::IfTrue));
    if (ftype == TypeInt::ZERO && !TypeInt::ZERO->higher_equal(ttype)) {
      return bol;
    } else if (ttype == TypeInt::ZERO && !TypeInt::ZERO->higher_equal(ftype)) {
      return phase->transform( bol->negate(phase) );
    }
    // Else fall through.  The CMove gets in the way of the test.
    // It should be the case that make_predicate(bol->as_int_value()) == bol.
  }
  Node* cmp = new (C) CmpINode(test_value, phase->intcon(0));
  cmp = phase->transform(cmp);
  Node* bol = new (C) BoolNode(cmp, BoolTest::ne);
  return phase->transform(bol);
}

//--------------------------------as_int_value---------------------------------
Node* BoolNode::as_int_value(PhaseGVN* phase) {
  // Inverse to make_predicate.  The CMove probably boils down to a Conv2B.
  Node* cmov = CMoveNode::make(phase->C, NULL, this,
                               phase->intcon(0), phase->intcon(1),
                               TypeInt::BOOL);
  return phase->transform(cmov);
}

//----------------------------------negate-------------------------------------
BoolNode* BoolNode::negate(PhaseGVN* phase) {
  Compile* C = phase->C;
  return new (C) BoolNode(in(1), _test.negate());
}


//------------------------------Ideal------------------------------------------
Node *BoolNode::Ideal(PhaseGVN *phase, bool can_reshape) {
  // Change "bool tst (cmp con x)" into "bool ~tst (cmp x con)".
  // This moves the constant to the right.  Helps value-numbering.
  Node *cmp = in(1);
  if( !cmp->is_Sub() ) return NULL;
  int cop = cmp->Opcode();
  if( cop == Op_FastLock || cop == Op_FastUnlock) return NULL;
  Node *cmp1 = cmp->in(1);
  Node *cmp2 = cmp->in(2);
  if( !cmp1 ) return NULL;

  if (_test._test == BoolTest::overflow || _test._test == BoolTest::no_overflow) {
    return NULL;
  }

  // Constant on left?
  Node *con = cmp1;
  uint op2 = cmp2->Opcode();
  // Move constants to the right of compare's to canonicalize.
  // Do not muck with Opaque1 nodes, as this indicates a loop
  // guard that cannot change shape.
  if( con->is_Con() && !cmp2->is_Con() && op2 != Op_Opaque1 &&
      // Because of NaN's, CmpD and CmpF are not commutative
      cop != Op_CmpD && cop != Op_CmpF &&
      // Protect against swapping inputs to a compare when it is used by a
      // counted loop exit, which requires maintaining the loop-limit as in(2)
      !is_counted_loop_exit_test() ) {
    // Ok, commute the constant to the right of the cmp node.
    // Clone the Node, getting a new Node of the same class
    cmp = cmp->clone();
    // Swap inputs to the clone
    cmp->swap_edges(1, 2);
    cmp = phase->transform( cmp );
    return new (phase->C) BoolNode( cmp, _test.commute() );
  }

  // Change "bool eq/ne (cmp (xor X 1) 0)" into "bool ne/eq (cmp X 0)".
  // The XOR-1 is an idiom used to flip the sense of a bool.  We flip the
  // test instead.
  int cmp1_op = cmp1->Opcode();
  const TypeInt* cmp2_type = phase->type(cmp2)->isa_int();
  if (cmp2_type == NULL)  return NULL;
  Node* j_xor = cmp1;
  if( cmp2_type == TypeInt::ZERO &&
      cmp1_op == Op_XorI &&
      j_xor->in(1) != j_xor &&          // An xor of itself is dead
      phase->type( j_xor->in(1) ) == TypeInt::BOOL &&
      phase->type( j_xor->in(2) ) == TypeInt::ONE &&
      (_test._test == BoolTest::eq ||
       _test._test == BoolTest::ne) ) {
    Node *ncmp = phase->transform(new (phase->C) CmpINode(j_xor->in(1),cmp2));
    return new (phase->C) BoolNode( ncmp, _test.negate() );
  }

  // Change "bool eq/ne (cmp (Conv2B X) 0)" into "bool eq/ne (cmp X 0)".
  // This is a standard idiom for branching on a boolean value.
  Node *c2b = cmp1;
  if( cmp2_type == TypeInt::ZERO &&
      cmp1_op == Op_Conv2B &&
      (_test._test == BoolTest::eq ||
       _test._test == BoolTest::ne) ) {
    Node *ncmp = phase->transform(phase->type(c2b->in(1))->isa_int()
       ? (Node*)new (phase->C) CmpINode(c2b->in(1),cmp2)
       : (Node*)new (phase->C) CmpPNode(c2b->in(1),phase->makecon(TypePtr::NULL_PTR))
    );
    return new (phase->C) BoolNode( ncmp, _test._test );
  }

  // Comparing a SubI against a zero is equal to comparing the SubI
  // arguments directly.  This only works for eq and ne comparisons
  // due to possible integer overflow.
  if ((_test._test == BoolTest::eq || _test._test == BoolTest::ne) &&
        (cop == Op_CmpI) &&
        (cmp1->Opcode() == Op_SubI) &&
        ( cmp2_type == TypeInt::ZERO ) ) {
    Node *ncmp = phase->transform( new (phase->C) CmpINode(cmp1->in(1),cmp1->in(2)));
    return new (phase->C) BoolNode( ncmp, _test._test );
  }

  // Change (-A vs 0) into (A vs 0) by commuting the test.  Disallow in the
  // most general case because negating 0x80000000 does nothing.  Needed for
  // the CmpF3/SubI/CmpI idiom.
  if( cop == Op_CmpI &&
      cmp1->Opcode() == Op_SubI &&
      cmp2_type == TypeInt::ZERO &&
      phase->type( cmp1->in(1) ) == TypeInt::ZERO &&
      phase->type( cmp1->in(2) )->higher_equal(TypeInt::SYMINT) ) {
    Node *ncmp = phase->transform( new (phase->C) CmpINode(cmp1->in(2),cmp2));
    return new (phase->C) BoolNode( ncmp, _test.commute() );
  }

  //  The transformation below is not valid for either signed or unsigned
  //  comparisons due to wraparound concerns at MAX_VALUE and MIN_VALUE.
  //  This transformation can be resurrected when we are able to
  //  make inferences about the range of values being subtracted from
  //  (or added to) relative to the wraparound point.
  //
  //    // Remove +/-1's if possible.
  //    // "X <= Y-1" becomes "X <  Y"
  //    // "X+1 <= Y" becomes "X <  Y"
  //    // "X <  Y+1" becomes "X <= Y"
  //    // "X-1 <  Y" becomes "X <= Y"
  //    // Do not this to compares off of the counted-loop-end.  These guys are
  //    // checking the trip counter and they want to use the post-incremented
  //    // counter.  If they use the PRE-incremented counter, then the counter has
  //    // to be incremented in a private block on a loop backedge.
  //    if( du && du->cnt(this) && du->out(this)[0]->Opcode() == Op_CountedLoopEnd )
  //      return NULL;
  //  #ifndef PRODUCT
  //    // Do not do this in a wash GVN pass during verification.
  //    // Gets triggered by too many simple optimizations to be bothered with
  //    // re-trying it again and again.
  //    if( !phase->allow_progress() ) return NULL;
  //  #endif
  //    // Not valid for unsigned compare because of corner cases in involving zero.
  //    // For example, replacing "X-1 <u Y" with "X <=u Y" fails to throw an
  //    // exception in case X is 0 (because 0-1 turns into 4billion unsigned but
  //    // "0 <=u Y" is always true).
  //    if( cmp->Opcode() == Op_CmpU ) return NULL;
  //    int cmp2_op = cmp2->Opcode();
  //    if( _test._test == BoolTest::le ) {
  //      if( cmp1_op == Op_AddI &&
  //          phase->type( cmp1->in(2) ) == TypeInt::ONE )
  //        return clone_cmp( cmp, cmp1->in(1), cmp2, phase, BoolTest::lt );
  //      else if( cmp2_op == Op_AddI &&
  //         phase->type( cmp2->in(2) ) == TypeInt::MINUS_1 )
  //        return clone_cmp( cmp, cmp1, cmp2->in(1), phase, BoolTest::lt );
  //    } else if( _test._test == BoolTest::lt ) {
  //      if( cmp1_op == Op_AddI &&
  //          phase->type( cmp1->in(2) ) == TypeInt::MINUS_1 )
  //        return clone_cmp( cmp, cmp1->in(1), cmp2, phase, BoolTest::le );
  //      else if( cmp2_op == Op_AddI &&
  //         phase->type( cmp2->in(2) ) == TypeInt::ONE )
  //        return clone_cmp( cmp, cmp1, cmp2->in(1), phase, BoolTest::le );
  //    }

  return NULL;
}

//------------------------------Value------------------------------------------
// Simplify a Bool (convert condition codes to boolean (1 or 0)) node,
// based on local information.   If the input is constant, do it.
const Type *BoolNode::Value( PhaseTransform *phase ) const {
  return _test.cc2logical( phase->type( in(1) ) );
}

//------------------------------dump_spec--------------------------------------
// Dump special per-node info
#ifndef PRODUCT
void BoolNode::dump_spec(outputStream *st) const {
  st->print("[");
  _test.dump_on(st);
  st->print("]");
}
#endif

//------------------------------is_counted_loop_exit_test--------------------------------------
// Returns true if node is used by a counted loop node.
bool BoolNode::is_counted_loop_exit_test() {
  for( DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++ ) {
    Node* use = fast_out(i);
    if (use->is_CountedLoopEnd()) {
      return true;
    }
  }
  return false;
}

//=============================================================================
//------------------------------Value------------------------------------------
// Compute sqrt
const Type *SqrtDNode::Value( PhaseTransform *phase ) const {
  const Type *t1 = phase->type( in(1) );
  if( t1 == Type::TOP ) return Type::TOP;
  if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
  double d = t1->getd();
  if( d < 0.0 ) return Type::DOUBLE;
  return TypeD::make( sqrt( d ) );
}

//=============================================================================
//------------------------------Value------------------------------------------
// Compute cos
const Type *CosDNode::Value( PhaseTransform *phase ) const {
  const Type *t1 = phase->type( in(1) );
  if( t1 == Type::TOP ) return Type::TOP;
  if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
  double d = t1->getd();
  return TypeD::make( StubRoutines::intrinsic_cos( d ) );
}

//=============================================================================
//------------------------------Value------------------------------------------
// Compute sin
const Type *SinDNode::Value( PhaseTransform *phase ) const {
  const Type *t1 = phase->type( in(1) );
  if( t1 == Type::TOP ) return Type::TOP;
  if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
  double d = t1->getd();
  return TypeD::make( StubRoutines::intrinsic_sin( d ) );
}

//=============================================================================
//------------------------------Value------------------------------------------
// Compute tan
const Type *TanDNode::Value( PhaseTransform *phase ) const {
  const Type *t1 = phase->type( in(1) );
  if( t1 == Type::TOP ) return Type::TOP;
  if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
  double d = t1->getd();
  return TypeD::make( StubRoutines::intrinsic_tan( d ) );
}

//=============================================================================
//------------------------------Value------------------------------------------
// Compute log
const Type *LogDNode::Value( PhaseTransform *phase ) const {
  const Type *t1 = phase->type( in(1) );
  if( t1 == Type::TOP ) return Type::TOP;
  if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
  double d = t1->getd();
  return TypeD::make( StubRoutines::intrinsic_log( d ) );
}

//=============================================================================
//------------------------------Value------------------------------------------
// Compute log10
const Type *Log10DNode::Value( PhaseTransform *phase ) const {
  const Type *t1 = phase->type( in(1) );
  if( t1 == Type::TOP ) return Type::TOP;
  if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
  double d = t1->getd();
  return TypeD::make( StubRoutines::intrinsic_log10( d ) );
}

//=============================================================================
//------------------------------Value------------------------------------------
// Compute exp
const Type *ExpDNode::Value( PhaseTransform *phase ) const {
  const Type *t1 = phase->type( in(1) );
  if( t1 == Type::TOP ) return Type::TOP;
  if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
  double d = t1->getd();
  return TypeD::make( StubRoutines::intrinsic_exp( d ) );
}


//=============================================================================
//------------------------------Value------------------------------------------
// Compute pow
const Type *PowDNode::Value( PhaseTransform *phase ) const {
  const Type *t1 = phase->type( in(1) );
  if( t1 == Type::TOP ) return Type::TOP;
  if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
  const Type *t2 = phase->type( in(2) );
  if( t2 == Type::TOP ) return Type::TOP;
  if( t2->base() != Type::DoubleCon ) return Type::DOUBLE;
  double d1 = t1->getd();
  double d2 = t2->getd();
  return TypeD::make( StubRoutines::intrinsic_pow( d1, d2 ) );
}
C:\hotspot-69087d08d473\src\share\vm/opto/subnode.hpp
/*
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_VM_OPTO_SUBNODE_HPP
#define SHARE_VM_OPTO_SUBNODE_HPP

#include "opto/node.hpp"
#include "opto/opcodes.hpp"
#include "opto/type.hpp"

// Portions of code courtesy of Clifford Click

//------------------------------SUBNode----------------------------------------
// Class SUBTRACTION functionality.  This covers all the usual 'subtract'
// behaviors.  Subtract-integer, -float, -double, binary xor, compare-integer,
// -float, and -double are all inherited from this class.  The compare
// functions behave like subtract functions, except that all negative answers
// are compressed into -1, and all positive answers compressed to 1.
class SubNode : public Node {
public:
  SubNode( Node *in1, Node *in2 ) : Node(0,in1,in2) {
    init_class_id(Class_Sub);
  }

  // Handle algebraic identities here.  If we have an identity, return the Node
  // we are equivalent to.  We look for "add of zero" as an identity.
  virtual Node *Identity( PhaseTransform *phase );

  // Compute a new Type for this node.  Basically we just do the pre-check,
  // then call the virtual add() to set the type.
  virtual const Type *Value( PhaseTransform *phase ) const;
  const Type* Value_common( PhaseTransform *phase ) const;

  // Supplied function returns the subtractend of the inputs.
  // This also type-checks the inputs for sanity.  Guaranteed never to
  // be passed a TOP or BOTTOM type, these are filtered out by a pre-check.
  virtual const Type *sub( const Type *, const Type * ) const = 0;

  // Supplied function to return the additive identity type.
  // This is returned whenever the subtracts inputs are the same.
  virtual const Type *add_id() const = 0;

};


// NOTE: SubINode should be taken away and replaced by add and negate
//------------------------------SubINode---------------------------------------
// Subtract 2 integers
class SubINode : public SubNode {
public:
  SubINode( Node *in1, Node *in2 ) : SubNode(in1,in2) {}
  virtual int Opcode() const;
  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
  virtual const Type *sub( const Type *, const Type * ) const;
  const Type *add_id() const { return TypeInt::ZERO; }
  const Type *bottom_type() const { return TypeInt::INT; }
  virtual uint ideal_reg() const { return Op_RegI; }
};

//------------------------------SubLNode---------------------------------------
// Subtract 2 integers
class SubLNode : public SubNode {
public:
  SubLNode( Node *in1, Node *in2 ) : SubNode(in1,in2) {}
  virtual int Opcode() const;
  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
  virtual const Type *sub( const Type *, const Type * ) const;
  const Type *add_id() const { return TypeLong::ZERO; }
  const Type *bottom_type() const { return TypeLong::LONG; }
  virtual uint ideal_reg() const { return Op_RegL; }
};

// NOTE: SubFPNode should be taken away and replaced by add and negate
//------------------------------SubFPNode--------------------------------------
// Subtract 2 floats or doubles
class SubFPNode : public SubNode {
protected:
  SubFPNode( Node *in1, Node *in2 ) : SubNode(in1,in2) {}
public:
  const Type *Value( PhaseTransform *phase ) const;
};

// NOTE: SubFNode should be taken away and replaced by add and negate
//------------------------------SubFNode---------------------------------------
// Subtract 2 doubles
class SubFNode : public SubFPNode {
public:
  SubFNode( Node *in1, Node *in2 ) : SubFPNode(in1,in2) {}
  virtual int Opcode() const;
  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
  virtual const Type *sub( const Type *, const Type * ) const;
  const Type   *add_id() const { return TypeF::ZERO; }
  const Type   *bottom_type() const { return Type::FLOAT; }
  virtual uint  ideal_reg() const { return Op_RegF; }
};

// NOTE: SubDNode should be taken away and replaced by add and negate
//------------------------------SubDNode---------------------------------------
// Subtract 2 doubles
class SubDNode : public SubFPNode {
public:
  SubDNode( Node *in1, Node *in2 ) : SubFPNode(in1,in2) {}
  virtual int Opcode() const;
  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
  virtual const Type *sub( const Type *, const Type * ) const;
  const Type   *add_id() const { return TypeD::ZERO; }
  const Type   *bottom_type() const { return Type::DOUBLE; }
  virtual uint  ideal_reg() const { return Op_RegD; }
};

//------------------------------CmpNode---------------------------------------
// Compare 2 values, returning condition codes (-1, 0 or 1).
class CmpNode : public SubNode {
public:
  CmpNode( Node *in1, Node *in2 ) : SubNode(in1,in2) {
    init_class_id(Class_Cmp);
  }
  virtual Node *Identity( PhaseTransform *phase );
  const Type *add_id() const { return TypeInt::ZERO; }
  const Type *bottom_type() const { return TypeInt::CC; }
  virtual uint ideal_reg() const { return Op_RegFlags; }
};

//------------------------------CmpINode---------------------------------------
// Compare 2 signed values, returning condition codes (-1, 0 or 1).
class CmpINode : public CmpNode {
public:
  CmpINode( Node *in1, Node *in2 ) : CmpNode(in1,in2) {}
  virtual int Opcode() const;
  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
  virtual const Type *sub( const Type *, const Type * ) const;
};

//------------------------------CmpUNode---------------------------------------
// Compare 2 unsigned values (integer or pointer), returning condition codes (-1, 0 or 1).
class CmpUNode : public CmpNode {
public:
  CmpUNode( Node *in1, Node *in2 ) : CmpNode(in1,in2) {}
  virtual int Opcode() const;
  virtual const Type *sub( const Type *, const Type * ) const;
  const Type *Value( PhaseTransform *phase ) const;
  bool is_index_range_check() const;
};

//------------------------------CmpPNode---------------------------------------
// Compare 2 pointer values, returning condition codes (-1, 0 or 1).
class CmpPNode : public CmpNode {
public:
  CmpPNode( Node *in1, Node *in2 ) : CmpNode(in1,in2) {}
  virtual int Opcode() const;
  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
  virtual const Type *sub( const Type *, const Type * ) const;
};

//------------------------------CmpNNode--------------------------------------
// Compare 2 narrow oop values, returning condition codes (-1, 0 or 1).
class CmpNNode : public CmpNode {
public:
  CmpNNode( Node *in1, Node *in2 ) : CmpNode(in1,in2) {}
  virtual int Opcode() const;
  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
  virtual const Type *sub( const Type *, const Type * ) const;
};

//------------------------------CmpLNode---------------------------------------
// Compare 2 long values, returning condition codes (-1, 0 or 1).
class CmpLNode : public CmpNode {
public:
  CmpLNode( Node *in1, Node *in2 ) : CmpNode(in1,in2) {}
  virtual int    Opcode() const;
  virtual const Type *sub( const Type *, const Type * ) const;
};

//------------------------------CmpULNode---------------------------------------
// Compare 2 unsigned long values, returning condition codes (-1, 0 or 1).
class CmpULNode : public CmpNode {
public:
  CmpULNode(Node* in1, Node* in2) : CmpNode(in1, in2) { }
  virtual int Opcode() const;
  virtual const Type* sub(const Type*, const Type*) const;
};

//------------------------------CmpL3Node--------------------------------------
// Compare 2 long values, returning integer value (-1, 0 or 1).
class CmpL3Node : public CmpLNode {
public:
  CmpL3Node( Node *in1, Node *in2 ) : CmpLNode(in1,in2) {
    // Since it is not consumed by Bools, it is not really a Cmp.
    init_class_id(Class_Sub);
  }
  virtual int    Opcode() const;
  virtual uint ideal_reg() const { return Op_RegI; }
};

//------------------------------CmpFNode---------------------------------------
// Compare 2 float values, returning condition codes (-1, 0 or 1).
// This implements the Java bytecode fcmpl, so unordered returns -1.
// Operands may not commute.
class CmpFNode : public CmpNode {
public:
  CmpFNode( Node *in1, Node *in2 ) : CmpNode(in1,in2) {}
  virtual int Opcode() const;
  virtual const Type *sub( const Type *, const Type * ) const { ShouldNotReachHere(); return NULL; }
  const Type *Value( PhaseTransform *phase ) const;
};

//------------------------------CmpF3Node--------------------------------------
// Compare 2 float values, returning integer value (-1, 0 or 1).
// This implements the Java bytecode fcmpl, so unordered returns -1.
// Operands may not commute.
class CmpF3Node : public CmpFNode {
public:
  CmpF3Node( Node *in1, Node *in2 ) : CmpFNode(in1,in2) {
    // Since it is not consumed by Bools, it is not really a Cmp.
    init_class_id(Class_Sub);
  }
  virtual int Opcode() const;
  // Since it is not consumed by Bools, it is not really a Cmp.
  virtual uint ideal_reg() const { return Op_RegI; }
};


//------------------------------CmpDNode---------------------------------------
// Compare 2 double values, returning condition codes (-1, 0 or 1).
// This implements the Java bytecode dcmpl, so unordered returns -1.
// Operands may not commute.
class CmpDNode : public CmpNode {
public:
  CmpDNode( Node *in1, Node *in2 ) : CmpNode(in1,in2) {}
  virtual int Opcode() const;
  virtual const Type *sub( const Type *, const Type * ) const { ShouldNotReachHere(); return NULL; }
  const Type *Value( PhaseTransform *phase ) const;
  virtual Node  *Ideal(PhaseGVN *phase, bool can_reshape);
};

//------------------------------CmpD3Node--------------------------------------
// Compare 2 double values, returning integer value (-1, 0 or 1).
// This implements the Java bytecode dcmpl, so unordered returns -1.
// Operands may not commute.
class CmpD3Node : public CmpDNode {
public:
  CmpD3Node( Node *in1, Node *in2 ) : CmpDNode(in1,in2) {
    // Since it is not consumed by Bools, it is not really a Cmp.
    init_class_id(Class_Sub);
  }
  virtual int Opcode() const;
  virtual uint ideal_reg() const { return Op_RegI; }
};


//------------------------------BoolTest---------------------------------------
// Convert condition codes to a boolean test value (0 or -1).
// We pick the values as 3 bits; the low order 2 bits we compare against the
// condition codes, the high bit flips the sense of the result.
struct BoolTest VALUE_OBJ_CLASS_SPEC {
  enum mask { eq = 0, ne = 4, le = 5, ge = 7, lt = 3, gt = 1, overflow = 2, no_overflow = 6, illegal = 8 };
  mask _test;
  BoolTest( mask btm ) : _test(btm) {}
  const Type *cc2logical( const Type *CC ) const;
  // Commute the test.  I use a small table lookup.  The table is created as
  // a simple char array where each element is the ASCII version of a 'mask'
  // enum from above.
  mask commute( ) const { return mask("032147658"[_test]-'0'); }
  mask negate( ) const { return mask(_test^4); }
  bool is_canonical( ) const { return (_test == BoolTest::ne || _test == BoolTest::lt || _test == BoolTest::le || _test == BoolTest::overflow); }
  void dump_on(outputStream *st) const;
};

//------------------------------BoolNode---------------------------------------
// A Node to convert a Condition Codes to a Logical result.
class BoolNode : public Node {
  virtual uint hash() const;
  virtual uint cmp( const Node &n ) const;
  virtual uint size_of() const;
public:
  const BoolTest _test;
  BoolNode( Node *cc, BoolTest::mask t): _test(t), Node(0,cc) {
    init_class_id(Class_Bool);
  }
  // Convert an arbitrary int value to a Bool or other suitable predicate.
  static Node* make_predicate(Node* test_value, PhaseGVN* phase);
  // Convert self back to an integer value.
  Node* as_int_value(PhaseGVN* phase);
  // Invert sense of self, returning new Bool.
  BoolNode* negate(PhaseGVN* phase);
  virtual int Opcode() const;
  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
  virtual const Type *Value( PhaseTransform *phase ) const;
  virtual const Type *bottom_type() const { return TypeInt::BOOL; }
  uint match_edge(uint idx) const { return 0; }
  virtual uint ideal_reg() const { return Op_RegI; }

  bool is_counted_loop_exit_test();
#ifndef PRODUCT
  virtual void dump_spec(outputStream *st) const;
#endif
};

//------------------------------AbsNode----------------------------------------
// Abstract class for absolute value.  Mostly used to get a handy wrapper
// for finding this pattern in the graph.
class AbsNode : public Node {
public:
  AbsNode( Node *value ) : Node(0,value) {}
};

//------------------------------AbsINode---------------------------------------
// Absolute value an integer.  Since a naive graph involves control flow, we
// "match" it in the ideal world (so the control flow can be removed).
class AbsINode : public AbsNode {
public:
  AbsINode( Node *in1 ) : AbsNode(in1) {}
  virtual int Opcode() const;
  const Type *bottom_type() const { return TypeInt::INT; }
  virtual uint ideal_reg() const { return Op_RegI; }
};

//------------------------------AbsFNode---------------------------------------
// Absolute value a float, a common float-point idiom with a cheap hardware
// implemention on most chips.  Since a naive graph involves control flow, we
// "match" it in the ideal world (so the control flow can be removed).
class AbsFNode : public AbsNode {
public:
  AbsFNode( Node *in1 ) : AbsNode(in1) {}
  virtual int Opcode() const;
  const Type *bottom_type() const { return Type::FLOAT; }
  virtual uint ideal_reg() const { return Op_RegF; }
};

//------------------------------AbsDNode---------------------------------------
// Absolute value a double, a common float-point idiom with a cheap hardware
// implemention on most chips.  Since a naive graph involves control flow, we
// "match" it in the ideal world (so the control flow can be removed).
class AbsDNode : public AbsNode {
public:
  AbsDNode( Node *in1 ) : AbsNode(in1) {}
  virtual int Opcode() const;
  const Type *bottom_type() const { return Type::DOUBLE; }
  virtual uint ideal_reg() const { return Op_RegD; }
};


//------------------------------CmpLTMaskNode----------------------------------
// If p < q, return -1 else return 0.  Nice for flow-free idioms.
class CmpLTMaskNode : public Node {
public:
  CmpLTMaskNode( Node *p, Node *q ) : Node(0, p, q) {}
  virtual int Opcode() const;
  const Type *bottom_type() const { return TypeInt::INT; }
  virtual uint ideal_reg() const { return Op_RegI; }
};


//------------------------------NegNode----------------------------------------
class NegNode : public Node {
public:
  NegNode( Node *in1 ) : Node(0,in1) {}
};

//------------------------------NegFNode---------------------------------------
// Negate value a float.  Negating 0.0 returns -0.0, but subtracting from
// zero returns +0.0 (per JVM spec on 'fneg' bytecode).  As subtraction
// cannot be used to replace negation we have to implement negation as ideal
// node; note that negation and addition can replace subtraction.
class NegFNode : public NegNode {
public:
  NegFNode( Node *in1 ) : NegNode(in1) {}
  virtual int Opcode() const;
  const Type *bottom_type() const { return Type::FLOAT; }
  virtual uint ideal_reg() const { return Op_RegF; }
};

//------------------------------NegDNode---------------------------------------
// Negate value a double.  Negating 0.0 returns -0.0, but subtracting from
// zero returns +0.0 (per JVM spec on 'dneg' bytecode).  As subtraction
// cannot be used to replace negation we have to implement negation as ideal
// node; note that negation and addition can replace subtraction.
class NegDNode : public NegNode {
public:
  NegDNode( Node *in1 ) : NegNode(in1) {}
  virtual int Opcode() const;
  const Type *bottom_type() const { return Type::DOUBLE; }
  virtual uint ideal_reg() const { return Op_RegD; }
};

//------------------------------CosDNode---------------------------------------
// Cosinus of a double
class CosDNode : public Node {
public:
  CosDNode(Compile* C, Node *c, Node *in1) : Node(c, in1) {
    init_flags(Flag_is_expensive);
    C->add_expensive_node(this);
  }
  virtual int Opcode() const;
  const Type *bottom_type() const { return Type::DOUBLE; }
  virtual uint ideal_reg() const { return Op_RegD; }
  virtual const Type *Value( PhaseTransform *phase ) const;
};

//------------------------------CosDNode---------------------------------------
// Sinus of a double
class SinDNode : public Node {
public:
  SinDNode(Compile* C, Node *c, Node *in1) : Node(c, in1) {
    init_flags(Flag_is_expensive);
    C->add_expensive_node(this);
  }
  virtual int Opcode() const;
  const Type *bottom_type() const { return Type::DOUBLE; }
  virtual uint ideal_reg() const { return Op_RegD; }
  virtual const Type *Value( PhaseTransform *phase ) const;
};


//------------------------------TanDNode---------------------------------------
// tangens of a double
class TanDNode : public Node {
public:
  TanDNode(Compile* C, Node *c,Node *in1) : Node(c, in1) {
    init_flags(Flag_is_expensive);
    C->add_expensive_node(this);
  }
  virtual int Opcode() const;
  const Type *bottom_type() const { return Type::DOUBLE; }
  virtual uint ideal_reg() const { return Op_RegD; }
  virtual const Type *Value( PhaseTransform *phase ) const;
};


//------------------------------AtanDNode--------------------------------------
// arcus tangens of a double
class AtanDNode : public Node {
public:
  AtanDNode(Node *c, Node *in1, Node *in2  ) : Node(c, in1, in2) {}
  virtual int Opcode() const;
  const Type *bottom_type() const { return Type::DOUBLE; }
  virtual uint ideal_reg() const { return Op_RegD; }
};


//------------------------------SqrtDNode--------------------------------------
// square root a double
class SqrtDNode : public Node {
public:
  SqrtDNode(Compile* C, Node *c, Node *in1) : Node(c, in1) {
    init_flags(Flag_is_expensive);
    C->add_expensive_node(this);
  }
  virtual int Opcode() const;
  const Type *bottom_type() const { return Type::DOUBLE; }
  virtual uint ideal_reg() const { return Op_RegD; }
  virtual const Type *Value( PhaseTransform *phase ) const;
};

//------------------------------ExpDNode---------------------------------------
//  Exponentiate a double
class ExpDNode : public Node {
public:
  ExpDNode(Compile* C, Node *c, Node *in1) : Node(c, in1) {
    init_flags(Flag_is_expensive);
    C->add_expensive_node(this);
  }
  virtual int Opcode() const;
  const Type *bottom_type() const { return Type::DOUBLE; }
  virtual uint ideal_reg() const { return Op_RegD; }
  virtual const Type *Value( PhaseTransform *phase ) const;
};

//------------------------------LogDNode---------------------------------------
// Log_e of a double
class LogDNode : public Node {
public:
  LogDNode(Compile* C, Node *c, Node *in1) : Node(c, in1) {
    init_flags(Flag_is_expensive);
    C->add_expensive_node(this);
  }
  virtual int Opcode() const;
  const Type *bottom_type() const { return Type::DOUBLE; }
  virtual uint ideal_reg() const { return Op_RegD; }
  virtual const Type *Value( PhaseTransform *phase ) const;
};

//------------------------------Log10DNode---------------------------------------
// Log_10 of a double
class Log10DNode : public Node {
public:
  Log10DNode(Compile* C, Node *c, Node *in1) : Node(c, in1) {
    init_flags(Flag_is_expensive);
    C->add_expensive_node(this);
  }
  virtual int Opcode() const;
  const Type *bottom_type() const { return Type::DOUBLE; }
  virtual uint ideal_reg() const { return Op_RegD; }
  virtual const Type *Value( PhaseTransform *phase ) const;
};

//------------------------------PowDNode---------------------------------------
// Raise a double to a double power
class PowDNode : public Node {
public:
  PowDNode(Compile* C, Node *c, Node *in1, Node *in2 ) : Node(c, in1, in2) {
    init_flags(Flag_is_expensive);
    C->add_expensive_node(this);
  }
  virtual int Opcode() const;
  const Type *bottom_type() const { return Type::DOUBLE; }
  virtual uint ideal_reg() const { return Op_RegD; }
  virtual const Type *Value( PhaseTransform *phase ) const;
};

//-------------------------------ReverseBytesINode--------------------------------
// reverse bytes of an integer
class ReverseBytesINode : public Node {
public:
  ReverseBytesINode(Node *c, Node *in1) : Node(c, in1) {}
  virtual int Opcode() const;
  const Type *bottom_type() const { return TypeInt::INT; }
  virtual uint ideal_reg() const { return Op_RegI; }
};

//-------------------------------ReverseBytesLNode--------------------------------
// reverse bytes of a long
class ReverseBytesLNode : public Node {
public:
  ReverseBytesLNode(Node *c, Node *in1) : Node(c, in1) {}
  virtual int Opcode() const;
  const Type *bottom_type() const { return TypeLong::LONG; }
  virtual uint ideal_reg() const { return Op_RegL; }
};

//-------------------------------ReverseBytesUSNode--------------------------------
// reverse bytes of an unsigned short / char
class ReverseBytesUSNode : public Node {
public:
  ReverseBytesUSNode(Node *c, Node *in1) : Node(c, in1) {}
  virtual int Opcode() const;
  const Type *bottom_type() const { return TypeInt::CHAR; }
  virtual uint ideal_reg() const { return Op_RegI; }
};

//-------------------------------ReverseBytesSNode--------------------------------
// reverse bytes of a short
class ReverseBytesSNode : public Node {
public:
  ReverseBytesSNode(Node *c, Node *in1) : Node(c, in1) {}
  virtual int Opcode() const;
  const Type *bottom_type() const { return TypeInt::SHORT; }
  virtual uint ideal_reg() const { return Op_RegI; }
};

#endif // SHARE_VM_OPTO_SUBNODE_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/superword.cpp
/*
 * Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#include "precompiled.hpp"
#include "compiler/compileLog.hpp"
#include "libadt/vectset.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/addnode.hpp"
#include "opto/callnode.hpp"
#include "opto/divnode.hpp"
#include "opto/matcher.hpp"
#include "opto/memnode.hpp"
#include "opto/mulnode.hpp"
#include "opto/opcodes.hpp"
#include "opto/superword.hpp"
#include "opto/vectornode.hpp"

//
//                  S U P E R W O R D   T R A N S F O R M
//=============================================================================

//------------------------------SuperWord---------------------------
SuperWord::SuperWord(PhaseIdealLoop* phase) :
  _phase(phase),
  _igvn(phase->_igvn),
  _arena(phase->C->comp_arena()),
  _packset(arena(), 8,  0, NULL),         // packs for the current block
  _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb
  _block(arena(), 8,  0, NULL),           // nodes in current block
  _data_entry(arena(), 8,  0, NULL),      // nodes with all inputs from outside
  _mem_slice_head(arena(), 8,  0, NULL),  // memory slice heads
  _mem_slice_tail(arena(), 8,  0, NULL),  // memory slice tails
  _node_info(arena(), 8,  0, SWNodeInfo::initial), // info needed per node
  _align_to_ref(NULL),                    // memory reference to align vectors to
  _disjoint_ptrs(arena(), 8,  0, OrderedPair::initial), // runtime disambiguated pointer pairs
  _dg(_arena),                            // dependence graph
  _visited(arena()),                      // visited node set
  _post_visited(arena()),                 // post visited node set
  _n_idx_list(arena(), 8),                // scratch list of (node,index) pairs
  _stk(arena(), 8, 0, NULL),              // scratch stack of nodes
  _nlist(arena(), 8, 0, NULL),            // scratch list of nodes
  _lpt(NULL),                             // loop tree node
  _lp(NULL),                              // LoopNode
  _bb(NULL),                              // basic block
  _iv(NULL)                               // induction var
{}

//------------------------------transform_loop---------------------------
void SuperWord::transform_loop(IdealLoopTree* lpt) {
  assert(UseSuperWord, "should be");
  // Do vectors exist on this architecture?
  if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;

  assert(lpt->_head->is_CountedLoop(), "must be");
  CountedLoopNode *cl = lpt->_head->as_CountedLoop();

  if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop

  if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops

  // Check for no control flow in body (other than exit)
  Node *cl_exit = cl->loopexit();
  if (cl_exit->in(0) != lpt->_head) return;

  // Make sure the are no extra control users of the loop backedge
  if (cl->back_control()->outcnt() != 1) {
    return;
  }

  // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit))))
  CountedLoopEndNode* pre_end = get_pre_loop_end(cl);
  if (pre_end == NULL) return;
  Node *pre_opaq1 = pre_end->limit();
  if (pre_opaq1->Opcode() != Op_Opaque1) return;

  init(); // initialize data structures

  set_lpt(lpt);
  set_lp(cl);

  // For now, define one block which is the entire loop body
  set_bb(cl);

  assert(_packset.length() == 0, "packset must be empty");
  SLP_extract();
}

//------------------------------SLP_extract---------------------------
// Extract the superword level parallelism
//
// 1) A reverse post-order of nodes in the block is constructed.  By scanning
//    this list from first to last, all definitions are visited before their uses.
//
// 2) A point-to-point dependence graph is constructed between memory references.
//    This simplies the upcoming "independence" checker.
//
// 3) The maximum depth in the node graph from the beginning of the block
//    to each node is computed.  This is used to prune the graph search
//    in the independence checker.
//
// 4) For integer types, the necessary bit width is propagated backwards
//    from stores to allow packed operations on byte, char, and short
//    integers.  This reverses the promotion to type "int" that javac
//    did for operations like: char c1,c2,c3;  c1 = c2 + c3.
//
// 5) One of the memory references is picked to be an aligned vector reference.
//    The pre-loop trip count is adjusted to align this reference in the
//    unrolled body.
//
// 6) The initial set of pack pairs is seeded with memory references.
//
// 7) The set of pack pairs is extended by following use->def and def->use links.
//
// 8) The pairs are combined into vector sized packs.
//
// 9) Reorder the memory slices to co-locate members of the memory packs.
//
// 10) Generate ideal vector nodes for the final set of packs and where necessary,
//    inserting scalar promotion, vector creation from multiple scalars, and
//    extraction of scalar values from vectors.
//
void SuperWord::SLP_extract() {

  // Ready the block

  if (!construct_bb())
    return; // Exit if no interesting nodes or complex graph.

  dependence_graph();

  compute_max_depth();

  compute_vector_element_type();

  // Attempt vectorization

  find_adjacent_refs();

  extend_packlist();

  combine_packs();

  construct_my_pack_map();

  filter_packs();

  schedule();

  output();
}

//------------------------------find_adjacent_refs---------------------------
// Find the adjacent memory references and create pack pairs for them.
// This is the initial set of packs that will then be extended by
// following use->def and def->use links.  The align positions are
// assigned relative to the reference "align_to_ref"
void SuperWord::find_adjacent_refs() {
  // Get list of memory operations
  Node_List memops;
  for (int i = 0; i < _block.length(); i++) {
    Node* n = _block.at(i);
    if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) &&
        is_java_primitive(n->as_Mem()->memory_type())) {
      int align = memory_alignment(n->as_Mem(), 0);
      if (align != bottom_align) {
        memops.push(n);
      }
    }
  }

  Node_List align_to_refs;
  int best_iv_adjustment = 0;
  MemNode* best_align_to_mem_ref = NULL;

  while (memops.size() != 0) {
    // Find a memory reference to align to.
    MemNode* mem_ref = find_align_to_ref(memops);
    if (mem_ref == NULL) break;
    align_to_refs.push(mem_ref);
    int iv_adjustment = get_iv_adjustment(mem_ref);

    if (best_align_to_mem_ref == NULL) {
      // Set memory reference which is the best from all memory operations
      // to be used for alignment. The pre-loop trip count is modified to align
      // this reference to a vector-aligned address.
      best_align_to_mem_ref = mem_ref;
      best_iv_adjustment = iv_adjustment;
    }

    SWPointer align_to_ref_p(mem_ref, this);
    // Set alignment relative to "align_to_ref" for all related memory operations.
    for (int i = memops.size() - 1; i >= 0; i--) {
      MemNode* s = memops.at(i)->as_Mem();
      if (isomorphic(s, mem_ref)) {
        SWPointer p2(s, this);
        if (p2.comparable(align_to_ref_p)) {
          int align = memory_alignment(s, iv_adjustment);
          set_alignment(s, align);
        }
      }
    }

    // Create initial pack pairs of memory operations for which
    // alignment is set and vectors will be aligned.
    bool create_pack = true;
    if (memory_alignment(mem_ref, best_iv_adjustment) == 0) {
      if (!Matcher::misaligned_vectors_ok()) {
        int vw = vector_width(mem_ref);
        int vw_best = vector_width(best_align_to_mem_ref);
        if (vw > vw_best) {
          // Do not vectorize a memory access with more elements per vector
          // if unaligned memory access is not allowed because number of
          // iterations in pre-loop will be not enough to align it.
          create_pack = false;
        } else {
          SWPointer p2(best_align_to_mem_ref, this);
          if (align_to_ref_p.invar() != p2.invar()) {
            // Do not vectorize memory accesses with different invariants
            // if unaligned memory accesses are not allowed.
            create_pack = false;
          }
        }
      }
    } else {
      if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
        // Can't allow vectorization of unaligned memory accesses with the
        // same type since it could be overlapped accesses to the same array.
        create_pack = false;
      } else {
        // Allow independent (different type) unaligned memory operations
        // if HW supports them.
        if (!Matcher::misaligned_vectors_ok()) {
          create_pack = false;
        } else {
          // Check if packs of the same memory type but
          // with a different alignment were created before.
          for (uint i = 0; i < align_to_refs.size(); i++) {
            MemNode* mr = align_to_refs.at(i)->as_Mem();
            if (same_velt_type(mr, mem_ref) &&
                memory_alignment(mr, iv_adjustment) != 0)
              create_pack = false;
          }
        }
      }
    }
    if (create_pack) {
      for (uint i = 0; i < memops.size(); i++) {
        Node* s1 = memops.at(i);
        int align = alignment(s1);
        if (align == top_align) continue;
        for (uint j = 0; j < memops.size(); j++) {
          Node* s2 = memops.at(j);
          if (alignment(s2) == top_align) continue;
          if (s1 != s2 && are_adjacent_refs(s1, s2)) {
            if (stmts_can_pack(s1, s2, align)) {
              Node_List* pair = new Node_List();
              pair->push(s1);
              pair->push(s2);
              _packset.append(pair);
            }
          }
        }
      }
    } else { // Don't create unaligned pack
      // First, remove remaining memory ops of the same type from the list.
      for (int i = memops.size() - 1; i >= 0; i--) {
        MemNode* s = memops.at(i)->as_Mem();
        if (same_velt_type(s, mem_ref)) {
          memops.remove(i);
        }
      }

      // Second, remove already constructed packs of the same type.
      for (int i = _packset.length() - 1; i >= 0; i--) {
        Node_List* p = _packset.at(i);
        MemNode* s = p->at(0)->as_Mem();
        if (same_velt_type(s, mem_ref)) {
          remove_pack_at(i);
        }
      }

      // If needed find the best memory reference for loop alignment again.
      if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
        // Put memory ops from remaining packs back on memops list for
        // the best alignment search.
        uint orig_msize = memops.size();
        for (int i = 0; i < _packset.length(); i++) {
          Node_List* p = _packset.at(i);
          MemNode* s = p->at(0)->as_Mem();
          assert(!same_velt_type(s, mem_ref), "sanity");
          memops.push(s);
        }
        MemNode* best_align_to_mem_ref = find_align_to_ref(memops);
        if (best_align_to_mem_ref == NULL) break;
        best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref);
        // Restore list.
        while (memops.size() > orig_msize)
          (void)memops.pop();
      }
    } // unaligned memory accesses

    // Remove used mem nodes.
    for (int i = memops.size() - 1; i >= 0; i--) {
      MemNode* m = memops.at(i)->as_Mem();
      if (alignment(m) != top_align) {
        memops.remove(i);
      }
    }

  } // while (memops.size() != 0
  set_align_to_ref(best_align_to_mem_ref);

#ifndef PRODUCT
  if (TraceSuperWord) {
    tty->print_cr("\nAfter find_adjacent_refs");
    print_packset();
  }
#endif
}

//------------------------------find_align_to_ref---------------------------
// Find a memory reference to align the loop induction variable to.
// Looks first at stores then at loads, looking for a memory reference
// with the largest number of references similar to it.
MemNode* SuperWord::find_align_to_ref(Node_List &memops) {
  GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0);

  // Count number of comparable memory ops
  for (uint i = 0; i < memops.size(); i++) {
    MemNode* s1 = memops.at(i)->as_Mem();
    SWPointer p1(s1, this);
    // Discard if pre loop can't align this reference
    if (!ref_is_alignable(p1)) {
      *cmp_ct.adr_at(i) = 0;
      continue;
    }
    for (uint j = i+1; j < memops.size(); j++) {
      MemNode* s2 = memops.at(j)->as_Mem();
      if (isomorphic(s1, s2)) {
        SWPointer p2(s2, this);
        if (p1.comparable(p2)) {
          (*cmp_ct.adr_at(i))++;
          (*cmp_ct.adr_at(j))++;
        }
      }
    }
  }

  // Find Store (or Load) with the greatest number of "comparable" references,
  // biggest vector size, smallest data size and smallest iv offset.
  int max_ct        = 0;
  int max_vw        = 0;
  int max_idx       = -1;
  int min_size      = max_jint;
  int min_iv_offset = max_jint;
  for (uint j = 0; j < memops.size(); j++) {
    MemNode* s = memops.at(j)->as_Mem();
    if (s->is_Store()) {
      int vw = vector_width_in_bytes(s);
      assert(vw > 1, "sanity");
      SWPointer p(s, this);
      if (cmp_ct.at(j) >  max_ct ||
          cmp_ct.at(j) == max_ct &&
            (vw >  max_vw ||
             vw == max_vw &&
              (data_size(s) <  min_size ||
               data_size(s) == min_size &&
                 (p.offset_in_bytes() < min_iv_offset)))) {
        max_ct = cmp_ct.at(j);
        max_vw = vw;
        max_idx = j;
        min_size = data_size(s);
        min_iv_offset = p.offset_in_bytes();
      }
    }
  }
  // If no stores, look at loads
  if (max_ct == 0) {
    for (uint j = 0; j < memops.size(); j++) {
      MemNode* s = memops.at(j)->as_Mem();
      if (s->is_Load()) {
        int vw = vector_width_in_bytes(s);
        assert(vw > 1, "sanity");
        SWPointer p(s, this);
        if (cmp_ct.at(j) >  max_ct ||
            cmp_ct.at(j) == max_ct &&
              (vw >  max_vw ||
               vw == max_vw &&
                (data_size(s) <  min_size ||
                 data_size(s) == min_size &&
                   (p.offset_in_bytes() < min_iv_offset)))) {
          max_ct = cmp_ct.at(j);
          max_vw = vw;
          max_idx = j;
          min_size = data_size(s);
          min_iv_offset = p.offset_in_bytes();
        }
      }
    }
  }

#ifdef ASSERT
  if (TraceSuperWord && Verbose) {
    tty->print_cr("\nVector memops after find_align_to_refs");
    for (uint i = 0; i < memops.size(); i++) {
      MemNode* s = memops.at(i)->as_Mem();
      s->dump();
    }
  }
#endif

  if (max_ct > 0) {
#ifdef ASSERT
    if (TraceSuperWord) {
      tty->print("\nVector align to node: ");
      memops.at(max_idx)->as_Mem()->dump();
    }
#endif
    return memops.at(max_idx)->as_Mem();
  }
  return NULL;
}

//------------------------------ref_is_alignable---------------------------
// Can the preloop align the reference to position zero in the vector?
bool SuperWord::ref_is_alignable(SWPointer& p) {
  if (!p.has_iv()) {
    return true;   // no induction variable
  }
  CountedLoopEndNode* pre_end = get_pre_loop_end(lp()->as_CountedLoop());
  assert(pre_end != NULL, "we must have a correct pre-loop");
  assert(pre_end->stride_is_con(), "pre loop stride is constant");
  int preloop_stride = pre_end->stride_con();

  int span = preloop_stride * p.scale_in_bytes();
  int mem_size = p.memory_size();
  int offset   = p.offset_in_bytes();
  // Stride one accesses are alignable if offset is aligned to memory operation size.
  // Offset can be unaligned when UseUnalignedAccesses is used.
  if (ABS(span) == mem_size && (ABS(offset) % mem_size) == 0) {
    return true;
  }
  // If the initial offset from start of the object is computable,
  // check if the pre-loop can align the final offset accordingly.
  //
  // In other words: Can we find an i such that the offset
  // after i pre-loop iterations is aligned to vw?
  //   (init_offset + pre_loop) % vw == 0              (1)
  // where
  //   pre_loop = i * span
  // is the number of bytes added to the offset by i pre-loop iterations.
  //
  // For this to hold we need pre_loop to increase init_offset by
  //   pre_loop = vw - (init_offset % vw)
  //
  // This is only possible if pre_loop is divisible by span because each
  // pre-loop iteration increases the initial offset by 'span' bytes:
  //   (vw - (init_offset % vw)) % span == 0
  //
  int vw = vector_width_in_bytes(p.mem());
  assert(vw > 1, "sanity");
  Node* init_nd = pre_end->init_trip();
  if (init_nd->is_Con() && p.invar() == NULL) {
    int init = init_nd->bottom_type()->is_int()->get_con();
    int init_offset = init * p.scale_in_bytes() + offset;
    if (init_offset < 0) { // negative offset from object start?
      return false;        // may happen in dead loop
    }
    if (vw % span == 0) {
      // If vm is a multiple of span, we use formula (1).
      if (span > 0) {
        return (vw - (init_offset % vw)) % span == 0;
      } else {
        assert(span < 0, "nonzero stride * scale");
        return (init_offset % vw) % -span == 0;
      }
    } else if (span % vw == 0) {
      // If span is a multiple of vw, we can simplify formula (1) to:
      //   (init_offset + i * span) % vw == 0
      //     =>
      //   (init_offset % vw) + ((i * span) % vw) == 0
      //     =>
      //   init_offset % vw == 0
      //
      // Because we add a multiple of vw to the initial offset, the final
      // offset is a multiple of vw if and only if init_offset is a multiple.
      //
      return (init_offset % vw) == 0;
    }
  }
  return false;
}

//---------------------------get_iv_adjustment---------------------------
// Calculate loop's iv adjustment for this memory ops.
int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
  SWPointer align_to_ref_p(mem_ref, this);
  int offset = align_to_ref_p.offset_in_bytes();
  int scale  = align_to_ref_p.scale_in_bytes();
  int elt_size = align_to_ref_p.memory_size();
  int vw       = vector_width_in_bytes(mem_ref);
  assert(vw > 1, "sanity");
  int iv_adjustment;
  if (scale != 0) {
    int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
    // At least one iteration is executed in pre-loop by default. As result
    // several iterations are needed to align memory operations in main-loop even
    // if offset is 0.
    int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw));
    assert(((ABS(iv_adjustment_in_bytes) % elt_size) == 0),
           err_msg_res("(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size));
    iv_adjustment = iv_adjustment_in_bytes/elt_size;
  } else {
    // This memory op is not dependent on iv (scale == 0)
    iv_adjustment = 0;
  }

#ifndef PRODUCT
  if (TraceSuperWord)
    tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d",
                  offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
#endif
  return iv_adjustment;
}

//---------------------------dependence_graph---------------------------
// Construct dependency graph.
// Add dependence edges to load/store nodes for memory dependence
//    A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)
void SuperWord::dependence_graph() {
  // First, assign a dependence node to each memory node
  for (int i = 0; i < _block.length(); i++ ) {
    Node *n = _block.at(i);
    if (n->is_Mem() || n->is_Phi() && n->bottom_type() == Type::MEMORY) {
      _dg.make_node(n);
    }
  }

  // For each memory slice, create the dependences
  for (int i = 0; i < _mem_slice_head.length(); i++) {
    Node* n      = _mem_slice_head.at(i);
    Node* n_tail = _mem_slice_tail.at(i);

    // Get slice in predecessor order (last is first)
    mem_slice_preds(n_tail, n, _nlist);

    // Make the slice dependent on the root
    DepMem* slice = _dg.dep(n);
    _dg.make_edge(_dg.root(), slice);

    // Create a sink for the slice
    DepMem* slice_sink = _dg.make_node(NULL);
    _dg.make_edge(slice_sink, _dg.tail());

    // Now visit each pair of memory ops, creating the edges
    for (int j = _nlist.length() - 1; j >= 0 ; j--) {
      Node* s1 = _nlist.at(j);

      // If no dependency yet, use slice
      if (_dg.dep(s1)->in_cnt() == 0) {
        _dg.make_edge(slice, s1);
      }
      SWPointer p1(s1->as_Mem(), this);
      bool sink_dependent = true;
      for (int k = j - 1; k >= 0; k--) {
        Node* s2 = _nlist.at(k);
        if (s1->is_Load() && s2->is_Load())
          continue;
        SWPointer p2(s2->as_Mem(), this);

        int cmp = p1.cmp(p2);
        if (SuperWordRTDepCheck &&
            p1.base() != p2.base() && p1.valid() && p2.valid()) {
          // Create a runtime check to disambiguate
          OrderedPair pp(p1.base(), p2.base());
          _disjoint_ptrs.append_if_missing(pp);
        } else if (!SWPointer::not_equal(cmp)) {
          // Possibly same address
          _dg.make_edge(s1, s2);
          sink_dependent = false;
        }
      }
      if (sink_dependent) {
        _dg.make_edge(s1, slice_sink);
      }
    }
#ifndef PRODUCT
    if (TraceSuperWord) {
      tty->print_cr("\nDependence graph for slice: %d", n->_idx);
      for (int q = 0; q < _nlist.length(); q++) {
        _dg.print(_nlist.at(q));
      }
      tty->cr();
    }
#endif
    _nlist.clear();
  }

#ifndef PRODUCT
  if (TraceSuperWord) {
    tty->print_cr("\ndisjoint_ptrs: %s", _disjoint_ptrs.length() > 0 ? "" : "NONE");
    for (int r = 0; r < _disjoint_ptrs.length(); r++) {
      _disjoint_ptrs.at(r).print();
      tty->cr();
    }
    tty->cr();
  }
#endif
}

//---------------------------mem_slice_preds---------------------------
// Return a memory slice (node list) in predecessor order starting at "start"
void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds) {
  assert(preds.length() == 0, "start empty");
  Node* n = start;
  Node* prev = NULL;
  while (true) {
    assert(in_bb(n), "must be in block");
    for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
      Node* out = n->fast_out(i);
      if (out->is_Load()) {
        if (in_bb(out)) {
          preds.push(out);
        }
      } else {
        // FIXME
        if (out->is_MergeMem() && !in_bb(out)) {
          // Either unrolling is causing a memory edge not to disappear,
          // or need to run igvn.optimize() again before SLP
        } else if (out->is_Phi() && out->bottom_type() == Type::MEMORY && !in_bb(out)) {
          // Ditto.  Not sure what else to check further.
        } else if (out->Opcode() == Op_StoreCM && out->in(MemNode::OopStore) == n) {
          // StoreCM has an input edge used as a precedence edge.
          // Maybe an issue when oop stores are vectorized.
        } else {
          assert(out == prev || prev == NULL, "no branches off of store slice");
        }
      }
    }
    if (n == stop) break;
    preds.push(n);
    prev = n;
    assert(n->is_Mem(), err_msg_res("unexpected node %s", n->Name()));
    n = n->in(MemNode::Memory);
  }
}

//------------------------------stmts_can_pack---------------------------
// Can s1 and s2 be in a pack with s1 immediately preceding s2 and
// s1 aligned at "align"
bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {

  // Do not use superword for non-primitives
  BasicType bt1 = velt_basic_type(s1);
  BasicType bt2 = velt_basic_type(s2);
  if(!is_java_primitive(bt1) || !is_java_primitive(bt2))
    return false;
  if (Matcher::max_vector_size(bt1) < 2) {
    return false; // No vectors for this type
  }

  if (isomorphic(s1, s2)) {
    if (independent(s1, s2)) {
      if (!exists_at(s1, 0) && !exists_at(s2, 1)) {
        if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) {
          int s1_align = alignment(s1);
          int s2_align = alignment(s2);
          if (s1_align == top_align || s1_align == align) {
            if (s2_align == top_align || s2_align == align + data_size(s1)) {
              return true;
            }
          }
        }
      }
    }
  }
  return false;
}

//------------------------------exists_at---------------------------
// Does s exist in a pack at position pos?
bool SuperWord::exists_at(Node* s, uint pos) {
  for (int i = 0; i < _packset.length(); i++) {
    Node_List* p = _packset.at(i);
    if (p->at(pos) == s) {
      return true;
    }
  }
  return false;
}

//------------------------------are_adjacent_refs---------------------------
// Is s1 immediately before s2 in memory?
bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) {
  if (!s1->is_Mem() || !s2->is_Mem()) return false;
  if (!in_bb(s1)    || !in_bb(s2))    return false;

  // Do not use superword for non-primitives
  if (!is_java_primitive(s1->as_Mem()->memory_type()) ||
      !is_java_primitive(s2->as_Mem()->memory_type())) {
    return false;
  }

  // FIXME - co_locate_pack fails on Stores in different mem-slices, so
  // only pack memops that are in the same alias set until that's fixed.
  if (_phase->C->get_alias_index(s1->as_Mem()->adr_type()) !=
      _phase->C->get_alias_index(s2->as_Mem()->adr_type()))
    return false;
  SWPointer p1(s1->as_Mem(), this);
  SWPointer p2(s2->as_Mem(), this);
  if (p1.base() != p2.base() || !p1.comparable(p2)) return false;
  int diff = p2.offset_in_bytes() - p1.offset_in_bytes();
  return diff == data_size(s1);
}

//------------------------------isomorphic---------------------------
// Are s1 and s2 similar?
bool SuperWord::isomorphic(Node* s1, Node* s2) {
  if (s1->Opcode() != s2->Opcode()) return false;
  if (s1->req() != s2->req()) return false;
  if (s1->in(0) != s2->in(0)) return false;
  if (!same_velt_type(s1, s2)) return false;
  return true;
}

//------------------------------independent---------------------------
// Is there no data path from s1 to s2 or s2 to s1?
bool SuperWord::independent(Node* s1, Node* s2) {
  //  assert(s1->Opcode() == s2->Opcode(), "check isomorphic first");
  int d1 = depth(s1);
  int d2 = depth(s2);
  if (d1 == d2) return s1 != s2;
  Node* deep    = d1 > d2 ? s1 : s2;
  Node* shallow = d1 > d2 ? s2 : s1;

  visited_clear();

  return independent_path(shallow, deep);
}

//------------------------------independent_path------------------------------
// Helper for independent
bool SuperWord::independent_path(Node* shallow, Node* deep, uint dp) {
  if (dp >= 1000) return false; // stop deep recursion
  visited_set(deep);
  int shal_depth = depth(shallow);
  assert(shal_depth <= depth(deep), "must be");
  for (DepPreds preds(deep, _dg); !preds.done(); preds.next()) {
    Node* pred = preds.current();
    if (in_bb(pred) && !visited_test(pred)) {
      if (shallow == pred) {
        return false;
      }
      if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) {
        return false;
      }
    }
  }
  return true;
}

//------------------------------set_alignment---------------------------
void SuperWord::set_alignment(Node* s1, Node* s2, int align) {
  set_alignment(s1, align);
  if (align == top_align || align == bottom_align) {
    set_alignment(s2, align);
  } else {
    set_alignment(s2, align + data_size(s1));
  }
}

//------------------------------data_size---------------------------
int SuperWord::data_size(Node* s) {
  int bsize = type2aelembytes(velt_basic_type(s));
  assert(bsize != 0, "valid size");
  return bsize;
}

//------------------------------extend_packlist---------------------------
// Extend packset by following use->def and def->use links from pack members.
void SuperWord::extend_packlist() {
  bool changed;
  do {
    changed = false;
    for (int i = 0; i < _packset.length(); i++) {
      Node_List* p = _packset.at(i);
      changed |= follow_use_defs(p);
      changed |= follow_def_uses(p);
    }
  } while (changed);

#ifndef PRODUCT
  if (TraceSuperWord) {
    tty->print_cr("\nAfter extend_packlist");
    print_packset();
  }
#endif
}

//------------------------------follow_use_defs---------------------------
// Extend the packset by visiting operand definitions of nodes in pack p
bool SuperWord::follow_use_defs(Node_List* p) {
  assert(p->size() == 2, "just checking");
  Node* s1 = p->at(0);
  Node* s2 = p->at(1);
  assert(s1->req() == s2->req(), "just checking");
  assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");

  if (s1->is_Load()) return false;

  int align = alignment(s1);
  bool changed = false;
  int start = s1->is_Store() ? MemNode::ValueIn   : 1;
  int end   = s1->is_Store() ? MemNode::ValueIn+1 : s1->req();
  for (int j = start; j < end; j++) {
    Node* t1 = s1->in(j);
    Node* t2 = s2->in(j);
    if (!in_bb(t1) || !in_bb(t2))
      continue;
    if (stmts_can_pack(t1, t2, align)) {
      if (est_savings(t1, t2) >= 0) {
        Node_List* pair = new Node_List();
        pair->push(t1);
        pair->push(t2);
        _packset.append(pair);
        set_alignment(t1, t2, align);
        changed = true;
      }
    }
  }
  return changed;
}

//------------------------------follow_def_uses---------------------------
// Extend the packset by visiting uses of nodes in pack p
bool SuperWord::follow_def_uses(Node_List* p) {
  bool changed = false;
  Node* s1 = p->at(0);
  Node* s2 = p->at(1);
  assert(p->size() == 2, "just checking");
  assert(s1->req() == s2->req(), "just checking");
  assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");

  if (s1->is_Store()) return false;

  int align = alignment(s1);
  int savings = -1;
  Node* u1 = NULL;
  Node* u2 = NULL;
  for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
    Node* t1 = s1->fast_out(i);
    if (!in_bb(t1)) continue;
    for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) {
      Node* t2 = s2->fast_out(j);
      if (!in_bb(t2)) continue;
      if (!opnd_positions_match(s1, t1, s2, t2))
        continue;
      if (stmts_can_pack(t1, t2, align)) {
        int my_savings = est_savings(t1, t2);
        if (my_savings > savings) {
          savings = my_savings;
          u1 = t1;
          u2 = t2;
        }
      }
    }
  }
  if (savings >= 0) {
    Node_List* pair = new Node_List();
    pair->push(u1);
    pair->push(u2);
    _packset.append(pair);
    set_alignment(u1, u2, align);
    changed = true;
  }
  return changed;
}

//---------------------------opnd_positions_match-------------------------
// Is the use of d1 in u1 at the same operand position as d2 in u2?
bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) {
  uint ct = u1->req();
  if (ct != u2->req()) return false;
  uint i1 = 0;
  uint i2 = 0;
  do {
    for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break;
    for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break;
    if (i1 != i2) {
      if ((i1 == (3-i2)) && (u2->is_Add() || u2->is_Mul())) {
        // Further analysis relies on operands position matching.
        u2->swap_edges(i1, i2);
      } else {
        return false;
      }
    }
  } while (i1 < ct);
  return true;
}

//------------------------------est_savings---------------------------
// Estimate the savings from executing s1 and s2 as a pack
int SuperWord::est_savings(Node* s1, Node* s2) {
  int save_in = 2 - 1; // 2 operations per instruction in packed form

  // inputs
  for (uint i = 1; i < s1->req(); i++) {
    Node* x1 = s1->in(i);
    Node* x2 = s2->in(i);
    if (x1 != x2) {
      if (are_adjacent_refs(x1, x2)) {
        save_in += adjacent_profit(x1, x2);
      } else if (!in_packset(x1, x2)) {
        save_in -= pack_cost(2);
      } else {
        save_in += unpack_cost(2);
      }
    }
  }

  // uses of result
  uint ct = 0;
  int save_use = 0;
  for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
    Node* s1_use = s1->fast_out(i);
    for (int j = 0; j < _packset.length(); j++) {
      Node_List* p = _packset.at(j);
      if (p->at(0) == s1_use) {
        for (DUIterator_Fast kmax, k = s2->fast_outs(kmax); k < kmax; k++) {
          Node* s2_use = s2->fast_out(k);
          if (p->at(p->size()-1) == s2_use) {
            ct++;
            if (are_adjacent_refs(s1_use, s2_use)) {
              save_use += adjacent_profit(s1_use, s2_use);
            }
          }
        }
      }
    }
  }

  if (ct < s1->outcnt()) save_use += unpack_cost(1);
  if (ct < s2->outcnt()) save_use += unpack_cost(1);

  return MAX2(save_in, save_use);
}

//------------------------------costs---------------------------
int SuperWord::adjacent_profit(Node* s1, Node* s2) { return 2; }
int SuperWord::pack_cost(int ct)   { return ct; }
int SuperWord::unpack_cost(int ct) { return ct; }

//------------------------------combine_packs---------------------------
// Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last
void SuperWord::combine_packs() {
  bool changed = true;
  // Combine packs regardless max vector size.
  while (changed) {
    changed = false;
    for (int i = 0; i < _packset.length(); i++) {
      Node_List* p1 = _packset.at(i);
      if (p1 == NULL) continue;
      for (int j = 0; j < _packset.length(); j++) {
        Node_List* p2 = _packset.at(j);
        if (p2 == NULL) continue;
        if (i == j) continue;
        if (p1->at(p1->size()-1) == p2->at(0)) {
          for (uint k = 1; k < p2->size(); k++) {
            p1->push(p2->at(k));
          }
          _packset.at_put(j, NULL);
          changed = true;
        }
      }
    }
  }

  // Split packs which have size greater then max vector size.
  for (int i = 0; i < _packset.length(); i++) {
    Node_List* p1 = _packset.at(i);
    if (p1 != NULL) {
      BasicType bt = velt_basic_type(p1->at(0));
      uint max_vlen = Matcher::max_vector_size(bt); // Max elements in vector
      assert(is_power_of_2(max_vlen), "sanity");
      uint psize = p1->size();
      if (!is_power_of_2(psize)) {
        // Skip pack which can't be vector.
        // case1: for(...) { a[i] = i; }    elements values are different (i+x)
        // case2: for(...) { a[i] = b[i+1]; }  can't align both, load and store
        _packset.at_put(i, NULL);
        continue;
      }
      if (psize > max_vlen) {
        Node_List* pack = new Node_List();
        for (uint j = 0; j < psize; j++) {
          pack->push(p1->at(j));
          if (pack->size() >= max_vlen) {
            assert(is_power_of_2(pack->size()), "sanity");
            _packset.append(pack);
            pack = new Node_List();
          }
        }
        _packset.at_put(i, NULL);
      }
    }
  }

  // Compress list.
  for (int i = _packset.length() - 1; i >= 0; i--) {
    Node_List* p1 = _packset.at(i);
    if (p1 == NULL) {
      _packset.remove_at(i);
    }
  }

#ifndef PRODUCT
  if (TraceSuperWord) {
    tty->print_cr("\nAfter combine_packs");
    print_packset();
  }
#endif
}

//-----------------------------construct_my_pack_map--------------------------
// Construct the map from nodes to packs.  Only valid after the
// point where a node is only in one pack (after combine_packs).
void SuperWord::construct_my_pack_map() {
  Node_List* rslt = NULL;
  for (int i = 0; i < _packset.length(); i++) {
    Node_List* p = _packset.at(i);
    for (uint j = 0; j < p->size(); j++) {
      Node* s = p->at(j);
      assert(my_pack(s) == NULL, "only in one pack");
      set_my_pack(s, p);
    }
  }
}

//------------------------------filter_packs---------------------------
// Remove packs that are not implemented or not profitable.
void SuperWord::filter_packs() {

  // Remove packs that are not implemented
  for (int i = _packset.length() - 1; i >= 0; i--) {
    Node_List* pk = _packset.at(i);
    bool impl = implemented(pk);
    if (!impl) {
#ifndef PRODUCT
      if (TraceSuperWord && Verbose) {
        tty->print_cr("Unimplemented");
        pk->at(0)->dump();
      }
#endif
      remove_pack_at(i);
    }
  }

  // Remove packs that are not profitable
  bool changed;
  do {
    changed = false;
    for (int i = _packset.length() - 1; i >= 0; i--) {
      Node_List* pk = _packset.at(i);
      bool prof = profitable(pk);
      if (!prof) {
#ifndef PRODUCT
        if (TraceSuperWord && Verbose) {
          tty->print_cr("Unprofitable");
          pk->at(0)->dump();
        }
#endif
        remove_pack_at(i);
        changed = true;
      }
    }
  } while (changed);

#ifndef PRODUCT
  if (TraceSuperWord) {
    tty->print_cr("\nAfter filter_packs");
    print_packset();
    tty->cr();
  }
#endif
}

//------------------------------implemented---------------------------
// Can code be generated for pack p?
bool SuperWord::implemented(Node_List* p) {
  Node* p0 = p->at(0);
  return VectorNode::implemented(p0->Opcode(), p->size(), velt_basic_type(p0));
}

//------------------------------same_inputs--------------------------
// For pack p, are all idx operands the same?
static bool same_inputs(Node_List* p, int idx) {
  Node* p0 = p->at(0);
  uint vlen = p->size();
  Node* p0_def = p0->in(idx);
  for (uint i = 1; i < vlen; i++) {
    Node* pi = p->at(i);
    Node* pi_def = pi->in(idx);
    if (p0_def != pi_def)
      return false;
  }
  return true;
}

//------------------------------profitable---------------------------
// For pack p, are all operands and all uses (with in the block) vector?
bool SuperWord::profitable(Node_List* p) {
  Node* p0 = p->at(0);
  uint start, end;
  VectorNode::vector_operands(p0, &start, &end);

  // Return false if some inputs are not vectors or vectors with different
  // size or alignment.
  // Also, for now, return false if not scalar promotion case when inputs are
  // the same. Later, implement PackNode and allow differing, non-vector inputs
  // (maybe just the ones from outside the block.)
  for (uint i = start; i < end; i++) {
    if (!is_vector_use(p0, i))
      return false;
  }
  if (VectorNode::is_shift(p0)) {
    // For now, return false if shift count is vector or not scalar promotion
    // case (different shift counts) because it is not supported yet.
    Node* cnt = p0->in(2);
    Node_List* cnt_pk = my_pack(cnt);
    if (cnt_pk != NULL)
      return false;
    if (!same_inputs(p, 2))
      return false;
  }
  if (!p0->is_Store()) {
    // For now, return false if not all uses are vector.
    // Later, implement ExtractNode and allow non-vector uses (maybe
    // just the ones outside the block.)
    for (uint i = 0; i < p->size(); i++) {
      Node* def = p->at(i);
      for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
        Node* use = def->fast_out(j);
        for (uint k = 0; k < use->req(); k++) {
          Node* n = use->in(k);
          if (def == n) {
            if (!is_vector_use(use, k)) {
              return false;
            }
          }
        }
      }
    }
  }
  return true;
}

//------------------------------schedule---------------------------
// Adjust the memory graph for the packed operations
void SuperWord::schedule() {

  // Co-locate in the memory graph the members of each memory pack
  for (int i = 0; i < _packset.length(); i++) {
    co_locate_pack(_packset.at(i));
  }
}

//-------------------------------remove_and_insert-------------------
// Remove "current" from its current position in the memory graph and insert
// it after the appropriate insertion point (lip or uip).
void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip,
                                  Node *uip, Unique_Node_List &sched_before) {
  Node* my_mem = current->in(MemNode::Memory);
  bool sched_up = sched_before.member(current);

  // remove current_store from its current position in the memmory graph
  for (DUIterator i = current->outs(); current->has_out(i); i++) {
    Node* use = current->out(i);
    if (use->is_Mem()) {
      assert(use->in(MemNode::Memory) == current, "must be");
      if (use == prev) { // connect prev to my_mem
          _igvn.replace_input_of(use, MemNode::Memory, my_mem);
          --i; //deleted this edge; rescan position
      } else if (sched_before.member(use)) {
        if (!sched_up) { // Will be moved together with current
          _igvn.replace_input_of(use, MemNode::Memory, uip);
          --i; //deleted this edge; rescan position
        }
      } else {
        if (sched_up) { // Will be moved together with current
          _igvn.replace_input_of(use, MemNode::Memory, lip);
          --i; //deleted this edge; rescan position
        }
      }
    }
  }

  Node *insert_pt =  sched_up ?  uip : lip;

  // all uses of insert_pt's memory state should use current's instead
  for (DUIterator i = insert_pt->outs(); insert_pt->has_out(i); i++) {
    Node* use = insert_pt->out(i);
    if (use->is_Mem()) {
      assert(use->in(MemNode::Memory) == insert_pt, "must be");
      _igvn.replace_input_of(use, MemNode::Memory, current);
      --i; //deleted this edge; rescan position
    } else if (!sched_up && use->is_Phi() && use->bottom_type() == Type::MEMORY) {
      uint pos; //lip (lower insert point) must be the last one in the memory slice
      for (pos=1; pos < use->req(); pos++) {
        if (use->in(pos) == insert_pt) break;
      }
      _igvn.replace_input_of(use, pos, current);
      --i;
    }
  }

  //connect current to insert_pt
  _igvn.replace_input_of(current, MemNode::Memory, insert_pt);
}

//------------------------------co_locate_pack----------------------------------
// To schedule a store pack, we need to move any sandwiched memory ops either before
// or after the pack, based upon dependence information:
// (1) If any store in the pack depends on the sandwiched memory op, the
//     sandwiched memory op must be scheduled BEFORE the pack;
// (2) If a sandwiched memory op depends on any store in the pack, the
//     sandwiched memory op must be scheduled AFTER the pack;
// (3) If a sandwiched memory op (say, memA) depends on another sandwiched
//     memory op (say memB), memB must be scheduled before memA. So, if memA is
//     scheduled before the pack, memB must also be scheduled before the pack;
// (4) If there is no dependence restriction for a sandwiched memory op, we simply
//     schedule this store AFTER the pack
// (5) We know there is no dependence cycle, so there in no other case;
// (6) Finally, all memory ops in another single pack should be moved in the same direction.
//
// To schedule a load pack, we use the memory state of either the first or the last load in
// the pack, based on the dependence constraint.
void SuperWord::co_locate_pack(Node_List* pk) {
  if (pk->at(0)->is_Store()) {
    MemNode* first     = executed_first(pk)->as_Mem();
    MemNode* last      = executed_last(pk)->as_Mem();
    Unique_Node_List schedule_before_pack;
    Unique_Node_List memops;

    MemNode* current   = last->in(MemNode::Memory)->as_Mem();
    MemNode* previous  = last;
    while (true) {
      assert(in_bb(current), "stay in block");
      memops.push(previous);
      for (DUIterator i = current->outs(); current->has_out(i); i++) {
        Node* use = current->out(i);
        if (use->is_Mem() && use != previous)
          memops.push(use);
      }
      if (current == first) break;
      previous = current;
      current  = current->in(MemNode::Memory)->as_Mem();
    }

    // determine which memory operations should be scheduled before the pack
    for (uint i = 1; i < memops.size(); i++) {
      Node *s1 = memops.at(i);
      if (!in_pack(s1, pk) && !schedule_before_pack.member(s1)) {
        for (uint j = 0; j< i; j++) {
          Node *s2 = memops.at(j);
          if (!independent(s1, s2)) {
            if (in_pack(s2, pk) || schedule_before_pack.member(s2)) {
              schedule_before_pack.push(s1); // s1 must be scheduled before
              Node_List* mem_pk = my_pack(s1);
              if (mem_pk != NULL) {
                for (uint ii = 0; ii < mem_pk->size(); ii++) {
                  Node* s = mem_pk->at(ii);  // follow partner
                  if (memops.member(s) && !schedule_before_pack.member(s))
                    schedule_before_pack.push(s);
                }
              }
              break;
            }
          }
        }
      }
    }

    Node*    upper_insert_pt = first->in(MemNode::Memory);
    // Following code moves loads connected to upper_insert_pt below aliased stores.
    // Collect such loads here and reconnect them back to upper_insert_pt later.
    memops.clear();
    for (DUIterator i = upper_insert_pt->outs(); upper_insert_pt->has_out(i); i++) {
      Node* use = upper_insert_pt->out(i);
      if (use->is_Mem() && !use->is_Store()) {
        memops.push(use);
      }
    }

    MemNode* lower_insert_pt = last;
    previous                 = last; //previous store in pk
    current                  = last->in(MemNode::Memory)->as_Mem();

    // start scheduling from "last" to "first"
    while (true) {
      assert(in_bb(current), "stay in block");
      assert(in_pack(previous, pk), "previous stays in pack");
      Node* my_mem = current->in(MemNode::Memory);

      if (in_pack(current, pk)) {
        // Forward users of my memory state (except "previous) to my input memory state
        for (DUIterator i = current->outs(); current->has_out(i); i++) {
          Node* use = current->out(i);
          if (use->is_Mem() && use != previous) {
            assert(use->in(MemNode::Memory) == current, "must be");
            if (schedule_before_pack.member(use)) {
              _igvn.replace_input_of(use, MemNode::Memory, upper_insert_pt);
            } else {
              _igvn.replace_input_of(use, MemNode::Memory, lower_insert_pt);
            }
            --i; // deleted this edge; rescan position
          }
        }
        previous = current;
      } else { // !in_pack(current, pk) ==> a sandwiched store
        remove_and_insert(current, previous, lower_insert_pt, upper_insert_pt, schedule_before_pack);
      }

      if (current == first) break;
      current = my_mem->as_Mem();
    } // end while

    // Reconnect loads back to upper_insert_pt.
    for (uint i = 0; i < memops.size(); i++) {
      Node *ld = memops.at(i);
      if (ld->in(MemNode::Memory) != upper_insert_pt) {
        _igvn.replace_input_of(ld, MemNode::Memory, upper_insert_pt);
      }
    }
  } else if (pk->at(0)->is_Load()) { //load
    // all loads in the pack should have the same memory state. By default,
    // we use the memory state of the last load. However, if any load could
    // not be moved down due to the dependence constraint, we use the memory
    // state of the first load.
    Node* last_mem  = executed_last(pk)->in(MemNode::Memory);
    Node* first_mem = executed_first(pk)->in(MemNode::Memory);
    bool schedule_last = true;
    for (uint i = 0; i < pk->size(); i++) {
      Node* ld = pk->at(i);
      for (Node* current = last_mem; current != ld->in(MemNode::Memory);
           current=current->in(MemNode::Memory)) {
        assert(current != first_mem, "corrupted memory graph");
        if(current->is_Mem() && !independent(current, ld)){
          schedule_last = false; // a later store depends on this load
          break;
        }
      }
    }

    Node* mem_input = schedule_last ? last_mem : first_mem;
    _igvn.hash_delete(mem_input);
    // Give each load the same memory state
    for (uint i = 0; i < pk->size(); i++) {
      LoadNode* ld = pk->at(i)->as_Load();
      _igvn.replace_input_of(ld, MemNode::Memory, mem_input);
    }
  }
}

//------------------------------output---------------------------
// Convert packs into vector node operations
void SuperWord::output() {
  if (_packset.length() == 0) return;

#ifndef PRODUCT
  if (TraceLoopOpts) {
    tty->print("SuperWord    ");
    lpt()->dump_head();
  }
#endif

  // MUST ENSURE main loop's initial value is properly aligned:
  //  (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0

  align_initial_loop_index(align_to_ref());

  // Insert extract (unpack) operations for scalar uses
  for (int i = 0; i < _packset.length(); i++) {
    insert_extracts(_packset.at(i));
  }

  Compile* C = _phase->C;
  uint max_vlen_in_bytes = 0;
  for (int i = 0; i < _block.length(); i++) {
    Node* n = _block.at(i);
    Node_List* p = my_pack(n);
    if (p && n == executed_last(p)) {
      uint vlen = p->size();
      uint vlen_in_bytes = 0;
      Node* vn = NULL;
      Node* low_adr = p->at(0);
      Node* first   = executed_first(p);
      int   opc = n->Opcode();
      if (n->is_Load()) {
        Node* ctl = n->in(MemNode::Control);
        Node* mem = first->in(MemNode::Memory);
        SWPointer p1(n->as_Mem(), this);
        // Identify the memory dependency for the new loadVector node by
        // walking up through memory chain.
        // This is done to give flexibility to the new loadVector node so that
        // it can move above independent storeVector nodes.
        while (mem->is_StoreVector()) {
          SWPointer p2(mem->as_Mem(), this);
          int cmp = p1.cmp(p2);
          if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) {
            mem = mem->in(MemNode::Memory);
          } else {
            break; // dependent memory
          }
        }
        Node* adr = low_adr->in(MemNode::Address);
        const TypePtr* atyp = n->adr_type();
        vn = LoadVectorNode::make(C, opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n), control_dependency(p));
        vlen_in_bytes = vn->as_LoadVector()->memory_size();
      } else if (n->is_Store()) {
        // Promote value to be stored to vector
        Node* val = vector_opd(p, MemNode::ValueIn);
        Node* ctl = n->in(MemNode::Control);
        Node* mem = first->in(MemNode::Memory);
        Node* adr = low_adr->in(MemNode::Address);
        const TypePtr* atyp = n->adr_type();
        vn = StoreVectorNode::make(C, opc, ctl, mem, adr, atyp, val, vlen);
        vlen_in_bytes = vn->as_StoreVector()->memory_size();
      } else if (n->req() == 3) {
        // Promote operands to vector
        Node* in1 = vector_opd(p, 1);
        Node* in2 = vector_opd(p, 2);
        if (VectorNode::is_invariant_vector(in1) && (n->is_Add() || n->is_Mul())) {
          // Move invariant vector input into second position to avoid register spilling.
          Node* tmp = in1;
          in1 = in2;
          in2 = tmp;
        }
        vn = VectorNode::make(C, opc, in1, in2, vlen, velt_basic_type(n));
        vlen_in_bytes = vn->as_Vector()->length_in_bytes();
      } else {
        ShouldNotReachHere();
      }
      assert(vn != NULL, "sanity");
      _igvn.register_new_node_with_optimizer(vn);
      _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0)));
      for (uint j = 0; j < p->size(); j++) {
        Node* pm = p->at(j);
        _igvn.replace_node(pm, vn);
      }
      _igvn._worklist.push(vn);

      if (vlen_in_bytes > max_vlen_in_bytes) {
        max_vlen_in_bytes = vlen_in_bytes;
      }
#ifdef ASSERT
      if (TraceNewVectors) {
        tty->print("new Vector node: ");
        vn->dump();
      }
#endif
    }
  }
  C->set_max_vector_size(max_vlen_in_bytes);
}

//------------------------------vector_opd---------------------------
// Create a vector operand for the nodes in pack p for operand: in(opd_idx)
Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
  Node* p0 = p->at(0);
  uint vlen = p->size();
  Node* opd = p0->in(opd_idx);

  if (same_inputs(p, opd_idx)) {
    if (opd->is_Vector() || opd->is_LoadVector()) {
      assert(((opd_idx != 2) || !VectorNode::is_shift(p0)), "shift's count can't be vector");
      return opd; // input is matching vector
    }
    if ((opd_idx == 2) && VectorNode::is_shift(p0)) {
      Compile* C = _phase->C;
      Node* cnt = opd;
      // Vector instructions do not mask shift count, do it here.
      juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
      const TypeInt* t = opd->find_int_type();
      if (t != NULL && t->is_con()) {
        juint shift = t->get_con();
        if (shift > mask) { // Unsigned cmp
          cnt = ConNode::make(C, TypeInt::make(shift & mask));
        }
      } else {
        if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {
          cnt = ConNode::make(C, TypeInt::make(mask));
          _igvn.register_new_node_with_optimizer(cnt);
          cnt = new (C) AndINode(opd, cnt);
          _igvn.register_new_node_with_optimizer(cnt);
          _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
        }
        assert(opd->bottom_type()->isa_int(), "int type only");
        // Move non constant shift count into vector register.
        cnt = VectorNode::shift_count(C, p0, cnt, vlen, velt_basic_type(p0));
      }
      if (cnt != opd) {
        _igvn.register_new_node_with_optimizer(cnt);
        _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
      }
      return cnt;
    }
    assert(!opd->is_StoreVector(), "such vector is not expected here");
    // Convert scalar input to vector with the same number of elements as
    // p0's vector. Use p0's type because size of operand's container in
    // vector should match p0's size regardless operand's size.
    const Type* p0_t = velt_type(p0);
    VectorNode* vn = VectorNode::scalar2vector(_phase->C, opd, vlen, p0_t);

    _igvn.register_new_node_with_optimizer(vn);
    _phase->set_ctrl(vn, _phase->get_ctrl(opd));
#ifdef ASSERT
    if (TraceNewVectors) {
      tty->print("new Vector node: ");
      vn->dump();
    }
#endif
    return vn;
  }

  // Insert pack operation
  BasicType bt = velt_basic_type(p0);
  PackNode* pk = PackNode::make(_phase->C, opd, vlen, bt);
  DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); )

  for (uint i = 1; i < vlen; i++) {
    Node* pi = p->at(i);
    Node* in = pi->in(opd_idx);
    assert(my_pack(in) == NULL, "Should already have been unpacked");
    assert(opd_bt == in->bottom_type()->basic_type(), "all same type");
    pk->add_opd(in);
  }
  _igvn.register_new_node_with_optimizer(pk);
  _phase->set_ctrl(pk, _phase->get_ctrl(opd));
#ifdef ASSERT
  if (TraceNewVectors) {
    tty->print("new Vector node: ");
    pk->dump();
  }
#endif
  return pk;
}

//------------------------------insert_extracts---------------------------
// If a use of pack p is not a vector use, then replace the
// use with an extract operation.
void SuperWord::insert_extracts(Node_List* p) {
  if (p->at(0)->is_Store()) return;
  assert(_n_idx_list.is_empty(), "empty (node,index) list");

  // Inspect each use of each pack member.  For each use that is
  // not a vector use, replace the use with an extract operation.

  for (uint i = 0; i < p->size(); i++) {
    Node* def = p->at(i);
    for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
      Node* use = def->fast_out(j);
      for (uint k = 0; k < use->req(); k++) {
        Node* n = use->in(k);
        if (def == n) {
          if (!is_vector_use(use, k)) {
            _n_idx_list.push(use, k);
          }
        }
      }
    }
  }

  while (_n_idx_list.is_nonempty()) {
    Node* use = _n_idx_list.node();
    int   idx = _n_idx_list.index();
    _n_idx_list.pop();
    Node* def = use->in(idx);

    // Insert extract operation
    _igvn.hash_delete(def);
    int def_pos = alignment(def) / data_size(def);

    Node* ex = ExtractNode::make(_phase->C, def, def_pos, velt_basic_type(def));
    _igvn.register_new_node_with_optimizer(ex);
    _phase->set_ctrl(ex, _phase->get_ctrl(def));
    _igvn.replace_input_of(use, idx, ex);
    _igvn._worklist.push(def);

    bb_insert_after(ex, bb_idx(def));
    set_velt_type(ex, velt_type(def));
  }
}

//------------------------------is_vector_use---------------------------
// Is use->in(u_idx) a vector use?
bool SuperWord::is_vector_use(Node* use, int u_idx) {
  Node_List* u_pk = my_pack(use);
  if (u_pk == NULL) return false;
  Node* def = use->in(u_idx);
  Node_List* d_pk = my_pack(def);
  if (d_pk == NULL) {
    // check for scalar promotion
    Node* n = u_pk->at(0)->in(u_idx);
    for (uint i = 1; i < u_pk->size(); i++) {
      if (u_pk->at(i)->in(u_idx) != n) return false;
    }
    return true;
  }
  if (u_pk->size() != d_pk->size())
    return false;
  for (uint i = 0; i < u_pk->size(); i++) {
    Node* ui = u_pk->at(i);
    Node* di = d_pk->at(i);
    if (ui->in(u_idx) != di || alignment(ui) != alignment(di))
      return false;
  }
  return true;
}

//------------------------------construct_bb---------------------------
// Construct reverse postorder list of block members
bool SuperWord::construct_bb() {
  Node* entry = bb();

  assert(_stk.length() == 0,            "stk is empty");
  assert(_block.length() == 0,          "block is empty");
  assert(_data_entry.length() == 0,     "data_entry is empty");
  assert(_mem_slice_head.length() == 0, "mem_slice_head is empty");
  assert(_mem_slice_tail.length() == 0, "mem_slice_tail is empty");

  // Find non-control nodes with no inputs from within block,
  // create a temporary map from node _idx to bb_idx for use
  // by the visited and post_visited sets,
  // and count number of nodes in block.
  int bb_ct = 0;
  for (uint i = 0; i < lpt()->_body.size(); i++ ) {
    Node *n = lpt()->_body.at(i);
    set_bb_idx(n, i); // Create a temporary map
    if (in_bb(n)) {
      if (n->is_LoadStore() || n->is_MergeMem() ||
          (n->is_Proj() && !n->as_Proj()->is_CFG())) {
        // Bailout if the loop has LoadStore, MergeMem or data Proj
        // nodes. Superword optimization does not work with them.
        return false;
      }
      bb_ct++;
      if (!n->is_CFG()) {
        bool found = false;
        for (uint j = 0; j < n->req(); j++) {
          Node* def = n->in(j);
          if (def && in_bb(def)) {
            found = true;
            break;
          }
        }
        if (!found) {
          assert(n != entry, "can't be entry");
          _data_entry.push(n);
        }
      }
    }
  }

  // Find memory slices (head and tail)
  for (DUIterator_Fast imax, i = lp()->fast_outs(imax); i < imax; i++) {
    Node *n = lp()->fast_out(i);
    if (in_bb(n) && (n->is_Phi() && n->bottom_type() == Type::MEMORY)) {
      Node* n_tail  = n->in(LoopNode::LoopBackControl);
      if (n_tail != n->in(LoopNode::EntryControl)) {
        if (!n_tail->is_Mem()) {
          assert(n_tail->is_Mem(), err_msg_res("unexpected node for memory slice: %s", n_tail->Name()));
          return false; // Bailout
        }
        _mem_slice_head.push(n);
        _mem_slice_tail.push(n_tail);
      }
    }
  }

  // Create an RPO list of nodes in block

  visited_clear();
  post_visited_clear();

  // Push all non-control nodes with no inputs from within block, then control entry
  for (int j = 0; j < _data_entry.length(); j++) {
    Node* n = _data_entry.at(j);
    visited_set(n);
    _stk.push(n);
  }
  visited_set(entry);
  _stk.push(entry);

  // Do a depth first walk over out edges
  int rpo_idx = bb_ct - 1;
  int size;
  while ((size = _stk.length()) > 0) {
    Node* n = _stk.top(); // Leave node on stack
    if (!visited_test_set(n)) {
      // forward arc in graph
    } else if (!post_visited_test(n)) {
      // cross or back arc
      for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
        Node *use = n->fast_out(i);
        if (in_bb(use) && !visited_test(use) &&
            // Don't go around backedge
            (!use->is_Phi() || n == entry)) {
          _stk.push(use);
        }
      }
      if (_stk.length() == size) {
        // There were no additional uses, post visit node now
        _stk.pop(); // Remove node from stack
        assert(rpo_idx >= 0, "");
        _block.at_put_grow(rpo_idx, n);
        rpo_idx--;
        post_visited_set(n);
        assert(rpo_idx >= 0 || _stk.is_empty(), "");
      }
    } else {
      _stk.pop(); // Remove post-visited node from stack
    }
  }

  // Create real map of block indices for nodes
  for (int j = 0; j < _block.length(); j++) {
    Node* n = _block.at(j);
    set_bb_idx(n, j);
  }

  initialize_bb(); // Ensure extra info is allocated.

#ifndef PRODUCT
  if (TraceSuperWord) {
    print_bb();
    tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE");
    for (int m = 0; m < _data_entry.length(); m++) {
      tty->print("%3d ", m);
      _data_entry.at(m)->dump();
    }
    tty->print_cr("\nmemory slices: %s", _mem_slice_head.length() > 0 ? "" : "NONE");
    for (int m = 0; m < _mem_slice_head.length(); m++) {
      tty->print("%3d ", m); _mem_slice_head.at(m)->dump();
      tty->print("    ");    _mem_slice_tail.at(m)->dump();
    }
  }
#endif
  assert(rpo_idx == -1 && bb_ct == _block.length(), "all block members found");
  return (_mem_slice_head.length() > 0) || (_data_entry.length() > 0);
}

//------------------------------initialize_bb---------------------------
// Initialize per node info
void SuperWord::initialize_bb() {
  Node* last = _block.at(_block.length() - 1);
  grow_node_info(bb_idx(last));
}

//------------------------------bb_insert_after---------------------------
// Insert n into block after pos
void SuperWord::bb_insert_after(Node* n, int pos) {
  int n_pos = pos + 1;
  // Make room
  for (int i = _block.length() - 1; i >= n_pos; i--) {
    _block.at_put_grow(i+1, _block.at(i));
  }
  for (int j = _node_info.length() - 1; j >= n_pos; j--) {
    _node_info.at_put_grow(j+1, _node_info.at(j));
  }
  // Set value
  _block.at_put_grow(n_pos, n);
  _node_info.at_put_grow(n_pos, SWNodeInfo::initial);
  // Adjust map from node->_idx to _block index
  for (int i = n_pos; i < _block.length(); i++) {
    set_bb_idx(_block.at(i), i);
  }
}

//------------------------------compute_max_depth---------------------------
// Compute max depth for expressions from beginning of block
// Use to prune search paths during test for independence.
void SuperWord::compute_max_depth() {
  int ct = 0;
  bool again;
  do {
    again = false;
    for (int i = 0; i < _block.length(); i++) {
      Node* n = _block.at(i);
      if (!n->is_Phi()) {
        int d_orig = depth(n);
        int d_in   = 0;
        for (DepPreds preds(n, _dg); !preds.done(); preds.next()) {
          Node* pred = preds.current();
          if (in_bb(pred)) {
            d_in = MAX2(d_in, depth(pred));
          }
        }
        if (d_in + 1 != d_orig) {
          set_depth(n, d_in + 1);
          again = true;
        }
      }
    }
    ct++;
  } while (again);
#ifndef PRODUCT
  if (TraceSuperWord && Verbose)
    tty->print_cr("compute_max_depth iterated: %d times", ct);
#endif
}

//-------------------------compute_vector_element_type-----------------------
// Compute necessary vector element type for expressions
// This propagates backwards a narrower integer type when the
// upper bits of the value are not needed.
// Example:  char a,b,c;  a = b + c;
// Normally the type of the add is integer, but for packed character
// operations the type of the add needs to be char.
void SuperWord::compute_vector_element_type() {
#ifndef PRODUCT
  if (TraceSuperWord && Verbose)
    tty->print_cr("\ncompute_velt_type:");
#endif

  // Initial type
  for (int i = 0; i < _block.length(); i++) {
    Node* n = _block.at(i);
    set_velt_type(n, container_type(n));
  }

  // Propagate integer narrowed type backwards through operations
  // that don't depend on higher order bits
  for (int i = _block.length() - 1; i >= 0; i--) {
    Node* n = _block.at(i);
    // Only integer types need be examined
    const Type* vtn = velt_type(n);
    if (vtn->basic_type() == T_INT) {
      uint start, end;
      VectorNode::vector_operands(n, &start, &end);

      for (uint j = start; j < end; j++) {
        Node* in  = n->in(j);
        // Don't propagate through a memory
        if (!in->is_Mem() && in_bb(in) && velt_type(in)->basic_type() == T_INT &&
            data_size(n) < data_size(in)) {
          bool same_type = true;
          for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
            Node *use = in->fast_out(k);
            if (!in_bb(use) || !same_velt_type(use, n)) {
              same_type = false;
              break;
            }
          }
          if (same_type) {
            // For right shifts of small integer types (bool, byte, char, short)
            // we need precise information about sign-ness. Only Load nodes have
            // this information because Store nodes are the same for signed and
            // unsigned values. And any arithmetic operation after a load may
            // expand a value to signed Int so such right shifts can't be used
            // because vector elements do not have upper bits of Int.
            const Type* vt = vtn;
            if (VectorNode::is_shift(in)) {
              Node* load = in->in(1);
              if (load->is_Load() && in_bb(load) && (velt_type(load)->basic_type() == T_INT)) {
                vt = velt_type(load);
              } else if (in->Opcode() != Op_LShiftI) {
                // Widen type to Int to avoid creation of right shift vector
                // (align + data_size(s1) check in stmts_can_pack() will fail).
                // Note, left shifts work regardless type.
                vt = TypeInt::INT;
              }
            }
            set_velt_type(in, vt);
          }
        }
      }
    }
  }
#ifndef PRODUCT
  if (TraceSuperWord && Verbose) {
    for (int i = 0; i < _block.length(); i++) {
      Node* n = _block.at(i);
      velt_type(n)->dump();
      tty->print("\t");
      n->dump();
    }
  }
#endif
}

//------------------------------memory_alignment---------------------------
// Alignment within a vector memory reference
int SuperWord::memory_alignment(MemNode* s, int iv_adjust) {
  SWPointer p(s, this);
  if (!p.valid()) {
    return bottom_align;
  }
  int vw = vector_width_in_bytes(s);
  if (vw < 2) {
    return bottom_align; // No vectors for this type
  }
  int offset  = p.offset_in_bytes();
  offset     += iv_adjust*p.memory_size();
  int off_rem = offset % vw;
  int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;
  return off_mod;
}

//---------------------------container_type---------------------------
// Smallest type containing range of values
const Type* SuperWord::container_type(Node* n) {
  if (n->is_Mem()) {
    BasicType bt = n->as_Mem()->memory_type();
    if (n->is_Store() && (bt == T_CHAR)) {
      // Use T_SHORT type instead of T_CHAR for stored values because any
      // preceding arithmetic operation extends values to signed Int.
      bt = T_SHORT;
    }
    if (n->Opcode() == Op_LoadUB) {
      // Adjust type for unsigned byte loads, it is important for right shifts.
      // T_BOOLEAN is used because there is no basic type representing type
      // TypeInt::UBYTE. Use of T_BOOLEAN for vectors is fine because only
      // size (one byte) and sign is important.
      bt = T_BOOLEAN;
    }
    return Type::get_const_basic_type(bt);
  }
  const Type* t = _igvn.type(n);
  if (t->basic_type() == T_INT) {
    // A narrow type of arithmetic operations will be determined by
    // propagating the type of memory operations.
    return TypeInt::INT;
  }
  return t;
}

bool SuperWord::same_velt_type(Node* n1, Node* n2) {
  const Type* vt1 = velt_type(n1);
  const Type* vt2 = velt_type(n2);
  if (vt1->basic_type() == T_INT && vt2->basic_type() == T_INT) {
    // Compare vectors element sizes for integer types.
    return data_size(n1) == data_size(n2);
  }
  return vt1 == vt2;
}

//------------------------------in_packset---------------------------
// Are s1 and s2 in a pack pair and ordered as s1,s2?
bool SuperWord::in_packset(Node* s1, Node* s2) {
  for (int i = 0; i < _packset.length(); i++) {
    Node_List* p = _packset.at(i);
    assert(p->size() == 2, "must be");
    if (p->at(0) == s1 && p->at(p->size()-1) == s2) {
      return true;
    }
  }
  return false;
}

//------------------------------in_pack---------------------------
// Is s in pack p?
Node_List* SuperWord::in_pack(Node* s, Node_List* p) {
  for (uint i = 0; i < p->size(); i++) {
    if (p->at(i) == s) {
      return p;
    }
  }
  return NULL;
}

//------------------------------remove_pack_at---------------------------
// Remove the pack at position pos in the packset
void SuperWord::remove_pack_at(int pos) {
  Node_List* p = _packset.at(pos);
  for (uint i = 0; i < p->size(); i++) {
    Node* s = p->at(i);
    set_my_pack(s, NULL);
  }
  _packset.remove_at(pos);
}

//------------------------------executed_first---------------------------
// Return the node executed first in pack p.  Uses the RPO block list
// to determine order.
Node* SuperWord::executed_first(Node_List* p) {
  Node* n = p->at(0);
  int n_rpo = bb_idx(n);
  for (uint i = 1; i < p->size(); i++) {
    Node* s = p->at(i);
    int s_rpo = bb_idx(s);
    if (s_rpo < n_rpo) {
      n = s;
      n_rpo = s_rpo;
    }
  }
  return n;
}

//------------------------------executed_last---------------------------
// Return the node executed last in pack p.
Node* SuperWord::executed_last(Node_List* p) {
  Node* n = p->at(0);
  int n_rpo = bb_idx(n);
  for (uint i = 1; i < p->size(); i++) {
    Node* s = p->at(i);
    int s_rpo = bb_idx(s);
    if (s_rpo > n_rpo) {
      n = s;
      n_rpo = s_rpo;
    }
  }
  return n;
}

LoadNode::ControlDependency SuperWord::control_dependency(Node_List* p) {
  LoadNode::ControlDependency dep = LoadNode::DependsOnlyOnTest;
  for (uint i = 0; i < p->size(); i++) {
    Node* n = p->at(i);
    assert(n->is_Load(), "only meaningful for loads");
    if (!n->depends_only_on_test()) {
      dep = LoadNode::Pinned;
    }
  }
  return dep;
}


//----------------------------align_initial_loop_index---------------------------
// Adjust pre-loop limit so that in main loop, a load/store reference
// to align_to_ref will be a position zero in the vector.
//   (iv + k) mod vector_align == 0
void SuperWord::align_initial_loop_index(MemNode* align_to_ref) {
  CountedLoopNode *main_head = lp()->as_CountedLoop();
  assert(main_head->is_main_loop(), "");
  CountedLoopEndNode* pre_end = get_pre_loop_end(main_head);
  assert(pre_end != NULL, "we must have a correct pre-loop");
  Node *pre_opaq1 = pre_end->limit();
  assert(pre_opaq1->Opcode() == Op_Opaque1, "");
  Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
  Node *lim0 = pre_opaq->in(1);

  // Where we put new limit calculations
  Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);

  // Ensure the original loop limit is available from the
  // pre-loop Opaque1 node.
  Node *orig_limit = pre_opaq->original_loop_limit();
  assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "");

  SWPointer align_to_ref_p(align_to_ref, this);
  assert(align_to_ref_p.valid(), "sanity");

  // Given:
  //     lim0 == original pre loop limit
  //     V == v_align (power of 2)
  //     invar == extra invariant piece of the address expression
  //     e == offset [ +/- invar ]
  //
  // When reassociating expressions involving '%' the basic rules are:
  //     (a - b) % k == 0   =>  a % k == b % k
  // and:
  //     (a + b) % k == 0   =>  a % k == (k - b) % k
  //
  // For stride > 0 && scale > 0,
  //   Derive the new pre-loop limit "lim" such that the two constraints:
  //     (1) lim = lim0 + N           (where N is some positive integer < V)
  //     (2) (e + lim) % V == 0
  //   are true.
  //
  //   Substituting (1) into (2),
  //     (e + lim0 + N) % V == 0
  //   solve for N:
  //     N = (V - (e + lim0)) % V
  //   substitute back into (1), so that new limit
  //     lim = lim0 + (V - (e + lim0)) % V
  //
  // For stride > 0 && scale < 0
  //   Constraints:
  //     lim = lim0 + N
  //     (e - lim) % V == 0
  //   Solving for lim:
  //     (e - lim0 - N) % V == 0
  //     N = (e - lim0) % V
  //     lim = lim0 + (e - lim0) % V
  //
  // For stride < 0 && scale > 0
  //   Constraints:
  //     lim = lim0 - N
  //     (e + lim) % V == 0
  //   Solving for lim:
  //     (e + lim0 - N) % V == 0
  //     N = (e + lim0) % V
  //     lim = lim0 - (e + lim0) % V
  //
  // For stride < 0 && scale < 0
  //   Constraints:
  //     lim = lim0 - N
  //     (e - lim) % V == 0
  //   Solving for lim:
  //     (e - lim0 + N) % V == 0
  //     N = (V - (e - lim0)) % V
  //     lim = lim0 - (V - (e - lim0)) % V

  int vw = vector_width_in_bytes(align_to_ref);
  int stride   = iv_stride();
  int scale    = align_to_ref_p.scale_in_bytes();
  int elt_size = align_to_ref_p.memory_size();
  int v_align  = vw / elt_size;
  assert(v_align > 1, "sanity");
  int offset   = align_to_ref_p.offset_in_bytes() / elt_size;
  Node *offsn  = _igvn.intcon(offset);

  Node *e = offsn;
  if (align_to_ref_p.invar() != NULL) {
    // incorporate any extra invariant piece producing (offset +/- invar) >>> log2(elt)
    Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
    Node* aref     = new (_phase->C) URShiftINode(align_to_ref_p.invar(), log2_elt);
    _igvn.register_new_node_with_optimizer(aref);
    _phase->set_ctrl(aref, pre_ctrl);
    if (align_to_ref_p.negate_invar()) {
      e = new (_phase->C) SubINode(e, aref);
    } else {
      e = new (_phase->C) AddINode(e, aref);
    }
    _igvn.register_new_node_with_optimizer(e);
    _phase->set_ctrl(e, pre_ctrl);
  }
  if (vw > ObjectAlignmentInBytes) {
    // incorporate base e +/- base && Mask >>> log2(elt)
    Node* xbase = new(_phase->C) CastP2XNode(NULL, align_to_ref_p.base());
    _igvn.register_new_node_with_optimizer(xbase);
#ifdef _LP64
    xbase  = new (_phase->C) ConvL2INode(xbase);
    _igvn.register_new_node_with_optimizer(xbase);
#endif
    Node* mask = _igvn.intcon(vw-1);
    Node* masked_xbase  = new (_phase->C) AndINode(xbase, mask);
    _igvn.register_new_node_with_optimizer(masked_xbase);
    Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
    Node* bref     = new (_phase->C) URShiftINode(masked_xbase, log2_elt);
    _igvn.register_new_node_with_optimizer(bref);
    _phase->set_ctrl(bref, pre_ctrl);
    e = new (_phase->C) AddINode(e, bref);
    _igvn.register_new_node_with_optimizer(e);
    _phase->set_ctrl(e, pre_ctrl);
  }

  // compute e +/- lim0
  if (scale < 0) {
    e = new (_phase->C) SubINode(e, lim0);
  } else {
    e = new (_phase->C) AddINode(e, lim0);
  }
  _igvn.register_new_node_with_optimizer(e);
  _phase->set_ctrl(e, pre_ctrl);

  if (stride * scale > 0) {
    // compute V - (e +/- lim0)
    Node* va  = _igvn.intcon(v_align);
    e = new (_phase->C) SubINode(va, e);
    _igvn.register_new_node_with_optimizer(e);
    _phase->set_ctrl(e, pre_ctrl);
  }
  // compute N = (exp) % V
  Node* va_msk = _igvn.intcon(v_align - 1);
  Node* N = new (_phase->C) AndINode(e, va_msk);
  _igvn.register_new_node_with_optimizer(N);
  _phase->set_ctrl(N, pre_ctrl);

  //   substitute back into (1), so that new limit
  //     lim = lim0 + N
  Node* lim;
  if (stride < 0) {
    lim = new (_phase->C) SubINode(lim0, N);
  } else {
    lim = new (_phase->C) AddINode(lim0, N);
  }
  _igvn.register_new_node_with_optimizer(lim);
  _phase->set_ctrl(lim, pre_ctrl);
  Node* constrained =
    (stride > 0) ? (Node*) new (_phase->C) MinINode(lim, orig_limit)
                 : (Node*) new (_phase->C) MaxINode(lim, orig_limit);
  _igvn.register_new_node_with_optimizer(constrained);
  _phase->set_ctrl(constrained, pre_ctrl);
  _igvn.hash_delete(pre_opaq);
  pre_opaq->set_req(1, constrained);
}

//----------------------------get_pre_loop_end---------------------------
// Find pre loop end from main loop.  Returns null if none.
CountedLoopEndNode* SuperWord::get_pre_loop_end(CountedLoopNode* cl) {
  // The loop cannot be optimized if the graph shape at
  // the loop entry is inappropriate.
  if (!PhaseIdealLoop::is_canonical_main_loop_entry(cl)) {
    return NULL;
  }

  Node* p_f = cl->in(LoopNode::EntryControl)->in(0)->in(0);
  if (!p_f->is_IfFalse()) return NULL;
  if (!p_f->in(0)->is_CountedLoopEnd()) return NULL;
  CountedLoopEndNode* pre_end = p_f->in(0)->as_CountedLoopEnd();
  CountedLoopNode* loop_node = pre_end->loopnode();
  if (loop_node == NULL || !loop_node->is_pre_loop()) return NULL;
  return pre_end;
}


//------------------------------init---------------------------
void SuperWord::init() {
  _dg.init();
  _packset.clear();
  _disjoint_ptrs.clear();
  _block.clear();
  _data_entry.clear();
  _mem_slice_head.clear();
  _mem_slice_tail.clear();
  _node_info.clear();
  _align_to_ref = NULL;
  _lpt = NULL;
  _lp = NULL;
  _bb = NULL;
  _iv = NULL;
}

//------------------------------print_packset---------------------------
void SuperWord::print_packset() {
#ifndef PRODUCT
  tty->print_cr("packset");
  for (int i = 0; i < _packset.length(); i++) {
    tty->print_cr("Pack: %d", i);
    Node_List* p = _packset.at(i);
    print_pack(p);
  }
#endif
}

//------------------------------print_pack---------------------------
void SuperWord::print_pack(Node_List* p) {
  for (uint i = 0; i < p->size(); i++) {
    print_stmt(p->at(i));
  }
}

//------------------------------print_bb---------------------------
void SuperWord::print_bb() {
#ifndef PRODUCT
  tty->print_cr("\nBlock");
  for (int i = 0; i < _block.length(); i++) {
    Node* n = _block.at(i);
    tty->print("%d ", i);
    if (n) {
      n->dump();
    }
  }
#endif
}

//------------------------------print_stmt---------------------------
void SuperWord::print_stmt(Node* s) {
#ifndef PRODUCT
  tty->print(" align: %d \t", alignment(s));
  s->dump();
#endif
}

//------------------------------blank---------------------------
char* SuperWord::blank(uint depth) {
  static char blanks[101];
  assert(depth < 101, "too deep");
  for (uint i = 0; i < depth; i++) blanks[i] = ' ';
  blanks[depth] = '\0';
  return blanks;
}


//==============================SWPointer===========================

//----------------------------SWPointer------------------------
SWPointer::SWPointer(MemNode* mem, SuperWord* slp) :
  _mem(mem), _slp(slp),  _base(NULL),  _adr(NULL),
  _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {

  Node* adr = mem->in(MemNode::Address);
  if (!adr->is_AddP()) {
    assert(!valid(), "too complex");
    return;
  }
  // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant)
  Node* base = adr->in(AddPNode::Base);
  // The base address should be loop invariant
  if (!invariant(base)) {
    assert(!valid(), "base address is loop variant");
    return;
  }
  //unsafe reference could not be aligned appropriately without runtime checking
  if (base == NULL || base->bottom_type() == Type::TOP) {
    assert(!valid(), "unsafe access");
    return;
  }
  for (int i = 0; i < 3; i++) {
    if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) {
      assert(!valid(), "too complex");
      return;
    }
    adr = adr->in(AddPNode::Address);
    if (base == adr || !adr->is_AddP()) {
      break; // stop looking at addp's
    }
  }
  _base = base;
  _adr  = adr;
  assert(valid(), "Usable");
}

// Following is used to create a temporary object during
// the pattern match of an address expression.
SWPointer::SWPointer(SWPointer* p) :
  _mem(p->_mem), _slp(p->_slp),  _base(NULL),  _adr(NULL),
  _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {}

//------------------------scaled_iv_plus_offset--------------------
// Match: k*iv + offset
// where: k is a constant that maybe zero, and
//        offset is (k2 [+/- invariant]) where k2 maybe zero and invariant is optional
bool SWPointer::scaled_iv_plus_offset(Node* n) {
  if (scaled_iv(n)) {
    return true;
  }
  if (offset_plus_k(n)) {
    return true;
  }
  int opc = n->Opcode();
  if (opc == Op_AddI) {
    if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2))) {
      return true;
    }
    if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) {
      return true;
    }
  } else if (opc == Op_SubI) {
    if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2), true)) {
      return true;
    }
    if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) {
      _scale *= -1;
      return true;
    }
  }
  return false;
}

//----------------------------scaled_iv------------------------
// Match: k*iv where k is a constant that's not zero
bool SWPointer::scaled_iv(Node* n) {
  if (_scale != 0) {
    return false;  // already found a scale
  }
  if (n == iv()) {
    _scale = 1;
    return true;
  }
  int opc = n->Opcode();
  if (opc == Op_MulI) {
    if (n->in(1) == iv() && n->in(2)->is_Con()) {
      _scale = n->in(2)->get_int();
      return true;
    } else if (n->in(2) == iv() && n->in(1)->is_Con()) {
      _scale = n->in(1)->get_int();
      return true;
    }
  } else if (opc == Op_LShiftI) {
    if (n->in(1) == iv() && n->in(2)->is_Con()) {
      _scale = 1 << n->in(2)->get_int();
      return true;
    }
  } else if (opc == Op_ConvI2L) {
    if (n->in(1)->Opcode() == Op_CastII &&
        n->in(1)->as_CastII()->has_range_check()) {
      // Skip range check dependent CastII nodes
      n = n->in(1);
    }
    if (scaled_iv_plus_offset(n->in(1))) {
      return true;
    }
  } else if (opc == Op_LShiftL) {
    if (!has_iv() && _invar == NULL) {
      // Need to preserve the current _offset value, so
      // create a temporary object for this expression subtree.
      // Hacky, so should re-engineer the address pattern match.
      SWPointer tmp(this);
      if (tmp.scaled_iv_plus_offset(n->in(1))) {
        if (tmp._invar == NULL) {
          int mult = 1 << n->in(2)->get_int();
          _scale   = tmp._scale  * mult;
          _offset += tmp._offset * mult;
          return true;
        }
      }
    }
  }
  return false;
}

//----------------------------offset_plus_k------------------------
// Match: offset is (k [+/- invariant])
// where k maybe zero and invariant is optional, but not both.
bool SWPointer::offset_plus_k(Node* n, bool negate) {
  int opc = n->Opcode();
  if (opc == Op_ConI) {
    _offset += negate ? -(n->get_int()) : n->get_int();
    return true;
  } else if (opc == Op_ConL) {
    // Okay if value fits into an int
    const TypeLong* t = n->find_long_type();
    if (t->higher_equal(TypeLong::INT)) {
      jlong loff = n->get_long();
      jint  off  = (jint)loff;
      _offset += negate ? -off : loff;
      return true;
    }
    return false;
  }
  if (_invar != NULL) return false; // already have an invariant
  if (opc == Op_AddI) {
    if (n->in(2)->is_Con() && invariant(n->in(1))) {
      _negate_invar = negate;
      _invar = n->in(1);
      _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
      return true;
    } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
      _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
      _negate_invar = negate;
      _invar = n->in(2);
      return true;
    }
  }
  if (opc == Op_SubI) {
    if (n->in(2)->is_Con() && invariant(n->in(1))) {
      _negate_invar = negate;
      _invar = n->in(1);
      _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
      return true;
    } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
      _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
      _negate_invar = !negate;
      _invar = n->in(2);
      return true;
    }
  }
  if (invariant(n)) {
    _negate_invar = negate;
    _invar = n;
    return true;
  }
  return false;
}

//----------------------------print------------------------
void SWPointer::print() {
#ifndef PRODUCT
  tty->print("base: %d  adr: %d  scale: %d  offset: %d  invar: %c%d\n",
             _base != NULL ? _base->_idx : 0,
             _adr  != NULL ? _adr->_idx  : 0,
             _scale, _offset,
             _negate_invar?'-':'+',
             _invar != NULL ? _invar->_idx : 0);
#endif
}

// ========================= OrderedPair =====================

const OrderedPair OrderedPair::initial;

// ========================= SWNodeInfo =====================

const SWNodeInfo SWNodeInfo::initial;


// ============================ DepGraph ===========================

//------------------------------make_node---------------------------
// Make a new dependence graph node for an ideal node.
DepMem* DepGraph::make_node(Node* node) {
  DepMem* m = new (_arena) DepMem(node);
  if (node != NULL) {
    assert(_map.at_grow(node->_idx) == NULL, "one init only");
    _map.at_put_grow(node->_idx, m);
  }
  return m;
}

//------------------------------make_edge---------------------------
// Make a new dependence graph edge from dpred -> dsucc
DepEdge* DepGraph::make_edge(DepMem* dpred, DepMem* dsucc) {
  DepEdge* e = new (_arena) DepEdge(dpred, dsucc, dsucc->in_head(), dpred->out_head());
  dpred->set_out_head(e);
  dsucc->set_in_head(e);
  return e;
}

// ========================== DepMem ========================

//------------------------------in_cnt---------------------------
int DepMem::in_cnt() {
  int ct = 0;
  for (DepEdge* e = _in_head; e != NULL; e = e->next_in()) ct++;
  return ct;
}

//------------------------------out_cnt---------------------------
int DepMem::out_cnt() {
  int ct = 0;
  for (DepEdge* e = _out_head; e != NULL; e = e->next_out()) ct++;
  return ct;
}

//------------------------------print-----------------------------
void DepMem::print() {
#ifndef PRODUCT
  tty->print("  DepNode %d (", _node->_idx);
  for (DepEdge* p = _in_head; p != NULL; p = p->next_in()) {
    Node* pred = p->pred()->node();
    tty->print(" %d", pred != NULL ? pred->_idx : 0);
  }
  tty->print(") [");
  for (DepEdge* s = _out_head; s != NULL; s = s->next_out()) {
    Node* succ = s->succ()->node();
    tty->print(" %d", succ != NULL ? succ->_idx : 0);
  }
  tty->print_cr(" ]");
#endif
}

// =========================== DepEdge =========================

//------------------------------DepPreds---------------------------
void DepEdge::print() {
#ifndef PRODUCT
  tty->print_cr("DepEdge: %d [ %d ]", _pred->node()->_idx, _succ->node()->_idx);
#endif
}

// =========================== DepPreds =========================
// Iterator over predecessor edges in the dependence graph.

//------------------------------DepPreds---------------------------
DepPreds::DepPreds(Node* n, DepGraph& dg) {
  _n = n;
  _done = false;
  if (_n->is_Store() || _n->is_Load()) {
    _next_idx = MemNode::Address;
    _end_idx  = n->req();
    _dep_next = dg.dep(_n)->in_head();
  } else if (_n->is_Mem()) {
    _next_idx = 0;
    _end_idx  = 0;
    _dep_next = dg.dep(_n)->in_head();
  } else {
    _next_idx = 1;
    _end_idx  = _n->req();
    _dep_next = NULL;
  }
  next();
}

//------------------------------next---------------------------
void DepPreds::next() {
  if (_dep_next != NULL) {
    _current  = _dep_next->pred()->node();
    _dep_next = _dep_next->next_in();
  } else if (_next_idx < _end_idx) {
    _current  = _n->in(_next_idx++);
  } else {
    _done = true;
  }
}

// =========================== DepSuccs =========================
// Iterator over successor edges in the dependence graph.

//------------------------------DepSuccs---------------------------
DepSuccs::DepSuccs(Node* n, DepGraph& dg) {
  _n = n;
  _done = false;
  if (_n->is_Load()) {
    _next_idx = 0;
    _end_idx  = _n->outcnt();
    _dep_next = dg.dep(_n)->out_head();
  } else if (_n->is_Mem() || _n->is_Phi() && _n->bottom_type() == Type::MEMORY) {
    _next_idx = 0;
    _end_idx  = 0;
    _dep_next = dg.dep(_n)->out_head();
  } else {
    _next_idx = 0;
    _end_idx  = _n->outcnt();
    _dep_next = NULL;
  }
  next();
}

//-------------------------------next---------------------------
void DepSuccs::next() {
  if (_dep_next != NULL) {
    _current  = _dep_next->succ()->node();
    _dep_next = _dep_next->next_out();
  } else if (_next_idx < _end_idx) {
    _current  = _n->raw_out(_next_idx++);
  } else {
    _done = true;
  }
}
C:\hotspot-69087d08d473\src\share\vm/opto/superword.hpp
/*
 * Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#ifndef SHARE_VM_OPTO_SUPERWORD_HPP
#define SHARE_VM_OPTO_SUPERWORD_HPP

#include "opto/connode.hpp"
#include "opto/loopnode.hpp"
#include "opto/node.hpp"
#include "opto/phaseX.hpp"
#include "opto/vectornode.hpp"
#include "utilities/growableArray.hpp"

//
//                  S U P E R W O R D   T R A N S F O R M
//
// SuperWords are short, fixed length vectors.
//
// Algorithm from:
//
// Exploiting SuperWord Level Parallelism with
//   Multimedia Instruction Sets
// by
//   Samuel Larsen and Saman Amarasinghe
//   MIT Laboratory for Computer Science
// date
//   May 2000
// published in
//   ACM SIGPLAN Notices
//   Proceedings of ACM PLDI '00,  Volume 35 Issue 5
//
// Definition 3.1 A Pack is an n-tuple, <s1, ...,sn>, where
// s1,...,sn are independent isomorphic statements in a basic
// block.
//
// Definition 3.2 A PackSet is a set of Packs.
//
// Definition 3.3 A Pair is a Pack of size two, where the
// first statement is considered the left element, and the
// second statement is considered the right element.

class SWPointer;
class OrderedPair;

// ========================= Dependence Graph =====================

class DepMem;

//------------------------------DepEdge---------------------------
// An edge in the dependence graph.  The edges incident to a dependence
// node are threaded through _next_in for incoming edges and _next_out
// for outgoing edges.
class DepEdge : public ResourceObj {
 protected:
  DepMem* _pred;
  DepMem* _succ;
  DepEdge* _next_in;   // list of in edges, null terminated
  DepEdge* _next_out;  // list of out edges, null terminated

 public:
  DepEdge(DepMem* pred, DepMem* succ, DepEdge* next_in, DepEdge* next_out) :
    _pred(pred), _succ(succ), _next_in(next_in), _next_out(next_out) {}

  DepEdge* next_in()  { return _next_in; }
  DepEdge* next_out() { return _next_out; }
  DepMem*  pred()     { return _pred; }
  DepMem*  succ()     { return _succ; }

  void print();
};

//------------------------------DepMem---------------------------
// A node in the dependence graph.  _in_head starts the threaded list of
// incoming edges, and _out_head starts the list of outgoing edges.
class DepMem : public ResourceObj {
 protected:
  Node*    _node;     // Corresponding ideal node
  DepEdge* _in_head;  // Head of list of in edges, null terminated
  DepEdge* _out_head; // Head of list of out edges, null terminated

 public:
  DepMem(Node* node) : _node(node), _in_head(NULL), _out_head(NULL) {}

  Node*    node()                { return _node;     }
  DepEdge* in_head()             { return _in_head;  }
  DepEdge* out_head()            { return _out_head; }
  void set_in_head(DepEdge* hd)  { _in_head = hd;    }
  void set_out_head(DepEdge* hd) { _out_head = hd;   }

  int in_cnt();  // Incoming edge count
  int out_cnt(); // Outgoing edge count

  void print();
};

//------------------------------DepGraph---------------------------
class DepGraph VALUE_OBJ_CLASS_SPEC {
 protected:
  Arena* _arena;
  GrowableArray<DepMem*> _map;
  DepMem* _root;
  DepMem* _tail;

 public:
  DepGraph(Arena* a) : _arena(a), _map(a, 8,  0, NULL) {
    _root = new (_arena) DepMem(NULL);
    _tail = new (_arena) DepMem(NULL);
  }

  DepMem* root() { return _root; }
  DepMem* tail() { return _tail; }

  // Return dependence node corresponding to an ideal node
  DepMem* dep(Node* node) { return _map.at(node->_idx); }

  // Make a new dependence graph node for an ideal node.
  DepMem* make_node(Node* node);

  // Make a new dependence graph edge dprec->dsucc
  DepEdge* make_edge(DepMem* dpred, DepMem* dsucc);

  DepEdge* make_edge(Node* pred,   Node* succ)   { return make_edge(dep(pred), dep(succ)); }
  DepEdge* make_edge(DepMem* pred, Node* succ)   { return make_edge(pred,      dep(succ)); }
  DepEdge* make_edge(Node* pred,   DepMem* succ) { return make_edge(dep(pred), succ);      }

  void init() { _map.clear(); } // initialize

  void print(Node* n)   { dep(n)->print(); }
  void print(DepMem* d) { d->print(); }
};

//------------------------------DepPreds---------------------------
// Iterator over predecessors in the dependence graph and
// non-memory-graph inputs of ideal nodes.
class DepPreds : public StackObj {
private:
  Node*    _n;
  int      _next_idx, _end_idx;
  DepEdge* _dep_next;
  Node*    _current;
  bool     _done;

public:
  DepPreds(Node* n, DepGraph& dg);
  Node* current() { return _current; }
  bool  done()    { return _done; }
  void  next();
};

//------------------------------DepSuccs---------------------------
// Iterator over successors in the dependence graph and
// non-memory-graph outputs of ideal nodes.
class DepSuccs : public StackObj {
private:
  Node*    _n;
  int      _next_idx, _end_idx;
  DepEdge* _dep_next;
  Node*    _current;
  bool     _done;

public:
  DepSuccs(Node* n, DepGraph& dg);
  Node* current() { return _current; }
  bool  done()    { return _done; }
  void  next();
};


// ========================= SuperWord =====================

// -----------------------------SWNodeInfo---------------------------------
// Per node info needed by SuperWord
class SWNodeInfo VALUE_OBJ_CLASS_SPEC {
 public:
  int         _alignment; // memory alignment for a node
  int         _depth;     // Max expression (DAG) depth from block start
  const Type* _velt_type; // vector element type
  Node_List*  _my_pack;   // pack containing this node

  SWNodeInfo() : _alignment(-1), _depth(0), _velt_type(NULL), _my_pack(NULL) {}
  static const SWNodeInfo initial;
};

// JVMCI: OrderedPair is moved up to deal with compilation issues on Windows
//------------------------------OrderedPair---------------------------
// Ordered pair of Node*.
class OrderedPair VALUE_OBJ_CLASS_SPEC {
 protected:
  Node* _p1;
  Node* _p2;
 public:
  OrderedPair() : _p1(NULL), _p2(NULL) {}
  OrderedPair(Node* p1, Node* p2) {
    if (p1->_idx < p2->_idx) {
      _p1 = p1; _p2 = p2;
    } else {
      _p1 = p2; _p2 = p1;
    }
  }

  bool operator==(const OrderedPair &rhs) {
    return _p1 == rhs._p1 && _p2 == rhs._p2;
  }
  void print() { tty->print("  (%d, %d)", _p1->_idx, _p2->_idx); }

  static const OrderedPair initial;
};

// -----------------------------SuperWord---------------------------------
// Transforms scalar operations into packed (superword) operations.
class SuperWord : public ResourceObj {
 private:
  PhaseIdealLoop* _phase;
  Arena*          _arena;
  PhaseIterGVN   &_igvn;

  enum consts { top_align = -1, bottom_align = -666 };

  GrowableArray<Node_List*> _packset;    // Packs for the current block

  GrowableArray<int> _bb_idx;            // Map from Node _idx to index within block

  GrowableArray<Node*> _block;           // Nodes in current block
  GrowableArray<Node*> _data_entry;      // Nodes with all inputs from outside
  GrowableArray<Node*> _mem_slice_head;  // Memory slice head nodes
  GrowableArray<Node*> _mem_slice_tail;  // Memory slice tail nodes

  GrowableArray<SWNodeInfo> _node_info;  // Info needed per node

  MemNode* _align_to_ref;                // Memory reference that pre-loop will align to

  GrowableArray<OrderedPair> _disjoint_ptrs; // runtime disambiguated pointer pairs

  DepGraph _dg; // Dependence graph

  // Scratch pads
  VectorSet    _visited;       // Visited set
  VectorSet    _post_visited;  // Post-visited set
  Node_Stack   _n_idx_list;    // List of (node,index) pairs
  GrowableArray<Node*> _nlist; // List of nodes
  GrowableArray<Node*> _stk;   // Stack of nodes

 public:
  SuperWord(PhaseIdealLoop* phase);

  void transform_loop(IdealLoopTree* lpt);

  // Accessors for SWPointer
  PhaseIdealLoop* phase()          { return _phase; }
  IdealLoopTree* lpt()             { return _lpt; }
  PhiNode* iv()                    { return _iv; }

 private:
  IdealLoopTree* _lpt;             // Current loop tree node
  LoopNode*      _lp;              // Current LoopNode
  Node*          _bb;              // Current basic block
  PhiNode*       _iv;              // Induction var

  // Accessors
  Arena* arena()                   { return _arena; }

  Node* bb()                       { return _bb; }
  void  set_bb(Node* bb)           { _bb = bb; }

  void set_lpt(IdealLoopTree* lpt) { _lpt = lpt; }

  LoopNode* lp()                   { return _lp; }
  void      set_lp(LoopNode* lp)   { _lp = lp;
                                     _iv = lp->as_CountedLoop()->phi()->as_Phi(); }
  int      iv_stride()             { return lp()->as_CountedLoop()->stride_con(); }

  int vector_width(Node* n) {
    BasicType bt = velt_basic_type(n);
    return MIN2(ABS(iv_stride()), Matcher::max_vector_size(bt));
  }
  int vector_width_in_bytes(Node* n) {
    BasicType bt = velt_basic_type(n);
    return vector_width(n)*type2aelembytes(bt);
  }
  MemNode* align_to_ref()            { return _align_to_ref; }
  void  set_align_to_ref(MemNode* m) { _align_to_ref = m; }

  Node* ctrl(Node* n) const { return _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n; }

  // block accessors
  bool in_bb(Node* n)      { return n != NULL && n->outcnt() > 0 && ctrl(n) == _bb; }
  int  bb_idx(Node* n)     { assert(in_bb(n), "must be"); return _bb_idx.at(n->_idx); }
  void set_bb_idx(Node* n, int i) { _bb_idx.at_put_grow(n->_idx, i); }

  // visited set accessors
  void visited_clear()           { _visited.Clear(); }
  void visited_set(Node* n)      { return _visited.set(bb_idx(n)); }
  int visited_test(Node* n)      { return _visited.test(bb_idx(n)); }
  int visited_test_set(Node* n)  { return _visited.test_set(bb_idx(n)); }
  void post_visited_clear()      { _post_visited.Clear(); }
  void post_visited_set(Node* n) { return _post_visited.set(bb_idx(n)); }
  int post_visited_test(Node* n) { return _post_visited.test(bb_idx(n)); }

  // Ensure node_info contains element "i"
  void grow_node_info(int i) { if (i >= _node_info.length()) _node_info.at_put_grow(i, SWNodeInfo::initial); }

  // memory alignment for a node
  int alignment(Node* n)                     { return _node_info.adr_at(bb_idx(n))->_alignment; }
  void set_alignment(Node* n, int a)         { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_alignment = a; }

  // Max expression (DAG) depth from beginning of the block for each node
  int depth(Node* n)                         { return _node_info.adr_at(bb_idx(n))->_depth; }
  void set_depth(Node* n, int d)             { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_depth = d; }

  // vector element type
  const Type* velt_type(Node* n)             { return _node_info.adr_at(bb_idx(n))->_velt_type; }
  BasicType velt_basic_type(Node* n)         { return velt_type(n)->array_element_basic_type(); }
  void set_velt_type(Node* n, const Type* t) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_velt_type = t; }
  bool same_velt_type(Node* n1, Node* n2);

  // my_pack
  Node_List* my_pack(Node* n)                { return !in_bb(n) ? NULL : _node_info.adr_at(bb_idx(n))->_my_pack; }
  void set_my_pack(Node* n, Node_List* p)    { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_my_pack = p; }

  // methods