

//---------------------------match---------------------------------------------
void Matcher::match( ) {
  if( MaxLabelRootDepth < 100 ) { // Too small?
    assert(false, "invalid MaxLabelRootDepth, increase it to 100 minimum");
    MaxLabelRootDepth = 100;
  }
  // One-time initialization of some register masks.
  init_spill_mask( C->root()->in(1) );
  _return_addr_mask = return_addr();
#ifdef _LP64
  // Pointers take 2 slots in 64-bit land
  _return_addr_mask.Insert(OptoReg::add(return_addr(),1));
#endif

  // Map a Java-signature return type into return register-value
  // machine registers for 0, 1 and 2 returned values.
  const TypeTuple *range = C->tf()->range();
  if( range->cnt() > TypeFunc::Parms ) { // If not a void function
    // Get ideal-register return type
    uint ireg = range->field_at(TypeFunc::Parms)->ideal_reg();
    // Get machine return register
    uint sop = C->start()->Opcode();
    OptoRegPair regs = return_value(ireg, false);

    // And mask for same
    _return_value_mask = RegMask(regs.first());
    if( OptoReg::is_valid(regs.second()) )
      _return_value_mask.Insert(regs.second());
  }

  // ---------------
  // Frame Layout

  // Need the method signature to determine the incoming argument types,
  // because the types determine which registers the incoming arguments are
  // in, and this affects the matched code.
  const TypeTuple *domain = C->tf()->domain();
  uint             argcnt = domain->cnt() - TypeFunc::Parms;
  BasicType *sig_bt        = NEW_RESOURCE_ARRAY( BasicType, argcnt );
  VMRegPair *vm_parm_regs  = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
  _parm_regs               = NEW_RESOURCE_ARRAY( OptoRegPair, argcnt );
  _calling_convention_mask = NEW_RESOURCE_ARRAY( RegMask, argcnt );
  uint i;
  for( i = 0; i<argcnt; i++ ) {
    sig_bt[i] = domain->field_at(i+TypeFunc::Parms)->basic_type();
  }

  // Pass array of ideal registers and length to USER code (from the AD file)
  // that will convert this to an array of register numbers.
  const StartNode *start = C->start();
  start->calling_convention( sig_bt, vm_parm_regs, argcnt );
#ifdef ASSERT
  // Sanity check users' calling convention.  Real handy while trying to
  // get the initial port correct.
  { for (uint i = 0; i<argcnt; i++) {
      if( !vm_parm_regs[i].first()->is_valid() && !vm_parm_regs[i].second()->is_valid() ) {
        assert(domain->field_at(i+TypeFunc::Parms)==Type::HALF, "only allowed on halves" );
        _parm_regs[i].set_bad();
        continue;
      }
      VMReg parm_reg = vm_parm_regs[i].first();
      assert(parm_reg->is_valid(), "invalid arg?");
      if (parm_reg->is_reg()) {
        OptoReg::Name opto_parm_reg = OptoReg::as_OptoReg(parm_reg);
        assert(can_be_java_arg(opto_parm_reg) ||
               C->stub_function() == CAST_FROM_FN_PTR(address, OptoRuntime::rethrow_C) ||
               opto_parm_reg == inline_cache_reg(),
               "parameters in register must be preserved by runtime stubs");
      }
      for (uint j = 0; j < i; j++) {
        assert(parm_reg != vm_parm_regs[j].first(),
               "calling conv. must produce distinct regs");
      }
    }
  }
#endif

  // Do some initial frame layout.

  // Compute the old incoming SP (may be called FP) as
  //   OptoReg::stack0() + locks + in_preserve_stack_slots + pad2.
  _old_SP = C->compute_old_SP();
  assert( is_even(_old_SP), "must be even" );

  // Compute highest incoming stack argument as
  //   _old_SP + out_preserve_stack_slots + incoming argument size.
  _in_arg_limit = OptoReg::add(_old_SP, C->out_preserve_stack_slots());
  assert( is_even(_in_arg_limit), "out_preserve must be even" );
  for( i = 0; i < argcnt; i++ ) {
    // Permit args to have no register
    _calling_convention_mask[i].Clear();
    if( !vm_parm_regs[i].first()->is_valid() && !vm_parm_regs[i].second()->is_valid() ) {
      continue;
    }
    // calling_convention returns stack arguments as a count of
    // slots beyond OptoReg::stack0()/VMRegImpl::stack0.  We need to convert this to
    // the allocator's point of view, taking into account all the
    // preserve area, locks & pad2.
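    // Illustration (not target-specific): an argument the convention reports
    // at stack slot 3 is re-biased by warp_incoming_stk_arg() so it names the
    // same physical slot in the allocator's unified OptoReg numbering, which
    // already accounts for the preserve area, locks and pad2.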

    OptoReg::Name reg1 = warp_incoming_stk_arg(vm_parm_regs[i].first());
    if( OptoReg::is_valid(reg1))
      _calling_convention_mask[i].Insert(reg1);

    OptoReg::Name reg2 = warp_incoming_stk_arg(vm_parm_regs[i].second());
    if( OptoReg::is_valid(reg2))
      _calling_convention_mask[i].Insert(reg2);

    // Save the biased stack-slot register numbers
    _parm_regs[i].set_pair(reg2, reg1);
  }

  // Finally, make sure the incoming arguments take up an even number of
  // words, in case the arguments or locals need to contain doubleword stack
  // slots.  The rest of the system assumes that stack slot pairs (in
  // particular, in the spill area) which look aligned will in fact be
  // aligned relative to the stack pointer in the target machine.  Double
  // stack slots will always be allocated aligned.
  _new_SP = OptoReg::Name(round_to(_in_arg_limit, RegMask::SlotsPerLong));
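  // Worked example (illustrative slot numbers): with RegMask::SlotsPerLong
  // == 2 and _in_arg_limit ending at an odd slot such as 7, _new_SP rounds
  // up to slot 8, so doubleword spill slots allocated above it stay
  // pair-aligned relative to the stack pointer.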

  // Compute highest outgoing stack argument as
  //   _new_SP + out_preserve_stack_slots + max(outgoing argument size).
  _out_arg_limit = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
  assert( is_even(_out_arg_limit), "out_preserve must be even" );

  if (!RegMask::can_represent_arg(OptoReg::add(_out_arg_limit,-1))) {
    // the compiler cannot represent this method's calling sequence
    C->record_method_not_compilable("must be able to represent all call arguments in reg mask");
  }

  if (C->failing())  return;  // bailed out on incoming arg failure

  // ---------------
  // Collect roots of matcher trees.  Every node for which
  // _shared[_idx] is cleared is guaranteed to not be shared, and thus
  // can be a valid interior of some tree.
  find_shared( C->root() );
  find_shared( C->top() );

  C->print_method(PHASE_BEFORE_MATCHING);

  // Create a new ideal node ConP #NULL even if one already exists in old space
  // to avoid false sharing if the corresponding mach node is not used.
  // The corresponding mach node is only used in rare cases for derived
  // pointers.
  Node* new_ideal_null = ConNode::make(C, TypePtr::NULL_PTR);

  // Swap out to old-space; emptying new-space
  Arena *old = C->node_arena()->move_contents(C->old_arena());

  // Save debug and profile information for nodes in old space:
  _old_node_note_array = C->node_note_array();
  if (_old_node_note_array != NULL) {
    C->set_node_note_array(new(C->comp_arena()) GrowableArray<Node_Notes*>
                           (C->comp_arena(), _old_node_note_array->length(),
                            0, NULL));
  }

  // Pre-size the new_node table to avoid the need for range checks.
  grow_new_node_array(C->unique());

  // Reset node counter so MachNodes start with _idx at 0
  int live_nodes = C->live_nodes();
  C->set_unique(0);
  C->reset_dead_node_list();

  // Recursively match trees from old space into new space.
  // Correct leaves of new-space Nodes; they point to old-space.
  _visited.Clear();             // Clear visit bits for xform call
  C->set_cached_top_node(xform( C->top(), live_nodes));
  if (!C->failing()) {
    Node* xroot =        xform( C->root(), 1 );
    if (xroot == NULL) {
      Matcher::soft_match_failure();  // recursive matching process failed
      C->record_method_not_compilable("instruction match failed");
    } else {
      // During matching shared constants were attached to C->root()
      // because xroot wasn't available yet, so transfer the uses to
      // the xroot.
      for( DUIterator_Fast jmax, j = C->root()->fast_outs(jmax); j < jmax; j++ ) {
        Node* n = C->root()->fast_out(j);
        if (C->node_arena()->contains(n)) {
          assert(n->in(0) == C->root(), "should be control user");
          n->set_req(0, xroot);
          --j;
          --jmax;
        }
      }

      // Generate new mach node for ConP #NULL
      assert(new_ideal_null != NULL, "sanity");
      _mach_null = match_tree(new_ideal_null);
      // Don't set control, it will confuse GCM since there are no uses.
      // The control will be set when this node is used first time
      // in find_base_for_derived().
      assert(_mach_null != NULL, "");

      C->set_root(xroot->is_Root() ? xroot->as_Root() : NULL);

#ifdef ASSERT
      verify_new_nodes_only(xroot);
#endif
    }
  }
  if (C->top() == NULL || C->root() == NULL) {
    C->record_method_not_compilable("graph lost"); // %%% cannot happen?
  }
  if (C->failing()) {
    // delete old;
    old->destruct_contents();
    return;
  }
  assert( C->top(), "" );
  assert( C->root(), "" );
  validate_null_checks();

  // Now smoke old-space
  NOT_DEBUG( old->destruct_contents() );

  // ------------------------
  // Set up save-on-entry registers
  Fixup_Save_On_Entry( );
}


//------------------------------Fixup_Save_On_Entry----------------------------
// The stated purpose of this routine is to take care of save-on-entry
// registers.  However, the overall goal of the Match phase is to convert the
// graph into machine-specific instructions which have RegMasks to guide
// allocation.
// So what this procedure really does is put a valid RegMask on each input
// to the machine-specific variations of all Return, TailCall and Halt
// instructions.  It also adds edges to define the save-on-entry values (and of
// course gives them a mask).

static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
  RegMask *rms = NEW_RESOURCE_ARRAY( RegMask, size );
  // Do all the pre-defined register masks
  rms[TypeFunc::Control  ] = RegMask::Empty;
  rms[TypeFunc::I_O      ] = RegMask::Empty;
  rms[TypeFunc::Memory   ] = RegMask::Empty;
  rms[TypeFunc::ReturnAdr] = ret_adr;
  rms[TypeFunc::FramePtr ] = fp;
  return rms;
}

//---------------------------init_first_stack_mask-----------------------------
// Create the initial stack mask used by values spilling to the stack.
// Disallow any debug info in outgoing argument areas by setting the
// initial mask accordingly.
void Matcher::init_first_stack_mask() {

  // Allocate storage for spill masks as masks for the appropriate load type.
  RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+4));

  idealreg2spillmask  [Op_RegN] = &rms[0];
  idealreg2spillmask  [Op_RegI] = &rms[1];
  idealreg2spillmask  [Op_RegL] = &rms[2];
  idealreg2spillmask  [Op_RegF] = &rms[3];
  idealreg2spillmask  [Op_RegD] = &rms[4];
  idealreg2spillmask  [Op_RegP] = &rms[5];

  idealreg2debugmask  [Op_RegN] = &rms[6];
  idealreg2debugmask  [Op_RegI] = &rms[7];
  idealreg2debugmask  [Op_RegL] = &rms[8];
  idealreg2debugmask  [Op_RegF] = &rms[9];
  idealreg2debugmask  [Op_RegD] = &rms[10];
  idealreg2debugmask  [Op_RegP] = &rms[11];

  idealreg2mhdebugmask[Op_RegN] = &rms[12];
  idealreg2mhdebugmask[Op_RegI] = &rms[13];
  idealreg2mhdebugmask[Op_RegL] = &rms[14];
  idealreg2mhdebugmask[Op_RegF] = &rms[15];
  idealreg2mhdebugmask[Op_RegD] = &rms[16];
  idealreg2mhdebugmask[Op_RegP] = &rms[17];

  idealreg2spillmask  [Op_VecS] = &rms[18];
  idealreg2spillmask  [Op_VecD] = &rms[19];
  idealreg2spillmask  [Op_VecX] = &rms[20];
  idealreg2spillmask  [Op_VecY] = &rms[21];

  OptoReg::Name i;

  // At first, start with the empty mask
  C->FIRST_STACK_mask().Clear();

  // Add in the incoming argument area
  OptoReg::Name init_in = OptoReg::add(_old_SP, C->out_preserve_stack_slots());
  for (i = init_in; i < _in_arg_limit; i = OptoReg::add(i,1)) {
    C->FIRST_STACK_mask().Insert(i);
  }
  // Add in all bits past the outgoing argument area
  guarantee(RegMask::can_represent_arg(OptoReg::add(_out_arg_limit,-1)),
            "must be able to represent all call arguments in reg mask");
  OptoReg::Name init = _out_arg_limit;
  for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1)) {
    C->FIRST_STACK_mask().Insert(i);
  }
  // Finally, set the "infinite stack" bit.
  C->FIRST_STACK_mask().set_AllStack();
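  // At this point FIRST_STACK_mask covers, schematically (an illustration,
  // not a frame-layout specification):
  //   - the incoming argument slots [init_in, _in_arg_limit)
  //   - every representable slot at or above _out_arg_limit, plus AllStack
  // Everything else, in particular the outgoing argument area, stays out of
  // the mask so spills and debug info never land on a callee's arguments.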

  // Make spill masks.  Registers for their class, plus FIRST_STACK_mask.
  RegMask aligned_stack_mask = C->FIRST_STACK_mask();
  // Keep spill masks aligned.
  aligned_stack_mask.clear_to_pairs();
  assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");

  *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
#ifdef _LP64
  *idealreg2spillmask[Op_RegN] = *idealreg2regmask[Op_RegN];
   idealreg2spillmask[Op_RegN]->OR(C->FIRST_STACK_mask());
   idealreg2spillmask[Op_RegP]->OR(aligned_stack_mask);
#else
   idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask());
#endif
  *idealreg2spillmask[Op_RegI] = *idealreg2regmask[Op_RegI];
   idealreg2spillmask[Op_RegI]->OR(C->FIRST_STACK_mask());
  *idealreg2spillmask[Op_RegL] = *idealreg2regmask[Op_RegL];
   idealreg2spillmask[Op_RegL]->OR(aligned_stack_mask);
  *idealreg2spillmask[Op_RegF] = *idealreg2regmask[Op_RegF];
   idealreg2spillmask[Op_RegF]->OR(C->FIRST_STACK_mask());
  *idealreg2spillmask[Op_RegD] = *idealreg2regmask[Op_RegD];
   idealreg2spillmask[Op_RegD]->OR(aligned_stack_mask);

  if (Matcher::vector_size_supported(T_BYTE,4)) {
    *idealreg2spillmask[Op_VecS] = *idealreg2regmask[Op_VecS];
     idealreg2spillmask[Op_VecS]->OR(C->FIRST_STACK_mask());
  }
  if (Matcher::vector_size_supported(T_FLOAT,2)) {
    // For VecD we need dual alignment and 8 bytes (2 slots) for spills.
    // RA guarantees such alignment since it is needed for Double and Long values.
    *idealreg2spillmask[Op_VecD] = *idealreg2regmask[Op_VecD];
     idealreg2spillmask[Op_VecD]->OR(aligned_stack_mask);
  }
  if (Matcher::vector_size_supported(T_FLOAT,4)) {
    // For VecX we need quad alignment and 16 bytes (4 slots) for spills.
    //
    // RA can use the input argument stack slots for spills, but until RA
    // runs we don't know the frame size or the offsets of those slots.
    //
    // Exclude the last input argument stack slots to avoid spilling vectors
    // there; otherwise vector spills could stomp over stack slots in the
    // caller's frame.
    OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);
    for (int k = 1; (in >= init_in) && (k < RegMask::SlotsPerVecX); k++) {
      aligned_stack_mask.Remove(in);
      in = OptoReg::add(in, -1);
    }
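    // Illustration: with RegMask::SlotsPerVecX == 4 the loop above removes
    // up to the last 3 incoming-argument slots, since a 4-slot vector spill
    // starting in any of them would extend past _in_arg_limit into the
    // caller's frame.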
     aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecX);
     assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
    *idealreg2spillmask[Op_VecX] = *idealreg2regmask[Op_VecX];
     idealreg2spillmask[Op_VecX]->OR(aligned_stack_mask);
  }
  if (Matcher::vector_size_supported(T_FLOAT,8)) {
    // For VecY we need 8-slot alignment and 32 bytes (8 slots) for spills.
    OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);
    for (int k = 1; (in >= init_in) && (k < RegMask::SlotsPerVecY); k++) {
      aligned_stack_mask.Remove(in);
      in = OptoReg::add(in, -1);
    }
     aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecY);
     assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
    *idealreg2spillmask[Op_VecY] = *idealreg2regmask[Op_VecY];
     idealreg2spillmask[Op_VecY]->OR(aligned_stack_mask);
  }
   if (UseFPUForSpilling) {
     // This mask logic assumes that the spill operations are
     // symmetric and that the registers involved are the same size.
     // On sparc, for instance, we may have to use 64-bit moves, which will
     // kill 2 registers when used with F0-F31.
     idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]);
     idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]);
#ifdef _LP64
     idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]);
     idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
     idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
     idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]);
#else
     idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]);
#ifdef ARM
     // ARM has support for moving 64bit values between a pair of
     // integer registers and a double register
     idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
     idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
#endif
#endif
   }

  // Make up debug masks.  Any spill slot plus callee-save registers.
  // Caller-save registers are assumed to be trashable by the various
  // inline-cache fixup routines.
  *idealreg2debugmask  [Op_RegN]= *idealreg2spillmask[Op_RegN];
  *idealreg2debugmask  [Op_RegI]= *idealreg2spillmask[Op_RegI];
  *idealreg2debugmask  [Op_RegL]= *idealreg2spillmask[Op_RegL];
  *idealreg2debugmask  [Op_RegF]= *idealreg2spillmask[Op_RegF];
  *idealreg2debugmask  [Op_RegD]= *idealreg2spillmask[Op_RegD];
  *idealreg2debugmask  [Op_RegP]= *idealreg2spillmask[Op_RegP];

  *idealreg2mhdebugmask[Op_RegN]= *idealreg2spillmask[Op_RegN];
  *idealreg2mhdebugmask[Op_RegI]= *idealreg2spillmask[Op_RegI];
  *idealreg2mhdebugmask[Op_RegL]= *idealreg2spillmask[Op_RegL];
  *idealreg2mhdebugmask[Op_RegF]= *idealreg2spillmask[Op_RegF];
  *idealreg2mhdebugmask[Op_RegD]= *idealreg2spillmask[Op_RegD];
  *idealreg2mhdebugmask[Op_RegP]= *idealreg2spillmask[Op_RegP];

  // Prevent stub compilations from attempting to reference
  // callee-saved registers from debug info
  bool exclude_soe = !Compile::current()->is_method_compilation();

  for( i=OptoReg::Name(0); i<OptoReg::Name(_last_Mach_Reg); i = OptoReg::add(i,1) ) {
    // registers the caller has to save do not work
    if( _register_save_policy[i] == 'C' ||
        _register_save_policy[i] == 'A' ||
        (_register_save_policy[i] == 'E' && exclude_soe) ) {
      idealreg2debugmask  [Op_RegN]->Remove(i);
      idealreg2debugmask  [Op_RegI]->Remove(i); // Exclude save-on-call
      idealreg2debugmask  [Op_RegL]->Remove(i); // registers from debug
      idealreg2debugmask  [Op_RegF]->Remove(i); // masks
      idealreg2debugmask  [Op_RegD]->Remove(i);
      idealreg2debugmask  [Op_RegP]->Remove(i);

      idealreg2mhdebugmask[Op_RegN]->Remove(i);
      idealreg2mhdebugmask[Op_RegI]->Remove(i);
      idealreg2mhdebugmask[Op_RegL]->Remove(i);
      idealreg2mhdebugmask[Op_RegF]->Remove(i);
      idealreg2mhdebugmask[Op_RegD]->Remove(i);
      idealreg2mhdebugmask[Op_RegP]->Remove(i);
    }
  }

  // Subtract the register we use to save the SP for MethodHandle
  // invokes from the debug mask.
  const RegMask save_mask = method_handle_invoke_SP_save_mask();
  idealreg2mhdebugmask[Op_RegN]->SUBTRACT(save_mask);
  idealreg2mhdebugmask[Op_RegI]->SUBTRACT(save_mask);
  idealreg2mhdebugmask[Op_RegL]->SUBTRACT(save_mask);
  idealreg2mhdebugmask[Op_RegF]->SUBTRACT(save_mask);
  idealreg2mhdebugmask[Op_RegD]->SUBTRACT(save_mask);
  idealreg2mhdebugmask[Op_RegP]->SUBTRACT(save_mask);
}

//---------------------------is_save_on_entry----------------------------------
bool Matcher::is_save_on_entry( int reg ) {
  return
    _register_save_policy[reg] == 'E' ||
    _register_save_policy[reg] == 'A' || // Save-on-entry register?
    // Also save argument registers in the trampolining stubs
    (C->save_argument_registers() && is_spillable_arg(reg));
}

//---------------------------Fixup_Save_On_Entry-------------------------------
void Matcher::Fixup_Save_On_Entry( ) {
  init_first_stack_mask();

  Node *root = C->root();       // Short name for root
  // Count number of save-on-entry registers.
  uint soe_cnt = number_of_saved_registers();
  uint i;

  // Find the procedure Start Node
  StartNode *start = C->start();
  assert( start, "Expect a start node" );

  // Save argument registers in the trampolining stubs
  if( C->save_argument_registers() )
    for( i = 0; i < _last_Mach_Reg; i++ )
      if( is_spillable_arg(i) )
        soe_cnt++;

  // Input RegMask array shared by all Returns.
  // The type for doubles and longs has a count of 2, but
  // there is only 1 returned value
  uint ret_edge_cnt = TypeFunc::Parms + ((C->tf()->range()->cnt() == TypeFunc::Parms) ? 0 : 1);
  RegMask *ret_rms  = init_input_masks( ret_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
  // Returns have 0 or 1 returned values depending on call signature.
  // Return register is specified by return_value in the AD file.
  if (ret_edge_cnt > TypeFunc::Parms)
    ret_rms[TypeFunc::Parms+0] = _return_value_mask;

  // Input RegMask array shared by all Rethrows.
  uint reth_edge_cnt = TypeFunc::Parms+1;
  RegMask *reth_rms  = init_input_masks( reth_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
  // Rethrow takes exception oop only, but in the argument 0 slot.
  reth_rms[TypeFunc::Parms] = mreg2regmask[find_receiver(false)];
#ifdef _LP64
  // Need two slots for ptrs in 64-bit land
  reth_rms[TypeFunc::Parms].Insert(OptoReg::add(OptoReg::Name(find_receiver(false)),1));
#endif

  // Input RegMask array shared by all TailCalls
  uint tail_call_edge_cnt = TypeFunc::Parms+2;
  RegMask *tail_call_rms = init_input_masks( tail_call_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );

  // Input RegMask array shared by all TailJumps
  uint tail_jump_edge_cnt = TypeFunc::Parms+2;
  RegMask *tail_jump_rms = init_input_masks( tail_jump_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );

  // TailCalls have 2 returned values (target & moop), whose masks come
  // from the usual MachNode/MachOper mechanism.  Find a sample
  // TailCall to extract these masks and put the correct masks into
  // the tail_call_rms array.
  for( i=1; i < root->req(); i++ ) {
    MachReturnNode *m = root->in(i)->as_MachReturn();
    if( m->ideal_Opcode() == Op_TailCall ) {
      tail_call_rms[TypeFunc::Parms+0] = m->MachNode::in_RegMask(TypeFunc::Parms+0);
      tail_call_rms[TypeFunc::Parms+1] = m->MachNode::in_RegMask(TypeFunc::Parms+1);
      break;
    }
  }

  // TailJumps have 2 returned values (target & ex_oop), whose masks come
  // from the usual MachNode/MachOper mechanism.  Find a sample
  // TailJump to extract these masks and put the correct masks into
  // the tail_jump_rms array.
  for( i=1; i < root->req(); i++ ) {
    MachReturnNode *m = root->in(i)->as_MachReturn();
    if( m->ideal_Opcode() == Op_TailJump ) {
      tail_jump_rms[TypeFunc::Parms+0] = m->MachNode::in_RegMask(TypeFunc::Parms+0);
      tail_jump_rms[TypeFunc::Parms+1] = m->MachNode::in_RegMask(TypeFunc::Parms+1);
      break;
    }
  }

  // Input RegMask array shared by all Halts
  uint halt_edge_cnt = TypeFunc::Parms;
  RegMask *halt_rms = init_input_masks( halt_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );

  // Capture the return input masks into each exit flavor
  for( i=1; i < root->req(); i++ ) {
    MachReturnNode *exit = root->in(i)->as_MachReturn();
    switch( exit->ideal_Opcode() ) {
      case Op_Return   : exit->_in_rms = ret_rms;  break;
      case Op_Rethrow  : exit->_in_rms = reth_rms; break;
      case Op_TailCall : exit->_in_rms = tail_call_rms; break;
      case Op_TailJump : exit->_in_rms = tail_jump_rms; break;
      case Op_Halt     : exit->_in_rms = halt_rms; break;
      default          : ShouldNotReachHere();
    }
  }

  // Next unused projection number from Start.
  int proj_cnt = C->tf()->domain()->cnt();

  // Do all the save-on-entry registers.  Make projections from Start for
  // them, and give them a use at the exit points.  To the allocator, they
  // look like incoming register arguments.
  for( i = 0; i < _last_Mach_Reg; i++ ) {
    if( is_save_on_entry(i) ) {

      // Add the save-on-entry to the mask array
      ret_rms      [      ret_edge_cnt] = mreg2regmask[i];
      reth_rms     [     reth_edge_cnt] = mreg2regmask[i];
      tail_call_rms[tail_call_edge_cnt] = mreg2regmask[i];
      tail_jump_rms[tail_jump_edge_cnt] = mreg2regmask[i];
      // Halts need the SOE registers, but only in the stack as debug info.
      // A just-prior uncommon-trap or deoptimization will use the SOE regs.
      halt_rms     [     halt_edge_cnt] = *idealreg2spillmask[_register_save_type[i]];

      Node *mproj;

      // Is this a RegF low half of a RegD?  Double up 2 adjacent RegF's
      // into a single RegD.
      if( (i&1) == 0 &&
          _register_save_type[i  ] == Op_RegF &&
          _register_save_type[i+1] == Op_RegF &&
          is_save_on_entry(i+1) ) {
        // Add other bit for double
        ret_rms      [      ret_edge_cnt].Insert(OptoReg::Name(i+1));
        reth_rms     [     reth_edge_cnt].Insert(OptoReg::Name(i+1));
        tail_call_rms[tail_call_edge_cnt].Insert(OptoReg::Name(i+1));
        tail_jump_rms[tail_jump_edge_cnt].Insert(OptoReg::Name(i+1));
        halt_rms     [     halt_edge_cnt].Insert(OptoReg::Name(i+1));
        mproj = new (C) MachProjNode( start, proj_cnt, ret_rms[ret_edge_cnt], Op_RegD );
        proj_cnt += 2;          // Skip 2 for doubles
      }
      else if( (i&1) == 1 &&    // Else check for high half of double
               _register_save_type[i-1] == Op_RegF &&
               _register_save_type[i  ] == Op_RegF &&
               is_save_on_entry(i-1) ) {
        ret_rms      [      ret_edge_cnt] = RegMask::Empty;
        reth_rms     [     reth_edge_cnt] = RegMask::Empty;
        tail_call_rms[tail_call_edge_cnt] = RegMask::Empty;
        tail_jump_rms[tail_jump_edge_cnt] = RegMask::Empty;
        halt_rms     [     halt_edge_cnt] = RegMask::Empty;
        mproj = C->top();
      }
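      // Net effect for an even/odd pair of save-on-entry float registers
      // (illustrative): the even index gets a single RegD projection whose
      // mask covers both halves, while the odd index contributes top with an
      // empty mask, so the pair is modeled as one doubleword value.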
      // Is this a RegI low half of a RegL?  Double up 2 adjacent RegI's
      // into a single RegL.
      else if( (i&1) == 0 &&
          _register_save_type[i  ] == Op_RegI &&
          _register_save_type[i+1] == Op_RegI &&
        is_save_on_entry(i+1) ) {
        // Add other bit for long
        ret_rms      [      ret_edge_cnt].Insert(OptoReg::Name(i+1));
        reth_rms     [     reth_edge_cnt].Insert(OptoReg::Name(i+1));
        tail_call_rms[tail_call_edge_cnt].Insert(OptoReg::Name(i+1));
        tail_jump_rms[tail_jump_edge_cnt].Insert(OptoReg::Name(i+1));
        halt_rms     [     halt_edge_cnt].Insert(OptoReg::Name(i+1));
        mproj = new (C) MachProjNode( start, proj_cnt, ret_rms[ret_edge_cnt], Op_RegL );
        proj_cnt += 2;          // Skip 2 for longs
      }
      else if( (i&1) == 1 &&    // Else check for high half of long
               _register_save_type[i-1] == Op_RegI &&
               _register_save_type[i  ] == Op_RegI &&
               is_save_on_entry(i-1) ) {
        ret_rms      [      ret_edge_cnt] = RegMask::Empty;
        reth_rms     [     reth_edge_cnt] = RegMask::Empty;
        tail_call_rms[tail_call_edge_cnt] = RegMask::Empty;
        tail_jump_rms[tail_jump_edge_cnt] = RegMask::Empty;
        halt_rms     [     halt_edge_cnt] = RegMask::Empty;
        mproj = C->top();
      } else {
        // Make a projection for it off the Start
        mproj = new (C) MachProjNode( start, proj_cnt++, ret_rms[ret_edge_cnt], _register_save_type[i] );
      }

      ret_edge_cnt ++;
      reth_edge_cnt ++;
      tail_call_edge_cnt ++;
      tail_jump_edge_cnt ++;
      halt_edge_cnt ++;

      // Add a use of the SOE register to all exit paths
      for( uint j=1; j < root->req(); j++ )
        root->in(j)->add_req(mproj);
    } // End of if a save-on-entry register
  } // End of for all machine registers
}

//------------------------------init_spill_mask--------------------------------
void Matcher::init_spill_mask( Node *ret ) {
  if( idealreg2regmask[Op_RegI] ) return; // One time only init

  OptoReg::c_frame_pointer = c_frame_pointer();
  c_frame_ptr_mask = c_frame_pointer();
#ifdef _LP64
  // pointers are twice as big
  c_frame_ptr_mask.Insert(OptoReg::add(c_frame_pointer(),1));
#endif

  // Start at OptoReg::stack0()
  STACK_ONLY_mask.Clear();
  OptoReg::Name init = OptoReg::stack2reg(0);
  // STACK_ONLY_mask is all stack bits
  OptoReg::Name i;
  for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1))
    STACK_ONLY_mask.Insert(i);
  // Also set the "infinite stack" bit.
  STACK_ONLY_mask.set_AllStack();

  // Copy the register names over into the shared world
  for( i=OptoReg::Name(0); i<OptoReg::Name(_last_Mach_Reg); i = OptoReg::add(i,1) ) {
    // SharedInfo::regName[i] = regName[i];
    // Handy RegMasks per machine register
    mreg2regmask[i].Insert(i);
  }

  // Grab the Frame Pointer
  Node *fp  = ret->in(TypeFunc::FramePtr);
  Node *mem = ret->in(TypeFunc::Memory);
  const TypePtr* atp = TypePtr::BOTTOM;
  // Share frame pointer while making spill ops
  set_shared(fp);

  // Compute generic short-offset Loads
#ifdef _LP64
  MachNode *spillCP = match_tree(new (C) LoadNNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM,MemNode::unordered));
#endif
  MachNode *spillI  = match_tree(new (C) LoadINode(NULL,mem,fp,atp,TypeInt::INT,MemNode::unordered));
  MachNode *spillL  = match_tree(new (C) LoadLNode(NULL,mem,fp,atp,TypeLong::LONG,MemNode::unordered, LoadNode::DependsOnlyOnTest,false));
  MachNode *spillF  = match_tree(new (C) LoadFNode(NULL,mem,fp,atp,Type::FLOAT,MemNode::unordered));
  MachNode *spillD  = match_tree(new (C) LoadDNode(NULL,mem,fp,atp,Type::DOUBLE,MemNode::unordered));
  MachNode *spillP  = match_tree(new (C) LoadPNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM,MemNode::unordered));
  assert(spillI != NULL && spillL != NULL && spillF != NULL &&
         spillD != NULL && spillP != NULL, "");
  // Get the ADLC notion of the right regmask, for each basic type.
#ifdef _LP64
  idealreg2regmask[Op_RegN] = &spillCP->out_RegMask();
#endif
  idealreg2regmask[Op_RegI] = &spillI->out_RegMask();
  idealreg2regmask[Op_RegL] = &spillL->out_RegMask();
  idealreg2regmask[Op_RegF] = &spillF->out_RegMask();
  idealreg2regmask[Op_RegD] = &spillD->out_RegMask();
  idealreg2regmask[Op_RegP] = &spillP->out_RegMask();

  // Vector regmasks.
  if (Matcher::vector_size_supported(T_BYTE,4)) {
    TypeVect::VECTS = TypeVect::make(T_BYTE, 4);
    MachNode *spillVectS = match_tree(new (C) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTS));
    idealreg2regmask[Op_VecS] = &spillVectS->out_RegMask();
  }
  if (Matcher::vector_size_supported(T_FLOAT,2)) {
    MachNode *spillVectD = match_tree(new (C) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTD));
    idealreg2regmask[Op_VecD] = &spillVectD->out_RegMask();
  }
  if (Matcher::vector_size_supported(T_FLOAT,4)) {
    MachNode *spillVectX = match_tree(new (C) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTX));
    idealreg2regmask[Op_VecX] = &spillVectX->out_RegMask();
  }
  if (Matcher::vector_size_supported(T_FLOAT,8)) {
    MachNode *spillVectY = match_tree(new (C) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTY));
    idealreg2regmask[Op_VecY] = &spillVectY->out_RegMask();
  }
}

#ifdef ASSERT
static void match_alias_type(Compile* C, Node* n, Node* m) {
  if (!VerifyAliases)  return;  // do not go looking for trouble by default
  const TypePtr* nat = n->adr_type();
  const TypePtr* mat = m->adr_type();
  int nidx = C->get_alias_index(nat);
  int midx = C->get_alias_index(mat);
  // Detune the assert for cases like (AndI 0xFF (LoadB p)).
  if (nidx == Compile::AliasIdxTop && midx >= Compile::AliasIdxRaw) {
    for (uint i = 1; i < n->req(); i++) {
      Node* n1 = n->in(i);
      const TypePtr* n1at = n1->adr_type();
      if (n1at != NULL) {
        nat = n1at;
        nidx = C->get_alias_index(n1at);
      }
    }
  }
  // %%% Kludgery.  Instead, fix ideal adr_type methods for all these cases:
  if (nidx == Compile::AliasIdxTop && midx == Compile::AliasIdxRaw) {
    switch (n->Opcode()) {
    case Op_PrefetchRead:
    case Op_PrefetchWrite:
    case Op_PrefetchAllocation:
      nidx = Compile::AliasIdxRaw;
      nat = TypeRawPtr::BOTTOM;
      break;
    }
  }
  if (nidx == Compile::AliasIdxRaw && midx == Compile::AliasIdxTop) {
    switch (n->Opcode()) {
    case Op_ClearArray:
      midx = Compile::AliasIdxRaw;
      mat = TypeRawPtr::BOTTOM;
      break;
    }
  }
  if (nidx == Compile::AliasIdxTop && midx == Compile::AliasIdxBot) {
    switch (n->Opcode()) {
    case Op_Return:
    case Op_Rethrow:
    case Op_Halt:
    case Op_TailCall:
    case Op_TailJump:
      nidx = Compile::AliasIdxBot;
      nat = TypePtr::BOTTOM;
      break;
    }
  }
  if (nidx == Compile::AliasIdxBot && midx == Compile::AliasIdxTop) {
    switch (n->Opcode()) {
    case Op_StrComp:
    case Op_StrEquals:
    case Op_StrIndexOf:
    case Op_AryEq:
    case Op_MemBarVolatile:
    case Op_MemBarCPUOrder: // %%% these ideals should have narrower adr_type?
    case Op_EncodeISOArray:
      nidx = Compile::AliasIdxTop;
      nat = NULL;
      break;
    }
  }
  if (nidx != midx) {
    if (PrintOpto || (PrintMiscellaneous && (WizardMode || Verbose))) {
      tty->print_cr("==== Matcher alias shift %d => %d", nidx, midx);
      n->dump();
      m->dump();
    }
    assert(C->subsume_loads() && C->must_alias(nat, midx),
           "must not lose alias info when matching");
  }
}
#endif


//------------------------------MStack-----------------------------------------
// State and MStack class used in xform() and find_shared() iterative methods.
enum Node_State { Pre_Visit,  // node has to be pre-visited
                      Visit,  // visit node
                 Post_Visit,  // post-visit node
             Alt_Post_Visit   // alternative post-visit path
                };

class MStack: public Node_Stack {
  public:
    MStack(int size) : Node_Stack(size) { }

    void push(Node *n, Node_State ns) {
      Node_Stack::push(n, (uint)ns);
    }
    void push(Node *n, Node_State ns, Node *parent, int indx) {
      ++_inode_top;
      if ((_inode_top + 1) >= _inode_max) grow();
      _inode_top->node = parent;
      _inode_top->indx = (uint)indx;
      ++_inode_top;
      _inode_top->node = n;
      _inode_top->indx = (uint)ns;
    }
    Node *parent() {
      pop();
      return node();
    }
    Node_State state() const {
      return (Node_State)index();
    }
    void set_state(Node_State ns) {
      set_index((uint)ns);
    }
};


//------------------------------xform------------------------------------------
// Given a Node in old-space, Match him (Label/Reduce) to produce a machine
// Node in new-space.  Given a new-space Node, recursively walk his children.
Node *Matcher::transform( Node *n ) { ShouldNotCallThis(); return n; }
Node *Matcher::xform( Node *n, int max_stack ) {
  // Use one stack to keep both: child's node/state and parent's node/index
  MStack mstack(max_stack * 2 * 2); // usually: C->live_nodes() * 2 * 2
  mstack.push(n, Visit, NULL, -1);  // set NULL as parent to indicate root

  while (mstack.is_nonempty()) {
    C->check_node_count(NodeLimitFudgeFactor, "too many nodes matching instructions");
    if (C->failing()) return NULL;
    n = mstack.node();          // Leave node on stack
    Node_State nstate = mstack.state();
    if (nstate == Visit) {
      mstack.set_state(Post_Visit);
      Node *oldn = n;
      // Old-space or new-space check
      if (!C->node_arena()->contains(n)) {
        // Old space!
        Node* m;
        if (has_new_node(n)) {  // Not yet Label/Reduced
          m = new_node(n);
        } else {
          if (!is_dontcare(n)) { // Matcher can match this guy
            // Calls match special.  They match alone with no children.
            // Their children, the incoming arguments, match normally.
            m = n->is_SafePoint() ? match_sfpt(n->as_SafePoint()):match_tree(n);
            if (C->failing())  return NULL;
            if (m == NULL) { Matcher::soft_match_failure(); return NULL; }
          } else {                  // Nothing the matcher cares about
            if (n->is_Proj() && n->in(0) != NULL && n->in(0)->is_Multi()) {       // Projections?
              // Convert to machine-dependent projection
              m = n->in(0)->as_Multi()->match( n->as_Proj(), this );
#ifdef ASSERT
              _new2old_map.map(m->_idx, n);
#endif
              if (m->in(0) != NULL) // m might be top
                collect_null_checks(m, n);
            } else {                // Else just a regular 'ol guy
              m = n->clone();       // So just clone into new-space
#ifdef ASSERT
              _new2old_map.map(m->_idx, n);
#endif
              // Def-Use edges will be added incrementally as Uses
              // of this node are matched.
              assert(m->outcnt() == 0, "no Uses of this clone yet");
            }
          }

          set_new_node(n, m);       // Map old to new
          if (_old_node_note_array != NULL) {
            Node_Notes* nn = C->locate_node_notes(_old_node_note_array,
                                                  n->_idx);
            C->set_node_notes_at(m->_idx, nn);
          }
          debug_only(match_alias_type(C, n, m));
        }
        n = m;    // n is now a new-space node
        mstack.set_node(n);
      }

      // New space!
      if (_visited.test_set(n->_idx)) continue; // while(mstack.is_nonempty())

      int i;
      // Put precedence edges on stack first (match them last).
      for (i = oldn->req(); (uint)i < oldn->len(); i++) {
        Node *m = oldn->in(i);
        if (m == NULL) break;
        // set -1 to call add_prec() instead of set_req() during Step1
        mstack.push(m, Visit, n, -1);
      }

      // For constant debug info, I'd rather have unmatched constants.
      int cnt = n->req();
      JVMState* jvms = n->jvms();
      int debug_cnt = jvms ? jvms->debug_start() : cnt;

      // Now do only debug info.  Clone constants rather than matching.
      // Constants are represented directly in the debug info without
      // the need for executable machine instructions.
      // Monitor boxes are also represented directly.
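      // Illustration: a ConI that feeds only a SafePoint's debug info is
      // cloned below and recorded directly in the debug info, so no machine
      // instruction has to be emitted just to materialize it.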
      for (i = cnt - 1; i >= debug_cnt; --i) { // For all debug inputs do
        Node *m = n->in(i);          // Get input
        int op = m->Opcode();
        assert((op == Op_BoxLock) == jvms->is_monitor_use(i), "boxes only at monitor sites");
        if( op == Op_ConI || op == Op_ConP || op == Op_ConN || op == Op_ConNKlass ||
            op == Op_ConF || op == Op_ConD || op == Op_ConL
            // || op == Op_BoxLock  // %%%% enable this and remove (+++) in chaitin.cpp
            ) {
          m = m->clone();
#ifdef ASSERT
          _new2old_map.map(m->_idx, n);
#endif
          mstack.push(m, Post_Visit, n, i); // Don't need to visit
          mstack.push(m->in(0), Visit, m, 0);
        } else {
          mstack.push(m, Visit, n, i);
        }
      }

      // And now walk his children, and convert his inputs to new-space.
      for( ; i >= 0; --i ) { // For all normal inputs do
        Node *m = n->in(i);  // Get input
        if(m != NULL)
          mstack.push(m, Visit, n, i);
      }

    }
    else if (nstate == Post_Visit) {
      // Set xformed input
      Node *p = mstack.parent();
      if (p != NULL) { // root doesn't have parent
        int i = (int)mstack.index();
        if (i >= 0)
          p->set_req(i, n); // required input
        else if (i == -1)
          p->add_prec(n);   // precedence input
        else
          ShouldNotReachHere();
      }
      mstack.pop(); // remove processed node from stack
    }
    else {
      ShouldNotReachHere();
    }
  } // while (mstack.is_nonempty())
  return n; // Return new-space Node
}

//------------------------------warp_outgoing_stk_arg------------------------
OptoReg::Name Matcher::warp_outgoing_stk_arg( VMReg reg, OptoReg::Name begin_out_arg_area, OptoReg::Name &out_arg_limit_per_call ) {
  // Convert outgoing argument location to a pre-biased stack offset
  if (reg->is_stack()) {
    OptoReg::Name warped = reg->reg2stack();
    // Adjust the stack slot offset to be the register number used
    // by the allocator.
    warped = OptoReg::add(begin_out_arg_area, warped);
    // Keep track of the largest numbered stack slot used for an arg.
    // Largest used slot per call-site indicates the amount of stack
    // that is killed by the call.
    if( warped >= out_arg_limit_per_call )
      out_arg_limit_per_call = OptoReg::add(warped,1);
    if (!RegMask::can_represent_arg(warped)) {
      C->record_method_not_compilable_all_tiers("unsupported calling sequence");
      return OptoReg::Bad;
    }
    return warped;
  }
  return OptoReg::as_OptoReg(reg);
}


//------------------------------match_sfpt-------------------------------------
// Helper function to match call instructions.  Calls match special.
// They match alone with no children.  Their children, the incoming
// arguments, match normally.
MachNode *Matcher::match_sfpt( SafePointNode *sfpt ) {
  MachSafePointNode *msfpt = NULL;
  MachCallNode      *mcall = NULL;
  uint               cnt;
  // Split out case for SafePoint vs Call
  CallNode *call;
  const TypeTuple *domain;
  ciMethod*        method = NULL;
  bool             is_method_handle_invoke = false;  // for special kill effects
  if( sfpt->is_Call() ) {
    call = sfpt->as_Call();
    domain = call->tf()->domain();
    cnt = domain->cnt();

    // Match just the call, nothing else
    MachNode *m = match_tree(call);
    if (C->failing())  return NULL;
    if( m == NULL ) { Matcher::soft_match_failure(); return NULL; }

    // Copy data from the Ideal SafePoint to the machine version
    mcall = m->as_MachCall();

    mcall->set_tf(         call->tf());
    mcall->set_entry_point(call->entry_point());
    mcall->set_cnt(        call->cnt());

    if( mcall->is_MachCallJava() ) {
      MachCallJavaNode *mcall_java  = mcall->as_MachCallJava();
      const CallJavaNode *call_java =  call->as_CallJava();
      method = call_java->method();
      mcall_java->_method = method;
      mcall_java->_bci = call_java->_bci;
      mcall_java->_optimized_virtual = call_java->is_optimized_virtual();
      is_method_handle_invoke = call_java->is_method_handle_invoke();
      mcall_java->_method_handle_invoke = is_method_handle_invoke;
      if (is_method_handle_invoke) {
        C->set_has_method_handle_invokes(true);
      }
      if( mcall_java->is_MachCallStaticJava() )
        mcall_java->as_MachCallStaticJava()->_name =
         call_java->as_CallStaticJava()->_name;
      if( mcall_java->is_MachCallDynamicJava() )
        mcall_java->as_MachCallDynamicJava()->_vtable_index =
         call_java->as_CallDynamicJava()->_vtable_index;
    }
    else if( mcall->is_MachCallRuntime() ) {
      mcall->as_MachCallRuntime()->_name = call->as_CallRuntime()->_name;
    }
    msfpt = mcall;
  }
  // This is a non-call safepoint
  else {
    call = NULL;
    domain = NULL;
    MachNode *mn = match_tree(sfpt);
    if (C->failing())  return NULL;
    msfpt = mn->as_MachSafePoint();
    cnt = TypeFunc::Parms;
  }

  // Advertise the correct memory effects (for anti-dependence computation).
  msfpt->set_adr_type(sfpt->adr_type());

  // Allocate a private array of RegMasks.  These RegMasks are not shared.
  msfpt->_in_rms = NEW_RESOURCE_ARRAY( RegMask, cnt );
  // Empty them all.
  memset( msfpt->_in_rms, 0, sizeof(RegMask)*cnt );

  // Do all the pre-defined non-Empty register masks
  msfpt->_in_rms[TypeFunc::ReturnAdr] = _return_addr_mask;
  msfpt->_in_rms[TypeFunc::FramePtr ] = c_frame_ptr_mask;

  // Place where the first outgoing argument can possibly be put.
  OptoReg::Name begin_out_arg_area = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
  assert( is_even(begin_out_arg_area), "" );
  // Compute max outgoing register number per call site.
  OptoReg::Name out_arg_limit_per_call = begin_out_arg_area;
  // Calls to C may hammer extra stack slots above and beyond any arguments.
  // These are usually backing store for register arguments for varargs.
  if( call != NULL && call->is_CallRuntime() )
    out_arg_limit_per_call = OptoReg::add(out_arg_limit_per_call,C->varargs_C_out_slots_killed());


  // Do the normal argument list (parameters) register masks
  int argcnt = cnt - TypeFunc::Parms;
  if( argcnt > 0 ) {          // Skip it all if we have no args
    BasicType *sig_bt  = NEW_RESOURCE_ARRAY( BasicType, argcnt );
    VMRegPair *parm_regs = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
    int i;
    for( i = 0; i < argcnt; i++ ) {
      sig_bt[i] = domain->field_at(i+TypeFunc::Parms)->basic_type();
    }
    // V-call to pick proper calling convention
    call->calling_convention( sig_bt, parm_regs, argcnt );

#ifdef ASSERT
    // Sanity check users' calling convention.  Really handy during
    // the initial porting effort.  Fairly expensive otherwise.
    { for (int i = 0; i<argcnt; i++) {
      if( !parm_regs[i].first()->is_valid() &&
          !parm_regs[i].second()->is_valid() ) continue;
      VMReg reg1 = parm_regs[i].first();
      VMReg reg2 = parm_regs[i].second();
      for (int j = 0; j < i; j++) {
        if( !parm_regs[j].first()->is_valid() &&
            !parm_regs[j].second()->is_valid() ) continue;
        VMReg reg3 = parm_regs[j].first();
        VMReg reg4 = parm_regs[j].second();
        if( !reg1->is_valid() ) {
          assert( !reg2->is_valid(), "valid halvsies" );
        } else if( !reg3->is_valid() ) {
          assert( !reg4->is_valid(), "valid halvsies" );
        } else {
          assert( reg1 != reg2, "calling conv. must produce distinct regs");
          assert( reg1 != reg3, "calling conv. must produce distinct regs");
          assert( reg1 != reg4, "calling conv. must produce distinct regs");
          assert( reg2 != reg3, "calling conv. must produce distinct regs");
          assert( reg2 != reg4 || !reg2->is_valid(), "calling conv. must produce distinct regs");
          assert( reg3 != reg4, "calling conv. must produce distinct regs");
        }
      }
    }
    }
#endif

    // Visit each argument.  Compute its outgoing register mask.
    // Each argument's mask may end up with 2 bits set (for doubleword values).
    // Compute max over all outgoing arguments both per call-site
    // and over the entire method.
    for( i = 0; i < argcnt; i++ ) {
      // Address of incoming argument mask to fill in
      RegMask *rm = &mcall->_in_rms[i+TypeFunc::Parms];
      if( !parm_regs[i].first()->is_valid() &&
          !parm_regs[i].second()->is_valid() ) {
        continue;               // Avoid Halves
      }
      // Grab first register, adjust stack slots and insert in mask.
      OptoReg::Name reg1 = warp_outgoing_stk_arg(parm_regs[i].first(), begin_out_arg_area, out_arg_limit_per_call );
      if (OptoReg::is_valid(reg1))
        rm->Insert( reg1 );
      // Grab second register (if any), adjust stack slots and insert in mask.
      OptoReg::Name reg2 = warp_outgoing_stk_arg(parm_regs[i].second(), begin_out_arg_area, out_arg_limit_per_call );
      if (OptoReg::is_valid(reg2))
        rm->Insert( reg2 );
    } // End of for all arguments

    // Compute number of stack slots needed to restore stack in case of
    // Pascal-style argument popping.
    mcall->_argsize = out_arg_limit_per_call - begin_out_arg_area;
  }

  // Compute the max stack slot killed by any call.  These will not be
  // available for debug info, and will be used to adjust FIRST_STACK_mask
  // after all call sites have been visited.
  if( _out_arg_limit < out_arg_limit_per_call)
    _out_arg_limit = out_arg_limit_per_call;

  if (mcall) {
    // Kill the outgoing argument area, including any non-argument holes and
    // any legacy C-killed slots.  Use Fat-Projections to do the killing.
    // Since the max-per-method covers the max-per-call-site and debug info
    // is excluded on the max-per-method basis, debug info cannot land in
    // this killed area.
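    // Illustration: when the limit is representable, the fat projection built
    // below gets every stack slot in [begin_out_arg_area,
    // out_arg_limit_per_call) in its mask, modeling the call clobbering the
    // entire outgoing-argument area.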
    uint r_cnt = mcall->tf()->range()->cnt();
    MachProjNode *proj = new (C) MachProjNode( mcall, r_cnt+10000, RegMask::Empty, MachProjNode::fat_proj );
    if (!RegMask::can_represent_arg(OptoReg::Name(out_arg_limit_per_call-1))) {
      C->record_method_not_compilable_all_tiers("unsupported outgoing calling sequence");
    } else {
      for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++)
        proj->_rout.Insert(OptoReg::Name(i));
    }
    if (proj->_rout.is_NotEmpty()) {
      push_projection(proj);
    }
  }
  // Transfer the safepoint information from the call to the mcall
  // Move the JVMState list
  msfpt->set_jvms(sfpt->jvms());
  for (JVMState* jvms = msfpt->jvms(); jvms; jvms = jvms->caller()) {
    jvms->set_map(sfpt);
  }

  // Debug inputs begin just after the last incoming parameter
  assert((mcall == NULL) || (mcall->jvms() == NULL) ||
         (mcall->jvms()->debug_start() + mcall->_jvmadj == mcall->tf()->domain()->cnt()), "");

  // Move the OopMap
  msfpt->_oop_map = sfpt->_oop_map;

  // Add additional edges.
  if (msfpt->mach_constant_base_node_input() != (uint)-1 && !msfpt->is_MachCallLeaf()) {
    // For these calls we can not add MachConstantBase in expand(), as the
    // ins are not complete then.
    msfpt->ins_req(msfpt->mach_constant_base_node_input(), C->mach_constant_base_node());
    if (msfpt->jvms() &&
        msfpt->mach_constant_base_node_input() <= msfpt->jvms()->debug_start() + msfpt->_jvmadj) {
      // We added an edge before jvms, so we must adapt the position of the ins.
      msfpt->jvms()->adapt_position(+1);
    }
  }

  // Registers killed by the call are set in the local scheduling pass
  // of Global Code Motion.
  return msfpt;
}

//---------------------------match_tree----------------------------------------
// Match an Ideal Node DAG - turn it into a tree; Label & Reduce.  Used as part
// of the wholesale conversion from Ideal to Mach Nodes.  Also used for
// making GotoNodes while building the CFG and in init_spill_mask() to identify
// a Load's result RegMask for memoization in idealreg2regmask[]
MachNode *Matcher::match_tree( const Node *n ) {
  assert( n->Opcode() != Op_Phi, "cannot match" );
  assert( !n->is_block_start(), "cannot match" );
  // Set the mark for all locally allocated State objects.
  // When this call returns, the _states_arena arena will be reset
  // freeing all State objects.
  ResourceMark rm( &_states_arena );

  LabelRootDepth = 0;

  // StoreNodes require their Memory input to match any LoadNodes
  Node *mem = n->is_Store() ? n->in(MemNode::Memory) : (Node*)1 ;
#ifdef ASSERT
  Node* save_mem_node = _mem_node;
  _mem_node = n->is_Store() ? (Node*)n : NULL;
#endif
  // State object for root node of match tree
  // Allocate it on _states_arena - stack allocation can cause stack overflow.
  State *s = new (&_states_arena) State;
  s->_kids[0] = NULL;
  s->_kids[1] = NULL;
  s->_leaf = (Node*)n;
  // Label the input tree, allocating labels from top-level arena
  Label_Root( n, s, n->in(0), mem );
  if (C->failing())  return NULL;

  // The minimum cost match for the whole tree is found at the root State
  uint mincost = max_juint;
  uint cost = max_juint;
  uint i;
  for( i = 0; i < NUM_OPERANDS; i++ ) {
    if( s->valid(i) &&                // valid entry and
        s->_cost[i] < cost &&         // low cost and
        s->_rule[i] >= NUM_OPERANDS ) // not an operand
      cost = s->_cost[mincost=i];
  }
  if (mincost == max_juint) {
#ifndef PRODUCT
    tty->print("No matching rule for:");
    s->dump();
#endif
    Matcher::soft_match_failure();
    return NULL;
  }
  // Reduce input tree based upon the state labels to machine Nodes
  MachNode *m = ReduceInst( s, s->_rule[mincost], mem );
#ifdef ASSERT
  _old2new_map.map(n->_idx, m);
  _new2old_map.map(m->_idx, (Node*)n);
#endif

  // Add any Matcher-ignored edges
  uint cnt = n->req();
  uint start = 1;
  if( mem != (Node*)1 ) start = MemNode::Memory+1;
  if( n->is_AddP() ) {
    assert( mem == (Node*)1, "" );
    start = AddPNode::Base+1;
  }
  for( i = start; i < cnt; i++ ) {
    if( !n->match_edge(i) ) {
      if( i < m->req() )
        m->ins_req( i, n->in(i) );
      else
        m->add_req( n->in(i) );
    }
  }

  debug_only( _mem_node = save_mem_node; )
  return m;
}


//------------------------------match_into_reg---------------------------------
// Choose to either match this Node in a register or part of the current
// match tree.  Return true for requiring a register and false for matching
// as part of the current match tree.
static bool match_into_reg( const Node *n, Node *m, Node *control, int i, bool shared ) {

  const Type *t = m->bottom_type();

  if (t->singleton()) {
    // Never force constants into registers.  Allow them to match as
    // constants or registers.  Copies of the same value will share
    // the same register.  See find_shared_node.
    return false;
  } else {                      // Not a constant
    // Stop recursion if they have different Controls.
    Node* m_control = m->in(0);
    // The control of the load's memory can post-dominate the load's control.
    // So use it, since the load can't float above its memory.
    Node* mem_control = (m->is_Load()) ? m->in(MemNode::Memory)->in(0) : NULL;
    if (control && m_control && control != m_control && control != mem_control) {

      // Actually, we can live with the most conservative control we
      // find, if it post-dominates the others.  This allows us to
      // pick up load/op/store trees where the load can float a little
      // above the store.
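      // Illustration: in a load/op/store tree the Load's control may sit in
      // an earlier block than the tree's control; the short walk below checks
      // whether 'control' post-dominates the Load's control (or its memory's
      // control) along a merge-free path, in which case the Load can still
      // be folded into this match tree.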
      Node *x = control;
      const uint max_scan = 6;  // Arbitrary scan cutoff
      uint j;
      for (j=0; j<max_scan; j++) {
        if (x->is_Region())     // Bail out at merge points
          return true;
        x = x->in(0);
        if (x == m_control)     // Does 'control' post-dominate
          break;                // m->in(0)?  If so, we can use it
        if (x == mem_control)   // Does 'control' post-dominate
          break;                // mem_control?  If so, we can use it
      }
      if (j == max_scan)        // No post-domination before scan end?
        return true;            // Then break the match tree up
    }
    if ((m->is_DecodeN() && Matcher::narrow_oop_use_complex_address()) ||
        (m->is_DecodeNKlass() && Matcher::narrow_klass_use_complex_address())) {
      // These are commonly used in address expressions and can
      // efficiently fold into them on X64 in some cases.
      return false;
    }
  }

  // Not forceable cloning.  If shared, put it into a register.
  return shared;
}


//------------------------------Instruction Selection--------------------------
// Label method walks a "tree" of nodes, using the ADLC generated DFA to match
// ideal nodes to machine instructions.  Trees are delimited by shared Nodes,
// things the Matcher does not match (e.g., Memory), and things with different
// Controls (hence forced into different blocks).  We pass in the Control
// selected for this entire State tree.

// The Matcher works on Trees, but an Intel add-to-memory requires a DAG: the
// Store and the Load must have identical Memories (as well as identical
// pointers).  Since the Matcher does not have anything for Memory (and
// does not handle DAGs), I have to match the Memory input myself.  If the
// Tree root is a Store, I require all Loads to have the identical memory.
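// Illustrative example: for StoreI(mem, addr, AddI(LoadI(mem, addr2), x)),
// the LoadI may be subsumed into the Store's match tree only because it
// reads the same 'mem'; a Load with a different Memory input is instead
// matched separately into a register.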
Node *Matcher::Label_Root( const Node *n, State *svec, Node *control, const Node *mem){
  // Since Label_Root is a recursive function, it's possible that we might run
  // out of stack space.  See bugs 6272980 & 6227033 for more info.
  LabelRootDepth++;
  if (LabelRootDepth > MaxLabelRootDepth) {
    C->record_method_not_compilable_all_tiers("Out of stack space, increase MaxLabelRootDepth");
    return NULL;
  }
  uint care = 0;                // Edges matcher cares about
  uint cnt = n->req();
  uint i = 0;

  // Examine children for memory state
  // Can only subsume a child into your match-tree if that child's memory state
  // is not modified along the path to another input.
  // It is unsafe even if the other inputs are separate roots.
  Node *input_mem = NULL;
  for( i = 1; i < cnt; i++ ) {
    if( !n->match_edge(i) ) continue;
    Node *m = n->in(i);         // Get ith input
    assert( m, "expect non-null children" );
    if( m->is_Load() ) {
      if( input_mem == NULL ) {
        input_mem = m->in(MemNode::Memory);
      } else if( input_mem != m->in(MemNode::Memory) ) {
        input_mem = NodeSentinel;
      }
    }
  }

  for( i = 1; i < cnt; i++ ){// For my children
    if( !n->match_edge(i) ) continue;
    Node *m = n->in(i);         // Get ith input
    // Allocate states out of a private arena
    State *s = new (&_states_arena) State;
    svec->_kids[care++] = s;
    assert( care <= 2, "binary only for now" );

    // Recursively label the State tree.
    s->_kids[0] = NULL;
    s->_kids[1] = NULL;
    s->_leaf = m;

    // Check for leaves of the State Tree; things that cannot be a part of
    // the current tree.  If it finds any, that value is matched as a
    // register operand.  If not, then the normal matching is used.
    if( match_into_reg(n, m, control, i, is_shared(m)) ||
        //
        // Stop recursion if this is LoadNode and the root of this tree is a
        // StoreNode and the load & store have different memories.
        ((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem) ||
        // Can NOT include the match of a subtree when its memory state
        // is used by any of the other subtrees
        (input_mem == NodeSentinel) ) {
#ifndef PRODUCT
      // Print when we exclude matching due to different memory states at input-loads
      if( PrintOpto && (Verbose && WizardMode) && (input_mem == NodeSentinel)
        && !((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem) ) {
        tty->print_cr("invalid input_mem");
      }
#endif
      // Switch to a register-only opcode; this value must be in a register
      // and cannot be subsumed as part of a larger instruction.
      s->DFA( m->ideal_reg(), m );

    } else {
      // If match tree has no control and we do, adopt it for entire tree
      if( control == NULL && m->in(0) != NULL && m->req() > 1 )
        control = m->in(0);         // Pick up control
      // Else match as a normal part of the match tree.
      control = Label_Root(m,s,control,mem);
      if (C->failing()) return NULL;
    }
  }


  // Call DFA to match this node, and return
  svec->DFA( n->Opcode(), n );

#ifdef ASSERT
  uint x;
  for( x = 0; x < _LAST_MACH_OPER; x++ )
    if( svec->valid(x) )
      break;

  if (x >= _LAST_MACH_OPER) {
    n->dump();
    svec->dump();
    assert( false, "bad AD file" );
  }
#endif
  return control;
}


// Con nodes reduced using the same rule can share their MachNode
// which reduces the number of copies of a constant in the final
// program.  The register allocator is free to split uses later to
// split live ranges.
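// For example, if two separate match trees both embed the same ConI node and
// both reduce it with the same constant-load rule, the second reduction can
// reuse the MachNode produced by the first instead of materializing the
// constant twice.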
MachNode* Matcher::find_shared_node(Node* leaf, uint rule) {
  if (!leaf->is_Con() && !leaf->is_DecodeNarrowPtr()) return NULL;

  // See if this Con has already been reduced using this rule.
  if (_shared_nodes.Size() <= leaf->_idx) return NULL;
  MachNode* last = (MachNode*)_shared_nodes.at(leaf->_idx);
  if (last != NULL && rule == last->rule()) {
    // Don't expect control change for DecodeN
    if (leaf->is_DecodeNarrowPtr())
      return last;
    // Get the new space root.
    Node* xroot = new_node(C->root());
    if (xroot == NULL) {
      // This shouldn't happen given the order of matching.
      return NULL;
    }

    // Shared constants need to have their control be root so they
    // can be scheduled properly.
    Node* control = last->in(0);
    if (control != xroot) {
      if (control == NULL || control == C->root()) {
        last->set_req(0, xroot);
      } else {
        assert(false, "unexpected control");
        return NULL;
      }
    }
    return last;
  }
  return NULL;
}


//------------------------------ReduceInst-------------------------------------
// Reduce a State tree (with given Control) into a tree of MachNodes.
// This routine (and its cohort ReduceOper) converts Ideal Nodes into
// complicated machine Nodes.  Each MachNode covers some tree of Ideal Nodes.
// Each MachNode has a number of complicated MachOper operands; each
// MachOper also covers a further tree of Ideal Nodes.

// The root of the Ideal match tree is always an instruction, so we enter
// the recursion here.  After building the MachNode, we need to recurse
// the tree checking for these cases:
// (1) Child is an instruction -
//     Build the instruction (recursively), add it as an edge.
//     Build a simple operand (register) to hold the result of the instruction.
// (2) Child is an interior part of an instruction -
//     Skip over it (do nothing)
// (3) Child is the start of an operand -
//     Build the operand, place it inside the instruction
//     Call ReduceOper.
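//
// A sketch (the rule and operand shapes are illustrative, not from a real AD
// file): when an add-with-memory-operand rule covers (AddI (LoadI mem adr) x),
// the LoadI is interior to the instruction (case 2), the address subtree under
// 'adr' starts a memory operand built via ReduceOper (case 3), and if 'x' is
// the root of its own match tree it is reduced to a separate MachNode and
// wired in as a register input (case 1).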
MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
  assert( rule >= NUM_OPERANDS, "called with operand rule" );

  MachNode* shared_node = find_shared_node(s->_leaf, rule);
  if (shared_node != NULL) {
    return shared_node;
  }

  // Build the object to represent this state & prepare for recursive calls
  MachNode *mach = s->MachNodeGenerator( rule, C );
  guarantee(mach != NULL, "Missing MachNode");
  mach->_opnds[0] = s->MachOperGenerator( _reduceOp[rule], C );
  assert( mach->_opnds[0] != NULL, "Missing result operand" );
  Node *leaf = s->_leaf;
  // Check for instruction or instruction chain rule
  if( rule >= _END_INST_CHAIN_RULE || rule < _BEGIN_INST_CHAIN_RULE ) {
    assert(C->node_arena()->contains(s->_leaf) || !has_new_node(s->_leaf),
           "duplicating node that's already been matched");
    // Instruction
    mach->add_req( leaf->in(0) ); // Set initial control
    // Reduce interior of complex instruction
    ReduceInst_Interior( s, rule, mem, mach, 1 );
  } else {
    // Instruction chain rules are data-dependent on their inputs
    mach->add_req(0);             // Set initial control to none
    ReduceInst_Chain_Rule( s, rule, mem, mach );
  }

  // If a Memory was used, insert a Memory edge
  if( mem != (Node*)1 ) {
    mach->ins_req(MemNode::Memory,mem);
#ifdef ASSERT
    // Verify adr type after matching memory operation
    const MachOper* oper = mach->memory_operand();
    if (oper != NULL && oper != (MachOper*)-1) {
      // It has a unique memory operand.  Find corresponding ideal mem node.
      Node* m = NULL;
      if (leaf->is_Mem()) {
        m = leaf;
      } else {
        m = _mem_node;
        assert(m != NULL && m->is_Mem(), "expecting memory node");
      }
      const Type* mach_at = mach->adr_type();
      // A DecodeN node consumed by an address may have a different type
      // than its input.  Don't compare types for such a case.
      if (m->adr_type() != mach_at &&
          (m->in(MemNode::Address)->is_DecodeNarrowPtr() ||
           (m->in(MemNode::Address)->is_AddP() &&
            m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeNarrowPtr()) ||
           (m->in(MemNode::Address)->is_AddP() &&
            m->in(MemNode::Address)->in(AddPNode::Address)->is_AddP() &&
            m->in(MemNode::Address)->in(AddPNode::Address)->in(AddPNode::Address)->is_DecodeNarrowPtr()))) {
        mach_at = m->adr_type();
      }
      if (m->adr_type() != mach_at) {
        m->dump();
        tty->print_cr("mach:");
        mach->dump(1);
      }
      assert(m->adr_type() == mach_at, "matcher should not change adr type");
    }
#endif
  }

  // If the _leaf is an AddP, insert the base edge
  if (leaf->is_AddP()) {
    mach->ins_req(AddPNode::Base,leaf->in(AddPNode::Base));
  }

  uint number_of_projections_prior = number_of_projections();

  // Perform any 1-to-many expansions required
  MachNode *ex = mach->Expand(s, _projection_list, mem);
  if (ex != mach) {
    assert(ex->ideal_reg() == mach->ideal_reg(), "ideal types should match");
    if( ex->in(1)->is_Con() )
      ex->in(1)->set_req(0, C->root());
    // Remove old node from the graph
    for( uint i=0; i<mach->req(); i++ ) {
      mach->set_req(i,NULL);
    }
#ifdef ASSERT
    _new2old_map.map(ex->_idx, s->_leaf);
#endif
  }

  // PhaseChaitin::fixup_spills will sometimes generate spill code
  // via the matcher.  By that time, nodes have been wired into the CFG,
  // and any further nodes generated by expand rules would be left hanging
  // in space and would not get emitted as output code.  Catch this.
  // Also, catch any new register allocation constraints ("projections")
  // generated belatedly during spill code generation.
  if (_allocation_started) {
    guarantee(ex == mach, "no expand rules during spill generation");
    guarantee(number_of_projections_prior == number_of_projections(), "no allocation during spill generation");
  }

  if (leaf->is_Con() || leaf->is_DecodeNarrowPtr()) {
    // Record the con for sharing
    _shared_nodes.map(leaf->_idx, ex);
  }

  return ex;
}

void Matcher::ReduceInst_Chain_Rule( State *s, int rule, Node *&mem, MachNode *mach ) {
  // 'op' is what I am expecting to receive
  int op = _leftOp[rule];
  // Operand type to catch child's result
  // This is what my child will give me.
  int opnd_class_instance = s->_rule[op];
  // Choose between operand class or not.
  // This is what I will receive.
  int catch_op = (FIRST_OPERAND_CLASS <= op && op < NUM_OPERANDS) ? opnd_class_instance : op;
  // New rule for child.  Chase operand classes to get the actual rule.
  int newrule = s->_rule[catch_op];

  if( newrule < NUM_OPERANDS ) {
    // Chain from operand or operand class, may be output of shared node
    assert( 0 <= opnd_class_instance && opnd_class_instance < NUM_OPERANDS,
            "Bad AD file: Instruction chain rule must chain from operand");
    // Insert operand into array of operands for this instruction
    mach->_opnds[1] = s->MachOperGenerator( opnd_class_instance, C );

    ReduceOper( s, newrule, mem, mach );
  } else {
    // Chain from the result of an instruction
    assert( newrule >= _LAST_MACH_OPER, "Do NOT chain from internal operand");
    mach->_opnds[1] = s->MachOperGenerator( _reduceOp[catch_op], C );
    Node *mem1 = (Node*)1;
    debug_only(Node *save_mem_node = _mem_node;)
    mach->add_req( ReduceInst(s, newrule, mem1) );
    debug_only(_mem_node = save_mem_node;)
  }
  return;
}


uint Matcher::ReduceInst_Interior( State *s, int rule, Node *&mem, MachNode *mach, uint num_opnds ) {
  if( s->_leaf->is_Load() ) {
    Node *mem2 = s->_leaf->in(MemNode::Memory);
    assert( mem == (Node*)1 || mem == mem2, "multiple Memories being matched at once?" );
    debug_only( if( mem == (Node*)1 ) _mem_node = s->_leaf;)
    mem = mem2;
  }
  if( s->_leaf->in(0) != NULL && s->_leaf->req() > 1) {
    if( mach->in(0) == NULL )
      mach->set_req(0, s->_leaf->in(0));
  }

  // Now recursively walk the state tree & add operand list.
  for( uint i=0; i<2; i++ ) {   // binary tree
    State *newstate = s->_kids[i];
    if( newstate == NULL ) break;      // Might only have 1 child
    // 'op' is what I am expecting to receive
    int op;
    if( i == 0 ) {
      op = _leftOp[rule];
    } else {
      op = _rightOp[rule];
    }
    // Operand type to catch child's result
    // This is what my child will give me.
    int opnd_class_instance = newstate->_rule[op];
    // Choose between operand class or not.
    // This is what I will receive.
    int catch_op = (op >= FIRST_OPERAND_CLASS && op < NUM_OPERANDS) ? opnd_class_instance : op;
    // New rule for child.  Chase operand classes to get the actual rule.
    int newrule = newstate->_rule[catch_op];

    if( newrule < NUM_OPERANDS ) { // Operand/operandClass or internalOp/instruction?
      // Operand/operandClass
      // Insert operand into array of operands for this instruction
      mach->_opnds[num_opnds++] = newstate->MachOperGenerator( opnd_class_instance, C );
      ReduceOper( newstate, newrule, mem, mach );

    } else {                    // Child is internal operand or new instruction
      if( newrule < _LAST_MACH_OPER ) { // internal operand or instruction?
        // internal operand --> call ReduceInst_Interior
        // Interior of complex instruction.  Do nothing but recurse.
        num_opnds = ReduceInst_Interior( newstate, newrule, mem, mach, num_opnds );
      } else {
        // instruction --> call build operand(  ) to catch result
        //             --> ReduceInst( newrule )
        mach->_opnds[num_opnds++] = s->MachOperGenerator( _reduceOp[catch_op], C );
        Node *mem1 = (Node*)1;
        debug_only(Node *save_mem_node = _mem_node;)
        mach->add_req( ReduceInst( newstate, newrule, mem1 ) );
        debug_only(_mem_node = save_mem_node;)
      }
    }
    assert( mach->_opnds[num_opnds-1], "" );
  }
  return num_opnds;
}

// This routine walks the interior of possible complex operands.
// At each point we check our children in the match tree:
// (1) No children -
//     We are a leaf; add _leaf field as an input to the MachNode
// (2) Child is an internal operand -
//     Skip over it ( do nothing )
// (3) Child is an instruction -
//     Call ReduceInst recursively and add the resulting
//     instruction as an input to the MachNode
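// For example (sketch): reducing an indirect operand that covers
// (AddP base index) adds the 'base' and 'index' leaves as inputs of the
// MachNode (case 1), while the AddP itself is interior to the operand and
// contributes no input of its own (case 2).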
void Matcher::ReduceOper( State *s, int rule, Node *&mem, MachNode *mach ) {
  assert( rule < _LAST_MACH_OPER, "called with operand rule" );
  State *kid = s->_kids[0];
  assert( kid == NULL || s->_leaf->in(0) == NULL, "internal operands have no control" );

  // Leaf?  And not subsumed?
  if( kid == NULL && !_swallowed[rule] ) {
    mach->add_req( s->_leaf );  // Add leaf pointer
    return;                     // Bail out
  }

  if( s->_leaf->is_Load() ) {
    assert( mem == (Node*)1, "multiple Memories being matched at once?" );
    mem = s->_leaf->in(MemNode::Memory);
    debug_only(_mem_node = s->_leaf;)
  }
  if( s->_leaf->in(0) && s->_leaf->req() > 1) {
    if( !mach->in(0) )
      mach->set_req(0,s->_leaf->in(0));
    else {
      assert( s->_leaf->in(0) == mach->in(0), "same instruction, differing controls?" );
    }
  }

  for( uint i=0; kid != NULL && i<2; kid = s->_kids[1], i++ ) {   // binary tree
    int newrule;
    if( i == 0)
      newrule = kid->_rule[_leftOp[rule]];
    else
      newrule = kid->_rule[_rightOp[rule]];

    if( newrule < _LAST_MACH_OPER ) { // Operand or instruction?
      // Internal operand; recurse but do nothing else
      ReduceOper( kid, newrule, mem, mach );

    } else {                    // Child is a new instruction
      // Reduce the instruction, and add a direct pointer from this
      // machine instruction to the newly reduced one.
      Node *mem1 = (Node*)1;
      debug_only(Node *save_mem_node = _mem_node;)
      mach->add_req( ReduceInst( kid, newrule, mem1 ) );
      debug_only(_mem_node = save_mem_node;)
    }
  }
}


// -------------------------------------------------------------------------
// Java-Java calling convention
// (what you use when Java calls Java)

//------------------------------find_receiver----------------------------------
// For a given signature, return the OptoReg for parameter 0.
OptoReg::Name Matcher::find_receiver( bool is_outgoing ) {
  VMRegPair regs;
  BasicType sig_bt = T_OBJECT;
  calling_convention(&sig_bt, &regs, 1, is_outgoing);
  // Return argument 0 register.  In the LP64 build pointers
  // take 2 registers, but the VM wants only the 'main' name.
  return OptoReg::as_OptoReg(regs.first());
}

// This function identifies sub-graphs in which a 'load' node is
// input to two different nodes, and such that it can be matched
// with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
// refers to the same node.
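// In BMI1 terms, the shapes handled below correspond (informally) to:
//   blsi   dst, src  =  src & -src        i.e. (AndX (SubX 0 load) load)
//   blsr   dst, src  =  src & (src - 1)   i.e. (AndX (AddX load -1) load)
//   blsmsk dst, src  =  src ^ (src - 1)   i.e. (XorX (AddX load -1) load)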
#ifdef X86
// Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
// This is a temporary solution until we make DAGs expressible in ADL.
template<typename ConType>
class FusedPatternMatcher {
  Node* _op1_node;
  Node* _mop_node;
  int _con_op;

  static int match_next(Node* n, int next_op, int next_op_idx) {
    if (n->in(1) == NULL || n->in(2) == NULL) {
      return -1;
    }

    if (next_op_idx == -1) { // n is commutative, try rotations
      if (n->in(1)->Opcode() == next_op) {
        return 1;
      } else if (n->in(2)->Opcode() == next_op) {
        return 2;
      }
    } else {
      assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
      if (n->in(next_op_idx)->Opcode() == next_op) {
        return next_op_idx;
      }
    }
    return -1;
  }
public:
  FusedPatternMatcher(Node* op1_node, Node *mop_node, int con_op) :
    _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }

  bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
             int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
             typename ConType::NativeType con_value) {
    if (_op1_node->Opcode() != op1) {
      return false;
    }
    if (_mop_node->outcnt() > 2) {
      return false;
    }
    op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
    if (op1_op2_idx == -1) {
      return false;
    }
    // Memory operation must be the other edge
    int op1_mop_idx = (op1_op2_idx & 1) + 1;

    // Check that the mop node is really what we want
    if (_op1_node->in(op1_mop_idx) == _mop_node) {
      Node *op2_node = _op1_node->in(op1_op2_idx);
      if (op2_node->outcnt() > 1) {
        return false;
      }
      assert(op2_node->Opcode() == op2, "Should be");
      op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
      if (op2_con_idx == -1) {
        return false;
      }
      // Memory operation must be the other edge
      int op2_mop_idx = (op2_con_idx & 1) + 1;
      // Check that the memory operation is the same node
      if (op2_node->in(op2_mop_idx) == _mop_node) {
        // Now check the constant
        const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
        if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
          return true;
        }
      }
    }
    return false;
  }
};


bool Matcher::is_bmi_pattern(Node *n, Node *m) {
  if (n != NULL && m != NULL) {
    if (m->Opcode() == Op_LoadI) {
      FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
      return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
             bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
             bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
    } else if (m->Opcode() == Op_LoadL) {
      FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
      return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
             bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
             bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
    }
  }
  return false;
}
#endif // X86

// A method-klass-holder may be passed in the inline_cache_reg
// and then expanded into the inline_cache_reg and a method_oop register,
// as defined in ad_<arch>.cpp


//------------------------------find_shared------------------------------------
// Set bits if Node is shared or otherwise a root
void Matcher::find_shared( Node *n ) {
  // Allocate stack of size C->live_nodes() * 2 to avoid frequent realloc
  MStack mstack(C->live_nodes() * 2);
  // Mark nodes as address_visited if they are inputs to an address expression
  VectorSet address_visited(Thread::current()->resource_area());
  mstack.push(n, Visit);     // Don't need to pre-visit root node
  while (mstack.is_nonempty()) {
    n = mstack.node();       // Leave node on stack
    Node_State nstate = mstack.state();
    uint nop = n->Opcode();
    if (nstate == Pre_Visit) {
      if (address_visited.test(n->_idx)) { // Visited in address already?
        // Flag as visited and shared now.
        set_visited(n);
      }
      if (is_visited(n)) {   // Visited already?
        // Node is shared and has no reason to clone.  Flag it as shared.
        // This causes it to match into a register for the sharing.
        set_shared(n);       // Flag as shared and
        if (n->is_DecodeNarrowPtr()) {
          // Oop field/array element loads must be shared but since
          // they are shared through a DecodeN they may appear to have
          // a single use so force sharing here.
          set_shared(n->in(1));
        }
        mstack.pop();        // remove node from stack
        continue;
      }
      nstate = Visit; // Not already visited; so visit now
    }
    if (nstate == Visit) {
      mstack.set_state(Post_Visit);
      set_visited(n);   // Flag as visited now
      bool mem_op = false;

      switch( nop ) {  // Handle some opcodes special
      case Op_Phi:             // Treat Phis as shared roots
      case Op_Parm:
      case Op_Proj:            // All handled specially during matching
      case Op_SafePointScalarObject:
        set_shared(n);
        set_dontcare(n);
        break;
      case Op_If:
      case Op_CountedLoopEnd:
        mstack.set_state(Alt_Post_Visit); // Alternative way
        // Convert (If (Bool (CmpX A B))) into (If (Bool) (CmpX A B)).  Helps
        // with matching cmp/branch in 1 instruction.  The Matcher needs the
        // Bool and CmpX side-by-side, because it can only get at constants
        // that are at the leaves of Match trees, and the Bool's condition acts
        // as a constant here.
        mstack.push(n->in(1), Visit);         // Clone the Bool
        mstack.push(n->in(0), Pre_Visit);     // Visit control input
        continue; // while (mstack.is_nonempty())
      case Op_ConvI2D:         // These forms efficiently match with a prior
      case Op_ConvI2F:         //   Load but not a following Store
        if( n->in(1)->is_Load() &&        // Prior load
            n->outcnt() == 1 &&           // Not already shared
            n->unique_out()->is_Store() ) // Following store
          set_shared(n);       // Force it to be a root
        break;
      case Op_ReverseBytesI:
      case Op_ReverseBytesL:
        if( n->in(1)->is_Load() &&        // Prior load
            n->outcnt() == 1 )            // Not already shared
          set_shared(n);                  // Force it to be a root
        break;
      case Op_BoxLock:         // Can't match until we get stack-regs in ADLC
      case Op_IfFalse:
      case Op_IfTrue:
      case Op_MachProj:
      case Op_MergeMem:
      case Op_Catch:
      case Op_CatchProj:
      case Op_CProj:
      case Op_JumpProj:
      case Op_JProj:
      case Op_NeverBranch:
        set_dontcare(n);
        break;
      case Op_Jump:
        mstack.push(n->in(1), Pre_Visit);     // Switch Value (could be shared)
        mstack.push(n->in(0), Pre_Visit);     // Visit Control input
        continue;                             // while (mstack.is_nonempty())
      case Op_StrComp:
      case Op_StrEquals:
      case Op_StrIndexOf:
      case Op_AryEq:
      case Op_EncodeISOArray:
        set_shared(n); // Force result into register (it will be anyways)
        break;
      case Op_ConP: {  // Convert pointers above the centerline to NULL
        TypeNode *tn = n->as_Type(); // Constants derive from type nodes
        const TypePtr* tp = tn->type()->is_ptr();
        if (tp->_ptr == TypePtr::AnyNull) {
          tn->set_type(TypePtr::NULL_PTR);
        }
        break;
      }
      case Op_ConN: {  // Convert narrow pointers above the centerline to NULL
        TypeNode *tn = n->as_Type(); // Constants derive from type nodes
        const TypePtr* tp = tn->type()->make_ptr();
        if (tp && tp->_ptr == TypePtr::AnyNull) {
          tn->set_type(TypeNarrowOop::NULL_PTR);
        }
        break;
      }
      case Op_Binary:         // These are introduced in the Post_Visit state.
        ShouldNotReachHere();
        break;
      case Op_ClearArray:
      case Op_SafePoint:
        mem_op = true;
        break;
      default:
        if( n->is_Store() ) {
          // Do match stores, despite no ideal reg
          mem_op = true;
          break;
        }
        if( n->is_Mem() ) { // Loads and LoadStores
          mem_op = true;
          // Loads must be root of match tree due to prior load conflict
          if( C->subsume_loads() == false )
            set_shared(n);
        }
        // Fall into default case
        if( !n->ideal_reg() )
          set_dontcare(n);  // Unmatchable Nodes
      } // end_switch

      for(int i = n->req() - 1; i >= 0; --i) { // For my children
        Node *m = n->in(i); // Get ith input
        if (m == NULL) continue;  // Ignore NULLs
        uint mop = m->Opcode();

        // Must clone all producers of flags, or we will not match correctly.
        // Suppose a compare setting int-flags is shared (e.g., a switch-tree)
        // then it will match into an ideal Op_RegFlags.  Alas, the fp-flags
        // are also there, so we may match a float-branch to int-flags and
        // expect the allocator to haul the flags from the int-side to the
        // fp-side.  No can do.
        if( _must_clone[mop] ) {
          mstack.push(m, Visit);
          continue; // for(int i = ...)
        }

        // if 'n' and 'm' are part of a graph for BMI instruction, clone this node.
#ifdef X86
        if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
          mstack.push(m, Visit);
          continue;
        }
#endif

        // Clone addressing expressions as they are "free" in memory access instructions
        if( mem_op && i == MemNode::Address && mop == Op_AddP ) {
          // Some inputs for address expression are not put on stack
          // to avoid marking them as shared and forcing them into register
          // if they are used only in address expressions.
          // But they should be marked as shared if there are other uses
          // besides address expressions.
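          // Sketch of the kind of shape being cloned (illustrative): an
          // address of the form  base + (index << scale) + con_offset,
          // expressed as nested AddP nodes with an LShiftX and a constant
          // Offset, can fold into a single [base + index*scale + disp]
          // addressing mode on Intel-style machines, so its pieces are
          // cloned into the match tree instead of being forced into
          // registers of their own.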

          Node *off = m->in(AddPNode::Offset);
          if( off->is_Con() &&
              // When there are other uses besides address expressions
              // put it on stack and mark as shared.
              !is_visited(m) ) {
            address_visited.test_set(m->_idx); // Flag as address_visited
            Node *adr = m->in(AddPNode::Address);

            // Intel, ARM and friends can handle 2 adds in addressing mode
            if( clone_shift_expressions && adr->is_AddP() &&
                // AtomicAdd is not an addressing expression.
                // Cheap to find it by looking for screwy base.
                !adr->in(AddPNode::Base)->is_top() &&
                // Are there other uses besides address expressions?
                !is_visited(adr) ) {
              address_visited.set(adr->_idx); // Flag as address_visited
              Node *shift = adr->in(AddPNode::Offset);
              // Check for shift by small constant as well
              if( shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
                  shift->in(2)->get_int() <= 3 &&
                  // Are there other uses besides address expressions?
                  !is_visited(shift) ) {
                address_visited.set(shift->_idx); // Flag as address_visited
                mstack.push(shift->in(2), Visit);
                Node *conv = shift->in(1);
#ifdef _LP64
                // Allow the Matcher to match the rule which bypasses the
                // ConvI2L operation for an array index on LP64
                // if the index value is positive.
                if( conv->Opcode() == Op_ConvI2L &&
                    conv->as_Type()->type()->is_long()->_lo >= 0 &&
                    // Are there other uses besides address expressions?
                    !is_visited(conv) ) {
                  address_visited.set(conv->_idx); // Flag as address_visited
                  mstack.push(conv->in(1), Pre_Visit);
                } else
#endif
                mstack.push(conv, Pre_Visit);
              } else {
                mstack.push(shift, Pre_Visit);
              }
              mstack.push(adr->in(AddPNode::Address), Pre_Visit);
              mstack.push(adr->in(AddPNode::Base), Pre_Visit);
            } else {  // Sparc, Alpha, PPC and friends
              mstack.push(adr, Pre_Visit);
            }

            // Clone X+offset as it also folds into most addressing expressions
            mstack.push(off, Visit);
            mstack.push(m->in(AddPNode::Base), Pre_Visit);
            continue; // for(int i = ...)
          } // if( off->is_Con() )
        }   // if( mem_op &&
        mstack.push(m, Pre_Visit);
      }     // for(int i = ...)
    }
    else if (nstate == Alt_Post_Visit) {
      mstack.pop(); // Remove node from stack
      // We cannot remove the Cmp input from the Bool here, as the Bool may be
      // shared and all users of the Bool need to move the Cmp in parallel.
      // This leaves both the Bool and the If pointing at the Cmp.  To
      // prevent the Matcher from trying to Match the Cmp along both paths
      // BoolNode::match_edge always returns a zero.

      // We reorder the Op_If in a pre-order manner, so we can visit without
      // accidentally sharing the Cmp (the Bool and the If make 2 users).
      n->add_req( n->in(1)->in(1) ); // Add the Cmp next to the Bool
    }
    else if (nstate == Post_Visit) {
      mstack.pop(); // Remove node from stack

      // Now hack a few special opcodes
      switch( n->Opcode() ) {       // Handle some opcodes special
      case Op_StorePConditional:
      case Op_StoreIConditional:
      case Op_StoreLConditional:
      case Op_CompareAndSwapI:
      case Op_CompareAndSwapL:
      case Op_CompareAndSwapP:
      case Op_CompareAndSwapN: {   // Convert trinary to binary-tree
        Node *newval = n->in(MemNode::ValueIn );
        Node *oldval  = n->in(LoadStoreConditionalNode::ExpectedIn);
        Node *pair = new (C) BinaryNode( oldval, newval );
        n->set_req(MemNode::ValueIn,pair);
        n->del_req(LoadStoreConditionalNode::ExpectedIn);
        break;
      }
      case Op_CMoveD:              // Convert trinary to binary-tree
      case Op_CMoveF:
      case Op_CMoveI:
      case Op_CMoveL:
      case Op_CMoveN:
      case Op_CMoveP: {
        // Restructure into a binary tree for Matching.  It's possible that
        // we could move this code up next to the graph reshaping for IfNodes
        // or vice-versa, but I do not want to debug this for Ladybird.
        // 10/2/2000 CNC.
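        // Shape sketch: the trinary (CMoveX (Bool (CmpX a b)) v1 v2) becomes
        // (CMoveX (Binary bol cmp) (Binary v1 v2)), which keeps the match
        // tree binary for the DFA.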
        Node *pair1 = new (C) BinaryNode(n->in(1),n->in(1)->in(1));
        n->set_req(1,pair1);
        Node *pair2 = new (C) BinaryNode(n->in(2),n->in(3));
        n->set_req(2,pair2);
        n->del_req(3);
        break;
      }
      case Op_LoopLimit: {
        Node *pair1 = new (C) BinaryNode(n->in(1),n->in(2));
        n->set_req(1,pair1);
        n->set_req(2,n->in(3));
        n->del_req(3);
        break;
      }
      case Op_StrEquals: {
        Node *pair1 = new (C) BinaryNode(n->in(2),n->in(3));
        n->set_req(2,pair1);
        n->set_req(3,n->in(4));
        n->del_req(4);
        break;
      }
      case Op_StrComp:
      case Op_StrIndexOf: {
        Node *pair1 = new (C) BinaryNode(n->in(2),n->in(3));
        n->set_req(2,pair1);
        Node *pair2 = new (C) BinaryNode(n->in(4),n->in(5));
        n->set_req(3,pair2);
        n->del_req(5);
        n->del_req(4);
        break;
      }
      case Op_EncodeISOArray: {
        // Restructure into a binary tree for Matching.
        Node* pair = new (C) BinaryNode(n->in(3), n->in(4));
        n->set_req(3, pair);
        n->del_req(4);
        break;
      }
      default:
        break;
      }
    }
    else {
      ShouldNotReachHere();
    }
  } // end of while (mstack.is_nonempty())
}

#ifdef ASSERT
// machine-independent root to machine-dependent root
void Matcher::dump_old2new_map() {
  _old2new_map.dump();
}
#endif

//---------------------------collect_null_checks-------------------------------
// Find null checks in the ideal graph; write a machine-specific node for
// each one.  Used by later implicit-null-check handling.  Actually collects
// either an IfTrue or IfFalse for the common NOT-null path, AND the ideal
// value being tested.
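// E.g. for (If (Bool [ne] (CmpP val NULL))) the IfTrue projection is the
// common not-null path; that projection and 'val' are what get pushed onto
// _null_check_tests below.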
void Matcher::collect_null_checks( Node *proj, Node *orig_proj ) {
  Node *iff = proj->in(0);
  if( iff->Opcode() == Op_If ) {
    // During matching If's have Bool & Cmp side-by-side
    BoolNode *b = iff->in(1)->as_Bool();
    Node *cmp = iff->in(2);
    int opc = cmp->Opcode();
    if (opc != Op_CmpP && opc != Op_CmpN) return;

    const Type* ct = cmp->in(2)->bottom_type();
    if (ct == TypePtr::NULL_PTR ||
        (opc == Op_CmpN && ct == TypeNarrowOop::NULL_PTR)) {

      bool push_it = false;
      if( proj->Opcode() == Op_IfTrue ) {
        extern int all_null_checks_found;
        all_null_checks_found++;
        if( b->_test._test == BoolTest::ne ) {
          push_it = true;
        }
      } else {
        assert( proj->Opcode() == Op_IfFalse, "" );
        if( b->_test._test == BoolTest::eq ) {
          push_it = true;
        }
      }
      if( push_it ) {
        _null_check_tests.push(proj);
        Node* val = cmp->in(1);
#ifdef _LP64
        if (val->bottom_type()->isa_narrowoop() &&
            !Matcher::narrow_oop_use_complex_address()) {
          //
          // Look for DecodeN node which should be pinned to orig_proj.
        // On platforms (Sparc) which cannot handle 2 adds
          // in addressing mode we have to keep a DecodeN node and
          // use it to do implicit NULL check in address.
          //
          // DecodeN node was pinned to non-null path (orig_proj) during
          // CastPP transformation in final_graph_reshaping_impl().
          //
          uint cnt = orig_proj->outcnt();
          for (uint i = 0; i < orig_proj->outcnt(); i++) {
            Node* d = orig_proj->raw_out(i);
            if (d->is_DecodeN() && d->in(1) == val) {
              val = d;
              val->set_req(0, NULL); // Unpin now.
              // Mark this as special case to distinguish from
              // a regular case: CmpP(DecodeN, NULL).
              val = (Node*)(((intptr_t)val) | 1);
              break;
            }
          }
        }
#endif
        _null_check_tests.push(val);
      }
    }
  }
}

//---------------------------validate_null_checks------------------------------
// It's possible that the value being NULL checked is not the root of a match
// tree.  If so, I cannot use the value in an implicit null check.
void Matcher::validate_null_checks( ) {
  uint cnt = _null_check_tests.size();
  for( uint i=0; i < cnt; i+=2 ) {
    Node *test = _null_check_tests[i];
    Node *val = _null_check_tests[i+1];
    bool is_decoden = ((intptr_t)val) & 1;
    val = (Node*)(((intptr_t)val) & ~1);
    if (has_new_node(val)) {
      Node* new_val = new_node(val);
      if (is_decoden) {
        assert(val->is_DecodeNarrowPtr() && val->in(0) == NULL, "sanity");
        // Note: new_val may have a control edge if
        // the original ideal node DecodeN was matched before
        // it was unpinned in Matcher::collect_null_checks().
        // Unpin the mach node and mark it.
        new_val->set_req(0, NULL);
        new_val = (Node*)(((intptr_t)new_val) | 1);
      }
      // Is a match-tree root, so replace with the matched value
      _null_check_tests.map(i+1, new_val);
    } else {
      // Yank from candidate list
      _null_check_tests.map(i+1,_null_check_tests[--cnt]);
      _null_check_tests.map(i,_null_check_tests[--cnt]);
      _null_check_tests.pop();
      _null_check_tests.pop();
      i-=2;
    }
  }
}

// Used by the DFA in dfa_xxx.cpp.  Check whether a following barrier or
// atomic instruction acts as a store-load barrier without any intervening
// volatile load; if so, we don't need an explicit barrier here.  We retain
// the Node to act as a compiler ordering barrier.
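// For instance, a MemBarVolatile whose control projection leads directly to
// a CompareAndSwap can answer true: the CAS itself serializes, so the
// explicit StoreLoad barrier would be redundant.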
bool Matcher::post_store_load_barrier(const Node* vmb) {
  Compile* C = Compile::current();
  assert(vmb->is_MemBar(), "");
  assert(vmb->Opcode() != Op_MemBarAcquire && vmb->Opcode() != Op_LoadFence, "");
  const MemBarNode* membar = vmb->as_MemBar();

  // Get the Ideal Proj node, ctrl, that can be used to iterate forward
  Node* ctrl = NULL;
  for (DUIterator_Fast imax, i = membar->fast_outs(imax); i < imax; i++) {
    Node* p = membar->fast_out(i);
    assert(p->is_Proj(), "only projections here");
    if ((p->as_Proj()->_con == TypeFunc::Control) &&
        !C->node_arena()->contains(p)) { // Unmatched old-space only
      ctrl = p;
      break;
    }
  }
  assert((ctrl != NULL), "missing control projection");

  for (DUIterator_Fast jmax, j = ctrl->fast_outs(jmax); j < jmax; j++) {
    Node *x = ctrl->fast_out(j);
    int xop = x->Opcode();

    // We don't need current barrier if we see another or a lock
    // before seeing volatile load.
    //
    // Op_Fastunlock previously appeared in the Op_* list below.
    // With the advent of 1-0 lock operations we're no longer guaranteed
    // that a monitor exit operation contains a serializing instruction.

    if (xop == Op_MemBarVolatile ||
        xop == Op_CompareAndSwapL ||
        xop == Op_CompareAndSwapP ||
        xop == Op_CompareAndSwapN ||
        xop == Op_CompareAndSwapI) {
      return true;
    }

    // Op_FastLock previously appeared in the Op_* list above.
    // With biased locking we're no longer guaranteed that a monitor
    // enter operation contains a serializing instruction.
    if ((xop == Op_FastLock) && !UseBiasedLocking) {
      return true;
    }

    if (x->is_MemBar()) {
      // We must retain this membar if there is an upcoming volatile
      // load, which will be followed by acquire membar.
      if (xop == Op_MemBarAcquire || xop == Op_LoadFence) {
        return false;
      } else {
        // For other kinds of barriers, check by pretending we
        // are them, and seeing if we can be removed.
        return post_store_load_barrier(x->as_MemBar());
      }
    }

    // probably not necessary to check for these
    if (x->is_Call() || x->is_SafePoint() || x->is_block_proj()) {
      return false;
    }
  }
  return false;
}

// Check whether node n is a branch to an uncommon trap that we could
// optimize as a test with very high branch costs in case of going to
// the uncommon trap. The code must be able to be recompiled to use
// a cheaper test.
bool Matcher::branches_to_uncommon_trap(const Node *n) {
  // Don't do it for natives, adapters, or runtime stubs
  Compile *C = Compile::current();
  if (!C->is_method_compilation()) return false;

  assert(n->is_If(), "You should only call this on if nodes.");
  IfNode *ifn = n->as_If();

  Node *ifFalse = NULL;
  for (DUIterator_Fast imax, i = ifn->fast_outs(imax); i < imax; i++) {
    if (ifn->fast_out(i)->is_IfFalse()) {
      ifFalse = ifn->fast_out(i);
      break;
    }
  }
  assert(ifFalse, "An If should have an ifFalse. Graph is broken.");

  Node *reg = ifFalse;
  int cnt = 4; // We must protect against cycles.  Limit to 4 iterations.
               // Alternatively use visited set?  Seems too expensive.
  while (reg != NULL && cnt > 0) {
    CallNode *call = NULL;
    RegionNode *nxt_reg = NULL;
    for (DUIterator_Fast imax, i = reg->fast_outs(imax); i < imax; i++) {
      Node *o = reg->fast_out(i);
      if (o->is_Call()) {
        call = o->as_Call();
      }
      if (o->is_Region()) {
        nxt_reg = o->as_Region();
      }
    }

    if (call &&
        call->entry_point() == SharedRuntime::uncommon_trap_blob()->entry_point()) {
      const Type* trtype = call->in(TypeFunc::Parms)->bottom_type();
      if (trtype->isa_int() && trtype->is_int()->is_con()) {
        jint tr_con = trtype->is_int()->get_con();
        Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(tr_con);
        Deoptimization::DeoptAction action = Deoptimization::trap_request_action(tr_con);
        assert((int)reason < (int)BitsPerInt, "recode bit map");

        if (is_set_nth_bit(C->allowed_deopt_reasons(), (int)reason)
            && action != Deoptimization::Action_none) {
          // This uncommon trap is sure to recompile, eventually.
          // When that happens, C->too_many_traps will prevent
          // this transformation from happening again.
          return true;
        }
      }
    }

    reg = nxt_reg;
    cnt--;
  }

  return false;
}

//=============================================================================
//---------------------------State---------------------------------------------
State::State(void) {
#ifdef ASSERT
  _id = 0;
  _kids[0] = _kids[1] = (State*)(intptr_t) CONST64(0xcafebabecafebabe);
  _leaf = (Node*)(intptr_t) CONST64(0xbaadf00dbaadf00d);
  //memset(_cost, -1, sizeof(_cost));
  //memset(_rule, -1, sizeof(_rule));
#endif
  memset(_valid, 0, sizeof(_valid));
}

#ifdef ASSERT
State::~State() {
  _id = 99;
  _kids[0] = _kids[1] = (State*)(intptr_t) CONST64(0xcafebabecafebabe);
  _leaf = (Node*)(intptr_t) CONST64(0xbaadf00dbaadf00d);
  memset(_cost, -3, sizeof(_cost));
  memset(_rule, -3, sizeof(_rule));
}
#endif

#ifndef PRODUCT
//---------------------------dump----------------------------------------------
void State::dump() {
  tty->print("\n");
  dump(0);
}

void State::dump(int depth) {
  for( int j = 0; j < depth; j++ )
    tty->print("   ");
  tty->print("--N: ");
  _leaf->dump();
  uint i;
  for( i = 0; i < _LAST_MACH_OPER; i++ )
    // Check for valid entry
    if( valid(i) ) {
      for( int j = 0; j < depth; j++ )
        tty->print("   ");
      assert(_cost[i] != max_juint, "cost must be a valid value");
      assert(_rule[i] < _last_Mach_Node, "rule[i] must be valid rule");
      tty->print_cr("%s  %d  %s",
                    ruleName[i], _cost[i], ruleName[_rule[i]] );
    }
  tty->cr();

  for( i=0; i<2; i++ )
    if( _kids[i] )
      _kids[i]->dump(depth+1);
}
#endif
C:\hotspot-69087d08d473\src\share\vm/opto/matcher.hpp
/*
 * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_VM_OPTO_MATCHER_HPP
#define SHARE_VM_OPTO_MATCHER_HPP

#include "libadt/vectset.hpp"
#include "memory/resourceArea.hpp"
#include "opto/node.hpp"
#include "opto/phaseX.hpp"
#include "opto/regmask.hpp"

class Compile;
class Node;
class MachNode;
class MachTypeNode;
class MachOper;

//---------------------------Matcher-------------------------------------------
class Matcher : public PhaseTransform {
  friend class VMStructs;
  // Private arena of State objects
  ResourceArea _states_arena;

  VectorSet   _visited;         // Visit bits

  // Used to control the Label pass
  VectorSet   _shared;          // Shared Ideal Node
  VectorSet   _dontcare;        // Nothing the matcher cares about

  // Private methods which perform the actual matching and reduction
  // Walks the label tree, generating machine nodes
  MachNode *ReduceInst( State *s, int rule, Node *&mem);
  void ReduceInst_Chain_Rule( State *s, int rule, Node *&mem, MachNode *mach);
  uint ReduceInst_Interior(State *s, int rule, Node *&mem, MachNode *mach, uint num_opnds);
  void ReduceOper( State *s, int newrule, Node *&mem, MachNode *mach );

  // If this node already matched using "rule", return the MachNode for it.
  MachNode* find_shared_node(Node* n, uint rule);

  // Convert a dense opcode number to an expanded rule number
  const int *_reduceOp;
  const int *_leftOp;
  const int *_rightOp;

  // Map dense opcode number to info on when rule is swallowed constant.
  const bool *_swallowed;

  // Map dense rule number to determine if this is an instruction chain rule
  const uint _begin_inst_chain_rule;
  const uint _end_inst_chain_rule;

  // We want to clone constants and possible CmpI-variants.
  // If we do not clone CmpI, then we can have many instances of
  // condition codes alive at once.  This is OK on some chips and
  // bad on others.  Hence the machine-dependent table lookup.
  const char *_must_clone;

  // Find shared Nodes, or Nodes that otherwise are Matcher roots
  void find_shared( Node *n );
#ifdef X86
  bool is_bmi_pattern(Node *n, Node *m);
#endif

  // Debug and profile information for nodes in old space:
  GrowableArray<Node_Notes*>* _old_node_note_array;

  // Node labeling iterator for instruction selection
  Node *Label_Root( const Node *n, State *svec, Node *control, const Node *mem );

  Node *transform( Node *dummy );

  Node_List _projection_list;        // For Machine nodes killing many values

  Node_Array _shared_nodes;

  debug_only(Node_Array _old2new_map;)   // Map roots of ideal-trees to machine-roots
  debug_only(Node_Array _new2old_map;)   // Maps machine nodes back to ideal

  // Accessors for the inherited field PhaseTransform::_nodes:
  void   grow_new_node_array(uint idx_limit) {
    _nodes.map(idx_limit-1, NULL);
  }
  bool    has_new_node(const Node* n) const {
    return _nodes.at(n->_idx) != NULL;
  }
  Node*       new_node(const Node* n) const {
    assert(has_new_node(n), "set before get");
    return _nodes.at(n->_idx);
  }
  void    set_new_node(const Node* n, Node *nn) {
    assert(!has_new_node(n), "set only once");
    _nodes.map(n->_idx, nn);
  }

#ifdef ASSERT
  // Make sure only new nodes are reachable from this node
  void verify_new_nodes_only(Node* root);

  Node* _mem_node;   // Ideal memory node consumed by mach node
#endif

  // Mach node for ConP #NULL
  MachNode* _mach_null;

public:
  int LabelRootDepth;
  // Convert ideal machine register to a register mask for spill-loads
  static const RegMask *idealreg2regmask[];
  RegMask *idealreg2spillmask  [_last_machine_leaf];
  RegMask *idealreg2debugmask  [_last_machine_leaf];
  RegMask *idealreg2mhdebugmask[_last_machine_leaf];
  void init_spill_mask( Node *ret );
  // Convert machine register number to register mask
  static uint mreg2regmask_max;
  static RegMask mreg2regmask[];
  static RegMask STACK_ONLY_mask;

  MachNode* mach_null() const { return _mach_null; }

  bool    is_shared( Node *n ) { return _shared.test(n->_idx) != 0; }
  void   set_shared( Node *n ) {  _shared.set(n->_idx); }
  bool   is_visited( Node *n ) { return _visited.test(n->_idx) != 0; }
  void  set_visited( Node *n ) { _visited.set(n->_idx); }
  bool  is_dontcare( Node *n ) { return _dontcare.test(n->_idx) != 0; }
  void set_dontcare( Node *n ) {  _dontcare.set(n->_idx); }

  // Mode bit to tell DFA and expand rules whether we are running after
  // (or during) register selection.  Usually, the matcher runs before,
  // but it will also get called to generate post-allocation spill code.
  // In this situation, it is a deadly error to attempt to allocate more
  // temporary registers.
  bool _allocation_started;

  // Machine register names
  static const char *regName[];
  // Machine register encodings
  static const unsigned char _regEncode[];
  // Machine Node names
  const char **_ruleName;
  // Rules that are cheaper to rematerialize than to spill
  static const uint _begin_rematerialize;
  static const uint _end_rematerialize;

  // An array of chars, from 0 to _last_Mach_Reg.
  // No Save       = 'N' (for register windows)
  // Save on Entry = 'E'
  // Save on Call  = 'C'
  // Always Save   = 'A' (same as SOE + SOC)
  const char *_register_save_policy;
  const char *_c_reg_save_policy;
  // Convert a machine register to a machine register type, so-as to
  // properly match spill code.
  const int *_register_save_type;
  // Maps from machine register to boolean; true if machine register can
  // be holding a call argument in some signature.
  static bool can_be_java_arg( int reg );
  // Maps from machine register to boolean; true if machine register holds
  // a spillable argument.
  static bool is_spillable_arg( int reg );

  // List of IfFalse or IfTrue Nodes that indicate a taken null test.
  // List is valid in the post-matching space.
  Node_List _null_check_tests;
  void collect_null_checks( Node *proj, Node *orig_proj );
  void validate_null_checks( );

  Matcher();

  // Get a projection node at position pos
  Node* get_projection(uint pos) {
    return _projection_list[pos];
  }

  // Push a projection node onto the projection list
  void push_projection(Node* node) {
    _projection_list.push(node);
  }

  Node* pop_projection() {
    return _projection_list.pop();
  }

  // Number of nodes in the projection list
  uint number_of_projections() const {
    return _projection_list.size();
  }

  // Select instructions for entire method
  void match();

  // Helper for match
  OptoReg::Name warp_incoming_stk_arg( VMReg reg );

  // Transform, then walk.  Does implicit DCE while walking.
  // Name changed from "transform" to avoid it being virtual.
  Node *xform( Node *old_space_node, int Nodes );

  // Match a single Ideal Node - turn it into a 1-Node tree; Label & Reduce.
  MachNode *match_tree( const Node *n );
  MachNode *match_sfpt( SafePointNode *sfpt );
  // Helper for match_sfpt
  OptoReg::Name warp_outgoing_stk_arg( VMReg reg, OptoReg::Name begin_out_arg_area, OptoReg::Name &out_arg_limit_per_call );

  // Initialize first stack mask and related masks.
  void init_first_stack_mask();

  // If we should save-on-entry this register
  bool is_save_on_entry( int reg );

  // Fixup the save-on-entry registers
  void Fixup_Save_On_Entry( );

  // --- Frame handling ---

  // Register number of the stack slot corresponding to the incoming SP.
  // Per the Big Picture in the AD file, it is:
  //   SharedInfo::stack0 + locks + in_preserve_stack_slots + pad2.
  OptoReg::Name _old_SP;

  // Register number of the stack slot corresponding to the highest incoming
  // argument on the stack.  Per the Big Picture in the AD file, it is:
  //   _old_SP + out_preserve_stack_slots + incoming argument size.
  OptoReg::Name _in_arg_limit;

  // Register number of the stack slot corresponding to the new SP.
  // Per the Big Picture in the AD file, it is:
  //   _in_arg_limit + pad0
  OptoReg::Name _new_SP;

  // Register number of the stack slot corresponding to the highest outgoing
  // argument on the stack.  Per the Big Picture in the AD file, it is:
  //   _new_SP + max outgoing arguments of all calls
  OptoReg::Name _out_arg_limit;

  OptoRegPair *_parm_regs;        // Array of machine registers per argument
  RegMask *_calling_convention_mask; // Array of RegMasks per argument

  // Does matcher have a match rule for this ideal node?
  static const bool has_match_rule(int opcode);
  static const bool _hasMatchRule[_last_opcode];

  // Does matcher have a match rule for this ideal node and is the
  // predicate (if there is one) true?
  // NOTE: If this function is used more commonly in the future, ADLC
  // should generate this one.
  static const bool match_rule_supported(int opcode);

  // Used to determine if we have fast l2f conversion
  // USII has it, USIII doesn't
  static const bool convL2FSupported(void);

  // Vector width in bytes
  static const int vector_width_in_bytes(BasicType bt);

  // Limits on vector size (number of elements).
  static const int max_vector_size(const BasicType bt);
  static const int min_vector_size(const BasicType bt);
  static const bool vector_size_supported(const BasicType bt, int size) {
    return (Matcher::max_vector_size(bt) >= size &&
            Matcher::min_vector_size(bt) <= size);
  }

  // Vector ideal reg
  static const uint vector_ideal_reg(int len);
  static const uint vector_shift_count_ideal_reg(int len);

  // CPU supports misaligned vectors store/load.
  static const bool misaligned_vectors_ok();

  // Should original key array reference be passed to AES stubs
  static const bool pass_original_key_for_aes();

  // Used to determine a "low complexity" 64-bit constant.  (Zero is simple.)
  // The standard of comparison is one (StoreL ConL) vs. two (StoreI ConI).
  // Depends on the details of 64-bit constant generation on the CPU.
  static const bool isSimpleConstant64(jlong con);

  // These calls are all generated by the ADLC

  // TRUE - grows up, FALSE - grows down (Intel)
  virtual bool stack_direction() const;

  // Java-Java calling convention
  // (what you use when Java calls Java)

  // Alignment of stack in bytes, standard Intel word alignment is 4.
  // Sparc probably wants at least double-word (8).
  static uint stack_alignment_in_bytes();
  // Alignment of stack, measured in stack slots.
  // The size of stack slots is defined by VMRegImpl::stack_slot_size.
  static uint stack_alignment_in_slots() {
    return stack_alignment_in_bytes() / (VMRegImpl::stack_slot_size);
  }

  // Array mapping arguments to registers.  Argument 0 is usually the 'this'
  // pointer.  Registers can include stack-slots and regular registers.
  static void calling_convention( BasicType *, VMRegPair *, uint len, bool is_outgoing );

  // Convert a sig into a calling convention register layout
  // and find interesting things about it.
  static OptoReg::Name  find_receiver( bool is_outgoing );
  // Return address register.  On Intel it is a stack-slot.  On PowerPC
  // it is the Link register.  On Sparc it is r31?
  virtual OptoReg::Name return_addr() const;
  RegMask              _return_addr_mask;
  // Return value register.  On Intel it is EAX.  On Sparc i0/o0.
  static OptoRegPair   return_value(uint ideal_reg, bool is_outgoing);
  static OptoRegPair c_return_value(uint ideal_reg, bool is_outgoing);
  RegMask                     _return_value_mask;
  // Inline Cache Register
  static OptoReg::Name  inline_cache_reg();
  static int            inline_cache_reg_encode();

  // Register for DIVI projection of divmodI
  static RegMask divI_proj_mask();
  // Register for MODI projection of divmodI
  static RegMask modI_proj_mask();

  // Register for DIVL projection of divmodL
  static RegMask divL_proj_mask();
  // Register for MODL projection of divmodL
  static RegMask modL_proj_mask();

  // Use hardware DIV instruction when it is faster than
  // a code which use multiply for division by constant.
  static bool use_asm_for_ldiv_by_con( jlong divisor );

  static const RegMask method_handle_invoke_SP_save_mask();

  // Java-Interpreter calling convention
  // (what you use when calling between compiled-Java and Interpreted-Java)

  // Number of callee-save + always-save registers
  // Ignores frame pointer and "special" registers
  static int  number_of_saved_registers();

  // The Method-klass-holder may be passed in the inline_cache_reg
  // and then expanded into the inline_cache_reg and a method_oop register

  static OptoReg::Name  interpreter_method_oop_reg();
  static int            interpreter_method_oop_reg_encode();

  static OptoReg::Name  compiler_method_oop_reg();
  static const RegMask &compiler_method_oop_reg_mask();
  static int            compiler_method_oop_reg_encode();

  // Interpreter's Frame Pointer Register
  static OptoReg::Name  interpreter_frame_pointer_reg();

  // Java-Native calling convention
  // (what you use when intercalling between Java and C++ code)

  // Array mapping arguments to registers.  Argument 0 is usually the 'this'
  // pointer.  Registers can include stack-slots and regular registers.
  static void c_calling_convention( BasicType*, VMRegPair *, uint );
  // Frame pointer. The frame pointer is kept at the base of the stack
  // and so is probably the stack pointer for most machines.  On Intel
  // it is ESP.  On the PowerPC it is R1.  On Sparc it is SP.
  OptoReg::Name  c_frame_pointer() const;
  static RegMask c_frame_ptr_mask;

  // !!!!! Special stuff for building ScopeDescs
  virtual int      regnum_to_fpu_offset(int regnum);

  // Is this branch offset small enough to be addressed by a short branch?
  bool is_short_branch_offset(int rule, int br_size, int offset);

  // Optional scaling for the parameter to the ClearArray/CopyArray node.
  static const bool init_array_count_is_in_bytes;

  // Threshold small size (in bytes) for a ClearArray/CopyArray node.
  // Anything this size or smaller may get converted to discrete scalar stores.
  static const int init_array_short_size;

  // Some hardware needs 2 CMOV's for longs.
  static const int long_cmove_cost();

  // Some hardware have expensive CMOV for float and double.
  static const int float_cmove_cost();

  // Should the Matcher clone shifts on addressing modes, expecting them to
  // be subsumed into complex addressing expressions or compute them into
  // registers?  True for Intel but false for most RISCs
  static const bool clone_shift_expressions;

  static bool narrow_oop_use_complex_address();
  static bool narrow_klass_use_complex_address();

  // Generate implicit null check for narrow oops if it can fold
  // into address expression (x64).
  //
  // [R12 + narrow_oop_reg<<3 + offset] // fold into address expression
  // NullCheck narrow_oop_reg
  //
  // When narrow oops can't fold into the address expression (Sparc) and
  // the base is not null, use decode_not_null and a normal implicit null check.
  // Note, the decode_not_null node can be used here since it is referenced
  // only on the non-null path, but it requires special handling, see
  // collect_null_checks():
  //
  // decode_not_null narrow_oop_reg, oop_reg // 'shift' and 'add base'
  // [oop_reg + offset]
  // NullCheck oop_reg
  //
  // With a zero narrow-oop base and when narrow oops cannot fold into the
  // address expression, use a normal implicit null check since only a shift
  // is needed to decode the narrow oop.
  //
  // decode narrow_oop_reg, oop_reg // only 'shift'
  // [oop_reg + offset]
  // NullCheck oop_reg
  //
  inline static bool gen_narrow_oop_implicit_null_checks() {
    return Universe::narrow_oop_use_implicit_null_checks() &&
           (narrow_oop_use_complex_address() ||
            Universe::narrow_oop_base() != NULL);
  }

  // Is it better to copy float constants, or load them directly from memory?
  // Intel can load a float constant from a direct address, requiring no
  // extra registers.  Most RISCs will have to materialize an address into a
  // register first, so they may as well materialize the constant immediately.
  static const bool rematerialize_float_constants;

  // If CPU can load and store mis-aligned doubles directly then no fixup is
  // needed.  Else we split the double into 2 integer pieces and move it
  // piece-by-piece.  Only happens when passing doubles into C code or when
  // calling i2c adapters as the Java calling convention forces doubles to be
  // aligned.
  static const bool misaligned_doubles_ok;

  // Does the CPU require postalloc expand (see block.cpp for description of
  // postalloc expand)?
  static const bool require_postalloc_expand;

  // Perform a platform dependent implicit null fixup.  This is needed
  // on windows95 to take care of some unusual register constraints.
  void pd_implicit_null_fixup(MachNode *load, uint idx);

  // Advertise here if the CPU requires explicit rounding operations
  // to implement the UseStrictFP mode.
  static const bool strict_fp_requires_explicit_rounding;

  // Are floats converted to doubles when stored to the stack during deoptimization?
  static bool float_in_double();
  // Do ints take an entire long register or just half?
  static const bool int_in_long;

  // Do the processor's shift instructions only use the low 5/6 bits
  // of the count for 32/64 bit ints? If not we need to do the masking
  // ourselves.
  static const bool need_masked_shift_count;

  // This routine is run whenever a graph fails to match.
  // If it returns, the compiler should bailout to interpreter without error.
  // In non-product mode, SoftMatchFailure is false to detect non-canonical
  // graphs.  Print a message and exit.
  static void soft_match_failure() {
    if( SoftMatchFailure ) return;
    else { fatal("SoftMatchFailure is not allowed except in product"); }
  }

  // Check for a following volatile memory barrier without an
  // intervening load; in that case we don't need a barrier here.  We
  // retain the Node to act as a compiler ordering barrier.
  static bool post_store_load_barrier(const Node* mb);

  // Does n lead to an uncommon trap that can cause deoptimization?
  static bool branches_to_uncommon_trap(const Node *n);

#ifdef ASSERT
  void dump_old2new_map();      // machine-independent to machine-dependent

  Node* find_old_node(Node* new_node) {
    return _new2old_map[new_node->_idx];
  }
#endif
};

#endif // SHARE_VM_OPTO_MATCHER_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/mathexactnode.cpp
/*
 * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/addnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/machnode.hpp"
#include "opto/matcher.hpp"
#include "opto/mathexactnode.hpp"
#include "opto/subnode.hpp"

template <typename OverflowOp>
class AddHelper {
public:
  typedef typename OverflowOp::TypeClass TypeClass;
  typedef typename TypeClass::NativeType NativeType;

  static bool will_overflow(NativeType value1, NativeType value2) {
    NativeType result = value1 + value2;
    // Hacker's Delight 2-12: overflow iff both arguments have the opposite sign of the result
    if (((value1 ^ result) & (value2 ^ result)) >= 0) {
      return false;
    }
    return true;
  }
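  // For illustration: with jint arguments, value1 = max_jint and value2 = 1
  // wrap (two's complement) to result = min_jint; both (value1 ^ result) and
  // (value2 ^ result) are then negative, so their AND is negative and
  // will_overflow() reports true.  For value1 = 1, value2 = 2 the AND is
  // non-negative and it reports false.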

  static bool can_overflow(const Type* type1, const Type* type2) {
    if (type1 == TypeClass::ZERO || type2 == TypeClass::ZERO) {
      return false;
    }
    return true;
  }
};

template <typename OverflowOp>
class SubHelper {
public:
  typedef typename OverflowOp::TypeClass TypeClass;
  typedef typename TypeClass::NativeType NativeType;

  static bool will_overflow(NativeType value1, NativeType value2) {
    NativeType result = value1 - value2;
    // Hacker's Delight 2-12: overflow iff the arguments have different signs and
    // the sign of the result is different from the sign of arg1
    if (((value1 ^ value2) & (value1 ^ result)) >= 0) {
      return false;
    }
    return true;
  }
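  // For illustration: value1 = min_jint, value2 = 1 gives result = max_jint;
  // (value1 ^ value2) and (value1 ^ result) are both negative, so their AND
  // is negative and will_overflow() reports true.  For value1 = 5,
  // value2 = 3 the AND is non-negative and it reports false.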

  static bool can_overflow(const Type* type1, const Type* type2) {
    if (type2 == TypeClass::ZERO) {
      return false;
    }
    return true;
  }
};

template <typename OverflowOp>
class MulHelper {
public:
  typedef typename OverflowOp::TypeClass TypeClass;

  static bool can_overflow(const Type* type1, const Type* type2) {
    if (type1 == TypeClass::ZERO || type2 == TypeClass::ZERO) {
      return false;
    } else if (type1 == TypeClass::ONE || type2 == TypeClass::ONE) {
      return false;
    }
    return true;
  }
};

bool OverflowAddINode::will_overflow(jint v1, jint v2) const {
  return AddHelper<OverflowAddINode>::will_overflow(v1, v2);
}

bool OverflowSubINode::will_overflow(jint v1, jint v2) const {
  return SubHelper<OverflowSubINode>::will_overflow(v1, v2);
}

bool OverflowMulINode::will_overflow(jint v1, jint v2) const {
  jlong result = (jlong) v1 * (jlong) v2;
  if ((jint) result == result) {
    return false;
  }
  return true;
}
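// For illustration: the multiply above is done in 64 bits, so v1 = v2 = 65536
// yields result = 0x100000000, which does not survive the narrowing cast back
// to jint and is reported as an overflow, while 1000 * 1000 = 1000000
// round-trips unchanged and is not.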

bool OverflowAddLNode::will_overflow(jlong v1, jlong v2) const {
  return AddHelper<OverflowAddLNode>::will_overflow(v1, v2);
}

bool OverflowSubLNode::will_overflow(jlong v1, jlong v2) const {
  return SubHelper<OverflowSubLNode>::will_overflow(v1, v2);
}

bool OverflowMulLNode::is_overflow(jlong val1, jlong val2) {
  // x * { 0, 1 } will never overflow. Even for x = min_jlong
  if (val1 == 0 || val2 == 0 || val1 == 1 || val2 == 1) {
    return false;
  }

  // x * min_jlong for x not in { 0, 1 } overflows,
  // even for x == -1, as -1 * min_jlong is an overflow
  if (val1 == min_jlong || val2 == min_jlong) {
    return true;
  }

  // if (x * y) / y == x there is no overflow
  //
  // The multiplication is done as unsigned to avoid the undefined behaviour
  // of signed overflow, which the compiler could otherwise use to assume that
  // the check further down (result / val2 != val1) is always false, breaking
  // the overflow check.
  julong v1 = (julong) val1;
  julong v2 = (julong) val2;
  julong tmp = v1 * v2;
  jlong result = (jlong) tmp;

  if (result / val2 != val1) {
    return true;
  }

  return false;
}
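// For illustration: val1 = max_jlong, val2 = 2 passes the special cases, the
// unsigned product truncates to -2, and -2 / 2 != val1, so the overflow is
// detected; val1 = 3, val2 = 4 gives 12 and 12 / 4 == 3, so no overflow is
// reported.  The -1 * min_jlong case is caught by the min_jlong early-out
// before the division is ever attempted.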

bool OverflowAddINode::can_overflow(const Type* t1, const Type* t2) const {
  return AddHelper<OverflowAddINode>::can_overflow(t1, t2);
}

bool OverflowSubINode::can_overflow(const Type* t1, const Type* t2) const {
  if (in(1) == in(2)) {
    return false;
  }
  return SubHelper<OverflowSubINode>::can_overflow(t1, t2);
}

bool OverflowMulINode::can_overflow(const Type* t1, const Type* t2) const {
  return MulHelper<OverflowMulINode>::can_overflow(t1, t2);
}

bool OverflowAddLNode::can_overflow(const Type* t1, const Type* t2) const {
  return AddHelper<OverflowAddLNode>::can_overflow(t1, t2);
}

bool OverflowSubLNode::can_overflow(const Type* t1, const Type* t2) const {
  if (in(1) == in(2)) {
    return false;
  }
  return SubHelper<OverflowSubLNode>::can_overflow(t1, t2);
}

bool OverflowMulLNode::can_overflow(const Type* t1, const Type* t2) const {
  return MulHelper<OverflowMulLNode>::can_overflow(t1, t2);
}

const Type* OverflowNode::sub(const Type* t1, const Type* t2) const {
  fatal(err_msg_res("sub() should not be called for '%s'", NodeClassNames[this->Opcode()]));
  return TypeInt::CC;
}

template <typename OverflowOp>
struct IdealHelper {
  typedef typename OverflowOp::TypeClass TypeClass; // TypeInt, TypeLong
  typedef typename TypeClass::NativeType NativeType;

  static Node* Ideal(const OverflowOp* node, PhaseGVN* phase, bool can_reshape) {
    Node* arg1 = node->in(1);
    Node* arg2 = node->in(2);
    const Type* type1 = phase->type(arg1);
    const Type* type2 = phase->type(arg2);

    if (type1 == NULL || type2 == NULL) {
      return NULL;
    }

    if (type1 != Type::TOP && type1->singleton() &&
        type2 != Type::TOP && type2->singleton()) {
      NativeType val1 = TypeClass::as_self(type1)->get_con();
      NativeType val2 = TypeClass::as_self(type2)->get_con();
      if (node->will_overflow(val1, val2) == false) {
        Node* con_result = ConINode::make(phase->C, 0);
        return con_result;
      }
      return NULL;
    }
    return NULL;
  }

  static const Type* Value(const OverflowOp* node, PhaseTransform* phase) {
    const Type *t1 = phase->type( node->in(1) );
    const Type *t2 = phase->type( node->in(2) );
    if( t1 == Type::TOP ) return Type::TOP;
    if( t2 == Type::TOP ) return Type::TOP;

    const TypeClass* i1 = TypeClass::as_self(t1);
    const TypeClass* i2 = TypeClass::as_self(t2);

    if (i1 == NULL || i2 == NULL) {
      return TypeInt::CC;
    }

    if (t1->singleton() && t2->singleton()) {
      NativeType val1 = i1->get_con();
      NativeType val2 = i2->get_con();
      if (node->will_overflow(val1, val2)) {
        return TypeInt::CC;
      }
      return TypeInt::ZERO;
    } else if (i1 != TypeClass::TYPE_DOMAIN && i2 != TypeClass::TYPE_DOMAIN) {
      if (node->will_overflow(i1->_lo, i2->_lo)) {
        return TypeInt::CC;
      } else if (node->will_overflow(i1->_lo, i2->_hi)) {
        return TypeInt::CC;
      } else if (node->will_overflow(i1->_hi, i2->_lo)) {
        return TypeInt::CC;
      } else if (node->will_overflow(i1->_hi, i2->_hi)) {
        return TypeInt::CC;
      }
      return TypeInt::ZERO;
    }

    if (!node->can_overflow(t1, t2)) {
      return TypeInt::ZERO;
    }
    return TypeInt::CC;
  }
};
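// For illustration of IdealHelper::Value(): for an OverflowAddINode whose
// inputs have types [0..10] and [0..5], neither input is the full
// TYPE_DOMAIN, so the four corner combinations (0+0, 0+5, 10+0, 10+5) are
// checked; none overflows, so the node folds to TypeInt::ZERO ("no
// overflow").  If either input is the unconstrained TYPE_DOMAIN, the
// decision falls through to the node's can_overflow() test.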

Node* OverflowINode::Ideal(PhaseGVN* phase, bool can_reshape) {
  return IdealHelper<OverflowINode>::Ideal(this, phase, can_reshape);
}

Node* OverflowLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
  return IdealHelper<OverflowLNode>::Ideal(this, phase, can_reshape);
}

const Type* OverflowINode::Value(PhaseTransform* phase) const {
  return IdealHelper<OverflowINode>::Value(this, phase);
}

const Type* OverflowLNode::Value(PhaseTransform* phase) const {
  return IdealHelper<OverflowLNode>::Value(this, phase);
}
C:\hotspot-69087d08d473\src\share\vm/opto/mathexactnode.hpp
/*
 * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_VM_OPTO_MATHEXACTNODE_HPP
#define SHARE_VM_OPTO_MATHEXACTNODE_HPP

#include "opto/multnode.hpp"
#include "opto/node.hpp"
#include "opto/addnode.hpp"
#include "opto/subnode.hpp"
#include "opto/type.hpp"

class PhaseGVN;
class PhaseTransform;

class OverflowNode : public CmpNode {
public:
  OverflowNode(Node* in1, Node* in2) : CmpNode(in1, in2) {}

  virtual uint ideal_reg() const { return Op_RegFlags; }
  virtual const Type* sub(const Type* t1, const Type* t2) const;
};

class OverflowINode : public OverflowNode {
public:
  typedef TypeInt TypeClass;

  OverflowINode(Node* in1, Node* in2) : OverflowNode(in1, in2) {}
  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
  virtual const Type* Value(PhaseTransform* phase) const;

  virtual bool will_overflow(jint v1, jint v2) const = 0;
  virtual bool can_overflow(const Type* t1, const Type* t2) const = 0;
};


class OverflowLNode : public OverflowNode {
public:
  typedef TypeLong TypeClass;

  OverflowLNode(Node* in1, Node* in2) : OverflowNode(in1, in2) {}
  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
  virtual const Type* Value(PhaseTransform* phase) const;

  virtual bool will_overflow(jlong v1, jlong v2) const = 0;
  virtual bool can_overflow(const Type* t1, const Type* t2) const = 0;
};

class OverflowAddINode : public OverflowINode {
public:
  typedef AddINode MathOp;

  OverflowAddINode(Node* in1, Node* in2) : OverflowINode(in1, in2) {}
  virtual int Opcode() const;

  virtual bool will_overflow(jint v1, jint v2) const;
  virtual bool can_overflow(const Type* t1, const Type* t2) const;
};

class OverflowSubINode : public OverflowINode {
public:
  typedef SubINode MathOp;

  OverflowSubINode(Node* in1, Node* in2) : OverflowINode(in1, in2) {}
  virtual int Opcode() const;

  virtual bool will_overflow(jint v1, jint v2) const;
  virtual bool can_overflow(const Type* t1, const Type* t2) const;
};

class OverflowMulINode : public OverflowINode {
public:
  typedef MulINode MathOp;

  OverflowMulINode(Node* in1, Node* in2) : OverflowINode(in1, in2) {}
  virtual int Opcode() const;

  virtual bool will_overflow(jint v1, jint v2) const;
  virtual bool can_overflow(const Type* t1, const Type* t2) const;
};

class OverflowAddLNode : public OverflowLNode {
public:
  typedef AddLNode MathOp;

  OverflowAddLNode(Node* in1, Node* in2) : OverflowLNode(in1, in2) {}
  virtual int Opcode() const;

  virtual bool will_overflow(jlong v1, jlong v2) const;
  virtual bool can_overflow(const Type* t1, const Type* t2) const;
};

class OverflowSubLNode : public OverflowLNode {
public:
  typedef SubLNode MathOp;

  OverflowSubLNode(Node* in1, Node* in2) : OverflowLNode(in1, in2) {}
  virtual int Opcode() const;

  virtual bool will_overflow(jlong v1, jlong v2) const;
  virtual bool can_overflow(const Type* t1, const Type* t2) const;
};

class OverflowMulLNode : public OverflowLNode {
public:
  typedef MulLNode MathOp;

  OverflowMulLNode(Node* in1, Node* in2) : OverflowLNode(in1, in2) {}
  virtual int Opcode() const;

  virtual bool will_overflow(jlong v1, jlong v2) const { return is_overflow(v1, v2); }
  virtual bool can_overflow(const Type* t1, const Type* t2) const;

  static bool is_overflow(jlong v1, jlong v2);
};

#endif
C:\hotspot-69087d08d473\src\share\vm/opto/memnode.cpp
/*
 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/systemDictionary.hpp"
#include "compiler/compileLog.hpp"
#include "memory/allocation.inline.hpp"
#include "oops/objArrayKlass.hpp"
#include "opto/addnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/compile.hpp"
#include "opto/connode.hpp"
#include "opto/loopnode.hpp"
#include "opto/machnode.hpp"
#include "opto/matcher.hpp"
#include "opto/memnode.hpp"
#include "opto/mulnode.hpp"
#include "opto/phaseX.hpp"
#include "opto/regmask.hpp"

// Portions of code courtesy of Clifford Click

// Optimization - Graph Style

static Node *step_through_mergemem(PhaseGVN *phase, MergeMemNode *mmem,  const TypePtr *tp, const TypePtr *adr_check, outputStream *st);

//=============================================================================
uint MemNode::size_of() const { return sizeof(*this); }

const TypePtr *MemNode::adr_type() const {
  Node* adr = in(Address);
  const TypePtr* cross_check = NULL;
  DEBUG_ONLY(cross_check = _adr_type);
  return calculate_adr_type(adr->bottom_type(), cross_check);
}

bool MemNode::check_if_adr_maybe_raw(Node* adr) {
  if (adr != NULL) {
    if (adr->bottom_type()->base() == Type::RawPtr || adr->bottom_type()->base() == Type::AnyPtr) {
      return true;
    }
  }
  return false;
}

#ifndef PRODUCT
void MemNode::dump_spec(outputStream *st) const {
  if (in(Address) == NULL)  return; // node is dead
#ifndef ASSERT
  // fake the missing field
  const TypePtr* _adr_type = NULL;
  if (in(Address) != NULL)
    _adr_type = in(Address)->bottom_type()->isa_ptr();
#endif
  dump_adr_type(this, _adr_type, st);

  Compile* C = Compile::current();
  if (C->alias_type(_adr_type)->is_volatile()) {
    st->print(" Volatile!");
  }
  if (_unaligned_access) {
    st->print(" unaligned");
  }
  if (_mismatched_access) {
    st->print(" mismatched");
  }
}

void MemNode::dump_adr_type(const Node* mem, const TypePtr* adr_type, outputStream *st) {
  st->print(" @");
  if (adr_type == NULL) {
    st->print("NULL");
  } else {
    adr_type->dump_on(st);
    Compile* C = Compile::current();
    Compile::AliasType* atp = NULL;
    if (C->have_alias_type(adr_type))  atp = C->alias_type(adr_type);
    if (atp == NULL)
      st->print(", idx=?\?;");
    else if (atp->index() == Compile::AliasIdxBot)
      st->print(", idx=Bot;");
    else if (atp->index() == Compile::AliasIdxTop)
      st->print(", idx=Top;");
    else if (atp->index() == Compile::AliasIdxRaw)
      st->print(", idx=Raw;");
    else {
      ciField* field = atp->field();
      if (field) {
        st->print(", name=");
        field->print_name_on(st);
      }
      st->print(", idx=%d;", atp->index());
    }
  }
}

extern void print_alias_types();

#endif

Node *MemNode::optimize_simple_memory_chain(Node *mchain, const TypeOopPtr *t_oop, Node *load, PhaseGVN *phase) {
  assert((t_oop != NULL), "sanity");
  bool is_instance = t_oop->is_known_instance_field();
  bool is_boxed_value_load = t_oop->is_ptr_to_boxed_value() &&
                             (load != NULL) && load->is_Load() &&
                             (phase->is_IterGVN() != NULL);
  if (!(is_instance || is_boxed_value_load))
    return mchain;  // don't try to optimize non-instance types
  uint instance_id = t_oop->instance_id();
  Node *start_mem = phase->C->start()->proj_out(TypeFunc::Memory);
  Node *prev = NULL;
  Node *result = mchain;
  while (prev != result) {
    prev = result;
    if (result == start_mem)
      break;  // hit one of our sentinels
    // skip over a call which does not affect this memory slice
    if (result->is_Proj() && result->as_Proj()->_con == TypeFunc::Memory) {
      Node *proj_in = result->in(0);
      if (proj_in->is_Allocate() && proj_in->_idx == instance_id) {
        break;  // hit one of our sentinels
      } else if (proj_in->is_Call()) {
        CallNode *call = proj_in->as_Call();
        if (!call->may_modify(t_oop, phase)) { // returns false for instances
          result = call->in(TypeFunc::Memory);
        }
      } else if (proj_in->is_Initialize()) {
        AllocateNode* alloc = proj_in->as_Initialize()->allocation();
        // Stop if this is the initialization for the object instance
        // which contains this memory slice, otherwise skip over it.
        if ((alloc == NULL) || (alloc->_idx == instance_id)) {
          break;
        }
        if (is_instance) {
          result = proj_in->in(TypeFunc::Memory);
        } else if (is_boxed_value_load) {
          Node* klass = alloc->in(AllocateNode::KlassNode);
          const TypeKlassPtr* tklass = phase->type(klass)->is_klassptr();
          if (tklass->klass_is_exact() && !tklass->klass()->equals(t_oop->klass())) {
            result = proj_in->in(TypeFunc::Memory); // not related allocation
          }
        }
      } else if (proj_in->is_MemBar()) {
        result = proj_in->in(TypeFunc::Memory);
      } else {
        assert(false, "unexpected projection");
      }
    } else if (result->is_ClearArray()) {
      if (!is_instance || !ClearArrayNode::step_through(&result, instance_id, phase)) {
        // Can not bypass initialization of the instance
        // we are looking for.
        break;
      }
      // Otherwise skip it (the call updated 'result' value).
    } else if (result->is_MergeMem()) {
      result = step_through_mergemem(phase, result->as_MergeMem(), t_oop, NULL, tty);
    }
  }
  return result;
}
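// For illustration of the walk in optimize_simple_memory_chain() above: a
// load from a known-instance field can step past the memory Proj of a call
// that cannot modify that instance and past MemBar projections, but it stops
// as soon as it reaches the Allocate or Initialize whose _idx matches the
// instance id, since stores captured by that initialization may be the ones
// feeding the load.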

Node *MemNode::optimize_memory_chain(Node *mchain, const TypePtr *t_adr, Node *load, PhaseGVN *phase) {
  const TypeOopPtr* t_oop = t_adr->isa_oopptr();
  if (t_oop == NULL)
    return mchain;  // don't try to optimize non-oop types
  Node* result = optimize_simple_memory_chain(mchain, t_oop, load, phase);
  bool is_instance = t_oop->is_known_instance_field();
  PhaseIterGVN *igvn = phase->is_IterGVN();
  if (is_instance && igvn != NULL  && result->is_Phi()) {
    PhiNode *mphi = result->as_Phi();
    assert(mphi->bottom_type() == Type::MEMORY, "memory phi required");
    const TypePtr *t = mphi->adr_type();
    if (t == TypePtr::BOTTOM || t == TypeRawPtr::BOTTOM ||
        t->isa_oopptr() && !t->is_oopptr()->is_known_instance() &&
        t->is_oopptr()->cast_to_exactness(true)
         ->is_oopptr()->cast_to_ptr_type(t_oop->ptr())
         ->is_oopptr()->cast_to_instance_id(t_oop->instance_id()) == t_oop) {
      // clone the Phi with our address type
      result = mphi->split_out_instance(t_adr, igvn);
    } else {
      assert(phase->C->get_alias_index(t) == phase->C->get_alias_index(t_adr), "correct memory chain");
    }
  }
  return result;
}

static Node *step_through_mergemem(PhaseGVN *phase, MergeMemNode *mmem,  const TypePtr *tp, const TypePtr *adr_check, outputStream *st) {
  uint alias_idx = phase->C->get_alias_index(tp);
  Node *mem = mmem;
#ifdef ASSERT
  {
    // Check that current type is consistent with the alias index used during graph construction
    assert(alias_idx >= Compile::AliasIdxRaw, "must not be a bad alias_idx");
    bool consistent =  adr_check == NULL || adr_check->empty() ||
                       phase->C->must_alias(adr_check, alias_idx );
    // Sometimes dead array references collapse to a[-1], a[-2], or a[-3]
    if( !consistent && adr_check != NULL && !adr_check->empty() &&
               tp->isa_aryptr() &&        tp->offset() == Type::OffsetBot &&
        adr_check->isa_aryptr() && adr_check->offset() != Type::OffsetBot &&
        ( adr_check->offset() == arrayOopDesc::length_offset_in_bytes() ||
          adr_check->offset() == oopDesc::klass_offset_in_bytes() ||
          adr_check->offset() == oopDesc::mark_offset_in_bytes() ) ) {
      // don't assert if it is dead code.
      consistent = true;
    }
    if( !consistent ) {
      st->print("alias_idx==%d, adr_check==", alias_idx);
      if( adr_check == NULL ) {
        st->print("NULL");
      } else {
        adr_check->dump();
      }
      st->cr();
      print_alias_types();
      assert(consistent, "adr_check must match alias idx");
    }
  }
#endif
  // TypeOopPtr::NOTNULL+any is an OOP with unknown offset - generally
  // means an array I have not precisely typed yet.  Do not do any
  // alias stuff with it any time soon.
  const TypeOopPtr *toop = tp->isa_oopptr();
  if( tp->base() != Type::AnyPtr &&
      !(toop &&
        toop->klass() != NULL &&
        toop->klass()->is_java_lang_Object() &&
        toop->offset() == Type::OffsetBot) ) {
    // compress paths and change unreachable cycles to TOP
    // If not, we can update the input infinitely along a MergeMem cycle
    // Equivalent code in PhiNode::Ideal
    Node* m  = phase->transform(mmem);
    // If transformed to a MergeMem, get the desired slice
    // Otherwise the returned node represents memory for every slice
    mem = (m->is_MergeMem())? m->as_MergeMem()->memory_at(alias_idx) : m;
    // Update input if it is progress over what we have now
  }
  return mem;
}

//--------------------------Ideal_common---------------------------------------
// Look for degenerate control and memory inputs.  Bypass MergeMem inputs.
// Unhook non-raw memories from complete (macro-expanded) initializations.
Node *MemNode::Ideal_common(PhaseGVN *phase, bool can_reshape) {
  // If our control input is a dead region, kill all below the region
  Node *ctl = in(MemNode::Control);
  if (ctl && remove_dead_region(phase, can_reshape))
    return this;
  ctl = in(MemNode::Control);
  // Don't bother trying to transform a dead node
  if (ctl && ctl->is_top())  return NodeSentinel;

  PhaseIterGVN *igvn = phase->is_IterGVN();
  // Wait if control is on the worklist.
  if (ctl && can_reshape && igvn != NULL) {
    Node* bol = NULL;
    Node* cmp = NULL;
    if (ctl->in(0)->is_If()) {
      assert(ctl->is_IfTrue() || ctl->is_IfFalse(), "sanity");
      bol = ctl->in(0)->in(1);
      if (bol->is_Bool())
        cmp = ctl->in(0)->in(1)->in(1);
    }
    if (igvn->_worklist.member(ctl) ||
        (bol != NULL && igvn->_worklist.member(bol)) ||
        (cmp != NULL && igvn->_worklist.member(cmp)) ) {
      // This control path may be dead.
      // Delay this memory node transformation until the control is processed.
      phase->is_IterGVN()->_worklist.push(this);
      return NodeSentinel; // caller will return NULL
    }
  }
  // Ignore if memory is dead, or self-loop
  Node *mem = in(MemNode::Memory);
  if (phase->type( mem ) == Type::TOP) return NodeSentinel; // caller will return NULL
  assert(mem != this, "dead loop in MemNode::Ideal");

  if (can_reshape && igvn != NULL && igvn->_worklist.member(mem)) {
    // This memory slice may be dead.
    // Delay this mem node transformation until the memory is processed.
    phase->is_IterGVN()->_worklist.push(this);
    return NodeSentinel; // caller will return NULL
  }

  Node *address = in(MemNode::Address);
  const Type *t_adr = phase->type(address);
  if (t_adr == Type::TOP)              return NodeSentinel; // caller will return NULL

  if (can_reshape && igvn != NULL &&
      (igvn->_worklist.member(address) ||
       igvn->_worklist.size() > 0 && (t_adr != adr_type())) ) {
    // The address's base and type may change when the address is processed.
    // Delay this mem node transformation until the address is processed.
    phase->is_IterGVN()->_worklist.push(this);
    return NodeSentinel; // caller will return NULL
  }

  // Do NOT remove or optimize the next lines: ensure a new alias index
  // is allocated for an oop pointer type before Escape Analysis.
  // Note: C++ will not remove it since the call has a side effect.
  if (t_adr->isa_oopptr()) {
    int alias_idx = phase->C->get_alias_index(t_adr->is_ptr());
  }

  Node* base = NULL;
  if (address->is_AddP()) {
    base = address->in(AddPNode::Base);
  }
  if (base != NULL && phase->type(base)->higher_equal(TypePtr::NULL_PTR) &&
      !t_adr->isa_rawptr()) {
    // Note: raw address has TOP base and top->higher_equal(TypePtr::NULL_PTR) is true.
    // Skip this node optimization if its address has TOP base.
    return NodeSentinel; // caller will return NULL
  }

  // Avoid independent memory operations
  Node* old_mem = mem;

  // The code which unhooks non-raw memories from complete (macro-expanded)
  // initializations was removed. After macro-expansion all stores caught
  // by an Initialize node become raw stores and there is no information
  // about which memory slices they modify. So it is unsafe to move any memory
  // operation above these stores. Also, in most cases hooked non-raw memories
  // were already unhooked by using information from detect_ptr_independence()
  // and find_previous_store().

  if (mem->is_MergeMem()) {
    MergeMemNode* mmem = mem->as_MergeMem();
    const TypePtr *tp = t_adr->is_ptr();

    mem = step_through_mergemem(phase, mmem, tp, adr_type(), tty);
  }

  if (mem != old_mem) {
    set_req(MemNode::Memory, mem);
    if (can_reshape && old_mem->outcnt() == 0) {
        igvn->_worklist.push(old_mem);
    }
    if (phase->type( mem ) == Type::TOP) return NodeSentinel;
    return this;
  }

  // let the subclass continue analyzing...
  return NULL;
}

// Helper function for proving some simple control dominations.
// Attempt to prove that all control inputs of 'dom' dominate 'sub'.
// Already assumes that 'dom' is available at 'sub', and that 'sub'
// is not a constant (dominated by the method's StartNode).
// Used by MemNode::find_previous_store to prove that the
// control input of a memory operation predates (dominates)
// an allocation it wants to look past.
bool MemNode::all_controls_dominate(Node* dom, Node* sub) {
  if (dom == NULL || dom->is_top() || sub == NULL || sub->is_top())
    return false; // Conservative answer for dead code

  // Check 'dom'. Skip Proj and CatchProj nodes.
  dom = dom->find_exact_control(dom);
  if (dom == NULL || dom->is_top())
    return false; // Conservative answer for dead code

  if (dom == sub) {
    // For the case when, for example, 'sub' is an Initialize node and the
    // original 'dom' is a Proj node of that 'sub'.
    return false;
  }

  if (dom->is_Con() || dom->is_Start() || dom->is_Root() || dom == sub)
    return true;

  // 'dom' dominates 'sub' if its control edge and the control edges
  // of all its inputs dominate or are equal to sub's control edge.

  // Currently 'sub' is either Allocate, Initialize or Start nodes.
  // Or Region for the check in LoadNode::Ideal();
  // 'sub' should have sub->in(0) != NULL.
  assert(sub->is_Allocate() || sub->is_Initialize() || sub->is_Start() ||
         sub->is_Region() || sub->is_Call(), "expecting only these nodes");

  // Get control edge of 'sub'.
  Node* orig_sub = sub;
  sub = sub->find_exact_control(sub->in(0));
  if (sub == NULL || sub->is_top())
    return false; // Conservative answer for dead code

  assert(sub->is_CFG(), "expecting control");

  if (sub == dom)
    return true;

  if (sub->is_Start() || sub->is_Root())
    return false;

  {
    // Check all control edges of 'dom'.

    ResourceMark rm;
    Arena* arena = Thread::current()->resource_area();
    Node_List nlist(arena);
    Unique_Node_List dom_list(arena);

    dom_list.push(dom);
    bool only_dominating_controls = false;

    for (uint next = 0; next < dom_list.size(); next++) {
      Node* n = dom_list.at(next);
      if (n == orig_sub)
        return false; // One of dom's inputs dominated by sub.
      if (!n->is_CFG() && n->pinned()) {
        // Check only own control edge for pinned non-control nodes.
        n = n->find_exact_control(n->in(0));
        if (n == NULL || n->is_top())
          return false; // Conservative answer for dead code
        assert(n->is_CFG(), "expecting control");
        dom_list.push(n);
      } else if (n->is_Con() || n->is_Start() || n->is_Root()) {
        only_dominating_controls = true;
      } else if (n->is_CFG()) {
        if (n->dominates(sub, nlist))
          only_dominating_controls = true;
        else
          return false;
      } else {
        // First, own control edge.
        Node* m = n->find_exact_control(n->in(0));
        if (m != NULL) {
          if (m->is_top())
            return false; // Conservative answer for dead code
          dom_list.push(m);
        }
        // Now, the rest of edges.
        uint cnt = n->req();
        for (uint i = 1; i < cnt; i++) {
          m = n->find_exact_control(n->in(i));
          if (m == NULL || m->is_top())
            continue;
          dom_list.push(m);
        }
      }
    }
    return only_dominating_controls;
  }
}

//---------------------detect_ptr_independence---------------------------------
// Used by MemNode::find_previous_store to prove that two base
// pointers are never equal.
// The pointers are accompanied by their associated allocations,
// if any, which have been previously discovered by the caller.
bool MemNode::detect_ptr_independence(Node* p1, AllocateNode* a1,
                                      Node* p2, AllocateNode* a2,
                                      PhaseTransform* phase) {
  // Attempt to prove that these two pointers cannot be aliased.
  // They may both manifestly be allocations, and they should differ.
  // Or, if they are not both allocations, they can be distinct constants.
  // Otherwise, one is an allocation and the other a pre-existing value.
  if (a1 == NULL && a2 == NULL) {           // neither an allocation
    return (p1 != p2) && p1->is_Con() && p2->is_Con();
  } else if (a1 != NULL && a2 != NULL) {    // both allocations
    return (a1 != a2);
  } else if (a1 != NULL) {                  // one allocation a1
    // (Note:  p2->is_Con implies p2->in(0)->is_Root, which dominates.)
    return all_controls_dominate(p2, a1);
  } else { //(a2 != NULL)                   // one allocation a2
    return all_controls_dominate(p1, a2);
  }
  return false;
}


// The logic for reordering loads and stores uses four steps:
// (a) Walk carefully past stores and initializations which we
//     can prove are independent of this load.
// (b) Observe that the next memory state makes an exact match
//     with self (load or store), and locate the relevant store.
// (c) Ensure that, if we were to wire self directly to the store,
//     the optimizer would fold it up somehow.
// (d) Do the rewiring, and return, depending on some other part of
//     the optimizer to fold up the load.
// This routine handles steps (a) and (b).  Steps (c) and (d) are
// specific to loads and stores, so they are handled by the callers.
// (Currently, only LoadNode::Ideal has steps (c), (d).  More later.)
//
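// For illustration (a hypothetical chain): suppose this is a 4-byte LoadI at
// offset #12 from base B whose memory input is a StoreL at offset #16 from
// the same base.  Step (a) proves the accesses independent (16 >= 12 + 4)
// and advances to the StoreL's own memory input; if that is a StoreI at
// offset #12 from B, step (b) returns it and the caller decides whether the
// load can be replaced by the stored value.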
Node* MemNode::find_previous_store(PhaseTransform* phase) {
  Node*         ctrl   = in(MemNode::Control);
  Node*         adr    = in(MemNode::Address);
  intptr_t      offset = 0;
  Node*         base   = AddPNode::Ideal_base_and_offset(adr, phase, offset);
  AllocateNode* alloc  = AllocateNode::Ideal_allocation(base, phase);

  if (offset == Type::OffsetBot)
    return NULL;            // cannot unalias unless there are precise offsets

  const bool adr_maybe_raw = check_if_adr_maybe_raw(adr);
  const TypeOopPtr *addr_t = adr->bottom_type()->isa_oopptr();

  intptr_t size_in_bytes = memory_size();

  Node* mem = in(MemNode::Memory);   // start searching here...

  int cnt = 50;             // Cycle limiter
  for (;;) {                // While we can dance past unrelated stores...
    if (--cnt < 0)  break;  // Caught in cycle or a complicated dance?

    if (mem->is_Store()) {
      Node* st_adr = mem->in(MemNode::Address);
      intptr_t st_offset = 0;
      Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_offset);
      if (st_base == NULL)
        break;              // inscrutable pointer

      // For raw accesses it's not enough to prove that constant offsets don't intersect.
      // We need the bases to be equal in order for the offset check to make sense.
      if ((adr_maybe_raw || check_if_adr_maybe_raw(st_adr)) && st_base != base) {
        break;
      }

      if (st_offset != offset && st_offset != Type::OffsetBot) {
        const int MAX_STORE = BytesPerLong;
        if (st_offset >= offset + size_in_bytes ||
            st_offset <= offset - MAX_STORE ||
            st_offset <= offset - mem->as_Store()->memory_size()) {
          // Success:  The offsets are provably independent.
          // (You may ask, why not just test st_offset != offset and be done?
          // The answer is that stores of different sizes can co-exist
          // in the same sequence of RawMem effects.  We sometimes initialize
          // a whole 'tile' of array elements with a single jint or jlong.)
          mem = mem->in(MemNode::Memory);
          continue;           // (a) advance through independent store memory
        }
      }
      if (st_base != base &&
          detect_ptr_independence(base, alloc,
                                  st_base,
                                  AllocateNode::Ideal_allocation(st_base, phase),
                                  phase)) {
        // Success:  The bases are provably independent.
        mem = mem->in(MemNode::Memory);
        continue;           // (a) advance through independent store memory
      }

      // (b) At this point, if the bases or offsets do not agree, we lose,
      // since we have not managed to prove 'this' and 'mem' independent.
      if (st_base == base && st_offset == offset) {
        return mem;         // let caller handle steps (c), (d)
      }

    } else if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
      InitializeNode* st_init = mem->in(0)->as_Initialize();
      AllocateNode*  st_alloc = st_init->allocation();
      if (st_alloc == NULL)
        break;              // something degenerated
      bool known_identical = false;
      bool known_independent = false;
      if (alloc == st_alloc)
        known_identical = true;
      else if (alloc != NULL)
        known_independent = true;
      else if (all_controls_dominate(this, st_alloc))
        known_independent = true;

      if (known_independent) {
        // The bases are provably independent: Either they are
        // manifestly distinct allocations, or else the control
        // of this load dominates the store's allocation.
        int alias_idx = phase->C->get_alias_index(adr_type());
        if (alias_idx == Compile::AliasIdxRaw) {
          mem = st_alloc->in(TypeFunc::Memory);
        } else {
          mem = st_init->memory(alias_idx);
        }
        continue;           // (a) advance through independent store memory
      }

      // (b) at this point, if we are not looking at a store initializing
      // the same allocation we are loading from, we lose.
      if (known_identical) {
        // From caller, can_see_stored_value will consult find_captured_store.
        return mem;         // let caller handle steps (c), (d)
      }

    } else if (addr_t != NULL && addr_t->is_known_instance_field()) {
      // Can't use optimize_simple_memory_chain() since it needs PhaseGVN.
      if (mem->is_Proj() && mem->in(0)->is_Call()) {
        CallNode *call = mem->in(0)->as_Call();
        if (!call->may_modify(addr_t, phase)) {
          mem = call->in(TypeFunc::Memory);
          continue;         // (a) advance through independent call memory
        }
      } else if (mem->is_Proj() && mem->in(0)->is_MemBar()) {
        mem = mem->in(0)->in(TypeFunc::Memory);
        continue;           // (a) advance through independent MemBar memory
      } else if (mem->is_ClearArray()) {
        if (ClearArrayNode::step_through(&mem, (uint)addr_t->instance_id(), phase)) {
          // (the call updated 'mem' value)
          continue;         // (a) advance through independent allocation memory
        } else {
          // Can not bypass initialization of the instance
          // we are looking for.
          return mem;
        }
      } else if (mem->is_MergeMem()) {
        int alias_idx = phase->C->get_alias_index(adr_type());
        mem = mem->as_MergeMem()->memory_at(alias_idx);
        continue;           // (a) advance through independent MergeMem memory
      }
    }

    // Unless there is an explicit 'continue', we must bail out here,
    // because 'mem' is an inscrutable memory state (e.g., a call).
    break;
  }

  return NULL;              // bail out
}

//----------------------calculate_adr_type-------------------------------------
// Helper function.  Notices when the given type of address hits top or bottom.
// Also, asserts a cross-check of the type against the expected address type.
const TypePtr* MemNode::calculate_adr_type(const Type* t, const TypePtr* cross_check) {
  if (t == Type::TOP)  return NULL; // does not touch memory any more?
  #ifdef PRODUCT
  cross_check = NULL;
  #else
  if (!VerifyAliases || is_error_reported() || Node::in_dump())  cross_check = NULL;
  #endif
  const TypePtr* tp = t->isa_ptr();
  if (tp == NULL) {
    assert(cross_check == NULL || cross_check == TypePtr::BOTTOM, "expected memory type must be wide");
    return TypePtr::BOTTOM;           // touches lots of memory
  } else {
    #ifdef ASSERT
    // %%%% [phh] We don't check the alias index if cross_check is
    //            TypeRawPtr::BOTTOM.  Needs to be investigated.
    if (cross_check != NULL &&
        cross_check != TypePtr::BOTTOM &&
        cross_check != TypeRawPtr::BOTTOM) {
      // Recheck the alias index, to see if it has changed (due to a bug).
      Compile* C = Compile::current();
      assert(C->get_alias_index(cross_check) == C->get_alias_index(tp),
             "must stay in the original alias category");
      // The type of the address must be contained in the adr_type,
      // disregarding "null"-ness.
      // (We make an exception for TypeRawPtr::BOTTOM, which is a bit bucket.)
      const TypePtr* tp_notnull = tp->join(TypePtr::NOTNULL)->is_ptr();
      assert(cross_check->meet(tp_notnull) == cross_check->remove_speculative(),
             "real address must not escape from expected memory type");
    }
    #endif
    return tp;
  }
}

//------------------------adr_phi_is_loop_invariant----------------------------
// A helper function for Ideal_DU_postCCP to check if a Phi in a counted
// loop is loop invariant. Make a quick traversal of Phi and associated
// CastPP nodes, looking to see if they are a closed group within the loop.
bool MemNode::adr_phi_is_loop_invariant(Node* adr_phi, Node* cast) {
  // The idea is that the phi-nest must boil down to only CastPP nodes
  // with the same data. This implies that any path into the loop already
  // includes such a CastPP, and so the original cast, whatever its input,
  // must be covered by an equivalent cast, with an earlier control input.
  ResourceMark rm;

  // The loop entry input of the phi should be the unique dominating
  // node for every Phi/CastPP in the loop.
  Unique_Node_List closure;
  closure.push(adr_phi->in(LoopNode::EntryControl));

  // Add the phi node and the cast to the worklist.
  Unique_Node_List worklist;
  worklist.push(adr_phi);
  if( cast != NULL ){
    if( !cast->is_ConstraintCast() ) return false;
    worklist.push(cast);
  }

  // Begin recursive walk of phi nodes.
  while( worklist.size() ){
    // Take a node off the worklist
    Node *n = worklist.pop();
    if( !closure.member(n) ){
      // Add it to the closure.
      closure.push(n);
      // Make a sanity check to ensure we don't waste too much time here.
      if( closure.size() > 20) return false;
      // This node is OK if:
      //  - it is a cast of an identical value
      //  - or it is a phi node (then we add its inputs to the worklist)
      // Otherwise, the node is not OK, and we presume the cast is not invariant
      if( n->is_ConstraintCast() ){
        worklist.push(n->in(1));
      } else if( n->is_Phi() ) {
        for( uint i = 1; i < n->req(); i++ ) {
          worklist.push(n->in(i));
        }
      } else {
        return false;
      }
    }
  }

  // Quit when the worklist is empty, and we've found no offending nodes.
  return true;
}
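// For illustration: a phi that merges CastPP views of the same pointer around
// a counted loop walks back, through casts and nested phis, to the loop-entry
// value seeded into 'closure', so the traversal terminates and the address is
// reported loop invariant.  As soon as the walk reaches any other kind of
// node, or the closure grows past 20 nodes, it conservatively returns false.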

//------------------------------Ideal_DU_postCCP-------------------------------
// Find any cast-away of null-ness and keep its control.  Null cast-aways are
// going away in this pass and we need to make this memory op depend on the
// gating null check.
Node *MemNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
  return Ideal_common_DU_postCCP(ccp, this, in(MemNode::Address));
}

// I tried to leave the CastPP's in.  This makes the graph more accurate in
// some sense; we get to keep around the knowledge that an oop is not-null
// after some test.  Alas, the CastPP's interfere with GVN (some values are
// the regular oop, some are the CastPP of the oop, all merge at Phi's which
// cannot collapse, etc).  This cost us 10% on SpecJVM, even when I removed
// some of the more trivial cases in the optimizer.  Removing more useless
// Phi's started allowing Loads to illegally float above null checks.  I gave
// up on this approach.  CNC 10/20/2000
// This static method may be called not from MemNode (EncodePNode calls it).
// Only the control edge of the node 'n' might be updated.
Node *MemNode::Ideal_common_DU_postCCP( PhaseCCP *ccp, Node* n, Node* adr ) {
  Node *skipped_cast = NULL;
  // Need a null check?  Regular static accesses do not because they are
  // from constant addresses.  Array ops are gated by the range check (which
  // always includes a NULL check).  Just check field ops.
  if( n->in(MemNode::Control) == NULL ) {
    // Scan upwards for the highest location we can place this memory op.
    while( true ) {
      switch( adr->Opcode() ) {

      case Op_AddP:             // No change to NULL-ness, so peek thru AddP's
        adr = adr->in(AddPNode::Base);
        continue;

      case Op_DecodeN:         // No change to NULL-ness, so peek thru
      case Op_DecodeNKlass:
        adr = adr->in(1);
        continue;

      case Op_EncodeP:
      case Op_EncodePKlass:
        // EncodeP node's control edge could be set by this method
        // when EncodeP node depends on CastPP node.
        //
        // Use its control edge for memory op because EncodeP may go away
        // later when it is folded with following or preceding DecodeN node.
        if (adr->in(0) == NULL) {
          // Keep looking for cast nodes.
          adr = adr->in(1);
          continue;
        }
        ccp->hash_delete(n);
        n->set_req(MemNode::Control, adr->in(0));
        ccp->hash_insert(n);
        return n;

      case Op_CastPP:
        // If the CastPP is useless, just peek on through it.
        if( ccp->type(adr) == ccp->type(adr->in(1)) ) {
          // Remember the cast that we've peeked through. If we peek
          // through more than one, then we end up remembering the highest
          // one, that is, if in a loop, the one closest to the top.
          skipped_cast = adr;
          adr = adr->in(1);
          continue;
        }
        // CastPP is going away in this pass!  We need this memory op to be
        // control-dependent on the test that is guarding the CastPP.
        ccp->hash_delete(n);
        n->set_req(MemNode::Control, adr->in(0));
        ccp->hash_insert(n);
        return n;

      case Op_Phi:
        // Attempt to float above a Phi to some dominating point.
        if (adr->in(0) != NULL && adr->in(0)->is_CountedLoop()) {
          // If we've already peeked through a Cast (which could have set the
          // control), we can't float above a Phi, because the skipped Cast
          // may not be loop invariant.
          if (adr_phi_is_loop_invariant(adr, skipped_cast)) {
            adr = adr->in(1);
            continue;
          }
        }

        // Intentional fallthrough!

        // No obvious dominating point.  The mem op is pinned below the Phi
        // by the Phi itself.  If the Phi goes away (no true value is merged)
        // then the mem op can float, but not indefinitely.  It must be pinned
        // behind the controls leading to the Phi.
      case Op_CheckCastPP:
        // These usually stick around to change address type, however a
        // useless one can be elided and we still need to pick up a control edge
        if (adr->in(0) == NULL) {
          // This CheckCastPP node has NO control and is likely useless. But we
          // need to check further up the ancestor chain for a control input to keep
          // the node in place. 4959717.
          skipped_cast = adr;
          adr = adr->in(1);
          continue;
        }
        ccp->hash_delete(n);
        n->set_req(MemNode::Control, adr->in(0));
        ccp->hash_insert(n);
        return n;

        // List of "safe" opcodes; those that implicitly block the memory
        // op below any null check.
      case Op_CastX2P:          // no null checks on native pointers
      case Op_Parm:             // 'this' pointer is not null
      case Op_LoadP:            // Loading from within a klass
      case Op_LoadN:            // Loading from within a klass
      case Op_LoadKlass:        // Loading from within a klass
      case Op_LoadNKlass:       // Loading from within a klass
      case Op_ConP:             // Loading from a klass
      case Op_ConN:             // Loading from a klass
      case Op_ConNKlass:        // Loading from a klass
      case Op_CreateEx:         // Sucking up the guts of an exception oop
      case Op_Con:              // Reading from TLS
      case Op_CMoveP:           // CMoveP is pinned
      case Op_CMoveN:           // CMoveN is pinned
        break;                  // No progress

      case Op_Proj:             // Direct call to an allocation routine
      case Op_SCMemProj:        // Memory state from store conditional ops
#ifdef ASSERT
        {
          assert(adr->as_Proj()->_con == TypeFunc::Parms, "must be return value");
          const Node* call = adr->in(0);
          if (call->is_CallJava()) {
            const CallJavaNode* call_java = call->as_CallJava();
            const TypeTuple *r = call_java->tf()->range();
            assert(r->cnt() > TypeFunc::Parms, "must return value");
            const Type* ret_type = r->field_at(TypeFunc::Parms);
            assert(ret_type && ret_type->isa_ptr(), "must return pointer");
            // We further presume that this is one of
            // new_instance_Java, new_array_Java, or
            // the like, but do not assert for this.
          } else if (call->is_Allocate()) {
            // similar case to new_instance_Java, etc.
          } else if (!call->is_CallLeaf()) {
            // Projections from fetch_oop (OSR) are allowed as well.
            ShouldNotReachHere();
          }
        }
#endif
        break;
      default:
        ShouldNotReachHere();
      }
      break;
    }
  }

  return  NULL;               // No progress
}


//=============================================================================
// Should LoadNode::Ideal() attempt to remove control edges?
bool LoadNode::can_remove_control() const {
  return true;
}
uint LoadNode::size_of() const { return sizeof(*this); }
uint LoadNode::cmp( const Node &n ) const
{ return !Type::cmp( _type, ((LoadNode&)n)._type ); }
const Type *LoadNode::bottom_type() const { return _type; }
uint LoadNode::ideal_reg() const {
  return _type->ideal_reg();
}

#ifndef PRODUCT
void LoadNode::dump_spec(outputStream *st) const {
  MemNode::dump_spec(st);
  if( !Verbose && !WizardMode ) {
    // standard dump does this in Verbose and WizardMode
    st->print(" #"); _type->dump_on(st);
  }
  if (!_depends_only_on_test) {
    st->print(" (does not depend only on test)");
  }
}
#endif

#ifdef ASSERT
//----------------------------is_immutable_value-------------------------------
// Helper function to allow a raw load without control edge for some cases
bool LoadNode::is_immutable_value(Node* adr) {
  return (adr->is_AddP() && adr->in(AddPNode::Base)->is_top() &&
          adr->in(AddPNode::Address)->Opcode() == Op_ThreadLocal &&
          (adr->in(AddPNode::Offset)->find_intptr_t_con(-1) ==
           in_bytes(JavaThread::osthread_offset())));
}
#endif

//----------------------------LoadNode::make-----------------------------------
// Polymorphic factory method:
Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypePtr* adr_type, const Type *rt, BasicType bt, MemOrd mo, ControlDependency control_dependency) {
  Compile* C = gvn.C;

  // sanity check the alias category against the created node type
  assert(!(adr_type->isa_oopptr() &&
           adr_type->offset() == oopDesc::klass_offset_in_bytes()),
         "use LoadKlassNode instead");
  assert(!(adr_type->isa_aryptr() &&
           adr_type->offset() == arrayOopDesc::length_offset_in_bytes()),
         "use LoadRangeNode instead");
  // Check control edge of raw loads
  assert( ctl != NULL || C->get_alias_index(adr_type) != Compile::AliasIdxRaw ||
          // oop will be recorded in oop map if load crosses safepoint
          rt->isa_oopptr() || is_immutable_value(adr),
          "raw memory operations should have control edge");
  switch (bt) {
  case T_BOOLEAN: return new (C) LoadUBNode(ctl, mem, adr, adr_type, rt->is_int(),  mo, control_dependency);
  case T_BYTE:    return new (C) LoadBNode (ctl, mem, adr, adr_type, rt->is_int(),  mo, control_dependency);
  case T_INT:     return new (C) LoadINode (ctl, mem, adr, adr_type, rt->is_int(),  mo, control_dependency);
  case T_CHAR:    return new (C) LoadUSNode(ctl, mem, adr, adr_type, rt->is_int(),  mo, control_dependency);
  case T_SHORT:   return new (C) LoadSNode (ctl, mem, adr, adr_type, rt->is_int(),  mo, control_dependency);
  case T_LONG:    return new (C) LoadLNode (ctl, mem, adr, adr_type, rt->is_long(), mo, control_dependency);
  case T_FLOAT:   return new (C) LoadFNode (ctl, mem, adr, adr_type, rt,            mo, control_dependency);
  case T_DOUBLE:  return new (C) LoadDNode (ctl, mem, adr, adr_type, rt,            mo, control_dependency);
  case T_ADDRESS: return new (C) LoadPNode (ctl, mem, adr, adr_type, rt->is_ptr(),  mo, control_dependency);
  case T_OBJECT:
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node* load  = gvn.transform(new (C) LoadNNode(ctl, mem, adr, adr_type, rt->make_narrowoop(), mo, control_dependency));
      return new (C) DecodeNNode(load, load->bottom_type()->make_ptr());
    } else
#endif
    {
      assert(!adr->bottom_type()->is_ptr_to_narrowoop() && !adr->bottom_type()->is_ptr_to_narrowklass(), "should have got back a narrow oop");
      return new (C) LoadPNode(ctl, mem, adr, adr_type, rt->is_oopptr(), mo, control_dependency);
    }
  }
  ShouldNotReachHere();
  return (LoadNode*)NULL;
}

LoadLNode* LoadLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo, ControlDependency control_dependency) {
  bool require_atomic = true;
  return new (C) LoadLNode(ctl, mem, adr, adr_type, rt->is_long(), mo, control_dependency, require_atomic);
}

LoadDNode* LoadDNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo, ControlDependency control_dependency) {
  bool require_atomic = true;
  return new (C) LoadDNode(ctl, mem, adr, adr_type, rt, mo, control_dependency, require_atomic);
}



//------------------------------hash-------------------------------------------
uint LoadNode::hash() const {
  // unroll addition of interesting fields
  return (uintptr_t)in(Control) + (uintptr_t)in(Memory) + (uintptr_t)in(Address);
}

static bool skip_through_membars(Compile::AliasType* atp, const TypeInstPtr* tp, bool eliminate_boxing) {
  if ((atp != NULL) && (atp->index() >= Compile::AliasIdxRaw)) {
    bool non_volatile = (atp->field() != NULL) && !atp->field()->is_volatile();
    bool is_stable_ary = FoldStableValues &&
                         (tp != NULL) && (tp->isa_aryptr() != NULL) &&
                         tp->isa_aryptr()->is_stable();

    return (eliminate_boxing && non_volatile) || is_stable_ary;
  }

  return false;
}

//---------------------------can_see_stored_value------------------------------
// This routine exists to make sure this set of tests is done the same
// everywhere.  We need to make a coordinated change: first LoadNode::Ideal
// will change the graph shape in a way which makes memory alive twice at the
// same time (uses the Oracle model of aliasing), then some
// LoadXNode::Identity will fold things back to the equivalence-class model
// of aliasing.
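// Roughly: given this load's address and the memory state 'st', return the
// value V when 'st' is (or leads to) a StoreQ to the same address, a zero
// from a freshly allocated object, or a captured InitializeNode store;
// otherwise return NULL.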
Node* MemNode::can_see_stored_value(Node* st, PhaseTransform* phase) const {
  Node* ld_adr = in(MemNode::Address);
  intptr_t ld_off = 0;
  AllocateNode* ld_alloc = AllocateNode::Ideal_allocation(ld_adr, phase, ld_off);
  const TypeInstPtr* tp = phase->type(ld_adr)->isa_instptr();
  Compile::AliasType* atp = (tp != NULL) ? phase->C->alias_type(tp) : NULL;
  // This is more general than load from boxing objects.
  if (skip_through_membars(atp, tp, phase->C->eliminate_boxing())) {
    uint alias_idx = atp->index();
    bool final = !atp->is_rewritable();
    Node* result = NULL;
    Node* current = st;
    // Skip through chains of MemBarNodes checking the MergeMems for
    // new states for the slice of this load.  Stop once any other
    // kind of node is encountered.  Loads from final memory can skip
    // through any kind of MemBar but normal loads shouldn't skip
    // through MemBarAcquire since that could allow them to move out of
    // a synchronized region.
    while (current->is_Proj()) {
      int opc = current->in(0)->Opcode();
      if ((final && (opc == Op_MemBarAcquire ||
                     opc == Op_MemBarAcquireLock ||
                     opc == Op_LoadFence)) ||
          opc == Op_MemBarRelease ||
          opc == Op_StoreFence ||
          opc == Op_MemBarReleaseLock ||
          opc == Op_MemBarCPUOrder) {
        Node* mem = current->in(0)->in(TypeFunc::Memory);
        if (mem->is_MergeMem()) {
          MergeMemNode* merge = mem->as_MergeMem();
          Node* new_st = merge->memory_at(alias_idx);
          if (new_st == merge->base_memory()) {
            // Keep searching
            current = new_st;
            continue;
          }
          // Save the new memory state for the slice and fall through
          // to exit.
          result = new_st;
        }
      }
      break;
    }
    if (result != NULL) {
      st = result;
    }
  }

  // Loop around twice in the case Load -> Initialize -> Store.
  // (See PhaseIterGVN::add_users_to_worklist, which knows about this case.)
  for (int trip = 0; trip <= 1; trip++) {

    if (st->is_Store()) {
      Node* st_adr = st->in(MemNode::Address);
      if (!phase->eqv(st_adr, ld_adr)) {
        // Try harder before giving up...  Match raw and non-raw pointers.
        intptr_t st_off = 0;
        AllocateNode* alloc = AllocateNode::Ideal_allocation(st_adr, phase, st_off);
        if (alloc == NULL)       return NULL;
        if (alloc != ld_alloc)   return NULL;
        if (ld_off != st_off)    return NULL;
        // At this point we have proven something like this setup:
        //  A = Allocate(...)
        //  L = LoadQ(,  AddP(CastPP(, A.Parm),, #Off))
        //  S = StoreQ(, AddP(,        A.Parm  , #Off), V)
        // (Actually, we haven't yet proven the Q's are the same.)
        // In other words, we are loading from a casted version of
        // the same pointer-and-offset that we stored to.
        // Thus, we are able to replace L by V.
      }
      // Now prove that we have a LoadQ matched to a StoreQ, for some Q.
      if (store_Opcode() != st->Opcode())
        return NULL;
      return st->in(MemNode::ValueIn);
    }

    // A load from a freshly-created object always returns zero.
    // (This can happen after LoadNode::Ideal resets the load's memory input
    // to find_captured_store, which returned InitializeNode::zero_memory.)
    if (st->is_Proj() && st->in(0)->is_Allocate() &&
        (st->in(0) == ld_alloc) &&
        (ld_off >= st->in(0)->as_Allocate()->minimum_header_size())) {
      // return a zero value for the load's basic type
      // (This is one of the few places where a generic PhaseTransform
      // can create new nodes.  Think of it as lazily manifesting
      // virtually pre-existing constants.)
      return phase->zerocon(memory_type());
    }

    // A load from an initialization barrier can match a captured store.
    if (st->is_Proj() && st->in(0)->is_Initialize()) {
      InitializeNode* init = st->in(0)->as_Initialize();
      AllocateNode* alloc = init->allocation();
      if ((alloc != NULL) && (alloc == ld_alloc)) {
        // examine a captured store value
        st = init->find_captured_store(ld_off, memory_size(), phase);
        if (st != NULL)
          continue;             // take one more trip around
      }
    }

    // A boxed value loaded from the result of a valueOf() call is the call's input parameter.
    if (this->is_Load() && ld_adr->is_AddP() &&
        (tp != NULL) && tp->is_ptr_to_boxed_value()) {
      intptr_t ignore = 0;
      Node* base = AddPNode::Ideal_base_and_offset(ld_adr, phase, ignore);
      if (base != NULL && base->is_Proj() &&
          base->as_Proj()->_con == TypeFunc::Parms &&
          base->in(0)->is_CallStaticJava() &&
          base->in(0)->as_CallStaticJava()->is_boxing_method()) {
        return base->in(0)->in(TypeFunc::Parms);
      }
    }

    break;
  }

  return NULL;
}

//----------------------is_instance_field_load_with_local_phi------------------
bool LoadNode::is_instance_field_load_with_local_phi(Node* ctrl) {
  if( in(Memory)->is_Phi() && in(Memory)->in(0) == ctrl &&
      in(Address)->is_AddP() ) {
    const TypeOopPtr* t_oop = in(Address)->bottom_type()->isa_oopptr();
    // Only instances and boxed values.
    if( t_oop != NULL &&
        (t_oop->is_ptr_to_boxed_value() ||
         t_oop->is_known_instance_field()) &&
        t_oop->offset() != Type::OffsetBot &&
        t_oop->offset() != Type::OffsetTop) {
      return true;
    }
  }
  return false;
}

//------------------------------Identity---------------------------------------
// Loads are identity if previous store is to same address
Node *LoadNode::Identity( PhaseTransform *phase ) {
  // If the previous store-maker is the right kind of Store, and the store is
  // to the same address, then we are equal to the value stored.
  Node* mem = in(Memory);
  Node* value = can_see_stored_value(mem, phase);
  if( value ) {
    // byte, short & char stores truncate naturally.
    // A load has to load the truncated value which requires
    // some sort of masking operation and that requires an
    // Ideal call instead of an Identity call.
    if (memory_size() < BytesPerInt) {
      // If the input to the store does not fit with the load's result type,
      // it must be truncated via an Ideal call.
      if (!phase->type(value)->higher_equal(phase->type(this)))
        return this;
    }
    // (This works even when value is a Con, but LoadNode::Value
    // usually runs first, producing the singleton type of the Con.)
    return value;
  }

  // Search for an existing data phi which was generated before for the same
  // instance's field to avoid infinite generation of phis in a loop.
  Node *region = mem->in(0);
  if (is_instance_field_load_with_local_phi(region)) {
    const TypeOopPtr *addr_t = in(Address)->bottom_type()->isa_oopptr();
    int this_index  = phase->C->get_alias_index(addr_t);
    int this_offset = addr_t->offset();
    int this_iid    = addr_t->instance_id();
    if (!addr_t->is_known_instance() &&
         addr_t->is_ptr_to_boxed_value()) {
      // Use _idx of address base (could be Phi node) for boxed values.
      intptr_t   ignore = 0;
      Node*      base = AddPNode::Ideal_base_and_offset(in(Address), phase, ignore);
      if (base == NULL) {
        return this;
      }
      this_iid = base->_idx;
    }
    const Type* this_type = bottom_type();
    for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
      Node* phi = region->fast_out(i);
      if (phi->is_Phi() && phi != mem &&
          phi->as_Phi()->is_same_inst_field(this_type, (int)mem->_idx, this_iid, this_index, this_offset)) {
        return phi;
      }
    }
  }

  return this;
}

// We're loading from an object which has autobox behaviour.
// If this object is result of a valueOf call we'll have a phi
// merging a newly allocated object and a load from the cache.
// We want to replace this load with the original incoming
// argument to the valueOf call.
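// For example (sketch): Integer.valueOf(x) yields either a newly allocated
// Integer or a load from the Integer cache array; a later load of the .value
// field can be folded back to plain 'x', either via split_through_phi (new
// object path) or by reversing the cache-index arithmetic below (cache path).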
Node* LoadNode::eliminate_autobox(PhaseGVN* phase) {
  assert(phase->C->eliminate_boxing(), "sanity");
  intptr_t ignore = 0;
  Node* base = AddPNode::Ideal_base_and_offset(in(Address), phase, ignore);
  if ((base == NULL) || base->is_Phi()) {
    // Push the loads from the phi that comes from valueOf up
    // through it to allow elimination of the loads and the recovery
    // of the original value. It is done in split_through_phi().
    return NULL;
  } else if (base->is_Load() ||
             base->is_DecodeN() && base->in(1)->is_Load()) {
    // Eliminate the load of boxed value for integer types from the cache
    // array by deriving the value from the index into the array.
    // Capture the offset of the load and then reverse the computation.

    // Get LoadN node which loads a boxing object from 'cache' array.
    if (base->is_DecodeN()) {
      base = base->in(1);
    }
    if (!base->in(Address)->is_AddP()) {
      return NULL; // Complex address
    }
    AddPNode* address = base->in(Address)->as_AddP();
    Node* cache_base = address->in(AddPNode::Base);
    if ((cache_base != NULL) && cache_base->is_DecodeN()) {
      // Get ConP node which is static 'cache' field.
      cache_base = cache_base->in(1);
    }
    if ((cache_base != NULL) && cache_base->is_Con()) {
      const TypeAryPtr* base_type = cache_base->bottom_type()->isa_aryptr();
      if ((base_type != NULL) && base_type->is_autobox_cache()) {
        Node* elements[4];
        int shift = exact_log2(type2aelembytes(T_OBJECT));
        int count = address->unpack_offsets(elements, ARRAY_SIZE(elements));
        if ((count >  0) && elements[0]->is_Con() &&
            ((count == 1) ||
             (count == 2) && elements[1]->Opcode() == Op_LShiftX &&
                             elements[1]->in(2) == phase->intcon(shift))) {
          ciObjArray* array = base_type->const_oop()->as_obj_array();
          // Fetch the box object cache[0] at the base of the array and get its value
          ciInstance* box = array->obj_at(0)->as_instance();
          ciInstanceKlass* ik = box->klass()->as_instance_klass();
          assert(ik->is_box_klass(), "sanity");
          assert(ik->nof_nonstatic_fields() == 1, "change following code");
          if (ik->nof_nonstatic_fields() == 1) {
            // This should always be true: nonstatic_field_at requires calling
            // nof_nonstatic_fields first, so check it anyway.
            ciConstant c = box->field_value(ik->nonstatic_field_at(0));
            BasicType bt = c.basic_type();
            // Only integer types have boxing cache.
            assert(bt == T_BOOLEAN || bt == T_CHAR  ||
                   bt == T_BYTE    || bt == T_SHORT ||
                   bt == T_INT     || bt == T_LONG, err_msg_res("wrong type = %s", type2name(bt)));
            jlong cache_low = (bt == T_LONG) ? c.as_long() : c.as_int();
            if (cache_low != (int)cache_low) {
              return NULL; // should not happen since cache is array indexed by value
            }
            jlong offset = arrayOopDesc::base_offset_in_bytes(T_OBJECT) - (cache_low << shift);
            if (offset != (int)offset) {
              return NULL; // should not happen since cache is array indexed by value
            }
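            // The loaded address offset is (roughly)
            //   base_off + ((value - cache_low) << shift),
            // so subtracting (base_off - (cache_low << shift)) and undoing
            // the shift recovers the original boxed 'value'.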
            // Add up all the offsets making up the address of the load.
            Node* result = elements[0];
            for (int i = 1; i < count; i++) {
              result = phase->transform(new (phase->C) AddXNode(result, elements[i]));
            }
            // Remove the constant offset from the address and then
            result = phase->transform(new (phase->C) AddXNode(result, phase->MakeConX(-(int)offset)));
            // remove the scaling of the offset to recover the original index.
            if (result->Opcode() == Op_LShiftX && result->in(2) == phase->intcon(shift)) {
              // Peel the shift off directly but wrap it in a dummy node
              // since Ideal can't return existing nodes
              result = new (phase->C) RShiftXNode(result->in(1), phase->intcon(0));
            } else if (result->is_Add() && result->in(2)->is_Con() &&
                       result->in(1)->Opcode() == Op_LShiftX &&
                       result->in(1)->in(2) == phase->intcon(shift)) {
              // We can't do general optimization: ((X<<Z) + Y) >> Z ==> X + (Y>>Z)
              // but for boxing cache access we know that X<<Z will not overflow
              // (there is a range check) so we do this optimization by hand here.
              Node* add_con = new (phase->C) RShiftXNode(result->in(2), phase->intcon(shift));
              result = new (phase->C) AddXNode(result->in(1)->in(1), phase->transform(add_con));
            } else {
              result = new (phase->C) RShiftXNode(result, phase->intcon(shift));
            }
#ifdef _LP64
            if (bt != T_LONG) {
              result = new (phase->C) ConvL2INode(phase->transform(result));
            }
#else
            if (bt == T_LONG) {
              result = new (phase->C) ConvI2LNode(phase->transform(result));
            }
#endif
            // Boxing/unboxing can be done from signed & unsigned loads (e.g. LoadUB -> ... -> LoadB pair).
            // Need to preserve unboxing load type if it is unsigned.
            switch(this->Opcode()) {
              case Op_LoadUB:
                result = new (phase->C) AndINode(phase->transform(result), phase->intcon(0xFF));
                break;
              case Op_LoadUS:
                result = new (phase->C) AndINode(phase->transform(result), phase->intcon(0xFFFF));
                break;
            }
            return result;
          }
        }
      }
    }
  }
  return NULL;
}

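// A Phi can be split through only once its Region and all of its inputs are
// present and typed; otherwise wait for the graph to stabilize.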
static bool stable_phi(PhiNode* phi, PhaseGVN *phase) {
  Node* region = phi->in(0);
  if (region == NULL) {
    return false; // Wait for a stable graph
  }
  uint cnt = phi->req();
  for (uint i = 1; i < cnt; i++) {
    Node* rc = region->in(i);
    if (rc == NULL || phase->type(rc) == Type::TOP)
      return false; // Wait for a stable graph
    Node* in = phi->in(i);
    if (in == NULL || phase->type(in) == Type::TOP)
      return false; // Wait for a stable graph
  }
  return true;
}
//------------------------------split_through_phi------------------------------
// Split instance or boxed field load through Phi.
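// Roughly:  Load( Phi(m1, m2), adr )  ==>  Phi( Load(m1, adr), Load(m2, adr) )
// and, for boxed values, an address-base Phi may be split the same way.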
Node *LoadNode::split_through_phi(PhaseGVN *phase) {
  Node* mem     = in(Memory);
  Node* address = in(Address);
  const TypeOopPtr *t_oop = phase->type(address)->isa_oopptr();

  assert((t_oop != NULL) &&
         (t_oop->is_known_instance_field() ||
          t_oop->is_ptr_to_boxed_value()), "invalid conditions");

  Compile* C = phase->C;
  intptr_t ignore = 0;
  Node*    base = AddPNode::Ideal_base_and_offset(address, phase, ignore);
  bool base_is_phi = (base != NULL) && base->is_Phi();
  bool load_boxed_values = t_oop->is_ptr_to_boxed_value() && C->aggressive_unboxing() &&
                           (base != NULL) && (base == address->in(AddPNode::Base)) &&
                           phase->type(base)->higher_equal(TypePtr::NOTNULL);

  if (!((mem->is_Phi() || base_is_phi) &&
        (load_boxed_values || t_oop->is_known_instance_field()))) {
    return NULL; // memory is not Phi
  }

  if (mem->is_Phi()) {
    if (!stable_phi(mem->as_Phi(), phase)) {
      return NULL; // Wait for a stable graph
    }
    uint cnt = mem->req();
    // Check for loop invariant memory.
    if (cnt == 3) {
      for (uint i = 1; i < cnt; i++) {
        Node* in = mem->in(i);
        Node*  m = optimize_memory_chain(in, t_oop, this, phase);
        if (m == mem) {
          if (i == 1) {
            // If the first edge was a loop, check the second edge too.
            // If both are replaceable, we are in an infinite loop.
            Node *n = optimize_memory_chain(mem->in(2), t_oop, this, phase);
            if (n == mem) {
              break;
            }
          }
          set_req(Memory, mem->in(cnt - i));
          return this; // made change
        }
      }
    }
  }
  if (base_is_phi) {
    if (!stable_phi(base->as_Phi(), phase)) {
      return NULL; // Wait for a stable graph
    }
    uint cnt = base->req();
    // Check for a self-referential base Phi.
    if (cnt == 3) {
      for (uint i = 1; i < cnt; i++) {
        if (base->in(i) == base) {
          return NULL; // Wait for a stable graph
        }
      }
    }
  }

  bool load_boxed_phi = load_boxed_values && base_is_phi && (base->in(0) == mem->in(0));

  // Split through Phi (see original code in loopopts.cpp).
  assert(C->have_alias_type(t_oop), "instance should have alias type");

  // Do nothing here if Identity will find a value
  // (to avoid generating an infinite chain of value phis).
  if (!phase->eqv(this, this->Identity(phase)))
    return NULL;

  // Select Region to split through.
  Node* region;
  if (!base_is_phi) {
    assert(mem->is_Phi(), "sanity");
    region = mem->in(0);
    // Skip if the region dominates some control edge of the address.
    if (!MemNode::all_controls_dominate(address, region))
      return NULL;
  } else if (!mem->is_Phi()) {
    assert(base_is_phi, "sanity");
    region = base->in(0);
    // Skip if the region dominates some control edge of the memory.
    if (!MemNode::all_controls_dominate(mem, region))
      return NULL;
  } else if (base->in(0) != mem->in(0)) {
    assert(base_is_phi && mem->is_Phi(), "sanity");
    if (MemNode::all_controls_dominate(mem, base->in(0))) {
      region = base->in(0);
    } else if (MemNode::all_controls_dominate(address, mem->in(0))) {
      region = mem->in(0);
    } else {
      return NULL; // complex graph
    }
  } else {
    assert(base->in(0) == mem->in(0), "sanity");
    region = mem->in(0);
  }

  const Type* this_type = this->bottom_type();
  int this_index  = C->get_alias_index(t_oop);
  int this_offset = t_oop->offset();
  int this_iid    = t_oop->instance_id();
  if (!t_oop->is_known_instance() && load_boxed_values) {
    // Use _idx of address base for boxed values.
    this_iid = base->_idx;
  }
  PhaseIterGVN* igvn = phase->is_IterGVN();
  Node* phi = new (C) PhiNode(region, this_type, NULL, mem->_idx, this_iid, this_index, this_offset);
  for (uint i = 1; i < region->req(); i++) {
    Node* x;
    Node* the_clone = NULL;
    if (region->in(i) == C->top()) {
      x = C->top();      // Dead path?  Use a dead data op
    } else {
      x = this->clone();        // Else clone up the data op
      the_clone = x;            // Remember for possible deletion.
      // Alter data node to use pre-phi inputs
      if (this->in(0) == region) {
        x->set_req(0, region->in(i));
      } else {
        x->set_req(0, NULL);
      }
      if (mem->is_Phi() && (mem->in(0) == region)) {
        x->set_req(Memory, mem->in(i)); // Use pre-Phi input for the clone.
      }
      if (address->is_Phi() && address->in(0) == region) {
        x->set_req(Address, address->in(i)); // Use pre-Phi input for the clone
      }
      if (base_is_phi && (base->in(0) == region)) {
        Node* base_x = base->in(i); // Clone address for loads from boxed objects.
        Node* adr_x = phase->transform(new (C) AddPNode(base_x,base_x,address->in(AddPNode::Offset)));
        x->set_req(Address, adr_x);
      }
    }
    // Check for a 'win' on some paths
    const Type *t = x->Value(igvn);

    bool singleton = t->singleton();

    // See comments in PhaseIdealLoop::split_thru_phi().
    if (singleton && t == Type::TOP) {
      singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
    }

    if (singleton) {
      x = igvn->makecon(t);
    } else {
      // We now call Identity to try to simplify the cloned node.
      // Note that some Identity methods call phase->type(this).
      // Make sure that the type array is big enough for
      // our new node, even though we may throw the node away.
      // (This tweaking with igvn only works because x is a new node.)
      igvn->set_type(x, t);
      // If x is a TypeNode, capture any more-precise type permanently into Node
      // otherwise it will not be updated during igvn->transform since
      // igvn->type(x) is set to x->Value() already.
      x->raise_bottom_type(t);
      Node *y = x->Identity(igvn);
      if (y != x) {
        x = y;
      } else {
        y = igvn->hash_find_insert(x);
        if (y) {
          x = y;
        } else {
          // Else x is a new node we are keeping
          // We do not need register_new_node_with_optimizer
          // because set_type has already been called.
          igvn->_worklist.push(x);
        }
      }
    }
    if (x != the_clone && the_clone != NULL) {
      igvn->remove_dead_node(the_clone);
    }
    phi->set_req(i, x);
  }
  // Record Phi
  igvn->register_new_node_with_optimizer(phi);
  return phi;
}

//------------------------------Ideal------------------------------------------
// If the load is from Field memory and the pointer is non-null, it might be possible to
// zero out the control input.
// If the offset is constant and the base is an object allocation,
// try to hook me up to the exact initializing store.
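// Beyond that, this Ideal also skips the control edge past a SafePoint when
// legal, optimizes the memory chain, splits instance/boxed loads through
// memory Phis, and bypasses independent prior stores when the load can then
// fold against an exact match.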
Node *LoadNode::Ideal(PhaseGVN *phase, bool can_reshape) {
  Node* p = MemNode::Ideal_common(phase, can_reshape);
  if (p)  return (p == NodeSentinel) ? NULL : p;

  Node* ctrl    = in(MemNode::Control);
  Node* address = in(MemNode::Address);

  bool addr_mark = ((phase->type(address)->isa_oopptr() || phase->type(address)->isa_narrowoop()) &&
         phase->type(address)->is_ptr()->offset() == oopDesc::mark_offset_in_bytes());

  // Skip up past a SafePoint control.  Cannot do this for Stores because
  // pointer stores & cardmarks must stay on the same side of a SafePoint.
  if( ctrl != NULL && ctrl->Opcode() == Op_SafePoint &&
      phase->C->get_alias_index(phase->type(address)->is_ptr()) != Compile::AliasIdxRaw  &&
      !addr_mark ) {
    ctrl = ctrl->in(0);
    set_req(MemNode::Control,ctrl);
  }

  intptr_t ignore = 0;
  Node*    base   = AddPNode::Ideal_base_and_offset(address, phase, ignore);
  if (base != NULL
      && phase->C->get_alias_index(phase->type(address)->is_ptr()) != Compile::AliasIdxRaw) {
    // Check for useless control edge in some common special cases
    if (in(MemNode::Control) != NULL
        && can_remove_control()
        && phase->type(base)->higher_equal(TypePtr::NOTNULL)
        && all_controls_dominate(base, phase->C->start())) {
      // A method-invariant, non-null address (constant or 'this' argument).
      set_req(MemNode::Control, NULL);
    }
  }

  Node* mem = in(MemNode::Memory);
  const TypePtr *addr_t = phase->type(address)->isa_ptr();

  if (can_reshape && (addr_t != NULL)) {
    // try to optimize our memory input
    Node* opt_mem = MemNode::optimize_memory_chain(mem, addr_t, this, phase);
    if (opt_mem != mem) {
      set_req(MemNode::Memory, opt_mem);
      if (phase->type( opt_mem ) == Type::TOP) return NULL;
      return this;
    }
    const TypeOopPtr *t_oop = addr_t->isa_oopptr();
    if ((t_oop != NULL) &&
        (t_oop->is_known_instance_field() ||
         t_oop->is_ptr_to_boxed_value())) {
      PhaseIterGVN *igvn = phase->is_IterGVN();
      if (igvn != NULL && igvn->_worklist.member(opt_mem)) {
        // Delay this transformation until memory Phi is processed.
        phase->is_IterGVN()->_worklist.push(this);
        return NULL;
      }
      // Split instance field load through Phi.
      Node* result = split_through_phi(phase);
      if (result != NULL) return result;

      if (t_oop->is_ptr_to_boxed_value()) {
        Node* result = eliminate_autobox(phase);
        if (result != NULL) return result;
      }
    }
  }

  // Check for prior store with a different base or offset; make Load
  // independent.  Skip through any number of them.  Bail out if the stores
  // are in an endless dead cycle and report no progress.  This is a key
  // transform for Reflection.  However, if after skipping through the Stores
  // we can't then fold up against a prior store, do NOT do the transform, as
  // this amounts to using the 'Oracle' model of aliasing.  It leaves the same
  // array memory alive twice: once for the hoisted Load and again after the
  // bypassed Store.  This situation only works if EVERYBODY who does
  // anti-dependence work knows how to bypass.  I.e. we need all
  // anti-dependence checks to ask the same Oracle.  Right now, that Oracle is
  // the alias index stuff.  So instead, peek through Stores and IFF we can
  // fold up, do so.
  Node* prev_mem = find_previous_store(phase);
  // Steps (a), (b):  Walk past independent stores to find an exact match.
  if (prev_mem != NULL && prev_mem != in(MemNode::Memory)) {
    // (c) See if we can fold up on the spot, but don't fold up here.
    // Fold-up might require truncation (for LoadB/LoadS/LoadUS) or
    // just return a prior value, which is done by Identity calls.
    if (can_see_stored_value(prev_mem, phase)) {
      // Make ready for step (d):
      set_req(MemNode::Memory, prev_mem);
      return this;
    }
  }

  return NULL;                  // No further progress
}

// Helper to recognize certain Klass fields which are invariant across
// some group of array types (e.g., int[] or all T[] where T < Object).
const Type*
LoadNode::load_array_final_field(const TypeKlassPtr *tkls,
                                 ciKlass* klass) const {
  if (tkls->offset() == in_bytes(Klass::modifier_flags_offset())) {
    // The field is Klass::_modifier_flags.  Return its (constant) value.
    // (Folds up the 2nd indirection in aClassConstant.getModifiers().)
    assert(this->Opcode() == Op_LoadI, "must load an int from _modifier_flags");
    return TypeInt::make(klass->modifier_flags());
  }
  if (tkls->offset() == in_bytes(Klass::access_flags_offset())) {
    // The field is Klass::_access_flags.  Return its (constant) value.
    // (Folds up the 2nd indirection in Reflection.getClassAccessFlags(aClassConstant).)
    assert(this->Opcode() == Op_LoadI, "must load an int from _access_flags");
    return TypeInt::make(klass->access_flags());
  }
  if (tkls->offset() == in_bytes(Klass::layout_helper_offset())) {
    // The field is Klass::_layout_helper.  Return its constant value if known.
    assert(this->Opcode() == Op_LoadI, "must load an int from _layout_helper");
    return TypeInt::make(klass->layout_helper());
  }

  // No match.
  return NULL;
}

// Try to constant-fold a stable array element.
static const Type* fold_stable_ary_elem(const TypeAryPtr* ary, int off, BasicType loadbt) {
  assert(ary->const_oop(), "array should be constant");
  assert(ary->is_stable(), "array should be stable");

  // Decode the results of GraphKit::array_element_address.
  ciArray* aobj = ary->const_oop()->as_array();
  ciConstant con = aobj->element_value_by_offset(off);

  if (con.basic_type() != T_ILLEGAL && !con.is_null_or_zero()) {
    const Type* con_type = Type::make_from_constant(con);
    if (con_type != NULL) {
      if (con_type->isa_aryptr()) {
        // Join with the array element type, in case it is also stable.
        int dim = ary->stable_dimension();
        con_type = con_type->is_aryptr()->cast_to_stable(true, dim-1);
      }
      if (loadbt == T_NARROWOOP && con_type->isa_oopptr()) {
        con_type = con_type->make_narrowoop();
      }
#ifndef PRODUCT
      if (TraceIterativeGVN) {
        tty->print("FoldStableValues: array element [off=%d]: con_type=", off);
        con_type->dump(); tty->cr();
      }
#endif //PRODUCT
      return con_type;
    }
  }
  return NULL;
}

//------------------------------Value-----------------------------------------
const Type *LoadNode::Value( PhaseTransform *phase ) const {
  // Either input is TOP ==> the result is TOP
  Node* mem = in(MemNode::Memory);
  const Type *t1 = phase->type(mem);
  if (t1 == Type::TOP)  return Type::TOP;
  Node* adr = in(MemNode::Address);
  const TypePtr* tp = phase->type(adr)->isa_ptr();
  if (tp == NULL || tp->empty())  return Type::TOP;
  int off = tp->offset();
  assert(off != Type::OffsetTop, "case covered by TypePtr::empty");
  Compile* C = phase->C;

  // Try to guess loaded type from pointer type
  if (tp->isa_aryptr()) {
    const TypeAryPtr* ary = tp->is_aryptr();
    const Type* t = ary->elem();

    // Determine whether the reference is beyond the header or not, by comparing
    // the offset against the offset of the start of the array's data.
    // Different array types begin at slightly different offsets (12 vs. 16).
    // We choose T_BYTE as an example base type that is least restrictive
    // as to alignment, which will therefore produce the smallest
    // possible base offset.
    const int min_base_off = arrayOopDesc::base_offset_in_bytes(T_BYTE);
    const bool off_beyond_header = ((uint)off >= (uint)min_base_off);

    // Try to constant-fold a stable array element.
    if (FoldStableValues && ary->is_stable() && ary->const_oop() != NULL) {
      // Make sure the reference is not into the header and the offset is constant
      if (off_beyond_header && adr->is_AddP() && off != Type::OffsetBot) {
        const Type* con_type = fold_stable_ary_elem(ary, off, memory_type());
        if (con_type != NULL) {
          return con_type;
        }
      }
    }

    // Don't do this for integer types. There is only potential profit if
    // the element type t is lower than _type; that is, for int types, if _type is
    // more restrictive than t.  This only happens here if one is short and the other
    // char (both 16 bits), and in those cases we've made an intentional decision
    // to use one kind of load over the other. See AndINode::Ideal and 4965907.
    // Also, do not try to narrow the type for a LoadKlass, regardless of offset.
    //
    // Yes, it is possible to encounter an expression like (LoadKlass p1:(AddP x x 8))
    // where the _gvn.type of the AddP is wider than 8.  This occurs when an earlier
    // copy p0 of (AddP x x 8) has been proven equal to p1, and the p0 has been
    // subsumed by p1.  If p1 is on the worklist but has not yet been re-transformed,
    // it is possible that p1 will have a type like Foo*[int+]:NotNull*+any.
    // In fact, that could have been the original type of p1, and p1 could have
    // had an original form like p1:(AddP x x (LShiftL quux 3)), where the
    // expression (LShiftL quux 3) independently optimized to the constant 8.
    if ((t->isa_int() == NULL) && (t->isa_long() == NULL)
        && (_type->isa_vect() == NULL)
        && Opcode() != Op_LoadKlass && Opcode() != Op_LoadNKlass) {
      // t might actually be lower than _type, if _type is a unique
      // concrete subclass of abstract class t.
      if (off_beyond_header) {  // is the offset beyond the header?
        const Type* jt = t->join_speculative(_type);
        // In any case, do not allow the join, per se, to empty out the type.
        if (jt->empty() && !t->empty()) {
          // This can happen if an interface-typed array narrows to a class type.
          jt = _type;
        }
#ifdef ASSERT
        if (phase->C->eliminate_boxing() && adr->is_AddP()) {
          // The pointers in the autobox arrays are always non-null
          Node* base = adr->in(AddPNode::Base);
          if ((base != NULL) && base->is_DecodeN()) {
            // Get LoadN node which loads IntegerCache.cache field
            base = base->in(1);
          }
          if ((base != NULL) && base->is_Con()) {
            const TypeAryPtr* base_type = base->bottom_type()->isa_aryptr();
            if ((base_type != NULL) && base_type->is_autobox_cache()) {
              // It could be a narrow oop
              assert(jt->make_ptr()->ptr() == TypePtr::NotNull,"sanity");
            }
          }
        }
#endif
        return jt;
      }
    }
  } else if (tp->base() == Type::InstPtr) {
    ciEnv* env = C->env();
    const TypeInstPtr* tinst = tp->is_instptr();
    ciKlass* klass = tinst->klass();
    assert( off != Type::OffsetBot ||
            // arrays can be cast to Objects
            tp->is_oopptr()->klass()->is_java_lang_Object() ||
            // unsafe field access may not have a constant offset
            C->has_unsafe_access(),
            "Field accesses must be precise" );
    // For oop loads, we expect the _type to be precise
    if (klass == env->String_klass() &&
        adr->is_AddP() && off != Type::OffsetBot) {
      // For constant Strings treat the final fields as compile time constants.
      Node* base = adr->in(AddPNode::Base);
      const TypeOopPtr* t = phase->type(base)->isa_oopptr();
      if (t != NULL && t->singleton()) {
        ciField* field = env->String_klass()->get_field_by_offset(off, false);
        if (field != NULL && field->is_final()) {
          ciObject* string = t->const_oop();
          ciConstant constant = string->as_instance()->field_value(field);
          if (constant.basic_type() == T_INT) {
            return TypeInt::make(constant.as_int());
          } else if (constant.basic_type() == T_ARRAY) {
            if (adr->bottom_type()->is_ptr_to_narrowoop()) {
              return TypeNarrowOop::make_from_constant(constant.as_object(), true);
            } else {
              return TypeOopPtr::make_from_constant(constant.as_object(), true);
            }
          }
        }
      }
    }
    // Optimizations for constant objects
    ciObject* const_oop = tinst->const_oop();
    if (const_oop != NULL) {
      // For constant Boxed value treat the target field as a compile time constant.
      if (tinst->is_ptr_to_boxed_value()) {
        return tinst->get_const_boxed_value();
      } else
      // For constant CallSites treat the target field as a compile time constant.
      if (const_oop->is_call_site()) {
        ciCallSite* call_site = const_oop->as_call_site();
        ciField* field = call_site->klass()->as_instance_klass()->get_field_by_offset(off, /*is_static=*/ false);
        if (field != NULL && field->is_call_site_target()) {
          ciMethodHandle* target = call_site->get_target();
          if (target != NULL) {  // just in case
            ciConstant constant(T_OBJECT, target);
            const Type* t;
            if (adr->bottom_type()->is_ptr_to_narrowoop()) {
              t = TypeNarrowOop::make_from_constant(constant.as_object(), true);
            } else {
              t = TypeOopPtr::make_from_constant(constant.as_object(), true);
            }
            // Add a dependence for invalidation of the optimization.
            if (!call_site->is_constant_call_site()) {
              C->dependencies()->assert_call_site_target_value(call_site, target);
            }
            return t;
          }
        }
      }
    }
  } else if (tp->base() == Type::KlassPtr) {
    assert( off != Type::OffsetBot ||
            // arrays can be cast to Objects
            tp->is_klassptr()->klass()->is_java_lang_Object() ||
            // also allow array-loading from the primary supertype
            // array during subtype checks
            Opcode() == Op_LoadKlass,
            "Field accesses must be precise" );
    // For klass/static loads, we expect the _type to be precise
  }

  const TypeKlassPtr *tkls = tp->isa_klassptr();
  if (tkls != NULL && !StressReflectiveCode) {
    ciKlass* klass = tkls->klass();
    if (klass->is_loaded() && tkls->klass_is_exact()) {
      // We are loading a field from a Klass metaobject whose identity
      // is known at compile time (the type is "exact" or "precise").
      // Check for fields we know are maintained as constants by the VM.
      if (tkls->offset() == in_bytes(Klass::super_check_offset_offset())) {
        // The field is Klass::_super_check_offset.  Return its (constant) value.
        // (Folds up type checking code.)
        assert(Opcode() == Op_LoadI, "must load an int from _super_check_offset");
        return TypeInt::make(klass->super_check_offset());
      }
      // Compute index into primary_supers array
      juint depth = (tkls->offset() - in_bytes(Klass::primary_supers_offset())) / sizeof(Klass*);
      // Check for overflowing; use unsigned compare to handle the negative case.
      if( depth < ciKlass::primary_super_limit() ) {
        // The field is an element of Klass::_primary_supers.  Return its (constant) value.
        // (Folds up type checking code.)
        assert(Opcode() == Op_LoadKlass, "must load a klass from _primary_supers");
        ciKlass *ss = klass->super_of_depth(depth);
        return ss ? TypeKlassPtr::make(ss) : TypePtr::NULL_PTR;
      }
      const Type* aift = load_array_final_field(tkls, klass);
      if (aift != NULL)  return aift;
      if (tkls->offset() == in_bytes(ArrayKlass::component_mirror_offset())
          && klass->is_array_klass()) {
        // The field is ArrayKlass::_component_mirror.  Return its (constant) value.
        // (Folds up aClassConstant.getComponentType, common in Arrays.copyOf.)
        assert(Opcode() == Op_LoadP, "must load an oop from _component_mirror");
        return TypeInstPtr::make(klass->as_array_klass()->component_mirror());
      }
      if (tkls->offset() == in_bytes(Klass::java_mirror_offset())) {
        // The field is Klass::_java_mirror.  Return its (constant) value.
        // (Folds up the 2nd indirection in anObjConstant.getClass().)
        assert(Opcode() == Op_LoadP, "must load an oop from _java_mirror");
        return TypeInstPtr::make(klass->java_mirror());
      }
    }

    // We can still check if we are loading from the primary_supers array at a
    // shallow enough depth.  Even though the klass is not exact, entries less
    // than or equal to its super depth are correct.
    if (klass->is_loaded() ) {
      ciType *inner = klass;
      while( inner->is_obj_array_klass() )
        inner = inner->as_obj_array_klass()->base_element_type();
      if( inner->is_instance_klass() &&
          !inner->as_instance_klass()->flags().is_interface() ) {
        // Compute index into primary_supers array
        juint depth = (tkls->offset() - in_bytes(Klass::primary_supers_offset())) / sizeof(Klass*);
        // Check for overflowing; use unsigned compare to handle the negative case.
        if( depth < ciKlass::primary_super_limit() &&
            depth <= klass->super_depth() ) { // allow self-depth checks to handle self-check case
          // The field is an element of Klass::_primary_supers.  Return its (constant) value.
          // (Folds up type checking code.)
          assert(Opcode() == Op_LoadKlass, "must load a klass from _primary_supers");
          ciKlass *ss = klass->super_of_depth(depth);
          return ss ? TypeKlassPtr::make(ss) : TypePtr::NULL_PTR;
        }
      }
    }

    // If the type is enough to determine that the thing is not an array,
    // we can give the layout_helper a positive interval type.
    // This will help short-circuit some reflective code.
    if (tkls->offset() == in_bytes(Klass::layout_helper_offset())
        && !klass->is_array_klass() // not directly typed as an array
        && !klass->is_interface()  // specifically not Serializable & Cloneable
        && !klass->is_java_lang_Object()   // not the supertype of all T[]
        ) {
      // Note:  When interfaces are reliable, we can narrow the interface
      // test to (klass != Serializable && klass != Cloneable).
      assert(Opcode() == Op_LoadI, "must load an int from _layout_helper");
      jint min_size = Klass::instance_layout_helper(oopDesc::header_size(), false);
      // The key property of this type is that it folds up tests
      // for array-ness, since it proves that the layout_helper is positive.
      // Thus, a generic value like the basic object layout helper works fine.
      return TypeInt::make(min_size, max_jint, Type::WidenMin);
    }
  }

  // If we are loading from a freshly-allocated object, produce a zero,
  // if the load is provably beyond the header of the object.
  // (Also allow a variable load from a fresh array to produce zero.)
  const TypeOopPtr *tinst = tp->isa_oopptr();
  bool is_instance = (tinst != NULL) && tinst->is_known_instance_field();
  bool is_boxed_value = (tinst != NULL) && tinst->is_ptr_to_boxed_value();
  if (ReduceFieldZeroing || is_instance || is_boxed_value) {
    Node* value = can_see_stored_value(mem,phase);
    if (value != NULL && value->is_Con()) {
      assert(value->bottom_type()->higher_equal(_type),"sanity");
      return value->bottom_type();
    }
  }

  if (is_instance) {
    // If we have an instance type and our memory input is the
    // program's initial memory state, there is no matching store,
    // so just return a zero of the appropriate type
    Node *mem = in(MemNode::Memory);
    if (mem->is_Parm() && mem->in(0)->is_Start()) {
      assert(mem->as_Parm()->_con == TypeFunc::Memory, "must be memory Parm");
      return Type::get_zero_type(_type->basic_type());
    }
  }
  return _type;
}

//------------------------------match_edge-------------------------------------
// Do we Match on this edge index or not?  Match only the address.
uint LoadNode::match_edge(uint idx) const {
  return idx == MemNode::Address;
}

//--------------------------LoadBNode::Ideal--------------------------------------
//
//  If the previous store is to the same address as this load,
//  and the value stored was larger than a byte, replace this load
//  with the value stored truncated to a byte.  If no truncation is
//  needed, the replacement is done in LoadNode::Identity().
//
Node *LoadBNode::Ideal(PhaseGVN *phase, bool can_reshape) {
  Node* mem = in(MemNode::Memory);
  Node* value = can_see_stored_value(mem,phase);
  if( value && !phase->type(value)->higher_equal( _type ) ) {
    Node *result = phase->transform( new (phase->C) LShiftINode(value, phase->intcon(24)) );
    return new (phase->C) RShiftINode(result, phase->intcon(24));
  }
  // Identity call will handle the case where truncation is not needed.
  return LoadNode::Ideal(phase, can_reshape);
}

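// Constant-fold a byte load when the previously stored value is a constant
// that does not fit in a byte: apply the sign-extending truncation
// ((con << 24) >> 24) at compile time.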
const Type* LoadBNode::Value(PhaseTransform *phase) const {
  Node* mem = in(MemNode::Memory);
  Node* value = can_see_stored_value(mem,phase);
  if (value != NULL && value->is_Con() &&
      !value->bottom_type()->higher_equal(_type)) {
    // If the input to the store does not fit with the load's result type,
    // it must be truncated. We can't delay until Ideal call since
    // a singleton Value is needed for split_thru_phi optimization.
    int con = value->get_int();
    return TypeInt::make((con << 24) >> 24);
  }
  return LoadNode::Value(phase);
}

//--------------------------LoadUBNode::Ideal-------------------------------------
//
//  If the previous store is to the same address as this load,
//  and the value stored was larger than a byte, replace this load
//  with the value stored truncated to a byte.  If no truncation is
//  needed, the replacement is done in LoadNode::Identity().
//
Node* LoadUBNode::Ideal(PhaseGVN* phase, bool can_reshape) {
  Node* mem = in(MemNode::Memory);
  Node* value = can_see_stored_value(mem, phase);
  if (value && !phase->type(value)->higher_equal(_type))
    return new (phase->C) AndINode(value, phase->intcon(0xFF));
  // Identity call will handle the case where truncation is not needed.
  return LoadNode::Ideal(phase, can_reshape);
}

const Type* LoadUBNode::Value(PhaseTransform *phase) const {
  Node* mem = in(MemNode::Memory);
  Node* value = can_see_stored_value(mem,phase);
  if (value != NULL && value->is_Con() &&
      !value->bottom_type()->higher_equal(_type)) {
    // If the input to the store does not fit with the load's result type,
    // it must be truncated. We can't delay until Ideal call since
    // a singleton Value is needed for split_thru_phi optimization.
    int con = value->get_int();
    return TypeInt::make(con & 0xFF);
  }
  return LoadNode::Value(phase);
}

//--------------------------LoadUSNode::Ideal-------------------------------------
//
//  If the previous store is to the same address as this load,
//  and the value stored was larger than a char, replace this load
//  with the value stored truncated to a char.  If no truncation is
//  needed, the replacement is done in LoadNode::Identity().
//
Node *LoadUSNode::Ideal(PhaseGVN *phase, bool can_reshape) {
  Node* mem = in(MemNode::Memory);
  Node* value = can_see_stored_value(mem,phase);
  if( value && !phase->type(value)->higher_equal( _type ) )
    return new (phase->C) AndINode(value,phase->intcon(0xFFFF));
  // Identity call will handle the case where truncation is not needed.
  return LoadNode::Ideal(phase, can_reshape);
}

const Type* LoadUSNode::Value(PhaseTransform *phase) const {
  Node* mem = in(MemNode::Memory);
  Node* value = can_see_stored_value(mem,phase);
  if (value != NULL && value->is_Con() &&
      !value->bottom_type()->higher_equal(_type)) {
    // If the input to the store does not fit with the load's result type,
    // it must be truncated. We can't delay until Ideal call since
    // a singleton Value is needed for split_thru_phi optimization.
    int con = value->get_int();
    return TypeInt::make(con & 0xFFFF);
  }
  return LoadNode::Value(phase);
}

//--------------------------LoadSNode::Ideal--------------------------------------
//
//  If the previous store is to the same address as this load,
//  and the value stored was larger than a short, replace this load
//  with the value stored truncated to a short.  If no truncation is
//  needed, the replacement is done in LoadNode::Identity().
//
Node *LoadSNode::Ideal(PhaseGVN *phase, bool can_reshape) {
  Node* mem = in(MemNode::Memory);
  Node* value = can_see_stored_value(mem,phase);
  if( value && !phase->type(value)->higher_equal( _type ) ) {
    Node *result = phase->transform( new (phase->C) LShiftINode(value, phase->intcon(16)) );
    return new (phase->C) RShiftINode(result, phase->intcon(16));
  }
  // Identity call will handle the case where truncation is not needed.
  return LoadNode::Ideal(phase, can_reshape);
}

const Type* LoadSNode::Value(PhaseTransform *phase) const {
  Node* mem = in(MemNode::Memory);
  Node* value = can_see_stored_value(mem,phase);
  if (value != NULL && value->is_Con() &&
      !value->bottom_type()->higher_equal(_type)) {
    // If the input to the store does not fit with the load's result type,
    // it must be truncated. We can't delay until Ideal call since
    // a singleton Value is needed for split_thru_phi optimization.
    int con = value->get_int();
    return TypeInt::make((con << 16) >> 16);
  }
  return LoadNode::Value(phase);
}

//=============================================================================
//----------------------------LoadKlassNode::make------------------------------
// Polymorphic factory method:
Node* LoadKlassNode::make(PhaseGVN& gvn, Node* ctl, Node *mem, Node *adr, const TypePtr* at, const TypeKlassPtr *tk) {
  Compile* C = gvn.C;
  // sanity check the alias category against the created node type
  const TypePtr *adr_type = adr->bottom_type()->isa_ptr();
  assert(adr_type != NULL, "expecting TypeKlassPtr");
#ifdef _LP64
  if (adr_type->is_ptr_to_narrowklass()) {
    assert(UseCompressedClassPointers, "no compressed klasses");
    Node* load_klass = gvn.transform(new (C) LoadNKlassNode(ctl, mem, adr, at, tk->make_narrowklass(), MemNode::unordered));
    return new (C) DecodeNKlassNode(load_klass, load_klass->bottom_type()->make_ptr());
  }
#endif
  assert(!adr_type->is_ptr_to_narrowklass() && !adr_type->is_ptr_to_narrowoop(), "should have got back a narrow oop");
  return new (C) LoadKlassNode(ctl, mem, adr, at, tk, MemNode::unordered);
}

//------------------------------Value------------------------------------------
const Type *LoadKlassNode::Value( PhaseTransform *phase ) const {
  return klass_value_common(phase);
}

// In most cases, LoadKlassNode does not have the control input set. If the control
// input is set, it must not be removed (by LoadNode::Ideal()).
bool LoadKlassNode::can_remove_control() const {
  return false;
}

const Type *LoadNode::klass_value_common( PhaseTransform *phase ) const {
  // Either input is TOP ==> the result is TOP
  const Type *t1 = phase->type( in(MemNode::Memory) );
  if (t1 == Type::TOP)  return Type::TOP;
  Node *adr = in(MemNode::Address);
  const Type *t2 = phase->type( adr );
  if (t2 == Type::TOP)  return Type::TOP;
  const TypePtr *tp = t2->is_ptr();
  if (TypePtr::above_centerline(tp->ptr()) ||
      tp->ptr() == TypePtr::Null)  return Type::TOP;

  // Return a more precise klass, if possible
  const TypeInstPtr *tinst = tp->isa_instptr();
  if (tinst != NULL) {
    ciInstanceKlass* ik = tinst->klass()->as_instance_klass();
    int offset = tinst->offset();
    if (ik == phase->C->env()->Class_klass()
        && (offset == java_lang_Class::klass_offset_in_bytes() ||
            offset == java_lang_Class::array_klass_offset_in_bytes())) {
      // We are loading a special hidden field from a Class mirror object,
      // the field which points to the VM's Klass metaobject.
      ciType* t = tinst->java_mirror_type();
      // java_mirror_type returns non-null for compile-time Class constants.
      if (t != NULL) {
        // constant oop => constant klass
        if (offset == java_lang_Class::array_klass_offset_in_bytes()) {
          if (t->is_void()) {
            // We cannot create a void array.  Since void is a primitive type, return the
            // null klass.  Users of this result need to do a null check on the returned klass.
            return TypePtr::NULL_PTR;
          }
          return TypeKlassPtr::make(ciArrayKlass::make(t));
        }
        if (!t->is_klass()) {
          // a primitive Class (e.g., int.class) has NULL for a klass field
          return TypePtr::NULL_PTR;
        }
        // (Folds up the 1st indirection in aClassConstant.getModifiers().)
        return TypeKlassPtr::make(t->as_klass());
      }
      // non-constant mirror, so we can't tell what's going on
    }
    if( !ik->is_loaded() )
      return _type;             // Bail out if not loaded
    if (offset == oopDesc::klass_offset_in_bytes()) {
      if (tinst->klass_is_exact()) {
        return TypeKlassPtr::make(ik);
      }
      // See if we can become precise: no subklasses and no interface
      // (Note:  We need to support verified interfaces.)
      if (!ik->is_interface() && !ik->has_subklass()) {
        //assert(!UseExactTypes, "this code should be useless with exact types");
        // Add a dependence; if any subclass added we need to recompile
        if (!ik->is_final()) {
          // %%% should use stronger assert_unique_concrete_subtype instead
          phase->C->dependencies()->assert_leaf_type(ik);
        }
        // Return precise klass
        return TypeKlassPtr::make(ik);
      }

      // Return root of possible klass
      return TypeKlassPtr::make(TypePtr::NotNull, ik, 0/*offset*/);
    }
  }

  // Check for loading klass from an array
  const TypeAryPtr *tary = tp->isa_aryptr();
  if( tary != NULL ) {
    ciKlass *tary_klass = tary->klass();
    if (tary_klass != NULL   // can be NULL when at BOTTOM or TOP
        && tary->offset() == oopDesc::klass_offset_in_bytes()) {
      if (tary->klass_is_exact()) {
        return TypeKlassPtr::make(tary_klass);
      }
      ciArrayKlass *ak = tary->klass()->as_array_klass();
      // If the klass is an object array, we defer the question to the
      // array component klass.
      if( ak->is_obj_array_klass() ) {
        assert( ak->is_loaded(), "" );
        ciKlass *base_k = ak->as_obj_array_klass()->base_element_klass();
        if( base_k->is_loaded() && base_k->is_instance_klass() ) {
          ciInstanceKlass* ik = base_k->as_instance_klass();
          // See if we can become precise: no subklasses and no interface
          if (!ik->is_interface() && !ik->has_subklass()) {
            //assert(!UseExactTypes, "this code should be useless with exact types");
            // Add a dependence; if any subclass added we need to recompile
            if (!ik->is_final()) {
              phase->C->dependencies()->assert_leaf_type(ik);
            }
            // Return precise array klass
            return TypeKlassPtr::make(ak);
          }
        }
        return TypeKlassPtr::make(TypePtr::NotNull, ak, 0/*offset*/);
      } else {                  // Found a type-array?
        //assert(!UseExactTypes, "this code should be useless with exact types");
        assert( ak->is_type_array_klass(), "" );
        return TypeKlassPtr::make(ak); // These are always precise
      }
    }
  }

  // Check for loading klass from an array klass
  const TypeKlassPtr *tkls = tp->isa_klassptr();
  if (tkls != NULL && !StressReflectiveCode) {
    ciKlass* klass = tkls->klass();
    if( !klass->is_loaded() )
      return _type;             // Bail out if not loaded
    if( klass->is_obj_array_klass() &&
        tkls->offset() == in_bytes(ObjArrayKlass::element_klass_offset())) {
      ciKlass* elem = klass->as_obj_array_klass()->element_klass();
      // // Always returning precise element type is incorrect,
      // // e.g., element type could be object and array may contain strings
      // return TypeKlassPtr::make(TypePtr::Constant, elem, 0);

      // The array's TypeKlassPtr was declared 'precise' or 'not precise'
      // according to the element type's subclassing.
      return TypeKlassPtr::make(tkls->ptr(), elem, 0/*offset*/);
    }
    if( klass->is_instance_klass() && tkls->klass_is_exact() &&
        tkls->offset() == in_bytes(Klass::super_offset())) {
      ciKlass* sup = klass->as_instance_klass()->super();
      // The field is Klass::_super.  Return its (constant) value.
      // (Folds up the 2nd indirection in aClassConstant.getSuperClass().)
      return sup ? TypeKlassPtr::make(sup) : TypePtr::NULL_PTR;
    }
  }

  // Bailout case
  return LoadNode::Value(phase);
}

//------------------------------Identity---------------------------------------
// To clean up reflective code, simplify k.java_mirror.as_klass to plain k.
// Also feed through the klass in Allocate(...klass...)._klass.
Node* LoadKlassNode::Identity( PhaseTransform *phase ) {
  return klass_identity_common(phase);
}

Node* LoadNode::klass_identity_common(PhaseTransform *phase ) {
  Node* x = LoadNode::Identity(phase);
  if (x != this)  return x;

  // Take apart the address into an oop and an offset.
  // Return 'this' if we cannot.
  Node*    adr    = in(MemNode::Address);
  intptr_t offset = 0;
  Node*    base   = AddPNode::Ideal_base_and_offset(adr, phase, offset);
  if (base == NULL)     return this;
  const TypeOopPtr* toop = phase->type(adr)->isa_oopptr();
  if (toop == NULL)     return this;

  // We can fetch the klass directly through an AllocateNode.
  // This works even if the klass is not constant (clone or newArray).
  if (offset == oopDesc::klass_offset_in_bytes()) {
    Node* allocated_klass = AllocateNode::Ideal_klass(base, phase);
    if (allocated_klass != NULL) {
      return allocated_klass;
    }
  }

  // Simplify k.java_mirror.as_klass to plain k, where k is a Klass*.
  // Simplify ak.component_mirror.array_klass to plain ak, ak an ArrayKlass.
  // See inline_native_Class_query for occurrences of these patterns.
  // Java Example:  x.getClass().isAssignableFrom(y)
  // Java Example:  Array.newInstance(x.getClass().getComponentType(), n)
  //
  // This improves reflective code, often making the Class
  // mirror go completely dead.  (Current exception:  Class
  // mirrors may appear in debug info, but we could clean them out by
  // introducing a new debug info operator for Klass*.java_mirror).
  if (toop->isa_instptr() && toop->klass() == phase->C->env()->Class_klass()
      && (offset == java_lang_Class::klass_offset_in_bytes() ||
          offset == java_lang_Class::array_klass_offset_in_bytes())) {
    // We are loading a special hidden field from a Class mirror,
    // the field which points to its Klass or ArrayKlass metaobject.
    if (base->is_Load()) {
      Node* adr2 = base->in(MemNode::Address);
      const TypeKlassPtr* tkls = phase->type(adr2)->isa_klassptr();
      if (tkls != NULL && !tkls->empty()
          && (tkls->klass()->is_instance_klass() ||
              tkls->klass()->is_array_klass())
          && adr2->is_AddP()
          ) {
        int mirror_field = in_bytes(Klass::java_mirror_offset());
        if (offset == java_lang_Class::array_klass_offset_in_bytes()) {
          mirror_field = in_bytes(ArrayKlass::component_mirror_offset());
        }
        if (tkls->offset() == mirror_field) {
          return adr2->in(AddPNode::Base);
        }
      }
    }
  }

  return this;
}


//------------------------------Value------------------------------------------
const Type *LoadNKlassNode::Value( PhaseTransform *phase ) const {
  const Type *t = klass_value_common(phase);
  if (t == Type::TOP)
    return t;

  return t->make_narrowklass();
}

//------------------------------Identity---------------------------------------
// To clean up reflective code, simplify k.java_mirror.as_klass to narrow k.
// Also feed through the klass in Allocate(...klass...)._klass.
Node* LoadNKlassNode::Identity( PhaseTransform *phase ) {
  Node *x = klass_identity_common(phase);

  const Type *t = phase->type( x );
  if( t == Type::TOP ) return x;
  if( t->isa_narrowklass()) return x;
  assert (!t->isa_narrowoop(), "no narrow oop here");

  return phase->transform(new (phase->C) EncodePKlassNode(x, t->make_narrowklass()));
}

//------------------------------Value-----------------------------------------
const Type *LoadRangeNode::Value( PhaseTransform *phase ) const {
  // Either input is TOP ==> the result is TOP
  const Type *t1 = phase->type( in(MemNode::Memory) );
  if( t1 == Type::TOP ) return Type::TOP;
  Node *adr = in(MemNode::Address);
  const Type *t2 = phase->type( adr );
  if( t2 == Type::TOP ) return Type::TOP;
  const TypePtr *tp = t2->is_ptr();
  if (TypePtr::above_centerline(tp->ptr()))  return Type::TOP;
  const TypeAryPtr *tap = tp->isa_aryptr();
  if( !tap ) return _type;
  return tap->size();
}
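
// Illustrative example (not part of the original source): if the address type
// records a constant array length -- e.g. the TypeAryPtr for an allocation
// with a constant length such as 'new int[10]' -- then tap->size() is that
// constant TypeInt and the range load folds to the constant:
//   (LoadRange mem (AddP ary ary #length_offset))  ==>  int:10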

//-------------------------------Ideal---------------------------------------
// Feed through the length in AllocateArray(...length...)._length.
Node *LoadRangeNode::Ideal(PhaseGVN *phase, bool can_reshape) {
  Node* p = MemNode::Ideal_common(phase, can_reshape);
  if (p)  return (p == NodeSentinel) ? NULL : p;

  // Take apart the address into an oop and an offset.
  // Return 'this' if we cannot.
  Node*    adr    = in(MemNode::Address);
  intptr_t offset = 0;
  Node*    base   = AddPNode::Ideal_base_and_offset(adr, phase,  offset);
  if (base == NULL)     return NULL;
  const TypeAryPtr* tary = phase->type(adr)->isa_aryptr();
  if (tary == NULL)     return NULL;

  // We can fetch the length directly through an AllocateArrayNode.
  // This works even if the length is not constant (clone or newArray).
  if (offset == arrayOopDesc::length_offset_in_bytes()) {
    AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(base, phase);
    if (alloc != NULL) {
      Node* allocated_length = alloc->Ideal_length();
      Node* len = alloc->make_ideal_length(tary, phase);
      if (allocated_length != len) {
        // New CastII improves on this.
        return len;
      }
    }
  }

  return NULL;
}
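
// Illustrative Java example (not from the original source) of the length
// feed-through performed by the Ideal above and the Identity below:
//   int[] a = new int[n];  ...  use(a.length);
// The LoadRange on a.length is replaced by the AllocateArray's length input n,
// either directly (Identity) or wrapped in a CastII that records the tighter
// array-length type (this Ideal).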

//------------------------------Identity---------------------------------------
// Feed through the length in AllocateArray(...length...)._length.
Node* LoadRangeNode::Identity( PhaseTransform *phase ) {
  Node* x = LoadINode::Identity(phase);
  if (x != this)  return x;

  // Take apart the address into an oop and an offset.
  // Return 'this' if we cannot.
  Node*    adr    = in(MemNode::Address);
  intptr_t offset = 0;
  Node*    base   = AddPNode::Ideal_base_and_offset(adr, phase, offset);
  if (base == NULL)     return this;
  const TypeAryPtr* tary = phase->type(adr)->isa_aryptr();
  if (tary == NULL)     return this;

  // We can fetch the length directly through an AllocateArrayNode.
  // This works even if the length is not constant (clone or newArray).
  if (offset == arrayOopDesc::length_offset_in_bytes()) {
    AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(base, phase);
    if (alloc != NULL) {
      Node* allocated_length = alloc->Ideal_length();
      // Do not allow make_ideal_length to allocate a CastII node.
      Node* len = alloc->make_ideal_length(tary, phase, false);
      if (allocated_length == len) {
        // Return allocated_length only if it would not be improved by a CastII.
        return allocated_length;
      }
    }
  }

  return this;
}

//=============================================================================
//---------------------------StoreNode::make-----------------------------------
// Polymorphic factory method:
StoreNode* StoreNode::make(PhaseGVN& gvn, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, BasicType bt, MemOrd mo) {
  assert((mo == unordered || mo == release), "unexpected");
  Compile* C = gvn.C;
  assert(C->get_alias_index(adr_type) != Compile::AliasIdxRaw ||
         ctl != NULL, "raw memory operations should have control edge");

  switch (bt) {
  case T_BOOLEAN: val = gvn.transform(new (C) AndINode(val, gvn.intcon(0x1))); // Fall through to T_BYTE case
  case T_BYTE:    return new (C) StoreBNode(ctl, mem, adr, adr_type, val, mo);
  case T_INT:     return new (C) StoreINode(ctl, mem, adr, adr_type, val, mo);
  case T_CHAR:
  case T_SHORT:   return new (C) StoreCNode(ctl, mem, adr, adr_type, val, mo);
  case T_LONG:    return new (C) StoreLNode(ctl, mem, adr, adr_type, val, mo);
  case T_FLOAT:   return new (C) StoreFNode(ctl, mem, adr, adr_type, val, mo);
  case T_DOUBLE:  return new (C) StoreDNode(ctl, mem, adr, adr_type, val, mo);
  case T_METADATA:
  case T_ADDRESS:
  case T_OBJECT:
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      val = gvn.transform(new (C) EncodePNode(val, val->bottom_type()->make_narrowoop()));
      return new (C) StoreNNode(ctl, mem, adr, adr_type, val, mo);
    } else if (adr->bottom_type()->is_ptr_to_narrowklass() ||
               (UseCompressedClassPointers && val->bottom_type()->isa_klassptr() &&
                adr->bottom_type()->isa_rawptr())) {
      val = gvn.transform(new (C) EncodePKlassNode(val, val->bottom_type()->make_narrowklass()));
      return new (C) StoreNKlassNode(ctl, mem, adr, adr_type, val, mo);
    }
#endif
    {
      return new (C) StorePNode(ctl, mem, adr, adr_type, val, mo);
    }
  }
  ShouldNotReachHere();
  return (StoreNode*)NULL;
}
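
// Illustrative sketch (not part of the original source) of what the factory
// above produces, assuming a 64-bit VM with compressed oops:
//   make(gvn, ctl, mem, adr, atp, flag, T_BOOLEAN, unordered)
//     ==> (StoreB ctl mem adr (AndI flag #1))      // boolean is masked to 0/1
//   make(gvn, ctl, mem, adr, atp, val, T_OBJECT, unordered)
//     ==> (StoreN ctl mem adr (EncodeP val))       // adr typed ptr-to-narrowoop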

StoreLNode* StoreLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, MemOrd mo) {
  bool require_atomic = true;
  return new (C) StoreLNode(ctl, mem, adr, adr_type, val, mo, require_atomic);
}

StoreDNode* StoreDNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, MemOrd mo) {
  bool require_atomic = true;
  return new (C) StoreDNode(ctl, mem, adr, adr_type, val, mo, require_atomic);
}


//--------------------------bottom_type----------------------------------------
const Type *StoreNode::bottom_type() const {
  return Type::MEMORY;
}

//------------------------------hash-------------------------------------------
uint StoreNode::hash() const {
  // unroll addition of interesting fields
  //return (uintptr_t)in(Control) + (uintptr_t)in(Memory) + (uintptr_t)in(Address) + (uintptr_t)in(ValueIn);

  // Since they are not commoned, do not hash them:
  return NO_HASH;
}

//------------------------------Ideal------------------------------------------
// Change back-to-back Store(Store(m, p, y), p, x) to Store(m, p, x).
// When a store immediately follows a relevant allocation/initialization,
// try to capture it into the initialization, or hoist it above.
Node *StoreNode::Ideal(PhaseGVN *phase, bool can_reshape) {
  Node* p = MemNode::Ideal_common(phase, can_reshape);
  if (p)  return (p == NodeSentinel) ? NULL : p;

  Node* mem     = in(MemNode::Memory);
  Node* address = in(MemNode::Address);

  // Back-to-back stores to same address?  Fold em up.  Generally
  // unsafe if I have intervening uses...  Also disallowed for StoreCM
  // since they must follow each StoreP operation.  Redundant StoreCMs
  // are eliminated just before matching in final_graph_reshape.
  if (mem->is_Store() && mem->in(MemNode::Address)->eqv_uncast(address) &&
      mem->Opcode() != Op_StoreCM) {
    // Looking at a dead closed cycle of memory?
    assert(mem != mem->in(MemNode::Memory), "dead loop in StoreNode::Ideal");

    assert(Opcode() == mem->Opcode() ||
           phase->C->get_alias_index(adr_type()) == Compile::AliasIdxRaw ||
           (is_mismatched_access() || mem->as_Store()->is_mismatched_access()),
           "no mismatched stores, except on raw memory");

    if (mem->outcnt() == 1 &&           // check for intervening uses
        mem->as_Store()->memory_size() <= this->memory_size()) {
      // If anybody other than 'this' uses 'mem', we cannot fold 'mem' away.
      // For example, 'mem' might be the final state at a conditional return.
      // Or, 'mem' might be used by some node which is live at the same time
      // 'this' is live, which might be unschedulable.  So, require exactly
      // ONE user, the 'this' store, until such time as we clone 'mem' for
      // each of 'mem's uses (thus making the exactly-1-user-rule hold true).
      if (can_reshape) {  // (%%% is this an anachronism?)
        set_req_X(MemNode::Memory, mem->in(MemNode::Memory),
                  phase->is_IterGVN());
      } else {
        // It's OK to do this in the parser, since DU info is always accurate,
        // and the parser always refers to nodes via SafePointNode maps.
        set_req(MemNode::Memory, mem->in(MemNode::Memory));
      }
      return this;
    }
  }

  // Capture an unaliased, unconditional, simple store into an initializer.
  // Or, if it is independent of the allocation, hoist it above the allocation.
  if (ReduceFieldZeroing && /*can_reshape &&*/
      mem->is_Proj() && mem->in(0)->is_Initialize()) {
    InitializeNode* init = mem->in(0)->as_Initialize();
    intptr_t offset = init->can_capture_store(this, phase, can_reshape);
    if (offset > 0) {
      Node* moved = init->capture_store(this, offset, phase, can_reshape);
      // If the InitializeNode captured me, it made a raw copy of me,
      // and I need to disappear.
      if (moved != NULL) {
        // %%% hack to ensure that Ideal returns a new node:
        mem = MergeMemNode::make(phase->C, mem);
        return mem;             // fold me away
      }
    }
  }

  return NULL;                  // No further progress
}
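
// Illustrative Java example (not from the original source) of the back-to-back
// store folding above:
//   a.f = x;  a.f = y;      // no intervening load of a.f
// The first StoreI's only use is the second one, so
//   (StoreI (StoreI m p x) p y)  ==>  (StoreI m p y)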

//------------------------------Value-----------------------------------------
const Type *StoreNode::Value( PhaseTransform *phase ) const {
  // Either input is TOP ==> the result is TOP
  const Type *t1 = phase->type( in(MemNode::Memory) );
  if( t1 == Type::TOP ) return Type::TOP;
  const Type *t2 = phase->type( in(MemNode::Address) );
  if( t2 == Type::TOP ) return Type::TOP;
  const Type *t3 = phase->type( in(MemNode::ValueIn) );
  if( t3 == Type::TOP ) return Type::TOP;
  return Type::MEMORY;
}

//------------------------------Identity---------------------------------------
// Remove redundant stores:
//   Store(m, p, Load(m, p)) changes to m.
//   Store(Store(m, p, x), p, x) changes to Store(m, p, x).
Node *StoreNode::Identity( PhaseTransform *phase ) {
  Node* mem = in(MemNode::Memory);
  Node* adr = in(MemNode::Address);
  Node* val = in(MemNode::ValueIn);

  Node* result = this;

  // Load then Store?  Then the Store is useless
  if (val->is_Load() &&
      val->in(MemNode::Address)->eqv_uncast(adr) &&
      val->in(MemNode::Memory )->eqv_uncast(mem) &&
      val->as_Load()->store_Opcode() == Opcode()) {
    result = mem;
  }

  // Two stores in a row of the same value?
  if (mem->is_Store() &&
      mem->in(MemNode::Address)->eqv_uncast(adr) &&
      mem->in(MemNode::ValueIn)->eqv_uncast(val) &&
      mem->Opcode() == Opcode()) {
    result = mem;
  }

  // Store of zero anywhere into a freshly-allocated object?
  // Then the store is useless.
  // (It must already have been captured by the InitializeNode.)
  if (result == this &&
      ReduceFieldZeroing && phase->type(val)->is_zero_type()) {
    // a newly allocated object is already all-zeroes everywhere
    if (mem->is_Proj() && mem->in(0)->is_Allocate()) {
      result = mem;
    }

    if (result == this) {
      // the store may also apply to zero-bits in an earlier object
      Node* prev_mem = find_previous_store(phase);
      // Steps (a), (b):  Walk past independent stores to find an exact match.
      if (prev_mem != NULL) {
        Node* prev_val = can_see_stored_value(prev_mem, phase);
        if (prev_val != NULL && phase->eqv(prev_val, val)) {
          // prev_val and val might differ by a cast; it would be good
          // to keep the more informative of the two.
          result = mem;
        }
      }
    }
  }

  if (result != this && phase->is_IterGVN() != NULL) {
    MemBarNode* trailing = trailing_membar();
    if (trailing != NULL) {
#ifdef ASSERT
      const TypeOopPtr* t_oop = phase->type(in(Address))->isa_oopptr();
      assert(t_oop == NULL || t_oop->is_known_instance_field(), "only for non escaping objects");
#endif
      PhaseIterGVN* igvn = phase->is_IterGVN();
      trailing->remove(igvn);
    }
  }

  return result;
}
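
// Illustrative Java examples (not from the original source) of the three
// redundant-store cases handled above:
//   a.f = a.f;               // Store(m, p, Load(m, p))      ==> m
//   a.f = x;  a.f = x;       // Store(Store(m, p, x), p, x)  ==> Store(m, p, x)
//   p = new Foo(); p.f = 0;  // zero store into fresh object ==> already zeroed
//                            //   (requires ReduceFieldZeroing)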

//------------------------------match_edge-------------------------------------
// Do we Match on this edge index or not?  Match only memory & value
uint StoreNode::match_edge(uint idx) const {
  return idx == MemNode::Address || idx == MemNode::ValueIn;
}

//------------------------------cmp--------------------------------------------
// Do not common stores up together.  They generally have to be split
// back up anyways, so do not bother.
uint StoreNode::cmp( const Node &n ) const {
  return (&n == this);          // Always fail except on self
}

//------------------------------Ideal_masked_input-----------------------------
// Check for a useless mask before a partial-word store
// (StoreB ... (AndI valIn conIa) )
// If (conIa & mask == mask) this simplifies to
// (StoreB ... (valIn) )
Node *StoreNode::Ideal_masked_input(PhaseGVN *phase, uint mask) {
  Node *val = in(MemNode::ValueIn);
  if( val->Opcode() == Op_AndI ) {
    const TypeInt *t = phase->type( val->in(2) )->isa_int();
    if( t && t->is_con() && (t->get_con() & mask) == mask ) {
      set_req(MemNode::ValueIn, val->in(1));
      return this;
    }
  }
  return NULL;
}
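
// Illustrative example (not part of the original source): for StoreB the mask
// argument is 0xFF, so
//   (StoreB ... (AndI v #0x1FF))  ==>  (StoreB ... v)
// because 0x1FF & 0xFF == 0xFF, i.e. the AndI does not change the low 8 bits
// that actually reach memory.  An AndI with 0x7F would not qualify, since
// 0x7F & 0xFF != 0xFF.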


//------------------------------Ideal_sign_extended_input----------------------
// Check for useless sign-extension before a partial-word store
// (StoreB ... (RShiftI _ (LShiftI _ valIn conIL ) conIR) )
// If (conIL == conIR && conIR <= num_bits)  this simplifies to
// (StoreB ... (valIn) )
Node *StoreNode::Ideal_sign_extended_input(PhaseGVN *phase, int num_bits) {
  Node *val = in(MemNode::ValueIn);
  if( val->Opcode() == Op_RShiftI ) {
    const TypeInt *t = phase->type( val->in(2) )->isa_int();
    if( t && t->is_con() && (t->get_con() <= num_bits) ) {
      Node *shl = val->in(1);
      if( shl->Opcode() == Op_LShiftI ) {
        const TypeInt *t2 = phase->type( shl->in(2) )->isa_int();
        if( t2 && t2->is_con() && (t2->get_con() == t->get_con()) ) {
          set_req(MemNode::ValueIn, shl->in(1));
          return this;
        }
      }
    }
  }
  return NULL;
}
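
// Illustrative example (not part of the original source): for StoreB, num_bits
// is 24, so an int value sign-extended down to a byte
//   (StoreB ... (RShiftI (LShiftI v #24) #24))  ==>  (StoreB ... v)
// can drop both shifts, because only the low 8 bits reach memory and those are
// identical with or without the shift pair.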

//------------------------------value_never_loaded-----------------------------------
// Determine whether there are any possible loads of the value stored.
// For simplicity, we actually check if there are any loads from the
// address stored to, not just for loads of the value stored by this node.
//
bool StoreNode::value_never_loaded( PhaseTransform *phase) const {
  Node *adr = in(Address);
  const TypeOopPtr *adr_oop = phase->type(adr)->isa_oopptr();
  if (adr_oop == NULL)
    return false;
  if (!adr_oop->is_known_instance_field())
    return false; // if not a distinct instance, there may be aliases of the address
  for (DUIterator_Fast imax, i = adr->fast_outs(imax); i < imax; i++) {
    Node *use = adr->fast_out(i);
    if (use->is_Load() || use->is_LoadStore()) {
      return false;
    }
  }
  return true;
}

MemBarNode* StoreNode::trailing_membar() const {
  if (is_release()) {
    MemBarNode* trailing_mb = NULL;
    for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
      Node* u = fast_out(i);
      if (u->is_MemBar()) {
        if (u->as_MemBar()->trailing_store()) {
          assert(u->Opcode() == Op_MemBarVolatile, "");
          assert(trailing_mb == NULL, "only one");
          trailing_mb = u->as_MemBar();
#ifdef ASSERT
          Node* leading = u->as_MemBar()->leading_membar();
          assert(leading->Opcode() == Op_MemBarRelease, "incorrect membar");
          assert(leading->as_MemBar()->leading_store(), "incorrect membar pair");
          assert(leading->as_MemBar()->trailing_membar() == u, "incorrect membar pair");
#endif
        } else {
          assert(u->as_MemBar()->standalone(), "");
        }
      }
    }
    return trailing_mb;
  }
  return NULL;
}

//=============================================================================
//------------------------------Ideal------------------------------------------
// If the store is from an AND mask that leaves the low bits untouched, then
// we can skip the AND operation.  If the store is from a sign-extension
// (a left shift, then right shift) we can skip both.
Node *StoreBNode::Ideal(PhaseGVN *phase, bool can_reshape){
  Node *progress = StoreNode::Ideal_masked_input(phase, 0xFF);
  if( progress != NULL ) return progress;

  progress = StoreNode::Ideal_sign_extended_input(phase, 24);
  if( progress != NULL ) return progress;

  // Finally check the default case
  return StoreNode::Ideal(phase, can_reshape);
}

//=============================================================================
//------------------------------Ideal------------------------------------------
// If the store is from an AND mask that leaves the low bits untouched, then
// we can skip the AND operation
Node *StoreCNode::Ideal(PhaseGVN *phase, bool can_reshape){
  Node *progress = StoreNode::Ideal_masked_input(phase, 0xFFFF);
  if( progress != NULL ) return progress;

  progress = StoreNode::Ideal_sign_extended_input(phase, 16);
  if( progress != NULL ) return progress;

  // Finally check the default case
  return StoreNode::Ideal(phase, can_reshape);
}

//=============================================================================
//------------------------------Identity---------------------------------------
Node *StoreCMNode::Identity( PhaseTransform *phase ) {
  // No need to card mark when storing a null ptr
  Node* my_store = in(MemNode::OopStore);
  if (my_store->is_Store()) {
    const Type *t1 = phase->type( my_store->in(MemNode::ValueIn) );
    if( t1 == TypePtr::NULL_PTR ) {
      return in(MemNode::Memory);
    }
  }
  return this;
}

//=============================================================================
//------------------------------Ideal---------------------------------------
Node *StoreCMNode::Ideal(PhaseGVN *phase, bool can_reshape){
  Node* progress = StoreNode::Ideal(phase, can_reshape);
  if (progress != NULL) return progress;

  Node* my_store = in(MemNode::OopStore);
  if (my_store->is_MergeMem()) {
    Node* mem = my_store->as_MergeMem()->memory_at(oop_alias_idx());
    set_req(MemNode::OopStore, mem);
    return this;
  }

  return NULL;
}

//------------------------------Value-----------------------------------------
const Type *StoreCMNode::Value( PhaseTransform *phase ) const {
  // Either input is TOP ==> the result is TOP
  const Type *t = phase->type( in(MemNode::Memory) );
  if( t == Type::TOP ) return Type::TOP;
  t = phase->type( in(MemNode::Address) );
  if( t == Type::TOP ) return Type::TOP;
  t = phase->type( in(MemNode::ValueIn) );
  if( t == Type::TOP ) return Type::TOP;
  // If extra input is TOP ==> the result is TOP
  t = phase->type( in(MemNode::OopStore) );
  if( t == Type::TOP ) return Type::TOP;

  return StoreNode::Value( phase );
}


//=============================================================================
//----------------------------------SCMemProjNode------------------------------
const Type * SCMemProjNode::Value( PhaseTransform *phase ) const
{
  return bottom_type();
}

//=============================================================================
//----------------------------------LoadStoreNode------------------------------
LoadStoreNode::LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* rt, uint required )
  : Node(required),
    _type(rt),
    _adr_type(at)
{
  init_req(MemNode::Control, c  );
  init_req(MemNode::Memory , mem);
  init_req(MemNode::Address, adr);
  init_req(MemNode::ValueIn, val);
  init_class_id(Class_LoadStore);
}

uint LoadStoreNode::ideal_reg() const {
  return _type->ideal_reg();
}

bool LoadStoreNode::result_not_used() const {
  for( DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++ ) {
    Node *x = fast_out(i);
    if (x->Opcode() == Op_SCMemProj) continue;
    return false;
  }
  return true;
}

MemBarNode* LoadStoreNode::trailing_membar() const {
  MemBarNode* trailing = NULL;
  for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
    Node* u = fast_out(i);
    if (u->is_MemBar()) {
      if (u->as_MemBar()->trailing_load_store()) {
        assert(u->Opcode() == Op_MemBarAcquire, "");
        assert(trailing == NULL, "only one");
        trailing = u->as_MemBar();
#ifdef ASSERT
        Node* leading = trailing->leading_membar();
        assert(support_IRIW_for_not_multiple_copy_atomic_cpu || leading->Opcode() == Op_MemBarRelease, "incorrect membar");
        assert(leading->as_MemBar()->leading_load_store(), "incorrect membar pair");
        assert(leading->as_MemBar()->trailing_membar() == trailing, "incorrect membar pair");
#endif
      } else {
        assert(u->as_MemBar()->standalone(), "wrong barrier kind");
      }
    }
  }

  return trailing;
}

uint LoadStoreNode::size_of() const { return sizeof(*this); }

//=============================================================================
//----------------------------------LoadStoreConditionalNode--------------------
LoadStoreConditionalNode::LoadStoreConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex ) : LoadStoreNode(c, mem, adr, val, NULL, TypeInt::BOOL, 5) {
  init_req(ExpectedIn, ex );
}

//=============================================================================
//-------------------------------adr_type--------------------------------------
// Report the address type based on the destination address input.
const TypePtr* ClearArrayNode::adr_type() const {
  Node *adr = in(3);
  return MemNode::calculate_adr_type(adr->bottom_type());
}

//------------------------------match_edge-------------------------------------
// Do we Match on this edge index or not?  Do not match memory
uint ClearArrayNode::match_edge(uint idx) const {
  return idx > 1;
}

//------------------------------Identity---------------------------------------
// Clearing a zero length array does nothing
Node *ClearArrayNode::Identity( PhaseTransform *phase ) {
  return phase->type(in(2))->higher_equal(TypeX::ZERO)  ? in(1) : this;
}

//------------------------------Idealize---------------------------------------
// Clearing a short array is faster with stores
Node *ClearArrayNode::Ideal(PhaseGVN *phase, bool can_reshape){
  const int unit = BytesPerLong;
  const TypeX* t = phase->type(in(2))->isa_intptr_t();
  if (!t)  return NULL;
  if (!t->is_con())  return NULL;
  intptr_t raw_count = t->get_con();
  intptr_t size = raw_count;
  if (!Matcher::init_array_count_is_in_bytes) size *= unit;
  // Clearing nothing uses the Identity call.
  // Negative clears are possible on dead ClearArrays
  // (see jck test stmt114.stmt11402.val).
  if (size <= 0 || size % unit != 0)  return NULL;
  intptr_t count = size / unit;
  // Length too long; use fast hardware clear
  if (size > Matcher::init_array_short_size)  return NULL;
  Node *mem = in(1);
  if( phase->type(mem)==Type::TOP ) return NULL;
  Node *adr = in(3);
  const Type* at = phase->type(adr);
  if( at==Type::TOP ) return NULL;
  const TypePtr* atp = at->isa_ptr();
  // adjust atp to be the correct array element address type
  if (atp == NULL)  atp = TypePtr::BOTTOM;
  else              atp = atp->add_offset(Type::OffsetBot);
  // Get base for derived pointer purposes
  if( adr->Opcode() != Op_AddP ) Unimplemented();
  Node *base = adr->in(1);

  Node *zero = phase->makecon(TypeLong::ZERO);
  Node *off  = phase->MakeConX(BytesPerLong);
  mem = new (phase->C) StoreLNode(in(0),mem,adr,atp,zero,MemNode::unordered,false);
  count--;
  while( count-- ) {
    mem = phase->transform(mem);
    adr = phase->transform(new (phase->C) AddPNode(base,adr,off));
    mem = new (phase->C) StoreLNode(in(0),mem,adr,atp,zero,MemNode::unordered,false);
  }
  return mem;
}
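
// Illustrative example (not part of the original source), assuming the count
// input is in double-words and init_array_short_size permits expansion:
// clearing a 16-byte body (count == 2) becomes two explicit zero stores
// instead of a hardware clear loop:
//   mem1 = (StoreL ctl mem  adr                #0)
//   mem2 = (StoreL ctl mem1 (AddP base adr #8) #0)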

//----------------------------step_through----------------------------------
// Return allocation input memory edge if it is different instance
// or itself if it is the one we are looking for.
bool ClearArrayNode::step_through(Node** np, uint instance_id, PhaseTransform* phase) {
  Node* n = *np;
  assert(n->is_ClearArray(), "sanity");
  intptr_t offset;
  AllocateNode* alloc = AllocateNode::Ideal_allocation(n->in(3), phase, offset);
  // This method is called only before Allocate nodes are expanded during
  // macro nodes expansion. Before that ClearArray nodes are only generated
  // in LibraryCallKit::generate_arraycopy() which follows allocations.
  assert(alloc != NULL, "should have allocation");
  if (alloc->_idx == instance_id) {
    // Can not bypass initialization of the instance we are looking for.
    return false;
  }
  // Otherwise skip it.
  InitializeNode* init = alloc->initialization();
  if (init != NULL)
    *np = init->in(TypeFunc::Memory);
  else
    *np = alloc->in(TypeFunc::Memory);
  return true;
}

//----------------------------clear_memory-------------------------------------
// Generate code to initialize object storage to zero.
Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest,
                                   intptr_t start_offset,
                                   Node* end_offset,
                                   PhaseGVN* phase) {
  Compile* C = phase->C;
  intptr_t offset = start_offset;

  int unit = BytesPerLong;
  if ((offset % unit) != 0) {
    Node* adr = new (C) AddPNode(dest, dest, phase->MakeConX(offset));
    adr = phase->transform(adr);
    const TypePtr* atp = TypeRawPtr::BOTTOM;
    mem = StoreNode::make(*phase, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT, MemNode::unordered);
    mem = phase->transform(mem);
    offset += BytesPerInt;
  }
  assert((offset % unit) == 0, "");

  // Initialize the remaining stuff, if any, with a ClearArray.
  return clear_memory(ctl, mem, dest, phase->MakeConX(offset), end_offset, phase);
}

Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest,
                                   Node* start_offset,
                                   Node* end_offset,
                                   PhaseGVN* phase) {
  if (start_offset == end_offset) {
    // nothing to do
    return mem;
  }

  Compile* C = phase->C;
  int unit = BytesPerLong;
  Node* zbase = start_offset;
  Node* zend  = end_offset;

  // Scale to the unit required by the CPU:
  if (!Matcher::init_array_count_is_in_bytes) {
    Node* shift = phase->intcon(exact_log2(unit));
    zbase = phase->transform( new(C) URShiftXNode(zbase, shift) );
    zend  = phase->transform( new(C) URShiftXNode(zend,  shift) );
  }

  // Bulk clear double-words
  Node* zsize = phase->transform( new(C) SubXNode(zend, zbase) );
  Node* adr = phase->transform( new(C) AddPNode(dest, dest, start_offset) );
  mem = new (C) ClearArrayNode(ctl, mem, zsize, adr);
  return phase->transform(mem);
}

Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest,
                                   intptr_t start_offset,
                                   intptr_t end_offset,
                                   PhaseGVN* phase) {
  if (start_offset == end_offset) {
    // nothing to do
    return mem;
  }

  Compile* C = phase->C;
  assert((end_offset % BytesPerInt) == 0, "odd end offset");
  intptr_t done_offset = end_offset;
  if ((done_offset % BytesPerLong) != 0) {
    done_offset -= BytesPerInt;
  }
  if (done_offset > start_offset) {
    mem = clear_memory(ctl, mem, dest,
                       start_offset, phase->MakeConX(done_offset), phase);
  }
  if (done_offset < end_offset) { // emit the final 32-bit store
    Node* adr = new (C) AddPNode(dest, dest, phase->MakeConX(done_offset));
    adr = phase->transform(adr);
    const TypePtr* atp = TypeRawPtr::BOTTOM;
    mem = StoreNode::make(*phase, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT, MemNode::unordered);
    mem = phase->transform(mem);
    done_offset += BytesPerInt;
  }
  assert(done_offset == end_offset, "");
  return mem;
}
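
// Illustrative example (not part of the original source): a payload spanning
// [start_offset, end_offset) = [8, 20) is handled above as a bulk double-word
// clear of [8, 16) followed by a single 32-bit zero store at offset 16,
// because 20 is int-aligned but not long-aligned.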

//=============================================================================
// Do not match memory edge.
uint StrIntrinsicNode::match_edge(uint idx) const {
  return idx == 2 || idx == 3;
}

//------------------------------Ideal------------------------------------------
// Return a node which is more "ideal" than the current node.  Strip out
// control copies
Node *StrIntrinsicNode::Ideal(PhaseGVN *phase, bool can_reshape) {
  if (remove_dead_region(phase, can_reshape)) return this;
  // Don't bother trying to transform a dead node
  if (in(0) && in(0)->is_top())  return NULL;

  if (can_reshape) {
    Node* mem = phase->transform(in(MemNode::Memory));
    // If transformed to a MergeMem, get the desired slice
    uint alias_idx = phase->C->get_alias_index(adr_type());
    mem = mem->is_MergeMem() ? mem->as_MergeMem()->memory_at(alias_idx) : mem;
    if (mem != in(MemNode::Memory)) {
      set_req(MemNode::Memory, mem);
      return this;
    }
  }
  return NULL;
}

//------------------------------Value------------------------------------------
const Type *StrIntrinsicNode::Value( PhaseTransform *phase ) const {
  if (in(0) && phase->type(in(0)) == Type::TOP) return Type::TOP;
  return bottom_type();
}

//=============================================================================
//------------------------------match_edge-------------------------------------
// Do not match memory edge
uint EncodeISOArrayNode::match_edge(uint idx) const {
  return idx == 2 || idx == 3; // EncodeISOArray src (Binary dst len)
}

//------------------------------Ideal------------------------------------------
// Return a node which is more "ideal" than the current node.  Strip out
// control copies
Node *EncodeISOArrayNode::Ideal(PhaseGVN *phase, bool can_reshape) {
  return remove_dead_region(phase, can_reshape) ? this : NULL;
}

//------------------------------Value------------------------------------------
const Type *EncodeISOArrayNode::Value(PhaseTransform *phase) const {
  if (in(0) && phase->type(in(0)) == Type::TOP) return Type::TOP;
  return bottom_type();
}

//=============================================================================
MemBarNode::MemBarNode(Compile* C, int alias_idx, Node* precedent)
  : MultiNode(TypeFunc::Parms + (precedent == NULL? 0: 1)),
  _adr_type(C->get_adr_type(alias_idx)), _kind(Standalone)
#ifdef ASSERT
  , _pair_idx(0)
#endif
{
  init_class_id(Class_MemBar);
  Node* top = C->top();
  init_req(TypeFunc::I_O,top);
  init_req(TypeFunc::FramePtr,top);
  init_req(TypeFunc::ReturnAdr,top);
  if (precedent != NULL)
    init_req(TypeFunc::Parms, precedent);
}

//------------------------------cmp--------------------------------------------
uint MemBarNode::hash() const { return NO_HASH; }
uint MemBarNode::cmp( const Node &n ) const {
  return (&n == this);          // Always fail except on self
}

//------------------------------make-------------------------------------------
MemBarNode* MemBarNode::make(Compile* C, int opcode, int atp, Node* pn) {
  switch (opcode) {
  case Op_MemBarAcquire:     return new(C) MemBarAcquireNode(C, atp, pn);
  case Op_LoadFence:         return new(C) LoadFenceNode(C, atp, pn);
  case Op_MemBarRelease:     return new(C) MemBarReleaseNode(C, atp, pn);
  case Op_StoreFence:        return new(C) StoreFenceNode(C, atp, pn);
  case Op_MemBarAcquireLock: return new(C) MemBarAcquireLockNode(C, atp, pn);
  case Op_MemBarReleaseLock: return new(C) MemBarReleaseLockNode(C, atp, pn);
  case Op_MemBarVolatile:    return new(C) MemBarVolatileNode(C, atp, pn);
  case Op_MemBarCPUOrder:    return new(C) MemBarCPUOrderNode(C, atp, pn);
  case Op_Initialize:        return new(C) InitializeNode(C, atp, pn);
  case Op_MemBarStoreStore:  return new(C) MemBarStoreStoreNode(C, atp, pn);
  default: ShouldNotReachHere(); return NULL;
  }
}

void MemBarNode::remove(PhaseIterGVN *igvn) {
  if (outcnt() != 2) {
    return;
  }
  if (trailing_store() || trailing_load_store()) {
    MemBarNode* leading = leading_membar();
    if (leading != NULL) {
      assert(leading->trailing_membar() == this, "inconsistent leading/trailing membars");
      leading->remove(igvn);
    }
  }
  igvn->replace_node(proj_out(TypeFunc::Memory), in(TypeFunc::Memory));
  igvn->replace_node(proj_out(TypeFunc::Control), in(TypeFunc::Control));
}

//------------------------------Ideal------------------------------------------
// Return a node which is more "ideal" than the current node.  Strip out
// control copies
Node *MemBarNode::Ideal(PhaseGVN *phase, bool can_reshape) {
  if (remove_dead_region(phase, can_reshape)) return this;
  // Don't bother trying to transform a dead node
  if (in(0) && in(0)->is_top()) {
    return NULL;
  }

  // Eliminate volatile MemBars for scalar replaced objects.
  if (can_reshape && req() == (Precedent+1)) {
    bool eliminate = false;
    int opc = Opcode();
    if ((opc == Op_MemBarAcquire || opc == Op_MemBarVolatile)) {
      // Volatile field loads and stores.
      Node* my_mem = in(MemBarNode::Precedent);
      // The MemBarAcquire may keep an unused LoadNode alive through the Precedent edge
      if ((my_mem != NULL) && (opc == Op_MemBarAcquire) && (my_mem->outcnt() == 1)) {
        // if the Precedent is a decodeN and its input (a Load) is used at more than one place,
        // replace this Precedent (decodeN) with the Load instead.
        if ((my_mem->Opcode() == Op_DecodeN) && (my_mem->in(1)->outcnt() > 1))  {
          Node* load_node = my_mem->in(1);
          set_req(MemBarNode::Precedent, load_node);
          phase->is_IterGVN()->_worklist.push(my_mem);
          my_mem = load_node;
        } else {
          assert(my_mem->unique_out() == this, "sanity");
          del_req(Precedent);
          phase->is_IterGVN()->_worklist.push(my_mem); // remove dead node later
          my_mem = NULL;
        }
      }
      if (my_mem != NULL && my_mem->is_Mem()) {
        const TypeOopPtr* t_oop = my_mem->in(MemNode::Address)->bottom_type()->isa_oopptr();
        // Check for scalar replaced object reference.
        if( t_oop != NULL && t_oop->is_known_instance_field() &&
            t_oop->offset() != Type::OffsetBot &&
            t_oop->offset() != Type::OffsetTop) {
          eliminate = true;
        }
      }
    } else if (opc == Op_MemBarRelease) {
      // Final field stores.
      Node* alloc = AllocateNode::Ideal_allocation(in(MemBarNode::Precedent), phase);
      if ((alloc != NULL) && alloc->is_Allocate() &&
          AARCH64_ONLY ( alloc->as_Allocate()->does_not_escape_thread() )
          NOT_AARCH64  ( alloc->as_Allocate()->_is_non_escaping )
         ) {
        // The allocated object does not escape.
        eliminate = true;
      }
    }
    if (eliminate) {
      // Replace MemBar projections by its inputs.
      PhaseIterGVN* igvn = phase->is_IterGVN();
      remove(igvn);
      // Must return either the original node (now dead) or a new node
      // (Do not return a top here, since that would break the uniqueness of top.)
      return new (phase->C) ConINode(TypeInt::ZERO);
    }
  }
  return NULL;
}

//------------------------------Value------------------------------------------
const Type *MemBarNode::Value( PhaseTransform *phase ) const {
  if( !in(0) ) return Type::TOP;
  if( phase->type(in(0)) == Type::TOP )
    return Type::TOP;
  return TypeTuple::MEMBAR;
}

//------------------------------match------------------------------------------
// Construct projections for memory.
Node *MemBarNode::match( const ProjNode *proj, const Matcher *m ) {
  switch (proj->_con) {
  case TypeFunc::Control:
  case TypeFunc::Memory:
    return new (m->C) MachProjNode(this,proj->_con,RegMask::Empty,MachProjNode::unmatched_proj);
  }
  ShouldNotReachHere();
  return NULL;
}

void MemBarNode::set_store_pair(MemBarNode* leading, MemBarNode* trailing) {
  trailing->_kind = TrailingStore;
  leading->_kind = LeadingStore;
#ifdef ASSERT
  trailing->_pair_idx = leading->_idx;
  leading->_pair_idx = leading->_idx;
#endif
}

void MemBarNode::set_load_store_pair(MemBarNode* leading, MemBarNode* trailing) {
  trailing->_kind = TrailingLoadStore;
  leading->_kind = LeadingLoadStore;
#ifdef ASSERT
  trailing->_pair_idx = leading->_idx;
  leading->_pair_idx = leading->_idx;
#endif
}

MemBarNode* MemBarNode::trailing_membar() const {
  ResourceMark rm;
  Node* trailing = (Node*)this;
  VectorSet seen(Thread::current()->resource_area());

  Node_Stack multis(0);
  do {
    Node* c = trailing;
    uint i = 0;
    do {
      trailing = NULL;
      for (; i < c->outcnt(); i++) {
        Node* next = c->raw_out(i);
        if (next != c && next->is_CFG()) {
          if (c->is_MultiBranch()) {
            if (multis.node() == c) {
              multis.set_index(i+1);
            } else {
              multis.push(c, i+1);
            }
          }
          trailing = next;
          break;
        }
      }
      if (trailing != NULL && !seen.test_set(trailing->_idx)) {
        break;
      }
      while (multis.size() > 0) {
        c = multis.node();
        i = multis.index();
        if (i < c->req()) {
          break;
        }
        multis.pop();
      }
    } while (multis.size() > 0);
  } while (!trailing->is_MemBar() || !trailing->as_MemBar()->trailing());

  MemBarNode* mb = trailing->as_MemBar();
  assert((mb->_kind == TrailingStore && _kind == LeadingStore) ||
         (mb->_kind == TrailingLoadStore && _kind == LeadingLoadStore), "bad trailing membar");
  assert(mb->_pair_idx == _pair_idx, "bad trailing membar");
  return mb;
}

MemBarNode* MemBarNode::leading_membar() const {
  ResourceMark rm;
  VectorSet seen(Thread::current()->resource_area());
  Node_Stack regions(0);
  Node* leading = in(0);
  while (leading != NULL && (!leading->is_MemBar() || !leading->as_MemBar()->leading())) {
    while (leading == NULL || leading->is_top() || seen.test_set(leading->_idx)) {
      leading = NULL;
      while (regions.size() > 0 && leading == NULL) {
        Node* r = regions.node();
        uint i = regions.index();
        if (i < r->req()) {
          leading = r->in(i);
          regions.set_index(i+1);
        } else {
          regions.pop();
        }
      }
      if (leading == NULL) {
        assert(regions.size() == 0, "all paths should have been tried");
        return NULL;
      }
    }
    if (leading->is_Region()) {
      regions.push(leading, 2);
      leading = leading->in(1);
    } else {
      leading = leading->in(0);
    }
  }
#ifdef ASSERT
  Unique_Node_List wq;
  wq.push((Node*)this);
  uint found = 0;
  for (uint i = 0; i < wq.size(); i++) {
    Node* n = wq.at(i);
    if (n->is_Region()) {
      for (uint j = 1; j < n->req(); j++) {
        Node* in = n->in(j);
        if (in != NULL && !in->is_top()) {
          wq.push(in);
        }
      }
    } else {
      if (n->is_MemBar() && n->as_MemBar()->leading()) {
        assert(n == leading, "consistency check failed");
        found++;
      } else {
        Node* in = n->in(0);
        if (in != NULL && !in->is_top()) {
          wq.push(in);
        }
      }
    }
  }
  assert(found == 1 || (found == 0 && leading == NULL), "consistency check failed");
#endif
  if (leading == NULL) {
    return NULL;
  }
  MemBarNode* mb = leading->as_MemBar();
  assert((mb->_kind == LeadingStore && _kind == TrailingStore) ||
         (mb->_kind == LeadingLoadStore && _kind == TrailingLoadStore), "bad leading membar");
  assert(mb->_pair_idx == _pair_idx, "bad leading membar");
  return mb;
}

//===========================InitializeNode====================================
// SUMMARY:
// This node acts as a memory barrier on raw memory, after some raw stores.
// The 'cooked' oop value feeds from the Initialize, not the Allocation.
// The Initialize can 'capture' suitably constrained stores as raw inits.
// It can coalesce related raw stores into larger units (called 'tiles').
// It can avoid zeroing new storage for memory units which have raw inits.
// At macro-expansion, it is marked 'complete', and does not optimize further.
//
// EXAMPLE:
// The object 'new short[2]' occupies 16 bytes in a 32-bit machine.
//   ctl = incoming control; mem* = incoming memory
// (Note:  A star * on a memory edge denotes I/O and other standard edges.)
// First allocate uninitialized memory and fill in the header:
//   alloc = (Allocate ctl mem* 16 #short[].klass ...)
//   ctl := alloc.Control; mem* := alloc.Memory*
//   rawmem = alloc.Memory; rawoop = alloc.RawAddress
// Then initialize to zero the non-header parts of the raw memory block:
//   init = (Initialize alloc.Control alloc.Memory* alloc.RawAddress)
//   ctl := init.Control; mem.SLICE(#short[*]) := init.Memory
// After the initialize node executes, the object is ready for service:
//   oop := (CheckCastPP init.Control alloc.RawAddress #short[])
// Suppose its body is immediately initialized as {1,2}:
//   store1 = (StoreC init.Control init.Memory (+ oop 12) 1)
//   store2 = (StoreC init.Control store1      (+ oop 14) 2)
//   mem.SLICE(#short[*]) := store2
//
// DETAILS:
// An InitializeNode collects and isolates object initialization after
// an AllocateNode and before the next possible safepoint.  As a
// memory barrier (MemBarNode), it keeps critical stores from drifting
// down past any safepoint or any publication of the allocation.
// Before this barrier, a newly-allocated object may have uninitialized bits.
// After this barrier, it may be treated as a real oop, and GC is allowed.
//
// The semantics of the InitializeNode include an implicit zeroing of
// the new object from object header to the end of the object.
// (The object header and end are determined by the AllocateNode.)
//
// Certain stores may be added as direct inputs to the InitializeNode.
// These stores must update raw memory, and they must be to addresses
// derived from the raw address produced by AllocateNode, and with
// a constant offset.  They must be ordered by increasing offset.
// The first one is at in(RawStores), the last at in(req()-1).
// Unlike most memory operations, they are not linked in a chain,
// but are displayed in parallel as users of the rawmem output of
// the allocation.
//
// (See comments in InitializeNode::capture_store, which continue
// the example given above.)
//
// When the associated Allocate is macro-expanded, the InitializeNode
// may be rewritten to optimize collected stores.  A ClearArrayNode
// may also be created at that point to represent any required zeroing.
// The InitializeNode is then marked 'complete', prohibiting further
// capturing of nearby memory operations.
//
// During macro-expansion, all captured initializations which store
// constant values of 32 bits or smaller are coalesced (if advantageous)
// into larger 'tiles' of 32 or 64 bits.  This allows an object to be
// initialized in fewer memory operations.  Memory words which are
// covered by neither tiles nor non-constant stores are pre-zeroed
// by explicit stores of zero.  (The code shape happens to do all
// zeroing first, then all other stores, with both sequences occurring
// in order of ascending offsets.)
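//
// For instance (an illustrative example, not part of the original comment):
// four captured one-byte constant stores of 1, 2, 3 and 4 at offsets 12..15
// can be coalesced into a single 32-bit store of the combined constant (with
// the byte order chosen to match the platform), turning four raw stores into
// one.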
//
// Alternatively, code may be inserted between an AllocateNode and its
// InitializeNode, to perform arbitrary initialization of the new object.
// E.g., the object copying intrinsics insert complex data transfers here.
// The initialization must then be marked as 'complete' to disable the
// built-in zeroing semantics and the collection of initializing stores.
//
// While an InitializeNode is incomplete, reads from the memory state
// produced by it are optimizable if they match the control edge and
// new oop address associated with the allocation/initialization.
// They return a stored value (if the offset matches) or else zero.
// A write to the memory state, if it matches control and address,
// and if it is to a constant offset, may be 'captured' by the
// InitializeNode.  It is cloned as a raw memory operation and rewired
// inside the initialization, to the raw oop produced by the allocation.
// Operations on addresses which are provably distinct (e.g., to
// other AllocateNodes) are allowed to bypass the initialization.
//
// The effect of all this is to consolidate object initialization
// (both arrays and non-arrays, both piecewise and bulk) into a
// single location, where it can be optimized as a unit.
//
// Only stores with an offset less than TrackedInitializationLimit words
// will be considered for capture by an InitializeNode.  This puts a
// reasonable limit on the complexity of optimized initializations.

//---------------------------InitializeNode------------------------------------
InitializeNode::InitializeNode(Compile* C, int adr_type, Node* rawoop)
  : MemBarNode(C, adr_type, rawoop),
    _is_complete(Incomplete), _does_not_escape(false)
{
  init_class_id(Class_Initialize);

  assert(adr_type == Compile::AliasIdxRaw, "only valid atp");
  assert(in(RawAddress) == rawoop, "proper init");
  // Note:  allocation() can be NULL, for secondary initialization barriers
}

// Since this node is not matched, it will be processed by the
// register allocator.  Declare that there are no constraints
// on the allocation of the RawAddress edge.
const RegMask &InitializeNode::in_RegMask(uint idx) const {
  // This edge should be set to top, by the set_complete.  But be conservative.
  if (idx == InitializeNode::RawAddress)
    return *(Compile::current()->matcher()->idealreg2spillmask[in(idx)->ideal_reg()]);
  return RegMask::Empty;
}

Node* InitializeNode::memory(uint alias_idx) {
  Node* mem = in(Memory);
  if (mem->is_MergeMem()) {
    return mem->as_MergeMem()->memory_at(alias_idx);
  } else {
    // incoming raw memory is not split
    return mem;
  }
}

bool InitializeNode::is_non_zero() {
  if (is_complete())  return false;
  remove_extra_zeroes();
  return (req() > RawStores);
}

void InitializeNode::set_complete(PhaseGVN* phase) {
  assert(!is_complete(), "caller responsibility");
  _is_complete = Complete;

  // After this node is complete, it contains a bunch of
  // raw-memory initializations.  There is no need for
  // it to have anything to do with non-raw memory effects.
  // Therefore, tell all non-raw users to re-optimize themselves,
  // after skipping the memory effects of this initialization.
  PhaseIterGVN* igvn = phase->is_IterGVN();
  if (igvn)  igvn->add_users_to_worklist(this);
}

// convenience function
// return false if the init contains any stores already
bool AllocateNode::maybe_set_complete(PhaseGVN* phase) {
  InitializeNode* init = initialization();
  if (init == NULL || init->is_complete())  return false;
  init->remove_extra_zeroes();
  // for now, if this allocation has already collected any inits, bail:
  if (init->is_non_zero())  return false;
  init->set_complete(phase);
  return true;
}

void InitializeNode::remove_extra_zeroes() {
  if (req() == RawStores)  return;
  Node* zmem = zero_memory();
  uint fill = RawStores;
  for (uint i = fill; i < req(); i++) {
    Node* n = in(i);
    if (n->is_top() || n == zmem)  continue;  // skip
    if (fill < i)  set_req(fill, n);          // compact
    ++fill;
  }
  // delete any empty spaces created:
  while (fill < req()) {
    del_req(fill);
  }
}

// Helper for remembering which stores go with which offsets.
intptr_t InitializeNode::get_store_offset(Node* st, PhaseTransform* phase) {
  if (!st->is_Store())  return -1;  // can happen to dead code via subsume_node
  intptr_t offset = -1;
  Node* base = AddPNode::Ideal_base_and_offset(st->in(MemNode::Address),
                                               phase, offset);
  if (base == NULL)     return -1;  // something is dead,
  if (offset < 0)       return -1;  //        dead, dead
  return offset;
}

// Helper for proving that an initialization expression is
// "simple enough" to be folded into an object initialization.
// Attempts to prove that a store's initial value 'n' can be captured
// within the initialization without creating a vicious cycle, such as:
//     { Foo p = new Foo(); p.next = p; }
// True for constants and parameters and small combinations thereof.
bool InitializeNode::detect_init_independence(Node* n, int& count) {
  if (n == NULL)      return true;   // (can this really happen?)
  if (n->is_Proj())   n = n->in(0);
  if (n == this)      return false;  // found a cycle
  if (n->is_Con())    return true;
  if (n->is_Start())  return true;   // params, etc., are OK
  if (n->is_Root())   return true;   // even better

  Node* ctl = n->in(0);
  if (ctl != NULL && !ctl->is_top()) {
    if (ctl->is_Proj())  ctl = ctl->in(0);
    if (ctl == this)  return false;

    // If we already know that the enclosing memory op is pinned right after
    // the init, then any control flow that the store has picked up
    // must have preceded the init, or else be equal to the init.
    // Even after loop optimizations (which might change control edges)
    // a store is never pinned *before* the availability of its inputs.
    if (!MemNode::all_controls_dominate(n, this))
      return false;                  // failed to prove a good control
  }

  // Check data edges for possible dependencies on 'this'.
  if ((count += 1) > 20)  return false;  // complexity limit
  for (uint i = 1; i < n->req(); i++) {
    Node* m = n->in(i);
    if (m == NULL || m == n || m->is_top())  continue;
    uint first_i = n->find_edge(m);
    if (i != first_i)  continue;  // process duplicate edge just once
    if (!detect_init_independence(m, count)) {
      return false;
    }
  }

  return true;
}

// Here are all the checks a Store must pass before it can be moved into
// an initialization.  Returns zero if a check fails.
// On success, returns the (constant) offset to which the store applies,
// within the initialized memory.
intptr_t InitializeNode::can_capture_store(StoreNode* st, PhaseTransform* phase, bool can_reshape) {
  const int FAIL = 0;
  if (st->req() != MemNode::ValueIn + 1)
    return FAIL;                // an inscrutable StoreNode (card mark?)
  Node* ctl = st->in(MemNode::Control);
  if (!(ctl != NULL && ctl->is_Proj() && ctl->in(0) == this))
    return FAIL;                // must be unconditional after the initialization
  Node* mem = st->in(MemNode::Memory);
  if (!(mem->is_Proj() && mem->in(0) == this))
    return FAIL;                // must not be preceded by other stores
  Node* adr = st->in(MemNode::Address);
  intptr_t offset;
  AllocateNode* alloc = AllocateNode::Ideal_allocation(adr, phase, offset);
  if (alloc == NULL)
    return FAIL;                // inscrutable address
  if (alloc != allocation())
    return FAIL;                // wrong allocation!  (store needs to float up)
  int size_in_bytes = st->memory_size();
  if ((size_in_bytes != 0) && (offset % size_in_bytes) != 0) {
    return FAIL;                // mismatched access
  }
  Node* val = st->in(MemNode::ValueIn);
  int complexity_count = 0;
  if (!detect_init_independence(val, complexity_count))
    return FAIL;                // stored value must be 'simple enough'
