// If n is a TypeNode, capture any more-precise type permanently into the Node
if (t != n->bottom_type()) {
hash_delete(n); // changing bottom type may force a rehash
n->raise_bottom_type(t);
_worklist.push(n); // n re-enters the hash table via the worklist
}
// Idealize graph using DU info. Must clone() into new-space.
// DU info is generally used to show profitability, progress or safety
// (but generally not needed for correctness).
Node *nn = n->Ideal_DU_postCCP(this);
// TEMPORARY fix to ensure that 2nd GVN pass eliminates NULL checks
switch( n->Opcode() ) {
case Op_FastLock: // Revisit FastLocks for lock coarsening
case Op_If:
case Op_CountedLoopEnd:
case Op_Region:
case Op_Loop:
case Op_CountedLoop:
case Op_Conv2B:
case Op_Opaque1:
case Op_Opaque2:
_worklist.push(n);
break;
default:
break;
}
if( nn ) {
_worklist.push(n);
// Put users of 'n' onto worklist for second igvn transform
add_users_to_worklist(n);
return nn;
}
return n;
}
//---------------------------------saturate------------------------------------
const Type* PhaseCCP::saturate(const Type* new_type, const Type* old_type,
const Type* limit_type) const {
const Type* wide_type = new_type->widen(old_type, limit_type);
if (wide_type != new_type) { // did we widen?
// If so, we may have widened beyond the limit type. Clip it back down.
new_type = wide_type->filter(limit_type);
}
return new_type;
}
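// Illustrative sketch (not part of the original source), assuming TypeInt ranges:
//   old_type   = int:[0..1]     (recorded on the previous visit of the phi)
//   new_type   = int:[0..2]     (freshly computed by PhiNode::Value)
//   limit_type = int:[0..10]    (e.g. the phi's bottom type)
// widen() may jump well past [0..2] (eventually giving up at TypeInt::INT), and
// filter(limit_type) then clips the result back inside [0..10], so the optimistic
// CCP pass converges in a few steps instead of one step per possible value.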
//------------------------------print_statistics-------------------------------
#ifndef PRODUCT
void PhaseCCP::print_statistics() {
tty->print_cr("CCP: %d constants found: %d", _total_invokes, _total_constants);
}
#endif
//=============================================================================
#ifndef PRODUCT
uint PhasePeephole::_total_peepholes = 0;
#endif
//------------------------------PhasePeephole----------------------------------
// Peephole optimization pass over register-allocated basic blocks
PhasePeephole::PhasePeephole( PhaseRegAlloc *regalloc, PhaseCFG &cfg )
: PhaseTransform(Peephole), _regalloc(regalloc), _cfg(cfg) {
NOT_PRODUCT( clear_peepholes(); )
}
#ifndef PRODUCT
//------------------------------~PhasePeephole---------------------------------
PhasePeephole::~PhasePeephole() {
_total_peepholes += count_peepholes();
}
#endif
//------------------------------transform--------------------------------------
Node *PhasePeephole::transform( Node *n ) {
ShouldNotCallThis();
return NULL;
}
//------------------------------do_transform-----------------------------------
void PhasePeephole::do_transform() {
bool method_name_not_printed = true;
// Examine each basic block
for (uint block_number = 1; block_number < _cfg.number_of_blocks(); ++block_number) {
Block* block = _cfg.get_block(block_number);
bool block_not_printed = true;
// and each instruction within a block
uint end_index = block->number_of_nodes();
// block->end_idx() not valid after PhaseRegAlloc
for( uint instruction_index = 1; instruction_index < end_index; ++instruction_index ) {
Node *n = block->get_node(instruction_index);
if( n->is_Mach() ) {
MachNode *m = n->as_Mach();
int deleted_count = 0;
// check for peephole opportunities
MachNode *m2 = m->peephole( block, instruction_index, _regalloc, deleted_count, C );
if( m2 != NULL ) {
#ifndef PRODUCT
if( PrintOptoPeephole ) {
// Print method, first time only
if( C->method() && method_name_not_printed ) {
C->method()->print_short_name(); tty->cr();
method_name_not_printed = false;
}
// Print this block
if( Verbose && block_not_printed) {
tty->print_cr("in block");
block->dump();
block_not_printed = false;
}
// Print instructions being deleted
for( int i = (deleted_count - 1); i >= 0; --i ) {
block->get_node(instruction_index-i)->as_Mach()->format(_regalloc); tty->cr();
}
tty->print_cr("replaced with");
// Print new instruction
m2->format(_regalloc);
tty->print("\n\n");
}
#endif
// Remove old nodes from basic block and update instruction_index
// (old nodes still exist and may have edges pointing to them
// as register allocation info is stored in the allocator using
// the node index to live range mappings.)
uint safe_instruction_index = (instruction_index - deleted_count);
for( ; (instruction_index > safe_instruction_index); --instruction_index ) {
block->remove_node( instruction_index );
}
// install new node after safe_instruction_index
block->insert_node(m2, safe_instruction_index + 1);
end_index = block->number_of_nodes() - 1; // Recompute new block size
NOT_PRODUCT( inc_peepholes(); )
}
}
}
}
}
//------------------------------print_statistics-------------------------------
#ifndef PRODUCT
void PhasePeephole::print_statistics() {
tty->print_cr("Peephole: peephole rules applied: %d", _total_peepholes);
}
#endif
//=============================================================================
//------------------------------set_req_X--------------------------------------
void Node::set_req_X( uint i, Node *n, PhaseIterGVN *igvn ) {
assert( is_not_dead(n), "can not use dead node");
assert( igvn->hash_find(this) != this, "Need to remove from hash before changing edges" );
Node *old = in(i);
set_req(i, n);
// old goes dead?
if( old ) {
switch (old->outcnt()) {
case 0:
// Put into the worklist to kill later. We do not kill it now because the
// recursive kill will delete the current node (this) if dead-loop exists
if (!old->is_top())
igvn->_worklist.push( old );
break;
case 1:
if( old->is_Store() || old->has_special_unique_user() )
igvn->add_users_to_worklist( old );
break;
case 2:
if( old->is_Store() )
igvn->add_users_to_worklist( old );
if( old->Opcode() == Op_Region )
igvn->_worklist.push(old);
break;
case 3:
if( old->Opcode() == Op_Region ) {
igvn->_worklist.push(old);
igvn->add_users_to_worklist( old );
}
break;
default:
break;
}
}
}
//-------------------------------replace_by-----------------------------------
// Using def-use info, replace one node for another. Follow the def-use info
// to all users of the OLD node. Then make all uses point to the NEW node.
void Node::replace_by(Node *new_node) {
assert(!is_top(), "top node has no DU info");
for (DUIterator_Last imin, i = last_outs(imin); i >= imin; ) {
Node* use = last_out(i);
uint uses_found = 0;
for (uint j = 0; j < use->len(); j++) {
if (use->in(j) == this) {
if (j < use->req())
use->set_req(j, new_node);
else use->set_prec(j, new_node);
uses_found++;
}
}
i -= uses_found; // we deleted 1 or more copies of this edge
}
}
//=============================================================================
//-----------------------------------------------------------------------------
void Type_Array::grow( uint i ) {
if( !_max ) {
_max = 1;
_types = (const Type**)_a->Amalloc( _max * sizeof(Type*) );
_types[0] = NULL;
}
uint old = _max;
while( i >= _max ) _max <<= 1; // Double to fit
_types = (const Type**)_a->Arealloc( _types, old*sizeof(Type*),_max*sizeof(Type*));
memset( &_types[old], 0, (_max-old)*sizeof(Type*) );
}
//------------------------------dump-------------------------------------------
#ifndef PRODUCT
void Type_Array::dump() const {
uint max = Size();
for( uint i = 0; i < max; i++ ) {
if( _types[i] != NULL ) {
tty->print(" %d\t== ", i); _types[i]->dump(); tty->cr();
}
}
}
#endif
C:\hotspot-69087d08d473\src\share\vm/opto/phaseX.hpp
/*
* Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_OPTO_PHASEX_HPP
#define SHARE_VM_OPTO_PHASEX_HPP
#include "libadt/dict.hpp"
#include "libadt/vectset.hpp"
#include "memory/resourceArea.hpp"
#include "opto/memnode.hpp"
#include "opto/node.hpp"
#include "opto/phase.hpp"
#include "opto/type.hpp"
class Compile;
class ConINode;
class ConLNode;
class Node;
class Type;
class PhaseTransform;
class PhaseGVN;
class PhaseIterGVN;
class PhaseCCP;
class PhasePeephole;
class PhaseRegAlloc;
//-----------------------------------------------------------------------------
// Expandable closed hash-table of nodes, initialized to NULL.
// Note that the constructor just zeros things
// Storage is reclaimed when the Arena's lifetime is over.
class NodeHash : public StackObj {
protected:
Arena *_a; // Arena to allocate in
uint _max; // Size of table (power of 2)
uint _inserts; // For grow and debug, count of hash_inserts
uint _insert_limit; // 'grow' when _inserts reaches _insert_limit
Node **_table; // Hash table of Node pointers
Node *_sentinel; // Replaces deleted entries in hash table
public:
NodeHash(uint est_max_size);
NodeHash(Arena *arena, uint est_max_size);
NodeHash(NodeHash *use_this_state);
#ifdef ASSERT
~NodeHash(); // Unlock all nodes upon destruction of table.
void operator=(const NodeHash&); // Unlock all nodes upon replacement of table.
#endif
Node *hash_find(const Node*);// Find an equivalent version in hash table
Node *hash_find_insert(Node*);// If not in table insert else return found node
void hash_insert(Node*); // Insert into hash table
bool hash_delete(const Node*);// Replace with _sentinel in hash table
void check_grow() {
_inserts++;
if( _inserts == _insert_limit ) { grow(); }
assert( _inserts <= _insert_limit, "hash table overflow");
assert( _inserts < _max, "hash table overflow" );
}
static uint round_up(uint); // Round up to nearest power of 2
void grow(); // Grow _table to next power of 2 and rehash
// Return 75% of _max, rounded up.
uint insert_limit() const { return _max - (_max>>2); }
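// Illustrative example (not in the original source): for _max == 32 this gives
// 32 - 8 == 24 inserts before grow() is triggered.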
void clear(); // Set all entries to NULL, keep storage.
// Size of hash table
uint size() const { return _max; }
// Return Node* at index in table
Node *at(uint table_index) {
assert(table_index < _max, "Must be within table");
return _table[table_index];
}
void remove_useless_nodes(VectorSet &useful); // replace with sentinel
void replace_with(NodeHash* nh);
void check_no_speculative_types(); // Check no speculative part for type nodes in table
Node *sentinel() { return _sentinel; }
#ifndef PRODUCT
Node *find_index(uint idx); // For debugging
void dump(); // For debugging, dump statistics
#endif
uint _grows; // For debugging, count of table grow()s
uint _look_probes; // For debugging, count of hash probes
uint _lookup_hits; // For debugging, count of hash_finds
uint _lookup_misses; // For debugging, count of hash_finds
uint _insert_probes; // For debugging, count of hash probes
uint _delete_probes; // For debugging, count of hash probes for deletes
uint _delete_hits; // For debugging, count of hash probes for deletes
uint _delete_misses; // For debugging, count of hash probes for deletes
uint _total_inserts; // For debugging, total inserts into hash table
uint _total_insert_probes; // For debugging, total probes while inserting
};
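// A minimal usage sketch (illustrative only, not part of the original source):
// the typical value-numbering pattern is to probe for an existing equivalent
// node before committing a freshly built one, e.g.
//   Node* hit = table.hash_find_insert(n);  // returns the hit, or NULL if n was inserted
//   if (hit != NULL && hit != n) {
//     // an equivalent node already exists; use 'hit' and discard 'n'
//   }
// hash_delete() must be called before changing the edges of a node that is
// already in the table, since a node's hash depends on its inputs.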
//-----------------------------------------------------------------------------
// Map dense integer indices to Types. Uses classic doubling-array trick.
// Abstractly provides an infinite array of Type*'s, initialized to NULL.
// Note that the constructor just zeros things, and since I use Arena
// allocation I do not need a destructor to reclaim storage.
// Despite the general name, this class is customized for use by PhaseTransform.
class Type_Array : public StackObj {
Arena *_a; // Arena to allocate in
uint _max;
const Type **_types;
void grow( uint i ); // Grow array node to fit
const Type *operator[] ( uint i ) const // Lookup, or NULL for not mapped
{ return (i<_max) ? _types[i] : (Type*)NULL; }
friend class PhaseTransform;
public:
Type_Array(Arena *a) : _a(a), _max(0), _types(0) {}
Type_Array(Type_Array *ta) : _a(ta->_a), _max(ta->_max), _types(ta->_types) { }
const Type *fast_lookup(uint i) const{assert(i<_max,"oob");return _types[i];}
// Extend the mapping: index i maps to Type *n.
void map( uint i, const Type *n ) { if( i>=_max ) grow(i); _types[i] = n; }
uint Size() const { return _max; }
#ifndef PRODUCT
void dump() const;
#endif
};
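// Illustrative sketch (not in the original source): map() grows the doubling
// array on demand, so callers can treat it as an infinite NULL-initialized
// array indexed by node _idx, e.g.
//   Type_Array types(arena);                   // 'arena' is a hypothetical Arena*
//   types.map(n->_idx, TypeInt::INT);          // grows storage as needed
//   const Type* t = (n->_idx < types.Size()) ? types.fast_lookup(n->_idx) : NULL;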
//------------------------------PhaseRemoveUseless-----------------------------
// Remove useless nodes from GVN hash-table, worklist, and graph
class PhaseRemoveUseless : public Phase {
protected:
Unique_Node_List _useful; // Nodes reachable from root
// list is allocated from current resource area
public:
PhaseRemoveUseless(PhaseGVN *gvn, Unique_Node_List *worklist, PhaseNumber phase_num = Remove_Useless);
Unique_Node_List *get_useful() { return &_useful; }
};
//------------------------------PhaseRenumber----------------------------------
// Phase that first performs a PhaseRemoveUseless, then it renumbers compiler
// structures accordingly.
class PhaseRenumberLive : public PhaseRemoveUseless {
public:
PhaseRenumberLive(PhaseGVN* gvn,
Unique_Node_List* worklist, Unique_Node_List* new_worklist,
PhaseNumber phase_num = Remove_Useless_And_Renumber_Live);
};
//------------------------------PhaseTransform---------------------------------
// Phases that analyze, then transform. Constructing the Phase object does any
// global or slow analysis. The results are cached later for a fast
// transformation pass. When the Phase object is deleted the cached analysis
// results are deleted.
class PhaseTransform : public Phase {
protected:
Arena* _arena;
Node_List _nodes; // Map old node indices to new nodes.
Type_Array _types; // Map old node indices to Types.
// ConNode caches:
enum { _icon_min = -1 * HeapWordSize,
_icon_max = 16 * HeapWordSize,
_lcon_min = _icon_min,
_lcon_max = _icon_max,
_zcon_max = (uint)T_CONFLICT
};
ConINode* _icons[_icon_max - _icon_min + 1]; // cached jint constant nodes
ConLNode* _lcons[_lcon_max - _lcon_min + 1]; // cached jlong constant nodes
ConNode* _zcons[_zcon_max + 1]; // cached is_zero_type nodes
void init_con_caches();
// Support both int and long caches because either might be an intptr_t,
// so they show up frequently in address computations.
public:
PhaseTransform( PhaseNumber pnum );
PhaseTransform( Arena *arena, PhaseNumber pnum );
PhaseTransform( PhaseTransform *phase, PhaseNumber pnum );
Arena* arena() { return _arena; }
Type_Array& types() { return _types; }
void replace_types(Type_Array new_types) {
_types = new_types;
}
// _nodes is used in varying ways by subclasses, which define local accessors
uint nodes_size() {
return _nodes.size();
}
public:
// Get a previously recorded type for the node n.
// This type must already have been recorded.
// If you want the type of a very new (untransformed) node,
// you must use type_or_null, and test the result for NULL.
const Type* type(const Node* n) const {
assert(n != NULL, "must not be null");
const Type* t = _types.fast_lookup(n->_idx);
assert(t != NULL, "must set before get");
return t;
}
// Get a previously recorded type for the node n,
// or else return NULL if there is none.
const Type* type_or_null(const Node* n) const {
return _types.fast_lookup(n->_idx);
}
// Record a type for a node.
void set_type(const Node* n, const Type *t) {
assert(t != NULL, "type must not be null");
_types.map(n->_idx, t);
}
// Record an initial type for a node, the node's bottom type.
void set_type_bottom(const Node* n) {
// Use this for initialization when bottom_type() (or better) is not handy.
// Usually the initialization should be to n->Value(this) instead,
// or a hand-optimized value like Type::MEMORY or Type::CONTROL.
assert(_types[n->_idx] == NULL, "must set the initial type just once");
_types.map(n->_idx, n->bottom_type());
}
// Make sure the types array is big enough to record a size for the node n.
// (In product builds, we never want to do range checks on the types array!)
void ensure_type_or_null(const Node* n) {
if (n->_idx >= _types.Size())
_types.map(n->_idx, NULL); // Grow the types array as needed.
}
// Utility functions:
const TypeInt* find_int_type( Node* n);
const TypeLong* find_long_type(Node* n);
jint find_int_con( Node* n, jint value_if_unknown) {
const TypeInt* t = find_int_type(n);
return (t != NULL && t->is_con()) ? t->get_con() : value_if_unknown;
}
jlong find_long_con(Node* n, jlong value_if_unknown) {
const TypeLong* t = find_long_type(n);
return (t != NULL && t->is_con()) ? t->get_con() : value_if_unknown;
}
// Make an idealized constant, i.e., one of ConINode, ConPNode, ConFNode, etc.
// Same as transform(ConNode::make(t)).
ConNode* makecon(const Type* t);
virtual ConNode* uncached_makecon(const Type* t) // override in PhaseValues
{ ShouldNotCallThis(); return NULL; }
// Fast int or long constant. Same as TypeInt::make(i) or TypeLong::make(l).
ConINode* intcon(jint i);
ConLNode* longcon(jlong l);
// Fast zero or null constant. Same as makecon(Type::get_zero_type(bt)).
ConNode* zerocon(BasicType bt);
// Return a node which computes the same function as this node, but
// in a faster or cheaper fashion.
virtual Node *transform( Node *n ) = 0;
// Return whether two Nodes are equivalent.
// Must not be recursive, since the recursive version is built from this.
// For pessimistic optimizations this is simply pointer equivalence.
bool eqv(const Node* n1, const Node* n2) const { return n1 == n2; }
// For pessimistic passes, the return type must monotonically narrow.
// For optimistic passes, the return type must monotonically widen.
// It is possible to get into a "death march" in either type of pass,
// where the types are continually moving but it will take 2**31 or
// more steps to converge. This doesn't happen on most normal loops.
//
// Here is an example of a deadly loop for an optimistic pass, along
// with a partial trace of inferred types:
// x = phi(0,x'); L: x' = x+1; if (x' >= 0) goto L;
// 0 1 join([0..max], 1)
// [0..1] [1..2] join([0..max], [1..2])
// [0..2] [1..3] join([0..max], [1..3])
// ... ... ...
// [0..max] [min]u[1..max] join([0..max], [min..max])
// [0..max] ==> fixpoint
// We would have proven, the hard way, that the iteration space is all
// non-negative ints, with the loop terminating due to 32-bit overflow.
//
// Here is the corresponding example for a pessimistic pass:
// x = phi(0,x'); L: x' = x-1; if (x' >= 0) goto L;
// int int join([0..max], int)
// [0..max] [-1..max-1] join([0..max], [-1..max-1])
// [0..max-1] [-1..max-2] join([0..max], [-1..max-2])
// ... ... ...
// [0..1] [-1..0] join([0..max], [-1..0])
// 0 -1 join([0..max], -1)
// 0 == fixpoint
// We would have proven, the hard way, that the iteration space is {0}.
// (Usually, other optimizations will make the "if (x >= 0)" fold up
// before we get into trouble. But not always.)
//
// It's a pleasant thing to observe that the pessimistic pass
// will make short work of the optimistic pass's deadly loop,
// and vice versa. That is a good example of the complementary
// purposes of the CCP (optimistic) vs. GVN (pessimistic) phases.
//
// In any case, only widen or narrow a few times before going to the
// correct flavor of top or bottom.
//
// This call only needs to be made once as the data flows around any
// given cycle. We do it at Phis, and nowhere else.
// The types presented are the new type of a phi (computed by PhiNode::Value)
// and the previously computed type, last time the phi was visited.
//
// The third argument is upper limit for the saturated value,
// if the phase wishes to widen the new_type.
// If the phase is narrowing, the old type provides a lower limit.
// Caller guarantees that old_type and new_type are no higher than limit_type.
virtual const Type* saturate(const Type* new_type, const Type* old_type,
const Type* limit_type) const
{ ShouldNotCallThis(); return NULL; }
virtual PhaseIterGVN *is_IterGVN() { return 0; }
#ifndef PRODUCT
void dump_old2new_map() const;
void dump_new( uint new_lidx ) const;
void dump_types() const;
void dump_nodes_and_types(const Node *root, uint depth, bool only_ctrl = true);
void dump_nodes_and_types_recur( const Node *n, uint depth, bool only_ctrl, VectorSet &visited);
uint _count_progress; // For profiling, count transforms that make progress
void set_progress() { ++_count_progress; assert( allow_progress(),"No progress allowed during verification"); }
void clear_progress() { _count_progress = 0; }
uint made_progress() const { return _count_progress; }
uint _count_transforms; // For profiling, count transforms performed
void set_transforms() { ++_count_transforms; }
void clear_transforms() { _count_transforms = 0; }
uint made_transforms() const{ return _count_transforms; }
bool _allow_progress; // progress not allowed during verification pass
void set_allow_progress(bool allow) { _allow_progress = allow; }
bool allow_progress() { return _allow_progress; }
#endif
};
//------------------------------PhaseValues------------------------------------
// Phase infrastructure to support values
class PhaseValues : public PhaseTransform {
protected:
NodeHash _table; // Hash table for value-numbering
public:
PhaseValues( Arena *arena, uint est_max_size );
PhaseValues( PhaseValues *pt );
PhaseValues( PhaseValues *ptv, const char *dummy );
NOT_PRODUCT( ~PhaseValues(); )
virtual PhaseIterGVN *is_IterGVN() { return 0; }
// Some Ideal and other transforms delete --> modify --> insert values
bool hash_delete(Node *n) { return _table.hash_delete(n); }
void hash_insert(Node *n) { _table.hash_insert(n); }
Node *hash_find_insert(Node *n){ return _table.hash_find_insert(n); }
Node *hash_find(const Node *n) { return _table.hash_find(n); }
// Used after parsing to eliminate values that are no longer in program
void remove_useless_nodes(VectorSet &useful) {
_table.remove_useless_nodes(useful);
// this may invalidate cached cons so reset the cache
init_con_caches();
}
virtual ConNode* uncached_makecon(const Type* t); // override from PhaseTransform
virtual const Type* saturate(const Type* new_type, const Type* old_type,
const Type* limit_type) const
{ return new_type; }
#ifndef PRODUCT
uint _count_new_values; // For profiling, count new values produced
void inc_new_values() { ++_count_new_values; }
void clear_new_values() { _count_new_values = 0; }
uint made_new_values() const { return _count_new_values; }
#endif
};
//------------------------------PhaseGVN---------------------------------------
// Phase for performing local, pessimistic GVN-style optimizations.
class PhaseGVN : public PhaseValues {
public:
PhaseGVN( Arena *arena, uint est_max_size ) : PhaseValues( arena, est_max_size ) {}
PhaseGVN( PhaseGVN *gvn ) : PhaseValues( gvn ) {}
PhaseGVN( PhaseGVN *gvn, const char *dummy ) : PhaseValues( gvn, dummy ) {}
// Return a node which computes the same function as this node, but
// in a faster or cheaper fashion.
Node *transform( Node *n );
Node *transform_no_reclaim( Node *n );
void replace_with(PhaseGVN* gvn) {
_table.replace_with(&gvn->_table);
_types = gvn->_types;
}
// Check for a simple dead loop when a data node references itself.
DEBUG_ONLY(void dead_loop_check(Node *n);)
};
//------------------------------PhaseIterGVN-----------------------------------
// Phase for iteratively performing local, pessimistic GVN-style optimizations.
// and ideal transformations on the graph.
class PhaseIterGVN : public PhaseGVN {
private:
bool _delay_transform; // When true simply register the node when calling transform
// instead of actually optimizing it
// Idealize old Node 'n' with respect to its inputs and its value
virtual Node *transform_old( Node *a_node );
// Subsume users of node 'old' into node 'nn'
void subsume_node( Node *old, Node *nn );
Node_Stack _stack; // Stack used to avoid recursion
protected:
// Idealize new Node 'n' with respect to its inputs and its value
virtual Node *transform( Node *a_node );
// Warm up hash table, type table and initial worklist
void init_worklist( Node *a_root );
virtual const Type* saturate(const Type* new_type, const Type* old_type,
const Type* limit_type) const;
// Usually returns new_type. Returns old_type if new_type is only a slight
// improvement, such that it would take many (>>10) steps to reach 2**32.
public:
PhaseIterGVN( PhaseIterGVN *igvn ); // Used by CCP constructor
PhaseIterGVN( PhaseGVN *gvn ); // Used after Parser
PhaseIterGVN( PhaseIterGVN *igvn, const char *dummy ); // Used after +VerifyOpto
virtual PhaseIterGVN *is_IterGVN() { return this; }
Unique_Node_List _worklist; // Iterative worklist
// Given def-use info and an initial worklist, apply Node::Ideal,
// Node::Value, Node::Identity, hash-based value numbering, Node::Ideal_DU
// and dominator info to a fixed point.
void optimize();
// Register a new node with the iter GVN pass without transforming it.
// Used when we need to restructure a Region/Phi area and all the Regions
// and Phis need to complete this one big transform before any other
// transforms can be triggered on the region.
// Optional 'orig' is an earlier version of this node.
// It is significant only for debugging and profiling.
Node* register_new_node_with_optimizer(Node* n, Node* orig = NULL);
// Kill a globally dead Node. All uses are also globally dead and are
// aggressively trimmed.
void remove_globally_dead_node( Node *dead );
// Kill all inputs to a dead node, recursively making more dead nodes.
// The Node must be dead locally, i.e., have no uses.
void remove_dead_node( Node *dead ) {
assert(dead->outcnt() == 0 && !dead->is_top(), "node must be dead");
remove_globally_dead_node(dead);
}
// Add users of 'n' to worklist
void add_users_to_worklist0( Node *n );
void add_users_to_worklist ( Node *n );
// Replace old node with new one.
void replace_node( Node *old, Node *nn ) {
add_users_to_worklist(old);
hash_delete(old); // Yank from hash before hacking edges
subsume_node(old, nn);
}
// Delayed node rehash: remove a node from the hash table and rehash it during
// next optimizing pass
void rehash_node_delayed(Node* n) {
hash_delete(n);
_worklist.push(n);
}
// Replace ith edge of "n" with "in"
void replace_input_of(Node* n, int i, Node* in) {
rehash_node_delayed(n);
n->set_req(i, in);
}
// Delete ith edge of "n"
void delete_input_of(Node* n, int i) {
rehash_node_delayed(n);
n->del_req(i);
}
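// Illustrative usage sketch (not part of the original source): a node that may
// already be value-numbered must not have its edges changed while it sits in
// the hash table, so callers rewire through the helpers above, e.g.
//   igvn->replace_input_of(use, 2, new_in);   // hash_delete + set_req + worklist push
// rather than calling use->set_req(2, new_in) directly.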
bool delay_transform() const { return _delay_transform; }
void set_delay_transform(bool delay) {
_delay_transform = delay;
}
// Clone loop predicates. Defined in loopTransform.cpp.
Node* clone_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check);
// Create a new if below new_entry for the predicate to be cloned
ProjNode* create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry,
Deoptimization::DeoptReason reason);
void remove_speculative_types();
void check_no_speculative_types() {
_table.check_no_speculative_types();
}
#ifndef PRODUCT
protected:
// Sub-quadratic implementation of VerifyIterativeGVN.
julong _verify_counter;
julong _verify_full_passes;
enum { _verify_window_size = 30 };
Node* _verify_window[_verify_window_size];
void verify_step(Node* n);
#endif
};
//------------------------------PhaseCCP---------------------------------------
// Phase for performing global Conditional Constant Propagation.
// Should be replaced with combined CCP & GVN someday.
class PhaseCCP : public PhaseIterGVN {
// Non-recursive. Use analysis to transform single Node.
virtual Node *transform_once( Node *n );
public:
PhaseCCP( PhaseIterGVN *igvn ); // Compute conditional constants
NOT_PRODUCT( ~PhaseCCP(); )
// Worklist algorithm identifies constants
void analyze();
// Recursive traversal of the program. Uses the analysis results to modify the program.
virtual Node *transform( Node *n );
// Do any transformation after analysis
void do_transform();
virtual const Type* saturate(const Type* new_type, const Type* old_type,
const Type* limit_type) const;
// Returns new_type->widen(old_type), which increments the widen bits until
// giving up with TypeInt::INT or TypeLong::LONG.
// Result is clipped to limit_type if necessary.
#ifndef PRODUCT
static uint _total_invokes; // For profiling, count invocations
void inc_invokes() { ++PhaseCCP::_total_invokes; }
static uint _total_constants; // For profiling, count constants found
uint _count_constants;
void clear_constants() { _count_constants = 0; }
void inc_constants() { ++_count_constants; }
uint count_constants() const { return _count_constants; }
static void print_statistics();
#endif
};
//------------------------------PhasePeephole----------------------------------
// Phase for performing peephole optimizations on register allocated basic blocks.
class PhasePeephole : public PhaseTransform {
PhaseRegAlloc *_regalloc;
PhaseCFG &_cfg;
// Recursive traversal of program. Pure function is unused in this phase
virtual Node *transform( Node *n );
public:
PhasePeephole( PhaseRegAlloc *regalloc, PhaseCFG &cfg );
NOT_PRODUCT( ~PhasePeephole(); )
// Do any transformation after analysis
void do_transform();
#ifndef PRODUCT
static uint _total_peepholes; // For profiling, count peephole rules applied
uint _count_peepholes;
void clear_peepholes() { _count_peepholes = 0; }
void inc_peepholes() { ++_count_peepholes; }
uint count_peepholes() const { return _count_peepholes; }
static void print_statistics();
#endif
};
#endif // SHARE_VM_OPTO_PHASEX_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/postaloc.cpp
/*
* Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/chaitin.hpp"
#include "opto/machnode.hpp"
// See if this register (or pairs, or vector) already contains the value.
static bool register_contains_value(Node* val, OptoReg::Name reg, int n_regs,
Node_List& value) {
for (int i = 0; i < n_regs; i++) {
OptoReg::Name nreg = OptoReg::add(reg,-i);
if (value[nreg] != val)
return false;
}
return true;
}
//---------------------------may_be_copy_of_callee-----------------------------
// Check to see if we can possibly be a copy of a callee-save value.
bool PhaseChaitin::may_be_copy_of_callee( Node *def ) const {
// Short circuit if there are no callee save registers
if (_matcher.number_of_saved_registers() == 0) return false;
// Expect only a spill-down and reload on exit for callee-save spills.
// Chains of copies cannot be deep.
// 5008997 - This is wishful thinking. Register allocator seems to
// be splitting live ranges for callee save registers to such
// an extent that in large methods the chains can be very long
// (50+). The conservative answer is to return true if we don't
// know as this prevents optimizations from occurring.
const int limit = 60;
int i;
for( i=0; i < limit; i++ ) {
if( def->is_Proj() && def->in(0)->is_Start() &&
_matcher.is_save_on_entry(lrgs(_lrg_map.live_range_id(def)).reg()))
return true; // Direct use of callee-save proj
if( def->is_Copy() ) // Copies carry value through
def = def->in(def->is_Copy());
else if( def->is_Phi() ) // Phis can merge it from any direction
def = def->in(1);
else
break;
guarantee(def != NULL, "must not resurrect dead copy");
}
// If we reached the limit and didn't find a callee-save proj
// then this may still be a copy of a callee-save value, so we return
// true as the conservative answer. If we exited the loop early, we
// must have discovered that it was not a callee-save copy, else we
// would already have returned.
return i == limit;
}
//------------------------------yank-----------------------------------
// Helper function for yank_if_dead
int PhaseChaitin::yank( Node *old, Block *current_block, Node_List *value, Node_List *regnd ) {
int blk_adjust=0;
Block *oldb = _cfg.get_block_for_node(old);
oldb->find_remove(old);
// Count 1 if deleting an instruction from the current block
if (oldb == current_block) {
blk_adjust++;
}
_cfg.unmap_node_from_block(old);
OptoReg::Name old_reg = lrgs(_lrg_map.live_range_id(old)).reg();
if( regnd && (*regnd)[old_reg]==old ) { // Instruction is currently available?
value->map(old_reg,NULL); // Yank from value/regnd maps
regnd->map(old_reg,NULL); // This register's value is now unknown
}
return blk_adjust;
}
#ifdef ASSERT
static bool expected_yanked_node(Node *old, Node *orig_old) {
// This code expects only the following original nodes:
// - load from constant table node, which may have the following data input nodes:
//   MachConstantBase, MachTemp, MachSpillCopy
// - Phi nodes that are considered Junk
// - load constant node, which may have the following data input nodes:
//   MachTemp, MachSpillCopy
// - MachSpillCopy
// - MachProj and Copy dead nodes
if (old->is_MachSpillCopy()) {
return true;
} else if (old->is_Con()) {
return true;
} else if (old->is_MachProj()) { // Dead kills projection of Con node
return (old == orig_old);
} else if (old->is_Copy()) { // Dead copy of a callee-save value
return (old == orig_old);
} else if (old->is_MachTemp()) {
return orig_old->is_Con();
} else if (old->is_Phi()) { // Junk phi's
return true;
} else if (old->is_MachConstantBase()) {
return (orig_old->is_Con() && orig_old->is_MachConstant());
}
return false;
}
#endif
//------------------------------yank_if_dead-----------------------------------
// Removed edges from 'old'. Yank if dead. Return adjustment counts to
// iterators in the current block.
int PhaseChaitin::yank_if_dead_recurse(Node *old, Node *orig_old, Block *current_block,
Node_List *value, Node_List *regnd) {
int blk_adjust=0;
if (old->outcnt() == 0 && old != C->top()) {
#ifdef ASSERT
if (!expected_yanked_node(old, orig_old)) {
tty->print_cr("==============================================");
tty->print_cr("orig_old:");
orig_old->dump();
tty->print_cr("old:");
old->dump();
assert(false, "unexpected yanked node");
}
if (old->is_Con())
orig_old = old; // Reset to satisfy expected nodes checks.
#endif
blk_adjust += yank(old, current_block, value, regnd);
for (uint i = 1; i < old->req(); i++) {
Node* n = old->in(i);
if (n != NULL) {
old->set_req(i, NULL);
blk_adjust += yank_if_dead_recurse(n, orig_old, current_block, value, regnd);
}
}
// Disconnect control and remove precedence edges if any exist
old->disconnect_inputs(NULL, C);
}
return blk_adjust;
}
//------------------------------use_prior_register-----------------------------
// Use the prior value instead of the current value, in an effort to make
// the current value go dead. Return block iterator adjustment, in case
// we yank some instructions from this block.
int PhaseChaitin::use_prior_register( Node *n, uint idx, Node *def, Block *current_block, Node_List &value, Node_List ®nd ) {
// No effect?
if( def == n->in(idx) ) return 0;
// Def is currently dead and can be removed? Do not resurrect
if( def->outcnt() == 0 ) return 0;
// Not every pair of physical registers is assignment compatible,
// e.g. on sparc floating point registers are not assignable to integer
// registers.
const LRG &def_lrg = lrgs(_lrg_map.live_range_id(def));
OptoReg::Name def_reg = def_lrg.reg();
const RegMask &use_mask = n->in_RegMask(idx);
bool can_use = ( RegMask::can_represent(def_reg) ? (use_mask.Member(def_reg) != 0)
: (use_mask.is_AllStack() != 0));
if (!RegMask::is_vector(def->ideal_reg())) {
// Check for a copy to or from a misaligned pair.
// It is a workaround for sparc's misaligned pairs.
can_use = can_use && !use_mask.is_misaligned_pair() && !def_lrg.mask().is_misaligned_pair();
}
if (!can_use)
return 0;
// Capture the old def in case it goes dead...
Node *old = n->in(idx);
// Save-on-call copies can only be elided if the entire copy chain can go
// away, lest we get the same callee-save value alive in 2 locations at
// once. We check for the obvious trivial case here. Although it can
// sometimes be elided with cooperation outside our scope, here we will just
// miss the opportunity. :-(
if( may_be_copy_of_callee(def) ) {
if( old->outcnt() > 1 ) return 0; // We're not the last user
int idx = old->is_Copy();
assert( idx, "chain of copies being removed" );
Node *old2 = old->in(idx); // Chain of copies
if( old2->outcnt() > 1 ) return 0; // old is not the last user
int idx2 = old2->is_Copy();
if( !idx2 ) return 0; // Not a chain of 2 copies
if( def != old2->in(idx2) ) return 0; // Chain of exactly 2 copies
}
// Use the new def
n->set_req(idx,def);
_post_alloc++;
// Is old def now dead? We successfully yanked a copy?
return yank_if_dead(old,current_block,&value,®nd);
}
//------------------------------skip_copies------------------------------------
// Skip through any number of copies (that don't mod oop-i-ness)
Node *PhaseChaitin::skip_copies( Node *c ) {
int idx = c->is_Copy();
uint is_oop = lrgs(_lrg_map.live_range_id(c))._is_oop;
while (idx != 0) {
guarantee(c->in(idx) != NULL, "must not resurrect dead copy");
if (lrgs(_lrg_map.live_range_id(c->in(idx)))._is_oop != is_oop) {
break; // casting copy, not the same value
}
c = c->in(idx);
idx = c->is_Copy();
}
return c;
}
//------------------------------elide_copy-------------------------------------
// Remove (bypass) copies along Node n, edge k.
int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &value, Node_List ®nd, bool can_change_regs ) {
int blk_adjust = 0;
uint nk_idx = _lrg_map.live_range_id(n->in(k));
OptoReg::Name nk_reg = lrgs(nk_idx).reg();
// Remove obvious same-register copies
Node *x = n->in(k);
int idx;
while( (idx=x->is_Copy()) != 0 ) {
Node *copy = x->in(idx);
guarantee(copy != NULL, "must not resurrect dead copy");
if(lrgs(_lrg_map.live_range_id(copy)).reg() != nk_reg) {
break;
}
blk_adjust += use_prior_register(n,k,copy,current_block,value,regnd);
if (n->in(k) != copy) {
break; // Failed for some cutout?
}
x = copy; // Progress, try again
}
// Phis and 2-address instructions cannot change registers so easily - their
// outputs must match their input.
if( !can_change_regs )
return blk_adjust; // Only check stupid copies!
// Loop backedges won't have a value-mapping yet
if( &value == NULL ) return blk_adjust;
// Skip through all copies to the _value_ being used. Do not change from
// int to pointer. This attempts to jump through a chain of copies, where
// intermediate copies might be illegal, i.e., value is stored down to stack
// then reloaded BUT survives in a register the whole way.
Node *val = skip_copies(n->in(k));
if (val == x) return blk_adjust; // No progress?
int n_regs = RegMask::num_registers(val->ideal_reg());
uint val_idx = _lrg_map.live_range_id(val);
OptoReg::Name val_reg = lrgs(val_idx).reg();
// See if it happens to already be in the correct register!
// (either Phi's direct register, or the common case of the name
// never-clobbered original-def register)
if (register_contains_value(val, val_reg, n_regs, value)) {
blk_adjust += use_prior_register(n,k,regnd[val_reg],current_block,value,regnd);
if( n->in(k) == regnd[val_reg] ) // Success! Quit trying
return blk_adjust;
}
// See if we can skip the copy by changing registers. Don't change from
// using a register to using the stack unless we know we can remove a
// copy-load. Otherwise we might end up making a pile of Intel cisc-spill
// ops reading from memory instead of just loading once and using the
// register.
// Also handle duplicate copies here.
const Type *t = val->is_Con() ? val->bottom_type() : NULL;
// Scan all registers to see if this value is around already
for( uint reg = 0; reg < (uint)_max_reg; reg++ ) {
if (reg == (uint)nk_reg) {
// Found ourselves so check if there is only one user of this
// copy and keep on searching for a better copy if so.
bool ignore_self = true;
x = n->in(k);
DUIterator_Fast imax, i = x->fast_outs(imax);
Node* first = x->fast_out(i); i++;
while (i < imax && ignore_self) {
Node* use = x->fast_out(i); i++;
if (use != first) ignore_self = false;
}
if (ignore_self) continue;
}
Node *vv = value[reg];
if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set
uint last = (n_regs-1); // Looking for the last part of a set
if ((reg&last) != last) continue; // Wrong part of a set
if (!register_contains_value(vv, reg, n_regs, value)) continue; // Different value
}
if( vv == val || // Got a direct hit?
(t && vv && vv->bottom_type() == t && vv->is_Mach() &&
vv->as_Mach()->rule() == val->as_Mach()->rule()) ) { // Or same constant?
assert( !n->is_Phi(), "cannot change registers at a Phi so easily" );
if( OptoReg::is_stack(nk_reg) || // CISC-loading from stack OR
OptoReg::is_reg(reg) || // turning into a register use OR
regnd[reg]->outcnt()==1 ) { // last use of a spill-load turns into a CISC use
blk_adjust += use_prior_register(n,k,regnd[reg],current_block,value,regnd);
if( n->in(k) == regnd[reg] ) // Success! Quit trying
return blk_adjust;
} // End of if not degrading to a stack
} // End of if found value in another register
} // End of scan all machine registers
return blk_adjust;
}
//
// Check if nreg already contains the constant value val. Normal copy
// elimination doesn't work on constants because multiple nodes can
// represent the same constant, so the type and rule of the MachNode
// must be checked to ensure equivalence.
//
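// Illustrative sketch (not in the original source): two distinct MachNodes that
// load the same constant (same rule, same bottom_type) may be allocated to the
// same register; once the first is recorded in value[nreg], the second is
// redundant and its uses can be redirected to the first, e.g.
//   n1: loadConI R, #0    // value[R] = n1
//   n2: loadConI R, #0    // same rule and type -> uses of n2 moved to n1
// The instruction and register names above are hypothetical.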
bool PhaseChaitin::eliminate_copy_of_constant(Node* val, Node* n,
Block *current_block,
Node_List& value, Node_List& regnd,
OptoReg::Name nreg, OptoReg::Name nreg2) {
if (value[nreg] != val && val->is_Con() &&
value[nreg] != NULL && value[nreg]->is_Con() &&
(nreg2 == OptoReg::Bad || value[nreg] == value[nreg2]) &&
value[nreg]->bottom_type() == val->bottom_type() &&
value[nreg]->as_Mach()->rule() == val->as_Mach()->rule()) {
// This code assumes that two MachNodes representing constants
// which have the same rule and the same bottom type will produce
// identical effects into a register. This seems like it must be
// objectively true unless there are hidden inputs to the nodes
// but if that were to change this code would need to be updated.
// Since they are equivalent the second one is redundant and can
// be removed.
//
// n will be replaced with the old value but n might have
// kill projections associated with it so remove them now so that
// yank_if_dead will be able to eliminate the copy once the uses
// have been transferred to the old value.
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node* use = n->fast_out(i);
if (use->is_Proj() && use->outcnt() == 0) {
// Kill projections have no users and one input
use->set_req(0, C->top());
yank_if_dead(use, current_block, &value, ®nd);
--i; --imax;
}
}
_post_alloc++;
return true;
}
return false;
}
// The algorithm works as follows:
// We traverse the block top to bottom. possibly_merge_multidef() is invoked for every input edge k
// of the instruction n. We check to see if the input is a multidef lrg. If it is, we record the fact that we've
// seen a definition (coming in as an input) and add that fact to the reg2defuse array. The array maps registers to their
// current reaching definitions (we track only multidefs though). With each definition we also associate the first
// instruction we saw use it. If we encounter a def (an input) that is part of the same lrg but differs from the
// previously seen def, we merge the two with a MachMerge node and redirect all the uses we've seen so far to the
// merge. After that we keep replacing new defs in the same lrg with the merge node as they are encountered and
// keep adding these defs to the merge's inputs.
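// Illustrative sketch (not part of the original source): for a multidef live
// range allocated to register R, with two reaching defs d1 and d2 feeding later
// uses in the same block, the rewrite is roughly
//   before:  u1(d1)  u2(d1)  u3(d2)
//   after :  m = MachMerge(d1, d2);  u1(m)  u2(m)  u3(m)
// where the MachMerge node is inserted before the first recorded use and gains
// an extra input for each further def of the same live range that is seen.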
void PhaseChaitin::merge_multidefs() {
NOT_PRODUCT( Compile::TracePhase t3("mergeMultidefs", &_t_mergeMultidefs, TimeCompiler); )
ResourceMark rm;
// Keep track of the defs seen in registers and collect their uses in the block.
RegToDefUseMap reg2defuse(_max_reg, _max_reg, RegDefUse());
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
Block* block = _cfg.get_block(i);
for (uint j = 1; j < block->number_of_nodes(); j++) {
Node* n = block->get_node(j);
if (n->is_Phi()) continue;
for (uint k = 1; k < n->req(); k++) {
j += possibly_merge_multidef(n, k, block, reg2defuse);
}
// Null out the value produced by the instruction itself, since we're only interested in defs
// implicitly defined by the uses. We are actually interested in tracking only redefinitions
// of the multidef lrgs in the same register. For that matter it's enough to track changes in
// the base register only and ignore other effects of multi-register lrgs and fat projections.
// It is also ok to ignore defs coming from singledefs. After an implicit overwrite by one of
// those our register is guaranteed to be used by another lrg and we won't attempt to merge it.
uint lrg = _lrg_map.live_range_id(n);
if (lrg > 0 && lrgs(lrg).is_multidef()) {
OptoReg::Name reg = lrgs(lrg).reg();
reg2defuse.at(reg).clear();
}
}
// Clear reg->def->use tracking for the next block
for (int j = 0; j < reg2defuse.length(); j++) {
reg2defuse.at(j).clear();
}
}
}
int PhaseChaitin::possibly_merge_multidef(Node *n, uint k, Block *block, RegToDefUseMap& reg2defuse) {
int blk_adjust = 0;
uint lrg = _lrg_map.live_range_id(n->in(k));
if (lrg > 0 && lrgs(lrg).is_multidef()) {
OptoReg::Name reg = lrgs(lrg).reg();
Node* def = reg2defuse.at(reg).def();
if (def != NULL && lrg == _lrg_map.live_range_id(def) && def != n->in(k)) {
// Same lrg but different node, we have to merge.
MachMergeNode* merge;
if (def->is_MachMerge()) { // is it already a merge?
merge = def->as_MachMerge();
} else {
merge = new (C) MachMergeNode(def);
// Insert the merge node into the block before the first use.
uint use_index = block->find_node(reg2defuse.at(reg).first_use());
block->insert_node(merge, use_index++);
_cfg.map_node_to_block(merge, block);
// Let the allocator know about the new node, use the same lrg
_lrg_map.extend(merge->_idx, lrg);
blk_adjust++;
// Fixup all the uses (there is at least one) that happened between the first
// use and before the current one.
for (; use_index < block->number_of_nodes(); use_index++) {
Node* use = block->get_node(use_index);
if (use == n) {
break;
}
use->replace_edge(def, merge);
}
}
if (merge->find_edge(n->in(k)) == -1) {
merge->add_req(n->in(k));
}
n->set_req(k, merge);
}
// update the uses
reg2defuse.at(reg).update(n->in(k), n);
}
return blk_adjust;
}
//------------------------------post_allocate_copy_removal---------------------
// Post-Allocation peephole copy removal. We do this in 1 pass over the
// basic blocks. We maintain a mapping of registers to Nodes (an array of
// Nodes indexed by machine register or stack slot number). NULL means that a
// register is not mapped to any Node. We can (want to have!) have several
// registers map to the same Node. We walk forward over the instructions
// updating the mapping as we go. At merge points we force a NULL if we have
// to merge 2 different Nodes into the same register. Phi functions will give
// us a new Node if there is a proper value merging. Since the blocks are
// arranged in some RPO, we will visit all parent blocks before visiting any
// successor blocks (except at loops).
//
// If we find a Copy we look to see if the Copy's source register is a stack
// slot and that value has already been loaded into some machine register; if
// so we use machine register directly. This turns a Load into a reg-reg
// Move. We also look for reloads of identical constants.
//
// When we see a use from a reg-reg Copy, we will attempt to use the copy's
// source directly and make the copy go dead.
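// Illustrative sketch (not part of the original source): with 'value'/'regnd'
// mapping each register to the node last known to define it, a reload whose
// value is already live in a register can be bypassed, e.g.
//   value[R1] = v     // v was spilled earlier but is still live in R1
//   n: load  R2, [spill slot of v]
// Since the map shows v available in R1, uses of n can be fed from R1 instead,
// turning the load into a reg-reg move or letting n go dead. Register names
// here are hypothetical.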
void PhaseChaitin::post_allocate_copy_removal() {
NOT_PRODUCT( Compile::TracePhase t3("postAllocCopyRemoval", &_t_postAllocCopyRemoval, TimeCompiler); )
ResourceMark rm;
// Need a mapping from basic blocks to Node_Lists. We need a Node_List to
// map from register number to value-producing Node.
Node_List **blk2value = NEW_RESOURCE_ARRAY( Node_List *, _cfg.number_of_blocks() + 1);
memset(blk2value, 0, sizeof(Node_List*) * (_cfg.number_of_blocks() + 1));
// Need a mapping from basic blocks to Node_Lists. We need a Node_List to
// map from register number to register-defining Node.
Node_List **blk2regnd = NEW_RESOURCE_ARRAY( Node_List *, _cfg.number_of_blocks() + 1);
memset(blk2regnd, 0, sizeof(Node_List*) * (_cfg.number_of_blocks() + 1));
// We keep unused Node_Lists on a free_list to avoid wasting
// memory.
GrowableArray<Node_List*> free_list = GrowableArray<Node_List*>(16);
// For all blocks
for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
uint j;
Block* block = _cfg.get_block(i);
// Count of Phis in block
uint phi_dex;
for (phi_dex = 1; phi_dex < block->number_of_nodes(); phi_dex++) {
Node* phi = block->get_node(phi_dex);
if (!phi->is_Phi()) {
break;
}
}
// If any predecessor has not been visited, we do not know the state
// of registers at the start. Check for this, while updating copies
// along Phi input edges
bool missing_some_inputs = false;
Block *freed = NULL;
for (j = 1; j < block->num_preds(); j++) {
Block* pb = _cfg.get_block_for_node(block->pred(j));
// Remove copies along phi edges
for (uint k = 1; k < phi_dex; k++) {
elide_copy(block->get_node(k), j, block, *blk2value[pb->_pre_order], *blk2regnd[pb->_pre_order], false);
}
if (blk2value[pb->_pre_order]) { // Have a mapping on this edge?
// See if this predecessor's mappings have been used by everybody
// who wants them. If so, free 'em.
uint k;
for (k = 0; k < pb->_num_succs; k++) {
Block* pbsucc = pb->_succs[k];
if (!blk2value[pbsucc->_pre_order] && pbsucc != block) {
break; // Found a future user
}
}
if (k >= pb->_num_succs) { // No more uses, free!
freed = pb; // Record last block freed
free_list.push(blk2value[pb->_pre_order]);
free_list.push(blk2regnd[pb->_pre_order]);
}
} else { // This block has unvisited (loopback) inputs
missing_some_inputs = true;
}
}
// Extract Node_List mappings. If 'freed' is non-zero, we just popped
// 'freed's blocks off the list
Node_List ®nd = *(free_list.is_empty() ? new Node_List() : free_list.pop());
Node_List &value = *(free_list.is_empty() ? new Node_List() : free_list.pop());
assert( !freed || blk2value[freed->_pre_order] == &value, "" );
value.map(_max_reg,NULL);
regnd.map(_max_reg,NULL);
// Set mappings as OUR mappings
blk2value[block->_pre_order] = &value;
blk2regnd[block->_pre_order] = ®nd;
// Initialize value & regnd for this block
if (missing_some_inputs) {
// Some predecessor has not yet been visited; zap map to empty
for (uint k = 0; k < (uint)_max_reg; k++) {
value.map(k,NULL);
regnd.map(k,NULL);
}
} else {
if( !freed ) { // Didn't get a freebie prior block
// Must clone some data
freed = _cfg.get_block_for_node(block->pred(1));
Node_List &f_value = *blk2value[freed->_pre_order];
Node_List &f_regnd = *blk2regnd[freed->_pre_order];
for( uint k = 0; k < (uint)_max_reg; k++ ) {
value.map(k,f_value[k]);
regnd.map(k,f_regnd[k]);
}
}
// Merge all inputs together, setting to NULL any conflicts.
for (j = 1; j < block->num_preds(); j++) {
Block* pb = _cfg.get_block_for_node(block->pred(j));
if (pb == freed) {
continue; // Did self already via freelist
}
Node_List &p_regnd = *blk2regnd[pb->_pre_order];
for( uint k = 0; k < (uint)_max_reg; k++ ) {
if( regnd[k] != p_regnd[k] ) { // Conflict on reaching defs?
value.map(k,NULL); // Then no value handy
regnd.map(k,NULL);
}
}
}
}
// For all Phi's
for (j = 1; j < phi_dex; j++) {
uint k;
Node *phi = block->get_node(j);
uint pidx = _lrg_map.live_range_id(phi);
OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg();
// Remove copies remaining on edges. Check for junk phi.
Node *u = NULL;
for (k = 1; k < phi->req(); k++) {
Node *x = phi->in(k);
if( phi != x && u != x ) // Found a different input
u = u ? NodeSentinel : x; // Capture unique input, or NodeSentinel for 2nd input
}
if (u != NodeSentinel) { // Junk Phi. Remove
phi->replace_by(u);
j -= yank_if_dead(phi, block, &value, ®nd);
phi_dex--;
continue;
}
// Note that if value[pidx] exists, then we merged no new values here
// and the phi is useless. This can happen even with the above phi
// removal for complex flows. I cannot keep the better known value here
// because locally the phi appears to define a new merged value. If I
// keep the better value then a copy of the phi, being unable to use the
// global flow analysis, can't "peek through" the phi to the original
// reaching value and so will act like it's defining a new value. This
// can lead to situations where some uses are from the old and some from
// the new values. Not illegal by itself but throws the over-strong
// assert in scheduling.
if( pidx ) {
value.map(preg,phi);
regnd.map(preg,phi);
int n_regs = RegMask::num_registers(phi->ideal_reg());
for (int l = 1; l < n_regs; l++) {
OptoReg::Name preg_lo = OptoReg::add(preg,-l);
value.map(preg_lo,phi);
regnd.map(preg_lo,phi);
}
}
}
// For all remaining instructions
for (j = phi_dex; j < block->number_of_nodes(); j++) {
Node* n = block->get_node(j);
if(n->outcnt() == 0 && // Dead?
n != C->top() && // (ignore TOP, it has no du info)
!n->is_Proj() ) { // fat-proj kills
j -= yank_if_dead(n, block, &value, ®nd);
continue;
}
// Improve reaching-def info. Occasionally post-alloc's liveness gives
// up (at loop backedges, because we aren't doing a full flow pass).
// The presence of a live use essentially asserts that the use's def is
// alive and well at the use (or else the allocator fubar'd). Take
// advantage of this info to set a reaching def for the use-reg.
uint k;
for (k = 1; k < n->req(); k++) {
Node *def = n->in(k); // n->in(k) is a USE; def is the DEF for this USE
guarantee(def != NULL, "no disconnected nodes at this point");
uint useidx = _lrg_map.live_range_id(def); // useidx is the live range index for this USE
if( useidx ) {
OptoReg::Name ureg = lrgs(useidx).reg();
if( !value[ureg] ) {
int idx; // Skip occasional useless copy
while( (idx=def->is_Copy()) != 0 &&
def->in(idx) != NULL && // NULL should not happen
ureg == lrgs(_lrg_map.live_range_id(def->in(idx))).reg())
def = def->in(idx);
Node *valdef = skip_copies(def); // tighten up val through non-useless copies
value.map(ureg,valdef); // record improved reaching-def info
regnd.map(ureg, def);
// Record other half of doubles
uint def_ideal_reg = def->ideal_reg();
int n_regs = RegMask::num_registers(def_ideal_reg);
for (int l = 1; l < n_regs; l++) {
OptoReg::Name ureg_lo = OptoReg::add(ureg,-l);
if (!value[ureg_lo] &&
(!RegMask::can_represent(ureg_lo) ||
lrgs(useidx).mask().Member(ureg_lo))) { // Nearly always adjacent
value.map(ureg_lo,valdef); // record improved reaching-def info
regnd.map(ureg_lo, def);
}
}
}
}
}
const uint two_adr = n->is_Mach() ? n->as_Mach()->two_adr() : 0;
// Remove copies along input edges
for (k = 1; k < n->req(); k++) {
j -= elide_copy(n, k, block, value, regnd, two_adr != k);
}
// Unallocated Nodes define no registers
uint lidx = _lrg_map.live_range_id(n);
if (!lidx) {
continue;
}
// Update the register defined by this instruction
OptoReg::Name nreg = lrgs(lidx).reg();
// Skip through all copies to the _value_ being defined.
// Do not change from int to pointer
Node *val = skip_copies(n);
// Clear out a dead definition before starting so that the
// elimination code doesn't have to guard against it. The
// definition could in fact be a kill projection with a count of
// 0 which is safe but since those are uninteresting for copy
// elimination just delete them as well.
if (regnd[nreg] != NULL && regnd[nreg]->outcnt() == 0) {
regnd.map(nreg, NULL);
value.map(nreg, NULL);
}
uint n_ideal_reg = n->ideal_reg();
int n_regs = RegMask::num_registers(n_ideal_reg);
if (n_regs == 1) {
// If Node 'n' does not change the value mapped by the register,
// then 'n' is a useless copy. Do not update the register->node
// mapping so 'n' will go dead.
if( value[nreg] != val ) {
if (eliminate_copy_of_constant(val, n, block, value, regnd, nreg, OptoReg::Bad)) {
j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
} else {
// Update the mapping: record new Node defined by the register
regnd.map(nreg,n);
// Update mapping for defined *value*, which is the defined
// Node after skipping all copies.
value.map(nreg,val);
}
} else if( !may_be_copy_of_callee(n) ) {
assert(n->is_Copy(), "");
j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
}
} else if (RegMask::is_vector(n_ideal_reg)) {
// If Node 'n' does not change the value mapped by the register,
// then 'n' is a useless copy. Do not update the register->node
// mapping so 'n' will go dead.
if (!register_contains_value(val, nreg, n_regs, value)) {
// Update the mapping: record new Node defined by the register
regnd.map(nreg,n);
// Update mapping for defined *value*, which is the defined
// Node after skipping all copies.
value.map(nreg,val);
for (int l = 1; l < n_regs; l++) {
OptoReg::Name nreg_lo = OptoReg::add(nreg,-l);
regnd.map(nreg_lo, n );
value.map(nreg_lo,val);
}
} else if (n->is_Copy()) {
// Note: a vector can't be a constant and can't be a copy of a callee save.
j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
}
} else {
// If the value occupies a register pair, record same info
// in both registers.
OptoReg::Name nreg_lo = OptoReg::add(nreg,-1);
if( RegMask::can_represent(nreg_lo) && // Either a spill slot, or
!lrgs(lidx).mask().Member(nreg_lo) ) { // Nearly always adjacent
// Sparc occasionally has non-adjacent pairs.
// Find the actual other value
RegMask tmp = lrgs(lidx).mask();
tmp.Remove(nreg);
nreg_lo = tmp.find_first_elem();
}
if (value[nreg] != val || value[nreg_lo] != val) {
if (eliminate_copy_of_constant(val, n, block, value, regnd, nreg, nreg_lo)) {
j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
} else {
regnd.map(nreg , n );
regnd.map(nreg_lo, n );
value.map(nreg ,val);
value.map(nreg_lo,val);
}
} else if (!may_be_copy_of_callee(n)) {
assert(n->is_Copy(), "");
j -= replace_and_yank_if_dead(n, nreg, block, value, regnd);
}
}
// Fat projections kill many registers
if( n_ideal_reg == MachProjNode::fat_proj ) {
RegMask rm = n->out_RegMask();
// wow, what an expensive iterator...
nreg = rm.find_first_elem();
while( OptoReg::is_valid(nreg)) {
rm.Remove(nreg);
value.map(nreg,n);
regnd.map(nreg,n);
nreg = rm.find_first_elem();
}
}
} // End of for all instructions in the block
} // End for all blocks
}
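// --- Illustrative sketch (not part of the original sources) ---
// The elimination loop above leans on two register-indexed maps: 'value'
// holds the value reaching a register with copies skipped, and 'regnd' holds
// the node that last defined it; a copy whose target register already carries
// the copied value is useless. A minimal toy version of that test, with
// ToyNode and the toy_* helpers being invented names for illustration only:
struct ToyNode { ToyNode* copy_of; };                // copy_of != NULL marks a copy node
static ToyNode* toy_skip_copies(ToyNode* n) {
  while (n != NULL && n->copy_of != NULL)            // chase copies back to the real def
    n = n->copy_of;
  return n;
}
static bool toy_copy_is_useless(ToyNode* copy, int reg, ToyNode** value) {
  // Redundant if the register already holds the value the copy would produce.
  return value[reg] != NULL && value[reg] == toy_skip_copies(copy);
}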
C:\hotspot-69087d08d473\src\share\vm/opto/regalloc.cpp
/*
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "opto/regalloc.hpp"
static const int NodeRegsOverflowSize = 200;
void (*PhaseRegAlloc::_alloc_statistics[MAX_REG_ALLOCATORS])();
int PhaseRegAlloc::_num_allocators = 0;
#ifndef PRODUCT
int PhaseRegAlloc::_total_framesize = 0;
int PhaseRegAlloc::_max_framesize = 0;
#endif
PhaseRegAlloc::PhaseRegAlloc( uint unique, PhaseCFG &cfg,
Matcher &matcher,
void (*pr_stats)() ):
Phase(Register_Allocation), _cfg(cfg), _matcher(matcher),
_node_oops(Thread::current()->resource_area()),
_node_regs(0),
_node_regs_max_index(0),
_framesize(0xdeadbeef)
{
int i;
for (i=0; i < _num_allocators; i++) {
if (_alloc_statistics[i] == pr_stats)
return;
}
assert((_num_allocators + 1) < MAX_REG_ALLOCATORS, "too many register allocators");
_alloc_statistics[_num_allocators++] = pr_stats;
}
//------------------------------reg2offset-------------------------------------
int PhaseRegAlloc::reg2offset_unchecked( OptoReg::Name reg ) const {
// Slots below _max_in_arg_stack_reg are offset by the entire frame.
// Slots above _max_in_arg_stack_reg are frame_slots and are not offset.
int slot = (reg < _matcher._new_SP)
? reg - OptoReg::stack0() + _framesize
: reg - _matcher._new_SP;
// Note: We use the direct formula (reg - SharedInfo::stack0) instead of
// OptoReg::reg2stack(reg), in order to avoid asserts in the latter
// function. This routine must remain unchecked, so that dump_frame()
// can do its work undisturbed.
// %%% not really clear why reg2stack would assert here
return slot*VMRegImpl::stack_slot_size;
}
int PhaseRegAlloc::reg2offset( OptoReg::Name reg ) const {
// Not allowed in the out-preserve area.
// In-preserve area is allowed so Intel can fetch the return pc out.
assert( reg < _matcher._old_SP ||
(reg >= OptoReg::add(_matcher._old_SP,C->out_preserve_stack_slots()) &&
reg < _matcher._in_arg_limit) ||
reg >= OptoReg::add(_matcher._new_SP, C->out_preserve_stack_slots()) ||
// Allow return_addr in the out-preserve area.
reg == _matcher.return_addr(),
"register allocated in a preserve area" );
return reg2offset_unchecked( reg );
}
//------------------------------offset2reg-------------------------------------
OptoReg::Name PhaseRegAlloc::offset2reg(int stk_offset) const {
int slot = stk_offset / jintSize;
int reg = (slot < (int) _framesize)
? slot + _matcher._new_SP
: OptoReg::stack2reg(slot) - _framesize;
assert(stk_offset == reg2offset((OptoReg::Name) reg),
"offset2reg does not invert properly");
return (OptoReg::Name) reg;
}
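// Worked example for the two conversions above (illustrative numbers only,
// assuming 4-byte stack slots and a frame of more than 3 slots): a frame slot
// 3 above _new_SP gives slot = 3 and reg2offset = 3 * 4 = 12 bytes;
// offset2reg(12) computes slot = 12 / jintSize = 3, which is below _framesize,
// so it returns 3 + _new_SP again and the round trip holds. Registers below
// _new_SP (incoming-argument slots) instead land at reg - stack0() + _framesize
// slots, i.e. past the entire frame.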
//------------------------------set_oop----------------------------------------
void PhaseRegAlloc::set_oop( const Node *n, bool is_an_oop ) {
if( is_an_oop ) {
_node_oops.set(n->_idx);
}
}
//------------------------------is_oop-----------------------------------------
bool PhaseRegAlloc::is_oop( const Node *n ) const {
return _node_oops.test(n->_idx) != 0;
}
// Allocate _node_regs table with at least "size" elements
void PhaseRegAlloc::alloc_node_regs(int size) {
_node_regs_max_index = size + (size >> 1) + NodeRegsOverflowSize;
_node_regs = NEW_RESOURCE_ARRAY( OptoRegPair, _node_regs_max_index );
// We assume our caller will fill in all elements up to size-1, so
// only the extra space we allocate is initialized here.
for( uint i = size; i < _node_regs_max_index; ++i )
_node_regs[i].set_bad();
}
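// Sizing note (derived from the code above): for size == 1000 the table gets
// 1000 + 500 + 200 = 1700 entries; the extra 50% plus NodeRegsOverflowSize is
// headroom, presumably for nodes created after register allocation, and only
// that extra tail is initialized to "bad" here.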
#ifndef PRODUCT
void
PhaseRegAlloc::print_statistics() {
tty->print_cr("Total frameslots = %d, Max frameslots = %d", _total_framesize, _max_framesize);
int i;
for (i=0; i < _num_allocators; i++) {
_alloc_statistics[i]();
}
}
#endif
C:\hotspot-69087d08d473\src\share\vm/opto/regalloc.hpp
/*
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_OPTO_REGALLOC_HPP
#define SHARE_VM_OPTO_REGALLOC_HPP
#include "code/vmreg.hpp"
#include "opto/block.hpp"
#include "opto/matcher.hpp"
#include "opto/phase.hpp"
class Node;
class Matcher;
class PhaseCFG;
#define MAX_REG_ALLOCATORS 10
//------------------------------PhaseRegAlloc------------------------------------
// Abstract register allocator
class PhaseRegAlloc : public Phase {
friend class VMStructs;
static void (*_alloc_statistics[MAX_REG_ALLOCATORS])();
static int _num_allocators;
protected:
OptoRegPair *_node_regs;
uint _node_regs_max_index;
VectorSet _node_oops; // Mapping from node indices to oopiness
void alloc_node_regs(int size); // allocate _node_regs table with at least "size" elements
PhaseRegAlloc( uint unique, PhaseCFG &cfg, Matcher &matcher,
void (*pr_stats)());
public:
PhaseCFG &_cfg; // Control flow graph
uint _framesize; // Size of frame in stack-slots. not counting preserve area
OptoReg::Name _max_reg; // Past largest register seen
Matcher &_matcher; // Convert Ideal to MachNodes
uint node_regs_max_index() const { return _node_regs_max_index; }
// Get the register associated with the Node
OptoReg::Name get_reg_first( const Node *n ) const {
debug_only( if( n->_idx >= _node_regs_max_index ) n->dump(); );
assert( n->_idx < _node_regs_max_index, "Exceeded _node_regs array");
return _node_regs[n->_idx].first();
}
OptoReg::Name get_reg_second( const Node *n ) const {
debug_only( if( n->_idx >= _node_regs_max_index ) n->dump(); );
assert( n->_idx < _node_regs_max_index, "Exceeded _node_regs array");
return _node_regs[n->_idx].second();
}
// Do all the real work of allocate
virtual void Register_Allocate() = 0;
// notify the register allocator that "node" is a new reference
// to the value produced by "old_node"
virtual void add_reference( const Node *node, const Node *old_node) = 0;
// Set the register associated with a new Node
void set_bad( uint idx ) {
assert( idx < _node_regs_max_index, "Exceeded _node_regs array");
_node_regs[idx].set_bad();
}
void set1( uint idx, OptoReg::Name reg ) {
assert( idx < _node_regs_max_index, "Exceeded _node_regs array");
_node_regs[idx].set1(reg);
}
void set2( uint idx, OptoReg::Name reg ) {
assert( idx < _node_regs_max_index, "Exceeded _node_regs array");
_node_regs[idx].set2(reg);
}
void set_pair( uint idx, OptoReg::Name hi, OptoReg::Name lo ) {
assert( idx < _node_regs_max_index, "Exceeded _node_regs array");
_node_regs[idx].set_pair(hi, lo);
}
void set_ptr( uint idx, OptoReg::Name reg ) {
assert( idx < _node_regs_max_index, "Exceeded _node_regs array");
_node_regs[idx].set_ptr(reg);
}
// Set and query if a node produces an oop
void set_oop( const Node *n, bool );
bool is_oop( const Node *n ) const;
// Convert a register number to a stack offset
int reg2offset ( OptoReg::Name reg ) const;
int reg2offset_unchecked( OptoReg::Name reg ) const;
// Convert a stack offset to a register number
OptoReg::Name offset2reg( int stk_offset ) const;
// Get the register encoding associated with the Node
int get_encode(const Node *n) const {
assert( n->_idx < _node_regs_max_index, "Exceeded _node_regs array");
OptoReg::Name first = _node_regs[n->_idx].first();
OptoReg::Name second = _node_regs[n->_idx].second();
assert( !OptoReg::is_valid(second) || second == first+1, "" );
assert(OptoReg::is_reg(first), "out of range");
return Matcher::_regEncode[first];
}
#ifndef PRODUCT
static int _total_framesize;
static int _max_framesize;
virtual void dump_frame() const = 0;
virtual char *dump_register( const Node *n, char *buf ) const = 0;
static void print_statistics();
#endif
};
#endif // SHARE_VM_OPTO_REGALLOC_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/regmask.cpp
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "opto/compile.hpp"
#include "opto/regmask.hpp"
#if defined AD_MD_HPP
# include AD_MD_HPP
#elif defined TARGET_ARCH_MODEL_x86_32
# include "adfiles/ad_x86_32.hpp"
#elif defined TARGET_ARCH_MODEL_x86_64
# include "adfiles/ad_x86_64.hpp"
#elif defined TARGET_ARCH_MODEL_aarch64
# include "adfiles/ad_aarch64.hpp"
#elif defined TARGET_ARCH_MODEL_sparc
# include "adfiles/ad_sparc.hpp"
#elif defined TARGET_ARCH_MODEL_zero
# include "adfiles/ad_zero.hpp"
#elif defined TARGET_ARCH_MODEL_ppc_64
# include "adfiles/ad_ppc_64.hpp"
#endif
#define RM_SIZE _RM_SIZE /* a constant private to the class RegMask */
//-------------Non-zero bit search methods used by RegMask---------------------
// Find lowest 1, or return 32 if empty
int find_lowest_bit( uint32 mask ) {
int n = 0;
if( (mask & 0xffff) == 0 ) {
mask >>= 16;
n += 16;
}
if( (mask & 0xff) == 0 ) {
mask >>= 8;
n += 8;
}
if( (mask & 0xf) == 0 ) {
mask >>= 4;
n += 4;
}
if( (mask & 0x3) == 0 ) {
mask >>= 2;
n += 2;
}
if( (mask & 0x1) == 0 ) {
mask >>= 1;
n += 1;
}
if( mask == 0 ) {
n = 32;
}
return n;
}
// Find highest 1, or return 32 if empty
int find_hihghest_bit( uint32 mask ) {
int n = 0;
if( mask > 0xffff ) {
mask >>= 16;
n += 16;
}
if( mask > 0xff ) {
mask >>= 8;
n += 8;
}
if( mask > 0xf ) {
mask >>= 4;
n += 4;
}
if( mask > 0x3 ) {
mask >>= 2;
n += 2;
}
if( mask > 0x1 ) {
mask >>= 1;
n += 1;
}
if( mask == 0 ) {
n = 32;
}
return n;
}
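// --- Illustrative sketch (not part of the original file) ---
// The two helpers above are unrolled binary searches over the word. A
// straight-line loop version, written here purely as a cross-check of the
// intended behavior (the toy_* names are invented for illustration):
static int toy_lowest_bit(unsigned int mask) {
  for (int n = 0; n < 32; n++)
    if (mask & (1u << n)) return n;                  // first set bit from the bottom
  return 32;                                         // empty mask
}
static int toy_highest_bit(unsigned int mask) {
  for (int n = 31; n >= 0; n--)
    if (mask & (1u << n)) return n;                  // first set bit from the top
  return 32;                                         // empty mask
}
// For example, mask == 0x30 yields lowest bit 4 and highest bit 5 in both the
// loop versions and the binary-search versions above.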
//------------------------------dump-------------------------------------------
#ifndef PRODUCT
void OptoReg::dump(int r, outputStream *st) {
switch (r) {
case Special: st->print("r---"); break;
case Bad: st->print("rBAD"); break;
default:
if (r < _last_Mach_Reg) st->print("%s", Matcher::regName[r]);
else st->print("rS%d",r);
break;
}
}
#endif
//=============================================================================
const RegMask RegMask::Empty(
# define BODY(I) 0,
FORALL_BODY
# undef BODY
0
);
//=============================================================================
bool RegMask::is_vector(uint ireg) {
return (ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY);
}
int RegMask::num_registers(uint ireg) {
switch(ireg) {
case Op_VecY:
return 8;
case Op_VecX:
return 4;
case Op_VecD:
case Op_RegD:
case Op_RegL:
#ifdef _LP64
case Op_RegP:
#endif
return 2;
}
// Op_VecS and the rest ideal registers.
return 1;
}
//------------------------------find_first_pair--------------------------------
// Find the lowest-numbered register pair in the mask. Return the
// HIGHEST register number in the pair, or BAD if no pairs.
OptoReg::Name RegMask::find_first_pair() const {
verify_pairs();
for( int i = 0; i < RM_SIZE; i++ ) {
if( _A[i] ) { // Found some bits
int bit = _A[i] & -_A[i]; // Extract low bit
// Convert to bit number, return hi bit in pair
return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(bit)+1);
}
}
return OptoReg::Bad;
}
//------------------------------ClearToPairs-----------------------------------
// Clear out partial bits; leave only bit pairs
void RegMask::clear_to_pairs() {
for( int i = 0; i < RM_SIZE; i++ ) {
int bits = _A[i];
bits &= ((bits & 0x55555555)<<1); // 1 hi-bit set for each pair
bits |= (bits>>1); // Smear 1 hi-bit into a pair
_A[i] = bits;
}
verify_pairs();
}
//------------------------------SmearToPairs-----------------------------------
// Smear out partial bits; leave only bit pairs
void RegMask::smear_to_pairs() {
for( int i = 0; i < RM_SIZE; i++ ) {
int bits = _A[i];
bits |= ((bits & 0x55555555)<<1); // Smear lo bit hi per pair
bits |= ((bits & 0xAAAAAAAA)>>1); // Smear hi bit lo per pair
_A[i] = bits;
}
verify_pairs();
}
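// Worked example on one mask word (derived from the two loops above):
// bits = 0xE has the lone bit 1 plus the aligned pair {2,3}.
//   clear_to_pairs : 0xE -> 0xC  (the stray bit 1 is dropped)
//   smear_to_pairs : 0xE -> 0xF  (bit 1 is widened to the full pair {0,1})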
//------------------------------is_aligned_pairs-------------------------------
bool RegMask::is_aligned_pairs() const {
// Assert that the register mask contains only bit pairs.
for( int i = 0; i < RM_SIZE; i++ ) {
int bits = _A[i];
while( bits ) { // Check bits for pairing
int bit = bits & -bits; // Extract low bit
// A low bit at an odd position means the pair is mis-aligned.
if( (bit & 0x55555555) == 0 ) return false;
bits -= bit; // Remove bit from mask
// Check for aligned adjacent bit
if( (bits & (bit<<1)) == 0 ) return false;
bits -= (bit<<1); // Remove the other half of the pair
}
}
return true;
}
//------------------------------is_bound1--------------------------------------
// Return TRUE if the mask contains a single bit
int RegMask::is_bound1() const {
if( is_AllStack() ) return false;
int bit = -1; // Set to hold the one bit allowed
for( int i = 0; i < RM_SIZE; i++ ) {
if( _A[i] ) { // Found some bits
if( bit != -1 ) return false; // Already had bits, so fail
bit = _A[i] & -_A[i]; // Extract 1 bit from mask
if( bit != _A[i] ) return false; // Found many bits, so fail
}
}
// True for both the empty mask and for a single bit
return true;
}
//------------------------------is_bound2--------------------------------------
// Return TRUE if the mask contains an adjacent pair of bits and no other bits.
int RegMask::is_bound_pair() const {
if( is_AllStack() ) return false;
int bit = -1; // Set to hold the one bit allowed
for( int i = 0; i < RM_SIZE; i++ ) {
if( _A[i] ) { // Found some bits
if( bit != -1 ) return false; // Already had bits, so fail
bit = _A[i] & -(_A[i]); // Extract 1 bit from mask
if( (bit << 1) != 0 ) { // Bit pair stays in same word?
if( (bit | (bit<<1)) != _A[i] )
return false; // Require adjacent bit pair and no more bits
} else { // Else it's a split-pair case
if( bit != _A[i] ) return false; // Found many bits, so fail
i++; // Skip iteration forward
if( i >= RM_SIZE || _A[i] != 1 )
return false; // Require 1 lo bit in next word
}
}
}
// True for both the empty mask and for a bit pair
return true;
}
static int low_bits[3] = { 0x55555555, 0x11111111, 0x01010101 };
//------------------------------find_first_set---------------------------------
// Find the lowest-numbered register set in the mask. Return the
// HIGHEST register number in the set, or BAD if no sets.
// Works also for size 1.
OptoReg::Name RegMask::find_first_set(const int size) const {
verify_sets(size);
for (int i = 0; i < RM_SIZE; i++) {
if (_A[i]) { // Found some bits
int bit = _A[i] & -_A[i]; // Extract low bit
// Convert to bit number, return hi bit in pair
return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(bit)+(size-1));
}
}
return OptoReg::Bad;
}
//------------------------------clear_to_sets----------------------------------
// Clear out partial bits; leave only aligned adjacent bit sets of the given size
void RegMask::clear_to_sets(const int size) {
if (size == 1) return;
assert(2 <= size && size <= 8, "update low bits table");
assert(is_power_of_2(size), "sanity");
int low_bits_mask = low_bits[size>>2];
for (int i = 0; i < RM_SIZE; i++) {
int bits = _A[i];
int sets = (bits & low_bits_mask);
for (int j = 1; j < size; j++) {
sets = (bits & (sets<<1)); // filter bits which produce whole sets
}
sets |= (sets>>1); // Smear 1 hi-bit into a set
if (size > 2) {
sets |= (sets>>2); // Smear 2 hi-bits into a set
if (size > 4) {
sets |= (sets>>4); // Smear 4 hi-bits into a set
}
}
_A[i] = sets;
}
verify_sets(size);
}
//------------------------------smear_to_sets----------------------------------
// Smear out partial bits to aligned adjacent bit sets
void RegMask::smear_to_sets(const int size) {
if (size == 1) return;
assert(2 <= size && size <= 8, "update low bits table");
assert(is_power_of_2(size), "sanity");
int low_bits_mask = low_bits[size>>2];
for (int i = 0; i < RM_SIZE; i++) {
int bits = _A[i];
int sets = 0;
for (int j = 0; j < size; j++) {
sets |= (bits & low_bits_mask); // collect partial bits
bits = bits>>1;
}
sets |= (sets<<1); // Smear 1 lo-bit into a set
if (size > 2) {
sets |= (sets<<2); // Smear 2 lo-bits into a set
if (size > 4) {
sets |= (sets<<4); // Smear 4 lo-bits into a set
}
}
_A[i] = sets;
}
verify_sets(size);
}
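// Note on the low_bits table used above: low_bits[size>>2] selects
//   size 2 -> 0x55555555 (every 2nd bit, the legal start positions of a pair)
//   size 4 -> 0x11111111 (every 4th bit)
//   size 8 -> 0x01010101 (every 8th bit)
// Worked example for clear_to_sets(4) on one word: 0x0F0 (an aligned set at
// bits 4-7) is kept unchanged, while 0x1E0 (bits 5-8, straddling a 4-bit
// boundary) is cleared to 0.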
//------------------------------is_aligned_set--------------------------------
bool RegMask::is_aligned_sets(const int size) const {
if (size == 1) return true;
assert(2 <= size && size <= 8, "update low bits table");
assert(is_power_of_2(size), "sanity");
int low_bits_mask = low_bits[size>>2];
// Assert that the register mask contains only bit sets.
for (int i = 0; i < RM_SIZE; i++) {
int bits = _A[i];
while (bits) { // Check bits for pairing
int bit = bits & -bits; // Extract low bit
// A low bit that is not aligned to the set size means the set is mis-aligned.
if ((bit & low_bits_mask) == 0) return false;
// Do extra work since (bit << size) may overflow.
int hi_bit = bit << (size-1); // high bit
int set = hi_bit + ((hi_bit-1) & ~(bit-1));
// Check for aligned adjacent bits in this set
if ((bits & set) != set) return false;
bits -= set; // Remove this set
}
}
return true;
}
//------------------------------is_bound_set-----------------------------------
// Return TRUE if the mask contains one adjacent set of bits and no other bits.
// Works also for size 1.
int RegMask::is_bound_set(const int size) const {
if( is_AllStack() ) return false;
assert(1 <= size && size <= 8, "update low bits table");
int bit = -1; // Set to hold the one bit allowed
for (int i = 0; i < RM_SIZE; i++) {
if (_A[i] ) { // Found some bits
if (bit != -1)
return false; // Already had bits, so fail
bit = _A[i] & -_A[i]; // Extract low bit from mask
int hi_bit = bit << (size-1); // high bit
if (hi_bit != 0) { // Bit set stays in same word?
int set = hi_bit + ((hi_bit-1) & ~(bit-1));
if (set != _A[i])
return false; // Require adjacent bit set and no more bits
} else { // Else it's a split-set case
if (((-1) & ~(bit-1)) != _A[i])
return false; // Found many bits, so fail
i++; // Skip iteration forward and check high part
// The lower 24 bits should be 0 since this is a split case and size <= 8.
int set = bit>>24;
set = set & -set; // Remove sign extension.
set = (((set << size) - 1) >> 8);
if (i >= RM_SIZE || _A[i] != set)
return false; // Require expected low bits in next word
}
}
}
// True for both the empty mask and for a bit set
return true;
}
//------------------------------is_UP------------------------------------------
// UP means register only, Register plus stack, or stack only is DOWN
bool RegMask::is_UP() const {
// Quick common case check for DOWN (any stack slot is legal)
if( is_AllStack() )
return false;
// Slower check for any stack bits set (also DOWN)
if( overlap(Matcher::STACK_ONLY_mask) )
return false;
// Not DOWN, so must be UP
return true;
}
//------------------------------Size-------------------------------------------
// Compute size of register mask in bits
uint RegMask::Size() const {
extern uint8 bitsInByte[256];
uint sum = 0;
for( int i = 0; i < RM_SIZE; i++ )
sum +=
bitsInByte[(_A[i]>>24) & 0xff] +
bitsInByte[(_A[i]>>16) & 0xff] +
bitsInByte[(_A[i]>> 8) & 0xff] +
bitsInByte[ _A[i] & 0xff];
return sum;
}
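// --- Illustrative sketch (not part of the original file) ---
// Size() is a table-driven popcount: each word is split into four bytes and
// the per-byte bit counts are summed. A self-contained version of the same
// idea (the table is rebuilt locally because bitsInByte lives elsewhere;
// toy_popcount32 is an invented name):
static unsigned int toy_popcount32(unsigned int w) {
  static unsigned char counts[256];                  // counts[i] == number of 1 bits in i
  static bool inited = false;
  if (!inited) {
    for (int i = 1; i < 256; i++)
      counts[i] = (unsigned char)((i & 1) + counts[i >> 1]);
    inited = true;
  }
  return counts[w & 0xff] + counts[(w >> 8) & 0xff] +
         counts[(w >> 16) & 0xff] + counts[(w >> 24) & 0xff];
}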
#ifndef PRODUCT
//------------------------------print------------------------------------------
void RegMask::dump(outputStream *st) const {
st->print("[");
RegMask rm = *this; // Structure copy into local temp
OptoReg::Name start = rm.find_first_elem(); // Get a register
if (OptoReg::is_valid(start)) { // Check for empty mask
rm.Remove(start); // Yank from mask
OptoReg::dump(start, st); // Print register
OptoReg::Name last = start;
// Now I have printed an initial register.
// Print adjacent registers as "rX-rZ" instead of "rX,rY,rZ".
// Begin looping over the remaining registers.
while (1) { //
OptoReg::Name reg = rm.find_first_elem(); // Get a register
if (!OptoReg::is_valid(reg))
break; // Empty mask, end loop
rm.Remove(reg); // Yank from mask
if (last+1 == reg) { // See if they are adjacent
// Adjacent registers just collect into long runs, no printing.
last = reg;
} else { // Ending some kind of run
if (start == last) { // 1-register run; no special printing
} else if (start+1 == last) {
st->print(","); // 2-register run; print as "rX,rY"
OptoReg::dump(last, st);
} else { // Multi-register run; print as "rX-rZ"
st->print("-");
OptoReg::dump(last, st);
}
st->print(","); // Seperate start of new run
start = last = reg; // Start a new register run
OptoReg::dump(start, st); // Print register
} // End of if ending a register run or not
} // End of while regmask not empty
if (start == last) { // 1-register run; no special printing
} else if (start+1 == last) {
st->print(","); // 2-register run; print as "rX,rY"
OptoReg::dump(last, st);
} else { // Multi-register run; print as "rX-rZ"
st->print("-");
OptoReg::dump(last, st);
}
if (rm.is_AllStack()) st->print("...");
}
st->print("]");
}
#endif
C:\hotspot-69087d08d473\src\share\vm/opto/regmask.hpp
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_OPTO_REGMASK_HPP
#define SHARE_VM_OPTO_REGMASK_HPP
#include "code/vmreg.hpp"
#include "libadt/port.hpp"
#include "opto/optoreg.hpp"
#if defined ADGLOBALS_MD_HPP
# include ADGLOBALS_MD_HPP
#elif defined TARGET_ARCH_MODEL_x86_32
# include "adfiles/adGlobals_x86_32.hpp"
#elif defined TARGET_ARCH_MODEL_x86_64
# include "adfiles/adGlobals_x86_64.hpp"
#elif defined TARGET_ARCH_MODEL_aarch64
# include "adfiles/adGlobals_aarch64.hpp"
#elif defined TARGET_ARCH_MODEL_sparc
# include "adfiles/adGlobals_sparc.hpp"
#elif defined TARGET_ARCH_MODEL_zero
# include "adfiles/adGlobals_zero.hpp"
#elif defined TARGET_ARCH_MODEL_ppc_64
# include "adfiles/adGlobals_ppc_64.hpp"
#endif
// Some fun naming (textual) substitutions:
//
// RegMask::get_low_elem() ==> RegMask::find_first_elem()
// RegMask::Special ==> RegMask::Empty
// RegMask::_flags ==> RegMask::is_AllStack()
// RegMask::operator<<=() ==> RegMask::Insert()
// RegMask::operator>>=() ==> RegMask::Remove()
// RegMask::Union() ==> RegMask::OR
// RegMask::Inter() ==> RegMask::AND
//
// OptoRegister::RegName ==> OptoReg::Name
//
// OptoReg::stack0() ==> _last_Mach_Reg or ZERO in core version
//
// numregs in chaitin ==> proper degree in chaitin
//-------------Non-zero bit search methods used by RegMask---------------------
// Find lowest 1, or return 32 if empty
int find_lowest_bit( uint32 mask );
// Find highest 1, or return 32 if empty
int find_hihghest_bit( uint32 mask );
//------------------------------RegMask----------------------------------------
// The ADL file describes how to print the machine-specific registers, as well
// as any notion of register classes. We provide a register mask, which is
// just a collection of Register numbers.
// The ADLC defines 2 macros, RM_SIZE and FORALL_BODY.
// RM_SIZE is the size of a register mask in words.
// FORALL_BODY replicates a BODY macro once per word in the register mask.
// The usage is somewhat clumsy and limited to the regmask.[h,c]pp files.
// However, it means the ADLC can redefine the unroll macro and all loops
// over register masks will be unrolled by the correct amount.
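// For illustration only: if RM_SIZE were 4, the ADLC output would amount to
//   #define FORALL_BODY BODY(0) BODY(1) BODY(2) BODY(3)
// so a use site such as
//   #define BODY(I) _A[I] = 0;
//   FORALL_BODY
//   #undef BODY
// expands to "_A[0] = 0; _A[1] = 0; _A[2] = 0; _A[3] = 0;", i.e. the loop over
// mask words is unrolled at preprocessing time.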
class RegMask VALUE_OBJ_CLASS_SPEC {
union {
double _dummy_force_double_alignment[RM_SIZE>>1];
// Array of Register Mask bits. This array is large enough to cover
// all the machine registers and all parameters that need to be passed
// on the stack (stack registers) up to some interesting limit. Methods
// that need more parameters will NOT be compiled. On Intel, the limit
// is something like 90+ parameters.
int _A[RM_SIZE];
};
enum {
_WordBits = BitsPerInt,
_LogWordBits = LogBitsPerInt,
_RM_SIZE = RM_SIZE // local constant, imported, then hidden by #undef
};
public:
enum { CHUNK_SIZE = RM_SIZE*_WordBits };
// SlotsPerLong is 2, since slots are 32 bits and longs are 64 bits.
// Also, consider the maximum alignment size for a normally allocated
// value. Since we allocate register pairs but not register quads (at
// present), this alignment is SlotsPerLong (== 2). A normally
// aligned allocated register is either a single register, or a pair
// of adjacent registers, the lower-numbered being even.
// See also is_aligned_pairs() below, and the padding added before
// Matcher::_new_SP to keep allocated pairs aligned properly.
// If we ever go to quad-word allocations, SlotsPerQuad will become
// the controlling alignment constraint. Note that this alignment
// requirement is internal to the allocator, and independent of any
// particular platform.
enum { SlotsPerLong = 2,
SlotsPerVecS = 1,
SlotsPerVecD = 2,
SlotsPerVecX = 4,
SlotsPerVecY = 8 };
// A constructor only used by the ADLC output. All mask fields are filled
// in directly. Calls to this look something like RM(1,2,3,4);
RegMask(
# define BODY(I) int a##I,
FORALL_BODY
# undef BODY
int dummy = 0 ) {
# define BODY(I) _A[I] = a##I;
FORALL_BODY
# undef BODY
}
// Handy copying constructor
RegMask( RegMask *rm ) {
# define BODY(I) _A[I] = rm->_A[I];
FORALL_BODY
# undef BODY
}
// Construct an empty mask
RegMask( ) { Clear(); }
// Construct a mask with a single bit
RegMask( OptoReg::Name reg ) { Clear(); Insert(reg); }
// Check for register being in mask
int Member( OptoReg::Name reg ) const {
assert( reg < CHUNK_SIZE, "" );
return _A[reg>>_LogWordBits] & (1<<(reg&(_WordBits-1)));
}
// The last bit in the register mask indicates that the mask should repeat
// indefinitely with ONE bits. Returns TRUE if mask is infinite or
// unbounded in size. Returns FALSE if mask is finite size.
int is_AllStack() const { return _A[RM_SIZE-1] >> (_WordBits-1); }
// Work around an -xO3 optimization problem in WS6U1. The old way:
// void set_AllStack() { _A[RM_SIZE-1] |= (1<<(_WordBits-1)); }
// will cause _A[RM_SIZE-1] to be clobbered, not updated when set_AllStack()
// follows an Insert() loop, like the one found in init_spill_mask(). Using
// Insert() instead works because the index into _A is computed instead of
// constant. See bug 4665841.
void set_AllStack() { Insert(OptoReg::Name(CHUNK_SIZE-1)); }
// Test for being a not-empty mask.
int is_NotEmpty( ) const {
int tmp = 0;
# define BODY(I) tmp |= _A[I];
FORALL_BODY
# undef BODY
return tmp;
}
// Find lowest-numbered register from mask, or BAD if mask is empty.
OptoReg::Name find_first_elem() const {
int base, bits;
# define BODY(I) if( (bits = _A[I]) != 0 ) base = I<<_LogWordBits; else
FORALL_BODY
# undef BODY
{ base = OptoReg::Bad; bits = 1<<0; }
return OptoReg::Name(base + find_lowest_bit(bits));
}
// Get highest-numbered register from mask, or BAD if mask is empty.
OptoReg::Name find_last_elem() const {
int base, bits;
# define BODY(I) if( (bits = _A[RM_SIZE-1-I]) != 0 ) base = (RM_SIZE-1-I)<<_LogWordBits; else
FORALL_BODY
# undef BODY
{ base = OptoReg::Bad; bits = 1<<0; }
return OptoReg::Name(base + find_hihghest_bit(bits));
}
// Find the lowest-numbered register pair in the mask. Return the
// HIGHEST register number in the pair, or BAD if no pairs.
// Assert that the mask contains only bit pairs.
OptoReg::Name find_first_pair() const;
// Clear out partial bits; leave only aligned adjacent bit pairs.
void clear_to_pairs();
// Smear out partial bits; leave only aligned adjacent bit pairs.
void smear_to_pairs();
// Verify that the mask contains only aligned adjacent bit pairs
void verify_pairs() const { assert( is_aligned_pairs(), "mask is not aligned, adjacent pairs" ); }
// Test that the mask contains only aligned adjacent bit pairs
bool is_aligned_pairs() const;
// mask is a pair of misaligned registers
bool is_misaligned_pair() const { return Size()==2 && !is_aligned_pairs(); }
// Test for single register
int is_bound1() const;
// Test for a single adjacent pair
int is_bound_pair() const;
// Test for a single adjacent set of ideal register's size.
int is_bound(uint ireg) const {
if (is_vector(ireg)) {
if (is_bound_set(num_registers(ireg)))
return true;
} else if (is_bound1() || is_bound_pair()) {
return true;
}
return false;
}
// Find the lowest-numbered register set in the mask. Return the
// HIGHEST register number in the set, or BAD if no sets.
// Assert that the mask contains only bit sets.
OptoReg::Name find_first_set(const int size) const;
// Clear out partial bits; leave only aligned adjacent bit sets of size.
void clear_to_sets(const int size);
// Smear out partial bits to aligned adjacent bit sets.
void smear_to_sets(const int size);
// Verify that the mask contains only aligned adjacent bit sets
void verify_sets(int size) const { assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); }
// Test that the mask contains only aligned adjacent bit sets
bool is_aligned_sets(const int size) const;
// mask is a set of misaligned registers
bool is_misaligned_set(int size) const { return (int)Size()==size && !is_aligned_sets(size);}
// Test for a single adjacent set
int is_bound_set(const int size) const;
static bool is_vector(uint ireg);
static int num_registers(uint ireg);
// Fast overlap test. Non-zero if any registers in common.
int overlap( const RegMask &rm ) const {
return
# define BODY(I) (_A[I] & rm._A[I]) |
FORALL_BODY
# undef BODY
0 ;
}
// Special test for register pressure based splitting
// UP means register only, Register plus stack, or stack only is DOWN
bool is_UP() const;
// Clear a register mask
void Clear( ) {
# define BODY(I) _A[I] = 0;
FORALL_BODY
# undef BODY
}
// Fill a register mask with 1's
void Set_All( ) {
# define BODY(I) _A[I] = -1;
FORALL_BODY
# undef BODY
}
// Insert register into mask
void Insert( OptoReg::Name reg ) {
assert( reg < CHUNK_SIZE, "" );
_A[reg>>_LogWordBits] |= (1<<(reg&(_WordBits-1)));
}
// Remove register from mask
void Remove( OptoReg::Name reg ) {
assert( reg < CHUNK_SIZE, "" );
_A[reg>>_LogWordBits] &= ~(1<<(reg&(_WordBits-1)));
}
// OR 'rm' into 'this'
void OR( const RegMask &rm ) {
# define BODY(I) this->_A[I] |= rm._A[I];
FORALL_BODY
# undef BODY
}
// AND 'rm' into 'this'
void AND( const RegMask &rm ) {
# define BODY(I) this->_A[I] &= rm._A[I];
FORALL_BODY
# undef BODY
}
// Subtract 'rm' from 'this'
void SUBTRACT( const RegMask &rm ) {
# define BODY(I) _A[I] &= ~rm._A[I];
FORALL_BODY
# undef BODY
}
// Compute size of register mask: number of bits
uint Size() const;
#ifndef PRODUCT
void print() const { dump(); }
void dump(outputStream *st = tty) const; // Print a mask
#endif
static const RegMask Empty; // Common empty mask
static bool can_represent(OptoReg::Name reg) {
// NOTE: -1 in the computation reflects the use of the last
// bit of the regmask as an infinite-stack flag.
return (int)reg < (int)(CHUNK_SIZE-1);
}
static bool can_represent_arg(OptoReg::Name reg) {
// NOTE: -SlotsPerVecY in computation reflects the need
// to keep mask aligned for largest value (VecY).
return (int)reg < (int)(CHUNK_SIZE-SlotsPerVecY);
}
};
// Do not use this constant directly in client code!
#undef RM_SIZE
#endif // SHARE_VM_OPTO_REGMASK_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/reg_split.cpp
/*
* Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "libadt/vectset.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/addnode.hpp"
#include "opto/c2compiler.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/chaitin.hpp"
#include "opto/loopnode.hpp"
#include "opto/machnode.hpp"
//------------------------------Split--------------------------------------
// Walk the graph in RPO and for each lrg which spills, propagate reaching
// definitions. During propagation, split the live range around regions of
// High Register Pressure (HRP). If a Def is in a region of Low Register
// Pressure (LRP), it will not get spilled until we encounter a region of
// HRP between it and one of its uses. We will spill at the transition
// point between LRP and HRP. Uses in the HRP region will use the spilled
// Def. The first Use outside the HRP region will generate a SpillCopy to
// hoist the live range back up into a register, and all subsequent uses
// will use that new Def until another HRP region is encountered. Defs in
// HRP regions will get trailing SpillCopies to push the LRG down into the
// stack immediately.
//
// As a side effect, unlink from (hence make dead) coalesced copies.
//
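// Schematic illustration of the policy described above (one live range,
// left to right through the program):
//
//   DEF --(LRP: stays in a register)--> [ HRP region: spill at entry, uses
//   inside read the stack copy ] --> the first USE past the region gets a
//   SpillCopy that hoists the value back into a register --> later uses in
//   LRP code consume that new register def, until the next HRP region.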
static const char out_of_nodes[] = "out of nodes during split";
//------------------------------get_spillcopy_wide-----------------------------
// Get a SpillCopy node with wide-enough masks. Use the 'wide-mask', the
// wide ideal-register spill-mask if possible. If the 'wide-mask' does
// not cover the input (or output), use the input (or output) mask instead.
Node *PhaseChaitin::get_spillcopy_wide( Node *def, Node *use, uint uidx ) {
// If ideal reg doesn't exist we've got a bad schedule happening
// that is forcing us to spill something that isn't spillable.
// Bail rather than abort
uint ireg = def->ideal_reg();
if( ireg == 0 || ireg == Op_RegFlags ) {
assert(false, "attempted to spill a non-spillable item");
C->record_method_not_compilable("attempted to spill a non-spillable item");
return NULL;
}
if (C->check_node_count(NodeLimitFudgeFactor, out_of_nodes)) {
return NULL;
}
const RegMask *i_mask = &def->out_RegMask();
const RegMask *w_mask = C->matcher()->idealreg2spillmask[ireg];
const RegMask *o_mask = use ? &use->in_RegMask(uidx) : w_mask;
const RegMask *w_i_mask = w_mask->overlap( *i_mask ) ? w_mask : i_mask;
const RegMask *w_o_mask;
int num_regs = RegMask::num_registers(ireg);
bool is_vect = RegMask::is_vector(ireg);
if( w_mask->overlap( *o_mask ) && // Overlap AND
((num_regs == 1) // Single use or aligned
|| is_vect // or vector
|| (!is_vect && o_mask->is_aligned_pairs())) ) {
assert(!is_vect || o_mask->is_aligned_sets(num_regs), "vectors are aligned");
// Don't come here for mis-aligned doubles
w_o_mask = w_mask;
} else { // wide ideal mask does not overlap with o_mask
// Mis-aligned doubles come here and XMM->FPR moves on x86.
w_o_mask = o_mask; // Must target desired registers
// Does the ideal-reg-mask overlap with o_mask? I.e., can I use
// a reg-reg move or do I need a trip across register classes
// (and thus through memory)?
if( !C->matcher()->idealreg2regmask[ireg]->overlap( *o_mask) && o_mask->is_UP() )
// Here we assume a trip through memory is required.
w_i_mask = &C->FIRST_STACK_mask();
}
return new (C) MachSpillCopyNode( def, *w_i_mask, *w_o_mask );
}
//------------------------------insert_proj------------------------------------
// Insert the spill at chosen location. Skip over any intervening Proj's or
// Phis. Skip over a CatchNode and projs, inserting in the fall-through block
// instead. Update high-pressure indices. Create a new live range.
void PhaseChaitin::insert_proj( Block *b, uint i, Node *spill, uint maxlrg ) {
// Skip intervening ProjNodes. Do not insert between a ProjNode and
// its definer.
while( i < b->number_of_nodes() &&
(b->get_node(i)->is_Proj() ||
b->get_node(i)->is_Phi() ) )
i++;
// Do not insert between a call and his Catch
if( b->get_node(i)->is_Catch() ) {
// Put the instruction at the top of the fall-thru block.
// Find the fall-thru projection
while( 1 ) {
const CatchProjNode *cp = b->get_node(++i)->as_CatchProj();
if( cp->_con == CatchProjNode::fall_through_index )
break;
}
int sidx = i - b->end_idx()-1;
b = b->_succs[sidx]; // Switch to successor block
i = 1; // Right at start of block
}
b->insert_node(spill, i); // Insert node in block
_cfg.map_node_to_block(spill, b); // Update node->block mapping to reflect
// Adjust the point where we go hi-pressure
if( i <= b->_ihrp_index ) b->_ihrp_index++;
if( i <= b->_fhrp_index ) b->_fhrp_index++;
// Assign a new Live Range Number to the SpillCopy and grow
// the node->live range mapping.
new_lrg(spill,maxlrg);
}
//------------------------------split_DEF--------------------------------------
// There are four categories of Split; UP/DOWN x DEF/USE
// Only three of these really occur as DOWN/USE will always color
// Any Split with a DEF cannot CISC-Spill now. Thus we need
// two helper routines, one for Split DEFS (insert after instruction),
// one for Split USES (insert before instruction). DEF insertion
// happens inside Split, where the Leaveblock array is updated.
uint PhaseChaitin::split_DEF( Node *def, Block *b, int loc, uint maxlrg, Node **Reachblock, Node **debug_defs, GrowableArray<uint> splits, int slidx ) {
#ifdef ASSERT
// Increment the counter for this lrg
splits.at_put(slidx, splits.at(slidx)+1);
#endif
// If we are spilling the memory op for an implicit null check, at the
// null check location (ie - null check is in HRP block) we need to do
// the null-check first, then spill-down in the following block.
// (The implicit_null_check function ensures the use is also dominated
// by the branch-not-taken block.)
Node *be = b->end();
if( be->is_MachNullCheck() && be->in(1) == def && def == b->get_node(loc)) {
// Spill goes in the branch-not-taken block
b = b->_succs[b->get_node(b->end_idx()+1)->Opcode() == Op_IfTrue];
loc = 0; // Just past the Region
}
assert( loc >= 0, "must insert past block head" );
// Get a def-side SpillCopy
Node *spill = get_spillcopy_wide(def,NULL,0);
// Did we fail to split?, then bail
if (!spill) {
return 0;
}
// Insert the spill at chosen location
insert_proj( b, loc+1, spill, maxlrg++);
// Insert new node into Reaches array
Reachblock[slidx] = spill;
// Update debug list of reaching down definitions by adding this one
debug_defs[slidx] = spill;
// return updated count of live ranges
return maxlrg;
}
//------------------------------split_USE--------------------------------------
// Splits at uses can involve redeffing the LRG, so no CISC Spilling there.
// Debug uses want to know if def is already stack enabled.
uint PhaseChaitin::split_USE( Node *def, Block *b, Node *use, uint useidx, uint maxlrg, bool def_down, bool cisc_sp, GrowableArray<uint> splits, int slidx ) {
#ifdef ASSERT
// Increment the counter for this lrg
splits.at_put(slidx, splits.at(slidx)+1);
#endif
// Some setup stuff for handling debug node uses
JVMState* jvms = use->jvms();
uint debug_start = jvms ? jvms->debug_start() : 999999;
uint debug_end = jvms ? jvms->debug_end() : 999999;
//-------------------------------------------
// Check for use of debug info
if (useidx >= debug_start && useidx < debug_end) {
// Actually it's perfectly legal for constant debug info to appear
// just unlikely. In this case the optimizer left a ConI of a 4
// as both inputs to a Phi with only a debug use. It's a single-def
// live range of a rematerializable value. The live range spills,
// rematerializes and now the ConI directly feeds into the debug info.
// assert(!def->is_Con(), "constant debug info already constructed directly");
// Special split handling for Debug Info
// If DEF is DOWN, just hook the edge and return
// If DEF is UP, Split it DOWN for this USE.
if( def->is_Mach() ) {
if( def_down ) {
// DEF is DOWN, so connect USE directly to the DEF
use->set_req(useidx, def);
} else {
// Block and index where the use occurs.
Block *b = _cfg.get_block_for_node(use);
// Put the clone just prior to use
int bindex = b->find_node(use);
// DEF is UP, so must copy it DOWN and hook in USE
// Insert SpillCopy before the USE, which uses DEF as its input,
// and defs a new live range, which is used by this node.
Node *spill = get_spillcopy_wide(def,use,useidx);
// did we fail to split?
if (!spill) {
// Bail
return 0;
}
// insert into basic block
insert_proj( b, bindex, spill, maxlrg++ );
// Use the new split
use->set_req(useidx,spill);
}
// No further split handling needed for this use
return maxlrg;
} // End special splitting for debug info live range
} // If debug info
// CISC-SPILLING
// Finally, check to see if USE is CISC-Spillable, and if so,
// gather_lrg_masks will add the flags bit to its mask, and
// no use side copy is needed. This frees up the live range
// register choices without causing copy coalescing, etc.
if( UseCISCSpill && cisc_sp ) {
int inp = use->cisc_operand();
if( inp != AdlcVMDeps::Not_cisc_spillable )
// Convert operand number to edge index number
inp = use->as_Mach()->operand_index(inp);
if( inp == (int)useidx ) {
use->set_req(useidx, def);
#ifndef PRODUCT
if( TraceCISCSpill ) {
tty->print(" set_split: ");
use->dump();
}
#endif
return maxlrg;
}
}
//-------------------------------------------
// Insert a Copy before the use
// Block and index where the use occurs.
int bindex;
// Phi input spill-copys belong at the end of the prior block
if( use->is_Phi() ) {
b = _cfg.get_block_for_node(b->pred(useidx));
bindex = b->end_idx();
} else {
// Put the clone just prior to use
bindex = b->find_node(use);
}
Node *spill = get_spillcopy_wide( def, use, useidx );
if( !spill ) return 0; // Bailed out
// Insert SpillCopy before the USE, which uses the reaching DEF as
// its input, and defs a new live range, which is used by this node.
insert_proj( b, bindex, spill, maxlrg++ );
// Use the spill/clone
use->set_req(useidx,spill);
// return updated live range count
return maxlrg;
}
//------------------------------clone_node----------------------------
// Clone node with anti dependence check.
Node* clone_node(Node* def, Block *b, Compile* C) {
if (def->needs_anti_dependence_check()) {
#ifdef ASSERT
if (Verbose) {
tty->print_cr("RA attempts to clone node with anti_dependence:");
def->dump(-1); tty->cr();
tty->print_cr("into block:");
b->dump();
}
#endif
if (C->subsume_loads() == true && !C->failing()) {
// Retry with subsume_loads == false
// If this is the first failure, the sentinel string will "stick"
// to the Compile object, and the C2Compiler will see it and retry.
C->record_failure(C2Compiler::retry_no_subsuming_loads());
} else {
// Bailout without retry
C->record_method_not_compilable("RA Split failed: attempt to clone node with anti_dependence");
}
return 0;
}
return def->clone();
}
//------------------------------split_Rematerialize----------------------------
// Clone a local copy of the def.
Node *PhaseChaitin::split_Rematerialize( Node *def, Block *b, uint insidx, uint &maxlrg, GrowableArray<uint> splits, int slidx, uint *lrg2reach, Node **Reachblock, bool walkThru ) {
// The input live ranges will be stretched to the site of the new
// instruction. They might be stretched past a def and will thus
// have the old and new values of the same live range alive at the
// same time - a definite no-no. Split out private copies of
// the inputs.
if( def->req() > 1 ) {
for( uint i = 1; i < def->req(); i++ ) {
Node *in = def->in(i);
uint lidx = _lrg_map.live_range_id(in);
// We do not need this for live ranges that are only defined once.
// However, this is not true for spill copies that are added in this
// Split() pass, since they might get coalesced later on in this pass.
if (lidx < _lrg_map.max_lrg_id() && lrgs(lidx).is_singledef()) {
continue;
}
Block *b_def = _cfg.get_block_for_node(def);
int idx_def = b_def->find_node(def);
Node *in_spill = get_spillcopy_wide( in, def, i );
if( !in_spill ) return 0; // Bailed out
insert_proj(b_def,idx_def,in_spill,maxlrg++);
if( b_def == b )
insidx++;
def->set_req(i,in_spill);
}
}
Node *spill = clone_node(def, b, C);
if (spill == NULL || C->check_node_count(NodeLimitFudgeFactor, out_of_nodes)) {
// Check when generating nodes
return 0;
}
// See if any inputs are currently being spilled, and take the
// latest copy of spilled inputs.
if( spill->req() > 1 ) {
for( uint i = 1; i < spill->req(); i++ ) {
Node *in = spill->in(i);
uint lidx = _lrg_map.find_id(in);
// Walk backwards thru spill copy node intermediates
if (walkThru) {
while (in->is_SpillCopy() && lidx >= _lrg_map.max_lrg_id()) {
in = in->in(1);
lidx = _lrg_map.find_id(in);
}
if (lidx < _lrg_map.max_lrg_id() && lrgs(lidx).is_multidef()) {
// walkThru found a multidef LRG, which is unsafe to use, so
// just keep the original def used in the clone.
in = spill->in(i);
lidx = _lrg_map.find_id(in);
}
}
if (lidx < _lrg_map.max_lrg_id() && lrgs(lidx).reg() >= LRG::SPILL_REG) {
Node *rdef = Reachblock[lrg2reach[lidx]];
if (rdef) {
spill->set_req(i, rdef);
}
}
}
}
assert( spill->out_RegMask().is_UP(), "rematerialize to a reg" );
// Rematerialized op is def->spilled+1
set_was_spilled(spill);
if( _spilled_once.test(def->_idx) )
set_was_spilled(spill);
insert_proj( b, insidx, spill, maxlrg++ );
#ifdef ASSERT
// Increment the counter for this lrg
splits.at_put(slidx, splits.at(slidx)+1);
#endif
// See if the cloned def kills any flags, and copy those kills as well
uint i = insidx+1;
int found_projs = clone_projs( b, i, def, spill, maxlrg);
if (found_projs > 0) {
// Adjust the point where we go hi-pressure
if (i <= b->_ihrp_index) {
b->_ihrp_index += found_projs;
}
if (i <= b->_fhrp_index) {
b->_fhrp_index += found_projs;
}
}
return spill;
}
//------------------------------is_high_pressure-------------------------------
// Function to compute whether or not this live range is "high pressure"
// in this block - whether it spills eagerly or not.
bool PhaseChaitin::is_high_pressure( Block *b, LRG *lrg, uint insidx ) {
if( lrg->_was_spilled1 ) return true;
// Forced spilling due to conflict? Then split only at binding uses
// or defs, not for supposed capacity problems.
// CNC - Turned off 7/8/99, causes too much spilling
// if( lrg->_is_bound ) return false;
// Use float pressure numbers for vectors.
bool is_float_or_vector = lrg->_is_float || lrg->_is_vector;
// Not yet reached the high-pressure cutoff point, so low pressure
uint hrp_idx = is_float_or_vector ? b->_fhrp_index : b->_ihrp_index;
if( insidx < hrp_idx ) return false;
// Register pressure for the block as a whole depends on reg class
int block_pres = is_float_or_vector ? b->_freg_pressure : b->_reg_pressure;
// Bound live ranges will split at the binding points first;
// Intermediate splits should assume the live range's register set
// got "freed up" and that num_regs will become INT_PRESSURE.
int bound_pres = is_float_or_vector ? FLOATPRESSURE : INTPRESSURE;
// Effective register pressure limit.
int lrg_pres = (lrg->get_invalid_mask_size() > lrg->num_regs())
? (lrg->get_invalid_mask_size() >> (lrg->num_regs()-1)) : bound_pres;
// High pressure if block pressure requires more register freedom
// than live range has.
return block_pres >= lrg_pres;
}
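// Mechanical example of the test above (numbers are illustrative only):
// with num_regs() == 2 and get_invalid_mask_size() == 12, lrg_pres becomes
// 12 >> 1 == 6, so the block is considered high pressure once its register
// pressure for the relevant class reaches 6.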
//------------------------------prompt_use---------------------------------
// True if lidx is used before any real register is def'd in the block
bool PhaseChaitin::prompt_use( Block *b, uint lidx ) {
if (lrgs(lidx)._was_spilled2) {
return false;
}
// Scan block for 1st use.
for( uint i = 1; i <= b->end_idx(); i++ ) {
Node *n = b->get_node(i);
// Ignore PHI use, these can be up or down
if (n->is_Phi()) {
continue;
}
for (uint j = 1; j < n->req(); j++) {
if (_lrg_map.find_id(n->in(j)) == lidx) {
return true; // Found 1st use!
}
}
if (n->out_RegMask().is_NotEmpty()) {
return false;
}
}
return false;
}
//------------------------------Split--------------------------------------
//----------Split Routine----------
// ***** NEW SPLITTING HEURISTIC *****
// DEFS: If the DEF is in a High Register Pressure(HRP) Block, split there.
// Else, no split unless there is a HRP block between a DEF and
// one of its uses, and then split at the HRP block.
//
// USES: If USE is in HRP, split at use to leave main LRG on stack.
// Else, hoist LRG back up to register only (ie - split is also DEF)
// We will compute a new maxlrg as we go
uint PhaseChaitin::Split(uint maxlrg, ResourceArea* split_arena) {
NOT_PRODUCT( Compile::TracePhase t3("regAllocSplit", &_t_regAllocSplit, TimeCompiler); )
// Free thread local resources used by this method on exit.
ResourceMark rm(split_arena);
uint bidx, pidx, slidx, insidx, inpidx, twoidx;
uint non_phi = 1, spill_cnt = 0;
Node *n1, *n2, *n3;
Node_List *defs,*phis;
bool *UPblock;
bool u1, u2, u3;
Block *b, *pred;
PhiNode *phi;
GrowableArray<uint> lidxs(split_arena, maxlrg, 0, 0);
// Array of counters to count splits per live range
GrowableArray<uint> splits(split_arena, maxlrg, 0, 0);
#define NEW_SPLIT_ARRAY(type, size)\
(type*) split_arena->allocate_bytes((size) * sizeof(type))
//----------Setup Code----------
// Create a convenient mapping from lrg numbers to reaches/leaves indices
uint *lrg2reach = NEW_SPLIT_ARRAY(uint, maxlrg);
// Keep track of DEFS & Phis for later passes
defs = new Node_List();
phis = new Node_List();
// Gather info on which LRG's are spilling, and build maps
for (bidx = 1; bidx < maxlrg; bidx++) {
if (lrgs(bidx).alive() && lrgs(bidx).reg() >= LRG::SPILL_REG) {
assert(!lrgs(bidx).mask().is_AllStack(),"AllStack should color");
lrg2reach[bidx] = spill_cnt;
spill_cnt++;
lidxs.append(bidx);
#ifdef ASSERT
// Initialize the split counts to zero
splits.append(0);
#endif
#ifndef PRODUCT
if( PrintOpto && WizardMode && lrgs(bidx)._was_spilled1 )
tty->print_cr("Warning, 2nd spill of L%d",bidx);
#endif
}
}
// Create side arrays for propagating reaching defs info.
// Each block needs a node pointer for each spilling live range for the
// Def which is live into the block. Phi nodes handle multiple input
// Defs by querying the output of their predecessor blocks and resolving
// them to a single Def at the phi. The pointer is updated for each
// Def in the block, and then becomes the output for the block when
// processing of the block is complete. We also need to track whether
// a Def is UP or DOWN. UP means that it should get a register (ie -
// it is always in LRP regions), and DOWN means that it is probably
// on the stack (ie - it crosses HRP regions).
Node ***Reaches = NEW_SPLIT_ARRAY( Node**, _cfg.number_of_blocks() + 1);
bool **UP = NEW_SPLIT_ARRAY( bool*, _cfg.number_of_blocks() + 1);
Node **debug_defs = NEW_SPLIT_ARRAY( Node*, spill_cnt );
VectorSet **UP_entry= NEW_SPLIT_ARRAY( VectorSet*, spill_cnt );
// Initialize Reaches & UP
for (bidx = 0; bidx < _cfg.number_of_blocks() + 1; bidx++) {
Reaches[bidx] = NEW_SPLIT_ARRAY( Node*, spill_cnt );
UP[bidx] = NEW_SPLIT_ARRAY( bool, spill_cnt );
Node **Reachblock = Reaches[bidx];
bool *UPblock = UP[bidx];
for( slidx = 0; slidx < spill_cnt; slidx++ ) {
UPblock[slidx] = true; // Assume they start in registers
Reachblock[slidx] = NULL; // Assume that no def is present
}
}
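// Layout recap for the side arrays just built (restating the block comment
// above them): Reaches[b->_pre_order][slidx] holds the node supplying the
// spilling live range lidxs.at(slidx) on exit from block b, and
// UP[b->_pre_order][slidx] records whether that value is expected in a
// register (UP) or on the stack (DOWN) at that point.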
#undef NEW_SPLIT_ARRAY
// Initialize to array of empty vectorsets
for( slidx = 0; slidx < spill_cnt; slidx++ )
UP_entry[slidx] = new VectorSet(split_arena);
//----------PASS 1----------
//----------Propagation & Node Insertion Code----------
// Walk the Blocks in RPO for DEF & USE info
for( bidx = 0; bidx < _cfg.number_of_blocks(); bidx++ ) {
if (C->check_node_count(spill_cnt, out_of_nodes)) {
return 0;
}
b = _cfg.get_block(bidx);
// Reaches & UP arrays for this block
Node** Reachblock = Reaches[b->_pre_order];
UPblock = UP[b->_pre_order];
// Reset counter of start of non-Phi nodes in block
non_phi = 1;
//----------Block Entry Handling----------
// Check for need to insert a new phi
// Cycle through this block's predecessors, collecting Reaches
// info for each spilled LRG. If they are identical, no phi is
// needed. If they differ, check for a phi, and insert if missing,
// or update edges if present. Set current block's Reaches set to
// be either the phi's or the reaching def, as appropriate.
// If no Phi is needed, check if the LRG needs to spill on entry
// to the block due to HRP.
for( slidx = 0; slidx < spill_cnt; slidx++ ) {
// Grab the live range number
uint lidx = lidxs.at(slidx);
// Do not bother splitting or putting in Phis for single-def
// rematerialized live ranges. This happens a lot to constants
// with long live ranges.
if( lrgs(lidx).is_singledef() &&
lrgs(lidx)._def->rematerialize() ) {
// reset the Reaches & UP entries
Reachblock[slidx] = lrgs(lidx)._def;
UPblock[slidx] = true;
// Record following instruction in case 'n' rematerializes and
// kills flags
Block *pred1 = _cfg.get_block_for_node(b->pred(1));
continue;
}
// Initialize needs_phi and needs_split
bool needs_phi = false;
bool needs_split = false;
bool has_phi = false;
// Walk the predecessor blocks to check inputs for that live range
// Grab predecessor block header
n1 = b->pred(1);
// Grab the appropriate reaching def info for inpidx
pred = _cfg.get_block_for_node(n1);
pidx = pred->_pre_order;
Node **Ltmp = Reaches[pidx];
bool *Utmp = UP[pidx];
n1 = Ltmp[slidx];
u1 = Utmp[slidx];
// Initialize node for saving type info
n3 = n1;
u3 = u1;
// Compare inputs to see if a Phi is needed
for( inpidx = 2; inpidx < b->num_preds(); inpidx++ ) {
// Grab predecessor block headers
n2 = b->pred(inpidx);
// Grab the appropriate reaching def info for inpidx
pred = _cfg.get_block_for_node(n2);
pidx = pred->_pre_order;
Ltmp = Reaches[pidx];
Utmp = UP[pidx];
n2 = Ltmp[slidx];
u2 = Utmp[slidx];
// For each LRG, decide if a phi is necessary
if( n1 != n2 ) {
needs_phi = true;
}
// See if the phi has mismatched inputs, UP vs. DOWN
if( n1 && n2 && (u1 != u2) ) {
needs_split = true;
}
// Move n2/u2 to n1/u1 for next iteration
n1 = n2;
u1 = u2;
// Preserve a non-NULL predecessor for later type referencing
if( (n3 == NULL) && (n2 != NULL) ){
n3 = n2;
u3 = u2;
}
} // End for all potential Phi inputs
// check block for appropriate phinode & update edges
for( insidx = 1; insidx <= b->end_idx(); insidx++ ) {
n1 = b->get_node(insidx);
// bail if this is not a phi
phi = n1->is_Phi() ? n1->as_Phi() : NULL;
if( phi == NULL ) {
// Keep track of index of first non-PhiNode instruction in block
non_phi = insidx;
// break out of the for loop as we have handled all phi nodes
break;
}
// must be looking at a phi
if (_lrg_map.find_id(n1) == lidxs.at(slidx)) {
// found the necessary phi
needs_phi = false;
has_phi = true;
// initialize the Reaches entry for this LRG
Reachblock[slidx] = phi;
break;
} // end if found correct phi
} // end for all phi's
// If a phi is needed or one already exists, handle it
if( needs_phi || has_phi ) {
// add new phinode if one not already found
if( needs_phi ) {
// create a new phi node and insert it into the block
// type is taken from left over pointer to a predecessor
assert(n3,"No non-NULL reaching DEF for a Phi");
phi = new (C) PhiNode(b->head(), n3->bottom_type());
// initialize the Reaches entry for this LRG
Reachblock[slidx] = phi;
// add node to block & node_to_block mapping
insert_proj(b, insidx++, phi, maxlrg++);
non_phi++;
// Reset new phi's mapping to be the spilling live range
_lrg_map.map(phi->_idx, lidx);
assert(_lrg_map.find_id(phi) == lidx, "Bad update on Union-Find mapping");
} // end if not found correct phi
// Here you have either found or created the Phi, so record it
assert(phi != NULL,"Must have a Phi Node here");
phis->push(phi);
// PhiNodes should either force the LRG UP or DOWN depending
// on its inputs and the register pressure in the Phi's block.
UPblock[slidx] = true; // Assume new DEF is UP
// If entering a high-pressure area with no immediate use,
// assume Phi is DOWN
if( is_high_pressure( b, &lrgs(lidx), b->end_idx()) && !prompt_use(b,lidx) )
UPblock[slidx] = false;
// If we are not split up/down and all inputs are down, then we
// are down
if( !needs_split && !u3 )
UPblock[slidx] = false;
} // end if phi is needed
// Do not need a phi, so grab the reaching DEF
else {
// Grab predecessor block header
n1 = b->pred(1);
// Grab the appropriate reaching def info for this predecessor
pred = _cfg.get_block_for_node(n1);
pidx = pred->_pre_order;
Node **Ltmp = Reaches[pidx];
bool *Utmp = UP[pidx];
// reset the Reaches & UP entries
Reachblock[slidx] = Ltmp[slidx];
UPblock[slidx] = Utmp[slidx];
} // end else no Phi is needed
} // end for all spilling live ranges
// DEBUG
#ifndef PRODUCT
if(trace_spilling()) {
tty->print("/`\nBlock %d: ", b->_pre_order);
tty->print("Reaching Definitions after Phi handling\n");
for( uint x = 0; x < spill_cnt; x++ ) {
tty->print("Spill Idx %d: UP %d: Node\n",x,UPblock[x]);
if( Reachblock[x] )
Reachblock[x]->dump();
else
tty->print("Undefined\n");
}
}
#endif
//----------Non-Phi Node Splitting----------
// Since phi-nodes have now been handled, the Reachblock array for this
// block is initialized with the correct starting value for the defs which
// reach non-phi instructions in this block. Thus, process non-phi
// instructions normally, inserting SpillCopy nodes for all spill
// locations.
// Memoize any DOWN reaching definitions for use as DEBUG info
for( insidx = 0; insidx < spill_cnt; insidx++ ) {
debug_defs[insidx] = (UPblock[insidx]) ? NULL : Reachblock[insidx];
if( UPblock[insidx] ) // Memoize UP decision at block start
UP_entry[insidx]->set( b->_pre_order );
}
//----------Walk Instructions in the Block and Split----------
// For all non-phi instructions in the block
for( insidx = 1; insidx <= b->end_idx(); insidx++ ) {
Node *n = b->get_node(insidx);
// Find the defining Node's live range index
uint defidx = _lrg_map.find_id(n);
uint cnt = n->req();
if (n->is_Phi()) {
// Skip phi nodes after removing dead copies.
if (defidx < _lrg_map.max_lrg_id()) {
// Check for useless Phis. These appear if we spill, then
// coalesce away copies. Don't touch Phis in spilling live
// ranges; they are busy getting modified in this pass.
if( lrgs(defidx).reg() < LRG::SPILL_REG ) {
uint i;
Node *u = NULL;
// Look for the Phi merging 2 unique inputs
for( i = 1; i < cnt; i++ ) {
// Ignore repeats and self
if( n->in(i) != u && n->in(i) != n ) {
// Found a unique input
if( u != NULL ) // If it's the 2nd, bail out
break;
u = n->in(i); // Else record it
}
}
assert( u, "at least 1 valid input expected" );
if (i >= cnt) { // Found one unique input
assert(_lrg_map.find_id(n) == _lrg_map.find_id(u), "should be the same lrg");
n->replace_by(u); // Then replace with unique input
n->disconnect_inputs(NULL, C);
b->remove_node(insidx);
insidx--;
b->_ihrp_index--;
b->_fhrp_index--;
}
}
}
continue;
}
assert( insidx > b->_ihrp_index ||
(b->_reg_pressure < (uint)INTPRESSURE) ||
b->_ihrp_index > 4000000 ||
b->_ihrp_index >= b->end_idx() ||
!b->get_node(b->_ihrp_index)->is_Proj(), "" );
assert( insidx > b->_fhrp_index ||
(b->_freg_pressure < (uint)FLOATPRESSURE) ||
b->_fhrp_index > 4000000 ||
b->_fhrp_index >= b->end_idx() ||
!b->get_node(b->_fhrp_index)->is_Proj(), "" );
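// _ihrp_index/_fhrp_index mark where integer/float register pressure first
// goes high in this block; an index at or past end_idx() means the block
// never reaches high pressure for that register class.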
// ********** Handle Crossing HRP Boundary **********
if( (insidx == b->_ihrp_index) || (insidx == b->_fhrp_index) ) {
for( slidx = 0; slidx < spill_cnt; slidx++ ) {
// Check for need to split at HRP boundary - split if UP
n1 = Reachblock[slidx];
// bail out if no reaching DEF
if( n1 == NULL ) continue;
// bail out if live range is 'isolated' around inner loop
uint lidx = lidxs.at(slidx);
// If live range is currently UP
if( UPblock[slidx] ) {
// set location to insert spills at
// SPLIT DOWN HERE - NO CISC SPILL
if( is_high_pressure( b, &lrgs(lidx), insidx ) &&
!n1->rematerialize() ) {
// If there is already a valid stack definition available, use it
if( debug_defs[slidx] != NULL ) {
Reachblock[slidx] = debug_defs[slidx];
}
else {
// Insert point is just past last use or def in the block
int insert_point = insidx-1;
while( insert_point > 0 ) {
Node *n = b->get_node(insert_point);
// Hit top of block? Quit going backwards
if (n->is_Phi()) {
break;
}
// Found a def? Better split after it.
if (_lrg_map.live_range_id(n) == lidx) {
break;
}
// Look for a use
uint i;
for( i = 1; i < n->req(); i++ ) {
if (_lrg_map.live_range_id(n->in(i)) == lidx) {
break;
}
}
// Found a use? Better split after it.
if (i < n->req()) {
break;
}
insert_point--;
}
uint orig_eidx = b->end_idx();
maxlrg = split_DEF( n1, b, insert_point, maxlrg, Reachblock, debug_defs, splits, slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
// Spill of NULL check mem op goes into the following block.
if (b->end_idx() > orig_eidx) {
insidx++;
}
}
// This is a new DEF, so update UP
UPblock[slidx] = false;
#ifndef PRODUCT
// DEBUG
if( trace_spilling() ) {
tty->print("\nNew Split DOWN DEF of Spill Idx ");
tty->print("%d, UP %d:\n",slidx,false);
n1->dump();
}
#endif
}
} // end if LRG is UP
} // end for all spilling live ranges
assert( b->get_node(insidx) == n, "got insidx set incorrectly" );
} // end if crossing HRP Boundary
// If the LRG index is out of bounds, then this is a new spillcopy; skip it.
if (defidx >= _lrg_map.max_lrg_id()) {
continue;
}
LRG &deflrg = lrgs(defidx);
uint copyidx = n->is_Copy();
// Remove coalesced copy from CFG
if (copyidx && defidx == _lrg_map.live_range_id(n->in(copyidx))) {
n->replace_by( n->in(copyidx) );
n->set_req( copyidx, NULL );
b->remove_node(insidx--);
b->_ihrp_index--; // Adjust the point where we go hi-pressure
b->_fhrp_index--;
continue;
}
#define DERIVED 0
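// Past 'oopoff', debug inputs come in (derived, base) pairs: an even
// (inpidx - oopoff) offset is the derived pointer, the following input is its base.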
// ********** Handle USES **********
bool nullcheck = false;
// Implicit null checks never use the spilled value
if( n->is_MachNullCheck() )
nullcheck = true;
if( !nullcheck ) {
// Search all inputs for a Spill-USE
JVMState* jvms = n->jvms();
uint oopoff = jvms ? jvms->oopoff() : cnt;
uint old_last = cnt - 1;
for( inpidx = 1; inpidx < cnt; inpidx++ ) {
// Derived/base pairs may be added to our inputs during this loop.
// If inpidx > old_last, then one of these new inputs is being
// handled. Skip the derived part of the pair, but process
// the base like any other input.
if (inpidx > old_last && ((inpidx - oopoff) & 1) == DERIVED) {
continue; // skip derived_debug added below
}
// Get lidx of input
uint useidx = _lrg_map.find_id(n->in(inpidx));
// Not a brand-new split, and it is a spill use
if (useidx < _lrg_map.max_lrg_id() && lrgs(useidx).reg() >= LRG::SPILL_REG) {
// Check for valid reaching DEF
slidx = lrg2reach[useidx];
Node *def = Reachblock[slidx];
assert( def != NULL, "Using Undefined Value in Split()\n");
// (+++) %%%% remove this in favor of pre-pass in matcher.cpp
// monitor references do not care where they live, so just hook
if ( jvms && jvms->is_monitor_use(inpidx) ) {
// The effect of this clone is to drop the node out of the block,
// so that the allocator does not see it anymore, and therefore
// does not attempt to assign it a register.
def = clone_node(def, b, C);
if (def == NULL || C->check_node_count(NodeLimitFudgeFactor, out_of_nodes)) {
return 0;
}
_lrg_map.extend(def->_idx, 0);
_cfg.map_node_to_block(def, b);
n->set_req(inpidx, def);
continue;
}
// Rematerializable? Then clone def at use site instead
// of store/load
if( def->rematerialize() ) {
int old_size = b->number_of_nodes();
def = split_Rematerialize( def, b, insidx, maxlrg, splits, slidx, lrg2reach, Reachblock, true );
if( !def ) return 0; // Bail out
insidx += b->number_of_nodes()-old_size;
}
MachNode *mach = n->is_Mach() ? n->as_Mach() : NULL;
// Base pointers and oopmap references do not care where they live.
if ((inpidx >= oopoff) ||
(mach && mach->ideal_Opcode() == Op_AddP && inpidx == AddPNode::Base)) {
if (def->rematerialize() && lrgs(useidx)._was_spilled2) {
// This def has been rematerialized a couple of times without
// progress. It doesn't care if it lives UP or DOWN, so
// spill it down now.
maxlrg = split_USE(def,b,n,inpidx,maxlrg,false,false,splits,slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
insidx++; // Reset iterator to skip USE side split
} else {
// Just hook the def edge
n->set_req(inpidx, def);
}
if (inpidx >= oopoff) {
// After oopoff, we have derived/base pairs. We must mention all
// derived pointers here as derived/base pairs for GC. If the
// derived value is spilling and we have a copy both in Reachblock
// (called here 'def') and debug_defs[slidx] we need to mention
// both in derived/base pairs or kill one.
Node *derived_debug = debug_defs[slidx];
if( ((inpidx - oopoff) & 1) == DERIVED && // derived vs base?
mach && mach->ideal_Opcode() != Op_Halt &&
derived_debug != NULL &&
derived_debug != def ) { // Actual 2nd value appears
// We have already set 'def' as a derived value.
// Also set debug_defs[slidx] as a derived value.
uint k;
for( k = oopoff; k < cnt; k += 2 )
if( n->in(k) == derived_debug )
break; // Found an instance of debug derived
if( k == cnt ) {// No instance of debug_defs[slidx]
// Add a derived/base pair to cover the debug info.
// We have to process the added base later since it is not
// handled yet at this point but skip derived part.
assert(((n->req() - oopoff) & 1) == DERIVED,
"must match skip condition above");
n->add_req( derived_debug ); // this will be skipped above
n->add_req( n->in(inpidx+1) ); // this will be processed
// Increment cnt to handle added input edges on
// subsequent iterations.
cnt += 2;
}
}
}
continue;
}
// Special logic for DEBUG info
if( jvms && b->_freq > BLOCK_FREQUENCY(0.5) ) {
uint debug_start = jvms->debug_start();
// If this is debug info use & there is a reaching DOWN def
if ((debug_start <= inpidx) && (debug_defs[slidx] != NULL)) {
assert(inpidx < oopoff, "handle only debug info here");
// Just hook it in & move on
n->set_req(inpidx, debug_defs[slidx]);
// (Note that this can make two sides of a split live at the
// same time: The debug def on stack, and another def in a
// register. The GC needs to know about both of them, but any
// derived pointers after oopoff will refer to only one of the
// two defs and the GC would therefore miss the other. Thus
// this hack is only allowed for debug info which is Java state
// and therefore never a derived pointer.)
continue;
}
}
// Grab register mask info
const RegMask &dmask = def->out_RegMask();
const RegMask &umask = n->in_RegMask(inpidx);
bool is_vect = RegMask::is_vector(def->ideal_reg());
assert(inpidx < oopoff, "cannot use-split oop map info");
bool dup = UPblock[slidx];
bool uup = umask.is_UP();
// Need special logic to handle bound USES. Insert a split at this
// bound use if we can't rematerialize the def, or if we need the
// split to form a misaligned pair.
if( !umask.is_AllStack() &&
(int)umask.Size() <= lrgs(useidx).num_regs() &&
(!def->rematerialize() ||
(!is_vect && umask.is_misaligned_pair()))) {
// These need a Split regardless of overlap or pressure
// SPLIT - NO DEF - NO CISC SPILL
maxlrg = split_USE(def,b,n,inpidx,maxlrg,dup,false, splits,slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
insidx++; // Reset iterator to skip USE side split
continue;
}
if (UseFPUForSpilling && n->is_MachCall() && !uup && !dup ) {
// The use at the call can force the def down so insert
// a split before the use to allow the def more freedom.
maxlrg = split_USE(def,b,n,inpidx,maxlrg,dup,false, splits,slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
insidx++; // Reset iterator to skip USE side split
continue;
}
// Here is the logic chart which describes USE Splitting:
// 0 = false or DOWN, 1 = true or UP
//
// Overlap | DEF | USE | Action
//-------------------------------------------------------
// 0 | 0 | 0 | Copy - mem -> mem
// 0 | 0 | 1 | Split-UP - Check HRP
// 0 | 1 | 0 | Split-DOWN - Debug Info?
// 0 | 1 | 1 | Copy - reg -> reg
// 1 | 0 | 0 | Reset Input Edge (no Split)
// 1 | 0 | 1 | Split-UP - Check HRP
// 1 | 1 | 0 | Split-DOWN - Debug Info?
// 1 | 1 | 1 | Reset Input Edge (no Split)
//
// So, if (dup == uup), then overlap test determines action,
// with true being no split, and false being copy. Else,
// if DEF is DOWN, Split-UP, and check HRP to decide on
// resetting DEF. Finally if DEF is UP, Split-DOWN, with
// special handling for Debug Info.
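// Here 'Overlap' is dmask vs. umask register-mask overlap, DEF is 'dup'
// (the current UP/DOWN placement of the reaching def), and USE is 'uup'
// (whether the use's register mask wants the value UP).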
if( dup == uup ) {
if( dmask.overlap(umask) ) {
// Both are either up or down, and there is overlap, No Split
n->set_req(inpidx, def);
}
else { // Both are either up or down, and there is no overlap
if( dup ) { // If UP, reg->reg copy
// COPY ACROSS HERE - NO DEF - NO CISC SPILL
maxlrg = split_USE(def,b,n,inpidx,maxlrg,false,false, splits,slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
insidx++; // Reset iterator to skip USE side split
}
else { // DOWN, mem->mem copy
// COPY UP & DOWN HERE - NO DEF - NO CISC SPILL
// First Split-UP to move value into Register
uint def_ideal = def->ideal_reg();
const RegMask* tmp_rm = Matcher::idealreg2regmask[def_ideal];
Node *spill = new (C) MachSpillCopyNode(def, dmask, *tmp_rm);
insert_proj( b, insidx, spill, maxlrg );
// Then Split-DOWN as if previous Split was DEF
maxlrg = split_USE(spill,b,n,inpidx,maxlrg,false,false, splits,slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
insidx += 2; // Reset iterator to skip USE side splits
}
} // End else no overlap
} // End if dup == uup
// dup != uup, so check dup for direction of Split
else {
if( dup ) { // If UP, Split-DOWN and check Debug Info
// If this node is already a SpillCopy, just patch the edge
// except the case of spilling to stack.
if( n->is_SpillCopy() ) {
RegMask tmp_rm(umask);
tmp_rm.SUBTRACT(Matcher::STACK_ONLY_mask);
if( dmask.overlap(tmp_rm) ) {
if( def != n->in(inpidx) ) {
n->set_req(inpidx, def);
}
continue;
}
}
// COPY DOWN HERE - NO DEF - NO CISC SPILL
maxlrg = split_USE(def,b,n,inpidx,maxlrg,false,false, splits,slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
insidx++; // Reset iterator to skip USE side split
// Check for debug-info split. Capture it for later
// debug splits of the same value
if (jvms && jvms->debug_start() <= inpidx && inpidx < oopoff)
debug_defs[slidx] = n->in(inpidx);
}
else { // DOWN, Split-UP and check register pressure
if( is_high_pressure( b, &lrgs(useidx), insidx ) ) {
// COPY UP HERE - NO DEF - CISC SPILL
maxlrg = split_USE(def,b,n,inpidx,maxlrg,true,true, splits,slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
insidx++; // Reset iterator to skip USE side split
} else { // LRP
// COPY UP HERE - WITH DEF - NO CISC SPILL
maxlrg = split_USE(def,b,n,inpidx,maxlrg,true,false, splits,slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
// Flag this lift-up in a low-pressure block as
// already-spilled, so if it spills again it will
// spill hard (instead of not spilling hard and
// coalescing away).
set_was_spilled(n->in(inpidx));
// Since this is a new DEF, update Reachblock & UP
Reachblock[slidx] = n->in(inpidx);
UPblock[slidx] = true;
insidx++; // Reset iterator to skip USE side split
}
} // End else DOWN
} // End dup != uup
} // End if Spill USE
} // End For All Inputs
} // End If not nullcheck
// ********** Handle DEFS **********
// DEFS either Split DOWN in HRP regions or when the LRG is bound, or
// just reset the Reaches info in LRP regions. DEFS must always update
// UP info.
if( deflrg.reg() >= LRG::SPILL_REG ) { // Spilled?
uint slidx = lrg2reach[defidx];
// Add to defs list for later assignment of new live range number
defs->push(n);
// Set a flag on the Node indicating it has already spilled.
// Only do it for capacity spills, not conflict spills.
if( !deflrg._direct_conflict )
set_was_spilled(n);
assert(!n->is_Phi(),"Cannot insert Phi into DEFS list");
// Grab UP info for DEF
const RegMask &dmask = n->out_RegMask();
bool defup = dmask.is_UP();
uint ireg = n->ideal_reg();
bool is_vect = RegMask::is_vector(ireg);
// Only split at Def if this is a HRP block or bound (and spilled once)
if( !n->rematerialize() &&
(((dmask.is_bound(ireg) || (!is_vect && dmask.is_misaligned_pair())) &&
(deflrg._direct_conflict || deflrg._must_spill)) ||
// Check for LRG being up in a register and we are inside a high
// pressure area. Spill it down immediately.
(defup && is_high_pressure(b,&deflrg,insidx) && !n->is_SpillCopy())) ) {
assert( !n->rematerialize(), "" );
// Do a split at the def site.
maxlrg = split_DEF( n, b, insidx, maxlrg, Reachblock, debug_defs, splits, slidx );
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
// Split DEF's Down
UPblock[slidx] = 0;
#ifndef PRODUCT
// DEBUG
if( trace_spilling() ) {
tty->print("\nNew Split DOWN DEF of Spill Idx ");
tty->print("%d, UP %d:\n",slidx,false);
n->dump();
}
#endif
}
else { // Neither bound nor HRP, must be LRP
// otherwise, just record the def
Reachblock[slidx] = n;
// UP should come from the outRegmask() of the DEF
UPblock[slidx] = defup;
// Update debug list of reaching down definitions, kill if DEF is UP
debug_defs[slidx] = defup ? NULL : n;
#ifndef PRODUCT
// DEBUG
if( trace_spilling() ) {
tty->print("\nNew DEF of Spill Idx ");
tty->print("%d, UP %d:\n",slidx,defup);
n->dump();
}
#endif
} // End else LRP
} // End if spill def
// ********** Split Left Over Mem-Mem Moves **********
// Check for mem-mem copies and split them now. Do not do this
// to copies about to be spilled; they will be Split shortly.
if (copyidx) {
Node *use = n->in(copyidx);
uint useidx = _lrg_map.find_id(use);
if (useidx < _lrg_map.max_lrg_id() && // This is not a new split
OptoReg::is_stack(deflrg.reg()) &&
deflrg.reg() < LRG::SPILL_REG ) { // And DEF is from stack
LRG &uselrg = lrgs(useidx);
if( OptoReg::is_stack(uselrg.reg()) &&
uselrg.reg() < LRG::SPILL_REG && // USE is from stack
deflrg.reg() != uselrg.reg() ) { // Not trivially removed
uint def_ideal_reg = n->bottom_type()->ideal_reg();
const RegMask &def_rm = *Matcher::idealreg2regmask[def_ideal_reg];
const RegMask &use_rm = n->in_RegMask(copyidx);
if( def_rm.overlap(use_rm) && n->is_SpillCopy() ) { // Bug 4707800, 'n' may be a storeSSL
if (C->check_node_count(NodeLimitFudgeFactor, out_of_nodes)) { // Check when generating nodes
return 0;
}
Node *spill = new (C) MachSpillCopyNode(use,use_rm,def_rm);
n->set_req(copyidx,spill);
n->as_MachSpillCopy()->set_in_RegMask(def_rm);
// Put the spill just before the copy
insert_proj( b, insidx++, spill, maxlrg++ );
}
}
}
}
} // End For All Instructions in Block - Non-PHI Pass
// Check if each LRG is live out of this block so as not to propagate
// beyond the last use of a LRG.
for( slidx = 0; slidx < spill_cnt; slidx++ ) {
uint defidx = lidxs.at(slidx);
IndexSet *liveout = _live->live(b);
if( !liveout->member(defidx) ) {
#ifdef ASSERT
// The index defidx is not live. Check the liveout array to ensure that
// it contains no members which compress to defidx. Finding such an
// instance may be a case to add liveout adjustment in compress_uf_map().
// See 5063219.
uint member;
IndexSetIterator isi(liveout);
while ((member = isi.next()) != 0) {
assert(defidx != _lrg_map.find_const(member), "Live out member has not been compressed");
}
#endif
Reachblock[slidx] = NULL;
} else {
assert(Reachblock[slidx] != NULL,"No reaching definition for liveout value");
}
}
#ifndef PRODUCT
if( trace_spilling() )
b->dump();
#endif
} // End For All Blocks
//----------PASS 2----------
// Reset all DEF live range numbers here
for( insidx = 0; insidx < defs->size(); insidx++ ) {
// Grab the def
n1 = defs->at(insidx);
// Set new lidx for DEF
new_lrg(n1, maxlrg++);
}
//----------Phi Node Splitting----------
// Clean up a phi here, and assign a new live range number
// Cycle through this block's predecessors, collecting Reaches
// info for each spilled LRG and update edges.
// Walk the phis list to patch inputs, split phis, and name phis
uint lrgs_before_phi_split = maxlrg;
for( insidx = 0; insidx < phis->size(); insidx++ ) {
Node *phi = phis->at(insidx);
assert(phi->is_Phi(),"This list must only contain Phi Nodes");
Block *b = _cfg.get_block_for_node(phi);
// Grab the live range number
uint lidx = _lrg_map.find_id(phi);
uint slidx = lrg2reach[lidx];
// Update node to lidx map
new_lrg(phi, maxlrg++);
// Get PASS1's up/down decision for the block.
int phi_up = !!UP_entry[slidx]->test(b->_pre_order);
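// ('!!' normalizes the set-membership test to exactly 0 or 1.)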
// Force down if double-spilling live range
if( lrgs(lidx)._was_spilled1 )
phi_up = false;
// When splitting a Phi we can split it normally or "inverted".
// An inverted split makes the splits target the opposite of the
// Phi's UP/DOWN sense; the Phi is then followed by a final def-side
// split to invert back. This changes which blocks the spill code
// ends up in.
// Walk the predecessor blocks and assign the reaching def to the Phi.
// Split Phi nodes by placing USE side splits wherever the reaching
// DEF has the wrong UP/DOWN value.
for( uint i = 1; i < b->num_preds(); i++ ) {
// Get predecessor block pre-order number
Block *pred = _cfg.get_block_for_node(b->pred(i));
pidx = pred->_pre_order;
// Grab reaching def
Node *def = Reaches[pidx][slidx];
Node** Reachblock = Reaches[pidx];
assert( def, "must have reaching def" );
// If input up/down sense and reg-pressure DISagree
if (def->rematerialize()) {
// Place the rematerialized node above any MSCs created during
// phi node splitting. end_idx points at the insertion point
// so look at the node before it.
int insert = pred->end_idx();
while (insert >= 1 &&
pred->get_node(insert - 1)->is_SpillCopy() &&
_lrg_map.find(pred->get_node(insert - 1)) >= lrgs_before_phi_split) {
insert--;
}
def = split_Rematerialize(def, pred, insert, maxlrg, splits, slidx, lrg2reach, Reachblock, false);
if (!def) {
return 0; // Bail out
}
}
// Update the Phi's input edge array
phi->set_req(i,def);
// Grab the UP/DOWN sense for the input
u1 = UP[pidx][slidx];
if( u1 != (phi_up != 0)) {
maxlrg = split_USE(def, b, phi, i, maxlrg, !u1, false, splits,slidx);
// If it wasn't split bail
if (!maxlrg) {
return 0;
}
}
} // End for all inputs to the Phi
} // End for all Phi Nodes
// Update _maxlrg to save Union asserts
_lrg_map.set_max_lrg_id(maxlrg);
//----------PASS 3----------
// Pass over all Phi's to union the live ranges
for( insidx = 0; insidx < phis->size(); insidx++ ) {
Node *phi = phis->at(insidx);
assert(phi->is_Phi(),"This list must only contain Phi Nodes");
// Walk all inputs to Phi and Union input live range with Phi live range
for( uint i = 1; i < phi->req(); i++ ) {
// Grab the input node
Node *n = phi->in(i);
assert(n, "node should exist");
uint lidx = _lrg_map.find(n);
uint pidx = _lrg_map.find(phi);
if (lidx < pidx) {
Union(n, phi);
}
else if(lidx > pidx) {
Union(phi, n);
}
} // End for all inputs to the Phi Node
} // End for all Phi Nodes
// Now union all two address instructions
for (insidx = 0; insidx < defs->size(); insidx++) {
// Grab the def
n1 = defs->at(insidx);
// Set new lidx for DEF & handle 2-addr instructions
if (n1->is_Mach() && ((twoidx = n1->as_Mach()->two_adr()) != 0)) {
assert(_lrg_map.find(n1->in(twoidx)) < maxlrg,"Assigning bad live range index");
// Union the input and output live ranges
uint lr1 = _lrg_map.find(n1);
uint lr2 = _lrg_map.find(n1->in(twoidx));
if (lr1 < lr2) {
Union(n1, n1->in(twoidx));
}
else if (lr1 > lr2) {
Union(n1->in(twoidx), n1);
}
} // End if two address
} // End for all defs
// DEBUG
#ifdef ASSERT
// Validate all live range index assignments
for (bidx = 0; bidx < _cfg.number_of_blocks(); bidx++) {
b = _cfg.get_block(bidx);
for (insidx = 0; insidx <= b->end_idx(); insidx++) {
Node *n = b->get_node(insidx);
uint defidx = _lrg_map.find(n);
assert(defidx < _lrg_map.max_lrg_id(), "Bad live range index in Split");
assert(defidx < maxlrg,"Bad live range index in Split");
}
}
// Issue a warning if splitting made no progress
int noprogress = 0;
for (slidx = 0; slidx < spill_cnt; slidx++) {
if (PrintOpto && WizardMode && splits.at(slidx) == 0) {
tty->print_cr("Failed to split live range %d", lidxs.at(slidx));
//BREAKPOINT;
}
else {
noprogress++;
}
}
if(!noprogress) {
tty->print_cr("Failed to make progress in Split");
//BREAKPOINT;
}
#endif
// Return updated count of live ranges
return maxlrg;
}
C:\hotspot-69087d08d473\src\share\vm/opto/replacednodes.cpp
/*
* Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "opto/cfgnode.hpp"
#include "opto/phaseX.hpp"
#include "opto/replacednodes.hpp"
void ReplacedNodes::allocate_if_necessary() {
if (_replaced_nodes == NULL) {
_replaced_nodes = new GrowableArray<ReplacedNode>();
}
}
bool ReplacedNodes::is_empty() const {
return _replaced_nodes == NULL || _replaced_nodes->length() == 0;
}
bool ReplacedNodes::has_node(const ReplacedNode& r) const {
return _replaced_nodes->find(r) != -1;
}
bool ReplacedNodes::has_target_node(Node* n) const {
for (int i = 0; i < _replaced_nodes->length(); i++) {
if (_replaced_nodes->at(i).improved() == n) {
return true;
}
}
return false;
}
// Record replaced node if not seen before
void ReplacedNodes::record(Node* initial, Node* improved) {
allocate_if_necessary();
ReplacedNode r(initial, improved);
if (!has_node(r)) {
_replaced_nodes->push(r);
}
}
// Copy replaced nodes from one map to another. idx is used to
// identify nodes that are too new to be of interest in the target
// node list.
void ReplacedNodes::transfer_from(const ReplacedNodes& other, uint idx) {
if (other.is_empty()) {
return;
}
allocate_if_necessary();
for (int i = 0; i < other._replaced_nodes->length(); i++) {
ReplacedNode replaced = other._replaced_nodes->at(i);
// Only transfer the nodes that can actually be useful
if (!has_node(replaced) && (replaced.initial()->_idx < idx || has_target_node(replaced.initial()))) {
_replaced_nodes->push(replaced);
}
}
}
void ReplacedNodes::clone() {
if (_replaced_nodes != NULL) {
GrowableArray<ReplacedNode>* replaced_nodes_clone = new GrowableArray<ReplacedNode>();
replaced_nodes_clone->appendAll(_replaced_nodes);
_replaced_nodes = replaced_nodes_clone;
}
}
void ReplacedNodes::reset() {
if (_replaced_nodes != NULL) {
_replaced_nodes->clear();
}
}
// Perform node replacement (used when returning to the caller)
void ReplacedNodes::apply(Node* n, uint idx) {
if (is_empty()) {
return;
}
for (int i = 0; i < _replaced_nodes->length(); i++) {
ReplacedNode replaced = _replaced_nodes->at(i);
// Only apply if improved node was created in a callee to avoid
// issues with irreducible loops in the caller
if (replaced.improved()->_idx >= idx) {
n->replace_edge(replaced.initial(), replaced.improved());
}
}
}
static void enqueue_use(Node* n, Node* use, Unique_Node_List& work) {
if (use->is_Phi()) {
Node* r = use->in(0);
assert(r->is_Region(), "Phi should have Region");
for (uint i = 1; i < use->req(); i++) {
if (use->in(i) == n) {
work.push(r->in(i));
}
}
} else {
work.push(use);
}
}
// Perform node replacement following late inlining
void ReplacedNodes::apply(Compile* C, Node* ctl) {
// ctl is the control on exit of the method that was late inlined
if (is_empty()) {
return;
}
for (int i = 0; i < _replaced_nodes->length(); i++) {
ReplacedNode replaced = _replaced_nodes->at(i);
Node* initial = replaced.initial();
Node* improved = replaced.improved();
assert (ctl != NULL && !ctl->is_top(), "replaced node should have actual control");
ResourceMark rm;
Unique_Node_List work;
// Go over all the uses of the node that is considered for replacement...
for (DUIterator j = initial->outs(); initial->has_out(j); j++) {
Node* use = initial->out(j);
if (use == improved || use->outcnt() == 0) {
continue;
}
work.clear();
enqueue_use(initial, use, work);
bool replace = true;
// Check that this use is dominated by ctl. Go ahead with the
// replacement if it is.
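// The check walks the idom chain (IfNode::up_one_dom) from the use's
// control up toward ctl, giving up after a bounded number of steps;
// floating nodes without a control edge are expanded into their uses instead.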
while (work.size() != 0 && replace) {
Node* n = work.pop();
if (use->outcnt() == 0) {
continue;
}
if (n->is_CFG() || (n->in(0) != NULL && !n->in(0)->is_top())) {
int depth = 0;
Node *m = n;
if (!n->is_CFG()) {
n = n->in(0);
}
assert(n->is_CFG(), "should be CFG now");
while(n != ctl) {
n = IfNode::up_one_dom(n);
depth++;
// limit search depth
if (depth >= 100 || n == NULL) {
replace = false;
break;
}
}
} else {
for (DUIterator k = n->outs(); n->has_out(k); k++) {
enqueue_use(n, n->out(k), work);
}
}
}
if (replace) {
bool is_in_table = C->initial_gvn()->hash_delete(use);
int replaced = use->replace_edge(initial, improved);
if (is_in_table) {
C->initial_gvn()->hash_find_insert(use);
}
C->record_for_igvn(use);
assert(replaced > 0, "inconsistent");
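// replace_edge() removed this use from initial's out-list, so step the
// iterator back to avoid skipping the next use.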
--j;
}
}
}
}
void ReplacedNodes::dump(outputStream *st) const {
if (!is_empty()) {
st->print("replaced nodes: ");
for (int i = 0; i < _replaced_nodes->length(); i++) {
st->print("%d->%d", _replaced_nodes->at(i).initial()->_idx, _replaced_nodes->at(i).improved()->_idx);
if (i < _replaced_nodes->length()-1) {
st->print(",");
}
}
}
}
// Merge two lists of replaced nodes at a point where control flow paths merge
void ReplacedNodes::merge_with(const ReplacedNodes& other) {
if (is_empty()) {
return;
}
if (other.is_empty()) {
reset();
return;
}
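// Keep only the entries that are also present in 'other' (set intersection),
// compacting the surviving entries in place.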
int shift = 0;
int len = _replaced_nodes->length();
for (int i = 0; i < len; i++) {
if (!other.has_node(_replaced_nodes->at(i))) {
shift++;
} else if (shift > 0) {
_replaced_nodes->at_put(i-shift, _replaced_nodes->at(i));
}
}
if (shift > 0) {
_replaced_nodes->trunc_to(len - shift);
}
}
C:\hotspot-69087d08d473\src\share\vm/opto/replacednodes.hpp
/*
* Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_OPTO_REPLACEDNODES_HPP
#define SHARE_VM_OPTO_REPLACEDNODES_HPP
#include "opto/connode.hpp"
// During parsing, when a node is "improved",
// GraphKit::replace_in_map() is called to update the current map so
// that the improved node is used from that point
// on. GraphKit::replace_in_map() doesn't operate on the callers maps
// and so some optimization opportunities may be lost. The
// ReplacedNodes class addresses that problem.
//
// A ReplacedNodes object is a list of pairs of nodes. Every
// SafePointNode carries a ReplacedNodes object. Every time
// GraphKit::replace_in_map() is called, a new pair of nodes is pushed
// on the list of replaced nodes. When control flow paths merge, their
// replaced nodes are also merged. When parsing exits a method to
// return to a caller, the replaced nodes on the exit path are used to
// update the caller's map.
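//
// Typical flow (sketch): record(initial, improved) each time
// GraphKit::replace_in_map() improves a node; merge_with() where control
// flow paths meet; apply() on method exit to rewrite the caller's uses of
// 'initial' to 'improved'.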
class ReplacedNodes VALUE_OBJ_CLASS_SPEC {
private:
class ReplacedNode VALUE_OBJ_CLASS_SPEC {
private:
Node* _initial;
Node* _improved;
public:
ReplacedNode() : _initial(NULL), _improved(NULL) {}
ReplacedNode(Node* initial, Node* improved) : _initial(initial), _improved(improved) {}
Node* initial() const { return _initial; }
Node* improved() const { return _improved; }
bool operator==(const ReplacedNode& other) {
return _initial == other._initial && _improved == other._improved;
}
};
GrowableArray<ReplacedNode>* _replaced_nodes;
void allocate_if_necessary();
bool has_node(const ReplacedNode& r) const;
bool has_target_node(Node* n) const;
public:
ReplacedNodes()
: _replaced_nodes(NULL) {}
void clone();
void record(Node* initial, Node* improved);
void transfer_from(const ReplacedNodes& other, uint idx);
void reset();
void apply(Node* n, uint idx);
void merge_with(const ReplacedNodes& other);
bool is_empty() const;
void dump(outputStream *st) const;
void apply(Compile* C, Node* ctl);
};
#endif // SHARE_VM_OPTO_REPLACEDNODES_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/rootnode.cpp
/*
* Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/phaseX.hpp"
#include "opto/regmask.hpp"
#include "opto/rootnode.hpp"
#include "opto/subnode.hpp"
#include "opto/type.hpp"
//------------------------------Ideal------------------------------------------
// Remove dead inputs
Node *RootNode::Ideal(PhaseGVN *phase, bool can_reshape) {
for( uint i = 1; i < req(); i++ ) { // For all inputs
// Check for and remove dead inputs
if( phase->type(in(i)) == Type::TOP ) {
del_req(i--); // Delete TOP inputs
}
}
// I used to do tail-splitting in the Ideal graph here, but it does not
// work. The tail-splitting forces values live into the Return to be
// ready at a point which dominates the split returns. This forces Stores
// to be hoisted high. The "proper" fix would be to split Stores down
// each path, but this makes the split unprofitable. If we want to do this
// optimization, it needs to be done after allocation so we can count all
// the instructions needing to be cloned in the cost metric.
// There used to be a spoof here for caffeine marks which completely
// eliminated very simple self-recursions, but it's not worth it.
// Deep inlining of self-calls gets nearly all of the same benefits.
// If we want to get the rest of the win later, we should pattern match
// simple recursive call trees to closed-form solutions.
return NULL; // No further opportunities exposed
}
//=============================================================================
HaltNode::HaltNode( Node *ctrl, Node *frameptr ) : Node(TypeFunc::Parms) {
Node* top = Compile::current()->top();
init_req(TypeFunc::Control, ctrl );
init_req(TypeFunc::I_O, top);
init_req(TypeFunc::Memory, top);
init_req(TypeFunc::FramePtr, frameptr );
init_req(TypeFunc::ReturnAdr,top);
}
const Type *HaltNode::bottom_type() const { return Type::BOTTOM; }
//------------------------------Ideal------------------------------------------
Node *HaltNode::Ideal(PhaseGVN *phase, bool can_reshape) {
return remove_dead_region(phase, can_reshape) ? this : NULL;
}
//------------------------------Value------------------------------------------
const Type *HaltNode::Value( PhaseTransform *phase ) const {
return ( phase->type(in(TypeFunc::Control)) == Type::TOP)
? Type::TOP
: Type::BOTTOM;
}
const RegMask &HaltNode::out_RegMask() const {
return RegMask::Empty;
}
C:\hotspot-69087d08d473\src\share\vm/opto/rootnode.hpp
/*
* Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_OPTO_ROOTNODE_HPP
#define SHARE_VM_OPTO_ROOTNODE_HPP
#include "opto/loopnode.hpp"
//------------------------------RootNode---------------------------------------
// The one-and-only before-all-else and after-all-else RootNode. The RootNode
// represents what happens if the user runs the whole program repeatedly. The
// RootNode produces the initial values of I/O and memory for the program or
// procedure start.
class RootNode : public LoopNode {
public:
RootNode( ) : LoopNode(0,0) {
init_class_id(Class_Root);
del_req(2);
del_req(1);
}
virtual int Opcode() const;
virtual const Node *is_block_proj() const { return this; }
virtual const Type *bottom_type() const { return Type::BOTTOM; }
virtual Node *Identity( PhaseTransform *phase ) { return this; }
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
virtual const Type *Value( PhaseTransform *phase ) const { return Type::BOTTOM; }
};
//------------------------------HaltNode---------------------------------------
// Throw an exception & die
class HaltNode : public Node {
public:
HaltNode( Node *ctrl, Node *frameptr );
virtual int Opcode() const;
virtual bool pinned() const { return true; };
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
virtual const Type *Value( PhaseTransform *phase ) const;
virtual const Type *bottom_type() const;
virtual bool is_CFG() const { return true; }
virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
virtual bool depends_only_on_test() const { return false; }
virtual const Node *is_block_proj() const { return this; }
virtual const RegMask &out_RegMask() const;
virtual uint ideal_reg() const { return NotAMachineReg; }
virtual uint match_edge(uint idx) const { return 0; }
};
#endif // SHARE_VM_OPTO_ROOTNODE_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/runtime.cpp
/*
* Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "classfile/systemDictionary.hpp"
#include "classfile/vmSymbols.hpp"
#include "code/compiledIC.hpp"
#include "code/icBuffer.hpp"
#include "code/nmethod.hpp"
#include "code/pcDesc.hpp"
#include "code/scopeDesc.hpp"
#include "code/vtableStubs.hpp"
#include "compiler/compileBroker.hpp"
#include "compiler/compilerOracle.hpp"
#include "compiler/oopMap.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#include "gc_interface/collectedHeap.hpp"
#include "interpreter/bytecode.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/linkResolver.hpp"
#include "memory/barrierSet.hpp"
#include "memory/gcLocker.inline.hpp"
#include "memory/oopFactory.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "opto/addnode.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/connode.hpp"
#include "opto/graphKit.hpp"
#include "opto/machnode.hpp"
#include "opto/matcher.hpp"
#include "opto/memnode.hpp"
#include "opto/mulnode.hpp"
#include "opto/runtime.hpp"
#include "opto/subnode.hpp"
#include "runtime/fprofiler.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/javaCalls.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/signature.hpp"
#include "runtime/threadCritical.hpp"
#include "runtime/vframe.hpp"
#include "runtime/vframeArray.hpp"
#include "runtime/vframe_hp.hpp"
#include "utilities/copy.hpp"
#include "utilities/preserveException.hpp"
#if defined AD_MD_HPP
# include AD_MD_HPP
#elif defined TARGET_ARCH_MODEL_x86_32
# include "adfiles/ad_x86_32.hpp"
#elif defined TARGET_ARCH_MODEL_x86_64
# include "adfiles/ad_x86_64.hpp"
#elif defined TARGET_ARCH_MODEL_aarch64
# include "adfiles/ad_aarch64.hpp"
#elif defined TARGET_ARCH_MODEL_sparc
# include "adfiles/ad_sparc.hpp"
#elif defined TARGET_ARCH_MODEL_zero
# include "adfiles/ad_zero.hpp"
#elif defined TARGET_ARCH_MODEL_ppc_64
# include "adfiles/ad_ppc_64.hpp"
#endif
// For debugging purposes:
// To force FullGCALot inside a runtime function, add the following two lines
//
// Universe::release_fullgc_alot_dummy();
// MarkSweep::invoke(0, "Debugging");
//
// At command line specify the parameters: -XX:+FullGCALot -XX:FullGCALotStart=100000000
// GHASH block processing
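// Builds the call signature (domain/range TypeTuple) for the ghash_processBlocks
// stub: (state, subkeyH, data, blocks) -> void.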
const TypeFunc* OptoRuntime::ghash_processBlocks_Type() {
int argcnt = 4;
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // state
fields[argp++] = TypePtr::NOTNULL; // subkeyH
fields[argp++] = TypePtr::NOTNULL; // data
fields[argp++] = TypeInt::INT; // blocks
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// result type needed
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = NULL; // void
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
// Compiled code entry points
address OptoRuntime::_new_instance_Java = NULL;
address OptoRuntime::_new_array_Java = NULL;
address OptoRuntime::_new_array_nozero_Java = NULL;
address OptoRuntime::_multianewarray2_Java = NULL;
address OptoRuntime::_multianewarray3_Java = NULL;
address OptoRuntime::_multianewarray4_Java = NULL;
address OptoRuntime::_multianewarray5_Java = NULL;
address OptoRuntime::_multianewarrayN_Java = NULL;
address OptoRuntime::_g1_wb_pre_Java = NULL;
address OptoRuntime::_g1_wb_post_Java = NULL;
address OptoRuntime::_vtable_must_compile_Java = NULL;
address OptoRuntime::_complete_monitor_locking_Java = NULL;
address OptoRuntime::_rethrow_Java = NULL;
address OptoRuntime::_slow_arraycopy_Java = NULL;
address OptoRuntime::_register_finalizer_Java = NULL;
# ifdef ENABLE_ZAP_DEAD_LOCALS
address OptoRuntime::_zap_dead_Java_locals_Java = NULL;
address OptoRuntime::_zap_dead_native_locals_Java = NULL;
# endif
ExceptionBlob* OptoRuntime::_exception_blob;
// This should be called in an assertion at the start of OptoRuntime routines
// which are entered from compiled code (all of them)
#ifdef ASSERT
static bool check_compiled_frame(JavaThread* thread) {
assert(thread->last_frame().is_runtime_frame(), "cannot call runtime directly from compiled code");
RegisterMap map(thread, false);
frame caller = thread->last_frame().sender(&map);
assert(caller.is_compiled_frame(), "not being called from compiled like code");
return true;
}
#endif // ASSERT
#define gen(env, var, type_func_gen, c_func, fancy_jump, pass_tls, save_arg_regs, return_pc) \
var = generate_stub(env, type_func_gen, CAST_FROM_FN_PTR(address, c_func), #var, fancy_jump, pass_tls, save_arg_regs, return_pc); \
if (var == NULL) { return false; }
bool OptoRuntime::generate(ciEnv* env) {
generate_exception_blob();
// Note: tls: Means fetching the return oop out of the thread-local storage
//
// variable/name type-function-gen , runtime method ,fncy_jp, tls,save_args,retpc
// -------------------------------------------------------------------------------------------------------------------------------
gen(env, _new_instance_Java , new_instance_Type , new_instance_C , 0 , true , false, false);
gen(env, _new_array_Java , new_array_Type , new_array_C , 0 , true , false, false);
gen(env, _new_array_nozero_Java , new_array_Type , new_array_nozero_C , 0 , true , false, false);
gen(env, _multianewarray2_Java , multianewarray2_Type , multianewarray2_C , 0 , true , false, false);
gen(env, _multianewarray3_Java , multianewarray3_Type , multianewarray3_C , 0 , true , false, false);
gen(env, _multianewarray4_Java , multianewarray4_Type , multianewarray4_C , 0 , true , false, false);
gen(env, _multianewarray5_Java , multianewarray5_Type , multianewarray5_C , 0 , true , false, false);
gen(env, _multianewarrayN_Java , multianewarrayN_Type , multianewarrayN_C , 0 , true , false, false);
gen(env, _g1_wb_pre_Java , g1_wb_pre_Type , SharedRuntime::g1_wb_pre , 0 , false, false, false);
gen(env, _g1_wb_post_Java , g1_wb_post_Type , SharedRuntime::g1_wb_post , 0 , false, false, false);
gen(env, _complete_monitor_locking_Java , complete_monitor_enter_Type , SharedRuntime::complete_monitor_locking_C, 0, false, false, false);
gen(env, _rethrow_Java , rethrow_Type , rethrow_C , 2 , true , false, true );
gen(env, _slow_arraycopy_Java , slow_arraycopy_Type , SharedRuntime::slow_arraycopy_C , 0 , false, false, false);
gen(env, _register_finalizer_Java , register_finalizer_Type , register_finalizer , 0 , false, false, false);
# ifdef ENABLE_ZAP_DEAD_LOCALS
gen(env, _zap_dead_Java_locals_Java , zap_dead_locals_Type , zap_dead_Java_locals_C , 0 , false, true , false );
gen(env, _zap_dead_native_locals_Java , zap_dead_locals_Type , zap_dead_native_locals_C , 0 , false, true , false );
# endif
return true;
}
#undef gen
// Helper method to do generation of RunTimeStub's
address OptoRuntime::generate_stub( ciEnv* env,
TypeFunc_generator gen, address C_function,
const char *name, int is_fancy_jump,
bool pass_tls,
bool save_argument_registers,
bool return_pc ) {
ResourceMark rm;
Compile C( env, gen, C_function, name, is_fancy_jump, pass_tls, save_argument_registers, return_pc );
return C.stub_entry_point();
}
const char* OptoRuntime::stub_name(address entry) {
#ifndef PRODUCT
CodeBlob* cb = CodeCache::find_blob(entry);
RuntimeStub* rs =(RuntimeStub *)cb;
assert(rs != NULL && rs->is_runtime_stub(), "not a runtime stub");
return rs->name();
#else
// Fast implementation for product mode (maybe it should be inlined too)
return "runtime stub";
#endif
}
//=============================================================================
// Opto compiler runtime routines
//=============================================================================
//=============================allocation======================================
// We failed the fast-path allocation. Now we need to do a scavenge or GC
// and try allocation again.
void OptoRuntime::new_store_pre_barrier(JavaThread* thread) {
// After any safepoint, just before going back to compiled code,
// we inform the GC that we will be doing initializing writes to
// this object in the future without emitting card-marks, so
// GC may take any compensating steps.
// NOTE: Keep this code consistent with GraphKit::store_barrier.
oop new_obj = thread->vm_result();
if (new_obj == NULL) return;
assert(Universe::heap()->can_elide_tlab_store_barriers(),
"compiler must check this first");
// GC may decide to give back a safer copy of new_obj.
new_obj = Universe::heap()->new_store_pre_barrier(thread, new_obj);
thread->set_vm_result(new_obj);
}
// object allocation
JRT_BLOCK_ENTRY(void, OptoRuntime::new_instance_C(Klass* klass, JavaThread* thread))
JRT_BLOCK;
#ifndef PRODUCT
SharedRuntime::_new_instance_ctr++; // new instance requires GC
#endif
assert(check_compiled_frame(thread), "incorrect caller");
// These checks are cheap to make and support reflective allocation.
int lh = klass->layout_helper();
if (Klass::layout_helper_needs_slow_path(lh) || !InstanceKlass::cast(klass)->is_initialized()) {
Handle holder(THREAD, klass->klass_holder()); // keep the klass alive
klass->check_valid_for_instantiation(false, THREAD);
if (!HAS_PENDING_EXCEPTION) {
InstanceKlass::cast(klass)->initialize(THREAD);
}
}
if (!HAS_PENDING_EXCEPTION) {
// Scavenge and allocate an instance.
Handle holder(THREAD, klass->klass_holder()); // keep the klass alive
oop result = InstanceKlass::cast(klass)->allocate_instance(THREAD);
thread->set_vm_result(result);
// Pass oops back through thread local storage. Our apparent type to Java
// is that we return an oop, but we can block on exit from this routine and
// a GC can trash the oop in C's return register. The generated stub will
// fetch the oop from TLS after any possible GC.
}
deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
JRT_BLOCK_END;
if (GraphKit::use_ReduceInitialCardMarks()) {
// inform GC that we won't do card marks for initializing writes.
new_store_pre_barrier(thread);
}
JRT_END
// array allocation
JRT_BLOCK_ENTRY(void, OptoRuntime::new_array_C(Klass* array_type, int len, JavaThread *thread))
JRT_BLOCK;
#ifndef PRODUCT
SharedRuntime::_new_array_ctr++; // new array requires GC
#endif
assert(check_compiled_frame(thread), "incorrect caller");
// Scavenge and allocate an instance.
oop result;
if (array_type->oop_is_typeArray()) {
// The oopFactory likes to work with the element type.
// (We could bypass the oopFactory, since it doesn't add much value.)
BasicType elem_type = TypeArrayKlass::cast(array_type)->element_type();
result = oopFactory::new_typeArray(elem_type, len, THREAD);
} else {
// Although the oopFactory likes to work with the elem_type,
// the compiler prefers the array_type, since it must already have
// that latter value in hand for the fast path.
Handle holder(THREAD, array_type->klass_holder()); // keep the array klass alive
Klass* elem_type = ObjArrayKlass::cast(array_type)->element_klass();
result = oopFactory::new_objArray(elem_type, len, THREAD);
}
// Pass oops back through thread local storage. Our apparent type to Java
// is that we return an oop, but we can block on exit from this routine and
// a GC can trash the oop in C's return register. The generated stub will
// fetch the oop from TLS after any possible GC.
deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
thread->set_vm_result(result);
JRT_BLOCK_END;
if (GraphKit::use_ReduceInitialCardMarks()) {
// inform GC that we won't do card marks for initializing writes.
new_store_pre_barrier(thread);
}
JRT_END
// array allocation without zeroing
JRT_BLOCK_ENTRY(void, OptoRuntime::new_array_nozero_C(Klass* array_type, int len, JavaThread *thread))
JRT_BLOCK;
#ifndef PRODUCT
SharedRuntime::_new_array_ctr++; // new array requires GC
#endif
assert(check_compiled_frame(thread), "incorrect caller");
// Scavenge and allocate an instance.
oop result;
assert(array_type->oop_is_typeArray(), "should be called only for type array");
// The oopFactory likes to work with the element type.
BasicType elem_type = TypeArrayKlass::cast(array_type)->element_type();
result = oopFactory::new_typeArray_nozero(elem_type, len, THREAD);
// Pass oops back through thread local storage. Our apparent type to Java
// is that we return an oop, but we can block on exit from this routine and
// a GC can trash the oop in C's return register. The generated stub will
// fetch the oop from TLS after any possible GC.
deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
thread->set_vm_result(result);
JRT_BLOCK_END;
if (GraphKit::use_ReduceInitialCardMarks()) {
// inform GC that we won't do card marks for initializing writes.
new_store_pre_barrier(thread);
}
oop result = thread->vm_result();
if ((len > 0) && (result != NULL) &&
is_deoptimized_caller_frame(thread)) {
// Zero array here if the caller is deoptimized.
int size = ((typeArrayOop)result)->object_size();
BasicType elem_type = TypeArrayKlass::cast(array_type)->element_type();
const size_t hs = arrayOopDesc::header_size(elem_type);
// Align to next 8 bytes to avoid trashing the array's length.
const size_t aligned_hs = align_object_offset(hs);
HeapWord* obj = (HeapWord*)result;
if (aligned_hs > hs) {
Copy::zero_to_words(obj+hs, aligned_hs-hs);
}
// Optimized zeroing.
Copy::fill_to_aligned_words(obj+aligned_hs, size-aligned_hs);
}
JRT_END
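// Worked note on the zeroing above (explanatory, assuming the usual array
// layout): Copy::fill_to_aligned_words requires an 8-byte-aligned start, so
// the header size is rounded *up* to aligned_hs and any payload words in the
// gap [hs, aligned_hs) are cleared separately with zero_to_words. Rounding
// down instead would make the aligned fill start inside the header and could
// overwrite the array length, which is exactly what the alignment guards
// against.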
// Note: multianewarray for one dimension is handled inline by GraphKit::new_array.
// multianewarray for 2 dimensions
JRT_ENTRY(void, OptoRuntime::multianewarray2_C(Klass* elem_type, int len1, int len2, JavaThread *thread))
#ifndef PRODUCT
SharedRuntime::_multi2_ctr++; // multianewarray for 2 dimensions
#endif
assert(check_compiled_frame(thread), "incorrect caller");
assert(elem_type->is_klass(), "not a class");
jint dims[2];
dims[0] = len1;
dims[1] = len2;
Handle holder(THREAD, elem_type->klass_holder()); // keep the klass alive
oop obj = ArrayKlass::cast(elem_type)->multi_allocate(2, dims, THREAD);
deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
thread->set_vm_result(obj);
JRT_END
// multianewarray for 3 dimensions
JRT_ENTRY(void, OptoRuntime::multianewarray3_C(Klass* elem_type, int len1, int len2, int len3, JavaThread *thread))
#ifndef PRODUCT
SharedRuntime::_multi3_ctr++; // multianewarray for 3 dimensions
#endif
assert(check_compiled_frame(thread), "incorrect caller");
assert(elem_type->is_klass(), "not a class");
jint dims[3];
dims[0] = len1;
dims[1] = len2;
dims[2] = len3;
Handle holder(THREAD, elem_type->klass_holder()); // keep the klass alive
oop obj = ArrayKlass::cast(elem_type)->multi_allocate(3, dims, THREAD);
deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
thread->set_vm_result(obj);
JRT_END
// multianewarray for 4 dimensions
JRT_ENTRY(void, OptoRuntime::multianewarray4_C(Klass* elem_type, int len1, int len2, int len3, int len4, JavaThread *thread))
#ifndef PRODUCT
SharedRuntime::_multi4_ctr++; // multianewarray for 4 dimensions
#endif
assert(check_compiled_frame(thread), "incorrect caller");
assert(elem_type->is_klass(), "not a class");
jint dims[4];
dims[0] = len1;
dims[1] = len2;
dims[2] = len3;
dims[3] = len4;
Handle holder(THREAD, elem_type->klass_holder()); // keep the klass alive
oop obj = ArrayKlass::cast(elem_type)->multi_allocate(4, dims, THREAD);
deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
thread->set_vm_result(obj);
JRT_END
// multianewarray for 5 dimensions
JRT_ENTRY(void, OptoRuntime::multianewarray5_C(Klass* elem_type, int len1, int len2, int len3, int len4, int len5, JavaThread *thread))
#ifndef PRODUCT
SharedRuntime::_multi5_ctr++; // multianewarray for 5 dimensions
#endif
assert(check_compiled_frame(thread), "incorrect caller");
assert(elem_type->is_klass(), "not a class");
jint dims[5];
dims[0] = len1;
dims[1] = len2;
dims[2] = len3;
dims[3] = len4;
dims[4] = len5;
Handle holder(THREAD, elem_type->klass_holder()); // keep the klass alive
oop obj = ArrayKlass::cast(elem_type)->multi_allocate(5, dims, THREAD);
deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
thread->set_vm_result(obj);
JRT_END
JRT_ENTRY(void, OptoRuntime::multianewarrayN_C(Klass* elem_type, arrayOopDesc* dims, JavaThread *thread))
assert(check_compiled_frame(thread), "incorrect caller");
assert(elem_type->is_klass(), "not a class");
assert(oop(dims)->is_typeArray(), "not an array");
ResourceMark rm;
jint len = dims->length();
assert(len > 0, "Dimensions array should contain data");
jint *j_dims = typeArrayOop(dims)->int_at_addr(0);
jint *c_dims = NEW_RESOURCE_ARRAY(jint, len);
Copy::conjoint_jints_atomic(j_dims, c_dims, len);
Handle holder(THREAD, elem_type->klass_holder()); // keep the klass alive
oop obj = ArrayKlass::cast(elem_type)->multi_allocate(len, c_dims, THREAD);
deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
thread->set_vm_result(obj);
JRT_END
const TypeFunc *OptoRuntime::new_instance_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Klass to be allocated
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
// create result type (range)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Returned oop
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc *OptoRuntime::athrow_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Exception oop to be thrown
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
// create result type (range)
fields = TypeTuple::fields(0);
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc *OptoRuntime::new_array_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // element klass
fields[TypeFunc::Parms+1] = TypeInt::INT; // array size
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
// create result type (range)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Returned oop
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc *OptoRuntime::multianewarray_Type(int ndim) {
// create input type (domain)
const int nargs = ndim + 1;
const Type **fields = TypeTuple::fields(nargs);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // element klass
for( int i = 1; i < nargs; i++ )
fields[TypeFunc::Parms + i] = TypeInt::INT; // array size
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+nargs, fields);
// create result type (range)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Returned oop
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
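// For illustration (derived from the loop above, not generated code):
// multianewarray_Type(2) describes the signature used by multianewarray2_C,
// conceptually
//
//   // domain: { Parms+0: klass, Parms+1: len1 (int), Parms+2: len2 (int) }
//   // range : { Parms+0: raw pointer to the newly allocated array }
//
// and each additional dimension simply appends one more TypeInt::INT slot.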
const TypeFunc *OptoRuntime::multianewarray2_Type() {
return multianewarray_Type(2);
}
const TypeFunc *OptoRuntime::multianewarray3_Type() {
return multianewarray_Type(3);
}
const TypeFunc *OptoRuntime::multianewarray4_Type() {
return multianewarray_Type(4);
}
const TypeFunc *OptoRuntime::multianewarray5_Type() {
return multianewarray_Type(5);
}
const TypeFunc *OptoRuntime::multianewarrayN_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // element klass
fields[TypeFunc::Parms+1] = TypeInstPtr::NOTNULL; // array of dim sizes
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
// create result type (range)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Returned oop
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc *OptoRuntime::g1_wb_pre_Type() {
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value
fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
// create result type (range)
fields = TypeTuple::fields(0);
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc *OptoRuntime::g1_wb_post_Type() {
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Card addr
fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
// create result type (range)
fields = TypeTuple::fields(0);
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc *OptoRuntime::uncommon_trap_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(1);
// trap_request integer (encodes the deoptimization reason and action)
fields[TypeFunc::Parms+0] = TypeInt::INT;
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
// create result type (range)
fields = TypeTuple::fields(0);
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
return TypeFunc::make(domain, range);
}
# ifdef ENABLE_ZAP_DEAD_LOCALS
// Type used for stub generation for zap_dead_locals.
// No inputs or outputs
const TypeFunc *OptoRuntime::zap_dead_locals_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(0);
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms,fields);
// create result type (range)
fields = TypeTuple::fields(0);
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms,fields);
return TypeFunc::make(domain,range);
}
# endif
//-----------------------------------------------------------------------------
// Monitor Handling
const TypeFunc *OptoRuntime::complete_monitor_enter_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Object to be Locked
fields[TypeFunc::Parms+1] = TypeRawPtr::BOTTOM; // Address of stack location for lock
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2,fields);
// create result type (range)
fields = TypeTuple::fields(0);
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
return TypeFunc::make(domain,range);
}
//-----------------------------------------------------------------------------
const TypeFunc *OptoRuntime::complete_monitor_exit_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Object to be Locked
fields[TypeFunc::Parms+1] = TypeRawPtr::BOTTOM; // Address of stack location for lock
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2,fields);
// create result type (range)
fields = TypeTuple::fields(0);
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
return TypeFunc::make(domain,range);
}
const TypeFunc* OptoRuntime::flush_windows_Type() {
// create input type (domain)
const Type** fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = NULL; // void
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms, fields);
// create result type
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = NULL; // void
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc* OptoRuntime::l2f_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeLong::LONG;
fields[TypeFunc::Parms+1] = Type::HALF;
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
// create result type (range)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = Type::FLOAT;
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc* OptoRuntime::modf_Type() {
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = Type::FLOAT;
fields[TypeFunc::Parms+1] = Type::FLOAT;
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
// create result type (range)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = Type::FLOAT;
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc *OptoRuntime::Math_D_D_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(2);
// double argument (the Type::HALF below is its second slot)
fields[TypeFunc::Parms+0] = Type::DOUBLE;
fields[TypeFunc::Parms+1] = Type::HALF;
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
// create result type (range)
fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = Type::DOUBLE;
fields[TypeFunc::Parms+1] = Type::HALF;
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+2, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc* OptoRuntime::Math_DD_D_Type() {
const Type **fields = TypeTuple::fields(4);
fields[TypeFunc::Parms+0] = Type::DOUBLE;
fields[TypeFunc::Parms+1] = Type::HALF;
fields[TypeFunc::Parms+2] = Type::DOUBLE;
fields[TypeFunc::Parms+3] = Type::HALF;
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+4, fields);
// create result type (range)
fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = Type::DOUBLE;
fields[TypeFunc::Parms+1] = Type::HALF;
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+2, fields);
return TypeFunc::make(domain, range);
}
//-------------- currentTimeMillis, currentTimeNanos, etc
const TypeFunc* OptoRuntime::void_long_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(0);
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+0, fields);
// create result type (range)
fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeLong::LONG;
fields[TypeFunc::Parms+1] = Type::HALF;
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+2, fields);
return TypeFunc::make(domain, range);
}
// arraycopy stub variations:
enum ArrayCopyType {
ac_fast, // void(ptr, ptr, size_t)
ac_checkcast, // int(ptr, ptr, size_t, size_t, ptr)
ac_slow, // void(ptr, int, ptr, int, int)
ac_generic // int(ptr, int, ptr, int, int)
};
static const TypeFunc* make_arraycopy_Type(ArrayCopyType act) {
// create input type (domain)
int num_args = (act == ac_fast ? 3 : 5);
int num_size_args = (act == ac_fast ? 1 : act == ac_checkcast ? 2 : 0);
int argcnt = num_args;
LP64_ONLY(argcnt += num_size_args); // halfwords for lengths
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // src
if (num_size_args == 0) {
fields[argp++] = TypeInt::INT; // src_pos
}
fields[argp++] = TypePtr::NOTNULL; // dest
if (num_size_args == 0) {
fields[argp++] = TypeInt::INT; // dest_pos
fields[argp++] = TypeInt::INT; // length
}
while (num_size_args-- > 0) {
fields[argp++] = TypeX_X; // size in whatevers (size_t)
LP64_ONLY(fields[argp++] = Type::HALF); // other half of long length
}
if (act == ac_checkcast) {
fields[argp++] = TypePtr::NOTNULL; // super_klass
}
assert(argp == TypeFunc::Parms+argcnt, "correct decoding of act");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// create result type if needed
int retcnt = (act == ac_checkcast || act == ac_generic ? 1 : 0);
fields = TypeTuple::fields(1);
if (retcnt == 0)
fields[TypeFunc::Parms+0] = NULL; // void
else
fields[TypeFunc::Parms+0] = TypeInt::INT; // status result, if needed
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+retcnt, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc* OptoRuntime::fast_arraycopy_Type() {
// This signature is simple: Two base pointers and a size_t.
return make_arraycopy_Type(ac_fast);
}
const TypeFunc* OptoRuntime::checkcast_arraycopy_Type() {
// An extension of fast_arraycopy_Type which adds type checking.
return make_arraycopy_Type(ac_checkcast);
}
const TypeFunc* OptoRuntime::slow_arraycopy_Type() {
// This signature is exactly the same as System.arraycopy.
// There are no intptr_t (int/long) arguments.
return make_arraycopy_Type(ac_slow);
}
const TypeFunc* OptoRuntime::generic_arraycopy_Type() {
// This signature is like System.arraycopy, except that it returns status.
return make_arraycopy_Type(ac_generic);
}
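// Summary of the four variants above (restating the ArrayCopyType comments):
//
//   fast_arraycopy_Type()      void (ptr src, ptr dst, size_t words)
//   checkcast_arraycopy_Type() int  (ptr src, ptr dst, size_t, size_t, ptr super_klass)
//   slow_arraycopy_Type()      void (oop src, int src_pos, oop dst, int dst_pos, int length)
//   generic_arraycopy_Type()   int  (oop src, int src_pos, oop dst, int dst_pos, int length)
//
// On LP64 each size_t argument additionally carries a Type::HALF slot, as
// built in make_arraycopy_Type.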
const TypeFunc* OptoRuntime::array_fill_Type() {
const Type** fields;
int argp = TypeFunc::Parms;
if (CCallingConventionRequiresIntsAsLongs) {
// create input type (domain): pointer, int, size_t
fields = TypeTuple::fields(3 LP64_ONLY( + 2));
fields[argp++] = TypePtr::NOTNULL;
fields[argp++] = TypeLong::LONG;
fields[argp++] = Type::HALF;
} else {
// create input type (domain): pointer, int, size_t
fields = TypeTuple::fields(3 LP64_ONLY( + 1));
fields[argp++] = TypePtr::NOTNULL;
fields[argp++] = TypeInt::INT;
}
fields[argp++] = TypeX_X; // size in whatevers (size_t)
LP64_ONLY(fields[argp++] = Type::HALF); // other half of long length
const TypeTuple *domain = TypeTuple::make(argp, fields);
// create result type
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = NULL; // void
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
// for aescrypt encrypt/decrypt operations, just three pointers returning void (length is constant)
const TypeFunc* OptoRuntime::aescrypt_block_Type() {
// create input type (domain)
int num_args = 3;
if (Matcher::pass_original_key_for_aes()) {
num_args = 4;
}
int argcnt = num_args;
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // src
fields[argp++] = TypePtr::NOTNULL; // dest
fields[argp++] = TypePtr::NOTNULL; // k array
if (Matcher::pass_original_key_for_aes()) {
fields[argp++] = TypePtr::NOTNULL; // original k array
}
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// no result type needed
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = NULL; // void
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
/**
* int updateBytesCRC32(int crc, byte* b, int len)
*/
const TypeFunc* OptoRuntime::updateBytesCRC32_Type() {
// create input type (domain)
int num_args = 3;
int argcnt = num_args;
if (CCallingConventionRequiresIntsAsLongs) {
argcnt += 2;
}
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
if (CCallingConventionRequiresIntsAsLongs) {
fields[argp++] = TypeLong::LONG; // crc
fields[argp++] = Type::HALF;
fields[argp++] = TypePtr::NOTNULL; // src
fields[argp++] = TypeLong::LONG; // len
fields[argp++] = Type::HALF;
} else {
fields[argp++] = TypeInt::INT; // crc
fields[argp++] = TypePtr::NOTNULL; // src
fields[argp++] = TypeInt::INT; // len
}
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// result type needed
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeInt::INT; // crc result
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
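// Note on the TypeLong::LONG / Type::HALF pairs used above (and in several
// signatures below): Opto represents every long and double as two adjacent
// tuple slots, the value plus a Type::HALF placeholder for its second half.
// When CCallingConventionRequiresIntsAsLongs widens an int argument such as
// 'crc' or 'len' to a long, the signature must therefore reserve both slots,
// e.g.
//
//   fields[argp++] = TypeLong::LONG; // widened int value
//   fields[argp++] = Type::HALF;     // second half of the two-slot long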
// for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning int
const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
// create input type (domain)
int num_args = 5;
if (Matcher::pass_original_key_for_aes()) {
num_args = 6;
}
int argcnt = num_args;
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // src
fields[argp++] = TypePtr::NOTNULL; // dest
fields[argp++] = TypePtr::NOTNULL; // k array
fields[argp++] = TypePtr::NOTNULL; // r array
fields[argp++] = TypeInt::INT; // src len
if (Matcher::pass_original_key_for_aes()) {
fields[argp++] = TypePtr::NOTNULL; // original k array
}
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// returning cipher len (int)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeInt::INT;
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
/*
* void implCompress(byte[] buf, int ofs)
*/
const TypeFunc* OptoRuntime::sha_implCompress_Type() {
// create input type (domain)
int num_args = 2;
int argcnt = num_args;
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // buf
fields[argp++] = TypePtr::NOTNULL; // state
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// no result type needed
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = NULL; // void
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
/*
* int implCompressMultiBlock(byte[] b, int ofs, int limit)
*/
const TypeFunc* OptoRuntime::digestBase_implCompressMB_Type() {
// create input type (domain)
int num_args = 4;
int argcnt = num_args;
if(CCallingConventionRequiresIntsAsLongs) {
argcnt += 2;
}
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
if(CCallingConventionRequiresIntsAsLongs) {
fields[argp++] = TypePtr::NOTNULL; // buf
fields[argp++] = TypePtr::NOTNULL; // state
fields[argp++] = TypeLong::LONG; // ofs
fields[argp++] = Type::HALF;
fields[argp++] = TypeLong::LONG; // limit
fields[argp++] = Type::HALF;
} else {
fields[argp++] = TypePtr::NOTNULL; // buf
fields[argp++] = TypePtr::NOTNULL; // state
fields[argp++] = TypeInt::INT; // ofs
fields[argp++] = TypeInt::INT; // limit
}
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// returning ofs (int)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeInt::INT; // ofs
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc* OptoRuntime::multiplyToLen_Type() {
// create input type (domain)
int num_args = 6;
int argcnt = num_args;
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // x
fields[argp++] = TypeInt::INT; // xlen
fields[argp++] = TypePtr::NOTNULL; // y
fields[argp++] = TypeInt::INT; // ylen
fields[argp++] = TypePtr::NOTNULL; // z
fields[argp++] = TypeInt::INT; // zlen
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// no result type needed
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = NULL;
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc* OptoRuntime::squareToLen_Type() {
// create input type (domain)
int num_args = 4;
int argcnt = num_args;
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // x
fields[argp++] = TypeInt::INT; // len
fields[argp++] = TypePtr::NOTNULL; // z
fields[argp++] = TypeInt::INT; // zlen
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// no result type needed
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = NULL;
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
// for mulAdd calls, 2 pointers and 3 ints, returning int
const TypeFunc* OptoRuntime::mulAdd_Type() {
// create input type (domain)
int num_args = 5;
int argcnt = num_args;
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // out
fields[argp++] = TypePtr::NOTNULL; // in
fields[argp++] = TypeInt::INT; // offset
fields[argp++] = TypeInt::INT; // len
fields[argp++] = TypeInt::INT; // k
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// returning carry (int)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeInt::INT;
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc* OptoRuntime::montgomeryMultiply_Type() {
// create input type (domain)
int num_args = 7;
int argcnt = num_args;
if (CCallingConventionRequiresIntsAsLongs) {
argcnt++; // additional placeholder
}
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // a
fields[argp++] = TypePtr::NOTNULL; // b
fields[argp++] = TypePtr::NOTNULL; // n
if (CCallingConventionRequiresIntsAsLongs) {
fields[argp++] = TypeLong::LONG; // len
fields[argp++] = TypeLong::HALF; // placeholder
} else {
fields[argp++] = TypeInt::INT; // len
}
fields[argp++] = TypeLong::LONG; // inv
fields[argp++] = Type::HALF;
fields[argp++] = TypePtr::NOTNULL; // result
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// result type needed
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypePtr::NOTNULL;
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc* OptoRuntime::montgomerySquare_Type() {
// create input type (domain)
int num_args = 6;
int argcnt = num_args;
if (CCallingConventionRequiresIntsAsLongs) {
argcnt++; // additional placeholder
}
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // a
fields[argp++] = TypePtr::NOTNULL; // n
if (CCallingConventionRequiresIntsAsLongs) {
fields[argp++] = TypeLong::LONG; // len
fields[argp++] = TypeLong::HALF; // placeholder
} else {
fields[argp++] = TypeInt::INT; // len
}
fields[argp++] = TypeLong::LONG; // inv
fields[argp++] = Type::HALF;
fields[argp++] = TypePtr::NOTNULL; // result
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
// result type needed
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypePtr::NOTNULL;
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
//------------- Interpreter state access for on stack replacement
const TypeFunc* OptoRuntime::osr_end_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeRawPtr::BOTTOM; // OSR temp buf
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
// create result type
fields = TypeTuple::fields(1);
// fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // locked oop
fields[TypeFunc::Parms+0] = NULL; // void
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
//-------------- methodData update helpers
const TypeFunc* OptoRuntime::profile_receiver_type_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeAryPtr::NOTNULL; // methodData pointer
fields[TypeFunc::Parms+1] = TypeInstPtr::BOTTOM; // receiver oop
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
// create result type
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = NULL; // void
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain,range);
}
JRT_LEAF(void, OptoRuntime::profile_receiver_type_C(DataLayout* data, oopDesc* receiver))
if (receiver == NULL) return;
Klass* receiver_klass = receiver->klass();
intptr_t* mdp = ((intptr_t*)(data)) + DataLayout::header_size_in_cells();
int empty_row = -1; // free row, if any is encountered
// ReceiverTypeData* vc = new ReceiverTypeData(mdp);
for (uint row = 0; row < ReceiverTypeData::row_limit(); row++) {
// if (vc->receiver(row) == receiver_klass)
int receiver_off = ReceiverTypeData::receiver_cell_index(row);
intptr_t row_recv = *(mdp + receiver_off);
if (row_recv == (intptr_t) receiver_klass) {
// vc->set_receiver_count(row, vc->receiver_count(row) + DataLayout::counter_increment);
int count_off = ReceiverTypeData::receiver_count_cell_index(row);
*(mdp + count_off) += DataLayout::counter_increment;
return;
} else if (row_recv == 0) {
// else if (vc->receiver(row) == NULL)
empty_row = (int) row;
}
}
if (empty_row != -1) {
int receiver_off = ReceiverTypeData::receiver_cell_index(empty_row);
// vc->set_receiver(empty_row, receiver_klass);
*(mdp + receiver_off) = (intptr_t) receiver_klass;
// vc->set_receiver_count(empty_row, DataLayout::counter_increment);
int count_off = ReceiverTypeData::receiver_count_cell_index(empty_row);
*(mdp + count_off) = DataLayout::counter_increment;
} else {
// Receiver did not match any saved receiver and there is no empty row for it.
// Increment total counter to indicate polymorphic case.
intptr_t* count_p = (intptr_t*)(((byte*)(data)) + in_bytes(CounterData::count_offset()));
*count_p += DataLayout::counter_increment;
}
JRT_END
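// Illustrative view of the raw cell layout walked above (assumed; it mirrors
// the commented-out ReceiverTypeData accessor calls): each receiver row owns
// two cells relative to 'mdp',
//
//   mdp[ReceiverTypeData::receiver_cell_index(row)]       // recorded Klass*, 0 if the row is free
//   mdp[ReceiverTypeData::receiver_count_cell_index(row)] // hit count, bumped by counter_increment
//
// and the overflow/polymorphic case falls back to the plain CounterData count.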
//-------------------------------------------------------------------------------------
// register policy
bool OptoRuntime::is_callee_saved_register(MachRegisterNumbers reg) {
assert(reg >= 0 && reg < _last_Mach_Reg, "must be a machine register");
switch (register_save_policy[reg]) {
case 'C': return false; //SOC
case 'E': return true ; //SOE
case 'N': return false; //NS
case 'A': return false; //AS
}
ShouldNotReachHere();
return false;
}
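// Illustrative use (hypothetical caller, not in this file):
//
//   if (OptoRuntime::is_callee_saved_register(reg)) {
//     // save-on-entry: the callee preserves 'reg' across the call
//   } else {
//     // SOC / NS / AS: treat the value in 'reg' as killed by the call
//   }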
//-----------------------------------------------------------------------
// Exceptions
//
static void trace_exception(oop exception_oop, address exception_pc, const char* msg) PRODUCT_RETURN;
// This method is an entry point that is always called from a C++ method, not
// directly from compiled code. Compiled code calls handle_exception_C (below), which in turn enters this helper.
// We can't allow async exception to be installed during exception processing.
JRT_ENTRY_NO_ASYNC(address, OptoRuntime::handle_exception_C_helper(JavaThread* thread, nmethod* &nm))
// Do not confuse exception_oop with pending_exception. The exception_oop
// is only used to pass arguments into the method. Not for general
// exception handling. DO NOT CHANGE IT to use pending_exception, since
// the runtime stubs checks this on exit.
assert(thread->exception_oop() != NULL, "exception oop is found");
address handler_address = NULL;
Handle exception(thread, thread->exception_oop());
address pc = thread->exception_pc();
// Clear out the exception oop and pc since looking up an
// exception handler can cause class loading, which might throw an
// exception and those fields are expected to be clear during
// normal bytecode execution.
thread->clear_exception_oop_and_pc();
if (TraceExceptions) {
trace_exception(exception(), pc, "");
}
// for AbortVMOnException flag
NOT_PRODUCT(Exceptions::debug_check_abort(exception));
#ifdef ASSERT
if (!(exception->is_a(SystemDictionary::Throwable_klass()))) {
// should throw an exception here
ShouldNotReachHere();
}
#endif
// new exception handling: this method is entered only from adapters
// exceptions from compiled java methods are handled in compiled code
// using rethrow node
nm = CodeCache::find_nmethod(pc);
assert(nm != NULL, "No NMethod found");
if (nm->is_native_method()) {
fatal("Native method should not have path to exception handling");
} else {
// we are switching to the old paradigm: search for the exception handler in caller_frame
// instead of in the exception handler of caller_frame.sender()
if (JvmtiExport::can_post_on_exceptions()) {
// "Full-speed catching" is not necessary here,
// since we're notifying the VM on every catch.
// Force deoptimization and the rest of the lookup
// will be fine.
deoptimize_caller_frame(thread);
}
// Check the stack guard pages. If enabled, look for handler in this frame;
// otherwise, forcibly unwind the frame.
//
// 4826555: use default current sp for reguard_stack instead of &nm: it's more accurate.
bool force_unwind = !thread->reguard_stack();
bool deopting = false;
if (nm->is_deopt_pc(pc)) {
deopting = true;
RegisterMap map(thread, false);
frame deoptee = thread->last_frame().sender(&map);
assert(deoptee.is_deoptimized_frame(), "must be deopted");
// Adjust the pc back to the original throwing pc
pc = deoptee.pc();
}
// If we are forcing an unwind because of stack overflow then deopt is
// irrelevant since we are throwing the frame away anyway.
if (deopting && !force_unwind) {
handler_address = SharedRuntime::deopt_blob()->unpack_with_exception();
} else {
handler_address =
force_unwind ? NULL : nm->handler_for_exception_and_pc(exception, pc);
if (handler_address == NULL) {
bool recursive_exception = false;
handler_address = SharedRuntime::compute_compiled_exc_handler(nm, pc, exception, force_unwind, true, recursive_exception);
assert (handler_address != NULL, "must have compiled handler");
// Update the exception cache only when the unwind was not forced
// and there didn't happen another exception during the computation of the
// compiled exception handler. Checking for exception oop equality is not
// sufficient because some exceptions are pre-allocated and reused.
if (!force_unwind && !recursive_exception) {
nm->add_handler_for_exception_and_pc(exception,pc,handler_address);
}
} else {
#ifdef ASSERT
bool recursive_exception = false;
address computed_address = SharedRuntime::compute_compiled_exc_handler(nm, pc, exception, force_unwind, true, recursive_exception);
assert(recursive_exception || (handler_address == computed_address), err_msg("Handler address inconsistency: " PTR_FORMAT " != " PTR_FORMAT,
p2i(handler_address), p2i(computed_address)));
#endif
}
}
thread->set_exception_pc(pc);
thread->set_exception_handler_pc(handler_address);
// Check if the exception PC is a MethodHandle call site.
thread->set_is_method_handle_return(nm->is_method_handle_return(pc));
}
// Restore correct return pc. Was saved above.
thread->set_exception_oop(exception());
return handler_address;
JRT_END
// We are entering here from exception_blob
// If there is a compiled exception handler in this method, we will continue there;
// otherwise we will unwind the stack and continue at the caller of top frame method
// Note we enter without the usual JRT wrapper. We will call a helper routine that
// will do the normal VM entry. We do it this way so that we can see if the nmethod
// we looked up the handler for has been deoptimized in the meantime. If it has been
// we must not use the handler and instead return the deopt blob.
address OptoRuntime::handle_exception_C(JavaThread* thread) {
//
// We are in Java not VM and in debug mode we have a NoHandleMark
//
#ifndef PRODUCT
SharedRuntime::_find_handler_ctr++; // find exception handler
#endif
debug_only(NoHandleMark __hm;)
nmethod* nm = NULL;
address handler_address = NULL;
{
// Enter the VM
ResetNoHandleMark rnhm;
handler_address = handle_exception_C_helper(thread, nm);
}
// Back in java: Use no oops, DON'T safepoint
// Now check to see if the handler we are returning is in a now
// deoptimized frame
if (nm != NULL) {
RegisterMap map(thread, false);
frame caller = thread->last_frame().sender(&map);
#ifdef ASSERT
assert(caller.is_compiled_frame(), "must be");
#endif // ASSERT
if (caller.is_deoptimized_frame()) {
handler_address = SharedRuntime::deopt_blob()->unpack_with_exception();
}
}
return handler_address;
}
//------------------------------rethrow----------------------------------------
// We get here after compiled code has executed a 'RethrowNode'. The callee
// is either throwing or rethrowing an exception. The callee-save registers
// have been restored, synchronized objects have been unlocked and the callee
// stack frame has been removed. The return address was passed in.
// Exception oop is passed as the 1st argument. This routine is then called
// from the stub. On exit, we know where to jump in the caller's code.
// After this C code exits, the stub will pop its frame and end in a jump
// (instead of a return). We enter the caller's default handler.
//
// This must be JRT_LEAF:
// - caller will not change its state as we cannot block on exit,
// therefore raw_exception_handler_for_return_address is all it takes
// to handle deoptimized blobs
//
// However, there needs to be a safepoint check in the middle! So compiled
// safepoints are completely watertight.
//
// Thus, it cannot be a leaf since it contains the No_GC_Verifier.
//
// *THIS IS NOT RECOMMENDED PROGRAMMING STYLE*
//
address OptoRuntime::rethrow_C(oopDesc* exception, JavaThread* thread, address ret_pc) {
#ifndef PRODUCT
SharedRuntime::_rethrow_ctr++; // count rethrows
#endif
assert (exception != NULL, "should have thrown a NullPointerException");
#ifdef ASSERT
if (!(exception->is_a(SystemDictionary::Throwable_klass()))) {
// should throw an exception here
ShouldNotReachHere();
}
#endif
thread->set_vm_result(exception);
// Frame not compiled (handles deoptimization blob)
return SharedRuntime::raw_exception_handler_for_return_address(thread, ret_pc);
}
const TypeFunc *OptoRuntime::rethrow_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Exception oop
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1,fields);
// create result type (range)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Exception oop
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
return TypeFunc::make(domain, range);
}
void OptoRuntime::deoptimize_caller_frame(JavaThread *thread, bool doit) {
// Deoptimize the caller before continuing, as the compiled
// exception handler table may not be valid.
if (!StressCompiledExceptionHandlers && doit) {
deoptimize_caller_frame(thread);
}
}
void OptoRuntime::deoptimize_caller_frame(JavaThread *thread) {
// Called from within the owner thread, so no need for safepoint
RegisterMap reg_map(thread);
frame stub_frame = thread->last_frame();
assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check");
frame caller_frame = stub_frame.sender(&reg_map);
// Deoptimize the caller frame.
Deoptimization::deoptimize_frame(thread, caller_frame.id());
}
bool OptoRuntime::is_deoptimized_caller_frame(JavaThread *thread) {
// Called from within the owner thread, so no need for safepoint
RegisterMap reg_map(thread);
frame stub_frame = thread->last_frame();
assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check");
frame caller_frame = stub_frame.sender(&reg_map);
return caller_frame.is_deoptimized_frame();
}
const TypeFunc *OptoRuntime::register_finalizer_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(1);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // oop; Receiver
// // The JavaThread* is passed to each routine as the last argument
// fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // JavaThread *; Executing thread
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1,fields);
// create result type (range)
fields = TypeTuple::fields(0);
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
return TypeFunc::make(domain,range);
}
//-----------------------------------------------------------------------------
// Dtrace support. entry and exit probes have the same signature
const TypeFunc *OptoRuntime::dtrace_method_entry_exit_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeRawPtr::BOTTOM; // Thread-local storage
fields[TypeFunc::Parms+1] = TypeMetadataPtr::BOTTOM; // Method*; Method we are entering
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2,fields);
// create result type (range)
fields = TypeTuple::fields(0);
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
return TypeFunc::make(domain,range);
}
const TypeFunc *OptoRuntime::dtrace_object_alloc_Type() {
// create input type (domain)
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeRawPtr::BOTTOM; // Thread-local storage
fields[TypeFunc::Parms+1] = TypeInstPtr::NOTNULL; // oop; newly allocated object
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2,fields);
// create result type (range)
fields = TypeTuple::fields(0);
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
return TypeFunc::make(domain,range);
}
JRT_ENTRY_NO_ASYNC(void, OptoRuntime::register_finalizer(oopDesc* obj, JavaThread* thread))
assert(obj->is_oop(), "must be a valid oop");
assert(obj->klass()->has_finalizer(), "shouldn't be here otherwise");
InstanceKlass::register_finalizer(instanceOop(obj), CHECK);
JRT_END
//-----------------------------------------------------------------------------
NamedCounter * volatile OptoRuntime::_named_counters = NULL;
//
// dump the collected NamedCounters.
//
void OptoRuntime::print_named_counters() {
int total_lock_count = 0;
int eliminated_lock_count = 0;
NamedCounter* c = _named_counters;
while (c) {
if (c->tag() == NamedCounter::LockCounter || c->tag() == NamedCounter::EliminatedLockCounter) {
int count = c->count();
if (count > 0) {
bool eliminated = c->tag() == NamedCounter::EliminatedLockCounter;
if (Verbose) {
tty->print_cr("%d %s%s", count, c->name(), eliminated ? " (eliminated)" : "");
}
total_lock_count += count;
if (eliminated) {
eliminated_lock_count += count;
}
}
} else if (c->tag() == NamedCounter::BiasedLockingCounter) {
BiasedLockingCounters* blc = ((BiasedLockingNamedCounter*)c)->counters();
if (blc->nonzero()) {
tty->print_cr("%s", c->name());
blc->print_on(tty);
}
#if INCLUDE_RTM_OPT
} else if (c->tag() == NamedCounter::RTMLockingCounter) {
RTMLockingCounters* rlc = ((RTMLockingNamedCounter*)c)->counters();
if (rlc->nonzero()) {
tty->print_cr("%s", c->name());
rlc->print_on(tty);
}
#endif
}
c = c->next();
}
if (total_lock_count > 0) {
tty->print_cr("dynamic locks: %d", total_lock_count);
if (eliminated_lock_count) {
tty->print_cr("eliminated locks: %d (%d%%)", eliminated_lock_count,
(int)(eliminated_lock_count * 100.0 / total_lock_count));
}
}
}
//
// Allocate a new NamedCounter. The JVMState is used to generate the
// name which consists of method@line for the inlining tree.
//
NamedCounter* OptoRuntime::new_named_counter(JVMState* youngest_jvms, NamedCounter::CounterTag tag) {
int max_depth = youngest_jvms->depth();
// Visit scopes from youngest to oldest.
bool first = true;
stringStream st;
for (int depth = max_depth; depth >= 1; depth--) {
JVMState* jvms = youngest_jvms->of_depth(depth);
ciMethod* m = jvms->has_method() ? jvms->method() : NULL;
if (!first) {
st.print(" ");
} else {
first = false;
}
int bci = jvms->bci();
if (bci < 0) bci = 0;
st.print("%s.%s@%d", m->holder()->name()->as_utf8(), m->name()->as_utf8(), bci);
// To print linenumbers instead of bci use: m->line_number_from_bci(bci)
}
NamedCounter* c;
if (tag == NamedCounter::BiasedLockingCounter) {
c = new BiasedLockingNamedCounter(strdup(st.as_string()));
} else if (tag == NamedCounter::RTMLockingCounter) {
c = new RTMLockingNamedCounter(strdup(st.as_string()));
} else {
c = new NamedCounter(strdup(st.as_string()), tag);
}
// atomically add the new counter to the head of the list. We only
// add counters so this is safe.
NamedCounter* head;
do {
c->set_next(NULL);
head = _named_counters;
c->set_next(head);
} while (Atomic::cmpxchg_ptr(c, &_named_counters, head) != head);
return c;
}
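// Example of the counter name built above (hypothetical methods and bcis):
// for a lock in Bar.bar() inlined into Foo.foo(), the youngest-to-oldest scope
// walk produces something like
//
//   "pkg.Bar.bar@7 pkg.Foo.foo@42"
//
// i.e. one "holder.method@bci" entry per inlining level, innermost scope first.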
//-----------------------------------------------------------------------------
// Non-product code
#ifndef PRODUCT
int trace_exception_counter = 0;
static void trace_exception(oop exception_oop, address exception_pc, const char* msg) {
ttyLocker ttyl;
trace_exception_counter++;
tty->print("%d [Exception (%s): ", trace_exception_counter, msg);
exception_oop->print_value();
tty->print(" in ");
CodeBlob* blob = CodeCache::find_blob(exception_pc);
if (blob->is_nmethod()) {
nmethod* nm = blob->as_nmethod_or_null();
nm->method()->print_value();
} else if (blob->is_runtime_stub()) {
tty->print("<runtime-stub>");
} else {
tty->print("<unknown>");
}
tty->print(" at " INTPTR_FORMAT, p2i(exception_pc));
tty->print_cr("]");
}
#endif // PRODUCT
# ifdef ENABLE_ZAP_DEAD_LOCALS
// Called from call sites in compiled code with oop maps (actually safepoints)
// Zaps dead locals in first java frame.
// Is entry because may need to lock to generate oop maps
// Currently, only used for compiler frames, but someday may be used
// for interpreter frames, too.
int OptoRuntime::ZapDeadCompiledLocals_count = 0;
// avoid pointers to member funcs with these helpers
static bool is_java_frame( frame* f) { return f->is_java_frame(); }
static bool is_native_frame(frame* f) { return f->is_native_frame(); }
void OptoRuntime::zap_dead_java_or_native_locals(JavaThread* thread,
bool (*is_this_the_right_frame_to_zap)(frame*)) {
assert(JavaThread::current() == thread, "is this needed?");
if ( !ZapDeadCompiledLocals ) return;
bool skip = false;
if ( ZapDeadCompiledLocalsFirst == 0 ) ; // nothing special
else if ( ZapDeadCompiledLocalsFirst > ZapDeadCompiledLocals_count ) skip = true;
else if ( ZapDeadCompiledLocalsFirst == ZapDeadCompiledLocals_count )
warning("starting zapping after skipping");
if ( ZapDeadCompiledLocalsLast == -1 ) ; // nothing special
else if ( ZapDeadCompiledLocalsLast < ZapDeadCompiledLocals_count ) skip = true;
else if ( ZapDeadCompiledLocalsLast == ZapDeadCompiledLocals_count )
warning("about to zap last zap");
++ZapDeadCompiledLocals_count; // counts skipped zaps, too
if ( skip ) return;
// find java frame and zap it
for (StackFrameStream sfs(thread); !sfs.is_done(); sfs.next()) {
if (is_this_the_right_frame_to_zap(sfs.current()) ) {
sfs.current()->zap_dead_locals(thread, sfs.register_map());
return;
}
}
warning("no frame found to zap in zap_dead_Java_locals_C");
}
JRT_LEAF(void, OptoRuntime::zap_dead_Java_locals_C(JavaThread* thread))
zap_dead_java_or_native_locals(thread, is_java_frame);
JRT_END
// The following does not work because for one thing, the
// thread state is wrong; it expects java, but it is native.
// Also, the invariants in a native stub are different and
// I'm not sure it is safe to have a MachCallRuntimeDirectNode
// in there.
// So for now, we do not zap in native stubs.
JRT_LEAF(void, OptoRuntime::zap_dead_native_locals_C(JavaThread* thread))
zap_dead_java_or_native_locals(thread, is_native_frame);
JRT_END
# endif
C:\hotspot-69087d08d473\src\share\vm/opto/runtime.hpp
/*
* Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_OPTO_RUNTIME_HPP
#define SHARE_VM_OPTO_RUNTIME_HPP
#include "code/codeBlob.hpp"
#include "opto/machnode.hpp"
#include "opto/type.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/rtmLocking.hpp"
#include "runtime/deoptimization.hpp"
#include "runtime/vframe.hpp"
//------------------------------OptoRuntime------------------------------------
// Opto compiler runtime routines
//
// These are all generated from Ideal graphs. They are called with the
// Java calling convention. Internally they call C++. They are made once at
// startup time and Opto compiles calls to them later.
// Things are broken up into quads: the signature they will be called with,
// the address of the generated code, the corresponding C++ code and an
// nmethod.
// The signature (returned by "xxx_Type()") is used at startup time by the
// Generator to make the generated code "xxx_Java". Opto compiles calls
// to the generated code "xxx_Java". When the compiled code gets executed,
// it calls the C++ code "xxx_C". The generated nmethod is saved in the
// CodeCache. Exception handlers use the nmethod to get the callee-save
// register OopMaps.
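// Worked illustration of the naming scheme above, using the slow-path
// instance allocation declared below (the wiring description is a sketch):
//
//   OptoRuntime::new_instance_Type()  -- signature the stub is generated from
//   OptoRuntime::_new_instance_Java   -- entry point of the generated stub
//   OptoRuntime::new_instance_C(...)  -- C++ slow path the stub calls into
//   (plus the CodeBlob/nmethod holding the stub and its OopMaps)
//
// Opto emits calls to new_instance_Java(); at run time the stub transitions
// into the VM, runs new_instance_C, and picks the result back up from
// thread-local storage (see runtime.cpp).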
class CallInfo;
//
// NamedCounters are tagged counters which can be used for profiling
// code in various ways. Currently they are used by the lock coarsening code
//
class NamedCounter : public CHeapObj<mtCompiler> {
public:
enum CounterTag {
NoTag,
LockCounter,
EliminatedLockCounter,
BiasedLockingCounter,
RTMLockingCounter
};
private:
const char * _name;
int _count;
CounterTag _tag;
NamedCounter* _next;
public:
NamedCounter(const char *n, CounterTag tag = NoTag):
_name(n),
_count(0),
_next(NULL),
_tag(tag) {}
const char * name() const { return _name; }
int count() const { return _count; }
address addr() { return (address)&_count; }
CounterTag tag() const { return _tag; }
void set_tag(CounterTag tag) { _tag = tag; }
NamedCounter* next() const { return _next; }
void set_next(NamedCounter* next) {
assert(_next == NULL || next == NULL, "already set");
_next = next;
}
};
class BiasedLockingNamedCounter : public NamedCounter {
private:
BiasedLockingCounters _counters;
public:
BiasedLockingNamedCounter(const char *n) :
NamedCounter(n, BiasedLockingCounter), _counters() {}
BiasedLockingCounters* counters() { return &_counters; }
};
class RTMLockingNamedCounter : public NamedCounter {
private:
RTMLockingCounters _counters;
public:
RTMLockingNamedCounter(const char *n) :
NamedCounter(n, RTMLockingCounter), _counters() {}
RTMLockingCounters* counters() { return &_counters; }
};
typedef const TypeFunc*(*TypeFunc_generator)();
class OptoRuntime : public AllStatic {
friend class Matcher; // allow access to stub names
private:
// define stubs
static address generate_stub(ciEnv* ci_env, TypeFunc_generator gen, address C_function, const char *name, int is_fancy_jump, bool pass_tls, bool save_arguments, bool return_pc);
// References to generated stubs
static address _new_instance_Java;
static address _new_array_Java;
static address _new_array_nozero_Java;
static address _multianewarray2_Java;
static address _multianewarray3_Java;
static address _multianewarray4_Java;
static address _multianewarray5_Java;
static address _multianewarrayN_Java;
static address _g1_wb_pre_Java;
static address _g1_wb_post_Java;
static address _vtable_must_compile_Java;
static address _complete_monitor_locking_Java;
static address _rethrow_Java;
static address _slow_arraycopy_Java;
static address _register_finalizer_Java;
# ifdef ENABLE_ZAP_DEAD_LOCALS
static address _zap_dead_Java_locals_Java;
static address _zap_dead_native_locals_Java;
# endif
//
// Implementation of runtime methods
// =================================
// Allocate storage for a Java instance.
static void new_instance_C(Klass* instance_klass, JavaThread *thread);
// Allocate storage for an objArray or typeArray
static void new_array_C(Klass* array_klass, int len, JavaThread *thread);
static void new_array_nozero_C(Klass* array_klass, int len, JavaThread *thread);
// Post-slow-path-allocation, pre-initializing-stores step for
// implementing ReduceInitialCardMarks
static void new_store_pre_barrier(JavaThread* thread);
// Allocate storage for multi-dimensional arrays
// Note: needs to be fixed for arbitrary number of dimensions
static void multianewarray2_C(Klass* klass, int len1, int len2, JavaThread *thread);
static void multianewarray3_C(Klass* klass, int len1, int len2, int len3, JavaThread *thread);
static void multianewarray4_C(Klass* klass, int len1, int len2, int len3, int len4, JavaThread *thread);
static void multianewarray5_C(Klass* klass, int len1, int len2, int len3, int len4, int len5, JavaThread *thread);
static void multianewarrayN_C(Klass* klass, arrayOopDesc* dims, JavaThread *thread);
static void g1_wb_pre_C(oopDesc* orig, JavaThread* thread);
static void g1_wb_post_C(void* card_addr, JavaThread* thread);
public:
// Slow-path Locking and Unlocking
static void complete_monitor_locking_C(oopDesc* obj, BasicLock* lock, JavaThread* thread);
static void complete_monitor_unlocking_C(oopDesc* obj, BasicLock* lock);
private:
// Implicit exception support
static void throw_null_exception_C(JavaThread* thread);
// Exception handling
static address handle_exception_C (JavaThread* thread);
static address handle_exception_C_helper(JavaThread* thread, nmethod*& nm);
static address rethrow_C (oopDesc* exception, JavaThread *thread, address return_pc );
static void deoptimize_caller_frame (JavaThread *thread);
static void deoptimize_caller_frame (JavaThread *thread, bool doit);
static bool is_deoptimized_caller_frame (JavaThread *thread);
// CodeBlob support
// ===================================================================
static ExceptionBlob* _exception_blob;
static void generate_exception_blob();
static void register_finalizer(oopDesc* obj, JavaThread* thread);
// zapping dead locals, either from Java frames or from native frames
# ifdef ENABLE_ZAP_DEAD_LOCALS
static void zap_dead_Java_locals_C( JavaThread* thread);
static void zap_dead_native_locals_C( JavaThread* thread);
static void zap_dead_java_or_native_locals( JavaThread*, bool (*)(frame*));
public:
static int ZapDeadCompiledLocals_count;
# endif
public:
static bool is_callee_saved_register(MachRegisterNumbers reg);
// One time only generate runtime code stubs. Returns true
// when runtime stubs have been generated successfully and
// false otherwise.
static bool generate(ciEnv* env);
// Returns the name of a stub
static const char* stub_name(address entry);
// access to runtime stubs entry points for java code
static address new_instance_Java() { return _new_instance_Java; }
static address new_array_Java() { return _new_array_Java; }
static address new_array_nozero_Java() { return _new_array_nozero_Java; }
static address multianewarray2_Java() { return _multianewarray2_Java; }
static address multianewarray3_Java() { return _multianewarray3_Java; }
static address multianewarray4_Java() { return _multianewarray4_Java; }
static address multianewarray5_Java() { return _multianewarray5_Java; }
static address multianewarrayN_Java() { return _multianewarrayN_Java; }
static address g1_wb_pre_Java() { return _g1_wb_pre_Java; }
static address g1_wb_post_Java() { return _g1_wb_post_Java; }
static address vtable_must_compile_stub() { return _vtable_must_compile_Java; }
static address complete_monitor_locking_Java() { return _complete_monitor_locking_Java; }
static address slow_arraycopy_Java() { return _slow_arraycopy_Java; }
static address register_finalizer_Java() { return _register_finalizer_Java; }
# ifdef ENABLE_ZAP_DEAD_LOCALS
static address zap_dead_locals_stub(bool is_native) { return is_native
? _zap_dead_native_locals_Java
: _zap_dead_Java_locals_Java; }
static MachNode* node_to_call_zap_dead_locals(Node* n, int block_num, bool is_native);
# endif
static ExceptionBlob* exception_blob() { return _exception_blob; }
// Leaf routines helping with method data update
static void profile_receiver_type_C(DataLayout* data, oopDesc* receiver);
// Implicit exception support
static void throw_div0_exception_C (JavaThread* thread);
static void throw_stack_overflow_error_C(JavaThread* thread);
// Exception handling
static address rethrow_stub() { return _rethrow_Java; }
// Type functions
// ======================================================
static const TypeFunc* new_instance_Type(); // object allocation (slow case)
static const TypeFunc* new_array_Type (); // [a]newarray (slow case)
static const TypeFunc* multianewarray_Type(int ndim); // multianewarray
static const TypeFunc* multianewarray2_Type(); // multianewarray
static const TypeFunc* multianewarray3_Type(); // multianewarray
static const TypeFunc* multianewarray4_Type(); // multianewarray
static const TypeFunc* multianewarray5_Type(); // multianewarray
static const TypeFunc* multianewarrayN_Type(); // multianewarray
static const TypeFunc* g1_wb_pre_Type();
static const TypeFunc* g1_wb_post_Type();
static const TypeFunc* complete_monitor_enter_Type();
static const TypeFunc* complete_monitor_exit_Type();
static const TypeFunc* uncommon_trap_Type();
static const TypeFunc* athrow_Type();
static const TypeFunc* rethrow_Type();
static const TypeFunc* Math_D_D_Type(); // sin,cos & friends
static const TypeFunc* Math_DD_D_Type(); // mod,pow & friends
static const TypeFunc* modf_Type();
static const TypeFunc* l2f_Type();
static const TypeFunc* void_long_Type();
static const TypeFunc* flush_windows_Type();
// arraycopy routine types
static const TypeFunc* fast_arraycopy_Type(); // bit-blasters
static const TypeFunc* checkcast_arraycopy_Type();
static const TypeFunc* generic_arraycopy_Type();
static const TypeFunc* slow_arraycopy_Type(); // the full routine
static const TypeFunc* array_fill_Type();
static const TypeFunc* aescrypt_block_Type();
static const TypeFunc* cipherBlockChaining_aescrypt_Type();
static const TypeFunc* sha_implCompress_Type();
static const TypeFunc* digestBase_implCompressMB_Type();
static const TypeFunc* multiplyToLen_Type();
static const TypeFunc* squareToLen_Type();
static const TypeFunc* mulAdd_Type();
static const TypeFunc* montgomeryMultiply_Type();
static const TypeFunc* montgomerySquare_Type();
static const TypeFunc* ghash_processBlocks_Type();
static const TypeFunc* updateBytesCRC32_Type();
// leaf on stack replacement interpreter accessor types
static const TypeFunc* osr_end_Type();
// leaf methodData routine types
static const TypeFunc* profile_receiver_type_Type();
// leaf on stack replacement interpreter accessor types
static const TypeFunc* fetch_int_Type();
static const TypeFunc* fetch_long_Type();
static const TypeFunc* fetch_float_Type();
static const TypeFunc* fetch_double_Type();
static const TypeFunc* fetch_oop_Type();
static const TypeFunc* fetch_monitor_Type();
static const TypeFunc* register_finalizer_Type();
// Dtrace support
static const TypeFunc* dtrace_method_entry_exit_Type();
static const TypeFunc* dtrace_object_alloc_Type();
# ifdef ENABLE_ZAP_DEAD_LOCALS
static const TypeFunc* zap_dead_locals_Type();
# endif
private:
static NamedCounter * volatile _named_counters;
public:
// helper function which creates a named counter labeled with the
// calling method and bci taken from the JVMState, if they are available
static NamedCounter* new_named_counter(JVMState* jvms, NamedCounter::CounterTag tag);
// dumps all the named counters
static void print_named_counters();
};
#endif // SHARE_VM_OPTO_RUNTIME_HPP
C:\hotspot-69087d08d473\src\share\vm/opto/split_if.cpp
/*
* Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/callnode.hpp"
#include "opto/connode.hpp"
#include "opto/loopnode.hpp"
//------------------------------split_thru_region------------------------------
// Split Node 'n' through merge point.
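// Illustrative sketch (not part of the original source): before the split,
// 'n' is a CFG node hanging below the Region that merges paths P1..Pk.
// Afterwards each path Pi carries its own clone of 'n' (any input whose
// control was the Region is rewired to that path's corresponding input),
// and the clones are re-merged by a fresh RegionNode returned to the caller.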
Node *PhaseIdealLoop::split_thru_region( Node *n, Node *region ) {
uint wins = 0;
assert( n->is_CFG(), "" );
assert( region->is_Region(), "" );
Node *r = new (C) RegionNode( region->req() );
IdealLoopTree *loop = get_loop( n );
for( uint i = 1; i < region->req(); i++ ) {
Node *x = n->clone();
Node *in0 = n->in(0);
if( in0->in(0) == region ) x->set_req( 0, in0->in(i) );
for( uint j = 1; j < n->req(); j++ ) {
Node *in = n->in(j);
if( get_ctrl(in) == region )
x->set_req( j, in->in(i) );
}
_igvn.register_new_node_with_optimizer(x);
set_loop(x, loop);
set_idom(x, x->in(0), dom_depth(x->in(0))+1);
r->init_req(i, x);
}
// Record region
r->set_req(0,region); // Not a TRUE RegionNode
_igvn.register_new_node_with_optimizer(r);
set_loop(r, loop);
if( !loop->_child )
loop->_body.push(r);
return r;
}
//------------------------------split_up---------------------------------------
// Split block-local op up through the phis to empty the current block
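// Outline (descriptive note, not in the original source): inputs are split
// recursively first; a block-local Cmp is special-cased (cloned down to its
// users unless it forms a simple single-use Cmp/Bool/CMove chain); any other
// block-local node is cloned once per predecessor of 'blk1' and the clones
// are merged by a new Phi which then replaces 'n' everywhere.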
bool PhaseIdealLoop::split_up( Node *n, Node *blk1, Node *blk2 ) {
if( n->is_CFG() ) {
assert( n->in(0) != blk1, "Lousy candidate for split-if" );
return false;
}
if( get_ctrl(n) != blk1 && get_ctrl(n) != blk2 )
return false; // Not block local
if( n->is_Phi() ) return false; // Local PHIs are expected
// Recursively split-up inputs
for (uint i = 1; i < n->req(); i++) {
if( split_up( n->in(i), blk1, blk2 ) ) {
// Got split recursively and self went dead?
if (n->outcnt() == 0)
_igvn.remove_dead_node(n);
return true;
}
}
// Check for needing to clone-up a compare. Can't do that, it forces
// another (nested) split-if transform. Instead, clone it "down".
if( n->is_Cmp() ) {
assert(get_ctrl(n) == blk2 || get_ctrl(n) == blk1, "must be in block with IF");
// Check for simple Cmp/Bool/CMove which we can clone-up. Cmp/Bool/CMove
// sequence can have no other users and it must all reside in the split-if
// block. Non-simple Cmp/Bool/CMove sequences are 'cloned-down' below -
// private, per-use versions of the Cmp and Bool are made. These sink to
// the CMove block. If the CMove is in the split-if block, then in the
// next iteration this will become a simple Cmp/Bool/CMove set to clone-up.
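// For example (illustrative only): if this Cmp also feeds a Bool used by an
// If in some other block, the chain cannot be hoisted as a unit; instead the
// loops below give each Bool/If user a private copy of the Bool and the Cmp,
// so each copy can later sink to its single use.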
Node *bol, *cmov;
if( !(n->outcnt() == 1 && n->unique_out()->is_Bool() &&
(bol = n->unique_out()->as_Bool()) &&
(get_ctrl(bol) == blk1 ||
get_ctrl(bol) == blk2) &&
bol->outcnt() == 1 &&
bol->unique_out()->is_CMove() &&
(cmov = bol->unique_out()->as_CMove()) &&
(get_ctrl(cmov) == blk1 ||
get_ctrl(cmov) == blk2) ) ) {
// Must clone down
#ifndef PRODUCT
if( PrintOpto && VerifyLoopOptimizations ) {
tty->print("Cloning down: ");
n->dump();
}
#endif
// Clone down any block-local BoolNode uses of this CmpNode
for (DUIterator i = n->outs(); n->has_out(i); i++) {
Node* bol = n->out(i);
assert( bol->is_Bool(), "" );
if (bol->outcnt() == 1) {
Node* use = bol->unique_out();
Node *use_c = use->is_If() ? use->in(0) : get_ctrl(use);
if (use_c == blk1 || use_c == blk2) {
continue;
}
}
if (get_ctrl(bol) == blk1 || get_ctrl(bol) == blk2) {
// Recursively sink any BoolNode
#ifndef PRODUCT
if( PrintOpto && VerifyLoopOptimizations ) {
tty->print("Cloning down: ");
bol->dump();
}
#endif
for (DUIterator_Last jmin, j = bol->last_outs(jmin); j >= jmin; --j) {
// Uses are either IfNodes or CMoves
Node* iff = bol->last_out(j);
assert( iff->in(1) == bol, "" );
// Get control block of either the CMove or the If input
Node *iff_ctrl = iff->is_If() ? iff->in(0) : get_ctrl(iff);
Node *x = bol->clone();
register_new_node(x, iff_ctrl);
_igvn.replace_input_of(iff, 1, x);
}
_igvn.remove_dead_node( bol );
--i;
}
}
// Clone down this CmpNode
for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; --j) {
Node* bol = n->last_out(j);
assert( bol->in(1) == n, "" );
Node *x = n->clone();
register_new_node(x, get_ctrl(bol));
_igvn.replace_input_of(bol, 1, x);
}
_igvn.remove_dead_node( n );
return true;
}
}
// See if we are splitting up a Store. Any anti-dep loads must go up as
// well. An anti-dep load might be in the wrong block, because in this
// particular layout/schedule we ignored anti-deps and allowed memory to
// be alive twice. This only works if we do the same operations on
// anti-dep loads as we do to their killing stores.
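// Concretely (descriptive note): when the Store's memory input is the Phi
// merging at this block, every Load on the same alias class hanging off that
// Phi is re-pinned to 'blk1' with set_ctrl(), so it is split up alongside
// the Store that may overwrite its memory.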
if( n->is_Store() && n->in(MemNode::Memory)->in(0) == n->in(0) ) {
// Get store's memory slice
int alias_idx = C->get_alias_index(_igvn.type(n->in(MemNode::Address))->is_ptr());
// Get memory-phi anti-dep loads will be using
Node *memphi = n->in(MemNode::Memory);
assert( memphi->is_Phi(), "" );
// Hoist any anti-dep load to the splitting block;
// it will then "split-up".
for (DUIterator_Fast imax,i = memphi->fast_outs(imax); i < imax; i++) {
Node *load = memphi->fast_out(i);
if( load->is_Load() && alias_idx == C->get_alias_index(_igvn.type(load->in(MemNode::Address))->is_ptr()) )
set_ctrl(load,blk1);
}
}
// Found some other Node; must clone it up
#ifndef PRODUCT
if( PrintOpto && VerifyLoopOptimizations ) {
tty->print("Cloning up: ");
n->dump();
}
#endif
// ConvI2L may have type information on it which becomes invalid if
// it moves up in the graph, so widen the type of any clones to
// TypeLong::INT when pushing them up.
const Type* rtype = NULL;
if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::INT) {
rtype = TypeLong::INT;
}
// Now actually split-up this guy. One copy per control path merging.
Node *phi = PhiNode::make_blank(blk1, n);
for( uint j = 1; j < blk1->req(); j++ ) {
Node *x = n->clone();
// Widen the type of the ConvI2L when pushing up.
if (rtype != NULL) x->as_Type()->set_type(rtype);
if( n->in(0) && n->in(0) == blk1 )
x->set_req( 0, blk1->in(j) );
for( uint i = 1; i < n->req(); i++ ) {
Node *m = n->in(i);
if( get_ctrl(m) == blk1 ) {
assert( m->in(0) == blk1, "" );
x->set_req( i, m->in(j) );
}
}
register_new_node( x, blk1->in(j) );
phi->init_req( j, x );
}
// Announce phi to optimizer
register_new_node(phi, blk1);
// Remove cloned-up value from optimizer; use phi instead
_igvn.replace_node( n, phi );
// (There used to be a self-recursive call to split_up() here,
// but it is not needed. All necessary forward walking is done
// by do_split_if() below.)
return true;
}
//------------------------------register_new_node------------------------------
void PhaseIdealLoop::register_new_node( Node *n, Node *blk ) {
assert(!n->is_CFG(), "must be data node");
_igvn.register_new_node_with_optimizer(n);
set_ctrl(n, blk);
IdealLoopTree *loop = get_loop(blk);
if( !loop->_child )
loop->_body.push(n);
}
//------------------------------small_cache------------------------------------
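// Maps a block already visited by the dominator walk in spinup() to the
// replacement def computed for it; a later walk over the same idom chain
// stops at the first cached block, giving cheap path compression.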
struct small_cache : public Dict {
small_cache() : Dict( cmpkey, hashptr ) {}
Node *probe( Node *use_blk ) { return (Node*)((*this)[use_blk]); }
void lru_insert( Node *use_blk, Node *new_def ) { Insert(use_blk,new_def); }
};
//------------------------------spinup-----------------------------------------
// "Spin up" the dominator tree, starting at the use site and stopping when we
// find the post-dominating point.
// We must be at the merge point which post-dominates 'new_false' and
// 'new_true'. Figure out which edges into the RegionNode eventually lead up
// to false and which to true. Put in a PhiNode to merge values; plug in
// the appropriate false-arm or true-arm values. If some path leads to the
// original IF, then insert a Phi recursively.
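// Rough shape of the walk (illustrative note): a first pass climbs idom(n)
// until it reaches 'iff_dom', remembering the last block visited. That block
// tells us whether the path entered through new_false, new_true, or some
// deeper merge point (which gets a recursive Phi). A second pass over the
// same chain then fills the cache so later uses on this path stop early.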
Node *PhaseIdealLoop::spinup( Node *iff_dom, Node *new_false, Node *new_true, Node *use_blk, Node *def, small_cache *cache ) {
if (use_blk->is_top()) // Handle dead uses
return use_blk;
Node *prior_n = (Node*)((intptr_t)0xdeadbeef);
Node *n = use_blk; // Get path input
assert( use_blk != iff_dom, "" );
// Here's the "spinup" the dominator tree loop. Do a cache-check
// along the way, in case we've come this way before.
while( n != iff_dom ) { // Found post-dominating point?
prior_n = n;
n = idom(n); // Search higher
Node *s = cache->probe( prior_n ); // Check cache
if( s ) return s; // Cache hit!
}
Node *phi_post;
if( prior_n == new_false || prior_n == new_true ) {
phi_post = def->clone();
phi_post->set_req(0, prior_n );
register_new_node(phi_post, prior_n);
} else {
// This method handles both control uses (looking for Regions) and data
// uses (looking for Phis). If looking for a control use, then we need
// to insert a Region instead of a Phi; however Regions always exist
// previously (the hash_find_insert below would always hit) so we can
// return the existing Region.
if( def->is_CFG() ) {
phi_post = prior_n; // If looking for CFG, return prior
} else {
assert( def->is_Phi(), "" );
assert( prior_n->is_Region(), "must be a post-dominating merge point" );
// Need a Phi here
phi_post = PhiNode::make_blank(prior_n, def);
// Search for both true and false on all paths till find one.
for( uint i = 1; i < phi_post->req(); i++ ) // For all paths
phi_post->init_req( i, spinup( iff_dom, new_false, new_true, prior_n->in(i), def, cache ) );
Node *t = _igvn.hash_find_insert(phi_post);
if( t ) { // See if we already have this one
// phi_post will not be used, so kill it
_igvn.remove_dead_node(phi_post);
phi_post->destruct();
phi_post = t;
} else {
register_new_node( phi_post, prior_n );
}
}
}
// Update cache everywhere
prior_n = (Node*)((intptr_t)0xdeadbeef); // Reset IDOM walk
n = use_blk; // Get path input
// Spin-up the idom tree again, basically doing path-compression.
// Insert cache entries along the way, so that if we ever hit this
// point in the IDOM tree again we'll stop immediately on a cache hit.
while( n != iff_dom ) { // Found post-dominating point?
prior_n = n;
n = idom(n); // Search higher
cache->lru_insert( prior_n, phi_post ); // Fill cache
} // End of while not gone high enough
return phi_post;
}
//------------------------------find_use_block---------------------------------
// Find the block a USE is in. Normally USE's are in the same block as the
// using instruction. For Phi-USE's, the USE is in the predecessor block
// along the corresponding path.
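// Example (descriptive note): if 'use' is a Phi whose j-th input is 'def',
// the effective use block is use->in(0)->in(j), i.e. the predecessor block
// feeding that Phi input, not the block holding the Phi itself.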
Node *PhaseIdealLoop::find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true ) {
// CFG uses are their own block
if( use->is_CFG() )
return use;
if( use->is_Phi() ) { // Phi uses in prior block
// Grab the first Phi use; there may be many.
// Each will be handled as a separate iteration of
// the "while( phi->outcnt() )" loop.
uint j;
for( j = 1; j < use->req(); j++ )
if( use->in(j) == def )
break;
assert( j < use->req(), "def should be among use's inputs" );
return use->in(0)->in(j);
}
// Normal (non-phi) use
Node *use_blk = get_ctrl(use);
// Some uses are directly attached to the old (and going away)
// false and true branches.
if( use_blk == old_false ) {
use_blk = new_false;
set_ctrl(use, new_false);
}
if( use_blk == old_true ) {
use_blk = new_true;
set_ctrl(use, new_true);
}
if (use_blk == NULL) { // He's dead, Jim
_igvn.replace_node(use, C->top());
}
return use_blk;
}
//------------------------------handle_use-------------------------------------
// Handle uses of the merge point. Basically, split-if makes the merge point
// go away so all uses of the merge point must go away as well. Most block
// local uses have already been split-up, through the merge point. Uses from
// far below the merge point can't always be split up (e.g., phi-uses are
// pinned) and it makes too much stuff live. Instead we use a path-based
// solution to move uses down.
//
// If the use is along the pre-split-CFG true branch, then the new use will
// be from the post-split-CFG true merge point. Vice-versa for the false
// path. Some uses will be along both paths; then we sink the use to the
// post-dominating location; we may need to insert a Phi there.
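// (Illustrative note: a use whose block sits under only the old true arm is
// rewired to 'new_true', one under only the false arm to 'new_false', and a
// use below the point where the two arms remerge gets a Phi built by
// spinup() at that post-dominating merge.)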
void PhaseIdealLoop::handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true ) {
Node *use_blk = find_use_block(use,def,old_false,new_false,old_true,new_true);
if( !use_blk ) return; // He's dead, Jim
// Walk up the dominator tree until I hit either the old IfFalse, the old
// IfTrue or the old If. Insert Phis where needed.
Node *new_def = spinup( region_dom, new_false, new_true, use_blk, def, cache );
// Found where this USE goes. Re-point him.
uint i;
for( i = 0; i < use->req(); i++ )
if( use->in(i) == def )
break;
assert( i < use->req(), "def should be among use's inputs" );
_igvn.replace_input_of(use, i, new_def);
}