/**
 * Partitioning schema shows how an array is distributed among the SciDB instances.
 *
 * Guidelines for introducing a new partitioning schema:
 * - Add to enum PartitioningSchema (right above psEND).
 * - Modify the doxygen comments in LogicalSG.cpp.
 * - Modify redistribute() to handle the new partitioning schema.
 * - Modify std::ostream& operator<<(std::ostream& stream, const RedistributeContext& dist). (See Operator.cpp)
 * - If the partitioning schema uses extra data:
 *   - Modify doesPartitioningSchemaHaveOptionalData.
 *   - Derive a class from PartitioningSchemaData.
 *   - When modifying redistribute(), consider the extra data for the new partitioning schema.
 */
// How an array is distributed among SciDB instances. Values are contiguous
// from psMIN so that [psMIN, psEND) can be iterated / range-checked.
enum PartitioningSchema
{
psUninitialized = -1, // e.g. _ps after ArrayDesc::ArrayDesc()
psMIN = 0, // lowest valid value; aliased by the first real schema below
psReplication = psMIN, // presumably a full copy on every instance -- confirm in redistribute()
psHashPartitioned, // the historical default; see defaultPartitioning()
psLocalInstance,
psByRow,
psByCol,
psUndefined, // a range of meanings, including sometimes "wildcard"
// TODO: replace with psWildcard and others as required
psGroupby,
psScaLAPACK, // requires extra data (see PartitioningSchemaData)
// A newly introduced partitioning schema should be added before this line.
psEND // one-past-the-last sentinel; not a valid schema
};
/**
 * defaultPartitioning
 *
 * Most of the code was assuming that arrays are scanned and stored in
 * psHashPartitioned Partitioning (currently a category of ArrayDistribution).
 * Therefore there were many "psHashPartitioned" scattered about the code that
 * logically mean "whatever the default PartitioningSchema is". We are moving on
 * a path toward generalizing what distributions can be stored. The first attempt
 * will be to extend tempArrays to psReplication, psByRow, and psByCol. When that
 * is working we will enable it for dbArrays. Later we will move on to distributions
 * which require additional parameterizations to be stored, such as the 2D
 * ScaLAPACK.
 *
 * As scaffolding for that effort, we are making the "default" PartitioningSchema
 * configurable. This will allow us to (1) experiment with performance of alternate
 * distributions for certain workflows and (2) start fixing the query compiler/optimizer
 * to insert SGs without assuming that psHashPartitioned is the universal goal.
 *
 * Then we can add a create array parameterization for particular distributions
 * and drive the query tree's output toward that. (e.g. right now, for iterated spgemm,
 * psByRow and psByCol are far more efficient than psHashPartitioned)
 *
 * Once this is working for TEMP arrays, we can allow it for general arrays.
 *
 * Once this all works correctly via create statements, we will no longer be managing
 * "ps" outside of ArrayDesc's and ArrayDistributions; at that point this function
 * should no longer be referenced and this scaffolding can be removed.
 */
/**
 * @brief The PartitioningSchema assumed wherever "the default" is meant.
 * @return psHashPartitioned, always (intentionally fixed; see note below).
 */
inline PartitioningSchema defaultPartitioning()
{
// you MAY NOT change this from psHashPartitioned at this time
// this is scaffolding code that is locating all the places where
// psHashPartitioned is assumed which need to be generalized.
// there are further changes in the optimizer that must be planned first
// and then we can decide which PartitioningSchema variable is the
// definitive one. Until then, the code will read "defaultPartitioning()"
// and the reader should think "psHashPartitioned"
return psHashPartitioned;
}
/**
 * Partitioning schema shows how an array is distributed among the SciDB instances.
 *
 * Guidelines for introducing a new partitioning schema:
 * - Add to enum PartitioningSchema (right above psEND).
 * - Modify the doxygen comments in LogicalSG.cpp.
 * - Modify redistribute() to handle the new partitioning schema.
 * - Modify std::ostream& operator<<(std::ostream& stream, const RedistributeContext& dist). (See Operator.cpp)
 * - If the partitioning schema uses extra data:
 *   - Modify doesPartitioningSchemaHaveOptionalData.
 *   - Derive a class from PartitioningSchemaData.
 *   - When modifying redistribute(), consider the extra data for the new partitioning schema.
 */
// NOTE(review): this enum is byte-identical to the PartitioningSchema defined
// earlier in this file. A second definition in the same translation unit is a
// redefinition error -- this looks like a copy/paste or file-concatenation
// artifact; confirm and remove one copy.
enum PartitioningSchema
{
psUninitialized = -1, // e.g. _ps after ArrayDesc::ArrayDesc()
psMIN = 0, // lowest valid value; aliased by the first real schema below
psReplication = psMIN,
psHashPartitioned, // the historical default; see defaultPartitioning()
psLocalInstance,
psByRow,
psByCol,
psUndefined, // a range of meanings, including sometimes "wildcard"
// TODO: replace with psWildcard and others as required
psGroupby,
psScaLAPACK, // requires extra data (see PartitioningSchemaData)
// A newly introduced partitioning schema should be added before this line.
psEND // one-past-the-last sentinel; not a valid schema
};
/**
 * defaultPartitioning
 *
 * Most of the code was assuming that arrays are scanned and stored in
 * psHashPartitioned Partitioning (currently a category of ArrayDistribution).
 * Therefore there were many "psHashPartitioned" scattered about the code that
 * logically mean "whatever the default PartitioningSchema is". We are moving on
 * a path toward generalizing what distributions can be stored. The first attempt
 * will be to extend tempArrays to psReplication, psByRow, and psByCol. When that
 * is working we will enable it for dbArrays. Later we will move on to distributions
 * which require additional parameterizations to be stored, such as the 2D
 * ScaLAPACK.
 *
 * As scaffolding for that effort, we are making the "default" PartitioningSchema
 * configurable. This will allow us to (1) experiment with performance of alternate
 * distributions for certain workflows and (2) start fixing the query compiler/optimizer
 * to insert SGs without assuming that psHashPartitioned is the universal goal.
 *
 * Then we can add a create array parameterization for particular distributions
 * and drive the query tree's output toward that. (e.g. right now, for iterated spgemm,
 * psByRow and psByCol are far more efficient than psHashPartitioned)
 *
 * Once this is working for TEMP arrays, we can allow it for general arrays.
 *
 * Once this all works correctly via create statements, we will no longer be managing
 * "ps" outside of ArrayDesc's and ArrayDistributions; at that point this function
 * should no longer be referenced and this scaffolding can be removed.
 */
/**
 * @brief The PartitioningSchema assumed wherever "the default" is meant.
 * @return psHashPartitioned, always (intentionally fixed; see note below).
 *
 * NOTE(review): this function is byte-identical to the defaultPartitioning()
 * defined earlier in this file; a second definition in the same translation
 * unit will not link/compile. Looks like a copy/paste or file-concatenation
 * artifact -- confirm and remove one copy.
 */
inline PartitioningSchema defaultPartitioning()
{
// you MAY NOT change this from psHashPartitioned at this time
// this is scaffolding code that is locating all the places where
// psHashPartitioned is assumed which need to be generalized.
// there are further changes in the optimizer that must be planned first
// and then we can decide which PartitioningSchema variable is the
// definitive one. Until then, the code will read "defaultPartitioning()"
// and the reader should think "psHashPartitioned"
return psHashPartitioned;
}