一. 前言
Presto中,RBO(Rule-Based Optimizer,基于规则的优化器)的规则有两种写法:一种是implements Rule(比如PushLimitThroughUnion等),另一种是implements PlanOptimizer(比如LimitPushDown等)。本文通过走读LimitPushDown的代码,了解implements PlanOptimizer的规则是如何被调用的。
二. LimitPushDown调用关系
LimitPushDown::optimize
SimplePlanRewriter.rewriteWith
node.accept(new RewriteContext)
defaultRewrite
node.getSources().stream()
.map(child -> rewrite(child, context)) //此处对当前Node的每个子节点递归调用rewrite方法,从而遍历整个执行计划树中的每个Node
node.accept
SimplePlanRewriter
LimitPushDown::Rewriter
replaceChildren(node, children) //将优化后的子树替换优化前的
从上边可以看出,如果要implements PlanOptimizer,需要提供一个SimplePlanRewriter的实现类。RBO优化时,会通过递归自动访问执行计划树中的所有Node,对每个Node调用该实现类中对应的visitXXX方法(比如visitAggregation等)进行优化,并用优化后的Node替换原来的Node。因此,要实现一个自定义的RBO优化器,只需在optimize中调用SimplePlanRewriter.rewriteWith,并提供一个SimplePlanRewriter的实现类即可。
三. LimitPushDown 优化实现
/**
 * Plan rewriter that carries a pending limit (a {@link LimitContext}) down the plan tree.
 *
 * <p>Each {@code visitXxx} method decides what to do with the limit found in the rewrite
 * context: forward it to the node's source(s), merge it into the node itself, or
 * materialize it as a new {@link LimitNode} on top of the rewritten node. Node types
 * without a dedicated override are handled by {@link #visitPlan}.
 */
private static class Rewriter
        extends SimplePlanRewriter<LimitContext>
{
    private final PlanNodeIdAllocator idAllocator;

    /**
     * Creates a rewriter.
     *
     * @param idAllocator supplies ids for the plan nodes created during the rewrite; must not be null
     * @throws NullPointerException if {@code idAllocator} is null
     */
    private Rewriter(PlanNodeIdAllocator idAllocator)
    {
        // Without this constructor the blank final field is never assigned and the class does not compile.
        // Fully qualified so that no additional import is required.
        this.idAllocator = java.util.Objects.requireNonNull(idAllocator, "idAllocator is null");
    }

    /**
     * Fallback for node types that have no dedicated override (for example, a table scan
     * is handled here because no table-scan visit method is overridden in this class).
     *
     * @param node the node being visited
     * @param context rewrite context holding the pending limit, or {@code null} when there is none
     * @return the rewritten node, wrapped in a {@link LimitNode} when a limit is pending
     */
    @Override
    public PlanNode visitPlan(PlanNode node, RewriteContext<LimitContext> context)
    {
        PlanNode rewrittenNode = context.defaultRewrite(node);

        // Drop in a LimitNode b/c we cannot push our limit down any further
        return wrapInLimit(rewrittenNode, context.get());
    }

    /**
     * Merges this limit with the pending one and forwards the result to the source.
     *
     * @param node the limit node being visited
     * @param context rewrite context holding the pending limit, or {@code null} when there is none
     * @return an empty {@link ValuesNode} for a limit of 0; otherwise the rewritten source
     *         (the limit node itself is dropped and re-created further down), or the
     *         default-rewritten node when the with-ties limit has to be kept
     */
    @Override
    public PlanNode visitLimit(LimitNode node, RewriteContext<LimitContext> context)
    {
        long count = node.getCount();
        LimitContext limit = context.get();
        if (limit != null) {
            // keep the smaller of the two limit counts
            count = Math.min(count, limit.getCount());
        }

        // return empty ValuesNode in case of limit 0
        if (count == 0) {
            return new ValuesNode(idAllocator.getNextId(),
                    node.getOutputSymbols(),
                    ImmutableList.of());
        }

        if (!node.isWithTies() || (limit != null && node.getCount() >= limit.getCount())) {
            // default visitPlan logic will insert the limit node
            return context.rewrite(node.getSource(), new LimitContext(count, false));
        }

        // with-ties limit that is not subsumed by the pending limit: keep the node and pass the context on
        return context.defaultRewrite(node, context.get());
    }

    /**
     * Replaces an aggregation that outputs exactly its grouping keys with a
     * {@link DistinctLimitNode} when a limit is pending; otherwise places the limit on top
     * of the aggregation.
     *
     * @param node the aggregation node being visited
     * @param context rewrite context holding the pending limit, or {@code null} when there is none
     * @return the replacement node
     */
    @Override
    @Deprecated
    public PlanNode visitAggregation(AggregationNode node, RewriteContext<LimitContext> context)
    {
        LimitContext limit = context.get();

        if (limit != null &&
                node.getAggregations().isEmpty() &&
                !node.getGroupingKeys().isEmpty() &&
                node.getOutputSymbols().size() == node.getGroupingKeys().size() &&
                node.getOutputSymbols().containsAll(node.getGroupingKeys())) {
            // the aggregation computes no aggregates and outputs only its group-by keys
            PlanNode rewrittenSource = context.rewrite(node.getSource());
            return new DistinctLimitNode(idAllocator.getNextId(), rewrittenSource, limit.getCount(), false, rewrittenSource.getOutputSymbols(), Optional.empty());
        }

        PlanNode rewrittenNode = context.defaultRewrite(node);

        // Drop in a LimitNode b/c limits cannot be pushed through aggregations
        return wrapInLimit(rewrittenNode, limit);
    }

    /**
     * Passes the pending limit through the mark-distinct node to its sources.
     *
     * @param node the mark-distinct node being visited
     * @param context rewrite context holding the pending limit, or {@code null} when there is none
     * @return the node produced by the default rewrite with the same context
     */
    @Override
    public PlanNode visitMarkDistinct(MarkDistinctNode node, RewriteContext<LimitContext> context)
    {
        // the fallback logic (in visitPlan) for node types we don't know about introduces a limit node,
        // so we need this here to push the limit through this trivial node type
        return context.defaultRewrite(node, context.get());
    }

    /**
     * Passes the pending limit through the project node to its sources.
     *
     * @param node the project node being visited
     * @param context rewrite context holding the pending limit, or {@code null} when there is none
     * @return the node produced by the default rewrite with the same context
     */
    @Override
    public PlanNode visitProject(ProjectNode node, RewriteContext<LimitContext> context)
    {
        // the fallback logic (in visitPlan) for node types we don't know about introduces a limit node,
        // so we need this here to push the limit through this trivial node type
        // (the default rewrite visits the project's sources with the same limit context)
        return context.defaultRewrite(node, context.get());
    }

    /**
     * Folds the pending limit into the TopN node by keeping the smaller count.
     *
     * @param node the TopN node being visited
     * @param context rewrite context holding the pending limit, or {@code null} when there is none
     * @return the original node when nothing changed, otherwise a new {@link TopNNode}
     *         with the same id, ordering scheme and step
     */
    @Override
    public PlanNode visitTopN(TopNNode node, RewriteContext<LimitContext> context)
    {
        LimitContext limit = context.get();

        PlanNode rewrittenSource = context.rewrite(node.getSource());
        if (rewrittenSource == node.getSource() && limit == null) {
            return node;
        }

        // between the TopN count and the pending limit, only the smaller one needs to be kept
        long count = node.getCount();
        if (limit != null) {
            count = Math.min(count, limit.getCount());
        }
        return new TopNNode(node.getId(), rewrittenSource, count, node.getOrderingScheme(), node.getStep());
    }

    /**
     * Turns a sort with a pending limit into a TopN.
     *
     * @param node the sort node being visited
     * @param context rewrite context holding the pending limit, or {@code null} when there is none
     * @return a {@link TopNNode} when a limit is pending; otherwise a {@link SortNode} over
     *         the rewritten source, or the original node when the source did not change
     */
    @Override
    @Deprecated
    public PlanNode visitSort(SortNode node, RewriteContext<LimitContext> context)
    {
        LimitContext limit = context.get();

        PlanNode rewrittenSource = context.rewrite(node.getSource());
        if (limit != null) {
            // replace the sort with a TopN that reuses the sort's id and ordering scheme
            return new TopNNode(node.getId(), rewrittenSource, limit.getCount(), node.getOrderingScheme(), TopNNode.Step.SINGLE);
        }
        if (rewrittenSource != node.getSource()) {
            return new SortNode(node.getId(), rewrittenSource, node.getOrderingScheme(), node.isPartial());
        }
        return node;
    }

    /**
     * Forwards the pending limit to every source of the union and also keeps a limit on
     * the union's own output.
     *
     * @param node the union node being visited
     * @param context rewrite context holding the pending limit, or {@code null} when there is none
     * @return a new {@link UnionNode} over the rewritten sources, wrapped in a
     *         {@link LimitNode} when a limit is pending
     */
    @Override
    public PlanNode visitUnion(UnionNode node, RewriteContext<LimitContext> context)
    {
        LimitContext limit = context.get();

        LimitContext childLimit = null;
        if (limit != null) {
            // second argument is presumably the "partial" flag reported by isPartial() -- confirm against LimitContext
            childLimit = new LimitContext(limit.getCount(), true);
        }

        List<PlanNode> sources = new ArrayList<>();
        for (PlanNode source : node.getSources()) {
            // every source of the union is rewritten with a limit of the same count
            sources.add(context.rewrite(source, childLimit));
        }

        PlanNode output = new UnionNode(node.getId(), sources, node.getSymbolMapping(), node.getOutputSymbols());

        // besides the sources, the output of the union itself is limited as well
        return wrapInLimit(output, limit);
    }

    /**
     * Forwards the pending limit to the semi-join's source; the filtering source is reused as-is.
     *
     * @param node the semi-join node being visited
     * @param context rewrite context holding the pending limit, or {@code null} when there is none
     * @return the original node when the source did not change, otherwise a new
     *         {@link SemiJoinNode} over the rewritten source
     */
    @Override
    public PlanNode visitSemiJoin(SemiJoinNode node, RewriteContext<LimitContext> context)
    {
        // rewrite the source with the same limit context
        PlanNode source = context.rewrite(node.getSource(), context.get());
        if (source != node.getSource()) {
            return new SemiJoinNode(
                    node.getId(),
                    source,
                    node.getFilteringSource(),
                    node.getSourceJoinSymbol(),
                    node.getFilteringSourceJoinSymbol(),
                    node.getSemiJoinOutput(),
                    node.getSourceHashSymbol(),
                    node.getFilteringSourceHashSymbol(),
                    node.getDistributionType());
        }
        return node;
    }

    // Wraps the node in a new LimitNode built from the pending limit; returns the node unchanged when there is none.
    private PlanNode wrapInLimit(PlanNode node, LimitContext limit)
    {
        if (limit == null) {
            return node;
        }
        return new LimitNode(idAllocator.getNextId(), node, limit.getCount(), limit.isPartial());
    }
}