Spark1 code

R2C/C2R
ColumnarOverrideRules

这是 Gazelle 插件的入口:通过 SparkSessionExtensions 隐式地嵌入到 Spark 中。

// Entry point of the Gazelle plugin: registers the columnar override rule
// with a Spark session through the SparkSessionExtensions hook.
object ColumnarOverrides extends GazelleSparkExtensionsInjector {
  override def inject(extensions: SparkSessionExtensions): Unit =
    extensions.injectColumnar(ColumnarOverrideRules)
}

case class ColumnarOverrideRules 继承的是 ColumnarRule

// Spark ColumnarRule implementation that drives the row<->columnar plan
// rewrite for this session. NOTE: the class body continues beyond this
// excerpt; the closing brace is not shown here.
case class ColumnarOverrideRules(session: SparkSession) extends ColumnarRule with Logging {
  // Master switch, re-read from the SQL conf on every access so it can be
  // toggled per query at runtime. Defaults to enabled.
  def columnarEnabled =
    session.sqlContext.getConf("org.apache.spark.example.columnar.enabled", "true").trim.toBoolean
  // When true, codegen was disabled for small shuffles; postColumnarTransitions
  // resets this flag back to "false" after consuming it.
  def codegendisable =
    session.sqlContext.getConf("spark.oap.sql.columnar.codegendisableforsmallshuffles", "false").trim.toBoolean
  // Accessor for the cluster-level SparkConf.
  def conf = session.sparkContext.getConf

所以 ColumnarOverrideRules 要实现两个方法

preColumnarTransitions
postColumnarTransitions

其中preColumnarTransitions 使用的是 val rule = preOverrides

// Runs before Spark inserts row/columnar transitions. Records the untouched
// plan so postColumnarTransitions can fall the whole stage back, then applies
// the pre-override rule tree on top of the row-guarded plan.
override def preColumnarTransitions: Rule[SparkPlan] = plan => {
    if (!columnarEnabled) {
      plan
    } else {
      // Spark's Columnar.scala hands plans over one by one; keeping the
      // original here lets #postColumnarTransitions revert the whole stage.
      originalPlan = plan
      isSupportAdaptive = SparkShimLoader.getSparkShims.supportAdaptiveWithExchangeConsidered(plan)
      val overrideRule = preOverrides
      overrideRule.setAdaptiveSupport(isSupportAdaptive)
      overrideRule(rowGuardOverrides(plan))
    }
  }

其中 postColumnarTransitions 使用的是 val rule = postOverrides

  // Runs after Spark has inserted transitions. Either reverts to the plan
  // recorded in preColumnarTransitions (whole-stage fallback under AQE), or
  // applies the post-override rules and collapses redundant transitions.
  override def postColumnarTransitions: Rule[SparkPlan] = plan => {
    if (!columnarEnabled) {
      plan
    } else if (isSupportAdaptive && fallbackWholeStage(plan)) {
      // BatchScan with ArrowScan initialized can still connect
      // to ColumnarToRow for transition.
      insertTransitions(originalPlan, false)
    } else {
      val overrideRule = postOverrides
      overrideRule.setAdaptiveSupport(isSupportAdaptive)
      val rewritten = collapseOverrides(overrideRule(plan))
      // One-shot flag: consume and reset it so it does not leak into the
      // next query on this session.
      if (codegendisable) {
        logDebug("postColumnarTransitions:" +
          " resetting spark.oap.sql.columnar.codegendisableforsmallshuffles To false")
        session.sqlContext.setConf(
          "spark.oap.sql.columnar.codegendisableforsmallshuffles", "false")
      }
      rewritten
    }
  }

这两个定义如下
def preOverrides = ColumnarPreOverrides(session)
def postOverrides = ColumnarPostOverrides()
这两个类的实现如下:

这两个类都是继承 Rule[SparkPlan]。
都各自含有 独立的replaceWithColumnarPlan。
只有第二个 replaceWithColumnarPlan 才涉及 R2C/C2R conversion

case class ColumnarPreOverrides(session: SparkSession) extends Rule[SparkPlan] {
  val columnarConf: GazellePluginConfig = GazellePluginConfig.getSessionConf
  var isSupportAdaptive: Boolean = true


  def replaceWithColumnarPlan(plan: SparkPlan): SparkPlan = plan match {
// Post-phase rule: runs after Spark has inserted RowToColumnarExec /
// ColumnarToRowExec transitions and swaps them for Arrow-backed equivalents
// where possible. This is the only class that touches R2C/C2R conversion.
case class ColumnarPostOverrides() extends Rule[SparkPlan] {
  val columnarConf = GazellePluginConfig.getSessionConf
  // Whether AQE applies; pushed in by ColumnarOverrideRules via setAdaptiveSupport.
  var isSupportAdaptive: Boolean = true

  // Recursively rewrites transition nodes in `plan`. Case order matters:
  // the specific C2R/R2C shapes must be tried before the generic fallthroughs.
  def replaceWithColumnarPlan(plan: SparkPlan): SparkPlan = plan match {
    // To get ColumnarBroadcastExchangeExec back from the fallback that for DPP reuse.
    case RowToColumnarExec(broadcastQueryStageExec: BroadcastQueryStageExec)
      if (broadcastQueryStageExec.plan match {
        case BroadcastExchangeExec(_, _: DataToArrowColumnarExec) => true
        case _ => false
      }) =>
      logDebug(s"Due to a fallback of BHJ inserted into plan." +
        s" See above override in BroadcastQueryStageExec")
      // Safe casts: the guard above already established both shapes.
      val localBroadcastXchg = broadcastQueryStageExec.plan.asInstanceOf[BroadcastExchangeExec]
      val dataToArrowColumnar = localBroadcastXchg.child.asInstanceOf[DataToArrowColumnarExec]
      //ColumnarBroadcastExchangeExec(localBroadcastXchg.mode, dataToArrowColumnar)
      dataToArrowColumnar.child
    // Row-to-columnar transition: prefer the Arrow implementation when enabled.
    case plan: RowToColumnarExec =>
      val child = replaceWithColumnarPlan(plan.child)
      if (columnarConf.enableArrowRowToColumnar) {
        logDebug(s"ColumnarPostOverrides ArrowRowToColumnarExec(${child.getClass})")
        // NOTE(review): `case _: Throwable` also traps fatal errors (OOM,
        // InterruptedException); consider scala.util.control.NonFatal instead.
        try {
          ArrowRowToColumnarExec(child)
        } catch {
          case _: Throwable =>
            logInfo("ArrowRowToColumnar: Falling back to RowToColumnar...")
            RowToArrowColumnarExec(child)
        }
      } else {
        logDebug(s"ColumnarPostOverrides RowToArrowColumnarExec(${child.getClass})")
        RowToArrowColumnarExec(child)
      }
    // C2R directly above a columnar shuffle/broadcast adaptor: drop the
    // transition and recurse into the adaptor itself.
    case ColumnarToRowExec(child: ColumnarShuffleExchangeAdaptor) =>
      replaceWithColumnarPlan(child)
    case ColumnarToRowExec(child: ColumnarBroadcastExchangeAdaptor) =>
      replaceWithColumnarPlan(child)
    // C2R above a coalesce node: keep both, rewrite below the coalesce.
    case ColumnarToRowExec(child: CoalesceBatchesExec) =>
      plan.withNewChildren(Seq(replaceWithColumnarPlan(child.child)))
    case ColumnarToRowExec(child: ArrowCoalesceBatchesExec) =>
      plan.withNewChildren(Seq(replaceWithColumnarPlan(child.child)))
    // Any other columnar-to-row transition: swap in the Arrow version if enabled.
    case plan: ColumnarToRowExec =>
      if (columnarConf.enableArrowColumnarToRow) {
        val child = replaceWithColumnarPlan(plan.child)
        logDebug(s"ColumnarPostOverrides ArrowColumnarToRowExec(${child.getClass})")
        // NOTE(review): same broad Throwable catch as above — verify the
        // constructor can actually throw here, otherwise this is dead code.
        try {
          ArrowColumnarToRowExec(child)
        } catch {
          case _: Throwable =>
            logInfo("ArrowColumnarToRowExec: Falling back to ColumnarToRow...")
            ColumnarToRowExec(child)
        }
      } else {
        val children = plan.children.map(replaceWithColumnarPlan)
        plan.withNewChildren(children)
      }
    // Row-based parent (not a query stage) with a C2R child: rewrite the C2R
    // children in place rather than recursing through the parent.
    case r: SparkPlan
        if !r.isInstanceOf[QueryStageExec] && !r.supportsColumnar && r.children.exists(c =>
          c.isInstanceOf[ColumnarToRowExec]) =>
      // This is a fix for when DPP and AQE both enabled,
      // ColumnarExchange maybe child as a Row SparkPlan.
      val children = r.children.map {
        case c: ColumnarToRowExec =>
          if (columnarConf.enableArrowColumnarToRow) {
            try {
              val child = replaceWithColumnarPlan(c.child)
              ArrowColumnarToRowExec(child)
            } catch {
              case _: Throwable =>
                logInfo("ArrowColumnarToRow : Falling back to ColumnarToRow...")
                c.withNewChildren(c.children.map(replaceWithColumnarPlan))
            }
          } else {
            c.withNewChildren(c.children.map(replaceWithColumnarPlan))
          }
        case other =>
          replaceWithColumnarPlan(other)
      }
      r.withNewChildren(children)
    // Default: keep the node, rewrite its children.
    case p =>
      val children = p.children.map(replaceWithColumnarPlan)
      p.withNewChildren(children)
  }

  // Called by ColumnarOverrideRules before apply() to propagate AQE support.
  def setAdaptiveSupport(enable: Boolean): Unit = { isSupportAdaptive = enable }

  def apply(plan: SparkPlan): SparkPlan = {
    replaceWithColumnarPlan(plan)
  }

}

整个文件涉及三个类, 结构上三个类是平行的。逻辑上是 ColumnarOverrideRules 使用和调用了 ColumnarPreOverrides和ColumnarPostOverrides

case class ColumnarPreOverrides(session: SparkSession) extends Rule[SparkPlan] 
case class ColumnarPostOverrides() extends Rule[SparkPlan]
case class ColumnarOverrideRules(session: SparkSession) extends ColumnarRule
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值