总结一下遇到的sparksql大小表join情况。
一、数据倾斜
使用sparksql对一张大表和小表join时发现executor节点数据倾斜严重,最终执行超时失败了。查看日志发现对应的执行计划是Sort-Merge Join,查了下该方式适合两张大表join,会把两张表
- shuffle:按照join key进行重新分区,两张表数据会分布到整个集群;
- sort:对分区中的两表数据,分别进行排序;
- merge:对排好序的两张分区表数据执行join操作。分别遍历两个有序序列,碰到相同join key就merge输出,否则取更小一边。
大小表join可以使用broadcastjoin,将小表先广播分发至executor上,取小表根据key连接,避免了数据shuffle。
二、源码
1.执行join执行计划的SparkStrategy
查看spark中源码,入口为org.apache.spark.sql.execution.SparkStrategy
查看apply方法,可以看到执行不同join,canBuildRight与canBuildLeft判断join方式,canBroadcast方法根据表大小判断是否广播。
def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {...}
private def canBuildRight(joinType: JoinType): Boolean = joinType match {
case _: InnerLike | LeftOuter | LeftSemi | LeftAnti | _: ExistenceJoin => true
case _ => false
}
private def canBuildLeft(joinType: JoinType): Boolean = joinType match {
case _: InnerLike | RightOuter => true
case _ => false
}
/**
* Matches a plan whose output should be small enough to be used in broadcast join.
*/
private def canBroadcast(plan: LogicalPlan): Boolean = {
plan.stats.sizeInBytes >= 0 && plan.stats.sizeInBytes <= conf.autoBroadcastJoinThreshold
}
autoBroadcastJoinThreshold默认10M,plan.stats.sizeInBytes表的大小从hive元数据中取得,默认是long最大值,而本次从视图中读表没有大小,因此不会广播。所以可以将数据导入hdfs建立外表,spark源码中通过hdfs文件获取文件大小,进而取得sizeInBytes 。
2.查询提示(hint)
spark提供了hint方式指定需要广播的表,修改join的sql如下,源码中还会继续计算表的大小判断是否广播:
select /*+ BROADCAST(small) */ big.col_1,big.col_2,small.col_1
from big left outer join small on small.col_1= big.col_2
/**
* The hint for broadcast hash join or broadcast nested loop join, depending on the availability of
* equi-join keys.
*/
case object BROADCAST extends JoinStrategyHint {
override def displayName: String = "broadcast"
override def hintAliases: Set[String] = Set(
"BROADCAST",
"BROADCASTJOIN",
"MAPJOIN")
}
spark3源码中根据是否等值join,解析是否包含hint提示选择不同的执行计划。
def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
// If it is an equi-join, we first look at the join hints w.r.t. the following order:
// 1. broadcast hint: pick broadcast hash join if the join type is supported. If both sides
// have the broadcast hints, choose the smaller side (based on stats) to broadcast.
// 2. sort merge hint: pick sort merge join if join keys are sortable.
// 3. shuffle hash hint: We pick shuffle hash join if the join type is supported. If both
// sides have the shuffle hash hints, choose the smaller side (based on stats) as the
// build side.
// 4. shuffle replicate NL hint: pick cartesian product if join type is inner like.
//
// If there is no hint or the hints are not applicable, we follow these rules one by one:
// 1. Pick broadcast hash join if one side is small enough to broadcast, and the join type
// is supported. If both sides are small, choose the smaller side (based on stats)
// to broadcast.
// 2. Pick shuffle hash join if one side is small enough to build local hash map, and is
// much smaller than the other side, and `spark.sql.join.preferSortMergeJoin` is false.
// 3. Pick sort merge join if the join keys are sortable.
// 4. Pick cartesian product if join type is inner like.
// 5. Pick broadcast nested loop join as the final solution. It may OOM but we don't have
// other choice.
case j @ ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, nonEquiCond, left, right, hint) =>
def createBroadcastHashJoin(buildLeft: Boolean, buildRight: Boolean) = {
val wantToBuildLeft = canBuildLeft(joinType) && buildLeft
val wantToBuildRight = canBuildRight(joinType) && buildRight
getBuildSide(wantToBuildLeft, wantToBuildRight, left, right).map { buildSide =>
Seq(joins.BroadcastHashJoinExec(
leftKeys,
rightKeys,
joinType,
buildSide,
nonEquiCond,
planLater(left),
planLater(right)))
}
}
def createShuffleHashJoin(buildLeft: Boolean, buildRight: Boolean) = {
val wantToBuildLeft = canBuildLeft(joinType) && buildLeft
val wantToBuildRight = canBuildRight(joinType) && buildRight
getBuildSide(wantToBuildLeft, wantToBuildRight, left, right).map { buildSide=>
Seq(joins.ShuffledHashJoinExec(
leftKeys,
rightKeys,
joinType,
buildSide,
nonEquiCond,
planLater(left),
planLater(right)))
}
}
def createSortMergeJoin() = {
if (RowOrdering.isOrderable(leftKeys)) {
Some(Seq(joins.SortMergeJoinExec(
leftKeys, rightKeys, joinType, nonEquiCond, planLater(left), planLater(right))))
} else {
None
}
}
def createCartesianProduct() = {
if (joinType.isInstanceOf[InnerLike]) {
// `CartesianProductExec` can't implicitly evaluate equal join condition, here we should
// pass the original condition which includes both equal and non-equal conditions.
Some(Seq(joins.CartesianProductExec(planLater(left), planLater(right), j.condition)))
} else {
None
}
}
def createJoinWithoutHint() = {
createBroadcastHashJoin(
canBroadcast(left) && !hint.leftHint.exists(_.strategy.contains(NO_BROADCAST_HASH)),
canBroadcast(right) && !hint.rightHint.exists(_.strategy.contains(NO_BROADCAST_HASH)))
.orElse {
if (!conf.preferSortMergeJoin) {
createShuffleHashJoin(
canBuildLocalHashMap(left) && muchSmaller(left, right),
canBuildLocalHashMap(right) && muchSmaller(right, left))
} else {
None
}
}
.orElse(createSortMergeJoin())
.orElse(createCartesianProduct())
.getOrElse {
// This join could be very slow or OOM
val buildSide = getSmallerSide(left, right)
Seq(joins.BroadcastNestedLoopJoinExec(
planLater(left), planLater(right), buildSide, joinType, nonEquiCond))
}
}
createBroadcastHashJoin(hintToBroadcastLeft(hint), hintToBroadcastRight(hint))
.orElse { if (hintToSortMergeJoin(hint)) createSortMergeJoin() else None }
.orElse(createShuffleHashJoin(hintToShuffleHashLeft(hint), hintToShuffleHashRight(hint)))
.orElse { if (hintToShuffleReplicateNL(hint)) createCartesianProduct() else None }
.getOrElse(createJoinWithoutHint())
// If it is not an equi-join, we first look at the join hints w.r.t. the following order:
// 1. broadcast hint: pick broadcast nested loop join. If both sides have the broadcast
// hints, choose the smaller side (based on stats) to broadcast for inner and full joins,
// choose the left side for right join, and choose right side for left join.
// 2. shuffle replicate NL hint: pick cartesian product if join type is inner like.
//
// If there is no hint or the hints are not applicable, we follow these rules one by one:
// 1. Pick broadcast nested loop join if one side is small enough to broadcast. If only left
// side is broadcast-able and it's left join, or only right side is broadcast-able and
// it's right join, we skip this rule. If both sides are small, broadcasts the smaller
// side for inner and full joins, broadcasts the left side for right join, and broadcasts
// right side for left join.
// 2. Pick cartesian product if join type is inner like.
// 3. Pick broadcast nested loop join as the final solution. It may OOM but we don't have
// other choice. It broadcasts the smaller side for inner and full joins, broadcasts the
// left side for right join, and broadcasts right side for left join.
case logical.Join(left, right, joinType, condition, hint) =>
val desiredBuildSide = if (joinType.isInstanceOf[InnerLike] || joinType == FullOuter) {
getSmallerSide(left, right)
} else {
// For perf reasons, `BroadcastNestedLoopJoinExec` prefers to broadcast left side if
// it's a right join, and broadcast right side if it's a left join.
// TODO: revisit it. If left side is much smaller than the right side, it may be better
// to broadcast the left side even if it's a left join.
if (canBuildLeft(joinType)) BuildLeft else BuildRight
}
def createBroadcastNLJoin(buildLeft: Boolean, buildRight: Boolean) = {
val maybeBuildSide = if (buildLeft && buildRight) {
Some(desiredBuildSide)
} else if (buildLeft) {
Some(BuildLeft)
} else if (buildRight) {
Some(BuildRight)
} else {
None
}
maybeBuildSide.map { buildSide =>
Seq(joins.BroadcastNestedLoopJoinExec(
planLater(left), planLater(right), buildSide, joinType, condition))
}
}
def createCartesianProduct() = {
if (joinType.isInstanceOf[InnerLike]) {
Some(Seq(joins.CartesianProductExec(planLater(left), planLater(right), condition)))
} else {
None
}
}
def createJoinWithoutHint() = {
createBroadcastNLJoin(canBroadcast(left), canBroadcast(right))
.orElse(createCartesianProduct())
.getOrElse {
// This join could be very slow or OOM
Seq(joins.BroadcastNestedLoopJoinExec(
planLater(left), planLater(right), desiredBuildSide, joinType, condition))
}
}
createBroadcastNLJoin(hintToBroadcastLeft(hint), hintToBroadcastRight(hint))
.orElse { if (hintToShuffleReplicateNL(hint)) createCartesianProduct() else None }
.getOrElse(createJoinWithoutHint())
// --- Cases where this strategy does not apply ---------------------------------------------
case _ => Nil
}
参考
[1] https://www.cnblogs.com/ssqq5200936/p/13084824.html
[2] https://blog.csdn.net/dabokele/article/details/65963401
[3] https://blog.csdn.net/MrLevo520/article/details/104024814/
[4] https://www.cnblogs.com/importbigdata/p/11247299.html