Spark数据倾斜处理之添加前缀

Spark数据倾斜处理之添加前缀

思路:先给每个 key 拼接一个随机前缀(例如 0~3),把热点 key 打散后做一次局部预聚合;再去掉前缀,对预聚合结果做全局聚合,从而避免单个 task 处理过多相同 key 的数据。

package spark.day03

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * Word-count demo showing the "random prefix" (salting) technique for
 * mitigating data skew in a Spark SQL aggregation.
 *
 * A plain `group by word` sends every occurrence of a hot key (here `a`) to a
 * single reduce task. Instead the job is split into two aggregations:
 *
 *  1. prepend a random prefix in `[0, SaltBuckets)` to every word and count per
 *     prefixed word (partial aggregation, hot key spread over several groups);
 *  2. strip the prefix again and sum the partial counts (global aggregation).
 */
object _07TestDataSkew {

  /** Number of distinct random prefixes a single key is spread across. */
  private val SaltBuckets: Int = 4

  /**
   * Builds a small in-memory data set, registers it as the temp view `temp`
   * and prints the salted two-stage word count.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder()
      .master("local[*]")
      .appName("dataSkew")
      .getOrCreate()

    // try/finally guarantees the session is released even if a query fails.
    try {
      import spark.implicits._

      val rdd1: RDD[String] = spark.sparkContext.makeRDD(List(
        "a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a",
        "b,c,d,e,f",
        "b,b,c,c,d,e,f,g",
        "a,b,a,c,f"))
      val df: DataFrame = rdd1.toDF("line")
      // getOrCreate() may hand back an existing session that already has a
      // view called "temp"; replacing it avoids an AnalysisException.
      df.createOrReplaceTempView("temp")

      // Stage 0: one row per word.
      // The naive (skew-prone) count would simply be:
      //   select word,count(1) from (<explodedWords>) t1 group by t1.word
      val explodedWords: String =
        """select explode(split(line,",")) word
          |from temp""".stripMargin

      println("------先在单词前面拼接随机数字,比如0,1,2,3-----")
      // Stage 1: salt every word with a random bucket number, e.g. "2-a".
      val saltedWords: String =
        s"""select concat(floor(rand()*$SaltBuckets),"-",word) prefix_word
           |from (
           |$explodedWords
           |) t1""".stripMargin
      // spark.sql(saltedWords).show()

      println("----将加上前缀的单词,进行预聚合---")
      // Stage 2: partial aggregation on the salted key.
      val partialCounts: String =
        s"""select prefix_word,count(1) num
           |from (
           |$saltedWords
           |) t2
           |group by prefix_word""".stripMargin
      // spark.sql(partialCounts).show()

      println("----去掉前缀,进行全局聚合--")
      // Stage 3: drop everything up to the first "-" (the salt) and merge the
      // partial counts of each original word.
      val globalCounts: String =
        s"""select substr(prefix_word,instr(prefix_word,"-")+1) w,sum(num)
           |from (
           |$partialCounts
           |) t3
           |group by w""".stripMargin

      spark.sql(globalCounts).show()
    } finally {
      spark.stop()
    }
  }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值