写 SQL 处理使用 GROUP BY 产生的数据倾斜问题:
import java.util.Random
import org.apache.spark.sql.SparkSession
object TestUDF {
def main(args: Array[String]): Unit = {
// Build (or reuse) the SparkSession for this job, named "TestUDF".
// Hive support is enabled — presumably so the SQL below can read Hive tables
// such as ggg.test; confirm against the deployment's metastore setup.
val spark =
SparkSession.builder()
.appName("TestUDF")
.enableHiveSupport()
.getOrCreate()
// Expose two helpers to Spark SQL under the names "random_prefix" and
// "remove_random_prefix". Both lambdas simply delegate to randomPrefixUDF /
// removeRandomPrefixUDF, which are defined outside this part of the file.
// NOTE(review): random_prefix is registered with `value: Int`, yet the SQL
// below applies it to the `name` column — confirm the column type and the
// signature of randomPrefixUDF agree with this.
spark.udf.register("random_prefix", (value: Int, num: Int) => randomPrefixUDF(value, num))
spark.udf.register("remove_random_prefix", (value: String) => removeRandomPrefixUDF(value))
// Add a random prefix: select random_prefix(name, 6) aliased as `product`,
// together with `id`, from ggg.test.
// The second argument (6) is presumably the number of distinct random
// prefixes — confirm against randomPrefixUDF.
// NOTE(review): the `s` interpolator is used but this literal contains no
// `$` placeholders.
val sql1 =
s"""
|select
| random_prefix(name, 6) product,
| id
|from
| ggg.test
""".stripMargin
// Group and sum
val sql2 =
s"""
|select
| product,
| sum(id) click
|from
| (
| select
| random_prefix(name, 6) product,