Spark UDAF 案例:自定义聚合函数 myavg(按部门求平均工资)

package spark.day03

import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
import org.apache.spark.sql.types.{DataType, DoubleType, LongType, StructField, StructType}
import org.apache.spark.sql.{DataFrame, Row, SparkSession}

/**
 * Demo of a user-defined aggregate function (UDAF) in Spark SQL.
 *
 * Loads `sql/emp.json`, exposes it as the temp view `emp`, registers
 * [[_06TestUDAF.MyUDAF]] under the SQL name `myavg`, and prints the
 * per-`deptno` average of `sal` computed by that function.
 */
object _06TestUDAF {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder()
      .master("local[*]")
      .appName("udaf")
      .getOrCreate()
    // Release the session even when reading the file or running the query fails.
    try {
      val df: DataFrame = spark.read.json("sql/emp.json")
      df.cache()
      // getOrCreate() may return an already-existing session in which "emp" is
      // registered; replacing the view avoids an "already exists" failure.
      df.createOrReplaceTempView("emp")
      // Reference query using the built-in avg, kept for comparison:
      //    val sql=
      //      """
      //        |select deptno,avg(sal)
      //        |from emp
      //        |group by deptno
      //        |""".stripMargin
      //        spark.sql(sql).show()
      spark.udf.register("myavg", new MyUDAF)
      val sql1 =
        """
          |select deptno,myavg(sal)
          |from emp
          |group by deptno
          |""".stripMargin
      spark.sql(sql1).show()
    } finally {
      spark.stop()
    }
  }

  /**
   * Average aggregate over a single double column.
   *
   * The aggregation buffer holds a running `sum` (double) and `count` (long).
   * NULL inputs are skipped, and a group with no non-NULL input evaluates to
   * NULL rather than NaN.
   */
  class MyUDAF extends UserDefinedAggregateFunction {

    /** Schema of the function's arguments: one double column. */
    override def inputSchema: StructType = StructType(
      Array(
        StructField("sal", DoubleType)
      )
    )

    /** Schema of the intermediate buffer: slot 0 = sum, slot 1 = count. */
    override def bufferSchema: StructType = StructType(
      Array(
        StructField("sum", DoubleType),
        StructField("count", LongType)
      )
    )

    /** Type of the final result. */
    override def dataType: DataType = DoubleType

    /** The same input always yields the same output. */
    override def deterministic: Boolean = true

    /** Resets the buffer to sum = 0.0 and count = 0. */
    override def initialize(buffer: MutableAggregationBuffer): Unit = {
      buffer(0) = 0D
      buffer(1) = 0L
    }

    /**
     * Folds one input row into the buffer.
     *
     * @param buffer running (sum, count) for the current group
     * @param input  row whose slot 0 is the value to aggregate
     */
    override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
      // Row.getDouble throws on a NULL slot, so NULL values are skipped and
      // do not contribute to either the sum or the count.
      if (!input.isNullAt(0)) {
        buffer.update(0, buffer.getDouble(0) + input.getDouble(0))
        buffer.update(1, buffer.getLong(1) + 1)
      }
    }

    /**
     * Combines two partial buffers by adding their sums and counts.
     *
     * @param buffer1 buffer that receives the merged result
     * @param buffer2 partial result to merge in
     */
    override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
      buffer1.update(0, buffer1.getDouble(0) + buffer2.getDouble(0))
      buffer1.update(1, buffer1.getLong(1) + buffer2.getLong(1))
    }

    /**
     * Produces the final average for a group.
     *
     * @param buffer final (sum, count) for the group
     * @return sum / count as a Double, or null (SQL NULL) when count is 0
     */
    override def evaluate(buffer: Row): Any = {
      val count = buffer.getLong(1)
      // The Row-based UDAF API represents SQL NULL as a JVM null; without this
      // guard an empty group would evaluate 0.0 / 0 and return NaN.
      if (count == 0L) null else buffer.getDouble(0) / count
    }
  }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值