数据:
user.json
{"id":1,"name":"zhangsan","age":10}
{"id":2,"name":"lisi","age":30}
{"id":3,"name":"wangwu","age":20}
{"id":4,"name":"liuliu","age":40}
需求:统计以上文件中所有用户的平均年龄(即 age 字段的平均值,按示例数据结果应为 25)
工具:IntelliJ IDEA
语言:Scala
代码:
package com.test
import org.apache.spark.SparkConf
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
import org.apache.spark.sql.types.{DataType, DoubleType, LongType, StructType}
object SparkUDAF {
def main(args: Array[String]): Unit = {
val conf = new SparkConf().setAppName("SparkUDAF").setMaster("local[*]")
val sparkSession = SparkSession.builder().config(conf).getOrCreate()
// 创建dataFrame
val dataFrame = sparkSession.r