// Official Spark SQL aggregate-function API docs:
// http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.sql.functions$
// Covered here: avg, sum, max, min, count, countDistinct
package com.scala.spark.function
import org.apache.spark.sql.SparkSession
object AggregateFunctionAndOtherFunction {

  /**
   * Demo of Spark SQL untyped (DataFrame) aggregate functions and a few
   * miscellaneous column functions.
   *
   * Reads the "employee" and "department" JSON datasets from the working
   * directory, joins them on employee.depId == department.id, and shows
   * per-department aggregates (avg/sum/max/min salary, employee count,
   * distinct-name count). Then demonstrates non-aggregate functions
   * (current_date, current_timestamp, rand, concat, concat_ws) on the
   * employee dataset.
   *
   * @param args unused command-line arguments
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("untyped").master("local").getOrCreate()
    import spark.implicits._
    import org.apache.spark.sql.functions._

    // NOTE(review): paths are relative to the working directory — confirm the
    // "employee" / "department" JSON files exist where the job is launched.
    val employee = spark.read.json("employee")
    val department = spark.read.json("department")

    employee
      .join(department, $"depId" === $"id")
      .groupBy(department("name"))
      .agg(
        avg(employee("salary")),
        sum(employee("salary")),
        max(employee("salary")),
        min(employee("salary")),
        count(employee("name")),
        countDistinct(employee("name"))
      )
      .show()

    // BUG FIX: this select/show was previously placed AFTER the object's
    // closing brace — a top-level statement, which does not compile in
    // Scala 2 — and it referenced `employee`, which is only in scope here.
    employee
      .select(
        employee("name"),
        current_date(),
        current_timestamp(),
        rand(),
        concat(employee("name"), employee("age")),
        concat_ws("//", employee("name"), employee("age"))
      )
      .show()

    // Release the local Spark context and its resources.
    spark.stop()
  }
}