Spark SQL UDF and UDAF
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
import org.apache.spark.sql.types.{DataType, IntegerType, StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}
/**
 * @author Administrator and wind
 * @version 1.0, 2019/11/21
 */
object SparkSQLUDFUDAF {
  def main(args: Array[String]): Unit = {
    // Reduce log output
    Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
    val conf = new SparkConf().setMaster("local").setAppName("SparkSQLUDFUDAF")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
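    // Note: SQLContext is the Spark 1.x entry point and still works here, but
    // createOrReplaceTempView below is a Spark 2.x API; in Spark 2.x,
    // SparkSession is the preferred entry point.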
    val bigData = Array("Spark", "Spark", "Spark", "spark", "spark", "spark", "spark", "spark", "Hadoop", "Hadoop", "Hadoop", "Hadoop")
    // Create a DataFrame from the array
    val bigDataRDD = sc.parallelize(bigData)
    val bigDataRDDRow = bigDataRDD.map(item => Row(item))
    val structType = StructType(Array(
      StructField("word", StringType, true)
    ))
    val bigDataDF = sqlContext.createDataFrame(bigDataRDDRow, structType)
    bigDataDF.createOrReplaceTempView("bigDataTable")
    // UDF: Spark SQL supports UDFs with up to 22 input parameters
    sqlContext.udf.register("length", (input: String) => input.length)
    // UDAF
    sqlContext.udf.register("wordcount", new MyUDAF)
    sqlContext.sql("select word, wordcount(word) as count from bigDataTable group by word").show()
    sqlContext.sql("select word, length(word) from bigDataTable").show()
    sc.stop()
  }
}
class MyUDAF extends UserDefinedAggregateFunction {
  /**
   * Specifies the input data type of this aggregate function.
   */
  override def inputSchema: StructType = StructType(Array(StructField("input", StringType, true)))
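  // The original listing is truncated at this point. What follows is a minimal
  // sketch of the remaining UserDefinedAggregateFunction members for a word-count
  // aggregate, inferred from the imports above (IntegerType, MutableAggregationBuffer);
  // the method bodies are assumptions, not the original author's code.

  // Schema of the aggregation buffer: a single running count
  override def bufferSchema: StructType = StructType(Array(StructField("count", IntegerType, true)))

  // Result type returned by the aggregate function
  override def dataType: DataType = IntegerType

  // The function always returns the same result for the same input
  override def deterministic: Boolean = true

  // Initialize the buffer before aggregating a new group
  override def initialize(buffer: MutableAggregationBuffer): Unit = buffer(0) = 0

  // Called once per input row in a group: increment the running count
  override def update(buffer: MutableAggregationBuffer, input: Row): Unit =
    buffer(0) = buffer.getInt(0) + 1

  // Merge partial counts computed on different partitions
  override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit =
    buffer1(0) = buffer1.getInt(0) + buffer2.getInt(0)

  // Produce the final result for a group
  override def evaluate(buffer: Row): Any = buffer.getInt(0)
}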