临时UDF
创建临时UDF,在SQL中使用:
(注:相关背景内容见上一篇博文《Spark SQL基础笔记及简单案例》)
// Build a DataFrame of students from a comma-separated text file on HDFS.
// NOTE(review): assumes `sc` (SparkContext) is in scope and that
// `sqlContext.implicits._` has been imported so `.toDF()` resolves —
// both hold by default in spark-shell; confirm in a compiled app.
case class Student(id: String, name: String, age: Int)
val rdd = sc
  .textFile("hdfs://node01:9000/sqldata/students.txt")
  .map(line => line.split(","))
  .map(fields => Student(fields(0), fields(1), fields(2).trim.toInt))
val students = rdd.toDF()
// Merge an arbitrary sequence of column values into one string,
// joined with the given separator (each element via its toString).
def allInOne(seq: Seq[Any], sep: String): String =
  seq.iterator.map(_.toString).mkString(sep)
def allInOne1(s1:String, s2:String,s3:Int): String = s1+s2+s3.toString
// Register the functions with the SQL engine so they can be invoked
// from SQL text as allInOne(...) / allInOne1(...).
// NOTE(review): Spark derives the UDF's input/output schema from the
// method's type signature by reflection; a Seq[Any] parameter is not
// accepted by every Spark version — confirm against the target version.
sqlContext.udf.register("allInOne", allInOne _)
sqlContext.udf.register("allInOne1", allInOne1 _)
// Alternatively, pass an anonymous function as the implementation.
// NOTE(review): this re-registers the name "allInOne" and silently
// replaces the earlier registration of the same name — in real code,
// keep only one of the two registrations.
sqlContext.udf.register( "allInOne", (seq: Seq[Any], sep: String)=>seq.mkString(sep) )
// Use allInOne through the DataFrame API (instead of SQL text):
// wrap it with udf(), collect the input columns into a single array
// column, and pass the separator as a literal column.
import org.apache.spark.sql.functions.{udf,array,lit}
// NOTE(review): udf() needs TypeTags for the argument types; whether
// Seq[Any] is accepted depends on the Spark version — confirm.
val myFunc = udf(allInOne _)
// Pack id/name/age into one array column. NOTE(review): array() requires
// a common element type; `age` is an Int column while id/name are strings,
// so an implicit cast must apply — verify this on the target Spark version.
val cols = array("id","name","age")
// The separator passed to the UDF, as a constant (literal) column.
val sep = lit(",")
students.select(myFunc(cols,sep).alias("co