/*
 * UDF overview
 *
 *  - UDF  (one in, one out):  operates on every element of a column and
 *                             returns exactly one result per input value.
 *  - UDAF (many in, one out): see "Defining and using a UDAF".
 *  - UDTF (one in, many out): see "Defining and using a UDTF".
 *
 * Defining a UDF:
 *   spark.udf.register("<udf name>", (v: <data type>) => <custom code block>)
 *
 * Using a UDF:
 *   spark.sql("select <udf name>(<column>) from <table (DataFrame / view, etc.)>")
 *
 * Example below.
 *
 * NOTE(review): this snippet needs
 *   import org.apache.spark.sql.{DataFrame, SparkSession}
 * at the top of the file; that import is not visible in this excerpt.
 */
object SparkUDFDemo {

  /** One parsed input record: the two comma-separated fields of a raw line. */
  case class Hobbies(name: String, hobbies: String)

  /**
   * Builds a small DataFrame, registers the `hoby_num` UDF (length of a string
   * column) and queries it through Spark SQL, printing both tables.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("udf").getOrCreate()
    try {
      import spark.implicits._
      val sc = spark.sparkContext

      val rdd = sc.parallelize(List("zs,29", "ls,23"))
      // Pattern-match on the split result so a record with fewer than two
      // fields is skipped instead of failing the job with an
      // ArrayIndexOutOfBoundsException; extra fields are ignored as before.
      val df = rdd
        .map(_.split(","))
        .collect { case Array(name, hobbies, _*) => Hobbies(name, hobbies) }
        .toDF()
      df.show()
      /*
      +----+-------+
      |name|hobbies|
      +----+-------+
      |  zs|     29|
      |  ls|     23|
      +----+-------+
      */

      // Create a temporary view so the DataFrame can be queried with SQL.
      df.createOrReplaceTempView("df")

      // Define the UDF. Wrapping the argument in Option makes a NULL column
      // value yield NULL instead of throwing a NullPointerException.
      spark.udf.register("hoby_num", (v: String) => Option(v).map(_.length))

      // Use the UDF from SQL.
      val frame: DataFrame =
        spark.sql("select name,hobbies,hoby_num(hobbies) as hobnum from df")
      frame.show()
      /*
      +----+-------+------+
      |name|hobbies|hobnum|
      +----+-------+------+
      |  zs|     29|     2|
      |  ls|     23|     2|
      +----+-------+------+
      */
    } finally {
      // Always release the session (and its SparkContext), even on failure.
      spark.stop()
    }
  }
}