// 需求: 使用自定义函数,实现concat_ws功能 (Requirement: implement concat_ws functionality with a custom UDF)
import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}
/**
 * Demonstrates registering a user-defined function that reproduces the
 * behavior of Spark SQL's built-in `concat_ws(sep, col1, col2, ...)`.
 *
 * Runs locally, builds a small three-column DataFrame, registers the UDF
 * under the SQL name MY_CONCAT_WS, and shows the joined result.
 */
object Myconcat_ws {
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder().appName("naya").master("local[*]").getOrCreate()
    import spark.implicits._

    // Sample (province, city, district) rows.
    val ds: Dataset[(String, String, String)] = spark.createDataset(List(("北京市", "北京市", "昌平区"), ("广东省", "广州市", "南沙区"), ("河北省", "衡水市", "桃城区")))
    val df: DataFrame = ds.toDF("province", "city", "district")
    // createOrReplaceTempView avoids an AnalysisException if this runs twice
    // in the same session (createTempView fails when the view already exists).
    df.createOrReplaceTempView("v_location")

    // Built-in equivalent, kept for reference:
    //spark.sql("select concat_ws('|',province,city,district) location from v_location").show()

    // Custom implementation of concat_ws. Like the built-in, null inputs are
    // skipped entirely rather than being rendered as the string "null"
    // (plain `p1 + split + p2 + split + p3` would do the latter).
    val func = (split: String, p1: String, p2: String, p3: String) =>
      Seq(p1, p2, p3).filter(_ != null).mkString(split)

    // Register the function so it is callable by name from SQL.
    spark.udf.register("MY_CONCAT_WS", func)
    spark.sql("select MY_CONCAT_WS('|',province,city,district) location from v_location").show()
    spark.stop()
  }
}