UDF----------------------------------------
完整的示例:
/** Minimal Spark SQL example: registers a UDF and invokes it from a SQL query. */
object SparkSQL {

  /**
   * Reads `dir/people.json` into a DataFrame, registers the `addName` UDF,
   * exposes the data as the temporary view `people`, and prints the result of
   * a query that applies the UDF to the `name` column.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // Create the SparkConf and set the application name and master.
    val conf = new SparkConf().setAppName("SparkSQLDemo").setMaster("local")
    val spark = SparkSession.builder().config(conf).getOrCreate()
    try {
      val df: DataFrame = spark.read.json("dir/people.json")
      // Register the function by name so SQL statements run through this
      // session can call it for the rest of the application.
      spark.udf.register("addName", (x: String) => "Name:" + x)
      df.createOrReplaceTempView("people")
      spark.sql("Select addName(name), age from people").show()
    } finally {
      // Always release the session, even if reading or querying fails.
      spark.stop()
    }
  }
}
/**
 * Custom UDF.
 * Takes a JSON-formatted string and a field name, and returns the value of
 * that field as a string.
 */
public class GetJsonObjectUDF implements UDF2<String,String,String> {
    private static final long serialVersionUID = 6776121915573178083L;

    /**
     * Extracts a single field from a JSON object string.
     *
     * @param json  the JSON text to parse; may be null
     * @param field the name of the field to read; may be null
     * @return the field's value as a string, or null when either argument is
     *         null, the text does not parse to a JSON object, or parsing fails
     * @throws Exception declared by the UDF2 interface; parsing failures are
     *                   caught here and reported as a null result
     */
    @Override
    public String call(String json, String field) throws Exception {
        // Null inputs are answered directly instead of relying on a caught
        // NullPointerException (and a printed stack trace) for every such row.
        if (json == null || field == null) {
            return null;
        }
        try {
            // Parsing and lookup use fastjson's built-in methods.
            JSONObject jsonObject = JSONObject.parseObject(json);
            if (jsonObject == null) {
                // The parser produced no object (e.g. blank input).
                return null;
            }
            return jsonObject.getString(field);
        } catch (Exception e) {
            // Malformed JSON: report it and fall through to a null result.
            e.printStackTrace();
        }
        return null;
    }
}
---