操作的数据:
{"city":"St. John's","avgHigh":8.7,"avgLow":0.6}
{"city":"Charlottetown","avgHigh":9.7,"avgLow":0.9}
{"city":"Halifax","avgHigh":11.0,"avgLow":1.6}
{"city":"Fredericton","avgHigh":11.2,"avgLow":-0.5}
{"city":"Quebec","avgHigh":9.0,"avgLow":-1.0}
{"city":"Montreal","avgHigh":11.1,"avgLow":1.4}
/**
 * Demo of registering and calling a Spark SQL user-defined function (UDF).
 *
 * Loads a JSON file into a DataFrame, exposes it as the temporary view
 * `citytemps`, registers the UDF `CTOF` (computes `dc * 9.0 / 5.0 + 32.0`,
 * i.e. Celsius to Fahrenheit) and prints the converted `avgLow` / `avgHigh`
 * columns for every row.
 */
object UdfDemo1 {

  /**
   * Runs the demo against a local Spark master and prints the query result.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setAppName("").setMaster("local")
    val sc = new SparkContext(conf)
    // Stop the context in `finally` so it is released even when reading the
    // file or running the query throws.
    try {
      // Entry point used to run SQL statements.
      val sct: SQLContext = new SQLContext(sc)

      // Read the input file as JSON.
      // NOTE(review): the sample output recorded with this demo shows an all-null
      // first row, which suggests the first input line was not parsed as JSON
      // (possibly a BOM at the start of the file) -- confirm against the real file.
      val file: DataFrame = sct.read.json("E://words.txt")

      // Expose the DataFrame to SQL under the name `citytemps`.
      file.createOrReplaceTempView("citytemps")

      // Register the UDF; it is applied to each value of the column it is called on.
      sct.udf.register("CTOF", (dc: Double) => (dc * 9.0 / 5.0) + 32.0)

      // Call the UDF from SQL as CTOF(<column>).
      sct
        .sql("select city,CTOF(avgLow) as avgLows, CTOF(avgHigh) as avgHighs from citytemps")
        .show()
    } finally {
      sc.stop()
    }
  }
}
输出结果:
+-------------+-------+--------+
|         city|avgLows|avgHighs|
+-------------+-------+--------+
|         null|   null|    null|
|Charlottetown|  33.62|   49.46|
|      Halifax|  34.88|    51.8|
|  Fredericton|   31.1|   52.16|
|       Quebec|   30.2|    48.2|
|     Montreal|  34.52|   51.98|
+-------------+-------+--------+