df 加字段(插入固定值)
1.使用lit()函数
// Read the two fixed values from the component config (both defaulting
// to "1") and append them as constant columns with lit().
// Fixes: del_flag was never assigned (a stray is_supplement assignment
// stood in its place), so lit(del_flag) inserted null instead of a
// fixed value; vars with `_` default-init replaced by vals.
val status: String = componentProps.getString("status", "1")
val del_flag: String = componentProps.getString("del_flag", "1")
val frame = df
.withColumn("status", lit(status))
.withColumn("del_flag", lit(del_flag))
2.使用selectExpr()
// Same result via selectExpr. Write the alias explicitly with "as":
// the original "(1)" + "status" only parsed because ")" happens to
// delimit the token — fragile and hard to read.
.selectExpr("*", "1 as status",
"1 as del_flag")
3.sql
// Register the DataFrame as a temporary view so the constant columns
// can be added with plain SQL instead of the DataFrame API.
df.createOrReplaceTempView("test")
// "1 as status" / "1 as del_flag" add the fixed-value columns;
// note these are integer literals here, not strings.
val frame = spark.sql(
"""
|select
|*,1 as status,1 as del_flag
|from
|test
|""".stripMargin)
df 加字段(不是固定值,sparksql内置的函数)
使用selectExpr(),或者直接在sql里使用
val frame = df.selectExpr("*", "now()" + "create_time")
或者直接在sql中使用
udf注册函数
有时应对复杂业务,sparksql 的内置函数不能满足需求时,就要用 udf 注册自定义函数
1.spark.udf.register()
// Register a SQL-callable UDF named "uuid" that returns a random
// UUID with the dashes stripped, then use it in selectExpr with an
// explicit "as" alias (the original "uuid()"+"id" concatenation only
// parsed by accident of tokenization).
val context = df.sqlContext
context.udf.register("uuid", () => {
  UUID.randomUUID().toString.replaceAll("-", "")
})
df.selectExpr("*", "uuid() as id").show()
或者直接在sql中使用
// Once registered (see spark.udf.register above in the notes), the
// "uuid" UDF can be called from plain SQL like any built-in function.
df.createOrReplaceTempView("test")
val frame = spark.sql(
"""
|select
|*,uuid() as id
|from
|test
|""".stripMargin)
frame.show()
2.udf()
在df.withColumn中使用
import org.apache.spark.sql.functions._
// Derive the primary-pollutant name for a row from its six pollutant
// measurements, delegating the AQI computation to AqiUtil.CountAqi.
def primary_pollutant(pm25: Double, pm10: Double, o3: Double, co: Double, so2: Double, no2: Double): String =
  AqiUtil.CountAqi(pm25, pm10, o3, co, so2, no2).getName

// Wrap the plain function as a Spark UDF usable inside withColumn.
val primary_pollutantUDF = udf(primary_pollutant _)
// The input columns are passed positionally, matching the parameter order.
val inputs = Seq("pm25", "pm10", "o3", "co", "so2", "no2").map(df(_))
val frame = df.withColumn("primary_pollutant", primary_pollutantUDF(inputs: _*))
frame.show()