- 'uid
- col("uid")
- $"uid"
- "uid" 是字符串(String)的意思
以上 1~3 的写法含义相同,都表示一个 org.apache.spark.sql.Column(列对象);而 4 只是一个普通的字符串(String)。
但是需要注意的是:在同一个 select 调用中,String 和 org.apache.spark.sql.Column 不能混用——参数要么全部是 String,要么全部是 Column(例如 df.select("uid", col("sdt")) 无法通过编译)。
例如,下面的写法都是正确的:
// Read the CSV file and name its four columns uid, sdt, edt and flow.
val df: DataFrame = spark.read
  .csv("/user/vc/demo_2.csv")
  .toDF("uid", "sdt", "edt", "flow")

// Window column shared by the Column-based examples below: Spark SQL `lag` over `edt`
// with offset 1 and `sdt` as the default, partitioned by uid and ordered by sdt,
// aliased as lag_time.
val lagTime = expr("lag(edt, 1,sdt) over(partition By uid order by sdt ) as lag_time ")

// Variant 1: columns written as Symbol literals ('name).
df.select('uid, 'sdt, lagTime).show()

// Variant 2: columns built with col("name").
df.select(col("uid"), col("sdt"), lagTime).show()

// Variant 3: columns written with the $"name" interpolator.
df.select($"uid", $"sdt", lagTime).show()

// Variant 4: plain String column names only (no Column arguments in the same call).
df.select("uid", "sdt").show()