Create table statement
create table test_over
(
user_id string,
login_date string
) COMMENT 'for testing window functions; safe to drop'
row format delimited
fields terminated by '\t';
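The table is plain tab-delimited text, so the explain experiments below work even on an empty table; to actually see the window functions produce rows, you can seed it with a few values. A minimal sketch (the rows are made up for illustration):

insert into test_over values
  ('u1', '2022-01-01'),
  ('u1', '2022-01-03'),
  ('u2', '2022-01-02');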
Execution plan for over
Spark
spark-sql> explain select
> user_id
> ,login_date
> ,lag(login_date,1,'0001-01-01') over(partition by user_id order by login_date) prev_date
> from test_over;
22/03/10 10:55:50 INFO [main] CodeGenerator: Code generated in 9.641436 ms
== Physical Plan ==
Window [lag(login_date#34, 1, 0001-01-01) windowspecdefinition(user_id#33, login_date#34 ASC NULLS FIRST, specifiedwindowframe(RowFrame, -1, -1)) AS prev_date#30], [user_id#33], [login_date#34 ASC NULLS FIRST]
+- *(1) Sort [user_id#33 ASC NULLS FIRST, login_date#34 ASC NULLS FIRST], false, 0
+- Exchange hashpartitioning(user_id#33, 200)
+- Scan hive default.test_over [user_id#33, login_date#34], HiveTableRelation `default`.`test_over`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [user_id#33, login_date#34]
Time taken: 0.098 seconds, Fetched 1 row(s)
22/03/10 10:55:50 INFO [main] SparkSQLCLIDriver: Time taken: 0.098 seconds, Fetched 1 row(s)
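Reading the plan bottom-up: Spark first shuffles rows by user_id (Exchange hashpartitioning, 200 shuffle partitions by default), then sorts each partition by user_id, login_date, and only then evaluates the Window operator. lag(login_date, 1, '0001-01-01') compiles to a RowFrame of (-1, -1), i.e. a frame containing exactly the one preceding row. On the illustrative rows inserted above, the query would return something like:

select user_id, login_date,
       lag(login_date, 1, '0001-01-01')
         over (partition by user_id order by login_date) prev_date
from test_over;
-- u1   2022-01-01   0001-01-01   (no previous row, default is used)
-- u1   2022-01-03   2022-01-01
-- u2   2022-01-02   0001-01-01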
spark-sql>
> explain
> select
> user_id
> ,login_date
> ,first_value(login_date) over(partition by user_id ) prev_date
> from test_over;
== Physical Plan ==
Window [first(login_date#39, false) windowspecdefinition(user_id#38, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS prev_date#35], [user_id#38]
+- *(1) Sort [user_id#38 ASC NULLS FIRST], false, 0
+- Exchange hashpartitioning(user_id#38, 200)
+- Scan hive default.test_over [user_id#38, login_date#39], HiveTableRelation `default`.`test_over`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [user_id#38, login_date#39]
Time taken: 0.077 seconds, Fetched 1 row(s)
22/03/10 10:57:34 INFO [main] SparkSQLCLIDriver: Time taken: 0.077 seconds, Fetched 1 row(s)
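Two things stand out here. First, first_value(login_date) compiles to first(login_date, false), where false is the ignoreNulls flag. Second, because there is no ORDER BY, the sort is on user_id only and the frame defaults to RowFrame(unboundedpreceding, unboundedfollowing), i.e. the whole partition; which row counts as "first" is then whatever Spark happens to read first, so the result is not deterministic. A sketch of the deterministic variant (note the standard default frame with ORDER BY is RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):

select user_id, login_date,
       first_value(login_date)
         over (partition by user_id order by login_date) first_date
from test_over;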
spark-sql>
>
> explain select
> user_id
> ,login_date
> ,max(login_date) over(partition by use