数据准备
create extenal table data(
cookieid string,
creattime string,
url string,
)
row formated field terminated by ','
select * from data;
结果如下所示
cookie1 2015-04-10 10:00:02 url2
cookie1 2015-04-10 10:00:00 url1
cookie1 2015-04-10 10:03:04 1url3
cookie1 2015-04-10 10:50:05 url6
cookie1 2015-04-10 11:00:00 url7
cookie1 2015-04-10 10:10:00 url4
cookie1 2015-04-10 10:50:01 url5
cookie2 2015-04-10 10:00:02 url22
cookie2 2015-04-10 10:00:00 url11
cookie2 2015-04-10 10:03:04 1url33
cookie2 2015-04-10 10:50:05 url66
cookie2 2015-04-10 11:00:00 url77
cookie2 2015-04-10 10:10:00 url44
cookie2 2015-04-10 10:50:01 url55
1.LAG函数
LAG函数用于统计窗口内往上第n行值,传递三个参数,第一个参数为列名,第二个参数为从该行其往上第n行(默认为1),第三个参数为默认值(当数据为空的时候传递的参数,默认为NULL)
select cookieid,
creattime,
url,
LAG(createtime,1)OVER(partition by cookieid order by createtime) AS last_1_time,
from data;
结果如下所示
cookieid createtime url last_1_time
---------------------------------------------------------------
cookie1 2015-04-10 10:00:00 url1 NULL
cookie1 2015-04-10 10:00:02 url2 2015-04-10 10:00:00
cookie1 2015-04-10 10:03:04 1url3 2015-04-10 10:00:02
cookie1 2015-04-10 10:10:00 url4 2015-04-10 10:03:04
cookie1 2015-04-10 10:50:01 url5 2015-04-10 10:10:00
cookie1 2015-04-10 10:50:05 url6 2015-04-10 10:50:01
cookie1 2015-04-10 11:00:00 url7 2015-04-10 10:50:05
cookie2 2015-04-10 10:00:00 url11 NULL
cookie2 2015-04-10 10:00:02 url22 2015-04-10 10:00:00
cookie2 2015-04-10 10:03:04 1url33 2015-04-10 10:00:02
cookie2 2015-04-10 10:10:00 url44 2015-04-10 10:03:04
cookie2 2015-04-10 10:50:01 url55 2015-04-10 10:10:00
cookie2 2015-04-10 10:50:05 url66 2015-04-10 10:50:01
cookie2 2015-04-10 11:00:00 url77 2015-04-10 10:50:05
2.LEAD函数
LEAD函数用于统计当前窗口内向下第n行值,其传递参数规则与LAG函数一致。
select cookieid,
createtime,
url,
LEAD(createtime,1) OVER(partition by cookieid order by createtime) AS next_1_time
FROM lxw1234;
cookieid createtime url next_1_time
------------------------------------------------------------------
cookie1 2015-04-10 10:00:00 url1 2015-04-10 10:00:02
cookie1 2015-04-10 10:00:02 url2 2015-04-10 10:03:04
cookie1 2015-04-10 10:03:04 1url3 2015-04-10 10:10:00
cookie1 2015-04-10 10:10:00 url4 2015-04-10 10:50:01
cookie1 2015-04-10 10:50:01 url5 2015-04-10 10:50:05
cookie1 2015-04-10 10:50:05 url6 2015-04-10 11:00:00
cookie1 2015-04-10 11:00:00 url7 NULL
cookie2 2015-04-10 10:00:00 url11 2015-04-10 10:00:02
cookie2 2015-04-10 10:00:02 url22 2015-04-10 10:03:04
cookie2 2015-04-10 10:03:04 1url33 2015-04-10 10:10:00
cookie2 2015-04-10 10:10:00 url44 2015-04-10 10:50:01
cookie2 2015-04-10 10:50:01 url55 2015-04-10 10:50:05
cookie2 2015-04-10 10:50:05 url66 2015-04-10 11:00:00
cookie2 2015-04-10 11:00:00 url77 NULL
3.FRIST_VAIUE
取分组内排序后的最后一个结果
SELECT cookieid,
createtime,
url,
LAST_VALUE(url) OVER(PARTITION BY cookieid ORDER BY createtime) AS last1
FROM data;
cookieid createtime url last1
--------------------------------------------------------------
cookie1 2015-04-10 10:00:00 url1 url1
cookie1 2015-04-10 10:00:02 url2 url2
cookie1 2015-04-10 10:03:04 1url3 1url3
cookie1 2015-04-10 10:10:00 url4 url4
cookie1 2015-04-10 10:50:01 url5 url5
cookie1 2015-04-10 10:50:05 url6 url6
cookie1 2015-04-10 11:00:00 url7 url7
cookie2 2015-04-10 10:00:00 url11 url11
cookie2 2015-04-10 10:00:02 url22 url22
cookie2 2015-04-10 10:03:04 1url33 1url33
cookie2 2015-04-10 10:10:00 url44 url44
cookie2 2015-04-10 10:50:01 url55 url55
cookie2 2015-04-10 10:50:05 url66 url66
cookie2 2015-04-10 11:00:00 url77 url77
4.LAST_VALUE
取分组内排序后,截止到当前行,最后一个值,传递参数规则与FIRST_VALUE一致