Write a query that'll identify returning active users. A returning active user is a user that has made a second purchase within 7 days of any other of their purchases. Output a list of user_ids of these returning active users.
--建表
use strata;
drop table amazon_transactions;
create table amazon_transactions
(
id int
,user_id int
,item string
,created_at date
,revenue int
)
row format delimited fields terminated by ',';
load data local inpath '/tmp/strata/amazon_transactions.txt' overwrite into table amazon_transactions;
--查看数据
hive> select * from amazon_transactions limit 10;
OK
1 109 milk 2020-03-03 123
2 139 biscuit 2020-03-18 421
3 120 milk 2020-03-18 176
4 108 banana 2020-03-18 862
5 130 milk 2020-03-28 333
6 103 bread 2020-03-29 862
7 122 banana 2020-03-07 952
8 125 bread 2020-03-13 317
9 139 bread 2020-03-30 929
10 141 banana 2020-03-17 812
Time taken: 0.08 seconds, Fetched: 10 row(s)
--output
with t as
(
select user_id
,created_at
,datediff(created_at,lag(created_at,1)over(partition by user_id order by created_at asc)) as diff
from amazon_transactions
)
select distinct user_id
from t where t.diff<=7;
---
100
103
105
109
110
111
112
114
117
120
122
128
129
130
131
133
141
143
150
Time taken: 39.329 seconds, Fetched: 19 row(s)