1、找出全部夺得3连贯的队伍
数据:
活塞,1990
公牛,1991
公牛,1992
公牛,1993
火箭,1994
火箭,1995
公牛,1996
公牛,1997
公牛,1998
马刺,1999
湖人,2000
湖人,2001
湖人,2002
马刺,2003
活塞,2004
马刺,2005
热火,2006
马刺,2007
凯尔特人,2008
湖人,2009
湖人,2010
create table hwt1(
team string,
year int
)
row format delimited
fields terminated by ',';
load data local inpath "/home/hw2/t1.dat" into table hwt1;
select * from hwt1;
答题:
select team,year,row_number() over(partition by team order by year) rowNum
from hwt1;
select team,year-row_number() over(partition by team order by year) gid
from hwt1;
select team,gid+1 starYear
from (
select team,year-row_number() over(partition by team order by year) gid
from hwt1
) t1 group by team,gid having count(*)>=3;
结果:
2、找出每个id在在一天之内所有的波峰与波谷值
数据:
sh66688,9:35,29.48
sh66688,9:40,28.72
sh66688,9:45,27.74
sh66688,9:50,26.75
sh66688,9:55,27.13
sh66688,10:00,26.30
sh66688,10:05,27.09
sh66688,10:10,26.46
sh66688,10:15,26.11
sh66688,10:20,26.88
sh66688,10:25,27.49
sh66688,10:30,26.70
sh66688,10:35,27.57
sh66688,10:40,28.26
sh66688,10:45,28.03
sh66688,10:50,27.36
sh66688,10:55,26.48
sh66688,11:00,27.41
sh66688,11:05,26.70
sh66688,11:10,27.35
sh66688,11:15,27.35
sh66688,11:20,26.63
sh66688,11:25,26.35
sh66688,11:30,26.81
sh66688,13:00,29.45
sh66688,13:05,29.41
sh66688,13:10,29.10
sh66688,13:15,28.24
sh66688,13:20,28.20
sh66688,13:25,28.59
sh66688,13:30,29.49
sh66688,13:35,30.45
sh66688,13:40,30.31
sh66688,13:45,30.17
sh66688,13:50,30.55
sh66688,13:55,30.75
sh66688,14:00,30.03
sh66688,14:05,29.61
sh66688,14:10,29.96
sh66688,14:15,30.79
sh66688,14:20,29.82
sh66688,14:25,30.09
sh66688,14:30,29.61
sh66688,14:35,29.88
sh66688,14:40,30.36
sh66688,14:45,30.88
sh66688,14:50,30.73
sh66688,14:55,30.76
sh88888,9:35,67.23
sh88888,9:40,66.56
sh88888,9:45,66.73
sh88888,9:50,67.43
sh88888,9:55,67.49
sh88888,10:00,68.34
sh88888,10:05,68.13
sh88888,10:10,67.35
sh88888,10:15,68.13
sh88888,10:20,69.05
sh88888,10:25,69.82
sh88888,10:30,70.62
sh88888,10:35,70.59
sh88888,10:40,70.40
sh88888,10:45,70.29
sh88888,10:50,70.53
sh88888,10:55,70.92
sh88888,11:00,71.13
sh88888,11:05,70.24
sh88888,11:10,70.37
sh88888,11:15,69.79
sh88888,11:20,69.73
sh88888,11:25,70.52
sh88888,11:30,71.23
sh88888,13:00,72.85
sh88888,13:05,73.76
sh88888,13:10,74.72
sh88888,13:15,75.48
sh88888,13:20,75.80
sh88888,13:25,76.74
sh88888,13:30,77.22
sh88888,13:35,77.12
sh88888,13:40,76.90
sh88888,13:45,77.80
sh88888,13:50,78.75
sh88888,13:55,78.30
sh88888,14:00,78.68
sh88888,14:05,78.99
sh88888,14:10,78.35
sh88888,14:15,78.37
sh88888,14:20,78.07
sh88888,14:25,78.80
sh88888,14:30,79.78
sh88888,14:35,79.72
sh88888,14:40,80.71
sh88888,14:45,79.92
sh88888,14:50,80.49
sh88888,14:55,80.44
准备:
create table hwt2(
id string,
time string,
price double
)
row format delimited
fields terminated by ',';
load data local inpath "/home/hw2/t2.dat" into table hwt2;
select * from hwt2;
答题:
select id,if(length(time)=4,'0'||time,time)time,price from hwt2;
select id,if(length(time)=4,'0'||time,time) time,price,
if(price-nvl(lead(price) over(partition by id order by if(length(time)=4,'0'||time,time)),0)>=0,1,-1) feature,
row_number() over(partition by id order by if(length(time)=4,'0'||time,time)) rowNum
from hwt2;
select t1.id,t1.time,t1.price,t1.feature,
t1.rowNum-row_number() over(partition by t1.id,t1.feature order by t1.time) gid
from(
select id,if(length(time)=4,'0'||time,time) time,price,
if(price-nvl(lead(price) over(partition by id order by if(length(time)=4,'0'||time,time)),0)>=0,1,-1) feature,
row_number() over(partition by id order by if(length(time)=4,'0'||time,time)) rowNum
from hwt2
)t1;
select t3.id,t3.time,t3.price,if(t3.feature=1,'波峰','波谷')feature
from(
select t2.id,t2.time,t2.price,t2.feature,
row_number() over(partition by t2.id,t2.feature,t2.gid order by t2.time) rn
from(
select t1.id,t1.time,t1.price,t1.feature,
t1.rowNum-row_number() over(partition by t1.id,t1.feature order by t1.time) gid
from(
select id,if(length(time)=4,'0'||time,time) time,price,
if(price-nvl(lead(price) over(partition by id order by if(length(time)=4,'0'||time,time)),0)>=0,1,-1) feature,
row_number() over(partition by id order by if(length(time)=4,'0'||time,time)) rowNum
from hwt2
)t1
)t2
)t3
where t3.rn=1;
不会了!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
结果:
3、写SQL
数据:
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 17:02 https:
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 17:23 https:
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 17:09 https:
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 17:12 https:
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 17:31 https:
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 17:34 https:
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 18:46 https:
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 18:13 https:
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 18:14 https:
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 18:11 https:
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 18:20 https:
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 18:25 https:
934e8bee978a42c7a8dbb4cfa8af0b4f 2020/05/28 18:41 https:
32258fe7130844399859aec54b6df5ff 2020/05/28 03:47 https:
32258fe7130844399859aec54b6df5ff 2020/05/28 03:33 https:
32258fe7130844399859aec54b6df5ff 2020/05/28 03:21 https:
32258fe7130844399859aec54b6df5ff 2020/05/28 03:54 https:
32258fe7130844399859aec54b6df5ff 2020/05/28 03:48 https:
32258fe7130844399859aec54b6df5ff 2020/05/28 03:08 https:
32258fe7130844399859aec54b6df5ff 2020/05/28 05:09 https:
32258fe7130844399859aec54b6df5ff 2020/05/28 05:26 https:
32258fe7130844399859aec54b6df5ff 2020/05/28 05:11 https:
32258fe7130844399859aec54b6df5ff 2020/05/28 05:34 https:
32258fe7130844399859aec54b6df5ff 2020/05/28 05:18 https:
32258fe7130844399859aec54b6df5ff 2020/05/28 05:50 https:
32258fe7130844399859aec54b6df5ff 2020/05/28 05:45 https:
de0096ad04ec4273b0462c7da7d79653 2020/05/28 07:04 https:
de0096ad04ec4273b0462c7da7d79653 2020/05/28 07:21 https:
de0096ad04ec4273b0462c7da7d79653 2020/05/28 17:48 https:
de0096ad04ec4273b0462c7da7d79653 2020/05/28 17:56 https:
de0096ad04ec4273b0462c7da7d79653 2020/05/28 17:50 https:
de0096ad04ec4273b0462c7da7d79653 2020/05/28 07:53 https:
de0096ad04ec4273b0462c7da7d79653 2020/05/28 07:00 https:
de0096ad04ec4273b0462c7da7d79653 2020/05/28 17:48 https:
de0096ad04ec4273b0462c7da7d79653 2020/05/28 17:14 https:
de0096ad04ec4273b0462c7da7d79653 2020/05/28 17:50 https:
de0096ad04ec4273b0462c7da7d79653 2020/05/28 17:58 https:
de0096ad04ec4273b0462c7da7d79653 2020/05/28 17:13 https:
de0096ad04ec4273b0462c7da7d79653 2020/05/28 17:39 https:
de0096ad04ec4273b0462c7da7d79653 2020/05/28 07:15 https:
de0096ad04ec4273b0462c7da7d79653 2020/05/28 07:43 https:
de0096ad04ec4273b0462c7da7d79653 2020/05/28 07:14 https:
de0096ad04ec4273b0462c7da7d79653 2020/05/28 07:36 https:
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 08:24 https:
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 08:06 https:
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 08:51 https:
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 08:33 https:
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 08:09 https:
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 08:24 https:
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 08:47 https:
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 08:17 https:
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 18:46 https:
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 18:00 https:
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 18:08 https:
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 18:10 https:
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 18:51 https:
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 18:56 https:
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 18:20 https:
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 18:16 https:
307d9dce3b7f495ab8ad6033f8c54930 2020/05/28 18:10 https:
f5ae36c6cdda40d5954e08a2d14954a7 2020/05/28 14:51 https:
f5ae36c6cdda40d5954e08a2d14954a7 2020/05/28 14:32 https:
f5ae36c6cdda40d5954e08a2d14954a7 2020/05/28 14:11 https:
f5ae36c6cdda40d5954e08a2d14954a7 2020/05/28 14:32 https:
f5ae36c6cdda40d5954e08a2d14954a7 2020/05/28 14:37 https:
f5ae36c6cdda40d5954e08a2d14954a7 2020/05/28 14:35 https:
80ea80b2e5a64cbebfaf34aa797125f0 2020/05/28 03:22 https:
80ea80b2e5a64cbebfaf34aa797125f0 2020/05/28 03:07 https:
80ea80b2e5a64cbebfaf34aa797125f0 2020/05/28 03:56 https:
80ea80b2e5a64cbebfaf34aa797125f0 2020/05/28 03:58 https:
80ea80b2e5a64cbebfaf34aa797125f0 2020/05/28 03:27 https:
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 02:17 https:
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 02:46 https:
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 02:17 https:
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 02:35 https:
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 02:56 https:
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 02:53 https:
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 02:16 https:
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 02:23 https:
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 02:52 https:
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 02:45 https:
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 22:32 https:
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 22:47 https:
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 22:46 https:
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 22:54 https:
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 22:17 https:
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 22:16 https:
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 22:56 https:
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 22:14 https:
95273392ab1a4579914273cdd1f3a3ae 2020/05/28 22:58 https:
022f86d4533740ad914f233cbd9c4430 2020/05/28 22:13 https:
022f86d4533740ad914f233cbd9c4430 2020/05/28 22:51 https:
022f86d4533740ad914f233cbd9c4430 2020/05/28 22:33 https:
022f86d4533740ad914f233cbd9c4430 2020/05/28 22:37 https:
022f86d4533740ad914f233cbd9c4430 2020/05/28 22:56 https:
022f86d4533740ad914f233cbd9c4430 2020/05/28 22:38 https:
022f86d4533740ad914f233cbd9c4430 2020/05/28 22:05 https:
022f86d4533740ad914f233cbd9c4430 2020/05/28 22:39 https:
准备
create table hwt3(
id string,
dt string,
browseid string
)row format delimited fields terminated by '\t';
load data local inpath "/home/hw2/t3.dat" into table hwt3;
select * from hwt3;
create table if not exists hwt3
as select id,split(dt,' ')[1]dt,split(split(browseid,'/')[4],'\\?')[0] browseid from hwt3mid;
select * from hwt3;
drop table hwt3mid;
3.1、每个id浏览时长、步长
答题:
select id,browseid,
(unix_timestamp(concat_ws('-',split(last_value(dt) over (partition by id order by dt rows between unbounded preceding and unbounded following),'/'))|| ':00') -
unix_timestamp(concat_ws('-',split(first_value(dt) over (partition by id order by dt rows between unbounded preceding and unbounded following),'/'))|| ':00'))/60 duration
from hwt3;
select id,duration,count(*) stepSize
from(
select id,browseid,
(unix_timestamp(concat_ws('-',split(last_value(dt) over (partition by id order by dt rows between unbounded preceding and unbounded following),'/'))|| ':00') -
unix_timestamp(concat_ws('-',split(first_value(dt) over (partition by id order by dt rows between unbounded preceding and unbounded following),'/'))|| ':00'))/60 duration
from hwt3
)t1 group by id,duration;
结果:
3.2、如果两次浏览之间的间隔超过30分钟,认为是两个不同的浏览时间;再求每个id浏 览时长、步长
答题
select id,browseid,concat_ws('-',split(dt,'/'))|| ':00' dt,
if((nvl(unix_timestamp(concat_ws('-',split(dt,'/'))|| ':00')-unix_timestamp(concat_ws('-',split(lag(dt) over(partition by id order by dt),'/'))|| ':00'),0)/60)>30,1,0) gid
from hwt3;
select id,browseid,dt,
sum(flag) over(partition by id order by dt rows between unbounded preceding and current row) gid
from(
select id,browseid,concat_ws('-',split(dt,'/'))|| ':00' dt,
if((nvl(unix_timestamp(concat_ws('-',split(dt,'/'))|| ':00')-unix_timestamp(concat_ws('-',split(lag(dt) over(partition by id order by dt),'/'))|| ':00'),0)/60)>30,1,0) flag
from hwt3
)t3;
select id,dt,browseid,gid,
(unix_timestamp(last_value(dt) over (partition by id,gid order by dt rows between unbounded preceding and unbounded following)) -
unix_timestamp(first_value(dt) over (partition by id,gid order by dt rows between unbounded preceding and unbounded following)))/60 duration
from(
select id,browseid,dt,
sum(flag) over(partition by id order by dt rows between unbounded preceding and current row) gid
from(
select id,browseid,concat_ws('-',split(dt,'/'))|| ':00' dt,
if((nvl(unix_timestamp(concat_ws('-',split(dt,'/'))|| ':00')-unix_timestamp(concat_ws('-',split(lag(dt) over(partition by id order by dt),'/'))|| ':00'),0)/60)>30,1,0) flag
from hwt3
)t3
)t2;
select id,duration,count(*) stepSize
from(
select id,browseid,
(unix_timestamp(last_value(dt) over (partition by id,gid order by dt rows between unbounded preceding and unbounded following)) -
unix_timestamp(first_value(dt) over (partition by id,gid order by dt rows between unbounded preceding and unbounded following)))/60 duration
from(
select id,browseid,dt,
sum(flag) over(partition by id order by dt rows between unbounded preceding and current row) gid
from(
select id,browseid,concat_ws('-',split(dt,'/'))|| ':00' dt,
if((nvl(unix_timestamp(concat_ws('-',split(dt,'/'))|| ':00')-unix_timestamp(concat_ws('-',split(lag(dt) over(partition by id order by dt),'/'))|| ':00'),0)/60)>30,1,0) flag
from hwt3
)t3
)t2
)t1 group by id,duration;
结果: