哈喽,小伙伴们,欢迎来到小张的频道,今天给大家讲解一道面试中常见的SQL问题------连续问题,希望能帮助到小伙伴们。还需要给大家说明的是这是一个系列性文章,如果您想了解更多面试题型,希望大家多多关注小张哦~
需求1: 求连续登录2天的用户
需求2: 求夺得3连贯的队伍
需求3: 找出连续 3 天及以上减少碳排放量在 100 以上的用户
文章目录
1. 需求1:求连续登录2天的用户
1.1 原始数据 login.txt
A,2019-08-22
B,2019-08-22
C,2019-08-22
A,2019-08-23
C,2019-08-23
A,2019-08-24
B,2019-08-24
1.2 期望结果
±------------±-+
| A |
| C |
±------------±-+
1.3 建表语句
create table test.login(
username string,
longinstr string
)row format delimited fields terminated by ',';
1.4 加载数据
load data local inpath '/opt/datas/hive/login.txt' into table test.login;
+-----------------+------------------+
| login.username | login.longinstr |
+-----------------+------------------+
| A | 2019-08-22 |
| B | 2019-08-22 |
| C | 2019-08-22 |
| A | 2019-08-23 |
| C | 2019-08-23 |
| A | 2019-08-24 |
| B | 2019-08-24 |
+-----------------+------------------+
1.5 答案
SELECT
username
FROM
(SELECT
username,
longinstr,
date_sub(longinstr,ROW_NUMBER() OVER (PARTITION BY username ORDER BY longinstr)) rank
FROM
longin
) t1
GROUP BY
username,rank
HAVING
COUNT(rank) >=2 ;
1.6 执行结果
+-----------+
| username |
+-----------+
| A |
| C |
+-----------+
2. 需求2:求夺得3连贯的队伍
2.1 原始数据team.txt
team,year
活塞,1990
公牛,1991
公牛,1992
公牛,1993
火箭,1994
火箭,1995
公牛,1996
公牛,1997
公牛,1998
马刺,1999
湖人,2000
湖人,2001
湖人,2002
马刺,2003
活塞,2004
马刺,2005
热火,2006
马刺,2007
凯尔特人,2008
湖人,2009
湖人,2010
2.2 建表语句
create table test.teams(
team string,
year int
)row format delimited fields terminated by ',';
2.3 加载数据
load data local inpath '/opt/datas/hive/teams.txt' into table teams;
+-------------+-------------+
| teams.team | teams.year |
+-------------+-------------+
| 活塞 | 1990 |
| 公牛 | 1991 |
| 公牛 | 1992 |
| 公牛 | 1993 |
| 火箭 | 1994 |
| 火箭 | 1995 |
| 公牛 | 1996 |
| 公牛 | 1997 |
| 公牛 | 1998 |
| 马刺 | 1999 |
| 湖人 | 2000 |
| 湖人 | 2001 |
| 湖人 | 2002 |
| 马刺 | 2003 |
| 活塞 | 2004 |
| 马刺 | 2005 |
| 热火 | 2006 |
| 马刺 | 2007 |
| 凯尔特人 | 2008 |
| 湖人 | 2009 |
| 湖人 | 2010 |
+-------------+-------------+
2.4 答案
SELECT
team,rank
FROM(
SELECT
team,
year,
(year-ROW_NUMBER() over (PARTITION BY team ORDER BY year) )rank
FROM
teams
) t1
GROUP BY
team,rank
HAVING
count(rank)>=3;
2.5 执行结果
+-------+-------+
| team | rank |
+-------+-------+
| 公牛 | 1990 |
| 公牛 | 1992 |
| 湖人 | 1999 |
+-------+-------+
3. 需求3:找出连续 3 天及以上减少碳排放量在 100 以上的用户
3.1 原始数据carbon.txt
1001 2021-12-12 123
1001 2021-12-13 43
1001 2021-12-13 45
1001 2021-12-13 23
1001 2021-12-14 230
1001 2021-12-15 23
1002 2021-12-12 45
1002 2021-12-14 45
1002 2021-12-15 45
3.2 建表语句
create table test.carbon(
id string,
dt string,
lowcarbon string
)row format delimited fields terminated by ' ';
3.3 加载数据
load data local inpath '/opt/datas/hive/carbon.txt' into table carbon;
+------------+-------------+-------------------+
| carbon.id | carbon.dt | carbon.lowcarbon |
+------------+-------------+-------------------+
| 1001 | 2021-12-12 | 123 |
| 1002 | 2021-12-12 | 45 |
| 1001 | 2021-12-13 | 43 |
| 1001 | 2021-12-13 | 45 |
| 1001 | 2021-12-13 | 23 |
| 1002 | 2021-12-14 | 45 |
| 1001 | 2021-12-14 | 230 |
| 1002 | 2021-12-15 | 45 |
| 1001 | 2021-12-15 | 23 |
+------------+-------------+-------------------+
3.4 期望结果
+-------+
| id |
+-------+
| 1001 |
+-------+
3.5 答案
SELECT
id
FROM
(SELECT
id,
dt,
lowcarbon,
date_sub(dt,row_number() over (partition by id ORDER BY dt) ) rank
FROM (
SELECT
id,
dt,
SUM(lowcarbon) lowcarbon
FROM
(SELECT
id,
dt,
lowcarbon,
row_number() over (partition by id ORDER BY dt ) rank
FROM
carbon) t1
GROUP BY
id,dt
ORDER BY
id,dt
)
t2) t3
WHERE
lowcarbon>100
GROUP BY
id,rank
HAVING
COUNT(rank)>=3 ;
3.6 运行结果
+-------+
| id |
+-------+
| 1001 |
+-------+