sql练习专场(一)

题目一:找出连续活跃3天及以上的用户

建表语句

create table sql1_1(
  uid   string,
  dt    string
);

insert into sql1_1
values('A','2023-10-01'),('A','2023-10-02'),('A','2023-10-03'),('A','2023-10-04'),
      ('B','2023-10-01'),('B','2023-10-03'),('B','2023-10-04'),('B','2023-10-05'),
      ('C','2023-10-01'),('C','2023-10-03'),('C','2023-10-05'),('C','2023-10-06'),
      ('D','2023-10-02'),('D','2023-10-03'),('D','2023-10-05'),('D','2023-10-06');

结果

思路:
先将所有用户的登录时间,进行排序,然后拿登录时间(可以考虑每日多次登录)去减排序的数字,有几个相等的日期即为连续登录多长时间

结果:

with t1 as
    ( select *,date_sub(dt,row_number() over (partition by uid order by dt)) tempdt from sql1_1  )
select uid from t1 group by uid,tempdt having count(*) >=3;

若考虑每日登录多次可以使用distinct

题目二:统计每月会员数量

create table sql1_2(
    consumerid  string,
    startdate   string,
    enddate     string
)
row format delimited
fields terminated by ',';

load data local inpath '/home/homedata/sql_1/sql1_2.txt' into table  sql1_2;

数据
1,2021-01-01,2022-01-01
2,2021-02-02,2022-02-02
3,2021-03-03,2022-03-03
解决:自定义函数

题目三:统计每个Top3歌单以及Top3歌单下的Top3歌曲

用户编号  歌单编号  歌单名称    歌曲编号    歌曲名称
1   1  经典老歌    1   月亮代表我的心
2   1  经典老歌    1   月亮代表我的心
3   1  经典老歌    3   夜来香
4   1  经典老歌    4   我只在乎你
5   1  经典老歌    5   千言万语
6   1  经典老歌    5   千言万语
7   2  流行金曲    7   突然好想你
8   2  流行金曲    8   后来
9   2  流行金曲    9   童话
10  2  流行金曲    10  晴天
11  2  流行金曲    7   突然好想你
12  2  流行金曲    7   突然好想你
13  3  纯音乐集    13  二泉映月
14  3  纯音乐集    14  琵琶语
15  3  纯音乐集    15  梦回还
16  4  欧美音乐    16  Shape of My Heart
17  4  欧美音乐    17  Just the Way You Are
18  4  欧美音乐    18  Hello
19  4  欧美音乐    19  A Thousand Years
20  4  欧美音乐    20  Thinking Out Loud
21  4  欧美音乐    20  Thinking Out Loud
22  4  欧美音乐    18  Hello
23  4  欧美音乐    18  Hello
24  5  民谣时光    24  易燃易爆炸
25  5  民谣时光    25  成全
26  5  民谣时光    25  成全
27  5  民谣时光    25  成全
加载数据
create table sql1_3(
    u_id int,
    mf_id int,
    mf_name string,
    m_id int,
    m_name string
)
row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
with serdeproperties(
  'input.regex'='(\\d+)\\s+(\\d+)\\s+(.+?)\\s+(\\d+)\\s+(.+?)'
);
load data local inpath '/home/homedata/sql_1/sql1_3.txt' into table sql1_3;
编写sql
with t1 as (
    // 前三的歌单
    select mf_id from sql1_3 group by mf_id order by  count(*) desc limit 3
) ,t2 as(
    // 根据歌单挑选出所有的歌曲记录数量
    select  mf_name ,m_name,count(*) count from t1 left join sql1_3 on t1.mf_id = sql1_3.mf_id group by mf_name ,m_name
),t3 as (
    // 按照数量进行排序
    select mf_name,m_name,rank() over (partition by mf_name order by count desc ) rankNum from t2
)
select * from t3 where rankNum <=3;

题目四:用一条sql语句查询出每门课都大于80分的学生姓名

//加载数据
create table sql1_4(
     name   string,
     course string,
     grade  int
);

INSERT INTO sql1_4 (name, course, grade) VALUES
('张三', '英语', 85),
('张三', '语文', 90),
('张三', '数学', 92),
('李四', '英语', 75),
('李四', '语文', 98),
('李四', '数学', 72),
('王五', '英语', 90),
('王五', '语文', 85),
('王五', '数学', 92),
('王五', '体育', 80);
//编写sql
select name from sql1_4 group by name having min(grade) >=80;

题目五:查找所有至少连续出现三次的数字

create table sql1_5(
   id int,
   num int
);
INSERT INTO sql1_5 (id, num) VALUES
(1, 1),
(2, 1),
(3, 1),
(4, 2),
(5, 1),
(6, 2),
(7, 2);
with t1 as (
    select *,row_number() over (partition by num order by id) ,
       id - row_number() over (partition by num order by id) jyl from sql1_5
)
select num from t1 group by jyl,num having count(*) >=3;

题目六:统计每个成绩段人数

{"username":"张三","score":95}
{"username":"李四","score":76}
{"username":"赵本山","score":92}
{"username":"王五","score":76}
{"username":"赵六","score":62}
{"username":"赵六1","score":62}
{"username":"赵六2","score":26}
{"username":"赵六3","score":89}
{"username":"赵六4","score":77}

create table sql1_6(
    str string
);
load data local inpath '/home/homedata/sql_1/sql1_6.txt' into table sql1_6;
with t1 as (
    select case when get_json_object(str,'$.score') >90 then '优'
            when get_json_object(str,'$.score') >80 then '良'
            when get_json_object(str,'$.score') >60 then '中'
            else '差' end grade
        from sql1_6
)
select grade,count(*) from t1 group by grade;

题目七:支付次数和累计充值金额统计

create table sql1_7(
  user_id     string,
  date_dt     string,
  trans_type  string,
  trans_amt   double,
  trans_dc    string
)row format delimited fields terminated by ',';

load data local inpath '/home/homedata/sql_1/sql1_7.txt' into table sql1_7;

u_001,20191201,支付,20000,支
u_001,20191203,充值,30010,收
u_001,20191203,提现,50000,支
u_001,20191208,支付,20000,支
u_001,20191210,充值,30010,收
u_001,20191220,提现,50000,支
u_002,20191202,支付,20000,支
u_002,20191202,转入,30010,收
u_002,20191230,充值,50000,收
u_003,20200110,支付,60.68,支
u_004,20200111,支付,90.05,支
u_004,20200114,充值,100.1,收
u_005,20200101,还款,30010,支
-- 求每个月用户的支出(trans_dc)次数大于3次,且每月累计充值(trans_type)金额大于10000元的用户交易记录

with t1 as (
    select user_id,substr(date_dt,1,6) dt,trans_dc ,count(*) count from sql1_7 group by user_id,trans_dc,substr(date_dt,1,6)
),
    t2 as ( 
    select user_id,dt from t1 where count >3 and trans_dc ='支'
),
    t3 as (  
    select t2.user_id user_id,dt,trans_amt from t2 left join sql1_7 on t2.user_id = sql1_7.user_id and dt = substr(date_dt,1,6) and trans_type = '充值'
) 
select user_id,dt from t3 group by user_id,dt having sum(trans_amt) >10000;

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值