1.SQL执行顺序
SQL语句的执行顺序是: from (去加载table1 和 table2这2个表 ) -> join -> on -> where -> group by->select 后面的聚合函数count,sum -> having -> distinct -> order by -> limit
1.1 访问量统计
1.2 电商场景TopK统计
1.3 订单量统计
1.4 大数据拍讯统计
1.5 活跃用户统计
1.6 电商购买金额统计实战
1.7 教育领域SQL实战
1.8 服务器日志统计
1.9 充值日志SQL实战
1.10 电商分组TopK统计
2.行转列
行转列的常规做法是:group by +sum(if()) 或者 count(if())
2.1 华泰证劵1
已知
year | month | amount |
---|---|---|
1991 | 1 | 1.1 |
1991 | 2 | 1.2 |
1991 | 3 | 1.3 |
1991 | 4 | 1.4 |
1992 | 1 | 2.1 |
1992 | 2 | 2.2 |
1992 | 3 | 2.3 |
1992 | 4 | 2.4 |
查成这样一个结果
year | m1 | m2 | m3 | m4 |
---|---|---|---|---|
1991 | 1.1 | 1.2 | 1.3 | 1.4 |
1992 | 2.1 | 2.2 | 2.3 | 2.4 |
-- 创建表格
create table table2(year int,month int ,amount double) ;
insert into test_sql.table2 values
(1991,1,1.1),
(1991,2,1.2),
(1991,3,1.3),
(1991,4,1.4),
(1992,1,2.1),
(1992,2,2.2),
(1992,3,2.3),
(1992,4,2.4);
select * from table2;
-- 方法一
select year,
sum(case when month=1 then amount else 0 end) as m1,
sum(case when month=2 then amount else 0 end) as m2,
sum(case when month=2 then amount else 0 end) as m3,
sum(case when month=4 then amount else 0 end) as m4
from table2
group by year order by year
-- 方法二select year,
sum(if(month=1,amount,0)) as m1,
sum(if(month=2,amount,0)) as m2,
sum(if(month=3,amount,0))as m3,
sum(if(month=4,amount,0)) as m4
from table2
group by year order by year
2.2 华泰证券2
create table student(sid int, sname varchar(10), gender char(1), class_id int);
insert into student
values (1, '张三', '女', 1),
(2, '李四', '女', 1),
(3, '王五', '男', 2);
select * from student;
create table course (cid int, cname varchar(10), teacher_id int);
insert into course
values (1, '生物', 1),
(2, '体育', 1),
(3, '物理', 2);
select * from course;
create table score (sid int, student_id int, course_id int, number int);
insert score
values (1, 1, 1, 58),
(4, 1, 2, 50),
(2, 1, 2, 68),
(3, 2, 2, 89);
select * from score;
# 查询课程编号“2”的成绩比课程编号“1”低的所有同学的学号、姓名。
-- 方法一
select ac.sid,ac.sname from
student as ac
join (
select student_id,
sum(if(course_id=2,number,0)) as pe,
sum(if(course_id=1,number,0)) as bio
from score
group by student_id
)as aa
on ac.sid=aa.student_id
where aa.pe<bio
-- 方法二
select ac.sid,ac.sname from
student as ac
join (
select student_id,
sum(case when course_id=2 then number else 0 end) as pe,
sum(case when course_id=1 then number else 0 end) as bio
from score
group by student_id
)as aa
on ac.sid=aa.student_id
where aa.pe<bio
-- 方法三
with temp as (
select *from (
select student_id,
sum(case when course_id=2 then number else 0 end) as pe,
sum(case when course_id=1 then number else 0 end) as bio
from score
group by student_id
)as aa
where aa.pe<bio)
select ac.sid,ac.sname
from student as ac
join temp as ad
on ac.sid =ad.student_id
2.3 腾讯游戏
表table如下:
DDate | shengfu |
---|---|
2015-05-09 | 胜 |
2015-05-09 | 胜 |
2015-05-09 | 负 |
2015-05-09 | 负 |
2015-05-10 | 胜 |
2015-05-10 | 负 |
2015-05-10 | 负 |
如果要生成下列结果, 该如何写sql语句?
DDate | 胜 | 负 |
---|---|---|
2015-05-09 | 2 | 2 |
2015-05-10 | 1 | 2 |
create table table1(DDate char(10), shengfu char(1)) ;
insert into table1 values ('2015-05-09', "胜"),
('2015-05-09', "胜"),
('2015-05-09', "负"),
('2015-05-09', "负"),
('2015-05-10', "胜"),
('2015-05-10', "负"),
('2015-05-10', "负");
select * from table1
select DDate,
sum(case when shengfu="胜" then 1 else 0 end) as m1,
sum(case when shengfu="负" then 1 else 0 end) as m2
from test_sql.table1
group by DDate order by DDate
select DDate,
sum(if(shengfu="胜",1,0)) as m1,
sum(if(shengfu="负",1,0)) as m2
from test_sql.table1
group by DDate order by DDate
2.4.腾讯QQ
表5
qq号(字段名:qq) | 游戏(字段名:game) |
---|---|
10000 | a |
10000 | b |
10000 | c |
20000 | c |
20000 | d |
表6
qq号(字段名:qq) | 游戏(字段名:game) |
---|---|
10000 | a_b_c |
20000 | c_d |
create table tableA(qq char(5), game char(5))
insert into tableA values
(10000, 'a'),
(10000, 'b'),
(10000, 'c'),
(20000, 'c'),
(20000, 'd');
create table tableB(qq char(5), game char(5)) ;
insert into tableB values
(10000, 'a_b_c'),
(20000, 'c_d');
# a, 将tableA输出为tableB的格式; 【行转列】
select * from tableA
select qq,replace(GROUP_CONCAT(game),',','_')game
from tableA
group by qq
order by qq
# --将tableB输出为tableA的格式;
select *,SUBSTRING_INDEX(game, '_') from tableB
SELECT SUBSTRING_INDEX(SUBSTRING_INDEX(game, '_', n.digit), '_', -1) AS game_split
FROM tableB
JOIN (
SELECT 0 AS digit UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3
) n ON CHAR_LENGTH(game) - CHAR_LENGTH(REPLACE(game, '_', '')) >= n.digit
3.连续N天登录
3.1 OPPO
以下为用户登陆游戏的日期,用一条sQL语句查询出连续三天登录的人员姓名
name | date |
---|---|
张三 | 2021-01-01 |
张三 | 2021-01-02 |
张三 | 2021-01-03 |
张三 | 2021-01-02 |
李四 | 2021-01-01 |
李四 | 2021-01-02 |
王五 | 2021-01-03 |
王五 | 2021-01-02 |
王五 | 2021-01-02 |
create table game(name char(2), `date` char(10));
insert into game values
('张三','2021-01-01'),
('张三','2021-01-02'),
('张三','2021-01-03'),
('张三','2021-01-02'),
('张三','2021-01-07'),
('张三','2021-01-08'),
('张三','2021-01-09'),
('李四','2021-01-01'),
('李四','2021-01-02'),
('王五','2021-01-03'),
('王五','2021-01-02'),
('王五','2021-01-02');
('胡超','2021-01-01');
('胡超','2021-01-03');
#方法一
SELECT distinct name
FROM (
select c.name,c.date,LEAD(c.date,2) over (PARTITION BY c.name ORDER BY c.date) AS RN
from (select distinct * from game) as c
)AS A
where datediff(rn,date)=2
# 方法二,和一是一个逻辑思想
with temp as (select distinct name,date from game),
temp1 as (select *,date_Add(date,interval 2 day) as dt,lead(date,2) over (partition by name order by date) rn from temp)
select name from temp1 where dt =rn
3.2 脉脉
在过去一个月内,曾连续两天活跃的用户
create table dau(d char(10), uid int, module varchar(10), active_duration int);
insert into dau
values ('2020-01-01', 1, 'jobs', 324),
('2020-01-01', 2, 'feeds', 445),
('2020-01-01', 3, 'im', 345),
('2020-01-02', 2, 'network', 765),
('2020-01-02', 3, 'jobs', 342);
select *from dau;
# 方法一
with
temp as (select distinct uid,d from dau ),
temm as (select * ,lead(d,1) over (partition by uid order by d) rn from temp)
select distinct uid
from temm where datediff(rn,d)=1
# 方法二
with
temp as (select distinct uid,d from dau ),
temm as (select * ,date_sub(d,interval 1 day) from temp)
select distinct uid
from temm group by uid having count(*)>=2
# 方法三
with t1 as ( select distinct uid,d from dau),
t2 as ( select *,row_number() over (partition by uid order by d) rn from t1),
t3 as (select *,date_sub(d,interval rn day)ss from t2)
select distinct uid
from t3 group by uid,ss having count(*)>=2
3.3 广州银行
有一张表C_T(列举了部分数据)表示持卡人消费记录,表结构如下:
CARD NER | VARCHAR2 | 卡号, |
---|---|---|
C_MONTH | NUMBER | 消费月份, |
C_DATE | DATE | 消费日期, |
C_TYPEVAR | CHAR2 | 消费类型 |
C_ATM | NUMBER | 消费金额 |
每个月每张卡连续消费的最大天数(如卡在当月只有一次消费则为1)。
连续消费天数:指一楼时间内连续每天都有消费,同一天有多笔消费算一天消费,不能跨月份统计。
create table c_t
(
card_nbr char(1),
c_month char(7),
c_date char(10),
c_type char(2),
c_atm decimal
);
insert into c_t values
(1,'2022-01','2022-01-01','网购',100),
(1,'2022-01','2022-01-02','网购',200),
(1,'2022-01','2022-01-03','网购',300),
(1,'2022-01','2022-01-15','网购',100),
(1,'2022-01','2022-01-16','网购',200),
(2,'2022-01','2022-01-06','网购',500),
(2,'2022-01','2022-01-07','网购',800),
(1,'2022-02','2022-02-01','网购',100),
(1,'2022-02','2022-02-02','网购',200),
(1,'2022-02','2022-02-03','网购',300),
(2,'2022-02','2022-02-06','网购',500),
(2,'2022-02','2022-02-07','网购',800);
select * from c_t
with t1 as (select distinct card_nbr,c_month,c_date from c_t),
t2 as (select *,row_number() over (partition by card_nbr,c_month order by c_date) rn from t1 ),
t3 as (select *,date_sub(c_date, interval rn day) dt2 from t2 ),
t4 as (select card_nbr,c_month ,dt2 ,count(*) cnt from t3 group by card_nbr,c_month ,dt2),
t5 as ( select *,row_number() over (partition by card_nbr,c_month order by cnt desc) as rn from t4)
select card_nbr,c_month,cnt from t5 where rn=1
4.N日留存率
4.1 腾讯视频号游戏直播
表:tableA
ds(日期) | device | user_id | is_active |
---|---|---|---|
2020-03-01 | ios | 0001 | 0 |
2020-03-01 | ios | 0002 | 1 |
2020-03-01 | android | 0003 | 1 |
2020-03-02 | ios | 0001 | 0 |
2020-03-02 | ios | 0002 | 0 |
2020-03-02 | android | 0003 | 1 |
20200301的ios设备用户活跃的次日留存率是多少?
create table happy (ds char(10) comment '(日期)' ,device char(10),user_id char(10),is_active int) ; insert into happy values ('2020-03-01','ios','0001',0), ('2020-03-01','ios','0002',1), ('2020-03-01','ios','0004',1), ('2020-03-01','android','0003',1), ('2020-03-02','ios','0001',0), ('2020-03-02','ios','0002',0), ('2020-03-02','android','0003',1), ('2020-03-02','ios','0005',1) , ('2020-03-02','ios','0004',1) ; select * from happy # 方法一 with temp as( select user_id, count(if(ds = '2020-03-01',1,null))cnt1, count(if(ds = '2020-03-02' and is_active =1 ,1,null)) cnt2 from happy where device = 'IOS' AND ((ds='2020-03-01' and is_active=1)or ds = '2020-03-02') group by user_id having cnt1>0) select count(cnt1) sum1, count(if(cnt2 > 0, user_id, null)) sum2, count(if(cnt2 > 0, user_id, null)) / count(cnt1) rate from temp; # 方法二 with temp as( select user_id, count(if(ds = '2020-03-01',1,null))cnt1, count(if(ds = '2020-03-02' and is_active =1 ,1,null)) cnt2 from happy where device = 'IOS' AND ((ds='2020-03-01' and is_active=1)or ds = '2020-03-02') group by user_id having cnt1>0) select sum(cnt1) sum1, sum(cnt2) sum2, sum(cnt2) / sum(cnt1) rate from temp;
4.2 百度
写出用户表 tb_cuid_1d的 20200401 的次日、次7日留存的具体SQL : 一条sql统计出以下指标 (4.1号uv,4.1号在4.2号的留存uv,4.1号在4.8号的留存uv);
create table tb_cuid_1d
(
cuid char(1) comment '用户的唯一标识',
os char(10) comment '平台',
soft_version char(1) comment '版本',
event_day char(10) comment '日期',
visit_time int comment '用户访问时间戳',
duration decimal comment '用户访问时长'
);
insert into tb_cuid_1d values
(1,'android',1,'2020-04-01',1234567,100),
(1,'android',1,'2020-04-02',1234567,100),
(1,'android',1,'2020-04-08',1234567,100),
(2,'android',1,'2020-04-01',1234567,100),
(3,'android',1,'2020-04-02',1234567,100);
select * from tb_cuid_1d
# 方法一
select count(t1.cuid) as '4.1号uv',
count(t2.cuid) as '4.1号在4.2号的留存uv',
count(t3.cuid) as '4.1号在4.8号的留存uv'
from
(select distinct cuid,event_day from tb_cuid_1d where event_day = '2020-04-01' )as t1
left join (select distinct cuid,event_day from tb_cuid_1d where event_day = '2020-04-02' )as t2
on t1.cuid=t2.cuid
left join (select distinct cuid,event_day from tb_cuid_1d where event_day = '2020-04-08' )as t3
on t1.cuid=t3.cuid
# 方法二
with temp as(
select cuid,
count(if(event_day = '2020-04-01',1,null)) as uv1,
count(if(event_day = '2020-04-02',1,null)) as uv2,
count(if(event_day = '2020-04-08',1,null)) as uv8
from tb_cuid_1d
where event_day in ('2020-04-01','2020-04-02','2020-04-08')
group by cuid
having uv1>0
)
select sum(uv1)as u1,
sum(uv2)as u2,
sum(uv2)/sum(uv1) as 'u2/u1',
sum(uv8)as u8,
sum(uv8)/sum(uv1) as 'u8/u1'
from temp
5.分组内top前几
5.1 跨越物流
员工表结构
员工表数据
题目描述
求出每个部门工资最高的前三名员工,并计算这些员工的工资占所属部门总工资的百分比。
结果
create table emp(empno char(5) ,ename char(6),hiredate char(10),sal int ,deptno char(2));
insert into emp values
('7521', 'WARD', '1981-2-22', 1250, 30),
('7566', 'JONES', '1981-4-2', 2975, 20),
('7876', 'ADAMS', '1987-7-13', 1100, 20),
('7369', 'SMITH', '1980-12-17', 800, 20),
('7934', 'MILLER', '1982-1-23', 1300, 10),
('7844', 'TURNER', '1981-9-8', 1500, 30),
('7782', 'CLARK', '1981-6-9', 2450, 10),
('7839', 'KING', '1981-11-17', 5000, 10),
('7902', 'FORD', '1981-12-3', 3000, 20),
('7499', 'ALLEN', '1981-2-20', 1600, 30),
('7654', 'MARTIN', '1981-9-28', 1250, 30),
('7900', 'JAMES', '1981-12-3', 950, 30),
('7788', 'SCOTT', '1987-7-13', 3000, 20),
('7698', 'BLAKE', '1981-5-1', 2850, 30);
select * from emp;
# --求出每个部门工资最高的前三名员工,并计算这些员工的工资占所属部门总工资的百分比。
select empno,ename,hiredate,sal,round(sal/total,2) as rate
from (
select *,row_number() over (partition by deptno order by sal desc) as rn,sum(sal) over(partition by deptno) as total
from emp
) as temp
where temp.rn<4
# 5.2小米电商
create table empp(name char(2) , month char(2), amt int);
insert into empp values ('张三', '01', 100),
('李四', '02', 120),
('王五', '03', 150),
('赵六', '04', 500),
('张三', '05', 400),
('李四', '06', 350),
('王五', '07', 180),
('赵六', '08', 400);
select * from empp
select * ,row_number() over (order by total desc )as rn ,concat(round(totaL/(sum(total) over()) *100,2),'%') as rate
from (
select name,sum(amt) total from empp group by name)as aa
5.2 小米电商
订单表,torder. 字段,user_id, order_id, ctime(10位时间戳),city id,sale_num,sku_id(商品)
问题:20201201至今每日订单量top10的城市及其订单量(订单量对order id去重)(在线写)
create table ordertable (user_id char(3),
order_id char(3),
ctime char(10),
city_id char(5),
sale_num int ,
sku_id varchar(100)) ;
insert into ordertable values
('zs','001','2020-12-01','杭州',2,'鞋子'),
('ls','002','2020-12-01','杭州',1,'衣服'),
('ww','003','2020-12-01','杭州',1,'小米12'),
('zl','004','2020-12-01','杭州',1,'小米11'),
('zs','005','2020-12-01','上海',2,'鞋子'),
('zs','005','2020-12-01','上海',2,'裤子'),
('ls','006','2020-12-01','上海',1,'衣服'),
('ww','007','2020-12-01','上海',1,'小米12'),
('zs','008','2020-12-01','武汉',2,'鞋子'),
('ls','009','2020-12-01','武汉',1,'衣服'),
('zs','010','2020-12-01','长沙',2,'鞋子'),
('zs','011','2020-12-02','上海',2,'鞋子'),
('ls','012','2020-12-02','上海',1,'衣服'),
('ww','013','2020-12-02','上海',1,'小米12'),
('zl','014','2020-12-02','上海',1,'小米11'),
('zs','015','2020-12-02','广州',2,'鞋子'),
('ls','016','2020-12-02','广州',1,'衣服'),
('ww','017','2020-12-02','广州',1,'小米12'),
('zs','018','2020-12-02','武汉',2,'鞋子'),
('ls','019','2020-12-02','武汉',1,'衣服');
select * from ordertable
# 订单表,torder. 字段,user_id, order_id, ctime(10位时间戳),city id,sale_num,sku_id(商品)
# 问题:20201201至今每日订单量top10的城市及其订单量(订单量对order id去重)(在线写)
select order_time,cnt,cnt
from (
select *,row_number() over (partition by order_time,city_id order by cnt desc) as rn
from(
select date(ctime) order_time,city_id,count(distinct order_id) cnt
from ordertable
where date(ctime) between '2020-12-01' and current_date
group by date(ctime),city_id) as aa ) as bb
where bb.rn<=10
6.窗口函数
6.1 交通银行
Emp表的表数据如下:
NAME | MONTH | AMT |
---|---|---|
张三 | 01 | 100 |
李四 | 02 | 120 |
王五 | 03 | 150 |
赵六 | 04 | 500 |
张三 | 05 | 400 |
李四 | 06 | 350 |
王五 | 07 | 180 |
赵六 | 08 | 400 |
问题:请写出可以得到以下的结果SQL
NAME | 总金额 | 排名 | 占比 |
---|---|---|---|
赵六 | 900 | 1 | 40.91% |
张三 | 500 | 2 | 22.73% |
李四 | 470 | 3 | 21.36% |
王五 | 330 | 4 | 15.00% |