1.现在有大学考试成绩如下表A:id,subject,score,找出每一科都是这一科前10%的学生的id
1)建表及准备数据
create table tmp
(
nam char(10),
lesson char(20),
mark int
);
INSERT into tmp VALUES('a1','c1',80),('a2','c2',80),('a3','c1',90),('a4','c1',90),('a5','c2',70),
('a6','c3',70),('a7','c1',60),('a8','c2',60),('a9','c2',80),('a3','c2',90),('a3','c3',80);
查询表结果:
2)先求每门课排名前10%的学生
//每门课排名前10%的学生
(select nam,lesson,mark from
(select *,ntile(5) over (partition by lesson order by mark desc) as rn
from tmp) t where rn = 1);
3)求课程总数
//求一共有多少门课程
select count(distinct lesson) from tmp;
//优化
select count(*) from (select * from tmp group by lesson) as t;
4)按照nam分组,求count(*)等于全部课程
select nam from (select nam,lesson,mark from
(select *,ntile(2) over (partition by lesson order by mark desc) as rn
from tmp) t where rn = 1) as temp group by nam
having count(*)=(select count(*) from (select * from tmp group by lesson) as t);
2.现在有大学考试成绩如下表A:id,subject,score,找出每一科都是这一科前30%的学生的id
使用上一题数据
#先计算每门课前30%的人数各有多少
select lesson,round(count(*)*0.3) as num from tmp group by lesson;#t1
#或
select lesson,round(count(*)*0.3,0) as num from tmp group by lesson;#t1
#分别对每门课进行课内排名
select *,row_number() over(partition by lesson order by mark desc) as r from tmp;#t2
#两个表相关联,并求出每门课前30%的所有学生
select t1.lesson,t1.num,t2.nam,t2.mark,t2.r
from (select lesson,round(count(*)*0.3) as num from tmp group by lesson) t1
join (select *,row_number() over(partition by lesson order by mark desc) as r from tmp) t2
on t1.lesson=t2.lesson
where t2.r<=t1.num;#temp
#求一共有多少门课程以及优化
select count(distinct lesson) from tmp;
select count(*) from (select * from tmp group by lesson) as t;
#求出每门课排名都在前30%的学生
select nam from
(select t1.lesson,t1.num,t2.nam,t2.mark,t2.r
from (select lesson,round(count(*)*0.3) as num from tmp group by lesson) t1
join (select *,row_number() over(partition by lesson order by mark desc) as r from tmp) t2
on t1.lesson=t2.lesson
where t2.r<=t1.num) as temp
group by nam having count(*)=(select count(*) from (select * from tmp group by lesson) as t);
3.用rank求排名的
使用第一题数据:
1)题目描述:对每门课程进行排名
select *,rank() over(partition by lesson order by mark desc) as r
from tmp;
select *,dense_rank() over(partition by lesson order by mark desc) as r
from tmp;
2)题目描述求每门课程分数最高的学生
select nam,lesson,mark from
(select *,dense_rank() over(partition by lesson order by mark desc) as r
from tmp) as t
where t.r=1;
3)统计班级课程成绩有三门高于80的学生信息?
#1.求每门课程高于80的所有学生
select * from tmp where mark>=80;#t
#2.按照姓名分组,求每组课程数,输出课程数为3的
select * from (select * from tmp where mark>=80) as t group by nam having count(*)=3;
4.销售表sales_dtl有城市(city),销售经理(cbm),客户(customer),销售金额(sale_amt)三列字段,请用sql取出每个城市中销售金额为本城市前10名的销售经理的信息。
此题类似于求每门课程分数排名前2名的学生的信息。
#每门课程分数排名前2名的学生的信息
#1.求出每门课程分数排名
select *,rank() over(partition by lesson order by mark desc) as r
from tmp;#t
#2.求出前两名信息
select nam,lesson,mark from (select *,rank() over(partition by lesson order by mark desc) as r
from tmp) t where t.r<=2;
5.
1)
create table dws_ae_sir_1d(
store char(4),
cate char(20),
sell_cnt int,
gmv int
);
insert into dws_ae_sir_1d values('A1','女装',20,300),('A2','女装',10,500),('B1','男装',15,400),
('C1','鞋类',40,850),('A3','女装',30,600),('A4','女装',25,400),('B2','男装',25,600),
('C2','鞋类',50,1000),('B3','男装',20,500),('C3','鞋类',45,900);
select * from dws_ae_sir_1d;
#查询出每个行业销量最高的top2商家,并给出按照销售额从高到低给出综合排序(需展示出序号)
#1.按照每个行业内部销售额排序
select *,rank() over(partition by cate order by sell_cnt desc) as r from dws_ae_sir_1d;#t
#2.求出top2
select * from
(select *,rank() over(partition by cate order by sell_cnt desc) as r from dws_ae_sir_1d) t
where t.r<=2;
6.如何将表dept的dname字段的值转化成一个字符串,以逗号分隔(concat_ws()和collect_set()函数的使用)
列转行相关函数:CONCAT(),CONCAT_WS(),COLLECT_SET()
#COLLECT_SET(col):函数只接受基本数据类型,主要作用是将某字段的值进行去重汇总,产生array类型字段。与COLLECT_LIST()区别:COLLECT_LIST()不会去重。
select collect_set(dname) from dept;
#结果:[ACCOUNTING,RESEARCH,SALES,OPERATIONS]
#CONCAT_WS():特殊形式的CONCAT(),第一个参数是剩余参数间的分隔符。
CONCAT_WS(',',collect_set(dname)) from dept;
#结果:ACCOUNTING,RESEARCH,SALES,OPERATIONS
7.如何将表table3的中生成的那个字符串变成原先table1的形式(UDTF中的explode()和lateral view()函数的使用)
1)单个lateral view题目:
#行转列:explode()、lateral view()
select A,B,C from table_1 LATERAL VIEW explode(B) table1 as B;
2)多个lateral view题目:
SELECT myCol1, myCol2 FROM baseTable
LATERAL VIEW explode(col1) myTable1 AS myCol1
LATERAL VIEW explode(col2) myTable2 AS myCol2;
3)复杂方式lateral view()题目:
tb_split表数据如下:
select datenu,des,type from tb_split
lateral view explode(split(des,"//|")) tb1 as des
lateral view explode(split(type,"//|")) tb2 as type
8.表table1(usr_id int,usr_dt string),其中id字段数值相同,找出表中连续活跃的天数/连续发文天数/连续登陆天数,如果不连续,则当前行连续活跃字段设为1,返回查询结果table2(窗口函数)
#数据准备
create table tmp_continous(
user_id char(4),
login_date date
);
INSERT into tmp_continous values('123', '2018-08-02'),('123','2018-08-03'),
('123','2018-08-04'),('456','2018-11-02'),('456','2018-12-09'),('789','2018-01-01'),
('789','2018-04-23'),('789','2018-09-10'),('789','2018-09-11'),('789','2018-09-12');
# 对user_id分组排序
select *,row_number() over(partition by user_id order by login_date) as no from tmp_continous;#t1
# 若连续登陆,则login_date减去no后应该是同一天
# select *,adddate(login_date,-no) as groupInfo from t1;
select *,adddate(login_date,-no) as groupInfo from
(select *,row_number() over(partition by user_id order by login_date) as no from tmp_continous) as t1;#t2
# 按照groupInfo分组
# select *,count(*) as c from t2 group by groupInfo;
select *,count(*) as c from (select *,adddate(login_date,-no) as groupInfo from
(select *,row_number() over(partition by user_id order by login_date) as no from tmp_continous) as t1) t2
group by groupInfo;#t3
# 按照user_id分组,求每个分组最大值即最大连续登陆天数
# select *,max(c) from t3 group by user_id;
select *,max(c) from
(select *,count(*) as c from (select *,adddate(login_date,-no) as groupInfo from
(select *,row_number() over(partition by user_id order by login_date) as no from tmp_continous) as t1) t2
group by groupInfo) t3
group by user_id;
9.SQL题 给一个简单日志表,包含userid ,activateddate,每个用户每天最多只有一条数据,输出连续3天登陆快手的userid
前面的步骤与上一题相同,倒数第二步改为:
select *,count(*) as c from (select *,adddate(login_date,-no) as groupInfo from
(select *,row_number() over(partition by user_id order by login_date) as no from tmp_continous) as t1) t2
group by groupInfo having c=3;#t3
10.第一天登录的,第二天也登陆的user_id
上一题小小改动
select *,count(*) as c from (select *,adddate(login_date,-no) as groupInfo from
(select *,row_number() over(partition by user_id order by login_date) as no from tmp_continous) as t1) t2
group by groupInfo having c>=2;#t3