表结构:uid,subject_id,score
求:找出所有科目成绩都大于某一学科平均成绩的学生
数据集如下
1001 01 90
1001 02 90
1001 03 90
1002 01 85
1002 02 85
1002 03 70
1003 01 70
1003 02 70
1003 03 85
1)建表语句
create table score(
uid string,
subject_id string,
score int)
row format delimited fields terminated by '\t';
2)求出每个学科平均成绩
select uid,
subject_id,
score,
avg(score) over (distribute by subject_id) avg_score
from score
group by uid, subject_id, score;
3)根据是否大于平均成绩记录flag,大于则记为0否则记为1
with a as (
select uid,
subject_id,
score,
avg(score) over (distribute by subject_id) avg_score
from score
group by uid, subject_id, score
)
select uid,
if(score > avg_score, 0, 1) flag
from a;
4)根据学生id进行分组统计flag的和,和为0则是所有学科都大于平均成绩
with a as (
select uid,
subject_id,
score,
avg(score) over (distribute by subject_id) avg_score
from score
group by uid, subject_id, score
),
b as (select uid,
if(score > avg_score, 0, 1) flag
from a)
select uid
from b
group by uid
having sum(b.flag) = 0;
换种写法 如下,结果是一样的
select uid
from (select uid,
if(score > avg_score, 0, 1) flag
from (select uid,
score,
avg(score) over (partition by subject_id) avg_score
from score) t1) t2
group by uid
having sum(flag) = 0;
运行结果如下
+----+
|uid |
+----+
|1001|
+----+