student.uid student.xi student.fuid
1 1 1
1 1 3
2 1 10
2 2 11
1 1 4
1 2 5
1 2 6
2 1 7
2 1 8
2 1 9
1 1 2
2 2 12
2 2 13
1.group by先以uid分组再以xi分组(group by 的所有非聚合字段都放在group byzhong 而且partition不必这样)
select
uid,
xi,
count(fuid)
from
student
group by uid,xi;
uid xi _c2
1 1 4
1 2 2
2 1 4
2 2 3
2.分组排序(分组与排序互不影响)
select
uid,
xi,
count(fuid) as mycount
from
student
group by uid,xi order by mycount;
OK
uid xi mycount
1 2 2
2 2 3
2 1 4
1 1 4
select
uid,
xi,
count(fuid) as mycount
from
student
group by uid,xi order by xi desc;
uid xi mycount
2 2 3
1 2 2
2 1 4
1 1 4
group by 与order by 互不影响
3.partition by order by 的使用
select
uid,
xi,
count(fuid) over(partition by uid,xi order by xi desc) as myrow
from
student;
uid xi myrow
1 2 2
1 2 2
1 1 4
1 1 4
1 1 4
1 1 4
2 2 3
2 2 3
2 2 3
2 1 4
2 1 4
2 1 4
2 1 4
分完区后数据的行数不会减少,只是为每一行增加了一个属相,order by 后面不能加limit
4.partition by 不与order 不与 结合也可以使用
select
uid,
xi,
count(fuid) over(partition by uid,xi) as myrow
from
student;
5.分区计数后各个分区去掉重复值
select
*
from
(
select
uid,
xi,
mycon,
row_number() over(partition by uid,xi order by mycon desc) as myrow
from
(
select
uid,
xi,
count(fuid) over(partition by uid,xi) as mycon
from
student
) t1
) t2 where myrow<=1;
OK
t2.uid t2.xi t2.mycon t2.myrow
1 1 4 1
1 2 2 1
2 1 4 1
2 2 3 1
6.partition分区计数后 相同记录标号一样两条2下来是3
select
uid,
xi,
mycon,
dense_rank() over(partition by uid,xi order by mycon desc) as myrow
from
(
select
uid,
xi,
count(fuid) over(partition by uid,xi) as mycon
from
student
) t1;
7.group by 与cluster by结合局部排序
select
uid,
xi,
count(fuid) as mycount
from
student
group by uid,xi distribute by xi desc sort by xi desc;
8.分组后计数,去重标号后取前几名
select
*
from
(
select
uid,
xi,
mycon,
row_number() over(partition by uid,xi order by mycon desc) as myrow
from
(
select
uid,
xi,
count(fuid) over(partition by uid,xi) as mycon
from
student
) t1
) t2 where myrow<=1;