hive的级联求和
首先创建源数据表:
create table t_salary_detail(username string,month string,salary int)
row format delimited fields terminated by ',';
load data local inpath '/export/servers/hivedatas/accumulate/t_salary_detail.dat' into table t_salary_detail;
源数据如下:
需求:统计每个用户每个月总共获得多少小费
select t.month,t.username,sum(salary) as salSum
from t_salary_detail t
group by t.username,t.month;
需求:统计每个用户累计小费
第一步:求每个用户的每个月的小费总和
select t.month,t.username,sum(salary) as salSum
from t_salary_detail t
group by t.username,t.month;
第二步:使用inner join 实现自己连接自己
select
A.* ,B.*
from
(select t.month,t.username,sum(salary) as salSum
from t_salary_detail t
group by t.username,t.month) A
inner join
(select t.month,t.username,sum(salary) as salSum
from t_salary_detail t
group by t.username,t.month) B
on A.username = B.username;
±---------±------------±----------±---------±------------±----------±-+
| a.month | a.username | a.salsum | b.month | b.username | b.salsum |
±---------±------------±----------±---------±------------±----------±-+
取这一个作为一组
| 2015-01 | A | 33 | 2015-01 | A | 33 |33
| 2015-01 | A | 33 | 2015-02 | A | 10 |
| 2015-01 | A | 33 | 2015-03 | A | 16 |
取这两个作为一组
| 2015-02 | A | 10 | 2015-01 | A | 33 |
| 2015-02 | A | 10 | 2015-02 | A | 10 |43
| 2015-02 | A | 10 | 2015-03 | A | 16 |
取这三个作为一组
| 2015-03 | A | 16 | 2015-01 | A | 33 |
| 2015-03 | A | 16 | 2015-02 | A | 10 |
| 2015-03 | A | 16 | 2015-03 | A | 16 |59
| 2015-01 | B | 30 | 2015-01 | B | 30 |
| 2015-01 | B | 30 | 2015-02 | B | 15 |
| 2015-01 | B | 30 | 2015-03 | B | 17 |
| 2015-02 | B | 15 | 2015-01 | B | 30 |
| 2015-02 | B | 15 | 2015-02 | B | 15 |
| 2015-02 | B | 15 | 2015-03 | B | 17 |
| 2015-03 | B | 17 | 2015-01 | B | 30 |
| 2015-03 | B | 17 | 2015-02 | B | 15 |
| 2015-03 | B | 17 | 2015-03 | B | 17 |
±---------±------------±----------±---------±------------±----------±-+
加参数继续变形
select
A.* ,B.*
from
(select t.month,t.username,sum(salary) as salSum
from t_salary_detail t
group by t.username,t.month) A
inner join
(select t.month,t.username,sum(salary) as salSum
from t_salary_detail t
group by t.username,t.month) B
on A.username = B.username
where B.month <= A.month;