这几年面试数仓岗位,基本都会被问到累计报表的sql,所以总结了以下几种方法,作为练习及准备
建表并导入数据(load语句之后列出的是数据文件t_access_times.dat的内容)
-- Source table for the running-total exercises.
-- Columns: user name, month (a 'yyyy-MM' string in the sample data), amount.
create table t_access_times (
    username string,
    month    string,
    salary   int
)
row format delimited
    fields terminated by ',';

-- Load the comma-separated sample file from the local filesystem.
load data local inpath '/home/dmp_chenl/t_access_times.dat'
into table t_access_times;
A,2015-01,5
A,2015-01,15
B,2015-01,5
A,2015-01,8
B,2015-01,25
A,2015-01,5
A,2015-02,4
A,2015-02,6
B,2015-02,10
B,2015-02,5
一、自连接方法
1、第一步,先求各用户的月总金额
-- Monthly total per user: one output row per (username, month).
-- Terminated with ';' like the identical statements later in the file.
select
    username,
    month,
    sum(salary) as salary
from t_access_times
group by username, month;
+-----------+----------+---------+--+
| username | month | salary |
+-----------+----------+---------+--+
| A | 2015-01 | 33 |
| A | 2015-02 | 10 |
| B | 2015-01 | 30 |
| B | 2015-02 | 15 |
+-----------+----------+---------+--+
2、第二步,将月总金额表与自身做连接(自连接,连接条件为username相等)
+-------------+----------+-----------+-------------+----------+-----------+--+
| a.username | a.month | a.salary | b.username | b.month | b.salary |
+-------------+----------+-----------+-------------+----------+-----------+--+
| A | 2015-01 | 33 | A | 2015-01 | 33 |
| A | 2015-01 | 33 | A | 2015-02 | 10 |
| A | 2015-02 | 10 | A | 2015-01 | 33 |
| A | 2015-02 | 10 | A | 2015-02 | 10 |
| B | 2015-01 | 30 | B | 2015-01 | 30 |
| B | 2015-01 | 30 | B | 2015-02 | 15 |
| B | 2015-02 | 15 | B | 2015-01 | 30 |
| B | 2015-02 | 15 | B | 2015-02 | 15 |
+-------------+----------+-----------+-------------+----------+-----------+--+
3、第三步,从上一步的结果中
进行分组查询,分组的字段是a.username a.month
求月累计值: 将b.month <= a.month的所有b.salary求和即可
-- Running total per user via a self-join on the monthly totals.
-- Every (username, month) row of `a` is paired with all rows of `b` for the
-- same user whose month is not later; summing b.salary over each group gives
-- the cumulative amount up to and including that month.
with monthly as (
    select
        username,
        month,
        sum(salary) as salary
    from t_access_times
    group by username, month
)
select
    a.username,
    a.month,
    max(a.salary) as salary,        -- a.salary is constant within a group; max() just surfaces it
    sum(b.salary) as accumulate
from monthly a
inner join monthly b
    on a.username = b.username
where b.month <= a.month            -- string comparison; correct for zero-padded 'yyyy-MM' values
group by a.username, a.month
order by a.username, a.month;
A 2015-01 33 33
A 2015-02 10 43
B 2015-01 30 30
B 2015-02 15 45
二、表关联方法
1、第一步,先求各用户的月总金额
-- Monthly total per user: one output row per (username, month).
select
    username,
    month,
    sum(salary) as salary
from t_access_times
group by username, month;
+-----------+----------+---------+--+
| username | month | salary |
+-----------+----------+---------+--+
| A | 2015-01 | 33 |
| A | 2015-02 | 10 |
| B | 2015-01 | 30 |
| B | 2015-02 | 15 |
+-----------+----------+---------+--+
2、第二步,利用表关联求和
-- Running total per user by joining the raw rows (b) to the distinct
-- (username, month) pairs produced by the monthly aggregation (a).
-- For each pair in `a`, all raw rows of the same user with a month that is
-- not later are summed.
--
-- Changes from the previous version:
--   * username/month are qualified with `a.`; both `a` and `b` expose these
--     columns, so the unqualified names were ambiguous.
--   * the comma join is written as an explicit inner join.
--   * the ORDER BY inside the derived table is dropped (it cannot order the
--     outer grouped result) and replaced by a final ORDER BY.
--   * the aggregate is aliased so later steps can reference it by name.
select
    a.username,
    a.month,
    sum(b.salary) as accumulate
from t_access_times b
inner join (
    -- a.salary (the monthly total) is not used in this step.
    select
        username,
        month,
        sum(salary) as salary
    from t_access_times
    group by username, month
) a
    on a.username = b.username
where b.month <= a.month            -- string comparison; correct for zero-padded 'yyyy-MM' values
group by a.username, a.month
order by a.username, a.month;
A 2015-01 33
A 2015-02 43
B 2015-01 30
B 2015-02 45
3、第三步,将上一步的结果与第一步的结果相关联即可
三、分析函数方法
1、第一步,先求各用户的月总金额
-- Monthly total per user: one output row per (username, month).
select
    username,
    month,
    sum(salary) as salary
from t_access_times
group by username, month;
+-----------+----------+---------+--+
| username | month | salary |
+-----------+----------+---------+--+
| A | 2015-01 | 33 |
| A | 2015-02 | 10 |
| B | 2015-01 | 30 |
| B | 2015-02 | 15 |
+-----------+----------+---------+--+
2、第二步,利用分析函数求和
-- Running total per user with a window function.
-- The derived table yields one row per (username, month); the windowed sum
-- then accumulates those monthly totals in month order within each user.
select
    username,
    month,
    salary,
    sum(salary) over (partition by username order by month) as salary_sum
from (
    select
        username,
        month,
        sum(salary) as salary
    from t_access_times
    group by username, month
) temp;
-- Running total and running maximum of the monthly totals per user.
-- Both window functions share the same partition (username) and ordering
-- (month). The max() column was previously also aliased salary_sum, giving
-- two output columns the same name; it is now salary_max.
select
    username,
    month,
    salary,
    sum(salary) over (partition by username order by month) as salary_sum,
    max(salary) over (partition by username order by month) as salary_max
from (
    select
        username,
        month,
        sum(salary) as salary
    from t_access_times
    group by username, month
) temp;