目录
- 1/原生数据 t_access_times.dat:
- 2/创建表
- 3/导入数据
- 4/按照username、month分组
- 5/自己 inner join 自己
- 6/按照用户、月份,联级求和salary
1/原生数据 t_access_times.dat:
A,2015-01,5
A,2015-01,15
B,2015-01,5
A,2015-01,8
B,2015-01,25
A,2015-01,5
A,2015-02,4
A,2015-02,6
B,2015-02,10
B,2015-02,5
2/创建表
-- Table for the raw records: username, month (a 'yyyy-MM' string) and salary.
-- Fields are comma-delimited so the table can read t_access_times.dat directly.
create table t_access_times(
    username string,
    month    string,
    salary   int
)
row format delimited
fields terminated by ',';

-- Show the resulting schema; this statement produces the column listing below.
desc t_access_times;
+-----------+------------+----------+--+
| col_name | data_type | comment |
+-----------+------------+----------+--+
| username | string | |
| month | string | |
| salary | int | |
+-----------+------------+----------+--+
3/导入数据
-- Load the comma-separated data file into t_access_times.
-- "local" reads the path from the client's filesystem rather than HDFS;
-- without "overwrite" the rows are appended to whatever the table already holds.
load data local
    inpath '/home/hadoop/hivedata/t_access_times.dat'
    into table t_access_times;
4/按照username、month分组
-- Total salary per user per month.
-- The explicit order by makes the row order deterministic so it matches the
-- listing below; group by on its own does not guarantee any output order.
select
    username,
    month,
    sum(salary) as salary
from t_access_times
group by username, month
order by username, month;
+-----------+----------+---------+--+
| username | month | salary |
+-----------+----------+---------+--+
| A | 2015-01 | 33 |
| A | 2015-02 | 10 |
| B | 2015-01 | 30 |
| B | 2015-02 | 15 |
+-----------+----------+---------+--+
5/自己 inner join 自己
-- Pair every monthly total of a user with every monthly total of that same user.
-- The join key is username only, so each user yields (months x months) rows.
with monthly_total as (
    -- one row per (username, month) with the summed salary
    select
        username,
        month,
        sum(salary) as salary
    from t_access_times
    group by username, month
)
select
    A.*,
    B.*
from monthly_total A
inner join monthly_total B
    on A.username = B.username;
+-------------+----------+-----------+-------------+----------+-----------+--+
| a.username | a.month | a.salary | b.username | b.month | b.salary |
+-------------+----------+-----------+-------------+----------+-----------+--+
| A | 2015-01 | 33 | A | 2015-01 | 33 |
| A | 2015-01 | 33 | A | 2015-02 | 10 |
| A | 2015-02 | 10 | A | 2015-01 | 33 |
| A | 2015-02 | 10 | A | 2015-02 | 10 |
| B | 2015-01 | 30 | B | 2015-01 | 30 |
| B | 2015-01 | 30 | B | 2015-02 | 15 |
| B | 2015-02 | 15 | B | 2015-01 | 30 |
| B | 2015-02 | 15 | B | 2015-02 | 15 |
+-------------+----------+-----------+-------------+----------+-----------+--+
6/按照用户、月份,联级求和salary
-- Cumulative salary per user and month.
-- For each (username, month) row A, the self-join keeps every row B of the same
-- user whose month is not later than A.month; summing B.salary over those rows
-- gives the running total up to and including A.month.
-- month is a 'yyyy-MM' string, so "<=" is a string comparison that happens to
-- follow chronological order.
-- A.salary is the same on every joined row of a group, so max() simply carries
-- it through without adding it to the group by.
with monthly_total as (
    -- one row per (username, month) with the summed salary
    select
        username,
        month,
        sum(salary) as salary
    from t_access_times
    group by username, month
)
select
    A.username,
    A.month,
    max(A.salary) as salary,
    sum(B.salary) as accumulate
from monthly_total A
inner join monthly_total B
    on A.username = B.username
where B.month <= A.month
group by A.username, A.month
order by A.username, A.month;
+-------------+----------+---------+-------------+--+
| a.username | a.month | salary | accumulate |
+-------------+----------+---------+-------------+--+
| A | 2015-01 | 33 | 33 |
| A | 2015-02 | 10 | 43 |
| B | 2015-01 | 30 | 30 |
| B | 2015-02 | 15 | 45 |
+-------------+----------+---------+-------------+--+