文章目录
1、数据说明
(1)数据格式
a,01,150
a,01,200
b,01,1000
b,01,800
c,01,250
c,01,220
b,01,6000
a,02,2000
a,02,3000
b,02,1000
b,02,1500
c,02,350
c,02,280
a,03,350
a,03,250
(2)字段含义
店铺,月份,金额
2、数据准备
(1)创建数据库表t_store
use class;
create table t_store(
name string,
months int,
money int
)
row format delimited fields terminated by ",";
(2)导入数据
load data local inpath "/home/hadoop/store.txt" into table t_store;
3、需求
编写Hive的HQL语句求出每个店铺的当月销售额和累计到当月的总销售额
4、解析
(1)按照商店名称和月份进行分组统计
create table tmp_store1 as
select name,months,sum(money) as money from t_store group by name,months;
select * from tmp_store1;
(2)对tmp_store1 表里面的数据进行自连接
create table tmp_store2 as
select a.name aname,a.months amonths,a.money amoney,b.name bname,b.months bmonths,b.money bmoney from tmp_store1 a
join tmp_store1 b on a.name=b.name order by aname,amonths;
select * from tmp_store2;
(3)比较统计
select aname,amonths,amoney,sum(bmoney) as total from tmp_store2 where amonths >= bmonths group by aname,amonths,amoney;
5、另一种方式(窗口函数)
select
name,months,t1.m_money,
sum(t1.m_money) over(partition by name order by months rows between unbounded preceding and current row) as sum_money
from
(select name,months,sum(money) as m_money from t_store group by name,months) as t1;
我们可以看到,最终得到的结果是一样的:
+-------+---------+-------------+------------+--+
| name | months | t1.m_money | sum_money |
+-------+---------+-------------+------------+--+
| a | 1 | 350 | 350 |
| a | 2 | 5000 | 5350 |
| a | 3 | 600 | 5950 |
| b | 1 | 7800 | 7800 |
| b | 2 | 2500 | 10300 |
| c | 1 | 470 | 470 |
| c | 2 | 630 | 1100 |
+-------+---------+-------------+------------+--+