Hive 案例

1、需求描述和数据说明

针对销售数据,完成统计:

  1. 按年统计销售额
  2. 销售金额在 10W 以上的订单
  3. 每年销售额的差值
  4. 年度订单金额前10位(年度、订单号、订单金额、排名)
  5. 季度订单金额前10位(年度、季度、订单id、订单金额、排名)
  6. 求所有交易日中订单金额最高的前10位
  7. 每年度销售额最大的交易日
  8. 年度最畅销的商品(即每年销售金额最大的商品)

数据如下
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

2、建表并导入数据

vim createtable.hql

-- createtable.hql
drop database sale cascade;
create database if not exists sale;
create table sale.dimdate_ori(
dt date,
yearmonth int,
year smallint,
month tinyint,
day tinyint,
week tinyint,
weeks tinyint,
quat tinyint,
tendays tinyint,
halfmonth tinyint
)
row format delimited
fields terminated by ",";

create table sale.sale_ori(
orderid string,
locationid string,
dt date
)
row format delimited
fields terminated by ",";



create table sale.saledetail_ori(
orderid string,
rownum int,
goods string,
num int,
price double,
amount double
)
row format delimited
fields terminated by ",";


create table sale.dimdate(
dt date,
yearmonth int,
year smallint,
month tinyint,
day tinyint,
week tinyint,
weeks tinyint,
quat tinyint,
tendays tinyint,
halfmonth tinyint
) stored as orc;

create table sale.sale(
orderid string,
locationid string,
dt date
) stored as orc;

create table sale.saledetail(
orderid string,
rownum int,
goods string,
num int,
price double,
amount double
)stored as orc;
hive -f createtable.hql

导入数据

use sale;
load data local inpath "/root/bigdata/test_data/hive/sale/tbDate.dat" overwrite into
table dimdate_ori;
load data local inpath "/root/bigdata/test_data/hive/sale/tbSale.dat" overwrite into
table sale_ori;
load data local inpath "/root/bigdata/test_data/hive/sale/tbSaleDetail.dat" overwrite
into table saledetail_ori;
-- 导入数据
insert into table dimdate select * from dimdate_ori;
insert into table sale select * from sale_ori;
insert into table saledetail select * from saledetail_ori;
hive -f loaddata.hql

查看结果

select * from dimdate limit 5;
+-------------+--------------------+---------------+----------------+--------------+---------------+----------------+---------------+------------------+--------------------+
| dimdate.dt  | dimdate.yearmonth  | dimdate.year  | dimdate.month  | dimdate.day  | dimdate.week  | dimdate.weeks  | dimdate.quat  | dimdate.tendays  | dimdate.halfmonth  |
+-------------+--------------------+---------------+----------------+--------------+---------------+----------------+---------------+------------------+--------------------+
| 2003-01-01  | 200301             | 2003          | 1              | 1            | 3             | 1              | 1             | 1                | 1                  |
| 2003-01-02  | 200301             | 2003          | 1              | 2            | 4             | 1              | 1             | 1                | 1                  |
| 2003-01-03  | 200301             | 2003          | 1              | 3            | 5             | 1              | 1             | 1                | 1                  |
| 2003-01-04  | 200301             | 2003          | 1              | 4            | 6             | 1              | 1             | 1                | 1                  |
| 2003-01-05  | 200301             | 2003          | 1              | 5            | 7             | 1              | 1             | 1                | 1                  |
+-------------+--------------------+---------------+----------------+--------------+---------------+----------------+---------------+------------------+--------------------+



select * from sale limit 5;
+---------------+------------------+-------------+
| sale.orderid  | sale.locationid  |   sale.dt   |
+---------------+------------------+-------------+
| BYSL00000893  | ZHAO             | 2007-08-23  |
| BYSL00000897  | ZHAO             | 2007-08-24  |
| BYSL00000898  | ZHAO             | 2007-08-25  |
| BYSL00000899  | ZHAO             | 2007-08-26  |
| BYSL00000900  | ZHAO             | 2007-08-26  |
+---------------+------------------+-------------+


 select * from saledetail limit 5;
+---------------------+--------------------+-------------------+-----------------+-------------------+--------------------+
| saledetail.orderid  | saledetail.rownum  | saledetail.goods  | saledetail.num  | saledetail.price  | saledetail.amount  |
+---------------------+--------------------+-------------------+-----------------+-------------------+--------------------+
| BYSL00000893        | 0                  | FS527258160501    | -1              | 268.0             | -268.0             |
| BYSL00000893        | 1                  | FS527258169701    | 1               | 268.0             | 268.0              |
| BYSL00000893        | 2                  | FS527230163001    | 1               | 198.0             | 198.0              |
| BYSL00000893        | 3                  | 24627209125406    | 1               | 298.0             | 298.0              |
| BYSL00000893        | 4                  | K9527220210202    | 1               | 120.0             | 120.0              |
+---------------------+--------------------+-------------------+-----------------+-------------------+--------------------+


3、 sql 实现

1、按年统计销售额

SELECT year(B.dt) year, 
       round(sum(A.amount)/10000, 2) amount
FROM saledetail A 
join sale B 
on A.orderid=B.orderid
group by year(B.dt);

+-------+----------+
| year  |  amount  |
+-------+----------+
| 2004  | 326.81   |
| 2005  | 1325.76  |
| 2006  | 1368.1   |
| 2007  | 1671.94  |
| 2008  | 1467.43  |
| 2009  | 632.37   |
| 2010  | 21.09    |
| 3274  | 0.17     |
+-------+----------+

2、销售金额在 10W 以上的订单

SELECT orderid, 
       round(sum(amount), 2) amount
FROM saledetail
group by orderid
having sum(amount) > 100000
+----------------+-----------+
|    orderid     |  amount   |
+----------------+-----------+
| HMJSL00009024  | 119084.8  |
| HMJSL00009958  | 159126.0  |
+----------------+-----------+


3、每年销售额的差值

select
 year,
 amount,
 round( amount - lag(amount) over(sort by year),2 )  df
from 
(
SELECT 
   year(B.dt) year,
   sum(A.amount) amount
FROM saledetail A 
join sale B 
on A.orderid=B.orderid
group by year(B.dt)  
) t

+-------+-----------------------+--------------+
| year  |        amount         |      df      |
+-------+-----------------------+--------------+
| 2004  | 3268115.4991999646    | NULL         |
| 2005  | 1.3257564150000084E7  | 9989448.65   |
| 2006  | 1.3680982899999822E7  | 423418.75    |
| 2007  | 1.6719354560000354E7  | 3038371.66   |
| 2008  | 1.4674295299999664E7  | -2045059.26  |
| 2009  | 6323697.1899998775    | -8350598.11  |
| 2010  | 210949.66000000003    | -6112747.53  |
| 3274  | 1703.0                | -209246.66   |
+-------+-----------------------+--------------+

4、年度订单金额前10位(年度、订单号、订单金额、排名)

with tmp as (
  select  
     year(a.dt) year,
     a.orderid,
     sum(b.amount) as amounts
  from sale a 
  left join saledetail b
  on a.orderid = b.orderid
  group by  year(a.dt),a.orderid
),

tmp2 as (
 select 
     year,
     orderid,
     amounts,
     dense_rank()  over(partition by year order by amounts desc ) as rn 
 from tmp
)
select year,orderid,amounts,rn from tmp2 where rn <=10
+-------+----------------+---------------------+-----+
| year  |    orderid     |       amounts       | rn  |
+-------+----------------+---------------------+-----+
| 2004  | HMJSL00001557  | 23656.79999999997   | 1   |
| 2004  | HMJSL00001556  | 22010.599999999984  | 2   |
| 2004  | HMJSL00001349  | 17147.2             | 3   |
| 2004  | HMJSL00001531  | 16605.6             | 4   |
| 2004  | HMJSL00001567  | 15651.0             | 5   |
| 2004  | HMJSL00001562  | 15429.999999999989  | 6   |
| 2004  | HMJSL00000706  | 15266.0             | 7   |
| 2004  | HMJSL00000656  | 15021.319999999998  | 8   |
| 2004  | HMJSL00001568  | 14444.0             | 9   |
| 2004  | HMJSL00000963  | 14268.800000000001  | 10  |
| 2005  | HMJSL00003263  | 38186.399999999994  | 1   |
| 2005  | HMJSL00005422  | 29442.999999999964  | 2   |
| 2005  | HMJSL00002046  | 27612.399999999983  | 3   |
| 2005  | HMJSL00005147  | 24473.399999999998  | 4   |
| 2005  | HMJSL00004467  | 22908.479999999996  | 5   |
| 2005  | HMJSL00004201  | 22700.159999999996  | 6   |
| 2005  | HMJSL00004084  | 22618.0             | 7   |
| 2005  | HMJSL00004491  | 22570.799999999996  | 8   |
| 2005  | HMJSL00004464  | 22439.76            | 9   |
| 2005  | HMJSL00004547  | 22271.76            | 10  |
| 2006  | HMJSL00006452  | 36124.0             | 1   |
| 2006  | HMJSL00005769  | 31269.799999999977  | 2   |
| 2006  | HMJSL00008345  | 29011.79999999998   | 3   |
| 2006  | HMJSL00006759  | 28610.0             | 4   |
| 2006  | GCSL00000781   | 27158.999999999975  | 5   |
| 2006  | HMJSL00005760  | 26736.49999999998   | 6   |
| 2006  | HMJSL00006758  | 24652.0             | 7   |
| 2006  | HMJSL00006763  | 24368.0             | 8   |
| 2006  | YZSL00000563   | 23696.65000000001   | 9   |
| 2006  | HMJSL00006757  | 23656.0             | 10  |
| 2007  | HMJSL00009958  | 159126.0            | 1   |
| 2007  | HMJSL00009024  | 119084.80000000008  | 2   |
| 2007  | HMJSL00009957  | 52422.0             | 3   |
| 2007  | HMJSL00010216  | 49800.0             | 4   |
| 2007  | HMJSL00009956  | 43018.0             | 5   |
| 2007  | HMJSL00010339  | 42157.59999999999   | 6   |
| 2007  | HMJSL00008593  | 41902.400000000016  | 7   |
| 2007  | HMJSL00010137  | 40339.999999999985  | 8   |
| 2007  | TYSL00001043   | 37940.0             | 9   |
| 2007  | HMJSL00008408  | 37438.2             | 10  |
| 2008  | HMJSL00010598  | 55828.0             | 1   |
| 2008  | YZSL00001404   | 35794.2             | 2   |
| 2008  | YZSL00001405   | 34713.4             | 3   |
| 2008  | GCSL00001414   | 29843.800000000003  | 4   |
| 2008  | YZSL00001235   | 27295.799999999996  | 5   |
| 2008  | RMSL00012270   | 24210.0             | 6   |
| 2008  | YZSL00001246   | 23838.2             | 7   |
| 2008  | GCSL00001255   | 23284.199999999993  | 8   |
| 2008  | SSSL00013486   | 22544.199999999997  | 9   |
| 2008  | YZSL00001283   | 22502.8             | 10  |
| 2009  | TSSL00016101   | 25813.200000000008  | 1   |
| 2009  | TSSL00016110   | 23628.200000000004  | 2   |
| 2009  | TSSL00016082   | 23560.000000000004  | 3   |
| 2009  | SSSL00013554   | 22309.599999999995  | 4   |
| 2009  | TMSL00015434   | 21706.60000000001   | 5   |
| 2009  | SSSL00013535   | 21254.399999999998  | 6   |
| 2009  | SSSL00013540   | 21199.399999999998  | 7   |
| 2009  | TSSL00016087   | 21087.200000000004  | 8   |
| 2009  | TMSL00015426   | 20946.200000000008  | 9   |
| 2009  | TSSL00015110   | 20126.199999999997  | 10  |
| 2010  | SSSL00016272   | 13065.280000000002  | 1   |
| 2010  | SSSL00016265   | 12905.000000000002  | 2   |
| 2010  | SSSL00016279   | 11227.0             | 3   |
| 2010  | SSSL00016338   | 9033.0              | 4   |
| 2010  | TSSL00016266   | 8952.8              | 5   |
| 2010  | TSSL00016340   | 8945.1              | 6   |
| 2010  | TSSL00016281   | 8532.8              | 7   |
| 2010  | SSSL00016327   | 6883.4              | 8   |
| 2010  | TSSL00016274   | 6781.199999999999   | 9   |
| 2010  | GHSL00016289   | 4734.0              | 10  |
| 3274  | BYSL00013309   | 1703.0              | 1   |
+-------+----------------+---------------------+-----+

5、季度订单金额前10位(年度、季度、订单id、订单金额、排名)

with tmp as (
  select  
    year(a.dt) year,
    c.quat,
    a.orderid,
    sum(b.amount) as amounts
  from sale a 
  left join saledetail b
  on a.orderid = b.orderid
  left join  dimdate c 
  on a.dt = c.dt
  group by  year(a.dt),c.quat,a.orderid
),

tmp2 as (
 select 
    year,
    quat,
    orderid,
    amounts,
    dense_rank()  over(partition by year,quat order by amounts desc ) as rn 
 from tmp
)

select year,orderid,amounts,rn from tmp2 where rn <=10

6、求所有交易日中订单金额最高的前10位

with tmp as (
select A.dt, A.orderid, round(sum(B.amount), 2) amount
from sale A join saledetail B on A.orderid=B.orderid
group by A.dt, A.orderid
)
select dt, orderid, amount, rank
from (
select dt, orderid, amount, dense_rank() over(order by
amount desc) rank
from tmp
) tmp1
where rank <= 10;

7、每年度销售额最大的交易日

with tmp as (
select A.dt, round(sum(B.amount), 2) amount
from sale A join saledetail B on A.orderid=B.orderid
group by A.dt
)
select year(dt) year, max(amount) dayamount
from tmp
group by year(dt);

8、年度最畅销的商品(即每年销售金额最大的商品)

with tmp as (
select year(B.dt) year, goods, round(sum(amount),2) amount
from saledetail A join sale B on A.orderid=B.orderid
group by year(B.dt), goods
)
select year, goods, amount
from (select year, goods, amount, dense_rank() over
(partition by year order by amount desc) rank
from tmp) tmp1
where rank = 1;
  • 2
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值