1、需求描述和数据说明
针对销售数据,完成统计:
- 按年统计销售额
- 销售金额在 10W 以上的订单
- 每年销售额的差值
- 年度订单金额前10位(年度、订单号、订单金额、排名)
- 季度订单金额前10位(年度、季度、订单id、订单金额、排名)
- 求所有交易日中订单金额最高的前10位
- 每年度销售额最大的交易日
- 年度最畅销的商品(即每年销售金额最大的商品)
数据如下
2、建表并导入数据
vim createtable.hql
-- createtable.hql
drop database sale cascade;
create database if not exists sale;
create table sale.dimdate_ori(
dt date,
yearmonth int,
year smallint,
month tinyint,
day tinyint,
week tinyint,
weeks tinyint,
quat tinyint,
tendays tinyint,
halfmonth tinyint
)
row format delimited
fields terminated by ",";
create table sale.sale_ori(
orderid string,
locationid string,
dt date
)
row format delimited
fields terminated by ",";
create table sale.saledetail_ori(
orderid string,
rownum int,
goods string,
num int,
price double,
amount double
)
row format delimited
fields terminated by ",";
create table sale.dimdate(
dt date,
yearmonth int,
year smallint,
month tinyint,
day tinyint,
week tinyint,
weeks tinyint,
quat tinyint,
tendays tinyint,
halfmonth tinyint
) stored as orc;
create table sale.sale(
orderid string,
locationid string,
dt date
) stored as orc;
create table sale.saledetail(
orderid string,
rownum int,
goods string,
num int,
price double,
amount double
)stored as orc;
hive -f createtable.hql
导入数据
use sale;
load data local inpath "/root/bigdata/test_data/hive/sale/tbDate.dat" overwrite into
table dimdate_ori;
load data local inpath "/root/bigdata/test_data/hive/sale/tbSale.dat" overwrite into
table sale_ori;
load data local inpath "/root/bigdata/test_data/hive/sale/tbSaleDetail.dat" overwrite
into table saledetail_ori;
-- 导入数据
insert into table dimdate select * from dimdate_ori;
insert into table sale select * from sale_ori;
insert into table saledetail select * from saledetail_ori;
hive -f loaddata.hql
查看结果
select * from dimdate limit 5;
+-------------+--------------------+---------------+----------------+--------------+---------------+----------------+---------------+------------------+--------------------+
| dimdate.dt | dimdate.yearmonth | dimdate.year | dimdate.month | dimdate.day | dimdate.week | dimdate.weeks | dimdate.quat | dimdate.tendays | dimdate.halfmonth |
+-------------+--------------------+---------------+----------------+--------------+---------------+----------------+---------------+------------------+--------------------+
| 2003-01-01 | 200301 | 2003 | 1 | 1 | 3 | 1 | 1 | 1 | 1 |
| 2003-01-02 | 200301 | 2003 | 1 | 2 | 4 | 1 | 1 | 1 | 1 |
| 2003-01-03 | 200301 | 2003 | 1 | 3 | 5 | 1 | 1 | 1 | 1 |
| 2003-01-04 | 200301 | 2003 | 1 | 4 | 6 | 1 | 1 | 1 | 1 |
| 2003-01-05 | 200301 | 2003 | 1 | 5 | 7 | 1 | 1 | 1 | 1 |
+-------------+--------------------+---------------+----------------+--------------+---------------+----------------+---------------+------------------+--------------------+
select * from sale limit 5;
+---------------+------------------+-------------+
| sale.orderid | sale.locationid | sale.dt |
+---------------+------------------+-------------+
| BYSL00000893 | ZHAO | 2007-08-23 |
| BYSL00000897 | ZHAO | 2007-08-24 |
| BYSL00000898 | ZHAO | 2007-08-25 |
| BYSL00000899 | ZHAO | 2007-08-26 |
| BYSL00000900 | ZHAO | 2007-08-26 |
+---------------+------------------+-------------+
select * from saledetail limit 5;
+---------------------+--------------------+-------------------+-----------------+-------------------+--------------------+
| saledetail.orderid | saledetail.rownum | saledetail.goods | saledetail.num | saledetail.price | saledetail.amount |
+---------------------+--------------------+-------------------+-----------------+-------------------+--------------------+
| BYSL00000893 | 0 | FS527258160501 | -1 | 268.0 | -268.0 |
| BYSL00000893 | 1 | FS527258169701 | 1 | 268.0 | 268.0 |
| BYSL00000893 | 2 | FS527230163001 | 1 | 198.0 | 198.0 |
| BYSL00000893 | 3 | 24627209125406 | 1 | 298.0 | 298.0 |
| BYSL00000893 | 4 | K9527220210202 | 1 | 120.0 | 120.0 |
+---------------------+--------------------+-------------------+-----------------+-------------------+--------------------+
3、 sql 实现
1、按年统计销售额
SELECT year(B.dt) year,
round(sum(A.amount)/10000, 2) amount
FROM saledetail A
join sale B
on A.orderid=B.orderid
group by year(B.dt);
+-------+----------+
| year | amount |
+-------+----------+
| 2004 | 326.81 |
| 2005 | 1325.76 |
| 2006 | 1368.1 |
| 2007 | 1671.94 |
| 2008 | 1467.43 |
| 2009 | 632.37 |
| 2010 | 21.09 |
| 3274 | 0.17 |
+-------+----------+
2、销售金额在 10W 以上的订单
SELECT orderid,
round(sum(amount), 2) amount
FROM saledetail
group by orderid
having sum(amount) > 100000
+----------------+-----------+
| orderid | amount |
+----------------+-----------+
| HMJSL00009024 | 119084.8 |
| HMJSL00009958 | 159126.0 |
+----------------+-----------+
3、每年销售额的差值
select
year,
amount,
round( amount - lag(amount) over(sort by year),2 ) df
from
(
SELECT
year(B.dt) year,
sum(A.amount) amount
FROM saledetail A
join sale B
on A.orderid=B.orderid
group by year(B.dt)
) t
+-------+-----------------------+--------------+
| year | amount | df |
+-------+-----------------------+--------------+
| 2004 | 3268115.4991999646 | NULL |
| 2005 | 1.3257564150000084E7 | 9989448.65 |
| 2006 | 1.3680982899999822E7 | 423418.75 |
| 2007 | 1.6719354560000354E7 | 3038371.66 |
| 2008 | 1.4674295299999664E7 | -2045059.26 |
| 2009 | 6323697.1899998775 | -8350598.11 |
| 2010 | 210949.66000000003 | -6112747.53 |
| 3274 | 1703.0 | -209246.66 |
+-------+-----------------------+--------------+
4、年度订单金额前10位(年度、订单号、订单金额、排名)
with tmp as (
select
year(a.dt) year,
a.orderid,
sum(b.amount) as amounts
from sale a
left join saledetail b
on a.orderid = b.orderid
group by year(a.dt),a.orderid
),
tmp2 as (
select
year,
orderid,
amounts,
dense_rank() over(partition by year order by amounts desc ) as rn
from tmp
)
select year,orderid,amounts,rn from tmp2 where rn <=10
+-------+----------------+---------------------+-----+
| year | orderid | amounts | rn |
+-------+----------------+---------------------+-----+
| 2004 | HMJSL00001557 | 23656.79999999997 | 1 |
| 2004 | HMJSL00001556 | 22010.599999999984 | 2 |
| 2004 | HMJSL00001349 | 17147.2 | 3 |
| 2004 | HMJSL00001531 | 16605.6 | 4 |
| 2004 | HMJSL00001567 | 15651.0 | 5 |
| 2004 | HMJSL00001562 | 15429.999999999989 | 6 |
| 2004 | HMJSL00000706 | 15266.0 | 7 |
| 2004 | HMJSL00000656 | 15021.319999999998 | 8 |
| 2004 | HMJSL00001568 | 14444.0 | 9 |
| 2004 | HMJSL00000963 | 14268.800000000001 | 10 |
| 2005 | HMJSL00003263 | 38186.399999999994 | 1 |
| 2005 | HMJSL00005422 | 29442.999999999964 | 2 |
| 2005 | HMJSL00002046 | 27612.399999999983 | 3 |
| 2005 | HMJSL00005147 | 24473.399999999998 | 4 |
| 2005 | HMJSL00004467 | 22908.479999999996 | 5 |
| 2005 | HMJSL00004201 | 22700.159999999996 | 6 |
| 2005 | HMJSL00004084 | 22618.0 | 7 |
| 2005 | HMJSL00004491 | 22570.799999999996 | 8 |
| 2005 | HMJSL00004464 | 22439.76 | 9 |
| 2005 | HMJSL00004547 | 22271.76 | 10 |
| 2006 | HMJSL00006452 | 36124.0 | 1 |
| 2006 | HMJSL00005769 | 31269.799999999977 | 2 |
| 2006 | HMJSL00008345 | 29011.79999999998 | 3 |
| 2006 | HMJSL00006759 | 28610.0 | 4 |
| 2006 | GCSL00000781 | 27158.999999999975 | 5 |
| 2006 | HMJSL00005760 | 26736.49999999998 | 6 |
| 2006 | HMJSL00006758 | 24652.0 | 7 |
| 2006 | HMJSL00006763 | 24368.0 | 8 |
| 2006 | YZSL00000563 | 23696.65000000001 | 9 |
| 2006 | HMJSL00006757 | 23656.0 | 10 |
| 2007 | HMJSL00009958 | 159126.0 | 1 |
| 2007 | HMJSL00009024 | 119084.80000000008 | 2 |
| 2007 | HMJSL00009957 | 52422.0 | 3 |
| 2007 | HMJSL00010216 | 49800.0 | 4 |
| 2007 | HMJSL00009956 | 43018.0 | 5 |
| 2007 | HMJSL00010339 | 42157.59999999999 | 6 |
| 2007 | HMJSL00008593 | 41902.400000000016 | 7 |
| 2007 | HMJSL00010137 | 40339.999999999985 | 8 |
| 2007 | TYSL00001043 | 37940.0 | 9 |
| 2007 | HMJSL00008408 | 37438.2 | 10 |
| 2008 | HMJSL00010598 | 55828.0 | 1 |
| 2008 | YZSL00001404 | 35794.2 | 2 |
| 2008 | YZSL00001405 | 34713.4 | 3 |
| 2008 | GCSL00001414 | 29843.800000000003 | 4 |
| 2008 | YZSL00001235 | 27295.799999999996 | 5 |
| 2008 | RMSL00012270 | 24210.0 | 6 |
| 2008 | YZSL00001246 | 23838.2 | 7 |
| 2008 | GCSL00001255 | 23284.199999999993 | 8 |
| 2008 | SSSL00013486 | 22544.199999999997 | 9 |
| 2008 | YZSL00001283 | 22502.8 | 10 |
| 2009 | TSSL00016101 | 25813.200000000008 | 1 |
| 2009 | TSSL00016110 | 23628.200000000004 | 2 |
| 2009 | TSSL00016082 | 23560.000000000004 | 3 |
| 2009 | SSSL00013554 | 22309.599999999995 | 4 |
| 2009 | TMSL00015434 | 21706.60000000001 | 5 |
| 2009 | SSSL00013535 | 21254.399999999998 | 6 |
| 2009 | SSSL00013540 | 21199.399999999998 | 7 |
| 2009 | TSSL00016087 | 21087.200000000004 | 8 |
| 2009 | TMSL00015426 | 20946.200000000008 | 9 |
| 2009 | TSSL00015110 | 20126.199999999997 | 10 |
| 2010 | SSSL00016272 | 13065.280000000002 | 1 |
| 2010 | SSSL00016265 | 12905.000000000002 | 2 |
| 2010 | SSSL00016279 | 11227.0 | 3 |
| 2010 | SSSL00016338 | 9033.0 | 4 |
| 2010 | TSSL00016266 | 8952.8 | 5 |
| 2010 | TSSL00016340 | 8945.1 | 6 |
| 2010 | TSSL00016281 | 8532.8 | 7 |
| 2010 | SSSL00016327 | 6883.4 | 8 |
| 2010 | TSSL00016274 | 6781.199999999999 | 9 |
| 2010 | GHSL00016289 | 4734.0 | 10 |
| 3274 | BYSL00013309 | 1703.0 | 1 |
+-------+----------------+---------------------+-----+
5、季度订单金额前10位(年度、季度、订单id、订单金额、排名)
with tmp as (
select
year(a.dt) year,
c.quat,
a.orderid,
sum(b.amount) as amounts
from sale a
left join saledetail b
on a.orderid = b.orderid
left join dimdate c
on a.dt = c.dt
group by year(a.dt),c.quat,a.orderid
),
tmp2 as (
select
year,
quat,
orderid,
amounts,
dense_rank() over(partition by year,quat order by amounts desc ) as rn
from tmp
)
select year,orderid,amounts,rn from tmp2 where rn <=10
6、求所有交易日中订单金额最高的前10位
with tmp as (
select A.dt, A.orderid, round(sum(B.amount), 2) amount
from sale A join saledetail B on A.orderid=B.orderid
group by A.dt, A.orderid
)
select dt, orderid, amount, rank
from (
select dt, orderid, amount, dense_rank() over(order by
amount desc) rank
from tmp
) tmp1
where rank <= 10;
7、每年度销售额最大的交易日
with tmp as (
select A.dt, round(sum(B.amount), 2) amount
from sale A join saledetail B on A.orderid=B.orderid
group by A.dt
)
select year(dt) year, max(amount) dayamount
from tmp
group by year(dt);
8、年度最畅销的商品(即每年销售金额最大的商品)
with tmp as (
select year(B.dt) year, goods, round(sum(amount),2) amount
from saledetail A join sale B on A.orderid=B.orderid
group by year(B.dt), goods
)
select year, goods, amount
from (select year, goods, amount, dense_rank() over
(partition by year order by amount desc) rank
from tmp) tmp1
where rank = 1;