资料文件地址:链接:https://pan.baidu.com/s/1rPlbKgHlxDoc2KwviC6QOw?pwd=z7wo
1、需求描述
针对销售数据,完成统计:
- 按年统计销售额
- 销售金额在 10W 以上的订单
- 每年销售额的差值
- 年度订单金额前10位(年度、订单号、订单金额、排名)
- 季度订单金额前10位(年度、季度、订单id、订单金额、排名)
- 求所有交易日中订单金额最高的前10位
- 每年度销售额最大的交易日
- 年度最畅销的商品(即每年销售金额最大的商品)
2、数据说明
日期表(dimdate) | ||
dt | date | 日期 |
yearmonth | int | 年月 |
year | smallint | 年 |
month | tinyint | 月 |
day | tinyint | 日 |
week | tinyint | 周几 |
weeks | tinyint | 第几周 |
quat | tinyint | 季度 |
tendays | tinyint | 旬 |
halfmonth | tinyint | 半月 |
订单表(sale) | ||
orderid | string | 订单号 |
locationid | string | 交易位置 |
dt | date | 交易日期 |
订单销售明细表(saledetail) | ||
orderid | string | 订单号 |
rownum | int | 行号 |
itemid | string | 货品 |
num | int | 数量 |
price | double | 单价 |
amount | double | 金额 |
3、实现
3.1、创建表
将数据存放在ORC文件中
-- createtable.hql
drop database sale cascade;
create database if not exists sale;
create table sale.dimdate_ori(
dt date,
yearmonth int,
year smallint,
month tinyint,
day tinyint,
week tinyint,
weeks tinyint,
quat tinyint,
tendays tinyint,
halfmonth tinyint
)
row format delimited
fields terminated by ",";
create table sale.sale_ori(
orderid string,
locationid string,
dt date
)
row format delimited
fields terminated by ",";
create table sale.saledetail_ori(
orderid string,
rownum int,
goods string,
num int,
price double,
amount double
)
row format delimited
fields terminated by ",";
create table sale.dimdate(
dt date,
yearmonth int,
year smallint,
month tinyint,
day tinyint,
week tinyint,
weeks tinyint,
quat tinyint,
tendays tinyint,
halfmonth tinyint
) stored as orc;
create table sale.sale(
orderid string,
locationid string,
dt date
) stored as orc;
create table sale.saledetail(
orderid string,
rownum int,
goods string,
num int,
price double,
amount double
)stored as orc;
hive -f createtable.hql
3.2、导入数据
-- 加载数据
use sale;
load data local inpath "/root/data/tbDate.dat" overwrite into table dimdate_ori;
load data local inpath "/root/data/tbSale.dat" overwrite into table sale_ori;
load data local inpath "/root/data/tbSaleDetail.dat" overwrite into table saledetail_ori;
-- 导入数据
insert into table dimdate select * from dimdate_ori;
insert into table sale select * from sale_ori;
insert into table saledetail select * from saledetail_ori;
hive -f loaddata.hql
3.3、SQL实现
(1)按年统计销售额
SELECT year(B.dt) year, round(sum(A.amount)/10000, 2) amount
FROM saledetail A join sale B on A.orderid=B.orderid
group by year(B.dt);
(2)销售金额在 10W 以上的订单
SELECT orderid, round(sum(amount), 2) amount
FROM saledetail
group by orderid
having sum(amount) > 100000
(3)每年销售额的差值
SELECT year, round(amount, 2) amount, round(lag(amount) over (ORDER BY year), 2) prioramount
,round(amount - lag(amount) over (ORDER BY year), 2) diff
from (SELECT year(B.dt) year, sum(A.amount) amount
from saledetail A join sale B on A.orderid=B.orderid
group by year(B.dt)
) tmp;
(4)年度订单金额前10位(年度、订单号、订单金额、排名)
-- 方法一
SELECT dt, orderid, amount, rank
from (SELECT dt, orderid, amount,
dense_rank() over(PARTITION BY dt ORDER BY amount desc) rank
from (SELECT year(B.dt) dt, A.orderid, sum(A.amount) amount
from saledetail A join sale B on A.orderid=B.orderid
GROUP BY year(B.dt), A.orderid
) tmp1
) tmp2
where rank <= 10;
-- 方法二
with tmp as (
SELECT year(B.dt) dt, A.orderid, sum(A.amount) amount
from saledetail A join sale B on A.orderid=B.orderid
GROUP BY year(B.dt), A.orderid
)
SELECT dt, orderid, amount, rank
from (SELECT dt, orderid, amount,
dense_rank() over(PARTITION BY dt ORDER BY amount desc) rank
from tmp
) tmp2
where rank <= 10;
(5)季度订单金额前10位(年度、季度、订单id、订单金额、排名)
-- 方法一
with tmp as (
select C.year, C.quat, A.orderid, round(sum(B.amount), 2) amount
from sale A join saledetail B on A.orderid=B.orderid
join dimdate C on A.dt=C.dt
group by C.year, C.quat, A.orderid
)
select year, quat, orderid, amount, rank
from (
select year, quat, orderid, amount,
dense_rank() over (partition by year, quat order by amount desc) rank
from tmp
) tmp1
where rank <= 10;
-- 方法二
with tmp as(
select year(A.dt) year,
case when month(A.dt) <= 3 then 1
when month(A.dt) <= 6 then 2
when month(A.dt) <= 9 then 3
else 4 end quat,
A.orderid,
round(sum(B.amount), 2) amount
from sale A join saledetail B on A.orderid = B.orderid
group by year(A.dt),
case when month(A.dt) <= 3 then 1
when month(A.dt) <= 6 then 2
when month(A.dt) <= 9 then 3
else 4 end,
A.orderid
)
select year, quat, orderid, amount, rank
from (
select year, quat, orderid, amount,
dense_rank() over (partition by year, quat order by amount desc) rank
from tmp
) tmp1
where rank <= 10;
-- 方法三。求季度
select floor(month(dt/3.1)) + 1;
with tmp as (
select year(A.dt) year, floor(month(A.dt)/3.1) + 1 quat,
A.orderid,
round(sum(B.amount), 2) amount
from sale A join saledetail B on A.orderid=B.orderid
group by year(A.dt), floor(month(A.dt)/3.1) + 1, A.orderid
)
select year, quat, orderid, amount, rank
from (
select year, quat, orderid, amount,
dense_rank() over (partition by year, quat order by amount desc) rank
from tmp
) tmp1
where rank <= 10;
(6)求所有交易日中订单金额最高的前10位
topN问题:
1、基础数据
2、上排名函数
3、解决N的问题
with tmp as (
select A.dt, A.orderid, round(sum(B.amount), 2) amount
from sale A join saledetail B on A.orderid=B.orderid
group by A.dt, A.orderid
)
select dt, orderid, amount, rank
from (
select dt, orderid, amount, dense_rank() over(order by amount desc) rank
from tmp
) tmp1
where rank <= 10;
(7)每年度销售额最大的交易日
with tmp as (
select A.dt, round(sum(B.amount), 2) amount
from sale A join saledetail B on A.orderid=B.orderid
group by A.dt
)
select year(dt) year, max(amount) dayamount
from tmp
group by year(dt);
备注:以上求解忽略了交易日,以下SQL更符合题意
with tmp as (
select dt, amount, dense_rank() over (partition by year(dt) order by amount desc) as rank
from (select A.dt, round(sum(B.amount), 2) amount
from sale A join saledetail B on A.orderid=B.orderid
group by A.dt) tab1
)
select year(dt) as year, dt, amount
from tmp
where rank=1;
(8)年度最畅销的商品(即每年销售金额最大的商品)
with tmp as (
select year(B.dt) year, goods, round(sum(amount),2) amount
from saledetail A join sale B on A.orderid=B.orderid
group by year(B.dt), goods
)
select year, goods, amount
from (select year, goods, amount, dense_rank() over
(partition by year order by amount desc) rank
from tmp) tmp1
where rank = 1;