Hive案例

资料文件地址:链接:https://pan.baidu.com/s/1rPlbKgHlxDoc2KwviC6QOw?pwd=z7wo

1、需求描述

针对销售数据,完成统计:

  1. 按年统计销售额
  2. 销售金额在 10W 以上的订单
  3. 每年销售额的差值
  4. 年度订单金额前10位(年度、订单号、订单金额、排名)
  5. 季度订单金额前10位(年度、季度、订单id、订单金额、排名)
  6. 求所有交易日中订单金额最高的前10位
  7. 每年度销售额最大的交易日
  8. 年度最畅销的商品(即每年销售金额最大的商品)

2、数据说明

日期表(dimdate)
dtdate日期
yearmonthint年月
yearsmallint
monthtinyint
daytinyint
weektinyint周几
weekstinyint第几周
quattinyint季度
tendaystinyint
halfmonthtinyint半月
订单表(sale)
orderidstring订单号
locationidstring交易位置
dtdate交易日期
订单销售明细表(saledetail)
orderidstring订单号
rownumint行号
itemidstring货品
numint数量
pricedouble单价
amountdouble金额

3、实现

3.1、创建表

将数据存放在ORC文件中

-- createtable.hql
drop database sale cascade;
create database if not exists sale;

create table sale.dimdate_ori(
    dt date,
    yearmonth int,
    year smallint,
    month tinyint,
    day tinyint,
    week tinyint,
    weeks tinyint,
    quat tinyint,
    tendays tinyint,
    halfmonth tinyint
)
row format delimited
fields terminated by ",";

create table sale.sale_ori(
    orderid string,
    locationid string,
    dt date
)
row format delimited
fields terminated by ",";

create table sale.saledetail_ori(
    orderid string,
    rownum int,
    goods string,
    num int,
    price double,
    amount double
)
row format delimited
fields terminated by ",";


create table sale.dimdate(
    dt date,
    yearmonth int,
    year smallint,
    month tinyint,
    day tinyint,
    week tinyint,
    weeks tinyint,
    quat tinyint,
    tendays tinyint,
    halfmonth tinyint
) stored as orc;

create table sale.sale(
    orderid string,
    locationid string,
    dt date
) stored as orc;

create table sale.saledetail(
    orderid string,
    rownum int,
    goods string,
    num int,
    price double,
    amount double
)stored as orc;

hive -f createtable.hql

3.2、导入数据

-- 加载数据
use sale;
load data local inpath "/root/data/tbDate.dat" overwrite into table dimdate_ori;
load data local inpath "/root/data/tbSale.dat" overwrite into table sale_ori;
load data local inpath "/root/data/tbSaleDetail.dat" overwrite into table saledetail_ori;

-- 导入数据
insert into table dimdate select * from dimdate_ori;
insert into table sale select * from sale_ori;
insert into table saledetail select * from saledetail_ori;

hive -f loaddata.hql

3.3、SQL实现

(1)按年统计销售额

SELECT year(B.dt) year, round(sum(A.amount)/10000, 2) amount
    FROM saledetail A join sale B on A.orderid=B.orderid
group by year(B.dt);

(2)销售金额在 10W 以上的订单

SELECT orderid, round(sum(amount), 2) amount
    FROM saledetail
group by orderid
having sum(amount) > 100000

(3)每年销售额的差值

SELECT year, round(amount, 2) amount, round(lag(amount) over (ORDER BY year), 2) prioramount
        ,round(amount - lag(amount) over (ORDER BY year), 2) diff
    from (SELECT year(B.dt) year, sum(A.amount) amount
            from saledetail A join sale B on A.orderid=B.orderid
           group by year(B.dt)
        ) tmp;

(4)年度订单金额前10位(年度、订单号、订单金额、排名)

-- 方法一
SELECT dt, orderid, amount, rank
    from (SELECT dt, orderid, amount,
                dense_rank() over(PARTITION BY dt ORDER BY amount desc) rank
            from (SELECT year(B.dt) dt, A.orderid, sum(A.amount) amount
                from saledetail A join sale B on A.orderid=B.orderid
                    GROUP BY year(B.dt), A.orderid
                    ) tmp1
          ) tmp2
    where rank <= 10;


-- 方法二
with tmp as (
SELECT year(B.dt) dt, A.orderid, sum(A.amount) amount
    from saledetail A join sale B on A.orderid=B.orderid
GROUP BY year(B.dt), A.orderid
)
SELECT dt, orderid, amount, rank
    from (SELECT dt, orderid, amount,
                dense_rank() over(PARTITION BY dt ORDER BY amount desc) rank
            from tmp
        ) tmp2
    where rank <= 10;

(5)季度订单金额前10位(年度、季度、订单id、订单金额、排名)

-- 方法一
with tmp as (
select C.year, C.quat, A.orderid, round(sum(B.amount), 2) amount
    from sale A join saledetail B on A.orderid=B.orderid
        join dimdate C on A.dt=C.dt
group by C.year, C.quat, A.orderid
)
select year, quat, orderid, amount, rank
    from (
        select year, quat, orderid, amount,
            dense_rank() over (partition by year, quat order by amount desc) rank
          from tmp
        ) tmp1
  where rank <= 10;


-- 方法二
with tmp as(
select year(A.dt) year,
        case when month(A.dt) <= 3 then 1
             when month(A.dt) <= 6 then 2
             when month(A.dt) <= 9 then 3
             else 4 end quat,
        A.orderid,
        round(sum(B.amount), 2) amount
    from sale A join saledetail B on A.orderid = B.orderid
group by year(A.dt),
        case when month(A.dt) <= 3 then 1
             when month(A.dt) <= 6 then 2
             when month(A.dt) <= 9 then 3
             else 4 end,
        A.orderid
)
select year, quat, orderid, amount, rank
    from (
         select year, quat, orderid, amount,
            dense_rank() over (partition by year, quat order by amount desc) rank
         from tmp
        ) tmp1
  where rank <= 10;


-- 方法三。求季度
select floor(month(dt/3.1)) + 1;
with tmp as (
select year(A.dt) year, floor(month(A.dt)/3.1) + 1 quat,
        A.orderid,
        round(sum(B.amount), 2) amount
    from sale A join saledetail B on A.orderid=B.orderid
group by year(A.dt), floor(month(A.dt)/3.1) + 1, A.orderid
)
select year, quat, orderid, amount, rank
    from (
        select year, quat, orderid, amount,
            dense_rank() over (partition by year, quat order by amount desc) rank
        from tmp
        ) tmp1
  where rank <= 10;

(6)求所有交易日中订单金额最高的前10位

topN问题:
1、基础数据
2、上排名函数
3、解决N的问题

with tmp as (
select A.dt, A.orderid, round(sum(B.amount), 2) amount
    from sale A join saledetail B on A.orderid=B.orderid
group by A.dt, A.orderid
)
select dt, orderid, amount, rank
    from (
        select dt, orderid, amount, dense_rank() over(order by amount desc) rank
        from tmp
    ) tmp1
  where rank <= 10;

(7)每年度销售额最大的交易日

with tmp as (
select A.dt, round(sum(B.amount), 2) amount
    from sale A join saledetail B on A.orderid=B.orderid
group by A.dt
)
select year(dt) year, max(amount) dayamount
    from tmp
group by year(dt);

备注:以上求解忽略了交易日,以下SQL更符合题意

with tmp as (
select dt, amount, dense_rank() over (partition by year(dt) order by amount desc) as rank
    from (select A.dt, round(sum(B.amount), 2) amount
            from sale A join saledetail B on A.orderid=B.orderid
group by A.dt) tab1
)
select year(dt) as year, dt, amount
    from tmp
  where rank=1;

(8)年度最畅销的商品(即每年销售金额最大的商品)

with tmp as (
select year(B.dt) year, goods, round(sum(amount),2) amount
    from saledetail A join sale B on A.orderid=B.orderid
group by year(B.dt), goods
)
select year, goods, amount
    from (select year, goods, amount, dense_rank() over
        (partition by year order by amount desc) rank
            from tmp) tmp1
  where rank = 1;
  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

悠然予夏

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值