CC00054.pbpositions——|Hadoop&PB级数仓.V12|——|PB数仓.v12|核心交易分析|DWD层建表|加载数据|

一、DWD层建表加载数据说明
### --- 要处理的表有两张:订单表、订单产品表。其中:

~~~     订单表是周期性事实表;为保留订单状态,可以使用拉链表进行处理;
~~~     订单产品表普通的事实表,用常规的方法进行处理;
~~~     如果有数据清洗、数据转换的业务需求,ODS => DWD
~~~     如果没有数据清洗、数据转换的业务需求,保留在ODS,不做任何变化。这个是本项目的处理方式
### --- 订单状态:

~~~     3:用户拒收
~~~     2:未付款的订单
~~~     1:用户取消
~~~     0:待发货
~~~     1:配送中
~~~     2:用户确认收货
~~~     订单从创建到最终完成,是有时间限制的;业务上也不允许订单在一个月之后,状态仍然在发生变化;
二、DWD层建表加载数据
### --- DWD层建表加载数据

~~~     与维表不同,订单事实表的记录数非常多
~~~     订单有生命周期;订单的状态不可能永远处于变化之中(订单的生命周期一般在15天左右)
~~~     订单是一个拉链表,而且是分区表
~~~     分区的目的:订单一旦终止,不会重复计算
~~~     分区的条件:订单创建日期;保证相同的订单在用一个分区
### --- DWD层建表:订单事实表(拉链表)
~~~     订单事实表(拉链表)

DROP TABLE IF EXISTS dwd.dwd_trade_orders;

create table dwd.dwd_trade_orders(
`orderId` int,
`orderNo` string,
`userId` bigint,
`status` tinyint,
`productMoney` decimal,
`totalMoney` decimal,
`payMethod` tinyint,
`isPay` tinyint,
`areaId` int,
`tradeSrc` tinyint,
`tradeType` int,
`isRefund` tinyint,
`dataFlag` tinyint,
`createTime` string,
`payTime` string,
`modifiedTime` string,
`start_date` string,
`end_date` string
) COMMENT '订单事实拉链表'
partitioned by (dt string)
STORED AS PARQUET;
### --- DWD层数据加载
~~~     时间日期格式转换

-- 'yyyy-MM-dd HH:mm:ss' => timestamp => 'yyyy-MM-dd'
select unix_timestamp(modifiedtime, 'yyyy-MM-dd HH:mm:ss')
from ods.ods_trade_orders limit 10;
select from_unixtime(unix_timestamp(modifiedtime, 'yyyy-MM-dd
HH:mm:ss'), 'yyyy-MM-dd')
from ods.ods_trade_orders limit 10;
[root@hadoop02 ~]# vim /data/yanqidw/script/trade/dwd_load_trade_orders.sh
#!/bin/bash

source /etc/profile

if [ -n "$1" ]
then
    do_date=$1
else
    do_date=`date -d "-1 day" +%F`
fi

sql="
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.dynamic.partition=true;
INSERT OVERWRITE TABLE dwd.dwd_trade_orders partition(dt)
    SELECT orderId,
    orderNo,
    userId,
    status,
    productMoney,
    totalMoney,
    payMethod,
    isPay,
    areaId,
    tradeSrc,
    tradeType,
    isRefund,
    dataFlag,
    createTime,
    payTime,
    modifiedTime,
case when modifiedTime is not null then from_unixtime(unix_timestamp(modifiedTime,
'yyyy-MM-dd HH:mm:ss'),'yyyy-MM-dd') else from_unixtime(unix_timestamp(createTime,
'yyyy-MM-dd HH:mm:ss'), 'yyyy-MM-dd') end as start_date,
'9999-12-31' as end_date,
from_unixtime(unix_timestamp(createTime, 
'yyyy-MM-dd HH:mm:ss'), 
'yyyy-MM-dd') as dt FROM ods.ods_trade_orders
WHERE dt='$do_date'
union all
    SELECT A.orderId,
    A.orderNo,
    A.userId,
    A.status,
    A.productMoney,
    A.totalMoney,
    A.payMethod,
    A.isPay,
    A.areaId,
    A.tradeSrc,
    A.tradeType,
    A.isRefund,
    A.dataFlag,
    A.createTime,
    A.payTime,
    A.modifiedTime,
    A.start_date,
CASE WHEN B.orderid IS NOT NULL AND A.end_date > '$do_date'
THEN date_add('$do_date', -1) ELSE A.end_date END AS end_date,
from_unixtime(unix_timestamp(A.createTime, 
'yyyy-MM-dd HH:mm:ss'), 
'yyyy-MM-dd') as dt FROM (SELECT * FROM dwd.dwd_trade_orders WHERE
dt>date_add('$do_date', -15)) A
left outer join (SELECT * FROM ods.ods_trade_orders
WHERE dt='$do_date') B
ON A.orderId = B.orderId;
"

hive -e "$sql"
~~~     # 加载数据到DWD层订单事实表&拉链表

[root@hadoop02 ~]# sh /data/yanqidw/script/trade/dwd_load_trade_orders.sh  2020-07-12
~~~     # 查看DWD层订单事实表&拉链表是否加载到数据

hive (default)> show partitions dwd.dwd_trade_orders;
partition
dt=2020-06-28
dt=2020-06-29
dt=2020-06-30
dt=2020-07-01
dt=2020-07-02
dt=2020-07-03
dt=2020-07-04
dt=2020-07-05
dt=2020-07-06

hive (default)> select * from dwd.dwd_trade_orders limit 5;
dwd_trade_orders.orderid    dwd_trade_orders.orderno    dwd_trade_orders.userid dwd_trade_orders.status dwd_trade_orders.productmoney   dwd_trade_orders.totalmoney dwd_trade_orders.paymethod  dwd_trade_orders.ispay  dwd_trade_orders.areaid dwd_trade_orders.tradesrc   dwd_trade_orders.tradetype  dwd_trade_orders.isrefund   dwd_trade_orders.dataflag     dwd_trade_orders.createtime   dwd_trade_orders.paytime    dwd_trade_orders.modifiedtime   dwd_trade_orders.start_date dwd_trade_orders.end_date   dwd_trade_orders.dt
1   23a0b124546 98  2   0   10468   2   0   123 0   0   1   2   2020-06-28 18:14:01 2020-06-28 18:14:01 2020-07-12 13:22:22 2020-07-12  9999-12-31  2020-06-28
2   23a0b124546 121 2   0   6331    2   0   123 0   0   0   1   2020-06-28 16:55:02 2020-06-28 16:55:02 2020-07-12 13:22:22 2020-07-12  9999-12-31  2020-06-28
3   23a0b124546 35  2   0   1988    4   0   123 0   0   0   1   2020-06-28 12:07:01 2020-06-28 12:07:01 2020-07-12 13:22:22 2020-07-12  9999-12-31  2020-06-28
4   23a0b124546 161 2   0   43659   4   0   123 0   0   0   1   2020-06-28 13:19:48 2020-06-28 13:19:48 2020-07-12 13:22:22 2020-07-12  9999-12-31  2020-06-28
5   23a0b124546 72  2   0   32757   0   0   123 0   0   0   1   2020-06-28 22:14:21 2020-06-28 22:14:21 2020-07-12 13:22:22 2020-07-12  9999-12-31  2020-06-28

hive (default)> select count(*) from dwd.dwd_trade_orders;
354
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

yanqi_vip

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值