拉链表实现(demo)

创建库

create database shop_ods;

创建订单增量分区表(每天一个分区)

CREATE TABLE shop_ods.ods_orders_inc (
orderid INT,
createtime STRING,
modifiedtime STRING,
status STRING
) 
partitioned by(day string )
row format delimited fields terminated by '\t'
stored as orc
;

在数据库中创建业务表

CREATE TABLE orders (
orderid INT primary key,
createtime timestamp,
modifiedtime timestamp,
status varchar(5)
)
;

加载第1天数据

insert into orders values(1,"2019-10-01 00:00:00","2019-10-01 00:00:00",'创建');
insert into orders values(2,"2019-10-01 00:00:00","2019-10-01 00:00:00",'创建');
insert into orders values(3,"2019-10-01 00:00:00","2019-10-01 00:00:00",'创建');

使用sqoop脚本导入数据

sqoop导入hive分区数据,数据格式为ORC(版本需要1.4.7支持,需要配置hcatalog)

export HCAT_HOME=/apps/hive-1.2.1/hcatalog
export PATH=$PATH:$HCAT_HOME/bin:$HCAT_HOME/sbin
#!/bin/bash
dt=`date -d "1 days ago" +%s`
zero_dt=$(((${dt}+3600*8)/86400*86400-3600*8))
echo $dt
echo $zero_dt
dt_part=`date -d "1 days ago "+%Y-%m-%d"`
echo $dt_part
/apps/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://mini1:3306/shop \
--username hive \
--password pwd123456 \
--query "select orderid, from orders where UNIX_TIMESTAMP(modifiedtime)>=${zero_dt} and \$CONDITIONS" \
--null-string '\\N' \
--null-non-string '\\N' \
--hcatalog-database shop_ods \
--hcatalog-table ods_orders_inc \
--hcatalog-partition-keys day \
--hcatalog-partition-values ${dt_part} \
-m 1 

sqoop导入hive分区数据(版本需要1.4.7支持)不属于拉链表的实现内容

sqoop import \
--hive-import \
--connect jdbc:mysql://mini1:3306/shop \
--username hive \
--password pwd123456 \
--query "select * from orders where substring(modifiedtime,1,10)>'2016-08-20' and \$CONDITIONS" \
--hive-database shop_ods \
--fields-terminated-by '\t' \
--lines-terminated-by '\n' \
--hive-table ods_orders_inc \
--null-string '\\N' \
--null-non-string '\\N' \
--hive-partition-key day \
--hive-partition-value 20190918 \
--target-dir day=20190918 \
-m 1 

=======================================================

创建shop_dw库

create database shop_dw;

创建拉链表

CREATE TABLE shop_dw.dw_orders_his (
orderid INT,
createtime STRING,
modifiedtime STRING,
status STRING,
dw_start_date STRING,
dw_end_date STRING
) 
row format delimited fields terminated by '\t' 
stored as orc
;

初始装载

INSERT overwrite TABLE shop_dw.dw_orders_his
SELECT 
orderid,
createtime,
modifiedtime,
status,
createtime AS dw_start_date,
'9999-12-31' AS dw_end_date
FROM shop_ods.ods_orders_inc
WHERE day = '2019-10-01'
;

如下结果

select * from shop_dw.dw_orders_his;

OK
1 2016-08-20 2016-08-20 创建 2016-08-20 9999-12-31
2 2016-08-20 2016-08-20 创建 2016-08-20 9999-12-31
3 2016-08-20 2016-08-20 创建 2016-08-20 9999-12-31

加载第2天数据

insert into orders values(1,'2019-10-01 01:00:05','2019-10-02 10:00:03','支付');
insert into orders values(2,'2019-10-01 01:00:05','2019-10-02 10:00:03','完成');
insert into orders values(4,'2019-10-02 01:00:05','2019-10-02 10:00:03','创建');

使用sqoop脚本导入数据

select * from shop_ods.ods_orders_inc where day=‘2019-10-02’;
OK
1 2019-10-01 2019-10-02 支付 2019-10-02
2 2019-10-01 2019-10-02 完成 2019-10-02
4 2019-10-02 2019-10-02 创建 2019-10-02

创建临时层
create database if not exists shop_tmp;

语句实现

create table table shop_tmp.tmp_order_his
as
select
orderid,
createtime,
modifiedtime,
status status,
case when (lag(modifiedtime) over(distribute by orderid sort by modifiedtime)) is null then dw_start_date else modifiedtime end dw_start_date,
nvl(lead(modifiedtime) over(distribute by orderid sort by modifiedtime),dw_end_date) dw_end_date
from
(select 
orderid,
createtime,
modifiedtime,
status,
dw_start_date,
dw_end_date dw_end_date
from shop_dw.dw_orders_his
union all
SELECT 
orderid,
createtime,
modifiedtime,
status,
createtime AS dw_start_date,
'9999-12-31' AS dw_end_date
FROM shop_ods.ods_orders_inc
WHERE day = '2019-10-02' //${dt}
) tmp
;

shell脚本实现

#!/bin/bash
dt=`date -d "1 days ago" +%Y-%m-%d`
hive -e "
create database if not exists shop_tmp;
set hive.exec.mode.local.auto=true;
set hive.groupby.skewindata=true;
create as table shop_tmp.tmp_order_his
select
orderid,
createtime,
modifiedtime,
status status,
case when (lag(modifiedtime) over(distribute by orderid sort by modifiedtime)) is null then dw_start_date else modifiedtime end dw_start_date,
nvl(lead(modifiedtime) over(distribute by orderid sort by modifiedtime),dw_end_date) dw_end_date
from
(select 
orderid,
createtime,
modifiedtime,
status,
dw_start_date,
dw_end_date dw_end_date
from shop_dw.dw_orders_his
union all
SELECT 
orderid,
createtime,
modifiedtime,
status,
createtime AS dw_start_date,
'9999-12-31' AS dw_end_date
FROM shop_ods.ods_orders_inc
WHERE day = '${dt}'
) tmp
; 
insert overwrite shop_dw.dw_orders_his
select * from shop_tmp.tmp_order_his
;
drop table table shop_tmp.tmp_order_his;
"

1 2019-10-01 00:00:00.0 2019-10-01 00:00:00.0 创建 2019-10-01 00:00:00.0 9999-12-31
1 2019-10-01 01:00:05.0 2019-10-02 10:00:03.0 支付 2019-10-01 01:00:05.0 9999-12-31
2 2019-10-01 00:00:00.0 2019-10-01 00:00:00.0 创建 2019-10-01 00:00:00.0 9999-12-31
2 2019-10-01 01:00:05.0 2019-10-02 10:00:03.0 完成 2019-10-01 01:00:05.0 9999-12-31
3 2019-10-01 00:00:00.0 2019-10-01 00:00:00.0 创建 2019-10-01 00:00:00.0 9999-12-31
4 2019-10-02 01:00:05.0 2019-10-02 10:00:03.0 创建 2019-10-02 01:00:05.0 9999-12-31

加载第3天数据
insert into orders values(1,‘2019-10-01 10:10:10’,‘2019-10-03 09:09:09’,‘完成’);
insert into orders values(3,‘2019-10-01 10:10:10’,‘2019-10-03 09:09:09’,‘支付’);
insert into orders values(4,‘2019-10-02 10:10:10’,‘2019-10-03 09:09:09’,‘支付’);
insert into orders values(5,‘2019-10-03 10:10:10’,‘2019-10-03 09:09:09’,‘创建’);
使用sqoop脚本导入数据

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值