创建库
create database shop_ods;
创建订单增量分区表(每天一个分区)
CREATE TABLE shop_ods.ods_orders_inc (
orderid INT,
createtime STRING,
modifiedtime STRING,
status STRING
)
partitioned by(day string )
row format delimited fields terminated by '\t'
stored as orc
;
在数据库中创建业务表
CREATE TABLE orders (
orderid INT primary key,
createtime timestamp,
modifiedtime timestamp,
status varchar(5)
)
;
加载第1天数据
insert into orders values(1,"2019-10-01 00:00:00","2019-10-01 00:00:00",'创建');
insert into orders values(2,"2019-10-01 00:00:00","2019-10-01 00:00:00",'创建');
insert into orders values(3,"2019-10-01 00:00:00","2019-10-01 00:00:00",'创建');
使用sqoop脚本导入数据
sqoop导入hive分区数据,数据格式为ORC(版本需要1.4.7支持,需要配置hcatalog)
export HCAT_HOME=/apps/hive-1.2.1/hcatalog
export PATH=$PATH:$HCAT_HOME/bin:$HCAT_HOME/sbin
#!/bin/bash
dt=`date -d "1 days ago" +%s`
zero_dt=$(((${dt}+3600*8)/86400*86400-3600*8))
echo $dt
echo $zero_dt
dt_part=`date -d "1 days ago "+%Y-%m-%d"`
echo $dt_part
/apps/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://mini1:3306/shop \
--username hive \
--password pwd123456 \
--query "select orderid, from orders where UNIX_TIMESTAMP(modifiedtime)>=${zero_dt} and \$CONDITIONS" \
--null-string '\\N' \
--null-non-string '\\N' \
--hcatalog-database shop_ods \
--hcatalog-table ods_orders_inc \
--hcatalog-partition-keys day \
--hcatalog-partition-values ${dt_part} \
-m 1
sqoop导入hive分区数据(版本需要1.4.7支持)不属于拉链表的实现内容
sqoop import \
--hive-import \
--connect jdbc:mysql://mini1:3306/shop \
--username hive \
--password pwd123456 \
--query "select * from orders where substring(modifiedtime,1,10)>'2016-08-20' and \$CONDITIONS" \
--hive-database shop_ods \
--fields-terminated-by '\t' \
--lines-terminated-by '\n' \
--hive-table ods_orders_inc \
--null-string '\\N' \
--null-non-string '\\N' \
--hive-partition-key day \
--hive-partition-value 20190918 \
--target-dir day=20190918 \
-m 1
=======================================================
创建shop_dw库
create database shop_dw;
创建拉链表
CREATE TABLE shop_dw.dw_orders_his (
orderid INT,
createtime STRING,
modifiedtime STRING,
status STRING,
dw_start_date STRING,
dw_end_date STRING
)
row format delimited fields terminated by '\t'
stored as orc
;
初始装载
INSERT overwrite TABLE shop_dw.dw_orders_his
SELECT
orderid,
createtime,
modifiedtime,
status,
createtime AS dw_start_date,
'9999-12-31' AS dw_end_date
FROM shop_ods.ods_orders_inc
WHERE day = '2019-10-01'
;
如下结果
select * from shop_dw.dw_orders_his;
OK
1 2016-08-20 2016-08-20 创建 2016-08-20 9999-12-31
2 2016-08-20 2016-08-20 创建 2016-08-20 9999-12-31
3 2016-08-20 2016-08-20 创建 2016-08-20 9999-12-31
加载第2天数据
insert into orders values(1,'2019-10-01 01:00:05','2019-10-02 10:00:03','支付');
insert into orders values(2,'2019-10-01 01:00:05','2019-10-02 10:00:03','完成');
insert into orders values(4,'2019-10-02 01:00:05','2019-10-02 10:00:03','创建');
使用sqoop脚本导入数据
select * from shop_ods.ods_orders_inc where day=‘2019-10-02’;
OK
1 2019-10-01 2019-10-02 支付 2019-10-02
2 2019-10-01 2019-10-02 完成 2019-10-02
4 2019-10-02 2019-10-02 创建 2019-10-02
创建临时层
create database if not exists shop_tmp;
语句实现
create table table shop_tmp.tmp_order_his
as
select
orderid,
createtime,
modifiedtime,
status status,
case when (lag(modifiedtime) over(distribute by orderid sort by modifiedtime)) is null then dw_start_date else modifiedtime end dw_start_date,
nvl(lead(modifiedtime) over(distribute by orderid sort by modifiedtime),dw_end_date) dw_end_date
from
(select
orderid,
createtime,
modifiedtime,
status,
dw_start_date,
dw_end_date dw_end_date
from shop_dw.dw_orders_his
union all
SELECT
orderid,
createtime,
modifiedtime,
status,
createtime AS dw_start_date,
'9999-12-31' AS dw_end_date
FROM shop_ods.ods_orders_inc
WHERE day = '2019-10-02' //${dt}
) tmp
;
shell脚本实现
#!/bin/bash
dt=`date -d "1 days ago" +%Y-%m-%d`
hive -e "
create database if not exists shop_tmp;
set hive.exec.mode.local.auto=true;
set hive.groupby.skewindata=true;
create as table shop_tmp.tmp_order_his
select
orderid,
createtime,
modifiedtime,
status status,
case when (lag(modifiedtime) over(distribute by orderid sort by modifiedtime)) is null then dw_start_date else modifiedtime end dw_start_date,
nvl(lead(modifiedtime) over(distribute by orderid sort by modifiedtime),dw_end_date) dw_end_date
from
(select
orderid,
createtime,
modifiedtime,
status,
dw_start_date,
dw_end_date dw_end_date
from shop_dw.dw_orders_his
union all
SELECT
orderid,
createtime,
modifiedtime,
status,
createtime AS dw_start_date,
'9999-12-31' AS dw_end_date
FROM shop_ods.ods_orders_inc
WHERE day = '${dt}'
) tmp
;
insert overwrite shop_dw.dw_orders_his
select * from shop_tmp.tmp_order_his
;
drop table table shop_tmp.tmp_order_his;
"
1 2019-10-01 00:00:00.0 2019-10-01 00:00:00.0 创建 2019-10-01 00:00:00.0 9999-12-31
1 2019-10-01 01:00:05.0 2019-10-02 10:00:03.0 支付 2019-10-01 01:00:05.0 9999-12-31
2 2019-10-01 00:00:00.0 2019-10-01 00:00:00.0 创建 2019-10-01 00:00:00.0 9999-12-31
2 2019-10-01 01:00:05.0 2019-10-02 10:00:03.0 完成 2019-10-01 01:00:05.0 9999-12-31
3 2019-10-01 00:00:00.0 2019-10-01 00:00:00.0 创建 2019-10-01 00:00:00.0 9999-12-31
4 2019-10-02 01:00:05.0 2019-10-02 10:00:03.0 创建 2019-10-02 01:00:05.0 9999-12-31
加载第3天数据
insert into orders values(1,‘2019-10-01 10:10:10’,‘2019-10-03 09:09:09’,‘完成’);
insert into orders values(3,‘2019-10-01 10:10:10’,‘2019-10-03 09:09:09’,‘支付’);
insert into orders values(4,‘2019-10-02 10:10:10’,‘2019-10-03 09:09:09’,‘支付’);
insert into orders values(5,‘2019-10-03 10:10:10’,‘2019-10-03 09:09:09’,‘创建’);
使用sqoop脚本导入数据