vi ./orders.txt
1 2021-11-20 2021-11-20 创建
2 2021-11-20 2021-11-20 创建
3 2021-11-20 2021-11-20 创建
hive> drop database lalian cascade;
hive> create database lalian;
hive> use lalian;
hive> drop table orders;
hive> create table orders(
orderid int,
createdate string,
modifiedtime string,
status string
) row format delimited fields terminated by '\t';
OK
Time taken: 0.129 seconds
hive> show tables;
OK
orders
[root@gree139 lalian]# hdfs dfs -put ./orders.txt /user/hive/warehouse/lalian.db/orders
hive> select * from orders;
OK
1 2021-11-20 2021-11-20 创建
2 2021-11-20 2021-11-20 创建
3 2021-11-20 2021-11-20 创建
Time taken: 0.157 seconds, Fetched: 3 row(s)
------------------------------------------------------------
hive> create table ods_orders_inc(
> orderid int,
> createtime string,
> modifiedtime string,
> status string
> ) partitioned by (day string)
> row format delimited fields terminated by '\t';
hive> insert overwrite table ods_orders_inc partition(day='2021-11-20')
> select orderid,createdate,modifiedtime,status from orders;
hive> show partitions ods_orders_inc;
OK
day=2021-11-20
------------------------------------------------------------
hive> create table dws_orders_his(
> orderid int,
> createtime string,
> modifiedtime string,
> status string,
> start_time string,
> end_time string
> ) row format delimited fields terminated by '\t';
hive> insert overwrite table dws_orders_his
> select orderid,createtime,modifiedtime,status,modifiedtime,'9999-12-31' from ods_orders_inc
> where day='2021-11-20';
vi ./orders.txt 添加三条数据
1 2021-11-20 2021-11-20 创建
2 2021-11-20 2021-11-20 创建
3 2021-11-20 2021-11-20 创建
1 2021-11-20 2021-11-21 支付
2 2021-11-20 2021-11-21 支付
4 2021-11-21 2021-11-21 创建
hdfs dfs -put -f ./orders.txt /user/hive/warehouse/lalian.db/orders
未使用动态分区 此为静态分区
---------将21日orders表里的状态修改及新增的数据导入到 ods_orders_inc 分区为2021-11-21-------------------------------------------------
hive> insert overwrite table ods_orders_inc partition(day='2021-11-21')
> select orderid,createdate,modifiedtime,status from orders
> where (createdate='2021-11-21' and modifiedtime='2021-11-21') or modifiedtime='2021-11-21'
> ;
可只写modifiedtime='2021-11-21' 此处为增强可读性
-- 合并查询 UNIN ALL --
-- 通过his left join inc 找出订单状态有更新的数据 --
-- 将inc 2021-11-21的数据添加end_time为9999-12-31后插入his表 --
create table dws_his_20211121_tmp as
select t.orderid,t.createtime,t.modifiedtime,
t.status,t.start_time,t.end_time
from
(
select
t1.orderid,
t1.createtime,
t1.modifiedtime,
t1.status,
t1.start_time,
case when t2.orderid is not null and t1.end_time > '2021-11-21' then '2021-11-21' else t1.end_time end end_time
from dws_orders_his t1
left join
(select orderid from ods_orders_inc where day='2021-11-21') t2
on t1.orderid=t2.orderid
union all
select
orderid,
createtime,
modifiedtime,
status,
modifiedtime as start_time,
'9999-12-31' as end_time
from ods_orders_inc where day='2021-11-21'
) t
order by t.orderid,t.start_time
单单两表合并时的效果 没有排序
1 2021-11-20 2021-11-20 创建 2021-11-20 2021-11-21
2 2021-11-20 2021-11-20 创建 2021-11-20 2021-11-21
3 2021-11-20 2021-11-20 创建 2021-11-20 9999-12-31
1 2021-11-20 2021-11-21 支付 2021-11-21 9999-12-31
2 2021-11-20 2021-11-21 支付 2021-11-21 9999-12-31
4 2021-11-21 2021-11-21 创建 2021-11-21 9999-12-31
hive> insert overwrite table dws_orders_his select * from dws_his_20211121_tmp;
hive> select * from dws_orders_his
1 2021-11-20 2021-11-20 创建 2021-11-20 2021-11-21
1 2021-11-20 2021-11-21 支付 2021-11-21 9999-12-31
2 2021-11-20 2021-11-20 创建 2021-11-20 2021-11-21
2 2021-11-20 2021-11-21 支付 2021-11-21 9999-12-31
3 2021-11-20 2021-11-20 创建 2021-11-20 9999-12-31
4 2021-11-21 2021-11-21 创建 2021-11-21 9999-12-31
vi ./orders.txt 新增四条数据
1 2021-11-20 2021-11-20 创建
2 2021-11-20 2021-11-20 创建
3 2021-11-20 2021-11-20 创建
1 2021-11-20 2021-11-21 支付
2 2021-11-20 2021-11-21 支付
4 2021-11-21 2021-11-21 创建
1 2021-11-20 2021-11-22 完成
3 2021-11-20 2021-11-22 支付
4 2021-11-21 2021-11-22 支付
5 2021-11-22 2021-11-22 创建
hdfs dfs -put -f ./orders.txt /user/hive/warehouse/lalian.db/orders
hive> insert overwrite table ods_orders_inc partition(day='2021-11-22')
> select orderid,createdate,modifiedtime,status from orders
> where (createdate='2021-11-22' and modifiedtime='2021-11-22') or modifiedtime='2021-11-22'
> ;
create table dws_his_20211122_tmp as
select t.orderid,t.createtime,t.modifiedtime,
t.status,t.start_time,t.end_time
from
(
select
t1.orderid,
t1.createtime,
t1.modifiedtime,
t1.status,
t1.start_time,
case when t2.orderid is not null and t1.end_time > '2021-11-22' then '2021-11-22' else t1.end_time end end_time
from dws_orders_his t1
left join
(select orderid from ods_orders_inc where day='2021-11-22') t2
on t1.orderid=t2.orderid
union all
select
orderid,
createtime,
modifiedtime,
status,
modifiedtime as start_time,
'9999-12-31' as end_time
from ods_orders_inc where day='2021-11-22'
) t
order by t.orderid,t.start_time
hive> insert overwrite table dws_orders_his select * from dws_his_20211122_tmp;
hive> select * from dws_orders_his
1 2021-11-20 2021-11-20 创建 2021-11-20 2021-11-21
1 2021-11-20 2021-11-21 支付 2021-11-21 2021-11-22
1 2021-11-20 2021-11-22 完成 2021-11-22 9999-12-31
2 2021-11-20 2021-11-20 创建 2021-11-20 2021-11-21
2 2021-11-20 2021-11-21 支付 2021-11-21 9999-12-31
3 2021-11-20 2021-11-20 创建 2021-11-20 2021-11-22
3 2021-11-20 2021-11-22 支付 2021-11-22 9999-12-31
4 2021-11-21 2021-11-21 创建 2021-11-21 2021-11-22
4 2021-11-21 2021-11-22 支付 2021-11-22 9999-12-31
5 2021-11-22 2021-11-22 创建 2021-11-22 9999-12-31