新建一个orders.txt文件,内容如下:
使用xftp将orders.txt上传到linux虚拟机中。
新建orders表
create table orders(
orderid int,
createdate string,
modifiedtime string,
status string)
row format delimited fields terminated by '\t';
将orders.txt文件上传到hdfs中
hdfs dfs -put ./orders.txt /user/hive/warehouse/lalian.db/orders
使用命令查询数据select * from orders;
1 2021-11-20 2021-11-20 创建
2 2021-11-20 2021-11-20 创建
3 2021-11-20 2021-11-20 创建
创建ods_orders_inc表
create table ods_orders_inc(
orderid int,
createtime string,
modifiedtime string,
status string
)
partitioned by (day string)
row format delimited fields terminated by '\t';
插入数据,并分区insert overwrite table ods_orders_inc partition(day='2021-11-20') select orderid,createdate,modifiedtime,status from orders;
创建dws_orders_his表
create table dws_orders_his(
orderid int,
createtime string,
modifiedtime string,
status string,
start_time string,
end_time string)
row format delimited fields terminated by '\t';
插入数据,insert overwrite table dws_orders_his select orderid,createtime,modifiedtime,status,modifiedtime,'9999-12-31' from ods_orders_inc where day='2021-11-20';
在orders.txt中添加新数据
重新将orders.txt上传到hdfs中
hdfs dfs -put -f ./orders.txt /user/hive/warehouse/lalian.db/orders
将21号orders表里的状态修改及新增的数据导入到ods_orders_inc 分区为2021-11-21
insert overwrite table ods_orders_inc partition(day='2021-11-21')
select orderid,createdate,modifiedtime,status from orders
where (createdate='2021-11-21' and modifiedtime='2021-11-21') or modifiedtime='2021-11-21';
两个查询,合并 union all
通过his left join inc 找出订单状态有更新的数据
将inc 2021-11-21的数据添加end_time为9999-12-31后插入his表
create table dws_his_20211121_tmp as
select t.orderid,t.createtime,t.modifiedtime,
t.status,t.start_time,t.end_time
from
(
select
t1.orderid,
t1.createtime,
t1.modifiedtime,
t1.status,
t1.start_time,
case when t2.orderid is not null and t1.end_time > '2021-11-21' then '2021-11-21' else t1.end_time
end end_time
from dws_orders_his t1
left join
(select orderid from ods_orders_inc where day='2021-11-21') t2
on t1.orderid=t2.orderid
union all
select
orderid,
createtime,
modifiedtime,
status,
modifiedtime as start_time,
'9999-12-31' as end_time
from ods_orders_inc where day='2021-11-21'
) t
order by t.orderid,t.start_time
想dws_orders_his表中插入最新数据insert overwrite table dws_orders_his select * from dws_his_20211121_tmp;
使用命令查询数据select * from dws_orders_his;
1 2021-11-20 2021-11-20 创建 2021-11-20 2021-11-21
1 2021-11-20 2021-11-21 支付 2021-11-21 9999-12-31
2 2021-11-20 2021-11-20 创建 2021-11-20 2021-11-21
2 2021-11-20 2021-11-21 支付 2021-11-21 9999-12-31
3 2021-11-20 2021-11-20 创建 2021-11-20 9999-12-31
4 2021-11-21 2021-11-21 创建 2021-11-21 9999-12-31