新增字段&跑历史分区
1、现象
hive(笔者使用的hive版本为1.1.0)表新增加列,重跑历史分区数据,发现新增的列在老分区中取值都为NULL。如下示例
1)初始化表
//init table
drop table if exists tmp.test;
create table tmp.test
(
id string
)
partitioned by (dt string);
insert overwrite table tmp.test
partition(dt='20190509')
select
'0001';
// show
select
*
from
tmp.test
where
dt='20190509';
+-------+-----------
| id | dt |
+-------+-----------+
| 0001 | 20190509 |
+-------+-----------+
1 row selected (0.234 seconds)
2)添加字段重写数据新增字段为空
//add columns
alter table tmp.test add columns(name string);
//insert data
insert overwrite table tmp.test
partition(dt='20190509')
select
'0001' as id,
'光头强' as name;
//show
select
*
from
tmp.test
where
dt=