kudu to csv to hive to kudu
1 建表kudu
-- Source Kudu table: one row per id.
CREATE TABLE default.kudu_table (
    id    STRING NOT NULL,  -- primary key and hash-partition column
    ts    BIGINT,
    count BIGINT,
    PRIMARY KEY (id)
)
PARTITION BY HASH (id) PARTITIONS 6  -- rows are spread over 6 hash partitions of id
STORED AS KUDU;
2 插入数据
-- Seed two sample rows.
-- String literals must use plain ASCII quotes; typographic (curly) quotes are
-- rejected by the SQL parser. The explicit column list keeps the statement
-- valid if the table's column order ever changes.
INSERT INTO default.kudu_table (id, ts, count)
VALUES
    ('1', 11, 11),
    ('2', 22, 22);
3 导出到linux
# Export the table as delimited text:
#   -B                  plain delimited output instead of the pretty-printed grid
#   --output_delimiter  field separator written between columns
#   -o                  file that receives the query result
# The query is wrapped in plain ASCII single quotes so the shell passes it as a
# single argument; curly quotes do not quote anything in the shell.
# Columns are listed explicitly so the field order in the file is fixed.
impala-shell -q 'select id, ts, count from default.kudu_table' -B --output_delimiter="*" -o /data/tmp/test.csv
4 建表hive,临时表
-- Hive staging table (delimited text) that receives the exported file.
-- The field terminator must be the same character that was passed to
-- --output_delimiter during the export, and it must be written with plain
-- ASCII quotes.
CREATE TABLE default.kudu_table2 (
    id    STRING,
    ts    BIGINT,
    count BIGINT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '*';
处理null值问题:impala-shell 导出时会把 NULL 写成字符串 NULL,而 Hive 文本表默认用 \N 表示 NULL,所以需要修改表的 null 格式
-- Make the staging table read the literal text NULL in the data file as SQL NULL.
-- Property name and value must be quoted with plain ASCII quotes.
ALTER TABLE default.kudu_table2 SET SERDEPROPERTIES ('serialization.null.format' = 'NULL');
5 csv导入hive
-- Load the exported file from the local filesystem into the staging table.
-- The path is a string literal and must use plain ASCII quotes.
LOAD DATA LOCAL INPATH '/data/tmp/test.csv' INTO TABLE default.kudu_table2;
6 建表kudu,最终表
-- Final Kudu table: same columns, key and partitioning as default.kudu_table.
CREATE TABLE default.kudu_table3 (
    id    STRING NOT NULL,  -- primary key and hash-partition column
    ts    BIGINT,
    count BIGINT,
    PRIMARY KEY (id)
)
PARTITION BY HASH (id) PARTITIONS 6  -- rows are spread over 6 hash partitions of id
STORED AS KUDU;
7 hive to kudu,刷新impala数据
-- Refresh Impala's view of the staging table before reading from it.
-- NOTE(review): assumes the staging table was created and loaded through Hive
-- (as the step headings say) and that this step runs in Impala;
-- INVALIDATE METADATA is an Impala statement — confirm.
INVALIDATE METADATA default.kudu_table2;

-- Copy the staged rows into the final Kudu table. Explicit column lists on
-- both sides keep the mapping independent of either table's column order.
INSERT INTO default.kudu_table3 (id, ts, count)
SELECT
    id,
    ts,
    count
FROM default.kudu_table2;