Hive text 格式,默认把 \N
作为 NULL,没有内容的列认为是长度为 0 的字符串。
create table t1(c1 string, c2 string) row format delimited fields terminated by ',' stored as textfile;
vim t1.txt
\N,1
,2
load data local inpath 't1.txt' overwrite into table t1;
hive> select * from t1;
OK
NULL 1
2
Time taken: 0.924 seconds, Fetched: 2 row(s)
hive> select * from t1 where c1 is null;
OK
NULL 1
Time taken: 0.293 seconds, Fetched: 1 row(s)
hive> select * from t1 where c1 = '';
OK
2
Time taken: 0.108 seconds, Fetched: 1 row(s)
把长度为 0 的字符串认为 NULL
如果已经有数据,需要把长度为 0 的字符串认为 NULL。
t2.txt 文件 。
1,,2
3,4,5
t2.txt 文件第 1 行,
之间没有任何字符,默认 hive 不会把此列认为是 NULL
create table t2(c1 string, c2 string,c3 string) row format delimited fields terminated by ',' stored as textfile;
load data local inpath 't2.txt' overwrite into table t2;
hive> select * from t2 where c2 is null;
OK
Time taken: 1.184 seconds
hive> select * from t2 where c2 = '';
OK
1 2
Time taken: 0.106 seconds, Fetched: 1 row(s)
hive>
如果需要默认为 NULL,设置 serdeproperties 属性。
alter table t2 set serdeproperties('serialization.null.format' = '');
hive> select * from t2 where c2 is null;
OK
1 NULL 2
Time taken: 0.149 seconds, Fetched: 1 row(s)
hive> select * from t2 where c2 = '';
OK
Time taken: 0.148 seconds