Hive的复杂数据类型主要分为3类:array、map 和 struct。
1. array
测试数据: 列间用 \t 分隔,第二列用逗号分隔。
zhangsan chengdu,shanghai,beijing
lisi tianjin,taiyuan,chongqing
wangwu xian,nanning,beijing
建表:
create table hive_array(
name string,
locations array<string>
) row format delimited fields terminated by '\t'
collection items terminated by ',';
查询:
//取出数组的第一个元素
select name, locations[0] from hive_array;
//返回数组长度
select name, size(locations) from hive_array;
//包含返回true,否则false
select * from hive_array where array_contains(locations,'tianjin');
2. map
测试数据:
1,zhangsan,father:xiaoming#mother:xiaohuang,28
2,lisi,father:xiaoxi#mother:xiaofang#brother:xiaoli,22
3,wangwu,father:xiaoqiang#mother:xiaohong#brother:xiaowang,35
建表:
create table hive_map(
id int,
name string,
members map<string,string>,
age int
) row format delimited fields terminated by ','
collection items terminated by '#' //组间分隔符
map keys terminated by ':'; //组内分隔符
查询:
select name, members['father'], members['mother'] from hive_map;
select name, map_keys(members) from hive_map;
select name, size(members) from hive_map;
select name, members['brother'] from hive_map where array_contains(map_keys(members),'brother');
//拥有兄弟的人及兄弟名称
3. struct
测试数据:
192.168.1.1#zhangsan:40
192.168.1.2#lisi:25
192.168.1.3#wangwu:38
建表:
create table hive_struct(
ip string,
userinfo struct<name:string,age:int>,
) row format delimited fields terminated by '#'
collection items terminated by ':'; //组间分隔符
查询:
select ip, userinfo.name, userinfo.age from hive_struct;