一、字段类型
1.1 图表
分类 | 类型 | 描述 | 字面量示例 |
原始类型 | BOOLEAN | true/false | TRUE |
| TINYINT | 1字节的有符号整数-128~127 | 1Y |
| SMALLINT | 2个字节的有符号整数,-32768~32767 | 1S |
| INT | 4个字节的带符号整数 | 1 |
| BIGINT | 8字节带符号整数 | 1L |
| FLOAT | 4字节单精度浮点数1.0 |
|
| DOUBLE | 8字节双精度浮点数 | 1.0 |
| DEICIMAL | 任意精度的带符号小数 | 1.0 |
| STRING | 字符串,变长 | “a”,’b’ |
| VARCHAR | 变长字符串 | “a”,’b’ |
| CHAR | 固定长度字符串 | “a”,’b’ |
| BINARY | 字节数组 | 无法表示 |
| TIMESTAMP | 时间戳,毫秒值精度 | 122327493795 |
| DATE | 日期 | ‘2016-03-29’ |
| 时间频率间隔 |
| |
复杂类型 | ARRAY | 有序的的同类型的集合 | array(1,2) |
| MAP | key-value,key必须为原始类型,value可以任意类型 | map(‘a’,1,’b’,2) |
| STRUCT | 字段集合,类型可以不同 | struct(‘1’,1,1.0), named_stract(‘col1’,’1’,’col2’,1,’clo3’,1.0) |
| UNION | 在有限取值范围内的一个值 | create_union(1,’a’,63) |
1.2 Array类型
数据源
zhangsan beijing,shanghai,tianjin,hangzhou wangwu changchun,chengdu,wuhan,beijin
建表结构
create table hive_array(name string, work_locations array<string>) row format delimited fields terminated by '\t' COLLECTION ITEMS TERMINATED BY ',';
导入数据
load data local inpath '/export/server/hivedatas/work_locations.csv' overwrite into table hive_array;
常用查询
-- 查询所有数据
select * from hive_array;
-- 查询work_locations数组中第一个元素
select name, work_locations[0] location from hive_array; #此处location是别名
-- 查询work_locations数组中元素的个数
select name, size(work_locations) location from hive_array; #此处location是别名
-- 查询work_locations数组中包含tianjin的信息
select * from hive_array where array_contains(work_locations,'tianjin');
elect * from hive_array;
select name, work_locations[0] location from hive_array; #此处location是别名
1.3 map类型
源数据:
hive_map.csv
说明:字段与字段分隔符: “,”;需要map字段之间的分隔符:"#";map内部k-v分隔符:":"
1,zhangsan,father:xiaoming#mother:xiaohuang#brother:xiaoxu,28
2,lisi,father:mayun#mother:huangyi#brother:guanyu,22
3,wangwu,father:wangjianlin#mother:ruhua#sister:jingtian,29
4,mayun,father:mayongzhen#mother:angelababy,26
建表结构
create table hive_map( id int, name string, members map<string,string>, age int ) row format delimited fields terminated by ',' COLLECTION ITEMS TERMINATED BY '#' MAP KEYS TERMINATED BY ':';
导入数据
load data local inpath '/export/server/hivedatas/hive_map.csv' overwrite into table hive_map;
常用查询
select * from hive_map;
select id, name, members['father'] father, members['mother'] mother, age from hive_map;
select id, name, map_keys(members) as relation from hive_map;
select id, name, map_values(members) as relation from hive_map;
select id,name,size(members) num from hive_map;
select * from hive_map where array_contains(map_keys(members), 'brother');
select id,name, members['brother'] brother from hive_map where array_contains(map_keys(members), 'brother');
select * from hive_map;
select id, name, members['father'] father, members['mother'] mother, age from hive_map;
select id, name, map_keys(members) as relation from hive_map;
select id, name, map_values(members) as relation from hive_map;
select id,name,size(members) num from hive_map;
select * from hive_map where array_contains(map_keys(members), 'brother');
select id,name, members['brother'] brother from hive_map where array_contains(map_keys(members), 'brother');
1.4 struct类型
源数据:
说明:字段之间#分割,第二个字段之间冒号分割
hive_struct.csv
192.168.1.1#zhangsan:40
192.168.1.2#lisi:50
192.168.1.3#wangwu:60
192.168.1.4#zhaoliu:70
建表结构
create table hive_struct( ip string, info struct<name:string, age:int> ) row format delimited fields terminated by '#' COLLECTION ITEMS TERMINATED BY ':';
导入数据
load data local inpath '/export/server/hivedatas/hive_struct.csv' into table hive_struct;
常用查询用查询
select * from hive_struct; select ip, info.name from hive_struct;
select * from hive_struct;
select ip, info.name from hive_struct;