05 字段类型和操作

最新推荐文章于 2022-09-15 18:45:27 发布

莹火虫的另一半

最新推荐文章于 2022-09-15 18:45:27 发布

阅读量144

点赞数

分类专栏： hive

本文链接：https://blog.csdn.net/woshilovetg/article/details/111875972

版权

hive 专栏收录该内容

19 篇文章 0 订阅

订阅专栏

一、字段类型

1.1 图表

分类	类型	描述	字面量示例
原始类型	BOOLEAN	true/false	TRUE
	TINYINT	1字节的有符号整数-128~127	1Y
	SMALLINT	2个字节的有符号整数，-32768~32767	1S
	INT	4个字节的带符号整数	1
	BIGINT	8字节带符号整数	1L
	FLOAT	4字节单精度浮点数1.0
	DOUBLE	8字节双精度浮点数	1.0
	DEICIMAL	任意精度的带符号小数	1.0
	STRING	字符串，变长	“a”,’b’
	VARCHAR	变长字符串	“a”,’b’
	CHAR	固定长度字符串	“a”,’b’
	BINARY	字节数组	无法表示
	TIMESTAMP	时间戳，毫秒值精度	122327493795
	DATE	日期	‘2016-03-29’
	INTERVAL	时间频率间隔
复杂类型	ARRAY	有序的的同类型的集合	array(1,2)
	MAP	key-value,key必须为原始类型，value可以任意类型	map(‘a’,1,’b’,2)
	STRUCT	字段集合,类型可以不同	struct(‘1’,1,1.0), named_stract(‘col1’,’1’,’col2’,1,’clo3’,1.0)
	UNION	在有限取值范围内的一个值	create_union(1,’a’,63)

1.2 Array类型

数据源

zhangsan beijing,shanghai,tianjin,hangzhou wangwu changchun,chengdu,wuhan,beijin

建表结构

create table hive_array(name string, work_locations array<string>) row format delimited fields terminated by '\t' COLLECTION ITEMS TERMINATED BY ',';

导入数据

load data local inpath '/export/server/hivedatas/work_locations.csv' overwrite into table hive_array;

常用查询

-- 查询所有数据
select * from hive_array;
-- 查询work_locations数组中第一个元素
select name, work_locations[0] location from hive_array; #此处location是别名
-- 查询work_locations数组中元素的个数
select name, size(work_locations) location from hive_array; #此处location是别名
-- 查询work_locations数组中包含tianjin的信息
select * from hive_array where array_contains(work_locations,'tianjin');

elect * from hive_array;

select name, work_locations[0] location from hive_array; #此处location是别名

1.3 map类型

源数据:

hive_map.csv

说明：字段与字段分隔符: “,”；需要map字段之间的分隔符："#"；map内部k-v分隔符：":"

1,zhangsan,father:xiaoming#mother:xiaohuang#brother:xiaoxu,28
2,lisi,father:mayun#mother:huangyi#brother:guanyu,22
3,wangwu,father:wangjianlin#mother:ruhua#sister:jingtian,29
4,mayun,father:mayongzhen#mother:angelababy,26

建表结构

create table hive_map( id int, name string, members map<string,string>, age int ) row format delimited fields terminated by ',' COLLECTION ITEMS TERMINATED BY '#' MAP KEYS TERMINATED BY ':';

导入数据

load data local inpath '/export/server/hivedatas/hive_map.csv' overwrite into table hive_map;

常用查询

select * from hive_map;
select id, name, members['father'] father, members['mother'] mother, age from hive_map;
select id, name, map_keys(members) as relation from hive_map;
select id, name, map_values(members) as relation from hive_map;
select id,name,size(members) num from hive_map;
select * from hive_map where array_contains(map_keys(members), 'brother');
select id,name, members['brother'] brother from hive_map where array_contains(map_keys(members), 'brother');

select * from hive_map;

select id, name, members['father'] father, members['mother'] mother, age from hive_map;

select id, name, map_keys(members) as relation from hive_map;

select id, name, map_values(members) as relation from hive_map;

select id,name,size(members) num from hive_map;

select * from hive_map where array_contains(map_keys(members), 'brother');

select id,name, members['brother'] brother from hive_map where array_contains(map_keys(members), 'brother');

1.4 struct类型

源数据：

说明：字段之间#分割，第二个字段之间冒号分割

hive_struct.csv

192.168.1.1#zhangsan:40
192.168.1.2#lisi:50
192.168.1.3#wangwu:60
192.168.1.4#zhaoliu:70

建表结构

create table hive_struct( ip string, info struct<name:string, age:int> ) row format delimited fields terminated by '#' COLLECTION ITEMS TERMINATED BY ':';

导入数据

load data local inpath '/export/server/hivedatas/hive_struct.csv' into table hive_struct;

常用查询用查询

select * from hive_struct; select ip, info.name from hive_struct;

select * from hive_struct;