一、集合数据类型
在 Hive 中的数据类型:
int bigint double string timestamp
struct 结构体 类似于java对象
array 数组
map 键值对
{"name": "songsong","friends": ["bingbing" , "lili"] ,"children": {"xiao song": 18 ,"xiaoxiao song": 14},"address": {"street": "hui long guan" ,"city": "beijing" }}
解析如下:
songsong,bingbing_lili,xiao song:18_xiaoxiao song:14,hui long guan_beijing
1、创建表
-- One user per text line; nested fields use the delimiters declared below.
CREATE TABLE tb_user (
    name     string,
    friends  array<string>,
    children map<string, int>,
    address  struct<street:string, city:string>
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','            -- separator between top-level columns
    COLLECTION ITEMS TERMINATED BY '_'  -- separator between array/struct elements
    MAP KEYS TERMINATED BY ':'          -- separator between a map key and its value
    LINES TERMINATED BY '\n';
2、导入数据
-- Bulk-load the local delimited files into tb_user (INTO appends; OVERWRITE would replace).
LOAD DATA LOCAL INPATH '/data/collection/' INTO TABLE tb_user;
3、array
---数组
取值 friends[index]
+----------+---------+
| fs1 | fs2 |
+----------+---------+
| fengjie | furong |
| caicai | susu |
+----------+---------+
-- 长度
size(arr)
-- Take the second friend when it exists, otherwise fall back to the first.
-- friends[i] returns NULL for an out-of-range index, so guard with size():
-- the original if(1 > size(friends), 0, 1) only caught the empty array
-- (where index 0 is NULL anyway) and returned NULL for single-element arrays.
select friends[if(size(friends) > 1, 1, 0)] from tb_user ;
4、map集合
-- 根据key获取map的value值
-- Look up one map value by key; yields NULL when the key is absent.
SELECT children['xiaoben']
FROM tb_user;
-- 获取map集合中所有的key map_keys(map)
+--------------------------------+
| _c0 |
+--------------------------------+
| ["xiaoben","daben"] |
| ["xiao yang","xiaoxiao yang"] |
+--------------------------------+
-- 获取map集合中所有的value map_values(map)
+----------+
| _c0 |
+----------+
| [18,19] |
| [18,19] |
+----------+
5、struct 结构体
--- struct 结构体获取属性
-- Read individual struct attributes with dot notation.
-- Fix: the statement was missing its terminating ';' (every other statement in this file has one).
select address.street, address.city from tb_user;
+----------------+----------+
| street | city |
+----------------+----------+
| hui long guan | beijing |
| chao yang | beijing |
+----------------+----------+
二、反射 reflect 函数
reflect(class , methodName , args....)
1 编写java程序
2 打包
3 上传到linux系统
4 add jar /test.jar 或 将jar包添加到 $HIVE_HOME/lib/目录下
5 select reflect('cn._51doit.test.Test1' , 'test1' , 'HELLO' , 23) ;
三、自定义函数
1 创建maven工程 添加依赖
<dependencies>
<!-- https://mvnrepository.com/artifact/org.apache.hive/hive-exec -->
<!-- hive-exec provides the UDF base class that the custom-function step below extends -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>1.2.1</version>
</dependency>
</dependencies>
2 编写 类 继承 UDF 类
3 重写方法 evaluate 允许重载
4 打包并上传到 HDFS
5 创建函数
-- Register a permanent UDF; the implementing jar is resolved from HDFS at call time.
CREATE FUNCTION sayHello AS 'com._51doit.functions.MyFunction'
    USING JAR 'hdfs://linux01:8020/func.jar';