一 集合数据类型有 :
1) array 数组
2) map (K,V形式)
3) struct (结构体,对象)
二 数据源
name , friends , children ,address.street/city
songsong,bingbing_lili,xiao song:18_xiaoxiao song:19,hui long guan_beijing
yangyang,caicai_susu,xiao yang:18_xiaoxiao yang:19,chao yang_beijing
三 集合的建表操作
1 array 集合的创建 ,将结构化数据插入到 /root/hive/ 目录下,变成静态文件
[root@linux03 hive]# vi person.txt
[root@linux03 hive]# ll
-rw-r--r--. 1 root root 144 Sep 4 19:13 person.txt
2 在 hive 上创建表格(有 array ,map ,struct 结构体的表格) ,加载数据并查询
------建表-------- 一张表中有三种集合来处理不同形式的数据
0: jdbc:hive2://linux03:10000>
create table tb_conllection(
name string,
friends array<string>,
children map<string,int>,
address struct<street:string,city:string>
)
row format delimited fields terminated by ","
collection items terminated by "_" ---array 数组(朋友用数组装)
map keys terminated by ":" ----map 集合(孩子用map集合装, kv 格式)
lines terminated by "\n"; ----struct结构体(地址用 struct 结构体装,方便查询)
------加载数据
load data local inpath "/root/hive/person.txt" into table tb_conllection;
-----查询数据
0: jdbc:hive2://linux03:10000> select * from tb_conllection;
+----------------------+-------------------------+--------------------------------------+----------------------------------------------+
| tb_conllection.name | tb_conllection.friends | tb_conllection.children | tb_conllection.address |
+----------------------+-------------------------+--------------------------------------+----------------------------------------------+
| songsong | ["bingbing","lili"] | {"xiao song":18,"xiaoxiao song":19} | {"street":"hui long guan","city":"beijing"} |
| yangyang | ["caicai","susu"] | {"xiao yang":18,"xiaoxiao yang":19} | {"street":"chao yang","city":"beijing"} |
+----------------------+-------------------------+--------------------------------------+----------------------------------------------+
四 需求的实现
1 需求 1 : 将两个人的第一位好友和第二位好友找出来
select
name,
friends[0] first_fs,
friends[1] second_fs,
friends[2] tr
from
tb_conllection;
+-----------+-----------+------------+-------+
| name | first_fs | second_fs | tr |
+-----------+-----------+------------+-------+
| songsong | bingbing | lili | NULL |
| yangyang | caicai | susu | NULL |
+-----------+-----------+------------+-------+
2 需求 2 : 查看好友中有 bingbing 的用户
select
name,
array_contains(friends,"bingbing")
from
tb_conllection;
+-----------+--------+
| name | _c1 |
+-----------+--------+
| songsong | true |-----代表有
| yangyang | false |-----代表无
+-----------+--------+
3 需求 3 : 查看用户好友个数和孩子个数
length(字符串) 查看字符串的长度
size(集合) 查看集合的长度
select
name,
size(friends) friends_size,
size(children) children_size
from
tb_conllection;
+-----------+---------------+----------------+
| name | friends_size | children_size |
+-----------+---------------+----------------+
| songsong | 2 | 2 |
| yangyang | 2 | 2 |
+-----------+---------------+----------------+
4 需求 4 : 将好友数组进行排序 sorted_array(只能升序排序)
select
name,
sort_array(friends) fs_array
from
tb_conllection;
+-----------+----------------------+
| name | fs_array
+-----------+----------------------+
| songsong | ["bingbing","lili"] | 根据ASCLL码进行升序排序
| yangyang | ["caicai","susu"] |
+-----------+----------------------+
5 explode(数组/map集合)的运用,将map集合里面的元素以 KV 的形式展示出来
select
explode(children)
from
tb_conllection
where
name = "songsong";
+----------------+--------+
| key | value | {"xiao song":18,"xiaoxiao song":19}==>children map集合
+----------------+--------+
| xiao song | 18 |
| xiaoxiao song | 19 |
+----------------+--------+
select
explode(friends)
from
tb_conllection
where
name = "songsong";
+-----------+
| col | ["bingbing","lili"] ==> friends array数组
+-----------+
| bingbing |
| lili |
+-----------+
6 map_keys(map集合)[i] 的使用 : 将map集合里面的指定index 的 key 取出来
select
name,
map_keys(children)[0] key1
from
tb_conllection
where
name = "songsong";
+-----------+------------+
| name | key1 |
+-----------+------------+
| songsong | xiao song |
+-----------+------------+
7 map_values(map集合)/[i] 的使用 : 将map集合里面指定index或者未指定 的values 取出来
select
name,
map_values(children) value ==>这里的别名不能使用 values ,否者会报错提示无法识别 未指定index
from
tb_conllection
where
name = "songsong";
+-----------+----------+
| name | value |
+-----------+----------+
| songsong | [18,19] |
+-----------+----------+
select
name,
map_values(children)[0] value ===>指定 index 的value值
from
tb_conllection
where
name = "songsong";
+-----------+--------+
| name | value |
+-----------+--------+
| songsong | 18 |
+-----------+--------+
8 将children map集合中孩子是"xiao song"的对应value值取出来
select
name,
children["xiao song"] xs_age
from
tb_conllection;
+-----------+---------+
| name | xs_age |
+-----------+---------+
| songsong | 18 |
| yangyang | NULL |
+-----------+---------+
因为不确定那个人有这个孩子 ,所以无法根据名字字段做过滤处理,没有做过滤的话会将空字符串一起打印出来,
所以要对查询出来的结果的空字符串做过滤处理 ,需要用到嵌套子查询 (注: 嵌套子查询没有别名会报错)
select
*
from
(select
name,
children["xiao song"] xs_age
from
tb_conllection)t
where
xs_age is not null;
+-----------+-----------+
| t.name | t.xs_age |
+-----------+-----------+
| songsong | 18 |
+-----------+-----------+
9 struct 结构体的取值
select
name,
address.city,
address.street
from
tb_conllection;
+-----------+----------+----------------+
| name | city | street |
+-----------+----------+----------------+
| songsong | beijing | hui long guan |
| yangyang | beijing | chao yang |
+-----------+----------+----------------+
五 侧窗口函数的运用(lateral view)
1 需求1 : 将好友 friends 数组转为列 ,与用户一一对应
步骤1 : 将friends array数组的元素都展示出来
select explode(friends) from tb_conllection;
步骤2 : 将名字单独找出来
select name from tb_conllection;
+-----------+ +-----------+
| col | | songsong |
+-----------+ | yangyang |
| bingbing | | |
| lili | +-----------+
| caicai |
| susu |
+-----------+
步骤3 : 然后将数组展示的表和这个表拼接起来
select
name,
arr(写 col ,与下字段对应)
from
tb_conllection ==> 表1
lateral view
explode(friends) tmp as arr(或者写 col); ==> tmp 为炸裂展示数据的临时虚拟表2 ,arr 为展示的炸裂的这个字段
+-----------+-----------+
| name | arr/col | ==> 这个字段是自定义的 ,也可以是 friend
+-----------+-----------+
| songsong | bingbing |
| songsong | lili |
| yangyang | caicai |
| yangyang | susu |
+-----------+-----------+
源数据
select name,friends from tb_conllection;
+-----------+----------------------+
| name | friends |
+-----------+----------------------+
| songsong | ["bingbing","lili"] |
| yangyang | ["caicai","susu"] |
+-----------+----------------------+
2 将 children集合里面的元素展示出来(以列的形式)
select
name,
cname,cage
from
tb_conllection
lateral view
explode(children) tmp as cname,cage;
+-----------+----------------+-------+
| name | cname | cage | ==>cname,cage分别是孩子的名字和年龄 ,也可以用map的 k,v 代替
+-----------+----------------+-------+
| songsong | xiao song | 18 |
| songsong | xiaoxiao song | 19 |
| yangyang | xiao yang | 18 |
| yangyang | xiaoxiao yang | 19 |
+-----------+----------------+-------+