Hive 之复杂数据类型,以及内置函数的应用

复杂数据类型array

-- array 类型
-- 创表语法

 --不指定数组内部分隔符
create table if not exists array1(
name string,
scores array<string>
)
row format delimited 
fields terminated by '\t'
;

load data local inpath '/root/hivedata/array1' into table array1;
select * from array1;


--指定数组内部分隔符
drop table array2;
create table if not exists array2(
name string,
scores array<string>
)
row format delimited 
fields terminated by '\t'
collection items terminated by ';'
;
load data local inpath '/root/hivedata/array1' into table array2;
select * from array2;

--查看数组中的元素,(不指定分隔符的话,scores数组中只有一个元素  如 ["55,33,22,11"])
select scores[0],scores[1] from array2;

--统计数组中的元素个数
select name,size(scores) from array2;

--统计array2中每个人的总成绩(因为王五的成绩只有两条,null值于任何数运算都是null,所以需要使用nvl来替换为0)
select name,scores[0]+scores[1]+nvl(scores[2],0)+nvl(scores[3],0) from array2;

select name,scores[10] from array2;
-- 数据越界不会报异常,会以null值填充

--explode函数,可以使array,map,复杂数据类型列转行(一行变多行)

select explode(scores) score from array2;--这条语句就是根据scores字段中的分隔符将array中每一个元素切分出来,换行显示
--(一行显示一个元素)

--lateral view :虚拟表  (这个函数会将UDTF函数生成的结果放到一个虚拟表中,然后这个虚拟表会和输入行join来达到数据聚合的作用)
-- UDTF:User-Defined Table-Generating Functions,用户定义表生成函数,用来解决输入一行输出多行

--需求:将表中如下数据
zhangsan	78,89,92,96
lisi	67,75,83,94
王五	23,12

转成如下格式:
zhangsan 78
zhangsan 89
zhangsan 92
zhangsan 96
lisi 67
lisi 75
lisi 83
lisi 94
王五 23
王五 12

-- hive中提供了一个展开函数explode,可以完成以上效果,是UDTF函数的一种
-- 作用:explode可以将集合字段,比如array,map等展开,一行中的一列变多行显示(列转行)
-- 语法: explode(colname)
将 array2表中的scores字段展开

select explode(scores) from array2;   展开后的效果如下
+------+--+
| col  |
+------+--+
| 78   |
| 89   |
| 92   |
| 96   |
| 67   |
| 75   |
| 83   |
| 94   |
| 23   |
| 12   |
+------+--+

-- 将开展后的数据与对应行的其他字段进行关联。需要借助虚拟表 lateral view
-- 虚拟表的作用是可以存储展开函数的数据

案例演示:
select name, score from array2 lateral view explode(scores)  A as score

+-----------+--------+--+
|   name    | score  |
+-----------+--------+--+
| zhangsan  | 78     |
| zhangsan  | 89     |
| zhangsan  | 92     |
| zhangsan  | 96     |
| lisi      | 67     |
| lisi      | 75     |
| lisi      | 83     |
| lisi      | 94     |
| 王五        | 23     |
| 王五        | 12     |
+-----------+--------+--+

案例需求:查询每个学生的总成绩

select name,sum(score)
from array2 lateral view explode(scores)  A as score
group by name


收集函数

搜集函数在hive中有两个,分别是:
--1. collect_set(colName)  :搜集的数据会去重
--2. collect_list(colName):搜集的数据不会去重
--3. 搜集后的结果是一个数组形式。


##数据准备:使用克隆的方式创建array4
create table array4 as select name, score from array2 lateral view explode(scores)  A as score;
insert into array4 values ('王五',12);

--使用collect_set查询,
select name,collect_set(score) from array4 group by name; --结果没有重复的数据
--使用collect_list查询,
select name,collect_list(score) from array4 group by name; --结果有重复的数据

案例演示:不分组和分组的情况
select collect_set(score) from array4;  --自动将整张表分为一组,所以select中不能写其他字段     将整张表的数据当成一组,搜集到一起
select name,collect_set(score) from array4 group by name;

案例演示:将以下表形式
+--------------+---------------+--+
| array4.name  | array4.score  |
+--------------+---------------+--+
| zhangsan     | 78            |
| zhangsan     | 89            |
| zhangsan     | 92            |
| zhangsan     | 96            |
| lisi         | 67            |
| lisi         | 75            |
| lisi         | 83            |
| lisi         | 94            |
| 王五           | 23            |
| 王五           | 12            |
| 王五           | 12            |
+--------------+---------------+--+
转为:
+--------------+----------------+--+
| array2.name  | array2.scores  |
+--------------+----------------+--+
| zhangsan     | [78,89,92,96]  |
| lisi         | [67,75,83,94]  |
| 王五           | [23,12,12]        |
+--------------+----------------+--+
借助搜集函数完成。
create table array5
as 
select name,collect_list(score) scores from array4 group by name;

1.4 复杂类型之Map类型

有以下数据文件
zhangsan	chinese:78,math:89,english:92,nature:96
lisi	chinese:67,math:75,english:83,nature:94
王五	english:23,nature:12

就可以使用复杂类型map类型来对应这个字段。

--语法:
create table tableName(
.....
colName map<T,T>
.....
)

##案例演示:
--建表:
create table map1(
name string,
scores map<string,int>
)
row format delimited
fields terminated by '\t'
collection items terminated by ','
map keys terminated by ':';

load data local inpath '/root/data/map.txt' into table map1;


-- map类型的访问语法: colName[key]
-- 查询每个人的数学成绩:
select name,scores['math'] from map1;
-- 计算每个人的总成绩
select name,nvl(scores['chinese'],0)+nvl(scores['math'],0)+nvl(scores['english'],0)+nvl(scores['nature'],0) `total` from map1;
-- 计算每个人的考试科目数
select name,size(scores) from map1;


-- 案例需求:将map类型的数据进行展开练习,展开如下效果
zhangsan	chinese	78
zhangsan	math	89
zhangsan	english	92
zhangsan	nature	96
lisi	chinese	67
lisi	math	75
lisi	english	83
lisi	nature	94
王五	english	23
王五	nature	12

--(1) 展开map类型的字段
select explode(scores) as (course,score) from map1;
+----------+--------+--+
|  course  | score  |
+----------+--------+--+
| chinese  | 78     |
| math     | 89     |
| english  | 92     |
| nature   | 96     |
| chinese  | 67     |
| math     | 75     |
| english  | 83     |
| nature   | 94     |
| english  | 23     |
| nature   | 12     |
+----------+--------+--+
-- (2) 借助虚拟表,与表中其他字段关联
select name,course,score from map1 lateral view explode(scores) t as course,score
+-----------+----------+--------+--+
|   name    |  course  | score  |
+-----------+----------+--------+--+
| zhangsan  | chinese  | 78     |
| zhangsan  | math     | 89     |
| zhangsan  | english  | 92     |
| zhangsan  | nature   | 96     |
| lisi      | chinese  | 67     |
| lisi      | math     | 75     |
| lisi      | english  | 83     |
| lisi      | nature   | 94     |
| 王五        | english  | 23     |
| 王五        | nature   | 12     |
+-----------+----------+--------+--+

将数据动态保存到map字段里的应用

需求:将以下的表数据
+-----------+----------+--------+--+
|   name    |  course  | score  |
+-----------+----------+--------+--+
| zhangsan  | chinese  | 78     |
| zhangsan  | math     | 89     |
| zhangsan  | english  | 92     |
| zhangsan  | nature   | 96     |
| lisi      | chinese  | 67     |
| lisi      | math     | 75     |
| lisi      | english  | 83     |
| lisi      | nature   | 94     |
| 王五        | english  | 23     |
| 王五        | nature   | 12     |
+-----------+----------+--------+--+
转成以下的形式
+------------+----------------------------------------------------+--+
| map1.name  |                    map1.scores                     |
+------------+----------------------------------------------------+--+
| zhangsan   | {"chinese":78,"math":89,"english":92,"nature":96}  |
| lisi       | {"chinese":67,"math":75,"english":83,"nature":94}  |
| 王五         | {"english":23,"nature":12}                         |
+------------+----------------------------------------------------+--+



--1.  数据准备
create table map2 
as 
select name,course,score from map1 lateral view explode(scores) t as course,score;
+-----------+----------+--------+--+
|   name    |  course  | score  |
+-----------+----------+--------+--+
| zhangsan  | chinese  | 78     |
| zhangsan  | math     | 89     |
| zhangsan  | english  | 92     |
| zhangsan  | nature   | 96     |
| lisi      | chinese  | 67     |
| lisi      | math     | 75     |
| lisi      | english  | 83     |
| lisi      | nature   | 94     |
| 王五        | english  | 23     |
| 王五        | nature   | 12     |
+-----------+----------+--------+--+
--2. 借助concat函数将course和score拼接到一起,拼接符号为冒号
 select name,concat(course,":",score) `result` from map2;
 +-----------+-------------+--+
|   name    |   result    |
+-----------+-------------+--+
| zhangsan  | chinese:78  |
| zhangsan  | math:89     |
| zhangsan  | english:92  |
| zhangsan  | nature:96   |
| lisi      | chinese:67  |
| lisi      | math:75     |
| lisi      | english:83  |
| lisi      | nature:94   |
| 王五        | english:23  |
| 王五        | nature:12   |
+-----------+-------------+--+

--3. 使用搜集函数搜集每一个人的所有科目信息,注意,搜集函数的返回值是数组形式
select name,collect_set(concat(course,":",score))  from map2 group by name
+-----------+----------------------------------------------------+--+
|   name    |                         c1                         |
+-----------+----------------------------------------------------+--+
| lisi      | ["chinese:67","math:75","english:83","nature:94"]  |
| zhangsan  | ["chinese:78","math:89","english:92","nature:96"]  |
| 王五        | ["english:23","nature:12"]                         |
+-----------+----------------------------------------------------+--+

--4. 将数组中的所有元素使用concat_ws函数拼接成一个字符串形式
select name, concat_ws(",",collect_set(concat(course,":",score))) from map2 group by name;
+-----------+------------------------------------------+--+
|   name    |                    c1                    |
+-----------+------------------------------------------+--+
| lisi      | chinese:67,math:75,english:83,nature:94  |
| zhangsan  | chinese:78,math:89,english:92,nature:96  |
| 王五        | english:23,nature:12                     |
+-----------+------------------------------------------+--+

--5. 借助str_to_map函数将字符串转为map类型
select name,str_to_map(concat_ws(",",collect_set(concat(course,":",score)))) from map2 group by name;

--6. 使用克隆的方式,存储查询出来的数据。
create table map3
as
select name,str_to_map(concat_ws(",",collect_set(concat(course,":",score)))) scores from map2 group by name;

--7. 查看表结构
desc map3;
OK
+-----------+---------------------+----------+--+
| col_name  |      data_type      | comment  |
+-----------+---------------------+----------+--+
| name      | string              |          |
| scores    | map<string,string>  |          |
+-----------+---------------------+----------+--+

--8. 计算每个人的英语和自然的成绩之和
select name,scores['english']+scores['nature'] from map3;
OK
+-----------+--------+--+
|   name    |   c1   |
+-----------+--------+--+
| lisi      | 177.0  |
| zhangsan  | 188.0  |
| 王五        | 35.0   |
+-----------+--------+--+

--9:从8的结果中可以得出纯数字的字符串可以自动转成浮点型进行运算。```

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值