复杂数据类型array
create table if not exists array1(
name string,
scores array< string>
)
row format delimited
fields terminated by '\t'
;
load data local inpath '/root/hivedata/array1' into table array1;
select * from array1;
drop table array2;
create table if not exists array2(
name string,
scores array< string>
)
row format delimited
fields terminated by '\t'
collection items terminated by ';'
;
load data local inpath '/root/hivedata/array1' into table array2;
select * from array2;
select scores[ 0 ] , scores[ 1 ] from array2;
select name, size( scores) from array2;
select name, scores[ 0 ] + scores[ 1 ] + nvl( scores[ 2 ] , 0 ) + nvl( scores[ 3 ] , 0 ) from array2;
select name, scores[ 10 ] from array2;
select explode( scores) score from array2;
zhangsan 78 , 89 , 92 , 96
lisi 67 , 75 , 83 , 94
王五 23 , 12
转成如下格式:
zhangsan 78
zhangsan 89
zhangsan 92
zhangsan 96
lisi 67
lisi 75
lisi 83
lisi 94
王五 23
王五 12
将 array2表中的scores字段展开
select explode( scores) from array2; 展开后的效果如下
+
| col |
+
| 78 |
| 89 |
| 92 |
| 96 |
| 67 |
| 75 |
| 83 |
| 94 |
| 23 |
| 12 |
+
案例演示:
select name, score from array2 lateral view explode( scores) A as score
+
| name | score |
+
| zhangsan | 78 |
| zhangsan | 89 |
| zhangsan | 92 |
| zhangsan | 96 |
| lisi | 67 |
| lisi | 75 |
| lisi | 83 |
| lisi | 94 |
| 王五 | 23 |
| 王五 | 12 |
+
案例需求:查询每个学生的总成绩
select name, sum ( score)
from array2 lateral view explode( scores) A as score
group by name
收集函数
搜集函数在hive中有两个,分别是:
create table array4 as select name, score from array2 lateral view explode( scores) A as score;
insert into array4 values ( '王五' , 12 ) ;
select name, collect_set( score) from array4 group by name;
select name, collect_list( score) from array4 group by name;
案例演示:不分组和分组的情况
select collect_set( score) from array4;
select name, collect_set( score) from array4 group by name;
案例演示:将以下表形式
+
| array4. name | array4. score |
+
| zhangsan | 78 |
| zhangsan | 89 |
| zhangsan | 92 |
| zhangsan | 96 |
| lisi | 67 |
| lisi | 75 |
| lisi | 83 |
| lisi | 94 |
| 王五 | 23 |
| 王五 | 12 |
| 王五 | 12 |
+
转为:
+
| array2. name | array2. scores |
+
| zhangsan | [ 78 , 89 , 92 , 96 ] |
| lisi | [ 67 , 75 , 83 , 94 ] |
| 王五 | [ 23 , 12 , 12 ] |
+
借助搜集函数完成。
create table array5
as
select name, collect_list( score) scores from array4 group by name;
1.4 复杂类型之Map类型
有以下数据文件
zhangsan chinese:78 , math:89 , english:92 , nature:96
lisi chinese:67 , math:75 , english:83 , nature:94
王五 english:23 , nature:12
就可以使用复杂类型map类型来对应这个字段。
create table tableName(
. . . . .
colName map< T, T>
. . . . .
)
create table map1(
name string,
scores map< string, int >
)
row format delimited
fields terminated by '\t'
collection items terminated by ','
map keys terminated by ':' ;
load data local inpath '/root/data/map.txt' into table map1;
select name, scores[ 'math' ] from map1;
select name, nvl( scores[ 'chinese' ] , 0 ) + nvl( scores[ 'math' ] , 0 ) + nvl( scores[ 'english' ] , 0 ) + nvl( scores[ 'nature' ] , 0 ) ` total` from map1;
select name, size( scores) from map1;
zhangsan chinese 78
zhangsan math 89
zhangsan english 92
zhangsan nature 96
lisi chinese 67
lisi math 75
lisi english 83
lisi nature 94
王五 english 23
王五 nature 12
select explode( scores) as ( course, score) from map1;
+
| course | score |
+
| chinese | 78 |
| math | 89 |
| english | 92 |
| nature | 96 |
| chinese | 67 |
| math | 75 |
| english | 83 |
| nature | 94 |
| english | 23 |
| nature | 12 |
+
select name, course, score from map1 lateral view explode( scores) t as course, score
+
| name | course | score |
+
| zhangsan | chinese | 78 |
| zhangsan | math | 89 |
| zhangsan | english | 92 |
| zhangsan | nature | 96 |
| lisi | chinese | 67 |
| lisi | math | 75 |
| lisi | english | 83 |
| lisi | nature | 94 |
| 王五 | english | 23 |
| 王五 | nature | 12 |
+
将数据动态保存到map字段里的应用
需求:将以下的表数据
+
| name | course | score |
+
| zhangsan | chinese | 78 |
| zhangsan | math | 89 |
| zhangsan | english | 92 |
| zhangsan | nature | 96 |
| lisi | chinese | 67 |
| lisi | math | 75 |
| lisi | english | 83 |
| lisi | nature | 94 |
| 王五 | english | 23 |
| 王五 | nature | 12 |
+
转成以下的形式
+
| map1. name | map1. scores |
+
| zhangsan | {"chinese" :78 , "math" :89 , "english" :92 , "nature" :96 } |
| lisi | {"chinese" :67 , "math" :75 , "english" :83 , "nature" :94 } |
| 王五 | {"english" :23 , "nature" :12 } |
+
create table map2
as
select name, course, score from map1 lateral view explode( scores) t as course, score;
+
| name | course | score |
+
| zhangsan | chinese | 78 |
| zhangsan | math | 89 |
| zhangsan | english | 92 |
| zhangsan | nature | 96 |
| lisi | chinese | 67 |
| lisi | math | 75 |
| lisi | english | 83 |
| lisi | nature | 94 |
| 王五 | english | 23 |
| 王五 | nature | 12 |
+
select name, concat( course, ":" , score) ` result` from map2;
+
| name | result |
+
| zhangsan | chinese:78 |
| zhangsan | math:89 |
| zhangsan | english:92 |
| zhangsan | nature:96 |
| lisi | chinese:67 |
| lisi | math:75 |
| lisi | english:83 |
| lisi | nature:94 |
| 王五 | english:23 |
| 王五 | nature:12 |
+
select name, collect_set( concat( course, ":" , score) ) from map2 group by name
+
| name | c1 |
+
| lisi | [ "chinese:67" , "math:75" , "english:83" , "nature:94" ] |
| zhangsan | [ "chinese:78" , "math:89" , "english:92" , "nature:96" ] |
| 王五 | [ "english:23" , "nature:12" ] |
+
select name, concat_ws( "," , collect_set( concat( course, ":" , score) ) ) from map2 group by name;
+
| name | c1 |
+
| lisi | chinese:67 , math:75 , english:83 , nature:94 |
| zhangsan | chinese:78 , math:89 , english:92 , nature:96 |
| 王五 | english:23 , nature:12 |
+
select name, str_to_map( concat_ws( "," , collect_set( concat( course, ":" , score) ) ) ) from map2 group by name;
create table map3
as
select name, str_to_map( concat_ws( "," , collect_set( concat( course, ":" , score) ) ) ) scores from map2 group by name;
desc map3;
OK
+
| col_name | data_type | comment |
+
| name | string | |
| scores | map< string, string> | |
+
select name, scores[ 'english' ] + scores[ 'nature' ] from map3;
OK
+
| name | c1 |
+
| lisi | 177.0 |
| zhangsan | 188.0 |
| 王五 | 35.0 |
+