1、Array数据类型的使用
1.1、创建数据库表,以array作为数据类型
1
2
3
4
|
hive (hive_demo1)> create table stu_test(name array<string>,phone array<string>)
> row format delimited fields terminated by
'\t'
> collection items terminated by
','
;
OK
|
1.2、在/opt/datas/test目录下创建stu_info.txt文件,并将文件内容导入hive的stu_test表中
1
2
3
4
5
6
7
8
|
[liupeng
@tonyliu
test]$ pwd
/opt/datas/test
[liupeng
@tonyliu
test]$ ls
person.txt stu_info.txt
[liupeng
@tonyliu
test]$ more stu_info.txt
//创建数据并查看
小明,小王,小张
15975319964
,
18665851264
,
13278659963
tony, tom,jack
18677549911
,
15923458765
,
18665851989
[liupeng
@tonyliu
test]$
|
1
|
<span style=
"color: #99cc00"
> </span>
|
1
2
3
4
5
6
7
|
hive (hive_demo1)> load data local inpath
'/opt/datas/test/stu_info.txt'
into table stu_test;
//load数据到stu_test表中
Copying data from file:/opt/datas/test/stu_info.txt
Copying file: file:/opt/datas/test/stu_info.txt
Loading data to table hive_demo1.stu_test
Table hive_demo1.stu_test stats: [numFiles=
1
, numRows=
0
, totalSize=
108
, rawDataSize=
0
]
OK
Time taken:
0.439
seconds
|
1.3、查询stu_info表
1
2
3
4
5
6
|
hive (hive_demo1)> select * from stu_test;
//查看stu_test所有数据
OK
stu_test.name stu_test.phone
[
"小明"
,
"小王"
,
"小张"
] [
"15975319964"
,
"18665851264"
,
"13278659963"
]
[
"tony"
,
" tom"
,
"jack"
] [
"18677549911"
,
"15923458765"
,
"18665851989"
]
Time taken:
0.057
seconds, Fetched:
2
row(s)
|
1.4、查询stu_info表中array数据类型字段的指定列
1
2
3
4
5
6
|
hive (hive_demo1)> select name[
0
],phone[
0
] from stu_test;
//显示stu_info中的name,phone arraylist中的第一个元素
OK
_c0 _c1
小明
15975319964
tony
18677549911
Time taken:
0.117
seconds, Fetched:
2
row(s)
|
1.5、查询array数据类型字段的长度
1
2
3
4
5
6
7
8
9
10
11
12
13
|
hive (hive_demo1)> select name,size(phone) from stu_test;
//size()是用来判断长度的
OK
name _c1
[
"小明"
,
"小王"
,
"小张"
]
3
[
"tony"
,
" tom"
,
"jack"
]
3
Time taken:
0.071
seconds, Fetched:
2
row(s)
hive (hive_demo1)> select size(name),size(phone) from stu_test;
OK
_c0 _c1
3
3
3
3
Time taken:
0.08
seconds, Fetched:
2
row(s)
|
1.6、查询包含array数据类型字段指定列的一行数据
1
2
3
4
5
|
<br
class
=
"Apple-interchange-newline"
>hive (hive_demo1)> select name[
1
],phone[
1
] from stu_test where array_contains(name,
'小王'
);
//具体指定arraylist中第2个元素并指定符合条件的contains条目。
OK
_c0 _c1
小王
18665851264
Time taken:
0.079
seconds, Fetched:
1
row(s)
|
1.7、查看表结构
1
2
3
4
5
6
|
hive (hive_demo1)> desc stu_test;
OK
col_name data_type comment
name array<string>
phone array<string>
Time taken:
0.095
seconds, Fetched:
2
row(s)
|
2、Map数据类型的使用
2.1、创建表的同时使用Map数据类型
1
2
3
4
|
//创建 per_test表
hive (hive_demo1)> create table per_test(name string,info map<string,string>)
> row format delimited fields terminated by
'\t'
> collection items terminated by
'\073'
//因为我的数据字段分隔符中含有';',因为hdfs文件的的格式就是用分号隔开的。因此冲突情况下会报错。为了解决这个问题找到分号的asc码值 : <a href="http://blog.csdn.net/lxpbs8851/article/details/11525501" target="_blank">http://blog.csdn.net/lxpbs8851/article/details/11525501</a>
|
(其他字符有同样问题 也可以这样做) 找到的是073 那么将定义表的语句修改为:row format delimited fields terminated by '\073'
1
2
3
|
> map keys terminated by
':'
;
OK
Time taken:
0.09
seconds
|
2.2、在/opt/datas/test中编辑person.txt文件
1
2
3
4
5
6
7
8
9
|
[liupeng
@tonyliu
test]$ pwd
/opt/datas/test
[liupeng
@tonyliu
test]$ ls
person.txt stu_info.txt
[liupeng
@tonyliu
test]$ more person.txt
小明 年龄:
18
;身高:
1
米
8
;地址:北京
小红 年龄:
30
;身高:
1
米
72
;地址:上海
小李 年龄:
27
;身高:
1
米
90
;地址:深圳
[liupeng
@tonyliu
test]$
|
2.3、将person.txt文件中的数据导入hive中的per_test表中
1
2
3
4
5
6
7
|
hive (hive_demo1)> load data local inpath
'/opt/datas/test/person.txt'
into table per_test;
Copying data from file:/opt/datas/test/person.txt
Copying file: file:/opt/datas/test/person.txt
Loading data to table hive_demo1.per_test
Table hive_demo1.per_test stats: [numFiles=
1
, numRows=
0
, totalSize=
134
, rawDataSize=
0
]
OK
Time taken:
0.269
seconds
|
2.4、查询per_test表中全部数据
1
2
3
4
5
6
7
|
hive (hive_demo1)> select * from per_test;
OK
per_test.name per_test.info
小明 {
"年龄"
:
"18"
,
"身高"
:
"1米8"
,
"地址"
:
"北京"
}
小红 {
"年龄"
:
"30"
,
"身高"
:
"1米72"
,
"地址"
:
"上海"
}
小李 {
"年龄"
:
"27"
,
"身高"
:
"1米90"
,
"地址"
:
"深圳"
}
Time taken:
0.049
seconds, Fetched:
3
row(s)
|
2.5、查询per_test表中数据
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
//取per_test表中某个字段的值(name)
hive (hive_demo1)> select name from per_test;
OK
name
小明
小红
小李
Time taken:
0.062
seconds, Fetched:
3
row(s)
//取per_test表中某个字段的值(info)
//因为info在我们数据中有多个字段,中间是通过,号做了分割。因此直接取info的话会把所有字段返回。
hive (hive_demo1)> select info from per_test;
OK
info
{
"年龄"
:
"18"
,
"身高"
:
"1米8"
,
"地址"
:
"北京"
}
{
"年龄"
:
"30"
,
"身高"
:
"1米72"
,
"地址"
:
"上海"
}
{
"年龄"
:
"27"
,
"身高"
:
"1米90"
,
"地址"
:
"深圳"
}
Time taken:
0.039
seconds, Fetched:
3
row(s)
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
|
//也可以指定具体字段,以及字段中子字段的value进行输出。子字段是通过指定key的值来识别并输出value的
hive (hive_demo1)> select name,info[
'年龄'
]from per_info;
OK
name _c1
小明
18
小红
30
小李
27
Time taken:
0.049
seconds, Fetched:
3
row(s)
//同上,某个字段中也可以输出多个子字段的value值。通过指定key
hive (hive_demo1)> select name,info[
'年龄'
],info[
'身高'
],info[
'地址'
]from per_info;
OK
name _c1 _c2 _c3
小明
18
1
米
8
北京
小红
30
1
米
72
上海
小李
27
1
米
90
深圳
Time taken:
0.051
seconds, Fetched:
3
row(s)
|
3、Struct数据类型的使用
3.1、创建表的同时使用struct数据类型
1
2
3
4
5
6
|
hive (hive_demo1)> create table struct_info(
> id
int
,info struct<key:string,value:
int
>)
//info为字段标示名,struct<key,value> key指定子字段的键,value指定子字段对应键的值
> row format delimited fields terminated by
'.'
> collection items terminated by
':'
;
OK
Time taken:
0.125
seconds
|
3.2、创建stu_struct.txt文件,并将文件数据导入到hive的stu_struct表中
1
2
3
4
5
6
7
8
9
10
11
12
|
[liupeng
@tonyliu
test]$ pwd
/opt/datas/test
[liupeng
@tonyliu
test]$ ls
person_map.txt stu_list.txt stu_struct.txt
[liupeng
@tonyliu
test]$ more stu_struct.txt
1
.小明:
90
2
.小红:
100
3
.小方:
70
4
.小白:
50
5
.小兰:
60
6
.小花:
85
[liupeng
@tonyliu
test]$
|
1
2
3
4
5
6
7
|
hive (hive_demo1)> load data local inpath
'/opt/datas/test/stu_struct.txt'
into table struct_info;
Copying data from file:/opt/datas/test/stu_struct.txt
Copying file: file:/opt/datas/test/stu_struct.txt
Loading data to table hive_demo1.struct_info
Table hive_demo1.struct_info stats: [numFiles=
1
, numRows=
0
, totalSize=
73
, rawDataSize=
0
]
OK
Time taken:
0.256
seconds
|
3.3、查询struct_info表中数据(全部查询,部分查询及子元素的查询)
(1) 显示全表
1
2
3
4
5
6
7
8
9
10
|
hive (hive_demo1)> select * from struct_info;
OK
struct_info.id struct_info.info
1
{
"key"
:
"小明"
,
"value"
:
90
}
2
{
"key"
:
"小红"
,
"value"
:
100
}
3
{
"key"
:
"小方"
,
"value"
:
70
}
4
{
"key"
:
"小白"
,
"value"
:
50
}
5
{
"key"
:
"小兰"
,
"value"
:
60
}
6
{
"key"
:
"小花"
,
"value"
:
85
}
Time taken:
0.059
seconds, Fetched:
6
row(s)
|
(2) 显示表中字段
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
|
hive (hive_demo1)> select id from struct_info;
//显示id 这个字段的信息
OK
id
1
2
3
4
5
6
Time taken:
0.065
seconds, Fetched:
6
row(s)
hive (hive_demo1)> select info from struct_info;
//显示info这个字段的信息
OK
info
{
"key"
:
"小明"
,
"value"
:
90
}
{
"key"
:
"小红"
,
"value"
:
100
}
{
"key"
:
"小方"
,
"value"
:
70
}
{
"key"
:
"小白"
,
"value"
:
50
}
{
"key"
:
"小兰"
,
"value"
:
60
}
{
"key"
:
"小花"
,
"value"
:
85
}
Time taken:
0.056
seconds, Fetched:
6
row(s)<em id=
"__mceDel"
style=
"font-size: 1.17em; background-color: #ffffff; font-family: "PingFang SC", "Helvetica Neue", Helvetica, Arial, sans-serif"
> </em>
|
(3) 显示子字段key与value的字段信息
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
|
hive (hive_demo1)> select info.key from struct_info;
//显示key的信息
OK
key
小明
小红
小方
小白
小兰
小花
Time taken:
0.063
seconds, Fetched:
6
row(s)<br>
hive (hive_demo1)> select info.value from struct_info;
//显示value的信息
OK
value
90
100
70
50
60
85
Time taken:
0.056
seconds, Fetched:
6
row(s)
|
(4) 通过where条件语句过滤出指定显示的语句
1
2
3
4
5
|
hive (hive_demo1)> select id,info from struct_info where id=
1
;
//加上where条件语句显示其中1条指定信息
OK
id info
1
{
"key"
:
"小明"
,
"value"
:
90
}
Time taken:
0.112
seconds, Fetched:
1
row(s)
|
(5) 选择value作为范围取指定key的值
1
2
3
4
5
|
hive (hive_demo1)> select info from struct_info where info.key=
'小明'
;
OK
info
{
"key"
:
"小明"
,
"value"
:
90
}
Time taken:
0.042
seconds, Fetched:
1
row(s)
|