一、验证hive元数据
1. hive
show databases;
[root@hadoop sbin]# hive
...
hive> show databases;
OK
default
school
Time taken: 3.881 seconds, Fetched: 2 row(s)
hive>
2. MySQL
mysql -uroot -p
mysql> show databases;
+--------------------+
| Database |
+--------------------+
| information_schema |
| hive |
| mysql |
| performance_schema |
| sys |
+--------------------+
5 rows in set (0.00 sec)
mysql> use hive;
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A
Database changed
mysql> show tables;
+---------------------------+
| Tables_in_hive |
+---------------------------+
| AUX_TABLE |
| BUCKETING_COLS |
| CDS |
| COLUMNS_V2 |
| COMPACTION_QUEUE |
| COMPLETED_COMPACTIONS |
| COMPLETED_TXN_COMPONENTS |
| DATABASE_PARAMS |
| DBS |
| DB_PRIVS |
| DELEGATION_TOKENS |
| FUNCS |
| FUNC_RU |
| GLOBAL_PRIVS |
| HIVE_LOCKS |
| IDXS |
| INDEX_PARAMS |
| KEY_CONSTRAINTS |
| MASTER_KEYS |
| NEXT_COMPACTION_QUEUE_ID |
| NEXT_LOCK_ID |
| NEXT_TXN_ID |
| NOTIFICATION_LOG |
| NOTIFICATION_SEQUENCE |
| NUCLEUS_TABLES |
| PARTITIONS |
| PARTITION_EVENTS |
| PARTITION_KEYS |
| PARTITION_KEY_VALS |
| PARTITION_PARAMS |
| PART_COL_PRIVS |
| PART_COL_STATS |
| PART_PRIVS |
| ROLES |
| ROLE_MAP |
| SDS |
| SD_PARAMS |
| SEQUENCE_TABLE |
| SERDES |
| SERDE_PARAMS |
| SKEWED_COL_NAMES |
| SKEWED_COL_VALUE_LOC_MAP |
| SKEWED_STRING_LIST |
| SKEWED_STRING_LIST_VALUES |
| SKEWED_VALUES |
| SORT_COLS |
| TABLE_PARAMS |
| TAB_COL_STATS |
| TBLS |
| TBL_COL_PRIVS |
| TBL_PRIVS |
| TXNS |
| TXN_COMPONENTS |
| TYPES |
| TYPE_FIELDS |
| VERSION |
| WRITE_SET |
+---------------------------+
57 rows in set (0.00 sec)
二、hive内部命令使用
1、基础操作
- 进入 hive 后，不需要退出即可执行 bash shell 命令：在命令前加上 !，末尾以 ; 结束。注意：每条 ! 命令都在独立的子 shell 中执行，因此 !cd 不会改变 hive 进程的工作目录（见下方示例，!cd 之后 !pwd 的输出不变）。
hive> !pwd;
/opt/hadoop/sbin
hive> !cd /root/;
hive> !pwd;
/opt/hadoop/sbin
hive>
- 查看数据库HDFS分布式文件系统根目录下文件
MySQL查看
mysql> select DB_LOCATION_URI, NAME, OWNER_NAME from DBS;
+--------------------------------------------------+---------+------------+
| DB_LOCATION_URI | NAME | OWNER_NAME |
+--------------------------------------------------+---------+------------+
| hdfs://hadoop:8020/user/hive/warehouse | default | public |
| hdfs://hadoop:8020/user/hive/warehouse/school.db | school | root |
+--------------------------------------------------+---------+------------+
2 rows in set (0.00 sec)
hive使用dfs命令查看
hive> dfs -ls hdfs://hadoop:8020/;
Found 2 items
drwx-wx-wx - root supergroup 0 2023-11-20 21:16 hdfs://hadoop:8020/tmp
drwxr-xr-x - root supergroup 0 2023-11-20 22:28 hdfs://hadoop:8020/user
2、hive基本数据类型
- hive创建一个school库,创建students表
create database if not exists school;
create table school.students(id bigint, name string, score double, age int) row format delimited fields terminated by ',';
hive> create database if not exists school;
OK
Time taken: 0.024 seconds
hive> use school;
OK
Time taken: 0.027 seconds
hive> create table school.students(id bigint, name string, score double, age int) row format delimited fields terminated by ',';
OK
Time taken: 0.092 seconds
hive> show tables;
OK
students
test
Time taken: 0.027 seconds, Fetched: 2 row(s)
- 构造数据,创建文件写入下列内容
[root@hadoop ~]# vim student.txt
[root@hadoop ~]# cat student.txt
10010,li,100,23
10087,zgs,90,22
10086,ming,98,21
load data local inpath '/root/student.txt' into table students;
hive> load data local inpath '/root/student.txt' into table students;
Loading data to table school.students
OK
Time taken: 1.552 seconds
hive> select * from students;
OK
10010 li 100.0 23
10087 zgs 90.0 22
10086 ming 98.0 21
NULL NULL NULL NULL
Time taken: 1.39 seconds, Fetched: 4 row(s)
（注：第 4 行的全 NULL 记录很可能是 student.txt 末尾存在空行导致的——建议检查文件尾部是否有多余空行）
3、hive集合数据类型
- 1、STRUCT数据类型
create database enterprise;
use enterprise;
create table enterprise.employee(id int, info struct<name:string, age:int>) row format delimited fields terminated by ',' collection items terminated by ':';
load data local inpath '/root/e.txt' into table employee;
select * from employee;
- 准备数据文件
[root@hadoop ~]# vim e.txt
[root@hadoop ~]# cat e.txt
10001,zhangsan:22
10010,lilili:24
10011,wuyi:21
- hive创建表导入数据
hive> create database enterprise;
OK
Time taken: 0.132 seconds
hive> show databases;
OK
default
enterprise
school
Time taken: 0.055 seconds, Fetched: 3 row(s)
hive> use enterprise;
OK
Time taken: 0.023 seconds
hive> create table enterprise.employee(id int, info struct<name:string, age:int>) row format delimited fields terminated by ',' collection items terminated by ':';
OK
Time taken: 0.116 seconds
hive> load data local inpath '/root/e.txt' into table employee;
Loading data to table enterprise.employee
OK
Time taken: 0.464 seconds
hive> select * from employee;
OK
10001 {"name":"zhangsan","age":22}
10010 {"name":"lilili","age":24}
10011 {"name":"wuyi","age":21}
Time taken: 0.106 seconds, Fetched: 3 row(s)
- 2、map数据类型
- 准备数据
[root@hadoop ~]# vim e1.txt
[root@hadoop ~]# cat e1.txt
12345 job:80,team:90,person:50
12344 job:60,team:70
12343 job:90,team:80,person:100
- hive数据表创建,导入数据
hive> use enterprise;
OK
Time taken: 3.407 seconds
hive> create table enterprise.employee1(id int, perf map<string, int>) row format delimited fields terminated by '\t' collection items terminated by ',' map keys terminated by ':';
OK
Time taken: 0.607 seconds
hive> show tables;
OK
employee
employee1
Time taken: 0.09 seconds, Fetched: 2 row(s)
hive> load data local inpath '/root/e1.txt' into table employee1;
Loading data to table enterprise.employee1
OK
Time taken: 1.099 seconds
hive> select * from employee1;
OK
12345 {"job":80,"team":90,"person":50}
NULL NULL
12343 {"job":90,"team":80,"person":100}
Time taken: 1.161 seconds, Fetched: 3 row(s)
（注：id 为 12344 的第 2 行显示为 NULL，很可能是 e1.txt 中该行 id 与 map 之间不是制表符（\t）分隔——建议核对原始文件的分隔符）
hive> select perf['team'] from employee1;
OK
90
NULL
80
Time taken: 0.357 seconds, Fetched: 3 row(s)
- 3、ARRAY数据类型
- 准备数据
[root@hadoop ~]# cat e2.txt
zhangsan,123321:231312:312123
lisi,978798:798987:798798
wanger,210120:120012:120210
- 创建表导入数据
create table enterprise.employee2(name string, emp_id_list array<int>) row format delimited fields terminated by ',' collection items terminated by ':' ;
load data local inpath '/root/e2.txt' into table employee2;
hive> use enterprise;
OK
Time taken: 0.035 seconds
hive> create table enterprise.employee2(name string, emp_id_list array<int>) row format delimited fields terminated by ',' collection items terminated by ':' ;
OK
Time taken: 0.05 seconds
hive> load data local inpath '/root/e2.txt' into table employee2;
Loading data to table enterprise.employee2
OK
Time taken: 0.321 seconds
hive> select * from employee2;
OK
zhangsan [123321,231312,312123]
lisi [978798,798987,798798]
wanger [210120,120012,120210]
Time taken: 0.13 seconds, Fetched: 3 row(s)
hive> select emp_id_list[0] from employee2;
OK
123321
978798
210120
Time taken: 0.359 seconds, Fetched: 3 row(s)
三、综合应用
1、创建三类数据的表
create table enterprise.employee3(name string, salary double, subordinates array<string>, deductions map<string,float>, address struct<street:string,city:string, state:string, zip:int>) row format delimited fields terminated by '\t' collection items terminated by ',' map keys terminated by ':';
hive> create table enterprise.employee3(name string, salary double, subordinates array<string>, deductions map<string,float>, address struct<street:string,city:string, state:string, zip:int>) row format delimited fields terminated by '\t' collection items terminated by ',' map keys terminated by ':';
OK
Time taken: 0.126 seconds
hive> show tables;
OK
employee
employee1
employee2
employee3
Time taken: 0.074 seconds, Fetched: 4 row(s)
hive> desc employee3;
OK
name string
salary double
subordinates array<string>
deductions map<string,float>
address struct<street:string,city:string,state:string,zip:int>
Time taken: 0.064 seconds, Fetched: 5 row(s)
2、导入数据
[root@hadoop ~]# vim e3.txt
[root@hadoop ~]# cat e3.txt
zs 8000 li1,li2,li3 cd:30,zt:50,sw:100 nanjingdajie,NJ,ziyou,10012
lis 9000 w1,w2,w3 cd:10,zt:40,sw:33 anhualu,BJ,ziiyou2,10021
hive> load data local inpath '/root/e3.txt' into table employee3;
Loading data to table enterprise.employee3
OK
Time taken: 0.281 seconds
hive> select * from employee3;
OK
zs 8000.0 ["li1","li2","li3"] {"cd":30.0,"zt":50.0,"sw":100.0} {"street":"nanjingdajie","city":"NJ","state":"ziyou","zip":10012}
lis 9000.0 ["w1","w2","w3"] {"cd":10.0,"zt":40.0,"sw":33.0} {"street":"anhualu","city":"BJ","state":"ziiyou2","zip":10021}
Time taken: 0.162 seconds, Fetched: 2 row(s)
hive> select subordinates[0], deductions['cd'], address.city from enterprise.employee3;
OK
li1 30.0 NJ
w1 10.0 BJ
Time taken: 0.421 seconds, Fetched: 2 row(s)