6.hive基础使用

一、验证hive元数据

1. hive

show databases;

[root@hadoop sbin]# hive
...
hive> show databases;
OK
default
school
Time taken: 3.881 seconds, Fetched: 2 row(s)
hive> 

2. MySQL

mysql -uroot -p

mysql> show databases;
+--------------------+
| Database           |
+--------------------+
| information_schema |
| hive               |
| mysql              |
| performance_schema |
| sys                |
+--------------------+
5 rows in set (0.00 sec)

mysql> use hive;
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A

Database changed
mysql> show tables;
+---------------------------+
| Tables_in_hive            |
+---------------------------+
| AUX_TABLE                 |
| BUCKETING_COLS            |
| CDS                       |
| COLUMNS_V2                |
| COMPACTION_QUEUE          |
| COMPLETED_COMPACTIONS     |
| COMPLETED_TXN_COMPONENTS  |
| DATABASE_PARAMS           |
| DBS                       |
| DB_PRIVS                  |
| DELEGATION_TOKENS         |
| FUNCS                     |
| FUNC_RU                   |
| GLOBAL_PRIVS              |
| HIVE_LOCKS                |
| IDXS                      |
| INDEX_PARAMS              |
| KEY_CONSTRAINTS           |
| MASTER_KEYS               |
| NEXT_COMPACTION_QUEUE_ID  |
| NEXT_LOCK_ID              |
| NEXT_TXN_ID               |
| NOTIFICATION_LOG          |
| NOTIFICATION_SEQUENCE     |
| NUCLEUS_TABLES            |
| PARTITIONS                |
| PARTITION_EVENTS          |
| PARTITION_KEYS            |
| PARTITION_KEY_VALS        |
| PARTITION_PARAMS          |
| PART_COL_PRIVS            |
| PART_COL_STATS            |
| PART_PRIVS                |
| ROLES                     |
| ROLE_MAP                  |
| SDS                       |
| SD_PARAMS                 |
| SEQUENCE_TABLE            |
| SERDES                    |
| SERDE_PARAMS              |
| SKEWED_COL_NAMES          |
| SKEWED_COL_VALUE_LOC_MAP  |
| SKEWED_STRING_LIST        |
| SKEWED_STRING_LIST_VALUES |
| SKEWED_VALUES             |
| SORT_COLS                 |
| TABLE_PARAMS              |
| TAB_COL_STATS             |
| TBLS                      |
| TBL_COL_PRIVS             |
| TBL_PRIVS                 |
| TXNS                      |
| TXN_COMPONENTS            |
| TYPES                     |
| TYPE_FIELDS               |
| VERSION                   |
| WRITE_SET                 |
+---------------------------+
57 rows in set (0.00 sec)

二、hive内部命令使用

1、基础操作

  • 进入 hive 后无需退出即可执行 bash shell 命令:在命令前加上 ! 前缀,并以 ; 结尾。注意:!cd 无法改变当前工作目录(见下例,!pwd 前后输出相同)
hive> !pwd;
/opt/hadoop/sbin
hive> !cd /root/;
hive> !pwd;
/opt/hadoop/sbin
hive> 
  • 查看数据库HDFS分布式文件系统根目录下文件
    MySQL查看
mysql> select DB_LOCATION_URI, NAME, OWNER_NAME from DBS;
+--------------------------------------------------+---------+------------+
| DB_LOCATION_URI                                  | NAME    | OWNER_NAME |
+--------------------------------------------------+---------+------------+
| hdfs://hadoop:8020/user/hive/warehouse           | default | public     |
| hdfs://hadoop:8020/user/hive/warehouse/school.db | school  | root       |
+--------------------------------------------------+---------+------------+
2 rows in set (0.00 sec)

hive使用dfs命令查看

hive> dfs -ls hdfs://hadoop:8020/;
Found 2 items
drwx-wx-wx   - root supergroup          0 2023-11-20 21:16 hdfs://hadoop:8020/tmp
drwxr-xr-x   - root supergroup          0 2023-11-20 22:28 hdfs://hadoop:8020/user

2、hive基本数据类型

  • hive创建一个school库,创建students表

create database if not exists school;
create table school.students(id bigint, name string, score double, age int) row format delimited fields terminated by ',';

hive> create database if not exists school;
OK
Time taken: 0.024 seconds
hive> use school;
OK
Time taken: 0.027 seconds
hive> create table school.students(id bigint, name string, score double, age int) row format delimited fields terminated by ',';
OK
Time taken: 0.092 seconds

hive> show tables;
OK
students
test
Time taken: 0.027 seconds, Fetched: 2 row(s)
  • 构造数据,创建文件写入下列内容
[root@hadoop ~]# vim student.txt
[root@hadoop ~]# cat student.txt 
10010,li,100,23
10087,zgs,90,22
10086,ming,98,21

load data local inpath '/root/student.txt' into table students;

hive> load data local inpath '/root/student.txt' into table students;
Loading data to table school.students
OK
Time taken: 1.552 seconds
hive> select * from students;
OK
10010   li      100.0   23
10087   zgs     90.0    22
10086   ming    98.0    21
NULL    NULL    NULL    NULL
Time taken: 1.39 seconds, Fetched: 4 row(s)
(说明:最后一行全为 NULL,推测是 student.txt 末尾存在空行或格式不符的行——请核实源文件)

3. hive集合数据类型

  • 1、STRUCT数据类型

create database enterprise;

use enterprise;

create table enterprise.employee(id int, info struct<name:string, age:int>) row format delimited fields terminated by ',' collection items terminated by ':';

load data local inpath '/root/e.txt' into table employee;

select * from employee;

  • 准备数据文件
[root@hadoop ~]# vim e.txt
[root@hadoop ~]# cat e.txt 
10001,zhangsan:22
10010,lilili:24
10011,wuyi:21
  • hive创建表导入数据
hive> create database enterprise;
OK
Time taken: 0.132 seconds

hive> show databases;
OK
default
enterprise
school
Time taken: 0.055 seconds, Fetched: 3 row(s)
hive> use enterprise;
OK
Time taken: 0.023 seconds
hive> create table enterprise.employee(id int, info struct<name:string, age:int>) row format delimited fields terminated by ',' collection items terminated by ':';
OK
Time taken: 0.116 seconds
hive> load data local inpath '/root/e.txt' into table employee;
Loading data to table enterprise.employee
OK
Time taken: 0.464 seconds
hive> select * from employee;
OK
10001   {"name":"zhangsan","age":22}
10010   {"name":"lilili","age":24}
10011   {"name":"wuyi","age":21}
Time taken: 0.106 seconds, Fetched: 3 row(s)
  • 2、map数据类型
  • 准备数据
[root@hadoop ~]# vim e1.txt
[root@hadoop ~]# cat e1.txt 
12345   job:80,team:90,person:50
12344   job:60,team:70
12343   job:90,team:80,person:100
  • hive数据表创建,导入数据
hive> use enterprise;
OK
Time taken: 3.407 seconds
hive> create table enterprise.employee1(id int, perf map<string, int>) row format delimited fields terminated by '\t' collection items terminated by ',' map keys terminated by ':';
OK
Time taken: 0.607 seconds
hive> show tables;
OK
employee
employee1
Time taken: 0.09 seconds, Fetched: 2 row(s)
hive> load data local inpath '/root/e1.txt' into table employee1;
Loading data to table enterprise.employee1
OK
Time taken: 1.099 seconds
hive> select * from employee1;
OK
12345   {"job":80,"team":90,"person":50}
NULL    NULL
12343   {"job":90,"team":80,"person":100}
Time taken: 1.161 seconds, Fetched: 3 row(s)
(说明:第二行为 NULL,推测是 e1.txt 中 12344 这一行的字段分隔符不是制表符 '\t'——请核实源文件)
hive> select perf['team'] from employee1;
OK
90
NULL
80
Time taken: 0.357 seconds, Fetched: 3 row(s)
  • 3、ARRAY数据类型
  • 准备数据
[root@hadoop ~]# cat e2.txt 
zhangsan,123321:231312:312123
lisi,978798:798987:798798
wanger,210120:120012:120210
  • 创建表导入数据

create table enterprise.employee2(name string, emp_id_list array<int>) row format delimited fields terminated by ',' collection items terminated by ':' ;

load data local inpath '/root/e2.txt' into table employee2;

hive> use enterprise;
OK
Time taken: 0.035 seconds
hive> create table enterprise.employee2(name string, emp_id_list array<int>) row format delimited fields terminated by ',' collection items terminated by ':' ;
OK
Time taken: 0.05 seconds
hive> load data local inpath '/root/e2.txt' into table employee2;
Loading data to table enterprise.employee2
OK
Time taken: 0.321 seconds
hive> select * from employee2;
OK
zhangsan        [123321,231312,312123]
lisi    [978798,798987,798798]
wanger  [210120,120012,120210]
Time taken: 0.13 seconds, Fetched: 3 row(s)
hive> select emp_id_list[0] from employee2;
OK
123321
978798
210120
Time taken: 0.359 seconds, Fetched: 3 row(s)

三、综合应用

1、创建三类数据的表

create table enterprise.employee3(name string, salary double, subordinates array<string>, deductions map<string,float>, address struct<street:string,city:string, state:string, zip:int>) row format delimited fields terminated by '\t' collection items terminated by ',' map keys terminated by ':';
hive> create table enterprise.employee3(name string, salary double, subordinates array<string>, deductions map<string,float>, address struct<street:string,city:string, state:string, zip:int>) row format delimited fields terminated by '\t' collection items terminated by ',' map keys terminated by ':';
OK
Time taken: 0.126 seconds
hive> show tables;
OK
employee
employee1
employee2
employee3
Time taken: 0.074 seconds, Fetched: 4 row(s)
hive> desc employee3;
OK
name                    string                                      
salary                  double                                      
subordinates            array<string>                               
deductions              map<string,float>                           
address                 struct<street:string,city:string,state:string,zip:int>                      
Time taken: 0.064 seconds, Fetched: 5 row(s)

2、导入数据

[root@hadoop ~]# vim e3.txt 
[root@hadoop ~]# cat e3.txt 
zs      8000    li1,li2,li3     cd:30,zt:50,sw:100      nanjingdajie,NJ,ziyou,10012
lis     9000    w1,w2,w3        cd:10,zt:40,sw:33       anhualu,BJ,ziiyou2,10021
hive> load data local inpath '/root/e3.txt' into table employee3;
Loading data to table enterprise.employee3
OK
Time taken: 0.281 seconds
hive> select * from employee3;
OK
zs      8000.0  ["li1","li2","li3"]     {"cd":30.0,"zt":50.0,"sw":100.0}        {"street":"nanjingdajie","city":"NJ","state":"ziyou","zip":10012}
lis     9000.0  ["w1","w2","w3"]        {"cd":10.0,"zt":40.0,"sw":33.0} {"street":"anhualu","city":"BJ","state":"ziiyou2","zip":10021}
Time taken: 0.162 seconds, Fetched: 2 row(s)
hive> select subordinates[0], deductions['cd'], address.city from enterprise.employee3;
OK
li1     30.0    NJ
w1      10.0    BJ
Time taken: 0.421 seconds, Fetched: 2 row(s)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值