Hive基础语法
HiveServer和beeline(HiveServer2)的区别:
- hive不需要启动服务再访问
- beeline需要先启动服务端,再访问客户端
- beeline在查询效率上比hive高,beeline不支持update和delete
hive:
-
连接方式
hive -h hostname -p port
-
加载文件
-
将本地文件上传到指定的存储路径中,表就可以读取到数据(需要和表结构匹配)
-
使用load命令
load data local inpath 'local_dir' into table table_name;
-
-
库级操作
- 同mysql
-
表级操作
- 默认创建的是内部表,会默认在指定的存储空间中建立对应文件夹
- 只要把文件放入,表就可以读取到数据(需要和表结构匹配)
- 分区表,会在表下创建文件夹,数据在各分区文件夹下
-
行级操作
- 同mysql
关于各种建表语句
文件employee.txt
Michael|Montreal,Toronto|Male,30|DB:80|Product:DeveloperLead
Will|Montreal|Male,35|Perl:85|Product:Lead,Test:Lead
Shelley|New York|Female,27|Python:80|Test:Lead,COE:Architect
Lucy|Vancouver|Female,57|Sales:89,HR:94|Sales:Lead
文件employee_id.txt
Michael|100|Montreal,Toronto|Male,30|DB:80|Product:DeveloperLead
Will|101|Montreal|Male,35|Perl:85|Product:Lead,Test:Lead
Steven|102|New York|Female,27|Python:80|Test:Lead,COE:Architect
Lucy|103|Vancouver|Female,57|Sales:89,HR:94|Sales:Lead
Mike|104|Montreal|Male,35|Perl:85|Product:Lead,Test:Lead
Shelley|105|New York|Female,27|Python:80|Test:Lead,COE:Architect
Luly|106|Vancouver|Female,57|Sales:89,HR:94|Sales:Lead
Lily|107|Montreal|Male,35|Perl:85|Product:Lead,Test:Lead
Shell|108|New York|Female,27|Python:80|Test:Lead,COE:Architect
Mich|109|Vancouver|Female,57|Sales:89,HR:94|Sales:Lead
Dayong|110|Montreal|Male,35|Perl:85|Product:Lead,Test:Lead
Sara|111|New York|Female,27|Python:80|Test:Lead,COE:Architect
Roman|112|Vancouver|Female,57|Sales:89,HR:94|Sales:Lead
Christine|113|Montreal|Male,35|Perl:85|Product:Lead,Test:Lead
Eman|114|New York|Female,27|Python:80|Test:Lead,COE:Architect
Alex|115|Vancouver|Female,57|Sales:89,HR:94|Sales:Lead
Alan|116|Montreal|Male,35|Perl:85|Product:Lead,Test:Lead
Andy|117|New York|Female,27|Python:80|Test:Lead,COE:Architect
Ryan|118|Vancouver|Female,57|Sales:89,HR:94|Sales:Lead
Rome|119|Montreal|Male,35|Perl:85|Product:Lead,Test:Lead
Lym|120|New York|Female,27|Python:80|Test:Lead,COE:Architect
Linm|121|Vancouver|Female,57|Sales:89,HR:94|Sales:Lead
Dach|122|Montreal|Male,35|Perl:85|Product:Lead,Test:Lead
Ilon|123|New York|Female,27|Python:80|Test:Lead,COE:Architect
Elaine|124|Vancouver|Female,57|Sales:89,HR:94|Sales:Lead
创建库hivetest:
create database hivetest;
切换到库hivetest:
use hivetest;
创建表employee:(字段类型)
-- Managed (internal) table for employee.txt.
-- NOTE: HiveQL comments use '--'; the original '#' inline comments would
-- cause a parse error if the statement were executed as written.
create table employee(                  -- table named employee
name string,                            -- column name
address array<string>,                  -- array-typed column
personalInfo array<string>,
technol map<string,int>,                -- map-typed column
jobs map<string,string>)
row format delimited                    -- describe how the text rows are split
fields terminated by '|'                -- column delimiter
collection items terminated by ','      -- delimiter between array/map entries
map keys terminated by ':'              -- delimiter between map key and value
lines terminated by '\n';               -- row delimiter
创建表employee_id:
-- Managed table for employee_id.txt (adds an id column and a struct column).
-- HiveQL comments use '--'; '#' is not valid comment syntax.
create table employee_id(
name string,
id int,
address array<string>,
info struct<sex:string,age:int>,        -- struct-typed column (fields split by ',')
technol map<string,int>,
jobs map<string,string>)
row format delimited
fields terminated by '|'                -- column delimiter
collection items terminated by ','      -- delimiter between array/struct/map entries
map keys terminated by ':'              -- delimiter between map key and value
lines terminated by '\n';               -- row delimiter
创建分区表employee_partition:
-- Partitioned table: each (country, add) value pair gets its own
-- subdirectory under the table directory; data files live per partition.
create table employee_partition(
name string,
address array<string>,
info struct<gender:string,age:int>,
technol map<string,int>,
jobs map<string,string>)
partitioned by (country string,add string)  -- partition columns are user-defined, not in the row data
row format delimited
fields terminated by '|'                -- column delimiter
collection items terminated by ','      -- delimiter between array/struct/map entries
map keys terminated by ':'              -- delimiter between map key and value
lines terminated by '\n';               -- row delimiter
创建外部表emp_id:
-- External table: Hive tracks only metadata; dropping the table does NOT
-- delete the underlying files at the given location.
create external table emp_id(
name string,
id int,
address array<string>,
info struct<sex:string,age:int>,
technol map<string,int>,
jobs map<string,string>)
row format delimited
fields terminated by '|'                -- column delimiter
collection items terminated by ','      -- delimiter between array/struct/map entries
map keys terminated by ':'              -- delimiter between map key and value
lines terminated by '\n'                -- row delimiter
stored as textfile                      -- plain-text storage format
-- the HDFS directory below must already exist before creating the table
location '/usr/test/employee';
创建分桶表emp_bucket:
-- Enable bucket enforcement BEFORE inserting so writes actually produce
-- the declared number of bucket files.
-- NOTE(review): the original misspelled the property as 'hive.enfore.bucketing',
-- which Hive would silently ignore.
set hive.enforce.bucketing=true;
-- Bucketed external table: rows are hash-distributed by id into 3 files.
create external table emp_bucket(
name string,
id int,
address array<string>,
info struct<sex:string,age:int>,
technol map<string,int>,
jobs map<string,string>)
clustered by(id) into 3 buckets         -- bucket column and bucket count
row format delimited
fields terminated by '|'                -- column delimiter
collection items terminated by ','      -- delimiter between array/struct/map entries
map keys terminated by ':'              -- delimiter between map key and value
lines terminated by '\n'                -- row delimiter
stored as textfile
-- the HDFS directory below must already exist before creating the table
location '/usr/test/bucket';