Hive的基础语法
1. DDL
1.1 DataBase
CREATE DATABASE [ IF NOT EXISTS ] database_name
[ COMMENT database_comment]
[ LOCATION hdfs_path]
[ WITH DBPROPERTIES ( property_name= property_value, . . . ) ] ;
e. g.
create database if not exists hive
comment 'the 1st database demo'
localtion 'hdfs://192.168.10.101//user/hive/warehouse'
with dbproperties( 'name' = 'qwer' ) ;
create database if not exists hive2
comment 'the 2nd database demo'
with dbproperties( 'name' = 'rocklee' ) ;
drop database [ if exists ] database_name [ cascsde] ;
alter database database_name set dbproperties
( property_name= property_value, . . . )
alter ( database ) database_name setowner [ user | role] user_or_role;
alter ( database ) database_name set location hdfs_path;
use database_name;
1.2 Table
1.2.1 建表
create table if not exists ` hive` . ` t_usr` (
` id` int comment 'user id' ,
` name` string comment 'user name' ,
` age` int comment 'user age'
)
comment 'user info'
row format delimited
fields terminated by ','
stored as textfile;
load data local inpath '/root/user.txt' into table ` hive` . ` t_user` ;
create external table if not exists ` hive` . ` t_user2` (
` id` int comment 'user id' ,
` name` string comment 'username' ,
` age` int comment 'user age'
)
comment 'user info'
row format delimited
fields terminated by ','
stored as textfile
location '/t_user2' ;
load data local inpath '/root/user.txt' into table ` hive` . ` t_user2` ;
create temporary table if not exists ` hive` . ` t_user3` (
` id` int comment 'userid' ,
` name` string comment 'username' ,
` age` int comment 'user age'
) comment 'user info'
row format delimited
fields terminated by ','
stored as textfile;
load data local inpath '/home/user.txt' into table ` hive` . ` t_user3` ;
create table if not exists ` hive` . ` t_user2` as select * from ` hive` . ` t_user` ;
create external table if not exists ` hive` . ` t_user3` like ` hive` . ` t_user` location '/user/hive/warehouse/hive.db/t_user' ;
1.2.2 修改表
alter table table_name rename to new_table_name;
alter table table_name set tblproperties( 'comment' = new_comment) ;
1.2.3 修改列
alter table table_name change [ column ] col_old_name col_new_name column_type [ comment col_comment] [ first | after coumn_name] [ cascsde] ;
e. g.
alter table ` hive` . ` t_1` change column ` id` ` uid` string comment 'this is uid' ;
alter table table_name add | replace columns ( col_name data_type [ comment col_comment] , . . . ) [ cascade ]
e. g.
alter table ` hive` . ` t_1` add colums ( ` salary` double , ` common` double ) ;
alter table ` hive` . ` t_1` replace columns ( ` salary` double , ` common` double ) ;
2. DML
2.1 加载数据
LOAD DATA [ LOCAL ] INPATH 'filepath' [ OVERWRITE] INTO TABLE tablename [ PARTITION ( partcol1= val1, partcol2= val2 . . . ) ]
e. g.
load data local inpath '/home/user.txt' into table ` t_1` ;
load data inpath '/user.txt' into table ` t_1` ;
insert into t_user3 values ( 4 , 'narudo' , 18 ) ;
INSERT OVERWRITE TABLE tablename1 [ PARTITION ( partcol1= val1, partcol2= val2 . . . ) [ IF NOT EXISTS ] ]
select_statement1 FROM from_statement;
INSERT INTO TABLE tablename1 [ PARTITION ( partcol1= val1, partcol2= val2 . . . ) ] select_statement1 FROM from_statement;
e. g.
create table ` t_user4` like ` t_user3` ;
insert overwrite table ` t_user4` if not exists select * from t_user3;
2.2 多数据插入
FROM from_statement
INSERT OVERWRITE TABLE tablename1 [ PARTITION ( partcol1= val1, partcol2= val2 . . . ) [ IF NOT EXISTS ] ] select_statement1
[ INSERT OVERWRITE TABLE tablename2 [ PARTITION . . . [ IF NOT EXISTS ] ] select_statement2]
[ INSERT INTO TABLE tablename2 [ PARTITION . . . ] select_statement2] . . . ;
e. g.
create table ` t_user5` like ` t_user3` ;
create table ` t_user6` like ` t_user3` ;
from ` t_user3`
insert overwrite table ` t_user5` if not exists select *
insert overwrite table ` t_user6` if not exists select * ;
2.3 写数据
INSERT OVERWRITE [ LOCAL ] DIRECTORY directory1
[ ROW FORMAT row_format] [ STORED AS file_format]
SELECT . . . FROM . . .
e. g.
INSERT OVERWRITE DIRECTORY '/write'
ROW FORMAT DELIMITED
SELECT * FROM ` hive` . ` t_user5` ;
2.4 Join
2.4.1 语法
SELECT [ ALL | DISTINCT ] select_expr, select_expr, . . .
FROM table_reference
[ WHERE where_condition]
[ GROUP BY col_list]
[ ORDER BY col_list]
[ CLUSTER BY col_list
| [ DISTRIBUTE BY col_list] [ SORT BY col_list]
]
[ LIMIT [ offset , ] rows ]
[ UNION / UNION ALL ]
join_table:
table_reference [ INNER ] JOIN table_factor [ join_condition]
| table_reference {LEFT | RIGHT | FULL } [ OUTER ] JOIN table_reference join_condition
| table_reference LEFT SEMI JOIN table_reference join_condition
| table_reference CROSS JOIN table_reference [ join_condition]
2.4.2 数据
1 , lixi, 1
2 , zhangshuai, 2
3 , cuikai, 3
4 , xiaojianxiong, 4
6 , ranldo, 10
1 , Java
2 , Bigdata
3 , UI
4 , HTML5
5 , Python
2.4.3 测试
create table if not exists t_emp(
eid int ,
ename string,
did int
)
row format delimited
fields terminated by ','
;
create table if not exists t_dept(
did int ,
dname string
)
row format delimited
fields terminated by ','
;
load data local inpath '/home/emp.txt' into table t_emp;
load data local inpath '/home/dept.txt' into table t_dept;
select
e. eid,
e. ename,
d. dname
from
t_emp e
join
t_dept d
on
e. did = d. did
;
select
e. eid,
e. ename,
d. dname
from
t_emp e
left join
t_dept d
on
e. did = d. did
;
select
e. eid,
e. ename,
d. dname
from
t_emp e
right join
t_dept d
on
e. did = d. did
;
select
e. eid,
e. ename,
d. dname
from
t_emp e
full join
t_dept d
on
e. did = d. did
;
select
e. eid,
e. ename,
e. did
from
t_emp e
left semi join
t_dept d
on
e. did = d. did
;
2.5 group by
2.5.1 准备数据
1 , lixi, 1
2 , zhangshuai, 2
3 , cuikai, 2
4 , xiaojianxiong, 2
6 , ranldo, 1
1 , Java
2 , Bigdata
3 , UI
4 , HTML5
5 , Python
2.5.2 分组查询–每个部门的人数
SELECT
d. did,
COUNT ( * )
FROM
t_emp e
JOIN
t_dept d
ON
e. did = d. did
GROUP BY
d. did
;
SELECT
d. dname,
COUNT ( * )
FROM
t_emp e
JOIN
t_dept d
ON
e. did = d. did
group by
d. did, d. dname
;
tip:
我们发现在mysql中能够成功运行的sql 代码,在hive中的不能运行。这个分组的sql 中在hive里有一个要求,分组字段才能够做select 的字段被查询出来。非分组字段在hive中不能使用!!!
2.6 Order By 和Sort By的区别
- 局部排序,只保证单个的reduce内有序
e. g.
select * from a
distribute by ( id) sort by ( name) ;
- 全局有序,保证所有的reduce中的数据有序
e. g.
select * from a order by ( name) ;