常用语句,函数等快速回查
本篇未做任何知识点解析,单纯为了方便回忆和快速查询使用
对于本篇中提到的语句和函数若有疑问,请移步至(未完成)查看详解
启动
bin/hive
bin/hiveserver2
使用
bin/hive -e 'hive sql'
bin/beeline -u jdbc:hive2:localhost:10000 -n root
set设置
set hive.cli.print.current.db=true
set hive.cli.print.header=true
建库
create database 库名 location 'path' ;
删库
drop database 库名 ;
建表
create table 表名 (col type...) ;
查看建表语句
show create table 表名 ;
改表名
alter table 表名 rename to 新名 ;
加载数据
load data local inpath 'path' into table 表名 ;
hdfs dfs -put 'file' 'path'
向已有表添加列
alter table 表名 add columns (col type) ;
修改列 行的分隔符
row format delimited
fields terminated by '\t'
lines terminated by '\n'
复杂数据类型
array<type>
collection items terminated by ','
map<keyType,valueType>
collection items terminated by ','
map keys terminated byu ':'
struct<colName:valueType...>
collection items terminated by ','
建立外部表
create external table 表名 ;
内外部表装换
alter table 表名 set tblproperties('external'='true' | 'false') ;
建立分区表
create table 表名(...)partitioned by (分区名 类型 ...)
查看分区
show partitions 表名
上传数据
load .... partition(分区名=分区值 ...)
添加 删除分区
alter table 表名 add | drop partition(分区名=分区值...) location 'path' ;
桶表
create table 名(...)clustered by (列名) into 桶个数 buckets ;
向桶表中添加数据
insert into table 表名 select * from 表名
开启桶操作
set hive.enforce.bucketing=true ;
取指定桶
select * from 表名 tablesample(bucket 桶编号 out of 总桶树 on 列名) ;
排序
order by 全局排序,仅一个reduce
sort by 局部排序,多个reduce
distribute by 从map到reduce的分区规则 需配合sort by使用
cluster by = distribute by + sort by
去重
select count( distinct id ) from table ;
select count( * ) from ( select id from table group by id) tmp;
分区排序取topn
select * from (
select *, row_number() over(partition by sub order by score desc) as num from student_scores
) as s where s.num<=3 ;
行转列
select name, concat_ws(',',collection_list(favors)) as favor_list from student_favors group by name;
列转行
select name, favor_new from student_favors lateral view
explore( split(favor_list,',') ) table as favor_new ;
常用函数
round(double) floor(double) ceil(double) rand()(seed) exp(double)
log10(double) log2(double) log(base,a) pow(base,a) sqrt(double)
abs(double) bin(a) hex(a) conv(number,base,expert)
positive(a) negative(a)
unix_timestamp(now()) ('yyyy-MM-dd')
from_unixtime(string,'yyyy-MM-dd HH-mm-ss')
to_date(string) year(string) month(string) day(string)
hour(string) minute(string) secont(string) weekofyear(string)
datediff(end,start) date_add(date,days) date_sub(date,days)
if(condititon,trueValue,falseValue)
coalesce(v1,v2,v3...)
case a when b then ... when c then ... else ... end
length(string) reverse(string) concat(str1, str2 ... )
concat_ws(',',str1, str2 ...)
substr(string, start, length) substring(string,start, end)
upper(string) lower(string)
trim(string) ltrim(string) rtrim(string)
regexp_replace(string, regexp, str)
regexp_extract(string, regexp, index)
parse_url(urlstring,HOST|PATH|QUERY,KEY|PORT)
get_json_object(jsonstring,key)
space(n) repeat(n) split(string,',')
ascii(string) lpad(string,lenth,str) rpad(string,length,str)
find_in_set(string,strSet)
高级常用函数
count() sum() avg() min() max()
row_number() first_value() last_value()
lead() lag()
ntile()
cume_dist()
percent_rank()
grouping sets()
with cube
with rollup
over
over(partition by ... order by ... asc|desc
rows between ... and ...
)
eg:
unbounded preceding
unbounded following
current row
3 preceding
1 following