Hive sql 常用语句，函数等备忘录，建议收藏

最新推荐文章于 2024-06-20 14:02:41 发布

Edge535

最新推荐文章于 2024-06-20 14:02:41 发布

阅读量398

点赞数 1

文章标签： hive 大数据 hadoop hdfs

本文链接：https://blog.csdn.net/GaoFeng535/article/details/110727033

版权

本篇未做任何知识点解析，单纯为了方便回忆和快速查询使用
对于本篇中提到的语句和函数若有疑问，请移步至（未完成）查看详解

启动

bin/hive
bin/hiveserver2

使用

bin/hive -e 'hive sql'
bin/beeline -u jdbc:hive2:localhost:10000 -n root

set设置

set hive.cli.print.current.db=true
set hive.cli.print.header=true

建库

create database 库名 location 'path' ;

删库

drop database 库名 ;

建表

create table 表名 (col type...) ;

查看建表语句

show create table 表名 ;

改表名

alter table 表名 rename to 新名 ;

加载数据

load data local inpath 'path' into table 表名 ;
hdfs dfs -put 'file' 'path'

向已有表添加列

alter table 表名 add columns (col type) ;

修改列行的分隔符

row format delimited
fields terminated by '\t'
lines terminated by '\n'

复杂数据类型

array<type>
collection items terminated by ','

map<keyType,valueType>
collection items terminated by ','
map keys terminated byu ':'

struct<colName:valueType...>
collection items terminated by ','

建立外部表

create external table 表名 ;

内外部表装换

alter table 表名 set tblproperties('external'='true' | 'false') ;

建立分区表

create table 表名(...)partitioned by (分区名 类型 ...)

查看分区

show partitions 表名

上传数据

load .... partition(分区名=分区值 ...)

添加删除分区

alter table 表名 add | drop partition(分区名=分区值...) location 'path' ;

桶表

create table 名(...)clustered by (列名) into 桶个数 buckets ;

向桶表中添加数据

insert into table 表名 select * from 表名

开启桶操作

set hive.enforce.bucketing=true ;

取指定桶

select * from 表名 tablesample(bucket 桶编号 out of 总桶树 on 列名) ;

排序

order by 全局排序，仅一个reduce
sort by 局部排序，多个reduce
distribute by 从map到reduce的分区规则 需配合sort by使用
cluster by = distribute by + sort by

去重

select count( distinct id ) from table ;
select count( * ) from ( select id from table group by id) tmp;

分区排序取topn

select * from (
 select *, row_number() over(partition by sub order by score desc) as num from student_scores
) as s where s.num<=3 ;

行转列

select name, concat_ws(',',collection_list(favors)) as favor_list from student_favors group by name;

列转行

select name, favor_new from student_favors lateral view 
explore( split(favor_list,',') ) table as favor_new ;

常用函数

round(double)	  floor(double)	  ceil(double)	  rand()(seed)	  exp(double)
log10(double)	  log2(double)	  log(base,a)	  pow(base,a)	  sqrt(double)
abs(double)	  bin(a)	 hex(a) 	conv(number,base,expert) 	
positive(a) 	negative(a)

unix_timestamp(now()) ('yyyy-MM-dd')
from_unixtime(string,'yyyy-MM-dd HH-mm-ss')
to_date(string) 	year(string) 	month(string) 	day(string) 	
hour(string) 	minute(string) 		secont(string) 	weekofyear(string)
datediff(end,start) 	date_add(date,days) 	date_sub(date,days)

if(condititon,trueValue,falseValue)
coalesce(v1,v2,v3...)
case a when b then ... when c then ... else ... end

length(string) 	reverse(string) 	concat(str1, str2 ... )
concat_ws(',',str1, str2 ...) 	
substr(string, start, length) 	substring(string,start, end)
upper(string) 	lower(string) 	
trim(string) 	ltrim(string) 	rtrim(string)
regexp_replace(string, regexp, str)
regexp_extract(string, regexp, index)

parse_url(urlstring,HOST|PATH|QUERY,KEY|PORT)
get_json_object(jsonstring,key)

space(n) 	repeat(n) 	split(string,',') 
ascii(string) 	lpad(string,lenth,str) 	rpad(string,length,str)
find_in_set(string,strSet)

高级常用函数

count() 	sum() 	avg() 	min() 	max() 	
row_number() 	first_value() 	last_value()
lead() 	lag()	
ntile() 	
cume_dist()
percent_rank()

grouping sets()
with cube
with rollup

over

over(partition by ... order by ... asc|desc
   rows between ... and ...
)
eg:
 unbounded preceding
 unbounded following
 current row
 3 preceding
 1 following

Edge535

关注

1
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
Hive sql 常用语句，函数等备忘录，建议收藏

常用语句，函数等快速回查启动使用set设置建库删库建表查看建表语句改表名加载数据向已有表添加列修改列行的分隔符复杂数据类型建立内部表内外部表装换建立分区表查看分区上传数据添加删除分区桶表向桶表中添加数据开启桶操作取指定桶启动bin/hivebin/hiveserver2使用bin/hive -e 'hive sql'bin/beeline -u jdbc:hive2:localhost:10000 -n rootset设置set hive.cli.print.current.db=t
复制链接

扫一扫