hive函数

一、常用函数

-- NVL: substitute a default value (-1 here) when the column is NULL.
SELECT
    comm,
    nvl(comm, -1)
FROM emp;
-- CASE WHEN: conditional aggregation.
-- For each department, count all rows and how many of them are male / female.
SELECT
    dept_id,
    count(*) AS total,
    sum(CASE WHEN sex = '男' THEN 1 ELSE 0 END) AS male,
    sum(CASE WHEN sex = '女' THEN 1 ELSE 0 END) AS female
FROM emp_sex
GROUP BY dept_id;
-- Rows to columns: for every (constellation, blood_type) pair, gather the
-- names of the matching people into a single '|'-separated string.
SELECT
    -- concat() joins its arguments into one string: "<constellation>,<blood_type>".
    concat(constellation, ',', blood_type) AS xzxx,
    -- collect_list() aggregates the group's names into an array;
    -- concat_ws() then joins the array elements with the given separator.
    concat_ws('|', collect_list(name)) AS rentou
FROM person_info
GROUP BY
    constellation,
    blood_type;
-- Columns to rows: split the comma-separated category string and emit
-- one output row per (movie, category) pair.
SELECT
    m.movie,
    tbl.cate
FROM movie_info m
LATERAL VIEW explode(split(m.category, ",")) tbl AS cate;

二、窗口函数

OVER():指定分析函数工作的数据窗口大小,这个数据窗口大小可能会随着行的变化而变化。
CURRENT ROW:当前行
n PRECEDING:往前n行数据
n FOLLOWING:往后n行数据
UNBOUNDED:起点,UNBOUNDED PRECEDING 表示从前面的起点, UNBOUNDED FOLLOWING表示到后面的终点
LAG(col,n,default_val):往前第n行数据
LEAD(col,n, default_val):往后第n行数据
NTILE(n):把有序窗口的行分发到指定数量的组中,各个组有编号,编号从1开始,对于每一行,NTILE返回此行所属的组的编号。注意:n必须为int类型。
-- Aggregate window on a grouped result: the empty OVER () spans every row
-- that remains after GROUP BY, so each name is paired with the same count
-- (the number of grouped names with orders in 2017-04).
SELECT
    name,
    count(*) OVER ()
FROM business
WHERE substring(orderdate, 1, 7) = '2017-04'
GROUP BY name;
-- SUM() under different window specifications, side by side.
SELECT
    name,
    orderdate,
    cost,
    -- sample1: total over all rows.
    sum(cost) OVER () AS sample1,
    -- sample2: total within each name.
    sum(cost) OVER (PARTITION BY name) AS sample2,
    -- sample3: running total within each name, ordered by orderdate
    -- (no explicit frame given).
    sum(cost) OVER (PARTITION BY name ORDER BY orderdate) AS sample3,
    -- sample4: running total from the partition start to the current row,
    -- with an explicit ROWS frame.
    -- NOTE(review): equals sample3 only when orderdate has no ties within a
    -- name, because the default frame is RANGE-based -- confirm in Hive docs.
    sum(cost) OVER (
        PARTITION BY name
        ORDER BY orderdate
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS sample4,
    -- sample5: current row plus the one row before it.
    sum(cost) OVER (
        PARTITION BY name
        ORDER BY orderdate
        ROWS BETWEEN 1 PRECEDING AND CURRENT ROW
    ) AS sample5,
    -- sample6: previous row, current row and next row.
    sum(cost) OVER (
        PARTITION BY name
        ORDER BY orderdate
        ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
    ) AS sample6,
    -- sample7: current row and every row after it.
    sum(cost) OVER (
        PARTITION BY name
        ORDER BY orderdate
        ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
    ) AS sample7
FROM business;
-- LAG / LEAD: for each order, show the same customer's previous and next
-- order date (NULL when there is no such row, as no default is supplied).
SELECT
    name,
    orderdate,
    cost,
    lag(orderdate, 1) OVER (PARTITION BY name ORDER BY orderdate) AS last_order,
    lead(orderdate, 1) OVER (PARTITION BY name ORDER BY orderdate) AS next_order
FROM business;
-- NTILE: distribute all rows, ordered by orderdate, over 5 numbered buckets
-- and keep only the rows that fall into bucket 1.
WITH t1 AS (
    SELECT
        name,
        orderdate,
        cost,
        ntile(5) OVER (ORDER BY orderdate) AS n
    FROM business
)
SELECT
    t1.name,
    t1.orderdate,
    t1.cost,
    t1.n
FROM t1
WHERE t1.n = 1;
-- PERCENT_RANK: relative rank of each row when all rows are ordered by orderdate.
SELECT
    name,
    orderdate,
    cost,
    percent_rank() OVER (ORDER BY orderdate) AS pr
FROM business;
RANK() 排序相同时会重复,总数不会变
DENSE_RANK() 排序相同时会重复,总数会减少
ROW_NUMBER() 按排序顺序依次编号,排序相同时序号也不会重复
-- Ranking functions compared: within each subject, ordered by score descending.
--   r  : RANK()       -- ties share a rank, later ranks are skipped
--   dr : DENSE_RANK() -- ties share a rank, no gaps afterwards
--   rn : ROW_NUMBER() -- strictly sequential numbering
SELECT
    *,
    rank() OVER (PARTITION BY subject ORDER BY score DESC) AS r,
    dense_rank() OVER (PARTITION BY subject ORDER BY score DESC) AS dr,
    row_number() OVER (PARTITION BY subject ORDER BY score DESC) AS rn
FROM score;

三、日期函数

-- current_date: returns the current date.
SELECT current_date();

-- Date arithmetic.
-- The date 90 days after today.
SELECT date_add(current_date(), 90);
-- The date 90 days before today.
SELECT date_sub(current_date(), 90);

-- Date difference: number of days from 1990-06-04 to today.
SELECT datediff(current_date(), "1990-06-04");
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值