Hive03: Functions + UDTF

1. nvl

select comm, nvl(comm, 0) from emp;
If comm is NULL, replace it with 0.

2. avg

Computes the average of a column.
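A minimal sketch, assuming the emp table above also has numeric sal and deptno columns (these column names are assumptions, not from the original):

select deptno, avg(sal) avg_sal
from emp
group by deptno;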

3. case when

select 
dept,
sum(case sex when 'M' then 1 else 0 end) m_cnt,
sum(case sex when 'F' then 1 else 0 end) f_cnt
from people
group by dept;

4. if

select 
dept,
sum(if(sex='M',1,0)) m_cnt,
sum(if(sex='F',1,0)) f_cnt
from people
group by dept;

5. concat / concat_ws

select 
concat(dept,",",sex), concat_ws("|",collect_list(name))
from people
group by dept,sex;

6. collect_list / collect_set

collect_list keeps duplicate values
collect_set removes duplicate values
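A small sketch against the people table used above; if a department contains repeated name values, collect_set keeps each one only once while collect_list keeps them all:

select dept,
collect_list(name) all_names,
collect_set(name) distinct_names
from people
group by dept;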

7. Row-to-column conversion (collect_list + concat_ws)

select 
concat(dept,",",sex), concat_ws("|",collect_list(name))
from people
group by dept,sex;
people (dept, name, sex):
RD,PK,M
RD,RUOZE,M
RD,WANQIAN,F
QA,XINGXING,M
QA,LISIDANNI,F
QA,JINSHA,F
===>
RD,M    PK|RUOZE
RD,F    WANQIAN
QA,M    XINGXING
QA,F    LISIDANNI|JINSHA

8. Column-to-row conversion (explode + lateral view)

page_ads:
page    ads
page1   [1,2,3]
page2   [3,4,5]
===>
page    ad_id
page1   1
page1   2
page1   3
page2   3
page2   4
page2   5

select page, ad_id
from page_ads lateral view explode(ads) tmp as ad_id;

select ad_id, count(1)
from page_ads lateral view explode(ads) tmp as ad_id group by ad_id;

9. Window functions: cume_dist / percent_rank

select
dept,user,sal,
round(cume_dist() over(order by sal),2) rn1,
round(cume_dist() over(partition by dept order by sal),2) rn2
from ruozedata_window02;

cume_dist = (rows with a value <= the current row's value) / (total rows in the partition)

dept     user       sal     rn1     rn2
dept01   ruoze      10000   0.2     0.33   (1/3)
dept01   jepson     20000   0.4     0.67   (2/3)
dept01   xingxing   30000   0.6     1.0    (3/3)
dept02   zhangsan   40000   0.8     0.5
dept02   lisi       50000   1.0     1.0

select 
dept,user,sal,
round(PERCENT_RANK() over(order by sal),2) rn1,
round(PERCENT_RANK() over(partition by dept order by sal),2) rn2
from ruozedata_window02;

percent_rank = (rank of the current row within the partition - 1) / (rows in the partition - 1)

dept     user       sal     rn1                 rn2
dept01   ruoze      10000   0.0   (1-1)/(5-1)   0.0   (1-1)/(3-1)
dept01   jepson     20000   0.25  (2-1)/(5-1)   0.5   (2-1)/(3-1)
dept01   xingxing   30000   0.5                 1.0
dept02   zhangsan   40000   0.75                0.0
dept02   lisi       50000   1.0                 1.0

10. lag, lead, FIRST_VALUE, LAST_VALUE

Look back (previous rows):
select
cookieid,
time,
lag(time, 1, '1970-01-01 00:00:00') over(partition by cookieid order by time) pre1,
lag(time, 2) over(partition by cookieid order by time) pre2
from window03;

Look ahead (following rows):
select cookieid,time,url,
lead(time, 1, '1970-01-01 00:00:00') over(partition by cookieid order by time) next1,
lead(time, 2) over(partition by cookieid order by time) next2
from window03;

select cookieid,time,url,
FIRST_VALUE(url) over(partition by cookieid order by time) rn
from window03;

select cookieid,time,url,
LAST_VALUE(url) over(partition by cookieid order by time) rn
from window03;
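Note that with the default window frame (unbounded preceding to current row), LAST_VALUE simply returns the current row's url. A sketch of how to get the true last url per cookieid by spelling out the frame:

select cookieid,time,url,
LAST_VALUE(url) over(partition by cookieid order by time
                     rows between unbounded preceding and unbounded following) rn
from window03;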

11. UDTF (user-defined table-generating function)

package com.hpznyf.hive.UDTF;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

import java.util.ArrayList;
import java.util.List;

/**
 * Register in Hive:
 *   add jar /home/hadoop/lib/hpznyf-hive-1.0.jar;
 *   create temporary function hpznyf_split as "com.hpznyf.hive.UDTF.SplitUDTF";
 */
public class SplitUDTF extends GenericUDTF {

    // Reused output row: one column ("word") per forwarded row
    private final List<String> outputRow = new ArrayList<>();

    @Override
    public StructObjectInspector initialize(StructObjectInspector argOIs) throws UDFArgumentException {
        // Declare the output schema: a single string column named "word"
        List<String> structFieldNames = new ArrayList<>();
        List<ObjectInspector> structFieldObjectInspectors = new ArrayList<>();

        structFieldNames.add("word");
        structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(structFieldNames, structFieldObjectInspectors);
    }

    @Override
    public void process(Object[] args) throws HiveException {
        // args[0]: the input string; args[1]: the separator (treated as a regex by String.split)
        String line = args[0].toString();
        String separator = args[1].toString();
        String[] fields = line.split(separator);

        // Emit one output row per split field
        for (String field : fields) {
            outputRow.clear();
            outputRow.add(field);
            forward(outputRow);
        }
    }

    @Override
    public void close() throws HiveException {
        // Nothing to clean up
    }
}
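After add jar and create temporary function (as in the class comment), the UDTF can be called directly or through lateral view; the lines table and line column below are hypothetical:

select hpznyf_split('a,b,c', ',');

select t.word
from lines lateral view hpznyf_split(line, ',') t as word;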

12. Execution flow

select * from emp;

A Hive query is compiled into a tree of operators, for example:
TableScanOperator
ReduceSinkOperator => marks the Map -> Reduce boundary
JoinOperator
MapJoinOperator
SelectOperator
FileSinkOperator
FilterOperator
GroupByOperator
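To see which operators a given query compiles into, run EXPLAIN; for example (assuming emp has a deptno column, and the exact plan output varies by Hive version):

explain select deptno, count(1) from emp group by deptno;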
