1.官方文档
https://cwiki.apache.org/confluence/display/Hive/HivePlugins
UDF:一进一出
UDAF:多进一出 (aggregation 聚集 类似于count/max/min)
UDTF:一进多出
2.查看自带的函数
show functions;
查看一个函数的使用
desc function extended upper ;
3.自定义函数
1.修改pom.xml
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.10</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.5.0</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>0.13.1</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>0.13.1</version>
</dependency>
</dependencies>
自定义一个函数(继承UDF类并实现evaluate方法):
package com.hive.udf;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
/**
 * Hive UDF that lower-cases its string input.
 *
 * A Hive UDF must implement one or more methods named "evaluate";
 * "evaluate" should never be a void method, however it can return
 * "null" if needed.
 *
 * Hive executes on MapReduce, so Hadoop Writable types (e.g. {@link Text})
 * are recommended for parameters and return values.
 *
 * @author wensimiao
 */
public class LowerUDF extends UDF {

    /**
     * Lower-cases the given text.
     *
     * @param str input text; may be null
     * @return a new lower-cased Text, or null when the input is null
     */
    public Text evaluate(Text str) {
        // validate: check the reference itself. The previous check
        // (null == str.toString()) dereferenced str before testing and
        // threw NullPointerException on null input; it could also never
        // be true, because Text.toString() never returns null.
        if (str == null) {
            return null;
        }
        // lower
        return new Text(str.toString().toLowerCase());
    }

    // quick local smoke test; expected output: wsm
    public static void main(String[] args) {
        System.out.println(new LowerUDF().evaluate(new Text("WSM")));
    }
}
本地测试成功
将自定义的类打包成一个jar,上传到Linux
命令行使用
add jar /opt/datas/hive_UDF.jar ;
create temporary function my_lower as "com.hive.udf.LowerUDF" ;
select ename,my_lower(ename) lowername from emp limit 5 ;
执行结果
4.创建永久函数(先将jar包上传到HDFS,再通过USING JAR注册,重启会话后仍可用)
CREATE FUNCTION myfunc AS 'myclass' USING JAR 'hdfs:///path/to/jar';