1. UDF
1.1 概述
UDF(User-Defined Function):一进一出,即一行输入对应一行输出
编程步骤:
(1)继承org.apache.hadoop.hive.ql.exec.UDF;
(2)需要实现evaluate函数;evaluate函数支持重载;
注意事项:
(1)UDF必须要有返回类型,可以返回null,但是返回类型不能为void;
(2)UDF中常用Text/LongWritable等类型,不推荐使用java类型;
1.2 编程实现
实现转大写
package HiveStudy;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
 * Hive UDF that converts a string to upper case.
 *
 * <p>The class extends Hive's {@code UDF} base class and exposes its logic
 * through a public {@code evaluate} method.
 */
public class HiveUdf01 extends UDF {

    /**
     * Returns the upper-cased form of {@code input}.
     *
     * @param input the value to convert; may be {@code null}
     * @return {@code input} converted to upper case, or {@code null} when
     *         {@code input} is {@code null}
     */
    public String evaluate(String input) {
        // A NULL column value reaches the UDF as a Java null; propagate it
        // instead of failing the query with a NullPointerException.
        if (input == null) {
            return null;
        }
        return input.toUpperCase();
    }

    /**
     * Local smoke test: prints the result of {@code evaluate("aBc")}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String input = "aBc";
        System.out.println(new HiveUdf01().evaluate(input));
    }
}
2. UDAF
2.1 概述
UDAF(User-Defined Aggregation Function) 聚集函数,多进一出
编程步骤:
(1)继承org.apache.hadoop.hive.ql.exec.UDAF;
(2)实现UDAFEvaluator接口;
(3)实现init()、iterate()、terminatePartial()、merge()、terminate() 共5个方法;
2.2 编程实现
实现类似sum函数的功能
package HiveStudy;
import MapreduceStudy.MyReducer;
import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
import org.apache.log4j.Logger;
/**
 * Hive UDAF that sums integer values, similar to the built-in {@code sum}.
 *
 * <p>The aggregation logic lives in the nested {@link Evaluator}, which keeps a
 * running {@code int} total. The total is not protected against overflow.
 */
public class MySumUDAF extends UDAF {

    /** Logger for this UDAF; named after this class so log lines are attributed correctly. */
    public static final Logger logger = Logger.getLogger(MySumUDAF.class);

    /**
     * Evaluator holding the running sum and implementing the five UDAF
     * lifecycle methods: init, iterate, terminatePartial, merge, terminate.
     */
    public static class Evaluator implements UDAFEvaluator {

        /** Running sum accumulated by {@link #iterate} and {@link #merge}. */
        public int total = 0;

        /** Resets the running sum to zero. */
        @Override
        public void init() {
            total = 0;
            logger.info("init method total = " + total);
        }

        /**
         * Adds one input value to the running sum.
         *
         * @param column the value to add; a {@code null} (SQL NULL) is skipped
         * @return always {@code true}
         */
        public boolean iterate(Integer column) {
            // The wrapper type lets a NULL arrive as null; a primitive int
            // parameter has no way to represent it.
            if (column != null) {
                total += column;
            }
            logger.info("iterate total = " + total);
            return true;
        }

        /**
         * Returns the partial sum accumulated so far.
         *
         * @return the current running sum
         */
        public int terminatePartial() {
            logger.info("terminatePartial total = " + total);
            return total;
        }

        /**
         * Folds a partial sum produced by {@link #terminatePartial()} into
         * the running sum.
         *
         * @param mapOutPut the partial sum to add; {@code null} is skipped
         * @return always {@code true}
         */
        public boolean merge(Integer mapOutPut) {
            if (mapOutPut != null) {
                total += mapOutPut;
            }
            logger.info("merge total = " + total);
            return true;
        }

        /**
         * Returns the final aggregated sum.
         *
         * @return the running sum
         */
        public int terminate() {
            logger.info("terminate total = " + total);
            return total;
        }
    }
}
POM文件
<!-- Additional artifact repositories declared for this build. -->
<repositories>
<!-- Cloudera repository; presumably the source of the "-cdh5.14.2" artifacts used by this POM (confirm). -->
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
<!-- Aliyun-hosted repository registered under the id "spring". -->
<repository>
<id>spring</id>
<url>https://maven.aliyun.com/repository/spring</url>
</repository>
</repositories>
<!-- Project dependencies: Hadoop and Hive artifacts (all CDH 5.14.2 builds) plus test frameworks. -->
<dependencies>
<!-- Hadoop client, MR1 flavour of the CDH 5.14.2 build. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.6.0-mr1-cdh5.14.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.6.0-cdh5.14.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.6.0-cdh5.14.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>2.6.0-cdh5.14.2</version>
</dependency>
<!-- hive-exec provides the org.apache.hadoop.hive.ql.exec.UDF / UDAF base classes. -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>1.1.0-cdh5.14.2</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>1.1.0-cdh5.14.2</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-cli</artifactId>
<version>1.1.0-cdh5.14.2</version>
</dependency>
<!-- Test-only dependencies. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<!-- Pinned to a fixed version: the RELEASE metaversion is deprecated and makes builds non-reproducible. -->
<version>6.14.3</version>
<scope>test</scope>
</dependency>
</dependencies>
<!-- Build configuration: compile for Java 1.8 and produce a shaded jar during the package phase. -->
<build>
<plugins>
<!-- Compiler settings: Java 1.8 source/target, UTF-8 source encoding. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
<encoding>UTF-8</encoding>
</configuration>
</plugin>
<!-- Shade plugin: bundles the project and its dependencies into a single jar. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.4.3</version>
<configuration>
<filters>
<filter>
<artifact>*:*</artifact>
<!-- Important: exclude *.SF, *.DSA and *.RSA signature files from every artifact when packaging. -->
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
</configuration>
<executions>
<!-- Run the shade goal in the package phase. -->
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<!-- minimizeJar: per the shade plugin documentation, drops dependency classes the project does not use. -->
<minimizeJar>true</minimizeJar>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
–The End–