Pig中Java编写UDF函数
idea中新建Maven项目
- 添加依赖到pom.xml文件
<dependency>
<groupId>org.apache.pig</groupId>
<artifactId>pig</artifactId>
<version>0.17.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>0.20.2</version>
</dependency>
- 新建Sample_Eval.java文件（类位于org.qfnu包中，文件首行需声明package org.qfnu;，与后面define语句使用的全限定类名org.qfnu.Sample_Eval保持一致）
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
import java.io.IOException;
/**
 * User-defined Pig eval function that upper-cases the first field of a tuple.
 *
 * <p>Only field 0 of the input tuple is read; any further fields are ignored.
 *
 * @author user
 * Created 2021-04-21 8:24
 */
public class Sample_Eval extends EvalFunc<String> {

    /**
     * Converts field 0 of {@code input} to upper case.
     *
     * <p>The field is cast to {@link String}; a non-String value still raises
     * {@link ClassCastException}, as it did before.
     *
     * @param input the tuple handed to the UDF; may be {@code null} or empty
     * @return the upper-cased value of field 0, or {@code null} when the tuple is
     *         {@code null}, has no fields, or field 0 itself is {@code null}
     * @throws IOException if reading the field from the tuple fails
     */
    @Override
    public String exec(Tuple input) throws IOException {
        if (input == null || input.size() == 0) {
            return null;
        }

        // A record can carry a null field (e.g. a missing value in the data file);
        // propagate null instead of failing with a NullPointerException.
        Object first = input.get(0);
        if (first == null) {
            return null;
        }

        // Locale.ROOT keeps the result independent of the JVM's default locale.
        return ((String) first).toUpperCase(java.util.Locale.ROOT);
    }
}
- 导出包
将jar包上传到虚拟机进行测试
-
注册
register '/home/hduser/SampleEval.jar';
-
使用define命令定义别名
define sample_eval org.qfnu.Sample_Eval();
-
数据文件
001,Robin,22,newyork
002,Naya,23,Kolkata
003,Maya,23,Tokyo
004,Sara,25,London
005,David,23,Bhuwaneshwar
006,Maggy,22,Chennai
007,Robert,22,newyork
008,Syam,23,Kolkata
009,Mary,25,Tokyo
010,Saran,25,London
011,Stacy,25,Bhuwaneshwar
012,Kelly,22,Chennai
-
加载数据
emp_data = LOAD '/home/hduser/pig/emp1.txt' USING PigStorage(',')as (id:int, name:chararray, age:int, city:chararray);
-
调用UDF函数
Upper_case = FOREACH emp_data GENERATE sample_eval(name);
-
输出结果
dump Upper_case;