目录
虽然Hive中内置了很多函数,但是Hive也给我们提供了自定义函数的接口,方便我们自定义函数进行调用,从而减少代码的编写量。
一、添加依赖
<!-- Maven dependencies for building the custom Hive functions shown in this article. -->
<dependencies>
<!-- Hive execution library, version 3.1.2. -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>3.1.2</version>
</dependency>
<!-- Hadoop client library, version 3.1.3. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.1.3</version>
</dependency>
<!-- Hive JDBC driver, version 3.1.2, with two transitive dependencies excluded. -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>3.1.2</version>
<exclusions>
<!-- NOTE(review): presumably excluded to avoid resolution/conflict problems; confirm. -->
<exclusion>
<groupId>org.glassfish</groupId>
<artifactId>javax.el</artifactId>
</exclusion>
<exclusion>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- JUnit 4.12, available on the test classpath only. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
</dependencies>
二、编写自定义UDF函数
在Hive会话中通过 create temporary function 创建的临时函数只在当前会话内有效,断开连接后就会丢失;因此需要先用Java编写函数,打包成jar后放入Hive的lib目录,再在Hive中注册使用。
(一)自定义首字母大写函数
1.java代码
import org.apache.hadoop.hive.ql.exec.UDF;
import java.util.Locale;
/**
 * Hive UDF that upper-cases the first character of a string.
 *
 * <p>Leading and trailing whitespace is removed before capitalising, so
 * {@code "  hello "} becomes {@code "Hello"}.
 */
public class InitialString extends UDF {

    /** Local smoke test: prints the result of capitalising {@code "hello"}. */
    public static void main(String[] args) {
        InitialString ini = new InitialString();
        String hello = ini.evaluate("hello");
        System.out.println(hello);
    }

    /**
     * Capitalises the first character of the trimmed input.
     *
     * @param txt the value to transform; may be {@code null}
     * @return {@code null} when {@code txt} is {@code null}, the empty string when
     *         {@code txt} is empty or blank, otherwise the trimmed text with its first
     *         character upper-cased using {@link Locale#ROOT}
     */
    public String evaluate(final String txt) {
        if (txt == null) {
            // NULL in, NULL out instead of a NullPointerException.
            return null;
        }
        // Use the trimmed text for BOTH parts; mixing trimmed and untrimmed text
        // turned " hello" into "Hhello".
        final String trimmed = txt.trim();
        if (trimmed.isEmpty()) {
            // Nothing to capitalise; substring(0, 1) would throw here.
            return trimmed;
        }
        return trimmed.substring(0, 1).toUpperCase(Locale.ROOT) + trimmed.substring(1);
    }
}
maven编译打包,放到hive的lib目录下
2.hive中运行
-- 加载jar包
hive (default)> add jar /opt/soft/hive312/lib/hivestu-1.0-SNAPSHOT.jar;
-- 创建函数
hive (default)> create temporary function myudf as 'test.udf.InitialString';-- 类的全路径
-- 调用函数并输入参数
hive (default)> select myudf('hello');
Hello
hive (default)> select myudf('hadoop');
Hadoop
hive (default)> select myudf('java');
Java
(二)自定义将两个字符串用逗号拼接并转为小写的函数
1.java代码
import org.apache.hadoop.hive.ql.exec.UDF;
/**
 * Hive UDF that joins two strings with a comma and lower-cases the result,
 * e.g. {@code ("HELLO", "JAVA")} yields {@code "hello,java"}.
 */
public class LowerUDF extends UDF {

    /** Local smoke test: prints the result for {@code "HELLO"} and {@code "JAVA"}. */
    public static void main(String[] args) {
        LowerUDF lowerUDF = new LowerUDF();
        String evaluate = lowerUDF.evaluate("HELLO", "JAVA");
        System.out.println(evaluate);
    }

    /**
     * Concatenates the two arguments with a comma and converts the result to lower case.
     *
     * @param txt  the first value; may be {@code null}
     * @param txt2 the second value; may be {@code null}
     * @return {@code null} if either argument is {@code null}, otherwise
     *         {@code txt + "," + txt2} in lower case
     */
    public String evaluate(final String txt, final String txt2) {
        if (txt == null || txt2 == null) {
            // Plain concatenation would embed the literal text "null" in the result.
            return null;
        }
        String res = txt + "," + txt2;
        // Locale.ROOT keeps the result independent of the JVM's default locale
        // (fully qualified because this snippet does not import java.util.Locale).
        return res.toLowerCase(java.util.Locale.ROOT);
    }
}
2.hive运行
hive (default)> create function mylower as 'test.udf.LowerUDF';
hive (default)> select mylower('HELLO','JAVA');
hello,java
(三)创建解析JSON字符串的函数
1.java代码
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.json.JSONObject;
/**
 * Hive UDF that extracts one field from a line of the form {@code tel|json}, for example
 * {@code 15850500365|{"name":"zhangsan","age":50,"address":"上海"}}.
 *
 * <p>Supported keys are {@code tel} (the part before the first {@code |}) and the JSON
 * members {@code name}, {@code age} and {@code address}.
 */
public class ThreeUDF extends UDF {

    /** Local smoke test: prints the {@code address} member of a sample line. */
    public static void main(String[] args) {
        ThreeUDF threeUDF = new ThreeUDF();
        String tel = threeUDF.evaluate("15850500365|{\"name\":\"zhangsan\",\"age\":50,\"address\":\"上海\"}", "address");
        System.out.println(tel);
    }

    /**
     * Looks up {@code key} in a {@code tel|json} line.
     *
     * @param line the raw line; may be {@code null}
     * @param key  {@code tel}, {@code name}, {@code age} or {@code address}; may be {@code null}
     * @return {@code null} if either argument is {@code null}; the empty string if the line
     *         does not consist of a phone part and a non-blank JSON part; the requested
     *         value rendered as text; or {@code "nothave"} if the key is unsupported or
     *         absent from the JSON
     */
    public String evaluate(String line, String key) {
        if (line == null || key == null) {
            // NULL in, NULL out instead of a NullPointerException.
            return null;
        }
        // Split on the first '|' only, so a '|' inside a JSON value does not make the
        // line look malformed.
        String[] infos = line.split("\\|", 2);
        if (infos.length != 2 || StringUtils.isBlank(infos[1])) {
            return "";
        }
        if (key.equals("tel")) {
            return infos[0];
        }
        JSONObject object = new JSONObject(infos[1].trim());
        if (isJsonField(key) && object.has(key)) {
            // get(...) + String.valueOf also works for non-string members such as the
            // numeric "age"; getString rejects those in current org.json releases.
            return String.valueOf(object.get(key));
        }
        return "nothave";
    }

    /** Returns whether {@code key} names one of the JSON members this function exposes. */
    private static boolean isJsonField(String key) {
        return key.equals("name") || key.equals("age") || key.equals("address");
    }
}
三、编写自定义UDTF函数
1.java编写
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import java.util.ArrayList;
import java.util.List;
/**
 * UDTF that turns one input row into several output rows by splitting a string.
 *
 * <p>Input: {@code "hello,world,zhangsan,shanghai"}, {@code ","}
 *
 * <p>Output, one row per element:
 * <pre>
 * hello
 * world
 * zhangsan
 * shanghai
 * </pre>
 *
 * <p>The separator is passed to {@link String#split(String)} and is therefore interpreted
 * as a regular expression; characters such as {@code $} must be escaped by the caller.
 */
public class MyUDTF extends GenericUDTF {

    /** Single-element row buffer that is cleared and refilled for every forwarded word. */
    private final List<String> wordList = new ArrayList<String>();

    /**
     * Validates the argument count and declares the output schema: one string column
     * named {@code word}.
     *
     * @param argOIs inspectors describing the arguments the function was called with
     * @return the inspector for the single-column output rows
     * @throws UDFArgumentException if fewer than two arguments were supplied
     */
    @Override
    public StructObjectInspector initialize(StructObjectInspector argOIs)
            throws UDFArgumentException {
        // Fail at initialization rather than with an ArrayIndexOutOfBoundsException
        // in process().
        if (argOIs.getAllStructFieldRefs().size() < 2) {
            throw new UDFArgumentException(
                    "MyUDTF expects two arguments: the string to split and the separator");
        }

        // Output column names.
        List<String> fieldNames = new ArrayList<String>();
        fieldNames.add("word");

        // Output column types, parallel to fieldNames.
        List<ObjectInspector> fieldOIs = new ArrayList<>();
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);

        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    /**
     * Splits {@code args[0]} on the regular expression {@code args[1]} and forwards each
     * piece as its own row. A {@code null} string or separator produces no rows.
     *
     * @param args {@code args[0]} is the text to split, {@code args[1]} the separator regex
     * @throws HiveException if forwarding a row fails
     */
    @Override
    public void process(Object[] args) throws HiveException {
        if (args == null || args.length < 2 || args[0] == null || args[1] == null) {
            // Nothing to split; avoids a NullPointerException on NULL input.
            return;
        }
        String data = args[0].toString();
        String splitkey = args[1].toString();
        for (String word : data.split(splitkey)) {
            wordList.clear();
            wordList.add(word);
            forward(wordList);
        }
    }

    /** No resources are held, so there is nothing to release. */
    @Override
    public void close() throws HiveException {
    }
}
2.hive运行
hive (default)> create function myudtf as 'test.udtf.MyUDTF';
hive (default)> select myudtf('aa,bb,cc,dd',',');
word
aa
bb
cc
dd
hive (default)> select myudtf('aa$bb$cc$dd','$');
word
aa$bb$cc$dd
hive (default)> select myudtf('aa$bb$cc$dd','\\$');
word
aa
bb
cc
dd