pom.xml
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<hive.version>1.2.1</hive.version>
</properties>
<repositories>
<repository>
<id>spring-plugin</id>
<url>https://repo.spring.io/plugins-release/</url>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
UDF
package com.chen.hive;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.json.JSONException;
import org.json.JSONObject;
public class MyUDF extends UDF {

    /**
     * Extracts one field from a log line formatted as {@code "<timestamp>|<json>"}.
     *
     * @param source the raw log line; expected shape is a millisecond timestamp,
     *               a {@code |} separator, then a JSON object payload
     * @param param  the field to extract: {@code "ts"} (leading timestamp),
     *               {@code "ap"} / {@code "et"} (top-level JSON keys), or any
     *               key nested inside the payload's {@code "cm"} object
     * @return the requested field value, or {@code ""} when the input is null,
     *         the key is absent from the line, or the line has no JSON payload
     * @throws JSONException if the JSON payload is malformed or the key is
     *                       missing from the parsed object
     */
    public String evaluate(String source, String param) throws JSONException {
        // Null guard: the original would NPE on a null source or param.
        if (source == null || param == null) {
            return "";
        }
        String[] words = source.split("\\|");
        // Handle "ts" BEFORE touching the JSON part: the timestamp is words[0],
        // so parsing words[1] here (as the original did) risked an
        // ArrayIndexOutOfBoundsException / JSONException for no reason.
        if ("ts".equals(param)) {
            return words[0].trim();
        }
        // Cheap pre-filter: a key that never occurs in the raw line cannot be
        // extracted from its JSON payload.
        if (!source.contains(param)) {
            return "";
        }
        // Guard lines that carry no JSON payload after the separator.
        if (words.length < 2) {
            return "";
        }
        JSONObject root = new JSONObject(words[1]);
        if ("ap".equals(param)) {
            return root.getString("ap");
        } else if ("et".equals(param)) {
            return root.getString("et");
        } else {
            // Every remaining field lives inside the common "cm" object.
            return root.getJSONObject("cm").getString(param);
        }
    }
}
UDTF
package com.chen.hive;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class MyUDTF extends GenericUDTF {

    /**
     * Declares the UDTF's output schema: two string columns,
     * {@code event_name} and {@code event_json}.
     */
    @Override
    public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
        List<String> fieldNames = new ArrayList<>();
        fieldNames.add("event_name");
        fieldNames.add("event_json");
        List<ObjectInspector> fieldOIs = new ArrayList<>();
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    /**
     * Explodes a JSON array of events into one output row per event.
     * Each row is (event "en" field, full event JSON string). Malformed
     * individual events are skipped; a payload that is not valid JSON at
     * all produces no rows.
     *
     * @param args args[0] is expected to hold the JSON-array string
     * @throws HiveException if forwarding a row to the collector fails
     */
    @Override
    public void process(Object[] args) throws HiveException {
        // BUG FIX: check the array length BEFORE indexing args[0]; the
        // original order (args[0] == null || args.length == 0) threw
        // ArrayIndexOutOfBoundsException on an empty args array.
        if (args == null || args.length == 0 || args[0] == null) {
            return;
        }
        try {
            JSONArray events = new JSONArray(args[0].toString());
            // `new JSONArray(...)` never returns null, so only length matters
            // (the original's null check was dead code).
            if (events.length() == 0) {
                return;
            }
            for (int i = 0; i < events.length(); i++) {
                try {
                    JSONObject event = events.getJSONObject(i);
                    String[] row = new String[2];
                    row[0] = event.getString("en"); // event name
                    row[1] = event.toString();      // full event JSON
                    forward(row);
                } catch (JSONException ignored) {
                    // Best-effort: skip this malformed event, keep the rest.
                    // Narrowed from catch(Exception) so that a HiveException
                    // from forward() is no longer silently swallowed.
                }
            }
        } catch (JSONException e) {
            // The whole payload was not a JSON array; emit nothing.
            e.printStackTrace();
        }
    }

    /** No resources to release. */
    @Override
    public void close() throws HiveException {
    }
}
Test
package com.chen.hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.json.JSONException;
import org.junit.Test;
import java.util.ArrayList;
public class MyUDTFTest {

    /**
     * Sample log line in the expected "&lt;timestamp&gt;|&lt;json&gt;" shape.
     * BUG FIX: the original referenced an undefined variable {@code str},
     * so the test class did not compile. The "et" value is a JSON array
     * encoded as a string, matching what MyUDTF.process consumes.
     */
    private static final String str =
            "1593136280858|{\"ap\":\"app\",\"cm\":{\"ln\":\"-55.0\"},"
            + "\"et\":\"[{\\\"en\\\":\\\"start\\\",\\\"kv\\\":{}}]\"}";

    /**
     * Smoke test: extracts the timestamp with MyUDF, then feeds the
     * extracted "et" array through MyUDTF.process.
     */
    @Test
    public void testUDF() throws JSONException, HiveException {
        MyUDF myUDF = new MyUDF();
        MyUDTF myUDTF = new MyUDTF();
        System.out.println(myUDF.evaluate(str, "ts"));
        Object[] args = new Object[2];
        args[0] = myUDF.evaluate(str, "et");
        myUDTF.process(args);
    }
}
添加到hive中
- 打包
- hive安装目录下创建auxlib并上传到该目录中(hive中auxlib会自动加载jar包)
- 重新启动hive
- hive中使用命令 创建函数
create function base_analizer as 'com.chen.hive.MyUDF';