- Create a Maven project and add the hive-exec dependency (the pentaho-aggdesigner-algorithm exclusion avoids a transitive artifact that is not available in Maven Central and often breaks the build):
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-exec</artifactId>
    <version>1.2.1</version>
    <exclusions>
        <exclusion>
            <groupId>org.pentaho</groupId>
            <artifactId>pentaho-aggdesigner-algorithm</artifactId>
        </exclusion>
    </exclusions>
</dependency>
- Add the compiler and assembly plugins so the project can be built into a deployable jar (the Hive libraries themselves already exist on the server, so they do not need to be shipped):
<build>
    <plugins>
        <plugin>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>2.3.2</version>
            <configuration>
                <source>1.8</source>
                <target>1.8</target>
            </configuration>
        </plugin>
        <plugin>
            <artifactId>maven-assembly-plugin</artifactId>
            <configuration>
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
            </configuration>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <phase>package</phase>
                    <goals>
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
- Create a BaseFieldUDF class that extends org.apache.hadoop.hive.ql.exec.UDF and implements an evaluate method:
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.json.JSONException;
import org.json.JSONObject;

public class BaseFieldUDF extends UDF {
    public String evaluate(String line, String jsonkeyString) {
        StringBuilder sb = new StringBuilder();
        // The requested common fields, e.g. "mid,uid,vc"
        String[] jsonkeys = jsonkeyString.split(",");
        // A raw log line has the form "<server_time>|<json>"
        String[] logContents = line.split("\\|");
        if (logContents.length != 2 || StringUtils.isBlank(logContents[1])) {
            return "";
        }
        try {
            JSONObject jsonObject = new JSONObject(logContents[1]);
            // "cm" holds the common (base) fields
            JSONObject base = jsonObject.getJSONObject("cm");
            for (int i = 0; i < jsonkeys.length; i++) {
                String field = jsonkeys[i].trim();
                if (base.has(field)) {
                    sb.append(base.get(field)).append("\t");
                } else {
                    sb.append("\t");
                }
            }
            // Append the event list ("et") and the server timestamp
            sb.append(jsonObject.getString("et")).append("\t");
            sb.append(logContents[0]).append("\t");
        } catch (JSONException e) {
            e.printStackTrace();
        }
        return sb.toString();
    }
}
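To sanity-check the UDF once it is registered (the registration commands appear at the end of this post), a minimal HiveQL sketch; the table ods_event_log and column line are hypothetical names for the raw log table:

select base_analyse(line, 'mid,uid,vc') from ods_event_log limit 10;

Each output row is a tab-separated string: the requested base fields, then the event list, then the server timestamp.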
- Create an EventJsonUDTF class that extends org.apache.hadoop.hive.ql.udf.generic.GenericUDTF and implements initialize, process, and close:
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.json.JSONArray;
import org.json.JSONException;

public class EventJsonUDTF extends GenericUDTF {

    /**
     * Declare the names and types of the output columns.
     */
    @Override
    public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
        List<String> fieldNames = new ArrayList<>();
        List<ObjectInspector> fieldOIs = new ArrayList<>();
        fieldNames.add("event_name");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        fieldNames.add("event_json");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    /**
     * Take one input row and emit multiple output rows, one per event.
     */
    @Override
    public void process(Object[] args) throws HiveException {
        // Get the incoming event_list (a JSON array string)
        String input = args[0].toString();
        if (StringUtils.isBlank(input)) {
            return;
        }
        try {
            JSONArray events = new JSONArray(input);
            for (int i = 0; i < events.length(); i++) {
                // Holds event_name and event_json
                String[] result = new String[2];
                try {
                    // Extract the event name
                    result[0] = events.getJSONObject(i).getString("en");
                    // Extract the whole event as JSON
                    result[1] = events.getString(i);
                } catch (JSONException e) {
                    continue;
                }
                // Emit the row, much like writing to the context in MapReduce
                forward(result);
            }
        } catch (JSONException e) {
            e.printStackTrace();
        }
    }

    @Override
    public void close() throws HiveException {
    }
}
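Once registered, a UDTF like this is normally applied through LATERAL VIEW, so each event in the array becomes its own row. A sketch, assuming a staging table tmp_event_base with a string column event_list (both names hypothetical):

select event_name, event_json
from tmp_event_base
lateral view json_analyse(event_list) tmp_flat as event_name, event_json;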
- Build the project with mvn clean package, upload the jar to the server, open the Hive CLI, add the jar, and create temporary functions:
add jar hiveudf-1.0-SNAPSHOT.jar;
create temporary function base_analyse as 'com.spark.udf.BaseFieldUDF';
create temporary function json_analyse as 'com.spark.udtf.EventJsonUDTF';
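Putting the two together, a hedged end-to-end sketch (ods_event_log and line are hypothetical names; with the keys 'mid,uid,vc', the evaluate method above places the event list at tab-index 3 of its output):

select split(base_analyse(line, 'mid,uid,vc'), '\t')[0] as mid,
       event_name,
       event_json
from ods_event_log
lateral view json_analyse(split(base_analyse(line, 'mid,uid,vc'), '\t')[3]) tmp as event_name, event_json;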