测试数据为:
1541217850325|{"cm":{"mid":"m7856","uid":"u8739","ln":"-74.8","sv":"V2.2.2","os":"8.1.3","g":"P8XC9326@163.com","nw":"5G","l":"es","vc":"6","hw":"640*960","ar":"MX","t":"1541204134250","la":"-31.7","md":"huaweiPro","vn":"1.1.2","sr":"O","ba":"Huawei"},"ap":"weather","et":[{"ett":"1541146624055","en":"display","kv":{"goodsid":"n4195","copyright":"ESPN","content_provider":"CNN","extend2":"5","action":"2","extend1":"2","place":"3","showtype":"2","category":"72","newstype":"5"}},{"ett":"1541213331817","en":"loading","kv":{"extend2":"","loading_time":"15","action":"3","extend1":"","type1":"","type":"3","loading_way":"1"}},{"ett":"1541126195645","en":"ad","kv":{"entry":"3","show_style":"0","action":"2","detail":"325","source":"4","behavior":"2","content":"1","newstype":"5"}},{"ett":"1541202678812","en":"notification","kv":{"ap_time":"1541184614380","action":"3","type":"4","content":""}},{"ett":"1541194686688","en":"active_background","kv":{"active_source":"3"}}]}
需要在 pom.xml 中导入以下依赖:
<properties>
    <!-- Use the canonical charset name; "UTF8" is only a Java-specific alias. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- Hive version, referenced by the hive-exec dependency below. -->
    <hive.version>1.2.1</hive.version>
</properties>
<dependencies>
    <!-- Hive dependency: provides org.apache.hadoop.hive.ql.exec.UDF, the base class of the UDF. -->
    <!-- NOTE(review): if the jar only ever runs inside Hive, consider <scope>provided</scope>
         so the assembled jar does not bundle Hive itself; confirm before changing. -->
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-exec</artifactId>
        <version>${hive.version}</version>
    </dependency>
</dependencies>
在 pom.xml 中添加 build 配置:
<!-- Build configuration: compile for Java 1.8 and additionally assemble a jar with dependencies. -->
<build>
<plugins>
<!-- Compiler plugin: accept Java 1.8 source and emit 1.8-compatible bytecode. -->
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<!-- Assembly plugin: uses the predefined "jar-with-dependencies" descriptor. -->
<!-- NOTE(review): no <version> is pinned for this plugin; consider pinning one for reproducible builds. -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<!-- Bind the "single" goal to the package phase so the assembly is built during packaging. -->
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
创建UDF项目:
编写一个类,继承 UDF 类。
在该类中编写 evaluate() 方法,实现自己的解析逻辑:
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.commons.lang.StringUtils;
import org.json.JSONException;
import org.json.JSONObject;
/**
 * Hive UDF that flattens one raw log line of the form {@code <prefix>|<json>} into a single
 * tab-separated string.
 *
 * <p>The JSON part must contain an object under {@code "cm"} and a value under {@code "et"}.
 */
public class FieldUDF extends UDF {

    /**
     * Extracts the requested {@code "cm"} fields, the {@code "et"} value and the line prefix.
     *
     * <p>The result contains, in order and each followed by a tab character: the value of every
     * requested key from the {@code "cm"} object (empty when the key is absent), the text of the
     * {@code "et"} value, and the part of the line before the {@code '|'}.
     *
     * @param line      raw log line, expected to consist of exactly two {@code '|'}-separated parts
     * @param keystring comma-separated names of the {@code "cm"} fields to extract; surrounding
     *                  whitespace around each name is ignored
     * @return the tab-separated fields; an empty string when either argument is {@code null},
     *         when the line does not split into exactly two parts, or when the JSON part is
     *         blank. If the JSON cannot be parsed or a required key is missing, the stack trace
     *         is printed and whatever was appended up to that point is returned.
     */
    public String evaluate(String line, String keystring) {
        // A null argument (e.g. a NULL column) is treated like a malformed line instead of
        // failing the whole query with a NullPointerException.
        if (line == null || keystring == null) {
            return "";
        }

        StringBuilder sb = new StringBuilder();
        String[] keys = keystring.split(",");
        // split() takes a regular expression, so '|' (like '.') must be escaped to be matched
        // literally; otherwise the line is not split at the separator.
        String[] contents = line.split("\\|");
        if (contents.length != 2 || StringUtils.isBlank(contents[1])) {
            return "";
        }

        try {
            JSONObject jsonObject = new JSONObject(contents[1]);
            JSONObject comment = jsonObject.getJSONObject("cm");
            for (String key : keys) {
                String fieldName = key.trim();
                if (comment.has(fieldName)) {
                    // get(...).toString() rather than getString(...): newer org.json releases
                    // throw from getString() when the stored value is not a String.
                    sb.append(comment.get(fieldName).toString());
                }
                // Always emit the separator so that column positions stay aligned even when a
                // requested field is missing.
                sb.append("\t");
            }
            // "et" holds a JSON array in the sample data; emit its JSON text as one column.
            sb.append(jsonObject.get("et").toString()).append("\t");
            sb.append(contents[0]).append("\t");
        } catch (JSONException e) {
            // Best effort: report the problem and fall through to return what was built so far.
            e.printStackTrace();
        }
        return sb.toString();
    }
}
测试运行结果:
m7856 u8739 6 1.1.2 es O 8.1.3 MX huaweiPro Huawei V2.2.2 P8XC9326@163.com 640*960 5G -74.8 -31.7 1541204134250 [{"ett":"1541146624055","en":"display","kv":{"copyright":"ESPN","content_provider":"CNN","extend2":"5","goodsid":"n4195","action":"2","extend1":"2","place":"3","showtype":"2","category":"72","newstype":"5"}},{"ett":"1541213331817","en":"loading","kv":{"extend2":"","loading_time":"15","action":"3","extend1":"","type1":"","type":"3","loading_way":"1"}},{"ett":"1541126195645","en":"ad","kv":{"entry":"3","show_style":"0","action":"2","detail":"325","source":"4","behavior":"2","content":"1","newstype":"5"}},{"ett":"1541202678812","en":"notification","kv":{"ap_time":"1541184614380","action":"3","type":"4","content":""}},{"ett":"1541194686688","en":"active_background","kv":{"active_source":"3"}}] 1541217850325