Hive提供了User Defined Functions(UDF)的开发接口,供使用者扩展内置函数。上手比较简单,下面是开发并使用一个MD5 UDF的例子。
Java代码:
package org.nalang.hive.udf;

import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;

/**
 * Hive UDF that returns the upper-case hexadecimal MD5 digest of its first argument.
 *
 * <p>Output length is selected by the argument list:
 * <ul>
 *   <li>one argument: the 16-character form;</li>
 *   <li>two arguments whose second one renders as {@code "16"}: the 16-character form;</li>
 *   <li>anything else: the full 32-character digest.</li>
 * </ul>
 * The 16-character form is characters 8 (inclusive) to 24 (exclusive) of the full digest.
 */
public class Md5 extends UDF {

    /** Upper-case hex alphabet, indexed by nibble value. */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Computes the MD5 of {@code args[0]}.
     *
     * @param args the value to hash, optionally followed by the requested length
     * @return the digest as {@link Text}, or {@code null} when no argument was supplied
     * @throws IllegalStateException if the JVM lacks MD5 or UTF-8 support
     */
    public Text evaluate(Object... args) {
        // Previously an empty argument list failed with ArrayIndexOutOfBoundsException.
        if (args == null || args.length == 0) {
            return null;
        }
        // Compare on the string form so a null second argument does not throw and
        // non-String values (e.g. Text or Integer) that render as "16" are honoured.
        boolean shortForm = args.length == 1
                || (args.length == 2 && "16".equals(String.valueOf(args[1])));
        return new Text(rawMd5(args[0], shortForm ? 16 : 32));
    }

    /**
     * Returns the 32-character upper-case hex MD5 of the UTF-8 bytes of {@code str}.
     *
     * @throws IllegalStateException if MD5 or UTF-8 is unavailable; the cause is preserved
     */
    private String rawMd5(String str) {
        byte[] digest;
        try {
            digest = MessageDigest.getInstance("MD5").digest(str.getBytes("UTF-8"));
        } catch (NoSuchAlgorithmException e) {
            // Fail this call only; System.exit here would kill the whole task JVM.
            throw new IllegalStateException("MD5 MessageDigest is not available", e);
        } catch (UnsupportedEncodingException e) {
            // Do not continue: that would silently return the digest of empty input.
            throw new IllegalStateException("UTF-8 charset is not available", e);
        }

        StringBuilder hex = new StringBuilder(digest.length * 2);
        for (byte b : digest) {
            hex.append(HEX_DIGITS[(b >> 4) & 0x0F]).append(HEX_DIGITS[b & 0x0F]);
        }
        return hex.toString();
    }

    /**
     * Hashes {@code str} and trims the result to the requested length.
     *
     * @param str    value to hash; it is first passed through {@code StringUtils.StringFilter}
     * @param length {@code 16} for the 16-character form; any other value yields all 32
     */
    private String rawMd5(Object str, int length) {
        // NOTE(review): StringUtils is declared outside this snippet; presumably it
        // normalises the value to a String (and may handle null) - confirm.
        String full = rawMd5(StringUtils.StringFilter(str));
        return length == 16 ? full.substring(8, 24) : full;
    }
}
Hive应用:
-- Register the UDF jar, bind the function name to the UDF class, then call it.
-- The class name must be the fully qualified name of the Java class above
-- (package org.nalang.hive.udf), otherwise Hive cannot load it.
hive> add jar /home/work/local/hive/udf/udf.jar;
hive> create temporary function md5 as 'org.nalang.hive.udf.Md5';
hive> select md5('test') from taba limit 1;
参考:
https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF