下面来说说 Hive 中 UDF 函数的使用:
以下是实现类似 MySQL 的 date_format 功能的 UDF 的 Java 代码:
package com.hadoopbook.hive;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
/**
 * Hive UDF that normalizes a date string to a given {@link SimpleDateFormat}
 * pattern, loosely modelled on MySQL's {@code date_format}.
 *
 * <p>The same pattern is used both to parse the input and to format the
 * output. Because {@link java.text.DateFormat#parse(String)} reads from the
 * start of the text and may leave trailing characters unconsumed, an input
 * such as {@code "2013-01-02 10:11:12"} with pattern {@code "yyyy-MM-dd"}
 * yields {@code "2013-01-02"}.
 */
public class DATE_FORMAT extends UDF {
    /** Output holder reused across calls so a new Text is not allocated per row. */
    private final Text result = new Text();

    /**
     * Re-formats {@code str} according to {@code stripChars}.
     *
     * @param str        the date text to convert; may be {@code null}
     * @param stripChars a {@link SimpleDateFormat} pattern used for both
     *                   parsing and formatting; may be {@code null}
     * @return the formatted date, or {@code null} when either argument is
     *         {@code null} or {@code str} cannot be parsed with the pattern
     * @throws IllegalArgumentException if {@code stripChars} is not a valid
     *                                  {@link SimpleDateFormat} pattern
     */
    public Text evaluate(Text str, String stripChars) {
        if (str == null || stripChars == null) {
            return null;
        }
        try {
            SimpleDateFormat sdf = new SimpleDateFormat(stripChars);
            Date date = sdf.parse(str.toString());
            result.set(sdf.format(date));
            return result;
        } catch (ParseException ignored) {
            // An unparseable value maps to NULL. Returning the shared buffer here
            // would leak the previous row's formatted date into this row.
            return null;
        }
    }
}
然后把 Java 代码导出(export)并打包成 jar 文件。
再把 jar 文件拷贝到 Hive 所在的机器上。
在 Hive 的客户端中依次输入:
-- Make the UDF jar available to the current Hive session.
ADD JAR /home/test/udf.jar;
-- Bind session-scoped function names to their implementing classes.
-- NOTE(review): the SUBSTRING_INDEX class is not shown here; it is presumably packaged in the same jar.
CREATE TEMPORARY FUNCTION DATE_FORMAT AS 'com.hadoopbook.hive.DATE_FORMAT';
CREATE TEMPORARY FUNCTION SUBSTRING_INDEX AS 'com.hadoopbook.hive.SUBSTRING_INDEX';
-- Copy rows from t_bz_tmp into t_bz, keeping only rows whose dt equals
-- CreateTime re-formatted as yyyy-MM-dd; the partition value comes from the dt column.
INSERT INTO TABLE t_bz PARTITION (dt)
SELECT LogId, CreateTime, dt
FROM t_bz_tmp
WHERE (dt = DATE_FORMAT(CreateTime, 'yyyy-MM-dd'));