0. Dependencies
<dependencies>
<!-- add the Hive dependency -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>3.1.2</version>
</dependency>
</dependencies>
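Note: since hive-exec is already present on the Hive server's classpath, this dependency is usually declared with provided scope so that it is not bundled into the UDF jar.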
1. UDF
Purpose: given a date string, return the corresponding zodiac sign.
Key points: 1. extend UDF; 2. put the logic in an evaluate() method.
Code:
import java.text.SimpleDateFormat;
import java.util.Calendar;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
public class UDFDemo extends UDF {
private SimpleDateFormat df;
public UDFDemo(){
df= new SimpleDateFormat("yyyy-MM-dd");
}
public String evaluate(String date_str) throws Exception{
int month;
int day;
try{
java.util.Date bday = df.parse(date_str);
Calendar cal = Calendar.getInstance();
cal.setTime(bday);
month = cal.get(Calendar.MONTH)+1;
day = cal.get(Calendar.DAY_OF_MONTH);
}catch (Exception e) {
return null;
}
if(month==1){
if(day<20){
return "魔蝎座";
}else{
return "水瓶座";
}
}
if(month==2){
if(day<19){
return "水瓶座";
}else{
return "双鱼座";
}
}
if(month==3){
if(day<21){
return "双鱼座";
}else{
return "白羊座";
}
}
if(month==4){
if(day<20){
return "白羊座";
}else{
return "金牛座";
}
}
if(month==5){
if(day<21){
return "金牛座";
}else{
return "双子座";
}
}
if(month==6){
if(day<22){
return "双子座";
}else{
return "巨蟹座";
}
}
if(month==7){
if(day<23){
return "巨蟹座";
}else{
return "狮子座";
}
}
if(month==8){
if(day<23){
return "狮子座";
}else{
return "处女座";
}
}
if(month==9){
if(day<23){
return "处女座";
}else{
return "天秤座";
}
}
if(month==10){
if(day<24){
return "天秤座";
}else{
return "天蝎座";
}
}
if(month==11){
if(day<23){
return "天蝎座";
}else{
return "射手座";
}
}
if(month==12){
if(day<22){
return "射手座";
}else{
return "摩羯座";
}
}
return null;
}
/*
* Test
*/
public static void main(String[] args) throws Exception{
UDFDemo demo =new UDFDemo();
String date_str = "2009-04-15";
System.out.println("result:"+demo.evaluate(date_str));
}
}
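After packaging, the UDF can be tried as a temporary function. The sketch below is a minimal example: the jar path, the function name get_constellation and the table user_info(birthday string) are placeholders, and the class name assumes UDFDemo has no package declaration.
hive> add jar /opt/module/hivefunction-1.0-SNAPSHOT.jar;
hive> create temporary function get_constellation as 'UDFDemo';
hive> select get_constellation('2009-04-15');   -- 白羊座
hive> select name, get_constellation(birthday) from user_info;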
2. UDAF
Purpose: row-to-column aggregation, i.e. concatenate a column of values into one row.
Key points: 1. extend AbstractGenericUDAFResolver; 2. implement the aggregation logic in an inner evaluator that extends GenericUDAFEvaluator.
Code:
package com.hive.udaf;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.StringUtils;
/*
* Row-to-column: concatenate a column of values into one row.
*/
@Description(name = "mycolconcat", value = "_FUNC_(x) - Returns the concat of a set of cols")
public class ConcatUDAF extends AbstractGenericUDAFResolver{
static final Log LOG = LogFactory.getLog(ConcatUDAF.class.getName());
@Override
public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
throws SemanticException {
if (parameters.length != 1) {
throw new UDFArgumentTypeException(parameters.length - 1,
"Exactly one argument is expected.");
}
if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
throw new UDFArgumentTypeException(0,
"Only primitive type arguments are accepted but "
+ parameters[0].getTypeName() + " is passed.");
}
switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
case BYTE:
case SHORT:
case INT:
case LONG:
case FLOAT:
case DOUBLE:
case STRING:
case TIMESTAMP:
return new ConcatUDAFEvaluator();
case BOOLEAN:
default:
throw new UDFArgumentTypeException(0,
"Only numeric or string type arguments are accepted but "
+ parameters[0].getTypeName() + " is passed.");
}
}
public static class ConcatUDAFEvaluator extends GenericUDAFEvaluator {
// Input and output are strings in every Mode, so the same ObjectInspector applies throughout.
PrimitiveObjectInspector inputOI;
Text partialResult;
Text result;
@Override
public ObjectInspector init(Mode mode, ObjectInspector[] parameters)
throws HiveException {
assert (parameters.length == 1);
super.init(mode, parameters);
// init input
inputOI = (PrimitiveObjectInspector) parameters[0];
// init output
result = new Text("");
return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
}
static class ConcatAgg implements AggregationBuffer {
StringBuilder line = new StringBuilder("");
};
@Override
public AggregationBuffer getNewAggregationBuffer() throws HiveException {
ConcatAgg result = new ConcatAgg();
reset(result);
return result;
}
@Override
public void reset(AggregationBuffer agg) throws HiveException {
ConcatAgg myagg = (ConcatAgg) agg;
myagg.line.delete(0, myagg.line.length());
}
boolean warned = false;
@Override
public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
Object p = parameters[0];
if (p != null) {
ConcatAgg myagg = (ConcatAgg) agg;
try {
String v = PrimitiveObjectInspectorUtils.getString(p, inputOI);
if (myagg.line.length() == 0)
myagg.line.append(v);
else
myagg.line.append("," + v);
} catch (RuntimeException e) {
if (!warned) {
warned = true;
LOG.warn(getClass().getSimpleName() + " "
+ StringUtils.stringifyException(e));
LOG.warn(getClass().getSimpleName()
+ " ignoring similar exceptions.");
}
}
}
}
@Override
public Object terminatePartial(AggregationBuffer agg) throws HiveException {
ConcatAgg myagg = (ConcatAgg) agg;
result.set(myagg.line.toString());
return result;
}
@Override
public void merge(AggregationBuffer agg, Object partial) throws HiveException {
if (partial != null) {
try {
ConcatAgg myagg = (ConcatAgg) agg;
String v = PrimitiveObjectInspectorUtils.getString(partial, inputOI);
if (myagg.line.length() == 0)
myagg.line.append(v);
else
myagg.line.append("," + v);
} catch (RuntimeException e) {
if (!warned) {
warned = true;
LOG.warn(getClass().getSimpleName() + " "
+ StringUtils.stringifyException(e));
LOG.warn(getClass().getSimpleName()
+ " ignoring similar exceptions.");
}
}
}
}
@Override
public Object terminate(AggregationBuffer agg) throws HiveException {
ConcatAgg myagg = (ConcatAgg) agg;
result.set(myagg.line.toString());
return result;
}
}
}
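A minimal usage sketch for the UDAF; the jar path and the table orders(user_id string, product string) are placeholders:
hive> add jar /opt/module/hivefunction-1.0-SNAPSHOT.jar;
hive> create temporary function mycolconcat as 'com.hive.udaf.ConcatUDAF';
hive> select user_id, mycolconcat(product) from orders group by user_id;
Each group yields one row containing the comma-separated concatenation of that group's product values.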
3. UDTF
Purpose: explode a string containing a JSON array into one row per element.
Key points: 1. extend GenericUDTF; 2. declare the output schema in initialize() and emit rows via forward() in process().
Code:
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.json.JSONArray;
import java.util.ArrayList;
import java.util.List;
public class ExplodeJSONArray extends GenericUDTF {
@Override
public StructObjectInspector initialize(StructObjectInspector argOIs) throws UDFArgumentException {
// 1. validate the number of arguments
if (argOIs.getAllStructFieldRefs().size() != 1){
throw new UDFArgumentException("ExplodeJSONArray takes exactly one argument");
}
// 2. the argument must be a string
if(!"string".equals(argOIs.getAllStructFieldRefs().get(0).getFieldObjectInspector().getTypeName())){
throw new UDFArgumentException("The argument of ExplodeJSONArray must be of string type");
}
// 3. define the output column name and type
List<String> fieldNames = new ArrayList<String>();
List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
fieldNames.add("items");
fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}
@Override
public void process(Object[] objects) throws HiveException {
// 1. get the input value
String jsonArray = objects[0].toString();
// 2. parse the string into a JSON array
JSONArray actions = new JSONArray(jsonArray);
// 3. emit one row per element of the array
for (int i = 0; i < actions.length(); i++) {
String[] result = new String[1];
result[0] = actions.getString(i);
forward(result);
}
}
@Override
public void close() throws HiveException {
}
}
4. Creating the functions
(1) Package the project into a jar.
(2) Upload hivefunction-1.0-SNAPSHOT.jar to /opt/module on hadoop102, then put the jar onto HDFS under /user/hive/jars:
[bigdata@hadoop102 module]$ hadoop fs -mkdir -p /user/hive/jars
[bigdata@hadoop102 module]$ hadoop fs -put hivefunction-1.0-SNAPSHOT.jar /user/hive/jars
(3) Create a permanent function and associate it with the compiled Java class:
hive (dw)>
create function explode_json_array as 'com.atguigu.hive.udtf.ExplodeJSONArray' using jar 'hdfs://hadoop102:8020/user/hive/jars/hivefunction-1.0-SNAPSHOT.jar';
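Once the permanent function exists, the UDTF is typically called with LATERAL VIEW. The query below is a sketch; the table page_log(id string, actions string) is a placeholder:
hive (dw)>
select id, item
from page_log
lateral view explode_json_array(actions) tmp as item;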
(4) Notes:
a) If you modify the UDF and rebuild the jar, just replace the old jar at the HDFS path and restart the Hive client.
b) When referencing the function, include the database name (the function above was created in dw, so call it as dw.explode_json_array from other databases).
c) For a temporary function, the jar only needs to be available on the local file system, e.g.:
-- add the jar
hive> add jar /usr/wh/addPrefix.jar;
-- create the custom temporary function
hive> create temporary function add_prefix as 'com.test.AddPrefix';
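Once created, the temporary function is called like any built-in; for example (the table and column names are placeholders):
hive> select add_prefix(user_name) from test_table;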