//hive的内置函数
向下取整
hive> select floor(salary) from employeeforhaving;
取对数
select log(salary) from employeeforhaving;
取负数
select negative(salary) from employeeforhaving;
UDF
package com.dt.spark.hive;

import java.util.Locale;

import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;

/**
 * Hive UDF that lower-cases a string column.
 * Implements any business logic that Hive's built-in functions do not provide.
 */
public final class HiveUDF extends UDF {
    /**
     * Lower-cases the input text.
     *
     * @param s the input value; may be null (Hive passes NULL column values as null)
     * @return a new Text holding the lower-cased string, or null when s is null
     */
    public Text evaluate(final Text s) {
        if (s == null) {
            return null;
        }
        // Locale.ROOT gives deterministic, locale-independent case mapping;
        // the default-locale overload misbehaves under e.g. the Turkish locale
        // (dotted/dotless 'i'), producing different results per JVM setting.
        return new Text(s.toString().toLowerCase(Locale.ROOT));
    }
}
打包
lowerCase.jar
加到上下文:
hive> Add jar /bigdata/learn_data/lowerCase.jar;
Added [/bigdata/learn_data/lowerCase.jar] to class path
Added resources: [/bigdata/learn_data/lowerCase.jar]
//起别名
hive> CREATE TEMPORARY FUNCTION tolower AS 'com.dt.spark.hive.HiveUDF';
//接下来直接使用:
hive> select tolower(address) from employeeforhaving;
UDAF
聚合函数(UDAF)对多行输入数据迭代处理,最终归并为单个聚合结果
package com.dt.spark.hive;

import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;

/**
 * UDAF that concatenates strings from multiple rows into a single
 * delimiter-separated string.
 *
 * @author jiudu
 */
public class HiveUDAF extends UDAF {

    public static class ConcatUDAFEvaluator implements UDAFEvaluator {

        /** Intermediate aggregation state shipped from mappers to reducers. */
        public static class PartialResult {
            String result;     // concatenation built so far
            String delimiter;  // separator inserted between values
        }

        private PartialResult partial;

        /**
         * Resets the evaluator state; called before each new aggregation.
         */
        public void init() {
            partial = null;
        }

        /**
         * Core per-row processing on the map side.
         *
         * @param value the column value to append; null values are skipped
         * @param deli  the separator; falls back to "," when null or empty
         * @return true (required by the UDAF contract)
         */
        public boolean iterate(String value, String deli) {
            if (value == null) {
                return true;
            }
            if (partial == null) {
                // Lazily create state on the first non-null value so that
                // an all-null group leaves partial == null.
                partial = new PartialResult();
                partial.result = "";
                partial.delimiter = (deli == null || deli.isEmpty()) ? "," : deli;
            }
            if (partial.result.length() > 0) {
                partial.result = partial.result.concat(partial.delimiter);
            }
            partial.result = partial.result.concat(value);
            return true;
        }

        /**
         * Returns the current partial state; mappers call this to hand their
         * intermediate result to the reduce side.
         *
         * @return the partial aggregation state, or null if nothing was seen
         */
        public PartialResult terminatePartial() {
            return partial;
        }

        /**
         * Merges a partial result produced by another task into this
         * evaluator's state. The parameter type must match the return type of
         * terminatePartial().
         *
         * @param other a partial result from another task; may be null
         * @return true (required by the UDAF contract)
         */
        public boolean merge(PartialResult other) {
            if (other == null) {
                return true;
            }
            if (partial == null) {
                partial = new PartialResult();
                partial.result = other.result;
                partial.delimiter = other.delimiter;
            } else {
                if (partial.result.length() > 0) {
                    partial.result = partial.result.concat(partial.delimiter);
                }
                partial.result = partial.result.concat(other.result);
            }
            return true;
        }

        /**
         * Produces the final aggregate value.
         *
         * @return the concatenated string, or null when no non-null value was
         *         ever iterated (the previous code dereferenced partial here
         *         and threw NullPointerException in that case)
         */
        public String terminate() {
            return partial == null ? null : partial.result;
        }
    }
}
打成pinjie.jar
hive> add jar /bigdata/learn_data/pinjie.jar;
hive> CREATE TEMPORARY FUNCTION pinjie AS 'com.dt.spark.hive.HiveUDAF';
hive> select pinjie(name,"111") from employeeforhaving;