Apache Phoenix自定义函数,文章包含代码、配置、以及自己遇到的一些坑分享三大部分。
环境:JDK1.8、CDH5.14.4。
开发语言:java
官网链接:http://phoenix.apache.org/udf.html
自定义函数功能描述:根据一个日期,返回所属季度
一. 代码
1. 创建maven J2SE工程,配置pom文件
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<hbase.version>1.2.0-cdh5.14.4</hbase.version><!-- hbase版本 -->
</properties>
<dependencies>
<dependency>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
<version>1.6</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>commons-io</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>commons-lang</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<!--- 此处版本12.0.1 -->
<groupId>com.google.guava</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>commons-codec</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.phoenix</groupId>
<artifactId>phoenix-core</artifactId>
<version>4.14.0-cdh5.14.2</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>jline</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<!-- 此处原生版本为13.0.1 -->
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>commons-collections</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>commons-codec</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>commons-io</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>commons-lang</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hbase</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
2. 创建DfQuarter类
package cn.cdp.dope.phoenix.pefl;
import org.apache.phoenix.expression.Expression;
import org.apache.phoenix.expression.LiteralExpression;
import org.apache.phoenix.expression.function.DateScalarFunction;
import org.apache.phoenix.expression.function.ScalarFunction;
import org.apache.phoenix.parse.CurrentDateParseNode;
import org.apache.phoenix.parse.FunctionParseNode;
import org.apache.phoenix.schema.tuple.Tuple;
import org.apache.phoenix.schema.types.PDataType;
import org.apache.phoenix.schema.types.PDate;
import org.apache.phoenix.schema.types.PTimestamp;
import org.apache.phoenix.schema.types.PVarchar;
import org.joda.time.DateTime;
import java.sql.SQLException;
import java.util.List;
/*
* @Description: 季节自定义函数,根据一个日期类型日期,返回一个字符串类型季节
* @Auther: gucp
* @Date: 2019/1/2 16:11
*
*/
@FunctionParseNode.BuiltInFunction(
name = "QUARTER",
args = {@FunctionParseNode.Argument(
allowedTypes = {PTimestamp.class}
)}
)
public class DfQuarter extends DateScalarFunction {
public static final String NAME = "QUARTER";
public DfQuarter() {
}
public DfQuarter(List<Expression> children) throws SQLException {
super(children);
}
public boolean evaluate(Tuple tuple, org.apache.hadoop.hbase.io.ImmutableBytesWritable immutableBytesWritable) {
Expression expression = this.getChildren().get(0);
if (!expression.evaluate(tuple, immutableBytesWritable)) {
return false;
}
if (immutableBytesWritable.getLength() == 0) {
return true;
}
long dateTime = this.inputCodec.decodeLong(immutableBytesWritable, expression.getSortOrder());
DateTime dt = new DateTime(dateTime);
int month = dt.getMonthOfYear();
PDataType returnType = this.getDataType();
byte[] byteValue = (dt.getYear() + "第" + getQuarterByMonth(month) + "季度").getBytes();
immutableBytesWritable.set(byteValue);
return true;
}
private String getQuarterByMonth(int month) {
String quarter = "";
if (month <= 3) {
quarter = "1";
} else if (month <= 6) {
quarter = "2";
} else if (month <= 9) {
quarter = "3";
} else {
quarter = "4";
}
return quarter;
}
public String getName() {
return "QUARTER";
}
public PDataType getDataType() {
return PVarchar.INSTANCE;
}
}
3. 打jar包
注意:打jar时,需要把joda-time1.6.jar 包含进去
二. CDH配置
1. hbase-site.xml服务端和客户端都需要配置
<property>
<name>phoenix.functions.allowUserDefinedFunctions</name>
<value>true</value>
</property>
<property>
<name>fs.hdfs.impl</name>
<value>org.apache.hadoop.hdfs.DistributedFileSystem</value>
</property>
<property>
<name>hbase.rootdir</name>
<value>hdfs://填写自己的/hbase</value>
</property>
<!-- 自定义函数,存储jar的hdfs目录 -- >
<property>
<name>hbase.dynamic.jars.dir</name>
<value>hdfs://填写自己的/hbase/lib</value>
</property>
注意事项:
后面两个配置,需要和服务端的配置一样。不然会提示找不到函数。
2. 登陆phoenix客户端创建函数
CREATE FUNCTION QUARTER(TIMESTAMP) returns varchar as 'cn.cdp.dope.phoenix.pefl.DfQuarter' using jar 'hdfs://填写自己的/hbase/lib/phoenix-udf-1.4-SNAPSHOT.jar'
3. 验证
三. 错误摘要
1、jar 路径不写,则执行读取配置hbase.dynamic.jars.dir
2、验证函数是否可行,需要添加from
不行:select QUARTER(to_date(‘2019-01-02’))
行:select QUARTER(to_date(‘2019-01-02’)) from table
3、如果一直报函数未定义,有以下几种可能
- 退出phoenix客户端,重新登陆试试
- 如果是程序中,如java/scala/… JDBC中使用到了该函数,需要在hbase-site.xml中添加参数
<property>
<name>hbase.local.dir</name>
<value>/tmp/hbase-hbase/local/</value>
<description>hbase本地文件系统路径</description>
</property>
修改JDBC配置
Properties props = new Properties()
props.setProperty("phoenix.functions.allowUserDefinedFunctions", "true")
DriverManager.getConnection("jdbc:phoenix:10.20.127.68,10.20.127.74:2181",props)
- 在oozie任务中,提示找不到
把该jar包,上传至oozie共享库或者私有库中