由于impala处理日期的函数如date_sub(),date_trunc(),last_day()等这些日期处理函数还需要进行日期格式化为yyyy-MM-dd使用,sql代码段过长,导致频繁嵌套过于复杂.所以自定义udf函数解决这些问题.以下为实现过程.
- 日期加减,月份加减
- 对应日期的周一或周末,月初或月末,年初或年末
1 创建maven工程
导入以下maven依赖
<dependencies>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>1.1.0</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
</dependency>
</dependencies>
创建udf对应的java脚本所在的包com.impala.udf(自定义,后期别搞错就行)
2 编写java脚本
2.1 以下所有方法都extend UDF(继承hive包的udf父类),在com.impala.udf路径下创建TimeADDOfDate.java(以日为维度加减)
public String addDate(String time,int num){
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
Date parse = null ;
try {
parse = sdf.parse(time);
} catch (ParseException e) {
e.printStackTrace();
}
Calendar instance = Calendar.getInstance();
instance.setTime(parse);
instance.add(Calendar.DATE,num);
Date instanceTime = instance.getTime();
String resultTime = sdf.format(instanceTime);
return resultTime;
}
public String evaluate(String time,int num){ return addDate(time,num); }
2.2 创建TimeADDOfMonth.java(以月为维度加减)
public static String addMonth(String time,int num){
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
Date parse = null;
try {
parse = sdf.parse(time);
} catch (ParseException e) {
e.printStackTrace();
}
Calendar instance = Calendar.getInstance();
instance.setTime(parse);
instance.add(Calendar.MONTH,num);
Date instanceTime = instance.getTime();
String resultTime = sdf.format(instanceTime);
return resultTime;
}
public static String evaluate(String time,int num){ return addMonth(time, num); }
2.3 创建TimeFirstAndLastOfMonth.java(取月初月末)
public static String substrMonth(String time,String sign){
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
Date parse = null;
try {
parse = sdf.parse(time);
} catch (ParseException e) {
e.printStackTrace();
}
Calendar instance = Calendar.getInstance();
instance.setTime(parse);
if (sign.compareToIgnoreCase("f")==0){
instance.add(Calendar.MONTH,0);
instance.set(Calendar.DAY_OF_MONTH,1);
Date instanceTime1 = instance.getTime();
String firstTimeOfMonth = sdf.format(instanceTime1);
return firstTimeOfMonth;
}
else if (sign.compareToIgnoreCase("l")==0){
instance.add(Calendar.MONTH,1);
instance.set(Calendar.DAY_OF_MONTH,0);
Date instanceTime2 = instance.getTime();
String lastTimeOfMonth = sdf.format(instanceTime2);
return lastTimeOfMonth;
}
else {
return "";
}
}
public static String evaluate(String time,String sign){
return substrMonth(time, sign);
}
2.4 创建TimeFirstAndLastOfWeek.java类(取周一或周末)
public static String substrWeek(String time,String sign){
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
Date parse = null;
try {
parse = sdf.parse(time);
} catch (ParseException e) {
e.printStackTrace();
}
Calendar instance = Calendar.getInstance();
instance.setTime(parse);
int tmp = instance.get(Calendar.DAY_OF_WEEK);
String tmpTime = "";
if (sign.compareToIgnoreCase("f")==0){
if (tmp>2){
instance.add(Calendar.DATE,0-(tmp-2));
Date instanceTime1 = instance.getTime();
tmpTime = sdf.format(instanceTime1);
}else if (tmp==2){
tmpTime= time;
}else if (tmp<2){
instance.add(Calendar.DATE,-6);
Date instanceTime3 = instance.getTime();
tmpTime = sdf.format(instanceTime3);
}
}else if (sign.compareToIgnoreCase("l")==0){
if (tmp==1){
tmpTime = time;
}else {
instance.add(Calendar.DATE,(7-tmp)+1);
Date instanceTime5 = instance.getTime();
tmpTime = sdf.format(instanceTime5);
}
}else {
tmpTime = "";
}
return tmpTime;
}
public static String evaluate(String time,String sgin){
return substrWeek(time, sgin);
}
2.5 创建TimeFirstAndLastOfYear.java类(取年初年末)
public static String substrYear(String time,String sign){
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
Date parse = null;
try {
parse = sdf.parse(time);
} catch (ParseException e) {
e.printStackTrace();
}
Calendar instance = Calendar.getInstance();
instance.setTime(parse);
if (sign.compareToIgnoreCase("f")==0){
instance.add(Calendar.YEAR,0);
instance.set(Calendar.DAY_OF_YEAR,1);
Date instanceTime1 = instance.getTime();
String firstYear = sdf.format(instanceTime1);
return firstYear;
}else if (sign.compareToIgnoreCase("l")==0){
instance.add(Calendar.YEAR,1);
instance.set(Calendar.DAY_OF_YEAR,0);
Date instanceTime2 = instance.getTime();
String lastYear = sdf.format(instanceTime2);
return lastYear;
}else{
return "";
}
}
public static String evaluate(String time,String sign){
return substrYear(time, sign);
}
3 导出jar包,上传至hdfs
3.1 点击idea右侧的Maven按钮,选择当前项目包,生命周期文件夹,点击"package",等待导出成功.然后再项目包的target文件夹下找到XXX-udf-1.0-SNAPSHOT.jar包
3.2 将jar包通过上传到hdfs的/user/impala/udfs/目录下
su impala
hdfs dfs -mkdir /user/impala/udfs/
hdfs dfs -put impala-udf-1.0-SNAPSHOT.jar /user/impala/udfs/
hdfs dfs -chmod 755 /user/impala/udfs/impala-udf-1.0-SNAPSHOT.jar
4 创建impala的function
impala-shell
use haubei; --需要建函数的库名
create function Time_ADD_Of_Date location '/user/impala/udfs/impala-udf-1.0-SNAPSHOT.jar' symbol='com.impala.udf.TimeADDOfDate';
create function Time_ADD_Of_Month location '/user/impala/udfs/impala-udf-1.0-SNAPSHOT.jar' symbol='com.impala.udf.TimeADDOfMonth';
create function Time_First_And_Last_Of_Month location '/user/impala/udfs/impala-udf-1.0-SNAPSHOT.jar' symbol='com.impala.udf.TimeFirstAndLastOfMonth';
create function Time_First_And_Last_Of_Week location '/user/impala/udfs/impala-udf-1.0-SNAPSHOT.jar' symbol='com.impala.udf.TimeFirstAndLastOfWeek';
create function Time_First_And_Last_Of_Year location '/user/impala/udfs/impala-udf-1.0-SNAPSHOT.jar' symbol='com.impala.udf.TimeFirstAndLastOfYear';
PS:方法名按自己习惯编写,symbol的值需注意,某个function一定要找到其对应的evaluate()方法所在类的路径下.
5 UDF的简单使用
select Time_ADD_Of_Date("20210323",-8); --当前日期减8天
select Time_ADD_Of_Month("20210323",4); --取3个月后对应的日期
select Time_First_And_Last_Of_Month("20210323","f"); --取月初日期
select Time_First_And_Last_Of_Week("20210323","l"); --取对应的周末
select Time_First_And_Last_Of_Year("20210323","l"); --取年末
6 代码下载路径
6.1 如果觉得写代码比较麻烦的话,可以在我的资源中下载jar包,源码和sql,地址如下:
至此自定义日期的udf函数初始化完成,博客中有遗漏,或者其他意见,可以评论或者私信告知.