给大家分享一道hive sql的面试题:
题目
思路
先拿到每个用户会员开始和结束两个日期之间的所有月份,然后再根据月份分组,统计数量,再将对应的月份换成题目要求的格式即可
但是hive中没有提供拿到两个日期之间所有的月份的函数,所以需要自定义函数
答案
Java代码自定义函数
新建一个maven项目,导包:
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>3.1.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.3.1</version>
</dependency>
新建一个类,继承GenericUDF类:
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
public class DateToMonth extends GenericUDF {
@Override
public ObjectInspector initialize(ObjectInspector[] objectInspectors) throws UDFArgumentException {
if (objectInspectors.length != 2) {
// 说明参数的数量不对
throw new UDFArgumentException("参数数量错误");
}
// 返回值类型检查
return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
}
@Override
public Object evaluate(DeferredObject[] deferredObjects) throws HiveException {
String startDateStr = deferredObjects[0].get().toString();
String endDateStr = deferredObjects[1].get().toString();
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
try {
LocalDate startDate = LocalDate.parse(startDateStr, formatter);
LocalDate endDate = LocalDate.parse(endDateStr, formatter);
ArrayList<String> Months = new ArrayList<>();
LocalDate tempDate = startDate;
while (!tempDate.isAfter(endDate)){
Months.add(formatter.format(tempDate).substring(0,7));
tempDate = tempDate.plusMonths(1);
}
StringBuilder sb = new StringBuilder();
for (String month : Months) {
sb.append(month).append(",");
}
StringBuilder stringBuilder = sb.deleteCharAt(sb.length() - 1);
return stringBuilder;
} catch (Exception e) {
return -1;
}
}
@Override
public String getDisplayString(String[] strings) {
return null;
}
}
sql代码
打包,将打好的jar包上传到hive的lib目录下,在hive中添加jar包和函数
add jar /opt/installs/hive/lib/MyFunction-1.0-SNAPSHOT.jar;
create temporary function MyDateToMonth as 'com.donghu.DateToMonth';
这个自定义的函数返回的是一个字符串,所以需要hive自带的split函数进行切割,变成一个数组,然后通过hive自带的explode炸裂函数将数组炸开,再将对应的月份换成题目要求的样子即可:
with t1 as(
select consumerid,split(MyDateToMonth(startdate,enddate),',' ) as list from t_consumer
)
select concat(
case split(months,'-')[1] when '01' then 'Jan'
when '01' then 'Jan'
when '02' then 'Feb'
when '03' then 'Mar'
when '04' then 'Apr'
when '05' then 'May'
when '06' then 'Jun'
when '07' then 'Jul'
when '08' then 'Aug'
when '09' then 'Sep'
when '10' then 'Oct'
when '11' then 'Nov'
when '12' then 'Dec'
end,
'-',
substr(months,3,2)
),
count(*) from t1 lateral view explode(list) mytable as months group by months;