需求:现有一张用户信息表,记录用户会员信息
现在需要统计每个月的有效会员数量
问题分析: 想要统计每个月份的有效会员数量就要获取表中所有存在的月份,有两种思路:
第一种可以直接一点:
直接根据表数据创建一个临时辅助表 (有点low但简单)
insert into MonthTable
select concat(year, '-', lpad(month, 2, '0')) as month_year
from (
select 2021 as year, 1 as month union all
select 2021, 2 union all
select 2021, 3 union all
select 2021, 4 union all
select 2021, 5 union all
select 2021, 6 union all
select 2021, 7 union all
select 2021, 8 union all
select 2021, 9 union all
select 2021, 10 union all
select 2021, 11 union all
select 2021, 12 union all
select 2022, 1 union all
select 2022, 2 union all
select 2022, 3 union all
select 2022, 4 union all
select 2022, 5 union all
select 2022, 6 union all
select 2022, 7 union all
select 2022, 8 union all
select 2022, 9 union all
select 2022, 10 union all
select 2022, 11 union all
select 2022, 12
) as year_month;
然后可以和原表进行表连接进行查询:
select
m.month_year,
count(c.consumerid) as active_members
from
MonthTable m
left join
t_consumer c
on
m.month_year between date_format(c.startdate, 'yyyy-MM') and date_format(c.enddate, 'yyyy-MM')
group by
m.month_year
order by
m.month_year;
第二种是采用自定义函数(注:这里只书写了关键操作
)
利用 Java 的 UDTF 类来自定义一个函数用于返回两个日期之间的月份:
public class VipCountUDTF extends GenericUDTF {
@Override
public StructObjectInspector initialize(StructObjectInspector argOIs) throws UDFArgumentException {
// 返回值的数据类型和名称
//1.定义输出数据的列名和类型
List<String> fieldNames = new ArrayList<String>();
List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
//2.添加输出数据的列名和类型
fieldNames.add("mt");
fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}
@Override
public void process(Object[] objects) throws HiveException {
String beginDate = objects[0].toString();
String endDate = objects[1].toString();
// 调用之前的方法,返回一个日期列表
List<String> dateList = DateUtils.getDateList(beginDate, endDate);
for(String date : dateList)
{
forward(new String[]{date});
}
}
@Override
public void close() throws HiveException {
}
}
class DateUtils {
public static List<String> getDateList(String beginDate, String endDate) {
ArrayList<String> list = new ArrayList<>();
// 解析传递过来的日期的格式
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy/MM");
// 将Date类型转换为字符串类型,前面是月份,后面是年份
SimpleDateFormat sdf = new SimpleDateFormat("MMM-yyyy", Locale.ENGLISH);
try {
// 将传递过来的日期转换为Date类型
Date dateFrom = simpleDateFormat.parse(beginDate);
Date dateTo = simpleDateFormat.parse(endDate);
// 因为需要用到Calendar 中的 获取下一个月的日期的函数
Calendar calendar = Calendar.getInstance();
calendar.setTime(dateFrom);
// 判断结束日期是否大于开始日期
while (dateTo.after(calendar.getTime())) {
// 将一个日期转为 月份-年份
String yearMonth = sdf.format(calendar.getTime());
System.out.println(yearMonth);
list.add(yearMonth);
calendar.add(Calendar.MONTH,1);
}
} catch (ParseException e) {
throw new RuntimeException(e);
}
return list;
}
}
自定义 hive 函数之后把自定义函数上传到 hive 中就可以继续进行操作了:
select mt,count(*) c1
from(
select consumerid,mt from t_consumer lateral view k1(startdate,enddate) t1 as mt
)t2 group by mt