/**
 * Populates {@code conf} with the settings for a scan over
 * {@code HTableConstant.ACCESS_INDEX_TABLE_NAME}.
 *
 * <p>The scan uses a caching value of 300, requests all versions, and selects
 * the version, time and date qualifiers of
 * {@code HTableConstant.IPJ_STATICS_INDEX_IMEI_FAMILY}. A {@code FilterList}
 * filled in by {@code HBaseManager.addTimeStampExcludeFilter} for the date
 * qualifier and the given {@code date} is attached to the scan.
 * NOTE(review): the exact filtering semantics live in that helper, which is
 * not visible here -- confirm against its implementation.
 *
 * @param conf configuration to update; the same instance is returned
 * @param date date string, stored under the {@code "date"} key and passed
 *             (as bytes) to the filter helper
 * @return {@code conf}, with the date, input table and serialized scan set
 * @throws IOException declared by the signature; presumably raised while
 *                     serializing the scan
 */
public static Configuration initConf(Configuration conf, String date)
        throws IOException {
    Scan indexScan = new Scan();
    indexScan.setCaching(300);
    indexScan.setMaxVersions();

    // All selected columns live in the same family.
    final byte[] family = HTableConstant.IPJ_STATICS_INDEX_IMEI_FAMILY;
    final byte[] dateQualifier = HTableConstant.IPJ_STATICS_INDEX_DATE_QUALIFIER;

    indexScan.addFamily(family);
    indexScan.addColumn(family, HTableConstant.IPJ_STATICS_INDEX_VERSION_QUALIFIER);
    indexScan.addColumn(family, HTableConstant.IPJ_STATICS_INDEX_TIME_QUALIFIER);
    indexScan.addColumn(family, dateQualifier);

    // The helper adds its filter(s) to the list before it is attached to the scan.
    FilterList filters = new FilterList();
    HBaseManager.addTimeStampExcludeFilter(
            filters, family, dateQualifier, Bytes.toBytes(date));
    indexScan.setFilter(filters);

    // Hand the date, the source table and the serialized scan to the job.
    conf.set("date", date);
    conf.set(TableInputFormat.INPUT_TABLE, HTableConstant.ACCESS_INDEX_TABLE_NAME);
    conf.set(TableInputFormat.SCAN, StatUtils.convertScanToString(indexScan));
    return conf;
}
如果使用上面的方法(addTimeStampExcludeFilter)进行过滤,那么在 map 中
可以按如下方式从 row key 中解析出 app_id 和 imei:
/**
 * Emits one {@code (outkey, ONE)} pair for every row whose result contains
 * exactly two cells and carries a version value.
 *
 * <p>The row key is decoded as text: characters 2-3 are used as the app id
 * and everything from character 4 onwards as the IMEI. The output key is
 * built from the app id, the version, the IMEI and the time value.
 *
 * @param key     row key of the scanned row
 * @param value   cells returned by the scan for this row
 * @param context job context used to emit the output pair
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
protected void map(ImmutableBytesWritable key, Result value, Context context)
        throws IOException, InterruptedException {
    // Decode only the bytes that belong to this key: get() returns the whole
    // backing array, which may be larger than the key itself.
    String keyStr = Bytes.toString(key.get(), key.getOffset(), key.getLength());
    String appIdStr = keyStr.substring(2, 4);
    byte[] app_id = Bytes.toBytes(appIdStr);
    // Encode with Bytes.toBytes (UTF-8), the same as app_id, instead of the
    // platform default charset, so the bytes do not depend on the task JVM.
    byte[] imei = Bytes.toBytes(keyStr.substring(4));

    // Exactly two cells came back for this row (with this scan setup that
    // marks a new UV; more than two cells marks an active UV).
    if (value.raw().length == 2) {
        byte[] version = value.getValue(
                HTableConstant.IPJ_STATICS_INDEX_IMEI_FAMILY,
                HTableConstant.IPJ_STATICS_INDEX_VERSION_QUALIFIER);
        // NOTE(review): time is not null-checked before being handed to
        // outkey.set -- confirm that helper tolerates a missing time cell.
        byte[] time = value.getValue(
                HTableConstant.IPJ_STATICS_INDEX_IMEI_FAMILY,
                HTableConstant.IPJ_STATICS_INDEX_TIME_QUALIFIER);
        if (version != null) {
            outkey.set(app_id, 0, app_id.length, version,
                    imei, time);
            context.write(outkey, ONE);
        }
    }
}
value.raw().length(即该行返回的 KeyValue 个数)的含义:
1. 当 job 中只过滤一列时:
   value.raw().length = 1 表示新增UV
   value.raw().length > 1 表示活跃UV
2. 当 job 中过滤两列时(即上述代码的情况):
   value.raw().length = 2 表示新增UV
   value.raw().length > 2 表示活跃UV