实现背景:
消费 Kafka 数据写入 HBase 时,单条处理效率太低,需要批量写入。这里使用计数窗口 countWindowAll(注意它是按条数、而非按时间触发的窗口),实现每积攒 100 条数据批量插入一次 HBase。
前面我就不写了 直接上核心代码吧
/* Count window: buffers Constants.windowCount (100) records, then emits them as one batch of Puts.
 * NOTE: this is a COUNT window, not a time window — it fires on record count only. */
DataStream<List<Put>> putList = filterData
        .countWindowAll(Constants.windowCount)
        .apply(new AllWindowFunction<String, List<Put>, GlobalWindow>() {
            @Override
            public void apply(GlobalWindow window, Iterable<String> message, Collector<List<Put>> out) throws Exception {
                // Pre-size to the window count to avoid list re-allocations.
                List<Put> puts = new ArrayList<>(Constants.windowCount);
                for (String value : message) {
                    // Row key is the record with the "::" field separator replaced by "_".
                    String rowKey = value.replace("::", "_");
                    Put put = new Put(Bytes.toBytes(rowKey));
                    String[] columns = value.split("::");
                    // Guard: a record with more fields than configured column names would
                    // otherwise throw ArrayIndexOutOfBoundsException on columnArray[i].
                    int n = Math.min(columns.length, Constants.columnArray.length);
                    for (int i = 0; i < n; i++) {
                        put.addColumn(Bytes.toBytes(Constants.columnFamily),
                                Bytes.toBytes(Constants.columnArray[i]),
                                Bytes.toBytes(columns[i]));
                    }
                    puts.add(put);
                }
                out.collect(puts);
            }
        });
// BUG FIX: the original chained .setParallelism(4) here. countWindowAll (an *All window)
// is a non-parallel operator — Flink rejects any parallelism other than 1 at job submission
// ("The parallelism of non parallel operator must be 1"), so that call is removed.
putList.addSink(new HBaseSinkFunction()).setParallelism(1);
自定义的 HBase Sink 实现如下:
HBaseSinkFunction extends RichSinkFunction<List<Put>>{
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
HbaseUtils.connectHbase();
TableName table=TableName.valueOf(Constants.tableNameStr);
Admin admin = HbaseUtils.connection.getAdmin();
if(!admin.tableExists(table)){
HTableDescriptor tableDescriptor = new HTableDescriptor(Constants.tableNameStr);
tableDescriptor.addFamily(new HColumnDescriptor(Constants.columnFamily));
admin.createTable(tableDescriptor);
}
}
@Override
public void invoke(List<Put> putList, Context context) throws Exception {
Table table=HbaseUtils.connection.getTable(TableName.valueOf(Constants.tableNameStr));
table.put(putList);
}
@Override
public void close() throws Exception {
super.close();
HbaseUtils.closeHBaseConnect();
}
}
看完请多指教,欢迎留言交流。