Flink 1.12 reading from Kafka and writing to HBase | Custom HbaseSink | Source code included
The Maven dependencies were covered in an earlier post and are the same for this job, so you can use it as a reference: *Flink1.12读取Kafka数据写入到Hdfs | 含maven依赖 | FileSink* (MIDSUMMER_yy的博客, CSDN).
First, the main class:
```java
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

import java.io.File;
import java.io.FileInputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

public class KafkaToHbase {
    public static void main(String[] args) throws Exception {
        // read the properties config file passed as the only program argument
        String config_path = args[0];
        ParameterTool parameterTool = ParameterTool.fromPropertiesFile(new FileInputStream(new File(config_path)))
                .mergeWith(ParameterTool.fromSystemProperties())
                .mergeWith(ParameterTool.fromMap(getenv()));
        // create the execution environment
        StreamExecutionEnvironment env = FlinkEnvUtils.creatEnv();
        // register the config as global job parameters so the sink can read it in open()
        env.getConfig().setGlobalJobParameters(parameterTool);
        // configure the Kafka consumer
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", parameterTool.get("kafka_ips"));
        props.setProperty("group.id", parameterTool.get("kafka.group.name"));
        FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<>(parameterTool.get("kafka.topic.name"), new SimpleStringSchema(), props);
        consumer.setCommitOffsetsOnCheckpoints(true);
        consumer.setStartFromGroupOffsets();
        DataStream<String> stream = env.addSource(consumer);
        stream.addSink(new HbaseSink()).name("kafkaTohbase_test");
        env.execute();
    }

    private static Map<String, String> getenv() {
        Map<String, String> map = new HashMap<>();
        for (Map.Entry<String, String> entry : System.getenv().entrySet()) {
            map.put(entry.getKey(), entry.getValue());
        }
        return map;
    }
}
```
As before, the job takes a single argument: the path to the properties config file.
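The FlinkEnvUtils.creatEnv() helper referenced above is not shown in this post. A minimal sketch of what it might look like, assuming it only builds the environment and turns on checkpointing (the 60s interval and exactly-once mode are my assumptions, not the original settings):

```java
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

// hypothetical sketch: the original FlinkEnvUtils is not shown in the post
public class FlinkEnvUtils {
    public static StreamExecutionEnvironment creatEnv() {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // checkpointing must be enabled for setCommitOffsetsOnCheckpoints(true) to take effect;
        // the 60s interval is an assumed value, tune it for your own job
        env.enableCheckpointing(60_000L, CheckpointingMode.EXACTLY_ONCE);
        return env;
    }
}
```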
The HbaseSink code is as follows:
```java
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.io.IOException;
import java.io.Serializable;

public class HbaseSink extends RichSinkFunction<String> implements Serializable {
    private static final long serialVersionUID = 1L;

    private HBaseUtil hbaseUtil;
    private int keyIDPos;
    private int optypePos;
    private String tableName;
    private String columnFamily;
    private String column;

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        // read the config that the main class registered as global job parameters
        ParameterTool params = (ParameterTool) getRuntimeContext().getExecutionConfig().getGlobalJobParameters();
        tableName = params.get("hbase.table.name");
        columnFamily = params.get("hbase.table.column.family");
        column = params.get("hbase.table.column");
        keyIDPos = params.getInt("record.keyID.field.Pos");
        optypePos = params.getInt("record.op.type.field.Pos");
        this.hbaseUtil = new HBaseUtil(params, tableName);
    }

    @Override
    public void invoke(String s) throws IOException {
        // strip any stray newline characters
        if (s.contains("\n") || s.contains("\r")) {
            s = s.trim();
        }
        // records are '\007'-delimited; the configured positions are 1-based
        String[] str = s.split(String.valueOf('\007'));
        String keyID = str[keyIDPos - 1];
        // reverse the key, a common trick to spread sequential keys across HBase regions
        keyID = new StringBuilder(keyID).reverse().toString();
        String op_type = str[optypePos - 1];
        String rowkey = keyID;
        if (op_type.equalsIgnoreCase("INSERT") || op_type.equalsIgnoreCase("UPDATE")) {
            // the column list and the record fields must be in the same order
            this.hbaseUtil.insertRow(rowkey, columnFamily, column.split(","), str);
        } else if (op_type.equalsIgnoreCase("DELETE")) {
            this.hbaseUtil.deleteRow(rowkey);
        }
    }

    @Override
    public void close() throws Exception {
        this.hbaseUtil.close();
    }
}
```
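A small side note: the single-argument `invoke(String value)` overridden above is the legacy SinkFunction method and is marked deprecated in Flink 1.12, although it still works. If you prefer the non-deprecated variant, the override inside HbaseSink can be changed along these lines (the body stays exactly the same; the extra Context parameter is simply unused here):

```java
@Override
public void invoke(String s, Context context) throws IOException {
    // same body as the single-argument invoke above; Context only provides
    // timestamp/processing-time information, which this sink does not need
}
```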
My HBase table is designed with a single column family, which keeps the layout close to the upstream Oracle table, so little extra handling is needed.
Upstream, OGG writes into Kafka, so every record already carries its operation type (INSERT, UPDATE, DELETE). I will cover the OGG-to-Kafka configuration in a separate post.
As long as the upstream guarantees that each Kafka record contains the operation type, this works as-is; for a pure insert-only feed you can simplify the code above accordingly.
hbase.table.name is the name of the target HBase table
hbase.table.column.family is the column family name
hbase.table.column is the list of column names, separated by commas in the config file
record.keyID.field.Pos is the 1-based position of the primary-key field; the delimiter between fields in each Kafka record is '\007'
record.op.type.field.Pos is the position of the operation-type field (a sample config file with all of these keys is sketched below)
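A possible properties file for this job. All host names, topic names, column names, and field positions below are placeholder values for illustration, not the original configuration:

```properties
# Kafka source
kafka_ips=host1:9092,host2:9092,host3:9092
kafka.group.name=kafka_to_hbase_group
kafka.topic.name=ogg_orders_topic

# HBase target table
hbase.table.name=ns:orders
hbase.table.column.family=cf
# the column list must line up with the field order of the '\007'-delimited records
hbase.table.column=ORDER_ID,USER_ID,AMOUNT,OP_TS,OP_TYPE

# record layout (positions are 1-based)
record.keyID.field.Pos=1
record.op.type.field.Pos=5
```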
Finally, the HBaseUtil code:
```java
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

public class HBaseUtil {
    private Connection conn;
    private Table htable;

    public HBaseUtil(ParameterTool params, String tablename) throws IOException {
        // fill in the xxx placeholders with your own cluster's values
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.property.clientPort", "xxx");
        conf.set("hbase.zookeeper.quorum", "xxx,xxx,xxx");
        conf.set("hbase.master", "xxx:xxx,xxx:xxx");
        conf.set("mapreduce.output.fileoutputformat.compress", "false");
        conf.set("zookeeper.znode.parent", "xxx");
        conf.setInt("hbase.rpc.timeout", 20000);
        conf.setInt("hbase.client.operation.timeout", 30000);
        conf.setInt("hbase.client.scanner.timeout.period", 200000);
        this.conn = ConnectionFactory.createConnection(conf);
        openTable(tablename);
    }

    public void openTable(String tablename) throws IOException {
        TableName tableName = TableName.valueOf(tablename);
        this.htable = this.conn.getTable(tableName);
    }

    public void insertRow(String rowkey, String columnFamily, String[] column, String[] value) throws IOException {
        // one Put per record; column[i] is written with the i-th field of the record
        Put put = new Put(Bytes.toBytes(rowkey));
        for (int i = 0; i < column.length; i++) {
            put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column[i]), Bytes.toBytes(value[i]));
        }
        this.htable.put(put);
    }

    public void deleteRow(String rowkey) throws IOException {
        Delete de = new Delete(Bytes.toBytes(rowkey));
        this.htable.delete(de);
    }

    public void close() throws IOException {
        if (this.htable != null) {
            this.htable.close();
        }
        if (this.conn != null) {
            this.conn.close();
        }
    }
}
```
Fill in the xxx placeholders in the constructor with the values for your own cluster.
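Since the constructor already receives the ParameterTool, an alternative to hardcoding is to read the connection settings from the same config file as everything else. A possible sketch of the constructor (the property key names and default values here are my own invention; add or rename keys to match your config):

```java
// hypothetical alternative constructor: read HBase connection settings from the job config
public HBaseUtil(ParameterTool params, String tablename) throws IOException {
    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum", params.get("hbase.zookeeper.quorum"));
    conf.set("hbase.zookeeper.property.clientPort", params.get("hbase.zookeeper.clientPort", "2181"));
    conf.set("zookeeper.znode.parent", params.get("hbase.znode.parent", "/hbase"));
    conf.setInt("hbase.rpc.timeout", params.getInt("hbase.rpc.timeout", 20000));
    conf.setInt("hbase.client.operation.timeout", params.getInt("hbase.client.operation.timeout", 30000));
    conf.setInt("hbase.client.scanner.timeout.period", params.getInt("hbase.client.scanner.timeout.period", 200000));
    this.conn = ConnectionFactory.createConnection(conf);
    openTable(tablename);
}
```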