Flink数据写入Hbase与Kafka
Sink到Hbase
导入Phoenix相关依赖
<dependency>
<groupId>org.apache.phoenix</groupId>
<artifactId>phoenix-core</artifactId>
<version>5.0.0-HBase-2.0</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>30.1-jre</version>
</dependency>
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<version>2.5.0</version>
</dependency>
具体实现代码
/**
 * Writes dimension-table records to HBase through Phoenix.
 *
 * <p>The stream is keyed by target sink table so that per-key state can track
 * whether the Phoenix table has already been created for that key. For each
 * record: (1) lazily create the target table, (2) upsert the row.
 *
 * @param sink2HBaseStream stream of (row data, table-process config) pairs
 */
private void sink2HBase(DataStream<Tuple2<JSONObject, TableProcess>> sink2HBaseStream) {
    String phoenixUrl = "jdbc:phoenix:hadoop162,hadoop163,hadoop164:2181";
    sink2HBaseStream
        .keyBy(t -> t.f1.getSinkTable())
        .addSink(new RichSinkFunction<Tuple2<JSONObject, TableProcess>>() {
            // Non-null once the Phoenix table for this key (sink table) has been created,
            // so the DDL runs at most once per key per job run.
            private ValueState<Boolean> createTableState;
            private Connection conn;

            @Override
            public void open(Configuration parameters) throws Exception {
                // Open the Phoenix JDBC connection once per parallel subtask.
                conn = DriverManager.getConnection(phoenixUrl);
                createTableState = getRuntimeContext().getState(
                    new ValueStateDescriptor<Boolean>("createTableState", Boolean.class)
                );
            }

            @Override
            public void invoke(Tuple2<JSONObject, TableProcess> value,
                               Context context) throws Exception {
                // 1. Create the target table if this key has not created it yet.
                checkTable(value);
                // 2. Upsert the row into HBase via Phoenix.
                write2HBase(value);
            }

            // Creates the Phoenix table on the first record of each key, e.g.:
            //   create table if not exists user(
            //     id varchar, name varchar,
            //     constraint pk primary key(id)) SALT_BUCKETS = 3
            private void checkTable(Tuple2<JSONObject, TableProcess> value) throws SQLException {
                if (createTableState.value() == null) {
                    TableProcess tp = value.f1;
                    StringBuilder createSql = new StringBuilder()
                        .append("create table if not exists ")
                        .append(tp.getSinkTable())
                        .append("(");
                    // Every sink column is stored as varchar.
                    for (String col : tp.getSinkColumns().split(",")) {
                        createSql.append(col).append(" varchar,");
                    }
                    // Primary key defaults to "id" when the config does not specify one.
                    createSql
                        .append("constraint pk primary key(")
                        .append(tp.getSinkPk() == null ? "id" : tp.getSinkPk())
                        .append("))")
                        .append(tp.getSinkExtend() == null ? "" : tp.getSinkExtend());
                    // try-with-resources closes the statement even if execute() throws.
                    try (PreparedStatement ps = conn.prepareStatement(createSql.toString())) {
                        ps.execute();
                        conn.commit();
                    }
                    createTableState.update(true);
                }
            }

            // Upserts one row: upsert into user(id, name) values(?, ?)
            // Values are bound through placeholders instead of being inlined into the
            // SQL string, so values containing quotes cannot break or inject the statement.
            private void write2HBase(Tuple2<JSONObject, TableProcess> value) throws SQLException {
                JSONObject data = value.f0;
                TableProcess tp = value.f1;
                String[] columns = tp.getSinkColumns().split(",");
                StringBuilder sql = new StringBuilder()
                    .append("upsert into ")
                    .append(tp.getSinkTable())
                    .append("(")
                    .append(tp.getSinkColumns())
                    .append(")values(");
                for (int i = 0; i < columns.length; i++) {
                    sql.append(i == 0 ? "?" : ",?");
                }
                sql.append(")");
                try (PreparedStatement ps = conn.prepareStatement(sql.toString())) {
                    // JDBC parameter indexes are 1-based.
                    for (int i = 0; i < columns.length; i++) {
                        ps.setString(i + 1, data.getString(columns[i]));
                    }
                    ps.execute();
                    conn.commit();
                }
            }

            @Override
            public void close() throws Exception {
                if (conn != null) {
                    conn.close();
                }
            }
        });
}
Sink到Kafka
更新MyKafkaUtil
添加新的获取KafkaSink的方法
/**
 * Builds a Kafka sink that routes each element to a topic chosen at runtime
 * from its {@code TableProcess} config (the sink table name is the topic).
 *
 * <p>Uses EXACTLY_ONCE semantics, so the producer transaction timeout is
 * raised to 15 minutes (broker default for the producer is 1 minute).
 *
 * @return a transactional producer writing to per-element topics
 */
public static FlinkKafkaProducer<Tuple2<JSONObject, TableProcess>> getKafkaSink() {
    Properties props = new Properties();
    props.setProperty("bootstrap.servers", "hadoop162:9092,hadoop163:9092,hadoop164:9092");
    // Transactions idle longer than 15 minutes are aborted by the broker.
    props.setProperty("transaction.timeout.ms", 1000 * 60 * 15 + "");
    return new FlinkKafkaProducer<Tuple2<JSONObject, TableProcess>>(
        // Fallback topic; the serializer below always overrides it per record.
        "default_topic",
        new KafkaSerializationSchema<Tuple2<JSONObject, TableProcess>>() {
            @Override
            public ProducerRecord<byte[], byte[]> serialize(Tuple2<JSONObject, TableProcess> element, @Nullable Long timestamp) {
                final String topic = element.f1.getSinkTable();
                // NOTE(review): this extracts the nested "data" object, while the HBase
                // path treats element.f0 itself as the row — confirm the payload shape.
                final JSONObject data = element.f0.getJSONObject("data");
                // Encode explicitly as UTF-8; the no-arg getBytes() would use the
                // platform default charset and can corrupt non-ASCII JSON.
                return new ProducerRecord<>(topic,
                    data.toJSONString().getBytes(java.nio.charset.StandardCharsets.UTF_8));
            }
        },
        props,
        FlinkKafkaProducer.Semantic.EXACTLY_ONCE);
}
具体实现代码
/**
 * Routes fact-table records to Kafka using the topic-per-record producer
 * supplied by {@link MyKafkaUtil#getKafkaSink()}.
 *
 * @param sink2KafkaStream stream of (row data, table-process config) pairs
 */
private void sink2Kafka(SingleOutputStreamOperator<Tuple2<JSONObject, TableProcess>> sink2KafkaStream) {
    FlinkKafkaProducer<Tuple2<JSONObject, TableProcess>> kafkaSink = MyKafkaUtil.getKafkaSink();
    sink2KafkaStream.addSink(kafkaSink);
}