package com.ysservice.dataStreamApi.sink;
import com.ysservice.dataStreamApi.utils.GreenplumUtil;
import com.ysservice.dataStreamApi.utils.RegexUtils;
import org.apache.flink.api.common.state.CheckpointListener;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.state.FunctionInitializationContext;
import org.apache.flink.runtime.state.FunctionSnapshotContext;
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;
import org.apache.flink.types.Row;
import java.lang.reflect.Field;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.*;
/**
* @Description: GreenPlum Sink
* @author: WuBo
* @date:2021/11/8 14:09
*/
/**
 * Flink sink that buffers incoming change records and flushes them to Greenplum
 * only after the checkpoint containing them completes, using the
 * checkpoint-complete signal as the commit point.
 *
 * <p>Each element is a {@code Tuple3} of:
 * <ul>
 *   <li>{@code f0} — {@code true} for an insert record, {@code false} for a delete record</li>
 *   <li>{@code f1} — target table name (also the simple name of the source class
 *       that declares the table's primary key — see {@link #deleteSql})</li>
 *   <li>{@code f2} — the row data</li>
 * </ul>
 */
public class GreenPlumSink extends RichSinkFunction<Tuple3<Boolean, String, Row>> implements CheckpointListener, CheckpointedFunction {
    // JDBC handles are created in open() and are not serializable; transient so
    // Flink can serialize the sink instance when distributing the job.
    private transient Connection connection;
    private transient Statement statement;
    // Records grouped by the checkpoint that snapshotted them, awaiting commit.
    private transient HashMap<Long, List<Tuple3<Boolean, String, Row>>> pendingCasesPerCheckpoint;
    // Records received since the last snapshot.
    private transient ArrayList<Tuple3<Boolean, String, Row>> pendingCases;

    /** Called once when the task starts: obtains the Greenplum connection and buffers. */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        connection = GreenplumUtil.getConnection();
        statement = connection.createStatement();
        pendingCasesPerCheckpoint = new HashMap<>();
        pendingCases = new ArrayList<>();
    }

    /** Called once per record: buffer it until the next checkpoint snapshot. */
    @Override
    public void invoke(Tuple3<Boolean, String, Row> value, SinkFunction.Context context) throws Exception {
        pendingCases.add(value);
    }

    /**
     * Builds an INSERT statement for the given record. Null-valued fields are
     * omitted so the table defaults apply.
     *
     * @param names all field names of the row
     * @param value the record; {@code f1} is the table name, {@code f2} the row
     * @return the INSERT SQL text
     * @throws IllegalArgumentException if the row contains no non-null field
     *         (the previous version failed here with StringIndexOutOfBoundsException)
     */
    public static String insertSql(Set<String> names, Tuple3<Boolean, String, Row> value) {
        StringBuilder fields = new StringBuilder();
        StringBuilder datas = new StringBuilder();
        for (String name : names) {
            Object data = value.f2.getField(name);
            if (data != null) {
                fields.append(name).append(',');
                datas.append(toSqlLiteral(data)).append(',');
            }
        }
        if (fields.length() == 0) {
            throw new IllegalArgumentException("Row for table " + value.f1 + " has no non-null fields");
        }
        fields.setLength(fields.length() - 1); // drop trailing comma
        datas.setLength(datas.length() - 1);
        return "insert into " + value.f1 + "(" + fields + ") values(" + datas + ");";
    }

    /**
     * Renders a field value as a SQL literal: numeric values pass through
     * unquoted; everything else is normalized via RegexUtils.timeStampRegex and
     * single-quoted, with embedded single quotes doubled to keep the SQL valid.
     */
    private static String toSqlLiteral(Object data) {
        String text = data.toString();
        if (RegexUtils.numberRegex(text)) {
            return text;
        }
        return "'" + RegexUtils.timeStampRegex(text).replace("'", "''") + "'";
    }

    /**
     * Builds a DELETE statement keyed on the table's primary key columns. The
     * key list is read reflectively from the static {@code primaryKey} field
     * (a comma-separated column list) of the source class named after the table.
     *
     * @param names unused; kept for signature compatibility with existing callers
     * @param value the record; {@code f1} is the table name, {@code f2} the row
     * @return the DELETE SQL text
     * @throws IllegalArgumentException if no primary key column has a non-null value
     *         (the previous version failed here with StringIndexOutOfBoundsException)
     */
    public String deleteSql(Set<String> names, Tuple3<Boolean, String, Row> value) throws Exception {
        Class<?> sourceClass = Class.forName("com.ysservice.dataStreamApi.source." + value.f1);
        Field primaryKey = sourceClass.getDeclaredField("primaryKey");
        String[] keyColumns = ((String) primaryKey.get(sourceClass)).split(",");
        StringBuilder where = new StringBuilder();
        for (String name : keyColumns) {
            Object data = value.f2.getField(name);
            if (data != null) {
                if (where.length() > 0) {
                    where.append(" and ");
                }
                where.append(name).append('=').append(toSqlLiteral(data));
            }
        }
        if (where.length() == 0) {
            throw new IllegalArgumentException("No non-null primary key values for table " + value.f1);
        }
        return "delete from " + value.f1 + " where " + where + ";";
    }

    /** Releases JDBC resources; the connection is closed even if the statement close fails. */
    @Override
    public void close() throws Exception {
        super.close();
        try {
            if (statement != null) {
                statement.close(); // previously leaked
            }
        } finally {
            if (connection != null) {
                connection.close();
            }
        }
    }

    /**
     * On checkpoint completion, converts every record captured by this
     * checkpoint (and any older still-pending one) into SQL and submits the
     * whole batch to Greenplum.
     */
    @Override
    public void notifyCheckpointComplete(long checkpointId) throws Exception {
        Iterator<Map.Entry<Long, List<Tuple3<Boolean, String, Row>>>> it = pendingCasesPerCheckpoint.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry<Long, List<Tuple3<Boolean, String, Row>>> entry = it.next();
            if (entry.getKey() <= checkpointId) {
                for (Tuple3<Boolean, String, Row> pendingCase : entry.getValue()) {
                    Set<String> names = pendingCase.f2.getFieldNames(true); // all field names, incl. nested
                    statement.addBatch(pendingCase.f0
                            ? insertSql(names, pendingCase)
                            : deleteSql(names, pendingCase));
                }
                it.remove();
            }
        }
        statement.executeBatch();
    }

    /** Moves everything received since the last snapshot under this checkpoint id. */
    @Override
    public void snapshotState(FunctionSnapshotContext context) throws Exception {
        pendingCasesPerCheckpoint
                .computeIfAbsent(context.getCheckpointId(), id -> new ArrayList<>())
                .addAll(pendingCases);
        pendingCases.clear();
    }

    @Override
    public void initializeState(FunctionInitializationContext context) throws Exception {
        // NOTE(review): buffered records are never written to Flink managed state,
        // so anything pending at failure time is lost on restore — delivery is
        // best-effort, not exactly-once. Confirm this is acceptable, or persist
        // pendingCasesPerCheckpoint/pendingCases via ListState here.
    }
}
// Flink uses the checkpoint-complete signal to submit the buffered JDBC SQL, guaranteeing data consistency.