Set the timeout trigger's time characteristic to ProcessingTime
env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
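Note that setStreamTimeCharacteristic is deprecated since Flink 1.12, where the time characteristic is chosen per window assigner; the snippets below target the older API. For context, here is a minimal, self-contained sketch of the job setup these snippets assume. The source values are illustrative stand-ins: the real job reads a CDC stream of Tuple3<Boolean, String, Row>, where f0 marks upsert vs. delete, f1 is the table name, and f2 is the row data.

import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.types.Row;

public class JobSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
        // Stand-in for the real CDC source: f0 = upsert flag, f1 = table name, f2 = row data
        DataStream<Tuple3<Boolean, String, Row>> dataStream =
                env.fromElements(Tuple3.of(true, "demo_table", Row.of(1, "a")));
    }
}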
Main pipeline code
dataStream
        .keyBy(r -> r.f1) // key by table name so records of the same table keep their relative order
        .timeWindow(Time.of(10, TimeUnit.SECONDS)) // 10-second timeout
        .trigger(new CountTriggerWithTimeout<>(500, TimeCharacteristic.ProcessingTime)) // flush every 500 records
        .process(new CdcMyProcessWindowFunction()) // collect the records of one window into a List
        .addSink(new GreenPlumSink()); // write to GreenPlum
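Taken together, each key (that is, each table) flushes a batch downstream either when 500 records have accumulated or when the 10-second window expires, whichever comes first. A GreenPlum batch therefore never exceeds 500 statements and never lags more than about 10 seconds behind the source.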
Custom trigger with timeout
package com.ysservice.dataStreamApi.udfs;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.state.ReducingState;
import org.apache.flink.api.common.state.ReducingStateDescriptor;
import org.apache.flink.api.common.typeutils.base.LongSerializer;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.windowing.triggers.Trigger;
import org.apache.flink.streaming.api.windowing.triggers.TriggerResult;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
/**
 * Count window trigger with a timeout.
 */
public class CountTriggerWithTimeout<T> extends Trigger<T, TimeWindow> {
    /**
     * Maximum number of elements per window
     */
    private final int maxCount;
    /**
     * Event time / processing time
     */
    private final TimeCharacteristic timeType;
    /**
     * State descriptor for the counter that tracks the window's current element count
     */
    private final ReducingStateDescriptor<Long> countStateDescriptor =
            new ReducingStateDescriptor<>("counter", new Sum(), LongSerializer.INSTANCE);
    public CountTriggerWithTimeout(int maxCount, TimeCharacteristic timeType) {
        this.maxCount = maxCount;
        this.timeType = timeType;
    }

    private TriggerResult fireAndPurge(TimeWindow window, TriggerContext ctx) throws Exception {
        clear(window, ctx);
        return TriggerResult.FIRE_AND_PURGE;
    }
    @Override
    public TriggerResult onElement(T element, long timestamp, TimeWindow window, TriggerContext ctx) throws Exception {
        ReducingState<Long> countState = ctx.getPartitionedState(countStateDescriptor);
        countState.add(1L);
        // Register a timer at the end of the window so the timeout fires even
        // when fewer than maxCount elements arrive. (Duplicate registrations at
        // the same timestamp are deduplicated by Flink.)
        if (timeType == TimeCharacteristic.ProcessingTime) {
            ctx.registerProcessingTimeTimer(window.maxTimestamp());
        } else {
            ctx.registerEventTimeTimer(window.maxTimestamp());
        }
        if (countState.get() >= maxCount) {
            return fireAndPurge(window, ctx);
        }
        return TriggerResult.CONTINUE;
    }
    @Override
    public TriggerResult onProcessingTime(long time, TimeWindow window, TriggerContext ctx) throws Exception {
        if (timeType != TimeCharacteristic.ProcessingTime) {
            return TriggerResult.CONTINUE;
        }
        // The timer registered in onElement fires at window.maxTimestamp():
        // the timeout has elapsed, so flush whatever the window holds.
        if (time >= window.maxTimestamp()) {
            return fireAndPurge(window, ctx);
        }
        return TriggerResult.CONTINUE;
    }
    @Override
    public TriggerResult onEventTime(long time, TimeWindow window, TriggerContext ctx) throws Exception {
        if (timeType != TimeCharacteristic.EventTime) {
            return TriggerResult.CONTINUE;
        }
        if (time >= window.maxTimestamp()) {
            return fireAndPurge(window, ctx);
        }
        return TriggerResult.CONTINUE;
    }
    @Override
    public void clear(TimeWindow window, TriggerContext ctx) throws Exception {
        // Reset the counter; called on every fireAndPurge and at window cleanup
        ctx.getPartitionedState(countStateDescriptor).clear();
    }
    /**
     * Reduce function that sums the counter state
     */
    class Sum implements ReduceFunction<Long> {
        @Override
        public Long reduce(Long value1, Long value2) throws Exception {
            return value1 + value2;
        }
    }
}
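To see the trigger in isolation, here is a minimal, self-contained demo sketch; the element values and the 3-element/5-second thresholds are illustrative, not the values from the original job. Each key flushes a concatenation of its elements once 3 records arrive or when the 5-second window times out, whichever comes first.

import com.ysservice.dataStreamApi.udfs.CountTriggerWithTimeout;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;

public class TriggerDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
        env.fromElements("a", "a", "a", "a", "b")
                .keyBy(s -> s)
                .timeWindow(Time.seconds(5))
                .trigger(new CountTriggerWithTimeout<>(3, TimeCharacteristic.ProcessingTime))
                .reduce((x, y) -> x + y) // concatenates whatever one flush contains
                // "aaa" flushes on the count; with a long-running source the rest
                // would flush on the timeout (a bounded demo source may finish first)
                .print();
        env.execute("CountTriggerWithTimeout demo");
    }
}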
Collecting the records of one time window into a List
package com.ysservice.dataStreamApi.udfs;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.shaded.guava18.com.google.common.collect.Lists;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.types.Row;
import org.apache.flink.util.Collector;
import java.util.ArrayList;
import java.util.List;
/**
 * @Description: Collects the records of one time window into a List
 * @author: WuBo
 * @date:2021/11/12 17:21
 */
public class CdcMyProcessWindowFunction extends ProcessWindowFunction<Tuple3<Boolean, String, Row>, List<Tuple3<Boolean, String, Row>>, String, TimeWindow> {
    @Override
    public void process(String s, Context context, Iterable<Tuple3<Boolean, String, Row>> elements, Collector<List<Tuple3<Boolean, String, Row>>> out) throws Exception {
        ArrayList<Tuple3<Boolean, String, Row>> sqlArr = Lists.newArrayList(elements);
        if (!sqlArr.isEmpty()) {
            // Do not clear the list after emitting it; downstream operators may
            // still hold a reference to the same object.
            out.collect(sqlArr);
        }
    }
}
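Because the trigger fires at 500 records or at the 10-second timeout, each List emitted here holds at most 500 elements per table, which keeps the sink's JDBC batches bounded. Buffering a whole window in a ProcessWindowFunction is only safe because the trigger caps the window's contents; with an unbounded window this would accumulate state without limit.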
Custom GreenPlum sink that writes to GreenPlum in batches
package com.ysservice.dataStreamApi.sink;
import com.ysservice.dataStreamApi.utils.GreenplumUtil;
import com.ysservice.dataStreamApi.utils.RegexUtils;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;
import org.apache.flink.types.Row;
import java.lang.reflect.Field;
import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.List;
import java.util.Set;
/**
* @Description: GreenPlum Sink
* @author: WuBo
* @date:2021/11/8 14:09
*/
public class GreenPlumSink extends RichSinkFunction<List<Tuple3<Boolean, String, Row>>> {
    Connection connection;
    Statement statement;

    // Called once when the task starts; obtains the GreenPlum connection
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        connection = GreenplumUtil.getConnection();
        statement = connection.createStatement();
    }
    // Called once per incoming record, i.e. once per window batch
    @Override
    public void invoke(List<Tuple3<Boolean, String, Row>> value, SinkFunction.Context context) throws Exception {
        if (!value.isEmpty()) {
            for (Tuple3<Boolean, String, Row> tuple3 : value) {
                if (tuple3.f0) {
                    String insertSql = insertSql(tuple3.f2.getFieldNames(true), tuple3);
                    statement.addBatch(insertSql);
                } else {
                    String deleteSql = deleteSql(tuple3.f2.getFieldNames(true), tuple3);
                    statement.addBatch(deleteSql);
                }
            }
            try {
                statement.executeBatch();
            } catch (SQLException throwables) {
                statement.clearBatch(); // drop the failed batch, then replay it row by row
                executeErrorSql(value);
            }
        }
        value.clear();
    }
    // Replays a failed batch one statement at a time; statements that still fail
    // are recorded in cdc_log.cdc_error_sql instead of failing the job
    public void executeErrorSql(List<Tuple3<Boolean, String, Row>> errorTuple3List) throws Exception {
        if (!errorTuple3List.isEmpty()) {
            for (Tuple3<Boolean, String, Row> tuple3 : errorTuple3List) {
                if (tuple3.f0) {
                    // Upsert: delete any existing row by primary key, then insert
                    String deleteSql = deleteSql(tuple3.f2.getFieldNames(true), tuple3);
                    try {
                        statement.execute(deleteSql); // if the delete fails, log the SQL to the error table
                    } catch (SQLException e) {
                        // double the quotes so the logged SQL itself stays valid
                        statement.execute("insert into cdc_log.cdc_error_sql(error_sql) values ('" + deleteSql.replace("'", "''") + "')");
                    }
                    String insertSql = insertSql(tuple3.f2.getFieldNames(true), tuple3);
                    try {
                        statement.execute(insertSql); // if the insert fails, log the SQL to the error table
                    } catch (SQLException e) {
                        statement.execute("insert into cdc_log.cdc_error_sql(error_sql) values ('" + insertSql.replace("'", "''") + "')");
                    }
                } else {
                    String deleteSql = deleteSql(tuple3.f2.getFieldNames(true), tuple3);
                    try {
                        statement.execute(deleteSql); // if the delete fails, log the SQL to the error table
                    } catch (SQLException e) {
                        statement.execute("insert into cdc_log.cdc_error_sql(error_sql) values ('" + deleteSql.replace("'", "''") + "')");
                    }
                }
            }
        }
        errorTuple3List.clear();
    }
    // Builds one "(v1, v2, ...)," values tuple; a helper for multi-row inserts (not referenced above)
    public String insertValue(Set<String> names, Tuple3<Boolean, String, Row> value) {
        String values = ""; // accumulates the values
        for (String name : names) { // iterate over the fields and splice each field's value into the tuple
            Object data = value.f2.getField(name); // value of this field
            if (data != null) {
                if (RegexUtils.numberRegex(data.toString())) { // numeric values need no quoting
                    data = data.toString();
                } else {
                    // a bare single quote would break the SQL; doubling it escapes it
                    data = "'" + RegexUtils.timeStampRegex(data.toString()).replace("'", "''") + "'";
                }
            } else {
                data = "null";
            }
            values += data + ",";
        }
        // assemble the values tuple
        return "(" + values.substring(0, (values.length() - 1)) + "),";
    }
    // Builds an INSERT statement for one row
    public String insertSql(Set<String> names, Tuple3<Boolean, String, Row> value) {
        String datas = "";  // accumulates the values
        String fields = ""; // accumulates the column names
        for (String name : names) { // iterate over the fields and splice names and values into a complete statement
            Object data = value.f2.getField(name); // value of this field
            if (data != null) {
                if (RegexUtils.numberRegex(data.toString())) { // numeric values need no quoting
                    data = data.toString();
                } else {
                    // a bare single quote would break the SQL; doubling it escapes it
                    data = "'" + RegexUtils.timeStampRegex(data.toString()).replace("'", "''") + "'";
                }
                datas += data + ",";
                fields += name + ",";
            }
        }
        // assemble the INSERT statement
        String sql = "insert into " + value.f1 + "(" + fields.substring(0, (fields.length() - 1)) + ") values(" + datas.substring(0, (datas.length() - 1)) + ");";
        return sql;
    }
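    // Example: for table "demo_table" and a row {id=1, name=O'Brien}, insertSql
    // returns: insert into demo_table(id,name) values(1,'O''Brien');
    // (null fields are skipped entirely rather than written as NULL)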
    // Builds a DELETE statement for one row
    public String deleteSql(Set<String> names, Tuple3<Boolean, String, Row> value) throws Exception {
        // Look up the table's primary-key columns via reflection and delete by primary key
        Class<?> sourceClass = Class.forName("com.ysservice.dataStreamApi.source." + value.f1);
        Field primaryKey = sourceClass.getDeclaredField("primaryKey");
        String keys = (String) primaryKey.get(sourceClass);
        String[] keyArr = keys.split(",");
        String wheres = ""; // accumulates the WHERE clause
        for (String name : keyArr) {
            Object data = value.f2.getField(name);
            if (data != null) {
                if (RegexUtils.numberRegex(data.toString())) {
                    data = data.toString();
                } else {
                    data = "'" + RegexUtils.timeStampRegex(data.toString()).replace("'", "''") + "'";
                }
                wheres += name + "=" + data + " and ";
            }
        }
        // assemble the DELETE statement (the substring strips the trailing " and ")
        String sql = "delete from " + value.f1 + " where " + wheres.substring(0, (wheres.length() - 5)) + ";";
        return sql;
    }
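    // Each table is expected to have a matching class in com.ysservice.dataStreamApi.source
    // exposing its key columns, e.g. (illustrative): public static String primaryKey = "id";
    // Example result for demo_table with primaryKey "id": delete from demo_table where id=1;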
    // Called once when the task stops; releases the JDBC resources
    @Override
    public void close() throws Exception {
        super.close();
        if (statement != null) {
            statement.close();
        }
        if (connection != null) {
            connection.close();
        }
    }
}
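The sink depends on two utility classes that the post does not show. Below is a minimal sketch of what they might look like, assuming GreenPlum is reached over its PostgreSQL-compatible JDBC driver; the URL, credentials, and regexes are placeholders inferred from the call sites, not the author's code.

package com.ysservice.dataStreamApi.utils;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

public class GreenplumUtil {
    // Placeholder connection settings; GreenPlum speaks the PostgreSQL wire protocol
    private static final String URL = "jdbc:postgresql://gp-host:5432/mydb";
    private static final String USER = "gpuser";
    private static final String PASSWORD = "secret";

    public static Connection getConnection() throws SQLException {
        return DriverManager.getConnection(URL, USER, PASSWORD);
    }
}

package com.ysservice.dataStreamApi.utils;

public class RegexUtils {
    // True if the value can be written unquoted (integer or decimal)
    public static boolean numberRegex(String s) {
        return s.matches("-?\\d+(\\.\\d+)?");
    }

    // Guessed from the call sites: normalizes ISO-style timestamps
    // ("2021-11-08T14:09:00" -> "2021-11-08 14:09:00"), leaves other strings unchanged
    public static String timeStampRegex(String s) {
        return s.replace("T", " ");
    }
}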