参考如下链接中的 Flink 官方文档可知,数据保存到 Redis 时的容错保证是 at-least-once。因此我们借助幂等操作,即用新数据覆盖旧数据,来达到 exactly-once 的效果。
1.代码部分
1.1 config.properties配置文件
redis.host=192.168.204.210
redis.port=6379
redis.password=123456
redis.timeout=5000
redis.db=0
1.2 FlinkUtils工具类
package cn.huimin100.bigdata.tools;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
public class FlinkUtils {
// Execution environment held by this utility class: obtained once, when the class is initialized, and exposed through getEnv().
private static final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
/**
 * Returns the execution environment stored in the static {@code env} field.
 *
 * @return the {@link StreamExecutionEnvironment} instance held by this class
 */
public static StreamExecutionEnvironment getEnv() {
    // Every call hands back the same instance; nothing is created here.
    final StreamExecutionEnvironment sharedEnv = env;
    return sharedEnv;
}
/**
* 创建一个kafka streaming
*
* @param topics topic
* // * @param parameterPath 参数文件路径
* @param clazz 序列化类
* @param <T>
* @return DataStream
* @throws Exception
*/
public static <T> DataStream<T> createKafkaStreaming(ParameterTool parameters, String topics, String groupId, Class<? extends DeserializationSchema<T>> clazz) throws Exception {
//设置全局变量
env.getConfig().setGlobalJobParameters(parameters);
//设置checkPoint 确保一次语义,10scheckpoint 一次
//设置Checkpoint模式(与Kafka整合,一定要设置Checkpoint模式为Exactly_Once)
env.enableCheckpointing(parameters.getLong("checkpoint.interval", 10000L), CheckpointingMode.EXACTLY_ONCE);
//系统异常退出或人为 Cancel 掉,不删除checkpoint数据
env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
//此处设置重启策略为:出现异常重启6次,隔5秒一次(你也可以在flink-conf.yaml配置文件中写死。此处配置会覆盖配置文件中的)
String restartAttempts = parameters.get("restartAttempts","6");
String delayInterval = parameters.get("delayInterval","5000");
// String restartAttempts = parameters.getRequired("restartAttempts");
// String delayInterval = parameters.getRequired("delayInterval");
env.getConfig(