1. Using HashMapStateBackend with JobManagerCheckpointStorage
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
Configuration config = new Configuration();
config.set(StateBackendOptions.STATE_BACKEND, "hashmap");
config.set(CheckpointingOptions.CHECKPOINT_STORAGE, "jobmanager");
env.configure(config);
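The same pair can also be set programmatically instead of through configuration strings. A minimal sketch, assuming Flink 1.13+ (where HashMapStateBackend lives in org.apache.flink.runtime.state.hashmap and JobManagerCheckpointStorage in org.apache.flink.runtime.state.storage):

// keep working state on the TaskManager heap
env.setStateBackend(new HashMapStateBackend());
// store checkpoint snapshots on the JobManager heap; suitable only for small state / local testing
env.getCheckpointConfig().setCheckpointStorage(new JobManagerCheckpointStorage());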
2. Using HashMapStateBackend with FileSystemCheckpointStorage
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
Configuration config = new Configuration();
config.set(StateBackendOptions.STATE_BACKEND, "hashmap");
config.set(CheckpointingOptions.CHECKPOINT_STORAGE, "filesystem");
config.set(CheckpointingOptions.CHECKPOINTS_DIRECTORY, "file:///Users/***/Desktop/checkpoint_test/filesystem");
env.configure(config);
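A programmatic sketch under the same assumptions as above. FileSystemCheckpointStorage writes checkpoint snapshots to the given directory, while working state stays on the TaskManager heap:

env.setStateBackend(new HashMapStateBackend());
env.getCheckpointConfig().setCheckpointStorage(
        new FileSystemCheckpointStorage("file:///Users/***/Desktop/checkpoint_test/filesystem"));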
3. Using EmbeddedRocksDBStateBackend with FileSystemCheckpointStorage
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
Configuration config = new Configuration();
config.set(StateBackendOptions.STATE_BACKEND, "rocksdb");
config.set(CheckpointingOptions.CHECKPOINT_STORAGE, "filesystem");
config.set(CheckpointingOptions.CHECKPOINTS_DIRECTORY, "file:///Users/***/Desktop/checkpoint_test/filesystem");
env.configure(config);
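Note that EmbeddedRocksDBStateBackend ships in the separate flink-statebackend-rocksdb artifact, which must be on the classpath for the "rocksdb" shortcut above to resolve. A programmatic sketch (the boolean constructor flag enables incremental checkpoints):

// true = enable incremental checkpoints
env.setStateBackend(new EmbeddedRocksDBStateBackend(true));
env.getCheckpointConfig().setCheckpointStorage("file:///Users/***/Desktop/checkpoint_test/filesystem");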
4. Complete code example
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.*;
import org.apache.flink.contrib.streaming.state.ConfigurableRocksDBOptionsFactory;
import org.apache.flink.contrib.streaming.state.RocksDBOptionsFactory;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;
import org.rocksdb.BlockBasedTableConfig;
import org.rocksdb.ColumnFamilyOptions;
import org.rocksdb.DBOptions;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
public class _02_StateBackendConfig {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        Configuration config = new Configuration();
        // Whether to allow checkpointing to continue after some tasks have finished
        // config.set(ExecutionCheckpointingOptions.ENABLE_CHECKPOINTS_AFTER_TASKS_FINISH, false);

        // Use HashMapStateBackend with JobManagerCheckpointStorage
        // config.set(StateBackendOptions.STATE_BACKEND, "hashmap");
        // config.set(CheckpointingOptions.CHECKPOINT_STORAGE, "jobmanager");

        // Use HashMapStateBackend with FileSystemCheckpointStorage
        // Note: using an HDFS checkpoint directory requires adding the HDFS dependency
        // config.set(StateBackendOptions.STATE_BACKEND, "hashmap");
        // config.set(CheckpointingOptions.CHECKPOINT_STORAGE, "filesystem");
        // config.set(CheckpointingOptions.CHECKPOINTS_DIRECTORY, "file:///Users/***/Desktop/checkpoint_test/filesystem");

        // Use EmbeddedRocksDBStateBackend with FileSystemCheckpointStorage
        config.set(StateBackendOptions.STATE_BACKEND, "rocksdb");
        config.set(CheckpointingOptions.CHECKPOINT_STORAGE, "filesystem");
        config.set(CheckpointingOptions.CHECKPOINTS_DIRECTORY, "file:///Users/***/Desktop/checkpoint_test/rocksdb");

        // RocksDB configuration: enable incremental checkpoints
        // EmbeddedRocksDBStateBackend backend = new EmbeddedRocksDBStateBackend(true);
        config.set(CheckpointingOptions.INCREMENTAL_CHECKPOINTS, true);
        // Configure RocksDB through a custom options factory
        // backend.setRocksDBOptions(new MyOptionsFactory());
        // env.setStateBackend(backend);

        // Enable the ChangelogStateBackend
        env.enableChangelogStateBackend(true);
        env.configure(config);

        // Enable checkpointing every 3 seconds with at-least-once semantics
        env.enableCheckpointing(3000L, CheckpointingMode.AT_LEAST_ONCE);

        DataStreamSource<String> source = env.socketTextStream("localhost", 9999);
        KeyedStream<Tuple2<String, String>, String> keyedStream = source
                .map(new MapFunction<String, Tuple2<String, String>>() {
                    @Override
                    public Tuple2<String, String> map(String value) throws Exception {
                        String[] fields = value.split(",");
                        return new Tuple2<>(fields[0], fields[1]);
                    }
                })
                .keyBy(e -> e.f0);

        keyedStream.process(new KeyedProcessFunction<String, Tuple2<String, String>, String>() {

            // Per-key list of values seen so far, used to emit each value only once
            private ValueState<List<String>> valueState;

            @Override
            public void open(Configuration parameters) throws Exception {
                ValueStateDescriptor<List<String>> valueStateDescriptor = new ValueStateDescriptor<>(
                        "res", TypeInformation.of(new TypeHint<List<String>>() {
                        }));
                valueState = getRuntimeContext().getState(valueStateDescriptor);
            }

            @Override
            public void processElement(Tuple2<String, String> value,
                                       KeyedProcessFunction<String, Tuple2<String, String>, String>.Context ctx,
                                       Collector<String> out) throws Exception {
                List<String> stringList = valueState.value();
                if (stringList == null) {
                    stringList = new ArrayList<>();
                }
                if (!stringList.contains(value.f1)) {
                    stringList.add(value.f1);
                    valueState.update(stringList);
                    out.collect(value.f1);
                }
            }
        }).print("res=>");

        env.execute();
    }
}
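MyOptionsFactory below is the custom options factory referenced by the commented-out backend.setRocksDBOptions(...) call in main; it tunes RocksDB's DBOptions and the per-column-family table format.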
class MyOptionsFactory implements ConfigurableRocksDBOptionsFactory {

    public static final ConfigOption<Integer> BLOCK_RESTART_INTERVAL = ConfigOptions
            .key("my.custom.rocksdb.block.restart-interval")
            .intType()
            .defaultValue(16)
            .withDescription(
                    "Block restart interval. RocksDB's default block restart interval is 16.");

    private int blockRestartInterval = BLOCK_RESTART_INTERVAL.defaultValue();

    @Override
    public DBOptions createDBOptions(DBOptions currentOptions,
                                     Collection<AutoCloseable> handlesToClose) {
        return currentOptions
                .setIncreaseParallelism(4)
                .setUseFsync(false);
    }

    @Override
    public ColumnFamilyOptions createColumnOptions(ColumnFamilyOptions currentOptions,
                                                   Collection<AutoCloseable> handlesToClose) {
        return currentOptions.setTableFormatConfig(
                new BlockBasedTableConfig()
                        .setBlockRestartInterval(blockRestartInterval));
    }

    @Override
    public RocksDBOptionsFactory configure(ReadableConfig configuration) {
        this.blockRestartInterval = configuration.get(BLOCK_RESTART_INTERVAL);
        return this;
    }
}
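As written, MyOptionsFactory is never actually wired into the job. A sketch of the two ways it could be registered; state.backend.rocksdb.options-factory is Flink's standard key for loading an options factory by class name, and the custom interval value shown is just an illustration:

// 1) programmatically, on an explicitly constructed backend
EmbeddedRocksDBStateBackend backend = new EmbeddedRocksDBStateBackend(true);
backend.setRocksDBOptions(new MyOptionsFactory());
env.setStateBackend(backend);

// 2) via configuration, so configure(ReadableConfig) picks up the custom key
Configuration config = new Configuration();
config.setString("state.backend.rocksdb.options-factory", MyOptionsFactory.class.getName());
config.setString("my.custom.rocksdb.block.restart-interval", "32"); // overrides the default of 16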