20、Flink 的 State Backends 代码示例

1、使用 HashMapStateBackend 和 JobManagerCheckpointStorage

// Describe the desired setup first: HashMap state backend, checkpoints stored via the JobManager.
Configuration backendSettings = new Configuration();
backendSettings.set(StateBackendOptions.STATE_BACKEND, "hashmap");
backendSettings.set(CheckpointingOptions.CHECKPOINT_STORAGE, "jobmanager");

// Then obtain the execution environment and apply the settings to it.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.configure(backendSettings);

2、使用 HashMapStateBackend 和 FileSystemCheckpointStorage

// Describe the desired setup first: HashMap state backend, checkpoints written to a file system path.
Configuration backendSettings = new Configuration();
backendSettings.set(StateBackendOptions.STATE_BACKEND, "hashmap");
backendSettings.set(CheckpointingOptions.CHECKPOINT_STORAGE, "filesystem");
// Directory that receives the checkpoint files.
backendSettings.set(CheckpointingOptions.CHECKPOINTS_DIRECTORY, "file:///Users/***/Desktop/checkpoint_test/filesystem");

// Then obtain the execution environment and apply the settings to it.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.configure(backendSettings);

3、使用 EmbeddedRocksDBStateBackend 和 FileSystemCheckpointStorage

// Describe the desired setup first: RocksDB state backend, checkpoints written to a file system path.
Configuration backendSettings = new Configuration();
backendSettings.set(StateBackendOptions.STATE_BACKEND, "rocksdb");
backendSettings.set(CheckpointingOptions.CHECKPOINT_STORAGE, "filesystem");
// Directory that receives the checkpoint files.
backendSettings.set(CheckpointingOptions.CHECKPOINTS_DIRECTORY, "file:///Users/***/Desktop/checkpoint_test/filesystem");

// Then obtain the execution environment and apply the settings to it.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.configure(backendSettings);

4、完整代码示例

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.*;
import org.apache.flink.contrib.streaming.state.ConfigurableRocksDBOptionsFactory;
import org.apache.flink.contrib.streaming.state.RocksDBOptionsFactory;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;
import org.rocksdb.BlockBasedTableConfig;
import org.rocksdb.ColumnFamilyOptions;
import org.rocksdb.DBOptions;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

/**
 * Demo job that selects a state backend and checkpoint storage through {@link Configuration}
 * and then runs a small keyed de-duplication pipeline on top of it.
 *
 * <p>Text lines are read from a socket on {@code localhost:9999}. Each line is expected to have
 * the form {@code key,value}; for every key, a value is printed the first time it is seen.
 * Lines that do not contain at least two comma-separated fields are dropped instead of failing
 * the job.
 */
public class _02_StateBackendConfig {
    /**
     * Builds the configuration, enables checkpointing and executes the streaming job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        Configuration config = new Configuration();
        // Whether to keep checkpointing after some tasks have finished
//        config.set(ExecutionCheckpointingOptions.ENABLE_CHECKPOINTS_AFTER_TASKS_FINISH, false);

        // Option 1: HashMapStateBackend with JobManagerCheckpointStorage
//        config.set(StateBackendOptions.STATE_BACKEND, "hashmap");
//        config.set(CheckpointingOptions.CHECKPOINT_STORAGE, "jobmanager");

        // Option 2: HashMapStateBackend with FileSystemCheckpointStorage
        // Note: using an HDFS file system requires adding the HDFS dependency
//        config.set(StateBackendOptions.STATE_BACKEND, "hashmap");
//        config.set(CheckpointingOptions.CHECKPOINT_STORAGE, "filesystem");
//        config.set(CheckpointingOptions.CHECKPOINTS_DIRECTORY, "file:///Users/***/Desktop/checkpoint_test/filesystem");

        // Option 3 (active): EmbeddedRocksDBStateBackend with FileSystemCheckpointStorage
        config.set(StateBackendOptions.STATE_BACKEND, "rocksdb");
        config.set(CheckpointingOptions.CHECKPOINT_STORAGE, "filesystem");
        config.set(CheckpointingOptions.CHECKPOINTS_DIRECTORY, "file:///Users/***/Desktop/checkpoint_test/rocksdb");

        // RocksDB settings
        // Incremental checkpoints: either via the backend constructor (commented out) or via config
//        EmbeddedRocksDBStateBackend backend = new EmbeddedRocksDBStateBackend(true);
        config.set(CheckpointingOptions.INCREMENTAL_CHECKPOINTS, true);

        // Configure RocksDB through a custom options factory
//        backend.setRocksDBOptions(new MyOptionsFactory());
//        env.setStateBackend(backend);

        // Enable the ChangelogStateBackend
        env.enableChangelogStateBackend(true);

        env.configure(config);

        // Enable checkpointing with a 3000 ms interval in at-least-once mode
        env.enableCheckpointing(3000L, CheckpointingMode.AT_LEAST_ONCE);

        DataStreamSource<String> source = env.socketTextStream("localhost", 9999);
        KeyedStream<Tuple2<String, String>, String> keyedStream = source
                // Guard against malformed input: String.split drops trailing empty strings, so an
                // empty line, "abc" or "a," yields fewer than two fields and would otherwise make
                // fields[1] below throw ArrayIndexOutOfBoundsException and fail the job.
                .filter(line -> line.split(",").length >= 2)
                .map(new MapFunction<String, Tuple2<String, String>>() {
                    @Override
                    public Tuple2<String, String> map(String value) throws Exception {
                        String[] fields = value.split(",");
                        return new Tuple2<>(fields[0], fields[1]);
                    }
                })
                .keyBy(e -> e.f0);
        keyedStream.process(new KeyedProcessFunction<String, Tuple2<String, String>, String>() {
                    // Per-key list of the values that have already been emitted.
                    private ValueState<List<String>> valueState;

                    @Override
                    public void open(Configuration parameters) throws Exception {
                        ValueStateDescriptor<List<String>> valueStateDescriptor = new ValueStateDescriptor<>("res", TypeInformation.of(new TypeHint<List<String>>() {
                        }));
                        valueState = getRuntimeContext().getState(valueStateDescriptor);
                    }

                    @Override
                    public void processElement(Tuple2<String, String> value, Context ctx, Collector<String> out) throws Exception {
                        List<String> seenValues = valueState.value();

                        // No state stored yet for the current key.
                        if (seenValues == null) {
                            seenValues = new ArrayList<>();
                        }

                        // Emit each distinct value once per key and remember it in state.
                        if (!seenValues.contains(value.f1)) {
                            seenValues.add(value.f1);
                            valueState.update(seenValues);
                            out.collect(value.f1);
                        }
                    }
                })
                .print("res=>");

        env.execute();

    }
}

/**
 * Custom RocksDB options factory whose block restart interval can be supplied through
 * configuration via {@link #BLOCK_RESTART_INTERVAL}.
 */
class MyOptionsFactory implements ConfigurableRocksDBOptionsFactory {
    /** Configuration option for the block restart interval; defaults to 16. */
    public static final ConfigOption<Integer> BLOCK_RESTART_INTERVAL =
            ConfigOptions.key("my.custom.rocksdb.block.restart-interval")
                    .intType()
                    .defaultValue(16)
                    .withDescription(
                            " Block restart interval. RocksDB has default block restart interval as 16. ");

    /** Interval currently in effect; starts at the option default until {@link #configure} is called. */
    private int restartInterval = BLOCK_RESTART_INTERVAL.defaultValue();

    /**
     * Adjusts the database-level options: sets increase-parallelism to 4 and turns fsync off.
     *
     * @param currentOptions options to build upon
     * @param handlesToClose collection for native handles that must be closed later (not used here)
     * @return the options returned by the last setter call
     */
    @Override
    public DBOptions createDBOptions(DBOptions currentOptions,
                                     Collection<AutoCloseable> handlesToClose) {
        DBOptions withParallelism = currentOptions.setIncreaseParallelism(4);
        return withParallelism.setUseFsync(false);
    }

    /**
     * Installs a block-based table format that uses the configured block restart interval.
     *
     * @param currentOptions column family options to build upon
     * @param handlesToClose collection for native handles that must be closed later (not used here)
     * @return the options returned by {@code setTableFormatConfig}
     */
    @Override
    public ColumnFamilyOptions createColumnOptions(ColumnFamilyOptions currentOptions,
                                                   Collection<AutoCloseable> handlesToClose) {
        BlockBasedTableConfig tableConfig =
                new BlockBasedTableConfig().setBlockRestartInterval(restartInterval);
        return currentOptions.setTableFormatConfig(tableConfig);
    }

    /**
     * Reads the block restart interval from the given configuration.
     *
     * @param configuration configuration to read {@link #BLOCK_RESTART_INTERVAL} from
     * @return this factory
     */
    @Override
    public RocksDBOptionsFactory configure(ReadableConfig configuration) {
        restartInterval = configuration.get(BLOCK_RESTART_INTERVAL);
        return this;
    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

猫猫爱吃小鱼粮

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值