结论:Keyed State 是与 key 绑定的。即使不同 key 的元素恰好由同一个算子并行实例处理,只要 key 不同,获取到的 Keyed State 也互不相同。
测试代码
package com.example.flink;

import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;

import java.util.ArrayList;
import java.util.List;
import java.util.Spliterator;

/**
 * Purpose: test whether Keyed State is bound to the operator parallel instance
 * or to the key.
 *
 * <p>Conclusion: Keyed State is bound to the key. Even when elements with
 * different keys are processed by the same parallel operator instance, each
 * distinct key observes its own independent Keyed State.
 *
 * @author tangdong
 * @since 2023-11-14
 */
public class DataSetToDataStreamDemo {

    /** Buffer size (per key) above which the buffered elements are printed and cleared. */
    private static final int FLUSH_THRESHOLD = 3;

    /**
     * Builds the fixture: 22 models named peter1..peter22 with a fixed key ("type")
     * sequence, so that several distinct keys land on the same parallel subtask.
     */
    private static List<KeyedStreamTestModel> buildTestData() {
        // Same type sequence as the original hand-written e1..e22 elements.
        int[] types = {1, 1, 1, 1, 1, 1, 1, 2, 3, 2, 2, 3, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5};
        List<KeyedStreamTestModel> data = new ArrayList<>(types.length);
        for (int i = 0; i < types.length; i++) {
            data.add(KeyedStreamTestModel.builder()
                    .type(types[i])
                    .name("peter" + (i + 1))
                    .build());
        }
        return data;
    }

    public static void main(String[] args) throws Exception {
        // Batch environment, used only to materialize the fixture as a DataSet.
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<KeyedStreamTestModel> dataSet = env.fromCollection(buildTestData());

        // "Convert" the DataSet into a DataStream: collect() runs the batch job
        // locally, and the resulting list is re-emitted as a stream source.
        StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStream<KeyedStreamTestModel> dataStream = streamEnv.fromCollection(dataSet.collect());

        SingleOutputStreamOperator<Object> process = dataStream
                .keyBy(KeyedStreamTestModel::getType)
                .process(new KeyedProcessFunction<Integer, KeyedStreamTestModel, Object>() {

                    // Keyed state: each key gets its own buffer and counter,
                    // even when two keys share a parallel subtask.
                    private ListState<KeyedStreamTestModel> bufferState;
                    private ValueState<Long> countState;

                    @Override
                    public void open(Configuration parameters) throws Exception {
                        bufferState = getRuntimeContext().getListState(
                                new ListStateDescriptor<>("buffer", KeyedStreamTestModel.class));
                        countState = getRuntimeContext().getState(
                                new ValueStateDescriptor<>("count", Long.class));
                    }

                    @Override
                    public void processElement(KeyedStreamTestModel keyedStreamTestModel,
                                               KeyedProcessFunction<Integer, KeyedStreamTestModel, Object>.Context context,
                                               Collector<Object> collector) throws Exception {
                        System.out.println(getRuntimeContext().getTaskNameWithSubtasks()
                                + "process-element" + keyedStreamTestModel.toString());

                        // 1-based running count of elements seen for the current key.
                        Long value = countState.value();
                        if (value == null) {
                            value = 1L;
                        }
                        System.out.println("process-element-count-by-key" + value);

                        Iterable<KeyedStreamTestModel> buffered = bufferState.get();
                        // NOTE(review): getExactSizeIfKnown() returns -1 when the state
                        // iterable is not SIZED; this works with the heap state backend,
                        // but verify before using another backend.
                        Spliterator<KeyedStreamTestModel> spliterator = buffered.spliterator();
                        if (spliterator.getExactSizeIfKnown() > FLUSH_THRESHOLD) {
                            // Print and clear the full batch for this key.
                            for (KeyedStreamTestModel next : buffered) {
                                System.out.print(getRuntimeContext().getTaskNameWithSubtasks()
                                        + "/" + value + "/" + next.toString() + " ");
                            }
                            System.out.println();
                            bufferState.clear();
                        }
                        // NOTE(review): a trailing partial batch (<= FLUSH_THRESHOLD
                        // elements) is never printed; acceptable for this demo.
                        bufferState.add(keyedStreamTestModel);

                        value++;
                        countState.update(value);
                    }
                })
                .setParallelism(2);
        process.print();

        // Execute the streaming job.
        streamEnv.execute("DataSet to DataStream Demo");
    }
}