在读《Flink原理、实战与性能优化》时，遇到一个使用 ValueState 计算最小值的案例。书中的案例是用 Scala 写的，我改写成 Java 时忽略了其中的状态初始化部分（某个 key 第一次访问状态时取到的值为 null），导致迟迟计算不出结果。
public class StreamTest {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<Tuple2<Integer,Long>> dataStream = env.fromElements(
new Tuple2<>(2,21L),
new Tuple2<>(4,1L),
new Tuple2<>(5,4L),
new Tuple2<>(2,12L),
new Tuple2<>(2,13L),
new Tuple2<>(2,8L));
// dataStream.print(); //测试元素是否正确输入
dataStream.keyBy(0)
.flatMap(new RichFlatMapFunction<Tuple2<Integer, Long>, Tuple3<Integer,Long,Long>>() {
private ValueState<Long> leastValueState;
@Override
public void open(Configuration parameters) throws Exception{
ValueStateDescriptor<Long> state = new ValueStateDescriptor<Long>("leastValue",Types.LONG);
leastValueState = getRuntimeContext().getState(state);
}
@Override
public void flatMap(Tuple2<Integer,Long> t,Collector<Tuple3<Integer,Long,Long>> collector) throws Exception{
Long leastValue = leastValueState.value();
// System.out.println("状态值为:" + leastValue+" 对应的t.f1为:" + t.f1);
//当初始值为null的时候,设定输入状态值,然后进行比较,否则为null
if (leastValue == null) {
leastValueState.update(t.f1);
leastValue = leastValueState.value();
}
// System.out.println("状态值为:" + leastValue+" 对应的t.f1为:" + t.f1);
if (t.f1 > leastValue) {
collector.collect(new Tuple3<>(t.f0, t.f1, leastValue));
}
else{
leastValueState.update(t.f1);
collector.collect(new Tuple3<>(t.f0,t.f1,t.f1));
}
}
})
.print();
env.execute("测试带状态计算");
// DataStream<Tuple2<String, Long>> waterMarkStream = inputMa
System.out.println("------------分割线------------");
// dataStream.writeAsCsv("~/Documents/code/flink/flinkDemo/frauddetection/src/main/outputFile/count.csv");
}
其中 RichFlatMapFunction 是比较常用的一个富函数抽象类（注意它是抽象类而不是接口，因此可以重写 open 方法），获取状态的基本写法也即
ValueStateDescriptor<Long> state = new ValueStateDescriptor<Long>("leastValue",Types.LONG);
leastValueState = getRuntimeContext().getState(state);
这一段