在前面的Flink学习笔记(八):flink热词统计中介绍了使用 sum 算子进行数据统计,那么有没有别的方法实现呢?本篇记录另一种实现方式:采用 state 状态进行统计计算。
获取数据流,随机模拟搜索的水果
统计结构:Tuple2<String, Integer> 水果名称,搜索次数
通过 keyBy(0) 聚合相同名称的水果
通过ValueState保存中间状态,进行数据累计
前文已经介绍了背景,这里直接上代码
/**
 * Demo job that counts how often each fruit name has been seen, using Flink keyed
 * {@link ValueState} instead of the built-in {@code sum} aggregation.
 *
 * <p>A source emits one {@code (fruitName, 1)} tuple per second, the stream is keyed by the
 * fruit name, and a {@link RichMapFunction} keeps a running total per key in a
 * {@code ValueState<Integer>}. Every incoming record produces one {@code (fruitName, total)}
 * tuple that is printed.
 */
public class TestKeyedState {

    /** Fruit names the source picks from at random. */
    private static final String[] FRUIT = { "苹果", "梨", "西瓜", "葡萄", "火龙果", "橘子", "桃子", "香蕉" };

    /**
     * Builds and runs the streaming job.
     *
     * @param args command line arguments (unused)
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Source: emits (randomFruit, 1) once per second until cancelled.
        DataStream<Tuple2<String, Integer>> fruit = env.addSource(new SourceFunction<Tuple2<String, Integer>>() {
            // volatile because cancel() sets the flag while run() is looping on it.
            private volatile boolean isRunning = true;
            private final Random random = new Random();

            @Override
            public void run(SourceContext<Tuple2<String, Integer>> ctx) throws Exception {
                while (isRunning) {
                    TimeUnit.SECONDS.sleep(1);
                    ctx.collect(Tuple2.of(FRUIT[random.nextInt(FRUIT.length)], 1));
                }
            }

            @Override
            public void cancel() {
                isRunning = false;
            }
        });

        // Key by the fruit name (tuple field 0) and accumulate the count in keyed state.
        fruit.keyBy(0).map(new RichMapFunction<Tuple2<String, Integer>, Tuple2<String, Integer>>() {
            /** Running total for the current key; {@code value()} is null until the first update. */
            private ValueState<Integer> valueState;

            @Override
            public void open(Configuration parameters) throws Exception {
                // The state stores a plain Integer, so the descriptor must declare Types.INT.
                // Declaring a tuple type here would not match the value that is actually stored.
                ValueStateDescriptor<Integer> stateDescriptor =
                        new ValueStateDescriptor<>("key-fruit", Types.INT);
                valueState = getRuntimeContext().getState(stateDescriptor);
            }

            @Override
            public Tuple2<String, Integer> map(Tuple2<String, Integer> tuple2) throws Exception {
                // Fruit name (the key) and the increment carried by this record (1 per record).
                String name = tuple2.f0;
                Integer count = tuple2.f1;

                // Read the previous total once; null means this key has not been seen yet.
                Integer lastValue = valueState.value();
                Integer total = (lastValue == null) ? count : Integer.valueOf(lastValue + count);

                // Persist the new total and emit it.
                valueState.update(total);
                return Tuple2.of(name, total);
            }
        }).print();

        env.execute("fruit");
    }
}
运行结果: