package org.example.tmp;
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.operators.Order;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.util.Collector;
/**
 * Minimal Flink DataStream word-count example.
 *
 * <p>The job reads a fixed set of sentences, splits each one on single spaces,
 * maps every word to a {@code (word, 1)} pair, groups the pairs by word, sums
 * the counts per word and prints the running result to standard output.
 *
 * <p>Notes accompanying this example report that on Flink 1.12.0 a key that
 * occurs only once (here {@code (flink,1)}) is missing from the printed output,
 * and that upgrading to Flink 1.12.2 resolves the problem.
 */
public class WordCount2 {

    /**
     * Builds and executes the word-count pipeline.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the Flink job fails to build or execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

        DataStream<String> dataStream =
                env.fromElements("it hadoop spark flink", "it hadoop spark", "it hadoop", "it");

        // Anonymous classes (rather than lambdas) keep the generic output types
        // visible to Flink's type extraction without an explicit returns(...) hint.
        DataStream<String> words = dataStream.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String value, Collector<String> out) throws Exception {
                for (String word : value.split(" ")) {
                    out.collect(word);
                }
            }
        });

        DataStream<Tuple2<String, Integer>> wordOnes =
                words.map(new MapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> map(String word) throws Exception {
                        return Tuple2.of(word, 1);
                    }
                });

        // Key by the word itself through a KeySelector: the positional keyBy(int)
        // overload is deprecated and only yields an untyped Tuple key.
        KeyedStream<Tuple2<String, Integer>, String> keyedByWord = wordOnes.keyBy(pair -> pair.f0);

        // Sum tuple field 1 (the count) per key.
        DataStream<Tuple2<String, Integer>> counts = keyedByWord.sum(1).setParallelism(1);

        counts.print();
        env.execute();
    }
}
在 Flink 1.12.0 版本中,使用 DataStream 计算 sum 时,如果某个 key 对应的数据只有 1 条,该 key 的结果不会被输出。比如把代码样例中的数据源改为 DataStream<String> dataStream = env.fromElements("it hadoop spark flink", "it hadoop spark", "it hadoop", "it");
理论上应该输出:
(hadoop,3)
(it,4)
(flink,1)
(spark,2)
实际只会输出:
(hadoop,3)
(it,4)
(spark,2)
将 Flink 版本升级为 1.12.2 可解决这个问题。