流式处理分为有界流和无界流
1)有界流
有界流是指所读取的数据有明确的边界:即使流不主动关闭,也能读到数据的尽头(例如读取一个文件)。
2)无界流
无界流是指数据没有边界:在没有人为干预的情况下,流会一直存在、不会停止,数据持续到来。
package day01;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * Word count written with the DataStream (streaming) API over a bounded source:
 * the text file(s) at the relative path {@code "input"}.
 *
 * <p>Pipeline: read lines, flat-map each line, wrap every element as
 * {@code (word, 1)}, key by the word, sum field 1 per key, print.
 */
public class Flink02_wordCount_Bounded {

    /**
     * Builds the job graph and submits it under the job name {@code "wordcount"}.
     *
     * @param args command-line arguments; not read by this method
     * @throws Exception propagated from {@code env.execute}
     */
    public static void main(String[] args) throws Exception {
        // Obtain the execution environment and pin the job's default parallelism to 1.
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Bounded source: read the "input" path as a stream of text lines.
        DataStream<String> lines = env.readTextFile("input");

        // Flat-map every line with the function from the batch example.
        // NOTE(review): MyFlatMap is defined outside this file; presumably it splits a line into words.
        SingleOutputStreamOperator<String> tokens =
                lines.flatMap(new Flink01_wordcount_batch.MyFlatMap());

        // Reshape each element: word -> (word, 1).
        SingleOutputStreamOperator<Tuple2<String, Integer>> pairs = tokens.map(new PairWithOne());

        // The streaming API has no groupBy; grouping is done with keyBy and a KeySelector
        // that takes the tuple and returns only the key to partition on.
        KeyedStream<Tuple2<String, Integer>, String> byWord = pairs.keyBy(new WordKey());

        // Aggregate tuple field at index 1 per key and print the results.
        byWord.sum(1).print();

        // Nothing runs until execute() is called.
        env.execute("wordcount");
    }

    /** Wraps an incoming string as the tuple {@code (value, 1)}. */
    private static final class PairWithOne implements MapFunction<String, Tuple2<String, Integer>> {
        @Override
        public Tuple2<String, Integer> map(String word) throws Exception {
            return new Tuple2<>(word, 1);
        }
    }

    /** Selects tuple field {@code f0} as the grouping key. */
    private static final class WordKey implements KeySelector<Tuple2<String, Integer>, String> {
        @Override
        public String getKey(Tuple2<String, Integer> pair) throws Exception {
            return pair.f0;
        }
    }
}