Flink的流处理API
介绍下流处理API的基本使用,从文本文件中读取数据作为数据源
environment ----> source ----> transform ----> sink
1、准备文本文件
1,160,22
2,160,22
3,142,22
4,180,22
5,190,22
6,177,23
7,160,23
8,179,23
9,183,23
10,182,23
2、输出文本长度、切割操作
创建执行环境
StreamExecutionEnvironment environment=StreamExecutionEnvironment.getExecutionEnvironment();
读取数据源
DataStream<String> dataStreamSource = environment.readTextFile("E:\\flink\\src\\main\\resources\\Test.txt");
public static void main(String[] args) throws Exception {
    // Set up the streaming execution environment.
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
    // Read the text file line by line as the source stream.
    DataStream<String> dataStreamSource = environment.readTextFile("E:\\flink\\src\\main\\resources\\Test.txt");
    /** Map each line to its length (typed as Integer instead of Object for type safety). */
    DataStream<Integer> lengthStream = dataStreamSource.map(new MapFunction<String, Integer>() {
        @Override
        public Integer map(String s) throws Exception {
            return s.length();
        }
    });
    /** Split each line on "," and emit every field as its own element. */
    SingleOutputStreamOperator<String> splitStream = dataStreamSource.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public void flatMap(String s, Collector<String> collector) throws Exception {
            for (String field : s.split(",")) {
                collector.collect(field);
            }
        }
    });
    lengthStream.print();
    splitStream.print();
    // Nothing runs until execute() is called — it submits the assembled job graph.
    environment.execute();
}
3、对文本数据进行分组
public static void main(String[] args) throws Exception {
    // Set up the streaming execution environment.
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
    // Each line is "id,height,age" — parse into a PersonBean.
    DataStream<String> dataStreamSource = environment.readTextFile("E:\\flink\\src\\main\\resources\\Test.txt");
    DataStream<PersonBean> personStream = dataStreamSource.map(line -> {
        String[] fields = line.split(",");
        // parseInt/parseDouble avoid the needless boxing of valueOf; the
        // constructor arguments autobox if PersonBean declares wrapper types.
        return new PersonBean(Integer.parseInt(fields[0]), Double.parseDouble(fields[1]), Integer.parseInt(fields[2]));
    });
    // Group by the "age" field. String-based keyBy is deprecated in newer
    // Flink versions — prefer keyBy(PersonBean::getAge) when upgrading.
    KeyedStream<PersonBean, Tuple> ageStream = personStream.keyBy("age");
    // Rolling reduce per age group: keeps the first id seen, latest height/age.
    // NOTE(review): reduceStream is never printed or given a sink, so this
    // reduce never materializes in the job — print it or remove it.
    SingleOutputStreamOperator<PersonBean> reduceStream = ageStream.reduce(
            (acc, next) -> new PersonBean(acc.getId(), next.getHeigth(), next.getAge()));
    // Per-group running maximum by the "heigth" field (the field-name typo
    // comes from PersonBean and must match the bean exactly).
    SingleOutputStreamOperator<PersonBean> maxHeight = ageStream.maxBy("heigth");
    maxHeight.print();
    // Submit and run the job.
    environment.execute();
}
4、数据的分流和合流
public static void main(String[] args) throws Exception {
    // Set up the streaming execution environment.
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
    // Each line is "id,height,age" — parse into a PersonBean.
    DataStream<String> dataStreamSource = environment.readTextFile("E:\\flink\\src\\main\\resources\\Test.txt");
    DataStream<PersonBean> personStream = dataStreamSource.map(line -> {
        String[] fields = line.split(",");
        return new PersonBean(Integer.valueOf(fields[0]), Double.valueOf(fields[1]), Integer.valueOf(fields[2]));
    });
    // Tag each record "high" (height > 160) or "short". NOTE(review):
    // split/select is deprecated in newer Flink — prefer side outputs.
    SplitStream<PersonBean> splitStream = personStream.split(new OutputSelector<PersonBean>() {
        @Override
        public Iterable<String> select(PersonBean personBean) {
            return personBean.getHeigth() > 160 ? Collections.singletonList("high") : Collections.singletonList("short");
        }
    });
    /** Split succeeded: two sub-streams, "high" and "short". */
    DataStream<PersonBean> highPerson = splitStream.select("high");
    DataStream<PersonBean> shortPerson = splitStream.select("short"); // lowerCamelCase local name
    /** Start merging the streams. */
    // Convert the "high" stream to (height, age) tuples so the two sides
    // of the connect have different element types, as the demo intends.
    SingleOutputStreamOperator<Tuple2<Double, Integer>> highTupleStream = highPerson.map(new MapFunction<PersonBean, Tuple2<Double, Integer>>() {
        @Override
        public Tuple2<Double, Integer> map(PersonBean personBean) throws Exception {
            return new Tuple2<>(personBean.getHeigth(), personBean.getAge());
        }
    });
    // Connect with the "short" stream; connect preserves each side's type.
    ConnectedStreams<Tuple2<Double, Integer>, PersonBean> connectedStreams = highTupleStream.connect(shortPerson);
    // Co-map both sides into a common Tuple3<height, age, label> element.
    // Typed as Tuple3 instead of Object so downstream operators keep type safety.
    SingleOutputStreamOperator<Tuple3<Double, Integer, String>> resultStream =
            connectedStreams.map(new CoMapFunction<Tuple2<Double, Integer>, PersonBean, Tuple3<Double, Integer, String>>() {
                // Applied to elements from the "high" side.
                @Override
                public Tuple3<Double, Integer, String> map1(Tuple2<Double, Integer> highTuple) throws Exception {
                    return new Tuple3<>(highTuple.f0, highTuple.f1, "Is HIGH");
                }
                // Applied to elements from the "short" side.
                @Override
                public Tuple3<Double, Integer, String> map2(PersonBean personBean) throws Exception {
                    return new Tuple3<>(personBean.getHeigth(), personBean.getAge(), "IS Short");
                }
            });
    resultStream.print(); // print the merged stream
    environment.execute(); // submit and run the job
}