// 旁路输出在 Flink 中叫作 SideOutput，用途类似于 DataStream#split，本质上是对数据流的切分：按照条件将 DataStream 切分为多个子数据流。子数据流叫作旁路输出数据流，每个旁路输出数据流可以有自己的下游处理逻辑。
package com.zxl.blink;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.RestOptions;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
/**
 * Demo job showing Flink side outputs.
 *
 * <p>A side output splits one {@link DataStream} into several sub-streams according to a
 * condition, similar in purpose to {@code DataStream#split}. Each sub-stream is identified by an
 * {@link OutputTag} and can have its own downstream processing logic.
 *
 * <p>Records read from {@code DataDB} are routed by {@code Person#getPage()} (judging by the print
 * labels this value is the person's age -- confirm against {@code Person}):
 * <ul>
 *   <li>value below {@link #MINOR_LOWER_BOUND}: side output {@code "low"}</li>
 *   <li>value from {@link #MINOR_LOWER_BOUND} (inclusive) to {@link #ADULT_LOWER_BOUND}
 *       (exclusive): side output {@code "hight"}</li>
 *   <li>everything else: main output</li>
 * </ul>
 */
public class OutPut {

    /** Values strictly below this threshold go to the "low" side output. */
    private static final int MINOR_LOWER_BOUND = 14;

    /** Values at or above this threshold stay on the main output. */
    private static final int ADULT_LOWER_BOUND = 18;

    /**
     * Builds and runs the local streaming job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        // Local environment with the web UI; the REST endpoint is bound to port 8848.
        Configuration configuration = new Configuration();
        configuration.setInteger(RestOptions.PORT, 8848);
        StreamExecutionEnvironment environment =
                StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);
        environment.setParallelism(4);

        DataStream<Person> source = environment.addSource(new DataDB());

        // An OutputTag must be defined for every side output: it is the identifier of the
        // downstream branch. The tags are created as anonymous subclasses ({}) as in the
        // Flink side-output examples.
        final OutputTag<Person> hight = new OutputTag<Person>("hight") {};
        final OutputTag<Person> low = new OutputTag<Person>("low") {};

        SingleOutputStreamOperator<Person> process =
                source.process(new ProcessFunction<Person, Person>() {
                    @Override
                    public void processElement(Person person, Context ctx, Collector<Person> out)
                            throws Exception {
                        if (person.getPage() < MINOR_LOWER_BOUND) {
                            ctx.output(low, person);
                        } else if (person.getPage() < ADULT_LOWER_BOUND) {
                            // Includes the lower bound itself: previously a value of exactly 14
                            // matched neither side output and was emitted on the main stream.
                            ctx.output(hight, person);
                        } else {
                            out.collect(person);
                        }
                    }
                });

        // Each side output, as well as the main output, gets its own sink.
        process.getSideOutput(low).print("小于14岁");
        process.getSideOutput(hight).print("大于14岁外成年");
        process.print("成年");

        environment.execute();
    }
}