应该说有两个区别:
1:侧输出流的输出类型可以是任意的,但是一条流处理多次的时候一定会经历filter阶段,而filter不会改变元素类型,所以经过filter阶段后的输出类型一定与输入类型相同。
2:一条流处理多次的时候相当于使用了多个if操作。但是侧输出流,可以使用if、else if、else这样的操作,如果if、else if满足条件,可以提前结束方法,减少某些数据的逻辑执行。
结果上看是没有区别的,但是从实现上来说,一般旁路输出(侧输出流)只需要对一条流处理一次(类似于split),而通过filter得到两条流时,需要对同一条流分别filter两次,这样就相当于对一条流处理了两次才能生成两条流。
看一个代码例子,大家瞬间就明白了:
首先来看一条流被处理多次的代码案例(一定有多次filter):
package com.andy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
public class SideOutputTest {

    /**
     * Demonstrates splitting one stream into several streams by filtering it more than once.
     *
     * <p>Reads comma-separated lines from a socket on {@code localhost:9999}, converts each line
     * into a {@link Tuple4}, then applies one {@code filter} per business type ("jd" and
     * "taobao") and prints each filtered stream. Every record is evaluated by both filters, and
     * both filtered streams keep the {@code Tuple4} element type.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Each line is assumed to be "businessType,userId,url,ip", separated by commas,
        // where businessType is assumed to be either "jd" or "taobao".
        DataStreamSource<String> lines = env.socketTextStream("localhost", 9999);
        SingleOutputStreamOperator<Tuple4<String, String, String, String>> records =
                lines.map(new LineToTuple());

        // First pass over the stream: keep only the "jd" records.
        SingleOutputStreamOperator<Tuple4<String, String, String, String>> jdRecords =
                records.filter(record -> record.f0.equalsIgnoreCase("jd"));
        jdRecords.print("jd");

        // Second pass over the same stream: keep only the "taobao" records.
        SingleOutputStreamOperator<Tuple4<String, String, String, String>> taobaoRecords =
                records.filter(record -> record.f0.equalsIgnoreCase("taobao"));
        taobaoRecords.print("taobao");

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Splits a comma-separated line and packs its first four fields into a {@link Tuple4}. */
    private static final class LineToTuple
            implements MapFunction<String, Tuple4<String, String, String, String>> {

        @Override
        public Tuple4<String, String, String, String> map(String line) throws Exception {
            String[] fields = line.split(",");
            return Tuple4.of(fields[0], fields[1], fields[2], fields[3]);
        }
    }
}
再来一个侧输出流的例子:
package com.andy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
public class SideOutputTest2 {

    /**
     * Demonstrates splitting one stream into several streams with side outputs.
     *
     * <p>Reads comma-separated lines from a socket on {@code localhost:9999}, converts each line
     * into a {@link Tuple4}, and routes every record exactly once inside a single
     * {@link ProcessFunction}: "jd" records go to the {@code jd} side output, "taobao" records go
     * to the {@code taobao} side output, and everything else stays on the main output. The side
     * outputs carry a {@link Tuple3} (the business type is dropped), showing that a side output
     * may have a different element type than the main stream.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

        // Each line is assumed to be "businessType,userId,url,ip", separated by commas,
        // where businessType is assumed to be either "jd" or "taobao".
        DataStreamSource<String> dataStreamSource = environment.socketTextStream("localhost", 9999);
        SingleOutputStreamOperator<Tuple4<String, String, String, String>> afterMap =
                dataStreamSource.map(new MapFunction<String, Tuple4<String, String, String, String>>() {
                    @Override
                    public Tuple4<String, String, String, String> map(String input) throws Exception {
                        String[] userIdUrlIp = input.split(",");
                        return Tuple4.of(userIdUrlIp[0], userIdUrlIp[1], userIdUrlIp[2], userIdUrlIp[3]);
                    }
                });

        // With side outputs the element type can change inside processElement (Tuple4 -> Tuple3).
        // The trailing "{}" is required: an OutputTag must be created as an anonymous subclass so
        // that Flink can recover the generic element type; without it the constructor throws
        // InvalidTypesException because the type argument is erased.
        final OutputTag<Tuple3<String, String, String>> jd =
                new OutputTag<Tuple3<String, String, String>>("jd") {};
        final OutputTag<Tuple3<String, String, String>> taobao =
                new OutputTag<Tuple3<String, String, String>>("taobao") {};

        // The main output only ever receives the unmodified input record, so it is typed as Tuple4.
        SingleOutputStreamOperator<Tuple4<String, String, String, String>> processedDataStream =
                afterMap.process(
                        new ProcessFunction<Tuple4<String, String, String, String>, Tuple4<String, String, String, String>>() {
                            @Override
                            public void processElement(
                                    Tuple4<String, String, String, String> input,
                                    Context ctx,
                                    Collector<Tuple4<String, String, String, String>> out) throws Exception {
                                // if / else-if / else: each record is routed once and evaluation
                                // stops at the first matching branch.
                                if (input.f0.equalsIgnoreCase("jd")) {
                                    ctx.output(jd, Tuple3.of(input.f1, input.f2, input.f3));
                                } else if (input.f0.equalsIgnoreCase("taobao")) {
                                    ctx.output(taobao, Tuple3.of(input.f1, input.f2, input.f3));
                                } else {
                                    out.collect(input);
                                }
                            }
                        });

        processedDataStream.getSideOutput(jd).print("jd");
        processedDataStream.getSideOutput(taobao).print("taobao");
        processedDataStream.print("except jd and taobao");

        try {
            environment.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}