算子之间没有shuffle且并行度一样,则会形成operator chain
env.disableOperatorChaining(); 全局禁用算子链
package cn._51doit.flink.day04;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
/**
* Flink默认情况就是开启算子链,尽可能的将算子的链连在一起,触发有特殊情况才会将算子链断开
*/
public class OperatorChainDemo {
public static void main(String[] args) throws Exception{
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//在env上调用disableOperatorChaining即全局禁用算子链
env.disableOperatorChaining();
DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);
SingleOutputStreamOperator<String> filtered = lines.filter(e -> !"error".startsWith(e));
SingleOutputStreamOperator<String> words = filtered.flatMap(new FlatMapFunction<String, String>() {
@Override
public void flatMap(String line, Collector<String> collector) throws Exception {
String[] words = line.split(" ");
for (String word : words) {
collector.collect(word);
}
}
});
SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = words.map(new MapFunction<String, Tuple2<String, Integer>>() {
@Override
public Tuple2<String, Integer> map(String word) throws Exception {
return Tuple2.of(word, 1);
}
});
KeyedStream<Tuple2<String, Integer>, String> keyedStream = wordAndOne.keyBy(t -> t.f0);
SingleOutputStreamOperator<Tuple2<String, Integer>> res = keyedStream.sum(1);
res.print();
env.execute();
}
}
DisableOperatorChain之前的 DataFlow Graph
其中 Keyed Aggregation 指的是sum算子,而keyBy算子分为了两个部分,分别在第二个Task和第三个Task中,图中没有体现。
DisableOperatorChain的 DataFlow Graph
可以看到原本的算子链拆开后,算子之间的关系是forward(直传)
startNewChain算子
package cn._51doit.flink.day04;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
/**
* Flink默认情况就是开启算子链,尽可能的将算子的链连在一起,触发有特殊情况才会将算子链断开
*
* StartNewChain开启一个新的链
*
*/
public class StartNewChainDemo {
public static void main(String[] args) throws Exception{
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStreamSource<String> lines = env.socketTextStream("localhost", 9999);
SingleOutputStreamOperator<String> filtered = lines.filter(e -> !"error".startsWith(e));
SingleOutputStreamOperator<String> words = filtered.flatMap(new FlatMapFunction<String, String>() {
@Override
public void flatMap(String line, Collector<String> collector) throws Exception {
String[] words = line.split(" ");
for (String word : words) {
collector.collect(word);
}
}
}).startNewChain(); //从该算子的前面开始,开启一个新链,其余地方的链没有影响
SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = words.map(new MapFunction<String, Tuple2<String, Integer>>() {
@Override
public Tuple2<String, Integer> map(String word) throws Exception {
return Tuple2.of(word, 1);
}
});
KeyedStream<Tuple2<String, Integer>, String> keyedStream = wordAndOne.keyBy(t -> t.f0);
SingleOutputStreamOperator<Tuple2<String, Integer>> res = keyedStream.sum(1);
res.print();
env.execute();
}
}
原:
现:
注意:startNewChain是在flatMap算子处调用的
disableChaining算子
package cn._51doit.flink.day04;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
/**
* Flink默认情况就是开启算子链,尽可能的将算子的链连在一起,触发有特殊情况才会将算子链断开
*
*
*
*/
public class DisableChainingDemo {
public static void main(String[] args) throws Exception{
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStreamSource<String> lines = env.socketTextStream("localhost", 10000);
SingleOutputStreamOperator<String> filtered = lines.filter(e -> !"error".startsWith(e));
SingleOutputStreamOperator<String> words = filtered.flatMap(new FlatMapFunction<String, String>() {
@Override
public void flatMap(String line, Collector<String> collector) throws Exception {
String[] words = line.split(" ");
for (String word : words) {
collector.collect(word);
}
}
}).disableChaining(); //将该算子的前后的算子链都断开,其余地方没有影响
SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = words.map(new MapFunction<String, Tuple2<String, Integer>>() {
@Override
public Tuple2<String, Integer> map(String word) throws Exception {
return Tuple2.of(word, 1);
}
});
KeyedStream<Tuple2<String, Integer>, String> keyedStream = wordAndOne.keyBy(t -> t.f0);
SingleOutputStreamOperator<Tuple2<String, Integer>> res = keyedStream.sum(1);
res.print();
env.execute();
}
}
原:
现:
注意:调用disableChaining的算子前后都会断开
回顾:Flink的资源槽
共享资源槽
package cn._51doit.flink.day04;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
/**
* 设置程序的共享资源槽组名称
*
*/
public class SlotSharingGroupDemo {
public static void main(String[] args) throws Exception{
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStreamSource<String> lines = env.socketTextStream(args[0], Integer.parseInt(args[1]));
SingleOutputStreamOperator<String> filtered = lines.filter(e -> !"error".startsWith(e));
SingleOutputStreamOperator<String> words = filtered.flatMap(new FlatMapFunction<String, String>() {
@Override
public void flatMap(String line, Collector<String> collector) throws Exception {
String[] words = line.split(" ");
for (String word : words) {
collector.collect(word);
}
}
}).disableChaining().slotSharingGroup("doit"); //设置该算子的资源槽名称为doit
SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = words.map(new MapFunction<String, Tuple2<String, Integer>>() {
@Override
public Tuple2<String, Integer> map(String word) throws Exception {
return Tuple2.of(word, 1);
}
}).slotSharingGroup("default");
KeyedStream<Tuple2<String, Integer>, String> keyedStream = wordAndOne.keyBy(t -> t.f0);
SingleOutputStreamOperator<Tuple2<String, Integer>> res = keyedStream.sum(1);
res.print();
env.execute();
}
}