导航
(十)Flink Datastream API 编程指南 算子-1 (转换算子、物理分区、任务链、资源组 、算子和作业)等基本介绍
上一篇文章Flink从入门到放弃—Stream API—常用算子(reduce)
更多代码样例
算子列表
- window
- windowAll
先说一下两者的最大区别:window 的 reduce 算子可以设置并行度,而 windowAll 的 reduce 算子并行度始终为 1,且不能更改。
用户代码
package com.stream.samples;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
/**
 * Demo job that runs the same tuple stream through {@code windowAll} and {@code window}
 * so the two operators can be compared side by side.
 *
 * <p>Every line read from the socket is turned into a {@code (line, line.length())} tuple.
 * Both pipelines aggregate over 10-second tumbling processing-time windows and print their
 * results with distinct prefixes.
 *
 * @author DeveloperZJQ
 * @since 2022/11/13
 */
public class CustomWindowAndWindowAllOperator {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStreamSource<String> lines = env.socketTextStream("192.168.112.147", 7777);

        // Pair each input line with its length; the result type is declared explicitly via returns(...).
        MapFunction<String, Tuple2<String, Integer>> toLengthPair =
                line -> Tuple2.of(line, line.length());
        SingleOutputStreamOperator<Tuple2<String, Integer>> lengthPairs =
                lines.map(toLengthPair).returns(Types.TUPLE(Types.STRING, Types.INT));

        // map -> keyBy(f0) -> windowAll -> reduce -> print
        SingleOutputStreamOperator<Tuple2<String, Integer>> allWindowSums =
                lengthPairs
                        .keyBy(pair -> pair.f0)
                        .windowAll(TumblingProcessingTimeWindows.of(Time.seconds(10)))
                        .reduce(CustomWindowAndWindowAllOperator::sumLengths);

        // map -> keyBy(whole tuple) -> window -> reduce -> print
        SingleOutputStreamOperator<Tuple2<String, Integer>> keyedWindowSums =
                lengthPairs
                        .keyBy((pair) -> pair, Types.TUPLE(Types.STRING, Types.INT))
                        .window(TumblingProcessingTimeWindows.of(Time.seconds(10)))
                        .reduce(CustomWindowAndWindowAllOperator::sumLengths);

        allWindowSums.print("windowAll->");
        keyedWindowSums.print("window->");

        // The job is named after this class.
        env.execute(CustomWindowAndWindowAllOperator.class.getSimpleName());
    }

    /** Keeps the first tuple's string and adds up the two length fields. */
    private static Tuple2<String, Integer> sumLengths(
            Tuple2<String, Integer> left, Tuple2<String, Integer> right) {
        return Tuple2.of(left.f0, left.f1 + right.f1);
    }
}
将上面的程序运行起来之后,可以看到 print 出来的数据,windowAll 的 reduce 任务并行度始终为 1。
window()
/**
 * Creates a {@link WindowedStream} over this keyed stream using the given window assigner.
 *
 * @param assigner the {@code WindowAssigner} handed to the new {@code WindowedStream}
 * @param <W> the window type produced by {@code assigner}
 * @return a new {@code WindowedStream} built from this stream and {@code assigner}
 */
@PublicEvolving
public <W extends Window> WindowedStream<T, KEY, W> window(WindowAssigner<? super T, W> assigner) {
    // Diamond instead of a raw type, so the result is checked as WindowedStream<T, KEY, W>.
    return new WindowedStream<>(this, assigner);
}
window() 定义在 KeyedStream 类中,方法内部直接 new WindowedStream(this, assigner)。
/**
 * Stores the keyed input stream and prepares the operator builder for this windowed stream.
 *
 * <p>The builder is seeded with the assigner, the assigner's default trigger, and the input
 * stream's execution config, element type, key selector and key type.
 *
 * @param input the keyed stream being windowed
 * @param windowAssigner the assigner used by the builder; also supplies the default trigger
 */
public WindowedStream(KeyedStream<T, K> input, WindowAssigner<? super T, W> windowAssigner) {
    this.input = input;
    // Diamond instead of a raw type, so the builder's type arguments are inferred and checked.
    this.builder =
            new WindowOperatorBuilder<>(
                    windowAssigner,
                    windowAssigner.getDefaultTrigger(input.getExecutionEnvironment()),
                    input.getExecutionConfig(),
                    input.getType(),
                    input.getKeySelector(),
                    input.getKeyType());
}
调用WindowedStream构造器。
/**
 * Builds the window operator that applies {@code reduceFunction} and then {@code function},
 * and attaches it to the input stream.
 *
 * <p>Note that nothing here configures parallelism (or similar settings) on the resulting
 * operator.
 *
 * @param reduceFunction the reduce function passed to the operator builder
 * @param function the window function passed to the operator builder
 * @param resultType type information for the operator's output
 * @param <R> the output element type
 * @return the transformed stream, with the builder-generated description set
 */
public <R> SingleOutputStreamOperator<R> reduce(ReduceFunction<T> reduceFunction, WindowFunction<T, R, K, W> function, TypeInformation<R> resultType) {
    // clean(...) is generic and returns its argument's type, so no raw casts are needed.
    function = this.input.getExecutionEnvironment().clean(function);
    reduceFunction = this.input.getExecutionEnvironment().clean(reduceFunction);

    String opName = this.builder.generateOperatorName();
    String opDescription = this.builder.generateOperatorDescription(reduceFunction, function);
    OneInputStreamOperator<T, R> operator = this.builder.reduce(reduceFunction, function);

    return this.input.transform(opName, resultType, operator).setDescription(opDescription);
}
windowAll()
/**
 * Builds the all-window operator that applies {@code reduceFunction} and then
 * {@code function}, attaches it to the input stream, and forces the result to be non-parallel.
 *
 * <p>Without an evictor a {@code WindowOperator} backed by a reducing state is used; with an
 * evictor an {@code EvictingWindowOperator} backed by a list state is used instead.
 *
 * @param reduceFunction the reduce function; must not be a {@code RichFunction}
 * @param function the all-window function applied to the reduced value
 * @param resultType type information for the operator's output
 * @param <R> the output element type
 * @return the transformed stream with its description set and {@code forceNonParallel()} applied
 * @throws UnsupportedOperationException if {@code reduceFunction} is a {@code RichFunction}
 */
@PublicEvolving
public <R> SingleOutputStreamOperator<R> reduce(ReduceFunction<T> reduceFunction, AllWindowFunction<T, R, W> function, TypeInformation<R> resultType) {
    // Guard clause: rich reduce functions are rejected before anything else happens.
    if (reduceFunction instanceof RichFunction) {
        throw new UnsupportedOperationException("ReduceFunction of reduce can not be a RichFunction.");
    }

    function = (AllWindowFunction)this.input.getExecutionEnvironment().clean(function);
    reduceFunction = (ReduceFunction)this.input.getExecutionEnvironment().clean(reduceFunction);

    String udfName = "AllWindowedStream." + Utils.getCallLocationName();
    String opName = this.windowAssigner.getClass().getSimpleName();
    KeySelector<T, Byte> keySel = this.input.getKeySelector();

    String opDescription;
    Object operator;
    if (this.evictor == null) {
        // No evictor: window contents are kept pre-reduced in a reducing state.
        ReducingStateDescriptor<T> reducingDesc =
                new ReducingStateDescriptor(
                        "window-contents",
                        reduceFunction,
                        this.input.getType().createSerializer(this.getExecutionEnvironment().getConfig()));
        opDescription = "TriggerWindow(" + this.windowAssigner + ", " + reducingDesc + ", " + this.trigger + ", " + udfName + ")";
        operator =
                new WindowOperator(
                        this.windowAssigner,
                        this.windowAssigner.getWindowSerializer(this.getExecutionEnvironment().getConfig()),
                        keySel,
                        this.input.getKeyType().createSerializer(this.getExecutionEnvironment().getConfig()),
                        reducingDesc,
                        new InternalSingleValueAllWindowFunction(function),
                        this.trigger,
                        this.allowedLateness,
                        this.lateDataOutputTag);
    } else {
        // Evictor present: raw stream records are kept in a list state and reduced when applied.
        TypeSerializer<StreamRecord<T>> recordSerializer =
                new StreamElementSerializer(
                        this.input.getType().createSerializer(this.getExecutionEnvironment().getConfig()));
        ListStateDescriptor<StreamRecord<T>> listDesc =
                new ListStateDescriptor("window-contents", recordSerializer);
        opDescription = "TriggerWindow(" + this.windowAssigner + ", " + listDesc + ", " + this.trigger + ", " + this.evictor + ", " + udfName + ")";
        operator =
                new EvictingWindowOperator(
                        this.windowAssigner,
                        this.windowAssigner.getWindowSerializer(this.getExecutionEnvironment().getConfig()),
                        keySel,
                        this.input.getKeyType().createSerializer(this.getExecutionEnvironment().getConfig()),
                        listDesc,
                        new InternalIterableAllWindowFunction(new ReduceApplyAllWindowFunction(reduceFunction, function)),
                        this.trigger,
                        this.evictor,
                        this.allowedLateness,
                        this.lateDataOutputTag);
    }

    // Unlike WindowedStream.reduce, the result is passed through forceNonParallel(),
    // which sets the operator's parallelism to 1.
    return this.input
            .transform(opName, resultType, (OneInputStreamOperator)operator)
            .setDescription(opDescription)
            .forceNonParallel();
}
forceNonParallel() 将并行度和最大并行度都固定设置为 1,并且把 nonParallel 标识置为 true。
/**
 * Sets both the parallelism and the max parallelism of this operator's transformation to 1
 * and marks the operator as non-parallel.
 *
 * @return this operator, so further calls can be chained
 */
@PublicEvolving
public SingleOutputStreamOperator<T> forceNonParallel() {
    final int singleTask = 1;
    transformation.setParallelism(singleTask);
    transformation.setMaxParallelism(singleTask);
    // Record that this operator has been forced to be non-parallel.
    nonParallel = true;
    return this;
}
4605

被折叠的 条评论
为什么被折叠?



