聊聊flink DataStream的connect操作

本文主要研究一下flink DataStream的connect操作

DataStream.connect

flink-streaming-java_2.11-1.7.0-sources.jar!/org/apache/flink/streaming/api/datastream/DataStream.java

/**
 * Excerpt of Flink's {@code DataStream}, reduced to its two {@code connect} overloads.
 * Members elided from the excerpt are marked with {@code //......}.
 *
 * @param <T> element type of this stream
 */
@Public
public class DataStream<T> {

    //......

    /**
     * Pairs this stream with another stream. The second stream has its own type
     * parameter, so the two element types do not have to match.
     *
     * @param dataStream the second input
     * @param <R> element type of the second input
     * @return a new {@link ConnectedStreams} built from the environment, this stream
     *         and {@code dataStream}
     */
    public <R> ConnectedStreams<T, R> connect(DataStream<R> dataStream) {
        return new ConnectedStreams<>(environment, this, dataStream);
    }

    /**
     * Pairs this stream with a broadcast stream.
     *
     * @param broadcastStream the broadcast side; passed through
     *                        {@code Preconditions.checkNotNull} before use
     * @param <R> element type of the broadcast stream
     * @return a new {@link BroadcastConnectedStream} that also receives the broadcast
     *         stream's state descriptor
     */
    @PublicEvolving
    public <R> BroadcastConnectedStream<T, R> connect(BroadcastStream<R> broadcastStream) {
        return new BroadcastConnectedStream<>(
                environment,
                this,
                Preconditions.checkNotNull(broadcastStream),
                broadcastStream.getBroadcastStateDescriptor());
    }

    //......

}

DataStream的connect操作创建的是ConnectedStreams或BroadcastConnectedStream,它用了两个泛型,即不要求两个dataStream的element是同一类型

ConnectedStreams

flink-streaming-java_2.11-1.7.0-sources.jar!/org/apache/flink/streaming/api/datastream/ConnectedStreams.java

/**
 * A pair of streams that are processed together by a single two-input operator.
 * The two inputs keep independent element types.
 *
 * @param <IN1> element type of the first input
 * @param <IN2> element type of the second input
 */
@Public
public class ConnectedStreams<IN1, IN2> {

    protected final StreamExecutionEnvironment environment;

    protected final DataStream<IN1> inputStream1;

    protected final DataStream<IN2> inputStream2;

    /**
     * Stores the environment and both inputs; each argument is passed through
     * {@code requireNonNull}.
     *
     * @param env the execution environment the resulting operators are registered with
     * @param input1 the first input stream
     * @param input2 the second input stream
     */
    protected ConnectedStreams(StreamExecutionEnvironment env, DataStream<IN1> input1, DataStream<IN2> input2) {
        this.environment = requireNonNull(env);
        this.inputStream1 = requireNonNull(input1);
        this.inputStream2 = requireNonNull(input2);
    }

    /** @return the execution environment given at construction */
    public StreamExecutionEnvironment getExecutionEnvironment() {
        return environment;
    }

    /** @return the first input stream */
    public DataStream<IN1> getFirstInput() {
        return inputStream1;
    }

    /** @return the second input stream */
    public DataStream<IN2> getSecondInput() {
        return inputStream2;
    }

    /** @return the type information reported by the first input */
    public TypeInformation<IN1> getType1() {
        return inputStream1.getType();
    }

    /** @return the type information reported by the second input */
    public TypeInformation<IN2> getType2() {
        return inputStream2.getType();
    }

    /**
     * Keys both inputs by field position and wraps the keyed streams in a new
     * {@code ConnectedStreams}.
     *
     * @param keyPosition1 key position passed to the first input's {@code keyBy}
     * @param keyPosition2 key position passed to the second input's {@code keyBy}
     * @return a new {@code ConnectedStreams} over the two keyed inputs
     */
    public ConnectedStreams<IN1, IN2> keyBy(int keyPosition1, int keyPosition2) {
        return new ConnectedStreams<>(this.environment, inputStream1.keyBy(keyPosition1),
                inputStream2.keyBy(keyPosition2));
    }

    /**
     * Keys both inputs by several field positions.
     *
     * @param keyPositions1 key positions passed to the first input's {@code keyBy}
     * @param keyPositions2 key positions passed to the second input's {@code keyBy}
     * @return a new {@code ConnectedStreams} over the two keyed inputs
     */
    public ConnectedStreams<IN1, IN2> keyBy(int[] keyPositions1, int[] keyPositions2) {
        return new ConnectedStreams<>(environment, inputStream1.keyBy(keyPositions1),
                inputStream2.keyBy(keyPositions2));
    }

    /**
     * Keys both inputs by a field expression.
     *
     * @param field1 field expression passed to the first input's {@code keyBy}
     * @param field2 field expression passed to the second input's {@code keyBy}
     * @return a new {@code ConnectedStreams} over the two keyed inputs
     */
    public ConnectedStreams<IN1, IN2> keyBy(String field1, String field2) {
        return new ConnectedStreams<>(environment, inputStream1.keyBy(field1),
                inputStream2.keyBy(field2));
    }

    /**
     * Keys both inputs by several field expressions.
     *
     * @param fields1 field expressions passed to the first input's {@code keyBy}
     * @param fields2 field expressions passed to the second input's {@code keyBy}
     * @return a new {@code ConnectedStreams} over the two keyed inputs
     */
    public ConnectedStreams<IN1, IN2> keyBy(String[] fields1, String[] fields2) {
        return new ConnectedStreams<>(environment, inputStream1.keyBy(fields1),
                inputStream2.keyBy(fields2));
    }

    /**
     * Keys both inputs with key selectors. The key types are wildcards here; whether
     * they match is only checked later, in {@code transform}.
     *
     * @param keySelector1 key selector for the first input
     * @param keySelector2 key selector for the second input
     * @return a new {@code ConnectedStreams} over the two keyed inputs
     */
    public ConnectedStreams<IN1, IN2> keyBy(KeySelector<IN1, ?> keySelector1, KeySelector<IN2, ?> keySelector2) {
        return new ConnectedStreams<>(environment, inputStream1.keyBy(keySelector1),
                inputStream2.keyBy(keySelector2));
    }

    /**
     * Keys both inputs with key selectors that share one key type, supplying the key
     * type information explicitly.
     *
     * @param keySelector1 key selector for the first input
     * @param keySelector2 key selector for the second input
     * @param keyType type information of the common key type
     * @param <KEY> the common key type
     * @return a new {@code ConnectedStreams} over the two keyed inputs
     */
    public <KEY> ConnectedStreams<IN1, IN2> keyBy(
            KeySelector<IN1, KEY> keySelector1,
            KeySelector<IN2, KEY> keySelector2,
            TypeInformation<KEY> keyType) {
        return new ConnectedStreams<>(
                environment,
                inputStream1.keyBy(keySelector1, keyType),
                inputStream2.keyBy(keySelector2, keyType));
    }

    /**
     * Applies a {@link CoMapFunction}: extracts the output type from the function,
     * wraps the function in a {@code CoStreamMap} and delegates to {@code transform}
     * under the name "Co-Map".
     *
     * @param coMapper the function to apply
     * @param <R> output element type
     * @return the resulting stream
     */
    public <R> SingleOutputStreamOperator<R> map(CoMapFunction<IN1, IN2, R> coMapper) {
        TypeInformation<R> outTypeInfo = TypeExtractor.getBinaryOperatorReturnType(
                coMapper,
                CoMapFunction.class,
                0,
                1,
                2,
                TypeExtractor.NO_INDEX,
                getType1(),
                getType2(),
                Utils.getCallLocationName(),
                true);

        return transform("Co-Map", outTypeInfo, new CoStreamMap<>(inputStream1.clean(coMapper)));
    }

    /**
     * Applies a {@link CoFlatMapFunction}: extracts the output type from the function,
     * wraps the function in a {@code CoStreamFlatMap} and delegates to
     * {@code transform} under the name "Co-Flat Map".
     *
     * @param coFlatMapper the function to apply
     * @param <R> output element type
     * @return the resulting stream
     */
    public <R> SingleOutputStreamOperator<R> flatMap(
            CoFlatMapFunction<IN1, IN2, R> coFlatMapper) {
        TypeInformation<R> outTypeInfo = TypeExtractor.getBinaryOperatorReturnType(
                coFlatMapper,
                CoFlatMapFunction.class,
                0,
                1,
                2,
                TypeExtractor.NO_INDEX,
                getType1(),
                getType2(),
                Utils.getCallLocationName(),
                true);

        return transform("Co-Flat Map", outTypeInfo, new CoStreamFlatMap<>(inputStream1.clean(coFlatMapper)));
    }

    /**
     * Applies a {@link CoProcessFunction}, extracting the output type from the
     * function and delegating to {@link #process(CoProcessFunction, TypeInformation)}.
     *
     * @param coProcessFunction the function to apply
     * @param <R> output element type
     * @return the resulting stream
     */
    @PublicEvolving
    public <R> SingleOutputStreamOperator<R> process(
            CoProcessFunction<IN1, IN2, R> coProcessFunction) {
        TypeInformation<R> outTypeInfo = TypeExtractor.getBinaryOperatorReturnType(
                coProcessFunction,
                CoProcessFunction.class,
                0,
                1,
                2,
                TypeExtractor.NO_INDEX,
                getType1(),
                getType2(),
                Utils.getCallLocationName(),
                true);

        return process(coProcessFunction, outTypeInfo);
    }

    /**
     * Applies a {@link CoProcessFunction} with an explicitly supplied output type and
     * delegates to {@code transform} under the name "Co-Process".
     *
     * @param coProcessFunction the function to apply
     * @param outputType type information of the output elements
     * @param <R> output element type
     * @return the resulting stream
     */
    @Internal
    public <R> SingleOutputStreamOperator<R> process(
            CoProcessFunction<IN1, IN2, R> coProcessFunction,
            TypeInformation<R> outputType) {
        TwoInputStreamOperator<IN1, IN2, R> operator;

        // The keyed operator is used only when BOTH inputs are keyed streams.
        if ((inputStream1 instanceof KeyedStream) && (inputStream2 instanceof KeyedStream)) {
            operator = new KeyedCoProcessOperator<>(inputStream1.clean(coProcessFunction));
        } else {
            operator = new CoProcessOperator<>(inputStream1.clean(coProcessFunction));
        }

        return transform("Co-Process", outputType, operator);
    }

    /**
     * Builds a {@code TwoInputTransformation} from both inputs and the given operator,
     * registers it with the execution environment and returns the output stream.
     * {@code map}, {@code flatMap} and {@code process} all end up here.
     *
     * @param functionName name given to the transformation
     * @param outTypeInfo type information of the output elements
     * @param operator the two-input operator to run
     * @param <R> output element type
     * @return the resulting stream
     * @throws UnsupportedOperationException if both inputs are keyed streams and their
     *         key types are not equal
     */
    @PublicEvolving
    public <R> SingleOutputStreamOperator<R> transform(String functionName,
            TypeInformation<R> outTypeInfo,
            TwoInputStreamOperator<IN1, IN2, R> operator) {

        // read the output type of the input Transforms to coax out errors about MissingTypeInfo
        inputStream1.getType();
        inputStream2.getType();

        TwoInputTransformation<IN1, IN2, R> transform = new TwoInputTransformation<>(
                inputStream1.getTransformation(),
                inputStream2.getTransformation(),
                functionName,
                operator,
                outTypeInfo,
                environment.getParallelism());

        // State key selectors are configured only when both inputs are keyed.
        if (inputStream1 instanceof KeyedStream && inputStream2 instanceof KeyedStream) {
            KeyedStream<IN1, ?> keyedInput1 = (KeyedStream<IN1, ?>) inputStream1;
            KeyedStream<IN2, ?> keyedInput2 = (KeyedStream<IN2, ?>) inputStream2;

            TypeInformation<?> keyType1 = keyedInput1.getKeyType();
            TypeInformation<?> keyType2 = keyedInput2.getKeyType();
            if (!(keyType1.canEqual(keyType2) && keyType1.equals(keyType2))) {
                throw new UnsupportedOperationException("Key types if input KeyedStreams " +
                        "don't match: " + keyType1 + " and " + keyType2 + ".");
            }

            transform.setStateKeySelectors(keyedInput1.getKeySelector(), keyedInput2.getKeySelector());
            transform.setStateKeyType(keyType1);
        }

        @SuppressWarnings({ "unchecked", "rawtypes" })
        SingleOutputStreamOperator<R> returnStream = new SingleOutputStreamOperator(environment, transform);

        getExecutionEnvironment().addOperator(transform);

        return returnStream;
    }

}

ConnectedStreams提供了keyBy方法用于指定两个stream的keySelector,提供了map、flatMap、process、transform操作,其中前三个操作最后都是调用transform操作

transform操作接收TwoInputStreamOperator类型的operator,然后转换为SingleOutputStreamOperator

map操作接收CoMapFunction,flatMap操作接收CoFlatMapFunction,process操作接收CoProcessFunction

CoMapFunction

flink-streaming-java_2.11-1.7.0-sources.jar!/org/apache/flink/streaming/api/functions/co/CoMapFunction.java

/**
 * A map function over two connected inputs: one method per input, both returning
 * the same output type.
 *
 * @param <IN1> element type of the first input
 * @param <IN2> element type of the second input
 * @param <OUT> output element type
 */
@Public
public interface CoMapFunction<IN1, IN2, OUT> extends Function, Serializable {

    /**
     * Maps a value of type {@code IN1} to one output value.
     *
     * @param value the input value
     * @return the mapped value
     * @throws Exception if the mapping fails
     */
    OUT map1(IN1 value) throws Exception;

    /**
     * Maps a value of type {@code IN2} to one output value.
     *
     * @param value the input value
     * @return the mapped value
     * @throws Exception if the mapping fails
     */
    OUT map2(IN2 value) throws Exception;

}

CoMapFunction继承了Function,它定义了map1、map2方法

CoFlatMapFunction

flink-streaming-java_2.11-1.7.0-sources.jar!/org/apache/flink/streaming/api/functions/co/CoFlatMapFunction.java

/**
 * A flat-map function over two connected inputs: one method per input. The methods
 * return nothing and receive a {@code Collector} typed to the output.
 *
 * @param <IN1> element type of the first input
 * @param <IN2> element type of the second input
 * @param <OUT> output element type
 */
@Public
public interface CoFlatMapFunction<IN1, IN2, OUT> extends Function, Serializable {

    /**
     * Handles a value of type {@code IN1}.
     *
     * @param value the input value
     * @param out the collector for output elements
     * @throws Exception if processing fails
     */
    void flatMap1(IN1 value, Collector<OUT> out) throws Exception;

    /**
     * Handles a value of type {@code IN2}.
     *
     * @param value the input value
     * @param out the collector for output elements
     * @throws Exception if processing fails
     */
    void flatMap2(IN2 value, Collector<OUT> out) throws Exception;

}

CoFlatMapFunction继承了Function,它定义了flatMap1、flatMap2方法,与CoMapFunction的map1、map2方法不同的是,flatMap1、flatMap2方法多了Collector参数,返回值为void,结果通过Collector输出

CoProcessFunction

flink-streaming-java_2.11-1.7.0-sources.jar!/org/apache/flink/streaming/api/functions/co/CoProcessFunction.java

/**
 * A rich function over two connected inputs. Each per-element method receives a
 * {@link Context} in addition to the collector, and {@link #onTimer} is provided
 * as an overridable hook.
 *
 * @param <IN1> element type of the first input
 * @param <IN2> element type of the second input
 * @param <OUT> output element type
 */
@PublicEvolving
public abstract class CoProcessFunction<IN1, IN2, OUT> extends AbstractRichFunction {

    private static final long serialVersionUID = 1L;

    /**
     * Handles a value of type {@code IN1}.
     *
     * @param value the input value
     * @param ctx context exposing the timestamp, the timer service and tagged outputs
     * @param out the collector for output elements
     * @throws Exception if processing fails
     */
    public abstract void processElement1(IN1 value, Context ctx, Collector<OUT> out) throws Exception;

    /**
     * Handles a value of type {@code IN2}.
     *
     * @param value the input value
     * @param ctx context exposing the timestamp, the timer service and tagged outputs
     * @param out the collector for output elements
     * @throws Exception if processing fails
     */
    public abstract void processElement2(IN2 value, Context ctx, Collector<OUT> out) throws Exception;

    /**
     * Timer callback. The default implementation does nothing; override it to react
     * to timers.
     *
     * @param timestamp the timestamp passed to the callback
     * @param ctx context that additionally exposes the {@link TimeDomain}
     * @param out the collector for output elements
     * @throws Exception if processing fails
     */
    public void onTimer(long timestamp, OnTimerContext ctx, Collector<OUT> out) throws Exception {}

    /**
     * Context handed to {@code processElement1} and {@code processElement2}.
     */
    public abstract class Context {

        /**
         * Timestamp associated with the current call, as a boxed {@code Long}.
         * NOTE(review): presumably {@code null} when no timestamp is available; confirm.
         */
        public abstract Long timestamp();

        /** The {@link TimerService} available to this function. */
        public abstract TimerService timerService();

        /**
         * Emits a value to the output identified by the given tag.
         *
         * @param outputTag the tag identifying the output
         * @param value the value to emit
         * @param <X> type of the emitted value
         */
        public abstract <X> void output(OutputTag<X> outputTag, X value);

    }

    /**
     * Context handed to {@link #onTimer}; adds the time domain to {@link Context}.
     */
    public abstract class OnTimerContext extends Context {

        /**
         * The {@link TimeDomain} of the firing timer.
         */
        public abstract TimeDomain timeDomain();

    }

}

CoProcessFunction继承了AbstractRichFunction,它定义了processElement1、processElement2方法,与CoFlatMapFunction不同的是,它定义的这两个方法多了Context参数

CoProcessFunction定义了Context及OnTimerContext,在processElement1、processElement2方法可以访问到Context,Context提供了timestamp、timerService、output方法

CoProcessFunction与CoFlatMapFunction不同的另外一点是它可以使用TimerService来注册timer,然后在onTimer方法里头实现响应的逻辑

小结

DataStream的connect操作创建的是ConnectedStreams或BroadcastConnectedStream,它用了两个泛型,即不要求两个dataStream的element是同一类型

ConnectedStreams提供了keyBy方法用于指定两个stream的keySelector,提供了map、flatMap、process、transform操作,其中前三个操作最后都是调用transform操作;transform操作接收TwoInputStreamOperator类型的operator,然后转换为SingleOutputStreamOperator;map操作接收CoMapFunction,flatMap操作接收CoFlatMapFunction,process操作接收CoProcessFunction

CoFlatMapFunction与CoMapFunction不同的是,CoFlatMapFunction的flatMap1、flatMap2方法多了Collector参数;CoProcessFunction定义了processElement1、processElement2方法,与CoFlatMapFunction不同的是,它定义的这两个方法多了Context参数;CoProcessFunction与CoFlatMapFunction不同的另外一点是它可以使用TimerService来注册timer,然后在onTimer方法里头实现响应的逻辑

doc

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值