文章目录
Flink示例——Connect、CoMapFunction、Split、Select
版本信息
产品 | 版本 |
---|---|
Flink | 1.7.2 |
Java | 1.8.0_231 |
Scala | 2.11.12 |
Maven依赖
- pom.xml 依赖部分
<!-- Flink core Java API -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-java</artifactId>
    <version>${flink.version}</version>
</dependency>

<!-- Flink DataStream API (Scala 2.11 build) -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-java_2.11</artifactId>
    <version>${flink.version}</version>
</dependency>

<!-- Flink clients (Scala 2.11 build) -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.11</artifactId>
    <version>${flink.version}</version>
</dependency>
自定义SourceFunction
- 提供一个SourceFunction,方便后面测试
public class CustomSourceFunction extends RichSourceFunction<Tuple2<String, Long>> { private boolean flag = true; @Override public void run(SourceContext<Tuple2<String, Long>> ctx) throws Exception { List<String> data = Arrays.asList("a", "b", "c", "d", "e", "f", "g"); Random random = new Random(); while (flag) { Thread.sleep(100); // 随机取一个值 String key = data.get(random.nextInt(data.size())); long value = System.currentTimeMillis(); ctx.collect(Tuple2.of(key, value)); } } @Override public void cancel() { flag = false; } }
Connect、CoMapFunction 示例
- 代码 ConnectCoMapFunctionDemo
public class ConnectCoMapFunctionDemo { public static void main(String[] args) { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // 自定义数据源 CustomSourceFunction sourceFunction = new CustomSourceFunction(); DataStreamSource<Tuple2<String, Long>> customDS = env.addSource(sourceFunction); // 第一份数据 DataStream<Tuple2<String, Long>> filter1DS = customDS .filter(value -> "a".equals(value.f0)); // 第二份数据 DataStream<Long> filter2DS = customDS .filter(value -> "b".equals(value.f0)) .map(value -> value.f1); // filter1DS.print("filter1"); // filter2DS.print("filter2"); // 连接2份数据 DataStream<Tuple3<String, Long, Long>> connectDS = filter1DS.connect(filter2DS) .map(new CoMapFunction<Tuple2<String, Long>, Long, Tuple3<String, Long, Long>>() { AtomicLong num = new AtomicLong(); // 单个Slot内 @Override public Tuple3<String, Long, Long> map1(Tuple2<String, Long> value) throws Exception { return Tuple3.of(value.f0, value.f1, num.incrementAndGet()); } @Override public Tuple3<String, Long, Long> map2(Long value) throws Exception { return Tuple3.of("value", value, num.incrementAndGet()); } }); DataStream<String> resultDS = connectDS .map(value -> value.f0 + "|" + value.f1 + "|" + value.f2); resultDS.print(); try { env.execute(); } catch (Exception e) { e.printStackTrace(); } } }
Split、Select 示例
- 代码 SplitSelectDemo
public class SplitSelectDemo { public static void main(String[] args) { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // 自定义数据源 CustomSourceFunction sourceFunction = new CustomSourceFunction(); DataStreamSource<Tuple2<String, Long>> customDS = env.addSource(sourceFunction); // 拆分流 SplitStream<Tuple2<String, Long>> splitDS = customDS .split(value -> { if ("a".equals(value.f0) || "b".equals(value.f0)) { return Collections.singletonList("first"); } else { return Collections.singletonList("second"); } }); // 获取first、second对应的DataStream DataStream<Tuple2<String, Long>> firstDS = splitDS.select("first"); DataStream<Tuple2<String, Long>> secondDS = splitDS.select("second"); firstDS.print("first"); secondDS.print("second"); try { env.execute(); } catch (Exception e) { e.printStackTrace(); } } }
Split、Select 的新版推荐写法
- 代码 NewSplitSelectDemo
public class NewSplitSelectDemo { public static void main(String[] args) { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // 自定义数据源 CustomSourceFunction sourceFunction = new CustomSourceFunction(); DataStreamSource<Tuple2<String, Long>> customDS = env.addSource(sourceFunction); // 使用ProcessFunction来拆分流 // 可以拆分出不同泛型的DataStream SingleOutputStreamOperator<Object> splitDS = customDS.process(new SplitProcessFunction()); // splitDS是主流,需要在ProcessFunction中调用out.collect(...) // splitDS.print(); // select出first、second对应的DataStream,并打印 splitDS.getSideOutput(new OutputTag<>("first", TypeInformation.of(String.class))) .print("first"); splitDS.getSideOutput(new OutputTag<>("second", TypeInformation.of(Long.class))) .print("second"); try { env.execute(); } catch (Exception e) { e.printStackTrace(); } } private static class SplitProcessFunction extends ProcessFunction<Tuple2<String, Long>, Object> { private OutputTag<String> firstTag = new OutputTag<>("first", TypeInformation.of(String.class)); private OutputTag<Long> secondTag = new OutputTag<>("second", TypeInformation.of(Long.class)); @Override public void processElement(Tuple2<String, Long> value, Context ctx, Collector<Object> out) throws Exception { if ("a".equals(value.f0) || "b".equals(value.f0)) { ctx.output(firstTag, value.f0 + "|" + value.f1); } else { ctx.output(secondTag, value.f1); } } } }