一、分流
1.1、filter过滤:不推荐(每个分支都要对整条输入流完整过滤一次,分支越多重复开销越大)
package com.hpsk.flink.moreStream;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * Splits a single stream into per-channel streams by applying one independent
 * {@code filter} per channel to the same input.
 */
public class CutDataSteam1 {

    /**
     * Builds a three-element demo stream, derives the "app" and "web"
     * sub-streams from it and prints both of them.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
        environment.setParallelism(1);

        SingleOutputStreamOperator<Tuple3<String, String, Long>> events = environment.fromElements(
                Tuple3.of("001", "app", 1000L),
                Tuple3.of("002", "web", 2000L),
                Tuple3.of("003", "html5", 2000L)
        );

        // One filter per channel; records of any other channel ("html5" here)
        // match neither filter and are therefore never printed.
        SingleOutputStreamOperator<Tuple3<String, String, Long>> appEvents = onlyChannel(events, "app");
        SingleOutputStreamOperator<Tuple3<String, String, Long>> webEvents = onlyChannel(events, "web");

        appEvents.print("app ");
        webEvents.print("web ");

        environment.execute();
    }

    /** Keeps only the records whose second tuple field (f1) equals {@code channel}. */
    private static SingleOutputStreamOperator<Tuple3<String, String, Long>> onlyChannel(
            SingleOutputStreamOperator<Tuple3<String, String, Long>> events, String channel) {
        return events.filter(event -> event.f1.equals(channel));
    }
}
1.2、OutputTag侧输出流:推荐(一次处理即可把数据路由到多条输出流)
package com.hpsk.flink.moreStream;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
/**
 * Splits a stream with side outputs: a single {@code ProcessFunction} routes
 * every record either to the "app" tag, to the "web" tag, or to the main output.
 */
public class OutputTagDataSteam {

    /**
     * Builds a three-element demo stream, routes it by channel and prints the
     * two side outputs plus the remaining main output.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
        environment.setParallelism(1);

        SingleOutputStreamOperator<Tuple3<String, String, Long>> events = environment.fromElements(
                Tuple3.of("001", "app", 1000L),
                Tuple3.of("002", "web", 2000L),
                Tuple3.of("003", "html5", 2000L)
        );

        // Tags are created as anonymous subclasses (the trailing {}) so the
        // element type of each side output can be recovered at runtime.
        final OutputTag<String> appTag = new OutputTag<String>("app") {};
        final OutputTag<String> webTag = new OutputTag<String>("web") {};

        SingleOutputStreamOperator<String> remaining = events.process(
                new ProcessFunction<Tuple3<String, String, Long>, String>() {
                    @Override
                    public void processElement(Tuple3<String, String, Long> value,
                                               Context ctx,
                                               Collector<String> out) throws Exception {
                        // Route on the channel name stored in field f1.
                        switch (value.f1) {
                            case "app":
                                ctx.output(appTag, value.toString());
                                break;
                            case "web":
                                ctx.output(webTag, value.toString());
                                break;
                            default:
                                // Anything else stays on the main output.
                                out.collect(value.toString());
                                break;
                        }
                    }
                });

        DataStream<String> appRecords = remaining.getSideOutput(appTag);
        DataStream<String> webRecords = remaining.getSideOutput(webTag);

        appRecords.print("app ");
        webRecords.print("web ");
        remaining.print("other ");

        environment.execute();
    }
}
二、合流
2.1、union
package com.hpsk.flink.moreStream;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * Merges two streams of the same element type into one with {@code union}.
 */
public class UnionDataSteam {

    /**
     * Builds two three-element demo streams, unions them and prints the result.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
        environment.setParallelism(1);

        DataStream<Tuple3<String, String, Long>> merged =
                firstBatch(environment).union(secondBatch(environment));
        merged.print("union ");

        environment.execute();
    }

    /** Creates the first demo stream (ids 001-003). */
    private static SingleOutputStreamOperator<Tuple3<String, String, Long>> firstBatch(
            StreamExecutionEnvironment environment) {
        return environment.fromElements(
                Tuple3.of("001", "app", 1000L),
                Tuple3.of("002", "web", 2000L),
                Tuple3.of("003", "html5", 2000L)
        );
    }

    /** Creates the second demo stream (ids 005-007). */
    private static SingleOutputStreamOperator<Tuple3<String, String, Long>> secondBatch(
            StreamExecutionEnvironment environment) {
        return environment.fromElements(
                Tuple3.of("005", "app", 1000L),
                Tuple3.of("006", "web", 2000L),
                Tuple3.of("007", "html5", 2000L)
        );
    }
}
2.2、connect(示例:连接广播流 BroadcastConnectedStream)
package com.hpsk.flink.function;
import org.apache.flink.api.common.state.BroadcastState;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.state.ReadOnlyBroadcastState;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.BroadcastConnectedStream;
import org.apache.flink.streaming.api.datastream.BroadcastStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction;
import org.apache.flink.util.Collector;
/**
 * Connects a data stream read from a socket with a broadcast stream of table
 * configuration entries and reports, for every data record, whether its table
 * is listed in the broadcast configuration.
 */
public class BroadcastProcessFunctionDS {

    /**
     * Wires up the demo job: configuration entries are broadcast, socket lines
     * of the form {@code table,data} are parsed and matched against them.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        SingleOutputStreamOperator<String> tableConfigStream = env.fromElements(
                "table1,createTable",
                "table2,createTable",
                "table3,createTable");

        SingleOutputStreamOperator<Tuple2<String, String>> mySqlTableStream = env.socketTextStream("hadoop102", 8888)
                .map(line -> parseTableRecord(line))
                .returns(Types.TUPLE(Types.STRING, Types.STRING));

        MapStateDescriptor<String, String> mapStateDescriptor =
                new MapStateDescriptor<>("map-state", String.class, String.class);
        BroadcastStream<String> broadcast = tableConfigStream.broadcast(mapStateDescriptor);
        BroadcastConnectedStream<Tuple2<String, String>, String> connectedStream = mySqlTableStream.connect(broadcast);

        SingleOutputStreamOperator<String> result =
                connectedStream.process(new MyBroadcastProcessFunction(mapStateDescriptor));
        result.print("output ");

        env.execute();
    }

    /**
     * Parses one socket line of the form {@code table,data}.
     *
     * <p>The line is split only once. A missing field becomes an empty string
     * instead of raising {@link ArrayIndexOutOfBoundsException}, so one
     * malformed line cannot fail the job. The table name is trimmed because
     * the broadcast side stores its keys trimmed as well.
     *
     * @param line raw text line received from the socket
     * @return tuple of (table name, data)
     */
    private static Tuple2<String, String> parseTableRecord(String line) {
        String[] fields = line.split(",");
        // String.split drops trailing empty fields, so "," yields a zero-length array.
        String table = fields.length > 0 ? fields[0].trim() : "";
        String data = fields.length > 1 ? fields[1] : "";
        return Tuple2.of(table, data);
    }

    /**
     * Stores broadcast configuration entries ({@code table,statement}) in
     * broadcast state and tags each data record according to whether its table
     * name is present in that state.
     */
    public static class MyBroadcastProcessFunction extends BroadcastProcessFunction<Tuple2<String, String>, String, String> {
        private final MapStateDescriptor<String, String> mapStateDescriptor;

        /**
         * @param mapStateDescriptor descriptor of the broadcast state; must be the
         *                           same one used when the stream was broadcast
         */
        public MyBroadcastProcessFunction(MapStateDescriptor<String, String> mapStateDescriptor) {
            this.mapStateDescriptor = mapStateDescriptor;
        }

        /**
         * Records one configuration entry in the broadcast state.
         *
         * @param value configuration line of the form {@code table,statement}
         * @param ctx   context giving writable access to the broadcast state
         * @param out   collector for the main output (not used here)
         */
        @Override
        public void processBroadcastElement(String value, Context ctx, Collector<String> out) throws Exception {
            String[] split = value.split(",");
            if (split.length < 2) {
                // Malformed entry without a statement part: there is nothing to
                // store, so skip it rather than fail on split[1].
                return;
            }
            BroadcastState<String, String> broadcastState = ctx.getBroadcastState(mapStateDescriptor);
            broadcastState.put(split[0].trim(), split[1].trim());
        }

        /**
         * Looks the record's table name up in the broadcast state and emits a
         * message describing whether it is a configured table.
         *
         * <p>NOTE(review): a record that arrives before its configuration entry
         * has been broadcast is reported as a business table — confirm whether
         * that ordering matters for the real job.
         *
         * @param value tuple of (table name, data)
         * @param ctx   context giving read-only access to the broadcast state
         * @param out   collector receiving the resulting message
         */
        @Override
        public void processElement(Tuple2<String, String> value, ReadOnlyContext ctx, Collector<String> out) throws Exception {
            ReadOnlyBroadcastState<String, String> broadcastState = ctx.getBroadcastState(mapStateDescriptor);
            String table = value.f0;
            String create = broadcastState.get(table);
            if (create != null) {
                out.collect(value.f0 + "为配置表,需要在phoenix中建表 -> 建表语句:" + create + ", 数据为:" + value.f1);
            } else {
                out.collect(value.f0 + "业务表, 跳过建表");
            }
        }
    }
}
2.3、join(示例:间隔联结 interval join)
package com.hpsk.flink.function;
import com.hpsk.flink.beans.Event;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.ProcessJoinFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
import java.sql.Timestamp;
/**
 * Interval-joins an order stream with a click stream on the user name and
 * prints every (click, order) pair that falls inside the join interval.
 */
public class ProcessJoinFunctionDS {

    /**
     * Builds the two demo streams with event-time timestamps, joins them per
     * user with bounds of -5 s and +10 s, and prints the joined records.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
        environment.setParallelism(1);

        // Orders carry their event time in tuple field f2.
        WatermarkStrategy<Tuple3<String, String, Long>> orderWatermarks = WatermarkStrategy
                .<Tuple3<String, String, Long>>forMonotonousTimestamps()
                .withTimestampAssigner((order, recordTimestamp) -> order.f2);

        // Clicks carry their event time in Event.timestamp.
        // NOTE(review): the click timestamps below are not ascending (e.g. 3500 is
        // followed by 2500) although a monotonous-timestamp strategy is used —
        // confirm that no click can be treated as late.
        WatermarkStrategy<Event> clickWatermarks = WatermarkStrategy
                .<Event>forMonotonousTimestamps()
                .withTimestampAssigner((click, recordTimestamp) -> click.timestamp);

        SingleOutputStreamOperator<Tuple3<String, String, Long>> orders = environment
                .fromElements(
                        Tuple3.of("Mary", "order-1", 5000L),
                        Tuple3.of("Alice", "order-2", 5000L),
                        Tuple3.of("Bob", "order-3", 20000L),
                        Tuple3.of("Alice", "order-4", 20000L),
                        Tuple3.of("Cary", "order-5", 51000L))
                .assignTimestampsAndWatermarks(orderWatermarks);

        SingleOutputStreamOperator<Event> clicks = environment
                .fromElements(
                        new Event("Bob", "./cart", 2000L),
                        new Event("Alice", "./prod?id=100", 3000L),
                        new Event("Alice", "./prod?id=200", 3500L),
                        new Event("Bob", "./prod?id=2", 2500L),
                        new Event("Alice", "./prod?id=300", 36000L),
                        new Event("Bob", "./home", 30000L),
                        new Event("Bob", "./prod?id=1", 23000L),
                        new Event("Bob", "./prod?id=3", 33000L))
                .assignTimestampsAndWatermarks(clickWatermarks);

        SingleOutputStreamOperator<String> joined = orders
                .keyBy(order -> order.f0)
                .intervalJoin(clicks.keyBy(click -> click.user))
                .between(Time.seconds(-5), Time.seconds(10))
                .process(new OrderClickJoiner());

        joined.print("output ");
        environment.execute();
    }

    /**
     * Formats one joined pair as {@code <click> => {<user>, <orderId>, <orderTime>}}.
     */
    private static class OrderClickJoiner
            extends ProcessJoinFunction<Tuple3<String, String, Long>, Event, String> {

        /**
         * @param left  the order (user, order id, event time in milliseconds)
         * @param right the click matched to that order
         * @param ctx   join context (not used)
         * @param out   collector receiving the formatted pair
         */
        @Override
        public void processElement(Tuple3<String, String, Long> left,
                                   Event right,
                                   Context ctx,
                                   Collector<String> out) throws Exception {
            String orderPart = "{" + left.f0 + ", " + left.f1 + ", " + new Timestamp(left.f2) + "}";
            out.collect(right + " => " + orderPart);
        }
    }
}