上篇:自定义Source
二话不说,直接上代码
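这个案例是一个无限流:执行环境默认的并行度是 4(local 模式下等于当前节点逻辑核的数量),而 socketTextStream 这个 Source 的并行度是 1(程序输出里这一项的标签写的是"SocketSink的并行度",实际打印的是 Socket Source 的并行度)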
package cn._51doit.flink.day01;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.IterationRuntimeContext;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.apache.flink.util.Collector;
/**
* Sink-printSink底层实现
*
* print--》PrintSinkFunction---extends RichSinkFunction---@Override
* public void invoke(IN record) {
* writer.write(record);
* }
*
*
*/
public class PrintSinkDemo {
public static void main(String[] args) throws Exception {
//local模式默认的并行度是当前节点逻辑核的数量
Configuration configuration = new Configuration();
StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);
//DataStream的并行度
int parallelism01 = env.getParallelism();
System.out.println("执行环境默认的并行度是:"+parallelism01);
DataStreamSource<String> lines = env.socketTextStream("Master", 8888);
int parallelism = lines.getParallelism();
System.out.println("SocketSink的并行度:"+parallelism);
// lines.print();
lines.addSink(new MyPrintSink()).name("my-print-sink"); //在执行计划的sink的算子取名为“my-print-sink”
env.execute();
}
//定义内部类
public static class MyPrintSink extends RichSinkFunction<String > {
private int indexOfThisSubtask;
//最终把数据输出的方法(如:mysql、jdbc)
@Override
public void invoke(String value, Context context) throws Exception {
//:拿到索引编号[从0开始]
RuntimeContext runtimeContext = getRuntimeContext();
int indexOfThisSubtask = runtimeContext.getIndexOfThisSubtask();
System.out.println(indexOfThisSubtask+"> "+value);
}
}
}
源码解析
点进 print 方法的源码可以看到,它使用的 PrintSinkFunction 继承了 RichSinkFunction 类,并重写了 invoke 方法,通过 writer 把数据输出
再看 writer 的 open 方法,它会根据传入的两个参数做判断:如果并行度 numParallelSubtasks 大于 1,就把 subtaskIndex + 1 拼到前缀后面(前缀本身不为空时先加一个冒号);最后只要前缀不为空,就再拼上 "> " 作为每条输出的前缀
public void open(int subtaskIndex, int numParallelSubtasks) {
// get the target stream
stream = target == STD_OUT ? System.out : System.err;
completedPrefix = sinkIdentifier;
if (numParallelSubtasks > 1) {
if (!completedPrefix.isEmpty()) {
completedPrefix += ":";
}
completedPrefix += (subtaskIndex + 1);
}
if (!completedPrefix.isEmpty()) {
completedPrefix += "> ";
}