To achieve Exactly Once, Kafka must be version 0.11 or later (which adds transaction support).
Straight to the code:
package cn._51doit.flink.day01;

import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;

public class KafkaSinkDemo {

    public static void main(String[] args) throws Exception {
        // In local mode the default parallelism is the number of logical cores on this node
        Configuration configuration = new Configuration();
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);

        // Default parallelism of the execution environment
        int parallelism01 = env.getParallelism();
        System.out.println("Default parallelism of the execution environment: " + parallelism01);

        DataStreamSource<String> lines = env.socketTextStream("192.168.242.102", 9999);

        // Parallelism of this DataStream (a socket source is always 1)
        int parallelism = lines.getParallelism();
        System.out.println("Parallelism of the socket source: " + parallelism);

        // FlinkKafkaProducer writes the data to Kafka
        FlinkKafkaProducer<String> kafkaProducer = new FlinkKafkaProducer<>(
                "Master:9092,Slave:9092,Slave:9092", "topic_log", new SimpleStringSchema());

        // Write the data to the specified sink
        lines.addSink(kafkaProducer);

        env.execute();
    }

    // Inner class: a custom sink that prints each record prefixed with its subtask index
    // (defined here for reference; it is not wired into the job above)
    public static class MyPrintSink extends RichSinkFunction<String> {

        // invoke() is where each record is finally written out (e.g. to MySQL via JDBC)
        @Override
        public void invoke(String value, Context context) throws Exception {
            // Get the index of this subtask (starting from 0)
            RuntimeContext runtimeContext = getRuntimeContext();
            int indexOfThisSubtask = runtimeContext.getIndexOfThisSubtask();
            System.out.println(indexOfThisSubtask + "> " + value);
        }
    }
}
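MyPrintSink is defined above but never attached to the stream. A minimal sketch of wiring it in, assuming the same `lines` stream inside main(), so that each subtask prints records prefixed with its index:

// Hypothetical extra line inside main(), after the Kafka sink is added:
// each parallel subtask of this sink prints "subtaskIndex> value" to the console
lines.addSink(new MyPrintSink());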
Console output: this is an unbounded streaming program, so the job keeps running (and processing lines from the socket) until it is cancelled.
View the job at: http://localhost:8081/#/job/6cb13849f794d199f8bd20cb30d7c149/overview
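The note at the top says Exactly Once needs Kafka 0.11+, but the producer above uses the simple constructor and therefore runs with at-least-once semantics by default. Below is a minimal sketch of an exactly-once version, assuming the universal flink-connector-kafka dependency; the class name KafkaExactlyOnceSinkDemo, the checkpoint interval, and the single-broker bootstrap.servers value are illustrative assumptions, not from the original post.

package cn._51doit.flink.day01;

import java.nio.charset.StandardCharsets;
import java.util.Properties;

import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
import org.apache.kafka.clients.producer.ProducerRecord;

// Hypothetical class name for the exactly-once variant of the demo above
public class KafkaExactlyOnceSinkDemo {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // EXACTLY_ONCE relies on checkpoints: the sink commits one Kafka transaction per checkpoint
        env.enableCheckpointing(5000);

        DataStreamSource<String> lines = env.socketTextStream("192.168.242.102", 9999);

        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "Master:9092"); // placeholder broker address
        // must not exceed the broker's transaction.max.timeout.ms (15 minutes by default)
        props.setProperty("transaction.timeout.ms", "600000");

        FlinkKafkaProducer<String> kafkaProducer = new FlinkKafkaProducer<>(
                "topic_log",                                  // default target topic
                new KafkaSerializationSchema<String>() {      // turn each element into a ProducerRecord
                    @Override
                    public ProducerRecord<byte[], byte[]> serialize(String element, Long timestamp) {
                        return new ProducerRecord<>("topic_log", element.getBytes(StandardCharsets.UTF_8));
                    }
                },
                props,
                FlinkKafkaProducer.Semantic.EXACTLY_ONCE);    // transactional, two-phase-commit sink

        lines.addSink(kafkaProducer);
        env.execute("kafka exactly-once sink demo");
    }
}

With EXACTLY_ONCE, downstream consumers should read with isolation.level=read_committed, otherwise they will also see records from uncommitted transactions.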