Kafka Source and Sink Methods in Flink 1.17

I. POM preparation

I am using Flink 1.17.1 with Java 1.8.
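
The dependency list below references a `${flink.version}` property. As a minimal sketch (the property names are assumptions, not taken from the original pom), the corresponding `<properties>` block could look like this:

```xml
    <properties>
        <!-- Version stated above; adjust as needed -->
        <flink.version>1.17.1</flink.version>
        <!-- Compile for Java 1.8, as stated above -->
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
    </properties>
```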

    <dependencies>
        <!-- Flink core dependencies -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-loader</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-runtime</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- Flink Table API dependencies -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-common</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- Flink uses slf4j as its logging facade by default; here log4j serves as the concrete implementation -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.25</version>
        </dependency>
        <!-- Flink Kafka connector -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- Lombok annotations -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.20</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>RELEASE</version>
            <scope>compile</scope>
        </dependency>
    </dependencies>
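
The logging comment above mentions log4j as the concrete implementation, but the snippet only includes slf4j-api. Assuming log4j 1.x, a typical binding (artifact versions are assumptions) would be added inside the `<dependencies>` block:

```xml
        <!-- slf4j -> log4j 1.x binding; keep the version aligned with slf4j-api above (assumed versions) -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.25</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
```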

II. KafkaSource

1. Using Flink's built-in KafkaSource

Wrap it in a utility method:

public static KafkaSource<String> getKafkaSource(String topic,String groupId,String kafkaAdress) {
        KafkaSource<String> kafkaSource = KafkaSource.<String>builder()
                .setBootstrapServers(kafkaAdress)
                .setTopics(topic)
                .setGroupId(groupId)
                // Start reading from the earliest offset
                .setStartingOffsets(OffsetsInitializer.earliest())
                // The following deserializer cannot handle null (tombstone) messages:
                // .setValueOnlyDeserializer(new SimpleStringSchema())
                // To handle null messages, define a custom deserializer instead
                .setValueOnlyDeserializer(
                        new DeserializationSchema<String>() {
                            @Override
                            public String deserialize(byte[] message) throws IOException {
                                if (message != null) {
                                    return new String(message);
                                }
                                return null;
                            }
                            @Override
                            public boolean isEndOfStream(String nextElement) {
                                return false;
                            }

                            @Override
                            public TypeInformation<String> getProducedType() {
                                return TypeInformation.of(String.class);
                            }
                        }
                ).build();
        return kafkaSource;
    }

Call it in the main program:

public class Key_Vehicle_Project {
    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        KafkaSource<String> kafkaSource = FlinkSourceUtil.getKafkaSource("test_topic", "test", "localhost:9092");
        // A KafkaSource must first be turned into a DataStream before it can be printed
        env.fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), "kafka_source").print();
        env.execute("Flink from Kafka!");
    }
}

2. Creating a FlinkKafkaConsumer

Note: FlinkKafkaConsumer is deprecated in recent Flink versions; the KafkaSource approach above is the recommended API in 1.17. The legacy style still works:

public class Main {

    public static void main(String[] args) throws Exception {

        // Create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Configure the Kafka consumer
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "localhost:9092");
        properties.setProperty("group.id", "test");

        FlinkKafkaConsumer<String> myConsumer = new FlinkKafkaConsumer<String>("test_topic", new SimpleStringSchema(), properties);
        myConsumer.setStartFromLatest();

        // Ingest the data
        DataStream<String> dataStream = env.addSource(myConsumer);
        dataStream.print();
        env.execute("Flink from Kafka");
    }
}

3. Reading a Kafka topic as a table with Flink SQL

First, wrap the Kafka connector options in a helper method:

    // Build the WITH clause for the Kafka connector
    public static String getKafkaDDL(String topic, String groupId,String kafkaAdress) {
        return " WITH (\n" +
                "  'connector' = 'kafka',\n" +
                "  'topic' = '" + topic + "',\n" +
                "  'properties.bootstrap.servers' = '" + kafkaAdress + "',\n" +
                "  'properties.group.id' = '" + groupId + "',\n" +
                "  'scan.startup.mode' = 'latest-offset',\n" +
                "  'format' = 'json'\n" +
                ")";
    }

Then create a dynamic table:

// Read data from the test_topic topic and create a dynamic table
    public void readOdsDb(StreamTableEnvironment tableEnv, String groupId) {
        tableEnv.executeSql("CREATE TABLE test_topic(\n" +
                "  `database` string,\n" +
                "  `table` string,\n" +
                "  `type` string,\n" +
                "  `data` map<string,string>,\n" +
                "  `old` map<string,string>,\n" +
                "  `ts` bigint,\n" +
                "  `pt` as proctime(),\n" +
                "  `et` as TO_TIMESTAMP_LTZ(ts, 0),\n" +
                "  WATERMARK FOR `et` AS `et`\n" +
                ") " + SQLUtil.getKafkaDDL("test_topic", groupId, "localhost:9092"));
    }

Finally, create the table environment and query the Kafka data, as sketched below.
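
A minimal sketch of that step (assuming readOdsDb above is reachable from the calling code, e.g. made static or invoked on an instance; "test" is a placeholder group id):

```java
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

// Register the Kafka-backed dynamic table defined above
readOdsDb(tableEnv, "test");

// Query the Kafka data through the dynamic table; print() streams results to stdout
tableEnv.sqlQuery("select `database`, `table`, `type`, `ts` from test_topic")
        .execute()
        .print();
```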

III. KafkaSink

1. Using KafkaSink

Wrap it in a utility method:

    public static KafkaSink<String> getKafkaSink(String sinkTopic, String kafkaAdress) {
        KafkaSink<String> kafkaSink = KafkaSink.<String>builder()
                .setBootstrapServers(kafkaAdress)
                // Exactly-once delivery; this requires checkpointing and a transactional id prefix
                .setDeliveryGuarantee(DeliveryGuarantee.EXACTLY_ONCE)
                .setTransactionalIdPrefix(sinkTopic + "-tx")
                // Keep the transaction timeout below the broker's transaction.max.timeout.ms (15 min by default)
                .setProperty("transaction.timeout.ms", String.valueOf(10 * 60 * 1000))
                .setRecordSerializer(KafkaRecordSerializationSchema.<String>builder()
                        .setTopic(sinkTopic)
                        .setValueSerializationSchema(new SimpleStringSchema())
                        .build())
                .build();
        return kafkaSink;
    }

Call it in the main method:

public class Key_Vehicle_Project {
    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Checkpointing must be enabled for the exactly-once Kafka sink to commit its transactions
        env.enableCheckpointing(5000);

        KafkaSource<String> kafkaSource = FlinkSourceUtil.getKafkaSource("test_topic", "test", "localhost:9092");
        DataStream<String> kafkaOutputDS = env.fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), "kafka_source");
        kafkaOutputDS.print();
        kafkaOutputDS.sinkTo(FlinkSinkUtil.getKafkaSink("test_topic2", "localhost:9092"));
        env.execute("Flink to Kafka!");
    }
}

2. Writing data from a table (envTable) to Kafka with Flink SQL

Wrap the upsert-kafka connector options in a helper method:

    // Build the WITH clause for the upsert-kafka connector
    public static String getUpsertKafkaDDL(String sinkTopic,String kafkaAdress) {
        return "WITH (\n" +
                "  'connector' = 'upsert-kafka',\n" +
                "  'topic' = '" + sinkTopic + "',\n" +
                "  'properties.bootstrap.servers' = '" + kafkaAdress + "',\n" +
                "  'key.format' = 'json',\n" +
                "  'value.format' = 'json'\n" +
                ")";
    }

In the main program, call getUpsertKafkaDDL; here tableResult is the previously generated envTable:

        tableEnv.executeSql(
                "create table student(\n" +
                        " id int,\n" +
                        " name string,\n" +
                        " PRIMARY KEY (id) NOT ENFORCED\n" +
                        ")"
                        + SQLUtil.getUpsertKafkaDDL("test_topic", "localhost:9092"));
        tableEnv.executeSql("insert into student select * from " + tableResult);
        tableEnv.executeSql("select * from student").print();
        

3. Creating a KafkaProducer

public class ProducerDemo  {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers","localhost:9092");
        props.put("acks", "all"); props.put("retries", 0);
        props.put("batch.size", 16384); props.put("linger.ms", 1);


        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        Producer<String, String> producer = new KafkaProducer<String, String>(props);
        String jsonMessage = "1 jojo";

        producer.send(new ProducerRecord<String, String>("test_topic",jsonMessage));
        System.out.println("Message sent successfully");
        producer.close();
    }
}

4. Customizing the Flink Kafka sink serialization

To customize the legacy Flink Kafka sink, you can implement the `org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema` interface and provide your own serialization logic. A simple example:

```java
import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
import org.apache.kafka.clients.producer.ProducerRecord;

public class CustomKafkaSerializationSchema implements KafkaSerializationSchema<String> {

    private final String topic;

    public CustomKafkaSerializationSchema(String topic) {
        this.topic = topic;
    }

    @Override
    public ProducerRecord<byte[], byte[]> serialize(String element, Long timestamp) {
        // Serialize the String element to bytes; customize this logic as needed
        byte[] serializedValue = element.getBytes();
        return new ProducerRecord<>(topic, serializedValue);
    }
}
```

You can then use this custom serialization schema in a Flink program, for example:

```java
import java.util.Properties;

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;

public class CustomKafkaSinkExample {

    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Create the data stream
        DataStream<String> stream = ... // your source

        // Producer configuration
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "localhost:9092");

        // Create the custom Kafka sink (the KafkaSerializationSchema constructor also requires a Semantic)
        String topic = "your-topic";
        FlinkKafkaProducer<String> kafkaSink = new FlinkKafkaProducer<>(
                topic,
                new CustomKafkaSerializationSchema(topic),
                properties,
                FlinkKafkaProducer.Semantic.AT_LEAST_ONCE);

        // Write the stream to Kafka
        stream.addSink(kafkaSink);

        env.execute("Custom Kafka Sink Example");
    }
}
```
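
In Flink 1.17 the FlinkKafkaProducer / KafkaSerializationSchema API above is deprecated in favor of KafkaSink. As a minimal sketch of the same idea with the newer API (the class name is a placeholder), you can implement KafkaRecordSerializationSchema instead:

```java
import java.nio.charset.StandardCharsets;

import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
import org.apache.kafka.clients.producer.ProducerRecord;

public class CustomKafkaRecordSerializationSchema implements KafkaRecordSerializationSchema<String> {

    private final String topic;

    public CustomKafkaRecordSerializationSchema(String topic) {
        this.topic = topic;
    }

    @Override
    public ProducerRecord<byte[], byte[]> serialize(String element, KafkaSinkContext context, Long timestamp) {
        // Custom serialization logic goes here; this sketch writes the element as the record value only
        return new ProducerRecord<>(topic, element.getBytes(StandardCharsets.UTF_8));
    }
}
```

It can then be passed to the KafkaSink builder from section III.1 via `.setRecordSerializer(new CustomKafkaRecordSerializationSchema("test_topic2"))` in place of the builder-based schema.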