Drawing on this blog post: since Flink does not provide an API for writing a DataSet to Kafka, I wrote one myself. It implements the org.apache.flink.api.common.io.OutputFormat interface, is modeled on JDBCOutputFormat, and adds a custom partitioner.
KafkaOutputFormat.java
```java
package com.yngwiewang;

import org.apache.flink.api.common.io.OutputFormat;
import org.apache.flink.api.java.tuple.Tuple7;
import org.apache.flink.configuration.Configuration;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.io.IOException;
import java.util.Properties;

/**
 * An OutputFormat that writes Tuple7 records to Kafka.
 * Each parallel sink task creates and owns one KafkaProducer.
 */
public class KafkaOutputFormat implements OutputFormat<Tuple7<String, String, String, String, String, String, Integer>> {

    private String servers;
    private String topic;
    private String acks;
    private String retries;
    private String batchSize;
    private String bufferMemory;
    private String lingerMS;

    private Producer<String, String> producer;

    @Override
    public void configure(Configuration parameters) {
    }

    @Override
    public void open(int taskNumber, int numTasks) throws IOException {
        // Called once per parallel task before any record is written.
        Properties props = new Properties();
        props.put("bootstrap.servers", this.servers);
        props.put("acks", this.acks);
        props.put("retries", this.retries);
        props.put("batch.size", this.batchSize);
        props.put("linger.ms", this.lingerMS);
        props.put("buffer.memory", this.bufferMemory);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("partitioner.class", "com.yngwiewang.KafkaCustomPartitioner");
        producer = new KafkaProducer<>(props);
    }

    @Override
    public void writeRecord(Tuple7<String, String, String, String, String, String, Integer> record) throws IOException {
        // f0 is used as the Kafka key; f0..f5 are joined into a CSV value (f6 is not written).
        producer.send(new ProducerRecord<>(this.topic, record.f0,
                String.join(",", record.f0, record.f1, record.f2, record.f3, record.f4, record.f5)));
    }

    @Override
    public void close() throws IOException {
        producer.close();
    }

    static KafkaOutputFormatBuilder buildKafkaOutputFormat() {
        return new KafkaOutputFormatBuilder();
    }

    public static class KafkaOutputFormatBuilder {

        private final KafkaOutputFormat format;

        KafkaOutputFormatBuilder() {
            this.format = new KafkaOutputFormat();
        }

        KafkaOutputFormatBuilder setBootstrapServers(String val) {
            format.servers = val;
            return this;
        }

        KafkaOutputFormatBuilder setTopic(String val) {
            format.topic = val;
            return this;
        }

        KafkaOutputFormatBuilder setAcks(String val) {
            format.acks = val != null ? val : "all";
            return this;
        }

        KafkaOutputFormatBuilder setRetries(String val) {
            format.retries = val != null ? val : "3";
            return this;
        }

        KafkaOutputFormatBuilder setBatchSize(String val) {
            format.batchSize = val != null ? val : "16384";
            return this;
        }

        KafkaOutputFormatBuilder setLingerMs(String val) {
            format.lingerMS = val != null ? val : "1000";
            return this;
        }

        KafkaOutputFormatBuilder setBufferMemory(String val) {
            format.bufferMemory = val != null ? val : "33554432";
            return this;
        }

        KafkaOutputFormat finish() {
            if (format.servers == null) {
                throw new IllegalArgumentException("required parameter not found: KafkaBrokerList");
            }
            if (format.topic == null) {
                throw new IllegalArgumentException("required parameter not found: KafkaTopic");
            }
            return format;
        }
    }
}
```
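Before wiring the format into a Flink job, it can be exercised on its own by driving the same open / writeRecord / close lifecycle that each parallel sink task goes through. The sketch below is only a smoke test under assumptions I am adding here: a reachable broker at localhost:9092, an existing testTopic, and a made-up key whose characters 9 to 14 are digits so the custom partitioner (next section) can parse it.

```java
package com.yngwiewang;

import org.apache.flink.api.java.tuple.Tuple7;

// Minimal smoke test that drives KafkaOutputFormat directly, outside of Flink.
// Broker address, topic and the sample record are assumptions for illustration only.
public class KafkaOutputFormatSmokeTest {
    public static void main(String[] args) throws Exception {
        KafkaOutputFormat format = KafkaOutputFormat.buildKafkaOutputFormat()
                .setBootstrapServers("localhost:9092")   // assumed local broker
                .setTopic("testTopic")                   // assumed existing topic
                .setAcks("all")
                .setRetries("3")
                .setBatchSize("16384")
                .setLingerMs("100")
                .setBufferMemory("33554432")
                .finish();

        // Same lifecycle Flink uses per parallel task: open -> writeRecord -> close.
        format.open(0, 1);
        format.writeRecord(new Tuple7<>("key000000123456", "a", "b", "c", "d", "e", 1));
        format.close();   // close() flushes pending records
    }
}
```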
KafkaCustomPartitioner.java
```java
package com.yngwiewang;

import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.PartitionInfo;

import java.util.List;
import java.util.Map;

public class KafkaCustomPartitioner implements Partitioner {

    @Override
    public void configure(Map<String, ?> configs) {
    }

    @Override
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        List<PartitionInfo> partitions = cluster.partitionsForTopic(topic);
        int numPartitions = partitions.size();
        // The record key carries a numeric field at characters 9..14; use it to pick the partition.
        String strKey = (String) key;
        int intKey = Integer.parseInt(strKey.substring(9, 15));
        return intKey % numPartitions;
    }

    @Override
    public void close() {
    }
}
```
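The partitioner assumes String keys in which characters 9 to 14 form a number (here that key is record.f0, since that is what KafkaOutputFormat sends as the Kafka key); anything shorter or non-numeric in that range will throw. The standalone snippet below, with a made-up key and partition count, just walks through the extraction and modulo step:

```java
// Illustrates how KafkaCustomPartitioner maps a key to a partition.
// The sample key and the partition count are made up for this example.
public class PartitionerDemo {
    public static void main(String[] args) {
        String key = "key000000123456";   // hypothetical key; characters 9..14 are "123456"
        int numPartitions = 8;            // e.g. the topic has 8 partitions

        int intKey = Integer.parseInt(key.substring(9, 15));   // -> 123456
        int partition = intKey % numPartitions;                // -> 123456 % 8 = 0

        System.out.println("record goes to partition " + partition);
    }
}
```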
Writing the DataSet to Kafka
```java
public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    int parallelism = 4;

    // ds is the DataSet to be written to Kafka; a single hard-coded record stands in
    // for whatever source or earlier transformation actually produces it
    DataSet<Tuple7<String, String, String, String, String, String, Integer>> ds =
            env.fromElements(new Tuple7<>("key000000123456", "f1", "f2", "f3", "f4", "f5", 0));

    /* sink to kafka */
    ds.output(KafkaOutputFormat.buildKafkaOutputFormat()
            .setBootstrapServers("192.168.100.101:9092")
            .setTopic("testTopic")
            .setAcks("all")
            .setBatchSize("16384")
            .setBufferMemory("33554432")
            .setLingerMs("100")
            .setRetries("2")
            .finish()
    ).setParallelism(parallelism);

    /* execute */
    env.execute("DataSet Sink To Kafka");
}
```
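How the Tuple7 DataSet is actually produced depends on the job; the hard-coded record above is only a stand-in. One plausible source, sketched below under the assumption of a comma-separated input file at a hypothetical path, is Flink's CSV reader, which maps the seven columns directly onto the tuple fields:

```java
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple7;

// Sketch: build the Tuple7 DataSet from a CSV file instead of fromElements.
// The class name and file path are hypothetical; Flink's CsvReader splits on commas by default.
public class CsvSourceSketch {
    public static DataSet<Tuple7<String, String, String, String, String, String, Integer>> readRecords(
            ExecutionEnvironment env) {
        return env.readCsvFile("file:///data/records.csv")
                .types(String.class, String.class, String.class, String.class,
                       String.class, String.class, Integer.class);
    }
}
```

The returned DataSet can replace the fromElements line in the main method above without any other changes.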