Manually maintaining Kafka offsets in Flink

This is original content and reposting is allowed; my goal is simply to save you time.

First, why maintain offsets manually at all: because of our environment we are still reading from Kafka 0.8, and pushing for an upgrade has been difficult.

The benefit of maintaining offsets manually is that you can record the offset at each point in time. If the upstream logs are ever bad, you can pull out the offsets and timestamps you recorded, find the offset for the time in question, and reprocess the historical data from there.
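To make "record the offset at each point in time" concrete, here is a minimal sketch of what such a record could look like. The OffsetRecord case class, the OffsetLog helper and the file path are my own placeholders, not part of the original code; in production you would write these records to a durable store (MySQL, HBase, ZooKeeper, ...) rather than a local file.

import java.io.FileWriter

// One line per observation: where each partition was at a given wall-clock time.
case class OffsetRecord(timestamp: Long, topic: String, partition: Int, offset: Long)

object OffsetLog {
  // Appends "timestamp,topic,partition,offset" as one CSV line.
  def record(r: OffsetRecord, path: String = "/tmp/kafka-offsets.csv"): Unit = {
    val writer = new FileWriter(path, true) // true = append
    try writer.write(s"${r.timestamp},${r.topic},${r.partition},${r.offset}\n")
    finally writer.close()
  }
}

With a log like this, repairing a time window becomes a lookup: find the line whose timestamp is closest to the start of the window and restart the job from that offset (see the replay sketch at the end of the article).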

Enough talk. If you have written Spark, you will recognize the approach as soon as you see the code. FlinkKafkaConsumer08 is the connector class that hooks Kafka 0.8 up to Flink; the part to focus on is KeyedDeserializationSchema, which hands you each record's Kafka partition and offset. Implement it and you are done, so I will just paste the code. For reference, here is the connector class itself:
public class FlinkKafkaConsumer08<T> extends FlinkKafkaConsumerBase<T> {
    private static final long serialVersionUID = -6272159445203409112L;
    public static final String GET_PARTITIONS_RETRIES_KEY = "flink.get-partitions.retry";
    public static final int DEFAULT_GET_PARTITIONS_RETRIES = 3;
    private final Properties kafkaProperties;

    public FlinkKafkaConsumer08(String topic, DeserializationSchema<T> valueDeserializer, Properties props) {
        this(Collections.singletonList(topic), valueDeserializer, props);
    }

    public FlinkKafkaConsumer08(String topic, KeyedDeserializationSchema<T> deserializer, Properties props) {
        this(Collections.singletonList(topic), deserializer, props);
    }

    public FlinkKafkaConsumer08(List<String> topics, DeserializationSchema<T> deserializer, Properties props) {
        this((List)topics, (KeyedDeserializationSchema)(new KeyedDeserializationSchemaWrapper(deserializer)), props);
    }

    public FlinkKafkaConsumer08(List<String> topics, KeyedDeserializationSchema<T> deserializer, Properties props) {
        this(topics, (Pattern)null, deserializer, props);
    }

    @PublicEvolving
    public FlinkKafkaConsumer08(Pattern subscriptionPattern, DeserializationSchema<T> valueDeserializer, Properties props) {
        this((Pattern)subscriptionPattern, (KeyedDeserializationSchema)(new KeyedDeserializationSchemaWrapper(valueDeserializer)), props);
    }

    @PublicEvolving
    public FlinkKafkaConsumer08(Pattern subscriptionPattern, KeyedDeserializationSchema<T> deserializer, Properties props) {
        this((List)null, subscriptionPattern, deserializer, props);
    }

    private FlinkKafkaConsumer08(List<String> topics, Pattern subscriptionPattern, KeyedDeserializationSchema<T> deserializer, Properties props) {
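        // Long.MIN_VALUE below is Flink's "partition discovery disabled" marker: partition
        // discovery only runs if 'flink.partition-discovery.interval-millis' is set in the properties.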
        super(topics, subscriptionPattern, deserializer, PropertiesUtil.getLong((Properties)Preconditions.checkNotNull(props, "props"), "flink.partition-discovery.interval-millis", -9223372036854775808L), !PropertiesUtil.getBoolean(props, "flink.disable-metrics", false));
        this.kafkaProperties = props;
        validateZooKeeperConfig(props);
        validateAutoOffsetResetValue(props);
    }

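    // The KeyedDeserializationSchema you implement (this.deserializer) is handed straight to the
    // Kafka 0.8 fetcher here, which is where the partition and offset of each record come from.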
    protected AbstractFetcher<T, ?> createFetcher(SourceContext<T> sourceContext, Map<KafkaTopicPartition, Long> assignedPartitionsWithInitialOffsets, SerializedValue<AssignerWithPeriodicWatermarks<T>> watermarksPeriodic, SerializedValue<AssignerWithPunctuatedWatermarks<T>> watermarksPunctuated, StreamingRuntimeContext runtimeContext, OffsetCommitMode offsetCommitMode, MetricGroup consumerMetricGroup, boolean useMetrics) throws Exception {
        long autoCommitInterval = offsetCommitMode == OffsetCommitMode.KAFKA_PERIODIC ? PropertiesUtil.getLong(this.kafkaProperties, "auto.commit.interval.ms", 60000L) : -1L;
        return new Kafka08Fetcher(sourceContext, assignedPartitionsWithInitialOffsets, watermarksPeriodic, watermarksPunctuated, runtimeContext, this.deserializer, this.kafkaProperties, autoCommitInterval, consumerMetricGroup, useMetrics);
    }

    protected AbstractPartitionDiscoverer createPartitionDiscoverer(KafkaTopicsDescriptor topicsDescriptor, int indexOfThisSubtask, int numParallelSubtasks) {
        return new Kafka08PartitionDiscoverer(topicsDescriptor, indexOfThisSubtask, numParallelSubtasks, this.kafkaProperties);
    }

    protected boolean getIsAutoCommitEnabled() {
        return PropertiesUtil.getBoolean(this.kafkaProperties, "auto.commit.enable", true) && PropertiesUtil.getLong(this.kafkaProperties, "auto.commit.interval.ms", 60000L) > 0L;
    }

    protected Map<KafkaTopicPartition, Long> fetchOffsetsWithTimestamp(Collection<KafkaTopicPartition> partitions, long timestamp) {
        throw new UnsupportedOperationException("Fetching partition offsets using timestamps is only supported in Kafka versions 0.10 and above.");
    }

    protected static void validateZooKeeperConfig(Properties props) {
        if (props.getProperty("zookeeper.connect") == null) {
            throw new IllegalArgumentException("Required property 'zookeeper.connect' has not been set in the properties");
        } else if (props.getProperty("group.id") == null) {
            throw new IllegalArgumentException("Required property 'group.id' has not been set in the properties");
        } else {
            try {
                Integer.parseInt(props.getProperty("zookeeper.session.timeout.ms", "0"));
            } catch (NumberFormatException var3) {
                throw new IllegalArgumentException("Property 'zookeeper.session.timeout.ms' is not a valid integer");
            }

            try {
                Integer.parseInt(props.getProperty("zookeeper.connection.timeout.ms", "0"));
            } catch (NumberFormatException var2) {
                throw new IllegalArgumentException("Property 'zookeeper.connection.timeout.ms' is not a valid integer");
            }
        }
    }

    private static void validateAutoOffsetResetValue(Properties config) {
        String val = config.getProperty("auto.offset.reset", "largest");
        if (!val.equals("largest") && !val.equals("latest") && !val.equals("earliest") && !val.equals("smallest")) {
            throw new IllegalArgumentException("Cannot use 'auto.offset.reset' value '" + val + "'. Possible values: 'latest', 'largest', 'earliest', or 'smallest'.");
        }
    }
}

 

Now for our own code; the following is all you need:
 

import java.util.Properties

import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08
import org.apache.flink.streaming.util.serialization.KeyedDeserializationSchema

object FlinkDemo {

  // One record per Kafka message, carrying the partition and offset it came from.
  case class KafkaMsg(key: String, value: String, topic: String, partition: Int, offset: Long)

  // KeyedDeserializationSchema exposes topic, partition and offset for every record,
  // which is what makes manual offset tracking possible.
  class TypedKeyedDeserializationSchema extends KeyedDeserializationSchema[KafkaMsg] {
    def deserialize(key: Array[Byte],
                    value: Array[Byte],
                    topic: String,
                    partition: Int,
                    offset: Long
                   ): KafkaMsg =
      KafkaMsg(
        if (key == null) null else new String(key), // Kafka message keys may be null
        new String(value),
        topic,
        partition,
        offset
      )

    def isEndOfStream(e: KafkaMsg): Boolean = false

    def getProducedType(): TypeInformation[KafkaMsg] = createTypeInformation[KafkaMsg]
  }

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    val properties = new Properties()
    // Broker list; the old 0.8 connector also needs the ZooKeeper address.
    properties.setProperty("bootstrap.servers", "localhost:9092")
    properties.setProperty("zookeeper.connect", "localhost:2181")
    properties.setProperty("group.id", "test-flink")

    val topic = "click"
    val consumer = new FlinkKafkaConsumer08(topic, new TypedKeyedDeserializationSchema(), properties)

    // Every element printed here carries its topic, partition and offset.
    env.addSource(consumer).print()

    env.execute()
  }
}
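To actually repair historical data, point the consumer at the offsets you recorded. Below is a minimal sketch of a replay job; it assumes a Flink release in which FlinkKafkaConsumerBase exposes setStartFromSpecificOffsets (1.3 and later), and the topic name, partition numbers and offset values are placeholders you would look up from your own offset records.

import java.util.{HashMap => JHashMap, Properties}

import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08
import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition

object ReplayFromRecordedOffsets {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "localhost:9092")
    properties.setProperty("zookeeper.connect", "localhost:2181")
    properties.setProperty("group.id", "test-flink-replay")

    // Reuse the schema from FlinkDemo so every record still carries its partition and offset.
    val consumer = new FlinkKafkaConsumer08("click", new FlinkDemo.TypedKeyedDeserializationSchema(), properties)

    // Start positions looked up from our own records for the time range we want to reprocess.
    // The partition numbers and offsets here are placeholders.
    val specificOffsets = new JHashMap[KafkaTopicPartition, java.lang.Long]()
    specificOffsets.put(new KafkaTopicPartition("click", 0), 23L)
    specificOffsets.put(new KafkaTopicPartition("click", 1), 31L)

    // Read from exactly these offsets instead of the group's committed position.
    consumer.setStartFromSpecificOffsets(specificOffsets)

    env.addSource(consumer).print()
    env.execute("replay-from-recorded-offsets")
  }
}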

 

Remember to follow my WeChat official account; hands-on material and source code will be posted there, so you can avoid some detours.

You are also welcome to add me on WeChat via my QR code.

 

