Kafka的高级消费者与低级消费者
在Kafka实战章节,我们写的例子都是Kafka的高级消费实例。可以看到,在消息消费者的程序中,我们只需要指定ZooKeeper地址及消费群组的groupId,即可实现从消息队列中消费消息;大量的底层细节都被屏蔽了,例如消息的偏移量等信息都不需要在程序中维护。Kafka的高级消费实例满足以下几点规则:
(1)同一个消费群组中,如果线程数大于Topic分区数,那么一些线程永远接收不到消息;
(2)同一个消费群组中,如果线程数小于Topic分区数,部分线程将从多个分区接收消息;
(3)对于从多个分区接收消息的线程,消费每个分区内的消息是有序的,但消费多个分区之间的消息是无序的。
明白了Kafka的高级消费实例的过程之后,如果我们想进一步控制消费者消费哪个分区、从哪个偏移量开始消费(比如多次读取同一条消息),该怎么办呢?答案是使用低级消费者实例,即在程序中自行确定Topic的Partition的Leader broker,并在程序中跟踪消息的偏移量offset值。其步骤大致如下:
(1)查找所消费Topic Partition的Leader broker及副本(备份)broker;
(2)构造并发送请求数据,获取消息;
(3)识别并处理Leader broker的变更。
实例如下:
- import kafka.api.FetchRequest;
- import kafka.api.FetchRequestBuilder;
- import kafka.api.PartitionOffsetRequestInfo;
- import kafka.common.ErrorMapping;
- import kafka.common.TopicAndPartition;
- import kafka.javaapi.*;
- import kafka.javaapi.consumer.SimpleConsumer;
- import kafka.message.MessageAndOffset;
- import java.nio.ByteBuffer;
- import java.util.ArrayList;
- import java.util.Collections;
- import java.util.HashMap;
- import java.util.List;
- import java.util.Map;
- public class SimpleConsumerDemo {
- private List<String> m_replicaBrokers = new ArrayList<>();
- public SimpleConsumerDemo(){
- m_replicaBrokers = new ArrayList<>();
- }
- public void run(long a_maxReads, String a_topic, int a_partition, List<String> a_seedBrokers, int a_port) throws Exception {
- // find the meta data about the topic and partition we are interested in
- //
- PartitionMetadata metadata = findLeader(a_seedBrokers, a_port, a_topic, a_partition);
- if (metadata == null) {
- System.out.println("Can't find metadata for Topic and Partition. Exiting");
- return;
- }
- if (metadata.leader() == null) {
- System.out.println("Can't find Leader for Topic and Partition. Exiting");
- return;
- }
- String leadBroker = metadata.leader().host();
- String clientName = "Client_" + a_topic + "_" + a_partition;
- SimpleConsumer consumer = new SimpleConsumer(leadBroker, a_port, 100000, 64 * 1024, clientName);
- long readOffset = getLastOffset(consumer,a_topic, a_partition, kafka.api.OffsetRequest.EarliestTime(), clientName);
- int numErrors = 0;
- while (a_maxReads > 0) {
- if (consumer == null) {
- consumer = new SimpleConsumer(leadBroker, a_port, 100000, 64 * 1024, clientName);
- }
- FetchRequest req = new FetchRequestBuilder()
- .clientId(clientName)
- .addFetch(a_topic, a_partition, readOffset, 100000) // Note: this fetchSize of 100000 might need to be increased if large batches are written to Kafka
- .build();
- FetchResponse fetchResponse = consumer.fetch(req);
- if (fetchResponse.hasError()) {
- numErrors++;
- // Something went wrong!
- short code = fetchResponse.errorCode(a_topic, a_partition);
- System.out.println("Error fetching data from the Broker:" + leadBroker + " Reason: " + code);
- if (numErrors > 5) break;
- if (code == ErrorMapping.OffsetOutOfRangeCode()) {
- // We asked for an invalid offset. For simple case ask for the last element to reset
- readOffset = getLastOffset(consumer,a_topic, a_partition, kafka.api.OffsetRequest.LatestTime(), clientName);
- continue;
- }
- consumer.close();
- consumer = null;
- leadBroker = findNewLeader(leadBroker, a_topic, a_partition, a_port);
- continue;
- }
- numErrors = 0;
- long numRead = 0;
- for (MessageAndOffset messageAndOffset : fetchResponse.messageSet(a_topic, a_partition)) {
- long currentOffset = messageAndOffset.offset();
- if (currentOffset < readOffset) {
- System.out.println("Found an old offset: " + currentOffset + " Expecting: " + readOffset);
- continue;
- }
- readOffset = messageAndOffset.nextOffset();
- ByteBuffer payload = messageAndOffset.message().payload();
- byte[] bytes = new byte[payload.limit()];
- payload.get(bytes);
- System.out.println(String.valueOf(messageAndOffset.offset()) + ": " + new String(bytes, "UTF-8"));
- numRead++;
- a_maxReads--;
- }
- if (numRead == 0) {
- try {
- Thread.sleep(1000);
- } catch (InterruptedException ie) {
- }
- }
- }
- if (consumer != null) consumer.close();
- }
- public static long getLastOffset(SimpleConsumer consumer, String topic, int partition,
- long whichTime, String clientName) {
- TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partition);
- Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo = new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();
- requestInfo.put(topicAndPartition, new PartitionOffsetRequestInfo(whichTime, 1));
- kafka.javaapi.OffsetRequest request = new kafka.javaapi.OffsetRequest(
- requestInfo, kafka.api.OffsetRequest.CurrentVersion(), clientName);
- OffsetResponse response = consumer.getOffsetsBefore(request);
- if (response.hasError()) {
- System.out.println("Error fetching data Offset Data the Broker. Reason: " + response.errorCode(topic, partition) );
- return 0;
- }
- long[] offsets = response.offsets(topic, partition);
- return offsets[0];
- }
- private String findNewLeader(String a_oldLeader, String a_topic, int a_partition, int a_port) throws Exception {
- for (int i = 0; i < 3; i++) {
- boolean goToSleep = false;
- PartitionMetadata metadata = findLeader(m_replicaBrokers, a_port, a_topic, a_partition);
- if (metadata == null) {
- goToSleep = true;
- } else if (metadata.leader() == null) {
- goToSleep = true;
- } else if (a_oldLeader.equalsIgnoreCase(metadata.leader().host()) && i == 0) {
- // first time through if the leader hasn't changed give ZooKeeper a second to recover
- // second time, assume the broker did recover before failover, or it was a non-Broker issue
- //
- goToSleep = true;
- } else {
- return metadata.leader().host();
- }
- if (goToSleep) {
- try {
- Thread.sleep(1000);
- } catch (InterruptedException ie) {
- }
- }
- }
- System.out.println("Unable to find new leader after Broker failure. Exiting");
- throw new Exception("Unable to find new leader after Broker failure. Exiting");
- }
- private PartitionMetadata findLeader(List<String> a_seedBrokers, int a_port, String a_topic, int a_partition) {
- PartitionMetadata returnMetaData = null;
- loop:
- for (String seed : a_seedBrokers) {
- SimpleConsumer consumer = null;
- try {
- consumer = new SimpleConsumer(seed, a_port, 100000, 64 * 1024, "leaderLookup");
- List<String> topics = Collections.singletonList(a_topic);
- TopicMetadataRequest req = new TopicMetadataRequest(topics);
- kafka.javaapi.TopicMetadataResponse resp = consumer.send(req);
- List<TopicMetadata> metaData = resp.topicsMetadata();
- for (TopicMetadata item : metaData) {
- for (PartitionMetadata part : item.partitionsMetadata()) {
- if (part.partitionId() == a_partition) {
- returnMetaData = part;
- break loop;
- }
- }
- }
- } catch (Exception e) {
- System.out.println("Error communicating with Broker [" + seed + "] to find Leader for [" + a_topic
- + ", " + a_partition + "] Reason: " + e);
- } finally {
- if (consumer != null) consumer.close();
- }
- }
- if (returnMetaData != null) {
- m_replicaBrokers.clear();
- for (kafka.cluster.Broker replica : returnMetaData.replicas()) {
- m_replicaBrokers.add(replica.host());
- }
- }
- return returnMetaData;
- }
- public static void main(String args[]) {
- SimpleConsumerDemo example = new SimpleConsumerDemo();
- long maxReads = Long.parseLong(args[0]);
- String topic = args[1];
- int partition = Integer.parseInt(args[2]);
- List<String> seeds = new ArrayList<>();
- seeds.add(args[3]);
- int port = Integer.parseInt(args[4]);
- try {
- example.run(maxReads, topic, partition, seeds, port);
- } catch (Exception e) {
- System.out.println("Oops:" + e);
- e.printStackTrace();
- }
- }
- }
参考资料:
1、https://cwiki.apache.org/confluence/display/KAFKA/Index
2、http://www.nohup.cc/article/195/
3、http://blog.csdn.net/honglei915/article/details/37563647
4、http://orchome.com/11