Kafka Java Client Code Examples

Reposted from http://my.oschina.net/cloudcoder/blog/299215?fromerr=1Luc9l6g


Introduction

    http://kafka.apache.org

    Kafka is a high-throughput distributed publish-subscribe messaging system.

    Kafka is the distributed message queue LinkedIn built for log processing. LinkedIn's log volume is large, but its reliability requirements are modest; the log data consists mainly of user behavior (logins, page views, clicks, shares, likes) and system metrics (CPU, memory, disk, network, system and process state).

    Many existing message queue services provide strong delivery guarantees and assume messages are consumed as soon as they arrive (which does not suit offline processing).

Highly reliable delivery is not essential for LinkedIn's logs, so reliability can be traded for performance. At the same time, by running as a distributed cluster and allowing messages to accumulate in the system, Kafka supports both offline and online log processing.

Test environment

    kafka_2.10-0.8.1.1, a cluster of 3 nodes

    zookeeper-3.4.5, a single instance

Code examples

Message producer example

import java.util.Date;
import java.util.Properties;
import java.util.Random;

import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

/**
 * For details see: https://cwiki.apache.org/confluence/display/KAFKA/0.8.0+Producer+Example
 * @author Fung
 */
public class ProducerDemo {
    public static void main(String[] args) {
        Random rnd = new Random();
        int events = 100;

        // Set up the producer configuration
        Properties props = new Properties();
        props.put("metadata.broker.list", "172.168.63.221:9092,172.168.63.233:9092,172.168.63.234:9092");
        props.put("serializer.class", "kafka.serializer.StringEncoder");
        // key.serializer.class defaults to serializer.class
        props.put("key.serializer.class", "kafka.serializer.StringEncoder");
        // Optional; if not set, the default partitioner is used
        props.put("partitioner.class", "com.catt.kafka.demo.PartitionerDemo");
        // Enables the acknowledgement mechanism; otherwise sends are fire-and-forget and data may be lost.
        // Valid values are 0, 1 and -1, see http://kafka.apache.org/08/configuration.html
        props.put("request.required.acks", "1");
        ProducerConfig config = new ProducerConfig(props);

        // Create the producer
        Producer<String, String> producer = new Producer<String, String>(config);
        // Generate and send the messages
        long start = System.currentTimeMillis();
        for (long i = 0; i < events; i++) {
            long runtime = new Date().getTime();
            String ip = "192.168.2." + i; // rnd.nextInt(255);
            String msg = runtime + ",www.example.com," + ip;
            // If the topic does not exist, it is auto-created with the broker defaults
            // (replication factor 1, 1 partition)
            KeyedMessage<String, String> data = new KeyedMessage<String, String>(
                    "page_visits", ip, msg);
            producer.send(data);
        }
        System.out.println("Elapsed ms: " + (System.currentTimeMillis() - start));
        // Close the producer
        producer.close();
    }
}
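As the comment in the code notes, sending to a topic that does not exist yet auto-creates it with the broker defaults, i.e. a single partition, which makes the custom partitioner pointless. One way to pre-create "page_visits" with several partitions is the AdminUtils helper shipped with the broker jars. The sketch below is only an illustration under that assumption: the partition count of 4 and replication factor of 3 are example values, and the ZooKeeper address is the one used elsewhere in this post. If this helper is not available in your build, the bin/kafka-topics.sh script that ships with Kafka 0.8.1 can create the topic from the command line instead.

import java.util.Properties;

import org.I0Itec.zkclient.ZkClient;

import kafka.admin.AdminUtils;
import kafka.utils.ZKStringSerializer$;

public class CreateTopicDemo {
    public static void main(String[] args) {
        // Connect to ZooKeeper with Kafka's string serializer (a Scala object, hence $.MODULE$)
        ZkClient zkClient = new ZkClient("172.168.63.221:2188", 10000, 10000,
                ZKStringSerializer$.MODULE$);
        try {
            // Create "page_visits" with 4 partitions and replication factor 3 (example values)
            AdminUtils.createTopic(zkClient, "page_visits", 4, 3, new Properties());
        } finally {
            zkClient.close();
        }
    }
}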

Message consumer example

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;

/**
 * For details see: https://cwiki.apache.org/confluence/display/KAFKA/Consumer+Group+Example
 *
 * @author Fung
 */
public class ConsumerDemo {
    private final ConsumerConnector consumer;
    private final String topic;
    private ExecutorService executor;

    public ConsumerDemo(String a_zookeeper, String a_groupId, String a_topic) {
        consumer = Consumer.createJavaConsumerConnector(createConsumerConfig(a_zookeeper, a_groupId));
        this.topic = a_topic;
    }

    public void shutdown() {
        if (consumer != null)
            consumer.shutdown();
        if (executor != null)
            executor.shutdown();
    }

    public void run(int numThreads) {
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put(topic, new Integer(numThreads));
        Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumer
                .createMessageStreams(topicCountMap);
        List<KafkaStream<byte[], byte[]>> streams = consumerMap.get(topic);

        // now launch all the threads
        executor = Executors.newFixedThreadPool(numThreads);

        // now create an object to consume the messages
        int threadNumber = 0;
        for (final KafkaStream<byte[], byte[]> stream : streams) {
            executor.submit(new ConsumerMsgTask(stream, threadNumber));
            threadNumber++;
        }
    }

    private static ConsumerConfig createConsumerConfig(String a_zookeeper,
            String a_groupId) {
        Properties props = new Properties();
        props.put("zookeeper.connect", a_zookeeper);
        props.put("group.id", a_groupId);
        props.put("zookeeper.session.timeout.ms", "400");
        props.put("zookeeper.sync.time.ms", "200");
        props.put("auto.commit.interval.ms", "1000");

        return new ConsumerConfig(props);
    }

    public static void main(String[] arg) {
        // Command-line arguments are ignored; hard-coded values:
        // ZooKeeper address, consumer group, topic, thread count
        String[] args = { "172.168.63.221:2188", "group-1", "page_visits", "12" };
        String zooKeeper = args[0];
        String groupId = args[1];
        String topic = args[2];
        int threads = Integer.parseInt(args[3]);

        ConsumerDemo demo = new ConsumerDemo(zooKeeper, groupId, topic);
        demo.run(threads);

        try {
            Thread.sleep(10000);
        } catch (InterruptedException ie) {

        }
        demo.shutdown();
    }
}

Message handler class

import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;

public class ConsumerMsgTask implements Runnable {
    private KafkaStream<byte[], byte[]> m_stream;
    private int m_threadNumber;

    public ConsumerMsgTask(KafkaStream<byte[], byte[]> stream, int threadNumber) {
        m_threadNumber = threadNumber;
        m_stream = stream;
    }

    public void run() {
        ConsumerIterator<byte[], byte[]> it = m_stream.iterator();
        while (it.hasNext())
            System.out.println("Thread " + m_threadNumber + ": "
                    + new String(it.next().message()));
        System.out.println("Shutting down Thread: " + m_threadNumber);
    }
}

Partitioner class example

import kafka.producer.Partitioner;
import kafka.utils.VerifiableProperties;

public class PartitionerDemo implements Partitioner {
    public PartitionerDemo(VerifiableProperties props) {

    }

    @Override
    public int partition(Object obj, int numPartitions) {
        int partition = 0;
        if (obj instanceof String) {
            String key = (String) obj;
            int offset = key.lastIndexOf('.');
            if (offset > 0) {
                partition = Integer.parseInt(key.substring(offset + 1)) % numPartitions;
            }
        } else {
            partition = obj.toString().length() % numPartitions;
        }

        return partition;
    }

}
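Since the producer above uses the client IP (e.g. "192.168.2.25") as the message key, this partitioner routes all messages from the same IP to the same partition, chosen from the last octet of the address; keys that are not strings fall back to the length of their toString() modulo the partition count. A minimal sanity check of that mapping (the key and partition count below are made-up illustration values, not from the original post):

public class PartitionerDemoCheck {
    public static void main(String[] args) {
        String key = "192.168.2.25"; // hypothetical key, in the same format the producer uses
        int numPartitions = 4;       // hypothetical number of partitions for the topic
        int offset = key.lastIndexOf('.');
        // Same arithmetic as PartitionerDemo.partition(): last octet modulo partition count
        int partition = Integer.parseInt(key.substring(offset + 1)) % numPartitions;
        System.out.println(key + " -> partition " + partition); // prints "192.168.2.25 -> partition 1"
    }
}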

pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.xxx</groupId>
    <artifactId>kafka-demo</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>kafka-demo</name>
    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.10</artifactId>
            <version>0.8.1.1</version>
            <exclusions>
                <exclusion>
                    <artifactId>jmxtools</artifactId>
                    <groupId>com.sun.jdmk</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>jmxri</artifactId>
                    <groupId>com.sun.jmx</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>jms</artifactId>
                    <groupId>javax.jms</groupId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.15</version>
            <exclusions>
                <exclusion>
                    <artifactId>jmxtools</artifactId>
                    <groupId>com.sun.jdmk</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>jmxri</artifactId>
                    <groupId>com.sun.jmx</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>jms</artifactId>
                    <groupId>javax.jms</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>mail</artifactId>
                    <groupId>javax.mail</groupId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
</project>

References

https://cwiki.apache.org/confluence/display/KAFKA/Index

https://kafka.apache.org/



When should you use this lower-level approach (SimpleConsumer)?

  1. You want to read a message multiple times.
  2. You want to consume only a subset of the partitions of a topic in a single process.
  3. You want to manage transactions so that each message is processed exactly once.

Downsides of this lower-level approach (SimpleConsumer)?

    The SimpleConsumer requires a significant amount of extra work that the consumer-group approach handles for you:

  1. Track the offset of the last processed message in your application.
  2. Figure out which Broker is the lead Broker for a topic and partition.
  3. Handle lead Broker changes yourself.

Steps for developing with the low-level API (SimpleConsumer)

  1. Find the lead Broker for the topic and partition by querying an active Broker.
  2. Determine the replica Brokers for the topic and partition.
  3. Build the fetch requests for the data you need.
  4. Fetch the data.
  5. Detect lead Broker changes and recover from them.

Code example

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import kafka.api.FetchRequest;
import kafka.api.FetchRequestBuilder;
import kafka.api.PartitionOffsetRequestInfo;
import kafka.cluster.Broker;
import kafka.common.ErrorMapping;
import kafka.common.TopicAndPartition;
import kafka.javaapi.FetchResponse;
import kafka.javaapi.OffsetResponse;
import kafka.javaapi.PartitionMetadata;
import kafka.javaapi.TopicMetadata;
import kafka.javaapi.TopicMetadataRequest;
import kafka.javaapi.TopicMetadataResponse;
import kafka.javaapi.consumer.SimpleConsumer;
import kafka.message.MessageAndOffset;

/**
 * https://cwiki.apache.org/confluence/display/KAFKA/0.8.0+SimpleConsumer+Example
 * @author Fung
 */
public class ConsumerSimpleExample {
    public static void main(String arg[]) {
        String[] args = { "20", "page_visits", "2", "172.168.63.233", "9092" };
        ConsumerSimpleExample example = new ConsumerSimpleExample();
        long maxReads = Long.parseLong(args[0]);
        String topic = args[1];
        int partition = Integer.parseInt(args[2]);
        List<String> seeds = new ArrayList<String>();
        seeds.add(args[3]);
        int port = Integer.parseInt(args[4]);
        try {
            example.run(maxReads, topic, partition, seeds, port);
        } catch (Exception e) {
            System.out.println("Oops:" + e);
            e.printStackTrace();
        }
    }

    private List<String> m_replicaBrokers = new ArrayList<String>();

    public ConsumerSimpleExample() {
        m_replicaBrokers = new ArrayList<String>();
    }

    public void run(long a_maxReads, String a_topic, int a_partition,
            List<String> a_seedBrokers, int a_port) throws Exception {
        // find the meta data about the topic and partition we are interested in
        PartitionMetadata metadata = findLeader(a_seedBrokers, a_port, a_topic,
                a_partition);
        if (metadata == null) {
            System.out
                    .println("Can't find metadata for Topic and Partition. Exiting");
            return;
        }
        if (metadata.leader() == null) {
            System.out
                    .println("Can't find Leader for Topic and Partition. Exiting");
            return;
        }
        String leadBroker = metadata.leader().host();
        String clientName = "Client_" + a_topic + "_" + a_partition;

        SimpleConsumer consumer = new SimpleConsumer(leadBroker, a_port,
                100000, 64 * 1024, clientName);
        long readOffset = getLastOffset(consumer, a_topic, a_partition,
                kafka.api.OffsetRequest.LatestTime(), clientName);

        int numErrors = 0;
        while (a_maxReads > 0) {
            if (consumer == null) {
                consumer = new SimpleConsumer(leadBroker, a_port, 100000,
                        64 * 1024, clientName);
            }
            // Note: this fetchSize of 100000 might need to be increased if
            // large batches are written to Kafka
            FetchRequest req = new FetchRequestBuilder().clientId(clientName)
                    .addFetch(a_topic, a_partition, readOffset, 100000).build();
            FetchResponse fetchResponse = consumer.fetch(req);

            if (fetchResponse.hasError()) {
                numErrors++;
                // Something went wrong!
                short code = fetchResponse.errorCode(a_topic, a_partition);
                System.out.println("Error fetching data from the Broker:"
                        + leadBroker + " Reason: " + code);
                if (numErrors > 5)
                    break;
                if (code == ErrorMapping.OffsetOutOfRangeCode()) {
                    // We asked for an invalid offset. For simple case ask for
                    // the last element to reset
                    readOffset = getLastOffset(consumer, a_topic, a_partition,
                            kafka.api.OffsetRequest.LatestTime(), clientName);
                    continue;
                }
                consumer.close();
                consumer = null;
                leadBroker = findNewLeader(leadBroker, a_topic, a_partition,
                        a_port);
                continue;
            }
            numErrors = 0;

            long numRead = 0;
            for (MessageAndOffset messageAndOffset : fetchResponse.messageSet(
                    a_topic, a_partition)) {
                long currentOffset = messageAndOffset.offset();
                if (currentOffset < readOffset) {
                    System.out.println("Found an old offset: " + currentOffset
                            + " Expecting: " + readOffset);
                    continue;
                }
                readOffset = messageAndOffset.nextOffset();
                ByteBuffer payload = messageAndOffset.message().payload();
                byte[] bytes = new byte[payload.limit()];
                payload.get(bytes);
                System.out.println(String.valueOf(messageAndOffset.offset())
                        + ": " + new String(bytes, "UTF-8"));
                numRead++;
                a_maxReads--;
            }

            if (numRead == 0) {
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException ie) {
                }
            }
        }
        if (consumer != null)
            consumer.close();
    }

    public static long getLastOffset(SimpleConsumer consumer, String topic,
            int partition, long whichTime, String clientName) {
        TopicAndPartition topicAndPartition = new TopicAndPartition(topic,
                partition);
        Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo = new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();
        requestInfo.put(topicAndPartition, new PartitionOffsetRequestInfo(
                whichTime, 1));
        kafka.javaapi.OffsetRequest request = new kafka.javaapi.OffsetRequest(
                requestInfo, kafka.api.OffsetRequest.CurrentVersion(),
                clientName);
        OffsetResponse response = consumer.getOffsetsBefore(request);

        if (response.hasError()) {
            System.out
                    .println("Error fetching data Offset Data the Broker. Reason: "
                            + response.errorCode(topic, partition));
            return 0;
        }
        long[] offsets = response.offsets(topic, partition);
        return offsets[0];
    }

    private String findNewLeader(String a_oldLeader, String a_topic,
            int a_partition, int a_port) throws Exception {
        for (int i = 0; i < 3; i++) {
            boolean goToSleep = false;
            PartitionMetadata metadata = findLeader(m_replicaBrokers, a_port,
                    a_topic, a_partition);
            if (metadata == null) {
                goToSleep = true;
            } else if (metadata.leader() == null) {
                goToSleep = true;
            } else if (a_oldLeader.equalsIgnoreCase(metadata.leader().host())
                    && i == 0) {
                // first time through if the leader hasn't changed give
                // ZooKeeper a second to recover
                // second time, assume the broker did recover before failover,
                // or it was a non-Broker issue
                goToSleep = true;
            } else {
                return metadata.leader().host();
            }
            if (goToSleep) {
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException ie) {
                }
            }
        }
        System.out
                .println("Unable to find new leader after Broker failure. Exiting");
        throw new Exception(
                "Unable to find new leader after Broker failure. Exiting");
    }

    private PartitionMetadata findLeader(List<String> a_seedBrokers,
            int a_port, String a_topic, int a_partition) {
        PartitionMetadata returnMetaData = null;
        loop: for (String seed : a_seedBrokers) {
            SimpleConsumer consumer = null;
            try {
                consumer = new SimpleConsumer(seed, a_port, 100000, 64 * 1024,
                        "leaderLookup");
                List<String> topics = Collections.singletonList(a_topic);
                TopicMetadataRequest req = new TopicMetadataRequest(topics);
                TopicMetadataResponse resp = consumer.send(req);

                List<TopicMetadata> metaData = resp.topicsMetadata();
                for (TopicMetadata item : metaData) {
                    for (PartitionMetadata part : item.partitionsMetadata()) {
                        if (part.partitionId() == a_partition) {
                            returnMetaData = part;
                            break loop;
                        }
                    }
                }
            } catch (Exception e) {
                System.out.println("Error communicating with Broker [" + seed
                        + "] to find Leader for [" + a_topic + ", "
                        + a_partition + "] Reason: " + e);
            } finally {
                if (consumer != null)
                    consumer.close();
            }
        }
        if (returnMetaData != null) {
            m_replicaBrokers.clear();
            for (Broker replica : returnMetaData.replicas()) {
                m_replicaBrokers.add(replica.host());
            }
        }
        return returnMetaData;
    }
}

Reference

https://cwiki.apache.org/confluence/display/KAFKA/0.8.0+SimpleConsumer+Example

