package com.iflytek.spark.test;
import java.text.MessageFormat;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import com.iflytek.spark.bean.SysCode;
import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;
/**
* 同一consumer group的多线程消费可以两种方法实现:
* <p>
* 1、实现单线程客户端,启动多个去消费
* </p>
* <p>
* 2、在客户端的createMessageStreams里为topic指定大于1的线程数,再启动多个线程处理每个stream
* </p>
*/publicclassJConsumerextendsThread {private ConsumerConnector consumer;
private String topic;
privatefinalint SLEEP = 20;
publicJConsumer(String topic) {
consumer = Consumer.createJavaConsumerConnector(this.consumerConfig());
this.topic = topic;
}
private ConsumerConfig consumerConfig() {
Properties props = new Properties();
props.put("zookeeper.connect", SysCode.CDH_ZKHOST);
props.put("group.id", "jd-group");
props.put("auto.commit.enable", "true");// 默认为true,让consumer定期commit offset,zookeeper会将offset持久化,否则只在内存,若故障则再消费时会从最后一次保存的offset开始
props.put("auto.commit.interval.ms", 1000+"");// 经过INTERVAL时间提交一次offset
props.put("auto.offset.reset", "largest");// What to do when there is no initial offset in ZooKeeper or if an offset is out of range
props.put("zookeeper.session.timeout.ms", 4000 + "");
props.put("zookeeper.sync.time.ms", "200");
props.put("auto.offset.reset", "largest");
returnnew ConsumerConfig(props);
}
@Overridepublicvoidrun() {
Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
topicCountMap.put(topic, new Integer(1));// 线程数
Map<String, List<KafkaStream<byte[], byte[]>>> streams = consumer.createMessageStreams(topicCountMap);
KafkaStream<byte[], byte[]> stream = streams.get(topic).get(0);// 若上面设了多个线程去消费,则这里需为每个stream开个线程做如下的处理
ConsumerIterator<byte[], byte[]> it = stream.iterator();
MessageAndMetadata<byte[], byte[]> messageAndMetaData = null;
while (it.hasNext()) {
messageAndMetaData = it.next();
System.out.println(MessageFormat.format("Receive->[ message:{0} , partition:{1} , offset:{2} ]",
new String(messageAndMetaData.message()),
messageAndMetaData.partition() + "", messageAndMetaData.offset() + ""));
try {
sleep(SLEEP);
} catch (Exception ex) {
ex.printStackTrace();
}
}
}
publicstaticvoidmain(String[] args) {
JConsumer con = new JConsumer("102011");
con.start();
}
}
2. Producer code
package com.iflytek.spark.test;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import com.iflytek.spark.bean.SysCode;
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
/**
* 可以指定规则(key和分区函数)以让消息写到特定分区:
* <p>
* 1、若发送的消息没有指定key则Kafka会随机选择一个分区
* </p>
* <p>
* 2、否则,若指定了分区函数(通过partitioner.class)则该函数以key为参数确定写到哪个分区
* </p>
* <p>
* 3、否则,Kafka根据hash(key)%partitionNum确定写到哪个分区
* </p>
*/publicclassJProducerextendsThread {private Producer<String, String> producer;
private String topic;
privatefinalint SLEEP = 10;
privatefinalint msgNum = 1000;
publicJProducer(String topic) {
Properties props = new Properties();
props.put("metadata.broker.list", SysCode.KAFKAHOST);// 如192.168.6.127:9092,192.168.6.128:9092// request.required.acks// 0, which means that the producer never waits for an acknowledgement from the broker (the same behavior as 0.7). This option provides the lowest latency but the weakest durability guarantees// (some data will be lost when a server fails).// 1, which means that the producer gets an acknowledgement after the leader replica has received the data. This option provides better durability as the client waits until the server// acknowledges the request as successful (only messages that were written to the now-dead leader but not yet replicated will be lost).// -1, which means that the producer gets an acknowledgement after all in-sync replicas have received the data. This option provides the best durability, we guarantee that no messages will be// lost as long as at least one in sync replica remains.
props.put("request.required.acks", "-1");
// 配置value的序列化类
props.put("serializer.class", "kafka.serializer.StringEncoder");
// 配置key的序列化类
props.put("key.serializer.class", "kafka.serializer.StringEncoder");
// 提供自定义的分区函数将消息写到分区上,未指定的话Kafka根据hash(messageKey)%partitionNum确定写到哪个分区// props.put("partitioner.class", "com.zsm.kfkdemo.MyPartitioner");
producer = new Producer<String, String>(new ProducerConfig(props));
this.topic = topic;
}
@Overridepublicvoidrun() {
boolean isBatchWriteMode = false;
System.out.println("isBatchWriteMode: " + isBatchWriteMode);
if (isBatchWriteMode) {
// 批量发送int batchSize = 100;
List<KeyedMessage<String, String>> msgList = new ArrayList<KeyedMessage<String, String>>(batchSize);
for (int i = 0; i < msgNum; i++) {
String msg = "Message_" + i;
msgList.add(new KeyedMessage<String, String>(topic, i + "", msg));
// msgList.add(new KeyedMessage<String, String>(topic, msg));//未指定key,Kafka会自动选择一个分区if (i % batchSize == 0) {
producer.send(msgList);
System.out.println("Send->[" + msgList + "]");
msgList.clear();
try {
sleep(SLEEP);
} catch (Exception ex) {
ex.printStackTrace();
}
}
}
producer.send(msgList);
} else {
// 单个发送for (int i = 0; i < msgNum; i++) {
KeyedMessage<String, String> msg = new KeyedMessage<String, String>(topic, i + "", "Message_" + i);
// KeyedMessage<String, String> msg = new KeyedMessage<String, String>(topic, "Message_" + i);//未指定key,Kafka会自动选择一个分区
producer.send(msg);
System.out.println("Send->[" + msg + "]");
try {
sleep(SLEEP);
} catch (Exception ex) {
ex.printStackTrace();
}
}
}
System.out.println("send done");
}
publicstaticvoidmain(String[] args) {
JProducer pro = new JProducer("test1");
pro.start();
}
}