1. Kafka Components (pasted from elsewhere)
Broker: a Kafka cluster consists of one or more servers, each of which is called a broker.
Topic: every message published to a Kafka cluster has a category, called its Topic. (Physically, messages of different Topics are stored separately; logically, a Topic's messages may be spread over one or more brokers, but a user only has to name the Topic to produce or consume data, without caring where the data actually lives.)
Partition: a physical concept; each Topic contains one or more Partitions.
Producer: publishes messages to a Kafka broker.
Consumer: a message consumer, i.e. a client that reads messages from a Kafka broker.
Consumer Group: every Consumer belongs to a specific Consumer Group (a group name can be set per Consumer; if none is given, the Consumer falls into the default group). A small sketch of how groups behave follows the source link below.
Source: http://www.infoq.com/cn/articles/kafka-analysis-part-1/
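A minimal sketch of the Consumer Group idea, using the same 0.8 high-level consumer API as the code in section 6 (the ZooKeeper address and group names are placeholders): connectors that share a group.id divide a Topic's partitions among themselves, while connectors with different group.id values each receive every message independently.

import java.util.Properties;

import kafka.consumer.ConsumerConfig;
import kafka.javaapi.consumer.ConsumerConnector;

public class GroupSketch {
    // Config for the old high-level consumer; only group.id differs between the connectors below
    static ConsumerConfig config(String groupId) {
        Properties props = new Properties();
        props.put("zookeeper.connect", "lwj:2181");
        props.put("group.id", groupId);
        return new ConsumerConfig(props);
    }

    public static void main(String[] args) {
        // Two connectors in the SAME group: the Topic's partitions are split between them
        ConsumerConnector a1 = kafka.consumer.Consumer.createJavaConsumerConnector(config("analytics"));
        ConsumerConnector a2 = kafka.consumer.Consumer.createJavaConsumerConnector(config("analytics"));
        // A connector in a DIFFERENT group: it independently sees every message of the Topic
        ConsumerConnector audit = kafka.consumer.Consumer.createJavaConsumerConnector(config("audit"));
        // ... create message streams and consume as in section 6 ...
        a1.shutdown();
        a2.shutdown();
        audit.shutdown();
    }
}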
2. Data Sources
Page-view logs produced by browsing web pages
Web analytics logs
Tracking logs
3. Consumption Scenarios
Hadoop for offline processing
HBase, MongoDB, and similar stores for near-real-time processing
Spark or Storm for real-time processing (see the sketch after this list)
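As a concrete example of the real-time case, here is a minimal sketch of feeding Kafka into a Storm topology through the storm-kafka KafkaSpout. It assumes the pre-1.0 storm-kafka module; the ZooKeeper address, topic, zkRoot, and topology name are placeholders, and the bolts that do the actual processing are omitted.

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.topology.TopologyBuilder;

import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.ZkHosts;

public class KafkaStormSketch {
    public static void main(String[] args) {
        // Point the spout at the same ZooKeeper ensemble and topic used elsewhere in this note
        ZkHosts hosts = new ZkHosts("lwj:2181");
        SpoutConfig spoutConfig = new SpoutConfig(hosts, "visit", "/kafka-spout", "visit-reader");

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("kafka-spout", new KafkaSpout(spoutConfig), 1);
        // ... attach bolts that do the actual real-time processing here ...

        // Run locally for illustration; a real deployment would use StormSubmitter
        new LocalCluster().submitTopology("visit-topology", new Config(), builder.createTopology());
    }
}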
4. Setup
Single node, single broker
Single node, multiple brokers (see the config sketch after this list)
Multiple nodes, multiple brokers
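For the single-node, multi-broker case, each broker instance gets its own server.properties. A rough sketch of how the files might differ (the broker ids, ports, and log paths below are only examples, chosen to match the lwj:9092,lwj:9093 broker list used in the producer code); all instances point at the same ZooKeeper:

server-1.properties: broker.id=1, port=9092, log.dirs=/tmp/kafka-logs-1
server-2.properties: broker.id=2, port=9093, log.dirs=/tmp/kafka-logs-2
both files: zookeeper.connect=lwj:2181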
5. Producer Code
import java.util.Properties;

import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

// Wrapped in a minimal class/main so the snippet compiles as-is
public class ProducerDemo {
    public static void main(String[] args) {
        // Set configuration properties
        Properties props = new Properties();
        // List of Kafka brokers
        props.put("metadata.broker.list", "lwj:9092,lwj:9093");
        // Serializer (encoder) class for message values
        props.put("serializer.class", "kafka.serializer.StringEncoder");
        // key.serializer.class defaults to serializer.class
        //props.put("key.serializer.class", "kafka.serializer.StringEncoder");
        // Optional; if not set, the default partitioner is used
        //props.put("partitioner.class", "com.catt.kafka.demo.PartitionerDemo");
        // Enables the acknowledgement mechanism; otherwise sends are fire-and-forget
        // and data may be lost. Valid values are 0, 1, -1; see
        // http://kafka.apache.org/08/configuration.html
        props.put("request.required.acks", "1");
        ProducerConfig config = new ProducerConfig(props);

        // Create the producer
        Producer<String, String> producer = new Producer<String, String>(config);
        String msg = "hello kafka hello storm hello storm hello hbase";
        KeyedMessage<String, String> data = new KeyedMessage<String, String>("visit", msg);
        producer.send(data);

        // Close the producer's connections to the brokers
        producer.close();
    }
}
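The partitioner.class property above is commented out, and the referenced com.catt.kafka.demo.PartitionerDemo class is not shown in this note. For reference, a minimal sketch of what such a custom partitioner could look like with the 0.8.1+ producer API (the hashing scheme here is made up for illustration); the producer creates the partitioner reflectively, so it must expose a constructor taking VerifiableProperties:

import kafka.producer.Partitioner;
import kafka.utils.VerifiableProperties;

// Hypothetical partitioner: route each message by the hash of its key
public class PartitionerDemo implements Partitioner {

    // Called reflectively by the producer with the full producer properties
    public PartitionerDemo(VerifiableProperties props) {
    }

    @Override
    public int partition(Object key, int numPartitions) {
        if (key == null) {
            return 0;                                          // no key: fall back to partition 0
        }
        return (key.hashCode() & 0x7fffffff) % numPartitions;  // non-negative hash spread over partitions
    }
}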
6. Consumer Code
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import kafka.consumer.ConsumerConfig;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;

public class ConsumerGroupExample {
    private final ConsumerConnector consumer;
    private final String topic;
    private ExecutorService executor;

    public ConsumerGroupExample(String a_zookeeper, String a_groupId, String a_topic) {
        consumer = kafka.consumer.Consumer.createJavaConsumerConnector(createConsumerConfig(a_zookeeper, a_groupId));
        this.topic = a_topic;
    }

    public void run(int a_numThreads) {
        // Ask for a_numThreads streams for the topic; threads beyond the topic's
        // partition count will never receive messages
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put(topic, new Integer(a_numThreads));
        Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumer.createMessageStreams(topicCountMap);
        List<KafkaStream<byte[], byte[]>> streams = consumerMap.get(topic);

        // Now launch all the threads
        executor = Executors.newFixedThreadPool(a_numThreads);

        // Hand each stream to a worker that consumes its messages
        int threadNumber = 0;
        for (final KafkaStream stream : streams) {
            executor.submit(new Eat(stream, threadNumber));
            threadNumber++;
        }
    }

    private static ConsumerConfig createConsumerConfig(String a_zookeeper, String a_groupId) {
        Properties props = new Properties();
        props.put("zookeeper.connect", a_zookeeper);
        props.put("group.id", a_groupId);
        props.put("zookeeper.session.timeout.ms", "400");
        props.put("zookeeper.sync.time.ms", "200");
        props.put("auto.commit.interval.ms", "1000");
        props.put("auto.offset.reset", "smallest");
        return new ConsumerConfig(props);
    }

    public void shutdown() {
        if (consumer != null) consumer.shutdown();
        if (executor != null) {
            executor.shutdown();
            try {
                if (!executor.awaitTermination(5000, TimeUnit.MILLISECONDS)) {
                    System.out.println("Timed out waiting for consumer threads to shut down, exiting uncleanly");
                }
            } catch (InterruptedException e) {
                System.out.println("Interrupted during shutdown, exiting uncleanly");
            }
        }
    }
}
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;

public class Eat implements Runnable {
    private KafkaStream m_stream;
    private int m_threadNumber;

    public Eat(KafkaStream a_stream, int a_threadNumber) {
        m_threadNumber = a_threadNumber;
        m_stream = a_stream;
    }

    @Override
    public void run() {
        // Block on the stream's iterator and print each message as it arrives
        ConsumerIterator<byte[], byte[]> it = m_stream.iterator();
        while (it.hasNext()) {
            System.out.println("Thread " + m_threadNumber + ": " + new String(it.next().message()));
        }
        System.out.println("Shutting down Thread: " + m_threadNumber);
    }

    public static void main(String[] args) {
        String zooKeeper = "lwj:2181";
        String groupId = "test-consumer-group";
        String topic = "visit";
        ConsumerGroupExample example = new ConsumerGroupExample(zooKeeper, groupId, topic);
        example.run(3);
        // Let the consumers run for a while, then shut everything down
        try {
            Thread.sleep(10000);
        } catch (InterruptedException ie) {
        }
        example.shutdown();
    }
}
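Note that the example above starts three consumer threads for topic visit, so the topic needs at least three partitions for every thread to receive data; extra threads beyond the partition count simply stay idle. To spread messages across partitions, the producer from section 5 can attach a key to each message via the three-argument KeyedMessage constructor. A small sketch, reusing the same placeholder broker list (the keys and loop are purely illustrative):

import java.util.Properties;

import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

public class KeyedProducerSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("metadata.broker.list", "lwj:9092,lwj:9093");
        props.put("serializer.class", "kafka.serializer.StringEncoder");
        props.put("request.required.acks", "1");
        Producer<String, String> producer = new Producer<String, String>(new ProducerConfig(props));

        // Keyed messages: the partitioner hashes the key, so different keys can land on
        // different partitions and therefore on different consumer threads
        for (int i = 0; i < 9; i++) {
            String key = "user-" + (i % 3);
            producer.send(new KeyedMessage<String, String>("visit", key, "page view " + i));
        }
        producer.close();
    }
}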