使用 Kafka 之前,需要先安装 ZooKeeper 和 Kafka。其中 ZooKeeper 的安装可以参考这篇文章:点击打开链接;而 Kafka 的安装很简单,只需要解压安装包即可,这里就不再赘述,如果不会可以用 Google、百度搜索。
废话少说来看代码:
一、Producer(消息生产者)
package com.kafka.test.test1;
import java.util.Properties;
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
import kafka.serializer.StringEncoder;
/**
 * Demo producer that sends a fixed number of string messages to a Kafka topic
 * through the legacy producer API and prints the elapsed time in whole seconds.
 */
@SuppressWarnings("deprecation")
public class KafkaProducer {

    /** Topic that receives the test messages; also the only topic listed for compression. */
    private static final String TOPIC = "test8";

    /** Number of messages sent by one call to {@link #testProducer()}. */
    private static final int MESSAGE_COUNT = 320000;

    /**
     * Creates a producer, sends {@code MESSAGE_COUNT} messages to {@code TOPIC} and prints
     * how long the send loop took. The producer is always closed, even when a send fails.
     */
    public static void testProducer() {
        // Create the producer from the configuration below.
        Producer<String, String> producer = new Producer<String, String>(createConfig());
        try {
            long begin = System.currentTimeMillis();
            for (int i = 0; i < MESSAGE_COUNT; i++) {
                // The configured partitioner parses keys of the form "key_<n>", so a key is
                // supplied here; without a key the custom partitioner is never consulted.
                String key = "key_" + i;
                String payload = "hello world 8 hello kafka test" + i;
                producer.send(new KeyedMessage<String, String>(TOPIC, key, payload));
            }
            long elapsedSeconds = (System.currentTimeMillis() - begin) / 1000;
            System.out.println("耗时:" + elapsedSeconds);
        } finally {
            // Release resources.
            producer.close();
        }
    }

    /** Builds the producer configuration used by {@link #testProducer()}. */
    private static ProducerConfig createConfig() {
        Properties props = new Properties();
        // List of Kafka broker instances.
        props.put("metadata.broker.list", "127.0.0.1:9092,127.0.0.1:9093,127.0.0.1:9094");
        props.put("serializer.class", StringEncoder.class.getName());
        props.put("key.serializer.class", "kafka.serializer.StringEncoder");
        // Algorithm that computes the target partition: either the default one or a custom
        // Partitioner implementation.
        props.put("partitioner.class", "com.kafka.test.test1.JavaKafkaProducerPartitioner");
        // Allowed values are 0, 1 and -1, see http://kafka.apache.org/08/configuration.html
        //  0: the server sends no response whether or not the write succeeded; on an error the
        //     server drops the connection, which makes the producer refresh its metadata.
        //  1: a response is sent as soon as the leader has written the message; data is lost
        //     if the leader fails at that point.
        // -1: a response is sent only after all in-sync replicas received the message; this
        //     is the strongest guarantee.
        props.put("request.required.acks", "1");
        // Compression: 0 = none, 1 = gzip, 2 = snappy.
        props.put("compression.codec", "2");
        // Topics whose messages are compressed.
        props.put("compressed.topics", TOPIC);
        // sync or async sending; async was much faster in the author's rough measurements.
        props.put("producer.type", "async");
        // The settings below apply to async mode only.
        // Maximum time (ms) messages are buffered before being sent.
        props.put("queue.buffering.max.ms", "5000");
        // Maximum number of messages held in the buffer queue.
        props.put("queue.buffering.max.messages", "2000");
        // Number of messages sent per batch in async mode (default 200).
        props.put("batch.num.messages", "500");
        // Behaviour when the queue is full; per the Kafka 0.8 documentation -1 blocks the
        // caller instead of dropping messages.
        props.put("queue.enqueue.timeout.ms", "-1");
        return new ProducerConfig(props);
    }

    public static void main(String[] args) {
        testProducer();
    }
}
二、Partitioner(消息生产者选择Topic的partition(分区)的算法)
package com.kafka.test.test1;
import kafka.producer.Partitioner;
import kafka.utils.VerifiableProperties;
/**
 * Producer-side partitioner: the producer uses this rule to decide which partition a
 * message is sent to.
 *
 * <p>Keys are expected to look like {@code key_<n>}; the numeric part selects the partition.
 *
 * @author Administrator
 */
public class JavaKafkaProducerPartitioner implements Partitioner {

    /**
     * No-argument constructor; delegates to the properties constructor with an empty
     * property set.
     */
    public JavaKafkaProducerPartitioner() {
        this(new VerifiableProperties());
    }

    /**
     * Constructor taking the producer properties. This signature must be present.
     *
     * @param properties the configuration context (unused)
     */
    public JavaKafkaProducerPartitioner(VerifiableProperties properties) {
        // nothing to configure
    }

    /**
     * Maps a message key to a partition index in {@code [0, numPartitions)}.
     *
     * <p>The literal {@code key_} is stripped from the key and the rest is parsed as an
     * integer. A {@code null} key maps to partition 0, and a key without a parsable number
     * falls back to the key's hash code, so one malformed key cannot abort a send.
     *
     * @param key the message key
     * @param numPartitions number of partitions of the target topic
     * @return the partition index, never negative
     */
    @Override
    public int partition(Object key, int numPartitions) {
        if (key == null) {
            return 0;
        }
        int num;
        try {
            num = Integer.parseInt(key.toString().replace("key_", "").trim());
        } catch (NumberFormatException ignored) {
            // Not of the form key_<n>: fall back to hashing the key.
            num = key.hashCode();
        }
        // The % operator keeps the sign of the dividend; shift negative remainders into range.
        int remainder = num % numPartitions;
        return remainder < 0 ? remainder + numPartitions : remainder;
    }
}
三、Consumer(消息消费者)
package com.kafka.test.test1;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
/**
 * Demo consumer group member: connects through ZooKeeper, opens several streams for one
 * topic and prints every received message from a pool of worker threads.
 */
public class Consumers {
    private final ConsumerConnector consumer;
    private final String topic;
    /** Pool running one {@link ConsumerTest} per stream; created by {@link #run(int)}. */
    private ExecutorService executor;

    /**
     * Creates the consumer connector.
     *
     * @param a_zookeeper ZooKeeper connection string
     * @param a_groupId consumer group id
     * @param a_topic topic to read from
     */
    public Consumers(String a_zookeeper, String a_groupId, String a_topic) {
        this.consumer = kafka.consumer.Consumer
                .createJavaConsumerConnector(createConsumerConfig(a_zookeeper, a_groupId));
        this.topic = a_topic;
    }

    /** Builds the consumer configuration for the given ZooKeeper ensemble and group. */
    private static ConsumerConfig createConsumerConfig(String a_zookeeper, String a_groupId) {
        Properties props = new Properties();
        // ZooKeeper cluster address.
        props.put("zookeeper.connect", a_zookeeper);
        // Consumer group.
        props.put("group.id", a_groupId);
        // ZooKeeper session lifetime (ms); the session expires without a request in that time.
        props.put("zookeeper.session.timeout.ms", "1000");
        // ZooKeeper sync time (ms).
        props.put("zookeeper.sync.time.ms", "1000");
        // Interval (ms) at which offsets are auto-committed to ZooKeeper.
        props.put("auto.commit.interval.ms", "1000");
        // Offset reset policy.
        props.put("auto.offset.reset", "smallest");
        props.put("serializer.class", "kafka.serializer.StringEncoder");
        // Maximum number of chunks buffered on the consumer side (default 10).
        props.put("queued.max.message.chunks", "50");
        // Number of retries when registering the consumer node.
        props.put("rebalance.max.retries", "5");
        // Total size of messages returned by one fetch; raising it uses more consumer memory.
        props.put("fetch.min.bytes", "6553600");
        // How long (ms) the server blocks when there is not enough data; on timeout the
        // messages are sent to the consumer immediately.
        props.put("fetch.wait.max.ms", "5000");
        return new ConsumerConfig(props);
    }

    /** Shuts down the consumer connector and, if it was started, the worker pool. */
    public void shutdown() {
        consumer.shutdown();
        if (executor != null) {
            executor.shutdown();
        }
    }

    /**
     * Opens {@code a_numThreads} streams for the topic and submits one worker per stream.
     *
     * @param a_numThreads number of streams and worker threads
     */
    public void run(int a_numThreads) {
        // Describe which topic is read and how many threads read it.
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put(topic, Integer.valueOf(a_numThreads));
        // Create the streams.
        Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap =
                consumer.createMessageStreams(topicCountMap);
        List<KafkaStream<byte[], byte[]>> streams = consumerMap.get(topic);
        System.out.println("streams.size = " + streams.size());
        executor = Executors.newFixedThreadPool(a_numThreads);
        int threadNumber = 0;
        for (KafkaStream<byte[], byte[]> stream : streams) {
            executor.submit(new ConsumerTest(stream, threadNumber));
            threadNumber++;
        }
    }

    /** Worker that drains a single stream and prints each message as UTF-8 text. */
    public class ConsumerTest implements Runnable {
        private final KafkaStream<byte[], byte[]> stream;
        private final int threadNumber;

        public ConsumerTest(KafkaStream<byte[], byte[]> a_stream, int a_threadNumber) {
            this.threadNumber = a_threadNumber;
            this.stream = a_stream;
        }

        @Override
        public void run() {
            // One thread per KafkaStream; the iterator walks the messages of that stream.
            ConsumerIterator<byte[], byte[]> it = stream.iterator();
            while (it.hasNext()) {
                String text = new String(it.next().message(), StandardCharsets.UTF_8);
                System.out.println("消息:" + text);
            }
            System.out.println("Shutting down Thread: " + threadNumber);
        }
    }

    public static void main(String[] args) {
        String zooKeeper = "127.0.0.1:2181,127.0.0.1:2182,127.0.0.1:2183";
        String groupId = "group2";
        String topic = "test8";
        int threads = 3;
        Consumers example = new Consumers(zooKeeper, groupId, topic);
        example.run(threads);
    }
}
server.properties 配置文件:这里的配置并不完整,很多项使用了默认值,只是简单列出来供参考,具体配置请根据之前的配置属性列表和业务需求自行修改。
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# see kafka.server.KafkaConfig for additional details and defaults
############################# Server Basics #############################
# The id of the broker. This must be set to a unique integer for each broker.
broker.id=0
# Port of this broker; the producer example's broker list uses 127.0.0.1:9092-9094.
port=9092
# host.name is required when the brokers are run as a cluster.
host.name=127.0.0.1
# Switch to enable topic deletion or not, default value is false
#delete.topic.enable=true
############################# Socket Server Settings #############################
# The address the socket server listens on. It will get the value returned from
# java.net.InetAddress.getCanonicalHostName() if not configured.
# FORMAT:
# listeners = listener_name://host_name:port
# EXAMPLE:
# listeners = PLAINTEXT://your.host.name:9092
# NOTE: the commented samples below use port 9094 while this file sets port=9092;
# adjust the port before uncommenting them.
#listeners=PLAINTEXT://:9094
# Hostname and port the broker will advertise to producers and consumers. If not set,
# it uses the value for "listeners" if configured. Otherwise, it will use the value
# returned from java.net.InetAddress.getCanonicalHostName().
#advertised.listeners=PLAINTEXT://your.host.name:9094
# Maps listener names to security protocols, the default is for them to be the same. See the config documentation for more details
#listener.security.protocol.map=PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL
# The number of threads handling network requests
num.network.threads=3
# The number of threads doing disk I/O
num.io.threads=8
# The send buffer (SO_SNDBUF) used by the socket server
socket.send.buffer.bytes=102400
# The receive buffer (SO_RCVBUF) used by the socket server
socket.receive.buffer.bytes=102400
# The maximum size of a request that the socket server will accept (protection against OOM)
socket.request.max.bytes=104857600
############################# Log Basics #############################
# A comma separated list of directories under which to store log files
#log.dirs=/tmp/kafka-logs
# Use forward slashes (or doubled backslashes) for Windows paths: a single backslash is an
# escape character in a .properties file and is dropped when the file is loaded.
log.dirs=D:/kafka/kafka_2.10/log
# The default number of log partitions per topic. More partitions allow greater
# parallelism for consumption, but this will also result in more files across
# the brokers.
num.partitions=3
# The number of threads per data directory to be used for log recovery at startup and flushing at shutdown.
# This value is recommended to be increased for installations with data dirs located in RAID array.
num.recovery.threads.per.data.dir=1
############################# Log Flush Policy #############################
# Messages are immediately written to the filesystem but by default we only fsync() to sync
# the OS cache lazily. The following configurations control the flush of data to disk.
# There are a few important trade-offs here:
# 1. Durability: Unflushed data may be lost if you are not using replication.
# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush.
# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks.
# The settings below allow one to configure the flush policy to flush data after a period of time or
# every N messages (or both). This can be done globally and overridden on a per-topic basis.
# The number of messages to accept before forcing a flush of data to disk
#log.flush.interval.messages=10000
# The maximum amount of time a message can sit in a log before we force a flush
#log.flush.interval.ms=1000
############################# Log Retention Policy #############################
# The following configurations control the disposal of log segments. The policy can
# be set to delete segments after a period of time, or after a given size has accumulated.
# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens
# from the end of the log.
# The minimum age of a log file to be eligible for deletion due to age
log.retention.hours=168
# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining
# segments don't drop below log.retention.bytes. Functions independently of log.retention.hours.
#log.retention.bytes=1073741824
# The maximum size of a log segment file. When this size is reached a new log segment will be created.
log.segment.bytes=1073741824
# The interval at which log segments are checked to see if they can be deleted according
# to the retention policies
log.retention.check.interval.ms=300000
############################# Zookeeper #############################
# Zookeeper connection string (see zookeeper docs for details).
# This is a comma separated host:port pairs, each corresponding to a zk
# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002".
# You can also append an optional chroot string to the urls to specify the
# root directory for all kafka znodes.
#zookeeper.connect=127.0.0.1:2181
zookeeper.connect=127.0.0.1:2181,127.0.0.1:2182,127.0.0.1:2183
# Timeout in ms for connecting to zookeeper
zookeeper.connection.timeout.ms=10000
# Number of threads used to replicate (back up) partitions
num.replica.fetchers=2
简单描述:
这里使用了本地的集群:ZooKeeper 使用本机的三个服务搭建了伪集群,Kafka 也是在本机开启三个服务。具体安装就不在这里阐述了,可以看文章开头给的链接。