CentOS 6.6 + JDK 1.7
Flume 1.4 + Kafka 0.8.1.1 (Scala 2.10) + Storm 0.9.3
ZooKeeper 3.4.6
Cluster:
192.168.80.133 x01
192.168.80.134 x02
1. Set the hostname and hosts file on both machines
...
![](http://www.kekeyun.org/data/attachment/forum/201504/26/231822rtz797wrqwew734x.jpg)
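A minimal sketch of the hostname/hosts setup on CentOS 6, using the addresses listed above:

```
# /etc/sysconfig/network on 192.168.80.133 (use HOSTNAME=x02 on .134)
HOSTNAME=x01

# append to /etc/hosts on both machines
192.168.80.133 x01
192.168.80.134 x02
```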
2. Install the JDK on both machines and set environment variables
![](http://www.kekeyun.org/data/attachment/forum/201504/26/231822pwgy0vynsttn00t9.jpg)
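A typical /etc/profile addition for JDK 1.7 (the install path below is an assumption; adjust it to wherever you unpacked the JDK):

```
# appended to /etc/profile (hypothetical install path)
export JAVA_HOME=/usr/java/jdk1.7.0_79
export PATH=$JAVA_HOME/bin:$PATH

# apply and verify
source /etc/profile
java -version
```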
3. Download and install ZooKeeper and set environment variables
Edit conf/zoo.cfg (only the changed portion of the stock zoo_sample.cfg is shown):
```
# example sakes.
dataDir=/data/zookeeper/data
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1

server.1=x01:2888:3888
server.2=x02:2888:3888
```
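In replicated mode each node also needs a myid file under dataDir whose value matches its server.N entry in zoo.cfg:

```
# on x01
mkdir -p /data/zookeeper/data
echo 1 > /data/zookeeper/data/myid

# on x02
mkdir -p /data/zookeeper/data
echo 2 > /data/zookeeper/data/myid
```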
Start ZooKeeper on both machines and check its status:

```
zkServer.sh start
zkServer.sh status
```
![](http://www.kekeyun.org/data/attachment/forum/201504/26/231823zqrlhhih55r55l21.jpg)
4. Download and install Flume
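A minimal install sketch, assuming the apache-flume-1.4.0-bin release and home-directory paths (adjust to your layout):

```
tar -zxf apache-flume-1.4.0-bin.tar.gz
export FLUME_HOME=/home/hadoop/apache-flume-1.4.0-bin
export PATH=$FLUME_HOME/bin:$PATH
```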
5. Download and install Kafka
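A minimal sketch, assuming the kafka_2.10-0.8.1.1 release used throughout this post:

```
tar -zxf kafka_2.10-0.8.1.1.tgz
cd kafka_2.10-0.8.1.1

# in config/server.properties, point the broker at the ZooKeeper ensemble:
#   zookeeper.connect=x01:2181,x02:2181

bin/kafka-server-start.sh config/server.properties &

# create the "test" topic used in the examples below
bin/kafka-topics.sh --create --zookeeper x01:2181 --replication-factor 1 --partitions 1 --topic test
```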
6. Integrate Flume and Kafka
Download the integration plugin flumeng-kafka-plugin:
https://github.com/beyondj2ee/flumeng-kafka-plugin
Extract flume-conf.properties from the plugin, modify it, and put it in Flume's conf directory (saved here as kafka.conf, matching the startup command below):
```
############################################
# producer config
###########################################

#agent section
producer.sources = s
producer.channels = c
producer.sinks = r

#source section
producer.sources.s.type = spooldir
producer.sources.s.spoolDir=/home/hadoop/testFlume
producer.sources.s.fileHeader=false
producer.sources.s.channels = c

# Each sink's type must be defined
producer.sinks.r.type = org.apache.flume.plugins.KafkaSink
producer.sinks.r.metadata.broker.list=x01:9092
producer.sinks.r.partition.key=0
producer.sinks.r.partitioner.class=org.apache.flume.plugins.SinglePartition
producer.sinks.r.serializer.class=kafka.serializer.StringEncoder
producer.sinks.r.request.required.acks=0
producer.sinks.r.max.message.size=1000000
producer.sinks.r.producer.type=sync
producer.sinks.r.custom.encoding=UTF-8
producer.sinks.r.custom.topic.name=test

#Specify the channel the sink should use
producer.sinks.r.channel = c

# Each channel's type is defined.
producer.channels.c.type = memory
producer.channels.c.capacity = 1000
```
Copy the jars from the plugin package into Flume's lib directory.
Put files into /home/hadoop/testFlume, then start the Flume agent and a Kafka console consumer to test:

```
bin/flume-ng agent -n producer -c conf -f conf/kafka.conf -Dflume.root.logger=DEBUG,console
bin/kafka-console-consumer.sh --zookeeper x01:2181 --topic test --from-beginning
```
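For example, create the spool directory and move a finished file into it (the file content here is arbitrary; the word-splitter bolt later splits lines on commas):

```
mkdir -p /home/hadoop/testFlume
echo "hello,kafka,storm" > /tmp/words.txt
mv /tmp/words.txt /home/hadoop/testFlume/
```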
![](http://www.kekeyun.org/data/attachment/forum/201504/26/231823uiqqdd6d6e9fpyey.jpg)
![](http://www.kekeyun.org/data/attachment/forum/201504/26/231823mz9zel19lr1ww9em.jpg)
The test succeeds.
Note: if Flume will be transporting Chinese text, the files should be UTF-8 encoded; otherwise garbled characters can easily cause Flume to die.
7. Install Storm
![](http://www.kekeyun.org/data/attachment/forum/201504/26/231823j0nfh5nh80bi6n5h.jpg)
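A minimal sketch of the Storm 0.9.3 setup (the storm.yaml keys are standard; the local dir path is an assumption):

```
tar -zxf apache-storm-0.9.3.tar.gz
cd apache-storm-0.9.3

# conf/storm.yaml
#   storm.zookeeper.servers:
#       - "x01"
#       - "x02"
#   nimbus.host: "x01"
#   storm.local.dir: "/data/storm"    # hypothetical path

# nimbus + ui on x01; supervisor on both machines
bin/storm nimbus &
bin/storm supervisor &
bin/storm ui &
```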
8. Integrate Storm and Kafka
Copy the required Kafka jars into Storm's lib directory:

```
cp kafka_2.10-0.8.1.1/libs/kafka_2.10-0.8.1.1.jar apache-storm-0.9.3/lib/
cp kafka_2.10-0.8.1.1/libs/scala-library-2.10.1.jar apache-storm-0.9.3/lib/
cp kafka_2.10-0.8.1.1/libs/metrics-core-2.2.0.jar apache-storm-0.9.3/lib/
cp kafka_2.10-0.8.1.1/libs/snappy-java-1.0.5.jar apache-storm-0.9.3/lib/
cp kafka_2.10-0.8.1.1/libs/zkclient-0.3.jar apache-storm-0.9.3/lib/
cp kafka_2.10-0.8.1.1/libs/log4j-1.2.15.jar apache-storm-0.9.3/lib/
cp kafka_2.10-0.8.1.1/libs/slf4j-api-1.7.2.jar apache-storm-0.9.3/lib/
cp kafka_2.10-0.8.1.1/libs/jopt-simple-3.2.jar apache-storm-0.9.3/lib/
```

Also copy ZooKeeper's zookeeper-3.4.6.jar into Storm's lib directory:

```
cp zookeeper-3.4.6/zookeeper-3.4.6.jar apache-storm-0.9.3/lib/
```
Write a Storm program to test the integration.

pom.xml:
```
<dependencies>
  <dependency>
    <groupId>junit</groupId>
    <artifactId>junit</artifactId>
    <version>3.8.1</version>
    <scope>test</scope>
  </dependency>
  <dependency>
    <groupId>org.apache.storm</groupId>
    <artifactId>storm-core</artifactId>
    <version>0.9.3</version>
  </dependency>
  <dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka_2.10</artifactId>
    <version>0.8.1.1</version>
    <exclusions>
      <exclusion>
        <groupId>org.apache.zookeeper</groupId>
        <artifactId>zookeeper</artifactId>
      </exclusion>
      <exclusion>
        <groupId>log4j</groupId>
        <artifactId>log4j</artifactId>
      </exclusion>
    </exclusions>
  </dependency>
</dependencies>
```
The spout:
```
package org.admln.flume_kafka_storm;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;

public class KafkaSpout extends BaseRichSpout {

    private static final long serialVersionUID = -9174998944310422274L;
    private SpoutOutputCollector collector;
    private ConsumerConnector consumer;
    private String topic;

    public KafkaSpout() {}

    public KafkaSpout(String topic) {
        this.topic = topic;
    }

    // Consumption happens in activate() below, so nextTuple() is a no-op.
    public void nextTuple() { }

    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
    }

    public void ack(Object msgId) { }

    // Unconventional but workable for this demo: the spout blocks in activate(),
    // pulling messages from Kafka and emitting them directly.
    public void activate() {
        consumer = kafka.consumer.Consumer.createJavaConsumerConnector(createConsumerConfig());
        Map<String, Integer> topicMap = new HashMap<String, Integer>();
        topicMap.put(topic, 1);

        System.out.println("*********Results********topic:" + topic);

        Map<String, List<KafkaStream<byte[], byte[]>>> streamMap = consumer.createMessageStreams(topicMap);
        KafkaStream<byte[], byte[]> stream = streamMap.get(topic).get(0);
        ConsumerIterator<byte[], byte[]> it = stream.iterator();
        while (it.hasNext()) {
            String value = new String(it.next().message());
            System.out.println("Storm received a message from Kafka -------> " + value);
            // Use the message itself as the message id so failed tuples
            // can be identified in fail().
            collector.emit(new Values(value), value);
        }
    }

    private static ConsumerConfig createConsumerConfig() {
        Properties props = new Properties();
        // ZooKeeper connection string
        props.put("zookeeper.connect", "x01:2181,x02:2181");
        // consumer group id
        props.put("group.id", "1");
        // Consumer offsets are stored in ZooKeeper but not updated in real time;
        // this sets the commit interval.
        props.put("auto.commit.interval.ms", "1000");
        props.put("zookeeper.session.timeout.ms", "10000");
        return new ConsumerConfig(props);
    }

    public void close() { }

    public void deactivate() { }

    public void fail(Object msgId) { }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }

    // Called before activate(); used here as a hook to hard-code the topic name.
    public Map<String, Object> getComponentConfiguration() {
        System.out.println("getComponentConfiguration called");
        topic = "test";
        return null;
    }
}
```
The word-splitter bolt:
```
package org.admln.flume_kafka_storm;

import java.util.Map;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

public class KafkaWordSplitterBolt extends BaseRichBolt {

    private static final long serialVersionUID = 886149197481637894L;
    private OutputCollector collector;

    public void prepare(Map stormConf, TopologyContext context,
            OutputCollector collector) {
        this.collector = collector;
    }

    public void execute(Tuple input) {
        String line = input.getString(0);
        String[] words = line.split(",");
        for (String word : words) {
            // Besides the values list sent to the next bolt, the input tuple itself
            // is passed to emit (tuple anchoring, one of collector.emit's overloads)
            // so that failures downstream can be recovered via ack/fail.
            collector.emit(input, new Values(word, 1));
        }
        collector.ack(input);
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "count"));
    }
}
```
The word-count bolt:
```
package org.admln.flume_kafka_storm;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.atomic.AtomicInteger;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;

public class KafkaWordCounterBolt extends BaseRichBolt {

    private static final long serialVersionUID = 886149197481637894L;
    private OutputCollector collector;
    private Map<String, AtomicInteger> counterMap;

    public void prepare(Map stormConf, TopologyContext context,
            OutputCollector collector) {
        this.collector = collector;
        this.counterMap = new HashMap<String, AtomicInteger>();
    }

    public void execute(Tuple input) {
        String word = input.getString(0);
        int count = input.getInteger(1);
        AtomicInteger ai = this.counterMap.get(word);
        if (ai == null) {
            ai = new AtomicInteger();
            this.counterMap.put(word, ai);
        }
        ai.addAndGet(count);
        collector.ack(input);
    }

    // Dump the accumulated counts when the topology shuts down
    // (only reliably invoked in local mode).
    public void cleanup() {
        Iterator<Entry<String, AtomicInteger>> iter = this.counterMap
                .entrySet().iterator();
        while (iter.hasNext()) {
            Entry<String, AtomicInteger> entry = iter.next();
            System.out.println(entry.getKey() + "\t:\t" + entry.getValue().get());
        }
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "count"));
    }
}
```
The topology:
```
package org.admln.flume_kafka_storm;

import java.util.HashMap;
import java.util.Map;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;

public class KafkaTopology {

    public static void main(String[] args) throws AlreadyAliveException,
            InvalidTopologyException {
        TopologyBuilder builder = new TopologyBuilder();
        // The first argument is the component's id within the topology; the last
        // is its parallelism (executor threads), which defaults to 1 if omitted.
        builder.setSpout("spout", new KafkaSpout(""), 1);
        builder.setBolt("bolt1", new KafkaWordSplitterBolt(), 2)
                .shuffleGrouping("spout");
        builder.setBolt("bolt2", new KafkaWordCounterBolt(), 2).fieldsGrouping(
                "bolt1", new Fields("word"));
        String name = KafkaTopology.class.getSimpleName();
        if (args != null && args.length > 0) {
            Config conf = new Config();
            // Point the submitter at the Nimbus host passed on the command line
            conf.put(Config.NIMBUS_HOST, args[0]);
            conf.setNumWorkers(2);
            StormSubmitter.submitTopologyWithProgressBar(name, conf,
                    builder.createTopology());
        } else {
            // No arguments: run in an in-process local cluster for testing
            Map conf = new HashMap();
            conf.put(Config.TOPOLOGY_WORKERS, 1);
            conf.put(Config.TOPOLOGY_DEBUG, true);
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("my-flume-kafka-storm-topology-integration",
                    conf, builder.createTopology());
        }
    }
}
```
You can run the topology locally in Eclipse or submit it to the cluster.

On the cluster:

```
bin/storm jar flume-kafka-storm.jar org.admln.flume_kafka_storm.KafkaTopology x01
```
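The jar name flume-kafka-storm.jar is simply this post's name for the packaged project; a minimal Maven packaging sketch (your actual target jar name depends on the artifactId and version in the pom):

```
mvn clean package
# copy the resulting target/*.jar to the Storm node, e.g. as flume-kafka-storm.jar
```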