Flume + Kafka + Storm

CentOS 6.6 + JDK 1.7
Flume 1.4 + Kafka 0.8.1.1 (Scala 2.10) + Storm 0.9.3
ZooKeeper 3.4.6

Cluster:
192.168.80.133 x01
192.168.80.134 x02

1. Set the hostname and hosts file on both machines
...
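For example, a minimal sketch of what this step might look like, using the IPs and hostnames listed above (run the analogous commands on the second machine with its own hostname):

  # on 192.168.80.133 (repeat on 192.168.80.134 with x02)
  hostname x01
  sed -i 's/^HOSTNAME=.*/HOSTNAME=x01/' /etc/sysconfig/network   # persist the hostname on CentOS 6
  # map both nodes in /etc/hosts on both machines
  echo "192.168.80.133 x01" >> /etc/hosts
  echo "192.168.80.134 x02" >> /etc/hosts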
2. Install the JDK on both machines and set environment variables
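A sketch of the environment variables, assuming the JDK is installed under /usr/java/jdk1.7.0 (a hypothetical path, adjust to the actual install location):

  # append to /etc/profile (or ~/.bash_profile), then source it
  export JAVA_HOME=/usr/java/jdk1.7.0       # hypothetical install path
  export PATH=$JAVA_HOME/bin:$PATH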
3. Download and install ZooKeeper and set environment variables
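One possible download-and-unpack sequence; the mirror URL and the /home/hadoop install directory are assumptions, adjust as needed:

  # illustrative mirror path and install dir
  wget http://archive.apache.org/dist/zookeeper/zookeeper-3.4.6/zookeeper-3.4.6.tar.gz
  tar -zxf zookeeper-3.4.6.tar.gz -C /home/hadoop
  export ZOOKEEPER_HOME=/home/hadoop/zookeeper-3.4.6
  export PATH=$ZOOKEEPER_HOME/bin:$PATH
  # the settings below go into $ZOOKEEPER_HOME/conf/zoo.cfg
  cp $ZOOKEEPER_HOME/conf/zoo_sample.cfg $ZOOKEEPER_HOME/conf/zoo.cfg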

  # example sakes.
  dataDir=/data/zookeeper/data
  # the port at which the clients will connect
  clientPort=2181
  # the maximum number of client connections.
  # increase this if you need to handle more clients
  #maxClientCnxns=60
  #
  # Be sure to read the maintenance section of the
  # administrator guide before turning on autopurge.
  #
  # http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
  #
  # The number of snapshots to retain in dataDir
  #autopurge.snapRetainCount=3
  # Purge task interval in hours
  # Set to "0" to disable auto purge feature
  #autopurge.purgeInterval=1

  server.1=x01:2888:3888
  server.2=x02:2888:3888
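Because of the server.1/server.2 entries, each node also needs a matching myid file in dataDir before ZooKeeper is started; a minimal sketch:

  mkdir -p /data/zookeeper/data
  # on x01:
  echo 1 > /data/zookeeper/data/myid
  # on x02:
  echo 2 > /data/zookeeper/data/myid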

zkServer.sh start
zkServer.sh status
4. Download and install Flume
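One possible install sequence, assuming the standard Apache archive layout and /home/hadoop as the install directory (both assumptions):

  # illustrative mirror path and install dir
  wget http://archive.apache.org/dist/flume/1.4.0/apache-flume-1.4.0-bin.tar.gz
  tar -zxf apache-flume-1.4.0-bin.tar.gz -C /home/hadoop
  export FLUME_HOME=/home/hadoop/apache-flume-1.4.0-bin
  export PATH=$FLUME_HOME/bin:$PATH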
5. Download and install Kafka
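A sketch, assuming the kafka_2.10-0.8.1.1 release referenced later in this post; the broker is pointed at the ZooKeeper ensemble and the test topic used further down is created up front:

  # assuming the kafka_2.10-0.8.1.1 release tarball
  tar -zxf kafka_2.10-0.8.1.1.tgz -C /home/hadoop
  cd /home/hadoop/kafka_2.10-0.8.1.1
  # edit config/server.properties so that zookeeper.connect=x01:2181,x02:2181
  bin/kafka-server-start.sh config/server.properties &
  # create the "test" topic used by the Flume sink and the Storm spout
  bin/kafka-topics.sh --create --zookeeper x01:2181 --replication-factor 1 --partitions 1 --topic test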
6. Integrate Flume and Kafka
Download the integration plugin flumeng-kafka-plugin: https://github.com/beyondj2ee/flumeng-kafka-plugin
Extract flume-conf.properties from the plugin, modify it as below, and place it in Flume's conf directory (it is loaded as conf/kafka.conf by the flume-ng command further down).

  ############################################
  # producer config
  ###########################################

  # agent section
  producer.sources = s
  producer.channels = c
  producer.sinks = r

  # source section
  producer.sources.s.type = spooldir
  producer.sources.s.spoolDir=/home/hadoop/testFlume
  producer.sources.s.fileHeader=false
  producer.sources.s.channels = c

  # Each sink's type must be defined
  producer.sinks.r.type = org.apache.flume.plugins.KafkaSink
  producer.sinks.r.metadata.broker.list=x01:9092
  producer.sinks.r.partition.key=0
  producer.sinks.r.partitioner.class=org.apache.flume.plugins.SinglePartition
  producer.sinks.r.serializer.class=kafka.serializer.StringEncoder
  producer.sinks.r.request.required.acks=0
  producer.sinks.r.max.message.size=1000000
  producer.sinks.r.producer.type=sync
  producer.sinks.r.custom.encoding=UTF-8
  producer.sinks.r.custom.topic.name=test

  # Specify the channel the sink should use
  producer.sinks.r.channel = c

  # Each channel's type is defined.
  producer.channels.c.type = memory
  producer.channels.c.capacity = 1000


Copy the jars shipped with the plugin into Flume's lib directory.
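Something along these lines; the exact location of the jars inside the plugin checkout is an assumption, check the repository layout:

  # illustrative paths; copy whatever jars the plugin checkout ships with
  cp flumeng-kafka-plugin/*.jar $FLUME_HOME/lib/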
Put a file into /home/hadoop/testFlume and start a console consumer in Kafka to test:
bin/flume-ng agent -n producer -c conf -f conf/kafka.conf -Dflume.root.logger=DEBUG,console
bin/kafka-console-consumer.sh --zookeeper x01:2181 --topic test --from-beginning
The test succeeds.
Note: if Flume is to carry Chinese text, the files should be UTF-8 encoded; other encodings easily produce garbled characters and can cause the Flume agent to die.
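For instance, a hypothetical comma-separated test file (the splitter bolt further down splits lines on commas) can be dropped into the spooling directory:

  # file name is arbitrary; the spooling source picks up any new file in this directory
  echo "hello,world,hello,storm" > /home/hadoop/testFlume/words.txt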
7. Install Storm
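A minimal sketch of the Storm setup, assuming apache-storm-0.9.3 is unpacked under /home/hadoop; the storm.yaml keys are the standard 0.9.x ones, the values match the cluster above, and storm.local.dir is an assumed path:

  tar -zxf apache-storm-0.9.3.tar.gz -C /home/hadoop
  cd /home/hadoop/apache-storm-0.9.3
  # add to conf/storm.yaml:
  #   storm.zookeeper.servers:
  #       - "x01"
  #       - "x02"
  #   nimbus.host: "x01"
  #   storm.local.dir: "/data/storm"
  # start nimbus and the UI on x01, a supervisor on both nodes
  bin/storm nimbus &
  bin/storm ui &
  bin/storm supervisor &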
8. Integrate Storm and Kafka

Copy the required Kafka jars into Storm's lib directory:

  cp kafka_2.10-0.8.1.1/libs/kafka_2.10-0.8.1.1.jar apache-storm-0.9.3/lib/
  cp kafka_2.10-0.8.1.1/libs/scala-library-2.10.1.jar apache-storm-0.9.3/lib/
  cp kafka_2.10-0.8.1.1/libs/metrics-core-2.2.0.jar apache-storm-0.9.3/lib/
  cp kafka_2.10-0.8.1.1/libs/snappy-java-1.0.5.jar apache-storm-0.9.3/lib/
  cp kafka_2.10-0.8.1.1/libs/zkclient-0.3.jar apache-storm-0.9.3/lib/
  cp kafka_2.10-0.8.1.1/libs/log4j-1.2.15.jar apache-storm-0.9.3/lib/
  cp kafka_2.10-0.8.1.1/libs/slf4j-api-1.7.2.jar apache-storm-0.9.3/lib/
  cp kafka_2.10-0.8.1.1/libs/jopt-simple-3.2.jar apache-storm-0.9.3/lib/



Copy ZooKeeper's zookeeper-3.4.6.jar into Storm's lib directory:

  cp zookeeper-3.4.6/zookeeper-3.4.6.jar apache-storm-0.9.3/lib/

Write a Storm program to test the integration.

pom.xml:


  <dependencies>
      <dependency>
          <groupId>junit</groupId>
          <artifactId>junit</artifactId>
          <version>3.8.1</version>
          <scope>test</scope>
      </dependency>
      <dependency>
          <groupId>org.apache.storm</groupId>
          <artifactId>storm-core</artifactId>
          <version>0.9.3</version>
      </dependency>
      <dependency>
          <groupId>org.apache.kafka</groupId>
          <artifactId>kafka_2.10</artifactId>
          <version>0.8.1.1</version>
          <exclusions>
              <exclusion>
                  <groupId>org.apache.zookeeper</groupId>
                  <artifactId>zookeeper</artifactId>
              </exclusion>
              <exclusion>
                  <groupId>log4j</groupId>
                  <artifactId>log4j</artifactId>
              </exclusion>
          </exclusions>
      </dependency>
  </dependencies>



Spout:


  package org.admln.flume_kafka_storm;

  import java.util.HashMap;
  import java.util.List;
  import java.util.Map;
  import java.util.Properties;

  import kafka.consumer.ConsumerConfig;
  import kafka.consumer.ConsumerIterator;
  import kafka.consumer.KafkaStream;
  import kafka.javaapi.consumer.ConsumerConnector;
  import backtype.storm.spout.SpoutOutputCollector;
  import backtype.storm.task.TopologyContext;
  import backtype.storm.topology.OutputFieldsDeclarer;
  import backtype.storm.topology.base.BaseRichSpout;
  import backtype.storm.tuple.Fields;
  import backtype.storm.tuple.Values;

  /**
   * Spout that reads messages from a Kafka topic with the high-level consumer
   * and emits each message as a one-field tuple ("word").
   */
  public class KafkaSpout extends BaseRichSpout {

      private static final long serialVersionUID = -9174998944310422274L;
      private SpoutOutputCollector collector;
      private ConsumerConnector consumer;
      private String topic;

      public KafkaSpout() {}

      public KafkaSpout(String topic) {
          this.topic = topic;
      }

      public void nextTuple() {    }

      public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
          this.collector = collector;
      }

      public void ack(Object msgId) {    }

      public void activate() {
          // The Kafka consumer loop runs here and emits tuples directly,
          // instead of emitting from nextTuple().
          consumer = kafka.consumer.Consumer.createJavaConsumerConnector(createConsumerConfig());
          Map<String, Integer> topickMap = new HashMap<String, Integer>();
          topickMap.put(topic, 1);

          System.out.println("*********Results********topic:" + topic);

          Map<String, List<KafkaStream<byte[], byte[]>>> streamMap = consumer.createMessageStreams(topickMap);
          KafkaStream<byte[], byte[]> stream = streamMap.get(topic).get(0);
          ConsumerIterator<byte[], byte[]> it = stream.iterator();
          while (it.hasNext()) {
              String value = new String(it.next().message());
              System.out.println("Storm received a message from Kafka -------> " + value);
              collector.emit(new Values(value), value);
          }
      }

      private static ConsumerConfig createConsumerConfig() {
          Properties props = new Properties();
          // ZooKeeper connection address
          props.put("zookeeper.connect", "x01:2181,x02:2181");
          // consumer group id
          props.put("group.id", "1");
          // Consumer-group offsets are kept in ZooKeeper and are not updated in
          // real time, so set an auto-commit interval.
          props.put("auto.commit.interval.ms", "1000");
          props.put("zookeeper.session.timeout.ms", "10000");
          return new ConsumerConfig(props);
      }

      public void close() {    }

      public void deactivate() {    }

      public void fail(Object msgId) {    }

      public void declareOutputFields(OutputFieldsDeclarer declarer) {
          declarer.declare(new Fields("word"));
      }

      public Map<String, Object> getComponentConfiguration() {
          System.out.println("getComponentConfiguration called");
          // Called before open()/activate(); used here to set the topic name.
          topic = "test";
          return null;
      }
  }



Bolt (word splitter):


  package org.admln.flume_kafka_storm;

  import java.util.Map;

  import backtype.storm.task.OutputCollector;
  import backtype.storm.task.TopologyContext;
  import backtype.storm.topology.OutputFieldsDeclarer;
  import backtype.storm.topology.base.BaseRichBolt;
  import backtype.storm.tuple.Fields;
  import backtype.storm.tuple.Tuple;
  import backtype.storm.tuple.Values;

  public class KafkaWordSplitterBolt extends BaseRichBolt {

      private static final long serialVersionUID = 886149197481637894L;
      private OutputCollector collector;

      public void prepare(Map stormConf, TopologyContext context,
              OutputCollector collector) {
          this.collector = collector;
      }

      public void execute(Tuple input) {
          String line = input.getString(0);
          String[] words = line.split(",");
          for (String word : words) {
              // Emit the values for the next bolt and anchor them to the input
              // tuple; this emit overload enables ack/fail-based replay.
              collector.emit(input, new Values(word, 1));
          }
          collector.ack(input);
      }

      public void declareOutputFields(OutputFieldsDeclarer declarer) {
          declarer.declare(new Fields("word", "count"));
      }
  }



Bolt (word count):


  package org.admln.flume_kafka_storm;

  import java.util.HashMap;
  import java.util.Iterator;
  import java.util.Map;
  import java.util.Map.Entry;
  import java.util.concurrent.atomic.AtomicInteger;

  import backtype.storm.task.OutputCollector;
  import backtype.storm.task.TopologyContext;
  import backtype.storm.topology.OutputFieldsDeclarer;
  import backtype.storm.topology.base.BaseRichBolt;
  import backtype.storm.tuple.Fields;
  import backtype.storm.tuple.Tuple;

  public class KafkaWordCounterBolt extends BaseRichBolt {
      private static final long serialVersionUID = 886149197481637894L;
      private OutputCollector collector;
      private Map<String, AtomicInteger> counterMap;

      public void prepare(Map stormConf, TopologyContext context,
              OutputCollector collector) {
          this.collector = collector;
          this.counterMap = new HashMap<String, AtomicInteger>();
      }

      public void execute(Tuple input) {
          String word = input.getString(0);
          int count = input.getInteger(1);
          AtomicInteger ai = this.counterMap.get(word);
          if (ai == null) {
              ai = new AtomicInteger();
              this.counterMap.put(word, ai);
          }
          ai.addAndGet(count);
          collector.ack(input);
      }

      public void cleanup() {
          Iterator<Entry<String, AtomicInteger>> iter = this.counterMap
                  .entrySet().iterator();
          while (iter.hasNext()) {
              Entry<String, AtomicInteger> entry = iter.next();
              System.out.println(entry.getKey() + "\t:\t" + entry.getValue().get());
          }
      }

      public void declareOutputFields(OutputFieldsDeclarer declarer) {
          declarer.declare(new Fields("word", "count"));
      }
  }



Topology:


  package org.admln.flume_kafka_storm;

  import java.util.HashMap;
  import java.util.Map;

  import backtype.storm.Config;
  import backtype.storm.LocalCluster;
  import backtype.storm.StormSubmitter;
  import backtype.storm.generated.AlreadyAliveException;
  import backtype.storm.generated.InvalidTopologyException;
  import backtype.storm.topology.TopologyBuilder;
  import backtype.storm.tuple.Fields;

  public class KafkaTopology {

      public static void main(String[] args) throws AlreadyAliveException,
              InvalidTopologyException {
          TopologyBuilder builder = new TopologyBuilder();
          builder.setSpout("spout", new KafkaSpout(""), 1);
          // "bolt1" is this bolt's ID within the topology; the parallelism hint (2)
          // is the number of executors and defaults to 1 if omitted.
          builder.setBolt("bolt1", new KafkaWordSplitterBolt(), 2)
                  .shuffleGrouping("spout");
          builder.setBolt("bolt2", new KafkaWordCounterBolt(), 2).fieldsGrouping(
                  "bolt1", new Fields("word"));
          String name = KafkaTopology.class.getSimpleName();
          if (args != null && args.length > 0) {
              Config conf = new Config();
              // specify the Nimbus host
              conf.put(Config.NIMBUS_HOST, args[0]);
              conf.setNumWorkers(2);
              StormSubmitter.submitTopologyWithProgressBar(name, conf,
                      builder.createTopology());
          } else {
              // no arguments: run in local mode
              Map conf = new HashMap();
              conf.put(Config.TOPOLOGY_WORKERS, 1);
              conf.put(Config.TOPOLOGY_DEBUG, true);
              LocalCluster cluster = new LocalCluster();
              cluster.submitTopology("my-flume-kafka-storm-topology-integration",
                      conf, builder.createTopology());
          }
      }
  }



The topology can be run locally straight from Eclipse, or submitted to the cluster.

On the cluster:

bin/storm jar flume-kafka-storm.jar org.admln.flume_kafka_storm.KafkaTopology x01
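The jar itself would be built with Maven beforehand, e.g. a sketch assuming the pom above (when submitting to a real cluster, storm-core is usually marked <scope>provided</scope> or otherwise kept out of the packaged jar, since the cluster already supplies it):

  mvn clean package
  # the jar produced under target/ is what is used above as flume-kafka-storm.jar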
