WINDOWS本机环境下搭建KAFKA+STORM
安装工具准备
JDK1.8
具体下载及安装方法可参考博主前一篇博文
kafka0.9.0.1
同上,可参考上一篇
storm1.0.2
https://storm.apache.org/downloads.html
下载之后解压即可
python2.7
官网下载即可
以上的版本搭配不是唯一方案(只是博主本人体验可以通过的方案之一)
JDK一定要下载版本1.7及以上(这是storm对JDK的版本要求)
kafka安装包内自带zookeeper,所以此处不需要再安装zookeeper
注意点:所有的安装文件夹名称最好不要带空格,避免后续的在启动命令过程中对于文件夹名称的报错处理
kafka搭建及测试
搭建
解压到当前文件夹即可,具体过程参照上一篇博文,不需要过多的配置
测试
举例:解压文件夹为kafka0.9.0.1(以下命令行位置均为该文件夹下)
开启zookeeper服务(对于kafka的支撑管理,必须先开启zookeeper服务才能后续开启kafka以及storm等服务)
bin\windows\zookeeper-server-start.bat config\zookeeper.properties
开启kafka服务
bin\windows\kafka-server-start.bat config\server.properties
创建一个消费者
bin\windows\kafka-console-consumer.bat --zookeeper localhost:2181 --topic test --from-beginning
创建一个生产者
bin\windows\kafka-console-producer.bat --broker-list localhost:9092 --topic test
到此,producer和consumer能实现发消息和收消息说明kafka搭建成功
图中consumer比producer多一个new是因为我之前用这个topic发过一个测试消息,而我consumer时候的设置参数是--from-beginning所以连同之前所有的消息一起接收了
storm搭建及测试
搭建
storm的搭建我是参考的这篇博主的
https://github.com/BVLC/caffe/wiki/Model-Zoo
（注:上面这个链接指向的是 Caffe Model Zoo,与 Storm 无关,疑为贴错链接;正确的 Storm 搭建参考教程链接待核实补充）
(其实主要也就参考了一下python的各种设置,STORM_HOME的设置,storm的安装其实很简单)
测试
举例:解压文件夹为storm1.0.2(以下命令均在解压文件夹\bin下完成)
启动zookeeper
启动nimbus,supervisor以及ui
storm nimbus
storm supervisor
storm ui
浏览器地址栏输入localhost:8080能看到STORM UI界面即为成功
kafka+storm连接及测试
(emmm本人JAVA新手入坑,有些东西理解表达解释的不对可以联系我大家交流交流)
eclipse下新建项目,并新建四个.java文件,这四个文件分别表示kafka producer , kafka consumer , storm spout 以及 storm bolt ,借此展示完整的数据流:从producer发出,被storm接收后处理并输出
KafkaProducer.java
KafkaConsumer.java
LogProcessTopology.java ----其中包含kafkaspout组件负责从kafka接收数据,并包含简单的topology搭建代码
LogProcessBolt.java ----bolt的逻辑处理代码,本代码只负责简单演示如何从kafka中取到数据,因此并无处理只是从input中接收并展示数据
KafkaProducer.java
package example.Demo;
import java.util.Properties;
import java.util.Random;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
/**
 * Demo producer: emits one fake "gift" JSON log to the Kafka topic "gold"
 * every 5 seconds, backing off for a minute after every 10 messages.
 *
 * NOTE: this class name shadows org.apache.kafka.clients.producer.KafkaProducer,
 * which is why the fully-qualified name is required at the construction site.
 */
public class KafkaProducer {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put("bootstrap.servers","localhost:9092");
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        String topic = "gold";
        Producer<String, String> producer =
                new org.apache.kafka.clients.producer.KafkaProducer<String, String>(props);
        int count = 0;
        // Sample record shape:
        // {"id":"865456863256326","vid":"1495267869123456","uid":"965406863256326","gold":150,"platform":"ios","timestamp":1495267869}
        // Simulated gift-sender ids
        String[] idArr = {"865456863256320","865456863256321","865456863256322","865456863256323","865456863256324","865456863256325","865456863256326","865456863256327","865456863256328","865456863256329"};
        // Simulated live-room video ids
        String[] vidArr = {"1495267869123450","1495267869123451","1495267869123452","1495267869123453","1495267869123454"};
        // Simulated streamer user ids
        String[] uidArr = {"964226522333220","964226522333221","964226522333222","964226522333223","964226522333224"};
        // Simulated client platforms
        String[] platformArr = {"android","ios"};
        Random random = new Random();
        while (true) {
            // Bounds derived from the arrays so the pools can grow without
            // touching the loop.
            int idIdx = random.nextInt(idArr.length);
            int vidIdx = random.nextInt(vidArr.length);
            // BUGFIX: uid previously reused the SAME random index as vid
            // (rint2), so uid and vid were always perfectly correlated in the
            // generated data; draw an independent index instead.
            int uidIdx = random.nextInt(uidArr.length);
            int platformIdx = random.nextInt(platformArr.length);
            String log = "{\"id\":\""+idArr[idIdx]+"\",\"vid\":\""+vidArr[vidIdx]+"\",\"uid\":\""+uidArr[uidIdx]+"\",\"gold\":"+random.nextInt(10000)+",\"platform\":\""+platformArr[platformIdx]+"\",\"timestamp\":"+System.currentTimeMillis()+"}";
            producer.send(new ProducerRecord<String, String>(topic, log));
            System.out.println(log);
            count++;
            Thread.sleep(1000*5);
            if (count % 10 == 0) {
                //break;
                // Pause a full minute after each batch of 10 messages.
                Thread.sleep(1000*60);
            }
        }
    }
}
KafkaConsumer.java
package example.Demo;
import java.util.ArrayList;
import java.util.Properties;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
/**
 * Demo consumer: subscribes to the Kafka topic "gold" and prints every
 * received record to stdout, forever.
 *
 * NOTE: this class name shadows org.apache.kafka.clients.consumer.KafkaConsumer,
 * which is why the fully-qualified name is required at the construction site.
 */
public class KafkaConsumer {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.setProperty("group.id", "1");
        String topic = "gold";
        Consumer<String, String> consumer =
                new org.apache.kafka.clients.consumer.KafkaConsumer<String, String>(props);
        ArrayList<String> topicList = new ArrayList<String>();
        topicList.add(topic);
        consumer.subscribe(topicList);
        while (true) {
            // BUGFIX: poll(1) busy-spun with a 1 ms timeout, pegging a CPU
            // core; 100 ms keeps the loop responsive without the spin.
            ConsumerRecords<String, String> records = consumer.poll(100);
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record);
            }
        }
    }
}
LogProcessTopology.java
package example.Demo;
import java.util.Map;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.kafka.BrokerHosts;
import org.apache.storm.kafka.KafkaSpout;
import org.apache.storm.kafka.SpoutConfig;
import org.apache.storm.kafka.StringScheme;
import org.apache.storm.kafka.ZkHosts;
import org.apache.storm.spout.SchemeAsMultiScheme;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;
import com.storm0709.stormProject0709.bolt.LogProcessBolt1;
import com.storm0709.stormProject0709.bolt.LogProcessBolt2;
import com.storm0709.stormProject0709.bolt.ParseLogBolt;
/**
 * Builds the demo topology KafkaSpout (topic "gold") -> ParseLogBolt and
 * submits it: to an in-process LocalCluster when run with no arguments,
 * otherwise to a remote Storm cluster via StormSubmitter.
 */
public class LogProcessTopology {
public static void main(String[] args) {
TopologyBuilder topologyBuilder = new TopologyBuilder();
// Spout that connects to Kafka
BrokerHosts hosts = new ZkHosts("localhost:2181"); // ZooKeeper address
String topic = "gold";
String zkRoot = "/kafkaspout0709";// this node is created in ZK; consumption state is stored under it
// used to persist the consumer's offsets
String id = "consumer123456";// group id — the unique identifier of this spout
SpoutConfig spoutConf = new SpoutConfig(hosts, topic, zkRoot, id);
spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme());// format in which the spout emits its data
// On the very first start no offset is stored in ZK yet; LatestTime makes the
// spout begin consuming from the newest data in the Kafka topic.
// EarliestTime() would start from the oldest data in the topic instead.
spoutConf.startOffsetTime = kafka.api.OffsetRequest.LatestTime();
//spoutConf.startOffsetTime = kafka.api.OffsetRequest.EarliestTime();
String SPOUT_ID = KafkaSpout.class.getSimpleName();
String BOLT_ID_1 = ParseLogBolt.class.getSimpleName();
// tuning parameters
// NOTE(review): ParseLogBolt is imported here from
// com.storm0709.stormProject0709.bolt, but the ParseLogBolt implementation
// shown later in this post declares package example.Demo — confirm which
// package is actually on the classpath.
topologyBuilder.setSpout(SPOUT_ID, new KafkaSpout(spoutConf));
topologyBuilder.setBolt(BOLT_ID_1, new ParseLogBolt()).shuffleGrouping(SPOUT_ID);
StormTopology createTopology = topologyBuilder.createTopology();
String topology_name = LogProcessTopology.class.getSimpleName();
Config config = new Config();
if(args.length==0){
// no CLI args: run in a local in-process cluster for testing
LocalCluster localCluster = new LocalCluster();
localCluster.submitTopology(topology_name, config, createTopology);
}else{
try {
StormSubmitter.submitTopology(topology_name, config, createTopology);
} catch (AlreadyAliveException e) {
e.printStackTrace();
} catch (InvalidTopologyException e) {
e.printStackTrace();
} catch (AuthorizationException e) {
e.printStackTrace();
}
}
}
}
LogProcessBolt.java
package example.Demo;
import java.util.HashMap;
import java.util.Map;
import org.apache.storm.Config;
import org.apache.storm.Constants;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
/**
 * Demo bolt: prints each tuple received from the KafkaSpout and acks it.
 * No real parsing happens yet — this only demonstrates that data flows from
 * Kafka into Storm.
 */
public class ParseLogBolt extends BaseRichBolt {
    private OutputCollector collector;
    // Intended to map sender id -> province; never populated or read in the
    // code shown here (see the plan described in prepare()).
    private Map<String, String> idCountryMap;

    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        // Planned: load the full user data set here — on init, pull
        // sender-id -> province from Redis; later a Storm timed task refreshes
        // it every half hour to pick up newly registered users.
    }

    @Override
    public void execute(Tuple input) {
        // FIX: the original paste had several statements jammed onto each
        // line; behavior is unchanged, only formatting was repaired.
        System.out.println("6..be here~");
        // "str" is presumably the field name declared by the spout's
        // StringScheme — confirm against the storm-kafka version in use.
        String log = input.getStringByField("str");
        System.out.println("7..be here~");
        System.out.println(log);
        this.collector.ack(input);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // NOTE(review): these fields are declared but execute() never calls
        // collector.emit(), so any downstream bolt subscribed to this stream
        // will receive nothing.
        declarer.declare(new Fields("province", "uid", "gold"));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        // Ask Storm to deliver a tick tuple every 30 minutes, for the planned
        // periodic refresh described in prepare().
        HashMap<String, Object> hashMap = new HashMap<String, Object>();
        hashMap.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, 30 * 60);
        return hashMap;
    }
}
在eclipse中启动上述1-3java文件
可以看到kafka producer在源源不断的发送文件
并且在storm中可以接收到文件(需要的逻辑处理功能可以在bolt中继续修改)
至此,KAFKA+STORM的本地搭建及流程跑通已完成