SETTING UP KAFKA + STORM ON A LOCAL WINDOWS MACHINE

Preparing the installation tools

JDK1.8
For the download and installation steps, see my previous post.
kafka0.9.0.1
Same as above; see the previous post.
storm1.0.2
https://storm.apache.org/downloads.html
Just download it and unzip.
python2.7
Download it from the official site.

The version combination above is not the only workable one (it is simply one combination that worked in my own testing).
The JDK must be version 1.7 or above (this is Storm's JDK requirement).
The Kafka package ships with ZooKeeper, so there is no need to install ZooKeeper separately here.
Note: it is best to keep all installation folder names free of spaces, to avoid having to work around folder-name errors when running the startup commands later.

Setting up and testing Kafka

Setup

Just unzip it into the current folder; see the previous post for the detailed steps. No extra configuration is needed.

Testing

Example: the unzipped folder is kafka0.9.0.1 (all command lines below are run from inside this folder).

Start the ZooKeeper service (it supports and manages Kafka; ZooKeeper must be started before Kafka, Storm, and the other services can be started)
bin\windows\zookeeper-server-start.bat config\zookeeper.properties

Start the Kafka service
bin\windows\kafka-server-start.bat config\server.properties

Create a consumer
bin\windows\kafka-console-consumer.bat --zookeeper localhost:2181 --topic test --from-beginning

Create a producer
bin\windows\kafka-console-producer.bat --broker-list localhost:9092 --topic test

At this point, if the producer can send messages and the consumer can receive them, the Kafka setup is working.
In my run the consumer shows one more message than the producer sent, because I had previously sent a test message to this topic; since the consumer was started with --from-beginning, it picked up all earlier messages as well.
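
If the topic test does not exist when these commands are run, Kafka 0.9 normally auto-creates it (auto.create.topics.enable defaults to true); if auto-creation has been turned off in server.properties, the topic can be created explicitly first:
bin\windows\kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic test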

Setting up and testing Storm

Setup

For the Storm setup I followed this blogger's post:
https://github.com/BVLC/caffe/wiki/Model-Zoo
(I really only used it for the various Python settings and the STORM_HOME setting; installing Storm itself is very simple.)
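
As a rough sketch of that environment setup (the folder paths below are placeholders for wherever the tools were actually unzipped or installed, so adjust them to your own machine; the variables can equally be set through the System Properties dialog):
setx JAVA_HOME "D:\Java\jdk1.8.0_77"
setx STORM_HOME "D:\storm1.0.2"
setx PATH "%PATH%;%JAVA_HOME%\bin;%STORM_HOME%\bin;C:\Python27"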

Testing

Example: the unzipped folder is storm1.0.2 (all commands below are run from the unzipped folder's \bin directory).

Start ZooKeeper (the same zookeeper-server-start.bat command from the Kafka section above)

Start nimbus, supervisor, and the UI (each in its own command window, since they keep running in the foreground)
storm nimbus
storm supervisor
storm ui

If entering localhost:8080 in the browser address bar brings up the Storm UI page, the setup succeeded.
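
If the daemons cannot reach ZooKeeper or the UI does not come up, conf\storm.yaml is worth checking. A minimal single-node configuration (the values below are assumptions for a local setup that reuses the ZooKeeper started earlier) looks roughly like this:
storm.zookeeper.servers:
    - "localhost"
nimbus.seeds: ["localhost"]
storm.local.dir: "D:/storm1.0.2/data"
ui.port: 8080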

Connecting Kafka + Storm and testing

(I am new to Java, so if anything here is misunderstood or poorly explained, feel free to contact me and discuss.)
Create a new project in Eclipse with four .java files. They represent the Kafka producer, the Kafka consumer, the Storm spout, and the Storm bolt, and together they show the complete flow of data from the producer to Storm, where it is received, processed, and printed.
KafkaProducer.java
KafkaConsumer.java
LogProcessTopology.java ---- contains the KafkaSpout component that receives data from Kafka, plus simple topology-building code
ParseLogBolt.java ---- the bolt's processing logic; this code only demonstrates how to pull data out of Kafka, so it does no real processing and simply receives the input and prints it (the dependencies the project needs are sketched below this list)
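
The project needs the Storm, storm-kafka, Kafka, and fastjson jars on the classpath. If the project is built with Maven, the dependency block looks roughly like this (only one possible setup; the exact versions and the Scala suffix on the kafka artifact are assumptions matching the versions used above, and the old log4j binding is excluded to avoid clashing with storm-core's logging):

<dependencies>
    <dependency>
        <groupId>org.apache.storm</groupId>
        <artifactId>storm-core</artifactId>
        <version>1.0.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.storm</groupId>
        <artifactId>storm-kafka</artifactId>
        <version>1.0.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka_2.10</artifactId>
        <version>0.9.0.1</version>
        <exclusions>
            <exclusion>
                <groupId>org.slf4j</groupId>
                <artifactId>slf4j-log4j12</artifactId>
            </exclusion>
            <exclusion>
                <groupId>log4j</groupId>
                <artifactId>log4j</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.47</version>
    </dependency>
</dependencies>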

KafkaProducer.java

package example.Demo;

import java.util.Properties;
import java.util.Random;

import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class KafkaProducer {
	
	public static void main(String[] args) throws Exception {
		Properties props = new Properties();  
		props.put("bootstrap.servers","localhost:9092");
		props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");  
		props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");  
		String topic = "gold";
		
		Producer<String, String> producer = new org.apache.kafka.clients.producer.KafkaProducer<String, String>(props);  
		int count = 0;
		//{"id":"865456863256326","vid":"1495267869123456","uid":"965406863256326","gold":150,"platform":"ios","timestamp":1495267869}
		//simulated gift-sender ids
		String[] idArr = {"865456863256320","865456863256321","865456863256322","865456863256323","865456863256324","865456863256325","865456863256326","865456863256327","865456863256328","865456863256329"};
		//simulated live-stream video ids
		String[] vidArr = {"1495267869123450","1495267869123451","1495267869123452","1495267869123453","1495267869123454"};
		//simulated streamer user ids
		String[] uidArr = {"964226522333220","964226522333221","964226522333222","964226522333223","964226522333224"};
		//simulated client platforms
		String[] platformArr = {"android","ios"};
		Random random = new Random();
		while(true){
			int rint1 = random.nextInt(10);
			int rint2 = random.nextInt(5);
			int rint3 = random.nextInt(2);
			String log = "{\"id\":\""+idArr[rint1]+"\",\"vid\":\""+vidArr[rint2]+"\",\"uid\":\""+uidArr[rint2]+"\",\"gold\":"+random.nextInt(10000)+",\"platform\":\""+platformArr[rint3]+"\",\"timestamp\":"+System.currentTimeMillis()+"}";
			producer.send(new ProducerRecord<String, String>(topic, log));  
			System.out.println(log);
			count++;
			Thread.sleep(1000*5);
			if(count%10 == 0){
				//break;
				Thread.sleep(1000*60);
			}
		}
	}

}

KafkaConsumer.java

package example.Demo;

import java.util.ArrayList;
import java.util.Properties;

import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;

public class KafkaConsumer {
	
	public static void main(String[] args) {
		Properties props = new Properties();  
		props.put("bootstrap.servers", "localhost:9092");
		props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");  
		props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");  
		props.setProperty("group.id", "1");
		String topic = "gold";
		
		Consumer<String, String> consumer = new org.apache.kafka.clients.consumer.KafkaConsumer<String, String>(props);  
		ArrayList<String> topicList = new ArrayList<String>();
		topicList.add(topic);
		consumer.subscribe(topicList);  
		
		while(true){
		    ConsumerRecords<String, String> records = consumer.poll(1);  
		    for (ConsumerRecord<String, String> record : records) {  
		        System.out.println(record);  
		    }  
		}
	}

}
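
To double-check what the producer is writing independently of this Java consumer, the console consumer from the Kafka section can also be pointed at the gold topic:
bin\windows\kafka-console-consumer.bat --zookeeper localhost:2181 --topic gold --from-beginning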

LogProcessTopology.java

package example.Demo;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.kafka.BrokerHosts;
import org.apache.storm.kafka.KafkaSpout;
import org.apache.storm.kafka.SpoutConfig;
import org.apache.storm.kafka.StringScheme;
import org.apache.storm.kafka.ZkHosts;
import org.apache.storm.spout.SchemeAsMultiScheme;
import org.apache.storm.topology.TopologyBuilder;

public class LogProcessTopology {	
	public static void main(String[] args) {
		TopologyBuilder topologyBuilder = new TopologyBuilder();
		
		//spout that connects to Kafka
		BrokerHosts hosts = new ZkHosts("localhost:2181"); //ZooKeeper address
		String topic = "gold";
		String zkRoot = "/kafkaspout0709";//this node is created in ZK and the consumption state is stored under it,
		//i.e. it is used to save the consumer's offsets
		String id = "consumer123456";//group id, the unique identifier of this spout
		
		SpoutConfig spoutConf = new SpoutConfig(hosts, topic, zkRoot, id);
		spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme());//the format in which the spout emits data
		//On the first start there is no saved consumption position in ZK yet; LatestTime() means consumption starts from the newest data in the Kafka topic.
		//EarliestTime() means consumption starts from the oldest data in the topic.
		spoutConf.startOffsetTime = kafka.api.OffsetRequest.LatestTime();
		//spoutConf.startOffsetTime = kafka.api.OffsetRequest.EarliestTime();
		
		
		String SPOUT_ID = KafkaSpout.class.getSimpleName();
		String BOLT_ID_1 = ParseLogBolt.class.getSimpleName();
		//tuning parameters
		topologyBuilder.setSpout(SPOUT_ID, new KafkaSpout(spoutConf));
		topologyBuilder.setBolt(BOLT_ID_1, new ParseLogBolt()).shuffleGrouping(SPOUT_ID);
		
		StormTopology createTopology = topologyBuilder.createTopology(); 
		String topology_name = LogProcessTopology.class.getSimpleName();
		Config config = new Config();
		if(args.length==0){
			//create a local (in-process) cluster
			LocalCluster localCluster = new LocalCluster();
			localCluster.submitTopology(topology_name, config, createTopology);
		}else{
			try {
				StormSubmitter.submitTopology(topology_name, config, createTopology);
			} catch (AlreadyAliveException e) {
				e.printStackTrace();
			} catch (InvalidTopologyException e) {
				e.printStackTrace();
			} catch (AuthorizationException e) {
				e.printStackTrace();
			}
		}
	}

}
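
With no program arguments, the main method runs the topology in the in-process LocalCluster branch, which is all this demo needs. To run it on the Storm daemons started earlier instead, the project would be packaged into a jar and submitted with the storm client, roughly like this (the jar name is a placeholder; any argument is enough to select the StormSubmitter branch):
storm jar stormdemo.jar example.Demo.LogProcessTopology remote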

ParseLogBolt.java

package example.Demo;

import java.util.HashMap;
import java.util.Map;

import org.apache.storm.Config;
import org.apache.storm.Constants;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;

public class ParseLogBolt extends BaseRichBolt {
	private OutputCollector collector;
	private Map<String, String> idCountryMap;

	@Override
	public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
		this.collector = collector;
		// Load the full set of user info here.
		// At initialization, load the gift-sender id and province info from Redis; later a Storm scheduled (tick) task syncs every half hour to pull in newly registered users' info.
	}

	@Override
	public void execute(Tuple input) {

		 System.out.println("6..be here~"); String log =
		 input.getStringByField("str"); System.out.println("7..be here~");
		 System.out.println(log); this.collector.ack(input);
		 
	}

	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		declarer.declare(new Fields("province", "uid", "gold"));
	}

	@Override
	public Map<String, Object> getComponentConfiguration() {
		HashMap<String, Object> hashMap = new HashMap<String, Object>();
		hashMap.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, 30 * 60);
		return hashMap;
	}

}
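
Since declareOutputFields already declares province, uid, and gold, a natural next step is to parse the JSON log in execute and emit those fields to a downstream bolt. The following is only a minimal sketch of that idea, not part of the original demo; it assumes idCountryMap is the id-to-province map that prepare is meant to fill from Redis:

	@Override
	public void execute(Tuple input) {
		// Tick tuples arrive every TOPOLOGY_TICK_TUPLE_FREQ_SECS seconds (30 minutes here);
		// they are the place to refresh idCountryMap from Redis.
		if (Constants.SYSTEM_COMPONENT_ID.equals(input.getSourceComponent())) {
			// reload the id -> province mapping here
			this.collector.ack(input);
			return;
		}
		String log = input.getStringByField("str");
		JSONObject json = JSON.parseObject(log);
		String id = json.getString("id");
		String uid = json.getString("uid");
		long gold = json.getLongValue("gold");
		String province = (idCountryMap == null) ? "unknown" : idCountryMap.getOrDefault(id, "unknown");
		this.collector.emit(new Values(province, uid, gold));
		this.collector.ack(input);
	}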

Run files 1-3 above (KafkaProducer, KafkaConsumer, and LogProcessTopology) in Eclipse.
You can see the Kafka producer continuously sending messages,
and Storm receiving them (whatever processing logic you need can then be added in the bolt).

At this point, the local Kafka + Storm setup and the end-to-end flow are complete.
