Writing and Deploying Java Code for a Kafka, Storm, and ZooKeeper Cluster

Important notes:

1. All of the Java code here is written in IntelliJ IDEA.

2. This assumes the Kafka, ZooKeeper, and Storm cluster environments are already up and working.

3. Pay particular attention that the version numbers in the local pom file match the versions deployed in the cluster.

Writing the code:

First, the pom file:

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.daqsoft</groupId>
    <artifactId>kafka-demo</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>kafka-demo</name>
    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-core</artifactId>
            <version>1.1.0</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.44</version>
        </dependency>
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-kafka</artifactId>
            <version>1.1.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>0.10.1.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.11</artifactId>
            <version>0.10.1.0</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.zookeeper</groupId>
                    <artifactId>zookeeper</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.101tec</groupId>
                    <artifactId>zkclient</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>log4j-over-slf4j</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>3.8.1</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
    <!-- Packaging configuration -->
    <build>
        <plugins>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.6</source>
                    <target>1.6</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
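With the assembly plugin configured as above (it declares no execution binding), the fat jar can also be produced from the command line instead of through the IDE:

mvn clean package assembly:single

The jar-with-dependencies artifact then appears under target/.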

Two things in the pom above deserve attention:

a. The <scope>provided</scope> setting on the storm-core dependency: keep it in when packaging for the cluster (the cluster already supplies storm-core), but comment it out when running the project locally in the IDE.

b. Note the exclusions blocks, which strip conflicting transitive dependencies (the duplicate ZooKeeper client and the SLF4J/Log4j bindings) out of the kafka_2.11 dependency.
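Rather than hand-editing the scope each time, the scope can be driven by a Maven property and switched with a profile. A minimal sketch (the profile id local is an illustrative choice, not from the original setup): declare a default in the properties section,

    <properties>
        <!-- Default scope, used for cluster builds -->
        <storm.scope>provided</storm.scope>
    </properties>

reference it in the storm-core dependency as <scope>${storm.scope}</scope>, and add

    <profiles>
        <!-- Activate with mvn -Plocal to run inside the IDE -->
        <profile>
            <id>local</id>
            <properties>
                <storm.scope>compile</storm.scope>
            </properties>
        </profile>
    </profiles>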

Create four classes:

WordCountBoltCount

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;

import java.util.HashMap;
import java.util.Map;

/*
 * Second bolt component: receives the split words from the previous bolt
 * and keeps a running count per word.
 */
public class WordCountBoltCount extends BaseRichBolt {
	// Map holding the running totals
	private Map<String,Integer> result = new HashMap<String,Integer>();

	private OutputCollector collector;
	@Override
	public void execute(Tuple tuple) {
		// Pull the word and its count out of the incoming tuple
		String word = tuple.getStringByField("word");
		int count = tuple.getIntegerByField("count");

		// Check whether the word is already in the result map
		if(result.containsKey(word)){
			// Word already seen: add to its total
			int total = result.get(word);
			result.put(word, total+count);
		}else{
			// First time we see this word
			result.put(word, count);
		}

		// Print the running totals
		System.out.println("Current totals ============" + result);
		// Forward the result to a downstream bolt, e.g. one that writes to a database
		//this.collector.emit(new Values(word,result.get(word)));
	}

	@Override
	public void prepare(Map arg0, TopologyContext arg1, OutputCollector collector) {
		this.collector = collector;
	}

	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		// Declare the output schema, e.g.:  Beijing    2
		declarer.declare(new Fields("word","total"));
	}
}
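Because the topology below wires this bolt in with fieldsGrouping on the "word" field, every occurrence of a given word is routed to the same WordCountBoltCount task, so the per-task HashMap still produces correct totals when the bolt runs with parallelism greater than one.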

WordCountSplitBolt

import java.util.Map;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

/*
 * First bolt component: receives data from the spout and splits each sentence into words.
 */
public class WordCountSplitBolt  extends BaseRichBolt{
	private OutputCollector collector;

	@Override
	public void execute(Tuple tuple) {
		// Process a sentence sent over from the spout, e.g. "I love Beijing".
		// The field name "str" matches the output of KafkaSpout's StringScheme;
		// if this bolt is fed from the local WordCountSpout instead, that spout
		// declares its field as "sentence", so this lookup must change to match.
		String sentence = tuple.getStringByField("str");
		System.out.println("Received data: ================================" + sentence);
		// Split into words
		String[] words = sentence.split(" ");

		// Emit one (word, 1) tuple per word
		for(String word:words){
			this.collector.emit(new Values(word,1));
		}
	}

	@Override
	public void prepare(Map arg0, TopologyContext arg1, OutputCollector collector) {
		// Initialize this bolt component
		this.collector = collector;
	}

	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		// Declare the schema of the tuples this component emits:
		//                            I       1
		//                            love    1
		//                            Beijing 1
		declarer.declare(new Fields("word","count"));
	}

}

WordCountSpout

import java.util.Map;
import java.util.Random;

import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;

/*
 * The spout component is the data source of the topology, responsible for pulling
 * in external data. This class simulates a source for local testing; the final
 * topology uses KafkaSpout instead and leaves this one commented out.
 */
public class WordCountSpout extends BaseRichSpout {

	// Collector used to emit data to the next component
	private SpoutOutputCollector collector;

	// Simulated data
	private static String[] data = {"I love Beijing","I love China","Beijing is the capital of China"};

	@Override
	public void nextTuple() {
		// Sleep a while so the output stays readable
		Utils.sleep(3000);

		// Called repeatedly by the Storm framework to pull in external data

		// Pick a random index
		int random = (new Random()).nextInt(3);

		// Simulate producing one sentence
		String sentence = data[random];

		// Send the sentence to the next component (the split bolt)
		this.collector.emit(new Values(sentence));

		// Log what was emitted
		System.out.println("Spout emitted: "+ sentence);
	}

	@Override
	public void open(Map arg0, TopologyContext arg1, SpoutOutputCollector collector) {
		// Initialize the spout component
		this.collector = collector;
	}

	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		// Declare the schema of the emitted tuples
		declarer.declare(new Fields("sentence"));
	}
}

WordCountTopology

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.kafka.BrokerHosts;
import org.apache.storm.kafka.KafkaSpout;
import org.apache.storm.kafka.SpoutConfig;
import org.apache.storm.kafka.StringScheme;
import org.apache.storm.kafka.ZkHosts;
import org.apache.storm.spout.SchemeAsMultiScheme;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;

import java.util.Arrays;

/*
 * Entry point of the word-count topology (the main program).
 */
public class WordCountTopology {

	public static void main(String[] args) throws Exception {
		TopologyBuilder builder = new TopologyBuilder();

		// Configure the ZooKeeper cluster
		BrokerHosts zkHosts = new ZkHosts("192.168.2.100:2181");

		// Spout configuration: topic, ZooKeeper root path, and consumer id
		SpoutConfig spoutConfig = new SpoutConfig(zkHosts,
				"kafkaTest",
				"/kafkaTest",
				"kafka_test_storm1");

		// List of ZooKeeper servers
		spoutConfig.zkServers = Arrays.asList("192.168.2.100".split(","));

		// ZooKeeper port
		spoutConfig.zkPort = 2181;

		// Deserialize messages as strings. Downstream of the KafkaSpout, the value
		// can be read with getStringByField("str") or with (String) getValue(0).
		spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());

		spoutConfig.startOffsetTime = kafka.api.OffsetRequest.EarliestTime();
		KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

		// Set the topology's spout component
//		builder.setSpout("wordcount_spout", new WordCountSpout());
		// Data source
		builder.setSpout("wordcount_spout", kafkaSpout, 3);

		// First bolt: split sentences into words
		builder.setBolt("wordcount_splitbolt", new WordCountSplitBolt())
				.shuffleGrouping("wordcount_spout");

		// Second bolt: count words; fieldsGrouping routes the same word to the same task
		builder.setBolt("wordcount_countbolt", new WordCountBoltCount())
				.fieldsGrouping("wordcount_splitbolt", new Fields("word"));

		// Create the topology
		StormTopology wc = builder.createTopology();

		// Configuration
		Config conf = new Config();
		if (args != null && args.length > 0) {
			// Cluster mode: the first argument is the topology name
			conf.setNumWorkers(3);
			try {
				StormSubmitter.submitTopology(args[0], conf, wc);
			} catch (Exception e) {
				e.printStackTrace();
			}
		} else {
			// Local mode; set the worker count to the topic's partition count
			conf.setNumWorkers(3);
			LocalCluster cluster = new LocalCluster();
			cluster.submitTopology("otaTopology", conf, wc);
		}
	}
}
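Before submitting the topology, the kafkaTest topic needs some messages to count. Below is a minimal producer sketch using the kafka-clients dependency already in the pom; the class name TestProducer and the broker address 192.168.2.100:9092 are assumptions (the original setup only gives the ZooKeeper address), so adjust them to your cluster:

import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class TestProducer {
	public static void main(String[] args) {
		Properties props = new Properties();
		// Assumed broker address; replace with your Kafka broker list
		props.put("bootstrap.servers", "192.168.2.100:9092");
		props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
		props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

		KafkaProducer<String, String> producer = new KafkaProducer<String, String>(props);
		// Send a few test sentences into the topic the KafkaSpout reads from
		producer.send(new ProducerRecord<String, String>("kafkaTest", "I love Beijing"));
		producer.send(new ProducerRecord<String, String>("kafkaTest", "I love China"));
		producer.close();
	}
}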

Next comes the trickiest part: packaging.

In IntelliJ IDEA, the steps are:

1. Open Project Structure and configure a jar artifact for the project.

2. Click OK to confirm the artifact settings.

3. Run Build > Build Artifacts to build the jar.

4. When the build finishes, pick the jar up from the output path configured for the artifact.

Upload the jar to a directory on the server via FTP.

Then submit the topology with the storm command on the server. In Xshell, change into the directory containing the jar and run:

storm-1.1.0/bin/storm jar kafkademo.jar com.daqsoft.kafka.WordCountTopology tests

Breaking that command down:

storm-1.1.0/bin/storm jar — the storm subcommand for submitting a jar; note that it is jar, not -jar

kafkademo.jar — the name of the jar file

com.daqsoft.kafka.WordCountTopology — the fully qualified class whose main method should run

tests — the name under which the topology is submitted
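After submitting, storm's list subcommand shows the running topologies (and kill takes one down):

storm-1.1.0/bin/storm list

The topology named tests should show status ACTIVE. Note that on the cluster the System.out.println output ends up in the worker logs on the supervisor nodes, not in the shell you submitted from.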
