基于docker的storm集群搭建
一、创建docker容器
我所使用的镜像是ubuntu的镜像
1.创建storm的局域网
docker network create -d bridge --subnet 172.25.0.0/16 storm-net
2.创建主节点的容器
docker run -itd --name storm-master --network storm-net -h storm-master ubuntu
进入主节点的容器
docker exec -it storm-master /bin/bash
3.创建从节点容器
docker run -itd --name storm-slave1 --network storm-net -h storm-slave1 ubuntu
docker run -itd --name storm-slave2 --network storm-net -h storm-slave2 ubuntu
4.配置hosts文件
二、搭建zookeeper集群
1.配置java环境
下载地址:链接:https://pan.baidu.com/s/1OAkGjw5g2r5zkUu7h4Zqow
提取码:66jh
新建/usr/java目录
tar -zxvf jdk-8u161-linux-x64.tar.gz -C /usr/java/
配置环境变量
vim ~/.bashrc
在最后加上下列内容(一定注意环境变量后面不要加空格,否则容易出问题,jdk版本号看自己的版本,如果用网盘里的就不用更改)
export JAVA_HOME=/usr/java/jdk1.8.0_161
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$JAVA_HOME/bin:$PATH
使环境变量生效
source ~/.bashrc
查看java是否安装成功
java -version
2.下载zookeeper安装包
https://www.apache.org/dyn/closer.lua/zookeeper/zookeeper-3.6.2/apache-zookeeper-3.6.2-bin.tar.gz
3.将安装包发送到docker容器中
docker cp apache-zookeeper-3.6.2-bin.tar.gz storm-master:/usr/local
4.对zookeeper进行配置
首先进行解压
tar -zxvf apache-zookeeper-3.6.2-bin.tar.gz
进入到conf目录,红框中的是我们需要配置的文件
cp zoo_sample.cfg zoo.cfg
然后对zoo.cfg进行配置
在上述配置的data目录下创建myid文件,然后在文件中写上0,这个0对应的是配置文件中配置的id,后面两个节点依次为1和2
5.配置环境变量
export ZK_HOME=/usr/local/apache-zookeeper-3.6.2-bin
export PATH=$PATH:$ZK_HOME/bin
source /etc/profile
6.将zookeeper发送到从节点
scp -r /usr/local/apache-zookeeper-3.6.2-bin root@storm-slave1:/usr/local
scp -r /usr/local/apache-zookeeper-3.6.2-bin root@storm-slave2:/usr/local
然后对从节点中data目录下的myid改成自己的id
7.启动zookeeper集群
每个节点都启动zookeeper
zkServer.sh start
查看启动状态
zkServer.sh status
可以看出slave2被选为leader,master和slave1被选为了follower,启动成功
三、安装storm
1.下载并解压
下载1.2.3版本的
https://www.apache.org/dyn/closer.lua/storm/apache-storm-1.2.3/apache-storm-1.2.3.tar.gz
将其传到docker容器中
docker cp apache-storm-1.2.3.tar.gz storm-master:/usr/local
对其进行解压
tar -zxvf apache-storm-1.2.3.tar.gz
2.配置环境变量
vim /etc/profile
添加下面语句
export STORM_HOME=/usr/local/apache-storm-1.2.3
export PATH=${STORM_HOME}/bin:$PATH
激活配置
source /etc/profile
3.配置storm的配置文件
进入conf文件夹
修改storm.yaml
新增加
storm.local.dir: "/usr/local/apache-storm-1.2.3/localdir"
storm.zookeeper.port: 2181
storm.zookeeper.servers:
- "storm-master"
- "storm-slave1"
- "storm-slave2"
nimbus.seeds: ["storm-master"]
ui.host: 0.0.0.0
ui.port: 8080
supervisor.slots.ports:
- 6700
- 6701
- 6702
其中localdir需要创建
将storm发送到两个从节点上
scp -r /usr/local/apache-storm-1.2.3 root@storm-slave1:/usr/local
scp -r /usr/local/apache-storm-1.2.3 root@storm-slave2:/usr/local
4.配置python环境
apt-get install python
5.启动storm集群
在master上启动
storm nimbus &
storm ui &
storm logviewer &
在slave上启动
storm supervisor &
storm logviewer &
master上的进程
slave上的进程
通过web界面访问storm集群
搭建成功
四、使用storm集群运行wordcount
1.设置spout
WordReader
package edu.storm.samples.wordcount.spout;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.Map;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
public class WordReader extends BaseRichSpout {
private SpoutOutputCollector collector;
private FileReader fileReader;
private boolean completed = false;
public void ack(Object msgId) {
System.out.println("OK:" + msgId);
}
public void close() {
}
public void fail(Object msgId) {
System.out.println("FAIL:" + msgId);
}
/**
* The only thing that the methods will do It is emit each file line
*/
public void nextTuple() {
/**
* The nextuple it is called forever, so if we have been readed the file we will
* wait and then return
*/
if (completed) {
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
// Do nothing
}
return;
}
String str;
// Open the reader
BufferedReader reader = new BufferedReader(fileReader);
try {
// Read all lines
while ((str = reader.readLine()) != null) {
/**
* By each line emmit a new value with the line as a their
*/
this.collector.emit(new Values(str));
}
} catch (Exception e) {
throw new RuntimeException("Error reading tuple", e);
} finally {
completed = true;
}
}
/**
* We will create the file and get the collector object
*/
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
try {
this.fileReader = new FileReader(conf.get("wordsFile").toString());
} catch (FileNotFoundException e) {
throw new RuntimeException("Error reading file [" + conf.get("wordFile") + "]");
}
this.collector = collector;
}
/**
* Declare the output field "word"
*/
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("line"));
}
}
RandomSentenceSpout
package edu.storm.samples.wordcount.spout;
import java.util.Map;
import java.util.Random;
import java.util.UUID;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Spout that emits one random sentence roughly every second. Sentences whose
 * first character sorts below 'o' go to the "small" stream; the rest go to
 * "large". Each emit is anchored with a random UUID message id so that
 * ack()/fail() callbacks fire.
 */
public class RandomSentenceSpout extends BaseRichSpout {
    private static final long serialVersionUID = 9191503060592527669L;
    private static final Logger LOG = LoggerFactory.getLogger(RandomSentenceSpout.class);
    // Fix: hoisted out of nextTuple(), which rebuilt this array on every call.
    private static final String[] SENTENCES = {
            "the cow jumped over the moon", "an apple keeps the doctor away",
            "four score and seven years ago", "snow white and the seven dwarfs",
            "i am at two with nature" };
    SpoutOutputCollector collector;
    Random rand;

    @Override
    public void open(Map<String, Object> conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
        rand = new Random();
        LOG.debug("Spout opened in thread {}", Thread.currentThread().getId());
    }

    @Override
    public void nextTuple() {
        // Throttle emission to roughly one sentence per second.
        Utils.sleep(1000);
        final String sentence = SENTENCES[rand.nextInt(SENTENCES.length)];
        UUID uuid = UUID.randomUUID();
        // Route by the first character: below 'o' -> "small", otherwise "large".
        if (sentence.charAt(0) < 'o') {
            this.collector.emit("small", new Values(sentence), uuid);
        } else {
            this.collector.emit("large", new Values(sentence), uuid);
        }
    }

    @Override
    public void ack(Object id) {
        // Fix: parameterized logging instead of string concatenation.
        LOG.info("ACK:::{}", id);
    }

    @Override
    public void fail(Object id) {
        LOG.info("FAIL:::{}", id);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declareStream("large", new Fields("sentence"));
        declarer.declareStream("small", new Fields("sentence"));
    }
}
2.设置bolt
ReportBlot
强调一下数据库的连接,这里我使用的是远程连接数据库,首先需要手动创建数据库storm,然后在storm数据库中使用下面sql语句创建数据表wordcount:
-- Fix: `word` must be the PRIMARY KEY. ReportBlot relies on
-- "INSERT ... ON DUPLICATE KEY UPDATE", which only triggers on a
-- unique/primary-key collision; without the key every tuple would
-- insert a fresh duplicate row instead of updating the count.
CREATE TABLE `wordcount` (
  `word` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
  `count` int(11) NULL DEFAULT NULL,
  PRIMARY KEY (`word`)
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic;
SET FOREIGN_KEY_CHECKS = 1;
这里要注意数据库的连接,需要设置IP,端口,用户和密码,其他地方不用改,要确保storm集群能访问到该数据库(注意防火墙的关闭)
package edu.storm.samples.wordcount.blots;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.Map;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mysql.cj.jdbc.Driver;
public class ReportBlot extends BaseRichBolt {
private static final long serialVersionUID = 59312899674400751L;
private static final Logger LOG = LoggerFactory.getLogger(ReportBlot.class);
private PreparedStatement statement;
private Connection connection;
private OutputCollector collector;
public void prepare(Map<String, Object> config, TopologyContext context, OutputCollector collector) {
this.collector = collector;
try {
Class.forName("com.mysql.cj.jdbc.Driver");
connection = DriverManager.getConnection("jdbc:mysql://10.0.75.1:3306/storm?serverTimezone=UTC", "root", "123456");
statement = connection
.prepareStatement("insert into wordcount(word,count) values(?,?) on duplicate key update count=?");
LOG.debug("Report prepared in thread {}", Thread.currentThread().getId());
} catch (Exception e) {
throw new RuntimeException("Error to open mysql", e);
}
}
public void execute(Tuple tuple) {
String word = tuple.getStringByField("word");
Long count = tuple.getLongByField("count");
try {
statement.setString(1, word);
statement.setLong(2, count);
statement.setLong(3, count);
statement.execute();
LOG.debug("Thread: {} save tuple: {},{}", Thread.currentThread().getId(), word, count);
} catch (Exception e) {
this.collector.fail(tuple);
throw new RuntimeException("Error to open mysql", e);
}
this.collector.ack(tuple);
}
public void declareOutputFields(OutputFieldsDeclarer declarer) {
}
public void cleanup() {
if (connection != null)
try {
connection.close();
} catch (SQLException e) {
}
}
}
WordCounter
package edu.storm.samples.wordcount.blots;
import java.util.HashMap;
import java.util.Map;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Bolt that keeps an in-memory running count per word and emits
 * ("word", count) tuples downstream. As an ack/replay demonstration, every
 * 13th occurrence of a word is deliberately failed instead of acked.
 */
public class WordCounter extends BaseRichBolt {
    private static final long serialVersionUID = -4407535766970946560L;
    private static final Logger LOG = LoggerFactory.getLogger(WordCounter.class);
    // Per-task running totals; upstream fieldsGrouping("word") guarantees each
    // word is always routed to the same task instance.
    private Map<String, Long> counters;
    private OutputCollector collector;

    @Override
    public void prepare(Map<String, Object> conf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        this.counters = new HashMap<String, Long>();
        LOG.debug("Counter prepared in thread {}", Thread.currentThread().getId());
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "count"));
    }

    @Override
    public void execute(Tuple tuple) {
        String word = tuple.getString(0);
        // Fix: single map operation via merge() instead of containsKey()+get()+put().
        long count = counters.merge(word, 1L, Long::sum);
        if (count % 13 == 0) {
            // Deliberate failure demo. NOTE(review): the counter was already
            // incremented, so a replayed tuple counts twice — confirm intended.
            this.collector.fail(tuple);
        } else {
            // Anchored emit: downstream failures propagate back to the spout.
            collector.emit(tuple, new Values(word, count));
            this.collector.ack(tuple);
        }
    }
}
WordNormalizer
package edu.storm.samples.wordcount.blots;
import java.util.Map;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class WordNormalizer extends BaseRichBolt {
private static final long serialVersionUID = 2082054596379381224L;
private static final Logger LOG = LoggerFactory.getLogger(WordNormalizer.class);
private OutputCollector collector;
@Override
public void prepare(Map<String, Object> conf, TopologyContext context, OutputCollector collector) {
this.collector = collector;
LOG.debug("Normalizer prepared in thread {}", Thread.currentThread().getId());
}
@Override
public void execute(Tuple tuple) {
String sentence = tuple.getString(0);
String[] words = sentence.split(" ");
for (String word : words) {
word = word.trim();
if (word.isEmpty())
continue;
word = word.toLowerCase();
this.collector.emit(tuple, new Values(word));
}
this.collector.ack(tuple);
}
#
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("word"));
}
}
3.设置入口函数
StormApp
package edu.storm.samples.wordcount.main;
import edu.storm.samples.wordcount.blots.ReportBlot;
import edu.storm.samples.wordcount.blots.WordCounter;
import edu.storm.samples.wordcount.blots.WordNormalizer;
import edu.storm.samples.wordcount.spout.RandomSentenceSpout;
import org.apache.storm.Config;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;
import org.apache.storm.utils.Utils;
import java.util.ArrayList;
import java.util.Map;
/**
 * Entry point: builds the word-count topology and submits it to the cluster.
 *
 * Storm grouping modes (for reference):
 *  - shuffle grouping: tuples distributed randomly and evenly to downstream tasks
 *  - fields grouping: tuples with the same field value always go to the same task
 *  - global grouping: all tuples forced to a single task (the lowest task id)
 *  - all grouping: tuples replicated to every task — use with care
 *  - partial key grouping: fields grouping with load balancing
 *  - direct grouping: the producer explicitly picks the target task
 */
public class StormApp {
    private static final String TOPOLOGY_NAME = "word-count-topology";

    public static void main(String[] args)
            throws InvalidTopologyException, AuthorizationException, AlreadyAliveException {
        TopologyBuilder builder = new TopologyBuilder();
        // The spout emits sentences on two streams ("large"/"small"), each
        // consumed by its own normalizer group.
        builder.setSpout("reader", new RandomSentenceSpout());
        builder.setBolt("large", new WordNormalizer(), 2).shuffleGrouping("reader", "large");
        builder.setBolt("small", new WordNormalizer(), 2).shuffleGrouping("reader", "small");
        // fieldsGrouping on "word" keeps each word's running count in one task.
        builder.setBolt("counter", new WordCounter(), 3).fieldsGrouping("large", new Fields("word"))
                .fieldsGrouping("small", new Fields("word"));
        builder.setBolt("report", new ReportBlot()).shuffleGrouping("counter");

        Config conf = new Config();
        conf.setDebug(false);
        // conf.setNumWorkers(3);

        // Submit to the cluster. Fix: removed an unused Utils.readStormConfig()
        // local and the unused spout/bolt-id constants the code never referenced.
        StormSubmitter.submitTopology(TOPOLOGY_NAME, conf, builder.createTopology());
    }
}
4.maven依赖
<!-- Maven build for the word-count topology; packaged and submitted with `storm jar`. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>Storm</groupId>
<artifactId>edu</artifactId>
<version>0.0.1-SNAPSHOT</version>
<build>
<sourceDirectory>src</sourceDirectory>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.0</version>
<configuration>
<!-- NOTE(review): the tutorial installs JDK 8 (jdk1.8.0_161) on the cluster
     nodes, but this compiles for release 11 — confirm the build and runtime
     JDKs actually match, or lower this to 8. -->
<release>11</release>
</configuration>
</plugin>
</plugins>
</build>
<dependencies>
<!-- https://mvnrepository.com/artifact/org.apache.storm/storm-core -->
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-core</artifactId>
<!-- NOTE(review): the cluster installed earlier in this guide is Storm 1.2.3,
     but this client dependency is 2.1.0. The spout/bolt code here uses the
     2.x `Map<String, Object>` signatures, so either upgrade the cluster to
     2.x or align both sides — mixed 1.x/2.x will fail at submit/compile. -->
<version>2.1.0</version>
<!-- `provided`: the storm jars already exist on the cluster classpath. -->
<scope>provided</scope>
</dependency>
<!-- NOTE(review): kafka appears unused by the code shown in this guide —
     presumably left over from another example; consider removing. -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.12</artifactId>
<version>2.3.1</version>
</dependency>
<!-- JDBC driver used by ReportBlot (com.mysql.cj.jdbc.Driver). -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.17</version>
</dependency>
</dependencies>
</project>
5.maven打包并提交到集群
storm jar edu.jar edu.storm.samples.wordcount.main.StormApp
可以看出正常运行
通过ui界面查看运行结果
查看拓扑结构
可以看到数据库中的wordcount表已经有了数据,并不断地进行更新,代码运行成功