Run the demo to see how to submit a Topology
storm jar ../examples/storm-starter/storm-starter-topologies-1.0.1.jar storm.starter.StatefulTopology StatefulApp
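The general form of this command is:
storm jar path-to-topology-jar main-class [args ...]
where main-class is the class whose main method submits the topology, and everything after it is passed to that main method as arguments.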
Prerequisites
- Install the JDK
- Install Eclipse for Java, with Maven support
Create the project
In Eclipse, create a new Maven project named Storm1.
Edit pom.xml and add the following inside the <project> element:
<build>
  <plugins>
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-compiler-plugin</artifactId>
      <version>2.3.2</version>
      <configuration>
        <source>1.6</source>
        <target>1.6</target>
        <compilerVersion>1.6</compilerVersion>
      </configuration>
    </plugin>
  </plugins>
</build>
<repositories>
  <!-- Repository where the Storm dependencies can be found -->
  <repository>
    <id>clojars.org</id>
    <url>http://clojars.org/repo</url>
  </repository>
</repositories>
<dependencies>
  <!-- Storm dependency -->
  <dependency>
    <groupId>storm</groupId>
    <artifactId>storm</artifactId>
    <version>0.6.0</version>
  </dependency>
</dependencies>
Right-click the project and choose Run As → Maven install.
If you get the error "project configuration is not up-to-date with pom.xml", right-click the project and choose Maven → Update Project.
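If you prefer the command line over Eclipse, the equivalent build (assuming Maven is installed and on your PATH) is:
mvn clean install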
Create your first Topology
This example runs a word count.
Project structure (as reflected by the sources below):
src/main/java/TopologyMain.java
src/main/java/spouts/WordReader.java
src/main/java/bolts/WordNormalizer.java
src/main/java/bolts/WordCounter.java
src/main/resources/words.txt
Code:
TopologyMain.java
import spouts.WordReader;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;
import bolts.WordCounter;
import bolts.WordNormalizer;

public class TopologyMain {
    public static void main(String[] args) throws InterruptedException {
        // Define the topology
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("word-reader", new WordReader());
        builder.setBolt("word-normalizer", new WordNormalizer()).shuffleGrouping("word-reader");
        builder.setBolt("word-counter", new WordCounter(), 2).fieldsGrouping("word-normalizer", new Fields("word"));
        // Configuration
        Config conf = new Config();
        conf.put("wordsFile", args[0]);
        conf.setDebug(false);
        // Run the topology
        conf.put(Config.TOPOLOGY_MAX_SPOUT_PENDING, 1);
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("Getting-Started-Topologie", conf, builder.createTopology());
        Thread.sleep(1000);
        cluster.shutdown();
    }
}
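LocalCluster runs the whole topology in-process, which is convenient for development. As a rough sketch (not part of this example), submitting the same topology to a real cluster would use backtype.storm.StormSubmitter in place of the LocalCluster block, with the jar launched via the storm jar command shown at the top:

import backtype.storm.StormSubmitter;

// Instead of LocalCluster: submit to a running cluster. This call throws
// AlreadyAliveException and InvalidTopologyException, which main must
// declare or catch.
StormSubmitter.submitTopology("Getting-Started-Topologie", conf, builder.createTopology());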
WordCounter.java
package bolts;

import java.util.HashMap;
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Tuple;

public class WordCounter implements IRichBolt {
    Integer id;
    String name;
    Map<String, Integer> counters;
    private OutputCollector collector;

    /**
     * When this bolt shuts down (i.e. when the cluster is shut down),
     * print the word counts.
     */
    @Override
    public void cleanup() {
        System.out.println("-- Word counts [" + name + "-" + id + "] --");
        for (Map.Entry<String, Integer> entry : counters.entrySet()) {
            System.out.println(entry.getKey() + ": " + entry.getValue());
        }
    }

    /**
     * Count each word.
     */
    @Override
    public void execute(Tuple input) {
        String str = input.getString(0);
        /*
         * If the word is not yet in the map, add it with a count of 1;
         * otherwise increment its count.
         */
        if (!counters.containsKey(str)) {
            counters.put(str, 1);
        } else {
            Integer c = counters.get(str) + 1;
            counters.put(str, c);
        }
        // Acknowledge the tuple
        collector.ack(input);
    }

    /**
     * Initialization
     */
    @Override
    public void prepare(Map stormConf, TopologyContext context,
            OutputCollector collector) {
        this.counters = new HashMap<String, Integer>();
        this.collector = collector;
        this.name = context.getThisComponentId();
        this.id = context.getThisTaskId();
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // This bolt is a sink: it emits nothing, so no fields are declared.
    }
}
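As an aside, newer Storm releases ship backtype.storm.topology.base.BaseBasicBolt, which acks each tuple automatically so the explicit collector.ack call disappears. A minimal sketch of the same counter on top of it (the class name BasicWordCounter is mine, and this base class may not exist in the old 0.6.0 dependency used above):

package bolts;

import java.util.HashMap;
import java.util.Map;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Tuple;

public class BasicWordCounter extends BaseBasicBolt {
    private Map<String, Integer> counters;

    @Override
    public void prepare(Map stormConf, TopologyContext context) {
        counters = new HashMap<String, Integer>();
    }

    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        String word = input.getString(0);
        Integer count = counters.get(word);
        counters.put(word, count == null ? 1 : count + 1);
        // No explicit ack: BaseBasicBolt acks each tuple automatically.
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Sink bolt: no output fields.
    }
}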
WordNormalizer.java
package bolts;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

public class WordNormalizer implements IRichBolt {
    private OutputCollector collector;

    public void cleanup() {
    }

    /**
     * This bolt receives lines of text from the word file and normalizes
     * them: each line is lower-cased and split into its individual words.
     */
    public void execute(Tuple input) {
        String sentence = input.getString(0);
        String[] words = sentence.split(" ");
        for (String word : words) {
            word = word.trim();
            if (!word.isEmpty()) {
                word = word.toLowerCase();
                // Emit the word, anchored to the input tuple
                List<Tuple> anchors = new ArrayList<Tuple>();
                anchors.add(input);
                collector.emit(anchors, new Values(word));
            }
        }
        // Acknowledge the tuple
        collector.ack(input);
    }

    public void prepare(Map stormConf, TopologyContext context,
            OutputCollector collector) {
        this.collector = collector;
    }

    /**
     * This bolt emits only the "word" field.
     */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }
}
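Note the anchoring in execute: each emitted word is linked to the input tuple, so a downstream failure can be traced back to the spout. The List wrapper is only needed when anchoring to several tuples; OutputCollector also has a single-anchor overload, so execute could be sketched more compactly as:

public void execute(Tuple input) {
    for (String word : input.getString(0).split(" ")) {
        word = word.trim();
        if (!word.isEmpty()) {
            // emit(Tuple anchor, List<Object> tuple): anchors without a List wrapper
            collector.emit(input, new Values(word.toLowerCase()));
        }
    }
    collector.ack(input);
}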
WordReader.java
Implements the IRichSpout interface, reads the file line by line, and hands each line to the first bolt.
package spouts;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.Map;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;

public class WordReader implements IRichSpout {
    private SpoutOutputCollector collector;
    private FileReader fileReader;
    private boolean completed = false;
    private TopologyContext context;

    public boolean isDistributed() {
        return false;
    }

    public void ack(Object msgId) {
        System.out.println("OK:" + msgId);
    }

    public void close() {
    }

    public void fail(Object msgId) {
        System.out.println("FAIL:" + msgId);
    }

    /** Step 2:
     * This method emits the lines of text read from the file.
     */
    public void nextTuple() {
        /*
         * This method is called repeatedly. Once the whole file has been
         * read, just sleep briefly and return.
         */
        if (completed) {
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                // Do nothing
            }
            return;
        }
        String str;
        // Create the reader
        BufferedReader reader = new BufferedReader(fileReader);
        try {
            // Read all lines
            while ((str = reader.readLine()) != null) {
                /*
                 * Emit one value per line, using the line itself
                 * as the message ID
                 */
                this.collector.emit(new Values(str), str);
            }
        } catch (Exception e) {
            throw new RuntimeException("Error reading tuple", e);
        } finally {
            completed = true;
        }
    }

    /** Step 1:
     * Open the words file and keep a reference to the collector.
     */
    public void open(Map conf, TopologyContext context,
            SpoutOutputCollector collector) {
        try {
            this.context = context;
            this.fileReader = new FileReader(conf.get("wordsFile").toString());
        } catch (FileNotFoundException e) {
            throw new RuntimeException("Error reading file ["
                    + conf.get("wordsFile") + "]");
        }
        this.collector = collector;
    }

    /**
     * Declare the output field "line"
     */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("line"));
    }
}
The first spout method called is open. It receives the configuration object, a TopologyContext object holding all the topology data, and a SpoutOutputCollector object through which the spout emits the data the bolts will process.
The FileReader created in this method is used to read the file.
Next comes nextTuple, through which the spout emits the data to be processed. In this example it reads the file and emits one tuple per line.
nextTuple is called periodically from the same loop as ack and fail. When there is no work to do, it must release control of the thread so the other methods get a chance to run. So the first thing nextTuple does is check whether processing has finished: if so, it sleeps for one second (the Thread.sleep(1000) above) before returning, to lighten the load on the processor; if not, every line in the file is read and emitted.
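The ack and fail callbacks only fire because the spout emits each tuple with a message ID (here, the line itself):

// From nextTuple() above: the second argument is the message ID.
// Storm calls ack(msgId) once the tuple tree completes, and fail(msgId)
// on failure or timeout.
this.collector.emit(new Values(str), str);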
words.txt
Write a few English paragraphs of your choice.
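For example (any text will do; these lines are just an illustration):

hello world storm
storm makes stream processing simple
hello storm again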
Run the program:
mvn exec:java -Dexec.mainClass="TopologyMain" -Dexec.args="src/main/resources/words.txt"
When the local cluster shuts down, you should see the word counts printed by WordCounter.cleanup():
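The exact words and counts depend on your words.txt. With the hypothetical sample above, and word-counter running as two tasks, the tail of the log would look roughly like this (which words land in which task depends on the fields grouping, and the task ids shown are illustrative):

OK:hello world storm
OK:storm makes stream processing simple
OK:hello storm again
-- Word counts [word-counter-2] --
hello: 2
storm: 3
simple: 1
-- Word counts [word-counter-3] --
world: 1
makes: 1
stream: 1
processing: 1
again: 1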
You may also see an error reported. Per the GitHub pull request below, it is a bug in this version of Storm:
https://github.com/nathanmarz/storm/pull/102