最近常和Storm打交道,今天第一次写个总结。
为什么Storm上跑的应用叫做Topology ( 拓扑 ) ? 这得从Stream Grouping概念说起。
设计一个Topology, 核心就是设计Storm上的数据流如何被Topology的各个Bolt消费。Stream Grouping描述了Spout与每一个Bolt之间、以及Bolt与Bolt之间的上下游关系, 规定每个Bolt结点消费哪些数据。我猜应该是因为这种结点间的连接关系才叫"拓扑"。
Stream Grouping的类型:
Grouping代码示例 (Topology主函数):
import spouts.WordReader;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;
import bolts.WordCounter;
import bolts.WordNormalizer;
public class TopologyMain {
public static void main(String[] args) throws InterruptedException {
//定义 Topology
TopologyBuilder builder = new TopologyBuilder());
builder.setSpout("word-reader", new WordReader());
builder.setBolt("word-normalizer", new WordNormalizer()).shuffleGrouping("word-reader");
builder.setBolt("word-counter", new WordCounter(),2).fieldsGrouping("word-normalizer", new Fields("word"));
//配置 Topology
Config conf = new Config();
conf.put("wordsFile", args[0]);
conf.setDebug(false);
//本地模式运行拓扑
conf.put(Config.TOPOLOGY_MAX_SPOUT_PENDING, 1);
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("Getting-Started-Topologie", conf, builder.createTopology();
Thread.sleep(1000);
cluster.shutdown();
}
Spout 常用接口:
Spout代码示例:
src/main/java/spouts/WordReader.java:
package spouts;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.Map;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
public class WordReader implements IRichSpout {
private SpoutOutputCollector collector;
private FileReader fileReader;
private boolean completed = false;
private TopologyContext context;
public boolean isDistributed() {return false;}
<span style="color:#3333FF;">public void ack(Object msgId)</span> {
System.out.println("OK:"+msgId);
}
<span style="color:#3333FF;">public void close() </span> {}
<span style="color:#3333FF;">public void fail(Object msgId)</span> {
System.out.println("FAIL:"+msgId);
}
<span style="color:#3333FF;">public void nextTuple()</span> {
if(completed){
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
//什么也不做
}
return;
}
String str;
//创建reader
BufferedReader reader = new BufferedReader(fileReader);
try{
//读所有文本行
while((str = reader.readLine()) != null){
/**
* 按行发布一个新值
*/
this.collector.emit(new Values(str),str);
}
}catch(Exception e){
throw new RuntimeException("Error reading tuple",e);
}finally{
completed = true;
}
}
<span style="color:#3333FF;">public void open(Map conf, TopologyContext context, SpoutOutputCollector collector)</span> {
try {
this.context = context;
this.fileReader = new FileReader(conf.get("wordsFile").toString());
} catch (FileNotFoundException e) {
throw new RuntimeException("Error reading file ["+conf.get("wordFile")+"]");
}
this.collector = collector;
}
<span style="color:#3333FF;">public void declareOutputFields(OutputFieldsDeclarer declarer)</span> {
declarer.declare(new Fields("line"));
}
}
src/main/java/spouts/WordReader.java:
package spouts;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.Map;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
public class WordReader implements IRichSpout {
private SpoutOutputCollector collector;
private FileReader fileReader;
private boolean completed = false;
private TopologyContext context;
public boolean isDistributed() {return false;}
<span style="color:#3333FF;">public void ack(Object msgId)</span> {
System.out.println("OK:"+msgId);
}
<span style="color:#3333FF;">public void close() </span> {}
<span style="color:#3333FF;">public void fail(Object msgId)</span> {
System.out.println("FAIL:"+msgId);
}
<span style="color:#3333FF;">public void nextTuple()</span> {
if(completed){
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
//什么也不做
}
return;
}
String str;
//创建reader
BufferedReader reader = new BufferedReader(fileReader);
try{
//读所有文本行
while((str = reader.readLine()) != null){
/**
* 按行发布一个新值
*/
this.collector.emit(new Values(str),str);
}
}catch(Exception e){
throw new RuntimeException("Error reading tuple",e);
}finally{
completed = true;
}
}
<span style="color:#3333FF;">public void open(Map conf, TopologyContext context, SpoutOutputCollector collector)</span> {
try {
this.context = context;
this.fileReader = new FileReader(conf.get("wordsFile").toString());
} catch (FileNotFoundException e) {
throw new RuntimeException("Error reading file ["+conf.get("wordFile")+"]");
}
this.collector = collector;
}
<span style="color:#3333FF;">public void declareOutputFields(OutputFieldsDeclarer declarer)</span> {
declarer.declare(new Fields("line"));
}
}
Bolt常用接口:
Bolt代码示例:
src/main/java/bolts/WordNormalizer.java:
package bolts;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
public class WordNormalizer implements IRichBolt {
    private OutputCollector collector;

    /** Nothing to release. */
    public void cleanup() {}

    /**
     * Splits the incoming line into words, lower-cases each non-empty word
     * and emits it anchored to the input tuple, then acks the input.
     */
    public void execute(Tuple input) {
        String sentence = input.getString(0);
        for (String word : sentence.split(" ")) {
            word = word.trim();
            if (!word.isEmpty()) {
                // fixed: use the anchored-emit overload directly instead of
                // building a raw, untyped ArrayList anchor per word.
                collector.emit(input, new Values(word.toLowerCase()));
            }
        }
        // Ack only after all words were emitted, so a failure replays the line.
        collector.ack(input);
    }

    /** Stores the collector used to emit and ack tuples. */
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    /** Declares the single output field "word". */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }
}