WordCountSpout--数据源
SplitBolt--对字符串进行分割
WordCountBolt--统计字符串个数
PrintBolt--打印单词及其统计结果
WordCountTopology--拓扑集合处理
---------------------------------------------------------------------------------------------------------------------------------------------------------------------
WordCountSpout.java
package Word;
import java.util.Map;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
/**
 * Data source of the word-count topology.
 *
 * <p>Cycles endlessly through a fixed array of sample sentences and emits one sentence per
 * {@link #nextTuple()} call as a single-field tuple named {@code "line"}. Tuples are emitted
 * without a message id.
 */
public class WordCountSpout extends BaseRichSpout {
    /** Pause after each emitted sentence, in milliseconds. */
    private static final long EMIT_INTERVAL_MS = 3000L;

    private SpoutOutputCollector collector;
    // Built-in sample data.
    private String[] lines = new String[]{"hello world", "hello hadoop", "hello storm", "hello flume"};
    // Index of the next sentence to emit.
    private int i = 0;

    /**
     * Emits the next sample sentence, then pauses for {@link #EMIT_INTERVAL_MS} and prints a
     * separator line to stdout.
     */
    @Override
    public void nextTuple() {
        String line = lines[i];
        // Wrap around so the sample data is replayed forever.
        i = (i + 1) % lines.length;
        collector.emit(new Values(line));
        try {
            Thread.sleep(EMIT_INTERVAL_MS);
            System.out.println("------分割线---------");
        } catch (InterruptedException e) {
            // Restore the interrupt status instead of swallowing it, so the thread that
            // drives this spout can still observe the interruption.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Initialization hook: stores the collector used by {@link #nextTuple()}.
     *
     * @param arg0 topology configuration (unused)
     * @param arg1 topology context (unused)
     * @param collector collector used to emit tuples
     */
    @Override
    public void open(Map arg0, TopologyContext arg1, SpoutOutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Declares the single output field {@code "line"}; the number of declared fields must match
     * the number of values in each emitted tuple.
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("line"));
    }
}
SplitBolt.java
package Word;
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
/**
 * Splits each incoming sentence into words.
 *
 * <p>Reads the {@code "line"} field of the input tuple and emits one single-field tuple
 * ({@code "word"}) per whitespace-separated token. Every emitted tuple is anchored to the input
 * tuple, and the input tuple is acked once all of its words have been emitted.
 */
public class SplitBolt extends BaseRichBolt {
    // Collector used to emit tuples to downstream components.
    private OutputCollector collector;

    /**
     * Splits the input line on runs of whitespace and emits each non-empty token.
     *
     * @param input tuple carrying the sentence in its {@code "line"} field
     */
    @Override
    public void execute(Tuple input) {
        String line = input.getStringByField("line");
        if (line != null) {
            // Trim and split on whitespace runs so repeated or leading spaces never
            // produce empty "words" that would be counted downstream.
            for (String word : line.trim().split("\\s+")) {
                if (!word.isEmpty()) {
                    // Anchor to the input so the new tuple is linked to its source tuple.
                    collector.emit(input, new Values(word));
                }
            }
        }
        // BaseRichBolt does not ack automatically; every input must be acked explicitly.
        collector.ack(input);
    }

    /**
     * Initialization hook: stores the collector used by {@link #execute(Tuple)}.
     *
     * @param arg0 topology configuration (unused)
     * @param arg1 topology context (unused)
     * @param collector collector used to emit and ack tuples
     */
    @Override
    public void prepare(Map arg0, TopologyContext arg1, OutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Declares the single output field {@code "word"}; the number of declared fields must match
     * the number of values in each emitted tuple.
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }
}
WordCountBolt.java
package Word;
import java.util.HashMap;
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
/**
 * Keeps a running count per word.
 *
 * <p>For every incoming {@code "word"} tuple the bolt increments that word's counter and emits
 * a {@code ("word", "count")} tuple with the updated total. The counters live in a map that is
 * created in {@link #prepare}, so each prepared bolt instance counts independently.
 */
public class WordCountBolt extends BaseRichBolt {
    private OutputCollector collector;
    // Occurrences seen so far, keyed by word.
    private Map<String, Integer> wordMap;

    /**
     * Increments the counter of the received word and emits the word with its new count.
     *
     * @param input tuple carrying the word in its {@code "word"} field
     */
    @Override
    public void execute(Tuple input) {
        String word = input.getStringByField("word");
        // A missing entry means this is the first occurrence of the word.
        Integer previous = wordMap.get(word);
        int count = (previous == null) ? 1 : previous + 1;
        wordMap.put(word, count);
        // Anchor to the input so the new tuple is linked to its source tuple.
        collector.emit(input, new Values(word, count));
        // BaseRichBolt does not ack automatically; every input must be acked explicitly.
        collector.ack(input);
    }

    /**
     * Initialization hook: stores the collector and creates the empty counter map.
     *
     * @param arg0 topology configuration (unused)
     * @param arg1 topology context (unused)
     * @param collector collector used to emit and ack tuples
     */
    @Override
    public void prepare(Map arg0, TopologyContext arg1, OutputCollector collector) {
        this.collector = collector;
        wordMap = new HashMap<>();
    }

    /**
     * Declares the output fields {@code "word"} and {@code "count"}; the number of declared
     * fields must match the number of values in each emitted tuple.
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "count"));
    }
}
PrintBolt.java
package Word;
import java.util.HashMap;
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;
/**
 * Terminal bolt that prints each {@code (word, count)} pair to stdout.
 *
 * <p>It emits nothing and therefore declares no output fields.
 */
public class PrintBolt extends BaseRichBolt {
    private OutputCollector collector;

    /**
     * Prints the received word and its count as {@code word...count}, then acks the tuple.
     *
     * @param input tuple carrying the fields {@code "word"} and {@code "count"}
     */
    @Override
    public void execute(Tuple input) {
        // Tuple values are looked up by their declared field names.
        String word = input.getStringByField("word");
        // Kept boxed so a null count is printed rather than failing on unboxing.
        Integer count = input.getIntegerByField("count");
        System.out.println(word + "..." + count);
        // BaseRichBolt does not ack automatically; every input must be acked explicitly.
        collector.ack(input);
    }

    /**
     * Initialization hook: stores the collector used to ack tuples in {@link #execute(Tuple)}.
     *
     * @param arg0 topology configuration (unused)
     * @param arg1 topology context (unused)
     * @param collector collector used to ack tuples
     */
    @Override
    public void prepare(Map arg0, TopologyContext arg1, OutputCollector collector) {
        this.collector = collector;
    }

    /** Declares nothing: this bolt does not emit any tuples. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer arg0) {
        // Intentionally empty.
    }
}
WordCountTopology.java
package Word;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.generated.StormTopology;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;
import clojure.reflect.Field;
import numberberrouter.Number;
/**
 * Entry point that wires the word-count components into a topology and submits it to an
 * in-process {@link LocalCluster}.
 *
 * <p>Data flow: {@code wordcount_spout -> split_bolt -> wordcount_bolt -> print_bolt}.
 *
 * <p>NOTE(review): the class name is spelled "Topolopy"; a public class has to live in a source
 * file of the same name, so the file name must match this spelling.
 */
public class WordCountTopolopy {
    public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {
        // The builder binds each component to its id and to its upstream component.
        TopologyBuilder builder = new TopologyBuilder();

        // Data source, running with a parallelism hint of 2.
        builder.setSpout("wordcount_spout", new WordCountSpout(), 2);

        // Sentence splitter: parallelism hint 2, 4 tasks, fed from the spout by shuffle
        // (random) grouping.
        builder.setBolt("split_bolt", new SplitBolt(), 2)
                .setNumTasks(4)
                .shuffleGrouping("wordcount_spout");

        // Counter: grouped by the "word" field of the splitter's output.
        builder.setBolt("wordcount_bolt", new WordCountBolt(), 2)
                .fieldsGrouping("split_bolt", new Fields("word"));

        // Printer: consumes the counter's output by shuffle grouping.
        builder.setBolt("print_bolt", new PrintBolt())
                .shuffleGrouping("wordcount_bolt");

        // Default configuration; the number of worker processes is not overridden here.
        Config conf = new Config();

        // Build the topology object, i.e. the job to run.
        StormTopology topology = builder.createTopology();

        // Run it on a local, in-process test cluster.
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("wordcount_topolopy", conf, topology);
    }
}