import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.HashMap;
import java.util.Map;
public class WordCountTopology {
/**
 * Entry point: wires the WordCount topology (spout -> split bolt -> count bolt)
 * and submits it either to a remote cluster (when a topology name is passed as
 * args[0]) or to an in-process LocalCluster for testing.
 *
 * @param args optional; args[0] = topology name for cluster submission
 * @throws Exception propagated from StormSubmitter / LocalCluster submission
 */
public static void main(String[] args) throws Exception {
    // Build the topology graph.
    TopologyBuilder builder = new TopologyBuilder();
    // Spout: produces raw sentences (no explicit parallelism).
    builder.setSpout("wordcount_spout", new WordCountSpout());
    // First bolt: splits sentences into words; shuffle grouping balances load.
    builder.setBolt("wordcount_split_bolt", new WordCountSplitBolt())
            .shuffleGrouping("wordcount_spout");
    // Second bolt: counts words; fieldsGrouping on "word" guarantees the same
    // word is always routed to the same counter task, so counts stay correct.
    builder.setBolt("wordcount_count_bolt", new WordCountTotalBolt())
            .fieldsGrouping("wordcount_split_bolt", new Fields("word"));

    StormTopology wc = builder.createTopology();
    Config conf = new Config();

    if (args.length > 0) {
        // Cluster mode: submit under the name given on the command line.
        StormSubmitter.submitTopology(args[0], conf, wc);
    } else {
        // Local mode: run for a short while, then shut down cleanly instead of
        // leaving the in-process cluster's daemon threads alive forever.
        LocalCluster localCluster = new LocalCluster();
        localCluster.submitTopology("MyStormWordCount", conf, wc);
        Utils.sleep(30000);
        localCluster.killTopology("MyStormWordCount");
        localCluster.shutdown();
    }
}
//采集数据spout组件
// Data-source spout: feeds the topology a single fixed sample sentence.
public static class WordCountSpout extends BaseRichSpout {
    // Hard-coded sample data standing in for a real stream.
    private String[] sentences = {
        "Apache Storm is a free and open source distributed realtime computation system.Apache Storm makes it easy to reliably process unbounded streams of data,doing for realtime processing what Hadoop did for batch processing."
    };
    // Output stream handle supplied by the framework in open().
    private SpoutOutputCollector outputCollector;

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        // Called once when this spout task is initialized; keep the collector.
        this.outputCollector = collector;
    }

    @Override
    public void nextTuple() {
        // Invoked repeatedly by the Storm framework; each call emits one sentence.
        System.out.println("采集的数据是:" + sentences[0]);
        // Hand the sentence to the downstream component.
        this.outputCollector.emit(new Values(sentences[0]));
        // Sleep for a very long time so the sentence is effectively emitted once.
        Utils.sleep(300000000);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Downstream bolts read the emitted tuple via the "sentence" field.
        declarer.declare(new Fields("sentence"));
    }
}
// Splitting bolt: turns each incoming sentence into (word, 1) tuples.
public static class WordCountSplitBolt extends BaseRichBolt {
    // Output stream of this bolt, supplied by the framework in prepare().
    private OutputCollector collector;

    @Override
    public void execute(Tuple tuple) {
        // Read the sentence emitted by the spout.
        String str = tuple.getStringByField("sentence");
        // Replace the punctuation that glues words together with spaces,
        // then trim the ends.
        str = str.replace(",", " ").replace(".", " ").trim();
        // Split on runs of whitespace ("\\s+") instead of a single space:
        // punctuation replacement can produce consecutive spaces, and a
        // plain split(" ") would then emit empty-string "words".
        for (String word : str.split("\\s+")) {
            if (!word.isEmpty()) {
                // Send each word with an initial count of 1 to the counter bolt.
                this.collector.emit(new Values(word, 1));
            }
        }
    }

    @Override
    public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
        // Called once at initialization; the collector is this bolt's output stream.
        this.collector = collector;
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Each emitted tuple carries a word and its (partial) count.
        declarer.declare(new Fields("word", "count"));
    }
}
//统计单词个数
// Counting bolt: maintains running word totals and writes them to a.txt.
public static class WordCountTotalBolt extends BaseRichBolt {
    private OutputCollector collector;
    // In-memory running totals, keyed by word (one map per bolt task;
    // fieldsGrouping upstream keeps each word on a single task).
    private Map<String, Integer> result = new HashMap<>();

    @Override
    public void execute(Tuple tuple) {
        // Unpack the (word, count) tuple from the splitting bolt.
        String word = tuple.getStringByField("word");
        int count = tuple.getIntegerByField("count");
        // Accumulate: add count to the existing total, or start at count.
        result.merge(word, count, Integer::sum);
        // Print the current totals.
        System.out.println("result" + result);
        String a = "result" + result;
        // Rewrite a.txt with the latest totals. try-with-resources closes the
        // stream each time — the original leaked one FileOutputStream per tuple.
        try (PrintStream ps = new PrintStream(new FileOutputStream(new File("a.txt")))) {
            ps.append(a);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        // Forward the updated (word, total) pair downstream.
        this.collector.emit(new Values(word, result.get(word)));
    }

    @Override
    public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
        // Called once at initialization; keep the output stream handle.
        this.collector = collector;
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Each emitted tuple carries a word and its cumulative count.
        declarer.declare(new Fields("word", "count"));
    }
}
}
// Storm WordCount example (Storm 的 WordCount 案例)
// Originally published 2024-08-23 15:54:50