




(3) 输入自定义项目名,点击完成:

(4) 你会看到这样的结构:





package storm;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.HashMap;
import java.util.Map;

public class WordCountTopology {
    public static void main(String[] args) throws Exception {
        //throws Exception捕获异常声明
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("wordcount_spout", new WordCountSpout());
        builder.setBolt("wordcount_split_bolt", new WordCountSplitBolt()).shuffleGrouping("wordcount_spout");
        builder.setBolt("wordcount_count_bolt", new WordCountTotalBolt()).fieldsGrouping("wordcount_split_bolt",new Fields("word"));
        StormTopology wc = builder.createTopology();
        Config conf = new Config();
        LocalCluster localCluster = new LocalCluster();
        localCluster.submitTopology("MyStormWordCount", conf, wc);
        //使用 StormSubmitter 将 topology 提交到集群. StormSubmitter 以 topology 的名称, topology 的配置和 topology 本身作为输入
        // StormSubmitter.submitTopology(args[0], conf, wc);
    public static class WordCountSpout extends BaseRichSpout {
        // 模拟产生一些数据
        private String[] data = {
                "Apache Storm is a free and open source distributed realtime computation system.Apache Storm makes it easy to reliably process unbounded streams of data,doing for realtime processing what Hadoop did for batch processing."
        // 定义spout的输出流
        private SpoutOutputCollector collector;

        public void nextTuple() {
        // 由storm框架调用,每次调用进行数据采集
        // 打印打印采集到的数据
            System.out.println("采集的数据是:" + data[0]);
            // 将采集到的数据发送给下一个组件进行处理
            this.collector.emit(new Values(data[0]));
            // 设置为隔很长时间才执行一次下采集操作

        public void open(Map arg0, TopologyContext arg1, SpoutOutputCollector collector) {
            // 初始化spout组件时调用
            this.collector = collector;

        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            // 声明输出的Tuple的格式
            declarer.declare(new Fields("sentence"));
    public static class WordCountSplitBolt extends BaseRichBolt {
        private OutputCollector collector;
        public void execute(Tuple tuple) {
            // 处理上一个组件发来的数据
            String str = tuple.getStringByField("sentence");
            str = str.trim();
            //replace(char oldChar,char newChar);
            //   oldChar:要替换的子字符串或者字符。
            //   newChar:新的字符串或字符,用于替换原有字符串的内容。
            str = str.replace(",", " ");
            str = str.replace(".", " ");
            str = str.trim();
            // 分词操作
            String[] words = str.split(" ");
            // 将处理好的(word,1)形式的数据发送给下一个组件

            for (String w : words) {
                this.collector.emit(new Values(w, 1));
        public void prepare(Map arg0, TopologyContext arg1, OutputCollector collector) {
        // 初始化时调用
        // OutputCollector代表的就是这个bolt组件的输出流
            this.collector = collector;
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            // 声明这个Bolt组件输出Tuple的格式
            declarer.declare(new Fields("word", "count"));
    public static class WordCountTotalBolt extends BaseRichBolt {
        private OutputCollector collector;
        private Map<String, Integer> result = new HashMap<>();

        public void execute(Tuple tuple) {
            String word = tuple.getStringByField("word");
            int count = tuple.getIntegerByField("count");

            if (result.containsKey(word)) {
                int total = result.get(word);
                result.put(word, total + count);
            } else {
                result.put(word, count);

            System.out.println("result" + result);

            try (PrintStream ps = new PrintStream(new FileOutputStream("a.txt", true))) {
                ps.println("word: " + word + ", count: " + result.get(word));
            } catch (FileNotFoundException e) {

            this.collector.emit(new Values(word, result.get(word)));

        public void prepare(Map arg0, TopologyContext arg1, OutputCollector collector) {
            this.collector = collector;

        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("word", "count"));



(9) 选择“Java 构建路径”,点击“Libraries(库)”选项卡,再点击“添加外部 JAR”:



(12) 到这里就已经全部配置完了,下面开始运行。在代码上点击右键,选择“Run As”,再选择“Java 应用程序”:




package storm;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.thrift.TException;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.trident.operation.BaseFilter;
import org.apache.storm.trident.operation.BaseFunction;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.testing.FixedBatchSpout;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
/*  部分输出
emit word is :the
Filter word is:the  and return type is:true
emit word is :cow
emit word is :jumped
emit word is :over
emit word is :the
Filter word is:the  and return type is:true
emit word is :moon
* */
public class FunctionFilter {
    public static void main(String[] args) throws TException{
        TridentTopology topology = new TridentTopology();
        FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence"), 1,
                new Values("the cow jumped over the moon"),
                new Values("the man went to the store and bought some candy"),
                new Values("four score and seven years ago"),
                new Values("how many apples can you eat"));
        //newStream 方法从输入源中读取数据, 并在 topology 中创建一个新的数据流 batch-spout
                //使用.each()方法,sentence tuple经过split()方法后输出word tuple
                .each(new Fields("sentence"), new Split(), new Fields("word"))
                //使用.each()方法,new Fields()保留setence tuple和word tuple ,经过WordFilter() 过滤 单词 the
                .each(new Fields("sentence","word"), new WordFilter("the"));
        StormTopology stormTopology = topology.build();
        LocalCluster cluster = null;
		try {
			cluster = new LocalCluster();
		} catch (Exception e) {
			// TODO 自动生成的 catch 块
        Config conf = new Config();
        cluster.submitTopology("soc", conf,stormTopology);
    public static class WordFilter extends BaseFilter {
        String actor;
        public WordFilter(String actor) {
            this.actor = actor;
        public boolean isKeep(TridentTuple tuple) {
            //如果元组的值和 actor 相等(这里的actor是“the”)
                //输出 Filter word is:the  and return type is:true
                System.out.println("Filter word is:"+tuple.getString(1) + "  and return type is:"+tuple.getString(1).equals(actor));
            return tuple.getString(1).equals(actor);
    // Function函数
    public static class Split extends BaseFunction {
        public void execute(TridentTuple tuple, TridentCollector collector) {
            String sentence = tuple.getString(0);
            //每一个 sentence tuple 可能会被转换成多个 word tuple,
            //比如说 "the cow jumped over the moon" 这个句子会被转换成 6 个 "word" tuples
            for(String word: sentence.split(" ")) {
                System.out.println("emit word is :"+word);
                collector.emit(new Values(word));
package storm;

import java.util.ArrayList;
import java.util.List;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.trident.operation.BaseFunction;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.testing.FixedBatchSpout;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.apache.storm.trident.state.BaseQueryFunction;
import org.apache.storm.trident.state.State;

import java.util.Map;

import org.apache.storm.task.IMetricsContext;
import org.apache.storm.trident.state.StateFactory;
public class StateTopology {
    public static void main(String[] agrs) throws Exception{
        FixedBatchSpout spout = new FixedBatchSpout(
                new Fields("sentence"), 2,
                new Values("the cow"),
                new Values("the man"),
                new Values("four score"),
                new Values("many apples"));
        TridentTopology topology = new TridentTopology();
        //newStream 方法从输入源中读取数据, 并在 topology 中创建一个新的数据流 spout
                //使用.each()方法,sentence tuple经过split()方法后输出word tuple
                .each(new Fields("sentence"), new Split(), new Fields("word"))
                //使用 .newStaticState()方法创建了一个外部数据库,
                //.stateQuery()将topology.newStaticState(new TestStateFactory())映射到new Fields("word") word字段
                //将new TestQueryLocation()映射到 new Fields("test") test字段 上
                .stateQuery(topology.newStaticState(new TestStateFactory()),new Fields("word"), new TestQueryLocation(), new Fields("test"));
        StormTopology stormTopology = topology.build();
        LocalCluster cluster = new LocalCluster();
        Config conf = new Config();
        cluster.submitTopology("test", conf,stormTopology);
    public static class Split extends BaseFunction {
        public void execute(TridentTuple tuple, TridentCollector collector) {
            String sentence = tuple.getString(0);
            for(String word: sentence.split(" ")) {
                collector.emit(new Values(word));
    public static class TestState implements State{
        public void beginCommit(Long arg0) {
            // TODO Auto-generated method stub
        public void commit(Long arg0) {
            // TODO Auto-generated method stub
        public String getDBOption(int i){
            return "success"+i;
    public static class TestStateFactory implements StateFactory{
        public State makeState(Map arg0, IMetricsContext arg1, int arg2, int arg3) {
            // TODO Auto-generated method stub
            return new TestState();
    public static class TestQueryLocation extends BaseQueryFunction<TestState, String>{
        public List<String> batchRetrieve(TestState state, List<TridentTuple> arg1) {
            List<String> list = new ArrayList<String>();
            for(int i = 0 ; i< arg1.size() ; i++){
            return list;
        public void execute(TridentTuple arg0, String arg1, TridentCollector arg2) {
package storm;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.trident.operation.BaseAggregator;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.testing.FixedBatchSpout;
import org.apache.storm.trident.testing.Split;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;

import java.util.HashMap;
import java.util.Map;

public class TridentAggreTopology {
    public  static class WordAggregat extends BaseAggregator<Map<String, Integer>> {
        public static Map<String, Integer> map =  new HashMap<String, Integer>();
        public Map<String, Integer> init(Object batchId, TridentCollector collector) {
            return new HashMap<String, Integer>();
        public void aggregate(Map<String, Integer> val, TridentTuple tuple,
                              TridentCollector collector) {
            String location = tuple.getString(0);
            Integer i = map.get(location);
            if(null == i){
                i = 1;
                i = i+1;
            map.put(location, i);
        public void complete(Map<String, Integer> val, TridentCollector collector) {
            for (String key : map.keySet()) {
                System.out.println("key= "+ key + " and value= " + map.get(key));
            collector.emit(new Values(map));
    public static void main(String[] args) throws Exception{
        TridentTopology topology = new TridentTopology();
        FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence"), 1,
                new Values("the cow jumped cow jumped jumped"));
        //newStream 方法从输入源中读取数据, 并在 topology 中创建一个新的数据流 batch-spout
                //使用.each()方法,sentence tuple经过split()方法后输出word tuple
                .each(new Fields("sentence"), new Split(), new Fields("word"))
                //使用partitionBy()对word tuple进行分区
                .partitionBy(new Fields("word"))
                //使用partitionAggregate(),对word tuple聚合每个分区,经过WordAggregat(),输出agg字段元组
                .partitionAggregate(new Fields("word"),new WordAggregat(), new Fields("agg"));
        StormTopology stormTopology = topology.build();
        LocalCluster cluster = new LocalCluster();
        Config conf = new Config();
        cluster.submitTopology("soc", conf,stormTopology);
package storm;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.trident.Stream;
import org.apache.storm.trident.TridentState;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.trident.operation.BaseFunction;
import org.apache.storm.trident.operation.Consumer;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.operation.builtin.Count;
import org.apache.storm.trident.testing.FixedBatchSpout;
import org.apache.storm.trident.testing.MemoryMapState;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
public class WordCountTrident {
    private static StormTopology buildTopology() {
        FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence"),1,
                new Values("the cow jumped over the moon"),
                new Values("the man went to the store and bought some candy"),
                new Values("four score and seven years ago"),
                new Values("how many apples can you eat"));
        //首先创建了一个 名为 topology 的TridentTopology 对象
        TridentTopology topology = new TridentTopology();
        //使用.newStream() 方法从上面定义的输入源中读取数据,并在 topology 中创建一个新的数据流 名为spout1
        //使用.each()方法遍历每一个文本行 sentence ,指定使用split()处理,输出字段名为word的tuple元组
        // 不使用.parallelismHint()方法设置并行度
        TridentState wordCounts =
                topology.newStream("spout1", spout)
                        .each(new Fields("sentence"), new Split(), new Fields("word"))
                        .groupBy(new Fields("word"))
                        .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));
                .peek(new Consumer() {
                    public void accept(TridentTuple input) {
        return topology.build();
    public static class Split extends BaseFunction {
        public void execute(TridentTuple tuple, TridentCollector collector) {
            String sentence = tuple.getString(0);
            //根据空格拆分 sentence
            for(String word: sentence.split(" ")) {
                //将拆分出的每个单词作为一个 tuple 输出
                collector.emit(new Values(word));
    public static void main(String[] args) throws Exception {
        Config conf = new Config();
        //创建一个进程内的集群,只需要使用 LocalCluster 类
        //使用 LocalCluster 对象的 submitTopology 方法提交topologies(拓扑)
        //以 topology 的名称, topology 的配置和 topology 本身作为参数输入
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("app0", conf, buildTopology());




package storm;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.HashMap;
import java.util.Map;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;

public class WordCountTopology {
    public static void main(String[] args) throws Exception {
        //throws Exception捕获异常声明
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("wordcount_spout", new WordCountSpout());
        builder.setBolt("wordcount_split_bolt", new WordCountSplitBolt()).shuffleGrouping("wordcount_spout");
        builder.setBolt("wordcount_count_bolt", new WordCountTotalBolt()).fieldsGrouping("wordcount_split_bolt",new Fields("word"));
        StormTopology wc = builder.createTopology();
        Config conf = new Config();
        LocalCluster localCluster = new LocalCluster();
        localCluster.submitTopology("MyStormWordCount", conf, wc);
//        使用 StormSubmitter 将 topology 提交到集群. StormSubmitter 以 topology 的名称, topology 的配置和 topology 本身作为输入
//         StormSubmitter.submitTopology(args[0], conf, wc);
    public static class WordCountSpout extends BaseRichSpout {
        // 定义spout的输出流
        private SpoutOutputCollector collector;
        private BufferedReader reader;

        public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
            // 初始化spout组件时调用
            this.collector = collector;
            try {
                // 读取本地文件
                FileReader fileReader = new FileReader("/home/lbw/workspace/storm/src/storm/1.txt");
                this.reader = new BufferedReader(fileReader);
            } catch (FileNotFoundException e) {

        public void nextTuple() {
            try {
                // 读取文件中的一行数据
                String line = reader.readLine();
                if (line != null) {
                    // 将读取到的行数据发送给下一个组件进行处理
                    this.collector.emit(new Values(line));
                } else {
                    // 文件已读取完毕,等待一段时间后重复读取
            } catch (IOException e) {

        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            // 声明输出的Tuple的格式
            declarer.declare(new Fields("sentence"));

        public void close() {
            // 在spout组件关闭时关闭文件读取器
            try {
            } catch (IOException e) {
    public static class WordCountSplitBolt extends BaseRichBolt {
        private OutputCollector collector;
        public void execute(Tuple tuple) {
            // 处理上一个组件发来的数据
            String str = tuple.getStringByField("sentence");
            str = str.trim();
            //replace(char oldChar,char newChar);
            //   oldChar:要替换的子字符串或者字符。
            //   newChar:新的字符串或字符,用于替换原有字符串的内容。
            str = str.replace(",", " ");
            str = str.replace(".", " ");
            str = str.trim();
            // 分词操作
            String[] words = str.split(" ");
            // 将处理好的(word,1)形式的数据发送给下一个组件

            for (String w : words) {
                this.collector.emit(new Values(w, 1));
        public void prepare(Map arg0, TopologyContext arg1, OutputCollector collector) {
        // 初始化时调用
        // OutputCollector代表的就是这个bolt组件的输出流
            this.collector = collector;
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            // 声明这个Bolt组件输出Tuple的格式
            declarer.declare(new Fields("word", "count"));
    public static class WordCountTotalBolt extends BaseRichBolt {
        private OutputCollector collector;
        private Map<String, Integer> result = new HashMap<>();

        public void execute(Tuple tuple) {
            String word = tuple.getStringByField("word");
            int count = tuple.getIntegerByField("count");

            if (result.containsKey(word)) {
                int total = result.get(word);
                result.put(word, total + count);
            } else {
                result.put(word, count);

            System.out.println("result" + result);

            try (PrintStream ps = new PrintStream(new FileOutputStream("a.txt", true))) {
                ps.println("word: " + word + ", count: " + result.get(word));
            } catch (FileNotFoundException e) {

            this.collector.emit(new Values(word, result.get(word)));

        public void prepare(Map arg0, TopologyContext arg1, OutputCollector collector) {
            this.collector = collector;

        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("word", "count"));




