Trident 基本概念

最新推荐文章于 2024-08-23 15:54:50 发布

@Anges

最新推荐文章于 2024-08-23 15:54:50 发布

阅读量382

点赞数

文章标签： java word

本文链接：https://blog.csdn.net/m0_51550513/article/details/130410669

版权

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.trident.Stream;
import org.apache.storm.trident.TridentState;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.trident.operation.BaseFunction;
import org.apache.storm.trident.operation.Consumer;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.operation.builtin.Count;
import org.apache.storm.trident.testing.FixedBatchSpout;
import org.apache.storm.trident.testing.MemoryMapState;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

/**
 * Minimal Trident word-count example that runs in an in-process Storm cluster.
 *
 * <p>A fixed set of sentences is fed into a Trident topology, each sentence is
 * split into words, the words are grouped, and a running count per word is
 * kept in an in-memory map state. Every value produced by the count state is
 * printed to standard output.
 */
public class WordCountTrident {

    /**
     * Builds the word-count topology.
     *
     * @return the Storm topology produced by {@link TridentTopology#build()}
     */
    private static StormTopology buildTopology() {
        // Input source: a fixed list of sentences published under the field "sentence".
        // The second argument (1) is passed as the spout's batch-size parameter.
        FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence"), 1,
                new Values("the cow jumped over the moon"),
                new Values("the man went to the store and bought some candy"),
                new Values("four score and seven years ago"),
                new Values("how many apples can you eat"));
        // Do not replay the sentences once they have all been emitted.
        spout.setCycle(false);

        // Container in which the Trident streams below are declared.
        TridentTopology topology = new TridentTopology();

        // Pipeline:
        //   1. newStream          - read tuples from the spout under the stream id "spout1"
        //   2. each               - run Split on the "sentence" field, producing "word" tuples
        //   3. groupBy            - group tuples by the "word" field
        //   4. persistentAggregate - count each group and keep the result in an
        //                            in-memory map state under the field "count"
        // No parallelismHint() is set on purpose.
        TridentState wordCounts =
            topology.newStream("spout1", spout)
                .each(new Fields("sentence"), new Split(), new Fields("word"))
                .groupBy(new Fields("word"))
                .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

        // Print every tuple coming out of the count state.
        wordCounts.newValuesStream()
                .peek(new Consumer() {
                    @Override
                    public void accept(TridentTuple input) {
                        System.out.println(input);
                    }
                });

        return topology.build();
    }

    /**
     * Trident function that splits a sentence into words and emits one tuple per word.
     */
    public static class Split extends BaseFunction {

        /**
         * Reads the sentence from position 0 of the input tuple and emits each
         * whitespace-separated word as its own single-value tuple.
         *
         * <p>A {@code null} sentence emits nothing, and empty tokens (caused by
         * leading or repeated whitespace) are skipped so that the empty string
         * is never counted as a word.
         *
         * @param tuple     input tuple whose first value is the sentence text
         * @param collector collector used to emit one tuple per word
         */
        @Override
        public void execute(TridentTuple tuple, TridentCollector collector) {
            String sentence = tuple.getString(0);
            if (sentence == null) {
                // Nothing to split; avoid a NullPointerException on malformed input.
                return;
            }
            // Split on runs of whitespace rather than a single space so that
            // "a  b" does not yield an empty token between the two words.
            for (String word : sentence.split("\\s+")) {
                // A leading separator (or an empty sentence) still produces one
                // empty token at index 0; drop it.
                if (!word.isEmpty()) {
                    collector.emit(new Values(word));
                }
            }
        }
    }

    /**
     * Runs the topology in local mode.
     *
     * <p>An in-process cluster is created with {@link LocalCluster} and the
     * topology is submitted to it by name, configuration, and topology object.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if creating the local cluster or submitting the topology fails
     */
    public static void main(String[] args) throws Exception {
        Config conf = new Config();
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("app0", conf, buildTopology());
        // NOTE(review): the cluster is never shut down here, so the process is
        // presumably left running until it is killed externally - confirm whether
        // a timed wait followed by a cluster shutdown is wanted for this example.
    }
}