Storm Programming Model

Completing Storm programming in local mode


    Add the Storm Maven dependency to pom.xml:
        <dependency>
          <groupId>org.apache.storm</groupId>
          <artifactId>storm-core</artifactId>
          <version>1.1.1</version>
        </dependency>





ISpout
    Overview
        A core interface (interface) responsible for emitting data (tuples) into the topology for processing
        Storm tracks the DAG of the tuples emitted by a Spout
        ack/fail: Storm reports back to the Spout whether a tuple tree was fully processed or failed
        tuple: identified by a message id, which is what ack/fail are keyed on
        ack/fail/nextTuple are all invoked on the same thread, so thread safety does not need to be considered there

    Core methods
        open: initialization
        close: resource cleanup
        nextTuple: emits data   (the core API)
        ack: called when a tuple was processed successfully; Storm feeds the success back to the Spout
        fail: called when a tuple failed; Storm notifies the Spout so it can, for example, replay the tuple (see the reliable-spout sketch after the implementation classes below)

    Implementation classes
        public abstract class BaseRichSpout extends BaseComponent implements IRichSpout {
        public interface IRichSpout extends ISpout, IComponent
        DRPCSpout
        ShellSpout
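
    A minimal sketch of how message ids tie nextTuple, ack, and fail together. This is not part of the course code; the class name ReliableSentenceSpout, the fixed sentence, and the pending map are made up for illustration:

import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;

import java.util.HashMap;
import java.util.Map;

public class ReliableSentenceSpout extends BaseRichSpout {

    private SpoutOutputCollector collector;
    // tuples that have been emitted but not yet acked, keyed by message id
    private Map<Object, Values> pending;
    private int nextId = 0;

    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
        this.pending = new HashMap<Object, Values>();
    }

    public void nextTuple() {
        Values tuple = new Values("the quick brown fox");
        Object msgId = nextId++;
        pending.put(msgId, tuple);
        // emitting with a message id makes Storm track the tuple's DAG and call ack/fail later
        collector.emit(tuple, msgId);
        Utils.sleep(1000);
    }

    public void ack(Object msgId) {
        // the whole tuple tree was processed successfully: forget the tuple
        pending.remove(msgId);
    }

    public void fail(Object msgId) {
        // processing failed: replay the tuple with the same message id
        collector.emit(pending.get(msgId), msgId);
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("sentence"));
    }
}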



IComponent interface
    Overview:
        public interface IComponent extends Serializable
        Provides the methods common to all possible components of a topology

        void declareOutputFields(OutputFieldsDeclarer declarer);
        Declares the field names of the tuples emitted by the current Spout/Bolt
        Used together with OutputFieldsDeclarer (see the sketch below)


    Implementation class:
    public abstract class BaseComponent implements IComponent
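
    A hedged sketch of declareOutputFields; the field names "word"/"count" and the extra stream name "errors" are made up, and the usual Fields/OutputFieldsDeclarer imports from the examples below are assumed. Inside a Spout or Bolt it typically looks like this, and OutputFieldsDeclarer can also declare additional named streams:

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // default stream: tuples with two fields
        declarer.declare(new Fields("word", "count"));
        // an additional, explicitly named stream (optional)
        declarer.declareStream("errors", new Fields("errorMsg"));
    }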



IBolt interface
    Overview
        Responsibility: receives tuples and processes them accordingly (filter/join/...)
        It may hold on to tuples and process them later
        An IBolt object is created on the client machine, serialized with Java serialization, and submitted to the master node (Nimbus) for execution
        Nimbus launches workers that deserialize it and call its prepare method, and only then does it start processing tuples


    Methods
        prepare: initialization
        execute: processes one tuple of data; the Tuple object also carries metadata
        cleanup: resource cleanup before shutdown
        (a sketch of a bolt that acks explicitly follows the implementation classes below)


    Implementation classes:
        public abstract class BaseRichBolt extends BaseComponent implements IRichBolt {
        public interface IRichBolt extends IBolt, IComponent
        RichShellBolt
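
    A minimal sketch of a bolt that anchors its output to the input tuple and acks/fails explicitly, which is what triggers the Spout's ack/fail callbacks described above. The class name UpperCaseBolt, the field names "line"/"upperLine", and the upper-casing logic are made up for illustration:

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import java.util.Map;

public class UpperCaseBolt extends BaseRichBolt {

    private OutputCollector collector;

    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    public void execute(Tuple input) {
        try {
            String line = input.getStringByField("line");
            // anchoring the emitted tuple to the input keeps it inside the tracked tuple DAG
            collector.emit(input, new Values(line.toUpperCase()));
            // BaseRichBolt does not ack automatically, so ack explicitly
            collector.ack(input);
        } catch (Exception e) {
            // the failure is propagated back to the Spout's fail()
            collector.fail(input);
        }
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("upperLine"));
    }
}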


Sum example
    Requirement: 1 + 2 + 3 + ....   = ???
    Approach:
        A Spout emits the numbers as the input
        A Bolt implements the business logic: summing
        The result is printed to the console
    Topology design: DataSourceSpout  --> SumBolt



import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;

import java.util.Map;

/**
* Cumulative sum implemented with Storm
*/
public class LocalSumStormTopology {


    /**
     * A Spout extends BaseRichSpout
     * The data source produces data and emits it
     */
    public static class DataSourceSpout extends BaseRichSpout {

        private SpoutOutputCollector collector;
        /**
         * Initialization method; called only once
         * @param conf  configuration
         * @param context  topology context
         * @param collector  emitter used to send out tuples
         */
        public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
            this.collector = collector;
        }

        int number = 0;

        /**
         * Produces the data; in production this would pull from a message queue
         *
         * Storm calls this method again and again, effectively an endless loop
         */
        public void nextTuple() {
            this.collector.emit(new Values(++number));

            System.out.println("Spout: " + number);

            // Slow things down so data is not produced too fast
            Utils.sleep(1000);

        }

        /**
         * Declares the output fields
         * @param declarer
         */
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("num"));
        }
    }


    /**
     * Cumulative sum Bolt: receives the data and processes it
     */
    public static class SumBolt extends BaseRichBolt {

        /**
         * Initialization method; called once
         * @param stormConf
         * @param context
         * @param collector
         */
        public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {

        }

        int sum = 0;

        /**
         * Effectively an endless loop as well; responsibility: receive the data emitted by the Spout
         * @param input
         */
        public void execute(Tuple input) {

            // In a Bolt, a value can be read by index or by the field name declared upstream (the latter is recommended)
            Integer value = input.getIntegerByField("num");
            sum += value;

            System.out.println("Bolt: sum = [" + sum + "]");
        }

        public void declareOutputFields(OutputFieldsDeclarer declarer) {

        }
    }


    public static void main(String[] args) {

        // TopologyBuilder builds a Topology out of the Spouts and Bolts
        // Every Storm job is submitted in the form of a Topology
        // The Topology specifies how the Spouts and Bolts are wired together
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("DataSourceSpout", new DataSourceSpout());
        builder.setBolt("SumBolt", new SumBolt()).shuffleGrouping("DataSourceSpout");



        // Create a local Storm cluster: runs in local mode, no real Storm cluster needs to be set up
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("LocalSumStormTopology", new Config(),
                builder.createTopology());



    }

}


=======================================================





Word count
    Requirement: read the data under a given directory and count the words
    Approach:
        A Spout reads the data from the given directory and feeds it to the downstream Bolts as input
        One Bolt splits the input lines; we split on commas
        Another Bolt performs the final per-word counting
        and prints the result
    Topology design: DataSourceSpout ==> SplitBolt ==> CountBolt
    For example, for the input line "hello,storm,hello", SplitBolt emits "hello", "storm", "hello" and CountBolt prints hello=2, storm=1.




Notes on Storm programming
3) Topology names must not be duplicated: local mode does not seem to mind; we will verify this once we test on a cluster (see the sketch below)
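
A sketch of what cluster submission looks like (this is not part of the local examples in this note; the topology name "SumTopology" and the empty builder are placeholders): StormSubmitter rejects a name that is already running on the cluster with an AlreadyAliveException, which is the uniqueness rule mentioned above.

import org.apache.storm.Config;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.topology.TopologyBuilder;

public class ClusterSubmitSketch {

    public static void main(String[] args) throws Exception {
        TopologyBuilder builder = new TopologyBuilder();
        // ... setSpout / setBolt wiring as in the examples below ...

        try {
            // the first argument is the topology name; it must be unique on the cluster
            StormSubmitter.submitTopology("SumTopology", new Config(), builder.createTopology());
        } catch (AlreadyAliveException e) {
            // thrown when a topology with the same name is already running
            e.printStackTrace();
        }
    }
}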

import org.apache.commons.io.FileUtils;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
* Word count implemented with Storm
*/
public class LocalWordCountStormTopology {

    public static class DataSourceSpout extends BaseRichSpout {
        private SpoutOutputCollector collector;

        public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
            this.collector = collector;
        }

        /**
         * Business logic:
         * 1) read the files under the given directory: /Users/rocky/data/storm/wc
         * 2) emit every line
         */
        public void nextTuple() {

            // List all the files
            Collection<File> files = FileUtils.listFiles(new File("/Users/rocky/data/storm/wc"),
                    new String[]{"txt"},true);

            for(File file : files) {
                try {
                    // Read the whole content of the file
                    List<String> lines = FileUtils.readLines(file);

                    // Process every line of the file
                    for(String line : lines) {

                        // Emit the line
                        this.collector.emit(new Values(line));
                    }

                    // TODO... rename the file after it has been processed, otherwise it is processed over and over
                    FileUtils.moveFile(file, new File(file.getAbsolutePath() + System.currentTimeMillis()));
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }

        }

        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("line"));
        }
    }


    /**
     * Splits the data
     */
    public static class SplitBolt extends BaseRichBolt {

        private OutputCollector collector;

        public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
            this.collector = collector;
        }

        /**
         * Business logic:
         *   line: split the line on commas
         */
        public void execute(Tuple input) {
            String line = input.getStringByField("line");
            String[] words = line.split(",");

            for(String word : words) {
                this.collector.emit(new Values(word));
            }

        }

        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("word"));
        }
    }


    /**
     * Word count aggregation Bolt
     */
    public static class CountBolt extends  BaseRichBolt {

        public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {

        }


        Map<String,Integer> map = new HashMap<String, Integer>();
        /**
         * Business logic:
         * 1) get each word
         * 2) aggregate the counts over all words
         * 3) print the result
         */
        public void execute(Tuple input) {
            // 1) get the word
            String word = input.getStringByField("word");
            Integer count = map.get(word);
            if(count == null) {
                count = 0;
            }

            count ++;

            // 2) update the aggregated count
            map.put(word, count);

            // 3) print the current counts
            System.out.println("~~~~~~~~~~~~~~~~~~~~~~");
            Set<Map.Entry<String,Integer>> entrySet = map.entrySet();
            for(Map.Entry<String,Integer> entry : entrySet) {
                System.out.println(entry);
            }

        }

        public void declareOutputFields(OutputFieldsDeclarer declarer) {

        }
    }

    public static void main(String[] args) {

        // Build the Topology from the Spout and Bolts via TopologyBuilder
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("DataSourceSpout", new DataSourceSpout());
        builder.setBolt("SplitBolt", new SplitBolt()).shuffleGrouping("DataSourceSpout");
        builder.setBolt("CountBolt", new CountBolt()).shuffleGrouping("SplitBolt");

        // Create a local cluster
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("LocalWordCountStormTopology",
                new Config(), builder.createTopology());

    }

}

















