Storm案例之WordCount

7、Storm案例之WordCount

  • pom.xml
  • WordCountTopology.java
  • WordCountSpout.java
  • LineBolt.java
  • CountBolt.java
pom.xml
  • <properties>
        <!-- Character encoding Maven uses when reading the project's source files. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- Accept Java 8 source syntax and emit Java 8 compatible bytecode. -->
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
      </properties>
    
      <dependencies>
        <!-- JUnit 4.12; "test" scope keeps it off the main (non-test) classpath. -->
        <dependency>
          <groupId>junit</groupId>
          <artifactId>junit</artifactId>
          <version>4.12</version>
          <scope>test</scope>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.storm/storm-core -->
        <!-- Apache Storm core 1.2.3. No <scope> is declared, so Maven's default (compile) scope applies. -->
        <dependency>
          <groupId>org.apache.storm</groupId>
          <artifactId>storm-core</artifactId>
          <version>1.2.3</version>
        </dependency>
      </dependencies>
    
WordCountTopology.java
  • package com.yjx;
    
    import org.apache.storm.Config;
    import org.apache.storm.LocalCluster;
    import org.apache.storm.generated.StormTopology;
    import org.apache.storm.topology.IBasicBolt;
    import org.apache.storm.topology.TopologyBuilder;
    
    public class WordCountTopology {
        public static void main(String[] args) {
            //创建Topology的构建器
            TopologyBuilder topologyBuilder = new TopologyBuilder();
            //开始构建整个流程(Spout)
            topologyBuilder.setSpout("WordCountSpout", new WordCountSpout());
            //开始构建整个流程(Bolt)
            topologyBuilder.setBolt("LineBolt", new LineBolt()).shuffleGrouping("WordCountSpout");
            topologyBuilder.setBolt("CountBolt", new CountBolt()).shuffleGrouping("LineBolt");
            //启动Topology
            Config conf = new Config();
            conf.put("wordsFile","D:\\ha\\test.txt");
            //创建一个topology
            StormTopology topology = topologyBuilder.createTopology();
            //本地模式启动集群
            LocalCluster localCluster = new LocalCluster();
            localCluster.submitTopology("WordCountSpout", conf, topology);
    
        }
    }
    
WordCountSpout.java
  • package com.yjx;
    
    import org.apache.storm.spout.SpoutOutputCollector;
    import org.apache.storm.task.TopologyContext;
    import org.apache.storm.topology.OutputFieldsDeclarer;
    import org.apache.storm.topology.base.BaseRichSpout;
    import org.apache.storm.tuple.Fields;
    import org.apache.storm.tuple.Values;
    
    import java.io.BufferedReader;
    import java.io.FileNotFoundException;
    import java.io.FileReader;
    import java.io.IOException;
    import java.util.Map;
    
    /**
     * Spout that reads a text file and emits each of its lines as a one-field tuple
     * ({@code "line"}).
     *
     * <p>The file path is taken from the topology configuration entry {@code "wordsFile"};
     * when that entry is absent the original hard-coded path is used. Once the end of the
     * file is reached the reader is closed and the spout only idles.
     *
     * <p>The file is decoded with the platform default charset (that is what
     * {@link FileReader} does).
     */
    public class WordCountSpout extends BaseRichSpout {
        /** Topology configuration key that carries the input file path. */
        private static final String WORDS_FILE_KEY = "wordsFile";

        /** Path used when the configuration has no {@code "wordsFile"} entry. */
        private static final String DEFAULT_WORDS_FILE = "D:\\ha\\test.txt";

        /** Pause between {@code nextTuple()} calls once the file has been exhausted. */
        private static final long IDLE_SLEEP_MILLIS = 1000L;

        // Collector used to emit tuples; assigned in open().
        private transient SpoutOutputCollector collector;

        // Reader over the input file; created once in open(), null after it has been closed.
        private transient BufferedReader reader;

        // True once the whole file has been emitted (or reading failed).
        private boolean completed = false;

        /**
         * Stores the collector and opens the input file.
         *
         * @param map topology configuration; {@code "wordsFile"} is read from it when present
         * @param topologyContext not used
         * @param spoutOutputCollector collector used by {@link #nextTuple()} to emit lines
         * @throws RuntimeException if the input file cannot be found
         */
        @Override
        public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
            this.collector = spoutOutputCollector;

            // Prefer the path supplied when the topology was created; fall back to the default.
            Object configured = (map == null) ? null : map.get(WORDS_FILE_KEY);
            String path = (configured == null) ? DEFAULT_WORDS_FILE : configured.toString();
            try {
                this.reader = new BufferedReader(new FileReader(path));
            } catch (FileNotFoundException e) {
                throw new RuntimeException("Error reading file [" + path + "]", e);
            }
        }

        /**
         * Emits at most one line per call so the calling thread is never blocked on the
         * whole file. After the last line has been emitted each call just sleeps briefly.
         *
         * @throws RuntimeException if reading the file fails
         */
        @Override
        public void nextTuple() {
            if (completed) {
                idle();
                return;
            }

            try {
                String line = reader.readLine();
                if (line == null) {
                    // End of file: release the handle and switch to idle mode.
                    completed = true;
                    closeReader();
                    return;
                }
                // Values is a List implementation holding the tuple's field values.
                this.collector.emit(new Values(line));
                System.out.println(" WordCountSpout = [" + Thread.currentThread().getId()+"]");
            } catch (IOException e) {
                // Do not retry a broken stream on the next call.
                completed = true;
                closeReader();
                throw new RuntimeException("Error reading tuple", e);
            }
        }

        /** Releases the file handle if the spout is shut down before reaching end of file. */
        @Override
        public void close() {
            closeReader();
        }

        /** Declares the single output field, {@code "line"}. */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
            outputFieldsDeclarer.declare(new Fields("line"));
        }

        /** Sleeps for the idle interval, restoring the interrupt flag if interrupted. */
        private void idle() {
            try {
                Thread.sleep(IDLE_SLEEP_MILLIS);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }

        /** Closes the reader if it is still open; a failure to close is reported, not rethrown. */
        private void closeReader() {
            if (reader == null) {
                return;
            }
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                reader = null;
            }
        }
    }
    
LineBolt.java
  • package com.yjx;
    
    import org.apache.storm.task.OutputCollector;
    import org.apache.storm.task.TopologyContext;
    import org.apache.storm.topology.OutputFieldsDeclarer;
    import org.apache.storm.topology.base.BaseRichBolt;
    import org.apache.storm.tuple.Fields;
    import org.apache.storm.tuple.Tuple;
    import org.apache.storm.tuple.Values;
    
    import java.util.Map;
    
    /**
     * Bolt that splits each incoming {@code "line"} into lower-cased words and emits one
     * {@code "word"} tuple per non-empty word.
     */
    public class LineBolt extends BaseRichBolt {

        /**
         * Word separators, compiled once instead of on every tuple. With the default regex
         * flags {@code \W} is ASCII-based, so any character outside {@code [a-zA-Z0-9_]}
         * (and, explicitly, the underscore) acts as a separator.
         */
        private static final java.util.regex.Pattern SEPARATORS =
                java.util.regex.Pattern.compile("[\\W,.?!_—:`\"\\s]+");

        // Collector used to emit and ack tuples; assigned in prepare().
        private OutputCollector collector;

        /**
         * Stores the collector for later use in {@link #execute(Tuple)}.
         *
         * @param map topology configuration; not used
         * @param topologyContext not used
         * @param outputCollector collector used to emit words and ack input tuples
         */
        @Override
        public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
            this.collector = outputCollector;
        }

        /**
         * Splits the tuple's {@code "line"} field, emits every non-empty word in lower case
         * anchored to the input tuple, then acks the input tuple.
         *
         * @param tuple input tuple carrying a {@code "line"} string field
         */
        @Override
        public void execute(Tuple tuple) {
            String line = tuple.getStringByField("line");
            String[] words = SEPARATORS.split(line);

            // Arrays.toString prints the actual words; concatenating the array itself would
            // only print its type and identity hash.
            System.out.println("LineBolt.execute[" + java.util.Arrays.toString(words) + "][" + Thread.currentThread().getId() + "]");

            for (String word : words) {
                // A line that starts with a separator yields a leading empty token; skip it.
                // No trim is needed because whitespace is itself a separator.
                if (!word.isEmpty()) {
                    // Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
                    collector.emit(tuple, new Values(word.toLowerCase(java.util.Locale.ROOT)));
                }
            }
            // BaseRichBolt does not ack automatically.
            collector.ack(tuple);
        }

        /** Declares the single output field, {@code "word"}. */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
            outputFieldsDeclarer.declare(new Fields("word"));
        }
    }
    
    
CountBolt.java
  • package com.yjx;
    
    import org.apache.storm.task.OutputCollector;
    import org.apache.storm.task.TopologyContext;
    import org.apache.storm.topology.IBasicBolt;
    import org.apache.storm.topology.OutputFieldsDeclarer;
    import org.apache.storm.topology.base.BaseRichBolt;
    import org.apache.storm.tuple.Fields;
    import org.apache.storm.tuple.Tuple;
    import org.apache.storm.tuple.Values;
    
    import java.util.HashMap;
    import java.util.Map;
    
    /**
     * Terminal bolt that keeps a running occurrence count for every {@code "word"} it
     * receives and prints the updated count. It declares no output fields and emits nothing.
     */
    public class CountBolt extends BaseRichBolt {

        // Collector used to ack input tuples; assigned in prepare().
        private OutputCollector collector;

        // Running total per word, held by this bolt instance. Named "counts" so it is no
        // longer shadowed by the "map" parameter of prepare().
        private final Map<String, Integer> counts = new HashMap<>();

        /**
         * Stores the collector for later use in {@link #execute(Tuple)}.
         *
         * @param map topology configuration; not used
         * @param topologyContext not used
         * @param outputCollector collector used to ack input tuples
         */
        @Override
        public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
            this.collector = outputCollector;
        }

        /**
         * Increments the total for the tuple's {@code "word"} field, prints it and acks the tuple.
         *
         * @param tuple input tuple carrying a {@code "word"} string field
         */
        @Override
        public void execute(Tuple tuple) {
            String word = tuple.getStringByField("word");
            // Inserts 1 for a word seen for the first time, otherwise adds 1 to its total.
            int count = counts.merge(word, 1, Integer::sum);
            System.out.println("WordBolt.execute[" + word + "][" + count + "]【" + Thread.currentThread().getId() + "】");
            // BaseRichBolt does not ack automatically.
            collector.ack(tuple);
        }

        /** Intentionally declares nothing: this bolt is the end of the stream. */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {

        }


    }
    
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值