7、Storm案例之WordCount
- pom.xml
- WordCountTopology.java
- WordCountSpout.java
- LineBolt.java
- CountBolt.java
pom.xml
-
<!-- Build properties: UTF-8 sources, compile for Java 8. -->
<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
</properties>
<dependencies>
    <!-- Unit-testing only; not packaged into the topology jar. -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
        <scope>test</scope>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.storm/storm-core -->
    <dependency>
        <groupId>org.apache.storm</groupId>
        <artifactId>storm-core</artifactId>
        <version>1.2.3</version>
    </dependency>
</dependencies>
WordCountTopology.java
-
package com.yjx; import org.apache.storm.Config; import org.apache.storm.LocalCluster; import org.apache.storm.generated.StormTopology; import org.apache.storm.topology.IBasicBolt; import org.apache.storm.topology.TopologyBuilder; public class WordCountTopology { public static void main(String[] args) { //创建Topology的构建器 TopologyBuilder topologyBuilder = new TopologyBuilder(); //开始构建整个流程(Spout) topologyBuilder.setSpout("WordCountSpout", new WordCountSpout()); //开始构建整个流程(Bolt) topologyBuilder.setBolt("LineBolt", new LineBolt()).shuffleGrouping("WordCountSpout"); topologyBuilder.setBolt("CountBolt", new CountBolt()).shuffleGrouping("LineBolt"); //启动Topology Config conf = new Config(); conf.put("wordsFile","D:\\ha\\test.txt"); //创建一个topology StormTopology topology = topologyBuilder.createTopology(); //本地模式启动集群 LocalCluster localCluster = new LocalCluster(); localCluster.submitTopology("WordCountSpout", conf, topology); } }
WordCountSpout.java
-
package com.yjx; import org.apache.storm.spout.SpoutOutputCollector; import org.apache.storm.task.TopologyContext; import org.apache.storm.topology.OutputFieldsDeclarer; import org.apache.storm.topology.base.BaseRichSpout; import org.apache.storm.tuple.Fields; import org.apache.storm.tuple.Values; import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.util.Map; public class WordCountSpout extends BaseRichSpout { //声明一个SpoutOutputCollector对象,用于发送数据 private SpoutOutputCollector collector; private int count=0; //判断停止条件 private boolean completed=false; private FileReader fileReader; @Override public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) { try { //获取创建Topology时指定的要读取的文件路径 this.fileReader = new FileReader("D:\\ha\\test.txt"); } catch (FileNotFoundException e) { throw new RuntimeException("Error reading file ["+ "wordFile"+ "]"); } //获取初始化时的发送器 this.collector = spoutOutputCollector; } @Override public void nextTuple() { if (completed) { try { Thread.sleep(1000); } catch (InterruptedException e) { // Do nothing e.printStackTrace(); } return; } String str; BufferedReader reader = new BufferedReader(fileReader); try { //将数据发送下一个Bolt //this.collector.emit(new Values(lines[(int) (Math.random() * lines.length)])); while ((str = reader.readLine()) != null) { //发射每一行,Values是一个ArrayList的实现 this.collector.emit(new Values(str)); System.out.println(" WordCountSpout = [" + Thread.currentThread().getId()+"]"); } //限制传输速度 Thread.sleep(1000); } catch (Exception e) { throw new RuntimeException("Error reading tuple", e); } finally { try { if(reader.readLine() == null) { completed = true; reader.close(); } } catch (IOException e) { e.printStackTrace(); } } } @Override public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) { outputFieldsDeclarer.declare(new Fields("line")); } }
LineBolt.java
-
package com.yjx; import org.apache.storm.task.OutputCollector; import org.apache.storm.task.TopologyContext; import org.apache.storm.topology.OutputFieldsDeclarer; import org.apache.storm.topology.base.BaseRichBolt; import org.apache.storm.tuple.Fields; import org.apache.storm.tuple.Tuple; import org.apache.storm.tuple.Values; import java.util.Map; public class LineBolt extends BaseRichBolt { private OutputCollector collector; @Override public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) { this.collector = outputCollector; } @Override public void execute(Tuple tuple) { String line = tuple.getStringByField("line"); String[] words = line.split("[\\W,.?!_—:`\"\\s]+"); System.out.println("LineBolt.execute[" + words + "][" + Thread.currentThread().getId() + "]"); for (String word : words) { word = word.trim(); if (!word.isEmpty()) { word = word.toLowerCase(); collector.emit(new Values(word)); } } } @Override public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) { outputFieldsDeclarer.declare(new Fields("word")); } }
CountBolt.java
-
package com.yjx; import org.apache.storm.task.OutputCollector; import org.apache.storm.task.TopologyContext; import org.apache.storm.topology.IBasicBolt; import org.apache.storm.topology.OutputFieldsDeclarer; import org.apache.storm.topology.base.BaseRichBolt; import org.apache.storm.tuple.Fields; import org.apache.storm.tuple.Tuple; import org.apache.storm.tuple.Values; import java.util.HashMap; import java.util.Map; public class CountBolt extends BaseRichBolt { private OutputCollector collector; //定义一个Task全局的对象存放数据 private Map<String, Integer> map = new HashMap<>(); @Override public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) { this.collector =outputCollector; } @Override public void execute(Tuple tuple) { //声明单词的数量 int count = 1; //获取单词 String word = tuple.getStringByField("word"); //判断是累加还是新增 if (map.containsKey(word)) { count = map.get(word) + 1; } map.put(word, count); System.out.println("WordBolt.execute[" + word + "][" + count + "]【" + Thread.currentThread().getId() + "】"); } @Override public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) { } }