利用 Storm 实现简单的单词统计
添加依赖pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the Storm word-count demo. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- groupId and version are mandatory coordinates when no parent POM supplies them. -->
    <groupId>com.storm.demo</groupId>
    <artifactId>storm-wordcount</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <java.version>1.8</java.version>
        <!-- Wire java.version into the compiler plugin; on its own the property is not read by Maven. -->
        <maven.compiler.source>${java.version}</maven.compiler.source>
        <maven.compiler.target>${java.version}</maven.compiler.target>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-core</artifactId>
            <version>1.1.1</version>
            <!-- Left at compile scope so the topology can run in local mode.
                 Enable the line below when packaging for submission to a real cluster. -->
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <archive>
                        <manifest>
                            <!-- Must name the class that declares main() in this project. -->
                            <mainClass>com.storm.demo.WordCountTopology</mainClass>
                        </manifest>
                    </archive>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
创建SentenceSpout.java
package com.storm.demo;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import java.util.Map;
/**
* 路径:com.storm.demo
* 类名:
* 功能:《用一句话描述一下》
* 备注:
* 创建人:typ
* 创建时间:2018/7/24 21:44
* 修改人:
* 修改备注:
* 修改时间:
*/
public class SentenceSpout extends BaseRichSpout {
private SpoutOutputCollector collector;
//初始化方法
public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
this.collector = spoutOutputCollector;
}
//storm框架在while(true) 调用nextTuple
public void nextTuple() {
collector.emit(new Values("i am love storm"));
}
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("love"));
}
}
创建SplitSentenceBolt.java
package com.storm.demo;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import java.util.Map;
/**
* 路径:com.storm.demo
* 类名:
* 功能:《用一句话描述一下》
* 备注:
* 创建人:typ
* 创建时间:2018/7/24 21:48
* 修改人:
* 修改备注:
* 修改时间:
*/
public class SplitSentenceBolt extends BaseRichBolt {
private OutputCollector collector;
//初始化方法
public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
this.collector = outputCollector;
}
//被storm框架while(true) 循环调用 传入参数tuple
public void execute(Tuple tuple) {
String line = tuple.getString(0);
String[] words = line.split(" ");
for(String word : words){
this.collector.emit(new Values(word,1));
}
}
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("word","count"));
}
}
创建WordCountBolt.java
package com.storm.demo;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;
import java.util.HashMap;
import java.util.Map;
/**
* 路径:com.storm.demo
* 类名:
* 功能:《用一句话描述一下》
* 备注:
* 创建人:typ
* 创建时间:2018/7/24 21:52
* 修改人:
* 修改备注:
* 修改时间:
*/
public class WordCountBolt extends BaseRichBolt {
private OutputCollector collector;
private HashMap<String,Integer> map = new HashMap<String, Integer>();
public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
this.collector = outputCollector;
}
public void execute(Tuple tuple) {
String word = tuple.getString(0);
Integer num = tuple.getInteger(1);
if(map.containsKey(word)){
Integer count = map.get(word);
map.put(word,count+num);
}else{
map.put(word,1);
}
System.out.println("count"+map);
}
public void declareOutputFields(OutputFieldsDeclarer declarer) {
// declarer.declare(new Fields("word","count"));
}
}
创建WordCountTopology.java
package com.storm.demo;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;
/**
 * Entry point that wires the word-count topology and submits it to an in-process
 * {@link LocalCluster}.
 *
 * <p>Pipeline: {@code SentenceSpout} -> {@code SplitSentenceBolt} (shuffle grouping)
 * -> {@code WordCountBolt} (fields grouping on {@code "word"}).
 *
 * <p>Package: {@code com.storm.demo}
 *
 * @author typ
 * @since 2018/7/24 22:02
 */
public class WordCountTopology {

    private static final String SPOUT_ID = "spout";
    private static final String SPLIT_BOLT_ID = "myBolt1";
    private static final String COUNT_BOLT_ID = "myBolt2";
    private static final String TOPOLOGY_NAME = "mywordcount";

    /** Number of workers requested for the topology. */
    private static final int WORKER_COUNT = 2;

    /**
     * Builds the topology and runs it in local mode.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if building or submitting the topology fails
     */
    public static void main(String[] args) throws Exception {
        TopologyBuilder topology = assembleTopology();

        // The config specifies how many workers the topology asks for.
        Config conf = new Config();
        conf.setNumWorkers(WORKER_COUNT);

        // Two submission modes exist: remote and local.
        // 1. Remote mode:
        //    StormSubmitter.submitTopology("mywordcount", config, builder.createTopology());
        // 2. Local mode (used here):
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology(TOPOLOGY_NAME, conf, topology.createTopology());
    }

    /** Registers the spout and both bolts, with their parallelism hints and groupings. */
    private static TopologyBuilder assembleTopology() {
        TopologyBuilder topology = new TopologyBuilder();
        topology.setSpout(SPOUT_ID, new SentenceSpout(), 1);
        topology.setBolt(SPLIT_BOLT_ID, new SplitSentenceBolt(), 10)
                .shuffleGrouping(SPOUT_ID);
        topology.setBolt(COUNT_BOLT_ID, new WordCountBolt(), 2)
                .fieldsGrouping(SPLIT_BOLT_ID, new Fields("word"));
        return topology;
    }
}
运行结果: