import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.utils.Utils;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.task.ShellBolt;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import java.util.*;
//import java.util.HashMap;
//import java.util.Map;
//import java.util.Random;
//import java.util.StringTokenizer;
/*
** WordCountTopolopgyAllInJava类(单词计数)
*/
public class WordCountTopolopgyAllInJava{
// 定义一个喷头,用于产生数据。该类继承自BaseRichSpout
public static class RandomSentenceSpout extends BaseRichSpout {
SpoutOutputCollector _collector;
Random _rand;
@Override
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector){
_collector = collector;
_rand = new Random();
}
@Override
public void nextTuple(){
// 睡眠一段时间后再产生一个数据
Utils.sleep(100);
// 句子数组
String[] sentences = new String[]{ "the cow jumped over the moon", "an apple a day keeps the doctor away",
"four score and seven years ago", "snow white and the seven dwarfs", "i am at two with nature" };
// 随机选择一个句子
String sentence = sentences[_rand.nextInt(sentences.length)];
// 发射该句子给Bolt
_collector.emit(new Values(sentence));
}
// 确认函数
@Override
public void ack(Object id){
}
// 处理失败的时候调用
@Override
public void fail(Object id){
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer){
// 定义一个字段word
declarer.declare(new Fields("word"));
}
}
// 定义个Bolt,用于将句子切分为单词
public static class SplitSentence extends BaseBasicBolt{
@Override
public void execute(Tuple tuple, BasicOutputCollector collector){
// 接收到一个句子
String sentence = tuple.getString(0);
// 把句子切割为单词
StringTokenizer iter = new StringTokenizer(sentence);
// 发送每一个单词
while(iter.hasMoreElements()){
collector.emit(new Values(iter.nextToken()));
}
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer){
// 定义一个字段
declarer.declare(new Fields("word"));
}
}
// 定义一个Bolt,用于单词计数
public static class WordCount extends BaseBasicBolt {
Map<String, Integer> counts = new HashMap<String, Integer>();
@Override
public void execute(Tuple tuple, BasicOutputCollector collector){
// 接收一个单词
String word = tuple.getString(0);
// 获取该单词对应的计数
Integer count = counts.get(word);
if(count == null)
count = 0;
// 计数增加
count++;
// 将单词和对应的计数加入map中
counts.put(word,count);
System.out.println("hello word!");
System.out.println(word +" "+count);
// 发送单词和计数(分别对应字段word和count)
collector.emit(new Values(word, count));
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer){
// 定义两个字段word和count
declarer.declare(new Fields("word","count"));
}
}
public static void main(String[] args) throws Exception
{
// 创建一个拓扑
TopologyBuilder builder = new TopologyBuilder();
// 设置Spout,这个Spout的名字叫做"Spout",设置并行度为5
builder.setSpout("Spout", new RandomSentenceSpout(), 5);
// 设置slot——“split”,并行度为8,它的数据来源是spout的
builder.setBolt("split", new SplitSentence(), 8).shuffleGrouping("spout");
// 设置slot——“count”,你并行度为12,它的数据来源是split的word字段
builder.setBolt("count", new WordCount(), 12).fieldsGrouping("split", new Fields("word"));
Config conf = new Config();
conf.setDebug(false);
//if(args != null && args.length > 0){
//if(false){
// conf.setNumWorkers(3);
// StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
//}else{
conf.setMaxTaskParallelism(3);
// 本地集群
LocalCluster cluster = new LocalCluster();
// 提交拓扑(该拓扑的名字叫word-count)
cluster.submitTopology("word-count", conf, builder.createTopology() );
Thread.sleep(10000);
// cluster.shutdown();
//}
}
}
使用maven编译该项目: mvn clean package
运行:storm jar word-count-1.0.jar WordCountTopolopgyAllInJava
结果如下:
hello word!
moon 811
hello word!
an 829
hello word!
apple 829
hello word!
a 829
hello word!
keeps 829
hello word!
day 829
hello word!
score 800
hello word!
pom.xml文件定义如下
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>storm-yqj</groupId>
<artifactId>word-count</artifactId>
<version>1.0</version>
<packaging>jar</packaging>
<name>word-count</name>
<url>http://maven.apache.org</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.8.5</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
<version>1.9.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.easytesting</groupId>
<artifactId>fest-assert-core</artifactId>
<version>2.0M8</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.jmock</groupId>
<artifactId>jmock</artifactId>
<version>2.6.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-core</artifactId>
<version>0.9.1-incubating</version>
</dependency>
<dependency>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
<version>3.2.1</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>15.0</version>
</dependency>
</dependencies>
<build>
<resources>
<resource>
<directory>${basedir}/multilang</directory>
</resource>
</resources>
<plugins>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<archive>
<manifest>
<mainClass></mainClass>
</manifest>
</archive>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>com.theoryinpractise</groupId>
<artifactId>clojure-maven-plugin</artifactId>
<version>1.3.12</version>
<extensions>true</extensions>
<configuration>
<sourceDirectories>
<sourceDirectory>src/clj</sourceDirectory>
</sourceDirectories>
</configuration>
<executions>
<execution>
<id>compile</id>
<phase>compile</phase>
<goals>
<goal>compile</goal>
</goals>
</execution>
<execution>
<id>test</id>
<phase>test</phase>
<goals>
<goal>test</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>1.2.1</version>
<executions>
<execution>
<goals>
<goal>exec</goal>
</goals>
</execution>
</executions>
<configuration>
<executable>java</executable>
<includeProjectDependencies>true</includeProjectDependencies>
<includePluginDependencies>false</includePluginDependencies>
<classpathScope>compile</classpathScope>
<mainClass>${storm.topology}</mainClass>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.6</source>
<target>1.6</target>
</configuration>
</plugin>
</plugins>
</build>
</project>