A Complete Storm Example: WordCount

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.utils.Utils;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;

import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseBasicBolt;

import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

import java.util.*;

/*
** The WordCountTopolopgyAllInJava class (word count)
*/
public class WordCountTopolopgyAllInJava {
	
	// A spout that produces the data; it extends BaseRichSpout
	public static class RandomSentenceSpout extends BaseRichSpout {
		SpoutOutputCollector _collector;
		Random _rand;
		
		@Override
		public void open(Map conf, TopologyContext context, SpoutOutputCollector collector){
			_collector = collector;
			_rand = new Random();
		}
		
		@Override
		public void nextTuple(){
			
			// Sleep briefly before emitting the next tuple
			Utils.sleep(100);
			
			// Candidate sentences
			String[] sentences = new String[]{ "the cow jumped over the moon", "an apple a day keeps the doctor away",
				"four score and seven years ago", "snow white and the seven dwarfs", "i am at two with nature" };
			
			// Pick a sentence at random
			String sentence = sentences[_rand.nextInt(sentences.length)];
			
			// Emit the sentence to the downstream bolt
			_collector.emit(new Values(sentence));
		}
		
		// Called when an emitted tuple is acknowledged
		@Override
		public void ack(Object id){
		}
		
		// Called when an emitted tuple fails
		@Override
		public void fail(Object id){
		}
		
		@Override
		public void declareOutputFields(OutputFieldsDeclarer declarer){
			// Declare a single output field named "word"
			declarer.declare(new Fields("word"));
		}
	}
	
	// A bolt that splits sentences into words
	public static class SplitSentence extends BaseBasicBolt{
		@Override
		public void execute(Tuple tuple, BasicOutputCollector collector){
			// Read the incoming sentence
			String sentence = tuple.getString(0);
			// Tokenize the sentence into words
			StringTokenizer iter = new StringTokenizer(sentence);
			// Emit each word
			while(iter.hasMoreElements()){
				collector.emit(new Values(iter.nextToken()));
			}
		}
		
		@Override
		public void declareOutputFields(OutputFieldsDeclarer declarer){
			// Declare the output field "word"
			declarer.declare(new Fields("word"));
		}
	}
	
	// A bolt that counts words
	public static class WordCount extends BaseBasicBolt {
		Map<String, Integer> counts = new HashMap<String, Integer>();
		
		@Override
		public void execute(Tuple tuple, BasicOutputCollector collector){
			// Read the incoming word
			String word = tuple.getString(0);
			// Look up the current count for this word
			Integer count = counts.get(word);
			if(count == null)
				count = 0;
			// Increment the count
			count++;
			// Store the updated count back in the map
			counts.put(word, count);
			// Debug output; this is what shows up in the console
			System.out.println("hello word!");
			System.out.println(word + "\t" + count);
			// Emit the word and its count (fields "word" and "count")
			collector.emit(new Values(word, count));
		}
		
		@Override
		public void declareOutputFields(OutputFieldsDeclarer declarer){
			// Declare two output fields: "word" and "count"
			declarer.declare(new Fields("word","count"));
		}
	}
	public static void main(String[] args) throws Exception {
		// Build the topology
		TopologyBuilder builder = new TopologyBuilder();
		// Add the spout under the id "spout" with a parallelism hint of 5
		builder.setSpout("spout", new RandomSentenceSpout(), 5);
		// Add the "split" bolt with parallelism 8; it reads from the spout via shuffle grouping
		builder.setBolt("split", new SplitSentence(), 8).shuffleGrouping("spout");
		// Add the "count" bolt with parallelism 12; it reads from "split" with a fields
		// grouping on "word", so tuples with the same word always go to the same task
		builder.setBolt("count", new WordCount(), 12).fieldsGrouping("split", new Fields("word"));
		
		Config conf = new Config();
		conf.setDebug(false);
		
		// With a command-line argument, submit to a real cluster under that name;
		// otherwise run inside an in-process local cluster.
		if(args != null && args.length > 0){
			conf.setNumWorkers(3);
			StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
		}else{
			conf.setMaxTaskParallelism(3);
			
			// In-process local cluster
			LocalCluster cluster = new LocalCluster();
			
			// Submit the topology under the name "word-count"
			cluster.submitTopology("word-count", conf, builder.createTopology());
			
			// Let it run for a while, then shut it down
			Thread.sleep(10000);
			cluster.shutdown();
		}
	}
}
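
The spout above emits each tuple without a message ID, so Storm treats the stream as unreliable and never calls the empty ack()/fail() methods. For reference, here is a minimal sketch of what a reliable variant of RandomSentenceSpout could look like; the pending map and msgId bookkeeping are illustrative additions, not part of the original example, and it assumes the sentences array has been hoisted into a field:

	// Illustrative only: anchor each tuple to a message ID so Storm
	// invokes ack()/fail(), and replay failed sentences.
	Map<UUID, String> pending = new HashMap<UUID, String>();
	
	@Override
	public void nextTuple(){
		Utils.sleep(100);
		// assumes sentences is now a field of the spout
		String sentence = sentences[_rand.nextInt(sentences.length)];
		UUID msgId = UUID.randomUUID();
		pending.put(msgId, sentence);
		// The second argument anchors the tuple to msgId
		_collector.emit(new Values(sentence), msgId);
	}
	
	@Override
	public void ack(Object msgId){
		pending.remove(msgId);
	}
	
	@Override
	public void fail(Object msgId){
		// Re-emit the failed sentence under the same ID
		_collector.emit(new Values(pending.get(msgId)), msgId);
	}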

Build the project with Maven: mvn clean package

Run it: storm jar word-count-1.0.jar WordCountTopolopgyAllInJava
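
Alternatively, since the pom.xml below wires up exec-maven-plugin with <mainClass>${storm.topology}</mainClass> and storm-core is on the compile classpath, the local-mode topology can presumably also be launched straight from Maven, without the storm CLI:

mvn compile exec:java -Dstorm.topology=WordCountTopolopgyAllInJava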

The output looks like this (the counts keep growing over time because each WordCount task keeps a cumulative total in its in-memory map):

hello word!
moon    811
hello word!
an      829
hello word!
apple   829
hello word!
a       829
hello word!
keeps   829
hello word!
day     829
hello word!
score   800
hello word!

The pom.xml is defined as follows:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>storm-yqj</groupId>
  <artifactId>word-count</artifactId>
  <version>1.0</version>
  <packaging>jar</packaging>

  <name>word-count</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.testng</groupId>
      <artifactId>testng</artifactId>
      <version>6.8.5</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.mockito</groupId>
      <artifactId>mockito-all</artifactId>
      <version>1.9.0</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.easytesting</groupId>
      <artifactId>fest-assert-core</artifactId>
      <version>2.0M8</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.jmock</groupId>
      <artifactId>jmock</artifactId>
      <version>2.6.0</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.storm</groupId>
      <artifactId>storm-core</artifactId>
      <version>0.9.1-incubating</version>
    </dependency>


    <dependency>
      <groupId>commons-collections</groupId>
      <artifactId>commons-collections</artifactId>
      <version>3.2.1</version>
    </dependency>
    <dependency>
      <groupId>com.google.guava</groupId>
      <artifactId>guava</artifactId>
      <version>15.0</version>
    </dependency>
  </dependencies>

  <build>
    <resources>
      <resource>
        <directory>${basedir}/multilang</directory>
      </resource>
    </resources>

    <plugins>
      <plugin>
        <artifactId>maven-assembly-plugin</artifactId>
        <configuration>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
          <archive>
            <manifest>
              <mainClass></mainClass>
            </manifest>
          </archive>
        </configuration>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>

      <plugin>
        <groupId>com.theoryinpractise</groupId>
        <artifactId>clojure-maven-plugin</artifactId>
        <version>1.3.12</version>
        <extensions>true</extensions>
        <configuration>
          <sourceDirectories>
            <sourceDirectory>src/clj</sourceDirectory>
          </sourceDirectories>
        </configuration>
        <executions>
          <execution>
            <id>compile</id>
            <phase>compile</phase>
            <goals>
              <goal>compile</goal>
            </goals>
          </execution>
          <execution>
            <id>test</id>
            <phase>test</phase>
            <goals>
              <goal>test</goal>
            </goals>
          </execution>
        </executions>
      </plugin>

      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>exec-maven-plugin</artifactId>
        <version>1.2.1</version>
        <executions>
          <execution>
            <goals>
              <goal>exec</goal>
            </goals>
          </execution>
        </executions>
        <configuration>
          <executable>java</executable>
          <includeProjectDependencies>true</includeProjectDependencies>
          <includePluginDependencies>false</includePluginDependencies>
          <classpathScope>compile</classpathScope>
          <mainClass>${storm.topology}</mainClass>
        </configuration>
      </plugin>

      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <source>1.6</source>
          <target>1.6</target>
        </configuration>
      </plugin>

    </plugins>
  </build>
</project>
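
One packaging note: mvn clean package also builds word-count-1.0-jar-with-dependencies.jar through the assembly plugin above. When submitting to a real cluster with storm jar, the cluster already supplies Storm itself, so the storm-core dependency is typically marked as provided to keep it out of the bundled jar:

    <dependency>
      <groupId>org.apache.storm</groupId>
      <artifactId>storm-core</artifactId>
      <version>0.9.1-incubating</version>
      <scope>provided</scope>
    </dependency>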


