WordCount on Storm

A while ago I bought a copy of the book Storm分布式实时计算模式 (Storm Blueprints: Patterns for Distributed Realtime Computation) and spent a few days reading it. Today, in some spare time at work, I decided to write a WordCount example. I stepped on a few mines along the way, so I'm posting the code here to help you avoid them.

1: The pom file

The only runtime dependency is storm-core; the maven-shade-plugin bundles everything into a single runnable jar during the package phase. storm-core is left at compile scope here because the topology is run locally with java -cp; for a real cluster deployment you would normally switch it to provided.

<properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <storm.version>0.9.4</storm.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-core</artifactId>
            <version>${storm.version}</version>
            <scope>compile</scope>
        </dependency>
    </dependencies>

    <repositories>
        <repository>
            <id>nexus</id>
            <name>local private nexus</name>
            <url>http://maven.oschina.net/content/groups/public/</url>
            <releases>
                <enabled>true</enabled>
            </releases>
            <snapshots>
                <enabled>false</enabled>
            </snapshots>
        </repository>
    </repositories>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <artifactSet>
                                <excludes>
                                    <exclude>junit:junit</exclude>
                                </excludes>
                            </artifactSet>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

2: The spout

SentenceSpout cycles through three hard-coded sentences and emits each one as a single-field tuple on the "sentence" field.

package com.tengrong.wordCount.batch.spout;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;

import java.util.Map;

/**
 * Created by wangmao on 16/12/6.
 */
public class SentenceSpout extends BaseRichSpout {

    private SpoutOutputCollector collector;
    private String[] sentences = {
            "hello wm", "banyan qBao", "my Word"
    };
    private int index = 0;

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("sentence"));
    }

    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
    }

    public void nextTuple() {
        // Storm calls nextTuple() in a tight loop on the spout's thread, so emit a
        // single sentence and return instead of spinning in while(true); a blocking
        // loop here would starve ack/fail and the spout's lifecycle callbacks.
        this.collector.emit(new Values(sentences[index]));
        index++;
        if (index >= sentences.length) {
            index = 0;
        }
        // slow the spout down a little so the demo output stays readable
        Utils.sleep(100);
    }
}
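
The spout above emits unanchored tuples, which keeps the demo simple. If you later want Storm to track and replay tuples (not part of the original post; sketched here under that assumption), the usual pattern is to pass a message ID with each emit and override ack/fail:

    // Hypothetical reliable variant of the spout's emission logic. The pending map
    // and the UUID message IDs are my additions, not part of the original post.
    private java.util.concurrent.ConcurrentHashMap<java.util.UUID, Values> pending =
            new java.util.concurrent.ConcurrentHashMap<java.util.UUID, Values>();

    public void nextTuple() {
        Values values = new Values(sentences[index]);
        java.util.UUID msgId = java.util.UUID.randomUUID();
        this.pending.put(msgId, values);
        this.collector.emit(values, msgId);   // emitting with an ID makes the tuple tracked
        index = (index + 1) % sentences.length;
        Utils.sleep(100);
    }

    @Override
    public void ack(Object msgId) {
        this.pending.remove(msgId);           // downstream bolts fully processed it
    }

    @Override
    public void fail(Object msgId) {
        this.collector.emit(this.pending.get(msgId), msgId);  // replay on failure or timeout
    }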

3: SplitSentenceBolt

SplitSentenceBolt splits each incoming sentence on spaces and emits one tuple per word on the "word" field.

package com.tengrong.wordCount.batch.bolt;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

import java.util.Map;

/**
 * Created by wangmao on 16/12/6.
 */
public class SplitSentenceBolt extends BaseRichBolt {

    private OutputCollector collector;

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }

    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector=collector;
    }

    public void execute(Tuple input) {
        // break the sentence into words and emit each word as its own tuple
        String sentence = input.getStringByField("sentence");
        String[] words = sentence.split(" ");
        for (String word : words) {
            this.collector.emit(new Values(word));
        }
    }
}
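
Not covered in the original code: with BaseRichBolt, acking is entirely manual, and the bolt above emits unanchored tuples, which is fine for this demo. If the spout emits tuples with message IDs (as in the sketch above) and you want at-least-once processing, the usual pattern is to anchor each emitted word to the incoming sentence tuple and then ack it. A minimal sketch of that variant of execute(), assuming the rest of SplitSentenceBolt stays as written:

    // Hypothetical reliable variant of execute(): anchoring ties each word to the
    // incoming sentence, so a failed word causes the whole sentence to be replayed.
    public void execute(Tuple input) {
        String sentence = input.getStringByField("sentence");
        for (String word : sentence.split(" ")) {
            this.collector.emit(input, new Values(word)); // anchored emit
        }
        this.collector.ack(input); // tell Storm the sentence has been fully handled
    }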

4: WordCountBolt

WordCountBolt keeps a per-task HashMap of word counts, increments the count for each incoming word, and emits the word together with its running total.

package com.tengrong.wordCount.batch.bolt;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

import java.util.HashMap;
import java.util.Map;

/**
 * Created by wangmao on 16/12/6.
 */
public class WordCountBolt extends BaseRichBolt {
    private OutputCollector collector;
    private HashMap<String,Long> counts=null;
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word","count"));
    }

    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector=collector;
        this.counts=new HashMap<String,Long>();
    }

    public void execute(Tuple input) {
        // look up the running count for this word, defaulting to zero on first sight
        String word = input.getStringByField("word");
        Long count = this.counts.get(word);
        if (count == null) {
            count = 0L;
        }
        count++;
        this.counts.put(word, count);
        // emit the word with its updated running total for the reporter downstream
        this.collector.emit(new Values(word, count));
    }
}

5: ReportBolt

ReportBolt is a terminal bolt: it accumulates the latest count for each word and prints a sorted report from cleanup() when the topology shuts down.

package com.tengrong.wordCount.batch.bolt;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;

import java.util.*;

/**
 * Created by wangmao on 16/12/6.
 */
public class ReportBolt extends BaseRichBolt {
    private HashMap<String,Long> counts=null;
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.counts=new HashMap<String,Long>();
    }

    public void execute(Tuple input) {
        String word=input.getStringByField("word");
        Long count=input.getLongByField("count");
        this.counts.put(word,count);
    }

    @Override
    public void cleanup() {
        // cleanup() is only guaranteed to run when the topology is killed in local
        // mode, which is exactly how this demo is executed below.
        System.out.println("----begin-----");
        List<String> keys=new ArrayList<String>();
        keys.addAll(this.counts.keySet());
        Collections.sort(keys);
        for(String key:keys){
            System.out.println(key+":"+this.counts.get(key));
        }
        System.out.println("----end-----");
    }
    public void declareOutputFields(OutputFieldsDeclarer declarer) {

    }
}
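
Because cleanup() is not a reliable hook on a real cluster, a common alternative (not in the original post; treat this sketch as an assumption) is to have ReportBolt ask Storm for periodic tick tuples and print the counts whenever one arrives:

    // Hypothetical additions to ReportBolt for periodic reporting via tick tuples.
    // Requires: import backtype.storm.Config; import backtype.storm.Constants;
    // The execute() below would replace the execute() shown above.

    @Override
    public Map<String, Object> getComponentConfiguration() {
        Map<String, Object> conf = new HashMap<String, Object>();
        conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, 10); // system tick every 10 seconds
        return conf;
    }

    public void execute(Tuple input) {
        boolean isTick = Constants.SYSTEM_COMPONENT_ID.equals(input.getSourceComponent())
                && Constants.SYSTEM_TICK_STREAM_ID.equals(input.getSourceStreamId());
        if (isTick) {
            // print the current snapshot instead of waiting for cleanup()
            for (Map.Entry<String, Long> e : this.counts.entrySet()) {
                System.out.println(e.getKey() + ":" + e.getValue());
            }
        } else {
            this.counts.put(input.getStringByField("word"), input.getLongByField("count"));
        }
    }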

6: The topology

WordCountTopology wires the pieces together: sentences are shuffle-grouped to the split bolt, words are fields-grouped on "word" so that the same word always lands on the same WordCountBolt task (which is what makes the per-task HashMap correct), and all counts are global-grouped into a single ReportBolt.

package com.tengrong.wordCount.batch.topology;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;
import com.tengrong.wordCount.batch.spout.SentenceSpout;

import com.tengrong.wordCount.batch.bolt.*;
/**
 * Created by wangmao on 16/12/6.
 */
public class WordCountTopology {
    private static final String SENTENCE_SPOUT_ID="sentences-spout";
    private static final String SPLIT_BOLT_ID="split-bolt";
    private static final String COUNT_BOLT_ID="count-bolt";
    private static final String REPORT_BOLT_ID="report-bolt";
    private static final String TOPOLOGY_NAME="word-count-topology";

    public static void main(String[] args) {
        SentenceSpout spout=new SentenceSpout();
        SplitSentenceBolt splitSentenceBolt=new SplitSentenceBolt();
        WordCountBolt wordCountBolt=new WordCountBolt();
        ReportBolt reportBolt=new ReportBolt();
        TopologyBuilder builder=new TopologyBuilder();
        builder.setSpout(SENTENCE_SPOUT_ID,spout);
        builder.setBolt(SPLIT_BOLT_ID,splitSentenceBolt).shuffleGrouping(SENTENCE_SPOUT_ID);
        builder.setBolt(COUNT_BOLT_ID,wordCountBolt).fieldsGrouping(SPLIT_BOLT_ID,new Fields("word"));
        builder.setBolt(REPORT_BOLT_ID,reportBolt).globalGrouping(COUNT_BOLT_ID);
        Config config=new Config();
        LocalCluster cluster=new LocalCluster();
        cluster.submitTopology(TOPOLOGY_NAME,config,builder.createTopology());
        try {
            // let the topology run locally for ten seconds before shutting it down
            Thread.sleep(10000);
        } catch (Exception e) {
            // ignore: nothing interrupts this demo sleep
        }
        cluster.killTopology(TOPOLOGY_NAME);
        cluster.shutdown();
    }
}
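
The topology above uses the default parallelism of one executor per component. If you want to experiment with parallelism (not something the original post does), TopologyBuilder and Config take the hints directly; a minimal sketch inside main(), with arbitrary example numbers:

    // Hypothetical parallelism hints; the numbers are illustrative only.
    builder.setSpout(SENTENCE_SPOUT_ID, spout, 2);                        // 2 spout executors
    builder.setBolt(SPLIT_BOLT_ID, splitSentenceBolt, 2).setNumTasks(4)   // 2 executors, 4 tasks
            .shuffleGrouping(SENTENCE_SPOUT_ID);
    builder.setBolt(COUNT_BOLT_ID, wordCountBolt, 4)                      // safe: fieldsGrouping keeps
            .fieldsGrouping(SPLIT_BOLT_ID, new Fields("word"));           // each word on one task
    builder.setBolt(REPORT_BOLT_ID, reportBolt)                           // single reporter instance
            .globalGrouping(COUNT_BOLT_ID);

    Config config = new Config();
    config.setNumWorkers(2);  // worker processes; only meaningful on a real cluster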


7: Running the topology

Build the jar with mvn install; the shade plugin runs in the package phase and produces a runnable fat jar.

java -cp storm-1.0-SNAPSHOT.jar com.tengrong.wordCount.batch.topology.WordCountTopology

----begin-----
Word:25
banyan:26
hello:26
my:25
qBao:26
wm:26
----end-----
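
Everything here runs in local mode via LocalCluster. For reference only (the original post does not cover cluster deployment, so treat this as an assumption): submitting the same topology to a real cluster would typically mean switching storm-core to provided scope, replacing the LocalCluster block in main() with StormSubmitter, and launching through the storm CLI, e.g. storm jar storm-1.0-SNAPSHOT.jar com.tengrong.wordCount.batch.topology.WordCountTopology.

    // Hypothetical cluster-mode ending for main(); assumes the same builder,
    // constants and Config as above. StormSubmitter throws checked exceptions
    // (AlreadyAliveException, InvalidTopologyException), so main() would need to
    // declare or handle them.
    Config config = new Config();
    config.setNumWorkers(2);
    backtype.storm.StormSubmitter.submitTopology(TOPOLOGY_NAME, config, builder.createTopology());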

The source code is available in my downloads.


Feel free to contact me at 1808568908@qq.com to discuss and learn together.

