Storm WordCount Example

WordCount spout

The spout emits one randomly chosen sentence from a fixed list roughly once per second and declares a single output field named "sentence".

package storm.demo;

import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

import java.util.Map;
import java.util.Random;

public class WordCount extends BaseRichSpout {

    private SpoutOutputCollector collector;
    // A fixed set of sample sentences to emit
    private static final String[] msgs = new String[]{
            "I have a dream",
            "My dream is to be a data analyst",
            "So many people dream about better life",
            "better job, doing more and achieving more",
            "creating new things",
            "keep on rolling the routine",
            "Our brains articulate our visions but it is ONLY our hearts that can make it happen",
            "We all dream"
    };
    private static final Random random = new Random();


    @Override
    public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
        this.collector = spoutOutputCollector;
    }

    @Override
    public void nextTuple() {
        // Pick a random sample sentence and emit it as a one-field tuple
        String sentence = msgs[random.nextInt(msgs.length)];
        collector.emit(new Values(sentence));
        // Throttle emission to roughly one tuple per second
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        // Name the output field so downstream bolts can look it up by name
        outputFieldsDeclarer.declare(new Fields("sentence"));
    }
}
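
This spout emits unanchored tuples, so Storm does not track whether they are fully processed. If at-least-once delivery were needed, a spout can attach a message ID to each emit and override ack/fail. A minimal sketch under that assumption (ReliableWordSpout is a hypothetical name, and the sample data is abbreviated):

package storm.demo;

import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

import java.util.Map;
import java.util.Random;
import java.util.UUID;

// Sketch only: same idea as the WordCount spout above, but each emit carries a message ID
// so Storm calls ack()/fail() once the tuple tree completes or times out
public class ReliableWordSpout extends BaseRichSpout {

    private static final String[] msgs = {"I have a dream", "We all dream"}; // abbreviated sample data
    private static final Random random = new Random();
    private SpoutOutputCollector collector;

    @Override
    public void open(Map map, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void nextTuple() {
        String sentence = msgs[random.nextInt(msgs.length)];
        // Attaching a message ID anchors the tuple at the spout (hypothetical ID scheme)
        collector.emit(new Values(sentence), UUID.randomUUID().toString());
    }

    @Override
    public void ack(Object msgId) {
        // Tuple tree fully processed; nothing to do in this sketch
    }

    @Override
    public void fail(Object msgId) {
        // Timed out or failed downstream; a real spout would re-emit the sentence here
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("sentence"));
    }
}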

SplitSentenceBolt

This bolt splits each incoming sentence on spaces and emits one tuple per word under the field name "word".

//This bolt splits each sentence into individual words
package storm.demo;

import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.IBasicBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import java.util.Map;

/**
 * Splits each sentence received from the spout on spaces and emits one tuple per word.
 */
public class SplitSentenceBolt implements IBasicBolt {
    @Override
    public void prepare(Map map, TopologyContext topologyContext) {

    }

    @Override
    public void execute(Tuple tuple, BasicOutputCollector basicOutputCollector) {
        // Tuple values can be read by position (getString(0)) or by field name
        final String sentence = tuple.getString(0);
        for (String word : sentence.split(" ")) {
            basicOutputCollector.emit(new Values(word));
        }
    }

    @Override
    public void cleanup() {

    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        // Name the output field for the downstream bolt
        outputFieldsDeclarer.declare(new Fields("word"));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
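
Since prepare, cleanup, and getComponentConfiguration are all empty here, the same bolt can be written more compactly by extending BaseBasicBolt, which provides no-op defaults for those methods. A minimal sketch of that variant (the class name SplitSentenceBasicBolt is just for illustration):

package storm.demo;

import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

// Same behavior as SplitSentenceBolt, with the empty lifecycle methods inherited from BaseBasicBolt
public class SplitSentenceBasicBolt extends BaseBasicBolt {

    @Override
    public void execute(Tuple tuple, BasicOutputCollector collector) {
        for (String word : tuple.getStringByField("sentence").split(" ")) {
            collector.emit(new Values(word));
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }
}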

WordCountBolt

This bolt keeps a per-task running count for each word in an in-memory map, appends each updated count to a file, and emits the (word, count) pair.

//This bolt counts words and appends the running totals to a file
package storm.demo;

import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.IBasicBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;

/**
 * Counts each word and appends the running totals to a file on local disk.
 */
public class WordCountBolt implements IBasicBolt {

    private Map<String,Integer> counts = new HashMap<String, Integer>();
    private FileWriter fw = null;
    /**
     * Initialization hook for the bolt; called once when the bolt task is prepared.
     * @param stormConf
     * @param context
     */
    @Override
    public void prepare(Map stormConf, TopologyContext context) {
        try {
           fw = new FileWriter("/home/os/hadoop/storm/"+ UUID.randomUUID());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Called once for every incoming tuple.
     * @param input
     * @param collector
     */
    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        //String word = input.getString(0);
        String word = input.getStringByField("word");
        int count;
        if (counts.containsKey(word)) {
            count = counts.get(word); // word seen before: take its current count
        } else {
            count = 0;
        }
        count++;
        counts.put(word, count);
        // append the updated count to the output file
        try {
            fw.write(word+":"+count+"\n");
            fw.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
        collector.emit(new Values(word,count));
    }

    @Override
    public void cleanup() {
        // Close the output file when the bolt shuts down
        try {
            if (fw != null) fw.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // The last bolt in a topology may omit this declaration, since nothing consumes its output
        declarer.declare(new Fields("word","count"));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
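
Each call to execute appends one word:count line, so the file on disk is a running log of intermediate counts rather than a final tally. For example, assuming the words arrive in the order dream, have, dream, the file would contain:

dream:1
have:1
dream:2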

MyTopology

The topology wires the spout and bolts together. With a command-line argument it is submitted to a Storm cluster under that name; with no arguments it runs in an in-process LocalCluster for testing.

package storm.demo;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;

public class MyTopology {

    public static void main(String[] args) throws InvalidTopologyException, AuthorizationException, AlreadyAliveException {
        final TopologyBuilder builder = new TopologyBuilder();


        builder.setSpout("WordSpout", new WordCount(), 2);
        // shuffleGrouping: sentences are distributed randomly across the SplitBolt tasks
        builder.setBolt("SplitBolt", new SplitSentenceBolt(), 10).shuffleGrouping("WordSpout");
        // fieldsGrouping on "word": the same word always goes to the same WordCountBolt task,
        // so each task's in-memory counts stay consistent
        builder.setBolt("WordCountBolt", new WordCountBolt(), 20).fieldsGrouping("SplitBolt", new Fields("word"));

        Config config = new Config();
        config.setDebug(true);
        config.setNumWorkers(1);

        if (args != null && args.length > 0) {
            // Submit to the Storm cluster via StormSubmitter, using the first argument as the topology name
            StormSubmitter.submitTopology(args[0], config, builder.createTopology());
        } else {
            // With no arguments, run in an in-process LocalCluster for local testing
            LocalCluster cluster = new LocalCluster();
            // Submit the topology to the local cluster
            cluster.submitTopology("WordCount", config, builder.createTopology());
        }
    }
}
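
To run against a cluster, the project is packaged as a jar and submitted with the storm client; the topology name given on the command line becomes args[0] above. A sketch, assuming the jar is built as storm-demo.jar (the jar name and topology name are placeholders):

storm jar storm-demo.jar storm.demo.MyTopology wordcount-topology

Running MyTopology with no arguments instead takes the LocalCluster branch, which is convenient for testing from an IDE.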