Storm Word Count Walkthrough

The example wires one spout and three bolts into a topology: SentenceSpout emits sentences, SplitSentenceBolt splits them into words, WordCountBolt keeps a running count per word, and ReportBolt prints the totals; WordCountTopology builds and runs everything in local mode.

import java.util.Map;

import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

public class SentenceSpout extends BaseRichSpout{
	
	private static final long serialVersionUID = 1L;

	/**
	 * This output collector exposes the API for emitting tuples from an {@link org.apache.storm.topology.IRichSpout}.
	 * The main difference between this output collector and {@link OutputCollector}
	 * for {@link org.apache.storm.topology.IRichBolt} is that spouts can tag messages with ids so that they can be
	 * acked or failed later on. This is the Spout portion of Storm's API to
	 * guarantee that each message is fully processed at least once.
	 */
	private SpoutOutputCollector collector;
	//private OutputCollector collector;
	
	// Sample sentences used as test data
	private String[] sentences={
			"my dog has fleas",
			"i like cold beverages",
			"the dog ate my homework",
			"don't have a cow man",
			"i don't think i like fleas"};
	
	private int index=0;
	
	/**
	 * For reference, this is how Storm's OutputFieldsDeclarer implementation records the
	 * fields declared for each stream:
	 *
	 * private Map<String, StreamInfo> _fields = new HashMap<>();
	 * public void declareStream(String streamId, boolean direct, Fields fields) {
	 *     if(_fields.containsKey(streamId)) {
	 *         throw new IllegalArgumentException("Fields for " + streamId + " already set");
	 *     }
	 *     _fields.put(streamId, new StreamInfo(fields.toList(), direct));
	 * }
	 */
	
	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		declarer.declare(new Fields("sentences"));
	}
	
	
	/**
	 * open() is defined in the ISpout interface and is called when the spout component is
	 * initialized. It receives three arguments:
	 *   - a Map containing the Storm configuration
	 *   - a TopologyContext providing information about the components in the topology
	 *   - a SpoutOutputCollector providing the methods used to emit tuples
	 */
	public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
		this.collector=collector;
	}
	
	/**
	 * nextTuple() is the core of every spout implementation: Storm calls this method
	 * repeatedly and the spout emits tuples through the output collector.
	 */
	public void nextTuple() {
		try {
			Thread.sleep(100);
		} catch (InterruptedException e) {
			// Restore the interrupt flag rather than swallowing the exception
			Thread.currentThread().interrupt();
			return;
		}
		this.collector.emit(new Values(sentences[index]));
		//System.out.println("===============");
		index++;
		if(index>=sentences.length){
			index=0;
		}
	}
}
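
The Javadoc quoted at the top of SentenceSpout notes that a spout can tag each emitted message with an ID so that it can be acked or failed later, which is how Storm guarantees at-least-once processing. The sketch below shows what a reliable variant of this spout could look like; the class name ReliableSentenceSpout, the UUID message IDs, and the empty ack()/fail() bodies are illustrative assumptions, not part of the original example.

import java.util.Map;
import java.util.UUID;

import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

// Hypothetical reliable variant of SentenceSpout: every tuple carries a message ID,
// so Storm can call ack()/fail() on this spout once the tuple tree completes or times out.
public class ReliableSentenceSpout extends BaseRichSpout{

	private static final long serialVersionUID = 1L;

	private SpoutOutputCollector collector;
	private String[] sentences={
			"my dog has fleas",
			"i like cold beverages",
			"the dog ate my homework"};
	private int index=0;

	public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
		this.collector=collector;
	}

	public void nextTuple() {
		// The second emit() argument is the message ID used for ack/fail bookkeeping
		this.collector.emit(new Values(sentences[index]), UUID.randomUUID().toString());
		index=(index+1)%sentences.length;
	}

	@Override
	public void ack(Object msgId) {
		// The tuple tree rooted at msgId was fully processed
	}

	@Override
	public void fail(Object msgId) {
		// The tuple tree failed or timed out; a real spout would re-emit the sentence here
	}

	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		declarer.declare(new Fields("sentences"));
	}
}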


import java.util.Map;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

public class SplitSentenceBolt extends BaseRichBolt{

	private static final long serialVersionUID = 1L;
	private OutputCollector collector;
	
	public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
		this.collector=collector;
	}

	public void execute(Tuple input) {
		// Split the incoming sentence on spaces and emit one tuple per word
		String sentence=input.getStringByField("sentences");
		String[] words=sentence.split(" ");
		for(String word :words){
			this.collector.emit(new Values(word));
		}
	}

	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		declarer.declare(new Fields("words"));
	}
}
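
SplitSentenceBolt above emits unanchored tuples and never acks its input, which is fine for a demo but gives up the at-least-once guarantee described in the SentenceSpout Javadoc. Below is a hedged sketch of an anchored variant; the class name is an assumption, and only the emit(input, ...) anchoring and the final ack(input) differ from the original bolt.

import java.util.Map;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

// Hypothetical anchored variant of SplitSentenceBolt.
public class AnchoredSplitSentenceBolt extends BaseRichBolt{

	private static final long serialVersionUID = 1L;
	private OutputCollector collector;

	public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
		this.collector=collector;
	}

	public void execute(Tuple input) {
		String sentence=input.getStringByField("sentences");
		for(String word : sentence.split(" ")){
			// Anchoring each word tuple to the input links it into the input's tuple tree
			this.collector.emit(input, new Values(word));
		}
		// Ack the input only after all word tuples have been emitted
		this.collector.ack(input);
	}

	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		declarer.declare(new Fields("words"));
	}
}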


import java.util.HashMap;
import java.util.Map;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

public class WordCountBolt extends BaseRichBolt{

	private static final long serialVersionUID = 1L;
	
	private OutputCollector collector;
	private HashMap<String,Long> counts=null;
	
	/**
	 * As a rule, assign primitives and serializable objects in the constructor,
	 * and instantiate non-serializable objects in prepare(); see the sketch after
	 * this class for a concrete example.
	 */
	public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
		this.collector=collector;
		this.counts=new HashMap<String,Long>();
	}
	public void execute(Tuple input) {
		String word=input.getStringByField("words");
		Long count=this.counts.get(word);
		if(count==null){
			count=0L;
		}
		count++;
		// Put the updated count back into the map; re-putting the same word overwrites the
		// old value, so the map always holds the latest count for each word
		this.counts.put(word,count);
		this.collector.emit(new Values(word,count));
	}
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		declarer.declare(new Fields("word","count"));
	}	
}
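
The comment above WordCountBolt.prepare() about constructor versus prepare() initialization can be made concrete with a small sketch. In the hypothetical WordLoggerBolt below, a serializable setting (a file path) is assigned in the constructor and shipped with the serialized bolt, while a non-serializable PrintWriter is created in prepare() on the worker that runs the task; the class, its field names, and the idea of logging the "word" field are illustrative assumptions, not part of the original example.

import java.io.IOException;
import java.io.PrintWriter;
import java.util.Map;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;

// Hypothetical bolt illustrating the constructor-vs-prepare() rule of thumb.
public class WordLoggerBolt extends BaseRichBolt{

	private static final long serialVersionUID = 1L;

	// Serializable setting: assigned in the constructor, survives bolt serialization
	private final String logPath;

	// Non-serializable resource: marked transient and created in prepare()
	private transient PrintWriter writer;

	public WordLoggerBolt(String logPath) {
		this.logPath=logPath;
	}

	public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
		try {
			// A PrintWriter cannot be serialized, so it is opened here on the worker
			this.writer=new PrintWriter(logPath);
		} catch (IOException e) {
			throw new RuntimeException("could not open "+logPath, e);
		}
	}

	public void execute(Tuple input) {
		writer.println(input.getStringByField("word"));
		writer.flush();
	}

	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		// this bolt does not emit anything
	}
}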


import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;

public class ReportBolt extends BaseRichBolt{

	private static final long serialVersionUID = 1L;
	
	private HashMap<String,Long> counts=null;

	public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
		this.counts=new HashMap<String,Long>();
	}

	public void execute(Tuple input) {
		String word=input.getStringByField("word");
		Long count=input.getLongByField("count");
		this.counts.put(word, count);
		
		
		// Print the running totals on every incoming tuple (despite the "FINAL COUNTS" label);
		// see the cleanup() sketch after this class for printing only once at shutdown
		System.out.println("--------FINAL COUNTS--------");
		List<String> keys=new ArrayList<String>();
		keys.addAll(this.counts.keySet());
		Collections.sort(keys);
		for(String key:keys){
			System.out.println(key+":"+this.counts.get(key));
		}
		System.out.println("----------------------------");
		
	}

	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		// this bolt does not emit anything
	}
}
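
ReportBolt prints the full table for every incoming tuple. When running in a LocalCluster, one option is to also override cleanup(), which Storm calls on shutdown in local mode, and print the table a single time at the end. The method below is a sketch that would be added inside ReportBolt; it reuses the class's existing counts map and is not part of the original example.

	// Hypothetical addition to ReportBolt: print the totals once when the topology is shut down.
	// Note that cleanup() is only reliably called in local mode, not on a real cluster.
	@Override
	public void cleanup() {
		System.out.println("--------FINAL COUNTS--------");
		List<String> keys=new ArrayList<String>(this.counts.keySet());
		Collections.sort(keys);
		for(String key:keys){
			System.out.println(key+":"+this.counts.get(key));
		}
		System.out.println("----------------------------");
	}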

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;

public class WordCountTopology{
	
	private static final String SENTENCE_SPOUT_ID="sentence-spout";
	private static final String SPLIT_BOLT_ID="split-bolt";
	private static final String COUNT_BOLT_ID="count-bolt";
	private static final String REPORT_BOLT_ID="report-bolt";
	private static final String TOPOLOGY_NAME="word-count-topology";
	
	public static void main(String[] args) throws InterruptedException {
		SentenceSpout spout=new SentenceSpout();
		SplitSentenceBolt splitbolt=new SplitSentenceBolt();
		WordCountBolt countbolt=new WordCountBolt();
		ReportBolt reportbolt=new ReportBolt();
		
		TopologyBuilder builder=new TopologyBuilder();
		// Parallelism hint of 2 executors; each task gets its own executor thread
		builder.setSpout(SENTENCE_SPOUT_ID,spout,2);
		// Parallelism hint of 2 executors with 4 tasks, so each executor runs 2 tasks
		builder.setBolt(SPLIT_BOLT_ID,splitbolt,2).setNumTasks(4).shuffleGrouping(SENTENCE_SPOUT_ID);
		// Sometimes tuples carrying particular values must be routed to a specific bolt instance.
		// Here fieldsGrouping guarantees that all tuples with the same "words" field value
		// are routed to the same WordCountBolt instance.
		builder.setBolt(COUNT_BOLT_ID,countbolt,2).fieldsGrouping(SPLIT_BOLT_ID,new Fields("words"));
		builder.setBolt(REPORT_BOLT_ID,reportbolt).globalGrouping(COUNT_BOLT_ID);
		
		/*Map conf=new HashMap();
		conf.put(Config.TOPOLOGY_WORKERS,4);
		conf.put(Config.TOPOLOGY_DEBUG,true);*/
		
		Config conf = new Config();
	    //conf.setDebug(true);
		LocalCluster cluster=new LocalCluster();
		cluster.submitTopology(TOPOLOGY_NAME,conf,builder.createTopology());
		
//		Thread.sleep(1000);
//		cluster.shutdown();
		
	}
}
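
The commented-out Thread.sleep / cluster.shutdown lines hint at the usual way of running this example: in local mode, let the topology run for a while and then stop it; against a real cluster, submit it with StormSubmitter instead. The fragment below is one way the end of main() could look, assuming an extra import of org.apache.storm.StormSubmitter and a main() signature of throws Exception; the 30-second run time and the args check are illustrative choices, not part of the original example.

		Config conf = new Config();
		if (args.length == 0) {
			// Local mode: run the topology for 30 seconds, then kill it and stop the embedded cluster
			LocalCluster cluster=new LocalCluster();
			cluster.submitTopology(TOPOLOGY_NAME, conf, builder.createTopology());
			Thread.sleep(30*1000);
			cluster.killTopology(TOPOLOGY_NAME);
			cluster.shutdown();
		} else {
			// Cluster mode: submit the packaged jar to a running Storm cluster under the given name
			StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
		}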

