Eclipse-Java实现Storm的WordCount词频统计

 

目录

Spout层

Bolt层

Topology层

结果:


文章中的所有内容不明白的可以查看前后文或者call博主;

相关文章:

Storm集群安装部署1——准备版

Storm集群安装部署2——Centos6.5的默认python2.6.6版本升级到python2.7.15

Storm集群安装部署3——在节点服务器上安装 Storm并启动Storm

Storm集群安装部署4——在Storm节点服务器上安装 nimBus和supervisor

Eclipse配置、创建Maven项目

Storm集群上运行第一个“Hello World文件”

Eclipse实现Storm的WordCount词频统计

 

创建Maven项目如下:

 

我会尽量吧项目的注释写清楚一些,不懂的就问博主[/微笑]

 

Spout层

WordSpout:

package com.swun.storm.topology;

import com.swun.storm.Utils.ThreadUtils;
import com.swun.storm.bolt.WordCountBolt;
import com.swun.storm.bolt.WordReportBolt;
import com.swun.storm.bolt.WordSplitBolt;
import com.swun.storm.spout.WordSpout;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;

public class WordTopology {
	
	//定义常量
	private static final String WORD_SPOUT_ID = "word-spout";
	private static final String SPLIT_BOLT_ID = "split-bolt";
	private static final String COUNT_BOLT_ID = "count-bolt";
	private static final String REPORT_BOLT_ID = "report-bolt";
	private static final String WORD_TOPOLOGY_ID = "wordcount-topology";
	
	
	public static void main(String[] args) {
		//创建spout、bolt对象;
		WordSpout spout = new WordSpout();
		WordSplitBolt splitBolt = new WordSplitBolt();
		WordCountBolt countBolt = new WordCountBolt();
		WordReportBolt reportBolt = new WordReportBolt();
		//创建拓扑对象
		
		TopologyBuilder builder = new TopologyBuilder();
		//设置spout
		builder.setSpout(WORD_SPOUT_ID, spout);
		//WordSpot -- > SplitBolt。使用随机分组
		builder.setBolt(SPLIT_BOLT_ID, splitBolt,5).shuffleGrouping(WORD_SPOUT_ID);
		//WordSplitBolt --> WordCountBolt。使用字段分组
		builder.setBolt(COUNT_BOLT_ID, countBolt,5).fieldsGrouping(SPLIT_BOLT_ID, new Fields("word"));
		//WordCountBolt --> WordReportBollt
		builder.setBolt(REPORT_BOLT_ID, reportBolt).globalGrouping(COUNT_BOLT_ID);
		
		//创建配置
		Config config = new Config();
//		config.setNumWorkers(2);
		config.setDebug(true);
		//本地模式
		
		LocalCluster cluster = new LocalCluster();
		//提交拓扑
		cluster.submitTopology(WORD_TOPOLOGY_ID, config, builder.createTopology());
		ThreadUtils.waitForSeconds(10);
		cluster.killTopology(WORD_TOPOLOGY_ID);
		cluster.shutdown();
		
	}

}

Bolt层

WordSpiltBolt:

package com.swun.storm.bolt;

import java.util.Map;

import com.sun.org.apache.bcel.internal.classfile.Field;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

public class WordSplitBolt implements IRichBolt{
	
	
	
	/**
	 * 
	 */
	private static final long serialVersionUID = 7509915842346596564L;

	private OutputCollector collector;
	

	/**
	 * 初始化collector
	 */
	@Override
	public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
		this.collector  =  collector;
	}
	
	
	/**
	 * 分割句子
	 */
	@Override
	public void execute(Tuple input) {
		
		//接收字段名为sentence的数据
		String sentence = input.getStringByField("sentencs");
		
		//分割数据,放到字符串数组中
		String[] words = sentence.split(" ");
		
		//遍历一遍数组,将分割后的数据上传
		for(String word : words) collector.emit(new Values(word));
		
	}
	
	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		//声明发送数据的字段名为word
		declarer.declare(new Fields("word"));
		
	}
	
	@Override
	public void cleanup() {
		
	}


	@Override
	public Map<String, Object> getComponentConfiguration() {
		return null;
	}

}

 

WordCountBolt:

package com.swun.storm.bolt;

import java.util.HashMap;
import java.util.Map;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

public class WordCountBolt implements IRichBolt{

	/**
	 * 
	 */
	private static final long serialVersionUID = 2503069329578140148L;
	private OutputCollector collector;
	//用Map来统计分割后的词语
	private Map<String, Long> counts;

	
	/**
	 * 初始化
	 */
	@Override
	public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
		this.collector = collector;
		this.counts = new HashMap<String, Long>();
	}

	/**
	 * 统计数据的频率
	 */
	@Override
	public void execute(Tuple input) {
		//接收字段名为word的数据
		String wordString = input.getStringByField("word");
		
		//去map中查询数据的频率
		Long numLong = counts.get(wordString);
		
		//如果数据从来没有出现过,就初始化数据的次数为0
		if(numLong == null) numLong = 0L;
		
		//词语出现的次数+1
		numLong++;
		
		//把结果重新放入map中,方便别的bolt进程异步更改数据
		counts.put(wordString, numLong);
		
		//将更新的数据放入容器中
		collector.emit(new Values(wordString,numLong));
	}


	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		//声明发送数据的字段名为word,和count。这里是提交了两个字段
		declarer.declare(new Fields("word","count"));
	}
	
	@Override
	public void cleanup() {
		
	}


	@Override
	public Map<String, Object> getComponentConfiguration() {
		return null;
	}

}

WordReportBolt:

package com.swun.storm.bolt;

import java.util.HashMap;
import java.util.Map;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Tuple;

public class WordReportBolt implements IRichBolt{

	/**
	 * 
	 */
	private static final long serialVersionUID = -778078375282102637L;

	private OutputCollector collector;
	
	
	//保存最终的结果,缓存效果
	private Map<String, Long> countsNum;

	/**
	 * 初始化
	 */
	@Override
	public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
		this.collector = collector;
		this.countsNum = new HashMap<String, Long>();
	}
	
	@Override
	public void execute(Tuple input) {
		//接收字段名为word和count的数据。
		String word = input.getStringByField("word");
		Long count = input.getLongByField("count");
		
		//将数据统一放进countNum这个map中
		countsNum.put(word,count);
		
		//这里为什么要连个map来统计词频呢?
		//CountBolt中的counts是用来试试统计每次接收词语后的数据,每个bolt有多个进程,counts是共享数据,是实时变化的
		//countNum是用来接汇总的。汇总不同词语的总数
		
	}



	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		
	}
	

	/**
	 * 在bolt停止的时候执行该方法
	 */
	@Override
	public void cleanup() {
		
		//打印结果
		System.out.println("-----------------FINAL COUNTS-----------------");
		for(String key : countsNum.keySet()){
			System.out.println(key + ":" + countsNum.get(key));
		}
		System.out.println("----------------------------------------------");
	}



	@Override
	public Map<String, Object> getComponentConfiguration() {
		return null;
	}

}

 

Topology层

WordTopology:

package com.swun.storm.topology;

import com.swun.storm.Utils.ThreadUtils;
import com.swun.storm.bolt.WordCountBolt;
import com.swun.storm.bolt.WordReportBolt;
import com.swun.storm.bolt.WordSplitBolt;
import com.swun.storm.spout.WordSpout;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;

public class WordTopology {
	
	//定义常量
	private static final String WORD_SPOUT_ID = "word-spout";
	private static final String SPLIT_BOLT_ID = "split-bolt";
	private static final String COUNT_BOLT_ID = "count-bolt";
	private static final String REPORT_BOLT_ID = "report-bolt";
	private static final String WORD_TOPOLOGY_ID = "wordcount-topology";
	
	
	public static void main(String[] args) {
		//创建spout、bolt对象;
		WordSpout spout = new WordSpout();
		WordSplitBolt splitBolt = new WordSplitBolt();
		WordCountBolt countBolt = new WordCountBolt();
		WordReportBolt reportBolt = new WordReportBolt();
		//创建拓扑对象
		
		TopologyBuilder builder = new TopologyBuilder();
		//设置spout
		builder.setSpout(WORD_SPOUT_ID, spout);
		//WordSpot -- > SplitBolt。使用随机分组
		builder.setBolt(SPLIT_BOLT_ID, splitBolt,5).shuffleGrouping(WORD_SPOUT_ID);
		//WordSplitBolt --> WordCountBolt。使用字段分组
		builder.setBolt(COUNT_BOLT_ID, countBolt,5).fieldsGrouping(SPLIT_BOLT_ID, new Fields("word"));
		//WordCountBolt --> WordReportBollt
		builder.setBolt(REPORT_BOLT_ID, reportBolt).globalGrouping(COUNT_BOLT_ID);
		
		//创建配置
		Config config = new Config();
//		config.setNumWorkers(2);
		config.setDebug(true);
		//本地模式
		
		LocalCluster cluster = new LocalCluster();
		//提交拓扑
		cluster.submitTopology(WORD_TOPOLOGY_ID, config, builder.createTopology());
		ThreadUtils.waitForSeconds(10);
		cluster.killTopology(WORD_TOPOLOGY_ID);
		cluster.shutdown();
		
	}

}

结果:

 

 

今天的作业终于做完了T~T

  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值