java工程实现简单Storm--统计字符串频率

WordCountSpout--数据源

SplitBolt--对字符串进行分割

WordCountBolt--统计字符串个数

PrintBolt--打印统计结果

WordCountTopology--拓扑集合处理

---------------------------------------------------------------------------------------------------------------------------------------------------------------------

WordCountSpout.java

package Word;

import java.util.Map;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;

/**
 * Data source of the word-count topology.
 *
 * <p>Each call to {@link #nextTuple()} emits one sentence from a fixed sample
 * array (cycling back to the first sentence after the last one) and then
 * pauses for three seconds.
 */
public class WordCountSpout extends BaseRichSpout{
	/** Built-in sample sentences that are emitted round-robin. */
	private static final String[] LINES=new String[]{"hello world","hello hadoop","hello storm","hello flume"};

	/** Emitter handed over in {@link #open}; used to send tuples downstream. */
	private SpoutOutputCollector collector;
	/** Index of the next sentence to emit. */
	private int i=0;

	/**
	 * Emits the next sample sentence as a single-field tuple, then sleeps for
	 * three seconds and prints a separator line.
	 */
	@Override
	public void nextTuple() {
		String line=LINES[i];
		// Wrap around so the sample data is replayed endlessly.
		i=(i+1)%LINES.length;
		collector.emit(new Values(line));
		try {
			Thread.sleep(3000);
			System.out.println("------分割线---------");
		} catch (InterruptedException e) {
			// Restore the interrupt flag instead of swallowing it so that the
			// thread running this spout can still observe the interruption.
			Thread.currentThread().interrupt();
		}
	}

	/**
	 * Initialization hook: stores the collector for later use in
	 * {@link #nextTuple()}.
	 *
	 * @param conf      topology configuration (unused)
	 * @param context   topology context (unused)
	 * @param collector emitter used to send tuples downstream
	 */
	@Override
	public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
		this.collector=collector;
	}

	/**
	 * Declares the single output field {@code "line"}; the number of declared
	 * fields matches the number of values in each emitted tuple.
	 */
	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		declarer.declare(new Fields("line"));
	}

}

SplitBolt.java

package Word;

import java.util.Map;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

/**
 * Splits each incoming sentence (field {@code "line"}) on single spaces and
 * emits every non-empty token as its own tuple with the field {@code "word"}.
 */
public class SplitBolt extends BaseRichBolt {
	/** Emitter handed over in {@link #prepare}; used to send tuples downstream. */
	private OutputCollector collector;

	/**
	 * Tokenizes the sentence carried by {@code input} and emits one tuple per
	 * word, anchored to {@code input}; the input is acknowledged afterwards.
	 *
	 * @param input tuple carrying the sentence in its {@code "line"} field
	 */
	@Override
	public void execute(Tuple input) {
		String line=input.getStringByField("line");
		for(String word:line.split(" ")){
			// Leading or repeated spaces make split(" ") yield empty tokens;
			// they are not words and must not be counted downstream.
			if(word.isEmpty()){
				continue;
			}
			// Anchor the emitted tuple to its input.
			collector.emit(input,new Values(word));
		}
		// BaseRichBolt does not ack automatically; acknowledge the input here.
		collector.ack(input);
	}

	/**
	 * Initialization hook: stores the collector for later use in
	 * {@link #execute(Tuple)}.
	 *
	 * @param conf      topology configuration (unused)
	 * @param context   topology context (unused)
	 * @param collector emitter used to send tuples downstream
	 */
	@Override
	public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
		this.collector=collector;
	}

	/**
	 * Declares the single output field {@code "word"}; the number of declared
	 * fields matches the number of values in each emitted tuple.
	 */
	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		declarer.declare(new Fields("word"));
	}

}

WordCountBolt.java

package Word;

import java.util.HashMap;
import java.util.Map;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

/**
 * Keeps a running count per word and, for every incoming word, emits the
 * word together with its updated count (fields {@code "word"} and
 * {@code "count"}).
 *
 * <p>The counts live in an in-memory map owned by this bolt instance.
 */
public class WordCountBolt extends BaseRichBolt {
	/** Emitter handed over in {@link #prepare}; used to send tuples downstream. */
	private OutputCollector collector;
	/** Occurrences seen so far, keyed by word. */
	private Map<String,Integer> wordMap;

	/**
	 * Increments the counter of the word carried by {@code input}, emits the
	 * {@code (word, count)} pair anchored to {@code input}, and acknowledges
	 * the input.
	 *
	 * @param input tuple carrying the word in its {@code "word"} field
	 */
	@Override
	public void execute(Tuple input) {
		String word=input.getStringByField("word");
		// A missing entry means this is the first occurrence of the word.
		Integer previous=wordMap.get(word);
		int count=(previous==null)?1:previous+1;
		wordMap.put(word, count);
		// Anchor the emitted tuple to its input.
		collector.emit(input,new Values(word,count));
		// BaseRichBolt does not ack automatically; acknowledge the input here.
		collector.ack(input);
	}

	/**
	 * Initialization hook: stores the collector and creates the empty
	 * counter map.
	 *
	 * @param conf      topology configuration (unused)
	 * @param context   topology context (unused)
	 * @param collector emitter used to send tuples downstream
	 */
	@Override
	public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
		this.collector=collector;
		wordMap=new HashMap<>();
	}

	/**
	 * Declares the output fields {@code "word"} and {@code "count"}; the
	 * number of declared fields matches the number of values in each emitted
	 * tuple.
	 */
	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		declarer.declare(new Fields("word","count"));
	}

}

PrintBolt.java

package Word;

import java.util.HashMap;
import java.util.Map;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;

/**
 * Terminal bolt of the word-count topology: prints every received
 * {@code (word, count)} pair to standard output and emits nothing.
 */
public class PrintBolt extends BaseRichBolt {
	/** Collector handed over in {@link #prepare}; used only to ack inputs. */
	private OutputCollector collector;

	/**
	 * Prints the word and its count as {@code word...count} and acknowledges
	 * the input tuple.
	 *
	 * @param input tuple carrying the fields {@code "word"} and {@code "count"}
	 */
	@Override
	public void execute(Tuple input) {
		// Tuple values are looked up by their declared field names.
		String word=input.getStringByField("word");
		int count=input.getIntegerByField("count");
		System.out.println(word+"..."+count);
		// BaseRichBolt does not ack automatically; acknowledge the input here.
		collector.ack(input);
	}

	/**
	 * Initialization hook: stores the collector so that
	 * {@link #execute(Tuple)} can acknowledge tuples.
	 *
	 * @param conf      topology configuration (unused)
	 * @param context   topology context (unused)
	 * @param collector collector used to acknowledge processed tuples
	 */
	@Override
	public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
		this.collector=collector;
	}

	/**
	 * Declares no output fields because this bolt does not emit tuples.
	 */
	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		// Intentionally empty: nothing is emitted downstream.
	}

}

WordCountTopology.java

package Word;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.generated.StormTopology;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;
import clojure.reflect.Field;
import numberberrouter.Number;

/**
 * Entry point that wires the word-count components together and runs the
 * resulting topology on an in-process {@link LocalCluster}.
 *
 * <p>Data flow: {@code wordcount_spout -> split_bolt -> wordcount_bolt -> print_bolt}.
 *
 * <p>NOTE(review): the class name is spelled "Topolopy" while the heading for
 * this file says WordCountTopology.java. A public Java class must live in a
 * file of the same name, so one of the two has to be renamed; the class name
 * is left untouched here to avoid breaking whoever launches it.
 */
public class WordCountTopolopy {

	/**
	 * Builds the topology and submits it to a local test cluster.
	 *
	 * @param args command-line arguments (unused)
	 * @throws AlreadyAliveException    declared for the topology submission
	 * @throws InvalidTopologyException declared for the topology submission
	 */
	public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {
		// The builder binds every component id to its upstream component(s).
		TopologyBuilder builder=new TopologyBuilder();

		// Source of sentences, with a parallelism hint of 2.
		builder.setSpout("wordcount_spout", new WordCountSpout(),2);

		// Sentence splitter: parallelism hint 2, 4 tasks, shuffle-grouped on the spout.
		builder.setBolt("split_bolt", new SplitBolt(),2)
				.setNumTasks(4)
				.shuffleGrouping("wordcount_spout");

		// Counter: parallelism hint 2, input grouped by the "word" field.
		builder.setBolt("wordcount_bolt", new WordCountBolt(),2)
				.fieldsGrouping("split_bolt",new Fields("word"));

		// Printer: shuffle-grouped on the counter's output.
		builder.setBolt("print_bolt", new PrintBolt())
				.shuffleGrouping("wordcount_bolt");

		// Materialize the topology (one submittable job).
		StormTopology topology=builder.createTopology();

		// Default configuration; nothing is overridden.
		Config conf=new Config();

		// Run on a local cluster instead of submitting to a remote one.
		LocalCluster cluster=new LocalCluster();
		cluster.submitTopology("wordcount_topolopy", conf, topology);
	}
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值