一、开发WordCount程序:实时计算
二、Storm任务提交的过程
三、Storm内部通信的机制:由Worker中的Executor来执行
-----> ----> -----> ----
--> -------->
--> ----
package demo;
import java.util.Map;
import java.util.Random;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;
/**
 * Spout that simulates a streaming data source: every 3 seconds it emits one
 * randomly chosen sample sentence on the single output field "sentence".
 */
public class WordCountSpout extends BaseRichSpout {

    // Output stream handle supplied by Storm in open(); used to emit tuples.
    private SpoutOutputCollector collector;

    // Fixed sample sentences standing in for a real data source.
    private String[] data = {"I love Beijing","I love China","Beijing is the capital of China"};

    // Reuse one Random instance instead of allocating a new one per tuple.
    private final Random random = new Random();

    @Override
    public void nextTuple() {
        // Throttle emission so the demo output stays readable.
        Utils.sleep(3000);
        // Index by data.length so adding sentences needs no further change.
        int index = random.nextInt(data.length);
        System.out.println("采集的数据是:" + data[index]);
        this.collector.emit(new Values(data[index]));
    }

    /**
     * Called once when the spout task starts.
     *
     * @param collector the spout's output stream; cached for use in nextTuple()
     */
    @Override
    public void open(Map arg0, TopologyContext arg1, SpoutOutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("sentence"));
    }
}
package demo;
import java.util.Map;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
/**
 * Bolt that splits each incoming sentence on single spaces and emits one
 * (word, 1) tuple per word on the output fields ("word", "count").
 */
public class WordCountSplitBolt extends BaseRichBolt {

    // Output stream handle supplied by Storm in prepare().
    private OutputCollector collector;

    @Override
    public void execute(Tuple tuple) {
        // Pull the sentence produced by the spout and fan it out word by word.
        String sentence = tuple.getStringByField("sentence");
        for (String word : sentence.split(" ")) {
            this.collector.emit(new Values(word, 1));
        }
    }

    /**
     * Called once when the bolt task starts; caches the output collector.
     */
    @Override
    public void prepare(Map arg0, TopologyContext arg1, OutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word","count"));
    }
}
package demo;
import java.util.HashMap;
import java.util.Map;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
/**
 * Bolt that maintains a running count per word in an in-memory map and emits
 * the updated total on the output fields ("word", "total").
 *
 * NOTE(review): the map lives only in this task's memory — counts are lost on
 * restart and are only correct if upstream uses fieldsGrouping on "word".
 */
public class WordCountTotalBolt extends BaseRichBolt {

    // Output stream handle supplied by Storm in prepare().
    private OutputCollector collector;

    // Running totals: word -> cumulative count seen so far by this task.
    private Map<String, Integer> result = new HashMap<>();

    @Override
    public void execute(Tuple tuple) {
        String word = tuple.getStringByField("word");
        int count = tuple.getIntegerByField("count");
        // Single lookup: fold the increment into the previous total (0 if new).
        int total = result.containsKey(word) ? result.get(word) + count : count;
        result.put(word, total);
        System.out.println("输出的结果是:" + result);
        // Emit the value just computed instead of re-reading the map.
        this.collector.emit(new Values(word, total));
    }

    /**
     * Called once when the bolt task starts; caches the output collector.
     */
    @Override
    public void prepare(Map arg0, TopologyContext arg1, OutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word","total"));
    }
}
package demo;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.hdfs.bolt.HdfsBolt;
import org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat;
import org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat;
import org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy;
import org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy.Units;
import org.apache.storm.hdfs.bolt.sync.CountSyncPolicy;
import org.apache.storm.redis.bolt.RedisStoreBolt;
import org.apache.storm.redis.common.config.JedisPoolConfig;
import org.apache.storm.redis.common.mapper.RedisDataTypeDescription;
import org.apache.storm.redis.common.mapper.RedisStoreMapper;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.ITuple;
/**
 * Wires up the WordCount topology and runs it on a LocalCluster:
 * spout -> split bolt -> total bolt -> HBase sink.
 *
 * createHDFSBolt() / createRedisBolt() build alternative sinks (HDFS, Redis)
 * that can be swapped in for the HBase bolt; they are currently unused.
 */
public class WordCountTopology {

    public static void main(String[] args) throws Exception {
        TopologyBuilder builder = new TopologyBuilder();
        // Data source: emits random sentences on the "sentence" field.
        builder.setSpout("mywordcount_spout", new WordCountSpout());
        // Split sentences into (word, 1) pairs; shuffle for even load.
        builder.setBolt("mywordcount_split", new WordCountSplitBolt()).shuffleGrouping("mywordcount_spout");
        // fieldsGrouping on "word" routes the same word to the same task,
        // which the in-memory counting in WordCountTotalBolt relies on.
        builder.setBolt("mywordcount_total", new WordCountTotalBolt()).fieldsGrouping("mywordcount_split", new Fields("word"));
        // Sink: persist running totals to HBase. Component id renamed from
        // "mywordcount_hdfs" — it was misleading, the bolt writes to HBase.
        builder.setBolt("mywordcount_hbase", new WordCountHBaseBolt()).shuffleGrouping("mywordcount_total");

        StormTopology topology = builder.createTopology();
        Config conf = new Config();
        // Run in-process for the demo (use StormSubmitter for a real cluster).
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("mywordcount", conf, topology);
    }

    /** Builds an HDFS sink: "|"-delimited records under /stormresult, rotated every 5 MB. */
    private static IRichBolt createHDFSBolt() {
        HdfsBolt bolt = new HdfsBolt();
        bolt.withFsUrl("hdfs://192.168.157.11:9000");
        bolt.withFileNameFormat(new DefaultFileNameFormat().withPath("/stormresult"));
        bolt.withRecordFormat(new DelimitedRecordFormat().withFieldDelimiter("|"));
        bolt.withRotationPolicy(new FileSizeRotationPolicy(5.0f, Units.MB));
        // Flush to HDFS after every 1024 tuples.
        bolt.withSyncPolicy(new CountSyncPolicy(1024));
        return bolt;
    }

    /** Builds a Redis sink that stores word -> total in the hash "wordcount". */
    private static IRichBolt createRedisBolt() {
        JedisPoolConfig.Builder builder = new JedisPoolConfig.Builder();
        builder.setHost("192.168.157.11");
        builder.setPort(6379);
        JedisPoolConfig poolConfig = builder.build();
        return new RedisStoreBolt(poolConfig, new RedisStoreMapper() {
            @Override
            public RedisDataTypeDescription getDataTypeDescription() {
                return new RedisDataTypeDescription(RedisDataTypeDescription.RedisDataType.HASH, "wordcount");
            }

            @Override
            public String getValueFromTuple(ITuple tuple) {
                return String.valueOf(tuple.getIntegerByField("total"));
            }

            @Override
            public String getKeyFromTuple(ITuple tuple) {
                return tuple.getStringByField("word");
            }
        });
    }
}
package demo;

import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;
public class WordCountHBaseBolt extends BaseRichBolt {
//定义一个HBase的客户端
private HTable table
@Override
public void execute(Tuple tuple) {
//得到上一个组件处理的数据
String word = tuple.getStringByField("word")
int total = tuple.getIntegerByField("total")
//创建一个Put对象
Put put = new Put(Bytes.toBytes(word))
put.add(Bytes.toBytes("info"), Bytes.toBytes("word"), Bytes.toBytes(word))
put.add(Bytes.toBytes("info"), Bytes.toBytes("total"), Bytes.toBytes(String.valueOf(total)))
try{
table.put(put)
}catch(Exception ex){
ex.printStackTrace()
}
}
@Override
public void prepare(Map arg0, TopologyContext arg1, OutputCollector arg2) {
// 初始化:指定HBase的相关信息
//指定ZK的地址
Configuration conf = new Configuration()
conf.set("hbase.zookeeper.quorum", "192.168.157.11")
try{
table = new HTable(conf, "result")
}catch(Exception ex){
ex.printStackTrace()
}
}
@Override
public void declareOutputFields(OutputFieldsDeclarer arg0) {
// TODO Auto-generated method stub
}
}