# 组织架构

**主程序**
package com.wenglei.storm.demo;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;
// Entry point: wires spout and bolts into a word-count topology and submits it locally.
public class TopologMain {
    public static void main(String[] args) {
        // 1. Build the topology graph: readFileSpout -> splitBolt -> countBolt.
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("readFileSpout", new ReadFileSpout());
        // shuffleGrouping: lines are distributed randomly across splitBolt tasks,
        // which is fine because splitting is stateless.
        builder.setBolt("splitBolt", new SplitBolt()).shuffleGrouping("readFileSpout");
        // fieldsGrouping on "word": guarantees the same word always reaches the
        // same countBolt task. The original shuffleGrouping silently produces
        // wrong counts as soon as countBolt parallelism is > 1.
        builder.setBolt("countBolt", new CountBolt()).fieldsGrouping("splitBolt", new Fields("word"));
        // 2. Submit the job. Two modes exist: local (here, via LocalCluster)
        //    and cluster submission (via StormSubmitter).
        // arg1: topology name; arg2: runtime config; arg3: the topology itself.
        LocalCluster localCluster = new LocalCluster();
        localCluster.submitTopology("wordCount", new Config(), builder.createTopology());
    }
}
**数据源**
package com.wenglei.storm.demo;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import java.util.Map;
import java.util.Random;
//读取文件的spout程序
// Spout that simulates reading a file: each call emits one randomly chosen user record.
public class ReadFileSpout extends BaseRichSpout {
    // Collector used to push tuples to downstream bolts.
    private SpoutOutputCollector collector;
    // Randomness source for picking a record per emit.
    private Random random;
    // Canned "file" contents: one "id name age" record per entry.
    private String[] users;

    /**
     * One-time initialization, invoked when the spout instance is set up.
     *
     * @param map             storm configuration map (unused here)
     * @param topologyContext storm context object (unused here)
     * @param collector       collector for emitting tuples downstream
     */
    @Override
    public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector collector) {
        this.collector = collector;
        this.random = new Random();
        this.users = new String[]{"1 zhaoliying 18", "2 yangmi 19", "3 tongliya 20", "4 zhangxiulian 24"};
    }

    // Called repeatedly by storm after the topology is submitted;
    // each invocation picks one record at random and emits it.
    @Override
    public void nextTuple() {
        String record = users[random.nextInt(users.length)];
        collector.emit(new Values(record));
    }

    // Declares the output schema: every emitted tuple carries a single field "line".
    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("line"));
    }
}
**数据处理的程序**

1. 数据分割
package com.wenglei.storm.demo;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import java.util.Arrays;
import java.util.Map;
// Bolt that splits each incoming line on spaces and emits a (name, 1) pair.
public class SplitBolt extends BaseRichBolt {
    // Collector used to emit tuples downstream.
    private OutputCollector outputCollector;

    /**
     * One-time initialization.
     *
     * @param map             storm configuration map (unused here)
     * @param topologyContext storm context object (unused here)
     * @param outputCollector collector for emitting tuples downstream
     */
    @Override
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
        this.outputCollector = outputCollector;
    }

    /**
     * Splits the "line" field and emits the second token (the user name)
     * tagged with an initial count of 1.
     */
    @Override
    public void execute(Tuple tuple) {
        // Pull the raw line out of the tuple.
        String line = tuple.getStringByField("line");
        String[] words = line.split(" ");
        // Guard: the original code threw ArrayIndexOutOfBoundsException on a
        // line with fewer than two tokens; skip malformed lines instead.
        if (words.length >= 2) {
            outputCollector.emit(Arrays.asList(words[1], 1));
        }
        // Ack the input so reliability tracking (if enabled) can complete.
        outputCollector.ack(tuple);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("word", "count"));
    }
}
2. 数据输出
package com.wenglei.storm.demo;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;
import java.util.HashMap;
import java.util.Map;
// Terminal bolt: keeps a running count per word and prints each update to stdout.
public class CountBolt extends BaseRichBolt {
    private OutputCollector outputCollector;
    // Running counts keyed by word. NOTE(review): this is per-task state only —
    // correct totals require the upstream grouping to route a given word to a
    // single task (fieldsGrouping on "word").
    private Map<String, Integer> map = new HashMap<>();

    /**
     * One-time initialization.
     *
     * @param stormConf       storm configuration map (unused here; renamed from
     *                        "map" to stop shadowing the counts field)
     * @param topologyContext storm context object (unused here)
     * @param outputCollector collector reference, kept for acking
     */
    @Override
    public void prepare(Map stormConf, TopologyContext topologyContext, OutputCollector outputCollector) {
        this.outputCollector = outputCollector;
    }

    /**
     * Accumulates the incoming count into the per-word total and prints it.
     */
    @Override
    public void execute(Tuple tuple) {
        // 1. Extract the word and its partial count from the tuple.
        String word = tuple.getStringByField("word");
        Integer count = tuple.getIntegerByField("count");
        // Accumulate consistently: the original stored `count` on first sight
        // but then ignored it and added a hard-coded 1 on every update, which
        // only works when upstream always emits exactly 1.
        Integer previous = map.get(word);
        Integer total = (previous == null) ? count : previous + count;
        map.put(word, total);
        System.out.println(word + " " + total);
        // Terminal bolt: ack so reliability tracking (if enabled) completes.
        outputCollector.ack(tuple);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        // Emits nothing downstream, so there is no schema to declare.
    }
}