业务
统计谁给谁打了多少次电话
创建一个Maven项目
创建如下结构的Maven项目
POM
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the call-log Storm example. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Project coordinates. -->
<groupId>org.example</groupId>
<artifactId>stormcalllog</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<!-- Compile with Java 8 source and target levels. -->
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
</properties>
<dependencies>
<!-- Apache Storm core library; the only dependency declared here. -->
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-core</artifactId>
<version>2.4.0</version>
</dependency>
</dependencies>
</project>
编写数据源Spout
生成源源不断的数据的Spout,并将数据封装成tuple发送给下一个节点
package com.zj.storm;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichSpout;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Random;
/**
 * Spout that generates synthetic call-log records.
 *
 * <p>Each emitted tuple has three fields: {@code from} (caller number),
 * {@code to} (callee number, always different from the caller) and
 * {@code duration} (a random integer in {@code [0, 60)}). At most
 * {@link #MAX_TUPLES} tuples are emitted over the lifetime of an instance,
 * and at most {@link #BATCH_SIZE} per {@link #nextTuple()} call.
 */
public class CallLogSpout implements IRichSpout {
    /** Pool of phone numbers that callers and callees are drawn from. */
    private static final String[] MOBILE_NUMBERS = {
        "1234123401",
        "1234123402",
        "1234123403",
        "1234123404"
    };
    /** Total number of tuples one spout instance emits. */
    private static final int MAX_TUPLES = 1000;
    /** Upper bound on tuples emitted by a single {@link #nextTuple()} call. */
    private static final int BATCH_SIZE = 100;

    /** Collector used to emit tuples; assigned in {@link #open}. */
    private SpoutOutputCollector collector;
    /** Topology context handed over in {@link #open}; stored but not otherwise used here. */
    private TopologyContext context;
    private final Random random = new Random();
    /** Number of tuples emitted so far by this instance. */
    private int emittedTotal = 0;

    /**
     * Stores the collector and the topology context for later use.
     *
     * @param map topology configuration (unused)
     * @param topologyContext context of this spout task
     * @param spoutOutputCollector collector used to emit tuples
     */
    @Override
    public void open(Map<String, Object> map, TopologyContext topologyContext,
                     SpoutOutputCollector spoutOutputCollector) {
        this.collector = spoutOutputCollector;
        this.context = topologyContext;
    }

    @Override
    public void close() {
        // Nothing to release.
    }

    @Override
    public void activate() {
        // No action needed.
    }

    @Override
    public void deactivate() {
        // No action needed.
    }

    /**
     * Emits the next batch of call records.
     *
     * <p>Emits up to {@link #BATCH_SIZE} tuples per call and stops emitting
     * once {@link #MAX_TUPLES} tuples have been produced in total; after that
     * the method returns without doing anything.
     */
    @Override
    public void nextTuple() {
        int emittedThisCall = 0;
        while (emittedThisCall < BATCH_SIZE && emittedTotal < MAX_TUPLES) {
            String caller = randomNumber();
            String callee = randomNumber();
            // Compare by value, not by reference: a call must involve two different numbers.
            while (caller.equals(callee)) {
                callee = randomNumber();
            }
            int duration = random.nextInt(60);
            // Emitted without a message id, so ack/fail are never called back for these tuples.
            collector.emit(new Values(caller, callee, duration));
            emittedThisCall++;
            emittedTotal++;
        }
    }

    /** Picks one phone number uniformly at random from {@link #MOBILE_NUMBERS}. */
    private String randomNumber() {
        return MOBILE_NUMBERS[random.nextInt(MOBILE_NUMBERS.length)];
    }

    @Override
    public void ack(Object o) {
        // Tuples carry no message id; nothing to do.
    }

    @Override
    public void fail(Object o) {
        // Tuples carry no message id; nothing to do.
    }

    /**
     * Declares the names of the output fields.
     *
     * @param outputFieldsDeclarer declarer that receives the field names
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("from", "to", "duration"));
    }

    /**
     * @return {@code null}: no component-specific configuration
     */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
编写日志创建Bolt
该Bolt负责处理数据,如拼接,拆分,分组等业务,将处理好的数据发送给下一个Bolt
package com.zj.storm;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import java.util.Map;
/**
 * Bolt that turns a raw call record into a call-log entry.
 *
 * <p>Reads {@code (from, to, duration)} by position from the incoming tuple
 * and emits {@code (call, duration)}, where {@code call} is the string
 * {@code "<from> - <to>"}.
 */
public class CallLogCreatorBolt implements IRichBolt {
    /** Collector used to emit and ack tuples; assigned in {@link #prepare}. */
    private OutputCollector collector;

    /**
     * Stores the output collector for use in {@link #execute}.
     *
     * @param map topology configuration (unused)
     * @param topologyContext context of this bolt task (unused)
     * @param outputCollector collector used to emit and ack tuples
     */
    @Override
    public void prepare(Map<String, Object> map, TopologyContext topologyContext,
                        OutputCollector outputCollector) {
        this.collector = outputCollector;
    }

    /**
     * Builds the {@code "from - to"} key for one call record and forwards it
     * together with the duration.
     *
     * @param tuple incoming tuple with caller at index 0, callee at index 1
     *              and duration at index 2
     */
    @Override
    public void execute(Tuple tuple) {
        String from = tuple.getString(0);
        String to = tuple.getString(1);
        Integer duration = tuple.getInteger(2);
        // Anchor the new tuple to its input so the tuple tree can be tracked,
        // then ack the input: IRichBolt does not ack automatically.
        collector.emit(tuple, new Values(from + " - " + to, duration));
        collector.ack(tuple);
    }

    @Override
    public void cleanup() {
        // Nothing to release.
    }

    /**
     * Declares the names of the output fields.
     *
     * @param outputFieldsDeclarer declarer that receives the field names
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("call", "duration"));
    }

    /**
     * @return {@code null}: no component-specific configuration
     */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
编写统计Bolt
该Bolt负责统计上游Bolt处理好的数据,并在清理(cleanup)时输出统计结果
package com.zj.storm;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import java.util.HashMap;
import java.util.Map;
/**
 * Bolt that counts how many times each call key has been seen.
 *
 * <p>The key is the string at index 0 of the incoming tuple. Counts are kept
 * in memory per bolt instance and printed to standard error in
 * {@link #cleanup()}.
 */
public class CallLogCounterBolt implements IRichBolt {
    /** Number of occurrences per call key; created in {@link #prepare}. */
    Map<String, Integer> counterMap;
    /** Collector used to ack tuples; assigned in {@link #prepare}. */
    private OutputCollector collector;

    /**
     * Creates an empty counter map and stores the output collector.
     *
     * @param map topology configuration (unused)
     * @param topologyContext context of this bolt task (unused)
     * @param outputCollector collector used to ack tuples
     */
    @Override
    public void prepare(Map<String, Object> map, TopologyContext topologyContext,
                        OutputCollector outputCollector) {
        this.counterMap = new HashMap<>();
        this.collector = outputCollector;
    }

    /**
     * Increments the counter of the tuple's call key and acks the tuple.
     *
     * @param tuple incoming tuple whose field at index 0 is the call key
     */
    @Override
    public void execute(Tuple tuple) {
        String call = tuple.getString(0);
        // Starts at 1 for a new key, otherwise adds 1 to the existing count.
        counterMap.merge(call, 1, Integer::sum);
        collector.ack(tuple);
    }

    /**
     * Prints every {@code key : count} pair to standard error.
     */
    @Override
    public void cleanup() {
        for (Map.Entry<String, Integer> entry : counterMap.entrySet()) {
            System.err.println(entry.getKey() + " : " + entry.getValue());
        }
    }

    /**
     * Declares a single output field named {@code call}.
     *
     * <p>NOTE(review): this bolt never emits a tuple, so the declaration is
     * currently unused; it is kept to leave the component's declared
     * interface unchanged.
     *
     * @param outputFieldsDeclarer declarer that receives the field names
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("call"));
    }

    /**
     * @return {@code null}: no component-specific configuration
     */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
编写主类
设置并行度,Worker,Task等配置参数,关联好Spout和Bolt组成DAG - Storm的Topology
package com.zj.storm;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;
/**
 * Entry point that wires the call-log topology and runs it on an in-process
 * {@link LocalCluster} for ten seconds.
 */
public class App {
    /**
     * Builds the topology, submits it to a local cluster, waits ten seconds
     * and shuts the cluster down.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the cluster cannot be created, the topology cannot
     *                   be submitted, or the wait is interrupted
     */
    public static void main(String[] args) throws Exception {
        TopologyBuilder topologyBuilder = new TopologyBuilder();
        // Spout: parallelism hint 2, 2 tasks.
        topologyBuilder.setSpout("reader-spout", new CallLogSpout(), 2).setNumTasks(2);
        // Creator bolt: parallelism hint 3, 3 tasks, fed by shuffle grouping from the spout.
        topologyBuilder.setBolt("creator-bolt", new CallLogCreatorBolt(), 3).setNumTasks(3)
                .shuffleGrouping("reader-spout");
        // Counter bolt: grouped on the "call" field so the same key always
        // reaches the same task and its count is kept in one place.
        topologyBuilder.setBolt("counter-bolt", new CallLogCounterBolt(), 4).setNumTasks(4)
                .fieldsGrouping("creator-bolt", new Fields("call"));

        Config config = new Config();
        config.setDebug(true);
        config.setNumWorkers(2);

        LocalCluster cluster = new LocalCluster();
        try {
            cluster.submitTopology("local-storm", config, topologyBuilder.createTopology());
            Thread.sleep(10000);
        } finally {
            // Always stop the cluster, even if submission or the wait fails.
            cluster.shutdown();
        }
    }
}
效果
统计出一段时间内,谁给谁打了多少次电话