Storm 字符统计Demo

1、数据源读取,字符发射spout类

/**
 * Demo spout that emits a fixed batch of three sentences, one tuple per sentence,
 * on the single output field {@code "firstSpout"}.
 *
 * <p>A batch is emitted on the first {@link #nextTuple()} call after {@code open}
 * and then again once every {@code 1000 * 1000} milliseconds. Tuples are emitted
 * without a message id, and {@link #ack(Object)} / {@link #fail(Object)} do nothing.
 */
public class RandomSentenceSpout extends BaseRichSpout {

    private static final long serialVersionUID = 1L;

    /** Pause between two batches, in milliseconds. */
    private static final long EMIT_INTERVAL_MS = 1000L * 1000L;

    private static final long NANOS_PER_MILLI = 1000000L;

    SpoutOutputCollector _collector;

    // Initialised in open() but not read anywhere in this class.
    Random _rand;

    /** {@link System#nanoTime()} reading at or after which the next batch is due. */
    private transient long _nextEmitNanos;

    /**
     * Stores the collector and makes the first batch due immediately.
     *
     * @param conf      topology configuration (unused here)
     * @param context   topology context (unused here)
     * @param collector collector used by {@link #nextTuple()} to emit sentences
     */
    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        _collector = collector;
        _rand = new Random();
        _nextEmitNanos = System.nanoTime();
    }

    /**
     * Emits the three demo sentences when a batch is due; otherwise naps for one
     * millisecond and returns.
     *
     * <p>The wait between batches is implemented as a deadline check rather than one
     * long sleep, so this method never blocks its caller for the whole interval.
     */
    @Override
    public void nextTuple() {
        // Subtraction-based comparison stays correct even if nanoTime() wraps around.
        if (System.nanoTime() - _nextEmitNanos < 0) {
            Utils.sleep(1);
            return;
        }
        String[] sentences = new String[] { sentence("a b c d "), sentence("b d"), sentence("a d") };
        for (String sentence : sentences) { // emit three lines for the downstream bolt
            _collector.emit(new Values(sentence));
        }
        _nextEmitNanos = System.nanoTime() + EMIT_INTERVAL_MS * NANOS_PER_MILLI;
    }

    /**
     * Hook that lets subclasses decorate a sentence before it is emitted.
     *
     * @param input the raw sentence
     * @return the sentence to emit; this base implementation returns {@code input} unchanged
     */
    protected String sentence(String input) {
        return input;
    }

    /** No-op: tuples are emitted without a message id. */
    @Override
    public void ack(Object id) {
    }

    /** No-op: tuples are emitted without a message id. */
    @Override
    public void fail(Object id) {
    }

    /** Declares the single output field {@code "firstSpout"}. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("firstSpout"));
    }

    // Add unique identifier to each tuple, which is helpful for debugging
    public static class TimeStamped extends RandomSentenceSpout {
        private final String prefix;

        /** Creates a time-stamping spout with an empty prefix. */
        public TimeStamped() {
            this("");
        }

        /**
         * @param prefix text placed in front of the timestamp of every sentence
         */
        public TimeStamped(String prefix) {
            this.prefix = prefix;
        }

        /**
         * @param input the raw sentence
         * @return {@code prefix + timestamp + " " + input}
         */
        @Override
        protected String sentence(String input) {
            return prefix + currentDate() + " " + input;
        }

        /**
         * Formats the current time.
         *
         * <p>In {@link SimpleDateFormat} the letter {@code S} means milliseconds, so the
         * nine-letter fraction is a zero-padded millisecond value, not nanoseconds.
         */
        private String currentDate() {
            return new SimpleDateFormat("yyyy.MM.dd_HH:mm:ss.SSSSSSSSS").format(new Date());
        }
    }
}


2、第一次对字符串加工处理,切割处理bolt类

/**
* 字符切割处理bolt类
*/
public class MysplitBolt implements IBasicBolt {

private static final long serialVersionUID = 1L;

String patton;

public MysplitBolt(String patton) {
this.patton = patton;
}

/**
* 接收处理每一行数据
*/
public void execute(Tuple input, BasicOutputCollector collector) {
try {
String sen = input.getStringByField("firstSpout");
if (sen != null) {
for (String word : sen.split(patton)) {// 发射多个字符数据,让下一级bolt处理
collector.emit(new Values(word));
}

}

} catch (FailedException e) {
e.printStackTrace();// TODO: handle exception
}

}

public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("singleWord"));
}

public Map<String, Object> getComponentConfiguration() {
// TODO Auto-generated method stub
return null;
}

public void prepare(Map stormConf, TopologyContext context) {
// TODO Auto-generated method stub

}

public void cleanup() {
// TODO Auto-generated method stub

}

}


3、提交topology的main函数及字符统计处理类

public class WordCountTopology {

/**
* 提交topology的main函数及字符统计处理类
*/
public static class SplitSentence extends ShellBolt implements IRichBolt {

private static final long serialVersionUID = 1L;



/**
* 字符统计处理bolt类
*/
public static class WordCount extends BaseBasicBolt {
private static final long serialVersionUID = 1L;
// 声明当前线程全局变量,统计字母个数,线程一直处于运行状态
Map<String, Integer> counts = new HashMap<String, Integer>();

public void execute(Tuple tuple, BasicOutputCollector collector) {
String word = tuple.getString(0);
Integer count = counts.get(word);
if (count == null) {
count = 0;
}
count++;
counts.put(word, count);
System.err.println(Thread.currentThread().getName() + "---word:" + word + " count:" + count);
collector.emit(new Values(word, count));
}

public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("word", "count"));
}
}

//提交topology的main函数
public static void main(String[] args) throws Exception {
TopologyBuilder builder = new TopologyBuilder();
//读取数据用1个线程,防止数据重复读取
builder.setSpout("spout", new RandomSentenceSpout(), 1);
//从spout源读取数据,设置2个线程处理字符分割
builder.setBolt("split", new MysplitBolt(" "), 2).shuffleGrouping("spout");
/**
* 上个bolt接收数据,设置3个线程处理数据统计。
* Fields Grouping:按Field分组,相同的tuple会分发给同一个线程(Executer或task)处理。
* 比如按singleWord来分组, 具有同样singleWord的tuple会被分到相同的Bolts, 而不同的word则会被分配到不同的Bolts。
*/
builder.setBolt("count", new WordCount(), 3).fieldsGrouping("split", new Fields("singleWord"));

Config conf = new Config();
conf.setDebug(true);

if (args != null && args.length > 0) {
conf.setNumWorkers(3);

StormSubmitter.submitTopologyWithProgressBar(args[0], conf, builder.createTopology());
} else {
conf.setMaxTaskParallelism(3);

LocalCluster cluster = new LocalCluster();
cluster.submitTopology("word-count", conf, builder.createTopology());
}
}

public void declareOutputFields(OutputFieldsDeclarer declarer) {
// TODO Auto-generated method stub
}

public Map<String, Object> getComponentConfiguration() {
// TODO Auto-generated method stub
return null;
}
}

}


4、处理结果
[quote]
Thread-22-count-executor[3 3]---word:c count:1
Thread-18-count-executor[2 2]---word:b count:1
Thread-32-count-executor[4 4]---word:a count:1
Thread-32-count-executor[4 4]---word:d count:1

Thread-18-count-executor[2 2]---word:b count:2
Thread-32-count-executor[4 4]---word:d count:2
Thread-32-count-executor[4 4]---word:a count:2

Thread-32-count-executor[4 4]---word:d count:3
[/quote]

5、相关总结
[quote]
1、每个bolt读取数据时使用的字段名,必须与上一个bolt或spout在declareOutputFields中声明的字段名一致,例如:
declarer.declare(new Fields("firstSpout"));

2、每个bolt线程在topology运行期间一直处于运行状态。bolt中声明的成员变量(即文中所说的“全局变量”)是每个线程各自独立的一份,因此每个线程输出的统计数据只是该线程自己的变量数据。

3、每个spout或bolt处理数据时,都可以设置对应的线程数。但spout设置多个线程时,每个线程都会各自读取一遍数据源,导致数据被重复读取。

4、bolt与bolt之间传递数据时,上一个bolt的输出字段与下一个bolt的接收字段通过“相同的字段名”对应起来,数据由此流转。
[/quote]

6、相关pom.xml文件
[quote]
<!-- Maven build descriptor for the Storm word-count demo; produces a jar named StormMavenProject. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.test</groupId>
<artifactId>StormMavenProject</artifactId>
<packaging>jar</packaging>
<version>0.0.1-SNAPSHOT</version>
<name>StormMavenProject</name>
<url>http://maven.apache.org</url>
<dependencies>

<!-- Supporting libraries declared explicitly with pinned versions. -->
<!-- NOTE(review): several of these look like transitive dependencies of storm-core; confirm they must be listed here. -->
<dependency>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
<version>5.0.3</version>
</dependency>
<dependency>
<groupId>org.clojure</groupId>
<artifactId>clojure</artifactId>
<version>1.7.0</version>
</dependency>
<dependency>
<groupId>com.lmax</groupId>
<artifactId>disruptor</artifactId>
<version>3.3.2</version>
</dependency>
<dependency>
<groupId>com.esotericsoftware</groupId>
<artifactId>kryo</artifactId>
<version>3.0.3</version>
</dependency>
<!-- Logging: Log4j 2 API/core, plus SLF4J bridges. -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.8</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.8</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>log4j-over-slf4j</artifactId>
<version>1.6.6</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<version>2.8</version>
</dependency>
<dependency>
<groupId>com.esotericsoftware</groupId>
<artifactId>minlog</artifactId>
<version>1.3.0</version>
</dependency>
<dependency>
<groupId>org.objenesis</groupId>
<artifactId>objenesis</artifactId>
<version>2.1</version>
</dependency>
<dependency>
<groupId>com.esotericsoftware</groupId>
<artifactId>reflectasm</artifactId>
<version>1.10.1</version>
</dependency>

<dependency>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
<version>2.5</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.21</version>
</dependency>
<!-- Storm itself, version 1.1.0. -->
<!-- NOTE(review): when the jar is submitted to a real cluster, storm-core is usually given provided scope; confirm for your deployment. -->
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-core</artifactId>
<version>1.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-rename-hack</artifactId>
<version>1.1.0</version>
</dependency>
<!-- Test-only dependency. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>ring-cors</groupId>
<artifactId>ring-cors</artifactId>
<version>0.1.5</version>
</dependency>

</dependencies>
<build>
<!-- Name of the built artifact, without the .jar extension. -->
<finalName>StormMavenProject</finalName>
</build>
</project>
[/quote]
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值