1. Topology
package com.zpark.stu.storm;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.topology.TopologyBuilder;
/**
 * Driver class for the word count topology; it builds and submits the job.
 */
public class WordCountTopologie {
/**
 * Goal: split the incoming data into words and count how many times each word appears.
 * 1. A spout that reads the data (here, from Kafka).
 * 2. A SplitBolt that splits each line into words.
 * 3. A WordCountBolt that tallies the counts.
 * 4. A RedisDemo bolt that writes the running totals to Redis.
 */
public static void main(String[] args) throws InvalidTopologyException, AuthorizationException, AlreadyAliveException, InterruptedException {
//Build the topology
TopologyBuilder topologyBuilder = new TopologyBuilder();
//Spout that reads records from Kafka
topologyBuilder.setSpout("readSpout",new ReadFileSpout(),1);
//Bolt that splits each line into words
topologyBuilder.setBolt("splitBolt",new SplitBolt(),1).shuffleGrouping("readSpout");
//Bolt that counts occurrences of each word
topologyBuilder.setBolt("wordCountBolt",new WordCountBolt(),1).shuffleGrouping("splitBolt");
//Bolt that writes the running counts to Redis
topologyBuilder.setBolt("redis",new RedisDemo(),1).shuffleGrouping("wordCountBolt");
Config config = new Config();
config.setNumWorkers(1);
if (args != null && args.length > 0){
System.out.println("Running on the cluster");
StormSubmitter.submitTopologyWithProgressBar(args[0],config ,topologyBuilder.createTopology());
}else {
System.out.println("本地-运行");
config.setMaxTaskParallelism(1);
LocalCluster localCluster = new LocalCluster();
localCluster.submitTopology("wordCount",config,topologyBuilder.createTopology());
//Keep the local cluster alive for 10 minutes before shutting it down
Thread.sleep(10 * 60 * 1000);
localCluster.shutdown();
}
}
}
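A note on the groupings used above: with every component at parallelism 1, shuffleGrouping is fine, but if WordCountBolt's parallelism is ever raised, all tuples carrying the same word must reach the same task or the counts will be split. A minimal sketch of the safer wiring (the parallelism of 2 is only an example; Fields is org.apache.storm.tuple.Fields):
//Route all tuples for the same word to the same WordCountBolt task
topologyBuilder.setBolt("wordCountBolt", new WordCountBolt(), 2).fieldsGrouping("splitBolt", new Fields("word"));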
2. Spout
package com.zpark.stu.storm;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import java.util.Collections;
import java.util.Map;
import java.util.Properties;
/**
 * Consumes messages from a Kafka topic and emits each record's value
 * to the downstream bolt, similar to the InputFormat in Hadoop MapReduce.
 */
public class ReadFileSpout extends BaseRichSpout{
private SpoutOutputCollector collector;
private KafkaConsumer<String, String> consumer;
private ConsumerRecords<String, String> records;
private static final Properties props;
static {
props = new Properties();
//Kafka broker address
props.put("bootstrap.servers", "hdp-1:9092");
//key/value deserializers
props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
//consumer group id
props.put("group.id", "yangk");
}
@Override
public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
this.collector = spoutOutputCollector;
props.put("enable.auto.commit", true);
consumer = new KafkaConsumer<String, String>(props);
consumer.subscribe(Collections.singleton("first"));
}
@Override
public void nextTuple() {
//Poll Kafka with a 100 ms timeout
records = consumer.poll(100);
for (ConsumerRecord<String, String> r :records) {
String value = r.value();
System.out.println("received: " + value);
collector.emit(new Values(value));
System.out.println("tuple emitted");
}
}
@Override
public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
outputFieldsDeclarer.declare(new Fields("readSpout"));
}
}
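To feed the spout while testing, messages need to be produced to the "first" topic. A minimal producer sketch, assuming the same hdp-1:9092 broker; the class name and message text are just illustrative:
package com.zpark.stu.storm;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import java.util.Properties;
public class ProducerDemo {
public static void main(String[] args) {
Properties props = new Properties();
props.put("bootstrap.servers", "hdp-1:9092");
props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
KafkaProducer<String, String> producer = new KafkaProducer<String, String>(props);
//Each message becomes one tuple emitted by ReadFileSpout
producer.send(new ProducerRecord<String, String>("first", "hello storm hello kafka"));
producer.close();
}
}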
3. WordCountBolt
package com.zpark.stu.storm;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import java.util.HashMap;
import java.util.Map;
/**
 * Input: a word and the count it appeared with, from SplitBolt.
 * Output: the running totals, forwarded to the Redis bolt and printed to the console.
 * Tallies the total occurrences of each word, similar to the reduce function in Hadoop MapReduce.
 */
public class WordCountBolt extends BaseRichBolt{
HashMap<String, Integer> wordCountMap = new HashMap<String, Integer>();
private OutputCollector collector;
@Override
public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
//Keep the collector: the running totals are forwarded to the Redis bolt downstream
this.collector = outputCollector;
}
@Override
public void execute(Tuple tuple) {
//Read the (word, num) pair sent by SplitBolt
String word = tuple.getStringByField("word");
String num = tuple.getStringByField("num");
//Update the running count for this word
if (wordCountMap.containsKey(word)){
Integer current = wordCountMap.get(word);
wordCountMap.put(word, current + Integer.parseInt(num));
}else {
wordCountMap.put(word, Integer.parseInt(num));
}
//Forward the full running totals as one string and ack the input tuple
collector.emit(new Values(wordCountMap.toString()));
System.out.println(wordCountMap);
collector.ack(tuple);
}
@Override
public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
outputFieldsDeclarer.declare(new Fields("wordCount"));
}
}
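Note that this bolt re-emits the entire map as a single string on every input tuple, which keeps the Redis bolt trivial but rewrites the whole result each time. An alternative, not part of the original code, is to emit one (word, count) pair per update and store the pairs in a Redis hash:
//In execute(), after updating the map, emit only the changed entry:
collector.emit(new Values(word, String.valueOf(wordCountMap.get(word))));
//In declareOutputFields(), declare two fields instead of one:
outputFieldsDeclarer.declare(new Fields("word", "count"));
//RedisDemo would then write with jedis.hset("wordcount", word, count) instead of jedis.set(...)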
4. RedisDemo
package com.zpark.stu.storm;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;
import redis.clients.jedis.Jedis;
import java.util.Map;
/**
 * Terminal bolt: stores the latest word count string in Redis.
 */
public class RedisDemo extends BaseRichBolt {
private OutputCollector collector;
private Jedis jedis;
@Override
public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
this.collector = outputCollector;
//Open one Redis connection per task instead of one per tuple
jedis = new Jedis("hdp-1");
jedis.auth("403411");
}
@Override
public void execute(Tuple tuple) {
String wordCount = tuple.getStringByField("wordCount");
jedis.set("wordcount", wordCount);
System.out.println("wrote word counts to Redis: " + wordCount);
collector.ack(tuple);
}
@Override
public void cleanup() {
jedis.close();
}
@Override
public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
//Terminal bolt: nothing to declare
}
}
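5. SplitBolt
The topology wires a SplitBolt between ReadFileSpout and WordCountBolt, but its source is missing above (the section numbering also jumps from 4 to 6). The following is a reconstructed sketch, inferred from the fields the surrounding components use: it reads the line from the spout's "readSpout" field, splits it on whitespace, and emits ("word", "num") pairs, which is what WordCountBolt expects.
package com.zpark.stu.storm;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import java.util.Map;
/**
 * Splits each incoming line into words and emits (word, "1") pairs,
 * similar to the map function in Hadoop MapReduce.
 */
public class SplitBolt extends BaseRichBolt {
private OutputCollector collector;
@Override
public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
this.collector = outputCollector;
}
@Override
public void execute(Tuple tuple) {
String line = tuple.getStringByField("readSpout");
for (String word : line.split("\\s+")) {
if (!word.isEmpty()) {
//"num" is emitted as a String because WordCountBolt reads it with getStringByField
collector.emit(new Values(word, "1"));
}
}
collector.ack(tuple);
}
@Override
public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
outputFieldsDeclarer.declare(new Fields("word", "num"));
}
}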
6. pom.xml
<dependencies>
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-core</artifactId>
<version>1.2.2</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.7.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
</build>