pom.xml dependencies
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>21.0</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.12</artifactId>
<version>2.3.0</version>
</dependency>
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-core</artifactId>
<!-- NOTE(review): storm-core 1.1.0 does not match the 1.2.3 storm-kafka /
     storm-kafka-client artifacts below — align all Storm artifacts on the
     same version to avoid classpath conflicts at runtime. -->
<version>1.1.0</version>
<!-- Uncomment the scope below for cluster mode (the cluster provides Storm). -->
<!--<scope>provided</scope>-->
</dependency>
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-kafka</artifactId>
<version>1.2.3</version>
<!--<scope>provided</scope>-->
</dependency>
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-kafka-client</artifactId>
<version>1.2.3</version>
<!--<scope>provided</scope>-->
</dependency>
KafKaTopic
package com.zbj.storm.kafka;
import com.google.common.collect.Maps;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.kafka.spout.ByTopicRecordTranslator;
import org.apache.storm.kafka.spout.Func;
import org.apache.storm.kafka.spout.KafkaSpout;
import org.apache.storm.kafka.spout.KafkaSpoutConfig;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
public class KafKaTopic {

    /** Kafka topic the spout subscribes to. */
    public static final String TOPIC = "kafkaStorm1";

    /**
     * Wires up a local word-count topology: Kafka spout -> word-split bolt -> writer bolt.
     * Runs the topology in a {@link LocalCluster} for 10 minutes, then kills it and
     * shuts the cluster down.
     *
     * @param args unused
     * @throws Exception if topology submission or the sleep is interrupted
     */
    public static void main(String[] args) throws Exception {
        String spoutId = "kafkaSpout";

        // Typed builder (the original used raw types): bootstrap server + topic list.
        KafkaSpoutConfig.Builder<String, String> builder = new KafkaSpoutConfig.Builder<>(
                "172.31.15.175:9092", Arrays.asList(TOPIC));
        // Start reading from the latest offset on the first poll (skip history).
        builder.setFirstPollOffsetStrategy(KafkaSpoutConfig.FirstPollOffsetStrategy.LATEST);

        // Emit (value, topic) for each record under the fields ("values", TOPIC).
        // Func is a SAM type in storm-kafka-client, so a lambda replaces the
        // original anonymous class.
        ByTopicRecordTranslator<String, String> recordTranslator =
                new ByTopicRecordTranslator<>(
                        (ConsumerRecord<String, String> record) ->
                                new Values(record.value(), record.topic()),
                        new Fields("values", TOPIC));
        builder.setRecordTranslator(recordTranslator);

        // Consumer properties: String key/value deserializers and a fixed consumer group.
        // Plain HashMap instead of Guava's Maps.newHashMap() — no third-party need here.
        Map<String, Object> consumerProps = new HashMap<>();
        consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, "zbj-kafka");
        builder.setProp(consumerProps);

        TopologyBuilder topologyBuilder = new TopologyBuilder();
        // builder.build() replaces the deprecated new KafkaSpoutConfig(builder).
        KafkaSpout<String, String> kafkaSpout = new KafkaSpout<>(builder.build());
        topologyBuilder.setSpout(spoutId, kafkaSpout);
        topologyBuilder.setBolt("word-split", new WordSpliter()).shuffleGrouping(spoutId);
        // fieldsGrouping on "word" so the same word always reaches the same writer task.
        topologyBuilder.setBolt("writer", new WriterBolt(), 4).fieldsGrouping("word-split", new Fields("word"));

        Config config = new Config();
        config.setNumWorkers(4);
        config.setNumAckers(0); // no acking -> at-most-once processing
        config.setDebug(false);

        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("wordCount", config, topologyBuilder.createTopology());
        TimeUnit.MINUTES.sleep(10);
        cluster.killTopology("wordCount");
        // BUG FIX: the original never shut the local cluster down, leaking its threads.
        cluster.shutdown();
    }
}
Word splitting (WordSpliter bolt)
package com.zbj.storm.kafka;
import org.apache.commons.lang.StringUtils;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
public class WordSpliter extends BaseBasicBolt {

    /**
     * Splits each incoming line (field 0) on single spaces and emits one trimmed,
     * lower-cased, non-blank word per output tuple under the field "word".
     */
    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        String line = input.getString(0);
        for (String word : line.split(" ")) {
            word = word.trim();
            if (StringUtils.isNotBlank(word)) {
                // BUG FIX: String.toLowerCase() returns a new String; the original
                // discarded the result and emitted the word in its original case.
                collector.emit(new Values(word.toLowerCase()));
            }
        }
    }

    /** Declares the single output field consumed downstream by the writer bolt. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }
}
Writing words to a file (WriterBolt)
package com.zbj.storm.kafka;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Tuple;
import java.io.FileWriter;
import java.util.Map;
import java.util.UUID;
public class WriterBolt extends BaseBasicBolt {

    /** Opened in prepare() on the worker; one uniquely-named output file per task. */
    private FileWriter writer;

    /**
     * Opens a uniquely named output file ("wordCount-&lt;uuid&gt;.txt") for this task.
     * Fails fast on error — the original swallowed the exception and left
     * {@code writer} null, guaranteeing an NPE later in {@link #execute}.
     */
    @Override
    public void prepare(Map stormConf, TopologyContext context) {
        try {
            writer = new FileWriter("wordCount-" + UUID.randomUUID().toString().replaceAll("-", "") + ".txt");
        } catch (Exception e) {
            throw new RuntimeException("failed to open word-count output file", e);
        }
    }

    /** Appends the incoming word (field 0) as one line and flushes immediately. */
    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        String word = input.getString(0);
        try {
            writer.write(word);
            writer.write("\n");
            writer.flush();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * BUG FIX: the original never closed the writer, leaking the file handle.
     * Storm calls cleanup() on local-mode shutdown (best-effort on a cluster).
     */
    @Override
    public void cleanup() {
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /** Terminal bolt: emits nothing downstream. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
    }
}
Sample code
- kafka-storm-demo
References