Kafka接收数据源生产的消息数据,通过订阅的方式,使用Storm的Topology作为消息的消费者。
相关版本:
kafka_2.11-0.10.0.0
storm-1.0.1
启动Kafka Broker
kafka_2.11-0.10.0.0$ bin/kafka-server-start.sh config/server.properties &
启动Kafka Producer 生产消息
kafka_2.11-0.10.0.0$ bin/kafka-console-producer.sh --broker-list centos.host1:9092 --topic kafka-storm
Storm Topology
/**
 * Word-count topology driver: a KafkaSpout reads lines from the
 * "kafka-storm" topic, a split bolt tokenizes them, a count bolt
 * keeps running totals, and an HdfsBolt persists the counts to HDFS.
 */
public class KafkaWCTopology {

    private static final String TOPOLOGY_NAME = "kafka-word-count-topology";
    // true: submit to a real Storm cluster; false: run in-process for 10s.
    private static final boolean isCluster = true;

    public static void main(String[] args) {
        // --- Kafka spout configuration (offsets tracked under zkRoot/id in ZK) ---
        BrokerHosts hosts = new ZkHosts("192.168.10.20:2181", "/kafka/brokers");
        SpoutConfig kafkaConf = new SpoutConfig(hosts, "kafka-storm", "/kafka", "word");
        kafkaConf.scheme = new SchemeAsMultiScheme(new StringScheme());
        List<String> zkHosts = new ArrayList<String>();
        zkHosts.add("192.168.10.20");
        kafkaConf.zkServers = zkHosts;
        kafkaConf.zkPort = 2181;

        // --- HDFS sink: one line per record ("word:count"), sync every 10
        // tuples, rotate the output file once per minute. ---
        RecordFormat format = new DelimitedRecordFormat().withFieldDelimiter(":");
        SyncPolicy sync = new CountSyncPolicy(10);
        FileRotationPolicy rotation = new TimedRotationPolicy(1.0f, TimeUnit.MINUTES);
        FileNameFormat naming = new DefaultFileNameFormat().withPath("/storm/")
                .withPrefix("wc_").withExtension(".log");
        HdfsBolt hdfsBolt = new HdfsBolt().withFsUrl("hdfs://centos.host1:9000")
                .withFileNameFormat(naming).withRecordFormat(format)
                .withRotationPolicy(rotation).withSyncPolicy(sync);

        // --- Wire spout -> split -> count -> hdfs ---
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("kafka-spout", new KafkaSpout(kafkaConf), 1);
        builder.setBolt("word-split-bolt", new KafkaWordSplitBolt(), 2)
                .setNumTasks(4).shuffleGrouping("kafka-spout");
        // fieldsGrouping on "word" so every occurrence of a word lands on
        // the same counter task.
        builder.setBolt("word-count-bolt", new KafkaWordCountBolt())
                .fieldsGrouping("word-split-bolt", new Fields("word"));
        builder.setBolt("word-count-hdfs-bolt", hdfsBolt).globalGrouping("word-count-bolt");

        Config config = new Config();
        config.setDebug(true);

        if (isCluster) {
            try {
                config.setNumWorkers(1);
                StormSubmitter.submitTopologyWithProgressBar(
                        TOPOLOGY_NAME, config, builder.createTopology());
            } catch (Exception e) {
                e.printStackTrace();
            }
        } else {
            // Local mode: run briefly, then tear everything down.
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology(TOPOLOGY_NAME, config, builder.createTopology());
            Utils.sleep(10000);
            cluster.killTopology(TOPOLOGY_NAME);
            cluster.shutdown();
        }
    }
}
/**
 * Splits each line received from the Kafka spout into individual words and
 * emits one ("word", 1) tuple per word, anchored to the input tuple for
 * at-least-once processing.
 */
public class KafkaWordSplitBolt extends BaseRichBolt {

    private static final long serialVersionUID = 1L;
    private static final Logger LOG = LoggerFactory.getLogger(KafkaWordSplitBolt.class);

    private OutputCollector collector = null;

    @SuppressWarnings("rawtypes")
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Tokenizes the first field of the tuple and emits ("word", 1) pairs.
     *
     * Fixes over the original: split(" ") produced empty "words" whenever the
     * line contained consecutive spaces (or tabs), and line.trim() threw an
     * NPE when the tuple carried a null string. We now guard against null,
     * split on any whitespace run, and skip blank tokens.
     */
    @Override
    public void execute(Tuple input) {
        String line = input.getString(0);
        LOG.info("receive input tuple " + input);
        if (line != null) {
            for (String word : line.trim().split("\\s+")) {
                if (!word.isEmpty()) {
                    // Anchor to the input tuple so failures are replayed.
                    collector.emit(input, new Values(word, 1));
                }
            }
        }
        collector.ack(input);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "count"));
    }
}
/**
 * Accumulates per-word counts in an in-memory map and emits the running
 * total ("word", count) downstream after every update.
 *
 * NOTE(review): the map is unbounded and per-task state is lost on worker
 * restart — acceptable for a demo, not for production.
 */
public class KafkaWordCountBolt extends BaseRichBolt {

    private static final long serialVersionUID = 1L;
    private static final Logger LOG = LoggerFactory.getLogger(KafkaWordCountBolt.class);

    private OutputCollector collector = null;
    // Running word -> total-count table, built up over the bolt's lifetime.
    private Map<String, Integer> result = null;

    @SuppressWarnings("rawtypes")
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        result = new HashMap<String, Integer>();
    }

    /**
     * Adds the tuple's count to the word's running total and emits it.
     *
     * Fixes over the original: the incoming "count" field was read but then
     * ignored (the total was always incremented by 1 instead of by count),
     * and the downstream emit was unanchored, which broke the tuple tree and
     * disabled replay-on-failure for this stage.
     */
    @Override
    public void execute(Tuple input) {
        String word = input.getStringByField("word");
        int count = input.getIntegerByField("count");
        LOG.info("receive input word " + word + " count " + count);
        Integer wcount = this.result.get(word);
        // Accumulate by the actual count carried on the tuple, not by 1.
        wcount = null == wcount ? count : wcount + count;
        this.result.put(word, wcount);
        // Anchor to the input tuple so downstream failures trigger a replay.
        this.collector.emit(input, new Values(word, wcount));
        collector.ack(input);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "count"));
    }

    /**
     * Dumps the final tallies when the topology is killed. Only reliably
     * invoked in local mode; cluster workers may be killed without cleanup.
     */
    @Override
    public void cleanup() {
        LOG.info("=====FINAL WORD COUNT RESULT=====");
        for (Map.Entry<String, Integer> entry : result.entrySet()) {
            LOG.info("word: " + entry.getKey() + " count: " + entry.getValue());
        }
    }
}
拷贝对应的jar包到Storm安装目录下的extlib目录下
启动Storm
$bin/storm nimbus &
$bin/storm supervisor &
$bin/storm ui &
执行Topology
bin/storm jar storm.jar org.platform.storm.KafkaWCTopology
可以在storm-1.0.1/logs/workers-artifacts目录下查看相关的日志信息,同时也可以在HDFS上的storm目录下看到产生的计算结果文件。