Storm Study Notes: Integrating Kafka

Kafka receives the messages produced by the data source; a Storm topology subscribes to the topic and acts as the consumer of those messages.

Versions used:

kafka_2.11-0.10.0.0

storm-1.0.1


Start the Kafka Broker (ZooKeeper must already be running; the setup below assumes it listens on 192.168.10.20:2181 with a /kafka chroot)

kafka_2.11-0.10.0.0$ bin/kafka-server-start.sh config/server.properties &

Start a Kafka Producer to produce messages

kafka_2.11-0.10.0.0$ bin/kafka-console-producer.sh --broker-list centos.host1:9092 --topic kafka-storm
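
If automatic topic creation is disabled on the broker, create the topic first. A sketch, assuming ZooKeeper at 192.168.10.20:2181 with the /kafka chroot (matching the ZkHosts path used in the topology below):

kafka_2.11-0.10.0.0$ bin/kafka-topics.sh --create --zookeeper 192.168.10.20:2181/kafka --topic kafka-storm --partitions 1 --replication-factor 1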


Storm Topology

import java.util.ArrayList;
import java.util.List;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.hdfs.bolt.HdfsBolt;
import org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat;
import org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat;
import org.apache.storm.hdfs.bolt.format.FileNameFormat;
import org.apache.storm.hdfs.bolt.format.RecordFormat;
import org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy;
import org.apache.storm.hdfs.bolt.rotation.TimedRotationPolicy;
import org.apache.storm.hdfs.bolt.sync.CountSyncPolicy;
import org.apache.storm.hdfs.bolt.sync.SyncPolicy;
import org.apache.storm.kafka.BrokerHosts;
import org.apache.storm.kafka.KafkaSpout;
import org.apache.storm.kafka.SpoutConfig;
import org.apache.storm.kafka.StringScheme;
import org.apache.storm.kafka.ZkHosts;
import org.apache.storm.spout.SchemeAsMultiScheme;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;
import org.apache.storm.utils.Utils;

public class KafkaWCTopology {
	
	private static final String TOPOLOGY_NAME = "kafka-word-count-topology";
	
	private static final boolean isCluster = true;

	public static void main(String[] args) {
		String brokerZks = "192.168.10.20:2181";
		String topic = "kafka-storm";
		String zkRoot = "/kafka"; // ZK path under which the spout stores consumer offsets
		String id = "word";       // consumer id, appended to zkRoot
		
		// Brokers register themselves under the /kafka chroot in ZooKeeper
		BrokerHosts brokerHosts = new ZkHosts(brokerZks, "/kafka/brokers");
		SpoutConfig spoutConfig = new SpoutConfig(brokerHosts, topic, zkRoot, id);
		spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
		List<String> zkServers = new ArrayList<String>();
		zkServers.add("192.168.10.20");
		spoutConfig.zkServers = zkServers;
		spoutConfig.zkPort = 2181;
		
		TopologyBuilder builder = new TopologyBuilder();
		builder.setSpout("kafka-spout", new KafkaSpout(spoutConfig), 1);
		builder.setBolt("word-split-bolt", new KafkaWordSplitBolt(), 2)
			.setNumTasks(4).shuffleGrouping("kafka-spout");
		builder.setBolt("word-count-bolt", new KafkaWordCountBolt())
			.fieldsGrouping("word-split-bolt", new Fields("word"));
		
		// Write "word:count" lines to HDFS, syncing every 10 tuples
		// and rotating to a new file every minute.
		RecordFormat recordFormat = new DelimitedRecordFormat().withFieldDelimiter(":");
		SyncPolicy syncPolicy = new CountSyncPolicy(10);
		// Note: TimedRotationPolicy uses its own nested TimeUnit enum,
		// not java.util.concurrent.TimeUnit
		FileRotationPolicy fileRotationPolicy =
				new TimedRotationPolicy(1.0f, TimedRotationPolicy.TimeUnit.MINUTES);
		FileNameFormat fileNameFormat = new DefaultFileNameFormat().withPath("/storm/")
				.withPrefix("wc_").withExtension(".log");
		HdfsBolt hdfsBolt = new HdfsBolt().withFsUrl("hdfs://centos.host1:9000")
				.withFileNameFormat(fileNameFormat).withRecordFormat(recordFormat)
				.withRotationPolicy(fileRotationPolicy).withSyncPolicy(syncPolicy);
		
		builder.setBolt("word-count-hdfs-bolt", hdfsBolt).globalGrouping("word-count-bolt");
		
		Config config = new Config();
		config.setDebug(true);
		
		if (isCluster) {
			try {
				config.setNumWorkers(1);
				StormSubmitter.submitTopologyWithProgressBar(
						TOPOLOGY_NAME, config, builder.createTopology());
			} catch (Exception e) {
				e.printStackTrace();
			}
		} else {
			// Local mode: run for 10 seconds, then shut down
			LocalCluster cluster = new LocalCluster();
			cluster.submitTopology(TOPOLOGY_NAME, config, builder.createTopology());
			Utils.sleep(10000);
			cluster.killTopology(TOPOLOGY_NAME);
			cluster.shutdown();
		}
	}
	
}
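
Data flow: shuffleGrouping spreads the Kafka lines evenly across the split-bolt tasks, fieldsGrouping on "word" routes every occurrence of the same word to the same counter task, and globalGrouping funnels all running counts into the single HdfsBolt, which appends word:count lines under /storm on HDFS.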


import java.util.Map;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class KafkaWordSplitBolt extends BaseRichBolt {

	private static final long serialVersionUID = 1L;
	
	private static final Logger LOG = LoggerFactory.getLogger(KafkaWordSplitBolt.class);

	private OutputCollector collector = null;
	
	@SuppressWarnings("rawtypes")
	@Override
	public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
		this.collector = collector;
	}

	@Override
	public void execute(Tuple input) {
		String line = input.getString(0);
		LOG.info("receive input tuple " + input);
		// Split on runs of whitespace so repeated spaces do not produce empty words
		String[] words = line.trim().split("\\s+");
		for (int i = 0, len = words.length; i < len; i++) {
			// Anchor each word to the input tuple so failures are replayed from the spout
			collector.emit(input, new Values(words[i], 1));
		}
		collector.ack(input);
	}

	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		declarer.declare(new Fields("word", "count"));
	}

}
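
Because each emitted word is anchored to the input tuple, the whole line is replayed from the Kafka spout if any word fails downstream; the bolt acks the line only after all words have been emitted.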


import java.util.HashMap;
import java.util.Map;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class KafkaWordCountBolt extends BaseRichBolt {

	private static final long serialVersionUID = 1L;
	
	private static final Logger LOG = LoggerFactory.getLogger(KafkaWordCountBolt.class);

	private OutputCollector collector = null;
	
	private Map<String, Integer> result = null;

	@SuppressWarnings("rawtypes")
	@Override
	public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
		this.collector = collector;
		result = new HashMap<String, Integer>();
	}

	@Override
	public void execute(Tuple input) {
		String word = input.getStringByField("word");
		int count = input.getIntegerByField("count");
		LOG.info("receive input word " + word + " count " + count);
		Integer wcount = this.result.get(word);
		// Accumulate the incoming count rather than always adding 1
		wcount = null == wcount ? count : wcount + count;
		this.result.put(word, wcount);
		// Emit the running total, anchored to the input tuple
		this.collector.emit(input, new Values(word, wcount));
		collector.ack(input);
	}

	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		declarer.declare(new Fields("word", "count"));
	}
	
	@Override
	public void cleanup() {
		// Note: on a cluster there is no guarantee cleanup() is ever invoked;
		// this final dump is mainly useful in local mode
		LOG.info("=====FINAL WORD COUNT RESULT=====");
		for (Map.Entry<String, Integer> entry : result.entrySet()) {
			LOG.info("word: " + entry.getKey() + " count: " + entry.getValue());
		}
	}
	
}
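
Note that the counts live in an in-memory HashMap per bolt task, so they are lost whenever a worker restarts; fieldsGrouping only guarantees that the same word always reaches the same task, it does not persist the state.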

Copy the required dependency jars into the extlib directory under the Storm installation directory.
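
A sketch of the copy step, assuming the storm-kafka and storm-hdfs artifacts and the Kafka client jars were fetched from Maven Central; the full transitive set (scala-library, zkclient, metrics-core, the Hadoop client jars) depends on the environment:

$ cp storm-kafka-1.0.1.jar storm-hdfs-1.0.1.jar \
     kafka_2.11-0.10.0.0.jar kafka-clients-0.10.0.0.jar \
     apache-storm-1.0.1/extlib/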



Start Storm

$ bin/storm nimbus &
$ bin/storm supervisor &
$ bin/storm ui &
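
Nimbus and the supervisors coordinate through ZooKeeper as well; a minimal conf/storm.yaml sketch, assuming the same ZooKeeper host as above:

storm.zookeeper.servers:
  - "192.168.10.20"
nimbus.seeds: ["192.168.10.20"]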


Submit the Topology

bin/storm jar storm.jar org.platform.storm.KafkaWCTopology
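
Here storm.jar is the jar built from the topology project; it only needs to contain the three classes above, since the storm-kafka, storm-hdfs, and Kafka client dependencies are resolved from extlib at runtime.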

The worker logs can be inspected under the storm-1.0.1/logs/workers-artifacts directory, and the generated result files appear under the /storm directory on HDFS.
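
To read the results directly (the files carry the wc_ prefix and .log extension configured in DefaultFileNameFormat):

$ hdfs dfs -ls /storm
$ hdfs dfs -cat /storm/wc_*.log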



