集群环境:
storm-1.1.2
zookeeper-3.4.5-cdh5.3.6
kafka_2.11-0.11.0.2
再来看一下我的storm代码
package com.weziye.filter;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.hdfs.bolt.HdfsBolt;
import org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat;
import org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat;
import org.apache.storm.hdfs.bolt.format.FileNameFormat;
import org.apache.storm.hdfs.bolt.format.RecordFormat;
import org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy;
import org.apache.storm.hdfs.bolt.rotation.TimedRotationPolicy;
import org.apache.storm.hdfs.bolt.rotation.TimedRotationPolicy.TimeUnit;
import org.apache.storm.hdfs.bolt.sync.CountSyncPolicy;
import org.apache.storm.hdfs.bolt.sync.SyncPolicy;
import org.apache.storm.kafka.BrokerHosts;
import org.apache.storm.kafka.KafkaSpout;
import org.apache.storm.kafka.SpoutConfig;
import org.apache.storm.kafka.StringScheme;
import org.apache.storm.kafka.ZkHosts;
import org.apache.storm.spout.SchemeAsMultiScheme;
import org.apache.storm.topology.TopologyBuilder;
/**
 * Entry point that wires a Kafka spout to two bolts and submits the topology.
 *
 * <p>Pipeline: {@code kafka-spout} -> {@code kafka-word-spitter} ({@code SplitBolt})
 * -> {@code word-spitter-printf} ({@code PrintBolt}). {@code SplitBolt} and
 * {@code PrintBolt} are defined elsewhere in the project.
 *
 * <p>Usage: pass the topology name as the first argument to submit to a Storm
 * cluster; pass no argument (or an empty string) to run in an in-process
 * {@link LocalCluster} under the name {@code kafkafilter}.
 */
public class KafkaFilter {

    /**
     * Builds the topology and submits it either to a cluster or to a local cluster.
     *
     * @param args optional; {@code args[0]} is the topology name used for cluster
     *             submission. When absent or empty the topology runs locally.
     * @throws Exception if topology submission fails
     */
    public static void main(String[] args) throws Exception {
        // ZooKeeper connection string used by the spout to discover Kafka brokers.
        String zkConnString = "hadoop000:2181,hadoop001:2181";
        // Name of the Kafka topic to consume.
        String topic = "kafka-filter";

        BrokerHosts hosts = new ZkHosts(zkConnString);
        // Arguments: hosts, topic, zkRoot, spout id. zkRoot is the empty string
        // here, so the spout id is looked up directly under "/".
        // NOTE(review): a dedicated non-empty zkRoot may be preferable — confirm
        // against the storm-kafka documentation before changing, since it moves
        // where committed offsets are stored.
        SpoutConfig kafkaSpoutConfig = new SpoutConfig(hosts, topic, "", "kafka-filter-spout");
        kafkaSpoutConfig.bufferSizeBytes = 1024 * 1024 * 4; // 4 MiB
        kafkaSpoutConfig.fetchSizeBytes = 1024 * 1024 * 4;  // 4 MiB
        // Deserialize each Kafka message as a single String field.
        kafkaSpoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        kafkaSpoutConfig.startOffsetTime = kafka.api.OffsetRequest.LatestTime();

        TopologyBuilder builder = new TopologyBuilder();
        // Data source: Kafka.
        builder.setSpout("kafka-spout", new KafkaSpout(kafkaSpoutConfig));
        // Data processing.
        builder.setBolt("kafka-word-spitter", new SplitBolt(), 1).shuffleGrouping("kafka-spout");
        // Data printing.
        builder.setBolt("word-spitter-printf", new PrintBolt()).shuffleGrouping("kafka-word-spitter");

        Config config = new Config();
        config.put(Config.TOPOLOGY_MAX_SPOUT_PENDING, 1);
        config.setNumWorkers(1);

        // Check the array length first: indexing args[0] with no arguments throws
        // ArrayIndexOutOfBoundsException and makes the local branch unreachable.
        boolean hasTopologyName = args != null && args.length > 0 && args[0].length() > 0;
        if (hasTopologyName) {
            StormSubmitter.submitTopology(args[0], config, builder.createTopology());
        } else {
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("kafkafilter", config, builder.createTopology());
        }
    }
}
通过编译执行后在Storm UI中看到如下错误,错误说明无法在zookeeper“/”目录下找到“kafka-filter-spout”节点。
查找问题和解决办法:
使用zookeeper 客户端连接zk节点
[hadoop@hadoop000 zookeeper-3.4.5-cdh5.3.6]$ bin/zkCli.sh -server hadoop000:2181,hadoop001:2181
使用“ls /”可以查看到目录,发现的确没有“kafka-filter-spout”
[zk: hadoop000:2181,hadoop001:2181(CONNECTED) 0] ls /
使用“create [-s] [-e] path data acl”新建一个目录(例如:create /kafka-filter-spout data),这样就可以了
另外,在本地测试时会发现无法保存kafka offset,提交到storm集群中运行则不会出现这个问题