Storm 消费Kafka数据及相关异常解决
问题
storm报错:Exception in thread "main" java.lang.RuntimeException: InvalidTopologyException(msg:Component: [mybolt] subscribes from non-existent stream: [default] of component [kafka_spout])
storm版本:2.3.0
kafka版本:2.11
报错含义:mybolt 组件订阅了 kafka_spout 组件的 default 数据流并试图从中消费消息,但 kafka_spout 上并不存在 default 这个数据流。
KafkaTopoDemo类
main方法入口类和kafkaSpout设置
package com.suhaha.storm.storm122_kafka211_demo02;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.kafka.spout.*;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.apache.storm.kafka.spout.KafkaSpoutRetryExponentialBackoff.TimeInterval;
import static org.apache.storm.kafka.spout.KafkaSpoutConfig.FirstPollOffsetStrategy.EARLIEST;
/**
* @comment storm消费kafka数据
*/
public class KafkaTopoDemo {
public static void main(String[] args) {
final TopologyBuilder topologybuilder = new TopologyBuilder();
//简单的不可靠spout
// topologybuilder.setSpout("kafka_spout", new KafkaSpout<>(KafkaSpoutConfig.builder("node01:9092,node02:9092,node03:9092", "topic01").build()));
//详细的设置spout,写一个方法生成KafkaSpoutConfig
topologybuilder.setSpout("kafka_spout", new KafkaSpout<String,String>(newKafkaSpoutConfig("topic01")));
topologybuilder.setBolt("mybolt", new MyBolt("/tmp/storm_test.log")).shuffleGrouping("kafka_spout");
//上面设置的是topology,现在设置storm配置
Config stormConf=new Config();
stormConf.setNumWorkers(1);
stormConf.setDebug(true);
if (args != null && args.length > 0) {//集群提交
System.out.println("【run on cluster】");
try {
StormSubmitter.submitTopology(args[0], stormConf, topologybuilder.createTopology());
} catch (AlreadyAliveException e) {
e.printStackTrace();
} catch (InvalidTopologyException e) {
e.printStackTrace();
} catch (AuthorizationException e) {
e.printStackTrace();
}
System.out.println("提交完成");
} else {//本地提交
System.out.println("【run on local】");
LocalCluster lc = new LocalCluster();
lc.submitTopology("storm_kafka", stormConf, topologybuilder.createTopology());
}
}
/**
* KafkaSpoutConfig设置
*/
private static KafkaSpoutConfig<String,String> newKafkaSpoutConfig(String topic) {
ByTopicRecordTranslator<String, String> trans = new ByTopicRecordTranslator<>(
(r) -> new Values(r.topic(), r.partition(), r.offset(), r.key(), r.value()),
new Fields("topic", "partition", "offset", "key", "value"), "stream1");
//bootstrapServer 以及topic
return KafkaSpoutConfig.builder("node01:9092,node02:9092,node03:9092", topic)
.setProp(ConsumerConfig.GROUP_ID_CONFIG, "kafkaSpoutTestGroup_" + System.nanoTime())//设置kafka使用者组属性"group.id"
.setProp(ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG, 200)
.setRecordTranslator(trans)//修改spout如何将Kafka消费者message转换为tuple,以及将该tuple发布到哪个stream中
.setRetry(getRetryService())//重试策略
.setOffsetCommitPeriodMs(10_000)
.setFirstPollOffsetStrategy(EARLIEST)//允许你设置从哪里开始消费数据
.setMaxUncommittedOffsets(250)
.build();
}
/**
* 重试策略设置
*/
protected static KafkaSpoutRetryService getRetryService() {
return new KafkaSpoutRetryExponentialBackoff(TimeInterval.microSeconds(500),
TimeInterval.milliSeconds(2), Integer.MAX_VALUE, TimeInterval.seconds(10));
}
}
bolt类
异常跟此bolt类无关
package com.suhaha.storm.storm122_kafka211_demo02;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Tuple;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Map;
/**
 * Terminal bolt with deliberately simple logic: it reads the individual values out of each
 * incoming tuple, prints them, and appends a one-line summary to the file given by {@code path}.
 *
 * <p>Note that the file is written on the worker that executes this bolt's task, not on the
 * Nimbus host and not necessarily on the host that submitted the topology.
 *
 * <p>Every tuple is acked after it has been handled and failed if handling throws, so the
 * upstream spout can track completion instead of waiting for tuples to time out.
 */
public class MyBolt implements IRichBolt {
    private static final long serialVersionUID = 1L;

    /** Destination file; set once in the constructor and serialized with the bolt. */
    private final String path;
    /** Opened in {@link #prepare}; transient because a writer cannot be serialized. */
    private transient FileWriter fileWriter;
    /** Collector used to ack or fail incoming tuples; assigned in {@link #prepare}. */
    private transient OutputCollector collector;

    /**
     * Creates the bolt.
     *
     * @param path file that the tuple summaries are written to
     */
    public MyBolt(String path) {
        this.path = path;
    }

    /**
     * Stores the collector and opens the output file. If the file cannot be opened the bolt
     * keeps running and only prints tuples.
     */
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        try {
            fileWriter = new FileWriter(path);
        } catch (IOException e) {
            System.err.println("MyBolt: cannot open '" + path + "', tuples will only be printed");
            e.printStackTrace();
        }
    }

    /**
     * Prints each field of the tuple (by index and by name), writes the summary line, then acks
     * the tuple. A failure while reading or writing fails the tuple so the spout can replay it.
     *
     * @param input tuple with the fields topic, partition, offset, key and value
     */
    @Override
    public void execute(Tuple input) {
        System.out.println(input);
        try {
            // Read the individual values out of the tuple.
            System.out.println("=========================");
            String topic = input.getString(0);
            System.out.println("index 0 --> " + topic); // topic
            System.out.println("topic --> " + input.getStringByField("topic"));
            System.out.println("-------------------------");
            System.out.println("index 1 --> " + input.getInteger(1)); // partition
            Integer partition = input.getIntegerByField("partition");
            System.out.println("partition-> " + partition);
            System.out.println("-------------------------");
            Long offset = input.getLong(2);
            System.out.println("index 2 --> " + offset); // offset
            System.out.println("offset----> " + input.getLongByField("offset"));
            System.out.println("-------------------------");
            String key = input.getString(3);
            System.out.println("index 3 --> " + key); // key
            System.out.println("key-------> " + input.getStringByField("key"));
            System.out.println("-------------------------");
            String value = input.getString(4);
            System.out.println("index 4 --> " + value); // value
            System.out.println("value--> " + input.getStringByField("value"));

            String info = "topic: " + topic + ", partiton: " + partition + ", offset: " + offset
                    + ", key: " + key + ", value: " + value + "\n";
            System.out.println("info = " + info);
            // The writer is null when prepare() could not open the file.
            if (fileWriter != null) {
                fileWriter.write(info);
                // Flush per tuple so the line is on disk even if cleanup() never runs.
                fileWriter.flush();
            }
            collector.ack(input);
        } catch (IOException | RuntimeException e) {
            e.printStackTrace();
            collector.fail(input);
        }
    }

    /** Closes the output file if it was opened. */
    @Override
    public void cleanup() {
        if (fileWriter == null) {
            return;
        }
        try {
            fileWriter.close();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            fileWriter = null;
        }
    }

    /** Declares nothing: this bolt emits no tuples. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Intentionally empty.
    }

    /**
     * @return {@code null}, i.e. no component-specific configuration
     */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
错误出现在KafkaTopoDemo类中,
错误的原因在于:在代码中设置 RecordTranslator 时(第67行,即 setRecordTranslator(trans);流 id 是在构造 ByTopicRecordTranslator 时传入的),将数据流 id 设置成了 stream1;而在 topologyBuilder 上设置 bolt 时(第32行,即 setBolt("mybolt", ...) 处),使用的分发策略是 shuffleGrouping("kafka_spout")。其实错误与分发策略本身没有关系,而是与分发策略的使用方式有关——当使用 shuffleGrouping(String componentId) 这种方式设置分发策略时,mybolt 组件默认从上游组件的 default 数据流中获取数据;而在代码中,上游组件(kafka_spout)的数据流 id 已被设置成 stream1,并不存在 default 数据流,故而导致报错:InvalidTopologyException(msg:Component: [mybolt] subscribes from non-existent stream: [default] of component [kafka_spout]),即提示 default 数据流不存在。
修改:在设置mybolt组件的分发策略时,使用shuffleGrouping(String componentId, String streamId),手动指定要读取的数据流id为stream1
// Corrected wiring: subscribe explicitly to "stream1", the stream id configured on the spout's record translator.
topologybuilder.setBolt("mybolt", new MyBolt("/tmp/storm_test.log")).shuffleGrouping("kafka_spout", "stream1");