不多说,依旧是代码。
pom.xml
<dependency> <groupId>org.apache.storm</groupId> <artifactId>storm-core</artifactId> <version>1.0.2</version> <!-- 由于storm环境中有该jar,所以不用pack到最终的task.jar中 --> <scope>provided</scope> </dependency> <!--storm和kafka的整合--> <dependency> <groupId>org.apache.storm</groupId> <artifactId>storm-kafka</artifactId> <version>1.0.2</version> </dependency> <!-- kafka目前已经有2.10了,但是我用了,任务执行报错,目前只能用kafka_2.9.2,我kafka服务端也是用最新的2.10版本 --> <dependency> <groupId>org.apache.kafka</groupId> <artifactId>kafka_2.9.2</artifactId> <version>0.8.2.2</version> <!-- 排除以下jar,由于storm服务端有log4j,避免冲突报错--> <exclusions> <exclusion> <groupId>org.apache.zookeeper</groupId> <artifactId>zookeeper</artifactId> </exclusion> <exclusion> <groupId>log4j</groupId> <artifactId>log4j</artifactId> </exclusion> <exclusion> <groupId>org.slf4j</groupId> <artifactId>slf4j-log4j12</artifactId> </exclusion> </exclusions> </dependency>kafka的配置:
package com.yjp.kafkastorm.kafka;

import org.springframework.boot.context.properties.ConfigurationProperties;

import java.io.Serializable;

/**
 * Configuration holder for the Storm/Kafka integration, bound from
 * application properties prefixed with {@code spring.storm.kafka}.
 *
 * <p>Serializable so it can travel with serialized topology components.
 */
@ConfigurationProperties(prefix = "spring.storm.kafka")
public class KafkaProperties implements Serializable {

    private static final long serialVersionUID = 1L;

    /** ZooKeeper hosts used to look up Kafka broker and partition metadata. */
    private String zkhosts;

    /** Topic the spout reads messages from. */
    private String topic;

    /**
     * Consumer-progress id; give a new spout the same id as a previous one
     * to resume from that spout's recorded offsets.
     */
    private String groupId;

    /** Parallelism hint for the spout. */
    private String spoutParallelism;

    /** Parallelism hint for the bolt. */
    private String boltParallelism;

    /** ZooKeeper path under which read progress is recorded. */
    private String zkRoot;

    /** Host(s) of the ZooKeeper used to record the spout's read progress. */
    private String zkServersIp;

    /** Port of the ZooKeeper used to record progress. */
    private Integer zkPort;

    public String getZkhosts() {
        return zkhosts;
    }

    public void setZkhosts(String zkhosts) {
        this.zkhosts = zkhosts;
    }

    public String getTopic() {
        return topic;
    }

    public void setTopic(String topic) {
        this.topic = topic;
    }

    public String getGroupId() {
        return groupId;
    }

    public void setGroupId(String groupId) {
        this.groupId = groupId;
    }

    public String getSpoutParallelism() {
        return spoutParallelism;
    }

    public void setSpoutParallelism(String spoutParallelism) {
        this.spoutParallelism = spoutParallelism;
    }

    public String getBoltParallelism() {
        return boltParallelism;
    }

    public void setBoltParallelism(String boltParallelism) {
        this.boltParallelism = boltParallelism;
    }

    public String getZkRoot() {
        return zkRoot;
    }

    public void setZkRoot(String zkRoot) {
        this.zkRoot = zkRoot;
    }

    public String getZkServersIp() {
        return zkServersIp;
    }

    public void setZkServersIp(String zkServersIp) {
        this.zkServersIp = zkServersIp;
    }

    public Integer getZkPort() {
        return zkPort;
    }

    public void setZkPort(Integer zkPort) {
        this.zkPort = zkPort;
    }

    @Override
    public String toString() {
        return "KafkaProperties{"
                + "zkhosts='" + zkhosts + '\''
                + ", topic='" + topic + '\''
                + ", groupId='" + groupId + '\''
                + ", spoutParallelism='" + spoutParallelism + '\''
                + ", boltParallelism='" + boltParallelism + '\''
                + ", zkRoot='" + zkRoot + '\''
                + ", zkServersIp='" + zkServersIp + '\''
                + ", zkPort=" + zkPort
                + '}';
    }
}
KafkaSpout:
package com.yjp.kafkastorm.kafka;

import com.yjp.kafkastorm.util.StringScheme;
import org.apache.storm.kafka.KafkaSpout;
import org.apache.storm.kafka.SpoutConfig;
import org.apache.storm.kafka.ZkHosts;
import org.apache.storm.spout.SchemeAsMultiScheme;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import java.util.Arrays;

/**
 * Spring auto-configuration that exposes a ready-to-use {@link KafkaSpout}
 * bean, built from {@link KafkaProperties}, for direct placement in a
 * Storm topology.
 */
@Configuration
@EnableConfigurationProperties({KafkaProperties.class})
public class KafkaSpoutAutoConfiguration {

    @Autowired
    private KafkaProperties kafkaProperties;

    /**
     * Assembles a {@link KafkaSpout} from the bound Kafka properties.
     *
     * @return a spout that can be wired straight into a topology
     */
    @Bean
    public KafkaSpout kafkaSpout() {
        final ZkHosts hosts = new ZkHosts(kafkaProperties.getZkhosts());
        final SpoutConfig config = new SpoutConfig(
                hosts,
                kafkaProperties.getTopic(),
                kafkaProperties.getZkRoot(),
                kafkaProperties.getGroupId());
        config.zkPort = kafkaProperties.getZkPort();
        config.zkServers = Arrays.asList(kafkaProperties.getZkServersIp().split(","));
        config.scheme = new SchemeAsMultiScheme(new StringScheme());
        // Always begin consuming from the most recent offset.
        config.startOffsetTime = kafka.api.OffsetRequest.LatestTime();
        return new KafkaSpout(config);
    }
}
反序列化类:
package com.yjp.kafkastorm.util; import org.apache.storm.spout.Scheme; import org.apache.storm.tuple.Fields; import org.apache.storm.tuple.Values; import org.apache.storm.utils.Utils; import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.util.List; /** * 反序列化的方式 * * @Author : WenBao * Date : 15:02 2018/1/22 */ public class StringScheme implements Scheme { private static final long serialVersionUID = -7545331467885885076L; private static final Charset UTF8_CHARSET = StandardCharsets.UTF_8; public static final String STRING_SCHEME_KEY = "str"; @Override public List<Object> deserialize(ByteBuffer byteBuffer) { return new Values(deserializeString(byteBuffer)); } public static String deserializeString(ByteBuffer string) { if (string.hasArray()) { //返回缓冲区的偏移量 int base = string.arrayOffset(); //position 返回此缓冲区的位置 //remaining 最大缓冲区的位置减去当前缓冲区的位置 return new String(string.array(), base + string.position(), string.remaining(), UTF8_CHARSET); } else { return new String(Utils.toByteArray(string), UTF8_CHARSET); } } @Override public Fields getOutputFields() { return new Fields(STRING_SCHEME_KEY);//kafkaSpout输出的字段名称 } }整合到这里结束,将kafka启动,然后在项目的properties中配置好kafka的属性就可以了。
努力吧 皮卡丘