这里的整合,其实就是让Storm充当Kafka的消费者,对Kafka中的数据进行处理
官方文档教程:http://storm.apache.org/releases/1.1.2/storm-kafka.html
一、环境准备
在服务器上依次启动Zookeeper、Kafka、Storm(Kafka和Storm都依赖Zookeeper,所以要先启动Zookeeper)
创建topic
kafka-topics.sh --create --zookeeper hadoop000:2181 --replication-factor 1 --partitions 1 --topic stormkafka
测试topic是否有效
kafka生产者
kafka-console-producer.sh --broker-list hadoop000:9092 --topic stormkafka
kafka消费者
kafka-console-consumer.sh --zookeeper hadoop000:2181 --topic stormkafka --from-beginning
二、开发整合
pom文件
这里的依赖一个都不能少,不然,说多了都是泪呀,索性全部贴出来
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven build descriptor for the Storm + Kafka integration example project. -->
<modelVersion>4.0.0</modelVersion>
<groupId>com.bigdata</groupId>
<artifactId>storm</artifactId>
<version>1.0</version>
<name>storm</name>
<!-- FIXME change it to the project's website -->
<url>http://www.example.com</url>
<!-- Source file encoding (UTF-8) and the Java language level (1.7) used for compilation. -->
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
</properties>
<dependencies>
<!-- Storm core 1.1.1 with its transitive log4j-over-slf4j and slf4j-api excluded. -->
<!-- NOTE(review): the exclusions are presumably there to avoid logging conflicts with the Kafka artifacts; confirm before changing. -->
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-core</artifactId>
<version>1.1.1</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>log4j-over-slf4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- <dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.4</version>
</dependency>-->
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<version>2.5.0</version>
</dependency>
<!-- Storm's Kafka integration module; same version (1.1.1) as storm-core above. -->
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-kafka</artifactId>
<version>1.1.1</version>
</dependency>
<!-- Kafka 0.9.0.0 (kafka_2.11 artifact) with its transitive zookeeper and slf4j-log4j12 excluded. -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.11</artifactId>
<version>0.9.0.0</version>
<exclusions>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Dependencies required by the storm-kafka module -->
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-client</artifactId>
<version>2.12.0</version>
</dependency>
<!-- Kafka client library; version kept identical to the kafka_2.11 dependency above. -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>0.9.0.0</version>
</dependency>
</dependencies>
<build>
<pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
<plugins>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.0.0</version>
</plugin>
<!-- see http://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.7.0</version>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.20.1</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>
测试的代码
1、kafkastorm
package com.kafkastorm;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.kafka.BrokerHosts;
import org.apache.storm.kafka.KafkaSpout;
import org.apache.storm.kafka.SpoutConfig;
import org.apache.storm.kafka.ZkHosts;
import org.apache.storm.topology.TopologyBuilder;
import java.util.UUID;
/**
 * Storm/Kafka integration demo: builds a topology in which a {@link KafkaSpout}
 * feeds a {@link LogBolt}, then submits it to an in-process {@link LocalCluster}.
 *
 * @author 陈宏松
 * @version 1.0.0 (created 2018-12-12 16:06)
 */
public class kafkastorm {

    /**
     * Wires the spout and bolt together and submits the topology to a local cluster.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        // Kafka topic to consume.
        final String topicName = "stormkafka";

        // Component ids are derived from the simple class names.
        final String spoutName = KafkaSpout.class.getSimpleName();
        final String boltName = LogBolt.class.getSimpleName();
        final String topologyName = kafkastorm.class.getSimpleName();

        final TopologyBuilder topology = new TopologyBuilder();

        // ZooKeeper address handed to the spout for locating the Kafka brokers.
        final BrokerHosts zkHosts = new ZkHosts("192.168.1.117:2181");

        // The ZooKeeper root path is "/" + topic; the spout id is a fresh random UUID on every run.
        final SpoutConfig kafkaConfig =
                new SpoutConfig(zkHosts, topicName, "/" + topicName, UUID.randomUUID().toString());

        // Spout -> bolt, with tuples distributed by shuffle grouping.
        topology.setSpout(spoutName, new KafkaSpout(kafkaConfig));
        topology.setBolt(boltName, new LogBolt()).shuffleGrouping(spoutName);

        // Run locally (in-process) with a default Config.
        final LocalCluster localCluster = new LocalCluster();
        localCluster.submitTopology(topologyName, new Config(), topology.createTopology());
    }
}
2、Bolt的实现
package com.kafkastorm;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;
import java.util.Map;
/**
 * Bolt that receives raw Kafka messages from the upstream spout and prints each
 * one to standard output.
 *
 * <p>Every tuple is acknowledged after it has been printed. This class extends
 * {@link BaseRichBolt}, which does not ack automatically; without the explicit
 * ack the tuples would time out and be replayed by the spout.
 *
 * @author 陈宏松
 * @version 1.0.0 (created 2018-12-12 16:19)
 */
public class LogBolt extends BaseRichBolt {

    /** Collector supplied by Storm in {@link #prepare}; used to ack processed tuples. */
    private OutputCollector collector;

    /**
     * Stores the collector handed over by Storm so that {@link #execute} can ack tuples.
     *
     * @param map             topology configuration (not used)
     * @param topologyContext task context (not used)
     * @param outputCollector collector used to acknowledge tuples
     */
    @Override
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
        // Fix: the original assigned the field to itself, leaving the collector null.
        this.collector = outputCollector;
    }

    /**
     * Decodes the message payload, prints it, and acknowledges the tuple.
     *
     * @param tuple tuple whose "bytes" field holds the raw message payload
     */
    @Override
    public void execute(Tuple tuple) {
        // The payload arrives as a byte array in the "bytes" field.
        byte[] bytes = tuple.getBinaryByField("bytes");
        // Decode explicitly as UTF-8 instead of the platform default charset, so that
        // non-ASCII messages are not garbled on machines whose default is not UTF-8.
        // NOTE(review): assumes producers write UTF-8 text - confirm for other producers.
        String value = new String(bytes, java.nio.charset.StandardCharsets.UTF_8);
        System.out.println(value + ".............");
        // Tell Storm the tuple is fully processed so the spout does not replay it.
        collector.ack(tuple);
    }

    /**
     * Declares no output fields: this bolt is a sink and emits nothing downstream.
     *
     * @param outputFieldsDeclarer declarer supplied by Storm (not used)
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
    }
}
最终结果(调试了半天,中间遇到了一堆错误;遇到各种报错时,要学会借助有道词典和谷歌来排查)