(1) 启动zookeeper
./zkServer.sh start
(2) 启动kafka
./bin/kafka-server-start.sh config/server.properties
(3) 创建topic
./bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic kafka-streaming-topic
(4) 启动producer,用来生产消息
./bin/kafka-console-producer.sh --broker-list localhost:9092 --topic kafka-streaming-topic
(5)启动consumer
./bin/kafka-console-consumer.sh --zookeeper localhost:2181 --topic kafka-streaming-topic --from-beginning
(6)添加依赖
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
(7) KafkaReceiverWordCount.scala
package com.streaming.kafka
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Spark Streaming integration with Kafka, approach one: receiver-based
 * (`KafkaUtils.createStream`, consuming via ZooKeeper).
 *
 * Usage: KafkaReceiverWordCount <zkQuorum> <group> <topics> <numThreads>
 *   - zkQuorum:   ZooKeeper connection string, e.g. "localhost:2181"
 *   - group:      Kafka consumer group id
 *   - topics:     comma-separated topic list
 *   - numThreads: receiver threads per topic
 */
object KafkaReceiverWordCount {
  def main(args: Array[String]): Unit = {
    if (args.length != 4) {
      System.err.println("Usage: KafkaReceiverWordCount <zkQuorum> <group> <topics> <numThreads>")
      // BUG FIX: exit on bad arguments. Without this, execution fell through and
      // the Array extractor below threw a confusing scala.MatchError instead.
      System.exit(1)
    }
    val Array(zkQuorum, group, topics, numThreads) = args

    // AppName/master are left unset here so they can be supplied by spark-submit.
    val sparkConf = new SparkConf() //.setAppName("KafkaReceiverWordCount")
    //.setMaster("local[2]")
    val ssc = new StreamingContext(sparkConf, Seconds(5))

    // Map each topic to the number of receiver threads to use for it.
    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap

    // Receiver-based stream: each element is a (key, message) pair.
    val messages = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap)

    // Take _._2 (the message payload; _._1 is the Kafka message key, often null),
    // then run the classic word count.
    messages.map(_._2).flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _).print()

    ssc.start()
    ssc.awaitTermination()
  }
}
编译代码
mvn clean package -DskipTests
提交集群
spark-submit \
--class com.streaming.kafka.KafkaReceiverWordCount \
--master local[2] \
--name KafkaReceiverWordCount \
--jars /Users/Mac/software/spark-streaming-kafka-0-8-assembly_2.11-2.2.0.jar \
/Users/Mac/my-lib/Kafka-train-1.0.jar \
localhost:2181 test kafka-streaming-topic 1