1 添加依赖
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
2 KafKaReceiverWC.scala
package com.lihaogn.sparkKafka
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
* spark streaming & kafka -> receiver
*/
/**
 * Receiver-based Spark Streaming word count over a Kafka 0.8 topic.
 *
 * Expects exactly four arguments:
 *   zkQuorum   - ZooKeeper connect string (e.g. "localhost:2181")
 *   group      - Kafka consumer group id
 *   topics     - comma-separated list of topic names
 *   numThreads - receiver threads per topic
 */
object KafKaReceiverWC {
  def main(args: Array[String]): Unit = {
    if (args.length != 4) {
      System.err.println("usage: KafKaReceiverWC <zkQuorum> <group> <topics> <numThreads>")
      // Exit immediately: without this, the Array destructuring below
      // throws a MatchError on a wrong argument count instead of a clean usage error.
      System.exit(1)
    }
    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf()
    // Fallback app name so the job also runs outside spark-submit --name
    // (StreamingContext creation fails if no app name is set).
    if (!sparkConf.contains("spark.app.name")) sparkConf.setAppName("KafKaReceiverWC")
    // 5-second micro-batch interval.
    val ssc = new StreamingContext(sparkConf, Seconds(5))
    // Map each topic to its receiver thread count, as required by createStream.
    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    // Connect Spark Streaming to Kafka via the ZooKeeper-based receiver API.
    val messages = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap)
    // Stream records are (key, value) pairs; only the message value (_._2) is counted.
    messages.map(_._2).flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _).print()
    ssc.start()
    ssc.awaitTermination()
  }
}
3 编译代码
mvn clean package -DskipTests
4 启动zookeeper
zkServer.sh start
5 启动kafka
kafka-server-start.sh -daemon $KAFKA_HOME/config/server.properties
6 创建topic
kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic kafka-streaming-topic
7 启动producer,用来生产消息
kafka-console-producer.sh --broker-list localhost:9092 --topic kafka-streaming-topic
8 提交spark程序
spark-submit \
--class com.lihaogn.sparkKafka.KafKaReceiverWC \
--master local[2] \
--name KafKaReceiverWC \
--jars /Users/Mac/software/spark-streaming-kafka-0-8-assembly_2.11-2.2.0.jar \
/Users/Mac/my-lib/Kafka-train-1.0.jar \
localhost:2181 test kafka-streaming-topic 1
9 测试