Receiver-based Integration
1. Start ZooKeeper:
cd /home/hadoop/app/zookeeper-3.4.5-cdh5.7.0/bin/
./zkServer.sh start
2. Start Kafka:
cd /home/hadoop/app/kafka_2.11-0.10.0.1/bin
./kafka-server-start.sh -daemon /home/hadoop/app/kafka_2.11-0.10.0.1/config/server.properties
3. Create the topic:
./kafka-topics.sh --create --zookeeper hadoop000:2181 --replication-factor 1 --partitions 1 --topic kafka_streaming_topic
4. List topics to confirm it was created:
./kafka-topics.sh --list --zookeeper hadoop000:2181
5. Use the console clients to verify that the topic can produce and consume messages correctly.
Producer: ./kafka-console-producer.sh --broker-list hadoop000:9092 --topic kafka_streaming_topic
Consumer: ./kafka-console-consumer.sh --zookeeper hadoop000:2181 --topic kafka_streaming_topic
Note: before wiring the pieces together, always verify that the environment is set up correctly. Test every component in advance and proceed step by step.
// Spark Streaming + Kafka integration, approach 1: the Receiver-based approach
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}

object KafkaReceiverWordCount {
  def main(args: Array[String]): Unit = {
    if (args.length != 4) {
      // Example arguments: hadoop000:2181 test kafka_streaming_topic 1
      System.err.println("Usage: KafkaReceiverWordCount <zkQuorum> <group> <topics> <numThreads>")
      System.exit(1)
    }
    val Array(zkQuorum, group, topics, numThreads) = args
    val sparkConf = new SparkConf()
    // In production the job is packaged and run on Linux, so the two settings
    // below are not needed; uncomment them only when running locally in an IDE.
    // .setAppName(this.getClass.getSimpleName)
    // .setMaster("local[*]")
    val ssc = new StreamingContext(sparkConf, Seconds(5))
    // Map each topic to the number of consumer threads that should read it
    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    // Receiver-based connection to Kafka: yields a DStream of (key, message) pairs
    val message = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap)
    // Word count: the second tuple element is the message payload
    message.map(_._2).flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _).print()
    ssc.start()
    ssc.awaitTermination()
  }
}
Once the code is written, package it, upload the jar to the Linux machine, and run:
spark-submit \
--class com.imooc.spark.KafkaReceiverWordCount \
--master local[2] \
--name KafkaReceiverWordCount \
--packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.2.0 \
/home/hadoop/lib/sparkstream-1.0-SNAPSHOT.jar hadoop000:2181 test kafka_streaming_topic 1
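With the Receiver-based approach, numThreads only controls how many threads the single receiver uses to consume the topic; it does not increase the parallelism of the resulting RDDs. The usual way to receive in parallel (described in the Spark Streaming + Kafka integration guide) is to create several input streams and union them. A minimal sketch, reusing ssc, zkQuorum, group, and topicMap from KafkaReceiverWordCount above; the value of numStreams is illustrative:

// Sketch: parallel receivers, assuming ssc, zkQuorum, group, and topicMap
// are already defined as in KafkaReceiverWordCount above
val numStreams = 3 // illustrative: each stream gets its own receiver
val kafkaStreams = (1 to numStreams).map { _ =>
  KafkaUtils.createStream(ssc, zkQuorum, group, topicMap)
}
// Union the streams into a single DStream before processing
val unifiedStream = ssc.union(kafkaStreams)
unifiedStream.map(_._2).flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _).print()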
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
import kafka.serializer.StringDecoder
/**
 * Spark Streaming + Kafka integration, approach 2: the Direct (receiver-less) approach
 */
object KafkaDirectWordCount {
  def main(args: Array[String]): Unit = {
    if (args.length != 2) {
      System.err.println("Usage: KafkaDirectWordCount <brokers> <topics>")
      System.exit(1)
    }
    val Array(brokers, topics) = args
    val sparkConf = new SparkConf() //.setAppName("KafkaDirectWordCount")
                                    //.setMaster("local[2]")
    val ssc = new StreamingContext(sparkConf, Seconds(5))
    val topicsSet = topics.split(",").toSet
    // The direct approach talks to the brokers directly, not to ZooKeeper
    val kafkaParams = Map[String, String]("metadata.broker.list" -> brokers)
    // Direct (receiver-less) connection to Kafka
    val messages = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
      ssc, kafkaParams, topicsSet
    )
    // Each record is a (key, message) pair; the payload is the second element
    messages.map(_._2).flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _).print()
    ssc.start()
    ssc.awaitTermination()
  }
}
Once the code is written, package it, upload the jar to the Linux machine, and run:
spark-submit \
--class com.imooc.spark.KafkaDirectWordCount \
--master local[2] \
--name KafkaDirectWordCount \
--packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.2.0 \
/home/hadoop/lib/sparkstream-1.0-SNAPSHOT.jar hadoop000:9092 kafka_streaming_topic
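Unlike the Receiver-based approach, the direct approach does not record consumed offsets in ZooKeeper; each micro-batch RDD carries the offset ranges it read, which you can inspect or persist yourself. A minimal sketch using the HasOffsetRanges interface from spark-streaming-kafka-0-8, assuming the messages stream created in KafkaDirectWordCount above:

import org.apache.spark.streaming.kafka.HasOffsetRanges

// Sketch: print the Kafka offset ranges backing each micro-batch,
// assuming `messages` is the DStream from KafkaDirectWordCount above
messages.foreachRDD { rdd =>
  val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
  offsetRanges.foreach { o =>
    // topic, partition, and the [fromOffset, untilOffset) range this batch consumed
    println(s"${o.topic} ${o.partition} ${o.fromOffset} ${o.untilOffset}")
  }
}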