编写Spark Streaming代码
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}
object Practice821 {

  /**
   * Streaming word count over a Kafka topic.
   *
   * Consumes records from topic "kb07demo" on broker hadoopt:9092 in
   * 5-second micro-batches, splits each record value on spaces, and
   * prints per-batch word counts to the console.
   *
   * NOTE: a `main` method is used instead of `extends App` — Spark's
   * documentation warns that subclasses of scala.App may not work
   * correctly because DelayedInit can leave fields uninitialized when
   * closures are shipped to executors.
   */
  def main(args: Array[String]): Unit = {
    // local[2]: at least two threads so the receiver and the batch
    // processing do not starve each other; 5-second batch interval.
    val sparkConf: SparkConf = new SparkConf().setMaster("local[2]").setAppName("StreamingDemo2")
    val ssc = new StreamingContext(sparkConf, Seconds(5))

    // Kafka consumer configuration. Use ConsumerConfig constants for
    // every key (the original mixed raw strings with constants), which
    // prevents silent typos in config names.
    val kafkaParams = Map(
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "hadoopt:9092",
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringDeserializer",
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringDeserializer",
      ConsumerConfig.GROUP_ID_CONFIG -> "kafkaG1"
    )

    // Direct stream (no receiver): each Spark partition maps to a
    // Kafka partition. Explicit [String, String] type parameters avoid
    // relying on inference for the key/value deserializer types.
    val message: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Set("kb07demo"), kafkaParams)
    )

    // Classic word count: record value -> words -> (word, 1) -> sum.
    // Counts are per batch, not cumulative across batches.
    message.map(_.value())
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .print()

    ssc.start()
    ssc.awaitTermination() // block until the streaming job is stopped
  }
}
测试代码
打开虚拟机,打开kafka服务
运行上面的代码
然后在指定的topic上生产消息:
kafka-console-producer.sh --broker-list hadoopt:9092 --topic kb07demo
然后随便输入一些单词:
然后在运行代码的控制台即可看到每个 5 秒批次输出的单词计数结果(形如 (word, count))。