// Spark Streaming job that consumes records from a Kafka topic and prints each
// micro-batch to stdout. Runs locally on all available cores.
val spark = SparkSession.builder().master("local[*]").appName("myKafka").getOrCreate()
// 5-second micro-batch interval
val ssc = new StreamingContext(spark.sparkContext, Seconds(5))
// Kafka consumer configuration; values are String/Object as the 0-10 connector expects.
val kafkaParams = Map[String, Object](
  ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "192.168.30.182:9092", // Kafka broker address (use your own host:port)
  ConsumerConfig.GROUP_ID_CONFIG -> "cmoo1",                        // consumer group id
  ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer].getName,
  ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer].getName,
  // Start from the earliest offset when no committed offset exists for this group.
  ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "earliest"
)
// Consume from Kafka with the direct (receiver-less) stream API.
// PreferConsistent distributes partitions evenly across executors.
val ds = KafkaUtils.createDirectStream[String, String](
  ssc,
  LocationStrategies.PreferConsistent,
  ConsumerStrategies.Subscribe[String, String](Set("mydemo"), kafkaParams)
)
// Extract the record value per partition lazily (no intermediate buffer needed),
// then print each batch. collect() is acceptable here only because this is a
// local demo — it pulls the whole RDD to the driver.
ds.mapPartitions(records => records.map(_.value()))
  .foreachRDD(rdd => println(rdd.collect().mkString("\n")))
ssc.start()
ssc.awaitTermination()
// SparkStreaming从kafka消费数据 (Spark Streaming consuming data from Kafka)
// 最新推荐文章于 2024-07-21 18:15:30 发布 (latest recommended article published 2024-07-21 18:15:30)