import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
object SparkStreamingKafkaWC {
  def main(args: Array[String]): Unit = {
    // One thread receives data in real time and another processes it,
    // so a receiver-based stream needs at least two threads (hence local[*])
    val conf = new SparkConf().setAppName("SparkStreamingKafkaWC").setMaster("local[*]")
    // Batch interval of 5 seconds
    val ssc = new StreamingContext(conf, Seconds(5))
    // Kafka integration parameters
    // ZooKeeper quorum the receiver uses to discover brokers and track offsets
    val zkQuorum = "192.168.88.130:2181,192.168.88.131:2181,192.168.88.132:2181"
    val groupId = "group1"
    // Topic name -> number of receiver threads for that topic
    val topics = Map[String, Int]("test" -> 1)
    // Create a receiver-based DStream that consumes from Kafka via ZooKeeper.
    // Each record is a (key, value) pair: the first String is the message key,
    // the second String is the actual message payload.
    val data: ReceiverInputDStream[(String, String)] = KafkaUtils.createStream(ssc, zkQuorum, groupId, topics)
    // Keep only the message value
    val lines = data.map(_._2)
    // Word count: split on commas, pair each word with 1, then sum by key
    val res = lines.flatMap(_.split(",")).map((_, 1)).reduceByKey(_ + _)
    res.print()
    ssc.start()
    ssc.awaitTermination()
  }
}
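
// A minimal sketch of the alternative, receiver-less "direct" approach, assuming the same
// spark-streaming-kafka (Kafka 0.8) dependency as above; the object name and broker
// addresses below are hypothetical. Unlike createStream, the direct stream takes a broker
// list ("metadata.broker.list") instead of a ZooKeeper quorum and tracks offsets itself.
import kafka.serializer.StringDecoder

object SparkStreamingKafkaDirectWC {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("SparkStreamingKafkaDirectWC").setMaster("local[*]")
    val ssc = new StreamingContext(conf, Seconds(5))
    // Kafka broker addresses (hypothetical), not ZooKeeper
    val kafkaParams = Map[String, String]("metadata.broker.list" -> "192.168.88.130:9092,192.168.88.131:9092")
    val topics = Set("test")
    // Records are (key, value) pairs decoded with StringDecoder
    val data = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topics)
    // Same word count as the receiver-based version
    val res = data.map(_._2).flatMap(_.split(",")).map((_, 1)).reduceByKey(_ + _)
    res.print()
    ssc.start()
    ssc.awaitTermination()
  }
}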