package cn.kgc.kb11

import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object SparkStreamDemo1 {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("sparkstream1")
    // Define the streaming context with a 3-second batch interval
    val streamingContext = new StreamingContext(conf, Seconds(3))
    // Specify how the data is obtained: a socket text stream
    val socketLineStream: ReceiverInputDStream[String] = streamingContext.socketTextStream("192.168.112.100", 7777)
    // Process the data collected in each 3-second batch
    val wordStream: DStream[String] = socketLineStream.flatMap(line => line.split("\\s+"))
    val mapStream: DStream[(String, Int)] = wordStream.map(x => (x, 1))
    val wordCountStream: DStream[(String, Int)] = mapStream.reduceByKey(_ + _)
    // Output
    wordCountStream.print()
    // Start the receiver
    streamingContext.start()
    streamingContext.awaitTermination()
  }
}
Open a listening port on Linux with nc; once it is listening, run the program, then type text on the Linux side and the word counts appear in the IDEA console:
[root@hadoop100 ~]# nc -lk 7777
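For example, typing a line such as `hello spark hello spark hello kafka` into the nc session produces output like the following in the IDEA console once the 3-second batch fires (a sketch of a typical run; the timestamp and ordering will differ):

```
-------------------------------------------
Time: 1617000000000 ms
-------------------------------------------
(hello,3)
(spark,2)
(kafka,1)
```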
2. Use Spark Streaming to read data from Kafka, process it, and write the results back to Kafka
```scala
package cn.kgc.kb11

import java.util
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}

/* Read data from Kafka topic A, process it, and write the results back to Kafka topic B */
object SparkStreamKafkaSourceToKafkaSink {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("sparkKafkaStream")
    val streamingContext = new StreamingContext(conf, Seconds(5))
    streamingContext.checkpoint("checkpoint")
    // Kafka consumer configuration
    val kafkaParams = Map(
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "192.168.112.100:9092",
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringDeserializer",
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringDeserializer",
      ConsumerConfig.GROUP_ID_CONFIG -> "kafkaGroup1")
    // Subscribe to the source topic
    val kafkaStream: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream(
      streamingContext, LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Set("sparkKafkaDemo"), kafkaParams))
    // Word count over each 5-second batch
    val wordCount: DStream[(String, Int)] = kafkaStream
      .flatMap(x => x.value().split("\\s+"))
      .map(x => (x, 1))
      .reduceByKey(_ + _)
    // Write the results back to the sink topic, creating one producer per partition
    wordCount.foreachRDD(
      rdd => {
        rdd.foreachPartition(
          x => {
            val props = new util.HashMap[String, Object]()
            props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.112.100:9092")
            props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer")
            props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer")
            val producer = new KafkaProducer[String, String](props)
            x.foreach(
              y => {
                val word = y._1
                val num = y._2
                val record = new ProducerRecord[String, String]("sparkKafkaDemoOut", "", word + "," + num)
                producer.send(record)
              })
            // Release the producer created for this partition
            producer.close()
          })
      })
    streamingContext.start()
    streamingContext.awaitTermination()
  }
}
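// To try this out (a sketch, not part of the original walkthrough; it assumes the standard
// Kafka console scripts are on the PATH and that the two topics already exist, and flag
// names vary slightly across Kafka versions):
//   kafka-console-producer.sh --broker-list 192.168.112.100:9092 --topic sparkKafkaDemo
//   kafka-console-consumer.sh --bootstrap-server 192.168.112.100:9092 --topic sparkKafkaDemoOut --from-beginning
// Each line typed into the producer comes back on the consumer as "word,count" records
// after the corresponding 5-second batch completes.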