直接贴代码,注释内都有详细解释:
pom依赖:
<!--SparkStreaming 整合kafka0.10版本-->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
<version>2.1.3</version>
</dependency>
package cn.spark.direct
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010._
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Reads records from Kafka (0.10+) through Spark Streaming's direct stream API,
 * prints every record's key and value, and commits the consumed offsets back
 * to Kafka once each batch has been processed.
 *
 * Usage: DirectStream [groupId] [topic]
 *   - groupId: Kafka consumer group, defaults to "g001"
 *   - topic:   topic to subscribe to, defaults to "myorder"
 *
 * @Author xiaohuli
 * @CreateDate 2019/2/13
 */
object DirectStream {

  /**
   * Entry point: builds the streaming context, wires up the Kafka direct
   * stream and blocks until the streaming job terminates.
   *
   * @param args optional positional arguments: consumer group id, then topic
   */
  def main(args: Array[String]): Unit = {
    // Fall back to the original hard-coded values when no arguments are given.
    val group = if (args.length > 0) args(0) else "g001"
    val topic = if (args.length > 1) args(1) else "myorder"

    // Only default to a local master when none has been configured (e.g. when
    // launched from an IDE). A master supplied via spark-submit is preserved,
    // so the code no longer has to be edited before a cluster deployment.
    val conf = new SparkConf()
      .setAppName("DirectStream")
      .setIfMissing("spark.master", "local[4]")

    // The StreamingContext wraps a SparkContext; batches are cut every 5 seconds.
    val ssc = new StreamingContext(conf, Seconds(5))

    // Kafka consumer configuration.
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "master:9092,slave1:9092,slave2:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> group,
      "auto.offset.reset" -> "earliest", // alternative: "latest"
      // Auto-commit is disabled: offsets are committed explicitly below,
      // after the batch has been processed.
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )
    val topics = Array(topic)

    // Direct stream; consumed offsets are stored in Kafka itself.
    val stream: InputDStream[ConsumerRecord[String, String]] =
      KafkaUtils.createDirectStream[String, String](
        ssc,
        // Location strategy
        PreferConsistent,
        // Consumer (subscription) strategy
        Subscribe[String, String](topics, kafkaParams)
      )

    // Handle the RDD produced for each batch interval.
    stream.foreachRDD { rdd =>
      // Capture this batch's offset ranges from the RDD handed out by the stream.
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges

      // TODO: replace with real processing, e.g. val results = yourCalculation(rdd)
      // foreach is an action, so it triggers the computation of this batch.
      rdd.foreach { record =>
        println(s"${record.key()} ${record.value()}")
      }

      // Commit the offsets of this batch back to Kafka after processing.
      stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
    }

    ssc.start()
    ssc.awaitTermination()
  }
}