Spark Streaming with Kafka: blacklist filtering

import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}
object MyReadKafkaHandler {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("MyKafka")
    val sc = new SparkContext(conf)
    // Streaming context with a 10-second batch interval
    val ssc = new StreamingContext(sc, Seconds(10))
    // Checkpointing is required because stateful DStream operations are used
    ssc.checkpoint("src/main/data/mykafka-logs")
    // Kafka consumer connection parameters
    val kafkaParam = Map(
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "192.168.253.150:9092",
      ConsumerConfig.GROUP_ID_CONFIG -> "mykafka1",
      ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> "true",
      ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG -> "20000",
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "earliest"
    )
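    // Note (not in the original post): with enable.auto.commit=true the Kafka client
    // commits offsets every auto.commit.interval.ms (20 s here) on its own schedule,
    // independent of whether a Spark batch has finished. auto.offset.reset=earliest
    // only applies when the consumer group has no previously committed offset.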
    // Create the Direct stream
    val streams = KafkaUtils.createDirectStream(
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Set("mydemo5"), kafkaParam))
    // Blacklist RDD of (name, true) pairs. The original post does not show where it
    // comes from; sample in-memory data is assumed here so the code compiles
    val blacks = sc.parallelize(Seq(("jack", true), ("rose", true)))
    // Each record is "time,name"; re-key by name, then drop names found in the blacklist
    streams.map(_.value()).map(x => { val e = x.split(","); (e(1), e(0)) }).transform(rdd => {
      rdd.leftOuterJoin(blacks).filter(p => p._2._2.getOrElse(false) != true)
      // rdd.leftOuterJoin(blacks).filter(p => p._2._2.mkString != "true")
      // rdd.leftOuterJoin(blacks).filter(p => p._2._2.asInstanceOf != "true")
      // Three ways of writing the filter; the getOrElse form above is used
    }).print()
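    // A minimal sketch (an assumption, not from the original post) of committing
    // offsets manually after each batch instead of relying on auto-commit;
    // HasOffsetRanges and CanCommitOffsets come from org.apache.spark.streaming.kafka010:
    // streams.foreachRDD { rdd =>
    //   val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
    //   streams.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
    // }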
    // Start the streaming job and block until termination
    ssc.start()
    ssc.awaitTermination()
  }
}
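The blacklist above is hard-coded so the example is self-contained. A minimal sketch of loading it from a text file instead, assuming one blacklisted name per line (the path src/main/data/blacklist.txt is hypothetical, not from the original post):

// Hypothetical file-based blacklist source; each line holds one name
val blacksFromFile = sc.textFile("src/main/data/blacklist.txt").map(name => (name.trim, true))

To try the job, publish comma-separated "time,name" records (e.g. "10:01,jack") to the mydemo5 topic with any Kafka producer; records whose name appears in the blacklist are dropped before print().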