1.代码
package security.view.model
import java.util.Properties
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.kafka010.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010._
import org.datanucleus.util.StringUtils
import scala.collection.JavaConverters._
/**
 * Spark Streaming job that subscribes to the `agent.server.log` topic on a
 * Kerberos-secured (SASL_PLAINTEXT / GSSAPI) Kafka broker, prints every record of each
 * micro-batch, and then commits the batch's offsets back to Kafka.
 */
object RealTimeAgentServerIncident {

  /**
   * Entry point: builds the streaming context, wires the Kafka direct stream and blocks
   * until the streaming context terminates.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // Build the session from a default SparkConf (picks up spark-submit settings).
    val spark = SparkSession
      .builder()
      .appName("Kafka2Spark2Kudu-kerberos")
      .config(new SparkConf())
      .getOrCreate()

    // Micro-batch interval: one batch every 6 seconds.
    val ssc = new StreamingContext(spark.sparkContext, Seconds(6))

    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "master:9092",
      "auto.offset.reset" -> "latest",
      // The three settings below are what enable Kerberos authentication against Kafka.
      "security.protocol" -> "SASL_PLAINTEXT",
      "sasl.kerberos.service.name" -> "kafka",
      "sasl.mechanism" -> "GSSAPI",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "testgrou111p",
      // Offsets are committed explicitly via commitAsync below, so the consumer's own
      // periodic auto-commit must be off; otherwise offsets could be committed before
      // the corresponding batch has been processed.
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    val topics = Array("agent.server.log")
    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )

    stream.foreachRDD { rdd =>
      // Capture the offset ranges from the RDD produced directly by the Kafka stream,
      // before any transformation is applied to it.
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      rdd.foreach(println)
      // Commit only after the batch's output action has run.
      stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
    }

    ssc.start()
    ssc.awaitTermination()
  }
}
注意:在创建kafka流的时候要加上以下三个配置
, "security.protocol" -> "SASL_PLAINTEXT"
, "sasl.kerberos.service.name" -> "kafka"
, "sasl.mechanism" -> "GSSAPI"
2. 提交任务
需要增加以下两个配置
--conf "spark.executor.extraJavaOptions=-Djava.security.auth.login.config=/etc/kafka/kafka_client_jaas.conf1"
--driver-java-options "-Djava.security.auth.login.config=/etc/kafka/kafka_client_jaas.conf1"
# Submit the streaming job to YARN in cluster mode with Kerberos credentials.
# The JAAS login config is passed to both driver and executor JVMs as a system
# property; the leading "-D" is required for the JVM to treat it as one.
# NOTE(review): --class names security.view.model.AgentView while the example object
# in this note is RealTimeAgentServerIncident — confirm which main class the jar contains.
# NOTE(review): the JAAS file and keytab paths presumably must exist on every node
# that can host the driver or an executor — verify for your cluster.
bin/spark-submit --master yarn \
  --deploy-mode cluster \
  --class security.view.model.AgentView \
  --principal kafka@RSD.COM \
  --keytab /data/share/keytabpath/kafka.keytab \
  --conf "spark.executor.extraJavaOptions=-Djava.security.auth.login.config=/etc/kafka/kafka_client_jaas.conf1" \
  --driver-java-options "-Djava.security.auth.login.config=/etc/kafka/kafka_client_jaas.conf1" \
  /data/modelx/spark/1281472784429481984.jar AgentView
3. kafka_client_jaas.conf文件
// JAAS login entry named KafkaClient: logs in through Krb5LoginModule using a keytab
// rather than a ticket cache.
// (Presumably this is the entry the Kafka client looks up for SASL/GSSAPI — confirm
// against the Kafka security documentation.)
KafkaClient {
com.sun.security.auth.module.Krb5LoginModule required
// Do not read credentials from an existing ticket cache; use the keytab below.
useTicketCache=false
useKeyTab=true
keyTab="/data/share/keytabpath/kafka.keytab"
principal="kafka@RSD.COM"
// Same service name as "sasl.kerberos.service.name" in the consumer settings.
serviceName="kafka"
storeKey=true
// NOTE(review): "renewTicket" does not appear among the documented Krb5LoginModule
// options (the documented one is renewTGT, which requires useTicketCache=true) —
// confirm whether this line has any effect.
renewTicket=true;
};