Kerberos settings during the CDH-to-CDP migration
The keytab file and kafka_client_jaas.conf must be distributed to the same directory on every node.
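For reference, a minimal sketch of what a kafka_client_jaas.conf for GSSAPI typically contains; the keytab path and principal below are hypothetical placeholders, not the actual values used in this migration:

KafkaClient {
  com.sun.security.auth.module.Krb5LoginModule required
  useKeyTab=true
  storeKey=true
  keyTab="/path/to/base_prod.keytab"
  principal="your_principal@NEWCHINALIFE.COM"
  serviceName="kafka";
};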
1. Spark Structured Streaming consumes a topic from the non-CDP environment, filters it, and forwards the result to a topic in the CDP environment
val lines = sparkSession.readStream
  .format("kafka")
  // CDP-related configuration:
  // when consuming CDP's Kafka, prefix these options with "kafka.";
  // when consuming the core (non-CDP) Kafka, drop the "kafka." prefix
  .option("security.protocol", "SASL_PLAINTEXT")
  .option("sasl.mechanism", "GSSAPI")
  .option("sasl.kerberos.service.name", "kafka")
  .option("kafka.bootstrap.servers", "core-broker01:9092") // hypothetical broker list; bootstrap.servers always keeps the "kafka." prefix in Structured Streaming
  .option("subscribe", "core_source_topic")                // hypothetical topic name
  .load()
Kafka producer code
import java.util.Properties
import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.kafka.common.serialization.StringSerializer

def getCDPKafkaProducer(brokerList: String): KafkaProducer[String, String] = {
  val props = new Properties()
  props.put("bootstrap.servers", brokerList)
  props.put("acks", "all")
  props.put("retries", "3")
  props.put("key.serializer", classOf[StringSerializer].getName)
  props.put("value.serializer", classOf[StringSerializer].getName)
  //props.put("partitioner.class", classOf[HashPartitioner].getName)
  // "producer.type" is a legacy setting from the old pre-0.9 Scala producer;
  // the modern Java producer ignores it, so it is kept here only for reference
  props.put("producer.type", "sync")
  props.put("batch.size", "16384") // bytes to accumulate per batch before sending; 16384 = 16 KB
  props.put("security.protocol", "SASL_PLAINTEXT")
  props.put("sasl.mechanism", "GSSAPI")
  // assumption: same service name as in the reader options above; this can
  // alternatively come from serviceName in the JAAS file
  props.put("sasl.kerberos.service.name", "kafka")
  new KafkaProducer[String, String](props)
}
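The glue between the reader above and this producer, i.e. the filter-and-forward step named in the section title, is not shown in the original note. A minimal sketch, assuming the lines DataFrame and getCDPKafkaProducer from above; the filter predicate, broker list, target topic, and checkpoint path are hypothetical placeholders:

import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.spark.sql.{ForeachWriter, Row}

val query = lines
  .selectExpr("CAST(value AS STRING) AS value")
  .filter(row => row.getString(0) != null && row.getString(0).nonEmpty) // placeholder filter; substitute the real business rule
  .writeStream
  .foreach(new ForeachWriter[Row] {
    var producer: KafkaProducer[String, String] = _
    // open one producer per partition and epoch
    override def open(partitionId: Long, epochId: Long): Boolean = {
      producer = getCDPKafkaProducer("cdp-broker01:9092") // hypothetical broker list
      true
    }
    // forward each surviving record to the CDP topic
    override def process(row: Row): Unit =
      producer.send(new ProducerRecord[String, String]("cdp_target_topic", row.getString(0))) // hypothetical topic
    override def close(errorOrNull: Throwable): Unit =
      if (producer != null) producer.close()
  })
  .option("checkpointLocation", "/tmp/ckpt_forward_demo") // hypothetical path
  .start()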
2. Spark Structured Streaming consumes a topic in the CDP environment
Add the following block at the very beginning of the application code:
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.security.UserGroupInformation

// This block is mandatory when consuming Kafka on CDP
System.setProperty("java.security.krb5.conf", "/etc/krb5.conf")
System.setProperty("java.security.auth.login.config", "/**/**/.base_prod/kafka_client_jaas.conf")
val conf = new Configuration()
conf.set("hadoop.security.authentication", "Kerberos")
conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem")
UserGroupInformation.setConfiguration(conf)
UserGroupInformation.loginUserFromKeytab("****@NEWCHINALIFE.COM", "/***/***/.base_prod/base_prod.keytab")
val lines = sparkSession.readStream
  .format("kafka")
  // CDP-related configuration:
  // when consuming CDP's Kafka, prefix these options with "kafka.";
  // when consuming the core (non-CDP) Kafka, drop the "kafka." prefix
  .option("kafka.security.protocol", "SASL_PLAINTEXT")
  .option("kafka.sasl.mechanism", "GSSAPI")
  .option("kafka.sasl.kerberos.service.name", "kafka")
  .option("kafka.bootstrap.servers", "cdp-broker01:9092") // hypothetical broker list
  .option("subscribe", "cdp_source_topic")                // hypothetical topic name
  .load()