Kerberos 安装与hadoop组件集成
一. kerberos 安装
yum -y install krb5-libs krb5-server krb5-workstation
安装成功后,修改kerberos必需文件
1. vim /etc/krb5.conf
#注意修改被注释的内容
# Configuration snippets may be placed in this directory as well
includedir /etc/krb5.conf.d/
[logging]
default = FILE:/var/log/krb5libs.log
kdc = FILE:/var/log/krb5kdc.log
admin_server = FILE:/var/log/kadmind.log
[libdefaults]
dns_lookup_realm = false
ticket_lifetime = 7d
renew_lifetime = 7d
forwardable = true
rdns = false
pkinit_anchors = /etc/pki/tls/certs/ca-bundle.crt
default_realm = EXAMPLE.COM
#default_ccache_name = KEYRING:persistent:%{uid} 此处需要被注释
[realms]
EXAMPLE.COM = {
kdc = kerberos.example.com #主机名称
admin_server = kerberos.example.com #主机名称
}
[domain_realm]
.example.com = EXAMPLE.COM
example.com = EXAMPLE.COM
2. vim /var/kerberos/krb5kdc/kdc.conf
[kdcdefaults]
kdc_ports = 88
kdc_tcp_ports = 88
[realms]
EXAMPLE.COM = {
#master_key_type = aes256-cts
acl_file = /var/kerberos/krb5kdc/kadm5.acl
dict_file = /usr/share/dict/words
admin_keytab = /var/kerberos/krb5kdc/kadm5.keytab
supported_enctypes = aes256-cts:normal aes128-cts:normal des3-hmac-sha1:normal arcfour-hmac:normal camellia256-cts:normal camellia128-cts:normal des-hmac-sha1:normal des-cbc-md5:normal des-cbc-crc:normal
}
3. vim /var/kerberos/krb5kdc/kadm5.acl
*/admin@EXAMPLE.COM *
4. 修改主机名称
echo '192.168.25.149 myli' >> /etc/hosts
echo '192.168.25.149 kerberos.example.com' >> /etc/hosts
5. 创建kerberos数据库
kdb5_util create -s -r EXAMPLE.COM #跟Realms名称
# 期间需要输入两次数据库密码(初始化,自定义密码)
6. 添加kerberos管理员
kadmin.local -q "addprinc admin/admin"
# 期间需要输入两次管理员密码(初始化,自定义密码)
7. 启动kerberos 服务,并设置为开机启动
/bin/systemctl start krb5kdc.service
/bin/systemctl start kadmin.service
/bin/systemctl enable krb5kdc.service
/bin/systemctl enable kadmin.service
8. 新建用户,并生成.tab文件
kadmin.local -q 'addprinc -randkey hdfs/myli@EXAMPLE.COM' # 新建用户
kadmin.local -q 'addprinc -randkey HTTP/myli@EXAMPLE.COM'
kadmin.local -q 'xst -k hdfs.keytab hdfs/myli@EXAMPLE.COM' # 生成keytab文件
kadmin.local -q 'xst -k HTTP.keytab HTTP/myli@EXAMPLE.COM'
# 校验文件是否正确
klist -kt hdfs.keytab # keytab中的用户列表
kinit -kt hdfs.keytab hdfs/myli@EXAMPLE.COM # 指定用户登陆
klist # 列出已登陆用户
kdestroy # 清理缓存并退出
9. 安装JDK
rpm -qa|grep jdk
#如果安装过了,卸载
yum -y remove java-1.8.0-openjdk-1.8.0.131-11.b12.el7.x86_64
yum -y remove java-1.8.0-openjdk-headless-1.8.0.131-11.b12.el7.x86_64
//安装RPM 包
rpm -ivh jdk-8u152-linux-x64.rpm
#检查是否安装成功
java -version
#修改环境变量
vim /etc/profile
JAVA_HOME=/usr/java/jdk1.8.0_152
CLASSPATH=$JAVA_HOME/lib/
PATH=$PATH:$JAVA_HOME/bin
export PATH JAVA_HOME CLASSPATH
10. 安装NTP时间服务器
yum install ntp
ntpdate time1.aliyun.com
#启动ntp,并设置开机自启动
systemctl start ntpd.service
systemctl enable ntpd.service
二. hadoop安装与kerberos配置
1. 准备资源,编译环境
# 使用jsvc安全启动datanode,必须用普通用户
useradd hdfs
# 拷贝文件
cp hdfs.keytab /home/hdfs/
cp HTTP.keytab /home/hdfs/
chown hdfs:hdfs /home/hdfs/*.keytab
# 用于编译jsvc
yum -y groupinstall 'Development Tools'
# 准备编译所需文件
su hdfs
wget https://archive.apache.org/dist/commons/daemon/binaries/commons-daemon-1.0.15-bin.tar.gz
wget https://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz
# 下载,解压hadoop
tar xf hadoop-2.7.1.tar.gz  # 注意: 后续步骤均使用 ~/hadoop-2.7.1 目录,版本需保持一致
tar xf commons-daemon-1.0.15-bin.tar.gz
tar xf commons-daemon-1.0.15-src.tar.gz
# 编译jsvc
cd commons-daemon-1.0.15-src/src/native/unix/
./configure --with-java=/usr/lib/jvm/java-openjdk
make
cp jsvc ~/hadoop-2.7.1/libexec/
rm ~/hadoop-2.7.1/share/hadoop/hdfs/lib/commons-daemon-1.0.13.jar
cp commons-daemon-1.0.15/commons-daemon-1.0.15.jar ~/hadoop-2.7.1/share/hadoop/hdfs/lib/
2. 修改hadoop配置文件
cd ~/hadoop/etc/hadoop/
2.1 vim hadoop-env.sh
export JAVA_HOME=/usr/java/jdk1.8.0_152
export JSVC_HOME=/home/hdfs/hadoop/libexec
export HADOOP_SECURE_DN_USER=hdfs
2.2 vim core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://192.168.25.149:9000</value>
</property>
<property>
<name>hadoop.security.authentication</name>
<value>kerberos</value>
</property>
<property>
<name>hadoop.security.authorization</name>
<value>true</value>
</property>
</configuration>
2.3 vim hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.block.access.token.enable</name>
<value>true</value>
</property>
<property>
<name>dfs.datanode.data.dir.perm</name>
<value>700</value>
</property>
<property>
<name>dfs.namenode.keytab.file</name>
<value>/home/hdfs/hdfs.keytab</value>
</property>
<property>
<name>dfs.namenode.kerberos.principal</name>
<value>hdfs/myli@EXAMPLE.COM</value>
</property>
<property>
<name>dfs.namenode.kerberos.https.principal</name>
<value>HTTP/myli@EXAMPLE.COM</value>
</property>
<property>
<name>dfs.datanode.address</name>
<value>0.0.0.0:1004</value>
</property>
<property>
<name>dfs.datanode.http.address</name>
<value>0.0.0.0:1006</value>
</property>
<property>
<name>dfs.datanode.keytab.file</name>
<value>/home/hdfs/hdfs.keytab</value>
</property>
<property>
<name>dfs.datanode.kerberos.principal</name>
<value>hdfs/myli@EXAMPLE.COM</value>
</property>
<property>
<name>dfs.datanode.kerberos.https.principal</name>
<value>HTTP/myli@EXAMPLE.COM</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.web.authentication.kerberos.principal</name>
<value>HTTP/myli@EXAMPLE.COM</value>
</property>
<property>
<name>dfs.web.authentication.kerberos.keytab</name>
<value>/home/hdfs/HTTP.keytab</value>
</property>
<property>
<name>dfs.encrypt.data.transfer</name>
<value>true</value>
</property>
</configuration>
3. 配置ssh免密登陆
ssh-keygen -t rsa -N '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
#测试是否成功
ssh kerberos.example.com date
ssh localhost date
4. 启动hadoop
# 使用hdfs用户
bin/hdfs namenode -format
sbin/start-dfs.sh
#使用root用户
sbin/hadoop-daemon.sh start datanode
最终启动结果:jps查看
[hdfs@myli hadoop]$ jps
4178 NameNode
4596
6623 Jps
#如上即为正确:jps # 确认有三个进程,jps, NameNode, 没有名字进程
#如果jps显示没有进程,查看log日志
5. 访问
kinit -kt ~/hdfs.keytab hdfs/myli@EXAMPLE.COM
bin/hdfs dfs -ls /
# 直接使用此命令会报错:
20/04/13 14:49:28 WARN ipc.Client: Exception encountered while connecting to the server : javax.security.sasl.SaslException: GSS initiate failed [Caused by GSSException: No valid credentials provided (Mechanism level: Failed to find any Kerberos tgt)]
ls: Failed on local exception: java.io.IOException: javax.security.sasl.SaslException: GSS initiate failed [Caused by GSSException: No valid credentials provided (Mechanism level: Failed to find any Kerberos tgt)]; Host Details : local host is: "myli/192.168.25.149"; destination host is: "myli":9000;
#提示没有credentials,也就是说没有凭据。
#一方面说明Kerberos的确是保护了hdfs,
#另一方面也说明我们还需要给本地加一个Principal来访问hdfs。
# 增加用户Principal
su root
kadmin.local
addprinc hdfs@EXAMPLE.COM
# 输入两次密码
# 再次登陆
kinit hdfs@EXAMPLE.COM
# 输入密码
bin/hdfs dfs -ls / # 即可成功
三. yarn 集成kerberos
1. 生成keytab
kadmin.local -q "addprinc -randkey yarn/myli@EXAMPLE.COM"
kadmin.local -q "addprinc -randkey mapred/myli@EXAMPLE.COM"
kadmin.local -q "xst -k yarn.keytab yarn/myli@EXAMPLE.COM"
kadmin.local -q "xst -k mapred.keytab mapred/myli@EXAMPLE.COM"
# 复制并命名
cp /var/kerberos/krb5kdc/yarn.keytab /home/hdfs/
cp /var/kerberos/krb5kdc/mapred.keytab /home/hdfs/
chown hdfs:hdfs *.keytab
2. 修改yarn配置文件
使用之前的.keytab文件
2.1 vim yarn-site.xml
<?xml version="1.0"?>
<configuration>
<property>
<name>yarn.resourcemanager.principal</name>
<value>hdfs/myli@EXAMPLE.COM</value>
</property>
<property>
<name>yarn.resourcemanager.keytab</name>
<value>/home/hdfs/hdfs.keytab</value>
</property>
<property>
<name>yarn.nodemanager.keytab</name>
<value>/home/hdfs/hdfs.keytab</value>
</property>
<property>
<name>yarn.nodemanager.principal</name>
<value>hdfs/myli@EXAMPLE.COM</value>
</property>
<property>
<name>yarn.nodemanager.container-executor.class</name>
<value>org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor</value>
</property>
<property>
<name>yarn.nodemanager.linux-container-executor.group</name>
<value>hdfs</value>
</property>
<property>
<name>yarn.nodemanager.linux-container-executor.path</name>
<value>/hdp/bin/container-executor</value>
</property>
</configuration>
2.2 vim mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>mapreduce.jobhistory.keytab</name>
<value>/home/hdfs/hdfs.keytab</value>
</property>
<property>
<name>mapreduce.jobhistory.principal</name>
<value>hdfs/myli@EXAMPLE.COM</value>
</property>
</configuration>
2.3 创建container-executor客户组
mkdir -p /hdp/bin
mkdir -p /hdp/etc/hadoop
cp /home/hdfs/hadoop/etc/hadoop/container-executor.cfg /hdp/etc/hadoop/
cp /home/hdfs/hadoop/bin/container-executor /hdp/bin/
2.4. vim /hdp/etc/hadoop/container-executor.cfg
yarn.nodemanager.linux-container-executor.group=hdfs
banned.users=mysql
min.user.id=0
allowed.system.users=root,nobody,impala,hive,hdfs,yarn
2.5 修改文件权限
chown root:hdfs /home/hdfs/hadoop/bin/container-executor
chmod 6050 /hdp/bin/container-executor
# 校验
# 注意: container-executor.cfg 需位于 /hdp/etc/hadoop/ 目录下
/hdp/bin/container-executor --checksetup
# 如果不报错,即为成功,即可去启动yarn
3. 启动ResourceManager
#首先得认证
su root # 使用root权限启动
kinit -kt ~/hdfs.keytab hdfs/myli@EXAMPLE.COM
cd hadoop/sbin/
./start-yarn.sh
#jps查看进程,如下即正确
3168 Jps
2578
3014 NodeManager
2732 ResourceManager
2172 NameNode
四. hive集成kerberos
3.1 vim hive-site.xml
添加下面内容
<property>
<name>hive.server2.authentication</name>
<value>KERBEROS</value>
</property>
<property>
<name>hive.server2.authentication.kerberos.principal</name>
<value>hdfs/myli@EXAMPLE.COM</value>
</property>
<property>
<name>hive.server2.authentication.kerberos.keytab</name>
<value>/home/hdfs/hdfs.keytab</value>
</property>
<property>
<name>hive.metastore.sasl.enabled</name>
<value>true</value>
</property>
<property>
<name>hive.metastore.kerberos.keytab.file</name>
<value>/home/hdfs/hdfs.keytab</value>
</property>
<property>
<name>hive.metastore.kerberos.principal</name>
<value>hive/myli@EXAMPLE.COM</value>
</property>
3.2 vim core-site.xml
<property>
<name>hadoop.proxyuser.hive.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hive.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hdfs.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hdfs.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.HTTP.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.HTTP.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.udap.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.udap.groups</name>
<value>*</value>
</property>
3.3 启动hive
nohup ./hive --service metastore &
nohup ./hive --service hiveserver2 &
登陆
kinit -kt /home/hdfs/hdfs.keytab hdfs/myli@EXAMPLE.COM
hive
# beeline 登陆
!connect jdbc:hive2://localhost:10000/default;principal=hdfs/myli@EXAMPLE.COM
五. Spark 集成kerberos
1. 登陆方式一
kinit -kt /home/hdfs/hdfs.keytab hdfs/myli@EXAMPLE.COM
# 1.本地机器测试
spark-submit --class org.apache.spark.examples.SparkPi /home/hdfs/spark/examples/jars/spark-examples_2.11-2.4.3.jar 10
# 2.Yarn 模式Client运行
spark-submit --class org.apache.spark.examples.SparkPi --master yarn /home/hdfs/spark/examples/jars/spark-examples_2.11-2.4.3.jar 10
# 3. Yarn cluster 模式运行
spark-submit --class org.apache.spark.examples.SparkPi --master yarn --deploy-mode cluster /home/hdfs/spark/examples/jars/spark-examples_2.11-2.4.3.jar 10
2. 登陆方式二
# 1. Yarn 模式Client运行
spark-submit --class org.apache.spark.examples.SparkPi --master yarn --principal hdfs/myli@EXAMPLE.COM --keytab /home/hdfs/hdfs.keytab /home/hdfs/spark/examples/jars/spark-examples_2.11-2.4.3.jar 10
# 2. Yarn cluster 模式运行
spark-submit --class org.apache.spark.examples.SparkPi --master yarn --deploy-mode cluster --principal hdfs/myli@EXAMPLE.COM --keytab /home/hdfs/hdfs.keytab /home/hdfs/spark/examples/jars/spark-examples_2.11-2.4.3.jar 10
六. zookeeper集成kerberos
1. 添加kerberos凭证
#添加HTTP服务的凭据
kadmin.local -q "addprinc -randkey HTTP/myli@EXAMPLE.COM"
#添加zookeeper的凭据
kadmin.local -q "addprinc -randkey zookeeper/myli@EXAMPLE.COM"
#添加hadoop用户的凭据
kadmin.local -q "addprinc -randkey hdfs/myli@EXAMPLE.COM"
#生成包含前三个凭据的keytab证书,hadoop.keytab为最终生成的证书的名称
kadmin.local -q "xst -k hdfs_hadoop.keytab hdfs/myli HTTP/myli zookeeper/myli"
#至此,生成一个hdfs_hadoop.keytab,我是将hdfs_hadoop.keytab放置/home/hdfs 下,并且修改权限
chown hdfs:hdfs /home/hdfs/hdfs_hadoop.keytab
chmod 400 /home/hdfs/hdfs_hadoop.keytab
# 回头说一句,此处生成了新的.keytab文件,前面hdfs,yarn,hive对应的.keytab文件便会失效.需要在配置中重新指定新.keytab文件的路径
2. zookeeper配置
#在$ZOOKEEPER_HOME/conf/目录创建文件zoo.cfg
cd $ZOOKEEPER_HOME/conf/
vim zoo.cfg
#添加下面内容
kerberos.removeHostFromPrincipal=true
kerberos.removeRealmFromPrincipal=true
authProvider.1=org.apache.zookeeper.server.auth.SASLAuthenticationProvider
jaasLoginRenew=3600000
vim jaas.conf
#添加下面内容
Server {
com.sun.security.auth.module.Krb5LoginModule required
useKeyTab=true
keyTab="/home/hdfs/hdfs_hadoop.keytab"
storeKey=true
useTicketCache=false
principal="zookeeper/myli@EXAMPLE.COM";
};
Client {
com.sun.security.auth.module.Krb5LoginModule required
useKeyTab=true
keyTab="/home/hdfs/hdfs_hadoop.keytab"
storeKey=true
useTicketCache=false
principal="hdfs/myli@EXAMPLE.COM";
};
vim java.env
#添加下面内容
export JVMFLAGS="-Djava.security.auth.login.config=$ZOOKEEPER_HOME/conf/jaas.conf"
3. 启动zookeeper
#重启zookeeper
./zkServer.sh start
#通过jps命令查看,zookeeper进程是可以起来的.
#验证安装是否成功
./zkCli.sh -server myli:2181
#不报错即为成功
#参考:https://blog.csdn.net/weixin_39971186/article/details/87994659?depth_1-utm_source=distribute.pc_relevant.none-task-blog-OPENSEARCH-1&utm_source=distribute.pc_relevant.none-task-blog-OPENSEARCH-1
七. kafka 集成kerberos
保证zookeeper集成kerberos安装成功
kafka版本:kafka_2.11-1.1.1
1. kafka配置
cd $KAFKA_HOME/conf
vim jaas.conf
#添加下面内容
KafkaServer {
com.sun.security.auth.module.Krb5LoginModule required
useKeyTab=true
storeKey=true
keyTab="/home/hdfs/hdfs_hadoop.keytab"
principal="hdfs/myli@EXAMPLE.COM";
};
KafkaClient {
com.sun.security.auth.module.Krb5LoginModule required
useKeyTab=false
useTicketCache=true
renewTicket=true;
};
Client {
com.sun.security.auth.module.Krb5LoginModule required
useKeyTab=true
keyTab="/home/hdfs/hdfs_hadoop.keytab"
storeKey=true
useTicketCache=false
principal="hdfs/myli@EXAMPLE.COM";
};
vim server.properties
#添加下面内容
# 默认是localhost:2181,一开始我没有修改,kafkaServer启动报错(注释不能与配置值写在同一行)
zookeeper.connect=myli:2181
advertised.host.name=myli
advertised.listeners=SASL_PLAINTEXT://myli:9092
listeners=SASL_PLAINTEXT://myli:9092
security.inter.broker.protocol=SASL_PLAINTEXT
sasl.mechanism.inter.broker.protocol=GSSAPI
sasl.enabled.mechanisms=GSSAPI
sasl.kerberos.service.name=hdfs
2. kafka-run-class.sh修改
cd $KAKFA_HOME/bin
vim kafka-run-class.sh
#在236行附近有一个"JVM performance options"参数配置,将下面参数加入改配置:
-Djava.security.krb5.conf=/etc/krb5.conf -Djava.security.auth.login.config=/opt/kafka/config/jaas.conf
#如果没有其他的改变,最后连贯的配置应该如下:
# JVM performance options
if [ -z "$KAFKA_JVM_PERFORMANCE_OPTS" ]; then
KAFKA_JVM_PERFORMANCE_OPTS="-server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+ExplicitGCInvokesConcurrent -Djava.awt.headless=true -Djava.security.krb5.conf=/etc/krb5.conf -Djava.security.auth.login.config=/opt/kafka/config/jaas.conf"
fi
3. kafka启动
# 第一遍搭建的时候,kafkaService怎么都起不来.过了两天回过头再去搭建一遍,发现ok了
./bin/kafka-server-start.sh config/server.properties
4. 验证
1. 配置config/producer.properties,kafka生产者kerberos配置
# 修改为主机名,不要使用localhost
bootstrap.servers=myli:9092
security.protocol = SASL_PLAINTEXT
sasl.mechanism = GSSAPI
sasl.kerberos.service.name =hdfs
2. 配置config/consumer.properties,kafka消费者kerberos配置
# 修改为主机名,不要使用localhost
bootstrap.servers=myli:9092
security.protocol = SASL_PLAINTEXT
sasl.mechanism = GSSAPI
sasl.kerberos.service.name =hdfs
3. 测试
# 创建topic
kafka-topics.sh --zookeeper myli:2181 --create --topic xiahu --partitions 1 --replication-factor 1
# 启动生产者
bin/kafka-console-producer.sh --broker-list myli:9092 --topic xiahu --producer.config config/producer.properties
# 启动消费者
bin/kafka-console-consumer.sh --bootstrap-server myli:9092 --topic xiahu --consumer.config config/consumer.properties