污点与容忍
污点策略
尽量不调度:PreferNoSchedule 不被调度:NoSchedule 驱逐节点上的 Pod:NoExecute
管理污点标签
# 查看污点策略
[root@master ~]# kubectl describe nodes|grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
Taints: <none>
Taints: <none>
Taints: <none>
Taints: <none>
Taints: <none>
# node-0001 设置污点策略 PreferNoSchedule
[root@master ~]# kubectl taint node node-0001 k=v1:PreferNoSchedule
node/node-0001 tainted
# node-0002 设置污点策略 NoSchedule
[root@master ~]# kubectl taint node node-0002 k=v2:NoSchedule
node/node-0002 tainted
[root@master ~]# kubectl describe nodes |grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
Taints: k=v1:PreferNoSchedule
Taints: k=v2:NoSchedule
Taints: <none>
Taints: <none>
Taints: <none>
Pod 资源文件
[root@master ~]# vim myphp.yaml
---
kind: Pod
apiVersion: v1
metadata:
  name: myphp
spec:
  containers:
  - name: php
    image: myos:php-fpm
    resources:
      requests:
        cpu: 1500m
验证污点策略
# 优先使用没有污点的节点
[root@master ~]# sed "s,myphp,php1," myphp.yaml |kubectl apply -f -
pod/php1 created
[root@master ~]# sed "s,myphp,php2," myphp.yaml |kubectl apply -f -
pod/php2 created
[root@master ~]# sed "s,myphp,php3," myphp.yaml |kubectl apply -f -
pod/php3 created
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php1 1/1 Running 0 13s 10.244.3.35 node-0003
php2 1/1 Running 0 5s 10.244.4.32 node-0004
php3 1/1 Running 0 5s 10.244.5.34 node-0005
# 最后使用 PreferNoSchedule 节点
[root@master ~]# sed 's,myphp,php4,' myphp.yaml |kubectl apply -f -
pod/php4 created
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php1 1/1 Running 0 13s 10.244.3.35 node-0003
php2 1/1 Running 0 5s 10.244.4.32 node-0004
php3 1/1 Running 0 5s 10.244.5.34 node-0005
php4 1/1 Running 0 80s 10.244.1.33 node-0001
# 不会使用 NoSchedule 节点
[root@master ~]# sed 's,myphp,php5,' myphp.yaml |kubectl apply -f -
pod/php5 created
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php1 1/1 Running 0 53s 10.244.3.35 node-0003
php2 1/1 Running 0 65s 10.244.4.32 node-0004
php3 1/1 Running 0 75s 10.244.5.34 node-0005
php4 1/1 Running 0 80s 10.244.1.33 node-0001
php5 0/1 Pending 0 5s <none> <none>
验证污点策略
# NoSchedule 不会影响已经创建的 Pod
[root@master ~]# kubectl taint node node-0003 k=v3:NoSchedule
node/node-0003 tainted
[root@master ~]# kubectl describe nodes |grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
Taints: k=v1:PreferNoSchedule
Taints: k=v2:NoSchedule
Taints: k=v3:NoSchedule
Taints: <none>
Taints: <none>
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php1 1/1 Running 0 53s 10.244.3.35 node-0003
php2 1/1 Running 0 65s 10.244.4.32 node-0004
php3 1/1 Running 0 75s 10.244.5.34 node-0005
php4 1/1 Running 0 80s 10.244.1.33 node-0001
php5 0/1 Pending 0 5s <none> <none>
# NoExecute 会删除节点上的 Pod
[root@master ~]# kubectl taint node node-0004 k=v4:NoExecute
node/node-0004 tainted
[root@master ~]# kubectl describe nodes |grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
Taints: k=v1:PreferNoSchedule
Taints: k=v2:NoSchedule
Taints: k=v3:NoSchedule
Taints: k=v4:NoExecute
Taints: <none>
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php1 1/1 Running 0 53s 10.244.3.35 node-0003
php3 1/1 Running 0 75s 10.244.5.34 node-0005
php4 1/1 Running 0 80s 10.244.1.33 node-0001
php5 0/1 Pending 0 5s <none> <none>
清理实验配置
[root@master ~]# kubectl delete pod --all
pod "php1" deleted
pod "php3" deleted
pod "php4" deleted
pod "php5" deleted
[root@master ~]# kubectl taint node node-000{1..4} k-
node/node-0001 untainted
node/node-0002 untainted
node/node-0003 untainted
node/node-0004 untainted
[root@master ~]# kubectl describe nodes |grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
Taints: <none>
Taints: <none>
Taints: <none>
Taints: <none>
Taints: <none>
容忍策略
设置污点标签
# 节点 node-0001,node-0002 设置污点标签 k=v1:NoSchedule
[root@master ~]# kubectl taint node node-000{1..2} k=v1:NoSchedule
node/node-0001 tainted
node/node-0002 tainted
# 节点 node-0003,node-0004 设置污点标签 k=v2:NoSchedule
[root@master ~]# kubectl taint node node-000{3..4} k=v2:NoSchedule
node/node-0003 tainted
node/node-0004 tainted
# 节点 node-0005 设置污点标签 k=v1:NoExecute
[root@master ~]# kubectl taint node node-0005 k=v1:NoExecute
node/node-0005 tainted
[root@master ~]# kubectl describe nodes |grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
Taints: k=v1:NoSchedule
Taints: k=v1:NoSchedule
Taints: k=v2:NoSchedule
Taints: k=v2:NoSchedule
Taints: k=v1:NoExecute
精确匹配策略
# 容忍 k=v1:NoSchedule 污点
[root@master ~]# vim myphp.yaml
---
kind: Pod
apiVersion: v1
metadata:
  name: myphp
spec:
  tolerations:
  - operator: Equal  # 完全匹配键值对
    key: k  # 键
    value: v1  # 值
    effect: NoSchedule  # 污点策略
  containers:
  - name: php
    image: myos:php-fpm
    resources:
      requests:
        cpu: 1500m
[root@master ~]# for i in php{1..3};do sed "s,myphp,${i}," myphp.yaml ;done|kubectl apply -f -
pod/php1 created
pod/php2 created
pod/php3 created
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php1 1/1 Running 0 6s 10.244.1.10 node-0001
php2 1/1 Running 0 6s 10.244.2.11 node-0002
php3 0/1 Pending 0 6s <none> <none>
[root@master ~]# kubectl delete pod --all
pod "php1" deleted
pod "php2" deleted
pod "php3" deleted
模糊匹配策略
# 容忍 k=*:NoSchedule 污点
[root@master ~]# vim myphp.yaml
---
kind: Pod
apiVersion: v1
metadata:
  name: myphp
spec:
  tolerations:
  - operator: Exists  # 部分匹配,存在即可
    key: k  # 键
    effect: NoSchedule  # 污点策略
  containers:
  - name: php
    image: myos:php-fpm
    resources:
      requests:
        cpu: 1500m
[root@master ~]# for i in php{1..5};do sed "s,myphp,${i}," myphp.yaml ;done|kubectl apply -f -
pod/php1 created
pod/php2 created
pod/php3 created
pod/php4 created
pod/php5 created
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php1 1/1 Running 0 6s 10.244.1.12 node-0001
php2 1/1 Running 0 6s 10.244.2.21 node-0002
php3 1/1 Running 0 6s 10.244.3.18 node-0003
php4 1/1 Running 0 6s 10.244.4.24 node-0004
php5 0/1 Pending 0 6s <none> <none>
[root@master ~]# kubectl delete pod --all
pod "php1" deleted
pod "php2" deleted
pod "php3" deleted
pod "php4" deleted
pod "php5" deleted
所有污点标签
# 容忍所有 node 上键为 k 的污点(不限 effect)
[root@master ~]# vim myphp.yaml
---
kind: Pod
apiVersion: v1
metadata:
  name: myphp
spec:
  tolerations:
  - operator: Exists  # 模糊匹配
    key: k  # 键
    effect: ""  # 设置空或删除,代表匹配所有污点策略(effect)
  containers:
  - name: php
    image: myos:php-fpm
    resources:
      requests:
        cpu: 1500m
[root@master ~]# for i in php{1..5};do sed "s,myphp,${i}," myphp.yaml ;done|kubectl apply -f -
pod/php1 created
pod/php2 created
pod/php3 created
pod/php4 created
pod/php5 created
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php1 1/1 Running 0 36s 10.244.1.15 node-0001
php2 1/1 Running 0 36s 10.244.2.16 node-0002
php3 1/1 Running 0 36s 10.244.3.19 node-0003
php4 1/1 Running 0 36s 10.244.4.17 node-0004
php5 1/1 Running 0 36s 10.244.5.18 node-0005
清理实验配置
[root@master ~]# kubectl taint node node-000{1..5} k-
node/node-0001 untainted
node/node-0002 untainted
node/node-0003 untainted
node/node-0004 untainted
node/node-0005 untainted
[root@master ~]# kubectl describe nodes |grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
Taints: <none>
Taints: <none>
Taints: <none>
Taints: <none>
Taints: <none>
[root@master ~]# kubectl delete pod --all
pod "php1" deleted
pod "php2" deleted
pod "php3" deleted
pod "php4" deleted
pod "php5" deleted
优先级与抢占
非抢占优先级
# 定义优先级(队列优先)
[root@master ~]# vim mypriority.yaml
---
kind: PriorityClass
apiVersion: scheduling.k8s.io/v1
metadata:
  name: high-non
preemptionPolicy: Never
value: 1000
---
kind: PriorityClass
apiVersion: scheduling.k8s.io/v1
metadata:
  name: low-non
preemptionPolicy: Never
value: 500
[root@master ~]# kubectl apply -f mypriority.yaml
priorityclass.scheduling.k8s.io/high-non created
priorityclass.scheduling.k8s.io/low-non created
[root@master ~]# kubectl get priorityclasses.scheduling.k8s.io
NAME VALUE GLOBAL-DEFAULT AGE
high-non 1000 false 12s
low-non 500 false 12s
system-cluster-critical 2000000000 false 45h
system-node-critical 2000001000 false 45h
Pod 资源文件
# 无优先级的 Pod
[root@master ~]# cat php1.yaml
---
kind: Pod
apiVersion: v1
metadata:
  name: php1
spec:
  nodeSelector:
    kubernetes.io/hostname: node-0004
  containers:
  - name: php
    image: myos:php-fpm
    resources:
      requests:
        cpu: "1500m"
# 低优先级 Pod
[root@master ~]# cat php2.yaml
---
kind: Pod
apiVersion: v1
metadata:
  name: php2
spec:
  nodeSelector:
    kubernetes.io/hostname: node-0004
  priorityClassName: low-non  # 优先级名称
  containers:
  - name: php
    image: myos:php-fpm
    resources:
      requests:
        cpu: "1500m"
# 高优先级 Pod
[root@master ~]# cat php3.yaml
---
kind: Pod
apiVersion: v1
metadata:
  name: php3
spec:
  nodeSelector:
    kubernetes.io/hostname: node-0004
  priorityClassName: high-non  # 优先级名称
  containers:
  - name: php
    image: myos:php-fpm
    resources:
      requests:
        cpu: "1500m"
验证非抢占优先
[root@master ~]# kubectl apply -f php1.yaml
pod/php1 created
[root@master ~]# kubectl apply -f php2.yaml
pod/php2 created
[root@master ~]# kubectl apply -f php3.yaml
pod/php3 created
[root@master ~]# kubectl get pods
NAME READY STATUS RESTARTS AGE
php1 1/1 Running 0 9s
php2 0/1 Pending 0 6s
php3 0/1 Pending 0 4s
[root@master ~]# kubectl delete pod php1
pod "php1" deleted
[root@master ~]# kubectl get pods
NAME READY STATUS RESTARTS AGE
php2 0/1 Pending 0 20s
php3 1/1 Running 0 18s
# 清理实验 Pod
[root@master ~]# kubectl delete pod php2 php3
pod "php2" deleted
pod "php3" deleted
抢占策略
[root@master ~]# vim mypriority.yaml
---
kind: PriorityClass
apiVersion: scheduling.k8s.io/v1
metadata:
  name: high-non
preemptionPolicy: Never
value: 1000
---
kind: PriorityClass
apiVersion: scheduling.k8s.io/v1
metadata:
  name: low-non
preemptionPolicy: Never
value: 500
---
kind: PriorityClass
apiVersion: scheduling.k8s.io/v1
metadata:
  name: high
preemptionPolicy: PreemptLowerPriority
value: 1000
---
kind: PriorityClass
apiVersion: scheduling.k8s.io/v1
metadata:
  name: low
preemptionPolicy: PreemptLowerPriority
value: 500
[root@master ~]# kubectl apply -f mypriority.yaml
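priorityclass.scheduling.k8s.io/high-non unchanged
priorityclass.scheduling.k8s.io/low-non unchanged
priorityclass.scheduling.k8s.io/high created
priorityclass.scheduling.k8s.io/low created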
[root@master ~]# kubectl get priorityclasses.scheduling.k8s.io
NAME VALUE GLOBAL-DEFAULT AGE
high 1000 false 4s
high-non 1000 false 2h
low 500 false 4s
low-non 500 false 2h
system-cluster-critical 2000000000 false 21d
system-node-critical 2000001000 false 21d
验证抢占优先级
# 替换优先级策略
[root@master ~]# sed 's,-non,,' -i php?.yaml
# 默认优先级 Pod
[root@master ~]# kubectl apply -f php1.yaml
pod/php1 created
[root@master ~]# kubectl get pods
NAME READY STATUS RESTARTS AGE
php1 1/1 Running 0 6s
# 高优先级 Pod(节点资源不足时会抢占并驱逐无优先级的 php1)
[root@master ~]# kubectl apply -f php3.yaml
pod/php3 created
[root@master ~]# kubectl get pods
NAME READY STATUS RESTARTS AGE
php3 1/1 Running 0 9s
# 低优先级 Pod
[root@master ~]# kubectl apply -f php2.yaml
pod/php2 created
[root@master ~]# kubectl get pods
NAME READY STATUS RESTARTS AGE
php2 0/1 Pending 0 3s
php3 1/1 Running 0 9s
# 清理实验 Pod
[root@master ~]# kubectl delete pod --all
pod "php2" deleted
pod "php3" deleted
[root@master ~]# kubectl delete -f mypriority.yaml
priorityclass.scheduling.k8s.io "high-non" deleted
priorityclass.scheduling.k8s.io "low-non" deleted
priorityclass.scheduling.k8s.io "high" deleted
priorityclass.scheduling.k8s.io "low" deleted
Pod 安全
特权容器
更改容器主机名 和 /etc/hosts 文件
[root@master ~]# vim root.yaml
---
kind: Pod
apiVersion: v1
metadata:
  name: root
spec:
  hostname: myhost  # 修改主机名
  hostAliases:  # 修改 /etc/hosts
  - ip: 192.168.1.30  # IP 地址
    hostnames:  # 主机名列表
    - harbor  # 主机名
  containers:
  - name: apache
    image: myos:httpd
[root@master ~]# kubectl apply -f root.yaml
pod/root created
[root@master ~]# kubectl exec -it root -- /bin/bash
[root@myhost html]# hostname
myhost
[root@myhost html]# cat /etc/hosts
... ...
# Entries added by HostAliases.
192.168.1.30 harbor
[root@master ~]# kubectl delete pod root
pod "root" deleted
root特权容器
[root@master ~]# vim root.yaml
---
kind: Pod
apiVersion: v1
metadata:
  name: root
spec:
  hostPID: true  # 特权,共享系统进程
  hostNetwork: true  # 特权,共享主机网络
  containers:
  - name: apache
    image: myos:httpd
    securityContext:  # 安全上下文值
      privileged: true  # root特权容器
[root@master ~]# kubectl apply -f root.yaml
pod/root created
[root@master ~]# kubectl get pods
NAME READY STATUS RESTARTS AGE
root 1/1 Running 0 26s
[root@master ~]# kubectl exec -it root -- /bin/bash
[root@node-0001 /]#
# 系统进程特权
[root@node-0001 /]# pstree -p
systemd(1)-+-NetworkManager(510)-+-dhclient(548)
| |-{NetworkManager}(522)
| `-{NetworkManager}(524)
|-agetty(851)
|-chronyd(502)
|-containerd(531)-+-{containerd}(555)
... ...
# 网络特权
[root@node-0001 /]# ifconfig eth0
eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 192.168.1.51 netmask 255.255.255.0 broadcast 192.168.1.255
ether fa:16:3e:70:c8:fa txqueuelen 1000 (Ethernet)
... ...
# root用户特权
[root@node-0001 /]# mkdir /sysroot
[root@node-0001 /]# mount /dev/vda1 /sysroot
[root@node-0001 /]# mount -t proc proc /sysroot/proc
[root@node-0001 /]# chroot /sysroot
sh-4.2# : 此处已经是 node 节点上的 root 用户了
# 删除特权容器
[root@master ~]# kubectl delete pod root
pod "root" deleted
Pod 安全策略
# 生产环境设置严格的准入控制
[root@master ~]# kubectl create namespace myprod
namespace/myprod created
[root@master ~]# kubectl label namespaces myprod pod-security.kubernetes.io/enforce=restricted
namespace/myprod labeled
# 测试环境测试警告提示
[root@master ~]# kubectl create namespace mytest
namespace/mytest created
[root@master ~]# kubectl label namespaces mytest pod-security.kubernetes.io/warn=baseline
namespace/mytest labeled
# 创建特权容器
[root@master ~]# kubectl -n myprod apply -f root.yaml
Error from server (Failure): error when creating "root.yaml": host namespaces (hostNetwork=true, hostPID=true), privileged (container "apache" must not set securityContext.privileged=true), allowPrivilegeEscalation != false (container "apache" must set securityContext.allowPrivilegeEscalation=false), unrestricted capabilities (container "apache" must set securityContext.capabilities.drop=["ALL"]), runAsNonRoot != true (pod or container "apache" must set securityContext.runAsNonRoot=true), seccompProfile (pod or container "apache" must set securityContext.seccompProfile.type to "RuntimeDefault" or "Localhost")
[root@master ~]#
[root@master ~]# kubectl -n myprod get pods
No resources found in myprod namespace.
[root@master ~]# kubectl -n mytest apply -f root.yaml
Warning: would violate "latest" version of "baseline" PodSecurity profile: host namespaces (hostNetwork=true, hostPID=true), privileged (container "apache" must not set securityContext.privileged=true)
pod/root created
[root@master ~]#
[root@master ~]# kubectl -n mytest get pods
NAME READY STATUS RESTARTS AGE
root 1/1 Running 0 7s
[root@master ~]#
安全的 Pod
[root@master ~]# vim nonroot.yaml
---
kind: Pod
apiVersion: v1
metadata:
  name: nonroot
spec:
  restartPolicy: Always
  containers:
  - name: php
    image: myos:php-fpm
    securityContext:
      allowPrivilegeEscalation: false
      runAsNonRoot: true
      runAsUser: 65534
      seccompProfile:
        type: "RuntimeDefault"
      capabilities:
        drop: ["ALL"]
[root@master ~]# kubectl -n myprod apply -f nonroot.yaml
pod/nonroot created
[root@master ~]# kubectl -n myprod get pods
NAME READY STATUS RESTARTS AGE
nonroot 1/1 Running 0 6s
[root@master ~]# kubectl -n myprod exec -it nonroot -- id
uid=65534(nobody) gid=65534(nobody) groups=65534(nobody)