【原文链接】
一、容器探针简介
1.1 容器探测简介
容器探测用于检测容器中的应用实例是否正常工作,是保障业务可用性的一种传统机制,如果经过探测,实例的状态不符合预期,那么kubernetes就会把该问题实例“摘除”,不承担业务流量,kubernetes提供了两种探针来实现容器探测,分别是:
- livenessProbe:存活性探针,用于检测应用实例当前是否处于正常的运行状态,如果不是,k8s会重启容器
- readinessProbe:就绪性探针,用于检测应用实例当前是否可以接收请求,如果不能,k8s不会转发流量
即livenessProbe决定是否重启容器,readinessProbe决定是否将请求转发给容器
1.2 容器探测方式
- exec命令:在容器内执行一次命令,如果命令执行的退出码为0,则认为程序正常,否则不正常
livenessProbe:
exec:
command:
- cat
- /var/lib/redis.conf
- tcpSocket: 将会尝试访问用户容器的一个端口,如果能够建立这条连接,则认为程序正常,否则不正常
livenessProbe:
tcpSocket:
port: 8000
- httpGet: 调用容器内web应用的url,如果返回的状态码在200-399之间,则认为程序正常,否则不正常
如下,为访问 http://192.168.2.150:80/users
livenessProbe:
httpGet:
path: /users
port: 80
host: 192.168.2.150
scheme: HTTP # 或者HTTPS
二、容器探测方式实例演示
2.1 exec探测方式
编辑pod_liveness_exec.yaml文件,内容如下,探测内容为查看一个不存在的文件的内容
apiVersion: v1
kind: Namespace
metadata:
name: dev
---
apiVersion: v1
kind: Pod
metadata:
name: pod-nginx
namespace: dev
labels:
user: redrose2100
spec:
containers:
- name: nginx
image: nginx:1.17.1
livenessProbe:
exec:
command: ["/bin/cat","/opt/demo100.txt"]
使用如下命令创建
[root@master pod]# kubectl apply -f pod_liveness_exec.yaml
namespace/dev created
pod/pod-nginx created
[root@master pod]#
通过如下命令可以看到,此时因为探测命令失败,所以kubelet会不断地重启容器(Restart Count会不断增加)
[root@master pod]# kubectl describe pod pod-nginx -n dev
Name: pod-nginx
Namespace: dev
Priority: 0
Node: node2/192.168.16.42
Start Time: Wed, 23 Mar 2022 00:38:37 +0800
Labels: user=redrose2100
Annotations: <none>
Status: Running
IP: 10.244.2.39
IPs:
IP: 10.244.2.39
Containers:
nginx:
Container ID: docker://8ab6ad7cf41c11f903cf6beb3a0c4f9f2ff4a9d5255a64091ab1923a89a5739e
Image: nginx:1.17.1
Image ID: docker-pullable://nginx@sha256:b4b9b3eee194703fc2fa8afa5b7510c77ae70cfba567af1376a573a967c03dbb
Port: <none>
Host Port: <none>
State: Running
Started: Wed, 23 Mar 2022 00:40:07 +0800
Last State: Terminated
Reason: Completed
Exit Code: 0
Started: Wed, 23 Mar 2022 00:39:37 +0800
Finished: Wed, 23 Mar 2022 00:40:07 +0800
Ready: True
Restart Count: 3
Liveness: exec [/bin/cat /opt/demo100.txt] delay=0s timeout=1s period=10s #success=1 #failure=3
Environment: <none>
Mounts:
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-pj5nh (ro)
Conditions:
Type Status
Initialized True
Ready True
ContainersReady True
PodScheduled True
Volumes:
kube-api-access-pj5nh:
Type: Projected (a volume that contains injected data from multiple sources)
TokenExpirationSeconds: 3607
ConfigMapName: kube-root-ca.crt
ConfigMapOptional: <nil>
DownwardAPI: true
QoS Class: BestEffort
Node-Selectors: <none>
Tolerations: node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Normal Scheduled 101s default-scheduler Successfully assigned dev/pod-nginx to node2
Normal Pulled 11s (x4 over 101s) kubelet Container image "nginx:1.17.1" already present on machine
Normal Created 11s (x4 over 101s) kubelet Created container nginx
Normal Started 11s (x4 over 101s) kubelet Started container nginx
Normal Killing 11s (x3 over 71s) kubelet Container nginx failed liveness probe, will be restarted
Warning Unhealthy 1s (x10 over 91s) kubelet Liveness probe failed: /bin/cat: /opt/demo100.txt: No such file or directory
[root@master pod]#
使用如下命令删除资源
[root@master pod]# kubectl delete -f pod_liveness_exec.yaml
namespace "dev" deleted
pod "pod-nginx" deleted
[root@master pod]#
2.2 tcpSocket探测方式
编辑pod_liveness_socket.yaml文件,内容如下,使用TCPSocket的方式进行探测
apiVersion: v1
kind: Namespace
metadata:
name: dev
---
apiVersion: v1
kind: Pod
metadata:
name: pod-nginx
namespace: dev
labels:
user: redrose2100
spec:
containers:
- name: nginx
image: nginx:1.17.2
livenessProbe:
tcpSocket:
port: 80
使用如下命令创建资源
[root@master pod]# kubectl apply -f pod_liveness_socket.yaml
namespace/dev unchanged
pod/pod-nginx created
[root@master pod]#
通过如下命令,可以发现此时探测成功
[root@master pod]# kubectl get pod -n dev
NAME READY STATUS RESTARTS AGE
pod-nginx 1/1 Running 0 74s
[root@master pod]# kubectl describe pod pod-nginx -n dev
Name: pod-nginx
Namespace: dev
Priority: 0
Node: node2/192.168.16.42
Start Time: Wed, 23 Mar 2022 00:48:54 +0800
Labels: user=redrose2100
Annotations: <none>
Status: Running
IP: 10.244.2.40
IPs:
IP: 10.244.2.40
Containers:
nginx:
Container ID: docker://7383958c1f8d59cf1506ee447d7f4953d432a6c284f02df60e0c3bc0a49986a1
Image: nginx:1.17.2
Image ID: docker-pullable://nginx@sha256:5411d8897c3da841a1f45f895b43ad4526eb62d3393c3287124a56be49962d41
Port: <none>
Host Port: <none>
State: Running
Started: Wed, 23 Mar 2022 00:49:15 +0800
Ready: True
Restart Count: 0
Liveness: tcp-socket :80 delay=0s timeout=1s period=10s #success=1 #failure=3
Environment: <none>
Mounts:
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-mmq59 (ro)
Conditions:
Type Status
Initialized True
Ready True
ContainersReady True
PodScheduled True
Volumes:
kube-api-access-mmq59:
Type: Projected (a volume that contains injected data from multiple sources)
TokenExpirationSeconds: 3607
ConfigMapName: kube-root-ca.crt
ConfigMapOptional: <nil>
DownwardAPI: true
QoS Class: BestEffort
Node-Selectors: <none>
Tolerations: node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Normal Scheduled 84s default-scheduler Successfully assigned dev/pod-nginx to node2
Normal Pulling 84s kubelet Pulling image "nginx:1.17.2"
Normal Pulled 64s kubelet Successfully pulled image "nginx:1.17.2" in 20.160656657s
Normal Created 63s kubelet Created container nginx
Normal Started 63s kubelet Started container nginx
[root@master pod]#
使用如下命令删除资源
[root@master pod]# kubectl delete -f pod_liveness_socket.yaml
namespace "dev" deleted
pod "pod-nginx" deleted
[root@master pod]#
2.3 httpGet探测方式
编辑pod_liveness_http.yaml文件,内容如下:
apiVersion: v1
kind: Namespace
metadata:
name: dev
---
apiVersion: v1
kind: Pod
metadata:
name: pod-nginx
namespace: dev
labels:
user: redrose2100
spec:
containers:
- name: nginx
image: nginx:1.17.1
livenessProbe:
httpGet:
scheme: HTTP
port: 80
path: /
使用如下命令创建资源
[root@master pod]# kubectl apply -f pod_liveness_http.yaml
namespace/dev created
pod/pod-nginx created
[root@master pod]#
使用如下命令查询,因为nginx默认监听的是80端口,因此这里探测会成功
[root@master pod]# kubectl get pod -n dev
NAME READY STATUS RESTARTS AGE
pod-nginx 1/1 Running 0 90s
[root@master pod]# kubectl describe pod pod-nginx -n dev
Name: pod-nginx
Namespace: dev
Priority: 0
Node: node2/192.168.16.42
Start Time: Wed, 23 Mar 2022 00:58:05 +0800
Labels: user=redrose2100
Annotations: <none>
Status: Running
IP: 10.244.2.41
IPs:
IP: 10.244.2.41
Containers:
nginx:
Container ID: docker://9d0fb6c6770bb6712f94a1685cf3a763135161180bdf685124b2e0020ac046db
Image: nginx:1.17.1
Image ID: docker-pullable://nginx@sha256:b4b9b3eee194703fc2fa8afa5b7510c77ae70cfba567af1376a573a967c03dbb
Port: <none>
Host Port: <none>
State: Running
Started: Wed, 23 Mar 2022 00:58:06 +0800
Ready: True
Restart Count: 0
Liveness: http-get http://:80/ delay=0s timeout=1s period=10s #success=1 #failure=3
Environment: <none>
Mounts:
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-j8wq2 (ro)
Conditions:
Type Status
Initialized True
Ready True
ContainersReady True
PodScheduled True
Volumes:
kube-api-access-j8wq2:
Type: Projected (a volume that contains injected data from multiple sources)
TokenExpirationSeconds: 3607
ConfigMapName: kube-root-ca.crt
ConfigMapOptional: <nil>
DownwardAPI: true
QoS Class: BestEffort
Node-Selectors: <none>
Tolerations: node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Normal Scheduled 100s default-scheduler Successfully assigned dev/pod-nginx to node2
Normal Pulled 99s kubelet Container image "nginx:1.17.1" already present on machine
Normal Created 99s kubelet Created container nginx
Normal Started 99s kubelet Started container nginx
[root@master pod]#
三、容器探测总结
查看livenessProbe的子属性,除了上述三种探测方式,还有一些参数,如下:
initialDelaySeconds <integer> # 容器启动后等待多少秒执行第一次探测
timeoutSeconds <integer> # 探测超时时间,默认1秒,最小1秒
periodSeconds <integer> # 执行探测的频率,默认是10秒,最小1秒
failureThreshold <integer> # 连续探测失败多少次才被认定为失败,默认是3,最小是1
successThreshold <integer> # 连续探测成功多少次才被认定为成功,默认是1,最小是1(对于livenessProbe必须为1)