#!/bin/bash
# A very simple daily inspection (health-check) script for a Kubernetes cluster.
#
# Must be run as root. The report is written to
#   <script dir>/log/k8s-Check-logs-<ip>-<YYYYMMDD>.txt

# PATH is often incomplete when the script runs from cron, so many commands
# would not be found; set it explicitly before running anything else.
export PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/root/bin
if [ -r /etc/profile ]; then
  source /etc/profile
fi

# Refuse to run as a non-root user.
if [ "$(id -u)" -gt 0 ]; then
  echo "请用root用户执行此脚本!" >&2
  exit 1
fi

# Older alternative kept for reference:
# IPADDR=$(ifconfig eth0|grep 'inet addr'|awk -F '[ :]' '{print $13}')
# First address printed by `hostname -I`; fall back to the plain host name so
# the log file name never ends up with an empty address component.
IPADDR=$(hostname -I 2>/dev/null | awk '{print $1}')
if [ -z "$IPADDR" ]; then
  IPADDR=$(hostname)
fi

# Second-to-last field of /etc/redhat-release; left empty when that file does
# not exist. Not referenced by the visible part of this script.
centosVersion=""
if [ -r /etc/redhat-release ]; then
  centosVersion=$(awk '{print $(NF-1)}' /etc/redhat-release)
fi

VERSION="2023.08.28"

# Logging: reports go to a "log" directory next to the script.
# dirname yields "." when the script is invoked without a directory part.
PROGPATH=$(dirname -- "$0")
LOGPATH="$PROGPATH/log"
mkdir -p -- "$LOGPATH" || exit 1
k8s_check_logs="$LOGPATH/k8s-Check-logs-$IPADDR-$(date +%Y%m%d).txt"
# Print the script version banner, preceded by two blank lines.
# Globals: VERSION (read)
version() {
  printf '\n\n%s\n' "K8S巡检脚本:Version $VERSION"
}
# Print the current cluster information: two blank lines, a section banner,
# then whatever `kubectl cluster-info` writes.
getK8sClusterInfo() {
  local banner="############################ 输出当前集群信息 #############################"
  printf '\n\n%s\n' "$banner"
  kubectl cluster-info
}
# Print the node health section: two blank lines, a section banner, then the
# output of `kubectl get nodes`.
getK8sNodes() {
  local banner="############################ 检查节点健康状态 #############################"
  printf '\n\n%s\n' "$banner"
  kubectl get nodes
}
#######################################
# Report pods that need attention, in two sections:
#   1. pods whose STATUS is neither Running nor Completed
#      (printed with the table header row);
#   2. pods whose STATUS is Running but whose READY value "x/y" has x != y
#      (printed without a header row).
# `kubectl get pods -A` is queried once and both sections are derived from
# that single snapshot. Classification uses the STATUS column (4th field)
# instead of matching the whole line, so a pod whose name or namespace merely
# contains "Running" or "Completed" is judged by its real status.
# Outputs: report text on stdout.
# Returns: 0 normally, 1 when the pod list could not be fetched.
#######################################
getK8sPodsStatus() {
  local pods_table not_running not_ready
  local fetch_failed=0

  echo ""
  echo ""
  echo -e "############################ PODS检查 --- \033[0;31m处于非Running状态的pods\033[0m #############################"
  # A failed query must not be reported as "no abnormal resources".
  pods_table=$(kubectl get pods -A) || fetch_failed=1
  if (( fetch_failed )); then
    echo "获取Pods信息失败,请检查kubectl及集群连接。"
  else
    not_running=$(awk 'NR > 1 && $4 != "Running" && $4 != "Completed"' <<<"$pods_table")
    if [ -z "$not_running" ]; then
      echo "Pods 无异常资源"
    else
      # Header row first, then the offending rows.
      head -n 1 <<<"$pods_table"
      printf '%s\n' "$not_running"
    fi
  fi

  echo ""
  echo ""
  echo -e "############################ PODS检查 --- \033[0;31m处于Running状态container异常的pods\033[0m ##################"
  if (( fetch_failed )); then
    echo "获取Pods信息失败,请检查kubectl及集群连接。"
  else
    not_ready=$(awk 'NR > 1 && $4 == "Running" {
      split($3, ready, "/")
      if (ready[1] != ready[2]) print
    }' <<<"$pods_table")
    if [ -z "$not_ready" ]; then
      echo "Pods 无异常资源"
    else
      printf '%s\n' "$not_ready"
    fi
  fi

  return "$fetch_failed"
}
#######################################
# Report Deployments whose READY value "x/y" (3rd column of
# `kubectl get deployments.apps -A`) has x != y.
# The table is fetched once; the offending rows are printed under the table's
# header row. A failed query is reported explicitly instead of being treated
# as "nothing abnormal".
# The spelling "Deplayments" in the function name and report text is kept
# unchanged so existing callers and log consumers are unaffected.
# Outputs: report text on stdout.
# Returns: 0 normally, 1 when the Deployment list could not be fetched.
#######################################
getK8sDeplaymentsStatus() {
  local table unhealthy

  echo ""
  echo ""
  echo -e "############################ Deplayments检查 #############################"
  if ! table=$(kubectl get deployments.apps -A); then
    echo "获取Deployments信息失败,请检查kubectl及集群连接。"
    return 1
  fi

  unhealthy=$(awk 'NR > 1 {
    split($3, ready, "/")
    if (ready[1] != ready[2]) print
  }' <<<"$table")
  if [ -z "$unhealthy" ]; then
    echo "Deplayments 无异常资源"
  else
    # Header row first, then the offending rows.
    head -n 1 <<<"$table"
    printf '%s\n' "$unhealthy"
  fi
}
#######################################
# Report StatefulSets whose READY value "x/y" (3rd column of
# `kubectl get statefulsets.apps -A`) has x != y.
# The table is fetched once; the offending rows are printed under the table's
# header row. A failed query is reported explicitly instead of being treated
# as "nothing abnormal".
# Outputs: report text on stdout.
# Returns: 0 normally, 1 when the StatefulSet list could not be fetched.
#######################################
getK8sStatefulsetsStatus() {
  local table unhealthy

  echo ""
  echo ""
  echo -e "############################ Statefulsets检查 ############################"
  if ! table=$(kubectl get statefulsets.apps -A); then
    echo "获取Statefulsets信息失败,请检查kubectl及集群连接。"
    return 1
  fi

  unhealthy=$(awk 'NR > 1 {
    split($3, ready, "/")
    if (ready[1] != ready[2]) print
  }' <<<"$table")
  if [ -z "$unhealthy" ]; then
    echo "Statefulsets 无异常资源"
  else
    # Header row first, then the offending rows.
    head -n 1 <<<"$table"
    printf '%s\n' "$unhealthy"
  fi
}
#######################################
# Report DaemonSets whose 3rd, 4th and 7th columns of
# `kubectl get daemonsets.apps -A` are not all equal (DESIRED, CURRENT and
# AVAILABLE in kubectl's default table layout).
# The table is fetched once; the offending rows are printed under the table's
# header row. A failed query is reported explicitly instead of being treated
# as "nothing abnormal".
# Outputs: report text on stdout.
# Returns: 0 normally, 1 when the DaemonSet list could not be fetched.
#######################################
getK8sDaemonsetsStatus() {
  local table unhealthy

  echo ""
  echo ""
  echo -e "############################ Daemonsets检查 ##############################"
  if ! table=$(kubectl get daemonsets.apps -A); then
    echo "获取Daemonsets信息失败,请检查kubectl及集群连接。"
    return 1
  fi

  # $3 == $4 and $4 == $7 together imply $3 == $7, so two comparisons suffice.
  unhealthy=$(awk 'NR > 1 && ($3 != $4 || $4 != $7)' <<<"$table")
  if [ -z "$unhealthy" ]; then
    echo "Daemonsets 无异常资源"
  else
    # Header row first, then the offending rows.
    head -n 1 <<<"$table"
    printf '%s\n' "$unhealthy"
  fi
}
#######################################
# Report resource usage for nodes and for pods in all namespaces.
# Each `kubectl top` query is run once and its captured output is printed;
# a non-zero exit status is taken to mean the Metrics API is unavailable.
# kubectl's stderr is discarded for these queries.
# Outputs: report text on stdout.
#######################################
getK8sTop() {
  local usage

  echo ""
  echo ""
  echo -e "############################ 检查节点资源使用情况 #########################"
  if usage=$(kubectl top nodes 2>/dev/null); then
    if [ -n "$usage" ]; then
      printf '%s\n' "$usage"
    fi
  else
    echo "Metrics API 不可用,请确保Metrics Server已正确安装和配置。"
  fi

  echo ""
  echo ""
  echo -e "############################ 检查Pod资源使用情况 ##########################"
  if usage=$(kubectl top pods --all-namespaces 2>/dev/null); then
    if [ -n "$usage" ]; then
      printf '%s\n' "$usage"
    fi
  else
    echo "Metrics API 不可用,请确保Metrics Server已正确安装和配置。"
  fi
}
# Print the events section: two blank lines, a section banner, then the events
# of all namespaces sorted by creation timestamp.
getK8sGetEvents() {
  local -a event_args=(get events --all-namespaces --sort-by='.metadata.creationTimestamp')
  printf '\n\n%s\n' "############################ 检查事件 ####################################"
  kubectl "${event_args[@]}"
}
# Run every inspection step in a fixed order; all output goes to stdout.
check() {
  local step
  for step in \
    version \
    getK8sClusterInfo \
    getK8sNodes \
    getK8sPodsStatus \
    getK8sDeplaymentsStatus \
    getK8sStatefulsetsStatus \
    getK8sDaemonsetsStatus \
    getK8sTop \
    getK8sGetEvents; do
    "$step"
  done
}
# Run all checks and save the combined report to the log file. The target is
# quoted so a script directory containing whitespace does not break the
# redirection.
check > "$k8s_check_logs"
echo "检查结果:$k8s_check_logs"