1、进入 consul 维护模式(maint): /home/qboxserver/consul/current/bin/consul maint -enable
2、尝试重启 docker
3、若第 2 步成功,转到第 4 步;若第 2 步失败,转到第 5 步
4、重启cadvisor 和 mesos-agent
5、重启机器,并按照机器重启的步骤恢复
6、supervisorctl restart dockerd
7、supervisorctl restart boots-cadvisor mesos-agent
退出维护模式: /home/qboxserver/consul/current/bin/consul maint -disable
该脚本的启动方式为 screen: SESSION_NAME="kill_hulk_app"; screen -ls | grep "${SESSION_NAME}" > /dev/null; [ "$?" != "0" ] && screen -d -m -S "${SESSION_NAME}" bash /root/kill_hulk_app.sh; screen -ls; echo
处理脚本(将脚本第 10 行中的 20 替换为 17,并查看修改后的内容): sed -i '10s/20/17/g' /root/kill_hulk_app.sh && cat /root/kill_hulk_app.sh
# 清理对应 screen 内的进程 pid: screen -ls | grep kill_hulk_app | cut -d. -f1 |xargs kill
# 重新启动脚本: SESSION_NAME="kill_hulk_app"; screen -ls | grep "${SESSION_NAME}" > /dev/null; [ "$?" != "0" ] && screen -d -m -S "${SESSION_NAME}" bash /root/kill_hulk_app.sh; screen -ls; echo && screen -ls | grep kill_hulk_app | cut -d. -f1
脚本内容:
cat kill_hulk_app.sh
#!/bin/bash
#######################################
# Write a timestamped message to stdout and append the same line to
# kill_hulk_app.log (path is relative to the current working directory).
# Arguments: the words of the message; they are joined with single spaces.
# Outputs:   "YYYY-MM-DD HH:MM:SS: <message>" on stdout and in the log file.
# Returns:   exit status of tee.
#######################################
log() {
  local stamp
  stamp=$(date +"%Y-%m-%d %H:%M:%S")
  # The timestamp (with its trailing colon) is just the first word of the
  # line, so a call without arguments produces no trailing space.
  local -a fields=("${stamp}:" "$@")
  local IFS=' '
  printf '%s\n' "${fields[*]}" | tee -a kill_hulk_app.log
}
# Size limit in GB: containers strictly larger than this are stopped and
# removed. Overridable from the environment so the limit can be tuned without
# editing this file; the default is the value that used to be hard-coded.
readonly HULK_SIZE_LIMIT_GB="${HULK_SIZE_LIMIT_GB:-17}"
# Seconds to wait between two scans.
readonly HULK_SCAN_INTERVAL="${HULK_SCAN_INTERVAL:-300}"

#######################################
# Print the IDs of the containers whose size exceeds HULK_SIZE_LIMIT_GB.
# Globals:   HULK_SIZE_LIMIT_GB (read)
# Arguments: none
# Inputs:    stdin - lines of `docker ps -s --format '{{.ID}} {{.Size}}'`
# Outputs:   one container ID per line on stdout
#######################################
find_hulks() {
  awk -v limit="${HULK_SIZE_LIMIT_GB}" '
    BEGIN {
      # Factors converting each decimal unit to GB.
      f["B"] = 1e-9; f["KB"] = 1e-6; f["MB"] = 1e-3
      f["GB"] = 1;   f["TB"] = 1e3;  f["PB"] = 1e6
    }
    {
      size = $2
      unit = $3
      # Docker prints the size either as "17.5 GB ..." (number and unit in
      # separate fields) or as "17.5GB ..." (fused); accept both.
      if (match(size, /[A-Za-z]+$/)) {
        unit = substr(size, RSTART)
        size = substr(size, 1, RSTART - 1)
      }
      unit = toupper(unit)
      if (!(unit in f)) next
      if (size * f[unit] > limit) print $1
    }'
}

while true; do
  log "start"
  # `docker ps -s` is expensive, so scan once per cycle and reuse the listing
  # both for filtering and for logging the size of each offender.
  if ! LISTING=$(docker ps -s --format '{{.ID}} {{.Size}}'); then
    log "docker ps failed, skip this round"
    LISTING=""
  fi
  mapfile -t HULKS < <(find_hulks <<<"${LISTING}")
  log "find hulks: " "${HULKS[@]}"
  for HULK in "${HULKS[@]}"; do
    [[ -n "${HULK}" ]] || continue
    log "container id to stop: " "${HULK}"
    # Intentionally unquoted: flattens multi-line output into one log line.
    # shellcheck disable=SC2046
    log $(docker inspect "${HULK}" | grep instance_id)
    # shellcheck disable=SC2046
    log $(grep -F -- "${HULK}" <<<"${LISTING}")
    log "stop && rm : " "${HULK}"
    docker stop "${HULK}" && docker rm "${HULK}"
    # Exit status of the stop/rm chain above (0 only if both succeeded).
    log $?
  done
  log "sleep ${HULK_SCAN_INTERVAL}s"
  sleep "${HULK_SCAN_INTERVAL}"
done
转载于:https://blog.51cto.com/daixuan/2360512