#!/bin/bash
#
# Watchdog for a Storm cluster.
#
#   1. Probes the monitoring page port; if it is still closed after three
#      attempts, restarts the local storm nimbus and ui daemons.
#   2. Scrapes the supervisor host names from the monitoring page and
#      compares them with the expected list in
#      ${BASE_PATH}/supervisor_list.txt.
#   3. Resolves every mismatching host name to an IP through /etc/hosts and
#      writes the result as a temporary ansible inventory.
#   4. Runs /data/scripts/restart_storm_service.sh on those hosts via ansible.
#
# The script relies on bash-only syntax ("[[ ]]", "for (( ))", arrays), so it
# must run under bash rather than a plain POSIX /bin/sh.

PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin:/usr/local/sbin
. /etc/profile

## Address of the monitoring page.
MON_SRV_IPADDR="192.168.1.103"
MON_SRV_PORT="8080"

## Working directory that holds the list files.
BASE_PATH="/data/scripts"

## Temporary ansible inventory with the addresses of the failed supervisors.
FAIL_SUPERVISOR_LIST="${BASE_PATH}/fail_supervisor.txt"

## Supervisor names scraped from the monitoring page.
PAGE_LIST="${BASE_PATH}/supervisor_list_from_page.txt"

## Supervisor names that are expected to be present.
EXPECTED_LIST="${BASE_PATH}/supervisor_list.txt"

## Names that appear in only one of the two lists above.
FAILED_LIST="${BASE_PATH}/supervisor_list_for_failed.txt"

#---------------------------------------------------------------------------
# Print a message to stderr and abort the script with status 1.
# Arguments: $* - message text
#---------------------------------------------------------------------------
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#---------------------------------------------------------------------------
# Tell whether a file contains at least one non-empty line.
# Arguments: $1 - path of the file to inspect
# Returns:   0 if such a line exists, non-zero otherwise (also when the file
#            cannot be read)
#---------------------------------------------------------------------------
has_content() {
  grep -q . -- "$1"
}

#---------------------------------------------------------------------------
# Restart the storm nimbus service (and the ui that serves the monitoring
# page).
#
# Kills every process whose "ps aux" line contains both "java" and "storm",
# starts "storm nimbus" and "storm ui" in the background, then waits 30
# seconds before returning.
#---------------------------------------------------------------------------
restart_storm_nimbus_server() {
  local -a storm_pids=()

  # Collect the PIDs once so that the emptiness check and the kill operate
  # on the same process set. The two greps are kept separate on purpose: a
  # single pattern naming both words would match its own command line.
  mapfile -t storm_pids < <(ps aux | grep java | grep storm | awk '{print $2}')
  if (( ${#storm_pids[@]} > 0 )); then
    kill -9 "${storm_pids[@]}"
  fi

  nohup /usr/local/storm/bin/storm nimbus > /dev/null 2>&1 &
  nohup /usr/local/storm/bin/storm ui > /dev/null 2>&1 &
  sleep 30
}

#---------------------------------------------------------------------------
# Probe the monitoring port with nmap, up to three times, pausing 10 seconds
# between failed attempts.
# Returns: 0 as soon as the port is reported open, 1 if all attempts failed
#---------------------------------------------------------------------------
monitor_page_reachable() {
  local attempt
  for ((attempt = 0; attempt < 3; attempt++)); do
    if /usr/bin/nmap -n -sS -p "${MON_SRV_PORT}" "${MON_SRV_IPADDR}" \
      | grep -q open; then
      return 0
    fi
    # No point in sleeping after the final attempt.
    if (( attempt < 2 )); then
      sleep 10
    fi
  done
  return 1
}

#---------------------------------------------------------------------------
# Download the monitoring page and store the sorted supervisor names found
# in it (table cells starting with "storm_", nimbus entries excluded) in
# ${PAGE_LIST}.
#---------------------------------------------------------------------------
fetch_supervisors_from_page() {
  curl -s "http://${MON_SRV_IPADDR}:${MON_SRV_PORT}/" \
    | sed 's/<td>/<td>\n/g' \
    | awk -F '<' '/^storm_/{print $1}' \
    | awk '!/nimbus/{print}' \
    | sort > "${PAGE_LIST}"
}

#---------------------------------------------------------------------------
# Entry point: runs the four steps described in the file header.
#---------------------------------------------------------------------------
main() {
  local tool host_name host_ip
  local resolved=0

  # A missing probe tool would look exactly like "service down" and trigger
  # needless restarts, so refuse to run without them.
  for tool in /usr/bin/nmap curl; do
    command -v "${tool}" > /dev/null || die "required tool not found: ${tool}"
  done

  ## 1. Check that the monitoring page is reachable (port closed case).
  if ! monitor_page_reachable; then
    restart_storm_nimbus_server
  fi

  ## 2. Compare the names scraped from the monitoring page with the local
  ##    expected list to find out whether any storm supervisor is abnormal.
  fetch_supervisors_from_page

  ## An empty result from the monitoring page means that the storm nimbus
  ## service itself is abnormal.
  if ! has_content "${PAGE_LIST}"; then
    restart_storm_nimbus_server
    # NOTE(review): the page is not fetched again after this restart, so the
    # comparison below still sees an empty page list and reports every
    # expected supervisor as failed. Confirm that this is intended.
  fi

  [[ -r "${EXPECTED_LIST}" ]] || die "cannot read ${EXPECTED_LIST}"

  # "uniq -u" keeps the names that occur exactly once in the merged, sorted
  # input, i.e. names present in only one of the two lists.
  # NOTE(review): this also reports names that are on the page but missing
  # from the expected list - confirm that this is wanted.
  sort -nr "${PAGE_LIST}" "${EXPECTED_LIST}" | uniq -u > "${FAILED_LIST}"
  if ! has_content "${FAILED_LIST}"; then
    rm -f -- "${FAILED_LIST}"
    exit 0
  fi

  ## 3. Build the list of IP addresses of the abnormal storm supervisors.
  # Truncate instead of append: an inventory left behind by an interrupted
  # run must not leak stale hosts into this one.
  printf '%s\n' "[fail_supervisor]" > "${FAIL_SUPERVISOR_LIST}"
  while IFS= read -r host_name; do
    [[ -n "${host_name}" ]] || continue
    host_ip=$(grep -w -- "${host_name}" /etc/hosts \
      | grep -v '#' \
      | awk '{print $1}' \
      | tail -1)
    if [[ -z "${host_ip}" ]]; then
      # An empty line in the inventory would be useless to ansible.
      printf 'no /etc/hosts entry for %s, skipped\n' "${host_name}" >&2
      continue
    fi
    printf '%s\n' "${host_ip}" >> "${FAIL_SUPERVISOR_LIST}"
    resolved=$((resolved + 1))
  done < "${FAILED_LIST}"

  if (( resolved == 0 )); then
    rm -f -- "${FAIL_SUPERVISOR_LIST}"
    die "none of the failed supervisors could be resolved to an address"
  fi

  ## 4. Restart the storm supervisor service on the remote hosts.
  /usr/local/bin/ansible -i "${FAIL_SUPERVISOR_LIST}" fail_supervisor \
    -m shell -a "/data/scripts/restart_storm_service.sh"
  rm -f -- "${FAIL_SUPERVISOR_LIST}"
}

main "$@"