consul用法
consul作用
- 域名解析
- 服务状态检测
域名解析功能
1. 采用supervisor管理consul
/opt/py27/bin/python /opt/py27/bin/supervisord -c /data/bkce/etc/supervisor-consul.conf
cat /data/bkce/etc/supervisor-consul.conf
[unix_http_server]
file=/data/bkce/logs/consul/supervisor.sock ; (the path to the socket file)
;[inet_http_server] ; inet (TCP) server disabled by default
;port=127.0.0.1:9001 ; (ip_address:port specifier, *:port for all iface)
;username=user ; (default is no username (open server))
;password=123 ; (default is no password (open server))
[supervisord]
logfile=/data/bkce/logs/consul/supervisord_out.log ; (main log file;default $CWD/supervisord_out.log)
logfile_maxbytes=50MB ; (max main logfile bytes b4 rotation;default 50MB)
logfile_backups=10 ; (num of main logfile rotation backups;default 10)
loglevel=info ; (log level;default info; others: debug,warn,trace)
pidfile=/data/bkce/logs/consul/supervisord.pid ; (supervisord pidfile;default supervisord.pid)
nodaemon=false ; (start in foreground if true;default false)
minfds=1024 ; (min. avail startup file descriptors;default 1024)
minprocs=200 ; (min. avail process descriptors;default 200)
[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
[supervisorctl]
serverurl=unix:///data/bkce/logs/consul/supervisor.sock ; use a unix:// URL for a unix socket
;serverurl=http://127.0.0.1:9001 ; use an http:// url to specify an inet socket
;username=chris ; should be same as http_username if set
;password=123 ; should be same as http_password if set
;prompt=mysupervisor ; cmd line prompt (default "supervisor")
;history_file=~/.sc_history ; use readline history if available
[program:agent]
directory=/usr/bin/
command=/usr/bin/consul agent -config-file=/data/bkce/etc/consul.conf -config-dir=/data/bkce/etc/consul.d
autostart=true
autorestart=true
redirect_stderr=true
stdout_logfile=/data/bkce/logs/consul/consul.log
2. 配置consul agent(将DNS端口设为53,使其可作为DNS解析服务)
cat /data/bkce/etc/consul.conf
{
"rejoin_after_leave": true,
"skip_leave_on_interrupt": true,
"recursors": [
"nds1.host.ip",
"nds2.host.ip",
"nds3.host.ip"
],
"bind_addr": "10.0.**.128",
"node_id": "1b04ee75-82c3-43c0-9ebf-1fbf72d0f76a",
"retry_join": [
"10.0.**.128"
],
"log_level": "info",
"server": true,
"datacenter": "dc",
"data_dir": "/data/bkce/public/consul",
"leave_on_terminate": false,
"bootstrap": true,
"node_name": "nginx-1",
"pid_file": "/data/bkce/logs/consul/consul.pid",
"encrypt": "H9KhSNqXml1e0IER7ntZmA==",
"ports": {
"dns": 53
}
}
3. 配置consul agent地址作为DNS解析地址
cat /etc/resolv.conf
# 指向consul agent 的地址
nameserver consul.host.ip
健康检查
/data/bkce/etc/consul.d 是consul启动命令中通过 -config-dir 指定的配置目录
# cd /data/bkce/etc/consul.d
# cat appo.json
{
"service": {
"id": "appo-1",
"checks": [
{
"service_id": "appo-1",
"interval": "10s",
"script": "/data/bkce/bin/health_check/check_proc_exists -m appo"
}
],
"name": "appo",
"enableTagOverride": false,
"address": "10.0.53.128"
}
}
上面的配置中有个检查脚本
"script": "/data/bkce/bin/health_check/check_proc_exists -m appo"
# cat /data/bkce/bin/health_check/check_proc_exists
#!/bin/bash
# Consul script check: reports whether a module (optionally one of its
# projects) is healthy according to get_status from status.rc.
#
# Usage:  check_proc_exists -m module_name [-p project] [-h]
# Exit:   0 when get_status succeeds (or -h was requested),
#         2 on bad usage, an unusable $CTRL_DIR, or a failed get_status.

# Print the usage text and exit with status $1 (default 0).
usage () {
    echo "${0##*/} -m module_name -h"
    echo
    echo "for example:"
    echo " ${0##*/} -m paas"
    echo " ${0##*/} -m bkdata -p dataapi"
    exit "${1:-0}"
}

module=
proj=
while getopts m:p:h arg; do
    case $arg in
        m) module=$OPTARG ;;
        p) proj=$OPTARG ;;
        h) usage 0 ;;
        # An invalid option must not exit 0: a zero exit is reported as a
        # passing health check.
        *) usage 2 >&2 ;;
    esac
done

# Without a module there is nothing to check; fail loudly instead of
# falling through to an undefined "ps_status_" call.
[ -n "$module" ] || usage 2 >&2

source /root/.bkrc
if [ -z "$CTRL_DIR" ] || ! cd "$CTRL_DIR"; then
    echo "cannot change to CTRL_DIR '$CTRL_DIR'" >&2
    exit 2
fi
source ./status.rc
export LOG_FILE=/tmp/health_check.log

# $proj is left unquoted on purpose so that an absent -p passes no second
# argument to get_status.
if get_status "$module" $proj; then
    log "consul health checking: $module-${proj:-COMMON}: ok"
    exit 0
else
    log "consul health checking: $module-${proj:-COMMON}: critical"
    exit 2
fi
这个脚本的检测功能来自它通过 source 导入的 ./status.rc
# cat status.rc
# vim:ft=sh
#
# Load the shared helper file from the current working directory.
source ./utils.fc
# NOTE(review): load_functions is not defined in this file (presumably it is
# provided by utils.fc). It is invoked for the "status third" and
# "status extra" groups -- confirm what each group pulls in.
load_functions status third
load_functions status extra
# Status of fta: delegates to _ps_status with "fta" as both the module and
# the project name. Returns whatever _ps_status returns.
ps_status_fta () {
    local fta_name=fta
    _ps_status "$fta_name" "$fta_name"
}
# Print the supervisor status of one project of a module.
#
# Arguments:
#   $1 - module name
#   $2 - project name
#   $3 - status command to run (defaults to "supervisorctl")
# Globals read: INSTALL_PATH, LAN_IP
# Returns: 1 when no "supervisord -c .*<module>-<project>.conf" process is
#          found; otherwise the exit status of the status|awk pipeline.
_ps_status () {
    local module=$1 proj=$2
    local checkcmd=${3:-supervisorctl}
    local conf_stem=$module-$proj

    workon $proj >/dev/null 2>&1

    # Guard clause: supervisord for this project is not running.
    if ! process_is_running "supervisord -c .*$conf_stem.conf" >/dev/null; then
        echo "[$(blue_echo $LAN_IP)] $proj: $(red_echo EXIT)"
        return 1
    fi

    # Prefix every status line with the local IP and the project name.
    $checkcmd -c $INSTALL_PATH/etc/supervisor-$conf_stem.conf status \
        | awk -v IP=$LAN_IP -v P=$proj '{printf "[%s] %-8s%4s%s\n", IP, P, "", $0}'
}
# Status of bkdata projects.
#
# Arguments:
#   $1 - optional project name; when empty, dataapi, monitor and databus are
#        all checked.
# Returns: with a project given, the status of _ps_status for it; otherwise
#          the number of projects whose _ps_status call failed.
ps_status_bkdata () {
    local module=bkdata
    local proj=$1
    local ret=0

    echo "---------------------------------------------------------------------------------------------------------"

    # Single-project mode.
    if [ -n "$proj" ]; then
        _ps_status $module $proj
        return $?
    fi

    # All-projects mode: count the failures.
    for proj in dataapi monitor databus; do
        if ! _ps_status $module $proj; then
            ret=$((ret + 1))
        fi
    done
    return $ret
}
# Status of appo: calls ps_status_paas_agent with the argument "O".
# NOTE(review): ps_status_paas_agent as defined in this file reads $APPENV
# and not its positional arguments -- confirm the argument is still needed.
ps_status_appo () {
    local env_flag=O
    ps_status_paas_agent "$env_flag"
}
# Status of appt: calls ps_status_paas_agent with the argument "T".
# NOTE(review): ps_status_paas_agent as defined in this file reads $APPENV
# and not its positional arguments -- confirm the argument is still needed.
ps_status_appt () {
    local env_flag=T
    ps_status_paas_agent "$env_flag"
}
# Status of paas_agent plus the local nginx master process.
#
# Globals read: INSTALL_PATH, LAN_IP, APPENV
# Returns: 1 when "workon paas_agent" fails or no paas_agent supervisord is
#          found, 0 otherwise. The nginx check is printed but its result
#          does not influence the return value.
ps_status_paas_agent () {
    local module=paas_agent
    local ret=0

    if ! workon paas_agent; then
        return 1
    fi

    if ! process_is_running "supervisord -c .*${module}.conf" >/dev/null; then
        echo "[$(blue_echo $LAN_IP)] $module: $(red_echo EXIT)"
        ret=1
    else
        # Prefix each status line with the local IP and the agent environment.
        supervisorctl -c $INSTALL_PATH/etc/supervisor-${module}.conf status \
            | awk -v IP=$LAN_IP -v P="paas_agent($APPENV)" '{printf "[%s] %-8s%4s%s\n",IP, P, "", $0}'
    fi

    ps_status nginx "nginx: master process nginx"
    return $ret
}
# Status of docker: delegates to ps_status with the label "docker" and a
# pattern for the dockerd command line.
ps_status_docker () {
    local svc_label=docker
    local proc_regex="dockerd .*docker.sock"
    ps_status "$svc_label" "$proc_regex"
}
# Alias for ps_status_open_paas.
#
# Arguments: forwarded unchanged to ps_status_open_paas.
# Returns:   the status of ps_status_open_paas.
ps_status_paas () {
    # "$@" keeps every argument intact; the unquoted form would re-split
    # arguments on whitespace and expand glob characters.
    ps_status_open_paas "$@"
}
# Supervisor status of open_paas.
#
# Arguments:
#   $1 - optional project name passed on to "supervisorctl ... status"
# Globals read: INSTALL_PATH, LAN_IP
# Returns: 1 when "workon open_paas" fails or no open_paas supervisord is
#          found, 0 otherwise.
ps_status_open_paas () {
    local proj=$1
    local ret=0

    if ! workon open_paas; then
        return 1
    fi

    echo "---------------------------------------------------------------------------------------------------------"

    if ! process_is_running "supervisord -c .*open_paas.conf" >/dev/null; then
        echo "[$(blue_echo $LAN_IP)] open_paas: $(red_echo EXIT)"
        ret=1
    else
        # Prefix each status line with the local IP and the module name.
        supervisorctl -c $INSTALL_PATH/etc/supervisor-open_paas.conf status ${proj} \
            | awk -v IP=$LAN_IP '{printf "[%s] %-8s%4s%s\n",IP, "open_paas", "", $0}'
    fi
    return $ret
}
# Print "[<ip>] <module>: RUNNING|EXIT" depending on whether
# process_is_running prints anything for the given pattern.
#
# Arguments:
#   $1 - label to print; when empty the function returns 0 immediately
#   $2 - pattern handed to process_is_running
# Globals read:    LAN_IP
# Globals written: _pids (words printed by process_is_running), status
# Returns: 0 when at least one word was printed, 1 otherwise.
ps_status () {
    local module="$1"
    local pattern="$2"
    local ret

    if [ -z "$module" ]; then
        return 0
    fi

    _pids=( $(process_is_running "$pattern") )

    case ${#_pids[@]} in
        0)
            status="$(red_echo EXIT)"
            ret=1
            ;;
        *)
            status="$(green_echo RUNNING)"
            ret=0
            ;;
    esac

    echo "[$(blue_echo $LAN_IP)] $module: $status"
    return $ret
}
# Status of nginx: delegates to ps_status with the label "nginx" and a
# pattern for the nginx master process title.
ps_status_nginx () {
    local svc_label=nginx
    local proc_regex="nginx: master process nginx"
    ps_status "$svc_label" "$proc_regex"
}
# Report the state of the rpcbind and nfs services via "service <name> status".
#
# Calls the ok/err helpers (defined outside this file) with a message per
# service. Returns the status of the last helper that was called.
ps_status_nfs () {
    # if/else instead of "cmd && ok || err": with the short-circuit form err
    # would also run whenever ok itself returned non-zero.
    if service rpcbind status >/dev/null 2>&1; then
        ok "rpcbind is running"
    else
        err "rpcbind not running"
    fi

    if service nfs status >/dev/null 2>&1; then
        ok "nfs is running"
    else
        err "nfs is not running"
    fi
}
# Status of cmdb: the nginx master process plus the cmdb "server" project
# under supervisor (queried with /opt/py27/bin/supervisorctl).
#
# Returns: the number of failed checks (0, 1 or 2).
ps_status_cmdb () {
    local module=cmdb
    local proj=server
    local ret=0

    ps_status cmdb-nginx "nginx: master process nginx" || ret=$((ret + 1))
    # The supervisor check must count too; previously its result was dropped,
    # so a stopped cmdb server still produced a zero return.
    _ps_status $module $proj /opt/py27/bin/supervisorctl || ret=$((ret + 1))

    return $ret
}
# Status of job: delegates to ps_status with the label "job" and a pattern
# for the job-exec.war command line.
ps_status_job () {
    local svc_label=job
    local proc_regex="job.conf.*job-exec.war"
    ps_status "$svc_label" "$proc_regex"
}
# Print RUNNING/EXIT for a JVM process, looked up in the output of "jps -lv".
#
# Arguments:
#   $1 - label to print
#   $2 - grep pattern matched against each jps output line
# Globals read: LAN_IP
# Returns: 0 when a line matches, 2 otherwise.
ps_status_java () {
    local module=$1 pattern=$2

    if ! jps -lv | grep -q "$pattern"; then
        echo "[$(blue_echo $LAN_IP)] $module: $(red_echo EXIT)"
        return 2
    fi

    echo "[$(blue_echo $LAN_IP)] $module: $(green_echo RUNNING)"
    return 0
}
# Status of kafka: delegates to ps_status_java with the label "kafka" and a
# pattern for the kafka.Kafka main class with a -Dkafka.logs.dir= option.
ps_status_kafka () {
    local svc_label=kafka
    local jvm_regex="kafka.Kafka .*-Dkafka.logs.dir="
    ps_status_java "$svc_label" "$jvm_regex"
}
# Status of zk: delegates to ps_status_java with the label "zk" and the
# ZooKeeper QuorumPeerMain class name as the pattern.
ps_status_zk () {
    local svc_label=zk
    local jvm_regex="org.apache.zookeeper.server.quorum.QuorumPeerMain"
    ps_status_java "$svc_label" "$jvm_regex"
}
# Status of es: delegates to ps_status_java with the label "es" and a pattern
# for the Elasticsearch bootstrap class with an es.path.home ending in /es.
ps_status_es () {
    local svc_label=es
    local jvm_regex="org.elasticsearch.bootstrap.Elasticsearch .*Des.path.home=.*/es"
    ps_status_java "$svc_label" "$jvm_regex"
}
# Status of activemq: delegates to ps_status with the label "activemq" and
# the activemq.jar path fragment as the pattern.
ps_status_activemq () {
    local svc_label=activemq
    local proc_regex="activemq/bin/activemq.jar"
    ps_status "$svc_label" "$proc_regex"
}
# Status of redis: delegates to ps_status with the label "redis" and a
# pattern built from the local listen address.
# Globals read: LAN_IP, REDIS_PORT
ps_status_redis () {
    local listen_addr="$LAN_IP:$REDIS_PORT"
    ps_status redis "redis-server $listen_addr"
}
# Status of a redis cluster node: the redis server on the local listen
# address plus a redis sentinel process.
#
# Globals read: LAN_IP, REDIS_PORT
# Returns: the number of failed checks (0, 1 or 2).
ps_status_redis_cluster () {
    local ret=0

    ps_status redis "redis-server $LAN_IP:$REDIS_PORT" || ret=$((ret + 1))
    # The brackets are escaped so the pattern matches the literal
    # "[sentinel]" marker; unescaped, "[sentinel]" is a bracket expression
    # that matches any single one of those letters.
    ps_status sentinel 'redis-server.*\[sentinel\]' || ret=$((ret + 1))

    return $ret
}
# Status of beanstalk: delegates to ps_status with the label "beanstalk" and
# the "beanstalkd -l" command-line fragment as the pattern.
ps_status_beanstalk () {
    local svc_label=beanstalk
    local proc_regex="beanstalkd -l"
    ps_status "$svc_label" "$proc_regex"
}
# Status of rabbitmq: delegates to ps_status with the label "rabbitmq" and a
# pattern for a beam process whose command line mentions /rabbitmq.
ps_status_rabbitmq () {
    local svc_label=rabbitmq
    local proc_regex="beam.*/rabbitmq"
    ps_status "$svc_label" "$proc_regex"
}
# Status of mysql: delegates to ps_status with the label "mysqld" and a
# pattern for a mysqld whose --basedir is under $INSTALL_PATH/service/mysql.
# Globals read: INSTALL_PATH
ps_status_mysql () {
    local base_dir="$INSTALL_PATH/service/mysql"
    ps_status mysqld "mysqld .*--basedir=$base_dir"
}
# Print RUNNING / ERROR STATUS / EXIT for each module name given.
#
# For a module "a_b" the pattern handed to process_is_running is
# "a_b .*a/b.conf" (underscores become slashes in the conf path part).
# A module is RUNNING when process_paired accepts the words printed by
# process_is_running, ERROR STATUS when words were printed but
# process_paired rejects them, and EXIT when nothing was printed.
#
# Arguments: module names
# Globals read: LAN_IP
# Returns: the number of modules that are not RUNNING.
_check_status_bygroup () {
    local ret=0
    # Loop state is local so it no longer leaks into the caller's globals.
    local m proc_pattern status
    local -a pids

    for m in "$@"; do
        proc_pattern="${m} .*${m//_//}.conf"
        pids=( $(process_is_running "$proc_pattern") )
        if process_paired ${pids[@]}; then
            # A healthy module must leave the failure count untouched. The
            # count used to be added to itself here, doubling it for every
            # healthy module that followed a failure.
            status="$(green_echo RUNNING)"
        elif [ "${#pids[@]}" -ne 0 ]; then
            status="$(red_echo 'ERROR STATUS')"
            ret=$((ret + 1))
        else
            status="$(red_echo 'EXIT')"
            ret=$((ret + 1))
        fi
        echo "[$(blue_echo $LAN_IP)] $m: $status"
    done

    return $ret
}
# Status of the gse modules: the optional group is reported first, then the
# required group.
#
# Returns: the status of the required-group check only; the result of the
#          optional-group check is discarded.
ps_status_gse () {
    local optional=(gse_alarm gse_ops gse_opts)
    local required=(gse_api gse_btsvr gse_data gse_dba gse_task gse_syncdata gse_procmgr gse_dataop)

    _check_status_bygroup ${optional[@]}
    _check_status_bygroup ${required[@]}
}
# Print RUNNING/EXIT for consul based on whether "pidof consul" succeeds.
#
# Globals read: LAN_IP
# Returns: 0 when pidof succeeds, 1 otherwise.
ps_status_consul () {
    local ret

    if pidof consul >/dev/null 2>&1; then
        echo "[$(blue_echo $LAN_IP)] consul: $(green_echo RUNNING)"
        ret=0
    else
        echo "[$(blue_echo $LAN_IP)] consul: $(red_echo EXIT)"
        ret=1
    fi

    return $ret
}
# Status of license: delegates to ps_status with the label "license" and the
# "./license_server -config" command-line fragment as the pattern.
ps_status_license () {
    local svc_label=license
    local proc_regex="./license_server -config"
    ps_status "$svc_label" "$proc_regex"
}
# Supervisor status of one paas plugin.
#
# Arguments:
#   $1 - plugin name (e.g. log_parser, log_alert, log_agent)
# Globals read: INSTALL_PATH, LAN_IP
# Returns: 1 when "workon <plugin>" fails or no supervisord for the plugin
#          is found, 0 otherwise.
ps_status_plugin () {
    local ret=0
    local plugin=$1

    workon $plugin || return 1

    # process_is_running is called with a single pattern argument everywhere
    # else in this file; the stray leading "$plugin" argument is dropped so
    # the supervisord pattern is the one that is actually matched.
    # NOTE(review): process_is_running is defined elsewhere -- confirm it
    # takes the pattern as its first argument.
    if process_is_running "supervisord .*paas_plugins-$plugin.conf" >/dev/null; then
        supervisorctl -c $INSTALL_PATH/etc/supervisor-paas_plugins-$plugin.conf status \
            | awk -v IP=$LAN_IP -v P=plugin/$plugin '{printf "[%s] %-8s%4s%s\n", IP, P, "", $0}'
    else
        echo "[$(blue_echo $LAN_IP)] $plugin: $(red_echo EXIT)"
        ret=$((ret + 1))
    fi

    return $ret
}
# Status of influxdb: delegates to ps_status with the label "influxdb" and a
# pattern for influxd started with /etc/influxdb/influxdb.conf.
#
# Returns: 0 when ps_status succeeds, 1 otherwise.
ps_status_influxdb () {
    if ps_status influxdb "influxd .*-config /etc/influxdb/influxdb.conf"; then
        return 0
    fi
    return 1
}
# Status of mongodb: delegates to ps_status with the label "mongod" and a
# pattern for mongod started with $INSTALL_PATH/etc/mongodb.yaml.
# Globals read: INSTALL_PATH
ps_status_mongodb () {
    local conf_file="$INSTALL_PATH/etc/mongodb.yaml"
    ps_status mongod "mongod -f .*$conf_file"
}
# Status of etcd: delegates to ps_status with the label "etcd" and a pattern
# for an etcd command line with --name and initial-advertise options.
# Returns the status of ps_status.
ps_status_etcd () {
    local svc_label=etcd
    local proc_regex="etcd --name.*initial-advertise"
    ps_status "$svc_label" "$proc_regex"
}
# Status of the paas plugins on this host: log_parser and log_alert always,
# log_agent only when $INSTALL_PATH/.installed_module mentions one of
# appo, appt, paas_agent, open_paas or paas.
#
# Globals read: INSTALL_PATH
# Returns: the number of plugin checks that failed.
ps_status_paas_plugins () {
    local ret=0

    ps_status_plugin log_parser || ret=$((ret + 1))
    ps_status_plugin log_alert || ret=$((ret + 1))

    # -q: the grep is only a test; without it the matched lines were printed
    # in the middle of the status report.
    if grep -qE '(appo|appt|paas_agent|open_paas|paas)' "$INSTALL_PATH/.installed_module"; then
        ps_status_plugin log_agent || ret=$((ret + 1))
    fi

    return $ret
}
# Run ps_status_<module> for every module named in the first column of
# $INSTALL_PATH/.installed_module, skipping "paas_plugins".
#
# Globals read: INSTALL_PATH
# Returns: the status of the last command executed in the loop.
status_all () {
    local mod
    local installed

    installed=$(awk '{print $1}' $INSTALL_PATH/.installed_module)

    for mod in $installed; do
        case $mod in
            paas_plugins) continue ;;
        esac
        ps_status_$mod
    done
}
# Dispatch a status query to the matching handler.
#
# Arguments:
#   $1 - module name: "saas-*" runs status_saas, "all" runs status_all,
#        anything else runs ps_status_<module>
#   $2 - optional project name, passed to ps_status_<module>
# Returns: the status of the handler that ran.
get_status () {
    local module=$1
    local proj=$2

    case $module in
        saas-*) status_saas ;;
        all)    status_all ;;
        *)      ps_status_$module $proj ;;
    esac
}
# Run the SaaS status handler that matches the environment type recorded in
# $INSTALL_PATH/.app.env: the first word of the file's first line selects
# _status_saas_<type>.
#
# Globals read:    INSTALL_PATH
# Globals written: env_type, ignore (filled by read)
status_saas () {
    local handler

    read env_type ignore < $INSTALL_PATH/.app.env
    handler=_status_saas_$env_type
    $handler
}
# SaaS status for docker deployments: prints the output of "docker ps" and
# returns its status.
_status_saas_docker () {
    local docker_args=(ps)
    docker "${docker_args[@]}"
}
# SaaS status for virtualenv deployments: for every entry under
# $INSTALL_PATH/paas_agent/apps/Envs, run that app's own supervisorctl
# against ../projects/<app>/conf/supervisord.conf and print each status line
# prefixed with the app code (left-aligned, 20 columns, then a tab).
#
# Globals read: INSTALL_PATH
_status_saas_virtualenv () {
    local venv_path=$INSTALL_PATH/paas_agent/apps/Envs
    local sp_conf app_code app_dir

    # Glob instead of parsing ls output; the -e test skips the literal
    # pattern that is left behind when the directory is empty or missing.
    for app_dir in "$venv_path"/*; do
        [ -e "$app_dir" ] || continue
        app_code=${app_dir##*/}
        sp_conf=${venv_path%/*}/projects/$app_code/conf/supervisord.conf
        # The pipe into awk was missing, so "awk ..." used to be passed to
        # supervisorctl as extra arguments and no line was ever prefixed.
        "$venv_path/$app_code/bin/supervisorctl" -c "$sp_conf" status all \
            | awk -v P="$app_code" '{ printf("%-20s\t%s\n", P, $0) }'
    done
}
# Status of tsdbproxy: delegates to ps_status with the label "tsdbproxy" and
# a pattern for tsdb-proxy started with a tsdb-proxy.conf config.
# Returns the status of ps_status.
ps_status_tsdbproxy () {
    local svc_label=tsdbproxy
    local proc_regex="tsdb-proxy -config.*tsdb-proxy.conf"
    ps_status "$svc_label" "$proc_regex"
}
# Status of zabbix_agent: delegates to ps_status with the label
# "zabbix_agent" and the "zabbix_agentd -c" command-line fragment.
ps_status_zabbix_agent () {
    local svc_label=zabbix_agent
    local proc_regex="zabbix_agentd -c"
    ps_status "$svc_label" "$proc_regex"
}
# Status of zabbix: checks the zabbix server process, then the nginx master
# process that is reported under the label "zabbix_web".
#
# Returns: the status of the second (zabbix_web) check only.
ps_status_zabbix () {
    local server_regex="zabbix_server -c"
    local web_regex="nginx: master"

    ps_status zabbix_server "$server_regex"
    ps_status zabbix_web "$web_regex"
}
# Status of gse_agent: delegates to ps_status with the label "gse_agent" and
# a pattern for gse_agent started with an agent.conf file.
ps_status_gse_agent () {
    # The label was misspelled "gse_aegnt", which showed up verbatim in the
    # printed status line.
    ps_status gse_agent "gse_agent -f .*agent.conf"
}
# Status of mongodb: delegates to ps_status with the label "mongod" and a
# pattern for mongod started with $INSTALL_PATH/etc/mongodb.yaml.
# NOTE(review): a function with this name and the same body is already
# defined earlier in this file; this later definition is the one in effect.
# Globals read: INSTALL_PATH
ps_status_mongodb () {
    local mongo_conf="$INSTALL_PATH/etc/mongodb.yaml"
    ps_status mongod "mongod -f .*$mongo_conf"
}
# Query the paas plugin status on every host that runs one, through rcmd:
#   - log_agent on each APPO_IP host and on APPT_IP
#   - log_parser and log_agent on each PAAS_IP host
#   - log_alert on $PAAS_IP (the first PAAS_IP element)
#
# Globals read: APPO_IP, APPT_IP, PAAS_IP
# Returns: the status of the final rcmd call.
_status_paas_plugins () {
    local ip

    for ip in "${APPO_IP[@]}" $APPT_IP; do
        rcmd root@$ip "source ./status.rc; ps_status_plugin log_agent"
    done

    for ip in "${PAAS_IP[@]}"; do
        # The two checks need a ';' between them. Without it the second
        # command name and its argument became extra arguments of the first
        # call, and log_agent was never checked on these hosts.
        rcmd root@$ip "source ./status.rc; ps_status_plugin log_parser; ps_status_plugin log_agent"
    done

    rcmd root@$PAAS_IP "source ./status.rc; ps_status_plugin log_alert"
}
# Run get_status for a module on every host that is expected to run it.
#
# Arguments:
#   $1 - module name (or "all", "paas_plugins"/"plugin", "saas-o", "saas-t")
#   $2 - optional project name
#   $3 - optional process name (forwarded to the remote get_status call)
# Globals read:    ALL_IP, PAAS_IP, INFLUXDB_IP, APPO_IP, APPT_IP, APPENV,
#                  INSTALL_PATH
# Globals written: TARGET_IPS, ip
# Returns: for paas_plugins/plugin the status of _status_paas_plugins;
#          otherwise the status of the last rcmd call (0 when there are no
#          target hosts).
status () {
    local module=$1
    local proj=$2
    local proc=$3

    case $module in
        paas_plugins|plugin)
            _status_paas_plugins
            return $?
            ;;
        all)
            # paas_plugin processes are spread over hosts without any regular
            # pattern, so they are handled separately here. Correspondingly,
            # the remote "get_status all" skips paas_plugins.
            # TODO: to be optimized
            # NOTE(review): the file name is spelled ".installed_sumary" --
            # confirm this matches the file the installer actually writes.
            if grep -q paas_plugins $INSTALL_PATH/.installed_sumary; then
                _status_paas_plugins
            fi
            TARGET_IPS=( ${ALL_IP[@]} )
            ;;
        consul|gse_agent)
            TARGET_IPS=( ${ALL_IP[@]} )
            ;;
        open_paas)
            TARGET_IPS=( ${PAAS_IP[@]} )
            ;;
        tsdbproxy)
            TARGET_IPS=( ${INFLUXDB_IP[@]} )
            ;;
        saas-o)
            TARGET_IPS=( ${APPO_IP[@]} )
            ;;
        saas-t)
            TARGET_IPS=( ${APPT_IP[@]} )
            ;;
        *)
            TARGET_IPS=( $(map_name2ip $module $proj) )
            ;;
    esac

    # ${module%0*} drops the shortest trailing part that starts with "0"
    # before the name is sent to the remote get_status.
    for ip in ${TARGET_IPS[@]}; do
        rcmd root@$ip "source ./status.rc; export APPENV=$APPENV; get_status ${module%0*} $proj $proc"
    done
}
# Report the NTP role of this host, whether ntpd is active, the clock offset
# reported by "ntpdate -q", and the current reference server.
#
# The role label starts as "ntp(client)"; "(server-master)" is appended when
# LAN_IP equals NTP_IP0 and "(server-backup)" when LAN_IP is one of
# NTP_IP[1..]. The offset is queried against NTP_SERVER[0] on the master
# (falling back to NTP_IP0 when that is empty) and against NTP_IP0 elsewhere.
#
# Arguments:
#   $1 - project name (stored but not used)
# Globals read:    LAN_IP, NTP_IP0, NTP_IP, NTP_SERVER
# Globals written: server_time_dist, status, server_now
# Returns: 0 when "syscmd_byos is-active ntpd" succeeds, 1 otherwise.
ps_status_ntp () {
    local module="ntp(client)"
    local proj="$1"
    local ret
    local server

    # Master NTP server role.
    if [[ "$LAN_IP" == "$NTP_IP0" ]]; then
        server=${NTP_SERVER[0]}
        module="$module(server-master)"
    fi

    # Backup NTP server role.
    if is_string_in_array "$LAN_IP" "${NTP_IP[@]:1}"; then
        module="$module(server-backup)"
    fi

    server_time_dist=$( ntpdate -q ${server:-$NTP_IP0} 2>&1 | grep -oP '(?<=: ).*' )

    if ! syscmd_byos is-active ntpd &> /dev/null; then
        status="$(red_echo EXIT)"
        server_now="$(red_echo NULL)"
        ret=1
    else
        status="$(green_echo RUNNING)"
        server_now=$( ntpdc -c sysinfo | grep -oP '(?<=\[).*[^\]]' )
        ret=0
    fi

    log "[$(blue_echo $LAN_IP)] $module: $status"
    log "[$(blue_echo $LAN_IP)] offset: $server_time_dist"
    log "[$(blue_echo $LAN_IP)] reference server: $server_now"
    return $ret
}