腾讯蓝鲸-consul用法

consul作用

  • 域名解析
  • 服务状态检测

域名解析功能

1. 采用supervisor管理consul

/opt/py27/bin/python /opt/py27/bin/supervisord -c /data/bkce/etc/supervisor-consul.conf
cat /data/bkce/etc/supervisor-consul.conf
[unix_http_server]
file=/data/bkce/logs/consul/supervisor.sock   ; (the path to the socket file)

;[inet_http_server]         ; inet (TCP) server disabled by default
;port=127.0.0.1:9001        ; (ip_address:port specifier, *:port for all iface)
;username=user              ; (default is no username (open server))
;password=123               ; (default is no password (open server))

[supervisord]
logfile=/data/bkce/logs/consul/supervisord_out.log ; (main log file;default $CWD/supervisord_out.log)
logfile_maxbytes=50MB        ; (max main logfile bytes b4 rotation;default 50MB)
logfile_backups=10           ; (num of main logfile rotation backups;default 10)
loglevel=info                ; (log level;default info; others: debug,warn,trace)
pidfile=/data/bkce/logs/consul/supervisord.pid ; (supervisord pidfile;default supervisord.pid)
nodaemon=false               ; (start in foreground if true;default false)
minfds=1024                  ; (min. avail startup file descriptors;default 1024)
minprocs=200                 ; (min. avail process descriptors;default 200)

[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface

[supervisorctl]
serverurl=unix:///data/bkce/logs/consul/supervisor.sock ; use a unix:// URL  for a unix socket
;serverurl=http://127.0.0.1:9001 ; use an http:// url to specify an inet socket
;username=chris              ; should be same as http_username if set
;password=123                ; should be same as http_password if set
;prompt=mysupervisor         ; cmd line prompt (default "supervisor")
;history_file=~/.sc_history  ; use readline history if available

[program:agent]
directory=/usr/bin/
command=/usr/bin/consul agent -config-file=/data/bkce/etc/consul.conf -config-dir=/data/bkce/etc/consul.d
autostart=true
autorestart=true
redirect_stderr=true
stdout_logfile=/data/bkce/logs/consul/consul.log

2. 配置consul agent(consul.conf,DNS端口设为53,由agent提供域名解析)

cat /data/bkce/etc/consul.conf 
{
    "rejoin_after_leave": true, 
    "skip_leave_on_interrupt": true, 
    "recursors": [
        "dns1.host.ip", 
        "dns2.host.ip", 
        "dns3.host.ip"
    ], 
    "bind_addr": "10.0.**.128", 
    "node_id": "1b04ee75-82c3-43c0-9ebf-1fbf72d0f76a", 
    "retry_join": [
        "10.0.**.128"
    ], 
    "log_level": "info", 
    "server": true, 
    "datacenter": "dc", 
    "data_dir": "/data/bkce/public/consul", 
    "leave_on_terminate": false, 
    "bootstrap": true, 
    "node_name": "nginx-1", 
    "pid_file": "/data/bkce/logs/consul/consul.pid", 
    "encrypt": "H9KhSNqXml1e0IER7ntZmA==", 
    "ports": {
        "dns": 53
    }
}

3. 配置consul agent地址作为DNS解析地址

cat /etc/resolv.conf
# 指向consul agent 的地址 
nameserver consul.host.ip

健康检查

/data/bkce/etc/consul.d是consul启动命令跟的配置目录

# cd /data/bkce/etc/consul.d
# cat appo.json 
{
    "service": {
        "id": "appo-1", 
        "checks": [
            {
                "service_id": "appo-1", 
                "interval": "10s", 
                "script": "/data/bkce/bin/health_check/check_proc_exists -m appo"
            }
        ], 
        "name": "appo", 
        "enableTagOverride": false, 
        "address": "10.0.53.128"
    }
}

上面的配置中有个检查脚本

"script": "/data/bkce/bin/health_check/check_proc_exists -m appo"

# cat /data/bkce/bin/health_check/check_proc_exists
#!/bin/bash

# Print the command-line help (script name taken from $0) and exit 0.
usage () {
    local prog=${0##*/}

    cat <<EOF
$prog -m module_name -h

for example:
  $prog -m paas
  $prog -m bkdata -p dataapi
EOF

    exit 0
}


# Parse options: -m <module> selects the module to check, -p <project>
# optionally narrows the check to one project; -h or anything else prints usage.
while getopts m:p:h arg; do
    case $arg in
        m)  module=$OPTARG ;;
        p)  proj=$OPTARG ;;
        *)  usage ;;
    esac
done

source /root/.bkrc

# CTRL_DIR is presumably exported by /root/.bkrc (TODO confirm). Without this
# guard an unset CTRL_DIR makes `cd` jump to $HOME and ./status.rc would be
# sourced from the wrong directory; report the check as critical instead.
if [ -z "$CTRL_DIR" ] || ! cd "$CTRL_DIR"; then
    echo "consul health checking: cannot cd to CTRL_DIR '$CTRL_DIR'" >&2
    exit 2
fi
source ./status.rc
export LOG_FILE=/tmp/health_check.log

# Exit 0 when get_status succeeds, 2 otherwise (consul treats any exit code
# other than 0 and 1 as critical).
if get_status "$module" "$proj"; then
    log "consul health checking: $module-${proj:-COMMON}: ok"
    exit 0
else
    log "consul health checking: $module-${proj:-COMMON}: critical"
    exit 2
fi

这个脚本中检测功能是导入的./status.rc

# cat status.rc 
# vim:ft=sh
#

# Load the shared helper library from the current directory; this file must
# therefore be sourced with the working directory set to where utils.fc lives.
source ./utils.fc

# load_functions is not defined in this file (presumably provided by utils.fc,
# TODO confirm); it is called for the "third" and "extra" groups of "status".
load_functions status third
load_functions status extra

# Report the supervisor-managed status of fta (module and project are both
# "fta"); the return status is that of _ps_status.
ps_status_fta () {
    local name=fta

    _ps_status "$name" "$name"
}

# Print the supervisor status of one project of a module.
# Arguments: $1 - module name
#            $2 - project name
#            $3 - supervisorctl command to use (default: supervisorctl)
# Outputs:   one line per supervisor entry, prefixed with "[LAN_IP] project",
#            or a single EXIT line when no matching supervisord is found.
# Returns:   1 when supervisord is not running, otherwise the pipeline status.
_ps_status () {
    local module=$1 proj=$2
    local ctl=${3:-supervisorctl}
    local conf_suffix="$module-$proj.conf"

    workon $proj >/dev/null 2>&1

    # Guard clause: no supervisord for this module/project means EXIT.
    if ! process_is_running "supervisord -c .*$conf_suffix" >/dev/null; then
        echo "[$(blue_echo $LAN_IP)] $proj: $(red_echo EXIT)"
        return 1
    fi

    $ctl -c $INSTALL_PATH/etc/supervisor-$conf_suffix status \
        | awk -v IP=$LAN_IP -v P=$proj '{printf "[%s] %-8s%4s%s\n", IP, P, "", $0}'
}

# Report the status of bkdata projects.
# Arguments: $1 - optional project; when empty, dataapi, monitor and databus
#                 are all checked.
# Returns:   with a project: the status of _ps_status for it;
#            without: the number of projects whose check failed.
ps_status_bkdata () {
    local module=bkdata
    local proj=$1
    local failed=0

    echo "---------------------------------------------------------------------------------------------------------"

    # Single project requested: its status is the result.
    if [ -n "$proj" ]; then
        _ps_status $module $proj
        return $?
    fi

    for proj in dataapi monitor databus; do
        if ! _ps_status $module $proj; then
            failed=$((failed + 1))
        fi
    done

    return $failed
}

# Status of the paas_agent; delegates to ps_status_paas_agent with flag "O".
ps_status_appo () {
    local env_flag=O

    ps_status_paas_agent "$env_flag"
}

# Status of the paas_agent; delegates to ps_status_paas_agent with flag "T".
ps_status_appt () {
    local env_flag=T

    ps_status_paas_agent "$env_flag"
}

# Report the paas_agent supervisor status followed by the nginx status.
# Globals:  INSTALL_PATH, LAN_IP, APPENV (read)
# Returns:  1 if `workon paas_agent` fails or supervisord is not running,
#           0 otherwise. The nginx check result does not affect the status.
ps_status_paas_agent () {
    local module=paas_agent
    local rc=0

    workon paas_agent || return 1

    if ! process_is_running "supervisord -c .*${module}.conf" >/dev/null; then
        rc=1
        echo "[$(blue_echo $LAN_IP)] $module: $(red_echo EXIT)"
    else
        supervisorctl -c $INSTALL_PATH/etc/supervisor-${module}.conf status \
            | awk -v IP=$LAN_IP -v P="paas_agent($APPENV)" '{printf "[%s] %-8s%4s%s\n",IP, P, "", $0}'
    fi

    ps_status nginx "nginx: master process nginx"

    return $rc
}

# Check for a running dockerd via ps_status; returns its status.
ps_status_docker () {
    local pattern="dockerd .*docker.sock"

    ps_status docker "$pattern"
}

# Alias for ps_status_open_paas.
# Arguments: forwarded unchanged; "$@" is quoted so that each argument reaches
#            ps_status_open_paas as one word instead of being re-split.
# Returns:   the status of ps_status_open_paas.
ps_status_paas () {
    ps_status_open_paas "$@"
}

# Report the supervisor status of open_paas.
# Arguments: $1 - optional supervisor program name passed to `status`
# Returns:   1 if `workon open_paas` fails or supervisord is not running,
#            0 otherwise.
ps_status_open_paas () {
    local proj=$1

    workon open_paas || return 1
    echo "---------------------------------------------------------------------------------------------------------"

    if ! process_is_running "supervisord -c .*open_paas.conf" >/dev/null; then
        echo "[$(blue_echo $LAN_IP)] open_paas: $(red_echo EXIT)"
        return 1
    fi

    supervisorctl -c $INSTALL_PATH/etc/supervisor-open_paas.conf status ${proj} \
        | awk -v IP=$LAN_IP '{printf "[%s] %-8s%4s%s\n",IP, "open_paas", "", $0}'

    return 0
}

# Generic process check: print "[LAN_IP] <module>: RUNNING|EXIT".
# Arguments: $1 - label to print (an empty label is a silent no-op, status 0)
#            $2 - pattern handed to process_is_running
# Globals:   _pids and status are assigned (not local), as before.
# Returns:   0 when process_is_running printed at least one word, else 1.
ps_status () {
    local module="$1"
    local pattern="$2"
    local ret

    if [ -z "$module" ]; then
        return 0
    fi

    _pids=( $(process_is_running "$pattern") )
    if [ ${#_pids[@]} -eq 0 ]; then
        status="$(red_echo EXIT)"
        ret=1
    else
        status="$(green_echo RUNNING)"
        ret=0
    fi

    echo "[$(blue_echo $LAN_IP)] $module: $status"
    return $ret
}

# Check for the nginx master process via ps_status; returns its status.
ps_status_nginx () {
    local pattern="nginx: master process nginx"

    ps_status nginx "$pattern"
}

# Report whether the rpcbind and nfs services are running, using
# `service <name> status` and the ok/err reporting helpers.
# An explicit if/else replaces `cmd && ok || err`, which would also call err
# whenever ok itself returned non-zero.
# Returns: the status of the last ok/err call.
ps_status_nfs () {
    if service rpcbind status >/dev/null 2>&1; then
        ok "rpcbind is running"
    else
        err "rpcbind not running"
    fi

    if service nfs status >/dev/null 2>&1; then
        ok "nfs is running"
    else
        err "nfs is not running"
    fi
}

# Report the status of cmdb: its nginx front end and the supervisor-managed
# "server" project (checked with /opt/py27/bin/supervisorctl).
# Returns: the number of failed checks. The _ps_status result is now counted
#          too; previously a stopped cmdb server still returned 0 as long as
#          nginx was up.
ps_status_cmdb () {
    local module=cmdb
    local proj=server
    local ret=0

    ps_status cmdb-nginx "nginx: master process nginx" || ret=$((ret + 1))
    _ps_status $module $proj /opt/py27/bin/supervisorctl || ret=$((ret + 1))

    return $ret
}

# Check for the job process via ps_status; returns its status.
ps_status_job () {
    local pattern="job.conf.*job-exec.war"

    ps_status job "$pattern"
}

# Check a JVM-based service by grepping the output of `jps -lv`.
# Arguments: $1 - label to print
#            $2 - grep pattern matched against the jps output
# Returns:   0 when a line matches (RUNNING), 2 otherwise (EXIT).
ps_status_java () {
    local module=$1
    local pattern=$2

    if ! jps -lv | grep -q "$pattern"; then
        echo "[$(blue_echo $LAN_IP)] $module: $(red_echo EXIT)"
        return 2
    fi

    echo "[$(blue_echo $LAN_IP)] $module: $(green_echo RUNNING)"
    return 0
}

# Check for a Kafka broker JVM via ps_status_java; returns its status.
ps_status_kafka () {
    local pattern="kafka.Kafka .*-Dkafka.logs.dir="

    ps_status_java kafka "$pattern"
}

# Check for a ZooKeeper JVM via ps_status_java; returns its status.
ps_status_zk () {
    local pattern="org.apache.zookeeper.server.quorum.QuorumPeerMain"

    ps_status_java zk "$pattern"
}

# Check for an Elasticsearch JVM via ps_status_java; returns its status.
ps_status_es () {
    local pattern="org.elasticsearch.bootstrap.Elasticsearch .*Des.path.home=.*/es"

    ps_status_java es "$pattern"
}

# Check for the ActiveMQ process via ps_status; returns its status.
ps_status_activemq () {
    local pattern="activemq/bin/activemq.jar"

    ps_status activemq "$pattern"
}

# Check for the redis-server bound to LAN_IP:REDIS_PORT via ps_status;
# returns its status.
ps_status_redis () {
    local pattern="redis-server $LAN_IP:$REDIS_PORT"

    ps_status redis "$pattern"
}

# Report the status of the local redis-server and of redis sentinel.
# Globals:  LAN_IP, REDIS_PORT (read)
# Returns:  the number of failed checks (0-2).
ps_status_redis_cluster () {
    local ret=0

    ps_status redis "redis-server $LAN_IP:$REDIS_PORT" || ret=$((ret + 1))
    # The brackets are escaped so the pattern matches the literal
    # "[sentinel]" tag; unescaped, "[sentinel]" is a bracket expression that
    # matches any single one of those letters.
    ps_status sentinel 'redis-server.*\[sentinel\]' || ret=$((ret + 1))

    return $ret
}

# Check for the beanstalkd process via ps_status; returns its status.
ps_status_beanstalk () {
    local pattern="beanstalkd -l"

    ps_status beanstalk "$pattern"
}

# Check for the RabbitMQ (beam) process via ps_status; returns its status.
ps_status_rabbitmq () {
    local pattern="beam.*/rabbitmq"

    ps_status rabbitmq "$pattern"
}

# Check for the mysqld whose --basedir is under INSTALL_PATH/service/mysql;
# returns the status of ps_status.
ps_status_mysql () {
    local pattern="mysqld .*--basedir=$INSTALL_PATH/service/mysql"

    ps_status mysqld "$pattern"
}

# Print the status of every module named on the command line.
# Arguments: module names, e.g. gse_api gse_data
# Outputs:   one "[LAN_IP] <module>: <status>" line per module, where status
#            is RUNNING (process_paired accepts the PIDs), ERROR STATUS (PIDs
#            found but not accepted) or EXIT (no PIDs found).
# Returns:   the number of modules that are not RUNNING.
# Fix: the RUNNING branch used `let ret+=$ret`, which doubled the failure
# count for every healthy module after a failed one; enough doublings push the
# value past 255, where the shell's exit status wraps around to 0.
_check_status_bygroup () {
    local ret=0
    local m proc_pattern status
    local -a pids

    for m in "$@"; do
        # e.g. gse_api -> "gse_api .*gse/api.conf" (every "_" becomes "/").
        proc_pattern="${m} .*${m//_//}.conf"
        pids=( $(process_is_running "$proc_pattern") )
        if process_paired ${pids[@]}; then
            status="$(green_echo RUNNING)"
        elif [ "${#pids[@]}" -ne 0 ]; then
            status="$(red_echo 'ERROR STATUS')"
            ret=$((ret + 1))
        else
            status="$(red_echo 'EXIT')"
            ret=$((ret + 1))
        fi
        echo "[$(blue_echo $LAN_IP)] $m: $status"
    done

    return $ret
}

# Report the status of the gse modules: the optional group is printed first,
# then the required group.
# Returns: the status of the required-group check only; the result of the
#          optional group is not used.
ps_status_gse () {
    local required=(
        gse_api gse_btsvr gse_data gse_dba
        gse_task gse_syncdata gse_procmgr gse_dataop
    )
    local optional=(gse_alarm gse_ops gse_opts)

    _check_status_bygroup "${optional[@]}"
    _check_status_bygroup "${required[@]}"
}

# Report whether a consul process exists, using `pidof consul`.
# Returns: 0 when pidof succeeds (RUNNING), 1 otherwise (EXIT).
ps_status_consul () {
    if pidof consul >/dev/null 2>&1; then
        echo "[$(blue_echo $LAN_IP)] consul: $(green_echo RUNNING)"
        return 0
    fi

    echo "[$(blue_echo $LAN_IP)] consul: $(red_echo EXIT)"
    return 1
}

# Check for the license_server process via ps_status; returns its status.
ps_status_license () {
    local pattern="./license_server -config"

    ps_status license "$pattern"
}

# Report the supervisor status of one paas plugin.
# Arguments: $1 - plugin name (e.g. log_parser, log_alert, log_agent)
# Globals:   INSTALL_PATH, LAN_IP (read)
# Returns:   1 if `workon` fails or the plugin's supervisord is not running,
#            0 otherwise.
# Fix: process_is_running was called with the plugin name as an extra first
# argument, while every other caller in this file passes only the pattern;
# the supervisord pattern is now the sole argument.
ps_status_plugin () {
    local ret=0
    local plugin=$1

    workon $plugin || return 1
    if process_is_running "supervisord .*paas_plugins-$plugin.conf" >/dev/null; then
        supervisorctl -c $INSTALL_PATH/etc/supervisor-paas_plugins-$plugin.conf status \
            | awk -v IP=$LAN_IP -v P=plugin/$plugin '{printf "[%s] %-8s%4s%s\n", IP, P, "", $0}'
    else
        echo "[$(blue_echo $LAN_IP)] $plugin: $(red_echo EXIT)"
        ret=$((ret + 1))
    fi

    return $ret
}

# Check for influxd started with /etc/influxdb/influxdb.conf.
# Returns: 0 when ps_status succeeds, 1 otherwise.
ps_status_influxdb () {
    local ret=0

    if ! ps_status influxdb "influxd .*-config /etc/influxdb/influxdb.conf"; then
        ret=1
    fi

    return $ret
}

# Check for the mongod started with INSTALL_PATH/etc/mongodb.yaml via
# ps_status; returns its status.
ps_status_mongodb () {
    local pattern="mongod -f .*$INSTALL_PATH/etc/mongodb.yaml"

    ps_status mongod "$pattern"
}

# Check for the etcd process via ps_status; returns its status.
ps_status_etcd () {
     local pattern="etcd --name.*initial-advertise"

     ps_status etcd "$pattern"
}

# Report the status of the paas plugins on this host: log_parser and
# log_alert always, log_agent only when INSTALL_PATH/.installed_module lists
# one of appo, appt, paas_agent, open_paas or paas.
# Returns: the number of plugin checks that failed.
# Fix: grep now runs with -q so the matched lines of .installed_module are no
# longer printed in the middle of the status report.
ps_status_paas_plugins () {
    local ret=0

    ps_status_plugin log_parser || ret=$((ret + 1))
    ps_status_plugin log_alert || ret=$((ret + 1))

    if grep -q -E '(appo|appt|paas_agent|open_paas|paas)' "$INSTALL_PATH/.installed_module"; then
        ps_status_plugin log_agent || ret=$((ret + 1))
    fi

    return $ret
}

# Run ps_status_<module> for every module named in the first column of
# INSTALL_PATH/.installed_module, skipping paas_plugins.
# Returns: the status of the last command run by the loop.
status_all () {
    local mod

    for mod in $(awk '{print $1}' $INSTALL_PATH/.installed_module); do
        case $mod in
            paas_plugins) continue ;;
        esac
        ps_status_$mod
    done
}

# Dispatch a status query on the local host.
# Arguments: $1 - module: "saas-*" runs status_saas, "all" runs status_all,
#                 anything else runs ps_status_<module>
#            $2 - optional project, forwarded to ps_status_<module>
# Returns:   the status of the function that was dispatched to.
get_status () {
    local module=$1
    local proj=$2

    case $module in
        saas-*) status_saas ;;
        all)    status_all ;;
        *)      ps_status_$module $proj ;;
    esac
}

# Read the first word of INSTALL_PATH/.app.env into env_type (the rest of the
# line goes into `ignore`) and run _status_saas_<env_type>.
# Returns: the status of the dispatched function.
status_saas () {
    local env_file=$INSTALL_PATH/.app.env

    read env_type ignore < $env_file
    _status_saas_$env_type
}

# SaaS status for the docker environment type: prints the output of
# `docker ps` and returns its status.
_status_saas_docker () {
    docker ps
}

# SaaS status for the virtualenv environment type: for every entry under
# INSTALL_PATH/paas_agent/apps/Envs, run that environment's supervisorctl
# against INSTALL_PATH/paas_agent/apps/projects/<app>/conf/supervisord.conf
# and print each status line prefixed with the app code.
# Fixes: the pipe in front of awk was missing, so "awk -v ..." was passed to
# supervisorctl as extra arguments and the output was never formatted; the
# directory is now walked with a glob instead of parsing `ls`.
_status_saas_virtualenv () {
    local venv_path=$INSTALL_PATH/paas_agent/apps/Envs
    local sp_conf app_code app_path

    for app_path in "$venv_path"/*; do
        # With no entries the glob stays literal; skip it.
        [ -e "$app_path" ] || continue
        app_code=${app_path##*/}
        sp_conf=${venv_path%/*}/projects/$app_code/conf/supervisord.conf
        "$app_path/bin/supervisorctl" -c "$sp_conf" status all \
            | awk -v P="$app_code" '{ printf("%-20s\t%s\n", P, $0) }'
    done
}

# Check for the tsdb-proxy process via ps_status; returns its status.
ps_status_tsdbproxy () {
    local pattern="tsdb-proxy -config.*tsdb-proxy.conf"

    ps_status tsdbproxy "$pattern"
}

# Check for the zabbix_agentd process via ps_status; returns its status.
ps_status_zabbix_agent () {
    local pattern="zabbix_agentd -c"

    ps_status zabbix_agent "$pattern"
}

# Report the status of the zabbix server process and of its nginx web
# front end.
# Returns: the number of failed checks (0-2). Previously only the result of
#          the web check was returned, so a stopped zabbix_server went
#          unnoticed whenever nginx was up.
ps_status_zabbix () {
    local ret=0

    ps_status zabbix_server "zabbix_server -c" || ret=$((ret + 1))
    ps_status zabbix_web "nginx: master" || ret=$((ret + 1))

    return $ret
}

# Check for the gse_agent process via ps_status; returns its status.
# Fix: the label printed in the report was misspelled "gse_aegnt".
ps_status_gse_agent () {
    ps_status gse_agent "gse_agent -f .*agent.conf"
}

# Check for the mongod started with INSTALL_PATH/etc/mongodb.yaml via
# ps_status; returns its status.
ps_status_mongodb () {
    local pattern="mongod -f .*$INSTALL_PATH/etc/mongodb.yaml"

    ps_status mongod "$pattern"
}

# Query the paas plugin status on the remote hosts through rcmd:
#   - log_agent on every APPO_IP host and on APPT_IP
#   - log_parser and log_agent on every PAAS_IP host
#   - log_alert on $PAAS_IP (the first element when PAAS_IP is an array)
# Returns: the status of the last rcmd call.
# Fix: the PAAS command lacked a ";" between its two ps_status_plugin calls,
# so the second call was swallowed as extra arguments of the first and
# log_agent was never checked on PAAS hosts. The loop variable is now local.
_status_paas_plugins () {
    local ip

    for ip in ${APPO_IP[@]} $APPT_IP; do
        rcmd root@$ip "source ./status.rc; ps_status_plugin log_agent"
    done

    for ip in ${PAAS_IP[@]}; do
        rcmd root@$ip "source ./status.rc; ps_status_plugin log_parser; ps_status_plugin log_agent"
    done

    rcmd root@$PAAS_IP "source ./status.rc; ps_status_plugin log_alert"
}

# Query the status of a module on every host that runs it.
# Arguments: $1 - module name (or all / plugin / paas_plugins / saas-o / saas-t)
#            $2 - optional project
#            $3 - optional third argument, forwarded to the remote get_status
# Globals:   TARGET_IPS and ip are assigned (not local); the *_IP arrays,
#            INSTALL_PATH and APPENV are read.
# Returns:   for paas_plugins/plugin the status of _status_paas_plugins,
#            otherwise the status of the last rcmd call.
status () {
    local module=$1
    local proj=$2
    local proc=$3

    case $module in
        paas_plugins|plugin)
            _status_paas_plugins
            return $?
            ;;
        all)
            # paas_plugin processes are spread over the hosts without any
            # regular pattern, so they are handled separately here;
            # accordingly, `get_status all` below skips paas_plugins.
            # TODO: to be optimized.
            # NOTE(review): the file name ".installed_sumary" is kept as-is;
            # other functions read ".installed_module" - confirm it is intended.
            grep -q paas_plugins $INSTALL_PATH/.installed_sumary && _status_paas_plugins
            TARGET_IPS=( ${ALL_IP[@]} )
            ;;
        consul|gse_agent)
            TARGET_IPS=( ${ALL_IP[@]} )
            ;;
        open_paas)
            TARGET_IPS=( ${PAAS_IP[@]} )
            ;;
        tsdbproxy)
            TARGET_IPS=( ${INFLUXDB_IP[@]} )
            ;;
        saas-o)
            TARGET_IPS=( ${APPO_IP[@]} )
            ;;
        saas-t)
            TARGET_IPS=( ${APPT_IP[@]} )
            ;;
        *)
            TARGET_IPS=( $(map_name2ip $module $proj) )
            ;;
    esac

    # ${module%0*} drops the shortest trailing part of the name that starts
    # with "0" before it is handed to the remote get_status.
    for ip in ${TARGET_IPS[@]}; do
        rcmd root@$ip "source ./status.rc; export APPENV=$APPENV; get_status ${module%0*} $proj $proc"
    done
}

# Report the NTP role of this host, the ntpd service state, the clock offset
# reported by `ntpdate -q` and the reference server reported by ntpdc.
# Globals:  LAN_IP, NTP_IP0, NTP_IP, NTP_SERVER (read);
#           server_time_dist, status and server_now are assigned (not local).
# Outputs:  three lines through log: role/state, offset, reference server.
# Returns:  0 when `syscmd_byos is-active ntpd` succeeds, 1 otherwise.
ps_status_ntp () {
    local module="ntp(client)"
    local proj="$1"
    local ret
    local server

    # Master server: LAN_IP equals NTP_IP0; query the first NTP_SERVER entry.
    if [[ "$LAN_IP" == "$NTP_IP0" ]]; then
        module="$module(server-master)"
        server=${NTP_SERVER[0]}
    fi

    # Backup server: LAN_IP is one of the NTP_IP entries after the first.
    if is_string_in_array "$LAN_IP" "${NTP_IP[@]:1}"; then
        module="$module(server-backup)"
    fi

    server_time_dist=$( ntpdate -q ${server:-$NTP_IP0} 2>&1 | grep -oP '(?<=: ).*' )

    if ! syscmd_byos is-active ntpd &> /dev/null; then
        ret=1
        status="$(red_echo EXIT)"
        server_now="$(red_echo NULL)"
    else
        ret=0
        status="$(green_echo RUNNING)"
        server_now=$( ntpdc -c sysinfo | grep -oP '(?<=\[).*[^\]]' )
    fi

    log "[$(blue_echo $LAN_IP)] $module: $status"
    log "[$(blue_echo $LAN_IP)] offset: $server_time_dist"
    log "[$(blue_echo $LAN_IP)] reference server: $server_now"

    return $ret
}
  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

^白开水^

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值