需求分析
ansible作为一款运维工具极大地方便了运维人员的工作,执行结果默认是输出到cli终端和日志中的,但在对大批量主机执行playbook时,难免会有报错,这时错误信息就会被淹没掉,以至于不能及时发现;除此之外,有时我们还会有处理执行结果的需求。
callback插件
ansible官方提供了一个callback插件,可以对执行结果做自定义处理,类似于回调,但是默认不做任何处理;可以在ansible.cfg(ansible的配置文件)中的callback_plugins = 'xxx'处进行配置,其中xxx为存放插件的文件夹。
实现方法
在callback插件中定义了若干场景,如主机不可达,执行任务失败,执行任务成功等,分别对应不同的方法,如:
class CallbackModule(CallbackBase):
    """Skeleton callback plugin that records per-host playbook events.

    Each hook either does nothing or forwards the event to ``log``, a helper
    defined outside this listing. According to the original listing, the
    results are logged per host under /var/log/ansible/hosts.
    """

    # ---- runner-level hooks -------------------------------------------

    def on_any(self, *args, **kwargs):
        """Accept any arguments and do nothing."""

    def runner_on_failed(self, host, res, ignore_errors=False):
        """Record ``res`` for ``host`` with the 'FAILED' category."""
        log(host, 'FAILED', res)

    def runner_on_ok(self, host, res):
        """Record ``res`` for ``host`` with the 'OK' category."""
        log(host, 'OK', res)

    def runner_on_skipped(self, host, item=None):
        """Record a 'SKIPPED' entry for ``host``; the payload is a fixed '...'."""
        log(host, 'SKIPPED', '...')

    def runner_on_unreachable(self, host, res):
        """Record ``res`` for ``host`` with the 'UNREACHABLE' category."""
        log(host, 'UNREACHABLE', res)

    def runner_on_no_hosts(self):
        """Do nothing."""

    def runner_on_async_poll(self, host, res, jid, clock):
        """Do nothing."""

    def runner_on_async_ok(self, host, res, jid):
        """Do nothing."""

    def runner_on_async_failed(self, host, res, jid):
        """Record ``res`` for ``host`` with the 'ASYNC_FAILED' category."""
        log(host, 'ASYNC_FAILED', res)

    # ---- playbook-level hooks -----------------------------------------

    def playbook_on_start(self):
        """Do nothing."""

    def playbook_on_notify(self, host, handler):
        """Do nothing."""

    def playbook_on_no_hosts_matched(self):
        """Do nothing."""

    def playbook_on_no_hosts_remaining(self):
        """Do nothing."""

    def playbook_on_task_start(self, name, is_conditional):
        """Do nothing."""

    def playbook_on_vars_prompt(
        self,
        varname,
        private=True,
        prompt=None,
        encrypt=None,
        confirm=False,
        salt_size=None,
        salt=None,
        default=None,
    ):
        """Do nothing."""

    def playbook_on_setup(self):
        """Do nothing."""

    def playbook_on_import_for_host(self, host, imported_file):
        """Record ``imported_file`` for ``host`` with the 'IMPORTED' category."""
        log(host, 'IMPORTED', imported_file)

    def playbook_on_not_import_for_host(self, host, missing_file):
        """Record ``missing_file`` for ``host`` with the 'NOTIMPORTED' category."""
        log(host, 'NOTIMPORTED', missing_file)

    def playbook_on_play_start(self, name):
        """Do nothing."""

    def playbook_on_stats(self, stats):
        """Do nothing."""
可以从方法名基本判断每个函数的作用,当我们需要自定义结果处理时,继承CallbackBase类,并重写要用到的方法即可。
实例演示
在本案例中,ansible作为控制所有主机的枢纽,又做了多项自定义工作,所以相关文件都将部署在一个独立项目工作空间中,并使用钉钉的webhook作为报警通知
建立项目工作空间
创建目录:mkdir /opt/ansible_admin
安装python3(这里使用python3的ansible组件)
yum -y install epel-release # 若已安装epel源可忽略此步骤
yum -y install python3
mkdir -p ~/.pip # 使用国内pip源加速
cat > ~/.pip/pip.conf <
创建并激活隔离环境
python3 -m venv /opt/ansible_admin/.venv
source /opt/ansible_admin/.venv/bin/activate
安装ansible:pip install ansible
自定义ansible配置
在工作空间(/opt/ansible_admin/)创建文件ansible.cfg
cat > /opt/ansible_admin/ansible.cfg <
查看ansible配置信息
(.venv) [root@localhost ansible_admin]# ansible --version
ansible 2.9.12
config file = /opt/ansible_admin/ansible.cfg
configured module search path = ['/root/.ansible/plugins/modules', '/usr/share/ansible/plugins/modules']
ansible python module location = /opt/ansible_admin/.venv/lib64/python3.6/site-packages/ansible
executable location = /opt/ansible_admin/.venv/bin/ansible
python version = 3.6.8 (default, Apr 2 2020, 13:34:55) [GCC 4.8.5 20150623 (Red Hat 4.8.5-39)]
(.venv) [root@localhost ansible_admin]#
可以看到ansible的版本,使用的python版本,以及使用的配置文件(config file)
配置文件中指定callback为/opt/ansible_admin/callback
自定义回调程序
在/opt/ansible_admin/callback/中新建文件result_check.py,文件名随意,但是需与文件中CALLBACK_NAME的值相同
#!/usr/bin/env python
# coding=utf-8
import os
import socket
import traceback
import requests
from ansible.plugins.callback import CallbackBase
dingding_url = 'xxx' # custom webhook URL
def send_dingding(alert, timeout=10):
    """Post the collected alert lines to the DingTalk webhook as one text message.

    Args:
        alert: Iterable of alert strings. They are joined with ``os.linesep``.
        timeout: Seconds to wait for the webhook before ``requests`` raises.
            Without a timeout a stalled webhook would block the caller
            indefinitely.

    Returns:
        ``(status_code, content)`` from the webhook response, or ``None``
        when ``alert`` is empty and nothing is sent.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    if not alert:
        return None

    content = os.linesep.join(alert)
    # Cap the body at 4000 characters before the hostname footer is added.
    # NOTE(review): presumably this keeps the message under the webhook's
    # size limit -- confirm against the DingTalk robot documentation.
    if len(content) > 4000:
        content = content[:4000]
    # Identify the machine that sent the alert (the host running this code).
    content += '\nhostname:{}'.format(socket.gethostname())

    data = {
        "msgtype": "text",
        "text": {
            "content": content,
        },
    }
    r = requests.post(dingding_url, json=data, timeout=timeout)
    return (r.status_code, r.content)
class CallbackModule(CallbackBase):
    """Collect failed task results and send them to DingTalk when the run ends.

    Failed results are buffered in ``self.results`` as they arrive. When the
    playbook statistics hook fires, each buffered result is turned into alert
    lines, which are passed to ``send_dingding`` in a single call.
    """

    CALLBACK_VERSION = 2.0
    CALLBACK_TYPE = 'notification'
    CALLBACK_NAME = 'result_check'
    # The plugin has to be whitelisted in ansible.cfg before Ansible loads it.
    CALLBACK_NEEDS_WHITELIST = True

    def __init__(self):
        # Let the base class initialise its own state before adding ours.
        super(CallbackModule, self).__init__()
        self.results = []  # failed task results, in arrival order
        self.alert = []    # formatted alert lines ready to be sent

    def v2_runner_on_failed(self, result, ignore_errors=False):
        """Buffer a failed task result and forward it to the v1-style hook."""
        host = result._host.get_name()
        self.runner_on_failed(host, result._result, ignore_errors)
        self.results.append(result)

    def v2_runner_item_on_failed(self, result):
        """Buffer a failed loop-item result."""
        self.results.append(result)

    def _build_alert_lines(self, result):
        """Return the alert lines describing one buffered failed result.

        An ``exception`` entry is always reported. Results whose task has
        ``ignore_errors`` set produce no further lines. Otherwise one line is
        produced per stdout/stderr line, or a single msg-only line when the
        result has no stdout/stderr output.
        """
        host = result._host.get_name()
        task_name = result.task_name
        data = result._result
        lines = []

        # Values are formatted as-is. Calling .encode('utf-8') on Python 3
        # would render b'...' in the message and fail on non-string values.
        if 'exception' in data:
            alert_line = "hostname:{} task_name:{} exception:{}\r\n ".format(
                host, task_name, data['exception'])
            print('exception:', alert_line)
            lines.append(alert_line)

        # Skip results whose errors are deliberately ignored.
        if result._task_fields.get('ignore_errors'):
            return lines

        # Not every failed result carries a 'msg' key.
        msg = data.get('msg', '')
        stream_seen = False
        for key, label, tag in (('stdout_lines', 'stdout', 'msg_std:'),
                                ('stderr_lines', 'stderr', 'msg_err:')):
            for stream_line in data.get(key) or []:
                alert_line = "hostname:{} task_name:{} msg:{} {}:{}\r\n ".format(
                    host, task_name, msg, label, stream_line)
                print(tag, alert_line)
                lines.append(alert_line)
                stream_seen = True

        # Fall back to the bare message so that a failure with empty
        # stdout/stderr is still reported.
        if not stream_seen:
            lines.append("hostname:{} task_name:{} msg:{}\r\n ".format(
                host, task_name, msg))
        return lines

    def v2_playbook_on_stats(self, stats):
        """Format every buffered failure and send the alerts in one message."""
        for result in self.results:
            # Guard each result separately: one malformed result must not
            # drop the alerts for all the others.
            try:
                self.alert.extend(self._build_alert_lines(result))
            except Exception:
                self.alert.append(traceback.format_exc())
        if self.alert:
            send_dingding(self.alert)
这里仅对部分方法做了重写,至此,若执行playbook有报错,将发送到公司的钉钉报警群中