1、页面中导入zabbix模版
- 结尾是 xml 文件;
<?xml version="1.0" encoding="UTF-8"?>
<zabbix_export>
<version>3.2</version>
<date>2018-06-04T04:12:36Z</date>
<groups>
<group>
<name>Templates</name>
</group>
</groups>
<templates>
<template>
<template>docker-status</template>
<name>docker-status</name>
<description/>
<groups>
<group>
<name>Templates</name>
</group>
</groups>
<applications>
<application>
<name>docker_test</name>
</application>
</applications>
<items/>
<discovery_rules>
<discovery_rule>
<name>docker.discovery</name>
<type>0</type>
<snmp_community/>
<snmp_oid/>
<key>docker.discovery</key>
<delay>60</delay>
<status>0</status>
<allowed_hosts/>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<delay_flex/>
<params/>
<ipmi_sensor/>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<filter>
<evaltype>0</evaltype>
<formula/>
<conditions>
<condition>
<macro>{#CONTAINERNAME}</macro>
<value>@ CONTAINER NAME</value>
<operator>8</operator>
<formulaid>A</formulaid>
</condition>
</conditions>
</filter>
<lifetime>30</lifetime>
<description/>
<item_prototypes>
<item_prototype>
<name>Container {#CONTAINERNAME} Diskio usage:</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>docker.[{#CONTAINERNAME} ,BlockIO]</key>
<delay>60</delay>
<history>90</history>
<trends>0</trends>
<status>0</status>
<value_type>1</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>docker_test</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
<application_prototypes/>
</item_prototype>
<item_prototype>
<name>Container{#CONTAINERNAME} CPU usage:</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>docker.[{#CONTAINERNAME},CPUPerc]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<status>0</status>
<value_type>0</value_type>
<allowed_hosts/>
<units>%</units>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>docker_test</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
<application_prototypes/>
</item_prototype>
<item_prototype>
<name>Container {#CONTAINERNAME} mem usage:</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>docker.[{#CONTAINERNAME},MemPerc]</key>
<delay>60</delay>
<history>90</history>
<trends>365</trends>
<status>0</status>
<value_type>0</value_type>
<allowed_hosts/>
<units>%</units>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>docker_test</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
<application_prototypes/>
</item_prototype>
<item_prototype>
<name>Container {#CONTAINERNAME} NETio usage:</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>docker.[{#CONTAINERNAME},NetIO]</key>
<delay>60</delay>
<history>90</history>
<trends>0</trends>
<status>0</status>
<value_type>1</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>docker_test</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
<application_prototypes/>
</item_prototype>
<item_prototype>
<name>Container{#CONTAINERNAME} is_run :</name>
<type>0</type>
<snmp_community/>
<multiplier>0</multiplier>
<snmp_oid/>
<key>docker.[{#CONTAINERNAME} ,ping]</key>
<delay>30</delay>
<history>90</history>
<trends>365</trends>
<status>0</status>
<value_type>3</value_type>
<allowed_hosts/>
<units/>
<delta>0</delta>
<snmpv3_contextname/>
<snmpv3_securityname/>
<snmpv3_securitylevel>0</snmpv3_securitylevel>
<snmpv3_authprotocol>0</snmpv3_authprotocol>
<snmpv3_authpassphrase/>
<snmpv3_privprotocol>0</snmpv3_privprotocol>
<snmpv3_privpassphrase/>
<formula>1</formula>
<delay_flex/>
<params/>
<ipmi_sensor/>
<data_type>0</data_type>
<authtype>0</authtype>
<username/>
<password/>
<publickey/>
<privatekey/>
<port/>
<description/>
<inventory_link>0</inventory_link>
<applications>
<application>
<name>docker_test</name>
</application>
</applications>
<valuemap/>
<logtimefmt/>
<application_prototypes/>
</item_prototype>
</item_prototypes>
<trigger_prototypes>
<trigger_prototype>
<expression>{docker-status:docker.[{#CONTAINERNAME} ,ping].last()}=0</expression>
<recovery_mode>0</recovery_mode>
<recovery_expression/>
<name>docker_{#CONTAINERNAME}_down</name>
<correlation_mode>0</correlation_mode>
<correlation_tag/>
<url/>
<status>0</status>
<priority>5</priority>
<description/>
<type>0</type>
<manual_close>0</manual_close>
<dependencies/>
<tags/>
</trigger_prototype>
</trigger_prototypes>
<graph_prototypes/>
<host_prototypes/>
</discovery_rule>
</discovery_rules>
<httptests/>
<macros/>
<templates/>
<screens/>
</template>
</templates>
</zabbix_export>
- 将这个文件通过 zabbix 页面导入;
2、服务器中存放监控脚本
- [root@cest-3 ~]# cat /etc/zabbix/zabbix_agentd.d/docker_alarm.py
#!/usr/bin/python
import sys
import os
import json
def discover():
d = {}
d['data'] = []
with os.popen("docker ps -a --format {{.Names}}") as pipe:
for line in pipe:
info = {}
info['{#CONTAINERNAME}'] = line.replace("\n","")
d['data'].append(info)
print json.dumps(d)
def status(name,action):
if action == "ping":
cmd = 'docker inspect --format="{{.State.Running}}" %s' %name
result = os.popen(cmd).read().replace("\n","")
if result == "true":
print 1
else:
print 0
else:
cmd = 'docker stats %s --no-stream --format "{{.%s}}"' % (name,action)
result = os.popen(cmd).read().replace("\n","")
if "%" in result:
print float(result.replace("%",""))
else:
print result
if __name__ == '__main__':
try:
name, action = sys.argv[1], sys.argv[2]
status(name,action)
except IndexError:
discover()
- docker 占位符说明:
.Container 根据用户指定的名称显示容器的名称或 ID。
.Name 容器名称。
.ID 容器 ID。
.CPUPerc CPU 使用率。
.MemUsage 内存使用量。
.NetIO 网络 I/O。
.BlockIO 磁盘 I/O。
.MemPerc 内存使用率。
.PIDs PID 号。
3、服务器中配置运行的监控脚本
- [root@cest-3 ~]# vim /etc/zabbix/zabbix_agentd.d/docker.conf
UserParameter=docker.discovery,/etc/zabbix/zabbix_agentd.d/docker_alarm.py
UserParameter=docker.[*],/etc/zabbix/zabbix_agentd.d/docker_alarm.py $1 $2
UserParameter=process.stats,ps -aux | grep $(cat /var/run/docker.pid) | grep -v grep | wc -l
4、常见问题
报错信息:
Got permission denied while trying to connect to the Docker daemon socket at unix:///var/run/docker.sock: Post http://%2Fvar%2Frun%2Fdocker.sock/v1.26/build?
buildargs=%7B%7D&buildbinds=null&cachefrom=%5B%5D&cgroupparent=&cpuperiod=0&cpuquota=0&cpusetcpus=&cpusetmems=&cpushares=0&dockerfile=Dockerfile&labels=%7B%7
D&memory=0&memswap=0&networkmode=default&rm=1&shmsize=0&t=192.168.1.202%2Flibrary%2Ficp-service-interface%3Av2&ulimits=null:dial unix /var/run/docker.sock: connect: permission denied
解决办法:
- 修改 /etc/zabbix/zabbix_agentd.conf 文件中的 EnableRemoteCommands 参数改为 1;
- 将 zabbix 用户添加到 docker 组中,并更新组信息;
gpasswd -a zabbix docker
newgrp docker