zabbix自定义监控
自定义监控进程
主机名 | ip | 角色 |
---|---|---|
Server | 192.168.129.250 | 服务端 |
slave and agent | 192.168.129.135 | 客户端与从数据库 |
master | 192.168.129.134 | 主数据库 |
想要监控进程,首先得有进程,在客户端上安装一个httpd并启动
[root@agent ~]# yum -y install httpd
[root@agent ~]# systemctl start httpd
[root@agent ~]# ss -antl
State Recv-Q Send-Q Local Address:Port Peer Address:Port
LISTEN 0 128 0.0.0.0:22 0.0.0.0:*
LISTEN 0 128 0.0.0.0:10050 0.0.0.0:*
LISTEN 0 128 *:80 *:*
LISTEN 0 128 [::]:22 [::]:*
[root@agent ~]# ps -ef |grep httpd
root 67873 1 0 23:30 ? 00:00:00 /usr/sbin/httpd -DFOREGROUND
apache 68669 67873 0 23:30 ? 00:00:00 /usr/sbin/httpd -DFOREGROUND
apache 68670 67873 0 23:30 ? 00:00:00 /usr/sbin/httpd -DFOREGROUND
apache 68671 67873 0 23:30 ? 00:00:00 /usr/sbin/httpd -DFOREGROUND
apache 68672 67873 0 23:30 ? 00:00:00 /usr/sbin/httpd -DFOREGROUND
root 69986 1453 0 23:31 pts/0 00:00:00 grep --color=auto httpd
测试一下网页能否访问(若是不能访问那就有可能是防火墙和selinux没关闭)
新建一个文件夹存放脚本、编写脚本、给脚本文件执行权限
[root@agent ~]# mkdir /scripts
[root@agent ~]# cd /scripts/
[root@agent scripts]# vim check_process.sh
[root@agent scripts]# cat check_process.sh
#!/bin/bash
#
# Zabbix UserParameter helper: report whether a process is running.
# Usage:  check_process.sh <pattern>
# Output: prints 0 when at least one `ps -ef` line matches <pattern>,
#         prints 1 when none does (0 = healthy, 1 = problem).

#######################################
# Count the processes matching a pattern and print the 0/1 status.
# Arguments: $1 - basic regular expression matched against `ps -ef` lines
# Outputs:   0 or 1 on stdout; a usage message on stderr when $1 is missing
# Returns:   0 normally, 2 when no pattern was supplied
#######################################
check_process() {
  local pattern=${1:-}
  local matches

  # Without a pattern the count is meaningless; refuse instead of silently
  # reporting the process as healthy.
  if [[ -z "$pattern" ]]; then
    printf 'Usage: %s <process-pattern>\n' "${0##*/}" >&2
    return 2
  fi

  # Drop the grep helpers and this script's own entry (its command line
  # carries the pattern as an argument) before counting. "$0" is matched as a
  # fixed string so the dots in the script name are not regex wildcards.
  matches=$(ps -ef | grep -Fv -e 'grep' -e "$0" | grep -c -- "$pattern")

  if [[ "$matches" -eq 0 ]]; then
    echo '1'
  else
    echo '0'
  fi
}

check_process "$@"
[root@agent scripts]# chmod +x check_process.sh
[root@agent scripts]# ll
总用量 4
-rwxr-xr-x 1 root root 137 10月 8 23:42 check_process.sh
[root@agent scripts]# ./check_process.sh httpd
0 #输出0表示httpd服务正常
//将httpd服务停止
[root@agent scripts]# systemctl stop httpd
[root@agent scripts]# ss -antl
State Recv-Q Send-Q Local Address:Port Peer Address:Port
LISTEN 0 128 0.0.0.0:22 0.0.0.0:*
LISTEN 0 128 0.0.0.0:10050 0.0.0.0:*
LISTEN 0 128 [::]:22 [::]:*
[root@agent scripts]# ./check_process.sh httpd
1 #输出1表示httpd服务不正常
修改客户端的zabbix_agentd.conf配置
//开启自定义监控服务
[root@agent scripts]# vim /usr/local/etc/zabbix_agentd.conf
UnsafeUserParameters=1 #取消这一行(第322行)的注释,并把0修改为1表示启用
//并在最后面新建一行插入脚本
[root@agent scripts]# vim /usr/local/etc/zabbix_agentd.conf
UserParameter=check_process[*],/scripts/check_process.sh $1
//重启zabbix服务
[root@agent scripts]# pkill zabbix
[root@agent scripts]# zabbix_agentd
[root@agent scripts]# ss -anlt
State Recv-Q Send-Q Local Address:Port Peer Address:Port
LISTEN 0 128 0.0.0.0:22 0.0.0.0:*
LISTEN 0 128 0.0.0.0:10050 0.0.0.0:*
LISTEN 0 128 [::]:22 [::]:*
//服务端测试效果
[root@Server ~]# zabbix_get -s 192.168.129.135 -k check_process['httpd']
1 #不正常
[root@agent scripts]# systemctl start httpd
[root@agent scripts]# ss -anlt
State Recv-Q Send-Q Local Address:Port Peer Address:Port
LISTEN 0 128 0.0.0.0:22 0.0.0.0:*
LISTEN 0 128 0.0.0.0:10050 0.0.0.0:*
LISTEN 0 128 *:80 *:*
LISTEN 0 128 [::]:22 [::]:*
[root@Server ~]# zabbix_get -s 192.168.129.135 -k check_process['httpd']
0 #正常启动
配置监控项
在客户端将httpd服务关闭
[root@agent scripts]# systemctl stop httpd
[root@agent scripts]# systemctl start httpd
添加触发器
在客户端将httpd服务关闭
[root@agent scripts]# systemctl stop httpd
因为之前设置了媒介与邮箱告警,收到了邮件
自定义监控日志文件
编写并查看脚本文件
[root@agent scripts]# vim log.py
[root@agent scripts]# cat log.py
#!/usr/bin/env python3
import sys
import re
def prePos(seekfile):
    """Return the offset recorded in ``seekfile`` by the previous run.

    Args:
        seekfile: Path of the state file whose first line holds the offset.

    Returns:
        The stored offset as an int, or 0 when the file cannot be opened or
        its first line is not an integer.
    """
    try:
        # OSError is the base of FileNotFoundError (and IOError is an alias of
        # it), so one clause covers every "cannot open" case.
        with open(seekfile) as cf:
            return int(cf.readline().strip())
    except (OSError, ValueError):
        return 0
def lastPos(filename):
    """Return the current end-of-file offset of ``filename``.

    Args:
        filename: Path of the log file to measure.

    Returns:
        0 when the file's first line is empty (nothing to read), otherwise the
        offset reported by ``tell()`` after seeking to the end of the file.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(filename) as lfile:
        if not lfile.readline():
            return 0
        lfile.seek(0, 2)  # whence=2: position relative to end of file
        return lfile.tell()
def getSeekFile():
    """Return the state-file path from the second command-line argument.

    Returns:
        ``sys.argv[2]`` when it is present and non-empty, otherwise the
        default ``'/tmp/logseek'``.
    """
    # An empty argument is treated like a missing one: an empty path can
    # never be opened for writing.
    if len(sys.argv) > 2 and sys.argv[2]:
        return sys.argv[2]
    return '/tmp/logseek'
def getKey():
    """Return the search pattern from the third command-line argument.

    Returns:
        ``sys.argv[3]`` when it is present and non-empty, otherwise the
        default ``'Error'``.
    """
    # An empty pattern would match every line, so fall back to the default.
    if len(sys.argv) > 3 and sys.argv[3]:
        return sys.argv[3]
    return 'Error'
def getResult(filename, seekfile, tagkey):
    """Scan the part of ``filename`` added since the last run for ``tagkey``.

    Args:
        filename: Path of the log file to scan.
        seekfile: Path of the state file holding the offset reached last time;
            it is rewritten with the current end-of-file offset after a scan.
        tagkey: Regular expression searched for in each new line.

    Returns:
        1 if any new line matches ``tagkey``, otherwise 0. Also 0 (after
        printing a message) when the log file cannot be opened; the state
        file is left untouched in that case.
    """
    try:
        curPos = lastPos(filename)
        f = open(filename)
    except OSError:
        print('Could not open file: %s' % filename)
        return 0

    destPos = prePos(seekfile)
    if curPos < destPos:
        # The file is now shorter than the recorded offset, so it was
        # truncated or rotated: start again from the beginning.
        destPos = 0

    found = 0
    with f:
        f.seek(destPos)
        # readline() (not iteration) keeps tell() usable inside the loop.
        while curPos != 0 and f.tell() < curPos:
            if re.search(tagkey, f.readline().strip()):
                found = 1
                break

    # Record how far this run looked so the next run only sees new lines.
    with open(seekfile, 'w') as sf:
        sf.write(str(curPos))
    return found
if __name__ == "__main__":
    # Usage: log.py <logfile> [seekfile] [keyword]
    if len(sys.argv) < 2:
        print('Usage: %s <logfile> [seekfile] [keyword]' % sys.argv[0],
              file=sys.stderr)
        sys.exit(2)
    # Module-level defaults for the names the helper functions declare
    # ``global``.
    result = 0
    curpos = 0
    tagkey = getKey()
    seekfile = getSeekFile()
    result = getResult(sys.argv[1], seekfile, tagkey)
    # Zabbix reads the item value from stdout: 1 = keyword found, 0 = not.
    print(result)
给脚本文件添加执行权限
[root@agent scripts]# chmod +x /scripts/log.py
[root@agent scripts]# ll
总用量 8
-rwxr-xr-x 1 root root 137 10月 8 23:42 check_process.sh
-rwxr-xr-x 1 root root 1854 10月 8 23:41 log.py
安装python3
[root@agent scripts]# yum -y install python3
在客户端修改zabbix_agentd.conf配置文件
[root@agent scripts]# vim /usr/local/etc/zabbix_agentd.conf
UserParameter=check_log[*],/scripts/log.py $1 $2 $3 #在最后一行添加(此注释不要写入配置文件)
[root@agent scripts]# pkill zabbix_agentd
[root@agent scripts]# zabbix_agentd
[root@agent scripts]# ss -anlt
State Recv-Q Send-Q Local Address:Port Peer Address:Port
LISTEN 0 128 0.0.0.0:22 0.0.0.0:*
LISTEN 0 128 0.0.0.0:10050 0.0.0.0:*
LISTEN 0 128 [::]:22 [::]:*
在客户端修改权限便于key访问
[root@agent scripts]# chmod 755 /var/log/httpd
[root@agent scripts]# ll -d /var/log/httpd/
drwxr-xr-x 2 root root 41 10月 8 23:30 /var/log/httpd/
服务端
[root@Server ~]# zabbix_get -s 192.168.129.135 -k check_log[/var/log/httpd/error_log]
0
客户端
//查看属主是否是zabbix
[root@agent scripts]# ll /tmp/logseek
-rw-rw-r-- 1 zabbix zabbix 4 10月 9 01:10 /tmp/logseek
[root@agent scripts]# echo 'Failed' >> /var/log/httpd/error_log
服务端
[root@Server ~]# zabbix_get -s 192.168.129.135 -k check_log[/var/log/httpd/error_log,/tmp/logseek,Failed]
1
添加监控项
添加触发器
测试验证
[root@agent scripts]# echo 'Error' >> /var/log/httpd/error_log
自定义监控mysql主从状态
mysql主从详情配置请点击下方超链接
主
[root@master ~]# mysql -uroot -p
Enter password:
Welcome to the MySQL monitor. Commands end with ; or \g.
Your MySQL connection id is 3
Server version: 5.7.34 MySQL Community Server (GPL)
Copyright (c) 2000, 2021, Oracle and/or its affiliates.
Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
mysql> set password = password('123456');
Query OK, 0 rows affected, 1 warning (0.01 sec)
mysql> grant replication slave on *.* to 'hhr'@'192.168.129.135' identified by 'hhr123!';
Query OK, 0 rows affected, 1 warning (0.00 sec)
mysql> flush privileges;
Query OK, 0 rows affected (0.00 sec)
[root@master ~]# vim /etc/my.cnf
[root@master ~]# cat /etc/my.cnf
[mysqld]
basedir = /usr/local/mysql
datadir = /opt/data
socket = /tmp/mysql.sock
port = 3306
pid-file = /opt/data/mysql.pid
skip-name-resolve
log-bin=mysql_bin #启用binlog日志
server-id=10 #数据库服务器唯一标识符,主库与从库的server-id值不能相同
[root@master ~]# service mysqld restart
Shutting down MySQL.. SUCCESS!
Starting MySQL. SUCCESS!
//主库查看
mysql> show master status;
+------------------+----------+--------------+------------------+
| File | Position | Binlog_Do_DB | Binlog_Ignore_DB |
+------------------+----------+--------------+------------------+
| mysql_bin.000004 | 342 | | |
+------------------+----------+--------------+------------------+
1 row in set (0.000 sec)
从
[root@slave ~]# vim /etc/my.cnf
[root@slave ~]# cat /etc/my.cnf
[mysqld]
basedir = /usr/local/mysql
datadir = /opt/data
socket = /tmp/mysql.sock
port = 3306
pid-file = /opt/data/mysql.pid
skip-name-resolve
server-id=20 #设置从库的唯一标识符,从库的server-id值不能与主库的相同
relay-log=mysql-relay #启用中继日志relay-log
[root@slave ~]# service mysqld restart
Shutting down MySQL.. SUCCESS!
Starting MySQL. SUCCESS!
//从库查看slave状态
mysql> show slave status\G
*************************** 1. row ***************************
Slave_IO_State: Waiting for master to send event
Master_Host: 192.168.129.134
Master_User: hhr
Master_Port: 3306
Connect_Retry: 60
Master_Log_File: mysql_bin.000004
Read_Master_Log_Pos: 342
Relay_Log_File: mysql-relay.000002
Relay_Log_Pos: 555
Relay_Master_Log_File: mysql_bin.000004
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
客户端
[root@slave scripts]# vim /scripts/check_mysql_replistatus.sh
[root@slave scripts]# cat /scripts/check_mysql_replistatus.sh
#!/bin/bash
#
# Zabbix UserParameter helper: report MySQL replication thread health.
# Output: prints 0 when both Slave_IO_Running and Slave_SQL_Running are "Yes",
#         prints 1 otherwise (including when mysql cannot be queried).

#######################################
# Query `show slave status` and print the 0/1 replication status.
# Arguments: none
# Outputs:   0 or 1 on stdout
#######################################
check_mysql_replication() {
  local running

  # NOTE(review): the credentials are hardcoded and visible in `ps`; consider
  # moving them to a client option file readable only by the zabbix user.
  # mysql's stderr (password warning, access-denied errors) is discarded so it
  # cannot end up in the item value next to the 0/1 status.
  running=$(mysql -uzabbix -p'zabbix123!' -e 'show slave status\G' 2>/dev/null \
    | awk '$1 ~ /^Slave_(IO|SQL)_Running:$/ && $2 == "Yes" { n++ }
           END { print n + 0 }')

  if [[ "$running" -eq 2 ]]; then
    echo '0'
  else
    echo '1'
  fi
}

check_mysql_replication
[root@slave scripts]# chmod +x check_mysql_replistatus.sh
[root@slave scripts]# ./check_mysql_replistatus.sh
0
[root@slave scripts]# vim /usr/local/etc/zabbix_agentd.conf
UserParameter=check_mysql_replication,/scripts/check_mysql_replistatus.sh
[root@slave scripts]# pkill zabbix
[root@slave scripts]# zabbix_agentd
服务端
[root@Server ~]# zabbix_get -s 192.168.129.135 -k check_mysql_replication
ERROR 1045 (28000): Access denied for user 'zabbix'@'localhost' (using password: NO) #需要授权一个zabbix用户
1
[root@Server ~]# zabbix_get -s 192.168.129.135 -k check_mysql_replication
0
如出现上述ERROR情况,以下是解决方案
//从(slave端)
mysql> grant select on *.* to 'zabbix'@'localhost' identified by 'zabbix123!';
Query OK, 0 rows affected (0.004 sec)
mysql> flush privileges;
Query OK, 0 rows affected (0.002 sec)
mysql> grant super,replication client on *.* to 'zabbix'@'localhost' identified by 'zabbix123!';
Query OK, 0 rows affected (0.001 sec)
mysql> flush privileges;
Query OK, 0 rows affected (0.000 sec)
添加监控项
设置告警声音
测试验证
//从库中停止slave
mysql> stop slave;
Query OK, 0 rows affected (0.00 sec)
自定义监控mysql主从延迟
[root@slave ~]# cd /scripts/
[root@slave scripts]# vim /scripts/check_mysql_delay.sh
#!/bin/bash
#
# Zabbix UserParameter helper: report MySQL replication delay in seconds.
# Output: prints the Seconds_Behind_Master value from `show slave status`,
#         or 0 when that value is NULL or could not be read.

#######################################
# Query `show slave status` and print the replication delay.
# Arguments: none
# Outputs:   the delay (or 0) on stdout
#######################################
check_mysql_delay() {
  local delay

  # NOTE(review): the credentials are hardcoded and visible in `ps`; consider
  # moving them to a client option file readable only by the zabbix user.
  # mysql's stderr is discarded so warnings cannot pollute the item value.
  delay=$(mysql -uzabbix -p'zabbix123!' -e 'show slave status\G' 2>/dev/null \
    | awk '$1 == "Seconds_Behind_Master:" { print $2; exit }')

  # NULL and an empty result are both reported as 0, as before. The old
  # unquoted test printed a shell error when the value was empty.
  if [[ -n "$delay" && "$delay" != "NULL" ]]; then
    printf '%s\n' "$delay"
  else
    echo '0'
  fi
}

check_mysql_delay
[root@slave scripts]# chmod +x /scripts/check_mysql_delay.sh
[root@slave scripts]# ./check_mysql_delay.sh
0
//配置文件
[root@slave scripts]# vim /usr/local/etc/zabbix_agentd.conf
UserParameter=check_mysql_delay,/scripts/check_mysql_delay.sh #最后一行添加
[root@slave scripts]# pkill zabbix_agent
[root@slave scripts]# zabbix_agentd
//服务端
[root@Server ~]# zabbix_get -s 192.168.129.135 -k check_mysql_delay
0
添加监控项
添加触发器
测试使用
正确使用方法