Monit是一款功能非常丰富的进程、文件、目录和设备的监测软件,用于Unix平台。它可以自动修复那些已经停止运作的程序,特别适合处理那些由于多种原因导致的软件错误。
官网:https://mmonit.com/monit
文档:https://mmonit.com/monit/documentation/monit.html
注意:Monit是一个开源工具,但M/Monit是收费的。
一、安装与配置
参考链接:
https://blog.csdn.net/arnold_wang/article/details/39855331
https://blog.csdn.net/qin_weilong/article/details/90639769
安装
yum install monit
配置
配置文件:/etc/monitrc
vim /etc/monitrc
#监控间隔
set daemon 30 # check services at 30 seconds intervals
#配置日志文件
set log syslog
set logfile /var/log/monit.log
#设置pid文件
set pidfile /var/run/monit.pid
#配置邮箱提醒
#其中用户名不用添加@xx.xxx
#password可以使用授权码
set mailserver smtp.163.com USERNAME "username" PASSWORD "password"
set mail-format {
from: username@xx.xxx
subject: $SERVICE $EVENT at $DATE on $HOST
message: Monit $ACTION $SERVICE $EVENT at $DATE on $HOST : $DESCRIPTION.
Yours sincerely, Monit
}
set alert toemail@xx.xxx
#配置端口、访问IP地址和用户名密码
set httpd port 2812 and
use address 192.168.20.24 # only accept connections on this interface address
allow 0.0.0.0/0.0.0.0 # allow connections from any IP address (restrict this in production)
allow admin:admin # require user 'admin' with password 'admin'
# with ssl { # enable SSL/TLS and set path to server certificate
# pemfile: /etc/ssl/certs/monit.pem
# }
#监控主机状态
check system localhost
if loadavg (1min) > 4 then alert
if loadavg (5min) > 2 then alert
if cpu usage(user) > 95% for 10 cycles then alert
if memory usage > 80% then alert
if swap usage > 25% then alert
#监控进程
check process business MATCHING business.jar
start program = "/project/introduce/business start 1>nohup.out 2>&1 &"
stop program = "/project/introduce/business stop 1>nohup.out 2>&1 &"
restart program = "/project/introduce/business restart 1>nohup.out 2>&1 &"
if failed port 8088 type tcp then alert
if failed port 8088 type tcp then restart
if cpu > 60% for 2 cycles then alert
if cpu > 80% for 5 cycles then restart
#监控服务
check process mysql with pidfile /var/run/mysqld/mysqld.pid
group database
start program = "/etc/init.d/mysqld start"
stop program = "/etc/init.d/mysqld stop"
if failed host 127.0.0.1 port 3306 then restart
if 5 restarts within 5 cycles then timeout
#监控文件
check file httpd.conf with path /usr/local/apache/conf/httpd.conf
if does not exist for 5 cycles then alert #是否存在
if changed size for 1 cycles then alert #改变大小
if changed sha1 checksum then alert #改变sha1
#时间戳是指文件属性里的创建、修改、访问的时间
if changed timestamp then alert #改变形式
if timestamp > 1 minute then alert #常量模式
#权限模块
if failed permission 0555 then unmonitor #权限
if failed uid root then unmonitor #uid
if failed gid root then unmonitor #gid
#文件系统
check filesystem rootfs with path /
if space usage > 80% then alert
if changed fsflags then exec "/my/script" #文件系统标志(flags)
alert root@localhost
if changed pid then exec "/my/script" #pid(注意:pid检测仅适用于check process,放在check filesystem下会报语法错误)
配置完成后使用命令:
#重新加载配置文件使其生效,否则新增的服务不会被识别,会报 there is no service named xxx 错误
monit reload
#xxx为自定义进程名称
monit start xxx
也可以在页面中启动检测服务:
浏览器中打开地址:http://ip:port
常用命令:
#查看版本
monit -V
#启动monit
monit -c /etc/monitrc
#停止monit
monit quit
#更新配置文件
monit reload
#输出monit监控服务的状态
monit status
#启动单个监控服务
monit start NAME
#停止单个监控服务
monit stop NAME
#启动所有监控服务
monit start all
#停止所有监控服务
monit stop all
常见错误
there is no service named xxx
原因:配置文件修改后没有重新加载。运行命令 monit reload 后重试。