nagios主备切换,本地邮件告警

搭建nagios
准备:先将主机关闭,克隆出一台虚拟机;启动克隆出的虚拟机并修改其IP,之后再启动主机

| IP | hostname | 备注 |
| --- | --- | --- |
| 192.168.174.128 | localhost | 主机 |
| 192.168.174.130 | node2 | 备用机 |

文件关联

| 配置文件 | 作用 |
| --- | --- |
| nrpe.cfg | nrpe的主配置文件 |
| objects/commands.cfg | nagios监控命令的定义文件 |
| objects/localhost.cfg | 定义监控的主机、组、服务;此处引用的模板来源于objects/templates.cfg,监控命令来源于objects/commands.cfg |
| objects/templates.cfg | 定义主机和服务的模板文件 |
| nagios.cfg | nagios的主配置文件 |

先将上一篇博客中监控的端口服务注释掉(主备都要)
[root@localhost ~]# cat /usr/local/nagios/etc/objects/services.cfg

#define service{
#        use                     generic-service
#        host_name               node1
#        service_description     nginx
#        check_command           check_nrpe!check_nginx
#        }
#define service{
#        use                     generic-service
#        host_name               node1
#        service_description     mysql
#        check_command           check_nrpe!check_mysql
#        }
#define service{
#        use                     generic-service
#        host_name               node1
#        service_description     httpd
#        check_command           check_nrpe!check_httpd
#        }

编辑环境变量,为了方便后面操作(主备都需要执行)

[root@localhost etc]# vi /etc/profile
# 在文件中添加以下两行
export nagios=/usr/local/nagios/libexec
export PATH=$PATH:$nagios
[root@localhost etc]# source /etc/profile		# 使配置生效

开放监控(主备都要做)

[root@localhost etc]# vi /usr/local/nagios/etc/nrpe.cfg
# 在allowed_hosts中加上对方ip(主机上加备用机ip,备用机上加主机ip),修改完后重启nrpe
allowed_hosts=127.0.0.1,::1,192.168.174.130
[root@localhost etc]# pkill nrpe
[root@localhost etc]# /usr/local/nagios/bin/nrpe -d -c /usr/local/nagios/etc/nrpe.cfg
[root@localhost etc]# check_nrpe -H 192.168.174.130		# 测试能否连接
NRPE v3.2.1

添加命令(主机)

[root@localhost etc]# vi /usr/local/nagios/etc/nrpe.cfg
# 添加以下命令
command[check_nagios]=/usr/local/nagios/libexec/check_nagios -e 5 -F /usr/local/nagios/var/status.dat -C /usr/local/nagios/bin/nagios
[root@localhost etc]# pkill nrpe		# 重启nrpe
[root@localhost etc]# /usr/local/nagios/bin/nrpe -d -c /usr/local/nagios/etc/nrpe.cfg
[root@node2 etc]# check_nrpe -H 192.168.174.128 -c check_nagios		# 在备用机(客户端)上测试
NAGIOS OK: 6 processes, status log updated 5 seconds ago

备用机配置
复制相关文件并修改

[root@node2 etc]# cd /usr/local/src/nagioscore-nagios-4.4.3/contrib/eventhandlers/
[root@node2 eventhandlers]# mkdir -p /usr/local/nagios/libexec/eventhandlers/		# 目标目录不存在时先创建
[root@node2 eventhandlers]# cp enable_notifications /usr/local/nagios/libexec/eventhandlers/
[root@node2 eventhandlers]# cp disable_notifications /usr/local/nagios/libexec/eventhandlers/
[root@node2 eventhandlers]# cp redundancy-scenario1/handle-master-host-event /usr/local/nagios/libexec/eventhandlers/
[root@node2 eventhandlers]# cp redundancy-scenario1/handle-master-proc-event /usr/local/nagios/libexec/eventhandlers/
[root@node2 eventhandlers]# sed -i 's/active_service_checks/notifications/g' /usr/local/nagios/libexec/eventhandlers/handle-master-proc-event

配置添加命令 command

 [root@node2 eventhandlers]# cd /usr/local/nagios/etc/objects/
 
 [root@node2 objects]# vi /usr/local/nagios/etc/objects/commands.cfg

define command {
 command_name handle-master-host-event
 command_line $USER1$/eventhandlers/handle-master-host-event $HOSTSTATE$ $HOSTSTATETYPE$ $HOSTATTEMPT$
}

define command {
 command_name handle-master-proc-event
 command_line $USER1$/eventhandlers/handle-master-proc-event $SERVICESTATE$ $SERVICESTATETYPE$ $SERVICEATTEMPT$
}

define command {
 command_name check_nrpe
 command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}

修改自己配置的主机文件

[root@node2 objects]# vi /usr/local/nagios/etc/objects/hosts.cfg

define host {
        use                             critical-host
        host_name                       nagiosMaster
        alias                           nagios master
        address                         192.168.174.128
        event_handler                   handle-master-host-event
}

define service {
        use                              critical-service
        host_name                        nagiosMaster
        service_description              NAGIOS
        check_command                    check_nrpe!check_nagios
        event_handler                    handle-master-proc-event
}

修改模板文件

[root@node2 objects]# vi /usr/local/nagios/etc/objects/templates.cfg

define host{
        name                            critical-host
        use                             generic-host
        check_period                    24x7
        check_interval                  5
        retry_interval                  1
        max_check_attempts              10
        check_command                   check-host-alive
        notification_period             workhours
        notification_interval           120
        notification_options            d,u,r
        contact_groups                  admins
        register                        0
        }

define service{
        name                            critical-service
        active_checks_enabled           1
        passive_checks_enabled          1                               
        parallelize_check               1
        obsess_over_service             1
        check_freshness                 0
        notifications_enabled           1
        event_handler_enabled           1
        flap_detection_enabled          1
        failure_prediction_enabled      1
        process_perf_data               1
        retain_status_information       1
        retain_nonstatus_information    1
        is_volatile                     0
        check_period                    24x7
        max_check_attempts              1
        normal_check_interval           1
        retry_check_interval            1
        contact_groups                  admins
        notification_options            w,u,c,r
        notification_interval           60
        notification_period             24x7
         register                        0
        }

修改主配置文件

[root@node2 objects]# vi /usr/local/nagios/etc/nagios.cfg
# Values: 1 = enable notifications, 0 = disable notifications
enable_notifications=0
#改成不发告警
use_retained_program_state=0
#状态保持改成0,否则Nagios在启动和重启时将忽略notifications的设置,并采用最近的一个设置(比如你已经切换过一次发告警的状态)

配置邮件告警

[root@node2 objects]# yum install -y mailx		# 安装mail
[root@node2 objects]# vi /usr/local/nagios/etc/objects/commands.cfg		# 修改邮件命令
define command {
    command_name    notify-host-by-email
    command_line    /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /bin/mail -s "** $NOTIFICATIONTYPE$ Host Alert: $HOSTNAME$ is $HOSTSTATE$ **" $CONTACTEMAIL$
}
define command {

    command_name    notify-service-by-email
    command_line    /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$\n" | /bin/mail -s "** $NOTIFICATIONTYPE$ Service Alert: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$
}

最后重启备用机nagios

[root@node2 objects]# nagios -v /usr/local/nagios/etc/nagios.cfg 		# 检查配置文件
[root@node2 objects]# systemctl restart nagios 

测试
将主机上的nagios关闭,然后查看备用机日志

[root@node2 objects]# tail -n 20 -f /var/log/messages
Mar 25 13:50:41 node2 nagios: SERVICE ALERT: 192.168.174.128;NAGIOS;CRITICAL;HARD;1;NAGIOS CRITICAL: Cannot open status log for reading!
Mar 25 13:50:41 node2 nagios: SERVICE EVENT HANDLER: 192.168.174.128;NAGIOS;CRITICAL;HARD;1;handle-master-proc-event
Mar 25 13:50:41 node2 nagios: EXTERNAL COMMAND: ENABLE_NOTIFICATIONS;1585158641
...
Mar 25 16:33:00 node2 nagios: SERVICE NOTIFICATION: nagiosadmin;localhost;Total Processes;WARNING;notify-service-by-email;PROCS WARNING: 303 processes with STATE = RSZDT

查看邮件

[root@node2 objects]# cat /var/spool/mail/nagios 
From nagios@node2.localdomain  Wed Mar 25 16:33:00 2020
Return-Path: <nagios@node2.localdomain>
X-Original-To: nagios@localhost
Delivered-To: nagios@localhost.localdomain
Received: by node2.localdomain (Postfix, from userid 1000)
        id 5496840E9409; Wed, 25 Mar 2020 16:33:00 -0400 (EDT)
Date: Wed, 25 Mar 2020 16:33:00 -0400
To: nagios@localhost.localdomain
Subject: ** PROBLEM Service Alert: localhost/Total Processes is WARNING **
User-Agent: Heirloom mailx 12.5 7/5/10
MIME-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Message-Id: <20200325203300.5496840E9409@node2.localdomain>
From: nagios@node2.localdomain

***** Nagios *****

Notification Type: PROBLEM

Service: NAGIOS
Host: nagios master
Address: 192.168.174.128
State: CRITICAL

Date/Time: Wed Mar 25 16:33:00 EDT 2020

Additional Info:

NAGIOS CRITICAL: Cannot open status log for reading!

收到邮件,成功

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值