nagios
[root@zw_test_26_74 nagios-plugins-2.0.2]# vi /etc/hosts
192.168.26.74 hostname
Apache安装配置:
[root@zw_test_26_74 backup]# yum -y install httpd
[root@zw_test_26_74 backup]# vim /etc/httpd/conf/httpd.conf
将:
User apache
Group apache
修改为:
User nagios
Group nagios
修改
ServerName localhost:80
DirectoryIndex index.html index.html.var
修改为:
DirectoryIndex index.html index.php
添加两行:
Include conf.d/*.conf
AddType application/x-httpd-php .php
PHP安装配置
[root@zw_test_26_74 backup]# yum -y install php
编译安装Nagios
依赖包:
[root@zw_test_26_74 local]# yum install -y gcc glibc glibc-common gd gd-devel make net-snmp php-gd
添加用户和组
[root@localhost src]# useradd nagios
[root@localhost src]# groupadd nagcmd
[root@localhost src]# usermod -a -G nagcmd nagios
[root@localhost nagios]# tar zxvf nagios-3.5.1.tar.gz
[root@localhost nagios]# ./configure --with-httpd-conf=/etc/httpd/conf.d    # --with-httpd-conf 指向 apache 的配置目录
[root@localhost nagios]# make all
[root@zw_test_26_74 nagios]# make install && make install-init && make install-commandmode && make install-config && make install-webconf
[root@zw_test_26_74 nagios]# cp -R contrib/eventhandlers/ /usr/local/nagios/libexec/
[root@zw_test_26_74 nagios]# chown -R nagios:nagios /usr/local/nagios/libexec/eventhandlers
注意相关依赖包的路径对应好
编译安装nagios-plugins
[root@zw_test_26_74 nagios-plugins-2.0.2]# ./configure --with-nagios-user=nagios --with-nagios-group=nagios
[root@zw_test_26_74 nagios-plugins-2.0.2]# make && make install
创建Apache目录验证文件
[root@localhostnagios]# /usr/bin/htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin
(第一次安装先使用默认的用户名nagiosadmin,以后熟悉了再换,出于安全考虑)
查看验证密码
[root@zw_test_26_74 nagios-plugins-2.0.2]# cat /usr/local/nagios/etc/htpasswd.users
nagiosadmin:PaZSLFSaaKbuY
添加Nagios到启动项中
[root@localhostnagios]# chkconfig --add nagios
[root@localhostnagios]# chkconfig --level 35 nagios on
启动Nagios 和 Apache
[root@localhost httpd-2.0.65]# service nagios start
[root@localhost httpd-2.0.65]# service httpd start
访问:http://192.168.26.74/nagios
安装rrdtool
[root@zw_test_26_74 etc]# yum -y install rrdtool
安装pnp4nagios
[root@zw_test_26_74 xebest]# tar -zxvf pnp4nagios-0.6.22.tar.gz
[root@zw_test_26_74 xebest]# yum install -y perl-Time-HiRes
(编译时可能会报错,缺少一个perl的包)
[root@localhost pnp4nagios-0.6.22]#./configure --prefix=/usr/local/pnp4nagios --with-nagios-user=nagios --with-nagios-group=nagios --with-rrdtool=/usr/bin/rrdtool --with-httpd-conf=/etc/httpd/conf.d
WARNING: The RRDs Perl Modules are not found on your system
Using RRDs will speedup things in larger installations.
[root@zw_test_26_74 pnp4nagios-0.6.22]# yum -y install perl-rrdtool
[root@localhost pnp4nagios-0.6.22]# make all
[root@localhost pnp4nagios-0.6.22]# make fullinstall
2、修改nagios.cfg
/usr/local/nagios/etc/nagios.cfg
[root@localhostetc]# vim /usr/local/nagios/etc/nagios.cfg
修改如下
process_performance_data=1
service_perfdata_command=process-service-perfdata
host_perfdata_command=process-host-perfdata
修改nagios command配置文件
[root@localhost bin]# cd /usr/local/nagios/etc/objects
[root@localhost objects]# vim commands.cfg
(在文件末尾追加如下几行)
define command {
command_name process-service-perfdata
command_line /usr/bin/perl /usr/local/pnp4nagios/libexec/process_perfdata.pl
}
define command {
command_name process-host-perfdata
command_line /usr/bin/perl /usr/local/pnp4nagios/libexec/process_perfdata.pl -d HOSTPERFDATA
}
修改nagios template 配置文件
[root@localhost objects]# vim templates.cfg
(在文件末尾追加如下几行)
define host {
name host-pnp
action_url /pnp4nagios/index.php/graph?host=$HOSTNAME$&srv=_HOST_
register 0
}
define service {
name srv-pnp
action_url /pnp4nagios/index.php/graph?host=$HOSTNAME$&srv=$SERVICEDESC$
register 0
}
5、修改 hosts.cfg 和 services.cfg
这两个文件默认是不存在的,是用来定义你要监控的主机和服务,需要你手工创建。
[root@localhost objects]# vim hosts.cfg
define host{
use linux-server,host-pnp
host_name web01
alias pub_web01
address 192.168.0.51
}
define host{
use linux-server,host-pnp
host_name oracle
alias pub_oracle
address 192.168.0.180
}
[root@localhost objects]# vim services.cfg
define service{
use local-service,srv-pnp
host_name web01
service_description PING
check_command check_ping!100.0,20%!500.0,60%
}
define service{
use local-service,srv-pnp
host_name web01
service_description SSH
check_command check_ssh
}
define service{
use local-service,srv-pnp
host_name web01
service_description http
check_command check_http
}
把新建的配置文件映射到 nagios.cfg里面
vim /usr/local/nagios/etc/nagios.cfg
cfg_file=/usr/local/nagios/etc/objects/hosts.cfg
cfg_file=/usr/local/nagios/etc/objects/services.cfg
验证配置文件,重启服务
1、验证pnp4nagios配置文件,这个脚本默认没有,需要到官网下载
[root@zw_test_26_74 object]# wget http://verify.pnp4nagios.org/verify_pnp_config
/root/nagiossoftware/pnp4nagios/verify_pnp_config
[root@zw_test_26_74 pnp4nagios]# perl verify_pnp_config --mode sync --config=/usr/local/nagios/etc/nagios.cfg --pnpcfg=/usr/local/pnp4nagios/etc
[INFO] ========== Starting Environment Checks ============
[INFO] My version is: verify_pnp_config-0.6.25-R.40
[INFO] Start Options: verify_pnp_config --mode sync --config=/usr/local/nagios/etc/nagios.cfg --pnpcfg=/usr/local/pnp4nagios/etc
[INFO] Reading /usr/local/nagios/etc/nagios.cfg
[OK ] Running product is 'nagios'
[OK ] object_cache_file is defined
[OK ] object_cache_file=/usr/local/nagios/var/objects.cache
[INFO] Reading /usr/local/nagios/var/objects.cache
[OK ] resource_file is defined
[OK ] resource_file=/usr/local/nagios/etc/resource.cfg
[INFO] Reading /usr/local/nagios/etc/resource.cfg
[INFO] Reading /usr/local/pnp4nagios/etc/process_perfdata.cfg
[INFO] Reading /usr/local/pnp4nagios/etc/pnp4nagios_release
[OK ] Found PNP4Nagios version "0.6.22"
[OK ] ./configure Options '--prefix=/usr/local/pnp4nagios' '--with-nagios-user=nagios' '--with-nagios-group=nagios' '--with-rrdtool=/usr/bin/rrdtool' '--with-httpd-conf=/etc/httpd/conf.d'
[OK ] Effective User is 'nagios'
[OK ] User nagios exists with ID '500'
[OK ] Effective group is 'nagios'
[OK ] Group nagios exists with ID '500'
[INFO] ========== Checking Sync Mode Config ============
[OK ] process_performance_data is 1 compared with '/1/'
[OK ] enable_environment_macros is 1 compared with '/1/'
[OK ] service_perfdata_command is defined
[OK ] service_perfdata_command=process-service-perfdata
[OK ] host_perfdata_command is defined
[OK ] host_perfdata_command=process-host-perfdata
[INFO] Nagios config looks good so far
[INFO] ========== Checking config values ============
service_perfdata_command at verify_pnp_config line 462.
[OK ] Command process-service-perfdata is defined
[OK ] '/usr/bin/printf "%b" "$LASTSERVICECHECK$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICESTATE$\t$SERVICEATTEMPT$\t$SERVICESTATETYPE$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$\n" >> /usr/local/nagios/var/service-perfdata.out'
[CRIT] Command looks suspect (/usr/bin/printf "%b" "$LASTSERVICECHECK$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICESTATE$\t$SERVICEATTEMPT$\t$SERVICESTATETYPE$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$\n" >> /usr/local/nagios/var/service-perfdata.out)
pnp4nagios有几种工作模式(Synchronous Mode、Bulk Mode、Bulk Mode with NPCD、Bulk Mode with npcdmod、Gearman Mode),我们默认安装是Synchronous Mode。这几种模式的区别请参考官网上的安装文档。
2、验证nagios配置文件,启动之前要检查
[root@localhost objects]# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
3、重启
[root@localhost objects]# /etc/init.d/nagios restart
Running configuration check...done.
Stopping nagios: done.
Starting nagios: done.
[root@localhost objects]# /etc/init.d/npcd restart
Stopping npcd: done.
Starting npcd: done.
pnp4nagios自检页
http://192.168.71.132:85/pnp4nagios/
Your environment passed all requirements. Remove or rename the /usr/local/pnp4nagios/share/install.php file now.
要保证所有检查项都通过,最后重命名 install.php
[root@zw_test_26_74 xebest]# cd /usr/local/pnp4nagios/share/
[root@zw_test_26_74 share]# mv install.php install.php.bak
--------------------------------------------------------------------------------------------------
页面错误处理:
图形界面点击报错处理:
Please check the documentation for information about the following error.
session_start(): open(/var/lib/php/session/sess_fj7qcq3rq6rjmdd7ibgucmpha1, O_RDWR) failed: Permission denied (13)
file [line]:
/usr/local/pnp4nagios/lib/kohana/system/libraries/Session.php [159]:
back
[root@zw_test_26_74 php]# pwd
/var/lib/php
[root@zw_test_26_74 php]# ll
total 4
drwxrwx---. 2 nagios nagios 4096 Nov 17 17:06 session
[root@zw_test_26_74 php]# chown nagios:nagios /var/lib/php -R
[root@zw_test_26_74 perfdata]# pwd
/usr/local/pnp4nagios/var/perfdata
监控页面http报警告
HTTP WARNING: HTTP/1.1 403 Forbidden - 5159 bytes in 0.001 second response time
[root@zw_test_26_74 html]# pwd
/var/www/html
[root@zw_test_26_74 html]# touch index.html
------------------------------------------------------------------------------------------------------
useradd nagios
groupadd nagcmd
usermod -a -G nagcmd nagios
--with-ssl-lib=/usr/lib/x86_64-linux-gnu
客户端需要安装: epel-release-6-8.noarch.rpm
nrpe-2.15
nagios-plugins-2.1.1
yum install -y gcc glibc glibc-common gd gd-devel make net-snmp php-gd xinetd
nrpe: - ./configure && make all && make install-plugin && make install-daemon && make install-daemon-config && make install-xinetd
以上两个安装包
nagios-plugins-2.1.1 : ./configure --prefix=/usr/local/nagios && make && make install
------------------------------------------------------------------------------------------------------
Protected multilib versions: libcom_err-1.41.12-22.el6.i686 != libcom_err-1.41.12-21.el6.x86_64
You have new mail in /var/spool/mail/root
[root@26-149-cy-search-list nrpe-2.15]# yum -y install openssl-devel --setopt=protected_multilib=false
需要监控的组件: cpu load memory disk
------------------------------------------------------------------------------------------------------
9、远程linux主机的监控
监控端添加如下:
1. server端配置:
[root@mi1 ~]# vi /usr/local/nagios/etc/nrpe.cfg
allowed_hosts=192.168.26.74
command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 300 -c 400
command[check_disk]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/sda1
command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10%
安装完成后会生成 /etc/xinetd.d/nrpe 文件,用 vi 编辑该文件,在 only_from 后面加上 server 端 ip
# default: on
# description: NRPE (Nagios Remote Plugin Executor)
service nrpe
{
flags = REUSE
socket_type = stream
port = 5666
wait = no
# 如果监控oracle,则将下面的 user 改成 oracle 用户
user = nagios
group = nagios
server = /usr/local/nagios/bin/nrpe
server_args = -c /usr/local/nagios/etc/nrpe.cfg --inetd
log_on_failure += USERID
disable = no
only_from = 192.168.26.74
}
3. server 端添加nrpe 服务
[root@zw_test_26_74 etc]# vi /etc/services
nrpe 5666/tcp # NRPE
4. 重启xinetd服务
[root@zw_test_26_74 etc]# service xinetd restart
Stopping xinetd: [FAILED]
Starting xinetd: [ OK ]
查看nrpe是否启动
[root@zw_test_26_74 etc]# netstat -an |grep 5666
tcp 0 0 :::5666 :::* LISTEN
可以看到 nrpe在监听了
7. 测试NRPE是否正常工作
使用上面在被监控机上安装的check_nrpe 这个插件测试NRPE 是否工作正常。
[root@zw_test_26_74 etc]# /usr/local/nagios/libexec/check_nrpe -H 192.168.26.74
[root@zw_test_26_74 etc]# /usr/local/nagios/libexec/check_nrpe -H 192.168.26.75 -c check_oracle_tns
NRPE v2.15
/usr/local/nagios/libexec/check_ssh -p 22310
也就是在本地用check_nrpe连接nrpe daemon是正常的。
*在commands.cfg 中创建check_nrpe 的命令定义,因为只有在commands.cfg 中定义过的命令才能在services.cfg 中使用;
10.2 在commands.cfg中增加对check_nrpe的定义,内容如下:
# vi /usr/local/nagios/etc/objects/commands.cfg
# 'check_nrpe' command definition
define command{
command_name check_nrpe ; 定义命令名称为check_nrpe,在services.cfg中要使用这个名称.
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$ ; 这是定义实际运行的插件程序.
# 这个命令行的书写要完全按照check_nrpe这个命令的用法,不知道用法的就用check_nrpe -h查看.
}
-c 后面带的$ARG1$ 参数是传给nrpe daemon 执行的检测命令,之前说过了它必须是被监控机 nrpe.cfg 中用 command[...] 定义的命令中的其中一条。
在services.cfg 中使用check_nrpe 的时候要用 “!” 带上这个参数。
10.3 定义对Nagios-Linux 主机的监控
注意 check_nrpe是 /usr/local/nagios/etc/objects/commands.cfg 里面定义的
!check_users 感叹号后面的是/usr/local/nagios/etc/nrpe.cfg 里面定义的
下面就可以在services.cfg 中定义对Nagios-Linux 主机的监控了
define service{
use generic-service,srv-pnp
host_name web04_26_58
service_description check_users
check_command check_nrpe!check_users
}
define service{
use generic-service,srv-pnp
host_name web04_26_58
service_description check_load
check_command check_nrpe!check_load
}
define service{
use generic-service,srv-pnp
host_name web04_26_58
service_description check_disk
check_command check_nrpe!check_disk
}
define service{
use generic-service,srv-pnp
host_name web04_26_58
service_description check_zombie_procs
check_command check_nrpe!check_zombie_procs
}
define service{
use generic-service,srv-pnp
host_name web04_26_58
service_description check_total_procs
check_command check_nrpe!check_total_procs
}
NRPE: Command 'check_disk' not defined
#command[check_disk]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/sda1
command[check_disk]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p
把nrpe.cfg -p后面的参数去掉
----------------------------------------------------------------------------------------
nagios 监控oracle
3.修改 check_oracle 脚本。将 $ORACLE_HOME 以及 $PATH 手动加入,避免出现问题。
[oracle@ora ~]$ echo ${ORACLE_HOME}
/u01/oracle/database/10.2.0/db_1
[oracle@ora ~]$ echo ${PATH}
/usr/kerberos/bin:/usr/local/bin:/bin:/usr/bin:/usr/X11R6/bin:/u01/oracle/bin:/u01/oracle/database/10.2.0/
db_1/bin:.
这么做是为了确保脚本可以正常执行 sqlplus 以及 tnsping 等命令
二、配置 nrpe 服务
在nrpe.cfg文件末尾加入如下内容
#Check Oracle
#############################check oracle ######################################
command[check_oracle_tns]=/usr/local/nagios/libexec/check_oracle --tns llmj
command[check_oracle_db]=/usr/local/nagios/libexec/check_oracle --db llmj
command[check_oracle_login]=/usr/local/nagios/libexec/check_oracle --login llmj
command[check_oracle_cache]=/usr/local/nagios/libexec/check_oracle --cache llmj system oracle123 80 90
command[check_oracle_tablespace_system]=/usr/local/nagios/libexec/check_oracle --tablespace llmj system oracle123 SYSTEM 90 80
command[check_oracle_tablespace_sysaux]=/usr/local/nagios/libexec/check_oracle --tablespace llmj system oracle123 SYSAUX 90 80
command[check_oracle_tablespace_undotbs1]=/usr/local/nagios/libexec/check_oracle --tablespace llmj system oracle123 UNDOTBS1 90 80
command[check_oracle_tablespace_users]=/usr/local/nagios/libexec/check_oracle --tablespace llmj system oracle123 USERS 90 80
command[check_oracle_tablespace_llmj_tabs]=/usr/local/nagios/libexec/check_oracle --tablespace llmj system oracle123 LLMJ_TABS 90 80
command[check_oracle_tablespace_llmj_temp]=/usr/local/nagios/libexec/check_oracle --tablespace llmj system oracle123 TEMP 90 80
具体参数写法请参考 check_oracle --help
重启nrpe进程
[root@ora etc]# killall nrpe
[root@ora etc]# /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
三、配置 Nagios 服务端
在 nagios/etc/objects 添加oracle 主机配置文件,orac.cfg。
define host {
use linux-server
host_name oracle
alias Oracle 11g
address 192.168.26.75
}
define service{
use generic-service,srv-pnp
host_name oracle
service_description check_oracle_db
check_command check_nrpe!check_oracle_db
}
define service{
use generic-service,srv-pnp
host_name oracle
service_description check_oracle_tns
check_command check_nrpe!check_oracle_tns
}
define service{
use generic-service,srv-pnp
host_name oracle
service_description check_oracle_login
check_command check_nrpe!check_oracle_login
}
define service{
use generic-service,srv-pnp
host_name oracle
service_description check_oracle_cache
check_command check_nrpe!check_oracle_cache
}
define service{
use generic-service,srv-pnp
host_name oracle
service_description check_oracle_tablespace_system
check_command check_nrpe!check_oracle_tablespace_system
}
4.验证配置文件无误后重启 nagios 服务
[root@june objects]# nagios -v /usr/local/nagios/etc/nagios.cfg
5.重新启动nagios服务
[root@june objects]# service nagios restart
[root@june objects]# service xinetd restart    # 修改客户端配置需要重启该服务
------------------------------------------------------------------------------
发送邮件:
1、Nagios邮件报警的配置
11.1 安装sendmail 组件
首先要确保sendmail 相关组件的完整安装,我们可以使用如下的命令来完成sendmail 的安装:
[root@bairui212 Packages]# mount -o loop /dev/cdrom /mnt    # 挂载光盘
# yum install -y sendmail* 把以下两个包给安装上
-r--r--r--149 root root 734360 8月17 2010 sendmail-8.14.4-8.el6.x86_64.rpm
-r--r--r--254 root root 188480 8月17 2010 sendmail-cf-8.14.4-8.el6.noarch.rpm
vi /etc/mail.rc
set from=893229927@qq.com
set smtp=smtp.qq.com
set smtp-auth-user=893229927@qq.com
set smtp-auth-password=zw125@#
set smtp-auth=login
然后重新启动sendmail服务:
[root@bairui212 Packages]# service sendmail restart
2. Sendmail配置
然后发送测试邮件,验证sendmail的可用性:
[root@bairui212 Packages]# echo "hello word" | mail -s "xebest !!" zw_hard@sina.com
echo "hello word"| mail -s "send mail vi /etc/mail.rc " 893229927@qq.com
3.邮件报警的配置
在上面我们已经简单配置过了/usr/local/nagios/etc/objects/contacts.cfg 文件,Nagios 会将报警邮件发送到配置文件里的E-mail 地址。
3.1邮件监控定义监控人邮件地址
vi /usr/local/nagios/etc/objects/contacts.cfg
define contact{
contact_name nagiosadmin ; Short name of user
use generic-contact ; Inherit default values from generic-contact template (defined above)
alias Nagios Admin ; Full name of user
email supdb@bankresearch.com.cn
; <<***** CHANGE THIS TO YOUR EMAIL ADDRESS ******
}
注意:除了配置监控邮件的接收人外,还要确保:
* 本主机与邮件服务器互通
*本主机SendMail可以使用外部SMTP服务发送邮件
--------------------------------------------------------------------------------------------
hostgroup 组管理
2.配置nagios主配置文件nagios.cfg
# cat nagios.cfg 只写出改动文件,下同
新添加下面4句,指向子文件所在位置
cfg_file=/usr/local/nagios/etc/hostgroups.cfg
cfg_file=/usr/local/nagios/etc/contactgroups.cfg
command_check_interval=10s
#command_check_interval=-1 #原来为-1,改成10s
# cat hostgroups.cfg 定义组与组成员
define hostgroup {
hostgroup_name sa-servers
alias sa servers
members nagios-server ; 如果有多个主机,可以以"," 分隔,不能有空格
}
--------------------------------------------------------------
# cat contactgroups.cfg
define contactgroup {
contactgroup_name sagroup
alias system administrator group
members nagiosadmin
}