# yum -y install httpd gcc glibc glibc-common gd gd-devel php
# useradd nagios
# mkdir /usr/local/nagios
# chown -R nagios.nagios /usr/local/nagios
2.下载安装nagios相关包
http://sourceforge.net/projects/nagios/files/
nagios-3.2.2.tar.gz
nagios-plugins-1.4.16.tar.gz
nrpe-2.12.tar.gz
# tar -xvf /soft/nagios-3.2.2.tar.gz -C /usr/src
# cd /usr/src/nagios-3.2.2/
# ./configure --prefix=/usr/local/nagios
# make all && make install && make install-init && make install-commandmode && make install-config
# tar -xvf /soft/nagios-plugins-1.4.16.tar.gz -C /usr/src
# cd /usr/src/nagios-plugins-1.4.16/
# ./configure --prefix=/usr/local/nagios
# make && make install
# vi /etc/httpd/conf/httpd.conf
User apache
Group apache
修改为
User nagios
Group nagios
然后找到
DirectoryIndex index.html index.html.var
添加index.php修改为
DirectoryIndex index.html index.html.var index.php
再在其下增加如下行
AddType application/x-httpd-php .php
在配置文件末尾增加如下信息:
#setting for nagios
ScriptAlias /nagios/cgi-bin "/usr/local/nagios/sbin"
<Directory "/usr/local/nagios/sbin">
AuthType Basic
Options ExecCGI
AllowOverride None
Order allow,deny
Allow from all
AuthName "Nagios Access"
AuthUserFile /usr/local/nagios/etc/htpasswd
Require valid-user
</Directory>
Alias /nagios "/usr/local/nagios/share"
<Directory "/usr/local/nagios/share">
AuthType Basic
Options None
AllowOverride None
Order allow,deny
Allow from all
AuthName "nagios Access"
AuthUserFile /usr/local/nagios/etc/htpasswd
Require valid-user
</Directory>
3.新增验证文件,并重启apache和nagios服务
# /usr/bin/htpasswd -c /usr/local/nagios/etc/htpasswd calvin
New password:
Re-type new password:
Adding password for user calvin
# service httpd restart
Stopping httpd: [FAILED]
Starting httpd: [ OK ]
# vipw
修改最后一行为
nagios:x:500:501::/home/nagios:/bin/bash
# service nagios start
Starting nagios: done.
通过浏览器输入http://192.168.217.131/nagios/
4.开始配置监控其他机器
服务端配置:
# pwd
/usr/local/nagios/etc
# vi nagios.cfg
cfg_dir=/usr/local/nagios/etc/hosts
# mkdir hosts
# cd hosts
# vi linux.cfg
define host{
use linux-server
host_name dbtest.sink.com
alias dbtest.sink.com
address 192.168.217.130
}
define service{
use generic-service
host_name dbtest.sink.com
service_description check-swap
check_command check_nrpe!check_swap
}
define service{
use generic-service
host_name dbtest.sink.com
service_description check-load
check_command check_nrpe!check_load
}
define service{
use generic-service
host_name dbtest.sink.com
service_description check-disk-sda3-usage
check_command check_nrpe!check_disk_sda3_usage
}
define service{
use generic-service
host_name dbtest.sink.com
service_description check-users
check_command check_nrpe!check_users
}
define service{
use generic-service
host_name dbtest.sink.com
service_description total-procs
check_command check_nrpe!check_total_procs
}
define service{
use generic-service
host_name dbtest.sink.com
service_description check-disk-sdb1-usage
check_command check_nrpe!check_disk_sdb1_usage
}
define service{
use generic-service
host_name dbtest.sink.com
service_description check-diskio-sdb1
check_command check_nrpe!check_diskio_sdb1
}
重启nagios服务
# /etc/init.d/nagios restart
Running configuration check... CONFIG ERROR! Restart aborted. Check your Nagios configuration.
如果报错用下面命令检查配置
# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
如果报类似下面这种错误,说明nrpe没装
Error: Service check command 'check_nrpe' specified in service 'check-disk' for host 'wlstest.sink.com' not defined anywhere!
安装nrpe
# tar -xvf /soft/nrpe-2.12.tar.gz -C /usr/src
# cd /usr/src/nrpe-2.12/
# ./configure && make all && make install-plugin
# ll /usr/local/nagios/libexec/|grep check_nrpe
-rwxrwxr-x. 1 nagios nagios 75340 Aug 3 22:44 check_nrpe
# pwd
/usr/local/nagios/etc
# vi objects/commands.cfg
#'check_nrpe ' command definition
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
再检查一下是否通过
# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
重启nagios
# service nagios restart
被监控端配置
安装nagios-plugins
# useradd nagios
# mkdir /usr/local/nagios
# chown -R nagios.nagios /usr/local/nagios
# tar -xvf /soft/nagios-plugins-1.4.16.tar.gz -C /usr/src
# cd /usr/src/nagios-plugins-1.4.16/
# ./configure --prefix=/usr/local/nagios
# make && make install
# 这里有个命令/usr/local/nagios/libexec/check_diskio是摘抄了别人写的shell脚本,nagios-plugins不带这个命令,内容如下:
# vi /usr/local/nagios/libexec/check_diskio
建立后要修改属主,并赋予可执行权限(否则nrpe无法执行该脚本):
chown nagios:nagios /usr/local/nagios/libexec/check_diskio
chmod 755 /usr/local/nagios/libexec/check_diskio
#!/bin/bash
# check_diskio - Nagios/NRPE plugin that reports the I/O utilisation of one
# disk, as measured by the external iostat program.
# by siliencen - siliencen@gmail.com
#
# bash (not plain sh) is required: the script relies on the "function"
# keyword and on [[ ... ]] tests, which a POSIX /bin/sh such as dash rejects.

# Absolute path of iostat, or an empty string when it is not installed.
# "command -v" is a shell builtin, so the lookup works even on hosts where
# the external "which" utility is missing.
iostat=$(command -v iostat 2>/dev/null)
# Print the usage text on stdout and terminate the plugin.
#
# Outputs: usage text followed by an example invocation.
# Exits:   3, the Nagios UNKNOWN state. A negative value such as -1 is not a
#          valid exit status: bash wraps it to 255 and other shells reject it,
#          and Nagios treats statuses outside 0-3 as out of bounds.
help() {
  # printf interprets \n and \t itself, so no non-portable "echo -e" is needed.
  printf 'This plugin shows the I/O usage_rate of the specified disk, using the iostat external program.\n\t example \n\t ./io -d sda2 -w 10 -c 20\n'
  exit 3
}
# Main flow of the plugin: verify iostat is available, parse -d/-w/-c/-h,
# validate the arguments, sample the disk with iostat and map the highest
# observed utilisation onto a Nagios state (0 OK, 1 WARNING, 2 CRITICAL,
# 3 UNKNOWN).

# Succeeds when $1 >= $2. awk does the comparison so decimal values work.
at_least() {
  awk -v a="$1" -v b="$2" 'BEGIN { exit !(a + 0 >= b + 0) }'
}

# Ensuring we have the needed tools.
# The test runs in the current shell (a subshell "exit" would not stop the
# script) and $iostat is quoted so that an empty lookup result is detected.
if [[ -z "$iostat" || ! -x "$iostat" ]]; then
  echo "ERROR: iostat command not found .Please install"
  exit 3
fi

# Getting parameters. Start from empty values so that nothing is inherited
# from the environment.
disk=""
warning=""
critical=""
while getopts "d:w:c:h" OPT; do
  case "$OPT" in
    d) disk=$OPTARG ;;
    w) warning=$OPTARG ;;
    c) critical=$OPTARG ;;
    h) help ;;
    *) help ;;   # unknown option or missing argument: show usage and stop
  esac
done

# Adjusting the warn and crit levels:
crit_util=$critical
warn_util=$warning

# Checking parameters:
if [[ ! -b "/dev/$disk" ]]; then
  echo "ERROR: Device incorrectly specified"
  help
fi
if [[ -z "$warn_util" || -z "$crit_util" ]]; then
  echo "ERROR: You must specify all warning and critical levels"
  help
fi
# Thresholds must be plain non-negative numbers; anything else would be
# silently treated as 0 by the numeric comparisons below.
num_re='^[0-9]+([.][0-9]+)?$'
if [[ ! "$warn_util" =~ $num_re || ! "$crit_util" =~ $num_re ]]; then
  echo "ERROR: warning and critical levels must be numeric"
  help
fi
if at_least "$warn_util" "$crit_util"; then
  echo "ERROR: critical levels must be highter than warning levels"
  help
fi

# Doing the actual check: 10 one-second extended samples, keep the highest.
#  - LC_ALL=C keeps the decimal separator a dot whatever the system locale.
#  - The device column is matched exactly, so "sdb1" does not also select
#    "sdb10".
#  - The last column ($NF) is read instead of a fixed $12 because the number
#    of columns varies between sysstat releases.
#    NOTE(review): assumes %util is the last column - confirm for the
#    installed sysstat version.
util=$(LC_ALL=C "$iostat" -dx 1 10 "$disk" \
  | awk -v dev="$disk" '$1 == dev { print $NF }' \
  | sort -nr \
  | head -n 1)

# Without a sample there is nothing to compare: report UNKNOWN rather than
# letting an empty value be classified as a real state.
if [[ -z "$util" ]]; then
  echo "UNKNOWN - no iostat data for Disk:$disk"
  exit 3
fi

# Comparing the result and setting the correct level:
if at_least "$util" "$crit_util"; then
  msg="CRITICAL"
  status=2
elif at_least "$util" "$warn_util"; then
  msg="WARNING"
  status=1
else
  msg="OK"
  status=0
fi

# Printing the results:
echo "$msg - I/O stats Disk:$disk util_rate=$util "
# Bye!
exit "$status"
安装nrpe,这里注意要加--enable-ssl
# rpm -qa|grep openssl ####检查一下openssl有没装,没装的话要装上
# tar -xvf /soft/nrpe-2.12.tar.gz -C /usr/src
# cd /usr/src/nrpe-2.12/
# ./configure --enable-ssl --with-ssl-lib=/usr/lib/
# make all && make install-plugin && make install-daemon && make install-daemon-config
配置允许监控服务器ip
# vi /usr/local/nagios/etc/nrpe.cfg
allowed_hosts=192.168.217.131,127.0.0.1
启动nrpe进程
# /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
# netstat -an|grep 5666
tcp 0 0 0.0.0.0:5666 0.0.0.0:* LISTEN
在监控服务器检查是否可以探测到nrpe进程
# /usr/local/nagios/libexec/check_nrpe -H 192.168.217.130
NRPE v2.12
在本机(被监控机)检查是否可以探测到nrpe进程
# /usr/local/nagios/libexec/check_nrpe -H 127.0.0.1
NRPE v2.12
检验nrpe的参数文件配置:
# cat /usr/local/nagios/etc/nrpe.cfg |grep -v "#" |grep -v "^$"
log_facility=daemon
pid_file=/var/run/nrpe.pid
server_port=5666
nrpe_user=nagios
nrpe_group=nagios
allowed_hosts=192.168.217.131,127.0.0.1
dont_blame_nrpe=0
debug=0
command_timeout=60
connection_timeout=300
command[check_users]=/usr/local/nagios/libexec/check_users -w 20 -c 30
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_disk_sdb1_usage]=/usr/local/nagios/libexec/check_disk -w 10% -c 5% -p /dev/sdb1
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 900 -c 1500
command[check_swap]=/usr/local/nagios/libexec/check_swap -w 10% -c 5%
command[check_diskio_sdb1]=/usr/local/nagios/libexec/check_diskio -d sdb1 -w 15 -c 20
command[check_disk_sda3_usage]=/usr/local/nagios/libexec/check_disk -w 10% -c 5% -p /dev/sda3
# ps -ef|grep nrpe
nagios 7385 1 0 Aug03 ? 00:00:00 /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
root 7926 7892 0 01:37 pts/0 00:00:00 grep nrpe
# kill -9 7385
# /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
监控服务器端:
在cgi.cfg文件的按如下配置修改或添加已经验证过的用户名calvin
# cat /usr/local/nagios/etc/cgi.cfg|grep calvin
default_user_name=calvin
authorized_for_system_information=nagiosadmin,calvin
authorized_for_configuration_information=nagiosadmin,calvin
authorized_for_system_commands=nagiosadmin,calvin
authorized_for_all_services=nagiosadmin,calvin
authorized_for_all_hosts=nagiosadmin,calvin
authorized_for_all_service_commands=nagiosadmin,calvin
authorized_for_all_host_commands=nagiosadmin,calvin
重启nagios
# /etc/init.d/nagios restart
通过浏览器输入http://192.168.217.131/nagios,然后点击左边菜单的Services就可以看到监控的服务了
5. 监控邮件报警设置
监控服务器安装sendmail客户端
# yum install sendmail
设置邮件服务器和发送报警的邮箱地址,以及用户名和密码
# cat /etc/mail.rc
set from=calvin@163.com ###发送邮件的邮箱
set smtp=smtp.163.com ###smtp服务器地址,这里是网易
set smtp-auth-user=calvin ### 邮箱用户名
set smtp-auth-password=111111 ### 邮箱密码
set smtp-auth=login ### 邮箱登陆验证
测试是否可以发送邮件:
# echo "test mail"|mail -s "test" calvin@163.com
设置nagios配置文件
# vi /usr/local/nagios/etc/objects/contacts.cfg
define contact{
contact_name calvin ####自定义通知联系人名
use generic-contact ####通知服务类型
alias calvin ####别名
service_notification_period 24x7 ####服务监控报警周期
host_notification_period 24x7 ####主机监控报警周期
email calvin@163.com ####你的邮件地址
}
define contact{
contact_name calvintest ####自定义通知联系人名
use generic-contact ####通知服务类型
alias calvintest ####别名
service_notification_period 24x7 ####服务监控报警周期
host_notification_period 24x7 ####主机监控报警周期
email calvin@126.com ####你的邮件地址
}
define contactgroup{
contactgroup_name admin
alias admin
members calvin,calvintest ####如需要发送给多人,在这里配置,用逗号隔开
}
检查命令文件,一般情况下只需核查,不需要修改:
# cat /usr/local/nagios/etc/objects/commands.cfg
....
# 'notify-host-by-email' command definition
define command{
command_name notify-host-by-email
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /bin/mail -s "** $NOTIFICATIONTYPE$ Host Alert: $HOSTNAME$ is $HOSTSTATE$ **" $CONTACTEMAIL$
}
# 'notify-service-by-email' command definition
define command{
command_name notify-service-by-email
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$\n" | /bin/mail -s "** $NOTIFICATIONTYPE$ Service Alert: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$
}
....
有时候用sendmail无法使用外部smtp发送邮件(有人说redhat6可以,本人在redhat5.4上面确实不行),这个时候用mailx即可:
先卸载sendmail,也可以不做
# yum remove sendmail
下载mailx
http://sourceforge.net/projects/heirloom/files/latest/download?source=files
安装mailx
# tar xvf /soft/mailx-12.4.tar.bz2 -C /usr/src
# cd /usr/src/mailx-12.4
# make
# make install UCBINSTALL=/usr/bin/install
# mailx -V
12.4 7/29/08
# which mailx
/usr/local/bin/mailx
配置 /etc/nail.rc,和/etc/mail.rc配置一样
# echo "test mail"|mailx -s "test" calvin@163.com
最后修改/usr/local/nagios/etc/objects/commands.cfg 文件里把/bin/mail替换成/usr/local/bin/mailx即可
报警间隔设置
新建立一个escalations.cfg文件,并和其他服务配置文件一样添加到nagios.cfg里
比如,escalations.cfg内容如下:
###设置对主机dbtest.sink.com的check-load和check-swap这两个监控第2次至第4次报警间隔为60分钟;
define serviceescalation{
host_name dbtest.sink.com
service_description check-load,check-swap ####报警间隔设置的服务名,多个用逗号隔开
first_notification 2
last_notification 4
notification_interval 60
contact_groups admin
}
###设置对主机dbtest.sink.com的check-load和check-swap这两个监控第5次开始不报警;
define serviceescalation{
host_name dbtest.sink.com
service_description check-load,check-swap
first_notification 5
last_notification 0
notification_interval 0
contact_groups admin
}
补充:如果想设置某些服务不报警,可以在服务配置文件里设置notifications_enabled 0
安装依赖包
# yum install -y php php-common php-gd php-devel php-cli perl perl-URI perl-String-CRC32 perl-IO-Socket-INET6 perl-Socket6
下载rrdtool和pnp
http://download.csdn.net/download/bhjsj04516/1769396
http://downloads.sourceforge.net/project/pnp4nagios/PNP/pnp-0.4.14/pnp-0.4.14.tar.gz?use_mirror=nchc
rrdtool-1.0.50.tar.gz
pnp-0.4.14.tar.gz
开始安装
# tar -xvf /soft/rrdtool-1.0.50.tar.gz -C /usr/src/
# cd /usr/src/rrdtool-1.0.50/
# ./configure && make && make install
# tar -xvf /soft/pnp-0.4.14.tar.gz -C /usr/src/
# cd /usr/src/pnp-0.4.14/
# ./configure --with-rrdtool=/usr/local/rrdtool-1.0.50/bin/rrdtool
# make all && make install && make install-config && make install-init
在apache配置文件里添加首页php页面,之前已添加
# cat /etc/httpd/conf/httpd.conf
DirectoryIndex index.html index.html.var index.php
访问验证php出图页面,如果看到下面的结果,说明还没采集到数据
http://192.168.217.131/nagios/pnp/index.php?host=192.168.217.131
Initalising
Using /usr/local/nagios/share/perfdata/
RRDTool /usr/local/rrdtool-1.0.50/bin/rrdtool found.
RRDTool /usr/local/rrdtool-1.0.50/bin/rrdtool is executable
PHP Function proc_open is enabled
PHP Function fpassthru is enabled
PHP Function xml_parser_create is enabled
PHP zlib Support found.
PHP GD Support found.
RRD Base Directory /usr/local/nagios/share/perfdata/ found.
Hostname 192.168.217.131 is set.
Directory /usr/local/nagios/share/perfdata/192.168.217.131 not found.
修改nagios.cfg文件
# vi /usr/local/nagios/etc/nagios.cfg
process_performance_data=1 ####修改为1
host_perfdata_command=process-host-perfdata ####打开注释
service_perfdata_command=process-service-perfdata ####打开注释
修改commands.cfg文件,按如下注释或者删除这两个命令的原定义,替换成新的
# vi /usr/local/nagios/etc/objects/commands.cfg
#define command{
# command_name process-host-perfdata
# command_line /usr/bin/printf "%b" "$LASTHOSTCHECK$\t$HOSTNAME$\t$HOSTSTATE$\t$HOSTATTEMPT$\t$HOSTSTATETYPE$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$\n" >> /usr/local/nagios/var/host-perfdata.out
# }
# 'process-service-perfdata' command definition
#define command{
# command_name process-service-perfdata
# command_line /usr/bin/printf "%b" "$LASTSERVICECHECK$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICESTATE$\t$SERVICEATTEMPT$\t$SERVICESTATETYPE$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$\n" >> /usr/local/nagios/var/service-perfdata.out
# }
define command{
command_name process-host-perfdata
command_line /usr/local/nagios/libexec/process_perfdata.pl
}
define command{
command_name process-service-perfdata
command_line /usr/local/nagios/libexec/process_perfdata.pl
}
在监控hosts主机文件添加如下两行:
# vi /usr/local/nagios/etc/hosts/linux.cfg
define host{
use linux-server
host_name FZ-APP
alias OS_10.10.5.55
address 10.10.5.55
process_perf_data 1 ####添加
action_url /nagios/pnp/index.php?host=$HOSTNAME$ ####添加
}
重启nagios,就可以看到主机前面有个太阳了,点进去可以看到绘图。
参考:
http://wenku.baidu.com/view/4daf21a0284ac850ad02426c.html
http://blog.chinaunix.net/uid-26849186-id-3806307.html
http://www.2cto.com/os/201208/148484.html