实验目的:部署Nagios监控软件,实时监控主机和服务(有问题通过Email告知Admin)
实验环境:rhel5.5 10.1.1.190(web) 10.1.1.191(mysql) 10.1.1.192(web) nagios:10.1.1.193
实验前提:Nagios监控服务器有web服务(此处采用apache)
实验步骤:
第一:部署Nagios所需软件包说明
fping-2.4-1.b2.2.el5.rf.i386.rpm
nagios-2.9-1.el5.rf.i386.rpm --nagios主程序
nagios-devel-2.9-1.el5.rf.i386.rpm --nagios所需库文件
nagios-nrpe-2.5.2-1.el5.rf.i386.rpm --nrpe服务
nagios-plugins-1.4.9-1.el5.rf.i386.rpm --nagios服务插件
nagios-plugins-nrpe-2.12-3.el5.i386.rpm --nrpe服务插件
perl-Crypt-DES-2.05-3.2.el5.rf.i386.rpm
perl-Net-SNMP-5.2.0-1.2.el5.rf.i386.rpm
|-监控服务,监控主机 ------>nagios服务
【nagios】
|-资源阈值,eg:磁盘容量超过70%报警等 ------->nrpe服务提供
第二:nagios主配置文件配置
配置环境:/etc/nagios/
localhost.cfg 模板参考
cgi.cfg 权限定义
command-plugins.cfg 插件定义
commands.cfg 插件定义
nagios.cfg nagios服务配置
nrpe.cfg nrpe服务
resource.cfg 插件目录位置定义
/etc/nagios/nagios.cfg
cfg_file=/etc/nagios/contactgroups.cfg
cfg_file=/etc/nagios/contacts.cfg
#cfg_file=/etc/nagios/dependencies.cfg
#cfg_file=/etc/nagios/escalations.cfg
cfg_file=/etc/nagios/hostgroups.cfg
cfg_file=/etc/nagios/hosts.cfg
cfg_file=/etc/nagios/services.cfg
cfg_file=/etc/nagios/timeperiods.cfg
check_external_commands=1
第三:nagios.cfg文件中,相关文件的配置
1./etc/nagios/timeperiods.cfg 定义上班时间(三班倒)
define timeperiod{
timeperiod_name morning
alias morning
sunday 8:00-16:00
monday 8:00-16:00
tuesday 8:00-16:00
wednesday 8:00-16:00
thursday 8:00-16:00
friday 8:00-16:00
saturday 8:00-16:00
}
define timeperiod{
timeperiod_name afternoon
alias aftertime
sunday 16:00-24:00
monday 16:00-24:00
tuesday 16:00-24:00
wednesday 16:00-24:00
thursday 16:00-24:00
friday 16:00-24:00
saturday 16:00-24:00
}
define timeperiod{
timeperiod_name evening
alias evening
sunday 00:00-08:00
monday 00:00-08:00
tuesday 00:00-08:00
wednesday 00:00-08:00
thursday 00:00-08:00
friday 00:00-08:00
saturday 00:00-08:00
}
2./etc/nagios/contacts.cfg定义联系人
define contact{
contact_name john
alias sql Admin, web admin
service_notification_period evening
host_notification_period evening
service_notification_options w,u,c,r
host_notification_options d,r
service_notification_commands notify-by-email
host_notification_commands host-notify-by-email
email john@163.com
}
define contact{
contact_name terry
alias web admin
service_notification_period evening
host_notification_period evening
service_notification_options w,u,c,r
host_notification_options d,r
service_notification_commands notify-by-email
host_notification_commands host-notify-by-email
email terry@163.com
}
3./etc/nagios/contactgroups.cfg 定义联系组
define contactgroup{
contactgroup_name webadm
alias web Administrators
members terry,john
}
define contactgroup{
contactgroup_name sqladm
alias sql Administrators
members john
}
4./etc/nagios/hosts.cfg定义被监控主机
define host{
host_name station190.cluster.com
alias station190
notification_options d,u,r
notification_period 24x7
notification_interval 10
max_check_attempts 4
contact_groups webadm
address 10.1.1.190
check_command check-host-alive
\<-在 commands.cfg中一定要定义
}
define host{
host_name station191.cluster.com
alias station191
notification_options d,u,r
notification_period 24x7
notification_interval 10
max_check_attempts 4
contact_groups sqladm
address 10.1.1.191
check_command check-host-alive
\ <-在 commands.cfg中一定要 定义
}
define host{
host_name station192.cluster.com
alias station192
notification_options d,u,r
notification_period 24x7
notification_interval 10
max_check_attempts 4
contact_groups webadm
address 10.1.1.192
check_command check-host-alive
\<-在 commands.cfg中一定要 定义
}
--------------------------------------------------------------------------------------------------------------
define command{
command_name check-host-alive
command_line $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 1
}
--------------------------------------------------------------------------------------------------------------
5./etc/nagios/hostgroups.cfg 定义主机组
define hostgroup{
hostgroup_name web_service
alias www Servers
members station190.cluster.com, station192.cluster.com
}
define hostgroup{
hostgroup_name sql_service
alias sql Servers
members station191.cluster.com
}
6./etc/nagios/services.cfg
#--------------------------------主机----------------------------------------
define service{
host_name station190.cluster.com
service_description check-host-alive
check_command check-host-alive
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
contact_groups webadm
}
define service{
host_name station191.cluster.com
service_description check-host-alive
check_command check-host-alive
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
contact_groups webadm
}
define service{
host_name station192.cluster.com
service_description check-host-alive
check_command check-host-alive
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
contact_groups webadm
}
#------------------------------服务-----------------------------------------
define service{
host_name station190.cluster.com
service_description check_http
check_command check_http
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
contact_groups webadm
}
define service{
host_name station191.cluster.com
service_description check_mysql
check_command check_mysql
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
contact_groups sqladm
}
define service{
host_name station192.cluster.com
service_description check_http
check_command check_http
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
contact_groups webadm
}
注意:mysql服务检测需要:
#------------------------------------------------------------------------------------
host:10.1.1.191
mysql>grant select on *.* to 'test'@'10.1.1.193' identified by '123123';
mysql>flush privileges;
nagios:10.1.1.193
在/etc/nagios/commands.cfg中添加:
define command{
command_name check_mysql
command_line $USER1$/check_mysql -H $HOSTADDRESS$ -P 3306 -u test -p123123
}
dns服务:
define command{
command_name check_dns
command_line $USER1$/check_dns -H www.yahoo.com -s $HOSTADDRESS$
}
nagios相关插件存放位置:/usr/lib/nagios/plugins
7./etc/nagios/cgi.cfg 权限定义
default_user_name=terry
authorized_for_system_information=terry
authorized_for_configuration_information=terry
authorized_for_system_commands=terry
authorized_for_all_services=terry
authorized_for_all_hosts=terry
authorized_for_all_service_commands=terry
authorized_for_all_host_commands=terry
第四:校验 nagios配置是否有错
nagios -v /etc/nagios/nagios.cfg <- 校验
Total Warnings: 0
Total Errors: 0
启动nagios:
service nagios start <- 不报错
第五:配置apache服务器(在nagios部署之前完成apache搭建)
yum -y install httpd-*
安装nagios之后会产生:
/etc/httpd/conf.d/nagios.conf
ScriptAlias /nagios/cgi-bin "/usr/lib/nagios/cgi"
<Directory "/usr/lib/nagios/cgi">
Options ExecCGI
AllowOverride None
Order allow,deny
Allow from 10.1.1.0/255.255.255.0 #####
AuthName "Nagios Access"
AuthType Basic
AuthUserFile /etc/nagios/htpasswd.users <- 定义用户
Require valid-user
</Directory>
Alias /nagios "/usr/share/nagios"
<Directory "/usr/share/nagios">
Options None
AllowOverride None
Order allow,deny
Allow from 10.1.1.0/255.255.255.0 ####
AuthName "Nagios Access"
AuthType Basic
AuthUserFile /etc/nagios/htpasswd.users <- 定义
Require valid-user
</Directory>
htpasswd -c /etc/nagios/htpasswd.users terry
=========================================
资源阈值监控:
服务器
hosts.cfg -> 对应主机
commands.cfg
define command {
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -P 5666 -c $ARG1$
}
/etc/nagios/services.cfg
define service{
host_name station146.cluster.com
service_description check_disk1
check_command check_nrpe!check_disk1 <- command
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
contact_groups sqladm
}
define service{
host_name station146.cluster.com
service_description check_user
check_command check_nrpe!check_user <- command
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
contact_groups sqladm
}
客户端
nagios-plugins-1.4.9-1.el5.rf
nagios-nrpe
1.cat /etc/nagios/nrpe.cfg
pid_file=/var/run/nrpe.pid
server_port=5666
server_address=127.0.0.1, 10.1.1.193
nrpe_user=nagios
nrpe_group=nagios
allowed_hosts=127.0.0.1,10.1.1.193
dont_blame_nrpe=0
debug=0
command_timeout=60
command[check_disk1]=/usr/lib/nagios/plugins/check_disk -w 20% -c 10% -p /dev/sda1 (剩余20%警告)
command[check_user]=/usr/lib/nagios/plugins/check_users -w 5 -c 10
2.vi /etc/xinetd.d/nrpe
disable = no
service xinetd restart
-------------------------------Nagios部署完成---------------------------------------
有不足之处请网友批评指正!稍后会每天发布一篇个人实验积累的笔记!
实验环境:rhel5.5 10.1.1.190(web) 10.1.1.191(mysql) 10.1.1.192(web) nagios:10.1.1.193
实验前提:Nagios监控服务器有web服务(此处采用apache)
实验步骤:
第一:部署Nagios所需软件包说明
fping-2.4-1.b2.2.el5.rf.i386.rpm
nagios-2.9-1.el5.rf.i386.rpm --nagios主程序
nagios-devel-2.9-1.el5.rf.i386.rpm --nagios所需库文件
nagios-nrpe-2.5.2-1.el5.rf.i386.rpm --nrpe服务
nagios-plugins-1.4.9-1.el5.rf.i386.rpm --nagios服务插件
nagios-plugins-nrpe-2.12-3.el5.i386.rpm --nrpe服务插件
perl-Crypt-DES-2.05-3.2.el5.rf.i386.rpm
perl-Net-SNMP-5.2.0-1.2.el5.rf.i386.rpm
|-监控服务,监控主机 ------>nagios服务
【nagios】
|-资源阈值,eg:磁盘容量超过70%报警等 ------->nrpe服务提供
第二:nagios主配置文件配置
配置环境:/etc/nagios/
localhost.cfg 模板参考
cgi.cfg 权限定义
command-plugins.cfg 插件定义
commands.cfg 插件定义
nagios.cfg nagios服务配置
nrpe.cfg nrpe服务
resource.cfg 插件目录位置定义
/etc/nagios/nagios.cfg
cfg_file=/etc/nagios/contactgroups.cfg
cfg_file=/etc/nagios/contacts.cfg
#cfg_file=/etc/nagios/dependencies.cfg
#cfg_file=/etc/nagios/escalations.cfg
cfg_file=/etc/nagios/hostgroups.cfg
cfg_file=/etc/nagios/hosts.cfg
cfg_file=/etc/nagios/services.cfg
cfg_file=/etc/nagios/timeperiods.cfg
check_external_commands=1
第三:nagios.cfg文件中,相关文件的配置
1./etc/nagios/timeperiods.cfg 定义上班时间(三班倒)
define timeperiod{
timeperiod_name morning
alias morning
sunday 8:00-16:00
monday 8:00-16:00
tuesday 8:00-16:00
wednesday 8:00-16:00
thursday 8:00-16:00
friday 8:00-16:00
saturday 8:00-16:00
}
define timeperiod{
timeperiod_name afternoon
alias aftertime
sunday 16:00-24:00
monday 16:00-24:00
tuesday 16:00-24:00
wednesday 16:00-24:00
thursday 16:00-24:00
friday 16:00-24:00
saturday 16:00-24:00
}
define timeperiod{
timeperiod_name evening
alias evening
sunday 00:00-08:00
monday 00:00-08:00
tuesday 00:00-08:00
wednesday 00:00-08:00
thursday 00:00-08:00
friday 00:00-08:00
saturday 00:00-08:00
}
2./etc/nagios/contacts.cfg定义联系人
define contact{
contact_name john
alias sql Admin, web admin
service_notification_period evening
host_notification_period evening
service_notification_options w,u,c,r
host_notification_options d,r
service_notification_commands notify-by-email
host_notification_commands host-notify-by-email
email john@163.com
}
define contact{
contact_name terry
alias web admin
service_notification_period evening
host_notification_period evening
service_notification_options w,u,c,r
host_notification_options d,r
service_notification_commands notify-by-email
host_notification_commands host-notify-by-email
email terry@163.com
}
3./etc/nagios/contactgroups.cfg 定义联系组
define contactgroup{
contactgroup_name webadm
alias web Administrators
members terry,john
}
define contactgroup{
contactgroup_name sqladm
alias sql Administrators
members john
}
4./etc/nagios/hosts.cfg定义被监控主机
define host{
host_name station190.cluster.com
alias station190
notification_options d,u,r
notification_period 24x7
notification_interval 10
max_check_attempts 4
contact_groups webadm
address 10.1.1.190
check_command check-host-alive
\<-在 commands.cfg中一定要定义
}
define host{
host_name station191.cluster.com
alias station191
notification_options d,u,r
notification_period 24x7
notification_interval 10
max_check_attempts 4
contact_groups sqladm
address 10.1.1.191
check_command check-host-alive
\ <-在 commands.cfg中一定要 定义
}
define host{
host_name station192.cluster.com
alias station192
notification_options d,u,r
notification_period 24x7
notification_interval 10
max_check_attempts 4
contact_groups webadm
address 10.1.1.192
check_command check-host-alive
\<-在 commands.cfg中一定要 定义
}
--------------------------------------------------------------------------------------------------------------
define command{
command_name check-host-alive
command_line $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 1
}
--------------------------------------------------------------------------------------------------------------
5./etc/nagios/hostgroups.cfg 定义主机组
define hostgroup{
hostgroup_name web_service
alias www Servers
members station190.cluster.com, station192.cluster.com
}
define hostgroup{
hostgroup_name sql_service
alias sql Servers
members station191.cluster.com
}
6./etc/nagios/services.cfg
#--------------------------------主机----------------------------------------
define service{
host_name station190.cluster.com
service_description check-host-alive
check_command check-host-alive
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
contact_groups webadm
}
define service{
host_name station191.cluster.com
service_description check-host-alive
check_command check-host-alive
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
contact_groups webadm
}
define service{
host_name station192.cluster.com
service_description check-host-alive
check_command check-host-alive
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
contact_groups webadm
}
#------------------------------服务-----------------------------------------
define service{
host_name station190.cluster.com
service_description check_http
check_command check_http
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
contact_groups webadm
}
define service{
host_name station191.cluster.com
service_description check_mysql
check_command check_mysql
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
contact_groups sqladm
}
define service{
host_name station192.cluster.com
service_description check_http
check_command check_http
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
contact_groups webadm
}
注意:mysql服务检测需要:
#------------------------------------------------------------------------------------
host:10.1.1.191
mysql>grant select on *.* to 'test'@'10.1.1.193' identified by '123123';
mysql>flush privileges;
nagios:10.1.1.193
在/etc/nagios/commands.cfg中添加:
define command{
command_name check_mysql
command_line $USER1$/check_mysql -H $HOSTADDRESS$ -P 3306 -u test -p123123
}
dns服务:
define command{
command_name check_dns
command_line $USER1$/check_dns -H www.yahoo.com -s $HOSTADDRESS$
}
nagios相关插件存放位置:/usr/lib/nagios/plugins
7./etc/nagios/cgi.cfg 权限定义
default_user_name=terry
authorized_for_system_information=terry
authorized_for_configuration_information=terry
authorized_for_system_commands=terry
authorized_for_all_services=terry
authorized_for_all_hosts=terry
authorized_for_all_service_commands=terry
authorized_for_all_host_commands=terry
第四:校验 nagios配置是否有错
nagios -v /etc/nagios/nagios.cfg <- 校验
Total Warnings: 0
Total Errors: 0
启动nagios:
service nagios start <- 不报错
第五:配置apache服务器(在nagios部署之前完成apache搭建)
yum -y install httpd-*
安装nagios之后会产生:
/etc/httpd/conf.d/nagios.conf
ScriptAlias /nagios/cgi-bin "/usr/lib/nagios/cgi"
<Directory "/usr/lib/nagios/cgi">
Options ExecCGI
AllowOverride None
Order allow,deny
Allow from 10.1.1.0/255.255.255.0 #####
AuthName "Nagios Access"
AuthType Basic
AuthUserFile /etc/nagios/htpasswd.users <- 定义用户
Require valid-user
</Directory>
Alias /nagios "/usr/share/nagios"
<Directory "/usr/share/nagios">
Options None
AllowOverride None
Order allow,deny
Allow from 10.1.1.0/255.255.255.0 ####
AuthName "Nagios Access"
AuthType Basic
AuthUserFile /etc/nagios/htpasswd.users <- 定义
Require valid-user
</Directory>
htpasswd -c /etc/nagios/htpasswd.users terry
=========================================
资源阈值监控:
服务器
hosts.cfg -> 对应主机
commands.cfg
define command {
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -P 5666 -c $ARG1$
}
/etc/nagios/services.cfg
define service{
host_name station146.cluster.com
service_description check_disk1
check_command check_nrpe!check_disk1 <- command
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
contact_groups sqladm
}
define service{
host_name station146.cluster.com
service_description check_user
check_command check_nrpe!check_user <- command
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
contact_groups sqladm
}
客户端
nagios-plugins-1.4.9-1.el5.rf
nagios-nrpe
1.cat /etc/nagios/nrpe.cfg
pid_file=/var/run/nrpe.pid
server_port=5666
server_address=127.0.0.1, 10.1.1.193
nrpe_user=nagios
nrpe_group=nagios
allowed_hosts=127.0.0.1,10.1.1.193
dont_blame_nrpe=0
debug=0
command_timeout=60
command[check_disk1]=/usr/lib/nagios/plugins/check_disk -w 20% -c 10% -p /dev/sda1 (剩余20%警告)
command[check_user]=/usr/lib/nagios/plugins/check_users -w 5 -c 10
2.vi /etc/xinetd.d/nrpe
disable = no
service xinetd restart
-------------------------------Nagios部署完成---------------------------------------
有不足之处请网友批评指正!稍后会每天发布一篇个人实验积累的笔记!
转载于:https://blog.51cto.com/peishuangcai/736635