服务器端IP地址:192.168.100.172
客户端IP地址:192.168.100.117
1、安装基本环境:
[root@dawning-01 src]#
yum install openssl openssl-devel gcc sysstat
2、创建用户:
[root@dawning-01 src]#
groupadd nagios
[root@dawning-01 src]#
useradd -g nagios -s /sbin/nologin nagios
[root@dawning-01 src]#
passwd nagios
3、解压缩:
[root@dawning-01 src]#
tar -zxvf nagios-plugins-1.4.16.tar.gz
[root@dawning-01 src]#
tar -zxvf nrpe-2.14.tar.gz
[root@dawning-01 src]#
cd nagios-plugins-1.4.16
4、编译、安装nagios-plugins:
[root@dawning-01 nagios-plugins-1.4.16]#
./configure --prefix=/usr/local/nagios --with-nagios-user=nagios --with-nagios-group=nagios
[root@dawning-01 nagios-plugins-1.4.16]#
make
[root@dawning-01 nagios-plugins-1.4.16]#
make install
5、检查:
[root@dawning-01 nagios-plugins-1.4.16]#
ll /usr/local/nagios/
total 12
drwxr-xr-x 2 root root 4096 Dec 8 17:43 include
drwxr-xr-x 2 root root 4096 Dec 8 17:43 libexec
drwxr-xr-x 3 root root 4096 Dec 8 17:43 share
6、编译、安装nrpe:
[root@dawning-01 nagios-plugins-1.4.16]#
cd /usr/local/src/nrpe-2.14
[root@dawning-01 nrpe-2.14]#
./configure --prefix=/usr/local/nagios --enable-ssl --with-nagios-user=nagios --with-nagios-group=nagios
[root@dawning-01 nrpe-2.14]#
make all
[root@dawning-01 nrpe-2.14]#
make install-plugin
[root@dawning-01 nrpe-2.14]#
make install-daemon
[root@dawning-01 nrpe-2.14]#
make install-daemon-config
[root@dawning-01 nrpe-2.14]#
make install-xinetd(若不配置依赖xinetd服务,可不安装。)
[root@dawning-01 nrpe-2.14]#
yum install xinetd(若不配置依赖xinetd服务,可不安装。)
配置NRPE通过xinetd方式运行(由xinetd托管监听5666端口):
6-1、更改/etc/xinetd.d/nrpe文件,设置允许nagios服务器连接,如nagios服务器的ip为192.168.100.172:
[root@dawning-01 nrpe-2.14]# vim /etc/xinetd.d/nrpe
only_from = 127.0.0.1 192.168.100.172
6-2、在/etc/services结尾增加:
[root@dawning-01 nrpe-2.14]# vim /etc/services
nrpe 5666/tcp # NRPE
6-3、启动xinetd,并检查5666(TCP)端口是否正常运行在xinetd服务之下:
# service xinetd restart
[root@dawning-01 nrpe-2.14]#
netstat -natlp | grep 5666
tcp 0 0 :::5666 :::* LISTEN 22021/xinetd
7、修改相关权限:
[root@dawning-01 nrpe-2.14]#
chown nagios:nagios /usr/local/nagios/
[root@dawning-01 nrpe-2.14]#
chown -R nagios:nagios /usr/local/nagios/libexec/
8、检查结果:
[root@dawning-01 nrpe-2.14]#
ll /usr/local/nagios/
total 20
drwxrwxr-x 2 nagios nagios 4096 Dec 8 17:53 bin
drwxrwxr-x 2 nagios nagios 4096 Dec 8 17:53 etc
drwxr-xr-x 2 root root 4096 Dec 8 17:43 include
drwxrwxr-x 2 nagios nagios 4096 Dec 8 17:52 libexec
drwxr-xr-x 3 root root 4096 Dec 8 17:43 share
9、修改相关配置文件:
[root@dawning-01 nrpe-2.14]#
vim /usr/local/nagios/etc/nrpe.cfg
第81行,添加nagios服务器的IP地址。
allowed_hosts=127.0.0.1,192.168.100.172
10、修改防火墙相关配置:
[root@dawning-01 nrpe-2.14]#
vim /etc/sysconfig/iptables
开放5666端口:
-A INPUT -m state --state NEW -m tcp -p tcp --dport 5666 -j ACCEPT
[root@dawning-01 nrpe-2.14]#
service iptables restart
11、检查本机的nrpe配置是否正确:
[root@dawning-01 nrpe-2.14]#
/usr/local/nagios/libexec/check_nrpe -H 127.0.0.1
NRPE v2.14
12、在服务器端配置相应项,并测试服务器与客户端的通信:
[root@had04 ~]#
vim /usr/local/nagios/etc/nrpe.cfg
allowed_hosts=127.0.0.1,192.168.100.117
[root@had04 ~]#
service nagios restart
[root@had04 ~]#
/usr/local/nagios/libexec/check_nrpe -H 192.168.100.117
NRPE v2.14
13、根据本机实际环境添加需要监控的项目:
[root@had04 ~]#
vim /usr/local/nagios/etc/Linuxservers/192.168.100.117.cfg
[root@had04 ~]#
cat /usr/local/nagios/etc/Linuxservers/192.168.100.117.cfg
###############################################################################
# LOCALHOST.CFG - SAMPLE OBJECT CONFIG FILE FOR MONITORING THIS MACHINE
#
# Last Modified: 05-31-2007
#
# NOTE: This config file is intended to serve as an *extremely* simple
# example of how you can create configuration entries to monitor
# the local (Linux) machine.
#
###############################################################################
###############################################################################
###############################################################################
#
# HOST DEFINITION
#
###############################################################################
###############################################################################
# Define a host for the local machine
define host{
use linux-server,host-pnp4 ; Name of host template to use
; This host definition will inherit all variables that are defined
; in (or inherited by) the linux-server host template definition.
host_name 192.168.100.117
alias 192.168.100.117
address 192.168.100.117
parents linksys-Ruijie
icon_image linux40.gif
statusmap_image linux40.gd2
2d_coords 500,200
3d_coords 500,200,100
}
###############################################################################
###############################################################################
#
# HOST GROUP DEFINITION
#
###############################################################################
###############################################################################
# Define an optional hostgroup for Linux machines
#define hostgroup{
# hostgroup_name linux-servers ; The name of the hostgroup
# alias Linux Servers ; Long name of the group
# members * ; Comma separated list of hosts that belong to this group
# }
###############################################################################
###############################################################################
#
# SERVICE DEFINITIONS
#
###############################################################################
###############################################################################
# Define a service of "CPU_stat" with the local machine
define service{
use generic-service,svr-pnp4 ; Name of service template to use
host_name 192.168.100.117
service_description CPU-stats
check_command check_nrpe!check_cpu
}
# Define a service to "ping" the local machine
define service{
use generic-service,svr-pnp4 ; Name of service template to use
host_name 192.168.100.117
service_description PING
check_command check_nrpe!check_ping
}
# Define a service to check the disk space of the root partition
# on the local machine. Warning if < 20% free, critical if
# < 10% free space on partition.
define service{
use generic-service,svr-pnp4 ; Name of service template to use
host_name 192.168.100.117
service_description Root Partition
check_command check_nrpe!check_sda1
}
# Define a service to check the number of currently logged in
# users on the local machine. Warning if > 20 users, critical
# if > 50 users.
define service{
use generic-service,svr-pnp4 ; Name of service template to use
host_name 192.168.100.117
service_description Current Users
check_command check_nrpe!check_users
}
# Define a service to check the number of currently running procs
# on the local machine. Warning if > 250 processes, critical if
# > 400 users.
define service{
use generic-service,svr-pnp4 ; Name of service template to use
host_name 192.168.100.117
service_description Total Processes
check_command check_nrpe!check_total_procs
#process_perf_data 1
}
# Define a service to check the load on the local machine.
define service{
use generic-service,svr-pnp4 ; Name of service template to use
host_name 192.168.100.117
service_description Current Load
check_command check_nrpe!check_load
}
# Define a service to check the swap usage the local machine.
# Critical if less than 10% of swap is free, warning if less than 20% is free
define service{
use generic-service,svr-pnp4 ; Name of service template to use
host_name 192.168.100.117
service_description Swap Usage
check_command check_nrpe!check_swap
}
# Define a service to check SSH on the local machine.
# Disable notifications for this service by default, as not all users may have SSH enabled.
define service{
use generic-service,svr-pnp4 ; Name of service template to use
host_name 192.168.100.117
service_description SSH
check_command check_nrpe!check_ssh
notifications_enabled 1
}
define service{
use generic-service,svr-pnp4 ; Name of service template to use
host_name 192.168.100.117
service_description Zombie_Procs
check_command check_nrpe!check_zombie_procs
}
define service{
use generic-service,svr-pnp4 ; Name of service template to use
host_name 192.168.100.117
service_description Check_Disk
check_command check_nrpe!check_disk
}
define service{
use generic-service,svr-pnp4 ; Name of service template to use
host_name 192.168.100.117
service_description Check_Uptime
check_command check_nrpe!check_uptime
}
define service{
use generic-service,svr-pnp4 ; Name of service template to use
host_name 192.168.100.117
service_description Check_Memory
check_command check_nrpe!check_memory
}
define service{
use generic-service,svr-pnp4 ; Name of service template to use
host_name 192.168.100.117
service_description Check_IOSTAT
check_command check_nrpe!check_iostat
}
define service{
use generic-service,svr-pnp4 ; Name of service template to use
host_name 192.168.100.117
service_description TCP_stat
check_command check_nrpe!check_tcpstat
}
define service{
use generic-service,svr-pnp4 ; Name of service template to use
host_name 192.168.100.117
service_description Traffic_eth0
check_command check_nrpe!check_net_traffic_eth0
}
define service{
use generic-service,svr-pnp4 ; Name of service template to use
host_name 192.168.100.117
service_description check_procs_uGuard
check_command check_nrpe!check_procs_uGuard
}
14、解释以上标注服务--添加服务实现对linux主机eth0网卡流量的监控:
确认客户端/usr/local/nagios/libexec/目录下存在监控文件check_net_traffic.sh。
在客户端执行命令:
[root@dawning-01 libexec]#
/usr/local/nagios/libexec/check_net_traffic.sh -d eth0 -w 2M -c 3M
This script is First run! TIME="2016-03-05 22:30:26";RX=33205684942;TX=27540050551;DEV=eth0
[root@dawning-01 libexec]#
/usr/local/nagios/libexec/check_net_traffic.sh -d eth0 -w 2M -c 3M
eth0 Traffic is OK - In: 940 B/s Out: 775 B/s interval: 37s |in=940;2097152;3145728;0;1073741824 out=775;2097152;3145728;0;1073741824
check_net_traffic.sh相关参数说明:
[root@dawning-01 libexec]#
/usr/local/nagios/libexec/check_net_traffic.sh -h
/usr/local/nagios/libexec/check_net_traffic.sh: illegal option -- h
Usage: /usr/local/nagios/libexec/check_net_traffic.sh -d [ eth|bond ]
For example: /usr/local/nagios/libexec/check_net_traffic.sh -d bond0 -w 100[B|K|M|G] -c 200[B|K|M|G]
15、添加检查进程中包含'vms-server'字符串的服务:
15-1、手动检查该服务:
[root@dawning-01 libexec]#
./check_procs -w 1:1 -c 1:2 -a 'vms-server'
PROCS OK: 1 process with args 'vms-server'
15-2、在客户端添加服务:
[root@dawning-01 libexec]#
vim /usr/local/nagios/etc/nrpe.cfg
添加以下内容:
command[check_procs_vms-server]=/usr/local/nagios/libexec/check_procs -a 'vms-server' -w 1:1 -c 1:2
15-3、在服务器端添加检查该服务的名称:
[root@had04 ~]#
vim /usr/local/nagios/etc/Linuxservers/192.168.100.117.cfg
define service{
use generic-service,svr-pnp4 ; Name of service template to use
host_name 192.168.100.117
service_description check_procs_vms-server
check_command check_nrpe!check_procs_vms-server
}
15-4、重启服务器端nagios服务:
[root@had04 ~]#
service nagios restart
16、添加检查端口是否打开的服务:
16-1、手动检查该服务:
[root@dawning-01 libexec]#
./check_tcp -H 192.168.100.117 -p 1692
TCP OK - 0.000 second response time on port 1692|time=0.000314s;;;0.000000;10.000000
16-2、在客户端添加服务:
[root@dawning-01 libexec]#
vim /usr/local/nagios/etc/nrpe.cfg
添加以下内容:
command[check_tcp-1692]=/usr/local/nagios/libexec/check_tcp -p 1692
16-3、在服务器端添加检查该服务的名称:
define service{
use generic-service,svr-pnp4 ; Name of service template to use
host_name 192.168.100.117
service_description check_tcp-1692
check_command check_nrpe!check_tcp-1692
}
16-4、重启服务器端nagios服务:
[root@had04 ~]#
service nagios restart
linux技术交流群:295294329 本文由朋友郭工整理,感谢他的分享!