zabbix版本:4.2.3
系统:CentOS7.6
解决问题:可以直观地看到哪十个进程消耗的CPU和内存资源最多
1、先确认zabbix agent从哪个目录读取附加的conf配置文件
# grep '^Include' /etc/zabbix/zabbix_agentd.conf
Include=/data/software/zabbix/conf/*.conf
2、确认zabbix调用的脚本存放在哪个目录下
/data/software/zabbix/scripts/
一、先配置个定时任务提取进程相关信息
1、cat /data/software/zabbix/scripts/top.sh
#!/usr/bin/env bash
# Capture one batch-mode iteration of top into the snapshot file that the
# Zabbix helper scripts parse.
#
# The snapshot is written to a temporary file in the same directory and then
# renamed over the target, so a reader never sees a truncated or half-written
# file while top is still producing output.
set -euo pipefail

readonly out_file=/data/software/zabbix/tmp/top.txt

tmp_file=$(mktemp "${out_file}.XXXXXX")
# Remove the temporary file on any exit path; harmless after a successful mv.
trap 'rm -f -- "$tmp_file"' EXIT

top -n 1 -b > "$tmp_file"
# mktemp creates the file with mode 0600; restore world-readable permissions
# so an agent running as a different user can still read the snapshot.
chmod 0644 "$tmp_file"
mv -f -- "$tmp_file" "$out_file"
2、配置定时任务获取top信息
# Refresh the top snapshot every minute; all output is discarded.
# Uses the POSIX redirection form because cron runs commands with /bin/sh,
# where the bash-only "&>" is parsed as "run in background" plus a redirect.
*/1 * * * * /bin/bash /data/software/zabbix/scripts/top.sh > /dev/null 2>&1
3、编写脚本找出内存(RES)占用排名前十的进程名称
# cat discovery_process.sh
#!/usr/bin/env bash
# Zabbix low-level discovery: list the ten process names with the largest
# summed RES column in the top snapshot, as {"data":[{"{#PROCESSNAME}":...}]}.
process_file=/data/software/zabbix/tmp/top.txt

#######################################
# Print up to ten process names, largest summed RES first, one per line.
# Arguments:
#   $1 - path to a `top -b -n 1` snapshot; its first 7 lines are skipped
# Outputs:
#   Process names (last field of each row) on stdout; nothing when the file
#   is missing, unreadable, or has no process rows.
#######################################
top_process_names() {
  local file=$1

  [[ -r "$file" ]] || return 0

  # kib() normalises values that top has scaled with an m/g/t suffix
  # (e.g. "1.2g"); a plain += would silently read "1.2g" as 1.2.
  # The NF guard skips blank rows so no empty process name is produced.
  tail -n +8 -- "$file" \
    | awk '
        function kib(v,  n, u) {
          n = v + 0
          u = tolower(substr(v, length(v)))
          if (u == "m") return n * 1024
          if (u == "g") return n * 1024 * 1024
          if (u == "t") return n * 1024 * 1024 * 1024
          return n
        }
        NF { rss[$NF] += kib($6) }
        END { for (name in rss) print rss[name] / 1024, name }
      ' \
    | sort -gr \
    | head -n 10 \
    | cut -d' ' -f2
}

#######################################
# Emit the discovery JSON for the snapshot file.
# Arguments:
#   $1 - path to the top snapshot
# Outputs:
#   JSON on stdout; the data array is empty when no process is found.
#######################################
emit_discovery_json() {
  local file=$1
  local name
  local sep=''

  printf '{"data":[\n'
  while IFS= read -r name; do
    [[ -n "$name" ]] || continue
    # Escape backslashes and double quotes so the output stays valid JSON.
    name=${name//\\/\\\\}
    name=${name//\"/\\\"}
    printf '%s{"{#PROCESSNAME}":"%s"}' "$sep" "$name"
    # Separator is emitted before every entry except the first, so there is
    # never a trailing comma regardless of how many names were found.
    sep=$',\n'
  done < <(top_process_names "$file")
  printf ']}\n'
}

emit_discovery_json "$process_file"
检查脚本:sh discovery_process.sh
{"data":[
{"{#PROCESSNAME}":"php-fpm"},
{"{#PROCESSNAME}":"nginx"},
{"{#PROCESSNAME}":"zabbix_agentd"},
{"{#PROCESSNAME}":"rsyslogd"},
{"{#PROCESSNAME}":"sshd"},
{"{#PROCESSNAME}":"AliYunDunUpdate"},
{"{#PROCESSNAME}":"mingetty"},
{"{#PROCESSNAME}":"master"},
{"{#PROCESSNAME}":"bash"},
{"{#PROCESSNAME}":"aliyun-service"}]}
4、根据进程名称和监控指标名,解析top进程资源情况,编写脚本
cat process_monitor.sh
#!/usr/bin/env bash
# Report one aggregated top metric for a process name, for use as a Zabbix
# UserParameter.
#
# Usage: process_monitor.sh <process_name> <metric>
#   metric: mem_res | mem_virt | cpu_p | mem_p | pid | runtime
#
# Reads the top batch snapshot, skips its first 7 lines, and sums the chosen
# column over every row whose last field equals <process_name> exactly.
process_file=/data/software/zabbix/tmp/top.txt

#######################################
# Print the summed metric for one process name.
# Arguments:
#   $1 - process name, compared exactly against the last field of each row
#   $2 - metric name (see usage above)
#   $3 - optional snapshot file; defaults to ${process_file}
# Outputs:
#   The sum on stdout (mem_res and mem_virt are divided by 1024); an empty
#   line when no row matches or the file is unreadable; "Error input:" for
#   an unknown metric.
# Returns:
#   0 always, matching the script's previous unconditional exit status.
#######################################
process_metric() {
  local process=${1-}
  local metric=${2-}
  local file=${3-$process_file}
  local column
  local divisor=1
  local scaled=0

  # Map the metric name to the snapshot column it is read from.
  # NOTE(review): `pid` sums the PIDs of all matching rows, and `runtime`
  # adds only the leading number of column 11 (presumably TIME+, e.g.
  # "0:01.23" counts as 0). Both are kept as-is for compatibility with
  # existing items - confirm this is what is wanted.
  case "$metric" in
    mem_res)  column=6;  divisor=1024; scaled=1 ;;
    cpu_p)    column=9 ;;
    mem_virt) column=5;  divisor=1024; scaled=1 ;;
    mem_p)    column=10 ;;
    pid)      column=1 ;;
    runtime)  column=11 ;;
    *)
      echo "Error input:"
      return 0
      ;;
  esac

  if [[ ! -r "$file" ]]; then
    printf '\n'
    return 0
  fi

  # The name is matched exactly and as a string: a substring/regex match
  # would make "sh" also hit "bash" and "sshd" and return several values.
  # kib() normalises memory values that top has scaled with an m/g/t suffix.
  printf '%s\n' "$(
    awk -v name="$process" -v col="$column" -v div="$divisor" \
        -v scaled="$scaled" '
      function kib(v,  n, u) {
        n = v + 0
        u = tolower(substr(v, length(v)))
        if (u == "m") return n * 1024
        if (u == "g") return n * 1024 * 1024
        if (u == "t") return n * 1024 * 1024 * 1024
        return n
      }
      NR >= 8 && NF && ($NF "") == (name "") {
        total += (scaled ? kib($(col)) : $(col))
        found = 1
      }
      END { if (found) print total / div }
    ' "$file"
  )"
}

process_metric "${1-}" "${2-}"
5、检查脚本:
# sh process_monitor.sh php-fpm cpu_p
0
# sh process_monitor.sh php-fpm pid
524222
6、在zabbix的conf目录配置conf文件信息
cat process_monitor.conf
# Top-10 process monitoring keys.
# process.discovery: runs discovery_process.sh with no arguments.
# process.resource[*]: passes the first two key parameters ($1, $2) to
# process_monitor.sh, e.g. process.resource[python,mem_p].
UserParameter=process.discovery,/data/software/zabbix/scripts/discovery_process.sh
UserParameter=process.resource[*],/data/software/zabbix/scripts/process_monitor.sh $1 $2
7、添加权限 chown zabbix.zabbix /data/software/zabbix/scripts/* && chmod +x /data/software/zabbix/scripts/*
8、重启zabbix-agent :systemctl restart zabbix-agent
9、测试
# zabbix_get -p 27050 -k 'process.discovery' -s 192.168.0.10
{"data":[
{"{#PROCESSNAME}":"sshd"},
{"{#PROCESSNAME}":"python"},
{"{#PROCESSNAME}":"bash"},
{"{#PROCESSNAME}":"zabbix_agentd"},
{"{#PROCESSNAME}":"crond"},
{"{#PROCESSNAME}":"php-fpm"},
{"{#PROCESSNAME}":"nginx"},
{"{#PROCESSNAME}":"rsyslogd"},
{"{#PROCESSNAME}":"snmpd"},
{"{#PROCESSNAME}":"sendmail"}]}
# zabbix_get -p 27050 -k 'process.resource[python,mem_p]' -s 192.168.0.10
28.7
# zabbix_get -p 27050 -k 'process.resource[python,pid]' -s 192.168.0.10
1635094
10、添加模板
11、添加自动发现规则,获取top信息
键值:process.discovery
如果自动发现规则的更新间隔不是60秒,可以把下面这条原本由定时任务执行的命令加到discovery_process.sh脚本的开头:
top -n 1 -b > /data/software/zabbix/tmp/top.txt
添加之后即可去掉定时任务(注意:此时该命令由zabbix用户执行,zabbix用户需要对top.txt有写权限)
12、添加监控项原型,获取CPU和内存前十的进程名称(如nginx等)
名称:{#PROCESSNAME}_cpu%
键值:process.resource[{#PROCESSNAME},cpu_p]
其他都一样的配置,注意名称和键值
13、添加趋势图形
名称:{#PROCESSNAME}_used%
选两个就可以了
之后在主机里添加该模板就可以了