自动化添加ELK监控脚本

#auto_add_elk.sh 根据给定参数自动添加ELK监控,Kibana仪表盘需要重新画!
#版权声明:不定期更新维护地址,看这里→_→	https://blog.csdn.net/qq_16592497/article/details/82257251

# Print the usage manual.
# Improvement: all output now goes to stderr, since this is diagnostic text
# emitted on a bad invocation, not normal program output.
function print_usage(){
	echo -e "\e[1;31mThe number of parameters is wrong, please check your parameters! \e[0m" >&2
	echo -e "Usage:\e[1;32msh $0 <cluster_id> <cluster_name> <URL> <function_id>\e[0m" >&2
	# The function_id map plus one worked example per id ($0 expands inside
	# the here-doc). Message text is unchanged from the original echos.
	cat >&2 <<EOF
function_id: 1 cluster_scheduler; 2 job_history; 3 dfs_health; 4 apps_pending; 5 apps_running; 6 memory_used_percent; 7 capacity_used_percent; 8 files_total
e.g.  sh $0 lf-324-03 廊坊能力开放 http://10.162.3.171:8088/cluster,http://10.162.3.172:8088/cluster 1
e.g.  sh $0 lf-324-03 廊坊能力开放 http://10.162.3.172:19888/jobhistory 2
e.g.  sh $0 lf-324-03 廊坊能力开放 http://10.162.3.171:50070/dfshealth.html#tab-overview 3
e.g.  sh $0 lf-324-03 廊坊能力开放 http://10.162.3.171:8088/cluster,http://10.162.3.172:8088/cluster 4
e.g.  sh $0 lf-324-03 廊坊能力开放 http://10.162.3.171:8088/cluster,http://10.162.3.172:8088/cluster 5
e.g.  sh $0 lf-324-03 廊坊能力开放 http://10.162.3.172:8088/cluster,http://10.162.3.171:8088/cluster 6
e.g.  sh $0 lf-324-03 廊坊能力开放 http://10.162.3.171:50070/dfshealth.html#tab-overview,http://10.162.3.172:50070/dfshealth.html#tab-overview 7
e.g.  sh $0 lf-324-03 廊坊能力开放 http://10.162.3.171:50070/dfshealth.html#tab-overview,http://10.162.3.172:50070/dfshealth.html#tab-overview 8
EOF
}
# Exactly four positional parameters are required (cluster id, cluster
# name, URL list, function id); otherwise show the manual and stop.
if (( $# != 4 )); then
	# Print the usage manual for the caller.
	print_usage
	# A bare "exit" keeps the exit status of the last command executed.
	exit
fi

# Bind the positional parameters to named variables.
cluster_id=$1
cluster_name=$2
URL=$3
function_id=$4

# Map the numeric function_id onto an operation name.  The original
# 8-branch elif chain (with the IFS-split line copy-pasted six times) is
# collapsed into one case dispatch plus one shared split step below.
case ${function_id} in
	1) operation_name="cluster_scheduler" ;;
	2) operation_name="job_history" ;;
	3) operation_name="dfs_health" ;;
	4) operation_name="apps_pending" ;;
	5) operation_name="apps_running" ;;
	6) operation_name="memory_used_percent" ;;
	7) operation_name="capacity_used_percent" ;;
	8) operation_name="files_total" ;;
	*)
		echo "function_id: 1 cluster_scheduler; 2 job_history; 3 dfs_health; 4 apps_pending; 5 apps_running; \
6 memory_used_percent; 7 capacity_used_percent; 8 files_total"
		echo -e "\e[1;31mSorry! Functions to be developed...\e[0m"
		exit
		;;
esac

# The multi-endpoint operations take a comma-separated URL list: save
# IFS, set it to ",", split $3 into an array, then restore IFS.  The
# single-endpoint operations (2 job_history, 3 dfs_health) keep URL as a
# plain string, exactly as before.
case ${function_id} in
	1|4|5|6|7|8)
		old_IFS=${IFS}; IFS=","; URL=($3); IFS=${old_IFS}
		;;
esac

#####################################################################################
# Deployment settings -- review these before the first run of this script!
work_dir=/home/es
logstash_dir="${work_dir}/logstash"
elasticsearch_hosts='["10.245.2.71:9200","10.245.2.72:9200","10.245.2.73:9200"]'
# Earlier layout kept the cluster id in the index name:
#elasticsearch_index=${cluster_id}-${operation_name}
elasticsearch_index=${operation_name}

cluster_dir="${work_dir}/cluster"
# Earlier layout: per-cluster directory first, then operation:
#operation_dir=${cluster_dir}/${cluster_id}/${operation_name}
operation_dir="${cluster_dir}/${operation_name}/${cluster_id}"
# Earlier location of the Logstash input file:
#logstash_input_file=${operation_dir}/${operation_name}.info
logstash_input_file="${cluster_dir}/${operation_name}.info"
#####################################################################################

# Create the working directory for this cluster/operation (with parents).
mkdir -p "${operation_dir}"

# Clone this operation's crawler script from its template and prepend the
# variables it needs; abort with a message if the template is missing.
# Fixes: deprecated $[...] arithmetic replaced with $((...)); all path and
# value expansions quoted so sed/cp receive them as single arguments.
if [[ -e "${cluster_dir}/${operation_name}.sh.template" ]]; then
	cp "${cluster_dir}/${operation_name}.sh.template" "${operation_dir}.sh"
	# sed 'Ni\text' inserts text before line N of the generated script.
	sed -i '1i\cluster_name='"${cluster_name}" "${operation_dir}.sh"
	sed -i '2i\operation_name='"${operation_name}" "${operation_dir}.sh"
	sed -i '3i\operation_dir='"${operation_dir}" "${operation_dir}.sh"
	sed -i '4i\logstash_input_file='"${logstash_input_file}" "${operation_dir}.sh"
	# Insert URL[i] before line i+1: each insertion pushes the earlier
	# lines down, so the URL entries end up in order, ahead of the
	# variables inserted above.
	for (( i = 0; i < ${#URL[@]}; i++ )); do
		sed -i "$((i + 1))"'i\URL['"$i"']='"${URL[$i]}" "${operation_dir}.sh"
	done
else
	echo -e "\e[1;31m${cluster_dir}/${operation_name}.sh.template does not exist. Please verify! \e[0m"
	exit
fi
# Clone the Logstash pipeline config from its template and fill in its
# placeholders; abort with a message if the template is missing.
# NOTE: to reduce the number of running Logstash instances the file is
# named ${operation_name}.conf rather than ${cluster_id}_${operation_name}.conf.
# Fixes: repeated target path hoisted into a variable; sed/cp arguments quoted.
if [[ -e "${logstash_dir}/config/${operation_name}.conf.template" ]]; then
	conf_file=${logstash_dir}/config/${operation_name}.conf
	cp "${logstash_dir}/config/${operation_name}.conf.template" "${conf_file}"
	# Replace the literal ${...} placeholders in the template; '|' is the
	# sed delimiter because every replacement value contains '/'.
	sed -i 's|${logstash_input_file}|'"${logstash_input_file}"'|g' "${conf_file}"
	sed -i 's|${elasticsearch_hosts}|'"${elasticsearch_hosts}"'|g' "${conf_file}"
	sed -i 's|${elasticsearch_index}|'"${elasticsearch_index}"'|g' "${conf_file}"
else
	echo -e "\e[1;31m${logstash_dir}/config/${operation_name}.conf.template does not exist. Please verify! \e[0m"
	exit
fi

########################################################################################################################
# Register the crawler wrapper in cron.
# Fixes: the crontab line is matched with grep -F because it is full of '*'
# characters that the original unanchored BRE treated as metacharacters
# (quantifying the spaces), matching more lines than intended; the explicit
# $? checks are folded into "if ! grep -q"; crontab's "no crontab for user"
# noise is suppressed.
if ! crontab -l 2>/dev/null | grep -qF "* * * * * sh ${cluster_dir}/crontab_${operation_name}.sh"; then
	# The cron entry is missing: tell the operator what to add, then stop.
	echo -e "\e[1;31mAdd Crontab:\n\e[1;32mcrontab -e\e[0m"
	echo "* * * * * sh ${cluster_dir}/crontab_${operation_name}.sh"
	# A bare "exit" keeps the exit status of the last command executed.
	exit
else
	# The cron entry exists; make sure the wrapper script invokes this
	# cluster's crawler exactly once (>> creates the file if absent).
	if ! grep -qF "sh ${operation_dir}.sh" "${cluster_dir}/crontab_${operation_name}.sh" 2>/dev/null; then
		echo "sh ${operation_dir}.sh" >> "${cluster_dir}/crontab_${operation_name}.sh"
	fi
fi
########################################################################################################################

# Extract the <name>.conf argument of every running logstash process:
# ps lists processes, grep keeps the logstash ones (dropping the grep
# itself), sed reduces each line to just the config file name.
# Fixes: backticks -> $(), the antiquated "Xx == Yx" comparison -> quoted
# [[ == ]], early break once a match is found, expansions quoted.
conf_names=$(ps -ef | grep logstash | grep -v grep | sed -r 's|.*/(.+conf) .*|\1|')
flag=0
# flag becomes 1 if this operation's config file is already monitored.
for conf_name in ${conf_names}; do
	if [[ "${conf_name}" == "${operation_name}.conf" ]]; then
		flag=1
		break	# one match is enough
	fi
done
if [[ ${flag} -eq 1 ]]; then
	# Already running: print the PID and tell the operator how to restart
	# it by hand instead of silently starting a second instance.
	echo -e "\e[1;31mConfiguration File ${operation_name}.conf is being used! \e[0m"
	echo -e "\e[1;31mIf you know what you're doing! \e[0m"
	PID=$(ps -ef | grep logstash | grep "${operation_name}.conf" | awk '{ print $2 }')
	echo -e "\e[1;31mkill ${PID}\e[0m and \e[1;32msh $*\e[0m"
else
	# Start Logstash in the background; --config.reload.automatic makes it
	# pick up edits to the generated config without a restart.
	# Former per-cluster layout kept for reference:
	#nohup ${logstash_dir}/bin/logstash -f ${logstash_dir}/config/${operation_name}.conf --config.reload.automatic \
	#--path.data=${operation_dir}/data >> ${cluster_dir}/${cluster_id}/logstash_${operation_name}.out 2>&1 &
	nohup "${logstash_dir}/bin/logstash" -f "${logstash_dir}/config/${operation_name}.conf" --config.reload.automatic \
	--path.data="${cluster_dir}/${operation_name}/data" >> "${cluster_dir}/logstash_${operation_name}.out" 2>&1 &
fi

# Report how many Logstash JVM processes are currently running: list JVMs
# with jps and let grep count the matching lines directly.
num=$(jps | grep -c Logstash)
echo "当前运行的 Logstash 进程有 ${num} 个"

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值