- 背景
- 监控后端程序日志中是否出现 OutOfMemory;一旦出现,则重启容器,并通过钉钉(DingTalk)发送告警
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/8/1 10:23
# @File : outofmemory.py
# @Author : zk_linux
# @Software: PyCharm
# @Description:
import readline
import time
import subprocess
import re
import logging
import os
from collections import deque
from temp import ail_the_alarm
# The log file lives in ./log (relative to the working directory). Create the
# directory up front: logging.basicConfig opens the file immediately and
# fails with FileNotFoundError when the directory is missing.
os.makedirs('./log', exist_ok=True)
logging.basicConfig(
    level=logging.INFO,
    filename='./log/outofmemory.log',
    filemode='a',
    format='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s',
)
def wc_count(file_name):
    """Return the number of lines in ``file_name``.

    The file is streamed line by line inside a ``with`` block, so the handle
    is always closed and the whole file is never held in memory at once.

    :param file_name: path of the text file to count.
    :return: number of lines; a last line without a trailing newline is
        counted as a line.
    :raises OSError: if the file cannot be opened.
    """
    with open(file_name) as f:
        return sum(1 for _ in f)
def getLastContent(file_name, lastCount):
    """Return the last ``lastCount`` lines of ``file_name``.

    :param file_name: path of the text file to read.
    :param lastCount: number of trailing lines to return. ``None`` returns
        every line; zero or a negative number returns an empty list.
    :return: list of lines (line endings preserved), oldest first.
    :raises OSError: if the file cannot be opened.
    """
    if lastCount is not None and lastCount <= 0:
        # deque() raises ValueError for a negative maxlen; "no new lines"
        # is simply an empty result.
        return []
    with open(file_name, "r") as f:
        # A bounded deque keeps only the newest ``lastCount`` lines while the
        # file is streamed.
        return list(deque(f, lastCount))
def filterate(list, key="OutOfMemory"):
    """Report whether any entry of ``list`` contains ``key``.

    :param list: iterable of log lines to search.
    :param key: substring to look for; defaults to ``"OutOfMemory"``.
    :return: ``True`` if at least one entry contains ``key``, else ``False``.
    """
    # any() stops at the first matching entry.
    return any(key in line for line in list)
def restart_container():
    """Restart the ``business`` Docker container.

    Runs ``docker restart business`` as an argument list (no shell) and logs
    an error when the command cannot be started or exits with a non-zero
    status, so a failed restart is visible in the monitor log.

    :return: ``None``.
    """
    try:
        result = subprocess.run(
            ['docker', 'restart', 'business'],
            stderr=subprocess.PIPE,
        )
    except OSError as exc:
        # Without a shell, a missing or non-executable docker binary raises
        # here; log it rather than crash the caller.
        logging.error("Unable to run 'docker restart business': %s", exc)
        return
    if result.returncode != 0:
        logging.error(
            "'docker restart business' failed with exit code %s: %s",
            result.returncode,
            result.stderr.decode(errors="replace").strip(),
        )
    else:
        logging.info("'docker restart business' completed successfully")
def _read_new_lines(log_file, seen_count):
    """Return ``(total_lines, new_lines)`` for lines added after ``seen_count``."""
    total = wc_count(log_file)
    added = total - seen_count
    if added < 0:
        # Fewer lines than last time: the file was truncated or replaced.
        # Everything currently in it is unseen, so rescan it all.
        logging.warning(
            "%s shrank from %s to %s lines, rescanning the whole file",
            log_file, seen_count, total,
        )
        added = total
    new_lines = getLastContent(log_file, added) if added > 0 else []
    return total, new_lines


def _send_oom_alert():
    """Send the DingTalk message announcing the restart attempt."""
    webhook = ail_the_alarm.DingTalkUrl('/server/scripts/config.ini').get_config()
    ding_msg = ail_the_alarm.Send_Dingding(
        '01',
        webhook['mobile_number'],
        web_url=webhook['prod_webhook_url'],
        secret=webhook['prod_secret'],
        msg="内存溢出,尝试重启esl-business",
    )
    ding_msg.send_dingnding()


def monitor_log_file(log_file, interval=5):
    """Poll ``log_file`` forever and react to new ``OutOfMemory`` lines.

    Every ``interval`` seconds the lines appended since the previous poll are
    checked with ``filterate``. On a match the container is restarted via
    ``restart_container`` and a DingTalk alert is sent. The function never
    returns.

    The line counter starts at 0, so the first poll scans the whole existing
    file.

    :param log_file: path of the log file to watch.
    :param interval: seconds to sleep between polls.
    """
    now_max_count = 0
    while True:
        try:
            new_last_count, add_lins = _read_new_lines(log_file, now_max_count)
        except OSError as exc:
            # The file can be briefly missing or unreadable (e.g. during
            # rotation); keep the monitor alive and retry on the next poll.
            logging.warning("Cannot read %s, will retry: %s", log_file, exc)
            time.sleep(interval)
            continue

        logging.info(
            "The program is normal and under continuous monitoring, add_file_count: %s",
            len(add_lins),
        )
        # Advance the counter only after the new lines were read successfully.
        now_max_count = new_last_count

        if add_lins and filterate(add_lins):
            logging.error("Program memory overflow, attempting to restart container")
            restart_container()
            try:
                _send_oom_alert()
            except Exception:
                # A failed alert (config or network problem) must not stop
                # the monitoring loop; record the traceback and carry on.
                logging.exception("Failed to send DingTalk alert")

        time.sleep(interval)
if __name__ == "__main__":
    # Script entry point: watch the backend error log with the default
    # polling interval.
    monitor_log_file("/usr/local/esl/zk/log/log_error.log")
通过 systemctl 管理脚本
unit 文件位于 /etc/systemd/system 目录下:
[root@acs-01 system]# cat omm.service
# systemd unit that runs the OutOfMemory log monitor script (outofmemory.py).
[Unit]
Description=My Python Script Service
# Start ordering only: this unit is started after network.target.
After=network.target
[Service]
# Type=simple: the process started by ExecStart is the service's main process.
Type=simple
User=root
# ExecStart passes a relative script name and the script logs to
# ./log/outofmemory.log, so both resolve against this working directory.
WorkingDirectory= /server/scripts/
ExecStart=/usr/bin/python3 outofmemory.py
# NOTE(review): no Restart= is configured, so if the script exits the
# monitoring stays stopped until someone restarts the unit; confirm whether
# Restart=on-failure is wanted.
[Install]
# Enabling the unit hooks it into multi-user.target.
WantedBy=multi-user.target