# SNMP.py
# Linux系统,安装snmpwalk,python3
#!/usr/bin/python3
#_*_ coding:utf8 _*_
import os,time,re,subprocess
import smtplib
from smtplib import SMTP
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.header import Header
from concurrent.futures import ThreadPoolExecutor
from queue import Queue
## snmp OID
# snmpwalk -v 2c -c snmp读密码 SNMP服务器IP .1.3.6.1.2.1.25.3.3.1.2 # CPU负载
# snmpwalk -v 2c -c snmp读密码 SNMP服务器IP HOST-RESOURCES-MIB::hrProcessorLoad # CPU负载
# snmpwalk -v 2c -c snmp读密码 SNMP服务器IP 1.3.6.1.2.1.25.2 # 内存、硬盘使用率
# snmpwalk -v 2c -c snmp读密码 SNMP服务器IP 1.3.6.1.2.1.25 # 进程使用cpu信息、进程使用内存信息,及内存、硬盘使用率
## E-mail notification: build the message object
def 纯文本邮件内容对象(邮件主题, 邮件正文, 发件人, 收件人):
    """Build a multipart e-mail that carries a single plain-text body.

    Args:
        邮件主题: subject line.
        邮件正文: plain-text body.
        发件人: sender address, stored in the ``From`` header.
        收件人: recipient address, stored in the ``To`` header.

    Returns:
        A ``MIMEMultipart`` message whose From/To/Subject headers are
        UTF-8 ``Header`` objects and whose only part is the UTF-8 text body.
    """
    message = MIMEMultipart()
    # Headers are set in the same order as before: From, To, Subject.
    for FIELD, VALUE in (('From', 发件人), ('To', 收件人), ('Subject', 邮件主题)):
        message[FIELD] = Header(VALUE, 'utf-8')
    message.attach(MIMEText(邮件正文, 'plain', 'utf-8'))
    return message
def 批量发邮件(SMTP_SERVER, 发件人, 邮箱密码, L_邮件任务):
    """Send a batch of plain-text e-mails over one SMTP session (port 25).

    Args:
        SMTP_SERVER: host name of the SMTP server.
        发件人: sender address, also used as the login name.
        邮箱密码: password for the sender account.
        L_邮件任务: iterable of ``(recipient, subject, body)`` tuples.

    Connection/login errors are printed and end the call. A failure to send
    one message is printed and the remaining messages are still attempted.
    The SMTP connection is always closed, including when login fails or an
    unexpected error interrupts the loop.
    """
    client = None
    try:
        client = smtplib.SMTP()
        #client = smtplib.SMTP_SSL()  # use this instead when SSL is required
        client.connect(SMTP_SERVER, 25)
        #client.set_debuglevel(1)     # enable protocol debugging
        client.login(发件人, 邮箱密码)
    except Exception as e:
        print(f"邮件发送异常 {e}")
        if client is not None:
            client.close()  # do not leak the socket when login fails after connect
        return

    try:
        for 收件人, 邮件主题, 邮件正文 in L_邮件任务:
            time.sleep(2)  # pause between messages
            message = 纯文本邮件内容对象(邮件主题, 邮件正文, 发件人, 收件人)
            try:
                client.sendmail(发件人, 收件人, message.as_string())
            except Exception as e:
                print(e)
            else:
                print(f"{time.strftime('%Y%m%d')} 发送成功: {收件人:32s} {邮件主题}")
    finally:
        try:
            client.quit()
        except (smtplib.SMTPException, OSError):
            # The server may already have dropped the connection.
            client.close()
    print("发送完成")
## Top-N processes by CPU usage
def SHOW_PID_CPU_TOP_N(N, D_PID_NAME, D_PID_CPU):
    """Return text lines for the N processes with the highest CPU value.

    Args:
        N: maximum number of processes to report.
        D_PID_NAME: mapping of process index -> process name.
        D_PID_CPU: mapping of process index -> CPU value (number).

    Returns:
        A list of at most N strings ``"<name> <cpu>"`` ordered from the
        highest to the lowest CPU value. Each process appears once, even
        when several processes share the same value. A process without a
        known name is reported by its index.
    """
    # Sort the (index, value) pairs themselves so ties cannot be emitted twice.
    RANKED = sorted(D_PID_CPU.items(), key=lambda ITEM: ITEM[1], reverse=True)
    return [f"{D_PID_NAME.get(PID, PID)} {PerfCPU}" for PID, PerfCPU in RANKED[0:N]]
## Top-N processes by memory usage
def SHOW_PID_MEM_TOP_N(N, D_PID_NAME, D_PID_MEM):
    """Return text lines for the N processes with the highest memory usage.

    Args:
        N: maximum number of processes to report.
        D_PID_NAME: mapping of process index -> process name.
        D_PID_MEM: mapping of process index -> memory usage in KBytes.

    Returns:
        A list of at most N strings ``"<name> <mem> KBytes"`` ordered from
        the highest to the lowest usage. Each process appears once, even
        when several processes share the same value. A process without a
        known name is reported by its index.
    """
    # Sort the (index, value) pairs themselves so ties cannot be emitted twice.
    RANKED = sorted(D_PID_MEM.items(), key=lambda ITEM: ITEM[1], reverse=True)
    return [f"{D_PID_NAME.get(PID, PID)} {PerfMem} KBytes" for PID, PerfMem in RANKED[0:N]]
## Turn the snmpwalk process table into dictionaries
# Sample input lines:
#   HOST-RESOURCES-MIB::hrSWRunName.1 = STRING: "System Idle Process"
#   HOST-RESOURCES-MIB::hrSWRunName.4 = STRING: "System"
#   HOST-RESOURCES-MIB::hrSWRunPerfCPU.1 = INTEGER: 88064
#   HOST-RESOURCES-MIB::hrSWRunPerfCPU.4 = INTEGER: 725
#   HOST-RESOURCES-MIB::hrSWRunPerfMem.1 = INTEGER: 4 KBytes
#   HOST-RESOURCES-MIB::hrSWRunPerfMem.4 = INTEGER: 140 KBytes
def PID_NAME_CPU_MEM(output):
    """Parse hrSWRunName / hrSWRunPerfCPU / hrSWRunPerfMem lines.

    Args:
        output: text printed by snmpwalk (one ``OID = TYPE: value`` per line).

    Returns:
        ``(D_PID_NAME, D_PID_CPU, D_PID_MEM)``, three dicts keyed by the
        process index as a string:
        * D_PID_NAME -> process name exactly as printed (quotes included),
        * D_PID_CPU  -> CPU value as int,
        * D_PID_MEM  -> memory usage as int (the leading number of e.g.
          ``4 KBytes``).
        Lines of other objects, and lines whose numeric value cannot be
        parsed, are skipped.
    """
    D_PID_NAME = {}
    D_PID_CPU = {}
    D_PID_MEM = {}
    RE_PID = re.compile(
        r'^HOST-RESOURCES-MIB::(hrSWRunName|hrSWRunPerfCPU|hrSWRunPerfMem)\.(\d+) = (.*)$',
        re.MULTILINE,
    )
    RE_TYPE_PREFIX = re.compile(r'^[A-Za-z][\w-]*: ')  # e.g. 'STRING: ', 'INTEGER: '
    for MATCH in RE_PID.finditer(output):
        NAME, INDEX, RAW = MATCH.groups()
        # Remove only the leading type tag so names containing ': ' stay intact.
        VALUE = RE_TYPE_PREFIX.sub('', RAW, count=1).strip()
        if NAME == 'hrSWRunName':
            D_PID_NAME[INDEX] = VALUE
            continue
        try:
            NUMBER = int(VALUE.split()[0])
        except (ValueError, IndexError):
            continue  # e.g. "No Such Instance ..." instead of a number
        if NAME == 'hrSWRunPerfCPU':
            D_PID_CPU[INDEX] = NUMBER
        else:
            D_PID_MEM[INDEX] = NUMBER
    return (D_PID_NAME, D_PID_CPU, D_PID_MEM)
def 单位转换(INT, UNIT):
    """Format a size for display.

    A size whose unit is ``'Bytes'`` is converted to whole gigabytes
    (rounded with ``round``) and returned as ``"<n>GB"``. Any other unit is
    returned unconverted as ``"<INT><UNIT>"``.
    """
    if UNIT != 'Bytes':
        return f"{INT}{UNIT}"
    GIGABYTES = round(INT / 1024 / 1024 / 1024)
    return f"{GIGABYTES}GB"
## Query CPU, memory, swap and disk-partition usage with snmpwalk
def _snmp_last_int(VALUE):
    """Return the integer after the last ':' of an snmpwalk value ('INTEGER: 42' -> 42)."""
    return int(VALUE.split(':')[-1])


def _snmp_usage_percent(USED, SIZE):
    """Return USED/SIZE as a percentage; 0.0 when SIZE is 0 (e.g. no swap configured)."""
    if SIZE == 0:
        return 0.0
    return (USED * 100) / SIZE


def _snmp_storage_entry(D_Storage, INDEX):
    """Return (size, used, unit_size, unit_name) of hrStorage row INDEX (size/used in allocation units)."""
    SIZE = _snmp_last_int(D_Storage[('hrStorageSize', INDEX)])
    USED = _snmp_last_int(D_Storage[('hrStorageUsed', INDEX)])
    # e.g. 'INTEGER: 4096 Bytes' -> ('4096', 'Bytes')
    UNIT_INT, UNIT_STR = D_Storage[('hrStorageAllocationUnits', INDEX)].split(':')[-1].split()
    return SIZE, USED, int(UNIT_INT), UNIT_STR


def SNMP_CPU_MEM_DISK_USED(SYSTEM, SNMP_HOST, SNMP_PWD):
    """Collect CPU / memory / swap / disk usage of one host via snmpwalk (v2c).

    Args:
        SYSTEM: ``'Linux'`` or ``'Windows'``; selects how memory is computed.
        SNMP_HOST: ``"ip:port"`` of the SNMP agent.
        SNMP_PWD: SNMP read community string.

    Returns:
        ``(StatusCode, SYS_INFO, D_SYS_INFO)``:
        * StatusCode: 0 both snmpwalk calls succeeded, 1 the CPU query
          failed, 2 the memory/disk query failed, 3 both failed.
        * SYS_INFO: one-line human readable summary.
        * D_SYS_INFO: ``{'CPU': pct, 'MEM': pct, 'SWAP': pct, 'DISK': {...}}``
          where a percentage of -1 means "not available" and DISK maps a
          partition description to ``(pct rounded to 0.1, total size, unit)``.
    """
    D_SYS_INFO = {'CPU': -1, 'MEM': -1, 'SWAP': -1, 'DISK': {}}
    SYS_INFO = ''    # textual summary of the results
    StatusCode = 0

    # NOTE(review): the command line is run through a shell, so the community
    # string and host must not contain shell metacharacters.
    CMD_PREFIX = 'snmpwalk -v 2c -c ' + SNMP_PWD + ' ' + SNMP_HOST
    ## CPU load of every processor
    (CPU_status, CPU_output) = subprocess.getstatusoutput(CMD_PREFIX + ' HOST-RESOURCES-MIB::hrProcessorLoad')
    ## hrStorage table: memory and disk usage
    (MEM_DISK_status, ALL_output) = subprocess.getstatusoutput(CMD_PREFIX + ' 1.3.6.1.2.1.25.2')

    ## Continue with the analysis only when both queries succeeded
    if CPU_status != 0 or MEM_DISK_status != 0:
        if CPU_status != 0:
            StatusCode += 1
            SYS_INFO += '【CPU 查询失败】'
        if MEM_DISK_status != 0:
            StatusCode += 2
            SYS_INFO += '【MEM/DISK 查询失败】'
        return (StatusCode, SYS_INFO, D_SYS_INFO)

    ## CPU: average of the per-core load values
    L_CPU = []
    for LINE in CPU_output.split('\n'):
        try:
            L_CPU.append(int(LINE.split('=')[-1].split(':')[-1]))
        except ValueError:
            continue  # blank or diagnostic line without a number
    if L_CPU:
        CPU_PCT = sum(L_CPU) / len(L_CPU)
        D_SYS_INFO['CPU'] = CPU_PCT
        SYS_INFO += f"CPU({len(L_CPU)}):{CPU_PCT:.1f}% | "    # CPU(core count):usage%
    else:
        SYS_INFO += f"CPU({len(L_CPU)}):ERROR | "

    ## hrStorage rows, keyed by (column name, row index)
    RE_hrStorage = re.compile(
        r'HOST-RESOURCES-MIB::(hrStorage(?:Type|Descr|Size|Used|AllocationUnits))\.(\d+)\s*=(.*)'
    )
    D_Storage = {}
    for MATCH in RE_hrStorage.finditer(ALL_output):
        FIELD, INDEX, VALUE = MATCH.groups()
        D_Storage[(FIELD, INDEX)] = VALUE.strip()
    # description -> row index and type -> row index (the last row wins)
    D_DESCR_ID = {V: I for (F, I), V in D_Storage.items() if F == 'hrStorageDescr'}
    D_TYPE_ID = {V: I for (F, I), V in D_Storage.items() if F == 'hrStorageType'}

    L_DISK_ID = []    # row indexes of the fixed-disk partitions
    if SYSTEM in ('Linux', 'Windows'):
        L_DISK_ID = [
            I for (F, I), V in D_Storage.items()
            if F == 'hrStorageType' and V == 'OID: HOST-RESOURCES-TYPES::hrStorageFixedDisk'
        ]

    if SYSTEM == 'Linux':
        PHYSICAL_MEM_ID = D_DESCR_ID.get('STRING: Physical memory')
        BUFFERS_MEM_ID = D_DESCR_ID.get('STRING: Memory buffers')
        CACHED_MEM_ID = D_DESCR_ID.get('STRING: Cached memory')
        SWAP_SPACE_ID = D_DESCR_ID.get('STRING: Swap space')
        if None not in (PHYSICAL_MEM_ID, BUFFERS_MEM_ID, CACHED_MEM_ID, SWAP_SPACE_ID):
            MEM_Size, MEM_Used, MEM_Unit_INT, MEM_Unit_STR = _snmp_storage_entry(D_Storage, PHYSICAL_MEM_ID)
            BUFFERS_Used = _snmp_last_int(D_Storage[('hrStorageUsed', BUFFERS_MEM_ID)])
            CACHED_Used = _snmp_last_int(D_Storage[('hrStorageUsed', CACHED_MEM_ID)])
            SWAP_Size, SWAP_Used, SWAP_Unit_INT, SWAP_Unit_STR = _snmp_storage_entry(D_Storage, SWAP_SPACE_ID)
            ## Same figure as the "used" column of `free`:
            ##   used = Physical.Used - Memory buffers.Used - Cached memory.Used
            ##   usage = used / Physical.Size
            MEM_PCT = _snmp_usage_percent(MEM_Used - BUFFERS_Used - CACHED_Used, MEM_Size)
            SYS_INFO += f"MEM({单位转换(MEM_Size * MEM_Unit_INT, MEM_Unit_STR)}):{MEM_PCT:.1f}% | "
            D_SYS_INFO['MEM'] = MEM_PCT
            ## Swap: size and usage both come from the swap row
            SWAP_PCT = _snmp_usage_percent(SWAP_Used, SWAP_Size)
            SYS_INFO += f"SWAP({单位转换(SWAP_Size * SWAP_Unit_INT, SWAP_Unit_STR)}):{SWAP_PCT:.1f}% | "
            D_SYS_INFO['SWAP'] = SWAP_PCT
        else:    # memory or swap rows missing from the SNMP answer
            SYS_INFO += "MEM(ERROR) "
            SYS_INFO += "SWAP(ERROR) "
    elif SYSTEM == 'Windows':
        MEM_ID = D_TYPE_ID.get('OID: HOST-RESOURCES-TYPES::hrStorageRam')
        V_MEM_ID = D_TYPE_ID.get('OID: HOST-RESOURCES-TYPES::hrStorageVirtualMemory')
        if MEM_ID is not None:
            MEM_Size, MEM_Used, MEM_Unit_INT, MEM_Unit_STR = _snmp_storage_entry(D_Storage, MEM_ID)
            MEM_PCT = _snmp_usage_percent(MEM_Used, MEM_Size)
            SYS_INFO += f"MEM({单位转换(MEM_Size * MEM_Unit_INT, MEM_Unit_STR)}):{MEM_PCT:.1f}% | "
            D_SYS_INFO['MEM'] = MEM_PCT
        else:
            SYS_INFO += "MEM(ERROR) | "
        if V_MEM_ID is not None:
            V_MEM_Size, V_MEM_Used, V_MEM_Unit_INT, V_MEM_Unit_STR = _snmp_storage_entry(D_Storage, V_MEM_ID)
            V_MEM_PCT = _snmp_usage_percent(V_MEM_Used, V_MEM_Size)
            SYS_INFO += f"V_MEM({单位转换(V_MEM_Size * V_MEM_Unit_INT, V_MEM_Unit_STR)}):{V_MEM_PCT:.1f}% | "
            D_SYS_INFO['SWAP'] = V_MEM_PCT    # virtual memory is reported under the SWAP key
        else:
            SYS_INFO += "V_MEM(ERROR) | "
    else:
        ## Neither Linux nor Windows
        SYS_INFO += 'NOT Linux/Windows'

    ## Disk partitions
    if L_DISK_ID:
        L_DISK_PATH_INFO = []
        for INDEX in L_DISK_ID:
            DISK_Descr = D_Storage[('hrStorageDescr', INDEX)].split('STRING: ')[-1]
            if SYSTEM == 'Windows':
                DISK_Descr = DISK_Descr[0:2]    # keep only the drive letter, e.g. 'C:'
            DISK_Size, DISK_Used, DISK_Unit_INT, DISK_Unit_STR = _snmp_storage_entry(D_Storage, INDEX)
            DISK_PCT = _snmp_usage_percent(DISK_Used, DISK_Size)
            # (usage %, total size, unit)
            D_SYS_INFO['DISK'][DISK_Descr] = (round(DISK_PCT, 1), DISK_Size * DISK_Unit_INT, DISK_Unit_STR)
            L_DISK_PATH_INFO.append(f"{DISK_Descr}({单位转换(DISK_Size * DISK_Unit_INT, DISK_Unit_STR)}):{DISK_PCT:.1f}%")
        SYS_INFO += 'DISK_PATH: '
        SYS_INFO += ', '.join(L_DISK_PATH_INFO)
    else:    # no partition rows in the SNMP answer
        SYS_INFO += "DISK_PATH: ERROR"
    return (StatusCode, SYS_INFO, D_SYS_INFO)
def 普通_执行(TASK):
    """Query one host, compare the results with the thresholds, build the texts.

    Args:
        TASK: ``(SYSTEM, SNMP_PARAM, ALARM_PARAM, SAVE_FILE_PATH)`` where
            SNMP_PARAM is ``[ip, port, community]`` and ALARM_PARAM is
            ``[CPU_ALARM, MEM_ALARM, SWAP_ALARM, DISK_ALARM, CPU_TOP_N, MEM_TOP_N]``.

    Returns:
        ``(LOG_TEXT, ALARM_TEXT, LOG_FILE)``:
        * LOG_TEXT: one log line with host, time and the usage summary.
        * ALARM_TEXT: '' when nothing is wrong, otherwise the host followed
          by the SNMP errors and/or the exceeded thresholds.
        * LOG_FILE: SAVE_FILE_PATH, or ``<SYSTEM>_<ip>_<port>.log`` when
          SAVE_FILE_PATH is the empty string.
    """
    SYSTEM, SNMP_PARAM, ALARM_PARAM, SAVE_FILE_PATH = TASK
    SNMP_IP, SNMP_PORT, SNMP_PWD = SNMP_PARAM
    CPU_ALARM, MEM_ALARM, SWAP_ALARM, DISK_ALARM, CPU_TOP_N, MEM_TOP_N = ALARM_PARAM
    L_ALARM = []      # exceeded thresholds
    L_ERROR = []      # SNMP/query errors
    L_CPU_TOP = []    # top CPU consumers, filled when the CPU threshold is exceeded
    L_MEM_TOP = []    # top memory consumers, filled when the MEM threshold is exceeded
    SNMP_HOST = SNMP_IP + ':' + str(SNMP_PORT)
    StatusCode, SYS_INFO, D_SYS_INFO = SNMP_CPU_MEM_DISK_USED(SYSTEM, SNMP_HOST, SNMP_PWD)
    if StatusCode == 0:    # both snmpwalk calls succeeded
        ## -1 is the "not available" value of the usage dictionary
        if D_SYS_INFO['CPU'] == -1:
            L_ERROR.append('查CPU失败')
        if D_SYS_INFO['MEM'] == -1:
            L_ERROR.append('查MEM失败')
        ## The alarm itself does not depend on the process listing below
        CPU_OVER = D_SYS_INFO['CPU'] > CPU_ALARM
        MEM_OVER = D_SYS_INFO['MEM'] > MEM_ALARM
        if CPU_OVER:
            L_ALARM.append('CPU>' + str(CPU_ALARM))
        if MEM_OVER:
            L_ALARM.append('MEM>' + str(MEM_ALARM))
        if CPU_OVER or MEM_OVER:
            ## Walk the whole host-resources subtree to get per-process figures
            CMD = 'snmpwalk -v 2c -c ' + SNMP_PWD + ' ' + SNMP_HOST + ' 1.3.6.1.2.1.25'
            (status, output) = subprocess.getstatusoutput(CMD)
            if status == 0:
                D_PID_NAME, D_PID_CPU, D_PID_MEM = PID_NAME_CPU_MEM(output)
                if CPU_OVER:
                    L_CPU_TOP = SHOW_PID_CPU_TOP_N(CPU_TOP_N, D_PID_NAME, D_PID_CPU)
                if MEM_OVER:
                    L_MEM_TOP = SHOW_PID_MEM_TOP_N(MEM_TOP_N, D_PID_NAME, D_PID_MEM)
            else:
                L_ERROR.append('SNMP 查 CPU/MEM Uesd Top 失败')
        ## Swap
        SWAP_PCT = D_SYS_INFO['SWAP']
        if SWAP_PCT == 'ERROR' or SWAP_PCT == -1:
            L_ERROR.append('查SWAP失败')
        elif SWAP_PCT > SWAP_ALARM:
            L_ALARM.append('SWAP>' + str(SWAP_ALARM))
        ## Disk partitions
        if D_SYS_INFO['DISK'] == {}:
            L_ERROR.append('查DISK失败')
        else:
            for K in D_SYS_INFO['DISK']:
                if D_SYS_INFO['DISK'][K][0] > DISK_ALARM:
                    L_ALARM.append('DISK PATH ' + K + '>' + str(DISK_ALARM))
    elif StatusCode == 1:
        L_ERROR.append('查CPU失败')
    elif StatusCode == 2:
        L_ERROR.append('查MEM/DISK失败')
    elif StatusCode == 3:
        L_ERROR.append('查CPU/MEM/DISK都失败')

    ## Log file: explicit path, or a name derived from the host
    if SAVE_FILE_PATH == '':
        LOG_FILE = f"{SYSTEM}_{SNMP_IP}_{SNMP_PORT}.log"
    else:
        LOG_FILE = SAVE_FILE_PATH
    TIME = time.strftime('%Y-%m-%d %H:%M:%S')
    HOST = f"{SNMP_IP}:{SNMP_PORT}"
    LOG_TEXT = f"{HOST:21s} {TIME} | {SYS_INFO}"

    ## Alarm text: SNMP errors first, then exceeded thresholds and top processes
    L_TEXT = []
    if L_ERROR:
        L_TEXT.append('SNMP ERROR:\n')
        L_TEXT.extend(' {0}\n'.format(i) for i in L_ERROR)
    if L_ALARM:
        L_TEXT.append('SYS ALARM:\n')
        L_TEXT.extend(' {0}\n'.format(i) for i in L_ALARM)
        if L_CPU_TOP:
            L_TEXT.append(' CPU_TOP_{0}\n'.format(CPU_TOP_N))
            L_TEXT.extend(' {0}\n'.format(i) for i in L_CPU_TOP)
        if L_MEM_TOP:
            L_TEXT.append(' MEM_TOP_{0}\n'.format(MEM_TOP_N))
            L_TEXT.extend(' {0}\n'.format(i) for i in L_MEM_TOP)
    ALARM_TEXT = ''.join(L_TEXT)
    if ALARM_TEXT != '':
        ALARM_TEXT = HOST + ' ' + ALARM_TEXT
    return (LOG_TEXT, ALARM_TEXT, LOG_FILE)
def 显示任务进度(q, 计时开始时间, 显示时间间隔, 任务数量):
    """Print the progress of the worker threads until all results arrived.

    Args:
        q: queue that receives one item per finished task.
        计时开始时间: ``time.time()`` value taken when the tasks were started.
        显示时间间隔: seconds to sleep between two progress checks.
        任务数量: number of tasks that will put a result into ``q``.

    Returns immediately when there are no tasks; otherwise loops until the
    queue holds at least ``任务数量`` items.
    """
    if 任务数量 <= 0:
        return  # nothing to wait for; the loop below could never finish
    while True:
        time.sleep(显示时间间隔)
        LEN_NOW = q.qsize()  # single snapshot, used for both the report and the exit test
        if LEN_NOW == 0:
            continue
        print(f"进度 {LEN_NOW/任务数量*100:.0f}% ({LEN_NOW}/{任务数量}) 已用时 {int(time.time()-计时开始时间)} 秒")
        if LEN_NOW >= 任务数量:
            print("完成")
            break  # ends the progress thread
## Worker-thread entry point: query one host and enqueue the result
def 线程_执行(q, TASK):
    """Run ``普通_执行(TASK)`` and put its result tuple into ``q``.

    Args:
        q: queue collecting ``(LOG_TEXT, ALARM_TEXT, SAVE_FILE_PATH)`` tuples.
        TASK: task tuple as accepted by ``普通_执行``.

    Exactly one item is enqueued per call. When the query raises, an error
    record is enqueued instead, so that consumers counting the queue size
    never wait for a result that will not arrive.
    """
    try:
        RESULT = 普通_执行(TASK)
    except Exception as e:
        TIME = time.strftime('%Y-%m-%d %H:%M:%S')
        try:
            HOST = f"{TASK[1][0]}:{TASK[1][1]}"
            SAVE_FILE_PATH = TASK[3]
        except (TypeError, IndexError, KeyError):
            # The task itself is malformed; report without host details.
            HOST, SAVE_FILE_PATH = 'UNKNOWN', ''
        LOG_TEXT = f"{HOST:21s} {TIME} | 【线程执行失败】{e}"
        ALARM_TEXT = f"{HOST} SNMP ERROR:\n 线程执行失败 {e}\n"
        RESULT = (LOG_TEXT, ALARM_TEXT, SAVE_FILE_PATH)
    q.put(RESULT)
def 多线程_执行(L_TASK, 并发数量):
    """Run every task in a thread pool and return the collected results.

    Args:
        L_TASK: list of task tuples, each handed to ``线程_执行``.
        并发数量: maximum number of worker threads (``None`` uses the
            executor default; values below 1 are raised to 1).

    Returns:
        A list with the items the workers put into the result queue, in
        arrival order. An empty task list returns ``[]`` without starting
        any thread.
    """
    import threading

    任务数量 = len(L_TASK)
    if 任务数量 == 0:
        return []

    q = Queue()
    计时开始时间 = time.time()
    显示时间间隔 = 2  # seconds
    # The progress reporter is a daemon thread: if a worker dies without
    # delivering a result the reporter never finishes, and it must not be
    # able to block this function or interpreter shutdown.
    PROGRESS = threading.Thread(
        target=显示任务进度,
        args=(q, 计时开始时间, 显示时间间隔, 任务数量),
        daemon=True,
    )
    PROGRESS.start()

    WORKERS = 并发数量 if 并发数量 is None else max(1, 并发数量)
    L_FUTURE = []
    with ThreadPoolExecutor(WORKERS) as POOL:  # leaving the block waits for all workers
        for TASK in L_TASK:
            try:
                L_FUTURE.append(POOL.submit(线程_执行, q, TASK))
            except Exception as e:
                print(f"线程执行失败 {e}")
    for FUTURE in L_FUTURE:
        ERROR = FUTURE.exception()
        if ERROR is not None:
            print(f"线程执行失败 {ERROR}")

    # Give the reporter one more interval to print its final line.
    PROGRESS.join(timeout=显示时间间隔 + 1)

    L_R = []
    while not q.empty():
        L_R.append(q.get())
    return L_R
def 顺序执行(L_TASK, ALARM_SAVE_DIR, SHOW_or_SAVE, MAIL, MAIL_PARAM):
    """Query all hosts one after another, then print or store the results.

    Args:
        L_TASK: list of task tuples for ``普通_执行``.
        ALARM_SAVE_DIR: directory for the ``<timestamp>.ALARM`` files.
        SHOW_or_SAVE: ``'SHOW'`` prints log and alarm texts; any other value
            appends each log line to the task's log file and stores alarms.
        MAIL: 1 to e-mail the collected alarm text (only when there is one).
        MAIL_PARAM: ``[smtp_server, sender, password, recipient]``; read only
            when a mail is actually sent.
    """
    if SHOW_or_SAVE == 'SHOW':
        for TASK in L_TASK:
            LOG_TEXT, ALARM_TEXT, SAVE_FILE_PATH = 普通_执行(TASK)
            print(LOG_TEXT)
            if ALARM_TEXT != '':
                print(ALARM_TEXT)
        return

    L_ALARM_TEXT = []
    for TASK in L_TASK:
        LOG_TEXT, ALARM_TEXT, SAVE_FILE_PATH = 普通_执行(TASK)
        LOG_DIR = os.path.dirname(SAVE_FILE_PATH)
        if LOG_DIR:
            os.makedirs(LOG_DIR, exist_ok=True)  # same convenience as for the alarm directory
        # The texts contain non-ASCII characters; do not depend on the locale.
        with open(SAVE_FILE_PATH, 'a', encoding='utf-8') as f:
            f.write(LOG_TEXT + '\n')
        if ALARM_TEXT != '':
            L_ALARM_TEXT.append(ALARM_TEXT + '\n')
    ALARM_TEXT_ALL = ''.join(L_ALARM_TEXT)
    if ALARM_TEXT_ALL == '':
        return

    os.makedirs(ALARM_SAVE_DIR, exist_ok=True)
    ALARM_TIME = time.strftime('%Y%m%d_%H%M%S')
    ALARM_SAVE_PATH = os.path.join(ALARM_SAVE_DIR, f"{ALARM_TIME}.ALARM")
    with open(ALARM_SAVE_PATH, 'w', encoding='utf-8') as f:
        f.write(ALARM_TEXT_ALL)
    if MAIL == 1:
        邮件服务器, 发件人邮箱账号, 发件人邮箱密码, 收件人邮箱地址 = MAIL_PARAM
        主题 = '服务器SNMP告警'
        L_邮件任务 = [(收件人邮箱地址, 主题, ALARM_TEXT_ALL)]
        批量发邮件(邮件服务器, 发件人邮箱账号, 发件人邮箱密码, L_邮件任务)
def 并发执行(L_TASK, ALARM_SAVE_DIR, SHOW_or_SAVE, MAIL, MAIL_PARAM=None):
    """Query all hosts concurrently, then print or store the results.

    Args:
        L_TASK: list of task tuples; one worker thread is used per task.
        ALARM_SAVE_DIR: directory for the ``<timestamp>.ALARM`` files.
        SHOW_or_SAVE: ``'SHOW'`` prints log and alarm texts; any other value
            appends each log line to the task's log file and stores alarms.
        MAIL: 1 to e-mail the collected alarm text (only when there is one).
        MAIL_PARAM: optional ``[smtp_server, sender, password, recipient]``.
            When omitted, the built-in placeholder settings are used.
    """
    # An empty task list needs no thread pool (a pool of size 0 is invalid).
    R = 多线程_执行(L_TASK, len(L_TASK)) if L_TASK else []
    if SHOW_or_SAVE == 'SHOW':
        for LOG_TEXT, ALARM_TEXT, SAVE_FILE_PATH in R:
            print(LOG_TEXT)
            if ALARM_TEXT != '':
                print(ALARM_TEXT)
        return

    L_ALARM_TEXT = []
    for LOG_TEXT, ALARM_TEXT, SAVE_FILE_PATH in R:
        LOG_DIR = os.path.dirname(SAVE_FILE_PATH)
        if LOG_DIR:
            os.makedirs(LOG_DIR, exist_ok=True)
        # The texts contain non-ASCII characters; do not depend on the locale.
        with open(SAVE_FILE_PATH, 'a', encoding='utf-8') as f:
            f.write(LOG_TEXT + '\n')
        if ALARM_TEXT != '':
            L_ALARM_TEXT.append(ALARM_TEXT + '\n')
    ALARM_TEXT_ALL = ''.join(L_ALARM_TEXT)
    if ALARM_TEXT_ALL == '':
        return

    os.makedirs(ALARM_SAVE_DIR, exist_ok=True)
    ALARM_TIME = time.strftime('%Y%m%d_%H%M%S')
    ALARM_SAVE_PATH = os.path.join(ALARM_SAVE_DIR, f"{ALARM_TIME}.ALARM")
    with open(ALARM_SAVE_PATH, 'w', encoding='utf-8') as f:
        f.write(ALARM_TEXT_ALL)
    if MAIL == 1:
        if MAIL_PARAM is None:
            # Placeholder settings kept for callers that pass no MAIL_PARAM.
            SMTP_SERVER = "smtp.邮件服务器.com"
            发件人 = "邮箱账号"
            邮箱密码 = "邮箱密码"
            收件人 = "邮箱账号"
        else:
            SMTP_SERVER, 发件人, 邮箱密码, 收件人 = MAIL_PARAM
        主题 = '服务器SNMP告警'
        L_邮件任务 = [(收件人, 主题, ALARM_TEXT_ALL)]
        批量发邮件(SMTP_SERVER, 发件人, 邮箱密码, L_邮件任务)
if __name__ == '__main__':
    ## ---- Settings to adjust before running ----
    ## Example of the per-host SNMP settings (they go into L_TASK below):
    #SNMP_IP = '192.168.250.22'   # address of the monitored SNMP agent
    #SNMP_PORT = '161'            # port of the monitored SNMP agent
    #SNMP_PWD = 'public'          # SNMP community string

    ## Alarm thresholds in percent (0-100, 80 recommended)
    CPU_ALARM = 80
    MEM_ALARM = 80
    SWAP_ALARM = 80
    DISK_ALARM = 80    # per partition
    ## Number of top processes listed when an alarm is raised
    CPU_TOP_N = 5
    MEM_TOP_N = 5
    ALARM_PARAM = [CPU_ALARM, MEM_ALARM, SWAP_ALARM, DISK_ALARM, CPU_TOP_N, MEM_TOP_N]

    ## E-mail settings
    SMTP_SERVER = 'smtp.XXX.com'    # SMTP server of the sender's mailbox
    SENDER_USER = 'ABCg@XXX.com'    # sender account
    SENDER_PASS = 'pwd@000'         # sender password
    RECEIVER = 'A@X.com'            # recipient address
    MAIL_PARAM = [SMTP_SERVER, SENDER_USER, SENDER_PASS, RECEIVER]

    ## Other settings
    ALARM_SAVE_DIR = '/home/it/ALARM/'    # local directory for alarm files
    SHOW_or_SAVE = 'SAVE'                 # 'SHOW' prints the results, 'SAVE' stores them
    MAIL = 0                              # 1 sends a mail on SNMP errors or exceeded thresholds, 0 never

    ## Hosts to query; the first field ('Linux' or 'Windows') selects how
    ## CPU and memory usage are computed.
    L_TASK = [
        ('Linux', ['192.168.100.2', '161', 'snmp密码'], ALARM_PARAM, '/home/it/SNMP/192.168.100.2.log'),
        ('Windows', ['192.168.200.2', '161', 'snmp密码'], ALARM_PARAM, '/home/it/SNMP/192.168.200.2.log'),
    ]

    ## Run
    顺序执行(L_TASK, ALARM_SAVE_DIR, SHOW_or_SAVE, MAIL, MAIL_PARAM)
    #并发执行(L_TASK, ALARM_SAVE_DIR, SHOW_or_SAVE, MAIL)    # has unresolved problems

    ## To run from the system scheduler (cron):
    ## /usr/bin/python3 /home/it/PY/SNMP.py >> /home/it/PY/py.log