【背景:】公司最近一段时间虚拟机问题较多,由此引申出来对存储交换机的监控。
【目标:】
1、交换机端口state 由online转为no_light的时候,邮件告警交换机IP,对应端口以及对应的主机。【频次5分钟一次】
2、交换机光衰:交换机端口 为online,且rx power <300的时候,则发送告警邮件【一天两次】
以上规则由相应的运维人员提出,因厂而异
【实现过程:】
1、连接交换机并执行命令switchshow,sfpshow,alishow
2、将命令结果保存至数据库
3、对数据库数据进行分析。数据库为mysql8.0,若版本过低,则可能无法执行row number over (partition by )的语句
4、发送邮件
【代码:】
1.py 实现目标1
入库及online->no_light分析
# _*_ coding=utf-8 _*_
import paramiko
import datetime,time,re
import os
import sys,logging
# import socket
import platform
import xlwt,pymysql,gc
import smtplib,base64
import email.mime.text
import email.mime.multipart
# from email.mime.text import MIMEText
# from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication
day = time.strftime('%Y%m%d',time.localtime())
inserttime = time.strftime('%Y%m%d%H%M',time.localtime())
inserttime_read=inserttime[0:4]+"年"+inserttime[4:6]+"月"+inserttime[6:8]+"日 "+inserttime[8:10]+":"+inserttime[10:12]
h = time.strftime('%Y%m%d%H',time.localtime())
def sendmail(context,email_subject,file_excel_path= r"/root/switch/excel/SanSwitch", file_problem="SanSwitch_problem" ):
form_adder = 'xxx@163.com'
str_pwd = base64.b64decode("RGVkYzM0NTQjds").decode("utf-8") ##应公司要求,做了个简单的加密
to_adder = ['xxx@xxx.com','xxx@xxx.com']
smtp_server = 'xxxx.com'
msg = email.mime.multipart.MIMEMultipart()
txt = email.mime.text.MIMEText(context, 'plain', 'utf-8')
msg['From'] = form_adder
msg['To'] = ','.join(to_adder)
msg['subject'] = str(inserttime_read)+ email_subject
msg.attach(txt)
ret = True
try:
server = smtplib.SMTP(smtp_server,25)
# tls 方式验证
server.starttls()
server.login(form_adder,str_pwd)
server.sendmail(form_adder,to_adder,msg.as_string())
server.quit()
except Exception as e:
print(e)
ret = False
return ret
def ssh_connect(ip,username,password,cmd,port=22):
a=[]
b=[]
c=[]
ssh = paramiko.SSHClient() # 创建一个ssh客户端对象
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) # 设置以什么方式连接远程客户端,这里配置自动协商
try:
ssh.connect(hostname=ip, port=2222, username=username, password=password) # 通过账号密码连接远程客户端
for n in range(len(cmd)):
stdin, stdout, stderr = ssh.exec_command(cmd[n]) # 远程执行命令,结果会返回标准输入、标准输出、标准错误输出
# print(cmd[n])
if n==0:
for i in stdout.readlines(): # *******************************************************
i = i.strip() # * 祝 *
j = i.split() # * 祖 *
a.append((' '.join(j))) # * 国 *
if n==1: # * 繁 *
for i in stdout.readlines(): # * 荣 *
i = i.strip() # * 富 *
j = i.split() # * 强 *
b.append((' '.join(j))) # * 国 *
if n==2: # * 泰 *
for i in stdout.readlines(): # * 民 *
i = i.strip() # * 安 *
j = i.split() # * *
c.append((' '.join(j))) # ********************************************************
ssh.close()
return a,b,c
except Exception as e:
print("Exception:",e)
connect_error_ip.append(ip)
return a,b,c
#连接数据库
conn1=pymysql.connect(host='xx.xx.xx.xx',user='user',password=base64.b64decode("SUplVmhYTYSIDD82ZhNWpI").decode("utf-8"),database='devops', charset='utf8')
cursor=conn1.cursor()
##设计3个表
SQL_create_switchshow='create table if not exists pvs_switchshow' \
' (id int NOT NULL AUTO_INCREMENT,' \
'insertTime varchar(45) COLLATE utf8_bin DEFAULT NULL,' \
'switchname varchar(20),ip varchar(16) not null ,' \
'Index_switch varchar(5),' \
'port varchar(5),' \
'Address varchar(10),' \
'Media varchar(5),' \
'Speed varchar(5),' \
'State varchar(20),' \
'proto varchar(10),' \
'proto_port varchar(10),' \
'wwn varchar(30),primary key(id) )'
SQL_create_sftpshow = " CREATE TABLE if not exists `pvs_sfpshow` ( `id` INT NOT NULL AUTO_INCREMENT, " \
"insertTime VARCHAR(45) COLLATE UTF8_BIN DEFAULT NULL, " \
"IP VARCHAR(45) COLLATE UTF8_BIN DEFAULT NULL, " \
"port VARCHAR(45)CHARACTER SET UTF8 COLLATE UTF8_BIN DEFAULT NULL, " \
"rx_dbm VARCHAR(45)CHARACTER SET UTF8 COLLATE UTF8_BIN DEFAULT NULL, " \
"rx_uw VARCHAR(45)CHARACTER SET UTF8 COLLATE UTF8_BIN DEFAULT NULL, " \
"tx_dbm VARCHAR(45)CHARACTER SET UTF8 COLLATE UTF8_BIN DEFAULT NULL, " \
"tx_uw VARCHAR(45)CHARACTER SET UTF8 COLLATE UTF8_BIN DEFAULT NULL, PRIMARY KEY (`id`)) " \
"ENGINE=INNODB AUTO_INCREMENT=18183 DEFAULT CHARSET=UTF8MB3 COLLATE = UTF8_BIN"
SQL_create_alishow = "CREATE TABLE if not exists `pvs_alishow` ( `id` INT NOT NULL AUTO_INCREMENT, " \
"`insertTime` VARCHAR(45) COLLATE UTF8_BIN DEFAULT NULL, " \
"IP VARCHAR(45) COLLATE UTF8_BIN DEFAULT NULL, " \
" `aliasname` VARCHAR(45)CHARACTER SET UTF8 COLLATE UTF8_BIN DEFAULT NULL, " \
" `wwn` VARCHAR(200)CHARACTER SET UTF8 COLLATE UTF8_BIN DEFAULT NULL, " \
" PRIMARY KEY (`id`)) " \
"ENGINE=INNODB AUTO_INCREMENT=18183 DEFAULT CHARSET=UTF8MB3 COLLATE = UTF8_BIN"
##数据库建索引
'''
create index sfpshow_inserttime on devops.pvs_sfpshow(inserttime);
create index switchshow_inserttime on devops.pvs_switchshow(inserttime);
create index alishow_inserttime on devops.pvs_alishow(inserttime)
'''
#创建三张表
# print(SQL_create_switchshow)
cursor.execute(SQL_create_switchshow)
cursor.execute(SQL_create_sftpshow)
cursor.execute(SQL_create_alishow)
## 命令列表
cmd = ["switchshow","sfpshow -all|grep -E 'TX|RX|Port'","alishow"]
sfpshow_problem_list=[] ##光衰问题列表
count=0 ##光衰问题计数
switchshow_problem_list=[] #问题交换机列表
connect_error_ip=[]
ip_list=['XX.XX','XX.XX']
for ip in ip_list:
l1,l2,l3 = ssh_connect(ip=ip,port=22,username="XXXX",password="XXXX",cmd=cmd)#此处阔以加密
if l1 != []:
switchName = l1[0].split(":")[1].strip()
# print(switchName)
##switchshow 命令结果进行入库
for i in range(len(l1)):
if re.match("^\d",l1[i]):
#print(l1[i])
if l1[i].split()[5] == 'Online':
# print(l1[i].split()[0], l1[i].split()[5],l1[i].split()[8])
if len(l1[i].split()[8]) != 1:
# print(len(l1[i].split()[8]))
SQL1="insert into devops.pvs_switchshow (insertTime,switchName,ip,Index_switch,port,Address,Media,Speed,State,proto,proto_port,wwn)" \
" values ("+inserttime+",\""+switchName+"\","+\
"\""+ip+"\","+\
"\""+l1[i].split()[0]+"\","+\
"\""+l1[i].split()[1]+"\","+\
"\""+l1[i].split()[2]+"\","+\
"\""+l1[i].split()[3]+"\","+\
"\""+l1[i].split()[4]+"\","+\
"\""+l1[i].split()[5]+"\","+\
"\""+l1[i].split()[6]+"\","+\
"\""+l1[i].split()[7]+"\","+\
"\""+l1[i].split()[8] +"\")"
print(SQL1)
cursor.execute(SQL1)
conn1.commit()
else:
# print("特殊情况")
# print(len(l1[i].split()[8]))
t=l1[i].split()[8]+' '+l1[i].split()[9]+' '+l1[i].split()[10]+' '+l1[i].split()[11]+' '+l1[i].split()[12]+' ' + \
l1[i].split()[13] + ' '+l1[i].split()[14]
# print(t)
SQL1 = "insert into devops.pvs_switchshow (insertTime,switchName,ip,Index_switch,port,Address,Media,Speed,State,proto,proto_port,wwn)" \
" values ("+inserttime+",\""+switchName+"\","+\
"\""+ip+"\","+\
"\""+l1[i].split()[0]+"\","+\
"\""+l1[i].split()[1]+"\","+\
"\""+l1[i].split()[2]+"\","+\
"\""+l1[i].split()[3]+"\","+\
"\""+l1[i].split()[4]+"\","+\
"\""+l1[i].split()[5]+"\","+\
"\""+l1[i].split()[6]+"\","+\
"\""+l1[i].split()[7]+"\","+\
'\"' + t + '\")'
print(SQL1)
cursor.execute(SQL1)
conn1.commit()
else:
#print(l1[i].split()[0], l1[i].split()[5])
SQL1 = "insert into devops.pvs_switchshow (insertTime,switchName,ip,Index_switch,port,Address,Media,Speed,State,proto)" \
" values ("+inserttime+",\""+switchName+"\","+\
"\""+ip+"\","+\
"\""+l1[i].split()[0]+"\","+\
"\""+l1[i].split()[1]+"\","+\
"\""+l1[i].split()[2]+"\","+\
"\""+l1[i].split()[3]+"\","+\
"\""+l1[i].split()[4]+"\","+\
"\""+l1[i].split()[5]+"\","+\
'\"' + l1[i].split()[6] + '\")'
print(SQL1)
cursor.execute(SQL1)
conn1.commit()
'''
#print(l1[i].split()[0])
if A == None:
A="AlarmState_10_3_125_203_list_"+l1[i].split()[0]
if B == None:
B=l1[i].split()[5]
exec("{}=B".format(A))
print(A)
print(B)
'''
##SFPSHOW 命令数据入库
# print("L2++++++++++++++++++++++++++++++++++++++++")
if l2 != []:
for i in range(len(l2)):
#print(l2[i])
if "RX Power" in ''.join(l2[i]):
if "inf" not in ''.join(l2[i]):
if "inf" not in ''.join(l2[i + 1]):
#print('10.3.125.203', i)
port = l2[i - 1].split()[1].split(":")[0]
RX_DBM = l2[i].split('dBm')[0].split(':')[1].strip()
TX_DBM = l2[i + 1].split('dBm')[0].split(':')[1].strip()
RX_UW = l2[i].split('uW')[0].split('(')[1].strip()
TX_UW = l2[i + 1].split('uW')[0].split('(')[1].strip()
SQL1 = r'insert into devops.pvs_sfpshow (insertTime,IP,port,rx_dbm,rx_uw,tx_dbm,tx_uw) values("%s","%s","%s","%s","%s","%s","%s")' % \
(inserttime, ip, port, RX_DBM, RX_UW, TX_DBM, TX_UW)
# print(SQL1)
cursor.execute(SQL1)
conn1.commit()
## 因要关联alishow,此处的将改为sql查询方式
# if float(RX_UW) <= 300.0 and float(RX_DBM) < -5.0:
# text = "switchname:" + ip + " port:" + port + " RX_POWER_DBM:" + RX_DBM + " RX_UW:" + RX_UW + " TX_POWER_DBM:" + TX_DBM + " TX_UW:" + TX_UW
# sfpshow_problem_list.append(text)
#
# ##问题计数
# count = count + 1
# print(count)
# print('\n'.join(sfpshow_problem_list))
# print("L3++++++++++++++++++++++++++++++++++++++++")
if l3 != []:
L3=' '.join(l3).replace('alias','\nalias').replace('zone','\nzone').replace('Effective','\nEffective')
# print(l3)
list_alishow=L3.split('\n')
for i in range(len(list_alishow)):
if re.match('^alias',list_alishow[i]):
#print(list_alishow[i])
wwn=" ".join(list_alishow[i].split()[2:])
server_name=list_alishow[i].split()[1]
SQL1="insert into devops.pvs_alishow (insertTime,IP,aliasname,wwn) " \
"values (" + inserttime +",\"" +ip+"\",\""+server_name+"\",\""+wwn+"\")"
print(SQL1)
cursor.execute(SQL1)
conn1.commit()
##找出状态变化的port【从online变为no_light】
'''
select tt1.port,tt1.ip,tt1.inserttime,tt1.switchname,tt1.state from
(select *,row_number() over (partition by port,ip,switchname order by inserttime desc) as t from devops.pvs_switchshow )tt1
where exists
(select port,state from
(select *,row_number() over (partition by port,ip,switchname order by inserttime desc) as t from devops.pvs_switchshow )tt2
where tt2.t=2 and tt2.state='Online' and tt1.port=tt2.port ) and tt1.t=1 and tt1.state='No_Light';
'''
### 找出状态变化的port【从online变为no_light】,状态变化的时间范围8天,alishow表里匹配不出则显示none
SQL1="select ttttt.*,tt3.aliasname,tt3.t from " \
"(select tt1.switchname,tt1.ip,tt1.port,tt1.inserttime,tt1.state,tt2.inserttime tt2_inserttime,tt2.state tt2_state,tt2.wwn " \
"from " \
"(select *,row_number() over (partition by port,ip,switchname order by inserttime desc) as t " \
"from devops.pvs_switchshow " \
"where inserttime>=date_format(date_sub(now(), interval 8 day),'%Y%m%d%H%i'))tt1 , " \
"(select *,row_number() over (partition by port,ip,switchname order by inserttime desc) as t " \
"from devops.pvs_switchshow " \
"where inserttime>=date_format(date_sub(now(), interval 8 day),'%Y%m%d%H%i'))tt2 " \
"where tt2.t=2 and tt2.state='Online' and tt1.port=tt2.port and tt1.ip=tt2.ip and tt1.t=1 and tt1.state='No_Light' )ttttt" \
" left join " \
"(select aliasname,inserttime t3_inserttime,wwn ,row_number() over (partition by aliasname order by inserttime desc) as t " \
"from devops.pvs_alishow where inserttime>=date_format(date_sub(now(), interval 8 day),'%Y%m%d%H%i')) tt3 " \
" on ttttt.wwn=tt3.wwn where tt3.t=1 or tt3.t is null";
cursor.execute(SQL1)
res=cursor.fetchall()
list_problem=list(res)
if len(list_problem) != 0:
context_status_change = inserttime_read+'时交换机状态变化online → No_Light的端口数量有:'+str(len(list_problem))+"个\n分别是:\n"
for i in list_problem:
# print("交换机 {} {}:{}端口 由 {}时的状态:{} 在 {} 时转变为:{},对应的服务器是{},wwn是{} ".format(
# i[0],i[1],i[2],i[5],i[6],i[3],i[4],i[8],i[7]))
context_status_change = context_status_change +"\n交换机 {} {}:{}端口 由 {}时的状态:{} 在 {} 时转变为:{},对应的服务器是{},wwn是{} ".format(i[0],i[1],i[2],i[5],i[6],i[3],i[4],i[8],i[7])
print("\n\n\n")
else:
context_status_change = inserttime_read + '时所有交换机没有端口发生变化:online → No_Light\n\n'
###sfpshow关联alishow表
##邮件内容
print(context_status_change)
if len(connect_error_ip)!=0 or len(list_problem) !=0:
sendmail(context="ssh登录不上的IP有:\n"+'\n'.join(connect_error_ip)+'\n\n\n\n\n'+context_status_change,email_subject='存储交换机监控告警信息')
# print('\n'.join(l1))
# print('\n'.join(l2))
##邮件格式
print(connect_error_ip)
print(inserttime[0:4]+"年"+inserttime[4:6]+"月"+inserttime[6:8]+"日 "+inserttime[8:10]+":"+inserttime[10:12])
cursor.close()
conn1.close()
光衰的脚本2.py 实现目标2
代码如下
# _*_ coding=utf-8 _*_
import paramiko
import datetime,time,re
import os
import sys,logging
# import socket
import platform
import xlwt,pymysql,gc
import smtplib,base64
import email.mime.text
import email.mime.multipart
# from email.mime.text import MIMEText
# from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication
day = time.strftime('%Y%m%d',time.localtime())
inserttime = time.strftime('%Y%m%d%H%M',time.localtime())
h = time.strftime('%Y%m%d%H',time.localtime())
inserttime_read=inserttime[0:4]+"年"+inserttime[4:6]+"月"+inserttime[6:8]+"日 "+inserttime[8:10]+":"+inserttime[10:12]
def sendmail(context,email_subject,file):
form_adder = 'xxx@163.com'
str_pwd = base64.b64decode("RGDSDSWSDVkYzM0NTQj").decode("utf-8")
to_adder = ['XXX@163.com','XXXX@163.com']
smtp_server = 'XXX.com'
msg = email.mime.multipart.MIMEMultipart()
txt = email.mime.text.MIMEText(context, 'plain', 'utf-8')
msg['From'] = form_adder
msg['To'] = ','.join(to_adder)
msg['subject'] = str(inserttime_read)+ email_subject
msg.attach(txt)
# ##附件内容
# file = file_excel_path+'.'+h+".xls"
part = MIMEApplication(open(file, 'rb').read())
part.add_header('Content-Disposition', 'attachment', filename=file)
msg.attach(part)
ret = True
try:
server = smtplib.SMTP(smtp_server,25)
# tls 方式验证
server.starttls()
server.login(form_adder,str_pwd)
server.sendmail(form_adder,to_adder,msg.as_string())
server.quit()
except Exception as e:
print(e)
ret = False
return ret
#操作excel
def w_excel(res,file):
book = xlwt.Workbook() #新建一个excel
sheet = book.add_sheet('rx_power') #新建一个sheet页
title = ['switchname','ip','port','inserttime','wwn','aliasname','rx_dbm','rx_uw']
#写表头
i = 0
for header in title:
sheet.write(0,i,header)
i+=1
#写入数据
for row in range(1,len(res)):
for col in range(0,len(res[row])):
sheet.write(row,col,res[row][col])
row+=1
col+=1
book.save(file)
print("导出成功!")
#连接数据库
conn1=pymysql.connect(host='xxx',user='user',password=base64.b64decode("SUplVmhaSDFSDFSDFYT2ZhNWpI").decode("utf-8"),database='devops', charset='utf8')
cursor=conn1.cursor()
###sfpshow关联alishow表
SQL2="select tt1.switchname,tt.ip,tt.port,tt.inserttime,tt1.wwn,tt3.aliasname,tt.rx_dbm,tt.rx_uw " \
"from " \
"(select inserttime,ip,port,rx_dbm,rx_uw,tx_dbm,tx_uw ," \
"row_number() over (partition by Ip,port order by inserttime desc) as t " \
"from devops.pvs_sfpshow Where inserttime>=date_format(date_sub(now(), interval 8 day),'%Y%m%d%H%i') ) tt " \
"left join " \
"(select *,row_number() over (partition by port,ip,switchname order by inserttime desc) as t " \
"from devops.pvs_switchshow where inserttime>=date_format(date_sub(now(), interval 8 day),'%Y%m%d%H%i'))tt1 " \
"on tt1.ip=tt.ip and tt1.port=tt.port " \
"left join " \
"(select aliasname,inserttime t3_inserttime,wwn ,row_number() over (partition by aliasname order by inserttime desc) as t " \
"from devops.pvs_alishow where inserttime>=date_format(date_sub(now(), interval 8 day),'%Y%m%d%H%i')) tt3 " \
"on tt1.wwn=tt3.wwn " \
"where (tt3.t=1 or tt3.t is null) and tt.rx_dbm <-5 and tt.rx_uw<300 and tt1.t=1 and tt.t=1 and tt1.state='Online' " \
" order by tt.ip, tt.port "
SQL3="select distinct tt1.IP " \
"from " \
"(select inserttime,ip,port,rx_dbm,rx_uw,tx_dbm,tx_uw ," \
"row_number() over (partition by Ip,port order by inserttime desc) as t " \
"from devops.pvs_sfpshow Where inserttime>=date_format(date_sub(now(), interval 8 day),'%Y%m%d%H%i') ) tt " \
"left join " \
"(select *,row_number() over (partition by port,ip,switchname order by inserttime desc) as t " \
"from devops.pvs_switchshow where inserttime>=date_format(date_sub(now(), interval 8 day),'%Y%m%d%H%i'))tt1 " \
"on tt1.ip=tt.ip and tt1.port=tt.port " \
"left join " \
"(select aliasname,inserttime t3_inserttime,wwn ,row_number() over (partition by aliasname order by inserttime desc) as t " \
"from devops.pvs_alishow where inserttime>=date_format(date_sub(now(), interval 8 day),'%Y%m%d%H%i')) tt3 " \
"on tt1.wwn=tt3.wwn " \
"where (tt3.t=1 or tt3.t is null) and tt.rx_dbm <-5 and tt.rx_uw<300 and tt1.t=1 and tt.t=1 and tt1.state='Online' " \
" order by tt1.IP "
cursor.execute(SQL2)
res2=cursor.fetchall()
list_problem2=list(res2)
cursor.execute(SQL3)
res3=cursor.fetchall()
list_problem3=list(res3)
if len(list_problem2) != 0:
context_problem_rx="\n\n\n"+inserttime_read+'时交换机端口状态为Online的 rx_power<300 且 rx_dbm<-5的端口数量有:'+str(len(list_problem2))+"个\n分别是:\n\n\n"
for i in range(len(list_problem2)):
if list_problem2[i][5] == None:
if list_problem2[i][4] ==None:
if i >0 and list_problem2[i][1] !=list_problem2[i-1][1]:
context_problem_rx += "\n"
context_problem_rx+="\n交换机 {} IP:{}:{}端口 在 {} 时的rx_power为:{} rx_dbm 为{}:对应的服务器是{},wwn是{} ".format(
list_problem2[i][0], list_problem2[i][1], list_problem2[i][2], list_problem2[i][3], list_problem2[i][7],
list_problem2[i][6], "未知的服务器", "未知的wwn号")
else:
if i >0 and list_problem2[i][1] !=list_problem2[i-1][1]:
context_problem_rx += "\n"
context_problem_rx +="\n交换机 {} IP:{}:{}端口 在 {} 时的rx_power为:{} rx_dbm 为{}:对应的服务器是{},wwn是{} ".format(
list_problem2[i][0], list_problem2[i][1], list_problem2[i][2], list_problem2[i][3], list_problem2[i][7],
list_problem2[i][6], "未知的服务器", list_problem2[i][4])
else:
if i > 0 and list_problem2[i][1] != list_problem2[i - 1][1]:
context_problem_rx += "\n"
context_problem_rx +="\n交换机 {} IP:{}:{}端口 在 {} 时的rx_power为:{} rx_dbm 为{}:对应的服务器是{},wwn是{} ".format(
list_problem2[i][0], list_problem2[i][1], list_problem2[i][2], list_problem2[i][3], list_problem2[i][7],
list_problem2[i][6], list_problem2[i][5], list_problem2[i][4])
else:
context_problem_rx = inserttime + '时交换机端口rx_power没有异常\n\n'
##邮件内容
print(context_problem_rx)
#邮件附件
file=r"e:\\SANSwitch\sanswitch."+inserttime+".xls"
w_excel(res2,file)
sendmail(context=context_problem_rx,email_subject='存储交换机监控光衰告警信息',file=file)
# print('\n'.join(l1))
# print('\n'.join(l2))
##邮件格式
cursor.close()
conn1.close()