公司使用Percona组件管理mysql,通过这个组件,各个mysql节点会定时上报各类监控指标到SERVER端存储,但是监控指标只统计了两个snapshot时间间隔中的慢查询发生数量,没有具体的SQL信息和过程数据,所以要想分析慢SQL,还需要回到原库去查SLOWLOG。当集群规模较大时(例如我们测试环境接近2000个MYSQL节点,跑不同的业务),逐台统计费时费力,因此做了一个简单的python脚本,去自动收集和分析这些慢日志。
大致思路如下:
1、首先从多个server端获取mysql节点信息,更新至台账表
2、由于各个mysql数据库和操作系统的用户密码并不完全统一,因此需要先使用几个常用用户密码去探测,并更新至台账表。
3、由于各个mysql数据库系统时间不是自然时间,因此还需要获取各个节点当前的操作系统时间。
4、连server端,检查指定时间段内,是否出现过慢查询
5、利用fabric去存在慢查询的节点操作系统下载slowlog
6、解析slowlog并入库保存
代码如下,非常初级,请轻拍
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
import hashlib
from sqlalchemy import *
from sqlalchemy import exc
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import *
import traceback
import datetime
import time
import subprocess
import threading
import errno
import os
import re
import cx_Oracle
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import smtplib
import timeout_decorator
import sys
import MySQLdb
# 台账库
mc = {
'Host':127.0.0.1',
'Port': 3307,
'User': 'test',
'Pass': 'test',
'db': 'test',
}
# 连接数据库
try:
conn = MySQLdb.connect(host=mc.get('Host', None), port=mc.get('Port', None),
user=mc.get('User', None), passwd=mc.get('Pass', None), db=mc.get('db', None), charset='utf8')
conn.autocommit(True)
except Exception as e:
print (e)
Base = declarative_base()
class MYAWRHOST(Base):
__tablename__ = "myawr_host"
id = Column("id", Integer(), primary_key=True)
ip = Column("ip_addr", String(20))
port = Column("port", Integer())
dbname = Column("db_name", String(50))
hostname = Column("hostname", String(50))
version = Column("version", String(50))
snapid = Column("snap_id", Integer())
snaptime = Column("snap_time", DateTime())
class SLOWLOG(Base):
__tablename__ = "slow_log"
id = Column("id", Integer(), primary_key=True)
mysql_ip = Column("mysql_ip", String(50))
start_time = Column("start_time", Integer())
db_user = Column("db_user", String(20))
app_ip = Column("app_ip", String(15))
sqlid = Column("SQLID", String(40))
thread_id = Column("thread_id", Integer())
exectime = Column("exec_duration", String(30))
rows_sent = Column("rows_sent", Integer())
rows_examined = Column("rows_examined", Integer())
slowsql = Column("slowsql", Text())
update_time=Column("update_time", DateTime())
class SLOWRESULT(Base):
__tablename__ = "slow_result"
id = Column("id", Integer(), primary_key=True)
mysql_ip = Column("mysql_ip", String(50))
client = Column("client", String(50))
sqlid = Column("SQLID", String(40))
exectime = Column("exec_duration", Integer())
locktime = Column("lock_duration", Integer())
rows_sent = Column("rows_sent", Integer())
exec_count = Column("exec_count", Integer())
slowsql = Column("slowsql", Text())
fullstatic = Column("fullstatic", String(200))
update_time=Column("update_time", DateTime())
class MYSQLMONILOG(Base):
__tablename__ = "mysql_moni_log"
jobid = Column("jobid", Integer(),autoincrement=True, primary_key=True)
id = Column("id", Integer())
ip = Column("ip_addr", String(20))
port = Column("port", Integer())
dbname = Column("db_name", String(50))
hostname = Column("hostname", String(50))
version = Column("version", String(50))
snapid_begin = Column("snap_id_begin", Integer())
snaptime_begin = Column("snap_time_begin", DateTime())
snapid_end = Column("snap_id_end", Integer())
snaptime_end = Column("snap_time_end", DateTime())
jobname = Column("jobname", String(50))
desc = Column("desc", String(50))
status = Column("status", String(50))
updatetime = Column("update_time", DateTime())
class TBFULLLOG(Base):
__tablename__ = "tbfull_log"
id = Column("id", Integer(), autoincrement=True, primary_key=True)
MYSQL_IP = Column("MYSQL_IP", String(50))
DB_NAME = Column("DB_NAME", String(50))
snap_id = Column("snap_id", Integer())
snap_time = Column("snap_time", Integer())
query = Column("query", Text)
db = Column("db", String(64))
exec_count = Column("exec_count", Integer())
total_latency = Column("total_latency", Integer())
no_index_used_count = Column("no_index_used_count", Integer())
no_good_index_used_count = Column("no_good_index_used_count", Integer())
no_index_used_pct = Column("no_index_used_pct", Integer())
rows_sent = Column("rows_sent", Integer())
rows_examined = Column("rows_examined", Integer())
rows_sent_avg = Column("rows_sent_avg", Integer())
rows_examined_avg = Column("rows_examined_avg", Integer())
first_seen = Column("first_seen", String(19))
last_seen = Column("last_seen", String(19))
digest = Column("digest", String(32))
dbstrmoni = "mysql://test:test@127.0.0.1:3307/test?charset=utf8"
def select(connstr,str):
try:
conn = cx_Oracle.connect(connstr)
cursor0 = conn.cursor()
cursor0.execute(str)
res0 = cursor0.fetchall()
except Exception as e:
print(e)
res0=""
return res0
def selectsql(dbstr,sql):
#dbstr = type + "://" + user + ":" + password + "@" + dbstr + "?charset=utf8"
engine = create_engine(dbstr)
DBsession = sessionmaker(bind=engine)
session = DBsession()
rset = []
try:
e = session.execute(sql)
#rkey = e.keys()
for x in e.fetchall():
rset.append(list(x))
except Exception as e:
traceback.print_exc()
finally:
session.close()
if len(rset)==0:
print("selectsql查询结果为空:",dbstr,sql)
return rset
def updatesql(dbstr,sql):
#dbstr = type + "://" + user + ":" + password + "@" + dbstr + "?charset=utf8"
engine = create_engine(dbstr)
DBsession = sessionmaker(bind=engine)
session = DBsession()
try:
session.execute(sql)
session.commit()
#rkey = e.keys()
except Exception as e:
print("updatesql查询报错",sql)
traceback.print_exc()
finally:
session.close()
def setmysql(id, ip, port, dbname, hostname, version, snapid, snaptime):
try:
#print("insert",id, ip, port, dbname, hostname, version, snapid, snaptime)
engine = create_engine("mysql://test:test@127.0.0.1:3307/test?charset=utf8")
DBsession = sessionmaker(bind=engine)
session = DBsession()
myawrhost = MYAWRHOST(
id=id,
ip=ip,
port=port,
dbname=dbname,
hostname=hostname,
version=version,
snapid=snapid,
snaptime=snaptime
)
session.add(myawrhost)
session.commit()
except exc.SQLAlchemyError as e:
traceback.print_exc()
pass
session.close()
def settbfulllog(id,MYSQL_IP,DB_NAME,ret2):
try:
#print("insert",id, ip, port, dbname, hostname, version, snapid, snaptime)
engine = create_engine("mysql://test:test@127.0.0.1:3307/test?charset=utf8")
DBsession = sessionmaker(bind=engine)
session = DBsession()
for i in range(len(ret2)):
SNAP_ID = ret2[i][0]
SNAP_TIME = ret2[i][1]
QUERY = ret2[i][2]
DB = ret2[i][3]
EXEC_COUNT = ret2[i][4]
TOTAL_LATENCY = ret2[i][5]
no_index_used_count = ret2[i][6]
no_good_index_used_count = ret2[i][7]
no_index_used_pct = ret2[i][8]
rows_sent = ret2[i][9]
rows_examined = ret2[i][10]
rows_sent_avg = ret2[i][11]
rows_examined_avg = ret2[i][12]
first_seen = ret2[i][13]
last_seen = ret2[i][14]
DIGEST = ret2[i][15]
tbfulllog = TBFULLLOG(
id=id,
MYSQL_IP=MYSQL_IP,
DB_NAME=DB_NAME,
snap_id=SNAP_ID,
snap_time=SNAP_TIME,
query=QUERY,
db=DB,
exec_count=EXEC_COUNT,
total_latency=TOTAL_LATENCY,
no_index_used_count=no_index_used_count,
no_good_index_used_count=no_good_index_used_count,
no_index_used_pct=no_index_used_pct,
rows_sent=rows_sent,
rows_examined=rows_examined,
rows_sent_avg=rows_sent_avg,
rows_examined_avg=rows_examined_avg,
first_seen=first_seen,
last_seen=last_seen,
digest=DIGEST
)
session.add(tbfulllog)
session.commit()
except exc.SQLAlchemyError as e:
traceback.print_exc()
pass
session.close()
def geterror():
sql0="SELECT ID,IP_ADDR,PORT,DB_NAME,HOSTNAME,VERSION FROM MYAWR_HOST where DESC1='连接失败'"
ret=selectsql(dbstrmoni,sql0)
return ret
def awrserverip(dbstrawr):
mysqldb=dbstrawr[dbstrawr.index("@") + 1:-1].split("/")[0]
return mysqldb
#step1 获取主机清单
#step2 更新db信息表
def initjob(dbstrawr):
sql0="SELECT ID,IP_ADDR,PORT,DB_NAME,HOSTNAME,VERSION FROM MYAWR_HOST"
macs=selectsql(dbstrawr,sql0)
for mac in macs:
id=mac[0]
ip=mac[1]
port=mac[2]
dbname=mac[3]
hostname=mac[4]
version=mac[5]
sqlcheckdb="SELECT DB_NAME FROM TEST.MYAWR_HOST WHERE IP_ADDR='%s' AND PORT='%s'"%(ip,port)
repcheckdb=selectsql(dbstrmoni,sqlcheckdb)
if len(repcheckdb)==0:
pattern = '[a-z]{3,4}-[0-9]{4}-[0-9]' # Converts 'djm' to 'd.*j.*m'
regex = re.compile(pattern) # Compiles a regex.
if regex.match(hostname):
user="root"
passwd="root"
remoteday=getremoteday_docker(user,passwd,ip)
if remoteday=="timeout":
user = "root"
passwd = "root123"
remoteday = getremoteday_docker(user,passwd,ip)
else:
user="sysadmin"
passwd="pass1234"
remoteday=getremoteday(user,passwd,ip)
#print(ip,remoteday,len(remoteday),type(remoteday))
if remoteday !="error":
desc="连接成功"
if remoteday==time.strftime("%Y%m%d"):
remoteday="realday"
else:
remoteday="sysday"
#setmysql(id,ip,port,dbname,hostname,version,snapid,snap_time)
else:
desc="连接失败"
remoteday="unknown"
#setmysql(id,ip,port,dbname,hostname,version,snapid,snap_time)
app,env=gethostinfo_new(hostname,ip)
sql0="replace into test.myawr_host(id,ip_addr,port,app,env,db_name,hostname,version,desc1,remoteday,user1,passwd,awrdb) values('%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s')"%(id,ip,port,app,env,dbname,hostname,version,desc,remoteday,user,passwd,awrserverip(dbstrawr))
#print(sql0)
updatesql(dbstrmoni,sql0)
else:
print(ip+":"+str(port)+"已经登记,跳过")
def addawrdb(dbstrawr):
sql0="SELECT ID,IP_ADDR,PORT,DB_NAME,HOSTNAME,VERSION FROM MYAWR_HOST"
macs=selectsql(dbstrawr,sql0)
for mac in macs:
id=mac[0]
ip=mac[1]
port=mac[2]
dbname=mac[3]
hostname=mac[4]
version=mac[5]
sqlcheckdb="SELECT DB_NAME FROM TEST.MYAWR_HOST WHERE IP_ADDR='%s' AND PORT='%s'"%(ip,port)
repcheckdb=selectsql(dbstrmoni,sqlcheckdb)
sql0 = "update test.myawr_host set awrdb='%s' where ip_addr='%s' and port='%s'" % (awrserverip(dbstrawr),ip, port)
# print(sql0)
updatesql(dbstrmoni, sql0)
#step3 获取需要计算的snapid
def getsnapid(realday,sysday,dbstrawr):
# 更新历史作业状态
sql4 = "UPDATE TEST.mysql_moni_log SET STATUS='关闭' WHERE STATUS='待运行' AND JOBNAME='slowquery' "
updatesql(dbstrmoni, sql4)
sql0="SELECT ID,IP_ADDR ,PORT,DB_NAME,HOSTNAME,VERSION,DESC1,REMOTEDAY,USER1,PASSWD FROM TEST.MYAWR_HOST WHERE AWRDB='%s'"%awrserverip(dbstrawr)
rets=selectsql(dbstrmoni,sql0)
for ret in rets:
id=ret[0]
ip=ret[1]
port=ret[2]
dbname=ret[3]
hostname=ret[4]
version=ret[5]
desc=ret[6]
remoteday=ret[7]
user = ret[8]
passwd=ret[9]
if desc=="连接成功":
if remoteday=="realday":
checkday=realday
sql1 = "SELECT MIN(SNAP_ID),MAX(SNAP_ID) FROM %s.myawr_snapshot where SNAP_TIME BETWEEN STR_TO_DATE('%s','%%Y%%m%%d') and STR_TO_DATE('%s','%%Y%%m%%d')" % (dbname, checkday, time.strftime("%Y%m%d"))
else:
checkday=sysday
sql1 = "SELECT MIN(SNAP_ID),MAX(SNAP_ID) FROM %s.myawr_snapshot where SNAP_TIME >= STR_TO_DATE('%s','%%Y%%m%%d')" % (dbname, checkday)
rets1=selectsql(dbstrawr,sql1)
if len(rets1)>0:
if rets1[0][0] != None and rets1[0][1] != None:
minsnapid=int(rets1[0][0])
maxsnapid=int(rets1[0][1])
if maxsnapid>minsnapid:
#初始化作业控制表
sql2="INSERT INTO TEST.mysql_moni_log VALUES(NULL,%s,'%s','%s','%s','%s','%s',%s,CURRENT_TIMESTAMP,'%s',CURRENT_TIMESTAMP,'%s','%s','%s',CURRENT_TIMESTAMP,'%s')"%(id,ip,port,dbname,hostname,version,minsnapid,maxsnapid,"slowquery","待计算","待运行",awrserverip(dbstrawr))
updatesql(dbstrmoni,sql2)
#初始化全表扫描作业
def initjobtbfull(realday,sysday,dbstrawr):
# 更新历史作业状态
sql1 = "UPDATE TEST.mysql_moni_log SET STATUS='关闭' WHERE STATUS='待运行' AND JOBNAME='tableaccessfull' "
updatesql(dbstrmoni, sql1)
sql0="SELECT ID,IP_ADDR ,PORT,DB_NAME,HOSTNAME,VERSION,DESC1,REMOTEDAY,USER1,PASSWD FROM TEST.MYAWR_HOST WHERE AWRDB='%s'"%awrserverip(dbstrawr)
rets=selectsql(dbstrmoni,sql0)
for ret in rets:
id=ret[0]
ip=ret[1]
port=ret[2]
dbname=ret[3]
hostname=ret[4]
version=ret[5]
desc=ret[6]
remoteday=ret[7]
user = ret[8]
passwd=ret[9]
if desc=="连接成功":
if remoteday=="realday":
checkday=realday
else:
checkday=sysday
sql1="SELECT MIN(SNAP_ID),MAX(SNAP_ID) FROM %s.myawr_snapshot where SNAP_TIME BETWEEN STR_TO_DATE('%s','%%Y%%m%%d') and STR_TO_DATE('%s','%%Y%%m%%d')"%(dbname,checkday,time.strftime("%Y%m%d"))
rets1=selectsql(dbstrawr,sql1)
if len(rets1)>0:
if rets1[0][0] != None and rets1[0][1] != None:
minsnapid=int(rets1[0][0])
maxsnapid=int(rets1[0][1])
if maxsnapid>minsnapid:
#初始化作业控制表
sql2="INSERT INTO TEST.mysql_moni_log VALUES(NULL,%s,'%s','%s','%s','%s','%s',%s,CURRENT_TIMESTAMP,'%s',CURRENT_TIMESTAMP,'%s','%s','%s',CURRENT_TIMESTAMP,'%s')"%(id,ip,port,dbname,hostname,version,minsnapid,maxsnapid,"tableaccessfull","待计算","待运行",awrserverip(dbstrawr))
updatesql(dbstrmoni,sql2)
#计算全表扫描
def jobcounttbfull(dbstrawr):
sql0="SELECT A.JOBID, A.ID,A.IP_ADDR,A.DB_NAME,A.snap_id_begin,A.snap_id_end,A.hostname,B.APP,B.ENV FROM mysql_moni_log A,MYAWR_HOST B WHERE A.IP_ADDR=B.IP_ADDR AND A.DB_NAME=B.DB_NAME AND A.STATUS='待运行' AND A.JOBNAME='tableaccessfull' AND AWRDB='%s'"%awrserverip(dbstrawr)
rets0=selectsql(dbstrmoni,sql0)
for r0 in rets0:
jobid=r0[0]
id=r0[1]
ip=r0[2]
dbname=r0[3]
minid=r0[4]
maxid=r0[5]
hostname=r0[6]
app = r0[7]
env = r0[8]
print(id, ip, dbname, "%s任务开始执行" % jobid)
if maxid-minid>0:
sql2="SELECT A.SNAP_ID,A.SNAP_TIME,B.QUERY,B.DB,B.EXEC_COUNT,B.TOTAL_LATENCY,B.no_index_used_count,B.no_good_index_used_count,B.no_index_used_pct,B.rows_sent,B.rows_examined,B.rows_sent_avg,B.rows_examined_avg,B.first_seen,B.last_seen,B.DIGEST FROM %s.`myawr_db_statements_with_full_table_scans` B,%s.`myawr_snapshot` A WHERE A.SNAP_ID=B.SNAP_ID and B.DB IS NOT NULL and a.snap_id>=%s AND a.snap_id<=%s"%(dbname,dbname,minid,maxid)
ret2=selectsql(dbstrawr,sql2)
fullcount = len(ret2)
if fullcount>0:
if ret2[0][0] != None and ret2[0][1] != None:
print("全表扫描数量:",fullcount)
settbfulllog(None, ip, dbname,ret2)
#settbfulllog(None,ip,dbname,SNAP_ID,SNAP_TIME,QUERY,DB,EXEC_COUNT,TOTAL_LATENCY,no_index_used_count,no_good_index_used_count,no_index_used_pct,rows_sent,rows_examined,rows_sent_avg,rows_examined_avg,first_seen,last_seen,DIGEST)
#fields = ['SNAP_ID','SNAP_TIME','QUERY','DB','EXEC_COUNT','TOTAL_LATENCY','no_index_used_count','no_good_index_used_count','no_index_used_pct','rows_sent','rows_examined','rows_sent_avg','rows_examined_avg','first_seen','last_seen','DIGEST']
#do_save_to_mysql('tbfull_log', ','.join(fields), tmp)
status="关闭"
desc=fullcount
else:
status="关闭"
desc="snapid无变化"
sql5 = "UPDATE test.mysql_moni_log T SET T.DESC='%s',T.STATUS='%s',T.SNAP_TIME_END=CURRENT_TIMESTAMP WHERE T.JOBID=%s" % (desc, status, jobid)
updatesql(dbstrmoni,sql5)
print(id,ip,dbname,"%s任务执行完成"%jobid,status,desc)
#计算slowcout数量
def jobcountslow(realday,sysday,dbstrawr):
sql0="SELECT JOBID, ID,IP_ADDR,DB_NAME,snap_id_begin,snap_id_end,hostname FROM mysql_moni_log WHERE STATUS='待运行' AND JOBNAME='slowquery' AND AWRDB='%s'"%awrserverip(dbstrawr)
rets0=selectsql(dbstrmoni,sql0)
for r0 in rets0:
jobid=r0[0]
id=r0[1]
ip=r0[2]
dbname=r0[3]
minid=r0[4]
maxid=r0[5]
hostname=r0[6]
print(id, ip, dbname, "%s任务开始执行" % jobid)
if maxid-minid>0:
sql2="SELECT MAX(variable_value),MIN(variable_value) from %s.myawr_db_metrics where variable_name ='slow_queries' AND snap_id>=%s AND snap_id<=%s"%(dbname,minid,maxid)
ret2=selectsql(dbstrawr,sql2)
if ret2[0][0] != None and ret2[0][1] != None:
maxslow = int(ret2[0][0])
minslow = int(ret2[0][1])
print("SLOWLOG最大值",maxslow, "SLOWLOG最小值",minslow)
if maxslow-minslow>0:
print("从自然日期%s(系统日期%s)开始,共出现了%s次馒查询"%(realday,sysday,maxslow-minslow))
sql3 = "SELECT variable_value from %s.myawr_db_variables where variable_name ='SLOW_QUERY_LOG_FILE' AND snap_id=%s" % (dbname, maxid)
try:
logdir = selectsql(dbstrawr, sql3)[0][0]
except Exception as e:
print(e, dbstrawr, sql3)
sql4 = "SELECT variable_value from %s.myawr_db_variables where variable_name ='BASEDIR' AND snap_id=%s" % (dbname, maxid)
try:
basedir = selectsql(dbstrawr, sql4)[0][0]
except Exception as e:
print(e, dbstrawr, sql4)
day=time.strftime("%Y%m%d")
pattern = '[a-z]{3,4}-[0-9]{4}-[0-9]' # Converts 'djm' to 'd.*j.*m'
regex = re.compile(pattern) # Compiles a regex.
sql5 = "SELECT user1,passwd from myawr_host where ip_addr ='%s' AND db_name= '%s'" % (ip,dbname)
ret5 = selectsql(dbstrmoni, sql5)
user=ret5[0][0]
passwd=ret5[0][1]
if regex.match(hostname):
# 容器化部署mysql 用户/密码 root/icbc
resp = fabjob_docker(ip, logdir, basedir, day, realday, sysday,user,passwd)
else:
# 传统部署mysql 用户/密码 sysop/Qwert789)
resp = fabjob(ip, logdir, basedir, day, realday, sysday,user,passwd)
if resp=="succ":
status="执行成功"
desc=maxslow-minslow
elif resp=="error":
status="fabric执行失败"
desc=maxslow-minslow
else:
status = "执行成功"
desc = "0"
else:
status="执行失败"
desc="未获取到值"
else:
status="关闭"
desc="snapid无变化"
sql5 = "UPDATE test.mysql_moni_log T SET T.DESC='%s',T.STATUS='%s',T.SNAP_TIME_END=CURRENT_TIMESTAMP WHERE T.JOBID=%s" % (desc, status, jobid)
updatesql(dbstrmoni,sql5)
print(id,ip,dbname,"%s任务执行完成"%jobid,status,desc)
@timeout_decorator.timeout(60)
def getremoteday(user,passwd,ip):
strfab = '''fab -f fab_MYSQL_slowlog.py -u %s -p "%s" -H %s getday:ip="%s"''' % (user,passwd,ip,ip)
print(strfab)
try:
p=subprocess.Popen(strfab, shell=True)
p.wait()
f = open('./logs/mysqldate_%s.log'%ip, 'rt', encoding='GBK')
day=f.readline().replace("\r","").replace("\n","")
print("%s remote day:%s"%(ip,day))
return day
except timeout_decorator.timeout_decorator.TimeoutError as e1:
print("语句超时")
p.kill()
return("error")
except Exception as e2:
print(e2)
p.kill()
return("error")
@timeout_decorator.timeout(60)
def getremoteday_docker(user,passwd,ip):
strfab = '''fab -f fab_MYSQL_slowlog_docker.py -u %s -p "%s" -H %s getday:ip="%s"''' % (user,passwd,ip,ip)
print(strfab)
try:
p=subprocess.Popen(strfab, shell=True)
p.wait()
f = open('./logs/mysqldate_%s.log'%ip, 'rt', encoding='GBK')
day=f.readline().replace("\r","").replace("\n","")
print("%s remote day:%s"%(ip,day))
return day
except timeout_decorator.timeout_decorator.TimeoutError as e1:
print("语句超时")
p.kill()
return("timeout")
except Exception as e2:
print(e2)
p.kill()
return("error")
@timeout_decorator.timeout(120)
def fabjob(ip, logdir,basedir,logday,realday,sysday,user,passwd):
strfab = '''fab -f fab_MYSQL_slowlog.py -u %s -p "%s" -H %s task:ip="%s",logdir="%s",basedir="%s",day="%s",realday="%s",sysday="%s"''' % (user,passwd,ip, ip, logdir.replace("slow_queries.log",""), basedir,logday,realday,sysday)
print(strfab)
try:
p=subprocess.Popen(strfab, shell=True)
p.wait()
return "succ"
except timeout_decorator.timeout_decorator.TimeoutError as e:
print("语句超时")
p.kill()
return("error")
@timeout_decorator.timeout(120)
def fabjob_docker(ip, logdir, basedir, logday, realday, sysday,user,passwd):
strfab = '''fab -f fab_MYSQL_slowlog_docker.py -u %s -p "%s" -H %s task:ip="%s",logdir="%s",basedir="%s",day="%s",realday="%s",sysday="%s"''' % (
user,passwd,ip, ip, logdir.replace("slow_queries.log", ""), basedir, logday, realday, sysday)
print(strfab)
try:
p = subprocess.Popen(strfab, shell=True)
p.wait()
return "succ"
except timeout_decorator.timeout_decorator.TimeoutError as e:
print("语句超时")
p.kill()
return ("error")
#获取应用信息
def gethostinfo_new(hostname,ip):
pattern = '[a-z]{3,4}-[0-9]{4}-[0-9]' # Converts 'djm' to 'd.*j.*m'
regex = re.compile(pattern) # Compiles a regex.
if regex.match(hostname):
app="F-"+hostname.split("-")[0].upper()
env="DOCKER环境"
return app, env
else:
constr0='test/test@127.0.0.2/test'
str0="SELECT APP,ENV||'-'||FUNC||'-'||NODE DESC1 FROM V_SERVER_ALL_EXT WHERE IP='%s'"%ip
res0 = select(constr0,str0)
#print("res0",res0)
app=""
env=""
if len(res0)>0:
if res0[0][0]!=None:
app=res0[0][0]
env=res0[0][1]
return app , env
def read_slow_log_to_list(file_name):
# 组合每一分列表[],[]...
sqltext = []
# 每组分列表
sql = []
# 拼接多个SQL语句
output = ''
# 设置分组列表标识
isflag = 1
#print("file_name",file_name)
with open(file_name,encoding="utf-8") as f:
try:
for line in f:
line = line.strip()
if line.startswith('# Time'):
if len(output)>0:
sql.append(output)
if len(sql)>0:
sqltext.append(sql)
sql = []
output=''
sql.append(line)
elif line.startswith('SET'):
sql.append(line)
elif line.startswith('# User@Host'):
sql.append(line)
elif line.startswith('# Query_time'):
sql.append(line)
elif line.startswith('use'):
continue
elif line.startswith('# administrator'):
continue
else:
if line.endswith(';'):
if len(output) == 0:
sql.append(line)
isflag = 0
else:
line = output + ' ' + line
sql.append(line)
output = ''
isflag = 0
else:
output += str(' ') + line
if isflag == 0:
sqltext.append(sql)
isflag = 1
sql = []
except Exception as e:
print(e)
pass
#print("sqltxt")
return sqltext
def handler_slowlog(file_name):
slow_info = []
ip = file_name.replace(".log", "").split("_")[1]
res = read_slow_log_to_list(file_name)
for res in res:
if len(res)==5:
try:
# print res
# time = res[0]
# User@Host 信息
userhost = res[1].strip()
# 连接数据库用户
db_user = userhost.replace('# User@Host:', '').split('[')[0].strip()
# 应用程序连接DB所在的ip
app_ip = userhost.replace('# User@Host:', '').split()[2].replace('[', '').replace(']', '').strip()
app=''
env=''
# 开启线程id
thread_id = userhost.replace('# User@Host:', '').split(':')[1].strip()
# print db_user, app_ip, thread_id
querytime = res[2].strip()
# 执行持续时长
exec_duration = querytime.replace('# ', '').split()[1]
# 执行结果集返回行数
rows_sent = querytime.replace('# ', '').split()[5]
# 完成查询需要评估行数量
rows_examined = querytime.replace('# ', '').split()[7]
# print exec_duration, rows_sent, rows_examined
# 开始时间
#print("res3",res[3])
start_time = res[3].replace(';', '').split('=')[1]
# 慢SQL语句
slowsql = res[4]
# print start_time, db_user, app_ip, thread_id, exec_duration, rows_sent, rows_examined, slowsql
tmp = (ip,start_time, db_user, app_ip,app,env,thread_id,
exec_duration, rows_sent, rows_examined, slowsql)
slow_info.append(tmp)
# break
except Exception as e:
print("res",res,traceback.print_exc())
pass
else:
#print("res元素数量错误",len(res),res)
pass
return slow_info
def log2mysql():
engine = create_engine("mysql://test:test@127.0.0.1:3307/test?charset=utf8")
DBsession = sessionmaker(bind=engine)
session = DBsession()
for dir,root,loglist in os.walk("./logs"):
for log in loglist:
type=log.replace(".log","").split("_")[0]
if type=="monislowlog":
file_name = os.path.join("./logs/", log)
# 返回慢查询日志文件中处理后的各列数据
res = handler_slowlog(file_name)
# 表中需要插入数据的列名
try:
for val in res:
hash = hashlib.sha1()
hash.update(val[10].encode('utf-8'))
sqlid=hash.hexdigest()
ret=session.query(SLOWLOG).filter_by(sqlid=sqlid).first()
if ret is None:
slowlog= SLOWLOG(
mysql_ip = val[0],
start_time = val[1],
db_user = val[2],
app_ip = val[3],
sqlid = sqlid,
thread_id = val[6],
exectime = val[7],
rows_sent = val[8],
rows_examined = val[9],
slowsql = val[10],
update_time=datetime.datetime.now()
)
session.add(slowlog)
else:
if val[7] >ret.exectime:
ret.start_time=val[1]
ret.app_id=val[3]
ret.thread_id=val[6]
ret.exectime = val[7]
ret.rows_sent = val[8]
ret.rows_examined = val[9]
ret.update_time = datetime.datetime.now()
session.commit()
except Exception as e:
print ("入库错误:%s" % (e))
#print(sql)
def read_slowresult_log_to_list(file_name):
# 组合每一分列表[],[]...
sqltext = []
# 每组分列表
sql = []
# 拼接多个SQL语句
output = ''
# 设置分组列表标识
isflag = 1
#print("file_name",file_name)
with open(file_name,encoding="utf-8") as f:
try:
for line in f:
line = line.strip()
if line.startswith('Count: '):
if len(output)>0:
sql.append(output)
if len(sql)>0:
sqltext.append(sql)
sql = []
output=''
sql.append(line)
else:
output += str(' ') + line
if isflag == 0:
sqltext.append(sql)
isflag = 1
sql = []
except Exception as e:
print(e)
pass
#print(sqltext)
return sqltext
def handler_slowresult(file_name):
slow_info = []
ip = file_name.replace(".log", "").split("_")[1]
res = read_slowresult_log_to_list(file_name)
for res in res:
if len(res)==2:
try:
# 数据格式 ['Count: 1 Time=3266.61s (3266s) Lock=0.00s (0s) Rows=75.0 (75), chashu[chashu]@[193.168.1.1]' , 'SQL']
# SQL的统计信息
sqlstat = res[0].strip()
# sql语句
sqltext = res[1].strip()
# 连接用户信息
sqlperf, client = sqlstat.split(", ")
sqlcount,sqltimes,sqllocks,sqlrows=sqlperf.split(" ")
#print("SQL的统计信息",sqlcount,sqltimes,sqllocks,sqlrows,client)
#print("SQL的文本", sqltext)
#执行次数
count=sqlcount.replace("Count: ","").strip()
#最大执行时间
exec_duration=sqltimes.replace("Time=","").split("s")[0].strip()
#最大锁时间
locktime=sqllocks.replace("Lock=","").split("s")[0].strip()
#最大返回记录数
rows_sent=sqlrows.replace("Rows=","").split(" (")[0].strip()
tmp = (ip,client,exec_duration,locktime,rows_sent,count,sqltext,sqlperf)
slow_info.append(tmp)
# break
except Exception as e:
print("res",res,traceback.print_exc())
pass
else:
#print("res元素数量错误",len(res),res)
pass
return slow_info
def slowresult2mysql():
engine = create_engine("mysql://test:test@127.0.0.1:3307/test?charset=utf8")
DBsession = sessionmaker(bind=engine)
session = DBsession()
for dir,root,loglist in os.walk("./logs"):
for log in loglist:
type=log.replace(".log","").split("_")[0]
if type=="monislowresult":
file_name = os.path.join("./logs/", log)
# 返回慢查询日志文件中处理后的各列数据
res = handler_slowresult(file_name)
# 表中需要插入数据的列名
try:
for val in res:
hash = hashlib.sha1()
hash.update(val[6].encode('utf-8'))
sqlid=hash.hexdigest()
ret=session.query(SLOWRESULT).filter_by(sqlid=sqlid).first()
if ret is None:
slowresult= SLOWRESULT(
mysql_ip = val[0],
client = val[1],
sqlid = sqlid,
exectime = val[2],
locktime=val[3],
rows_sent = val[4],
exec_count = val[5],
slowsql = val[6],
fullstatic=val[7],
update_time=datetime.datetime.now()
)
session.add(slowresult)
session.commit()
except Exception as e:
print ("入库错误:%s" % (e))
#print(sql)
if __name__ == '__main__':
#flag=1 : step1 step2
#flag=2 : step3 step4
#flag=3 : step 1-4
flag= sys.argv[1]
realday = sys.argv[2]
sysday = sys.argv[3]
#realday="20190412"
#sysday="20190331"
# percona server
dbstrawrs=["mysql://root:root@awr1:3306/myawr?charset=utf8","mysql://root:root@awr2:3306/myawr?charset=utf8"]
for dbstrawr in dbstrawrs:
if len(sys.argv)!=4:
print("调用方法: python MYSQL_slowlog_new.py <标志:1-只初始化作业 2-只执行作业 3-初始化并执行作业> <版本开始自然日期> <版本开始系统日期> ")
else:
print("参数列表:标志-%s,版本开始自然日期-%s,版本开始系统日期-%s"%(flag,realday,sysday))
if flag=="1" or flag=="3":
# 清理过期日志
ls = os.listdir("./logs")
for i in ls:
os.remove(os.path.join("./logs", i))
#step 2 初始化MYAWR_HOST,并获取数据库当前系统时间
initjob(dbstrawr)
if flag=="2" or flag=="3":
# 清理过期日志
ls = os.listdir("./logs")
for i in ls:
os.remove(os.path.join("./logs", i))
# step 3 获取自然时间(系统时间)至今的满日志记录数
getsnapid(realday,sysday,dbstrawr)
#step 4 fabric整合日志备份,生成结果文件
jobcountslow(realday,sysday,dbstrawr)
#step 5 结果文件入库
log2mysql()
initjobtbfull(realday,sysday,dbstrawr)
jobcounttbfull(dbstrawr)
if flag=="4" or flag=="3":
#计算全表扫描
initjobtbfull(realday,sysday,dbstrawr)
jobcounttbfull(dbstrawr)
if flag=="5":
print("测试开始")
slowresult2mysql()