mysql的通用日志(general_log)记录了mysql的所有操作,记录内容与审计日志相同,开启后会消耗一定的性能。
通过对general_log的分析,可以统计一段时间内数据库所有的SQL和执行频次,进而可以让DBA进行合理的优化,及时发现有问题的SQL.
为什么不用审计日志分析,再使用general_log?
general_log是社区版mysql自带的功能,不需要添加其他插件,开启和关闭方便。
开启通用日志(动态开启):
SET GLOBAL general_log=0; #关闭通用日志
TRUNCATE TABLE mysql.general_log; #清空通用日志表CSV
SET GLOBAL log_output='TABLE'; #根据慢查询和通用日志写入到表中
SET GLOBAL general_log=1; #开启通用日志
关闭通用日志(动态关闭):
SET GLOBAL general_log=0; #关闭通用日志
TRUNCATE TABLE mysql.general_log; #清空通用日志表CSV
SET GLOBAL log_output='FILE'; #根据慢查询和通用日志写入到文件中
建议可以在测试环境开启general_log,或在服务空闲时开启。
分析脚本如下:
from pymysql import connect as MysqlConn #pip3 install pymysql==0.9.3
from pymysql import cursors as MysqlCur
from difflib import SequenceMatcher
from threading import Thread
from queue import Queue
class ReadGeneralLog(Thread):
def __init__(self,queue_sqls,host,port,user,pswd):
self._Queue = queue_sqls
self.host,self.port,self.user,self.pswd = host,port,user,pswd
super(ReadGeneralLog, self).__init__()
def run(self):
connect = MysqlConn(host=self.host, port=self.port, user=self.user, passwd=self.pswd, db="mysql", charset='utf8')
with connect.cursor(cursor=MysqlCur.DictCursor) as cursor:
cursor.execute("SELECT * FROM general_log")
for row in cursor.fetchall():
if row["command_type"] =="Query": #在此处可以进行其他条件的过滤,如用户名,客户端地址等
self._Queue.put(row["argument"].decode("utf8"))
connect.close()
self._Queue.put("PutEnd.")
class AnalyzeGeneralLog(Thread):
def __init__(self, queue_sqls):
self._Queue = queue_sqls
self.analyze_res = {}
self.analyze_statis = {}
super(AnalyzeGeneralLog, self).__init__()
def run(self):
temp_sql = "SELECT UPDATE DELETE INSERT WHERE JOIN AND IN ''= 0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ"
while True:
sql = self._Queue.get()
if sql == "PutEnd.":
break
# 去除SQL中的空格和换行
sql = ' '.join(sql.split())
# 计算相似度
rate = round(SequenceMatcher(None, temp_sql, sql).quick_ratio(),6)
self.analyze_res[rate] = sql
self.analyze_statis[rate] = self.analyze_statis.get(rate,0) + 1
for rate,count in self.analyze_statis.items():
#出现次数,相似度,SQL
print("%s\t%s\t%s"%(count,rate,self.analyze_res[rate]))
if __name__ == '__main__':
queue_sqls = Queue(maxsize=10000)
RGL = ReadGeneralLog(queue_sqls,"127.0.0.1", 3306, "root", "123456")
AGL = AnalyzeGeneralLog(queue_sqls)
RGL.start()
AGL.start()
RGL.join()
AGL.join()
原文链接:https://blog.csdn.net/chenqiushi123/article/details/109629085