一、背景
最近接触了一款数据库性能洞察软件DBdoctor,出自海信(对,就是那个做家电的海信,我也很吃惊)。这款产品使用eBPF技术来诊断数据库性能问题,而且只聚焦在数据库性能上,所以对数据库性能问题研究得非常深入,比较适合我们这种没有专业DBA的公司,推荐给大家。他们还有一个技术交流群,关注公众号“DBdoctor服务”就能进群;免费版不包含全部功能,不过在群里跟他们聊聊天几乎就会送你企业版,还是比较慷慨的。网上资料比较少,在这里专门记录下这款软件一些隐藏功能的使用方法,希望对大家有用。
产品官网:海信聚好看云服务—值得托付的人工智能互联网科技平台 - 海信聚好看云服务
这是他们产品负责人张纪宽的博客:《DBdoctor,致力于解决数据库的一切性能问题》(CSDN博客),里面也有一些文章介绍,一并推荐给大家。
本篇主要介绍:在无agent和Prometheus监控的情况下,如何用DBdoctor的自定义模板进行数据采集。这是一种自定义编码、灵活配置的方式,需要有一定的编码基础。
二、操作步骤
1. 使用admin账号登录管理员操作界面【安装成功后,默认密码123456】
2. 登录后选择数据采集管理->模板配置 【模板类型设置为自定义】
a. 自定义指标获取:内置python模板,可基于python灵活调用依赖的接口或数据库获取对应的指标,按照对应的输出格式进行返回即可
b.具体python格式如下:
import json
import time
from decimal import Decimal
import requests, logging
from flask import Flask
from flask import request
# Flask application object for this template module.
app = Flask(__name__)

# Route every record from DEBUG upwards into UniteServer.log, prefixing each
# message with a 12-hour-clock timestamp (month/day/year hour:minute:second AM/PM).
logging.basicConfig(
    datefmt='%m/%d/%Y %I:%M:%S %p',
    format='%(asctime)s %(message)s',
    level=logging.DEBUG,
    filename='UniteServer.log',
)
class CollectResourceHandler(object):
    """Collect resource metrics for DBdoctor from a third-party monitoring API.

    Every public method receives the request dict ``req``. The keys read from
    it are ``metricType``, ``startTime``, ``endTime`` and ``thirdParty``; the
    latter is a dict of user-configured parameters (``env``, ``appId``,
    ``mysqlAppId``, ``mountPath``, ``device``, ``cpuQuota``, ``fdLimit``).

    Results are returned as a list of ``[timestamp, value]`` pairs.
    """

    # Seconds to wait for the monitoring gateway before requests raises a
    # timeout error; without it a stalled gateway would block forever.
    requestTimeout = 30

    # Metrics whose samples are multiplied by 1024 after rounding.
    _scaledMetrics = ("PROCESS_NET_IN", "PROCESS_NET_OUT")

    # metricType -> (unit, field, id type) handed to generateRealUrl.
    # Id type 1 selects thirdParty['appId']; any other value selects
    # thirdParty['mysqlAppId'].
    _metricRoutes = {
        # CPU utilization
        'PROCESS_CPU': ("cpu", "CpuUtilization", 1),
        'HOST_CPU': ("cpu", "CpuUtilization", 1),
        # Memory utilization
        'PROCESS_MEM': ("memory", "PhysicalMemoryUtilization", 1),
        'HOST_MEM': ("memory", "PhysicalMemoryUtilization", 1),
        # Disk space utilization
        'PROCESS_SPACE': ("disk", "DiskUtilization", 1),
        'HOST_SPACE': ("disk", "DiskUtilization", 1),
        # Connection usage ratio of the database instance
        'PROCESS_CONNECTION': ("msconnratio", "ConnectionRatio", 2),
        # Connection usage of the host running the database
        'HOST_CONNECTION': ("tcp", "ConnectionsPassive", 1),
        # Disk IO utilization
        'PROCESS_DISK_IO': ("ioresp", "TtranTime", 1),
        'HOST_DISK_IO': ("ioresp", "TtranTime", 1),
        # Database QPS
        'PROCESS_QPS': ("TPSInfo", "TPS", 2),
        # Database inbound / outbound traffic
        'PROCESS_NET_IN': ("interface", "ReceivedPersec", 1),
        'PROCESS_NET_OUT': ("interface", "SentPersec", 1),
    }

    def __init__(self):
        """Define the URL templates and auth headers of the dev and prod gateways.

        The ``RE_*`` tokens in the URL templates are placeholders that
        generateRealUrl substitutes per request.
        """
        # NOTE(security): the user keys and tokens below are hard-coded
        # credentials. They are kept verbatim because they are runtime values,
        # but they should be loaded from configuration or the environment.
        self.devUrl = "https://inner-gw-test.hisense.com:443/itops/apmrs/v2/applications/RE_APP_ID/units/RE_UNIT/fields/RE_FILED/history?user_key=19890da04f60d&historyType=0&startTime=RE_START&endTime=RE_END"
        self.devHeaders = {
            "X-Auth-Token": "eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJuUVNDaEpjSkduZm9SOWpIbTN6VHdkUWtRSklxMW5URzc4MTV1eTI3T2ZMMkxsSnh6MmRBOTloUFJaS2ttaExvcWlXcHJZU2pXYy9iWmJ2VXFXRjRtWTVDYW9nQXI0MVFNQUFHVFU3OFNPNi9aUGJCWHBzQjRwZ0VwMDBNRjlsWWdCZlgwUDlBMUZ1TDE1eVhqQURJbzF1V01TRmZRQkEyUWx4SUNGRFJUU0U9IiwianRpIjoiYjk4YmRkZTAtNTNlYy00MzgxLWI2N2YtMTVmOWZmZGFlZTcyIiwiaWF0IjoxNjkwNzgwNDY1fQ."}
        self.prodUrl = "https://inner-gw.hisense.com:443/itops/apmrs/v2/applications/RE_APP_ID/units/RE_UNIT/fields/RE_FILED/history?user_key=c6f685016efa&historyType=0&startTime=RE_START&endTime=RE_END"
        self.prodHeaders = {
            "X-Auth-Token": "eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJ7XCJ1c2VyTmFtZVwiOlwiem50eXl3cHRcIixcInBhc3NXb3JkXCI6XCJcIixcImlkXCI6XCI1MjBcIixcInJlZ2lvbklkXCI6XCIwXCIsXCJ0ZW5hbnRJZFwiOm51bGwsXCJ0ZW5hbnROYW1lXCI6bnVsbCxcImZ1bGxOYW1lXCI6bnVsbCxcInNlbGZTZXJ2aWNlXCI6bnVsbCxcInVzZXJUeXBlXCI6bnVsbCxcImlwQWRkcmVzc1wiOm51bGwsXCJzZXNzaW9uSWRcIjpudWxsLFwibG9naW5UaW1lXCI6bnVsbCxcImdyb3VwTmFtZVwiOm51bGwsXCJwcm94eVN1YlRva2VuXCI6bnVsbCxcImVmZmVjdGl2ZVRpbWVcIjo3MjAwMDAwMDAsXCJlZmZlY3RpdmVVcmxcIjpudWxsLFwib3BlcmF0b3JHcm91cFwiOm51bGx9IiwianRpIjoiMTgzMjQ4MTQwNDQyNDYxOTcifQ."}

    def generateRealUrl(self, req, unit, filed, type):
        """Build the third-party monitoring URL for one request.

        Args:
            req: request dict; ``startTime``, ``endTime``, ``metricType`` and
                ``thirdParty`` are read from it.
            unit: value substituted for ``RE_UNIT``.
            filed: value substituted for ``RE_FILED``.
            type: 1 to use ``thirdParty['appId']`` as the application id,
                anything else to use ``thirdParty['mysqlAppId']``. (The name
                shadows the builtin; it is kept for caller compatibility.)

        Returns:
            The URL with all placeholders substituted. ``startTime`` and
            ``endTime`` are multiplied by 1000 before substitution. For the
            disk-space and disk-IO metrics an ``instanceId`` query parameter
            carrying the mount path / device is appended.
        """
        startTime = req.get('startTime')
        endTime = req.get('endTime')
        td = req.get('thirdParty')
        metric = req.get('metricType')

        # Only the literal "dev" selects the test gateway; everything else
        # (including a missing env) goes to production.
        template = self.devUrl if td.get('env') == "dev" else self.prodUrl
        thirdPartyId = td.get('appId') if type == 1 else td.get('mysqlAppId')

        url = (template.replace("RE_APP_ID", thirdPartyId)
               .replace("RE_UNIT", unit)
               .replace("RE_FILED", filed)
               .replace("RE_END", str(endTime * 1000))
               .replace("RE_START", str(startTime * 1000)))

        if metric in ('PROCESS_SPACE', 'HOST_SPACE'):
            url = url + "&instanceId=" + td.get('mountPath')
        elif metric in ('PROCESS_DISK_IO', 'HOST_DISK_IO'):
            url = url + "&instanceId=" + td.get('device')
        return url

    def generateCPUQuota(self, req):
        """Return the database CPU quota as a single sample.

        Returns:
            ``[[now, quota]]`` where ``now`` is the current Unix time in whole
            seconds and ``quota`` is ``thirdParty['cpuQuota']`` unchanged.
        """
        quota = req.get('thirdParty').get('cpuQuota')
        return [[int(time.time()), quota]]

    @staticmethod
    def _validSamples(data):
        """Return ``data['data']['values']``, or ``[]`` when the payload is unusable.

        A payload is unusable when ``code`` is not 0 or when ``data`` or
        ``data['values']`` is missing or None.
        """
        if data.get('code') != 0:
            return []
        body = data.get('data')
        if body is None:
            return []
        values = body.get('values')
        return [] if values is None else values

    @staticmethod
    def _roundTwoPlaces(number):
        """Round a float to two decimal places via Decimal and return a float."""
        return float(Decimal(number).quantize(Decimal('0.00')))

    def parseRes(self, resp, metric):
        """Parse a third-party monitoring response into DBdoctor samples.

        Args:
            resp: response object exposing ``json()``.
            metric: metric type; for ``PROCESS_NET_IN`` / ``PROCESS_NET_OUT``
                each rounded value is multiplied by 1024.

        Returns:
            A list of ``[collectTime / 1000, value]`` pairs with the value
            rounded to two decimals, or ``[]`` when the payload is unusable.
        """
        data = resp.json()
        # The "%s" placeholder is required: passing data as an argument
        # without it makes the logging call fail to format the record.
        logging.debug("parseRes: %s", data)

        scale = metric in self._scaledMetrics
        res = []
        for sample in self._validSamples(data):
            timeA = int(sample.get('collectTime')) / 1000
            valueA = self._roundTwoPlaces(float(sample.get('value')))
            if scale:
                valueA = valueA * 1024
            res.append([timeA, valueA])
        return res

    def parseHostConnRes(self, resp, thirdParty):
        """Parse a host connection-count response into usage percentages.

        Args:
            resp: response object exposing ``json()``.
            thirdParty: third-party parameter dict; ``fdLimit`` is read from
                it and converted with ``int``.

        Returns:
            A list of ``[collectTime / 1000, value / fdLimit * 100]`` pairs
            with the percentage rounded to two decimals, or ``[]`` when the
            payload is unusable.

        Raises:
            TypeError / ValueError: if ``fdLimit`` is missing or not numeric.
            ZeroDivisionError: if ``fdLimit`` is 0 and samples are present.
        """
        fdLimit = int(thirdParty.get('fdLimit'))
        data = resp.json()
        logging.debug("parseHostConnRes: %s", data)

        res = []
        for sample in self._validSamples(data):
            timeA = int(sample.get('collectTime')) / 1000
            valueA = self._roundTwoPlaces(float(sample.get('value')) / fdLimit * 100)
            res.append([timeA, valueA])
        return res

    def getHeaders(self, req):
        """Return the dev headers when ``thirdParty['env']`` is "dev", else the prod headers."""
        if req.get('thirdParty').get('env') == "dev":
            return self.devHeaders
        return self.prodHeaders

    def doResourceQuery(self, req):
        """Fetch the third-party resource data for ``req['metricType']``.

        ``PROCESS_CPU_QUOTA`` is answered locally from the configured quota.
        Every metric listed in ``_metricRoutes`` triggers one HTTP GET against
        the monitoring gateway. Any other metric type yields ``[]``.

        Returns:
            A list of ``[timestamp, value]`` pairs (possibly empty).
        """
        metric = req.get('metricType')
        if metric == 'PROCESS_CPU_QUOTA':
            return self.generateCPUQuota(req)

        route = self._metricRoutes.get(metric)
        if route is None:
            return []

        unit, field, idType = route
        resp = requests.get(self.generateRealUrl(req, unit, field, idType),
                            headers=self.getHeaders(req),
                            timeout=self.requestTimeout)

        # Host connections are reported as a raw count and must be converted
        # to a percentage of fdLimit; all other metrics share one parser.
        if metric == 'HOST_CONNECTION':
            return self.parseHostConnRes(resp, req.get('thirdParty'))
        return self.parseRes(resp, metric)
3.模板创建成功后,我们新增纳管实例时,数据采集方式可选择我们新增的数据采集方式
注意:系统内置了第三方参数配置,可针对当前依赖的第三方环境进行灵活配置。例如,我们可以新增一个参数aliKey,并将其对应的value设置为真实的某一个值;这样在python代码中,就可以先用td = req.get('thirdParty')获取所有的第三方参数,再用aliKey = td.get('aliKey')获取某一个具体参数的值,之后即可在python代码中使用aliKey的真实值了。
以上为DBdoctor使用自定义模板进行数据采集的使用方式,欢迎大家有问题一起讨论。