Prometheus Operator blackbox(http/tcp/dns/icmp探测)
由于grafana dashboard市场下载的blackbox不太好用,我做了简单修改,效果如下:
prometheus配置
由于prometheus operator采用ServiceMonitor或者Probe方式来对blackbox进行数据采集的时候均存在一定的问题,所以这部分scrape配置改为手动配置(通过additionalScrapeConfigs注入)
[root@k8s-master-1 prometheus-operator]# cat prometheus-server.yaml
---
# Prometheus custom resource reconciled by prometheus-operator.
# The blackbox probe jobs are injected through additionalScrapeConfigs below
# instead of ServiceMonitor/Probe objects.
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
  name: server
  namespace: monitor
spec:
  image: prom/prometheus:v2.36.2
  # Schedule the Prometheus pod onto the node with this hostname label.
  nodeSelector:
    kubernetes.io/hostname: "k8s-master-1"
  # Only ServiceMonitor / Probe objects labelled application=prometheus
  # are selected by this instance.
  serviceMonitorSelector:
    matchLabels:
      application: "prometheus"
  probeSelector:
    matchLabels:
      application: "prometheus"
  serviceAccountName: prometheus-server
  # Reference to the Secret "blackbox", key "blackbox-config.yaml", which
  # carries the hand-written scrape jobs.
  additionalScrapeConfigs:
    name: blackbox
    key: blackbox-config.yaml
  storage:
    # When this template is configured, the Prometheus server's data is
    # stored on the resulting volume claim.
    volumeClaimTemplate:
      spec:
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 10Gi
---
# NodePort Service exposing the Prometheus server on port 9090.
apiVersion: v1
kind: Service
metadata:
  name: prometheus-server
  namespace: monitor
  labels:
    application: "prometheus-server"
spec:
  # NOTE(review): presumably matches the pod label the operator derives
  # from the Prometheus resource named "server" - confirm on the pods.
  selector:
    prometheus: server
  type: NodePort
  ports:
    - name: metrics
      port: 9090
      targetPort: 9090
      protocol: TCP
      # NOTE(review): 39090 lies outside the default NodePort range
      # (30000-32767); this only applies if the API server runs with an
      # extended --service-node-port-range. Confirm for the target cluster.
      nodePort: 39090
blackbox配置
[root@k8s-master-1 blackbox]# cat blackbox.yaml
# Blackbox exporter configuration; the single key blackbox.yml holds the
# probe module definitions (http_2xx, tcp_connect, dns_tcp, icmp).
apiVersion: v1
kind: ConfigMap
metadata:
  name: blackbox-config
  namespace: monitor
data:
  blackbox.yml: |-
    modules:
      http_2xx:  # HTTP probe module; every blackbox-exporter probe is configured as a module
        prober: http
        timeout: 15s
        http:
          valid_http_versions: ["HTTP/1.1", "HTTP/2"]
          valid_status_codes: [200,301,302]
          method: GET
          preferred_ip_protocol: "ip4"  # use IPv4
          follow_redirects: true  # follow redirects
      tcp_connect:  # TCP module
        prober: tcp
        timeout: 15s
      dns_tcp:  # probe DNS over TCP
        prober: dns
        dns:
          transport_protocol: "tcp"  # default is udp
          preferred_ip_protocol: "ip4"  # default is ip6
          query_name: "kubernetes.default.svc.cluster.local"  # name used to check the DNS server
          query_type: "A"  # per the original author: required for kube-dns, which does not support IPv6
      icmp:
        prober: icmp
        icmp:
          preferred_ip_protocol: "ip4"
---
# Single-replica blackbox-exporter Deployment listening on :9115.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: blackbox
  namespace: monitor
spec:
  replicas: 1
  selector:
    matchLabels:
      app: blackbox
  template:
    metadata:
      labels:
        app: blackbox
    spec:
      # The pod is assigned to this node by name.
      nodeName: k8s-master-1
      containers:
        - image: prom/blackbox-exporter:v0.21.1
          name: blackbox
          args:
            # Points at the file provided by the "config" volume mount below.
            - --config.file=/etc/blackbox_exporter/blackbox.yml
            - --log.level=info
            - --web.listen-address=:9115
          ports:
            - name: http
              containerPort: 9115
          volumeMounts:
            - name: config
              mountPath: /etc/blackbox_exporter
      dnsPolicy: ClusterFirst
      volumes:
        # Backed by the blackbox-config ConfigMap (key blackbox.yml).
        - name: config
          configMap:
            name: blackbox-config
---
# Service in front of the blackbox exporter pods (label app=blackbox).
# The scrape jobs address it as blackbox.monitor.svc.cluster.local:9115.
apiVersion: v1
kind: Service
metadata:
  name: blackbox
  namespace: monitor
  labels:
    app: blackbox
spec:
  selector:
    app: blackbox
  type: NodePort
  ports:
    - name: http
      port: 9115
      targetPort: 9115
      # NOTE(review): 39115 lies outside the default NodePort range
      # (30000-32767); this only applies if the API server runs with an
      # extended --service-node-port-range. Confirm for the target cluster.
      nodePort: 39115
---
# Abandoned attempt (kept for reference): scrape config declared inline as a Secret.
# NOTE(review): the failure is presumably because `module` is a scalar here
# while the working jobs pass it as a list, and no relabeling sets the probe
# target - confirm.
#apiVersion: v1
#kind: Secret
#metadata:
#  name: blackbox
#  namespace: monitor
#stringData:
#  blackbox.yaml: |
#    - job_name: "icmp-check"  # writing it this way causes problems
#      metrics_path: /probe
#      params:
#        module: icmp
#      static_configs:
#        - targets:
#            - 192.168.0.10
#            - 192.168.0.11
---
# Abandoned attempt (kept for reference): blackbox monitoring via a ServiceMonitor.
# With this approach only the first of several targets is probed; the root
# cause has not been found yet. The generated scrape URL is:
#   10.70.0.128:9115/probe?module=tcp_connect&target=192.168.0.10:22&target=192.168.0.11:22
# Related issue: https://github.com/prometheus-operator/prometheus-operator/issues/2821
#apiVersion: monitoring.coreos.com/v1
#kind: ServiceMonitor
#metadata:
#  name: blackbox
#  namespace: monitor
#  labels:
#    application: "prometheus"
#spec:
#  namespaceSelector:
#    matchNames: ["monitor"]
#  selector:
#    matchLabels:
#      app: blackbox
#  endpoints:
#    - interval: "15s"
#      path: /probe
#      port: http
#      scheme: HTTP
#      params:
#        module:
#          - tcp_connect
#        target:
#          - 192.168.0.10:22
#          - 192.168.0.11:22
#      relabelings:
#        - sourceLabels: [__address__]
#          targetLabel: __param_target
#        - sourceLabels: [__param_target]
#          targetLabel: instance
#        - targetLabel: __address__
#          replacement: blackbox.monitor.svc.cluster.local:9115
---
# Abandoned attempt (kept for reference): one Probe object per check type.
# Too repetitive: it produces many distinct instance values, the labels need
# post-processing, and the two jobs would then need their labels aggregated -
# too much trouble.
#apiVersion: monitoring.coreos.com/v1
#kind: Probe
#metadata:
#  name: blackbox-tcp-check
#  namespace: monitor
#  labels:
#    application: "prometheus"
#spec:
#  jobName: tcp-check
#  module: tcp_connect
#  prober:
#    url: blackbox.monitor.svc.cluster.local:9115
#  targets:
#    staticConfig:
#      static:
#        - 192.168.0.10:22
#        - 192.168.0.11:22
#  metricRelabelings:
#    - sourceLabels: [__address__]
#      targetLabel: instance
---
# Abandoned attempt (kept for reference): ICMP counterpart of the TCP Probe above.
#apiVersion: monitoring.coreos.com/v1
#kind: Probe
#metadata:
#  name: blackbox-icmp-check
#  namespace: monitor
#  labels:
#    application: "prometheus"
#spec:
#  jobName: icmp-check
#  module: icmp
#  prober:
#    url: blackbox.monitor.svc.cluster.local:9115
#  targets:
#    staticConfig:
#      static:
#        - 192.168.0.10
#        - 192.168.0.11
#  metricRelabelings:
#    - sourceLabels: [__address__]  # key on the IP so the instance label stays consistent later on
#      targetLabel: instance
[root@k8s-master-1 blackbox]# cat blackbox-config.yaml
# ICMP (ping) probes, executed through the blackbox exporter's "icmp" module.
- job_name: "ICMP-CHECK"
  metrics_path: /probe
  params:
    module:
      - icmp
  static_configs:
    - targets:
        - 192.168.0.10
        - 192.168.0.11
      # Static label attached to every series of this job.
      labels:
        blackbox: icmp
  relabel_configs:
    # Copy each listed target into the `target` URL parameter of the
    # request sent to the blackbox exporter.
    - source_labels: [__address__]
      target_label: __param_target
    # Set the instance label to the probed target.
    - source_labels: [__param_target]
      target_label: instance
    # Make Prometheus scrape the blackbox exporter instead of the target.
    - target_label: __address__
      replacement: blackbox.monitor.svc.cluster.local:9115
# TCP port probes, executed through the blackbox exporter's "tcp_connect" module.
- job_name: "TCP-CHECK"
  metrics_path: /probe
  params:
    module:
      - tcp_connect
  static_configs:
    # host:port values are quoted so they are always read as strings.
    - targets:
        - "192.168.0.10:22"
        - "192.168.0.11:22"
        - "192.168.0.10:3306"
        - "192.168.0.10:6443"
        - "192.168.0.10:1250"
        - "192.168.0.10:2380"
        - "192.168.0.11:2380"
        - "192.168.0.10:2049"
        - "192.168.0.10:111"
      # Static label attached to every series of this job.
      labels:
        blackbox: tcp
  relabel_configs:
    # Copy each listed target into the `target` URL parameter.
    - source_labels: [__address__]
      target_label: __param_target
    # Make Prometheus scrape the blackbox exporter instead of the target.
    - target_label: __address__
      replacement: blackbox.monitor.svc.cluster.local:9115
    # Set the instance label to the probed host:port.
    - source_labels: [__param_target]
      # Abandoned idea: strip the port so instance matches the ICMP job's
      # values; Grafana does no aggregation, so this was dropped.
      # regex: "(.*):(.*)"
      # replacement: $1
      target_label: instance
# HTTP site probes, executed through the blackbox exporter's "http_2xx" module.
- job_name: "HTTP-CHECK"
  metrics_path: /probe
  params:
    module:
      - http_2xx
  static_configs:
    - targets:
        - http://www.baidu.com
        - http://www.huya.com
        - http://www.douyu.com
        - http://www.bilibili.com
      # Static label attached to every series of this job.
      labels:
        blackbox: http_2xx
  relabel_configs:
    # Copy each listed URL into the `target` URL parameter.
    - source_labels: [__address__]
      target_label: __param_target
    # Make Prometheus scrape the blackbox exporter instead of the site.
    - target_label: __address__
      replacement: blackbox.monitor.svc.cluster.local:9115
    # Set the instance label to the probed URL.
    - source_labels: [__param_target]
      target_label: instance
# DNS probes, executed through the blackbox exporter's "dns_tcp" module.
- job_name: "DNS-CHECK"
  metrics_path: /probe
  params:
    module:
      - dns_tcp
  static_configs:
    - targets:
        - 10.0.0.10  # DNS server inside the Kubernetes cluster
      # Static label attached to every series of this job.
      labels:
        blackbox: dns
  relabel_configs:
    # Copy the listed DNS server into the `target` URL parameter.
    - source_labels: [__address__]
      target_label: __param_target
    # Make Prometheus scrape the blackbox exporter instead of the DNS server.
    - target_label: __address__
      replacement: blackbox.monitor.svc.cluster.local:9115
    # Set the instance label to the probed DNS server.
    - source_labels: [__param_target]
      target_label: instance
创建prometheus资源需要的additionalScrapeConfigs:kubectl create secret generic blackbox --from-file=blackbox-config.yaml -n monitor
prometheus检查
导入grafana配置
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "Quick overview of values from blackbox exporters",
"editable": true,
"fiscalYearStartMonth": 0,
"gnetId": 11529,
"graphTooltip": 0,
"id": 3,
"iteration": 1656837727417,
"links": [],
"liveNow": false,
"panels": [
{
"columns": [],
"datasource": {
"type": "prometheus",
"uid": "wt9lNze7z"
},
"fontSize": "100%",
"gridPos": {
"h": 8,
"w": 11,
"x": 0,
"y": 0
},
"id": 103,
"links": [],
"repeatDirection": "h",
"scroll": true,
"showHeader": true,
"sort": {
"col": 4,
"desc": false
},
"styles": [
{
"alias": "Time",
"align": "auto",
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"pattern": "Time",
"type": "hidden"
},
{
"alias": "1=UP, 0=DOWN",
"align": "auto",
"colorMode": "row",
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(245, 54, 54, 0.9)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 0,
"pattern": "Value",
"thresholds": [
"0",
"1"
],
"type": "number",
"unit": "short"
},
{
"alias": "",
"align": "auto",
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"pattern": "__name__",
"thresholds": [],
"type": "hidden",
"unit": "short"
},
{
"alias": "",
"align": "auto",
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"decimals": 2,
"pattern": "/.*/",
"thresholds": [],
"type": "number",
"unit": "short"
}
],
"targets": [
{
"expr": "probe_success",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "A"
}
],
"title": "黑盒监控汇总数据",
"transform": "table",
"type": "table-old"
},
{
"columns": [],
"datasource": {
"type": "prometheus",
"uid": "wt9lNze7z"
},
"fontSize": "100%",
"gridPos": {
"h": 8,
"w": 13,
"x": 11,
"y": 0
},
"id": 109,
"links": [],
"scroll": true,
"showHeader": true,
"sort": {
"col": 3,
"desc": false
},
"styles": [
{
"alias": "Time",
"align": "auto",
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"pattern": "Time",
"type": "hidden"
},
{
"alias": "Time Left",
"align": "auto",
"colorMode": "row",
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 0,
"pattern": "Value",
"thresholds": [
"0",
"2592000"
],
"type": "number",
"unit": "s"
},
{
"alias": "",
"align": "auto",
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"decimals": 2,
"pattern": "/.*/",
"thresholds": [],
"type": "number",
"unit": "short"
}
],
"targets": [
{
"expr": "probe_ssl_earliest_cert_expiry-time()",
"format": "table",
"instant": true,
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "SSL证书过期情况",
"transform": "table",
"type": "table-old"
},
{
"datasource": {
"type": "prometheus",
"uid": "wt9lNze7z"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "smooth",
"lineWidth": 3,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 8
},
"id": 107,
"links": [],
"options": {
"legend": {
"calcs": [
"max",
"min"
],
"displayMode": "table",
"placement": "right"
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "8.5.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "wt9lNze7z"
},
"expr": "probe_duration_seconds",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "探测耗时",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "wt9lNze7z"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "smooth",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 3,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 24,
"x": 0,
"y": 16
},
"id": 119,
"links": [],
"options": {
"legend": {
"calcs": [
"min"
],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.5.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "wt9lNze7z"
},
"expr": "probe_success{blackbox=\"dns\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "DNS探测",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "wt9lNze7z"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "smooth",
"lineWidth": 3,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"__systemRef": "hideSeriesFrom",
"matcher": {
"id": "byNames",
"options": {
"mode": "exclude",
"names": [
"192.168.0.11"
],
"prefix": "All except:",
"readOnly": true
}
},
"properties": [
{
"id": "custom.hideFrom",
"value": {
"legend": false,
"tooltip": false,
"viz": true
}
}
]
}
]
},
"gridPos": {
"h": 6,
"w": 24,
"x": 0,
"y": 22
},
"id": 115,
"links": [],
"options": {
"legend": {
"calcs": [
"min"
],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.5.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "wt9lNze7z"
},
"editorMode": "code",
"exemplar": false,
"expr": "probe_success{blackbox=\"icmp\"}",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"range": true,
"refId": "A"
}
],
"title": "网络探测",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "wt9lNze7z"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "smooth",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 3,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 24,
"x": 0,
"y": 28
},
"id": 113,
"links": [],
"options": {
"legend": {
"calcs": [
"min"
],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.5.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "wt9lNze7z"
},
"expr": "probe_success{blackbox=\"tcp\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "端口探测",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "wt9lNze7z"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "smooth",
"lineWidth": 3,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 24,
"x": 0,
"y": 34
},
"id": 117,
"links": [],
"options": {
"legend": {
"calcs": [
"min"
],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.5.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "wt9lNze7z"
},
"expr": "probe_success{blackbox=\"http_2xx\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "站点探测",
"type": "timeseries"
}
],
"refresh": "30s",
"schemaVersion": 36,
"style": "dark",
"tags": [
"blackbox",
"prometheus"
],
"templating": {
"list": [
{
"auto": true,
"auto_count": 10,
"auto_min": "10s",
"current": {
"selected": false,
"text": "auto",
"value": "$__auto_interval_interval"
},
"hide": 2,
"label": "Interval",
"name": "interval",
"options": [
{
"selected": true,
"text": "auto",
"value": "$__auto_interval_interval"
},
{
"selected": false,
"text": "5s",
"value": "5s"
},
{
"selected": false,
"text": "10s",
"value": "10s"
},
{
"selected": false,
"text": "30s",
"value": "30s"
},
{
"selected": false,
"text": "1m",
"value": "1m"
},
{
"selected": false,
"text": "10m",
"value": "10m"
},
{
"selected": false,
"text": "30m",
"value": "30m"
},
{
"selected": false,
"text": "1h",
"value": "1h"
},
{
"selected": false,
"text": "6h",
"value": "6h"
},
{
"selected": false,
"text": "12h",
"value": "12h"
},
{
"selected": false,
"text": "1d",
"value": "1d"
},
{
"selected": false,
"text": "7d",
"value": "7d"
},
{
"selected": false,
"text": "14d",
"value": "14d"
},
{
"selected": false,
"text": "30d",
"value": "30d"
}
],
"query": "5s,10s,30s,1m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
"refresh": 2,
"skipUrlSync": false,
"type": "interval"
},
{
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": {
"type": "prometheus",
"uid": "wt9lNze7z"
},
"definition": "",
"hide": 2,
"includeAll": true,
"multi": true,
"name": "targets",
"options": [],
"query": {
"query": "label_values(probe_success, instance)",
"refId": "Prometheus-targets-Variable-Query"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-5m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "Blackbox Exporter Quick Overview-1",
"uid": "xtkCtBkiz2",
"version": 28,
"weekStart": ""
}