prometheus 监控PM2中的nodejs服务 Grafana展示

场景:

近期生产nodejs服务是用pm2去管理的,那么该如何使用prometheus进行监控告警呢?说干就干

1、在 GitHub 上发现一个开源的 exporter:saikatharryc/pm2-prometheus-exporter(pm2 prometheus exporter)

废话不多说直接试试看吧。

# Install the pm2-metrics module through pm2
pm2 install pm2-metrics

# Once the download has finished, start it

pm2 start pm2-metrics

暴露的是9209端口,我们访问一下看看

http://<HOST>:9209/metrics

下面我们配置一下prometheus 的配置文件,添加一个job,重启prometheus

# Scrape job for the PM2 metrics endpoint (served on port 9209)
  - job_name: "nodejs-app"
    static_configs:
    - targets:
      - "172.19.143.3:9209"

访问prometheus的targets 检查一下,可以看到是没问题的

那么好,如何绘图呢?首先,我去grafana的官网看了下有没有能直接用的Dashboard。

很遗憾,并没有 (图中的两个用不了,可能是版本原因吧,只有一两个表有数据)。

经过一番查找,找到一个能看的,上json

{
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": {
          "type": "datasource",
          "uid": "grafana"
        },
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "target": {
          "limit": 100,
          "matchAny": false,
          "tags": [],
          "type": "dashboard"
        },
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 0,
  "id": 4,
  "links": [
    {
      "asDropdown": false,
      "icon": "external link",
      "includeVars": false,
      "keepTime": false,
      "tags": [],
      "targetBlank": false,
      "title": "New link",
      "tooltip": "",
      "type": "dashboards",
      "url": ""
    }
  ],
  "liveNow": false,
  "panels": [
    {
      "datasource": "Prometheus",
      "description": "",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "custom": {
            "fillOpacity": 70,
            "lineWidth": 0,
            "spanNulls": false
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "red",
                "value": null
              },
              {
                "color": "green",
                "value": 1
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 5,
        "w": 24,
        "x": 0,
        "y": 0
      },
      "id": 2,
      "links": [],
      "options": {
        "alignValue": "left",
        "legend": {
          "displayMode": "list",
          "placement": "bottom"
        },
        "mergeValues": true,
        "rowHeight": 0.9,
        "showValue": "auto",
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "8.5.2",
      "targets": [
        {
          "datasource": "Prometheus",
          "editorMode": "code",
          "exemplar": false,
          "expr": "pm2_up{name!=\"pm2-metrics\"}",
          "format": "time_series",
          "instant": false,
          "interval": "",
          "intervalFactor": 2,
          "legendFormat": "{{name}}",
          "range": true,
          "refId": "A",
          "step": 120
        }
      ],
      "title": "Status",
      "type": "state-timeline"
    },
    {
      "datasource": "Prometheus",
      "description": "Количество запущенных процессов (если больше одного, что-то не так)",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 3,
        "w": 24,
        "x": 0,
        "y": 5
      },
      "id": 8,
      "links": [],
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "auto",
        "reduceOptions": {
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        },
        "textMode": "auto"
      },
      "pluginVersion": "8.5.2",
      "targets": [
        {
          "datasource": "Prometheus",
          "editorMode": "code",
          "expr": "pm2_instances{name!=\"pm2-metrics\"}",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "{{name}}",
          "range": true,
          "refId": "A",
          "step": 120
        }
      ],
      "title": "Instances",
      "type": "stat"
    },
    {
      "datasource": "Prometheus",
      "description": "Перезапуск срабатывает в случае изменения файлов или при фатальной ошибке",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "none"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 3,
        "w": 24,
        "x": 0,
        "y": 8
      },
      "id": 17,
      "links": [],
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "auto",
        "reduceOptions": {
          "calcs": ["lastNotNull"],
          "fields": "",
          "values": false
        },
        "text": {},
        "textMode": "value_and_name"
      },
      "pluginVersion": "8.5.2",
      "targets": [
        {
          "datasource": "Prometheus",
          "editorMode": "code",
          "expr": "pm2_restarts{name!=\"pm2-metrics\"}",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "{{name}}",
          "range": true,
          "refId": "A",
          "step": 120
        }
      ],
      "title": "Restarts",
      "type": "stat"
    },
    {
      "datasource": "Prometheus",
      "description": "Сколько времени живет микросервис",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "decimals": 1,
          "mappings": [
            {
              "options": {
                "match": "null",
                "result": {
                  "text": "N/A"
                }
              },
              "type": "special"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              }
            ]
          },
          "unit": "s"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 3,
        "w": 24,
        "x": 0,
        "y": 11
      },
      "id": 7,
      "links": [],
      "maxDataPoints": 100,
      "options": {
        "colorMode": "value",
        "graphMode": "none",
        "justifyMode": "auto",
        "orientation": "vertical",
        "reduceOptions": {
          "calcs": ["mean"],
          "fields": "",
          "values": false
        },
        "textMode": "auto"
      },
      "pluginVersion": "8.5.2",
      "targets": [
        {
          "datasource": "Prometheus",
          "editorMode": "code",
          "exemplar": false,
          "expr": "pm2_uptime{name!=\"pm2-metrics\"}",
          "format": "time_series",
          "instant": false,
          "intervalFactor": 1,
          "legendFormat": "{{name}}",
          "range": true,
          "refId": "A",
          "step": 1800
        }
      ],
      "title": "Uptime",
      "type": "stat"
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "Prometheus",
      "decimals": 2,
      "description": "",
      "fieldConfig": {
        "defaults": {
          "unit": "%"
        },
        "overrides": []
      },
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 9,
        "w": 12,
        "x": 0,
        "y": 14
      },
      "hiddenSeries": false,
      "id": 4,
      "legend": {
        "alignAsTable": true,
        "avg": true,
        "current": true,
        "hideEmpty": false,
        "hideZero": false,
        "max": true,
        "min": true,
        "rightSide": true,
        "show": true,
        "sort": "current",
        "sortDesc": true,
        "total": false,
        "values": true
      },
      "lines": true,
      "linewidth": 1,
      "links": [],
      "nullPointMode": "null",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "8.5.2",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "datasource": "Prometheus",
          "editorMode": "code",
          "exemplar": false,
          "expr": "pm2_cpu{name!=\"pm2-metrics\"}",
          "format": "time_series",
          "hide": false,
          "instant": false,
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "{{name}}",
          "range": true,
          "refId": "A",
          "step": 240
        }
      ],
      "thresholds": [],
      "timeRegions": [],
      "title": "CPU",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "mode": "time",
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "$$hashKey": "object:87",
          "format": "%",
          "logBase": 1,
          "min": "0",
          "show": true
        },
        {
          "$$hashKey": "object:88",
          "format": "short",
          "logBase": 1,
          "show": false
        }
      ],
      "yaxis": {
        "align": false
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "Prometheus",
      "decimals": 2,
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 9,
        "w": 12,
        "x": 12,
        "y": 14
      },
      "hiddenSeries": false,
      "id": 5,
      "legend": {
        "alignAsTable": true,
        "avg": true,
        "current": true,
        "max": true,
        "min": true,
        "rightSide": true,
        "show": true,
        "sort": "current",
        "sortDesc": true,
        "total": false,
        "values": true
      },
      "lines": true,
      "linewidth": 1,
      "links": [],
      "nullPointMode": "null",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "8.5.2",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "datasource": "Prometheus",
          "editorMode": "code",
          "expr": "pm2_memory{name!=\"pm2-metrics\"}",
          "format": "time_series",
          "intervalFactor": 1,
          "legendFormat": "{{name}}",
          "range": true,
          "refId": "A",
          "step": 240
        }
      ],
      "thresholds": [],
      "timeRegions": [],
      "title": "Memory",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "mode": "time",
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "$$hashKey": "object:87",
          "format": "decbytes",
          "logBase": 1,
          "min": "0",
          "show": true
        },
        {
          "$$hashKey": "object:88",
          "format": "short",
          "logBase": 1,
          "show": false
        }
      ],
      "yaxis": {
        "align": false
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "Prometheus",
      "decimals": 2,
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 7,
        "w": 12,
        "x": 0,
        "y": 23
      },
      "hiddenSeries": false,
      "id": 11,
      "legend": {
        "alignAsTable": true,
        "avg": true,
        "current": true,
        "max": true,
        "min": true,
        "rightSide": true,
        "show": true,
        "sort": "current",
        "sortDesc": true,
        "total": false,
        "values": true
      },
      "lines": true,
      "linewidth": 1,
      "links": [],
      "nullPointMode": "null",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "8.5.2",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "datasource": "Prometheus",
          "editorMode": "code",
          "expr": "pm2_http_mean_latency{name!=\"pm2-metrics\"}",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "mean-{{name}}",
          "range": true,
          "refId": "A",
          "step": 240
        },
        {
          "datasource": "Prometheus",
          "editorMode": "code",
          "expr": "pm2_http_p95_latency{name!=\"pm2-metrics\"}",
          "format": "time_series",
          "intervalFactor": 1,
          "legendFormat": "p95-{{name}}",
          "range": true,
          "refId": "B"
        }
      ],
      "thresholds": [],
      "timeRegions": [],
      "title": "HTTP Latency",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "mode": "time",
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "$$hashKey": "object:310",
          "format": "ms",
          "logBase": 1,
          "min": "0",
          "show": true
        },
        {
          "$$hashKey": "object:311",
          "format": "short",
          "logBase": 1,
          "show": false
        }
      ],
      "yaxis": {
        "align": false
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "Prometheus",
      "decimals": 2,
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 7,
        "w": 12,
        "x": 12,
        "y": 23
      },
      "hiddenSeries": false,
      "id": 12,
      "legend": {
        "alignAsTable": true,
        "avg": true,
        "current": true,
        "max": true,
        "min": true,
        "rightSide": true,
        "show": true,
        "sort": "current",
        "sortDesc": true,
        "total": false,
        "values": true
      },
      "lines": true,
      "linewidth": 1,
      "links": [],
      "nullPointMode": "null",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "8.5.2",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "datasource": "Prometheus",
          "editorMode": "code",
          "expr": "pm2_event_loop_latency{name!=\"pm2-metrics\"}",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "mean-{{name}}",
          "range": true,
          "refId": "A",
          "step": 240
        },
        {
          "datasource": "Prometheus",
          "editorMode": "code",
          "expr": "pm2_event_loop_latency_p95{name!=\"pm2-metrics\"}",
          "format": "time_series",
          "intervalFactor": 1,
          "legendFormat": "p95-{{name}}",
          "range": true,
          "refId": "B"
        }
      ],
      "thresholds": [],
      "timeRegions": [],
      "title": "Event Loop Latency",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "mode": "time",
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "$$hashKey": "object:673",
          "format": "ms",
          "logBase": 1,
          "min": "0",
          "show": true
        },
        {
          "$$hashKey": "object:674",
          "format": "short",
          "logBase": 1,
          "show": false
        }
      ],
      "yaxis": {
        "align": false
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "Prometheus",
      "decimals": 0,
      "fill": 0,
      "fillGradient": 0,
      "gridPos": {
        "h": 7,
        "w": 12,
        "x": 0,
        "y": 30
      },
      "hiddenSeries": false,
      "id": 13,
      "legend": {
        "alignAsTable": true,
        "avg": true,
        "current": true,
        "max": false,
        "min": false,
        "rightSide": true,
        "show": true,
        "sort": "current",
        "sortDesc": true,
        "total": false,
        "values": true
      },
      "lines": true,
      "linewidth": 1,
      "links": [],
      "nullPointMode": "null",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "8.5.2",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [
        {
          "alias": "Apache Down",
          "color": "#BF1B00"
        },
        {
          "alias": "Apache Down",
          "transform": "negative-Y"
        }
      ],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "datasource": "Prometheus",
          "editorMode": "code",
          "expr": "pm2_active_handles{name!=\"pm2-metrics\"}",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "{{name}}",
          "range": true,
          "refId": "A",
          "step": 120
        }
      ],
      "thresholds": [],
      "timeRegions": [],
      "title": "Active handles",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "mode": "time",
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "$$hashKey": "object:599",
          "format": "short",
          "logBase": 1,
          "min": "0",
          "show": true
        },
        {
          "$$hashKey": "object:600",
          "format": "short",
          "logBase": 1,
          "show": false
        }
      ],
      "yaxis": {
        "align": false
      }
    }
  ],
  "refresh": "",
  "schemaVersion": 36,
  "style": "dark",
  "tags": [],
  "templating": {
    "list": []
  },
  "time": {
    "from": "now-3h",
    "to": "now"
  },
  "timepicker": {
    "refresh_intervals": [
      "5s",
      "10s",
      "30s",
      "1m",
      "5m",
      "15m",
      "30m",
      "1h",
      "2h",
      "1d"
    ],
    "time_options": ["5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d"]
  },
  "timezone": "",
  "title": "Микросервисы",
  "uid": "_GcdVy_7z",
  "version": 24,
  "weekStart": ""
}

导入

ok,看看成果

告警规则

# Alerting rules for processes managed by PM2, based on the pm2_* metrics.
groups:
- name: "pm2_alerts"
  rules:
  # Fires when pm2_cpu stays above 90 for five consecutive minutes.
  - alert: "HighCPUUsage"
    expr: "pm2_cpu > 90"
    for: "5m"
    annotations:
      summary: "检测到高CPU使用率"
      description: "{{ $labels.name }} 进程正在经历高CPU使用率 ({{ $value }}%)"
    labels:
      severity: "critical"

  # Fires when a process's memory stays above 800 MiB for five minutes.
  # pm2_memory is assumed to be in bytes (the dashboard above plots it on a
  # bytes axis), so dividing by 1024 twice converts the value to MiB.
  - alert: HighMemoryUsage
    expr: pm2_memory / 1024 / 1024 > 800  # threshold: 800 MiB
    for: 5m
    labels:
      severity: critical
    annotations:
      summary: 检测到高内存使用率
      # $value is already expressed in MiB after the division in expr,
      # so the unit is spelled out explicitly in the message.
      description: '{{ $labels.name }} 进程正在消耗大量内存 ({{ $value | humanize }} MiB)'

  # Fires when a process has restarted more than 10 times within the last
  # hour, and that condition has held for ten minutes.
  # increase() yields the growth of pm2_restarts over the 1h window; rate()
  # would yield a per-second average, which could practically never exceed 10
  # and would leave this alert permanently silent.
  - alert: ProcessRestartFrequency
    expr: increase(pm2_restarts[1h]) > 10
    for: 10m
    labels:
      severity: warning
    annotations:
      summary: 进程频繁重启
      description: "进程'{{ $labels.name }}'(实例ID:{{ $labels.instance }})在1小时内重启次数超过10次,当前重启次数为{{ $value }}次。"

评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值