1、开启阿里云监控
引用项目 https://pypi.org/project/aliyun-exporter/
https://github.com/aylei/aliyun-exporter
打开阿里云控制台,进入"云监控",选择"主机监控",勾选主机后批量安装或升级插件。为了方便,可以打开"新购ECS自动安装云监控"开关,这样每次新增加的主机都能自动加入监控。
阿里云监控 API 接口每个月有 100 万次免费调用额度,超过后就会收费。
2、 获取阿里云AccessKey
在用户菜单中选择 AccessKey,在弹出的提示中选择"继续使用AccessKey",然后选择"创建AccessKey"。这个 key 权限很大,不能泄露。当然也可以配置 RAM 访问控制来限制权限。
3、准备阿里云配置文件及dockerfile
vi aliyun-exporter.yml
# aliyun-exporter configuration: credentials, the CloudMonitor metrics to pull,
# and the resource types for which info metrics are exported.
credential:
  access_key_id:  # required: AccessKey ID obtained from Alibaba Cloud
  access_key_secret:  # required: AccessKey secret obtained from Alibaba Cloud
  region_id: cn-shenzhen  # region where the ECS instances are located
metrics:
  # Each entry is one metric of the acs_ecs_dashboard project with its query period.
  acs_ecs_dashboard:
    - name: CPUUtilization
      period: 60
    - name: InternetInRate
      period: 60
    - name: IntranetInRate
      period: 60
    - name: InternetOutRate
      period: 60
    - name: IntranetOutRate
      period: 60
    - name: InternetOutRate_Percent
      period: 60
    - name: DiskReadBPS
      period: 60
    - name: DiskWriteBPS
      period: 60
    - name: DiskReadIOPS
      period: 60
    - name: DiskWriteIOPS
      period: 60
    - name: VPC_PublicIP_InternetInRate
      period: 60
    - name: VPC_PublicIP_InternetOutRate
      period: 60
    - name: VPC_PublicIP_InternetOutRate_Percent
      period: 60
    - name: cpu_idle
      period: 15
    - name: cpu_system
      period: 15
    - name: cpu_user
      period: 15
    - name: cpu_wait
      period: 15
    - name: cpu_other
      period: 15
    - name: cpu_total
      period: 15
    - name: memory_totalspace
      period: 15
    - name: memory_usedspace
      period: 15
    - name: memory_actualusedspace
      period: 15
    - name: memory_freespace
      period: 15
    - name: memory_freeutilization
      period: 15
    - name: memory_usedutilization
      period: 15
    - name: load_1m
      period: 15
    - name: load_5m
      period: 15
    - name: load_15m
      period: 15
    - name: diskusage_used
      period: 15
    - name: diskusage_utilization
      period: 15
    - name: diskusage_free
      period: 15
    - name: diskusage_total
      period: 15
    - name: disk_readbytes
      period: 15
    - name: disk_writebytes
      period: 15
    - name: disk_readiops
      period: 15
    - name: disk_writeiops
      period: 15
    - name: fs_inodeutilization
      period: 15
    - name: networkin_rate
      period: 15
    - name: networkout_rate
      period: 15
    - name: networkin_packages
      period: 15
    - name: networkout_packages
      period: 15
    - name: networkin_errorpackages
      period: 15
    - name: networkout_errorpackages
      period: 15
    - name: net_tcpconnection
      period: 15
info_metrics:
  - ecs
Dockerfile
vi Dockerfile
# Builds an image that installs aliyun-exporter with pip3 and runs it on port 9525
# using the config file copied to /etc/aliyun-exporter/aliyun-exporter.yml.
# NOTE(review): alpine:latest is unpinned, so rebuilds may pick up a different
# Alpine release; pin a tested tag for reproducible builds.
FROM alpine:latest
# LABEL replaces the deprecated MAINTAINER instruction.
LABEL maintainer="QIST Docker Maintainers \"87984115@qq.com\""
# Switch apk to the Aliyun mirror, install the build toolchain, install the
# exporter, then remove the toolchain and caches in the same layer.
# rm -rf (instead of rm -r) keeps the build from failing when a path is absent.
# NOTE(review): on newer Alpine releases pip3 may not ship with python3 and
# system-wide pip installs may be refused - confirm against the chosen base tag.
RUN sed -i 's/dl-cdn.alpinelinux.org/mirrors.aliyun.com/g' /etc/apk/repositories \
    && apk update \
    && apk add --no-cache gcc python3 libc-dev python3-dev \
    && pip3 install aliyun-exporter \
    && apk del gcc libc-dev python3-dev \
    && rm -rf /var/cache/apk/* \
    && rm -rf /usr/lib/python*/ensurepip \
    && rm -rf /root/.cache \
    && mkdir -p /etc/aliyun-exporter
COPY aliyun-exporter.yml /etc/aliyun-exporter/aliyun-exporter.yml
EXPOSE 9525
CMD ["/usr/bin/aliyun-exporter", "-p", "9525", "-c", "/etc/aliyun-exporter/aliyun-exporter.yml"]
生成镜像
docker build -t aliyun-exporter .
docker tag aliyun-exporter xxxx.com/aliyun-exporter # xxxx.com docker 仓库地址
测试docker 是否能正常运行
docker run -ti --rm -p9525:9525 aliyun-exporter
访问 http://127.0.0.1:9525/metrics
运行正常后 push 到 docker 仓库
docker push xxxx.com/aliyun-exporter
4、k8s 运行容器 准备yaml
文件名中的 shenzhen 是监控区域的名字,用于区分多个区域
vi shenzhen-aliyun-exporter.yaml
---
# ConfigMap holding the aliyun-exporter configuration file for one region.
# NOTE(review): the AccessKey ID/secret are stored here in plain text; consider
# moving them to a Secret.
apiVersion: v1
kind: ConfigMap
metadata:
  name: ali-shenzhen
  namespace: monitoring
data:
  # The literal block below is the full content of aliyun-exporter.yml;
  # its indentation must be kept for the embedded YAML to stay valid.
  aliyun-exporter.yml: |
    credential:
      access_key_id: 阿里云AccessKey ID
      access_key_secret: 阿里云AccessKey secret
      region_id: cn-shenzhen # 阿里云监控区域
    metrics:
      acs_ecs_dashboard:
        - name: CPUUtilization
          period: 60
        - name: InternetInRate
          period: 60
        - name: IntranetInRate
          period: 60
        - name: InternetOutRate
          period: 60
        - name: IntranetOutRate
          period: 60
        - name: InternetOutRate_Percent
          period: 60
        - name: DiskReadBPS
          period: 60
        - name: DiskWriteBPS
          period: 60
        - name: DiskReadIOPS
          period: 60
        - name: DiskWriteIOPS
          period: 60
        - name: VPC_PublicIP_InternetInRate
          period: 60
        - name: VPC_PublicIP_InternetOutRate
          period: 60
        - name: VPC_PublicIP_InternetOutRate_Percent
          period: 60
        - name: cpu_idle
          period: 15
        - name: cpu_system
          period: 15
        - name: cpu_user
          period: 15
        - name: cpu_wait
          period: 15
        - name: cpu_other
          period: 15
        - name: cpu_total
          period: 15
        - name: memory_totalspace
          period: 15
        - name: memory_usedspace
          period: 15
        - name: memory_actualusedspace
          period: 15
        - name: memory_freespace
          period: 15
        - name: memory_freeutilization
          period: 15
        - name: memory_usedutilization
          period: 15
        - name: load_1m
          period: 15
        - name: load_5m
          period: 15
        - name: load_15m
          period: 15
        - name: diskusage_used
          period: 15
        - name: diskusage_utilization
          period: 15
        - name: diskusage_free
          period: 15
        - name: diskusage_total
          period: 15
        - name: disk_readbytes
          period: 15
        - name: disk_writebytes
          period: 15
        - name: disk_readiops
          period: 15
        - name: disk_writeiops
          period: 15
        - name: fs_inodeutilization
          period: 15
        - name: networkin_rate
          period: 15
        - name: networkout_rate
          period: 15
        - name: networkin_packages
          period: 15
        - name: networkout_packages
          period: 15
        - name: networkin_errorpackages
          period: 15
        - name: networkout_errorpackages
          period: 15
        - name: net_tcpconnection
          period: 15
    info_metrics:
      - ecs
---
# Deployment running a single aliyun-exporter pod; the exporter config is
# mounted from the ali-shenzhen ConfigMap at /etc/aliyun-exporter.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ali-shenzhen
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      k8s-app: aliyun-exporter
  template:
    metadata:
      labels:
        k8s-app: aliyun-exporter
    spec:
      volumes:
        - name: config
          configMap:
            name: ali-shenzhen
      containers:
        - name: ali-shenzhen
          image: xxxx.com/aliyun-exporter
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 9525
              name: http-metrics
              protocol: TCP
          readinessProbe:
            tcpSocket:
              port: http-metrics
            initialDelaySeconds: 10
            periodSeconds: 10
          livenessProbe:
            tcpSocket:
              port: http-metrics
            # The original listed initialDelaySeconds/periodSeconds twice
            # (15/20, then 3/5). Duplicate keys are invalid YAML; the later
            # pair is kept because that is what a last-wins parser applied.
            initialDelaySeconds: 3
            periodSeconds: 5
            successThreshold: 1
            timeoutSeconds: 3
          resources:
            requests:
              cpu: 200m
              memory: 30Mi
            limits:
              memory: 50Mi
              cpu: 250m
          volumeMounts:
            - mountPath: /etc/aliyun-exporter
              name: "config"
---
# Service exposing the exporter's metrics port (9525, named http-metrics) for
# pods labelled k8s-app: aliyun-exporter.
apiVersion: v1
kind: Service
metadata:
  labels:
    k8s-app: aliyun-exporter
  name: ali-shenzhen
  namespace: monitoring
spec:
  selector:
    k8s-app: aliyun-exporter
  ports:
    - protocol: TCP
      port: 9525
      name: http-metrics
---
# ServiceMonitor selecting Services labelled k8s-app: aliyun-exporter and
# scraping their http-metrics port every 30s.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    k8s-app: aliyun-exporter
  name: ali-shenzhen
  namespace: monitoring
spec:
  endpoints:
    - honorLabels: true
      interval: 30s
      port: http-metrics
  jobLabel: k8s-app
  selector:
    matchLabels:
      k8s-app: aliyun-exporter
name: ali-shenzhen 在多区域监控时用到,不同的区域需要使用不同的名称
k8s-app: aliyun-exporter 用于在 grafana 展示时关联数据
为了方便多区域监控,这里创建了 ConfigMap;如果只监控单个区域,并且在封装容器时已经写入了 AccessKey 及区域,就可以不用写 ConfigMap
创建aliyun-exporter
kubectl apply -f shenzhen-aliyun-exporter.yaml
验证服务是否正常
打开prometheus
![使用aliyun-exporter 获取阿里监控到私有prometheus数据库并展示](https://i-blog.csdnimg.cn/blog_migrate/970cd27003f4565d42197f5f8bbcea57.png)
http://10.65.1.43:9525/metrics
![使用aliyun-exporter 获取阿里监控到私有prometheus数据库并展示](https://i-blog.csdnimg.cn/blog_migrate/a46cd8706bdb718cf09b19fa0d8f0f61.png)
可以正常打开
打开grafana 可以创建指标 这里提供两张Dashboard
vi ECS-Overview.json
{
"__inputs": [
{
"name": "DS_PROMETHEUS",
"label": "Prometheus",
"description": "",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
}
],
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "6.3.2"
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
},
{
"type": "panel",
"id": "singlestat",
"name": "Singlestat",
"version": ""
},
{
"type": "panel",
"id": "table",
"name": "Table",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "",
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": null,
"iteration": 1565320060700,
"links": [],
"panels": [
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": "${DS_PROMETHEUS}",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": true,
"thresholdLabels": false,
"thresholdMarkers": true
},
"id": 12,
"interval": null,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"options": {},
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": true
},
"tableColumn": "",
"targets": [
{
"annotations": false,
"expr": "quantile(0.95, aliyun_acs_ecs_dashboard_CPUUtilization)",
"format": "time_series",
"intervalFactor": 1,
"labelSelector": "*",
"legendFormat": "",
"refId": "A",
"target": "Query",
"type": "timeserie"
}
],
"thresholds": "60,80",
"title": "P95 CPU Utilization",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "avg"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": "${DS_PROMETHEUS}",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": true,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 3,
"w": 6,
"x": 6,
"y": 0
},
"id": 14,
"interval": null,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"options": {},
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": true
},
"tableColumn": "",
"targets": [
{
"annotations": false,
"expr": "quantile(0.95, aliyun_acs_ecs_dashboard_load_5m)",
"format": "time_series",
"intervalFactor": 1,
"labelSelector": "*",
"legendFormat": "",
"refId": "A",
"target": "Query",
"type": "timeserie"
}
],
"thresholds": "10,20",
"title": "P95 Load",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "avg"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": "${DS_PROMETHEUS}",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": true,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 3,
"w": 6,
"x": 12,
"y": 0
},
"id": 13,
"interval": null,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"options": {},
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": true
},
"tableColumn": "",
"targets": [
{
"annotations": false,
"expr": "quantile(0.95, aliyun_acs_ecs_dashboard_memory_usedutilization)",
"format": "time_series",
"intervalFactor": 1,
"labelSelector": "*",
"legendFormat": "",
"refId": "A",
"target": "Query",
"type": "timeserie"
}
],
"thresholds": "60,80",
"title": "P95 Memory",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "avg"
},
{
"columns": [],
"fontSize": "100%",
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 3
},
"id": 2,
"links": [],
"options": {},
"pageSize": null,
"scroll": true,
"showHeader": true,
"sort": {
"col": 0,
"desc": true
},
"styles": [
{
"alias": "Time",
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"pattern": "Time",
"type": "hidden"
},
{
"alias": "",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
"linkTargetBlank": false,
"linkTooltip": "detail view",
"linkUrl": "/d/yOtBAPZWk/ecs-detail?orgId=1&var-instanceId=${__cell}",
"mappingType": 1,
"pattern": "instanceId",
"thresholds": [],
"type": "number",
"unit": "short"
},
{
"alias": "",
"colorMode": "row",
"colors": [
"rgba(50, 172, 45, 0.97)",
"rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "Value",
"thresholds": [
"80",
"95"
],
"type": "number",
"unit": "short"
},
{
"alias": "",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "HostName",
"thresholds": [],
"type": "number",
"unit": "short"
},
{
"alias": "",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"decimals": 2,
"pattern": "/.*/",
"thresholds": [],
"type": "hidden",
"unit": "short"
}
],
"targets": [
{
"expr": "(avg_over_time(aliyun_acs_ecs_dashboard_CPUUtilization[$interval]) > 80) * on (instanceId) group_left(VpcAttributes,HostName,InnerIpAddress) \nlabel_replace(aliyun_meta_ecs_info,\"instanceId\",\"$1\",\"InstanceId\",\"(.*)\")",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "A"
}
],
"title": "CPU Pressure",
"transform": "table",
"type": "table"
},
{
"columns": [],
"fontSize": "100%",
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 3
},
"id": 3,
"links": [],
"options": {},
"pageSize": null,
"scroll": true,
"showHeader": true,
"sort": {
"col": 8,
"desc": true
},
"styles": [
{
"alias": "Time",
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"pattern": "Time",
"type": "hidden"
},
{
"alias": "",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
"linkTargetBlank": false,
"linkTooltip": "detail view",
"linkUrl": "/d/yOtBAPZWk/ecs-detail?orgId=1&var-instanceId=${__cell}",
"mappingType": 1,
"pattern": "instanceId",
"thresholds": [],