1.查询集群fsid列表
// List the distinct Ceph cluster fsid tag values that reported
// ceph_cluster_stats within the dashboard's selected time range.
from(bucket: "cts")
    |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
    |> filter(fn: (r) => r._measurement == "ceph_cluster_stats" and r._field == "value")
    // One table per fsid, then collapse each table to its single fsid value.
    |> group(columns: ["fsid"])
    |> distinct(column: "fsid")
2. 查询集群名称列表
需要在telegraf配置文件的tag中增加cluster_name
// List every value of the cluster_name tag seen in the selected bucket
// during the last day (used to populate the cluster selector).
import "influxdata/influxdb/v1"

v1.tagValues(
    bucket: v.bucket,
    tag: "cluster_name",
    start: -1d,
    // Accept every record; no additional restriction on which series are scanned.
    predicate: (r) => true,
)
3.查看ceph集群使用率(结果为 bytes_used / bytes_total 的比值,范围 0~1;如需显示百分比,请在面板单位中选择"百分比 (0.0-1.0)")
// Used-capacity ratio (bytes_used / bytes_total) of the selected cluster,
// one point per dashboard window. The result is a fraction, not multiplied by 100.
from(bucket: "cts")
    |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
    |> filter(fn: (r) => r["_measurement"] == "ceph_cluster_stats")
    |> filter(fn: (r) => r["_field"] == "value")
    // Select the cluster through the dashboard variable (as the other
    // per-cluster queries do) instead of a hard-coded cluster name.
    |> filter(fn: (r) => r["cluster_name"] == "${cluster_name}")
    |> filter(fn: (r) => r["type_instance"] == "bytes_used" or r["type_instance"] == "bytes_total")
    |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: true)
    // Put bytes_used and bytes_total side by side on the same _time row.
    |> pivot(columnKey: ["type_instance"], rowKey: ["_time"], valueColumn: "_value")
    // Skip rows that would divide by zero.
    |> filter(fn: (r) => r.bytes_total != 0)
    // Keep _time in the output record: map() drops every column that the
    // returned record does not contain, and without _time the ratio cannot be
    // drawn as a time series.
    |> map(fn: (r) => ({_time: r._time, _value: r.bytes_used / float(v: r.bytes_total)}))
    |> yield(name: "mean")
4.Ceph健康状态
HEALTH_ERR = 0,
HEALTH_WARN = 1,
HEALTH_OK = 2
// Health status samples of the selected cluster (type_instance == "health").
// Per the notes accompanying this query: HEALTH_ERR = 0, HEALTH_WARN = 1, HEALTH_OK = 2.
from(bucket: "cts")
    |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
    |> filter(fn: (r) => r["_measurement"] == "ceph_cluster_stats")
    |> filter(fn: (r) => r["_field"] == "value")
    // Select the cluster through the dashboard variable (as the other
    // per-cluster queries do) instead of a hard-coded cluster name.
    |> filter(fn: (r) => r["cluster_name"] == "${cluster_name}")
    |> filter(fn: (r) => r["type_instance"] == "health")
    // The result is named "mean" although no aggregation is applied; the name
    // is kept so anything referring to this result keeps working.
    |> yield(name: "mean")
5.1 Ceph总磁盘数(num_osd,OSD 总数)
// Raw num_osd samples for the cluster chosen by the ${cluster_name}
// dashboard variable, over the selected time range.
from(bucket: "cts")
    |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
    |> filter(
        fn: (r) =>
            r._measurement == "ceph_cluster_stats" and r._field == "value"
                and
                r.cluster_name == "${cluster_name}"
                and
                r.type_instance == "num_osd",
    )
    // Result name is "mean" even though the samples are not aggregated.
    |> yield(name: "mean")
5.2 Ceph活跃盘数(num_osd_up,处于 up 状态的 OSD 数)
// Raw num_osd_up samples for the cluster chosen by the ${cluster_name}
// dashboard variable, over the selected time range.
from(bucket: "cts")
    |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
    |> filter(
        fn: (r) =>
            r._measurement == "ceph_cluster_stats" and r._field == "value"
                and
                r.cluster_name == "${cluster_name}"
                and
                r.type_instance == "num_osd_up",
    )
    // Result name is "mean" even though the samples are not aggregated.
    |> yield(name: "mean")
5.3 Ceph在线盘数(num_osd_in,处于 in 状态的 OSD 数)
// Raw num_osd_in samples for the cluster chosen by the ${cluster_name}
// dashboard variable, over the selected time range.
from(bucket: "cts")
    |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
    |> filter(
        fn: (r) =>
            r._measurement == "ceph_cluster_stats" and r._field == "value"
                and
                r.cluster_name == "${cluster_name}"
                and
                r.type_instance == "num_osd_in",
    )
    // Result name is "mean" even though the samples are not aggregated.
    |> yield(name: "mean")
6.异常磁盘数
// Number of OSDs that are not up (num_osd - num_osd_up) for the selected
// cluster, one value per sample timestamp.
from(bucket: "cts")
    |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
    |> filter(fn: (r) => r["_measurement"] == "ceph_cluster_stats")
    |> filter(fn: (r) => r["_field"] == "value")
    |> filter(fn: (r) => r["cluster_name"] == "${cluster_name}")
    |> filter(fn: (r) => r["type_instance"] == "num_osd" or r["type_instance"] == "num_osd_up")
    // Put num_osd and num_osd_up side by side on the same _time row.
    |> pivot(columnKey: ["type_instance"], rowKey: ["_time"], valueColumn: "_value")
    // Keep _time in the output record: map() drops every column that the
    // returned record does not contain, and without _time the difference
    // cannot be drawn as a time series.
    |> map(fn: (r) => ({_time: r._time, _value: r.num_osd - r.num_osd_up}))
    |> yield(name: "mean")
7.磁盘读写延时
// Per-window rate of change of the osd.op_latency series for the selected cluster.
// NOTE(review): there is no _measurement or _field filter, so every series
// carrying this type_instance is included — confirm that is intended.
from(bucket: "cts")
    |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
    |> filter(fn: (r) => r["type_instance"] == "osd.op_latency")
    // Restrict to the cluster chosen in the dashboard, like the other
    // per-cluster queries; without this filter all clusters are mixed together.
    |> filter(fn: (r) => r["cluster_name"] == "${cluster_name}")
    |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
    // Change of the windowed mean per window period.
    |> derivative(unit: v.windowPeriod)
    |> yield(name: "mean")
8.集群iops
// Windowed mean of the read and write operations-per-second fields of
// ceph_pgmap for the cluster chosen by the ${cluster_name} dashboard variable.
from(bucket: "telegraf_ceph")
    |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
    |> filter(
        fn: (r) =>
            r._measurement == "ceph_pgmap"
                and
                (r._field == "read_op_per_sec" or r._field == "write_op_per_sec")
                and
                r.cluster_name == "${cluster_name}",
    )
    |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
    |> yield(name: "mean")
9.集群读写延时
// Per-window rate of change of the osd.op_latency series for the selected cluster.
// NOTE(review): there is no _measurement or _field filter, so every series
// carrying this type_instance is included — confirm that is intended.
from(bucket: "cts")
    |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
    |> filter(fn: (r) => r["type_instance"] == "osd.op_latency")
    // Restrict to the cluster chosen in the dashboard, like the other
    // per-cluster queries; without this filter all clusters are mixed together.
    |> filter(fn: (r) => r["cluster_name"] == "${cluster_name}")
    |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
    // Change of the windowed mean per window period.
    |> derivative(unit: v.windowPeriod)
    |> yield(name: "mean")
10.重构带宽(recovering_bytes_per_sec,每秒恢复的字节数)
// Cluster-wide recovering_bytes_per_sec: the per-window sum of all
// ceph_pool_stats series for the selected cluster.
from(bucket: "telegraf_ceph")
    |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
    |> filter(fn: (r) => r["_measurement"] == "ceph_pool_stats")
    |> filter(fn: (r) => r["_field"] == "recovering_bytes_per_sec")
    |> filter(fn: (r) => r["cluster_name"] == "${cluster_name}")
    // Align every series onto the dashboard window grid first, so the sum
    // below only adds values that belong to the same instant.
    |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
    // Sum across series per timestamp. Summing a per-second rate over the
    // whole time range (grouping without _time) would add up samples taken at
    // different moments and scale with the number of samples in the range.
    |> group(columns: ["cluster_name", "_field", "_start", "_stop", "_time"], mode: "by")
    |> sum(column: "_value")
    // Merge the per-timestamp tables back into one chronologically ordered series.
    |> group(columns: ["cluster_name", "_field", "_start", "_stop"], mode: "by")
    |> sort(columns: ["_time"])
11.重构速率(recovering_keys_per_sec,每秒恢复的 key 数)
// Cluster-wide recovering_keys_per_sec: the per-window sum of all
// ceph_pool_stats series for the selected cluster.
from(bucket: "telegraf_ceph")
    |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
    |> filter(fn: (r) => r["_measurement"] == "ceph_pool_stats")
    |> filter(fn: (r) => r["_field"] == "recovering_keys_per_sec")
    |> filter(fn: (r) => r["cluster_name"] == "${cluster_name}")
    // Align every series onto the dashboard window grid first, so the sum
    // below only adds values that belong to the same instant.
    |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
    // Sum across series per timestamp. Summing a per-second rate over the
    // whole time range (grouping without _time) would add up samples taken at
    // different moments and scale with the number of samples in the range.
    |> group(columns: ["cluster_name", "_field", "_start", "_stop", "_time"], mode: "by")
    |> sum(column: "_value")
    // Merge the per-timestamp tables back into one chronologically ordered series.
    |> group(columns: ["cluster_name", "_field", "_start", "_stop"], mode: "by")
    |> sort(columns: ["_time"])
12. PG分布
// Windowed mean of the osd.numpg series in ceph_daemon_stats for the cluster
// chosen by the ${cluster_name} dashboard variable.
from(bucket: "cts")
    |> range(start: v.timeRangeStart, stop: v.timeRangeStop)
    |> filter(
        fn: (r) =>
            r._measurement == "ceph_daemon_stats" and r.type_instance == "osd.numpg"
                and
                r.cluster_name == "${cluster_name}",
    )
    |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
    |> yield(name: "mean")