文章目录
前言
Metrics是一个为Java服务的各项指标提供度量工具的库,在Java代码中嵌入Metrics代码,可以方便地对业务代码的各项指标进行监控。
官方文档:
- github:https://github.com/dropwizard/metrics
- 官方文档:https://metrics.dropwizard.io/4.2.0/getting-started.html
环境搭建
<dependency>
<groupId>io.dropwizard.metrics</groupId>
<artifactId>metrics-core</artifactId>
<version>3.2.6</version>
</dependency>
五大metric
Meter
主要用来测量服务接口的调用频率。
public class MeterExample {
private static final MetricRegistry registry = new MetricRegistry();
private final static Meter requestMeter = registry.meter("tqs");
private final static Meter sizeMeter = registry.meter("volume");
public static void main(String[] args) {
ConsoleReporter reporter = ConsoleReporter.forRegistry(registry)
.convertRatesTo(TimeUnit.MINUTES)
.convertDurationsTo(TimeUnit.MINUTES).build();
reporter.start(10, TimeUnit.SECONDS);
for (; ; ) {
upload(new byte[current().nextInt(1000)]);
randomSleep();
}
}
private static void randomSleep() {
try {
TimeUnit.SECONDS.sleep(current().nextInt(10));
} catch (InterruptedException e) {
e.printStackTrace();
}
}
private static void upload(byte[] bytes) {
requestMeter.mark();
sizeMeter.mark(bytes.length);
}
}
-- Meters ----------------------------------------------------------------------
tqs
count = 2
mean rate = 11.80 events/minute
1-minute rate = 12.00 events/minute
5-minute rate = 12.00 events/minute
15-minute rate = 12.00 events/minute
volume
count = 900
mean rate = 5310.19 events/minute
1-minute rate = 4714.49 events/minute
5-minute rate = 4610.97 events/minute
15-minute rate = 4593.04 events/minute
21-12-2 16:14:34 ===============================================================
-- Meters ----------------------------------------------------------------------
tqs
count = 3
mean rate = 8.93 events/minute
1-minute rate = 11.12 events/minute
5-minute rate = 11.80 events/minute
15-minute rate = 11.93 events/minute
volume
count = 1824
mean rate = 5428.88 events/minute
1-minute rate = 4877.28 events/minute
5-minute rate = 4643.08 events/minute
15-minute rate = 4603.72 events/minute
可以看到,tqs的平均速率(mean rate)是每分钟8.93次请求,volume的平均速率是每分钟上传5428.88字节。
Gauge
返回某一时刻的瞬时值(value)。
SimpleGauge
/**
 * Gauge demo: registers a gauge that reports the current size of a shared queue
 * while one thread adds elements and another removes them.
 *
 * <p>The console reporter prints the gauge value every second.
 */
public class SimpleGaugeExample {

    private static final MetricRegistry registry = new MetricRegistry();

    private static final ConsoleReporter reporter = ConsoleReporter.forRegistry(registry)
            .convertRatesTo(TimeUnit.SECONDS)
            .convertDurationsTo(TimeUnit.SECONDS).build();

    /** Queue whose size is exposed through the gauge. */
    private static Queue<Long> queue = new LinkedBlockingDeque<>();

    public static void main(String[] args) {
        // The gauge is evaluated each time it is read and returns queue.size().
        Gauge<Integer> queueSize = queue::size;
        String gaugeName = MetricRegistry.name(SimpleGaugeExample.class);
        registry.register(gaugeName, queueSize);
        reporter.start(1, TimeUnit.SECONDS);

        Thread producer = new Thread(SimpleGaugeExample::produceForever);
        producer.start();

        Thread consumer = new Thread(SimpleGaugeExample::consumeForever);
        consumer.start();
    }

    /** Producer loop: random pause, then append the current nano time to the queue. */
    private static void produceForever() {
        while (true) {
            pauseRandomly();
            queue.add(System.nanoTime());
        }
    }

    /** Consumer loop: random pause, then remove the head of the queue if there is one. */
    private static void consumeForever() {
        while (true) {
            pauseRandomly();
            queue.poll();
        }
    }

    /** Sleeps for a random whole number of seconds in the range 0..4. */
    private static void pauseRandomly() {
        int seconds = current().nextInt(5);
        try {
            TimeUnit.SECONDS.sleep(seconds);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
}
运行上面代码,可以看到,每隔1秒,对queue的size进行一次输出。
-- Gauges ----------------------------------------------------------------------
fast.cloud.nacos.metrics.SimpleGaugeExample
value = 11
21-12-2 16:19:05 ===============================================================
-- Gauges ----------------------------------------------------------------------
fast.cloud.nacos.metrics.SimpleGaugeExample
value = 11
21-12-2 16:19:06 ===============================================================
-- Gauges ----------------------------------------------------------------------
fast.cloud.nacos.metrics.SimpleGaugeExample
value = 11
通过队列长度的变化,我们可以观察生产者与消费者的相对速率,以及队列的积压情况。
JmxAttributeGauge
/**
 * JmxAttributeGauge demo: exposes the {@code HeapMemoryUsage} and
 * {@code NonHeapMemoryUsage} attributes of the {@code java.lang:type=Memory}
 * MBean as gauges and prints them to the console every 10 seconds.
 */
public class JmxAttributeGaugeExample {

    private static final MetricRegistry registry = new MetricRegistry();

    private static final ConsoleReporter reporter = ConsoleReporter.forRegistry(registry)
            .convertRatesTo(TimeUnit.SECONDS)
            .convertDurationsTo(TimeUnit.SECONDS).build();

    public static void main(String[] args) throws MalformedObjectNameException, InterruptedException {
        reporter.start(10, TimeUnit.SECONDS);

        final String memoryMBean = "java.lang:type=Memory";

        String heapName = MetricRegistry.name(JmxAttributeGaugeExample.class, "HeapMemory");
        JmxAttributeGauge heapGauge =
                new JmxAttributeGauge(new ObjectName(memoryMBean), "HeapMemoryUsage");
        registry.register(heapName, heapGauge);

        String nonHeapName = MetricRegistry.name(JmxAttributeGaugeExample.class, "NonHeapMemory");
        JmxAttributeGauge nonHeapGauge =
                new JmxAttributeGauge(new ObjectName(memoryMBean), "NonHeapMemoryUsage");
        registry.register(nonHeapName, nonHeapGauge);

        // Block the main thread forever (it waits on itself).
        Thread.currentThread().join();
    }
}
-- Gauges ----------------------------------------------------------------------
fast.cloud.nacos.metrics.metric.JmxAttributeGaugeExample.HeapMemory
value = javax.management.openmbean.CompositeDataSupport(compositeType=javax.management.openmbean.CompositeType(name=java.lang.management.MemoryUsage,items=((itemName=committed,itemType=javax.management.openmbean.SimpleType(name=java.lang.Long)),(itemName=init,itemType=javax.management.openmbean.SimpleType(name=java.lang.Long)),(itemName=max,itemType=javax.management.openmbean.SimpleType(name=java.lang.Long)),(itemName=used,itemType=javax.management.openmbean.SimpleType(name=java.lang.Long)))),contents={committed=257425408, init=268435456, max=3817865216, used=16163152})
fast.cloud.nacos.metrics.metric.JmxAttributeGaugeExample.NonHeapMemory
value = javax.management.openmbean.CompositeDataSupport(compositeType=javax.management.openmbean.CompositeType(name=java.lang.management.MemoryUsage,items=((itemName=committed,itemType=javax.management.openmbean.SimpleType(name=java.lang.Long)),(itemName=init,itemType=javax.management.openmbean.SimpleType(name=java.lang.Long)),(itemName=max,itemType=javax.management.openmbean.SimpleType(name=java.lang.Long)),(itemName=used,itemType=javax.management.openmbean.SimpleType(name=java.lang.Long)))),contents={committed=10944512, init=2555904, max=-1, used=10009504})
21-12-2 16:21:47 ===============================================================
-- Gauges ----------------------------------------------------------------------
fast.cloud.nacos.metrics.metric.JmxAttributeGaugeExample.HeapMemory
value = javax.management.openmbean.CompositeDataSupport(compositeType=javax.management.openmbean.CompositeType(name=java.lang.management.MemoryUsage,items=((itemName=committed,itemType=javax.management.openmbean.SimpleType(name=java.lang.Long)),(itemName=init,itemType=javax.management.openmbean.SimpleType(name=java.lang.Long)),(itemName=max,itemType=javax.management.openmbean.SimpleType(name=java.lang.Long)),(itemName=used,itemType=javax.management.openmbean.SimpleType(name=java.lang.Long)))),contents={committed=257425408, init=268435456, max=3817865216, used=16163152})
fast.cloud.nacos.metrics.metric.JmxAttributeGaugeExample.NonHeapMemory
value = javax.management.openmbean.CompositeDataSupport(compositeType=javax.management.openmbean.CompositeType(name=java.lang.management.MemoryUsage,items=((itemName=committed,itemType=javax.management.openmbean.SimpleType(name=java.lang.Long)),(itemName=init,itemType=javax.management.openmbean.SimpleType(name=java.lang.Long)),(itemName=max,itemType=javax.management.openmbean.SimpleType(name=java.lang.Long)),(itemName=used,itemType=javax.management.openmbean.SimpleType(name=java.lang.Long)))),contents={committed=10944512, init=2555904, max=-1, used=10130232})
可以看到,每隔十秒输出JVM堆内存及非堆内存的信息。
RatioGauge
用于创建两个数字之间的某种比率,比如业务受理的成功率或者失败率等。
/**
 * RatioGauge demo: publishes the ratio of successful business calls to total
 * business calls under the name "success-rate" and prints it every 10 seconds.
 */
public class RadioGauge {

    private static final MetricRegistry registry = new MetricRegistry();

    private static final ConsoleReporter reporter = ConsoleReporter.forRegistry(registry)
            .convertRatesTo(TimeUnit.SECONDS)
            .convertDurationsTo(TimeUnit.SECONDS).build();

    /** Marked on every business call. Not registered; only its count is read. */
    private final static Meter totalMeter = new Meter();

    /** Marked only when a business call completes without an exception. */
    private final static Meter successMeter = new Meter();

    /** Gauge whose ratio is successful calls over total calls. */
    private static final class SuccessRatio extends RatioGauge {
        @Override
        protected Ratio getRatio() {
            long succeeded = successMeter.getCount();
            long attempted = totalMeter.getCount();
            return Ratio.of(succeeded, attempted);
        }
    }

    public static void main(String[] args) {
        reporter.start(10, TimeUnit.SECONDS);
        registry.gauge("success-rate", () -> new SuccessRatio());

        while (true) {
            shortSleep();
            business();
        }
    }

    /**
     * Simulated business call. The divisor is a random int in 0..5, so a zero
     * divisor makes the division throw and the call counts as a failure.
     */
    private static void business() {
        totalMeter.mark();
        try {
            int divisor = current().nextInt(6);
            int quotient = 10 / divisor;
            successMeter.mark();
        } catch (Exception e) {
            System.out.println("ERROR");
        }
    }

    /** Sleeps for a random whole number of seconds in the range 0..5. */
    private static void shortSleep() {
        int seconds = current().nextInt(6);
        try {
            TimeUnit.SECONDS.sleep(seconds);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
}
21-12-2 16:32:18 ===============================================================
-- Gauges ----------------------------------------------------------------------
success-rate
value = 1.0
ERROR
21-12-2 16:32:28 ===============================================================
-- Gauges ----------------------------------------------------------------------
success-rate
value = 0.75
21-12-2 16:32:38 ===============================================================
-- Gauges ----------------------------------------------------------------------
success-rate
value = 0.875
每隔十秒,业务的成功率度量信息将会输出到控制台上。
CacheGauge
如果我们对Gauge的实时性要求没有那么高,就可以使用CachedGauge,在指定的缓存时间内直接返回上一次加载的值。
/**
 * CachedGauge demo: registers a gauge named "cache-db-size" that is constructed
 * with a 30-second timeout, and prints it to the console every 10 seconds.
 */
public class CacheGauge {

    private static final MetricRegistry registry = new MetricRegistry();

    private static final ConsoleReporter reporter = ConsoleReporter.forRegistry(registry)
            .convertRatesTo(TimeUnit.SECONDS)
            .convertDurationsTo(TimeUnit.SECONDS).build();

    /** Cached gauge that loads its value from {@link #queryFromDbSize()}. */
    private static final class DbSizeGauge extends CachedGauge {
        DbSizeGauge() {
            super(30, TimeUnit.SECONDS);
        }

        @Override
        protected Object loadValue() {
            return queryFromDbSize();
        }
    }

    public static void main(String[] args) throws InterruptedException {
        reporter.start(10, TimeUnit.SECONDS);
        registry.gauge("cache-db-size", () -> new DbSizeGauge());

        // Block the main thread forever (it waits on itself).
        Thread.currentThread().join();
    }

    /**
     * Stand-in for a database query: prints a marker line so each real load is
     * visible in the output, then returns the current time in milliseconds.
     */
    private static Object queryFromDbSize() {
        System.out.println("=====queryFromDbSize====");
        long now = System.currentTimeMillis();
        return now;
    }
}
21-12-2 16:34:08 ===============================================================
-- Gauges ----------------------------------------------------------------------
cache-db-size
=====queryFromDbSize====
value = 1638434048893
21-12-2 16:34:18 ===============================================================
-- Gauges ----------------------------------------------------------------------
cache-db-size
value = 1638434048893
21-12-2 16:34:28 ===============================================================
-- Gauges ----------------------------------------------------------------------
cache-db-size
value = 1638434048893
21-12-2 16:34:38 ===============================================================
-- Gauges ----------------------------------------------------------------------
cache-db-size
value = 1638434048893
可以看到,没有输出=====queryFromDbSize====的那几次报告,取到的就是缓存中的数据。
Counter
我们每次对queue取size是有性能影响的;Counter是一个可以递增或递减的64位计数器,用它来记录队列长度可以避免这种性能开销。
/**
 * Counter demo: tracks the length of a queue with a {@code Counter} instead of
 * calling {@code size()} on the queue. The counter is incremented after every
 * add and decremented after every successful poll, and is printed to the
 * console every 10 seconds.
 */
public class CounterExample {

    private static final MetricRegistry registry = new MetricRegistry();

    private static final ConsoleReporter reporter = ConsoleReporter.forRegistry(registry)
            .convertRatesTo(TimeUnit.SECONDS)
            .convertDurationsTo(TimeUnit.SECONDS).build();

    private static final BlockingQueue<Long> queue = new LinkedBlockingQueue<>();

    public static void main(String[] args) throws InterruptedException {
        reporter.start(10, TimeUnit.SECONDS);
        Counter counter = registry.counter("queue-count", Counter::new);

        // Producer: add an element, then bump the counter.
        Thread producer = new Thread(() -> {
            for (; ; ) {
                randomSleep();
                queue.add(System.nanoTime());
                counter.inc();
            }
        });

        // Consumer: decrement only when an element was actually removed.
        Thread consumer = new Thread(() -> {
            for (; ; ) {
                randomSleep();
                if (queue.poll() != null) {
                    counter.dec();
                }
            }
        });

        // Fix: the threads were previously constructed but never started, so
        // nothing ever touched the queue and the counter stayed at 0.
        producer.start();
        consumer.start();

        // Block the main thread forever (it waits on itself).
        Thread.currentThread().join();
    }

    /**
     * Sleeps for a random whole number of seconds in the range 0..499. With a
     * 10-second reporting interval, many consecutive reports can therefore show
     * an unchanged count.
     */
    private static void randomSleep() {
        try {
            TimeUnit.SECONDS.sleep(current().nextInt(500));
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
}
-- Counters --------------------------------------------------------------------
queue-count
count = 0
21-12-2 16:40:38 ===============================================================
-- Counters --------------------------------------------------------------------
queue-count
count = 0
21-12-2 16:40:48 ===============================================================
-- Counters --------------------------------------------------------------------
queue-count
count = 0
Histogram
Histogram(直方图)用于度量一组数据的统计分布,例如最小值、最大值、平均值、中位数以及各个百分位数。
/**
 * Histogram demo: records a random "search result" value (0..9) per simulated
 * search into the "search-result" histogram, printed every 10 seconds.
 */
public class HistogramExample {

    private static final MetricRegistry registry = new MetricRegistry();

    private static final ConsoleReporter reporter = ConsoleReporter.forRegistry(registry)
            .convertRatesTo(TimeUnit.SECONDS)
            .convertDurationsTo(TimeUnit.SECONDS).build();

    private static final Histogram histogram = registry.histogram("search-result");

    public static void main(String[] args) {
        reporter.start(10, TimeUnit.SECONDS);
        for (; ; ) {
            doSearch();
            pauseRandomly();
        }
    }

    /** Simulates one search by feeding a random value in 0..9 to the histogram. */
    private static void doSearch() {
        int resultValue = current().nextInt(10);
        histogram.update(resultValue);
    }

    /** Sleeps for a random whole number of seconds in the range 0..9. */
    private static void pauseRandomly() {
        int seconds = current().nextInt(10);
        try {
            TimeUnit.SECONDS.sleep(seconds);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
}
-- Histograms ------------------------------------------------------------------
search-result
count = 3
min = 0
max = 5
mean = 1.95
stddev = 2.16
median = 1.00
75% <= 5.00
95% <= 5.00
98% <= 5.00
99% <= 5.00
99.9% <= 5.00
21-12-2 16:42:59 ===============================================================
-- Histograms ------------------------------------------------------------------
search-result
count = 4
min = 0
max = 9
mean = 3.89
stddev = 3.65
median = 5.00
75% <= 9.00
95% <= 9.00
98% <= 9.00
99% <= 9.00
99.9% <= 9.00
可以看到75%的搜索结果都小于等于9。
count: 参与统计的数据有多少条
min: 统计数据的最小值
max: 统计数据的最大值
mean: 统计数据的平均值
stddev: 统计数据的标准差
median: 统计数据的中位数
75%: 75%的统计数据小于等于该值(95%、98%、99%、99.9%同理)
Timers
Timer其实是 Histogram 和 Meter 的结合: Histogram 统计某部分代码/调用的耗时分布, Meter 统计TPS。
/**
 * Timer demo: repeatedly times a random sleep of up to one second with the
 * "get-latency" timer and prints the timer to the console every second.
 */
public class TimerTest {

    public static Random random = new Random();

    public static void main(String[] args) throws InterruptedException {
        MetricRegistry registry = new MetricRegistry();

        ConsoleReporter reporter = ConsoleReporter.forRegistry(registry).build();
        reporter.start(1, TimeUnit.SECONDS);

        String timerName = MetricRegistry.name(TimerTest.class, "get-latency");
        Timer timer = registry.timer(timerName);

        for (; ; ) {
            // Everything between time() and stop() is measured.
            Timer.Context ctx = timer.time();
            int pauseMillis = random.nextInt(1000);
            Thread.sleep(pauseMillis);
            ctx.stop();
        }
    }
}
21-12-2 16:49:06 ===============================================================
-- Timers ----------------------------------------------------------------------
fast.cloud.nacos.metrics.metric.TimerTest.get-latency
count = 7
mean rate = 1.75 calls/second
1-minute rate = 0.00 calls/second
5-minute rate = 0.00 calls/second
15-minute rate = 0.00 calls/second
min = 125.79 milliseconds
max = 746.72 milliseconds
mean = 474.94 milliseconds
stddev = 217.70 milliseconds
median = 385.33 milliseconds
75% <= 704.84 milliseconds
95% <= 746.72 milliseconds
98% <= 746.72 milliseconds
99% <= 746.72 milliseconds
99.9% <= 746.72 milliseconds
21-12-2 16:49:07 ===============================================================
-- Timers ----------------------------------------------------------------------
fast.cloud.nacos.metrics.metric.TimerTest.get-latency
count = 10
mean rate = 2.00 calls/second
1-minute rate = 0.00 calls/second
5-minute rate = 0.00 calls/second
15-minute rate = 0.00 calls/second
min = 125.79 milliseconds
max = 971.09 milliseconds
mean = 493.35 milliseconds
stddev = 251.51 milliseconds
median = 368.69 milliseconds
75% <= 704.84 milliseconds
95% <= 971.09 milliseconds
98% <= 971.09 milliseconds
99% <= 971.09 milliseconds
99.9% <= 971.09 milliseconds
四种reporter
ConsoleReporter
/**
 * ConsoleReporter demo: feeds random values into the "search-result" histogram
 * and prints the registry to the console every 10 seconds.
 */
public class ConsoleReporterExample {

    private static final MetricRegistry registry = new MetricRegistry();

    private static final ConsoleReporter reporter = ConsoleReporter.forRegistry(registry)
            .convertRatesTo(TimeUnit.SECONDS)
            .convertDurationsTo(TimeUnit.SECONDS).build();

    private static final Histogram histogram = registry.histogram("search-result");

    public static void main(String[] args) {
        reporter.start(10, TimeUnit.SECONDS);
        for (; ; ) {
            doSearch();
            pauseRandomly();
        }
    }

    /** Simulates one search by feeding a random value in 0..9 to the histogram. */
    private static void doSearch() {
        int resultValue = current().nextInt(10);
        histogram.update(resultValue);
    }

    /** Sleeps for a random whole number of seconds in the range 0..499. */
    private static void pauseRandomly() {
        int seconds = current().nextInt(500);
        try {
            TimeUnit.SECONDS.sleep(seconds);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
}
顾名思义,输出到控制台:
-- Histograms ------------------------------------------------------------------
search-result
count = 1
min = 9
max = 9
mean = 9.00
stddev = 0.00
median = 9.00
75% <= 9.00
95% <= 9.00
98% <= 9.00
99% <= 9.00
99.9% <= 9.00
JmxReporter
通过jconsole查看
/**
 * JmxReporter demo: feeds random values into the "search-result" histogram and
 * starts a {@code JmxReporter} on the registry.
 */
public class JmxReporterExample {

    private static final MetricRegistry registry = new MetricRegistry();

    private static final JmxReporter reporter = JmxReporter.forRegistry(registry)
            .convertRatesTo(TimeUnit.SECONDS)
            .convertDurationsTo(TimeUnit.SECONDS).build();

    private static final Histogram histogram = registry.histogram("search-result");

    public static void main(String[] args) {
        // Unlike the scheduled reporters in this file, start() takes no interval.
        reporter.start();
        for (; ; ) {
            doSearch();
            pauseRandomly();
        }
    }

    /** Simulates one search by feeding a random value in 0..9 to the histogram. */
    private static void doSearch() {
        int resultValue = current().nextInt(10);
        histogram.update(resultValue);
    }

    /** Sleeps for a random whole number of seconds in the range 0..499. */
    private static void pauseRandomly() {
        int seconds = current().nextInt(500);
        try {
            TimeUnit.SECONDS.sleep(seconds);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
}
CsvReporter
/**
 * CsvReporter demo: feeds random values into the "search-result" histogram and
 * reports the registry every 10 seconds through a {@code CsvReporter} built on
 * a fixed output directory.
 */
public class CsvReporterExample {

    private static final MetricRegistry registry = new MetricRegistry();

    // The output directory is hard-coded; adjust it for the local machine.
    private static final CsvReporter reporter = CsvReporter.forRegistry(registry)
            .convertRatesTo(TimeUnit.SECONDS)
            .convertDurationsTo(TimeUnit.SECONDS).build(new File("/Users/qinfuxiang/Downloads"));

    private static final Histogram histogram = registry.histogram("search-result");

    public static void main(String[] args) {
        reporter.start(10, TimeUnit.SECONDS);
        for (; ; ) {
            doSearch();
            pauseRandomly();
        }
    }

    /** Simulates one search by feeding a random value in 0..9 to the histogram. */
    private static void doSearch() {
        int resultValue = current().nextInt(10);
        histogram.update(resultValue);
    }

    /** Sleeps for a random whole number of seconds in the range 0..499. */
    private static void pauseRandomly() {
        int seconds = current().nextInt(500);
        try {
            TimeUnit.SECONDS.sleep(seconds);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
}
会把度量数据导出到指定目录下的csv文件中。
Slf4jReporter
通过SLF4J的Logger输出(是否输出到控制台取决于日志框架的配置)
/**
 * Slf4jReporter demo: feeds random values into the "search-result" histogram
 * and reports the registry every 10 seconds to this class's SLF4J logger.
 */
public class SLF4jReporterExample {

    private static final MetricRegistry registry = new MetricRegistry();

    private static final Slf4jReporter reporter = Slf4jReporter.forRegistry(registry)
            .outputTo(LoggerFactory.getLogger(SLF4jReporterExample.class))
            .convertRatesTo(TimeUnit.SECONDS)
            .convertDurationsTo(TimeUnit.SECONDS).build();

    private static final Histogram histogram = registry.histogram("search-result");

    public static void main(String[] args) {
        reporter.start(10, TimeUnit.SECONDS);
        for (; ; ) {
            doSearch();
            pauseRandomly();
        }
    }

    /** Simulates one search by feeding a random value in 0..9 to the histogram. */
    private static void doSearch() {
        int resultValue = current().nextInt(10);
        histogram.update(resultValue);
    }

    /** Sleeps for a random whole number of seconds in the range 0..499. */
    private static void pauseRandomly() {
        int seconds = current().nextInt(500);
        try {
            TimeUnit.SECONDS.sleep(seconds);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
}
17:07:17.866 [metrics-logger-reporter-1-thread-1] INFO fast.cloud.nacos.metrics.reporter.SLF4jReporterExample - type=HISTOGRAM, name=search-result, count=1, min=3, max=3, mean=3.0, stddev=0.0, median=3.0, p75=3.0, p95=3.0, p98=3.0, p99=3.0, p999=3.0
17:07:27.852 [metrics-logger-reporter-1-thread-1] INFO fast.cloud.nacos.metrics.reporter.SLF4jReporterExample - type=HISTOGRAM, name=search-result, count=1, min=3, max=3, mean=3.0, stddev=0.0, median=3.0, p75=3.0, p95=3.0, p98=3.0, p99=3.0, p999=3.0
17:07:37.853 [metrics-logger-reporter-1-thread-1] INFO fast.cloud.nacos.metrics.reporter.SLF4jReporterExample - type=HISTOGRAM, name=search-result, count=1, min=3, max=3, mean=3.0, stddev=0.0, median=3.0, p75=3.0, p95=3.0, p98=3.0, p99=3.0, p999=3.0
17:07:47.851 [metrics-logger-reporter-1-thread-1] INFO fast.cloud.nacos.metrics.reporter.SLF4jReporterExample - type=HISTOGRAM, name=search-result, count=1, min=3, max=3, mean=3.0, stddev=0.0, median=3.0, p75=3.0, p95=3.0, p98=3.0, p99=3.0, p999=3.0