MySQL、PostgreSQL、MongoDB 运行监控小工具 sqshq/sampler
安装
## 源码下载地址
## 1、 https://github.com/sqshq/sampler
## 2、 https://gitee.com/mirrors/Sampler
sudo wget https://github.com/sqshq/sampler/releases/download/v1.1.0/sampler-1.1.0-linux-amd64 -O /usr/local/bin/sampler
sudo chmod +x /usr/local/bin/sampler
配置文件config.yml
MySQL
# prerequisite: installed mysql shell
variables:
mysql_connection: mysql -u root -s --database mysql --skip-column-names
sparklines:
- title: MySQL (random number example)
pty: true
init: $mysql_connection
sample: select rand();
PostgreSQL
# prerequisite: installed psql shell
variables:
PGPASSWORD: pwd
postgres_connection: psql -h localhost -U postgres --no-align --tuples-only
sparklines:
- title: PostgreSQL (random number example)
init: $postgres_connection
sample: select random();
MongoDB
# prerequisite: installed mongo shell
variables:
mongo_connection: mongo --quiet --host=localhost test
sparklines:
- title: MongoDB (random number example)
init: $mongo_connection
sample: Math.random();
Neo4j
# prerequisite: installed cypher shell
variables:
neo4j_connection: cypher-shell -u neo4j -p pwd --format plain
sparklines:
- title: Neo4j (random number example)
pty: true
init: $neo4j_connection
sample: RETURN rand();
transform: echo "$sample" | tail -n 1
Kafka lag per consumer group
variables:
kafka_connection: $KAFKA_HOME/bin/kafka-consumer-groups --bootstrap-server localhost:9092
runcharts:
- title: Kafka lag per consumer group
rate-ms: 5000
scale: 0
items:
- label: A->B
sample: $kafka_connection --group group_a --describe | awk 'NR>1 {sum += $5} END {print sum}'
- label: B->C
sample: $kafka_connection --group group_b --describe | awk 'NR>1 {sum += $5} END {print sum}'
- label: C->D
sample: $kafka_connection --group group_c --describe | awk 'NR>1 {sum += $5} END {print sum}'
config.yml
variables:
PGPASSWORD: xxxxxx
postgres_connection: psql -h localhost -U postgres -p 5432 -d test --no-align
--tuples-only
runcharts:
- title: 数据库数据写(Byte)
position: [[52, 6], [28, 11]]
rate-ms: 1000
legend:
enabled: true
details: false
scale: 0
items:
- label: 后台写
color: 178
init: $postgres_connection
sample: select bs.block_size * (buffers_checkpoint + buffers_clean + buffers_backend)
as total_writen from pg_stat_bgwriter,(SELECT CAST(current_setting('block_size') AS INTEGER) AS block_size) AS bs;
- label: 检查点平均写
color: 162
init: $postgres_connection
sample: select buffers_checkpoint * bs.block_size / (checkpoints_timed + checkpoints_req)
as checkpoint_write_avg from pg_stat_bgwriter,(SELECT CAST(current_setting('block_size') AS INTEGER) AS block_size) AS bs;
- title: 数据库连接
position: [[24, 6], [28, 11]]
rate-ms: 1000
legend:
enabled: true
details: false
scale: 0
items:
- label: 数据库连接总数
init: $postgres_connection
sample: select count(*) from pg_stat_activity;
- label: 活跃数据库连接
init: $postgres_connection
sample: select count(*) from pg_stat_activity where state = 'active' and pid
<> pg_backend_pid();
- label: 空闲数据库连接
init: $postgres_connection
sample: select count(*) from pg_stat_activity where state = 'idle' and pid
<> pg_backend_pid();
- label: 空闲未提交数据库连接
init: $postgres_connection
sample: select count(*) from pg_stat_activity where state = 'idle in transaction'
and pid <> pg_backend_pid();
- title: 数据库增删改
position: [[0, 6], [24, 11]]
rate-ms: 1000
legend:
enabled: true
details: false
scale: 0
items:
- label: 插入行数
init: $postgres_connection
sample: select tup_inserted from pg_stat_database where datname = current_database();
- label: 删除行数
init: $postgres_connection
sample: select tup_deleted from pg_stat_database where datname = current_database();
- label: 更新行数
init: $postgres_connection
sample: select tup_updated from pg_stat_database where datname = current_database();
gauges:
- title: 数据库缓存命中率
position: [[39, 0], [21, 3]]
rate-ms: 1000
scale: 2
color: 32
percent-only: true
cur:
init: $postgres_connection
sample: select round(sum(blks_hit)*100/sum(blks_hit+blks_read),2)::numeric
from pg_stat_database where datname = current_database();
max:
sample: echo 100
min:
sample: echo 0
- title: 表缓存命中率
position: [[39, 3], [21, 3]]
rate-ms: 1000
scale: 2
color: 160
percent-only: true
cur:
init: $postgres_connection
sample: SELECT SUM(heap_blks_hit)*100 / nullif(SUM(heap_blks_hit) + SUM(heap_blks_read),
0) FROM pg_statio_user_tables;
max:
sample: echo 100
min:
sample: echo 0
- title: 索引缓存命中率
position: [[60, 3], [20, 3]]
rate-ms: 1000
scale: 2
color: 178
percent-only: true
cur:
init: $postgres_connection
sample: SELECT (SUM(idx_blks_hit))*100 / nullif(SUM(idx_blks_hit + idx_blks_read),
0) FROM pg_statio_user_indexes;
max:
sample: echo 100
min:
sample: echo 0
- title: 数据库事务提交率
position: [[60, 0], [20, 3]]
rate-ms: 1000
scale: 2
color: 122
percent-only: true
cur:
init: $postgres_connection
sample: select round(100*(xact_commit::numeric/(case when xact_commit > 0
then xact_commit else 1 end + xact_rollback)),2)::numeric as commit_ratio
from pg_stat_database where datname = current_database();
max:
sample: echo 100
min:
sample: echo 0
textboxes:
- title: 服务器状态
position: [[0, 0], [39, 6]]
rate-ms: 500
color: 200
sample: top -bn 1 | head -n 5
- title: 数据库会话 锁情况
position: [[0, 29], [80, 12]]
rate-ms: 1000
color: 160
sample: psql -h localhost -U postgres -p 5432 -d test -c "SELECT COUNT(pg_stat_activity.pid)
AS number_of_queries,substring(TRIM(leading FROM regexp_replace(pg_stat_activity.query,
'[\n\r]+'::text, ' '::text, 'g'::text)) FROM 0 FOR 80) AS query_name,MAX(age(current_timestamp,
query_start)) AS max_wait_time,wait_event,usename,locktype,MODE,granted FROM
pg_stat_activity LEFT JOIN pg_locks ON pg_stat_activity.pid = pg_locks.pid
WHERE query != '<IDLE>' AND query NOT ilike '%pg_%' AND query NOT ilike '%application_name%'
AND query NOT ilike '%inet%' AND age(current_timestamp, query_start) > '5
milliseconds'::INTERVAL GROUP BY query_name,wait_event,usename,locktype,MODE,granted
ORDER BY max_wait_time DESC;"
- title: 数据库实例概况
position: [[0, 17], [34, 12]]
rate-ms: 1000
color: 122
init: $postgres_connection
sample: select '数据库版本: ' opttype,substring(version(),'(.*) on ') union all
select '数据库当前用户: ',current_role union all select '当前访问数据库: ',current_database()
union all select '当前数据库大小: ',pg_size_pretty(pg_database_size(current_database()))
union all select '当前数据库表数量:',count(*)::varchar from pg_tables where schemaname
not in('pg_catalog','information_schema') union all select '实例端口: ',setting
from pg_settings where name = 'port' union all select '实例初始化时间: ',to_timestamp(((system_identifier>>32)
& (2^32 -1)::bigint))::varchar from pg_control_system() union all select '实例启动时间: ',pg_postmaster_start_time()::varchar
union all select '实例数据目录: ',setting from pg_settings where name = 'data_directory'
union all select '实例参数文件: ',setting from pg_settings where name = 'config_file'
union all select '实例访问配置文件:',setting from pg_settings where name = 'hba_file';
- title: 对象膨胀情况
position: [[34, 17], [46, 12]]
rate-ms: 1000
color: 80
sample: psql -h localhost -U postgres -p 5432 -d test -c "WITH constants AS (SELECT
current_setting('block_size') ::NUMERIC AS bs, 23 AS hdr, 4 AS ma), bloat_info
AS (SELECT ma, bs, schemaname, tablename, (datawidth + (hdr + ma - (CASE WHEN
hdr%ma = 0 THEN ma ELSE hdr%ma END))) ::NUMERIC AS datahdr, (maxfracsum *
(nullhdr + ma - (CASE WHEN nullhdr%ma = 0 THEN ma ELSE nullhdr%ma END))) AS
nullhdr2 FROM (SELECT schemaname, tablename, hdr, ma, bs, SUM((1 - null_frac)
* avg_width) AS datawidth, MAX(null_frac) AS maxfracsum, hdr + (SELECT 1 +
COUNT(*) / 8 FROM pg_stats s2 WHERE null_frac <> 0 AND s2.schemaname = s.schemaname
AND s2.tablename = s.tablename) AS nullhdr FROM pg_stats s, constants GROUP
BY 1,2,3,4,5) AS foo), table_bloat AS (SELECT schemaname, tablename, cc.relpages,
bs, ceil((cc.reltuples * ((datahdr + ma - (CASE WHEN datahdr%ma = 0 THEN ma
ELSE datahdr%ma END)) + nullhdr2 + 4)) / (bs - 20 ::FLOAT)) AS otta FROM bloat_info
JOIN pg_class cc ON cc.relname = bloat_info.tablename JOIN pg_namespace nn
ON cc.relnamespace = nn.oid AND nn.nspname = bloat_info.schemaname AND nn.nspname
<> 'information_schema'), index_bloat AS (SELECT schemaname, tablename, bs,
coalesce(c2.relname, '?') AS iname, coalesce(c2.reltuples, 0) AS ituples,
coalesce(c2.relpages, 0) AS ipages, coalesce(ceil((c2.reltuples * (datahdr
- 12)) / (bs - 20 ::FLOAT)), 0) AS iotta FROM bloat_info JOIN pg_class cc
ON cc.relname = bloat_info.tablename JOIN pg_namespace nn ON cc.relnamespace
= nn.oid AND nn.nspname = bloat_info.schemaname AND nn.nspname <> 'information_schema'
JOIN pg_index i ON indrelid = cc.oid JOIN pg_class c2 ON c2.oid = i.indexrelid)
SELECT TYPE 对象类型, schemaname||'.'||object_name 对象名, bloat 膨胀率, pg_size_pretty(raw_waste)
AS 膨胀量 FROM (SELECT 'table' AS TYPE, schemaname, tablename AS object_name,
round(CASE WHEN otta = 0 THEN 0.0 ELSE table_bloat.relpages / otta ::NUMERIC
END, 1) AS bloat, CASE WHEN relpages < otta THEN '0' ELSE (bs * (table_bloat.relpages
- otta) ::bigint) ::bigint END AS raw_waste FROM table_bloat UNION SELECT
'index' AS TYPE, schemaname, tablename || '::' || iname AS object_name, round(CASE
WHEN iotta = 0 OR ipages = 0 THEN 0.0 ELSE ipages / iotta ::NUMERIC END, 1)
AS bloat, CASE WHEN ipages < iotta THEN '0' ELSE (bs * (ipages - iotta)) ::bigint
END AS raw_waste FROM index_bloat) bloat_summary ORDER BY raw_waste DESC,
bloat DESC LIMIT 10;"
Runchart
runcharts:
- title: Search engine response time
rate-ms: 500 # sampling rate, default = 1000
scale: 2 # number of digits after sample decimal point, default = 1
legend:
enabled: true # enables item labels, default = true
details: false # enables item statistics: cur/min/max/dlt values, default = true
items:
- label: GOOGLE
sample: curl -o /dev/null -s -w '%{time_total}' https://www.google.com
color: 178 # 8-bit color number, default one is chosen from a pre-defined palette
- label: YAHOO
sample: curl -o /dev/null -s -w '%{time_total}' https://search.yahoo.com
- label: BING
sample: curl -o /dev/null -s -w '%{time_total}' https://www.bing.com
Sparkline
sparklines:
- title: CPU usage
rate-ms: 200
scale: 0
sample: ps -A -o %cpu | awk '{s+=$1} END {print s}'
- title: Free memory pages
rate-ms: 200
scale: 0
sample: memory_pressure | grep 'Pages free' | awk '{print $3}'
Barchart
barcharts:
- title: Local network activity
rate-ms: 500 # sampling rate, default = 1000
scale: 0 # number of digits after sample decimal point, default = 1
items:
- label: UDP bytes in
sample: nettop -J bytes_in -l 1 -m udp | awk '{sum += $4} END {print sum}'
- label: UDP bytes out
sample: nettop -J bytes_out -l 1 -m udp | awk '{sum += $4} END {print sum}'
- label: TCP bytes in
sample: nettop -J bytes_in -l 1 -m tcp | awk '{sum += $4} END {print sum}'
- label: TCP bytes out
sample: nettop -J bytes_out -l 1 -m tcp | awk '{sum += $4} END {print sum}'
Gauge
gauges:
- title: Minute progress
rate-ms: 500 # sampling rate, default = 1000
scale: 2 # number of digits after sample decimal point, default = 1
percent-only: false # toggle display of the current value, default = false
color: 178 # 8-bit color number, default one is chosen from a pre-defined palette
cur:
sample: date +%S # sample script for current value
max:
sample: echo 60 # sample script for max value
min:
sample: echo 0 # sample script for min value
- title: Year progress
cur:
sample: date +%j
max:
sample: echo 365
min:
sample: echo 0
Textbox
textboxes:
- title: Local weather
rate-ms: 10000 # sampling rate, default = 1000
sample: curl wttr.in?0ATQF
border: false # border around the item, default = true
color: 178 # 8-bit color number, default is white
- title: Docker containers stats
rate-ms: 500
sample: docker stats --no-stream --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.PIDs}}"
Asciibox
asciiboxes:
- title: UTC time
rate-ms: 500 # sampling rate, default = 1000
font: 3d # font type, default = 2d
border: false # border around the item, default = true
color: 43 # 8-bit color number, default is white
sample: env TZ=UTC date +%r
Bells and whistles
Triggers
Triggers allow to perform conditional actions, like visual/sound alerts or an arbitrary shell command. The following examples illustrate the concept.
Clock gauge, which shows minute progress and announces current time at the beginning of each minute
gauges:
- title: MINUTE PROGRESS
position: [[0, 18], [80, 0]]
cur:
sample: date +%S
max:
sample: echo 60
min:
sample: echo 0
triggers:
- title: CLOCK BELL EVERY MINUTE
condition: '[ $label == "cur" ] && [ $cur -eq 0 ] && echo 1 || echo 0' # expects "1" as TRUE indicator
actions:
terminal-bell: true # standard terminal bell, default = false
sound: true # NASA quindar tone, default = false
visual: false # notification with current value on top of the component area, default = false
script: say -v samantha `date +%I:%M%p` # an arbitrary script, which can use $cur, $prev and $label variables
Search engine latency chart, which alerts user when latency exceeds a threshold
runcharts:
- title: SEARCH ENGINE RESPONSE TIME (sec)
rate-ms: 200
items:
- label: GOOGLE
sample: curl -o /dev/null -s -w '%{time_total}' https://www.google.com
- label: YAHOO
sample: curl -o /dev/null -s -w '%{time_total}' https://search.yahoo.com
triggers:
- title: Latency threshold exceeded
condition: echo "$prev < 0.3 && $cur > 0.3" |bc -l # expects "1" as TRUE indicator
actions:
terminal-bell: true # standard terminal bell, default = false
sound: true # NASA quindar tone, default = false
visual: true # visual notification on top of the component area, default = false
script: 'say alert: ${label} latency exceeded ${cur} second' # an arbitrary script, which can use $cur, $prev and $label variables
Basic mode
textboxes:
- title: MongoDB polling
rate-ms: 500
init: mongo --quiet --host=localhost test # executes only once to start the interactive session
sample: Date.now(); # executes with a required rate, in scope of the interactive session
transform: echo result = $sample # executes in scope of local session, $sample variable is available for transformation
PTY mode
textboxes:
- title: Neo4j polling
pty: true # enables pseudo-terminal mode, default = false
init: cypher-shell -u neo4j -p pwd --format plain
sample: RETURN rand();
transform: echo "$sample" | tail -n 1
- title: Top on a remote server
pty: true # enables pseudo-terminal mode, default = false
init: ssh -i ~/user.pem ec2-user@1.2.3.4
sample: top
Multistep init
textboxes:
- title: Java application uptime
multistep-init:
- java -jar jmxterm-1.0.0-uber.jar
- open host:port # or local PID
- bean java.lang:type=Runtime
sample: get Uptime
Variables
variables:
mongoconnection: mongo --quiet --host=localhost test
barcharts:
- title: MongoDB documents by status
items:
- label: IN_PROGRESS
init: $mongoconnection
sample: db.getCollection('events').find({status:'IN_PROGRESS'}).count()
- label: SUCCESS
init: $mongoconnection
sample: db.getCollection('events').find({status:'SUCCESS'}).count()
- label: FAIL
init: $mongoconnection
sample: db.getCollection('events').find({status:'FAIL'}).count()
Color theme
theme: light # default = dark
sparklines:
- title: CPU usage
sample: ps -A -o %cpu | awk '{s+=$1} END {print s}'
运行
sampler -c config.yml