1.基本统计信息
1)pg_class数据字典(存放基本统计信息)
--reltuples: If the table has never yet been vacuumed or analyzed, reltuples contains -1 indicating that the row count is unknown.
--relpasge
--relallvisible:number of pages tagged in the visibility map;Index Only Scan计算成本,vacuum更新
SELECT reltuples, relpages, relallvisible FROM pg_class WHERE relname ='t_skew';
postgres=# SELECT reltuples, relpages, relallvisible FROM pg_class WHERE relname ='t_skew';
reltuples | relpages | relallvisible
-----------+----------+---------------
100009 | 443 | 0
(1 row)
2)reltuples作为cards(查询无过滤条件)
postgres=# explain select * from t_skew;
QUERY PLAN
--------------------------------------------------------------
Seq Scan on t_skew (cost=0.00..1443.09 rows=100009 width=4) -->Cards100009
(1 row)
2)关于统计信息
统计信息对于性能至关重要,对于表进行analyze可以收集统计信息(自动收集/手动收集);
vacuum full/cluster(create index/reindex无法触发统计信息收集)
pg使用300*default_statistics_target进行采样--?待源码验证,对于小表完全可以覆盖;
对应大表采样可能不够准确;
当数据库变更频繁需要及时更新统计信息才能确保优化器成本计算是准确的
3)统计信息与成本计算
--a)创建表
create table t_stats(id int,c1 int) WITH (autovacuum_enabled = false);
--b)查询信息
SELECT reltuples, relpages, relallvisible,FROM pg_class WHERE relname = 't_stats';
postgres=# SELECT reltuples, relpages, relallvisible FROM pg_class WHERE relname = 't_stats';
reltuples | relpages | relallvisible
-----------+----------+---------------
0 | 0 | 0
(1 row)
--c)执行计划
postgres=# explain (costs on,analyze on,buffers on,timing on,settings on,summary on) select * from t_stats;
QUERY PLAN
-----------------------------------------------------------------------------------------------------
Seq Scan on t_stats (cost=0.00..32.60 rows=2260 width=8) (actual time=0.003..0.003 rows=0 loops=1)
Settings: work_mem = '8MB'
Planning Time: 0.058 ms
Execution Time: 0.050 ms
(4 rows)
--d)插入数据10w
postgres=# insert into t_stats select generate_series(1,100000),1;
postgres=# analyze verbose t_stats;
INFO: analyzing "public.t_stats"
INFO: "t_stats": scanned 443 of 443 pages, containing 100000 live rows and 0 dead rows; 30000 rows in sample, 100000 estimated total rows
ANALYZE
--e)创建索引
create index idx_t_stats_com on t_stats(id,c1);
--f)查看数据
postgres=# SELECT reltuples, relpages, relallvisible FROM pg_class WHERE relname = 't_stats';
reltuples | relpages | relallvisible
-----------+----------+---------------
100000 | 443 | 0
(1 row)
--id=1单谓词
postgres=# explain select * from t_stats where id =1;
QUERY PLAN
------------------------------------------------------------------------------------
Index Only Scan using idx_t_stats_com on t_stats (cost=0.29..8.31 rows=1 width=8)
Index Cond: (id = 1)
(2 rows)
--id1 and c1=1多谓词(索引全覆盖)
postgres=# explain select * from t_stats where id =1 and c1=1;
QUERY PLAN
------------------------------------------------------------------------------------
Index Only Scan using idx_t_stats_com on t_stats (cost=0.29..8.31 rows=1 width=8)
Index Cond: ((id = 1) AND (c1 = 1))
(2 rows)
postgres=# explain (costs on,analyze on,buffers on,timing on,settings on)select * from t_stats where id =1 and c1=1;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------
Index Only Scan using idx_t_stats_com on t_stats (cost=0.29..8.31 rows=1 width=8) (actual time=0.071..0.072 rows=1 loops=1)
Index Cond: ((id = 1) AND (c1 = 1))
Heap Fetches: 1
Buffers: shared read=3
I/O Timings: read=0.037
Settings: work_mem = '8MB'
Planning Time: 0.054 ms
Execution Time: 0.097 ms
(8 rows)
--vacuum 更新relallvisible(no use)
--添加列实现非全覆盖,Index Only Scan-> Index Scan
postgres=# explain (costs on,analyze on,buffers on,timing on,settings on)select * from t_stats where id =1 and c1=1;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------
Index Scan using idx_t_stats_com on t_stats (cost=0.29..8.31 rows=1 width=12) (actual time=0.260..0.260 rows=1 loops=1)
Index Cond: ((id = 1) AND (c1 = 1))
Buffers: shared read=3
I/O Timings: read=0.018
Settings: work_mem = '8MB'
Planning Time: 0.120 ms
Execution Time: 0.307 ms
(7 rows)
--c1全表扫描:443 pages
postgres=# explain (costs on,analyze on,buffers on,timing on,settings on)select * from t_stats where c1=1;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------
Seq Scan on t_stats (cost=0.00..1693.00 rows=100000 width=12) (actual time=0.272..7.941 rows=100000 loops=1)
Filter: (c1 = 1)
Buffers: shared read=443
I/O Timings: read=1.320
Settings: work_mem = '8MB'
Planning Time: 0.053 ms
Execution Time: 10.482 ms
(7 rows)