本文为云贝教育 刘峰老师 原创,请尊重知识产权,转发请注明出处,不接受任何抄袭、演绎和未经注明出处的转载。【PostgreSQL】VACUUM实验脚本 - 课程体系 - 云贝教育
一、死元组产生的过程
1.1 创建pageinspect扩展
create extension pageinspect;
1.2 创建表t1
testdb=# create table t1(id int);
CREATE TABLE
testdb=#
testdb=# insert into t1 values(1);
INSERT 0 1
testdb=# insert into t1 values(2);
INSERT 0 1
testdb=# select * from t1;
id
----
1
2
(2 rows)
1.3 查看表t1的行
postgre=# SELECT * FROM heap_page_items(get_raw_page('t1', 0));
lp | lp_off | lp_flags | lp_len | t_xmin | t_xmax | t_field3 | t_ctid | t_info | mask2 | t_infomask| t_hoff | t_bits | t_oid | t_data
----+--------+----------+--------+--------+--------+----------+--------+----------+--------+-----------+--------+-------+------------
1 | 8160 | 1 | 28 | 904 | 0 | 0 | (0,1) | 1 | 2304 | 24 | | | \x01000000
2 | 8128 | 1 | 28 | 905 | 0 | 0 | (0,2) | 1 | 2304 | 24 | | | \x02000000
(2 rows)
1.4 修改第二行数据
testdb=# update t1 set id=22 where id=2;
UPDATE 1
1.5 查看T1表块中的信息
testdb=# SELECT * FROM heap_page_items(get_raw_page('t1', 0));
lp | lp_off | lp_flags | lp_len | t_xmin | t_xmax | t_field3 | t_ctid | t_info
mask2 | t_infomask | t_hoff | t_bits | t_oid | t_data
----+--------+----------+--------+--------+--------+----------+--------+-------
------+------------+--------+--------+-------+------------
1 | 8160 | 1 | 28 | 904 | 0 | 0 | (0,1) |
1 | 2304 | 24 | | | \x01000000
2 | 8128 | 1 | 28 | 905 | 907 | 0 | (0,3) |
16385 | 256 | 24 | | | \x02000000
3 | 8096 | 1 | 28 | 907 | 0 | 0 | (0,3) |
32769 | 10240 | 24 | | | \x16000000
(3 rows)
这里看到T1表的块0中有3条数据,ID=2的数据仍然存在块中
二、vacuum实验
2.1 vacuum
1) 创建表
testdb=# drop table t1;
DROP TABLE
testdb=# create table t1 as select * from pg_class;
SELECT 433
testdb=# insert into t1 select * from t1;
INSERT 0 431
testdb=# insert into t1 select * from t1;
INSERT 0 862
testdb=# insert into t1 select * from t1;
INSERT 0 1724
testdb=# insert into t1 select * from t1;
INSERT 0 3448
2)收集表的统计信息
testdb=# analyze t1;
ANALYZE
testdb=# select relname, relpages,reltuples from pg_class where relname='t1';
relname | relpages | reltuples
---------+----------+-----------
t1 | 173 | 6896
(1 row)
3)查看此时全表扫描的执行计划
testdb=# explain(analyze,buffers) select count(1) from t1;
QUERY PLAN
-------------------------------------------------------------------------------
---------------------------
Aggregate (cost=224.60..224.61 rows=1 width=8) (actual time=0.840..0.841 rows
=1 loops=1)
Buffers: shared hit=173
-> Seq Scan on t1 (cost=0.00..214.28 rows=4128 width=0) (actual time=0.005
..0.340 rows=4128 loops=1)
Buffers: shared hit=173
Planning:
Buffers: shared hit=2
Planning Time: 0.047 ms
Execution Time: 0.861 ms
(8 rows)
从上面可以看出,在共享缓存池中命中的块173,和统计信息一致。
4)删除表的数据
testdb=# delete from t1 where oid >10000;
DELETE 2480
5)再查看执行计划
testdb=# explain(analyze,buffers) select count(1) from t1;
QUERY PLAN
-------------------------------------------------------------------------------
---------------------------
Aggregate (cost=224.60..224.61 rows=1 width=8) (actual time=0.840..0.841 rows
=1 loops=1)
Buffers: shared hit=173
-> Seq Scan on t1 (cost=0.00..214.28 rows=4128 width=0) (actual time=0.005
..0.340 rows=4128 loops=1)
Buffers: shared hit=173
Planning:
Buffers: shared hit=2
Planning Time: 0.047 ms
Execution Time: 0.861 ms
(8 rows)
此时仍然扫描的173个块
6)vacuum
testdb=# vacuum t1;
VACUUM
7)查看执行计划
testdb=# explain(analyze,buffers) select count(1) from t1;
QUERY PLAN
-------------------------------------------------------------------------------
---------------------------
Aggregate (cost=222.60..222.61 rows=1 width=8) (actual time=0.556..0.557 rows
=1 loops=1)
Buffers: shared hit=171
-> Seq Scan on t1 (cost=0.00..212.28 rows=4128 width=0) (actual time=0.005
..0.356 rows=4128 loops=1)
Buffers: shared hit=171
Planning:
Buffers: shared hit=2
Planning Time: 0.055 ms
Execution Time: 0.576 ms
(8 rows)
8)此时统计信息也收集
testdb=# select relname, relpages,reltuples from pg_class where relname='t1';
relname | relpages | reltuples
---------+----------+-----------
t1 | 171 | 4128
(1 row)
2.2 vacuum full
在2.1的基础上
1)执行vacuum full
testdb=# vacuum full t1;
VACUUM
testdb=# select relname, relpages,reltuples from pg_class where relname='t1';
relname | relpages | reltuples
---------+----------+-----------
t1 | 98 | 4128
(1 row)
2)查看执行计划
testdb=# explain(analyze,buffers) select count(1) from t1;
-------------------------------------------------------------------------------
---------------------------
Aggregate (cost=149.60..149.61 rows=1 width=8) (actual time=2.617..2.618 rows
=1 loops=1)
Buffers: shared read=98
-> Seq Scan on t1 (cost=0.00..139.28 rows=4128 width=0) (actual time=0.093
..1.940 rows=4128 loops=1)
Buffers: shared read=98
Planning:
Buffers: shared hit=2
Planning Time: 0.116 ms
Execution Time: 2.658 ms
(8 rows)
三、free space 实验
1)测试表准备
testdb=# create table t5 as select * from pg_class;
SELECT 434
testdb=# insert into t5 select * from t5;
INSERT 0 434
testdb=# insert into t5 select * from t5;
INSERT 0 868
testdb=# insert into t5 select * from t5;
INSERT 0 1736
testdb=# insert into t5 select * from t5;
INSERT 0 3472
2)删除大量数据
testdb=# select count(1) from t5 where oid>10000;
count
-------
2816
(1 row)
testdb=# delete from t5 where oid>10000;
DELETE 2816
3)创建监控插件
testdb=# CREATE EXTENSION pg_freespacemap;
CREATE EXTENSION
testdb=# SELECT count(*) as "number of pages",
pg_size_pretty(cast(avg(avail) as bigint)) as "Av. freespace size",
round(100 * avg(avail)/8192 ,2) as "Av. freespace ratio"
FROM pg_freespace('accounts');
4)查看当前表T5的FREE情况
testdb=# SELECT count(*) as "number of pages",
testdb-# pg_size_pretty(cast(avg(avail) as bigint)) as "Av. freespace size",
testdb-# round(100 * avg(avail)/8192 ,2) as "Av. freespace ratio"
testdb-# FROM pg_freespace('t5');
number of pages | Av. freespace size | Av. freespace ratio
-----------------+--------------------+---------------------
174 | 86 bytes | 1.05
(1 row)
5)vacuum之后再查
testdb=# vacuum t5;
VACUUM
testdb=# SELECT count(*) as "number of pages",
testdb-# pg_size_pretty(cast(avg(avail) as bigint)) as "Av. freespace size",
testdb-# round(100 * avg(avail)/8192 ,2) as "Av. freespace ratio"
testdb-# FROM pg_freespace('t5');
number of pages | Av. freespace size | Av. freespace ratio
-----------------+--------------------+---------------------
174 | 3586 bytes | 43.77
(1 row)
6)vacuum full之后再查
testdb=# vacuum full t5;
VACUUM
testdb=# SELECT count(*) as "number of pages",
testdb-# pg_size_pretty(cast(avg(avail) as bigint)) as "Av. freespace size",
testdb-# round(100 * avg(avail)/8192 ,2) as "Av. freespace ratio"
testdb-# FROM pg_freespace('t5');
number of pages | Av. freespace size | Av. freespace ratio
-----------------+--------------------+---------------------
98 | 0 bytes | 0.00
(1 row)
四、autovacuum实验
1、创建实验表
postgre=# create table t2(id int,name text);
CREATE TABLE
postgre=# insert into t2 values(generate_series(1,1000),'aaaaa'||generate_series(1,1000));
INSERT 0 1000
2、查看表t2的信息
cmdb=# select relname,reltuples,relpages from pg_class where relname='t2';
relname | reltuples | relpages
---------+-----------+----------
t2 | 1000 | 8
(1 row)
3、计算触发条件
autovacuum Vacuum=0.2*1000+50=250;
autovacuum ANALYZE=0.1*1000+50=150;
4、修改t2表151行
postgre=# update t2 set name='bbb' where id<152; UPDATE 151 postgre=# SELECT schemaname, n_tup_ins as "inserts", n_tup_upd as "updates", n_tup_del as "deletes", n_live_tup as "live_tuples", n_dead_tup as "dead_tuples", last_vacuum, last_autovacuum, last_analyze, last_autoanalyze FROM pg_stat_user_tables WHERE relname = 't2'; -[ RECORD 1 ]----+------------------------------ schemaname | public inserts | 1000 updates | 151 deletes | 0 live_tuples | 1000 dead_tuples | 151 last_vacuum | last_autovacuum | last_analyze | last_autoanalyze | 2023-03-16 09:24:23.355939+08
以上实验可以确认,在修改151条数据时,触发了autovacuum ANALYZE
5、再次修改251条数据
postgre=# update t2 set name='bbb' where id<101; UPDATE 100 postgre=# SELECT schemaname, n_tup_ins as "inserts", n_tup_upd as "updates", n_tup_del as "deletes", n_live_tup as "live_tuples", n_dead_tup as "dead_tuples", last_vacuum, last_autovacuum, last_analyze, last_autoanalyze FROM pg_stat_user_tables WHERE relname = 't2'; -[ RECORD 1 ]----+------------------------------ schemaname | public inserts | 1000 updates | 402 deletes | 0 live_tuples | 1000 dead_tuples | 0 last_vacuum | last_autovacuum | 2023-03-16 09:27:23.392077+08 last_analyze | last_autoanalyze | 2023-03-16 09:27:23.397413+08
以上实验可以确认,在修改100条数据时,间隔一分钟左右触发了autovacuum。
这里其实只需要看dead_tuples是否超过了250,在触发了autovacuum之后,dead_tuples被置为0