当一个页在被读取或更新时,在某些场景下,pg可以进行快速页清理和剪枝(cleanup and pruning)。
前面的update操作没能在该页找到足够空间存放新元组
该页存放的数据大于fillfactor参数的值(类似oracle pctfree)
Page pruning删除在所有快照中均已不可见的元组(超出database horizon),删除的范围不会超过该页,执行速度非常快。指向被清理元组的指针依旧保留,因为它们可能会被索引用到。因此,vm和fsm文件均不会更新,被清理出来的空间仅用于update,不用于insert。
本文对Page Prunning进行演示如下:
1.环境设置
数据库版本:PostgreSQL 14.7
[postgres@du101 ~]$ psql
psql (14.7)
Type "help" for help.
postgres=# CREATE EXTENSION pageinspect;
CREATE EXTENSION
postgres=# create extension pgstattuple;
CREATE EXTENSION
postgres=# CREATE TABLE hot(id integer, s char(2000)) WITH (fillfactor = 75,autovacuum_enabled = false);
CREATE TABLE
postgres=# CREATE INDEX hot_id ON hot(id);
CREATE INDEX
postgres=# CREATE INDEX hot_s ON hot(s);
CREATE INDEX
postgres=#
根据表的结构,假设表hot的s列仅仅包含拉丁字母,所以每个tuple拥有固定的大小2004 bytes,再加上24 bytes的tuple header。此时存储参数fillfactor设置为75%,则这个page的空间可以容纳4个tuple,但是只能插入3个tuple。
2.用到的函数
CREATE OR REPLACE FUNCTION heap_page(relname
text, pageno integer)
RETURNS TABLE(
ctid tid, state text,
xmin text, xmax text,
hhu text, hot text, t_ctid tid
) AS $$
SELECT (pageno,lp)::text::tid AS ctid,
CASE lp_flags
WHEN 0 THEN 'unused'
WHEN 1 THEN 'normal'
WHEN 2 THEN 'redirect to '||lp_off
WHEN 3 THEN 'dead'
END AS state,
t_xmin || CASE
WHEN (t_infomask & 256) > 0 THEN ' c'
WHEN (t_infomask & 512) > 0 THEN ' a'
ELSE ''
END AS xmin,
t_xmax || CASE
WHEN (t_infomask & 1024) > 0 THEN ' c'
WHEN (t_infomask & 2048) > 0 THEN ' a'
ELSE ''
END AS xmax,
CASE WHEN (t_infomask2 & 16384) > 0 THEN 't' END AS hhu,
CASE WHEN (t_infomask2 & 32768) > 0 THEN 't' END AS hot,
t_ctid
FROM heap_page_items(get_raw_page(relname,pageno))
ORDER BY lp;
$$ LANGUAGE sql;
CREATE OR REPLACE FUNCTION index_page(relname text, pageno integer)
RETURNS TABLE(itemoffset smallint, htid tid, dead boolean)
AS $$
SELECT itemoffset,
htid,
dead -- starting from v.13
FROM bt_page_items(relname,pageno);
$$ LANGUAGE sql;
3.page prunning演示
数据块发生page pruning的条件如下:
While a heap page is being read or updated, PostgreSQL can perform some quickpage cleanup, or pruning. It happens in the following cases:
• The previous UPDATE operation did not find enough space to place a new tuple into the same page. This event is reflected in the page header.
• The heap page contains more data than allowed by the 100 fillfactor storage parameter.
根据page pruning的条件:The previous UPDATE operation did not find enough space to place a new tuple into the same page.
所以得是第5个tuple的时候,才会发生page prunning。
postgres=# INSERT INTO hot VALUES (1, 'A');
INSERT 0 1
postgres=# UPDATE hot SET s = 'B';
UPDATE 1
postgres=# UPDATE hot SET s = 'C';
UPDATE 1
postgres=# UPDATE hot SET s = 'D';
UPDATE 1
postgres=# SELECT * FROM heap_page('hot',0);
ctid | state | xmin | xmax | hhu | hot | t_ctid
-------+--------+-------+-------+-----+-----+--------
(0,1) | normal | 739 c | 740 c | | | (0,2)
(0,2) | normal | 740 c | 741 c | | | (0,3)
(0,3) | normal | 741 c | 742 | | | (0,4)
(0,4) | normal | 742 | 0 a | | | (0,4)
(4 rows)
postgres=#
postgres=# SELECT * FROM index_page('hot_id',1);
itemoffset | htid | dead
------------+-------+------
1 | (0,1) | f
2 | (0,2) | f
3 | (0,3) | f
4 | (0,4) | f
(4 rows)
postgres=#
--索引块的0号page,存放的是元数据。
postgres=# SELECT * FROM index_page('hot_s',1);
itemoffset | htid | dead
------------+-------+------
1 | (0,1) | f
2 | (0,2) | f
3 | (0,3) | f
4 | (0,4) | f
(4 rows)
postgres=#
--数据块的tuple通过t_cid指向到(0,4),每个索引都有一个对应的索引条目
postgres=# select pg_table_size('hot'),pg_indexes_size('hot');
pg_table_size | pg_indexes_size
---------------+-----------------
16384 | 32768
(1 row)
postgres=# SELECT * FROM pgstattuple('public.hot')\gx
-[ RECORD 1 ]------+------
table_len | 8192
tuple_count | 1
tuple_len | 2032
tuple_percent | 24.8
dead_tuple_count | 3
dead_tuple_len | 6096
dead_tuple_percent | 74.41
free_space | 20
free_percent | 0.24
postgres=# SELECT * FROM pgstatindex('public.hot_s')\gx
-[ RECORD 1 ]------+------
version | 4
tree_level | 0
index_size | 16384
root_block_no | 1
internal_pages | 0
leaf_pages | 1
empty_pages | 0
deleted_pages | 0
avg_leaf_density | 2.6
leaf_fragmentation | 0
postgres=# SELECT * FROM pgstatindex('public.hot_id')\gx
-[ RECORD 1 ]------+------
version | 4
tree_level | 0
index_size | 16384
root_block_no | 1
internal_pages | 0
leaf_pages | 1
empty_pages | 0
deleted_pages | 0
avg_leaf_density | 1.03
leaf_fragmentation | 0
postgres=#
----数据块中存在dead_tuple,且dead_tuple的数量为3,只有最后一个tuple为live tuple。
postgres=# UPDATE hot SET s = 'E';
UPDATE 1
postgres=#
--再次修改数据,会触发page prunning
postgres=# SELECT * FROM heap_page('hot',0);
ctid | state | xmin | xmax | hhu | hot | t_ctid
-------+--------+-------+------+-----+-----+--------
(0,1) | dead | | | | |
(0,2) | dead | | | | |
(0,3) | dead | | | | |
(0,4) | normal | 742 c | 745 | | | (0,5)
(0,5) | normal | 745 | 0 a | | | (0,5)
(5 rows)
postgres=#
--数据块中的指针,在发生page prunning的时候不能被移除,因为还有索引条目指向了这个执行。PostgreSQL只是改变了它的状态从normal到dead。
postgres=# SELECT * FROM index_page('hot_id',1);
itemoffset | htid | dead
------------+-------+------
1 | (0,1) | f
2 | (0,2) | f
3 | (0,3) | f
4 | (0,4) | f
5 | (0,5) | f
(5 rows)
postgres=# SELECT * FROM index_page('hot_s',1);
itemoffset | htid | dead
------------+-------+------
1 | (0,1) | f
2 | (0,2) | f
3 | (0,3) | f
4 | (0,4) | f
5 | (0,5) | f
(5 rows)
postgres=#
--page prunning只会对数据块有效,并不会影响到索引。
postgres=# select pg_table_size('hot'),pg_indexes_size('hot');
pg_table_size | pg_indexes_size
---------------+-----------------
16384 | 32768
(1 row)
postgres=# SELECT * FROM pgstattuple('public.hot')\gx
-[ RECORD 1 ]------+-----
table_len | 8192
tuple_count | 1
tuple_len | 2032
tuple_percent | 24.8
dead_tuple_count | 1
dead_tuple_len | 2032
dead_tuple_percent | 24.8
free_space | 4080
free_percent | 49.8
postgres=# SELECT * FROM pgstatindex('public.hot_s')\gx
-[ RECORD 1 ]------+------
version | 4
tree_level | 0
index_size | 16384
root_block_no | 1
internal_pages | 0
leaf_pages | 1
empty_pages | 0
deleted_pages | 0
avg_leaf_density | 3.24
leaf_fragmentation | 0
postgres=# SELECT * FROM pgstatindex('public.hot_id')\gx
-[ RECORD 1 ]------+------
version | 4
tree_level | 0
index_size | 16384
root_block_no | 1
internal_pages | 0
leaf_pages | 1
empty_pages | 0
deleted_pages | 0
avg_leaf_density | 1.28
leaf_fragmentation | 0
postgres=#
--发生page pruning后数据块的可用空间增加了。
postgres=# UPDATE hot SET s = 'F';
UPDATE 1
postgres=# SELECT * FROM heap_page('hot',0);
ctid | state | xmin | xmax | hhu | hot | t_ctid
-------+--------+-------+-------+-----+-----+--------
(0,1) | dead | | | | |
(0,2) | dead | | | | |
(0,3) | dead | | | | |
(0,4) | normal | 742 c | 745 c | | | (0,5)
(0,5) | normal | 745 c | 746 | | | (0,6)
(0,6) | normal | 746 | 0 a | | | (0,6)
(6 rows)
postgres=#
postgres=# select count(*) from hot;
count
-------
1
(1 row)
postgres=# SELECT * FROM heap_page('hot',0);
ctid | state | xmin | xmax | hhu | hot | t_ctid
-------+--------+-------+------+-----+-----+--------
(0,1) | dead | | | | |
(0,2) | dead | | | | |
(0,3) | dead | | | | |
(0,4) | dead | | | | |
(0,5) | dead | | | | |
(0,6) | normal | 746 c | 0 a | | | (0,6)
(6 rows)
postgres=#
---select的读操作也会发生page prunning。
postgres=# SELECT * FROM index_page('hot_id',1);
itemoffset | htid | dead
------------+-------+------
1 | (0,1) | f
2 | (0,2) | f
3 | (0,3) | f
4 | (0,4) | f
5 | (0,5) | f
6 | (0,6) | f
(6 rows)
postgres=# SELECT * FROM index_page('hot_s',1);
itemoffset | htid | dead
------------+-------+------
1 | (0,1) | f
2 | (0,2) | f
3 | (0,3) | f
4 | (0,4) | f
5 | (0,5) | f
6 | (0,6) | f
(6 rows)
postgres=# select pg_table_size('hot'),pg_indexes_size('hot');
pg_table_size | pg_indexes_size
---------------+-----------------
16384 | 32768
(1 row)
postgres=#
--增加了相应的索引条目,表大小,索引大小没有发生变化。
4.索引空间回收
postgres=# select count(*) from hot where id=1;
count
-------
1
(1 row)
postgres=# SELECT * FROM index_page('hot_id',1);
itemoffset | htid | dead
------------+-------+------
1 | (0,1) | t
2 | (0,2) | t
3 | (0,3) | t
4 | (0,4) | t
5 | (0,5) | t
6 | (0,6) | f
(6 rows)
postgres=#
--利用索引去查询表,索引条目在索引块上被标识为dead状态。
当索引扫描遇到一个指向死元组的条目时,标记该条目“killed”。后续索引扫描会在VACUUM删除他们之前跳过这些条目。此外,PG可以在索引页面已满时删除这样的条目,以避免页分裂。
PG14如何进一步减少索引膨胀:自下而上的索引元组删除比之前方法更进一步:他在索引页分裂即将发生前就删除指向死元组的索引条目。这可以减少索引条目的数量并避免昂贵的分裂,以及稍后VACUUM清理参数的膨胀。
postgres=# SELECT * FROM index_page('hot_s',1);
itemoffset | htid | dead
------------+-------+------
1 | (0,1) | f
2 | (0,2) | f
3 | (0,3) | f
4 | (0,4) | f
5 | (0,5) | f
6 | (0,6) | f
(6 rows)
postgres=# select count(*) from hot where s='A';
count
-------
0
(1 row)
postgres=# SELECT * FROM index_page('hot_s',1);
itemoffset | htid | dead
------------+-------+------
1 | (0,1) | t
2 | (0,2) | f
3 | (0,3) | f
4 | (0,4) | f
5 | (0,5) | f
6 | (0,6) | f
(6 rows)
postgres=# select count(*) from hot where s='B';
count
-------
0
(1 row)
postgres=# SELECT * FROM index_page('hot_s',1);
itemoffset | htid | dead
------------+-------+------
1 | (0,1) | t
2 | (0,2) | t
3 | (0,3) | f
4 | (0,4) | f
5 | (0,5) | f
6 | (0,6) | f
(6 rows)
postgres=# select count(*) from hot where s='F';
count
-------
1
(1 row)
postgres=# SELECT * FROM index_page('hot_s',1);
itemoffset | htid | dead
------------+-------+------
1 | (0,1) | t
2 | (0,2) | t
3 | (0,3) | f
4 | (0,4) | f
5 | (0,5) | f
6 | (0,6) | f
(6 rows)
postgres=#
--因为update的时候,id是没有发生变化的,s发生了变化,每个s的值都会有1个索引条目。
postgres=# select pg_table_size('hot'),pg_indexes_size('hot');
pg_table_size | pg_indexes_size
---------------+-----------------
16384 | 32768
(1 row)
postgres=# SELECT * FROM pgstattuple('public.hot')\gx
-[ RECORD 1 ]------+------
table_len | 8192
tuple_count | 1
tuple_len | 2032
tuple_percent | 24.8
dead_tuple_count | 0
dead_tuple_len | 0
dead_tuple_percent | 0
free_space | 6108
free_percent | 74.56
postgres=# SELECT * FROM pgstatindex('public.hot_s')\gx
-[ RECORD 1 ]------+------
version | 4
tree_level | 0
index_size | 16384
root_block_no | 1
internal_pages | 0
leaf_pages | 1
empty_pages | 0
deleted_pages | 0
avg_leaf_density | 3.88
leaf_fragmentation | 0
postgres=# SELECT * FROM pgstatindex('public.hot_id')\gx
-[ RECORD 1 ]------+------
version | 4
tree_level | 0
index_size | 16384
root_block_no | 1
internal_pages | 0
leaf_pages | 1
empty_pages | 0
deleted_pages | 0
avg_leaf_density | 1.52
leaf_fragmentation | 0
postgres=#
5.vacuum和vacuum full操作
postgres=# vacuum hot;
VACUUM
postgres=# SELECT * FROM pgstattuple('public.hot')\gx
-[ RECORD 1 ]------+------
table_len | 8192
tuple_count | 1
tuple_len | 2032
tuple_percent | 24.8
dead_tuple_count | 0
dead_tuple_len | 0
dead_tuple_percent | 0
free_space | 6108
free_percent | 74.56
postgres=# SELECT * FROM pgstatindex('public.hot_s')\gx
-[ RECORD 1 ]------+------
version | 4
tree_level | 0
index_size | 16384
root_block_no | 1
internal_pages | 0
leaf_pages | 1
empty_pages | 0
deleted_pages | 0
avg_leaf_density | 0.69
leaf_fragmentation | 0
postgres=# SELECT * FROM pgstatindex('public.hot_id')\gx
-[ RECORD 1 ]------+------
version | 4
tree_level | 0
index_size | 16384
root_block_no | 1
internal_pages | 0
leaf_pages | 1
empty_pages | 0
deleted_pages | 0
avg_leaf_density | 0.29
leaf_fragmentation | 0
postgres=#
---vacuum之后已经分配的表和索引的大小没有发生变化。
postgres=# SELECT * FROM heap_page('hot',0);
ctid | state | xmin | xmax | hhu | hot | t_ctid
-------+--------+-------+------+-----+-----+--------
(0,1) | unused | | | | |
(0,2) | unused | | | | |
(0,3) | unused | | | | |
(0,4) | unused | | | | |
(0,5) | unused | | | | |
(0,6) | normal | 746 c | 0 a | | | (0,6)
(6 rows)
postgres=# SELECT * FROM index_page('hot_s',1);
itemoffset | htid | dead
------------+-------+------
1 | (0,6) | f
(1 row)
postgres=# SELECT * FROM index_page('hot_id',1);
itemoffset | htid | dead
------------+-------+------
1 | (0,6) | f
(1 row)
postgres=#
---vacuum之后,标识为dead的索引条目被回收了,数据库块中标识为dead的指针变为unused,可以重用了。
postgres=# vacuum full hot;
VACUUM
postgres=# SELECT * FROM heap_page('hot',0);
ctid | state | xmin | xmax | hhu | hot | t_ctid
-------+--------+-------+------+-----+-----+--------
(0,1) | normal | 746 c | 0 a | | | (0,1)
(1 row)
postgres=# SELECT * FROM index_page('hot_s',1);
itemoffset | htid | dead
------------+-------+------
1 | (0,1) | f
(1 row)
postgres=# SELECT * FROM index_page('hot_id',1);
itemoffset | htid | dead
------------+-------+------
1 | (0,1) | f
(1 row)
postgres=#
---vacuum full之后,表所占有的空间回收给了操作系统。
6.Hot update和page prunning
postgres=# truncate table hot;
TRUNCATE TABLE
postgres=# drop index hot_s;
DROP INDEX
postgres=# insert into hot values (1,'A');
INSERT 0 1
postgres=# update hot set s='B';
UPDATE 1
postgres=# SELECT * FROM heap_page('hot',0);
ctid | state | xmin | xmax | hhu | hot | t_ctid
-------+--------+-------+------+-----+-----+--------
(0,1) | normal | 758 c | 759 | t | | (0,2)
(0,2) | normal | 759 | 0 a | | t | (0,2)
(2 rows)
postgres=# SELECT * FROM index_page('hot_id',1);
itemoffset | htid | dead
------------+-------+------
1 | (0,1) | f
(1 row)
postgres=#
---删除索引hot_s之后,此时的update为hot update。
postgres=# update hot set s='C';
UPDATE 1
postgres=# update hot set s='D';
UPDATE 1
postgres=# SELECT * FROM heap_page('hot',0);
ctid | state | xmin | xmax | hhu | hot | t_ctid
-------+--------+-------+-------+-----+-----+--------
(0,1) | normal | 758 c | 759 c | t | | (0,2)
(0,2) | normal | 759 c | 760 c | t | t | (0,3)
(0,3) | normal | 760 c | 761 | t | t | (0,4)
(0,4) | normal | 761 | 0 a | | t | (0,4)
(4 rows)
postgres=# SELECT * FROM index_page('hot_id',1);
itemoffset | htid | dead
------------+-------+------
1 | (0,1) | f
(1 row)
postgres=# update hot set s='E';
UPDATE 1
postgres=#
----触发了page prunning
postgres=# SELECT * FROM heap_page('hot',0);
ctid | state | xmin | xmax | hhu | hot | t_ctid
-------+---------------+-------+------+-----+-----+--------
(0,1) | redirect to 4 | | | | |
(0,2) | normal | 762 | 0 a | | t | (0,2)
(0,3) | unused | | | | |
(0,4) | normal | 761 c | 762 | t | t | (0,2)
(4 rows)
postgres=# SELECT * FROM index_page('hot_id',1);
itemoffset | htid | dead
------------+-------+------
1 | (0,1) | f
(1 row)
postgres=#
postgres=# UPDATE hot SET s = 'F';
UPDATE 1
postgres=# UPDATE hot SET s = 'G';
UPDATE 1
postgres=# SELECT * FROM heap_page('hot',0);
ctid | state | xmin | xmax | hhu | hot | t_ctid
-------+---------------+-------+-------+-----+-----+--------
(0,1) | redirect to 4 | | | | |
(0,2) | normal | 762 c | 763 c | t | t | (0,3)
(0,3) | normal | 763 c | 764 | t | t | (0,5)
(0,4) | normal | 761 c | 762 c | t | t | (0,2)
(0,5) | normal | 764 | 0 a | | t | (0,5)
(5 rows)
postgres=# SELECT * FROM index_page('hot_id',1);
itemoffset | htid | dead
------------+-------+------
1 | (0,1) | f
(1 row)
postgres=# UPDATE hot SET s = 'H';
UPDATE 1
postgres=#
----触发了page prunning
postgres=# SELECT * FROM heap_page('hot',0);
ctid | state | xmin | xmax | hhu | hot | t_ctid
-------+---------------+-------+------+-----+-----+--------
(0,1) | redirect to 5 | | | | |
(0,2) | normal | 765 | 0 a | | t | (0,2)
(0,3) | unused | | | | |
(0,4) | unused | | | | |
(0,5) | normal | 764 c | 765 | t | t | (0,2)
(5 rows)
postgres=# SELECT * FROM index_page('hot_id',1);
itemoffset | htid | dead
------------+-------+------
1 | (0,1) | f
(1 row)
postgres=#
如果页已经没有足够空间放新元组,HOT Chain将会分裂,pg必须加一个独立索引项指向新页的元组。
--开启REPEATABLE READ事务,即使发生page prunning,也会保留dead元组信息,只是会发生hot 链分裂。
postgres=# BEGIN ISOLATION LEVEL REPEATABLE READ;
BEGIN
postgres=*# select 1;
?column?
----------
1
(1 row)
postgres=*# UPDATE hot SET s = 'I';
UPDATE 1
postgres=*# UPDATE hot SET s = 'J';
UPDATE 1
postgres=*# UPDATE hot SET s = 'K';
UPDATE 1
postgres=*# SELECT * FROM heap_page('hot',0);
ctid | state | xmin | xmax | hhu | hot | t_ctid
-------+---------------+-------+------+-----+-----+--------
(0,1) | redirect to 2 | | | | |
(0,2) | normal | 765 c | 766 | t | t | (0,3)
(0,3) | normal | 766 | 766 | t | t | (0,4)
(0,4) | normal | 766 | 766 | t | t | (0,5)
(0,5) | normal | 766 | 0 a | | t | (0,5)
(5 rows)
postgres=*# UPDATE hot SET s = 'L';
UPDATE 1
postgres=*#
----触发page prunning
postgres=*# SELECT * FROM heap_page('hot',0);
ctid | state | xmin | xmax | hhu | hot | t_ctid
-------+---------------+-------+------+-----+-----+--------
(0,1) | redirect to 2 | | | | |
(0,2) | normal | 765 c | 766 | t | t | (0,3)
(0,3) | normal | 766 | 766 | t | t | (0,4)
(0,4) | normal | 766 | 766 | t | t | (0,5)
(0,5) | normal | 766 | 766 | | t | (1,1)
(5 rows)
postgres=*# SELECT * FROM heap_page('hot',1);
ctid | state | xmin | xmax | hhu | hot | t_ctid
-------+--------+------+------+-----+-----+--------
(1,1) | normal | 766 | 0 a | | | (1,1)
(1 row)
postgres=*# commit;
COMMIT
postgres=#
--REPEATABLE READ事务提交之后,虽然数据不再需要,但是发生的分裂,也会保留。
postgres=# SELECT * FROM heap_page('hot',0);
ctid | state | xmin | xmax | hhu | hot | t_ctid
-------+---------------+-------+------+-----+-----+--------
(0,1) | redirect to 2 | | | | |
(0,2) | normal | 765 c | 766 | t | t | (0,3)
(0,3) | normal | 766 | 766 | t | t | (0,4)
(0,4) | normal | 766 | 766 | t | t | (0,5)
(0,5) | normal | 766 | 766 | | t | (1,1)
(5 rows)
postgres=# SELECT * FROM heap_page('hot',1);
ctid | state | xmin | xmax | hhu | hot | t_ctid
-------+--------+------+------+-----+-----+--------
(1,1) | normal | 766 | 0 a | | | (1,1)
(1 row)
postgres=# SELECT * FROM index_page('hot_id',1);
itemoffset | htid | dead
------------+-------+------
1 | (0,1) | f
2 | (1,1) | f
(2 rows)
postgres=#