postgres 源码解析6 Page 与 Tuple 组织方式

1 Page 结构解析

  在postgres中,表格默认为堆表(Heap);每个表文件数据放在若干个物理文件中,每个物理文件由多个页(Page)组成,数据页默认大小为 8K,最大为32K,其结构如下图:
在这里插入图片描述
内部结构:

/*
 * A postgres disk page is an abstraction layered on top of a postgres
 * disk block (which is simply a unit of i/o, see block.h).
 *
 * specifically, while a disk block can be unformatted, a postgres
 * disk page is always a slotted page of the form:
 *
 * +----------------+---------------------------------+
 * | PageHeaderData | linp1 linp2 linp3 ...           |
 * +-----------+----+---------------------------------+
 * | ... linpN |									  |
 * +-----------+--------------------------------------+
 * |		   ^ pd_lower							  |
 * |												  |
 * |			 v pd_upper							  |
 * +-------------+------------------------------------+
 * |			 | tupleN ...                         |
 * +-------------+------------------+-----------------+
 * |	   ... tuple3 tuple2 tuple1 | "special space" |
 * +--------------------------------+-----------------+
 *									^ pd_special
 */

页主要由三部分组成:页头(PageHeaderData)+ 项指针(linp)+ 元组(tuple);此外,项指针与元组之间是空闲空间,页尾还可以有一段特殊空间(special space)。

/*
 * Header placed at the start of every page; the line pointer array
 * (pd_linp) follows it directly.  pd_lower and pd_upper delimit the free
 * space between the line pointers and the tuples.
 */
typedef struct PageHeaderData
{
	/* XXX LSN is member of *any* block, not only page-organized ones */
	PageXLogRecPtr pd_lsn;		/* LSN: next byte after last byte of xlog
								 * record for last change to this page */
	uint16		pd_checksum;	/* checksum */
	uint16		pd_flags;		/* flag bits, see below */
	LocationIndex pd_lower;		/* offset to start of free space */
	LocationIndex pd_upper;		/* offset to end of free space */
	LocationIndex pd_special;	/* offset to start of special space */        // the special space is used by index pages
	uint16		pd_pagesize_version;	/* NOTE(review): presumably page size
										 * and layout version packed together;
										 * confirm against bufpage.h */
	TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */  
	ItemIdData	pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */     // array of item (line) pointers
} PageHeaderData;
typedef PageHeaderData *PageHeader;

pd_lsn———— 本页面最近一次变更所写入的XLOG记录对应的LSN。其类型是PageXLogRecPtr,该结构由xlogid和xrecoff两个属性组成,前者表示wal日志的逻辑id,后者表示在wal日志中的偏移量,两者都是32位无符号数。因此pd_lsn是一个8字节的无符号整数。
pd_checksum———— 本页面的校验和值(9.3版本以后才有),2个字节的无符号整型。
pd_flags———— 标志位,见下面的定义,2个字节的无符号整型。
pd_lower、pd_upper———— pd_lower指向行指针的末尾,表示空闲空间的起始位置。pd_upper指向最新堆元组的起始位置,表示空闲空间的结束位置。都是2个字节的无符号整型。
pd_special ———— 在索引页中会用到该字段,在堆表页中它指向页尾。2个字节无符号整型。
pd_pagesize_version ———— 页面大小与页面布局版本号,两者打包存放在同一个字段中,2个字节。
pd_prune_xid ———— 字面意思是可剪枝的最老的事务ID,4个字节。
pd_linp[FLEXIBLE_ARRAY_MEMBER] ———— ItemIdData类型的数组。

/*
 * pd_flags contains the following flag bits.  Undefined bits are initialized
 * to zero and may be used in the future.
 *
 * PD_HAS_FREE_LINES is set if there are any LP_UNUSED line pointers before
 * pd_lower.  This should be considered a hint rather than the truth, since
 * changes to it are not WAL-logged.
 *
 * PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the
 * page for its new tuple version; this suggests that a prune is needed.
 * Again, this is just a hint.
 *
 * PD_ALL_VISIBLE is set if all tuples on the page are visible to everyone.
 *
 * PD_VALID_FLAG_BITS is the bitwise OR of the three flags above
 * (0x0001 | 0x0002 | 0x0004).
 */
#define PD_HAS_FREE_LINES	0x0001	/* are there any unused line pointers? */
#define PD_PAGE_FULL		0x0002	/* not enough free space for new tuple? */
#define PD_ALL_VISIBLE		0x0004	/* all tuples on page are visible to
									 * everyone */
#define PD_VALID_FLAG_BITS	0x0007	/* OR of all valid pd_flags bits */

项指针:

/*
 * A line pointer (item identifier): records where one tuple lives inside
 * the page, how long it is, and what state the pointer is in.  The three
 * bit-fields add up to 32 bits (15 + 2 + 15).
 */
typedef struct ItemIdData
{
	unsigned	lp_off:15,		/* offset to tuple (from start of page) */
				lp_flags:2,		/* state of line pointer, see below */
				lp_len:15;		/* byte length of tuple */
} ItemIdData;
typedef ItemIdData *ItemId;
/*
 * lp_flags has these possible states.  An UNUSED line pointer is available
 * for immediate re-use, the other states are not.
 *
 * The four values fit exactly in the 2-bit lp_flags field.
 */
#define LP_UNUSED		0		/* unused (should always have lp_len=0) */
#define LP_NORMAL		1		/* used (should always have lp_len>0) */
#define LP_REDIRECT		2		/* HOT redirect (should have lp_len=0) */
#define LP_DEAD			3		/* dead, may or may not have storage */

ItemIdData类型由lp_off、lp_flags、lp_len三个属性组成。每一个ItemIdData结构用来指向文件块中的一个元组,其中lp_off是元组在文件块(Page)中的偏移量,lp_len则说明了该元组的长度,lp_flags则表示元组的状态(分为未使用、正常使用、HOT重定向和死亡四种状态)。每个ItemIdData元素大小为4个字节

2 元组结构解析

/*
 * Header that precedes the data of a heap tuple.  The fixed-size part is
 * 23 bytes; it is followed by the NULL bitmap (t_bits) and then by the
 * tuple's actual data.
 */
struct HeapTupleHeaderData
{
	union
	{
		HeapTupleFields t_heap;        /* xact/command IDs, see HeapTupleFields */
		DatumTupleFields t_datum;	/* length/type info, see DatumTupleFields */
	}			t_choice;

	ItemPointerData t_ctid;		/* current TID of this or newer tuple (or a
								 * speculative insertion token) */

	/* Fields below here must match MinimalTupleData! */
#define FIELDNO_HEAPTUPLEHEADERDATA_INFOMASK2 2
	uint16		t_infomask2;	/* number of attributes + various flags */

#define FIELDNO_HEAPTUPLEHEADERDATA_INFOMASK 3
	uint16		t_infomask;		/* various flag bits, see below */

#define FIELDNO_HEAPTUPLEHEADERDATA_HOFF 4
	uint8		t_hoff;			/* sizeof header incl. bitmap, padding */

	/* ^ - 23 bytes - ^ */

#define FIELDNO_HEAPTUPLEHEADERDATA_BITS 5
	bits8		t_bits[FLEXIBLE_ARRAY_MEMBER];	/* bitmap of NULLs */

	/* MORE DATA FOLLOWS AT END OF STRUCT */
	/* i.e. the tuple's actual data */
};
typedef struct HeapTupleHeaderData HeapTupleHeaderData;
typedef HeapTupleHeaderData *HeapTupleHeader;

t_choice:是具有两个成员的联合类型:
t_heap:用于记录对元组执行插入/删除操作的事务ID和命令ID, 这些字段信息用于可见性判断
t_datum:当一个新的元组在内存中形成的时候,并不关心事务可见性,因此t_choice中只需用DatumTupleFields来记录元组长度等信息,这是临时信息。在把该元组插入到表文件时,需要在元组头信息中记录插入该元组的事务和命令ID,此时会把t_choice转换为HeapTupleFields结构并填充相应数据后再进行元组的插入。
t_ctid:用于记录当前元组或新元组的物理位置,若元组被更新(删除旧版本元组,然后插入新版本元组),则记录的是新版本元组的物理位置。
t_infomask2:使用其低11位表示当前元组的属性个数,其他位用于包含用于HOT技术及元组可见性的标志位。
t_infomask:标识元组的当前状态,比如:是否具有OID、是否有空属性等,t_infomask的每一位对应不同的状态,共16种状态。
t_hoff:表示元组头的大小。
t_bits:用于标识该元组哪些字段为空。

/*
 * Transaction-related fields of a heap tuple header: the inserting and
 * deleting/locking transaction IDs, plus a third slot that holds either a
 * command ID or an old-style VACUUM FULL transaction ID (the two share
 * storage through a union).
 */
typedef struct HeapTupleFields
{
	TransactionId t_xmin;		/* inserting xact ID */
	TransactionId t_xmax;		/* deleting or locking xact ID */

	union
	{
		CommandId	t_cid;		/* inserting or deleting command ID, or both */
		TransactionId t_xvac;	/* old-style VACUUM FULL xact ID */
	}			t_field3;
} HeapTupleFields;

/*
 * Fields describing a tuple as a datum: a varlena length header, a typmod,
 * and the OID of its composite type.  It is the alternative to
 * HeapTupleFields inside the t_choice union of HeapTupleHeaderData.
 */
typedef struct DatumTupleFields
{
	int32		datum_len_;		/* varlena header (do not touch directly!) */

	int32		datum_typmod;	/* -1, or identifier of a record type */

	Oid			datum_typeid;	/* composite type OID, or RECORDOID */

	/*
	 * datum_typeid cannot be a domain over composite, only plain composite,
	 * even if the datum is meant as a value of a domain-over-composite type.
	 * This is in line with the general principle that CoerceToDomain does not
	 * change the physical representation of the base type value.
	 *
	 * Note: field ordering is chosen with thought that Oid might someday
	 * widen to 64 bits.
	 */
} DatumTupleFields;

/*
 * Identifies the location of a tuple: a block number plus the position of
 * the tuple's entry in that block's ItemIdData (line pointer) array.
 */
typedef struct ItemPointerData
{
	BlockIdData ip_blkid;            /* block number */
	OffsetNumber ip_posid;			 /* index of this tuple's entry in the ItemIdData array */
}

/* If compiler understands packed and aligned pragmas, use those */
#if defined(pg_attribute_packed) && defined(pg_attribute_aligned)
			pg_attribute_packed()
			pg_attribute_aligned(2)
#endif
ItemPointerData;
typedef ItemPointerData *ItemPointer;

注意:在插入元组时,t_ctid并不是在构建元组时就存在,而是在元组写入文件块对应的共享内存文件页后才设置的,其代码如下:

/*
 * RelationPutHeapTuple - place a tuple on the page held in a buffer.
 *
 * relation: not referenced by this function body.
 * buffer:   buffer whose page receives the tuple.
 * tuple:    tuple to store; its t_self is updated to the stored position.
 * token:    true for a speculative insertion, in which case the stored
 *           tuple's t_ctid is left alone because it holds the token.
 *
 * Raises PANIC through elog() if the tuple cannot be added to the page.
 */
void
RelationPutHeapTuple(Relation relation,
					 Buffer buffer,
					 HeapTuple tuple,
					 bool token)
{
	Page		page;
	OffsetNumber slot;
	HeapTupleHeader stored;

	/* When token is set, the tuple header must already be marked speculative. */
	Assert(!token || HeapTupleHeaderIsSpeculative(tuple->t_data));

	page = BufferGetPage(buffer);

	/*
	 * Copy the tuple onto the page.  At this point the t_ctid inside the
	 * tuple is still an invalid value.
	 */
	slot = PageAddItem(page, (Item) tuple->t_data,
					   tuple->t_len, InvalidOffsetNumber, false, true);
	if (slot == InvalidOffsetNumber)
		elog(PANIC, "failed to add tuple to page");

	/* Record in the caller's tuple where it actually landed. */
	ItemPointerSet(&(tuple->t_self), BufferGetBlockNumber(buffer), slot);

	/* A speculative insertion keeps its token in t_ctid, so stop here. */
	if (token)
		return;

	/* Otherwise write the real position into the stored copy's t_ctid. */
	stored = (HeapTupleHeader) PageGetItem(page, PageGetItemId(page, slot));
	stored->t_ctid = tuple->t_self;
}

参考:https://blog.csdn.net/obvious__/article/details/109328425

  • 3
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值