PostgreSQL中的等待事件wait_event

PostgreSQL9.6版本开始,加入了wait_event特性。通过查阅pg_stat_activity中的wait_event_type和wait_event我们可以了解到每个sql进程“当前”更详细的执行状态,无论是对于异常定位排查,还是系统优化来说都更加方便了。

这篇博文简单讨论下wait_event相关的原理。

一、等待事件分类
I.事件类型 :

共有9个分类,每一类都由不同事件组成

/* ----------
 * Wait Classes
 * ----------
 */
#define PG_WAIT_LWLOCK				0x01000000U   /* 等待LWLock */
#define PG_WAIT_LOCK				0x03000000U   /* 等待Lock */
#define PG_WAIT_BUFFER_PIN			0x04000000U   /* 等待访问数据缓冲区 */
#define PG_WAIT_ACTIVITY			0x05000000U   /* 服务器进程处于空闲状态 */
#define PG_WAIT_CLIENT				0x06000000U   /* 等待应用客户端程序在套接字中进行操作 */
#define PG_WAIT_EXTENSION			0x07000000U   /* 等待扩展模块中的操作 */
#define PG_WAIT_IPC					0x08000000U   /* 等待进程间通信 */
#define PG_WAIT_TIMEOUT				0x09000000U   /* 等待达到超时时间 */
#define PG_WAIT_IO					0x0A000000U   /* 等待IO操作完成 */
II. 具体事件:

每种类型和具体事件含义请参考:https://www.postgresql.org/docs/12/monitoring-stats.html

1.PG_WAIT_LWLOCK类
/* 等待LWLock */

定义共有65种事件

const char *const MainLWLockNames[] = {
	"<unassigned:0>",
	"ShmemIndexLock",
	"OidGenLock",
	"XidGenLock",
	"ProcArrayLock",
	"SInvalReadLock",
	"SInvalWriteLock",
	"WALBufMappingLock",
	"WALWriteLock",
	"ControlFileLock",
	"CheckpointLock",
	"CLogControlLock",
	"SubtransControlLock",
	"MultiXactGenLock",
	"MultiXactOffsetControlLock",
	"MultiXactMemberControlLock",
	"RelCacheInitLock",
	"CheckpointerCommLock",
	"TwoPhaseStateLock",
	"TablespaceCreateLock",
	"BtreeVacuumLock",
	"AddinShmemInitLock",
	"AutovacuumLock",
	"AutovacuumScheduleLock",
	"SyncScanLock",
	"RelationMappingLock",
	"AsyncCtlLock",
	"AsyncQueueLock",
	"SerializableXactHashLock",
	"SerializableFinishedListLock",
	"SerializablePredicateLockListLock",
	"OldSerXidLock",
	"SyncRepLock",
	"BackgroundWorkerLock",
	"DynamicSharedMemoryControlLock",
	"AutoFileLock",
	"ReplicationSlotAllocationLock",
	"ReplicationSlotControlLock",
	"CommitTsControlLock",
	"CommitTsLock",
	"ReplicationOriginLock",
	"MultiXactTruncationLock",
	"OldSnapshotTimeMapLock",
	"LogicalRepWorkerLock",
	"CLogTruncationLock"
};
typedef enum BuiltinTrancheIds
{
	LWTRANCHE_CLOG_BUFFERS = NUM_INDIVIDUAL_LWLOCKS,
	LWTRANCHE_COMMITTS_BUFFERS,
	LWTRANCHE_SUBTRANS_BUFFERS,
	LWTRANCHE_MXACTOFFSET_BUFFERS,
	LWTRANCHE_MXACTMEMBER_BUFFERS,
	LWTRANCHE_ASYNC_BUFFERS,
	LWTRANCHE_OLDSERXID_BUFFERS,
	LWTRANCHE_WAL_INSERT,
	LWTRANCHE_BUFFER_CONTENT,
	LWTRANCHE_BUFFER_IO_IN_PROGRESS,
	LWTRANCHE_REPLICATION_ORIGIN,
	LWTRANCHE_REPLICATION_SLOT_IO_IN_PROGRESS,
	LWTRANCHE_PROC,
	LWTRANCHE_BUFFER_MAPPING,
	LWTRANCHE_LOCK_MANAGER,
	LWTRANCHE_PREDICATE_LOCK_MANAGER,
	LWTRANCHE_PARALLEL_HASH_JOIN,
	LWTRANCHE_PARALLEL_QUERY_DSA,
	LWTRANCHE_SESSION_DSA,
	LWTRANCHE_SESSION_RECORD_TABLE,
	LWTRANCHE_SESSION_TYPMOD_TABLE,
	LWTRANCHE_SHARED_TUPLESTORE,
	LWTRANCHE_TBM,
	LWTRANCHE_PARALLEL_APPEND,
	LWTRANCHE_FIRST_USER_DEFINED
}			BuiltinTrancheIds;

2. PG_WAIT_LOCK类型
/* 等待Lock */

共有10种事件

/*
 * LOCKTAG is the key information needed to look up a LOCK item in the
 * lock hashtable.  A LOCKTAG value uniquely identifies a lockable object.
 *
 * The LockTagType enum defines the different kinds of objects we can lock.
 * We can handle up to 256 different LockTagTypes.
 */
typedef enum LockTagType
{
	LOCKTAG_RELATION,			/* whole relation */
	LOCKTAG_RELATION_EXTEND,	/* the right to extend a relation */
	LOCKTAG_PAGE,				/* one page of a relation */
	LOCKTAG_TUPLE,				/* one physical tuple */
	LOCKTAG_TRANSACTION,		/* transaction (for waiting for xact done) */
	LOCKTAG_VIRTUALTRANSACTION, /* virtual transaction (ditto) */
	LOCKTAG_SPECULATIVE_TOKEN,	/* speculative insertion Xid and token */
	LOCKTAG_OBJECT,				/* non-relation database object */
	LOCKTAG_USERLOCK,			/* reserved for old contrib/userlock code */
	LOCKTAG_ADVISORY			/* advisory user locks */
} LockTagType;

3. PG_WAIT_BUFFER_PIN类型
/* 等待访问数据缓冲区 */

共有1种事件

/* ----------
 * pgstat_get_wait_event() -
 *
 *	Return a string representing the current wait event, backend is
 *	waiting on.
 */
const char *
pgstat_get_wait_event(uint32 wait_event_info)
{
	/* 省略部分代码行 */

	switch (classId)
	{
		/* 省略其他分支 */
		case PG_WAIT_BUFFER_PIN:
			event_name = "BufferPin";
			break;
		/* 省略其他分支 */
     }
    
    return event_name;
}

4. PG_WAIT_ACTIVITY类型
/* 当前服务器进程处于空闲状态 */

共有14种事件,分别对应14个进程主函数

typedef enum
{
	WAIT_EVENT_ARCHIVER_MAIN = PG_WAIT_ACTIVITY,
	WAIT_EVENT_AUTOVACUUM_MAIN,
	WAIT_EVENT_BGWRITER_HIBERNATE,
	WAIT_EVENT_BGWRITER_MAIN,
	WAIT_EVENT_CHECKPOINTER_MAIN,
	WAIT_EVENT_LOGICAL_APPLY_MAIN,
	WAIT_EVENT_LOGICAL_LAUNCHER_MAIN,
	WAIT_EVENT_PGSTAT_MAIN,
	WAIT_EVENT_RECOVERY_WAL_ALL,
	WAIT_EVENT_RECOVERY_WAL_STREAM,
	WAIT_EVENT_SYSLOGGER_MAIN,
	WAIT_EVENT_WAL_RECEIVER_MAIN,
	WAIT_EVENT_WAL_SENDER_MAIN,
	WAIT_EVENT_WAL_WRITER_MAIN
} WaitEventActivity;

5. PG_WAIT_CLIENT类型
/* 等待应用客户端程序在套接字中进行操作 */

共有9种事件

/* ----------
 * Wait Events - Client
 *
 * Use this category when a process is waiting to send data to or receive data
 * from the frontend process to which it is connected.  This is never used for
 * a background process, which has no client connection.
 * ----------
 */
typedef enum
{
	WAIT_EVENT_CLIENT_READ = PG_WAIT_CLIENT,
	WAIT_EVENT_CLIENT_WRITE,
	WAIT_EVENT_LIBPQWALRECEIVER_CONNECT,
	WAIT_EVENT_LIBPQWALRECEIVER_RECEIVE,
	WAIT_EVENT_SSL_OPEN_SERVER,
	WAIT_EVENT_WAL_RECEIVER_WAIT_START,
	WAIT_EVENT_WAL_SENDER_WAIT_WAL,
	WAIT_EVENT_WAL_SENDER_WRITE_DATA,
	WAIT_EVENT_GSS_OPEN_SERVER,
} WaitEventClient;

6. PG_WAIT_EXTENSION类型
/* 等待扩展模块中的操作 */

共有1种事件

/* ----------
 * pgstat_get_wait_event() -
 *
 *	Return a string representing the current wait event, backend is
 *	waiting on.
 */
const char *
pgstat_get_wait_event(uint32 wait_event_info)
{
	/* 省略部分代码行 */

	switch (classId)
	{
		/* 省略其他分支 */
		case PG_WAIT_EXTENSION:
			event_name = "Extension";
			break;
		/* 省略其他分支 */
	}

	return event_name;
}

7. PG_WAIT_IPC类型
/* 等待进程间通信 */

共有37种事件

typedef enum
{
	WAIT_EVENT_BGWORKER_SHUTDOWN = PG_WAIT_IPC,
	WAIT_EVENT_BGWORKER_STARTUP,
	WAIT_EVENT_BTREE_PAGE,
	WAIT_EVENT_CLOG_GROUP_UPDATE,
	WAIT_EVENT_CHECKPOINT_DONE,
	WAIT_EVENT_CHECKPOINT_START,
	WAIT_EVENT_EXECUTE_GATHER,
	WAIT_EVENT_HASH_BATCH_ALLOCATING,
	WAIT_EVENT_HASH_BATCH_ELECTING,
	WAIT_EVENT_HASH_BATCH_LOADING,
	WAIT_EVENT_HASH_BUILD_ALLOCATING,
	WAIT_EVENT_HASH_BUILD_ELECTING,
	WAIT_EVENT_HASH_BUILD_HASHING_INNER,
	WAIT_EVENT_HASH_BUILD_HASHING_OUTER,
	WAIT_EVENT_HASH_GROW_BATCHES_ALLOCATING,
	WAIT_EVENT_HASH_GROW_BATCHES_DECIDING,
	WAIT_EVENT_HASH_GROW_BATCHES_ELECTING,
	WAIT_EVENT_HASH_GROW_BATCHES_FINISHING,
	WAIT_EVENT_HASH_GROW_BATCHES_REPARTITIONING,
	WAIT_EVENT_HASH_GROW_BUCKETS_ALLOCATING,
	WAIT_EVENT_HASH_GROW_BUCKETS_ELECTING,
	WAIT_EVENT_HASH_GROW_BUCKETS_REINSERTING,
	WAIT_EVENT_LOGICAL_SYNC_DATA,
	WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE,
	WAIT_EVENT_MQ_INTERNAL,
	WAIT_EVENT_MQ_PUT_MESSAGE,
	WAIT_EVENT_MQ_RECEIVE,
	WAIT_EVENT_MQ_SEND,
	WAIT_EVENT_PARALLEL_BITMAP_SCAN,
	WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN,
	WAIT_EVENT_PARALLEL_FINISH,
	WAIT_EVENT_PROCARRAY_GROUP_UPDATE,
	WAIT_EVENT_PROMOTE,
	WAIT_EVENT_REPLICATION_ORIGIN_DROP,
	WAIT_EVENT_REPLICATION_SLOT_DROP,
	WAIT_EVENT_SAFE_SNAPSHOT,
	WAIT_EVENT_SYNC_REP
} WaitEventIPC;

8. PG_WAIT_TIMEOUT类型
/* 等待达到超时时间 */

共有3种事件

/* ----------
 * Wait Events - Timeout
 *
 * Use this category when a process is waiting for a timeout to expire.
 * ----------
 */
typedef enum
{
	WAIT_EVENT_BASE_BACKUP_THROTTLE = PG_WAIT_TIMEOUT,
	WAIT_EVENT_PG_SLEEP,
	WAIT_EVENT_RECOVERY_APPLY_DELAY
} WaitEventTimeout;

9. PG_WAIT_IO类型
/* 等待IO操作完成 */

共68种事件

/* ----------
 * Wait Events - IO
 *
 * Use this category when a process is waiting for a IO.
 * ----------
 */
typedef enum
{
	WAIT_EVENT_BUFFILE_READ = PG_WAIT_IO,
	WAIT_EVENT_BUFFILE_WRITE,
	WAIT_EVENT_CONTROL_FILE_READ,
	WAIT_EVENT_CONTROL_FILE_SYNC,
	WAIT_EVENT_CONTROL_FILE_SYNC_UPDATE,
	WAIT_EVENT_CONTROL_FILE_WRITE,
	WAIT_EVENT_CONTROL_FILE_WRITE_UPDATE,
	WAIT_EVENT_COPY_FILE_READ,
	WAIT_EVENT_COPY_FILE_WRITE,
	WAIT_EVENT_DATA_FILE_EXTEND,
	WAIT_EVENT_DATA_FILE_FLUSH,
	WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC,
	WAIT_EVENT_DATA_FILE_PREFETCH,
	WAIT_EVENT_DATA_FILE_READ,
	WAIT_EVENT_DATA_FILE_SYNC,
	WAIT_EVENT_DATA_FILE_TRUNCATE,
	WAIT_EVENT_DATA_FILE_WRITE,
	WAIT_EVENT_DSM_FILL_ZERO_WRITE,
	WAIT_EVENT_LOCK_FILE_ADDTODATADIR_READ,
	WAIT_EVENT_LOCK_FILE_ADDTODATADIR_SYNC,
	WAIT_EVENT_LOCK_FILE_ADDTODATADIR_WRITE,
	WAIT_EVENT_LOCK_FILE_CREATE_READ,
	WAIT_EVENT_LOCK_FILE_CREATE_SYNC,
	WAIT_EVENT_LOCK_FILE_CREATE_WRITE,
	WAIT_EVENT_LOCK_FILE_RECHECKDATADIR_READ,
	WAIT_EVENT_LOGICAL_REWRITE_CHECKPOINT_SYNC,
	WAIT_EVENT_LOGICAL_REWRITE_MAPPING_SYNC,
	WAIT_EVENT_LOGICAL_REWRITE_MAPPING_WRITE,
	WAIT_EVENT_LOGICAL_REWRITE_SYNC,
	WAIT_EVENT_LOGICAL_REWRITE_TRUNCATE,
	WAIT_EVENT_LOGICAL_REWRITE_WRITE,
	WAIT_EVENT_RELATION_MAP_READ,
	WAIT_EVENT_RELATION_MAP_SYNC,
	WAIT_EVENT_RELATION_MAP_WRITE,
	WAIT_EVENT_REORDER_BUFFER_READ,
	WAIT_EVENT_REORDER_BUFFER_WRITE,
	WAIT_EVENT_REORDER_LOGICAL_MAPPING_READ,
	WAIT_EVENT_REPLICATION_SLOT_READ,
	WAIT_EVENT_REPLICATION_SLOT_RESTORE_SYNC,
	WAIT_EVENT_REPLICATION_SLOT_SYNC,
	WAIT_EVENT_REPLICATION_SLOT_WRITE,
	WAIT_EVENT_SLRU_FLUSH_SYNC,
	WAIT_EVENT_SLRU_READ,
	WAIT_EVENT_SLRU_SYNC,
	WAIT_EVENT_SLRU_WRITE,
	WAIT_EVENT_SNAPBUILD_READ,
	WAIT_EVENT_SNAPBUILD_SYNC,
	WAIT_EVENT_SNAPBUILD_WRITE,
	WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC,
	WAIT_EVENT_TIMELINE_HISTORY_FILE_WRITE,
	WAIT_EVENT_TIMELINE_HISTORY_READ,
	WAIT_EVENT_TIMELINE_HISTORY_SYNC,
	WAIT_EVENT_TIMELINE_HISTORY_WRITE,
	WAIT_EVENT_TWOPHASE_FILE_READ,
	WAIT_EVENT_TWOPHASE_FILE_SYNC,
	WAIT_EVENT_TWOPHASE_FILE_WRITE,
	WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ,
	WAIT_EVENT_WAL_BOOTSTRAP_SYNC,
	WAIT_EVENT_WAL_BOOTSTRAP_WRITE,
	WAIT_EVENT_WAL_COPY_READ,
	WAIT_EVENT_WAL_COPY_SYNC,
	WAIT_EVENT_WAL_COPY_WRITE,
	WAIT_EVENT_WAL_INIT_SYNC,
	WAIT_EVENT_WAL_INIT_WRITE,
	WAIT_EVENT_WAL_READ,
	WAIT_EVENT_WAL_SYNC,
	WAIT_EVENT_WAL_SYNC_METHOD_ASSIGN,
	WAIT_EVENT_WAL_WRITE
} WaitEventIO;
二、等待事件原理

等待事件较多,挑几种出来分析一下实现原理
从9.6版本开始,加入了该特性,体现在backend共享内存结构体PGPROC中新增了uint32 wait_event_info; /* proc’s wait information */如下#82行

struct PGPROC
{
	/* proc->links MUST BE FIRST IN STRUCT (see ProcSleep,ProcWakeup,etc) */
	SHM_QUEUE	links;			/* list link if process is in a list */
	PGPROC	  **procgloballist; /* procglobal list that owns this PGPROC */

	PGSemaphore sem;			/* ONE semaphore to sleep on */
	int			waitStatus;		/* STATUS_WAITING, STATUS_OK or STATUS_ERROR */

	Latch		procLatch;		/* generic latch for process */

	LocalTransactionId lxid;	/* local id of top-level transaction currently
								 * being executed by this proc, if running;
								 * else InvalidLocalTransactionId */
	int			pid;			/* Backend's process ID; 0 if prepared xact */
	int			pgprocno;

	/* These fields are zero while a backend is still starting up: */
	BackendId	backendId;		/* This backend's backend ID (if assigned) */
	Oid			databaseId;		/* OID of database this backend is using */
	Oid			roleId;			/* OID of role using this backend */

	Oid			tempNamespaceId;	/* OID of temp schema this backend is
									 * using */

	bool		isBackgroundWorker; /* true if background worker. */

	/*
	 * While in hot standby mode, shows that a conflict signal has been sent
	 * for the current transaction. Set/cleared while holding ProcArrayLock,
	 * though not required. Accessed without lock, if needed.
	 */
	bool		recoveryConflictPending;

	/* Info about LWLock the process is currently waiting for, if any. */
	bool		lwWaiting;		/* true if waiting for an LW lock */
	uint8		lwWaitMode;		/* lwlock mode being waited for */
	proclist_node lwWaitLink;	/* position in LW lock wait list */

	/* Support for condition variables. */
	proclist_node cvWaitLink;	/* position in CV wait list */

	/* Info about lock the process is currently waiting for, if any. */
	/* waitLock and waitProcLock are NULL if not currently waiting. */
	LOCK	   *waitLock;		/* Lock object we're sleeping on ... */
	PROCLOCK   *waitProcLock;	/* Per-holder info for awaited lock */
	LOCKMODE	waitLockMode;	/* type of lock we're waiting for */
	LOCKMASK	heldLocks;		/* bitmask for lock types already held on this
								 * lock object by this backend */

	/*
	 * Info to allow us to wait for synchronous replication, if needed.
	 * waitLSN is InvalidXLogRecPtr if not waiting; set only by user backend.
	 * syncRepState must not be touched except by owning process or WALSender.
	 * syncRepLinks used only while holding SyncRepLock.
	 */
	XLogRecPtr	waitLSN;		/* waiting for this LSN or higher */
	int			syncRepState;	/* wait state for sync rep */
	SHM_QUEUE	syncRepLinks;	/* list link if process is in syncrep queue */

	/*
	 * All PROCLOCK objects for locks held or awaited by this backend are
	 * linked into one of these lists, according to the partition number of
	 * their lock.
	 */
	SHM_QUEUE	myProcLocks[NUM_LOCK_PARTITIONS];

	struct XidCache subxids;	/* cache for subtransaction XIDs */

	/* Support for group XID clearing. */
	/* true, if member of ProcArray group waiting for XID clear */
	bool		procArrayGroupMember;
	/* next ProcArray group member waiting for XID clear */
	pg_atomic_uint32 procArrayGroupNext;

	/*
	 * latest transaction id among the transaction's main XID and
	 * subtransactions
	 */
	TransactionId procArrayGroupMemberXid;
   
	uint32		wait_event_info;	/* proc's wait information */

	/* Support for group transaction status update. */
	bool		clogGroupMember;	/* true, if member of clog group */
	pg_atomic_uint32 clogGroupNext; /* next clog group member */
	TransactionId clogGroupMemberXid;	/* transaction id of clog group member */
	XidStatus	clogGroupMemberXidStatus;	/* transaction status of clog
											 * group member */
	int			clogGroupMemberPage;	/* clog page corresponding to
										 * transaction id of clog group member */
	XLogRecPtr	clogGroupMemberLsn; /* WAL location of commit record for clog
									 * group member */

	/* Per-backend LWLock.  Protects fields below (but not group fields). */
	LWLock		backendLock;

	/* Lock manager data, recording fast-path locks taken by this backend. */
	uint64		fpLockBits;		/* lock modes held for each fast-path slot */
	Oid			fpRelId[FP_LOCK_SLOTS_PER_BACKEND]; /* slots for rel oids */
	bool		fpVXIDLock;		/* are we holding a fast-path VXID lock? */
	LocalTransactionId fpLocalTransactionId;	/* lxid for fast-path VXID
												 * lock */

	/*
	 * Support for lock groups.  Use LockHashPartitionLockByProc on the group
	 * leader to get the LWLock protecting these fields.
	 */
	PGPROC	   *lockGroupLeader;	/* lock group leader, if I'm a member */
	dlist_head	lockGroupMembers;	/* list of members, if I'm a leader */
	dlist_node	lockGroupLink;	/* my member link, if I'm a member */
};
I.详细分析一个Lock类型的事件,了解等待事件的具体实现

如下sql进行update,等待事件类型为Lock,具体事件为transactionid

postgres=# select pid,wait_event_type,wait_event,query from pg_stat_activity where pid=21318;
-[ RECORD 1 ]---+-------------------------------------
pid             | 21318
wait_event_type | Lock
wait_event      | transactionid
query           | update tbl_test set content='c05a2f0059b30755727a2807a17674bq' where id=1;

打印stack信息:

[postgres@postgres_zabbix ~]$ pstack 21318
#0  0x00007fa72ea38913 in __epoll_wait_nocancel () from /lib64/libc.so.6
#1  0x00000000008522cd in WaitEventSetWaitBlock (set=0x207b7b0, cur_timeout=-1, occurred_events=0x7ffcb383b5b0, nevents=1) at latch.c:1080
#2  0x00000000008521a8 in WaitEventSetWait (set=0x207b7b0, timeout=-1, occurred_events=0x7ffcb383b5b0, nevents=1, wait_event_info=50331652) at latch.c:1032
#3  0x0000000000851a94 in WaitLatchOrSocket (latch=0x7fa72e4011f4, wakeEvents=33, sock=-1, timeout=-1, wait_event_info=50331652) at latch.c:407
#4  0x000000000085195f in WaitLatch (latch=0x7fa72e4011f4, wakeEvents=33, timeout=0, wait_event_info=50331652) at latch.c:347
#5  0x0000000000866a39 in ProcSleep (locallock=0x1fe46f0, lockMethodTable=0xb8d7a0 <default_lockmethod>) at proc.c:1289
#6  0x0000000000860d04 in WaitOnLock (locallock=0x1fe46f0, owner=0x1ff6688) at lock.c:1768
#7  0x000000000085fe2a in LockAcquireExtended (locktag=0x7ffcb383ba90, lockmode=5, sessionLock=false, dontWait=false, reportMemoryError=true, locallockp=0x0) at lock.c:1050
#8  0x000000000085f47f in LockAcquire (locktag=0x7ffcb383ba90, lockmode=5, sessionLock=false, dontWait=false) at lock.c:713
#9  0x000000000085e302 in XactLockTableWait (xid=501, rel=0x7fa72f6ba198, ctid=0x7ffcb383bb44, oper=XLTW_Update) at lmgr.c:658
#10 0x00000000004c980f in heap_update (relation=0x7fa72f6ba198, otid=0x7ffcb383be60, newtup=0x208c7c8, cid=0, crosscheck=0x0, wait=true, tmfd=0x7ffcb383bd60, lockmode=0x7ffcb383bd5c) at heapam.c:3228
#11 0x00000000004d3f63 in heapam_tuple_update (relation=0x7fa72f6ba198, otid=0x7ffcb383be60, slot=0x207b340, cid=0, snapshot=0x2045760, crosscheck=0x0, wait=true, tmfd=0x7ffcb383bd60, lockmode=0x7ffcb383bd5c, update_indexes=0x7ffcb383bd5b) at heapam_handler.c:332
#12 0x00000000006da7ba in table_tuple_update (rel=0x7fa72f6ba198, otid=0x7ffcb383be60, slot=0x207b340, cid=0, snapshot=0x2045760, crosscheck=0x0, wait=true, tmfd=0x7ffcb383bd60, lockmode=0x7ffcb383bd5c, update_indexes=0x7ffcb383bd5b) at ../../../src/include/access/tableam.h:1261
#13 0x00000000006dc636 in ExecUpdate (mtstate=0x2079df0, tupleid=0x7ffcb383be60, oldtuple=0x0, slot=0x207b340, planSlot=0x207b1e0, epqstate=0x2079ee8, estate=0x2079a70, canSetTag=true) at nodeModifyTable.c:1312
#14 0x00000000006ddb1f in ExecModifyTable (pstate=0x2079df0) at nodeModifyTable.c:2223
#15 0x00000000006b21c9 in ExecProcNodeFirst (node=0x2079df0) at execProcnode.c:445
#16 0x00000000006a8356 in ExecProcNode (node=0x2079df0) at ../../../src/include/executor/executor.h:239
#17 0x00000000006aa6d2 in ExecutePlan (estate=0x2079a70, planstate=0x2079df0, use_parallel_mode=false, peration=CMD_UPDATE, sendTuples=false, numberTuples=0, direction=ForwardScanDirection, dest=0x2088838, execute_once=true) at execMain.c:1646
#18 0x00000000006a8833 in standard_ExecutorRun (queryDesc=0x207c6b0, direction=ForwardScanDirection, count=0, execute_once=true) at execMain.c:364
#19 0x00000000006a86d8 in ExecutorRun (queryDesc=0x207c6b0, direction=ForwardScanDirection, count=0, execute_once=true) at execMain.c:308
#20 0x000000000087ee7a in ProcessQuery (plan=0x1fecfc0,  sourceText=0x1fc6030 "update tbl_test set content='c05a2f0059b30755727a2807a17674bq' where id=1;", params=0x0, queryEnv=0x0, dest=0x2088838, completionTag=0x7ffcb383c270 "") at pquery.c:161
#21 0x00000000008805c1 in PortalRunMulti (portal=0x202b250, isTopLevel=true, setHoldSnapshot=false, dest=0x2088838, altdest=0x2088838, completionTag=0x7ffcb383c270 "") at pquery.c:1283
#22 0x000000000087fbfb in PortalRun (portal=0x202b250, count=9223372036854775807, isTopLevel=true, run_once=true, dest=0x2088838, altdest=0x2088838, completionTag=0x7ffcb383c270 "") at pquery.c:796
#23 0x0000000000879fa1 in exec_simple_query (query_string=0x1fc6030 "update tbl_test set content='c05a2f0059b30755727a2807a17674bq' where id=1;") at postgres.c:1215
#24 0x000000000087e00f in PostgresMain (argc=1, argv=0x1fef398, dbname=0x1fef258 "postgres", username=0x1fef238 "postgres") at postgres.c:4236
#25 0x00000000007e5c42 in BackendRun (port=0x1fe6f40) at postmaster.c:4431
#26 0x00000000007e5441 in BackendStartup (port=0x1fe6f40) at postmaster.c:4122
#27 0x00000000007e1aef in ServerLoop () at postmaster.c:1704
#28 0x00000000007e13af in PostmasterMain (argc=1, argv=0x1fc1d80) at postmaster.c:1377
#29 0x000000000070ef6e in main (argc=1, argv=0x1fc1d80) at main.c:228
[postgres@postgres_zabbix ~]$

主要关注以下几行
#8行申请锁,lockmode为5即ShareLock
#4行设置等待事件为wait_event_info=50331652
#0行进入epoll_wait中观察list链表,并sleep


#0  0x00007fa72ea38913 in __epoll_wait_nocancel () from /lib64/libc.so.6
#4  0x000000000085195f in WaitLatch (latch=0x7fa72e4011f4, wakeEvents=33, timeout=0, wait_event_info=50331652) at latch.c:347
#8  0x000000000085f47f in LockAcquire (locktag=0x7ffcb383ba90, lockmode=5, sessionLock=false, dontWait=false) at lock.c:713

使用框图简单描述下wait_event的设置,以及wait_event的查询显示的代码调用逻辑。

这里蓝色框表示函数入口,绿色框表示变量

在这里插入图片描述

1)左半边框图表示event的设置

可以看到该update申请Lock时,具体设置的等待事件为wait_event_info=50331652

这里粘贴一部分gdb跟踪过程,大致可以看到wait_event_info=50331652的设置过程

Breakpoint 3, ProcSleep (locallock=0x1fe46f0, lockMethodTable=0xb8d7a0 <default_lockmethod>) at proc.c:1065
1065            LOCKMODE        lockmode = locallock->tag.mode;
(gdb) 
1066            LOCK       *lock = locallock->lock;
(gdb) 
1076            PGPROC     *leader = MyProc->lockGroupLeader;
(gdb) p MyProc->wait_event_info  /*目前还未设置等待事件,值为0*/
$4 = 0
(gdb) n
1083            if (leader != NULL)
(gdb) 
1117            if (myHeldLocks != 0)
(gdb) 
1179                    proc = (PGPROC *) &(waitQueue->links);
(gdb) 
1263                            enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout);
(gdb) 
1282                    if (InHotStandby)
(gdb) n  /*wait_event_info是在waitlatch函数中设置的,该函数定义最后一个形参就是wait_event_info*/
         /*在这里调用该函数时传递的实参为事件类型和具体事件或运算的结果,即1290行的 PG_WAIT_LOCK | locallock->tag.lock.locktag_type*/
1289                            (void) WaitLatch(MyLatch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH, 0,   
1290      PG_WAIT_LOCK | locallock->tag.lock.locktag_type);
(gdb) p locallock->tag.lock.locktag_type
$5 = 4 '\004'
(gdb) 
1291                            ResetLatch(MyLatch);
(gdb) p MyProc->wait_event_info
$7 = 50331652       /*可以看到wait_event_info已经被配置为50331652*/
(gdb) p/x  MyProc->wait_event_info
$8 = 0x03000004      /*以16进制显示为0x03000004*/

2)右半边框图表示event的查询显示

可以看到在pg_stat_activity中查询到该update语句对应的等待事件类型为Lock,具体事件为wait_event=transactionid

主要分析下如何从wait_event_info=50331652得到wait_event=transactionid

50331652是一个十进制数,之前有提到postgresql中等待事件类型的宏定义对应的实际值都是使用无符号16进制数来表示。

将50331652转化为无符号16进制为0x03000004U,

关注框图右侧部分的逻辑
a. wait_event_type 的运算取值
wait_event_type = wait_event_info & 0xFF000000
= 0x03000004U & 0xFF000000
意为保留wait_event_info的高两位,运算值为0x03000000U,这个值比较眼熟吧,正是lock类型的定义

#define PG_WAIT_LOCK				0x03000000U

b. wait_event的运算取值:
step1 求locktag_type
locktag_type=wait_event_info & 0x0000FFFF
= 0x03000004U & 0x0000FFFF
意为保留wait_event_info的的低四位,运算值为0x00000004U,对应十进制值为4

step2 求wait_event
wait_event=LockTagTypeNames[locktag_type]
=LockTagTypeNames[4]

值为LockTagTypeNames下标为4的成员,从LockTagTypeNames数组定义可以找到下标为4对应的正是transactionid

/* This must match enum LockTagType! */
const char *const LockTagTypeNames[] = {
	"relation",
	"extend",
	"page",
	"tuple",
	"transactionid",
	"virtualxid",
	"speculative token",
	"object",
	"userlock",
	"advisory"
};
II.简单列举下其他几种类型

1.PG_WAIT_LWLOCK 类的buffer_content事件

如下一个表进行autovacuum ,可以看到当前的等待事件类型为LWLock,具体事件为buffer_content

-[ RECORD 1 ]---+----------------------------------------------------------------
pid 202623
wait_event_type | LWLock
wait_event      | buffer_content
query           | autovacuum: VACUUM public.apply_info_search (to prevent wraparound
backend_type    | autovacuum worker 

来看下stack信息:

[postgres@postgres_zabbix ~]$ pstack 202623
#0 0x002aae05c12aob in do_futex_wait.constprop.1 () from/lib64/libpthread.so 
#1 0x002aae05c12a9f in new_sem_wait_slow.constprop.0 () from /lib64/libpthrd.so
#2 0x002aae05c12b3b in sem_wait@@GLIBC 2.2.5 ()from /lib64/libpthread.so.0
#3 0x0000000068a8e2 in PGSemaphoreLock()
#4 0x000000006f2c34 in LWLockAcquire ()
#5 0x00000000960se1 in LockBufferForcleanup () 
#6 0x00000000964930 in btvacuumpage()
#7 0x000000004c423f in btvacuumscan()
#8 0x0000000066438e in btbulkdelete()
#9 0x000009005d3ecl in lazy_vacuum_index () 
#10 0x00000060654da5 in Lazy_vacuum_rel () 
#11 0x0009000095d29bs in vacuum_rel ()
#12 0x0009090005d3852 in vacuum()
#13 0x09000009068d690 in do_autovacuum ()
#14 0x00006606668daac in AutoVacworkerMain.isra.6 ()
#15 0x000099000068349 in StartAutoVacworker ()
#16 0x0000990000059aa in sigusr1_handler () 
#17 <signal handler called>
#18 0x0002aae7e5d783 in seLect_nocancel () from /Lib64/libc.so.6
#19 0x000000000478ba inserverLoop ()
#20 0x00000006669559 in PostmasterMain ()
#21 0x0096006047972b in main ()

可以看到当前进程通过LWLockAcquire 函数申请LWlock,尝试访问临界数据。目前在等锁,pg中LWlock是通过PGsemaphore实现(底层调用sem_wait等接口),最终调入futex接口进行sleep,等待临界资源可操作。

进程strace信息:

strace -p 202623
strace: Process 202623 attached 
futex(0x2aae0f901df8, FUTEX_WAIT, 0, NULL^Cstrace: Process 202623 detached
<detached ...>

使用futex是为了解决传统semaphore不必要的系统调用造成大量的性能损耗,具体可参考:Futex设计与实现

2.PG_WAIT_IPC 类的Bgworkershutdown事件

一个select查询,当前的等待事件类型为IPC,等待事件为Bgworkershutdown

-[ RECORD 1 ]---+----------------------------------------------------------------
pid 			| 20262
wait_event_type | IPC
wait_event      | Bgworkershutdown
query           | select sum(mem_used) from qsump_pacloud_oscginfo_activity_detail_info_day

stack信息:

[postgres@postgres_zabbix ~]$ pstack 20262
#0 0x00002aae97066903 in _epoll_wait nocancel () from /Lib64/libc.so.6
#1 0x00000000006e156e in WaitEventsetWait () 
#2 0x00000000006e1b97 in WaitLatchOrSocket ()
#3 0x000000000068f89a in WaitForBackgroundworkerShutdown ()
#4 0x00000000004ddbde in WaitForParallelworkersToExit.isra.1 ()
#5 0x00000000004de76d in DestroyParallelContext ()
#6 0x00000000004dec6b in AtEoxact_Parallel ()
#7 0x00000000004e8997 in AbortTransaction() 
#8 0x00000000004e8fc5 in AbortCurrentTransaction()
#9 0x0000000000701501 in PostgresMain()
#10 0x0000000000478cdf in ServerLoop () 
#11 0x000000000069c559 in PostmasterMain ()
#12 0x000000000047972b in Main () 
[postgres@postgres_zabbix ~]$

从stack可以看到,
#8行 该select 进程当前在进行事务回滚
#5行 准备销毁ParallelContext
#3行 等待后台并行进程shutdown
#0行 进入epoll_wait中观察list链表,并sleep

ps可以看到目前20262有两个并行进程20273 ,202734

[postgres@postgres_zabbix ~]$ ps -ef|grep 20262| grep -v grep 
postgres 20262 20260 0 May18? 00:00:00 postgres: pg12: pguser postgres 127.0.0.1(35442) SELECT
postgres 20273 20260 0 May18? 00:00:00 postgres: pg12: bgworker: parat 00:00:00 postgres: pg12: bgworker: parallel worker for PID 20262
postgres 20274 20260 0 May18? 00:00:00 postgres: pg12: bgworker: parat 00:00:00 postgres: pg12: bgworker: parallel worker for PID 20262
[postgres@postgres_zabbix ~]$

不进行详细的代码分析了,大致场景为查询进程事务回滚,在等待并行进程退出并返回结果。也就是目前是进程间通信状态,因此等待事件的类型为IPC,事件就是Bgworkershutdown

3.PG_WAIT_ACTIVITY 类型的CheckpointerMain事件
4.PG_WAIT_CLIENT 类型的ClientRead 事件

示例3和4请参考文章最后提到的另一篇博文

三、总结

通过以上的例子可以发现,PostgreSQL的等待事件,其实就是根据各种使用场景,自定义事件和类型,最终的“等待”基本是通过封装epoll、 futex等系统接口实现的。

特别是epoll的使用场景最多,了解epoll相关可以参考之前记录的一篇博文:《PostgreSQL中的io多路复用–select和epoll实现》

  • 10
    点赞
  • 21
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值