前言
LightDB是基于PostgreSQL的分布式关系型数据库,100%兼容PostgreSQL。
锁是实现数据库并发控制必不可少的功能,LightDBP数据库通过其特有的多版本属性实现了MVCC,实现了读不阻塞写,写不阻塞读。但是经常会出现一些异常的情况,比如有些慢SQL,可能它被阻塞了,无法获取资源,也就是无法获得到锁。
SQL被阻塞
如何分析哪些SQL被阻塞,无法执行了,可以通过查询获悉
SELECT
wait_locks.locktype lock_type,
wait_locks.pid AS wait_pid,
wait_activity.usename AS wait_user,
wait_activity.application_name AS wait_application,
wait_activity.query AS wait_query,
hold_locks.pid AS hold_pid
FROM
pg_catalog.pg_locks wait_locks
JOIN pg_catalog.pg_stat_activity wait_activity ON wait_activity.pid = wait_locks.pid
JOIN pg_catalog.pg_locks hold_locks ON hold_locks.locktype = wait_locks.locktype
AND hold_locks.DATABASE IS NOT DISTINCT
FROM
wait_locks.DATABASE
AND hold_locks.relation IS NOT DISTINCT
FROM
wait_locks.relation
AND hold_locks.page IS NOT DISTINCT
FROM
wait_locks.page
AND hold_locks.tuple IS NOT DISTINCT
FROM
wait_locks.tuple
AND hold_locks.virtualxid IS NOT DISTINCT
FROM
wait_locks.virtualxid
AND hold_locks.transactionid IS NOT DISTINCT
FROM
wait_locks.transactionid
AND hold_locks.classid IS NOT DISTINCT
FROM
wait_locks.classid
AND hold_locks.objid IS NOT DISTINCT
FROM
wait_locks.objid
AND hold_locks.objsubid IS NOT DISTINCT
FROM
wait_locks.objsubid
AND hold_locks.pid != wait_locks.pid
JOIN pg_catalog.pg_stat_activity hold_activity ON hold_activity.pid = hold_locks.pid
WHERE
NOT wait_locks.GRANTED;
谁阻塞SQL
通过上文知道哪些SQL被阻塞,无法获取锁,那阻塞它的是谁了?真正获取到锁的又是哪些了?如何去消除阻塞?
通过上文可以知道被阻塞SQL的pid,这里称为wait_pid,通过wait_pid去查询哪些SQL阻塞它了。
WITH t_wait AS (
SELECT A
.MODE,
A.locktype,
A.DATABASE,
A.relation,
A.page,
A.tuple,
A.classid,
A.GRANTED,
A.objid,
A.objsubid,
A.pid,
A.virtualtransaction,
A.virtualxid,
A.transactionid,
A.fastpath,
b.STATE,
b.query,
b.xact_start,
b.query_start,
b.usename,
b.datname,
b.client_addr,
b.client_port,
b.application_name
FROM
pg_locks A,
pg_stat_activity b
WHERE
A.pid = b.pid
AND NOT A.GRANTED
),
t_run AS (
SELECT A
.MODE,
A.locktype,
A.DATABASE,
A.relation,
A.page,
A.tuple,
A.classid,
A.GRANTED,
A.objid,
A.objsubid,
A.pid,
A.virtualtransaction,
A.virtualxid,
A.transactionid,
A.fastpath,
b.STATE,
b.query,
b.xact_start,
b.query_start,
b.usename,
b.datname,
b.client_addr,
b.client_port,
b.application_name
FROM
pg_locks A,
pg_stat_activity b
WHERE
A.pid = b.pid
AND A.GRANTED
),
t_overlap AS (
SELECT
r.*
FROM
t_wait w
JOIN t_run r ON (
r.locktype IS NOT DISTINCT
FROM
w.locktype
AND r.DATABASE IS NOT DISTINCT
FROM
w.DATABASE
AND r.relation IS NOT DISTINCT
FROM
w.relation
AND r.page IS NOT DISTINCT
FROM
w.page
AND r.tuple IS NOT DISTINCT
FROM
w.tuple
AND r.virtualxid IS NOT DISTINCT
FROM
w.virtualxid
AND r.transactionid IS NOT DISTINCT
FROM
w.transactionid
AND r.classid IS NOT DISTINCT
FROM
w.classid
AND r.objid IS NOT DISTINCT
FROM
w.objid
AND r.objsubid IS NOT DISTINCT
FROM
w.objsubid
AND r.pid != w.pid AND w.pid = #{wait_pid}
)
)SELECT
string_agg (
'Pid: ' ||
CASE
WHEN pid IS NULL THEN
'NULL' ELSE pid :: TEXT
END || chr( 10 ) || 'Lock_Granted: ' ||
CASE
WHEN GRANTED IS NULL THEN
'NULL' ELSE GRANTED :: TEXT
END || ' , Mode: ' ||
CASE
WHEN MODE IS NULL THEN
'NULL' ELSE MODE :: TEXT
END || ' , FastPath: ' ||
CASE
WHEN fastpath IS NULL THEN
'NULL' ELSE fastpath :: TEXT
END || ' , VirtualTransaction: ' ||
CASE
WHEN virtualtransaction IS NULL THEN
'NULL' ELSE virtualtransaction :: TEXT
END || ' , Session_State: ' ||
CASE
WHEN STATE IS NULL THEN
'NULL' ELSE STATE :: TEXT
END || chr( 10 ) || 'Username: ' ||
CASE
WHEN usename IS NULL THEN
'NULL' ELSE usename :: TEXT
END || ' , Database: ' ||
CASE
WHEN datname IS NULL THEN
'NULL' ELSE datname :: TEXT
END || ' , Client_Addr: ' ||
CASE
WHEN client_addr IS NULL THEN
'NULL' ELSE client_addr :: TEXT
END || ' , Client_Port: ' ||
CASE
WHEN client_port IS NULL THEN
'NULL' ELSE client_port :: TEXT
END || ' , Application_Name: ' ||
CASE
WHEN application_name IS NULL THEN
'NULL' ELSE application_name :: TEXT
END || chr( 10 ) || 'Xact_Start: ' ||
CASE
WHEN xact_start IS NULL THEN
'NULL' ELSE xact_start :: TEXT
END || ' , Query_Start: ' ||
CASE
WHEN query_start IS NULL THEN
'NULL' ELSE query_start :: TEXT
END || ' , Xact_Elapse: ' ||
CASE
WHEN ( now( ) - xact_start ) IS NULL THEN
'NULL' ELSE ( now( ) - xact_start ) :: TEXT
END || ' , Query_Elapse: ' ||
CASE
WHEN ( now( ) - query_start ) IS NULL THEN
'NULL' ELSE ( now( ) - query_start ) :: TEXT
END || chr( 10 ) || 'SQL (Current SQL in Transaction): ' || chr( 10 ) ||
CASE
WHEN query IS NULL THEN
'NULL' ELSE query :: TEXT
END,
chr( 10 ) || '--------' || chr( 10 )
ORDER BY
(
CASE
MODE
WHEN 'INVALID' THEN
0
WHEN 'AccessShareLock' THEN
1
WHEN 'RowShareLock' THEN
2
WHEN 'RowExclusiveLock' THEN
3
WHEN 'ShareUpdateExclusiveLock' THEN
4
WHEN 'ShareLock' THEN
5
WHEN 'ShareRowExclusiveLock' THEN
6
WHEN 'ExclusiveLock' THEN
7
WHEN 'AccessExclusiveLock' THEN
8 ELSE 0
END
) DESC,
( CASE WHEN GRANTED THEN 0 ELSE 1 END )
) AS lock_conflict
FROM
t_overlap
其实可以通过pg_blocking_pids 函数来知悉哪些pid(称为pids)阻塞wait_pid,这些pids获取到了资源,也就是获取到了锁,没有及时释放锁,才是造成阻塞的元凶。
简单模拟一下阻塞情况
--准备表,创建数据
CREATE table locktest (id int4,name VARCHAR);
INSERT into locktest(id,name) VALUES(1,'3');
--Session1
BEGIN
SELECT * FROM locktest LIMIT 1
select pg_backend_pid();
--end(不执行)
--Session2
BEGIN
select pg_backend_pid();
truncate locktest;
--end(不执行)
实验可以看出Session2 执行 truncate 语句就会被一直阻塞,无法运行成功。
参考
PostgreSQL: Documentation: 14: 52.74. pg_locks
PostgreSQL: Documentation: 14: 28.2. The Statistics Collector