We have already analyzed how checkpoints work and traced the whole checkpoint process, but we have not yet found what actually causes the TPS drop.
This article focuses on tracing PostgreSQL's LWLocks:
src/backend/storage/lmgr/lwlock.c
```
TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(l), T_ID(l), mode);

for (;;)
{
    /* "false" means cannot accept cancel/die interrupt here. */
    PGSemaphoreLock(&proc->sem, false);
    if (!proc->lwWaiting)
        break;
    extraWaits++;
}

TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(l), T_ID(l), mode);
```
src/include/storage/lwlock.h
```
typedef enum LWLockMode
{
    LW_EXCLUSIVE,
    LW_SHARED,
    LW_WAIT_UNTIL_FREE  /* A special mode used in PGPROC->lwlockMode,
                         * when waiting for lock to become free. Not
                         * to be used as LWLockAcquire argument */
} LWLockMode;
```
| Name | Parameters | Description |
| --- | --- | --- |
| lwlock-wait-start | (char *, int, LWLockMode) | Probe that fires when an LWLock was not immediately available and a server process has begun to wait for the lock to become available. arg0 is the LWLock's tranche. arg1 is the LWLock's offset within its tranche. arg2 is the requested lock mode, either exclusive or shared. |
| lwlock-wait-done | (char *, int, LWLockMode) | Probe that fires when a server process has been released from its wait for an LWLock (it does not actually have the lock yet). arg0 is the LWLock's tranche. arg1 is the LWLock's offset within its tranche. arg2 is the requested lock mode, either exclusive or shared. |
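Before writing the tracing scripts, it can be worth checking that these markers are actually compiled into the binary (they are only present when PostgreSQL is built with --enable-dtrace). SystemTap's listing mode prints each matching marker together with its arguments; the binary path below is the same one used in the scripts that follow:

```
# List the lwlock markers and their arguments
stap -L 'process("/opt/pgsql/bin/postgres").mark("lwlock__*")'
```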
```
$ vi test.sql
\setrandom id 1 50000000
update tbl set info=now(),crt_time=now() where id=:id;

$ pgbench -M prepared -n -r -f ./test.sql -c 3 -j 3 -P 2 -T 100000000
progress: 776.0 s, 13169.7 tps, lat 0.226 ms stddev 0.063
progress: 778.0 s, 7930.0 tps, lat 0.377 ms stddev 0.096
progress: 780.0 s, 7660.1 tps, lat 0.390 ms stddev 0.108
progress: 782.0 s, 8118.8 tps, lat 0.368 ms stddev 0.098
progress: 784.0 s, 8576.5 tps, lat 0.348 ms stddev 0.093
progress: 786.0 s, 9251.4 tps, lat 0.323 ms stddev 0.085
progress: 788.0 s, 9812.2 tps, lat 0.304 ms stddev 0.078
progress: 790.0 s, 10036.8 tps, lat 0.297 ms stddev 0.074
progress: 792.0 s, 10359.1 tps, lat 0.288 ms stddev 0.081
progress: 794.0 s, 10675.3 tps, lat 0.280 ms stddev 0.077
progress: 796.0 s, 10904.1 tps, lat 0.274 ms stddev 0.071
progress: 798.0 s, 11470.2 tps, lat 0.260 ms stddev 0.076
progress: 800.0 s, 11909.3 tps, lat 0.250 ms stddev 0.074
progress: 802.0 s, 12297.4 tps, lat 0.243 ms stddev 0.073
progress: 804.0 s, 12463.8 tps, lat 0.239 ms stddev 0.075
progress: 806.0 s, 12513.3 tps, lat 0.238 ms stddev 0.076
progress: 808.0 s, 12669.9 tps, lat 0.235 ms stddev 0.068
```
Taking roughly 13000 TPS as the baseline, with 3 client connections a single request takes about 230.8 microseconds:

1000000*(1/(13000/3)) ≈ 230.8
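Written out as an equation (13000 TPS shared by 3 connections means each connection completes roughly 4333 requests per second, and the average service time is the reciprocal of that):

$$
\frac{1}{13000/3\ \mathrm{s}^{-1}} = \frac{3}{13000}\ \mathrm{s} \approx 230.8\ \mu\mathrm{s} \approx 0.23\ \mathrm{ms}
$$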
```
digoal=# select pid,query from pg_stat_activity;
 pid  |                         query
------+-------------------------------------------------------
 4249 | update tbl set info=now(),crt_time=now() where id=$1;
 5527 | autovacuum: VACUUM ANALYZE digoal.tbl
 4250 | update tbl set info=now(),crt_time=now() where id=$1;
 4251 | update tbl set info=now(),crt_time=now() where id=$1;
 4134 | select pid,query from pg_stat_activity;
(5 rows)

ps -ewf|grep 1484
postgres  1484 30863  4 12:35 ?  00:02:17 postgres: wal writer process
```
Trace the lock waits of these processes.
```
# stap -DMAXSKIPPED=100000 -v 11111 -e 'global s_var, e_var, stat_var;

/* probe lwlock__wait__start(const char *, int, LWLockMode); */
probe process("/opt/pgsql/bin/postgres").mark("lwlock__wait__start") {
  s_var[pid()] = gettimeofday_us()
}

/* probe lwlock__wait__done(const char *, int, LWLockMode); */
probe process("/opt/pgsql/bin/postgres").mark("lwlock__wait__done") {
  e_var[pid()] = gettimeofday_us()
  if ( s_var[pid()] > 0 && ((pid() >= 4249 && pid() <= 4251) || pid() == 1484 ) )
    stat_var[pid()] <<< e_var[pid()] - s_var[pid()]
}

probe timer.s(1) {
  foreach ([v1] in stat_var +) {
    if ( @count(stat_var[v1]) > 0 ) {
      printf("pid: %d, min: %d, max: %d, avg: %d, sum: %d, count: %d\n", v1, @min(stat_var[v1]), @max(stat_var[v1]), @avg(stat_var[v1]), @sum(stat_var[v1]), @count(stat_var[v1]))
    }
  }
  printf("----------------------------------end-----------------------------\n")
  delete s_var
  delete e_var
  delete stat_var
}'
```
When the checkpoint occurs, the WAL writer process shows noticeably more LWLock waits.
PID 1484 is the wal writer process; the other PIDs are the pgbench test backends.
The count column shows that once the checkpoint starts, the test backends wait fewer times.
The avg column shows only a tiny change in the average wait time, roughly 1 microsecond more, which is nowhere near enough to explain the performance drop, since a normal request already takes about 230.8 microseconds.
```
pid: 1484, min: 0, max: 3, avg: 1, sum: 25, count: 19
pid: 4249, min: 0, max: 2, avg: 0, sum: 161, count: 214
pid: 4251, min: 0, max: 3, avg: 0, sum: 181, count: 227
pid: 4250, min: 0, max: 2, avg: 0, sum: 181, count: 232
----------------------------------end-----------------------------
pid: 4249, min: 0, max: 3, avg: 0, sum: 134, count: 150
pid: 4250, min: 0, max: 5, avg: 1, sum: 167, count: 157
pid: 4251, min: 0, max: 3, avg: 0, sum: 145, count: 168
pid: 1484, min: 0, max: 3, avg: 0, sum: 142, count: 255
----------------------------------end-----------------------------
pid: 4250, min: 0, max: 6, avg: 2, sum: 187, count: 75
pid: 4251, min: 0, max: 3, avg: 1, sum: 88, count: 83
pid: 4249, min: 0, max: 3, avg: 0, sum: 102, count: 107
pid: 1484, min: 0, max: 6, avg: 0, sum: 302, count: 467
----------------------------------end-----------------------------
pid: 4250, min: 0, max: 6, avg: 2, sum: 198, count: 84
pid: 4251, min: 0, max: 4, avg: 1, sum: 124, count: 102
pid: 4249, min: 0, max: 2, avg: 0, sum: 98, count: 108
pid: 1484, min: 0, max: 4, avg: 0, sum: 380, count: 633
----------------------------------end-----------------------------
pid: 4250, min: 0, max: 6, avg: 2, sum: 197, count: 77
pid: 4251, min: 0, max: 4, avg: 1, sum: 106, count: 102
pid: 4249, min: 0, max: 2, avg: 0, sum: 101, count: 111
pid: 1484, min: 0, max: 4, avg: 0, sum: 396, count: 719
----------------------------------end-----------------------------
pid: 4249, min: 0, max: 4, avg: 1, sum: 88, count: 85
pid: 4251, min: 0, max: 6, avg: 1, sum: 102, count: 91
pid: 4250, min: 0, max: 6, avg: 2, sum: 221, count: 108
pid: 1484, min: 0, max: 4, avg: 0, sum: 360, count: 705
----------------------------------end-----------------------------
pid: 4250, min: 0, max: 7, avg: 2, sum: 211, count: 93
pid: 4249, min: 0, max: 13, avg: 1, sum: 152, count: 124
pid: 4251, min: 0, max: 4, avg: 1, sum: 132, count: 130
pid: 1484, min: 0, max: 5, avg: 0, sum: 419, count: 730
----------------------------------end-----------------------------
```
Looking at this from another angle: when a checkpoint occurs, how much time does each PostgreSQL function spend on a single request?
postgres 3167 2767 93 13:59 ? 00:00:47 postgres: digoal digoal [local] UPDATE
However, there are far too many functions: probing them all can hang the machine, and even if it does not, the result is not meaningful, because the SystemTap overhead is large. A request that normally completes in under 1 millisecond balloons to more than 100 milliseconds once every function is traced.
```
# stap -v 11111 -x 3167 -e 'global f_start[999999],f_stop[999999]

probe process("/opt/pgsql/bin/postgres").function("*@/opt/soft_bak/postgresql-9.4.1/src/*").call {
  f_start[execname(), pid(), tid(), cpu()] = gettimeofday_ns()
}

probe process("/opt/pgsql/bin/postgres").function("*@/opt/soft_bak/postgresql-9.4.1/src/*").return {
  t = gettimeofday_ns()
  a = execname()
  b = cpu()
  c = pid()
  d = pp()
  e = tid()
  if (f_start[a,c,e,b]) {
    f_stop[a,d] <<< t - f_start[a,c,e,b]
  }
}

probe timer.s(1) {
  foreach ([a,d] in f_stop @sum - limit 10 ) {
    printf("avg_ns:%d, sum_ns:%d, cnt:%d, execname:%s, pp:%s\n", @avg(f_stop[a,d]), @sum(f_stop[a,d]), @count(f_stop[a,d]), a, d)
  }
  delete f_start
  delete f_stop
}'
```
For example:

Before starting stap:
```
digoal=> update tbl set info=now() where id=1;
UPDATE 1
Time: 1.129 ms
```
After starting stap:
```
digoal=> update tbl set info=now() where id=1;
UPDATE 1
Time: 167.001 ms
```
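If probing every function is too intrusive, a lighter-weight variant (a sketch, not the script used for the measurements below) only counts calls, skipping the per-call timestamps and the .return probes:

```
# stap -x 3167 -e 'global calls
probe process("/opt/pgsql/bin/postgres").function("*@/opt/soft_bak/postgresql-9.4.1/src/*").call {
  calls[probefunc()]++   /* count calls per function, no timing */
}
probe timer.s(1) {
  foreach ([f] in calls- limit 10)
    printf("%s: %d\n", f, calls[f])
  delete calls
}'
```

This still attaches to a very large number of probe points, so narrowing the path filter, as done next, remains the more effective step.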
We can narrow the scope of function("*@/opt/soft_bak/postgresql-9.4.1/src/*") and trace again. For example, since our first suspicion is a locking problem, we can restrict it to function("*@/opt/soft_bak/postgresql-9.4.1/src/backend/storage/lmgr/*"):
```
# stap -v 11111 -x 3167 -e 'global f_start[999999],f_stop[999999]

probe process("/opt/pgsql/bin/postgres").function("*@/opt/soft_bak/postgresql-9.4.1/src/backend/storage/lmgr/*").call {
  f_start[execname(), pid(), tid(), cpu()] = gettimeofday_ns()
}

probe process("/opt/pgsql/bin/postgres").function("*@/opt/soft_bak/postgresql-9.4.1/src/backend/storage/lmgr/*").return {
  t = gettimeofday_ns()
  a = execname()
  b = cpu()
  c = pid()
  d = pp()
  e = tid()
  if (f_start[a,c,e,b]) {
    f_stop[a,d] <<< t - f_start[a,c,e,b]
  }
}

probe timer.s(1) {
  foreach ([a,d] in f_stop @sum - limit 10 ) {
    printf("avg_ns:%d, sum_ns:%d, cnt:%d, execname:%s, pp:%s\n", @avg(f_stop[a,d]), @sum(f_stop[a,d]), @count(f_stop[a,d]), a, d)
  }
  printf("-------------------------end--------------------------\n")
  delete f_start
  delete f_stop
}'
```
Excerpt of the results:
-------------------------end--------------------------
avg_ns:3108, sum_ns:80113700, cnt:25772, execname:postgres, pp:process("/opt/pgsql9.4.1/bin/postgres").function("LWLockRelease@/opt/soft_bak/postgresql-9.4.1/src/backend/storage/lmgr/lwlock.c:1118").return
avg_ns:3237, sum_ns:74622113, cnt:23051, execname:postgres, pp:process("/opt/pgsql9.4.1/bin/postgres").function("LWLockAcquire@/opt/soft_bak/postgresql-9.4.1/src/backend/storage/lmgr/lwlock.c:465").return
avg_ns:10610, sum_ns:37179631, cnt:3504, execname:postgres, pp:process("/opt/pgsql9.4.1/bin/postgres").function("LockRelationOid@/opt/soft_bak/postgresql-9.4.1/src/backend/storage/lmgr/lmgr.c:87").return
avg_ns:7832, sum_ns:34298340, cnt:4379, execname:postgres, pp:process("/opt/pgsql9.4.1/bin/postgres").function("LockAcquire@/opt/soft_bak/postgresql-9.4.1/src/backend/storage/lmgr/lock.c:672").return
-------------------------end--------------------------

After the checkpoint starts, the call counts drop:
avg_ns:3122, sum_ns:70382918, cnt:22539, execname:postgres, pp:process("/opt/pgsql9.4.1/bin/postgres").function("LWLockRelease@/opt/soft_bak/postgresql-9.4.1/src/backend/storage/lmgr/lwlock.c:1118").return
avg_ns:3302, sum_ns:66548991, cnt:20152, execname:postgres, pp:process("/opt/pgsql9.4.1/bin/postgres").function("LWLockAcquire@/opt/soft_bak/postgresql-9.4.1/src/backend/storage/lmgr/lwlock.c:465").return
avg_ns:10591, sum_ns:32493427, cnt:3068, execname:postgres, pp:process("/opt/pgsql9.4.1/bin/postgres").function("LockRelationOid@/opt/soft_bak/postgresql-9.4.1/src/backend/storage/lmgr/lmgr.c:87").return
avg_ns:7832, sum_ns:30037745, cnt:3835, execname:postgres, pp:process("/opt/pgsql9.4.1/bin/postgres").function("LockAcquire@/opt/soft_bak/postgresql-9.4.1/src/backend/storage/lmgr/lock.c:672").return
-------------------------end--------------------------
A very large number of LWLock acquire and release calls happen every second, and the top 4 entries account for most of the time spent in the lock manager.
When a checkpoint occurs, the call counts drop, which matches the earlier LWLock-wait tracing: the request rate simply goes down.
So this does not look like the cause of the performance drop either.
The cause must be back in the code. From the tracing in part 4, the performance drop appears during the buffer-flush loop of the checkpoint, which starts here:
```
TRACE_POSTGRESQL_BUFFER_SYNC_START(NBuffers, num_to_write);

/*
 * Loop over all buffers again, and write the ones (still) marked with
 * BM_CHECKPOINT_NEEDED.  In this loop, we start at the clock sweep point
 * since we might as well dump soon-to-be-recycled buffers first.
 *
 * Note that we don't read the buffer alloc count here --- that should be
 * left untouched till the next BgBufferSync() call.
 */
buf_id = StrategySyncStart(NULL, NULL);
num_to_scan = NBuffers;
num_written = 0;
while (num_to_scan-- > 0)
{
    ......
```
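The TRACE_POSTGRESQL_BUFFER_SYNC_START macro above is exposed as the buffer-sync-start probe, and buffer-sync-written / buffer-sync-done bracket the rest of the loop, so the progress of this flush loop can also be watched directly. A minimal sketch (probe arguments as documented for 9.4; the counter is reset per checkpoint pass):

```
# stap -e 'global written
probe process("/opt/pgsql/bin/postgres").mark("buffer__sync__start") {
  /* arg1 = total number of buffers (NBuffers), arg2 = number of dirty buffers to write */
  printf("buffer sync start: NBuffers=%d, num_to_write=%d\n", $arg1, $arg2)
}
probe process("/opt/pgsql/bin/postgres").mark("buffer__sync__written") {
  written++   /* fires once for every buffer the checkpointer writes */
}
probe process("/opt/pgsql/bin/postgres").mark("buffer__sync__done") {
  printf("buffer sync done: %d buffers written\n", written)
  written = 0
}'
```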
StrategySyncStart tells the checkpointer where to start syncing: it returns the current nextVictimBuffer, i.e. the clock-sweep point of the buffer replacement (approximate LRU) strategy.
```
/*
 * StrategySyncStart -- tell BufferSync where to start syncing
 *
 * The result is the buffer index of the best buffer to sync first.
 * BufferSync() will proceed circularly around the buffer array from there.
 *
 * In addition, we return the completed-pass count (which is effectively
 * the higher-order bits of nextVictimBuffer) and the count of recent buffer
 * allocs if non-NULL pointers are passed.  The alloc count is reset after
 * being read.
 */
int
StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
{
    int         result;

    LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
    result = StrategyControl->nextVictimBuffer;
    if (complete_passes)
        *complete_passes = StrategyControl->completePasses;
    if (num_buf_alloc)
    {
        *num_buf_alloc = StrategyControl->numBufferAllocs;
        StrategyControl->numBufferAllocs = 0;
    }
    LWLockRelease(BufFreelistLock);

    return result;
}
```
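Both the checkpointer's BufferSync() and the background writer's BgBufferSync() consult this function. To watch who calls it and which buffer index it hands back, a simple function-return probe is enough (a sketch, not part of the original test run):

```
# stap -e 'probe process("/opt/pgsql/bin/postgres").function("StrategySyncStart").return {
  /* $return is the buffer index the caller will start from */
  printf("%s (pid %d): StrategySyncStart returned buf_id %d\n", execname(), pid(), $return)
}'
```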
For our test case, however, virtually every dirty buffer is hot, so it makes little difference where the sync starts.
Therefore the performance drop is not related to this:
1. There can be brief lock conflicts while syncing buffers, but the earlier LWLock tracing (and the tracing before that) shows the impact is tiny, essentially negligible.
2. The checkpoint syncs dirty blocks to disk and they become clean, but they stay in shared buffers, so the operations that immediately follow still need no physical reads (md read).
```
$ pgbench -M prepared -n -r -f ./test.sql -P 1 -c 26 -j 26 -T 10000000
progress: 1.0 s, 13550.0 tps, lat 1.891 ms stddev 3.474
progress: 2.0 s, 15505.7 tps, lat 1.655 ms stddev 3.549
progress: 3.0 s, 18030.0 tps, lat 1.443 ms stddev 3.210
progress: 4.0 s, 20844.0 tps, lat 1.258 ms stddev 2.867
progress: 5.0 s, 25317.6 tps, lat 1.025 ms stddev 2.127
progress: 6.0 s, 31146.7 tps, lat 0.833 ms stddev 1.702
progress: 7.0 s, 40137.8 tps, lat 0.646 ms stddev 0.975
progress: 8.0 s, 51564.9 tps, lat 0.502 ms stddev 0.368
progress: 9.0 s, 53915.6 tps, lat 0.480 ms stddev 0.311
progress: 10.0 s, 56830.9 tps, lat 0.456 ms stddev 0.281
progress: 11.0 s, 57333.8 tps, lat 0.452 ms stddev 0.283
progress: 12.0 s, 58938.8 tps, lat 0.439 ms stddev 0.270
progress: 13.0 s, 59608.3 tps, lat 0.434 ms stddev 0.265
progress: 14.0 s, 59014.8 tps, lat 0.439 ms stddev 0.281
progress: 15.0 s, 60241.9 tps, lat 0.430 ms stddev 0.259
progress: 16.0 s, 61337.1 tps, lat 0.422 ms stddev 0.248
progress: 17.0 s, 60821.9 tps, lat 0.426 ms stddev 0.259
progress: 18.0 s, 60850.2 tps, lat 0.425 ms stddev 0.265
```
We can trace physical reads and writes to confirm that after the checkpoint there are in fact no physical reads.
| Name | Parameters | Description |
| --- | --- | --- |
| smgr-md-read-start | (ForkNumber, BlockNumber, Oid, Oid, Oid, int) | Probe that fires when beginning to read a block from a relation. arg0 and arg1 contain the fork and block numbers of the page. arg2, arg3, and arg4 contain the tablespace, database, and relation OIDs identifying the relation. arg5 is the ID of the backend which created the temporary relation for a local buffer, or InvalidBackendId (-1) for a shared buffer. |
| smgr-md-read-done | (ForkNumber, BlockNumber, Oid, Oid, Oid, int, int, int) | Probe that fires when a block read is complete. arg0 and arg1 contain the fork and block numbers of the page. arg2, arg3, and arg4 contain the tablespace, database, and relation OIDs identifying the relation. arg5 is the ID of the backend which created the temporary relation for a local buffer, or InvalidBackendId (-1) for a shared buffer. arg6 is the number of bytes actually read, while arg7 is the number requested (if these are different it indicates trouble). |
src
```
/*
 * ReadBuffer_common -- common logic for all ReadBuffer variants
 *
 * *hit is set to true if the request was satisfied from shared buffer cache.
 */
static Buffer
ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
                  BlockNumber blockNum, ReadBufferMode mode,
                  BufferAccessStrategy strategy, bool *hit)
{
......

probe buffer__read__start(ForkNumber, BlockNumber, Oid, Oid, Oid, int, bool);
probe buffer__read__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, bool, bool);
```
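A complementary check is to watch buffer__read__done, whose last argument indicates whether the block was found in the buffer pool. In SystemTap the marker arguments are addressed as $arg1..$arg8, so per the 9.4 probe documentation the found-in-pool flag is $arg8. A minimal hit/miss counter sketch:

```
# stap -e 'global hits, misses
probe process("/opt/pgsql/bin/postgres").mark("buffer__read__done") {
  if ($arg8) {
    hits++      /* the block was already in shared buffers */
  } else {
    misses++    /* the block had to be read in from disk */
  }
}
probe timer.s(1) {
  printf("shared buffer hits: %d, misses: %d\n", hits, misses)
  hits = 0
  misses = 0
}'
```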
Trace the reads and writes:
```
# stap -DMAXSKIPPED=100000 -v 11111 -e 'global s_var, e_var, stat_var;

/* probe smgr__md__read__start(ForkNumber, BlockNumber, Oid, Oid, Oid, int); */
probe process("/opt/pgsql/bin/postgres").mark("smgr__md__read__start") {
  s_var[pid(),1] = gettimeofday_us()
}

/* probe smgr__md__read__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, int, int); */
probe process("/opt/pgsql/bin/postgres").mark("smgr__md__read__done") {
  e_var[pid(),1] = gettimeofday_us()
  if ( s_var[pid(),1] > 0 )
    stat_var[pid(),1] <<< e_var[pid(),1] - s_var[pid(),1]
}

/* probe buffer__write__start(ForkNumber, BlockNumber, Oid, Oid, Oid, int, bool); */
probe process("/opt/pgsql/bin/postgres").mark("buffer__write__start") {
  s_var[pid(),2] = gettimeofday_us()
}

/* probe buffer__write__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, bool, bool); */
probe process("/opt/pgsql/bin/postgres").mark("buffer__write__done") {
  e_var[pid(),2] = gettimeofday_us()
  if ( s_var[pid(),2] > 0 )
    stat_var[pid(),2] <<< e_var[pid(),2] - s_var[pid(),2]
}

probe timer.s(1) {
  foreach ([v1,v2] in stat_var +) {
    if ( @count(stat_var[v1,v2]) > 0 ) {
      printf("pid: %d, what: %d, min: %d, max: %d, avg: %d, sum: %d, count: %d\n", v1, v2, @min(stat_var[v1,v2]), @max(stat_var[v1,v2]), @avg(stat_var[v1,v2]), @sum(stat_var[v1,v2]), @count(stat_var[v1,v2]))
    }
  }
  printf("----------------------------------end-----------------------------\n")
  delete s_var
  delete e_var
  delete stat_var
}'
```
Start the load test:
```
$ pgbench -M prepared -n -r -f ./test.sql -P 1 -c 26 -j 26 -T 10000000
......
progress: 56.0 s, 60813.0 tps, lat 0.426 ms stddev 0.271
progress: 57.0 s, 59748.9 tps, lat 0.433 ms stddev 0.390
progress: 58.0 s, 61135.6 tps, lat 0.423 ms stddev 0.398
progress: 59.0 s, 64174.6 tps, lat 0.403 ms stddev 0.333
progress: 60.0 s, 59503.6 tps, lat 0.435 ms stddev 0.282
progress: 61.0 s, 60662.4 tps, lat 0.427 ms stddev 0.271
progress: 62.0 s, 60582.2 tps, lat 0.427 ms stddev 0.274
progress: 63.0 s, 61523.6 tps, lat 0.421 ms stddev 0.258
progress: 64.0 s, 62394.7 tps, lat 0.415 ms stddev 0.251
progress: 65.0 s, 60892.6 tps, lat 0.425 ms stddev 0.277
progress: 66.0 s, 60703.2 tps, lat 0.426 ms stddev 0.280
progress: 67.0 s, 61629.9 tps, lat 0.420 ms stddev 0.261
progress: 68.0 s, 61231.5 tps, lat 0.423 ms stddev 0.270
```
Once TPS reaches its peak, all the hot blocks are in memory, and the trace shows no physical reads at all.
```
----------------------------------end-----------------------------
----------------------------------end-----------------------------
----------------------------------end-----------------------------
----------------------------------end-----------------------------
----------------------------------end-----------------------------
----------------------------------end-----------------------------
```
Now run a checkpoint:
```
progress: 20.0 s, 60582.5 tps, lat 0.427 ms stddev 0.265
progress: 21.0 s, 40974.3 tps, lat 0.624 ms stddev 1.782
progress: 22.0 s, 14963.3 tps, lat 1.754 ms stddev 4.243
progress: 23.0 s, 16382.7 tps, lat 1.577 ms stddev 4.138
progress: 24.0 s, 20179.2 tps, lat 1.286 ms stddev 2.839
progress: 25.0 s, 23749.2 tps, lat 1.101 ms stddev 2.816
progress: 26.0 s, 27464.7 tps, lat 0.945 ms stddev 2.581
progress: 27.0 s, 33850.6 tps, lat 0.764 ms stddev 2.011
progress: 28.0 s, 43315.9 tps, lat 0.600 ms stddev 1.134
progress: 29.0 s, 50216.6 tps, lat 0.516 ms stddev 0.492
progress: 30.0 s, 52857.0 tps, lat 0.490 ms stddev 0.323
progress: 31.0 s, 53428.6 tps, lat 0.485 ms stddev 0.322
progress: 32.0 s, 54710.5 tps, lat 0.473 ms stddev 0.303
progress: 33.0 s, 57824.2 tps, lat 0.448 ms stddev 0.289
progress: 34.0 s, 59573.5 tps, lat 0.435 ms stddev 0.273
progress: 35.0 s, 58162.8 tps, lat 0.445 ms stddev 0.413
progress: 36.0 s, 57951.4 tps, lat 0.447 ms stddev 0.435
```
In the trace results we only see the checkpointer writing buffers (what: 2); none of the pgbench connections trigger physical reads (md read, what: 1):
```
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 42, max: 101, avg: 49, sum: 57592, count: 1154
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 42, max: 124, avg: 51, sum: 520545, count: 10079
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 44, max: 191, avg: 65, sum: 464196, count: 7139
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 42, max: 145, avg: 53, sum: 467482, count: 8799
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 42, max: 131, avg: 53, sum: 447230, count: 8366
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 37, max: 175, avg: 57, sum: 429778, count: 7465
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 42, max: 10551, avg: 65, sum: 331686, count: 5065
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 41, max: 158, avg: 61, sum: 397299, count: 6429
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 29, max: 1173, avg: 61, sum: 672717, count: 10976
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 26, max: 11093, avg: 58, sum: 933458, count: 16027
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 27, max: 448, avg: 56, sum: 937812, count: 16633
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 27, max: 112, avg: 55, sum: 937420, count: 16996
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 27, max: 98, avg: 54, sum: 431355, count: 7901
----------------------------------end-----------------------------
```
So far we can only say that neither LWLocks nor buffer writes are responsible; we still have not found the real cause of the performance drop that occurs while a checkpoint runs.
The next article will reveal the answer.
[References]