PostgreSQL: Why does the checkpointer impact performance so much? - 5

Postgres2015全国用户大会将于11月20至21日在北京丽亭华苑酒店召开。本次大会嘉宾阵容强大,国内顶级PostgreSQL数据库专家将悉数到场,并特邀欧洲、俄罗斯、日本、美国等国家和地区的数据库方面专家助阵:

  • Postgres-XC项目的发起人铃木市一(SUZUKI Koichi)
  • Postgres-XL的项目发起人Mason Sharp
  • pgpool的作者石井达夫(Tatsuo Ishii)
  • PG-Strom的作者海外浩平(Kaigai Kohei)
  • Greenplum研发总监姚延栋
  • 周正中(德哥), PostgreSQL中国用户会创始人之一
  • 汪洋,平安科技数据库技术部经理
  • ……


 
  • 2015年度PG大象会报名地址:http://postgres2015.eventdove.com/
  • PostgreSQL中国社区: http://postgres.cn/
  • PostgreSQL专业1群: 3336901(已满)
  • PostgreSQL专业2群: 100910388
  • PostgreSQL专业3群: 150657323



我们前面分析了checkpoint的原理,并且对checkpoint的整个过程做了跟踪,但是并未发现到底是什么造成tps的下降。
本文主要针对PostgreSQL的lwlock重点跟踪一下:
src/backend/storage/lmgr/lwlock.c

                TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(l), T_ID(l), mode);

                for (;;)
                {
                        /* "false" means cannot accept cancel/die interrupt here. */
                        PGSemaphoreLock(&proc->sem, false);
                        if (!proc->lwWaiting)
                                break;
                        extraWaits++;
                }

                TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(l), T_ID(l), mode);

src/include/storage/lwlock.h

/* Lock request modes accepted by the LWLock machinery. */
typedef enum LWLockMode
{
        LW_EXCLUSIVE,                   /* exclusive access (single holder) */
        LW_SHARED,                      /* shared access (multiple concurrent holders) */
        LW_WAIT_UNTIL_FREE                      /* A special mode used in PGPROC->lwlockMode,
                                                                 * when waiting for lock to become free. Not
                                                                 * to be used as LWLockAcquire argument */
} LWLockMode;
lwlock-wait-start (char *, int, LWLockMode) Probe that fires when an LWLock was not immediately available and a server process has begun to wait for the lock to become available. arg0 is the LWLock's tranche. arg1 is the LWLock's offset within its tranche. arg2 is the requested lock mode, either exclusive or shared.
lwlock-wait-done (char *, int, LWLockMode) Probe that fires when a server process has been released from its wait for an LWLock (it does not actually have the lock yet). arg0 is the LWLock's tranche. arg1 is the LWLock's offset within its tranche. arg2 is the requested lock mode, either exclusive or shared.

$ vi test.sql
\setrandom id 1 50000000
update tbl set info=now(),crt_time=now() where id=:id;
$ pgbench -M prepared -n -r -f ./test.sql -c 3 -j 3 -P 2 -T 100000000
progress: 776.0 s, 13169.7 tps, lat 0.226 ms stddev 0.063
progress: 778.0 s, 7930.0 tps, lat 0.377 ms stddev 0.096
progress: 780.0 s, 7660.1 tps, lat 0.390 ms stddev 0.108
progress: 782.0 s, 8118.8 tps, lat 0.368 ms stddev 0.098
progress: 784.0 s, 8576.5 tps, lat 0.348 ms stddev 0.093
progress: 786.0 s, 9251.4 tps, lat 0.323 ms stddev 0.085
progress: 788.0 s, 9812.2 tps, lat 0.304 ms stddev 0.078
progress: 790.0 s, 10036.8 tps, lat 0.297 ms stddev 0.074
progress: 792.0 s, 10359.1 tps, lat 0.288 ms stddev 0.081
progress: 794.0 s, 10675.3 tps, lat 0.280 ms stddev 0.077
progress: 796.0 s, 10904.1 tps, lat 0.274 ms stddev 0.071
progress: 798.0 s, 11470.2 tps, lat 0.260 ms stddev 0.076
progress: 800.0 s, 11909.3 tps, lat 0.250 ms stddev 0.074
progress: 802.0 s, 12297.4 tps, lat 0.243 ms stddev 0.073
progress: 804.0 s, 12463.8 tps, lat 0.239 ms stddev 0.075
progress: 806.0 s, 12513.3 tps, lat 0.238 ms stddev 0.076
progress: 808.0 s, 12669.9 tps, lat 0.235 ms stddev 0.068

以13000为TPS,3个并发连接,一次请求平均需要约230.8微秒.
1000000*(1/(13000/3))

digoal=# select pid,query from pg_stat_activity;
 pid  |                         query                         
------+-------------------------------------------------------
 4249 | update tbl set info=now(),crt_time=now() where id=$1;
 5527 | autovacuum: VACUUM ANALYZE digoal.tbl
 4250 | update tbl set info=now(),crt_time=now() where id=$1;
 4251 | update tbl set info=now(),crt_time=now() where id=$1;
 4134 | select pid,query from pg_stat_activity;
(5 rows)
ps -ewf|grep 1484
postgres  1484 30863  4 12:35 ?        00:02:17 postgres: wal writer process 

跟踪这几个进程的锁等待情况。

# stap -DMAXSKIPPED=100000 -v 11111 -e '
/* Measure LWLock wait durations per pid for the three pgbench backends
 * (4249-4251) and the wal writer (1484), reporting once per second. */
global s_var, e_var, stat_var;

/* probe lwlock__wait__start(const char *, int, LWLockMode); */
probe process("/opt/pgsql/bin/postgres").mark("lwlock__wait__start") {
  s_var[pid()] = gettimeofday_us()
}

/* probe lwlock__wait__done(const char *, int, LWLockMode);
 * BUG FIX: this probe must attach to lwlock__wait__done, not
 * lwlock__wait__start again -- otherwise the script measures the interval
 * between two consecutive wait-starts instead of the actual wait time. */
probe process("/opt/pgsql/bin/postgres").mark("lwlock__wait__done") {
  e_var[pid()] = gettimeofday_us()
  if ( s_var[pid()] > 0 && ((pid() >= 4249 && pid() <= 4251) || pid() == 1484) )
    stat_var[pid()] <<< e_var[pid()] - s_var[pid()]
}

/* every second: dump per-pid wait statistics, then reset all state */
probe timer.s(1) {
  foreach ([v1] in stat_var +) {
    if ( @count(stat_var[v1]) >0 ) {
      printf("pid: %d, min: %d, max: %d, avg: %d, sum: %d, count: %d\n", v1, @min(stat_var[v1]), @max(stat_var[v1]), @avg(stat_var[v1]), @sum(stat_var[v1]), @count(stat_var[v1]))
    }
  }
  printf("----------------------------------end-----------------------------\n")
  delete s_var
  delete e_var
  delete stat_var
}'

当检查点发生时,WAL writer process出现了比较多的lwlock等待。
1484是wal writer process,其他几个是数据库测试进程。
从count列可以看出,检查点发生后,数据库测试进程的等待次数变少了。
从avg来看,等待的平均时间发生了细微的变化,大概增加了1微秒的平均时间,这个等待并不足以造成性能下降。
因为正常的一次请求需要用掉约230.8微秒,1微秒的增量可以忽略。

pid: 1484, min: 0, max: 3, avg: 1, sum: 25, count: 19
pid: 4249, min: 0, max: 2, avg: 0, sum: 161, count: 214
pid: 4251, min: 0, max: 3, avg: 0, sum: 181, count: 227
pid: 4250, min: 0, max: 2, avg: 0, sum: 181, count: 232
----------------------------------end-----------------------------
pid: 4249, min: 0, max: 3, avg: 0, sum: 134, count: 150
pid: 4250, min: 0, max: 5, avg: 1, sum: 167, count: 157
pid: 4251, min: 0, max: 3, avg: 0, sum: 145, count: 168
pid: 1484, min: 0, max: 3, avg: 0, sum: 142, count: 255
----------------------------------end-----------------------------
pid: 4250, min: 0, max: 6, avg: 2, sum: 187, count: 75
pid: 4251, min: 0, max: 3, avg: 1, sum: 88, count: 83
pid: 4249, min: 0, max: 3, avg: 0, sum: 102, count: 107
pid: 1484, min: 0, max: 6, avg: 0, sum: 302, count: 467
----------------------------------end-----------------------------
pid: 4250, min: 0, max: 6, avg: 2, sum: 198, count: 84
pid: 4251, min: 0, max: 4, avg: 1, sum: 124, count: 102
pid: 4249, min: 0, max: 2, avg: 0, sum: 98, count: 108
pid: 1484, min: 0, max: 4, avg: 0, sum: 380, count: 633
----------------------------------end-----------------------------
pid: 4250, min: 0, max: 6, avg: 2, sum: 197, count: 77
pid: 4251, min: 0, max: 4, avg: 1, sum: 106, count: 102
pid: 4249, min: 0, max: 2, avg: 0, sum: 101, count: 111
pid: 1484, min: 0, max: 4, avg: 0, sum: 396, count: 719
----------------------------------end-----------------------------
pid: 4249, min: 0, max: 4, avg: 1, sum: 88, count: 85
pid: 4251, min: 0, max: 6, avg: 1, sum: 102, count: 91
pid: 4250, min: 0, max: 6, avg: 2, sum: 221, count: 108
pid: 1484, min: 0, max: 4, avg: 0, sum: 360, count: 705
----------------------------------end-----------------------------
pid: 4250, min: 0, max: 7, avg: 2, sum: 211, count: 93
pid: 4249, min: 0, max: 13, avg: 1, sum: 152, count: 124
pid: 4251, min: 0, max: 4, avg: 1, sum: 132, count: 130
pid: 1484, min: 0, max: 5, avg: 0, sum: 419, count: 730
----------------------------------end-----------------------------


从另一个方面来观察,看看检查点发生时单次请求,PG所有函数的处理时间如何?

postgres  3167  2767 93 13:59 ?        00:00:47 postgres: digoal digoal [local] UPDATE

但是函数量太大,会死机没法跟踪,即使不死机,也不合理,因为正常1毫秒以内的请求,使用STAP跟踪后,额外开销比较大,会上升到100多毫秒。

# stap -v 11111 -x 3167 -e '
/* Per-function latency profile for one PostgreSQL backend (-x 3167):
 * time every function under src/ and report the 10 heaviest per second.
 * NOTE(review): f_start is keyed by (execname, pid, tid, cpu); a recursive
 * call overwrites the entry timestamp, so recursive functions are
 * under-measured -- acceptable for this coarse survey. */
global f_start[999999],f_stop[999999]

probe process("/opt/pgsql/bin/postgres").function("*@/opt/soft_bak/postgresql-9.4.1/src/*").call { 
  /* record the entry timestamp for this execname/pid/tid/cpu tuple */
  f_start[execname(), pid(), tid(), cpu()] = gettimeofday_ns()
}

probe process("/opt/pgsql/bin/postgres").function("*@/opt/soft_bak/postgresql-9.4.1/src/*").return { 
  t=gettimeofday_ns()   /* return timestamp */
  a=execname()
  b=cpu()
  c=pid()
  d=pp()                /* full probe point string: identifies the function */
  e=tid()
  if (f_start[a,c,e,b]) {
    /* accumulate elapsed nanoseconds per (execname, probe point) */
    f_stop[a,d] <<< t - f_start[a,c,e,b]
  }
}

probe timer.s(1) {
  /* top 10 probe points by total time spent, then reset for the next second */
  foreach ([a,d] in f_stop @sum - limit 10 ) { 
    printf("avg_ns:%d, sum_ns:%d, cnt:%d, execname:%s, pp:%s\n", @avg(f_stop[a,d]), @sum(f_stop[a,d]), @count(f_stop[a,d]), a, d)
  }
delete f_start
delete f_stop
}'

例如:

开启stap
digoal=> update tbl set info=now() where id=1;
UPDATE 1
Time: 1.129 ms
开启stap
digoal=> update tbl set info=now() where id=1;
UPDATE 1
Time: 167.001 ms


可以缩小 function("*@/opt/soft_bak/postgresql-9.4.1/src/*")的范围,再跟踪。
例如我们首先怀疑是锁的问题,那么可以缩小范围到 function("*@/opt/soft_bak/postgresql-9.4.1/src/backend/storage/lmgr/*")
  
  
# stap -v 11111 -x 3167 -e '
/* Same profiler as before, restricted to the lock-manager sources
 * (src/backend/storage/lmgr). Reports the 10 heaviest probe points
 * by total time once per second. */
global entry_ts[999999],fn_elapsed[999999]

probe process("/opt/pgsql/bin/postgres").function("*@/opt/soft_bak/postgresql-9.4.1/src/backend/storage/lmgr/*").call {
  /* stamp function entry, keyed by execname/pid/tid/cpu */
  entry_ts[execname(), pid(), tid(), cpu()] = gettimeofday_ns()
}

probe process("/opt/pgsql/bin/postgres").function("*@/opt/soft_bak/postgresql-9.4.1/src/backend/storage/lmgr/*").return {
  now = gettimeofday_ns()
  who = execname()
  point = pp()
  if (entry_ts[who, pid(), tid(), cpu()]) {
    /* fold elapsed ns into the per-(execname, probe point) aggregate */
    fn_elapsed[who, point] <<< now - entry_ts[who, pid(), tid(), cpu()]
  }
}

probe timer.s(1) {
  foreach ([who, point] in fn_elapsed @sum - limit 10 ) {
    printf("avg_ns:%d, sum_ns:%d, cnt:%d, execname:%s, pp:%s\n", @avg(fn_elapsed[who, point]), @sum(fn_elapsed[who, point]), @count(fn_elapsed[who, point]), who, point)
  }
printf("-------------------------end--------------------------\n")
delete entry_ts
delete fn_elapsed
}'

结果截取:

-------------------------end--------------------------
avg_ns:3108, sum_ns:80113700, cnt:25772, execname:postgres, pp:process("/opt/pgsql9.4.1/bin/postgres").function("LWLockRelease@/opt/soft_bak/postgresql-9.4.1/src/backend/storage/lmgr/lwlock.c:1118").return
avg_ns:3237, sum_ns:74622113, cnt:23051, execname:postgres, pp:process("/opt/pgsql9.4.1/bin/postgres").function("LWLockAcquire@/opt/soft_bak/postgresql-9.4.1/src/backend/storage/lmgr/lwlock.c:465").return
avg_ns:10610, sum_ns:37179631, cnt:3504, execname:postgres, pp:process("/opt/pgsql9.4.1/bin/postgres").function("LockRelationOid@/opt/soft_bak/postgresql-9.4.1/src/backend/storage/lmgr/lmgr.c:87").return
avg_ns:7832, sum_ns:34298340, cnt:4379, execname:postgres, pp:process("/opt/pgsql9.4.1/bin/postgres").function("LockAcquire@/opt/soft_bak/postgresql-9.4.1/src/backend/storage/lmgr/lock.c:672").return
-------------------------end-------------------------- 发生检查点后,请求量变少了.
avg_ns:3122, sum_ns:70382918, cnt:22539, execname:postgres, pp:process("/opt/pgsql9.4.1/bin/postgres").function("LWLockRelease@/opt/soft_bak/postgresql-9.4.1/src/backend/storage/lmgr/lwlock.c:1118").return
avg_ns:3302, sum_ns:66548991, cnt:20152, execname:postgres, pp:process("/opt/pgsql9.4.1/bin/postgres").function("LWLockAcquire@/opt/soft_bak/postgresql-9.4.1/src/backend/storage/lmgr/lwlock.c:465").return
avg_ns:10591, sum_ns:32493427, cnt:3068, execname:postgres, pp:process("/opt/pgsql9.4.1/bin/postgres").function("LockRelationOid@/opt/soft_bak/postgresql-9.4.1/src/backend/storage/lmgr/lmgr.c:87").return
avg_ns:7832, sum_ns:30037745, cnt:3835, execname:postgres, pp:process("/opt/pgsql9.4.1/bin/postgres").function("LockAcquire@/opt/soft_bak/postgresql-9.4.1/src/backend/storage/lmgr/lock.c:672").return
-------------------------end--------------------------

每秒需要获得和释放几百万次的LWLock。排名前4的耗时较大。
当发生检查点时,请求量变少了,和前一篇跟踪LWLock等待的结果是一样的,请求量变少。
看样子也不是这个原因导致的性能下降。
原因还是回到代码,从第四篇的跟踪来看,性能下降出现在FLUSH BUFFER的循环中,也就是从这里开始:

        TRACE_POSTGRESQL_BUFFER_SYNC_START(NBuffers, num_to_write);

        /*
         * Loop over all buffers again, and write the ones (still) marked with
         * BM_CHECKPOINT_NEEDED.  In this loop, we start at the clock sweep point
         * since we might as well dump soon-to-be-recycled buffers first.
         *
         * Note that we don't read the buffer alloc count here --- that should be
         * left untouched till the next BgBufferSync() call.
         */
        buf_id = StrategySyncStart(NULL, NULL);
        num_to_scan = NBuffers;
        num_written = 0;
        while (num_to_scan-- > 0)
        {
......


StrategySyncStart是指根据LRU算法,让checkpointer从 nextVictimBuffer 开始sync。

/*
 * StrategySyncStart -- tell BufferSync where to start syncing
 *
 * The result is the buffer index of the best buffer to sync first.
 * BufferSync() will proceed circularly around the buffer array from there.
 *
 * In addition, we return the completed-pass count (which is effectively
 * the higher-order bits of nextVictimBuffer) and the count of recent buffer
 * allocs if non-NULL pointers are passed.  The alloc count is reset after
 * being read.
 */
int
StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
{
        int                     result;

        /* take the freelist lock exclusively so the clock-sweep state is
         * read (and the alloc counter reset) as one consistent snapshot */
        LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
        result = StrategyControl->nextVictimBuffer;
        if (complete_passes)
                *complete_passes = StrategyControl->completePasses;
        if (num_buf_alloc)
        {
                /* report and reset the allocation counter under the lock */
                *num_buf_alloc = StrategyControl->numBufferAllocs;
                StrategyControl->numBufferAllocs = 0;
        }
        LWLockRelease(BufFreelistLock);
        return result;
}


但是对于我们的测试用例来说,实际上几乎所有的dirty buffer都是热的。从哪里开始都一样。
所以,性能下降与这个并无关联:
1. sync buffer时,可能有短暂的锁冲突,但是,从我们前面LWLock和再前面的跟踪结果来看影响极小,几乎可以忽略。
2. 检查点会将脏块同步到磁盘,然后脏块会变成非脏块,但是它依旧还在shared buffer中,所以紧接着的操作也不需要物理读(md read)。
  
  
$ pgbench -M prepared -n -r -f ./test.sql -P 1 -c 26 -j 26 -T 10000000
progress: 1.0 s, 13550.0 tps, lat 1.891 ms stddev 3.474
progress: 2.0 s, 15505.7 tps, lat 1.655 ms stddev 3.549
progress: 3.0 s, 18030.0 tps, lat 1.443 ms stddev 3.210
progress: 4.0 s, 20844.0 tps, lat 1.258 ms stddev 2.867
progress: 5.0 s, 25317.6 tps, lat 1.025 ms stddev 2.127
progress: 6.0 s, 31146.7 tps, lat 0.833 ms stddev 1.702
progress: 7.0 s, 40137.8 tps, lat 0.646 ms stddev 0.975
progress: 8.0 s, 51564.9 tps, lat 0.502 ms stddev 0.368
progress: 9.0 s, 53915.6 tps, lat 0.480 ms stddev 0.311
progress: 10.0 s, 56830.9 tps, lat 0.456 ms stddev 0.281
progress: 11.0 s, 57333.8 tps, lat 0.452 ms stddev 0.283
progress: 12.0 s, 58938.8 tps, lat 0.439 ms stddev 0.270
progress: 13.0 s, 59608.3 tps, lat 0.434 ms stddev 0.265
progress: 14.0 s, 59014.8 tps, lat 0.439 ms stddev 0.281
progress: 15.0 s, 60241.9 tps, lat 0.430 ms stddev 0.259
progress: 16.0 s, 61337.1 tps, lat 0.422 ms stddev 0.248
progress: 17.0 s, 60821.9 tps, lat 0.426 ms stddev 0.259
progress: 18.0 s, 60850.2 tps, lat 0.425 ms stddev 0.265

我们可以来跟踪一下物理读和物理写,你就可以看到,检查点发生后,其实并没有物理读。
smgr-md-read-start (ForkNumber, BlockNumber, Oid, Oid, Oid, int) Probe that fires when beginning to read a block from a relation. arg0 and arg1 contain the fork and block numbers of the page. arg2, arg3, and arg4 contain the tablespace, database, and relation OIDs identifying the relation. arg5 is the ID of the backend which created the temporary relation for a local buffer, or InvalidBackendId (-1) for a shared buffer.
smgr-md-read-done (ForkNumber, BlockNumber, Oid, Oid, Oid, int, int, int) Probe that fires when a block read is complete. arg0 and arg1 contain the fork and block numbers of the page. arg2, arg3, and arg4 contain the tablespace, database, and relation OIDs identifying the relation. arg5 is the ID of the backend which created the temporary relation for a local buffer, or InvalidBackendId (-1) for a shared buffer. arg6 is the number of bytes actually read, while arg7 is the number requested (if these are different it indicates trouble).
src/backend/storage/buffer/bufmgr.c

/*
 * ReadBuffer_common -- common logic for all ReadBuffer variants
 *
 * *hit is set to true if the request was satisfied from shared buffer cache.
 */
static Buffer
ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
                                  BlockNumber blockNum, ReadBufferMode mode,
                                  BufferAccessStrategy strategy, bool *hit)
{
......
        probe buffer__read__start(ForkNumber, BlockNumber, Oid, Oid, Oid, int, bool);
        probe buffer__read__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, bool, bool);

跟踪读写:

# stap -DMAXSKIPPED=100000 -v 11111 -e '
/* Measure physical reads (smgr md read) and buffer writes per pid.
 * The second key distinguishes the event class: 1 = md read, 2 = buffer
 * write. Statistics are printed and reset once per second. */
global s_var, e_var, stat_var;

/* probe smgr__md__read__start(ForkNumber, BlockNumber, Oid, Oid, Oid, int); */
probe process("/opt/pgsql/bin/postgres").mark("smgr__md__read__start") {
  s_var[pid(),1] = gettimeofday_us()
}

/* probe smgr__md__read__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, int, int); */
probe process("/opt/pgsql/bin/postgres").mark("smgr__md__read__done") {
  e_var[pid(),1] = gettimeofday_us()
  if ( s_var[pid(),1] > 0 )
    stat_var[pid(),1] <<< e_var[pid(),1] - s_var[pid(),1]
}

/* probe buffer__write__start(ForkNumber, BlockNumber, Oid, Oid, Oid, int, bool); */
probe process("/opt/pgsql/bin/postgres").mark("buffer__write__start") {
  s_var[pid(),2] = gettimeofday_us()
}

/* probe buffer__write__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, bool, bool); */
probe process("/opt/pgsql/bin/postgres").mark("buffer__write__done") {
  e_var[pid(),2] = gettimeofday_us()
  if ( s_var[pid(),2] > 0 )
    stat_var[pid(),2] <<< e_var[pid(),2] - s_var[pid(),2]
}

/* every second: dump per-(pid, event-class) latency stats, then reset */
probe timer.s(1) {
  foreach ([v1,v2] in stat_var +) {
    if ( @count(stat_var[v1,v2]) >0 ) {
      printf("pid: %d, what: %d, min: %d, max: %d, avg: %d, sum: %d, count: %d\n", v1, v2, @min(stat_var[v1,v2]), @max(stat_var[v1,v2]), @avg(stat_var[v1,v2]), @sum(stat_var[v1,v2]), @count(stat_var[v1,v2]))
    }
  }
  printf("----------------------------------end-----------------------------\n")
  delete s_var
  delete e_var
  delete stat_var
}'

开启压力测试:

$ pgbench -M prepared -n -r -f ./test.sql -P 1 -c 26 -j 26 -T 10000000
......
progress: 56.0 s, 60813.0 tps, lat 0.426 ms stddev 0.271
progress: 57.0 s, 59748.9 tps, lat 0.433 ms stddev 0.390
progress: 58.0 s, 61135.6 tps, lat 0.423 ms stddev 0.398
progress: 59.0 s, 64174.6 tps, lat 0.403 ms stddev 0.333
progress: 60.0 s, 59503.6 tps, lat 0.435 ms stddev 0.282
progress: 61.0 s, 60662.4 tps, lat 0.427 ms stddev 0.271
progress: 62.0 s, 60582.2 tps, lat 0.427 ms stddev 0.274
progress: 63.0 s, 61523.6 tps, lat 0.421 ms stddev 0.258
progress: 64.0 s, 62394.7 tps, lat 0.415 ms stddev 0.251
progress: 65.0 s, 60892.6 tps, lat 0.425 ms stddev 0.277
progress: 66.0 s, 60703.2 tps, lat 0.426 ms stddev 0.280
progress: 67.0 s, 61629.9 tps, lat 0.420 ms stddev 0.261
progress: 68.0 s, 61231.5 tps, lat 0.423 ms stddev 0.270

当tps进入顶峰,现在所有的热块都在内存中,可以看到现在没有物理读了。

----------------------------------end-----------------------------
----------------------------------end-----------------------------
----------------------------------end-----------------------------
----------------------------------end-----------------------------
----------------------------------end-----------------------------
----------------------------------end-----------------------------

执行检查点:

progress: 20.0 s, 60582.5 tps, lat 0.427 ms stddev 0.265
progress: 21.0 s, 40974.3 tps, lat 0.624 ms stddev 1.782
progress: 22.0 s, 14963.3 tps, lat 1.754 ms stddev 4.243
progress: 23.0 s, 16382.7 tps, lat 1.577 ms stddev 4.138
progress: 24.0 s, 20179.2 tps, lat 1.286 ms stddev 2.839
progress: 25.0 s, 23749.2 tps, lat 1.101 ms stddev 2.816
progress: 26.0 s, 27464.7 tps, lat 0.945 ms stddev 2.581
progress: 27.0 s, 33850.6 tps, lat 0.764 ms stddev 2.011
progress: 28.0 s, 43315.9 tps, lat 0.600 ms stddev 1.134
progress: 29.0 s, 50216.6 tps, lat 0.516 ms stddev 0.492
progress: 30.0 s, 52857.0 tps, lat 0.490 ms stddev 0.323
progress: 31.0 s, 53428.6 tps, lat 0.485 ms stddev 0.322
progress: 32.0 s, 54710.5 tps, lat 0.473 ms stddev 0.303
progress: 33.0 s, 57824.2 tps, lat 0.448 ms stddev 0.289
progress: 34.0 s, 59573.5 tps, lat 0.435 ms stddev 0.273
progress: 35.0 s, 58162.8 tps, lat 0.445 ms stddev 0.413
progress: 36.0 s, 57951.4 tps, lat 0.447 ms stddev 0.435

跟踪结果,我们只看到checkpointer在write buffer,并没有看到pgbench的连接产生物理读(md read):

----------------------------------end-----------------------------
pid: 18027, what: 2, min: 42, max: 101, avg: 49, sum: 57592, count: 1154
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 42, max: 124, avg: 51, sum: 520545, count: 10079
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 44, max: 191, avg: 65, sum: 464196, count: 7139
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 42, max: 145, avg: 53, sum: 467482, count: 8799
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 42, max: 131, avg: 53, sum: 447230, count: 8366
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 37, max: 175, avg: 57, sum: 429778, count: 7465
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 42, max: 10551, avg: 65, sum: 331686, count: 5065
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 41, max: 158, avg: 61, sum: 397299, count: 6429
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 29, max: 1173, avg: 61, sum: 672717, count: 10976
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 26, max: 11093, avg: 58, sum: 933458, count: 16027
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 27, max: 448, avg: 56, sum: 937812, count: 16633
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 27, max: 112, avg: 55, sum: 937420, count: 16996
----------------------------------end-----------------------------
pid: 18027, what: 2, min: 27, max: 98, avg: 54, sum: 431355, count: 7901
----------------------------------end-----------------------------


到目前为止,只能说,LWLOCK,WRITE BUFFER都没有对性能造成影响,我们并没有找到执行checkpoint时,带来的性能下降的真正原因。
下一篇将揭晓答案。

[参考]
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值