MySQL优化:可配置选项的WAIT_FOR_READ

转载请署名:印风

-----------------------------------

http://bugs.mysql.com/bug.php?id=64258

innodb层使用一个常量WAIT_FOR_READ来控制当需要等待从磁盘读取数据时,需要等待的时间,其默认值在5.5和5.1的版本中都是5000us(5ms),而现在比较牛的存储设备(flash/ssd)一般能在100us内完成读操作,BUG64258认为这个值应该是个可配置的选项,通过设定合适的值以符合我们的硬件设备性能。

这是个static静态变量,在文件buf0buf.c中会用到WAIT_FOR_READ:

buf/buf0buf.c:280:static const int WAIT_FOR_READ = 5000;
buf/buf0buf.c:2091: os_thread_sleep(WAIT_FOR_READ);
buf/buf0buf.c:2632: os_thread_sleep(WAIT_FOR_READ);
buf/buf0buf.c:2880: os_thread_sleep(WAIT_FOR_READ);


在两个函数buf_page_get_gen和buf_page_get_zip中会被调用到,这里我们只考虑前者,看看在什么情况下会进入sleep状态

这是个通用的获取数据库page的函数,比较冗长,在经过检查bufferpool、异步请求磁盘页以及对压缩页的处理等一大堆代码后,调用如下代码段:

2858     switch (rw_latch) {
2859     case RW_NO_LATCH:
2860         if (must_read) {
2861             /* Let us wait until the read operation
2862             completes */
2863 
2864             if (innobase_get_slow_log() && trx && trx->take_stats)
2865             {
2866                 ut_usectime(&sec, &ms);
2867                 start_time = (ib_uint64_t)sec * 1000000 + ms;
2868             } else {
2869                 start_time = 0;
2870             }
2871             for (;;) {
2872                 enum buf_io_fix io_fix;
2873 
2874                 mutex_enter(&block->mutex);
2875                 io_fix = buf_block_get_io_fix(block);
2876                 mutex_exit(&block->mutex);
2877 
2878                 if (io_fix == BUF_IO_READ) {
2879 
2880                     os_thread_sleep(WAIT_FOR_READ);
2881                 } else {
2882                     break;
2883                 }
2884             }
2885             if (innobase_get_slow_log() && trx && trx->take_stats && start_time)
2886             {
2887                 ut_usectime(&sec, &ms);
2888                 finish_time = (ib_uint64_t)sec * 1000000 + ms;
2889                 trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
2890             }
2891         }

io_fix的含义不是很了解,看看注释:

56 /** Flags for io_fix types */
57 enum buf_io_fix {
58 BUF_IO_NONE = 0, /**< no pending I/O */
59 BUF_IO_READ, /**< read pending */
60 BUF_IO_WRITE /**< write pending */
61 };

其中这里用到的是BUF_IO_READ,应该是read pending,可能是正在等待磁盘读的一个IO状态标识。


从代码里,我们可以看到,当当前的block->page->io_fix为BUF_IO_READ时,会不停地在一个for(;;)里循环,每次检查后,会sleep WAIT_FOR_READ us后再次检查。如果这是一个高速存储设备,sleep的时间太长显然是不合理的。


以下是一个简单的patch,增加了一个选项innobase_wait_for_read,来控制sleep的时间,基于percona5.5.18

手头有ssd测试环境的同学,帮忙测试看看有木有效果...

diff -ur Percona-Server-5.5.18.stock/storage/innobase/buf/buf0buf.c Percona-Server-5.5.18.sleep/storage/innobase/buf/buf0buf.c
--- Percona-Server-5.5.18.stock/storage/innobase/buf/buf0buf.c  2012-01-07 16:38:37.000000000 +0800
+++ Percona-Server-5.5.18.sleep/storage/innobase/buf/buf0buf.c  2012-02-17 16:22:05.000000000 +0800
@@ -57,6 +57,8 @@
 /* prototypes for new functions added to ha_innodb.cc */
 trx_t* innobase_get_trx();
 
+extern innobase_wait_for_read;
+
 inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx)
 {
    ulint           block_hash;
@@ -276,8 +278,6 @@
 */
 
 #ifndef UNIV_HOTBACKUP
-/** Value in microseconds */
-static const int WAIT_FOR_READ = 5000;
 /** Number of attemtps made to read in a page in the buffer pool */
 static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;
 
@@ -2088,7 +2088,7 @@
 
            if (io_fix == BUF_IO_READ) {
 
-               os_thread_sleep(WAIT_FOR_READ);
+               os_thread_sleep(innobase_wait_for_read);
            } else {
                break;
            }   
@@ -2629,7 +2629,7 @@
            Try again later. */
            //buf_pool_mutex_exit(buf_pool);
            mutex_exit(block_mutex);
-           os_thread_sleep(WAIT_FOR_READ);
+           os_thread_sleep(innobase_wait_for_read);
 
            goto loop;
        }   
@@ -2877,7 +2877,7 @@
 
                if (io_fix == BUF_IO_READ) {
 
-                   os_thread_sleep(WAIT_FOR_READ);
+                   os_thread_sleep(innobase_wait_for_read);
                } else {
                    break;
                }

diff -ur Percona-Server-5.5.18.stock/storage/innobase/handler/ha_innodb.cc Percona-Server-5.5.18.sleep/storage/innobase/handler/ha_innodb.cc
--- Percona-Server-5.5.18.stock/storage/innobase/handler/ha_innodb.cc   2012-01-07 16:38:37.000000000 +0800
+++ Percona-Server-5.5.18.sleep/storage/innobase/handler/ha_innodb.cc   2012-02-17 16:33:46.000000000 +0800
@@ -198,6 +198,7 @@
 static my_bool innobase_buffer_pool_shm_checksum   = TRUE;
 static uint    innobase_buffer_pool_shm_key        = 0;

+ulong innobase_wait_for_read = 0;

 static char*   internal_innobase_data_file_path    = NULL;

@@ -12098,6 +12099,11 @@
 //  " or 2 (write at commit, flush once per second).",
 //  NULL, NULL, 1, 0, 2, 0);

+MYSQL_SYSVAR_ULONG(wait_for_read, innobase_wait_for_read,
+  PLUGIN_VAR_OPCMDARG,
+  "set a value to decide how long when read page operation need to sleep",
+  NULL, NULL, 5000, 0, 5000, 0);
+
 static MYSQL_SYSVAR_BOOL(use_global_flush_log_at_trx_commit, srv_use_global_flush_log_at_trx_commit,
   PLUGIN_VAR_NOCMDARG,
   "Use global innodb_flush_log_at_trx_commit value. (default: ON).",
@@ -12656,6 +12662,7 @@
   MYSQL_SYSVAR(corrupt_table_action),
   MYSQL_SYSVAR(lazy_drop_table),
   MYSQL_SYSVAR(fake_changes),
+  MYSQL_SYSVAR(wait_for_read),
   NULL
 };


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值