转载请署名:印风
-----------------------------------
http://bugs.mysql.com/bug.php?id=64258
InnoDB层使用一个常量WAIT_FOR_READ来控制等待磁盘读操作完成时每次休眠的时间,其值在5.1和5.5版本中都是5000us(5ms)。而现在性能较好的存储设备(flash/SSD)一般能在100us内完成读操作,因此BUG#64258认为这个值应该做成一个可配置的选项,以便设定合适的值来匹配硬件设备的性能。
WAIT_FOR_READ是一个static const常量,在文件buf0buf.c中定义并被使用:
buf/buf0buf.c:280:static const int WAIT_FOR_READ = 5000;
buf/buf0buf.c:2091: os_thread_sleep(WAIT_FOR_READ);
buf/buf0buf.c:2632: os_thread_sleep(WAIT_FOR_READ);
buf/buf0buf.c:2880: os_thread_sleep(WAIT_FOR_READ);
WAIT_FOR_READ在buf_page_get_gen和buf_page_get_zip这两个函数中被用到,这里我们只考虑前者,看看在什么情况下会进入sleep状态。
buf_page_get_gen是一个通用的获取数据库page的函数,比较冗长。在经过检查buffer pool、异步请求磁盘页以及对压缩页的处理等一大段代码之后,会执行如下代码段:
2858 switch (rw_latch) {
2859 case RW_NO_LATCH:
2860 if (must_read) {
2861 /* Let us wait until the read operation
2862 completes */
2863
2864 if (innobase_get_slow_log() && trx && trx->take_stats)
2865 {
2866 ut_usectime(&sec, &ms);
2867 start_time = (ib_uint64_t)sec * 1000000 + ms;
2868 } else {
2869 start_time = 0;
2870 }
2871 for (;;) {
2872 enum buf_io_fix io_fix;
2873
2874 mutex_enter(&block->mutex);
2875 io_fix = buf_block_get_io_fix(block);
2876 mutex_exit(&block->mutex);
2877
2878 if (io_fix == BUF_IO_READ) {
2879
2880 os_thread_sleep(WAIT_FOR_READ);
2881 } else {
2882 break;
2883 }
2884 }
2885 if (innobase_get_slow_log() && trx && trx->take_stats && start_time)
2886 {
2887 ut_usectime(&sec, &ms);
2888 finish_time = (ib_uint64_t)sec * 1000000 + ms;
2889 trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
2890 }
2891 }
io_fix的含义我不是很了解,先看看它的定义和注释:
56 /** Flags for io_fix types */
57 enum buf_io_fix {
58 BUF_IO_NONE = 0, /**< no pending I/O */
59 BUF_IO_READ, /**< read pending */
60 BUF_IO_WRITE /**< write pending */
61 };
这里用到的是BUF_IO_READ,注释为read pending,应该是表示该page正在等待磁盘读完成的一个IO状态标识。
从代码里可以看到,当通过buf_block_get_io_fix(block)取得的io_fix为BUF_IO_READ时,线程会在一个for(;;)循环里不停地检查:每次检查之后sleep WAIT_FOR_READ微秒,然后再次检查,直到io_fix不再是BUF_IO_READ为止。如果底层是高速存储设备,每次sleep 5ms显然太长,是不合理的。
以下是一个简单的patch(基于Percona Server 5.5.18),增加了一个选项wait_for_read(对应变量innobase_wait_for_read)来控制每次sleep的时间,单位为微秒,默认值为5000,可设置范围为0~5000。
手头有SSD测试环境的同学,可以帮忙测试一下,看看有没有效果。
diff -ur Percona-Server-5.5.18.stock/storage/innobase/buf/buf0buf.c Percona-Server-5.5.18.sleep/storage/innobase/buf/buf0buf.c
--- Percona-Server-5.5.18.stock/storage/innobase/buf/buf0buf.c 2012-01-07 16:38:37.000000000 +0800
+++ Percona-Server-5.5.18.sleep/storage/innobase/buf/buf0buf.c 2012-02-17 16:22:05.000000000 +0800
@@ -57,6 +57,8 @@
/* prototypes for new functions added to ha_innodb.cc */
trx_t* innobase_get_trx();
+extern innobase_wait_for_read;
+
inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx)
{
ulint block_hash;
@@ -276,8 +278,6 @@
*/
#ifndef UNIV_HOTBACKUP
-/** Value in microseconds */
-static const int WAIT_FOR_READ = 5000;
/** Number of attemtps made to read in a page in the buffer pool */
static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;
@@ -2088,7 +2088,7 @@
if (io_fix == BUF_IO_READ) {
- os_thread_sleep(WAIT_FOR_READ);
+ os_thread_sleep(innobase_wait_for_read);
} else {
break;
}
@@ -2629,7 +2629,7 @@
Try again later. */
//buf_pool_mutex_exit(buf_pool);
mutex_exit(block_mutex);
- os_thread_sleep(WAIT_FOR_READ);
+ os_thread_sleep(innobase_wait_for_read);
goto loop;
}
@@ -2877,7 +2877,7 @@
if (io_fix == BUF_IO_READ) {
- os_thread_sleep(WAIT_FOR_READ);
+ os_thread_sleep(innobase_wait_for_read);
} else {
break;
}
diff -ur Percona-Server-5.5.18.stock/storage/innobase/handler/ha_innodb.cc Percona-Server-5.5.18.sleep/storage/innobase/handler/ha_innodb.cc
--- Percona-Server-5.5.18.stock/storage/innobase/handler/ha_innodb.cc 2012-01-07 16:38:37.000000000 +0800
+++ Percona-Server-5.5.18.sleep/storage/innobase/handler/ha_innodb.cc 2012-02-17 16:33:46.000000000 +0800
@@ -198,6 +198,7 @@
static my_bool innobase_buffer_pool_shm_checksum = TRUE;
static uint innobase_buffer_pool_shm_key = 0;
+ulong innobase_wait_for_read = 0;
static char* internal_innobase_data_file_path = NULL;
@@ -12098,6 +12099,11 @@
// " or 2 (write at commit, flush once per second).",
// NULL, NULL, 1, 0, 2, 0);
+MYSQL_SYSVAR_ULONG(wait_for_read, innobase_wait_for_read,
+ PLUGIN_VAR_OPCMDARG,
+ "set a value to decide how long when read page operation need to sleep",
+ NULL, NULL, 5000, 0, 5000, 0);
+
static MYSQL_SYSVAR_BOOL(use_global_flush_log_at_trx_commit, srv_use_global_flush_log_at_trx_commit,
PLUGIN_VAR_NOCMDARG,
"Use global innodb_flush_log_at_trx_commit value. (default: ON).",
@@ -12656,6 +12662,7 @@
MYSQL_SYSVAR(corrupt_table_action),
MYSQL_SYSVAR(lazy_drop_table),
MYSQL_SYSVAR(fake_changes),
+ MYSQL_SYSVAR(wait_for_read),
NULL
};