之前已经介绍过实现思路(见 http://www.linuxidc.com/Linux/2012-03/57203.htm )。binlog 预分配两年前已由 Yoshinori Matsunobu 在 5.1 里实现,但它存在一个问题:在非 xfs 文件系统上,预分配文件时会一直持有大锁,可能导致 tps 长时间为 0。这里转换思路,使用一个 daemon plugin 在后台做文件分配,当 binlog 切换时,直接将预分配好的文件(命名为 MySQL-binlog.PA)rename 成新的 binlog 文件。
一组简单的测试数据,之前我在 percona-discuss 上发过,这里直接拿过来了...
my test: (using mysqlslap, ext3 filesystem)
set sync_binlog = 1
create a simple table:
create table xxx (a int auto_increment, b int, c varchar(50), primary key(a))
mysqlslap --no-defaults -uxx --create-schema=test --number-of-queries=2000000 --concurrency=$i -S $sock --query="insert into xxx values (NULL,2, 'sadasda')"
threads | with binlog-prealloc | original version (完成 2000000 次 insert 的总耗时)
10 152s 455s
30 104s 216s
50 97s 164s
70 98s 143s
90 98s 132s
110 101s 127s
130 102s 124s
150 104s 123s
以下Patch基于Percona Server5.5.24,目前处于测试中,比较简陋...
cmake时需要增加选项 -DWITH_BINLOG_PREALLOC=ON
然后 install plugin:
set global binlog_prealloc = 1;
install plugin binlog_prealloc soname 'libbinlog_prealloc.so';
flush logs;
Index: a/CMakeLists.txt
===================================================================
--- a.orig/CMakeLists.txt
+++ a/CMakeLists.txt
@@ -161,6 +161,11 @@ INCLUDE(install_layout)
INCLUDE(mysql_add_executable)
# Handle options
+# Build-time switch for binlog preallocation; when ON, the
+# WITH_BINLOG_PREALLOC macro is passed to the compiler.
+OPTION(WITH_BINLOG_PREALLOC "if allow binlog file prealloced" OFF)
+IF(WITH_BINLOG_PREALLOC)
+  ADD_DEFINITIONS(-DWITH_BINLOG_PREALLOC)
+ENDIF()
+
OPTION(DISABLE_SHARED
"Don't build shared libraries, compile code as position-dependent"OFF)
IF(DISABLE_SHARED)
Index: a/include/my_global.h
===================================================================
--- a.orig/include/my_global.h
+++ a/include/my_global.h
@@ -1501,4 +1501,8 @@staticinlinedoublerint(doublex)
#endif /* EMBEDDED_LIBRARY */
+/*
+  BINLOG_PREALLOC enables the binlog preallocation code. It is defined only
+  when the feature was requested at configure time (WITH_BINLOG_PREALLOC)
+  and HAVE_POSIX_FALLOCATE is set.
+*/
+#if defined(HAVE_POSIX_FALLOCATE) && defined(WITH_BINLOG_PREALLOC)
+#define BINLOG_PREALLOC
+#endif
+
#endif /* my_global_h */
Index: a/plugin/daemon_example/CMakeLists.txt
===================================================================
--- a.orig/plugin/daemon_example/CMakeLists.txt
+++ a/plugin/daemon_example/CMakeLists.txt
@@ -17,3 +17,6 @@ MYSQL_ADD_PLUGIN(daemon_example daemon_e
MODULE_ONLY MODULE_OUTPUT_NAME"libdaemon_example")
INSTALL(FILES daemon_example.ini DESTINATION ${INSTALL_PLUGINDIR})
+
+# Daemon plugin that preallocates the next binlog file in the background.
+MYSQL_ADD_PLUGIN(binlog_prealloc binlog_prealloc.cc
+  MODULE_ONLY MODULE_OUTPUT_NAME "libbinlog_prealloc")
Index: a/plugin/daemon_example/binlog_prealloc.cc
===================================================================
--- /dev/null
+++ a/plugin/daemon_example/binlog_prealloc.cc
@@ -0,0 +1,111 @@
+#ifndef MYSQL_SERVER
+#define MYSQL_SERVER
+#endif
+
+#include
+#include
+#include
+#include"my_global.h"
+#include
+#include
+#include"log.h"
+
+/*
+  Compile __attribute__ away where it is not usable: C++ builds, non-GCC
+  compilers and GCC older than 2.8.
+  NOTE(review): the "< 8" bound was lost from the published patch and is
+  restored here from the stock daemon_example plugin - confirm.
+*/
+#if !defined(__attribute__) && (defined(__cplusplus) || !defined(__GNUC__) || (__GNUC__ == 2 && __GNUC_MINOR__ < 8))
+#define __attribute__(A)
+#endif
+
+/* Handle of the background preallocation thread; private to this plugin. */
+static pthread_t bin_prealloc_thread;
+
+/*
+  Server-side state shared with the binlog code. Everything below except
+  max_binlog_size is defined in sql/log.cc by this patch; the declarations
+  must match those definitions exactly.
+*/
+extern unsigned long max_binlog_size;
+extern my_bool binlog_prealloc_inited;
+extern ulong binlog_prealloc;
+extern my_bool use_plugin_prealloc;
+extern my_bool has_prealloc_next;
+extern pthread_mutex_t binlog_prealloc_mutex;
+extern pthread_cond_t binlog_prealloc_cond;
+extern char prealloc_file[FN_REFLEN];
+extern int create_prealloc_file(char *filename);
+
+/*
+  Cancellation cleanup handler for bin_prealloc_func(): releases
+  binlog_prealloc_mutex so that a pthread_cancel() delivered while the
+  thread owns the mutex (pthread_cond_wait() re-acquires it before acting
+  on the cancel) does not leave it locked forever.
+*/
+static void bin_prealloc_unlock(void *)
+{
+  pthread_mutex_unlock(&binlog_prealloc_mutex);
+}
+
+/*
+  Body of the preallocation thread.
+
+  While the feature is off (binlog_prealloc == 0) or the server has not yet
+  initialised the shared state (binlog_prealloc_inited), poll every two
+  seconds. Otherwise wait until the previously prepared file has been
+  consumed (has_prealloc_next == FALSE), create the next spare file with
+  create_prealloc_file(prealloc_file) and publish it again.
+
+  The thread never returns on its own; it is stopped with pthread_cancel()
+  from bin_prealloc_deinit().
+*/
+pthread_handler_t bin_prealloc_func(void *p)
+{
+  while (1)
+  {
+    if (binlog_prealloc == 0 || !binlog_prealloc_inited)
+    {
+      sleep(2);
+      continue;
+    }
+
+    pthread_mutex_lock(&binlog_prealloc_mutex);
+    pthread_cleanup_push(bin_prealloc_unlock, NULL);
+
+    /* Loop, not "if": condition waits may wake up spuriously. */
+    while (has_prealloc_next)
+      pthread_cond_wait(&binlog_prealloc_cond, &binlog_prealloc_mutex);
+
+    /*
+      A failure is only reported. The flag is still raised, as before, so
+      that this thread sleeps until the next rotation instead of retrying
+      in a tight loop; the rotation re-checks the file before using it.
+    */
+    if (create_prealloc_file(prealloc_file) != 0)
+      fprintf(stderr, "Plugin 'bin_prealloc': "
+              "Could not preallocate %s\n", prealloc_file);
+
+    use_plugin_prealloc= TRUE;
+    has_prealloc_next= TRUE;
+
+    /* Runs bin_prealloc_unlock(), i.e. releases the mutex. */
+    pthread_cleanup_pop(1);
+  }
+
+  return 0;
+}
+
+
+/*
+  Plugin init hook: starts the joinable preallocation thread.
+
+  @return 0 on success, 1 if the thread could not be created.
+*/
+static int bin_prealloc_init(void *p)
+{
+  pthread_attr_t attr;
+  int err;
+
+  use_plugin_prealloc= FALSE;
+
+  err= pthread_attr_init(&attr);
+  if (err == 0)
+  {
+    /* Joinable, because bin_prealloc_deinit() joins the thread. */
+    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+    err= pthread_create(&bin_prealloc_thread, &attr,
+                        bin_prealloc_func, NULL);
+    /* The attribute object is not needed once the thread exists. */
+    pthread_attr_destroy(&attr);
+  }
+
+  if (err != 0)
+  {
+    fprintf(stderr, "Plugin 'bin_prealloc': "
+            "Could not create bin_prealloc thread!\n");
+    return 1;
+  }
+
+  return 0;
+}
+
+
+/*
+  Plugin deinit hook: cancels the preallocation thread, waits for it to
+  finish and then clears the flags so the server stops expecting a
+  plugin-provided file.
+
+  @return always 0.
+*/
+static int bin_prealloc_deinit(void *p)
+{
+  pthread_cancel(bin_prealloc_thread);
+  pthread_join(bin_prealloc_thread, NULL);
+
+  /* Reset only after the join, so the thread cannot set them again. */
+  has_prealloc_next= FALSE;
+  use_plugin_prealloc= FALSE;
+
+  return 0;
+}
+
+
+/* Daemon-plugin info block: carries only the interface version. */
+struct st_mysql_daemon bin_prealloc= { MYSQL_DAEMON_INTERFACE_VERSION };
+
+/* Plugin descriptor registered with the server. */
+mysql_declare_plugin(bin_prealloc)
+{
+  MYSQL_DAEMON_PLUGIN,                        /* plugin type               */
+  &bin_prealloc,                              /* type-specific info block  */
+  "binlog_prealloc",                          /* name for INSTALL PLUGIN   */
+  "yinfeng.zwx@taobao.com",                   /* author                    */
+  "a daemon plugin to prealloc binlog file",  /* description               */
+  PLUGIN_LICENSE_GPL,                         /* license                   */
+  bin_prealloc_init,                          /* init hook                 */
+  bin_prealloc_deinit,                        /* deinit hook               */
+  0x0100,                                     /* plugin version            */
+  NULL,                                       /* no status variables       */
+  NULL,                                       /* no system variables       */
+  NULL                                        /* reserved / unused here    */
+}
+mysql_declare_plugin_end;
+
Index: a/sql/log_event.cc
===================================================================
--- a.orig/sql/log_event.cc
+++ a/sql/log_event.cc
@@ -65,6 +65,10 @@
*/
#define FMT_G_BUFSIZE(PREC) (3 + (PREC) + 5 + 1)
+#ifdef BINLOG_PREALLOC
+/*
+  Defined in sql/log.cc. use_binlog_prealloc is a my_bool there, so it must
+  be declared as my_bool here too: a wider type would read the bytes of
+  neighbouring variables as well.
+*/
+extern ulonglong fetch_active_size(void);
+extern my_bool use_binlog_prealloc;
+#endif
#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
staticintrows_event_stmt_cleanup(Relay_log_infoconst*rli, THD* thd);
@@ -1018,7 +1022,14 @@intLog_event::read_log_event(IO_CACHE*
intresult=0;
charbuf[LOG_EVENT_MINIMAL_HEADER_LEN];
DBUG_ENTER("Log_event::read_log_event");
-
+#ifdef BINLOG_PREALLOC
+if(use_binlog_prealloc && file->file_name && file->type == READ_CACHE) {
+if(mysql_bin_log.is_active(file->file_name))
+ file->end_of_file= fetch_active_size();
+else
+ file->end_of_file= ~(my_off_t) 0;
+ }
+#endif
if(log_lock)
mysql_mutex_lock(log_lock);
if(my_b_read(file, (uchar*) buf,sizeof(buf)))
Index: a/sql/log.cc
===================================================================
--- a.orig/sql/log.cc
+++ a/sql/log.cc
@@ -90,6 +90,112 @@staticSHOW_VAR binlog_status_vars_detai
{NullS, NullS, SHOW_LONG}
};
+#ifdef BINLOG_PREALLOC
+
+/* Number of valid bytes in the active binlog; accessed only through
+   set_active_size() / fetch_active_size(). */
+ulonglong active_binlog_size= 0;
+/* Value of the binlog_prealloc system variable; 0 disables preallocation. */
+ulong binlog_prealloc= 0;
+
+/* TRUE once a binlog file has been successfully preallocated. */
+my_bool use_binlog_prealloc= FALSE;
+/* TRUE once init_binlog_prealloc() has set up the mutex, the condition
+   variable and prealloc_file. */
+my_bool binlog_prealloc_inited= FALSE;
+/* TRUE while the daemon plugin is the one providing preallocated files. */
+my_bool use_plugin_prealloc= FALSE;
+/* TRUE while a spare file prepared by the plugin is waiting to be renamed. */
+my_bool has_prealloc_next= FALSE;
+
+/* Mutex and condition variable used to hand the spare file over between
+   the plugin thread and the binlog rotation code. */
+pthread_mutex_t binlog_prealloc_mutex;
+pthread_cond_t binlog_prealloc_cond;
+
+/* Path of the spare preallocated file (binlog base name plus ".PA"). */
+char prealloc_file[FN_REFLEN];
+
+/*
+  One-time setup of the state shared with the preallocation plugin.
+
+  Builds the spare file name from `name` (formatted relative to
+  mysql_data_home, with ".PA" appended), initialises the mutex and the
+  condition variable, and only then raises binlog_prealloc_inited.
+
+  If the resulting name does not fit into prealloc_file the function
+  reports the problem and returns with binlog_prealloc_inited still FALSE.
+
+  @param name  binlog name the spare file name is derived from
+*/
+static void init_binlog_prealloc(const char *name)
+{
+  static const char suffix[]= ".PA";
+  size_t length;
+
+  use_plugin_prealloc= FALSE;
+  has_prealloc_next= FALSE;
+
+  memset(prealloc_file, 0, sizeof(prealloc_file));
+
+  /* NOTE(review): flag 4 looks like MY_UNPACK_FILENAME - confirm. */
+  fn_format(prealloc_file, name, mysql_data_home, "", 4);
+
+  /* sizeof(suffix) includes the terminating NUL. */
+  length= strlen(prealloc_file);
+  if (length + sizeof(suffix) > sizeof(prealloc_file))
+  {
+    fprintf(stderr, "Prealloc Binlog Failed: file name too long:%s\n",
+            prealloc_file);
+    return;
+  }
+  memcpy(prealloc_file + length, suffix, sizeof(suffix));
+
+  pthread_mutex_init(&binlog_prealloc_mutex, NULL);
+  pthread_cond_init(&binlog_prealloc_cond, NULL);
+
+  /* Must stay last: the plugin thread polls this flag without the mutex. */
+  binlog_prealloc_inited= TRUE;
+}
+
+/*
+  Atomically stores new_val into active_binlog_size.
+
+  The compare-and-swap is retried until it succeeds. A single attempt with
+  a separately read expected value leaves the old size in place whenever
+  another thread updates it in between.
+
+  @param new_val  new size of the active binlog
+  @return the value that was replaced
+*/
+ulonglong set_active_size(ulonglong new_val)
+{
+  ulonglong expected= active_binlog_size;
+
+  for (;;)
+  {
+    ulonglong previous= __sync_val_compare_and_swap(&active_binlog_size,
+                                                    expected, new_val);
+    if (previous == expected)
+      return previous;
+    expected= previous;
+  }
+}
+
+
+/*
+  Atomically reads active_binlog_size; adding 0 leaves the value unchanged.
+
+  @return current value of active_binlog_size
+*/
+ulonglong fetch_active_size(void)
+{
+  return __sync_add_and_fetch(&active_binlog_size, 0);
+}
+
+/*
+  Creates (or reopens) file_name, reserves max_binlog_size bytes for it with
+  posix_fallocate() and syncs it.
+
+  The file is created with mode 0660 instead of 0. A file with no permission
+  bits cannot be reopened by a non-root server if a previous attempt left it
+  behind.
+
+  @param file_name  path of the file to preallocate
+  @return 0 on success, -1 if the file cannot be opened, non-zero if the
+          allocation or the sync fails
+*/
+int create_prealloc_file(char *file_name)
+{
+  int fd= open(file_name, O_CREAT | O_RDWR, 0660);
+  if (fd == -1)
+    return -1;
+
+  /* The sync is skipped when the allocation itself already failed. */
+  int ret= posix_fallocate(fd, 0, max_binlog_size) ||
+           my_sync(fd, MYF(MY_WME));
+
+  close(fd);
+  return ret;
+}
+
+
+/*
+  Makes new_name a preallocated binlog file.
+
+  With the plugin active, the spare file prepared by the plugin thread is
+  renamed to new_name and the thread is woken up to prepare the next one.
+  The mutex is only tried, never waited for, so the caller is never blocked
+  behind a running allocation. Without the plugin, the file is allocated
+  directly.
+
+  @param new_name  name of the binlog file about to be opened
+  @return 0 if new_name is now a preallocated file, -1 otherwise
+          (the caller then continues without preallocation)
+
+  NOTE(review): use_binlog_prealloc is not reset on the -1 paths, so it may
+  keep the value from the previous binlog - confirm this is intended.
+*/
+int prealloc_binlog_with_newname(char *new_name)
+{
+  if (use_plugin_prealloc)
+  {
+    /* If the lock is busy, simply fall back to the original mode. */
+    if (pthread_mutex_trylock(&binlog_prealloc_mutex) != 0)
+      return -1;
+
+    my_bool renamed= (has_prealloc_next &&
+                      access(prealloc_file, F_OK) == 0 &&
+                      rename(prealloc_file, new_name) == 0);
+
+    /* Consumed or unusable: either way ask for a fresh spare file. */
+    has_prealloc_next= FALSE;
+
+    pthread_mutex_unlock(&binlog_prealloc_mutex);
+    pthread_cond_broadcast(&binlog_prealloc_cond);
+
+    if (!renamed)
+      return -1;
+  }
+  else if (create_prealloc_file(new_name) != 0)
+  {
+    fprintf(stderr, "Prealloc Binlog Failed:%s\n", new_name);
+    return -1;
+  }
+
+  /*
+    0660 replaces the original decimal 438 (== 0666), which made the binlog
+    world-writable.
+  */
+  if (chmod(new_name, 0660) != 0)
+    fprintf(stderr, "Prealloc Binlog chmod Failed:%s\n", new_name);
+
+  use_binlog_prealloc= TRUE;
+  return 0;
+}
+
+#endif
/**
purge logs, master and slave sides both, related error code
convertor.
@@ -2122,6 +2228,12 @@ File open_binlog(IO_CACHE *log, const ch
*errmsg = "Could not open log file";
goto err;
}
+#ifdef BINLOG_PREALLOC
+ if (use_binlog_prealloc) {
+ log->file_name=(char *)log_file_name;
+ log->end_of_file= fetch_active_size();
+ }
+#endif
if (check_binlog_magic(log,errmsg))
goto err;
DBUG_RETURN(file);
@@ -3182,7 +3294,9 @@ bool MYSQL_BIN_LOG::open(const char *log
if (flush_io_cache(&log_file) ||
mysql_file_sync(log_file.file, MYF(MY_WME)))
goto err;
-
+#ifdef BINLOG_PREALLOC
+ set_active_size(log_file.pos_in_file);
+#endif
if (write_file_name_to_index_file)
{
#ifdef HAVE_REPLICATION
@@ -4378,7 +4492,16 @@ int MYSQL_BIN_LOG::new_file_impl(bool ne
old_name=name;
name=0; // Don't free name
close(LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX);
-
+#ifdef BINLOG_PREALLOC
+ /*try to pre-alloc binlog file,we don't care if this will fail*/
+if(!is_relay_log && binlog_prealloc) {
+if(unlikely(!binlog_prealloc_inited)) {
+ init_binlog_prealloc(old_name);
+ }
+ prealloc_binlog_with_newname(new_name_ptr);
+ }else
+ use_binlog_prealloc = FALSE;
+#endif
/*
Note that at this point, log_state != LOG_CLOSED (important for is_open()).
*/
@@ -5156,6 +5279,9 @@ err:
else
{
boolcheck_purge;
+#ifdef BINLOG_PREALLOC
+ set_active_size(event_info->log_pos);
+#endif
signal_update();
error= rotate(false, &check_purge);
mysql_mutex_unlock(&LOCK_log);
@@ -5540,6 +5666,9 @@boolMYSQL_BIN_LOG::write_incident(THD *
if(!error && !(error= flush_and_sync(0)))
{
boolcheck_purge=false;
+#ifdef BINLOG_PREALLOC
+ set_active_size(ev.log_pos);
+#endif
signal_update();
error= rotate(false, &check_purge);
mysql_mutex_unlock(&LOCK_log);
@@ -5695,6 +5824,9 @@void
MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
{
DBUG_ENTER("MYSQL_BIN_LOG::trx_group_commit_leader");
+#ifdef BINLOG_PREALLOC
+ ulonglong last_actual_pos = 0;
+#endif
uint xid_count= 0;
uint write_count= 0;
boolcheck_purge=false;
@@ -5753,6 +5885,9 @@ MYSQL_BIN_LOG::trx_group_commit_leader(g
}
cache_data->commit_bin_log_file_pos= my_b_write_tell(&log_file);
+#ifdef BINLOG_PREALLOC
+ last_actual_pos = cache_data->commit_bin_log_file_pos;
+#endif
if(cache_data->using_xa && cache_data->xa_xid)
xid_count++;
}
@@ -5773,6 +5908,9 @@ MYSQL_BIN_LOG::trx_group_commit_leader(g
}
else
{
+#ifdef BINLOG_PREALLOC
+ set_active_size(last_actual_pos);
+#endif
signal_update();
}
@@ -6005,6 +6143,18 @@voidMYSQL_BIN_LOG::close(uint exiting)
original position on system that doesn't support pwrite().
*/
mysql_file_seek(log_file.file, org_position, MY_SEEK_SET, MYF(0));
+#ifdef BINLOG_PREALLOC
+ end_io_cache(&log_file);
+ DBUG_ASSERT(is_active(log_file_name));
+ mysql_mutex_assert_owner(&LOCK_log);
+ set_active_size(log_file.pos_in_file);
+if(use_binlog_prealloc && my_chsize(log_file.file,
+ log_file.pos_in_file, 0, MYF(MY_WME)))
+ {
+ write_error= 1;
+ sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
+ }
+#endif
}
/* this will cleanup IO_CACHE, sync and close the file */
Index: a/sql/sys_vars.cc
===================================================================
--- a.orig/sql/sys_vars.cc
+++ a/sql/sys_vars.cc
@@ -3330,6 +3330,12 @@staticSys_var_uint Sys_slave_net_timeou
VALID_RANGE(1, LONG_TIMEOUT), DEFAULT(SLAVE_NET_TIMEOUT), BLOCK_SIZE(1),
NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
ON_UPDATE(fix_slave_net_timeout));
+#ifdef BINLOG_PREALLOC
+/* Global variable binlog_prealloc (0..100, default 0); any non-zero value
+   turns binlog preallocation on. */
+static Sys_var_ulong Sys_binlog_prealloc(
+       "binlog_prealloc",
+       "default 0 , if binlog_prealloc >0, means prealloc binlog file",
+       GLOBAL_VAR(binlog_prealloc), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(0, 100), DEFAULT(0), BLOCK_SIZE(1));
+#endif
staticboolcheck_slave_skip_counter(sys_var *self, THD *thd, set_var *var)
{
Index: a/sql/mysqld.h
===================================================================
--- a.orig/sql/mysqld.h
+++ a/sql/mysqld.h
@@ -238,7 +238,9 @@externI_List threads;
externcharerr_shared_dir[];
externTYPELIB thread_handling_typelib;
externmy_decimal decimal_zero;
-
+#ifdef BINLOG_PREALLOC
+/* Backing store of the binlog_prealloc system variable (sql/log.cc). */
+extern ulong binlog_prealloc;
+#endif
externulonglong opt_log_warnings_suppress;
externchar* enforce_storage_engine;