[MySQL Patch] Binlog文件预分配

转载请署名:印风

------------------------------------------------------------------------------------

之前已经介绍过实现思路。binlog预分配两年前已由Yoshinori Matsunobu在5.1里实现,但其存在的问题是:在非xfs文件系统上,预分配文件时会持有大锁,可能导致tps长时间为0。这里转换思路,使用一个daemon plugin在后台做文件分配,当binlog切换时,直接将预分配好的文件(命名为mysql-binlog.PA)rename成binlog文件。

一组简单的测试数据,之前我在percona-discuss 上发过,直接拿过来了...

my test: (using mysqlslap, ext3 filesystem)
set sync_binlog       = 1

create a simple table:
     create table xxx (a int auto_increment, b int, c varchar(50), primary key(a))

mysqlslap --no-defaults -uxx --create-schema=test --number-of-queries=2000000 --concurrency=$i  -S $sock  --query="insert into xxx values  (NULL,2,     'sadasda')"

threads              with binlog-prealloc                             Original version
10                               152s                                               455s                          
30                               104s                                               216s
50                                97s                                                164s
70                                98s                                                143s
90                                98s                                                132s
110                              101s                                              127s
130                              102s                                              124s
150                              104s                                              123s


以下Patch基于Percona Server5.5.24,目前处于测试中,比较简陋...

cmake时需要增加选项  -DWITH_BINLOG_PREALLOC=ON

然后 install plugin:
set global binlog_prealloc = 1;
install plugin binlog_prealloc soname 'libbinlog_prealloc.so';
flush logs;

Index: a/CMakeLists.txt
===================================================================
--- a.orig/CMakeLists.txt
+++ a/CMakeLists.txt
@@ -161,6 +161,11 @@ INCLUDE(install_layout)
 INCLUDE(mysql_add_executable)
 
 # Handle options
+OPTION(WITH_BINLOG_PREALLOC "if allow binlog file prealloced" OFF)
+IF(WITH_BINLOG_PREALLOC)
+ADD_DEFINITIONS(-DWITH_BINLOG_PREALLOC)
+ENDIF()
+
 OPTION(DISABLE_SHARED 
  "Don't build shared libraries, compile code as position-dependent" OFF)
 IF(DISABLE_SHARED)
Index: a/include/my_global.h
===================================================================
--- a.orig/include/my_global.h
+++ a/include/my_global.h
@@ -1501,4 +1501,8 @@ static inline double rint(double x)
 
 #endif /* EMBEDDED_LIBRARY */
 
+#if defined (HAVE_POSIX_FALLOCATE) && defined(WITH_BINLOG_PREALLOC)
+#define BINLOG_PREALLOC
+#endif
+
 #endif /* my_global_h */
Index: a/plugin/daemon_example/CMakeLists.txt
===================================================================
--- a.orig/plugin/daemon_example/CMakeLists.txt
+++ a/plugin/daemon_example/CMakeLists.txt
@@ -17,3 +17,6 @@ MYSQL_ADD_PLUGIN(daemon_example daemon_e
   MODULE_ONLY MODULE_OUTPUT_NAME "libdaemon_example")
 
 INSTALL(FILES daemon_example.ini DESTINATION ${INSTALL_PLUGINDIR})
+
+MYSQL_ADD_PLUGIN(binlog_prealloc binlog_prealloc.cc
+  MODULE_ONLY MODULE_OUTPUT_NAME "libbinlog_prealloc")
Index: a/plugin/daemon_example/binlog_prealloc.cc
===================================================================
--- /dev/null
+++ a/plugin/daemon_example/binlog_prealloc.cc
@@ -0,0 +1,111 @@
+#ifndef MYSQL_SERVER
+#define MYSQL_SERVER
+#endif
+
+#include <string.h>
+#include <mysql/plugin.h>
+#include <mysql_version.h>
+#include "my_global.h"
+#include <my_sys.h>
+#include <sys/time.h>
+#include "log.h"
+
+#if !defined(__attribute__) && (defined(__cplusplus) || !defined(__GNUC__)  || __GNUC__ == 2 && __GNUC_MINOR__ < 8)
+#define __attribute__(A)
+#endif
+
+/* The thread handle is plugin-local; the binlog_prealloc* / prealloc* symbols below are defined in sql/log.cc. */
+static pthread_t bin_prealloc_thread;
+extern unsigned long max_binlog_size;
+extern my_bool binlog_prealloc_inited;    /* set by init_binlog_prealloc() once mutex/cond exist */
+extern ulong     binlog_prealloc ;        /* system variable; 0 disables preallocation */
+extern my_bool use_plugin_prealloc;       /* set TRUE by the worker thread of this plugin */
+extern my_bool has_prealloc_next;         /* TRUE while a prepared file awaits the next rotation */
+extern pthread_mutex_t binlog_prealloc_mutex;  /* held by worker and rotation around has_prealloc_next */
+extern pthread_cond_t  binlog_prealloc_cond;   /* broadcast by rotation after it clears has_prealloc_next */
+extern char prealloc_file[FN_REFLEN];     /* path of the file the worker prepares */
+extern int create_prealloc_file(char *filename);
+
+/* Cancellation cleanup handler: unlocks the mutex passed in arg. */
+static void bin_prealloc_unlock(void *arg)
+{
+    pthread_mutex_unlock((pthread_mutex_t *) arg);
+}
+
+/* Worker thread: keeps one preallocated file (prealloc_file) ready for the
+   next binlog rotation; idles while preallocation is disabled or not yet
+   initialized, and blocks on the condition variable while a file is pending. */
+pthread_handler_t bin_prealloc_func(void *p)
+{
+    while (1) {
+        if (binlog_prealloc == 0 || !binlog_prealloc_inited) {
+            sleep(2);
+            continue;
+        }
+
+        pthread_mutex_lock(&binlog_prealloc_mutex);
+        /* bin_prealloc_deinit() cancels this thread; never terminate while owning the mutex */
+        pthread_cleanup_push(bin_prealloc_unlock, &binlog_prealloc_mutex);
+        while (has_prealloc_next)  /* loop, not if: wakeups may be spurious */
+            pthread_cond_wait(&binlog_prealloc_cond, &binlog_prealloc_mutex);
+
+        create_prealloc_file(prealloc_file);  /* result ignored: the consumer re-checks with access()/rename() */
+        use_plugin_prealloc = TRUE;
+        has_prealloc_next = TRUE;
+        pthread_cleanup_pop(1);  /* runs the handler, i.e. unlocks the mutex */
+    }
+    return 0;
+}
+
+
+/* Plugin init hook: resets use_plugin_prealloc and starts the joinable
+   worker thread.  Returns 0 on success, 1 if the thread cannot be created. */
+static int bin_prealloc_init(void *p)
+{
+    pthread_attr_t attr;
+    int err;
+
+    use_plugin_prealloc = FALSE;
+    pthread_attr_init(&attr);
+    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+    err = pthread_create(&bin_prealloc_thread, &attr, bin_prealloc_func, NULL);
+    pthread_attr_destroy(&attr);  /* the attribute object is not needed after create returns */
+    if (err != 0) {
+        fprintf(stderr, "Plugin 'bin_prealloc': "
+                "Could not create bin_prealloc thread!\n");
+        return 1;
+    }
+    return 0;
+}
+
+/* Plugin deinit hook: cancels and joins the worker thread, then clears the prealloc flags; always returns 0. */
+static int bin_prealloc_deinit(void *p)
+{
+    pthread_cancel(bin_prealloc_thread);
+    pthread_join(bin_prealloc_thread, NULL);
+    /* with use_plugin_prealloc FALSE, prealloc_binlog_with_newname() takes its inline-allocation branch */
+    has_prealloc_next = FALSE; 
+    use_plugin_prealloc = FALSE;
+    return 0;
+}
+
+
+struct st_mysql_daemon bin_prealloc = { MYSQL_DAEMON_INTERFACE_VERSION };
+/* Plugin descriptor; field meanings follow st_mysql_plugin in mysql/plugin.h — TODO confirm against the server version. */
+mysql_declare_plugin(bin_prealloc)
+{
+    MYSQL_DAEMON_PLUGIN,                       /* plugin type */
+    &bin_prealloc,                             /* type-specific descriptor */
+    "binlog_prealloc",                         /* plugin name, as used by INSTALL PLUGIN */
+    "yinfeng.zwx@taobao.com",                  /* author */
+    "a daemon plugin to prealloc binlog file", /* description */
+    PLUGIN_LICENSE_GPL,                        /* license */
+    bin_prealloc_init,                         /* init hook */
+    bin_prealloc_deinit,                       /* deinit hook */
+    0x0100,                                    /* plugin version (presumably 1.0) */
+    NULL,                                      /* presumably status variables: none */
+    NULL,                                      /* presumably system variables: none */
+    NULL                                       /* presumably reserved */
+}
+mysql_declare_plugin_end;
+
Index: a/sql/log_event.cc
===================================================================
--- a.orig/sql/log_event.cc
+++ a/sql/log_event.cc
@@ -65,6 +65,10 @@
 */
 #define FMT_G_BUFSIZE(PREC) (3 + (PREC) + 5 + 1)
 
+#ifdef BINLOG_PREALLOC
+extern ulonglong fetch_active_size(void);
+extern my_bool use_binlog_prealloc;  /* type must match the my_bool definition in log.cc */
+#endif
 
 #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
 static int rows_event_stmt_cleanup(Relay_log_info const *rli, THD* thd);
@@ -1018,7 +1022,14 @@ int Log_event::read_log_event(IO_CACHE* 
   int result=0;
   char buf[LOG_EVENT_MINIMAL_HEADER_LEN];
   DBUG_ENTER("Log_event::read_log_event");
-
+#ifdef BINLOG_PREALLOC
+  if (use_binlog_prealloc && file->file_name && file->type == READ_CACHE) {
+     if (mysql_bin_log.is_active(file->file_name))
+         file->end_of_file= fetch_active_size();
+     else
+         file->end_of_file= ~(my_off_t) 0;
+  }
+#endif
   if (log_lock)
     mysql_mutex_lock(log_lock);
   if (my_b_read(file, (uchar*) buf, sizeof(buf)))
Index: a/sql/log.cc
===================================================================
--- a.orig/sql/log.cc
+++ a/sql/log.cc
@@ -90,6 +90,112 @@ static SHOW_VAR binlog_status_vars_detai
   {NullS, NullS, SHOW_LONG}
 };
 
+#ifdef BINLOG_PREALLOC
+
+ulonglong active_binlog_size  = 0; /* written via set_active_size(); readers use it as end_of_file of the active binlog */
+ulong     binlog_prealloc     = 0; /* backs the binlog_prealloc system variable; non-zero enables preallocation */
+
+my_bool use_binlog_prealloc    =  FALSE; /* set TRUE when prealloc_binlog_with_newname() succeeds */
+my_bool binlog_prealloc_inited =  FALSE; /* set TRUE at the end of init_binlog_prealloc() */
+my_bool use_plugin_prealloc    =  FALSE; /* set TRUE by the plugin worker thread */
+my_bool has_prealloc_next      =  FALSE; /* TRUE while a prepared prealloc_file is waiting to be renamed */
+
+pthread_mutex_t binlog_prealloc_mutex;   /* taken by the worker and by rotation around has_prealloc_next */
+pthread_cond_t  binlog_prealloc_cond;    /* broadcast by rotation after it clears has_prealloc_next */
+
+char prealloc_file[FN_REFLEN];           /* path of the prepared file, built by init_binlog_prealloc() */
+
+/* Build the prealloc file name (name resolved by fn_format against mysql_data_home,
+   plus ".PA"), reset the handshake flags and create the mutex/condvar. */
+static void init_binlog_prealloc(const char * name)
+{
+    static const char suffix[] = ".PA";  /* hardcoded prealloc file suffix */
+    size_t length;
+    use_plugin_prealloc = FALSE;
+    has_prealloc_next   = FALSE;
+    memset(prealloc_file, 0, FN_REFLEN);
+    fn_format(prealloc_file, name, mysql_data_home, "", 4);
+    length = strlen(prealloc_file);
+    if (length + sizeof(suffix) > FN_REFLEN)
+        return;  /* suffix would overflow prealloc_file: stay uninitialized instead */
+    memcpy(prealloc_file + length, suffix, sizeof(suffix));  /* copies the NUL too */
+
+    pthread_mutex_init(&(binlog_prealloc_mutex), NULL);
+    pthread_cond_init(&(binlog_prealloc_cond), NULL);
+    binlog_prealloc_inited = TRUE;  /* published last: the worker polls this flag */
+}
+
+ulonglong set_active_size(ulonglong new_val)
+{   /* Atomically publish new_val as the active binlog size; returns the value it replaced. */
+    ulonglong old_val;
+    do { old_val = active_binlog_size; }  /* reread and retry after losing a race */
+    while (!__sync_bool_compare_and_swap(&active_binlog_size, old_val, new_val));
+    return old_val;
+}
+ulonglong fetch_active_size(void)
+{   /* Atomic read: adding zero yields the current size and leaves it unchanged. */
+    return __sync_fetch_and_add(&active_binlog_size, 0);
+}
+
+/* Open (creating if needed) file_name, reserve max_binlog_size bytes with
+   posix_fallocate() and sync it.  Returns -1 if open fails; otherwise 0 on
+   success and 1 if the allocation or the sync failed. */
+int create_prealloc_file(char *file_name)
+{
+    int failed;
+    int handle = open(file_name, O_CREAT | O_RDWR, 0);
+    if (handle == -1)
+        return -1;
+    failed = posix_fallocate(handle, 0, max_binlog_size) != 0 ||
+             my_sync(handle, MYF(MY_WME)) != 0;
+    close(handle);
+    return failed;
+}
+
+/* Turn new_name into a preallocated binlog (plugin-built file or inline allocation); 0 on success, -1 on failure. */
+int prealloc_binlog_with_newname(char *new_name)
+{  
+    int ret = 0;
+
+    if (use_plugin_prealloc) {
+        ret = pthread_mutex_trylock(&(binlog_prealloc_mutex));
+
+        /* if the lock is busy, give up rather than wait; the caller ignores the failure */
+        if (ret != 0)
+            return -1;
+
+        my_bool success = FALSE;
+        /* take the prepared file only if one was flagged, it exists, and the rename succeeds */
+        if (has_prealloc_next &&
+                access(prealloc_file, 0) == 0 &&
+                rename(prealloc_file, new_name) == 0)
+            success = TRUE;
+        /* cleared whether or not the rename worked, so the worker prepares a fresh file */
+        has_prealloc_next = FALSE;
+
+        pthread_mutex_unlock(&(binlog_prealloc_mutex));
+        /* wake the worker thread blocked on has_prealloc_next */
+        pthread_cond_broadcast(&(binlog_prealloc_cond));
+
+        if (!success)
+            return -1;
+
+    } else {
+        /* no plugin file available: allocate new_name inline (this ret shadows the outer one) */
+        int ret = create_prealloc_file(new_name);
+
+        if (ret!= 0) {
+            fprintf(stderr, "Prealloc Binlog Failed:%s\n", new_name);
+            return -1;
+        }
+    }
+    /* 438 decimal == 0666 octal; the file was created with mode 0 by create_prealloc_file() */
+    chmod(new_name, 438);
+    use_binlog_prealloc = TRUE;
+    return 0;
+}
+
+#endif
 /**
    purge logs, master and slave sides both, related error code
    convertor.
@@ -2122,6 +2228,12 @@ File open_binlog(IO_CACHE *log, const ch
     *errmsg = "Could not open log file";
     goto err;
   }
+#ifdef BINLOG_PREALLOC
+  if (use_binlog_prealloc) {
+    log->file_name=(char *)log_file_name;
+    log->end_of_file= fetch_active_size();
+  }
+#endif
   if (check_binlog_magic(log,errmsg))
     goto err;
   DBUG_RETURN(file);
@@ -3182,7 +3294,9 @@ bool MYSQL_BIN_LOG::open(const char *log
     if (flush_io_cache(&log_file) ||
         mysql_file_sync(log_file.file, MYF(MY_WME)))
       goto err;
-
+#ifdef BINLOG_PREALLOC   
+    set_active_size(log_file.pos_in_file); 
+#endif
     if (write_file_name_to_index_file)
     {
 #ifdef HAVE_REPLICATION
@@ -4378,7 +4492,16 @@ int MYSQL_BIN_LOG::new_file_impl(bool ne
   old_name=name;
   name=0;				// Don't free name
   close(LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX);
-
+#ifdef BINLOG_PREALLOC
+  /*try to pre-alloc binlog file,we don't care if this will fail*/
+  if (!is_relay_log && binlog_prealloc) {
+     if (unlikely(!binlog_prealloc_inited)) {
+        init_binlog_prealloc(old_name); 
+     }
+    prealloc_binlog_with_newname(new_name_ptr); 
+  } else
+    use_binlog_prealloc = FALSE;
+#endif
   /*
      Note that at this point, log_state != LOG_CLOSED (important for is_open()).
   */
@@ -5156,6 +5279,9 @@ err:
         else
         {
           bool check_purge;
+#ifdef BINLOG_PREALLOC        
+          set_active_size(event_info->log_pos);
+#endif
           signal_update();
           error= rotate(false, &check_purge);
           mysql_mutex_unlock(&LOCK_log);
@@ -5540,6 +5666,9 @@ bool MYSQL_BIN_LOG::write_incident(THD *
     if (!error && !(error= flush_and_sync(0)))
     {
       bool check_purge= false;
+#ifdef BINLOG_PREALLOC        
+      set_active_size(ev.log_pos);
+#endif 
       signal_update();
       error= rotate(false, &check_purge);
       mysql_mutex_unlock(&LOCK_log);
@@ -5695,6 +5824,9 @@ void
 MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
 {
   DBUG_ENTER("MYSQL_BIN_LOG::trx_group_commit_leader");
+#ifdef BINLOG_PREALLOC
+  ulonglong last_actual_pos = 0;
+#endif
   uint xid_count= 0;
   uint write_count= 0;
   bool check_purge= false;
@@ -5753,6 +5885,9 @@ MYSQL_BIN_LOG::trx_group_commit_leader(g
       }
 
       cache_data->commit_bin_log_file_pos= my_b_write_tell(&log_file);
+#ifdef BINLOG_PREALLOC
+      last_actual_pos = cache_data->commit_bin_log_file_pos;
+#endif
       if (cache_data->using_xa && cache_data->xa_xid)
         xid_count++;
     }
@@ -5773,6 +5908,9 @@ MYSQL_BIN_LOG::trx_group_commit_leader(g
       }
       else
       {
+#ifdef BINLOG_PREALLOC     
+        set_active_size(last_actual_pos);
+#endif
         signal_update();
       }
 
@@ -6005,6 +6143,18 @@ void MYSQL_BIN_LOG::close(uint exiting)
         original position on system that doesn't support pwrite().
       */
       mysql_file_seek(log_file.file, org_position, MY_SEEK_SET, MYF(0));
+#ifdef BINLOG_PREALLOC
+      end_io_cache(&log_file);
+      DBUG_ASSERT(is_active(log_file_name));
+      mysql_mutex_assert_owner(&LOCK_log);
+      set_active_size(log_file.pos_in_file);
+      if (use_binlog_prealloc && my_chsize(log_file.file,
+                  log_file.pos_in_file, 0, MYF(MY_WME)))
+      {
+          write_error= 1;
+          sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
+      }
+#endif
     }
 
     /* this will cleanup IO_CACHE, sync and close the file */
Index: a/sql/sys_vars.cc
===================================================================
--- a.orig/sql/sys_vars.cc
+++ a/sql/sys_vars.cc
@@ -3330,6 +3330,12 @@ static Sys_var_uint Sys_slave_net_timeou
        VALID_RANGE(1, LONG_TIMEOUT), DEFAULT(SLAVE_NET_TIMEOUT), BLOCK_SIZE(1),
        NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
        ON_UPDATE(fix_slave_net_timeout));
+#ifdef BINLOG_PREALLOC
+static Sys_var_ulong Sys_binlog_prealloc(
+       "binlog_prealloc", "default 0 , if binlog_prealloc >0, means prealloc binlog file",
+       GLOBAL_VAR(binlog_prealloc), CMD_LINE(REQUIRED_ARG),  /* global-only variable backed by binlog_prealloc */
+       VALID_RANGE(0,100), DEFAULT(0), BLOCK_SIZE(1));  /* callers only test zero vs non-zero */
+#endif
 
 static bool check_slave_skip_counter(sys_var *self, THD *thd, set_var *var)
 {
Index: a/sql/mysqld.h
===================================================================
--- a.orig/sql/mysqld.h
+++ a/sql/mysqld.h
@@ -238,7 +238,9 @@ extern I_List<THD> threads;
 extern char err_shared_dir[];
 extern TYPELIB thread_handling_typelib;
 extern my_decimal decimal_zero;
-
+#ifdef BINLOG_PREALLOC 
+extern ulong binlog_prealloc;
+#endif
 extern ulonglong opt_log_warnings_suppress;
 
 extern char* enforce_storage_engine;

欢迎试用,有任何问题请留言或邮件给我: yinfeng.zwx@taobao.com


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值