Which xlog fsync methods support O_DIRECT to bypass the kernel buffers?

Postgres2015全国用户大会将于11月20至21日在北京丽亭华苑酒店召开。本次大会嘉宾阵容强大,国内顶级PostgreSQL数据库专家将悉数到场,并特邀欧洲、俄罗斯、日本、美国等国家和地区的数据库方面专家助阵:

  • Postgres-XC项目的发起人铃木市一(SUZUKI Koichi)
  • Postgres-XL的项目发起人Mason Sharp
  • pgpool的作者石井达夫(Tatsuo Ishii)
  • PG-Strom的作者海外浩平(Kaigai Kohei)
  • Greenplum研发总监姚延栋
  • 周正中(德哥), PostgreSQL中国用户会创始人之一
  • 汪洋,平安科技数据库技术部经理
  • ……


 
  • 2015年度PG大象会报名地址:http://postgres2015.eventdove.com/
  • PostgreSQL中国社区: http://postgres.cn/
  • PostgreSQL专业1群: 3336901(已满)
  • PostgreSQL专业2群: 100910388
  • PostgreSQL专业3群: 150657323



PostgreSQL WAL日志的fsync方法较多, 哪些方法支持O_DIRECT呢?
所有支持的fsync方法 : 

#wal_sync_method = fsync                # the default is the first option
                                        # supported by the operating system:
                                        #   open_datasync
                                        #   fdatasync (default on Linux)
                                        #   fsync
                                        #   fsync_writethrough
                                        #   open_sync


从代码来看, 只有 open_sync 和 open_datasync 支持 O_DIRECT (旁路内核buffer). 但是要真正旁路内核buffer, 还需要同时满足以下条件:
wal_level = minimal, 即未开启归档, 也没有下游节点使用流复制协议来复制XLOG数据; 并且当前进程不是walreceiver进程.
原因是开启归档或有下游流复制节点时, XLOG文件写入后很快还会被读出来, 如果旁路了内核buffer就必然产生物理读, 所以此时使用O_DIRECT没有意义.

涉及代码如下 : 
1. src/include/access/xlogdefs.h

/*
 *      Because O_DIRECT bypasses the kernel buffers, and because we never
 *      read those buffers except during crash recovery or if wal_level != minimal,
 *      it is a win to use it in all cases where we sync on each write().  We could
 *      allow O_DIRECT with fsync(), but it is unclear if fsync() could process
 *      writes not buffered in the kernel.  Also, O_DIRECT is never enough to force
 *      data to the drives, it merely tries to bypass the kernel cache, so we still
 *      need O_SYNC/O_DSYNC.
 */
/*
 * PG_O_DIRECT: the platform's O_DIRECT flag where one exists, otherwise 0
 * so that OR-ing it into a set of open() flags changes nothing.
 */
#ifndef O_DIRECT
#define PG_O_DIRECT                             0
#else
#define PG_O_DIRECT                             O_DIRECT
#endif

/*
 * OPEN_SYNC_FLAG: prefer O_SYNC, fall back to O_FSYNC.  When the platform
 * offers neither, the macro is deliberately left undefined so that code
 * guarded by "#ifdef OPEN_SYNC_FLAG" is compiled out.
 */
#ifdef O_SYNC
#define OPEN_SYNC_FLAG          O_SYNC
#else
#ifdef O_FSYNC
#define OPEN_SYNC_FLAG          O_FSYNC
#endif
#endif


2. src/backend/access/transam/xlog.c

/*
 * GUC support
 *
 * Table of the names accepted for wal_sync_method, each paired with its
 * SYNC_METHOD_* enum value.  Entries for methods the platform does not
 * provide are compiled out, so an unsupported method name never appears in
 * this table.  "fsync" is the only entry that is always present.
 *
 * NOTE(review): the meaning of the third field (false everywhere here) is
 * defined by struct config_enum_entry, which is not shown in this excerpt.
 */
const struct config_enum_entry sync_method_options[] = {
        {"fsync", SYNC_METHOD_FSYNC, false},
#ifdef HAVE_FSYNC_WRITETHROUGH
        {"fsync_writethrough", SYNC_METHOD_FSYNC_WRITETHROUGH, false},
#endif
#ifdef HAVE_FDATASYNC
        {"fdatasync", SYNC_METHOD_FDATASYNC, false},
#endif
#ifdef OPEN_SYNC_FLAG
        {"open_sync", SYNC_METHOD_OPEN, false},
#endif
#ifdef OPEN_DATASYNC_FLAG
        {"open_datasync", SYNC_METHOD_OPEN_DSYNC, false},
#endif
        /* all-zero entry; presumably the end-of-list marker */
        {NULL, 0, false}
};


/*
 * Return the (possible) sync flag used for opening a file, depending on the
 * value of the GUC wal_sync_method.
 */
static int
get_sync_bit(int method)
{
        int                     o_direct_flag = 0;

        /* If fsync is disabled, never open in sync mode */
        if (!enableFsync)
                return 0;

        /*
         * Optimize writes by bypassing kernel cache with O_DIRECT when using
         * O_SYNC/O_FSYNC and O_DSYNC.  But only if archiving and streaming are
         * disabled, otherwise the archive command or walsender process will read
         * the WAL soon after writing it, which is guaranteed to cause a physical
         * read if we bypassed the kernel cache. We also skip the
         * posix_fadvise(POSIX_FADV_DONTNEED) call in XLogFileClose() for the same
         * reason.
         *
         * Never use O_DIRECT in walreceiver process for similar reasons; the WAL
         * written by walreceiver is normally read by the startup process soon
         * after its written. Also, walreceiver performs unaligned writes, which
         * don't work with O_DIRECT, so it is required for correctness too.
         */
        if (!XLogIsNeeded() && !AmWalReceiverProcess())
                o_direct_flag = PG_O_DIRECT;

        switch (method)
        {
                        /*
                         * enum values for all sync options are defined even if they are
                         * not supported on the current platform.  But if not, they are
                         * not included in the enum option array, and therefore will never
                         * be seen here.
                         */
                case SYNC_METHOD_FSYNC:
                case SYNC_METHOD_FSYNC_WRITETHROUGH:
                case SYNC_METHOD_FDATASYNC:
                        return 0;
#ifdef OPEN_SYNC_FLAG
                case SYNC_METHOD_OPEN:
                        return OPEN_SYNC_FLAG | o_direct_flag;
#endif
#ifdef OPEN_DATASYNC_FLAG
                case SYNC_METHOD_OPEN_DSYNC:
                        return OPEN_DATASYNC_FLAG | o_direct_flag;
#endif
                default:
                        /* can't happen (unless we are out of sync with option array) */
                        elog(ERROR, "unrecognized wal_sync_method: %d", method);
                        return 0;                       /* silence warning */
        }
}

 
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值