fuse支持1M跟内核版本和libfuse的版本都有关系。
fuse内核修改
首先需要内核支持大块的IO。
内核版本的修改如下,按照commit的信息,应该是合入到4.20-rc1(4.18的不同版本这个修改是否合入需要对应的内核源码才能确认),github上看的commit提交日期为2018-10-1
github的commit地址如下:
https://github.com/torvalds/linux/commit/5da784cce4308ae10a79e3c8c41b13fb9568e4e0
path内容如下
From 5da784cce4308ae10a79e3c8c41b13fb9568e4e0 Mon Sep 17 00:00:00 2001
From: Constantine Shulyupin <const@MakeLinux.com>
Date: Thu, 6 Sep 2018 15:37:06 +0300
Subject: [PATCH] fuse: add max_pages to init_out
Replace FUSE_MAX_PAGES_PER_REQ with the configurable parameter max_pages to
improve performance.
Old RFC with detailed description of the problem and many fixes by Mitsuo
Hayasaka (mitsuo.hayasaka.hu@hitachi.com):
- https://lkml.org/lkml/2012/7/5/136
We've encountered performance degradation and fixed it on a big and complex
virtual environment.
Environment to reproduce degradation and improvement:
1. Add lag to user mode FUSE
Add nanosleep(&(struct timespec){ 0, 1000 }, NULL); to xmp_write_buf in
passthrough_fh.c
2. patch UM fuse with configurable max_pages parameter. The patch will be
provided latter.
3. run test script and perform test on tmpfs
fuse_test()
{
cd /tmp
mkdir -p fusemnt
passthrough_fh -o max_pages=$1 /tmp/fusemnt
grep fuse /proc/self/mounts
dd conv=fdatasync oflag=dsync if=/dev/zero of=fusemnt/tmp/tmp \
count=1K bs=1M 2>&1 | grep -v records
rm fusemnt/tmp/tmp
killall passthrough_fh
}
Test results:
passthrough_fh /tmp/fusemnt fuse.passthrough_fh \
rw,nosuid,nodev,relatime,user_id=0,group_id=0 0 0
1073741824 bytes (1.1 GB) copied, 1.73867 s, 618 MB/s
passthrough_fh /tmp/fusemnt fuse.passthrough_fh \
rw,nosuid,nodev,relatime,user_id=0,group_id=0,max_pages=256 0 0
1073741824 bytes (1.1 GB) copied, 1.15643 s, 928 MB/s
Obviously with bigger lag the difference between 'before' and 'after'
will be more significant.
Mitsuo Hayasaka, in 2012 (https://lkml.org/lkml/2012/7/5/136),
observed improvement from 400-550 to 520-740.
Signed-off-by: Constantine Shulyupin <const@MakeLinux.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
fs/fuse/dev.c | 5 ++--
fs/fuse/file.c | 59 ++++++++++++++++++++-------------------
fs/fuse/fuse_i.h | 10 +++++--
fs/fuse/inode.c | 8 +++++-
include/uapi/linux/fuse.h | 7 ++++-
5 files changed, 54 insertions(+), 35 deletions(-)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index fefb9dd8a2f479..69d4df78a417c9 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -61,6 +61,7 @@ static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
struct page **pages = NULL;
struct fuse_page_desc *page_descs = NULL;
+ WARN_ON(npages > FUSE_MAX_MAX_PAGES);
if (npages > FUSE_REQ_INLINE_PAGES) {
pages = kzalloc(npages * (sizeof(*pages) +
sizeof(*page_descs)), flags);
@@ -1674,7 +1675,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
unsigned int num;
unsigned int offset;
size_t total_len = 0;
- int num_pages;
+ unsigned int num_pages;
offset = outarg->offset & ~PAGE_MASK;
file_size = i_size_read(inode);
@@ -1686,7 +1687,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
num = file_size - outarg->offset;
num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
- num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
+ num_pages = min(num_pages, fc->max_pages);
req = fuse_get_req(fc, num_pages);
if (IS_ERR(req))
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index b10d14baeb1f07..035843b501fe5b 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -850,11 +850,11 @@ static int fuse_readpages_fill(void *_data, struct page *page)
fuse_wait_on_page_writeback(inode, page->index);
if (req->num_pages &&
- (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
+ (req->num_pages == fc->max_pages ||
(req->num_pages + 1) * PAGE_SIZE > fc->max_read ||
req->pages[req->num_pages - 1]->index + 1 != page->index)) {
- int nr_alloc = min_t(unsigned, data->nr_pages,
- FUSE_MAX_PAGES_PER_REQ);
+ unsigned int nr_alloc = min_t(unsigned int, data->nr_pages,
+ fc->max_pages);
fuse_send_readpages(req, data->file);
if (fc->async_read)
req = fuse_get_req_for_background(fc, nr_alloc);
@@ -889,7 +889,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_fill_data data;
int err;
- int nr_alloc = min_t(unsigned, nr_pages, FUSE_MAX_PAGES_PER_REQ);
+ unsigned int nr_alloc = min_t(unsigned int, nr_pages, fc->max_pages);
err = -EIO;
if (is_bad_inode(inode))
@@ -1104,12 +1104,13 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
return count > 0 ? count : err;
}
-static inline unsigned fuse_wr_pages(loff_t pos, size_t len)
+static inline unsigned int fuse_wr_pages(loff_t pos, size_t len,
+ unsigned int max_pages)
{
- return min_t(unsigned,
+ return min_t(unsigned int,
((pos + len - 1) >> PAGE_SHIFT) -
(pos >> PAGE_SHIFT) + 1,
- FUSE_MAX_PAGES_PER_REQ);
+ max_pages);
}
static ssize_t fuse_perform_write(struct kiocb *iocb,
@@ -1131,7 +1132,8 @@ static ssize_t fuse_perform_write(struct kiocb *iocb,
do {
struct fuse_req *req;
ssize_t count;
- unsigned nr_pages = fuse_wr_pages(pos, iov_iter_count(ii));
+ unsigned int nr_pages = fuse_wr_pages(pos, iov_iter_count(ii),
+ fc->max_pages);
req = fuse_get_req(fc, nr_pages);
if (IS_ERR(req)) {
@@ -1321,11 +1323,6 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
return ret < 0 ? ret : 0;
}
-static inline int fuse_iter_npages(const struct iov_iter *ii_p)
-{
- return iov_iter_npages(ii_p, FUSE_MAX_PAGES_PER_REQ);
-}
-
ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
loff_t *ppos, int flags)
{
@@ -1345,9 +1342,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
int err = 0;
if (io->async)
- req = fuse_get_req_for_background(fc, fuse_iter_npages(iter));
+ req = fuse_get_req_for_background(fc, iov_iter_npages(iter,
+ fc->max_pages));
else
- req = fuse_get_req(fc, fuse_iter_npages(iter));
+ req = fuse_get_req(fc, iov_iter_npages(iter, fc->max_pages));
if (IS_ERR(req))
return PTR_ERR(req);
@@ -1392,9 +1390,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
fuse_put_request(fc, req);
if (io->async)
req = fuse_get_req_for_background(fc,
- fuse_iter_npages(iter));
+ iov_iter_npages(iter, fc->max_pages));
else
- req = fuse_get_req(fc, fuse_iter_npages(iter));
+ req = fuse_get_req(fc, iov_iter_npages(iter,
+ fc->max_pages));
if (IS_ERR(req))
break;
}
@@ -1823,7 +1822,7 @@ static int fuse_writepages_fill(struct page *page,
is_writeback = fuse_page_is_writeback(inode, page->index);
if (req && req->num_pages &&
- (is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
+ (is_writeback || req->num_pages == fc->max_pages ||
(req->num_pages + 1) * PAGE_SIZE > fc->max_write ||
data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) {
fuse_writepages_send(data);
@@ -1851,7 +1850,7 @@ static int fuse_writepages_fill(struct page *page,
struct fuse_inode *fi = get_fuse_inode(inode);
err = -ENOMEM;
- req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ);
+ req = fuse_request_alloc_nofs(fc->max_pages);
if (!req) {
__free_page(tmp_page);
goto out_unlock;
@@ -1908,6 +1907,7 @@ static int fuse_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct inode *inode = mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_fill_wb_data data;
int err;
@@ -1920,7 +1920,7 @@ static int fuse_writepages(struct address_space *mapping,
data.ff = NULL;
err = -ENOMEM;
- data.orig_pages = kcalloc(FUSE_MAX_PAGES_PER_REQ,
+ data.orig_pages = kcalloc(fc->max_pages,
sizeof(struct page *),
GFP_NOFS);
if (!data.orig_pages)
@@ -2391,10 +2391,11 @@ static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src,
}
/* Make sure iov_length() won't overflow */
-static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
+static int fuse_verify_ioctl_iov(struct fuse_conn *fc, struct iovec *iov,
+ size_t count)
{
size_t n;
- u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;
+ u32 max = fc->max_pages << PAGE_SHIFT;
for (n = 0; n < count; n++, iov++) {
if (iov->iov_len > (size_t) max)
@@ -2518,7 +2519,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
err = -ENOMEM;
- pages = kcalloc(FUSE_MAX_PAGES_PER_REQ, sizeof(pages[0]), GFP_KERNEL);
+ pages = kcalloc(fc->max_pages, sizeof(pages[0]), GFP_KERNEL);
iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
if (!pages || !iov_page)
goto out;
@@ -2557,7 +2558,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
/* make sure there are enough buffer pages and init request with them */
err = -ENOMEM;
- if (max_pages > FUSE_MAX_PAGES_PER_REQ)
+ if (max_pages > fc->max_pages)
goto out;
while (num_pages < max_pages) {
pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
@@ -2644,11 +2645,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
in_iov = iov_page;
out_iov = in_iov + in_iovs;
- err = fuse_verify_ioctl_iov(in_iov, in_iovs);
+ err = fuse_verify_ioctl_iov(fc, in_iov, in_iovs);
if (err)
goto out;
- err = fuse_verify_ioctl_iov(out_iov, out_iovs);
+ err = fuse_verify_ioctl_iov(fc, out_iov, out_iovs);
if (err)
goto out;
@@ -2839,9 +2840,9 @@ static void fuse_do_truncate(struct file *file)
fuse_do_setattr(file_dentry(file), &attr, file);
}
-static inline loff_t fuse_round_up(loff_t off)
+static inline loff_t fuse_round_up(struct fuse_conn *fc, loff_t off)
{
- return round_up(off, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
+ return round_up(off, fc->max_pages << PAGE_SHIFT);
}
static ssize_t
@@ -2870,7 +2871,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
if (async_dio && iov_iter_rw(iter) != WRITE && offset + count > i_size) {
if (offset >= i_size)
return 0;
- iov_iter_truncate(iter, fuse_round_up(i_size - offset));
+ iov_iter_truncate(iter, fuse_round_up(ff->fc, i_size - offset));
count = iov_iter_count(iter);
}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index f5bdce84e76673..3d578745c85249 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -28,8 +28,11 @@
#include <linux/refcount.h>
#include <linux/user_namespace.h>
-/** Max number of pages that can be used in a single read request */
-#define FUSE_MAX_PAGES_PER_REQ 32
+/** Default max number of pages that can be used in a single read request */
+#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32
+
+/** Maximum of max_pages received in init_out */
+#define FUSE_MAX_MAX_PAGES 256
/** Bias for fi->writectr, meaning new writepages must not be sent */
#define FUSE_NOWRITE INT_MIN
@@ -525,6 +528,9 @@ struct fuse_conn {
/** Maximum write size */
unsigned max_write;
+ /** Maxmum number of pages that can be used in a single request */
+ unsigned int max_pages;
+
/** Input queue */
struct fuse_iqueue iq;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 82db1ab5342095..8cebf4d5f51b3c 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -928,6 +928,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
}
if (arg->flags & FUSE_ABORT_ERROR)
fc->abort_err = 1;
+ if (arg->flags & FUSE_MAX_PAGES) {
+ fc->max_pages =
+ min_t(unsigned int, FUSE_MAX_MAX_PAGES,
+ max_t(unsigned int, arg->max_pages, 1));
+ }
} else {
ra_pages = fc->max_read / PAGE_SIZE;
fc->no_lock = 1;
@@ -959,7 +964,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
- FUSE_ABORT_ERROR;
+ FUSE_ABORT_ERROR | FUSE_MAX_PAGES;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
@@ -1152,6 +1157,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
fc->user_id = d.user_id;
fc->group_id = d.group_id;
fc->max_read = max_t(unsigned, 4096, d.max_read);
+ fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
/* Used by get_root_inode() */
sb->s_fs_info = fc;
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 31a504f1ee6052..76f46f159992c9 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -120,6 +120,7 @@
* 7.28
* - add FUSE_COPY_FILE_RANGE
* - add FOPEN_CACHE_DIR
+ * - add FUSE_MAX_PAGES, add max_pages to init_out
*/
#ifndef _LINUX_FUSE_H
@@ -255,6 +256,7 @@ struct fuse_file_lock {
* FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc
* FUSE_POSIX_ACL: filesystem supports posix acls
* FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED
+ * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
@@ -278,6 +280,7 @@ struct fuse_file_lock {
#define FUSE_HANDLE_KILLPRIV (1 << 19)
#define FUSE_POSIX_ACL (1 << 20)
#define FUSE_ABORT_ERROR (1 << 21)
+#define FUSE_MAX_PAGES (1 << 22)
/**
* CUSE INIT request/reply flags
@@ -617,7 +620,9 @@ struct fuse_init_out {
uint16_t congestion_threshold;
uint32_t max_write;
uint32_t time_gran;
- uint32_t unused[9];
+ uint16_t max_pages;
+ uint16_t padding;
+ uint32_t unused[8];
};
#define CUSE_INIT_INFO_MAX 4096
libfuse修改
libfuse的修改如下,这个修改是先需要内核版本支持后,才能生效。
按照commit的信息,应该是合入到fuse-3.6.0(目前ufs-client用的libfuse的静态库是支持这个修改的),github上看的commit提交日期为2019-6-13
github的commit地址如下:
https://github.com/libfuse/libfuse/commit/027d0d17c8a4605109f09d9c988e255b64a2c19a
path内容如下
From 027d0d17c8a4605109f09d9c988e255b64a2c19a Mon Sep 17 00:00:00 2001
From: scosu <mpargmann@allfex.org>
Date: Thu, 13 Jun 2019 13:59:10 +0200
Subject: [PATCH] fuse_lowlevel: Add max_pages support (#384)
Starting with kernel version 4.20 fuse supports a new property
'max_pages' which is the maximum number of pages that can be used per
request. This can be set via an argument during initialization.
This new property allows writes to be larger than 128k.
This patch sets the property if the matching capability is set
(FUSE_MAX_PAGES). It will also set max_write to 1MiB. Filesystems have
the possibility to decrease this size by setting max_write to a smaller
size. The max_pages and bufsize fields are adjusted accordingly.
Cc: Constantine Shulyupin <const@MakeLinux.com>
Signed-off-by: Markus Pargmann <scosu@quobyte.com>
---
ChangeLog.rst | 6 ++++++
lib/fuse_i.h | 6 ++++++
lib/fuse_lowlevel.c | 30 +++++++++++++++++++++---------
3 files changed, 33 insertions(+), 9 deletions(-)
diff --git a/ChangeLog.rst b/ChangeLog.rst
index 288577594..abbd3ee14 100644
--- a/ChangeLog.rst
+++ b/ChangeLog.rst
@@ -4,6 +4,12 @@ Unreleased Changes
* Added a new example (passthrough_hp). The functionality is similar
to passthrough_ll, but the implementation focuses on performance and
correctness rather than simplicity.
+* Added support for fuse kernel feature `max_pages` which allows to increase
+ the maximum number of pages that can be used per request. This feature was
+ introduced in kernel 4.20. `max_pages` is set based on the value in
+ `max_write`. By default `max_write` will be 1MiB now for kernels that support
+ `max_pages`. If you want smaller buffers or writes you have to set
+ `max_write` manually.
libfuse 3.5.0 (2019-04-16)
==========================
diff --git a/lib/fuse_i.h b/lib/fuse_i.h
index cf3555168..d38b630ac 100644
--- a/lib/fuse_i.h
+++ b/lib/fuse_i.h
@@ -131,3 +131,9 @@ struct fuse *fuse_new_31(struct fuse_args *args, const struct fuse_operations *o
int fuse_loop_mt_32(struct fuse *f, struct fuse_loop_config *config);
int fuse_session_loop_mt_32(struct fuse_session *se, struct fuse_loop_config *config);
+#define FUSE_MAX_MAX_PAGES 256
+#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32
+
+/* room needed in buffer to accommodate header */
+#define FUSE_BUFFER_HEADER_SIZE 0x1000
+
diff --git a/lib/fuse_lowlevel.c b/lib/fuse_lowlevel.c
index c96e21104..3684b8b30 100644
--- a/lib/fuse_lowlevel.c
+++ b/lib/fuse_lowlevel.c
@@ -1909,6 +1909,14 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV;
if (arg->flags & FUSE_NO_OPENDIR_SUPPORT)
se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT;
+ if (!(arg->flags & FUSE_MAX_PAGES)) {
+ size_t max_bufsize =
+ FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize()
+ + FUSE_BUFFER_HEADER_SIZE;
+ if (bufsize > max_bufsize) {
+ bufsize = max_bufsize;
+ }
+ }
} else {
se->conn.max_readahead = 0;
}
@@ -1955,10 +1963,10 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
bufsize);
bufsize = FUSE_MIN_READ_BUFFER;
}
+ se->bufsize = bufsize;
- bufsize -= 4096;
- if (bufsize < se->conn.max_write)
- se->conn.max_write = bufsize;
+ if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE)
+ se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE;
se->got_init = 1;
if (se->op.init)
@@ -1985,6 +1993,14 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
return;
}
+ if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) {
+ se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE;
+ }
+ if (arg->flags & FUSE_MAX_PAGES) {
+ outarg.flags |= FUSE_MAX_PAGES;
+ outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1;
+ }
+
/* Always enable big writes, this is superseded
by the max_write option */
outarg.flags |= FUSE_BIG_WRITES;
@@ -2807,11 +2823,6 @@ int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf,
return res;
}
-#define KERNEL_BUF_PAGES 32
-
-/* room needed in buffer to accommodate header */
-#define HEADER_SIZE 0x1000
-
struct fuse_session *fuse_session_new(struct fuse_args *args,
const struct fuse_lowlevel_ops *op,
size_t op_size, void *userdata)
@@ -2872,7 +2883,8 @@ struct fuse_session *fuse_session_new(struct fuse_args *args,
if (se->debug)
fprintf(stderr, "FUSE library version: %s\n", PACKAGE_VERSION);
- se->bufsize = KERNEL_BUF_PAGES * getpagesize() + HEADER_SIZE;
+ se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() +
+ FUSE_BUFFER_HEADER_SIZE;
list_init_req(&se->list);
list_init_req(&se->interrupts);