From de82b923012ff8790bcfff381eb3ca9069d00f49 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Tue, 14 May 2013 11:25:39 -0400 Subject: [PATCH 1/6] fuse: allocate for_background dio requests based on io->async state Commit 8b41e671 introduced explicit background checking for fuse_req structures with BUG_ON() checks for the appropriate type of request in in the associated send functions. Commit bcba24cc introduced the ability to send dio requests as background requests but does not update the request allocation based on the type of I/O request. As a result, a BUG_ON() triggers in the fuse_request_send_background() background path if an async I/O is sent. Allocate a request based on the async state of the fuse_io_priv to avoid the BUG. Signed-off-by: Brian Foster Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index d1c9b85b3f58..fe191325fefa 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1278,7 +1278,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, iov_iter_init(&ii, iov, nr_segs, count, 0); - req = fuse_get_req(fc, fuse_iter_npages(&ii)); + if (io->async) + req = fuse_get_req_for_background(fc, fuse_iter_npages(&ii)); + else + req = fuse_get_req(fc, fuse_iter_npages(&ii)); if (IS_ERR(req)) return PTR_ERR(req); @@ -1314,7 +1317,11 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, break; if (count) { fuse_put_request(fc, req); - req = fuse_get_req(fc, fuse_iter_npages(&ii)); + if (io->async) + req = fuse_get_req_for_background(fc, + fuse_iter_npages(&ii)); + else + req = fuse_get_req(fc, fuse_iter_npages(&ii)); if (IS_ERR(req)) break; } From 3634a6327815d39dd93e5c44a602daae91c66297 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 17 May 2013 09:30:32 -0400 Subject: [PATCH 2/6] fuse: truncate pagecache range on hole punch fuse supports hole punch via the fallocate() FALLOC_FL_PUNCH_HOLE interface. When a hole punch is passed through, the page cache is not cleared and thus allows reading stale data from the cache. This is easily demonstrable (using FOPEN_KEEP_CACHE) by reading a smallish random data file into cache, punching a hole and creating a copy of the file. Drop caches or remount and observe that the original file no longer matches the file copied after the hole punch. The original file contains a zeroed range and the latter file contains stale data. Protect against writepage requests in progress and punch out the associated page cache range after a successful client fs hole punch. Signed-off-by: Brian Foster Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index fe191325fefa..a200a2d80377 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -16,6 +16,7 @@ #include #include #include +#include static const struct file_operations fuse_direct_io_file_operations; @@ -2453,6 +2454,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, loff_t length) { struct fuse_file *ff = file->private_data; + struct inode *inode = file->f_inode; struct fuse_conn *fc = ff->fc; struct fuse_req *req; struct fuse_fallocate_in inarg = { @@ -2466,9 +2468,16 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, if (fc->no_fallocate) return -EOPNOTSUPP; + if (mode & FALLOC_FL_PUNCH_HOLE) { + mutex_lock(&inode->i_mutex); + fuse_set_nowrite(inode); + } + req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto out; + } req->in.h.opcode = FUSE_FALLOCATE; req->in.h.nodeid = ff->nodeid; @@ -2483,6 +2492,15 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, } fuse_put_request(fc, req); +out: + if (mode & FALLOC_FL_PUNCH_HOLE) { + if (!err) + truncate_pagecache_range(inode, offset, + offset + length - 1); + fuse_release_nowrite(inode); + mutex_unlock(&inode->i_mutex); + } + return err; } From bee6c307800bbb26ba1a855b1841c2f0c4b7622a Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 17 May 2013 15:27:34 -0400 Subject: [PATCH 3/6] fuse: update inode size and invalidate attributes on fallocate An fallocate request without FALLOC_FL_KEEP_SIZE set can extend the size of a file. Update the inode size after a successful fallocate. Also invalidate the inode attributes after a successful fallocate to ensure we pick up the latest attribute values (i.e., i_blocks). Signed-off-by: Brian Foster Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index a200a2d80377..d9f467907791 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2492,11 +2492,20 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, } fuse_put_request(fc, req); + if (err) + goto out; + + /* we could have extended the file */ + if (!(mode & FALLOC_FL_KEEP_SIZE)) + fuse_write_update_size(inode, offset + length); + + if (mode & FALLOC_FL_PUNCH_HOLE) + truncate_pagecache_range(inode, offset, offset + length - 1); + + fuse_invalidate_attr(inode); + out: if (mode & FALLOC_FL_PUNCH_HOLE) { - if (!err) - truncate_pagecache_range(inode, offset, - offset + length - 1); fuse_release_nowrite(inode); mutex_unlock(&inode->i_mutex); } From 28420dad233520811c0e0860e7fb4975ed863fc4 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 3 Jun 2013 14:40:22 +0200 Subject: [PATCH 4/6] fuse: fix readdirplus Oops in fuse_dentry_revalidate Fix bug introduced by commit 4582a4ab2a "FUSE: Adapt readdirplus to application usage patterns". We need to check for a positive dentry; negative dentries are not added by readdirplus. Secondly we need to advise the use of readdirplus on the *parent*, otherwise the whole thing is useless. Thirdly all this is only relevant if "readdirplus_auto" mode is selected by the filesystem. We advise the use of readdirplus only if the dentry was still valid. If we had to redo the lookup then there was no use in doing the -plus version. Reported-by: Bernd Schubert Signed-off-by: Miklos Szeredi CC: Feng Shuo CC: stable@vger.kernel.org --- fs/fuse/dir.c | 12 +++++++++--- fs/fuse/inode.c | 7 ++++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 254df56b847b..f3f783dc4f75 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -180,6 +180,8 @@ u64 fuse_get_attr_version(struct fuse_conn *fc) static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) { struct inode *inode; + struct dentry *parent; + struct fuse_conn *fc; inode = ACCESS_ONCE(entry->d_inode); if (inode && is_bad_inode(inode)) @@ -187,10 +189,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) else if (fuse_dentry_time(entry) < get_jiffies_64()) { int err; struct fuse_entry_out outarg; - struct fuse_conn *fc; struct fuse_req *req; struct fuse_forget_link *forget; - struct dentry *parent; u64 attr_version; /* For negative dentries, always do a fresh lookup */ @@ -241,8 +241,14 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) entry_attr_timeout(&outarg), attr_version); fuse_change_entry_timeout(entry, &outarg); + } else if (inode) { + fc = get_fuse_conn(inode); + if (fc->readdirplus_auto) { + parent = dget_parent(entry); + fuse_advise_use_readdirplus(parent->d_inode); + dput(parent); + } } - fuse_advise_use_readdirplus(inode); return 1; } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 6201f81e4d3a..9a0cdde14a08 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -867,10 +867,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->dont_mask = 1; if (arg->flags & FUSE_AUTO_INVAL_DATA) fc->auto_inval_data = 1; - if (arg->flags & FUSE_DO_READDIRPLUS) + if (arg->flags & FUSE_DO_READDIRPLUS) { fc->do_readdirplus = 1; - if (arg->flags & FUSE_READDIRPLUS_AUTO) - fc->readdirplus_auto = 1; + if (arg->flags & FUSE_READDIRPLUS_AUTO) + fc->readdirplus_auto = 1; + } if (arg->flags & FUSE_ASYNC_DIO) fc->async_dio = 1; } else { From c9ecf989cc7626e9edf8abef79f64b909542129b Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 30 May 2013 15:35:50 -0400 Subject: [PATCH 5/6] fuse: return -EIOCBQUEUED from fuse_direct_IO() for all async requests If request submission fails for an async request (i.e., get_user_pages() returns -ERESTARTSYS), we currently skip the -EIOCBQUEUED return and drop into wait_for_sync_kiocb() forever. Avoid this by always returning -EIOCBQUEUED for async requests. If an error occurs, the error is passed into fuse_aio_complete(), returned via aio_complete() and thus propagated to userspace via io_getevents(). Signed-off-by: Brian Foster Reviewed-by: Maxim Patlasov Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index d9f467907791..b3ad8d61a162 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2432,7 +2432,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, fuse_aio_complete(io, ret < 0 ? ret : 0, -1); /* we have a non-extending, async request, so return */ - if (ret > 0 && !is_sync_kiocb(iocb)) + if (!is_sync_kiocb(iocb)) return -EIOCBQUEUED; ret = wait_on_sync_kiocb(iocb); From e5c5f05dca0cf90f0f3bb1aea85dcf658baff185 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Thu, 30 May 2013 16:41:34 +0400 Subject: [PATCH 6/6] fuse: fix alignment in short read optimization for async_dio The bug was introduced with async_dio feature: trying to optimize short reads, we cut number-of-bytes-to-read to i_size boundary. Hence the following example: truncate --size=300 /mnt/file dd if=/mnt/file of=/dev/null iflag=direct led to FUSE_READ request of 300 bytes size. This turned out to be problem for userspace fuse implementations who rely on assumption that kernel fuse does not change alignment of request from client FS. The patch turns off the optimization if async_dio is disabled. And, if it's enabled, the patch fixes adjustment of number-of-bytes-to-read to preserve alignment. Note, that we cannot throw out short read optimization entirely because otherwise a direct read of a huge size issued on a tiny file would generate a huge amount of fuse requests and most of them would be ACKed by userspace with zero bytes read. Signed-off-by: Maxim Patlasov Reviewed-by: Brian Foster Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b3ad8d61a162..e570081f9f76 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2373,6 +2373,11 @@ static void fuse_do_truncate(struct file *file) fuse_do_setattr(inode, &attr, file); } +static inline loff_t fuse_round_up(loff_t off) +{ + return round_up(off, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); +} + static ssize_t fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) @@ -2380,6 +2385,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, ssize_t ret = 0; struct file *file = iocb->ki_filp; struct fuse_file *ff = file->private_data; + bool async_dio = ff->fc->async_dio; loff_t pos = 0; struct inode *inode; loff_t i_size; @@ -2391,10 +2397,10 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, i_size = i_size_read(inode); /* optimization for short read */ - if (rw != WRITE && offset + count > i_size) { + if (async_dio && rw != WRITE && offset + count > i_size) { if (offset >= i_size) return 0; - count = i_size - offset; + count = min_t(loff_t, count, fuse_round_up(i_size - offset)); } io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); @@ -2412,7 +2418,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, * By default, we want to optimize all I/Os with async request * submission to the client filesystem if supported. */ - io->async = ff->fc->async_dio; + io->async = async_dio; io->iocb = iocb; /* @@ -2420,7 +2426,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, * to wait on real async I/O requests, so we must submit this request * synchronously. */ - if (!is_sync_kiocb(iocb) && (offset + count > i_size)) + if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE) io->async = false; if (rw == WRITE)