From 5c791fe1e2a4f401f819065ea4fc0450849f1818 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:01 +0200 Subject: [PATCH 01/23] fuse: make sure reclaim doesn't write the inode In writeback cache mode mtime/ctime updates are cached, and flushed to the server using the ->write_inode() callback. Closing the file will result in a dirty inode being immediately written, but in other cases the inode can remain dirty after all references are dropped. This result in the inode being written back from reclaim, which can deadlock on a regular allocation while the request is being served. The usual mechanisms (GFP_NOFS/PF_MEMALLOC*) don't work for FUSE, because serving a request involves unrelated userspace process(es). Instead do the same as for dirty pages: make sure the inode is written before the last reference is gone. - fallocate(2)/copy_file_range(2): these call file_update_time() or file_modified(), so flush the inode before returning from the call - unlink(2), link(2) and rename(2): these call fuse_update_ctime(), so flush the ctime directly from this helper Reported-by: chenguanyou Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 8 ++++++++ fs/fuse/file.c | 15 +++++++++++++++ fs/fuse/fuse_i.h | 1 + fs/fuse/inode.c | 3 +++ 4 files changed, 27 insertions(+) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index d9b977c0f38d..2798fbe8d001 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -738,11 +738,19 @@ static int fuse_symlink(struct user_namespace *mnt_userns, struct inode *dir, return create_new_entry(fm, &args, dir, entry, S_IFLNK); } +void fuse_flush_time_update(struct inode *inode) +{ + int err = sync_inode_metadata(inode, 1); + + mapping_set_error(inode->i_mapping, err); +} + void fuse_update_ctime(struct inode *inode) { if (!IS_NOCMTIME(inode)) { inode->i_ctime = current_time(inode); mark_inode_dirty_sync(inode); + fuse_flush_time_update(inode); } } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 11404f8c21c7..5c5ed58d91a7 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1848,6 +1848,17 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc) struct fuse_file *ff; int err; + /* + * Inode is always written before the last reference is dropped and + * hence this should not be reached from reclaim. + * + * Writing back the inode from reclaim can deadlock if the request + * processing itself needs an allocation. Allocations triggering + * reclaim while serving a request can't be prevented, because it can + * involve any number of unrelated userspace processes. + */ + WARN_ON(wbc->for_reclaim); + ff = __fuse_write_file_get(fi); err = fuse_flush_times(inode, ff); if (ff) @@ -3002,6 +3013,8 @@ out: if (lock_inode) inode_unlock(inode); + fuse_flush_time_update(inode); + return err; } @@ -3111,6 +3124,8 @@ out: inode_unlock(inode_out); file_accessed(file_in); + fuse_flush_time_update(inode_out); + return err; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index f55f9f94b1a4..a59e36c7deae 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1148,6 +1148,7 @@ int fuse_allow_current_process(struct fuse_conn *fc); u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id); +void fuse_flush_time_update(struct inode *inode); void fuse_update_ctime(struct inode *inode); int fuse_update_attributes(struct inode *inode, struct file *file); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 12d49a1914e8..2f999d38c9b4 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -118,6 +118,9 @@ static void fuse_evict_inode(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); + /* Will write inode on close/munmap and in all other dirtiers */ + WARN_ON(inode->i_state & I_DIRTY_INODE); + truncate_inode_pages_final(&inode->i_data); clear_inode(inode); if (inode->i_sb->s_flags & SB_ACTIVE) { From 36ea23374d1f7b6a9d96a2b61d38830fdf23e45d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:01 +0200 Subject: [PATCH 02/23] fuse: write inode in fuse_vma_close() instead of fuse_release() Fuse ->release() is otherwise asynchronous for the reason that it can happen in contexts unrelated to close/munmap. Inode is already written back from fuse_flush(). Add it to fuse_vma_close() as well to make sure inode dirtying from mmaps also get written out before the file is released. Also add error handling. Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 5c5ed58d91a7..01d9b3272015 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -339,12 +339,6 @@ static int fuse_open(struct inode *inode, struct file *file) static int fuse_release(struct inode *inode, struct file *file) { - struct fuse_conn *fc = get_fuse_conn(inode); - - /* see fuse_vma_close() for !writeback_cache case */ - if (fc->writeback_cache) - write_inode_now(inode, 1); - fuse_release_common(file, false); /* return value is ignored by VFS */ @@ -2351,12 +2345,15 @@ static int fuse_launder_page(struct page *page) } /* - * Write back dirty pages now, because there may not be any suitable - * open files later + * Write back dirty data/metadata now (there may not be any suitable + * open files later for data) */ static void fuse_vma_close(struct vm_area_struct *vma) { - filemap_write_and_wait(vma->vm_file->f_mapping); + int err; + + err = write_inode_now(vma->vm_file->f_mapping->host, 1); + mapping_set_error(vma->vm_file->f_mapping, err); } /* From bda9a71980e083699a0360963c0135657b73f47a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:01 +0200 Subject: [PATCH 03/23] fuse: annotate lock in fuse_reverse_inval_entry() Add missing inode lock annotatation; found by syzbot. Reported-and-tested-by: syzbot+9f747458f5990eaa8d43@syzkaller.appspotmail.com Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 2798fbe8d001..80a2181b402b 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1079,7 +1079,7 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, if (!parent) return -ENOENT; - inode_lock(parent); + inode_lock_nested(parent, I_MUTEX_PARENT); if (!S_ISDIR(parent->i_mode)) goto unlock; From 5fe0fc9f1de63de748b87f121c038d39192a271d Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Wed, 8 Sep 2021 16:38:28 +0800 Subject: [PATCH 04/23] fuse: use kmap_local_page() Due to the introduction of kmap_local_*, the storage of slots used for short-term mapping has changed from per-CPU to per-thread. kmap_atomic() disable preemption, while kmap_local_*() only disable migration. There is no need to disable preemption in several kamp_atomic places used in fuse. Link: https://lwn.net/Articles/836144/ Signed-off-by: Peng Hao Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 8 ++++---- fs/fuse/ioctl.c | 4 ++-- fs/fuse/readdir.c | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index dde341a6388a..491c092d427b 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -756,7 +756,7 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size) { unsigned ncpy = min(*size, cs->len); if (val) { - void *pgaddr = kmap_atomic(cs->pg); + void *pgaddr = kmap_local_page(cs->pg); void *buf = pgaddr + cs->offset; if (cs->write) @@ -764,7 +764,7 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size) else memcpy(*val, buf, ncpy); - kunmap_atomic(pgaddr); + kunmap_local(pgaddr); *val += ncpy; } *size -= ncpy; @@ -949,10 +949,10 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep, } } if (page) { - void *mapaddr = kmap_atomic(page); + void *mapaddr = kmap_local_page(page); void *buf = mapaddr + offset; offset += fuse_copy_do(cs, &buf, &count); - kunmap_atomic(mapaddr); + kunmap_local(mapaddr); } else offset += fuse_copy_do(cs, NULL, &count); } diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c index 546ea3d58fb4..fbc09dab1f85 100644 --- a/fs/fuse/ioctl.c +++ b/fs/fuse/ioctl.c @@ -286,11 +286,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV) goto out; - vaddr = kmap_atomic(ap.pages[0]); + vaddr = kmap_local_page(ap.pages[0]); err = fuse_copy_ioctl_iovec(fm->fc, iov_page, vaddr, transferred, in_iovs + out_iovs, (flags & FUSE_IOCTL_COMPAT) != 0); - kunmap_atomic(vaddr); + kunmap_local(vaddr); if (err) goto out; diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index bc267832310c..e38ac983435a 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -76,11 +76,11 @@ static void fuse_add_dirent_to_cache(struct file *file, WARN_ON(fi->rdc.pos != pos)) goto unlock; - addr = kmap_atomic(page); + addr = kmap_local_page(page); if (!offset) clear_page(addr); memcpy(addr + offset, dirent, reclen); - kunmap_atomic(addr); + kunmap_local(addr); fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen; fi->rdc.pos = dirent->off; unlock: From b5d9758297858288f1d8cd9b24a4e2f899f169e0 Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Mon, 6 Sep 2021 20:31:48 +0800 Subject: [PATCH 05/23] fuse: delete redundant code 'ia->io=io' has been set in fuse_io_alloc. Signed-off-by: Peng Hao Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 01d9b3272015..be13f695e1d6 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1445,7 +1445,6 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, if (!ia) return -ENOMEM; - ia->io = io; if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) { if (!write) inode_lock(inode); From 371e8fd02969383204b1f6023451125dbc20dfbd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:01 +0200 Subject: [PATCH 06/23] fuse: move fuse_invalidate_attr() into fuse_update_ctime() Logically it belongs there since attributes are invalidated due to the updated ctime. This is a cleanup and should not change behavior. Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 9 ++------- fs/fuse/xattr.c | 10 ++++------ 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 80a2181b402b..1b345ab48fd0 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -747,6 +747,7 @@ void fuse_flush_time_update(struct inode *inode) void fuse_update_ctime(struct inode *inode) { + fuse_invalidate_attr(inode); if (!IS_NOCMTIME(inode)) { inode->i_ctime = current_time(inode); mark_inode_dirty_sync(inode); @@ -784,7 +785,6 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) if (inode->i_nlink > 0) drop_nlink(inode); spin_unlock(&fi->lock); - fuse_invalidate_attr(inode); fuse_dir_changed(dir); fuse_invalidate_entry_cache(entry); fuse_update_ctime(inode); @@ -841,13 +841,10 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, err = fuse_simple_request(fm, &args); if (!err) { /* ctime changes */ - fuse_invalidate_attr(d_inode(oldent)); fuse_update_ctime(d_inode(oldent)); - if (flags & RENAME_EXCHANGE) { - fuse_invalidate_attr(d_inode(newent)); + if (flags & RENAME_EXCHANGE) fuse_update_ctime(d_inode(newent)); - } fuse_dir_changed(olddir); if (olddir != newdir) @@ -855,7 +852,6 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, /* newent will end up negative */ if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) { - fuse_invalidate_attr(d_inode(newent)); fuse_invalidate_entry_cache(newent); fuse_update_ctime(d_inode(newent)); } @@ -938,7 +934,6 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, if (likely(inode->i_nlink < UINT_MAX)) inc_nlink(inode); spin_unlock(&fi->lock); - fuse_invalidate_attr(inode); fuse_update_ctime(inode); } else if (err == -EINTR) { fuse_invalidate_attr(inode); diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c index 61dfaf7b7d20..0d3e7177fce0 100644 --- a/fs/fuse/xattr.c +++ b/fs/fuse/xattr.c @@ -42,10 +42,9 @@ int fuse_setxattr(struct inode *inode, const char *name, const void *value, fm->fc->no_setxattr = 1; err = -EOPNOTSUPP; } - if (!err) { - fuse_invalidate_attr(inode); + if (!err) fuse_update_ctime(inode); - } + return err; } @@ -173,10 +172,9 @@ int fuse_removexattr(struct inode *inode, const char *name) fm->fc->no_removexattr = 1; err = -EOPNOTSUPP; } - if (!err) { - fuse_invalidate_attr(inode); + if (!err) fuse_update_ctime(inode); - } + return err; } From 84840efc3c0f225ee5597e0a013e9da03afc73c6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:02 +0200 Subject: [PATCH 07/23] fuse: simplify __fuse_write_file_get() Use list_first_entry_or_null() instead of list_empty() + list_entry(). Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index be13f695e1d6..31266ca9c1f2 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1815,14 +1815,13 @@ static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args, static struct fuse_file *__fuse_write_file_get(struct fuse_inode *fi) { - struct fuse_file *ff = NULL; + struct fuse_file *ff; spin_lock(&fi->lock); - if (!list_empty(&fi->write_files)) { - ff = list_entry(fi->write_files.next, struct fuse_file, - write_entry); + ff = list_first_entry_or_null(&fi->write_files, struct fuse_file, + write_entry); + if (ff) fuse_file_get(ff); - } spin_unlock(&fi->lock); return ff; From cefd1b83275d4c587bdeb2fe7aed07908642f875 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:02 +0200 Subject: [PATCH 08/23] fuse: decrement nlink on overwriting rename Rename didn't decrement/clear nlink on overwritten target inode. Create a common helper fuse_entry_unlinked() that handles this for unlink, rmdir and rename. Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 49 +++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 1b345ab48fd0..dd83b5d43fe5 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -755,6 +755,29 @@ void fuse_update_ctime(struct inode *inode) } } +static void fuse_entry_unlinked(struct dentry *entry) +{ + struct inode *inode = d_inode(entry); + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + + spin_lock(&fi->lock); + fi->attr_version = atomic64_inc_return(&fc->attr_version); + /* + * If i_nlink == 0 then unlink doesn't make sense, yet this can + * happen if userspace filesystem is careless. It would be + * difficult to enforce correct nlink usage so just ignore this + * condition here + */ + if (S_ISDIR(inode->i_mode)) + clear_nlink(inode); + else if (inode->i_nlink > 0) + drop_nlink(inode); + spin_unlock(&fi->lock); + fuse_invalidate_entry_cache(entry); + fuse_update_ctime(inode); +} + static int fuse_unlink(struct inode *dir, struct dentry *entry) { int err; @@ -771,23 +794,8 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) args.in_args[0].value = entry->d_name.name; err = fuse_simple_request(fm, &args); if (!err) { - struct inode *inode = d_inode(entry); - struct fuse_inode *fi = get_fuse_inode(inode); - - spin_lock(&fi->lock); - fi->attr_version = atomic64_inc_return(&fm->fc->attr_version); - /* - * If i_nlink == 0 then unlink doesn't make sense, yet this can - * happen if userspace filesystem is careless. It would be - * difficult to enforce correct nlink usage so just ignore this - * condition here - */ - if (inode->i_nlink > 0) - drop_nlink(inode); - spin_unlock(&fi->lock); fuse_dir_changed(dir); - fuse_invalidate_entry_cache(entry); - fuse_update_ctime(inode); + fuse_entry_unlinked(entry); } else if (err == -EINTR) fuse_invalidate_entry(entry); return err; @@ -809,9 +817,8 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) args.in_args[0].value = entry->d_name.name; err = fuse_simple_request(fm, &args); if (!err) { - clear_nlink(d_inode(entry)); fuse_dir_changed(dir); - fuse_invalidate_entry_cache(entry); + fuse_entry_unlinked(entry); } else if (err == -EINTR) fuse_invalidate_entry(entry); return err; @@ -851,10 +858,8 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, fuse_dir_changed(newdir); /* newent will end up negative */ - if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) { - fuse_invalidate_entry_cache(newent); - fuse_update_ctime(d_inode(newent)); - } + if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) + fuse_entry_unlinked(newent); } else if (err == -EINTR) { /* If request was interrupted, DEITY only knows if the rename actually took place. If the invalidation From 97f044f690bac2b094bfb7fb2d177ef946c85880 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:02 +0200 Subject: [PATCH 09/23] fuse: don't increment nlink in link() The fuse_iget() call in create_new_entry() already updated the inode with all the new attributes and incremented the attribute version. Incrementing the nlink will result in the wrong count. This wasn't noticed because the attributes were invalidated right after this. Updating ctime is still needed for the writeback case when the ctime is not refreshed. Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index dd83b5d43fe5..26587affaf2c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -745,9 +745,8 @@ void fuse_flush_time_update(struct inode *inode) mapping_set_error(inode->i_mapping, err); } -void fuse_update_ctime(struct inode *inode) +static void fuse_update_ctime_in_cache(struct inode *inode) { - fuse_invalidate_attr(inode); if (!IS_NOCMTIME(inode)) { inode->i_ctime = current_time(inode); mark_inode_dirty_sync(inode); @@ -755,6 +754,12 @@ void fuse_update_ctime(struct inode *inode) } } +void fuse_update_ctime(struct inode *inode) +{ + fuse_invalidate_attr(inode); + fuse_update_ctime_in_cache(inode); +} + static void fuse_entry_unlinked(struct dentry *entry) { struct inode *inode = d_inode(entry); @@ -925,24 +930,11 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, args.in_args[1].size = newent->d_name.len + 1; args.in_args[1].value = newent->d_name.name; err = create_new_entry(fm, &args, newdir, newent, inode->i_mode); - /* Contrary to "normal" filesystems it can happen that link - makes two "logical" inodes point to the same "physical" - inode. We invalidate the attributes of the old one, so it - will reflect changes in the backing inode (link count, - etc.) - */ - if (!err) { - struct fuse_inode *fi = get_fuse_inode(inode); - - spin_lock(&fi->lock); - fi->attr_version = atomic64_inc_return(&fm->fc->attr_version); - if (likely(inode->i_nlink < UINT_MAX)) - inc_nlink(inode); - spin_unlock(&fi->lock); - fuse_update_ctime(inode); - } else if (err == -EINTR) { + if (!err) + fuse_update_ctime_in_cache(inode); + else if (err == -EINTR) fuse_invalidate_attr(inode); - } + return err; } From fa5eee57e33e79b71b40e6950c29cc46f5cc5cb7 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:02 +0200 Subject: [PATCH 10/23] fuse: selective attribute invalidation Only invalidate attributes that the operation might have changed. Introduce two constants for common combinations of changed attributes: FUSE_STATX_MODIFY: file contents are modified but not size FUSE_STATX_MODSIZE: size and/or file contents modified Signed-off-by: Miklos Szeredi --- fs/fuse/dax.c | 2 +- fs/fuse/dir.c | 4 ++-- fs/fuse/file.c | 16 ++++++++-------- fs/fuse/fuse_i.h | 8 ++++++++ 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index 281d79f8b3d3..89bf96f4fb10 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -735,7 +735,7 @@ static ssize_t fuse_dax_direct_write(struct kiocb *iocb, struct iov_iter *from) if (ret < 0) return ret; - fuse_invalidate_attr(inode); + fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); fuse_write_update_size(inode, iocb->ki_pos); return ret; } diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 26587affaf2c..8315ca75d657 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -116,7 +116,7 @@ u64 entry_attr_timeout(struct fuse_entry_out *o) return time_to_jiffies(o->attr_valid, o->attr_valid_nsec); } -static void fuse_invalidate_attr_mask(struct inode *inode, u32 mask) +void fuse_invalidate_attr_mask(struct inode *inode, u32 mask) { set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask); } @@ -756,7 +756,7 @@ static void fuse_update_ctime_in_cache(struct inode *inode) void fuse_update_ctime(struct inode *inode) { - fuse_invalidate_attr(inode); + fuse_invalidate_attr_mask(inode, STATX_CTIME); fuse_update_ctime_in_cache(inode); } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 31266ca9c1f2..06967d8e2742 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -211,7 +211,7 @@ void fuse_finish_open(struct inode *inode, struct file *file) i_size_write(inode, 0); spin_unlock(&fi->lock); truncate_pagecache(inode, 0); - fuse_invalidate_attr(inode); + fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); if (fc->writeback_cache) file_update_time(file); } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) { @@ -515,7 +515,7 @@ inval_attr_out: * enabled, i_blocks from cached attr may not be accurate. */ if (!err && fm->fc->writeback_cache) - fuse_invalidate_attr(inode); + fuse_invalidate_attr_mask(inode, STATX_BLOCKS); return err; } @@ -1266,7 +1266,7 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, fuse_write_update_size(inode, pos); clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); - fuse_invalidate_attr(inode); + fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); return res > 0 ? res : err; } @@ -1556,7 +1556,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) FUSE_DIO_WRITE); } } - fuse_invalidate_attr(inode); + fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); if (res > 0) fuse_write_update_size(inode, iocb->ki_pos); inode_unlock(inode); @@ -1769,7 +1769,7 @@ static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args, * is enabled, we trust local ctime/mtime. */ if (!fc->writeback_cache) - fuse_invalidate_attr(inode); + fuse_invalidate_attr_mask(inode, FUSE_STATX_MODIFY); spin_lock(&fi->lock); rb_erase(&wpa->writepages_entry, &fi->writepages); while (wpa->next) { @@ -2875,7 +2875,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (iov_iter_rw(iter) == WRITE) { ret = fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE); - fuse_invalidate_attr(inode); + fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); } else { ret = __fuse_direct_read(io, iter, &pos); } @@ -2996,7 +2996,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) truncate_pagecache_range(inode, offset, offset + length - 1); - fuse_invalidate_attr(inode); + fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); out: if (!(mode & FALLOC_FL_KEEP_SIZE)) @@ -3109,7 +3109,7 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, file_update_time(file_out); } - fuse_invalidate_attr(inode_out); + fuse_invalidate_attr_mask(inode_out, FUSE_STATX_MODSIZE); err = outarg.size; out: diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index a59e36c7deae..4f2f658fe5f1 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1065,7 +1065,15 @@ void fuse_wait_aborted(struct fuse_conn *fc); /** * Invalidate inode attributes */ + +/* Attributes possibly changed on data modification */ +#define FUSE_STATX_MODIFY (STATX_MTIME | STATX_CTIME | STATX_BLOCKS) + +/* Attributes possibly changed on data and/or size modification */ +#define FUSE_STATX_MODSIZE (FUSE_STATX_MODIFY | STATX_SIZE) + void fuse_invalidate_attr(struct inode *inode); +void fuse_invalidate_attr_mask(struct inode *inode, u32 mask); void fuse_invalidate_entry_cache(struct dentry *entry); From 8c56e03d2e08d83776c89e4b6563ca8cfdf7da54 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:02 +0200 Subject: [PATCH 11/23] fuse: don't bump attr_version in cached write The attribute version in fuse_inode should be updated whenever the attributes might have changed on the server. In case of cached writes this is not the case, so updating the attr_version is unnecessary and could possibly affect performance. Open code the remaining part of fuse_write_update_size(). Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 06967d8e2742..c09ab835821a 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2309,15 +2309,18 @@ static int fuse_write_end(struct file *file, struct address_space *mapping, if (!copied) goto unlock; + pos += copied; if (!PageUptodate(page)) { /* Zero any unwritten bytes at the end of the page */ - size_t endoff = (pos + copied) & ~PAGE_MASK; + size_t endoff = pos & ~PAGE_MASK; if (endoff) zero_user_segment(page, endoff, PAGE_SIZE); SetPageUptodate(page); } - fuse_write_update_size(inode, pos + copied); + if (pos > inode->i_size) + i_size_write(inode, pos); + set_page_dirty(page); unlock: From 27ae449ba26eb6c1cd217fa28339841c55bc79e1 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:02 +0200 Subject: [PATCH 12/23] fuse: rename fuse_write_update_size() This function already updates the attr_version in fuse_inode, regardless of whether the size was changed or not. Rename the helper to fuse_write_update_attr() to reflect the more generic nature. Signed-off-by: Miklos Szeredi --- fs/fuse/dax.c | 2 +- fs/fuse/dev.c | 2 +- fs/fuse/file.c | 12 ++++++------ fs/fuse/fuse_i.h | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index 89bf96f4fb10..a73bae6497b0 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -736,7 +736,7 @@ static ssize_t fuse_dax_direct_write(struct kiocb *iocb, struct iov_iter *from) return ret; fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); - fuse_write_update_size(inode, iocb->ki_pos); + fuse_write_update_attr(inode, iocb->ki_pos); return ret; } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 491c092d427b..75ae30d568ea 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1591,7 +1591,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, end = outarg.offset + outarg.size; if (end > file_size) { file_size = end; - fuse_write_update_size(inode, file_size); + fuse_write_update_attr(inode, file_size); } num = outarg.size; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index c09ab835821a..b5f37b8df0e0 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1066,7 +1066,7 @@ static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos, return err ?: ia->write.out.size; } -bool fuse_write_update_size(struct inode *inode, loff_t pos) +bool fuse_write_update_attr(struct inode *inode, loff_t pos) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); @@ -1263,7 +1263,7 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, } while (!err && iov_iter_count(ii)); if (res > 0) - fuse_write_update_size(inode, pos); + fuse_write_update_attr(inode, pos); clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); @@ -1558,7 +1558,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) } fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); if (res > 0) - fuse_write_update_size(inode, iocb->ki_pos); + fuse_write_update_attr(inode, iocb->ki_pos); inode_unlock(inode); return res; @@ -2901,7 +2901,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (iov_iter_rw(iter) == WRITE) { if (ret > 0) - fuse_write_update_size(inode, pos); + fuse_write_update_attr(inode, pos); else if (ret < 0 && offset + count > i_size) fuse_do_truncate(file); } @@ -2990,7 +2990,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, /* we could have extended the file */ if (!(mode & FALLOC_FL_KEEP_SIZE)) { - bool changed = fuse_write_update_size(inode, offset + length); + bool changed = fuse_write_update_attr(inode, offset + length); if (changed && fm->fc->writeback_cache) file_update_time(file); @@ -3108,7 +3108,7 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1); if (fc->writeback_cache) { - fuse_write_update_size(inode_out, pos_out + outarg.size); + fuse_write_update_attr(inode_out, pos_out + outarg.size); file_update_time(file_out); } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 4f2f658fe5f1..7f5847a48b4b 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1217,7 +1217,7 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd, __poll_t fuse_file_poll(struct file *file, poll_table *wait); int fuse_dev_release(struct inode *inode, struct file *file); -bool fuse_write_update_size(struct inode *inode, loff_t pos); +bool fuse_write_update_attr(struct inode *inode, loff_t pos); int fuse_flush_times(struct inode *inode, struct fuse_file *ff); int fuse_write_inode(struct inode *inode, struct writeback_control *wbc); From d347739a0e760e9f370aa021da3feacc37d3e511 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:02 +0200 Subject: [PATCH 13/23] fuse: always invalidate attributes after writes Extend the fuse_write_update_attr() helper to invalidate cached attributes after a write. This has already been done in all cases except in fuse_notify_store(), so this is mostly a cleanup. fuse_direct_write_iter() calls fuse_direct_IO() which already calls fuse_write_update_attr(), so don't repeat that again in the former. Signed-off-by: Miklos Szeredi --- fs/fuse/dax.c | 5 +---- fs/fuse/dev.c | 2 +- fs/fuse/file.c | 26 ++++++++++++-------------- fs/fuse/fuse_i.h | 2 +- 4 files changed, 15 insertions(+), 20 deletions(-) diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index a73bae6497b0..713818d74de6 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -732,11 +732,8 @@ static ssize_t fuse_dax_direct_write(struct kiocb *iocb, struct iov_iter *from) ssize_t ret; ret = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); - if (ret < 0) - return ret; - fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); - fuse_write_update_attr(inode, iocb->ki_pos); + fuse_write_update_attr(inode, iocb->ki_pos, ret); return ret; } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 75ae30d568ea..337fa6f5a7c6 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1591,7 +1591,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, end = outarg.offset + outarg.size; if (end > file_size) { file_size = end; - fuse_write_update_attr(inode, file_size); + fuse_write_update_attr(inode, file_size, outarg.size); } num = outarg.size; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b5f37b8df0e0..c3fd88da2a0b 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1066,7 +1066,7 @@ static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos, return err ?: ia->write.out.size; } -bool fuse_write_update_attr(struct inode *inode, loff_t pos) +bool fuse_write_update_attr(struct inode *inode, loff_t pos, ssize_t written) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); @@ -1074,12 +1074,14 @@ bool fuse_write_update_attr(struct inode *inode, loff_t pos) spin_lock(&fi->lock); fi->attr_version = atomic64_inc_return(&fc->attr_version); - if (pos > inode->i_size) { + if (written > 0 && pos > inode->i_size) { i_size_write(inode, pos); ret = true; } spin_unlock(&fi->lock); + fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); + return ret; } @@ -1262,11 +1264,8 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, kfree(ap->pages); } while (!err && iov_iter_count(ii)); - if (res > 0) - fuse_write_update_attr(inode, pos); - + fuse_write_update_attr(inode, pos, res); clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); - fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); return res > 0 ? res : err; } @@ -1554,11 +1553,9 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) } else { res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); + fuse_write_update_attr(inode, iocb->ki_pos, res); } } - fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); - if (res > 0) - fuse_write_update_attr(inode, iocb->ki_pos); inode_unlock(inode); return res; @@ -2900,9 +2897,8 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) kref_put(&io->refcnt, fuse_io_release); if (iov_iter_rw(iter) == WRITE) { - if (ret > 0) - fuse_write_update_attr(inode, pos); - else if (ret < 0 && offset + count > i_size) + fuse_write_update_attr(inode, pos, ret); + if (ret < 0 && offset + count > i_size) fuse_do_truncate(file); } @@ -2990,7 +2986,8 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, /* we could have extended the file */ if (!(mode & FALLOC_FL_KEEP_SIZE)) { - bool changed = fuse_write_update_attr(inode, offset + length); + bool changed = fuse_write_update_attr(inode, offset + length, + length); if (changed && fm->fc->writeback_cache) file_update_time(file); @@ -3108,7 +3105,8 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1); if (fc->writeback_cache) { - fuse_write_update_attr(inode_out, pos_out + outarg.size); + fuse_write_update_attr(inode_out, pos_out + outarg.size, + outarg.size); file_update_time(file_out); } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 7f5847a48b4b..66c410a733c1 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1217,7 +1217,7 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd, __poll_t fuse_file_poll(struct file *file, poll_table *wait); int fuse_dev_release(struct inode *inode, struct file *file); -bool fuse_write_update_attr(struct inode *inode, loff_t pos); +bool fuse_write_update_attr(struct inode *inode, loff_t pos, ssize_t written); int fuse_flush_times(struct inode *inode, struct fuse_file *ff); int fuse_write_inode(struct inode *inode, struct writeback_control *wbc); From 484ce65715b06aead8c4901f01ca32c5a240bc71 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:03 +0200 Subject: [PATCH 14/23] fuse: fix attr version comparison in fuse_read_update_size() A READ request returning a short count is taken as indication of EOF, and the cached file size is modified accordingly. Fix the attribute version checking to allow for changes to fc->attr_version on other inodes. Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index c3fd88da2a0b..ddd563fda648 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -787,7 +787,7 @@ static void fuse_read_update_size(struct inode *inode, loff_t size, struct fuse_inode *fi = get_fuse_inode(inode); spin_lock(&fi->lock); - if (attr_ver == fi->attr_version && size < inode->i_size && + if (attr_ver >= fi->attr_version && size < inode->i_size && !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { fi->attr_version = atomic64_inc_return(&fc->attr_version); i_size_write(inode, size); From 20235b435a5c8897e46d094454408b6ab7157dbd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:03 +0200 Subject: [PATCH 15/23] fuse: cleanup code conditional on fc->writeback_cache It's safe to call file_update_time() if writeback cache is not enabled, since S_NOCMTIME is set in this case. This part is purely a cleanup. __fuse_copy_file_range() also calls fuse_write_update_attr() only in the writeback cache case. This is inconsistent with other callers, where it's called unconditionally. Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index ddd563fda648..bc450daf27a2 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -211,9 +211,8 @@ void fuse_finish_open(struct inode *inode, struct file *file) i_size_write(inode, 0); spin_unlock(&fi->lock); truncate_pagecache(inode, 0); + file_update_time(file); fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); - if (fc->writeback_cache) - file_update_time(file); } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) { invalidate_inode_pages2(inode->i_mapping); } @@ -2986,10 +2985,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, /* we could have extended the file */ if (!(mode & FALLOC_FL_KEEP_SIZE)) { - bool changed = fuse_write_update_attr(inode, offset + length, - length); - - if (changed && fm->fc->writeback_cache) + if (fuse_write_update_attr(inode, offset + length, length)) file_update_time(file); } @@ -3104,13 +3100,8 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, ALIGN_DOWN(pos_out, PAGE_SIZE), ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1); - if (fc->writeback_cache) { - fuse_write_update_attr(inode_out, pos_out + outarg.size, - outarg.size); - file_update_time(file_out); - } - - fuse_invalidate_attr_mask(inode_out, FUSE_STATX_MODSIZE); + file_update_time(file_out); + fuse_write_update_attr(inode_out, pos_out + outarg.size, outarg.size); err = outarg.size; out: From c15016b7ae1caf77f80ae87a71745368ef651ba6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:03 +0200 Subject: [PATCH 16/23] fuse: simplify local variables holding writeback cache state There are two instances of "bool is_wb = fc->writeback_cache" where the actual use mostly involves checking "is_wb && S_ISREG(inode->i_mode)". Clean up these cases by storing the second condition in the local variable. Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 8 ++++---- fs/fuse/inode.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 8315ca75d657..607ff7d2fd4d 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1561,10 +1561,10 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, struct fuse_setattr_in inarg; struct fuse_attr_out outarg; bool is_truncate = false; - bool is_wb = fc->writeback_cache; + bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode); loff_t oldsize; int err; - bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode); + bool trust_local_cmtime = is_wb; bool fault_blocked = false; if (!fc->default_permissions) @@ -1608,7 +1608,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, } /* Flush dirty data/metadata before non-truncate SETATTR */ - if (is_wb && S_ISREG(inode->i_mode) && + if (is_wb && attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET | ATTR_TIMES_SET)) { @@ -1679,7 +1679,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, attr_timeout(&outarg)); oldsize = inode->i_size; /* see the comment in fuse_change_attributes() */ - if (!is_wb || is_truncate || !S_ISREG(inode->i_mode)) + if (!is_wb || is_truncate) i_size_write(inode, outarg.attr.size); if (is_truncate) { diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 2f999d38c9b4..db33f2050f74 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -223,7 +223,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); - bool is_wb = fc->writeback_cache; + bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode); loff_t oldsize; struct timespec64 old_mtime; @@ -243,7 +243,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, * extend local i_size without keeping userspace server in sync. So, * attr->size coming from server can be stale. We cannot trust it. */ - if (!is_wb || !S_ISREG(inode->i_mode)) + if (!is_wb) i_size_write(inode, attr->size); spin_unlock(&fi->lock); From 04d82db0c557e074a5d898b43de81fe659b9cc5a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:03 +0200 Subject: [PATCH 17/23] fuse: move reverting attributes to fuse_change_attributes() In case of writeback_cache fuse_fillattr() would revert the queried attributes to the cached version. Move this to fuse_change_attributes() in order to manage the writeback logic in a central helper. This will be necessary for patches that follow. Only fuse_do_getattr() -> fuse_fillattr() uses the attributes after calling fuse_change_attributes(), so this should not change behavior. Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 9 --------- fs/fuse/inode.c | 13 +++++++++++++ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 607ff7d2fd4d..d4172390f2b1 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -944,15 +944,6 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, unsigned int blkbits; struct fuse_conn *fc = get_fuse_conn(inode); - /* see the comment in fuse_change_attributes() */ - if (fc->writeback_cache && S_ISREG(inode->i_mode)) { - attr->size = i_size_read(inode); - attr->mtime = inode->i_mtime.tv_sec; - attr->mtimensec = inode->i_mtime.tv_nsec; - attr->ctime = inode->i_ctime.tv_sec; - attr->ctimensec = inode->i_ctime.tv_nsec; - } - stat->dev = inode->i_sb->s_dev; stat->ino = attr->ino; stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index db33f2050f74..1ecc46dff93e 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -228,6 +228,19 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, struct timespec64 old_mtime; spin_lock(&fi->lock); + /* + * In case of writeback_cache enabled, writes update mtime, ctime and + * may update i_size. In these cases trust the cached value in the + * inode. + */ + if (is_wb) { + attr->size = i_size_read(inode); + attr->mtime = inode->i_mtime.tv_sec; + attr->mtimensec = inode->i_mtime.tv_nsec; + attr->ctime = inode->i_ctime.tv_sec; + attr->ctimensec = inode->i_ctime.tv_nsec; + } + if ((attr_version != 0 && fi->attr_version > attr_version) || test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { spin_unlock(&fi->lock); From 4b52f059b5ddbb364d35f2bcc3d267a009078db7 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:03 +0200 Subject: [PATCH 18/23] fuse: add cache_mask If writeback_cache is enabled, then the size, mtime and ctime attributes of regular files are always valid in the kernel's cache. They are retrieved from userspace only when the inode is freshly looked up. Add a more generic "cache_mask", that indicates which attributes are currently valid in cache. This patch doesn't change behavior. Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 3 ++- fs/fuse/fuse_i.h | 4 +++- fs/fuse/inode.c | 31 ++++++++++++++++++++++++------- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index d4172390f2b1..4c934627b419 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1667,7 +1667,8 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, } fuse_change_attributes_common(inode, &outarg.attr, - attr_timeout(&outarg)); + attr_timeout(&outarg), + fuse_get_cache_mask(inode)); oldsize = inode->i_size; /* see the comment in fuse_change_attributes() */ if (!is_wb || is_truncate) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 66c410a733c1..531f6d98efc6 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1031,7 +1031,9 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, u64 attr_valid, u64 attr_version); void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, - u64 attr_valid); + u64 attr_valid, u32 cache_mask); + +u32 fuse_get_cache_mask(struct inode *inode); /** * Initialize the client device diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 1ecc46dff93e..8b89e3ba7df3 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -164,7 +164,7 @@ static ino_t fuse_squash_ino(u64 ino64) } void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, - u64 attr_valid) + u64 attr_valid, u32 cache_mask) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); @@ -184,9 +184,11 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, inode->i_atime.tv_sec = attr->atime; inode->i_atime.tv_nsec = attr->atimensec; /* mtime from server may be stale due to local buffered write */ - if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) { + if (!(cache_mask & STATX_MTIME)) { inode->i_mtime.tv_sec = attr->mtime; inode->i_mtime.tv_nsec = attr->mtimensec; + } + if (!(cache_mask & STATX_CTIME)) { inode->i_ctime.tv_sec = attr->ctime; inode->i_ctime.tv_nsec = attr->ctimensec; } @@ -218,12 +220,22 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, inode->i_flags &= ~S_NOSEC; } +u32 fuse_get_cache_mask(struct inode *inode) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + + if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) + return 0; + + return STATX_MTIME | STATX_CTIME | STATX_SIZE; +} + void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, u64 attr_valid, u64 attr_version) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); - bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode); + u32 cache_mask; loff_t oldsize; struct timespec64 old_mtime; @@ -233,10 +245,15 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, * may update i_size. In these cases trust the cached value in the * inode. */ - if (is_wb) { + cache_mask = fuse_get_cache_mask(inode); + if (cache_mask & STATX_SIZE) attr->size = i_size_read(inode); + + if (cache_mask & STATX_MTIME) { attr->mtime = inode->i_mtime.tv_sec; attr->mtimensec = inode->i_mtime.tv_nsec; + } + if (cache_mask & STATX_CTIME) { attr->ctime = inode->i_ctime.tv_sec; attr->ctimensec = inode->i_ctime.tv_nsec; } @@ -248,7 +265,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, } old_mtime = inode->i_mtime; - fuse_change_attributes_common(inode, attr, attr_valid); + fuse_change_attributes_common(inode, attr, attr_valid, cache_mask); oldsize = inode->i_size; /* @@ -256,11 +273,11 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, * extend local i_size without keeping userspace server in sync. So, * attr->size coming from server can be stale. We cannot trust it. */ - if (!is_wb) + if (!(cache_mask & STATX_SIZE)) i_size_write(inode, attr->size); spin_unlock(&fi->lock); - if (!is_wb && S_ISREG(inode->i_mode)) { + if (!cache_mask && S_ISREG(inode->i_mode)) { bool inval = false; if (oldsize != attr->size) { From ec85537519b330a0deb8fe742fd1b0efc40a1710 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:03 +0200 Subject: [PATCH 19/23] fuse: take cache_mask into account in getattr When deciding to send a GETATTR request take into account the cache mask (which attributes are always valid). The cache mask takes precedence over the invalid mask. This results in the GETATTR request not being sent unnecessarily. Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 4c934627b419..03767bbafb8a 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1021,12 +1021,14 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file, struct fuse_inode *fi = get_fuse_inode(inode); int err = 0; bool sync; + u32 inval_mask = READ_ONCE(fi->inval_mask); + u32 cache_mask = fuse_get_cache_mask(inode); if (flags & AT_STATX_FORCE_SYNC) sync = true; else if (flags & AT_STATX_DONT_SYNC) sync = false; - else if (request_mask & READ_ONCE(fi->inval_mask)) + else if (request_mask & inval_mask & ~cache_mask) sync = true; else sync = time_before64(fi->i_time, get_jiffies_64()); From c6c745b81033a4c1f0e5f3b16398a10f2d000c29 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:03 +0200 Subject: [PATCH 20/23] fuse: only update necessary attributes fuse_update_attributes() refreshes metadata for internal use. Each use needs a particular set of attributes to be refreshed, but currently that cannot be expressed and all but atime are refreshed. Add a mask argument, which lets fuse_update_get_attr() to decide based on the cache_mask and the inval_mask whether a GETATTR call is needed or not. Reported-by: Yongji Xie Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 6 ++---- fs/fuse/file.c | 9 +++++---- fs/fuse/fuse_i.h | 2 +- fs/fuse/readdir.c | 2 +- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 03767bbafb8a..0654bfedcbb0 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1045,11 +1045,9 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file, return err; } -int fuse_update_attributes(struct inode *inode, struct file *file) +int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask) { - /* Do *not* need to get atime for internal purposes */ - return fuse_update_get_attr(inode, file, NULL, - STATX_BASIC_STATS & ~STATX_ATIME, 0); + return fuse_update_get_attr(inode, file, NULL, mask, 0); } int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, diff --git a/fs/fuse/file.c b/fs/fuse/file.c index bc450daf27a2..26730e699d68 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -996,7 +996,7 @@ static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to) if (fc->auto_inval_data || (iocb->ki_pos + iov_iter_count(to) > i_size_read(inode))) { int err; - err = fuse_update_attributes(inode, iocb->ki_filp); + err = fuse_update_attributes(inode, iocb->ki_filp, STATX_SIZE); if (err) return err; } @@ -1282,7 +1282,8 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from) if (fc->writeback_cache) { /* Update size (EOF optimization) and mode (SUID clearing) */ - err = fuse_update_attributes(mapping->host, file); + err = fuse_update_attributes(mapping->host, file, + STATX_SIZE | STATX_MODE); if (err) return err; @@ -2633,7 +2634,7 @@ static loff_t fuse_lseek(struct file *file, loff_t offset, int whence) return vfs_setpos(file, outarg.offset, inode->i_sb->s_maxbytes); fallback: - err = fuse_update_attributes(inode, file); + err = fuse_update_attributes(inode, file, STATX_SIZE); if (!err) return generic_file_llseek(file, offset, whence); else @@ -2653,7 +2654,7 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence) break; case SEEK_END: inode_lock(inode); - retval = fuse_update_attributes(inode, file); + retval = fuse_update_attributes(inode, file, STATX_SIZE); if (!retval) retval = generic_file_llseek(file, offset, whence); inode_unlock(inode); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 531f6d98efc6..198637b41e19 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1161,7 +1161,7 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id); void fuse_flush_time_update(struct inode *inode); void fuse_update_ctime(struct inode *inode); -int fuse_update_attributes(struct inode *inode, struct file *file); +int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask); void fuse_flush_writepages(struct inode *inode); diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index e38ac983435a..b4e565711045 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -454,7 +454,7 @@ static int fuse_readdir_cached(struct file *file, struct dir_context *ctx) * cache; both cases require an up-to-date mtime value. */ if (!ctx->pos && fc->auto_inval_data) { - int err = fuse_update_attributes(inode, file); + int err = fuse_update_attributes(inode, file, STATX_MTIME); if (err) return err; From a390ccb316beb8ea594b8695d53926710ca454a3 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 24 Oct 2021 16:26:07 +0300 Subject: [PATCH 21/23] fuse: add FOPEN_NOFLUSH Add flag returned by FUSE_OPEN and FUSE_CREATE requests to avoid flushing data cache on close. Different filesystems implement ->flush() is different ways: - Most disk filesystems do not implement ->flush() at all - Some network filesystem (e.g. nfs) flush local write cache of FMODE_WRITE file and send a "flush" command to server - Some network filesystem (e.g. cifs) flush local write cache of FMODE_WRITE file without sending an additional command to server FUSE flushes local write cache of ANY file, even non FMODE_WRITE and sends a "flush" command to server (if server implements it). The FUSE implementation of ->flush() seems over agressive and arbitrary and does not make a lot of sense when writeback caching is disabled. Instead of deciding on another arbitrary implementation that makes sense, leave the choice of per-file flush behavior in the hands of the server. Link: https://lore.kernel.org/linux-fsdevel/CAJfpegspE8e6aKd47uZtSYX8Y-1e1FWS0VL0DH2Skb9gQP5RJQ@mail.gmail.com/ Suggested-by: Miklos Szeredi Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 3 +++ include/uapi/linux/fuse.h | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 26730e699d68..b7f1a164e18a 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -476,6 +476,9 @@ static int fuse_flush(struct file *file, fl_owner_t id) if (fuse_is_bad(inode)) return -EIO; + if (ff->open_flags & FOPEN_NOFLUSH && !fm->fc->writeback_cache) + return 0; + err = write_inode_now(inode, 1); if (err) return err; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 36ed092227fa..a1dc3ee1d17c 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -184,6 +184,9 @@ * * 7.34 * - add FUSE_SYNCFS + * + * 7.35 + * - add FOPEN_NOFLUSH */ #ifndef _LINUX_FUSE_H @@ -219,7 +222,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 34 +#define FUSE_KERNEL_MINOR_VERSION 35 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -290,12 +293,14 @@ struct fuse_file_lock { * FOPEN_NONSEEKABLE: the file is not seekable * FOPEN_CACHE_DIR: allow caching this directory * FOPEN_STREAM: the file is stream-like (no file position at all) + * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) #define FOPEN_NONSEEKABLE (1 << 2) #define FOPEN_CACHE_DIR (1 << 3) #define FOPEN_STREAM (1 << 4) +#define FOPEN_NOFLUSH (1 << 5) /** * INIT request/reply flags From 7c594bbd2de9f03e7c8d808004045696bc9c1a67 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 2 Nov 2021 11:08:19 +0100 Subject: [PATCH 22/23] virtiofs: use strscpy for copying the queue name Always null terminate fsvq->name. Reported-by: kernel test robot Fixes: b43b7e81eb2b ("virtiofs: provide a helper function for virtqueue initialization") Signed-off-by: Miklos Szeredi --- fs/fuse/virtio_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 94fc874f5de7..4cfa4bc1f579 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -649,7 +649,7 @@ static void virtio_fs_vq_done(struct virtqueue *vq) static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name, int vq_type) { - strncpy(fsvq->name, name, VQ_NAME_LEN); + strscpy(fsvq->name, name, VQ_NAME_LEN); spin_lock_init(&fsvq->lock); INIT_LIST_HEAD(&fsvq->queued_reqs); INIT_LIST_HEAD(&fsvq->end_reqs); From 712a951025c0667ff00b25afc360f74e639dfabe Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 2 Nov 2021 11:10:37 +0100 Subject: [PATCH 23/23] fuse: fix page stealing It is possible to trigger a crash by splicing anon pipe bufs to the fuse device. The reason for this is that anon_pipe_buf_release() will reuse buf->page if the refcount is 1, but that page might have already been stolen and its flags modified (e.g. PG_lru added). This happens in the unlikely case of fuse_dev_splice_write() getting around to calling pipe_buf_release() after a page has been stolen, added to the page cache and removed from the page cache. Fix by calling pipe_buf_release() right after the page was inserted into the page cache. In this case the page has an elevated refcount so any release function will know that the page isn't reusable. Reported-by: Frank Dinoff Link: https://lore.kernel.org/r/CAAmZXrsGg2xsP1CK+cbuEMumtrqdvD-NKnWzhNcvn71RV3c1yw@mail.gmail.com/ Fixes: dd3bb14f44a6 ("fuse: support splice() writing to fuse device") Cc: # v2.6.35 Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 337fa6f5a7c6..79f7eda49e06 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -847,6 +847,12 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) replace_page_cache_page(oldpage, newpage); + /* + * Release while we have extra ref on stolen page. Otherwise + * anon_pipe_buf_release() might think the page can be reused. + */ + pipe_buf_release(cs->pipe, buf); + get_page(newpage); if (!(buf->flags & PIPE_BUF_FLAG_LRU)) @@ -2031,8 +2037,12 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, pipe_lock(pipe); out_free: - for (idx = 0; idx < nbuf; idx++) - pipe_buf_release(pipe, &bufs[idx]); + for (idx = 0; idx < nbuf; idx++) { + struct pipe_buffer *buf = &bufs[idx]; + + if (buf->ops) + pipe_buf_release(pipe, buf); + } pipe_unlock(pipe); kvfree(bufs);