From 2067231a9e2cbbcae0a4aca6ac36ff2dd6a7b701 Mon Sep 17 00:00:00 2001 From: Sun Ke Date: Fri, 12 Aug 2022 09:14:40 +0800 Subject: [PATCH 1/7] NFS: Fix missing unlock in nfs_unlink() Add the missing unlock before goto. Fixes: 3c59366c207e ("NFS: don't unhash dentry during unlink/rename") Signed-off-by: Sun Ke Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index dbab3caa15ed..1b879584d4fe 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2484,8 +2484,10 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) */ error = -ETXTBSY; if (WARN_ON(dentry->d_flags & DCACHE_NFSFS_RENAMED) || - WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED)) + WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED)) { + spin_unlock(&dentry->d_lock); goto out; + } if (dentry->d_fsdata) /* old devname */ kfree(dentry->d_fsdata); From 67f4b5dc49913abcdb5cc736e73674e2f352f81d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 13 Aug 2022 08:22:25 -0400 Subject: [PATCH 2/7] NFS: Fix another fsync() issue after a server reboot Currently, when the writeback code detects a server reboot, it redirties any pages that were not committed to disk, and it sets the flag NFS_CONTEXT_RESEND_WRITES in the nfs_open_context of the file descriptor that dirtied the file. While this allows the file descriptor in question to redrive its own writes, it violates the fsync() requirement that we should be synchronising all writes to disk. While the problem is infrequent, we do see corner cases where an untimely server reboot causes the fsync() call to abandon its attempt to sync data to disk and causing data corruption issues due to missed error conditions or similar. In order to tighted up the client's ability to deal with this situation without introducing livelocks, add a counter that records the number of times pages are redirtied due to a server reboot-like condition, and use that in fsync() to redrive the sync to disk. Fixes: 2197e9b06c22 ("NFS: Fix up fsync() when the server rebooted") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 15 ++++++--------- fs/nfs/inode.c | 1 + fs/nfs/write.c | 6 ++++-- include/linux/nfs_fs.h | 1 + 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 54237a231687..749e5487df50 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -221,8 +221,10 @@ nfs_file_fsync_commit(struct file *file, int datasync) int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = file_inode(file); + struct nfs_inode *nfsi = NFS_I(inode); + long save_nredirtied = atomic_long_read(&nfsi->redirtied_pages); + long nredirtied; int ret; trace_nfs_fsync_enter(inode); @@ -237,15 +239,10 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) ret = pnfs_sync_inode(inode, !!datasync); if (ret != 0) break; - if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags)) + nredirtied = atomic_long_read(&nfsi->redirtied_pages); + if (nredirtied == save_nredirtied) break; - /* - * If nfs_file_fsync_commit detected a server reboot, then - * resend all dirty pages that might have been covered by - * the NFS_CONTEXT_RESEND_WRITES flag - */ - start = 0; - end = LLONG_MAX; + save_nredirtied = nredirtied; } trace_nfs_fsync_exit(inode, ret); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b4e46b0ffa2d..bea7c005119c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -426,6 +426,7 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh) static void nfs_inode_init_regular(struct nfs_inode *nfsi) { atomic_long_set(&nfsi->nrequests, 0); + atomic_long_set(&nfsi->redirtied_pages, 0); INIT_LIST_HEAD(&nfsi->commit_info.list); atomic_long_set(&nfsi->commit_info.ncommit, 0); atomic_set(&nfsi->commit_info.rpcs_out, 0); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4a3796811b4b..989c734cf91d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1420,10 +1420,12 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, */ static void nfs_redirty_request(struct nfs_page *req) { + struct nfs_inode *nfsi = NFS_I(page_file_mapping(req->wb_page)->host); + /* Bump the transmission count */ req->wb_nio++; nfs_mark_request_dirty(req); - set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags); + atomic_long_inc(&nfsi->redirtied_pages); nfs_end_page_writeback(req); nfs_release_request(req); } @@ -1904,7 +1906,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) /* We have a mismatch. Write the page again */ dprintk_cont(" mismatch\n"); nfs_mark_request_dirty(req); - set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags); + atomic_long_inc(&NFS_I(data->inode)->redirtied_pages); next: nfs_unlock_and_release_request(req); /* Latency breaker */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b32ed68e7dc4..f08e581f0161 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -182,6 +182,7 @@ struct nfs_inode { /* Regular file */ struct { atomic_long_t nrequests; + atomic_long_t redirtied_pages; struct nfs_mds_commit_info commit_info; struct mutex commit_mutex; }; From edf79efcc9d0cf38fcc1efe688fd697b9bb0ddc4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 13 Aug 2022 08:46:35 -0400 Subject: [PATCH 3/7] NFS: Remove a bogus flag setting in pnfs_write_done_resend_to_mds Since pnfs_write_done_resend_to_mds() does not actually call end_page_writeback() on the pages that are being redirected to the metadata server, callers of fsync() do not see the I/O as complete until the writeback to the MDS finishes. We therefore do not need to set NFS_CONTEXT_RESEND_WRITES, since there is nothing to redrive. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 41a9b6b58fb9..2613b7e36eb9 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2817,7 +2817,6 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr) /* Resend all requests through the MDS */ nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true, hdr->completion_ops); - set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags); return nfs_pageio_resend(&pgio, hdr); } EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); From 5f6277a0c15e1ea54b6fd3d78c9fff7bfe42556c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 13 Aug 2022 08:51:45 -0400 Subject: [PATCH 4/7] NFS: Cleanup to remove unused flag NFS_CONTEXT_RESEND_WRITES Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f08e581f0161..7931fa472561 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -83,7 +83,6 @@ struct nfs_open_context { fmode_t mode; unsigned long flags; -#define NFS_CONTEXT_RESEND_WRITES (1) #define NFS_CONTEXT_BAD (2) #define NFS_CONTEXT_UNLOCK (3) #define NFS_CONTEXT_FILE_OPEN (4) From f16857e62bac60786104c020ad7c86e2163b2c5b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 19 Aug 2022 09:55:59 +1000 Subject: [PATCH 5/7] NFS: unlink/rmdir shouldn't call d_delete() twice on ENOENT nfs_unlink() calls d_delete() twice if it receives ENOENT from the server - once in nfs_dentry_handle_enoent() from nfs_safe_remove and once in nfs_dentry_remove_handle_error(). nfs_rmddir() also calls it twice - the nfs_dentry_handle_enoent() call is direct and inside a region locked with ->rmdir_sem It is safe to call d_delete() twice if the refcount > 1 as the dentry is simply unhashed. If the refcount is 1, the first call sets d_inode to NULL and the second call crashes. This patch guards the d_delete() call from nfs_dentry_handle_enoent() leaving the one under ->remdir_sem in case that is important. In mainline it would be safe to remove the d_delete() call. However in older kernels to which this might be backported, that would change the behaviour of nfs_unlink(). nfs_unlink() used to unhash the dentry which resulted in nfs_dentry_handle_enoent() not calling d_delete(). So in older kernels we need the d_delete() in nfs_dentry_remove_handle_error() when called from nfs_unlink() but not when called from nfs_rmdir(). To make the code work correctly for old and new kernels, and from both nfs_unlink() and nfs_rmdir(), we protect the d_delete() call with simple_positive(). This ensures it is never called in a circumstance where it could crash. Fixes: 3c59366c207e ("NFS: don't unhash dentry during unlink/rename") Fixes: 9019fb391de0 ("NFS: Label the dentry with a verifier in nfs_rmdir() and nfs_unlink()") Signed-off-by: NeilBrown Tested-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 1b879584d4fe..5d6c2ddc7ea6 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2382,7 +2382,8 @@ static void nfs_dentry_remove_handle_error(struct inode *dir, { switch (error) { case -ENOENT: - d_delete(dentry); + if (d_really_is_positive(dentry)) + d_delete(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); break; case 0: From fcfc8be1e9cf2f12b50dce8b579b3ae54443a014 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 18 Aug 2022 15:07:05 -0400 Subject: [PATCH 6/7] NFSv4.2 fix problems with __nfs42_ssc_open A destination server while doing a COPY shouldn't accept using the passed in filehandle if its not a regular filehandle. If alloc_file_pseudo() has failed, we need to decrement a reference on the newly created inode, otherwise it leaks. Reported-by: Al Viro Fixes: ec4b092508982 ("NFS: inter ssc open") Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs4file.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index e88f6b18445e..9eb181287879 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -340,6 +340,11 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, goto out; } + if (!S_ISREG(fattr->mode)) { + res = ERR_PTR(-EBADF); + goto out; + } + res = ERR_PTR(-ENOMEM); len = strlen(SSC_READ_NAME_BODY) + 16; read_name = kzalloc(len, GFP_KERNEL); @@ -357,6 +362,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, r_ino->i_fop); if (IS_ERR(filep)) { res = ERR_CAST(filep); + iput(r_ino); goto out_free_name; } From ed06fce0b034b2e25bd93430f5c4cbb28036cc1a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Aug 2022 14:55:03 -0400 Subject: [PATCH 7/7] SUNRPC: RPC level errors should set task->tk_rpc_status Fix up a case in call_encode() where we're failing to set task->tk_rpc_status when an RPC level error occurred. Fixes: 9c5948c24869 ("SUNRPC: task should be exit if encode return EKEYEXPIRED more times") Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b098e707ad41..7d268a291486 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1902,7 +1902,7 @@ call_encode(struct rpc_task *task) break; case -EKEYEXPIRED: if (!task->tk_cred_retry) { - rpc_exit(task, task->tk_status); + rpc_call_rpcerror(task, task->tk_status); } else { task->tk_action = call_refresh; task->tk_cred_retry--;