From 23ba98de6dcec665e15c0ca19244379bb0d30932 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 29 Jul 2022 17:01:07 -0400 Subject: [PATCH 01/78] nfsd: eliminate the NFSD_FILE_BREAK_* flags We had a report from the spring Bake-a-thon of data corruption in some nfstest_interop tests. Looking at the traces showed the NFS server allowing a v3 WRITE to proceed while a read delegation was still outstanding. Currently, we only set NFSD_FILE_BREAK_* flags if NFSD_MAY_NOT_BREAK_LEASE was set when we call nfsd_file_alloc. NFSD_MAY_NOT_BREAK_LEASE was intended to be set when finding files for COMMIT ops, where we need a writeable filehandle but don't need to break read leases. It doesn't make any sense to consult that flag when allocating a file since the file may be used on subsequent calls where we do want to break the lease (and the usage of it here seems to be reverse from what it should be anyway). Also, after calling nfsd_open_break_lease, we don't want to clear the BREAK_* bits. A lease could end up being set on it later (more than once) and we need to be able to break those leases as well. This means that the NFSD_FILE_BREAK_* flags now just mirror NFSD_MAY_{READ,WRITE} flags, so there's no need for them at all. Just drop those flags and unconditionally call nfsd_open_break_lease every time. Reported-by: Olga Kornieskaia Link: https://bugzilla.redhat.com/show_bug.cgi?id=2107360 Fixes: 65294c1f2c5e (nfsd: add a new struct file caching facility to nfsd) Cc: # 5.4.x : bb283ca18d1e NFSD: Clean up the show_nf_flags() macro Cc: # 5.4.x Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 22 +--------------------- fs/nfsd/filecache.h | 4 +--- fs/nfsd/trace.h | 2 -- 3 files changed, 2 insertions(+), 26 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 9cb2d590c036..e1f98d32cee1 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -184,12 +184,6 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, nf->nf_hashval = hashval; refcount_set(&nf->nf_ref, 1); nf->nf_may = may & NFSD_FILE_MAY_MASK; - if (may & NFSD_MAY_NOT_BREAK_LEASE) { - if (may & NFSD_MAY_WRITE) - __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags); - if (may & NFSD_MAY_READ) - __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); - } nf->nf_mark = NULL; trace_nfsd_file_alloc(nf); } @@ -958,21 +952,7 @@ wait_for_construction: this_cpu_inc(nfsd_file_cache_hits); - if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) { - bool write = (may_flags & NFSD_MAY_WRITE); - - if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) || - (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) { - status = nfserrno(nfsd_open_break_lease( - file_inode(nf->nf_file), may_flags)); - if (status == nfs_ok) { - clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); - if (write) - clear_bit(NFSD_FILE_BREAK_WRITE, - &nf->nf_flags); - } - } - } + status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); out: if (status == nfs_ok) { *pnf = nf; diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 1da0c79a5580..c9e3c6eb4776 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -37,9 +37,7 @@ struct nfsd_file { struct net *nf_net; #define NFSD_FILE_HASHED (0) #define NFSD_FILE_PENDING (1) -#define NFSD_FILE_BREAK_READ (2) -#define NFSD_FILE_BREAK_WRITE (3) -#define NFSD_FILE_REFERENCED (4) +#define NFSD_FILE_REFERENCED (2) unsigned long nf_flags; struct inode *nf_inode; unsigned int nf_hashval; diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index a60ead3b227a..081179fb17e8 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -696,8 +696,6 @@ DEFINE_CLID_EVENT(confirmed_r); __print_flags(val, "|", \ { 1 << NFSD_FILE_HASHED, "HASHED" }, \ { 1 << NFSD_FILE_PENDING, "PENDING" }, \ - { 1 << NFSD_FILE_BREAK_READ, "BREAK_READ" }, \ - { 1 << NFSD_FILE_BREAK_WRITE, "BREAK_WRITE" }, \ { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}) DECLARE_EVENT_CLASS(nfsd_file_class, From c770f31d8f580ed4b965c64f924ec1cc50e41734 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 19 Jul 2022 09:18:35 -0400 Subject: [PATCH 02/78] SUNRPC: Fix xdr_encode_bool() I discovered that xdr_encode_bool() was returning the same address that was passed in the @p parameter. The documenting comment states that the intent is to return the address of the next buffer location, just like the other "xdr_encode_*" helpers. The result was the encoded results of NFSv3 PATHCONF operations were not formed correctly. Fixes: ded04a587f6c ("NFSD: Update the NFSv3 PATHCONF3res encoder to use struct xdr_stream") Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton --- include/linux/sunrpc/xdr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 5860f32e3958..986c8a17ca5e 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -419,8 +419,8 @@ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr) */ static inline __be32 *xdr_encode_bool(__be32 *p, u32 n) { - *p = n ? xdr_one : xdr_zero; - return p++; + *p++ = n ? xdr_one : xdr_zero; + return p; } /** From 184cefbe62627730c30282df12bcff9aae4816ea Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Mon, 13 Jun 2022 09:40:06 -0400 Subject: [PATCH 03/78] NLM: Defend against file_lock changes after vfs_test_lock() Instead of trusting that struct file_lock returns completely unchanged after vfs_test_lock() when there's no conflicting lock, stash away our nlm_lockowner reference so we can properly release it for all cases. This defends against another file_lock implementation overwriting fl_owner when the return type is F_UNLCK. Reported-by: Roberto Bergantinos Corpas Tested-by: Roberto Bergantinos Corpas Signed-off-by: Benjamin Coddington Signed-off-by: Chuck Lever --- fs/lockd/svc4proc.c | 4 +++- fs/lockd/svclock.c | 10 +--------- fs/lockd/svcproc.c | 5 ++++- include/linux/lockd/lockd.h | 1 + 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 176b468a61c7..4f247ab8be61 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -87,6 +87,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) struct nlm_args *argp = rqstp->rq_argp; struct nlm_host *host; struct nlm_file *file; + struct nlm_lockowner *test_owner; __be32 rc = rpc_success; dprintk("lockd: TEST4 called\n"); @@ -96,6 +97,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; + test_owner = argp->lock.fl.fl_owner; /* Now check for conflicting locks */ resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie); if (resp->status == nlm_drop_reply) @@ -103,7 +105,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) else dprintk("lockd: TEST4 status %d\n", ntohl(resp->status)); - nlmsvc_release_lockowner(&argp->lock); + nlmsvc_put_lockowner(test_owner); nlmsvc_release_host(host); nlm_release_file(file); return rc; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index cb3658ab9b7a..9c1aa75441e1 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -340,7 +340,7 @@ nlmsvc_get_lockowner(struct nlm_lockowner *lockowner) return lockowner; } -static void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner) +void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner) { if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock)) return; @@ -590,7 +590,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, int error; int mode; __be32 ret; - struct nlm_lockowner *test_owner; dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n", nlmsvc_file_inode(file)->i_sb->s_id, @@ -604,9 +603,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; } - /* If there's a conflicting lock, remember to clean up the test lock */ - test_owner = (struct nlm_lockowner *)lock->fl.fl_owner; - mode = lock_to_openmode(&lock->fl); error = vfs_test_lock(file->f_file[mode], &lock->fl); if (error) { @@ -635,10 +631,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, conflock->fl.fl_end = lock->fl.fl_end; locks_release_private(&lock->fl); - /* Clean up the test lock */ - lock->fl.fl_owner = NULL; - nlmsvc_put_lockowner(test_owner); - ret = nlm_lck_denied; out: return ret; diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 4dc1b40a489a..b09ca35b527c 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -116,6 +116,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) struct nlm_args *argp = rqstp->rq_argp; struct nlm_host *host; struct nlm_file *file; + struct nlm_lockowner *test_owner; __be32 rc = rpc_success; dprintk("lockd: TEST called\n"); @@ -125,6 +126,8 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; + test_owner = argp->lock.fl.fl_owner; + /* Now check for conflicting locks */ resp->status = cast_status(nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie)); if (resp->status == nlm_drop_reply) @@ -133,7 +136,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) dprintk("lockd: TEST status %d vers %d\n", ntohl(resp->status), rqstp->rq_vers); - nlmsvc_release_lockowner(&argp->lock); + nlmsvc_put_lockowner(test_owner); nlmsvc_release_host(host); nlm_release_file(file); return rc; diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index fcef192e5e45..70ce419e2709 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -292,6 +292,7 @@ void nlmsvc_locks_init_private(struct file_lock *, struct nlm_host *, pid_t); __be32 nlm_lookup_file(struct svc_rqst *, struct nlm_file **, struct nlm_lock *); void nlm_release_file(struct nlm_file *); +void nlmsvc_put_lockowner(struct nlm_lockowner *); void nlmsvc_release_lockowner(struct nlm_lock *); void nlmsvc_mark_resources(struct net *); void nlmsvc_free_host_resources(struct nlm_host *); From 28fffa6c57906ffa07e84f12aae5d99993cafdae Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 21 Jun 2022 10:06:16 -0400 Subject: [PATCH 04/78] SUNRPC: Expand the svc_alloc_arg_err tracepoint Record not only the number of pages requested, but the number of pages that were actually allocated, to get a measure of progress (or lack thereof). Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 14 +++++++++----- net/sunrpc/svc_xprt.c | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index b61d9c90fa26..5c48be033cc7 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1989,20 +1989,24 @@ TRACE_EVENT(svc_wake_up, TRACE_EVENT(svc_alloc_arg_err, TP_PROTO( - unsigned int pages + unsigned int requested, + unsigned int allocated ), - TP_ARGS(pages), + TP_ARGS(requested, allocated), TP_STRUCT__entry( - __field(unsigned int, pages) + __field(unsigned int, requested) + __field(unsigned int, allocated) ), TP_fast_assign( - __entry->pages = pages; + __entry->requested = requested; + __entry->allocated = allocated; ), - TP_printk("pages=%u", __entry->pages) + TP_printk("requested=%u allocated=%u", + __entry->requested, __entry->allocated) ); DECLARE_EVENT_CLASS(svc_deferred_event, diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 2c4dd7ca95b0..2106003645a7 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -691,7 +691,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) set_current_state(TASK_RUNNING); return -EINTR; } - trace_svc_alloc_arg_err(pages); + trace_svc_alloc_arg_err(pages, ret); memalloc_retry_wait(GFP_KERNEL); } rqstp->rq_page_end = &rqstp->rq_pages[pages]; From 051382885552e12541cc0ebf82092be374a9ed2a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 21 Jun 2022 10:06:23 -0400 Subject: [PATCH 05/78] NFSD: Instrument fh_verify() Capture file handles and how they map to local inodes. In particular, NFSv4 PUTFH uses fh_verify() so we can now observe which file handles are the target of OPEN, LOOKUP, RENAME, and so on. Signed-off-by: Chuck Lever --- fs/nfsd/nfsfh.c | 5 +++-- fs/nfsd/trace.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index c29baa03dfaf..5e2ed4b2a925 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -331,8 +331,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) struct dentry *dentry; __be32 error; - dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp)); - if (!fhp->fh_dentry) { error = nfsd_set_fh_dentry(rqstp, fhp); if (error) @@ -340,6 +338,9 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) } dentry = fhp->fh_dentry; exp = fhp->fh_export; + + trace_nfsd_fh_verify(rqstp, fhp, type, access); + /* * We still have to do all these permission checks, even when * fh_dentry is already set: diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 081179fb17e8..d23ecf7ca9f8 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -171,6 +171,52 @@ TRACE_EVENT(nfsd_compound_encode_err, __entry->opnum, __entry->status) ); +#define show_fs_file_type(x) \ + __print_symbolic(x, \ + { S_IFLNK, "LNK" }, \ + { S_IFREG, "REG" }, \ + { S_IFDIR, "DIR" }, \ + { S_IFCHR, "CHR" }, \ + { S_IFBLK, "BLK" }, \ + { S_IFIFO, "FIFO" }, \ + { S_IFSOCK, "SOCK" }) + +TRACE_EVENT(nfsd_fh_verify, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + umode_t type, + int access + ), + TP_ARGS(rqstp, fhp, type, access), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __sockaddr(server, rqstp->rq_xprt->xpt_remotelen) + __sockaddr(client, rqstp->rq_xprt->xpt_remotelen) + __field(u32, xid) + __field(u32, fh_hash) + __field(void *, inode) + __field(unsigned long, type) + __field(unsigned long, access) + ), + TP_fast_assign( + __entry->netns_ino = SVC_NET(rqstp)->ns.inum; + __assign_sockaddr(server, &rqstp->rq_xprt->xpt_local, + rqstp->rq_xprt->xpt_locallen); + __assign_sockaddr(client, &rqstp->rq_xprt->xpt_remote, + rqstp->rq_xprt->xpt_remotelen); + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __entry->inode = d_inode(fhp->fh_dentry); + __entry->type = type; + __entry->access = access; + ), + TP_printk("xid=0x%08x fh_hash=0x%08x inode=%p type=%s access=%s", + __entry->xid, __entry->fh_hash, __entry->inode, + show_fs_file_type(__entry->type), + show_nfsd_may_flags(__entry->access) + ) +); DECLARE_EVENT_CLASS(nfsd_fh_err_class, TP_PROTO(struct svc_rqst *rqstp, From f532c9ff103897be0e2a787c0876683c3dc39ed3 Mon Sep 17 00:00:00 2001 From: Zhang Jiaming Date: Thu, 23 Jun 2022 16:20:05 +0800 Subject: [PATCH 06/78] NFSD: Fix space and spelling mistake Add a blank space after ','. Change 'succesful' to 'successful'. Signed-off-by: Zhang Jiaming Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 3895eb52d2b1..d267b9bcf1fc 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -828,7 +828,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out_umask; status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &create->cr_iattr,S_IFCHR, rdev, &resfh); + &create->cr_iattr, S_IFCHR, rdev, &resfh); break; case NF4SOCK: @@ -2711,7 +2711,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) if (op->opdesc->op_flags & OP_MODIFIES_SOMETHING) { /* * Don't execute this op if we couldn't encode a - * succesful reply: + * successful reply: */ u32 plen = op->opdesc->op_rsize_bop(rqstp, op); /* From 842e00ac3aa3b4a4f7f750c8ab54f8578fc875d3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 28 Jun 2022 22:25:25 +0100 Subject: [PATCH 07/78] nfsd: remove redundant assignment to variable len Variable len is being assigned a value zero and this is never read, it is being re-assigned later. The assignment is redundant and can be removed. Cleans up clang scan-build warning: fs/nfsd/nfsctl.c:636:2: warning: Value stored to 'len' is never read Signed-off-by: Colin Ian King Signed-off-by: Chuck Lever --- fs/nfsd/nfsctl.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 0621c2faf242..66c352bf61b1 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -633,7 +633,6 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) } /* Now write current state into reply buffer */ - len = 0; sep = ""; remaining = SIMPLE_TRANSACTION_LIMIT; for (num=2 ; num <= 4 ; num++) { From 36f2ef2dd44e1c34b281336a41cf42a477d4b43f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 1 Jul 2022 10:37:15 -0400 Subject: [PATCH 08/78] SUNRPC: Fix server-side fault injection documentation Fixes: 37324e6bb120 ("SUNRPC: Cache deferral injection") Signed-off-by: Chuck Lever --- Documentation/fault-injection/fault-injection.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/fault-injection/fault-injection.rst b/Documentation/fault-injection/fault-injection.rst index eb9c2d9a4f5f..17779a2772e5 100644 --- a/Documentation/fault-injection/fault-injection.rst +++ b/Documentation/fault-injection/fault-injection.rst @@ -169,6 +169,13 @@ configuration of fault-injection capabilities. default is 'N', setting it to 'Y' will disable disconnect injection on the RPC server. +- /sys/kernel/debug/fail_sunrpc/ignore-cache-wait: + + Format: { 'Y' | 'N' } + + default is 'N', setting it to 'Y' will disable cache wait + injection on the RPC server. + - /sys/kernel/debug/fail_function/inject: Format: { 'function-name' | '!function-name' | '' } From ca3f9acb6d3faf78da2b63324f7c737dbddf7f69 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:23:45 -0400 Subject: [PATCH 09/78] NFSD: Demote a WARN to a pr_warn() The call trace doesn't add much value, but it sure is noisy. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d267b9bcf1fc..5af9f8d1feb6 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -630,9 +630,9 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } status = nfsd4_process_open2(rqstp, resfh, open); - WARN(status && open->op_created, - "nfsd4_process_open2 failed to open newly-created file! status=%u\n", - be32_to_cpu(status)); + if (status && open->op_created) + pr_warn("nfsd4_process_open2 failed to open newly-created file: status=%u\n", + be32_to_cpu(status)); if (reclaim && !status) nn->somebody_reclaimed = true; out: From 0fd244c115f0321fc5e34ad2291f2a572508e3f7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:23:52 -0400 Subject: [PATCH 10/78] NFSD: Report filecache LRU size Surface the NFSD filecache's LRU list length to help field troubleshooters monitor filecache issues. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index e1f98d32cee1..c61c1cf32346 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1048,7 +1048,7 @@ nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) { unsigned int i, count = 0, longest = 0; - unsigned long hits = 0; + unsigned long lru = 0, hits = 0; /* * No need for spinlocks here since we're not terribly interested in @@ -1061,6 +1061,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) count += nfsd_file_hashtbl[i].nfb_count; longest = max(longest, nfsd_file_hashtbl[i].nfb_count); } + lru = list_lru_count(&nfsd_file_lru); } mutex_unlock(&nfsd_mutex); @@ -1069,6 +1070,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) seq_printf(m, "total entries: %u\n", count); seq_printf(m, "longest chain: %u\n", longest); + seq_printf(m, "lru entries: %lu\n", lru); seq_printf(m, "cache hits: %lu\n", hits); return 0; } From 29d4bdbbb910f33d6058d2c51278f00f656df325 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:23:59 -0400 Subject: [PATCH 11/78] NFSD: Report count of calls to nfsd_file_acquire() Count the number of successful acquisitions that did not create a file (ie, acquisitions that do not result in a compulsory cache miss). This count can be compared directly with the reported hit count to compute a hit ratio. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index c61c1cf32346..6e9d1af86e7b 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -43,6 +43,7 @@ struct nfsd_fcache_bucket { }; static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); +static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions); struct nfsd_fcache_disposal { struct work_struct work; @@ -955,6 +956,8 @@ wait_for_construction: status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); out: if (status == nfs_ok) { + if (open) + this_cpu_inc(nfsd_file_acquisitions); *pnf = nf; } else { nfsd_file_put(nf); @@ -1047,8 +1050,9 @@ nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, */ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) { + unsigned long hits = 0, acquisitions = 0; unsigned int i, count = 0, longest = 0; - unsigned long lru = 0, hits = 0; + unsigned long lru = 0; /* * No need for spinlocks here since we're not terribly interested in @@ -1065,13 +1069,16 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) } mutex_unlock(&nfsd_mutex); - for_each_possible_cpu(i) + for_each_possible_cpu(i) { hits += per_cpu(nfsd_file_cache_hits, i); + acquisitions += per_cpu(nfsd_file_acquisitions, i); + } seq_printf(m, "total entries: %u\n", count); seq_printf(m, "longest chain: %u\n", longest); seq_printf(m, "lru entries: %lu\n", lru); seq_printf(m, "cache hits: %lu\n", hits); + seq_printf(m, "acquisitions: %lu\n", acquisitions); return 0; } From d63293272abb51c02457f1017dfd61c3270d9ae3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:24:05 -0400 Subject: [PATCH 12/78] NFSD: Report count of freed filecache items Surface the count of freed nfsd_file items. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 6e9d1af86e7b..e15e719ede9d 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -44,6 +44,7 @@ struct nfsd_fcache_bucket { static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions); +static DEFINE_PER_CPU(unsigned long, nfsd_file_releases); struct nfsd_fcache_disposal { struct work_struct work; @@ -196,6 +197,8 @@ nfsd_file_free(struct nfsd_file *nf) { bool flush = false; + this_cpu_inc(nfsd_file_releases); + trace_nfsd_file_put_final(nf); if (nf->nf_mark) nfsd_file_mark_put(nf->nf_mark); @@ -1050,7 +1053,7 @@ nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, */ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) { - unsigned long hits = 0, acquisitions = 0; + unsigned long hits = 0, acquisitions = 0, releases = 0; unsigned int i, count = 0, longest = 0; unsigned long lru = 0; @@ -1072,6 +1075,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) for_each_possible_cpu(i) { hits += per_cpu(nfsd_file_cache_hits, i); acquisitions += per_cpu(nfsd_file_acquisitions, i); + releases += per_cpu(nfsd_file_releases, i); } seq_printf(m, "total entries: %u\n", count); @@ -1079,6 +1083,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) seq_printf(m, "lru entries: %lu\n", lru); seq_printf(m, "cache hits: %lu\n", hits); seq_printf(m, "acquisitions: %lu\n", acquisitions); + seq_printf(m, "releases: %lu\n", releases); return 0; } From 904940e94a887701db24401e3ed6928a1d4e329f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:24:12 -0400 Subject: [PATCH 13/78] NFSD: Report average age of filecache items This is a measure of how long items stay in the filecache, to help assess how efficient the cache is. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 11 ++++++++++- fs/nfsd/filecache.h | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index e15e719ede9d..27611aca16a4 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -45,6 +45,7 @@ struct nfsd_fcache_bucket { static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions); static DEFINE_PER_CPU(unsigned long, nfsd_file_releases); +static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age); struct nfsd_fcache_disposal { struct work_struct work; @@ -178,6 +179,7 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, if (nf) { INIT_HLIST_NODE(&nf->nf_node); INIT_LIST_HEAD(&nf->nf_lru); + nf->nf_birthtime = ktime_get(); nf->nf_file = NULL; nf->nf_cred = get_current_cred(); nf->nf_net = net; @@ -195,9 +197,11 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, static bool nfsd_file_free(struct nfsd_file *nf) { + s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime)); bool flush = false; this_cpu_inc(nfsd_file_releases); + this_cpu_add(nfsd_file_total_age, age); trace_nfsd_file_put_final(nf); if (nf->nf_mark) @@ -1055,7 +1059,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) { unsigned long hits = 0, acquisitions = 0, releases = 0; unsigned int i, count = 0, longest = 0; - unsigned long lru = 0; + unsigned long lru = 0, total_age = 0; /* * No need for spinlocks here since we're not terribly interested in @@ -1076,6 +1080,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) hits += per_cpu(nfsd_file_cache_hits, i); acquisitions += per_cpu(nfsd_file_acquisitions, i); releases += per_cpu(nfsd_file_releases, i); + total_age += per_cpu(nfsd_file_total_age, i); } seq_printf(m, "total entries: %u\n", count); @@ -1084,6 +1089,10 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) seq_printf(m, "cache hits: %lu\n", hits); seq_printf(m, "acquisitions: %lu\n", acquisitions); seq_printf(m, "releases: %lu\n", releases); + if (releases) + seq_printf(m, "mean age (ms): %ld\n", total_age / releases); + else + seq_printf(m, "mean age (ms): -\n"); return 0; } diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index c9e3c6eb4776..c6ad5fe47f12 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -44,6 +44,7 @@ struct nfsd_file { refcount_t nf_ref; unsigned char nf_may; struct nfsd_file_mark *nf_mark; + ktime_t nf_birthtime; }; int nfsd_file_cache_init(void); From 0bac5a264d9a923f5b01f3521e1519a8d0358342 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:24:18 -0400 Subject: [PATCH 14/78] NFSD: Add nfsd_file_lru_dispose_list() helper Refactor the invariant part of nfsd_file_lru_walk_list() into a separate helper function. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 27611aca16a4..af22d9e34db5 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -451,11 +451,31 @@ out_skip: return LRU_SKIP; } +/* + * Unhash items on @dispose immediately, then queue them on the + * disposal workqueue to finish releasing them in the background. + * + * cel: Note that between the time list_lru_shrink_walk runs and + * now, these items are in the hash table but marked unhashed. + * Why release these outside of lru_cb ? There's no lock ordering + * problem since lru_cb currently takes no lock. + */ +static void nfsd_file_gc_dispose_list(struct list_head *dispose) +{ + struct nfsd_file *nf; + + list_for_each_entry(nf, dispose, nf_lru) { + spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); + nfsd_file_do_unhash(nf); + spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); + } + nfsd_file_dispose_list_delayed(dispose); +} + static unsigned long nfsd_file_lru_walk_list(struct shrink_control *sc) { LIST_HEAD(head); - struct nfsd_file *nf; unsigned long ret; if (sc) @@ -465,12 +485,7 @@ nfsd_file_lru_walk_list(struct shrink_control *sc) ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX); - list_for_each_entry(nf, &head, nf_lru) { - spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); - nfsd_file_do_unhash(nf); - spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); - } - nfsd_file_dispose_list_delayed(&head); + nfsd_file_gc_dispose_list(&head); return ret; } From 3bc6d3470fe412f818f9bff6b71d1be3a76af8f3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:24:25 -0400 Subject: [PATCH 15/78] NFSD: Refactor nfsd_file_gc() Refactor nfsd_file_gc() to use the new list_lru helper. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index af22d9e34db5..f5e9d9f72d5d 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -492,7 +492,11 @@ nfsd_file_lru_walk_list(struct shrink_control *sc) static void nfsd_file_gc(void) { - nfsd_file_lru_walk_list(NULL); + LIST_HEAD(dispose); + + list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, + &dispose, LONG_MAX); + nfsd_file_gc_dispose_list(&dispose); } static void From 39f1d1ff8148902c5692ffb0e1c4479416ab44a7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:24:31 -0400 Subject: [PATCH 16/78] NFSD: Refactor nfsd_file_lru_scan() Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index f5e9d9f72d5d..029fb4cfa224 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -472,23 +472,6 @@ static void nfsd_file_gc_dispose_list(struct list_head *dispose) nfsd_file_dispose_list_delayed(dispose); } -static unsigned long -nfsd_file_lru_walk_list(struct shrink_control *sc) -{ - LIST_HEAD(head); - unsigned long ret; - - if (sc) - ret = list_lru_shrink_walk(&nfsd_file_lru, sc, - nfsd_file_lru_cb, &head); - else - ret = list_lru_walk(&nfsd_file_lru, - nfsd_file_lru_cb, - &head, LONG_MAX); - nfsd_file_gc_dispose_list(&head); - return ret; -} - static void nfsd_file_gc(void) { @@ -515,7 +498,13 @@ nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc) static unsigned long nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) { - return nfsd_file_lru_walk_list(sc); + LIST_HEAD(dispose); + unsigned long ret; + + ret = list_lru_shrink_walk(&nfsd_file_lru, sc, + nfsd_file_lru_cb, &dispose); + nfsd_file_gc_dispose_list(&dispose); + return ret; } static struct shrinker nfsd_file_shrinker = { From 94660cc19c75083af046b0f8362e3d3bc2eba21d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:24:38 -0400 Subject: [PATCH 17/78] NFSD: Report the number of items evicted by the LRU walk Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 13 ++++++++++--- fs/nfsd/trace.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 029fb4cfa224..e21dcc5e90ab 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -46,6 +46,7 @@ static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions); static DEFINE_PER_CPU(unsigned long, nfsd_file_releases); static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age); +static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions); struct nfsd_fcache_disposal { struct work_struct work; @@ -446,6 +447,7 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, goto out_skip; list_lru_isolate_move(lru, &nf->nf_lru, head); + this_cpu_inc(nfsd_file_evictions); return LRU_REMOVED; out_skip: return LRU_SKIP; @@ -476,9 +478,11 @@ static void nfsd_file_gc(void) { LIST_HEAD(dispose); + unsigned long ret; - list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, - &dispose, LONG_MAX); + ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, + &dispose, LONG_MAX); + trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru)); nfsd_file_gc_dispose_list(&dispose); } @@ -503,6 +507,7 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &dispose); + trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru)); nfsd_file_gc_dispose_list(&dispose); return ret; } @@ -1065,7 +1070,7 @@ nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, */ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) { - unsigned long hits = 0, acquisitions = 0, releases = 0; + unsigned long hits = 0, acquisitions = 0, releases = 0, evictions = 0; unsigned int i, count = 0, longest = 0; unsigned long lru = 0, total_age = 0; @@ -1089,6 +1094,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) acquisitions += per_cpu(nfsd_file_acquisitions, i); releases += per_cpu(nfsd_file_releases, i); total_age += per_cpu(nfsd_file_total_age, i); + evictions += per_cpu(nfsd_file_evictions, i); } seq_printf(m, "total entries: %u\n", count); @@ -1097,6 +1103,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) seq_printf(m, "cache hits: %lu\n", hits); seq_printf(m, "acquisitions: %lu\n", acquisitions); seq_printf(m, "releases: %lu\n", releases); + seq_printf(m, "evictions: %lu\n", evictions); if (releases) seq_printf(m, "mean age (ms): %ld\n", total_age / releases); else diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index d23ecf7ca9f8..22f57fe7a6f7 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -895,6 +895,35 @@ TRACE_EVENT(nfsd_file_fsnotify_handle_event, __entry->nlink, __entry->mode, __entry->mask) ); +DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class, + TP_PROTO( + unsigned long removed, + unsigned long remaining + ), + TP_ARGS(removed, remaining), + TP_STRUCT__entry( + __field(unsigned long, removed) + __field(unsigned long, remaining) + ), + TP_fast_assign( + __entry->removed = removed; + __entry->remaining = remaining; + ), + TP_printk("%lu entries removed, %lu remaining", + __entry->removed, __entry->remaining) +); + +#define DEFINE_NFSD_FILE_LRUWALK_EVENT(name) \ +DEFINE_EVENT(nfsd_file_lruwalk_class, name, \ + TP_PROTO( \ + unsigned long removed, \ + unsigned long remaining \ + ), \ + TP_ARGS(removed, remaining)) + +DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed); +DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed); + #include "cache.h" TRACE_DEFINE_ENUM(RC_DROPIT); From df2aff524faceaf743b7c5ab0f4fb86cb511f782 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:24:45 -0400 Subject: [PATCH 18/78] NFSD: Record number of flush calls Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index e21dcc5e90ab..1b5252895625 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -46,6 +46,7 @@ static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions); static DEFINE_PER_CPU(unsigned long, nfsd_file_releases); static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age); +static DEFINE_PER_CPU(unsigned long, nfsd_file_pages_flushed); static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions); struct nfsd_fcache_disposal { @@ -243,7 +244,12 @@ nfsd_file_check_write_error(struct nfsd_file *nf) static void nfsd_file_flush(struct nfsd_file *nf) { - if (nf->nf_file && vfs_fsync(nf->nf_file, 1) != 0) + struct file *file = nf->nf_file; + + if (!file || !(file->f_mode & FMODE_WRITE)) + return; + this_cpu_add(nfsd_file_pages_flushed, file->f_mapping->nrpages); + if (vfs_fsync(file, 1) != 0) nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); } @@ -1070,7 +1076,8 @@ nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, */ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) { - unsigned long hits = 0, acquisitions = 0, releases = 0, evictions = 0; + unsigned long releases = 0, pages_flushed = 0, evictions = 0; + unsigned long hits = 0, acquisitions = 0; unsigned int i, count = 0, longest = 0; unsigned long lru = 0, total_age = 0; @@ -1095,6 +1102,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) releases += per_cpu(nfsd_file_releases, i); total_age += per_cpu(nfsd_file_total_age, i); evictions += per_cpu(nfsd_file_evictions, i); + pages_flushed += per_cpu(nfsd_file_pages_flushed, i); } seq_printf(m, "total entries: %u\n", count); @@ -1108,6 +1116,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) seq_printf(m, "mean age (ms): %ld\n", total_age / releases); else seq_printf(m, "mean age (ms): -\n"); + seq_printf(m, "pages flushed: %lu\n", pages_flushed); return 0; } From 8b330f78040cbe16cf8029df70391b2a491f17e2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:24:51 -0400 Subject: [PATCH 19/78] NFSD: Zero counters when the filecache is re-initialized If nfsd_file_cache_init() is called after a shutdown, be sure the stat counters are reset. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 1b5252895625..777698a2e5a6 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -824,6 +824,8 @@ nfsd_file_cache_shutdown_net(struct net *net) void nfsd_file_cache_shutdown(void) { + int i; + set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); lease_unregister_notifier(&nfsd_file_lease_notifier); @@ -847,6 +849,15 @@ nfsd_file_cache_shutdown(void) nfsd_file_hashtbl = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; + + for_each_possible_cpu(i) { + per_cpu(nfsd_file_cache_hits, i) = 0; + per_cpu(nfsd_file_acquisitions, i) = 0; + per_cpu(nfsd_file_releases, i) = 0; + per_cpu(nfsd_file_total_age, i) = 0; + per_cpu(nfsd_file_pages_flushed, i) = 0; + per_cpu(nfsd_file_evictions, i) = 0; + } } static bool From 2e6c6e4c4375bfd3defa5b1ff3604d9f33d1c936 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:24:58 -0400 Subject: [PATCH 20/78] NFSD: Hook up the filecache stat file There has always been the capability of exporting filecache metrics via /proc, but it was never hooked up. Let's surface these metrics to enable better observability of the filecache. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfsctl.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 66c352bf61b1..7002edbf2687 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -25,6 +25,7 @@ #include "state.h" #include "netns.h" #include "pnfs.h" +#include "filecache.h" /* * We have a single directory with several nodes in it. @@ -45,6 +46,7 @@ enum { NFSD_Ports, NFSD_MaxBlkSize, NFSD_MaxConnections, + NFSD_Filecache, NFSD_SupportedEnctypes, /* * The below MUST come last. Otherwise we leave a hole in nfsd_files[] @@ -229,6 +231,13 @@ static const struct file_operations reply_cache_stats_operations = { .release = single_release, }; +static const struct file_operations filecache_ops = { + .open = nfsd_file_cache_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /*----------------------------------------------------------------------------*/ /* * payload - write methods @@ -1370,6 +1379,7 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO}, + [NFSD_Filecache] = {"filecache", &filecache_ops, S_IRUGO}, #if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ From 668ed92e651d3c25f9b6e8cb7ceca54d00daa96d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:25:04 -0400 Subject: [PATCH 21/78] NFSD: WARN when freeing an item still linked via nf_lru Add a guardrail to prevent freeing memory that is still on a list. This includes either a dispose list or the LRU list. This is the sign of a bug, but this class of bugs can be detected so that they don't endanger system stability, especially while debugging. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 777698a2e5a6..f58685f1b120 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -214,6 +214,14 @@ nfsd_file_free(struct nfsd_file *nf) fput(nf->nf_file); flush = true; } + + /* + * If this item is still linked via nf_lru, that's a bug. + * WARN and leak it to preserve system stability. + */ + if (WARN_ON_ONCE(!list_empty(&nf->nf_lru))) + return flush; + call_rcu(&nf->nf_rcu, nfsd_file_slab_free); return flush; } @@ -343,7 +351,7 @@ nfsd_file_dispose_list(struct list_head *dispose) while(!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - list_del(&nf->nf_lru); + list_del_init(&nf->nf_lru); nfsd_file_flush(nf); nfsd_file_put_noref(nf); } @@ -357,7 +365,7 @@ nfsd_file_dispose_list_sync(struct list_head *dispose) while(!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - list_del(&nf->nf_lru); + list_del_init(&nf->nf_lru); nfsd_file_flush(nf); if (!refcount_dec_and_test(&nf->nf_ref)) continue; From c46203acddd9b9200dbc53d0603c97355fd3a03b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:25:11 -0400 Subject: [PATCH 22/78] NFSD: Trace filecache LRU activity Observe the operation of garbage collection and the lifetime of filecache items. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 44 +++++++++++++++++++++++++++++++------------- fs/nfsd/trace.h | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 13 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index f58685f1b120..e831a7b4f916 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -261,6 +261,18 @@ nfsd_file_flush(struct nfsd_file *nf) nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); } +static void nfsd_file_lru_add(struct nfsd_file *nf) +{ + if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) + trace_nfsd_file_lru_add(nf); +} + +static void nfsd_file_lru_remove(struct nfsd_file *nf) +{ + if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) + trace_nfsd_file_lru_del(nf); +} + static void nfsd_file_do_unhash(struct nfsd_file *nf) { @@ -280,8 +292,7 @@ nfsd_file_unhash(struct nfsd_file *nf) { if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { nfsd_file_do_unhash(nf); - if (!list_empty(&nf->nf_lru)) - list_lru_del(&nfsd_file_lru, &nf->nf_lru); + nfsd_file_lru_remove(nf); return true; } return false; @@ -444,27 +455,34 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, * counter. Here we check the counter and then test and clear the flag. * That order is deliberate to ensure that we can do this locklessly. */ - if (refcount_read(&nf->nf_ref) > 1) - goto out_skip; + if (refcount_read(&nf->nf_ref) > 1) { + trace_nfsd_file_gc_in_use(nf); + return LRU_SKIP; + } /* * Don't throw out files that are still undergoing I/O or * that have uncleared errors pending. */ - if (nfsd_file_check_writeback(nf)) - goto out_skip; + if (nfsd_file_check_writeback(nf)) { + trace_nfsd_file_gc_writeback(nf); + return LRU_SKIP; + } - if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) - goto out_skip; + if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) { + trace_nfsd_file_gc_referenced(nf); + return LRU_SKIP; + } - if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) - goto out_skip; + if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + trace_nfsd_file_gc_hashed(nf); + return LRU_SKIP; + } list_lru_isolate_move(lru, &nf->nf_lru, head); this_cpu_inc(nfsd_file_evictions); + trace_nfsd_file_gc_disposed(nf); return LRU_REMOVED; -out_skip: - return LRU_SKIP; } /* @@ -1017,7 +1035,7 @@ open_file: refcount_inc(&nf->nf_ref); __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); - list_lru_add(&nfsd_file_lru, &nf->nf_lru); + nfsd_file_lru_add(nf); hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head); ++nfsd_file_hashtbl[hashval].nfb_count; nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount, diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 22f57fe7a6f7..64a6c577d7b9 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -895,6 +895,45 @@ TRACE_EVENT(nfsd_file_fsnotify_handle_event, __entry->nlink, __entry->mode, __entry->mask) ); +DECLARE_EVENT_CLASS(nfsd_file_gc_class, + TP_PROTO( + const struct nfsd_file *nf + ), + TP_ARGS(nf), + TP_STRUCT__entry( + __field(void *, nf_inode) + __field(void *, nf_file) + __field(int, nf_ref) + __field(unsigned long, nf_flags) + ), + TP_fast_assign( + __entry->nf_inode = nf->nf_inode; + __entry->nf_file = nf->nf_file; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->nf_flags = nf->nf_flags; + ), + TP_printk("inode=%p ref=%d nf_flags=%s nf_file=%p", + __entry->nf_inode, __entry->nf_ref, + show_nf_flags(__entry->nf_flags), + __entry->nf_file + ) +); + +#define DEFINE_NFSD_FILE_GC_EVENT(name) \ +DEFINE_EVENT(nfsd_file_gc_class, name, \ + TP_PROTO( \ + const struct nfsd_file *nf \ + ), \ + TP_ARGS(nf)) + +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_hashed); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_disposed); + DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class, TP_PROTO( unsigned long removed, From 4a0e73e635e3f36b616ad5c943e3d23debe4632f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:25:17 -0400 Subject: [PATCH 23/78] NFSD: Leave open files out of the filecache LRU There have been reports of problems when running fstests generic/531 against Linux NFS servers with NFSv4. The NFS server that hosts the test's SCRATCH_DEV suffers from CPU soft lock-ups during the test. Analysis shows that: fs/nfsd/filecache.c 482 ret = list_lru_walk(&nfsd_file_lru, 483 nfsd_file_lru_cb, 484 &head, LONG_MAX); causes nfsd_file_gc() to walk the entire length of the filecache LRU list every time it is called (which is quite frequently). The walk holds a spinlock the entire time that prevents other nfsd threads from accessing the filecache. What's more, for NFSv4 workloads, none of the items that are visited during this walk may be evicted, since they are all files that are held OPEN by NFS clients. Address this by ensuring that open files are not kept on the LRU list. Reported-by: Frank van der Linden Reported-by: Wang Yugui Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=386 Suggested-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 24 +++++++++++++++++++----- fs/nfsd/trace.h | 2 ++ 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index e831a7b4f916..69d1ab6cfc14 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -263,6 +263,7 @@ nfsd_file_flush(struct nfsd_file *nf) static void nfsd_file_lru_add(struct nfsd_file *nf) { + set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) trace_nfsd_file_lru_add(nf); } @@ -292,7 +293,6 @@ nfsd_file_unhash(struct nfsd_file *nf) { if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { nfsd_file_do_unhash(nf); - nfsd_file_lru_remove(nf); return true; } return false; @@ -313,6 +313,7 @@ nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *disp if (refcount_dec_not_one(&nf->nf_ref)) return true; + nfsd_file_lru_remove(nf); list_add(&nf->nf_lru, dispose); return true; } @@ -324,6 +325,7 @@ nfsd_file_put_noref(struct nfsd_file *nf) if (refcount_dec_and_test(&nf->nf_ref)) { WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); + nfsd_file_lru_remove(nf); nfsd_file_free(nf); } } @@ -333,7 +335,7 @@ nfsd_file_put(struct nfsd_file *nf) { might_sleep(); - set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); + nfsd_file_lru_add(nf); if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) { nfsd_file_flush(nf); nfsd_file_put_noref(nf); @@ -433,8 +435,18 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose) } } -/* +/** + * nfsd_file_lru_cb - Examine an entry on the LRU list + * @item: LRU entry to examine + * @lru: controlling LRU + * @lock: LRU list lock (unused) + * @arg: dispose list + * * Note this can deadlock with nfsd_file_cache_purge. + * + * Return values: + * %LRU_REMOVED: @item was removed from the LRU + * %LRU_SKIP: @item cannot be evicted */ static enum lru_status nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, @@ -456,8 +468,9 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, * That order is deliberate to ensure that we can do this locklessly. */ if (refcount_read(&nf->nf_ref) > 1) { + list_lru_isolate(lru, &nf->nf_lru); trace_nfsd_file_gc_in_use(nf); - return LRU_SKIP; + return LRU_REMOVED; } /* @@ -1014,6 +1027,7 @@ wait_for_construction: goto retry; } + nfsd_file_lru_remove(nf); this_cpu_inc(nfsd_file_cache_hits); status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); @@ -1035,7 +1049,6 @@ open_file: refcount_inc(&nf->nf_ref); __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); - nfsd_file_lru_add(nf); hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head); ++nfsd_file_hashtbl[hashval].nfb_count; nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount, @@ -1060,6 +1073,7 @@ open_file: */ if (status != nfs_ok || inode->i_nlink == 0) { bool do_free; + nfsd_file_lru_remove(nf); spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); do_free = nfsd_file_unhash(nf); spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 64a6c577d7b9..9507a6c8eeee 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -927,7 +927,9 @@ DEFINE_EVENT(nfsd_file_gc_class, name, \ TP_ARGS(nf)) DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add_disposed); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced); From edead3a55804739b2e4af0f35e9c7326264e7b22 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:25:24 -0400 Subject: [PATCH 24/78] NFSD: Fix the filecache LRU shrinker Without LRU item rotation, the shrinker visits only a few items on the end of the LRU list, and those would always be long-term OPEN files for NFSv4 workloads. That makes the filecache shrinker completely ineffective. Adopt the same strategy as the inode LRU by using LRU_ROTATE. Suggested-by: Dave Chinner Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 69d1ab6cfc14..380f6c0da465 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -446,6 +446,7 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose) * * Return values: * %LRU_REMOVED: @item was removed from the LRU + * %LRU_ROTATE: @item is to be moved to the LRU tail * %LRU_SKIP: @item cannot be evicted */ static enum lru_status @@ -484,7 +485,7 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) { trace_nfsd_file_gc_referenced(nf); - return LRU_SKIP; + return LRU_ROTATE; } if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { @@ -526,7 +527,7 @@ nfsd_file_gc(void) unsigned long ret; ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, - &dispose, LONG_MAX); + &dispose, list_lru_count(&nfsd_file_lru)); trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru)); nfsd_file_gc_dispose_list(&dispose); } From 6df19411367a5fb4ef61854cbd1af269c077f917 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:25:30 -0400 Subject: [PATCH 25/78] NFSD: Never call nfsd_file_gc() in foreground paths The checks in nfsd_file_acquire() and nfsd_file_put() that directly invoke filecache garbage collection are intended to keep cache occupancy between a low- and high-watermark. The reason to limit the capacity of the filecache is to keep filecache lookups reasonably fast. However, invoking garbage collection at those points has some undesirable negative impacts. Files that are held open by NFSv4 clients often push the occupancy of the filecache over these watermarks. At that point: - Every call to nfsd_file_acquire() and nfsd_file_put() results in an LRU walk. This has the same effect on lookup latency as long chains in the hash table. - Garbage collection will then run on every nfsd thread, causing a lot of unnecessary lock contention. - Limiting cache capacity pushes out files used only by NFSv3 clients, which are the type of files the filecache is supposed to help. To address those negative impacts, remove the direct calls to the garbage collector. Subsequent patches will address maintaining lookup efficiency as cache capacity increases. Suggested-by: Wang Yugui Suggested-by: Dave Chinner Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 380f6c0da465..23be87dac1a1 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -29,8 +29,6 @@ #define NFSD_LAUNDRETTE_DELAY (2 * HZ) #define NFSD_FILE_SHUTDOWN (1) -#define NFSD_FILE_LRU_THRESHOLD (4096UL) -#define NFSD_FILE_LRU_LIMIT (NFSD_FILE_LRU_THRESHOLD << 2) /* We only care about NFSD_MAY_READ/WRITE for this cache */ #define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE) @@ -66,8 +64,6 @@ static struct fsnotify_group *nfsd_file_fsnotify_group; static atomic_long_t nfsd_filecache_count; static struct delayed_work nfsd_filecache_laundrette; -static void nfsd_file_gc(void); - static void nfsd_file_schedule_laundrette(void) { @@ -344,9 +340,6 @@ nfsd_file_put(struct nfsd_file *nf) nfsd_file_schedule_laundrette(); } else nfsd_file_put_noref(nf); - - if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) - nfsd_file_gc(); } struct nfsd_file * @@ -1055,8 +1048,7 @@ open_file: nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount, nfsd_file_hashtbl[hashval].nfb_count); spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); - if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD) - nfsd_file_gc(); + atomic_long_inc(&nfsd_filecache_count); nf->nf_mark = nfsd_file_mark_find_or_create(nf); if (nf->nf_mark) { From 54f7df7094b329ca35d9f9808692bb16c48b13e9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:25:37 -0400 Subject: [PATCH 26/78] NFSD: No longer record nf_hashval in the trace log I'm about to replace nfsd_file_hashtbl with an rhashtable. The individual hash values will no longer be visible or relevant, so remove them from the tracepoints. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 15 ++++++++------- fs/nfsd/trace.h | 45 +++++++++++++++++++++------------------------ 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 23be87dac1a1..4de27c0f9ec9 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -589,7 +589,7 @@ nfsd_file_close_inode_sync(struct inode *inode) LIST_HEAD(dispose); __nfsd_file_close_inode(inode, hashval, &dispose); - trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose)); + trace_nfsd_file_close_inode_sync(inode, !list_empty(&dispose)); nfsd_file_dispose_list_sync(&dispose); } @@ -609,7 +609,7 @@ nfsd_file_close_inode(struct inode *inode) LIST_HEAD(dispose); __nfsd_file_close_inode(inode, hashval, &dispose); - trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose)); + trace_nfsd_file_close_inode(inode, !list_empty(&dispose)); nfsd_file_dispose_list_delayed(&dispose); } @@ -963,7 +963,7 @@ nfsd_file_is_cached(struct inode *inode) } } rcu_read_unlock(); - trace_nfsd_file_is_cached(inode, hashval, (int)ret); + trace_nfsd_file_is_cached(inode, (int)ret); return ret; } @@ -995,9 +995,8 @@ retry: new = nfsd_file_alloc(inode, may_flags, hashval, net); if (!new) { - trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, - NULL, nfserr_jukebox); - return nfserr_jukebox; + status = nfserr_jukebox; + goto out_status; } spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); @@ -1035,8 +1034,10 @@ out: nf = NULL; } - trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status); +out_status: + trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status); return status; + open_file: nf = new; /* Take reference for the hashtable */ diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 9507a6c8eeee..9a146d71ad9e 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -748,7 +748,6 @@ DECLARE_EVENT_CLASS(nfsd_file_class, TP_PROTO(struct nfsd_file *nf), TP_ARGS(nf), TP_STRUCT__entry( - __field(unsigned int, nf_hashval) __field(void *, nf_inode) __field(int, nf_ref) __field(unsigned long, nf_flags) @@ -756,15 +755,13 @@ DECLARE_EVENT_CLASS(nfsd_file_class, __field(struct file *, nf_file) ), TP_fast_assign( - __entry->nf_hashval = nf->nf_hashval; __entry->nf_inode = nf->nf_inode; __entry->nf_ref = refcount_read(&nf->nf_ref); __entry->nf_flags = nf->nf_flags; __entry->nf_may = nf->nf_may; __entry->nf_file = nf->nf_file; ), - TP_printk("hash=0x%x inode=%p ref=%d flags=%s may=%s file=%p", - __entry->nf_hashval, + TP_printk("inode=%p ref=%d flags=%s may=%s nf_file=%p", __entry->nf_inode, __entry->nf_ref, show_nf_flags(__entry->nf_flags), @@ -784,15 +781,18 @@ DEFINE_NFSD_FILE_EVENT(nfsd_file_put); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked); TRACE_EVENT(nfsd_file_acquire, - TP_PROTO(struct svc_rqst *rqstp, unsigned int hash, - struct inode *inode, unsigned int may_flags, - struct nfsd_file *nf, __be32 status), + TP_PROTO( + struct svc_rqst *rqstp, + struct inode *inode, + unsigned int may_flags, + struct nfsd_file *nf, + __be32 status + ), - TP_ARGS(rqstp, hash, inode, may_flags, nf, status), + TP_ARGS(rqstp, inode, may_flags, nf, status), TP_STRUCT__entry( __field(u32, xid) - __field(unsigned int, hash) __field(void *, inode) __field(unsigned long, may_flags) __field(int, nf_ref) @@ -804,7 +804,6 @@ TRACE_EVENT(nfsd_file_acquire, TP_fast_assign( __entry->xid = be32_to_cpu(rqstp->rq_xid); - __entry->hash = hash; __entry->inode = inode; __entry->may_flags = may_flags; __entry->nf_ref = nf ? refcount_read(&nf->nf_ref) : 0; @@ -814,8 +813,8 @@ TRACE_EVENT(nfsd_file_acquire, __entry->status = be32_to_cpu(status); ), - TP_printk("xid=0x%x hash=0x%x inode=%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=%p status=%u", - __entry->xid, __entry->hash, __entry->inode, + TP_printk("xid=0x%x inode=%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=%p status=%u", + __entry->xid, __entry->inode, show_nfsd_may_flags(__entry->may_flags), __entry->nf_ref, show_nf_flags(__entry->nf_flags), show_nfsd_may_flags(__entry->nf_may), @@ -826,7 +825,6 @@ TRACE_EVENT(nfsd_file_open, TP_PROTO(struct nfsd_file *nf, __be32 status), TP_ARGS(nf, status), TP_STRUCT__entry( - __field(unsigned int, nf_hashval) __field(void *, nf_inode) /* cannot be dereferenced */ __field(int, nf_ref) __field(unsigned long, nf_flags) @@ -834,15 +832,13 @@ TRACE_EVENT(nfsd_file_open, __field(void *, nf_file) /* cannot be dereferenced */ ), TP_fast_assign( - __entry->nf_hashval = nf->nf_hashval; __entry->nf_inode = nf->nf_inode; __entry->nf_ref = refcount_read(&nf->nf_ref); __entry->nf_flags = nf->nf_flags; __entry->nf_may = nf->nf_may; __entry->nf_file = nf->nf_file; ), - TP_printk("hash=0x%x inode=%p ref=%d flags=%s may=%s file=%p", - __entry->nf_hashval, + TP_printk("inode=%p ref=%d flags=%s may=%s file=%p", __entry->nf_inode, __entry->nf_ref, show_nf_flags(__entry->nf_flags), @@ -851,26 +847,27 @@ TRACE_EVENT(nfsd_file_open, ) DECLARE_EVENT_CLASS(nfsd_file_search_class, - TP_PROTO(struct inode *inode, unsigned int hash, int found), - TP_ARGS(inode, hash, found), + TP_PROTO( + struct inode *inode, + int found + ), + TP_ARGS(inode, found), TP_STRUCT__entry( __field(struct inode *, inode) - __field(unsigned int, hash) __field(int, found) ), TP_fast_assign( __entry->inode = inode; - __entry->hash = hash; __entry->found = found; ), - TP_printk("hash=0x%x inode=%p found=%d", __entry->hash, - __entry->inode, __entry->found) + TP_printk("inode=%p found=%d", + __entry->inode, __entry->found) ); #define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \ DEFINE_EVENT(nfsd_file_search_class, name, \ - TP_PROTO(struct inode *inode, unsigned int hash, int found), \ - TP_ARGS(inode, hash, found)) + TP_PROTO(struct inode *inode, int found), \ + TP_ARGS(inode, found)) DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync); DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode); From f53cef15dddec7203df702cdc62e554190385450 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:25:44 -0400 Subject: [PATCH 27/78] NFSD: Remove lockdep assertion from unhash_and_release_locked() IIUC, holding the hash bucket lock is needed only in nfsd_file_unhash, and there is already a lockdep assertion there. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 4de27c0f9ec9..92e3e7d4fcd1 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -300,8 +300,6 @@ nfsd_file_unhash(struct nfsd_file *nf) static bool nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose) { - lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); - trace_nfsd_file_unhash_and_release_locked(nf); if (!nfsd_file_unhash(nf)) return false; From 8755326399f471ec3b31e2ab8c5074c0d28a0fb5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:25:50 -0400 Subject: [PATCH 28/78] NFSD: nfsd_file_unhash can compute hashval from nf->nf_inode Remove an unnecessary usage of nf_hashval. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 92e3e7d4fcd1..a9034d87f006 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -273,13 +273,17 @@ static void nfsd_file_lru_remove(struct nfsd_file *nf) static void nfsd_file_do_unhash(struct nfsd_file *nf) { - lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); + struct inode *inode = nf->nf_inode; + unsigned int hashval = (unsigned int)hash_long(inode->i_ino, + NFSD_FILE_HASH_BITS); + + lockdep_assert_held(&nfsd_file_hashtbl[hashval].nfb_lock); trace_nfsd_file_unhash(nf); if (nfsd_file_check_write_error(nf)) nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); - --nfsd_file_hashtbl[nf->nf_hashval].nfb_count; + --nfsd_file_hashtbl[hashval].nfb_count; hlist_del_rcu(&nf->nf_node); atomic_long_dec(&nfsd_filecache_count); } From a845511007a63467fee575353c706806c21218b1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:25:57 -0400 Subject: [PATCH 29/78] NFSD: Refactor __nfsd_file_close_inode() The code that computes the hashval is the same in both callers. To prevent them from going stale, reframe the documenting comments to remove descriptions of the underlying hash table structure, which is about to be replaced. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 40 +++++++++++++++++++------------------ fs/nfsd/trace.h | 48 +++++++++++++++++++++++++++++++++------------ 2 files changed, 56 insertions(+), 32 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index a9034d87f006..1ff58978421c 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -559,39 +559,44 @@ static struct shrinker nfsd_file_shrinker = { .seeks = 1, }; -static void -__nfsd_file_close_inode(struct inode *inode, unsigned int hashval, - struct list_head *dispose) +/* + * Find all cache items across all net namespaces that match @inode and + * move them to @dispose. The lookup is atomic wrt nfsd_file_acquire(). + */ +static unsigned int +__nfsd_file_close_inode(struct inode *inode, struct list_head *dispose) { + unsigned int hashval = (unsigned int)hash_long(inode->i_ino, + NFSD_FILE_HASH_BITS); + unsigned int count = 0; struct nfsd_file *nf; struct hlist_node *tmp; spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) { - if (inode == nf->nf_inode) + if (inode == nf->nf_inode) { nfsd_file_unhash_and_release_locked(nf, dispose); + count++; + } } spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); + return count; } /** * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file * @inode: inode of the file to attempt to remove * - * Walk the whole hash bucket, looking for any files that correspond to "inode". - * If any do, then unhash them and put the hashtable reference to them and - * destroy any that had their last reference put. Also ensure that any of the - * fputs also have their final __fput done as well. + * Unhash and put, then flush and fput all cache items associated with @inode. */ void nfsd_file_close_inode_sync(struct inode *inode) { - unsigned int hashval = (unsigned int)hash_long(inode->i_ino, - NFSD_FILE_HASH_BITS); LIST_HEAD(dispose); + unsigned int count; - __nfsd_file_close_inode(inode, hashval, &dispose); - trace_nfsd_file_close_inode_sync(inode, !list_empty(&dispose)); + count = __nfsd_file_close_inode(inode, &dispose); + trace_nfsd_file_close_inode_sync(inode, count); nfsd_file_dispose_list_sync(&dispose); } @@ -599,19 +604,16 @@ nfsd_file_close_inode_sync(struct inode *inode) * nfsd_file_close_inode - attempt a delayed close of a nfsd_file * @inode: inode of the file to attempt to remove * - * Walk the whole hash bucket, looking for any files that correspond to "inode". - * If any do, then unhash them and put the hashtable reference to them and - * destroy any that had their last reference put. + * Unhash and put all cache item associated with @inode. */ static void nfsd_file_close_inode(struct inode *inode) { - unsigned int hashval = (unsigned int)hash_long(inode->i_ino, - NFSD_FILE_HASH_BITS); LIST_HEAD(dispose); + unsigned int count; - __nfsd_file_close_inode(inode, hashval, &dispose); - trace_nfsd_file_close_inode(inode, !list_empty(&dispose)); + count = __nfsd_file_close_inode(inode, &dispose); + trace_nfsd_file_close_inode(inode, count); nfsd_file_dispose_list_delayed(&dispose); } diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 9a146d71ad9e..79bf985bfc7d 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -848,31 +848,53 @@ TRACE_EVENT(nfsd_file_open, DECLARE_EVENT_CLASS(nfsd_file_search_class, TP_PROTO( - struct inode *inode, + const struct inode *inode, + unsigned int count + ), + TP_ARGS(inode, count), + TP_STRUCT__entry( + __field(const struct inode *, inode) + __field(unsigned int, count) + ), + TP_fast_assign( + __entry->inode = inode; + __entry->count = count; + ), + TP_printk("inode=%p count=%u", + __entry->inode, __entry->count) +); + +#define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \ +DEFINE_EVENT(nfsd_file_search_class, name, \ + TP_PROTO( \ + const struct inode *inode, \ + unsigned int count \ + ), \ + TP_ARGS(inode, count)) + +DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync); +DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode); + +TRACE_EVENT(nfsd_file_is_cached, + TP_PROTO( + const struct inode *inode, int found ), TP_ARGS(inode, found), TP_STRUCT__entry( - __field(struct inode *, inode) + __field(const struct inode *, inode) __field(int, found) ), TP_fast_assign( __entry->inode = inode; __entry->found = found; ), - TP_printk("inode=%p found=%d", - __entry->inode, __entry->found) + TP_printk("inode=%p is %scached", + __entry->inode, + __entry->found ? "" : "not " + ) ); -#define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \ -DEFINE_EVENT(nfsd_file_search_class, name, \ - TP_PROTO(struct inode *inode, int found), \ - TP_ARGS(inode, found)) - -DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync); -DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode); -DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached); - TRACE_EVENT(nfsd_file_fsnotify_handle_event, TP_PROTO(struct inode *inode, u32 mask), TP_ARGS(inode, mask), From cb7ec76e73ff6640241c8f1f2f35c81d4005a2d6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:26:03 -0400 Subject: [PATCH 30/78] NFSD: nfsd_file_hash_remove can compute hashval Remove an unnecessary use of nf_hashval. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 1ff58978421c..0766f6f39f16 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -288,6 +288,18 @@ nfsd_file_do_unhash(struct nfsd_file *nf) atomic_long_dec(&nfsd_filecache_count); } +static void +nfsd_file_hash_remove(struct nfsd_file *nf) +{ + struct inode *inode = nf->nf_inode; + unsigned int hashval = (unsigned int)hash_long(inode->i_ino, + NFSD_FILE_HASH_BITS); + + spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); + nfsd_file_do_unhash(nf); + spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); +} + static bool nfsd_file_unhash(struct nfsd_file *nf) { @@ -507,11 +519,8 @@ static void nfsd_file_gc_dispose_list(struct list_head *dispose) { struct nfsd_file *nf; - list_for_each_entry(nf, dispose, nf_lru) { - spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); - nfsd_file_do_unhash(nf); - spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); - } + list_for_each_entry(nf, dispose, nf_lru) + nfsd_file_hash_remove(nf); nfsd_file_dispose_list_delayed(dispose); } From f0743c2b25c65debd4f599a7c861428cd9de5906 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:26:10 -0400 Subject: [PATCH 31/78] NFSD: Remove nfsd_file::nf_hashval The value in this field can always be computed from nf_inode, thus it is no longer used. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 6 ++---- fs/nfsd/filecache.h | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 0766f6f39f16..1178cff78df9 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -168,8 +168,7 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf) } static struct nfsd_file * -nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, - struct net *net) +nfsd_file_alloc(struct inode *inode, unsigned int may, struct net *net) { struct nfsd_file *nf; @@ -183,7 +182,6 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, nf->nf_net = net; nf->nf_flags = 0; nf->nf_inode = inode; - nf->nf_hashval = hashval; refcount_set(&nf->nf_ref, 1); nf->nf_may = may & NFSD_FILE_MAY_MASK; nf->nf_mark = NULL; @@ -1006,7 +1004,7 @@ retry: if (nf) goto wait_for_construction; - new = nfsd_file_alloc(inode, may_flags, hashval, net); + new = nfsd_file_alloc(inode, may_flags, net); if (!new) { status = nfserr_jukebox; goto out_status; diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index c6ad5fe47f12..82051e1b8420 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -40,7 +40,6 @@ struct nfsd_file { #define NFSD_FILE_REFERENCED (2) unsigned long nf_flags; struct inode *nf_inode; - unsigned int nf_hashval; refcount_t nf_ref; unsigned char nf_may; struct nfsd_file_mark *nf_mark; From c7b824c3d06c85e054caf86e227255112c5e3c38 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:26:16 -0400 Subject: [PATCH 32/78] NFSD: Replace the "init once" mechanism In a moment, the nfsd_file_hashtbl global will be replaced with an rhashtable. Replace the one or two spots that need to check if the hash table is available. We can easily reuse the SHUTDOWN flag for this purpose. Document that this mechanism relies on callers to hold the nfsd_mutex to prevent init, shutdown, and purging to run concurrently. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 1178cff78df9..433448b3786b 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -28,7 +28,7 @@ #define NFSD_FILE_HASH_SIZE (1 << NFSD_FILE_HASH_BITS) #define NFSD_LAUNDRETTE_DELAY (2 * HZ) -#define NFSD_FILE_SHUTDOWN (1) +#define NFSD_FILE_CACHE_UP (0) /* We only care about NFSD_MAY_READ/WRITE for this cache */ #define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE) @@ -59,7 +59,7 @@ static struct kmem_cache *nfsd_file_slab; static struct kmem_cache *nfsd_file_mark_slab; static struct nfsd_fcache_bucket *nfsd_file_hashtbl; static struct list_lru nfsd_file_lru; -static long nfsd_file_lru_flags; +static unsigned long nfsd_file_flags; static struct fsnotify_group *nfsd_file_fsnotify_group; static atomic_long_t nfsd_filecache_count; static struct delayed_work nfsd_filecache_laundrette; @@ -67,9 +67,8 @@ static struct delayed_work nfsd_filecache_laundrette; static void nfsd_file_schedule_laundrette(void) { - long count = atomic_long_read(&nfsd_filecache_count); - - if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags)) + if ((atomic_long_read(&nfsd_filecache_count) == 0) || + test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0) return; queue_delayed_work(system_wq, &nfsd_filecache_laundrette, @@ -698,9 +697,8 @@ nfsd_file_cache_init(void) int ret = -ENOMEM; unsigned int i; - clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); - - if (nfsd_file_hashtbl) + lockdep_assert_held(&nfsd_mutex); + if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) return 0; nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0); @@ -786,8 +784,8 @@ out_err: /* * Note this can deadlock with nfsd_file_lru_cb. */ -void -nfsd_file_cache_purge(struct net *net) +static void +__nfsd_file_cache_purge(struct net *net) { unsigned int i; struct nfsd_file *nf; @@ -795,9 +793,6 @@ nfsd_file_cache_purge(struct net *net) LIST_HEAD(dispose); bool del; - if (!nfsd_file_hashtbl) - return; - for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i]; @@ -858,6 +853,19 @@ nfsd_file_cache_start_net(struct net *net) return nn->fcache_disposal ? 0 : -ENOMEM; } +/** + * nfsd_file_cache_purge - Remove all cache items associated with @net + * @net: target net namespace + * + */ +void +nfsd_file_cache_purge(struct net *net) +{ + lockdep_assert_held(&nfsd_mutex); + if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) + __nfsd_file_cache_purge(net); +} + void nfsd_file_cache_shutdown_net(struct net *net) { @@ -870,7 +878,9 @@ nfsd_file_cache_shutdown(void) { int i; - set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); + lockdep_assert_held(&nfsd_mutex); + if (test_and_clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0) + return; lease_unregister_notifier(&nfsd_file_lease_notifier); unregister_shrinker(&nfsd_file_shrinker); @@ -879,7 +889,7 @@ nfsd_file_cache_shutdown(void) * calling nfsd_file_cache_purge */ cancel_delayed_work_sync(&nfsd_filecache_laundrette); - nfsd_file_cache_purge(NULL); + __nfsd_file_cache_purge(NULL); list_lru_destroy(&nfsd_file_lru); rcu_barrier(); fsnotify_put_group(nfsd_file_fsnotify_group); @@ -1143,7 +1153,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) * don't end up racing with server shutdown */ mutex_lock(&nfsd_mutex); - if (nfsd_file_hashtbl) { + if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) { for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { count += nfsd_file_hashtbl[i].nfb_count; longest = max(longest, nfsd_file_hashtbl[i].nfb_count); From fc22945ecc2a0a028f3683115f98a922d506c284 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:26:23 -0400 Subject: [PATCH 33/78] NFSD: Set up an rhashtable for the filecache Add code to initialize and tear down an rhashtable. The rhashtable is not used yet. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 160 ++++++++++++++++++++++++++++++++++++++------ fs/nfsd/filecache.h | 1 + 2 files changed, 140 insertions(+), 21 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 433448b3786b..a5ae5b4620d0 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "vfs.h" #include "nfsd.h" @@ -63,6 +64,136 @@ static unsigned long nfsd_file_flags; static struct fsnotify_group *nfsd_file_fsnotify_group; static atomic_long_t nfsd_filecache_count; static struct delayed_work nfsd_filecache_laundrette; +static struct rhashtable nfsd_file_rhash_tbl + ____cacheline_aligned_in_smp; + +enum nfsd_file_lookup_type { + NFSD_FILE_KEY_INODE, + NFSD_FILE_KEY_FULL, +}; + +struct nfsd_file_lookup_key { + struct inode *inode; + struct net *net; + const struct cred *cred; + unsigned char need; + enum nfsd_file_lookup_type type; +}; + +/* + * The returned hash value is based solely on the address of an in-code + * inode, a pointer to a slab-allocated object. The entropy in such a + * pointer is concentrated in its middle bits. + */ +static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed) +{ + unsigned long ptr = (unsigned long)inode; + u32 k; + + k = ptr >> L1_CACHE_SHIFT; + k &= 0x00ffffff; + return jhash2(&k, 1, seed); +} + +/** + * nfsd_file_key_hashfn - Compute the hash value of a lookup key + * @data: key on which to compute the hash value + * @len: rhash table's key_len parameter (unused) + * @seed: rhash table's random seed of the day + * + * Return value: + * Computed 32-bit hash value + */ +static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed) +{ + const struct nfsd_file_lookup_key *key = data; + + return nfsd_file_inode_hash(key->inode, seed); +} + +/** + * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file + * @data: object on which to compute the hash value + * @len: rhash table's key_len parameter (unused) + * @seed: rhash table's random seed of the day + * + * Return value: + * Computed 32-bit hash value + */ +static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed) +{ + const struct nfsd_file *nf = data; + + return nfsd_file_inode_hash(nf->nf_inode, seed); +} + +static bool +nfsd_match_cred(const struct cred *c1, const struct cred *c2) +{ + int i; + + if (!uid_eq(c1->fsuid, c2->fsuid)) + return false; + if (!gid_eq(c1->fsgid, c2->fsgid)) + return false; + if (c1->group_info == NULL || c2->group_info == NULL) + return c1->group_info == c2->group_info; + if (c1->group_info->ngroups != c2->group_info->ngroups) + return false; + for (i = 0; i < c1->group_info->ngroups; i++) { + if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i])) + return false; + } + return true; +} + +/** + * nfsd_file_obj_cmpfn - Match a cache item against search criteria + * @arg: search criteria + * @ptr: cache item to check + * + * Return values: + * %0 - Item matches search criteria + * %1 - Item does not match search criteria + */ +static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct nfsd_file_lookup_key *key = arg->key; + const struct nfsd_file *nf = ptr; + + switch (key->type) { + case NFSD_FILE_KEY_INODE: + if (nf->nf_inode != key->inode) + return 1; + break; + case NFSD_FILE_KEY_FULL: + if (nf->nf_inode != key->inode) + return 1; + if (nf->nf_may != key->need) + return 1; + if (nf->nf_net != key->net) + return 1; + if (!nfsd_match_cred(nf->nf_cred, key->cred)) + return 1; + if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) + return 1; + break; + } + return 0; +} + +static const struct rhashtable_params nfsd_file_rhash_params = { + .key_len = sizeof_field(struct nfsd_file, nf_inode), + .key_offset = offsetof(struct nfsd_file, nf_inode), + .head_offset = offsetof(struct nfsd_file, nf_rhash), + .hashfn = nfsd_file_key_hashfn, + .obj_hashfn = nfsd_file_obj_hashfn, + .obj_cmpfn = nfsd_file_obj_cmpfn, + /* Reduce resizing churn on light workloads */ + .min_size = 512, /* buckets */ + .automatic_shrinking = true, +}; static void nfsd_file_schedule_laundrette(void) @@ -694,13 +825,18 @@ static const struct fsnotify_ops nfsd_file_fsnotify_ops = { int nfsd_file_cache_init(void) { - int ret = -ENOMEM; + int ret; unsigned int i; lockdep_assert_held(&nfsd_mutex); if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) return 0; + ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params); + if (ret) + return ret; + + ret = -ENOMEM; nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0); if (!nfsd_filecache_wq) goto out; @@ -778,6 +914,7 @@ out_err: nfsd_file_hashtbl = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; + rhashtable_destroy(&nfsd_file_rhash_tbl); goto out; } @@ -903,6 +1040,7 @@ nfsd_file_cache_shutdown(void) nfsd_file_hashtbl = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; + rhashtable_destroy(&nfsd_file_rhash_tbl); for_each_possible_cpu(i) { per_cpu(nfsd_file_cache_hits, i) = 0; @@ -914,26 +1052,6 @@ nfsd_file_cache_shutdown(void) } } -static bool -nfsd_match_cred(const struct cred *c1, const struct cred *c2) -{ - int i; - - if (!uid_eq(c1->fsuid, c2->fsuid)) - return false; - if (!gid_eq(c1->fsgid, c2->fsgid)) - return false; - if (c1->group_info == NULL || c2->group_info == NULL) - return c1->group_info == c2->group_info; - if (c1->group_info->ngroups != c2->group_info->ngroups) - return false; - for (i = 0; i < c1->group_info->ngroups; i++) { - if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i])) - return false; - } - return true; -} - static struct nfsd_file * nfsd_file_find_locked(struct inode *inode, unsigned int may_flags, unsigned int hashval, struct net *net) diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 82051e1b8420..5cbfc61a7d7d 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -29,6 +29,7 @@ struct nfsd_file_mark { * never be dereferenced, only used for comparison. */ struct nfsd_file { + struct rhash_head nf_rhash; struct hlist_node nf_node; struct list_head nf_lru; struct rcu_head nf_rcu; From ce502f81ba884c1fe45dc0ebddbcaaa4ec0fc5fb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:26:30 -0400 Subject: [PATCH 34/78] NFSD: Convert the filecache to use rhashtable Enable the filecache hash table to start small, then grow with the workload. Smaller server deployments benefit because there should be lower memory utilization. Larger server deployments should see improved scaling with the number of open files. Suggested-by: Jeff Layton Suggested-by: Dave Chinner Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 261 +++++++++++++++++++------------------------- fs/nfsd/trace.h | 63 ++++++++++- 2 files changed, 177 insertions(+), 147 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index a5ae5b4620d0..aed3cb6bf865 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -62,7 +62,6 @@ static struct nfsd_fcache_bucket *nfsd_file_hashtbl; static struct list_lru nfsd_file_lru; static unsigned long nfsd_file_flags; static struct fsnotify_group *nfsd_file_fsnotify_group; -static atomic_long_t nfsd_filecache_count; static struct delayed_work nfsd_filecache_laundrette; static struct rhashtable nfsd_file_rhash_tbl ____cacheline_aligned_in_smp; @@ -198,7 +197,7 @@ static const struct rhashtable_params nfsd_file_rhash_params = { static void nfsd_file_schedule_laundrette(void) { - if ((atomic_long_read(&nfsd_filecache_count) == 0) || + if ((atomic_read(&nfsd_file_rhash_tbl.nelems) == 0) || test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0) return; @@ -298,7 +297,7 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf) } static struct nfsd_file * -nfsd_file_alloc(struct inode *inode, unsigned int may, struct net *net) +nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) { struct nfsd_file *nf; @@ -309,11 +308,14 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, struct net *net) nf->nf_birthtime = ktime_get(); nf->nf_file = NULL; nf->nf_cred = get_current_cred(); - nf->nf_net = net; + nf->nf_net = key->net; nf->nf_flags = 0; - nf->nf_inode = inode; - refcount_set(&nf->nf_ref, 1); - nf->nf_may = may & NFSD_FILE_MAY_MASK; + __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); + __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); + nf->nf_inode = key->inode; + /* nf_ref is pre-incremented for hash table */ + refcount_set(&nf->nf_ref, 2); + nf->nf_may = key->need; nf->nf_mark = NULL; trace_nfsd_file_alloc(nf); } @@ -399,40 +401,21 @@ static void nfsd_file_lru_remove(struct nfsd_file *nf) } static void -nfsd_file_do_unhash(struct nfsd_file *nf) +nfsd_file_hash_remove(struct nfsd_file *nf) { - struct inode *inode = nf->nf_inode; - unsigned int hashval = (unsigned int)hash_long(inode->i_ino, - NFSD_FILE_HASH_BITS); - - lockdep_assert_held(&nfsd_file_hashtbl[hashval].nfb_lock); - trace_nfsd_file_unhash(nf); if (nfsd_file_check_write_error(nf)) nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); - --nfsd_file_hashtbl[hashval].nfb_count; - hlist_del_rcu(&nf->nf_node); - atomic_long_dec(&nfsd_filecache_count); -} - -static void -nfsd_file_hash_remove(struct nfsd_file *nf) -{ - struct inode *inode = nf->nf_inode; - unsigned int hashval = (unsigned int)hash_long(inode->i_ino, - NFSD_FILE_HASH_BITS); - - spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); - nfsd_file_do_unhash(nf); - spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); + rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash, + nfsd_file_rhash_params); } static bool nfsd_file_unhash(struct nfsd_file *nf) { if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - nfsd_file_do_unhash(nf); + nfsd_file_hash_remove(nf); return true; } return false; @@ -442,9 +425,9 @@ nfsd_file_unhash(struct nfsd_file *nf) * Return true if the file was unhashed. */ static bool -nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose) +nfsd_file_unhash_and_dispose(struct nfsd_file *nf, struct list_head *dispose) { - trace_nfsd_file_unhash_and_release_locked(nf); + trace_nfsd_file_unhash_and_dispose(nf); if (!nfsd_file_unhash(nf)) return false; /* keep final reference for nfsd_file_lru_dispose */ @@ -703,20 +686,23 @@ static struct shrinker nfsd_file_shrinker = { static unsigned int __nfsd_file_close_inode(struct inode *inode, struct list_head *dispose) { - unsigned int hashval = (unsigned int)hash_long(inode->i_ino, - NFSD_FILE_HASH_BITS); - unsigned int count = 0; - struct nfsd_file *nf; - struct hlist_node *tmp; + struct nfsd_file_lookup_key key = { + .type = NFSD_FILE_KEY_INODE, + .inode = inode, + }; + unsigned int count = 0; + struct nfsd_file *nf; - spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); - hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) { - if (inode == nf->nf_inode) { - nfsd_file_unhash_and_release_locked(nf, dispose); - count++; - } - } - spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); + rcu_read_lock(); + do { + nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, + nfsd_file_rhash_params); + if (!nf) + break; + nfsd_file_unhash_and_dispose(nf, dispose); + count++; + } while (1); + rcu_read_unlock(); return count; } @@ -924,30 +910,35 @@ out_err: static void __nfsd_file_cache_purge(struct net *net) { - unsigned int i; - struct nfsd_file *nf; - struct hlist_node *next; + struct rhashtable_iter iter; + struct nfsd_file *nf; LIST_HEAD(dispose); bool del; - for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { - struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i]; + rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter); + do { + rhashtable_walk_start(&iter); - spin_lock(&nfb->nfb_lock); - hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) { + nf = rhashtable_walk_next(&iter); + while (!IS_ERR_OR_NULL(nf)) { if (net && nf->nf_net != net) continue; - del = nfsd_file_unhash_and_release_locked(nf, &dispose); + del = nfsd_file_unhash_and_dispose(nf, &dispose); /* * Deadlock detected! Something marked this entry as * unhased, but hasn't removed it from the hash list. */ WARN_ON_ONCE(!del); + + nf = rhashtable_walk_next(&iter); } - spin_unlock(&nfb->nfb_lock); - nfsd_file_dispose_list(&dispose); - } + + rhashtable_walk_stop(&iter); + } while (nf == ERR_PTR(-EAGAIN)); + rhashtable_walk_exit(&iter); + + nfsd_file_dispose_list(&dispose); } static struct nfsd_fcache_disposal * @@ -1052,56 +1043,29 @@ nfsd_file_cache_shutdown(void) } } -static struct nfsd_file * -nfsd_file_find_locked(struct inode *inode, unsigned int may_flags, - unsigned int hashval, struct net *net) -{ - struct nfsd_file *nf; - unsigned char need = may_flags & NFSD_FILE_MAY_MASK; - - hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, - nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) { - if (nf->nf_may != need) - continue; - if (nf->nf_inode != inode) - continue; - if (nf->nf_net != net) - continue; - if (!nfsd_match_cred(nf->nf_cred, current_cred())) - continue; - if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) - continue; - if (nfsd_file_get(nf) != NULL) - return nf; - } - return NULL; -} - /** - * nfsd_file_is_cached - are there any cached open files for this fh? - * @inode: inode of the file to check + * nfsd_file_is_cached - are there any cached open files for this inode? + * @inode: inode to check * - * Scan the hashtable for open files that match this fh. Returns true if there - * are any, and false if not. + * The lookup matches inodes in all net namespaces and is atomic wrt + * nfsd_file_acquire(). + * + * Return values: + * %true: filecache contains at least one file matching this inode + * %false: filecache contains no files matching this inode */ bool nfsd_file_is_cached(struct inode *inode) { - bool ret = false; - struct nfsd_file *nf; - unsigned int hashval; + struct nfsd_file_lookup_key key = { + .type = NFSD_FILE_KEY_INODE, + .inode = inode, + }; + bool ret = false; - hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); - - rcu_read_lock(); - hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, - nf_node) { - if (inode == nf->nf_inode) { - ret = true; - break; - } - } - rcu_read_unlock(); + if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key, + nfsd_file_rhash_params) != NULL) + ret = true; trace_nfsd_file_is_cached(inode, (int)ret); return ret; } @@ -1110,39 +1074,51 @@ static __be32 nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf, bool open) { - __be32 status; - struct net *net = SVC_NET(rqstp); + struct nfsd_file_lookup_key key = { + .type = NFSD_FILE_KEY_FULL, + .need = may_flags & NFSD_FILE_MAY_MASK, + .net = SVC_NET(rqstp), + }; struct nfsd_file *nf, *new; - struct inode *inode; - unsigned int hashval; bool retry = true; + __be32 status; - /* FIXME: skip this if fh_dentry is already set? */ status = fh_verify(rqstp, fhp, S_IFREG, may_flags|NFSD_MAY_OWNER_OVERRIDE); if (status != nfs_ok) return status; + key.inode = d_inode(fhp->fh_dentry); + key.cred = get_current_cred(); - inode = d_inode(fhp->fh_dentry); - hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); retry: - rcu_read_lock(); - nf = nfsd_file_find_locked(inode, may_flags, hashval, net); - rcu_read_unlock(); + /* Avoid allocation if the item is already in cache */ + nf = rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key, + nfsd_file_rhash_params); + if (nf) + nf = nfsd_file_get(nf); if (nf) goto wait_for_construction; - new = nfsd_file_alloc(inode, may_flags, net); + new = nfsd_file_alloc(&key, may_flags); if (!new) { status = nfserr_jukebox; goto out_status; } - spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); - nf = nfsd_file_find_locked(inode, may_flags, hashval, net); - if (nf == NULL) + nf = rhashtable_lookup_get_insert_key(&nfsd_file_rhash_tbl, + &key, &new->nf_rhash, + nfsd_file_rhash_params); + if (!nf) { + nf = new; goto open_file; - spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); + } + if (IS_ERR(nf)) + goto insert_err; + nf = nfsd_file_get(nf); + if (nf == NULL) { + nf = new; + goto open_file; + } nfsd_file_slab_free(&new->nf_rcu); wait_for_construction: @@ -1150,6 +1126,7 @@ wait_for_construction: /* Did construction of this file fail? */ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf); if (!retry) { status = nfserr_jukebox; goto out; @@ -1174,22 +1151,11 @@ out: } out_status: - trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status); + put_cred(key.cred); + trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); return status; open_file: - nf = new; - /* Take reference for the hashtable */ - refcount_inc(&nf->nf_ref); - __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); - __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); - hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head); - ++nfsd_file_hashtbl[hashval].nfb_count; - nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount, - nfsd_file_hashtbl[hashval].nfb_count); - spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); - atomic_long_inc(&nfsd_filecache_count); - nf->nf_mark = nfsd_file_mark_find_or_create(nf); if (nf->nf_mark) { if (open) { @@ -1204,19 +1170,20 @@ open_file: * If construction failed, or we raced with a call to unlink() * then unhash. */ - if (status != nfs_ok || inode->i_nlink == 0) { - bool do_free; - nfsd_file_lru_remove(nf); - spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); - do_free = nfsd_file_unhash(nf); - spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); - if (do_free) + if (status != nfs_ok || key.inode->i_nlink == 0) + if (nfsd_file_unhash(nf)) nfsd_file_put_noref(nf); - } clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); smp_mb__after_atomic(); wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); goto out; + +insert_err: + nfsd_file_slab_free(&new->nf_rcu); + trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, PTR_ERR(nf)); + nf = NULL; + status = nfserr_jukebox; + goto out_status; } /** @@ -1262,21 +1229,23 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) { unsigned long releases = 0, pages_flushed = 0, evictions = 0; unsigned long hits = 0, acquisitions = 0; - unsigned int i, count = 0, longest = 0; + unsigned int i, count = 0, buckets = 0; unsigned long lru = 0, total_age = 0; - /* - * No need for spinlocks here since we're not terribly interested in - * accuracy. We do take the nfsd_mutex simply to ensure that we - * don't end up racing with server shutdown - */ + /* Serialize with server shutdown */ mutex_lock(&nfsd_mutex); if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) { - for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { - count += nfsd_file_hashtbl[i].nfb_count; - longest = max(longest, nfsd_file_hashtbl[i].nfb_count); - } + struct bucket_table *tbl; + struct rhashtable *ht; + lru = list_lru_count(&nfsd_file_lru); + + rcu_read_lock(); + ht = &nfsd_file_rhash_tbl; + count = atomic_read(&ht->nelems); + tbl = rht_dereference_rcu(ht->tbl, ht); + buckets = tbl->size; + rcu_read_unlock(); } mutex_unlock(&nfsd_mutex); @@ -1290,7 +1259,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) } seq_printf(m, "total entries: %u\n", count); - seq_printf(m, "longest chain: %u\n", longest); + seq_printf(m, "hash buckets: %u\n", buckets); seq_printf(m, "lru entries: %lu\n", lru); seq_printf(m, "cache hits: %lu\n", hits); seq_printf(m, "acquisitions: %lu\n", acquisitions); diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 79bf985bfc7d..5d634361debf 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -778,7 +778,7 @@ DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc); DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash); DEFINE_NFSD_FILE_EVENT(nfsd_file_put); -DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked); +DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_dispose); TRACE_EVENT(nfsd_file_acquire, TP_PROTO( @@ -821,6 +821,67 @@ TRACE_EVENT(nfsd_file_acquire, __entry->nf_file, __entry->status) ); +TRACE_EVENT(nfsd_file_insert_err, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct inode *inode, + unsigned int may_flags, + long error + ), + TP_ARGS(rqstp, inode, may_flags, error), + TP_STRUCT__entry( + __field(u32, xid) + __field(const void *, inode) + __field(unsigned long, may_flags) + __field(long, error) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->inode = inode; + __entry->may_flags = may_flags; + __entry->error = error; + ), + TP_printk("xid=0x%x inode=%p may_flags=%s error=%ld", + __entry->xid, __entry->inode, + show_nfsd_may_flags(__entry->may_flags), + __entry->error + ) +); + +TRACE_EVENT(nfsd_file_cons_err, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct inode *inode, + unsigned int may_flags, + const struct nfsd_file *nf + ), + TP_ARGS(rqstp, inode, may_flags, nf), + TP_STRUCT__entry( + __field(u32, xid) + __field(const void *, inode) + __field(unsigned long, may_flags) + __field(unsigned int, nf_ref) + __field(unsigned long, nf_flags) + __field(unsigned long, nf_may) + __field(const void *, nf_file) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->inode = inode; + __entry->may_flags = may_flags; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->nf_flags = nf->nf_flags; + __entry->nf_may = nf->nf_may; + __entry->nf_file = nf->nf_file; + ), + TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p", + __entry->xid, __entry->inode, + show_nfsd_may_flags(__entry->may_flags), __entry->nf_ref, + show_nf_flags(__entry->nf_flags), + show_nfsd_may_flags(__entry->nf_may), __entry->nf_file + ) +); + TRACE_EVENT(nfsd_file_open, TP_PROTO(struct nfsd_file *nf, __be32 status), TP_ARGS(nf, status), From 0ec8e9d1539a7b8109a554028bbce441052f847e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:26:36 -0400 Subject: [PATCH 35/78] NFSD: Clean up unused code after rhashtable conversion Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 33 +-------------------------------- fs/nfsd/filecache.h | 1 - 2 files changed, 1 insertion(+), 33 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index aed3cb6bf865..bfa4df7daff3 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -22,11 +22,6 @@ #include "filecache.h" #include "trace.h" -#define NFSDDBG_FACILITY NFSDDBG_FH - -/* FIXME: dynamically size this for the machine somehow? */ -#define NFSD_FILE_HASH_BITS 12 -#define NFSD_FILE_HASH_SIZE (1 << NFSD_FILE_HASH_BITS) #define NFSD_LAUNDRETTE_DELAY (2 * HZ) #define NFSD_FILE_CACHE_UP (0) @@ -34,13 +29,6 @@ /* We only care about NFSD_MAY_READ/WRITE for this cache */ #define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE) -struct nfsd_fcache_bucket { - struct hlist_head nfb_head; - spinlock_t nfb_lock; - unsigned int nfb_count; - unsigned int nfb_maxcount; -}; - static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions); static DEFINE_PER_CPU(unsigned long, nfsd_file_releases); @@ -58,7 +46,6 @@ static struct workqueue_struct *nfsd_filecache_wq __read_mostly; static struct kmem_cache *nfsd_file_slab; static struct kmem_cache *nfsd_file_mark_slab; -static struct nfsd_fcache_bucket *nfsd_file_hashtbl; static struct list_lru nfsd_file_lru; static unsigned long nfsd_file_flags; static struct fsnotify_group *nfsd_file_fsnotify_group; @@ -303,7 +290,6 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL); if (nf) { - INIT_HLIST_NODE(&nf->nf_node); INIT_LIST_HEAD(&nf->nf_lru); nf->nf_birthtime = ktime_get(); nf->nf_file = NULL; @@ -811,8 +797,7 @@ static const struct fsnotify_ops nfsd_file_fsnotify_ops = { int nfsd_file_cache_init(void) { - int ret; - unsigned int i; + int ret; lockdep_assert_held(&nfsd_mutex); if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) @@ -827,13 +812,6 @@ nfsd_file_cache_init(void) if (!nfsd_filecache_wq) goto out; - nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE, - sizeof(*nfsd_file_hashtbl), GFP_KERNEL); - if (!nfsd_file_hashtbl) { - pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n"); - goto out_err; - } - nfsd_file_slab = kmem_cache_create("nfsd_file", sizeof(struct nfsd_file), 0, 0, NULL); if (!nfsd_file_slab) { @@ -877,11 +855,6 @@ nfsd_file_cache_init(void) goto out_notifier; } - for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { - INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head); - spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock); - } - INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker); out: return ret; @@ -896,8 +869,6 @@ out_err: nfsd_file_slab = NULL; kmem_cache_destroy(nfsd_file_mark_slab); nfsd_file_mark_slab = NULL; - kvfree(nfsd_file_hashtbl); - nfsd_file_hashtbl = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; rhashtable_destroy(&nfsd_file_rhash_tbl); @@ -1027,8 +998,6 @@ nfsd_file_cache_shutdown(void) fsnotify_wait_marks_destroyed(); kmem_cache_destroy(nfsd_file_mark_slab); nfsd_file_mark_slab = NULL; - kvfree(nfsd_file_hashtbl); - nfsd_file_hashtbl = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; rhashtable_destroy(&nfsd_file_rhash_tbl); diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 5cbfc61a7d7d..ee9ed99d8b8f 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -30,7 +30,6 @@ struct nfsd_file_mark { */ struct nfsd_file { struct rhash_head nf_rhash; - struct hlist_node nf_node; struct list_head nf_lru; struct rcu_head nf_rcu; struct file *nf_file; From be0230069fcbf7d332d010b57c1d0cfd623a84d6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:26:43 -0400 Subject: [PATCH 36/78] NFSD: Separate tracepoints for acquire and create These tracepoints collect different information: the create case does not open a file, so there's no nf_file available. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 9 ++++---- fs/nfsd/nfs4state.c | 1 + fs/nfsd/trace.h | 54 ++++++++++++++++++++++++++++++++++++++------- 3 files changed, 52 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index bfa4df7daff3..df54f882752c 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1040,7 +1040,7 @@ nfsd_file_is_cached(struct inode *inode) } static __be32 -nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, +nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf, bool open) { struct nfsd_file_lookup_key key = { @@ -1121,7 +1121,8 @@ out: out_status: put_cred(key.cred); - trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); + if (open) + trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); return status; open_file: @@ -1169,7 +1170,7 @@ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, true); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true); } /** @@ -1186,7 +1187,7 @@ __be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, false); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false); } /* diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9409a0dc1b76..3a05c095dfe5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5104,6 +5104,7 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, goto out_put_access; nf->nf_file = open->op_filp; open->op_filp = NULL; + trace_nfsd_file_create(rqstp, access, nf); } spin_lock(&fp->fi_lock); diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 5d634361debf..6bbfb987271d 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -782,10 +782,10 @@ DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_dispose); TRACE_EVENT(nfsd_file_acquire, TP_PROTO( - struct svc_rqst *rqstp, - struct inode *inode, + const struct svc_rqst *rqstp, + const struct inode *inode, unsigned int may_flags, - struct nfsd_file *nf, + const struct nfsd_file *nf, __be32 status ), @@ -793,12 +793,12 @@ TRACE_EVENT(nfsd_file_acquire, TP_STRUCT__entry( __field(u32, xid) - __field(void *, inode) + __field(const void *, inode) __field(unsigned long, may_flags) - __field(int, nf_ref) + __field(unsigned int, nf_ref) __field(unsigned long, nf_flags) __field(unsigned long, nf_may) - __field(struct file *, nf_file) + __field(const void *, nf_file) __field(u32, status) ), @@ -813,12 +813,50 @@ TRACE_EVENT(nfsd_file_acquire, __entry->status = be32_to_cpu(status); ), - TP_printk("xid=0x%x inode=%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=%p status=%u", + TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p status=%u", __entry->xid, __entry->inode, show_nfsd_may_flags(__entry->may_flags), __entry->nf_ref, show_nf_flags(__entry->nf_flags), show_nfsd_may_flags(__entry->nf_may), - __entry->nf_file, __entry->status) + __entry->nf_file, __entry->status + ) +); + +TRACE_EVENT(nfsd_file_create, + TP_PROTO( + const struct svc_rqst *rqstp, + unsigned int may_flags, + const struct nfsd_file *nf + ), + + TP_ARGS(rqstp, may_flags, nf), + + TP_STRUCT__entry( + __field(const void *, nf_inode) + __field(const void *, nf_file) + __field(unsigned long, may_flags) + __field(unsigned long, nf_flags) + __field(unsigned long, nf_may) + __field(unsigned int, nf_ref) + __field(u32, xid) + ), + + TP_fast_assign( + __entry->nf_inode = nf->nf_inode; + __entry->nf_file = nf->nf_file; + __entry->may_flags = may_flags; + __entry->nf_flags = nf->nf_flags; + __entry->nf_may = nf->nf_may; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->xid = be32_to_cpu(rqstp->rq_xid); + ), + + TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p", + __entry->xid, __entry->nf_inode, + show_nfsd_may_flags(__entry->may_flags), + __entry->nf_ref, show_nf_flags(__entry->nf_flags), + show_nfsd_may_flags(__entry->nf_may), __entry->nf_file + ) ); TRACE_EVENT(nfsd_file_insert_err, From b40a2839470cd62ed68c4a32d72a18ee8975b1ac Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:26:49 -0400 Subject: [PATCH 37/78] NFSD: Move nfsd_file_trace_alloc() tracepoint Avoid recording the allocation of an nfsd_file item that is immediately released because a matching item was already inserted in the hash. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 2 +- fs/nfsd/trace.h | 25 ++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index df54f882752c..ac38b41c89c6 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -303,7 +303,6 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) refcount_set(&nf->nf_ref, 2); nf->nf_may = key->need; nf->nf_mark = NULL; - trace_nfsd_file_alloc(nf); } return nf; } @@ -1126,6 +1125,7 @@ out_status: return status; open_file: + trace_nfsd_file_alloc(nf); nf->nf_mark = nfsd_file_mark_find_or_create(nf); if (nf->nf_mark) { if (open) { diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 6bbfb987271d..9ebd67d461f9 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -774,12 +774,35 @@ DEFINE_EVENT(nfsd_file_class, name, \ TP_PROTO(struct nfsd_file *nf), \ TP_ARGS(nf)) -DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc); DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash); DEFINE_NFSD_FILE_EVENT(nfsd_file_put); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_dispose); +TRACE_EVENT(nfsd_file_alloc, + TP_PROTO( + const struct nfsd_file *nf + ), + TP_ARGS(nf), + TP_STRUCT__entry( + __field(const void *, nf_inode) + __field(unsigned long, nf_flags) + __field(unsigned long, nf_may) + __field(unsigned int, nf_ref) + ), + TP_fast_assign( + __entry->nf_inode = nf->nf_inode; + __entry->nf_flags = nf->nf_flags; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->nf_may = nf->nf_may; + ), + TP_printk("inode=%p ref=%u flags=%s may=%s", + __entry->nf_inode, __entry->nf_ref, + show_nf_flags(__entry->nf_flags), + show_nfsd_may_flags(__entry->nf_may) + ) +); + TRACE_EVENT(nfsd_file_acquire, TP_PROTO( const struct svc_rqst *rqstp, From 5e138c4a750dc140d881dab4a8804b094bbc08d2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:27:02 -0400 Subject: [PATCH 38/78] NFSD: NFSv4 CLOSE should release an nfsd_file immediately The last close of a file should enable other accessors to open and use that file immediately. Leaving the file open in the filecache prevents other users from accessing that file until the filecache garbage-collects the file -- sometimes that takes several seconds. Reported-by: Wang Yugui Link: https://bugzilla.linux-nfs.org/show_bug.cgi?387 Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 18 ++++++++++++++++++ fs/nfsd/filecache.h | 1 + fs/nfsd/nfs4state.c | 4 ++-- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index ac38b41c89c6..58721fced7f3 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -452,6 +452,24 @@ nfsd_file_put(struct nfsd_file *nf) nfsd_file_put_noref(nf); } +/** + * nfsd_file_close - Close an nfsd_file + * @nf: nfsd_file to close + * + * If this is the final reference for @nf, free it immediately. + * This reflects an on-the-wire CLOSE or DELEGRETURN into the + * VFS and exported filesystem. + */ +void nfsd_file_close(struct nfsd_file *nf) +{ + nfsd_file_put(nf); + if (refcount_dec_if_one(&nf->nf_ref)) { + nfsd_file_unhash(nf); + nfsd_file_lru_remove(nf); + nfsd_file_free(nf); + } +} + struct nfsd_file * nfsd_file_get(struct nfsd_file *nf) { diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index ee9ed99d8b8f..28145f162892 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -52,6 +52,7 @@ void nfsd_file_cache_shutdown(void); int nfsd_file_cache_start_net(struct net *net); void nfsd_file_cache_shutdown_net(struct net *net); void nfsd_file_put(struct nfsd_file *nf); +void nfsd_file_close(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); void nfsd_file_close_inode_sync(struct inode *inode); bool nfsd_file_is_cached(struct inode *inode); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3a05c095dfe5..9d1a3e131c49 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -820,9 +820,9 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) swap(f2, fp->fi_fds[O_RDWR]); spin_unlock(&fp->fi_lock); if (f1) - nfsd_file_put(f1); + nfsd_file_close(f1); if (f2) - nfsd_file_put(f2); + nfsd_file_close(f2); } } From 427f5f83a3191cbf024c5aea6e5b601cdf88d895 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 8 Jul 2022 14:27:09 -0400 Subject: [PATCH 39/78] NFSD: Ensure nf_inode is never dereferenced The documenting comment for struct nf_file states: /* * A representation of a file that has been opened by knfsd. These are hashed * in the hashtable by inode pointer value. Note that this object doesn't * hold a reference to the inode by itself, so the nf_inode pointer should * never be dereferenced, only used for comparison. */ Replace the two existing dereferences to make the comment always true. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/filecache.c | 5 ++--- fs/nfsd/filecache.h | 2 +- fs/nfsd/nfs4state.c | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 58721fced7f3..57d06a2e94a2 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -228,12 +228,11 @@ nfsd_file_mark_put(struct nfsd_file_mark *nfm) } static struct nfsd_file_mark * -nfsd_file_mark_find_or_create(struct nfsd_file *nf) +nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode) { int err; struct fsnotify_mark *mark; struct nfsd_file_mark *nfm = NULL, *new; - struct inode *inode = nf->nf_inode; do { fsnotify_group_lock(nfsd_file_fsnotify_group); @@ -1144,7 +1143,7 @@ out_status: open_file: trace_nfsd_file_alloc(nf); - nf->nf_mark = nfsd_file_mark_find_or_create(nf); + nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode); if (nf->nf_mark) { if (open) { status = nfsd_open_verified(rqstp, fhp, may_flags, diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 28145f162892..8e8c0c47d67d 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -39,7 +39,7 @@ struct nfsd_file { #define NFSD_FILE_PENDING (1) #define NFSD_FILE_REFERENCED (2) unsigned long nf_flags; - struct inode *nf_inode; + struct inode *nf_inode; /* don't deref */ refcount_t nf_ref; unsigned char nf_may; struct nfsd_file_mark *nf_mark; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9d1a3e131c49..994bd11bafe0 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2564,7 +2564,7 @@ static void nfs4_show_fname(struct seq_file *s, struct nfsd_file *f) static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f) { - struct inode *inode = f->nf_inode; + struct inode *inode = file_inode(f->nf_file); seq_printf(s, "superblock: \"%02x:%02x:%ld\"", MAJOR(inode->i_sb->s_dev), From 6867137ebcf4155fe25f2ecf7c29b9fb90a76d1d Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Fri, 15 Jul 2022 16:54:51 -0700 Subject: [PATCH 40/78] NFSD: refactoring v4 specific code to a helper in nfs4state.c This patch moves the v4 specific code from nfsd_init_net() to nfsd4_init_leases_net() helper in nfs4state.c Signed-off-by: Dai Ngo Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 12 ++++++++++++ fs/nfsd/nfsctl.c | 9 +-------- fs/nfsd/nfsd.h | 4 ++++ 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 994bd11bafe0..8676f4c71fa4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4330,6 +4330,18 @@ out: return -ENOMEM; } +void nfsd4_init_leases_net(struct nfsd_net *nn) +{ + nn->nfsd4_lease = 90; /* default lease time */ + nn->nfsd4_grace = 90; + nn->somebody_reclaimed = false; + nn->track_reclaim_completes = false; + nn->clverifier_counter = prandom_u32(); + nn->clientid_base = prandom_u32(); + nn->clientid_counter = nn->clientid_base + 1; + nn->s2s_cp_cl_id = nn->clientid_counter++; +} + static void init_nfs4_replay(struct nfs4_replay *rp) { rp->rp_status = nfserr_serverfault; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 7002edbf2687..164c822ae3ae 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1484,14 +1484,7 @@ static __net_init int nfsd_init_net(struct net *net) retval = nfsd_reply_cache_init(nn); if (retval) goto out_drc_error; - nn->nfsd4_lease = 90; /* default lease time */ - nn->nfsd4_grace = 90; - nn->somebody_reclaimed = false; - nn->track_reclaim_completes = false; - nn->clverifier_counter = prandom_u32(); - nn->clientid_base = prandom_u32(); - nn->clientid_counter = nn->clientid_base + 1; - nn->s2s_cp_cl_id = nn->clientid_counter++; + nfsd4_init_leases_net(nn); get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); seqlock_init(&nn->writeverf_lock); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 9a8b09afc173..ef8087691138 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -496,12 +496,16 @@ extern void unregister_cld_notifier(void); extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn); #endif +extern void nfsd4_init_leases_net(struct nfsd_net *nn); + #else /* CONFIG_NFSD_V4 */ static inline int nfsd4_is_junction(struct dentry *dentry) { return 0; } +static inline void nfsd4_init_leases_net(struct nfsd_net *nn) {}; + #define register_cld_notifier() 0 #define unregister_cld_notifier() do { } while(0) From 0926c39515aa065a296e97dfc8790026f1e53f86 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Fri, 15 Jul 2022 16:54:52 -0700 Subject: [PATCH 41/78] NFSD: keep track of the number of v4 clients in the system Add counter nfs4_client_count to keep track of the total number of v4 clients, including courtesy clients, in the system. Signed-off-by: Dai Ngo Signed-off-by: Chuck Lever --- fs/nfsd/netns.h | 2 ++ fs/nfsd/nfs4state.c | 10 ++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 1b1a962a1804..ce864f001a3e 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -189,6 +189,8 @@ struct nfsd_net { struct nfsd_fcache_disposal *fcache_disposal; siphash_key_t siphash_key; + + atomic_t nfs4_client_count; }; /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8676f4c71fa4..4842cc827f08 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2053,7 +2053,8 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) * This type of memory management is somewhat inefficient, but we use it * anyway since SETCLIENTID is not a common operation. */ -static struct nfs4_client *alloc_client(struct xdr_netobj name) +static struct nfs4_client *alloc_client(struct xdr_netobj name, + struct nfsd_net *nn) { struct nfs4_client *clp; int i; @@ -2076,6 +2077,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) atomic_set(&clp->cl_rpc_users, 0); clp->cl_cb_state = NFSD4_CB_UNKNOWN; clp->cl_state = NFSD4_ACTIVE; + atomic_inc(&nn->nfs4_client_count); atomic_set(&clp->cl_delegs_in_recall, 0); INIT_LIST_HEAD(&clp->cl_idhash); INIT_LIST_HEAD(&clp->cl_openowners); @@ -2183,6 +2185,7 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp) static void __destroy_client(struct nfs4_client *clp) { + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); int i; struct nfs4_openowner *oo; struct nfs4_delegation *dp; @@ -2226,6 +2229,7 @@ __destroy_client(struct nfs4_client *clp) nfsd4_shutdown_callback(clp); if (clp->cl_cb_conn.cb_xprt) svc_xprt_put(clp->cl_cb_conn.cb_xprt); + atomic_add_unless(&nn->nfs4_client_count, -1, 0); free_client(clp); wake_up_all(&expiry_wq); } @@ -2848,7 +2852,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct dentry *dentries[ARRAY_SIZE(client_files)]; - clp = alloc_client(name); + clp = alloc_client(name, nn); if (clp == NULL) return NULL; @@ -4340,6 +4344,8 @@ void nfsd4_init_leases_net(struct nfsd_net *nn) nn->clientid_base = prandom_u32(); nn->clientid_counter = nn->clientid_base + 1; nn->s2s_cp_cl_id = nn->clientid_counter++; + + atomic_set(&nn->nfs4_client_count, 0); } static void init_nfs4_replay(struct nfs4_replay *rp) From 4271c2c0887562318a0afef97d32d8a71cbe0743 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Fri, 15 Jul 2022 16:54:53 -0700 Subject: [PATCH 42/78] NFSD: limit the number of v4 clients to 1024 per 1GB of system memory Currently there is no limit on how many v4 clients are supported by the system. This can be a problem in systems with small memory configuration to function properly when a very large number of clients exist that creates memory shortage conditions. This patch enforces a limit of 1024 NFSv4 clients, including courtesy clients, per 1GB of system memory. When the number of the clients reaches the limit, requests that create new clients are returned with NFS4ERR_DELAY and the laundromat is kicked start to trim old clients. Due to the overhead of the upcall to remove the client record, the maximun number of clients the laundromat removes on each run is limited to 128. This is done to ensure the laundromat can still process the other tasks in a timely manner. Since there is now a limit of the number of clients, the 24-hr idle time limit of courtesy client is no longer needed and was removed. Signed-off-by: Dai Ngo Signed-off-by: Chuck Lever --- fs/nfsd/netns.h | 1 + fs/nfsd/nfs4state.c | 27 +++++++++++++++++++++------ fs/nfsd/nfsd.h | 2 ++ 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index ce864f001a3e..ffe17743cc74 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -191,6 +191,7 @@ struct nfsd_net { siphash_key_t siphash_key; atomic_t nfs4_client_count; + int nfs4_max_clients; }; /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4842cc827f08..e46e3392d557 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2059,6 +2059,10 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name, struct nfs4_client *clp; int i; + if (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients) { + mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); + return NULL; + } clp = kmem_cache_zalloc(client_slab, GFP_KERNEL); if (clp == NULL) return NULL; @@ -4336,6 +4340,9 @@ out: void nfsd4_init_leases_net(struct nfsd_net *nn) { + struct sysinfo si; + u64 max_clients; + nn->nfsd4_lease = 90; /* default lease time */ nn->nfsd4_grace = 90; nn->somebody_reclaimed = false; @@ -4346,6 +4353,10 @@ void nfsd4_init_leases_net(struct nfsd_net *nn) nn->s2s_cp_cl_id = nn->clientid_counter++; atomic_set(&nn->nfs4_client_count, 0); + si_meminfo(&si); + max_clients = (u64)si.totalram * si.mem_unit / (1024 * 1024 * 1024); + max_clients *= NFS4_CLIENTS_PER_GB; + nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB); } static void init_nfs4_replay(struct nfs4_replay *rp) @@ -5811,9 +5822,12 @@ static void nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist, struct laundry_time *lt) { + unsigned int maxreap, reapcnt = 0; struct list_head *pos, *next; struct nfs4_client *clp; + maxreap = (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients) ? + NFSD_CLIENT_MAX_TRIM_PER_RUN : 0; INIT_LIST_HEAD(reaplist); spin_lock(&nn->client_lock); list_for_each_safe(pos, next, &nn->client_lru) { @@ -5824,14 +5838,15 @@ nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist, break; if (!atomic_read(&clp->cl_rpc_users)) clp->cl_state = NFSD4_COURTESY; - if (!client_has_state(clp) || - ktime_get_boottime_seconds() >= - (clp->cl_time + NFSD_COURTESY_CLIENT_TIMEOUT)) + if (!client_has_state(clp)) goto exp_client; - if (nfs4_anylock_blockers(clp)) { + if (!nfs4_anylock_blockers(clp)) + if (reapcnt >= maxreap) + continue; exp_client: - if (!mark_client_expired_locked(clp)) - list_add(&clp->cl_lru, reaplist); + if (!mark_client_expired_locked(clp)) { + list_add(&clp->cl_lru, reaplist); + reapcnt++; } } spin_unlock(&nn->client_lock); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index ef8087691138..57a468ed85c3 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -341,6 +341,8 @@ void nfsd_lockd_shutdown(void); #define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */ #define NFSD_COURTESY_CLIENT_TIMEOUT (24 * 60 * 60) /* seconds */ +#define NFSD_CLIENT_MAX_TRIM_PER_RUN 128 +#define NFS4_CLIENTS_PER_GB 1024 /* * The following attributes are currently not supported by the NFSv4 server: From 3a5940bfa17fb9964bf9688b4356ca643a8f5e2d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 20 Jul 2022 08:39:23 -0400 Subject: [PATCH 43/78] nfsd: silence extraneous printk on nfsd.ko insertion This printk pops every time nfsd.ko gets plugged in. Most kmods don't do that and this one is not very informative. Olaf's email address seems to be defunct at this point anyway. Just drop it. Cc: Olaf Kirch Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfsctl.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 164c822ae3ae..917fa1892fd2 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1519,7 +1519,6 @@ static struct pernet_operations nfsd_net_ops = { static int __init init_nfsd(void) { int retval; - printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); retval = nfsd4_init_slabs(); if (retval) From 095a764b7afb06c9499b798c04eaa3cbf70ebe2d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 22 Jul 2022 16:08:38 -0400 Subject: [PATCH 44/78] NFSD: Optimize nfsd4_encode_operation() write_bytes_to_xdr_buf() is a generic way to place a variable-length data item in an already-reserved spot in the encoding buffer. However, it is costly, and here, it is unnecessary because the data item is fixed in size, the buffer destination address is always word-aligned, and the destination location is already in @p. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2acea7792bb2..b52ea7d8313a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -5373,8 +5373,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) so->so_replay.rp_buf, len); } status: - /* Note that op->status is already in network byte order: */ - write_bytes_to_xdr_buf(xdr->buf, post_err_offset - 4, &op->status, 4); + *p = op->status; } /* From ab04de60ae1cc64ae16b77feae795311b97720c7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 22 Jul 2022 16:08:45 -0400 Subject: [PATCH 45/78] NFSD: Optimize nfsd4_encode_fattr() write_bytes_to_xdr_buf() is a generic way to place a variable-length data item in an already-reserved spot in the encoding buffer. However, it is costly. In nfsd4_encode_fattr(), it is unnecessary because the data item is fixed in size and the buffer destination address is always word-aligned. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b52ea7d8313a..e590236a60ab 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2828,10 +2828,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, struct kstat stat; struct svc_fh *tempfh = NULL; struct kstatfs statfs; - __be32 *p; + __be32 *p, *attrlen_p; int starting_len = xdr->buf->len; int attrlen_offset; - __be32 attrlen; u32 dummy; u64 dummy64; u32 rdattr_err = 0; @@ -2919,10 +2918,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, goto out; attrlen_offset = xdr->buf->len; - p = xdr_reserve_space(xdr, 4); - if (!p) + attrlen_p = xdr_reserve_space(xdr, XDR_UNIT); + if (!attrlen_p) goto out_resource; - p++; /* to be backfilled later */ if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { u32 supp[3]; @@ -3344,8 +3342,7 @@ out_acl: *p++ = cpu_to_be32(err == 0); } - attrlen = htonl(xdr->buf->len - attrlen_offset - 4); - write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4); + *attrlen_p = cpu_to_be32(xdr->buf->len - attrlen_offset - XDR_UNIT); status = nfs_ok; out: From c738b218a2e5a753a336b4b7fee6720b902c7ace Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 22 Jul 2022 16:08:51 -0400 Subject: [PATCH 46/78] NFSD: Clean up SPLICE_OK in nfsd4_encode_read() Do the test_bit() once -- this reduces the number of locked-bus operations and makes the function a little easier to read. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index e590236a60ab..c9468f205dad 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3980,6 +3980,7 @@ static __be32 nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_read *read) { + bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags); unsigned long maxcount; struct xdr_stream *xdr = resp->xdr; struct file *file; @@ -3992,11 +3993,10 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ if (!p) { - WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)); + WARN_ON_ONCE(splice_ok); return nfserr_resource; } - if (resp->xdr->buf->page_len && - test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) { + if (resp->xdr->buf->page_len && splice_ok) { WARN_ON_ONCE(1); return nfserr_resource; } @@ -4005,8 +4005,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, maxcount = min_t(unsigned long, read->rd_length, (xdr->buf->buflen - xdr->buf->len)); - if (file->f_op->splice_read && - test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) + if (file->f_op->splice_read && splice_ok) nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); else nfserr = nfsd4_encode_readv(resp, read, file, maxcount); From 24c7fb85498eda1d4c6b42cc4886328429814990 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 22 Jul 2022 16:08:57 -0400 Subject: [PATCH 47/78] NFSD: Add an nfsd4_read::rd_eof field Refactor: Make the EOF result available in the entire NFSv4 READ path. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 11 +++++------ fs/nfsd/xdr4.h | 5 +++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c9468f205dad..16ae1be1bbac 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3879,7 +3879,6 @@ static __be32 nfsd4_encode_splice_read( struct xdr_stream *xdr = resp->xdr; struct xdr_buf *buf = xdr->buf; int status, space_left; - u32 eof; __be32 nfserr; __be32 *p = xdr->p - 2; @@ -3888,7 +3887,8 @@ static __be32 nfsd4_encode_splice_read( return nfserr_resource; nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp, - file, read->rd_offset, &maxcount, &eof); + file, read->rd_offset, &maxcount, + &read->rd_eof); read->rd_length = maxcount; if (nfserr) goto out_err; @@ -3899,7 +3899,7 @@ static __be32 nfsd4_encode_splice_read( goto out_err; } - *(p++) = htonl(eof); + *(p++) = htonl(read->rd_eof); *(p++) = htonl(maxcount); buf->page_len = maxcount; @@ -3943,7 +3943,6 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, struct file *file, unsigned long maxcount) { struct xdr_stream *xdr = resp->xdr; - u32 eof; int starting_len = xdr->buf->len - 8; __be32 nfserr; __be32 tmp; @@ -3955,7 +3954,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, &maxcount, - &eof); + &read->rd_eof); read->rd_length = maxcount; if (nfserr) return nfserr; @@ -3963,7 +3962,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, return nfserr_io; xdr_truncate_encode(xdr, starting_len + 8 + xdr_align_size(maxcount)); - tmp = htonl(eof); + tmp = htonl(read->rd_eof); write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4); tmp = htonl(maxcount); write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 7b744011f2d3..6e6a89008ce1 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -302,9 +302,10 @@ struct nfsd4_read { u32 rd_length; /* request */ int rd_vlen; struct nfsd_file *rd_nf; - + struct svc_rqst *rd_rqstp; /* response */ - struct svc_fh *rd_fhp; /* response */ + struct svc_fh *rd_fhp; /* response */ + u32 rd_eof; /* response */ }; struct nfsd4_readdir { From 28d5bc468efe74b790e052f758ce083a5015c665 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 22 Jul 2022 16:09:04 -0400 Subject: [PATCH 48/78] NFSD: Optimize nfsd4_encode_readv() write_bytes_to_xdr_buf() is pretty expensive to use for inserting an XDR data item that is always 1 XDR_UNIT at an address that is always XDR word-aligned. Since both the readv and splice read paths encode EOF and maxcount values, move both to a common code path. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 16ae1be1bbac..1e59d4ce529f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3880,7 +3880,6 @@ static __be32 nfsd4_encode_splice_read( struct xdr_buf *buf = xdr->buf; int status, space_left; __be32 nfserr; - __be32 *p = xdr->p - 2; /* Make sure there will be room for padding if needed */ if (xdr->end - xdr->p < 1) @@ -3899,9 +3898,6 @@ static __be32 nfsd4_encode_splice_read( goto out_err; } - *(p++) = htonl(read->rd_eof); - *(p++) = htonl(maxcount); - buf->page_len = maxcount; buf->len += maxcount; xdr->page_ptr += (buf->page_base + maxcount + PAGE_SIZE - 1) @@ -3962,11 +3958,6 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, return nfserr_io; xdr_truncate_encode(xdr, starting_len + 8 + xdr_align_size(maxcount)); - tmp = htonl(read->rd_eof); - write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4); - tmp = htonl(maxcount); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); - tmp = xdr_zero; pad = (maxcount&3) ? 4 - (maxcount&3) : 0; write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount, @@ -4008,11 +3999,14 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); else nfserr = nfsd4_encode_readv(resp, read, file, maxcount); - - if (nfserr) + if (nfserr) { xdr_truncate_encode(xdr, starting_len); + return nfserr; + } - return nfserr; + p = xdr_encode_bool(p, read->rd_eof); + *p = cpu_to_be32(read->rd_length); + return nfs_ok; } static __be32 From 071ae99feadfc55979f89287d6ad2c6a315cb46d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 22 Jul 2022 16:09:10 -0400 Subject: [PATCH 49/78] NFSD: Simplify starting_len Clean-up: Now that nfsd4_encode_readv() does not have to encode the EOF or rd_length values, it no longer needs to subtract 8 from @starting_len. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1e59d4ce529f..32f4f48458e6 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3939,7 +3939,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, struct file *file, unsigned long maxcount) { struct xdr_stream *xdr = resp->xdr; - int starting_len = xdr->buf->len - 8; + unsigned int starting_len = xdr->buf->len; __be32 nfserr; __be32 tmp; int pad; @@ -3954,14 +3954,13 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, read->rd_length = maxcount; if (nfserr) return nfserr; - if (svc_encode_result_payload(resp->rqstp, starting_len + 8, maxcount)) + if (svc_encode_result_payload(resp->rqstp, starting_len, maxcount)) return nfserr_io; - xdr_truncate_encode(xdr, starting_len + 8 + xdr_align_size(maxcount)); + xdr_truncate_encode(xdr, starting_len + xdr_align_size(maxcount)); tmp = xdr_zero; pad = (maxcount&3) ? 4 - (maxcount&3) : 0; - write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount, - &tmp, pad); + write_bytes_to_xdr_buf(xdr->buf, starting_len + maxcount, &tmp, pad); return 0; } From 5e64d85c7d0c59cfcd61d899720b8ccfe895d743 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 22 Jul 2022 16:09:16 -0400 Subject: [PATCH 50/78] NFSD: Use xdr_pad_size() Clean up: Use a helper instead of open-coding the calculation of the XDR pad size. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 32f4f48458e6..71bac0039c42 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3940,9 +3940,8 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, { struct xdr_stream *xdr = resp->xdr; unsigned int starting_len = xdr->buf->len; + __be32 zero = xdr_zero; __be32 nfserr; - __be32 tmp; - int pad; read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount); if (read->rd_vlen < 0) @@ -3958,11 +3957,9 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, return nfserr_io; xdr_truncate_encode(xdr, starting_len + xdr_align_size(maxcount)); - tmp = xdr_zero; - pad = (maxcount&3) ? 4 - (maxcount&3) : 0; - write_bytes_to_xdr_buf(xdr->buf, starting_len + maxcount, &tmp, pad); - return 0; - + write_bytes_to_xdr_buf(xdr->buf, starting_len + maxcount, &zero, + xdr_pad_size(maxcount)); + return nfs_ok; } static __be32 From 99b002a1fa00d90e66357315757e7277447ce973 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 22 Jul 2022 16:09:23 -0400 Subject: [PATCH 51/78] NFSD: Clean up nfsd4_encode_readlink() Similar changes to nfsd4_encode_readv(), all bundled into a single patch. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 71bac0039c42..358b3338c4cc 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4008,16 +4008,13 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) { - int maxcount; - __be32 wire_count; - int zero = 0; + __be32 *p, *maxcount_p, zero = xdr_zero; struct xdr_stream *xdr = resp->xdr; int length_offset = xdr->buf->len; - int status; - __be32 *p; + int maxcount, status; - p = xdr_reserve_space(xdr, 4); - if (!p) + maxcount_p = xdr_reserve_space(xdr, XDR_UNIT); + if (!maxcount_p) return nfserr_resource; maxcount = PAGE_SIZE; @@ -4042,14 +4039,11 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd nfserr = nfserrno(status); goto out_err; } - - wire_count = htonl(maxcount); - write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4); - xdr_truncate_encode(xdr, length_offset + 4 + ALIGN(maxcount, 4)); - if (maxcount & 3) - write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, - &zero, 4 - (maxcount&3)); - return 0; + *maxcount_p = cpu_to_be32(maxcount); + xdr_truncate_encode(xdr, length_offset + 4 + xdr_align_size(maxcount)); + write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, &zero, + xdr_pad_size(maxcount)); + return nfs_ok; out_err: xdr_truncate_encode(xdr, length_offset); From 5304877936c0a67e1a01464d113bae4c81eacdb6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 27 Jul 2022 14:40:03 -0400 Subject: [PATCH 52/78] NFSD: Fix strncpy() fortify warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In function ‘strncpy’, inlined from ‘nfsd4_ssc_setup_dul’ at /home/cel/src/linux/manet/fs/nfsd/nfs4proc.c:1392:3, inlined from ‘nfsd4_interssc_connect’ at /home/cel/src/linux/manet/fs/nfsd/nfs4proc.c:1489:11: /home/cel/src/linux/manet/include/linux/fortify-string.h:52:33: warning: ‘__builtin_strncpy’ specified bound 63 equals destination size [-Wstringop-truncation] 52 | #define __underlying_strncpy __builtin_strncpy | ^ /home/cel/src/linux/manet/include/linux/fortify-string.h:89:16: note: in expansion of macro ‘__underlying_strncpy’ 89 | return __underlying_strncpy(p, q, size); | ^~~~~~~~~~~~~~~~~~~~ Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 2 +- include/linux/nfs_ssc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5af9f8d1feb6..660bd49f3a32 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1389,7 +1389,7 @@ try_again: return 0; } if (work) { - strncpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr)); + strlcpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr) - 1); refcount_set(&work->nsui_refcnt, 2); work->nsui_busy = true; list_add_tail(&work->nsui_list, &nn->nfsd_ssc_mount_list); diff --git a/include/linux/nfs_ssc.h b/include/linux/nfs_ssc.h index 222ae8883e85..75843c00f326 100644 --- a/include/linux/nfs_ssc.h +++ b/include/linux/nfs_ssc.h @@ -64,7 +64,7 @@ struct nfsd4_ssc_umount_item { refcount_t nsui_refcnt; unsigned long nsui_expire; struct vfsmount *nsui_vfsmount; - char nsui_ipaddr[RPC_MAX_ADDRBUFLEN]; + char nsui_ipaddr[RPC_MAX_ADDRBUFLEN + 1]; }; #endif From bb4d842722b84a2731257054b6405f2d866fc5f3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 27 Jul 2022 14:40:09 -0400 Subject: [PATCH 53/78] NFSD: nfserrno(-ENOMEM) is nfserr_jukebox Suggested-by: Dai Ngo Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 358b3338c4cc..9b8091c177bc 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1810,7 +1810,7 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta for (i = 0; i < test_stateid->ts_num_ids; i++) { stateid = svcxdr_tmpalloc(argp, sizeof(*stateid)); if (!stateid) - return nfserrno(-ENOMEM); /* XXX: not jukebox? */ + return nfserr_jukebox; INIT_LIST_HEAD(&stateid->ts_id_list); list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list); status = nfsd4_decode_stateid4(argp, &stateid->ts_id_stateid); @@ -1933,7 +1933,7 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) ns_dummy = kmalloc(sizeof(struct nl4_server), GFP_KERNEL); if (ns_dummy == NULL) - return nfserrno(-ENOMEM); /* XXX: jukebox? */ + return nfserr_jukebox; for (i = 0; i < count - 1; i++) { status = nfsd4_decode_nl4_server(argp, ns_dummy); if (status) { From 09426ef2a64ee189ca1e3298f1e874842dbf35ea Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 27 Jul 2022 14:40:16 -0400 Subject: [PATCH 54/78] NFSD: Shrink size of struct nfsd4_copy_notify struct nfsd4_copy_notify is part of struct nfsd4_op, which resides in an 8-element array. sizeof(struct nfsd4_op): Before: /* size: 2208, cachelines: 35, members: 5 */ After: /* size: 1696, cachelines: 27, members: 5 */ Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 4 ++-- fs/nfsd/nfs4xdr.c | 12 ++++++++++-- fs/nfsd/xdr4.h | 4 ++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 660bd49f3a32..9090532c1f2c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1953,9 +1953,9 @@ nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* For now, only return one server address in cpn_src, the * address used by the client to connect to this server. */ - cn->cpn_src.nl4_type = NL4_NETADDR; + cn->cpn_src->nl4_type = NL4_NETADDR; status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr, - &cn->cpn_src.u.nl4_addr); + &cn->cpn_src->u.nl4_addr); WARN_ON_ONCE(status); if (status) { nfs4_put_cpntf_state(nn, cps); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 9b8091c177bc..06a9d7632552 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1952,10 +1952,17 @@ nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp, { __be32 status; + cn->cpn_src = svcxdr_tmpalloc(argp, sizeof(*cn->cpn_src)); + if (cn->cpn_src == NULL) + return nfserr_jukebox; + cn->cpn_dst = svcxdr_tmpalloc(argp, sizeof(*cn->cpn_dst)); + if (cn->cpn_dst == NULL) + return nfserr_jukebox; + status = nfsd4_decode_stateid4(argp, &cn->cpn_src_stateid); if (status) return status; - return nfsd4_decode_nl4_server(argp, &cn->cpn_dst); + return nfsd4_decode_nl4_server(argp, cn->cpn_dst); } static __be32 @@ -4898,7 +4905,8 @@ nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, *p++ = cpu_to_be32(1); - return nfsd42_encode_nl4_server(resp, &cn->cpn_src); + nfserr = nfsd42_encode_nl4_server(resp, cn->cpn_src); + return nfserr; } static __be32 diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 6e6a89008ce1..f253fc3f4708 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -595,13 +595,13 @@ struct nfsd4_offload_status { struct nfsd4_copy_notify { /* request */ stateid_t cpn_src_stateid; - struct nl4_server cpn_dst; + struct nl4_server *cpn_dst; /* response */ stateid_t cpn_cnr_stateid; u64 cpn_sec; u32 cpn_nsec; - struct nl4_server cpn_src; + struct nl4_server *cpn_src; }; struct nfsd4_op { From 87689df694916c40e8e6c179ab1c8710f65cb6c6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 27 Jul 2022 14:40:22 -0400 Subject: [PATCH 55/78] NFSD: Shrink size of struct nfsd4_copy struct nfsd4_copy is part of struct nfsd4_op, which resides in an 8-element array. sizeof(struct nfsd4_op): Before: /* size: 1696, cachelines: 27, members: 5 */ After: /* size: 672, cachelines: 11, members: 5 */ Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 8 ++++++-- fs/nfsd/nfs4xdr.c | 5 ++++- fs/nfsd/xdr4.h | 2 +- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 9090532c1f2c..d8621d008c13 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1289,6 +1289,7 @@ void nfs4_put_copy(struct nfsd4_copy *copy) { if (!refcount_dec_and_test(©->refcount)) return; + kfree(copy->cp_src); kfree(copy); } @@ -1549,7 +1550,7 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, if (status) goto out; - status = nfsd4_interssc_connect(©->cp_src, rqstp, mount); + status = nfsd4_interssc_connect(copy->cp_src, rqstp, mount); if (status) goto out; @@ -1761,7 +1762,7 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) dst->nf_src = nfsd_file_get(src->nf_src); memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid)); - memcpy(&dst->cp_src, &src->cp_src, sizeof(struct nl4_server)); + memcpy(dst->cp_src, src->cp_src, sizeof(struct nl4_server)); memcpy(&dst->stateid, &src->stateid, sizeof(src->stateid)); memcpy(&dst->c_fh, &src->c_fh, sizeof(src->c_fh)); dst->ss_mnt = src->ss_mnt; @@ -1855,6 +1856,9 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); if (!async_copy) goto out_err; + async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL); + if (!async_copy->cp_src) + goto out_err; if (!nfs4_init_copy_state(nn, copy)) goto out_err; refcount_set(&async_copy->refcount, 1); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 06a9d7632552..90d3a1a302b4 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1920,6 +1920,9 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) if (xdr_stream_decode_u32(argp->xdr, &count) < 0) return nfserr_bad_xdr; + copy->cp_src = svcxdr_tmpalloc(argp, sizeof(*copy->cp_src)); + if (copy->cp_src == NULL) + return nfserr_jukebox; copy->cp_intra = false; if (count == 0) { /* intra-server copy */ copy->cp_intra = true; @@ -1927,7 +1930,7 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) } /* decode all the supplied server addresses but use only the first */ - status = nfsd4_decode_nl4_server(argp, ©->cp_src); + status = nfsd4_decode_nl4_server(argp, copy->cp_src); if (status) return status; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index f253fc3f4708..f5ad2939e6ee 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -540,7 +540,7 @@ struct nfsd4_copy { u64 cp_src_pos; u64 cp_dst_pos; u64 cp_count; - struct nl4_server cp_src; + struct nl4_server *cp_src; bool cp_intra; /* both */ From d314309425ad5dc1b6facdb2d456580fb5fa5e3a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 27 Jul 2022 14:40:28 -0400 Subject: [PATCH 56/78] NFSD: Reorder the fields in struct nfsd4_op Pack the fields to reduce the size of struct nfsd4_op, which is used an array in struct nfsd4_compoundargs. sizeof(struct nfsd4_op): Before: /* size: 672, cachelines: 11, members: 5 */ After: /* size: 640, cachelines: 10, members: 5 */ Signed-off-by: Chuck Lever --- fs/nfsd/xdr4.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index f5ad2939e6ee..a36678e3ca0e 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -606,8 +606,9 @@ struct nfsd4_copy_notify { struct nfsd4_op { u32 opnum; - const struct nfsd4_operation * opdesc; __be32 status; + const struct nfsd4_operation *opdesc; + struct nfs4_replay *replay; union nfsd4_op_u { struct nfsd4_access access; struct nfsd4_close close; @@ -671,7 +672,6 @@ struct nfsd4_op { struct nfsd4_listxattrs listxattrs; struct nfsd4_removexattr removexattr; } u; - struct nfs4_replay * replay; }; bool nfsd4_cache_this_op(struct nfsd4_op *); From 8ea6e2c90bb0eb74a595a12e23a1dff9abbc760a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 27 Jul 2022 14:40:35 -0400 Subject: [PATCH 57/78] NFSD: Make nfs4_put_copy() static Clean up: All call sites are in fs/nfsd/nfs4proc.c. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/state.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d8621d008c13..851f35fca95d 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1285,7 +1285,7 @@ out: return status; } -void nfs4_put_copy(struct nfsd4_copy *copy) +static void nfs4_put_copy(struct nfsd4_copy *copy) { if (!refcount_dec_and_test(©->refcount)) return; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index f3d6313914ed..ae596dbf8667 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -703,7 +703,6 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn); void put_nfs4_file(struct nfs4_file *fi); -extern void nfs4_put_copy(struct nfsd4_copy *copy); extern struct nfsd4_copy * find_async_copy(struct nfs4_client *clp, stateid_t *staetid); extern void nfs4_put_cpntf_state(struct nfsd_net *nn, From 1913cdf56cb5bfbc8170873728d13598cbecda23 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 27 Jul 2022 14:40:41 -0400 Subject: [PATCH 58/78] NFSD: Replace boolean fields in struct nfsd4_copy Clean up: saves 8 bytes, and we can replace check_and_set_stop_copy() with an atomic bitop. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 51 +++++++++++++++++----------------------------- fs/nfsd/nfs4xdr.c | 12 +++++------ fs/nfsd/xdr4.h | 33 +++++++++++++++++++++++++----- 3 files changed, 53 insertions(+), 43 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 851f35fca95d..4b53fdb1ed0c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1293,23 +1293,9 @@ static void nfs4_put_copy(struct nfsd4_copy *copy) kfree(copy); } -static bool -check_and_set_stop_copy(struct nfsd4_copy *copy) -{ - bool value; - - spin_lock(©->cp_clp->async_lock); - value = copy->stopped; - if (!copy->stopped) - copy->stopped = true; - spin_unlock(©->cp_clp->async_lock); - return value; -} - static void nfsd4_stop_copy(struct nfsd4_copy *copy) { - /* only 1 thread should stop the copy */ - if (!check_and_set_stop_copy(copy)) + if (!test_and_set_bit(NFSD4_COPY_F_STOPPED, ©->cp_flags)) kthread_stop(copy->copy_task); nfs4_put_copy(copy); } @@ -1678,8 +1664,9 @@ static const struct nfsd4_callback_ops nfsd4_cb_offload_ops = { static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync) { copy->cp_res.wr_stable_how = - copy->committed ? NFS_FILE_SYNC : NFS_UNSTABLE; - copy->cp_synchronous = sync; + test_bit(NFSD4_COPY_F_COMMITTED, ©->cp_flags) ? + NFS_FILE_SYNC : NFS_UNSTABLE; + nfsd4_copy_set_sync(copy, sync); gen_boot_verifier(©->cp_res.wr_verifier, copy->cp_clp->net); } @@ -1708,16 +1695,16 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) copy->cp_res.wr_bytes_written += bytes_copied; src_pos += bytes_copied; dst_pos += bytes_copied; - } while (bytes_total > 0 && !copy->cp_synchronous); + } while (bytes_total > 0 && nfsd4_copy_is_async(copy)); /* for a non-zero asynchronous copy do a commit of data */ - if (!copy->cp_synchronous && copy->cp_res.wr_bytes_written > 0) { + if (nfsd4_copy_is_async(copy) && copy->cp_res.wr_bytes_written > 0) { since = READ_ONCE(dst->f_wb_err); status = vfs_fsync_range(dst, copy->cp_dst_pos, copy->cp_res.wr_bytes_written, 0); if (!status) status = filemap_check_wb_err(dst->f_mapping, since); if (!status) - copy->committed = true; + set_bit(NFSD4_COPY_F_COMMITTED, ©->cp_flags); } return bytes_copied; } @@ -1738,7 +1725,7 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync) status = nfs_ok; } - if (!copy->cp_intra) /* Inter server SSC */ + if (nfsd4_ssc_is_inter(copy)) nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src, copy->nf_dst); else @@ -1752,13 +1739,13 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) dst->cp_src_pos = src->cp_src_pos; dst->cp_dst_pos = src->cp_dst_pos; dst->cp_count = src->cp_count; - dst->cp_synchronous = src->cp_synchronous; + dst->cp_flags = src->cp_flags; memcpy(&dst->cp_res, &src->cp_res, sizeof(src->cp_res)); memcpy(&dst->fh, &src->fh, sizeof(src->fh)); dst->cp_clp = src->cp_clp; dst->nf_dst = nfsd_file_get(src->nf_dst); - dst->cp_intra = src->cp_intra; - if (src->cp_intra) /* for inter, file_src doesn't exist yet */ + /* for inter, nf_src doesn't exist yet */ + if (!nfsd4_ssc_is_inter(src)) dst->nf_src = nfsd_file_get(src->nf_src); memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid)); @@ -1772,7 +1759,7 @@ static void cleanup_async_copy(struct nfsd4_copy *copy) { nfs4_free_copy_state(copy); nfsd_file_put(copy->nf_dst); - if (copy->cp_intra) + if (!nfsd4_ssc_is_inter(copy)) nfsd_file_put(copy->nf_src); spin_lock(©->cp_clp->async_lock); list_del(©->copies); @@ -1785,7 +1772,7 @@ static int nfsd4_do_async_copy(void *data) struct nfsd4_copy *copy = (struct nfsd4_copy *)data; struct nfsd4_copy *cb_copy; - if (!copy->cp_intra) { /* Inter server SSC */ + if (nfsd4_ssc_is_inter(copy)) { copy->nf_src = kzalloc(sizeof(struct nfsd_file), GFP_KERNEL); if (!copy->nf_src) { copy->nfserr = nfserr_serverfault; @@ -1817,7 +1804,7 @@ do_callback: ©->fh, copy->cp_count, copy->nfserr); nfsd4_run_cb(&cb_copy->cp_cb); out: - if (!copy->cp_intra) + if (nfsd4_ssc_is_inter(copy)) kfree(copy->nf_src); cleanup_async_copy(copy); return 0; @@ -1831,8 +1818,8 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct nfsd4_copy *async_copy = NULL; - if (!copy->cp_intra) { /* Inter server SSC */ - if (!inter_copy_offload_enable || copy->cp_synchronous) { + if (nfsd4_ssc_is_inter(copy)) { + if (!inter_copy_offload_enable || nfsd4_copy_is_sync(copy)) { status = nfserr_notsupp; goto out; } @@ -1849,7 +1836,7 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, copy->cp_clp = cstate->clp; memcpy(©->fh, &cstate->current_fh.fh_handle, sizeof(struct knfsd_fh)); - if (!copy->cp_synchronous) { + if (nfsd4_copy_is_async(copy)) { struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); status = nfserrno(-ENOMEM); @@ -1884,7 +1871,7 @@ out_err: if (async_copy) cleanup_async_copy(async_copy); status = nfserrno(-ENOMEM); - if (!copy->cp_intra) + if (nfsd4_ssc_is_inter(copy)) nfsd4_interssc_disconnect(copy->ss_mnt); goto out; } @@ -2613,7 +2600,7 @@ check_if_stalefh_allowed(struct nfsd4_compoundargs *args) return; } putfh = (struct nfsd4_putfh *)&saved_op->u; - if (!copy->cp_intra) + if (nfsd4_ssc_is_inter(copy)) putfh->no_verify = true; } } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 90d3a1a302b4..1e9690a061ec 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1896,8 +1896,8 @@ static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) { + u32 consecutive, i, count, sync; struct nl4_server *ns_dummy; - u32 consecutive, i, count; __be32 status; status = nfsd4_decode_stateid4(argp, ©->cp_src_stateid); @@ -1915,17 +1915,17 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) /* ca_consecutive: we always do consecutive copies */ if (xdr_stream_decode_u32(argp->xdr, &consecutive) < 0) return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, ©->cp_synchronous) < 0) + if (xdr_stream_decode_bool(argp->xdr, &sync) < 0) return nfserr_bad_xdr; + nfsd4_copy_set_sync(copy, sync); if (xdr_stream_decode_u32(argp->xdr, &count) < 0) return nfserr_bad_xdr; copy->cp_src = svcxdr_tmpalloc(argp, sizeof(*copy->cp_src)); if (copy->cp_src == NULL) return nfserr_jukebox; - copy->cp_intra = false; if (count == 0) { /* intra-server copy */ - copy->cp_intra = true; + __set_bit(NFSD4_COPY_F_INTRA, ©->cp_flags); return nfs_ok; } @@ -4704,13 +4704,13 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, __be32 *p; nfserr = nfsd42_encode_write_res(resp, ©->cp_res, - !!copy->cp_synchronous); + nfsd4_copy_is_sync(copy)); if (nfserr) return nfserr; p = xdr_reserve_space(resp->xdr, 4 + 4); *p++ = xdr_one; /* cr_consecutive */ - *p++ = cpu_to_be32(copy->cp_synchronous); + *p = nfsd4_copy_is_sync(copy) ? xdr_one : xdr_zero; return 0; } diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index a36678e3ca0e..eb09bf5aa70e 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -541,10 +541,12 @@ struct nfsd4_copy { u64 cp_dst_pos; u64 cp_count; struct nl4_server *cp_src; - bool cp_intra; - /* both */ - u32 cp_synchronous; + unsigned long cp_flags; +#define NFSD4_COPY_F_STOPPED (0) +#define NFSD4_COPY_F_INTRA (1) +#define NFSD4_COPY_F_SYNCHRONOUS (2) +#define NFSD4_COPY_F_COMMITTED (3) /* response */ struct nfsd42_write_res cp_res; @@ -564,14 +566,35 @@ struct nfsd4_copy { struct list_head copies; struct task_struct *copy_task; refcount_t refcount; - bool stopped; struct vfsmount *ss_mnt; struct nfs_fh c_fh; nfs4_stateid stateid; - bool committed; }; +static inline void nfsd4_copy_set_sync(struct nfsd4_copy *copy, bool sync) +{ + if (sync) + set_bit(NFSD4_COPY_F_SYNCHRONOUS, ©->cp_flags); + else + clear_bit(NFSD4_COPY_F_SYNCHRONOUS, ©->cp_flags); +} + +static inline bool nfsd4_copy_is_sync(const struct nfsd4_copy *copy) +{ + return test_bit(NFSD4_COPY_F_SYNCHRONOUS, ©->cp_flags); +} + +static inline bool nfsd4_copy_is_async(const struct nfsd4_copy *copy) +{ + return !test_bit(NFSD4_COPY_F_SYNCHRONOUS, ©->cp_flags); +} + +static inline bool nfsd4_ssc_is_inter(const struct nfsd4_copy *copy) +{ + return !test_bit(NFSD4_COPY_F_INTRA, ©->cp_flags); +} + struct nfsd4_seek { /* request */ stateid_t seek_stateid; From 24d796ea383b8a4c8234e06d1b14bbcd371192ea Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 27 Jul 2022 14:40:47 -0400 Subject: [PATCH 59/78] NFSD: Refactor nfsd4_cleanup_inter_ssc() (1/2) The @src parameter is sometimes a pointer to a struct nfsd_file and sometimes a pointer to struct file hiding in a phony struct nfsd_file. Refactor nfsd4_cleanup_inter_ssc() so the @src parameter is always an explicit struct file. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 4b53fdb1ed0c..cb9274f24c7d 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1554,7 +1554,7 @@ out: } static void -nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, +nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *filp, struct nfsd_file *dst) { bool found = false; @@ -1563,9 +1563,9 @@ nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, struct nfsd4_ssc_umount_item *ni = NULL; struct nfsd_net *nn = net_generic(dst->nf_net, nfsd_net_id); - nfs42_ssc_close(src->nf_file); + nfs42_ssc_close(filp); nfsd_file_put(dst); - fput(src->nf_file); + fput(filp); if (!nn) { mntput(ss_mnt); @@ -1608,7 +1608,7 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, } static void -nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, +nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *filp, struct nfsd_file *dst) { } @@ -1726,7 +1726,7 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync) } if (nfsd4_ssc_is_inter(copy)) - nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src, + nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src->nf_file, copy->nf_dst); else nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst); From 478ed7b10d875da2743d1a22822b9f8a82df8f12 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 27 Jul 2022 14:40:53 -0400 Subject: [PATCH 60/78] NFSD: Refactor nfsd4_cleanup_inter_ssc() (2/2) Move the nfsd4_cleanup_*() call sites out of nfsd4_do_copy(). A subsequent patch will modify one of the new call sites to avoid the need to manufacture the phony struct nfsd_file. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index cb9274f24c7d..8f85365435df 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1724,13 +1724,6 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync) nfsd4_init_copy_res(copy, sync); status = nfs_ok; } - - if (nfsd4_ssc_is_inter(copy)) - nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src->nf_file, - copy->nf_dst); - else - nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst); - return status; } @@ -1786,9 +1779,14 @@ static int nfsd4_do_async_copy(void *data) nfsd4_interssc_disconnect(copy->ss_mnt); goto do_callback; } + copy->nfserr = nfsd4_do_copy(copy, 0); + nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src->nf_file, + copy->nf_dst); + } else { + copy->nfserr = nfsd4_do_copy(copy, 0); + nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst); } - copy->nfserr = nfsd4_do_copy(copy, 0); do_callback: cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); if (!cb_copy) @@ -1864,6 +1862,7 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs_ok; } else { status = nfsd4_do_copy(copy, 1); + nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst); } out: return status; From 3b7bf5933cada732783554edf0dc61283551c6cf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 27 Jul 2022 14:40:59 -0400 Subject: [PATCH 61/78] NFSD: Refactor nfsd4_do_copy() Refactor: Now that nfsd4_do_copy() no longer calls the cleanup helpers, plumb the use of struct file pointers all the way down to _nfsd_copy_file_range(). Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 8f85365435df..328e11d95696 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1670,10 +1670,10 @@ static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync) gen_boot_verifier(©->cp_res.wr_verifier, copy->cp_clp->net); } -static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) +static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy, + struct file *dst, + struct file *src) { - struct file *dst = copy->nf_dst->nf_file; - struct file *src = copy->nf_src->nf_file; errseq_t since; ssize_t bytes_copied = 0; u64 bytes_total = copy->cp_count; @@ -1709,12 +1709,15 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) return bytes_copied; } -static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync) +static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, + struct file *src, struct file *dst, + bool sync) { __be32 status; ssize_t bytes; - bytes = _nfsd_copy_file_range(copy); + bytes = _nfsd_copy_file_range(copy, dst, src); + /* for async copy, we ignore the error, client can always retry * to get the error */ @@ -1779,11 +1782,13 @@ static int nfsd4_do_async_copy(void *data) nfsd4_interssc_disconnect(copy->ss_mnt); goto do_callback; } - copy->nfserr = nfsd4_do_copy(copy, 0); + copy->nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file, + copy->nf_dst->nf_file, false); nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src->nf_file, copy->nf_dst); } else { - copy->nfserr = nfsd4_do_copy(copy, 0); + copy->nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file, + copy->nf_dst->nf_file, false); nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst); } @@ -1861,7 +1866,8 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, wake_up_process(async_copy->copy_task); status = nfs_ok; } else { - status = nfsd4_do_copy(copy, 1); + status = nfsd4_do_copy(copy, copy->nf_src->nf_file, + copy->nf_dst->nf_file, true); nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst); } out: From ad1e46c9b07b13659635ee5405f83ad0df143116 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 27 Jul 2022 14:41:06 -0400 Subject: [PATCH 62/78] NFSD: Remove kmalloc from nfsd4_do_async_copy() Instead of manufacturing a phony struct nfsd_file, pass the struct file returned by nfs42_ssc_open() directly to nfsd4_do_copy(). Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 328e11d95696..cbdbe1e9de89 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1763,29 +1763,31 @@ static void cleanup_async_copy(struct nfsd4_copy *copy) nfs4_put_copy(copy); } +/** + * nfsd4_do_async_copy - kthread function for background server-side COPY + * @data: arguments for COPY operation + * + * Return values: + * %0: Copy operation is done. + */ static int nfsd4_do_async_copy(void *data) { struct nfsd4_copy *copy = (struct nfsd4_copy *)data; struct nfsd4_copy *cb_copy; if (nfsd4_ssc_is_inter(copy)) { - copy->nf_src = kzalloc(sizeof(struct nfsd_file), GFP_KERNEL); - if (!copy->nf_src) { - copy->nfserr = nfserr_serverfault; - nfsd4_interssc_disconnect(copy->ss_mnt); - goto do_callback; - } - copy->nf_src->nf_file = nfs42_ssc_open(copy->ss_mnt, ©->c_fh, - ©->stateid); - if (IS_ERR(copy->nf_src->nf_file)) { + struct file *filp; + + filp = nfs42_ssc_open(copy->ss_mnt, ©->c_fh, + ©->stateid); + if (IS_ERR(filp)) { copy->nfserr = nfserr_offload_denied; nfsd4_interssc_disconnect(copy->ss_mnt); goto do_callback; } - copy->nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file, + copy->nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file, false); - nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src->nf_file, - copy->nf_dst); + nfsd4_cleanup_inter_ssc(copy->ss_mnt, filp, copy->nf_dst); } else { copy->nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file, copy->nf_dst->nf_file, false); @@ -1807,8 +1809,6 @@ do_callback: ©->fh, copy->cp_count, copy->nfserr); nfsd4_run_cb(&cb_copy->cp_cb); out: - if (nfsd4_ssc_is_inter(copy)) - kfree(copy->nf_src); cleanup_async_copy(copy); return 0; } From e72f9bc006c08841c46d27747a4debc747a8fe13 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 27 Jul 2022 14:41:12 -0400 Subject: [PATCH 63/78] NFSD: Add nfsd4_send_cb_offload() Refactor for legibility. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index cbdbe1e9de89..baea3d39d5ce 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1763,6 +1763,27 @@ static void cleanup_async_copy(struct nfsd4_copy *copy) nfs4_put_copy(copy); } +static void nfsd4_send_cb_offload(struct nfsd4_copy *copy) +{ + struct nfsd4_copy *cb_copy; + + cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); + if (!cb_copy) + return; + + refcount_set(&cb_copy->refcount, 1); + memcpy(&cb_copy->cp_res, ©->cp_res, sizeof(copy->cp_res)); + cb_copy->cp_clp = copy->cp_clp; + cb_copy->nfserr = copy->nfserr; + memcpy(&cb_copy->fh, ©->fh, sizeof(copy->fh)); + + nfsd4_init_cb(&cb_copy->cp_cb, cb_copy->cp_clp, + &nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD); + trace_nfsd_cb_offload(copy->cp_clp, ©->cp_res.cb_stateid, + ©->fh, copy->cp_count, copy->nfserr); + nfsd4_run_cb(&cb_copy->cp_cb); +} + /** * nfsd4_do_async_copy - kthread function for background server-side COPY * @data: arguments for COPY operation @@ -1773,7 +1794,6 @@ static void cleanup_async_copy(struct nfsd4_copy *copy) static int nfsd4_do_async_copy(void *data) { struct nfsd4_copy *copy = (struct nfsd4_copy *)data; - struct nfsd4_copy *cb_copy; if (nfsd4_ssc_is_inter(copy)) { struct file *filp; @@ -1795,20 +1815,7 @@ static int nfsd4_do_async_copy(void *data) } do_callback: - cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); - if (!cb_copy) - goto out; - refcount_set(&cb_copy->refcount, 1); - memcpy(&cb_copy->cp_res, ©->cp_res, sizeof(copy->cp_res)); - cb_copy->cp_clp = copy->cp_clp; - cb_copy->nfserr = copy->nfserr; - memcpy(&cb_copy->fh, ©->fh, sizeof(copy->fh)); - nfsd4_init_cb(&cb_copy->cp_cb, cb_copy->cp_clp, - &nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD); - trace_nfsd_cb_offload(copy->cp_clp, ©->cp_res.cb_stateid, - ©->fh, copy->cp_count, copy->nfserr); - nfsd4_run_cb(&cb_copy->cp_cb); -out: + nfsd4_send_cb_offload(copy); cleanup_async_copy(copy); return 0; } From a11ada99ce93a79393dc6683d22f7915748c8f6b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 27 Jul 2022 14:41:18 -0400 Subject: [PATCH 64/78] NFSD: Move copy offload callback arguments into a separate structure Refactor so that CB_OFFLOAD arguments can be passed without allocating a whole struct nfsd4_copy object. On my system (x86_64) this removes another 96 bytes from struct nfsd4_copy. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4callback.c | 37 +++++++++++++++++------------------ fs/nfsd/nfs4proc.c | 44 +++++++++++++++++++++--------------------- fs/nfsd/xdr4.h | 11 +++++++---- 3 files changed, 47 insertions(+), 45 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 11f8715d92d6..4ce328209f61 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -679,7 +679,7 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp, * case NFS4_OK: * write_response4 coa_resok4; * default: - * length4 coa_bytes_copied; + * length4 coa_bytes_copied; * }; * struct CB_OFFLOAD4args { * nfs_fh4 coa_fh; @@ -688,21 +688,22 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp, * }; */ static void encode_offload_info4(struct xdr_stream *xdr, - __be32 nfserr, - const struct nfsd4_copy *cp) + const struct nfsd4_cb_offload *cbo) { __be32 *p; p = xdr_reserve_space(xdr, 4); - *p++ = nfserr; - if (!nfserr) { + *p = cbo->co_nfserr; + switch (cbo->co_nfserr) { + case nfs_ok: p = xdr_reserve_space(xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE); p = xdr_encode_empty_array(p); - p = xdr_encode_hyper(p, cp->cp_res.wr_bytes_written); - *p++ = cpu_to_be32(cp->cp_res.wr_stable_how); - p = xdr_encode_opaque_fixed(p, cp->cp_res.wr_verifier.data, + p = xdr_encode_hyper(p, cbo->co_res.wr_bytes_written); + *p++ = cpu_to_be32(cbo->co_res.wr_stable_how); + p = xdr_encode_opaque_fixed(p, cbo->co_res.wr_verifier.data, NFS4_VERIFIER_SIZE); - } else { + break; + default: p = xdr_reserve_space(xdr, 8); /* We always return success if bytes were written */ p = xdr_encode_hyper(p, 0); @@ -710,18 +711,16 @@ static void encode_offload_info4(struct xdr_stream *xdr, } static void encode_cb_offload4args(struct xdr_stream *xdr, - __be32 nfserr, - const struct knfsd_fh *fh, - const struct nfsd4_copy *cp, + const struct nfsd4_cb_offload *cbo, struct nfs4_cb_compound_hdr *hdr) { __be32 *p; p = xdr_reserve_space(xdr, 4); - *p++ = cpu_to_be32(OP_CB_OFFLOAD); - encode_nfs_fh4(xdr, fh); - encode_stateid4(xdr, &cp->cp_res.cb_stateid); - encode_offload_info4(xdr, nfserr, cp); + *p = cpu_to_be32(OP_CB_OFFLOAD); + encode_nfs_fh4(xdr, &cbo->co_fh); + encode_stateid4(xdr, &cbo->co_res.cb_stateid); + encode_offload_info4(xdr, cbo); hdr->nops++; } @@ -731,8 +730,8 @@ static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req, const void *data) { const struct nfsd4_callback *cb = data; - const struct nfsd4_copy *cp = - container_of(cb, struct nfsd4_copy, cp_cb); + const struct nfsd4_cb_offload *cbo = + container_of(cb, struct nfsd4_cb_offload, co_cb); struct nfs4_cb_compound_hdr hdr = { .ident = 0, .minorversion = cb->cb_clp->cl_minorversion, @@ -740,7 +739,7 @@ static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req, encode_cb_compound4args(xdr, &hdr); encode_cb_sequence4args(xdr, cb, &hdr); - encode_cb_offload4args(xdr, cp->nfserr, &cp->fh, cp, &hdr); + encode_cb_offload4args(xdr, cbo, &hdr); encode_cb_nops(&hdr); } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index baea3d39d5ce..ddc747e92e7a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1645,9 +1645,10 @@ nfsd4_cleanup_intra_ssc(struct nfsd_file *src, struct nfsd_file *dst) static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) { - struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb); + struct nfsd4_cb_offload *cbo = + container_of(cb, struct nfsd4_cb_offload, co_cb); - nfs4_put_copy(copy); + kfree(cbo); } static int nfsd4_cb_offload_done(struct nfsd4_callback *cb, @@ -1763,25 +1764,23 @@ static void cleanup_async_copy(struct nfsd4_copy *copy) nfs4_put_copy(copy); } -static void nfsd4_send_cb_offload(struct nfsd4_copy *copy) +static void nfsd4_send_cb_offload(struct nfsd4_copy *copy, __be32 nfserr) { - struct nfsd4_copy *cb_copy; + struct nfsd4_cb_offload *cbo; - cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); - if (!cb_copy) + cbo = kzalloc(sizeof(*cbo), GFP_KERNEL); + if (!cbo) return; - refcount_set(&cb_copy->refcount, 1); - memcpy(&cb_copy->cp_res, ©->cp_res, sizeof(copy->cp_res)); - cb_copy->cp_clp = copy->cp_clp; - cb_copy->nfserr = copy->nfserr; - memcpy(&cb_copy->fh, ©->fh, sizeof(copy->fh)); + memcpy(&cbo->co_res, ©->cp_res, sizeof(copy->cp_res)); + memcpy(&cbo->co_fh, ©->fh, sizeof(copy->fh)); + cbo->co_nfserr = nfserr; - nfsd4_init_cb(&cb_copy->cp_cb, cb_copy->cp_clp, - &nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD); - trace_nfsd_cb_offload(copy->cp_clp, ©->cp_res.cb_stateid, - ©->fh, copy->cp_count, copy->nfserr); - nfsd4_run_cb(&cb_copy->cp_cb); + nfsd4_init_cb(&cbo->co_cb, copy->cp_clp, &nfsd4_cb_offload_ops, + NFSPROC4_CLNT_CB_OFFLOAD); + trace_nfsd_cb_offload(copy->cp_clp, &cbo->co_res.cb_stateid, + &cbo->co_fh, copy->cp_count, nfserr); + nfsd4_run_cb(&cbo->co_cb); } /** @@ -1794,6 +1793,7 @@ static void nfsd4_send_cb_offload(struct nfsd4_copy *copy) static int nfsd4_do_async_copy(void *data) { struct nfsd4_copy *copy = (struct nfsd4_copy *)data; + __be32 nfserr; if (nfsd4_ssc_is_inter(copy)) { struct file *filp; @@ -1801,21 +1801,21 @@ static int nfsd4_do_async_copy(void *data) filp = nfs42_ssc_open(copy->ss_mnt, ©->c_fh, ©->stateid); if (IS_ERR(filp)) { - copy->nfserr = nfserr_offload_denied; + nfserr = nfserr_offload_denied; nfsd4_interssc_disconnect(copy->ss_mnt); goto do_callback; } - copy->nfserr = nfsd4_do_copy(copy, filp, - copy->nf_dst->nf_file, false); + nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file, + false); nfsd4_cleanup_inter_ssc(copy->ss_mnt, filp, copy->nf_dst); } else { - copy->nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file, - copy->nf_dst->nf_file, false); + nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file, + copy->nf_dst->nf_file, false); nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst); } do_callback: - nfsd4_send_cb_offload(copy); + nfsd4_send_cb_offload(copy, nfserr); cleanup_async_copy(copy); return 0; } diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index eb09bf5aa70e..8c8d7b503096 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -533,6 +533,13 @@ struct nfsd42_write_res { stateid_t cb_stateid; }; +struct nfsd4_cb_offload { + struct nfsd4_callback co_cb; + struct nfsd42_write_res co_res; + __be32 co_nfserr; + struct knfsd_fh co_fh; +}; + struct nfsd4_copy { /* request */ stateid_t cp_src_stateid; @@ -550,10 +557,6 @@ struct nfsd4_copy { /* response */ struct nfsd42_write_res cp_res; - - /* for cb_offload */ - struct nfsd4_callback cp_cb; - __be32 nfserr; struct knfsd_fh fh; struct nfs4_client *cp_clp; From bbf936edd543e7220f60f9cbd6933b916550396d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 26 Jul 2022 16:45:30 +1000 Subject: [PATCH 65/78] NFSD: drop fh argument from alloc_init_deleg Currently, we pass the fh of the opened file down through several functions so that alloc_init_deleg can pass it to delegation_blocked. The filehandle of the open file is available in the nfs4_file however, so there's no need to pass it in a separate argument. Drop the argument from alloc_init_deleg, nfs4_open_delegation and nfs4_set_delegation. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e46e3392d557..279c7a1502c9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1131,7 +1131,6 @@ static void block_delegations(struct knfsd_fh *fh) static struct nfs4_delegation * alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, - struct svc_fh *current_fh, struct nfs4_clnt_odstate *odstate) { struct nfs4_delegation *dp; @@ -1141,7 +1140,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, n = atomic_long_inc_return(&num_delegations); if (n < 0 || n > max_delegations) goto out_dec; - if (delegation_blocked(¤t_fh->fh_handle)) + if (delegation_blocked(&fp->fi_fhandle)) goto out_dec; dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg)); if (dp == NULL) @@ -5290,7 +5289,7 @@ static int nfsd4_check_conflicting_opens(struct nfs4_client *clp, } static struct nfs4_delegation * -nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, +nfs4_set_delegation(struct nfs4_client *clp, struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate) { int status = 0; @@ -5335,7 +5334,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, return ERR_PTR(status); status = -ENOMEM; - dp = alloc_init_deleg(clp, fp, fh, odstate); + dp = alloc_init_deleg(clp, fp, odstate); if (!dp) goto out_delegees; @@ -5403,8 +5402,7 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) * proper support for them. */ static void -nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, - struct nfs4_ol_stateid *stp) +nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp) { struct nfs4_delegation *dp; struct nfs4_openowner *oo = openowner(stp->st_stateowner); @@ -5436,7 +5434,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, default: goto out_no_deleg; } - dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file, stp->st_clnt_odstate); + dp = nfs4_set_delegation(clp, stp->st_stid.sc_file, stp->st_clnt_odstate); if (IS_ERR(dp)) goto out_no_deleg; @@ -5568,7 +5566,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * Attempt to hand out a delegation. No error return, because the * OPEN succeeds even if we fail. */ - nfs4_open_delegation(current_fh, open, stp); + nfs4_open_delegation(open, stp); nodeleg: status = nfs_ok; trace_nfsd_open(&stp->st_stid.sc_stateid); From 876c553cb41026cb6ad3cef970a35e5f69c42a25 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 26 Jul 2022 16:45:30 +1000 Subject: [PATCH 66/78] NFSD: verify the opened dentry after setting a delegation Between opening a file and setting a delegation on it, someone could rename or unlink the dentry. If this happens, we do not want to grant a delegation on the open. On a CLAIM_NULL open, we're opening by filename, and we may (in the non-create case) or may not (in the create case) be holding i_rwsem when attempting to set a delegation. The latter case allows a race. After getting a lease, redo the lookup of the file being opened and validate that the resulting dentry matches the one in the open file description. To properly redo the lookup we need an rqst pointer to pass to nfsd_lookup_dentry(), so make sure that is available. Signed-off-by: Jeff Layton Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 1 + fs/nfsd/nfs4state.c | 54 ++++++++++++++++++++++++++++++++++++++++----- fs/nfsd/xdr4.h | 1 + 3 files changed, 51 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ddc747e92e7a..e30866ee661a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -547,6 +547,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, open->op_openowner); open->op_filp = NULL; + open->op_rqstp = rqstp; /* This check required by spec. */ if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 279c7a1502c9..8f64af3e38d8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5288,11 +5288,44 @@ static int nfsd4_check_conflicting_opens(struct nfs4_client *clp, return 0; } +/* + * It's possible that between opening the dentry and setting the delegation, + * that it has been renamed or unlinked. Redo the lookup to verify that this + * hasn't happened. + */ +static int +nfsd4_verify_deleg_dentry(struct nfsd4_open *open, struct nfs4_file *fp, + struct svc_fh *parent) +{ + struct svc_export *exp; + struct dentry *child; + __be32 err; + + /* parent may already be locked, and it may get unlocked by + * this call, but that is safe. + */ + err = nfsd_lookup_dentry(open->op_rqstp, parent, + open->op_fname, open->op_fnamelen, + &exp, &child); + + if (err) + return -EAGAIN; + + dput(child); + if (child != file_dentry(fp->fi_deleg_file->nf_file)) + return -EAGAIN; + + return 0; +} + static struct nfs4_delegation * -nfs4_set_delegation(struct nfs4_client *clp, - struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate) +nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, + struct svc_fh *parent) { int status = 0; + struct nfs4_client *clp = stp->st_stid.sc_client; + struct nfs4_file *fp = stp->st_stid.sc_file; + struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate; struct nfs4_delegation *dp; struct nfsd_file *nf; struct file_lock *fl; @@ -5347,6 +5380,13 @@ nfs4_set_delegation(struct nfs4_client *clp, locks_free_lock(fl); if (status) goto out_clnt_odstate; + + if (parent) { + status = nfsd4_verify_deleg_dentry(open, fp, parent); + if (status) + goto out_unlock; + } + status = nfsd4_check_conflicting_opens(clp, fp); if (status) goto out_unlock; @@ -5402,11 +5442,13 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) * proper support for them. */ static void -nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp) +nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, + struct svc_fh *currentfh) { struct nfs4_delegation *dp; struct nfs4_openowner *oo = openowner(stp->st_stateowner); struct nfs4_client *clp = stp->st_stid.sc_client; + struct svc_fh *parent = NULL; int cb_up; int status = 0; @@ -5420,6 +5462,8 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp) goto out_no_deleg; break; case NFS4_OPEN_CLAIM_NULL: + parent = currentfh; + fallthrough; case NFS4_OPEN_CLAIM_FH: /* * Let's not give out any delegations till everyone's @@ -5434,7 +5478,7 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp) default: goto out_no_deleg; } - dp = nfs4_set_delegation(clp, stp->st_stid.sc_file, stp->st_clnt_odstate); + dp = nfs4_set_delegation(open, stp, parent); if (IS_ERR(dp)) goto out_no_deleg; @@ -5566,7 +5610,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * Attempt to hand out a delegation. No error return, because the * OPEN succeeds even if we fail. */ - nfs4_open_delegation(open, stp); + nfs4_open_delegation(open, stp, &resp->cstate.current_fh); nodeleg: status = nfs_ok; trace_nfsd_open(&stp->st_stid.sc_stateid); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 8c8d7b503096..96267258e629 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -279,6 +279,7 @@ struct nfsd4_open { struct nfs4_clnt_odstate *op_odstate; /* used during processing */ struct nfs4_acl *op_acl; struct xdr_netobj op_label; + struct svc_rqst *op_rqstp; }; struct nfsd4_open_confirm { From 7fe2a71dda349a1afa75781f0cc7975be9784d15 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 26 Jul 2022 16:45:30 +1000 Subject: [PATCH 67/78] NFSD: introduce struct nfsd_attrs The attributes that nfsd might want to set on a file include 'struct iattr' as well as an ACL and security label. The latter two are passed around quite separately from the first, in part because they are only needed for NFSv4. This leads to some clumsiness in the code, such as the attributes NOT being set in nfsd_create_setattr(). We need to keep the directory locked until all attributes are set to ensure the file is never visibile without all its attributes. This need combined with the inconsistent handling of attributes leads to more clumsiness. As a first step towards tidying this up, introduce 'struct nfsd_attrs'. This is passed (by reference) to vfs.c functions that work with attributes, and is assembled by the various nfs*proc functions which call them. As yet only iattr is included, but future patches will expand this. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 20 ++++++++++++++++---- fs/nfsd/nfs4proc.c | 23 ++++++++++++++++------- fs/nfsd/nfs4state.c | 5 ++++- fs/nfsd/nfsproc.c | 17 +++++++++++++---- fs/nfsd/vfs.c | 24 ++++++++++++++---------- fs/nfsd/vfs.h | 12 ++++++++---- 6 files changed, 71 insertions(+), 30 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 981a3a7a6e16..aca454449d3a 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -67,12 +67,15 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp) { struct nfsd3_sattrargs *argp = rqstp->rq_argp; struct nfsd3_attrstat *resp = rqstp->rq_resp; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; dprintk("nfsd: SETATTR(3) %s\n", SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); - resp->status = nfsd_setattr(rqstp, &resp->fh, &argp->attrs, + resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs, argp->check_guard, argp->guardtime); return rpc_success; } @@ -233,6 +236,9 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, { struct iattr *iap = &argp->attrs; struct dentry *parent, *child; + struct nfsd_attrs attrs = { + .na_iattr = iap, + }; __u32 v_mtime, v_atime; struct inode *inode; __be32 status; @@ -331,7 +337,7 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, } set_attr: - status = nfsd_create_setattr(rqstp, fhp, resfhp, iap); + status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs); out: fh_unlock(fhp); @@ -368,6 +374,9 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp) { struct nfsd3_createargs *argp = rqstp->rq_argp; struct nfsd3_diropres *resp = rqstp->rq_resp; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; dprintk("nfsd: MKDIR(3) %s %.*s\n", SVCFH_fmt(&argp->fh), @@ -378,7 +387,7 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp) fh_copy(&resp->dirfh, &argp->fh); fh_init(&resp->fh, NFS3_FHSIZE); resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, - &argp->attrs, S_IFDIR, 0, &resp->fh); + &attrs, S_IFDIR, 0, &resp->fh); fh_unlock(&resp->dirfh); return rpc_success; } @@ -428,6 +437,9 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp) { struct nfsd3_mknodargs *argp = rqstp->rq_argp; struct nfsd3_diropres *resp = rqstp->rq_resp; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; int type; dev_t rdev = 0; @@ -453,7 +465,7 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp) type = nfs3_ftypes[argp->ftype]; resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, - &argp->attrs, type, rdev, &resp->fh); + &attrs, type, rdev, &resp->fh); fh_unlock(&resp->dirfh); out: return rpc_success; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index e30866ee661a..0d6ee19ffb15 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -286,6 +286,9 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct svc_fh *resfhp, struct nfsd4_open *open) { struct iattr *iap = &open->op_iattr; + struct nfsd_attrs attrs = { + .na_iattr = iap, + }; struct dentry *parent, *child; __u32 v_mtime, v_atime; struct inode *inode; @@ -404,7 +407,7 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, } set_attr: - status = nfsd_create_setattr(rqstp, fhp, resfhp, iap); + status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs); out: fh_unlock(fhp); @@ -787,6 +790,9 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_create *create = &u->create; + struct nfsd_attrs attrs = { + .na_iattr = &create->cr_iattr, + }; struct svc_fh resfh; __be32 status; dev_t rdev; @@ -818,7 +824,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out_umask; status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &create->cr_iattr, S_IFBLK, rdev, &resfh); + &attrs, S_IFBLK, rdev, &resfh); break; case NF4CHR: @@ -829,26 +835,26 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out_umask; status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &create->cr_iattr, S_IFCHR, rdev, &resfh); + &attrs, S_IFCHR, rdev, &resfh); break; case NF4SOCK: status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &create->cr_iattr, S_IFSOCK, 0, &resfh); + &attrs, S_IFSOCK, 0, &resfh); break; case NF4FIFO: status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &create->cr_iattr, S_IFIFO, 0, &resfh); + &attrs, S_IFIFO, 0, &resfh); break; case NF4DIR: create->cr_iattr.ia_valid &= ~ATTR_SIZE; status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &create->cr_iattr, S_IFDIR, 0, &resfh); + &attrs, S_IFDIR, 0, &resfh); break; default: @@ -1142,6 +1148,9 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_setattr *setattr = &u->setattr; + struct nfsd_attrs attrs = { + .na_iattr = &setattr->sa_iattr, + }; __be32 status = nfs_ok; int err; @@ -1174,7 +1183,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &setattr->sa_label); if (status) goto out; - status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, + status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs, 0, (time64_t)0); out: fh_drop_write(&cstate->current_fh); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8f64af3e38d8..3bbafd67f50b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5060,11 +5060,14 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, .ia_valid = ATTR_SIZE, .ia_size = 0, }; + struct nfsd_attrs attrs = { + .na_iattr = &iattr, + }; if (!open->op_truncate) return 0; if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) return nfserr_inval; - return nfsd_setattr(rqstp, fh, &iattr, 0, (time64_t)0); + return nfsd_setattr(rqstp, fh, &attrs, 0, (time64_t)0); } static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index fcdab8a8a41f..fffd56809b5e 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -51,6 +51,9 @@ nfsd_proc_setattr(struct svc_rqst *rqstp) struct nfsd_sattrargs *argp = rqstp->rq_argp; struct nfsd_attrstat *resp = rqstp->rq_resp; struct iattr *iap = &argp->attrs; + struct nfsd_attrs attrs = { + .na_iattr = iap, + }; struct svc_fh *fhp; dprintk("nfsd: SETATTR %s, valid=%x, size=%ld\n", @@ -100,7 +103,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp) } } - resp->status = nfsd_setattr(rqstp, fhp, iap, 0, (time64_t)0); + resp->status = nfsd_setattr(rqstp, fhp, &attrs, 0, (time64_t)0); if (resp->status != nfs_ok) goto out; @@ -260,6 +263,9 @@ nfsd_proc_create(struct svc_rqst *rqstp) svc_fh *dirfhp = &argp->fh; svc_fh *newfhp = &resp->fh; struct iattr *attr = &argp->attrs; + struct nfsd_attrs attrs = { + .na_iattr = attr, + }; struct inode *inode; struct dentry *dchild; int type, mode; @@ -385,7 +391,7 @@ nfsd_proc_create(struct svc_rqst *rqstp) if (!inode) { /* File doesn't exist. Create it and set attrs */ resp->status = nfsd_create_locked(rqstp, dirfhp, argp->name, - argp->len, attr, type, rdev, + argp->len, &attrs, type, rdev, newfhp); } else if (type == S_IFREG) { dprintk("nfsd: existing %s, valid=%x, size=%ld\n", @@ -396,7 +402,7 @@ nfsd_proc_create(struct svc_rqst *rqstp) */ attr->ia_valid &= ATTR_SIZE; if (attr->ia_valid) - resp->status = nfsd_setattr(rqstp, newfhp, attr, 0, + resp->status = nfsd_setattr(rqstp, newfhp, &attrs, 0, (time64_t)0); } @@ -511,6 +517,9 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp) { struct nfsd_createargs *argp = rqstp->rq_argp; struct nfsd_diropres *resp = rqstp->rq_resp; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; dprintk("nfsd: MKDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name); @@ -522,7 +531,7 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp) argp->attrs.ia_valid &= ~ATTR_SIZE; fh_init(&resp->fh, NFS_FHSIZE); resp->status = nfsd_create(rqstp, &argp->fh, argp->name, argp->len, - &argp->attrs, S_IFDIR, 0, &resp->fh); + &attrs, S_IFDIR, 0, &resp->fh); fh_put(&argp->fh); if (resp->status != nfs_ok) goto out; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index d79db56475d4..ad613e3a9301 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -349,11 +349,13 @@ nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, * Set various file attributes. After this call fhp needs an fh_put. */ __be32 -nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, +nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfsd_attrs *attr, int check_guard, time64_t guardtime) { struct dentry *dentry; struct inode *inode; + struct iattr *iap = attr->na_iattr; int accmode = NFSD_MAY_SATTR; umode_t ftype = 0; __be32 err; @@ -1202,14 +1204,15 @@ out: * @rqstp: RPC transaction being executed * @fhp: NFS filehandle of parent directory * @resfhp: NFS filehandle of new object - * @iap: requested attributes of new object + * @attrs: requested attributes of new object * * Returns nfs_ok on success, or an nfsstat in network byte order. */ __be32 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct svc_fh *resfhp, struct iattr *iap) + struct svc_fh *resfhp, struct nfsd_attrs *attrs) { + struct iattr *iap = attrs->na_iattr; __be32 status; /* @@ -1230,7 +1233,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, * if the attributes have not changed. */ if (iap->ia_valid) - status = nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0); + status = nfsd_setattr(rqstp, resfhp, attrs, 0, (time64_t)0); else status = nfserrno(commit_metadata(resfhp)); @@ -1269,11 +1272,12 @@ nfsd_check_ignore_resizing(struct iattr *iap) /* The parent directory should already be locked: */ __be32 nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, - char *fname, int flen, struct iattr *iap, - int type, dev_t rdev, struct svc_fh *resfhp) + char *fname, int flen, struct nfsd_attrs *attrs, + int type, dev_t rdev, struct svc_fh *resfhp) { struct dentry *dentry, *dchild; struct inode *dirp; + struct iattr *iap = attrs->na_iattr; __be32 err; int host_err; @@ -1347,7 +1351,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, if (host_err < 0) goto out_nfserr; - err = nfsd_create_setattr(rqstp, fhp, resfhp, iap); + err = nfsd_create_setattr(rqstp, fhp, resfhp, attrs); out: dput(dchild); @@ -1366,8 +1370,8 @@ out_nfserr: */ __be32 nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, - char *fname, int flen, struct iattr *iap, - int type, dev_t rdev, struct svc_fh *resfhp) + char *fname, int flen, struct nfsd_attrs *attrs, + int type, dev_t rdev, struct svc_fh *resfhp) { struct dentry *dentry, *dchild = NULL; __be32 err; @@ -1399,7 +1403,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, dput(dchild); if (err) return err; - return nfsd_create_locked(rqstp, fhp, fname, flen, iap, type, + return nfsd_create_locked(rqstp, fhp, fname, flen, attrs, type, rdev, resfhp); } diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 26347d76f44a..d8b1a36fca95 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -42,6 +42,10 @@ struct nfsd_file; typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned); /* nfsd/vfs.c */ +struct nfsd_attrs { + struct iattr *na_iattr; /* input */ +}; + int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, struct svc_export **expp); __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, @@ -50,7 +54,7 @@ __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *, const char *, unsigned int, struct svc_export **, struct dentry **); __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, - struct iattr *, int, time64_t); + struct nfsd_attrs *, int, time64_t); int nfsd_mountpoint(struct dentry *, struct svc_export *); #ifdef CONFIG_NFSD_V4 __be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *, @@ -63,14 +67,14 @@ __be32 nfsd4_clone_file_range(struct svc_rqst *rqstp, u64 count, bool sync); #endif /* CONFIG_NFSD_V4 */ __be32 nfsd_create_locked(struct svc_rqst *, struct svc_fh *, - char *name, int len, struct iattr *attrs, + char *name, int len, struct nfsd_attrs *attrs, int type, dev_t rdev, struct svc_fh *res); __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, - char *name, int len, struct iattr *attrs, + char *name, int len, struct nfsd_attrs *attrs, int type, dev_t rdev, struct svc_fh *res); __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); __be32 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct svc_fh *resfhp, struct iattr *iap); + struct svc_fh *resfhp, struct nfsd_attrs *iap); __be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp, u64 offset, u32 count, __be32 *verf); #ifdef CONFIG_NFSD_V4 From 93adc1e391a761441d783828b93979b38093d011 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 26 Jul 2022 16:45:30 +1000 Subject: [PATCH 68/78] NFSD: set attributes when creating symlinks The NFS protocol includes attributes when creating symlinks. Linux does store attributes for symlinks and allows them to be set, though they are not used for permission checking. NFSD currently doesn't set standard (struct iattr) attributes when creating symlinks, but for NFSv4 it does set ACLs and security labels. This is inconsistent. To improve consistency, pass the provided attributes into nfsd_symlink() and call nfsd_create_setattr() to set them. NOTE: this results in a behaviour change for all NFS versions when the client sends non-default attributes with a SYMLINK request. With the Linux client, the only attributes are: attr.ia_mode = S_IFLNK | S_IRWXUGO; attr.ia_valid = ATTR_MODE; so the final outcome will be unchanged. Other clients might sent different attributes, and if they did they probably expect them to be honoured. We ignore any error from nfsd_create_setattr(). It isn't really clear what should be done if a file is successfully created, but the attributes cannot be set. NFS doesn't allow partial success to be reported. Reporting failure is probably more misleading than reporting success, so the status is ignored. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 5 ++++- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfsproc.c | 5 ++++- fs/nfsd/vfs.c | 25 ++++++++++++++++++------- fs/nfsd/vfs.h | 5 +++-- 5 files changed, 30 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index aca454449d3a..0078e73009fa 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -397,6 +397,9 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp) { struct nfsd3_symlinkargs *argp = rqstp->rq_argp; struct nfsd3_diropres *resp = rqstp->rq_resp; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; if (argp->tlen == 0) { resp->status = nfserr_inval; @@ -423,7 +426,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp) fh_copy(&resp->dirfh, &argp->ffh); fh_init(&resp->fh, NFS3_FHSIZE); resp->status = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, - argp->flen, argp->tname, &resp->fh); + argp->flen, argp->tname, &attrs, &resp->fh); kfree(argp->tname); out: return rpc_success; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 0d6ee19ffb15..5bc3cbfc0048 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -813,7 +813,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, case NF4LNK: status = nfsd_symlink(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - create->cr_data, &resfh); + create->cr_data, &attrs, &resfh); break; case NF4BLK: diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index fffd56809b5e..acaf58e8e86f 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -478,6 +478,9 @@ nfsd_proc_symlink(struct svc_rqst *rqstp) { struct nfsd_symlinkargs *argp = rqstp->rq_argp; struct nfsd_stat *resp = rqstp->rq_resp; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; struct svc_fh newfh; if (argp->tlen > NFS_MAXPATHLEN) { @@ -499,7 +502,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp) fh_init(&newfh, NFS_FHSIZE); resp->status = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, - argp->tname, &newfh); + argp->tname, &attrs, &newfh); kfree(argp->tname); fh_put(&argp->ffh); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ad613e3a9301..b6f07e9c6f77 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1445,15 +1445,25 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) return 0; } -/* - * Create a symlink and look up its inode +/** + * nfsd_symlink - Create a symlink and look up its inode + * @rqstp: RPC transaction being executed + * @fhp: NFS filehandle of parent directory + * @fname: filename of the new symlink + * @flen: length of @fname + * @path: content of the new symlink (NUL-terminated) + * @attrs: requested attributes of new object + * @resfhp: NFS filehandle of new object + * * N.B. After this call _both_ fhp and resfhp need an fh_put + * + * Returns nfs_ok on success, or an nfsstat in network byte order. */ __be32 nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, - char *fname, int flen, - char *path, - struct svc_fh *resfhp) + char *fname, int flen, + char *path, struct nfsd_attrs *attrs, + struct svc_fh *resfhp) { struct dentry *dentry, *dnew; __be32 err, cerr; @@ -1483,13 +1493,14 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, host_err = vfs_symlink(&init_user_ns, d_inode(dentry), dnew, path); err = nfserrno(host_err); + cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); + if (!err) + nfsd_create_setattr(rqstp, fhp, resfhp, attrs); fh_unlock(fhp); if (!err) err = nfserrno(commit_metadata(fhp)); - fh_drop_write(fhp); - cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); dput(dnew); if (err==0) err = cerr; out: diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index d8b1a36fca95..5047cec4c423 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -114,8 +114,9 @@ __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, char *, int *); __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, - char *name, int len, char *path, - struct svc_fh *res); + char *name, int len, char *path, + struct nfsd_attrs *attrs, + struct svc_fh *res); __be32 nfsd_link(struct svc_rqst *, struct svc_fh *, char *, int, struct svc_fh *); ssize_t nfsd_copy_file_range(struct file *, u64, From d6a97d3f589a3a46a16183e03f3774daee251317 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 26 Jul 2022 16:45:30 +1000 Subject: [PATCH 69/78] NFSD: add security label to struct nfsd_attrs nfsd_setattr() now sets a security label if provided, and nfsv4 provides it in the 'open' and 'create' paths and the 'setattr' path. If setting the label failed (including because the kernel doesn't support labels), an error field in 'struct nfsd_attrs' is set, and the caller can respond. The open/create callers clear FATTR4_WORD2_SECURITY_LABEL in the returned attr set in this case. The setattr caller returns the error. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 49 +++++++++------------------------------------- fs/nfsd/vfs.c | 29 +++------------------------ fs/nfsd/vfs.h | 5 +++-- 3 files changed, 15 insertions(+), 68 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5bc3cbfc0048..bc7ab271524d 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -64,36 +64,6 @@ MODULE_PARM_DESC(nfsd4_ssc_umount_timeout, "idle msecs before unmount export from source server"); #endif -#ifdef CONFIG_NFSD_V4_SECURITY_LABEL -#include - -static inline void -nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) -{ - struct inode *inode = d_inode(resfh->fh_dentry); - int status; - - inode_lock(inode); - status = security_inode_setsecctx(resfh->fh_dentry, - label->data, label->len); - inode_unlock(inode); - - if (status) - /* - * XXX: We should really fail the whole open, but we may - * already have created a new file, so it may be too - * late. For now this seems the least of evils: - */ - bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; - - return; -} -#else -static inline void -nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) -{ } -#endif - #define NFSDDBG_FACILITY NFSDDBG_PROC static u32 nfsd_attrmask[] = { @@ -288,6 +258,7 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap = &open->op_iattr; struct nfsd_attrs attrs = { .na_iattr = iap, + .na_seclabel = &open->op_label, }; struct dentry *parent, *child; __u32 v_mtime, v_atime; @@ -409,6 +380,8 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, set_attr: status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs); + if (attrs.na_labelerr) + open->op_bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; out: fh_unlock(fhp); if (child && !IS_ERR(child)) @@ -450,9 +423,6 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru status = nfsd4_create_file(rqstp, current_fh, *resfh, open); current->fs->umask = 0; - if (!status && open->op_label.len) - nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval); - /* * Following rfc 3530 14.2.16, and rfc 5661 18.16.4 * use the returned bitmask to indicate which attributes @@ -792,6 +762,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_create *create = &u->create; struct nfsd_attrs attrs = { .na_iattr = &create->cr_iattr, + .na_seclabel = &create->cr_label, }; struct svc_fh resfh; __be32 status; @@ -864,8 +835,8 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; - if (create->cr_label.len) - nfsd4_security_inode_setsecctx(&resfh, &create->cr_label, create->cr_bmval); + if (attrs.na_labelerr) + create->cr_bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; if (create->cr_acl != NULL) do_set_nfs4_acl(rqstp, &resfh, create->cr_acl, @@ -1150,6 +1121,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setattr *setattr = &u->setattr; struct nfsd_attrs attrs = { .na_iattr = &setattr->sa_iattr, + .na_seclabel = &setattr->sa_label, }; __be32 status = nfs_ok; int err; @@ -1178,13 +1150,10 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, setattr->sa_acl); if (status) goto out; - if (setattr->sa_label.len) - status = nfsd4_set_nfs4_label(rqstp, &cstate->current_fh, - &setattr->sa_label); - if (status) - goto out; status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs, 0, (time64_t)0); + if (!status) + status = nfserrno(attrs.na_labelerr); out: fh_drop_write(&cstate->current_fh); return status; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index b6f07e9c6f77..e9e1d5cd6ffb 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -458,6 +458,9 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, host_err = notify_change(&init_user_ns, dentry, iap, NULL); out_unlock: + if (attr->na_seclabel && attr->na_seclabel->len) + attr->na_labelerr = security_inode_setsecctx(dentry, + attr->na_seclabel->data, attr->na_seclabel->len); fh_unlock(fhp); if (size_change) put_write_access(inode); @@ -496,32 +499,6 @@ int nfsd4_is_junction(struct dentry *dentry) return 0; return 1; } -#ifdef CONFIG_NFSD_V4_SECURITY_LABEL -__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct xdr_netobj *label) -{ - __be32 error; - int host_error; - struct dentry *dentry; - - error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR); - if (error) - return error; - - dentry = fhp->fh_dentry; - - inode_lock(d_inode(dentry)); - host_error = security_inode_setsecctx(dentry, label->data, label->len); - inode_unlock(d_inode(dentry)); - return nfserrno(host_error); -} -#else -__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct xdr_netobj *label) -{ - return nfserr_notsupp; -} -#endif static struct nfsd4_compound_state *nfsd4_get_cstate(struct svc_rqst *rqstp) { diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 5047cec4c423..d5d4cfe37c93 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -44,6 +44,9 @@ typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned); /* nfsd/vfs.c */ struct nfsd_attrs { struct iattr *na_iattr; /* input */ + struct xdr_netobj *na_seclabel; /* input */ + + int na_labelerr; /* output */ }; int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, @@ -57,8 +60,6 @@ __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, struct nfsd_attrs *, int, time64_t); int nfsd_mountpoint(struct dentry *, struct svc_export *); #ifdef CONFIG_NFSD_V4 -__be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *, - struct xdr_netobj *); __be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *, struct file *, loff_t, loff_t, int); __be32 nfsd4_clone_file_range(struct svc_rqst *rqstp, From c0cbe70742f4a70893cd6e5f6b10b6e89b6db95b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 26 Jul 2022 16:45:30 +1000 Subject: [PATCH 70/78] NFSD: add posix ACLs to struct nfsd_attrs pacl and dpacl pointers are added to struct nfsd_attrs, which requires that we have an nfsd_attrs_free() function to free them. Those nfsv4 functions that can set ACLs now set up these pointers based on the passed in NFSv4 ACL. nfsd_setattr() sets the acls as appropriate. Errors are handled as with security labels. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/acl.h | 6 ++++-- fs/nfsd/nfs4acl.c | 46 +++++++--------------------------------------- fs/nfsd/nfs4proc.c | 46 ++++++++++++++++------------------------------ fs/nfsd/vfs.c | 9 +++++++++ fs/nfsd/vfs.h | 11 +++++++++++ 5 files changed, 47 insertions(+), 71 deletions(-) diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h index ba14d2f4b64f..4b7324458a94 100644 --- a/fs/nfsd/acl.h +++ b/fs/nfsd/acl.h @@ -38,6 +38,8 @@ struct nfs4_acl; struct svc_fh; struct svc_rqst; +struct nfsd_attrs; +enum nfs_ftype4; int nfs4_acl_bytes(int entries); int nfs4_acl_get_whotype(char *, u32); @@ -45,7 +47,7 @@ __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who); int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl); -__be32 nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct nfs4_acl *acl); +__be32 nfsd4_acl_to_attr(enum nfs_ftype4 type, struct nfs4_acl *acl, + struct nfsd_attrs *attr); #endif /* LINUX_NFS4_ACL_H */ diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index eaa3a0cf38f1..bb8e2f6d7d03 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -751,58 +751,26 @@ out_estate: return ret; } -__be32 -nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct nfs4_acl *acl) +__be32 nfsd4_acl_to_attr(enum nfs_ftype4 type, struct nfs4_acl *acl, + struct nfsd_attrs *attr) { - __be32 error; int host_error; - struct dentry *dentry; - struct inode *inode; - struct posix_acl *pacl = NULL, *dpacl = NULL; unsigned int flags = 0; - /* Get inode */ - error = fh_verify(rqstp, fhp, 0, NFSD_MAY_SATTR); - if (error) - return error; + if (!acl) + return nfs_ok; - dentry = fhp->fh_dentry; - inode = d_inode(dentry); - - if (S_ISDIR(inode->i_mode)) + if (type == NF4DIR) flags = NFS4_ACL_DIR; - host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); + host_error = nfs4_acl_nfsv4_to_posix(acl, &attr->na_pacl, + &attr->na_dpacl, flags); if (host_error == -EINVAL) return nfserr_attrnotsupp; - if (host_error < 0) - goto out_nfserr; - - fh_lock(fhp); - - host_error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_ACCESS, pacl); - if (host_error < 0) - goto out_drop_lock; - - if (S_ISDIR(inode->i_mode)) { - host_error = set_posix_acl(&init_user_ns, inode, - ACL_TYPE_DEFAULT, dpacl); - } - -out_drop_lock: - fh_unlock(fhp); - - posix_acl_release(pacl); - posix_acl_release(dpacl); -out_nfserr: - if (host_error == -EOPNOTSUPP) - return nfserr_attrnotsupp; else return nfserrno(host_error); } - static short ace2type(struct nfs4_ace *ace) { diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index bc7ab271524d..ade2ea29feb0 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -128,26 +128,6 @@ is_create_with_attrs(struct nfsd4_open *open) || open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1); } -/* - * if error occurs when setting the acl, just clear the acl bit - * in the returned attr bitmap. - */ -static void -do_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct nfs4_acl *acl, u32 *bmval) -{ - __be32 status; - - status = nfsd4_set_nfs4_acl(rqstp, fhp, acl); - if (status) - /* - * We should probably fail the whole open at this point, - * but we've already created the file, so it's too late; - * So this seems the least of evils: - */ - bmval[0] &= ~FATTR4_WORD0_ACL; -} - static inline void fh_dup2(struct svc_fh *dst, struct svc_fh *src) { @@ -281,6 +261,9 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, if (host_err) return nfserrno(host_err); + if (is_create_with_attrs(open)) + nfsd4_acl_to_attr(NF4REG, open->op_acl, &attrs); + fh_lock_nested(fhp, I_MUTEX_PARENT); child = lookup_one_len(open->op_fname, parent, open->op_fnamelen); @@ -382,8 +365,11 @@ set_attr: if (attrs.na_labelerr) open->op_bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + if (attrs.na_aclerr) + open->op_bmval[0] &= ~FATTR4_WORD0_ACL; out: fh_unlock(fhp); + nfsd_attrs_free(&attrs); if (child && !IS_ERR(child)) dput(child); fh_drop_write(fhp); @@ -446,9 +432,6 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru if (status) goto out; - if (is_create_with_attrs(open) && open->op_acl != NULL) - do_set_nfs4_acl(rqstp, *resfh, open->op_acl, open->op_bmval); - nfsd4_set_open_owner_reply_cache(cstate, open, *resfh); accmode = NFSD_MAY_NOP; if (open->op_created || @@ -779,6 +762,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) return status; + status = nfsd4_acl_to_attr(create->cr_type, create->cr_acl, &attrs); current->fs->umask = create->cr_umask; switch (create->cr_type) { case NF4LNK: @@ -837,10 +821,8 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (attrs.na_labelerr) create->cr_bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; - - if (create->cr_acl != NULL) - do_set_nfs4_acl(rqstp, &resfh, create->cr_acl, - create->cr_bmval); + if (attrs.na_aclerr) + create->cr_bmval[0] &= ~FATTR4_WORD0_ACL; fh_unlock(&cstate->current_fh); set_change_info(&create->cr_cinfo, &cstate->current_fh); @@ -849,6 +831,7 @@ out: fh_put(&resfh); out_umask: current->fs->umask = 0; + nfsd_attrs_free(&attrs); return status; } @@ -1123,6 +1106,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, .na_iattr = &setattr->sa_iattr, .na_seclabel = &setattr->sa_label, }; + struct inode *inode; __be32 status = nfs_ok; int err; @@ -1145,9 +1129,10 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; - if (setattr->sa_acl != NULL) - status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh, - setattr->sa_acl); + inode = cstate->current_fh.fh_dentry->d_inode; + status = nfsd4_acl_to_attr(S_ISDIR(inode->i_mode) ? NF4DIR : NF4REG, + setattr->sa_acl, &attrs); + if (status) goto out; status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs, @@ -1155,6 +1140,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!status) status = nfserrno(attrs.na_labelerr); out: + nfsd_attrs_free(&attrs); fh_drop_write(&cstate->current_fh); return status; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index e9e1d5cd6ffb..fdea5cce2b22 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -461,6 +461,15 @@ out_unlock: if (attr->na_seclabel && attr->na_seclabel->len) attr->na_labelerr = security_inode_setsecctx(dentry, attr->na_seclabel->data, attr->na_seclabel->len); + if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && attr->na_pacl) + attr->na_aclerr = set_posix_acl(&init_user_ns, + inode, ACL_TYPE_ACCESS, + attr->na_pacl); + if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && + !attr->na_aclerr && attr->na_dpacl && S_ISDIR(inode->i_mode)) + attr->na_aclerr = set_posix_acl(&init_user_ns, + inode, ACL_TYPE_DEFAULT, + attr->na_dpacl); fh_unlock(fhp); if (size_change) put_write_access(inode); diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index d5d4cfe37c93..c95cd414b4bb 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -6,6 +6,8 @@ #ifndef LINUX_NFSD_VFS_H #define LINUX_NFSD_VFS_H +#include +#include #include "nfsfh.h" #include "nfsd.h" @@ -45,10 +47,19 @@ typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned); struct nfsd_attrs { struct iattr *na_iattr; /* input */ struct xdr_netobj *na_seclabel; /* input */ + struct posix_acl *na_pacl; /* input */ + struct posix_acl *na_dpacl; /* input */ int na_labelerr; /* output */ + int na_aclerr; /* output */ }; +static inline void nfsd_attrs_free(struct nfsd_attrs *attrs) +{ + posix_acl_release(attrs->na_pacl); + posix_acl_release(attrs->na_dpacl); +} + int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, struct svc_export **expp); __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, From 927bfc5600cd6333c9ef9f090f19e66b7d4c8ee1 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 26 Jul 2022 16:45:30 +1000 Subject: [PATCH 71/78] NFSD: change nfsd_create()/nfsd_symlink() to unlock directory before returning. nfsd_create() usually returns with the directory still locked. nfsd_symlink() usually returns with it unlocked. This is clumsy. Until recently nfsd_create() needed to keep the directory locked until ACLs and security label had been set. These are now set inside nfsd_create() (in nfsd_setattr()) so this need is gone. So change nfsd_create() and nfsd_symlink() to always unlock, and remove any fh_unlock() calls that follow calls to these functions. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 2 -- fs/nfsd/nfs4proc.c | 2 -- fs/nfsd/vfs.c | 38 +++++++++++++++++++++----------------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 0078e73009fa..1fb75a68b8e5 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -388,7 +388,6 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp) fh_init(&resp->fh, NFS3_FHSIZE); resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, &attrs, S_IFDIR, 0, &resp->fh); - fh_unlock(&resp->dirfh); return rpc_success; } @@ -469,7 +468,6 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp) type = nfs3_ftypes[argp->ftype]; resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, &attrs, type, rdev, &resp->fh); - fh_unlock(&resp->dirfh); out: return rpc_success; } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ade2ea29feb0..d585c01d8da6 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -823,8 +823,6 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, create->cr_bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; if (attrs.na_aclerr) create->cr_bmval[0] &= ~FATTR4_WORD0_ACL; - - fh_unlock(&cstate->current_fh); set_change_info(&create->cr_cinfo, &cstate->current_fh); fh_dup2(&cstate->current_fh, &resfh); out: diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index fdea5cce2b22..e726495f2075 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1379,8 +1379,10 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, fh_lock_nested(fhp, I_MUTEX_PARENT); dchild = lookup_one_len(fname, dentry, flen); host_err = PTR_ERR(dchild); - if (IS_ERR(dchild)) - return nfserrno(host_err); + if (IS_ERR(dchild)) { + err = nfserrno(host_err); + goto out_unlock; + } err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); /* * We unconditionally drop our ref to dchild as fh_compose will have @@ -1388,9 +1390,12 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, */ dput(dchild); if (err) - return err; - return nfsd_create_locked(rqstp, fhp, fname, flen, attrs, type, - rdev, resfhp); + goto out_unlock; + err = nfsd_create_locked(rqstp, fhp, fname, flen, attrs, type, + rdev, resfhp); +out_unlock: + fh_unlock(fhp); + return err; } /* @@ -1467,16 +1472,19 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out; host_err = fh_want_write(fhp); - if (host_err) - goto out_nfserr; + if (host_err) { + err = nfserrno(host_err); + goto out; + } fh_lock(fhp); dentry = fhp->fh_dentry; dnew = lookup_one_len(fname, dentry, flen); - host_err = PTR_ERR(dnew); - if (IS_ERR(dnew)) - goto out_nfserr; - + if (IS_ERR(dnew)) { + err = nfserrno(PTR_ERR(dnew)); + fh_unlock(fhp); + goto out_drop_write; + } host_err = vfs_symlink(&init_user_ns, d_inode(dentry), dnew, path); err = nfserrno(host_err); cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); @@ -1485,16 +1493,12 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, fh_unlock(fhp); if (!err) err = nfserrno(commit_metadata(fhp)); - fh_drop_write(fhp); - dput(dnew); if (err==0) err = cerr; +out_drop_write: + fh_drop_write(fhp); out: return err; - -out_nfserr: - err = nfserrno(host_err); - goto out; } /* From b677c0c63a135a916493c064906582e9f3ed4802 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 26 Jul 2022 16:45:30 +1000 Subject: [PATCH 72/78] NFSD: always drop directory lock in nfsd_unlink() Some error paths in nfsd_unlink() allow it to exit without unlocking the directory. This is not a problem in practice as the directory will be locked with an fh_put(), but it is untidy and potentially confusing. This allows us to remove all the fh_unlock() calls that are immediately after nfsd_unlink() calls. Reviewed-by: Jeff Layton Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 2 -- fs/nfsd/nfs4proc.c | 4 +--- fs/nfsd/vfs.c | 7 +++++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 1fb75a68b8e5..3de9b008626c 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -490,7 +490,6 @@ nfsd3_proc_remove(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR, argp->name, argp->len); - fh_unlock(&resp->fh); return rpc_success; } @@ -511,7 +510,6 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_unlink(rqstp, &resp->fh, S_IFDIR, argp->name, argp->len); - fh_unlock(&resp->fh); return rpc_success; } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d585c01d8da6..8d01cb6d04e3 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1002,10 +1002,8 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_grace; status = nfsd_unlink(rqstp, &cstate->current_fh, 0, remove->rm_name, remove->rm_namelen); - if (!status) { - fh_unlock(&cstate->current_fh); + if (!status) set_change_info(&remove->rm_cinfo, &cstate->current_fh); - } return status; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index e726495f2075..00b0b80c0c44 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1753,12 +1753,12 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, rdentry = lookup_one_len(fname, dentry, flen); host_err = PTR_ERR(rdentry); if (IS_ERR(rdentry)) - goto out_drop_write; + goto out_unlock; if (d_really_is_negative(rdentry)) { dput(rdentry); host_err = -ENOENT; - goto out_drop_write; + goto out_unlock; } rinode = d_inode(rdentry); ihold(rinode); @@ -1796,6 +1796,9 @@ out_nfserr: } out: return err; +out_unlock: + fh_unlock(fhp); + goto out_drop_write; } /* From e18bcb33bc5b69bccc2b532075aa00bb49cc01c5 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 26 Jul 2022 16:45:30 +1000 Subject: [PATCH 73/78] NFSD: only call fh_unlock() once in nfsd_link() On non-error paths, nfsd_link() calls fh_unlock() twice. This is safe because fh_unlock() records that the unlock has been done and doesn't repeat it. However it makes the code a little confusing and interferes with changes that are planned for directory locking. So rearrange the code to ensure fh_unlock() is called exactly once if fh_lock() was called. Reviewed-by: Jeff Layton Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/vfs.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 00b0b80c0c44..cce4130e7008 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1541,9 +1541,10 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, dirp = d_inode(ddir); dnew = lookup_one_len(name, ddir, len); - host_err = PTR_ERR(dnew); - if (IS_ERR(dnew)) - goto out_nfserr; + if (IS_ERR(dnew)) { + err = nfserrno(PTR_ERR(dnew)); + goto out_unlock; + } dold = tfhp->fh_dentry; @@ -1562,17 +1563,17 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, else err = nfserrno(host_err); } -out_dput: dput(dnew); -out_unlock: - fh_unlock(ffhp); +out_drop_write: fh_drop_write(tfhp); out: return err; -out_nfserr: - err = nfserrno(host_err); - goto out_unlock; +out_dput: + dput(dnew); +out_unlock: + fh_unlock(ffhp); + goto out_drop_write; } static void From 19d008b46941b8c668402170522e0f7a9258409c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 26 Jul 2022 16:45:30 +1000 Subject: [PATCH 74/78] NFSD: reduce locking in nfsd_lookup() nfsd_lookup() takes an exclusive lock on the parent inode, but no callers want the lock and it may not be needed at all if the result is in the dcache. Change nfsd_lookup_dentry() to not take the lock, and call lookup_one_len_locked() which takes lock only if needed. nfsd4_open() currently expects the lock to still be held, but that isn't necessary as nfsd_validate_delegated_dentry() provides required guarantees without the lock. NOTE: NFSv4 requires directory changeinfo for OPEN even when a create wasn't requested and no change happened. Now that nfsd_lookup() doesn't use fh_lock(), we need to explicitly fill the attributes when no create happens. A new fh_fill_both_attrs() is provided for that task. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 20 ++++++++++++-------- fs/nfsd/nfs4state.c | 3 --- fs/nfsd/nfsfh.c | 19 +++++++++++++++++++ fs/nfsd/nfsfh.h | 2 +- fs/nfsd/vfs.c | 34 ++++++++++++++-------------------- 5 files changed, 46 insertions(+), 32 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 8d01cb6d04e3..46c147696105 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -302,6 +302,11 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, if (d_really_is_positive(child)) { status = nfs_ok; + /* NFSv4 protocol requires change attributes even though + * no change happened. + */ + fh_fill_both_attrs(fhp); + switch (open->op_createmode) { case NFS4_CREATE_UNCHECKED: if (!d_is_reg(child)) @@ -417,15 +422,15 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru if (nfsd4_create_is_exclusive(open->op_createmode) && status == 0) open->op_bmval[1] |= (FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_MODIFY); - } else - /* - * Note this may exit with the parent still locked. - * We will hold the lock until nfsd4_open's final - * lookup, to prevent renames or unlinks until we've had - * a chance to an acquire a delegation if appropriate. - */ + } else { status = nfsd_lookup(rqstp, current_fh, open->op_fname, open->op_fnamelen, *resfh); + if (!status) + /* NFSv4 protocol requires change attributes even though + * no change happened. + */ + fh_fill_both_attrs(current_fh); + } if (status) goto out; status = nfsd_check_obj_isreg(*resfh); @@ -1043,7 +1048,6 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &exp, &dentry); if (err) return err; - fh_unlock(&cstate->current_fh); if (d_really_is_negative(dentry)) { exp_put(exp); err = nfserr_noent; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3bbafd67f50b..e479a1df5435 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5304,9 +5304,6 @@ nfsd4_verify_deleg_dentry(struct nfsd4_open *open, struct nfs4_file *fp, struct dentry *child; __be32 err; - /* parent may already be locked, and it may get unlocked by - * this call, but that is safe. - */ err = nfsd_lookup_dentry(open->op_rqstp, parent, open->op_fname, open->op_fnamelen, &exp, &child); diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 5e2ed4b2a925..cd2946a88d72 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -672,6 +672,25 @@ void fh_fill_post_attrs(struct svc_fh *fhp) nfsd4_change_attribute(&fhp->fh_post_attr, inode); } +/** + * fh_fill_both_attrs - Fill pre-op and post-op attributes + * @fhp: file handle to be updated + * + * This is used when the directory wasn't changed, but wcc attributes + * are needed anyway. + */ +void fh_fill_both_attrs(struct svc_fh *fhp) +{ + fh_fill_post_attrs(fhp); + if (!fhp->fh_post_saved) + return; + fhp->fh_pre_change = fhp->fh_post_change; + fhp->fh_pre_mtime = fhp->fh_post_attr.mtime; + fhp->fh_pre_ctime = fhp->fh_post_attr.ctime; + fhp->fh_pre_size = fhp->fh_post_attr.size; + fhp->fh_pre_saved = true; +} + /* * Release a file handle. */ diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index fb9d358a267e..28a4f9a94e2c 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -322,7 +322,7 @@ static inline u64 nfsd4_change_attribute(struct kstat *stat, extern void fh_fill_pre_attrs(struct svc_fh *fhp); extern void fh_fill_post_attrs(struct svc_fh *fhp); - +extern void fh_fill_both_attrs(struct svc_fh *fhp); /* * Lock a file handle/inode diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index cce4130e7008..23baeca0f5ee 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -199,27 +199,13 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out_nfserr; } } else { - /* - * In the nfsd4_open() case, this may be held across - * subsequent open and delegation acquisition which may - * need to take the child's i_mutex: - */ - fh_lock_nested(fhp, I_MUTEX_PARENT); - dentry = lookup_one_len(name, dparent, len); + dentry = lookup_one_len_unlocked(name, dparent, len); host_err = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_nfserr; if (nfsd_mountpoint(dentry, exp)) { - /* - * We don't need the i_mutex after all. It's - * still possible we could open this (regular - * files can be mountpoints too), but the - * i_mutex is just there to prevent renames of - * something that we might be about to delegate, - * and a mountpoint won't be renamed: - */ - fh_unlock(fhp); - if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) { + host_err = nfsd_cross_mnt(rqstp, &dentry, &exp); + if (host_err) { dput(dentry); goto out_nfserr; } @@ -234,7 +220,15 @@ out_nfserr: return nfserrno(host_err); } -/* +/** + * nfsd_lookup - look up a single path component for nfsd + * + * @rqstp: the request context + * @fhp: the file handle of the directory + * @name: the component name, or %NULL to look up parent + * @len: length of name to examine + * @resfh: pointer to pre-initialised filehandle to hold result. + * * Look up one component of a pathname. * N.B. After this call _both_ fhp and resfh need an fh_put * @@ -244,11 +238,11 @@ out_nfserr: * returned. Otherwise the covered directory is returned. * NOTE: this mountpoint crossing is not supported properly by all * clients and is explicitly disallowed for NFSv3 - * NeilBrown + * */ __be32 nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, - unsigned int len, struct svc_fh *resfh) + unsigned int len, struct svc_fh *resfh) { struct svc_export *exp; struct dentry *dentry; From debf16f0c671cb8db154a9ebcd6014cfff683b80 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 26 Jul 2022 16:45:30 +1000 Subject: [PATCH 75/78] NFSD: use explicit lock/unlock for directory ops When creating or unlinking a name in a directory use explicit inode_lock_nested() instead of fh_lock(), and explicit calls to fh_fill_pre_attrs() and fh_fill_post_attrs(). This is already done for renames, with lock_rename() as the explicit locking. Also move the 'fill' calls closer to the operation that might change the attributes. This way they are avoided on some error paths. For the v2-only code in nfsproc.c, the fill calls are not replaced as they aren't needed. Making the locking explicit will simplify proposed future changes to locking for directories. It also makes it easily visible exactly where pre/post attributes are used - not all callers of fh_lock() actually need the pre/post attributes. Reviewed-by: Jeff Layton Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 6 ++++-- fs/nfsd/nfs4proc.c | 6 ++++-- fs/nfsd/nfsproc.c | 5 ++--- fs/nfsd/vfs.c | 30 +++++++++++++++++++----------- 4 files changed, 29 insertions(+), 18 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 3de9b008626c..a41cca619338 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -260,7 +260,7 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, if (host_err) return nfserrno(host_err); - fh_lock_nested(fhp, I_MUTEX_PARENT); + inode_lock_nested(inode, I_MUTEX_PARENT); child = lookup_one_len(argp->name, parent, argp->len); if (IS_ERR(child)) { @@ -318,11 +318,13 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, if (!IS_POSIXACL(inode)) iap->ia_mode &= ~current_umask(); + fh_fill_pre_attrs(fhp); host_err = vfs_create(&init_user_ns, inode, child, iap->ia_mode, true); if (host_err < 0) { status = nfserrno(host_err); goto out; } + fh_fill_post_attrs(fhp); /* A newly created file already has a file size of zero. */ if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0)) @@ -340,7 +342,7 @@ set_attr: status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs); out: - fh_unlock(fhp); + inode_unlock(inode); if (child && !IS_ERR(child)) dput(child); fh_drop_write(fhp); diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 46c147696105..a72ab97f77ef 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -264,7 +264,7 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, if (is_create_with_attrs(open)) nfsd4_acl_to_attr(NF4REG, open->op_acl, &attrs); - fh_lock_nested(fhp, I_MUTEX_PARENT); + inode_lock_nested(inode, I_MUTEX_PARENT); child = lookup_one_len(open->op_fname, parent, open->op_fnamelen); if (IS_ERR(child)) { @@ -348,10 +348,12 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, if (!IS_POSIXACL(inode)) iap->ia_mode &= ~current_umask(); + fh_fill_pre_attrs(fhp); status = nfsd4_vfs_create(fhp, child, open); if (status != nfs_ok) goto out; open->op_created = true; + fh_fill_post_attrs(fhp); /* A newly created file already has a file size of zero. */ if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0)) @@ -373,7 +375,7 @@ set_attr: if (attrs.na_aclerr) open->op_bmval[0] &= ~FATTR4_WORD0_ACL; out: - fh_unlock(fhp); + inode_unlock(inode); nfsd_attrs_free(&attrs); if (child && !IS_ERR(child)) dput(child); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index acaf58e8e86f..7381972f1677 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -291,7 +291,7 @@ nfsd_proc_create(struct svc_rqst *rqstp) goto done; } - fh_lock_nested(dirfhp, I_MUTEX_PARENT); + inode_lock_nested(dirfhp->fh_dentry->d_inode, I_MUTEX_PARENT); dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len); if (IS_ERR(dchild)) { resp->status = nfserrno(PTR_ERR(dchild)); @@ -407,8 +407,7 @@ nfsd_proc_create(struct svc_rqst *rqstp) } out_unlock: - /* We don't really need to unlock, as fh_put does it. */ - fh_unlock(dirfhp); + inode_unlock(dirfhp->fh_dentry->d_inode); fh_drop_write(dirfhp); done: fh_put(dirfhp); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 23baeca0f5ee..19b47ca2f238 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1370,7 +1370,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, if (host_err) return nfserrno(host_err); - fh_lock_nested(fhp, I_MUTEX_PARENT); + inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT); dchild = lookup_one_len(fname, dentry, flen); host_err = PTR_ERR(dchild); if (IS_ERR(dchild)) { @@ -1385,10 +1385,12 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, dput(dchild); if (err) goto out_unlock; + fh_fill_pre_attrs(fhp); err = nfsd_create_locked(rqstp, fhp, fname, flen, attrs, type, rdev, resfhp); + fh_fill_post_attrs(fhp); out_unlock: - fh_unlock(fhp); + inode_unlock(dentry->d_inode); return err; } @@ -1471,20 +1473,22 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out; } - fh_lock(fhp); dentry = fhp->fh_dentry; + inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT); dnew = lookup_one_len(fname, dentry, flen); if (IS_ERR(dnew)) { err = nfserrno(PTR_ERR(dnew)); - fh_unlock(fhp); + inode_unlock(dentry->d_inode); goto out_drop_write; } + fh_fill_pre_attrs(fhp); host_err = vfs_symlink(&init_user_ns, d_inode(dentry), dnew, path); err = nfserrno(host_err); cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); if (!err) nfsd_create_setattr(rqstp, fhp, resfhp, attrs); - fh_unlock(fhp); + fh_fill_post_attrs(fhp); + inode_unlock(dentry->d_inode); if (!err) err = nfserrno(commit_metadata(fhp)); dput(dnew); @@ -1530,9 +1534,9 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, goto out; } - fh_lock_nested(ffhp, I_MUTEX_PARENT); ddir = ffhp->fh_dentry; dirp = d_inode(ddir); + inode_lock_nested(dirp, I_MUTEX_PARENT); dnew = lookup_one_len(name, ddir, len); if (IS_ERR(dnew)) { @@ -1545,8 +1549,10 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, err = nfserr_noent; if (d_really_is_negative(dold)) goto out_dput; + fh_fill_pre_attrs(ffhp); host_err = vfs_link(dold, &init_user_ns, dirp, dnew, NULL); - fh_unlock(ffhp); + fh_fill_post_attrs(ffhp); + inode_unlock(dirp); if (!host_err) { err = nfserrno(commit_metadata(ffhp)); if (!err) @@ -1566,7 +1572,7 @@ out: out_dput: dput(dnew); out_unlock: - fh_unlock(ffhp); + inode_unlock(dirp); goto out_drop_write; } @@ -1741,9 +1747,9 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (host_err) goto out_nfserr; - fh_lock_nested(fhp, I_MUTEX_PARENT); dentry = fhp->fh_dentry; dirp = d_inode(dentry); + inode_lock_nested(dirp, I_MUTEX_PARENT); rdentry = lookup_one_len(fname, dentry, flen); host_err = PTR_ERR(rdentry); @@ -1761,6 +1767,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (!type) type = d_inode(rdentry)->i_mode & S_IFMT; + fh_fill_pre_attrs(fhp); if (type != S_IFDIR) { if (rdentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK) nfsd_close_cached_files(rdentry); @@ -1768,8 +1775,9 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, } else { host_err = vfs_rmdir(&init_user_ns, dirp, rdentry); } + fh_fill_post_attrs(fhp); - fh_unlock(fhp); + inode_unlock(dirp); if (!host_err) host_err = commit_metadata(fhp); dput(rdentry); @@ -1792,7 +1800,7 @@ out_nfserr: out: return err; out_unlock: - fh_unlock(fhp); + inode_unlock(dirp); goto out_drop_write; } From bb4d53d66e4b8c8b8e5634802262e53851a2d2db Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 26 Jul 2022 16:45:30 +1000 Subject: [PATCH 76/78] NFSD: use (un)lock_inode instead of fh_(un)lock for file operations When locking a file to access ACLs and xattrs etc, use explicit locking with inode_lock() instead of fh_lock(). This means that the calls to fh_fill_pre/post_attr() are also explicit which improves readability and allows us to place them only where they are needed. Only the xattr calls need pre/post information. When locking a file we don't need I_MUTEX_PARENT as the file is not a parent of anything, so we can use inode_lock() directly rather than the inode_lock_nested() call that fh_lock() uses. Reviewed-by: Jeff Layton Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/nfs2acl.c | 6 +++--- fs/nfsd/nfs3acl.c | 4 ++-- fs/nfsd/nfs4state.c | 9 +++++---- fs/nfsd/vfs.c | 34 ++++++++++++++++++++-------------- 4 files changed, 30 insertions(+), 23 deletions(-) diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index b5760801d377..9edd3c1a30fb 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -111,7 +111,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp) if (error) goto out_errno; - fh_lock(fh); + inode_lock(inode); error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_ACCESS, argp->acl_access); @@ -122,7 +122,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp) if (error) goto out_drop_lock; - fh_unlock(fh); + inode_unlock(inode); fh_drop_write(fh); @@ -136,7 +136,7 @@ out: return rpc_success; out_drop_lock: - fh_unlock(fh); + inode_unlock(inode); fh_drop_write(fh); out_errno: resp->status = nfserrno(error); diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 35b2ebda14da..9446c6743664 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -101,7 +101,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp) if (error) goto out_errno; - fh_lock(fh); + inode_lock(inode); error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_ACCESS, argp->acl_access); @@ -111,7 +111,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp) argp->acl_default); out_drop_lock: - fh_unlock(fh); + inode_unlock(inode); fh_drop_write(fh); out_errno: resp->status = nfserrno(error); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e479a1df5435..c5d199d7e6b4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7397,21 +7397,22 @@ out: static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock) { struct nfsd_file *nf; + struct inode *inode; __be32 err; err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf); if (err) return err; - fh_lock(fhp); /* to block new leases till after test_lock: */ - err = nfserrno(nfsd_open_break_lease(fhp->fh_dentry->d_inode, - NFSD_MAY_READ)); + inode = fhp->fh_dentry->d_inode; + inode_lock(inode); /* to block new leases till after test_lock: */ + err = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); if (err) goto out; lock->fl_file = nf->nf_file; err = nfserrno(vfs_test_lock(nf->nf_file, lock)); lock->fl_file = NULL; out: - fh_unlock(fhp); + inode_unlock(inode); nfsd_file_put(nf); return err; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 19b47ca2f238..8d5aa2faabf7 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -416,7 +416,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, return err; } - fh_lock(fhp); + inode_lock(inode); if (size_change) { /* * RFC5661, Section 18.30.4: @@ -464,7 +464,7 @@ out_unlock: attr->na_aclerr = set_posix_acl(&init_user_ns, inode, ACL_TYPE_DEFAULT, attr->na_dpacl); - fh_unlock(fhp); + inode_unlock(inode); if (size_change) put_write_access(inode); out: @@ -2155,13 +2155,16 @@ out: return err; } -/* - * Removexattr and setxattr need to call fh_lock to both lock the inode - * and set the change attribute. Since the top-level vfs_removexattr - * and vfs_setxattr calls already do their own inode_lock calls, call - * the _locked variant. Pass in a NULL pointer for delegated_inode, - * and let the client deal with NFS4ERR_DELAY (same as with e.g. - * setattr and remove). +/** + * nfsd_removexattr - Remove an extended attribute + * @rqstp: RPC transaction being executed + * @fhp: NFS filehandle of object with xattr to remove + * @name: name of xattr to remove (NUL-terminate) + * + * Pass in a NULL pointer for delegated_inode, and let the client deal + * with NFS4ERR_DELAY (same as with e.g. setattr and remove). + * + * Returns nfs_ok on success, or an nfsstat in network byte order. */ __be32 nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name) @@ -2177,12 +2180,14 @@ nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name) if (ret) return nfserrno(ret); - fh_lock(fhp); + inode_lock(fhp->fh_dentry->d_inode); + fh_fill_pre_attrs(fhp); ret = __vfs_removexattr_locked(&init_user_ns, fhp->fh_dentry, name, NULL); - fh_unlock(fhp); + fh_fill_post_attrs(fhp); + inode_unlock(fhp->fh_dentry->d_inode); fh_drop_write(fhp); return nfsd_xattr_errno(ret); @@ -2202,12 +2207,13 @@ nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, ret = fh_want_write(fhp); if (ret) return nfserrno(ret); - fh_lock(fhp); + inode_lock(fhp->fh_dentry->d_inode); + fh_fill_pre_attrs(fhp); ret = __vfs_setxattr_locked(&init_user_ns, fhp->fh_dentry, name, buf, len, flags, NULL); - - fh_unlock(fhp); + fh_fill_post_attrs(fhp); + inode_unlock(fhp->fh_dentry->d_inode); fh_drop_write(fhp); return nfsd_xattr_errno(ret); From dd8dd403d7b223cc77ee89d8d09caf045e90e648 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 26 Jul 2022 16:45:30 +1000 Subject: [PATCH 77/78] NFSD: discard fh_locked flag and fh_lock/fh_unlock As all inode locking is now fully balanced, fh_put() does not need to call fh_unlock(). fh_lock() and fh_unlock() are no longer used, so discard them. These are the only real users of ->fh_locked, so discard that too. Reviewed-by: Jeff Layton Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/nfsfh.c | 3 +-- fs/nfsd/nfsfh.h | 56 ++++--------------------------------------------- fs/nfsd/vfs.c | 17 +-------------- 3 files changed, 6 insertions(+), 70 deletions(-) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index cd2946a88d72..a5b71526cee0 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -549,7 +549,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, if (ref_fh == fhp) fh_put(ref_fh); - if (fhp->fh_locked || fhp->fh_dentry) { + if (fhp->fh_dentry) { printk(KERN_ERR "fh_compose: fh %pd2 not initialized!\n", dentry); } @@ -700,7 +700,6 @@ fh_put(struct svc_fh *fhp) struct dentry * dentry = fhp->fh_dentry; struct svc_export * exp = fhp->fh_export; if (dentry) { - fh_unlock(fhp); fhp->fh_dentry = NULL; dput(dentry); fh_clear_pre_post_attrs(fhp); diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 28a4f9a94e2c..c3ae6414fc5c 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -81,7 +81,6 @@ typedef struct svc_fh { struct dentry * fh_dentry; /* validated dentry */ struct svc_export * fh_export; /* export pointer */ - bool fh_locked; /* inode locked by us */ bool fh_want_write; /* remount protection taken */ bool fh_no_wcc; /* no wcc data needed */ bool fh_no_atomic_attr; @@ -93,7 +92,7 @@ typedef struct svc_fh { bool fh_post_saved; /* post-op attrs saved */ bool fh_pre_saved; /* pre-op attrs saved */ - /* Pre-op attributes saved during fh_lock */ + /* Pre-op attributes saved when inode is locked */ __u64 fh_pre_size; /* size before operation */ struct timespec64 fh_pre_mtime; /* mtime before oper */ struct timespec64 fh_pre_ctime; /* ctime before oper */ @@ -103,7 +102,7 @@ typedef struct svc_fh { */ u64 fh_pre_change; - /* Post-op attributes saved in fh_unlock */ + /* Post-op attributes saved in fh_fill_post_attrs() */ struct kstat fh_post_attr; /* full attrs after operation */ u64 fh_post_change; /* nfsv4 change; see above */ } svc_fh; @@ -223,8 +222,8 @@ void fh_put(struct svc_fh *); static __inline__ struct svc_fh * fh_copy(struct svc_fh *dst, struct svc_fh *src) { - WARN_ON(src->fh_dentry || src->fh_locked); - + WARN_ON(src->fh_dentry); + *dst = *src; return dst; } @@ -323,51 +322,4 @@ static inline u64 nfsd4_change_attribute(struct kstat *stat, extern void fh_fill_pre_attrs(struct svc_fh *fhp); extern void fh_fill_post_attrs(struct svc_fh *fhp); extern void fh_fill_both_attrs(struct svc_fh *fhp); - -/* - * Lock a file handle/inode - * NOTE: both fh_lock and fh_unlock are done "by hand" in - * vfs.c:nfsd_rename as it needs to grab 2 i_mutex's at once - * so, any changes here should be reflected there. - */ - -static inline void -fh_lock_nested(struct svc_fh *fhp, unsigned int subclass) -{ - struct dentry *dentry = fhp->fh_dentry; - struct inode *inode; - - BUG_ON(!dentry); - - if (fhp->fh_locked) { - printk(KERN_WARNING "fh_lock: %pd2 already locked!\n", - dentry); - return; - } - - inode = d_inode(dentry); - inode_lock_nested(inode, subclass); - fh_fill_pre_attrs(fhp); - fhp->fh_locked = true; -} - -static inline void -fh_lock(struct svc_fh *fhp) -{ - fh_lock_nested(fhp, I_MUTEX_NORMAL); -} - -/* - * Unlock a file handle/inode - */ -static inline void -fh_unlock(struct svc_fh *fhp) -{ - if (fhp->fh_locked) { - fh_fill_post_attrs(fhp); - inode_unlock(d_inode(fhp->fh_dentry)); - fhp->fh_locked = false; - } -} - #endif /* _LINUX_NFSD_NFSFH_H */ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 8d5aa2faabf7..9f486b788ed0 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1265,13 +1265,6 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, dirp = d_inode(dentry); dchild = dget(resfhp->fh_dentry); - if (!fhp->fh_locked) { - WARN_ONCE(1, "nfsd_create: parent %pd2 not locked!\n", - dentry); - err = nfserr_io; - goto out; - } - err = nfsd_permission(rqstp, fhp->fh_export, dentry, NFSD_MAY_CREATE); if (err) goto out; @@ -1634,10 +1627,7 @@ retry: goto out; } - /* cannot use fh_lock as we need deadlock protective ordering - * so do it by hand */ trap = lock_rename(tdentry, fdentry); - ffhp->fh_locked = tfhp->fh_locked = true; fh_fill_pre_attrs(ffhp); fh_fill_pre_attrs(tfhp); @@ -1693,17 +1683,12 @@ retry: dput(odentry); out_nfserr: err = nfserrno(host_err); - /* - * We cannot rely on fh_unlock on the two filehandles, - * as that would do the wrong thing if the two directories - * were the same, so again we do it by hand. - */ + if (!close_cached) { fh_fill_post_attrs(ffhp); fh_fill_post_attrs(tfhp); } unlock_rename(tdentry, fdentry); - ffhp->fh_locked = tfhp->fh_locked = false; fh_drop_write(ffhp); /* From 6930bcbfb6ceda63e298c6af6d733ecdf6bd4cde Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 1 Aug 2022 15:57:26 -0400 Subject: [PATCH 78/78] lockd: detect and reject lock arguments that overflow lockd doesn't currently vet the start and length in nlm4 requests like it should, and can end up generating lock requests with arguments that overflow when passed to the filesystem. The NLM4 protocol uses unsigned 64-bit arguments for both start and length, whereas struct file_lock tracks the start and end as loff_t values. By the time we get around to calling nlm4svc_retrieve_args, we've lost the information that would allow us to determine if there was an overflow. Start tracking the actual start and len for NLM4 requests in the nlm_lock. In nlm4svc_retrieve_args, vet these values to ensure they won't cause an overflow, and return NLM4_FBIG if they do. Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=392 Reported-by: Jan Kasiak Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Cc: # 5.14+ --- fs/lockd/svc4proc.c | 8 ++++++++ fs/lockd/xdr4.c | 19 ++----------------- include/linux/lockd/xdr.h | 2 ++ 3 files changed, 12 insertions(+), 17 deletions(-) diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 4f247ab8be61..bf274f23969b 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -32,6 +32,10 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, if (!nlmsvc_ops) return nlm_lck_denied_nolocks; + if (lock->lock_start > OFFSET_MAX || + (lock->lock_len && ((lock->lock_len - 1) > (OFFSET_MAX - lock->lock_start)))) + return nlm4_fbig; + /* Obtain host handle */ if (!(host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len)) || (argp->monitor && nsm_monitor(host) < 0)) @@ -50,6 +54,10 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, /* Set up the missing parts of the file_lock structure */ lock->fl.fl_file = file->f_file[mode]; lock->fl.fl_pid = current->tgid; + lock->fl.fl_start = (loff_t)lock->lock_start; + lock->fl.fl_end = lock->lock_len ? + (loff_t)(lock->lock_start + lock->lock_len - 1) : + OFFSET_MAX; lock->fl.fl_lmops = &nlmsvc_lock_operations; nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid); if (!lock->fl.fl_owner) { diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 856267c0864b..712fdfeb8ef0 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -20,13 +20,6 @@ #include "svcxdr.h" -static inline loff_t -s64_to_loff_t(__s64 offset) -{ - return (loff_t)offset; -} - - static inline s64 loff_t_to_s64(loff_t offset) { @@ -70,8 +63,6 @@ static bool svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock) { struct file_lock *fl = &lock->fl; - u64 len, start; - s64 end; if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) return false; @@ -81,20 +72,14 @@ svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock) return false; if (xdr_stream_decode_u32(xdr, &lock->svid) < 0) return false; - if (xdr_stream_decode_u64(xdr, &start) < 0) + if (xdr_stream_decode_u64(xdr, &lock->lock_start) < 0) return false; - if (xdr_stream_decode_u64(xdr, &len) < 0) + if (xdr_stream_decode_u64(xdr, &lock->lock_len) < 0) return false; locks_init_lock(fl); fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; - end = start + len - 1; - fl->fl_start = s64_to_loff_t(start); - if (len == 0 || end < 0) - fl->fl_end = OFFSET_MAX; - else - fl->fl_end = s64_to_loff_t(end); return true; } diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 398f70093cd3..67e4a2c5500b 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -41,6 +41,8 @@ struct nlm_lock { struct nfs_fh fh; struct xdr_netobj oh; u32 svid; + u64 lock_start; + u64 lock_len; struct file_lock fl; };