From 40882deb83c29d8df4470d4e5e7f137b6acf7ad1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 13 Mar 2023 18:45:53 -0400 Subject: [PATCH 01/20] NFSv4.1: Always send a RECLAIM_COMPLETE after establishing lease The spec requires that we always at least send a RECLAIM_COMPLETE when we're done establishing the lease and recovering any state. Fixes: fce5c838e133 ("nfs41: RECLAIM_COMPLETE functionality") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4state.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 2a0ca5c7f082..660ccfaf463e 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -67,6 +67,8 @@ #define OPENOWNER_POOL_SIZE 8 +static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp); + const nfs4_stateid zero_stateid = { { .data = { 0 } }, .type = NFS4_SPECIAL_STATEID_TYPE, @@ -330,6 +332,8 @@ do_confirm: status = nfs4_proc_create_session(clp, cred); if (status != 0) goto out; + if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R)) + nfs4_state_start_reclaim_reboot(clp); nfs41_finish_session_reset(clp); nfs_mark_client_ready(clp, NFS_CS_READY); out: From c1d889cf99b8f9b548f45d0d15b7d4108bbc715f Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 10 Mar 2023 14:58:40 -0800 Subject: [PATCH 02/20] lockd: simplify two-level sysctl registration for nlm_sysctls There is no need to declare two tables to just create directories, this can be easily be done with a prefix path with register_sysctl(). Simplify this registration. Signed-off-by: Luis Chamberlain Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/lockd/svc.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 9a47303b2cba..bb94949bc223 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -510,24 +510,6 @@ static struct ctl_table nlm_sysctls[] = { { } }; -static struct ctl_table nlm_sysctl_dir[] = { - { - .procname = "nfs", - .mode = 0555, - .child = nlm_sysctls, - }, - { } -}; - -static struct ctl_table nlm_sysctl_root[] = { - { - .procname = "fs", - .mode = 0555, - .child = nlm_sysctl_dir, - }, - { } -}; - #endif /* CONFIG_SYSCTL */ /* @@ -644,7 +626,7 @@ static int __init init_nlm(void) #ifdef CONFIG_SYSCTL err = -ENOMEM; - nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root); + nlm_sysctl_table = register_sysctl("fs/nfs", nlm_sysctls); if (nlm_sysctl_table == NULL) goto err_sysctl; #endif From a2183160ca7e2bcb210954ad69d7cbf012fea0f1 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 10 Mar 2023 14:58:41 -0800 Subject: [PATCH 03/20] nfs: simplify two-level sysctl registration for nfs4_cb_sysctls There is no need to declare two tables to just create directories, this can be easily be done with a prefix path with register_sysctl(). Simplify this registration. Signed-off-by: Luis Chamberlain Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/nfs4sysctl.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c index c394e4447100..e776200e9a11 100644 --- a/fs/nfs/nfs4sysctl.c +++ b/fs/nfs/nfs4sysctl.c @@ -37,27 +37,10 @@ static struct ctl_table nfs4_cb_sysctls[] = { { } }; -static struct ctl_table nfs4_cb_sysctl_dir[] = { - { - .procname = "nfs", - .mode = 0555, - .child = nfs4_cb_sysctls, - }, - { } -}; - -static struct ctl_table nfs4_cb_sysctl_root[] = { - { - .procname = "fs", - .mode = 0555, - .child = nfs4_cb_sysctl_dir, - }, - { } -}; - int nfs4_register_sysctl(void) { - nfs4_callback_sysctl_table = register_sysctl_table(nfs4_cb_sysctl_root); + nfs4_callback_sysctl_table = register_sysctl("fs/nfs", + nfs4_cb_sysctls); if (nfs4_callback_sysctl_table == NULL) return -ENOMEM; return 0; From 39724217447fc3b3ca76ff6fd5086e26165ba6d8 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 10 Mar 2023 14:58:42 -0800 Subject: [PATCH 04/20] nfs: simplify two-level sysctl registration for nfs_cb_sysctls There is no need to declare two tables to just create directories, this can be easily be done with a prefix path with register_sysctl(). Simplify this registration. Signed-off-by: Luis Chamberlain Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/sysctl.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index 7aea195ddb35..f39e2089bc4c 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -32,27 +32,9 @@ static struct ctl_table nfs_cb_sysctls[] = { { } }; -static struct ctl_table nfs_cb_sysctl_dir[] = { - { - .procname = "nfs", - .mode = 0555, - .child = nfs_cb_sysctls, - }, - { } -}; - -static struct ctl_table nfs_cb_sysctl_root[] = { - { - .procname = "fs", - .mode = 0555, - .child = nfs_cb_sysctl_dir, - }, - { } -}; - int nfs_register_sysctl(void) { - nfs_callback_sysctl_table = register_sysctl_table(nfs_cb_sysctl_root); + nfs_callback_sysctl_table = register_sysctl("fs/nfs", nfs_cb_sysctls); if (nfs_callback_sysctl_table == NULL) return -ENOMEM; return 0; From 17c6d0ce83406aebc98f8ce16c66f5416106963d Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Sat, 11 Mar 2023 15:39:41 -0800 Subject: [PATCH 05/20] sunrpc: simplify one-level sysctl registration for xr_tunables_table There is no need to declare an extra tables to just create directory, this can be easily be done with a prefix path with register_sysctl(). Simplify this registration. Signed-off-by: Luis Chamberlain Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 10bb2b929c6d..29b0562d62e7 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -140,15 +140,6 @@ static struct ctl_table xr_tunables_table[] = { { }, }; -static struct ctl_table sunrpc_table[] = { - { - .procname = "sunrpc", - .mode = 0555, - .child = xr_tunables_table - }, - { }, -}; - #endif static const struct rpc_xprt_ops xprt_rdma_procs; @@ -799,7 +790,7 @@ int xprt_rdma_init(void) #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (!sunrpc_table_header) - sunrpc_table_header = register_sysctl_table(sunrpc_table); + sunrpc_table_header = register_sysctl("sunrpc", xr_tunables_table); #endif return 0; } From c946cb69f238b2bd3e42790b8450f76b62b491f4 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Sat, 11 Mar 2023 15:39:42 -0800 Subject: [PATCH 06/20] sunrpc: simplify one-level sysctl registration for xs_tunables_table There is no need to declare an extra tables to just create directory, this can be easily be done with a prefix path with register_sysctl(). Simplify this registration. Signed-off-by: Luis Chamberlain Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- net/sunrpc/xprtsock.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 6cacd70a15ff..5f9030b81c9e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -78,7 +78,7 @@ static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; /* * We can register our own files under /proc/sys/sunrpc by - * calling register_sysctl_table() again. The files in that + * calling register_sysctl() again. The files in that * directory become the union of all files registered there. * * We simply need to make sure that we don't collide with @@ -158,15 +158,6 @@ static struct ctl_table xs_tunables_table[] = { { }, }; -static struct ctl_table sunrpc_table[] = { - { - .procname = "sunrpc", - .mode = 0555, - .child = xs_tunables_table - }, - { }, -}; - /* * Wait duration for a reply from the RPC portmapper. */ @@ -3178,7 +3169,7 @@ static struct xprt_class xs_bc_tcp_transport = { int init_socket_xprt(void) { if (!sunrpc_table_header) - sunrpc_table_header = register_sysctl_table(sunrpc_table); + sunrpc_table_header = register_sysctl("sunrpc", xs_tunables_table); xprt_register_transport(&xs_local_transport); xprt_register_transport(&xs_udp_transport); From 32e356be32b6676d2d641ed4a4fe088c6d211d92 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Sat, 11 Mar 2023 15:39:43 -0800 Subject: [PATCH 07/20] sunrpc: move sunrpc_table and proc routines above No need to do a forward declaration for sunrpc_table, just move the sysctls up as everyone else does it. This will make the next change easier to read. This change produces no functional changes. Signed-off-by: Luis Chamberlain Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- net/sunrpc/sysctl.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 3aad6ef18504..afdfcc5403af 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -40,25 +40,6 @@ EXPORT_SYMBOL_GPL(nlm_debug); #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -static struct ctl_table_header *sunrpc_table_header; -static struct ctl_table sunrpc_table[]; - -void -rpc_register_sysctl(void) -{ - if (!sunrpc_table_header) - sunrpc_table_header = register_sysctl_table(sunrpc_table); -} - -void -rpc_unregister_sysctl(void) -{ - if (sunrpc_table_header) { - unregister_sysctl_table(sunrpc_table_header); - sunrpc_table_header = NULL; - } -} - static int proc_do_xprt(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -142,6 +123,7 @@ done: return 0; } +static struct ctl_table_header *sunrpc_table_header; static struct ctl_table debug_table[] = { { @@ -190,4 +172,19 @@ static struct ctl_table sunrpc_table[] = { { } }; +void +rpc_register_sysctl(void) +{ + if (!sunrpc_table_header) + sunrpc_table_header = register_sysctl_table(sunrpc_table); +} + +void +rpc_unregister_sysctl(void) +{ + if (sunrpc_table_header) { + unregister_sysctl_table(sunrpc_table_header); + sunrpc_table_header = NULL; + } +} #endif From 703c6d03f165183b97526c182aa1d701b40f0453 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Sat, 11 Mar 2023 15:39:44 -0800 Subject: [PATCH 08/20] sunrpc: simplify one-level sysctl registration for debug_table There is no need to declare an extra tables to just create directory, this can be easily be done with a prefix path with register_sysctl(). Simplify this registration. Signed-off-by: Luis Chamberlain Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- net/sunrpc/sysctl.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index afdfcc5403af..93941ab12549 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -163,20 +163,11 @@ static struct ctl_table debug_table[] = { { } }; -static struct ctl_table sunrpc_table[] = { - { - .procname = "sunrpc", - .mode = 0555, - .child = debug_table - }, - { } -}; - void rpc_register_sysctl(void) { if (!sunrpc_table_header) - sunrpc_table_header = register_sysctl_table(sunrpc_table); + sunrpc_table_header = register_sysctl("sunrpc", debug_table); } void From 01c3a40084a42b7a242776d1e9f8e10d4e96c6ef Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Mon, 20 Feb 2023 08:43:04 -0500 Subject: [PATCH 09/20] NFS: Rename readpage_async_filler to nfs_read_add_folio Rename readpage_async_filler to nfs_read_add_folio to better reflect what this function does (add a folio to the nfs_pageio_descriptor), and simplify arguments to this function by removing struct nfs_readdesc. Signed-off-by: Dave Wysochanski Tested-by: Daire Byrne Signed-off-by: Anna Schumaker --- fs/nfs/read.c | 54 +++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index e90988591df4..8cb9e8d3c51d 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -127,11 +127,6 @@ static void nfs_readpage_release(struct nfs_page *req, int error) nfs_release_request(req); } -struct nfs_readdesc { - struct nfs_pageio_descriptor pgio; - struct nfs_open_context *ctx; -}; - static void nfs_page_group_set_uptodate(struct nfs_page *req) { if (nfs_page_group_sync_on_bit(req, PG_UPTODATE)) @@ -153,7 +148,8 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr) if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { /* note: regions of the page not covered by a - * request are zeroed in readpage_async_filler */ + * request are zeroed in nfs_read_add_folio + */ if (bytes > hdr->good_bytes) { /* nothing in this request was good, so zero * the full extent of the request */ @@ -281,7 +277,9 @@ static void nfs_readpage_result(struct rpc_task *task, nfs_readpage_retry(task, hdr); } -static int readpage_async_filler(struct nfs_readdesc *desc, struct folio *folio) +static int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio, + struct nfs_open_context *ctx, + struct folio *folio) { struct inode *inode = folio_file_mapping(folio)->host; struct nfs_server *server = NFS_SERVER(inode); @@ -303,15 +301,15 @@ static int readpage_async_filler(struct nfs_readdesc *desc, struct folio *folio) goto out_unlock; } - new = nfs_page_create_from_folio(desc->ctx, folio, 0, aligned_len); + new = nfs_page_create_from_folio(ctx, folio, 0, aligned_len); if (IS_ERR(new)) goto out_error; if (len < fsize) folio_zero_segment(folio, len, fsize); - if (!nfs_pageio_add_request(&desc->pgio, new)) { + if (!nfs_pageio_add_request(pgio, new)) { nfs_list_remove_request(new); - error = desc->pgio.pg_error; + error = pgio->pg_error; nfs_readpage_release(new, error); goto out; } @@ -332,8 +330,9 @@ out: */ int nfs_read_folio(struct file *file, struct folio *folio) { - struct nfs_readdesc desc; struct inode *inode = file_inode(file); + struct nfs_pageio_descriptor pgio; + struct nfs_open_context *ctx; int ret; trace_nfs_aop_readpage(inode, folio); @@ -357,25 +356,25 @@ int nfs_read_folio(struct file *file, struct folio *folio) if (NFS_STALE(inode)) goto out_unlock; - desc.ctx = get_nfs_open_context(nfs_file_open_context(file)); + ctx = get_nfs_open_context(nfs_file_open_context(file)); - xchg(&desc.ctx->error, 0); - nfs_pageio_init_read(&desc.pgio, inode, false, + xchg(&ctx->error, 0); + nfs_pageio_init_read(&pgio, inode, false, &nfs_async_read_completion_ops); - ret = readpage_async_filler(&desc, folio); + ret = nfs_read_add_folio(&pgio, ctx, folio); if (ret) goto out; - nfs_pageio_complete_read(&desc.pgio); - ret = desc.pgio.pg_error < 0 ? desc.pgio.pg_error : 0; + nfs_pageio_complete_read(&pgio); + ret = pgio.pg_error < 0 ? pgio.pg_error : 0; if (!ret) { ret = folio_wait_locked_killable(folio); if (!folio_test_uptodate(folio) && !ret) - ret = xchg(&desc.ctx->error, 0); + ret = xchg(&ctx->error, 0); } out: - put_nfs_open_context(desc.ctx); + put_nfs_open_context(ctx); trace_nfs_aop_readpage_done(inode, folio, ret); return ret; out_unlock: @@ -386,9 +385,10 @@ out_unlock: void nfs_readahead(struct readahead_control *ractl) { + struct nfs_pageio_descriptor pgio; + struct nfs_open_context *ctx; unsigned int nr_pages = readahead_count(ractl); struct file *file = ractl->file; - struct nfs_readdesc desc; struct inode *inode = ractl->mapping->host; struct folio *folio; int ret; @@ -403,24 +403,24 @@ void nfs_readahead(struct readahead_control *ractl) if (file == NULL) { ret = -EBADF; - desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ); - if (desc.ctx == NULL) + ctx = nfs_find_open_context(inode, NULL, FMODE_READ); + if (ctx == NULL) goto out; } else - desc.ctx = get_nfs_open_context(nfs_file_open_context(file)); + ctx = get_nfs_open_context(nfs_file_open_context(file)); - nfs_pageio_init_read(&desc.pgio, inode, false, + nfs_pageio_init_read(&pgio, inode, false, &nfs_async_read_completion_ops); while ((folio = readahead_folio(ractl)) != NULL) { - ret = readpage_async_filler(&desc, folio); + ret = nfs_read_add_folio(&pgio, ctx, folio); if (ret) break; } - nfs_pageio_complete_read(&desc.pgio); + nfs_pageio_complete_read(&pgio); - put_nfs_open_context(desc.ctx); + put_nfs_open_context(ctx); out: trace_nfs_aop_readahead_done(inode, nr_pages, ret); } From 88a4d7bdeec97890cd543b58dd3588f1f879f51b Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Mon, 20 Feb 2023 08:43:05 -0500 Subject: [PATCH 10/20] NFS: Configure support for netfs when NFS fscache is configured As first steps for support of the netfs library when NFS_FSCACHE is configured, add NETFS_SUPPORT to Kconfig and add the required netfs_inode into struct nfs_inode. Using netfs requires we move the VFS inode structure to be stored inside struct netfs_inode, along with the fscache_cookie. Thus, if NFS_FSCACHE is configured, place netfs_inode inside an anonymous union so the vfs_inode memory is the same and we do not need to modify other non-fscache areas of NFS. In addition, inside the NFS fscache code, use the new helpers, netfs_inode() and netfs_i_cookie() helpers, and remove our own helper, nfs_i_fscache(). Later patches will convert NFS fscache to fully use netfs. Signed-off-by: Dave Wysochanski Tested-by: Daire Byrne Signed-off-by: Anna Schumaker --- fs/nfs/Kconfig | 1 + fs/nfs/fscache.c | 20 +++++++++----------- fs/nfs/fscache.h | 15 ++++++--------- include/linux/nfs_fs.h | 24 ++++++++++-------------- 4 files changed, 26 insertions(+), 34 deletions(-) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index c1c7ed2fd860..b6fc169be1b1 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -170,6 +170,7 @@ config ROOT_NFS config NFS_FSCACHE bool "Provide NFS client caching support" depends on NFS_FS=m && FSCACHE || NFS_FS=y && FSCACHE=y + select NETFS_SUPPORT help Say Y here if you want NFS data to be cached locally on disc through the general filesystem cache manager diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index ea5f2976dfab..3b4dafa771bd 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -163,13 +163,14 @@ void nfs_fscache_init_inode(struct inode *inode) struct nfs_server *nfss = NFS_SERVER(inode); struct nfs_inode *nfsi = NFS_I(inode); - nfsi->fscache = NULL; + netfs_inode(inode)->cache = NULL; if (!(nfss->fscache && S_ISREG(inode->i_mode))) return; nfs_fscache_update_auxdata(&auxdata, inode); - nfsi->fscache = fscache_acquire_cookie(NFS_SB(inode->i_sb)->fscache, + netfs_inode(inode)->cache = fscache_acquire_cookie( + nfss->fscache, 0, nfsi->fh.data, /* index_key */ nfsi->fh.size, @@ -183,11 +184,8 @@ void nfs_fscache_init_inode(struct inode *inode) */ void nfs_fscache_clear_inode(struct inode *inode) { - struct nfs_inode *nfsi = NFS_I(inode); - struct fscache_cookie *cookie = nfs_i_fscache(inode); - - fscache_relinquish_cookie(cookie, false); - nfsi->fscache = NULL; + fscache_relinquish_cookie(netfs_i_cookie(netfs_inode(inode)), false); + netfs_inode(inode)->cache = NULL; } /* @@ -212,7 +210,7 @@ void nfs_fscache_clear_inode(struct inode *inode) void nfs_fscache_open_file(struct inode *inode, struct file *filp) { struct nfs_fscache_inode_auxdata auxdata; - struct fscache_cookie *cookie = nfs_i_fscache(inode); + struct fscache_cookie *cookie = netfs_i_cookie(netfs_inode(inode)); bool open_for_write = inode_is_open_for_write(inode); if (!fscache_cookie_valid(cookie)) @@ -230,7 +228,7 @@ EXPORT_SYMBOL_GPL(nfs_fscache_open_file); void nfs_fscache_release_file(struct inode *inode, struct file *filp) { struct nfs_fscache_inode_auxdata auxdata; - struct fscache_cookie *cookie = nfs_i_fscache(inode); + struct fscache_cookie *cookie = netfs_i_cookie(netfs_inode(inode)); loff_t i_size = i_size_read(inode); nfs_fscache_update_auxdata(&auxdata, inode); @@ -243,7 +241,7 @@ void nfs_fscache_release_file(struct inode *inode, struct file *filp) static int fscache_fallback_read_page(struct inode *inode, struct page *page) { struct netfs_cache_resources cres; - struct fscache_cookie *cookie = nfs_i_fscache(inode); + struct fscache_cookie *cookie = netfs_i_cookie(&NFS_I(inode)->netfs); struct iov_iter iter; struct bio_vec bvec; int ret; @@ -269,7 +267,7 @@ static int fscache_fallback_write_page(struct inode *inode, struct page *page, bool no_space_allocated_yet) { struct netfs_cache_resources cres; - struct fscache_cookie *cookie = nfs_i_fscache(inode); + struct fscache_cookie *cookie = netfs_i_cookie(&NFS_I(inode)->netfs); struct iov_iter iter; struct bio_vec bvec; loff_t start = page_offset(page); diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index 2a37af880978..38614ed8f951 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -54,7 +54,7 @@ static inline bool nfs_fscache_release_folio(struct folio *folio, gfp_t gfp) if (current_is_kswapd() || !(gfp & __GFP_FS)) return false; folio_wait_fscache(folio); - fscache_note_page_release(nfs_i_fscache(folio->mapping->host)); + fscache_note_page_release(netfs_i_cookie(&NFS_I(folio->mapping->host)->netfs)); nfs_inc_fscache_stats(folio->mapping->host, NFSIOS_FSCACHE_PAGES_UNCACHED); } @@ -66,7 +66,7 @@ static inline bool nfs_fscache_release_folio(struct folio *folio, gfp_t gfp) */ static inline int nfs_fscache_read_page(struct inode *inode, struct page *page) { - if (nfs_i_fscache(inode)) + if (netfs_inode(inode)->cache) return __nfs_fscache_read_page(inode, page); return -ENOBUFS; } @@ -78,7 +78,7 @@ static inline int nfs_fscache_read_page(struct inode *inode, struct page *page) static inline void nfs_fscache_write_page(struct inode *inode, struct page *page) { - if (nfs_i_fscache(inode)) + if (netfs_inode(inode)->cache) __nfs_fscache_write_page(inode, page); } @@ -101,13 +101,10 @@ static inline void nfs_fscache_update_auxdata(struct nfs_fscache_inode_auxdata * static inline void nfs_fscache_invalidate(struct inode *inode, int flags) { struct nfs_fscache_inode_auxdata auxdata; - struct nfs_inode *nfsi = NFS_I(inode); + struct fscache_cookie *cookie = netfs_i_cookie(&NFS_I(inode)->netfs); - if (nfsi->fscache) { - nfs_fscache_update_auxdata(&auxdata, inode); - fscache_invalidate(nfsi->fscache, &auxdata, - i_size_read(inode), flags); - } + nfs_fscache_update_auxdata(&auxdata, inode); + fscache_invalidate(cookie, &auxdata, i_size_read(inode), flags); } /* diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index bf89fe6fc3ba..041e79a48f7a 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -31,6 +31,10 @@ #include #include +#ifdef CONFIG_NFS_FSCACHE +#include +#endif + #include #include #include @@ -204,14 +208,15 @@ struct nfs_inode { /* how many bytes have been written/read and how many bytes queued up */ __u64 write_io; __u64 read_io; -#ifdef CONFIG_NFS_FSCACHE - struct fscache_cookie *fscache; -#endif - struct inode vfs_inode; - #ifdef CONFIG_NFS_V4_2 struct nfs4_xattr_cache *xattr_cache; #endif + union { + struct inode vfs_inode; +#ifdef CONFIG_NFS_FSCACHE + struct netfs_inode netfs; /* netfs context and VFS inode */ +#endif + }; }; struct nfs4_copy_state { @@ -329,15 +334,6 @@ static inline int NFS_STALE(const struct inode *inode) return test_bit(NFS_INO_STALE, &NFS_I(inode)->flags); } -static inline struct fscache_cookie *nfs_i_fscache(struct inode *inode) -{ -#ifdef CONFIG_NFS_FSCACHE - return NFS_I(inode)->fscache; -#else - return NULL; -#endif -} - static inline __u64 NFS_FILEID(const struct inode *inode) { return NFS_I(inode)->fileid; From 000dbe0bec058cbf2ca9e156e4a5584f5158b0f9 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Mon, 20 Feb 2023 08:43:06 -0500 Subject: [PATCH 11/20] NFS: Convert buffered read paths to use netfs when fscache is enabled Convert the NFS buffered read code paths to corresponding netfs APIs, but only when fscache is configured and enabled. The netfs API defines struct netfs_request_ops which must be filled in by the network filesystem. For NFS, we only need to define 5 of the functions, the main one being the issue_read() function. The issue_read() function is called by the netfs layer when a read cannot be fulfilled locally, and must be sent to the server (either the cache is not active, or it is active but the data is not available). Once the read from the server is complete, netfs requires a call to netfs_subreq_terminated() which conveys either how many bytes were read successfully, or an error. Note that issue_read() is called with a structure, netfs_io_subrequest, which defines the IO requested, and contains a start and a length (both in bytes), and assumes the underlying netfs will return a either an error on the whole region, or the number of bytes successfully read. The NFS IO path is page based and the main APIs are the pgio APIs defined in pagelist.c. For the pgio APIs, there is no way for the caller to know how many RPCs will be sent and how the pages will be broken up into underlying RPCs, each of which will have their own completion and return code. In contrast, netfs is subrequest based, a single subrequest may contain multiple pages, and a single subrequest is initiated with issue_read() and terminated with netfs_subreq_terminated(). Thus, to utilze the netfs APIs, NFS needs some way to accommodate the netfs API requirement on the single response to the whole subrequest, while also minimizing disruptive changes to the NFS pgio layer. The approach taken with this patch is to allocate a small structure for each nfs_netfs_issue_read() call, store the final error and number of bytes successfully transferred in the structure, and update these values as each RPC completes. The refcount on the structure is used as a marker for the last RPC completion, is incremented in nfs_netfs_read_initiate(), and decremented inside nfs_netfs_read_completion(), when a nfs_pgio_header contains a valid pointer to the data. On the final put (which signals the final outstanding RPC is complete) in nfs_netfs_read_completion(), call netfs_subreq_terminated() with either the final error value (if one or more READs complete with an error) or the number of bytes successfully transferred (if all RPCs complete successfully). Note that when all RPCs complete successfully, the number of bytes transferred is capped to the length of the subrequest. Capping the transferred length to the subrequest length prevents "Subreq overread" warnings from netfs. This is due to the "aligned_len" in nfs_pageio_add_page(), and the corner case where NFS requests a full page at the end of the file, even when i_size reflects only a partial page (NFS overread). Signed-off-by: Dave Wysochanski Tested-by: Daire Byrne Signed-off-by: Anna Schumaker --- fs/nfs/fscache.c | 220 ++++++++++++++++++++++++--------------- fs/nfs/fscache.h | 122 ++++++++++++++++------ fs/nfs/inode.c | 2 + fs/nfs/internal.h | 9 ++ fs/nfs/pagelist.c | 4 + fs/nfs/read.c | 61 ++++++----- include/linux/nfs_page.h | 3 + include/linux/nfs_xdr.h | 3 + 8 files changed, 273 insertions(+), 151 deletions(-) diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 3b4dafa771bd..95c2b3056e2b 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -15,6 +15,9 @@ #include #include #include +#include +#include +#include #include "internal.h" #include "iostat.h" @@ -235,108 +238,153 @@ void nfs_fscache_release_file(struct inode *inode, struct file *filp) fscache_unuse_cookie(cookie, &auxdata, &i_size); } -/* - * Fallback page reading interface. - */ -static int fscache_fallback_read_page(struct inode *inode, struct page *page) +int nfs_netfs_read_folio(struct file *file, struct folio *folio) { - struct netfs_cache_resources cres; - struct fscache_cookie *cookie = netfs_i_cookie(&NFS_I(inode)->netfs); - struct iov_iter iter; - struct bio_vec bvec; - int ret; + if (!netfs_inode(folio_inode(folio))->cache) + return -ENOBUFS; - memset(&cres, 0, sizeof(cres)); - bvec_set_page(&bvec, page, PAGE_SIZE, 0); - iov_iter_bvec(&iter, ITER_DEST, &bvec, 1, PAGE_SIZE); - - ret = fscache_begin_read_operation(&cres, cookie); - if (ret < 0) - return ret; - - ret = fscache_read(&cres, page_offset(page), &iter, NETFS_READ_HOLE_FAIL, - NULL, NULL); - fscache_end_operation(&cres); - return ret; + return netfs_read_folio(file, folio); } -/* - * Fallback page writing interface. - */ -static int fscache_fallback_write_page(struct inode *inode, struct page *page, - bool no_space_allocated_yet) +int nfs_netfs_readahead(struct readahead_control *ractl) { - struct netfs_cache_resources cres; - struct fscache_cookie *cookie = netfs_i_cookie(&NFS_I(inode)->netfs); - struct iov_iter iter; - struct bio_vec bvec; - loff_t start = page_offset(page); - size_t len = PAGE_SIZE; - int ret; + struct inode *inode = ractl->mapping->host; - memset(&cres, 0, sizeof(cres)); - bvec_set_page(&bvec, page, PAGE_SIZE, 0); - iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE); + if (!netfs_inode(inode)->cache) + return -ENOBUFS; - ret = fscache_begin_write_operation(&cres, cookie); - if (ret < 0) - return ret; - - ret = cres.ops->prepare_write(&cres, &start, &len, i_size_read(inode), - no_space_allocated_yet); - if (ret == 0) - ret = fscache_write(&cres, page_offset(page), &iter, NULL, NULL); - fscache_end_operation(&cres); - return ret; + netfs_readahead(ractl); + return 0; } -/* - * Retrieve a page from fscache - */ -int __nfs_fscache_read_page(struct inode *inode, struct page *page) +atomic_t nfs_netfs_debug_id; +static int nfs_netfs_init_request(struct netfs_io_request *rreq, struct file *file) { - int ret; + rreq->netfs_priv = get_nfs_open_context(nfs_file_open_context(file)); + rreq->debug_id = atomic_inc_return(&nfs_netfs_debug_id); - trace_nfs_fscache_read_page(inode, page); - if (PageChecked(page)) { - ClearPageChecked(page); - ret = 1; - goto out; + return 0; +} + +static void nfs_netfs_free_request(struct netfs_io_request *rreq) +{ + put_nfs_open_context(rreq->netfs_priv); +} + +static inline int nfs_netfs_begin_cache_operation(struct netfs_io_request *rreq) +{ + return fscache_begin_read_operation(&rreq->cache_resources, + netfs_i_cookie(netfs_inode(rreq->inode))); +} + +static struct nfs_netfs_io_data *nfs_netfs_alloc(struct netfs_io_subrequest *sreq) +{ + struct nfs_netfs_io_data *netfs; + + netfs = kzalloc(sizeof(*netfs), GFP_KERNEL_ACCOUNT); + if (!netfs) + return NULL; + netfs->sreq = sreq; + refcount_set(&netfs->refcount, 1); + return netfs; +} + +static bool nfs_netfs_clamp_length(struct netfs_io_subrequest *sreq) +{ + size_t rsize = NFS_SB(sreq->rreq->inode->i_sb)->rsize; + + sreq->len = min(sreq->len, rsize); + return true; +} + +static void nfs_netfs_issue_read(struct netfs_io_subrequest *sreq) +{ + struct nfs_netfs_io_data *netfs; + struct nfs_pageio_descriptor pgio; + struct inode *inode = sreq->rreq->inode; + struct nfs_open_context *ctx = sreq->rreq->netfs_priv; + struct page *page; + int err; + pgoff_t start = (sreq->start + sreq->transferred) >> PAGE_SHIFT; + pgoff_t last = ((sreq->start + sreq->len - + sreq->transferred - 1) >> PAGE_SHIFT); + XA_STATE(xas, &sreq->rreq->mapping->i_pages, start); + + nfs_pageio_init_read(&pgio, inode, false, + &nfs_async_read_completion_ops); + + netfs = nfs_netfs_alloc(sreq); + if (!netfs) + return netfs_subreq_terminated(sreq, -ENOMEM, false); + + pgio.pg_netfs = netfs; /* used in completion */ + + xas_lock(&xas); + xas_for_each(&xas, page, last) { + /* nfs_read_add_folio() may schedule() due to pNFS layout and other RPCs */ + xas_pause(&xas); + xas_unlock(&xas); + err = nfs_read_add_folio(&pgio, ctx, page_folio(page)); + if (err < 0) { + netfs->error = err; + goto out; + } + xas_lock(&xas); } - - ret = fscache_fallback_read_page(inode, page); - if (ret < 0) { - nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL); - SetPageChecked(page); - goto out; - } - - /* Read completed synchronously */ - nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK); - SetPageUptodate(page); - ret = 0; + xas_unlock(&xas); out: - trace_nfs_fscache_read_page_exit(inode, page, ret); - return ret; + nfs_pageio_complete_read(&pgio); + nfs_netfs_put(netfs); } -/* - * Store a newly fetched page in fscache. We can be certain there's no page - * stored in the cache as yet otherwise we would've read it from there. - */ -void __nfs_fscache_write_page(struct inode *inode, struct page *page) +void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr) { - int ret; + struct nfs_netfs_io_data *netfs = hdr->netfs; - trace_nfs_fscache_write_page(inode, page); + if (!netfs) + return; - ret = fscache_fallback_write_page(inode, page, true); - - if (ret != 0) { - nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL); - nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED); - } else { - nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_WRITTEN_OK); - } - trace_nfs_fscache_write_page_exit(inode, page, ret); + nfs_netfs_get(netfs); } + +int nfs_netfs_folio_unlock(struct folio *folio) +{ + struct inode *inode = folio_file_mapping(folio)->host; + + /* + * If fscache is enabled, netfs will unlock pages. + */ + if (netfs_inode(inode)->cache) + return 0; + + return 1; +} + +void nfs_netfs_read_completion(struct nfs_pgio_header *hdr) +{ + struct nfs_netfs_io_data *netfs = hdr->netfs; + struct netfs_io_subrequest *sreq; + + if (!netfs) + return; + + sreq = netfs->sreq; + if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) + __set_bit(NETFS_SREQ_CLEAR_TAIL, &sreq->flags); + + if (hdr->error) + netfs->error = hdr->error; + else + atomic64_add(hdr->res.count, &netfs->transferred); + + nfs_netfs_put(netfs); + hdr->netfs = NULL; +} + +const struct netfs_request_ops nfs_netfs_ops = { + .init_request = nfs_netfs_init_request, + .free_request = nfs_netfs_free_request, + .begin_cache_operation = nfs_netfs_begin_cache_operation, + .issue_read = nfs_netfs_issue_read, + .clamp_length = nfs_netfs_clamp_length +}; diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index 38614ed8f951..e1706e736c64 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -34,6 +34,58 @@ struct nfs_fscache_inode_auxdata { u64 change_attr; }; +struct nfs_netfs_io_data { + /* + * NFS may split a netfs_io_subrequest into multiple RPCs, each + * with their own read completion. In netfs, we can only call + * netfs_subreq_terminated() once for each subrequest. Use the + * refcount here to double as a marker of the last RPC completion, + * and only call netfs via netfs_subreq_terminated() once. + */ + refcount_t refcount; + struct netfs_io_subrequest *sreq; + + /* + * Final disposition of the netfs_io_subrequest, sent in + * netfs_subreq_terminated() + */ + atomic64_t transferred; + int error; +}; + +static inline void nfs_netfs_get(struct nfs_netfs_io_data *netfs) +{ + refcount_inc(&netfs->refcount); +} + +static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs) +{ + ssize_t final_len; + + /* Only the last RPC completion should call netfs_subreq_terminated() */ + if (!refcount_dec_and_test(&netfs->refcount)) + return; + + /* + * The NFS pageio interface may read a complete page, even when netfs + * only asked for a partial page. Specifically, this may be seen when + * one thread is truncating a file while another one is reading the last + * page of the file. + * Correct the final length here to be no larger than the netfs subrequest + * length, and thus avoid netfs's "Subreq overread" warning message. + */ + final_len = min_t(s64, netfs->sreq->len, atomic64_read(&netfs->transferred)); + netfs_subreq_terminated(netfs->sreq, netfs->error ?: final_len, false); + kfree(netfs); +} +static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi) +{ + netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops); +} +extern void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr); +extern void nfs_netfs_read_completion(struct nfs_pgio_header *hdr); +extern int nfs_netfs_folio_unlock(struct folio *folio); + /* * fscache.c */ @@ -44,9 +96,8 @@ extern void nfs_fscache_init_inode(struct inode *); extern void nfs_fscache_clear_inode(struct inode *); extern void nfs_fscache_open_file(struct inode *, struct file *); extern void nfs_fscache_release_file(struct inode *, struct file *); - -extern int __nfs_fscache_read_page(struct inode *, struct page *); -extern void __nfs_fscache_write_page(struct inode *, struct page *); +extern int nfs_netfs_readahead(struct readahead_control *ractl); +extern int nfs_netfs_read_folio(struct file *file, struct folio *folio); static inline bool nfs_fscache_release_folio(struct folio *folio, gfp_t gfp) { @@ -54,34 +105,11 @@ static inline bool nfs_fscache_release_folio(struct folio *folio, gfp_t gfp) if (current_is_kswapd() || !(gfp & __GFP_FS)) return false; folio_wait_fscache(folio); - fscache_note_page_release(netfs_i_cookie(&NFS_I(folio->mapping->host)->netfs)); - nfs_inc_fscache_stats(folio->mapping->host, - NFSIOS_FSCACHE_PAGES_UNCACHED); } + fscache_note_page_release(netfs_i_cookie(netfs_inode(folio->mapping->host))); return true; } -/* - * Retrieve a page from an inode data storage object. - */ -static inline int nfs_fscache_read_page(struct inode *inode, struct page *page) -{ - if (netfs_inode(inode)->cache) - return __nfs_fscache_read_page(inode, page); - return -ENOBUFS; -} - -/* - * Store a page newly fetched from the server in an inode data storage object - * in the cache. - */ -static inline void nfs_fscache_write_page(struct inode *inode, - struct page *page) -{ - if (netfs_inode(inode)->cache) - __nfs_fscache_write_page(inode, page); -} - static inline void nfs_fscache_update_auxdata(struct nfs_fscache_inode_auxdata *auxdata, struct inode *inode) { @@ -117,7 +145,28 @@ static inline const char *nfs_server_fscache_state(struct nfs_server *server) return "no "; } +static inline void nfs_netfs_set_pgio_header(struct nfs_pgio_header *hdr, + struct nfs_pageio_descriptor *desc) +{ + hdr->netfs = desc->pg_netfs; +} +static inline void nfs_netfs_set_pageio_descriptor(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) +{ + desc->pg_netfs = hdr->netfs; +} +static inline void nfs_netfs_reset_pageio_descriptor(struct nfs_pageio_descriptor *desc) +{ + desc->pg_netfs = NULL; +} #else /* CONFIG_NFS_FSCACHE */ +static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi) {} +static inline void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr) {} +static inline void nfs_netfs_read_completion(struct nfs_pgio_header *hdr) {} +static inline int nfs_netfs_folio_unlock(struct folio *folio) +{ + return 1; +} static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {} static inline void nfs_fscache_init_inode(struct inode *inode) {} @@ -125,22 +174,29 @@ static inline void nfs_fscache_clear_inode(struct inode *inode) {} static inline void nfs_fscache_open_file(struct inode *inode, struct file *filp) {} static inline void nfs_fscache_release_file(struct inode *inode, struct file *file) {} +static inline int nfs_netfs_readahead(struct readahead_control *ractl) +{ + return -ENOBUFS; +} +static inline int nfs_netfs_read_folio(struct file *file, struct folio *folio) +{ + return -ENOBUFS; +} static inline bool nfs_fscache_release_folio(struct folio *folio, gfp_t gfp) { return true; /* may release folio */ } -static inline int nfs_fscache_read_page(struct inode *inode, struct page *page) -{ - return -ENOBUFS; -} -static inline void nfs_fscache_write_page(struct inode *inode, struct page *page) {} static inline void nfs_fscache_invalidate(struct inode *inode, int flags) {} static inline const char *nfs_server_fscache_state(struct nfs_server *server) { return "no "; } - +static inline void nfs_netfs_set_pgio_header(struct nfs_pgio_header *hdr, + struct nfs_pageio_descriptor *desc) {} +static inline void nfs_netfs_set_pageio_descriptor(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) {} +static inline void nfs_netfs_reset_pageio_descriptor(struct nfs_pageio_descriptor *desc) {} #endif /* CONFIG_NFS_FSCACHE */ #endif /* _NFS_FSCACHE_H */ diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 222a28320e1c..5c8027e3c961 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2254,6 +2254,8 @@ struct inode *nfs_alloc_inode(struct super_block *sb) #ifdef CONFIG_NFS_V4_2 nfsi->xattr_cache = NULL; #endif + nfs_netfs_inode_init(nfsi); + return &nfsi->vfs_inode; } EXPORT_SYMBOL_GPL(nfs_alloc_inode); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2a65fe2a63ab..9ba07553eab8 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -452,6 +452,10 @@ extern void nfs_sb_deactive(struct super_block *sb); extern int nfs_client_for_each_server(struct nfs_client *clp, int (*fn)(struct nfs_server *, void *), void *data); +#ifdef CONFIG_NFS_FSCACHE +extern const struct netfs_request_ops nfs_netfs_ops; +#endif + /* io.c */ extern void nfs_start_io_read(struct inode *inode); extern void nfs_end_io_read(struct inode *inode); @@ -481,9 +485,14 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool struct nfs_pgio_completion_ops; /* read.c */ +extern const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); +extern int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio, + struct nfs_open_context *ctx, + struct folio *folio); +extern void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio); extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 64fa8de199de..6efb5068c116 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -26,6 +26,7 @@ #include "internal.h" #include "pnfs.h" #include "nfstrace.h" +#include "fscache.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -105,6 +106,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, hdr->good_bytes = mirror->pg_count; hdr->io_completion = desc->pg_io_completion; hdr->dreq = desc->pg_dreq; + nfs_netfs_set_pgio_header(hdr, desc); hdr->release = release; hdr->completion_ops = desc->pg_completion_ops; if (hdr->completion_ops->init_hdr) @@ -941,6 +943,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_lseg = NULL; desc->pg_io_completion = NULL; desc->pg_dreq = NULL; + nfs_netfs_reset_pageio_descriptor(desc); desc->pg_bsize = bsize; desc->pg_mirror_count = 1; @@ -1477,6 +1480,7 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc, desc->pg_io_completion = hdr->io_completion; desc->pg_dreq = hdr->dreq; + nfs_netfs_set_pageio_descriptor(desc, hdr); list_splice_init(&hdr->pages, &pages); while (!list_empty(&pages)) { struct nfs_page *req = nfs_list_entry(pages.next); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 8cb9e8d3c51d..f71eeee67e20 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -31,7 +31,7 @@ #define NFSDBG_FACILITY NFSDBG_PAGECACHE -static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; +const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; static const struct nfs_rw_ops nfs_rw_read_ops; static struct kmem_cache *nfs_rdata_cachep; @@ -74,7 +74,7 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, } EXPORT_SYMBOL_GPL(nfs_pageio_init_read); -static void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio) +void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio) { struct nfs_pgio_mirror *pgm; unsigned long npages; @@ -110,20 +110,14 @@ EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); static void nfs_readpage_release(struct nfs_page *req, int error) { - struct inode *inode = d_inode(nfs_req_openctx(req)->dentry); struct folio *folio = nfs_page_to_folio(req); - dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id, - (unsigned long long)NFS_FILEID(inode), req->wb_bytes, - (long long)req_offset(req)); - if (nfs_error_is_fatal_on_server(error) && error != -ETIMEDOUT) folio_set_error(folio); - if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) { - if (folio_test_uptodate(folio)) - nfs_fscache_write_page(inode, &folio->page); - folio_unlock(folio); - } + if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) + if (nfs_netfs_folio_unlock(folio)) + folio_unlock(folio); + nfs_release_request(req); } @@ -177,6 +171,8 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr) nfs_list_remove_request(req); nfs_readpage_release(req, error); } + nfs_netfs_read_completion(hdr); + out: hdr->release(hdr); } @@ -187,6 +183,7 @@ static void nfs_initiate_read(struct nfs_pgio_header *hdr, struct rpc_task_setup *task_setup_data, int how) { rpc_ops->read_setup(hdr, msg); + nfs_netfs_initiate_read(hdr); trace_nfs_initiate_read(hdr); } @@ -202,7 +199,7 @@ nfs_async_read_error(struct list_head *head, int error) } } -static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = { +const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = { .error_cleanup = nfs_async_read_error, .completion = nfs_read_completion, }; @@ -277,9 +274,9 @@ static void nfs_readpage_result(struct rpc_task *task, nfs_readpage_retry(task, hdr); } -static int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio, - struct nfs_open_context *ctx, - struct folio *folio) +int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio, + struct nfs_open_context *ctx, + struct folio *folio) { struct inode *inode = folio_file_mapping(folio)->host; struct nfs_server *server = NFS_SERVER(inode); @@ -295,15 +292,11 @@ static int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio, aligned_len = min_t(unsigned int, ALIGN(len, rsize), fsize); - if (!IS_SYNC(inode)) { - error = nfs_fscache_read_page(inode, &folio->page); - if (error == 0) - goto out_unlock; - } - new = nfs_page_create_from_folio(ctx, folio, 0, aligned_len); - if (IS_ERR(new)) - goto out_error; + if (IS_ERR(new)) { + error = PTR_ERR(new); + goto out; + } if (len < fsize) folio_zero_segment(folio, len, fsize); @@ -314,10 +307,6 @@ static int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio, goto out; } return 0; -out_error: - error = PTR_ERR(new); -out_unlock: - folio_unlock(folio); out: return error; } @@ -356,6 +345,10 @@ int nfs_read_folio(struct file *file, struct folio *folio) if (NFS_STALE(inode)) goto out_unlock; + ret = nfs_netfs_read_folio(file, folio); + if (!ret) + goto out; + ctx = get_nfs_open_context(nfs_file_open_context(file)); xchg(&ctx->error, 0); @@ -364,7 +357,7 @@ int nfs_read_folio(struct file *file, struct folio *folio) ret = nfs_read_add_folio(&pgio, ctx, folio); if (ret) - goto out; + goto out_put; nfs_pageio_complete_read(&pgio); ret = pgio.pg_error < 0 ? pgio.pg_error : 0; @@ -373,14 +366,14 @@ int nfs_read_folio(struct file *file, struct folio *folio) if (!folio_test_uptodate(folio) && !ret) ret = xchg(&ctx->error, 0); } -out: +out_put: put_nfs_open_context(ctx); +out: trace_nfs_aop_readpage_done(inode, folio, ret); return ret; out_unlock: folio_unlock(folio); - trace_nfs_aop_readpage_done(inode, folio, ret); - return ret; + goto out; } void nfs_readahead(struct readahead_control *ractl) @@ -401,6 +394,10 @@ void nfs_readahead(struct readahead_control *ractl) if (NFS_STALE(inode)) goto out; + ret = nfs_netfs_readahead(ractl); + if (!ret) + goto out; + if (file == NULL) { ret = -EBADF; ctx = nfs_find_open_context(inode, NULL, FMODE_READ); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index a2f1ca657623..aa9f4c6ebe26 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -105,6 +105,9 @@ struct nfs_pageio_descriptor { struct pnfs_layout_segment *pg_lseg; struct nfs_io_completion *pg_io_completion; struct nfs_direct_req *pg_dreq; +#ifdef CONFIG_NFS_FSCACHE + void *pg_netfs; +#endif unsigned int pg_bsize; /* default bsize for mirrors */ u32 pg_mirror_count; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e86cf6642d21..e196ef595908 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1619,6 +1619,9 @@ struct nfs_pgio_header { const struct nfs_rw_ops *rw_ops; struct nfs_io_completion *io_completion; struct nfs_direct_req *dreq; +#ifdef CONFIG_NFS_FSCACHE + void *netfs; +#endif int pnfs_error; int error; /* merge with pnfs_error */ From 0631d5e02a1c722b90c11a47dcb6d83ef88ab47b Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Mon, 20 Feb 2023 08:43:07 -0500 Subject: [PATCH 12/20] NFS: Remove all NFSIOS_FSCACHE counters due to conversion to netfs API The old NFSIOS_FSCACHE counters are no longer accurate or useful with the conversion to the new netfs API. The new API does not have a page based interface, and so the counters in nfs_stat_fscachecounters are no longer obtainable. The new netfs the API has extensive statistics inside /proc/fs/fscache/stats so we no longer need NFS specific fscache stats. Note this also removes the 'fsc:' line from /proc/self/mountstats so it will be a user-visible change. Signed-off-by: Dave Wysochanski Reviewed-by: Jeff Layton Tested-by: Daire Byrne Signed-off-by: Anna Schumaker --- fs/nfs/iostat.h | 17 ----------------- fs/nfs/super.c | 11 ----------- include/linux/nfs_iostat.h | 12 ------------ 3 files changed, 40 deletions(-) diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index 2ddaab1ac653..5aa776b5a3e7 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -17,9 +17,6 @@ struct nfs_iostats { unsigned long long bytes[__NFSIOS_BYTESMAX]; -#ifdef CONFIG_NFS_FSCACHE - unsigned long long fscache[__NFSIOS_FSCACHEMAX]; -#endif unsigned long events[__NFSIOS_COUNTSMAX]; } ____cacheline_aligned; @@ -49,20 +46,6 @@ static inline void nfs_add_stats(const struct inode *inode, nfs_add_server_stats(NFS_SERVER(inode), stat, addend); } -#ifdef CONFIG_NFS_FSCACHE -static inline void nfs_add_fscache_stats(struct inode *inode, - enum nfs_stat_fscachecounters stat, - long addend) -{ - this_cpu_add(NFS_SERVER(inode)->io_stats->fscache[stat], addend); -} -static inline void nfs_inc_fscache_stats(struct inode *inode, - enum nfs_stat_fscachecounters stat) -{ - this_cpu_inc(NFS_SERVER(inode)->io_stats->fscache[stat]); -} -#endif - static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void) { return alloc_percpu(struct nfs_iostats); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 05ae23657527..90796db8c205 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -692,10 +692,6 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) totals.events[i] += stats->events[i]; for (i = 0; i < __NFSIOS_BYTESMAX; i++) totals.bytes[i] += stats->bytes[i]; -#ifdef CONFIG_NFS_FSCACHE - for (i = 0; i < __NFSIOS_FSCACHEMAX; i++) - totals.fscache[i] += stats->fscache[i]; -#endif preempt_enable(); } @@ -706,13 +702,6 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) seq_puts(m, "\n\tbytes:\t"); for (i = 0; i < __NFSIOS_BYTESMAX; i++) seq_printf(m, "%Lu ", totals.bytes[i]); -#ifdef CONFIG_NFS_FSCACHE - if (nfss->options & NFS_OPTION_FSCACHE) { - seq_puts(m, "\n\tfsc:\t"); - for (i = 0; i < __NFSIOS_FSCACHEMAX; i++) - seq_printf(m, "%Lu ", totals.fscache[i]); - } -#endif seq_putc(m, '\n'); rpc_clnt_show_stats(m, nfss->client); diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h index 027874c36c88..8d946089d151 100644 --- a/include/linux/nfs_iostat.h +++ b/include/linux/nfs_iostat.h @@ -119,16 +119,4 @@ enum nfs_stat_eventcounters { __NFSIOS_COUNTSMAX, }; -/* - * NFS local caching servicing counters - */ -enum nfs_stat_fscachecounters { - NFSIOS_FSCACHE_PAGES_READ_OK, - NFSIOS_FSCACHE_PAGES_READ_FAIL, - NFSIOS_FSCACHE_PAGES_WRITTEN_OK, - NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, - NFSIOS_FSCACHE_PAGES_UNCACHED, - __NFSIOS_FSCACHEMAX, -}; - #endif /* _LINUX_NFS_IOSTAT */ From 03f5bd75a4c19714a929a0f4e7bbf36b49e874c1 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Mon, 20 Feb 2023 08:43:08 -0500 Subject: [PATCH 13/20] NFS: Remove fscache specific trace points and NFS_INO_FSCACHE bit The NFS specific trace points are no longer needed as tracing is well covered by netfs and fscache. Signed-off-by: Dave Wysochanski Reviewed-by: Jeff Layton Tested-by: Daire Byrne Signed-off-by: Anna Schumaker --- fs/nfs/nfstrace.h | 91 ------------------------------------------ include/linux/nfs_fs.h | 1 - 2 files changed, 92 deletions(-) diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index a778713343df..4e90ca531176 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -39,7 +39,6 @@ { BIT(NFS_INO_STALE), "STALE" }, \ { BIT(NFS_INO_ACL_LRU_SET), "ACL_LRU_SET" }, \ { BIT(NFS_INO_INVALIDATING), "INVALIDATING" }, \ - { BIT(NFS_INO_FSCACHE), "FSCACHE" }, \ { BIT(NFS_INO_LAYOUTCOMMIT), "NEED_LAYOUTCOMMIT" }, \ { BIT(NFS_INO_LAYOUTCOMMITTING), "LAYOUTCOMMIT" }, \ { BIT(NFS_INO_LAYOUTSTATS), "LAYOUTSTATS" }, \ @@ -1243,96 +1242,6 @@ TRACE_EVENT(nfs_readpage_short, ) ); -DECLARE_EVENT_CLASS(nfs_fscache_page_event, - TP_PROTO( - const struct inode *inode, - struct page *page - ), - - TP_ARGS(inode, page), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(u32, fhandle) - __field(u64, fileid) - __field(loff_t, offset) - ), - - TP_fast_assign( - const struct nfs_inode *nfsi = NFS_I(inode); - const struct nfs_fh *fh = &nfsi->fh; - - __entry->offset = page_index(page) << PAGE_SHIFT; - __entry->dev = inode->i_sb->s_dev; - __entry->fileid = nfsi->fileid; - __entry->fhandle = nfs_fhandle_hash(fh); - ), - - TP_printk( - "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long long)__entry->fileid, - __entry->fhandle, - (long long)__entry->offset - ) -); -DECLARE_EVENT_CLASS(nfs_fscache_page_event_done, - TP_PROTO( - const struct inode *inode, - struct page *page, - int error - ), - - TP_ARGS(inode, page, error), - - TP_STRUCT__entry( - __field(int, error) - __field(dev_t, dev) - __field(u32, fhandle) - __field(u64, fileid) - __field(loff_t, offset) - ), - - TP_fast_assign( - const struct nfs_inode *nfsi = NFS_I(inode); - const struct nfs_fh *fh = &nfsi->fh; - - __entry->offset = page_index(page) << PAGE_SHIFT; - __entry->dev = inode->i_sb->s_dev; - __entry->fileid = nfsi->fileid; - __entry->fhandle = nfs_fhandle_hash(fh); - __entry->error = error; - ), - - TP_printk( - "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld error=%d", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long long)__entry->fileid, - __entry->fhandle, - (long long)__entry->offset, __entry->error - ) -); -#define DEFINE_NFS_FSCACHE_PAGE_EVENT(name) \ - DEFINE_EVENT(nfs_fscache_page_event, name, \ - TP_PROTO( \ - const struct inode *inode, \ - struct page *page \ - ), \ - TP_ARGS(inode, page)) -#define DEFINE_NFS_FSCACHE_PAGE_EVENT_DONE(name) \ - DEFINE_EVENT(nfs_fscache_page_event_done, name, \ - TP_PROTO( \ - const struct inode *inode, \ - struct page *page, \ - int error \ - ), \ - TP_ARGS(inode, page, error)) -DEFINE_NFS_FSCACHE_PAGE_EVENT(nfs_fscache_read_page); -DEFINE_NFS_FSCACHE_PAGE_EVENT_DONE(nfs_fscache_read_page_exit); -DEFINE_NFS_FSCACHE_PAGE_EVENT(nfs_fscache_write_page); -DEFINE_NFS_FSCACHE_PAGE_EVENT_DONE(nfs_fscache_write_page_exit); TRACE_EVENT(nfs_pgio_error, TP_PROTO( diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 041e79a48f7a..12bb868f9a18 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -281,7 +281,6 @@ struct nfs4_copy_state { #define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */ #define NFS_INO_INVALIDATING (3) /* inode is being invalidated */ #define NFS_INO_PRESERVE_UNLINKED (4) /* preserve file if removed while open */ -#define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ #define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ From 3db63daabe210af32a09533fe7d8d47c711a103c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 22 Mar 2023 09:27:04 +1100 Subject: [PATCH 14/20] NFSv3: handle out-of-order write replies. NFSv3 includes pre/post wcc attributes which allow the client to determine if all changes to the file have been made by the client itself, or if any might have been made by some other client. If there are gaps in the pre/post ctime sequence it must be assumed that some other client changed the file in that gap and the local cache must be suspect. The next time the file is opened the cache should be invalidated. Since Commit 1c341b777501 ("NFS: Add deferred cache invalidation for close-to-open consistency violations") in linux 5.3 the Linux client has been triggering this invalidation. The chunk in nfs_update_inode() in particularly triggers. Unfortunately Linux NFS assumes that all replies will be processed in the order sent, and will arrive in the order processed. This is not true in general. Consequently Linux NFS might ignore the wcc info in a WRITE reply because the reply is in response to a WRITE that was sent before some other request for which a reply has already been seen. This is detected by Linux using the gencount tests in nfs_inode_attr_cmp(). Also, when the gencount tests pass it is still possible that the request were processed on the server in a different order, and a gap seen in the ctime sequence might be filled in by a subsequent reply, so gaps should not immediately trigger delayed invalidation. The net result is that writing to a server and then reading the file back can result in going to the server for the read rather than serving it from cache - all because a couple of replies arrived out-of-order. This is a performance regression over kernels before 5.3, though the change in 5.3 is a correctness improvement. This has been seen with Linux writing to a Netapp server which occasionally re-orders requests. In testing the majority of requests were in-order, but a few (maybe 2 or three at a time) could be re-ordered. This patch addresses the problem by recording any gaps seen in the pre/post ctime sequence and not triggering invalidation until either there are too many gaps to fit in the table, or until there are no more active writes and the remaining gaps cannot be resolved. We allocate a table of 16 gaps on demand. If the allocation fails we revert to current behaviour which is of little cost as we are unlikely to be able to cache the writes anyway. In the table we store "start->end" pair when iversion is updated and "end<-start" pairs pre/post pairs reported by the server. Usually these exactly cancel out and so nothing is stored. When there are out-of-order replies we do store gaps and these will eventually be cancelled against later replies when this client is the only writer. If the final write is out-of-order there may be one gap remaining when the file is closed. This will be noticed and if there is precisely on gap and if the iversion can be advanced to match it, then we do so. This patch makes no attempt to handle directories correctly. The same problem potentially exists in the out-of-order replies to create/unlink requests can cause future lookup requires to be sent to the server unnecessarily. A similar scheme using the same primitives could be used to notice and handle out-of-order replies. Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- fs/nfs/inode.c | 112 +++++++++++++++++++++++++++++++++++------ include/linux/nfs_fs.h | 47 +++++++++++++++++ 2 files changed, 144 insertions(+), 15 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5c8027e3c961..eb8af1e404d9 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -208,11 +208,12 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) nfsi->cache_validity |= flags; - if (inode->i_mapping->nrpages == 0) - nfsi->cache_validity &= ~(NFS_INO_INVALID_DATA | - NFS_INO_DATA_INVAL_DEFER); - else if (nfsi->cache_validity & NFS_INO_INVALID_DATA) - nfsi->cache_validity &= ~NFS_INO_DATA_INVAL_DEFER; + if (inode->i_mapping->nrpages == 0) { + nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; + nfs_ooo_clear(nfsi); + } else if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { + nfs_ooo_clear(nfsi); + } trace_nfs_set_cache_invalid(inode, 0); } EXPORT_SYMBOL_GPL(nfs_set_cache_invalid); @@ -677,9 +678,10 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset) trace_nfs_size_truncate(inode, offset); i_size_write(inode, offset); /* Optimisation */ - if (offset == 0) - NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_DATA | - NFS_INO_DATA_INVAL_DEFER); + if (offset == 0) { + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA; + nfs_ooo_clear(NFS_I(inode)); + } NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; spin_unlock(&inode->i_lock); @@ -1109,7 +1111,7 @@ void nfs_inode_attach_open_context(struct nfs_open_context *ctx) spin_lock(&inode->i_lock); if (list_empty(&nfsi->open_files) && - (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER)) + nfs_ooo_test(nfsi)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA | NFS_INO_REVAL_FORCED); list_add_tail_rcu(&ctx->list, &nfsi->open_files); @@ -1353,8 +1355,8 @@ int nfs_clear_invalid_mapping(struct address_space *mapping) set_bit(NFS_INO_INVALIDATING, bitlock); smp_wmb(); - nfsi->cache_validity &= - ~(NFS_INO_INVALID_DATA | NFS_INO_DATA_INVAL_DEFER); + nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; + nfs_ooo_clear(nfsi); spin_unlock(&inode->i_lock); trace_nfs_invalidate_mapping_enter(inode); ret = nfs_invalidate_mapping(inode, mapping); @@ -1816,6 +1818,66 @@ static int nfs_inode_finish_partial_attr_update(const struct nfs_fattr *fattr, return 0; } +static void nfs_ooo_merge(struct nfs_inode *nfsi, + u64 start, u64 end) +{ + int i, cnt; + + if (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER) + /* No point merging anything */ + return; + + if (!nfsi->ooo) { + nfsi->ooo = kmalloc(sizeof(*nfsi->ooo), GFP_ATOMIC); + if (!nfsi->ooo) { + nfsi->cache_validity |= NFS_INO_DATA_INVAL_DEFER; + return; + } + nfsi->ooo->cnt = 0; + } + + /* add this range, merging if possible */ + cnt = nfsi->ooo->cnt; + for (i = 0; i < cnt; i++) { + if (end == nfsi->ooo->gap[i].start) + end = nfsi->ooo->gap[i].end; + else if (start == nfsi->ooo->gap[i].end) + start = nfsi->ooo->gap[i].start; + else + continue; + /* Remove 'i' from table and loop to insert the new range */ + cnt -= 1; + nfsi->ooo->gap[i] = nfsi->ooo->gap[cnt]; + i = -1; + } + if (start != end) { + if (cnt >= ARRAY_SIZE(nfsi->ooo->gap)) { + nfsi->cache_validity |= NFS_INO_DATA_INVAL_DEFER; + kfree(nfsi->ooo); + nfsi->ooo = NULL; + return; + } + nfsi->ooo->gap[cnt].start = start; + nfsi->ooo->gap[cnt].end = end; + cnt += 1; + } + nfsi->ooo->cnt = cnt; +} + +static void nfs_ooo_record(struct nfs_inode *nfsi, + struct nfs_fattr *fattr) +{ + /* This reply was out-of-order, so record in the + * pre/post change id, possibly cancelling + * gaps created when iversion was jumpped forward. + */ + if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) && + (fattr->valid & NFS_ATTR_FATTR_PRECHANGE)) + nfs_ooo_merge(nfsi, + fattr->change_attr, + fattr->pre_change_attr); +} + static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr) { @@ -1826,8 +1888,12 @@ static int nfs_refresh_inode_locked(struct inode *inode, if (attr_cmp > 0 || nfs_inode_finish_partial_attr_update(fattr, inode)) ret = nfs_update_inode(inode, fattr); - else if (attr_cmp == 0) - ret = nfs_check_inode_attributes(inode, fattr); + else { + nfs_ooo_record(NFS_I(inode), fattr); + + if (attr_cmp == 0) + ret = nfs_check_inode_attributes(inode, fattr); + } trace_nfs_refresh_inode_exit(inode, ret); return ret; @@ -1918,6 +1984,8 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa if (attr_cmp < 0) return 0; if ((fattr->valid & NFS_ATTR_FATTR) == 0 || !attr_cmp) { + /* Record the pre/post change info before clearing PRECHANGE */ + nfs_ooo_record(NFS_I(inode), fattr); fattr->valid &= ~(NFS_ATTR_FATTR_PRECHANGE | NFS_ATTR_FATTR_PRESIZE | NFS_ATTR_FATTR_PREMTIME @@ -2072,6 +2140,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* More cache consistency checks */ if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { + if (!have_writers && nfsi->ooo && nfsi->ooo->cnt == 1 && + nfsi->ooo->gap[0].end == inode_peek_iversion_raw(inode)) { + /* There is one remaining gap that hasn't been + * merged into iversion - do that now. + */ + inode_set_iversion_raw(inode, nfsi->ooo->gap[0].start); + kfree(nfsi->ooo); + nfsi->ooo = NULL; + } if (!inode_eq_iversion_raw(inode, fattr->change_attr)) { /* Could it be a race with writeback? */ if (!(have_writers || have_delegation)) { @@ -2093,8 +2170,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); - } else if (!have_delegation) - nfsi->cache_validity |= NFS_INO_DATA_INVAL_DEFER; + } else if (!have_delegation) { + nfs_ooo_record(nfsi, fattr); + nfs_ooo_merge(nfsi, inode_peek_iversion_raw(inode), + fattr->change_attr); + } inode_set_iversion_raw(inode, fattr->change_attr); } } else { @@ -2248,6 +2328,7 @@ struct inode *nfs_alloc_inode(struct super_block *sb) return NULL; nfsi->flags = 0UL; nfsi->cache_validity = 0UL; + nfsi->ooo = NULL; #if IS_ENABLED(CONFIG_NFS_V4) nfsi->nfs4_acl = NULL; #endif /* CONFIG_NFS_V4 */ @@ -2262,6 +2343,7 @@ EXPORT_SYMBOL_GPL(nfs_alloc_inode); void nfs_free_inode(struct inode *inode) { + kfree(NFS_I(inode)->ooo); kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); } EXPORT_SYMBOL_GPL(nfs_free_inode); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 12bb868f9a18..279262057a92 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -195,6 +195,39 @@ struct nfs_inode { /* Open contexts for shared mmap writes */ struct list_head open_files; + /* Keep track of out-of-order replies. + * The ooo array contains start/end pairs of + * numbers from the changeid sequence when + * the inode's iversion has been updated. + * It also contains end/start pair (i.e. reverse order) + * of sections of the changeid sequence that have + * been seen in replies from the server. + * Normally these should match and when both + * A:B and B:A are found in ooo, they are both removed. + * And if a reply with A:B causes an iversion update + * of A:B, then neither are added. + * When a reply has pre_change that doesn't match + * iversion, then the changeid pair and any consequent + * change in iversion ARE added. Later replies + * might fill in the gaps, or possibly a gap is caused + * by a change from another client. + * When a file or directory is opened, if the ooo table + * is not empty, then we assume the gaps were due to + * another client and we invalidate the cached data. + * + * We can only track a limited number of concurrent gaps. + * Currently that limit is 16. + * We allocate the table on demand. If there is insufficient + * memory, then we probably cannot cache the file anyway + * so there is no loss. + */ + struct { + int cnt; + struct { + u64 start, end; + } gap[16]; + } *ooo; + #if IS_ENABLED(CONFIG_NFS_V4) struct nfs4_cached_acl *nfs4_acl; /* NFSv4 state */ @@ -612,6 +645,20 @@ nfs_fileid_to_ino_t(u64 fileid) return ino; } +static inline void nfs_ooo_clear(struct nfs_inode *nfsi) +{ + nfsi->cache_validity &= ~NFS_INO_DATA_INVAL_DEFER; + kfree(nfsi->ooo); + nfsi->ooo = NULL; +} + +static inline bool nfs_ooo_test(struct nfs_inode *nfsi) +{ + return (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER) || + (nfsi->ooo && nfsi->ooo->cnt > 0); + +} + #define NFS_JUKEBOX_RETRY_TIME (5 * HZ) /* We need to block new opens while a file is being unlinked. From 61f02e0ab81e7c9415dc32e36fe45fc380dce2f0 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 4 Apr 2023 12:05:22 -0400 Subject: [PATCH 15/20] NFS: Convert the readdir array-of-pages into an array-of-folios This patch only converts the actual array, but doesn't touch the individual nfs_cache_array pages and related functions (that will be done in the next patch). I also adjust the names of the fields in the nfs_readdir_descriptor to say "folio" instead of "page". Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 129 ++++++++++++++++++++++++++------------------------- 1 file changed, 65 insertions(+), 64 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 6fbcbb8d6587..9edb0584c6d1 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -154,10 +154,10 @@ struct nfs_cache_array { struct nfs_readdir_descriptor { struct file *file; - struct page *page; + struct folio *folio; struct dir_context *ctx; - pgoff_t page_index; - pgoff_t page_index_max; + pgoff_t folio_index; + pgoff_t folio_index_max; u64 dir_cookie; u64 last_cookie; loff_t current_index; @@ -312,8 +312,8 @@ static int nfs_readdir_array_can_expand(struct nfs_cache_array *array) } static int nfs_readdir_page_array_append(struct page *page, - const struct nfs_entry *entry, - u64 *cookie) + const struct nfs_entry *entry, + u64 *cookie) { struct nfs_cache_array *array; struct nfs_cache_array_entry *cache_entry; @@ -485,7 +485,7 @@ static void nfs_readdir_seek_next_array(struct nfs_cache_array *array, desc->last_cookie = array->last_cookie; desc->current_index += array->size; desc->cache_entry_index = 0; - desc->page_index++; + desc->folio_index++; } else desc->last_cookie = nfs_readdir_array_index_cookie(array); } @@ -494,7 +494,7 @@ static void nfs_readdir_rewind_search(struct nfs_readdir_descriptor *desc) { desc->current_index = 0; desc->last_cookie = 0; - desc->page_index = 0; + desc->folio_index = 0; } static int nfs_readdir_search_for_pos(struct nfs_cache_array *array, @@ -568,7 +568,7 @@ static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc) struct nfs_cache_array *array; int status; - array = kmap_local_page(desc->page); + array = kmap_local_folio(desc->folio, 0); if (desc->dir_cookie == 0) status = nfs_readdir_search_for_pos(array, desc); @@ -819,16 +819,17 @@ static int nfs_readdir_entry_decode(struct nfs_readdir_descriptor *desc, } /* Perform conversion from xdr to cache array */ -static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc, - struct nfs_entry *entry, - struct page **xdr_pages, unsigned int buflen, - struct page **arrays, size_t narrays, - u64 change_attr) +static int nfs_readdir_folio_filler(struct nfs_readdir_descriptor *desc, + struct nfs_entry *entry, + struct page **xdr_pages, unsigned int buflen, + struct folio **arrays, size_t narrays, + u64 change_attr) { struct address_space *mapping = desc->file->f_mapping; + struct folio *folio = *arrays; struct xdr_stream stream; + struct page *scratch, *new; struct xdr_buf buf; - struct page *scratch, *new, *page = *arrays; u64 cookie; int status; @@ -844,36 +845,36 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc, if (status != 0) break; - status = nfs_readdir_page_array_append(page, entry, &cookie); + status = nfs_readdir_page_array_append(folio_page(folio, 0), entry, &cookie); if (status != -ENOSPC) continue; - if (page->mapping != mapping) { + if (folio->mapping != mapping) { if (!--narrays) break; new = nfs_readdir_page_array_alloc(cookie, GFP_KERNEL); if (!new) break; arrays++; - *arrays = page = new; + *arrays = folio = page_folio(new); } else { new = nfs_readdir_page_get_next(mapping, cookie, change_attr); if (!new) break; - if (page != *arrays) - nfs_readdir_page_unlock_and_put(page); - page = new; + if (folio != *arrays) + nfs_readdir_page_unlock_and_put(folio_page(folio, 0)); + folio = page_folio(new); } - desc->page_index_max++; - status = nfs_readdir_page_array_append(page, entry, &cookie); + desc->folio_index_max++; + status = nfs_readdir_page_array_append(folio_page(folio, 0), entry, &cookie); } while (!status && !entry->eof); switch (status) { case -EBADCOOKIE: if (!entry->eof) break; - nfs_readdir_page_set_eof(page); + nfs_readdir_page_set_eof(folio_page(folio, 0)); fallthrough; case -EAGAIN: status = 0; @@ -886,8 +887,8 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc, ; } - if (page != *arrays) - nfs_readdir_page_unlock_and_put(page); + if (folio != *arrays) + nfs_readdir_page_unlock_and_put(folio_page(folio, 0)); put_page(scratch); return status; @@ -927,11 +928,11 @@ out_freepages: static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc, __be32 *verf_arg, __be32 *verf_res, - struct page **arrays, size_t narrays) + struct folio **arrays, size_t narrays) { u64 change_attr; struct page **pages; - struct page *page = *arrays; + struct folio *folio = *arrays; struct nfs_entry *entry; size_t array_size; struct inode *inode = file_inode(desc->file); @@ -942,7 +943,7 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc, entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; - entry->cookie = nfs_readdir_page_last_cookie(page); + entry->cookie = nfs_readdir_page_last_cookie(folio_page(folio, 0)); entry->fh = nfs_alloc_fhandle(); entry->fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); entry->server = NFS_SERVER(inode); @@ -962,10 +963,10 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc, pglen = status; if (pglen != 0) - status = nfs_readdir_page_filler(desc, entry, pages, pglen, - arrays, narrays, change_attr); + status = nfs_readdir_folio_filler(desc, entry, pages, pglen, + arrays, narrays, change_attr); else - nfs_readdir_page_set_eof(page); + nfs_readdir_page_set_eof(folio_page(folio, 0)); desc->buffer_fills++; free_pages: @@ -977,21 +978,21 @@ out: return status; } -static void nfs_readdir_page_put(struct nfs_readdir_descriptor *desc) +static void nfs_readdir_folio_put(struct nfs_readdir_descriptor *desc) { - put_page(desc->page); - desc->page = NULL; + folio_put(desc->folio); + desc->folio = NULL; } static void -nfs_readdir_page_unlock_and_put_cached(struct nfs_readdir_descriptor *desc) +nfs_readdir_folio_unlock_and_put_cached(struct nfs_readdir_descriptor *desc) { - unlock_page(desc->page); - nfs_readdir_page_put(desc); + folio_unlock(desc->folio); + nfs_readdir_folio_put(desc); } -static struct page * -nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc) +static struct folio * +nfs_readdir_folio_get_cached(struct nfs_readdir_descriptor *desc) { struct address_space *mapping = desc->file->f_mapping; u64 change_attr = inode_peek_iversion_raw(mapping->host); @@ -1003,7 +1004,7 @@ nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc) return NULL; if (desc->clear_cache && !nfs_readdir_page_needs_filling(page)) nfs_readdir_page_reinit_array(page, cookie, change_attr); - return page; + return page_folio(page); } /* @@ -1017,21 +1018,21 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc) __be32 verf[NFS_DIR_VERIFIER_SIZE]; int res; - desc->page = nfs_readdir_page_get_cached(desc); - if (!desc->page) + desc->folio = nfs_readdir_folio_get_cached(desc); + if (!desc->folio) return -ENOMEM; - if (nfs_readdir_page_needs_filling(desc->page)) { + if (nfs_readdir_page_needs_filling(folio_page(desc->folio, 0))) { /* Grow the dtsize if we had to go back for more pages */ - if (desc->page_index == desc->page_index_max) + if (desc->folio_index == desc->folio_index_max) nfs_grow_dtsize(desc); - desc->page_index_max = desc->page_index; + desc->folio_index_max = desc->folio_index; trace_nfs_readdir_cache_fill(desc->file, nfsi->cookieverf, desc->last_cookie, - desc->page->index, desc->dtsize); + desc->folio->index, desc->dtsize); res = nfs_readdir_xdr_to_array(desc, nfsi->cookieverf, verf, - &desc->page, 1); + &desc->folio, 1); if (res < 0) { - nfs_readdir_page_unlock_and_put_cached(desc); + nfs_readdir_folio_unlock_and_put_cached(desc); trace_nfs_readdir_cache_fill_done(inode, res); if (res == -EBADCOOKIE || res == -ENOTSYNC) { invalidate_inode_pages2(desc->file->f_mapping); @@ -1059,7 +1060,7 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc) res = nfs_readdir_search_array(desc); if (res == 0) return 0; - nfs_readdir_page_unlock_and_put_cached(desc); + nfs_readdir_folio_unlock_and_put_cached(desc); return res; } @@ -1087,7 +1088,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc, unsigned int i; bool first_emit = !desc->dir_cookie; - array = kmap_local_page(desc->page); + array = kmap_local_folio(desc->folio, 0); for (i = desc->cache_entry_index; i < array->size; i++) { struct nfs_cache_array_entry *ent; @@ -1136,7 +1137,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc, */ static int uncached_readdir(struct nfs_readdir_descriptor *desc) { - struct page **arrays; + struct folio **arrays; size_t i, sz = 512; __be32 verf[NFS_DIR_VERIFIER_SIZE]; int status = -ENOMEM; @@ -1147,14 +1148,14 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc) arrays = kcalloc(sz, sizeof(*arrays), GFP_KERNEL); if (!arrays) goto out; - arrays[0] = nfs_readdir_page_array_alloc(desc->dir_cookie, GFP_KERNEL); + arrays[0] = page_folio(nfs_readdir_page_array_alloc(desc->dir_cookie, GFP_KERNEL)); if (!arrays[0]) goto out; - desc->page_index = 0; + desc->folio_index = 0; desc->cache_entry_index = 0; desc->last_cookie = desc->dir_cookie; - desc->page_index_max = 0; + desc->folio_index_max = 0; trace_nfs_readdir_uncached(desc->file, desc->verf, desc->last_cookie, -1, desc->dtsize); @@ -1166,10 +1167,10 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc) } for (i = 0; !desc->eob && i < sz && arrays[i]; i++) { - desc->page = arrays[i]; + desc->folio = arrays[i]; nfs_do_filldir(desc, verf); } - desc->page = NULL; + desc->folio = NULL; /* * Grow the dtsize if we have to go back for more pages, @@ -1179,16 +1180,16 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc) if (!desc->eob) nfs_grow_dtsize(desc); else if (desc->buffer_fills == 1 && - i < (desc->page_index_max >> 1)) + i < (desc->folio_index_max >> 1)) nfs_shrink_dtsize(desc); } out_free: for (i = 0; i < sz && arrays[i]; i++) - nfs_readdir_page_array_free(arrays[i]); + nfs_readdir_page_array_free(folio_page(arrays[i], 0)); out: if (!nfs_readdir_use_cookie(desc->file)) nfs_readdir_rewind_search(desc); - desc->page_index_max = -1; + desc->folio_index_max = -1; kfree(arrays); dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); return status; @@ -1240,11 +1241,11 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) goto out; desc->file = file; desc->ctx = ctx; - desc->page_index_max = -1; + desc->folio_index_max = -1; spin_lock(&file->f_lock); desc->dir_cookie = dir_ctx->dir_cookie; - desc->page_index = dir_ctx->page_index; + desc->folio_index = dir_ctx->page_index; desc->last_cookie = dir_ctx->last_cookie; desc->attr_gencount = dir_ctx->attr_gencount; desc->eof = dir_ctx->eof; @@ -1291,8 +1292,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) break; nfs_do_filldir(desc, nfsi->cookieverf); - nfs_readdir_page_unlock_and_put_cached(desc); - if (desc->page_index == desc->page_index_max) + nfs_readdir_folio_unlock_and_put_cached(desc); + if (desc->folio_index == desc->folio_index_max) desc->clear_cache = force_clear; } while (!desc->eob && !desc->eof); @@ -1300,7 +1301,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) dir_ctx->dir_cookie = desc->dir_cookie; dir_ctx->last_cookie = desc->last_cookie; dir_ctx->attr_gencount = desc->attr_gencount; - dir_ctx->page_index = desc->page_index; + dir_ctx->page_index = desc->folio_index; dir_ctx->force_clear = force_clear; dir_ctx->eof = desc->eof; dir_ctx->dtsize = desc->dtsize; From ec108d3cc7663db06dc3fe38a25391b3b7f925f6 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 4 Apr 2023 15:12:52 -0400 Subject: [PATCH 16/20] NFS: Convert readdir page array functions to use a folio Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 201 +++++++++++++++++++++++++-------------------------- 1 file changed, 99 insertions(+), 102 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 9edb0584c6d1..8257de6dba45 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -55,7 +55,7 @@ static int nfs_closedir(struct inode *, struct file *); static int nfs_readdir(struct file *, struct dir_context *); static int nfs_fsync_dir(struct file *, loff_t, loff_t, int); static loff_t nfs_llseek_dir(struct file *, loff_t, int); -static void nfs_readdir_free_folio(struct folio *); +static void nfs_readdir_clear_array(struct folio *); const struct file_operations nfs_dir_operations = { .llseek = nfs_llseek_dir, @@ -67,7 +67,7 @@ const struct file_operations nfs_dir_operations = { }; const struct address_space_operations nfs_dir_aops = { - .free_folio = nfs_readdir_free_folio, + .free_folio = nfs_readdir_clear_array, }; #define NFS_INIT_DTSIZE PAGE_SIZE @@ -146,8 +146,8 @@ struct nfs_cache_array { u64 change_attr; u64 last_cookie; unsigned int size; - unsigned char page_full : 1, - page_is_eof : 1, + unsigned char folio_full : 1, + folio_is_eof : 1, cookies_are_ordered : 1; struct nfs_cache_array_entry array[]; }; @@ -198,17 +198,17 @@ static void nfs_grow_dtsize(struct nfs_readdir_descriptor *desc) nfs_set_dtsize(desc, desc->dtsize << 1); } -static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie, - u64 change_attr) +static void nfs_readdir_folio_init_array(struct folio *folio, u64 last_cookie, + u64 change_attr) { struct nfs_cache_array *array; - array = kmap_local_page(page); + array = kmap_local_folio(folio, 0); array->change_attr = change_attr; array->last_cookie = last_cookie; array->size = 0; - array->page_full = 0; - array->page_is_eof = 0; + array->folio_full = 0; + array->folio_is_eof = 0; array->cookies_are_ordered = 1; kunmap_local(array); } @@ -216,44 +216,39 @@ static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie, /* * we are freeing strings created by nfs_add_to_readdir_array() */ -static void nfs_readdir_clear_array(struct page *page) +static void nfs_readdir_clear_array(struct folio *folio) { struct nfs_cache_array *array; unsigned int i; - array = kmap_local_page(page); + array = kmap_local_folio(folio, 0); for (i = 0; i < array->size; i++) kfree(array->array[i].name); array->size = 0; kunmap_local(array); } -static void nfs_readdir_free_folio(struct folio *folio) +static void nfs_readdir_folio_reinit_array(struct folio *folio, u64 last_cookie, + u64 change_attr) { - nfs_readdir_clear_array(&folio->page); + nfs_readdir_clear_array(folio); + nfs_readdir_folio_init_array(folio, last_cookie, change_attr); } -static void nfs_readdir_page_reinit_array(struct page *page, u64 last_cookie, - u64 change_attr) +static struct folio * +nfs_readdir_folio_array_alloc(u64 last_cookie, gfp_t gfp_flags) { - nfs_readdir_clear_array(page); - nfs_readdir_page_init_array(page, last_cookie, change_attr); + struct folio *folio = folio_alloc(gfp_flags, 0); + if (folio) + nfs_readdir_folio_init_array(folio, last_cookie, 0); + return folio; } -static struct page * -nfs_readdir_page_array_alloc(u64 last_cookie, gfp_t gfp_flags) +static void nfs_readdir_folio_array_free(struct folio *folio) { - struct page *page = alloc_page(gfp_flags); - if (page) - nfs_readdir_page_init_array(page, last_cookie, 0); - return page; -} - -static void nfs_readdir_page_array_free(struct page *page) -{ - if (page) { - nfs_readdir_clear_array(page); - put_page(page); + if (folio) { + nfs_readdir_clear_array(folio); + folio_put(folio); } } @@ -264,13 +259,13 @@ static u64 nfs_readdir_array_index_cookie(struct nfs_cache_array *array) static void nfs_readdir_array_set_eof(struct nfs_cache_array *array) { - array->page_is_eof = 1; - array->page_full = 1; + array->folio_is_eof = 1; + array->folio_full = 1; } static bool nfs_readdir_array_is_full(struct nfs_cache_array *array) { - return array->page_full; + return array->folio_full; } /* @@ -302,16 +297,16 @@ static size_t nfs_readdir_array_maxentries(void) */ static int nfs_readdir_array_can_expand(struct nfs_cache_array *array) { - if (array->page_full) + if (array->folio_full) return -ENOSPC; if (array->size == nfs_readdir_array_maxentries()) { - array->page_full = 1; + array->folio_full = 1; return -ENOSPC; } return 0; } -static int nfs_readdir_page_array_append(struct page *page, +static int nfs_readdir_folio_array_append(struct folio *folio, const struct nfs_entry *entry, u64 *cookie) { @@ -322,7 +317,7 @@ static int nfs_readdir_page_array_append(struct page *page, name = nfs_readdir_copy_name(entry->name, entry->len); - array = kmap_atomic(page); + array = kmap_atomic(folio_page(folio, 0)); if (!name) goto out; ret = nfs_readdir_array_can_expand(array); @@ -361,17 +356,17 @@ out: * 127 readdir entries for a typical 64-bit system, that works out to a * cache of ~ 33 million entries per directory. */ -static pgoff_t nfs_readdir_page_cookie_hash(u64 cookie) +static pgoff_t nfs_readdir_folio_cookie_hash(u64 cookie) { if (cookie == 0) return 0; return hash_64(cookie, 18); } -static bool nfs_readdir_page_validate(struct page *page, u64 last_cookie, - u64 change_attr) +static bool nfs_readdir_folio_validate(struct folio *folio, u64 last_cookie, + u64 change_attr) { - struct nfs_cache_array *array = kmap_local_page(page); + struct nfs_cache_array *array = kmap_local_folio(folio, 0); int ret = true; if (array->change_attr != change_attr) @@ -382,81 +377,83 @@ static bool nfs_readdir_page_validate(struct page *page, u64 last_cookie, return ret; } -static void nfs_readdir_page_unlock_and_put(struct page *page) +static void nfs_readdir_folio_unlock_and_put(struct folio *folio) { - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); } -static void nfs_readdir_page_init_and_validate(struct page *page, u64 cookie, - u64 change_attr) +static void nfs_readdir_folio_init_and_validate(struct folio *folio, u64 cookie, + u64 change_attr) { - if (PageUptodate(page)) { - if (nfs_readdir_page_validate(page, cookie, change_attr)) + if (folio_test_uptodate(folio)) { + if (nfs_readdir_folio_validate(folio, cookie, change_attr)) return; - nfs_readdir_clear_array(page); + nfs_readdir_clear_array(folio); } - nfs_readdir_page_init_array(page, cookie, change_attr); - SetPageUptodate(page); + nfs_readdir_folio_init_array(folio, cookie, change_attr); + folio_mark_uptodate(folio); } -static struct page *nfs_readdir_page_get_locked(struct address_space *mapping, - u64 cookie, u64 change_attr) +static struct folio *nfs_readdir_folio_get_locked(struct address_space *mapping, + u64 cookie, u64 change_attr) { - pgoff_t index = nfs_readdir_page_cookie_hash(cookie); - struct page *page; + pgoff_t index = nfs_readdir_folio_cookie_hash(cookie); + struct folio *folio; - page = grab_cache_page(mapping, index); - if (!page) + folio = filemap_grab_folio(mapping, index); + if (!folio) return NULL; - nfs_readdir_page_init_and_validate(page, cookie, change_attr); - return page; + nfs_readdir_folio_init_and_validate(folio, cookie, change_attr); + return folio; } -static u64 nfs_readdir_page_last_cookie(struct page *page) +static u64 nfs_readdir_folio_last_cookie(struct folio *folio) { struct nfs_cache_array *array; u64 ret; - array = kmap_local_page(page); + array = kmap_local_folio(folio, 0); ret = array->last_cookie; kunmap_local(array); return ret; } -static bool nfs_readdir_page_needs_filling(struct page *page) +static bool nfs_readdir_folio_needs_filling(struct folio *folio) { struct nfs_cache_array *array; bool ret; - array = kmap_local_page(page); + array = kmap_local_folio(folio, 0); ret = !nfs_readdir_array_is_full(array); kunmap_local(array); return ret; } -static void nfs_readdir_page_set_eof(struct page *page) +static void nfs_readdir_folio_set_eof(struct folio *folio) { struct nfs_cache_array *array; - array = kmap_local_page(page); + array = kmap_local_folio(folio, 0); nfs_readdir_array_set_eof(array); kunmap_local(array); } -static struct page *nfs_readdir_page_get_next(struct address_space *mapping, - u64 cookie, u64 change_attr) +static struct folio *nfs_readdir_folio_get_next(struct address_space *mapping, + u64 cookie, u64 change_attr) { - pgoff_t index = nfs_readdir_page_cookie_hash(cookie); - struct page *page; + pgoff_t index = nfs_readdir_folio_cookie_hash(cookie); + struct folio *folio; - page = grab_cache_page_nowait(mapping, index); - if (!page) + folio = __filemap_get_folio(mapping, index, + FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT, + mapping_gfp_mask(mapping)); + if (!folio) return NULL; - nfs_readdir_page_init_and_validate(page, cookie, change_attr); - if (nfs_readdir_page_last_cookie(page) != cookie) - nfs_readdir_page_reinit_array(page, cookie, change_attr); - return page; + nfs_readdir_folio_init_and_validate(folio, cookie, change_attr); + if (nfs_readdir_folio_last_cookie(folio) != cookie) + nfs_readdir_folio_reinit_array(folio, cookie, change_attr); + return folio; } static inline @@ -481,7 +478,7 @@ bool nfs_readdir_use_cookie(const struct file *filp) static void nfs_readdir_seek_next_array(struct nfs_cache_array *array, struct nfs_readdir_descriptor *desc) { - if (array->page_full) { + if (array->folio_full) { desc->last_cookie = array->last_cookie; desc->current_index += array->size; desc->cache_entry_index = 0; @@ -506,7 +503,7 @@ static int nfs_readdir_search_for_pos(struct nfs_cache_array *array, if (diff < 0) goto out_eof; if (diff >= array->size) { - if (array->page_is_eof) + if (array->folio_is_eof) goto out_eof; nfs_readdir_seek_next_array(array, desc); return -EAGAIN; @@ -554,7 +551,7 @@ static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, } } check_eof: - if (array->page_is_eof) { + if (array->folio_is_eof) { status = -EBADCOOKIE; if (desc->dir_cookie == array->last_cookie) desc->eof = true; @@ -826,9 +823,9 @@ static int nfs_readdir_folio_filler(struct nfs_readdir_descriptor *desc, u64 change_attr) { struct address_space *mapping = desc->file->f_mapping; - struct folio *folio = *arrays; + struct folio *new, *folio = *arrays; struct xdr_stream stream; - struct page *scratch, *new; + struct page *scratch; struct xdr_buf buf; u64 cookie; int status; @@ -845,36 +842,36 @@ static int nfs_readdir_folio_filler(struct nfs_readdir_descriptor *desc, if (status != 0) break; - status = nfs_readdir_page_array_append(folio_page(folio, 0), entry, &cookie); + status = nfs_readdir_folio_array_append(folio, entry, &cookie); if (status != -ENOSPC) continue; if (folio->mapping != mapping) { if (!--narrays) break; - new = nfs_readdir_page_array_alloc(cookie, GFP_KERNEL); + new = nfs_readdir_folio_array_alloc(cookie, GFP_KERNEL); if (!new) break; arrays++; - *arrays = folio = page_folio(new); + *arrays = folio = new; } else { - new = nfs_readdir_page_get_next(mapping, cookie, - change_attr); + new = nfs_readdir_folio_get_next(mapping, cookie, + change_attr); if (!new) break; if (folio != *arrays) - nfs_readdir_page_unlock_and_put(folio_page(folio, 0)); - folio = page_folio(new); + nfs_readdir_folio_unlock_and_put(folio); + folio = new; } desc->folio_index_max++; - status = nfs_readdir_page_array_append(folio_page(folio, 0), entry, &cookie); + status = nfs_readdir_folio_array_append(folio, entry, &cookie); } while (!status && !entry->eof); switch (status) { case -EBADCOOKIE: if (!entry->eof) break; - nfs_readdir_page_set_eof(folio_page(folio, 0)); + nfs_readdir_folio_set_eof(folio); fallthrough; case -EAGAIN: status = 0; @@ -888,7 +885,7 @@ static int nfs_readdir_folio_filler(struct nfs_readdir_descriptor *desc, } if (folio != *arrays) - nfs_readdir_page_unlock_and_put(folio_page(folio, 0)); + nfs_readdir_folio_unlock_and_put(folio); put_page(scratch); return status; @@ -943,7 +940,7 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc, entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; - entry->cookie = nfs_readdir_page_last_cookie(folio_page(folio, 0)); + entry->cookie = nfs_readdir_folio_last_cookie(folio); entry->fh = nfs_alloc_fhandle(); entry->fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); entry->server = NFS_SERVER(inode); @@ -966,7 +963,7 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc, status = nfs_readdir_folio_filler(desc, entry, pages, pglen, arrays, narrays, change_attr); else - nfs_readdir_page_set_eof(folio_page(folio, 0)); + nfs_readdir_folio_set_eof(folio); desc->buffer_fills++; free_pages: @@ -997,14 +994,14 @@ nfs_readdir_folio_get_cached(struct nfs_readdir_descriptor *desc) struct address_space *mapping = desc->file->f_mapping; u64 change_attr = inode_peek_iversion_raw(mapping->host); u64 cookie = desc->last_cookie; - struct page *page; + struct folio *folio; - page = nfs_readdir_page_get_locked(mapping, cookie, change_attr); - if (!page) + folio = nfs_readdir_folio_get_locked(mapping, cookie, change_attr); + if (!folio) return NULL; - if (desc->clear_cache && !nfs_readdir_page_needs_filling(page)) - nfs_readdir_page_reinit_array(page, cookie, change_attr); - return page_folio(page); + if (desc->clear_cache && !nfs_readdir_folio_needs_filling(folio)) + nfs_readdir_folio_reinit_array(folio, cookie, change_attr); + return folio; } /* @@ -1021,7 +1018,7 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc) desc->folio = nfs_readdir_folio_get_cached(desc); if (!desc->folio) return -ENOMEM; - if (nfs_readdir_page_needs_filling(folio_page(desc->folio, 0))) { + if (nfs_readdir_folio_needs_filling(desc->folio)) { /* Grow the dtsize if we had to go back for more pages */ if (desc->folio_index == desc->folio_index_max) nfs_grow_dtsize(desc); @@ -1115,7 +1112,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc, break; } } - if (array->page_is_eof) + if (array->folio_is_eof) desc->eof = !desc->eob; kunmap_local(array); @@ -1148,7 +1145,7 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc) arrays = kcalloc(sz, sizeof(*arrays), GFP_KERNEL); if (!arrays) goto out; - arrays[0] = page_folio(nfs_readdir_page_array_alloc(desc->dir_cookie, GFP_KERNEL)); + arrays[0] = nfs_readdir_folio_array_alloc(desc->dir_cookie, GFP_KERNEL); if (!arrays[0]) goto out; @@ -1185,7 +1182,7 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc) } out_free: for (i = 0; i < sz && arrays[i]; i++) - nfs_readdir_page_array_free(folio_page(arrays[i], 0)); + nfs_readdir_folio_array_free(arrays[i]); out: if (!nfs_readdir_use_cookie(desc->file)) nfs_readdir_rewind_search(desc); From 691d0b782066a6eeeecbfceb7910a8f6184e6105 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Tue, 18 Apr 2023 13:19:02 -0700 Subject: [PATCH 17/20] SUNRPC: remove the maximum number of retries in call_bind_status Currently call_bind_status places a hard limit of 3 to the number of retries on EACCES error. This limit was done to prevent NLM unlock requests from being hang forever when the server keeps returning garbage. However this change causes problem for cases when NLM service takes longer than 9 seconds to register with the port mapper after a restart. This patch removes this hard coded limit and let the RPC handles the retry based on the standard hard/soft task semantics. Fixes: 0b760113a3a1 ("NLM: Don't hang forever on NLM unlock requests") Reported-by: Helen Chao Tested-by: Helen Chao Signed-off-by: Dai Ngo Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/sunrpc/sched.h | 3 +-- net/sunrpc/clnt.c | 3 --- net/sunrpc/sched.c | 1 - 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index b8ca3ecaf8d7..8ada7dc802d3 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -90,8 +90,7 @@ struct rpc_task { #endif unsigned char tk_priority : 2,/* Task priority */ tk_garb_retry : 2, - tk_cred_retry : 2, - tk_rebind_retry : 2; + tk_cred_retry : 2; }; typedef void (*rpc_action)(struct rpc_task *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index fd7e1c630493..d2ee56634308 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2050,9 +2050,6 @@ call_bind_status(struct rpc_task *task) status = -EOPNOTSUPP; break; } - if (task->tk_rebind_retry == 0) - break; - task->tk_rebind_retry--; rpc_delay(task, 3*HZ); goto retry_timeout; case -ENOBUFS: diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index be587a308e05..c8321de341ee 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -817,7 +817,6 @@ rpc_init_task_statistics(struct rpc_task *task) /* Initialize retry counters */ task->tk_garb_retry = 2; task->tk_cred_retry = 2; - task->tk_rebind_retry = 2; /* starting timestamp */ task->tk_start = ktime_get(); From c5733ae6dc895fa45032df0342c98dedc7051c2d Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Thu, 20 Apr 2023 21:01:32 -0400 Subject: [PATCH 18/20] NFS: set varaiable nfs_netfs_debug_id storage-class-specifier to static smatch reports fs/nfs/fscache.c:260:10: warning: symbol 'nfs_netfs_debug_id' was not declared. Should it be static? This variable is only used in its defining file, so it should be static Signed-off-by: Tom Rix Signed-off-by: Anna Schumaker --- fs/nfs/fscache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 95c2b3056e2b..8c35d88a84b1 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -257,7 +257,7 @@ int nfs_netfs_readahead(struct readahead_control *ractl) return 0; } -atomic_t nfs_netfs_debug_id; +static atomic_t nfs_netfs_debug_id; static int nfs_netfs_init_request(struct netfs_io_request *rreq, struct file *file) { rreq->netfs_priv = get_nfs_open_context(nfs_file_open_context(file)); From e025f0a73f6acb920d86549b2177a5883535421d Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 20 Apr 2023 12:17:35 -0400 Subject: [PATCH 19/20] NFS: Cleanup unused rpc_clnt variable The root rpc_clnt is not used here, clean it up. Fixes: 4dc73c679114 ("NFSv4: keep state manager thread active if swap is enabled") Signed-off-by: Benjamin Coddington Reviewed-by: NeilBrown Signed-off-by: Anna Schumaker --- fs/nfs/nfs4state.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 660ccfaf463e..bbe49315d99e 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1209,10 +1209,6 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) { struct task_struct *task; char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1]; - struct rpc_clnt *cl = clp->cl_rpcclient; - - while (cl != cl->cl_parent) - cl = cl->cl_parent; set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) { From fbd2a05f29a95d5b42b294bf47e55a711424965b Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 6 Apr 2023 15:16:52 -0400 Subject: [PATCH 20/20] NFSv4.2: Rework scratch handling for READ_PLUS Instead of using a tiny, static scratch buffer, we should use a kmalloc()-ed buffer that is allocated when checking for read plus usage. This lets us use the buffer before decoding any part of the READ_PLUS operation instead of setting it right before segment decoding, meaning it should be a little more robust. Signed-off-by: Anna Schumaker --- fs/nfs/nfs42xdr.c | 4 ++-- fs/nfs/nfs4proc.c | 17 ++++++++++++----- include/linux/nfs_xdr.h | 1 + 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index d80ee88ca996..a6df815a140c 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -1122,7 +1122,6 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) uint32_t segments; struct read_plus_segment *segs; int status, i; - char scratch_buf[16]; __be32 *p; status = decode_op_hdr(xdr, OP_READ_PLUS); @@ -1143,7 +1142,6 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) if (!segs) return -ENOMEM; - xdr_set_scratch_buffer(xdr, &scratch_buf, sizeof(scratch_buf)); status = -EIO; for (i = 0; i < segments; i++) { status = decode_read_plus_segment(xdr, &segs[i]); @@ -1348,6 +1346,8 @@ static int nfs4_xdr_dec_read_plus(struct rpc_rqst *rqstp, struct compound_hdr hdr; int status; + xdr_set_scratch_buffer(xdr, res->scratch, sizeof(res->scratch)); + status = decode_compound_hdr(xdr, &hdr); if (status) goto out; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5607b1e2b821..18f25ff4bff7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5439,6 +5439,8 @@ static bool nfs4_read_plus_not_supported(struct rpc_task *task, static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { + if (hdr->res.scratch) + kfree(hdr->res.scratch); if (!nfs4_sequence_done(task, &hdr->res.seq_res)) return -EAGAIN; if (nfs4_read_stateid_changed(task, &hdr->args)) @@ -5452,17 +5454,22 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) } #if defined CONFIG_NFS_V4_2 && defined CONFIG_NFS_V4_2_READ_PLUS -static void nfs42_read_plus_support(struct nfs_pgio_header *hdr, +static bool nfs42_read_plus_support(struct nfs_pgio_header *hdr, struct rpc_message *msg) { /* Note: We don't use READ_PLUS with pNFS yet */ - if (nfs_server_capable(hdr->inode, NFS_CAP_READ_PLUS) && !hdr->ds_clp) + if (nfs_server_capable(hdr->inode, NFS_CAP_READ_PLUS) && !hdr->ds_clp) { msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS]; + hdr->res.scratch = kmalloc(32, GFP_KERNEL); + return hdr->res.scratch != NULL; + } + return false; } #else -static void nfs42_read_plus_support(struct nfs_pgio_header *hdr, +static bool nfs42_read_plus_support(struct nfs_pgio_header *hdr, struct rpc_message *msg) { + return false; } #endif /* CONFIG_NFS_V4_2 */ @@ -5472,8 +5479,8 @@ static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr, hdr->timestamp = jiffies; if (!hdr->pgio_done_cb) hdr->pgio_done_cb = nfs4_read_done_cb; - msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; - nfs42_read_plus_support(hdr, msg); + if (!nfs42_read_plus_support(hdr, msg)) + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e196ef595908..29a1b39794bf 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -670,6 +670,7 @@ struct nfs_pgio_res { struct { unsigned int replen; /* used by read */ int eof; /* used by read */ + void * scratch; /* used by read */ }; struct { struct nfs_writeverf * verf; /* used by write */