From 560aef74503e928f44ddbf481b8b02d9cef37dbf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 27 Aug 2007 09:14:56 -0400 Subject: [PATCH 1/8] NFS: Fix use of cancel_delayed_work_sync in nfs_release_automount_timer Doh! We can't use cancel_delayed_work_sync because we may have been called from an unmount that was being performed by nfs_automount_task. Signed-off-by: Trond Myklebust --- fs/nfs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index aea76d0e5fbd..acfc56f9edc0 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -176,7 +176,7 @@ static void nfs_expire_automounts(struct work_struct *work) void nfs_release_automount_timer(void) { if (list_empty(&nfs_automount_list)) - cancel_delayed_work_sync(&nfs_automount_task); + cancel_delayed_work(&nfs_automount_task); } /* From 65bbf6bdbba0f74e254f706bfb00fe533974f4f1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 27 Aug 2007 09:57:46 -0400 Subject: [PATCH 2/8] NFSv4: Fix a typo in _nfs4_do_open_reclaim This should fix the following Oops reported by Jeff Garzik: kernel BUG at fs/nfs/nfs4xdr.c:1040! invalid opcode: 0000 [1] SMP CPU 0 Modules linked in: nfs lockd sunrpc af_packet ipv6 cpufreq_ondemand acpi_cpufreq battery floppy nvram sg snd_hda_intel ata_generic snd_pcm_oss snd_mixer_oss snd_pcm i2c_i801 snd_page_alloc e1000 firewire_ohci ata_piix i2c_core sr_mod cdrom sata_sil ahci libata sd_mod scsi_mod ext3 jbd ehci_hcd uhci_hcd Pid: 16353, comm: 10.10.10.1-recl Not tainted 2.6.23-rc3 #1 RIP: 0010:[] [] :nfs:encode_open+0x1c0/0x330 RSP: 0018:ffff8100467c5c60 EFLAGS: 00010202 RAX: ffff81000f89b8b8 RBX: 00000000697a6f6d RCX: ffff81000f89b8b8 RDX: 0000000000000004 RSI: 0000000000000004 RDI: ffff8100467c5c80 RBP: ffff8100467c5c80 R08: ffff81000f89bc30 R09: ffff81000f89b83f R10: 0000000000000001 R11: ffffffff881e79e0 R12: ffff81003cbd1808 R13: ffff81000f89b860 R14: ffff81005fc984e0 R15: ffffffff88240af0 FS: 0000000000000000(0000) GS:ffffffff8052a000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 00002adb9e51a030 CR3: 000000007ea7e000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process 10.10.10.1-recl (pid: 16353, threadinfo ffff8100467c4000, task ffff8100038ce780) Stack: ffff81004aeb6a40 ffff81003cbd1808 ffff81003cbd1808 ffffffff88240b5d ffff81000f89b8bc ffff81005fc984e8 ffff81000f89bc30 ffff81005fc984e8 0000000300000000 0000000000000000 0000000000000000 ffff81003cbd1800 Call Trace: [] :nfs:nfs4_xdr_enc_open_noattr+0x6d/0x90 [] :sunrpc:rpcauth_wrap_req+0x97/0xf0 [] :nfs:nfs4_xdr_enc_open_noattr+0x0/0x90 [] :sunrpc:call_transmit+0x18a/0x290 [] :sunrpc:__rpc_execute+0x6b/0x290 [] :sunrpc:rpc_do_run_task+0x76/0xd0 [] :nfs:_nfs4_proc_open+0x76/0x230 [] :nfs:nfs4_open_recover_helper+0x5e/0xc0 [] :nfs:nfs4_open_recover+0xe4/0x120 [] :nfs:nfs4_open_reclaim+0xa4/0xf0 [] :nfs:nfs4_reclaim_open_state+0x55/0x1b0 [] :nfs:reclaimer+0x2ca/0x390 [] :nfs:reclaimer+0x0/0x390 [] kthread+0x4b/0x80 [] child_rip+0xa/0x12 [] kthread+0x0/0x80 [] child_rip+0x0/0x12 Code: 0f 0b eb fe 48 89 ef c7 00 00 00 00 02 be 08 00 00 00 e8 79 RIP [] :nfs:encode_open+0x1c0/0x330 RSP Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 62b3ae280310..036d8625b4f9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -646,7 +646,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state rcu_read_lock(); delegation = rcu_dereference(NFS_I(state->inode)->delegation); if (delegation != NULL && (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) != 0) - delegation_type = delegation->flags; + delegation_type = delegation->type; rcu_read_unlock(); opendata->o_arg.u.delegation_type = delegation_type; status = nfs4_open_recover(opendata, state); From deee9369b993b52a8e2f25683b4a67be7a65e8ae Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 27 Aug 2007 11:33:00 -0400 Subject: [PATCH 3/8] NFSv4: Ensure that we pass the correct dentry to nfs4_intent_set_file This patch fixes an Oops that was reported by Gabriel Barazer. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 036d8625b4f9..4b90e17555a9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1434,7 +1434,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) } res = d_add_unique(dentry, igrab(state->inode)); if (res != NULL) - dentry = res; + path.dentry = res; nfs4_intent_set_file(nd, &path, state); return res; } From fdb66ff4ace3c4e18809b9b71dfcce1692143147 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 29 Aug 2007 17:58:57 -0400 Subject: [PATCH 4/8] NFS: mount option parser chokes on proto= The new text-based NFS mount option parsing logic doesn't recognize any valid transport protocols due to a silly mistake in the protocol token matching logic. This prevents basic mount requests such as: mount.nfs server:/export /mnt -o proto=tcp from working with the new text-based NFS mount API. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 46139003ea0c..66a223b22a80 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -911,13 +911,13 @@ static int nfs_parse_mount_options(char *raw, kfree(string); switch (token) { - case Opt_udp: + case Opt_xprt_udp: mnt->flags &= ~NFS_MOUNT_TCP; mnt->nfs_server.protocol = IPPROTO_UDP; mnt->timeo = 7; mnt->retrans = 5; break; - case Opt_tcp: + case Opt_xprt_tcp: mnt->flags |= NFS_MOUNT_TCP; mnt->nfs_server.protocol = IPPROTO_TCP; mnt->timeo = 600; @@ -936,10 +936,10 @@ static int nfs_parse_mount_options(char *raw, kfree(string); switch (token) { - case Opt_udp: + case Opt_xprt_udp: mnt->mount_server.protocol = IPPROTO_UDP; break; - case Opt_tcp: + case Opt_xprt_tcp: mnt->mount_server.protocol = IPPROTO_TCP; break; default: From fdc6e2c8c0dc0ac702fca0b802f5d9ae99a54bb6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 29 Aug 2007 17:58:59 -0400 Subject: [PATCH 5/8] NFS: Return a real error code from mount(2) Don't filter the return code from the in-kernel rpcbind or NFS mount clients. Return the real error code so that callers of the new NFS text-based mount API can apply a useful retry strategy. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 66a223b22a80..9cd0828010cf 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1166,7 +1166,7 @@ static int nfs_validate_mount_data(struct nfs_mount_data **options, status = nfs_try_mount(&args, mntfh); if (status) - return -EINVAL; + return status; /* * Translate to nfs_mount_data, which nfs_fill_super From 350c73af6af51ae7654dad91874c0d30dd13bbbe Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 29 Aug 2007 17:59:01 -0400 Subject: [PATCH 6/8] NFS: Off-by-one length error in string handling The hostname was getting truncated in the new text-based NFS mount API. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 9cd0828010cf..ef3643284f72 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1153,7 +1153,7 @@ static int nfs_validate_mount_data(struct nfs_mount_data **options, c = strchr(dev_name, ':'); if (c == NULL) return -EINVAL; - len = c - dev_name - 1; + len = c - dev_name; if (len > sizeof(data->hostname)) return -EINVAL; strncpy(data->hostname, dev_name, len); From 7d1cca72994c0e910ca443076dcfcfd473871dda Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 29 Aug 2007 17:59:03 -0400 Subject: [PATCH 7/8] NFS: change NFS mount error return when hostname/pathname too long According to the mount(2) man page, the proper error return code for the mount(2) system call when the special device name or the mounted-on directory name is too long is ENAMETOOLONG. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ef3643284f72..8ed593766f16 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1155,13 +1155,13 @@ static int nfs_validate_mount_data(struct nfs_mount_data **options, return -EINVAL; len = c - dev_name; if (len > sizeof(data->hostname)) - return -EINVAL; + return -ENAMETOOLONG; strncpy(data->hostname, dev_name, len); args.nfs_server.hostname = data->hostname; c++; if (strlen(c) > NFS_MAXPATHLEN) - return -EINVAL; + return -ENAMETOOLONG; args.nfs_server.export_path = c; status = nfs_try_mount(&args, mntfh); @@ -1677,7 +1677,7 @@ static int nfs4_validate_mount_data(struct nfs4_mount_data **options, /* while calculating len, pretend ':' is '\0' */ len = c - dev_name; if (len > NFS4_MAXNAMLEN) - return -EINVAL; + return -ENAMETOOLONG; *hostname = kzalloc(len, GFP_KERNEL); if (*hostname == NULL) return -ENOMEM; @@ -1686,7 +1686,7 @@ static int nfs4_validate_mount_data(struct nfs4_mount_data **options, c++; /* step over the ':' */ len = strlen(c); if (len > NFS4_MAXPATHLEN) - return -EINVAL; + return -ENAMETOOLONG; *mntpath = kzalloc(len + 1, GFP_KERNEL); if (*mntpath == NULL) return -ENOMEM; From 1b3b4a1a2deb7d3e5d66063bd76304d840c966b3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 28 Aug 2007 10:29:36 -0400 Subject: [PATCH 8/8] NFS: Fix a write request leak in nfs_invalidate_page() Ryusuke Konishi says: The recent truncate_complete_page() clears the dirty flag from a page before calling a_ops->invalidatepage(), ^^^^^^ static void truncate_complete_page(struct address_space *mapping, struct page *page) { ... cancel_dirty_page(page, PAGE_CACHE_SIZE); <--- Inserted here at kernel 2.6.20 if (PagePrivate(page)) do_invalidatepage(page, 0); ---> will call a_ops->invalidatepage() ... } and this is disturbing nfs_wb_page_priority() from calling nfs_writepage_locked() that is expected to handle the pending request (=nfs_page) associated with the page. int nfs_wb_page_priority(struct inode *inode, struct page *page, int how) { ... if (clear_page_dirty_for_io(page)) { ret = nfs_writepage_locked(page, &wbc); if (ret < 0) goto out; } ... } Since truncate_complete_page() will get rid of the page after a_ops->invalidatepage() returns, the request (=nfs_page) associated with the page becomes a garbage in nfs_inode->nfs_page_tree. ------------------------ Fix this by ensuring that nfs_wb_page_priority() recognises that it may also need to clear out non-dirty pages that have an nfs_page associated with them. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 2 +- fs/nfs/write.c | 44 ++++++++++++++++++++++++++++++++++++++++++ include/linux/nfs_fs.h | 1 + 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index c87dc713b5d7..579cf8a7d4a7 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -316,7 +316,7 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset) if (offset != 0) return; /* Cancel any unstarted writes on this page */ - nfs_wb_page_priority(page->mapping->host, page, FLUSH_INVALIDATE); + nfs_wb_page_cancel(page->mapping->host, page); } static int nfs_release_page(struct page *page, gfp_t gfp) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ef97e0c0f5b1..0d7a77cc394b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1396,6 +1396,50 @@ out: return ret; } +int nfs_wb_page_cancel(struct inode *inode, struct page *page) +{ + struct nfs_page *req; + loff_t range_start = page_offset(page); + loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); + struct writeback_control wbc = { + .bdi = page->mapping->backing_dev_info, + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .range_start = range_start, + .range_end = range_end, + }; + int ret = 0; + + BUG_ON(!PageLocked(page)); + for (;;) { + req = nfs_page_find_request(page); + if (req == NULL) + goto out; + if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { + nfs_release_request(req); + break; + } + if (nfs_lock_request_dontget(req)) { + nfs_inode_remove_request(req); + /* + * In case nfs_inode_remove_request has marked the + * page as being dirty + */ + cancel_dirty_page(page, PAGE_CACHE_SIZE); + nfs_unlock_request(req); + break; + } + ret = nfs_wait_on_request(req); + if (ret < 0) + goto out; + } + if (!PagePrivate(page)) + return 0; + ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE); +out: + return ret; +} + int nfs_wb_page_priority(struct inode *inode, struct page *page, int how) { loff_t range_start = page_offset(page); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 157dcb055b5c..7250eeadd7b5 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -431,6 +431,7 @@ extern int nfs_sync_mapping_range(struct address_space *, loff_t, loff_t, int); extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_priority(struct inode *inode, struct page* page, int how); +extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) extern int nfs_commit_inode(struct inode *, int); extern struct nfs_write_data *nfs_commit_alloc(void);