From c5e20cb700c0e36fb5499093b96e80350c6eb48e Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Mon, 9 Jun 2014 17:47:26 -0400 Subject: [PATCH] pnfs: fix lockup caused by pnfs_generic_pg_test end_offset and req_offset both return u64 - avoid casting to u32 until it's needed, when it's less than the (u32) size returned by nfs_generic_pg_test. Also, fix the comments in pnfs_generic_pg_test. Running the cthon04 special tests caused this lockup in the "write/read at 2GB, 4GB edges" test when running against a file layout server: BUG: soft lockup - CPU#0 stuck for 22s! [bigfile2:823] Modules linked in: nfs_layout_nfsv41_files rpcsec_gss_krb5 nfsv4 nfs fscache ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_mangle ip6table_filter ip6_tables iptable_nat nf_nat_ipv4 nf_nat iptable_mangle ppdev crc32c_intel aesni_intel aes_x86_64 glue_helper lrw gf128mul ablk_helper cryptd serio_raw e1000 shpchp i2c_piix4 i2c_core parport_pc parport nfsd auth_rpcgss oid_registry exportfs nfs_acl lockd sunrpc btrfs xor zlib_deflate raid6_pq mptspi scsi_transport_spi mptscsih mptbase ata_generic floppy autofs4 irq event stamp: 205958 hardirqs last enabled at (205957): [] restore_args+0x0/0x30 hardirqs last disabled at (205958): [] apic_timer_interrupt+0x6a/0x80 softirqs last enabled at (205956): [] __do_softirq+0x1ea/0x2ab softirqs last disabled at (205951): [] irq_exit+0x44/0x9a CPU: 0 PID: 823 Comm: bigfile2 Not tainted 3.15.0-rc1-branch-pgio_plus+ #3 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/31/2013 task: ffff8800792ec480 ti: ffff880078c4e000 task.ti: ffff880078c4e000 RIP: 0010:[] [] nfs_page_group_unlock+0x3e/0x4b [nfs] RSP: 0018:ffff880078c4fab0 EFLAGS: 00000202 RAX: 0000000000000fff RBX: ffff88006bf83300 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff88006bf83300 RBP: ffff880078c4fab8 R08: 0000000000000001 R09: 0000000000000000 R10: ffffffff8249840c R11: 0000000000000000 R12: 0000000000000035 R13: ffff88007ffc72d8 R14: 0000000000000001 R15: 0000000000000000 FS: 00007f45f11b7740(0000) GS:ffff88007f200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3a8cb632d0 CR3: 000000007931c000 CR4: 00000000001407f0 Stack: ffff88006bf832c0 ffff880078c4fb00 ffffffffa02cec22 ffff880078c4fad8 00000fff810f9d99 ffff880078c4fca0 ffff88006bf832c0 ffff88006bf832c0 ffff880078c4fca0 ffff880078c4fd60 ffff880078c4fb28 ffffffffa02cee34 Call Trace: [] __nfs_pageio_add_request+0x298/0x34f [nfs] [] nfs_pageio_add_request+0x1f/0x42 [nfs] [] nfs_do_writepage+0x1b5/0x1e4 [nfs] [] nfs_writepages_callback+0x13/0x25 [nfs] [] ? nfs_do_writepage+0x1e4/0x1e4 [nfs] [] write_cache_pages+0x254/0x37f [] ? nfs_do_writepage+0x1e4/0x1e4 [nfs] [] ? printk+0x54/0x56 [] ? __set_page_dirty_nobuffers+0x22/0xe9 [] ? put_rpccred+0x38/0x101 [sunrpc] [] nfs_writepages+0xb4/0xf8 [nfs] [] do_writepages+0x21/0x2f [] __filemap_fdatawrite_range+0x55/0x57 [] filemap_write_and_wait_range+0x2d/0x5b [] nfs4_file_fsync+0x3a/0x98 [nfsv4] [] vfs_fsync_range+0x18/0x20 [] generic_file_aio_write+0xa7/0xbd [] nfs_file_write+0xf0/0x170 [nfs] [] do_sync_write+0x59/0x78 [] vfs_write+0xab/0x107 [] SyS_write+0x49/0x7f [] system_call_fastpath+0x16/0x1b Reported-by: Anna Schumaker Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ce46a417e500..ee60c42b72b3 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1433,33 +1433,37 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { unsigned int size; - u64 end; + u64 seg_end, req_start, seg_left; size = nfs_generic_pg_test(pgio, prev, req); if (!size) return 0; /* - * Test if a nfs_page is fully contained in the pnfs_layout_range. - * Note that this test makes several assumptions: - * - that the previous nfs_page in the struct nfs_pageio_descriptor - * is known to lie within the range. - * - that the nfs_page being tested is known to be contiguous with the - * previous nfs_page. - * - Layout ranges are page aligned, so we only have to test the - * start offset of the request. + * 'size' contains the number of bytes left in the current page (up + * to the original size asked for in @req->wb_bytes). + * + * Calculate how many bytes are left in the layout segment + * and if there are less bytes than 'size', return that instead. * * Please also note that 'end_offset' is actually the offset of the * first byte that lies outside the pnfs_layout_range. FIXME? * */ if (pgio->pg_lseg) { - end = end_offset(pgio->pg_lseg->pls_range.offset, - pgio->pg_lseg->pls_range.length); - WARN_ON_ONCE(req_offset(req) > end); - if (req_offset(req) >= end) + seg_end = end_offset(pgio->pg_lseg->pls_range.offset, + pgio->pg_lseg->pls_range.length); + req_start = req_offset(req); + WARN_ON_ONCE(req_start > seg_end); + /* start of request is past the last byte of this segment */ + if (req_start >= seg_end) return 0; - size = min((unsigned int)(end - req_offset(req)), size); + + /* adjust 'size' iff there are fewer bytes left in the + * segment than what nfs_generic_pg_test returned */ + seg_left = seg_end - req_start; + if (seg_left < size) + size = (unsigned int)seg_left; } return size;