From cf3425707ed9ce0d5ebaba20bc3d22dd39e52f2f Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 26 May 2010 01:50:21 +1000 Subject: [PATCH 1/5] block: bd_start_claiming fix module refcount bd_start_claiming has an unbalanced module_put introduced in 6b4517a79. Signed-off-by: Nick Piggin Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/block_dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/block_dev.c b/fs/block_dev.c index 7346c96308a5..204a7632c511 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -734,6 +734,7 @@ static struct block_device *bd_start_claiming(struct block_device *bdev, return ERR_PTR(-ENXIO); whole = bdget_disk(disk, 0); + module_put(disk->fops->owner); put_disk(disk); if (!whole) return ERR_PTR(-ENOMEM); From b0018361c3f934858592cbbb5e1a4f318c2a70ed Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 26 May 2010 01:51:19 +1000 Subject: [PATCH 2/5] block: bd_start_claiming cleanup I don't like the subtle multi-context code in bd_claim (ie. detects where it has been called based on bd_claiming). It seems clearer to just require a new function to finish a 2-part claim. Also improve commentary in bd_start_claiming as to how it should be used. Signed-off-by: Nick Piggin Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/block_dev.c | 72 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 24 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 204a7632c511..a1642ef60197 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -706,8 +706,13 @@ retry: * @bdev is about to be opened exclusively. Check @bdev can be opened * exclusively and mark that an exclusive open is in progress. Each * successful call to this function must be matched with a call to - * either bd_claim() or bd_abort_claiming(). If this function - * succeeds, the matching bd_claim() is guaranteed to succeed. + * either bd_finish_claiming() or bd_abort_claiming() (which do not + * fail). + * + * This function is used to gain exclusive access to the block device + * without actually causing other exclusive open attempts to fail. It + * should be used when the open sequence itself requires exclusive + * access but may subsequently fail. * * CONTEXT: * Might sleep. @@ -783,15 +788,47 @@ static void bd_abort_claiming(struct block_device *whole, void *holder) __bd_abort_claiming(whole, holder); /* releases bdev_lock */ } +/* increment holders when we have a legitimate claim. requires bdev_lock */ +static void __bd_claim(struct block_device *bdev, struct block_device *whole, + void *holder) +{ + /* note that for a whole device bd_holders + * will be incremented twice, and bd_holder will + * be set to bd_claim before being set to holder + */ + whole->bd_holders++; + whole->bd_holder = bd_claim; + bdev->bd_holders++; + bdev->bd_holder = holder; +} + +/** + * bd_finish_claiming - finish claiming a block device + * @bdev: block device of interest (passed to bd_start_claiming()) + * @whole: whole block device returned by bd_start_claiming() + * @holder: holder trying to claim @bdev + * + * Finish a claiming block started by bd_start_claiming(). + * + * CONTEXT: + * Grabs and releases bdev_lock. + */ +static void bd_finish_claiming(struct block_device *bdev, + struct block_device *whole, void *holder) +{ + spin_lock(&bdev_lock); + BUG_ON(whole->bd_claiming != holder); + BUG_ON(!bd_may_claim(bdev, whole, holder)); + __bd_claim(bdev, whole, holder); + __bd_abort_claiming(whole, holder); /* not actually an abort */ +} + /** * bd_claim - claim a block device * @bdev: block device to claim * @holder: holder trying to claim @bdev * - * Try to claim @bdev which must have been opened successfully. This - * function may be called with or without preceding - * blk_start_claiming(). In the former case, this function is always - * successful and terminates the claiming block. + * Try to claim @bdev which must have been opened successfully. * * CONTEXT: * Might sleep. @@ -807,23 +844,10 @@ int bd_claim(struct block_device *bdev, void *holder) might_sleep(); spin_lock(&bdev_lock); - res = bd_prepare_to_claim(bdev, whole, holder); - if (res == 0) { - /* note that for a whole device bd_holders - * will be incremented twice, and bd_holder will - * be set to bd_claim before being set to holder - */ - whole->bd_holders++; - whole->bd_holder = bd_claim; - bdev->bd_holders++; - bdev->bd_holder = holder; - } - - if (whole->bd_claiming) - __bd_abort_claiming(whole, holder); /* releases bdev_lock */ - else - spin_unlock(&bdev_lock); + if (res == 0) + __bd_claim(bdev, whole, holder); + spin_unlock(&bdev_lock); return res; } @@ -1477,7 +1501,7 @@ static int blkdev_open(struct inode * inode, struct file * filp) if (whole) { if (res == 0) - BUG_ON(bd_claim(bdev, filp) != 0); + bd_finish_claiming(bdev, whole, filp); else bd_abort_claiming(whole, filp); } @@ -1713,7 +1737,7 @@ struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *h if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) goto out_blkdev_put; - BUG_ON(bd_claim(bdev, holder) != 0); + bd_finish_claiming(bdev, whole, holder); return bdev; out_blkdev_put: From 3e6c05052c262ebe7fdd85e75e9d4f956cdd8d82 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 7 Jun 2010 20:17:38 +0200 Subject: [PATCH 3/5] block: remove duplicate BUG_ON() in bd_finish_claiming() We do the same BUG_ON() just a line later when calling into __bd_abort_claiming(). Reported-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/block_dev.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index a1642ef60197..99d6af811747 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -817,7 +817,6 @@ static void bd_finish_claiming(struct block_device *bdev, struct block_device *whole, void *holder) { spin_lock(&bdev_lock); - BUG_ON(whole->bd_claiming != holder); BUG_ON(!bd_may_claim(bdev, whole, holder)); __bd_claim(bdev, whole, holder); __bd_abort_claiming(whole, holder); /* not actually an abort */ From 1d862f41222b7f385bada9f85a67ca5592ffd33e Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 8 Jun 2010 16:28:45 +0200 Subject: [PATCH 4/5] pipe: fix pipe buffer resizing pipe_set_size() needs to copy pipe bufs from the old circular buffer to the new. The current code gets this wrong in multiple ways, resulting in oops. Test program is available here: http://www.kernel.org/pub/linux/kernel/people/mszeredi/piperesize/ Signed-off-by: Miklos Szeredi Signed-off-by: Jens Axboe --- fs/pipe.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 69c4c7c13ea9..f31e2d472984 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1145,13 +1145,20 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages) * and adjust the indexes. */ if (pipe->nrbufs) { - const unsigned int tail = pipe->nrbufs & (pipe->buffers - 1); - const unsigned int head = pipe->nrbufs - tail; + unsigned int tail; + unsigned int head; + tail = pipe->curbuf + pipe->nrbufs; + if (tail < pipe->buffers) + tail = 0; + else + tail &= (pipe->buffers - 1); + + head = pipe->nrbufs - tail; if (head) memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer)); if (tail) - memcpy(bufs + head, pipe->bufs + pipe->curbuf, tail * sizeof(struct pipe_buffer)); + memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer)); } pipe->curbuf = 0; From 6db40cf047a8723095caf79f5569d21b388d7b31 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 9 Jun 2010 09:27:57 +0200 Subject: [PATCH 5/5] pipe: fix check in "set size" fcntl As it stands this check compares the number of pages to the page size. This makes no sense and makes the fcntl fail in almost any sane case. Fix it by checking if nr_pages is not zero (it can become zero only if arg is too big and round_pipe_size() overflows). Signed-off-by: Miklos Szeredi Signed-off-by: Jens Axboe --- fs/pipe.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index f31e2d472984..279eef96c51c 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1215,12 +1215,13 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) size = round_pipe_size(arg); nr_pages = size >> PAGE_SHIFT; + ret = -EINVAL; + if (!nr_pages) + goto out; + if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) { ret = -EPERM; goto out; - } else if (nr_pages < PAGE_SIZE) { - ret = -EINVAL; - goto out; } ret = pipe_set_size(pipe, nr_pages); break;