Btrfs: deal with DIO bios that span more than one ordered extent

The new DIO bio splitting code has problems when the bio
spans more than one ordered extent.  This will happen as the
generic DIO code merges our get_blocks calls together into
a bigger single bio.

This fixes things by walking forward in the ordered extent
code finding all the overlapping ordered extents and completing them
all at once.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
Chris Mason 2010-11-28 19:56:33 -05:00
parent 450ba0ea06
commit 163cf09c2a
3 changed files with 89 additions and 4 deletions

View File

@ -5602,15 +5602,18 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
struct btrfs_trans_handle *trans; struct btrfs_trans_handle *trans;
struct btrfs_ordered_extent *ordered = NULL; struct btrfs_ordered_extent *ordered = NULL;
struct extent_state *cached_state = NULL; struct extent_state *cached_state = NULL;
u64 ordered_offset = dip->logical_offset;
u64 ordered_bytes = dip->bytes;
int ret; int ret;
if (err) if (err)
goto out_done; goto out_done;
again:
ret = btrfs_dec_test_ordered_pending(inode, &ordered, ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
dip->logical_offset, dip->bytes); &ordered_offset,
ordered_bytes);
if (!ret) if (!ret)
goto out_done; goto out_test;
BUG_ON(!ordered); BUG_ON(!ordered);
@ -5670,8 +5673,20 @@ out_unlock:
out: out:
btrfs_delalloc_release_metadata(inode, ordered->len); btrfs_delalloc_release_metadata(inode, ordered->len);
btrfs_end_transaction(trans, root); btrfs_end_transaction(trans, root);
ordered_offset = ordered->file_offset + ordered->len;
btrfs_put_ordered_extent(ordered); btrfs_put_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered); btrfs_put_ordered_extent(ordered);
out_test:
/*
* our bio might span multiple ordered extents. If we haven't
* completed the accounting for the whole dio, go back and try again
*/
if (ordered_offset < dip->logical_offset + dip->bytes) {
ordered_bytes = dip->logical_offset + dip->bytes -
ordered_offset;
goto again;
}
out_done: out_done:
bio->bi_private = dip->private; bio->bi_private = dip->private;

View File

@ -248,6 +248,73 @@ int btrfs_add_ordered_sum(struct inode *inode,
return 0; return 0;
} }
/*
* this is used to account for finished IO across a given range
* of the file. The IO may span ordered extents. If
* a given ordered_extent is completely done, 1 is returned, otherwise
* 0.
*
* test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
* to make sure this function only returns 1 once for a given ordered extent.
*
* file_offset is updated to one byte past the range that is recorded as
* complete. This allows you to walk forward in the file.
*/
int btrfs_dec_test_first_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent **cached,
u64 *file_offset, u64 io_size)
{
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
int ret;
u64 dec_end;
u64 dec_start;
u64 to_dec;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock(&tree->lock);
node = tree_search(tree, *file_offset);
if (!node) {
ret = 1;
goto out;
}
entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
if (!offset_in_entry(entry, *file_offset)) {
ret = 1;
goto out;
}
dec_start = max(*file_offset, entry->file_offset);
dec_end = min(*file_offset + io_size, entry->file_offset +
entry->len);
*file_offset = dec_end;
if (dec_start > dec_end) {
printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n",
(unsigned long long)dec_start,
(unsigned long long)dec_end);
}
to_dec = dec_end - dec_start;
if (to_dec > entry->bytes_left) {
printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
(unsigned long long)entry->bytes_left,
(unsigned long long)to_dec);
}
entry->bytes_left -= to_dec;
if (entry->bytes_left == 0)
ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
else
ret = 1;
out:
if (!ret && cached && entry) {
*cached = entry;
atomic_inc(&entry->refs);
}
spin_unlock(&tree->lock);
return ret == 0;
}
/* /*
* this is used to account for finished IO across a given range * this is used to account for finished IO across a given range
* of the file. The IO should not span ordered extents. If * of the file. The IO should not span ordered extents. If

View File

@ -141,6 +141,9 @@ int btrfs_remove_ordered_extent(struct inode *inode,
int btrfs_dec_test_ordered_pending(struct inode *inode, int btrfs_dec_test_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent **cached, struct btrfs_ordered_extent **cached,
u64 file_offset, u64 io_size); u64 file_offset, u64 io_size);
int btrfs_dec_test_first_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent **cached,
u64 *file_offset, u64 io_size);
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type); u64 start, u64 len, u64 disk_len, int type);
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,