Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs fixes from Chris Mason: "I've split out the big send/receive update from my last pull request and now have just the fixes in my for-linus branch. The send/recv branch will wander over to linux-next shortly though. The largest patches in this pull are Josef's patches to fix DIO locking problems and his patch to fix a crash during balance. They are both well tested. The rest are smaller fixes that we've had queued. The last rc came out while I was hacking new and exciting ways to recover from a misplaced rm -rf on my dev box, so these missed rc3." * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (25 commits) Btrfs: fix that repair code is spuriously executed for transid failures Btrfs: fix ordered extent leak when failing to start a transaction Btrfs: fix a dio write regression Btrfs: fix deadlock with freeze and sync V2 Btrfs: revert checksum error statistic which can cause a BUG() Btrfs: remove superblock writing after fatal error Btrfs: allow delayed refs to be merged Btrfs: fix enospc problems when deleting a subvol Btrfs: fix wrong mtime and ctime when creating snapshots Btrfs: fix race in run_clustered_refs Btrfs: don't run __tree_mod_log_free_eb on leaves Btrfs: increase the size of the free space cache Btrfs: barrier before waitqueue_active Btrfs: fix deadlock in wait_for_more_refs btrfs: fix second lock in btrfs_delete_delayed_items() Btrfs: don't allocate a seperate csums array for direct reads Btrfs: do not strdup non existent strings Btrfs: do not use missing devices when showing devname Btrfs: fix that error value is changed by mistake Btrfs: lock extents as we map them in DIO ...
This commit is contained in:
commit
318e151019
|
@ -1438,10 +1438,10 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
|
|||
ret = extent_from_logical(fs_info, logical, path,
|
||||
&found_key);
|
||||
btrfs_release_path(path);
|
||||
if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
|
||||
ret = -EINVAL;
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
|
||||
return -EINVAL;
|
||||
|
||||
extent_item_pos = logical - found_key.objectid;
|
||||
ret = iterate_extent_inodes(fs_info, found_key.objectid,
|
||||
|
|
|
@ -818,6 +818,7 @@ static void free_workspace(int type, struct list_head *workspace)
|
|||
btrfs_compress_op[idx]->free_workspace(workspace);
|
||||
atomic_dec(alloc_workspace);
|
||||
wake:
|
||||
smp_mb();
|
||||
if (waitqueue_active(workspace_wait))
|
||||
wake_up(workspace_wait);
|
||||
}
|
||||
|
|
|
@ -420,12 +420,6 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
|
|||
}
|
||||
spin_unlock(&fs_info->tree_mod_seq_lock);
|
||||
|
||||
/*
|
||||
* we removed the lowest blocker from the blocker list, so there may be
|
||||
* more processible delayed refs.
|
||||
*/
|
||||
wake_up(&fs_info->tree_mod_seq_wait);
|
||||
|
||||
/*
|
||||
* anything that's lower than the lowest existing (read: blocked)
|
||||
* sequence number can be removed from the tree.
|
||||
|
@ -631,6 +625,9 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
|
|||
u32 nritems;
|
||||
int ret;
|
||||
|
||||
if (btrfs_header_level(eb) == 0)
|
||||
return;
|
||||
|
||||
nritems = btrfs_header_nritems(eb);
|
||||
for (i = nritems - 1; i >= 0; i--) {
|
||||
ret = tree_mod_log_insert_key_locked(fs_info, eb, i,
|
||||
|
|
|
@ -1252,7 +1252,6 @@ struct btrfs_fs_info {
|
|||
atomic_t tree_mod_seq;
|
||||
struct list_head tree_mod_seq_list;
|
||||
struct seq_list tree_mod_seq_elem;
|
||||
wait_queue_head_t tree_mod_seq_wait;
|
||||
|
||||
/* this protects tree_mod_log */
|
||||
rwlock_t tree_mod_log_lock;
|
||||
|
@ -3192,7 +3191,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
|
|||
int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
|
||||
struct bio *bio, u32 *dst);
|
||||
int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
|
||||
struct bio *bio, u64 logical_offset, u32 *dst);
|
||||
struct bio *bio, u64 logical_offset);
|
||||
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
u64 objectid, u64 pos,
|
||||
|
|
|
@ -512,8 +512,8 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
|
|||
|
||||
rb_erase(&delayed_item->rb_node, root);
|
||||
delayed_item->delayed_node->count--;
|
||||
atomic_dec(&delayed_root->items);
|
||||
if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND &&
|
||||
if (atomic_dec_return(&delayed_root->items) <
|
||||
BTRFS_DELAYED_BACKGROUND &&
|
||||
waitqueue_active(&delayed_root->wait))
|
||||
wake_up(&delayed_root->wait);
|
||||
}
|
||||
|
@ -1028,9 +1028,10 @@ do_again:
|
|||
btrfs_release_delayed_item(prev);
|
||||
ret = 0;
|
||||
btrfs_release_path(path);
|
||||
if (curr)
|
||||
if (curr) {
|
||||
mutex_unlock(&node->mutex);
|
||||
goto do_again;
|
||||
else
|
||||
} else
|
||||
goto delete_fail;
|
||||
}
|
||||
|
||||
|
@ -1055,8 +1056,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
|
|||
delayed_node->count--;
|
||||
|
||||
delayed_root = delayed_node->root->fs_info->delayed_root;
|
||||
atomic_dec(&delayed_root->items);
|
||||
if (atomic_read(&delayed_root->items) <
|
||||
if (atomic_dec_return(&delayed_root->items) <
|
||||
BTRFS_DELAYED_BACKGROUND &&
|
||||
waitqueue_active(&delayed_root->wait))
|
||||
wake_up(&delayed_root->wait);
|
||||
|
|
|
@ -38,17 +38,14 @@
|
|||
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
|
||||
struct btrfs_delayed_tree_ref *ref1)
|
||||
{
|
||||
if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
|
||||
if (ref1->root < ref2->root)
|
||||
return -1;
|
||||
if (ref1->root > ref2->root)
|
||||
return 1;
|
||||
} else {
|
||||
if (ref1->parent < ref2->parent)
|
||||
return -1;
|
||||
if (ref1->parent > ref2->parent)
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -85,7 +82,8 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
|
|||
* type of the delayed backrefs and content of delayed backrefs.
|
||||
*/
|
||||
static int comp_entry(struct btrfs_delayed_ref_node *ref2,
|
||||
struct btrfs_delayed_ref_node *ref1)
|
||||
struct btrfs_delayed_ref_node *ref1,
|
||||
bool compare_seq)
|
||||
{
|
||||
if (ref1->bytenr < ref2->bytenr)
|
||||
return -1;
|
||||
|
@ -102,10 +100,12 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2,
|
|||
if (ref1->type > ref2->type)
|
||||
return 1;
|
||||
/* merging of sequenced refs is not allowed */
|
||||
if (compare_seq) {
|
||||
if (ref1->seq < ref2->seq)
|
||||
return -1;
|
||||
if (ref1->seq > ref2->seq)
|
||||
return 1;
|
||||
}
|
||||
if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
|
||||
ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
|
||||
return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
|
||||
|
@ -139,7 +139,7 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
|
|||
entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
|
||||
rb_node);
|
||||
|
||||
cmp = comp_entry(entry, ins);
|
||||
cmp = comp_entry(entry, ins, 1);
|
||||
if (cmp < 0)
|
||||
p = &(*p)->rb_left;
|
||||
else if (cmp > 0)
|
||||
|
@ -233,6 +233,114 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void inline drop_delayed_ref(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_delayed_ref_root *delayed_refs,
|
||||
struct btrfs_delayed_ref_node *ref)
|
||||
{
|
||||
rb_erase(&ref->rb_node, &delayed_refs->root);
|
||||
ref->in_tree = 0;
|
||||
btrfs_put_delayed_ref(ref);
|
||||
delayed_refs->num_entries--;
|
||||
if (trans->delayed_ref_updates)
|
||||
trans->delayed_ref_updates--;
|
||||
}
|
||||
|
||||
/*
 * Fold the delayed refs that precede @ref in the rbtree into @ref.
 *
 * Walks backwards from @ref with rb_prev().  The walk stops at the first
 * node with a different bytenr, or (when @seq is non-zero) at the first
 * node whose seq is >= @seq.  Nodes for which comp_entry(ref, node, 0)
 * is non-zero are skipped.
 *
 * For a matching node with the same action, its ref_mod is added to the
 * surviving node.  With a different action the ref_mod is subtracted; if
 * the candidate carries the larger ref_mod, the two are swapped first so
 * the larger one survives, and the walk ends after that merge.  A survivor
 * whose ref_mod reaches zero is dropped as well.
 *
 * Returns the number of nodes merged.  When the result is non-zero the
 * node passed in as @ref may itself have been dropped.
 */
static int merge_ref(struct btrfs_trans_handle *trans,
		     struct btrfs_delayed_ref_root *delayed_refs,
		     struct btrfs_delayed_ref_node *ref, u64 seq)
{
	struct rb_node *cursor = rb_prev(&ref->rb_node);
	int nr_merged = 0;

	while (cursor) {
		struct btrfs_delayed_ref_node *cand;
		int swapped = 0;
		int delta;

		cand = rb_entry(cursor, struct btrfs_delayed_ref_node, rb_node);
		/* step first: cand may be erased from the tree below */
		cursor = rb_prev(cursor);

		if (cand->bytenr != ref->bytenr)
			break;
		if (seq && cand->seq >= seq)
			break;
		if (comp_entry(ref, cand, 0) != 0)
			continue;

		if (ref->action != cand->action) {
			/* opposite actions cancel; keep the larger count */
			if (ref->ref_mod < cand->ref_mod) {
				struct btrfs_delayed_ref_node *keep = cand;

				cand = ref;
				ref = keep;
				swapped = 1;
			}
			delta = -cand->ref_mod;
		} else {
			delta = cand->ref_mod;
		}

		nr_merged++;
		drop_delayed_ref(trans, delayed_refs, cand);
		ref->ref_mod += delta;
		if (ref->ref_mod == 0) {
			drop_delayed_ref(trans, delayed_refs, ref);
			break;
		}

		/*
		 * A tree block cannot carry multiples of the same ref, so a
		 * non-zero remainder is unexpected for these types.
		 */
		WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
			ref->type == BTRFS_SHARED_BLOCK_REF_KEY);

		if (swapped)
			break;
		/* restart from the survivor's current predecessor */
		cursor = rb_prev(&ref->rb_node);
	}

	return nr_merged;
}
|
||||
|
||||
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_root *delayed_refs,
|
||||
struct btrfs_delayed_ref_head *head)
|
||||
{
|
||||
struct rb_node *node;
|
||||
u64 seq = 0;
|
||||
|
||||
spin_lock(&fs_info->tree_mod_seq_lock);
|
||||
if (!list_empty(&fs_info->tree_mod_seq_list)) {
|
||||
struct seq_list *elem;
|
||||
|
||||
elem = list_first_entry(&fs_info->tree_mod_seq_list,
|
||||
struct seq_list, list);
|
||||
seq = elem->seq;
|
||||
}
|
||||
spin_unlock(&fs_info->tree_mod_seq_lock);
|
||||
|
||||
node = rb_prev(&head->node.rb_node);
|
||||
while (node) {
|
||||
struct btrfs_delayed_ref_node *ref;
|
||||
|
||||
ref = rb_entry(node, struct btrfs_delayed_ref_node,
|
||||
rb_node);
|
||||
if (ref->bytenr != head->node.bytenr)
|
||||
break;
|
||||
|
||||
/* We can't merge refs that are outside of our seq count */
|
||||
if (seq && ref->seq >= seq)
|
||||
break;
|
||||
if (merge_ref(trans, delayed_refs, ref, seq))
|
||||
node = rb_prev(&head->node.rb_node);
|
||||
else
|
||||
node = rb_prev(node);
|
||||
}
|
||||
}
|
||||
|
||||
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_root *delayed_refs,
|
||||
u64 seq)
|
||||
|
@ -336,18 +444,11 @@ update_existing_ref(struct btrfs_trans_handle *trans,
|
|||
* ever changing the extent allocation tree.
|
||||
*/
|
||||
existing->ref_mod--;
|
||||
if (existing->ref_mod == 0) {
|
||||
rb_erase(&existing->rb_node,
|
||||
&delayed_refs->root);
|
||||
existing->in_tree = 0;
|
||||
btrfs_put_delayed_ref(existing);
|
||||
delayed_refs->num_entries--;
|
||||
if (trans->delayed_ref_updates)
|
||||
trans->delayed_ref_updates--;
|
||||
} else {
|
||||
if (existing->ref_mod == 0)
|
||||
drop_delayed_ref(trans, delayed_refs, existing);
|
||||
else
|
||||
WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
|
||||
existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
|
||||
}
|
||||
} else {
|
||||
WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
|
||||
existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
|
||||
|
@ -662,9 +763,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
|
|||
add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
|
||||
num_bytes, parent, ref_root, level, action,
|
||||
for_cow);
|
||||
if (!need_ref_seq(for_cow, ref_root) &&
|
||||
waitqueue_active(&fs_info->tree_mod_seq_wait))
|
||||
wake_up(&fs_info->tree_mod_seq_wait);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
if (need_ref_seq(for_cow, ref_root))
|
||||
btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
|
||||
|
@ -713,9 +811,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
|
|||
add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
|
||||
num_bytes, parent, ref_root, owner, offset,
|
||||
action, for_cow);
|
||||
if (!need_ref_seq(for_cow, ref_root) &&
|
||||
waitqueue_active(&fs_info->tree_mod_seq_wait))
|
||||
wake_up(&fs_info->tree_mod_seq_wait);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
if (need_ref_seq(for_cow, ref_root))
|
||||
btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
|
||||
|
@ -744,8 +839,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
|
|||
num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
|
||||
extent_op->is_data);
|
||||
|
||||
if (waitqueue_active(&fs_info->tree_mod_seq_wait))
|
||||
wake_up(&fs_info->tree_mod_seq_wait);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -167,6 +167,10 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
|
|||
struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes,
|
||||
struct btrfs_delayed_extent_op *extent_op);
|
||||
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_root *delayed_refs,
|
||||
struct btrfs_delayed_ref_head *head);
|
||||
|
||||
struct btrfs_delayed_ref_head *
|
||||
btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
|
||||
|
|
|
@ -377,9 +377,13 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
|
|||
ret = read_extent_buffer_pages(io_tree, eb, start,
|
||||
WAIT_COMPLETE,
|
||||
btree_get_extent, mirror_num);
|
||||
if (!ret && !verify_parent_transid(io_tree, eb,
|
||||
if (!ret) {
|
||||
if (!verify_parent_transid(io_tree, eb,
|
||||
parent_transid, 0))
|
||||
break;
|
||||
else
|
||||
ret = -EIO;
|
||||
}
|
||||
|
||||
/*
|
||||
* This buffer's crc is fine, but its contents are corrupted, so
|
||||
|
@ -754,9 +758,7 @@ static void run_one_async_done(struct btrfs_work *work)
|
|||
limit = btrfs_async_submit_limit(fs_info);
|
||||
limit = limit * 2 / 3;
|
||||
|
||||
atomic_dec(&fs_info->nr_async_submits);
|
||||
|
||||
if (atomic_read(&fs_info->nr_async_submits) < limit &&
|
||||
if (atomic_dec_return(&fs_info->nr_async_submits) < limit &&
|
||||
waitqueue_active(&fs_info->async_submit_wait))
|
||||
wake_up(&fs_info->async_submit_wait);
|
||||
|
||||
|
@ -2032,8 +2034,6 @@ int open_ctree(struct super_block *sb,
|
|||
fs_info->free_chunk_space = 0;
|
||||
fs_info->tree_mod_log = RB_ROOT;
|
||||
|
||||
init_waitqueue_head(&fs_info->tree_mod_seq_wait);
|
||||
|
||||
/* readahead state */
|
||||
INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
|
||||
spin_lock_init(&fs_info->reada_lock);
|
||||
|
@ -2528,8 +2528,7 @@ retry_root_backup:
|
|||
goto fail_trans_kthread;
|
||||
|
||||
/* do not make disk changes in broken FS */
|
||||
if (btrfs_super_log_root(disk_super) != 0 &&
|
||||
!(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
|
||||
if (btrfs_super_log_root(disk_super) != 0) {
|
||||
u64 bytenr = btrfs_super_log_root(disk_super);
|
||||
|
||||
if (fs_devices->rw_devices == 0) {
|
||||
|
@ -3189,30 +3188,14 @@ int close_ctree(struct btrfs_root *root)
|
|||
/* clear out the rbtree of defraggable inodes */
|
||||
btrfs_run_defrag_inodes(fs_info);
|
||||
|
||||
/*
|
||||
* Here come 2 situations when btrfs is broken to flip readonly:
|
||||
*
|
||||
* 1. when btrfs flips readonly somewhere else before
|
||||
* btrfs_commit_super, sb->s_flags has MS_RDONLY flag,
|
||||
* and btrfs will skip to write sb directly to keep
|
||||
* ERROR state on disk.
|
||||
*
|
||||
* 2. when btrfs flips readonly just in btrfs_commit_super,
|
||||
* and in such case, btrfs cannot write sb via btrfs_commit_super,
|
||||
* and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
|
||||
* btrfs will cleanup all FS resources first and write sb then.
|
||||
*/
|
||||
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
|
||||
ret = btrfs_commit_super(root);
|
||||
if (ret)
|
||||
printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
|
||||
}
|
||||
|
||||
if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
|
||||
ret = btrfs_error_commit_super(root);
|
||||
if (ret)
|
||||
printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
|
||||
}
|
||||
if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
|
||||
btrfs_error_commit_super(root);
|
||||
|
||||
btrfs_put_block_group_cache(fs_info);
|
||||
|
||||
|
@ -3434,18 +3417,11 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
|
|||
if (read_only)
|
||||
return 0;
|
||||
|
||||
if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
|
||||
printk(KERN_WARNING "warning: mount fs with errors, "
|
||||
"running btrfsck is recommended\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int btrfs_error_commit_super(struct btrfs_root *root)
|
||||
void btrfs_error_commit_super(struct btrfs_root *root)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mutex_lock(&root->fs_info->cleaner_mutex);
|
||||
btrfs_run_delayed_iputs(root);
|
||||
mutex_unlock(&root->fs_info->cleaner_mutex);
|
||||
|
@ -3455,10 +3431,6 @@ int btrfs_error_commit_super(struct btrfs_root *root)
|
|||
|
||||
/* cleanup FS via transaction */
|
||||
btrfs_cleanup_transaction(root);
|
||||
|
||||
ret = write_ctree_super(NULL, root, 0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
|
||||
|
@ -3782,14 +3754,17 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
|
|||
/* FIXME: cleanup wait for commit */
|
||||
t->in_commit = 1;
|
||||
t->blocked = 1;
|
||||
smp_mb();
|
||||
if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
|
||||
wake_up(&root->fs_info->transaction_blocked_wait);
|
||||
|
||||
t->blocked = 0;
|
||||
smp_mb();
|
||||
if (waitqueue_active(&root->fs_info->transaction_wait))
|
||||
wake_up(&root->fs_info->transaction_wait);
|
||||
|
||||
t->commit_done = 1;
|
||||
smp_mb();
|
||||
if (waitqueue_active(&t->commit_wait))
|
||||
wake_up(&t->commit_wait);
|
||||
|
||||
|
|
|
@ -54,7 +54,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
|
|||
struct btrfs_root *root, int max_mirrors);
|
||||
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
|
||||
int btrfs_commit_super(struct btrfs_root *root);
|
||||
int btrfs_error_commit_super(struct btrfs_root *root);
|
||||
void btrfs_error_commit_super(struct btrfs_root *root);
|
||||
struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
|
||||
u64 bytenr, u32 blocksize);
|
||||
struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
|
||||
|
|
|
@ -2251,6 +2251,16 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to try and merge add/drops of the same ref since we
|
||||
* can run into issues with relocate dropping the implicit ref
|
||||
* and then it being added back again before the drop can
|
||||
* finish. If we merged anything we need to re-loop so we can
|
||||
* get a good ref.
|
||||
*/
|
||||
btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
|
||||
locked_ref);
|
||||
|
||||
/*
|
||||
* locked_ref is the head node, so we have to go one
|
||||
* node back for any delayed ref updates
|
||||
|
@ -2318,12 +2328,23 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
|
|||
ref->in_tree = 0;
|
||||
rb_erase(&ref->rb_node, &delayed_refs->root);
|
||||
delayed_refs->num_entries--;
|
||||
if (locked_ref) {
|
||||
/*
|
||||
* we modified num_entries, but as we're currently running
|
||||
* delayed refs, skip
|
||||
* wake_up(&delayed_refs->seq_wait);
|
||||
* here.
|
||||
* when we play the delayed ref, also correct the
|
||||
* ref_mod on head
|
||||
*/
|
||||
switch (ref->action) {
|
||||
case BTRFS_ADD_DELAYED_REF:
|
||||
case BTRFS_ADD_DELAYED_EXTENT:
|
||||
locked_ref->node.ref_mod -= ref->ref_mod;
|
||||
break;
|
||||
case BTRFS_DROP_DELAYED_REF:
|
||||
locked_ref->node.ref_mod += ref->ref_mod;
|
||||
break;
|
||||
default:
|
||||
WARN_ON(1);
|
||||
}
|
||||
}
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
|
||||
ret = run_one_delayed_ref(trans, root, ref, extent_op,
|
||||
|
@ -2350,22 +2371,6 @@ next:
|
|||
return count;
|
||||
}
|
||||
|
||||
static void wait_for_more_refs(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_root *delayed_refs,
|
||||
unsigned long num_refs,
|
||||
struct list_head *first_seq)
|
||||
{
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
pr_debug("waiting for more refs (num %ld, first %p)\n",
|
||||
num_refs, first_seq);
|
||||
wait_event(fs_info->tree_mod_seq_wait,
|
||||
num_refs != delayed_refs->num_entries ||
|
||||
fs_info->tree_mod_seq_list.next != first_seq);
|
||||
pr_debug("done waiting for more refs (num %ld, first %p)\n",
|
||||
delayed_refs->num_entries, fs_info->tree_mod_seq_list.next);
|
||||
spin_lock(&delayed_refs->lock);
|
||||
}
|
||||
|
||||
#ifdef SCRAMBLE_DELAYED_REFS
|
||||
/*
|
||||
* Normally delayed refs get processed in ascending bytenr order. This
|
||||
|
@ -2460,13 +2465,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
|
|||
struct btrfs_delayed_ref_root *delayed_refs;
|
||||
struct btrfs_delayed_ref_node *ref;
|
||||
struct list_head cluster;
|
||||
struct list_head *first_seq = NULL;
|
||||
int ret;
|
||||
u64 delayed_start;
|
||||
int run_all = count == (unsigned long)-1;
|
||||
int run_most = 0;
|
||||
unsigned long num_refs = 0;
|
||||
int consider_waiting;
|
||||
int loops;
|
||||
|
||||
/* We'll clean this up in btrfs_cleanup_transaction */
|
||||
if (trans->aborted)
|
||||
|
@ -2484,7 +2487,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
|
|||
delayed_refs = &trans->transaction->delayed_refs;
|
||||
INIT_LIST_HEAD(&cluster);
|
||||
again:
|
||||
consider_waiting = 0;
|
||||
loops = 0;
|
||||
spin_lock(&delayed_refs->lock);
|
||||
|
||||
#ifdef SCRAMBLE_DELAYED_REFS
|
||||
|
@ -2512,31 +2515,6 @@ again:
|
|||
if (ret)
|
||||
break;
|
||||
|
||||
if (delayed_start >= delayed_refs->run_delayed_start) {
|
||||
if (consider_waiting == 0) {
|
||||
/*
|
||||
* btrfs_find_ref_cluster looped. let's do one
|
||||
* more cycle. if we don't run any delayed ref
|
||||
* during that cycle (because we can't because
|
||||
* all of them are blocked) and if the number of
|
||||
* refs doesn't change, we avoid busy waiting.
|
||||
*/
|
||||
consider_waiting = 1;
|
||||
num_refs = delayed_refs->num_entries;
|
||||
first_seq = root->fs_info->tree_mod_seq_list.next;
|
||||
} else {
|
||||
wait_for_more_refs(root->fs_info, delayed_refs,
|
||||
num_refs, first_seq);
|
||||
/*
|
||||
* after waiting, things have changed. we
|
||||
* dropped the lock and someone else might have
|
||||
* run some refs, built new clusters and so on.
|
||||
* therefore, we restart staleness detection.
|
||||
*/
|
||||
consider_waiting = 0;
|
||||
}
|
||||
}
|
||||
|
||||
ret = run_clustered_refs(trans, root, &cluster);
|
||||
if (ret < 0) {
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
|
@ -2549,9 +2527,26 @@ again:
|
|||
if (count == 0)
|
||||
break;
|
||||
|
||||
if (ret || delayed_refs->run_delayed_start == 0) {
|
||||
if (delayed_start >= delayed_refs->run_delayed_start) {
|
||||
if (loops == 0) {
|
||||
/*
|
||||
* btrfs_find_ref_cluster looped. let's do one
|
||||
* more cycle. if we don't run any delayed ref
|
||||
* during that cycle (because we can't because
|
||||
* all of them are blocked), bail out.
|
||||
*/
|
||||
loops = 1;
|
||||
} else {
|
||||
/*
|
||||
* no runnable refs left, stop trying
|
||||
*/
|
||||
BUG_ON(run_all);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ret) {
|
||||
/* refs were run, let's reset staleness detection */
|
||||
consider_waiting = 0;
|
||||
loops = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3007,17 +3002,16 @@ again:
|
|||
}
|
||||
spin_unlock(&block_group->lock);
|
||||
|
||||
num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
|
||||
/*
|
||||
* Try to preallocate enough space based on how big the block group is.
|
||||
* Keep in mind this has to include any pinned space which could end up
|
||||
* taking up quite a bit since it's not folded into the other space
|
||||
* cache.
|
||||
*/
|
||||
num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024);
|
||||
if (!num_pages)
|
||||
num_pages = 1;
|
||||
|
||||
/*
|
||||
* Just to make absolutely sure we have enough space, we're going to
|
||||
* preallocate 12 pages worth of space for each block group. In
|
||||
* practice we ought to use at most 8, but we need extra space so we can
|
||||
* add our header and have a terminator between the extents and the
|
||||
* bitmaps.
|
||||
*/
|
||||
num_pages *= 16;
|
||||
num_pages *= PAGE_CACHE_SIZE;
|
||||
|
||||
|
@ -4571,9 +4565,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
|
|||
if (root->fs_info->quota_enabled) {
|
||||
ret = btrfs_qgroup_reserve(root, num_bytes +
|
||||
nr_extents * root->leafsize);
|
||||
if (ret)
|
||||
if (ret) {
|
||||
mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
|
||||
if (ret) {
|
||||
|
@ -5294,9 +5290,6 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
|
|||
rb_erase(&head->node.rb_node, &delayed_refs->root);
|
||||
|
||||
delayed_refs->num_entries--;
|
||||
smp_mb();
|
||||
if (waitqueue_active(&root->fs_info->tree_mod_seq_wait))
|
||||
wake_up(&root->fs_info->tree_mod_seq_wait);
|
||||
|
||||
/*
|
||||
* we don't take a ref on the node because we're removing it from the
|
||||
|
|
|
@ -2330,24 +2330,11 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
|
|||
if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
|
||||
ret = tree->ops->readpage_end_io_hook(page, start, end,
|
||||
state, mirror);
|
||||
if (ret) {
|
||||
/* no IO indicated but software detected errors
|
||||
* in the block, either checksum errors or
|
||||
* issues with the contents */
|
||||
struct btrfs_root *root =
|
||||
BTRFS_I(page->mapping->host)->root;
|
||||
struct btrfs_device *device;
|
||||
|
||||
if (ret)
|
||||
uptodate = 0;
|
||||
device = btrfs_find_device_for_logical(
|
||||
root, start, mirror);
|
||||
if (device)
|
||||
btrfs_dev_stat_inc_and_print(device,
|
||||
BTRFS_DEV_STAT_CORRUPTION_ERRS);
|
||||
} else {
|
||||
else
|
||||
clean_io_failure(start, page);
|
||||
}
|
||||
}
|
||||
|
||||
if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
|
||||
ret = tree->ops->readpage_io_failed_hook(page, mirror);
|
||||
|
|
|
@ -272,9 +272,9 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
|
|||
}
|
||||
|
||||
int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
|
||||
struct bio *bio, u64 offset, u32 *dst)
|
||||
struct bio *bio, u64 offset)
|
||||
{
|
||||
return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1);
|
||||
return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1);
|
||||
}
|
||||
|
||||
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
|
||||
|
|
326
fs/btrfs/inode.c
326
fs/btrfs/inode.c
|
@ -1008,9 +1008,7 @@ static noinline void async_cow_submit(struct btrfs_work *work)
|
|||
nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
|
||||
PAGE_CACHE_SHIFT;
|
||||
|
||||
atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages);
|
||||
|
||||
if (atomic_read(&root->fs_info->async_delalloc_pages) <
|
||||
if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
|
||||
5 * 1024 * 1024 &&
|
||||
waitqueue_active(&root->fs_info->async_submit_wait))
|
||||
wake_up(&root->fs_info->async_submit_wait);
|
||||
|
@ -1885,8 +1883,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
|
|||
trans = btrfs_join_transaction_nolock(root);
|
||||
else
|
||||
trans = btrfs_join_transaction(root);
|
||||
if (IS_ERR(trans))
|
||||
return PTR_ERR(trans);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
trans = NULL;
|
||||
goto out;
|
||||
}
|
||||
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
|
||||
ret = btrfs_update_inode_fallback(trans, root, inode);
|
||||
if (ret) /* -ENOMEM or corruption */
|
||||
|
@ -3174,7 +3175,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
|
|||
btrfs_i_size_write(dir, dir->i_size - name_len * 2);
|
||||
inode_inc_iversion(dir);
|
||||
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
|
||||
ret = btrfs_update_inode(trans, root, dir);
|
||||
ret = btrfs_update_inode_fallback(trans, root, dir);
|
||||
if (ret)
|
||||
btrfs_abort_transaction(trans, root, ret);
|
||||
out:
|
||||
|
@ -5774,18 +5775,112 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Lock [lockstart, lockend] in the inode's io_tree for direct I/O.
 *
 * The range is locked, then checked for ordered extents and, when
 * @writing is set, for EXTENT_UPTODATE bits.  If either is found the
 * range is unlocked, the obstacle is dealt with and the whole sequence is
 * retried.
 *
 * Returns 0 with the range locked (state cached in @cached_state), or the
 * non-zero result of the page cache write-back/invalidation with the
 * range unlocked.
 */
static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
			      struct extent_state **cached_state, int writing)
{
	for (;;) {
		struct btrfs_ordered_extent *pending;
		int err;

		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				 0, cached_state);

		/*
		 * The whole range we are about to do DIO to matters, so there
		 * must be no ordered extents anywhere in it.
		 */
		pending = btrfs_lookup_ordered_range(inode, lockstart,
						     lockend - lockstart + 1);

		/*
		 * There must be no buffered pages in the range either: we
		 * could have raced between the invalidate in
		 * generic_file_direct_write and locking the extent.  The
		 * invalidate has to happen so that reads after a write do not
		 * see stale data.
		 */
		if (!pending) {
			if (!writing)
				return 0;
			if (!test_range_bit(&BTRFS_I(inode)->io_tree,
					    lockstart, lockend,
					    EXTENT_UPTODATE, 0, *cached_state))
				return 0;
		}

		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
				     lockend, cached_state, GFP_NOFS);

		if (pending) {
			btrfs_start_ordered_extent(inode, pending, 1);
			btrfs_put_ordered_extent(pending);
		} else {
			/* flush pages dirtied behind our back (e.g. by mmap) */
			err = filemap_write_and_wait_range(inode->i_mapping,
							   lockstart,
							   lockend);
			if (err)
				return err;

			/*
			 * A page that cannot be invalidated means the caller
			 * has to fall back to buffered I/O.
			 */
			err = invalidate_inode_pages2_range(inode->i_mapping,
					lockstart >> PAGE_CACHE_SHIFT,
					lockend >> PAGE_CACHE_SHIFT);
			if (err)
				return err;
		}

		cond_resched();
	}
}
|
||||
|
||||
static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
|
||||
struct buffer_head *bh_result, int create)
|
||||
{
|
||||
struct extent_map *em;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
struct extent_state *cached_state = NULL;
|
||||
u64 start = iblock << inode->i_blkbits;
|
||||
u64 lockstart, lockend;
|
||||
u64 len = bh_result->b_size;
|
||||
struct btrfs_trans_handle *trans;
|
||||
int unlock_bits = EXTENT_LOCKED;
|
||||
int ret;
|
||||
|
||||
if (create) {
|
||||
ret = btrfs_delalloc_reserve_space(inode, len);
|
||||
if (ret)
|
||||
return ret;
|
||||
unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
|
||||
} else {
|
||||
len = min_t(u64, len, root->sectorsize);
|
||||
}
|
||||
|
||||
lockstart = start;
|
||||
lockend = start + len - 1;
|
||||
|
||||
/*
|
||||
* If this errors out it's because we couldn't invalidate pagecache for
|
||||
* this range and we need to fallback to buffered.
|
||||
*/
|
||||
if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
|
||||
return -ENOTBLK;
|
||||
|
||||
if (create) {
|
||||
ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
|
||||
lockend, EXTENT_DELALLOC, NULL,
|
||||
&cached_state, GFP_NOFS);
|
||||
if (ret)
|
||||
goto unlock_err;
|
||||
}
|
||||
|
||||
em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
|
||||
if (IS_ERR(em))
|
||||
return PTR_ERR(em);
|
||||
if (IS_ERR(em)) {
|
||||
ret = PTR_ERR(em);
|
||||
goto unlock_err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ok for INLINE and COMPRESSED extents we need to fallback on buffered
|
||||
|
@ -5804,17 +5899,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
|
|||
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
|
||||
em->block_start == EXTENT_MAP_INLINE) {
|
||||
free_extent_map(em);
|
||||
return -ENOTBLK;
|
||||
ret = -ENOTBLK;
|
||||
goto unlock_err;
|
||||
}
|
||||
|
||||
/* Just a good old fashioned hole, return */
|
||||
if (!create && (em->block_start == EXTENT_MAP_HOLE ||
|
||||
test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
|
||||
free_extent_map(em);
|
||||
/* DIO will do one hole at a time, so just unlock a sector */
|
||||
unlock_extent(&BTRFS_I(inode)->io_tree, start,
|
||||
start + root->sectorsize - 1);
|
||||
return 0;
|
||||
ret = 0;
|
||||
goto unlock_err;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -5827,8 +5921,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
|
|||
*
|
||||
*/
|
||||
if (!create) {
|
||||
len = em->len - (start - em->start);
|
||||
goto map;
|
||||
len = min(len, em->len - (start - em->start));
|
||||
lockstart = start + len;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
|
||||
|
@ -5860,7 +5955,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
|
|||
btrfs_end_transaction(trans, root);
|
||||
if (ret) {
|
||||
free_extent_map(em);
|
||||
return ret;
|
||||
goto unlock_err;
|
||||
}
|
||||
goto unlock;
|
||||
}
|
||||
|
@ -5873,14 +5968,12 @@ must_cow:
|
|||
*/
|
||||
len = bh_result->b_size;
|
||||
em = btrfs_new_extent_direct(inode, em, start, len);
|
||||
if (IS_ERR(em))
|
||||
return PTR_ERR(em);
|
||||
if (IS_ERR(em)) {
|
||||
ret = PTR_ERR(em);
|
||||
goto unlock_err;
|
||||
}
|
||||
len = min(len, em->len - (start - em->start));
|
||||
unlock:
|
||||
clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
|
||||
EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
|
||||
0, NULL, GFP_NOFS);
|
||||
map:
|
||||
bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
|
||||
inode->i_blkbits;
|
||||
bh_result->b_size = len;
|
||||
|
@ -5898,9 +5991,44 @@ map:
|
|||
i_size_write(inode, start + len);
|
||||
}
|
||||
|
||||
/*
|
||||
* In the case of write we need to clear and unlock the entire range,
|
||||
* in the case of read we need to unlock only the end area that we
|
||||
* aren't using if there is any left over space.
|
||||
*/
|
||||
if (lockstart < lockend) {
|
||||
if (create && len < lockend - lockstart) {
|
||||
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
|
||||
lockstart + len - 1, unlock_bits, 1, 0,
|
||||
&cached_state, GFP_NOFS);
|
||||
/*
|
||||
* Beside unlock, we also need to cleanup reserved space
|
||||
* for the left range by attaching EXTENT_DO_ACCOUNTING.
|
||||
*/
|
||||
clear_extent_bit(&BTRFS_I(inode)->io_tree,
|
||||
lockstart + len, lockend,
|
||||
unlock_bits | EXTENT_DO_ACCOUNTING,
|
||||
1, 0, NULL, GFP_NOFS);
|
||||
} else {
|
||||
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
|
||||
lockend, unlock_bits, 1, 0,
|
||||
&cached_state, GFP_NOFS);
|
||||
}
|
||||
} else {
|
||||
free_extent_state(cached_state);
|
||||
}
|
||||
|
||||
free_extent_map(em);
|
||||
|
||||
return 0;
|
||||
|
||||
unlock_err:
|
||||
if (create)
|
||||
unlock_bits |= EXTENT_DO_ACCOUNTING;
|
||||
|
||||
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
|
||||
unlock_bits, 1, 0, &cached_state, GFP_NOFS);
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct btrfs_dio_private {
|
||||
|
@ -5908,7 +6036,6 @@ struct btrfs_dio_private {
|
|||
u64 logical_offset;
|
||||
u64 disk_bytenr;
|
||||
u64 bytes;
|
||||
u32 *csums;
|
||||
void *private;
|
||||
|
||||
/* number of bios pending for this dio */
|
||||
|
@ -5928,7 +6055,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
|
|||
struct inode *inode = dip->inode;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
u64 start;
|
||||
u32 *private = dip->csums;
|
||||
|
||||
start = dip->logical_offset;
|
||||
do {
|
||||
|
@ -5936,8 +6062,12 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
|
|||
struct page *page = bvec->bv_page;
|
||||
char *kaddr;
|
||||
u32 csum = ~(u32)0;
|
||||
u64 private = ~(u32)0;
|
||||
unsigned long flags;
|
||||
|
||||
if (get_state_private(&BTRFS_I(inode)->io_tree,
|
||||
start, &private))
|
||||
goto failed;
|
||||
local_irq_save(flags);
|
||||
kaddr = kmap_atomic(page);
|
||||
csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
|
||||
|
@ -5947,18 +6077,18 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
|
|||
local_irq_restore(flags);
|
||||
|
||||
flush_dcache_page(bvec->bv_page);
|
||||
if (csum != *private) {
|
||||
if (csum != private) {
|
||||
failed:
|
||||
printk(KERN_ERR "btrfs csum failed ino %llu off"
|
||||
" %llu csum %u private %u\n",
|
||||
(unsigned long long)btrfs_ino(inode),
|
||||
(unsigned long long)start,
|
||||
csum, *private);
|
||||
csum, (unsigned)private);
|
||||
err = -EIO;
|
||||
}
|
||||
}
|
||||
|
||||
start += bvec->bv_len;
|
||||
private++;
|
||||
bvec++;
|
||||
} while (bvec <= bvec_end);
|
||||
|
||||
|
@ -5966,7 +6096,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
|
|||
dip->logical_offset + dip->bytes - 1);
|
||||
bio->bi_private = dip->private;
|
||||
|
||||
kfree(dip->csums);
|
||||
kfree(dip);
|
||||
|
||||
/* If we had a csum failure make sure to clear the uptodate flag */
|
||||
|
@ -6072,7 +6201,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
|
|||
|
||||
static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
|
||||
int rw, u64 file_offset, int skip_sum,
|
||||
u32 *csums, int async_submit)
|
||||
int async_submit)
|
||||
{
|
||||
int write = rw & REQ_WRITE;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
|
@ -6105,8 +6234,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
|
|||
if (ret)
|
||||
goto err;
|
||||
} else if (!skip_sum) {
|
||||
ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
|
||||
file_offset, csums);
|
||||
ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
@ -6132,10 +6260,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
|
|||
u64 submit_len = 0;
|
||||
u64 map_length;
|
||||
int nr_pages = 0;
|
||||
u32 *csums = dip->csums;
|
||||
int ret = 0;
|
||||
int async_submit = 0;
|
||||
int write = rw & REQ_WRITE;
|
||||
|
||||
map_length = orig_bio->bi_size;
|
||||
ret = btrfs_map_block(map_tree, READ, start_sector << 9,
|
||||
|
@ -6171,16 +6297,13 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
|
|||
atomic_inc(&dip->pending_bios);
|
||||
ret = __btrfs_submit_dio_bio(bio, inode, rw,
|
||||
file_offset, skip_sum,
|
||||
csums, async_submit);
|
||||
async_submit);
|
||||
if (ret) {
|
||||
bio_put(bio);
|
||||
atomic_dec(&dip->pending_bios);
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
/* Write's use the ordered csums */
|
||||
if (!write && !skip_sum)
|
||||
csums = csums + nr_pages;
|
||||
start_sector += submit_len >> 9;
|
||||
file_offset += submit_len;
|
||||
|
||||
|
@ -6210,7 +6333,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
|
|||
|
||||
submit:
|
||||
ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
|
||||
csums, async_submit);
|
||||
async_submit);
|
||||
if (!ret)
|
||||
return 0;
|
||||
|
||||
|
@ -6246,17 +6369,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
|
|||
ret = -ENOMEM;
|
||||
goto free_ordered;
|
||||
}
|
||||
dip->csums = NULL;
|
||||
|
||||
/* Write's use the ordered csum stuff, so we don't need dip->csums */
|
||||
if (!write && !skip_sum) {
|
||||
dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
|
||||
if (!dip->csums) {
|
||||
kfree(dip);
|
||||
ret = -ENOMEM;
|
||||
goto free_ordered;
|
||||
}
|
||||
}
|
||||
|
||||
dip->private = bio->bi_private;
|
||||
dip->inode = inode;
|
||||
|
@ -6341,132 +6453,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
|
|||
out:
|
||||
return retval;
|
||||
}
|
||||
|
||||
static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
|
||||
const struct iovec *iov, loff_t offset,
|
||||
unsigned long nr_segs)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
struct extent_state *cached_state = NULL;
|
||||
u64 lockstart, lockend;
|
||||
ssize_t ret;
|
||||
int writing = rw & WRITE;
|
||||
int write_bits = 0;
|
||||
size_t count = iov_length(iov, nr_segs);
|
||||
|
||||
if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
|
||||
offset, nr_segs)) {
|
||||
offset, nr_segs))
|
||||
return 0;
|
||||
}
|
||||
|
||||
lockstart = offset;
|
||||
lockend = offset + count - 1;
|
||||
|
||||
if (writing) {
|
||||
ret = btrfs_delalloc_reserve_space(inode, count);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
while (1) {
|
||||
lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
|
||||
0, &cached_state);
|
||||
/*
|
||||
* We're concerned with the entire range that we're going to be
|
||||
* doing DIO to, so we need to make sure theres no ordered
|
||||
* extents in this range.
|
||||
*/
|
||||
ordered = btrfs_lookup_ordered_range(inode, lockstart,
|
||||
lockend - lockstart + 1);
|
||||
|
||||
/*
|
||||
* We need to make sure there are no buffered pages in this
|
||||
* range either, we could have raced between the invalidate in
|
||||
* generic_file_direct_write and locking the extent. The
|
||||
* invalidate needs to happen so that reads after a write do not
|
||||
* get stale data.
|
||||
*/
|
||||
if (!ordered && (!writing ||
|
||||
!test_range_bit(&BTRFS_I(inode)->io_tree,
|
||||
lockstart, lockend, EXTENT_UPTODATE, 0,
|
||||
cached_state)))
|
||||
break;
|
||||
|
||||
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
|
||||
&cached_state, GFP_NOFS);
|
||||
|
||||
if (ordered) {
|
||||
btrfs_start_ordered_extent(inode, ordered, 1);
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
} else {
|
||||
/* Screw you mmap */
|
||||
ret = filemap_write_and_wait_range(file->f_mapping,
|
||||
lockstart,
|
||||
lockend);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* If we found a page that couldn't be invalidated just
|
||||
* fall back to buffered.
|
||||
*/
|
||||
ret = invalidate_inode_pages2_range(file->f_mapping,
|
||||
lockstart >> PAGE_CACHE_SHIFT,
|
||||
lockend >> PAGE_CACHE_SHIFT);
|
||||
if (ret) {
|
||||
if (ret == -EBUSY)
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
/*
|
||||
* we don't use btrfs_set_extent_delalloc because we don't want
|
||||
* the dirty or uptodate bits
|
||||
*/
|
||||
if (writing) {
|
||||
write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
|
||||
ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
|
||||
EXTENT_DELALLOC, NULL, &cached_state,
|
||||
GFP_NOFS);
|
||||
if (ret) {
|
||||
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
|
||||
lockend, EXTENT_LOCKED | write_bits,
|
||||
1, 0, &cached_state, GFP_NOFS);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
free_extent_state(cached_state);
|
||||
cached_state = NULL;
|
||||
|
||||
ret = __blockdev_direct_IO(rw, iocb, inode,
|
||||
return __blockdev_direct_IO(rw, iocb, inode,
|
||||
BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
|
||||
iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
|
||||
btrfs_submit_direct, 0);
|
||||
|
||||
if (ret < 0 && ret != -EIOCBQUEUED) {
|
||||
clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
|
||||
offset + iov_length(iov, nr_segs) - 1,
|
||||
EXTENT_LOCKED | write_bits, 1, 0,
|
||||
&cached_state, GFP_NOFS);
|
||||
} else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
|
||||
/*
|
||||
* We're falling back to buffered, unlock the section we didn't
|
||||
* do IO on.
|
||||
*/
|
||||
clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
|
||||
offset + iov_length(iov, nr_segs) - 1,
|
||||
EXTENT_LOCKED | write_bits, 1, 0,
|
||||
&cached_state, GFP_NOFS);
|
||||
}
|
||||
out:
|
||||
free_extent_state(cached_state);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
|
|
|
@ -424,7 +424,7 @@ static noinline int create_subvol(struct btrfs_root *root,
|
|||
uuid_le_gen(&new_uuid);
|
||||
memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
|
||||
root_item.otime.sec = cpu_to_le64(cur_time.tv_sec);
|
||||
root_item.otime.nsec = cpu_to_le64(cur_time.tv_nsec);
|
||||
root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec);
|
||||
root_item.ctime = root_item.otime;
|
||||
btrfs_set_root_ctransid(&root_item, trans->transid);
|
||||
btrfs_set_root_otransid(&root_item, trans->transid);
|
||||
|
|
|
@ -67,7 +67,7 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
|
|||
{
|
||||
if (eb->lock_nested) {
|
||||
read_lock(&eb->lock);
|
||||
if (&eb->lock_nested && current->pid == eb->lock_owner) {
|
||||
if (eb->lock_nested && current->pid == eb->lock_owner) {
|
||||
read_unlock(&eb->lock);
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -1364,13 +1364,17 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
|
|||
spin_lock(&fs_info->qgroup_lock);
|
||||
|
||||
dstgroup = add_qgroup_rb(fs_info, objectid);
|
||||
if (!dstgroup)
|
||||
if (IS_ERR(dstgroup)) {
|
||||
ret = PTR_ERR(dstgroup);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (srcid) {
|
||||
srcgroup = find_qgroup_rb(fs_info, srcid);
|
||||
if (!srcgroup)
|
||||
if (!srcgroup) {
|
||||
ret = -EINVAL;
|
||||
goto unlock;
|
||||
}
|
||||
dstgroup->rfer = srcgroup->rfer - level_size;
|
||||
dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
|
||||
srcgroup->excl = level_size;
|
||||
|
@ -1379,8 +1383,10 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
|
|||
qgroup_dirty(fs_info, srcgroup);
|
||||
}
|
||||
|
||||
if (!inherit)
|
||||
if (!inherit) {
|
||||
ret = -EINVAL;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
i_qgroups = (u64 *)(inherit + 1);
|
||||
for (i = 0; i < inherit->num_qgroups; ++i) {
|
||||
|
|
|
@ -544,8 +544,8 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
|
|||
struct timespec ct = CURRENT_TIME;
|
||||
|
||||
spin_lock(&root->root_times_lock);
|
||||
item->ctransid = trans->transid;
|
||||
item->ctransid = cpu_to_le64(trans->transid);
|
||||
item->ctime.sec = cpu_to_le64(ct.tv_sec);
|
||||
item->ctime.nsec = cpu_to_le64(ct.tv_nsec);
|
||||
item->ctime.nsec = cpu_to_le32(ct.tv_nsec);
|
||||
spin_unlock(&root->root_times_lock);
|
||||
}
|
||||
|
|
|
@ -838,7 +838,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
|
|||
struct btrfs_trans_handle *trans;
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
|
||||
struct btrfs_root *root = fs_info->tree_root;
|
||||
int ret;
|
||||
|
||||
trace_btrfs_sync_fs(wait);
|
||||
|
||||
|
@ -849,11 +848,17 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
|
|||
|
||||
btrfs_wait_ordered_extents(root, 0, 0);
|
||||
|
||||
trans = btrfs_start_transaction(root, 0);
|
||||
spin_lock(&fs_info->trans_lock);
|
||||
if (!fs_info->running_transaction) {
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
return 0;
|
||||
}
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
|
||||
trans = btrfs_join_transaction(root);
|
||||
if (IS_ERR(trans))
|
||||
return PTR_ERR(trans);
|
||||
ret = btrfs_commit_transaction(trans, root);
|
||||
return ret;
|
||||
return btrfs_commit_transaction(trans, root);
|
||||
}
|
||||
|
||||
static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
|
||||
|
@ -1530,6 +1535,8 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
|
|||
while (cur_devices) {
|
||||
head = &cur_devices->devices;
|
||||
list_for_each_entry(dev, head, dev_list) {
|
||||
if (dev->missing)
|
||||
continue;
|
||||
if (!first_dev || dev->devid < first_dev->devid)
|
||||
first_dev = dev;
|
||||
}
|
||||
|
|
|
@ -1031,6 +1031,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
|||
|
||||
btrfs_i_size_write(parent_inode, parent_inode->i_size +
|
||||
dentry->d_name.len * 2);
|
||||
parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
|
||||
ret = btrfs_update_inode(trans, parent_root, parent_inode);
|
||||
if (ret)
|
||||
goto abort_trans_dput;
|
||||
|
@ -1066,7 +1067,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
|||
memcpy(new_root_item->parent_uuid, root->root_item.uuid,
|
||||
BTRFS_UUID_SIZE);
|
||||
new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
|
||||
new_root_item->otime.nsec = cpu_to_le64(cur_time.tv_nsec);
|
||||
new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec);
|
||||
btrfs_set_root_otransid(new_root_item, trans->transid);
|
||||
memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
|
||||
memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
|
||||
|
|
|
@ -227,9 +227,8 @@ loop_lock:
|
|||
cur = pending;
|
||||
pending = pending->bi_next;
|
||||
cur->bi_next = NULL;
|
||||
atomic_dec(&fs_info->nr_async_bios);
|
||||
|
||||
if (atomic_read(&fs_info->nr_async_bios) < limit &&
|
||||
if (atomic_dec_return(&fs_info->nr_async_bios) < limit &&
|
||||
waitqueue_active(&fs_info->async_submit_wait))
|
||||
wake_up(&fs_info->async_submit_wait);
|
||||
|
||||
|
@ -569,9 +568,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
|
|||
memcpy(new_device, device, sizeof(*new_device));
|
||||
|
||||
/* Safe because we are under uuid_mutex */
|
||||
if (device->name) {
|
||||
name = rcu_string_strdup(device->name->str, GFP_NOFS);
|
||||
BUG_ON(device->name && !name); /* -ENOMEM */
|
||||
rcu_assign_pointer(new_device->name, name);
|
||||
}
|
||||
new_device->bdev = NULL;
|
||||
new_device->writeable = 0;
|
||||
new_device->in_fs_metadata = 0;
|
||||
|
@ -4605,28 +4606,6 @@ int btrfs_read_sys_array(struct btrfs_root *root)
|
|||
return ret;
|
||||
}
|
||||
|
||||
struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
|
||||
u64 logical, int mirror_num)
|
||||
{
|
||||
struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
|
||||
int ret;
|
||||
u64 map_length = 0;
|
||||
struct btrfs_bio *bbio = NULL;
|
||||
struct btrfs_device *device;
|
||||
|
||||
BUG_ON(mirror_num == 0);
|
||||
ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio,
|
||||
mirror_num);
|
||||
if (ret) {
|
||||
BUG_ON(bbio != NULL);
|
||||
return NULL;
|
||||
}
|
||||
BUG_ON(mirror_num != bbio->mirror_num);
|
||||
device = bbio->stripes[mirror_num - 1].dev;
|
||||
kfree(bbio);
|
||||
return device;
|
||||
}
|
||||
|
||||
int btrfs_read_chunk_tree(struct btrfs_root *root)
|
||||
{
|
||||
struct btrfs_path *path;
|
||||
|
|
|
@ -289,8 +289,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
|
|||
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
|
||||
int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
|
||||
u64 *start, u64 *max_avail);
|
||||
struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
|
||||
u64 logical, int mirror_num);
|
||||
void btrfs_dev_stat_print_on_error(struct btrfs_device *device);
|
||||
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
|
||||
int btrfs_get_dev_stats(struct btrfs_root *root,
|
||||
|
|
Loading…
Reference in New Issue