Btrfs: reduce CPU contention while waiting for delayed extent operations
We batch up operations to the extent allocation tree, which allows us to deal with the recursive nature of using the extent allocation tree to allocate extents to the extent allocation tree.

It also provides a mechanism to sort and collect extent operations, which makes it much more efficient to record extents that are close together.

The delayed extent operations must all be finished before the running transaction commits, so we have code to make sure to run a few of the batched operations when closing our transaction handles.

This creates a great deal of contention for the locks in the delayed extent operation tree, and also for the lock on the extent allocation tree itself. All the extra contention just slows the operations down; it doesn't get the work done any faster.

This commit changes things to use a wait queue instead. As procs want to run the delayed operations, one of them races in and gets permission to hit the tree, and the others step back and wait for progress to be made.

Signed-off-by: Chris Mason <chris.mason@fusionio.com>
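A minimal userspace sketch of that gate pattern, assuming C11 atomics and a pthread condition variable in place of the kernel's atomic_cmpxchg() and wait queue (all names, thread counts, and the progress window below are hypothetical, not the patch itself): one thread wins a compare-and-swap and becomes the runner, the rest sleep until the sequence counter shows progress, then either return or retry the gate.

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_int runner;	/* plays the role of procs_running_refs */
	static atomic_int seq;		/* plays the role of ref_seq */
	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t waitq = PTHREAD_COND_INITIALIZER;

	static void *run_delayed_work(void *arg)
	{
		(void)arg;
		for (;;) {
			int expected = 0;
			int start = atomic_load(&seq);

			/* Race for permission: one thread flips runner 0 -> 1. */
			if (atomic_compare_exchange_strong(&runner, &expected, 1))
				break;

			/*
			 * Lost the race: instead of piling onto the same locks,
			 * sleep until the runner has made progress, then retry
			 * the gate (like the patch's "goto progress").
			 */
			pthread_mutex_lock(&lock);
			while (atomic_load(&runner) && atomic_load(&seq) < start + 4)
				pthread_cond_wait(&waitq, &lock);
			pthread_mutex_unlock(&lock);

			if (atomic_load(&seq) >= start + 4)
				return NULL;	/* enough progress was made for us */
		}

		/* We hold the gate: do the batched work, bumping seq as we go. */
		for (int i = 0; i < 16; i++)
			atomic_fetch_add(&seq, 1);

		/* Drop the gate and wake everyone who backed off. */
		atomic_store(&runner, 0);
		pthread_mutex_lock(&lock);
		pthread_cond_broadcast(&waitq);
		pthread_mutex_unlock(&lock);
		return NULL;
	}

	int main(void)
	{
		pthread_t threads[4];

		for (int i = 0; i < 4; i++)
			pthread_create(&threads[i], NULL, run_delayed_work, NULL);
		for (int i = 0; i < 4; i++)
			pthread_join(threads[i], NULL);
		printf("seq advanced to %d\n", atomic_load(&seq));
		return 0;
	}

Broadcasting while holding the mutex is what keeps the sketch free of lost wakeups; the kernel version gets the same guarantee from prepare_to_wait() plus the smp_mb() on the waker side.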
parent 242e18c7c1
commit bb721703aa
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -131,6 +131,15 @@ struct btrfs_delayed_ref_root {
 	/* total number of head nodes ready for processing */
 	unsigned long num_heads_ready;
 
+	/*
+	 * bumped when someone is making progress on the delayed
+	 * refs, so that other procs know they are just adding to
+	 * contention instead of helping
+	 */
+	atomic_t procs_running_refs;
+	atomic_t ref_seq;
+	wait_queue_head_t wait;
+
 	/*
 	 * set when the tree is flushing before a transaction commit,
 	 * used by the throttling code to decide if new updates need
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2438,6 +2438,16 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
+static int refs_newer(struct btrfs_delayed_ref_root *delayed_refs, int seq,
+		      int count)
+{
+	int val = atomic_read(&delayed_refs->ref_seq);
+
+	if (val < seq || val >= seq + count)
+		return 1;
+	return 0;
+}
+
 /*
  * this starts processing the delayed reference count updates and
  * extent insertions we have queued up so far. count can be
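The window check in refs_newer() reads a little backwards: it reports "newer" once ref_seq has advanced at least count entries past the snapshot the waiter took before sleeping, with the val < seq arm presumably covering the counter going backwards on wrap. A hypothetical standalone harness (the atomic read is replaced by a plain parameter for illustration):

	#include <stdio.h>

	/* same condition as the patch, with val passed in directly */
	static int refs_newer(int val, int seq, int count)
	{
		if (val < seq || val >= seq + count)
			return 1;
		return 0;
	}

	int main(void)
	{
		/* a waiter snapshotted seq = 100 and watches a window of 256 */
		printf("%d\n", refs_newer(100, 100, 256)); /* 0: no progress yet  */
		printf("%d\n", refs_newer(300, 100, 256)); /* 0: some, not enough */
		printf("%d\n", refs_newer(356, 100, 256)); /* 1: >= 256 processed */
		printf("%d\n", refs_newer(42, 100, 256));  /* 1: counter wrapped  */
		return 0;
	}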
@@ -2472,6 +2482,44 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 
 	delayed_refs = &trans->transaction->delayed_refs;
 	INIT_LIST_HEAD(&cluster);
+	if (count == 0) {
+		count = delayed_refs->num_entries * 2;
+		run_most = 1;
+	}
+
+	if (!run_all && !run_most) {
+		int old;
+		int seq = atomic_read(&delayed_refs->ref_seq);
+
+progress:
+		old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
+		if (old) {
+			DEFINE_WAIT(__wait);
+			if (delayed_refs->num_entries < 16348)
+				return 0;
+
+			prepare_to_wait(&delayed_refs->wait, &__wait,
+					TASK_UNINTERRUPTIBLE);
+
+			old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
+			if (old) {
+				schedule();
+				finish_wait(&delayed_refs->wait, &__wait);
+
+				if (!refs_newer(delayed_refs, seq, 256))
+					goto progress;
+				else
+					return 0;
+			} else {
+				finish_wait(&delayed_refs->wait, &__wait);
+				goto again;
+			}
+		}
+
+	} else {
+		atomic_inc(&delayed_refs->procs_running_refs);
+	}
+
 again:
 	loops = 0;
 	spin_lock(&delayed_refs->lock);
@@ -2480,10 +2528,6 @@ again:
 	delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
 #endif
 
-	if (count == 0) {
-		count = delayed_refs->num_entries * 2;
-		run_most = 1;
-	}
 	while (1) {
 		if (!(run_all || run_most) &&
 		    delayed_refs->num_heads_ready < 64)
@@ -2505,9 +2549,12 @@ again:
 		if (ret < 0) {
 			spin_unlock(&delayed_refs->lock);
 			btrfs_abort_transaction(trans, root, ret);
+			atomic_dec(&delayed_refs->procs_running_refs);
 			return ret;
 		}
 
+		atomic_add(ret, &delayed_refs->ref_seq);
+
 		count -= min_t(unsigned long, ret, count);
 
 		if (count == 0)
@@ -2576,6 +2623,11 @@ again:
 		goto again;
 	}
 out:
+	atomic_dec(&delayed_refs->procs_running_refs);
+	smp_mb();
+	if (waitqueue_active(&delayed_refs->wait))
+		wake_up(&delayed_refs->wait);
+
 	spin_unlock(&delayed_refs->lock);
 	assert_qgroups_uptodate(trans);
 	return 0;
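The out: path pairs the decrement with a full barrier before peeking at the wait queue, so the drop of the gate is visible before waitqueue_active() is sampled and a proc queueing itself concurrently cannot be missed. A rough userspace analogue of that waker-side ordering, with hypothetical names and a C11 seq_cst fence standing in for smp_mb() (C11 seq_cst atomics are already ordered, so the explicit fence here is purely illustrative of the kernel idiom, where atomic_dec() implies no barrier):

	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_int procs_running = 1;
	static atomic_int waiters;	/* stands in for waitqueue_active() */

	static void wake_up_waiters(void)
	{
		/* in the kernel: wake_up(&delayed_refs->wait) */
		printf("waking %d waiter(s)\n", atomic_load(&waiters));
	}

	static void finish_running_refs(void)
	{
		atomic_fetch_sub(&procs_running, 1);
		/*
		 * Publish the decrement before sampling the waiter state.
		 * This pairs with the barrier a waiter executes between
		 * queueing itself and re-checking the gate, so at least one
		 * side always sees the other and the wakeup cannot be lost.
		 */
		atomic_thread_fence(memory_order_seq_cst);
		if (atomic_load(&waiters))
			wake_up_waiters();
	}

	int main(void)
	{
		atomic_store(&waiters, 2);	/* pretend two procs backed off */
		finish_running_refs();
		return 0;
	}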
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -156,6 +156,9 @@ loop:
 
 	spin_lock_init(&cur_trans->commit_lock);
 	spin_lock_init(&cur_trans->delayed_refs.lock);
+	atomic_set(&cur_trans->delayed_refs.procs_running_refs, 0);
+	atomic_set(&cur_trans->delayed_refs.ref_seq, 0);
+	init_waitqueue_head(&cur_trans->delayed_refs.wait);
 
 	INIT_LIST_HEAD(&cur_trans->pending_snapshots);
 	list_add_tail(&cur_trans->list, &fs_info->trans_list);
@@ -577,7 +580,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 	if (!list_empty(&trans->new_bgs))
 		btrfs_create_pending_block_groups(trans, root);
 
-	while (count < 2) {
+	while (count < 1) {
 		unsigned long cur = trans->delayed_ref_updates;
 		trans->delayed_ref_updates = 0;
 		if (cur &&
@@ -589,6 +592,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 		}
 		count++;
 	}
+
 	btrfs_trans_release_metadata(trans, root);
 	trans->block_rsv = NULL;
 