bcache: make bch_btree_check() to be multithreaded
When registering a cache device, bch_btree_check() is called to check all btree nodes, to make sure the btree is consistent and not corrupted. bch_btree_check() is recursively executed in a single thread, when there are a lot of data cached and the btree is huge, it may take very long time to check all the btree nodes. In my testing, I observed it took around 50 minutes to finish bch_btree_check(). When checking the bcache btree nodes, the cache set is not running yet, and indeed the whole tree is in read-only state, it is safe to create multiple threads to check the btree in parallel. This patch tries to create multiple threads, and each thread tries to one-by-one check the sub-tree indexed by a key from the btree root node. The parallel thread number depends on how many keys in the btree root node. At most BCH_BTR_CHKTHREAD_MAX (64) threads can be created, but in practice is should be min(cpu-number/2, root-node-keys-number). Signed-off-by: Coly Li <colyli@suse.de> Cc: Christoph Hellwig <hch@infradead.org> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
feac1a70b8
commit
8e7102273f
|
@ -1897,13 +1897,176 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int bch_btree_check_thread(void *arg)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
struct btree_check_info *info = arg;
|
||||||
|
struct btree_check_state *check_state = info->state;
|
||||||
|
struct cache_set *c = check_state->c;
|
||||||
|
struct btree_iter iter;
|
||||||
|
struct bkey *k, *p;
|
||||||
|
int cur_idx, prev_idx, skip_nr;
|
||||||
|
int i, n;
|
||||||
|
|
||||||
|
k = p = NULL;
|
||||||
|
i = n = 0;
|
||||||
|
cur_idx = prev_idx = 0;
|
||||||
|
ret = 0;
|
||||||
|
|
||||||
|
/* root node keys are checked before thread created */
|
||||||
|
bch_btree_iter_init(&c->root->keys, &iter, NULL);
|
||||||
|
k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
|
||||||
|
BUG_ON(!k);
|
||||||
|
|
||||||
|
p = k;
|
||||||
|
while (k) {
|
||||||
|
/*
|
||||||
|
* Fetch a root node key index, skip the keys which
|
||||||
|
* should be fetched by other threads, then check the
|
||||||
|
* sub-tree indexed by the fetched key.
|
||||||
|
*/
|
||||||
|
spin_lock(&check_state->idx_lock);
|
||||||
|
cur_idx = check_state->key_idx;
|
||||||
|
check_state->key_idx++;
|
||||||
|
spin_unlock(&check_state->idx_lock);
|
||||||
|
|
||||||
|
skip_nr = cur_idx - prev_idx;
|
||||||
|
|
||||||
|
while (skip_nr) {
|
||||||
|
k = bch_btree_iter_next_filter(&iter,
|
||||||
|
&c->root->keys,
|
||||||
|
bch_ptr_bad);
|
||||||
|
if (k)
|
||||||
|
p = k;
|
||||||
|
else {
|
||||||
|
/*
|
||||||
|
* No more keys to check in root node,
|
||||||
|
* current checking threads are enough,
|
||||||
|
* stop creating more.
|
||||||
|
*/
|
||||||
|
atomic_set(&check_state->enough, 1);
|
||||||
|
/* Update check_state->enough earlier */
|
||||||
|
smp_mb();
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
skip_nr--;
|
||||||
|
cond_resched();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (p) {
|
||||||
|
struct btree_op op;
|
||||||
|
|
||||||
|
btree_node_prefetch(c->root, p);
|
||||||
|
c->gc_stats.nodes++;
|
||||||
|
bch_btree_op_init(&op, 0);
|
||||||
|
ret = bcache_btree(check_recurse, p, c->root, &op);
|
||||||
|
if (ret)
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
p = NULL;
|
||||||
|
prev_idx = cur_idx;
|
||||||
|
cond_resched();
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
info->result = ret;
|
||||||
|
/* update check_state->started among all CPUs */
|
||||||
|
smp_mb();
|
||||||
|
if (atomic_dec_and_test(&check_state->started))
|
||||||
|
wake_up(&check_state->wait);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
static int bch_btree_chkthread_nr(void)
|
||||||
|
{
|
||||||
|
int n = num_online_cpus()/2;
|
||||||
|
|
||||||
|
if (n == 0)
|
||||||
|
n = 1;
|
||||||
|
else if (n > BCH_BTR_CHKTHREAD_MAX)
|
||||||
|
n = BCH_BTR_CHKTHREAD_MAX;
|
||||||
|
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
int bch_btree_check(struct cache_set *c)
|
int bch_btree_check(struct cache_set *c)
|
||||||
{
|
{
|
||||||
struct btree_op op;
|
int ret = 0;
|
||||||
|
int i;
|
||||||
|
struct bkey *k = NULL;
|
||||||
|
struct btree_iter iter;
|
||||||
|
struct btree_check_state *check_state;
|
||||||
|
char name[32];
|
||||||
|
|
||||||
bch_btree_op_init(&op, SHRT_MAX);
|
/* check and mark root node keys */
|
||||||
|
for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid)
|
||||||
|
bch_initial_mark_key(c, c->root->level, k);
|
||||||
|
|
||||||
return bcache_btree_root(check_recurse, c, &op);
|
bch_initial_mark_key(c, c->root->level + 1, &c->root->key);
|
||||||
|
|
||||||
|
if (c->root->level == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
check_state = kzalloc(sizeof(struct btree_check_state), GFP_KERNEL);
|
||||||
|
if (!check_state)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
check_state->c = c;
|
||||||
|
check_state->total_threads = bch_btree_chkthread_nr();
|
||||||
|
check_state->key_idx = 0;
|
||||||
|
spin_lock_init(&check_state->idx_lock);
|
||||||
|
atomic_set(&check_state->started, 0);
|
||||||
|
atomic_set(&check_state->enough, 0);
|
||||||
|
init_waitqueue_head(&check_state->wait);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Run multiple threads to check btree nodes in parallel,
|
||||||
|
* if check_state->enough is non-zero, it means current
|
||||||
|
* running check threads are enough, unncessary to create
|
||||||
|
* more.
|
||||||
|
*/
|
||||||
|
for (i = 0; i < check_state->total_threads; i++) {
|
||||||
|
/* fetch latest check_state->enough earlier */
|
||||||
|
smp_mb();
|
||||||
|
if (atomic_read(&check_state->enough))
|
||||||
|
break;
|
||||||
|
|
||||||
|
check_state->infos[i].result = 0;
|
||||||
|
check_state->infos[i].state = check_state;
|
||||||
|
snprintf(name, sizeof(name), "bch_btrchk[%u]", i);
|
||||||
|
atomic_inc(&check_state->started);
|
||||||
|
|
||||||
|
check_state->infos[i].thread =
|
||||||
|
kthread_run(bch_btree_check_thread,
|
||||||
|
&check_state->infos[i],
|
||||||
|
name);
|
||||||
|
if (IS_ERR(check_state->infos[i].thread)) {
|
||||||
|
pr_err("fails to run thread bch_btrchk[%d]", i);
|
||||||
|
for (--i; i >= 0; i--)
|
||||||
|
kthread_stop(check_state->infos[i].thread);
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
wait_event_interruptible(check_state->wait,
|
||||||
|
atomic_read(&check_state->started) == 0 ||
|
||||||
|
test_bit(CACHE_SET_IO_DISABLE, &c->flags));
|
||||||
|
|
||||||
|
for (i = 0; i < check_state->total_threads; i++) {
|
||||||
|
if (check_state->infos[i].result) {
|
||||||
|
ret = check_state->infos[i].result;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
kfree(check_state);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
void bch_initial_gc_finish(struct cache_set *c)
|
void bch_initial_gc_finish(struct cache_set *c)
|
||||||
|
|
|
@ -145,6 +145,9 @@ struct btree {
|
||||||
struct bio *bio;
|
struct bio *bio;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#define BTREE_FLAG(flag) \
|
#define BTREE_FLAG(flag) \
|
||||||
static inline bool btree_node_ ## flag(struct btree *b) \
|
static inline bool btree_node_ ## flag(struct btree *b) \
|
||||||
{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \
|
{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \
|
||||||
|
@ -216,6 +219,25 @@ struct btree_op {
|
||||||
unsigned int insert_collision:1;
|
unsigned int insert_collision:1;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct btree_check_state;
|
||||||
|
struct btree_check_info {
|
||||||
|
struct btree_check_state *state;
|
||||||
|
struct task_struct *thread;
|
||||||
|
int result;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define BCH_BTR_CHKTHREAD_MAX 64
|
||||||
|
struct btree_check_state {
|
||||||
|
struct cache_set *c;
|
||||||
|
int total_threads;
|
||||||
|
int key_idx;
|
||||||
|
spinlock_t idx_lock;
|
||||||
|
atomic_t started;
|
||||||
|
atomic_t enough;
|
||||||
|
wait_queue_head_t wait;
|
||||||
|
struct btree_check_info infos[BCH_BTR_CHKTHREAD_MAX];
|
||||||
|
};
|
||||||
|
|
||||||
static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
|
static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
|
||||||
{
|
{
|
||||||
memset(op, 0, sizeof(struct btree_op));
|
memset(op, 0, sizeof(struct btree_op));
|
||||||
|
|
Loading…
Reference in New Issue