Btrfs: make a lockdep class for the extent buffer locks

Btrfs is currently using spin_lock_nested with a nested value based
on the tree depth of the block.  But, this doesn't quite work because
the max tree depth is bigger than what spin_lock_nested can deal with,
and because locks are sometimes taken before the level field is filled in.

The solution here is to use lockdep_set_class_and_name instead, and to
set the class before unlocking the pages when the block is read from the
disk and just after init of a freshly allocated tree block.

btrfs_clear_path_blocking is also changed to take the locks in the proper
order, and it also makes sure all the locks currently held are properly
set to blocking before it tries to retake the spinlocks.  Otherwise, lockdep
gets upset about bad lock ordering.

The lockdep magic came from Peter Zijlstra <peterz@infradead.org>

Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
Chris Mason 2009-02-12 14:09:45 -05:00
parent 3f3420df50
commit 4008c04a07
7 changed files with 99 additions and 32 deletions

View File

@ -62,14 +62,38 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
/* /*
* reset all the locked nodes in the patch to spinning locks. * reset all the locked nodes in the patch to spinning locks.
*
* held is used to keep lockdep happy, when lockdep is enabled
* we set held to a blocking lock before we go around and
* retake all the spinlocks in the path. You can safely use NULL
* for held
*/ */
noinline void btrfs_clear_path_blocking(struct btrfs_path *p) noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
struct extent_buffer *held)
{ {
int i; int i;
for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/* lockdep really cares that we take all of these spinlocks
* in the right order. If any of the locks in the path are not
* currently blocking, it is going to complain. So, make really
* really sure by forcing the path to blocking before we clear
* the path blocking.
*/
if (held)
btrfs_set_lock_blocking(held);
btrfs_set_path_blocking(p);
#endif
for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
if (p->nodes[i] && p->locks[i]) if (p->nodes[i] && p->locks[i])
btrfs_clear_lock_blocking(p->nodes[i]); btrfs_clear_lock_blocking(p->nodes[i]);
} }
#ifdef CONFIG_DEBUG_LOCK_ALLOC
if (held)
btrfs_clear_lock_blocking(held);
#endif
} }
/* this also releases the path */ /* this also releases the path */
@ -279,7 +303,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
trans->transid, level, &ins); trans->transid, level, &ins);
BUG_ON(ret); BUG_ON(ret);
cow = btrfs_init_new_buffer(trans, root, prealloc_dest, cow = btrfs_init_new_buffer(trans, root, prealloc_dest,
buf->len); buf->len, level);
} else { } else {
cow = btrfs_alloc_free_block(trans, root, buf->len, cow = btrfs_alloc_free_block(trans, root, buf->len,
parent_start, parent_start,
@ -1559,7 +1583,7 @@ cow_done:
if (!p->skip_locking) if (!p->skip_locking)
p->locks[level] = 1; p->locks[level] = 1;
btrfs_clear_path_blocking(p); btrfs_clear_path_blocking(p, NULL);
/* /*
* we have a lock on b and as long as we aren't changing * we have a lock on b and as long as we aren't changing
@ -1598,7 +1622,7 @@ cow_done:
btrfs_set_path_blocking(p); btrfs_set_path_blocking(p);
sret = split_node(trans, root, p, level); sret = split_node(trans, root, p, level);
btrfs_clear_path_blocking(p); btrfs_clear_path_blocking(p, NULL);
BUG_ON(sret > 0); BUG_ON(sret > 0);
if (sret) { if (sret) {
@ -1618,7 +1642,7 @@ cow_done:
btrfs_set_path_blocking(p); btrfs_set_path_blocking(p);
sret = balance_level(trans, root, p, level); sret = balance_level(trans, root, p, level);
btrfs_clear_path_blocking(p); btrfs_clear_path_blocking(p, NULL);
if (sret) { if (sret) {
ret = sret; ret = sret;
@ -1681,13 +1705,13 @@ cow_done:
if (!p->skip_locking) { if (!p->skip_locking) {
int lret; int lret;
btrfs_clear_path_blocking(p); btrfs_clear_path_blocking(p, NULL);
lret = btrfs_try_spin_lock(b); lret = btrfs_try_spin_lock(b);
if (!lret) { if (!lret) {
btrfs_set_path_blocking(p); btrfs_set_path_blocking(p);
btrfs_tree_lock(b); btrfs_tree_lock(b);
btrfs_clear_path_blocking(p); btrfs_clear_path_blocking(p, b);
} }
} }
} else { } else {
@ -1699,7 +1723,7 @@ cow_done:
btrfs_set_path_blocking(p); btrfs_set_path_blocking(p);
sret = split_leaf(trans, root, key, sret = split_leaf(trans, root, key,
p, ins_len, ret == 0); p, ins_len, ret == 0);
btrfs_clear_path_blocking(p); btrfs_clear_path_blocking(p, NULL);
BUG_ON(sret > 0); BUG_ON(sret > 0);
if (sret) { if (sret) {
@ -3919,7 +3943,6 @@ find_next_key:
btrfs_release_path(root, path); btrfs_release_path(root, path);
goto again; goto again;
} else { } else {
btrfs_clear_path_blocking(path);
goto out; goto out;
} }
} }
@ -3939,7 +3962,7 @@ find_next_key:
path->locks[level - 1] = 1; path->locks[level - 1] = 1;
path->nodes[level - 1] = cur; path->nodes[level - 1] = cur;
unlock_up(path, level, 1); unlock_up(path, level, 1);
btrfs_clear_path_blocking(path); btrfs_clear_path_blocking(path, NULL);
} }
out: out:
if (ret == 0) if (ret == 0)

View File

@ -43,11 +43,7 @@ struct btrfs_ordered_sum;
#define BTRFS_ACL_NOT_CACHED ((void *)-1) #define BTRFS_ACL_NOT_CACHED ((void *)-1)
#ifdef CONFIG_LOCKDEP #define BTRFS_MAX_LEVEL 8
# define BTRFS_MAX_LEVEL 7
#else
# define BTRFS_MAX_LEVEL 8
#endif
/* holds pointers to all of the tree roots */ /* holds pointers to all of the tree roots */
#define BTRFS_ROOT_TREE_OBJECTID 1ULL #define BTRFS_ROOT_TREE_OBJECTID 1ULL
@ -1715,7 +1711,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
u64 empty_size); u64 empty_size);
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
u64 bytenr, u32 blocksize); u64 bytenr, u32 blocksize,
int level);
int btrfs_alloc_extent(struct btrfs_trans_handle *trans, int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
u64 num_bytes, u64 parent, u64 min_bytes, u64 num_bytes, u64 parent, u64 min_bytes,
@ -1835,7 +1832,6 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p);
struct btrfs_path *btrfs_alloc_path(void); struct btrfs_path *btrfs_alloc_path(void);
void btrfs_free_path(struct btrfs_path *p); void btrfs_free_path(struct btrfs_path *p);
void btrfs_set_path_blocking(struct btrfs_path *p); void btrfs_set_path_blocking(struct btrfs_path *p);
void btrfs_clear_path_blocking(struct btrfs_path *p);
void btrfs_unlock_up_safe(struct btrfs_path *p, int level); void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,

View File

@ -75,6 +75,40 @@ struct async_submit_bio {
struct btrfs_work work; struct btrfs_work work;
}; };
/* These are used to set the lockdep class on the extent buffer locks.
* The class is set by the readpage_end_io_hook after the buffer has
* passed csum validation but before the pages are unlocked.
*
* NOTE(review): btree_readpage_end_io_hook calls
* btrfs_set_buffer_lockdep_class() right after reading the header level
* and ahead of its csum_tree_block() call -- confirm whether "after csum
* validation" above is accurate.
*
* The lockdep class is also set by btrfs_init_new_buffer on freshly
* allocated blocks.
*
* The class is based on the level in the tree block, which allows lockdep
* to know that lower nodes nest inside the locks of higher nodes.
*
* We also add a check to make sure the highest level of the tree is
* the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this
* code needs update as well.
*
* Both tables hold BTRFS_MAX_LEVEL + 1 entries and are indexed by level:
* btrfs_eb_class[level] is the lock class key and btrfs_eb_name[level] is
* the name registered with it.
*/
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/* fail the build if the name table below no longer matches the max level */
# if BTRFS_MAX_LEVEL != 8
# error
# endif
/* one lock class key per tree level */
static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1];
/* class names, one per entry of btrfs_eb_class[] */
static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
/* leaf */
"btrfs-extent-00",
"btrfs-extent-01",
"btrfs-extent-02",
"btrfs-extent-03",
"btrfs-extent-04",
"btrfs-extent-05",
"btrfs-extent-06",
"btrfs-extent-07",
/* highest possible level */
"btrfs-extent-08",
};
#endif
/* /*
* extents on the btree inode are pretty simple, there's one extent * extents on the btree inode are pretty simple, there's one extent
* that covers the entire device * that covers the entire device
@ -347,6 +381,15 @@ static int check_tree_block_fsid(struct btrfs_root *root,
return ret; return ret;
} }
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * Set the lockdep class and name of eb->lock from the tree level of the
 * block, so that every level of the tree gets its own lock class.
 *
 * eb:    extent buffer whose lock is being classified
 * level: tree level of the block; used as the index into btrfs_eb_class[]
 *        and btrfs_eb_name[], which hold BTRFS_MAX_LEVEL + 1 entries each
 *
 * The read end_io hook passes the level straight from the block header,
 * so the value cannot be trusted to be in range.  An out of range level
 * leaves the lock class untouched instead of indexing past the tables.
 */
void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
{
	if (level < 0 || level > BTRFS_MAX_LEVEL)
		return;

	lockdep_set_class_and_name(&eb->lock,
				   &btrfs_eb_class[level],
				   btrfs_eb_name[level]);
}
#endif
static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state) struct extent_state *state)
{ {
@ -392,6 +435,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
} }
found_level = btrfs_header_level(eb); found_level = btrfs_header_level(eb);
btrfs_set_buffer_lockdep_class(eb, found_level);
ret = csum_tree_block(root, eb, 1); ret = csum_tree_block(root, eb, 1);
if (ret) if (ret)
ret = -EIO; ret = -EIO;
@ -1777,7 +1822,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
ret = find_and_setup_root(tree_root, fs_info, ret = find_and_setup_root(tree_root, fs_info,
BTRFS_DEV_TREE_OBJECTID, dev_root); BTRFS_DEV_TREE_OBJECTID, dev_root);
dev_root->track_dirty = 1; dev_root->track_dirty = 1;
if (ret) if (ret)
goto fail_extent_root; goto fail_extent_root;

View File

@ -101,4 +101,14 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
int btrfs_add_log_tree(struct btrfs_trans_handle *trans, int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root); struct btrfs_root *root);
int btree_lock_page_hook(struct page *page); int btree_lock_page_hook(struct page *page);
/*
 * btrfs_set_buffer_lockdep_class() takes an extent buffer and a tree level.
 * With CONFIG_DEBUG_LOCK_ALLOC the real function is declared here; without
 * it an empty inline stub is provided, so callers do not need their own
 * #ifdef around the call.
 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level);
#else
/* lock debugging disabled: nothing to do */
static inline void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb,
int level)
{
}
#endif
#endif #endif

View File

@ -3416,7 +3416,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
u64 bytenr, u32 blocksize) u64 bytenr, u32 blocksize,
int level)
{ {
struct extent_buffer *buf; struct extent_buffer *buf;
@ -3424,6 +3425,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
if (!buf) if (!buf)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
btrfs_set_header_generation(buf, trans->transid); btrfs_set_header_generation(buf, trans->transid);
btrfs_set_buffer_lockdep_class(buf, level);
btrfs_tree_lock(buf); btrfs_tree_lock(buf);
clean_tree_block(trans, root, buf); clean_tree_block(trans, root, buf);
@ -3467,7 +3469,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
return ERR_PTR(ret); return ERR_PTR(ret);
} }
buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize); buf = btrfs_init_new_buffer(trans, root, ins.objectid,
blocksize, level);
return buf; return buf;
} }

View File

@ -25,21 +25,10 @@
#include "extent_io.h" #include "extent_io.h"
#include "locking.h" #include "locking.h"
/*
* btrfs_header_level() isn't free, so don't call it when lockdep isn't
* on
*/
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static inline void spin_nested(struct extent_buffer *eb)
{
spin_lock_nested(&eb->lock, BTRFS_MAX_LEVEL - btrfs_header_level(eb));
}
#else
static inline void spin_nested(struct extent_buffer *eb) static inline void spin_nested(struct extent_buffer *eb)
{ {
spin_lock(&eb->lock); spin_lock(&eb->lock);
} }
#endif
/* /*
* Setting a lock to blocking will drop the spinlock and set the * Setting a lock to blocking will drop the spinlock and set the

View File

@ -3102,6 +3102,8 @@ int btrfs_read_sys_array(struct btrfs_root *root)
if (!sb) if (!sb)
return -ENOMEM; return -ENOMEM;
btrfs_set_buffer_uptodate(sb); btrfs_set_buffer_uptodate(sb);
btrfs_set_buffer_lockdep_class(sb, 0);
write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
array_size = btrfs_super_sys_array_size(super_copy); array_size = btrfs_super_sys_array_size(super_copy);