Btrfs: allocator fixes for space balancing update

* Reserved extent accounting:  reserved extents have been
allocated in the rbtrees that track free space but have not
been allocated on disk.  They were never properly accounted for
in the past, making it hard to know how much space was really free.

* btrfs_find_block_group used to return NULL for block groups that
had been removed by the space balancing code.  This made it hard
to account for space during the final stages of a balance run.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
Zheng Yan 2008-09-26 10:05:48 -04:00 committed by Chris Mason
parent 24ab9cd85c
commit e856981384
2 changed files with 67 additions and 71 deletions

View File

@ -498,6 +498,7 @@ struct btrfs_space_info {
u64 total_bytes; u64 total_bytes;
u64 bytes_used; u64 bytes_used;
u64 bytes_pinned; u64 bytes_pinned;
u64 bytes_reserved;
int full; int full;
int force_alloc; int force_alloc;
struct list_head list; struct list_head list;
@ -519,6 +520,7 @@ struct btrfs_block_group_cache {
struct btrfs_block_group_item item; struct btrfs_block_group_item item;
spinlock_t lock; spinlock_t lock;
u64 pinned; u64 pinned;
u64 reserved;
u64 flags; u64 flags;
int cached; int cached;
int ro; int ro;

View File

@ -325,12 +325,9 @@ static int noinline find_free_space(struct btrfs_root *root,
struct btrfs_block_group_cache *cache = *cache_ret; struct btrfs_block_group_cache *cache = *cache_ret;
struct btrfs_free_space *info = NULL; struct btrfs_free_space *info = NULL;
u64 last; u64 last;
u64 total_fs_bytes;
u64 search_start = *start_ret; u64 search_start = *start_ret;
WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex)); WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
if (!cache) if (!cache)
goto out; goto out;
@ -354,7 +351,7 @@ new_group:
last = cache->key.objectid + cache->key.offset; last = cache->key.objectid + cache->key.offset;
cache = btrfs_lookup_first_block_group(root->fs_info, last); cache = btrfs_lookup_first_block_group(root->fs_info, last);
if (!cache || cache->key.objectid >= total_fs_bytes) if (!cache)
goto out; goto out;
*cache_ret = cache; *cache_ret = cache;
@ -385,7 +382,6 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
return found; return found;
} }
return NULL; return NULL;
} }
static struct btrfs_block_group_cache * static struct btrfs_block_group_cache *
@ -396,7 +392,6 @@ __btrfs_find_block_group(struct btrfs_root *root,
struct btrfs_block_group_cache *cache; struct btrfs_block_group_cache *cache;
struct btrfs_block_group_cache *found_group = NULL; struct btrfs_block_group_cache *found_group = NULL;
struct btrfs_fs_info *info = root->fs_info; struct btrfs_fs_info *info = root->fs_info;
struct btrfs_space_info *sinfo;
u64 used; u64 used;
u64 last = 0; u64 last = 0;
u64 free_check; u64 free_check;
@ -413,7 +408,7 @@ __btrfs_find_block_group(struct btrfs_root *root,
if (shint && block_group_bits(shint, data) && !shint->ro) { if (shint && block_group_bits(shint, data) && !shint->ro) {
spin_lock(&shint->lock); spin_lock(&shint->lock);
used = btrfs_block_group_used(&shint->item); used = btrfs_block_group_used(&shint->item);
if (used + shint->pinned < if (used + shint->pinned + shint->reserved <
div_factor(shint->key.offset, factor)) { div_factor(shint->key.offset, factor)) {
spin_unlock(&shint->lock); spin_unlock(&shint->lock);
return shint; return shint;
@ -424,7 +419,7 @@ __btrfs_find_block_group(struct btrfs_root *root,
if (hint && !hint->ro && block_group_bits(hint, data)) { if (hint && !hint->ro && block_group_bits(hint, data)) {
spin_lock(&hint->lock); spin_lock(&hint->lock);
used = btrfs_block_group_used(&hint->item); used = btrfs_block_group_used(&hint->item);
if (used + hint->pinned < if (used + hint->pinned + hint->reserved <
div_factor(hint->key.offset, factor)) { div_factor(hint->key.offset, factor)) {
spin_unlock(&hint->lock); spin_unlock(&hint->lock);
return hint; return hint;
@ -437,27 +432,9 @@ __btrfs_find_block_group(struct btrfs_root *root,
else else
last = search_start; last = search_start;
} }
sinfo = __find_space_info(root->fs_info, data);
if (!sinfo)
goto found;
again: again:
while(1) { while (1) {
struct list_head *l; cache = btrfs_lookup_first_block_group(root->fs_info, last);
cache = NULL;
spin_lock(&sinfo->lock);
list_for_each(l, &sinfo->block_groups) {
struct btrfs_block_group_cache *entry;
entry = list_entry(l, struct btrfs_block_group_cache,
list);
if ((entry->key.objectid >= last) &&
(!cache || (entry->key.objectid <
cache->key.objectid)))
cache = entry;
}
spin_unlock(&sinfo->lock);
if (!cache) if (!cache)
break; break;
@ -467,7 +444,8 @@ again:
if (!cache->ro && block_group_bits(cache, data)) { if (!cache->ro && block_group_bits(cache, data)) {
free_check = div_factor(cache->key.offset, factor); free_check = div_factor(cache->key.offset, factor);
if (used + cache->pinned < free_check) { if (used + cache->pinned + cache->reserved <
free_check) {
found_group = cache; found_group = cache;
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
goto found; goto found;
@ -1414,6 +1392,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
if (!cache) if (!cache)
break; break;
cache->dirty = 0;
last += cache->key.offset; last += cache->key.offset;
err = write_one_cache_group(trans, root, err = write_one_cache_group(trans, root,
@ -1427,8 +1406,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
werr = err; werr = err;
continue; continue;
} }
cache->dirty = 0;
} }
btrfs_free_path(path); btrfs_free_path(path);
mutex_unlock(&root->fs_info->alloc_mutex); mutex_unlock(&root->fs_info->alloc_mutex);
@ -1460,6 +1437,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->total_bytes = total_bytes; found->total_bytes = total_bytes;
found->bytes_used = bytes_used; found->bytes_used = bytes_used;
found->bytes_pinned = 0; found->bytes_pinned = 0;
found->bytes_reserved = 0;
found->full = 0; found->full = 0;
found->force_alloc = 0; found->force_alloc = 0;
*space_info = found; *space_info = found;
@ -1539,8 +1517,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
thresh = div_factor(space_info->total_bytes, 6); thresh = div_factor(space_info->total_bytes, 6);
if (!force && if (!force &&
(space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < (space_info->bytes_used + space_info->bytes_pinned +
thresh) space_info->bytes_reserved + alloc_bytes) < thresh)
goto out; goto out;
mutex_lock(&extent_root->fs_info->chunk_mutex); mutex_lock(&extent_root->fs_info->chunk_mutex);
@ -1621,7 +1599,6 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
return cache->key.objectid; return cache->key.objectid;
} }
int btrfs_update_pinned_extents(struct btrfs_root *root, int btrfs_update_pinned_extents(struct btrfs_root *root,
u64 bytenr, u64 num, int pin) u64 bytenr, u64 num, int pin)
{ {
@ -1639,29 +1616,20 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
} }
while (num > 0) { while (num > 0) {
cache = btrfs_lookup_block_group(fs_info, bytenr); cache = btrfs_lookup_block_group(fs_info, bytenr);
if (!cache) { BUG_ON(!cache);
u64 first = first_logical_byte(root, bytenr); len = min(num, cache->key.offset -
WARN_ON(first < bytenr); (bytenr - cache->key.objectid));
len = min(first - bytenr, num);
} else {
len = min(num, cache->key.offset -
(bytenr - cache->key.objectid));
}
if (pin) { if (pin) {
if (cache) { spin_lock(&cache->lock);
spin_lock(&cache->lock); cache->pinned += len;
cache->pinned += len; cache->space_info->bytes_pinned += len;
cache->space_info->bytes_pinned += len; spin_unlock(&cache->lock);
spin_unlock(&cache->lock);
}
fs_info->total_pinned += len; fs_info->total_pinned += len;
} else { } else {
if (cache) { spin_lock(&cache->lock);
spin_lock(&cache->lock); cache->pinned -= len;
cache->pinned -= len; cache->space_info->bytes_pinned -= len;
cache->space_info->bytes_pinned -= len; spin_unlock(&cache->lock);
spin_unlock(&cache->lock);
}
fs_info->total_pinned -= len; fs_info->total_pinned -= len;
} }
bytenr += len; bytenr += len;
@ -1670,6 +1638,36 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
return 0; return 0;
} }
static int update_reserved_extents(struct btrfs_root *root,
u64 bytenr, u64 num, int reserve)
{
u64 len;
struct btrfs_block_group_cache *cache;
struct btrfs_fs_info *fs_info = root->fs_info;
WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
while (num > 0) {
cache = btrfs_lookup_block_group(fs_info, bytenr);
BUG_ON(!cache);
len = min(num, cache->key.offset -
(bytenr - cache->key.objectid));
if (reserve) {
spin_lock(&cache->lock);
cache->reserved += len;
cache->space_info->bytes_reserved += len;
spin_unlock(&cache->lock);
} else {
spin_lock(&cache->lock);
cache->reserved -= len;
cache->space_info->bytes_reserved -= len;
spin_unlock(&cache->lock);
}
bytenr += len;
num -= len;
}
return 0;
}
int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
{ {
u64 last = 0; u64 last = 0;
@ -2126,6 +2124,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
cache = btrfs_lookup_block_group(root->fs_info, bytenr); cache = btrfs_lookup_block_group(root->fs_info, bytenr);
BUG_ON(!cache); BUG_ON(!cache);
btrfs_add_free_space(cache, bytenr, num_bytes); btrfs_add_free_space(cache, bytenr, num_bytes);
update_reserved_extents(root, bytenr, num_bytes, 0);
return 0; return 0;
} }
pin = 1; pin = 1;
@ -2225,14 +2224,11 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
search_start = max(search_start, first_logical_byte(root, 0)); search_start = max(search_start, first_logical_byte(root, 0));
orig_search_start = search_start; orig_search_start = search_start;
if (search_end == (u64)-1)
search_end = btrfs_super_total_bytes(&info->super_copy);
search_start = max(search_start, hint_byte); search_start = max(search_start, hint_byte);
total_needed += empty_size; total_needed += empty_size;
new_group: new_group:
block_group = btrfs_lookup_block_group(info, search_start); block_group = btrfs_lookup_first_block_group(info, search_start);
/* /*
* Ok this looks a little tricky, buts its really simple. First if we * Ok this looks a little tricky, buts its really simple. First if we
@ -2257,12 +2253,8 @@ new_group:
ret = do_chunk_alloc(trans, root, ret = do_chunk_alloc(trans, root,
num_bytes + 2 * 1024 * 1024, num_bytes + 2 * 1024 * 1024,
data, 1); data, 1);
if (ret < 0) { if (ret < 0)
struct btrfs_space_info *info;
info = __find_space_info(root->fs_info, data);
goto error; goto error;
}
BUG_ON(ret); BUG_ON(ret);
chunk_alloc_done = 1; chunk_alloc_done = 1;
search_start = orig_search_start; search_start = orig_search_start;
@ -2378,22 +2370,24 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
struct list_head *l; struct list_head *l;
printk(KERN_INFO "space_info has %Lu free, is %sfull\n", printk(KERN_INFO "space_info has %Lu free, is %sfull\n",
info->total_bytes - info->bytes_used - info->bytes_pinned, info->total_bytes - info->bytes_used - info->bytes_pinned -
(info->full) ? "" : "not "); info->bytes_reserved, (info->full) ? "" : "not ");
spin_lock(&info->lock); spin_lock(&info->lock);
list_for_each(l, &info->block_groups) { list_for_each(l, &info->block_groups) {
cache = list_entry(l, struct btrfs_block_group_cache, list); cache = list_entry(l, struct btrfs_block_group_cache, list);
spin_lock(&cache->lock); spin_lock(&cache->lock);
printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used " printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used "
"%Lu pinned\n", "%Lu pinned %Lu reserved\n",
cache->key.objectid, cache->key.offset, cache->key.objectid, cache->key.offset,
btrfs_block_group_used(&cache->item), cache->pinned); btrfs_block_group_used(&cache->item),
cache->pinned, cache->reserved);
btrfs_dump_free_space(cache, bytes); btrfs_dump_free_space(cache, bytes);
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
} }
spin_unlock(&info->lock); spin_unlock(&info->lock);
} }
static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
u64 num_bytes, u64 min_alloc_size, u64 num_bytes, u64 min_alloc_size,
@ -2500,6 +2494,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
empty_size, hint_byte, search_end, ins, empty_size, hint_byte, search_end, ins,
data); data);
update_reserved_extents(root, ins->objectid, ins->offset, 1);
maybe_unlock_mutex(root); maybe_unlock_mutex(root);
return ret; return ret;
} }
@ -2625,6 +2620,7 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
ret = __btrfs_alloc_reserved_extent(trans, root, parent, ret = __btrfs_alloc_reserved_extent(trans, root, parent,
root_objectid, ref_generation, root_objectid, ref_generation,
owner, owner_offset, ins); owner, owner_offset, ins);
update_reserved_extents(root, ins->objectid, ins->offset, 0);
maybe_unlock_mutex(root); maybe_unlock_mutex(root);
return ret; return ret;
} }
@ -2685,6 +2681,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
owner_objectid, owner_offset, ins); owner_objectid, owner_offset, ins);
BUG_ON(ret); BUG_ON(ret);
} else {
update_reserved_extents(root, ins->objectid, ins->offset, 1);
} }
maybe_unlock_mutex(root); maybe_unlock_mutex(root);
return ret; return ret;
@ -3974,10 +3972,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
ret = btrfs_add_block_group_cache(root->fs_info, cache); ret = btrfs_add_block_group_cache(root->fs_info, cache);
BUG_ON(ret); BUG_ON(ret);
if (key.objectid >=
btrfs_super_total_bytes(&info->super_copy))
break;
} }
ret = 0; ret = 0;
error: error: