@@ -140,13 +140,15 @@
  * number of buddy bitmap orders possible) number of lists. Group-infos are
  * placed in appropriate lists.
  *
- * 2) Average fragment size rb tree (sbi->s_mb_avg_fragment_size_root)
+ * 2) Average fragment size lists (sbi->s_mb_avg_fragment_size)
  *
- * Locking: sbi->s_mb_rb_lock (rwlock)
+ * Locking: sbi->s_mb_avg_fragment_size_locks(array of rw locks)
  *
- * This is a red black tree consisting of group infos and the tree is sorted
- * by average fragment sizes (which is calculated as ext4_group_info->bb_free
- * / ext4_group_info->bb_fragments).
+ * This is an array of lists where in the i-th list there are groups with
+ * average fragment size >= 2^i and < 2^(i+1). The average fragment size
+ * is computed as ext4_group_info->bb_free / ext4_group_info->bb_fragments.
+ * Note that we don't bother with a special list for completely empty groups
+ * so we only have MB_NUM_ORDERS(sb) lists.
  *
  * When "mb_optimize_scan" mount option is set, mballoc consults the above data
  * structures to decide the order in which groups are to be traversed for
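
The list scheme this comment describes can be pictured with a small stand-alone sketch (user-space C; avg_fragment_size_order_sketch() and num_orders are hypothetical stand-ins, and the kernel-side equivalent is the mb_avg_fragment_size_order() helper added further down in this patch): a group whose average fragment size falls in [2^i, 2^(i+1)) lands on list i.

    /*
     * Stand-alone sketch, not kernel code: fls() is emulated with
     * __builtin_clz() and num_orders plays the role of MB_NUM_ORDERS(sb).
     */
    static int avg_fragment_size_order_sketch(int num_orders, int avg_fragment_size)
    {
            int order;

            if (avg_fragment_size <= 0)
                    return 0;
            order = 32 - __builtin_clz((unsigned int)avg_fragment_size) - 2; /* fls() - 2 */
            if (order < 0)
                    return 0;
            if (order == num_orders)
                    order--;
            return order;
    }

    /* e.g. bb_free = 512, bb_fragments = 4 -> average 128 -> list index 6 */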
@@ -160,7 +162,8 @@
  *
  * At CR = 1, we only consider groups where average fragment size > request
  * size. So, we lookup a group which has average fragment size just above or
- * equal to request size using our rb tree (data structure 2) in O(log N) time.
+ * equal to request size using our average fragment size group lists (data
+ * structure 2) in O(1) time.
  *
  * If "mb_optimize_scan" mount option is not set, mballoc traverses groups in
  * linear order which requires O(N) search time for each CR 0 and CR 1 phase.
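
In list form, the CR 1 lookup described here amounts to: compute the order bucket that fits the request, then walk upward through non-empty lists until a usable group turns up; the real walk, including the ext4_mb_good_group() check and per-list locking, is the rewritten ext4_mb_choose_next_group_cr1() later in this patch. A toy illustration with hypothetical types:

    struct toy_group {
            int group_no;
            int usable;                 /* stands in for ext4_mb_good_group() */
            struct toy_group *next;
    };

    /* lists[i] holds groups whose average fragment size is in [2^i, 2^(i+1)) */
    static int pick_group_sketch(struct toy_group **lists, int num_orders,
                                 int request_order)
    {
            int i;
            struct toy_group *g;

            for (i = request_order; i < num_orders; i++)
                    for (g = lists[i]; g; g = g->next)
                            if (g->usable)
                                    return g->group_no;
            return -1;          /* nothing suitable: caller moves on to CR 2 */
    }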
@@ -802,65 +805,51 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
         }
 }
 
-static void ext4_mb_rb_insert(struct rb_root *root, struct rb_node *new,
-                        int (*cmp)(struct rb_node *, struct rb_node *))
+static int mb_avg_fragment_size_order(struct super_block *sb, ext4_grpblk_t len)
 {
-        struct rb_node **iter = &root->rb_node, *parent = NULL;
+        int order;
 
-        while (*iter) {
-                parent = *iter;
-                if (cmp(new, *iter) > 0)
-                        iter = &((*iter)->rb_left);
-                else
-                        iter = &((*iter)->rb_right);
-        }
-
-        rb_link_node(new, parent, iter);
-        rb_insert_color(new, root);
+        /*
+         * We don't bother with a special lists groups with only 1 block free
+         * extents and for completely empty groups.
+         */
+        order = fls(len) - 2;
+        if (order < 0)
+                return 0;
+        if (order == MB_NUM_ORDERS(sb))
+                order--;
+        return order;
 }
 
-static int
-ext4_mb_avg_fragment_size_cmp(struct rb_node *rb1, struct rb_node *rb2)
-{
-        struct ext4_group_info *grp1 = rb_entry(rb1,
-                                                struct ext4_group_info,
-                                                bb_avg_fragment_size_rb);
-        struct ext4_group_info *grp2 = rb_entry(rb2,
-                                                struct ext4_group_info,
-                                                bb_avg_fragment_size_rb);
-        int num_frags_1, num_frags_2;
-
-        num_frags_1 = grp1->bb_fragments ?
-                grp1->bb_free / grp1->bb_fragments : 0;
-        num_frags_2 = grp2->bb_fragments ?
-                grp2->bb_free / grp2->bb_fragments : 0;
-
-        return (num_frags_2 - num_frags_1);
-}
-
-/*
- * Reinsert grpinfo into the avg_fragment_size tree with new average
- * fragment size.
- */
+/* Move group to appropriate avg_fragment_size list */
 static void
 mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
 {
         struct ext4_sb_info *sbi = EXT4_SB(sb);
+        int new_order;
 
         if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0)
                 return;
 
-        write_lock(&sbi->s_mb_rb_lock);
-        if (!RB_EMPTY_NODE(&grp->bb_avg_fragment_size_rb)) {
-                rb_erase(&grp->bb_avg_fragment_size_rb,
-                                &sbi->s_mb_avg_fragment_size_root);
-                RB_CLEAR_NODE(&grp->bb_avg_fragment_size_rb);
-        }
+        new_order = mb_avg_fragment_size_order(sb,
+                                        grp->bb_free / grp->bb_fragments);
+        if (new_order == grp->bb_avg_fragment_size_order)
+                return;
 
-        ext4_mb_rb_insert(&sbi->s_mb_avg_fragment_size_root,
-                &grp->bb_avg_fragment_size_rb,
-                ext4_mb_avg_fragment_size_cmp);
-        write_unlock(&sbi->s_mb_rb_lock);
+        if (grp->bb_avg_fragment_size_order != -1) {
+                write_lock(&sbi->s_mb_avg_fragment_size_locks[
+                                        grp->bb_avg_fragment_size_order]);
+                list_del(&grp->bb_avg_fragment_size_node);
+                write_unlock(&sbi->s_mb_avg_fragment_size_locks[
+                                        grp->bb_avg_fragment_size_order]);
+        }
+        grp->bb_avg_fragment_size_order = new_order;
+        write_lock(&sbi->s_mb_avg_fragment_size_locks[
+                        grp->bb_avg_fragment_size_order]);
+        list_add_tail(&grp->bb_avg_fragment_size_node,
+                &sbi->s_mb_avg_fragment_size[grp->bb_avg_fragment_size_order]);
+        write_unlock(&sbi->s_mb_avg_fragment_size_locks[
+                        grp->bb_avg_fragment_size_order]);
 }
 
 /*
@@ -909,86 +898,56 @@ static void ext4_mb_choose_next_group_cr0(struct ext4_allocation_context *ac,
                 *new_cr = 1;
         } else {
                 *group = grp->bb_group;
-                ac->ac_last_optimal_group = *group;
                 ac->ac_flags |= EXT4_MB_CR0_OPTIMIZED;
         }
 }
 
 /*
- * Choose next group by traversing average fragment size tree. Updates *new_cr
- * if cr lvel needs an update. Sets EXT4_MB_SEARCH_NEXT_LINEAR to indicate that
- * the linear search should continue for one iteration since there's lock
- * contention on the rb tree lock.
+ * Choose next group by traversing average fragment size list of suitable
+ * order. Updates *new_cr if cr level needs an update.
  */
 static void ext4_mb_choose_next_group_cr1(struct ext4_allocation_context *ac,
                 int *new_cr, ext4_group_t *group, ext4_group_t ngroups)
 {
         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
-        int avg_fragment_size, best_so_far;
-        struct rb_node *node, *found;
-        struct ext4_group_info *grp;
-
-        /*
-         * If there is contention on the lock, instead of waiting for the lock
-         * to become available, just continue searching lineraly. We'll resume
-         * our rb tree search later starting at ac->ac_last_optimal_group.
-         */
-        if (!read_trylock(&sbi->s_mb_rb_lock)) {
-                ac->ac_flags |= EXT4_MB_SEARCH_NEXT_LINEAR;
-                return;
-        }
+        struct ext4_group_info *grp, *iter;
+        int i;
 
         if (unlikely(ac->ac_flags & EXT4_MB_CR1_OPTIMIZED)) {
                 if (sbi->s_mb_stats)
                         atomic_inc(&sbi->s_bal_cr1_bad_suggestions);
-                /* We have found something at CR 1 in the past */
-                grp = ext4_get_group_info(ac->ac_sb, ac->ac_last_optimal_group);
-                for (found = rb_next(&grp->bb_avg_fragment_size_rb); found != NULL;
-                     found = rb_next(found)) {
-                        grp = rb_entry(found, struct ext4_group_info,
-                                       bb_avg_fragment_size_rb);
+        }
+
+        for (i = mb_avg_fragment_size_order(ac->ac_sb, ac->ac_g_ex.fe_len);
+             i < MB_NUM_ORDERS(ac->ac_sb); i++) {
+                if (list_empty(&sbi->s_mb_avg_fragment_size[i]))
+                        continue;
+                read_lock(&sbi->s_mb_avg_fragment_size_locks[i]);
+                if (list_empty(&sbi->s_mb_avg_fragment_size[i])) {
+                        read_unlock(&sbi->s_mb_avg_fragment_size_locks[i]);
+                        continue;
+                }
+                grp = NULL;
+                list_for_each_entry(iter, &sbi->s_mb_avg_fragment_size[i],
+                                    bb_avg_fragment_size_node) {
                         if (sbi->s_mb_stats)
                                 atomic64_inc(&sbi->s_bal_cX_groups_considered[1]);
-                        if (likely(ext4_mb_good_group(ac, grp->bb_group, 1)))
-                                goto done;
+                        if (likely(ext4_mb_good_group(ac, iter->bb_group, 1))) {
+                                grp = iter;
+                                break;
+                        }
                 }
-        }
-
-        node = sbi->s_mb_avg_fragment_size_root.rb_node;
-        best_so_far = 0;
-        found = NULL;
-
-        while (node) {
-                grp = rb_entry(node, struct ext4_group_info,
-                               bb_avg_fragment_size_rb);
-                avg_fragment_size = 0;
-                if (ext4_mb_good_group(ac, grp->bb_group, 1)) {
-                        avg_fragment_size = grp->bb_fragments ?
-                                grp->bb_free / grp->bb_fragments : 0;
-                        if (!best_so_far || avg_fragment_size < best_so_far) {
-                                best_so_far = avg_fragment_size;
-                                found = node;
-                        }
-                }
-                if (avg_fragment_size > ac->ac_g_ex.fe_len)
-                        node = node->rb_right;
-                else
-                        node = node->rb_left;
+                read_unlock(&sbi->s_mb_avg_fragment_size_locks[i]);
+                if (grp)
+                        break;
         }
 
-done:
-        if (found) {
-                grp = rb_entry(found, struct ext4_group_info,
-                               bb_avg_fragment_size_rb);
+        if (grp) {
                 *group = grp->bb_group;
                 ac->ac_flags |= EXT4_MB_CR1_OPTIMIZED;
         } else {
                 *new_cr = 2;
         }
-
-        read_unlock(&sbi->s_mb_rb_lock);
-        ac->ac_last_optimal_group = *group;
 }
 
 static inline int should_optimize_scan(struct ext4_allocation_context *ac)
@@ -1017,11 +976,6 @@ next_linear_group(struct ext4_allocation_context *ac, int group, int ngroups)
                 goto inc_and_return;
         }
 
-        if (ac->ac_flags & EXT4_MB_SEARCH_NEXT_LINEAR) {
-                ac->ac_flags &= ~EXT4_MB_SEARCH_NEXT_LINEAR;
-                goto inc_and_return;
-        }
-
         return group;
 inc_and_return:
         /*
@@ -1049,8 +1003,10 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
 {
         *new_cr = ac->ac_criteria;
 
-        if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining)
+        if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining) {
+                *group = next_linear_group(ac, *group, ngroups);
                 return;
+        }
 
         if (*new_cr == 0) {
                 ext4_mb_choose_next_group_cr0(ac, new_cr, group, ngroups);
@@ -1075,23 +1031,25 @@ mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
         struct ext4_sb_info *sbi = EXT4_SB(sb);
         int i;
 
-        if (test_opt2(sb, MB_OPTIMIZE_SCAN) && grp->bb_largest_free_order >= 0) {
+        for (i = MB_NUM_ORDERS(sb) - 1; i >= 0; i--)
+                if (grp->bb_counters[i] > 0)
+                        break;
+        /* No need to move between order lists? */
+        if (!test_opt2(sb, MB_OPTIMIZE_SCAN) ||
+            i == grp->bb_largest_free_order) {
+                grp->bb_largest_free_order = i;
+                return;
+        }
+
+        if (grp->bb_largest_free_order >= 0) {
                 write_lock(&sbi->s_mb_largest_free_orders_locks[
                                               grp->bb_largest_free_order]);
                 list_del_init(&grp->bb_largest_free_order_node);
                 write_unlock(&sbi->s_mb_largest_free_orders_locks[
                                               grp->bb_largest_free_order]);
         }
-        grp->bb_largest_free_order = -1; /* uninit */
-
-        for (i = MB_NUM_ORDERS(sb) - 1; i >= 0; i--) {
-                if (grp->bb_counters[i] > 0) {
-                        grp->bb_largest_free_order = i;
-                        break;
-                }
-        }
-        if (test_opt2(sb, MB_OPTIMIZE_SCAN) &&
-            grp->bb_largest_free_order >= 0 && grp->bb_free) {
+        grp->bb_largest_free_order = i;
+        if (grp->bb_largest_free_order >= 0 && grp->bb_free) {
                 write_lock(&sbi->s_mb_largest_free_orders_locks[
                                               grp->bb_largest_free_order]);
                 list_add_tail(&grp->bb_largest_free_order_node,
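
For orientation, the scan at the heart of the rewritten mb_set_largest_free_order() above can be restated as a stand-alone sketch (hypothetical helper name, user-space C): a group's largest free order is simply the highest order with a non-zero buddy counter, and that single value decides which largest_free_orders list the group sits on.

    static int largest_free_order_sketch(const unsigned int *counters, int num_orders)
    {
            int i;

            /* highest order whose buddy counter is non-zero */
            for (i = num_orders - 1; i >= 0; i--)
                    if (counters[i] > 0)
                            return i;
            return -1;      /* empty group: kept off the order lists */
    }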
@@ -1148,13 +1106,13 @@ void ext4_mb_generate_buddy(struct super_block *sb,
                                         EXT4_GROUP_INFO_BBITMAP_CORRUPT);
         }
         mb_set_largest_free_order(sb, grp);
-        mb_update_avg_fragment_size(sb, grp);
 
         clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
 
         period = get_cycles() - period;
         atomic_inc(&sbi->s_mb_buddies_generated);
         atomic64_add(period, &sbi->s_mb_generation_time);
+        mb_update_avg_fragment_size(sb, grp);
 }
 
 /* The buddy information is attached the buddy cache inode
@@ -2636,7 +2594,7 @@ static noinline_for_stack int
 ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 {
         ext4_group_t prefetch_grp = 0, ngroups, group, i;
-        int cr = -1;
+        int cr = -1, new_cr;
         int err = 0, first_err = 0;
         unsigned int nr = 0, prefetch_ios = 0;
         struct ext4_sb_info *sbi;
@@ -2707,17 +2665,14 @@ repeat:
          * from the goal value specified
          */
         group = ac->ac_g_ex.fe_group;
-        ac->ac_last_optimal_group = group;
         ac->ac_groups_linear_remaining = sbi->s_mb_max_linear_groups;
         prefetch_grp = group;
 
-        for (i = 0; i < ngroups; group = next_linear_group(ac, group, ngroups),
-                     i++) {
-                int ret = 0, new_cr;
+        for (i = 0, new_cr = cr; i < ngroups; i++,
+             ext4_mb_choose_next_group(ac, &new_cr, &group, ngroups)) {
+                int ret = 0;
 
                 cond_resched();
-
-                ext4_mb_choose_next_group(ac, &new_cr, &group, ngroups);
                 if (new_cr != cr) {
                         cr = new_cr;
                         goto repeat;
@@ -2991,9 +2946,7 @@ __acquires(&EXT4_SB(sb)->s_mb_rb_lock)
         struct super_block *sb = pde_data(file_inode(seq->file));
         unsigned long position;
 
-        read_lock(&EXT4_SB(sb)->s_mb_rb_lock);
-
-        if (*pos < 0 || *pos >= MB_NUM_ORDERS(sb) + 1)
+        if (*pos < 0 || *pos >= 2*MB_NUM_ORDERS(sb))
                 return NULL;
         position = *pos + 1;
         return (void *) ((unsigned long) position);
@@ -3005,7 +2958,7 @@ static void *ext4_mb_seq_structs_summary_next(struct seq_file *seq, void *v, loff_t *pos)
         unsigned long position;
 
         ++*pos;
-        if (*pos < 0 || *pos >= MB_NUM_ORDERS(sb) + 1)
+        if (*pos < 0 || *pos >= 2*MB_NUM_ORDERS(sb))
                 return NULL;
         position = *pos + 1;
         return (void *) ((unsigned long) position);
@@ -3017,29 +2970,22 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
         struct ext4_sb_info *sbi = EXT4_SB(sb);
         unsigned long position = ((unsigned long) v);
         struct ext4_group_info *grp;
-        struct rb_node *n;
-        unsigned int count, min, max;
+        unsigned int count;
 
         position--;
         if (position >= MB_NUM_ORDERS(sb)) {
-                seq_puts(seq, "fragment_size_tree:\n");
-                n = rb_first(&sbi->s_mb_avg_fragment_size_root);
-                if (!n) {
-                        seq_puts(seq, "\ttree_min: 0\n\ttree_max: 0\n\ttree_nodes: 0\n");
-                        return 0;
-                }
-                grp = rb_entry(n, struct ext4_group_info, bb_avg_fragment_size_rb);
-                min = grp->bb_fragments ? grp->bb_free / grp->bb_fragments : 0;
-                count = 1;
-                while (rb_next(n)) {
-                        count++;
-                        n = rb_next(n);
-                }
-                grp = rb_entry(n, struct ext4_group_info, bb_avg_fragment_size_rb);
-                max = grp->bb_fragments ? grp->bb_free / grp->bb_fragments : 0;
+                position -= MB_NUM_ORDERS(sb);
+                if (position == 0)
+                        seq_puts(seq, "avg_fragment_size_lists:\n");
 
-                seq_printf(seq, "\ttree_min: %u\n\ttree_max: %u\n\ttree_nodes: %u\n",
-                           min, max, count);
+                count = 0;
+                read_lock(&sbi->s_mb_avg_fragment_size_locks[position]);
+                list_for_each_entry(grp, &sbi->s_mb_avg_fragment_size[position],
+                                    bb_avg_fragment_size_node)
+                        count++;
+                read_unlock(&sbi->s_mb_avg_fragment_size_locks[position]);
+                seq_printf(seq, "\tlist_order_%u_groups: %u\n",
+                           (unsigned int)position, count);
                 return 0;
         }
 
@@ -3049,9 +2995,11 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
                 seq_puts(seq, "max_free_order_lists:\n");
         }
         count = 0;
+        read_lock(&sbi->s_mb_largest_free_orders_locks[position]);
         list_for_each_entry(grp, &sbi->s_mb_largest_free_orders[position],
                             bb_largest_free_order_node)
                 count++;
+        read_unlock(&sbi->s_mb_largest_free_orders_locks[position]);
         seq_printf(seq, "\tlist_order_%u_groups: %u\n",
                    (unsigned int)position, count);
 
@@ -3059,11 +3007,7 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
 }
 
 static void ext4_mb_seq_structs_summary_stop(struct seq_file *seq, void *v)
-__releases(&EXT4_SB(sb)->s_mb_rb_lock)
 {
-        struct super_block *sb = pde_data(file_inode(seq->file));
-
-        read_unlock(&EXT4_SB(sb)->s_mb_rb_lock);
 }
 
 const struct seq_operations ext4_mb_seq_structs_summary_ops = {
@@ -3176,8 +3120,9 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
         init_rwsem(&meta_group_info[i]->alloc_sem);
         meta_group_info[i]->bb_free_root = RB_ROOT;
         INIT_LIST_HEAD(&meta_group_info[i]->bb_largest_free_order_node);
-        RB_CLEAR_NODE(&meta_group_info[i]->bb_avg_fragment_size_rb);
+        INIT_LIST_HEAD(&meta_group_info[i]->bb_avg_fragment_size_node);
         meta_group_info[i]->bb_largest_free_order = -1;  /* uninit */
+        meta_group_info[i]->bb_avg_fragment_size_order = -1;  /* uninit */
         meta_group_info[i]->bb_group = group;
 
         mb_group_bb_bitmap_alloc(sb, meta_group_info[i], group);
@@ -3426,7 +3371,24 @@ int ext4_mb_init(struct super_block *sb)
                 i++;
         } while (i < MB_NUM_ORDERS(sb));
 
-        sbi->s_mb_avg_fragment_size_root = RB_ROOT;
+        sbi->s_mb_avg_fragment_size =
+                kmalloc_array(MB_NUM_ORDERS(sb), sizeof(struct list_head),
+                        GFP_KERNEL);
+        if (!sbi->s_mb_avg_fragment_size) {
+                ret = -ENOMEM;
+                goto out;
+        }
+        sbi->s_mb_avg_fragment_size_locks =
+                kmalloc_array(MB_NUM_ORDERS(sb), sizeof(rwlock_t),
+                        GFP_KERNEL);
+        if (!sbi->s_mb_avg_fragment_size_locks) {
+                ret = -ENOMEM;
+                goto out;
+        }
+        for (i = 0; i < MB_NUM_ORDERS(sb); i++) {
+                INIT_LIST_HEAD(&sbi->s_mb_avg_fragment_size[i]);
+                rwlock_init(&sbi->s_mb_avg_fragment_size_locks[i]);
+        }
         sbi->s_mb_largest_free_orders =
                 kmalloc_array(MB_NUM_ORDERS(sb), sizeof(struct list_head),
                         GFP_KERNEL);
@@ -3445,7 +3407,6 @@ int ext4_mb_init(struct super_block *sb)
                 INIT_LIST_HEAD(&sbi->s_mb_largest_free_orders[i]);
                 rwlock_init(&sbi->s_mb_largest_free_orders_locks[i]);
         }
-        rwlock_init(&sbi->s_mb_rb_lock);
 
         spin_lock_init(&sbi->s_md_lock);
         sbi->s_mb_free_pending = 0;
@@ -3516,6 +3477,8 @@ out_free_locality_groups:
         free_percpu(sbi->s_locality_groups);
         sbi->s_locality_groups = NULL;
 out:
+        kfree(sbi->s_mb_avg_fragment_size);
+        kfree(sbi->s_mb_avg_fragment_size_locks);
         kfree(sbi->s_mb_largest_free_orders);
         kfree(sbi->s_mb_largest_free_orders_locks);
         kfree(sbi->s_mb_offsets);
@@ -3582,6 +3545,8 @@ int ext4_mb_release(struct super_block *sb)
                 kvfree(group_info);
                 rcu_read_unlock();
         }
+        kfree(sbi->s_mb_avg_fragment_size);
+        kfree(sbi->s_mb_avg_fragment_size_locks);
         kfree(sbi->s_mb_largest_free_orders);
         kfree(sbi->s_mb_largest_free_orders_locks);
         kfree(sbi->s_mb_offsets);
@@ -5193,6 +5158,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
         int bsbits = ac->ac_sb->s_blocksize_bits;
         loff_t size, isize;
+        bool inode_pa_eligible, group_pa_eligible;
 
         if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
                 return;
@@ -5200,25 +5166,27 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
         if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
                 return;
 
+        group_pa_eligible = sbi->s_mb_group_prealloc > 0;
+        inode_pa_eligible = true;
         size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
         isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
                 >> bsbits;
 
+        /* No point in using inode preallocation for closed files */
         if ((size == isize) && !ext4_fs_is_busy(sbi) &&
-            !inode_is_open_for_write(ac->ac_inode)) {
-                ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
-                return;
-        }
-
-        if (sbi->s_mb_group_prealloc <= 0) {
-                ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
-                return;
-        }
+            !inode_is_open_for_write(ac->ac_inode))
+                inode_pa_eligible = false;
 
-        /* don't use group allocation for large files */
         size = max(size, isize);
-        if (size > sbi->s_mb_stream_request) {
-                ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
+        /* Don't use group allocation for large files */
+        if (size > sbi->s_mb_stream_request)
+                group_pa_eligible = false;
+
+        if (!group_pa_eligible) {
+                if (inode_pa_eligible)
+                        ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
+                else
+                        ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
                 return;
         }
 
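
The tail of the rewritten ext4_mb_group_or_file() above turns the two eligibility flags into an allocation hint; a simplified sketch of that final decision (the flag names come from the patch; the enum, helper, and the PA_GROUP shortcut are hypothetical simplifications — the real function goes on to weigh the request against sbi->s_mb_group_prealloc):

    enum pa_choice { PA_GROUP, PA_INODE_STREAM, PA_NONE };

    static enum pa_choice choose_pa_sketch(int group_pa_eligible, int inode_pa_eligible)
    {
            if (group_pa_eligible)             /* small enough file: locality group preallocation */
                    return PA_GROUP;
            return inode_pa_eligible ?         /* otherwise stream/inode preallocation ... */
                    PA_INODE_STREAM : PA_NONE; /* ... or none for closed, fully written files */
    }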
@@ -5565,6 +5533,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
         ext4_fsblk_t block = 0;
         unsigned int inquota = 0;
         unsigned int reserv_clstrs = 0;
+        int retries = 0;
         u64 seq;
 
         might_sleep();
@@ -5667,7 +5636,8 @@ repeat:
                         ar->len = ac->ac_b_ex.fe_len;
                 }
         } else {
-                if (ext4_mb_discard_preallocations_should_retry(sb, ac, &seq))
+                if (++retries < 3 &&
+                    ext4_mb_discard_preallocations_should_retry(sb, ac, &seq))
                         goto repeat;
                 /*
                  * If block allocation fails then the pa allocated above