ext4: use percpu_counters for extent_status cache hits/misses
@es_stats_cache_hits and @es_stats_cache_misses are accessed frequently in ext4_es_lookup_extent function, it would influence the ext4 read/write performance in NUMA system. Let's optimize it using percpu_counter, it is profitable for the performance. The test command is as below: fio -name=randwrite -numjobs=8 -filename=/mnt/test1 -rw=randwrite -ioengine=libaio -direct=1 -iodepth=64 -sync=0 -norandommap -group_reporting -runtime=120 -time_based -bs=4k -size=5G And the result is better 10% than the initial implement: without the patch,IOPS=197k, BW=770MiB/s (808MB/s)(90.3GiB/120002msec) with the patch, IOPS=218k, BW=852MiB/s (894MB/s)(99.9GiB/120002msec) Cc: "Theodore Ts'o" <tytso@mit.edu> Cc: Andreas Dilger <adilger.kernel@dilger.ca> Cc: Eric Biggers <ebiggers@kernel.org> Signed-off-by: Yang Guo <guoyang2@huawei.com> Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
This commit is contained in:
parent
7727ae5297
commit
520f897a35
|
@ -948,7 +948,7 @@ out:
|
|||
es->es_pblk = es1->es_pblk;
|
||||
if (!ext4_es_is_referenced(es1))
|
||||
ext4_es_set_referenced(es1);
|
||||
stats->es_stats_cache_hits++;
|
||||
percpu_counter_inc(&stats->es_stats_cache_hits);
|
||||
if (next_lblk) {
|
||||
node = rb_next(&es1->rb_node);
|
||||
if (node) {
|
||||
|
@ -959,7 +959,7 @@ out:
|
|||
*next_lblk = 0;
|
||||
}
|
||||
} else {
|
||||
stats->es_stats_cache_misses++;
|
||||
percpu_counter_inc(&stats->es_stats_cache_misses);
|
||||
}
|
||||
|
||||
read_unlock(&EXT4_I(inode)->i_es_lock);
|
||||
|
@ -1586,9 +1586,9 @@ int ext4_seq_es_shrinker_info_show(struct seq_file *seq, void *v)
|
|||
seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n",
|
||||
percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
|
||||
percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt));
|
||||
seq_printf(seq, " %lu/%lu cache hits/misses\n",
|
||||
es_stats->es_stats_cache_hits,
|
||||
es_stats->es_stats_cache_misses);
|
||||
seq_printf(seq, " %lld/%lld cache hits/misses\n",
|
||||
percpu_counter_sum_positive(&es_stats->es_stats_cache_hits),
|
||||
percpu_counter_sum_positive(&es_stats->es_stats_cache_misses));
|
||||
if (inode_cnt)
|
||||
seq_printf(seq, " %d inodes on list\n", inode_cnt);
|
||||
|
||||
|
@ -1615,35 +1615,46 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
|
|||
sbi->s_es_nr_inode = 0;
|
||||
spin_lock_init(&sbi->s_es_lock);
|
||||
sbi->s_es_stats.es_stats_shrunk = 0;
|
||||
sbi->s_es_stats.es_stats_cache_hits = 0;
|
||||
sbi->s_es_stats.es_stats_cache_misses = 0;
|
||||
err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_hits, 0,
|
||||
GFP_KERNEL);
|
||||
if (err)
|
||||
return err;
|
||||
err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_misses, 0,
|
||||
GFP_KERNEL);
|
||||
if (err)
|
||||
goto err1;
|
||||
sbi->s_es_stats.es_stats_scan_time = 0;
|
||||
sbi->s_es_stats.es_stats_max_scan_time = 0;
|
||||
err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
|
||||
if (err)
|
||||
return err;
|
||||
goto err2;
|
||||
err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL);
|
||||
if (err)
|
||||
goto err1;
|
||||
goto err3;
|
||||
|
||||
sbi->s_es_shrinker.scan_objects = ext4_es_scan;
|
||||
sbi->s_es_shrinker.count_objects = ext4_es_count;
|
||||
sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
|
||||
err = register_shrinker(&sbi->s_es_shrinker);
|
||||
if (err)
|
||||
goto err2;
|
||||
goto err4;
|
||||
|
||||
return 0;
|
||||
|
||||
err2:
|
||||
err4:
|
||||
percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
|
||||
err1:
|
||||
err3:
|
||||
percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
|
||||
err2:
|
||||
percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses);
|
||||
err1:
|
||||
percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits);
|
||||
return err;
|
||||
}
|
||||
|
||||
void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
|
||||
{
|
||||
percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits);
|
||||
percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses);
|
||||
percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
|
||||
percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
|
||||
unregister_shrinker(&sbi->s_es_shrinker);
|
||||
|
|
|
@ -70,8 +70,8 @@ struct ext4_es_tree {
|
|||
|
||||
struct ext4_es_stats {
|
||||
unsigned long es_stats_shrunk;
|
||||
unsigned long es_stats_cache_hits;
|
||||
unsigned long es_stats_cache_misses;
|
||||
struct percpu_counter es_stats_cache_hits;
|
||||
struct percpu_counter es_stats_cache_misses;
|
||||
u64 es_stats_scan_time;
|
||||
u64 es_stats_max_scan_time;
|
||||
struct percpu_counter es_stats_all_cnt;
|
||||
|
|
Loading…
Reference in New Issue