mm: multi-gen LRU: section for Bloom filters
Move Bloom filters code into a dedicated section. Improve the design doc to explain Bloom filter usage and connection between aging and eviction in their use. Link: https://lkml.kernel.org/r/20230118001827.1040870-4-talumbau@google.com Signed-off-by: T.J. Alumbaugh <talumbau@google.com> Cc: Yu Zhao <yuzhao@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
db19a43d9b
commit
ccbbbb8594
|
@ -170,6 +170,22 @@ promotes hot pages. If the scan was done cacheline efficiently, it
|
|||
adds the PMD entry pointing to the PTE table to the Bloom filter. This
|
||||
forms a feedback loop between the eviction and the aging.
|
||||
|
||||
Bloom Filters
|
||||
-------------
|
||||
Bloom filters are a space and memory efficient data structure for set
|
||||
membership test, i.e., test if an element is not in the set or may be
|
||||
in the set.
|
||||
|
||||
In the eviction path, specifically, in ``lru_gen_look_around()``, if a
|
||||
PMD has a sufficient number of hot pages, its address is placed in the
|
||||
filter. In the aging path, set membership means that the PTE range
|
||||
will be scanned for young pages.
|
||||
|
||||
Note that Bloom filters are probabilistic on set membership. If a test
|
||||
is false positive, the cost is an additional scan of a range of PTEs,
|
||||
which may yield hot pages anyway. Parameters of the filter itself can
|
||||
control the false positive rate in the limit.
|
||||
|
||||
Summary
|
||||
-------
|
||||
The multi-gen LRU can be disassembled into the following parts:
|
||||
|
|
180
mm/vmscan.c
180
mm/vmscan.c
|
@ -3233,6 +3233,98 @@ static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
|
|||
get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS;
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
* Bloom filters
|
||||
******************************************************************************/
|
||||
|
||||
/*
|
||||
* Bloom filters with m=1<<15, k=2 and the false positive rates of ~1/5 when
|
||||
* n=10,000 and ~1/2 when n=20,000, where, conventionally, m is the number of
|
||||
* bits in a bitmap, k is the number of hash functions and n is the number of
|
||||
* inserted items.
|
||||
*
|
||||
* Page table walkers use one of the two filters to reduce their search space.
|
||||
* To get rid of non-leaf entries that no longer have enough leaf entries, the
|
||||
* aging uses the double-buffering technique to flip to the other filter each
|
||||
* time it produces a new generation. For non-leaf entries that have enough
|
||||
* leaf entries, the aging carries them over to the next generation in
|
||||
* walk_pmd_range(); the eviction also report them when walking the rmap
|
||||
* in lru_gen_look_around().
|
||||
*
|
||||
* For future optimizations:
|
||||
* 1. It's not necessary to keep both filters all the time. The spare one can be
|
||||
* freed after the RCU grace period and reallocated if needed again.
|
||||
* 2. And when reallocating, it's worth scaling its size according to the number
|
||||
* of inserted entries in the other filter, to reduce the memory overhead on
|
||||
* small systems and false positives on large systems.
|
||||
* 3. Jenkins' hash function is an alternative to Knuth's.
|
||||
*/
|
||||
#define BLOOM_FILTER_SHIFT 15
|
||||
|
||||
static inline int filter_gen_from_seq(unsigned long seq)
|
||||
{
|
||||
return seq % NR_BLOOM_FILTERS;
|
||||
}
|
||||
|
||||
static void get_item_key(void *item, int *key)
|
||||
{
|
||||
u32 hash = hash_ptr(item, BLOOM_FILTER_SHIFT * 2);
|
||||
|
||||
BUILD_BUG_ON(BLOOM_FILTER_SHIFT * 2 > BITS_PER_TYPE(u32));
|
||||
|
||||
key[0] = hash & (BIT(BLOOM_FILTER_SHIFT) - 1);
|
||||
key[1] = hash >> BLOOM_FILTER_SHIFT;
|
||||
}
|
||||
|
||||
static bool test_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
|
||||
{
|
||||
int key[2];
|
||||
unsigned long *filter;
|
||||
int gen = filter_gen_from_seq(seq);
|
||||
|
||||
filter = READ_ONCE(lruvec->mm_state.filters[gen]);
|
||||
if (!filter)
|
||||
return true;
|
||||
|
||||
get_item_key(item, key);
|
||||
|
||||
return test_bit(key[0], filter) && test_bit(key[1], filter);
|
||||
}
|
||||
|
||||
static void update_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
|
||||
{
|
||||
int key[2];
|
||||
unsigned long *filter;
|
||||
int gen = filter_gen_from_seq(seq);
|
||||
|
||||
filter = READ_ONCE(lruvec->mm_state.filters[gen]);
|
||||
if (!filter)
|
||||
return;
|
||||
|
||||
get_item_key(item, key);
|
||||
|
||||
if (!test_bit(key[0], filter))
|
||||
set_bit(key[0], filter);
|
||||
if (!test_bit(key[1], filter))
|
||||
set_bit(key[1], filter);
|
||||
}
|
||||
|
||||
static void reset_bloom_filter(struct lruvec *lruvec, unsigned long seq)
|
||||
{
|
||||
unsigned long *filter;
|
||||
int gen = filter_gen_from_seq(seq);
|
||||
|
||||
filter = lruvec->mm_state.filters[gen];
|
||||
if (filter) {
|
||||
bitmap_clear(filter, 0, BIT(BLOOM_FILTER_SHIFT));
|
||||
return;
|
||||
}
|
||||
|
||||
filter = bitmap_zalloc(BIT(BLOOM_FILTER_SHIFT),
|
||||
__GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
|
||||
WRITE_ONCE(lruvec->mm_state.filters[gen], filter);
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
* mm_struct list
|
||||
******************************************************************************/
|
||||
|
@ -3352,94 +3444,6 @@ void lru_gen_migrate_mm(struct mm_struct *mm)
|
|||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Bloom filters with m=1<<15, k=2 and the false positive rates of ~1/5 when
|
||||
* n=10,000 and ~1/2 when n=20,000, where, conventionally, m is the number of
|
||||
* bits in a bitmap, k is the number of hash functions and n is the number of
|
||||
* inserted items.
|
||||
*
|
||||
* Page table walkers use one of the two filters to reduce their search space.
|
||||
* To get rid of non-leaf entries that no longer have enough leaf entries, the
|
||||
* aging uses the double-buffering technique to flip to the other filter each
|
||||
* time it produces a new generation. For non-leaf entries that have enough
|
||||
* leaf entries, the aging carries them over to the next generation in
|
||||
* walk_pmd_range(); the eviction also report them when walking the rmap
|
||||
* in lru_gen_look_around().
|
||||
*
|
||||
* For future optimizations:
|
||||
* 1. It's not necessary to keep both filters all the time. The spare one can be
|
||||
* freed after the RCU grace period and reallocated if needed again.
|
||||
* 2. And when reallocating, it's worth scaling its size according to the number
|
||||
* of inserted entries in the other filter, to reduce the memory overhead on
|
||||
* small systems and false positives on large systems.
|
||||
* 3. Jenkins' hash function is an alternative to Knuth's.
|
||||
*/
|
||||
#define BLOOM_FILTER_SHIFT 15
|
||||
|
||||
static inline int filter_gen_from_seq(unsigned long seq)
|
||||
{
|
||||
return seq % NR_BLOOM_FILTERS;
|
||||
}
|
||||
|
||||
static void get_item_key(void *item, int *key)
|
||||
{
|
||||
u32 hash = hash_ptr(item, BLOOM_FILTER_SHIFT * 2);
|
||||
|
||||
BUILD_BUG_ON(BLOOM_FILTER_SHIFT * 2 > BITS_PER_TYPE(u32));
|
||||
|
||||
key[0] = hash & (BIT(BLOOM_FILTER_SHIFT) - 1);
|
||||
key[1] = hash >> BLOOM_FILTER_SHIFT;
|
||||
}
|
||||
|
||||
static void reset_bloom_filter(struct lruvec *lruvec, unsigned long seq)
|
||||
{
|
||||
unsigned long *filter;
|
||||
int gen = filter_gen_from_seq(seq);
|
||||
|
||||
filter = lruvec->mm_state.filters[gen];
|
||||
if (filter) {
|
||||
bitmap_clear(filter, 0, BIT(BLOOM_FILTER_SHIFT));
|
||||
return;
|
||||
}
|
||||
|
||||
filter = bitmap_zalloc(BIT(BLOOM_FILTER_SHIFT),
|
||||
__GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
|
||||
WRITE_ONCE(lruvec->mm_state.filters[gen], filter);
|
||||
}
|
||||
|
||||
static void update_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
|
||||
{
|
||||
int key[2];
|
||||
unsigned long *filter;
|
||||
int gen = filter_gen_from_seq(seq);
|
||||
|
||||
filter = READ_ONCE(lruvec->mm_state.filters[gen]);
|
||||
if (!filter)
|
||||
return;
|
||||
|
||||
get_item_key(item, key);
|
||||
|
||||
if (!test_bit(key[0], filter))
|
||||
set_bit(key[0], filter);
|
||||
if (!test_bit(key[1], filter))
|
||||
set_bit(key[1], filter);
|
||||
}
|
||||
|
||||
static bool test_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
|
||||
{
|
||||
int key[2];
|
||||
unsigned long *filter;
|
||||
int gen = filter_gen_from_seq(seq);
|
||||
|
||||
filter = READ_ONCE(lruvec->mm_state.filters[gen]);
|
||||
if (!filter)
|
||||
return true;
|
||||
|
||||
get_item_key(item, key);
|
||||
|
||||
return test_bit(key[0], filter) && test_bit(key[1], filter);
|
||||
}
|
||||
|
||||
static void reset_mm_stats(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, bool last)
|
||||
{
|
||||
int i;
|
||||
|
|
Loading…
Reference in New Issue