2021-02-26 09:19:31 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
/*
|
|
|
|
* Test cases for KFENCE memory safety error detector. Since the interface with
|
|
|
|
* which KFENCE's reports are obtained is via the console, this is the output we
|
|
|
|
* should verify. For each test case checks the presence (or absence) of
|
|
|
|
* generated reports. Relies on 'console' tracepoint to capture reports as they
|
|
|
|
* appear in the kernel log.
|
|
|
|
*
|
|
|
|
* Copyright (C) 2020, Google LLC.
|
|
|
|
* Author: Alexander Potapenko <glider@google.com>
|
|
|
|
* Marco Elver <elver@google.com>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <kunit/test.h>
|
|
|
|
#include <linux/jiffies.h>
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/kfence.h>
|
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/random.h>
|
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/spinlock.h>
|
|
|
|
#include <linux/string.h>
|
|
|
|
#include <linux/tracepoint.h>
|
|
|
|
#include <trace/events/printk.h>
|
|
|
|
|
2021-07-29 03:02:52 +08:00
|
|
|
#include <asm/kfence.h>
|
|
|
|
|
2021-02-26 09:19:31 +08:00
|
|
|
#include "kfence.h"
|
|
|
|
|
2021-07-29 03:02:52 +08:00
|
|
|
/*
 * Fallback used when <asm/kfence.h> does not define its own
 * arch_kfence_test_address(): the address is used unchanged.
 */
#ifndef arch_kfence_test_address
#define arch_kfence_test_address(addr)	(addr)
#endif
|
|
|
|
|
2021-11-06 04:45:40 +08:00
|
|
|
/*
 * Call kunit_skip() for @test when @cond is false; the stringified condition
 * is appended to the skip message.
 */
#define KFENCE_TEST_REQUIRES(test, cond) do {			\
	if (!(cond)) {						\
		kunit_skip((test), "Test requires: " #cond);	\
	}							\
} while (0)
|
|
|
|
|
2021-02-26 09:19:31 +08:00
|
|
|
/* Report as observed from console. */
|
|
|
|
static struct {
|
|
|
|
spinlock_t lock;
|
|
|
|
int nlines;
|
|
|
|
char lines[2][256];
|
|
|
|
} observed = {
|
|
|
|
.lock = __SPIN_LOCK_UNLOCKED(observed.lock),
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Probe for console output: obtains observed lines of interest. */
|
|
|
|
static void probe_console(void *ignore, const char *buf, size_t len)
|
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
int nlines;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&observed.lock, flags);
|
|
|
|
nlines = observed.nlines;
|
|
|
|
|
|
|
|
if (strnstr(buf, "BUG: KFENCE: ", len) && strnstr(buf, "test_", len)) {
|
|
|
|
/*
|
|
|
|
* KFENCE report and related to the test.
|
|
|
|
*
|
|
|
|
* The provided @buf is not NUL-terminated; copy no more than
|
|
|
|
* @len bytes and let strscpy() add the missing NUL-terminator.
|
|
|
|
*/
|
|
|
|
strscpy(observed.lines[0], buf, min(len + 1, sizeof(observed.lines[0])));
|
|
|
|
nlines = 1;
|
|
|
|
} else if (nlines == 1 && (strnstr(buf, "at 0x", len) || strnstr(buf, "of 0x", len))) {
|
|
|
|
strscpy(observed.lines[nlines++], buf, min(len + 1, sizeof(observed.lines[0])));
|
|
|
|
}
|
|
|
|
|
|
|
|
WRITE_ONCE(observed.nlines, nlines); /* Publish new nlines. */
|
|
|
|
spin_unlock_irqrestore(&observed.lock, flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Check if a report related to the test exists. */
|
|
|
|
static bool report_available(void)
|
|
|
|
{
|
|
|
|
return READ_ONCE(observed.nlines) == ARRAY_SIZE(observed.lines);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Information we expect in a report. */
|
|
|
|
struct expect_report {
|
|
|
|
enum kfence_error_type type; /* The type or error. */
|
|
|
|
void *fn; /* Function pointer to expected function where access occurred. */
|
|
|
|
char *addr; /* Address at which the bad access occurred. */
|
|
|
|
bool is_write; /* Is access a write. */
|
|
|
|
};
|
|
|
|
|
|
|
|
static const char *get_access_type(const struct expect_report *r)
|
|
|
|
{
|
|
|
|
return r->is_write ? "write" : "read";
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Check observed report matches information in @r. */
|
|
|
|
static bool report_matches(const struct expect_report *r)
|
|
|
|
{
|
2021-07-29 03:02:52 +08:00
|
|
|
unsigned long addr = (unsigned long)r->addr;
|
2021-02-26 09:19:31 +08:00
|
|
|
bool ret = false;
|
|
|
|
unsigned long flags;
|
|
|
|
typeof(observed.lines) expect;
|
|
|
|
const char *end;
|
|
|
|
char *cur;
|
|
|
|
|
|
|
|
/* Doubled-checked locking. */
|
|
|
|
if (!report_available())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* Generate expected report contents. */
|
|
|
|
|
|
|
|
/* Title */
|
|
|
|
cur = expect[0];
|
|
|
|
end = &expect[0][sizeof(expect[0]) - 1];
|
|
|
|
switch (r->type) {
|
|
|
|
case KFENCE_ERROR_OOB:
|
|
|
|
cur += scnprintf(cur, end - cur, "BUG: KFENCE: out-of-bounds %s",
|
|
|
|
get_access_type(r));
|
|
|
|
break;
|
|
|
|
case KFENCE_ERROR_UAF:
|
|
|
|
cur += scnprintf(cur, end - cur, "BUG: KFENCE: use-after-free %s",
|
|
|
|
get_access_type(r));
|
|
|
|
break;
|
|
|
|
case KFENCE_ERROR_CORRUPTION:
|
|
|
|
cur += scnprintf(cur, end - cur, "BUG: KFENCE: memory corruption");
|
|
|
|
break;
|
|
|
|
case KFENCE_ERROR_INVALID:
|
|
|
|
cur += scnprintf(cur, end - cur, "BUG: KFENCE: invalid %s",
|
|
|
|
get_access_type(r));
|
|
|
|
break;
|
|
|
|
case KFENCE_ERROR_INVALID_FREE:
|
|
|
|
cur += scnprintf(cur, end - cur, "BUG: KFENCE: invalid free");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
scnprintf(cur, end - cur, " in %pS", r->fn);
|
|
|
|
/* The exact offset won't match, remove it; also strip module name. */
|
|
|
|
cur = strchr(expect[0], '+');
|
|
|
|
if (cur)
|
|
|
|
*cur = '\0';
|
|
|
|
|
|
|
|
/* Access information */
|
|
|
|
cur = expect[1];
|
|
|
|
end = &expect[1][sizeof(expect[1]) - 1];
|
|
|
|
|
|
|
|
switch (r->type) {
|
|
|
|
case KFENCE_ERROR_OOB:
|
|
|
|
cur += scnprintf(cur, end - cur, "Out-of-bounds %s at", get_access_type(r));
|
2021-07-29 03:02:52 +08:00
|
|
|
addr = arch_kfence_test_address(addr);
|
2021-02-26 09:19:31 +08:00
|
|
|
break;
|
|
|
|
case KFENCE_ERROR_UAF:
|
|
|
|
cur += scnprintf(cur, end - cur, "Use-after-free %s at", get_access_type(r));
|
2021-07-29 03:02:52 +08:00
|
|
|
addr = arch_kfence_test_address(addr);
|
2021-02-26 09:19:31 +08:00
|
|
|
break;
|
|
|
|
case KFENCE_ERROR_CORRUPTION:
|
|
|
|
cur += scnprintf(cur, end - cur, "Corrupted memory at");
|
|
|
|
break;
|
|
|
|
case KFENCE_ERROR_INVALID:
|
|
|
|
cur += scnprintf(cur, end - cur, "Invalid %s at", get_access_type(r));
|
2021-07-29 03:02:52 +08:00
|
|
|
addr = arch_kfence_test_address(addr);
|
2021-02-26 09:19:31 +08:00
|
|
|
break;
|
|
|
|
case KFENCE_ERROR_INVALID_FREE:
|
|
|
|
cur += scnprintf(cur, end - cur, "Invalid free of");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2021-07-29 03:02:52 +08:00
|
|
|
cur += scnprintf(cur, end - cur, " 0x%p", (void *)addr);
|
2021-02-26 09:19:31 +08:00
|
|
|
|
|
|
|
spin_lock_irqsave(&observed.lock, flags);
|
|
|
|
if (!report_available())
|
|
|
|
goto out; /* A new report is being captured. */
|
|
|
|
|
|
|
|
/* Finally match expected output to what we actually observed. */
|
|
|
|
ret = strstr(observed.lines[0], expect[0]) && strstr(observed.lines[1], expect[1]);
|
|
|
|
out:
|
|
|
|
spin_unlock_irqrestore(&observed.lock, flags);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ===== Test cases ===== */
|
|
|
|
|
|
|
|
/*
 * Marker value for kunit's test->priv: setup_test_cache() only creates a
 * dedicated cache when test->priv equals this value.
 */
#define TEST_PRIV_WANT_MEMCACHE	((void *)1)

/* Cache used by tests; if NULL, allocate from kmalloc instead. */
static struct kmem_cache *test_cache;
|
|
|
|
|
|
|
|
/*
 * Create test_cache for objects of @size bytes if the test asked for a
 * dedicated cache (test->priv == TEST_PRIV_WANT_MEMCACHE); otherwise do
 * nothing. Asserts that cache creation succeeded.
 *
 * Returns @size unchanged in both cases.
 */
static size_t setup_test_cache(struct kunit *test, size_t size, slab_flags_t flags,
			       void (*ctor)(void *))
{
	const bool want_memcache = test->priv == TEST_PRIV_WANT_MEMCACHE;

	if (want_memcache) {
		kunit_info(test, "%s: size=%zu, ctor=%ps\n", __func__, size, ctor);

		/*
		 * Use SLAB_NO_MERGE to prevent merging with existing caches.
		 * Use SLAB_ACCOUNT to allocate via memcg, if enabled.
		 */
		test_cache = kmem_cache_create("test", size, 1,
					       flags | SLAB_NO_MERGE | SLAB_ACCOUNT, ctor);
		KUNIT_ASSERT_TRUE_MSG(test, test_cache, "could not create cache");
	}

	return size;
}
|
|
|
|
|
|
|
|
static void test_cache_destroy(void)
|
|
|
|
{
|
|
|
|
if (!test_cache)
|
|
|
|
return;
|
|
|
|
|
|
|
|
kmem_cache_destroy(test_cache);
|
|
|
|
test_cache = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Returns the ->align value of the kmalloc cache selected for a GFP_KERNEL
 * allocation of @size bytes.
 */
static inline size_t kmalloc_cache_alignment(size_t size)
{
	/* just to get ->align so no need to pass in the real caller */
	const enum kmalloc_cache_type type = kmalloc_type(GFP_KERNEL, 0);
	const struct kmem_cache *cache = kmalloc_caches[type][__kmalloc_index(size, false)];

	return cache->align;
}
|
|
|
|
|
|
|
|
/* Must always inline to match stack trace against caller. */
|
|
|
|
static __always_inline void test_free(void *ptr)
|
|
|
|
{
|
|
|
|
if (test_cache)
|
|
|
|
kmem_cache_free(test_cache, ptr);
|
|
|
|
else
|
|
|
|
kfree(ptr);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Selects whether test_alloc() must return a KFENCE allocation and, if so,
 * on which side of the page the allocation (and hence the closest guard
 * page) should be.
 */
enum allocation_policy {
	/* KFENCE, any side. */
	ALLOCATE_ANY,
	/* KFENCE, left side of page. */
	ALLOCATE_LEFT,
	/* KFENCE, right side of page. */
	ALLOCATE_RIGHT,
	/* No KFENCE allocation. */
	ALLOCATE_NONE,
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Try to get a guarded allocation from KFENCE. Uses either kmalloc() or the
|
|
|
|
* current test_cache if set up.
|
|
|
|
*/
|
|
|
|
static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocation_policy policy)
|
|
|
|
{
|
|
|
|
void *alloc;
|
|
|
|
unsigned long timeout, resched_after;
|
|
|
|
const char *policy_name;
|
|
|
|
|
|
|
|
switch (policy) {
|
|
|
|
case ALLOCATE_ANY:
|
|
|
|
policy_name = "any";
|
|
|
|
break;
|
|
|
|
case ALLOCATE_LEFT:
|
|
|
|
policy_name = "left";
|
|
|
|
break;
|
|
|
|
case ALLOCATE_RIGHT:
|
|
|
|
policy_name = "right";
|
|
|
|
break;
|
|
|
|
case ALLOCATE_NONE:
|
|
|
|
policy_name = "none";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
kunit_info(test, "%s: size=%zu, gfp=%x, policy=%s, cache=%i\n", __func__, size, gfp,
|
|
|
|
policy_name, !!test_cache);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* 100x the sample interval should be more than enough to ensure we get
|
|
|
|
* a KFENCE allocation eventually.
|
|
|
|
*/
|
2022-02-12 08:32:35 +08:00
|
|
|
timeout = jiffies + msecs_to_jiffies(100 * kfence_sample_interval);
|
2021-02-26 09:19:31 +08:00
|
|
|
/*
|
|
|
|
* Especially for non-preemption kernels, ensure the allocation-gate
|
|
|
|
* timer can catch up: after @resched_after, every failed allocation
|
|
|
|
* attempt yields, to ensure the allocation-gate timer is scheduled.
|
|
|
|
*/
|
2022-02-12 08:32:35 +08:00
|
|
|
resched_after = jiffies + msecs_to_jiffies(kfence_sample_interval);
|
2021-02-26 09:19:31 +08:00
|
|
|
do {
|
|
|
|
if (test_cache)
|
|
|
|
alloc = kmem_cache_alloc(test_cache, gfp);
|
|
|
|
else
|
|
|
|
alloc = kmalloc(size, gfp);
|
|
|
|
|
|
|
|
if (is_kfence_address(alloc)) {
|
2021-11-04 01:19:48 +08:00
|
|
|
struct slab *slab = virt_to_slab(alloc);
|
Randomized slab caches for kmalloc()
When exploiting memory vulnerabilities, "heap spraying" is a common
technique targeting those related to dynamic memory allocation (i.e. the
"heap"), and it plays an important role in a successful exploitation.
Basically, it is to overwrite the memory area of vulnerable object by
triggering allocation in other subsystems or modules and therefore
getting a reference to the targeted memory location. It's usable on
various types of vulnerablity including use after free (UAF), heap out-
of-bound write and etc.
There are (at least) two reasons why the heap can be sprayed: 1) generic
slab caches are shared among different subsystems and modules, and
2) dedicated slab caches could be merged with the generic ones.
Currently these two factors cannot be prevented at a low cost: the first
one is a widely used memory allocation mechanism, and shutting down slab
merging completely via `slub_nomerge` would be overkill.
To efficiently prevent heap spraying, we propose the following approach:
to create multiple copies of generic slab caches that will never be
merged, and random one of them will be used at allocation. The random
selection is based on the address of code that calls `kmalloc()`, which
means it is static at runtime (rather than dynamically determined at
each time of allocation, which could be bypassed by repeatedly spraying
in brute force). In other words, the randomness of cache selection will
be with respect to the code address rather than time, i.e. allocations
in different code paths would most likely pick different caches,
although kmalloc() at each place would use the same cache copy whenever
it is executed. In this way, the vulnerable object and memory allocated
in other subsystems and modules will (most probably) be on different
slab caches, which prevents the object from being sprayed.
Meanwhile, the static random selection is further enhanced with a
per-boot random seed, which prevents the attacker from finding a usable
kmalloc that happens to pick the same cache with the vulnerable
subsystem/module by analyzing the open source code. In other words, with
the per-boot seed, the random selection is static during each time the
system starts and runs, but not across different system startups.
The overhead of performance has been tested on a 40-core x86 server by
comparing the results of `perf bench all` between the kernels with and
without this patch based on the latest linux-next kernel, which shows
minor difference. A subset of benchmarks are listed below:
sched/ sched/ syscall/ mem/ mem/
messaging pipe basic memcpy memset
(sec) (sec) (sec) (GB/sec) (GB/sec)
control1 0.019 5.459 0.733 15.258789 51.398026
control2 0.019 5.439 0.730 16.009221 48.828125
control3 0.019 5.282 0.735 16.009221 48.828125
control_avg 0.019 5.393 0.733 15.759077 49.684759
experiment1 0.019 5.374 0.741 15.500992 46.502976
experiment2 0.019 5.440 0.746 16.276042 51.398026
experiment3 0.019 5.242 0.752 15.258789 51.398026
experiment_avg 0.019 5.352 0.746 15.678608 49.766343
The overhead of memory usage was measured by executing `free` after boot
on a QEMU VM with 1GB total memory, and as expected, it's positively
correlated with # of cache copies:
control 4 copies 8 copies 16 copies
total 969.8M 968.2M 968.2M 968.2M
used 20.0M 21.9M 24.1M 26.7M
free 936.9M 933.6M 931.4M 928.6M
available 932.2M 928.8M 926.6M 923.9M
Co-developed-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Signed-off-by: GONG, Ruiqi <gongruiqi@huaweicloud.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Acked-by: Dennis Zhou <dennis@kernel.org> # percpu
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
2023-07-14 14:44:22 +08:00
|
|
|
enum kmalloc_cache_type type = kmalloc_type(GFP_KERNEL, _RET_IP_);
|
2021-06-29 10:34:39 +08:00
|
|
|
struct kmem_cache *s = test_cache ?:
|
Randomized slab caches for kmalloc()
When exploiting memory vulnerabilities, "heap spraying" is a common
technique targeting those related to dynamic memory allocation (i.e. the
"heap"), and it plays an important role in a successful exploitation.
Basically, it is to overwrite the memory area of vulnerable object by
triggering allocation in other subsystems or modules and therefore
getting a reference to the targeted memory location. It's usable on
various types of vulnerablity including use after free (UAF), heap out-
of-bound write and etc.
There are (at least) two reasons why the heap can be sprayed: 1) generic
slab caches are shared among different subsystems and modules, and
2) dedicated slab caches could be merged with the generic ones.
Currently these two factors cannot be prevented at a low cost: the first
one is a widely used memory allocation mechanism, and shutting down slab
merging completely via `slub_nomerge` would be overkill.
To efficiently prevent heap spraying, we propose the following approach:
to create multiple copies of generic slab caches that will never be
merged, and random one of them will be used at allocation. The random
selection is based on the address of code that calls `kmalloc()`, which
means it is static at runtime (rather than dynamically determined at
each time of allocation, which could be bypassed by repeatedly spraying
in brute force). In other words, the randomness of cache selection will
be with respect to the code address rather than time, i.e. allocations
in different code paths would most likely pick different caches,
although kmalloc() at each place would use the same cache copy whenever
it is executed. In this way, the vulnerable object and memory allocated
in other subsystems and modules will (most probably) be on different
slab caches, which prevents the object from being sprayed.
Meanwhile, the static random selection is further enhanced with a
per-boot random seed, which prevents the attacker from finding a usable
kmalloc that happens to pick the same cache with the vulnerable
subsystem/module by analyzing the open source code. In other words, with
the per-boot seed, the random selection is static during each time the
system starts and runs, but not across different system startups.
The overhead of performance has been tested on a 40-core x86 server by
comparing the results of `perf bench all` between the kernels with and
without this patch based on the latest linux-next kernel, which shows
minor difference. A subset of benchmarks are listed below:
sched/ sched/ syscall/ mem/ mem/
messaging pipe basic memcpy memset
(sec) (sec) (sec) (GB/sec) (GB/sec)
control1 0.019 5.459 0.733 15.258789 51.398026
control2 0.019 5.439 0.730 16.009221 48.828125
control3 0.019 5.282 0.735 16.009221 48.828125
control_avg 0.019 5.393 0.733 15.759077 49.684759
experiment1 0.019 5.374 0.741 15.500992 46.502976
experiment2 0.019 5.440 0.746 16.276042 51.398026
experiment3 0.019 5.242 0.752 15.258789 51.398026
experiment_avg 0.019 5.352 0.746 15.678608 49.766343
The overhead of memory usage was measured by executing `free` after boot
on a QEMU VM with 1GB total memory, and as expected, it's positively
correlated with # of cache copies:
control 4 copies 8 copies 16 copies
total 969.8M 968.2M 968.2M 968.2M
used 20.0M 21.9M 24.1M 26.7M
free 936.9M 933.6M 931.4M 928.6M
available 932.2M 928.8M 926.6M 923.9M
Co-developed-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Signed-off-by: GONG, Ruiqi <gongruiqi@huaweicloud.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Acked-by: Dennis Zhou <dennis@kernel.org> # percpu
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
2023-07-14 14:44:22 +08:00
|
|
|
kmalloc_caches[type][__kmalloc_index(size, false)];
|
2021-02-26 09:19:31 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Verify that various helpers return the right values
|
|
|
|
* even for KFENCE objects; these are required so that
|
|
|
|
* memcg accounting works correctly.
|
|
|
|
*/
|
2021-11-04 01:19:48 +08:00
|
|
|
KUNIT_EXPECT_EQ(test, obj_to_index(s, slab, alloc), 0U);
|
|
|
|
KUNIT_EXPECT_EQ(test, objs_per_slab(s, slab), 1);
|
2021-02-26 09:19:31 +08:00
|
|
|
|
|
|
|
if (policy == ALLOCATE_ANY)
|
|
|
|
return alloc;
|
2022-05-20 10:18:33 +08:00
|
|
|
if (policy == ALLOCATE_LEFT && PAGE_ALIGNED(alloc))
|
2021-02-26 09:19:31 +08:00
|
|
|
return alloc;
|
2022-05-20 10:18:33 +08:00
|
|
|
if (policy == ALLOCATE_RIGHT && !PAGE_ALIGNED(alloc))
|
2021-02-26 09:19:31 +08:00
|
|
|
return alloc;
|
|
|
|
} else if (policy == ALLOCATE_NONE)
|
|
|
|
return alloc;
|
|
|
|
|
|
|
|
test_free(alloc);
|
|
|
|
|
|
|
|
if (time_after(jiffies, resched_after))
|
|
|
|
cond_resched();
|
|
|
|
} while (time_before(jiffies, timeout));
|
|
|
|
|
|
|
|
KUNIT_ASSERT_TRUE_MSG(test, false, "failed to allocate from KFENCE");
|
|
|
|
return NULL; /* Unreachable. */
|
|
|
|
}
|
|
|
|
|
|
|
|
static void test_out_of_bounds_read(struct kunit *test)
|
|
|
|
{
|
|
|
|
size_t size = 32;
|
|
|
|
struct expect_report expect = {
|
|
|
|
.type = KFENCE_ERROR_OOB,
|
|
|
|
.fn = test_out_of_bounds_read,
|
|
|
|
.is_write = false,
|
|
|
|
};
|
|
|
|
char *buf;
|
|
|
|
|
|
|
|
setup_test_cache(test, size, 0, NULL);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we don't have our own cache, adjust based on alignment, so that we
|
|
|
|
* actually access guard pages on either side.
|
|
|
|
*/
|
|
|
|
if (!test_cache)
|
|
|
|
size = kmalloc_cache_alignment(size);
|
|
|
|
|
|
|
|
/* Test both sides. */
|
|
|
|
|
|
|
|
buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_LEFT);
|
|
|
|
expect.addr = buf - 1;
|
|
|
|
READ_ONCE(*expect.addr);
|
|
|
|
KUNIT_EXPECT_TRUE(test, report_matches(&expect));
|
|
|
|
test_free(buf);
|
|
|
|
|
|
|
|
buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_RIGHT);
|
|
|
|
expect.addr = buf + size;
|
|
|
|
READ_ONCE(*expect.addr);
|
|
|
|
KUNIT_EXPECT_TRUE(test, report_matches(&expect));
|
|
|
|
test_free(buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void test_out_of_bounds_write(struct kunit *test)
|
|
|
|
{
|
|
|
|
size_t size = 32;
|
|
|
|
struct expect_report expect = {
|
|
|
|
.type = KFENCE_ERROR_OOB,
|
|
|
|
.fn = test_out_of_bounds_write,
|
|
|
|
.is_write = true,
|
|
|
|
};
|
|
|
|
char *buf;
|
|
|
|
|
|
|
|
setup_test_cache(test, size, 0, NULL);
|
|
|
|
buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_LEFT);
|
|
|
|
expect.addr = buf - 1;
|
|
|
|
WRITE_ONCE(*expect.addr, 42);
|
|
|
|
KUNIT_EXPECT_TRUE(test, report_matches(&expect));
|
|
|
|
test_free(buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void test_use_after_free_read(struct kunit *test)
|
|
|
|
{
|
|
|
|
const size_t size = 32;
|
|
|
|
struct expect_report expect = {
|
|
|
|
.type = KFENCE_ERROR_UAF,
|
|
|
|
.fn = test_use_after_free_read,
|
|
|
|
.is_write = false,
|
|
|
|
};
|
|
|
|
|
|
|
|
setup_test_cache(test, size, 0, NULL);
|
|
|
|
expect.addr = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
|
|
|
|
test_free(expect.addr);
|
|
|
|
READ_ONCE(*expect.addr);
|
|
|
|
KUNIT_EXPECT_TRUE(test, report_matches(&expect));
|
|
|
|
}
|
|
|
|
|
|
|
|
static void test_double_free(struct kunit *test)
|
|
|
|
{
|
|
|
|
const size_t size = 32;
|
|
|
|
struct expect_report expect = {
|
|
|
|
.type = KFENCE_ERROR_INVALID_FREE,
|
|
|
|
.fn = test_double_free,
|
|
|
|
};
|
|
|
|
|
|
|
|
setup_test_cache(test, size, 0, NULL);
|
|
|
|
expect.addr = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
|
|
|
|
test_free(expect.addr);
|
|
|
|
test_free(expect.addr); /* Double-free. */
|
|
|
|
KUNIT_EXPECT_TRUE(test, report_matches(&expect));
|
|
|
|
}
|
|
|
|
|
|
|
|
static void test_invalid_addr_free(struct kunit *test)
|
|
|
|
{
|
|
|
|
const size_t size = 32;
|
|
|
|
struct expect_report expect = {
|
|
|
|
.type = KFENCE_ERROR_INVALID_FREE,
|
|
|
|
.fn = test_invalid_addr_free,
|
|
|
|
};
|
|
|
|
char *buf;
|
|
|
|
|
|
|
|
setup_test_cache(test, size, 0, NULL);
|
|
|
|
buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
|
|
|
|
expect.addr = buf + 1; /* Free on invalid address. */
|
|
|
|
test_free(expect.addr); /* Invalid address free. */
|
|
|
|
test_free(buf); /* No error. */
|
|
|
|
KUNIT_EXPECT_TRUE(test, report_matches(&expect));
|
|
|
|
}
|
|
|
|
|
|
|
|
static void test_corruption(struct kunit *test)
|
|
|
|
{
|
|
|
|
size_t size = 32;
|
|
|
|
struct expect_report expect = {
|
|
|
|
.type = KFENCE_ERROR_CORRUPTION,
|
|
|
|
.fn = test_corruption,
|
|
|
|
};
|
|
|
|
char *buf;
|
|
|
|
|
|
|
|
setup_test_cache(test, size, 0, NULL);
|
|
|
|
|
|
|
|
/* Test both sides. */
|
|
|
|
|
|
|
|
buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_LEFT);
|
|
|
|
expect.addr = buf + size;
|
|
|
|
WRITE_ONCE(*expect.addr, 42);
|
|
|
|
test_free(buf);
|
|
|
|
KUNIT_EXPECT_TRUE(test, report_matches(&expect));
|
|
|
|
|
|
|
|
buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_RIGHT);
|
|
|
|
expect.addr = buf - 1;
|
|
|
|
WRITE_ONCE(*expect.addr, 42);
|
|
|
|
test_free(buf);
|
|
|
|
KUNIT_EXPECT_TRUE(test, report_matches(&expect));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* KFENCE is unable to detect an OOB if the allocation's alignment requirements
|
|
|
|
* leave a gap between the object and the guard page. Specifically, an
|
|
|
|
* allocation of e.g. 73 bytes is aligned on 8 and 128 bytes for SLUB or SLAB
|
|
|
|
* respectively. Therefore it is impossible for the allocated object to
|
|
|
|
* contiguously line up with the right guard page.
|
|
|
|
*
|
|
|
|
* However, we test that an access to memory beyond the gap results in KFENCE
|
|
|
|
* detecting an OOB access.
|
|
|
|
*/
|
|
|
|
static void test_kmalloc_aligned_oob_read(struct kunit *test)
|
|
|
|
{
|
|
|
|
const size_t size = 73;
|
|
|
|
const size_t align = kmalloc_cache_alignment(size);
|
|
|
|
struct expect_report expect = {
|
|
|
|
.type = KFENCE_ERROR_OOB,
|
|
|
|
.fn = test_kmalloc_aligned_oob_read,
|
|
|
|
.is_write = false,
|
|
|
|
};
|
|
|
|
char *buf;
|
|
|
|
|
|
|
|
buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_RIGHT);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The object is offset to the right, so there won't be an OOB to the
|
|
|
|
* left of it.
|
|
|
|
*/
|
|
|
|
READ_ONCE(*(buf - 1));
|
|
|
|
KUNIT_EXPECT_FALSE(test, report_available());
|
|
|
|
|
|
|
|
/*
|
|
|
|
* @buf must be aligned on @align, therefore buf + size belongs to the
|
|
|
|
* same page -> no OOB.
|
|
|
|
*/
|
|
|
|
READ_ONCE(*(buf + size));
|
|
|
|
KUNIT_EXPECT_FALSE(test, report_available());
|
|
|
|
|
|
|
|
/* Overflowing by @align bytes will result in an OOB. */
|
|
|
|
expect.addr = buf + size + align;
|
|
|
|
READ_ONCE(*expect.addr);
|
|
|
|
KUNIT_EXPECT_TRUE(test, report_matches(&expect));
|
|
|
|
|
|
|
|
test_free(buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void test_kmalloc_aligned_oob_write(struct kunit *test)
|
|
|
|
{
|
|
|
|
const size_t size = 73;
|
|
|
|
struct expect_report expect = {
|
|
|
|
.type = KFENCE_ERROR_CORRUPTION,
|
|
|
|
.fn = test_kmalloc_aligned_oob_write,
|
|
|
|
};
|
|
|
|
char *buf;
|
|
|
|
|
|
|
|
buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_RIGHT);
|
|
|
|
/*
|
|
|
|
* The object is offset to the right, so we won't get a page
|
|
|
|
* fault immediately after it.
|
|
|
|
*/
|
|
|
|
expect.addr = buf + size;
|
|
|
|
WRITE_ONCE(*expect.addr, READ_ONCE(*expect.addr) + 1);
|
|
|
|
KUNIT_EXPECT_FALSE(test, report_available());
|
|
|
|
test_free(buf);
|
|
|
|
KUNIT_EXPECT_TRUE(test, report_matches(&expect));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Test cache shrinking and destroying with KFENCE. */
|
|
|
|
static void test_shrink_memcache(struct kunit *test)
|
|
|
|
{
|
|
|
|
const size_t size = 32;
|
|
|
|
void *buf;
|
|
|
|
|
|
|
|
setup_test_cache(test, size, 0, NULL);
|
|
|
|
KUNIT_EXPECT_TRUE(test, test_cache);
|
|
|
|
buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
|
|
|
|
kmem_cache_shrink(test_cache);
|
|
|
|
test_free(buf);
|
|
|
|
|
|
|
|
KUNIT_EXPECT_FALSE(test, report_available());
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Cache constructor used by the tests: stamps the start of @obj with 'x'. */
static void ctor_set_x(void *obj)
{
	/* Every object has at least 8 bytes, so this never overruns it. */
	const size_t fill_len = 8;

	memset(obj, 'x', fill_len);
}
|
|
|
|
|
|
|
|
/* Ensure that SL*B does not modify KFENCE objects on bulk free. */
|
|
|
|
static void test_free_bulk(struct kunit *test)
|
|
|
|
{
|
|
|
|
int iter;
|
|
|
|
|
|
|
|
for (iter = 0; iter < 5; iter++) {
|
2022-10-10 10:44:02 +08:00
|
|
|
const size_t size = setup_test_cache(test, get_random_u32_inclusive(8, 307),
|
2022-10-10 10:44:02 +08:00
|
|
|
0, (iter & 1) ? ctor_set_x : NULL);
|
2021-02-26 09:19:31 +08:00
|
|
|
void *objects[] = {
|
|
|
|
test_alloc(test, size, GFP_KERNEL, ALLOCATE_RIGHT),
|
|
|
|
test_alloc(test, size, GFP_KERNEL, ALLOCATE_NONE),
|
|
|
|
test_alloc(test, size, GFP_KERNEL, ALLOCATE_LEFT),
|
|
|
|
test_alloc(test, size, GFP_KERNEL, ALLOCATE_NONE),
|
|
|
|
test_alloc(test, size, GFP_KERNEL, ALLOCATE_NONE),
|
|
|
|
};
|
|
|
|
|
|
|
|
kmem_cache_free_bulk(test_cache, ARRAY_SIZE(objects), objects);
|
|
|
|
KUNIT_ASSERT_FALSE(test, report_available());
|
|
|
|
test_cache_destroy();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Test init-on-free works. */
|
|
|
|
static void test_init_on_free(struct kunit *test)
|
|
|
|
{
|
|
|
|
const size_t size = 32;
|
|
|
|
struct expect_report expect = {
|
|
|
|
.type = KFENCE_ERROR_UAF,
|
|
|
|
.fn = test_init_on_free,
|
|
|
|
.is_write = false,
|
|
|
|
};
|
|
|
|
int i;
|
|
|
|
|
2021-11-06 04:45:40 +08:00
|
|
|
KFENCE_TEST_REQUIRES(test, IS_ENABLED(CONFIG_INIT_ON_FREE_DEFAULT_ON));
|
2021-02-26 09:19:31 +08:00
|
|
|
/* Assume it hasn't been disabled on command line. */
|
|
|
|
|
|
|
|
setup_test_cache(test, size, 0, NULL);
|
|
|
|
expect.addr = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
|
|
|
|
for (i = 0; i < size; i++)
|
|
|
|
expect.addr[i] = i + 1;
|
|
|
|
test_free(expect.addr);
|
|
|
|
|
|
|
|
for (i = 0; i < size; i++) {
|
|
|
|
/*
|
|
|
|
* This may fail if the page was recycled by KFENCE and then
|
|
|
|
* written to again -- this however, is near impossible with a
|
|
|
|
* default config.
|
|
|
|
*/
|
|
|
|
KUNIT_EXPECT_EQ(test, expect.addr[i], (char)0);
|
|
|
|
|
|
|
|
if (!i) /* Only check first access to not fail test if page is ever re-protected. */
|
|
|
|
KUNIT_EXPECT_TRUE(test, report_matches(&expect));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Ensure that constructors work properly. */
|
|
|
|
static void test_memcache_ctor(struct kunit *test)
|
|
|
|
{
|
|
|
|
const size_t size = 32;
|
|
|
|
char *buf;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
setup_test_cache(test, size, 0, ctor_set_x);
|
|
|
|
buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
|
|
|
|
|
|
|
|
for (i = 0; i < 8; i++)
|
|
|
|
KUNIT_EXPECT_EQ(test, buf[i], (char)'x');
|
|
|
|
|
|
|
|
test_free(buf);
|
|
|
|
|
|
|
|
KUNIT_EXPECT_FALSE(test, report_available());
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Test that memory is zeroed if requested. */
|
|
|
|
static void test_gfpzero(struct kunit *test)
|
|
|
|
{
|
|
|
|
const size_t size = PAGE_SIZE; /* PAGE_SIZE so we can use ALLOCATE_ANY. */
|
|
|
|
char *buf1, *buf2;
|
|
|
|
int i;
|
|
|
|
|
2021-11-06 04:45:40 +08:00
|
|
|
/* Skip if we think it'd take too long. */
|
2022-02-12 08:32:35 +08:00
|
|
|
KFENCE_TEST_REQUIRES(test, kfence_sample_interval <= 100);
|
2021-02-26 09:19:31 +08:00
|
|
|
|
|
|
|
setup_test_cache(test, size, 0, NULL);
|
|
|
|
buf1 = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
|
|
|
|
for (i = 0; i < size; i++)
|
|
|
|
buf1[i] = i + 1;
|
|
|
|
test_free(buf1);
|
|
|
|
|
|
|
|
/* Try to get same address again -- this can take a while. */
|
|
|
|
for (i = 0;; i++) {
|
|
|
|
buf2 = test_alloc(test, size, GFP_KERNEL | __GFP_ZERO, ALLOCATE_ANY);
|
|
|
|
if (buf1 == buf2)
|
|
|
|
break;
|
|
|
|
test_free(buf2);
|
|
|
|
|
kunit: fix UAF when run kfence test case test_gfpzero
Patch series "kunit: fix a UAF bug and do some optimization", v2.
This series is to fix UAF (use after free) when running kfence test case
test_gfpzero, which is time costly. This UAF bug can be easily triggered
by setting CONFIG_KFENCE_NUM_OBJECTS = 65535. Furthermore, some
optimization for kunit tests has been done.
This patch (of 3):
Kunit will create a new thread to run an actual test case, and the main
process will wait for the completion of the actual test thread until
overtime. The variable "struct kunit test" has local property in function
kunit_try_catch_run, and will be used in the test case thread. Task
kunit_try_catch_run will free "struct kunit test" when kunit runs
overtime, but the actual test case is still run and an UAF bug will be
triggered.
The above problem has been both observed in a physical machine and qemu
platform when running kfence kunit tests. The problem can be triggered
when setting CONFIG_KFENCE_NUM_OBJECTS = 65535. Under this setting, the
test case test_gfpzero will cost hours and kunit will run to overtime.
The follows show the panic log.
BUG: unable to handle page fault for address: ffffffff82d882e9
Call Trace:
kunit_log_append+0x58/0xd0
...
test_alloc.constprop.0.cold+0x6b/0x8a [kfence_test]
test_gfpzero.cold+0x61/0x8ab [kfence_test]
kunit_try_run_case+0x4c/0x70
kunit_generic_run_threadfn_adapter+0x11/0x20
kthread+0x166/0x190
ret_from_fork+0x22/0x30
Kernel panic - not syncing: Fatal exception
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
Ubuntu-1.8.2-1ubuntu1 04/01/2014
To solve this problem, the test case thread should be stopped when the
kunit frame runs overtime. The stop signal will send in function
kunit_try_catch_run, and test_gfpzero will handle it.
Link: https://lkml.kernel.org/r/20220309083753.1561921-1-liupeng256@huawei.com
Link: https://lkml.kernel.org/r/20220309083753.1561921-2-liupeng256@huawei.com
Signed-off-by: Peng Liu <liupeng256@huawei.com>
Reviewed-by: Marco Elver <elver@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Tested-by: Brendan Higgins <brendanhiggins@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Wang Kefeng <wangkefeng.wang@huawei.com>
Cc: Daniel Latypov <dlatypov@google.com>
Cc: David Gow <davidgow@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2022-03-23 05:48:16 +08:00
|
|
|
if (kthread_should_stop() || (i == CONFIG_KFENCE_NUM_OBJECTS)) {
|
2021-02-26 09:19:31 +08:00
|
|
|
kunit_warn(test, "giving up ... cannot get same object back\n");
|
|
|
|
return;
|
|
|
|
}
|
2022-03-23 05:48:22 +08:00
|
|
|
cond_resched();
|
2021-02-26 09:19:31 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < size; i++)
|
|
|
|
KUNIT_EXPECT_EQ(test, buf2[i], (char)0);
|
|
|
|
|
|
|
|
test_free(buf2);
|
|
|
|
|
|
|
|
KUNIT_EXPECT_FALSE(test, report_available());
|
|
|
|
}
|
|
|
|
|
|
|
|
static void test_invalid_access(struct kunit *test)
|
|
|
|
{
|
|
|
|
const struct expect_report expect = {
|
|
|
|
.type = KFENCE_ERROR_INVALID,
|
|
|
|
.fn = test_invalid_access,
|
|
|
|
.addr = &__kfence_pool[10],
|
|
|
|
.is_write = false,
|
|
|
|
};
|
|
|
|
|
|
|
|
READ_ONCE(__kfence_pool[10]);
|
|
|
|
KUNIT_EXPECT_TRUE(test, report_matches(&expect));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Test SLAB_TYPESAFE_BY_RCU works. */
|
|
|
|
static void test_memcache_typesafe_by_rcu(struct kunit *test)
|
|
|
|
{
|
|
|
|
const size_t size = 32;
|
|
|
|
struct expect_report expect = {
|
|
|
|
.type = KFENCE_ERROR_UAF,
|
|
|
|
.fn = test_memcache_typesafe_by_rcu,
|
|
|
|
.is_write = false,
|
|
|
|
};
|
|
|
|
|
|
|
|
setup_test_cache(test, size, SLAB_TYPESAFE_BY_RCU, NULL);
|
|
|
|
KUNIT_EXPECT_TRUE(test, test_cache); /* Want memcache. */
|
|
|
|
|
|
|
|
expect.addr = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
|
|
|
|
*expect.addr = 42;
|
|
|
|
|
|
|
|
rcu_read_lock();
|
|
|
|
test_free(expect.addr);
|
|
|
|
KUNIT_EXPECT_EQ(test, *expect.addr, (char)42);
|
|
|
|
/*
|
|
|
|
* Up to this point, memory should not have been freed yet, and
|
|
|
|
* therefore there should be no KFENCE report from the above access.
|
|
|
|
*/
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
/* Above access to @expect.addr should not have generated a report! */
|
|
|
|
KUNIT_EXPECT_FALSE(test, report_available());
|
|
|
|
|
|
|
|
/* Only after rcu_barrier() is the memory guaranteed to be freed. */
|
|
|
|
rcu_barrier();
|
|
|
|
|
|
|
|
/* Expect use-after-free. */
|
|
|
|
KUNIT_EXPECT_EQ(test, *expect.addr, (char)42);
|
|
|
|
KUNIT_EXPECT_TRUE(test, report_matches(&expect));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Test krealloc(). */
|
|
|
|
static void test_krealloc(struct kunit *test)
|
|
|
|
{
|
|
|
|
const size_t size = 32;
|
|
|
|
const struct expect_report expect = {
|
|
|
|
.type = KFENCE_ERROR_UAF,
|
|
|
|
.fn = test_krealloc,
|
|
|
|
.addr = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY),
|
|
|
|
.is_write = false,
|
|
|
|
};
|
|
|
|
char *buf = expect.addr;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
KUNIT_EXPECT_FALSE(test, test_cache);
|
|
|
|
KUNIT_EXPECT_EQ(test, ksize(buf), size); /* Precise size match after KFENCE alloc. */
|
|
|
|
for (i = 0; i < size; i++)
|
|
|
|
buf[i] = i + 1;
|
|
|
|
|
|
|
|
/* Check that we successfully change the size. */
|
|
|
|
buf = krealloc(buf, size * 3, GFP_KERNEL); /* Grow. */
|
|
|
|
/* Note: Might no longer be a KFENCE alloc. */
|
|
|
|
KUNIT_EXPECT_GE(test, ksize(buf), size * 3);
|
|
|
|
for (i = 0; i < size; i++)
|
|
|
|
KUNIT_EXPECT_EQ(test, buf[i], (char)(i + 1));
|
|
|
|
for (; i < size * 3; i++) /* Fill to extra bytes. */
|
|
|
|
buf[i] = i + 1;
|
|
|
|
|
|
|
|
buf = krealloc(buf, size * 2, GFP_KERNEL); /* Shrink. */
|
|
|
|
KUNIT_EXPECT_GE(test, ksize(buf), size * 2);
|
|
|
|
for (i = 0; i < size * 2; i++)
|
|
|
|
KUNIT_EXPECT_EQ(test, buf[i], (char)(i + 1));
|
|
|
|
|
|
|
|
buf = krealloc(buf, 0, GFP_KERNEL); /* Free. */
|
|
|
|
KUNIT_EXPECT_EQ(test, (unsigned long)buf, (unsigned long)ZERO_SIZE_PTR);
|
|
|
|
KUNIT_ASSERT_FALSE(test, report_available()); /* No reports yet! */
|
|
|
|
|
|
|
|
READ_ONCE(*expect.addr); /* Ensure krealloc() actually freed earlier KFENCE object. */
|
|
|
|
KUNIT_ASSERT_TRUE(test, report_matches(&expect));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Test that some objects from a bulk allocation belong to KFENCE pool. */
|
|
|
|
static void test_memcache_alloc_bulk(struct kunit *test)
|
|
|
|
{
|
|
|
|
const size_t size = 32;
|
|
|
|
bool pass = false;
|
|
|
|
unsigned long timeout;
|
|
|
|
|
|
|
|
setup_test_cache(test, size, 0, NULL);
|
|
|
|
KUNIT_EXPECT_TRUE(test, test_cache); /* Want memcache. */
|
|
|
|
/*
|
|
|
|
* 100x the sample interval should be more than enough to ensure we get
|
|
|
|
* a KFENCE allocation eventually.
|
|
|
|
*/
|
2022-02-12 08:32:35 +08:00
|
|
|
timeout = jiffies + msecs_to_jiffies(100 * kfence_sample_interval);
|
2021-02-26 09:19:31 +08:00
|
|
|
do {
|
|
|
|
void *objects[100];
|
|
|
|
int i, num = kmem_cache_alloc_bulk(test_cache, GFP_ATOMIC, ARRAY_SIZE(objects),
|
|
|
|
objects);
|
|
|
|
if (!num)
|
|
|
|
continue;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(objects); i++) {
|
|
|
|
if (is_kfence_address(objects[i])) {
|
|
|
|
pass = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
kmem_cache_free_bulk(test_cache, num, objects);
|
|
|
|
/*
|
|
|
|
* kmem_cache_alloc_bulk() disables interrupts, and calling it
|
|
|
|
* in a tight loop may not give KFENCE a chance to switch the
|
|
|
|
* static branch. Call cond_resched() to let KFENCE chime in.
|
|
|
|
*/
|
|
|
|
cond_resched();
|
|
|
|
} while (!pass && time_before(jiffies, timeout));
|
|
|
|
|
|
|
|
KUNIT_EXPECT_TRUE(test, pass);
|
|
|
|
KUNIT_EXPECT_FALSE(test, report_available());
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * KUnit has no mechanism for passing arguments to a test, so the extra
 * information is carried in the test name instead. Each use of this macro
 * expands to two test-case entries for the same function: one with the plain
 * name (default allocator) and one with the '-memcache' suffix (custom
 * memcache).
 */
#define KFENCE_KUNIT_CASE(test_name)				\
	{							\
		.run_case = test_name,				\
		.name = #test_name,				\
	},							\
	{							\
		.run_case = test_name,				\
		.name = #test_name "-memcache",			\
	}
|
|
|
|
|
|
|
|
static struct kunit_case kfence_test_cases[] = {
|
|
|
|
KFENCE_KUNIT_CASE(test_out_of_bounds_read),
|
|
|
|
KFENCE_KUNIT_CASE(test_out_of_bounds_write),
|
|
|
|
KFENCE_KUNIT_CASE(test_use_after_free_read),
|
|
|
|
KFENCE_KUNIT_CASE(test_double_free),
|
|
|
|
KFENCE_KUNIT_CASE(test_invalid_addr_free),
|
|
|
|
KFENCE_KUNIT_CASE(test_corruption),
|
|
|
|
KFENCE_KUNIT_CASE(test_free_bulk),
|
|
|
|
KFENCE_KUNIT_CASE(test_init_on_free),
|
|
|
|
KUNIT_CASE(test_kmalloc_aligned_oob_read),
|
|
|
|
KUNIT_CASE(test_kmalloc_aligned_oob_write),
|
|
|
|
KUNIT_CASE(test_shrink_memcache),
|
|
|
|
KUNIT_CASE(test_memcache_ctor),
|
|
|
|
KUNIT_CASE(test_invalid_access),
|
|
|
|
KUNIT_CASE(test_gfpzero),
|
|
|
|
KUNIT_CASE(test_memcache_typesafe_by_rcu),
|
|
|
|
KUNIT_CASE(test_krealloc),
|
|
|
|
KUNIT_CASE(test_memcache_alloc_bulk),
|
|
|
|
{},
|
|
|
|
};
|
|
|
|
|
|
|
|
/* ===== End test cases ===== */
|
|
|
|
|
|
|
|
static int test_init(struct kunit *test)
|
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
int i;
|
|
|
|
|
2021-09-08 10:56:24 +08:00
|
|
|
if (!__kfence_pool)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2021-02-26 09:19:31 +08:00
|
|
|
spin_lock_irqsave(&observed.lock, flags);
|
|
|
|
for (i = 0; i < ARRAY_SIZE(observed.lines); i++)
|
|
|
|
observed.lines[i][0] = '\0';
|
|
|
|
observed.nlines = 0;
|
|
|
|
spin_unlock_irqrestore(&observed.lock, flags);
|
|
|
|
|
|
|
|
/* Any test with 'memcache' in its name will want a memcache. */
|
|
|
|
if (strstr(test->name, "memcache"))
|
|
|
|
test->priv = TEST_PRIV_WANT_MEMCACHE;
|
|
|
|
else
|
|
|
|
test->priv = NULL;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Per-test teardown: destroy the test cache via test_cache_destroy(). */
static void test_exit(struct kunit *test)
{
	test_cache_destroy();
}
|
|
|
|
|
kfence: test: use new suite_{init/exit} support, add .kunitconfig
Currently, the kfence test suite could not run via "normal" means since
KUnit didn't support per-suite setup/teardown. So it manually called
internal kunit functions to run itself.
This has some downsides, like missing TAP headers => can't use kunit.py
to run or even parse the test results (w/o tweaks).
Use the newly added support and convert it over, adding a .kunitconfig
so it's even easier to run from kunit.py.
People can now run the test via
$ ./tools/testing/kunit/kunit.py run --kunitconfig=mm/kfence --arch=x86_64
...
[11:02:32] Testing complete. Passed: 23, Failed: 0, Crashed: 0, Skipped: 2, Errors: 0
[11:02:32] Elapsed time: 43.562s total, 0.003s configuring, 9.268s building, 34.281s running
Cc: kasan-dev@googlegroups.com
Signed-off-by: Daniel Latypov <dlatypov@google.com>
Tested-by: David Gow <davidgow@google.com>
Reviewed-by: Marco Elver <elver@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
2022-04-30 02:12:58 +08:00
|
|
|
static int kfence_suite_init(struct kunit_suite *suite)
|
2021-02-26 09:19:31 +08:00
|
|
|
{
|
2023-04-13 18:08:59 +08:00
|
|
|
register_trace_console(probe_console, NULL);
|
kfence: test: use new suite_{init/exit} support, add .kunitconfig
Currently, the kfence test suite could not run via "normal" means since
KUnit didn't support per-suite setup/teardown. So it manually called
internal kunit functions to run itself.
This has some downsides, like missing TAP headers => can't use kunit.py
to run or even parse the test results (w/o tweaks).
Use the newly added support and convert it over, adding a .kunitconfig
so it's even easier to run from kunit.py.
People can now run the test via
$ ./tools/testing/kunit/kunit.py run --kunitconfig=mm/kfence --arch=x86_64
...
[11:02:32] Testing complete. Passed: 23, Failed: 0, Crashed: 0, Skipped: 2, Errors: 0
[11:02:32] Elapsed time: 43.562s total, 0.003s configuring, 9.268s building, 34.281s running
Cc: kasan-dev@googlegroups.com
Signed-off-by: Daniel Latypov <dlatypov@google.com>
Tested-by: David Gow <davidgow@google.com>
Reviewed-by: Marco Elver <elver@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
2022-04-30 02:12:58 +08:00
|
|
|
return 0;
|
2021-02-26 09:19:31 +08:00
|
|
|
}
|
|
|
|
|
kfence: test: use new suite_{init/exit} support, add .kunitconfig
Currently, the kfence test suite could not run via "normal" means since
KUnit didn't support per-suite setup/teardown. So it manually called
internal kunit functions to run itself.
This has some downsides, like missing TAP headers => can't use kunit.py
to run or even parse the test results (w/o tweaks).
Use the newly added support and convert it over, adding a .kunitconfig
so it's even easier to run from kunit.py.
People can now run the test via
$ ./tools/testing/kunit/kunit.py run --kunitconfig=mm/kfence --arch=x86_64
...
[11:02:32] Testing complete. Passed: 23, Failed: 0, Crashed: 0, Skipped: 2, Errors: 0
[11:02:32] Elapsed time: 43.562s total, 0.003s configuring, 9.268s building, 34.281s running
Cc: kasan-dev@googlegroups.com
Signed-off-by: Daniel Latypov <dlatypov@google.com>
Tested-by: David Gow <davidgow@google.com>
Reviewed-by: Marco Elver <elver@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
2022-04-30 02:12:58 +08:00
|
|
|
static void kfence_suite_exit(struct kunit_suite *suite)
|
2021-02-26 09:19:31 +08:00
|
|
|
{
|
2023-04-13 18:08:59 +08:00
|
|
|
unregister_trace_console(probe_console, NULL);
|
2021-02-26 09:19:31 +08:00
|
|
|
tracepoint_synchronize_unregister();
|
|
|
|
}
|
|
|
|
|
kfence: test: use new suite_{init/exit} support, add .kunitconfig
Currently, the kfence test suite could not run via "normal" means since
KUnit didn't support per-suite setup/teardown. So it manually called
internal kunit functions to run itself.
This has some downsides, like missing TAP headers => can't use kunit.py
to run or even parse the test results (w/o tweaks).
Use the newly added support and convert it over, adding a .kunitconfig
so it's even easier to run from kunit.py.
People can now run the test via
$ ./tools/testing/kunit/kunit.py run --kunitconfig=mm/kfence --arch=x86_64
...
[11:02:32] Testing complete. Passed: 23, Failed: 0, Crashed: 0, Skipped: 2, Errors: 0
[11:02:32] Elapsed time: 43.562s total, 0.003s configuring, 9.268s building, 34.281s running
Cc: kasan-dev@googlegroups.com
Signed-off-by: Daniel Latypov <dlatypov@google.com>
Tested-by: David Gow <davidgow@google.com>
Reviewed-by: Marco Elver <elver@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
2022-04-30 02:12:58 +08:00
|
|
|
static struct kunit_suite kfence_test_suite = {
|
|
|
|
.name = "kfence",
|
|
|
|
.test_cases = kfence_test_cases,
|
|
|
|
.init = test_init,
|
|
|
|
.exit = test_exit,
|
|
|
|
.suite_init = kfence_suite_init,
|
|
|
|
.suite_exit = kfence_suite_exit,
|
|
|
|
};
|
|
|
|
|
|
|
|
kunit_test_suites(&kfence_test_suite);
|
2021-02-26 09:19:31 +08:00
|
|
|
|
|
|
|
MODULE_LICENSE("GPL v2");
|
|
|
|
MODULE_AUTHOR("Alexander Potapenko <glider@google.com>, Marco Elver <elver@google.com>");
|