Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton: - a few misc bits - ocfs2 updates - almost all of MM * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (131 commits) memory hotplug: fix comments when adding section mm: make alloc_node_mem_map a void call if we don't have CONFIG_FLAT_NODE_MEM_MAP mm: simplify nodemask printing mm,oom_reaper: remove pointless kthread_run() error check mm/page_ext.c: check if page_ext is not prepared writeback: remove unused function parameter mm: do not rely on preempt_count in print_vma_addr mm, sparse: do not swamp log with huge vmemmap allocation failures mm/hmm: remove redundant variable align_end mm/list_lru.c: mark expected switch fall-through mm/shmem.c: mark expected switch fall-through mm/page_alloc.c: broken deferred calculation mm: don't warn about allocations which stall for too long fs: fuse: account fuse_inode slab memory as reclaimable mm, page_alloc: fix potential false positive in __zone_watermark_ok mm: mlock: remove lru_add_drain_all() mm, sysctl: make NUMA stats configurable shmem: convert shmem_init_inodecache() to void Unify migrate_pages and move_pages access checks mm, pagevec: rename pagevec drained field ...
This commit is contained in:
commit
7c225c69f8
|
@ -1864,13 +1864,6 @@
|
||||||
Built with CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y,
|
Built with CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y,
|
||||||
the default is off.
|
the default is off.
|
||||||
|
|
||||||
kmemcheck= [X86] Boot-time kmemcheck enable/disable/one-shot mode
|
|
||||||
Valid arguments: 0, 1, 2
|
|
||||||
kmemcheck=0 (disabled)
|
|
||||||
kmemcheck=1 (enabled)
|
|
||||||
kmemcheck=2 (one-shot mode)
|
|
||||||
Default: 2 (one-shot mode)
|
|
||||||
|
|
||||||
kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
|
kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
|
||||||
Default is 0 (don't ignore, but inject #GP)
|
Default is 0 (don't ignore, but inject #GP)
|
||||||
|
|
||||||
|
|
|
@ -21,7 +21,6 @@ whole; patches welcome!
|
||||||
kasan
|
kasan
|
||||||
ubsan
|
ubsan
|
||||||
kmemleak
|
kmemleak
|
||||||
kmemcheck
|
|
||||||
gdb-kernel-debugging
|
gdb-kernel-debugging
|
||||||
kgdb
|
kgdb
|
||||||
kselftest
|
kselftest
|
||||||
|
|
|
@ -1,733 +0,0 @@
|
||||||
Getting started with kmemcheck
|
|
||||||
==============================
|
|
||||||
|
|
||||||
Vegard Nossum <vegardno@ifi.uio.no>
|
|
||||||
|
|
||||||
|
|
||||||
Introduction
|
|
||||||
------------
|
|
||||||
|
|
||||||
kmemcheck is a debugging feature for the Linux Kernel. More specifically, it
|
|
||||||
is a dynamic checker that detects and warns about some uses of uninitialized
|
|
||||||
memory.
|
|
||||||
|
|
||||||
Userspace programmers might be familiar with Valgrind's memcheck. The main
|
|
||||||
difference between memcheck and kmemcheck is that memcheck works for userspace
|
|
||||||
programs only, and kmemcheck works for the kernel only. The implementations
|
|
||||||
are of course vastly different. Because of this, kmemcheck is not as accurate
|
|
||||||
as memcheck, but it turns out to be good enough in practice to discover real
|
|
||||||
programmer errors that the compiler is not able to find through static
|
|
||||||
analysis.
|
|
||||||
|
|
||||||
Enabling kmemcheck on a kernel will probably slow it down to the extent that
|
|
||||||
the machine will not be usable for normal workloads such as an
|
|
||||||
interactive desktop. kmemcheck will also cause the kernel to use about twice
|
|
||||||
as much memory as normal. For this reason, kmemcheck is strictly a debugging
|
|
||||||
feature.
|
|
||||||
|
|
||||||
|
|
||||||
Downloading
|
|
||||||
-----------
|
|
||||||
|
|
||||||
As of version 2.6.31-rc1, kmemcheck is included in the mainline kernel.
|
|
||||||
|
|
||||||
|
|
||||||
Configuring and compiling
|
|
||||||
-------------------------
|
|
||||||
|
|
||||||
kmemcheck only works for the x86 (both 32- and 64-bit) platform. A number of
|
|
||||||
configuration variables must have specific settings in order for the kmemcheck
|
|
||||||
menu to even appear in "menuconfig". These are:
|
|
||||||
|
|
||||||
- ``CONFIG_CC_OPTIMIZE_FOR_SIZE=n``
|
|
||||||
This option is located under "General setup" / "Optimize for size".
|
|
||||||
|
|
||||||
Without this, gcc will use certain optimizations that usually lead to
|
|
||||||
false positive warnings from kmemcheck. An example of this is a 16-bit
|
|
||||||
field in a struct, where gcc may load 32 bits, then discard the upper
|
|
||||||
16 bits. kmemcheck sees only the 32-bit load, and may trigger a
|
|
||||||
warning for the upper 16 bits (if they're uninitialized).
|
|
||||||
|
|
||||||
- ``CONFIG_SLAB=y`` or ``CONFIG_SLUB=y``
|
|
||||||
This option is located under "General setup" / "Choose SLAB
|
|
||||||
allocator".
|
|
||||||
|
|
||||||
- ``CONFIG_FUNCTION_TRACER=n``
|
|
||||||
This option is located under "Kernel hacking" / "Tracers" / "Kernel
|
|
||||||
Function Tracer"
|
|
||||||
|
|
||||||
When function tracing is compiled in, gcc emits a call to another
|
|
||||||
function at the beginning of every function. This means that when the
|
|
||||||
page fault handler is called, the ftrace framework will be called
|
|
||||||
before kmemcheck has had a chance to handle the fault. If ftrace then
|
|
||||||
modifies memory that was tracked by kmemcheck, the result is an
|
|
||||||
endless recursive page fault.
|
|
||||||
|
|
||||||
- ``CONFIG_DEBUG_PAGEALLOC=n``
|
|
||||||
This option is located under "Kernel hacking" / "Memory Debugging"
|
|
||||||
/ "Debug page memory allocations".
|
|
||||||
|
|
||||||
In addition, I highly recommend turning on ``CONFIG_DEBUG_INFO=y``. This is also
|
|
||||||
located under "Kernel hacking". With this, you will be able to get line number
|
|
||||||
information from the kmemcheck warnings, which is extremely valuable in
|
|
||||||
debugging a problem. This option is not mandatory, however, because it slows
|
|
||||||
down the compilation process and produces a much bigger kernel image.
|
|
||||||
|
|
||||||
Now the kmemcheck menu should be visible (under "Kernel hacking" / "Memory
|
|
||||||
Debugging" / "kmemcheck: trap use of uninitialized memory"). Here follows
|
|
||||||
a description of the kmemcheck configuration variables:
|
|
||||||
|
|
||||||
- ``CONFIG_KMEMCHECK``
|
|
||||||
This must be enabled in order to use kmemcheck at all...
|
|
||||||
|
|
||||||
- ``CONFIG_KMEMCHECK_``[``DISABLED`` | ``ENABLED`` | ``ONESHOT``]``_BY_DEFAULT``
|
|
||||||
This option controls the status of kmemcheck at boot-time. "Enabled"
|
|
||||||
will enable kmemcheck right from the start, "disabled" will boot the
|
|
||||||
kernel as normal (but with the kmemcheck code compiled in, so it can
|
|
||||||
be enabled at run-time after the kernel has booted), and "one-shot" is
|
|
||||||
a special mode which will turn kmemcheck off automatically after
|
|
||||||
detecting the first use of uninitialized memory.
|
|
||||||
|
|
||||||
If you are using kmemcheck to actively debug a problem, then you
|
|
||||||
probably want to choose "enabled" here.
|
|
||||||
|
|
||||||
The one-shot mode is mostly useful in automated test setups because it
|
|
||||||
can prevent floods of warnings and increase the chances of the machine
|
|
||||||
surviving in case something is really wrong. In other cases, the one-
|
|
||||||
shot mode could actually be counter-productive because it would turn
|
|
||||||
itself off at the very first error -- in the case of a false positive
|
|
||||||
too -- and this would get in the way of debugging the specific
|
|
||||||
problem you were interested in.
|
|
||||||
|
|
||||||
If you would like to use your kernel as normal, but with a chance to
|
|
||||||
enable kmemcheck in case of some problem, it might be a good idea to
|
|
||||||
choose "disabled" here. When kmemcheck is disabled, most of the run-
|
|
||||||
time overhead is not incurred, and the kernel will be almost as fast
|
|
||||||
as normal.
|
|
||||||
|
|
||||||
- ``CONFIG_KMEMCHECK_QUEUE_SIZE``
|
|
||||||
Select the maximum number of error reports to store in an internal
|
|
||||||
(fixed-size) buffer. Since errors can occur virtually anywhere and in
|
|
||||||
any context, we need a temporary storage area which is guaranteed not
|
|
||||||
to generate any other page faults when accessed. The queue will be
|
|
||||||
emptied as soon as a tasklet may be scheduled. If the queue is full,
|
|
||||||
new error reports will be lost.
|
|
||||||
|
|
||||||
The default value of 64 is probably fine. If some code produces more
|
|
||||||
than 64 errors within an irqs-off section, then the code is likely to
|
|
||||||
produce many, many more, too, and these additional reports seldom give
|
|
||||||
any more information (the first report is usually the most valuable
|
|
||||||
anyway).
|
|
||||||
|
|
||||||
This number might have to be adjusted if you are not using a serial
|
|
||||||
console or similar to capture the kernel log. If you are using the
|
|
||||||
"dmesg" command to save the log, then getting a lot of kmemcheck
|
|
||||||
warnings might overflow the kernel log itself, and the earlier reports
|
|
||||||
will get lost in that way instead. Try setting this to 10 or so on
|
|
||||||
such a setup.
|
|
||||||
|
|
||||||
- ``CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT``
|
|
||||||
Select the number of shadow bytes to save along with each entry of the
|
|
||||||
error-report queue. These bytes indicate what parts of an allocation
|
|
||||||
are initialized, uninitialized, etc. and will be displayed when an
|
|
||||||
error is detected to help the debugging of a particular problem.
|
|
||||||
|
|
||||||
The number entered here is actually the logarithm of the number of
|
|
||||||
bytes that will be saved. So if you pick for example 5 here, kmemcheck
|
|
||||||
will save 2^5 = 32 bytes.
|
|
||||||
|
|
||||||
The default value should be fine for debugging most problems. It also
|
|
||||||
fits nicely within 80 columns.
|
|
||||||
|
|
||||||
- ``CONFIG_KMEMCHECK_PARTIAL_OK``
|
|
||||||
This option (when enabled) works around certain GCC optimizations that
|
|
||||||
produce 32-bit reads from 16-bit variables where the upper 16 bits are
|
|
||||||
thrown away afterwards.
|
|
||||||
|
|
||||||
The default value (enabled) is recommended. This may of course hide
|
|
||||||
some real errors, but disabling it would probably produce a lot of
|
|
||||||
false positives.
|
|
||||||
|
|
||||||
- ``CONFIG_KMEMCHECK_BITOPS_OK``
|
|
||||||
This option silences warnings that would be generated for bit-field
|
|
||||||
accesses where not all the bits are initialized at the same time. This
|
|
||||||
may also hide some real bugs.
|
|
||||||
|
|
||||||
This option is probably obsolete, or it should be replaced with
|
|
||||||
the kmemcheck-/bitfield-annotations for the code in question. The
|
|
||||||
default value is therefore fine.
|
|
||||||
|
|
||||||
Now compile the kernel as usual.
|
|
||||||
|
|
||||||
|
|
||||||
How to use
|
|
||||||
----------
|
|
||||||
|
|
||||||
Booting
|
|
||||||
~~~~~~~
|
|
||||||
|
|
||||||
First some information about the command-line options. There is only one
|
|
||||||
option specific to kmemcheck, and this is called "kmemcheck". It can be used
|
|
||||||
to override the default mode as chosen by the ``CONFIG_KMEMCHECK_*_BY_DEFAULT``
|
|
||||||
option. Its possible settings are:
|
|
||||||
|
|
||||||
- ``kmemcheck=0`` (disabled)
|
|
||||||
- ``kmemcheck=1`` (enabled)
|
|
||||||
- ``kmemcheck=2`` (one-shot mode)
|
|
||||||
|
|
||||||
If SLUB debugging has been enabled in the kernel, it may take precedence over
|
|
||||||
kmemcheck in such a way that the slab caches which are under SLUB debugging
|
|
||||||
will not be tracked by kmemcheck. In order to ensure that this doesn't happen
|
|
||||||
(even though it shouldn't by default), use SLUB's boot option ``slub_debug``,
|
|
||||||
like this: ``slub_debug=-``
|
|
||||||
|
|
||||||
In fact, this option may also be used for fine-grained control over SLUB vs.
|
|
||||||
kmemcheck. For example, if the command line includes
|
|
||||||
``kmemcheck=1 slub_debug=,dentry``, then SLUB debugging will be used only
|
|
||||||
for the "dentry" slab cache, and with kmemcheck tracking all the other
|
|
||||||
caches. This is advanced usage, however, and is not generally recommended.
|
|
||||||
|
|
||||||
|
|
||||||
Run-time enable/disable
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
When the kernel has booted, it is possible to enable or disable kmemcheck at
|
|
||||||
run-time. WARNING: This feature is still experimental and may cause false
|
|
||||||
positive warnings to appear. Therefore, try not to use this. If you find that
|
|
||||||
it doesn't work properly (e.g. you see an unreasonable number of warnings), I
|
|
||||||
will be happy to take bug reports.
|
|
||||||
|
|
||||||
Use the file ``/proc/sys/kernel/kmemcheck`` for this purpose, e.g.::
|
|
||||||
|
|
||||||
$ echo 0 > /proc/sys/kernel/kmemcheck # disables kmemcheck
|
|
||||||
|
|
||||||
The numbers are the same as for the ``kmemcheck=`` command-line option.
|
|
||||||
|
|
||||||
|
|
||||||
Debugging
|
|
||||||
~~~~~~~~~
|
|
||||||
|
|
||||||
A typical report will look something like this::
|
|
||||||
|
|
||||||
WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024)
|
|
||||||
80000000000000000000000000000000000000000088ffff0000000000000000
|
|
||||||
i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u
|
|
||||||
^
|
|
||||||
|
|
||||||
Pid: 1856, comm: ntpdate Not tainted 2.6.29-rc5 #264 945P-A
|
|
||||||
RIP: 0010:[<ffffffff8104ede8>] [<ffffffff8104ede8>] __dequeue_signal+0xc8/0x190
|
|
||||||
RSP: 0018:ffff88003cdf7d98 EFLAGS: 00210002
|
|
||||||
RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009
|
|
||||||
RDX: ffff88003e5d6018 RSI: ffff88003e5d6024 RDI: ffff88003cdf7e84
|
|
||||||
RBP: ffff88003cdf7db8 R08: ffff88003e5d6000 R09: 0000000000000000
|
|
||||||
R10: 0000000000000080 R11: 0000000000000000 R12: 000000000000000e
|
|
||||||
R13: ffff88003cdf7e78 R14: ffff88003d530710 R15: ffff88003d5a98c8
|
|
||||||
FS: 0000000000000000(0000) GS:ffff880001982000(0063) knlGS:00000
|
|
||||||
CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033
|
|
||||||
CR2: ffff88003f806ea0 CR3: 000000003c036000 CR4: 00000000000006a0
|
|
||||||
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
|
|
||||||
DR3: 0000000000000000 DR6: 00000000ffff4ff0 DR7: 0000000000000400
|
|
||||||
[<ffffffff8104f04e>] dequeue_signal+0x8e/0x170
|
|
||||||
[<ffffffff81050bd8>] get_signal_to_deliver+0x98/0x390
|
|
||||||
[<ffffffff8100b87d>] do_notify_resume+0xad/0x7d0
|
|
||||||
[<ffffffff8100c7b5>] int_signal+0x12/0x17
|
|
||||||
[<ffffffffffffffff>] 0xffffffffffffffff
|
|
||||||
|
|
||||||
The single most valuable information in this report is the RIP (or EIP on 32-
|
|
||||||
bit) value. This will help us pinpoint exactly which instruction caused
|
|
||||||
the warning.
|
|
||||||
|
|
||||||
If your kernel was compiled with ``CONFIG_DEBUG_INFO=y``, then all we have to do
|
|
||||||
is give this address to the addr2line program, like this::
|
|
||||||
|
|
||||||
$ addr2line -e vmlinux -i ffffffff8104ede8
|
|
||||||
arch/x86/include/asm/string_64.h:12
|
|
||||||
include/asm-generic/siginfo.h:287
|
|
||||||
kernel/signal.c:380
|
|
||||||
kernel/signal.c:410
|
|
||||||
|
|
||||||
The "``-e vmlinux``" tells addr2line which file to look in. **IMPORTANT:**
|
|
||||||
This must be the vmlinux of the kernel that produced the warning in the
|
|
||||||
first place! If not, the line number information will almost certainly be
|
|
||||||
wrong.
|
|
||||||
|
|
||||||
The "``-i``" tells addr2line to also print the line numbers of inlined
|
|
||||||
functions. In this case, the flag was very important, because otherwise,
|
|
||||||
it would only have printed the first line, which is just a call to
|
|
||||||
``memcpy()``, which could be called from a thousand places in the kernel, and
|
|
||||||
is therefore not very useful. These inlined functions would not show up in
|
|
||||||
the stack trace above, simply because the kernel doesn't load the extra
|
|
||||||
debugging information. This technique can of course be used with ordinary
|
|
||||||
kernel oopses as well.
|
|
||||||
|
|
||||||
In this case, it's the caller of ``memcpy()`` that is interesting, and it can be
|
|
||||||
found in ``include/asm-generic/siginfo.h``, line 287::
|
|
||||||
|
|
||||||
281 static inline void copy_siginfo(struct siginfo *to, struct siginfo *from)
|
|
||||||
282 {
|
|
||||||
283 if (from->si_code < 0)
|
|
||||||
284 memcpy(to, from, sizeof(*to));
|
|
||||||
285 else
|
|
||||||
286 /* _sigchld is currently the largest know union member */
|
|
||||||
287 memcpy(to, from, __ARCH_SI_PREAMBLE_SIZE + sizeof(from->_sifields._sigchld));
|
|
||||||
288 }
|
|
||||||
|
|
||||||
Since this was a read (kmemcheck usually warns about reads only, though it can
|
|
||||||
warn about writes to unallocated or freed memory as well), it was probably the
|
|
||||||
"from" argument which contained some uninitialized bytes. Following the chain
|
|
||||||
of calls, we move upwards to see where "from" was allocated or initialized,
|
|
||||||
``kernel/signal.c``, line 380::
|
|
||||||
|
|
||||||
359 static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
|
|
||||||
360 {
|
|
||||||
...
|
|
||||||
367 list_for_each_entry(q, &list->list, list) {
|
|
||||||
368 if (q->info.si_signo == sig) {
|
|
||||||
369 if (first)
|
|
||||||
370 goto still_pending;
|
|
||||||
371 first = q;
|
|
||||||
...
|
|
||||||
377 if (first) {
|
|
||||||
378 still_pending:
|
|
||||||
379 list_del_init(&first->list);
|
|
||||||
380 copy_siginfo(info, &first->info);
|
|
||||||
381 __sigqueue_free(first);
|
|
||||||
...
|
|
||||||
392 }
|
|
||||||
393 }
|
|
||||||
|
|
||||||
Here, it is ``&first->info`` that is being passed on to ``copy_siginfo()``. The
|
|
||||||
variable ``first`` was found on a list -- passed in as the second argument to
|
|
||||||
``collect_signal()``. We continue our journey through the stack, to figure out
|
|
||||||
where the item on "list" was allocated or initialized. We move to line 410::
|
|
||||||
|
|
||||||
395 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
|
|
||||||
396 siginfo_t *info)
|
|
||||||
397 {
|
|
||||||
...
|
|
||||||
410 collect_signal(sig, pending, info);
|
|
||||||
...
|
|
||||||
414 }
|
|
||||||
|
|
||||||
Now we need to follow the ``pending`` pointer, since that is being passed on to
|
|
||||||
``collect_signal()`` as ``list``. At this point, we've run out of lines from the
|
|
||||||
"addr2line" output. Not to worry, we just paste the next addresses from the
|
|
||||||
kmemcheck stack dump, i.e.::
|
|
||||||
|
|
||||||
[<ffffffff8104f04e>] dequeue_signal+0x8e/0x170
|
|
||||||
[<ffffffff81050bd8>] get_signal_to_deliver+0x98/0x390
|
|
||||||
[<ffffffff8100b87d>] do_notify_resume+0xad/0x7d0
|
|
||||||
[<ffffffff8100c7b5>] int_signal+0x12/0x17
|
|
||||||
|
|
||||||
$ addr2line -e vmlinux -i ffffffff8104f04e ffffffff81050bd8 \
|
|
||||||
ffffffff8100b87d ffffffff8100c7b5
|
|
||||||
kernel/signal.c:446
|
|
||||||
kernel/signal.c:1806
|
|
||||||
arch/x86/kernel/signal.c:805
|
|
||||||
arch/x86/kernel/signal.c:871
|
|
||||||
arch/x86/kernel/entry_64.S:694
|
|
||||||
|
|
||||||
Remember that since these addresses were found on the stack and not as the
|
|
||||||
RIP value, they actually point to the _next_ instruction (they are return
|
|
||||||
addresses). This becomes obvious when we look at the code for line 446::
|
|
||||||
|
|
||||||
422 int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
|
|
||||||
423 {
|
|
||||||
...
|
|
||||||
431 signr = __dequeue_signal(&tsk->signal->shared_pending,
|
|
||||||
432 mask, info);
|
|
||||||
433 /*
|
|
||||||
434 * itimer signal ?
|
|
||||||
435 *
|
|
||||||
436 * itimers are process shared and we restart periodic
|
|
||||||
437 * itimers in the signal delivery path to prevent DoS
|
|
||||||
438 * attacks in the high resolution timer case. This is
|
|
||||||
439 * compliant with the old way of self restarting
|
|
||||||
440 * itimers, as the SIGALRM is a legacy signal and only
|
|
||||||
441 * queued once. Changing the restart behaviour to
|
|
||||||
442 * restart the timer in the signal dequeue path is
|
|
||||||
443 * reducing the timer noise on heavy loaded !highres
|
|
||||||
444 * systems too.
|
|
||||||
445 */
|
|
||||||
446 if (unlikely(signr == SIGALRM)) {
|
|
||||||
...
|
|
||||||
489 }
|
|
||||||
|
|
||||||
So instead of looking at 446, we should be looking at 431, which is the line
|
|
||||||
that executes just before 446. Here we see that what we are looking for is
|
|
||||||
``&tsk->signal->shared_pending``.
|
|
||||||
|
|
||||||
Our next task is now to figure out which function puts items on this
|
|
||||||
``shared_pending`` list. A crude, but efficient tool, is ``git grep``::
|
|
||||||
|
|
||||||
$ git grep -n 'shared_pending' kernel/
|
|
||||||
...
|
|
||||||
kernel/signal.c:828: pending = group ? &t->signal->shared_pending : &t->pending;
|
|
||||||
kernel/signal.c:1339: pending = group ? &t->signal->shared_pending : &t->pending;
|
|
||||||
...
|
|
||||||
|
|
||||||
There were more results, but none of them were related to list operations,
|
|
||||||
and these were the only assignments. We inspect the line numbers more closely
|
|
||||||
and find that this is indeed where items are being added to the list::
|
|
||||||
|
|
||||||
816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
|
|
||||||
817 int group)
|
|
||||||
818 {
|
|
||||||
...
|
|
||||||
828 pending = group ? &t->signal->shared_pending : &t->pending;
|
|
||||||
...
|
|
||||||
851 q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN &&
|
|
||||||
852 (is_si_special(info) ||
|
|
||||||
853 info->si_code >= 0)));
|
|
||||||
854 if (q) {
|
|
||||||
855 list_add_tail(&q->list, &pending->list);
|
|
||||||
...
|
|
||||||
890 }
|
|
||||||
|
|
||||||
and::
|
|
||||||
|
|
||||||
1309 int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
|
|
||||||
1310 {
|
|
||||||
....
|
|
||||||
1339 pending = group ? &t->signal->shared_pending : &t->pending;
|
|
||||||
1340 list_add_tail(&q->list, &pending->list);
|
|
||||||
....
|
|
||||||
1347 }
|
|
||||||
|
|
||||||
In the first case, the list element we are looking for, ``q``, is being
|
|
||||||
returned from the function ``__sigqueue_alloc()``, which looks like an
|
|
||||||
allocation function. Let's take a look at it::
|
|
||||||
|
|
||||||
187 static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
|
|
||||||
188 int override_rlimit)
|
|
||||||
189 {
|
|
||||||
190 struct sigqueue *q = NULL;
|
|
||||||
191 struct user_struct *user;
|
|
||||||
192
|
|
||||||
193 /*
|
|
||||||
194 * We won't get problems with the target's UID changing under us
|
|
||||||
195 * because changing it requires RCU be used, and if t != current, the
|
|
||||||
196 * caller must be holding the RCU readlock (by way of a spinlock) and
|
|
||||||
197 * we use RCU protection here
|
|
||||||
198 */
|
|
||||||
199 user = get_uid(__task_cred(t)->user);
|
|
||||||
200 atomic_inc(&user->sigpending);
|
|
||||||
201 if (override_rlimit ||
|
|
||||||
202 atomic_read(&user->sigpending) <=
|
|
||||||
203 t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
|
|
||||||
204 q = kmem_cache_alloc(sigqueue_cachep, flags);
|
|
||||||
205 if (unlikely(q == NULL)) {
|
|
||||||
206 atomic_dec(&user->sigpending);
|
|
||||||
207 free_uid(user);
|
|
||||||
208 } else {
|
|
||||||
209 INIT_LIST_HEAD(&q->list);
|
|
||||||
210 q->flags = 0;
|
|
||||||
211 q->user = user;
|
|
||||||
212 }
|
|
||||||
213
|
|
||||||
214 return q;
|
|
||||||
215 }
|
|
||||||
|
|
||||||
We see that this function initializes ``q->list``, ``q->flags``, and
|
|
||||||
``q->user``. It seems that now is the time to look at the definition of
|
|
||||||
``struct sigqueue``, e.g.::
|
|
||||||
|
|
||||||
14 struct sigqueue {
|
|
||||||
15 struct list_head list;
|
|
||||||
16 int flags;
|
|
||||||
17 siginfo_t info;
|
|
||||||
18 struct user_struct *user;
|
|
||||||
19 };
|
|
||||||
|
|
||||||
And, you might remember, it was a ``memcpy()`` on ``&first->info`` that
|
|
||||||
caused the warning, so this makes perfect sense. It also seems reasonable
|
|
||||||
to assume that it is the caller of ``__sigqueue_alloc()`` that has the
|
|
||||||
responsibility of filling out (initializing) this member.
|
|
||||||
|
|
||||||
But just which fields of the struct were uninitialized? Let's look at
|
|
||||||
kmemcheck's report again::
|
|
||||||
|
|
||||||
WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024)
|
|
||||||
80000000000000000000000000000000000000000088ffff0000000000000000
|
|
||||||
i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u
|
|
||||||
^
|
|
||||||
|
|
||||||
These first two lines are the memory dump of the memory object itself, and
|
|
||||||
the shadow bytemap, respectively. The memory object itself is in this case
|
|
||||||
``&first->info``. Just beware that the start of this dump is NOT the start
|
|
||||||
of the object itself! The position of the caret (^) corresponds with the
|
|
||||||
address of the read (ffff88003e4a2024).
|
|
||||||
|
|
||||||
The shadow bytemap dump legend is as follows:
|
|
||||||
|
|
||||||
- i: initialized
|
|
||||||
- u: uninitialized
|
|
||||||
- a: unallocated (memory has been allocated by the slab layer, but has not
|
|
||||||
yet been handed off to anybody)
|
|
||||||
- f: freed (memory has been allocated by the slab layer, but has been freed
|
|
||||||
by the previous owner)
|
|
||||||
|
|
||||||
In order to figure out where (relative to the start of the object) the
|
|
||||||
uninitialized memory was located, we have to look at the disassembly. For
|
|
||||||
that, we'll need the RIP address again::
|
|
||||||
|
|
||||||
RIP: 0010:[<ffffffff8104ede8>] [<ffffffff8104ede8>] __dequeue_signal+0xc8/0x190
|
|
||||||
|
|
||||||
$ objdump -d --no-show-raw-insn vmlinux | grep -C 8 ffffffff8104ede8:
|
|
||||||
ffffffff8104edc8: mov %r8,0x8(%r8)
|
|
||||||
ffffffff8104edcc: test %r10d,%r10d
|
|
||||||
ffffffff8104edcf: js ffffffff8104ee88 <__dequeue_signal+0x168>
|
|
||||||
ffffffff8104edd5: mov %rax,%rdx
|
|
||||||
ffffffff8104edd8: mov $0xc,%ecx
|
|
||||||
ffffffff8104eddd: mov %r13,%rdi
|
|
||||||
ffffffff8104ede0: mov $0x30,%eax
|
|
||||||
ffffffff8104ede5: mov %rdx,%rsi
|
|
||||||
ffffffff8104ede8: rep movsl %ds:(%rsi),%es:(%rdi)
|
|
||||||
ffffffff8104edea: test $0x2,%al
|
|
||||||
ffffffff8104edec: je ffffffff8104edf0 <__dequeue_signal+0xd0>
|
|
||||||
ffffffff8104edee: movsw %ds:(%rsi),%es:(%rdi)
|
|
||||||
ffffffff8104edf0: test $0x1,%al
|
|
||||||
ffffffff8104edf2: je ffffffff8104edf5 <__dequeue_signal+0xd5>
|
|
||||||
ffffffff8104edf4: movsb %ds:(%rsi),%es:(%rdi)
|
|
||||||
ffffffff8104edf5: mov %r8,%rdi
|
|
||||||
ffffffff8104edf8: callq ffffffff8104de60 <__sigqueue_free>
|
|
||||||
|
|
||||||
As expected, it's the "``rep movsl``" instruction from the ``memcpy()``
|
|
||||||
that causes the warning. We know that ``REP MOVSL`` uses the register
|
|
||||||
``RCX`` to count the number of remaining iterations. By taking a look at the
|
|
||||||
register dump again (from the kmemcheck report), we can figure out how many
|
|
||||||
bytes were left to copy::
|
|
||||||
|
|
||||||
RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009
|
|
||||||
|
|
||||||
By looking at the disassembly, we also see that ``%ecx`` is being loaded
|
|
||||||
with the value ``$0xc`` just before (ffffffff8104edd8), so we are very
|
|
||||||
lucky. Keep in mind that this is the number of iterations, not bytes. And
|
|
||||||
since this is a "long" operation, we need to multiply by 4 to get the
|
|
||||||
number of bytes. So this means that the uninitialized value was encountered
|
|
||||||
at 4 * (0xc - 0x9) = 12 bytes from the start of the object.
|
|
||||||
|
|
||||||
We can now try to figure out which field of the "``struct siginfo``"
|
|
||||||
was not initialized. This is the beginning of the struct::
|
|
||||||
|
|
||||||
40 typedef struct siginfo {
|
|
||||||
41 int si_signo;
|
|
||||||
42 int si_errno;
|
|
||||||
43 int si_code;
|
|
||||||
44
|
|
||||||
45 union {
|
|
||||||
..
|
|
||||||
92 } _sifields;
|
|
||||||
93 } siginfo_t;
|
|
||||||
|
|
||||||
On 64-bit, the int is 4 bytes long, so it must be the union member that has
|
|
||||||
not been initialized. We can verify this using gdb::
|
|
||||||
|
|
||||||
$ gdb vmlinux
|
|
||||||
...
|
|
||||||
(gdb) p &((struct siginfo *) 0)->_sifields
|
|
||||||
$1 = (union {...} *) 0x10
|
|
||||||
|
|
||||||
Actually, it seems that the union member is located at offset 0x10 -- which
|
|
||||||
means that gcc has inserted 4 bytes of padding between the members ``si_code``
|
|
||||||
and ``_sifields``. We can now get a fuller picture of the memory dump::
|
|
||||||
|
|
||||||
_----------------------------=> si_code
|
|
||||||
/ _--------------------=> (padding)
|
|
||||||
| / _------------=> _sifields(._kill._pid)
|
|
||||||
| | / _----=> _sifields(._kill._uid)
|
|
||||||
| | | /
|
|
||||||
-------|-------|-------|-------|
|
|
||||||
80000000000000000000000000000000000000000088ffff0000000000000000
|
|
||||||
i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u
|
|
||||||
|
|
||||||
This allows us to realize another important fact: ``si_code`` contains the
|
|
||||||
value 0x80. Remember that x86 is little endian, so the first 4 bytes
|
|
||||||
"80000000" are really the number 0x00000080. With a bit of research, we
|
|
||||||
find that this is actually the constant ``SI_KERNEL`` defined in
|
|
||||||
``include/asm-generic/siginfo.h``::
|
|
||||||
|
|
||||||
144 #define SI_KERNEL 0x80 /* sent by the kernel from somewhere */
|
|
||||||
|
|
||||||
This macro is used in exactly one place in the x86 kernel: In ``send_signal()``
|
|
||||||
in ``kernel/signal.c``::
|
|
||||||
|
|
||||||
816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
|
|
||||||
817 int group)
|
|
||||||
818 {
|
|
||||||
...
|
|
||||||
828 pending = group ? &t->signal->shared_pending : &t->pending;
|
|
||||||
...
|
|
||||||
851 q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN &&
|
|
||||||
852 (is_si_special(info) ||
|
|
||||||
853 info->si_code >= 0)));
|
|
||||||
854 if (q) {
|
|
||||||
855 list_add_tail(&q->list, &pending->list);
|
|
||||||
856 switch ((unsigned long) info) {
|
|
||||||
...
|
|
||||||
865 case (unsigned long) SEND_SIG_PRIV:
|
|
||||||
866 q->info.si_signo = sig;
|
|
||||||
867 q->info.si_errno = 0;
|
|
||||||
868 q->info.si_code = SI_KERNEL;
|
|
||||||
869 q->info.si_pid = 0;
|
|
||||||
870 q->info.si_uid = 0;
|
|
||||||
871 break;
|
|
||||||
...
|
|
||||||
890 }
|
|
||||||
|
|
||||||
Not only does this match with the ``.si_code`` member, it also matches the place
|
|
||||||
we found earlier when looking for where siginfo_t objects are enqueued on the
|
|
||||||
``shared_pending`` list.
|
|
||||||
|
|
||||||
So to sum up: It seems that it is the padding introduced by the compiler
|
|
||||||
between two struct fields that is uninitialized, and this gets reported when
|
|
||||||
we do a ``memcpy()`` on the struct. This means that we have identified a false
|
|
||||||
positive warning.
|
|
||||||
|
|
||||||
Normally, kmemcheck will not report uninitialized accesses in ``memcpy()`` calls
|
|
||||||
when both the source and destination addresses are tracked. (Instead, we copy
|
|
||||||
the shadow bytemap as well). In this case, the destination address clearly
|
|
||||||
was not tracked. We can dig a little deeper into the stack trace from above::
|
|
||||||
|
|
||||||
arch/x86/kernel/signal.c:805
|
|
||||||
arch/x86/kernel/signal.c:871
|
|
||||||
arch/x86/kernel/entry_64.S:694
|
|
||||||
|
|
||||||
And we clearly see that the destination siginfo object is located on the
|
|
||||||
stack::
|
|
||||||
|
|
||||||
782 static void do_signal(struct pt_regs *regs)
|
|
||||||
783 {
|
|
||||||
784 struct k_sigaction ka;
|
|
||||||
785 siginfo_t info;
|
|
||||||
...
|
|
||||||
804 signr = get_signal_to_deliver(&info, &ka, regs, NULL);
|
|
||||||
...
|
|
||||||
854 }
|
|
||||||
|
|
||||||
And this ``&info`` is what eventually gets passed to ``copy_siginfo()`` as the
|
|
||||||
destination argument.
|
|
||||||
|
|
||||||
Now, even though we didn't find an actual error here, the example is still a
|
|
||||||
good one, because it shows how one would go about finding out what the report
|
|
||||||
was all about.
|
|
||||||
|
|
||||||
|
|
||||||
Annotating false positives
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
There are a few different ways to make annotations in the source code that
|
|
||||||
will keep kmemcheck from checking and reporting certain allocations. Here
|
|
||||||
they are:
|
|
||||||
|
|
||||||
- ``__GFP_NOTRACK_FALSE_POSITIVE``
|
|
||||||
This flag can be passed to ``kmalloc()`` or ``kmem_cache_alloc()``
|
|
||||||
(therefore also to other functions that end up calling one of
|
|
||||||
these) to indicate that the allocation should not be tracked
|
|
||||||
because it would lead to a false positive report. This is a "big
|
|
||||||
hammer" way of silencing kmemcheck; after all, even if the false
|
|
||||||
positive pertains to a particular field in a struct, for example, we
|
|
||||||
will now lose the ability to find (real) errors in other parts of
|
|
||||||
the same struct.
|
|
||||||
|
|
||||||
Example::
|
|
||||||
|
|
||||||
/* No warnings will ever trigger on accessing any part of x */
|
|
||||||
x = kmalloc(sizeof *x, GFP_KERNEL | __GFP_NOTRACK_FALSE_POSITIVE);
|
|
||||||
|
|
||||||
- ``kmemcheck_bitfield_begin(name)``/``kmemcheck_bitfield_end(name)`` and
|
|
||||||
``kmemcheck_annotate_bitfield(ptr, name)``
|
|
||||||
The first two of these three macros can be used inside struct
|
|
||||||
definitions to signal, respectively, the beginning and end of a
|
|
||||||
bitfield. Additionally, this will assign the bitfield a name, which
|
|
||||||
is given as an argument to the macros.
|
|
||||||
|
|
||||||
Having used these markers, one can later use
|
|
||||||
kmemcheck_annotate_bitfield() at the point of allocation, to indicate
|
|
||||||
which parts of the allocation are part of a bitfield.
|
|
||||||
|
|
||||||
Example::
|
|
||||||
|
|
||||||
struct foo {
|
|
||||||
int x;
|
|
||||||
|
|
||||||
kmemcheck_bitfield_begin(flags);
|
|
||||||
int flag_a:1;
|
|
||||||
int flag_b:1;
|
|
||||||
kmemcheck_bitfield_end(flags);
|
|
||||||
|
|
||||||
int y;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct foo *x = kmalloc(sizeof *x, GFP_KERNEL);
|
|
||||||
|
|
||||||
/* No warnings will trigger on accessing the bitfield of x */
|
|
||||||
kmemcheck_annotate_bitfield(x, flags);
|
|
||||||
|
|
||||||
Note that ``kmemcheck_annotate_bitfield()`` can be used even before the
|
|
||||||
return value of ``kmalloc()`` is checked -- in other words, passing NULL
|
|
||||||
as the first argument is legal (and will do nothing).
|
|
||||||
|
|
||||||
|
|
||||||
Reporting errors
|
|
||||||
----------------
|
|
||||||
|
|
||||||
As we have seen, kmemcheck will produce false positive reports. Therefore, it
|
|
||||||
is not very wise to blindly post kmemcheck warnings to mailing lists and
|
|
||||||
maintainers. Instead, I encourage maintainers and developers to find errors
|
|
||||||
in their own code. If you get a warning, you can try to work around it, try
|
|
||||||
to figure out if it's a real error or not, or simply ignore it. Most
|
|
||||||
developers know their own code and will quickly and efficiently determine the
|
|
||||||
root cause of a kmemcheck report. This is therefore also the most efficient
|
|
||||||
way to work with kmemcheck.
|
|
||||||
|
|
||||||
That said, we (the kmemcheck maintainers) will always be on the lookout for
|
|
||||||
false positives that we can annotate and silence. So whatever you find,
|
|
||||||
please drop us a note privately! Kernel configs and steps to reproduce (if
|
|
||||||
available) are of course a great help too.
|
|
||||||
|
|
||||||
Happy hacking!
|
|
||||||
|
|
||||||
|
|
||||||
Technical description
|
|
||||||
---------------------
|
|
||||||
|
|
||||||
kmemcheck works by marking memory pages non-present. This means that whenever
|
|
||||||
somebody attempts to access the page, a page fault is generated. The page
|
|
||||||
fault handler notices that the page was in fact only hidden, and so it calls
|
|
||||||
on the kmemcheck code to make further investigations.
|
|
||||||
|
|
||||||
When the investigations are completed, kmemcheck "shows" the page by marking
|
|
||||||
it present (as it would be under normal circumstances). This way, the
|
|
||||||
interrupted code can continue as usual.
|
|
||||||
|
|
||||||
But after the instruction has been executed, we should hide the page again, so
|
|
||||||
that we can catch the next access too! Now kmemcheck makes use of a debugging
|
|
||||||
feature of the processor, namely single-stepping. When the processor has
|
|
||||||
finished the one instruction that generated the memory access, a debug
|
|
||||||
exception is raised. From here, we simply hide the page again and continue
|
|
||||||
execution, this time with the single-stepping feature turned off.
|
|
||||||
|
|
||||||
kmemcheck requires some assistance from the memory allocator in order to work.
|
|
||||||
The memory allocator needs to
|
|
||||||
|
|
||||||
1. Tell kmemcheck about newly allocated pages and pages that are about to
|
|
||||||
be freed. This allows kmemcheck to set up and tear down the shadow memory
|
|
||||||
for the pages in question. The shadow memory stores the status of each
|
|
||||||
byte in the allocation proper, e.g. whether it is initialized or
|
|
||||||
uninitialized.
|
|
||||||
|
|
||||||
2. Tell kmemcheck which parts of memory should be marked uninitialized.
|
|
||||||
There are actually a few more states, such as "not yet allocated" and
|
|
||||||
"recently freed".
|
|
||||||
|
|
||||||
If a slab cache is set up using the SLAB_NOTRACK flag, it will never return
|
|
||||||
memory that can take page faults because of kmemcheck.
|
|
||||||
|
|
||||||
If a slab cache is NOT set up using the SLAB_NOTRACK flag, callers can still
|
|
||||||
request memory with the __GFP_NOTRACK or __GFP_NOTRACK_FALSE_POSITIVE flags.
|
|
||||||
This does not prevent the page faults from occurring, however, but marks the
|
|
||||||
object in question as being initialized so that no warnings will ever be
|
|
||||||
produced for this object.
|
|
||||||
|
|
||||||
Currently, the SLAB and SLUB allocators are supported by kmemcheck.
|
|
|
@ -250,7 +250,6 @@ Table 1-2: Contents of the status files (as of 4.8)
|
||||||
VmExe size of text segment
|
VmExe size of text segment
|
||||||
VmLib size of shared library code
|
VmLib size of shared library code
|
||||||
VmPTE size of page table entries
|
VmPTE size of page table entries
|
||||||
VmPMD size of second level page tables
|
|
||||||
VmSwap amount of swap used by anonymous private data
|
VmSwap amount of swap used by anonymous private data
|
||||||
(shmem swap usage is not included)
|
(shmem swap usage is not included)
|
||||||
HugetlbPages size of hugetlb memory portions
|
HugetlbPages size of hugetlb memory portions
|
||||||
|
|
|
@ -58,6 +58,7 @@ Currently, these files are in /proc/sys/vm:
|
||||||
- percpu_pagelist_fraction
|
- percpu_pagelist_fraction
|
||||||
- stat_interval
|
- stat_interval
|
||||||
- stat_refresh
|
- stat_refresh
|
||||||
|
- numa_stat
|
||||||
- swappiness
|
- swappiness
|
||||||
- user_reserve_kbytes
|
- user_reserve_kbytes
|
||||||
- vfs_cache_pressure
|
- vfs_cache_pressure
|
||||||
|
@ -157,6 +158,10 @@ Note: the minimum value allowed for dirty_bytes is two pages (in bytes); any
|
||||||
value lower than this limit will be ignored and the old configuration will be
|
value lower than this limit will be ignored and the old configuration will be
|
||||||
retained.
|
retained.
|
||||||
|
|
||||||
|
Note: the value of dirty_bytes also must be set greater than
|
||||||
|
dirty_background_bytes or the amount of memory corresponding to
|
||||||
|
dirty_background_ratio.
|
||||||
|
|
||||||
==============================================================
|
==============================================================
|
||||||
|
|
||||||
dirty_expire_centisecs
|
dirty_expire_centisecs
|
||||||
|
@ -176,6 +181,9 @@ generating disk writes will itself start writing out dirty data.
|
||||||
|
|
||||||
The total available memory is not equal to total system memory.
|
The total available memory is not equal to total system memory.
|
||||||
|
|
||||||
|
Note: dirty_ratio must be set greater than dirty_background_ratio or
|
||||||
|
ratio corresponding to dirty_background_bytes.
|
||||||
|
|
||||||
==============================================================
|
==============================================================
|
||||||
|
|
||||||
dirty_writeback_centisecs
|
dirty_writeback_centisecs
|
||||||
|
@ -622,7 +630,7 @@ oom_dump_tasks
|
||||||
|
|
||||||
Enables a system-wide task dump (excluding kernel threads) to be produced
|
Enables a system-wide task dump (excluding kernel threads) to be produced
|
||||||
when the kernel performs an OOM-killing and includes such information as
|
when the kernel performs an OOM-killing and includes such information as
|
||||||
pid, uid, tgid, vm size, rss, nr_ptes, nr_pmds, swapents, oom_score_adj
|
pid, uid, tgid, vm size, rss, pgtables_bytes, swapents, oom_score_adj
|
||||||
score, and name. This is helpful to determine why the OOM killer was
|
score, and name. This is helpful to determine why the OOM killer was
|
||||||
invoked, to identify the rogue task that caused it, and to determine why
|
invoked, to identify the rogue task that caused it, and to determine why
|
||||||
the OOM killer chose the task it did to kill.
|
the OOM killer chose the task it did to kill.
|
||||||
|
@ -792,6 +800,21 @@ with no ill effects: errors and warnings on these stats are suppressed.)
|
||||||
|
|
||||||
==============================================================
|
==============================================================
|
||||||
|
|
||||||
|
numa_stat
|
||||||
|
|
||||||
|
This interface allows runtime configuration of numa statistics.
|
||||||
|
|
||||||
|
When page allocation performance becomes a bottleneck and you can tolerate
|
||||||
|
some possible tool breakage and decreased numa counter precision, you can
|
||||||
|
do:
|
||||||
|
echo 0 > /proc/sys/vm/numa_stat
|
||||||
|
|
||||||
|
When page allocation performance is not a bottleneck and you want all
|
||||||
|
tooling to work, you can do:
|
||||||
|
echo 1 > /proc/sys/vm/numa_stat
|
||||||
|
|
||||||
|
==============================================================
|
||||||
|
|
||||||
swappiness
|
swappiness
|
||||||
|
|
||||||
This control is used to define how aggressive the kernel will swap
|
This control is used to define how aggressive the kernel will swap
|
||||||
|
|
|
@ -0,0 +1,93 @@
|
||||||
|
When do you need to notify inside the page table lock?
|
||||||
|
|
||||||
|
When clearing a pte/pmd we are given a choice to notify the event through
|
||||||
|
(the notify version of *_clear_flush calls mmu_notifier_invalidate_range) under
|
||||||
|
the page table lock. But that notification is not necessary in all cases.
|
||||||
|
|
||||||
|
For secondary TLBs (non-CPU TLBs) like an IOMMU TLB or a device TLB (when the device uses
|
||||||
|
something like ATS/PASID to get the IOMMU to walk the CPU page table to access a
|
||||||
|
process virtual address space), there are only two cases in which you need to notify
|
||||||
|
those secondary TLBs while holding the page table lock when clearing a pte/pmd:
|
||||||
|
|
||||||
|
A) the page backing the address is freed before mmu_notifier_invalidate_range_end()
|
||||||
|
B) a page table entry is updated to point to a new page (COW, write fault
|
||||||
|
on zero page, __replace_page(), ...)
|
||||||
|
|
||||||
|
Case A is obvious: you do not want to take the risk of the device writing to
|
||||||
|
a page that might now be used by some completely different task.
|
||||||
|
|
||||||
|
Case B is more subtle. For correctness it requires the following sequence to
|
||||||
|
happen:
|
||||||
|
- take page table lock
|
||||||
|
- clear page table entry and notify ([pmd/pte]p_huge_clear_flush_notify())
|
||||||
|
- set page table entry to point to new page
|
||||||
|
|
||||||
|
If clearing the page table entry is not followed by a notify before setting
|
||||||
|
the new pte/pmd value then you can break a memory model like C11 or C++11 for
|
||||||
|
the device.
|
||||||
|
|
||||||
|
Consider the following scenario (the device uses a feature similar to ATS/PASID):
|
||||||
|
|
||||||
|
Two addresses addrA and addrB such that |addrA - addrB| >= PAGE_SIZE; we assume
|
||||||
|
they are write protected for COW (the other cases of B apply too).
|
||||||
|
|
||||||
|
[Time N] --------------------------------------------------------------------
|
||||||
|
CPU-thread-0 {try to write to addrA}
|
||||||
|
CPU-thread-1 {try to write to addrB}
|
||||||
|
CPU-thread-2 {}
|
||||||
|
CPU-thread-3 {}
|
||||||
|
DEV-thread-0 {read addrA and populate device TLB}
|
||||||
|
DEV-thread-2 {read addrB and populate device TLB}
|
||||||
|
[Time N+1] ------------------------------------------------------------------
|
||||||
|
CPU-thread-0 {COW_step0: {mmu_notifier_invalidate_range_start(addrA)}}
|
||||||
|
CPU-thread-1 {COW_step0: {mmu_notifier_invalidate_range_start(addrB)}}
|
||||||
|
CPU-thread-2 {}
|
||||||
|
CPU-thread-3 {}
|
||||||
|
DEV-thread-0 {}
|
||||||
|
DEV-thread-2 {}
|
||||||
|
[Time N+2] ------------------------------------------------------------------
|
||||||
|
CPU-thread-0 {COW_step1: {update page table to point to new page for addrA}}
|
||||||
|
CPU-thread-1 {COW_step1: {update page table to point to new page for addrB}}
|
||||||
|
CPU-thread-2 {}
|
||||||
|
CPU-thread-3 {}
|
||||||
|
DEV-thread-0 {}
|
||||||
|
DEV-thread-2 {}
|
||||||
|
[Time N+3] ------------------------------------------------------------------
|
||||||
|
CPU-thread-0 {preempted}
|
||||||
|
CPU-thread-1 {preempted}
|
||||||
|
CPU-thread-2 {write to addrA which is a write to new page}
|
||||||
|
CPU-thread-3 {}
|
||||||
|
DEV-thread-0 {}
|
||||||
|
DEV-thread-2 {}
|
||||||
|
[Time N+3] ------------------------------------------------------------------
|
||||||
|
CPU-thread-0 {preempted}
|
||||||
|
CPU-thread-1 {preempted}
|
||||||
|
CPU-thread-2 {}
|
||||||
|
CPU-thread-3 {write to addrB which is a write to new page}
|
||||||
|
DEV-thread-0 {}
|
||||||
|
DEV-thread-2 {}
|
||||||
|
[Time N+4] ------------------------------------------------------------------
|
||||||
|
CPU-thread-0 {preempted}
|
||||||
|
CPU-thread-1 {COW_step3: {mmu_notifier_invalidate_range_end(addrB)}}
|
||||||
|
CPU-thread-2 {}
|
||||||
|
CPU-thread-3 {}
|
||||||
|
DEV-thread-0 {}
|
||||||
|
DEV-thread-2 {}
|
||||||
|
[Time N+5] ------------------------------------------------------------------
|
||||||
|
CPU-thread-0 {preempted}
|
||||||
|
CPU-thread-1 {}
|
||||||
|
CPU-thread-2 {}
|
||||||
|
CPU-thread-3 {}
|
||||||
|
DEV-thread-0 {read addrA from old page}
|
||||||
|
DEV-thread-2 {read addrB from new page}
|
||||||
|
|
||||||
|
So here, because at time N+2 the clearing of the page table entry was not paired with a
|
||||||
|
notification to invalidate the secondary TLB, the device sees the new value for
|
||||||
|
addrB before seeing the new value for addrA. This breaks total memory ordering
|
||||||
|
for the device.
|
||||||
|
|
||||||
|
When changing a pte to write protect or to point to a new write protected page
|
||||||
|
with the same content (KSM) it is fine to delay the mmu_notifier_invalidate_range
|
||||||
|
call to mmu_notifier_invalidate_range_end() outside the page table lock. This
|
||||||
|
is true even if the thread doing the page table update is preempted right after
|
||||||
|
releasing the page table lock but before calling mmu_notifier_invalidate_range_end().
|
10
MAINTAINERS
10
MAINTAINERS
|
@ -7692,16 +7692,6 @@ F: include/linux/kdb.h
|
||||||
F: include/linux/kgdb.h
|
F: include/linux/kgdb.h
|
||||||
F: kernel/debug/
|
F: kernel/debug/
|
||||||
|
|
||||||
KMEMCHECK
|
|
||||||
M: Vegard Nossum <vegardno@ifi.uio.no>
|
|
||||||
M: Pekka Enberg <penberg@kernel.org>
|
|
||||||
S: Maintained
|
|
||||||
F: Documentation/dev-tools/kmemcheck.rst
|
|
||||||
F: arch/x86/include/asm/kmemcheck.h
|
|
||||||
F: arch/x86/mm/kmemcheck/
|
|
||||||
F: include/linux/kmemcheck.h
|
|
||||||
F: mm/kmemcheck.c
|
|
||||||
|
|
||||||
KMEMLEAK
|
KMEMLEAK
|
||||||
M: Catalin Marinas <catalin.marinas@arm.com>
|
M: Catalin Marinas <catalin.marinas@arm.com>
|
||||||
S: Maintained
|
S: Maintained
|
||||||
|
|
|
@ -7,7 +7,6 @@
|
||||||
#include <linux/mm_types.h>
|
#include <linux/mm_types.h>
|
||||||
#include <linux/scatterlist.h>
|
#include <linux/scatterlist.h>
|
||||||
#include <linux/dma-debug.h>
|
#include <linux/dma-debug.h>
|
||||||
#include <linux/kmemcheck.h>
|
|
||||||
#include <linux/kref.h>
|
#include <linux/kref.h>
|
||||||
|
|
||||||
#define ARM_MAPPING_ERROR (~(dma_addr_t)0x0)
|
#define ARM_MAPPING_ERROR (~(dma_addr_t)0x0)
|
||||||
|
|
|
@ -57,7 +57,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
|
||||||
extern pgd_t *pgd_alloc(struct mm_struct *mm);
|
extern pgd_t *pgd_alloc(struct mm_struct *mm);
|
||||||
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
|
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
|
||||||
|
|
||||||
#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
|
#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO)
|
||||||
|
|
||||||
static inline void clean_pte_table(pte_t *pte)
|
static inline void clean_pte_table(pte_t *pte)
|
||||||
{
|
{
|
||||||
|
|
|
@ -141,7 +141,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd_base)
|
||||||
pte = pmd_pgtable(*pmd);
|
pte = pmd_pgtable(*pmd);
|
||||||
pmd_clear(pmd);
|
pmd_clear(pmd);
|
||||||
pte_free(mm, pte);
|
pte_free(mm, pte);
|
||||||
atomic_long_dec(&mm->nr_ptes);
|
mm_dec_nr_ptes(mm);
|
||||||
no_pmd:
|
no_pmd:
|
||||||
pud_clear(pud);
|
pud_clear(pud);
|
||||||
pmd_free(mm, pmd);
|
pmd_free(mm, pmd);
|
||||||
|
|
|
@ -85,7 +85,7 @@ config ARM64
|
||||||
select HAVE_ARCH_BITREVERSE
|
select HAVE_ARCH_BITREVERSE
|
||||||
select HAVE_ARCH_HUGE_VMAP
|
select HAVE_ARCH_HUGE_VMAP
|
||||||
select HAVE_ARCH_JUMP_LABEL
|
select HAVE_ARCH_JUMP_LABEL
|
||||||
select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
|
select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
|
||||||
select HAVE_ARCH_KGDB
|
select HAVE_ARCH_KGDB
|
||||||
select HAVE_ARCH_MMAP_RND_BITS
|
select HAVE_ARCH_MMAP_RND_BITS
|
||||||
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
|
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
|
||||||
|
|
|
@ -26,7 +26,7 @@
|
||||||
|
|
||||||
#define check_pgt_cache() do { } while (0)
|
#define check_pgt_cache() do { } while (0)
|
||||||
|
|
||||||
#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
|
#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO)
|
||||||
#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
|
#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
|
||||||
|
|
||||||
#if CONFIG_PGTABLE_LEVELS > 2
|
#if CONFIG_PGTABLE_LEVELS > 2
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define pr_fmt(fmt) "kasan: " fmt
|
#define pr_fmt(fmt) "kasan: " fmt
|
||||||
|
#include <linux/bootmem.h>
|
||||||
#include <linux/kasan.h>
|
#include <linux/kasan.h>
|
||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
#include <linux/sched/task.h>
|
#include <linux/sched/task.h>
|
||||||
|
@ -35,77 +36,117 @@ static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
|
||||||
* with the physical address from __pa_symbol.
|
* with the physical address from __pa_symbol.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
|
static phys_addr_t __init kasan_alloc_zeroed_page(int node)
|
||||||
unsigned long end)
|
{
|
||||||
|
void *p = memblock_virt_alloc_try_nid(PAGE_SIZE, PAGE_SIZE,
|
||||||
|
__pa(MAX_DMA_ADDRESS),
|
||||||
|
MEMBLOCK_ALLOC_ACCESSIBLE, node);
|
||||||
|
return __pa(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
static pte_t *__init kasan_pte_offset(pmd_t *pmd, unsigned long addr, int node,
|
||||||
|
bool early)
|
||||||
|
{
|
||||||
|
if (pmd_none(*pmd)) {
|
||||||
|
phys_addr_t pte_phys = early ? __pa_symbol(kasan_zero_pte)
|
||||||
|
: kasan_alloc_zeroed_page(node);
|
||||||
|
__pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
|
||||||
|
}
|
||||||
|
|
||||||
|
return early ? pte_offset_kimg(pmd, addr)
|
||||||
|
: pte_offset_kernel(pmd, addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
static pmd_t *__init kasan_pmd_offset(pud_t *pud, unsigned long addr, int node,
|
||||||
|
bool early)
|
||||||
|
{
|
||||||
|
if (pud_none(*pud)) {
|
||||||
|
phys_addr_t pmd_phys = early ? __pa_symbol(kasan_zero_pmd)
|
||||||
|
: kasan_alloc_zeroed_page(node);
|
||||||
|
__pud_populate(pud, pmd_phys, PMD_TYPE_TABLE);
|
||||||
|
}
|
||||||
|
|
||||||
|
return early ? pmd_offset_kimg(pud, addr) : pmd_offset(pud, addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
static pud_t *__init kasan_pud_offset(pgd_t *pgd, unsigned long addr, int node,
|
||||||
|
bool early)
|
||||||
|
{
|
||||||
|
if (pgd_none(*pgd)) {
|
||||||
|
phys_addr_t pud_phys = early ? __pa_symbol(kasan_zero_pud)
|
||||||
|
: kasan_alloc_zeroed_page(node);
|
||||||
|
__pgd_populate(pgd, pud_phys, PMD_TYPE_TABLE);
|
||||||
|
}
|
||||||
|
|
||||||
|
return early ? pud_offset_kimg(pgd, addr) : pud_offset(pgd, addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __init kasan_pte_populate(pmd_t *pmd, unsigned long addr,
|
||||||
|
unsigned long end, int node, bool early)
|
||||||
{
|
{
|
||||||
pte_t *pte;
|
|
||||||
unsigned long next;
|
unsigned long next;
|
||||||
|
pte_t *pte = kasan_pte_offset(pmd, addr, node, early);
|
||||||
|
|
||||||
if (pmd_none(*pmd))
|
|
||||||
__pmd_populate(pmd, __pa_symbol(kasan_zero_pte), PMD_TYPE_TABLE);
|
|
||||||
|
|
||||||
pte = pte_offset_kimg(pmd, addr);
|
|
||||||
do {
|
do {
|
||||||
|
phys_addr_t page_phys = early ? __pa_symbol(kasan_zero_page)
|
||||||
|
: kasan_alloc_zeroed_page(node);
|
||||||
next = addr + PAGE_SIZE;
|
next = addr + PAGE_SIZE;
|
||||||
set_pte(pte, pfn_pte(sym_to_pfn(kasan_zero_page),
|
set_pte(pte, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
|
||||||
PAGE_KERNEL));
|
|
||||||
} while (pte++, addr = next, addr != end && pte_none(*pte));
|
} while (pte++, addr = next, addr != end && pte_none(*pte));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init kasan_early_pmd_populate(pud_t *pud,
|
static void __init kasan_pmd_populate(pud_t *pud, unsigned long addr,
|
||||||
unsigned long addr,
|
unsigned long end, int node, bool early)
|
||||||
unsigned long end)
|
|
||||||
{
|
{
|
||||||
pmd_t *pmd;
|
|
||||||
unsigned long next;
|
unsigned long next;
|
||||||
|
pmd_t *pmd = kasan_pmd_offset(pud, addr, node, early);
|
||||||
|
|
||||||
if (pud_none(*pud))
|
|
||||||
__pud_populate(pud, __pa_symbol(kasan_zero_pmd), PMD_TYPE_TABLE);
|
|
||||||
|
|
||||||
pmd = pmd_offset_kimg(pud, addr);
|
|
||||||
do {
|
do {
|
||||||
next = pmd_addr_end(addr, end);
|
next = pmd_addr_end(addr, end);
|
||||||
kasan_early_pte_populate(pmd, addr, next);
|
kasan_pte_populate(pmd, addr, next, node, early);
|
||||||
} while (pmd++, addr = next, addr != end && pmd_none(*pmd));
|
} while (pmd++, addr = next, addr != end && pmd_none(*pmd));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init kasan_early_pud_populate(pgd_t *pgd,
|
static void __init kasan_pud_populate(pgd_t *pgd, unsigned long addr,
|
||||||
unsigned long addr,
|
unsigned long end, int node, bool early)
|
||||||
unsigned long end)
|
|
||||||
{
|
{
|
||||||
pud_t *pud;
|
|
||||||
unsigned long next;
|
unsigned long next;
|
||||||
|
pud_t *pud = kasan_pud_offset(pgd, addr, node, early);
|
||||||
|
|
||||||
if (pgd_none(*pgd))
|
|
||||||
__pgd_populate(pgd, __pa_symbol(kasan_zero_pud), PUD_TYPE_TABLE);
|
|
||||||
|
|
||||||
pud = pud_offset_kimg(pgd, addr);
|
|
||||||
do {
|
do {
|
||||||
next = pud_addr_end(addr, end);
|
next = pud_addr_end(addr, end);
|
||||||
kasan_early_pmd_populate(pud, addr, next);
|
kasan_pmd_populate(pud, addr, next, node, early);
|
||||||
} while (pud++, addr = next, addr != end && pud_none(*pud));
|
} while (pud++, addr = next, addr != end && pud_none(*pud));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init kasan_map_early_shadow(void)
|
static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
|
||||||
|
int node, bool early)
|
||||||
{
|
{
|
||||||
unsigned long addr = KASAN_SHADOW_START;
|
|
||||||
unsigned long end = KASAN_SHADOW_END;
|
|
||||||
unsigned long next;
|
unsigned long next;
|
||||||
pgd_t *pgd;
|
pgd_t *pgd;
|
||||||
|
|
||||||
pgd = pgd_offset_k(addr);
|
pgd = pgd_offset_k(addr);
|
||||||
do {
|
do {
|
||||||
next = pgd_addr_end(addr, end);
|
next = pgd_addr_end(addr, end);
|
||||||
kasan_early_pud_populate(pgd, addr, next);
|
kasan_pud_populate(pgd, addr, next, node, early);
|
||||||
} while (pgd++, addr = next, addr != end);
|
} while (pgd++, addr = next, addr != end);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* The early shadow maps everything to a single page of zeroes */
|
||||||
asmlinkage void __init kasan_early_init(void)
|
asmlinkage void __init kasan_early_init(void)
|
||||||
{
|
{
|
||||||
BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
|
BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
|
||||||
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
|
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
|
||||||
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
|
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
|
||||||
kasan_map_early_shadow();
|
kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE,
|
||||||
|
true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set up full kasan mappings, ensuring that the mapped pages are zeroed */
|
||||||
|
static void __init kasan_map_populate(unsigned long start, unsigned long end,
|
||||||
|
int node)
|
||||||
|
{
|
||||||
|
kasan_pgd_populate(start & PAGE_MASK, PAGE_ALIGN(end), node, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -142,8 +183,8 @@ void __init kasan_init(void)
|
||||||
struct memblock_region *reg;
|
struct memblock_region *reg;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
kimg_shadow_start = (u64)kasan_mem_to_shadow(_text);
|
kimg_shadow_start = (u64)kasan_mem_to_shadow(_text) & PAGE_MASK;
|
||||||
kimg_shadow_end = (u64)kasan_mem_to_shadow(_end);
|
kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(_end));
|
||||||
|
|
||||||
mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR);
|
mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR);
|
||||||
mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END);
|
mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END);
|
||||||
|
@ -161,19 +202,8 @@ void __init kasan_init(void)
|
||||||
|
|
||||||
clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
|
clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
|
||||||
|
|
||||||
vmemmap_populate(kimg_shadow_start, kimg_shadow_end,
|
kasan_map_populate(kimg_shadow_start, kimg_shadow_end,
|
||||||
pfn_to_nid(virt_to_pfn(lm_alias(_text))));
|
pfn_to_nid(virt_to_pfn(lm_alias(_text))));
|
||||||
|
|
||||||
/*
|
|
||||||
* vmemmap_populate() has populated the shadow region that covers the
|
|
||||||
* kernel image with SWAPPER_BLOCK_SIZE mappings, so we have to round
|
|
||||||
* the start and end addresses to SWAPPER_BLOCK_SIZE as well, to prevent
|
|
||||||
* kasan_populate_zero_shadow() from replacing the page table entries
|
|
||||||
* (PMD or PTE) at the edges of the shadow region for the kernel
|
|
||||||
* image.
|
|
||||||
*/
|
|
||||||
kimg_shadow_start = round_down(kimg_shadow_start, SWAPPER_BLOCK_SIZE);
|
|
||||||
kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE);
|
|
||||||
|
|
||||||
kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
|
kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
|
||||||
(void *)mod_shadow_start);
|
(void *)mod_shadow_start);
|
||||||
|
@ -191,9 +221,9 @@ void __init kasan_init(void)
|
||||||
if (start >= end)
|
if (start >= end)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
vmemmap_populate((unsigned long)kasan_mem_to_shadow(start),
|
kasan_map_populate((unsigned long)kasan_mem_to_shadow(start),
|
||||||
(unsigned long)kasan_mem_to_shadow(end),
|
(unsigned long)kasan_mem_to_shadow(end),
|
||||||
pfn_to_nid(virt_to_pfn(start)));
|
pfn_to_nid(virt_to_pfn(start)));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -42,21 +42,9 @@
|
||||||
#undef DEBUG
|
#undef DEBUG
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* BAD_PAGE is the page that is used for page faults when linux
|
|
||||||
* is out-of-memory. Older versions of linux just did a
|
|
||||||
* do_exit(), but using this instead means there is less risk
|
|
||||||
* for a process dying in kernel mode, possibly leaving a inode
|
|
||||||
* unused etc..
|
|
||||||
*
|
|
||||||
* BAD_PAGETABLE is the accompanying page-table: it is initialized
|
|
||||||
* to point to BAD_PAGE entries.
|
|
||||||
*
|
|
||||||
* ZERO_PAGE is a special page that is used for zero-initialized
|
* ZERO_PAGE is a special page that is used for zero-initialized
|
||||||
* data and COW.
|
* data and COW.
|
||||||
*/
|
*/
|
||||||
static unsigned long empty_bad_page_table;
|
|
||||||
static unsigned long empty_bad_page;
|
|
||||||
|
|
||||||
unsigned long empty_zero_page;
|
unsigned long empty_zero_page;
|
||||||
EXPORT_SYMBOL(empty_zero_page);
|
EXPORT_SYMBOL(empty_zero_page);
|
||||||
|
|
||||||
|
@ -72,8 +60,6 @@ void __init paging_init(void)
|
||||||
unsigned long zones_size[MAX_NR_ZONES] = {0, };
|
unsigned long zones_size[MAX_NR_ZONES] = {0, };
|
||||||
|
|
||||||
/* allocate some pages for kernel housekeeping tasks */
|
/* allocate some pages for kernel housekeeping tasks */
|
||||||
empty_bad_page_table = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
|
|
||||||
empty_bad_page = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
|
|
||||||
empty_zero_page = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
|
empty_zero_page = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
|
||||||
|
|
||||||
memset((void *) empty_zero_page, 0, PAGE_SIZE);
|
memset((void *) empty_zero_page, 0, PAGE_SIZE);
|
||||||
|
|
|
@ -40,20 +40,9 @@
|
||||||
#include <asm/sections.h>
|
#include <asm/sections.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* BAD_PAGE is the page that is used for page faults when linux
|
|
||||||
* is out-of-memory. Older versions of linux just did a
|
|
||||||
* do_exit(), but using this instead means there is less risk
|
|
||||||
* for a process dying in kernel mode, possibly leaving a inode
|
|
||||||
* unused etc..
|
|
||||||
*
|
|
||||||
* BAD_PAGETABLE is the accompanying page-table: it is initialized
|
|
||||||
* to point to BAD_PAGE entries.
|
|
||||||
*
|
|
||||||
* ZERO_PAGE is a special page that is used for zero-initialized
|
* ZERO_PAGE is a special page that is used for zero-initialized
|
||||||
* data and COW.
|
* data and COW.
|
||||||
*/
|
*/
|
||||||
static unsigned long empty_bad_page_table;
|
|
||||||
static unsigned long empty_bad_page;
|
|
||||||
unsigned long empty_zero_page;
|
unsigned long empty_zero_page;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -78,8 +67,6 @@ void __init paging_init(void)
|
||||||
* Initialize the bad page table and bad page to point
|
* Initialize the bad page table and bad page to point
|
||||||
* to a couple of allocated pages.
|
* to a couple of allocated pages.
|
||||||
*/
|
*/
|
||||||
empty_bad_page_table = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
|
|
||||||
empty_bad_page = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
|
|
||||||
empty_zero_page = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
|
empty_zero_page = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
|
||||||
memset((void *)empty_zero_page, 0, PAGE_SIZE);
|
memset((void *)empty_zero_page, 0, PAGE_SIZE);
|
||||||
|
|
||||||
|
|
|
@ -196,8 +196,8 @@ config TIMER_DIVIDE
|
||||||
default "128"
|
default "128"
|
||||||
|
|
||||||
config CPU_BIG_ENDIAN
|
config CPU_BIG_ENDIAN
|
||||||
bool "Generate big endian code"
|
bool
|
||||||
default n
|
default !CPU_LITTLE_ENDIAN
|
||||||
|
|
||||||
config CPU_LITTLE_ENDIAN
|
config CPU_LITTLE_ENDIAN
|
||||||
bool "Generate little endian code"
|
bool "Generate little endian code"
|
||||||
|
|
|
@ -31,12 +31,7 @@
|
||||||
* tables. Each page table is also a single 4K page, giving 512 (==
|
* tables. Each page table is also a single 4K page, giving 512 (==
|
||||||
* PTRS_PER_PTE) 8 byte ptes. Each pud entry is initialized to point to
|
* PTRS_PER_PTE) 8 byte ptes. Each pud entry is initialized to point to
|
||||||
* invalid_pmd_table, each pmd entry is initialized to point to
|
* invalid_pmd_table, each pmd entry is initialized to point to
|
||||||
* invalid_pte_table, each pte is initialized to 0. When memory is low,
|
* invalid_pte_table, each pte is initialized to 0.
|
||||||
* and a pmd table or a page table allocation fails, empty_bad_pmd_table
|
|
||||||
* and empty_bad_page_table is returned back to higher layer code, so
|
|
||||||
* that the failure is recognized later on. Linux does not seem to
|
|
||||||
* handle these failures very well though. The empty_bad_page_table has
|
|
||||||
* invalid pte entries in it, to force page faults.
|
|
||||||
*
|
*
|
||||||
* Kernel mappings: kernel mappings are held in the swapper_pg_table.
|
* Kernel mappings: kernel mappings are held in the swapper_pg_table.
|
||||||
* The layout is identical to userspace except it's indexed with the
|
* The layout is identical to userspace except it's indexed with the
|
||||||
|
@ -175,7 +170,6 @@
|
||||||
printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e))
|
printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e))
|
||||||
|
|
||||||
extern pte_t invalid_pte_table[PTRS_PER_PTE];
|
extern pte_t invalid_pte_table[PTRS_PER_PTE];
|
||||||
extern pte_t empty_bad_page_table[PTRS_PER_PTE];
|
|
||||||
|
|
||||||
#ifndef __PAGETABLE_PUD_FOLDED
|
#ifndef __PAGETABLE_PUD_FOLDED
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -433,14 +433,6 @@ ENTRY(swapper_pg_dir)
|
||||||
ENTRY(empty_zero_page)
|
ENTRY(empty_zero_page)
|
||||||
.space PAGE_SIZE
|
.space PAGE_SIZE
|
||||||
|
|
||||||
.balign PAGE_SIZE
|
|
||||||
ENTRY(empty_bad_page)
|
|
||||||
.space PAGE_SIZE
|
|
||||||
|
|
||||||
.balign PAGE_SIZE
|
|
||||||
ENTRY(empty_bad_pte_table)
|
|
||||||
.space PAGE_SIZE
|
|
||||||
|
|
||||||
.balign PAGE_SIZE
|
.balign PAGE_SIZE
|
||||||
ENTRY(large_page_table)
|
ENTRY(large_page_table)
|
||||||
.space PAGE_SIZE
|
.space PAGE_SIZE
|
||||||
|
|
|
@ -23,7 +23,6 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <linux/dma-debug.h>
|
#include <linux/dma-debug.h>
|
||||||
#include <linux/kmemcheck.h>
|
|
||||||
#include <linux/dma-mapping.h>
|
#include <linux/dma-mapping.h>
|
||||||
|
|
||||||
extern const struct dma_map_ops or1k_dma_map_ops;
|
extern const struct dma_map_ops or1k_dma_map_ops;
|
||||||
|
|
|
@ -18,7 +18,7 @@ static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp)
|
||||||
}
|
}
|
||||||
#endif /* MODULE */
|
#endif /* MODULE */
|
||||||
|
|
||||||
#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
|
#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO)
|
||||||
|
|
||||||
#ifdef CONFIG_PPC_BOOK3S
|
#ifdef CONFIG_PPC_BOOK3S
|
||||||
#include <asm/book3s/pgalloc.h>
|
#include <asm/book3s/pgalloc.h>
|
||||||
|
|
|
@ -433,6 +433,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
|
||||||
pud = pud_offset(pgd, start);
|
pud = pud_offset(pgd, start);
|
||||||
pgd_clear(pgd);
|
pgd_clear(pgd);
|
||||||
pud_free_tlb(tlb, pud, start);
|
pud_free_tlb(tlb, pud, start);
|
||||||
|
mm_dec_nr_puds(tlb->mm);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -200,7 +200,7 @@ static void destroy_pagetable_page(struct mm_struct *mm)
|
||||||
/* We allow PTE_FRAG_NR fragments from a PTE page */
|
/* We allow PTE_FRAG_NR fragments from a PTE page */
|
||||||
if (page_ref_sub_and_test(page, PTE_FRAG_NR - count)) {
|
if (page_ref_sub_and_test(page, PTE_FRAG_NR - count)) {
|
||||||
pgtable_page_dtor(page);
|
pgtable_page_dtor(page);
|
||||||
free_hot_cold_page(page, 0);
|
free_unref_page(page);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -404,7 +404,7 @@ void pte_fragment_free(unsigned long *table, int kernel)
|
||||||
if (put_page_testzero(page)) {
|
if (put_page_testzero(page)) {
|
||||||
if (!kernel)
|
if (!kernel)
|
||||||
pgtable_page_dtor(page);
|
pgtable_page_dtor(page);
|
||||||
free_hot_cold_page(page, 0);
|
free_unref_page(page);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -44,6 +44,8 @@ static inline int init_new_context(struct task_struct *tsk,
|
||||||
mm->context.asce_limit = STACK_TOP_MAX;
|
mm->context.asce_limit = STACK_TOP_MAX;
|
||||||
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
|
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
|
||||||
_ASCE_USER_BITS | _ASCE_TYPE_REGION3;
|
_ASCE_USER_BITS | _ASCE_TYPE_REGION3;
|
||||||
|
/* pgd_alloc() did not account this pud */
|
||||||
|
mm_inc_nr_puds(mm);
|
||||||
break;
|
break;
|
||||||
case -PAGE_SIZE:
|
case -PAGE_SIZE:
|
||||||
/* forked 5-level task, set new asce with new_mm->pgd */
|
/* forked 5-level task, set new asce with new_mm->pgd */
|
||||||
|
@ -59,7 +61,7 @@ static inline int init_new_context(struct task_struct *tsk,
|
||||||
/* forked 2-level compat task, set new asce with new mm->pgd */
|
/* forked 2-level compat task, set new asce with new mm->pgd */
|
||||||
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
|
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
|
||||||
_ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
|
_ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
|
||||||
/* pgd_alloc() did not increase mm->nr_pmds */
|
/* pgd_alloc() did not account this pmd */
|
||||||
mm_inc_nr_pmds(mm);
|
mm_inc_nr_pmds(mm);
|
||||||
}
|
}
|
||||||
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
|
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
|
||||||
|
|
|
@ -1172,11 +1172,11 @@ static int __init dwarf_unwinder_init(void)
|
||||||
|
|
||||||
dwarf_frame_cachep = kmem_cache_create("dwarf_frames",
|
dwarf_frame_cachep = kmem_cache_create("dwarf_frames",
|
||||||
sizeof(struct dwarf_frame), 0,
|
sizeof(struct dwarf_frame), 0,
|
||||||
SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL);
|
SLAB_PANIC | SLAB_HWCACHE_ALIGN, NULL);
|
||||||
|
|
||||||
dwarf_reg_cachep = kmem_cache_create("dwarf_regs",
|
dwarf_reg_cachep = kmem_cache_create("dwarf_regs",
|
||||||
sizeof(struct dwarf_reg), 0,
|
sizeof(struct dwarf_reg), 0,
|
||||||
SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL);
|
SLAB_PANIC | SLAB_HWCACHE_ALIGN, NULL);
|
||||||
|
|
||||||
dwarf_frame_pool = mempool_create_slab_pool(DWARF_FRAME_MIN_REQ,
|
dwarf_frame_pool = mempool_create_slab_pool(DWARF_FRAME_MIN_REQ,
|
||||||
dwarf_frame_cachep);
|
dwarf_frame_cachep);
|
||||||
|
|
|
@ -101,14 +101,6 @@ empty_zero_page:
|
||||||
mmu_pdtp_cache:
|
mmu_pdtp_cache:
|
||||||
.space PAGE_SIZE, 0
|
.space PAGE_SIZE, 0
|
||||||
|
|
||||||
.global empty_bad_page
|
|
||||||
empty_bad_page:
|
|
||||||
.space PAGE_SIZE, 0
|
|
||||||
|
|
||||||
.global empty_bad_pte_table
|
|
||||||
empty_bad_pte_table:
|
|
||||||
.space PAGE_SIZE, 0
|
|
||||||
|
|
||||||
.global fpu_in_use
|
.global fpu_in_use
|
||||||
fpu_in_use: .quad 0
|
fpu_in_use: .quad 0
|
||||||
|
|
||||||
|
|
|
@ -59,7 +59,7 @@ void arch_task_cache_init(void)
|
||||||
|
|
||||||
task_xstate_cachep = kmem_cache_create("task_xstate", xstate_size,
|
task_xstate_cachep = kmem_cache_create("task_xstate", xstate_size,
|
||||||
__alignof__(union thread_xstate),
|
__alignof__(union thread_xstate),
|
||||||
SLAB_PANIC | SLAB_NOTRACK, NULL);
|
SLAB_PANIC, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_SH_FPU_EMU
|
#ifdef CONFIG_SH_FPU_EMU
|
||||||
|
|
|
@ -231,6 +231,36 @@ extern unsigned long _PAGE_ALL_SZ_BITS;
|
||||||
extern struct page *mem_map_zero;
|
extern struct page *mem_map_zero;
|
||||||
#define ZERO_PAGE(vaddr) (mem_map_zero)
|
#define ZERO_PAGE(vaddr) (mem_map_zero)
|
||||||
|
|
||||||
|
/* This macro must be updated when the size of struct page grows above 80
|
||||||
|
* or reduces below 64.
|
||||||
|
* The idea is that the compiler optimizes out the switch() statement and
|
||||||
|
* leaves only clrx instructions
|
||||||
|
*/
|
||||||
|
#define mm_zero_struct_page(pp) do { \
|
||||||
|
unsigned long *_pp = (void *)(pp); \
|
||||||
|
\
|
||||||
|
/* Check that struct page is either 64, 72, or 80 bytes */ \
|
||||||
|
BUILD_BUG_ON(sizeof(struct page) & 7); \
|
||||||
|
BUILD_BUG_ON(sizeof(struct page) < 64); \
|
||||||
|
BUILD_BUG_ON(sizeof(struct page) > 80); \
|
||||||
|
\
|
||||||
|
switch (sizeof(struct page)) { \
|
||||||
|
case 80: \
|
||||||
|
_pp[9] = 0; /* fallthrough */ \
|
||||||
|
case 72: \
|
||||||
|
_pp[8] = 0; /* fallthrough */ \
|
||||||
|
default: \
|
||||||
|
_pp[7] = 0; \
|
||||||
|
_pp[6] = 0; \
|
||||||
|
_pp[5] = 0; \
|
||||||
|
_pp[4] = 0; \
|
||||||
|
_pp[3] = 0; \
|
||||||
|
_pp[2] = 0; \
|
||||||
|
_pp[1] = 0; \
|
||||||
|
_pp[0] = 0; \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
/* PFNs are real physical page numbers. However, mem_map only begins to record
|
/* PFNs are real physical page numbers. However, mem_map only begins to record
|
||||||
* per-page information starting at pfn_base. This is to handle systems where
|
* per-page information starting at pfn_base. This is to handle systems where
|
||||||
* the first physical page in the machine is at some huge physical address,
|
* the first physical page in the machine is at some huge physical address,
|
||||||
|
|
|
@ -397,7 +397,7 @@ static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
|
||||||
|
|
||||||
pmd_clear(pmd);
|
pmd_clear(pmd);
|
||||||
pte_free_tlb(tlb, token, addr);
|
pte_free_tlb(tlb, token, addr);
|
||||||
atomic_long_dec(&tlb->mm->nr_ptes);
|
mm_dec_nr_ptes(tlb->mm);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
|
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
|
||||||
|
@ -472,6 +472,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
|
||||||
pud = pud_offset(pgd, start);
|
pud = pud_offset(pgd, start);
|
||||||
pgd_clear(pgd);
|
pgd_clear(pgd);
|
||||||
pud_free_tlb(tlb, pud, start);
|
pud_free_tlb(tlb, pud, start);
|
||||||
|
mm_dec_nr_puds(tlb->mm);
|
||||||
}
|
}
|
||||||
|
|
||||||
void hugetlb_free_pgd_range(struct mmu_gather *tlb,
|
void hugetlb_free_pgd_range(struct mmu_gather *tlb,
|
||||||
|
|
|
@ -2540,9 +2540,16 @@ void __init mem_init(void)
|
||||||
{
|
{
|
||||||
high_memory = __va(last_valid_pfn << PAGE_SHIFT);
|
high_memory = __va(last_valid_pfn << PAGE_SHIFT);
|
||||||
|
|
||||||
register_page_bootmem_info();
|
|
||||||
free_all_bootmem();
|
free_all_bootmem();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Must be done after boot memory is put on freelist, because here we
|
||||||
|
* might set fields in deferred struct pages that have not yet been
|
||||||
|
* initialized, and free_all_bootmem() initializes all the reserved
|
||||||
|
* deferred pages for us.
|
||||||
|
*/
|
||||||
|
register_page_bootmem_info();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Set up the zero page, mark it reserved, so that page count
|
* Set up the zero page, mark it reserved, so that page count
|
||||||
* is not manipulated when freeing the page from user ptes.
|
* is not manipulated when freeing the page from user ptes.
|
||||||
|
@ -2637,30 +2644,19 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
|
||||||
vstart = vstart & PMD_MASK;
|
vstart = vstart & PMD_MASK;
|
||||||
vend = ALIGN(vend, PMD_SIZE);
|
vend = ALIGN(vend, PMD_SIZE);
|
||||||
for (; vstart < vend; vstart += PMD_SIZE) {
|
for (; vstart < vend; vstart += PMD_SIZE) {
|
||||||
pgd_t *pgd = pgd_offset_k(vstart);
|
pgd_t *pgd = vmemmap_pgd_populate(vstart, node);
|
||||||
unsigned long pte;
|
unsigned long pte;
|
||||||
pud_t *pud;
|
pud_t *pud;
|
||||||
pmd_t *pmd;
|
pmd_t *pmd;
|
||||||
|
|
||||||
if (pgd_none(*pgd)) {
|
if (!pgd)
|
||||||
pud_t *new = vmemmap_alloc_block(PAGE_SIZE, node);
|
return -ENOMEM;
|
||||||
|
|
||||||
if (!new)
|
pud = vmemmap_pud_populate(pgd, vstart, node);
|
||||||
return -ENOMEM;
|
if (!pud)
|
||||||
pgd_populate(&init_mm, pgd, new);
|
return -ENOMEM;
|
||||||
}
|
|
||||||
|
|
||||||
pud = pud_offset(pgd, vstart);
|
|
||||||
if (pud_none(*pud)) {
|
|
||||||
pmd_t *new = vmemmap_alloc_block(PAGE_SIZE, node);
|
|
||||||
|
|
||||||
if (!new)
|
|
||||||
return -ENOMEM;
|
|
||||||
pud_populate(&init_mm, pud, new);
|
|
||||||
}
|
|
||||||
|
|
||||||
pmd = pmd_offset(pud, vstart);
|
pmd = pmd_offset(pud, vstart);
|
||||||
|
|
||||||
pte = pmd_val(*pmd);
|
pte = pmd_val(*pmd);
|
||||||
if (!(pte & _PAGE_VALID)) {
|
if (!(pte & _PAGE_VALID)) {
|
||||||
void *block = vmemmap_alloc_block(PMD_SIZE, node);
|
void *block = vmemmap_alloc_block(PMD_SIZE, node);
|
||||||
|
@ -2927,7 +2923,7 @@ void __flush_tlb_all(void)
|
||||||
pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
|
pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
|
||||||
unsigned long address)
|
unsigned long address)
|
||||||
{
|
{
|
||||||
struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
|
struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
|
||||||
pte_t *pte = NULL;
|
pte_t *pte = NULL;
|
||||||
|
|
||||||
if (page)
|
if (page)
|
||||||
|
@ -2939,11 +2935,11 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
|
||||||
pgtable_t pte_alloc_one(struct mm_struct *mm,
|
pgtable_t pte_alloc_one(struct mm_struct *mm,
|
||||||
unsigned long address)
|
unsigned long address)
|
||||||
{
|
{
|
||||||
struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
|
struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
|
||||||
if (!page)
|
if (!page)
|
||||||
return NULL;
|
return NULL;
|
||||||
if (!pgtable_page_ctor(page)) {
|
if (!pgtable_page_ctor(page)) {
|
||||||
free_hot_cold_page(page, 0);
|
free_unref_page(page);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
return (pte_t *) page_address(page);
|
return (pte_t *) page_address(page);
|
||||||
|
|
|
@ -409,7 +409,7 @@ void __homecache_free_pages(struct page *page, unsigned int order)
|
||||||
if (put_page_testzero(page)) {
|
if (put_page_testzero(page)) {
|
||||||
homecache_change_page_home(page, order, PAGE_HOME_HASH);
|
homecache_change_page_home(page, order, PAGE_HOME_HASH);
|
||||||
if (order == 0) {
|
if (order == 0) {
|
||||||
free_hot_cold_page(page, false);
|
free_unref_page(page);
|
||||||
} else {
|
} else {
|
||||||
init_page_count(page);
|
init_page_count(page);
|
||||||
__free_pages(page, order);
|
__free_pages(page, order);
|
||||||
|
|
|
@ -22,8 +22,6 @@
|
||||||
/* allocated in paging_init, zeroed in mem_init, and unchanged thereafter */
|
/* allocated in paging_init, zeroed in mem_init, and unchanged thereafter */
|
||||||
unsigned long *empty_zero_page = NULL;
|
unsigned long *empty_zero_page = NULL;
|
||||||
EXPORT_SYMBOL(empty_zero_page);
|
EXPORT_SYMBOL(empty_zero_page);
|
||||||
/* allocated in paging_init and unchanged thereafter */
|
|
||||||
static unsigned long *empty_bad_page = NULL;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialized during boot, and readonly for initializing page tables
|
* Initialized during boot, and readonly for initializing page tables
|
||||||
|
@ -146,7 +144,6 @@ void __init paging_init(void)
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE);
|
empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE);
|
||||||
empty_bad_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE);
|
|
||||||
for (i = 0; i < ARRAY_SIZE(zones_size); i++)
|
for (i = 0; i < ARRAY_SIZE(zones_size); i++)
|
||||||
zones_size[i] = 0;
|
zones_size[i] = 0;
|
||||||
|
|
||||||
|
|
|
@ -28,7 +28,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
|
||||||
#define pgd_alloc(mm) get_pgd_slow(mm)
|
#define pgd_alloc(mm) get_pgd_slow(mm)
|
||||||
#define pgd_free(mm, pgd) free_pgd_slow(mm, pgd)
|
#define pgd_free(mm, pgd) free_pgd_slow(mm, pgd)
|
||||||
|
|
||||||
#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
|
#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Allocate one PTE table.
|
* Allocate one PTE table.
|
||||||
|
|
|
@ -97,7 +97,7 @@ void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd)
|
||||||
pte = pmd_pgtable(*pmd);
|
pte = pmd_pgtable(*pmd);
|
||||||
pmd_clear(pmd);
|
pmd_clear(pmd);
|
||||||
pte_free(mm, pte);
|
pte_free(mm, pte);
|
||||||
atomic_long_dec(&mm->nr_ptes);
|
mm_dec_nr_ptes(mm);
|
||||||
pmd_free(mm, pmd);
|
pmd_free(mm, pmd);
|
||||||
mm_dec_nr_pmds(mm);
|
mm_dec_nr_pmds(mm);
|
||||||
free:
|
free:
|
||||||
|
|
|
@ -110,9 +110,8 @@ config X86
|
||||||
select HAVE_ARCH_AUDITSYSCALL
|
select HAVE_ARCH_AUDITSYSCALL
|
||||||
select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
|
select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
|
||||||
select HAVE_ARCH_JUMP_LABEL
|
select HAVE_ARCH_JUMP_LABEL
|
||||||
select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP
|
select HAVE_ARCH_KASAN if X86_64
|
||||||
select HAVE_ARCH_KGDB
|
select HAVE_ARCH_KGDB
|
||||||
select HAVE_ARCH_KMEMCHECK
|
|
||||||
select HAVE_ARCH_MMAP_RND_BITS if MMU
|
select HAVE_ARCH_MMAP_RND_BITS if MMU
|
||||||
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
|
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
|
||||||
select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT
|
select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT
|
||||||
|
@ -1430,7 +1429,7 @@ config ARCH_DMA_ADDR_T_64BIT
|
||||||
|
|
||||||
config X86_DIRECT_GBPAGES
|
config X86_DIRECT_GBPAGES
|
||||||
def_bool y
|
def_bool y
|
||||||
depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK
|
depends on X86_64 && !DEBUG_PAGEALLOC
|
||||||
---help---
|
---help---
|
||||||
Certain kernel features effectively disable kernel
|
Certain kernel features effectively disable kernel
|
||||||
linear 1 GB mappings (even if the CPU otherwise
|
linear 1 GB mappings (even if the CPU otherwise
|
||||||
|
|
|
@ -158,11 +158,6 @@ ifdef CONFIG_X86_X32
|
||||||
endif
|
endif
|
||||||
export CONFIG_X86_X32_ABI
|
export CONFIG_X86_X32_ABI
|
||||||
|
|
||||||
# Don't unroll struct assignments with kmemcheck enabled
|
|
||||||
ifeq ($(CONFIG_KMEMCHECK),y)
|
|
||||||
KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy)
|
|
||||||
endif
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# If the function graph tracer is used with mcount instead of fentry,
|
# If the function graph tracer is used with mcount instead of fentry,
|
||||||
# '-maccumulate-outgoing-args' is needed to prevent a GCC bug
|
# '-maccumulate-outgoing-args' is needed to prevent a GCC bug
|
||||||
|
|
|
@ -7,7 +7,6 @@
|
||||||
* Documentation/DMA-API.txt for documentation.
|
* Documentation/DMA-API.txt for documentation.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <linux/kmemcheck.h>
|
|
||||||
#include <linux/scatterlist.h>
|
#include <linux/scatterlist.h>
|
||||||
#include <linux/dma-debug.h>
|
#include <linux/dma-debug.h>
|
||||||
#include <asm/io.h>
|
#include <asm/io.h>
|
||||||
|
|
|
@ -1,43 +1 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
#ifndef ASM_X86_KMEMCHECK_H
|
|
||||||
#define ASM_X86_KMEMCHECK_H
|
|
||||||
|
|
||||||
#include <linux/types.h>
|
|
||||||
#include <asm/ptrace.h>
|
|
||||||
|
|
||||||
#ifdef CONFIG_KMEMCHECK
|
|
||||||
bool kmemcheck_active(struct pt_regs *regs);
|
|
||||||
|
|
||||||
void kmemcheck_show(struct pt_regs *regs);
|
|
||||||
void kmemcheck_hide(struct pt_regs *regs);
|
|
||||||
|
|
||||||
bool kmemcheck_fault(struct pt_regs *regs,
|
|
||||||
unsigned long address, unsigned long error_code);
|
|
||||||
bool kmemcheck_trap(struct pt_regs *regs);
|
|
||||||
#else
|
|
||||||
static inline bool kmemcheck_active(struct pt_regs *regs)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void kmemcheck_show(struct pt_regs *regs)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void kmemcheck_hide(struct pt_regs *regs)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline bool kmemcheck_fault(struct pt_regs *regs,
|
|
||||||
unsigned long address, unsigned long error_code)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline bool kmemcheck_trap(struct pt_regs *regs)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
#endif /* CONFIG_KMEMCHECK */
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -667,11 +667,6 @@ static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int pte_hidden(pte_t pte)
|
|
||||||
{
|
|
||||||
return pte_flags(pte) & _PAGE_HIDDEN;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int pmd_present(pmd_t pmd)
|
static inline int pmd_present(pmd_t pmd)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -32,7 +32,6 @@
|
||||||
|
|
||||||
#define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1
|
#define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1
|
||||||
#define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1
|
#define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1
|
||||||
#define _PAGE_BIT_HIDDEN _PAGE_BIT_SOFTW3 /* hidden by kmemcheck */
|
|
||||||
#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
|
#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
|
||||||
#define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4
|
#define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4
|
||||||
|
|
||||||
|
@ -79,18 +78,6 @@
|
||||||
#define _PAGE_KNL_ERRATUM_MASK 0
|
#define _PAGE_KNL_ERRATUM_MASK 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_KMEMCHECK
|
|
||||||
#define _PAGE_HIDDEN (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN)
|
|
||||||
#else
|
|
||||||
#define _PAGE_HIDDEN (_AT(pteval_t, 0))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The same hidden bit is used by kmemcheck, but since kmemcheck
|
|
||||||
* works on kernel pages while soft-dirty engine on user space,
|
|
||||||
* they do not conflict with each other.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifdef CONFIG_MEM_SOFT_DIRTY
|
#ifdef CONFIG_MEM_SOFT_DIRTY
|
||||||
#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SOFT_DIRTY)
|
#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SOFT_DIRTY)
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -179,8 +179,6 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len)
|
||||||
* No 3D Now!
|
* No 3D Now!
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef CONFIG_KMEMCHECK
|
|
||||||
|
|
||||||
#if (__GNUC__ >= 4)
|
#if (__GNUC__ >= 4)
|
||||||
#define memcpy(t, f, n) __builtin_memcpy(t, f, n)
|
#define memcpy(t, f, n) __builtin_memcpy(t, f, n)
|
||||||
#else
|
#else
|
||||||
|
@ -189,13 +187,6 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len)
|
||||||
? __constant_memcpy((t), (f), (n)) \
|
? __constant_memcpy((t), (f), (n)) \
|
||||||
: __memcpy((t), (f), (n)))
|
: __memcpy((t), (f), (n)))
|
||||||
#endif
|
#endif
|
||||||
#else
|
|
||||||
/*
|
|
||||||
* kmemcheck becomes very happy if we use the REP instructions unconditionally,
|
|
||||||
* because it means that we know both memory operands in advance.
|
|
||||||
*/
|
|
||||||
#define memcpy(t, f, n) __memcpy((t), (f), (n))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
#endif /* !CONFIG_FORTIFY_SOURCE */
|
#endif /* !CONFIG_FORTIFY_SOURCE */
|
||||||
|
|
|
@ -33,7 +33,6 @@ extern void *memcpy(void *to, const void *from, size_t len);
|
||||||
extern void *__memcpy(void *to, const void *from, size_t len);
|
extern void *__memcpy(void *to, const void *from, size_t len);
|
||||||
|
|
||||||
#ifndef CONFIG_FORTIFY_SOURCE
|
#ifndef CONFIG_FORTIFY_SOURCE
|
||||||
#ifndef CONFIG_KMEMCHECK
|
|
||||||
#if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4
|
#if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4
|
||||||
#define memcpy(dst, src, len) \
|
#define memcpy(dst, src, len) \
|
||||||
({ \
|
({ \
|
||||||
|
@ -46,13 +45,6 @@ extern void *__memcpy(void *to, const void *from, size_t len);
|
||||||
__ret; \
|
__ret; \
|
||||||
})
|
})
|
||||||
#endif
|
#endif
|
||||||
#else
|
|
||||||
/*
|
|
||||||
* kmemcheck becomes very happy if we use the REP instructions unconditionally,
|
|
||||||
* because it means that we know both memory operands in advance.
|
|
||||||
*/
|
|
||||||
#define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len))
|
|
||||||
#endif
|
|
||||||
#endif /* !CONFIG_FORTIFY_SOURCE */
|
#endif /* !CONFIG_FORTIFY_SOURCE */
|
||||||
|
|
||||||
#define __HAVE_ARCH_MEMSET
|
#define __HAVE_ARCH_MEMSET
|
||||||
|
|
|
@ -1,7 +1,4 @@
|
||||||
#ifdef CONFIG_KMEMCHECK
|
#ifndef _ASM_X86_XOR_H
|
||||||
/* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */
|
|
||||||
# include <asm-generic/xor.h>
|
|
||||||
#elif !defined(_ASM_X86_XOR_H)
|
|
||||||
#define _ASM_X86_XOR_H
|
#define _ASM_X86_XOR_H
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -187,21 +187,6 @@ static void early_init_intel(struct cpuinfo_x86 *c)
|
||||||
if (c->x86 == 6 && c->x86_model < 15)
|
if (c->x86 == 6 && c->x86_model < 15)
|
||||||
clear_cpu_cap(c, X86_FEATURE_PAT);
|
clear_cpu_cap(c, X86_FEATURE_PAT);
|
||||||
|
|
||||||
#ifdef CONFIG_KMEMCHECK
|
|
||||||
/*
|
|
||||||
* P4s have a "fast strings" feature which causes single-
|
|
||||||
* stepping REP instructions to only generate a #DB on
|
|
||||||
* cache-line boundaries.
|
|
||||||
*
|
|
||||||
* Ingo Molnar reported a Pentium D (model 6) and a Xeon
|
|
||||||
* (model 2) with the same problem.
|
|
||||||
*/
|
|
||||||
if (c->x86 == 15)
|
|
||||||
if (msr_clear_bit(MSR_IA32_MISC_ENABLE,
|
|
||||||
MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) > 0)
|
|
||||||
pr_info("kmemcheck: Disabling fast string operations\n");
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If fast string is not enabled in IA32_MISC_ENABLE for any reason,
|
* If fast string is not enabled in IA32_MISC_ENABLE for any reason,
|
||||||
* clear the fast string and enhanced fast string CPU capabilities.
|
* clear the fast string and enhanced fast string CPU capabilities.
|
||||||
|
|
|
@ -57,7 +57,7 @@
|
||||||
# error "Need more virtual address space for the ESPFIX hack"
|
# error "Need more virtual address space for the ESPFIX hack"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
|
#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO)
|
||||||
|
|
||||||
/* This contains the *bottom* address of the espfix stack */
|
/* This contains the *bottom* address of the espfix stack */
|
||||||
DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack);
|
DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack);
|
||||||
|
|
|
@ -42,7 +42,6 @@
|
||||||
#include <linux/edac.h>
|
#include <linux/edac.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <asm/kmemcheck.h>
|
|
||||||
#include <asm/stacktrace.h>
|
#include <asm/stacktrace.h>
|
||||||
#include <asm/processor.h>
|
#include <asm/processor.h>
|
||||||
#include <asm/debugreg.h>
|
#include <asm/debugreg.h>
|
||||||
|
@ -749,10 +748,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
|
||||||
if (!dr6 && user_mode(regs))
|
if (!dr6 && user_mode(regs))
|
||||||
user_icebp = 1;
|
user_icebp = 1;
|
||||||
|
|
||||||
/* Catch kmemcheck conditions! */
|
|
||||||
if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
|
|
||||||
goto exit;
|
|
||||||
|
|
||||||
/* Store the virtualized DR6 value */
|
/* Store the virtualized DR6 value */
|
||||||
tsk->thread.debugreg6 = dr6;
|
tsk->thread.debugreg6 = dr6;
|
||||||
|
|
||||||
|
|
|
@ -29,8 +29,6 @@ obj-$(CONFIG_X86_PTDUMP) += debug_pagetables.o
|
||||||
|
|
||||||
obj-$(CONFIG_HIGHMEM) += highmem_32.o
|
obj-$(CONFIG_HIGHMEM) += highmem_32.o
|
||||||
|
|
||||||
obj-$(CONFIG_KMEMCHECK) += kmemcheck/
|
|
||||||
|
|
||||||
KASAN_SANITIZE_kasan_init_$(BITS).o := n
|
KASAN_SANITIZE_kasan_init_$(BITS).o := n
|
||||||
obj-$(CONFIG_KASAN) += kasan_init_$(BITS).o
|
obj-$(CONFIG_KASAN) += kasan_init_$(BITS).o
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,6 @@
|
||||||
#include <asm/cpufeature.h> /* boot_cpu_has, ... */
|
#include <asm/cpufeature.h> /* boot_cpu_has, ... */
|
||||||
#include <asm/traps.h> /* dotraplinkage, ... */
|
#include <asm/traps.h> /* dotraplinkage, ... */
|
||||||
#include <asm/pgalloc.h> /* pgd_*(), ... */
|
#include <asm/pgalloc.h> /* pgd_*(), ... */
|
||||||
#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
|
|
||||||
#include <asm/fixmap.h> /* VSYSCALL_ADDR */
|
#include <asm/fixmap.h> /* VSYSCALL_ADDR */
|
||||||
#include <asm/vsyscall.h> /* emulate_vsyscall */
|
#include <asm/vsyscall.h> /* emulate_vsyscall */
|
||||||
#include <asm/vm86.h> /* struct vm86 */
|
#include <asm/vm86.h> /* struct vm86 */
|
||||||
|
@ -1256,8 +1255,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
|
||||||
* Detect and handle instructions that would cause a page fault for
|
* Detect and handle instructions that would cause a page fault for
|
||||||
* both a tracked kernel page and a userspace page.
|
* both a tracked kernel page and a userspace page.
|
||||||
*/
|
*/
|
||||||
if (kmemcheck_active(regs))
|
|
||||||
kmemcheck_hide(regs);
|
|
||||||
prefetchw(&mm->mmap_sem);
|
prefetchw(&mm->mmap_sem);
|
||||||
|
|
||||||
if (unlikely(kmmio_fault(regs, address)))
|
if (unlikely(kmmio_fault(regs, address)))
|
||||||
|
@ -1280,9 +1277,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
|
||||||
if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
|
if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
|
||||||
if (vmalloc_fault(address) >= 0)
|
if (vmalloc_fault(address) >= 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (kmemcheck_fault(regs, address, error_code))
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Can handle a stale RO->RW TLB: */
|
/* Can handle a stale RO->RW TLB: */
|
||||||
|
|
|
@ -92,8 +92,7 @@ __ref void *alloc_low_pages(unsigned int num)
|
||||||
unsigned int order;
|
unsigned int order;
|
||||||
|
|
||||||
order = get_order((unsigned long)num << PAGE_SHIFT);
|
order = get_order((unsigned long)num << PAGE_SHIFT);
|
||||||
return (void *)__get_free_pages(GFP_ATOMIC | __GFP_NOTRACK |
|
return (void *)__get_free_pages(GFP_ATOMIC | __GFP_ZERO, order);
|
||||||
__GFP_ZERO, order);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) {
|
if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) {
|
||||||
|
@ -164,12 +163,11 @@ static int page_size_mask;
|
||||||
static void __init probe_page_size_mask(void)
|
static void __init probe_page_size_mask(void)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* For CONFIG_KMEMCHECK or pagealloc debugging, identity mapping will
|
* For pagealloc debugging, identity mapping will use small pages.
|
||||||
* use small pages.
|
|
||||||
* This will simplify cpa(), which otherwise needs to support splitting
|
* This will simplify cpa(), which otherwise needs to support splitting
|
||||||
* large pages into small in interrupt context, etc.
|
* large pages into small in interrupt context, etc.
|
||||||
*/
|
*/
|
||||||
if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled() && !IS_ENABLED(CONFIG_KMEMCHECK))
|
if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled())
|
||||||
page_size_mask |= 1 << PG_LEVEL_2M;
|
page_size_mask |= 1 << PG_LEVEL_2M;
|
||||||
else
|
else
|
||||||
direct_gbpages = 0;
|
direct_gbpages = 0;
|
||||||
|
|
|
@ -184,7 +184,7 @@ static __ref void *spp_getpage(void)
|
||||||
void *ptr;
|
void *ptr;
|
||||||
|
|
||||||
if (after_bootmem)
|
if (after_bootmem)
|
||||||
ptr = (void *) get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
|
ptr = (void *) get_zeroed_page(GFP_ATOMIC);
|
||||||
else
|
else
|
||||||
ptr = alloc_bootmem_pages(PAGE_SIZE);
|
ptr = alloc_bootmem_pages(PAGE_SIZE);
|
||||||
|
|
||||||
|
@ -1173,12 +1173,18 @@ void __init mem_init(void)
|
||||||
|
|
||||||
/* clear_bss() has already cleared the empty_zero_page */
|
/* clear_bss() has already cleared the empty_zero_page */
|
||||||
|
|
||||||
register_page_bootmem_info();
|
|
||||||
|
|
||||||
/* this will put all memory onto the freelists */
|
/* this will put all memory onto the freelists */
|
||||||
free_all_bootmem();
|
free_all_bootmem();
|
||||||
after_bootmem = 1;
|
after_bootmem = 1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Must be done after boot memory is put on freelist, because here we
|
||||||
|
* might set fields in deferred struct pages that have not yet been
|
||||||
|
* initialized, and free_all_bootmem() initializes all the reserved
|
||||||
|
* deferred pages for us.
|
||||||
|
*/
|
||||||
|
register_page_bootmem_info();
|
||||||
|
|
||||||
/* Register memory areas for /proc/kcore */
|
/* Register memory areas for /proc/kcore */
|
||||||
kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR,
|
kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR,
|
||||||
PAGE_SIZE, KCORE_OTHER);
|
PAGE_SIZE, KCORE_OTHER);
|
||||||
|
@ -1399,7 +1405,6 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
|
||||||
vmemmap_verify((pte_t *)pmd, node, addr, next);
|
vmemmap_verify((pte_t *)pmd, node, addr, next);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
pr_warn_once("vmemmap: falling back to regular page backing\n");
|
|
||||||
if (vmemmap_populate_basepages(addr, next, node))
|
if (vmemmap_populate_basepages(addr, next, node))
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,12 +4,14 @@
|
||||||
#include <linux/bootmem.h>
|
#include <linux/bootmem.h>
|
||||||
#include <linux/kasan.h>
|
#include <linux/kasan.h>
|
||||||
#include <linux/kdebug.h>
|
#include <linux/kdebug.h>
|
||||||
|
#include <linux/memblock.h>
|
||||||
#include <linux/mm.h>
|
#include <linux/mm.h>
|
||||||
#include <linux/sched.h>
|
#include <linux/sched.h>
|
||||||
#include <linux/sched/task.h>
|
#include <linux/sched/task.h>
|
||||||
#include <linux/vmalloc.h>
|
#include <linux/vmalloc.h>
|
||||||
|
|
||||||
#include <asm/e820/types.h>
|
#include <asm/e820/types.h>
|
||||||
|
#include <asm/pgalloc.h>
|
||||||
#include <asm/tlbflush.h>
|
#include <asm/tlbflush.h>
|
||||||
#include <asm/sections.h>
|
#include <asm/sections.h>
|
||||||
#include <asm/pgtable.h>
|
#include <asm/pgtable.h>
|
||||||
|
@ -18,7 +20,134 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES];
|
||||||
|
|
||||||
static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
|
static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
|
||||||
|
|
||||||
static int __init map_range(struct range *range)
|
static __init void *early_alloc(size_t size, int nid)
|
||||||
|
{
|
||||||
|
return memblock_virt_alloc_try_nid_nopanic(size, size,
|
||||||
|
__pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
|
||||||
|
unsigned long end, int nid)
|
||||||
|
{
|
||||||
|
pte_t *pte;
|
||||||
|
|
||||||
|
if (pmd_none(*pmd)) {
|
||||||
|
void *p;
|
||||||
|
|
||||||
|
if (boot_cpu_has(X86_FEATURE_PSE) &&
|
||||||
|
((end - addr) == PMD_SIZE) &&
|
||||||
|
IS_ALIGNED(addr, PMD_SIZE)) {
|
||||||
|
p = early_alloc(PMD_SIZE, nid);
|
||||||
|
if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL))
|
||||||
|
return;
|
||||||
|
else if (p)
|
||||||
|
memblock_free(__pa(p), PMD_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
p = early_alloc(PAGE_SIZE, nid);
|
||||||
|
pmd_populate_kernel(&init_mm, pmd, p);
|
||||||
|
}
|
||||||
|
|
||||||
|
pte = pte_offset_kernel(pmd, addr);
|
||||||
|
do {
|
||||||
|
pte_t entry;
|
||||||
|
void *p;
|
||||||
|
|
||||||
|
if (!pte_none(*pte))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
p = early_alloc(PAGE_SIZE, nid);
|
||||||
|
entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL);
|
||||||
|
set_pte_at(&init_mm, addr, pte, entry);
|
||||||
|
} while (pte++, addr += PAGE_SIZE, addr != end);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __init kasan_populate_pud(pud_t *pud, unsigned long addr,
|
||||||
|
unsigned long end, int nid)
|
||||||
|
{
|
||||||
|
pmd_t *pmd;
|
||||||
|
unsigned long next;
|
||||||
|
|
||||||
|
if (pud_none(*pud)) {
|
||||||
|
void *p;
|
||||||
|
|
||||||
|
if (boot_cpu_has(X86_FEATURE_GBPAGES) &&
|
||||||
|
((end - addr) == PUD_SIZE) &&
|
||||||
|
IS_ALIGNED(addr, PUD_SIZE)) {
|
||||||
|
p = early_alloc(PUD_SIZE, nid);
|
||||||
|
if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL))
|
||||||
|
return;
|
||||||
|
else if (p)
|
||||||
|
memblock_free(__pa(p), PUD_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
p = early_alloc(PAGE_SIZE, nid);
|
||||||
|
pud_populate(&init_mm, pud, p);
|
||||||
|
}
|
||||||
|
|
||||||
|
pmd = pmd_offset(pud, addr);
|
||||||
|
do {
|
||||||
|
next = pmd_addr_end(addr, end);
|
||||||
|
if (!pmd_large(*pmd))
|
||||||
|
kasan_populate_pmd(pmd, addr, next, nid);
|
||||||
|
} while (pmd++, addr = next, addr != end);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr,
|
||||||
|
unsigned long end, int nid)
|
||||||
|
{
|
||||||
|
pud_t *pud;
|
||||||
|
unsigned long next;
|
||||||
|
|
||||||
|
if (p4d_none(*p4d)) {
|
||||||
|
void *p = early_alloc(PAGE_SIZE, nid);
|
||||||
|
|
||||||
|
p4d_populate(&init_mm, p4d, p);
|
||||||
|
}
|
||||||
|
|
||||||
|
pud = pud_offset(p4d, addr);
|
||||||
|
do {
|
||||||
|
next = pud_addr_end(addr, end);
|
||||||
|
if (!pud_large(*pud))
|
||||||
|
kasan_populate_pud(pud, addr, next, nid);
|
||||||
|
} while (pud++, addr = next, addr != end);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr,
|
||||||
|
unsigned long end, int nid)
|
||||||
|
{
|
||||||
|
void *p;
|
||||||
|
p4d_t *p4d;
|
||||||
|
unsigned long next;
|
||||||
|
|
||||||
|
if (pgd_none(*pgd)) {
|
||||||
|
p = early_alloc(PAGE_SIZE, nid);
|
||||||
|
pgd_populate(&init_mm, pgd, p);
|
||||||
|
}
|
||||||
|
|
||||||
|
p4d = p4d_offset(pgd, addr);
|
||||||
|
do {
|
||||||
|
next = p4d_addr_end(addr, end);
|
||||||
|
kasan_populate_p4d(p4d, addr, next, nid);
|
||||||
|
} while (p4d++, addr = next, addr != end);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __init kasan_populate_shadow(unsigned long addr, unsigned long end,
|
||||||
|
int nid)
|
||||||
|
{
|
||||||
|
pgd_t *pgd;
|
||||||
|
unsigned long next;
|
||||||
|
|
||||||
|
addr = addr & PAGE_MASK;
|
||||||
|
end = round_up(end, PAGE_SIZE);
|
||||||
|
pgd = pgd_offset_k(addr);
|
||||||
|
do {
|
||||||
|
next = pgd_addr_end(addr, end);
|
||||||
|
kasan_populate_pgd(pgd, addr, next, nid);
|
||||||
|
} while (pgd++, addr = next, addr != end);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __init map_range(struct range *range)
|
||||||
{
|
{
|
||||||
unsigned long start;
|
unsigned long start;
|
||||||
unsigned long end;
|
unsigned long end;
|
||||||
|
@ -26,7 +155,7 @@ static int __init map_range(struct range *range)
|
||||||
start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start));
|
start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start));
|
||||||
end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end));
|
end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end));
|
||||||
|
|
||||||
return vmemmap_populate(start, end, NUMA_NO_NODE);
|
kasan_populate_shadow(start, end, early_pfn_to_nid(range->start));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init clear_pgds(unsigned long start,
|
static void __init clear_pgds(unsigned long start,
|
||||||
|
@ -189,16 +318,16 @@ void __init kasan_init(void)
|
||||||
if (pfn_mapped[i].end == 0)
|
if (pfn_mapped[i].end == 0)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (map_range(&pfn_mapped[i]))
|
map_range(&pfn_mapped[i]);
|
||||||
panic("kasan: unable to allocate shadow!");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
kasan_populate_zero_shadow(
|
kasan_populate_zero_shadow(
|
||||||
kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
|
kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
|
||||||
kasan_mem_to_shadow((void *)__START_KERNEL_map));
|
kasan_mem_to_shadow((void *)__START_KERNEL_map));
|
||||||
|
|
||||||
vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext),
|
kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
|
||||||
(unsigned long)kasan_mem_to_shadow(_end),
|
(unsigned long)kasan_mem_to_shadow(_end),
|
||||||
NUMA_NO_NODE);
|
early_pfn_to_nid(__pa(_stext)));
|
||||||
|
|
||||||
kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
|
kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
|
||||||
(void *)KASAN_SHADOW_END);
|
(void *)KASAN_SHADOW_END);
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
obj-y := error.o kmemcheck.o opcode.o pte.o selftest.o shadow.o
|
|
|
@ -1,228 +1 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
#include <linux/interrupt.h>
|
|
||||||
#include <linux/kdebug.h>
|
|
||||||
#include <linux/kmemcheck.h>
|
|
||||||
#include <linux/kernel.h>
|
|
||||||
#include <linux/types.h>
|
|
||||||
#include <linux/ptrace.h>
|
|
||||||
#include <linux/stacktrace.h>
|
|
||||||
#include <linux/string.h>
|
|
||||||
|
|
||||||
#include "error.h"
|
|
||||||
#include "shadow.h"
|
|
||||||
|
|
||||||
enum kmemcheck_error_type {
|
|
||||||
KMEMCHECK_ERROR_INVALID_ACCESS,
|
|
||||||
KMEMCHECK_ERROR_BUG,
|
|
||||||
};
|
|
||||||
|
|
||||||
#define SHADOW_COPY_SIZE (1 << CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT)
|
|
||||||
|
|
||||||
struct kmemcheck_error {
|
|
||||||
enum kmemcheck_error_type type;
|
|
||||||
|
|
||||||
union {
|
|
||||||
/* KMEMCHECK_ERROR_INVALID_ACCESS */
|
|
||||||
struct {
|
|
||||||
/* Kind of access that caused the error */
|
|
||||||
enum kmemcheck_shadow state;
|
|
||||||
/* Address and size of the erroneous read */
|
|
||||||
unsigned long address;
|
|
||||||
unsigned int size;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
struct pt_regs regs;
|
|
||||||
struct stack_trace trace;
|
|
||||||
unsigned long trace_entries[32];
|
|
||||||
|
|
||||||
/* We compress it to a char. */
|
|
||||||
unsigned char shadow_copy[SHADOW_COPY_SIZE];
|
|
||||||
unsigned char memory_copy[SHADOW_COPY_SIZE];
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Create a ring queue of errors to output. We can't call printk() directly
|
|
||||||
* from the kmemcheck traps, since this may call the console drivers and
|
|
||||||
* result in a recursive fault.
|
|
||||||
*/
|
|
||||||
static struct kmemcheck_error error_fifo[CONFIG_KMEMCHECK_QUEUE_SIZE];
|
|
||||||
static unsigned int error_count;
|
|
||||||
static unsigned int error_rd;
|
|
||||||
static unsigned int error_wr;
|
|
||||||
static unsigned int error_missed_count;
|
|
||||||
|
|
||||||
static struct kmemcheck_error *error_next_wr(void)
|
|
||||||
{
|
|
||||||
struct kmemcheck_error *e;
|
|
||||||
|
|
||||||
if (error_count == ARRAY_SIZE(error_fifo)) {
|
|
||||||
++error_missed_count;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
e = &error_fifo[error_wr];
|
|
||||||
if (++error_wr == ARRAY_SIZE(error_fifo))
|
|
||||||
error_wr = 0;
|
|
||||||
++error_count;
|
|
||||||
return e;
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct kmemcheck_error *error_next_rd(void)
|
|
||||||
{
|
|
||||||
struct kmemcheck_error *e;
|
|
||||||
|
|
||||||
if (error_count == 0)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
e = &error_fifo[error_rd];
|
|
||||||
if (++error_rd == ARRAY_SIZE(error_fifo))
|
|
||||||
error_rd = 0;
|
|
||||||
--error_count;
|
|
||||||
return e;
|
|
||||||
}
|
|
||||||
|
|
||||||
void kmemcheck_error_recall(void)
|
|
||||||
{
|
|
||||||
static const char *desc[] = {
|
|
||||||
[KMEMCHECK_SHADOW_UNALLOCATED] = "unallocated",
|
|
||||||
[KMEMCHECK_SHADOW_UNINITIALIZED] = "uninitialized",
|
|
||||||
[KMEMCHECK_SHADOW_INITIALIZED] = "initialized",
|
|
||||||
[KMEMCHECK_SHADOW_FREED] = "freed",
|
|
||||||
};
|
|
||||||
|
|
||||||
static const char short_desc[] = {
|
|
||||||
[KMEMCHECK_SHADOW_UNALLOCATED] = 'a',
|
|
||||||
[KMEMCHECK_SHADOW_UNINITIALIZED] = 'u',
|
|
||||||
[KMEMCHECK_SHADOW_INITIALIZED] = 'i',
|
|
||||||
[KMEMCHECK_SHADOW_FREED] = 'f',
|
|
||||||
};
|
|
||||||
|
|
||||||
struct kmemcheck_error *e;
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
e = error_next_rd();
|
|
||||||
if (!e)
|
|
||||||
return;
|
|
||||||
|
|
||||||
switch (e->type) {
|
|
||||||
case KMEMCHECK_ERROR_INVALID_ACCESS:
|
|
||||||
printk(KERN_WARNING "WARNING: kmemcheck: Caught %d-bit read from %s memory (%p)\n",
|
|
||||||
8 * e->size, e->state < ARRAY_SIZE(desc) ?
|
|
||||||
desc[e->state] : "(invalid shadow state)",
|
|
||||||
(void *) e->address);
|
|
||||||
|
|
||||||
printk(KERN_WARNING);
|
|
||||||
for (i = 0; i < SHADOW_COPY_SIZE; ++i)
|
|
||||||
printk(KERN_CONT "%02x", e->memory_copy[i]);
|
|
||||||
printk(KERN_CONT "\n");
|
|
||||||
|
|
||||||
printk(KERN_WARNING);
|
|
||||||
for (i = 0; i < SHADOW_COPY_SIZE; ++i) {
|
|
||||||
if (e->shadow_copy[i] < ARRAY_SIZE(short_desc))
|
|
||||||
printk(KERN_CONT " %c", short_desc[e->shadow_copy[i]]);
|
|
||||||
else
|
|
||||||
printk(KERN_CONT " ?");
|
|
||||||
}
|
|
||||||
printk(KERN_CONT "\n");
|
|
||||||
printk(KERN_WARNING "%*c\n", 2 + 2
|
|
||||||
* (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^');
|
|
||||||
break;
|
|
||||||
case KMEMCHECK_ERROR_BUG:
|
|
||||||
printk(KERN_EMERG "ERROR: kmemcheck: Fatal error\n");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
__show_regs(&e->regs, 1);
|
|
||||||
print_stack_trace(&e->trace, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void do_wakeup(unsigned long data)
|
|
||||||
{
|
|
||||||
while (error_count > 0)
|
|
||||||
kmemcheck_error_recall();
|
|
||||||
|
|
||||||
if (error_missed_count > 0) {
|
|
||||||
printk(KERN_WARNING "kmemcheck: Lost %d error reports because "
|
|
||||||
"the queue was too small\n", error_missed_count);
|
|
||||||
error_missed_count = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static DECLARE_TASKLET(kmemcheck_tasklet, &do_wakeup, 0);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Save the context of an error report.
|
|
||||||
*/
|
|
||||||
void kmemcheck_error_save(enum kmemcheck_shadow state,
|
|
||||||
unsigned long address, unsigned int size, struct pt_regs *regs)
|
|
||||||
{
|
|
||||||
static unsigned long prev_ip;
|
|
||||||
|
|
||||||
struct kmemcheck_error *e;
|
|
||||||
void *shadow_copy;
|
|
||||||
void *memory_copy;
|
|
||||||
|
|
||||||
/* Don't report several adjacent errors from the same EIP. */
|
|
||||||
if (regs->ip == prev_ip)
|
|
||||||
return;
|
|
||||||
prev_ip = regs->ip;
|
|
||||||
|
|
||||||
e = error_next_wr();
|
|
||||||
if (!e)
|
|
||||||
return;
|
|
||||||
|
|
||||||
e->type = KMEMCHECK_ERROR_INVALID_ACCESS;
|
|
||||||
|
|
||||||
e->state = state;
|
|
||||||
e->address = address;
|
|
||||||
e->size = size;
|
|
||||||
|
|
||||||
/* Save regs */
|
|
||||||
memcpy(&e->regs, regs, sizeof(*regs));
|
|
||||||
|
|
||||||
/* Save stack trace */
|
|
||||||
e->trace.nr_entries = 0;
|
|
||||||
e->trace.entries = e->trace_entries;
|
|
||||||
e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
|
|
||||||
e->trace.skip = 0;
|
|
||||||
save_stack_trace_regs(regs, &e->trace);
|
|
||||||
|
|
||||||
/* Round address down to nearest 16 bytes */
|
|
||||||
shadow_copy = kmemcheck_shadow_lookup(address
|
|
||||||
& ~(SHADOW_COPY_SIZE - 1));
|
|
||||||
BUG_ON(!shadow_copy);
|
|
||||||
|
|
||||||
memcpy(e->shadow_copy, shadow_copy, SHADOW_COPY_SIZE);
|
|
||||||
|
|
||||||
kmemcheck_show_addr(address);
|
|
||||||
memory_copy = (void *) (address & ~(SHADOW_COPY_SIZE - 1));
|
|
||||||
memcpy(e->memory_copy, memory_copy, SHADOW_COPY_SIZE);
|
|
||||||
kmemcheck_hide_addr(address);
|
|
||||||
|
|
||||||
tasklet_hi_schedule_first(&kmemcheck_tasklet);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Save the context of a kmemcheck bug.
|
|
||||||
*/
|
|
||||||
void kmemcheck_error_save_bug(struct pt_regs *regs)
|
|
||||||
{
|
|
||||||
struct kmemcheck_error *e;
|
|
||||||
|
|
||||||
e = error_next_wr();
|
|
||||||
if (!e)
|
|
||||||
return;
|
|
||||||
|
|
||||||
e->type = KMEMCHECK_ERROR_BUG;
|
|
||||||
|
|
||||||
memcpy(&e->regs, regs, sizeof(*regs));
|
|
||||||
|
|
||||||
e->trace.nr_entries = 0;
|
|
||||||
e->trace.entries = e->trace_entries;
|
|
||||||
e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
|
|
||||||
e->trace.skip = 1;
|
|
||||||
save_stack_trace(&e->trace);
|
|
||||||
|
|
||||||
tasklet_hi_schedule_first(&kmemcheck_tasklet);
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,16 +1 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
#ifndef ARCH__X86__MM__KMEMCHECK__ERROR_H
|
|
||||||
#define ARCH__X86__MM__KMEMCHECK__ERROR_H
|
|
||||||
|
|
||||||
#include <linux/ptrace.h>
|
|
||||||
|
|
||||||
#include "shadow.h"
|
|
||||||
|
|
||||||
void kmemcheck_error_save(enum kmemcheck_shadow state,
|
|
||||||
unsigned long address, unsigned int size, struct pt_regs *regs);
|
|
||||||
|
|
||||||
void kmemcheck_error_save_bug(struct pt_regs *regs);
|
|
||||||
|
|
||||||
void kmemcheck_error_recall(void);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -1,658 +0,0 @@
|
||||||
/**
|
|
||||||
* kmemcheck - a heavyweight memory checker for the linux kernel
|
|
||||||
* Copyright (C) 2007, 2008 Vegard Nossum <vegardno@ifi.uio.no>
|
|
||||||
* (With a lot of help from Ingo Molnar and Pekka Enberg.)
|
|
||||||
*
|
|
||||||
* This program is free software; you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License (version 2) as
|
|
||||||
* published by the Free Software Foundation.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <linux/init.h>
|
|
||||||
#include <linux/interrupt.h>
|
|
||||||
#include <linux/kallsyms.h>
|
|
||||||
#include <linux/kernel.h>
|
|
||||||
#include <linux/kmemcheck.h>
|
|
||||||
#include <linux/mm.h>
|
|
||||||
#include <linux/page-flags.h>
|
|
||||||
#include <linux/percpu.h>
|
|
||||||
#include <linux/ptrace.h>
|
|
||||||
#include <linux/string.h>
|
|
||||||
#include <linux/types.h>
|
|
||||||
|
|
||||||
#include <asm/cacheflush.h>
|
|
||||||
#include <asm/kmemcheck.h>
|
|
||||||
#include <asm/pgtable.h>
|
|
||||||
#include <asm/tlbflush.h>
|
|
||||||
|
|
||||||
#include "error.h"
|
|
||||||
#include "opcode.h"
|
|
||||||
#include "pte.h"
|
|
||||||
#include "selftest.h"
|
|
||||||
#include "shadow.h"
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
|
|
||||||
# define KMEMCHECK_ENABLED 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
|
|
||||||
# define KMEMCHECK_ENABLED 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
|
|
||||||
# define KMEMCHECK_ENABLED 2
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int kmemcheck_enabled = KMEMCHECK_ENABLED;
|
|
||||||
|
|
||||||
int __init kmemcheck_init(void)
|
|
||||||
{
|
|
||||||
#ifdef CONFIG_SMP
|
|
||||||
/*
|
|
||||||
* Limit SMP to use a single CPU. We rely on the fact that this code
|
|
||||||
* runs before SMP is set up.
|
|
||||||
*/
|
|
||||||
if (setup_max_cpus > 1) {
|
|
||||||
printk(KERN_INFO
|
|
||||||
"kmemcheck: Limiting number of CPUs to 1.\n");
|
|
||||||
setup_max_cpus = 1;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (!kmemcheck_selftest()) {
|
|
||||||
printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n");
|
|
||||||
kmemcheck_enabled = 0;
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
printk(KERN_INFO "kmemcheck: Initialized\n");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
early_initcall(kmemcheck_init);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We need to parse the kmemcheck= option before any memory is allocated.
|
|
||||||
*/
|
|
||||||
static int __init param_kmemcheck(char *str)
|
|
||||||
{
|
|
||||||
int val;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
if (!str)
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
ret = kstrtoint(str, 0, &val);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
kmemcheck_enabled = val;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
early_param("kmemcheck", param_kmemcheck);
|
|
||||||
|
|
||||||
int kmemcheck_show_addr(unsigned long address)
|
|
||||||
{
|
|
||||||
pte_t *pte;
|
|
||||||
|
|
||||||
pte = kmemcheck_pte_lookup(address);
|
|
||||||
if (!pte)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
|
|
||||||
__flush_tlb_one(address);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int kmemcheck_hide_addr(unsigned long address)
|
|
||||||
{
|
|
||||||
pte_t *pte;
|
|
||||||
|
|
||||||
pte = kmemcheck_pte_lookup(address);
|
|
||||||
if (!pte)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
|
|
||||||
__flush_tlb_one(address);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct kmemcheck_context {
|
|
||||||
bool busy;
|
|
||||||
int balance;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* There can be at most two memory operands to an instruction, but
|
|
||||||
* each address can cross a page boundary -- so we may need up to
|
|
||||||
* four addresses that must be hidden/revealed for each fault.
|
|
||||||
*/
|
|
||||||
unsigned long addr[4];
|
|
||||||
unsigned long n_addrs;
|
|
||||||
unsigned long flags;
|
|
||||||
|
|
||||||
/* Data size of the instruction that caused a fault. */
|
|
||||||
unsigned int size;
|
|
||||||
};
|
|
||||||
|
|
||||||
static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context);
|
|
||||||
|
|
||||||
bool kmemcheck_active(struct pt_regs *regs)
|
|
||||||
{
|
|
||||||
struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
|
|
||||||
|
|
||||||
return data->balance > 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Save an address that needs to be shown/hidden */
|
|
||||||
static void kmemcheck_save_addr(unsigned long addr)
|
|
||||||
{
|
|
||||||
struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
|
|
||||||
|
|
||||||
BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr));
|
|
||||||
data->addr[data->n_addrs++] = addr;
|
|
||||||
}
|
|
||||||
|
|
||||||
static unsigned int kmemcheck_show_all(void)
|
|
||||||
{
|
|
||||||
struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
|
|
||||||
unsigned int i;
|
|
||||||
unsigned int n;
|
|
||||||
|
|
||||||
n = 0;
|
|
||||||
for (i = 0; i < data->n_addrs; ++i)
|
|
||||||
n += kmemcheck_show_addr(data->addr[i]);
|
|
||||||
|
|
||||||
return n;
|
|
||||||
}
|
|
||||||
|
|
||||||
static unsigned int kmemcheck_hide_all(void)
|
|
||||||
{
|
|
||||||
struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
|
|
||||||
unsigned int i;
|
|
||||||
unsigned int n;
|
|
||||||
|
|
||||||
n = 0;
|
|
||||||
for (i = 0; i < data->n_addrs; ++i)
|
|
||||||
n += kmemcheck_hide_addr(data->addr[i]);
|
|
||||||
|
|
||||||
return n;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Called from the #PF handler.
|
|
||||||
*/
|
|
||||||
void kmemcheck_show(struct pt_regs *regs)
|
|
||||||
{
|
|
||||||
struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
|
|
||||||
|
|
||||||
BUG_ON(!irqs_disabled());
|
|
||||||
|
|
||||||
if (unlikely(data->balance != 0)) {
|
|
||||||
kmemcheck_show_all();
|
|
||||||
kmemcheck_error_save_bug(regs);
|
|
||||||
data->balance = 0;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* None of the addresses actually belonged to kmemcheck. Note that
|
|
||||||
* this is not an error.
|
|
||||||
*/
|
|
||||||
if (kmemcheck_show_all() == 0)
|
|
||||||
return;
|
|
||||||
|
|
||||||
++data->balance;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The IF needs to be cleared as well, so that the faulting
|
|
||||||
* instruction can run "uninterrupted". Otherwise, we might take
|
|
||||||
* an interrupt and start executing that before we've had a chance
|
|
||||||
* to hide the page again.
|
|
||||||
*
|
|
||||||
* NOTE: In the rare case of multiple faults, we must not override
|
|
||||||
* the original flags:
|
|
||||||
*/
|
|
||||||
if (!(regs->flags & X86_EFLAGS_TF))
|
|
||||||
data->flags = regs->flags;
|
|
||||||
|
|
||||||
regs->flags |= X86_EFLAGS_TF;
|
|
||||||
regs->flags &= ~X86_EFLAGS_IF;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Called from the #DB handler.
|
|
||||||
*/
|
|
||||||
void kmemcheck_hide(struct pt_regs *regs)
|
|
||||||
{
|
|
||||||
struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
|
|
||||||
int n;
|
|
||||||
|
|
||||||
BUG_ON(!irqs_disabled());
|
|
||||||
|
|
||||||
if (unlikely(data->balance != 1)) {
|
|
||||||
kmemcheck_show_all();
|
|
||||||
kmemcheck_error_save_bug(regs);
|
|
||||||
data->n_addrs = 0;
|
|
||||||
data->balance = 0;
|
|
||||||
|
|
||||||
if (!(data->flags & X86_EFLAGS_TF))
|
|
||||||
regs->flags &= ~X86_EFLAGS_TF;
|
|
||||||
if (data->flags & X86_EFLAGS_IF)
|
|
||||||
regs->flags |= X86_EFLAGS_IF;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (kmemcheck_enabled)
|
|
||||||
n = kmemcheck_hide_all();
|
|
||||||
else
|
|
||||||
n = kmemcheck_show_all();
|
|
||||||
|
|
||||||
if (n == 0)
|
|
||||||
return;
|
|
||||||
|
|
||||||
--data->balance;
|
|
||||||
|
|
||||||
data->n_addrs = 0;
|
|
||||||
|
|
||||||
if (!(data->flags & X86_EFLAGS_TF))
|
|
||||||
regs->flags &= ~X86_EFLAGS_TF;
|
|
||||||
if (data->flags & X86_EFLAGS_IF)
|
|
||||||
regs->flags |= X86_EFLAGS_IF;
|
|
||||||
}
|
|
||||||
|
|
||||||
void kmemcheck_show_pages(struct page *p, unsigned int n)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
for (i = 0; i < n; ++i) {
|
|
||||||
unsigned long address;
|
|
||||||
pte_t *pte;
|
|
||||||
unsigned int level;
|
|
||||||
|
|
||||||
address = (unsigned long) page_address(&p[i]);
|
|
||||||
pte = lookup_address(address, &level);
|
|
||||||
BUG_ON(!pte);
|
|
||||||
BUG_ON(level != PG_LEVEL_4K);
|
|
||||||
|
|
||||||
set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
|
|
||||||
set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN));
|
|
||||||
__flush_tlb_one(address);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool kmemcheck_page_is_tracked(struct page *p)
|
|
||||||
{
|
|
||||||
/* This will also check the "hidden" flag of the PTE. */
|
|
||||||
return kmemcheck_pte_lookup((unsigned long) page_address(p));
|
|
||||||
}
|
|
||||||
|
|
||||||
void kmemcheck_hide_pages(struct page *p, unsigned int n)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
for (i = 0; i < n; ++i) {
|
|
||||||
unsigned long address;
|
|
||||||
pte_t *pte;
|
|
||||||
unsigned int level;
|
|
||||||
|
|
||||||
address = (unsigned long) page_address(&p[i]);
|
|
||||||
pte = lookup_address(address, &level);
|
|
||||||
BUG_ON(!pte);
|
|
||||||
BUG_ON(level != PG_LEVEL_4K);
|
|
||||||
|
|
||||||
set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
|
|
||||||
set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN));
|
|
||||||
__flush_tlb_one(address);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Access may NOT cross page boundary */
|
|
||||||
static void kmemcheck_read_strict(struct pt_regs *regs,
|
|
||||||
unsigned long addr, unsigned int size)
|
|
||||||
{
|
|
||||||
void *shadow;
|
|
||||||
enum kmemcheck_shadow status;
|
|
||||||
|
|
||||||
shadow = kmemcheck_shadow_lookup(addr);
|
|
||||||
if (!shadow)
|
|
||||||
return;
|
|
||||||
|
|
||||||
kmemcheck_save_addr(addr);
|
|
||||||
status = kmemcheck_shadow_test(shadow, size);
|
|
||||||
if (status == KMEMCHECK_SHADOW_INITIALIZED)
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (kmemcheck_enabled)
|
|
||||||
kmemcheck_error_save(status, addr, size, regs);
|
|
||||||
|
|
||||||
if (kmemcheck_enabled == 2)
|
|
||||||
kmemcheck_enabled = 0;
|
|
||||||
|
|
||||||
/* Don't warn about it again. */
|
|
||||||
kmemcheck_shadow_set(shadow, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
|
|
||||||
{
|
|
||||||
enum kmemcheck_shadow status;
|
|
||||||
void *shadow;
|
|
||||||
|
|
||||||
shadow = kmemcheck_shadow_lookup(addr);
|
|
||||||
if (!shadow)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
status = kmemcheck_shadow_test_all(shadow, size);
|
|
||||||
|
|
||||||
return status == KMEMCHECK_SHADOW_INITIALIZED;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Access may cross page boundary */
|
|
||||||
static void kmemcheck_read(struct pt_regs *regs,
|
|
||||||
unsigned long addr, unsigned int size)
|
|
||||||
{
|
|
||||||
unsigned long page = addr & PAGE_MASK;
|
|
||||||
unsigned long next_addr = addr + size - 1;
|
|
||||||
unsigned long next_page = next_addr & PAGE_MASK;
|
|
||||||
|
|
||||||
if (likely(page == next_page)) {
|
|
||||||
kmemcheck_read_strict(regs, addr, size);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* What we do is basically to split the access across the
|
|
||||||
* two pages and handle each part separately. Yes, this means
|
|
||||||
* that we may now see reads that are 3 + 5 bytes, for
|
|
||||||
* example (and if both are uninitialized, there will be two
|
|
||||||
* reports), but it makes the code a lot simpler.
|
|
||||||
*/
|
|
||||||
kmemcheck_read_strict(regs, addr, next_page - addr);
|
|
||||||
kmemcheck_read_strict(regs, next_page, next_addr - next_page);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void kmemcheck_write_strict(struct pt_regs *regs,
|
|
||||||
unsigned long addr, unsigned int size)
|
|
||||||
{
|
|
||||||
void *shadow;
|
|
||||||
|
|
||||||
shadow = kmemcheck_shadow_lookup(addr);
|
|
||||||
if (!shadow)
|
|
||||||
return;
|
|
||||||
|
|
||||||
kmemcheck_save_addr(addr);
|
|
||||||
kmemcheck_shadow_set(shadow, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void kmemcheck_write(struct pt_regs *regs,
|
|
||||||
unsigned long addr, unsigned int size)
|
|
||||||
{
|
|
||||||
unsigned long page = addr & PAGE_MASK;
|
|
||||||
unsigned long next_addr = addr + size - 1;
|
|
||||||
unsigned long next_page = next_addr & PAGE_MASK;
|
|
||||||
|
|
||||||
if (likely(page == next_page)) {
|
|
||||||
kmemcheck_write_strict(regs, addr, size);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* See comment in kmemcheck_read(). */
|
|
||||||
kmemcheck_write_strict(regs, addr, next_page - addr);
|
|
||||||
kmemcheck_write_strict(regs, next_page, next_addr - next_page);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Copying is hard. We have two addresses, each of which may be split across
|
|
||||||
* a page (and each page will have different shadow addresses).
|
|
||||||
*/
|
|
||||||
static void kmemcheck_copy(struct pt_regs *regs,
|
|
||||||
unsigned long src_addr, unsigned long dst_addr, unsigned int size)
|
|
||||||
{
|
|
||||||
uint8_t shadow[8];
|
|
||||||
enum kmemcheck_shadow status;
|
|
||||||
|
|
||||||
unsigned long page;
|
|
||||||
unsigned long next_addr;
|
|
||||||
unsigned long next_page;
|
|
||||||
|
|
||||||
uint8_t *x;
|
|
||||||
unsigned int i;
|
|
||||||
unsigned int n;
|
|
||||||
|
|
||||||
BUG_ON(size > sizeof(shadow));
|
|
||||||
|
|
||||||
page = src_addr & PAGE_MASK;
|
|
||||||
next_addr = src_addr + size - 1;
|
|
||||||
next_page = next_addr & PAGE_MASK;
|
|
||||||
|
|
||||||
if (likely(page == next_page)) {
|
|
||||||
/* Same page */
|
|
||||||
x = kmemcheck_shadow_lookup(src_addr);
|
|
||||||
if (x) {
|
|
||||||
kmemcheck_save_addr(src_addr);
|
|
||||||
for (i = 0; i < size; ++i)
|
|
||||||
shadow[i] = x[i];
|
|
||||||
} else {
|
|
||||||
for (i = 0; i < size; ++i)
|
|
||||||
shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
n = next_page - src_addr;
|
|
||||||
BUG_ON(n > sizeof(shadow));
|
|
||||||
|
|
||||||
/* First page */
|
|
||||||
x = kmemcheck_shadow_lookup(src_addr);
|
|
||||||
if (x) {
|
|
||||||
kmemcheck_save_addr(src_addr);
|
|
||||||
for (i = 0; i < n; ++i)
|
|
||||||
shadow[i] = x[i];
|
|
||||||
} else {
|
|
||||||
/* Not tracked */
|
|
||||||
for (i = 0; i < n; ++i)
|
|
||||||
shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Second page */
|
|
||||||
x = kmemcheck_shadow_lookup(next_page);
|
|
||||||
if (x) {
|
|
||||||
kmemcheck_save_addr(next_page);
|
|
||||||
for (i = n; i < size; ++i)
|
|
||||||
shadow[i] = x[i - n];
|
|
||||||
} else {
|
|
||||||
/* Not tracked */
|
|
||||||
for (i = n; i < size; ++i)
|
|
||||||
shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
page = dst_addr & PAGE_MASK;
|
|
||||||
next_addr = dst_addr + size - 1;
|
|
||||||
next_page = next_addr & PAGE_MASK;
|
|
||||||
|
|
||||||
if (likely(page == next_page)) {
|
|
||||||
/* Same page */
|
|
||||||
x = kmemcheck_shadow_lookup(dst_addr);
|
|
||||||
if (x) {
|
|
||||||
kmemcheck_save_addr(dst_addr);
|
|
||||||
for (i = 0; i < size; ++i) {
|
|
||||||
x[i] = shadow[i];
|
|
||||||
shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
n = next_page - dst_addr;
|
|
||||||
BUG_ON(n > sizeof(shadow));
|
|
||||||
|
|
||||||
/* First page */
|
|
||||||
x = kmemcheck_shadow_lookup(dst_addr);
|
|
||||||
if (x) {
|
|
||||||
kmemcheck_save_addr(dst_addr);
|
|
||||||
for (i = 0; i < n; ++i) {
|
|
||||||
x[i] = shadow[i];
|
|
||||||
shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Second page */
|
|
||||||
x = kmemcheck_shadow_lookup(next_page);
|
|
||||||
if (x) {
|
|
||||||
kmemcheck_save_addr(next_page);
|
|
||||||
for (i = n; i < size; ++i) {
|
|
||||||
x[i - n] = shadow[i];
|
|
||||||
shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
status = kmemcheck_shadow_test(shadow, size);
|
|
||||||
if (status == KMEMCHECK_SHADOW_INITIALIZED)
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (kmemcheck_enabled)
|
|
||||||
kmemcheck_error_save(status, src_addr, size, regs);
|
|
||||||
|
|
||||||
if (kmemcheck_enabled == 2)
|
|
||||||
kmemcheck_enabled = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
enum kmemcheck_method {
|
|
||||||
KMEMCHECK_READ,
|
|
||||||
KMEMCHECK_WRITE,
|
|
||||||
};
|
|
||||||
|
|
||||||
static void kmemcheck_access(struct pt_regs *regs,
|
|
||||||
unsigned long fallback_address, enum kmemcheck_method fallback_method)
|
|
||||||
{
|
|
||||||
const uint8_t *insn;
|
|
||||||
const uint8_t *insn_primary;
|
|
||||||
unsigned int size;
|
|
||||||
|
|
||||||
struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
|
|
||||||
|
|
||||||
/* Recursive fault -- ouch. */
|
|
||||||
if (data->busy) {
|
|
||||||
kmemcheck_show_addr(fallback_address);
|
|
||||||
kmemcheck_error_save_bug(regs);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
data->busy = true;
|
|
||||||
|
|
||||||
insn = (const uint8_t *) regs->ip;
|
|
||||||
insn_primary = kmemcheck_opcode_get_primary(insn);
|
|
||||||
|
|
||||||
kmemcheck_opcode_decode(insn, &size);
|
|
||||||
|
|
||||||
switch (insn_primary[0]) {
|
|
||||||
#ifdef CONFIG_KMEMCHECK_BITOPS_OK
|
|
||||||
/* AND, OR, XOR */
|
|
||||||
/*
|
|
||||||
* Unfortunately, these instructions have to be excluded from
|
|
||||||
* our regular checking since they access only some (and not
|
|
||||||
* all) bits. This clears out "bogus" bitfield-access warnings.
|
|
||||||
*/
|
|
||||||
case 0x80:
|
|
||||||
case 0x81:
|
|
||||||
case 0x82:
|
|
||||||
case 0x83:
|
|
||||||
switch ((insn_primary[1] >> 3) & 7) {
|
|
||||||
/* OR */
|
|
||||||
case 1:
|
|
||||||
/* AND */
|
|
||||||
case 4:
|
|
||||||
/* XOR */
|
|
||||||
case 6:
|
|
||||||
kmemcheck_write(regs, fallback_address, size);
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
/* ADD */
|
|
||||||
case 0:
|
|
||||||
/* ADC */
|
|
||||||
case 2:
|
|
||||||
/* SBB */
|
|
||||||
case 3:
|
|
||||||
/* SUB */
|
|
||||||
case 5:
|
|
||||||
/* CMP */
|
|
||||||
case 7:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* MOVS, MOVSB, MOVSW, MOVSD */
|
|
||||||
case 0xa4:
|
|
||||||
case 0xa5:
|
|
||||||
/*
|
|
||||||
* These instructions are special because they take two
|
|
||||||
* addresses, but we only get one page fault.
|
|
||||||
*/
|
|
||||||
kmemcheck_copy(regs, regs->si, regs->di, size);
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
/* CMPS, CMPSB, CMPSW, CMPSD */
|
|
||||||
case 0xa6:
|
|
||||||
case 0xa7:
|
|
||||||
kmemcheck_read(regs, regs->si, size);
|
|
||||||
kmemcheck_read(regs, regs->di, size);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If the opcode isn't special in any way, we use the data from the
|
|
||||||
* page fault handler to determine the address and type of memory
|
|
||||||
* access.
|
|
||||||
*/
|
|
||||||
switch (fallback_method) {
|
|
||||||
case KMEMCHECK_READ:
|
|
||||||
kmemcheck_read(regs, fallback_address, size);
|
|
||||||
goto out;
|
|
||||||
case KMEMCHECK_WRITE:
|
|
||||||
kmemcheck_write(regs, fallback_address, size);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
out:
|
|
||||||
data->busy = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
|
|
||||||
unsigned long error_code)
|
|
||||||
{
|
|
||||||
pte_t *pte;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* XXX: Is it safe to assume that memory accesses from virtual 86
|
|
||||||
* mode or non-kernel code segments will _never_ access kernel
|
|
||||||
* memory (e.g. tracked pages)? For now, we need this to avoid
|
|
||||||
* invoking kmemcheck for PnP BIOS calls.
|
|
||||||
*/
|
|
||||||
if (regs->flags & X86_VM_MASK)
|
|
||||||
return false;
|
|
||||||
if (regs->cs != __KERNEL_CS)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
pte = kmemcheck_pte_lookup(address);
|
|
||||||
if (!pte)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
WARN_ON_ONCE(in_nmi());
|
|
||||||
|
|
||||||
if (error_code & 2)
|
|
||||||
kmemcheck_access(regs, address, KMEMCHECK_WRITE);
|
|
||||||
else
|
|
||||||
kmemcheck_access(regs, address, KMEMCHECK_READ);
|
|
||||||
|
|
||||||
kmemcheck_show(regs);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool kmemcheck_trap(struct pt_regs *regs)
|
|
||||||
{
|
|
||||||
if (!kmemcheck_active(regs))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/* We're done. */
|
|
||||||
kmemcheck_hide(regs);
|
|
||||||
return true;
|
|
||||||
}
|
|
|
@ -1,107 +1 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
#include <linux/types.h>
|
|
||||||
|
|
||||||
#include "opcode.h"
|
|
||||||
|
|
||||||
static bool opcode_is_prefix(uint8_t b)
|
|
||||||
{
|
|
||||||
return
|
|
||||||
/* Group 1 */
|
|
||||||
b == 0xf0 || b == 0xf2 || b == 0xf3
|
|
||||||
/* Group 2 */
|
|
||||||
|| b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26
|
|
||||||
|| b == 0x64 || b == 0x65
|
|
||||||
/* Group 3 */
|
|
||||||
|| b == 0x66
|
|
||||||
/* Group 4 */
|
|
||||||
|| b == 0x67;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
|
||||||
static bool opcode_is_rex_prefix(uint8_t b)
|
|
||||||
{
|
|
||||||
return (b & 0xf0) == 0x40;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
static bool opcode_is_rex_prefix(uint8_t b)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define REX_W (1 << 3)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This is a VERY crude opcode decoder. We only need to find the size of the
|
|
||||||
* load/store that caused our #PF and this should work for all the opcodes
|
|
||||||
* that we care about. Moreover, the ones who invented this instruction set
|
|
||||||
* should be shot.
|
|
||||||
*/
|
|
||||||
void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size)
|
|
||||||
{
|
|
||||||
/* Default operand size */
|
|
||||||
int operand_size_override = 4;
|
|
||||||
|
|
||||||
/* prefixes */
|
|
||||||
for (; opcode_is_prefix(*op); ++op) {
|
|
||||||
if (*op == 0x66)
|
|
||||||
operand_size_override = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* REX prefix */
|
|
||||||
if (opcode_is_rex_prefix(*op)) {
|
|
||||||
uint8_t rex = *op;
|
|
||||||
|
|
||||||
++op;
|
|
||||||
if (rex & REX_W) {
|
|
||||||
switch (*op) {
|
|
||||||
case 0x63:
|
|
||||||
*size = 4;
|
|
||||||
return;
|
|
||||||
case 0x0f:
|
|
||||||
++op;
|
|
||||||
|
|
||||||
switch (*op) {
|
|
||||||
case 0xb6:
|
|
||||||
case 0xbe:
|
|
||||||
*size = 1;
|
|
||||||
return;
|
|
||||||
case 0xb7:
|
|
||||||
case 0xbf:
|
|
||||||
*size = 2;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
*size = 8;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* escape opcode */
|
|
||||||
if (*op == 0x0f) {
|
|
||||||
++op;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This is move with zero-extend and sign-extend, respectively;
|
|
||||||
* we don't have to think about 0xb6/0xbe, because this is
|
|
||||||
* already handled in the conditional below.
|
|
||||||
*/
|
|
||||||
if (*op == 0xb7 || *op == 0xbf)
|
|
||||||
operand_size_override = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
*size = (*op & 1) ? operand_size_override : 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op)
|
|
||||||
{
|
|
||||||
/* skip prefixes */
|
|
||||||
while (opcode_is_prefix(*op))
|
|
||||||
++op;
|
|
||||||
if (opcode_is_rex_prefix(*op))
|
|
||||||
++op;
|
|
||||||
return op;
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,10 +1 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
#ifndef ARCH__X86__MM__KMEMCHECK__OPCODE_H
|
|
||||||
#define ARCH__X86__MM__KMEMCHECK__OPCODE_H
|
|
||||||
|
|
||||||
#include <linux/types.h>
|
|
||||||
|
|
||||||
void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size);
|
|
||||||
const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -1,23 +1 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
#include <linux/mm.h>
|
|
||||||
|
|
||||||
#include <asm/pgtable.h>
|
|
||||||
|
|
||||||
#include "pte.h"
|
|
||||||
|
|
||||||
pte_t *kmemcheck_pte_lookup(unsigned long address)
|
|
||||||
{
|
|
||||||
pte_t *pte;
|
|
||||||
unsigned int level;
|
|
||||||
|
|
||||||
pte = lookup_address(address, &level);
|
|
||||||
if (!pte)
|
|
||||||
return NULL;
|
|
||||||
if (level != PG_LEVEL_4K)
|
|
||||||
return NULL;
|
|
||||||
if (!pte_hidden(*pte))
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
return pte;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
|
@ -1,11 +1 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
#ifndef ARCH__X86__MM__KMEMCHECK__PTE_H
|
|
||||||
#define ARCH__X86__MM__KMEMCHECK__PTE_H
|
|
||||||
|
|
||||||
#include <linux/mm.h>
|
|
||||||
|
|
||||||
#include <asm/pgtable.h>
|
|
||||||
|
|
||||||
pte_t *kmemcheck_pte_lookup(unsigned long address);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -1,71 +1 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
#include <linux/bug.h>
|
|
||||||
#include <linux/kernel.h>
|
|
||||||
|
|
||||||
#include "opcode.h"
|
|
||||||
#include "selftest.h"
|
|
||||||
|
|
||||||
struct selftest_opcode {
|
|
||||||
unsigned int expected_size;
|
|
||||||
const uint8_t *insn;
|
|
||||||
const char *desc;
|
|
||||||
};
|
|
||||||
|
|
||||||
static const struct selftest_opcode selftest_opcodes[] = {
|
|
||||||
/* REP MOVS */
|
|
||||||
{1, "\xf3\xa4", "rep movsb <mem8>, <mem8>"},
|
|
||||||
{4, "\xf3\xa5", "rep movsl <mem32>, <mem32>"},
|
|
||||||
|
|
||||||
/* MOVZX / MOVZXD */
|
|
||||||
{1, "\x66\x0f\xb6\x51\xf8", "movzwq <mem8>, <reg16>"},
|
|
||||||
{1, "\x0f\xb6\x51\xf8", "movzwq <mem8>, <reg32>"},
|
|
||||||
|
|
||||||
/* MOVSX / MOVSXD */
|
|
||||||
{1, "\x66\x0f\xbe\x51\xf8", "movswq <mem8>, <reg16>"},
|
|
||||||
{1, "\x0f\xbe\x51\xf8", "movswq <mem8>, <reg32>"},
|
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
|
||||||
/* MOVZX / MOVZXD */
|
|
||||||
{1, "\x49\x0f\xb6\x51\xf8", "movzbq <mem8>, <reg64>"},
|
|
||||||
{2, "\x49\x0f\xb7\x51\xf8", "movzbq <mem16>, <reg64>"},
|
|
||||||
|
|
||||||
/* MOVSX / MOVSXD */
|
|
||||||
{1, "\x49\x0f\xbe\x51\xf8", "movsbq <mem8>, <reg64>"},
|
|
||||||
{2, "\x49\x0f\xbf\x51\xf8", "movsbq <mem16>, <reg64>"},
|
|
||||||
{4, "\x49\x63\x51\xf8", "movslq <mem32>, <reg64>"},
|
|
||||||
#endif
|
|
||||||
};
|
|
||||||
|
|
||||||
static bool selftest_opcode_one(const struct selftest_opcode *op)
|
|
||||||
{
|
|
||||||
unsigned size;
|
|
||||||
|
|
||||||
kmemcheck_opcode_decode(op->insn, &size);
|
|
||||||
|
|
||||||
if (size == op->expected_size)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
printk(KERN_WARNING "kmemcheck: opcode %s: expected size %d, got %d\n",
|
|
||||||
op->desc, op->expected_size, size);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool selftest_opcodes_all(void)
|
|
||||||
{
|
|
||||||
bool pass = true;
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
for (i = 0; i < ARRAY_SIZE(selftest_opcodes); ++i)
|
|
||||||
pass = pass && selftest_opcode_one(&selftest_opcodes[i]);
|
|
||||||
|
|
||||||
return pass;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool kmemcheck_selftest(void)
|
|
||||||
{
|
|
||||||
bool pass = true;
|
|
||||||
|
|
||||||
pass = pass && selftest_opcodes_all();
|
|
||||||
|
|
||||||
return pass;
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,7 +1 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
#ifndef ARCH_X86_MM_KMEMCHECK_SELFTEST_H
|
|
||||||
#define ARCH_X86_MM_KMEMCHECK_SELFTEST_H
|
|
||||||
|
|
||||||
bool kmemcheck_selftest(void);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -1,173 +0,0 @@
|
||||||
#include <linux/kmemcheck.h>
|
|
||||||
#include <linux/export.h>
|
|
||||||
#include <linux/mm.h>
|
|
||||||
|
|
||||||
#include <asm/page.h>
|
|
||||||
#include <asm/pgtable.h>
|
|
||||||
|
|
||||||
#include "pte.h"
|
|
||||||
#include "shadow.h"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Return the shadow address for the given address. Returns NULL if the
|
|
||||||
* address is not tracked.
|
|
||||||
*
|
|
||||||
* We need to be extremely careful not to follow any invalid pointers,
|
|
||||||
* because this function can be called for *any* possible address.
|
|
||||||
*/
|
|
||||||
void *kmemcheck_shadow_lookup(unsigned long address)
|
|
||||||
{
|
|
||||||
pte_t *pte;
|
|
||||||
struct page *page;
|
|
||||||
|
|
||||||
if (!virt_addr_valid(address))
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
pte = kmemcheck_pte_lookup(address);
|
|
||||||
if (!pte)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
page = virt_to_page(address);
|
|
||||||
if (!page->shadow)
|
|
||||||
return NULL;
|
|
||||||
return page->shadow + (address & (PAGE_SIZE - 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
static void mark_shadow(void *address, unsigned int n,
|
|
||||||
enum kmemcheck_shadow status)
|
|
||||||
{
|
|
||||||
unsigned long addr = (unsigned long) address;
|
|
||||||
unsigned long last_addr = addr + n - 1;
|
|
||||||
unsigned long page = addr & PAGE_MASK;
|
|
||||||
unsigned long last_page = last_addr & PAGE_MASK;
|
|
||||||
unsigned int first_n;
|
|
||||||
void *shadow;
|
|
||||||
|
|
||||||
/* If the memory range crosses a page boundary, stop there. */
|
|
||||||
if (page == last_page)
|
|
||||||
first_n = n;
|
|
||||||
else
|
|
||||||
first_n = page + PAGE_SIZE - addr;
|
|
||||||
|
|
||||||
shadow = kmemcheck_shadow_lookup(addr);
|
|
||||||
if (shadow)
|
|
||||||
memset(shadow, status, first_n);
|
|
||||||
|
|
||||||
addr += first_n;
|
|
||||||
n -= first_n;
|
|
||||||
|
|
||||||
/* Do full-page memset()s. */
|
|
||||||
while (n >= PAGE_SIZE) {
|
|
||||||
shadow = kmemcheck_shadow_lookup(addr);
|
|
||||||
if (shadow)
|
|
||||||
memset(shadow, status, PAGE_SIZE);
|
|
||||||
|
|
||||||
addr += PAGE_SIZE;
|
|
||||||
n -= PAGE_SIZE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Do the remaining page, if any. */
|
|
||||||
if (n > 0) {
|
|
||||||
shadow = kmemcheck_shadow_lookup(addr);
|
|
||||||
if (shadow)
|
|
||||||
memset(shadow, status, n);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void kmemcheck_mark_unallocated(void *address, unsigned int n)
|
|
||||||
{
|
|
||||||
mark_shadow(address, n, KMEMCHECK_SHADOW_UNALLOCATED);
|
|
||||||
}
|
|
||||||
|
|
||||||
void kmemcheck_mark_uninitialized(void *address, unsigned int n)
|
|
||||||
{
|
|
||||||
mark_shadow(address, n, KMEMCHECK_SHADOW_UNINITIALIZED);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Fill the shadow memory of the given address such that the memory at that
|
|
||||||
* address is marked as being initialized.
|
|
||||||
*/
|
|
||||||
void kmemcheck_mark_initialized(void *address, unsigned int n)
|
|
||||||
{
|
|
||||||
mark_shadow(address, n, KMEMCHECK_SHADOW_INITIALIZED);
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL_GPL(kmemcheck_mark_initialized);
|
|
||||||
|
|
||||||
void kmemcheck_mark_freed(void *address, unsigned int n)
|
|
||||||
{
|
|
||||||
mark_shadow(address, n, KMEMCHECK_SHADOW_FREED);
|
|
||||||
}
|
|
||||||
|
|
||||||
void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
for (i = 0; i < n; ++i)
|
|
||||||
kmemcheck_mark_unallocated(page_address(&p[i]), PAGE_SIZE);
|
|
||||||
}
|
|
||||||
|
|
||||||
void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
for (i = 0; i < n; ++i)
|
|
||||||
kmemcheck_mark_uninitialized(page_address(&p[i]), PAGE_SIZE);
|
|
||||||
}
|
|
||||||
|
|
||||||
void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
for (i = 0; i < n; ++i)
|
|
||||||
kmemcheck_mark_initialized(page_address(&p[i]), PAGE_SIZE);
|
|
||||||
}
|
|
||||||
|
|
||||||
enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
|
|
||||||
{
|
|
||||||
#ifdef CONFIG_KMEMCHECK_PARTIAL_OK
|
|
||||||
uint8_t *x;
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
x = shadow;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Make sure _some_ bytes are initialized. Gcc frequently generates
|
|
||||||
* code to access neighboring bytes.
|
|
||||||
*/
|
|
||||||
for (i = 0; i < size; ++i) {
|
|
||||||
if (x[i] == KMEMCHECK_SHADOW_INITIALIZED)
|
|
||||||
return x[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
return x[0];
|
|
||||||
#else
|
|
||||||
return kmemcheck_shadow_test_all(shadow, size);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, unsigned int size)
|
|
||||||
{
|
|
||||||
uint8_t *x;
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
x = shadow;
|
|
||||||
|
|
||||||
/* All bytes must be initialized. */
|
|
||||||
for (i = 0; i < size; ++i) {
|
|
||||||
if (x[i] != KMEMCHECK_SHADOW_INITIALIZED)
|
|
||||||
return x[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
return x[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
void kmemcheck_shadow_set(void *shadow, unsigned int size)
|
|
||||||
{
|
|
||||||
uint8_t *x;
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
x = shadow;
|
|
||||||
for (i = 0; i < size; ++i)
|
|
||||||
x[i] = KMEMCHECK_SHADOW_INITIALIZED;
|
|
||||||
}
|
|
|
@ -1,19 +1 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
#ifndef ARCH__X86__MM__KMEMCHECK__SHADOW_H
|
|
||||||
#define ARCH__X86__MM__KMEMCHECK__SHADOW_H
|
|
||||||
|
|
||||||
enum kmemcheck_shadow {
|
|
||||||
KMEMCHECK_SHADOW_UNALLOCATED,
|
|
||||||
KMEMCHECK_SHADOW_UNINITIALIZED,
|
|
||||||
KMEMCHECK_SHADOW_INITIALIZED,
|
|
||||||
KMEMCHECK_SHADOW_FREED,
|
|
||||||
};
|
|
||||||
|
|
||||||
void *kmemcheck_shadow_lookup(unsigned long address);
|
|
||||||
|
|
||||||
enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size);
|
|
||||||
enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow,
|
|
||||||
unsigned int size);
|
|
||||||
void kmemcheck_shadow_set(void *shadow, unsigned int size);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -753,7 +753,7 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
|
||||||
|
|
||||||
if (!debug_pagealloc_enabled())
|
if (!debug_pagealloc_enabled())
|
||||||
spin_unlock(&cpa_lock);
|
spin_unlock(&cpa_lock);
|
||||||
base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
|
base = alloc_pages(GFP_KERNEL, 0);
|
||||||
if (!debug_pagealloc_enabled())
|
if (!debug_pagealloc_enabled())
|
||||||
spin_lock(&cpa_lock);
|
spin_lock(&cpa_lock);
|
||||||
if (!base)
|
if (!base)
|
||||||
|
@ -904,7 +904,7 @@ static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end)
|
||||||
|
|
||||||
static int alloc_pte_page(pmd_t *pmd)
|
static int alloc_pte_page(pmd_t *pmd)
|
||||||
{
|
{
|
||||||
pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
|
pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL);
|
||||||
if (!pte)
|
if (!pte)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
@ -914,7 +914,7 @@ static int alloc_pte_page(pmd_t *pmd)
|
||||||
|
|
||||||
static int alloc_pmd_page(pud_t *pud)
|
static int alloc_pmd_page(pud_t *pud)
|
||||||
{
|
{
|
||||||
pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
|
pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
|
||||||
if (!pmd)
|
if (!pmd)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
@ -1120,7 +1120,7 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
|
||||||
pgd_entry = cpa->pgd + pgd_index(addr);
|
pgd_entry = cpa->pgd + pgd_index(addr);
|
||||||
|
|
||||||
if (pgd_none(*pgd_entry)) {
|
if (pgd_none(*pgd_entry)) {
|
||||||
p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
|
p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL);
|
||||||
if (!p4d)
|
if (!p4d)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
@ -1132,7 +1132,7 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
|
||||||
*/
|
*/
|
||||||
p4d = p4d_offset(pgd_entry, addr);
|
p4d = p4d_offset(pgd_entry, addr);
|
||||||
if (p4d_none(*p4d)) {
|
if (p4d_none(*p4d)) {
|
||||||
pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
|
pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
|
||||||
if (!pud)
|
if (!pud)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
#include <asm/fixmap.h>
|
#include <asm/fixmap.h>
|
||||||
#include <asm/mtrr.h>
|
#include <asm/mtrr.h>
|
||||||
|
|
||||||
#define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | __GFP_ZERO)
|
#define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO)
|
||||||
|
|
||||||
#ifdef CONFIG_HIGHPTE
|
#ifdef CONFIG_HIGHPTE
|
||||||
#define PGALLOC_USER_GFP __GFP_HIGHMEM
|
#define PGALLOC_USER_GFP __GFP_HIGHMEM
|
||||||
|
|
|
@ -207,7 +207,7 @@ int __init efi_alloc_page_tables(void)
|
||||||
if (efi_enabled(EFI_OLD_MEMMAP))
|
if (efi_enabled(EFI_OLD_MEMMAP))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO;
|
gfp_mask = GFP_KERNEL | __GFP_ZERO;
|
||||||
efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
|
efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
|
||||||
if (!efi_pgd)
|
if (!efi_pgd)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
|
@ -2047,7 +2047,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
|
||||||
* Allocate space for all possible cpus to avoid allocation at
|
* Allocate space for all possible cpus to avoid allocation at
|
||||||
* runtime
|
* runtime
|
||||||
*/
|
*/
|
||||||
hctx->ctxs = kmalloc_node(nr_cpu_ids * sizeof(void *),
|
hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *),
|
||||||
GFP_KERNEL, node);
|
GFP_KERNEL, node);
|
||||||
if (!hctx->ctxs)
|
if (!hctx->ctxs)
|
||||||
goto unregister_cpu_notifier;
|
goto unregister_cpu_notifier;
|
||||||
|
|
|
@ -122,12 +122,7 @@ calibrate_xor_blocks(void)
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
b1 = (void *) __get_free_pages(GFP_KERNEL, 2);
|
||||||
* Note: Since the memory is not actually used for _anything_ but to
|
|
||||||
* test the XOR speed, we don't really want kmemcheck to warn about
|
|
||||||
* reading uninitialized bytes here.
|
|
||||||
*/
|
|
||||||
b1 = (void *) __get_free_pages(GFP_KERNEL | __GFP_NOTRACK, 2);
|
|
||||||
if (!b1) {
|
if (!b1) {
|
||||||
printk(KERN_WARNING "xor: Yikes! No memory available.\n");
|
printk(KERN_WARNING "xor: Yikes! No memory available.\n");
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
#include <linux/radix-tree.h>
|
#include <linux/radix-tree.h>
|
||||||
#include <linux/fs.h>
|
#include <linux/fs.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
|
#include <linux/backing-dev.h>
|
||||||
#ifdef CONFIG_BLK_DEV_RAM_DAX
|
#ifdef CONFIG_BLK_DEV_RAM_DAX
|
||||||
#include <linux/pfn_t.h>
|
#include <linux/pfn_t.h>
|
||||||
#include <linux/dax.h>
|
#include <linux/dax.h>
|
||||||
|
@ -448,6 +449,7 @@ static struct brd_device *brd_alloc(int i)
|
||||||
disk->flags = GENHD_FL_EXT_DEVT;
|
disk->flags = GENHD_FL_EXT_DEVT;
|
||||||
sprintf(disk->disk_name, "ram%d", i);
|
sprintf(disk->disk_name, "ram%d", i);
|
||||||
set_capacity(disk, rd_size * 2);
|
set_capacity(disk, rd_size * 2);
|
||||||
|
disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
|
||||||
|
|
||||||
#ifdef CONFIG_BLK_DEV_RAM_DAX
|
#ifdef CONFIG_BLK_DEV_RAM_DAX
|
||||||
queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue);
|
queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue);
|
||||||
|
|
|
@ -23,14 +23,14 @@ static const char * const backends[] = {
|
||||||
#if IS_ENABLED(CONFIG_CRYPTO_LZ4)
|
#if IS_ENABLED(CONFIG_CRYPTO_LZ4)
|
||||||
"lz4",
|
"lz4",
|
||||||
#endif
|
#endif
|
||||||
#if IS_ENABLED(CONFIG_CRYPTO_DEFLATE)
|
|
||||||
"deflate",
|
|
||||||
#endif
|
|
||||||
#if IS_ENABLED(CONFIG_CRYPTO_LZ4HC)
|
#if IS_ENABLED(CONFIG_CRYPTO_LZ4HC)
|
||||||
"lz4hc",
|
"lz4hc",
|
||||||
#endif
|
#endif
|
||||||
#if IS_ENABLED(CONFIG_CRYPTO_842)
|
#if IS_ENABLED(CONFIG_CRYPTO_842)
|
||||||
"842",
|
"842",
|
||||||
|
#endif
|
||||||
|
#if IS_ENABLED(CONFIG_CRYPTO_ZSTD)
|
||||||
|
"zstd",
|
||||||
#endif
|
#endif
|
||||||
NULL
|
NULL
|
||||||
};
|
};
|
||||||
|
|
|
@ -122,14 +122,6 @@ static inline bool is_partial_io(struct bio_vec *bvec)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static void zram_revalidate_disk(struct zram *zram)
|
|
||||||
{
|
|
||||||
revalidate_disk(zram->disk);
|
|
||||||
/* revalidate_disk reset the BDI_CAP_STABLE_WRITES so set again */
|
|
||||||
zram->disk->queue->backing_dev_info->capabilities |=
|
|
||||||
BDI_CAP_STABLE_WRITES;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check if request is within bounds and aligned on zram logical blocks.
|
* Check if request is within bounds and aligned on zram logical blocks.
|
||||||
*/
|
*/
|
||||||
|
@ -436,7 +428,7 @@ static void put_entry_bdev(struct zram *zram, unsigned long entry)
|
||||||
WARN_ON_ONCE(!was_set);
|
WARN_ON_ONCE(!was_set);
|
||||||
}
|
}
|
||||||
|
|
||||||
void zram_page_end_io(struct bio *bio)
|
static void zram_page_end_io(struct bio *bio)
|
||||||
{
|
{
|
||||||
struct page *page = bio->bi_io_vec[0].bv_page;
|
struct page *page = bio->bi_io_vec[0].bv_page;
|
||||||
|
|
||||||
|
@ -1373,7 +1365,8 @@ static ssize_t disksize_store(struct device *dev,
|
||||||
zram->comp = comp;
|
zram->comp = comp;
|
||||||
zram->disksize = disksize;
|
zram->disksize = disksize;
|
||||||
set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
|
set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
|
||||||
zram_revalidate_disk(zram);
|
|
||||||
|
revalidate_disk(zram->disk);
|
||||||
up_write(&zram->init_lock);
|
up_write(&zram->init_lock);
|
||||||
|
|
||||||
return len;
|
return len;
|
||||||
|
@ -1420,7 +1413,7 @@ static ssize_t reset_store(struct device *dev,
|
||||||
/* Make sure all the pending I/O are finished */
|
/* Make sure all the pending I/O are finished */
|
||||||
fsync_bdev(bdev);
|
fsync_bdev(bdev);
|
||||||
zram_reset_device(zram);
|
zram_reset_device(zram);
|
||||||
zram_revalidate_disk(zram);
|
revalidate_disk(zram->disk);
|
||||||
bdput(bdev);
|
bdput(bdev);
|
||||||
|
|
||||||
mutex_lock(&bdev->bd_mutex);
|
mutex_lock(&bdev->bd_mutex);
|
||||||
|
@ -1539,6 +1532,7 @@ static int zram_add(void)
|
||||||
/* zram devices sort of resembles non-rotational disks */
|
/* zram devices sort of resembles non-rotational disks */
|
||||||
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
|
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
|
||||||
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
|
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* To ensure that we always get PAGE_SIZE aligned
|
* To ensure that we always get PAGE_SIZE aligned
|
||||||
* and n*PAGE_SIZED sized I/O requests.
|
* and n*PAGE_SIZED sized I/O requests.
|
||||||
|
@ -1563,6 +1557,8 @@ static int zram_add(void)
|
||||||
if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
|
if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
|
||||||
blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
|
blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
|
||||||
|
|
||||||
|
zram->disk->queue->backing_dev_info->capabilities |=
|
||||||
|
(BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO);
|
||||||
add_disk(zram->disk);
|
add_disk(zram->disk);
|
||||||
|
|
||||||
ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
|
ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
|
||||||
|
|
|
@ -259,7 +259,6 @@
|
||||||
#include <linux/cryptohash.h>
|
#include <linux/cryptohash.h>
|
||||||
#include <linux/fips.h>
|
#include <linux/fips.h>
|
||||||
#include <linux/ptrace.h>
|
#include <linux/ptrace.h>
|
||||||
#include <linux/kmemcheck.h>
|
|
||||||
#include <linux/workqueue.h>
|
#include <linux/workqueue.h>
|
||||||
#include <linux/irq.h>
|
#include <linux/irq.h>
|
||||||
#include <linux/syscalls.h>
|
#include <linux/syscalls.h>
|
||||||
|
|
|
@ -553,8 +553,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
|
||||||
* invalidated it. Free it and try again
|
* invalidated it. Free it and try again
|
||||||
*/
|
*/
|
||||||
release_pages(e->user_pages,
|
release_pages(e->user_pages,
|
||||||
e->robj->tbo.ttm->num_pages,
|
e->robj->tbo.ttm->num_pages);
|
||||||
false);
|
|
||||||
kvfree(e->user_pages);
|
kvfree(e->user_pages);
|
||||||
e->user_pages = NULL;
|
e->user_pages = NULL;
|
||||||
}
|
}
|
||||||
|
@ -691,8 +690,7 @@ error_free_pages:
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
release_pages(e->user_pages,
|
release_pages(e->user_pages,
|
||||||
e->robj->tbo.ttm->num_pages,
|
e->robj->tbo.ttm->num_pages);
|
||||||
false);
|
|
||||||
kvfree(e->user_pages);
|
kvfree(e->user_pages);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -347,7 +347,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
free_pages:
|
free_pages:
|
||||||
release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages, false);
|
release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);
|
||||||
|
|
||||||
unlock_mmap_sem:
|
unlock_mmap_sem:
|
||||||
up_read(&current->mm->mmap_sem);
|
up_read(&current->mm->mmap_sem);
|
||||||
|
|
|
@ -659,7 +659,7 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
release_pages:
|
release_pages:
|
||||||
release_pages(pages, pinned, 0);
|
release_pages(pages, pinned);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -779,7 +779,7 @@ static struct page **etnaviv_gem_userptr_do_get_pages(
|
||||||
up_read(&mm->mmap_sem);
|
up_read(&mm->mmap_sem);
|
||||||
|
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
release_pages(pvec, pinned, 0);
|
release_pages(pvec, pinned);
|
||||||
kvfree(pvec);
|
kvfree(pvec);
|
||||||
return ERR_PTR(ret);
|
return ERR_PTR(ret);
|
||||||
}
|
}
|
||||||
|
@ -852,7 +852,7 @@ static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
release_pages(pvec, pinned, 0);
|
release_pages(pvec, pinned);
|
||||||
kvfree(pvec);
|
kvfree(pvec);
|
||||||
|
|
||||||
work = kmalloc(sizeof(*work), GFP_KERNEL);
|
work = kmalloc(sizeof(*work), GFP_KERNEL);
|
||||||
|
@ -886,7 +886,7 @@ static void etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj)
|
||||||
if (etnaviv_obj->pages) {
|
if (etnaviv_obj->pages) {
|
||||||
int npages = etnaviv_obj->base.size >> PAGE_SHIFT;
|
int npages = etnaviv_obj->base.size >> PAGE_SHIFT;
|
||||||
|
|
||||||
release_pages(etnaviv_obj->pages, npages, 0);
|
release_pages(etnaviv_obj->pages, npages);
|
||||||
kvfree(etnaviv_obj->pages);
|
kvfree(etnaviv_obj->pages);
|
||||||
}
|
}
|
||||||
put_task_struct(etnaviv_obj->userptr.task);
|
put_task_struct(etnaviv_obj->userptr.task);
|
||||||
|
|
|
@ -1859,7 +1859,7 @@ static void i915_address_space_init(struct i915_address_space *vm,
|
||||||
INIT_LIST_HEAD(&vm->unbound_list);
|
INIT_LIST_HEAD(&vm->unbound_list);
|
||||||
|
|
||||||
list_add_tail(&vm->global_link, &dev_priv->vm_list);
|
list_add_tail(&vm->global_link, &dev_priv->vm_list);
|
||||||
pagevec_init(&vm->free_pages, false);
|
pagevec_init(&vm->free_pages);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void i915_address_space_fini(struct i915_address_space *vm)
|
static void i915_address_space_fini(struct i915_address_space *vm)
|
||||||
|
|
|
@ -554,7 +554,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
|
||||||
}
|
}
|
||||||
mutex_unlock(&obj->mm.lock);
|
mutex_unlock(&obj->mm.lock);
|
||||||
|
|
||||||
release_pages(pvec, pinned, 0);
|
release_pages(pvec, pinned);
|
||||||
kvfree(pvec);
|
kvfree(pvec);
|
||||||
|
|
||||||
i915_gem_object_put(obj);
|
i915_gem_object_put(obj);
|
||||||
|
@ -668,7 +668,7 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
|
||||||
__i915_gem_userptr_set_active(obj, true);
|
__i915_gem_userptr_set_active(obj, true);
|
||||||
|
|
||||||
if (IS_ERR(pages))
|
if (IS_ERR(pages))
|
||||||
release_pages(pvec, pinned, 0);
|
release_pages(pvec, pinned);
|
||||||
kvfree(pvec);
|
kvfree(pvec);
|
||||||
|
|
||||||
return pages;
|
return pages;
|
||||||
|
|
|
@ -597,7 +597,7 @@ release_sg:
|
||||||
kfree(ttm->sg);
|
kfree(ttm->sg);
|
||||||
|
|
||||||
release_pages:
|
release_pages:
|
||||||
release_pages(ttm->pages, pinned, 0);
|
release_pages(ttm->pages, pinned);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1667,8 +1667,9 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
|
||||||
}
|
}
|
||||||
if (!rcd->rcvegrbuf_phys) {
|
if (!rcd->rcvegrbuf_phys) {
|
||||||
rcd->rcvegrbuf_phys =
|
rcd->rcvegrbuf_phys =
|
||||||
kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]),
|
kmalloc_array_node(chunk,
|
||||||
GFP_KERNEL, rcd->node_id);
|
sizeof(rcd->rcvegrbuf_phys[0]),
|
||||||
|
GFP_KERNEL, rcd->node_id);
|
||||||
if (!rcd->rcvegrbuf_phys)
|
if (!rcd->rcvegrbuf_phys)
|
||||||
goto bail_rcvegrbuf;
|
goto bail_rcvegrbuf;
|
||||||
}
|
}
|
||||||
|
|
|
@ -238,7 +238,7 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi)
|
||||||
rdi->qp_dev->qp_table_size = rdi->dparms.qp_table_size;
|
rdi->qp_dev->qp_table_size = rdi->dparms.qp_table_size;
|
||||||
rdi->qp_dev->qp_table_bits = ilog2(rdi->dparms.qp_table_size);
|
rdi->qp_dev->qp_table_bits = ilog2(rdi->dparms.qp_table_size);
|
||||||
rdi->qp_dev->qp_table =
|
rdi->qp_dev->qp_table =
|
||||||
kmalloc_node(rdi->qp_dev->qp_table_size *
|
kmalloc_array_node(rdi->qp_dev->qp_table_size,
|
||||||
sizeof(*rdi->qp_dev->qp_table),
|
sizeof(*rdi->qp_dev->qp_table),
|
||||||
GFP_KERNEL, rdi->dparms.node);
|
GFP_KERNEL, rdi->dparms.node);
|
||||||
if (!rdi->qp_dev->qp_table)
|
if (!rdi->qp_dev->qp_table)
|
||||||
|
|
|
@ -15,7 +15,6 @@
|
||||||
#include <linux/errno.h>
|
#include <linux/errno.h>
|
||||||
#include <linux/err.h>
|
#include <linux/err.h>
|
||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
#include <linux/kmemcheck.h>
|
|
||||||
#include <linux/ctype.h>
|
#include <linux/ctype.h>
|
||||||
#include <linux/delay.h>
|
#include <linux/delay.h>
|
||||||
#include <linux/idr.h>
|
#include <linux/idr.h>
|
||||||
|
@ -904,7 +903,6 @@ struct c2port_device *c2port_device_register(char *name,
|
||||||
return ERR_PTR(-EINVAL);
|
return ERR_PTR(-EINVAL);
|
||||||
|
|
||||||
c2dev = kmalloc(sizeof(struct c2port_device), GFP_KERNEL);
|
c2dev = kmalloc(sizeof(struct c2port_device), GFP_KERNEL);
|
||||||
kmemcheck_annotate_bitfield(c2dev, flags);
|
|
||||||
if (unlikely(!c2dev))
|
if (unlikely(!c2dev))
|
||||||
return ERR_PTR(-ENOMEM);
|
return ERR_PTR(-ENOMEM);
|
||||||
|
|
||||||
|
|
|
@ -517,7 +517,7 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
|
||||||
|
|
||||||
|
|
||||||
rc = ena_alloc_rx_page(rx_ring, rx_info,
|
rc = ena_alloc_rx_page(rx_ring, rx_info,
|
||||||
__GFP_COLD | GFP_ATOMIC | __GFP_COMP);
|
GFP_ATOMIC | __GFP_COMP);
|
||||||
if (unlikely(rc < 0)) {
|
if (unlikely(rc < 0)) {
|
||||||
netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
|
netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
|
||||||
"failed to alloc buffer for rx queue %d\n",
|
"failed to alloc buffer for rx queue %d\n",
|
||||||
|
|
|
@ -295,7 +295,7 @@ again:
|
||||||
order = alloc_order;
|
order = alloc_order;
|
||||||
|
|
||||||
/* Try to obtain pages, decreasing order if necessary */
|
/* Try to obtain pages, decreasing order if necessary */
|
||||||
gfp = GFP_ATOMIC | __GFP_COLD | __GFP_COMP | __GFP_NOWARN;
|
gfp = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN;
|
||||||
while (order >= 0) {
|
while (order >= 0) {
|
||||||
pages = alloc_pages_node(node, gfp, order);
|
pages = alloc_pages_node(node, gfp, order);
|
||||||
if (pages)
|
if (pages)
|
||||||
|
|
|
@ -304,8 +304,7 @@ int aq_ring_rx_fill(struct aq_ring_s *self)
|
||||||
buff->flags = 0U;
|
buff->flags = 0U;
|
||||||
buff->len = AQ_CFG_RX_FRAME_MAX;
|
buff->len = AQ_CFG_RX_FRAME_MAX;
|
||||||
|
|
||||||
buff->page = alloc_pages(GFP_ATOMIC | __GFP_COLD |
|
buff->page = alloc_pages(GFP_ATOMIC | __GFP_COMP, pages_order);
|
||||||
__GFP_COMP, pages_order);
|
|
||||||
if (!buff->page) {
|
if (!buff->page) {
|
||||||
err = -ENOMEM;
|
err = -ENOMEM;
|
||||||
goto err_exit;
|
goto err_exit;
|
||||||
|
|
|
@ -198,7 +198,7 @@ static inline void
|
||||||
struct sk_buff *skb;
|
struct sk_buff *skb;
|
||||||
struct octeon_skb_page_info *skb_pg_info;
|
struct octeon_skb_page_info *skb_pg_info;
|
||||||
|
|
||||||
page = alloc_page(GFP_ATOMIC | __GFP_COLD);
|
page = alloc_page(GFP_ATOMIC);
|
||||||
if (unlikely(!page))
|
if (unlikely(!page))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
|
|
@ -193,7 +193,7 @@ static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)
|
||||||
|
|
||||||
if (mlx4_en_prepare_rx_desc(priv, ring,
|
if (mlx4_en_prepare_rx_desc(priv, ring,
|
||||||
ring->actual_size,
|
ring->actual_size,
|
||||||
GFP_KERNEL | __GFP_COLD)) {
|
GFP_KERNEL)) {
|
||||||
if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
|
if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
|
||||||
en_err(priv, "Failed to allocate enough rx buffers\n");
|
en_err(priv, "Failed to allocate enough rx buffers\n");
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
@ -551,8 +551,7 @@ static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
|
||||||
do {
|
do {
|
||||||
if (mlx4_en_prepare_rx_desc(priv, ring,
|
if (mlx4_en_prepare_rx_desc(priv, ring,
|
||||||
ring->prod & ring->size_mask,
|
ring->prod & ring->size_mask,
|
||||||
GFP_ATOMIC | __GFP_COLD |
|
GFP_ATOMIC | __GFP_MEMALLOC))
|
||||||
__GFP_MEMALLOC))
|
|
||||||
break;
|
break;
|
||||||
ring->prod++;
|
ring->prod++;
|
||||||
} while (likely(--missing));
|
} while (likely(--missing));
|
||||||
|
|
|
@ -1185,7 +1185,7 @@ static void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
|
||||||
} else {
|
} else {
|
||||||
struct page *page;
|
struct page *page;
|
||||||
|
|
||||||
page = alloc_page(GFP_KERNEL | __GFP_COLD);
|
page = alloc_page(GFP_KERNEL);
|
||||||
frag = page ? page_address(page) : NULL;
|
frag = page ? page_address(page) : NULL;
|
||||||
}
|
}
|
||||||
if (!frag) {
|
if (!frag) {
|
||||||
|
|
|
@ -1092,8 +1092,7 @@ static int ql_get_next_chunk(struct ql_adapter *qdev, struct rx_ring *rx_ring,
|
||||||
{
|
{
|
||||||
if (!rx_ring->pg_chunk.page) {
|
if (!rx_ring->pg_chunk.page) {
|
||||||
u64 map;
|
u64 map;
|
||||||
rx_ring->pg_chunk.page = alloc_pages(__GFP_COLD | __GFP_COMP |
|
rx_ring->pg_chunk.page = alloc_pages(__GFP_COMP | GFP_ATOMIC,
|
||||||
GFP_ATOMIC,
|
|
||||||
qdev->lbq_buf_order);
|
qdev->lbq_buf_order);
|
||||||
if (unlikely(!rx_ring->pg_chunk.page)) {
|
if (unlikely(!rx_ring->pg_chunk.page)) {
|
||||||
netif_err(qdev, drv, qdev->ndev,
|
netif_err(qdev, drv, qdev->ndev,
|
||||||
|
|
|
@ -163,7 +163,7 @@ static int ef4_init_rx_buffers(struct ef4_rx_queue *rx_queue, bool atomic)
|
||||||
do {
|
do {
|
||||||
page = ef4_reuse_page(rx_queue);
|
page = ef4_reuse_page(rx_queue);
|
||||||
if (page == NULL) {
|
if (page == NULL) {
|
||||||
page = alloc_pages(__GFP_COLD | __GFP_COMP |
|
page = alloc_pages(__GFP_COMP |
|
||||||
(atomic ? GFP_ATOMIC : GFP_KERNEL),
|
(atomic ? GFP_ATOMIC : GFP_KERNEL),
|
||||||
efx->rx_buffer_order);
|
efx->rx_buffer_order);
|
||||||
if (unlikely(page == NULL))
|
if (unlikely(page == NULL))
|
||||||
|
|
|
@ -163,7 +163,7 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic)
|
||||||
do {
|
do {
|
||||||
page = efx_reuse_page(rx_queue);
|
page = efx_reuse_page(rx_queue);
|
||||||
if (page == NULL) {
|
if (page == NULL) {
|
||||||
page = alloc_pages(__GFP_COLD | __GFP_COMP |
|
page = alloc_pages(__GFP_COMP |
|
||||||
(atomic ? GFP_ATOMIC : GFP_KERNEL),
|
(atomic ? GFP_ATOMIC : GFP_KERNEL),
|
||||||
efx->rx_buffer_order);
|
efx->rx_buffer_order);
|
||||||
if (unlikely(page == NULL))
|
if (unlikely(page == NULL))
|
||||||
|
|
|
@ -335,7 +335,7 @@ static int xlgmac_alloc_pages(struct xlgmac_pdata *pdata,
|
||||||
dma_addr_t pages_dma;
|
dma_addr_t pages_dma;
|
||||||
|
|
||||||
/* Try to obtain pages, decreasing order if necessary */
|
/* Try to obtain pages, decreasing order if necessary */
|
||||||
gfp |= __GFP_COLD | __GFP_COMP | __GFP_NOWARN;
|
gfp |= __GFP_COMP | __GFP_NOWARN;
|
||||||
while (order >= 0) {
|
while (order >= 0) {
|
||||||
pages = alloc_pages(gfp, order);
|
pages = alloc_pages(gfp, order);
|
||||||
if (pages)
|
if (pages)
|
||||||
|
|
|
@ -906,7 +906,7 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq)
|
||||||
sw_data[0] = (u32)bufptr;
|
sw_data[0] = (u32)bufptr;
|
||||||
} else {
|
} else {
|
||||||
/* Allocate a secondary receive queue entry */
|
/* Allocate a secondary receive queue entry */
|
||||||
page = alloc_page(GFP_ATOMIC | GFP_DMA | __GFP_COLD);
|
page = alloc_page(GFP_ATOMIC | GFP_DMA);
|
||||||
if (unlikely(!page)) {
|
if (unlikely(!page)) {
|
||||||
dev_warn_ratelimited(netcp->ndev_dev, "Secondary page alloc failed\n");
|
dev_warn_ratelimited(netcp->ndev_dev, "Secondary page alloc failed\n");
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
|
@ -1030,7 +1030,6 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
|
||||||
int err;
|
int err;
|
||||||
bool oom;
|
bool oom;
|
||||||
|
|
||||||
gfp |= __GFP_COLD;
|
|
||||||
do {
|
do {
|
||||||
if (vi->mergeable_rx_bufs)
|
if (vi->mergeable_rx_bufs)
|
||||||
err = add_recvbuf_mergeable(vi, rq, gfp);
|
err = add_recvbuf_mergeable(vi, rq, gfp);
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
#include <linux/ndctl.h>
|
#include <linux/ndctl.h>
|
||||||
#include <linux/fs.h>
|
#include <linux/fs.h>
|
||||||
#include <linux/nd.h>
|
#include <linux/nd.h>
|
||||||
|
#include <linux/backing-dev.h>
|
||||||
#include "btt.h"
|
#include "btt.h"
|
||||||
#include "nd.h"
|
#include "nd.h"
|
||||||
|
|
||||||
|
@ -1402,6 +1403,8 @@ static int btt_blk_init(struct btt *btt)
|
||||||
btt->btt_disk->private_data = btt;
|
btt->btt_disk->private_data = btt;
|
||||||
btt->btt_disk->queue = btt->btt_queue;
|
btt->btt_disk->queue = btt->btt_queue;
|
||||||
btt->btt_disk->flags = GENHD_FL_EXT_DEVT;
|
btt->btt_disk->flags = GENHD_FL_EXT_DEVT;
|
||||||
|
btt->btt_disk->queue->backing_dev_info->capabilities |=
|
||||||
|
BDI_CAP_SYNCHRONOUS_IO;
|
||||||
|
|
||||||
blk_queue_make_request(btt->btt_queue, btt_make_request);
|
blk_queue_make_request(btt->btt_queue, btt_make_request);
|
||||||
blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
|
blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
|
||||||
|
|
|
@ -31,6 +31,7 @@
|
||||||
#include <linux/uio.h>
|
#include <linux/uio.h>
|
||||||
#include <linux/dax.h>
|
#include <linux/dax.h>
|
||||||
#include <linux/nd.h>
|
#include <linux/nd.h>
|
||||||
|
#include <linux/backing-dev.h>
|
||||||
#include "pmem.h"
|
#include "pmem.h"
|
||||||
#include "pfn.h"
|
#include "pfn.h"
|
||||||
#include "nd.h"
|
#include "nd.h"
|
||||||
|
@ -394,6 +395,7 @@ static int pmem_attach_disk(struct device *dev,
|
||||||
disk->fops = &pmem_fops;
|
disk->fops = &pmem_fops;
|
||||||
disk->queue = q;
|
disk->queue = q;
|
||||||
disk->flags = GENHD_FL_EXT_DEVT;
|
disk->flags = GENHD_FL_EXT_DEVT;
|
||||||
|
disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
|
||||||
nvdimm_namespace_disk_name(ndns, disk->disk_name);
|
nvdimm_namespace_disk_name(ndns, disk->disk_name);
|
||||||
set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset)
|
set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset)
|
||||||
/ 512);
|
/ 512);
|
||||||
|
|
|
@ -1152,7 +1152,7 @@ static int mdc_read_page_remote(void *data, struct page *page0)
|
||||||
}
|
}
|
||||||
|
|
||||||
for (npages = 1; npages < max_pages; npages++) {
|
for (npages = 1; npages < max_pages; npages++) {
|
||||||
page = page_cache_alloc_cold(inode->i_mapping);
|
page = page_cache_alloc(inode->i_mapping);
|
||||||
if (!page)
|
if (!page)
|
||||||
break;
|
break;
|
||||||
page_pool[npages] = page;
|
page_pool[npages] = page;
|
||||||
|
|
|
@ -308,7 +308,7 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error,
|
||||||
_enter("{%x:%u},%lx-%lx",
|
_enter("{%x:%u},%lx-%lx",
|
||||||
vnode->fid.vid, vnode->fid.vnode, first, last);
|
vnode->fid.vid, vnode->fid.vnode, first, last);
|
||||||
|
|
||||||
pagevec_init(&pv, 0);
|
pagevec_init(&pv);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
_debug("kill %lx-%lx", first, last);
|
_debug("kill %lx-%lx", first, last);
|
||||||
|
@ -497,20 +497,13 @@ static int afs_writepages_region(struct address_space *mapping,
|
||||||
_enter(",,%lx,%lx,", index, end);
|
_enter(",,%lx,%lx,", index, end);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
n = find_get_pages_tag(mapping, &index, PAGECACHE_TAG_DIRTY,
|
n = find_get_pages_range_tag(mapping, &index, end,
|
||||||
1, &page);
|
PAGECACHE_TAG_DIRTY, 1, &page);
|
||||||
if (!n)
|
if (!n)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
_debug("wback %lx", page->index);
|
_debug("wback %lx", page->index);
|
||||||
|
|
||||||
if (page->index > end) {
|
|
||||||
*_next = index;
|
|
||||||
put_page(page);
|
|
||||||
_leave(" = 0 [%lx]", *_next);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* at this point we hold neither mapping->tree_lock nor lock on
|
/* at this point we hold neither mapping->tree_lock nor lock on
|
||||||
* the page itself: the page may be truncated or invalidated
|
* the page itself: the page may be truncated or invalidated
|
||||||
* (changing page->mapping to NULL), or even swizzled back from
|
* (changing page->mapping to NULL), or even swizzled back from
|
||||||
|
@ -609,7 +602,7 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
|
||||||
|
|
||||||
ASSERT(wb != NULL);
|
ASSERT(wb != NULL);
|
||||||
|
|
||||||
pagevec_init(&pv, 0);
|
pagevec_init(&pv);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
_debug("done %lx-%lx", first, last);
|
_debug("done %lx-%lx", first, last);
|
||||||
|
|
|
@ -3797,7 +3797,7 @@ int btree_write_cache_pages(struct address_space *mapping,
|
||||||
int scanned = 0;
|
int scanned = 0;
|
||||||
int tag;
|
int tag;
|
||||||
|
|
||||||
pagevec_init(&pvec, 0);
|
pagevec_init(&pvec);
|
||||||
if (wbc->range_cyclic) {
|
if (wbc->range_cyclic) {
|
||||||
index = mapping->writeback_index; /* Start from prev offset */
|
index = mapping->writeback_index; /* Start from prev offset */
|
||||||
end = -1;
|
end = -1;
|
||||||
|
@ -3814,8 +3814,8 @@ retry:
|
||||||
if (wbc->sync_mode == WB_SYNC_ALL)
|
if (wbc->sync_mode == WB_SYNC_ALL)
|
||||||
tag_pages_for_writeback(mapping, index, end);
|
tag_pages_for_writeback(mapping, index, end);
|
||||||
while (!done && !nr_to_write_done && (index <= end) &&
|
while (!done && !nr_to_write_done && (index <= end) &&
|
||||||
(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
|
(nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
|
||||||
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
|
tag))) {
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
scanned = 1;
|
scanned = 1;
|
||||||
|
@ -3825,11 +3825,6 @@ retry:
|
||||||
if (!PagePrivate(page))
|
if (!PagePrivate(page))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (!wbc->range_cyclic && page->index > end) {
|
|
||||||
done = 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
spin_lock(&mapping->private_lock);
|
spin_lock(&mapping->private_lock);
|
||||||
if (!PagePrivate(page)) {
|
if (!PagePrivate(page)) {
|
||||||
spin_unlock(&mapping->private_lock);
|
spin_unlock(&mapping->private_lock);
|
||||||
|
@ -3941,7 +3936,7 @@ static int extent_write_cache_pages(struct address_space *mapping,
|
||||||
if (!igrab(inode))
|
if (!igrab(inode))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
pagevec_init(&pvec, 0);
|
pagevec_init(&pvec);
|
||||||
if (wbc->range_cyclic) {
|
if (wbc->range_cyclic) {
|
||||||
index = mapping->writeback_index; /* Start from prev offset */
|
index = mapping->writeback_index; /* Start from prev offset */
|
||||||
end = -1;
|
end = -1;
|
||||||
|
@ -3961,8 +3956,8 @@ retry:
|
||||||
tag_pages_for_writeback(mapping, index, end);
|
tag_pages_for_writeback(mapping, index, end);
|
||||||
done_index = index;
|
done_index = index;
|
||||||
while (!done && !nr_to_write_done && (index <= end) &&
|
while (!done && !nr_to_write_done && (index <= end) &&
|
||||||
(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
|
(nr_pages = pagevec_lookup_range_tag(&pvec, mapping,
|
||||||
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
|
&index, end, tag))) {
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
scanned = 1;
|
scanned = 1;
|
||||||
|
@ -3987,12 +3982,6 @@ retry:
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!wbc->range_cyclic && page->index > end) {
|
|
||||||
done = 1;
|
|
||||||
unlock_page(page);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (wbc->sync_mode != WB_SYNC_NONE) {
|
if (wbc->sync_mode != WB_SYNC_NONE) {
|
||||||
if (PageWriteback(page))
|
if (PageWriteback(page))
|
||||||
flush_fn(data);
|
flush_fn(data);
|
||||||
|
|
|
@ -1592,7 +1592,7 @@ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
|
||||||
struct buffer_head *head;
|
struct buffer_head *head;
|
||||||
|
|
||||||
end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
|
end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
|
||||||
pagevec_init(&pvec, 0);
|
pagevec_init(&pvec);
|
||||||
while (pagevec_lookup_range(&pvec, bd_mapping, &index, end)) {
|
while (pagevec_lookup_range(&pvec, bd_mapping, &index, end)) {
|
||||||
count = pagevec_count(&pvec);
|
count = pagevec_count(&pvec);
|
||||||
for (i = 0; i < count; i++) {
|
for (i = 0; i < count; i++) {
|
||||||
|
@ -3514,7 +3514,7 @@ page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
|
||||||
if (length <= 0)
|
if (length <= 0)
|
||||||
return -ENOENT;
|
return -ENOENT;
|
||||||
|
|
||||||
pagevec_init(&pvec, 0);
|
pagevec_init(&pvec);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
unsigned nr_pages, i;
|
unsigned nr_pages, i;
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue