Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton: "Am experimenting with splitting MM up into identifiable subsystems perhaps with a view to gitifying it in complex ways. Also with more verbose "incoming" emails. Most of MM is here and a few other trees. Subsystems affected by this patch series: - hotfixes - iommu - scripts - arch/sh - ocfs2 - mm:slab-generic - mm:slub - mm:kmemleak - mm:kasan - mm:cleanups - mm:debug - mm:pagecache - mm:swap - mm:memcg - mm:gup - mm:pagemap - mm:infrastructure - mm:vmalloc - mm:initialization - mm:pagealloc - mm:vmscan - mm:tools - mm:proc - mm:ras - mm:oom-kill hotfixes: mm: vmscan: scan anonymous pages on file refaults mm/nvdimm: add is_ioremap_addr and use that to check ioremap address mm/memcontrol: fix wrong statistics in memory.stat mm/z3fold.c: lock z3fold page before __SetPageMovable() nilfs2: do not use unexported cpu_to_le32()/le32_to_cpu() in uapi header MAINTAINERS: nilfs2: update email address iommu: include/linux/dmar.h: replace single-char identifiers in macros scripts: scripts/decode_stacktrace: match basepath using shell prefix operator, not regex scripts/decode_stacktrace: look for modules with .ko.debug extension scripts/spelling.txt: drop "sepc" from the misspelling list scripts/spelling.txt: add spelling fix for prohibited scripts/decode_stacktrace: Accept dash/underscore in modules scripts/spelling.txt: add more spellings to spelling.txt arch/sh: arch/sh/configs/sdk7786_defconfig: remove CONFIG_LOGFS sh: config: remove left-over BACKLIGHT_LCD_SUPPORT sh: prevent warnings when using iounmap ocfs2: fs: ocfs: fix spelling mistake "hearbeating" -> "heartbeat" ocfs2/dlm: use struct_size() helper ocfs2: add last unlock times in locking_state ocfs2: add locking filter debugfs file ocfs2: add first lock wait time in locking_state ocfs: no need to check return value of debugfs_create functions fs/ocfs2/dlmglue.c: unneeded variable: "status" ocfs2: use kmemdup rather than duplicating its implementation mm:slab-generic: Patch series "mm/slab: Improved sanity checking": mm/slab: validate cache membership under freelist hardening mm/slab: sanity-check page type when looking up cache lkdtm/heap: add tests for freelist hardening mm:slub: mm/slub.c: avoid double string traverse in kmem_cache_flags() slub: don't panic for memcg kmem cache creation failure mm:kmemleak: mm/kmemleak.c: fix check for softirq context mm/kmemleak.c: change error at _write when kmemleak is disabled docs: kmemleak: add more documentation details mm:kasan: mm/kasan: print frame description for stack bugs Patch series "Bitops instrumentation for KASAN", v5: lib/test_kasan: add bitops tests x86: use static_cpu_has in uaccess region to avoid instrumentation asm-generic, x86: add bitops instrumentation for KASAN Patch series "mm/kasan: Add object validation in ksize()", v3: mm/kasan: introduce __kasan_check_{read,write} mm/kasan: change kasan_check_{read,write} to return boolean lib/test_kasan: Add test for double-kzfree detection mm/slab: refactor common ksize KASAN logic into slab_common.c mm/kasan: add object validation in ksize() mm:cleanups: include/linux/pfn_t.h: remove pfn_t_to_virt() Patch series "remove ARCH_SELECT_MEMORY_MODEL where it has no effect": arm: remove ARCH_SELECT_MEMORY_MODEL s390: remove ARCH_SELECT_MEMORY_MODEL sparc: remove ARCH_SELECT_MEMORY_MODEL mm/gup.c: make follow_page_mask() static mm/memory.c: trivial clean up in insert_page() mm: make !CONFIG_HUGE_PAGE wrappers into static inlines include/linux/mm_types.h: ifdef struct vm_area_struct::swap_readahead_info mm: 
remove the account_page_dirtied export mm/page_isolation.c: change the prototype of undo_isolate_page_range() include/linux/vmpressure.h: use spinlock_t instead of struct spinlock mm: remove the exporting of totalram_pages include/linux/pagemap.h: document trylock_page() return value mm:debug: mm/failslab.c: by default, do not fail allocations with direct reclaim only Patch series "debug_pagealloc improvements": mm, debug_pagelloc: use static keys to enable debugging mm, page_alloc: more extensive free page checking with debug_pagealloc mm, debug_pagealloc: use a page type instead of page_ext flag mm:pagecache: Patch series "fix filler_t callback type mismatches", v2: mm/filemap.c: fix an overly long line in read_cache_page mm/filemap: don't cast ->readpage to filler_t for do_read_cache_page jffs2: pass the correct prototype to read_cache_page 9p: pass the correct prototype to read_cache_page mm/filemap.c: correct the comment about VM_FAULT_RETRY mm:swap: mm, swap: fix race between swapoff and some swap operations mm/swap_state.c: simplify total_swapcache_pages() with get_swap_device() mm, swap: use rbtree for swap_extent mm/mincore.c: fix race between swapoff and mincore mm:memcg: memcg, oom: no oom-kill for __GFP_RETRY_MAYFAIL memcg, fsnotify: no oom-kill for remote memcg charging mm, memcg: introduce memory.events.local mm: memcontrol: dump memory.stat during cgroup OOM Patch series "mm: reparent slab memory on cgroup removal", v7: mm: memcg/slab: postpone kmem_cache memcg pointer initialization to memcg_link_cache() mm: memcg/slab: rename slab delayed deactivation functions and fields mm: memcg/slab: generalize postponed non-root kmem_cache deactivation mm: memcg/slab: introduce __memcg_kmem_uncharge_memcg() mm: memcg/slab: unify SLAB and SLUB page accounting mm: memcg/slab: don't check the dying flag on kmem_cache creation mm: memcg/slab: synchronize access to kmem_cache dying flag using a spinlock mm: memcg/slab: rework non-root kmem_cache lifecycle management mm: memcg/slab: stop setting page->mem_cgroup pointer for slab pages mm: memcg/slab: reparent memcg kmem_caches on cgroup removal mm, memcg: add a memcg_slabinfo debugfs file mm:gup: Patch series "switch the remaining architectures to use generic GUP", v4: mm: use untagged_addr() for get_user_pages_fast addresses mm: simplify gup_fast_permitted mm: lift the x86_32 PAE version of gup_get_pte to common code MIPS: use the generic get_user_pages_fast code sh: add the missing pud_page definition sh: use the generic get_user_pages_fast code sparc64: add the missing pgd_page definition sparc64: define untagged_addr() sparc64: use the generic get_user_pages_fast code mm: rename CONFIG_HAVE_GENERIC_GUP to CONFIG_HAVE_FAST_GUP mm: reorder code blocks in gup.c mm: consolidate the get_user_pages* implementations mm: validate get_user_pages_fast flags mm: move the powerpc hugepd code to mm/gup.c mm: switch gup_hugepte to use try_get_compound_head mm: mark the page referenced in gup_hugepte mm/gup: speed up check_and_migrate_cma_pages() on huge page mm/gup.c: remove some BUG_ONs from get_gate_page() mm/gup.c: mark undo_dev_pagemap as __maybe_unused mm:pagemap: asm-generic, x86: introduce generic pte_{alloc,free}_one[_kernel] alpha: switch to generic version of pte allocation arm: switch to generic version of pte allocation arm64: switch to generic version of pte allocation csky: switch to generic version of pte allocation m68k: sun3: switch to generic version of pte allocation mips: switch to generic version of pte allocation nds32: switch 
to generic version of pte allocation nios2: switch to generic version of pte allocation parisc: switch to generic version of pte allocation riscv: switch to generic version of pte allocation um: switch to generic version of pte allocation unicore32: switch to generic version of pte allocation mm/pgtable: drop pgtable_t variable from pte_fn_t functions mm/memory.c: fail when offset == num in first check of __vm_map_pages() mm:infrastructure: mm/mmu_notifier: use hlist_add_head_rcu() mm:vmalloc: Patch series "Some cleanups for the KVA/vmalloc", v5: mm/vmalloc.c: remove "node" argument mm/vmalloc.c: preload a CPU with one object for split purpose mm/vmalloc.c: get rid of one single unlink_va() when merge mm/vmalloc.c: switch to WARN_ON() and move it under unlink_va() mm/vmalloc.c: spelling> s/informaion/information/ mm:initialization: mm/large system hash: use vmalloc for size > MAX_ORDER when !hashdist mm/large system hash: clear hashdist when only one node with memory is booted mm:pagealloc: arm64: move jump_label_init() before parse_early_param() Patch series "add init_on_alloc/init_on_free boot options", v10: mm: security: introduce init_on_alloc=1 and init_on_free=1 boot options mm: init: report memory auto-initialization features at boot time mm:vmscan: mm: vmscan: remove double slab pressure by inc'ing sc->nr_scanned mm: vmscan: correct some vmscan counters for THP swapout mm:tools: tools/vm/slabinfo: order command line options tools/vm/slabinfo: add partial slab listing to -X tools/vm/slabinfo: add option to sort by partial slabs tools/vm/slabinfo: add sorting info to help menu mm:proc: proc: use down_read_killable mmap_sem for /proc/pid/maps proc: use down_read_killable mmap_sem for /proc/pid/smaps_rollup proc: use down_read_killable mmap_sem for /proc/pid/pagemap proc: use down_read_killable mmap_sem for /proc/pid/clear_refs proc: use down_read_killable mmap_sem for /proc/pid/map_files mm: use down_read_killable for locking mmap_sem in access_remote_vm mm: smaps: split PSS into components mm: vmalloc: show number of vmalloc pages in /proc/meminfo mm:ras: mm/memory-failure.c: clarify error message mm:oom-kill: mm: memcontrol: use CSS_TASK_ITER_PROCS at mem_cgroup_scan_tasks() mm, oom: refactor dump_tasks for memcg OOMs mm, oom: remove redundant task_in_mem_cgroup() check oom: decouple mems_allowed from oom_unkillable_task mm/oom_kill.c: remove redundant OOM score normalization in select_bad_process()" * akpm: (147 commits) mm/oom_kill.c: remove redundant OOM score normalization in select_bad_process() oom: decouple mems_allowed from oom_unkillable_task mm, oom: remove redundant task_in_mem_cgroup() check mm, oom: refactor dump_tasks for memcg OOMs mm: memcontrol: use CSS_TASK_ITER_PROCS at mem_cgroup_scan_tasks() mm/memory-failure.c: clarify error message mm: vmalloc: show number of vmalloc pages in /proc/meminfo mm: smaps: split PSS into components mm: use down_read_killable for locking mmap_sem in access_remote_vm proc: use down_read_killable mmap_sem for /proc/pid/map_files proc: use down_read_killable mmap_sem for /proc/pid/clear_refs proc: use down_read_killable mmap_sem for /proc/pid/pagemap proc: use down_read_killable mmap_sem for /proc/pid/smaps_rollup proc: use down_read_killable mmap_sem for /proc/pid/maps tools/vm/slabinfo: add sorting info to help menu tools/vm/slabinfo: add option to sort by partial slabs tools/vm/slabinfo: add partial slab listing to -X tools/vm/slabinfo: order command line options mm: vmscan: correct some vmscan counters for THP swapout mm: vmscan: 
remove double slab pressure by inc'ing sc->nr_scanned ...
commit ef8f3d48af
@@ -3,18 +3,28 @@ Date: August 2017
Contact: Daniel Colascione <dancol@google.com>
Description:
	This file provides pre-summed memory information for a
	process. The format is identical to /proc/pid/smaps,
	process. The format is almost identical to /proc/pid/smaps,
	except instead of an entry for each VMA in a process,
	smaps_rollup has a single entry (tagged "[rollup]")
	for which each field is the sum of the corresponding
	fields from all the maps in /proc/pid/smaps.
	For more details, see the procfs man page.
	Additionally, the fields Pss_Anon, Pss_File and Pss_Shmem
	are not present in /proc/pid/smaps. These fields represent
	the sum of the Pss field of each type (anon, file, shmem).
	For more details, see Documentation/filesystems/proc.txt
	and the procfs man page.

	Typical output looks like this:

	00100000-ff709000 ---p 00000000 00:00 0    [rollup]
	Size:               1192 kB
	KernelPageSize:        4 kB
	MMUPageSize:           4 kB
	Rss:                 884 kB
	Pss:                 385 kB
	Pss_Anon:            301 kB
	Pss_File:             80 kB
	Pss_Shmem:             4 kB
	Shared_Clean:        696 kB
	Shared_Dirty:          0 kB
	Private_Clean:       120 kB

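(Editorial aside, not part of the diff above: the new Pss_Anon/Pss_File/Pss_Shmem
lines can be consumed from userspace like any other smaps_rollup field. A minimal
sketch, assuming a kernel with this series applied; on older kernels the three
component lines simply will not appear.)

    /* Illustrative only: print the caller's PSS fields from smaps_rollup. */
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        FILE *f = fopen("/proc/self/smaps_rollup", "r");
        char line[256];

        if (!f) {
            perror("fopen");
            return 1;
        }
        while (fgets(line, sizeof(line), f)) {
            /* Pss, plus the new Pss_Anon/Pss_File/Pss_Shmem components */
            if (!strncmp(line, "Pss", 3))
                fputs(line, stdout);
        }
        fclose(f);
        return 0;
    }
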
@@ -1146,6 +1146,11 @@ PAGE_SIZE multiple when read back.
	otherwise, a value change in this file generates a file
	modified event.

	Note that all fields in this file are hierarchical and the
	file modified event can be generated due to an event down the
	hierarchy. For for the local events at the cgroup level see
	memory.events.local.

	  low
		The number of times the cgroup is reclaimed due to
		high memory pressure even though its usage is under

@@ -1185,6 +1190,11 @@ PAGE_SIZE multiple when read back.
		The number of processes belonging to this cgroup
		killed by any kind of OOM killer.

  memory.events.local
	Similar to memory.events but the fields in the file are local
	to the cgroup i.e. not hierarchical. The file modified event
	generated on this file reflects only the local events.

  memory.stat
	A read-only flat-keyed file which exists on non-root cgroups.

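(Illustrative aside, not taken from the patch: memory.events.local is a
flat-keyed cgroup v2 file, so a monitor can poll it as sketched below. The
cgroup path is an assumption made up for the example.)

    /* Read the non-hierarchical event counters for one cgroup. */
    #include <stdio.h>

    int main(void)
    {
        const char *path = "/sys/fs/cgroup/mygroup/memory.events.local";
        char key[32];
        unsigned long long val;
        FILE *f = fopen(path, "r");

        if (!f) {
            perror(path);
            return 1;
        }
        /* flat-keyed format: "<event> <count>" per line, local to this cgroup */
        while (fscanf(f, "%31s %llu", key, &val) == 2)
            printf("%s = %llu\n", key, val);
        fclose(f);
        return 0;
    }
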
@@ -805,12 +805,10 @@
			tracking down these problems.

	debug_pagealloc=
			[KNL] When CONFIG_DEBUG_PAGEALLOC is set, this
			parameter enables the feature at boot time. In
			default, it is disabled. We can avoid allocating huge
			chunk of memory for debug pagealloc if we don't enable
			it at boot time and the system will work mostly same
			with the kernel built without CONFIG_DEBUG_PAGEALLOC.
			[KNL] When CONFIG_DEBUG_PAGEALLOC is set, this parameter
			enables the feature at boot time. By default, it is
			disabled and the system will work mostly the same as a
			kernel built without CONFIG_DEBUG_PAGEALLOC.
			on: enable the feature

	debugpat	[X86] Enable PAT debugging

@@ -1670,6 +1668,15 @@

	initrd=		[BOOT] Specify the location of the initial ramdisk

	init_on_alloc=	[MM] Fill newly allocated pages and heap objects with
			zeroes.
			Format: 0 | 1
			Default set by CONFIG_INIT_ON_ALLOC_DEFAULT_ON.

	init_on_free=	[MM] Fill freed pages and heap objects with zeroes.
			Format: 0 | 1
			Default set by CONFIG_INIT_ON_FREE_DEFAULT_ON.

	init_pkru=	[x86] Specify the default memory protection keys rights
			register contents for all processes. 0x55555554 by
			default (disallow access to all but pkey 0). Can

@@ -54,7 +54,7 @@ The Linux kernel provides more basic utility functions.
Bit Operations
--------------

.. kernel-doc:: arch/x86/include/asm/bitops.h
.. kernel-doc:: include/asm-generic/bitops-instrumented.h
   :internal:

Bitmap Operations

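(Sketch, not the actual kernel header: the "bitops instrumentation for KASAN"
patches work by wrapping each arch_ bitop with a KASAN check of the word it
touches. A standalone approximation of that wrapper pattern, with stubbed-out
helpers so it compiles outside the kernel; the real bitops-instrumented.h
differs in detail.)

    #include <stdio.h>

    #define BITS_PER_LONG (8 * sizeof(long))
    #define BIT_WORD(nr)  ((nr) / BITS_PER_LONG)

    /* stand-in for the real instrumentation hook */
    static void kasan_check_write(const volatile void *p, unsigned int size)
    {
        (void)p;
        (void)size;
    }

    /* stand-in for the arch-specific, uninstrumented implementation */
    static void arch_set_bit(long nr, volatile unsigned long *addr)
    {
        addr[BIT_WORD(nr)] |= 1UL << (nr % BITS_PER_LONG);
    }

    /* instrumented wrapper: check the accessed word, then do the bitop */
    static void set_bit(long nr, volatile unsigned long *addr)
    {
        kasan_check_write(addr + BIT_WORD(nr), sizeof(long));
        arch_set_bit(nr, addr);
    }

    int main(void)
    {
        unsigned long bitmap[2] = { 0, 0 };

        set_bit(65, bitmap);
        printf("word1 = %#lx\n", bitmap[1]);
        return 0;
    }
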
@@ -2,8 +2,8 @@ Kernel Memory Leak Detector
===========================

Kmemleak provides a way of detecting possible kernel memory leaks in a
way similar to a tracing garbage collector
(https://en.wikipedia.org/wiki/Garbage_collection_%28computer_science%29#Tracing_garbage_collectors),
way similar to a `tracing garbage collector
<https://en.wikipedia.org/wiki/Tracing_garbage_collection>`_,
with the difference that the orphan objects are not freed but only
reported via /sys/kernel/debug/kmemleak. A similar method is used by the
Valgrind tool (``memcheck --leak-check``) to detect the memory leaks in

@@ -15,10 +15,13 @@ Usage

CONFIG_DEBUG_KMEMLEAK in "Kernel hacking" has to be enabled. A kernel
thread scans the memory every 10 minutes (by default) and prints the
number of new unreferenced objects found. To display the details of all
the possible memory leaks::
number of new unreferenced objects found. If the ``debugfs`` isn't already
mounted, mount with::

  # mount -t debugfs nodev /sys/kernel/debug/

To display the details of all the possible scanned memory leaks::

  # cat /sys/kernel/debug/kmemleak

To trigger an intermediate memory scan::

@@ -72,6 +75,9 @@ If CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF are enabled, the kmemleak is
disabled by default. Passing ``kmemleak=on`` on the kernel command
line enables the function.

If you are getting errors like "Error while writing to stdout" or "write_loop:
Invalid argument", make sure kmemleak is properly enabled.

Basic Algorithm
---------------

@@ -218,3 +224,37 @@ the pointer is calculated by other methods than the usual container_of
macro or the pointer is stored in a location not scanned by kmemleak.

Page allocations and ioremap are not tracked.

Testing with kmemleak-test
--------------------------

To check if you have all set up to use kmemleak, you can use the kmemleak-test
module, a module that deliberately leaks memory. Set CONFIG_DEBUG_KMEMLEAK_TEST
as module (it can't be used as bult-in) and boot the kernel with kmemleak
enabled. Load the module and perform a scan with::

  # modprobe kmemleak-test
  # echo scan > /sys/kernel/debug/kmemleak

Note that the you may not get results instantly or on the first scanning. When
kmemleak gets results, it'll log ``kmemleak: <count of leaks> new suspected
memory leaks``. Then read the file to see then::

  # cat /sys/kernel/debug/kmemleak
  unreferenced object 0xffff89862ca702e8 (size 32):
    comm "modprobe", pid 2088, jiffies 4294680594 (age 375.486s)
    hex dump (first 32 bytes):
      6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
      6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5  kkkkkkkkkkkkkkk.
    backtrace:
      [<00000000e0a73ec7>] 0xffffffffc01d2036
      [<000000000c5d2a46>] do_one_initcall+0x41/0x1df
      [<0000000046db7e0a>] do_init_module+0x55/0x200
      [<00000000542b9814>] load_module+0x203c/0x2480
      [<00000000c2850256>] __do_sys_finit_module+0xba/0xe0
      [<000000006564e7ef>] do_syscall_64+0x43/0x110
      [<000000007c873fa6>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
  ...

Removing the module with ``rmmod kmemleak_test`` should also trigger some
kmemleak results.

@@ -154,9 +154,11 @@ Table 1-1: Process specific entries in /proc
		symbol the task is blocked in - or "0" if not blocked.
 pagemap	Page table
 stack		Report full stack trace, enable via CONFIG_STACKTRACE
 smaps		an extension based on maps, showing the memory consumption of
 smaps		An extension based on maps, showing the memory consumption of
		each mapping and flags associated with it
 numa_maps	an extension based on maps, showing the memory locality and
 smaps_rollup	Accumulated smaps stats for all mappings of the process. This
		can be derived from smaps, but is faster and more convenient
 numa_maps	An extension based on maps, showing the memory locality and
		binding policy as well as mem usage (in pages) of each mapping.
..............................................................................

@@ -366,7 +368,7 @@ Table 1-4: Contents of the stat files (as of 2.6.30-rc7)
 exit_code	the thread's exit_code in the form reported by the waitpid system call
..............................................................................

The /proc/PID/maps file containing the currently mapped memory regions and
The /proc/PID/maps file contains the currently mapped memory regions and
their access permissions.

The format is:

@@ -417,11 +419,14 @@ is not associated with a file:
or if empty, the mapping is anonymous.

The /proc/PID/smaps is an extension based on maps, showing the memory
consumption for each of the process's mappings. For each of mappings there
is a series of lines such as the following:
consumption for each of the process's mappings. For each mapping (aka Virtual
Memory Area, or VMA) there is a series of lines such as the following:

08048000-080bc000 r-xp 00000000 03:02 13130          /bin/bash

Size:               1084 kB
KernelPageSize:        4 kB
MMUPageSize:           4 kB
Rss:                 892 kB
Pss:                 374 kB
Shared_Clean:        892 kB

@@ -443,11 +448,14 @@ Locked: 0 kB
THPeligible:           0
VmFlags: rd ex mr mw me dw

the first of these lines shows the same information as is displayed for the
mapping in /proc/PID/maps. The remaining lines show the size of the mapping
(size), the amount of the mapping that is currently resident in RAM (RSS), the
process' proportional share of this mapping (PSS), the number of clean and
dirty private pages in the mapping.
The first of these lines shows the same information as is displayed for the
mapping in /proc/PID/maps. Following lines show the size of the mapping
(size); the size of each page allocated when backing a VMA (KernelPageSize),
which is usually the same as the size in the page table entries; the page size
used by the MMU when backing a VMA (in most cases, the same as KernelPageSize);
the amount of the mapping that is currently resident in RAM (RSS); the
process' proportional share of this mapping (PSS); and the number of clean and
dirty shared and private pages in the mapping.

The "proportional set size" (PSS) of a process is the count of pages it has
in memory, where each page is divided by the number of processes sharing it.

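(Worked example of the PSS definition quoted above, not taken from the patch:
pages private to the process count fully, while a page shared by N processes
contributes 1/N.)

    #include <stddef.h>
    #include <stdio.h>

    int main(void)
    {
        /* number of processes mapping each of this VMA's resident pages */
        unsigned int sharers[] = { 1, 1, 4, 4 };
        size_t n = sizeof(sharers) / sizeof(sharers[0]);
        double pss_pages = 0.0;
        size_t i;

        /* two private pages + two pages shared four ways: 2 + 2/4 = 2.5 */
        for (i = 0; i < n; i++)
            pss_pages += 1.0 / sharers[i];

        printf("RSS = %zu pages, PSS = %.2f pages\n", n, pss_pages);
        return 0;
    }
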
@@ -532,6 +540,19 @@ guarantees:
2) If there is something at a given vaddr during the entirety of the
life of the smaps/maps walk, there will be some output for it.

The /proc/PID/smaps_rollup file includes the same fields as /proc/PID/smaps,
but their values are the sums of the corresponding values for all mappings of
the process. Additionally, it contains these fields:

Pss_Anon
Pss_File
Pss_Shmem

They represent the proportional shares of anonymous, file, and shmem pages, as
described for smaps above. These fields are omitted in smaps since each
mapping identifies the type (anon, file, or shmem) of all pages it contains.
Thus all information in smaps_rollup can be derived from smaps, but at a
significantly higher cost.

The /proc/PID/clear_refs is used to reset the PG_Referenced and ACCESSED/YOUNG
bits on both physical and virtual pages associated with a process, and the

@@ -11260,7 +11260,7 @@ F: include/uapi/linux/nfs*
F:	include/uapi/linux/sunrpc/

NILFS2 FILESYSTEM
M:	Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
M:	Ryusuke Konishi <konishi.ryusuke@gmail.com>
L:	linux-nilfs@vger.kernel.org
W:	https://nilfs.sourceforge.io/
W:	https://nilfs.osdn.jp/

@ -5,6 +5,8 @@
|
|||
#include <linux/mm.h>
|
||||
#include <linux/mmzone.h>
|
||||
|
||||
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
|
||||
|
||||
/*
|
||||
* Allocate and free page tables. The xxx_kernel() versions are
|
||||
* used to allocate a kernel page table - this turns on ASN bits
|
||||
|
@ -41,7 +43,7 @@ pgd_free(struct mm_struct *mm, pgd_t *pgd)
|
|||
static inline pmd_t *
|
||||
pmd_alloc_one(struct mm_struct *mm, unsigned long address)
|
||||
{
|
||||
pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
|
||||
pmd_t *ret = (pmd_t *)__get_free_page(GFP_PGTABLE_USER);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -51,42 +53,6 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
|
|||
free_page((unsigned long)pmd);
|
||||
}
|
||||
|
||||
static inline pte_t *
|
||||
pte_alloc_one_kernel(struct mm_struct *mm)
|
||||
{
|
||||
pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
|
||||
return pte;
|
||||
}
|
||||
|
||||
static inline void
|
||||
pte_free_kernel(struct mm_struct *mm, pte_t *pte)
|
||||
{
|
||||
free_page((unsigned long)pte);
|
||||
}
|
||||
|
||||
static inline pgtable_t
|
||||
pte_alloc_one(struct mm_struct *mm)
|
||||
{
|
||||
pte_t *pte = pte_alloc_one_kernel(mm);
|
||||
struct page *page;
|
||||
|
||||
if (!pte)
|
||||
return NULL;
|
||||
page = virt_to_page(pte);
|
||||
if (!pgtable_page_ctor(page)) {
|
||||
__free_page(page);
|
||||
return NULL;
|
||||
}
|
||||
return page;
|
||||
}
|
||||
|
||||
static inline void
|
||||
pte_free(struct mm_struct *mm, pgtable_t page)
|
||||
{
|
||||
pgtable_page_dtor(page);
|
||||
__free_page(page);
|
||||
}
|
||||
|
||||
#define check_pgt_cache() do { } while (0)
|
||||
|
||||
#endif /* _ALPHA_PGALLOC_H */
|
||||
|
|
|
@ -75,6 +75,7 @@ config ARM
|
|||
select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
|
||||
select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
|
||||
select HAVE_EXIT_THREAD
|
||||
select HAVE_FAST_GUP if ARM_LPAE
|
||||
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
|
||||
select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
|
||||
select HAVE_FUNCTION_TRACER if !XIP_KERNEL
|
||||
|
@ -1622,16 +1623,9 @@ config ARCH_SPARSEMEM_ENABLE
|
|||
config ARCH_SPARSEMEM_DEFAULT
|
||||
def_bool ARCH_SPARSEMEM_ENABLE
|
||||
|
||||
config ARCH_SELECT_MEMORY_MODEL
|
||||
def_bool ARCH_SPARSEMEM_ENABLE
|
||||
|
||||
config HAVE_ARCH_PFN_VALID
|
||||
def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
|
||||
|
||||
config HAVE_GENERIC_GUP
|
||||
def_bool y
|
||||
depends on ARM_LPAE
|
||||
|
||||
config HIGHMEM
|
||||
bool "High Memory Support"
|
||||
depends on MMU
|
||||
|
|
|
@ -54,8 +54,6 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
|
|||
extern pgd_t *pgd_alloc(struct mm_struct *mm);
|
||||
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
|
||||
|
||||
#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO)
|
||||
|
||||
static inline void clean_pte_table(pte_t *pte)
|
||||
{
|
||||
clean_dcache_area(pte + PTE_HWTABLE_PTRS, PTE_HWTABLE_SIZE);
|
||||
|
@ -77,54 +75,41 @@ static inline void clean_pte_table(pte_t *pte)
|
|||
* | h/w pt 1 |
|
||||
* +------------+
|
||||
*/
|
||||
|
||||
#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
|
||||
#define __HAVE_ARCH_PTE_ALLOC_ONE
|
||||
#include <asm-generic/pgalloc.h>
|
||||
|
||||
static inline pte_t *
|
||||
pte_alloc_one_kernel(struct mm_struct *mm)
|
||||
{
|
||||
pte_t *pte;
|
||||
pte_t *pte = __pte_alloc_one_kernel(mm);
|
||||
|
||||
pte = (pte_t *)__get_free_page(PGALLOC_GFP);
|
||||
if (pte)
|
||||
clean_pte_table(pte);
|
||||
|
||||
return pte;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HIGHPTE
|
||||
#define PGTABLE_HIGHMEM __GFP_HIGHMEM
|
||||
#else
|
||||
#define PGTABLE_HIGHMEM 0
|
||||
#endif
|
||||
|
||||
static inline pgtable_t
|
||||
pte_alloc_one(struct mm_struct *mm)
|
||||
{
|
||||
struct page *pte;
|
||||
|
||||
#ifdef CONFIG_HIGHPTE
|
||||
pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0);
|
||||
#else
|
||||
pte = alloc_pages(PGALLOC_GFP, 0);
|
||||
#endif
|
||||
pte = __pte_alloc_one(mm, GFP_PGTABLE_USER | PGTABLE_HIGHMEM);
|
||||
if (!pte)
|
||||
return NULL;
|
||||
if (!PageHighMem(pte))
|
||||
clean_pte_table(page_address(pte));
|
||||
if (!pgtable_page_ctor(pte)) {
|
||||
__free_page(pte);
|
||||
return NULL;
|
||||
}
|
||||
return pte;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free one PTE table.
|
||||
*/
|
||||
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
|
||||
{
|
||||
if (pte)
|
||||
free_page((unsigned long)pte);
|
||||
}
|
||||
|
||||
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
|
||||
{
|
||||
pgtable_page_dtor(pte);
|
||||
__free_page(pte);
|
||||
}
|
||||
|
||||
static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
|
||||
pmdval_t prot)
|
||||
{
|
||||
|
|
|
@ -8,8 +8,7 @@
|
|||
#include <asm/mach/map.h>
|
||||
#include <asm/mmu_context.h>
|
||||
|
||||
static int __init set_permissions(pte_t *ptep, pgtable_t token,
|
||||
unsigned long addr, void *data)
|
||||
static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
|
||||
{
|
||||
efi_memory_desc_t *md = data;
|
||||
pte_t pte = *ptep;
|
||||
|
|
|
@ -493,8 +493,7 @@ void __init dma_contiguous_remap(void)
|
|||
}
|
||||
}
|
||||
|
||||
static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr,
|
||||
void *data)
|
||||
static int __dma_update_pte(pte_t *pte, unsigned long addr, void *data)
|
||||
{
|
||||
struct page *page = virt_to_page(addr);
|
||||
pgprot_t prot = *(pgprot_t *)data;
|
||||
|
|
|
@ -729,7 +729,7 @@ static void __init *early_alloc(unsigned long sz)
|
|||
|
||||
static void *__init late_alloc(unsigned long sz)
|
||||
{
|
||||
void *ptr = (void *)__get_free_pages(PGALLOC_GFP, get_order(sz));
|
||||
void *ptr = (void *)__get_free_pages(GFP_PGTABLE_KERNEL, get_order(sz));
|
||||
|
||||
if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
|
||||
BUG();
|
||||
|
|
|
@ -14,8 +14,7 @@ struct page_change_data {
|
|||
pgprot_t clear_mask;
|
||||
};
|
||||
|
||||
static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
|
||||
void *data)
|
||||
static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
|
||||
{
|
||||
struct page_change_data *cdata = data;
|
||||
pte_t pte = *ptep;
|
||||
|
|
|
@ -143,6 +143,7 @@ config ARM64
|
|||
select HAVE_DMA_CONTIGUOUS
|
||||
select HAVE_DYNAMIC_FTRACE
|
||||
select HAVE_EFFICIENT_UNALIGNED_ACCESS
|
||||
select HAVE_FAST_GUP
|
||||
select HAVE_FTRACE_MCOUNT_RECORD
|
||||
select HAVE_FUNCTION_TRACER
|
||||
select HAVE_FUNCTION_GRAPH_TRACER
|
||||
|
@ -267,9 +268,6 @@ config ZONE_DMA32
|
|||
bool "Support DMA32 zone" if EXPERT
|
||||
default y
|
||||
|
||||
config HAVE_GENERIC_GUP
|
||||
def_bool y
|
||||
|
||||
config ARCH_ENABLE_MEMORY_HOTPLUG
|
||||
def_bool y
|
||||
|
||||
|
|
|
@ -13,18 +13,23 @@
|
|||
#include <asm/cacheflush.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
|
||||
|
||||
#define check_pgt_cache() do { } while (0)
|
||||
|
||||
#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO)
|
||||
#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 2
|
||||
|
||||
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
|
||||
{
|
||||
gfp_t gfp = GFP_PGTABLE_USER;
|
||||
struct page *page;
|
||||
|
||||
page = alloc_page(PGALLOC_GFP);
|
||||
if (mm == &init_mm)
|
||||
gfp = GFP_PGTABLE_KERNEL;
|
||||
|
||||
page = alloc_page(gfp);
|
||||
if (!page)
|
||||
return NULL;
|
||||
if (!pgtable_pmd_page_ctor(page)) {
|
||||
|
@ -61,7 +66,7 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
|
|||
|
||||
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
|
||||
{
|
||||
return (pud_t *)__get_free_page(PGALLOC_GFP);
|
||||
return (pud_t *)__get_free_page(GFP_PGTABLE_USER);
|
||||
}
|
||||
|
||||
static inline void pud_free(struct mm_struct *mm, pud_t *pudp)
|
||||
|
@ -89,42 +94,6 @@ static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
|
|||
extern pgd_t *pgd_alloc(struct mm_struct *mm);
|
||||
extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
|
||||
|
||||
static inline pte_t *
|
||||
pte_alloc_one_kernel(struct mm_struct *mm)
|
||||
{
|
||||
return (pte_t *)__get_free_page(PGALLOC_GFP);
|
||||
}
|
||||
|
||||
static inline pgtable_t
|
||||
pte_alloc_one(struct mm_struct *mm)
|
||||
{
|
||||
struct page *pte;
|
||||
|
||||
pte = alloc_pages(PGALLOC_GFP, 0);
|
||||
if (!pte)
|
||||
return NULL;
|
||||
if (!pgtable_page_ctor(pte)) {
|
||||
__free_page(pte);
|
||||
return NULL;
|
||||
}
|
||||
return pte;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free a PTE table.
|
||||
*/
|
||||
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *ptep)
|
||||
{
|
||||
if (ptep)
|
||||
free_page((unsigned long)ptep);
|
||||
}
|
||||
|
||||
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
|
||||
{
|
||||
pgtable_page_dtor(pte);
|
||||
__free_page(pte);
|
||||
}
|
||||
|
||||
static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
|
||||
pmdval_t prot)
|
||||
{
|
||||
|
|
|
@ -82,8 +82,7 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int __init set_permissions(pte_t *ptep, pgtable_t token,
|
||||
unsigned long addr, void *data)
|
||||
static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
|
||||
{
|
||||
efi_memory_desc_t *md = data;
|
||||
pte_t pte = READ_ONCE(*ptep);
|
||||
|
|
|
@ -283,6 +283,11 @@ void __init setup_arch(char **cmdline_p)
|
|||
|
||||
setup_machine_fdt(__fdt_pointer);
|
||||
|
||||
/*
|
||||
* Initialise the static keys early as they may be enabled by the
|
||||
* cpufeature code and early parameters.
|
||||
*/
|
||||
jump_label_init();
|
||||
parse_early_param();
|
||||
|
||||
/*
|
||||
|
|
|
@ -420,11 +420,6 @@ void __init smp_cpus_done(unsigned int max_cpus)
|
|||
void __init smp_prepare_boot_cpu(void)
|
||||
{
|
||||
set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
|
||||
/*
|
||||
* Initialise the static keys early as they may be enabled by the
|
||||
* cpufeature code.
|
||||
*/
|
||||
jump_label_init();
|
||||
cpuinfo_store_boot_cpu();
|
||||
|
||||
/*
|
||||
|
|
|
@ -362,7 +362,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
|
|||
|
||||
static phys_addr_t __pgd_pgtable_alloc(int shift)
|
||||
{
|
||||
void *ptr = (void *)__get_free_page(PGALLOC_GFP);
|
||||
void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL);
|
||||
BUG_ON(!ptr);
|
||||
|
||||
/* Ensure the zeroed page is visible to the page table walker */
|
||||
|
|
|
@ -19,8 +19,7 @@ struct page_change_data {
|
|||
|
||||
bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED);
|
||||
|
||||
static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
|
||||
void *data)
|
||||
static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
|
||||
{
|
||||
struct page_change_data *cdata = data;
|
||||
pte_t pte = READ_ONCE(*ptep);
|
||||
|
|
|
@ -19,10 +19,12 @@ static struct kmem_cache *pgd_cache __ro_after_init;
|
|||
|
||||
pgd_t *pgd_alloc(struct mm_struct *mm)
|
||||
{
|
||||
gfp_t gfp = GFP_PGTABLE_USER;
|
||||
|
||||
if (PGD_SIZE == PAGE_SIZE)
|
||||
return (pgd_t *)__get_free_page(PGALLOC_GFP);
|
||||
return (pgd_t *)__get_free_page(gfp);
|
||||
else
|
||||
return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
|
||||
return kmem_cache_alloc(pgd_cache, gfp);
|
||||
}
|
||||
|
||||
void pgd_free(struct mm_struct *mm, pgd_t *pgd)
|
||||
|
|
|
@ -8,6 +8,9 @@
|
|||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
|
||||
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
|
||||
|
||||
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
|
||||
pte_t *pte)
|
||||
{
|
||||
|
@ -39,33 +42,6 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
|
|||
return pte;
|
||||
}
|
||||
|
||||
static inline struct page *pte_alloc_one(struct mm_struct *mm)
|
||||
{
|
||||
struct page *pte;
|
||||
|
||||
pte = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);
|
||||
if (!pte)
|
||||
return NULL;
|
||||
|
||||
if (!pgtable_page_ctor(pte)) {
|
||||
__free_page(pte);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return pte;
|
||||
}
|
||||
|
||||
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
|
||||
{
|
||||
free_pages((unsigned long)pte, PTE_ORDER);
|
||||
}
|
||||
|
||||
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
|
||||
{
|
||||
pgtable_page_dtor(pte);
|
||||
__free_pages(pte, PTE_ORDER);
|
||||
}
|
||||
|
||||
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
|
||||
{
|
||||
free_pages((unsigned long)pgd, PGD_ORDER);
|
||||
|
|
|
@ -13,55 +13,18 @@
|
|||
|
||||
#include <asm/tlb.h>
|
||||
|
||||
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
|
||||
|
||||
extern const char bad_pmd_string[];
|
||||
|
||||
#define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); })
|
||||
|
||||
|
||||
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
|
||||
{
|
||||
free_page((unsigned long) pte);
|
||||
}
|
||||
|
||||
static inline void pte_free(struct mm_struct *mm, pgtable_t page)
|
||||
{
|
||||
pgtable_page_dtor(page);
|
||||
__free_page(page);
|
||||
}
|
||||
|
||||
#define __pte_free_tlb(tlb,pte,addr) \
|
||||
do { \
|
||||
pgtable_page_dtor(pte); \
|
||||
tlb_remove_page((tlb), pte); \
|
||||
} while (0)
|
||||
|
||||
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
|
||||
{
|
||||
unsigned long page = __get_free_page(GFP_KERNEL);
|
||||
|
||||
if (!page)
|
||||
return NULL;
|
||||
|
||||
memset((void *)page, 0, PAGE_SIZE);
|
||||
return (pte_t *) (page);
|
||||
}
|
||||
|
||||
static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
|
||||
{
|
||||
struct page *page = alloc_pages(GFP_KERNEL, 0);
|
||||
|
||||
if (page == NULL)
|
||||
return NULL;
|
||||
|
||||
clear_highpage(page);
|
||||
if (!pgtable_page_ctor(page)) {
|
||||
__free_page(page);
|
||||
return NULL;
|
||||
}
|
||||
return page;
|
||||
|
||||
}
|
||||
|
||||
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
|
||||
{
|
||||
pmd_val(*pmd) = __pa((unsigned long)pte);
|
||||
|
|
|
@ -34,6 +34,7 @@ config MIPS
|
|||
select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC
|
||||
select GENERIC_SMP_IDLE_THREAD
|
||||
select GENERIC_TIME_VSYSCALL
|
||||
select GUP_GET_PTE_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT
|
||||
select HANDLE_DOMAIN_IRQ
|
||||
select HAVE_ARCH_COMPILER_H
|
||||
select HAVE_ARCH_JUMP_LABEL
|
||||
|
@ -52,6 +53,7 @@ config MIPS
|
|||
select HAVE_DMA_CONTIGUOUS
|
||||
select HAVE_DYNAMIC_FTRACE
|
||||
select HAVE_EXIT_THREAD
|
||||
select HAVE_FAST_GUP
|
||||
select HAVE_FTRACE_MCOUNT_RECORD
|
||||
select HAVE_FUNCTION_GRAPH_TRACER
|
||||
select HAVE_FUNCTION_TRACER
|
||||
|
|
|
@ -13,6 +13,8 @@
|
|||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
|
||||
|
||||
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
|
||||
pte_t *pte)
|
||||
{
|
||||
|
@ -50,37 +52,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
|
|||
free_pages((unsigned long)pgd, PGD_ORDER);
|
||||
}
|
||||
|
||||
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
|
||||
{
|
||||
return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER);
|
||||
}
|
||||
|
||||
static inline struct page *pte_alloc_one(struct mm_struct *mm)
|
||||
{
|
||||
struct page *pte;
|
||||
|
||||
pte = alloc_pages(GFP_KERNEL, PTE_ORDER);
|
||||
if (!pte)
|
||||
return NULL;
|
||||
clear_highpage(pte);
|
||||
if (!pgtable_page_ctor(pte)) {
|
||||
__free_page(pte);
|
||||
return NULL;
|
||||
}
|
||||
return pte;
|
||||
}
|
||||
|
||||
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
|
||||
{
|
||||
free_pages((unsigned long)pte, PTE_ORDER);
|
||||
}
|
||||
|
||||
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
|
||||
{
|
||||
pgtable_page_dtor(pte);
|
||||
__free_pages(pte, PTE_ORDER);
|
||||
}
|
||||
|
||||
#define __pte_free_tlb(tlb,pte,address) \
|
||||
do { \
|
||||
pgtable_page_dtor(pte); \
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include <asm/cmpxchg.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/pgtable-bits.h>
|
||||
#include <asm/cpu-features.h>
|
||||
|
||||
struct mm_struct;
|
||||
struct vm_area_struct;
|
||||
|
@ -626,6 +627,8 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
|
|||
|
||||
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
||||
|
||||
#define gup_fast_permitted(start, end) (!cpu_has_dc_aliases)
|
||||
|
||||
#include <asm-generic/pgtable.h>
|
||||
|
||||
/*
|
||||
|
|
|
@ -7,7 +7,6 @@ obj-y += cache.o
|
|||
obj-y += context.o
|
||||
obj-y += extable.o
|
||||
obj-y += fault.o
|
||||
obj-y += gup.o
|
||||
obj-y += init.o
|
||||
obj-y += mmap.o
|
||||
obj-y += page.o
|
||||
|
|
|
@ -1,303 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Lockless get_user_pages_fast for MIPS
|
||||
*
|
||||
* Copyright (C) 2008 Nick Piggin
|
||||
* Copyright (C) 2008 Novell Inc.
|
||||
* Copyright (C) 2011 Ralf Baechle
|
||||
*/
|
||||
#include <linux/sched.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/vmstat.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/hugetlb.h>
|
||||
|
||||
#include <asm/cpu-features.h>
|
||||
#include <asm/pgtable.h>
|
||||
|
||||
static inline pte_t gup_get_pte(pte_t *ptep)
|
||||
{
|
||||
#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
|
||||
pte_t pte;
|
||||
|
||||
retry:
|
||||
pte.pte_low = ptep->pte_low;
|
||||
smp_rmb();
|
||||
pte.pte_high = ptep->pte_high;
|
||||
smp_rmb();
|
||||
if (unlikely(pte.pte_low != ptep->pte_low))
|
||||
goto retry;
|
||||
|
||||
return pte;
|
||||
#else
|
||||
return READ_ONCE(*ptep);
|
||||
#endif
|
||||
}
|
||||
|
||||
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
|
||||
int write, struct page **pages, int *nr)
|
||||
{
|
||||
pte_t *ptep = pte_offset_map(&pmd, addr);
|
||||
do {
|
||||
pte_t pte = gup_get_pte(ptep);
|
||||
struct page *page;
|
||||
|
||||
if (!pte_present(pte) ||
|
||||
pte_special(pte) || (write && !pte_write(pte))) {
|
||||
pte_unmap(ptep);
|
||||
return 0;
|
||||
}
|
||||
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
|
||||
page = pte_page(pte);
|
||||
get_page(page);
|
||||
SetPageReferenced(page);
|
||||
pages[*nr] = page;
|
||||
(*nr)++;
|
||||
|
||||
} while (ptep++, addr += PAGE_SIZE, addr != end);
|
||||
|
||||
pte_unmap(ptep - 1);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline void get_head_page_multiple(struct page *page, int nr)
|
||||
{
|
||||
VM_BUG_ON(page != compound_head(page));
|
||||
VM_BUG_ON(page_count(page) == 0);
|
||||
page_ref_add(page, nr);
|
||||
SetPageReferenced(page);
|
||||
}
|
||||
|
||||
static int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end,
|
||||
int write, struct page **pages, int *nr)
|
||||
{
|
||||
pte_t pte = *(pte_t *)&pmd;
|
||||
struct page *head, *page;
|
||||
int refs;
|
||||
|
||||
if (write && !pte_write(pte))
|
||||
return 0;
|
||||
/* hugepages are never "special" */
|
||||
VM_BUG_ON(pte_special(pte));
|
||||
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
|
||||
|
||||
refs = 0;
|
||||
head = pte_page(pte);
|
||||
page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
|
||||
do {
|
||||
VM_BUG_ON(compound_head(page) != head);
|
||||
pages[*nr] = page;
|
||||
(*nr)++;
|
||||
page++;
|
||||
refs++;
|
||||
} while (addr += PAGE_SIZE, addr != end);
|
||||
|
||||
get_head_page_multiple(head, refs);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
|
||||
int write, struct page **pages, int *nr)
|
||||
{
|
||||
unsigned long next;
|
||||
pmd_t *pmdp;
|
||||
|
||||
pmdp = pmd_offset(&pud, addr);
|
||||
do {
|
||||
pmd_t pmd = *pmdp;
|
||||
|
||||
next = pmd_addr_end(addr, end);
|
||||
if (pmd_none(pmd))
|
||||
return 0;
|
||||
if (unlikely(pmd_huge(pmd))) {
|
||||
if (!gup_huge_pmd(pmd, addr, next, write, pages,nr))
|
||||
return 0;
|
||||
} else {
|
||||
if (!gup_pte_range(pmd, addr, next, write, pages,nr))
|
||||
return 0;
|
||||
}
|
||||
} while (pmdp++, addr = next, addr != end);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end,
|
||||
int write, struct page **pages, int *nr)
|
||||
{
|
||||
pte_t pte = *(pte_t *)&pud;
|
||||
struct page *head, *page;
|
||||
int refs;
|
||||
|
||||
if (write && !pte_write(pte))
|
||||
return 0;
|
||||
/* hugepages are never "special" */
|
||||
VM_BUG_ON(pte_special(pte));
|
||||
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
|
||||
|
||||
refs = 0;
|
||||
head = pte_page(pte);
|
||||
page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
|
||||
do {
|
||||
VM_BUG_ON(compound_head(page) != head);
|
||||
pages[*nr] = page;
|
||||
(*nr)++;
|
||||
page++;
|
||||
refs++;
|
||||
} while (addr += PAGE_SIZE, addr != end);
|
||||
|
||||
get_head_page_multiple(head, refs);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
|
||||
int write, struct page **pages, int *nr)
|
||||
{
|
||||
unsigned long next;
|
||||
pud_t *pudp;
|
||||
|
||||
pudp = pud_offset(&pgd, addr);
|
||||
do {
|
||||
pud_t pud = *pudp;
|
||||
|
||||
next = pud_addr_end(addr, end);
|
||||
if (pud_none(pud))
|
||||
return 0;
|
||||
if (unlikely(pud_huge(pud))) {
|
||||
if (!gup_huge_pud(pud, addr, next, write, pages,nr))
|
||||
return 0;
|
||||
} else {
|
||||
if (!gup_pmd_range(pud, addr, next, write, pages,nr))
|
||||
return 0;
|
||||
}
|
||||
} while (pudp++, addr = next, addr != end);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Like get_user_pages_fast() except its IRQ-safe in that it won't fall
|
||||
* back to the regular GUP.
|
||||
* Note a difference with get_user_pages_fast: this always returns the
|
||||
* number of pages pinned, 0 if no pages were pinned.
|
||||
*/
|
||||
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
||||
struct page **pages)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
unsigned long addr, len, end;
|
||||
unsigned long next;
|
||||
unsigned long flags;
|
||||
pgd_t *pgdp;
|
||||
int nr = 0;
|
||||
|
||||
start &= PAGE_MASK;
|
||||
addr = start;
|
||||
len = (unsigned long) nr_pages << PAGE_SHIFT;
|
||||
end = start + len;
|
||||
if (unlikely(!access_ok((void __user *)start, len)))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* XXX: batch / limit 'nr', to avoid large irq off latency
|
||||
* needs some instrumenting to determine the common sizes used by
|
||||
* important workloads (eg. DB2), and whether limiting the batch
|
||||
* size will decrease performance.
|
||||
*
|
||||
* It seems like we're in the clear for the moment. Direct-IO is
|
||||
* the main guy that batches up lots of get_user_pages, and even
|
||||
* they are limited to 64-at-a-time which is not so many.
|
||||
*/
|
||||
/*
|
||||
* This doesn't prevent pagetable teardown, but does prevent
|
||||
* the pagetables and pages from being freed.
|
||||
*
|
||||
* So long as we atomically load page table pointers versus teardown,
|
||||
* we can follow the address down to the page and take a ref on it.
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
pgdp = pgd_offset(mm, addr);
|
||||
do {
|
||||
pgd_t pgd = *pgdp;
|
||||
|
||||
next = pgd_addr_end(addr, end);
|
||||
if (pgd_none(pgd))
|
||||
break;
|
||||
if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
|
||||
break;
|
||||
} while (pgdp++, addr = next, addr != end);
|
||||
local_irq_restore(flags);
|
||||
|
||||
return nr;
|
||||
}
|
||||
|
||||
/**
|
||||
* get_user_pages_fast() - pin user pages in memory
|
||||
* @start: starting user address
|
||||
* @nr_pages: number of pages from start to pin
|
||||
* @gup_flags: flags modifying pin behaviour
|
||||
* @pages: array that receives pointers to the pages pinned.
|
||||
* Should be at least nr_pages long.
|
||||
*
|
||||
* Attempt to pin user pages in memory without taking mm->mmap_sem.
|
||||
* If not successful, it will fall back to taking the lock and
|
||||
* calling get_user_pages().
|
||||
*
|
||||
* Returns number of pages pinned. This may be fewer than the number
|
||||
* requested. If nr_pages is 0 or negative, returns 0. If no pages
|
||||
* were pinned, returns -errno.
|
||||
*/
|
||||
int get_user_pages_fast(unsigned long start, int nr_pages,
|
||||
unsigned int gup_flags, struct page **pages)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
unsigned long addr, len, end;
|
||||
unsigned long next;
|
||||
pgd_t *pgdp;
|
||||
int ret, nr = 0;
|
||||
|
||||
start &= PAGE_MASK;
|
||||
addr = start;
|
||||
len = (unsigned long) nr_pages << PAGE_SHIFT;
|
||||
|
||||
end = start + len;
|
||||
if (end < start || cpu_has_dc_aliases)
|
||||
goto slow_irqon;
|
||||
|
||||
/* XXX: batch / limit 'nr' */
|
||||
local_irq_disable();
|
||||
pgdp = pgd_offset(mm, addr);
|
||||
do {
|
||||
pgd_t pgd = *pgdp;
|
||||
|
||||
next = pgd_addr_end(addr, end);
|
||||
if (pgd_none(pgd))
|
||||
goto slow;
|
||||
if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE,
|
||||
pages, &nr))
|
||||
goto slow;
|
||||
} while (pgdp++, addr = next, addr != end);
|
||||
local_irq_enable();
|
||||
|
||||
VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
|
||||
return nr;
|
||||
slow:
|
||||
local_irq_enable();
|
||||
|
||||
slow_irqon:
|
||||
/* Try to get the remaining pages with get_user_pages */
|
||||
start += nr << PAGE_SHIFT;
|
||||
pages += nr;
|
||||
|
||||
ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT,
|
||||
pages, gup_flags);
|
||||
|
||||
/* Have to be a bit careful with return values */
|
||||
if (nr > 0) {
|
||||
if (ret < 0)
|
||||
ret = nr;
|
||||
else
|
||||
ret += nr;
|
||||
}
|
||||
return ret;
|
||||
}
|
|
@ -9,6 +9,9 @@
|
|||
#include <asm/tlbflush.h>
|
||||
#include <asm/proc-fns.h>
|
||||
|
||||
#define __HAVE_ARCH_PTE_ALLOC_ONE
|
||||
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
|
||||
|
||||
/*
|
||||
* Since we have only two-level page tables, these are trivial
|
||||
*/
|
||||
|
@ -22,43 +25,17 @@ extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
|
|||
|
||||
#define check_pgt_cache() do { } while (0)
|
||||
|
||||
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
|
||||
{
|
||||
pte_t *pte;
|
||||
|
||||
pte =
|
||||
(pte_t *) __get_free_page(GFP_KERNEL | __GFP_RETRY_MAYFAIL |
|
||||
__GFP_ZERO);
|
||||
|
||||
return pte;
|
||||
}
|
||||
|
||||
static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
|
||||
{
|
||||
pgtable_t pte;
|
||||
|
||||
pte = alloc_pages(GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO, 0);
|
||||
pte = __pte_alloc_one(mm, GFP_PGTABLE_USER);
|
||||
if (pte)
|
||||
cpu_dcache_wb_page((unsigned long)page_address(pte));
|
||||
|
||||
return pte;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free one PTE table.
|
||||
*/
|
||||
static inline void pte_free_kernel(struct mm_struct *mm, pte_t * pte)
|
||||
{
|
||||
if (pte) {
|
||||
free_page((unsigned long)pte);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
|
||||
{
|
||||
__free_page(pte);
|
||||
}
|
||||
|
||||
/*
|
||||
* Populate the pmdp entry with a pointer to the pte. This pmd is part
|
||||
* of the mm address space.
|
||||
|
|
|
@ -12,6 +12,8 @@
|
|||
|
||||
#include <linux/mm.h>
|
||||
|
||||
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
|
||||
|
||||
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
|
||||
pte_t *pte)
|
||||
{
|
||||
|
@ -37,41 +39,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
|
|||
free_pages((unsigned long)pgd, PGD_ORDER);
|
||||
}
|
||||
|
||||
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
|
||||
{
|
||||
pte_t *pte;
|
||||
|
||||
pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER);
|
||||
|
||||
return pte;
|
||||
}
|
||||
|
||||
static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
|
||||
{
|
||||
struct page *pte;
|
||||
|
||||
pte = alloc_pages(GFP_KERNEL, PTE_ORDER);
|
||||
if (pte) {
|
||||
if (!pgtable_page_ctor(pte)) {
|
||||
__free_page(pte);
|
||||
return NULL;
|
||||
}
|
||||
clear_highpage(pte);
|
||||
}
|
||||
return pte;
|
||||
}
|
||||
|
||||
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
|
||||
{
|
||||
free_pages((unsigned long)pte, PTE_ORDER);
|
||||
}
|
||||
|
||||
static inline void pte_free(struct mm_struct *mm, struct page *pte)
|
||||
{
|
||||
pgtable_page_dtor(pte);
|
||||
__free_pages(pte, PTE_ORDER);
|
||||
}
|
||||
|
||||
#define __pte_free_tlb(tlb, pte, addr) \
|
||||
do { \
|
||||
pgtable_page_dtor(pte); \
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
|
||||
#include <asm/cache.h>
|
||||
|
||||
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
|
||||
|
||||
/* Allocate the top level pgd (page directory)
|
||||
*
|
||||
* Here (for 64 bit kernels) we implement a Hybrid L2/L3 scheme: we
|
||||
|
@ -122,37 +124,6 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
|
|||
pmd_populate_kernel(mm, pmd, page_address(pte_page))
|
||||
#define pmd_pgtable(pmd) pmd_page(pmd)
|
||||
|
||||
static inline pgtable_t
|
||||
pte_alloc_one(struct mm_struct *mm)
|
||||
{
|
||||
struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
|
||||
if (!page)
|
||||
return NULL;
|
||||
if (!pgtable_page_ctor(page)) {
|
||||
__free_page(page);
|
||||
return NULL;
|
||||
}
|
||||
return page;
|
||||
}
|
||||
|
||||
static inline pte_t *
|
||||
pte_alloc_one_kernel(struct mm_struct *mm)
|
||||
{
|
||||
pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
|
||||
return pte;
|
||||
}
|
||||
|
||||
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
|
||||
{
|
||||
free_page((unsigned long)pte);
|
||||
}
|
||||
|
||||
static inline void pte_free(struct mm_struct *mm, struct page *pte)
|
||||
{
|
||||
pgtable_page_dtor(pte);
|
||||
pte_free_kernel(mm, page_address(pte));
|
||||
}
|
||||
|
||||
#define check_pgt_cache() do { } while (0)
|
||||
|
||||
#endif
|
||||
|
|
|
@ -125,6 +125,7 @@ config PPC
|
|||
select ARCH_HAS_FORTIFY_SOURCE
|
||||
select ARCH_HAS_GCOV_PROFILE_ALL
|
||||
select ARCH_HAS_KCOV
|
||||
select ARCH_HAS_HUGEPD if HUGETLB_PAGE
|
||||
select ARCH_HAS_MMIOWB if PPC64
|
||||
select ARCH_HAS_PHYS_TO_DMA
|
||||
select ARCH_HAS_PMEM_API if PPC64
|
||||
|
@ -185,12 +186,12 @@ config PPC
|
|||
select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL
|
||||
select HAVE_EBPF_JIT if PPC64
|
||||
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU)
|
||||
select HAVE_FAST_GUP
|
||||
select HAVE_FTRACE_MCOUNT_RECORD
|
||||
select HAVE_FUNCTION_ERROR_INJECTION
|
||||
select HAVE_FUNCTION_GRAPH_TRACER
|
||||
select HAVE_FUNCTION_TRACER
|
||||
select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC
|
||||
select HAVE_GENERIC_GUP
|
||||
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
|
||||
select HAVE_IDE
|
||||
select HAVE_IOREMAP_PROT
|
||||
|
|
|
@ -140,6 +140,20 @@ static inline void pte_frag_set(mm_context_t *ctx, void *p)
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PPC64
|
||||
#define is_ioremap_addr is_ioremap_addr
|
||||
static inline bool is_ioremap_addr(const void *x)
|
||||
{
|
||||
#ifdef CONFIG_MMU
|
||||
unsigned long addr = (unsigned long)x;
|
||||
|
||||
return addr >= IOREMAP_BASE && addr < IOREMAP_END;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
#endif /* CONFIG_PPC64 */
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif /* _ASM_POWERPC_PGTABLE_H */
|
||||
|
|
|
@ -827,7 +827,7 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
|
|||
*
|
||||
* Note: If EOI is incorrectly used by SW to lower the CPPR
|
||||
* value (ie more favored), we do not check for rejection of
|
||||
* a pending interrupt, this is a SW error and PAPR sepcifies
|
||||
* a pending interrupt, this is a SW error and PAPR specifies
|
||||
* that we don't have to deal with it.
|
||||
*
|
||||
* The sending of an EOI to the ICS is handled after the
|
||||
|
|
|
@ -511,13 +511,6 @@ retry:
|
|||
return page;
|
||||
}
|
||||
|
||||
static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
|
||||
unsigned long sz)
|
||||
{
|
||||
unsigned long __boundary = (addr + sz) & ~(sz-1);
|
||||
return (__boundary - 1 < end - 1) ? __boundary : end;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PPC_MM_SLICES
|
||||
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
|
||||
unsigned long len, unsigned long pgoff,
|
||||
|
@ -665,68 +658,3 @@ void flush_dcache_icache_hugepage(struct page *page)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
|
||||
unsigned long end, int write, struct page **pages, int *nr)
|
||||
{
|
||||
unsigned long pte_end;
|
||||
struct page *head, *page;
|
||||
pte_t pte;
|
||||
int refs;
|
||||
|
||||
pte_end = (addr + sz) & ~(sz-1);
|
||||
if (pte_end < end)
|
||||
end = pte_end;
|
||||
|
||||
pte = READ_ONCE(*ptep);
|
||||
|
||||
if (!pte_access_permitted(pte, write))
|
||||
return 0;
|
||||
|
||||
/* hugepages are never "special" */
|
||||
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
|
||||
|
||||
refs = 0;
|
||||
head = pte_page(pte);
|
||||
|
||||
page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
|
||||
do {
|
||||
VM_BUG_ON(compound_head(page) != head);
|
||||
pages[*nr] = page;
|
||||
(*nr)++;
|
||||
page++;
|
||||
refs++;
|
||||
} while (addr += PAGE_SIZE, addr != end);
|
||||
|
||||
if (!page_cache_add_speculative(head, refs)) {
|
||||
*nr -= refs;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (unlikely(pte_val(pte) != pte_val(*ptep))) {
|
||||
/* Could be optimized better */
|
||||
*nr -= refs;
|
||||
while (refs--)
|
||||
put_page(head);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned int pdshift,
|
||||
unsigned long end, int write, struct page **pages, int *nr)
|
||||
{
|
||||
pte_t *ptep;
|
||||
unsigned long sz = 1UL << hugepd_shift(hugepd);
|
||||
unsigned long next;
|
||||
|
||||
ptep = hugepte_offset(hugepd, addr, pdshift);
|
||||
do {
|
||||
next = hugepte_addr_end(addr, end, sz);
|
||||
if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
|
||||
return 0;
|
||||
} while (ptep++, addr = next, addr != end);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@@ -10,6 +10,8 @@
#include <linux/mm.h>
#include <asm/tlb.h>

#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */

static inline void pmd_populate_kernel(struct mm_struct *mm,
				       pmd_t *pmd, pte_t *pte)
{
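Several hunks in this series replace open-coded PTE page allocation with the helpers that the newly added <asm-generic/pgalloc.h> include provides. A rough sketch of what those helpers do, under the assumption that they follow the 5.3-era shape (the header itself is outside this excerpt):

/* Sketch (assumed shape, not quoted from this diff): allocate a zeroed page
 * for kernel page tables; for user page tables also run the pgtable
 * constructor so split-ptlock state is initialised. */
static inline pte_t *__pte_alloc_one_kernel(struct mm_struct *mm)
{
	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
}

static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp)
{
	struct page *pte;

	pte = alloc_page(gfp);
	if (!pte)
		return NULL;
	if (!pgtable_page_ctor(pte)) {
		__free_page(pte);
		return NULL;
	}
	return pte;
}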
@ -74,33 +76,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
|
|||
|
||||
#endif /* __PAGETABLE_PMD_FOLDED */
|
||||
|
||||
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
|
||||
{
|
||||
return (pte_t *)__get_free_page(
|
||||
GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
|
||||
}
|
||||
|
||||
static inline struct page *pte_alloc_one(struct mm_struct *mm)
|
||||
{
|
||||
struct page *pte;
|
||||
|
||||
pte = alloc_page(GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
|
||||
if (likely(pte != NULL))
|
||||
pgtable_page_ctor(pte);
|
||||
return pte;
|
||||
}
|
||||
|
||||
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
|
||||
{
|
||||
free_page((unsigned long)pte);
|
||||
}
|
||||
|
||||
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
|
||||
{
|
||||
pgtable_page_dtor(pte);
|
||||
__free_page(pte);
|
||||
}
|
||||
|
||||
#define __pte_free_tlb(tlb, pte, buf) \
|
||||
do { \
|
||||
pgtable_page_dtor(pte); \
|
||||
|
|
|
@ -139,6 +139,7 @@ config S390
|
|||
select HAVE_DMA_CONTIGUOUS
|
||||
select HAVE_DYNAMIC_FTRACE
|
||||
select HAVE_DYNAMIC_FTRACE_WITH_REGS
|
||||
select HAVE_FAST_GUP
|
||||
select HAVE_EFFICIENT_UNALIGNED_ACCESS
|
||||
select HAVE_FENTRY
|
||||
select HAVE_FTRACE_MCOUNT_RECORD
|
||||
|
@ -146,7 +147,6 @@ config S390
|
|||
select HAVE_FUNCTION_TRACER
|
||||
select HAVE_FUTEX_CMPXCHG if FUTEX
|
||||
select HAVE_GCC_PLUGINS
|
||||
select HAVE_GENERIC_GUP
|
||||
select HAVE_KERNEL_BZIP2
|
||||
select HAVE_KERNEL_GZIP
|
||||
select HAVE_KERNEL_LZ4
|
||||
|
@@ -641,9 +641,6 @@ config ARCH_SPARSEMEM_ENABLE
config ARCH_SPARSEMEM_DEFAULT
	def_bool y

config ARCH_SELECT_MEMORY_MODEL
	def_bool y

config ARCH_ENABLE_MEMORY_HOTPLUG
	def_bool y if SPARSEMEM

@@ -1270,14 +1270,8 @@ static inline pte_t *pte_offset(pmd_t *pmd, unsigned long address)
#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
#define pte_unmap(pte) do { } while (0)

static inline bool gup_fast_permitted(unsigned long start, int nr_pages)
static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
{
	unsigned long len, end;

	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;
	if (end < start)
		return false;
	return end <= current->mm->context.asce_limit;
}
#define gup_fast_permitted gup_fast_permitted

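In the s390 hunk above, gup_fast_permitted() moves from a (start, nr_pages) signature to (start, end), because the generic get_user_pages_fast() path now computes and overflow-checks the range once before calling the arch hook. Under that assumption the arch check most likely collapses to a single comparison; a hedged sketch of the reworked helper:

/* Sketch of the helper under the new (start, end) convention; the length
 * and overflow checks are assumed to have moved into common gup code. */
static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
{
	return end <= current->mm->context.asce_limit;
}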
@ -15,6 +15,7 @@ config SUPERH
|
|||
select HAVE_ARCH_TRACEHOOK
|
||||
select HAVE_PERF_EVENTS
|
||||
select HAVE_DEBUG_BUGVERBOSE
|
||||
select HAVE_FAST_GUP if MMU
|
||||
select ARCH_HAVE_CUSTOM_GPIO_H
|
||||
select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A)
|
||||
select ARCH_HAS_GCOV_PROFILE_ALL
|
||||
|
@ -64,6 +65,7 @@ config SUPERH
|
|||
config SUPERH32
|
||||
def_bool "$(ARCH)" = "sh"
|
||||
select ARCH_32BIT_OFF_T
|
||||
select GUP_GET_PTE_LOW_HIGH if X2TLB
|
||||
select HAVE_KPROBES
|
||||
select HAVE_KRETPROBES
|
||||
select HAVE_IOREMAP_PROT if MMU && !X2TLB
|
||||
|
|
|
@ -40,7 +40,6 @@ CONFIG_FB=y
|
|||
CONFIG_FIRMWARE_EDID=y
|
||||
CONFIG_FB_HIT=y
|
||||
CONFIG_FB_SH_MOBILE_LCDC=y
|
||||
CONFIG_BACKLIGHT_LCD_SUPPORT=y
|
||||
CONFIG_FRAMEBUFFER_CONSOLE=y
|
||||
CONFIG_FONTS=y
|
||||
CONFIG_FONT_PEARL_8x8=y
|
||||
|
|
|
@ -191,7 +191,6 @@ CONFIG_CONFIGFS_FS=y
|
|||
CONFIG_JFFS2_FS=m
|
||||
CONFIG_JFFS2_FS_XATTR=y
|
||||
CONFIG_UBIFS_FS=m
|
||||
CONFIG_LOGFS=m
|
||||
CONFIG_CRAMFS=m
|
||||
CONFIG_SQUASHFS=m
|
||||
CONFIG_ROMFS_FS=m
|
||||
|
|
|
@ -85,7 +85,6 @@ CONFIG_WATCHDOG=y
|
|||
CONFIG_SH_WDT=y
|
||||
CONFIG_SSB=y
|
||||
CONFIG_FB=y
|
||||
CONFIG_BACKLIGHT_LCD_SUPPORT=y
|
||||
# CONFIG_LCD_CLASS_DEVICE is not set
|
||||
CONFIG_FRAMEBUFFER_CONSOLE=y
|
||||
CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
|
||||
|
|
|
@ -369,7 +369,11 @@ static inline int iounmap_fixed(void __iomem *addr) { return -EINVAL; }
|
|||
|
||||
#define ioremap_nocache ioremap
|
||||
#define ioremap_uc ioremap
|
||||
#define iounmap __iounmap
|
||||
|
||||
static inline void iounmap(void __iomem *addr)
|
||||
{
|
||||
__iounmap(addr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
|
||||
|
|
|
@ -38,6 +38,9 @@ static inline unsigned long pud_page_vaddr(pud_t pud)
|
|||
return pud_val(pud);
|
||||
}
|
||||
|
||||
/* only used by the stubbed out hugetlb gup code, should never be called */
|
||||
#define pud_page(pud) NULL
|
||||
|
||||
#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
|
||||
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
|
||||
{
|
||||
|
|
|
@ -149,6 +149,43 @@ extern void paging_init(void);
|
|||
extern void page_table_range_init(unsigned long start, unsigned long end,
|
||||
pgd_t *pgd);
|
||||
|
||||
static inline bool __pte_access_permitted(pte_t pte, u64 prot)
|
||||
{
|
||||
return (pte_val(pte) & (prot | _PAGE_SPECIAL)) == prot;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X2TLB
|
||||
static inline bool pte_access_permitted(pte_t pte, bool write)
|
||||
{
|
||||
u64 prot = _PAGE_PRESENT;
|
||||
|
||||
prot |= _PAGE_EXT(_PAGE_EXT_KERN_READ | _PAGE_EXT_USER_READ);
|
||||
if (write)
|
||||
prot |= _PAGE_EXT(_PAGE_EXT_KERN_WRITE | _PAGE_EXT_USER_WRITE);
|
||||
return __pte_access_permitted(pte, prot);
|
||||
}
|
||||
#elif defined(CONFIG_SUPERH64)
|
||||
static inline bool pte_access_permitted(pte_t pte, bool write)
|
||||
{
|
||||
u64 prot = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ;
|
||||
|
||||
if (write)
|
||||
prot |= _PAGE_WRITE;
|
||||
return __pte_access_permitted(pte, prot);
|
||||
}
|
||||
#else
|
||||
static inline bool pte_access_permitted(pte_t pte, bool write)
|
||||
{
|
||||
u64 prot = _PAGE_PRESENT | _PAGE_USER;
|
||||
|
||||
if (write)
|
||||
prot |= _PAGE_RW;
|
||||
return __pte_access_permitted(pte, prot);
|
||||
}
|
||||
#endif
|
||||
|
||||
#define pte_access_permitted pte_access_permitted
|
||||
|
||||
/* arch/sh/mm/mmap.c */
|
||||
#define HAVE_ARCH_UNMAPPED_AREA
|
||||
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
|
||||
|
|
|
@@ -17,7 +17,7 @@ cacheops-$(CONFIG_CPU_SHX3) += cache-shx3.o
obj-y += $(cacheops-y)

mmu-y := nommu.o extable_32.o
mmu-$(CONFIG_MMU) := extable_$(BITS).o fault.o gup.o ioremap.o kmap.o \
mmu-$(CONFIG_MMU) := extable_$(BITS).o fault.o ioremap.o kmap.o \
		     pgtable.o tlbex_$(BITS).o tlbflush_$(BITS).o

obj-y += $(mmu-y)

arch/sh/mm/gup.c (277 lines removed)
@ -1,277 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Lockless get_user_pages_fast for SuperH
|
||||
*
|
||||
* Copyright (C) 2009 - 2010 Paul Mundt
|
||||
*
|
||||
* Cloned from the x86 and PowerPC versions, by:
|
||||
*
|
||||
* Copyright (C) 2008 Nick Piggin
|
||||
* Copyright (C) 2008 Novell Inc.
|
||||
*/
|
||||
#include <linux/sched.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/vmstat.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <asm/pgtable.h>
|
||||
|
||||
static inline pte_t gup_get_pte(pte_t *ptep)
|
||||
{
|
||||
#ifndef CONFIG_X2TLB
|
||||
return READ_ONCE(*ptep);
|
||||
#else
|
||||
/*
|
||||
* With get_user_pages_fast, we walk down the pagetables without
|
||||
* taking any locks. For this we would like to load the pointers
|
||||
* atomically, but that is not possible with 64-bit PTEs. What
|
||||
* we do have is the guarantee that a pte will only either go
|
||||
* from not present to present, or present to not present or both
|
||||
* -- it will not switch to a completely different present page
|
||||
* without a TLB flush in between; something that we are blocking
|
||||
* by holding interrupts off.
|
||||
*
|
||||
* Setting ptes from not present to present goes:
|
||||
* ptep->pte_high = h;
|
||||
* smp_wmb();
|
||||
* ptep->pte_low = l;
|
||||
*
|
||||
* And present to not present goes:
|
||||
* ptep->pte_low = 0;
|
||||
* smp_wmb();
|
||||
* ptep->pte_high = 0;
|
||||
*
|
||||
* We must ensure here that the load of pte_low sees l iff pte_high
|
||||
* sees h. We load pte_high *after* loading pte_low, which ensures we
|
||||
* don't see an older value of pte_high. *Then* we recheck pte_low,
|
||||
* which ensures that we haven't picked up a changed pte high. We might
|
||||
* have got rubbish values from pte_low and pte_high, but we are
|
||||
* guaranteed that pte_low will not have the present bit set *unless*
|
||||
* it is 'l'. And get_user_pages_fast only operates on present ptes, so
|
||||
* we're safe.
|
||||
*
|
||||
* gup_get_pte should not be used or copied outside gup.c without being
|
||||
* very careful -- it does not atomically load the pte or anything that
|
||||
* is likely to be useful for you.
|
||||
*/
|
||||
pte_t pte;
|
||||
|
||||
retry:
|
||||
pte.pte_low = ptep->pte_low;
|
||||
smp_rmb();
|
||||
pte.pte_high = ptep->pte_high;
|
||||
smp_rmb();
|
||||
if (unlikely(pte.pte_low != ptep->pte_low))
|
||||
goto retry;
|
||||
|
||||
return pte;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* The performance critical leaf functions are made noinline otherwise gcc
|
||||
* inlines everything into a single function which results in too much
|
||||
* register pressure.
|
||||
*/
|
||||
static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
|
||||
unsigned long end, int write, struct page **pages, int *nr)
|
||||
{
|
||||
u64 mask, result;
|
||||
pte_t *ptep;
|
||||
|
||||
#ifdef CONFIG_X2TLB
|
||||
result = _PAGE_PRESENT | _PAGE_EXT(_PAGE_EXT_KERN_READ | _PAGE_EXT_USER_READ);
|
||||
if (write)
|
||||
result |= _PAGE_EXT(_PAGE_EXT_KERN_WRITE | _PAGE_EXT_USER_WRITE);
|
||||
#elif defined(CONFIG_SUPERH64)
|
||||
result = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ;
|
||||
if (write)
|
||||
result |= _PAGE_WRITE;
|
||||
#else
|
||||
result = _PAGE_PRESENT | _PAGE_USER;
|
||||
if (write)
|
||||
result |= _PAGE_RW;
|
||||
#endif
|
||||
|
||||
mask = result | _PAGE_SPECIAL;
|
||||
|
||||
ptep = pte_offset_map(&pmd, addr);
|
||||
do {
|
||||
pte_t pte = gup_get_pte(ptep);
|
||||
struct page *page;
|
||||
|
||||
if ((pte_val(pte) & mask) != result) {
|
||||
pte_unmap(ptep);
|
||||
return 0;
|
||||
}
|
||||
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
|
||||
page = pte_page(pte);
|
||||
get_page(page);
|
||||
__flush_anon_page(page, addr);
|
||||
flush_dcache_page(page);
|
||||
pages[*nr] = page;
|
||||
(*nr)++;
|
||||
|
||||
} while (ptep++, addr += PAGE_SIZE, addr != end);
|
||||
pte_unmap(ptep - 1);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
|
||||
int write, struct page **pages, int *nr)
|
||||
{
|
||||
unsigned long next;
|
||||
pmd_t *pmdp;
|
||||
|
||||
pmdp = pmd_offset(&pud, addr);
|
||||
do {
|
||||
pmd_t pmd = *pmdp;
|
||||
|
||||
next = pmd_addr_end(addr, end);
|
||||
if (pmd_none(pmd))
|
||||
return 0;
|
||||
if (!gup_pte_range(pmd, addr, next, write, pages, nr))
|
||||
return 0;
|
||||
} while (pmdp++, addr = next, addr != end);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
|
||||
int write, struct page **pages, int *nr)
|
||||
{
|
||||
unsigned long next;
|
||||
pud_t *pudp;
|
||||
|
||||
pudp = pud_offset(&pgd, addr);
|
||||
do {
|
||||
pud_t pud = *pudp;
|
||||
|
||||
next = pud_addr_end(addr, end);
|
||||
if (pud_none(pud))
|
||||
return 0;
|
||||
if (!gup_pmd_range(pud, addr, next, write, pages, nr))
|
||||
return 0;
|
||||
} while (pudp++, addr = next, addr != end);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Like get_user_pages_fast() except its IRQ-safe in that it won't fall
|
||||
* back to the regular GUP.
|
||||
* Note a difference with get_user_pages_fast: this always returns the
|
||||
* number of pages pinned, 0 if no pages were pinned.
|
||||
*/
|
||||
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
||||
struct page **pages)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
unsigned long addr, len, end;
|
||||
unsigned long next;
|
||||
unsigned long flags;
|
||||
pgd_t *pgdp;
|
||||
int nr = 0;
|
||||
|
||||
start &= PAGE_MASK;
|
||||
addr = start;
|
||||
len = (unsigned long) nr_pages << PAGE_SHIFT;
|
||||
end = start + len;
|
||||
if (unlikely(!access_ok((void __user *)start, len)))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* This doesn't prevent pagetable teardown, but does prevent
|
||||
* the pagetables and pages from being freed.
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
pgdp = pgd_offset(mm, addr);
|
||||
do {
|
||||
pgd_t pgd = *pgdp;
|
||||
|
||||
next = pgd_addr_end(addr, end);
|
||||
if (pgd_none(pgd))
|
||||
break;
|
||||
if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
|
||||
break;
|
||||
} while (pgdp++, addr = next, addr != end);
|
||||
local_irq_restore(flags);
|
||||
|
||||
return nr;
|
||||
}
|
||||
|
||||
/**
|
||||
* get_user_pages_fast() - pin user pages in memory
|
||||
* @start: starting user address
|
||||
* @nr_pages: number of pages from start to pin
|
||||
* @gup_flags: flags modifying pin behaviour
|
||||
* @pages: array that receives pointers to the pages pinned.
|
||||
* Should be at least nr_pages long.
|
||||
*
|
||||
* Attempt to pin user pages in memory without taking mm->mmap_sem.
|
||||
* If not successful, it will fall back to taking the lock and
|
||||
* calling get_user_pages().
|
||||
*
|
||||
* Returns number of pages pinned. This may be fewer than the number
|
||||
* requested. If nr_pages is 0 or negative, returns 0. If no pages
|
||||
* were pinned, returns -errno.
|
||||
*/
|
||||
int get_user_pages_fast(unsigned long start, int nr_pages,
|
||||
unsigned int gup_flags, struct page **pages)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
unsigned long addr, len, end;
|
||||
unsigned long next;
|
||||
pgd_t *pgdp;
|
||||
int nr = 0;
|
||||
|
||||
start &= PAGE_MASK;
|
||||
addr = start;
|
||||
len = (unsigned long) nr_pages << PAGE_SHIFT;
|
||||
|
||||
end = start + len;
|
||||
if (end < start)
|
||||
goto slow_irqon;
|
||||
|
||||
local_irq_disable();
|
||||
pgdp = pgd_offset(mm, addr);
|
||||
do {
|
||||
pgd_t pgd = *pgdp;
|
||||
|
||||
next = pgd_addr_end(addr, end);
|
||||
if (pgd_none(pgd))
|
||||
goto slow;
|
||||
if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE,
|
||||
pages, &nr))
|
||||
goto slow;
|
||||
} while (pgdp++, addr = next, addr != end);
|
||||
local_irq_enable();
|
||||
|
||||
VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
|
||||
return nr;
|
||||
|
||||
{
|
||||
int ret;
|
||||
|
||||
slow:
|
||||
local_irq_enable();
|
||||
slow_irqon:
|
||||
/* Try to get the remaining pages with get_user_pages */
|
||||
start += nr << PAGE_SHIFT;
|
||||
pages += nr;
|
||||
|
||||
ret = get_user_pages_unlocked(start,
|
||||
(end - start) >> PAGE_SHIFT, pages,
|
||||
gup_flags);
|
||||
|
||||
/* Have to be a bit careful with return values */
|
||||
if (nr > 0) {
|
||||
if (ret < 0)
|
||||
ret = nr;
|
||||
else
|
||||
ret += nr;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
}
|
|
@ -28,6 +28,7 @@ config SPARC
|
|||
select RTC_DRV_M48T59
|
||||
select RTC_SYSTOHC
|
||||
select HAVE_ARCH_JUMP_LABEL if SPARC64
|
||||
select HAVE_FAST_GUP if SPARC64
|
||||
select GENERIC_IRQ_SHOW
|
||||
select ARCH_WANT_IPC_PARSE_VERSION
|
||||
select GENERIC_PCI_IOMAP
|
||||
|
@@ -300,9 +301,6 @@ config NODES_SPAN_OTHER_NODES
	def_bool y
	depends on NEED_MULTIPLE_NODES

config ARCH_SELECT_MEMORY_MODEL
	def_bool y if SPARC64

config ARCH_SPARSEMEM_ENABLE
	def_bool y if SPARC64
	select SPARSEMEM_VMEMMAP_ENABLE

@ -864,6 +864,9 @@ static inline unsigned long pud_page_vaddr(pud_t pud)
|
|||
#define pgd_present(pgd) (pgd_val(pgd) != 0U)
|
||||
#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0UL)
|
||||
|
||||
/* only used by the stubbed out hugetlb gup code, should never be called */
|
||||
#define pgd_page(pgd) NULL
|
||||
|
||||
static inline unsigned long pud_large(pud_t pud)
|
||||
{
|
||||
pte_t pte = __pte(pud_val(pud));
|
||||
|
@ -1075,6 +1078,46 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma,
|
|||
}
|
||||
#define io_remap_pfn_range io_remap_pfn_range
|
||||
|
||||
static inline unsigned long untagged_addr(unsigned long start)
|
||||
{
|
||||
if (adi_capable()) {
|
||||
long addr = start;
|
||||
|
||||
/* If userspace has passed a versioned address, kernel
|
||||
* will not find it in the VMAs since it does not store
|
||||
* the version tags in the list of VMAs. Storing version
|
||||
* tags in list of VMAs is impractical since they can be
|
||||
* changed any time from userspace without dropping into
|
||||
* kernel. Any address search in VMAs will be done with
|
||||
* non-versioned addresses. Ensure the ADI version bits
|
||||
* are dropped here by sign extending the last bit before
|
||||
* ADI bits. IOMMU does not implement version tags.
|
||||
*/
|
||||
return (addr << (long)adi_nbits()) >> (long)adi_nbits();
|
||||
}
|
||||
|
||||
return start;
|
||||
}
|
||||
#define untagged_addr untagged_addr
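The untagged_addr() helper above strips SPARC ADI version tags by shifting the tag bits out of the top of the address and then arithmetically shifting back, so the bit just below the tag field is sign-extended over the old tag. A small, self-contained illustration of that trick (hedged: it assumes a 4-bit tag in bits 63..60 purely for demonstration; the real width comes from adi_nbits()):

#include <stdio.h>

int main(void)
{
	/* Assumed layout for illustration only: 4 tag bits in 63..60. */
	unsigned long tagged = 0xa123456789abcd00UL;
	int nbits = 4;				/* stand-in for adi_nbits() */

	/* Shift the tag out, then shift back as a signed value so bit 59
	 * is sign-extended over the old tag (gcc-style arithmetic right
	 * shift, which the kernel code relies on as well). */
	long untagged = ((long)(tagged << nbits)) >> nbits;

	printf("%016lx -> %016lx\n", tagged, (unsigned long)untagged);
	return 0;
}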
|
||||
|
||||
static inline bool pte_access_permitted(pte_t pte, bool write)
|
||||
{
|
||||
u64 prot;
|
||||
|
||||
if (tlb_type == hypervisor) {
|
||||
prot = _PAGE_PRESENT_4V | _PAGE_P_4V;
|
||||
if (write)
|
||||
prot |= _PAGE_WRITE_4V;
|
||||
} else {
|
||||
prot = _PAGE_PRESENT_4U | _PAGE_P_4U;
|
||||
if (write)
|
||||
prot |= _PAGE_WRITE_4U;
|
||||
}
|
||||
|
||||
return (pte_val(pte) & (prot | _PAGE_SPECIAL)) == prot;
|
||||
}
|
||||
#define pte_access_permitted pte_access_permitted
|
||||
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm-generic/pgtable.h>
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
asflags-y := -ansi
|
||||
ccflags-y := -Werror
|
||||
|
||||
obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o gup.o
|
||||
obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o
|
||||
obj-y += fault_$(BITS).o
|
||||
obj-y += init_$(BITS).o
|
||||
obj-$(CONFIG_SPARC32) += extable.o srmmu.o iommu.o io-unit.o
|
||||
|
|
|
@ -1,340 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Lockless get_user_pages_fast for sparc, cribbed from powerpc
|
||||
*
|
||||
* Copyright (C) 2008 Nick Piggin
|
||||
* Copyright (C) 2008 Novell Inc.
|
||||
*/
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/vmstat.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/rwsem.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/adi.h>
|
||||
|
||||
/*
|
||||
* The performance critical leaf functions are made noinline otherwise gcc
|
||||
* inlines everything into a single function which results in too much
|
||||
* register pressure.
|
||||
*/
|
||||
static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
|
||||
unsigned long end, int write, struct page **pages, int *nr)
|
||||
{
|
||||
unsigned long mask, result;
|
||||
pte_t *ptep;
|
||||
|
||||
if (tlb_type == hypervisor) {
|
||||
result = _PAGE_PRESENT_4V|_PAGE_P_4V;
|
||||
if (write)
|
||||
result |= _PAGE_WRITE_4V;
|
||||
} else {
|
||||
result = _PAGE_PRESENT_4U|_PAGE_P_4U;
|
||||
if (write)
|
||||
result |= _PAGE_WRITE_4U;
|
||||
}
|
||||
mask = result | _PAGE_SPECIAL;
|
||||
|
||||
ptep = pte_offset_kernel(&pmd, addr);
|
||||
do {
|
||||
struct page *page, *head;
|
||||
pte_t pte = *ptep;
|
||||
|
||||
if ((pte_val(pte) & mask) != result)
|
||||
return 0;
|
||||
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
|
||||
|
||||
/* The hugepage case is simplified on sparc64 because
|
||||
* we encode the sub-page pfn offsets into the
|
||||
* hugepage PTEs. We could optimize this in the future
|
||||
* use page_cache_add_speculative() for the hugepage case.
|
||||
*/
|
||||
page = pte_page(pte);
|
||||
head = compound_head(page);
|
||||
if (!page_cache_get_speculative(head))
|
||||
return 0;
|
||||
if (unlikely(pte_val(pte) != pte_val(*ptep))) {
|
||||
put_page(head);
|
||||
return 0;
|
||||
}
|
||||
|
||||
pages[*nr] = page;
|
||||
(*nr)++;
|
||||
} while (ptep++, addr += PAGE_SIZE, addr != end);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
|
||||
unsigned long end, int write, struct page **pages,
|
||||
int *nr)
|
||||
{
|
||||
struct page *head, *page;
|
||||
int refs;
|
||||
|
||||
if (!(pmd_val(pmd) & _PAGE_VALID))
|
||||
return 0;
|
||||
|
||||
if (write && !pmd_write(pmd))
|
||||
return 0;
|
||||
|
||||
refs = 0;
|
||||
page = pmd_page(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
|
||||
head = compound_head(page);
|
||||
do {
|
||||
VM_BUG_ON(compound_head(page) != head);
|
||||
pages[*nr] = page;
|
||||
(*nr)++;
|
||||
page++;
|
||||
refs++;
|
||||
} while (addr += PAGE_SIZE, addr != end);
|
||||
|
||||
if (!page_cache_add_speculative(head, refs)) {
|
||||
*nr -= refs;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
|
||||
*nr -= refs;
|
||||
while (refs--)
|
||||
put_page(head);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
|
||||
unsigned long end, int write, struct page **pages,
|
||||
int *nr)
|
||||
{
|
||||
struct page *head, *page;
|
||||
int refs;
|
||||
|
||||
if (!(pud_val(pud) & _PAGE_VALID))
|
||||
return 0;
|
||||
|
||||
if (write && !pud_write(pud))
|
||||
return 0;
|
||||
|
||||
refs = 0;
|
||||
page = pud_page(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
|
||||
head = compound_head(page);
|
||||
do {
|
||||
VM_BUG_ON(compound_head(page) != head);
|
||||
pages[*nr] = page;
|
||||
(*nr)++;
|
||||
page++;
|
||||
refs++;
|
||||
} while (addr += PAGE_SIZE, addr != end);
|
||||
|
||||
if (!page_cache_add_speculative(head, refs)) {
|
||||
*nr -= refs;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (unlikely(pud_val(pud) != pud_val(*pudp))) {
|
||||
*nr -= refs;
|
||||
while (refs--)
|
||||
put_page(head);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
|
||||
int write, struct page **pages, int *nr)
|
||||
{
|
||||
unsigned long next;
|
||||
pmd_t *pmdp;
|
||||
|
||||
pmdp = pmd_offset(&pud, addr);
|
||||
do {
|
||||
pmd_t pmd = *pmdp;
|
||||
|
||||
next = pmd_addr_end(addr, end);
|
||||
if (pmd_none(pmd))
|
||||
return 0;
|
||||
if (unlikely(pmd_large(pmd))) {
|
||||
if (!gup_huge_pmd(pmdp, pmd, addr, next,
|
||||
write, pages, nr))
|
||||
return 0;
|
||||
} else if (!gup_pte_range(pmd, addr, next, write,
|
||||
pages, nr))
|
||||
return 0;
|
||||
} while (pmdp++, addr = next, addr != end);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
|
||||
int write, struct page **pages, int *nr)
|
||||
{
|
||||
unsigned long next;
|
||||
pud_t *pudp;
|
||||
|
||||
pudp = pud_offset(&pgd, addr);
|
||||
do {
|
||||
pud_t pud = *pudp;
|
||||
|
||||
next = pud_addr_end(addr, end);
|
||||
if (pud_none(pud))
|
||||
return 0;
|
||||
if (unlikely(pud_large(pud))) {
|
||||
if (!gup_huge_pud(pudp, pud, addr, next,
|
||||
write, pages, nr))
|
||||
return 0;
|
||||
} else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
|
||||
return 0;
|
||||
} while (pudp++, addr = next, addr != end);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Note a difference with get_user_pages_fast: this always returns the
|
||||
* number of pages pinned, 0 if no pages were pinned.
|
||||
*/
|
||||
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
||||
struct page **pages)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
unsigned long addr, len, end;
|
||||
unsigned long next, flags;
|
||||
pgd_t *pgdp;
|
||||
int nr = 0;
|
||||
|
||||
#ifdef CONFIG_SPARC64
|
||||
if (adi_capable()) {
|
||||
long addr = start;
|
||||
|
||||
/* If userspace has passed a versioned address, kernel
|
||||
* will not find it in the VMAs since it does not store
|
||||
* the version tags in the list of VMAs. Storing version
|
||||
* tags in list of VMAs is impractical since they can be
|
||||
* changed any time from userspace without dropping into
|
||||
* kernel. Any address search in VMAs will be done with
|
||||
* non-versioned addresses. Ensure the ADI version bits
|
||||
* are dropped here by sign extending the last bit before
|
||||
* ADI bits. IOMMU does not implement version tags.
|
||||
*/
|
||||
addr = (addr << (long)adi_nbits()) >> (long)adi_nbits();
|
||||
start = addr;
|
||||
}
|
||||
#endif
|
||||
start &= PAGE_MASK;
|
||||
addr = start;
|
||||
len = (unsigned long) nr_pages << PAGE_SHIFT;
|
||||
end = start + len;
|
||||
|
||||
local_irq_save(flags);
|
||||
pgdp = pgd_offset(mm, addr);
|
||||
do {
|
||||
pgd_t pgd = *pgdp;
|
||||
|
||||
next = pgd_addr_end(addr, end);
|
||||
if (pgd_none(pgd))
|
||||
break;
|
||||
if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
|
||||
break;
|
||||
} while (pgdp++, addr = next, addr != end);
|
||||
local_irq_restore(flags);
|
||||
|
||||
return nr;
|
||||
}
|
||||
|
||||
int get_user_pages_fast(unsigned long start, int nr_pages,
|
||||
unsigned int gup_flags, struct page **pages)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
unsigned long addr, len, end;
|
||||
unsigned long next;
|
||||
pgd_t *pgdp;
|
||||
int nr = 0;
|
||||
|
||||
#ifdef CONFIG_SPARC64
|
||||
if (adi_capable()) {
|
||||
long addr = start;
|
||||
|
||||
/* If userspace has passed a versioned address, kernel
|
||||
* will not find it in the VMAs since it does not store
|
||||
* the version tags in the list of VMAs. Storing version
|
||||
* tags in list of VMAs is impractical since they can be
|
||||
* changed any time from userspace without dropping into
|
||||
* kernel. Any address search in VMAs will be done with
|
||||
* non-versioned addresses. Ensure the ADI version bits
|
||||
* are dropped here by sign extending the last bit before
|
||||
* ADI bits. IOMMU does not implements version tags,
|
||||
*/
|
||||
addr = (addr << (long)adi_nbits()) >> (long)adi_nbits();
|
||||
start = addr;
|
||||
}
|
||||
#endif
|
||||
start &= PAGE_MASK;
|
||||
addr = start;
|
||||
len = (unsigned long) nr_pages << PAGE_SHIFT;
|
||||
end = start + len;
|
||||
|
||||
/*
|
||||
* XXX: batch / limit 'nr', to avoid large irq off latency
|
||||
* needs some instrumenting to determine the common sizes used by
|
||||
* important workloads (eg. DB2), and whether limiting the batch size
|
||||
* will decrease performance.
|
||||
*
|
||||
* It seems like we're in the clear for the moment. Direct-IO is
|
||||
* the main guy that batches up lots of get_user_pages, and even
|
||||
* they are limited to 64-at-a-time which is not so many.
|
||||
*/
|
||||
/*
|
||||
* This doesn't prevent pagetable teardown, but does prevent
|
||||
* the pagetables from being freed on sparc.
|
||||
*
|
||||
* So long as we atomically load page table pointers versus teardown,
|
||||
* we can follow the address down to the the page and take a ref on it.
|
||||
*/
|
||||
local_irq_disable();
|
||||
|
||||
pgdp = pgd_offset(mm, addr);
|
||||
do {
|
||||
pgd_t pgd = *pgdp;
|
||||
|
||||
next = pgd_addr_end(addr, end);
|
||||
if (pgd_none(pgd))
|
||||
goto slow;
|
||||
if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE,
|
||||
pages, &nr))
|
||||
goto slow;
|
||||
} while (pgdp++, addr = next, addr != end);
|
||||
|
||||
local_irq_enable();
|
||||
|
||||
VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
|
||||
return nr;
|
||||
|
||||
{
|
||||
int ret;
|
||||
|
||||
slow:
|
||||
local_irq_enable();
|
||||
|
||||
/* Try to get the remaining pages with get_user_pages */
|
||||
start += nr << PAGE_SHIFT;
|
||||
pages += nr;
|
||||
|
||||
ret = get_user_pages_unlocked(start,
|
||||
(end - start) >> PAGE_SHIFT, pages,
|
||||
gup_flags);
|
||||
|
||||
/* Have to be a bit careful with return values */
|
||||
if (nr > 0) {
|
||||
if (ret < 0)
|
||||
ret = nr;
|
||||
else
|
||||
ret += nr;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
}
|
|
@ -10,6 +10,8 @@
|
|||
|
||||
#include <linux/mm.h>
|
||||
|
||||
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
|
||||
|
||||
#define pmd_populate_kernel(mm, pmd, pte) \
|
||||
set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) __pa(pte)))
|
||||
|
||||
|
@ -25,20 +27,6 @@
|
|||
extern pgd_t *pgd_alloc(struct mm_struct *);
|
||||
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
|
||||
|
||||
extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
|
||||
extern pgtable_t pte_alloc_one(struct mm_struct *);
|
||||
|
||||
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
|
||||
{
|
||||
free_page((unsigned long) pte);
|
||||
}
|
||||
|
||||
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
|
||||
{
|
||||
pgtable_page_dtor(pte);
|
||||
__free_page(pte);
|
||||
}
|
||||
|
||||
#define __pte_free_tlb(tlb,pte, address) \
|
||||
do { \
|
||||
pgtable_page_dtor(pte); \
|
||||
|
|
|
@ -208,28 +208,6 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
|
|||
free_page((unsigned long) pgd);
|
||||
}
|
||||
|
||||
pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
|
||||
{
|
||||
pte_t *pte;
|
||||
|
||||
pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
|
||||
return pte;
|
||||
}
|
||||
|
||||
pgtable_t pte_alloc_one(struct mm_struct *mm)
|
||||
{
|
||||
struct page *pte;
|
||||
|
||||
pte = alloc_page(GFP_KERNEL|__GFP_ZERO);
|
||||
if (!pte)
|
||||
return NULL;
|
||||
if (!pgtable_page_ctor(pte)) {
|
||||
__free_page(pte);
|
||||
return NULL;
|
||||
}
|
||||
return pte;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_3_LEVEL_PGTABLES
|
||||
pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
|
||||
{
|
||||
|
|
|
@ -14,6 +14,10 @@
|
|||
#include <asm/cacheflush.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
|
||||
#define __HAVE_ARCH_PTE_ALLOC_ONE
|
||||
#include <asm-generic/pgalloc.h>
|
||||
|
||||
#define check_pgt_cache() do { } while (0)
|
||||
|
||||
#define _PAGE_USER_TABLE (PMD_TYPE_TABLE | PMD_PRESENT)
|
||||
|
@ -25,17 +29,14 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
|
|||
#define pgd_alloc(mm) get_pgd_slow(mm)
|
||||
#define pgd_free(mm, pgd) free_pgd_slow(mm, pgd)
|
||||
|
||||
#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO)
|
||||
|
||||
/*
|
||||
* Allocate one PTE table.
|
||||
*/
|
||||
static inline pte_t *
|
||||
pte_alloc_one_kernel(struct mm_struct *mm)
|
||||
{
|
||||
pte_t *pte;
|
||||
pte_t *pte = __pte_alloc_one_kernel(mm);
|
||||
|
||||
pte = (pte_t *)__get_free_page(PGALLOC_GFP);
|
||||
if (pte)
|
||||
clean_dcache_area(pte, PTRS_PER_PTE * sizeof(pte_t));
|
||||
|
||||
|
@ -47,35 +48,14 @@ pte_alloc_one(struct mm_struct *mm)
|
|||
{
|
||||
struct page *pte;
|
||||
|
||||
pte = alloc_pages(PGALLOC_GFP, 0);
|
||||
pte = __pte_alloc_one(mm, GFP_PGTABLE_USER);
|
||||
if (!pte)
|
||||
return NULL;
|
||||
if (!PageHighMem(pte)) {
|
||||
void *page = page_address(pte);
|
||||
clean_dcache_area(page, PTRS_PER_PTE * sizeof(pte_t));
|
||||
}
|
||||
if (!pgtable_page_ctor(pte)) {
|
||||
__free_page(pte);
|
||||
}
|
||||
|
||||
if (!PageHighMem(pte))
|
||||
clean_pte_table(page_address(pte));
|
||||
return pte;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free one PTE table.
|
||||
*/
|
||||
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
|
||||
{
|
||||
if (pte)
|
||||
free_page((unsigned long)pte);
|
||||
}
|
||||
|
||||
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
|
||||
{
|
||||
pgtable_page_dtor(pte);
|
||||
__free_page(pte);
|
||||
}
|
||||
|
||||
static inline void __pmd_populate(pmd_t *pmdp, unsigned long pmdval)
|
||||
{
|
||||
set_pmd(pmdp, __pmd(pmdval));
|
||||
|
|
|
@@ -32,7 +32,7 @@
 */
#define GPIO_GEDR (PKUNITY_GPIO_BASE + 0x0018)
/*
 * Sepcial Voltage Detect Reg GPIO_GPIR.
 * Special Voltage Detect Reg GPIO_GPIR.
 */
#define GPIO_GPIR (PKUNITY_GPIO_BASE + 0x0020)

@ -123,6 +123,7 @@ config X86
|
|||
select GENERIC_STRNLEN_USER
|
||||
select GENERIC_TIME_VSYSCALL
|
||||
select GENERIC_GETTIMEOFDAY
|
||||
select GUP_GET_PTE_LOW_HIGH if X86_PAE
|
||||
select HARDLOCKUP_CHECK_TIMESTAMP if X86_64
|
||||
select HAVE_ACPI_APEI if ACPI
|
||||
select HAVE_ACPI_APEI_NMI if ACPI
|
||||
|
@ -158,6 +159,7 @@ config X86
|
|||
select HAVE_EFFICIENT_UNALIGNED_ACCESS
|
||||
select HAVE_EISA
|
||||
select HAVE_EXIT_THREAD
|
||||
select HAVE_FAST_GUP
|
||||
select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE
|
||||
select HAVE_FTRACE_MCOUNT_RECORD
|
||||
select HAVE_FUNCTION_GRAPH_TRACER
|
||||
|
@ -2906,9 +2908,6 @@ config HAVE_ATOMIC_IOMAP
|
|||
config X86_DEV_DMA_OPS
|
||||
bool
|
||||
|
||||
config HAVE_GENERIC_GUP
|
||||
def_bool y
|
||||
|
||||
source "drivers/firmware/Kconfig"
|
||||
|
||||
source "arch/x86/kvm/Kconfig"
|
||||
|
|
|
@ -358,7 +358,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
|
|||
put_user_ex(ptr_to_compat(&frame->uc), &frame->puc);
|
||||
|
||||
/* Create the ucontext. */
|
||||
if (boot_cpu_has(X86_FEATURE_XSAVE))
|
||||
if (static_cpu_has(X86_FEATURE_XSAVE))
|
||||
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
|
||||
else
|
||||
put_user_ex(0, &frame->uc.uc_flags);
|
||||
|
|
|
@ -49,23 +49,8 @@
|
|||
#define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3))
|
||||
#define CONST_MASK(nr) (1 << ((nr) & 7))
|
||||
|
||||
/**
|
||||
* set_bit - Atomically set a bit in memory
|
||||
* @nr: the bit to set
|
||||
* @addr: the address to start counting from
|
||||
*
|
||||
* This function is atomic and may not be reordered. See __set_bit()
|
||||
* if you do not require the atomic guarantees.
|
||||
*
|
||||
* Note: there are no guarantees that this function will not be reordered
|
||||
* on non x86 architectures, so if you are writing portable code,
|
||||
* make sure not to rely on its reordering guarantees.
|
||||
*
|
||||
* Note that @nr may be almost arbitrarily large; this function is not
|
||||
* restricted to acting on a single-word quantity.
|
||||
*/
|
||||
static __always_inline void
|
||||
set_bit(long nr, volatile unsigned long *addr)
|
||||
arch_set_bit(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
if (IS_IMMEDIATE(nr)) {
|
||||
asm volatile(LOCK_PREFIX "orb %1,%0"
|
||||
|
@ -78,32 +63,14 @@ set_bit(long nr, volatile unsigned long *addr)
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* __set_bit - Set a bit in memory
|
||||
* @nr: the bit to set
|
||||
* @addr: the address to start counting from
|
||||
*
|
||||
* Unlike set_bit(), this function is non-atomic and may be reordered.
|
||||
* If it's called on the same region of memory simultaneously, the effect
|
||||
* may be that only one operation succeeds.
|
||||
*/
|
||||
static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
|
||||
static __always_inline void
|
||||
arch___set_bit(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
|
||||
}
|
||||
|
||||
/**
|
||||
* clear_bit - Clears a bit in memory
|
||||
* @nr: Bit to clear
|
||||
* @addr: Address to start counting from
|
||||
*
|
||||
* clear_bit() is atomic and may not be reordered. However, it does
|
||||
* not contain a memory barrier, so if it is used for locking purposes,
|
||||
* you should call smp_mb__before_atomic() and/or smp_mb__after_atomic()
|
||||
* in order to ensure changes are visible on other processors.
|
||||
*/
|
||||
static __always_inline void
|
||||
clear_bit(long nr, volatile unsigned long *addr)
|
||||
arch_clear_bit(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
if (IS_IMMEDIATE(nr)) {
|
||||
asm volatile(LOCK_PREFIX "andb %1,%0"
|
||||
|
@ -115,26 +82,21 @@ clear_bit(long nr, volatile unsigned long *addr)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* clear_bit_unlock - Clears a bit in memory
|
||||
* @nr: Bit to clear
|
||||
* @addr: Address to start counting from
|
||||
*
|
||||
* clear_bit() is atomic and implies release semantics before the memory
|
||||
* operation. It can be used for an unlock.
|
||||
*/
|
||||
static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
|
||||
static __always_inline void
|
||||
arch_clear_bit_unlock(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
barrier();
|
||||
clear_bit(nr, addr);
|
||||
arch_clear_bit(nr, addr);
|
||||
}
|
||||
|
||||
static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
|
||||
static __always_inline void
|
||||
arch___clear_bit(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
|
||||
}
|
||||
|
||||
static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
|
||||
static __always_inline bool
|
||||
arch_clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
bool negative;
|
||||
asm volatile(LOCK_PREFIX "andb %2,%1"
|
||||
|
@ -143,48 +105,23 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
|
|||
: "ir" ((char) ~(1 << nr)) : "memory");
|
||||
return negative;
|
||||
}
|
||||
#define arch_clear_bit_unlock_is_negative_byte \
|
||||
arch_clear_bit_unlock_is_negative_byte
|
||||
|
||||
// Let everybody know we have it
|
||||
#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte
|
||||
|
||||
/*
|
||||
* __clear_bit_unlock - Clears a bit in memory
|
||||
* @nr: Bit to clear
|
||||
* @addr: Address to start counting from
|
||||
*
|
||||
* __clear_bit() is non-atomic and implies release semantics before the memory
|
||||
* operation. It can be used for an unlock if no other CPUs can concurrently
|
||||
* modify other bits in the word.
|
||||
*/
|
||||
static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
|
||||
static __always_inline void
|
||||
arch___clear_bit_unlock(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
__clear_bit(nr, addr);
|
||||
arch___clear_bit(nr, addr);
|
||||
}
|
||||
|
||||
/**
|
||||
* __change_bit - Toggle a bit in memory
|
||||
* @nr: the bit to change
|
||||
* @addr: the address to start counting from
|
||||
*
|
||||
* Unlike change_bit(), this function is non-atomic and may be reordered.
|
||||
* If it's called on the same region of memory simultaneously, the effect
|
||||
* may be that only one operation succeeds.
|
||||
*/
|
||||
static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
|
||||
static __always_inline void
|
||||
arch___change_bit(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
|
||||
}
|
||||
|
||||
/**
|
||||
* change_bit - Toggle a bit in memory
|
||||
* @nr: Bit to change
|
||||
* @addr: Address to start counting from
|
||||
*
|
||||
* change_bit() is atomic and may not be reordered.
|
||||
* Note that @nr may be almost arbitrarily large; this function is not
|
||||
* restricted to acting on a single-word quantity.
|
||||
*/
|
||||
static __always_inline void change_bit(long nr, volatile unsigned long *addr)
|
||||
static __always_inline void
|
||||
arch_change_bit(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
if (IS_IMMEDIATE(nr)) {
|
||||
asm volatile(LOCK_PREFIX "xorb %1,%0"
|
||||
|
@ -196,42 +133,20 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* test_and_set_bit - Set a bit and return its old value
|
||||
* @nr: Bit to set
|
||||
* @addr: Address to count from
|
||||
*
|
||||
* This operation is atomic and cannot be reordered.
|
||||
* It also implies a memory barrier.
|
||||
*/
|
||||
static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
|
||||
static __always_inline bool
|
||||
arch_test_and_set_bit(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, c, "Ir", nr);
|
||||
}
|
||||
|
||||
/**
|
||||
* test_and_set_bit_lock - Set a bit and return its old value for lock
|
||||
* @nr: Bit to set
|
||||
* @addr: Address to count from
|
||||
*
|
||||
* This is the same as test_and_set_bit on x86.
|
||||
*/
|
||||
static __always_inline bool
|
||||
test_and_set_bit_lock(long nr, volatile unsigned long *addr)
|
||||
arch_test_and_set_bit_lock(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
return test_and_set_bit(nr, addr);
|
||||
return arch_test_and_set_bit(nr, addr);
|
||||
}
|
||||
|
||||
/**
|
||||
* __test_and_set_bit - Set a bit and return its old value
|
||||
* @nr: Bit to set
|
||||
* @addr: Address to count from
|
||||
*
|
||||
* This operation is non-atomic and can be reordered.
|
||||
* If two examples of this operation race, one can appear to succeed
|
||||
* but actually fail. You must protect multiple accesses with a lock.
|
||||
*/
|
||||
static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *addr)
|
||||
static __always_inline bool
|
||||
arch___test_and_set_bit(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
bool oldbit;
|
||||
|
||||
|
@ -242,28 +157,13 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
|
|||
return oldbit;
|
||||
}
|
||||
|
||||
/**
|
||||
* test_and_clear_bit - Clear a bit and return its old value
|
||||
* @nr: Bit to clear
|
||||
* @addr: Address to count from
|
||||
*
|
||||
* This operation is atomic and cannot be reordered.
|
||||
* It also implies a memory barrier.
|
||||
*/
|
||||
static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
|
||||
static __always_inline bool
|
||||
arch_test_and_clear_bit(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), *addr, c, "Ir", nr);
|
||||
}
|
||||
|
||||
/**
|
||||
* __test_and_clear_bit - Clear a bit and return its old value
|
||||
* @nr: Bit to clear
|
||||
* @addr: Address to count from
|
||||
*
|
||||
* This operation is non-atomic and can be reordered.
|
||||
* If two examples of this operation race, one can appear to succeed
|
||||
* but actually fail. You must protect multiple accesses with a lock.
|
||||
*
|
||||
/*
|
||||
* Note: the operation is performed atomically with respect to
|
||||
* the local CPU, but not other CPUs. Portable code should not
|
||||
* rely on this behaviour.
|
||||
|
@ -271,7 +171,8 @@ static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *
|
|||
* accessed from a hypervisor on the same CPU if running in a VM: don't change
|
||||
* this without also updating arch/x86/kernel/kvm.c
|
||||
*/
|
||||
static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr)
|
||||
static __always_inline bool
|
||||
arch___test_and_clear_bit(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
bool oldbit;
|
||||
|
||||
|
@ -282,8 +183,8 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
|
|||
return oldbit;
|
||||
}
|
||||
|
||||
/* WARNING: non atomic and it can be reordered! */
|
||||
static __always_inline bool __test_and_change_bit(long nr, volatile unsigned long *addr)
|
||||
static __always_inline bool
|
||||
arch___test_and_change_bit(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
bool oldbit;
|
||||
|
||||
|
@ -295,15 +196,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
|
|||
return oldbit;
|
||||
}
|
||||
|
||||
/**
|
||||
* test_and_change_bit - Change a bit and return its old value
|
||||
* @nr: Bit to change
|
||||
* @addr: Address to count from
|
||||
*
|
||||
* This operation is atomic and cannot be reordered.
|
||||
* It also implies a memory barrier.
|
||||
*/
|
||||
static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
|
||||
static __always_inline bool
|
||||
arch_test_and_change_bit(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr);
|
||||
}
|
||||
|
@ -326,16 +220,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
|
|||
return oldbit;
|
||||
}
|
||||
|
||||
#if 0 /* Fool kernel-doc since it doesn't do macros yet */
|
||||
/**
|
||||
* test_bit - Determine whether a bit is set
|
||||
* @nr: bit number to test
|
||||
* @addr: Address to start counting from
|
||||
*/
|
||||
static bool test_bit(int nr, const volatile unsigned long *addr);
|
||||
#endif
|
||||
|
||||
#define test_bit(nr, addr) \
|
||||
#define arch_test_bit(nr, addr) \
|
||||
(__builtin_constant_p((nr)) \
|
||||
? constant_test_bit((nr), (addr)) \
|
||||
: variable_test_bit((nr), (addr)))
|
||||
|
@ -504,6 +389,8 @@ static __always_inline int fls64(__u64 x)
|
|||
|
||||
#include <asm-generic/bitops/const_hweight.h>
|
||||
|
||||
#include <asm-generic/bitops-instrumented.h>
|
||||
|
||||
#include <asm-generic/bitops/le.h>
|
||||
|
||||
#include <asm-generic/bitops/ext2-atomic-setbit.h>
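The x86 hunks above rename the bit operations to arch_*() and then pull in <asm-generic/bitops-instrumented.h>, which re-exports the familiar names as thin wrappers that report the memory access to KASAN before delegating to the arch implementation. A hedged sketch of what one such wrapper looks like (the header's exact contents are not part of this excerpt):

/* Sketch of an instrumented wrapper (assumed shape): check the touched
 * word with KASAN, then call the raw arch implementation renamed above. */
static inline void set_bit(long nr, volatile unsigned long *addr)
{
	kasan_check_write(addr + BIT_WORD(nr), sizeof(long));
	arch_set_bit(nr, addr);
}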
|
||||
|
|
|
@ -6,6 +6,9 @@
|
|||
#include <linux/mm.h> /* for struct page */
|
||||
#include <linux/pagemap.h>
|
||||
|
||||
#define __HAVE_ARCH_PTE_ALLOC_ONE
|
||||
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
|
||||
|
||||
static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; }
|
||||
|
||||
#ifdef CONFIG_PARAVIRT_XXL
|
||||
|
@ -47,24 +50,8 @@ extern gfp_t __userpte_alloc_gfp;
|
|||
extern pgd_t *pgd_alloc(struct mm_struct *);
|
||||
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
|
||||
|
||||
extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
|
||||
extern pgtable_t pte_alloc_one(struct mm_struct *);
|
||||
|
||||
/* Should really implement gc for free page table pages. This could be
|
||||
done with a reference count in struct page. */
|
||||
|
||||
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
|
||||
{
|
||||
BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
|
||||
free_page((unsigned long)pte);
|
||||
}
|
||||
|
||||
static inline void pte_free(struct mm_struct *mm, struct page *pte)
|
||||
{
|
||||
pgtable_page_dtor(pte);
|
||||
__free_page(pte);
|
||||
}
|
||||
|
||||
extern void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
|
||||
|
||||
static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
|
||||
|
|
|
@ -285,53 +285,6 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
|
|||
#define __pte_to_swp_entry(pte) (__swp_entry(__pteval_swp_type(pte), \
|
||||
__pteval_swp_offset(pte)))
|
||||
|
||||
#define gup_get_pte gup_get_pte
|
||||
/*
|
||||
* WARNING: only to be used in the get_user_pages_fast() implementation.
|
||||
*
|
||||
* With get_user_pages_fast(), we walk down the pagetables without taking
|
||||
* any locks. For this we would like to load the pointers atomically,
|
||||
* but that is not possible (without expensive cmpxchg8b) on PAE. What
|
||||
* we do have is the guarantee that a PTE will only either go from not
|
||||
* present to present, or present to not present or both -- it will not
|
||||
* switch to a completely different present page without a TLB flush in
|
||||
* between; something that we are blocking by holding interrupts off.
|
||||
*
|
||||
* Setting ptes from not present to present goes:
|
||||
*
|
||||
* ptep->pte_high = h;
|
||||
* smp_wmb();
|
||||
* ptep->pte_low = l;
|
||||
*
|
||||
* And present to not present goes:
|
||||
*
|
||||
* ptep->pte_low = 0;
|
||||
* smp_wmb();
|
||||
* ptep->pte_high = 0;
|
||||
*
|
||||
* We must ensure here that the load of pte_low sees 'l' iff pte_high
|
||||
* sees 'h'. We load pte_high *after* loading pte_low, which ensures we
|
||||
* don't see an older value of pte_high. *Then* we recheck pte_low,
|
||||
* which ensures that we haven't picked up a changed pte high. We might
|
||||
* have gotten rubbish values from pte_low and pte_high, but we are
|
||||
* guaranteed that pte_low will not have the present bit set *unless*
|
||||
* it is 'l'. Because get_user_pages_fast() only operates on present ptes
|
||||
* we're safe.
|
||||
*/
|
||||
static inline pte_t gup_get_pte(pte_t *ptep)
|
||||
{
|
||||
pte_t pte;
|
||||
|
||||
do {
|
||||
pte.pte_low = ptep->pte_low;
|
||||
smp_rmb();
|
||||
pte.pte_high = ptep->pte_high;
|
||||
smp_rmb();
|
||||
} while (unlikely(pte.pte_low != ptep->pte_low));
|
||||
|
||||
return pte;
|
||||
}
|
||||
|
||||
#include <asm/pgtable-invert.h>
|
||||
|
||||
#endif /* _ASM_X86_PGTABLE_3LEVEL_H */
|
||||
|
|
|
@ -259,14 +259,8 @@ extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
|
|||
extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
|
||||
|
||||
#define gup_fast_permitted gup_fast_permitted
|
||||
static inline bool gup_fast_permitted(unsigned long start, int nr_pages)
|
||||
static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
|
||||
{
|
||||
unsigned long len, end;
|
||||
|
||||
len = (unsigned long)nr_pages << PAGE_SHIFT;
|
||||
end = start + len;
|
||||
if (end < start)
|
||||
return false;
|
||||
if (end >> __VIRTUAL_MASK_SHIFT)
|
||||
return false;
|
||||
return true;
|
||||
|
|
|
@ -391,7 +391,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
|
|||
put_user_ex(&frame->uc, &frame->puc);
|
||||
|
||||
/* Create the ucontext. */
|
||||
if (boot_cpu_has(X86_FEATURE_XSAVE))
|
||||
if (static_cpu_has(X86_FEATURE_XSAVE))
|
||||
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
|
||||
else
|
||||
put_user_ex(0, &frame->uc.uc_flags);
|
||||
|
|
|
@ -650,7 +650,7 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
|
|||
|
||||
/*
|
||||
* The idea using the light way get the spte on x86_32 guest is from
|
||||
* gup_get_pte(arch/x86/mm/gup.c).
|
||||
* gup_get_pte (mm/gup.c).
|
||||
*
|
||||
* An spte tlb flush may be pending, because kvm_set_pte_rmapp
|
||||
* coalesces them and we are running out of the MMU lock. Therefore
|
||||
|
|
|
@ -13,33 +13,17 @@ phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
|
|||
EXPORT_SYMBOL(physical_mask);
|
||||
#endif
|
||||
|
||||
#define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO)
|
||||
|
||||
#ifdef CONFIG_HIGHPTE
|
||||
#define PGALLOC_USER_GFP __GFP_HIGHMEM
|
||||
#define PGTABLE_HIGHMEM __GFP_HIGHMEM
|
||||
#else
|
||||
#define PGALLOC_USER_GFP 0
|
||||
#define PGTABLE_HIGHMEM 0
|
||||
#endif
|
||||
|
||||
gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
|
||||
|
||||
pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
|
||||
{
|
||||
return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
|
||||
}
|
||||
gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM;
|
||||
|
||||
pgtable_t pte_alloc_one(struct mm_struct *mm)
|
||||
{
|
||||
struct page *pte;
|
||||
|
||||
pte = alloc_pages(__userpte_alloc_gfp, 0);
|
||||
if (!pte)
|
||||
return NULL;
|
||||
if (!pgtable_page_ctor(pte)) {
|
||||
__free_page(pte);
|
||||
return NULL;
|
||||
}
|
||||
return pte;
|
||||
return __pte_alloc_one(mm, __userpte_alloc_gfp);
|
||||
}
|
||||
|
||||
static int __init setup_userpte(char *arg)
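The pgtable.c hunk above drops the local PGALLOC_GFP definition in favour of GFP_PGTABLE_USER, part of the same pgalloc consolidation. A sketch of how the new constants are presumably defined in the generic header (hedged; the definitions themselves are outside this diff):

/* Assumed definitions behind the new names used above. */
#define GFP_PGTABLE_KERNEL	(GFP_KERNEL | __GFP_ZERO)
#define GFP_PGTABLE_USER	(GFP_PGTABLE_KERNEL | __GFP_ACCOUNT)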
|
||||
|
@ -235,7 +219,7 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
|
|||
{
|
||||
int i;
|
||||
bool failed = false;
|
||||
gfp_t gfp = PGALLOC_GFP;
|
||||
gfp_t gfp = GFP_PGTABLE_USER;
|
||||
|
||||
if (mm == &init_mm)
|
||||
gfp &= ~__GFP_ACCOUNT;
|
||||
|
@ -399,14 +383,14 @@ static inline pgd_t *_pgd_alloc(void)
|
|||
* We allocate one page for pgd.
|
||||
*/
|
||||
if (!SHARED_KERNEL_PMD)
|
||||
return (pgd_t *)__get_free_pages(PGALLOC_GFP,
|
||||
return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
|
||||
PGD_ALLOCATION_ORDER);
|
||||
|
||||
/*
|
||||
* Now PAE kernel is not running as a Xen domain. We can allocate
|
||||
* a 32-byte slab for pgd to save memory space.
|
||||
*/
|
||||
return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
|
||||
return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER);
|
||||
}
|
||||
|
||||
static inline void _pgd_free(pgd_t *pgd)
|
||||
|
@ -424,7 +408,8 @@ void __init pgd_cache_init(void)
|
|||
|
||||
static inline pgd_t *_pgd_alloc(void)
|
||||
{
|
||||
return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
|
||||
return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
|
||||
PGD_ALLOCATION_ORDER);
|
||||
}
|
||||
|
||||
static inline void _pgd_free(pgd_t *pgd)
|
||||
|
|
|
@ -2700,8 +2700,7 @@ struct remap_data {
|
|||
struct mmu_update *mmu_update;
|
||||
};
|
||||
|
||||
static int remap_area_pfn_pte_fn(pte_t *ptep, pgtable_t token,
|
||||
unsigned long addr, void *data)
|
||||
static int remap_area_pfn_pte_fn(pte_t *ptep, unsigned long addr, void *data)
|
||||
{
|
||||
struct remap_data *rmd = data;
|
||||
pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot));
|
||||
|
|
|
@@ -175,7 +175,7 @@ static const unsigned int r8a77470_crit_mod_clks[] __initconst = {
 *---------------------------------------------------
 * 0 0 20 x80 x78 x50
 * 0 1 26 x60 x60 x56
 * 1 0 Prohibitted setting
 * 1 0 Prohibited setting
 * 1 1 30 x52 x52 x50
 *
 * *1 : Table 7.4 indicates VCO output (PLL0 = VCO)

@ -35,8 +35,7 @@ struct remap_pfn {
|
|||
pgprot_t prot;
|
||||
};
|
||||
|
||||
static int remap_pfn(pte_t *pte, pgtable_t token,
|
||||
unsigned long addr, void *data)
|
||||
static int remap_pfn(pte_t *pte, unsigned long addr, void *data)
|
||||
{
|
||||
struct remap_pfn *r = data;
|
||||
|
||||
|
|
|
@ -127,7 +127,7 @@ __malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size,
|
|||
res = (void *)pbundle->internal_buffer + pbundle->internal_used;
|
||||
pbundle->internal_used =
|
||||
ALIGN(new_used, sizeof(*pbundle->internal_buffer));
|
||||
if (flags & __GFP_ZERO)
|
||||
if (want_init_on_alloc(flags))
|
||||
memset(res, 0, size);
|
||||
return res;
|
||||
}
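The uverbs hunk above swaps a direct __GFP_ZERO test for want_init_on_alloc(), added elsewhere in this series for the init_on_alloc hardening option. A hedged sketch of what that helper presumably checks (its real definition is not part of this excerpt):

/* Sketch (assumed): zeroing is wanted when the caller asked for it
 * explicitly or when the global init_on_alloc switch is enabled. */
static inline bool want_init_on_alloc(gfp_t flags)
{
	if (static_branch_unlikely(&init_on_alloc))
		return true;
	return flags & __GFP_ZERO;
}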
|
||||
|
|
|
@ -121,6 +121,9 @@ static const struct crashtype crashtypes[] = {
|
|||
CRASHTYPE(READ_AFTER_FREE),
|
||||
CRASHTYPE(WRITE_BUDDY_AFTER_FREE),
|
||||
CRASHTYPE(READ_BUDDY_AFTER_FREE),
|
||||
CRASHTYPE(SLAB_FREE_DOUBLE),
|
||||
CRASHTYPE(SLAB_FREE_CROSS),
|
||||
CRASHTYPE(SLAB_FREE_PAGE),
|
||||
CRASHTYPE(SOFTLOCKUP),
|
||||
CRASHTYPE(HARDLOCKUP),
|
||||
CRASHTYPE(SPINLOCKUP),
|
||||
|
@ -427,6 +430,7 @@ static int __init lkdtm_module_init(void)
|
|||
lkdtm_bugs_init(&recur_count);
|
||||
lkdtm_perms_init();
|
||||
lkdtm_usercopy_init();
|
||||
lkdtm_heap_init();
|
||||
|
||||
/* Register debugfs interface */
|
||||
lkdtm_debugfs_root = debugfs_create_dir("provoke-crash", NULL);
|
||||
|
@ -473,6 +477,7 @@ static void __exit lkdtm_module_exit(void)
|
|||
debugfs_remove_recursive(lkdtm_debugfs_root);
|
||||
|
||||
/* Handle test-specific clean-up. */
|
||||
lkdtm_heap_exit();
|
||||
lkdtm_usercopy_exit();
|
||||
|
||||
if (lkdtm_kprobe != NULL)
|
||||
|
|
|
@@ -7,6 +7,10 @@
 #include <linux/slab.h>
 #include <linux/sched.h>

+static struct kmem_cache *double_free_cache;
+static struct kmem_cache *a_cache;
+static struct kmem_cache *b_cache;
+
 /*
  * This tries to stay within the next largest power-of-2 kmalloc cache
  * to avoid actually overwriting anything important if it's not detected
@@ -146,3 +150,71 @@ void lkdtm_READ_BUDDY_AFTER_FREE(void)

 	kfree(val);
 }
+
+void lkdtm_SLAB_FREE_DOUBLE(void)
+{
+	int *val;
+
+	val = kmem_cache_alloc(double_free_cache, GFP_KERNEL);
+	if (!val) {
+		pr_info("Unable to allocate double_free_cache memory.\n");
+		return;
+	}
+
+	/* Just make sure we got real memory. */
+	*val = 0x12345678;
+	pr_info("Attempting double slab free ...\n");
+	kmem_cache_free(double_free_cache, val);
+	kmem_cache_free(double_free_cache, val);
+}
+
+void lkdtm_SLAB_FREE_CROSS(void)
+{
+	int *val;
+
+	val = kmem_cache_alloc(a_cache, GFP_KERNEL);
+	if (!val) {
+		pr_info("Unable to allocate a_cache memory.\n");
+		return;
+	}
+
+	/* Just make sure we got real memory. */
+	*val = 0x12345679;
+	pr_info("Attempting cross-cache slab free ...\n");
+	kmem_cache_free(b_cache, val);
+}
+
+void lkdtm_SLAB_FREE_PAGE(void)
+{
+	unsigned long p = __get_free_page(GFP_KERNEL);
+
+	pr_info("Attempting non-Slab slab free ...\n");
+	kmem_cache_free(NULL, (void *)p);
+	free_page(p);
+}
+
+/*
+ * We have constructors to keep the caches distinctly separated without
+ * needing to boot with "slab_nomerge".
+ */
+static void ctor_double_free(void *region)
+{ }
+static void ctor_a(void *region)
+{ }
+static void ctor_b(void *region)
+{ }
+
+void __init lkdtm_heap_init(void)
+{
+	double_free_cache = kmem_cache_create("lkdtm-heap-double_free",
+					      64, 0, 0, ctor_double_free);
+	a_cache = kmem_cache_create("lkdtm-heap-a", 64, 0, 0, ctor_a);
+	b_cache = kmem_cache_create("lkdtm-heap-b", 64, 0, 0, ctor_b);
+}
+
+void __exit lkdtm_heap_exit(void)
+{
+	kmem_cache_destroy(double_free_cache);
+	kmem_cache_destroy(a_cache);
+	kmem_cache_destroy(b_cache);
+}
@@ -29,11 +29,16 @@ void lkdtm_STACK_GUARD_PAGE_TRAILING(void);
 void lkdtm_UNSET_SMEP(void);

 /* lkdtm_heap.c */
+void __init lkdtm_heap_init(void);
+void __exit lkdtm_heap_exit(void);
 void lkdtm_OVERWRITE_ALLOCATION(void);
 void lkdtm_WRITE_AFTER_FREE(void);
 void lkdtm_READ_AFTER_FREE(void);
 void lkdtm_WRITE_BUDDY_AFTER_FREE(void);
 void lkdtm_READ_BUDDY_AFTER_FREE(void);
+void lkdtm_SLAB_FREE_DOUBLE(void);
+void lkdtm_SLAB_FREE_CROSS(void);
+void lkdtm_SLAB_FREE_PAGE(void);

 /* lkdtm_perms.c */
 void __init lkdtm_perms_init(void);
@@ -598,7 +598,7 @@ enum ht_channel_width {
 	HT_CHANNEL_WIDTH_MAX,
 };

-/* Ref: 802.11i sepc D10.0 7.3.2.25.1
+/* Ref: 802.11i spec D10.0 7.3.2.25.1
  * Cipher Suites Encryption Algorithms
  */
 enum rt_enc_alg {
@@ -2965,7 +2965,7 @@ lpfc_stop_hba_timers(struct lpfc_hba *phba)
 		del_timer_sync(&phba->fcp_poll_timer);
 		break;
 	case LPFC_PCI_DEV_OC:
-		/* Stop any OneConnect device sepcific driver timers */
+		/* Stop any OneConnect device specific driver timers */
 		lpfc_sli4_stop_fcf_redisc_wait_timer(phba);
 		break;
 	default:
@@ -45,7 +45,7 @@ static u32 phy_CalculateBitShift(u32 BitMask)
 /**
  * Function:	PHY_QueryBBReg
  *
- * OverView:	Read "sepcific bits" from BB register
+ * OverView:	Read "specific bits" from BB register
  *
  * Input:
  *		struct adapter *	Adapter,
@@ -264,8 +264,7 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)

 /* ------------------------------------------------------------------ */

-static int find_grant_ptes(pte_t *pte, pgtable_t token,
-			   unsigned long addr, void *data)
+static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
 {
 	struct gntdev_grant_map *map = data;
 	unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
@@ -292,8 +291,7 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token,
 }

 #ifdef CONFIG_X86
-static int set_grant_ptes_as_special(pte_t *pte, pgtable_t token,
-				     unsigned long addr, void *data)
+static int set_grant_ptes_as_special(pte_t *pte, unsigned long addr, void *data)
 {
 	set_pte_at(current->mm, addr, pte, pte_mkspecial(*pte));
 	return 0;
@@ -731,8 +731,7 @@ struct remap_pfn {
 	unsigned long i;
 };

-static int remap_pfn_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
-			void *data)
+static int remap_pfn_fn(pte_t *ptep, unsigned long addr, void *data)
 {
 	struct remap_pfn *r = data;
 	struct page *page = r->pages[r->i];
@@ -966,8 +965,7 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
 * on a per pfn/pte basis. Mapping calls that fail with ENOENT
 * can be then retried until success.
 */
-static int is_mapped_fn(pte_t *pte, struct page *pmd_page,
-			unsigned long addr, void *data)
+static int is_mapped_fn(pte_t *pte, unsigned long addr, void *data)
 {
 	return pte_none(*pte) ? 0 : -EBUSY;
 }
@@ -93,8 +93,7 @@ static void setup_hparams(unsigned long gfn, void *data)
 	info->fgfn++;
 }

-static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
-			void *data)
+static int remap_pte_fn(pte_t *ptep, unsigned long addr, void *data)
 {
 	struct remap_data *info = data;
 	struct page *page = info->pages[info->index++];
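The Xen and driver hunks above are mechanical conversions to the simplified apply_to_page_range() callback, which drops the never-used pgtable_t argument. For orientation, the declarations involved (as they look after this series, from include/linux/mm.h):

	typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
	extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
				       unsigned long size, pte_fn_t fn, void *data);

Each callback now sees only the PTE pointer, the virtual address, and the opaque cookie it registered.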
@@ -35,8 +35,9 @@
  * @page: structure to page
  *
  */
-static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
+static int v9fs_fid_readpage(void *data, struct page *page)
 {
+	struct p9_fid *fid = data;
 	struct inode *inode = page->mapping->host;
 	struct bio_vec bvec = {.bv_page = page, .bv_len = PAGE_SIZE};
 	struct iov_iter to;
@@ -107,7 +108,8 @@ static int v9fs_vfs_readpages(struct file *filp, struct address_space *mapping,
 	if (ret == 0)
 		return ret;

-	ret = read_cache_pages(mapping, pages, (void *)v9fs_vfs_readpage, filp);
+	ret = read_cache_pages(mapping, pages, v9fs_fid_readpage,
+			filp->private_data);
 	p9_debug(P9_DEBUG_VFS, " = %d\n", ret);
 	return ret;
 }
@@ -109,9 +109,9 @@ static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg)
 	return ret;
 }

-int jffs2_do_readpage_unlock(struct inode *inode, struct page *pg)
+int jffs2_do_readpage_unlock(void *data, struct page *pg)
 {
-	int ret = jffs2_do_readpage_nolock(inode, pg);
+	int ret = jffs2_do_readpage_nolock(data, pg);
 	unlock_page(pg);
 	return ret;
 }
@@ -687,7 +687,7 @@ unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c,
 	struct page *pg;

 	pg = read_cache_page(inode->i_mapping, offset >> PAGE_SHIFT,
-			     (void *)jffs2_do_readpage_unlock, inode);
+			     jffs2_do_readpage_unlock, inode);
 	if (IS_ERR(pg))
 		return (void *)pg;

@@ -155,7 +155,7 @@ extern const struct file_operations jffs2_file_operations;
 extern const struct inode_operations jffs2_file_inode_operations;
 extern const struct address_space_operations jffs2_file_address_operations;
 int jffs2_fsync(struct file *, loff_t, loff_t, int);
-int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg);
+int jffs2_do_readpage_unlock(void *data, struct page *pg);

 /* ioctl.c */
 long jffs2_ioctl(struct file *, unsigned int, unsigned long);
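The 9p and jffs2 conversions exist so the readpage helpers can be handed to read_cache_page()/read_cache_pages() without a function-pointer cast: those helpers invoke their filler as

	int filler(void *data, struct page *page);

(the filler_t type in linux/pagemap.h), so matching that signature and recovering the fid or inode from the data cookie removes the old (void *) casts, which were technically undefined behaviour.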
@@ -288,10 +288,13 @@ struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
 	/*
 	 * For queues with unlimited length lost events are not expected and
 	 * can possibly have security implications. Avoid losing events when
-	 * memory is short.
+	 * memory is short. For the limited size queues, avoid OOM killer in the
+	 * target monitoring memcg as it may have security repercussion.
 	 */
 	if (group->max_events == UINT_MAX)
 		gfp |= __GFP_NOFAIL;
+	else
+		gfp |= __GFP_RETRY_MAYFAIL;

 	/* Whoever is interested in the event, pays for the allocation. */
 	memalloc_use_memcg(group->memcg);
@@ -90,9 +90,13 @@ int inotify_handle_event(struct fsnotify_group *group,
 	i_mark = container_of(inode_mark, struct inotify_inode_mark,
 			      fsn_mark);

-	/* Whoever is interested in the event, pays for the allocation. */
+	/*
+	 * Whoever is interested in the event, pays for the allocation. Do not
+	 * trigger OOM killer in the target monitoring memcg as it may have
+	 * security repercussion.
+	 */
 	memalloc_use_memcg(group->memcg);
-	event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT);
+	event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
 	memalloc_unuse_memcg();

 	if (unlikely(!event)) {
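Both fsnotify hunks follow the same remote-charging pattern: the event is charged to the listener's memcg, and __GFP_RETRY_MAYFAIL makes the allocator retry and then fail instead of OOM-killing tasks in that cgroup. In outline (a condensed sketch, not the literal fsnotify code):

	memalloc_use_memcg(group->memcg);
	event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
	memalloc_unuse_memcg();

and a NULL result is reported as a queue overflow rather than escalating to the OOM killer.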
@@ -6191,17 +6191,17 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
 	if (le16_to_cpu(tl->tl_used)) {
 		trace_ocfs2_truncate_log_recovery_num(le16_to_cpu(tl->tl_used));

-		*tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL);
+		/*
+		 * Assuming the write-out below goes well, this copy will be
+		 * passed back to recovery for processing.
+		 */
+		*tl_copy = kmemdup(tl_bh->b_data, tl_bh->b_size, GFP_KERNEL);
 		if (!(*tl_copy)) {
 			status = -ENOMEM;
 			mlog_errno(status);
 			goto bail;
 		}

-		/* Assuming the write-out below goes well, this copy
-		 * will be passed back to recovery for processing. */
-		memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size);
-
 		/* All we need to do to clear the truncate log is set
 		 * tl_used. */
 		tl->tl_used = 0;
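kmemdup() is the allocate-and-copy idiom in a single call, which is why the separate memcpy() disappears. Conceptually (a sketch of the semantics, not the exact implementation, which allocates with caller tracking):

	void *kmemdup(const void *src, size_t len, gfp_t gfp)
	{
		void *p = kmalloc(len, gfp);

		if (p)
			memcpy(p, src, len);
		return p;
	}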
@@ -242,57 +242,29 @@ static struct dentry *blockcheck_debugfs_create(const char *name,
 static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
 {
 	if (stats) {
-		debugfs_remove(stats->b_debug_check);
-		stats->b_debug_check = NULL;
-		debugfs_remove(stats->b_debug_failure);
-		stats->b_debug_failure = NULL;
-		debugfs_remove(stats->b_debug_recover);
-		stats->b_debug_recover = NULL;
-		debugfs_remove(stats->b_debug_dir);
+		debugfs_remove_recursive(stats->b_debug_dir);
 		stats->b_debug_dir = NULL;
 	}
 }

-static int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
-					  struct dentry *parent)
+static void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
+					   struct dentry *parent)
 {
-	int rc = -EINVAL;
-
-	if (!stats)
-		goto out;
-
 	stats->b_debug_dir = debugfs_create_dir("blockcheck", parent);
-	if (!stats->b_debug_dir)
-		goto out;
-
-	stats->b_debug_check =
-		blockcheck_debugfs_create("blocks_checked",
-					  stats->b_debug_dir,
-					  &stats->b_check_count);
+	blockcheck_debugfs_create("blocks_checked", stats->b_debug_dir,
+				  &stats->b_check_count);

-	stats->b_debug_failure =
-		blockcheck_debugfs_create("checksums_failed",
-					  stats->b_debug_dir,
-					  &stats->b_failure_count);
+	blockcheck_debugfs_create("checksums_failed", stats->b_debug_dir,
+				  &stats->b_failure_count);

-	stats->b_debug_recover =
-		blockcheck_debugfs_create("ecc_recoveries",
-					  stats->b_debug_dir,
-					  &stats->b_recover_count);
-	if (stats->b_debug_check && stats->b_debug_failure &&
-	    stats->b_debug_recover)
-		rc = 0;
-
-out:
-	if (rc)
-		ocfs2_blockcheck_debug_remove(stats);
-	return rc;
+	blockcheck_debugfs_create("ecc_recoveries", stats->b_debug_dir,
+				  &stats->b_recover_count);
 }
 #else
-static inline int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
-						 struct dentry *parent)
+static inline void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
+						  struct dentry *parent)
 {
-	return 0;
 }

 static inline void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
@@ -301,10 +273,10 @@ static inline void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *
 #endif /* CONFIG_DEBUG_FS */

 /* Always-called wrappers for starting and stopping the debugfs files */
-int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats,
-					   struct dentry *parent)
+void ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats,
+					    struct dentry *parent)
 {
-	return ocfs2_blockcheck_debug_install(stats, parent);
+	ocfs2_blockcheck_debug_install(stats, parent);
 }

 void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats)
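The ocfs2 changes that follow all apply the current debugfs rule: callers are not supposed to check the return value of debugfs_create_dir()/debugfs_create_file(), because the files are debug-only and the helpers tolerate errors, including a previously failed parent. Hence the install/init helpers become void, the per-file dentry bookkeeping disappears, and teardown collapses to a single debugfs_remove_recursive() on the directory. A condensed illustration of the resulting pattern (names are placeholders, not a literal hunk from this series):

	dir = debugfs_create_dir("blockcheck", parent);
	debugfs_create_file("blocks_checked", S_IFREG | S_IRUSR, dir, counter, &fops);
	debugfs_remove_recursive(dir);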
@@ -25,9 +25,6 @@ struct ocfs2_blockcheck_stats {
 	 * ocfs2_blockcheck_stats_debugfs_install()
 	 */
 	struct dentry *b_debug_dir;	/* Parent of the debugfs files */
-	struct dentry *b_debug_check;	/* Exposes b_check_count */
-	struct dentry *b_debug_failure;	/* Exposes b_failure_count */
-	struct dentry *b_debug_recover;	/* Exposes b_recover_count */
 };


@@ -56,8 +53,8 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
 				   struct ocfs2_blockcheck_stats *stats);

 /* Debug Initialization */
-int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats,
-					   struct dentry *parent);
+void ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats,
+					    struct dentry *parent);
 void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats);

 /*
@@ -92,10 +92,6 @@ static struct o2hb_debug_buf *o2hb_db_failedregions;
 #define O2HB_DEBUG_REGION_PINNED	"pinned"

 static struct dentry *o2hb_debug_dir;
-static struct dentry *o2hb_debug_livenodes;
-static struct dentry *o2hb_debug_liveregions;
-static struct dentry *o2hb_debug_quorumregions;
-static struct dentry *o2hb_debug_failedregions;

 static LIST_HEAD(o2hb_all_regions);
@@ -1184,7 +1180,7 @@ bail:
 	if (atomic_read(&reg->hr_steady_iterations) != 0) {
 		if (atomic_dec_and_test(&reg->hr_unsteady_iterations)) {
 			printk(KERN_NOTICE "o2hb: Unable to stabilize "
-			       "heartbeart on region %s (%s)\n",
+			       "heartbeat on region %s (%s)\n",
 			       config_item_name(&reg->hr_item),
 			       reg->hr_dev_name);
 			atomic_set(&reg->hr_steady_iterations, 0);
@@ -1391,11 +1387,7 @@ static const struct file_operations o2hb_debug_fops = {

 void o2hb_exit(void)
 {
-	debugfs_remove(o2hb_debug_failedregions);
-	debugfs_remove(o2hb_debug_quorumregions);
-	debugfs_remove(o2hb_debug_liveregions);
-	debugfs_remove(o2hb_debug_livenodes);
-	debugfs_remove(o2hb_debug_dir);
+	debugfs_remove_recursive(o2hb_debug_dir);
 	kfree(o2hb_db_livenodes);
 	kfree(o2hb_db_liveregions);
 	kfree(o2hb_db_quorumregions);
@@ -1419,79 +1411,37 @@ static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir,
 				       &o2hb_debug_fops);
 }

-static int o2hb_debug_init(void)
+static void o2hb_debug_init(void)
 {
-	int ret = -ENOMEM;
-
 	o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL);
-	if (!o2hb_debug_dir) {
-		mlog_errno(ret);
-		goto bail;
-	}

-	o2hb_debug_livenodes = o2hb_debug_create(O2HB_DEBUG_LIVENODES,
-						 o2hb_debug_dir,
-						 &o2hb_db_livenodes,
-						 sizeof(*o2hb_db_livenodes),
-						 O2HB_DB_TYPE_LIVENODES,
-						 sizeof(o2hb_live_node_bitmap),
-						 O2NM_MAX_NODES,
-						 o2hb_live_node_bitmap);
-	if (!o2hb_debug_livenodes) {
-		mlog_errno(ret);
-		goto bail;
-	}
+	o2hb_debug_create(O2HB_DEBUG_LIVENODES, o2hb_debug_dir,
+			  &o2hb_db_livenodes, sizeof(*o2hb_db_livenodes),
+			  O2HB_DB_TYPE_LIVENODES, sizeof(o2hb_live_node_bitmap),
+			  O2NM_MAX_NODES, o2hb_live_node_bitmap);

-	o2hb_debug_liveregions = o2hb_debug_create(O2HB_DEBUG_LIVEREGIONS,
-						   o2hb_debug_dir,
-						   &o2hb_db_liveregions,
-						   sizeof(*o2hb_db_liveregions),
-						   O2HB_DB_TYPE_LIVEREGIONS,
-						   sizeof(o2hb_live_region_bitmap),
-						   O2NM_MAX_REGIONS,
-						   o2hb_live_region_bitmap);
-	if (!o2hb_debug_liveregions) {
-		mlog_errno(ret);
-		goto bail;
-	}
+	o2hb_debug_create(O2HB_DEBUG_LIVEREGIONS, o2hb_debug_dir,
+			  &o2hb_db_liveregions, sizeof(*o2hb_db_liveregions),
+			  O2HB_DB_TYPE_LIVEREGIONS,
+			  sizeof(o2hb_live_region_bitmap), O2NM_MAX_REGIONS,
+			  o2hb_live_region_bitmap);

-	o2hb_debug_quorumregions =
-			o2hb_debug_create(O2HB_DEBUG_QUORUMREGIONS,
-					  o2hb_debug_dir,
-					  &o2hb_db_quorumregions,
-					  sizeof(*o2hb_db_quorumregions),
-					  O2HB_DB_TYPE_QUORUMREGIONS,
-					  sizeof(o2hb_quorum_region_bitmap),
-					  O2NM_MAX_REGIONS,
-					  o2hb_quorum_region_bitmap);
-	if (!o2hb_debug_quorumregions) {
-		mlog_errno(ret);
-		goto bail;
-	}
+	o2hb_debug_create(O2HB_DEBUG_QUORUMREGIONS, o2hb_debug_dir,
+			  &o2hb_db_quorumregions,
+			  sizeof(*o2hb_db_quorumregions),
+			  O2HB_DB_TYPE_QUORUMREGIONS,
+			  sizeof(o2hb_quorum_region_bitmap), O2NM_MAX_REGIONS,
+			  o2hb_quorum_region_bitmap);

-	o2hb_debug_failedregions =
-			o2hb_debug_create(O2HB_DEBUG_FAILEDREGIONS,
-					  o2hb_debug_dir,
-					  &o2hb_db_failedregions,
-					  sizeof(*o2hb_db_failedregions),
-					  O2HB_DB_TYPE_FAILEDREGIONS,
-					  sizeof(o2hb_failed_region_bitmap),
-					  O2NM_MAX_REGIONS,
-					  o2hb_failed_region_bitmap);
-	if (!o2hb_debug_failedregions) {
-		mlog_errno(ret);
-		goto bail;
-	}
-
-	ret = 0;
-bail:
-	if (ret)
-		o2hb_exit();
-
-	return ret;
+	o2hb_debug_create(O2HB_DEBUG_FAILEDREGIONS, o2hb_debug_dir,
+			  &o2hb_db_failedregions,
+			  sizeof(*o2hb_db_failedregions),
+			  O2HB_DB_TYPE_FAILEDREGIONS,
+			  sizeof(o2hb_failed_region_bitmap), O2NM_MAX_REGIONS,
+			  o2hb_failed_region_bitmap);
 }

-int o2hb_init(void)
+void o2hb_init(void)
 {
 	int i;

@@ -1511,7 +1461,7 @@ int o2hb_init(void)

 	o2hb_dependent_users = 0;

-	return o2hb_debug_init();
+	o2hb_debug_init();
 }

 /* if we're already in a callback then we're already serialized by the sem */
@@ -63,7 +63,7 @@ void o2hb_unregister_callback(const char *region_uuid,
 void o2hb_fill_node_map(unsigned long *map,
 			unsigned bytes);
 void o2hb_exit(void);
-int o2hb_init(void);
+void o2hb_init(void);
 int o2hb_check_node_heartbeating_no_sem(u8 node_num);
 int o2hb_check_node_heartbeating_from_callback(u8 node_num);
 void o2hb_stop_all_regions(void);
@@ -38,10 +38,6 @@
 #define SHOW_SOCK_STATS	1

 static struct dentry *o2net_dentry;
-static struct dentry *sc_dentry;
-static struct dentry *nst_dentry;
-static struct dentry *stats_dentry;
-static struct dentry *nodes_dentry;

 static DEFINE_SPINLOCK(o2net_debug_lock);

@@ -490,36 +486,23 @@ static const struct file_operations nodes_fops = {

 void o2net_debugfs_exit(void)
 {
-	debugfs_remove(nodes_dentry);
-	debugfs_remove(stats_dentry);
-	debugfs_remove(sc_dentry);
-	debugfs_remove(nst_dentry);
-	debugfs_remove(o2net_dentry);
+	debugfs_remove_recursive(o2net_dentry);
 }

-int o2net_debugfs_init(void)
+void o2net_debugfs_init(void)
 {
 	umode_t mode = S_IFREG|S_IRUSR;

 	o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL);
-	if (o2net_dentry)
-		nst_dentry = debugfs_create_file(NST_DEBUG_NAME, mode,
-					o2net_dentry, NULL, &nst_seq_fops);
-	if (nst_dentry)
-		sc_dentry = debugfs_create_file(SC_DEBUG_NAME, mode,
-					o2net_dentry, NULL, &sc_seq_fops);
-	if (sc_dentry)
-		stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, mode,
-					o2net_dentry, NULL, &stats_seq_fops);
-	if (stats_dentry)
-		nodes_dentry = debugfs_create_file(NODES_DEBUG_NAME, mode,
-					o2net_dentry, NULL, &nodes_fops);
-	if (nodes_dentry)
-		return 0;
-
-	o2net_debugfs_exit();
-	mlog_errno(-ENOMEM);
-	return -ENOMEM;
+	debugfs_create_file(NST_DEBUG_NAME, mode, o2net_dentry, NULL,
+			    &nst_seq_fops);
+	debugfs_create_file(SC_DEBUG_NAME, mode, o2net_dentry, NULL,
+			    &sc_seq_fops);
+	debugfs_create_file(STATS_DEBUG_NAME, mode, o2net_dentry, NULL,
+			    &stats_seq_fops);
+	debugfs_create_file(NODES_DEBUG_NAME, mode, o2net_dentry, NULL,
+			    &nodes_fops);
 }

 #endif /* CONFIG_DEBUG_FS */
@@ -828,9 +828,7 @@ static int __init init_o2nm(void)
 {
 	int ret = -1;

-	ret = o2hb_init();
-	if (ret)
-		goto out;
+	o2hb_init();

 	ret = o2net_init();
 	if (ret)
@@ -76,7 +76,7 @@ static void o2quo_fence_self(void)
 	};
 }

-/* Indicate that a timeout occurred on a hearbeat region write. The
+/* Indicate that a timeout occurred on a heartbeat region write. The
  * other nodes in the cluster may consider us dead at that time so we
  * want to "fence" ourselves so that we don't scribble on the disk
  * after they think they've recovered us. This can't solve all
@@ -1762,7 +1762,7 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
 		(msecs_to_jiffies(o2net_reconnect_delay()) + 1);

 	if (node_num != o2nm_this_node()) {
-		/* believe it or not, accept and node hearbeating testing
+		/* believe it or not, accept and node heartbeating testing
 		 * can succeed for this node before we got here.. so
 		 * only use set_nn_state to clear the persistent error
 		 * if that hasn't already happened */
@@ -2129,8 +2129,7 @@ int o2net_init(void)

 	o2quo_init();

-	if (o2net_debugfs_init())
-		goto out;
+	o2net_debugfs_init();

 	o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL);
 	o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL);
@@ -109,16 +109,15 @@ struct o2net_send_tracking;
 struct o2net_sock_container;

 #ifdef CONFIG_DEBUG_FS
-int o2net_debugfs_init(void);
+void o2net_debugfs_init(void);
 void o2net_debugfs_exit(void);
 void o2net_debug_add_nst(struct o2net_send_tracking *nst);
 void o2net_debug_del_nst(struct o2net_send_tracking *nst);
 void o2net_debug_add_sc(struct o2net_sock_container *sc);
 void o2net_debug_del_sc(struct o2net_sock_container *sc);
 #else
-static inline int o2net_debugfs_init(void)
+static inline void o2net_debugfs_init(void)
 {
-	return 0;
 }
 static inline void o2net_debugfs_exit(void)
 {
@@ -851,7 +851,7 @@ static const struct file_operations debug_state_fops = {
 /* end - debug state funcs */

 /* files in subroot */
-int dlm_debug_init(struct dlm_ctxt *dlm)
+void dlm_debug_init(struct dlm_ctxt *dlm)
 {
 	struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;

@@ -860,10 +860,6 @@ int dlm_debug_init(struct dlm_ctxt *dlm)
 						   S_IFREG|S_IRUSR,
 						   dlm->dlm_debugfs_subroot,
 						   dlm, &debug_state_fops);
-	if (!dc->debug_state_dentry) {
-		mlog_errno(-ENOMEM);
-		goto bail;
-	}

 	/* for dumping lockres */
 	dc->debug_lockres_dentry =
@@ -871,20 +867,12 @@ int dlm_debug_init(struct dlm_ctxt *dlm)
 				    S_IFREG|S_IRUSR,
 				    dlm->dlm_debugfs_subroot,
 				    dlm, &debug_lockres_fops);
-	if (!dc->debug_lockres_dentry) {
-		mlog_errno(-ENOMEM);
-		goto bail;
-	}

 	/* for dumping mles */
 	dc->debug_mle_dentry = debugfs_create_file(DLM_DEBUGFS_MLE_STATE,
 						   S_IFREG|S_IRUSR,
 						   dlm->dlm_debugfs_subroot,
 						   dlm, &debug_mle_fops);
-	if (!dc->debug_mle_dentry) {
-		mlog_errno(-ENOMEM);
-		goto bail;
-	}

 	/* for dumping lockres on the purge list */
 	dc->debug_purgelist_dentry =
@@ -892,15 +880,6 @@ int dlm_debug_init(struct dlm_ctxt *dlm)
 				    S_IFREG|S_IRUSR,
 				    dlm->dlm_debugfs_subroot,
 				    dlm, &debug_purgelist_fops);
-	if (!dc->debug_purgelist_dentry) {
-		mlog_errno(-ENOMEM);
-		goto bail;
-	}
-
-	return 0;
-
-bail:
-	return -ENOMEM;
 }

 void dlm_debug_shutdown(struct dlm_ctxt *dlm)
@@ -920,24 +899,16 @@ void dlm_debug_shutdown(struct dlm_ctxt *dlm)
 /* subroot - domain dir */
 int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
 {
-	dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name,
-						      dlm_debugfs_root);
-	if (!dlm->dlm_debugfs_subroot) {
-		mlog_errno(-ENOMEM);
-		goto bail;
-	}
-
 	dlm->dlm_debug_ctxt = kzalloc(sizeof(struct dlm_debug_ctxt),
 				      GFP_KERNEL);
 	if (!dlm->dlm_debug_ctxt) {
 		mlog_errno(-ENOMEM);
-		goto bail;
+		return -ENOMEM;
 	}

+	dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name,
+						      dlm_debugfs_root);
 	return 0;
-bail:
-	dlm_destroy_debugfs_subroot(dlm);
-	return -ENOMEM;
 }

 void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
@@ -946,14 +917,9 @@ void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
 }

 /* debugfs root */
-int dlm_create_debugfs_root(void)
+void dlm_create_debugfs_root(void)
 {
 	dlm_debugfs_root = debugfs_create_dir(DLM_DEBUGFS_DIR, NULL);
-	if (!dlm_debugfs_root) {
-		mlog_errno(-ENOMEM);
-		return -ENOMEM;
-	}
-	return 0;
 }

 void dlm_destroy_debugfs_root(void)
@@ -28,20 +28,19 @@ struct debug_lockres {
 	struct dlm_lock_resource *dl_res;
 };

-int dlm_debug_init(struct dlm_ctxt *dlm);
+void dlm_debug_init(struct dlm_ctxt *dlm);
 void dlm_debug_shutdown(struct dlm_ctxt *dlm);

 int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm);
 void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm);

-int dlm_create_debugfs_root(void);
+void dlm_create_debugfs_root(void);
 void dlm_destroy_debugfs_root(void);

 #else

-static inline int dlm_debug_init(struct dlm_ctxt *dlm)
+static inline void dlm_debug_init(struct dlm_ctxt *dlm)
 {
-	return 0;
 }
 static inline void dlm_debug_shutdown(struct dlm_ctxt *dlm)
 {
@@ -53,9 +52,8 @@ static inline int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
 static inline void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
 {
 }
-static inline int dlm_create_debugfs_root(void)
+static inline void dlm_create_debugfs_root(void)
 {
-	return 0;
 }
 static inline void dlm_destroy_debugfs_root(void)
 {
@@ -1881,11 +1881,7 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
 		goto bail;
 	}

-	status = dlm_debug_init(dlm);
-	if (status < 0) {
-		mlog_errno(status);
-		goto bail;
-	}
+	dlm_debug_init(dlm);

 	snprintf(wq_name, O2NM_MAX_NAME_LEN, "dlm_wq-%s", dlm->name);
 	dlm->dlm_worker = alloc_workqueue(wq_name, WQ_MEM_RECLAIM, 0);
@@ -2346,9 +2342,7 @@ static int __init dlm_init(void)
 		goto error;
 	}

-	status = dlm_create_debugfs_root();
-	if (status)
-		goto error;
+	dlm_create_debugfs_root();

 	return 0;
 error:
@@ -2161,7 +2161,7 @@ put:
  * think that $RECOVERY is currently mastered by a dead node. If so,
  * we wait a short time to allow that node to get notified by its own
  * heartbeat stack, then check again. All $RECOVERY lock resources
- * mastered by dead nodes are purged when the hearbeat callback is
+ * mastered by dead nodes are purged when the heartbeat callback is
  * fired, so we can know for sure that it is safe to continue once
  * the node returns a live node or no node. */
 static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
@@ -1109,7 +1109,7 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
 {
 	u64 mig_cookie = be64_to_cpu(mres->mig_cookie);
 	int mres_total_locks = be32_to_cpu(mres->total_locks);
-	int sz, ret = 0, status = 0;
+	int ret = 0, status = 0;
 	u8 orig_flags = mres->flags,
 	   orig_master = mres->master;

@@ -1117,9 +1117,6 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
 	if (!mres->num_locks)
 		return 0;

-	sz = sizeof(struct dlm_migratable_lockres) +
-		(mres->num_locks * sizeof(struct dlm_migratable_lock));
-
 	/* add an all-done flag if we reached the last lock */
 	orig_flags = mres->flags;
 	BUG_ON(total_locks > mres_total_locks);
@@ -1133,7 +1130,8 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,

 	/* send it */
 	ret = o2net_send_message(DLM_MIG_LOCKRES_MSG, dlm->key, mres,
-				 sz, send_to, &status);
+				 struct_size(mres, ml, mres->num_locks),
+				 send_to, &status);
 	if (ret < 0) {
 		/* XXX: negative status is not handled.
 		 * this will end up killing this node. */
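struct_size() from linux/overflow.h computes the size of an object with a trailing flexible array and saturates on overflow, so struct_size(mres, ml, mres->num_locks) replaces the removed open-coded sizeof() arithmetic (ml being the flexible-array member of the migratable lockres message). The general idiom, shown here with a hypothetical structure for illustration:

	struct msg {
		u32 count;
		struct item items[];
	};

	msg = kmalloc(struct_size(msg, items, count), GFP_KERNEL);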
@@ -426,6 +426,7 @@ static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
 static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
 {
 	res->l_lock_refresh = 0;
+	res->l_lock_wait = 0;
 	memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats));
 	memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats));
 }
@@ -460,6 +461,8 @@ static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,

 	if (ret)
 		stats->ls_fail++;
+
+	stats->ls_last = ktime_to_us(ktime_get_real());
 }

 static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
@@ -467,6 +470,21 @@ static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
 	lockres->l_lock_refresh++;
 }

+static inline void ocfs2_track_lock_wait(struct ocfs2_lock_res *lockres)
+{
+	struct ocfs2_mask_waiter *mw;
+
+	if (list_empty(&lockres->l_mask_waiters)) {
+		lockres->l_lock_wait = 0;
+		return;
+	}
+
+	mw = list_first_entry(&lockres->l_mask_waiters,
+			      struct ocfs2_mask_waiter, mw_item);
+	lockres->l_lock_wait =
+		ktime_to_us(ktime_mono_to_real(mw->mw_lock_start));
+}
+
 static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
 {
 	mw->mw_lock_start = ktime_get();
@@ -482,6 +500,9 @@ static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res,
 static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
 {
 }
+static inline void ocfs2_track_lock_wait(struct ocfs2_lock_res *lockres)
+{
+}
 static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
 {
 }
@@ -875,6 +896,7 @@ static void lockres_set_flags(struct ocfs2_lock_res *lockres,
 		list_del_init(&mw->mw_item);
 		mw->mw_status = 0;
 		complete(&mw->mw_complete);
+		ocfs2_track_lock_wait(lockres);
 	}
 }
 static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or)
@@ -1386,6 +1408,7 @@ static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres,
 	list_add_tail(&mw->mw_item, &lockres->l_mask_waiters);
 	mw->mw_mask = mask;
 	mw->mw_goal = goal;
+	ocfs2_track_lock_wait(lockres);
 }

 /* returns 0 if the mw that was removed was already satisfied, -EBUSY
@@ -1402,6 +1425,7 @@ static int __lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,

 		list_del_init(&mw->mw_item);
 		init_completion(&mw->mw_complete);
+		ocfs2_track_lock_wait(lockres);
 	}

 	return ret;
@@ -2989,6 +3013,8 @@ struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
 	kref_init(&dlm_debug->d_refcnt);
 	INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking);
 	dlm_debug->d_locking_state = NULL;
+	dlm_debug->d_locking_filter = NULL;
+	dlm_debug->d_filter_secs = 0;
 out:
 	return dlm_debug;
 }
@@ -3079,17 +3105,43 @@ static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
 * - Lock stats printed
 * New in version 3
 * - Max time in lock stats is in usecs (instead of nsecs)
+ * New in version 4
+ * - Add last pr/ex unlock times and first lock wait time in usecs
 */
-#define OCFS2_DLM_DEBUG_STR_VERSION 3
+#define OCFS2_DLM_DEBUG_STR_VERSION 4
 static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
 {
 	int i;
 	char *lvb;
 	struct ocfs2_lock_res *lockres = v;
+#ifdef CONFIG_OCFS2_FS_STATS
+	u64 now, last;
+	struct ocfs2_dlm_debug *dlm_debug =
+			((struct ocfs2_dlm_seq_priv *)m->private)->p_dlm_debug;
+#endif

 	if (!lockres)
 		return -EINVAL;

+#ifdef CONFIG_OCFS2_FS_STATS
+	if (!lockres->l_lock_wait && dlm_debug->d_filter_secs) {
+		now = ktime_to_us(ktime_get_real());
+		if (lockres->l_lock_prmode.ls_last >
+		    lockres->l_lock_exmode.ls_last)
+			last = lockres->l_lock_prmode.ls_last;
+		else
+			last = lockres->l_lock_exmode.ls_last;
+		/*
+		 * Use d_filter_secs field to filter lock resources dump,
+		 * the default d_filter_secs(0) value filters nothing,
+		 * otherwise, only dump the last N seconds active lock
+		 * resources.
+		 */
+		if (div_u64(now - last, 1000000) > dlm_debug->d_filter_secs)
+			return 0;
+	}
+#endif
+
 	seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);

 	if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
@@ -3131,6 +3183,9 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
 # define lock_max_prmode(_l)		((_l)->l_lock_prmode.ls_max)
 # define lock_max_exmode(_l)		((_l)->l_lock_exmode.ls_max)
 # define lock_refresh(_l)		((_l)->l_lock_refresh)
+# define lock_last_prmode(_l)		((_l)->l_lock_prmode.ls_last)
+# define lock_last_exmode(_l)		((_l)->l_lock_exmode.ls_last)
+# define lock_wait(_l)			((_l)->l_lock_wait)
 #else
 # define lock_num_prmode(_l)		(0)
 # define lock_num_exmode(_l)		(0)
@@ -3141,6 +3196,9 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
 # define lock_max_prmode(_l)		(0)
 # define lock_max_exmode(_l)		(0)
 # define lock_refresh(_l)		(0)
+# define lock_last_prmode(_l)		(0ULL)
+# define lock_last_exmode(_l)		(0ULL)
+# define lock_wait(_l)			(0ULL)
 #endif
 	/* The following seq_print was added in version 2 of this output */
 	seq_printf(m, "%u\t"
@@ -3151,7 +3209,10 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
 		   "%llu\t"
 		   "%u\t"
 		   "%u\t"
-		   "%u\t",
+		   "%u\t"
+		   "%llu\t"
+		   "%llu\t"
+		   "%llu\t",
 		   lock_num_prmode(lockres),
 		   lock_num_exmode(lockres),
 		   lock_num_prmode_failed(lockres),
@@ -3160,7 +3221,10 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
 		   lock_total_exmode(lockres),
 		   lock_max_prmode(lockres),
 		   lock_max_exmode(lockres),
-		   lock_refresh(lockres));
+		   lock_refresh(lockres),
+		   lock_last_prmode(lockres),
+		   lock_last_exmode(lockres),
+		   lock_wait(lockres));

 	/* End the line */
 	seq_printf(m, "\n");
@@ -3214,9 +3278,8 @@ static const struct file_operations ocfs2_dlm_debug_fops = {
 	.llseek = seq_lseek,
 };

-static int ocfs2_dlm_init_debug(struct ocfs2_super *osb)
+static void ocfs2_dlm_init_debug(struct ocfs2_super *osb)
 {
-	int ret = 0;
 	struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;

 	dlm_debug->d_locking_state = debugfs_create_file("locking_state",
@@ -3224,16 +3287,11 @@ static int ocfs2_dlm_init_debug(struct ocfs2_super *osb)
 							 osb->osb_debug_root,
 							 osb,
 							 &ocfs2_dlm_debug_fops);
-	if (!dlm_debug->d_locking_state) {
-		ret = -EINVAL;
-		mlog(ML_ERROR,
-		     "Unable to create locking state debugfs file.\n");
-		goto out;
-	}

 	ocfs2_get_dlm_debug(dlm_debug);
-out:
-	return ret;
+
+	dlm_debug->d_locking_filter = debugfs_create_u32("locking_filter",
+						0600,
+						osb->osb_debug_root,
+						&dlm_debug->d_filter_secs);
 }

 static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
@@ -3242,6 +3300,7 @@ static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)

 	if (dlm_debug) {
 		debugfs_remove(dlm_debug->d_locking_state);
+		debugfs_remove(dlm_debug->d_locking_filter);
 		ocfs2_put_dlm_debug(dlm_debug);
 	}
 }
@@ -3256,11 +3315,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
 		goto local;
 	}

-	status = ocfs2_dlm_init_debug(osb);
-	if (status < 0) {
-		mlog_errno(status);
-		goto bail;
-	}
+	ocfs2_dlm_init_debug(osb);

 	/* launch downconvert thread */
 	osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc-%s",
@@ -4352,7 +4407,6 @@ static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)

 static int ocfs2_downconvert_thread(void *arg)
 {
-	int status = 0;
 	struct ocfs2_super *osb = arg;

 	/* only quit once we've been asked to stop and there is no more
@@ -4370,7 +4424,7 @@ static int ocfs2_downconvert_thread(void *arg)
 	}

 	osb->dc_task = NULL;
-	return status;
+	return 0;
 }

 void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb)
Some files were not shown because too many files have changed in this diff.