x86/mm: Define virtual memory map for 5-level paging

The first part of the memory map (up to the %esp fixup stacks) simply scales
the existing map for 4-level paging by a factor of 2^9, where 9 is the number
of address bits translated by the additional page table level.

The rest of the map is unchanged.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20170330080731.65421-4-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Kirill A. Shutemov 2017-03-30 11:07:27 +03:00 committed by Ingo Molnar
parent 361b4b58ec
commit 4c7c44837b
6 changed files with 60 additions and 8 deletions

View File

@ -4,7 +4,7 @@
Virtual memory map with 4 level page tables: Virtual memory map with 4 level page tables:
0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
hole caused by [48:63] sign extension hole caused by [47:63] sign extension
ffff800000000000 - ffff87ffffffffff (=43 bits) guard hole, reserved for hypervisor ffff800000000000 - ffff87ffffffffff (=43 bits) guard hole, reserved for hypervisor
ffff880000000000 - ffffc7ffffffffff (=64 TB) direct mapping of all phys. memory ffff880000000000 - ffffc7ffffffffff (=64 TB) direct mapping of all phys. memory
ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole
@ -23,12 +23,39 @@ ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable)
ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
Virtual memory map with 5 level page tables:
0000000000000000 - 00ffffffffffffff (=56 bits) user space, different per mm
hole caused by [56:63] sign extension
ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor
ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory
ff90000000000000 - ff91ffffffffffff (=49 bits) hole
ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
... unused hole ...
ffd8000000000000 - fff7ffffffffffff (=53 bits) kasan shadow memory (8PB)
... unused hole ...
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
... unused hole ...
ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
... unused hole ...
ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
The architecture defines a 64-bit virtual address. Implementations can support
fewer bits. Currently supported are 48- and 57-bit virtual addresses. Bits 63
down to the most-significant implemented bit are set to either all ones
or all zeros. This causes a hole between user space and kernel addresses.
The direct mapping covers all memory in the system up to the highest The direct mapping covers all memory in the system up to the highest
memory address (this means in some cases it can also include PCI memory memory address (this means in some cases it can also include PCI memory
holes). holes).
vmalloc space is lazily synchronized into the different PML4 pages of vmalloc space is lazily synchronized into the different PML4/PML5 pages of
the processes using the page fault handler, with init_level4_pgt as the processes using the page fault handler, with init_top_pgt as
reference. reference.
Current X86-64 implementations support up to 46 bits of address space (64 TB), Current X86-64 implementations support up to 46 bits of address space (64 TB),

View File

@ -291,6 +291,7 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
config KASAN_SHADOW_OFFSET config KASAN_SHADOW_OFFSET
hex hex
depends on KASAN depends on KASAN
default 0xdff8000000000000 if X86_5LEVEL
default 0xdffffc0000000000 default 0xdffffc0000000000
config HAVE_INTEL_TXT config HAVE_INTEL_TXT

View File

@ -11,9 +11,12 @@
* 'kernel address space start' >> KASAN_SHADOW_SCALE_SHIFT * 'kernel address space start' >> KASAN_SHADOW_SCALE_SHIFT
*/ */
#define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \ #define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \
(0xffff800000000000ULL >> 3)) ((-1UL << __VIRTUAL_MASK_SHIFT) >> 3))
/* 47 bits for kernel address -> (47 - 3) bits for shadow */ /*
#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1ULL << (47 - 3))) * 47 bits for kernel address -> (47 - 3) bits for shadow
* 56 bits for kernel address -> (56 - 3) bits for shadow
*/
#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1ULL << (__VIRTUAL_MASK_SHIFT - 3)))
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__

View File

@ -36,7 +36,12 @@
* hypervisor to fit. Choosing 16 slots here is arbitrary, but it's * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's
* what Xen requires. * what Xen requires.
*/ */
#ifdef CONFIG_X86_5LEVEL
#define __PAGE_OFFSET_BASE _AC(0xff10000000000000, UL)
#else
#define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL) #define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL)
#endif
#ifdef CONFIG_RANDOMIZE_MEMORY #ifdef CONFIG_RANDOMIZE_MEMORY
#define __PAGE_OFFSET page_offset_base #define __PAGE_OFFSET page_offset_base
#else #else
@ -46,8 +51,13 @@
#define __START_KERNEL_map _AC(0xffffffff80000000, UL) #define __START_KERNEL_map _AC(0xffffffff80000000, UL)
/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
#ifdef CONFIG_X86_5LEVEL
#define __PHYSICAL_MASK_SHIFT 52
#define __VIRTUAL_MASK_SHIFT 56
#else
#define __PHYSICAL_MASK_SHIFT 46 #define __PHYSICAL_MASK_SHIFT 46
#define __VIRTUAL_MASK_SHIFT 47 #define __VIRTUAL_MASK_SHIFT 47
#endif
/* /*
* Kernel image size is limited to 1GiB due to the fixmap living in the * Kernel image size is limited to 1GiB due to the fixmap living in the

View File

@ -56,9 +56,15 @@ typedef struct { pteval_t pte; } pte_t;
/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) #define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
#ifdef CONFIG_X86_5LEVEL
#define VMALLOC_SIZE_TB _AC(16384, UL)
#define __VMALLOC_BASE _AC(0xff92000000000000, UL)
#define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
#else
#define VMALLOC_SIZE_TB _AC(32, UL) #define VMALLOC_SIZE_TB _AC(32, UL)
#define __VMALLOC_BASE _AC(0xffffc90000000000, UL) #define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) #define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
#endif
#ifdef CONFIG_RANDOMIZE_MEMORY #ifdef CONFIG_RANDOMIZE_MEMORY
#define VMALLOC_START vmalloc_base #define VMALLOC_START vmalloc_base
#define VMEMMAP_START vmemmap_base #define VMEMMAP_START vmemmap_base

View File

@ -26,8 +26,13 @@
# endif # endif
#else /* CONFIG_X86_32 */ #else /* CONFIG_X86_32 */
# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */
# define MAX_PHYSADDR_BITS 44 # ifdef CONFIG_X86_5LEVEL
# define MAX_PHYSMEM_BITS 46 # define MAX_PHYSADDR_BITS 52
# define MAX_PHYSMEM_BITS 52
# else
# define MAX_PHYSADDR_BITS 44
# define MAX_PHYSMEM_BITS 46
# endif
#endif #endif
#endif /* CONFIG_SPARSEMEM */ #endif /* CONFIG_SPARSEMEM */