Merge branch 'x86-kdump-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 kdump updates from Thomas Gleixner: "Yet more kexec/kdump updates: - Properly support kexec when AMD's memory encryption (SME) is enabled - Pass reserved e820 ranges to the kexec kernel so both PCI and SME can work" * 'x86-kdump-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: fs/proc/vmcore: Enable dumping of encrypted memory when SEV was active x86/kexec: Set the C-bit in the identity map page table when SEV is active x86/kexec: Do not map kexec area as decrypted when SEV is active x86/crash: Add e820 reserved ranges to kdump kernel's e820 table x86/mm: Rework ioremap resource mapping determination x86/e820, ioport: Add a new I/O resource descriptor IORES_DESC_RESERVED x86/mm: Create a workarea in the kernel for SME early encryption x86/mm: Identify the end of the kernel area to be reserved
This commit is contained in:
commit
565eb5f8c5
|
@ -13,4 +13,6 @@ extern char __end_rodata_aligned[];
|
|||
extern char __end_rodata_hpage_align[];
|
||||
#endif
|
||||
|
||||
extern char __end_of_kernel_reserve[];
|
||||
|
||||
#endif /* _ASM_X86_SECTIONS_H */
|
||||
|
|
|
@ -375,6 +375,12 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
|
|||
walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd,
|
||||
memmap_entry_callback);
|
||||
|
||||
/* Add e820 reserved ranges */
|
||||
cmd.type = E820_TYPE_RESERVED;
|
||||
flags = IORESOURCE_MEM;
|
||||
walk_iomem_res_desc(IORES_DESC_RESERVED, flags, 0, -1, &cmd,
|
||||
memmap_entry_callback);
|
||||
|
||||
/* Add crashk_low_res region */
|
||||
if (crashk_low_res.end) {
|
||||
ei.addr = crashk_low_res.start;
|
||||
|
|
|
@ -1063,10 +1063,10 @@ static unsigned long __init e820_type_to_iores_desc(struct e820_entry *entry)
|
|||
case E820_TYPE_NVS: return IORES_DESC_ACPI_NV_STORAGE;
|
||||
case E820_TYPE_PMEM: return IORES_DESC_PERSISTENT_MEMORY;
|
||||
case E820_TYPE_PRAM: return IORES_DESC_PERSISTENT_MEMORY_LEGACY;
|
||||
case E820_TYPE_RESERVED: return IORES_DESC_RESERVED;
|
||||
case E820_TYPE_RESERVED_KERN: /* Fall-through: */
|
||||
case E820_TYPE_RAM: /* Fall-through: */
|
||||
case E820_TYPE_UNUSABLE: /* Fall-through: */
|
||||
case E820_TYPE_RESERVED: /* Fall-through: */
|
||||
default: return IORES_DESC_NONE;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -123,12 +123,13 @@ static void free_transition_pgtable(struct kimage *image)
|
|||
|
||||
static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
|
||||
{
|
||||
pgprot_t prot = PAGE_KERNEL_EXEC_NOENC;
|
||||
unsigned long vaddr, paddr;
|
||||
int result = -ENOMEM;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
unsigned long vaddr, paddr;
|
||||
int result = -ENOMEM;
|
||||
|
||||
vaddr = (unsigned long)relocate_kernel;
|
||||
paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
|
||||
|
@ -165,7 +166,11 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
|
|||
set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
|
||||
}
|
||||
pte = pte_offset_kernel(pmd, vaddr);
|
||||
set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC));
|
||||
|
||||
if (sev_active())
|
||||
prot = PAGE_KERNEL_EXEC;
|
||||
|
||||
set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
|
||||
return 0;
|
||||
err:
|
||||
return result;
|
||||
|
@ -202,6 +207,11 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
|
|||
level4p = (pgd_t *)__va(start_pgtable);
|
||||
clear_page(level4p);
|
||||
|
||||
if (sev_active()) {
|
||||
info.page_flag |= _PAGE_ENC;
|
||||
info.kernpg_flag |= _PAGE_ENC;
|
||||
}
|
||||
|
||||
if (direct_gbpages)
|
||||
info.direct_gbpages = true;
|
||||
|
||||
|
@ -644,8 +654,20 @@ void arch_kexec_unprotect_crashkres(void)
|
|||
kexec_mark_crashkres(false);
|
||||
}
|
||||
|
||||
/*
|
||||
* During a traditional boot under SME, SME will encrypt the kernel,
|
||||
* so the SME kexec kernel also needs to be un-encrypted in order to
|
||||
* replicate a normal SME boot.
|
||||
*
|
||||
* During a traditional boot under SEV, the kernel has already been
|
||||
* loaded encrypted, so the SEV kexec kernel needs to be encrypted in
|
||||
* order to replicate a normal SEV boot.
|
||||
*/
|
||||
int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
|
||||
{
|
||||
if (sev_active())
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* If SME is active we need to be sure that kexec pages are
|
||||
* not encrypted because when we boot to the new kernel the
|
||||
|
@ -656,6 +678,9 @@ int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
|
|||
|
||||
void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages)
|
||||
{
|
||||
if (sev_active())
|
||||
return;
|
||||
|
||||
/*
|
||||
* If SME is active we need to reset the pages back to being
|
||||
* an encrypted mapping before freeing them.
|
||||
|
|
|
@ -836,8 +836,14 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
|
|||
|
||||
void __init setup_arch(char **cmdline_p)
|
||||
{
|
||||
/*
|
||||
* Reserve the memory occupied by the kernel between _text and
|
||||
* __end_of_kernel_reserve symbols. Any kernel sections after the
|
||||
* __end_of_kernel_reserve symbol must be explicitly reserved with a
|
||||
* separate memblock_reserve() or they will be discarded.
|
||||
*/
|
||||
memblock_reserve(__pa_symbol(_text),
|
||||
(unsigned long)__bss_stop - (unsigned long)_text);
|
||||
(unsigned long)__end_of_kernel_reserve - (unsigned long)_text);
|
||||
|
||||
/*
|
||||
* Make sure page 0 is always reserved because on systems with
|
||||
|
|
|
@ -368,6 +368,14 @@ SECTIONS
|
|||
__bss_stop = .;
|
||||
}
|
||||
|
||||
/*
|
||||
* The memory occupied from _text to here, __end_of_kernel_reserve, is
|
||||
* automatically reserved in setup_arch(). Anything after here must be
|
||||
* explicitly reserved using memblock_reserve() or it will be discarded
|
||||
* and treated as available memory.
|
||||
*/
|
||||
__end_of_kernel_reserve = .;
|
||||
|
||||
. = ALIGN(PAGE_SIZE);
|
||||
.brk : AT(ADDR(.brk) - LOAD_OFFSET) {
|
||||
__brk_base = .;
|
||||
|
@ -379,10 +387,34 @@ SECTIONS
|
|||
. = ALIGN(PAGE_SIZE); /* keep VO_INIT_SIZE page aligned */
|
||||
_end = .;
|
||||
|
||||
#ifdef CONFIG_AMD_MEM_ENCRYPT
|
||||
/*
|
||||
* Early scratch/workarea section: Lives outside of the kernel proper
|
||||
* (_text - _end).
|
||||
*
|
||||
* Resides after _end because even though the .brk section is after
|
||||
* __end_of_kernel_reserve, the .brk section is later reserved as a
|
||||
* part of the kernel. Since it is located after __end_of_kernel_reserve
|
||||
* it will be discarded and become part of the available memory. As
|
||||
* such, it can only be used by very early boot code and must not be
|
||||
* needed afterwards.
|
||||
*
|
||||
* Currently used by SME for performing in-place encryption of the
|
||||
* kernel during boot. Resides on a 2MB boundary to simplify the
|
||||
* pagetable setup used for SME in-place encryption.
|
||||
*/
|
||||
. = ALIGN(HPAGE_SIZE);
|
||||
.init.scratch : AT(ADDR(.init.scratch) - LOAD_OFFSET) {
|
||||
__init_scratch_begin = .;
|
||||
*(.init.scratch)
|
||||
. = ALIGN(HPAGE_SIZE);
|
||||
__init_scratch_end = .;
|
||||
}
|
||||
#endif
|
||||
|
||||
STABS_DEBUG
|
||||
DWARF_DEBUG
|
||||
|
||||
/* Sections to be discarded */
|
||||
DISCARDS
|
||||
/DISCARD/ : {
|
||||
*(.eh_frame)
|
||||
|
|
|
@ -28,9 +28,11 @@
|
|||
|
||||
#include "physaddr.h"
|
||||
|
||||
struct ioremap_mem_flags {
|
||||
bool system_ram;
|
||||
bool desc_other;
|
||||
/*
|
||||
* Descriptor controlling ioremap() behavior.
|
||||
*/
|
||||
struct ioremap_desc {
|
||||
unsigned int flags;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -62,13 +64,14 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size,
|
|||
return err;
|
||||
}
|
||||
|
||||
static bool __ioremap_check_ram(struct resource *res)
|
||||
/* Does the range (or a subset of) contain normal RAM? */
|
||||
static unsigned int __ioremap_check_ram(struct resource *res)
|
||||
{
|
||||
unsigned long start_pfn, stop_pfn;
|
||||
unsigned long i;
|
||||
|
||||
if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM)
|
||||
return false;
|
||||
return 0;
|
||||
|
||||
start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
stop_pfn = (res->end + 1) >> PAGE_SHIFT;
|
||||
|
@ -76,28 +79,44 @@ static bool __ioremap_check_ram(struct resource *res)
|
|||
for (i = 0; i < (stop_pfn - start_pfn); ++i)
|
||||
if (pfn_valid(start_pfn + i) &&
|
||||
!PageReserved(pfn_to_page(start_pfn + i)))
|
||||
return true;
|
||||
return IORES_MAP_SYSTEM_RAM;
|
||||
}
|
||||
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __ioremap_check_desc_other(struct resource *res)
|
||||
/*
|
||||
* In a SEV guest, NONE and RESERVED should not be mapped encrypted because
|
||||
* there the whole memory is already encrypted.
|
||||
*/
|
||||
static unsigned int __ioremap_check_encrypted(struct resource *res)
|
||||
{
|
||||
return (res->desc != IORES_DESC_NONE);
|
||||
if (!sev_active())
|
||||
return 0;
|
||||
|
||||
switch (res->desc) {
|
||||
case IORES_DESC_NONE:
|
||||
case IORES_DESC_RESERVED:
|
||||
break;
|
||||
default:
|
||||
return IORES_MAP_ENCRYPTED;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __ioremap_res_check(struct resource *res, void *arg)
|
||||
static int __ioremap_collect_map_flags(struct resource *res, void *arg)
|
||||
{
|
||||
struct ioremap_mem_flags *flags = arg;
|
||||
struct ioremap_desc *desc = arg;
|
||||
|
||||
if (!flags->system_ram)
|
||||
flags->system_ram = __ioremap_check_ram(res);
|
||||
if (!(desc->flags & IORES_MAP_SYSTEM_RAM))
|
||||
desc->flags |= __ioremap_check_ram(res);
|
||||
|
||||
if (!flags->desc_other)
|
||||
flags->desc_other = __ioremap_check_desc_other(res);
|
||||
if (!(desc->flags & IORES_MAP_ENCRYPTED))
|
||||
desc->flags |= __ioremap_check_encrypted(res);
|
||||
|
||||
return flags->system_ram && flags->desc_other;
|
||||
return ((desc->flags & (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)) ==
|
||||
(IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED));
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -106,15 +125,15 @@ static int __ioremap_res_check(struct resource *res, void *arg)
|
|||
* resource described not as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES).
|
||||
*/
|
||||
static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
|
||||
struct ioremap_mem_flags *flags)
|
||||
struct ioremap_desc *desc)
|
||||
{
|
||||
u64 start, end;
|
||||
|
||||
start = (u64)addr;
|
||||
end = start + size - 1;
|
||||
memset(flags, 0, sizeof(*flags));
|
||||
memset(desc, 0, sizeof(struct ioremap_desc));
|
||||
|
||||
walk_mem_res(start, end, flags, __ioremap_res_check);
|
||||
walk_mem_res(start, end, desc, __ioremap_collect_map_flags);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -131,15 +150,15 @@ static void __ioremap_check_mem(resource_size_t addr, unsigned long size,
|
|||
* have to convert them into an offset in a page-aligned mapping, but the
|
||||
* caller shouldn't need to know that small detail.
|
||||
*/
|
||||
static void __iomem *__ioremap_caller(resource_size_t phys_addr,
|
||||
unsigned long size, enum page_cache_mode pcm,
|
||||
void *caller, bool encrypted)
|
||||
static void __iomem *
|
||||
__ioremap_caller(resource_size_t phys_addr, unsigned long size,
|
||||
enum page_cache_mode pcm, void *caller, bool encrypted)
|
||||
{
|
||||
unsigned long offset, vaddr;
|
||||
resource_size_t last_addr;
|
||||
const resource_size_t unaligned_phys_addr = phys_addr;
|
||||
const unsigned long unaligned_size = size;
|
||||
struct ioremap_mem_flags mem_flags;
|
||||
struct ioremap_desc io_desc;
|
||||
struct vm_struct *area;
|
||||
enum page_cache_mode new_pcm;
|
||||
pgprot_t prot;
|
||||
|
@ -158,12 +177,12 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
__ioremap_check_mem(phys_addr, size, &mem_flags);
|
||||
__ioremap_check_mem(phys_addr, size, &io_desc);
|
||||
|
||||
/*
|
||||
* Don't allow anybody to remap normal RAM that we're using..
|
||||
*/
|
||||
if (mem_flags.system_ram) {
|
||||
if (io_desc.flags & IORES_MAP_SYSTEM_RAM) {
|
||||
WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
|
||||
&phys_addr, &last_addr);
|
||||
return NULL;
|
||||
|
@ -201,7 +220,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
|
|||
* resulting mapping.
|
||||
*/
|
||||
prot = PAGE_KERNEL_IO;
|
||||
if ((sev_active() && mem_flags.desc_other) || encrypted)
|
||||
if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted)
|
||||
prot = pgprot_encrypted(prot);
|
||||
|
||||
switch (pcm) {
|
||||
|
|
|
@ -70,6 +70,19 @@ struct sme_populate_pgd_data {
|
|||
unsigned long vaddr_end;
|
||||
};
|
||||
|
||||
/*
|
||||
* This work area lives in the .init.scratch section, which lives outside of
|
||||
* the kernel proper. It is sized to hold the intermediate copy buffer and
|
||||
* more than enough pagetable pages.
|
||||
*
|
||||
* By using this section, the kernel can be encrypted in place and it
|
||||
* avoids any possibility of boot parameters or initramfs images being
|
||||
* placed such that the in-place encryption logic overwrites them. This
|
||||
* section is 2MB aligned to allow for simple pagetable setup using only
|
||||
* PMD entries (see vmlinux.lds.S).
|
||||
*/
|
||||
static char sme_workarea[2 * PMD_PAGE_SIZE] __section(.init.scratch);
|
||||
|
||||
static char sme_cmdline_arg[] __initdata = "mem_encrypt";
|
||||
static char sme_cmdline_on[] __initdata = "on";
|
||||
static char sme_cmdline_off[] __initdata = "off";
|
||||
|
@ -311,8 +324,13 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
|
|||
}
|
||||
#endif
|
||||
|
||||
/* Set the encryption workarea to be immediately after the kernel */
|
||||
workarea_start = kernel_end;
|
||||
/*
|
||||
* We're running identity mapped, so we must obtain the address to the
|
||||
* SME encryption workarea using rip-relative addressing.
|
||||
*/
|
||||
asm ("lea sme_workarea(%%rip), %0"
|
||||
: "=r" (workarea_start)
|
||||
: "p" (sme_workarea));
|
||||
|
||||
/*
|
||||
* Calculate required number of workarea bytes needed:
|
||||
|
|
|
@ -166,7 +166,7 @@ void __weak elfcorehdr_free(unsigned long long addr)
|
|||
*/
|
||||
ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
|
||||
{
|
||||
return read_from_oldmem(buf, count, ppos, 0, false);
|
||||
return read_from_oldmem(buf, count, ppos, 0, sev_active());
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -174,7 +174,7 @@ ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
|
|||
*/
|
||||
ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
|
||||
{
|
||||
return read_from_oldmem(buf, count, ppos, 0, sme_active());
|
||||
return read_from_oldmem(buf, count, ppos, 0, mem_encrypt_active());
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -374,7 +374,7 @@ static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos,
|
|||
buflen);
|
||||
start = m->paddr + *fpos - m->offset;
|
||||
tmp = read_from_oldmem(buffer, tsz, &start,
|
||||
userbuf, sme_active());
|
||||
userbuf, mem_encrypt_active());
|
||||
if (tmp < 0)
|
||||
return tmp;
|
||||
buflen -= tsz;
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#ifndef __ASSEMBLY__
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/bits.h>
|
||||
/*
|
||||
* Resources are tree-like, allowing
|
||||
* nesting etc..
|
||||
|
@ -133,6 +134,15 @@ enum {
|
|||
IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5,
|
||||
IORES_DESC_DEVICE_PRIVATE_MEMORY = 6,
|
||||
IORES_DESC_DEVICE_PUBLIC_MEMORY = 7,
|
||||
IORES_DESC_RESERVED = 8,
|
||||
};
|
||||
|
||||
/*
|
||||
* Flags controlling ioremap() behavior.
|
||||
*/
|
||||
enum {
|
||||
IORES_MAP_SYSTEM_RAM = BIT(0),
|
||||
IORES_MAP_ENCRYPTED = BIT(1),
|
||||
};
|
||||
|
||||
/* helpers to define resources */
|
||||
|
|
Loading…
Reference in New Issue