Merge tag 'x86_cc_for_v6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 confidential computing update from Borislav Petkov:

 - Add support for unaccepted memory as specified in the UEFI spec v2.9.

   The gist of it all is that Intel TDX and AMD SEV-SNP confidential
   computing guests define the notion of accepting memory before using
   it, thus preventing a whole class of attacks against such guests,
   such as memory replay. There are a couple of strategies for how
   memory should be accepted; the current implementation accepts memory
   on demand.

* tag 'x86_cc_for_v6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  virt: sevguest: Add CONFIG_CRYPTO dependency
  x86/efi: Safely enable unaccepted memory in UEFI
  x86/sev: Add SNP-specific unaccepted memory support
  x86/sev: Use large PSC requests if applicable
  x86/sev: Allow for use of the early boot GHCB for PSC requests
  x86/sev: Put PSC struct on the stack in prep for unaccepted memory support
  x86/sev: Fix calculation of end address based on number of pages
  x86/tdx: Add unaccepted memory support
  x86/tdx: Refactor try_accept_one()
  x86/tdx: Make _tdx_hypercall() and __tdx_module_call() available in boot stub
  efi/unaccepted: Avoid load_unaligned_zeropad() stepping into unaccepted memory
  efi: Add unaccepted memory support
  x86/boot/compressed: Handle unaccepted memory
  efi/libstub: Implement support for unaccepted memory
  efi/x86: Get full memory map in allocate_e820()
  mm: Add support for unaccepted memory
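In rough terms the mechanism works like this (a minimal sketch, assuming the 2MiB unit size used elsewhere in the series; all names below are illustrative, not the kernel's): firmware publishes a bitmap in which each set bit marks a still-unaccepted unit of physical address space, and the first consumer of a range accepts it through a platform call and clears the bits so it is never accepted twice:

	/* Illustrative only -- not the kernel implementation. */
	#include <stdbool.h>
	#include <stdint.h>

	#define UNIT_SHIFT 21				/* one bitmap bit per 2 MiB unit */

	extern uint64_t bitmap[];			/* hypothetical: set bit = unaccepted */
	extern void platform_accept(uint64_t pa);	/* hypothetical TDX/SNP accept call */

	static void accept_range(uint64_t start, uint64_t end)
	{
		uint64_t first = start >> UNIT_SHIFT;
		uint64_t last = (end + (1ULL << UNIT_SHIFT) - 1) >> UNIT_SHIFT;

		for (uint64_t u = first; u < last; u++) {
			if (bitmap[u / 64] & (1ULL << (u % 64))) {
				platform_accept(u << UNIT_SHIFT);	/* accept once... */
				bitmap[u / 64] &= ~(1ULL << (u % 64));	/* ...never again */
			}
		}
	}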
This commit is contained in:
commit 2c96136a3f
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -887,9 +887,11 @@ config INTEL_TDX_GUEST
 	bool "Intel TDX (Trust Domain Extensions) - Guest Support"
 	depends on X86_64 && CPU_SUP_INTEL
 	depends on X86_X2APIC
+	depends on EFI_STUB
 	select ARCH_HAS_CC_PLATFORM
 	select X86_MEM_ENCRYPT
 	select X86_MCE
+	select UNACCEPTED_MEMORY
 	help
 	  Support running as a guest under Intel TDX.  Without this support,
 	  the guest kernel can not boot or run under TDX.
@@ -1544,11 +1546,13 @@ config X86_MEM_ENCRYPT
 config AMD_MEM_ENCRYPT
 	bool "AMD Secure Memory Encryption (SME) support"
 	depends on X86_64 && CPU_SUP_AMD
+	depends on EFI_STUB
 	select DMA_COHERENT_POOL
 	select ARCH_USE_MEMREMAP_PROT
 	select INSTRUCTION_DECODER
 	select ARCH_HAS_CC_PLATFORM
 	select X86_MEM_ENCRYPT
+	select UNACCEPTED_MEMORY
 	help
 	  Say yes to enable support for the encryption of system memory.
 	  This requires an AMD processor that supports Secure Memory
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -106,7 +106,8 @@ ifdef CONFIG_X86_64
 endif

 vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
-vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o
+vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o $(obj)/tdx-shared.o
+vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/mem.o

 vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o
 vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_mixed.o
--- a/arch/x86/boot/compressed/efi.h
+++ b/arch/x86/boot/compressed/efi.h
@@ -16,6 +16,7 @@ typedef guid_t efi_guid_t __aligned(__alignof__(u32));
 #define ACPI_TABLE_GUID				EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
 #define ACPI_20_TABLE_GUID			EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81)
 #define EFI_CC_BLOB_GUID			EFI_GUID(0x067b1f5f, 0xcf26, 0x44c5, 0x85, 0x54, 0x93, 0xd7, 0x77, 0x91, 0x2d, 0x42)
+#define LINUX_EFI_UNACCEPTED_MEM_TABLE_GUID	EFI_GUID(0xd5d1de3c, 0x105c, 0x44f9, 0x9e, 0xa9, 0xbc, 0xef, 0x98, 0x12, 0x00, 0x31)

 #define EFI32_LOADER_SIGNATURE	"EL32"
 #define EFI64_LOADER_SIGNATURE	"EL64"
@@ -32,6 +33,7 @@ typedef struct {
 } efi_table_hdr_t;

 #define EFI_CONVENTIONAL_MEMORY		 7
+#define EFI_UNACCEPTED_MEMORY		15

 #define EFI_MEMORY_MORE_RELIABLE \
 				((u64)0x0000000000010000ULL)	/* higher reliability */
@@ -104,6 +106,14 @@ struct efi_setup_data {
 	u64 reserved[8];
 };

+struct efi_unaccepted_memory {
+	u32 version;
+	u32 unit_size;
+	u64 phys_base;
+	u64 size;
+	unsigned long bitmap[];
+};
+
 static inline int efi_guidcmp (efi_guid_t left, efi_guid_t right)
 {
 	return memcmp(&left, &right, sizeof (efi_guid_t));
--- a/arch/x86/boot/compressed/error.c
+++ b/arch/x86/boot/compressed/error.c
@@ -22,3 +22,22 @@ void error(char *m)
 	while (1)
 		asm("hlt");
 }
+
+/* EFI libstub provides vsnprintf() */
+#ifdef CONFIG_EFI_STUB
+void panic(const char *fmt, ...)
+{
+	static char buf[1024];
+	va_list args;
+	int len;
+
+	va_start(args, fmt);
+	len = vsnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+
+	if (len && buf[len - 1] == '\n')
+		buf[len - 1] = '\0';
+
+	error(buf);
+}
+#endif
--- a/arch/x86/boot/compressed/error.h
+++ b/arch/x86/boot/compressed/error.h
@@ -6,5 +6,6 @@

 void warn(char *m);
 void error(char *m) __noreturn;
+void panic(const char *fmt, ...) __noreturn __cold;

 #endif /* BOOT_COMPRESSED_ERROR_H */
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -672,6 +672,33 @@ static bool process_mem_region(struct mem_vector *region,
 }

 #ifdef CONFIG_EFI

+/*
+ * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if supported) are
+ * guaranteed to be free.
+ *
+ * Pick free memory more conservatively than the EFI spec allows: according to
+ * the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also free memory and thus
+ * available to place the kernel image into, but in practice there's firmware
+ * where using that memory leads to crashes. Buggy vendor EFI code registers
+ * for an event that triggers on SetVirtualAddressMap(). The handler assumes
+ * that EFI_BOOT_SERVICES_DATA memory has not been touched by loader yet, which
+ * is probably true for Windows.
+ *
+ * Preserve EFI_BOOT_SERVICES_* regions until after SetVirtualAddressMap().
+ */
+static inline bool memory_type_is_free(efi_memory_desc_t *md)
+{
+	if (md->type == EFI_CONVENTIONAL_MEMORY)
+		return true;
+
+	if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY) &&
+	    md->type == EFI_UNACCEPTED_MEMORY)
+		return true;
+
+	return false;
+}
+
 /*
  * Returns true if we processed the EFI memmap, which we prefer over the E820
  * table if it is available.
@@ -716,18 +743,7 @@ process_efi_entries(unsigned long minimum, unsigned long image_size)
 	for (i = 0; i < nr_desc; i++) {
 		md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);

-		/*
-		 * Here we are more conservative in picking free memory than
-		 * the EFI spec allows:
-		 *
-		 * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also
-		 * free memory and thus available to place the kernel image into,
-		 * but in practice there's firmware where using that memory leads
-		 * to crashes.
-		 *
-		 * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free.
-		 */
-		if (md->type != EFI_CONVENTIONAL_MEMORY)
+		if (!memory_type_is_free(md))
 			continue;

 		if (efi_soft_reserve_enabled() &&
--- /dev/null
+++ b/arch/x86/boot/compressed/mem.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "error.h"
+#include "misc.h"
+#include "tdx.h"
+#include "sev.h"
+#include <asm/shared/tdx.h>
+
+/*
+ * accept_memory() and process_unaccepted_memory() called from EFI stub which
+ * runs before decompresser and its early_tdx_detect().
+ *
+ * Enumerate TDX directly from the early users.
+ */
+static bool early_is_tdx_guest(void)
+{
+	static bool once;
+	static bool is_tdx;
+
+	if (!IS_ENABLED(CONFIG_INTEL_TDX_GUEST))
+		return false;
+
+	if (!once) {
+		u32 eax, sig[3];
+
+		cpuid_count(TDX_CPUID_LEAF_ID, 0, &eax,
+			    &sig[0], &sig[2], &sig[1]);
+		is_tdx = !memcmp(TDX_IDENT, sig, sizeof(sig));
+		once = true;
+	}
+
+	return is_tdx;
+}
+
+void arch_accept_memory(phys_addr_t start, phys_addr_t end)
+{
+	/* Platform-specific memory-acceptance call goes here */
+	if (early_is_tdx_guest()) {
+		if (!tdx_accept_memory(start, end))
+			panic("TDX: Failed to accept memory\n");
+	} else if (sev_snp_enabled()) {
+		snp_accept_memory(start, end);
+	} else {
+		error("Cannot accept memory: unknown platform\n");
+	}
+}
+
+bool init_unaccepted_memory(void)
+{
+	guid_t guid = LINUX_EFI_UNACCEPTED_MEM_TABLE_GUID;
+	struct efi_unaccepted_memory *table;
+	unsigned long cfg_table_pa;
+	unsigned int cfg_table_len;
+	enum efi_type et;
+	int ret;
+
+	et = efi_get_type(boot_params);
+	if (et == EFI_TYPE_NONE)
+		return false;
+
+	ret = efi_get_conf_table(boot_params, &cfg_table_pa, &cfg_table_len);
+	if (ret) {
+		warn("EFI config table not found.");
+		return false;
+	}
+
+	table = (void *)efi_find_vendor_table(boot_params, cfg_table_pa,
+					      cfg_table_len, guid);
+	if (!table)
+		return false;
+
+	if (table->version != 1)
+		error("Unknown version of unaccepted memory table\n");
+
+	/*
+	 * In many cases unaccepted_table is already set by EFI stub, but it
+	 * has to be initialized again to cover cases when the table is not
+	 * allocated by EFI stub or EFI stub copied the kernel image with
+	 * efi_relocate_kernel() before the variable is set.
+	 *
+	 * It must be initialized before the first usage of accept_memory().
+	 */
+	unaccepted_table = table;
+
+	return true;
+}
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -455,6 +455,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 #endif

 	debug_putstr("\nDecompressing Linux... ");
+
+	if (init_unaccepted_memory()) {
+		debug_putstr("Accepting memory... ");
+		accept_memory(__pa(output), __pa(output) + needed_size);
+	}
+
 	__decompress(input_data, input_len, NULL, NULL, output, output_len,
 			NULL, error);
 	entry_offset = parse_elf(output);
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -247,4 +247,14 @@ static inline unsigned long efi_find_vendor_table(struct boot_params *bp,
 }
 #endif /* CONFIG_EFI */

+#ifdef CONFIG_UNACCEPTED_MEMORY
+bool init_unaccepted_memory(void);
+#else
+static inline bool init_unaccepted_memory(void) { return false; }
+#endif
+
+/* Defined in EFI stub */
+extern struct efi_unaccepted_memory *unaccepted_table;
+void accept_memory(phys_addr_t start, phys_addr_t end);
+
 #endif /* BOOT_COMPRESSED_MISC_H */
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -115,7 +115,7 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
 /* Include code for early handlers */
 #include "../../kernel/sev-shared.c"

-static inline bool sev_snp_enabled(void)
+bool sev_snp_enabled(void)
 {
 	return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
 }
@@ -181,6 +181,58 @@ static bool early_setup_ghcb(void)
 	return true;
 }

+static phys_addr_t __snp_accept_memory(struct snp_psc_desc *desc,
+				       phys_addr_t pa, phys_addr_t pa_end)
+{
+	struct psc_hdr *hdr;
+	struct psc_entry *e;
+	unsigned int i;
+
+	hdr = &desc->hdr;
+	memset(hdr, 0, sizeof(*hdr));
+
+	e = desc->entries;
+
+	i = 0;
+	while (pa < pa_end && i < VMGEXIT_PSC_MAX_ENTRY) {
+		hdr->end_entry = i;
+
+		e->gfn = pa >> PAGE_SHIFT;
+		e->operation = SNP_PAGE_STATE_PRIVATE;
+		if (IS_ALIGNED(pa, PMD_SIZE) && (pa_end - pa) >= PMD_SIZE) {
+			e->pagesize = RMP_PG_SIZE_2M;
+			pa += PMD_SIZE;
+		} else {
+			e->pagesize = RMP_PG_SIZE_4K;
+			pa += PAGE_SIZE;
+		}
+
+		e++;
+		i++;
+	}
+
+	if (vmgexit_psc(boot_ghcb, desc))
+		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
+
+	pvalidate_pages(desc);
+
+	return pa;
+}
+
+void snp_accept_memory(phys_addr_t start, phys_addr_t end)
+{
+	struct snp_psc_desc desc = {};
+	unsigned int i;
+	phys_addr_t pa;
+
+	if (!boot_ghcb && !early_setup_ghcb())
+		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
+
+	pa = start;
+	while (pa < end)
+		pa = __snp_accept_memory(&desc, pa, end);
+}
+
 void sev_es_shutdown_ghcb(void)
 {
 	if (!boot_ghcb)
--- /dev/null
+++ b/arch/x86/boot/compressed/sev.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD SEV header for early boot related functions.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ */
+
+#ifndef BOOT_COMPRESSED_SEV_H
+#define BOOT_COMPRESSED_SEV_H
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+
+bool sev_snp_enabled(void);
+void snp_accept_memory(phys_addr_t start, phys_addr_t end);
+
+#else
+
+static inline bool sev_snp_enabled(void) { return false; }
+static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
+
+#endif
+
+#endif
--- /dev/null
+++ b/arch/x86/boot/compressed/tdx-shared.c
@@ -0,0 +1,2 @@
+#include "error.h"
+#include "../../coco/tdx/tdx-shared.c"
--- a/arch/x86/coco/tdx/Makefile
+++ b/arch/x86/coco/tdx/Makefile
@@ -1,3 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0

-obj-y += tdx.o tdcall.o
+obj-y += tdx.o tdx-shared.o tdcall.o
--- /dev/null
+++ b/arch/x86/coco/tdx/tdx-shared.c
@@ -0,0 +1,71 @@
+#include <asm/tdx.h>
+#include <asm/pgtable.h>
+
+static unsigned long try_accept_one(phys_addr_t start, unsigned long len,
+				    enum pg_level pg_level)
+{
+	unsigned long accept_size = page_level_size(pg_level);
+	u64 tdcall_rcx;
+	u8 page_size;
+
+	if (!IS_ALIGNED(start, accept_size))
+		return 0;
+
+	if (len < accept_size)
+		return 0;
+
+	/*
+	 * Pass the page physical address to the TDX module to accept the
+	 * pending, private page.
+	 *
+	 * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G.
+	 */
+	switch (pg_level) {
+	case PG_LEVEL_4K:
+		page_size = 0;
+		break;
+	case PG_LEVEL_2M:
+		page_size = 1;
+		break;
+	case PG_LEVEL_1G:
+		page_size = 2;
+		break;
+	default:
+		return 0;
+	}
+
+	tdcall_rcx = start | page_size;
+	if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL))
+		return 0;
+
+	return accept_size;
+}
+
+bool tdx_accept_memory(phys_addr_t start, phys_addr_t end)
+{
+	/*
+	 * For shared->private conversion, accept the page using
+	 * TDX_ACCEPT_PAGE TDX module call.
+	 */
+	while (start < end) {
+		unsigned long len = end - start;
+		unsigned long accept_size;
+
+		/*
+		 * Try larger accepts first. It gives chance to VMM to keep
+		 * 1G/2M Secure EPT entries where possible and speeds up
+		 * process by cutting number of hypercalls (if successful).
+		 */
+
+		accept_size = try_accept_one(start, len, PG_LEVEL_1G);
+		if (!accept_size)
+			accept_size = try_accept_one(start, len, PG_LEVEL_2M);
+		if (!accept_size)
+			accept_size = try_accept_one(start, len, PG_LEVEL_4K);
+		if (!accept_size)
+			return false;
+		start += accept_size;
+	}
+
+	return true;
+}
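The RCX encoding in try_accept_one() above relies on alignment: the physical address handed to TDX_ACCEPT_PAGE must be aligned to the page size being accepted, so its low bits are guaranteed zero and are free to carry the size code. A quick illustration (the address is made up):

	uint64_t pa  = 0x40000000;	/* example 1 GiB-aligned physical address */
	uint64_t rcx = pa | 2;		/* bits 2:0 = 2 selects 1G; 1 = 2M, 0 = 4K */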
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -14,20 +14,6 @@
 #include <asm/insn-eval.h>
 #include <asm/pgtable.h>

-/* TDX module Call Leaf IDs */
-#define TDX_GET_INFO			1
-#define TDX_GET_VEINFO			3
-#define TDX_GET_REPORT			4
-#define TDX_ACCEPT_PAGE			6
-#define TDX_WR				8
-
-/* TDCS fields. To be used by TDG.VM.WR and TDG.VM.RD module calls */
-#define TDCS_NOTIFY_ENABLES		0x9100000000000010
-
-/* TDX hypercall Leaf IDs */
-#define TDVMCALL_MAP_GPA		0x10001
-#define TDVMCALL_REPORT_FATAL_ERROR	0x10003
-
 /* MMIO direction */
 #define EPT_READ	0
 #define EPT_WRITE	1
@@ -51,24 +37,6 @@

 #define TDREPORT_SUBTYPE_0	0

-/*
- * Wrapper for standard use of __tdx_hypercall with no output aside from
- * return code.
- */
-static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15)
-{
-	struct tdx_hypercall_args args = {
-		.r10 = TDX_HYPERCALL_STANDARD,
-		.r11 = fn,
-		.r12 = r12,
-		.r13 = r13,
-		.r14 = r14,
-		.r15 = r15,
-	};
-
-	return __tdx_hypercall(&args);
-}
-
 /* Called from __tdx_hypercall() for unrecoverable failure */
 noinstr void __tdx_hypercall_failed(void)
 {
@@ -745,47 +713,6 @@ static bool tdx_cache_flush_required(void)
 	return true;
 }

-static bool try_accept_one(phys_addr_t *start, unsigned long len,
-			   enum pg_level pg_level)
-{
-	unsigned long accept_size = page_level_size(pg_level);
-	u64 tdcall_rcx;
-	u8 page_size;
-
-	if (!IS_ALIGNED(*start, accept_size))
-		return false;
-
-	if (len < accept_size)
-		return false;
-
-	/*
-	 * Pass the page physical address to the TDX module to accept the
-	 * pending, private page.
-	 *
-	 * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G.
-	 */
-	switch (pg_level) {
-	case PG_LEVEL_4K:
-		page_size = 0;
-		break;
-	case PG_LEVEL_2M:
-		page_size = 1;
-		break;
-	case PG_LEVEL_1G:
-		page_size = 2;
-		break;
-	default:
-		return false;
-	}
-
-	tdcall_rcx = *start | page_size;
-	if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL))
-		return false;
-
-	*start += accept_size;
-	return true;
-}
-
 /*
  * Inform the VMM of the guest's intent for this physical page: shared with
  * the VMM or private to the guest. The VMM is expected to change its mapping
@@ -810,32 +737,9 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
 	if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0))
 		return false;

-	/* private->shared conversion requires only MapGPA call */
-	if (!enc)
-		return true;
-
-	/*
-	 * For shared->private conversion, accept the page using
-	 * TDX_ACCEPT_PAGE TDX module call.
-	 */
-	while (start < end) {
-		unsigned long len = end - start;
-
-		/*
-		 * Try larger accepts first. It gives chance to VMM to keep
-		 * 1G/2M SEPT entries where possible and speeds up process by
-		 * cutting number of hypercalls (if successful).
-		 */
-
-		if (try_accept_one(&start, len, PG_LEVEL_1G))
-			continue;
-
-		if (try_accept_one(&start, len, PG_LEVEL_2M))
-			continue;
-
-		if (!try_accept_one(&start, len, PG_LEVEL_4K))
-			return false;
-	}
+	/* shared->private conversion requires memory to be accepted before use */
+	if (enc)
+		return tdx_accept_memory(start, end);

 	return true;
 }
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -31,6 +31,8 @@ extern unsigned long efi_mixed_mode_stack_pa;

 #define ARCH_EFI_IRQ_FLAGS_MASK	X86_EFLAGS_IF

+#define EFI_UNACCEPTED_UNIT_SIZE PMD_SIZE
+
 /*
  * The EFI services are called through variadic functions in many cases. These
  * functions are implemented in assembler and support only a fixed number of
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -106,8 +106,13 @@ enum psc_op {
 #define GHCB_HV_FT_SNP			BIT_ULL(0)
 #define GHCB_HV_FT_SNP_AP_CREATION	BIT_ULL(1)

-/* SNP Page State Change NAE event */
-#define VMGEXIT_PSC_MAX_ENTRY		253
+/*
+ * SNP Page State Change NAE event
+ * The VMGEXIT_PSC_MAX_ENTRY determines the size of the PSC structure, which
+ * is a local stack variable in set_pages_state(). Do not increase this value
+ * without evaluating the impact to stack usage.
+ */
+#define VMGEXIT_PSC_MAX_ENTRY		64

 struct psc_hdr {
 	u16 cur_entry;
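The lower limit keeps the whole descriptor small enough for a local variable; assuming the 8-byte psc_hdr and 8-byte psc_entry layouts from this header (not restated in the hunk), the arithmetic is roughly:

	/* sizeof(struct snp_psc_desc) = sizeof(psc_hdr) + N * sizeof(psc_entry) */
	/* old: 8 + 253 * 8 = 2032 bytes -- too big to live on the stack         */
	/* new: 8 +  64 * 8 =  520 bytes -- a tolerable on-stack footprint       */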
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -80,11 +80,15 @@ extern void vc_no_ghcb(void);
 extern void vc_boot_ghcb(void);
 extern bool handle_vc_boot_ghcb(struct pt_regs *regs);

+/* PVALIDATE return codes */
+#define PVALIDATE_FAIL_SIZEMISMATCH	6
+
 /* Software defined (when rFlags.CF = 1) */
 #define PVALIDATE_FAIL_NOUPDATE		255

 /* RMP page size */
 #define RMP_PG_SIZE_4K			0
+#define RMP_PG_SIZE_2M			1

 #define RMPADJUST_VMSA_PAGE_BIT		BIT(16)
@@ -192,16 +196,17 @@ struct snp_guest_request_ioctl;

 void setup_ghcb(void);
 void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
-					 unsigned int npages);
+					 unsigned long npages);
 void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
-					unsigned int npages);
+					unsigned long npages);
 void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op);
-void snp_set_memory_shared(unsigned long vaddr, unsigned int npages);
-void snp_set_memory_private(unsigned long vaddr, unsigned int npages);
+void snp_set_memory_shared(unsigned long vaddr, unsigned long npages);
+void snp_set_memory_private(unsigned long vaddr, unsigned long npages);
 void snp_set_wakeup_secondary_cpu(void);
 bool snp_init(struct boot_params *bp);
 void __init __noreturn snp_abort(void);
 int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
+void snp_accept_memory(phys_addr_t start, phys_addr_t end);
 #else
 static inline void sev_es_ist_enter(struct pt_regs *regs) { }
 static inline void sev_es_ist_exit(void) { }
@@ -212,12 +217,12 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
 static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; }
 static inline void setup_ghcb(void) { }
 static inline void __init
-early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned int npages) { }
+early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { }
 static inline void __init
-early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned int npages) { }
+early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { }
 static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { }
-static inline void snp_set_memory_shared(unsigned long vaddr, unsigned int npages) { }
-static inline void snp_set_memory_private(unsigned long vaddr, unsigned int npages) { }
+static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { }
+static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { }
 static inline void snp_set_wakeup_secondary_cpu(void) { }
 static inline bool snp_init(struct boot_params *bp) { return false; }
 static inline void snp_abort(void) { }
@@ -225,6 +230,8 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in
 {
 	return -ENOTTY;
 }
+
+static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
 #endif

 #endif
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -10,6 +10,20 @@
 #define TDX_CPUID_LEAF_ID	0x21
 #define TDX_IDENT		"IntelTDX    "

+/* TDX module Call Leaf IDs */
+#define TDX_GET_INFO			1
+#define TDX_GET_VEINFO			3
+#define TDX_GET_REPORT			4
+#define TDX_ACCEPT_PAGE			6
+#define TDX_WR				8
+
+/* TDCS fields. To be used by TDG.VM.WR and TDG.VM.RD module calls */
+#define TDCS_NOTIFY_ENABLES		0x9100000000000010
+
+/* TDX hypercall Leaf IDs */
+#define TDVMCALL_MAP_GPA		0x10001
+#define TDVMCALL_REPORT_FATAL_ERROR	0x10003
+
 #ifndef __ASSEMBLY__

 /*
@@ -37,8 +51,47 @@ struct tdx_hypercall_args {
 u64 __tdx_hypercall(struct tdx_hypercall_args *args);
 u64 __tdx_hypercall_ret(struct tdx_hypercall_args *args);

+/*
+ * Wrapper for standard use of __tdx_hypercall with no output aside from
+ * return code.
+ */
+static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15)
+{
+	struct tdx_hypercall_args args = {
+		.r10 = TDX_HYPERCALL_STANDARD,
+		.r11 = fn,
+		.r12 = r12,
+		.r13 = r13,
+		.r14 = r14,
+		.r15 = r15,
+	};
+
+	return __tdx_hypercall(&args);
+}
+
+
 /* Called from __tdx_hypercall() for unrecoverable failure */
 void __tdx_hypercall_failed(void);

+/*
+ * Used in __tdx_module_call() to gather the output registers' values of the
+ * TDCALL instruction when requesting services from the TDX module. This is a
+ * software only structure and not part of the TDX module/VMM ABI
+ */
+struct tdx_module_output {
+	u64 rcx;
+	u64 rdx;
+	u64 r8;
+	u64 r9;
+	u64 r10;
+	u64 r11;
+};
+
+/* Used to communicate with the TDX module */
+u64 __tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
+		      struct tdx_module_output *out);
+
+bool tdx_accept_memory(phys_addr_t start, phys_addr_t end);
+
 #endif /* !__ASSEMBLY__ */
 #endif /* _ASM_X86_SHARED_TDX_H */
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -5,6 +5,8 @@

 #include <linux/init.h>
 #include <linux/bits.h>
+
+#include <asm/errno.h>
 #include <asm/ptrace.h>
 #include <asm/shared/tdx.h>

@@ -20,21 +22,6 @@

 #ifndef __ASSEMBLY__

-/*
- * Used to gather the output registers values of the TDCALL and SEAMCALL
- * instructions when requesting services from the TDX module.
- *
- * This is a software only structure and not part of the TDX module/VMM ABI.
- */
-struct tdx_module_output {
-	u64 rcx;
-	u64 rdx;
-	u64 r8;
-	u64 r9;
-	u64 r10;
-	u64 r11;
-};
-
 /*
  * Used by the #VE exception handler to gather the #VE exception
  * info from the TDX module. This is a software only structure
@@ -55,10 +42,6 @@ struct ve_info {

 void __init tdx_early_init(void);

-/* Used to communicate with the TDX module */
-u64 __tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
-		      struct tdx_module_output *out);
-
 void tdx_get_ve_info(struct ve_info *ve);

 bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);
--- /dev/null
+++ b/arch/x86/include/asm/unaccepted_memory.h
@@ -0,0 +1,27 @@
+#ifndef _ASM_X86_UNACCEPTED_MEMORY_H
+#define _ASM_X86_UNACCEPTED_MEMORY_H
+
+#include <linux/efi.h>
+#include <asm/tdx.h>
+#include <asm/sev.h>
+
+static inline void arch_accept_memory(phys_addr_t start, phys_addr_t end)
+{
+	/* Platform-specific memory-acceptance call goes here */
+	if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
+		if (!tdx_accept_memory(start, end))
+			panic("TDX: Failed to accept memory\n");
+	} else if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
+		snp_accept_memory(start, end);
+	} else {
+		panic("Cannot accept memory: unknown platform\n");
+	}
+}
+
+static inline struct efi_unaccepted_memory *efi_get_unaccepted_table(void)
+{
+	if (efi.unaccepted == EFI_INVALID_TABLE_ADDR)
+		return NULL;
+	return __va(efi.unaccepted);
+}
+#endif
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -12,6 +12,9 @@
 #ifndef __BOOT_COMPRESSED
 #define error(v)	pr_err(v)
 #define has_cpuflag(f)	boot_cpu_has(f)
+#else
+#undef WARN
+#define WARN(condition, format...) (!!(condition))
 #endif

 /* I/O parameters for CPUID-related helpers */
@@ -991,3 +994,103 @@ static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
 			cpuid_ext_range_max = fn->eax;
 	}
 }
+
+static void pvalidate_pages(struct snp_psc_desc *desc)
+{
+	struct psc_entry *e;
+	unsigned long vaddr;
+	unsigned int size;
+	unsigned int i;
+	bool validate;
+	int rc;
+
+	for (i = 0; i <= desc->hdr.end_entry; i++) {
+		e = &desc->entries[i];
+
+		vaddr = (unsigned long)pfn_to_kaddr(e->gfn);
+		size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
+		validate = e->operation == SNP_PAGE_STATE_PRIVATE;
+
+		rc = pvalidate(vaddr, size, validate);
+		if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) {
+			unsigned long vaddr_end = vaddr + PMD_SIZE;
+
+			for (; vaddr < vaddr_end; vaddr += PAGE_SIZE) {
+				rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
+				if (rc)
+					break;
+			}
+		}
+
+		if (rc) {
+			WARN(1, "Failed to validate address 0x%lx ret %d", vaddr, rc);
+			sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+		}
+	}
+}
+
+static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc)
+{
+	int cur_entry, end_entry, ret = 0;
+	struct snp_psc_desc *data;
+	struct es_em_ctxt ctxt;
+
+	vc_ghcb_invalidate(ghcb);
+
+	/* Copy the input desc into GHCB shared buffer */
+	data = (struct snp_psc_desc *)ghcb->shared_buffer;
+	memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));
+
+	/*
+	 * As per the GHCB specification, the hypervisor can resume the guest
+	 * before processing all the entries. Check whether all the entries
+	 * are processed. If not, then keep retrying. Note, the hypervisor
+	 * will update the data memory directly to indicate the status, so
+	 * reference the data->hdr everywhere.
+	 *
+	 * The strategy here is to wait for the hypervisor to change the page
+	 * state in the RMP table before guest accesses the memory pages. If the
+	 * page state change was not successful, then later memory access will
+	 * result in a crash.
+	 */
+	cur_entry = data->hdr.cur_entry;
+	end_entry = data->hdr.end_entry;
+
+	while (data->hdr.cur_entry <= data->hdr.end_entry) {
+		ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
+
+		/* This will advance the shared buffer data points to. */
+		ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
+
+		/*
+		 * Page State Change VMGEXIT can pass error code through
+		 * exit_info_2.
+		 */
+		if (WARN(ret || ghcb->save.sw_exit_info_2,
+			 "SNP: PSC failed ret=%d exit_info_2=%llx\n",
+			 ret, ghcb->save.sw_exit_info_2)) {
+			ret = 1;
+			goto out;
+		}
+
+		/* Verify that reserved bit is not set */
+		if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
+			ret = 1;
+			goto out;
+		}
+
+		/*
+		 * Sanity check that entry processing is not going backwards.
+		 * This will happen only if hypervisor is tricking us.
+		 */
+		if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry,
+			 "SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
+			 end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
+			ret = 1;
+			goto out;
+		}
+	}
+
+out:
+	return ret;
+}
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -119,7 +119,19 @@ static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);

 struct sev_config {
 	__u64 debug		: 1,
-	      __reserved	: 63;
+
+	      /*
+	       * A flag used by __set_pages_state() that indicates when the
+	       * per-CPU GHCB has been created and registered and thus can be
+	       * used by the BSP instead of the early boot GHCB.
+	       *
+	       * For APs, the per-CPU GHCB is created before they are started
+	       * and registered upon startup, so this flag can be used globally
+	       * for the BSP and APs.
+	       */
+	      ghcbs_initialized	: 1,
+
+	      __reserved	: 62;
 };

 static struct sev_config sev_cfg __read_mostly;
@@ -645,32 +657,26 @@ static u64 __init get_jump_table_addr(void)
 	return ret;
 }

-static void pvalidate_pages(unsigned long vaddr, unsigned int npages, bool validate)
-{
-	unsigned long vaddr_end;
-	int rc;
-
-	vaddr = vaddr & PAGE_MASK;
-	vaddr_end = vaddr + (npages << PAGE_SHIFT);
-
-	while (vaddr < vaddr_end) {
-		rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
-		if (WARN(rc, "Failed to validate address 0x%lx ret %d", vaddr, rc))
-			sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
-
-		vaddr = vaddr + PAGE_SIZE;
-	}
-}
-
-static void __init early_set_pages_state(unsigned long paddr, unsigned int npages, enum psc_op op)
+static void early_set_pages_state(unsigned long vaddr, unsigned long paddr,
+				  unsigned long npages, enum psc_op op)
 {
 	unsigned long paddr_end;
 	u64 val;
+	int ret;
+
+	vaddr = vaddr & PAGE_MASK;

 	paddr = paddr & PAGE_MASK;
 	paddr_end = paddr + (npages << PAGE_SHIFT);

 	while (paddr < paddr_end) {
+		if (op == SNP_PAGE_STATE_SHARED) {
+			/* Page validation must be rescinded before changing to shared */
+			ret = pvalidate(vaddr, RMP_PG_SIZE_4K, false);
+			if (WARN(ret, "Failed to validate address 0x%lx ret %d", paddr, ret))
+				goto e_term;
+		}
+
 		/*
 		 * Use the MSR protocol because this function can be called before
 		 * the GHCB is established.
@@ -691,7 +697,15 @@ static void __init early_set_pages_state(unsigned long paddr, unsigned int npage
 			  paddr, GHCB_MSR_PSC_RESP_VAL(val)))
 			goto e_term;

-		paddr = paddr + PAGE_SIZE;
+		if (op == SNP_PAGE_STATE_PRIVATE) {
+			/* Page validation must be performed after changing to private */
+			ret = pvalidate(vaddr, RMP_PG_SIZE_4K, true);
+			if (WARN(ret, "Failed to validate address 0x%lx ret %d", paddr, ret))
+				goto e_term;
+		}
+
+		vaddr += PAGE_SIZE;
+		paddr += PAGE_SIZE;
 	}

 	return;
@@ -701,7 +715,7 @@ e_term:
 }

 void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
-					 unsigned int npages)
+					 unsigned long npages)
 {
 	/*
 	 * This can be invoked in early boot while running identity mapped, so
@@ -716,14 +730,11 @@ void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long padd
 	 * Ask the hypervisor to mark the memory pages as private in the RMP
 	 * table.
 	 */
-	early_set_pages_state(paddr, npages, SNP_PAGE_STATE_PRIVATE);
-
-	/* Validate the memory pages after they've been added in the RMP table. */
-	pvalidate_pages(vaddr, npages, true);
+	early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_PRIVATE);
 }

 void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
-					unsigned int npages)
+					unsigned long npages)
 {
 	/*
 	 * This can be invoked in early boot while running identity mapped, so
@@ -734,11 +745,8 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr
 	if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
 		return;

-	/* Invalidate the memory pages before they are marked shared in the RMP table. */
-	pvalidate_pages(vaddr, npages, false);
-
 	/* Ask hypervisor to mark the memory pages shared in the RMP table. */
-	early_set_pages_state(paddr, npages, SNP_PAGE_STATE_SHARED);
+	early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED);
 }

 void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op)
@@ -756,96 +764,16 @@ void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op
 		WARN(1, "invalid memory op %d\n", op);
 }

-static int vmgexit_psc(struct snp_psc_desc *desc)
+static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
+				       unsigned long vaddr_end, int op)
 {
-	int cur_entry, end_entry, ret = 0;
-	struct snp_psc_desc *data;
 	struct ghcb_state state;
-	struct es_em_ctxt ctxt;
-	unsigned long flags;
-	struct ghcb *ghcb;
-
-	/*
-	 * __sev_get_ghcb() needs to run with IRQs disabled because it is using
-	 * a per-CPU GHCB.
-	 */
-	local_irq_save(flags);
-
-	ghcb = __sev_get_ghcb(&state);
-	if (!ghcb) {
-		ret = 1;
-		goto out_unlock;
-	}
-
-	/* Copy the input desc into GHCB shared buffer */
-	data = (struct snp_psc_desc *)ghcb->shared_buffer;
-	memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));
-
-	/*
-	 * As per the GHCB specification, the hypervisor can resume the guest
-	 * before processing all the entries. Check whether all the entries
-	 * are processed. If not, then keep retrying. Note, the hypervisor
-	 * will update the data memory directly to indicate the status, so
-	 * reference the data->hdr everywhere.
-	 *
-	 * The strategy here is to wait for the hypervisor to change the page
-	 * state in the RMP table before guest accesses the memory pages. If the
-	 * page state change was not successful, then later memory access will
-	 * result in a crash.
-	 */
-	cur_entry = data->hdr.cur_entry;
-	end_entry = data->hdr.end_entry;
-
-	while (data->hdr.cur_entry <= data->hdr.end_entry) {
-		ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
-
-		/* This will advance the shared buffer data points to. */
-		ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
-
-		/*
-		 * Page State Change VMGEXIT can pass error code through
-		 * exit_info_2.
-		 */
-		if (WARN(ret || ghcb->save.sw_exit_info_2,
-			 "SNP: PSC failed ret=%d exit_info_2=%llx\n",
-			 ret, ghcb->save.sw_exit_info_2)) {
-			ret = 1;
-			goto out;
-		}
-
-		/* Verify that reserved bit is not set */
-		if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
-			ret = 1;
-			goto out;
-		}
-
-		/*
-		 * Sanity check that entry processing is not going backwards.
-		 * This will happen only if hypervisor is tricking us.
-		 */
-		if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry,
-			 "SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
-			 end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
-			ret = 1;
-			goto out;
-		}
-	}
-
-out:
-	__sev_put_ghcb(&state);
-
-out_unlock:
-	local_irq_restore(flags);
-
-	return ret;
-}
-
-static void __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
-			      unsigned long vaddr_end, int op)
-{
+	bool use_large_entry;
 	struct psc_hdr *hdr;
 	struct psc_entry *e;
+	unsigned long flags;
 	unsigned long pfn;
+	struct ghcb *ghcb;
 	int i;

 	hdr = &data->hdr;
@@ -854,74 +782,104 @@ static void __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
 	memset(data, 0, sizeof(*data));
 	i = 0;

-	while (vaddr < vaddr_end) {
-		if (is_vmalloc_addr((void *)vaddr))
+	while (vaddr < vaddr_end && i < ARRAY_SIZE(data->entries)) {
+		hdr->end_entry = i;
+
+		if (is_vmalloc_addr((void *)vaddr)) {
 			pfn = vmalloc_to_pfn((void *)vaddr);
-		else
+			use_large_entry = false;
+		} else {
 			pfn = __pa(vaddr) >> PAGE_SHIFT;
+			use_large_entry = true;
+		}

 		e->gfn = pfn;
 		e->operation = op;
-		hdr->end_entry = i;

-		/*
-		 * Current SNP implementation doesn't keep track of the RMP page
-		 * size so use 4K for simplicity.
-		 */
-		e->pagesize = RMP_PG_SIZE_4K;
+		if (use_large_entry && IS_ALIGNED(vaddr, PMD_SIZE) &&
+		    (vaddr_end - vaddr) >= PMD_SIZE) {
+			e->pagesize = RMP_PG_SIZE_2M;
+			vaddr += PMD_SIZE;
+		} else {
+			e->pagesize = RMP_PG_SIZE_4K;
+			vaddr += PAGE_SIZE;
+		}

-		vaddr = vaddr + PAGE_SIZE;
 		e++;
 		i++;
 	}

-	if (vmgexit_psc(data))
+	/* Page validation must be rescinded before changing to shared */
+	if (op == SNP_PAGE_STATE_SHARED)
+		pvalidate_pages(data);
+
+	local_irq_save(flags);
+
+	if (sev_cfg.ghcbs_initialized)
+		ghcb = __sev_get_ghcb(&state);
+	else
+		ghcb = boot_ghcb;
+
+	/* Invoke the hypervisor to perform the page state changes */
+	if (!ghcb || vmgexit_psc(ghcb, data))
 		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
+
+	if (sev_cfg.ghcbs_initialized)
+		__sev_put_ghcb(&state);
+
+	local_irq_restore(flags);
+
+	/* Page validation must be performed after changing to private */
+	if (op == SNP_PAGE_STATE_PRIVATE)
+		pvalidate_pages(data);
+
+	return vaddr;
 }

-static void set_pages_state(unsigned long vaddr, unsigned int npages, int op)
+static void set_pages_state(unsigned long vaddr, unsigned long npages, int op)
 {
-	unsigned long vaddr_end, next_vaddr;
-	struct snp_psc_desc *desc;
+	struct snp_psc_desc desc;
+	unsigned long vaddr_end;

-	desc = kmalloc(sizeof(*desc), GFP_KERNEL_ACCOUNT);
-	if (!desc)
-		panic("SNP: failed to allocate memory for PSC descriptor\n");
+	/* Use the MSR protocol when a GHCB is not available. */
+	if (!boot_ghcb)
+		return early_set_pages_state(vaddr, __pa(vaddr), npages, op);

 	vaddr = vaddr & PAGE_MASK;
 	vaddr_end = vaddr + (npages << PAGE_SHIFT);

-	while (vaddr < vaddr_end) {
-		/* Calculate the last vaddr that fits in one struct snp_psc_desc. */
-		next_vaddr = min_t(unsigned long, vaddr_end,
-				   (VMGEXIT_PSC_MAX_ENTRY * PAGE_SIZE) + vaddr);
-
-		__set_pages_state(desc, vaddr, next_vaddr, op);
-
-		vaddr = next_vaddr;
-	}
-
-	kfree(desc);
+	while (vaddr < vaddr_end)
+		vaddr = __set_pages_state(&desc, vaddr, vaddr_end, op);
 }

-void snp_set_memory_shared(unsigned long vaddr, unsigned int npages)
+void snp_set_memory_shared(unsigned long vaddr, unsigned long npages)
 {
 	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
 		return;

-	pvalidate_pages(vaddr, npages, false);
-
 	set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED);
 }

-void snp_set_memory_private(unsigned long vaddr, unsigned int npages)
+void snp_set_memory_private(unsigned long vaddr, unsigned long npages)
 {
 	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
 		return;

 	set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
-
-	pvalidate_pages(vaddr, npages, true);
 }

+void snp_accept_memory(phys_addr_t start, phys_addr_t end)
+{
+	unsigned long vaddr;
+	unsigned int npages;
+
+	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+		return;
+
+	vaddr = (unsigned long)__va(start);
+	npages = (end - start) >> PAGE_SHIFT;
+
+	set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
+}
+
 static int snp_set_vmsa(void *va, bool vmsa)
@@ -1267,6 +1225,8 @@ void setup_ghcb(void)
 	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
 		snp_register_per_cpu_ghcb();

+	sev_cfg.ghcbs_initialized = true;
+
 	return;
 }
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -96,6 +96,9 @@ static const unsigned long * const efi_tables[] = {
 #ifdef CONFIG_EFI_COCO_SECRET
 	&efi.coco_secret,
 #endif
+#ifdef CONFIG_UNACCEPTED_MEMORY
+	&efi.unaccepted,
+#endif
 };

 u64 efi_setup;		/* efi setup_data physical address */
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -448,6 +448,9 @@ static ssize_t node_read_meminfo(struct device *dev,
 			     "Node %d ShmemPmdMapped: %8lu kB\n"
 			     "Node %d FileHugePages: %8lu kB\n"
 			     "Node %d FilePmdMapped: %8lu kB\n"
 #endif
+#ifdef CONFIG_UNACCEPTED_MEMORY
+			     "Node %d Unaccepted:     %8lu kB\n"
+#endif
 			     ,
 			     nid, K(node_page_state(pgdat, NR_FILE_DIRTY)),
@@ -477,6 +480,10 @@ static ssize_t node_read_meminfo(struct device *dev,
 			     nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)),
 			     nid, K(node_page_state(pgdat, NR_FILE_THPS)),
 			     nid, K(node_page_state(pgdat, NR_FILE_PMDMAPPED))
 #endif
+#ifdef CONFIG_UNACCEPTED_MEMORY
+			     ,
+			     nid, K(sum_zone_node_page_state(nid, NR_UNACCEPTED))
+#endif
 			     );
 	len += hugetlb_report_node_meminfo(buf, len, nid);
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -269,6 +269,20 @@ config EFI_COCO_SECRET
 	  virt/coco/efi_secret module to access the secrets, which in turn
 	  allows userspace programs to access the injected secrets.

+config UNACCEPTED_MEMORY
+	bool
+	depends on EFI_STUB
+	help
+	   Some Virtual Machine platforms, such as Intel TDX, require
+	   some memory to be "accepted" by the guest before it can be used.
+	   This mechanism helps prevent malicious hosts from making changes
+	   to guest memory.
+
+	   UEFI specification v2.9 introduced EFI_UNACCEPTED_MEMORY memory type.
+
+	   This option adds support for unaccepted memory and makes such memory
+	   usable by the kernel.
+
 config EFI_EMBEDDED_FIRMWARE
 	bool
 	select CRYPTO_LIB_SHA256
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -41,3 +41,4 @@ obj-$(CONFIG_EFI_CAPSULE_LOADER)	+= capsule-loader.o
 obj-$(CONFIG_EFI_EARLYCON)		+= earlycon.o
 obj-$(CONFIG_UEFI_CPER_ARM)		+= cper-arm.o
 obj-$(CONFIG_UEFI_CPER_X86)		+= cper-x86.o
+obj-$(CONFIG_UNACCEPTED_MEMORY)		+= unaccepted_memory.o
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -50,6 +50,9 @@ struct efi __read_mostly efi = {
 #ifdef CONFIG_EFI_COCO_SECRET
 	.coco_secret		= EFI_INVALID_TABLE_ADDR,
 #endif
+#ifdef CONFIG_UNACCEPTED_MEMORY
+	.unaccepted		= EFI_INVALID_TABLE_ADDR,
+#endif
 };
 EXPORT_SYMBOL(efi);
@@ -584,6 +587,9 @@ static const efi_config_table_type_t common_tables[] __initconst = {
 #ifdef CONFIG_EFI_COCO_SECRET
 	{LINUX_EFI_COCO_SECRET_AREA_GUID,	&efi.coco_secret,	"CocoSecret"	},
 #endif
+#ifdef CONFIG_UNACCEPTED_MEMORY
+	{LINUX_EFI_UNACCEPTED_MEM_TABLE_GUID,	&efi.unaccepted,	"Unaccepted"	},
+#endif
 #ifdef CONFIG_EFI_GENERIC_STUB
 	{LINUX_EFI_SCREEN_INFO_TABLE_GUID,	&screen_info_table	},
 #endif
@@ -738,6 +744,25 @@ int __init efi_config_parse_tables(const efi_config_table_t *config_tables,
 		}
 	}

+	if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY) &&
+	    efi.unaccepted != EFI_INVALID_TABLE_ADDR) {
+		struct efi_unaccepted_memory *unaccepted;
+
+		unaccepted = early_memremap(efi.unaccepted, sizeof(*unaccepted));
+		if (unaccepted) {
+			unsigned long size;
+
+			if (unaccepted->version == 1) {
+				size = sizeof(*unaccepted) + unaccepted->size;
+				memblock_reserve(efi.unaccepted, size);
+			} else {
+				efi.unaccepted = EFI_INVALID_TABLE_ADDR;
+			}
+
+			early_memunmap(unaccepted, sizeof(*unaccepted));
+		}
+	}
+
 	return 0;
 }
@@ -822,6 +847,7 @@ static __initdata char memory_type_name[][13] = {
 	"MMIO Port",
 	"PAL Code",
 	"Persistent",
+	"Unaccepted",
 };

 char * __init efi_md_typeattr_format(char *buf, size_t size,
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -96,6 +96,8 @@ CFLAGS_arm32-stub.o		:= -DTEXT_OFFSET=$(TEXT_OFFSET)
 zboot-obj-$(CONFIG_RISCV)	:= lib-clz_ctz.o lib-ashldi3.o
 lib-$(CONFIG_EFI_ZBOOT)	+= zboot.o $(zboot-obj-y)

+lib-$(CONFIG_UNACCEPTED_MEMORY) += unaccepted_memory.o bitmap.o find.o
+
 extra-y				:= $(lib-y)
 lib-y				:= $(patsubst %.o,%.stub.o,$(lib-y))
--- /dev/null
+++ b/drivers/firmware/efi/libstub/bitmap.c
@@ -0,0 +1,41 @@
+#include <linux/bitmap.h>
+
+void __bitmap_set(unsigned long *map, unsigned int start, int len)
+{
+	unsigned long *p = map + BIT_WORD(start);
+	const unsigned int size = start + len;
+	int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
+	unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
+
+	while (len - bits_to_set >= 0) {
+		*p |= mask_to_set;
+		len -= bits_to_set;
+		bits_to_set = BITS_PER_LONG;
+		mask_to_set = ~0UL;
+		p++;
+	}
+	if (len) {
+		mask_to_set &= BITMAP_LAST_WORD_MASK(size);
+		*p |= mask_to_set;
+	}
+}
+
+void __bitmap_clear(unsigned long *map, unsigned int start, int len)
+{
+	unsigned long *p = map + BIT_WORD(start);
+	const unsigned int size = start + len;
+	int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+	unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+	while (len - bits_to_clear >= 0) {
+		*p &= ~mask_to_clear;
+		len -= bits_to_clear;
+		bits_to_clear = BITS_PER_LONG;
+		mask_to_clear = ~0UL;
+		p++;
+	}
+	if (len) {
+		mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+		*p &= ~mask_to_clear;
+	}
+}
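These are the stock lib/bitmap.c helpers rebuilt for the stub environment, so the usual bitmap_set()/bitmap_clear() calls resolve to them there. For instance, marking seven units as unaccepted starting at bit 3 (a hypothetical call, mirroring how unaccepted_memory.c uses the bitmap):

	__bitmap_set(unaccepted_table->bitmap, 3, 7);	/* set bits 3..9 */
	__bitmap_clear(unaccepted_table->bitmap, 3, 2);	/* clear bits 3..4 again */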
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -1136,4 +1136,10 @@ void efi_remap_image(unsigned long image_base, unsigned alloc_size,
 asmlinkage efi_status_t __efiapi
 efi_zboot_entry(efi_handle_t handle, efi_system_table_t *systab);

+efi_status_t allocate_unaccepted_bitmap(__u32 nr_desc,
+					struct efi_boot_memmap *map);
+void process_unaccepted_memory(u64 start, u64 end);
+void accept_memory(phys_addr_t start, phys_addr_t end);
+void arch_accept_memory(phys_addr_t start, phys_addr_t end);
+
 #endif
--- /dev/null
+++ b/drivers/firmware/efi/libstub/find.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/bitmap.h>
+#include <linux/math.h>
+#include <linux/minmax.h>
+
+/*
+ * Common helper for find_next_bit() function family
+ * @FETCH: The expression that fetches and pre-processes each word of bitmap(s)
+ * @MUNGE: The expression that post-processes a word containing found bit (may be empty)
+ * @size: The bitmap size in bits
+ * @start: The bitnumber to start searching at
+ */
+#define FIND_NEXT_BIT(FETCH, MUNGE, size, start)				\
+({										\
+	unsigned long mask, idx, tmp, sz = (size), __start = (start);		\
+										\
+	if (unlikely(__start >= sz))						\
+		goto out;							\
+										\
+	mask = MUNGE(BITMAP_FIRST_WORD_MASK(__start));				\
+	idx = __start / BITS_PER_LONG;						\
+										\
+	for (tmp = (FETCH) & mask; !tmp; tmp = (FETCH)) {			\
+		if ((idx + 1) * BITS_PER_LONG >= sz)				\
+			goto out;						\
+		idx++;								\
+	}									\
+										\
+	sz = min(idx * BITS_PER_LONG + __ffs(MUNGE(tmp)), sz);			\
+out:										\
+	sz;									\
+})
+
+unsigned long _find_next_bit(const unsigned long *addr, unsigned long nbits, unsigned long start)
+{
+	return FIND_NEXT_BIT(addr[idx], /* nop */, nbits, start);
+}
+
+unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits,
+				  unsigned long start)
+{
+	return FIND_NEXT_BIT(~addr[idx], /* nop */, nbits, start);
+}
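With these in place, the stub can scan the bitmap through the familiar find_next_bit() interface. A sketch of the typical query (variable names assumed for illustration, not taken from this diff):

	/* first still-unaccepted unit at or after start_unit */
	unsigned long nbits = unaccepted_table->size * BITS_PER_BYTE;
	unsigned long unit  = _find_next_bit(unaccepted_table->bitmap, nbits, start_unit);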
--- /dev/null
+++ b/drivers/firmware/efi/libstub/unaccepted_memory.c
@@ -0,0 +1,222 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/efi.h>
+#include <asm/efi.h>
+#include "efistub.h"
+
+struct efi_unaccepted_memory *unaccepted_table;
+
+efi_status_t allocate_unaccepted_bitmap(__u32 nr_desc,
+					struct efi_boot_memmap *map)
+{
+	efi_guid_t unaccepted_table_guid = LINUX_EFI_UNACCEPTED_MEM_TABLE_GUID;
+	u64 unaccepted_start = ULLONG_MAX, unaccepted_end = 0, bitmap_size;
+	efi_status_t status;
+	int i;
+
+	/* Check if the table is already installed */
+	unaccepted_table = get_efi_config_table(unaccepted_table_guid);
+	if (unaccepted_table) {
+		if (unaccepted_table->version != 1) {
+			efi_err("Unknown version of unaccepted memory table\n");
+			return EFI_UNSUPPORTED;
+		}
+		return EFI_SUCCESS;
+	}
+
+	/* Check if there's any unaccepted memory and find the max address */
+	for (i = 0; i < nr_desc; i++) {
+		efi_memory_desc_t *d;
+		unsigned long m = (unsigned long)map->map;
+
+		d = efi_early_memdesc_ptr(m, map->desc_size, i);
+		if (d->type != EFI_UNACCEPTED_MEMORY)
+			continue;
+
+		unaccepted_start = min(unaccepted_start, d->phys_addr);
+		unaccepted_end = max(unaccepted_end,
+				     d->phys_addr + d->num_pages * PAGE_SIZE);
+	}
+
+	if (unaccepted_start == ULLONG_MAX)
+		return EFI_SUCCESS;
+
+	unaccepted_start = round_down(unaccepted_start,
+				      EFI_UNACCEPTED_UNIT_SIZE);
+	unaccepted_end = round_up(unaccepted_end, EFI_UNACCEPTED_UNIT_SIZE);
+
+	/*
+	 * If unaccepted memory is present, allocate a bitmap to track what
+	 * memory has to be accepted before access.
+	 *
+	 * One bit in the bitmap represents 2MiB in the address space:
+	 * A 4k bitmap can track 64GiB of physical address space.
+	 *
+	 * In the worst case scenario -- a huge hole in the middle of the
+	 * address space -- It needs 256MiB to handle 4PiB of the address
+	 * space.
+	 *
+	 * The bitmap will be populated in setup_e820() according to the memory
+	 * map after efi_exit_boot_services().
+	 */
+	bitmap_size = DIV_ROUND_UP(unaccepted_end - unaccepted_start,
+				   EFI_UNACCEPTED_UNIT_SIZE * BITS_PER_BYTE);
+
+	status = efi_bs_call(allocate_pool, EFI_LOADER_DATA,
+			     sizeof(*unaccepted_table) + bitmap_size,
+			     (void **)&unaccepted_table);
+	if (status != EFI_SUCCESS) {
+		efi_err("Failed to allocate unaccepted memory config table\n");
+		return status;
+	}
+
+	unaccepted_table->version = 1;
+	unaccepted_table->unit_size = EFI_UNACCEPTED_UNIT_SIZE;
+	unaccepted_table->phys_base = unaccepted_start;
+	unaccepted_table->size = bitmap_size;
+	memset(unaccepted_table->bitmap, 0, bitmap_size);
+
+	status = efi_bs_call(install_configuration_table,
+			     &unaccepted_table_guid, unaccepted_table);
+	if (status != EFI_SUCCESS) {
+		efi_bs_call(free_pool, unaccepted_table);
+		efi_err("Failed to install unaccepted memory config table!\n");
+	}
+
+	return status;
+}
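The sizing comment in allocate_unaccepted_bitmap() above checks out with unit_size = 2MiB (EFI_UNACCEPTED_UNIT_SIZE is PMD_SIZE on x86-64):

	/* coverage = bitmap_bytes * 8 bits/byte * unit_size             */
	/*   4 KiB bitmap:  4096 * 8 * 2 MiB = 64 GiB of address space   */
	/* 256 MiB bitmap:  2^28 * 8 * 2 MiB =  4 PiB of address space   */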
/*
|
||||
* The accepted memory bitmap only works at unit_size granularity. Take
|
||||
* unaligned start/end addresses and either:
|
||||
* 1. Accepts the memory immediately and in its entirety
|
||||
* 2. Accepts unaligned parts, and marks *some* aligned part unaccepted
|
||||
*
|
||||
* The function will never reach the bitmap_set() with zero bits to set.
|
||||
*/
|
||||
void process_unaccepted_memory(u64 start, u64 end)
|
||||
{
|
||||
u64 unit_size = unaccepted_table->unit_size;
|
||||
u64 unit_mask = unaccepted_table->unit_size - 1;
|
||||
u64 bitmap_size = unaccepted_table->size;
|
||||
|
||||
/*
|
||||
* Ensure that at least one bit will be set in the bitmap by
|
||||
* immediately accepting all regions under 2*unit_size. This is
|
||||
* imprecise and may immediately accept some areas that could
|
||||
* have been represented in the bitmap. But, results in simpler
|
||||
* code below
|
||||
*
|
||||
* Consider case like this (assuming unit_size == 2MB):
|
||||
*
|
||||
* | 4k | 2044k | 2048k |
|
||||
* ^ 0x0 ^ 2MB ^ 4MB
|
||||
*
|
||||
* Only the first 4k has been accepted. The 0MB->2MB region can not be
|
||||
* represented in the bitmap. The 2MB->4MB region can be represented in
|
||||
* the bitmap. But, the 0MB->4MB region is <2*unit_size and will be
|
||||
* immediately accepted in its entirety.
|
||||
*/
|
||||
if (end - start < 2 * unit_size) {
|
||||
arch_accept_memory(start, end);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* No matter how the start and end are aligned, at least one unaccepted
|
||||
* unit_size area will remain to be marked in the bitmap.
|
||||
*/
|
||||
|
||||
/* Immediately accept a <unit_size piece at the start: */
|
||||
if (start & unit_mask) {
|
||||
arch_accept_memory(start, round_up(start, unit_size));
|
||||
start = round_up(start, unit_size);
|
||||
}
|
||||
|
||||
/* Immediately accept a <unit_size piece at the end: */
|
||||
if (end & unit_mask) {
|
||||
arch_accept_memory(round_down(end, unit_size), end);
|
||||
end = round_down(end, unit_size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Accept part of the range that before phys_base and cannot be recorded
|
||||
* into the bitmap.
|
||||
*/
|
||||
if (start < unaccepted_table->phys_base) {
|
||||
arch_accept_memory(start,
|
||||
min(unaccepted_table->phys_base, end));
|
||||
start = unaccepted_table->phys_base;
|
||||
}
|
||||
|
||||
/* Nothing to record */
|
||||
if (end < unaccepted_table->phys_base)
|
||||
return;
|
||||
|
||||
/* Translate to offsets from the beginning of the bitmap */
|
||||
start -= unaccepted_table->phys_base;
|
||||
end -= unaccepted_table->phys_base;
|
||||
|
||||
/* Accept memory that doesn't fit into bitmap */
|
||||
if (end > bitmap_size * unit_size * BITS_PER_BYTE) {
|
||||
unsigned long phys_start, phys_end;
|
||||
|
||||
phys_start = bitmap_size * unit_size * BITS_PER_BYTE +
|
||||
unaccepted_table->phys_base;
|
||||
phys_end = end + unaccepted_table->phys_base;
|
||||
|
||||
arch_accept_memory(phys_start, phys_end);
|
||||
end = bitmap_size * unit_size * BITS_PER_BYTE;
|
||||
}
|
||||
|
||||
/*
|
||||
* 'start' and 'end' are now both unit_size-aligned.
|
||||
* Record the range as being unaccepted:
|
||||
*/
|
||||
bitmap_set(unaccepted_table->bitmap,
|
||||
start / unit_size, (end - start) / unit_size);
|
||||
}
|
||||
|
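The edge handling can be traced with concrete numbers. The sketch below reproduces the trimming decisions in plain C, with arch_accept_memory() replaced by a printf; addresses and helper names are invented for illustration:

#include <stdio.h>
#include <stdint.h>

#define UNIT_SIZE (2ULL << 20)	/* 2MiB, as on x86 */

static void fake_accept(uint64_t s, uint64_t e)
{
	printf("accept now:   [%#llx, %#llx)\n",
	       (unsigned long long)s, (unsigned long long)e);
}

int main(void)
{
	uint64_t start = 0x201000;	/* 2MiB + 4k: unaligned head */
	uint64_t end   = 0x801000;	/* 8MiB + 4k: unaligned tail */
	uint64_t mask  = UNIT_SIZE - 1;

	/* <unit_size head piece is accepted eagerly */
	if (start & mask) {
		fake_accept(start, (start + mask) & ~mask);
		start = (start + mask) & ~mask;
	}
	/* <unit_size tail piece is accepted eagerly */
	if (end & mask) {
		fake_accept(end & ~mask, end);
		end &= ~mask;
	}
	/* What remains is unit-aligned and goes into the bitmap: [4MiB, 8MiB) */
	printf("defer to map: [%#llx, %#llx)\n",
	       (unsigned long long)start, (unsigned long long)end);
	return 0;
}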
/* Boot-stub variant: runs single-threaded, so unlike the runtime version it takes no lock */
void accept_memory(phys_addr_t start, phys_addr_t end)
{
	unsigned long range_start, range_end;
	unsigned long bitmap_size;
	u64 unit_size;

	if (!unaccepted_table)
		return;

	unit_size = unaccepted_table->unit_size;

	/*
	 * Only care for the part of the range that is represented
	 * in the bitmap.
	 */
	if (start < unaccepted_table->phys_base)
		start = unaccepted_table->phys_base;
	if (end < unaccepted_table->phys_base)
		return;

	/* Translate to offsets from the beginning of the bitmap */
	start -= unaccepted_table->phys_base;
	end -= unaccepted_table->phys_base;

	/* Make sure not to overrun the bitmap */
	if (end > unaccepted_table->size * unit_size * BITS_PER_BYTE)
		end = unaccepted_table->size * unit_size * BITS_PER_BYTE;

	range_start = start / unit_size;
	bitmap_size = DIV_ROUND_UP(end, unit_size);

	for_each_set_bitrange_from(range_start, range_end,
				   unaccepted_table->bitmap, bitmap_size) {
		unsigned long phys_start, phys_end;

		phys_start = range_start * unit_size + unaccepted_table->phys_base;
		phys_end = range_end * unit_size + unaccepted_table->phys_base;

		arch_accept_memory(phys_start, phys_end);
		bitmap_clear(unaccepted_table->bitmap,
			     range_start, range_end - range_start);
	}
}
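for_each_set_bitrange_from() hands the loop maximal runs of set bits, so whole contiguous regions are accepted with one arch call each. A toy model of that iteration over a small bitmap (not the kernel iterator, just the shape of its behavior):

#include <stdio.h>

int main(void)
{
	/* Toy bitmap: bits 2-4 and 7 set -> two ranges: [2,5) and [7,8) */
	unsigned long map = 0x9c;	/* 0b10011100 */
	unsigned int bit = 0, nbits = 8;

	while (bit < nbits) {
		unsigned int rs, re;

		while (bit < nbits && !(map & (1UL << bit)))
			bit++;			/* skip clear bits */
		rs = bit;
		while (bit < nbits && (map & (1UL << bit)))
			bit++;			/* consume the run of set bits */
		re = bit;
		if (rs < nbits)
			printf("accept range [%u, %u)\n", rs, re);
	}
	return 0;
}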
drivers/firmware/efi/libstub/x86-stub.c
@@ -26,6 +26,17 @@ const efi_dxe_services_table_t *efi_dxe_table;
 u32 image_offset __section(".data");
 static efi_loaded_image_t *image = NULL;
 
+typedef union sev_memory_acceptance_protocol sev_memory_acceptance_protocol_t;
+union sev_memory_acceptance_protocol {
+	struct {
+		efi_status_t (__efiapi * allow_unaccepted_memory)(
+			sev_memory_acceptance_protocol_t *);
+	};
+	struct {
+		u32 allow_unaccepted_memory;
+	} mixed_mode;
+};
+
 static efi_status_t
 preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
 {
@@ -310,6 +321,29 @@ setup_memory_protection(unsigned long image_base, unsigned long image_size)
 #endif
 }
 
+static void setup_unaccepted_memory(void)
+{
+	efi_guid_t mem_acceptance_proto = OVMF_SEV_MEMORY_ACCEPTANCE_PROTOCOL_GUID;
+	sev_memory_acceptance_protocol_t *proto;
+	efi_status_t status;
+
+	if (!IS_ENABLED(CONFIG_UNACCEPTED_MEMORY))
+		return;
+
+	/*
+	 * Enable unaccepted memory before calling exit boot services in order
+	 * for the UEFI to not accept all memory on EBS.
+	 */
+	status = efi_bs_call(locate_protocol, &mem_acceptance_proto, NULL,
+			     (void **)&proto);
+	if (status != EFI_SUCCESS)
+		return;
+
+	status = efi_call_proto(proto, allow_unaccepted_memory);
+	if (status != EFI_SUCCESS)
+		efi_err("Memory acceptance protocol failed\n");
+}
+
 static const efi_char16_t apple[] = L"Apple";
 
 static void setup_quirks(struct boot_params *boot_params,
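The two-member union mirrors the pattern used throughout the stub: efi_call_proto() dispatches through the typed native member on 64-bit firmware and through the 32-bit mixed_mode slot when a 64-bit kernel runs on 32-bit firmware. A stripped-down model of the dual-ABI layout, outside of EFI and purely illustrative:

#include <stdio.h>
#include <stdint.h>

/* Dual-ABI protocol descriptor: native function pointer vs. 32-bit slot */
typedef union demo_protocol demo_protocol_t;
union demo_protocol {
	struct {
		int (*frob)(demo_protocol_t *);	/* native 64-bit member */
	};
	struct {
		uint32_t frob;			/* 32-bit mixed-mode slot */
	} mixed_mode;
};

static int native_frob(demo_protocol_t *p)
{
	(void)p;
	return 42;
}

int main(void)
{
	demo_protocol_t proto = { .frob = native_frob };

	/* A native caller uses the typed member directly */
	printf("%d\n", proto.frob(&proto));
	return 0;
}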
@@ -613,6 +647,16 @@ setup_e820(struct boot_params *params, struct setup_data *e820ext, u32 e820ext_size)
 			e820_type = E820_TYPE_PMEM;
 			break;
 
+		case EFI_UNACCEPTED_MEMORY:
+			if (!IS_ENABLED(CONFIG_UNACCEPTED_MEMORY)) {
+				efi_warn_once(
+"The system has unaccepted memory, but kernel does not support it\nConsider enabling CONFIG_UNACCEPTED_MEMORY\n");
+				continue;
+			}
+			e820_type = E820_TYPE_RAM;
+			process_unaccepted_memory(d->phys_addr,
+						  d->phys_addr + PAGE_SIZE * d->num_pages);
+			break;
 		default:
 			continue;
 		}
@@ -681,28 +725,27 @@ static efi_status_t allocate_e820(struct boot_params *params,
 				  struct setup_data **e820ext,
 				  u32 *e820ext_size)
 {
-	unsigned long map_size, desc_size, map_key;
+	struct efi_boot_memmap *map;
 	efi_status_t status;
-	__u32 nr_desc, desc_version;
+	__u32 nr_desc;
 
-	/* Only need the size of the mem map and size of each mem descriptor */
-	map_size = 0;
-	status = efi_bs_call(get_memory_map, &map_size, NULL, &map_key,
-			     &desc_size, &desc_version);
-	if (status != EFI_BUFFER_TOO_SMALL)
-		return (status != EFI_SUCCESS) ? status : EFI_UNSUPPORTED;
+	status = efi_get_memory_map(&map, false);
+	if (status != EFI_SUCCESS)
+		return status;
 
-	nr_desc = map_size / desc_size + EFI_MMAP_NR_SLACK_SLOTS;
-
-	if (nr_desc > ARRAY_SIZE(params->e820_table)) {
-		u32 nr_e820ext = nr_desc - ARRAY_SIZE(params->e820_table);
+	nr_desc = map->map_size / map->desc_size;
+	if (nr_desc > ARRAY_SIZE(params->e820_table) - EFI_MMAP_NR_SLACK_SLOTS) {
+		u32 nr_e820ext = nr_desc - ARRAY_SIZE(params->e820_table) +
+				 EFI_MMAP_NR_SLACK_SLOTS;
 
 		status = alloc_e820ext(nr_e820ext, e820ext, e820ext_size);
 		if (status != EFI_SUCCESS)
 			return status;
 	}
 
-	return EFI_SUCCESS;
+	if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY) && status == EFI_SUCCESS)
+		status = allocate_unaccepted_bitmap(nr_desc, map);
+
+	efi_bs_call(free_pool, map);
+	return status;
 }
 
 struct exit_boot_struct {
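The reworked check keeps EFI_MMAP_NR_SLACK_SLOTS of headroom inside the fixed e820 table instead of adding slack on top of the descriptor count. A quick arithmetic sketch; the 128-entry table size matches ARRAY_SIZE(params->e820_table) on x86, while the descriptor count is invented:

#include <stdio.h>

#define E820_TABLE_ENTRIES	128	/* ARRAY_SIZE(params->e820_table) */
#define EFI_MMAP_NR_SLACK_SLOTS	8

int main(void)
{
	unsigned int nr_desc = 140;	/* descriptors in the EFI memory map */

	if (nr_desc > E820_TABLE_ENTRIES - EFI_MMAP_NR_SLACK_SLOTS) {
		unsigned int nr_e820ext = nr_desc - E820_TABLE_ENTRIES +
					  EFI_MMAP_NR_SLACK_SLOTS;

		/* 140 - 128 + 8 = 20 entries spill into the setup_data extension */
		printf("extended e820 entries: %u\n", nr_e820ext);
	}
	return 0;
}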
@@ -899,6 +942,8 @@ asmlinkage unsigned long efi_main(efi_handle_t handle,
 
 	setup_quirks(boot_params, bzimage_addr, buffer_end - buffer_start);
 
+	setup_unaccepted_memory();
+
 	status = exit_boot(boot_params, handle);
 	if (status != EFI_SUCCESS) {
 		efi_err("exit_boot() failed!\n");
drivers/firmware/efi/unaccepted_memory.c (new file)
@@ -0,0 +1,147 @@
// SPDX-License-Identifier: GPL-2.0-only

#include <linux/efi.h>
#include <linux/memblock.h>
#include <linux/spinlock.h>
#include <asm/unaccepted_memory.h>

/* Protects unaccepted memory bitmap */
static DEFINE_SPINLOCK(unaccepted_memory_lock);

/*
 * accept_memory() -- Consult bitmap and accept the memory if needed.
 *
 * Only memory that is explicitly marked as unaccepted in the bitmap requires
 * an action. All the remaining memory is implicitly accepted and doesn't need
 * acceptance.
 *
 * No need to accept:
 *  - anything if the system has no unaccepted table;
 *  - memory that is below phys_base;
 *  - memory that is above the memory addressable by the bitmap.
 */
void accept_memory(phys_addr_t start, phys_addr_t end)
{
	struct efi_unaccepted_memory *unaccepted;
	unsigned long range_start, range_end;
	unsigned long flags;
	u64 unit_size;

	unaccepted = efi_get_unaccepted_table();
	if (!unaccepted)
		return;

	unit_size = unaccepted->unit_size;

	/*
	 * Only care for the part of the range that is represented
	 * in the bitmap.
	 */
	if (start < unaccepted->phys_base)
		start = unaccepted->phys_base;
	if (end < unaccepted->phys_base)
		return;

	/* Translate to offsets from the beginning of the bitmap */
	start -= unaccepted->phys_base;
	end -= unaccepted->phys_base;

	/*
	 * load_unaligned_zeropad() can lead to unwanted loads across page
	 * boundaries. The unwanted loads are typically harmless. But, they
	 * might be made to totally unrelated or even unmapped memory.
	 * load_unaligned_zeropad() relies on exception fixup (#PF, #GP and now
	 * #VE) to recover from these unwanted loads.
	 *
	 * But, this approach does not work for unaccepted memory. For TDX, a
	 * load from unaccepted memory will not lead to a recoverable exception
	 * within the guest. The guest will exit to the VMM where the only
	 * recourse is to terminate the guest.
	 *
	 * There are two parts to fix this issue and comprehensively avoid
	 * access to unaccepted memory. Together these ensure that an extra
	 * "guard" page is accepted in addition to the memory that needs to be
	 * used:
	 *
	 * 1. Implicitly extend the range_contains_unaccepted_memory(start, end)
	 *    checks up to end+unit_size if 'end' is aligned on a unit_size
	 *    boundary.
	 *
	 * 2. Implicitly extend accept_memory(start, end) to end+unit_size if
	 *    'end' is aligned on a unit_size boundary. (immediately following
	 *    this comment)
	 */
	if (!(end % unit_size))
		end += unit_size;

	/* Make sure not to overrun the bitmap */
	if (end > unaccepted->size * unit_size * BITS_PER_BYTE)
		end = unaccepted->size * unit_size * BITS_PER_BYTE;

	range_start = start / unit_size;

	spin_lock_irqsave(&unaccepted_memory_lock, flags);
	for_each_set_bitrange_from(range_start, range_end, unaccepted->bitmap,
				   DIV_ROUND_UP(end, unit_size)) {
		unsigned long phys_start, phys_end;
		unsigned long len = range_end - range_start;

		phys_start = range_start * unit_size + unaccepted->phys_base;
		phys_end = range_end * unit_size + unaccepted->phys_base;

		arch_accept_memory(phys_start, phys_end);
		bitmap_clear(unaccepted->bitmap, range_start, len);
	}
	spin_unlock_irqrestore(&unaccepted_memory_lock, flags);
}
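The `end += unit_size` adjustment is the "guard unit" from part 2 of the comment: an aligned end is pushed one unit further, so a load_unaligned_zeropad() that strays past the boundary can only hit memory that has already been accepted. A tiny numeric sketch, with the 2MiB unit assumed:

#include <stdio.h>
#include <stdint.h>

#define UNIT_SIZE (2ULL << 20)	/* 2MiB */

int main(void)
{
	/* Offsets are already relative to phys_base at this point */
	uint64_t end = 4 * UNIT_SIZE;	/* caller's range ends on a unit boundary */

	/*
	 * Accept one extra unit past an aligned end so an unaligned load
	 * crossing the boundary can never touch a still-unaccepted unit.
	 */
	if (!(end % UNIT_SIZE))
		end += UNIT_SIZE;

	/* Prints 5: four requested units plus one guard unit */
	printf("effective end: %llu units\n",
	       (unsigned long long)(end / UNIT_SIZE));
	return 0;
}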
bool range_contains_unaccepted_memory(phys_addr_t start, phys_addr_t end)
{
	struct efi_unaccepted_memory *unaccepted;
	unsigned long flags;
	bool ret = false;
	u64 unit_size;

	unaccepted = efi_get_unaccepted_table();
	if (!unaccepted)
		return false;

	unit_size = unaccepted->unit_size;

	/*
	 * Only care for the part of the range that is represented
	 * in the bitmap.
	 */
	if (start < unaccepted->phys_base)
		start = unaccepted->phys_base;
	if (end < unaccepted->phys_base)
		return false;

	/* Translate to offsets from the beginning of the bitmap */
	start -= unaccepted->phys_base;
	end -= unaccepted->phys_base;

	/*
	 * Also consider the unaccepted state of the *next* page. See fix #1 in
	 * the comment on load_unaligned_zeropad() in accept_memory().
	 */
	if (!(end % unit_size))
		end += unit_size;

	/* Make sure not to overrun the bitmap */
	if (end > unaccepted->size * unit_size * BITS_PER_BYTE)
		end = unaccepted->size * unit_size * BITS_PER_BYTE;

	spin_lock_irqsave(&unaccepted_memory_lock, flags);
	while (start < end) {
		if (test_bit(start / unit_size, unaccepted->bitmap)) {
			ret = true;
			break;
		}

		start += unit_size;
	}
	spin_unlock_irqrestore(&unaccepted_memory_lock, flags);

	return ret;
}
drivers/virt/coco/sevguest/Kconfig
@@ -2,6 +2,7 @@ config SEV_GUEST
 	tristate "AMD SEV Guest driver"
 	default m
 	depends on AMD_MEM_ENCRYPT
+	select CRYPTO
 	select CRYPTO_AEAD2
 	select CRYPTO_GCM
 	help
fs/proc/meminfo.c
@@ -168,6 +168,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		    global_zone_page_state(NR_FREE_CMA_PAGES));
 #endif
 
+#ifdef CONFIG_UNACCEPTED_MEMORY
+	show_val_kb(m, "Unaccepted:     ",
+		    global_zone_page_state(NR_UNACCEPTED));
+#endif
+
 	hugetlb_report_meminfo(m);
 
 	arch_report_meminfo(m);
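With this hooked up, the counter becomes visible to userspace. Illustrative output (the value shown is made up):

    $ grep Unaccepted /proc/meminfo
    Unaccepted:       2097152 kB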
include/linux/efi.h
@@ -108,7 +108,8 @@ typedef struct {
 #define EFI_MEMORY_MAPPED_IO_PORT_SPACE	12
 #define EFI_PAL_CODE			13
 #define EFI_PERSISTENT_MEMORY		14
-#define EFI_MAX_MEMORY_TYPE		15
+#define EFI_UNACCEPTED_MEMORY		15
+#define EFI_MAX_MEMORY_TYPE		16
 
 /* Attribute values: */
 #define EFI_MEMORY_UC	((u64)0x0000000000000001ULL)	/* uncached */
@@ -417,6 +418,7 @@ void efi_native_runtime_setup(void);
 #define LINUX_EFI_MOK_VARIABLE_TABLE_GUID	EFI_GUID(0xc451ed2b, 0x9694, 0x45d3, 0xba, 0xba, 0xed, 0x9f, 0x89, 0x88, 0xa3, 0x89)
 #define LINUX_EFI_COCO_SECRET_AREA_GUID		EFI_GUID(0xadf956ad, 0xe98c, 0x484c, 0xae, 0x11, 0xb5, 0x1c, 0x7d, 0x33, 0x64, 0x47)
 #define LINUX_EFI_BOOT_MEMMAP_GUID		EFI_GUID(0x800f683f, 0xd08b, 0x423a, 0xa2, 0x93, 0x96, 0x5c, 0x3c, 0x6f, 0xe2, 0xb4)
+#define LINUX_EFI_UNACCEPTED_MEM_TABLE_GUID	EFI_GUID(0xd5d1de3c, 0x105c, 0x44f9, 0x9e, 0xa9, 0xbc, 0xef, 0x98, 0x12, 0x00, 0x31)
 
 #define RISCV_EFI_BOOT_PROTOCOL_GUID		EFI_GUID(0xccd15fec, 0x6f73, 0x4eec, 0x83, 0x95, 0x3e, 0x69, 0xe4, 0xb9, 0x40, 0xbf)
 
@@ -435,6 +437,9 @@ void efi_native_runtime_setup(void);
 #define DELLEMC_EFI_RCI2_TABLE_GUID	EFI_GUID(0x2d9f28a2, 0xa886, 0x456a, 0x97, 0xa8, 0xf1, 0x1e, 0xf2, 0x4f, 0xf4, 0x55)
 #define AMD_SEV_MEM_ENCRYPT_GUID	EFI_GUID(0x0cf29b71, 0x9e51, 0x433a, 0xa3, 0xb7, 0x81, 0xf3, 0xab, 0x16, 0xb8, 0x75)
 
+/* OVMF protocol GUIDs */
+#define OVMF_SEV_MEMORY_ACCEPTANCE_PROTOCOL_GUID	EFI_GUID(0xc5a010fe, 0x38a7, 0x4531, 0x8a, 0x4a, 0x05, 0x00, 0xd2, 0xfd, 0x16, 0x49)
+
 typedef struct {
 	efi_guid_t guid;
 	u64 table;
@@ -534,6 +539,14 @@ struct efi_boot_memmap {
 	efi_memory_desc_t	map[];
 };
 
+struct efi_unaccepted_memory {
+	u32 version;
+	u32 unit_size;
+	u64 phys_base;
+	u64 size;
+	unsigned long bitmap[];
+};
+
 /*
  * Architecture independent structure for describing a memory map for the
  * benefit of efi_memmap_init_early(), and for passing context between
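Given this layout, locating the bitmap bit for a physical address is plain arithmetic: subtract phys_base, then divide by unit_size. A hedged standalone sketch; every field value below is invented for illustration:

#include <stdio.h>
#include <stdint.h>

struct demo_unaccepted {
	uint32_t version;
	uint32_t unit_size;
	uint64_t phys_base;
	uint64_t size;		/* bitmap size in bytes */
};

int main(void)
{
	struct demo_unaccepted t = {
		.version = 1,
		.unit_size = 2 << 20,		/* 2MiB */
		.phys_base = 0x100000000ULL,	/* 4GiB */
		.size = 4096,
	};
	uint64_t phys = 0x140000000ULL;		/* 5GiB */

	/* Each bit covers one unit_size chunk starting at phys_base */
	uint64_t bit = (phys - t.phys_base) / t.unit_size;

	/* Prints: bit index 512 of 32768 */
	printf("bit index %llu of %llu\n",
	       (unsigned long long)bit,
	       (unsigned long long)(t.size * 8));
	return 0;
}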
@@ -636,6 +649,7 @@ extern struct efi {
 	unsigned long	tpm_final_log;	/* TPM2 Final Events Log table */
 	unsigned long	mokvar_table;	/* MOK variable config table */
 	unsigned long	coco_secret;	/* Confidential computing secret table */
+	unsigned long	unaccepted;	/* Unaccepted memory table */
 
 	efi_get_time_t	*get_time;
 	efi_set_time_t	*set_time;
include/linux/mm.h
@@ -3839,4 +3839,23 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
 }
 #endif
 
+#ifdef CONFIG_UNACCEPTED_MEMORY
+
+bool range_contains_unaccepted_memory(phys_addr_t start, phys_addr_t end);
+void accept_memory(phys_addr_t start, phys_addr_t end);
+
+#else
+
+static inline bool range_contains_unaccepted_memory(phys_addr_t start,
+						    phys_addr_t end)
+{
+	return false;
+}
+
+static inline void accept_memory(phys_addr_t start, phys_addr_t end)
+{
+}
+
+#endif
+
 #endif /* _LINUX_MM_H */
include/linux/mmzone.h
@@ -143,6 +143,9 @@ enum zone_stat_item {
 	NR_ZSPAGES,		/* allocated in zsmalloc */
 #endif
 	NR_FREE_CMA_PAGES,
+#ifdef CONFIG_UNACCEPTED_MEMORY
+	NR_UNACCEPTED,
+#endif
 	NR_VM_ZONE_STAT_ITEMS };
 
 enum node_stat_item {
@@ -910,6 +913,11 @@ struct zone {
 	/* free areas of different sizes */
 	struct free_area	free_area[MAX_ORDER + 1];
 
+#ifdef CONFIG_UNACCEPTED_MEMORY
+	/* Pages to be accepted. All pages on the list are MAX_ORDER */
+	struct list_head	unaccepted_pages;
+#endif
+
 	/* zone flags, see below */
 	unsigned long		flags;
mm/memblock.c
@@ -1436,6 +1436,15 @@ done:
 	 */
 	kmemleak_alloc_phys(found, size, 0);
 
+	/*
+	 * Some Virtual Machine platforms, such as Intel TDX or AMD SEV-SNP,
+	 * require memory to be accepted before it can be used by the
+	 * guest.
+	 *
+	 * Accept the memory of the allocated buffer.
+	 */
+	accept_memory(found, found + size);
+
 	return found;
 }
mm/mm_init.c
@@ -1375,6 +1375,10 @@ static void __meminit zone_init_free_lists(struct zone *zone)
 		INIT_LIST_HEAD(&zone->free_area[order].free_list[t]);
 		zone->free_area[order].nr_free = 0;
 	}
+
+#ifdef CONFIG_UNACCEPTED_MEMORY
+	INIT_LIST_HEAD(&zone->unaccepted_pages);
+#endif
 }
 
 void __meminit init_currently_empty_zone(struct zone *zone,
@@ -1960,6 +1964,9 @@ static void __init deferred_free_range(unsigned long pfn,
 		return;
 	}
 
+	/* Accept chunks smaller than MAX_ORDER upfront */
+	accept_memory(PFN_PHYS(pfn), PFN_PHYS(pfn + nr_pages));
+
 	for (i = 0; i < nr_pages; i++, page++, pfn++) {
 		if (pageblock_aligned(pfn))
 			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
mm/page_alloc.c
@@ -387,6 +387,12 @@ EXPORT_SYMBOL(nr_node_ids);
 EXPORT_SYMBOL(nr_online_nodes);
 #endif
 
+static bool page_contains_unaccepted(struct page *page, unsigned int order);
+static void accept_page(struct page *page, unsigned int order);
+static bool try_to_accept_memory(struct zone *zone, unsigned int order);
+static inline bool has_unaccepted_memory(void);
+static bool __free_unaccepted(struct page *page);
+
 int page_group_by_mobility_disabled __read_mostly;
 
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
@@ -1481,6 +1487,13 @@ void __free_pages_core(struct page *page, unsigned int order)
 
 	atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
 
+	if (page_contains_unaccepted(page, order)) {
+		if (order == MAX_ORDER && __free_unaccepted(page))
+			return;
+
+		accept_page(page, order);
+	}
+
 	/*
 	 * Bypass PCP and place fresh pages right to the tail, primarily
 	 * relevant for memory onlining.
@@ -3159,6 +3172,9 @@ static inline long __zone_watermark_unusable_free(struct zone *z,
 	if (!(alloc_flags & ALLOC_CMA))
 		unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
 #endif
+#ifdef CONFIG_UNACCEPTED_MEMORY
+	unusable_free += zone_page_state(z, NR_UNACCEPTED);
+#endif
 
 	return unusable_free;
 }
@@ -3458,6 +3474,11 @@ retry:
 				       gfp_mask)) {
 			int ret;
 
+			if (has_unaccepted_memory()) {
+				if (try_to_accept_memory(zone, order))
+					goto try_this_zone;
+			}
+
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 			/*
 			 * Watermark failed for this zone, but see if we can
@@ -3510,6 +3531,11 @@ try_this_zone:
 
 			return page;
 		} else {
+			if (has_unaccepted_memory()) {
+				if (try_to_accept_memory(zone, order))
+					goto try_this_zone;
+			}
+
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 			/* Try again if zone has deferred pages */
 			if (deferred_pages_enabled()) {
@@ -7215,3 +7241,150 @@ bool has_managed_dma(void)
 	return false;
 }
 #endif /* CONFIG_ZONE_DMA */
+
+#ifdef CONFIG_UNACCEPTED_MEMORY
+
+/* Counts number of zones with unaccepted pages. */
+static DEFINE_STATIC_KEY_FALSE(zones_with_unaccepted_pages);
+
+static bool lazy_accept = true;
+
+static int __init accept_memory_parse(char *p)
+{
+	if (!strcmp(p, "lazy")) {
+		lazy_accept = true;
+		return 0;
+	} else if (!strcmp(p, "eager")) {
+		lazy_accept = false;
+		return 0;
+	} else {
+		return -EINVAL;
+	}
+}
+early_param("accept_memory", accept_memory_parse);
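This registers an early boot parameter, so the acceptance policy can be chosen from the kernel command line. Illustrative boot entries:

    # default: leave memory unaccepted and accept on demand from the allocator
    linux ... accept_memory=lazy

    # accept all memory during boot instead
    linux ... accept_memory=eager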
+
+static bool page_contains_unaccepted(struct page *page, unsigned int order)
+{
+	phys_addr_t start = page_to_phys(page);
+	phys_addr_t end = start + (PAGE_SIZE << order);
+
+	return range_contains_unaccepted_memory(start, end);
+}
+
+static void accept_page(struct page *page, unsigned int order)
+{
+	phys_addr_t start = page_to_phys(page);
+
+	accept_memory(start, start + (PAGE_SIZE << order));
+}
+
+static bool try_to_accept_memory_one(struct zone *zone)
+{
+	unsigned long flags;
+	struct page *page;
+	bool last;
+
+	if (list_empty(&zone->unaccepted_pages))
+		return false;
+
+	spin_lock_irqsave(&zone->lock, flags);
+	page = list_first_entry_or_null(&zone->unaccepted_pages,
+					struct page, lru);
+	if (!page) {
+		spin_unlock_irqrestore(&zone->lock, flags);
+		return false;
+	}
+
+	list_del(&page->lru);
+	last = list_empty(&zone->unaccepted_pages);
+
+	__mod_zone_freepage_state(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
+	__mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES);
+	spin_unlock_irqrestore(&zone->lock, flags);
+
+	accept_page(page, MAX_ORDER);
+
+	__free_pages_ok(page, MAX_ORDER, FPI_TO_TAIL);
+
+	if (last)
+		static_branch_dec(&zones_with_unaccepted_pages);
+
+	return true;
+}
+
+static bool try_to_accept_memory(struct zone *zone, unsigned int order)
+{
+	long to_accept;
+	bool ret = false;
+
+	/* How much to accept to get to the high watermark? */
+	to_accept = high_wmark_pages(zone) -
+		    (zone_page_state(zone, NR_FREE_PAGES) -
+		    __zone_watermark_unusable_free(zone, order, 0));
+
+	/* Accept at least one page */
+	do {
+		if (!try_to_accept_memory_one(zone))
+			break;
+		ret = true;
+		to_accept -= MAX_ORDER_NR_PAGES;
+	} while (to_accept > 0);
+
+	return ret;
+}
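The target is the deficit against the zone's high watermark, repaid in MAX_ORDER-sized chunks, with at least one chunk accepted per call. A numeric sketch of the loop with invented page counts:

#include <stdio.h>

#define MAX_ORDER_NR_PAGES	(1 << 10)	/* 4MiB chunks with 4k pages */

int main(void)
{
	long high_wmark = 20000;	/* zone high watermark, in pages */
	long usable_free = 16500;	/* free pages minus unusable ones */
	long to_accept = high_wmark - usable_free;
	int accepted = 0;

	/* Accept at least one chunk, then keep going until the deficit is gone */
	do {
		accepted++;
		to_accept -= MAX_ORDER_NR_PAGES;
	} while (to_accept > 0);

	printf("chunks accepted: %d\n", accepted);	/* 4 chunks = 4096 pages */
	return 0;
}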
+
+static inline bool has_unaccepted_memory(void)
+{
+	return static_branch_unlikely(&zones_with_unaccepted_pages);
+}
+
+static bool __free_unaccepted(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+	unsigned long flags;
+	bool first = false;
+
+	if (!lazy_accept)
+		return false;
+
+	spin_lock_irqsave(&zone->lock, flags);
+	first = list_empty(&zone->unaccepted_pages);
+	list_add_tail(&page->lru, &zone->unaccepted_pages);
+	__mod_zone_freepage_state(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
+	__mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES);
+	spin_unlock_irqrestore(&zone->lock, flags);
+
+	if (first)
+		static_branch_inc(&zones_with_unaccepted_pages);
+
+	return true;
+}
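The static key is bumped only on a zone's empty-to-non-empty transition and dropped when the last chunk leaves the list, keeping has_unaccepted_memory() a patched-out branch on machines with nothing left to accept. A minimal counter model of that bookkeeping, with plain C stand-ins for the static key and the per-zone list:

#include <stdio.h>
#include <stdbool.h>

static int zones_with_unaccepted;	/* stand-in for the static key */

static void add_chunk(int *list_len)
{
	bool first = (*list_len == 0);

	(*list_len)++;
	if (first)
		zones_with_unaccepted++;	/* static_branch_inc() */
}

static void remove_chunk(int *list_len)
{
	(*list_len)--;
	if (*list_len == 0)
		zones_with_unaccepted--;	/* static_branch_dec() */
}

int main(void)
{
	int zone_list = 0;

	add_chunk(&zone_list);		/* key: 0 -> 1 */
	add_chunk(&zone_list);		/* key stays 1 */
	remove_chunk(&zone_list);	/* key stays 1 */
	remove_chunk(&zone_list);	/* key: 1 -> 0 */
	printf("key value: %d\n", zones_with_unaccepted);
	return 0;
}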
+
+#else
+
+static bool page_contains_unaccepted(struct page *page, unsigned int order)
+{
+	return false;
+}
+
+static void accept_page(struct page *page, unsigned int order)
+{
+}
+
+static bool try_to_accept_memory(struct zone *zone, unsigned int order)
+{
+	return false;
+}
+
+static inline bool has_unaccepted_memory(void)
+{
+	return false;
+}
+
+static bool __free_unaccepted(struct page *page)
+{
+	BUILD_BUG();
+	return false;
+}
+
+#endif /* CONFIG_UNACCEPTED_MEMORY */
mm/vmstat.c
@@ -1180,6 +1180,9 @@ const char * const vmstat_text[] = {
 	"nr_zspages",
 #endif
 	"nr_free_cma",
+#ifdef CONFIG_UNACCEPTED_MEMORY
+	"nr_unaccepted",
+#endif
 
 	/* enum numa_stat_item counters */
 #ifdef CONFIG_NUMA