Merge branch 'for-next/boot' into for-next/core

* for-next/boot: (34 commits)
  arm64: fix KASAN_INLINE
  arm64: Add an override for ID_AA64SMFR0_EL1.FA64
  arm64: Add the arm64.nosve command line option
  arm64: Add the arm64.nosme command line option
  arm64: Expose a __check_override primitive for oddball features
  arm64: Allow the idreg override to deal with variable field width
  arm64: Factor out checking of a feature against the override into a macro
  arm64: Allow sticky E2H when entering EL1
  arm64: Save state of HCR_EL2.E2H before switch to EL1
  arm64: Rename the VHE switch to "finalise_el2"
  arm64: mm: fix booting with 52-bit address space
  arm64: head: remove __PHYS_OFFSET
  arm64: lds: use PROVIDE instead of conditional definitions
  arm64: setup: drop early FDT pointer helpers
  arm64: head: avoid relocating the kernel twice for KASLR
  arm64: kaslr: defer initialization to initcall where permitted
  arm64: head: record CPU boot mode after enabling the MMU
  arm64: head: populate kernel page tables with MMU and caches on
  arm64: head: factor out TTBR1 assignment into a macro
  arm64: idreg-override: use early FDT mapping in ID map
  ...
Will Deacon 2022-07-25 10:59:15 +01:00
commit f96d67a8af
24 changed files with 753 additions and 615 deletions

View File

@@ -400,6 +400,12 @@
 	arm64.nomte	[ARM64] Unconditionally disable Memory Tagging Extension
 			support
 
+	arm64.nosve	[ARM64] Unconditionally disable Scalable Vector
+			Extension support
+
+	arm64.nosme	[ARM64] Unconditionally disable Scalable Matrix
+			Extension support
+
 	ataflop=	[HW,M68k]
 	atarimouse=	[HW,MOUSE] Atari Mouse
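For illustration only (a hypothetical bootargs line, not part of the patch), the new options are passed like any other early parameter:

    console=ttyAMA0 root=/dev/vda2 arm64.nosme
    console=ttyAMA0 root=/dev/vda2 arm64.nosve

Note from the idreg-override changes further down in this diff that arm64.nosve is aliased to overrides that also force SME off, while arm64.nosme leaves SVE alone.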

View File

@@ -60,12 +60,13 @@ these functions (see arch/arm{,64}/include/asm/virt.h):
 * ::
 
-    x0 = HVC_VHE_RESTART (arm64 only)
+    x0 = HVC_FINALISE_EL2 (arm64 only)
 
-  Attempt to upgrade the kernel's exception level from EL1 to EL2 by enabling
-  the VHE mode. This is conditioned by the CPU supporting VHE, the EL2 MMU
-  being off, and VHE not being disabled by any other means (command line
-  option, for example).
+  Finish configuring EL2 depending on the command-line options,
+  including an attempt to upgrade the kernel's exception level from
+  EL1 to EL2 by enabling the VHE mode. This is conditioned by the CPU
+  supporting VHE, the EL2 MMU being off, and VHE not being disabled by
+  any other means (command line option, for example).
 
 Any other value of r0/x0 triggers a hypervisor-specific handling,
 which is not documented here.
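As a rough C sketch of how this interface is invoked from EL1 (illustration only; in this series the call is made from the finalise_el2 assembly shown in the hyp-stub.S changes below, and the helper name here is invented):

    /* Hypothetical helper: issue a hyp stub hypercall and return its result */
    static inline unsigned long hyp_stub_call(unsigned long func_id)
    {
            register unsigned long x0 asm("x0") = func_id;

            asm volatile("hvc #0" : "+r"(x0) : : "memory");
            return x0;              /* HVC_STUB_ERR if the stub rejected the call */
    }

    /* e.g. hyp_stub_call(HVC_FINALISE_EL2) once the overrides are known */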

View File

@@ -359,6 +359,20 @@ alternative_cb_end
 	bfi	\valreg, \t1sz, #TCR_T1SZ_OFFSET, #TCR_TxSZ_WIDTH
 	.endm
 
+/*
+ * idmap_get_t0sz - get the T0SZ value needed to cover the ID map
+ *
+ * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
+ * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
+ * this number conveniently equals the number of leading zeroes in
+ * the physical address of _end.
+ */
+	.macro	idmap_get_t0sz, reg
+	adrp	\reg, _end
+	orr	\reg, \reg, #(1 << VA_BITS_MIN) - 1
+	clz	\reg, \reg
+	.endm
+
 /*
  * tcr_compute_pa_size - set TCR.(I)PS to the highest supported
  *			 ID_AA64MMFR0_EL1.PARange value
@@ -465,6 +479,18 @@ alternative_endif
 	_cond_uaccess_extable .Licache_op\@, \fixup
 	.endm
 
+/*
+ * load_ttbr1 - install @pgtbl as a TTBR1 page table
+ *		 pgtbl preserved
+ *		 tmp1/tmp2 clobbered, either may overlap with pgtbl
+ */
+	.macro		load_ttbr1, pgtbl, tmp1, tmp2
+	phys_to_ttbr	\tmp1, \pgtbl
+	offset_ttbr1	\tmp1, \tmp2
+	msr		ttbr1_el1, \tmp1
+	isb
+	.endm
+
 /*
  * To prevent the possibility of old and new partial table walks being visible
  * in the tlb, switch the ttbr to a zero page when we invalidate the old
@@ -478,10 +504,7 @@
 	isb
 	tlbi	vmalle1
 	dsb	nsh
-	phys_to_ttbr \tmp, \page_table
-	offset_ttbr1 \tmp, \tmp2
-	msr	ttbr1_el1, \tmp
-	isb
+	load_ttbr1 \page_table, \tmp, \tmp2
 	.endm
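Read as C, the idmap_get_t0sz macro above does the following (a sketch for illustration, not kernel code): OR-ing in the low VA_BITS_MIN bits guarantees that at least the minimum VA size is claimed, and the leading-zero count of the result is then 64 minus the number of address bits the ID map must cover, which is exactly the TCR_EL1.T0SZ value wanted.

    /* Sketch of the computation performed by idmap_get_t0sz */
    static unsigned int idmap_get_t0sz(unsigned long pa_of_end,
                                       unsigned int va_bits_min)
    {
            unsigned long v = pa_of_end | ((1UL << va_bits_min) - 1);

            return __builtin_clzll(v);      /* T0SZ == 64 - bits needed */
    }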

View File

@@ -908,7 +908,10 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
 }
 
 extern struct arm64_ftr_override id_aa64mmfr1_override;
+extern struct arm64_ftr_override id_aa64pfr0_override;
 extern struct arm64_ftr_override id_aa64pfr1_override;
+extern struct arm64_ftr_override id_aa64zfr0_override;
+extern struct arm64_ftr_override id_aa64smfr0_override;
 extern struct arm64_ftr_override id_aa64isar1_override;
 extern struct arm64_ftr_override id_aa64isar2_override;

View File

@ -129,64 +129,6 @@
msr cptr_el2, x0 // Disable copro. traps to EL2 msr cptr_el2, x0 // Disable copro. traps to EL2
.endm .endm
/* SVE register access */
.macro __init_el2_nvhe_sve
mrs x1, id_aa64pfr0_el1
ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
cbz x1, .Lskip_sve_\@
bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps
msr cptr_el2, x0 // Disable copro. traps to EL2
isb
mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
msr_s SYS_ZCR_EL2, x1 // length for EL1.
.Lskip_sve_\@:
.endm
/* SME register access and priority mapping */
.macro __init_el2_nvhe_sme
mrs x1, id_aa64pfr1_el1
ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4
cbz x1, .Lskip_sme_\@
bic x0, x0, #CPTR_EL2_TSM // Also disable SME traps
msr cptr_el2, x0 // Disable copro. traps to EL2
isb
mrs x1, sctlr_el2
orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps
msr sctlr_el2, x1
isb
mov x1, #0 // SMCR controls
mrs_s x2, SYS_ID_AA64SMFR0_EL1
ubfx x2, x2, #ID_AA64SMFR0_EL1_FA64_SHIFT, #1 // Full FP in SM?
cbz x2, .Lskip_sme_fa64_\@
orr x1, x1, SMCR_ELx_FA64_MASK
.Lskip_sme_fa64_\@:
orr x1, x1, #SMCR_ELx_LEN_MASK // Enable full SME vector
msr_s SYS_SMCR_EL2, x1 // length for EL1.
mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported?
ubfx x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1
cbz x1, .Lskip_sme_\@
msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal
mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present?
ubfx x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4
cbz x1, .Lskip_sme_\@
mrs_s x1, SYS_HCRX_EL2
orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping
msr_s SYS_HCRX_EL2, x1
.Lskip_sme_\@:
.endm
/* Disable any fine grained traps */ /* Disable any fine grained traps */
.macro __init_el2_fgt .macro __init_el2_fgt
mrs x1, id_aa64mmfr0_el1 mrs x1, id_aa64mmfr0_el1
@ -250,8 +192,6 @@
__init_el2_hstr __init_el2_hstr
__init_el2_nvhe_idregs __init_el2_nvhe_idregs
__init_el2_nvhe_cptr __init_el2_nvhe_cptr
__init_el2_nvhe_sve
__init_el2_nvhe_sme
__init_el2_fgt __init_el2_fgt
__init_el2_nvhe_prepare_eret __init_el2_nvhe_prepare_eret
.endm .endm

View File

@@ -8,6 +8,7 @@
 #ifndef __ASM_KERNEL_PGTABLE_H
 #define __ASM_KERNEL_PGTABLE_H
 
+#include <asm/boot.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/sparsemem.h>
 
@@ -35,10 +36,8 @@
  */
 #if ARM64_KERNEL_USES_PMD_MAPS
 #define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS - 1)
-#define IDMAP_PGTABLE_LEVELS	(ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT) - 1)
 #else
 #define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS)
-#define IDMAP_PGTABLE_LEVELS	(ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT))
 #endif
 
@@ -87,7 +86,14 @@
 			+ EARLY_PUDS((vstart), (vend))	/* each PUD needs a next level page table */ \
 			+ EARLY_PMDS((vstart), (vend)))	/* each PMD needs a next level page table */
 #define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end))
-#define IDMAP_DIR_SIZE		(IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
+
+/* the initial ID map may need two extra pages if it needs to be extended */
+#if VA_BITS < 48
+#define INIT_IDMAP_DIR_SIZE	((INIT_IDMAP_DIR_PAGES + 2) * PAGE_SIZE)
+#else
+#define INIT_IDMAP_DIR_SIZE	(INIT_IDMAP_DIR_PAGES * PAGE_SIZE)
+#endif
+#define INIT_IDMAP_DIR_PAGES	EARLY_PAGES(KIMAGE_VADDR, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE)
 
 /* Initial memory map size */
 #if ARM64_KERNEL_USES_PMD_MAPS
@@ -107,9 +113,11 @@
 #define SWAPPER_PMD_FLAGS	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
 
 #if ARM64_KERNEL_USES_PMD_MAPS
-#define SWAPPER_MM_MMUFLAGS	(PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
+#define SWAPPER_RW_MMUFLAGS	(PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
+#define SWAPPER_RX_MMUFLAGS	(SWAPPER_RW_MMUFLAGS | PMD_SECT_RDONLY)
 #else
-#define SWAPPER_MM_MMUFLAGS	(PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
+#define SWAPPER_RW_MMUFLAGS	(PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
+#define SWAPPER_RX_MMUFLAGS	(SWAPPER_RW_MMUFLAGS | PTE_RDONLY)
 #endif
 
 /*

View File

@@ -174,7 +174,11 @@
 #include <linux/types.h>
 #include <asm/bug.h>
 
+#if VA_BITS > 48
 extern u64 vabits_actual;
+#else
+#define vabits_actual	((u64)VA_BITS)
+#endif
 
 extern s64 memstart_addr;
 /* PHYS_OFFSET - the physical address of the start of memory. */

View File

@@ -60,8 +60,7 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
  * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
  * physical memory, in which case it will be smaller.
  */
-extern u64 idmap_t0sz;
-extern u64 idmap_ptrs_per_pgd;
+extern int idmap_t0sz;
 
 /*
  * Ensure TCR.T0SZ is set to the provided value.
@@ -106,13 +105,18 @@ static inline void cpu_uninstall_idmap(void)
 	cpu_switch_mm(mm->pgd, mm);
 }
 
-static inline void cpu_install_idmap(void)
+static inline void __cpu_install_idmap(pgd_t *idmap)
 {
 	cpu_set_reserved_ttbr0();
 	local_flush_tlb_all();
 	cpu_set_idmap_tcr_t0sz();
 
-	cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm);
+	cpu_switch_mm(lm_alias(idmap), &init_mm);
+}
+
+static inline void cpu_install_idmap(void)
+{
+	__cpu_install_idmap(idmap_pg_dir);
 }
 
 /*
@@ -143,7 +147,7 @@ static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz)
  * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
  * avoiding the possibility of conflicting TLB entries being allocated.
  */
-static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp)
+static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
 {
 	typedef void (ttbr_replace_func)(phys_addr_t);
 	extern ttbr_replace_func idmap_cpu_replace_ttbr1;
@@ -166,7 +170,7 @@ static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp)
 	replace_phys = (void *)__pa_symbol(function_nocfi(idmap_cpu_replace_ttbr1));
 
-	cpu_install_idmap();
+	__cpu_install_idmap(idmap);
 	replace_phys(ttbr1);
 	cpu_uninstall_idmap();
 }

View File

@@ -36,9 +36,9 @@
 #define HVC_RESET_VECTORS 2
 
 /*
- * HVC_VHE_RESTART - Upgrade the CPU from EL1 to EL2, if possible
+ * HVC_FINALISE_EL2 - Upgrade the CPU from EL1 to EL2, if possible
  */
-#define HVC_VHE_RESTART	3
+#define HVC_FINALISE_EL2	3
 
 /* Max number of HYP stub hypercalls */
 #define HVC_STUB_HCALL_NR 4
@@ -49,6 +49,13 @@
 #define BOOT_CPU_MODE_EL1	(0xe11)
 #define BOOT_CPU_MODE_EL2	(0xe12)
 
+/*
+ * Flags returned together with the boot mode, but not preserved in
+ * __boot_cpu_mode. Used by the idreg override code to work out the
+ * boot state.
+ */
+#define BOOT_CPU_FLAG_E2H	BIT_ULL(32)
+
 #ifndef __ASSEMBLY__
 
 #include <asm/ptrace.h>
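A small C model of how a consumer can take apart the value that init_kernel_el() now returns (illustration only; the kernel does this in assembly and in init_feature_override(), using the constants defined above):

    /* Sketch: decode the boot status word, mode in bits [31:0], flags above */
    static int booted_vhe_at_el2(unsigned long long boot_status)
    {
            unsigned long long mode  = boot_status & 0xffffffffULL;
            unsigned long long flags = boot_status & ~0xffffffffULL;

            return mode == 0xe12 /* BOOT_CPU_MODE_EL2 */ &&
                   (flags & (1ULL << 32) /* BOOT_CPU_FLAG_E2H */) != 0;
    }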

View File

@@ -64,7 +64,7 @@ obj-$(CONFIG_ACPI)			+= acpi.o
 obj-$(CONFIG_ACPI_NUMA)			+= acpi_numa.o
 obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)	+= acpi_parking_protocol.o
 obj-$(CONFIG_PARAVIRT)			+= paravirt.o
-obj-$(CONFIG_RANDOMIZE_BASE)		+= kaslr.o
+obj-$(CONFIG_RANDOMIZE_BASE)		+= kaslr.o pi/
 obj-$(CONFIG_HIBERNATION)		+= hibernate.o hibernate-asm.o
 obj-$(CONFIG_ELF_CORE)			+= elfcore.o
 obj-$(CONFIG_KEXEC_CORE)		+= machine_kexec.o relocate_kernel.o	\

View File

@@ -633,7 +633,10 @@ static const struct arm64_ftr_bits ftr_raz[] = {
 	__ARM64_FTR_REG_OVERRIDE(#id, id, table, &no_override)
 
 struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override;
+struct arm64_ftr_override __ro_after_init id_aa64pfr0_override;
 struct arm64_ftr_override __ro_after_init id_aa64pfr1_override;
+struct arm64_ftr_override __ro_after_init id_aa64zfr0_override;
+struct arm64_ftr_override __ro_after_init id_aa64smfr0_override;
 struct arm64_ftr_override __ro_after_init id_aa64isar1_override;
 struct arm64_ftr_override __ro_after_init id_aa64isar2_override;
@@ -670,11 +673,14 @@ static const struct __ftr_reg_entry {
 	ARM64_FTR_REG(SYS_ID_MMFR5_EL1, ftr_id_mmfr5),
 
 	/* Op1 = 0, CRn = 0, CRm = 4 */
-	ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
+	ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0,
+			       &id_aa64pfr0_override),
 	ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1,
 			       &id_aa64pfr1_override),
-	ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0),
-	ARM64_FTR_REG(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0),
+	ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0,
+			       &id_aa64zfr0_override),
+	ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0,
+			       &id_aa64smfr0_override),
 
 	/* Op1 = 0, CRn = 0, CRm = 5 */
 	ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
@@ -3295,7 +3301,7 @@ subsys_initcall_sync(init_32bit_el0_mask);
 
 static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap)
 {
-	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+	cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
 }
/* /*

View File

@ -37,8 +37,6 @@
#include "efi-header.S" #include "efi-header.S"
#define __PHYS_OFFSET KERNEL_START
#if (PAGE_OFFSET & 0x1fffff) != 0 #if (PAGE_OFFSET & 0x1fffff) != 0
#error PAGE_OFFSET must be at least 2MB aligned #error PAGE_OFFSET must be at least 2MB aligned
#endif #endif
@ -51,9 +49,6 @@
* MMU = off, D-cache = off, I-cache = on or off, * MMU = off, D-cache = off, I-cache = on or off,
* x0 = physical address to the FDT blob. * x0 = physical address to the FDT blob.
* *
* This code is mostly position independent so you call this at
* __pa(PAGE_OFFSET).
*
* Note that the callee-saved registers are used for storing variables * Note that the callee-saved registers are used for storing variables
* that are useful before the MMU is enabled. The allocations are described * that are useful before the MMU is enabled. The allocations are described
* in the entry routines. * in the entry routines.
@ -82,25 +77,34 @@
* primary lowlevel boot path: * primary lowlevel boot path:
* *
* Register Scope Purpose * Register Scope Purpose
* x20 primary_entry() .. __primary_switch() CPU boot mode
* x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0 * x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0
* x22 create_idmap() .. start_kernel() ID map VA of the DT blob
* x23 primary_entry() .. start_kernel() physical misalignment/KASLR offset * x23 primary_entry() .. start_kernel() physical misalignment/KASLR offset
* x28 __create_page_tables() callee preserved temp register * x24 __primary_switch() linear map KASLR seed
* x19/x20 __primary_switch() callee preserved temp registers * x25 primary_entry() .. start_kernel() supported VA size
* x24 __primary_switch() .. relocate_kernel() current RELR displacement * x28 create_idmap() callee preserved temp register
*/ */
SYM_CODE_START(primary_entry) SYM_CODE_START(primary_entry)
bl preserve_boot_args bl preserve_boot_args
bl init_kernel_el // w0=cpu_boot_mode bl init_kernel_el // w0=cpu_boot_mode
adrp x23, __PHYS_OFFSET mov x20, x0
and x23, x23, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0 bl create_idmap
bl set_cpu_boot_mode_flag
bl __create_page_tables
/* /*
* The following calls CPU setup code, see arch/arm64/mm/proc.S for * The following calls CPU setup code, see arch/arm64/mm/proc.S for
* details. * details.
* On return, the CPU will be ready for the MMU to be turned on and * On return, the CPU will be ready for the MMU to be turned on and
* the TCR will have been set. * the TCR will have been set.
*/ */
#if VA_BITS > 48
mrs_s x0, SYS_ID_AA64MMFR2_EL1
tst x0, #0xf << ID_AA64MMFR2_LVA_SHIFT
mov x0, #VA_BITS
mov x25, #VA_BITS_MIN
csel x25, x25, x0, eq
mov x0, x25
#endif
bl __cpu_setup // initialise processor bl __cpu_setup // initialise processor
b __primary_switch b __primary_switch
SYM_CODE_END(primary_entry) SYM_CODE_END(primary_entry)
@ -122,28 +126,16 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
b dcache_inval_poc // tail call b dcache_inval_poc // tail call
SYM_CODE_END(preserve_boot_args) SYM_CODE_END(preserve_boot_args)
/* SYM_FUNC_START_LOCAL(clear_page_tables)
* Macro to create a table entry to the next page. /*
* * Clear the init page tables.
* tbl: page table address */
* virt: virtual address adrp x0, init_pg_dir
* shift: #imm page table shift adrp x1, init_pg_end
* ptrs: #imm pointers per table page sub x2, x1, x0
* mov x1, xzr
* Preserves: virt b __pi_memset // tail call
* Corrupts: ptrs, tmp1, tmp2 SYM_FUNC_END(clear_page_tables)
* Returns: tbl -> next level table page address
*/
.macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
add \tmp1, \tbl, #PAGE_SIZE
phys_to_pte \tmp2, \tmp1
orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type
lsr \tmp1, \virt, #\shift
sub \ptrs, \ptrs, #1
and \tmp1, \tmp1, \ptrs // table index
str \tmp2, [\tbl, \tmp1, lsl #3]
add \tbl, \tbl, #PAGE_SIZE // next level table page
.endm
/* /*
* Macro to populate page table entries, these entries can be pointers to the next level * Macro to populate page table entries, these entries can be pointers to the next level
@ -179,31 +171,20 @@ SYM_CODE_END(preserve_boot_args)
* vstart: virtual address of start of range * vstart: virtual address of start of range
* vend: virtual address of end of range - we map [vstart, vend] * vend: virtual address of end of range - we map [vstart, vend]
* shift: shift used to transform virtual address into index * shift: shift used to transform virtual address into index
* ptrs: number of entries in page table * order: #imm 2log(number of entries in page table)
* istart: index in table corresponding to vstart * istart: index in table corresponding to vstart
* iend: index in table corresponding to vend * iend: index in table corresponding to vend
* count: On entry: how many extra entries were required in previous level, scales * count: On entry: how many extra entries were required in previous level, scales
* our end index. * our end index.
* On exit: returns how many extra entries required for next page table level * On exit: returns how many extra entries required for next page table level
* *
* Preserves: vstart, vend, shift, ptrs * Preserves: vstart, vend
* Returns: istart, iend, count * Returns: istart, iend, count
*/ */
.macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count .macro compute_indices, vstart, vend, shift, order, istart, iend, count
lsr \iend, \vend, \shift ubfx \istart, \vstart, \shift, \order
mov \istart, \ptrs ubfx \iend, \vend, \shift, \order
sub \istart, \istart, #1 add \iend, \iend, \count, lsl \order
and \iend, \iend, \istart // iend = (vend >> shift) & (ptrs - 1)
mov \istart, \ptrs
mul \istart, \istart, \count
add \iend, \iend, \istart // iend += count * ptrs
// our entries span multiple tables
lsr \istart, \vstart, \shift
mov \count, \ptrs
sub \count, \count, #1
and \istart, \istart, \count
sub \count, \iend, \istart sub \count, \iend, \istart
.endm .endm
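The rewritten compute_indices above leans on ubfx, which extracts \order bits of the virtual address starting at bit \shift. A C model of what it computes for one page table level (sketch only, not kernel code):

    /* Sketch: index range at one level, carrying over tables from the level above */
    static void compute_indices(unsigned long long vstart, unsigned long long vend,
                                unsigned int shift, unsigned int order,
                                unsigned long long *istart, unsigned long long *iend,
                                unsigned long long *count)
    {
            *istart = (vstart >> shift) & ((1ULL << order) - 1);    /* ubfx */
            *iend   = (vend   >> shift) & ((1ULL << order) - 1);    /* ubfx */
            *iend  += *count << order;  /* extra tables required by the previous level */
            *count  = *iend - *istart;  /* extra next-level tables this level will need */
    }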
@ -218,119 +199,116 @@ SYM_CODE_END(preserve_boot_args)
* vend: virtual address of end of range - we map [vstart, vend - 1] * vend: virtual address of end of range - we map [vstart, vend - 1]
* flags: flags to use to map last level entries * flags: flags to use to map last level entries
* phys: physical address corresponding to vstart - physical memory is contiguous * phys: physical address corresponding to vstart - physical memory is contiguous
* pgds: the number of pgd entries * order: #imm 2log(number of entries in PGD table)
*
* If extra_shift is set, an extra level will be populated if the end address does
* not fit in 'extra_shift' bits. This assumes vend is in the TTBR0 range.
* *
* Temporaries: istart, iend, tmp, count, sv - these need to be different registers * Temporaries: istart, iend, tmp, count, sv - these need to be different registers
* Preserves: vstart, flags * Preserves: vstart, flags
* Corrupts: tbl, rtbl, vend, istart, iend, tmp, count, sv * Corrupts: tbl, rtbl, vend, istart, iend, tmp, count, sv
*/ */
.macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, order, istart, iend, tmp, count, sv, extra_shift
sub \vend, \vend, #1 sub \vend, \vend, #1
add \rtbl, \tbl, #PAGE_SIZE add \rtbl, \tbl, #PAGE_SIZE
mov \sv, \rtbl
mov \count, #0 mov \count, #0
compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count
.ifnb \extra_shift
tst \vend, #~((1 << (\extra_shift)) - 1)
b.eq .L_\@
compute_indices \vstart, \vend, #\extra_shift, #(PAGE_SHIFT - 3), \istart, \iend, \count
mov \sv, \rtbl
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
mov \tbl, \sv mov \tbl, \sv
.endif
.L_\@:
compute_indices \vstart, \vend, #PGDIR_SHIFT, #\order, \istart, \iend, \count
mov \sv, \rtbl mov \sv, \rtbl
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
mov \tbl, \sv
#if SWAPPER_PGTABLE_LEVELS > 3 #if SWAPPER_PGTABLE_LEVELS > 3
compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count compute_indices \vstart, \vend, #PUD_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
mov \sv, \rtbl
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
mov \tbl, \sv mov \tbl, \sv
mov \sv, \rtbl
#endif #endif
#if SWAPPER_PGTABLE_LEVELS > 2 #if SWAPPER_PGTABLE_LEVELS > 2
compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
mov \sv, \rtbl
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
mov \tbl, \sv mov \tbl, \sv
#endif #endif
compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1 bic \rtbl, \phys, #SWAPPER_BLOCK_SIZE - 1
populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp populate_entries \tbl, \rtbl, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
.endm .endm
/* /*
* Setup the initial page tables. We only setup the barest amount which is * Remap a subregion created with the map_memory macro with modified attributes
* required to get the kernel running. The following sections are required: * or output address. The entire remapped region must have been covered in the
* - identity mapping to enable the MMU (low address, TTBR0) * invocation of map_memory.
* - first few MB of the kernel linear mapping to jump to once the MMU has *
* been enabled * x0: last level table address (returned in first argument to map_memory)
* x1: start VA of the existing mapping
* x2: start VA of the region to update
* x3: end VA of the region to update (exclusive)
* x4: start PA associated with the region to update
* x5: attributes to set on the updated region
* x6: order of the last level mappings
*/ */
SYM_FUNC_START_LOCAL(__create_page_tables) SYM_FUNC_START_LOCAL(remap_region)
sub x3, x3, #1 // make end inclusive
// Get the index offset for the start of the last level table
lsr x1, x1, x6
bfi x1, xzr, #0, #PAGE_SHIFT - 3
// Derive the start and end indexes into the last level table
// associated with the provided region
lsr x2, x2, x6
lsr x3, x3, x6
sub x2, x2, x1
sub x3, x3, x1
mov x1, #1
lsl x6, x1, x6 // block size at this level
populate_entries x0, x4, x2, x3, x5, x6, x7
ret
SYM_FUNC_END(remap_region)
SYM_FUNC_START_LOCAL(create_idmap)
mov x28, lr mov x28, lr
/* /*
* Invalidate the init page tables to avoid potential dirty cache lines * The ID map carries a 1:1 mapping of the physical address range
* being evicted. Other page tables are allocated in rodata as part of * covered by the loaded image, which could be anywhere in DRAM. This
* the kernel image, and thus are clean to the PoC per the boot * means that the required size of the VA (== PA) space is decided at
* protocol. * boot time, and could be more than the configured size of the VA
*/ * space for ordinary kernel and user space mappings.
adrp x0, init_pg_dir
adrp x1, init_pg_end
bl dcache_inval_poc
/*
* Clear the init page tables.
*/
adrp x0, init_pg_dir
adrp x1, init_pg_end
sub x1, x1, x0
1: stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
subs x1, x1, #64
b.ne 1b
mov x7, SWAPPER_MM_MMUFLAGS
/*
* Create the identity mapping.
*/
adrp x0, idmap_pg_dir
adrp x3, __idmap_text_start // __pa(__idmap_text_start)
#ifdef CONFIG_ARM64_VA_BITS_52
mrs_s x6, SYS_ID_AA64MMFR2_EL1
and x6, x6, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
mov x5, #52
cbnz x6, 1f
#endif
mov x5, #VA_BITS_MIN
1:
adr_l x6, vabits_actual
str x5, [x6]
dmb sy
dc ivac, x6 // Invalidate potentially stale cache line
/*
* VA_BITS may be too small to allow for an ID mapping to be created
* that covers system RAM if that is located sufficiently high in the
* physical address space. So for the ID map, use an extended virtual
* range in that case, and configure an additional translation level
* if needed.
* *
* Calculate the maximum allowed value for TCR_EL1.T0SZ so that the * There are three cases to consider here:
* entire ID map region can be mapped. As T0SZ == (64 - #bits used), * - 39 <= VA_BITS < 48, and the ID map needs up to 48 VA bits to cover
* this number conveniently equals the number of leading zeroes in * the placement of the image. In this case, we configure one extra
* the physical address of __idmap_text_end. * level of translation on the fly for the ID map only. (This case
* also covers 42-bit VA/52-bit PA on 64k pages).
*
* - VA_BITS == 48, and the ID map needs more than 48 VA bits. This can
* only happen when using 64k pages, in which case we need to extend
* the root level table rather than add a level. Note that we can
* treat this case as 'always extended' as long as we take care not
* to program an unsupported T0SZ value into the TCR register.
*
* - Combinations that would require two additional levels of
* translation are not supported, e.g., VA_BITS==36 on 16k pages, or
* VA_BITS==39/4k pages with 5-level paging, where the input address
* requires more than 47 or 48 bits, respectively.
*/ */
adrp x5, __idmap_text_end
clz x5, x5
cmp x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough?
b.ge 1f // .. then skip VA range extension
adr_l x6, idmap_t0sz
str x5, [x6]
dmb sy
dc ivac, x6 // Invalidate potentially stale cache line
#if (VA_BITS < 48) #if (VA_BITS < 48)
#define IDMAP_PGD_ORDER (VA_BITS - PGDIR_SHIFT)
#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) #define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
#define EXTRA_PTRS (1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))
/* /*
* If VA_BITS < 48, we have to configure an additional table level. * If VA_BITS < 48, we have to configure an additional table level.
@ -342,36 +320,40 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
#if VA_BITS != EXTRA_SHIFT #if VA_BITS != EXTRA_SHIFT
#error "Mismatch between VA_BITS and page size/number of translation levels" #error "Mismatch between VA_BITS and page size/number of translation levels"
#endif #endif
mov x4, EXTRA_PTRS
create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
#else #else
#define IDMAP_PGD_ORDER (PHYS_MASK_SHIFT - PGDIR_SHIFT)
#define EXTRA_SHIFT
/* /*
* If VA_BITS == 48, we don't have to configure an additional * If VA_BITS == 48, we don't have to configure an additional
* translation level, but the top-level table has more entries. * translation level, but the top-level table has more entries.
*/ */
mov x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
str_l x4, idmap_ptrs_per_pgd, x5
#endif #endif
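To make the first case in the comment above concrete (a worked example, not text from the patch): with 4 KB pages and VA_BITS == 39, PGDIR_SHIFT is 30, so EXTRA_SHIFT = PGDIR_SHIFT + PAGE_SHIFT - 3 = 30 + 12 - 3 = 39 = VA_BITS, which is the relationship the #error check above enforces. The level inserted on the fly is then a single extra page of 2^(PAGE_SHIFT - 3) = 512 entries, extending the ID map's reach from 39 to 39 + 9 = 48 bits of physical address.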
1: adrp x0, init_idmap_pg_dir
ldr_l x4, idmap_ptrs_per_pgd adrp x3, _text
adr_l x6, __idmap_text_end // __pa(__idmap_text_end) adrp x6, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
mov x7, SWAPPER_RX_MMUFLAGS
map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14 map_memory x0, x1, x3, x6, x7, x3, IDMAP_PGD_ORDER, x10, x11, x12, x13, x14, EXTRA_SHIFT
/* /* Remap the kernel page tables r/w in the ID map */
* Map the kernel image (starting with PHYS_OFFSET). adrp x1, _text
*/ adrp x2, init_pg_dir
adrp x0, init_pg_dir adrp x3, init_pg_end
mov_q x5, KIMAGE_VADDR // compile time __va(_text) bic x4, x2, #SWAPPER_BLOCK_SIZE - 1
add x5, x5, x23 // add KASLR displacement mov x5, SWAPPER_RW_MMUFLAGS
mov x4, PTRS_PER_PGD mov x6, #SWAPPER_BLOCK_SHIFT
adrp x6, _end // runtime __pa(_end) bl remap_region
adrp x3, _text // runtime __pa(_text)
sub x6, x6, x3 // _end - _text
add x6, x6, x5 // runtime __va(_end)
map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14 /* Remap the FDT after the kernel image */
adrp x1, _text
adrp x22, _end + SWAPPER_BLOCK_SIZE
bic x2, x22, #SWAPPER_BLOCK_SIZE - 1
bfi x22, x21, #0, #SWAPPER_BLOCK_SHIFT // remapped FDT address
add x3, x2, #MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
bic x4, x21, #SWAPPER_BLOCK_SIZE - 1
mov x5, SWAPPER_RW_MMUFLAGS
mov x6, #SWAPPER_BLOCK_SHIFT
bl remap_region
/* /*
* Since the page tables have been populated with non-cacheable * Since the page tables have been populated with non-cacheable
@ -380,16 +362,27 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
*/ */
dmb sy dmb sy
adrp x0, idmap_pg_dir adrp x0, init_idmap_pg_dir
adrp x1, idmap_pg_end adrp x1, init_idmap_pg_end
bl dcache_inval_poc bl dcache_inval_poc
adrp x0, init_pg_dir
adrp x1, init_pg_end
bl dcache_inval_poc
ret x28 ret x28
SYM_FUNC_END(__create_page_tables) SYM_FUNC_END(create_idmap)
SYM_FUNC_START_LOCAL(create_kernel_mapping)
adrp x0, init_pg_dir
mov_q x5, KIMAGE_VADDR // compile time __va(_text)
add x5, x5, x23 // add KASLR displacement
adrp x6, _end // runtime __pa(_end)
adrp x3, _text // runtime __pa(_text)
sub x6, x6, x3 // _end - _text
add x6, x6, x5 // runtime __va(_end)
mov x7, SWAPPER_RW_MMUFLAGS
map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
dsb ishst // sync with page table walker
ret
SYM_FUNC_END(create_kernel_mapping)
/* /*
* Initialize CPU registers with task-specific and cpu-specific context. * Initialize CPU registers with task-specific and cpu-specific context.
@ -420,7 +413,7 @@ SYM_FUNC_END(__create_page_tables)
/* /*
* The following fragment of code is executed with the MMU enabled. * The following fragment of code is executed with the MMU enabled.
* *
* x0 = __PHYS_OFFSET * x0 = __pa(KERNEL_START)
*/ */
SYM_FUNC_START_LOCAL(__primary_switched) SYM_FUNC_START_LOCAL(__primary_switched)
adr_l x4, init_task adr_l x4, init_task
@ -439,6 +432,9 @@ SYM_FUNC_START_LOCAL(__primary_switched)
sub x4, x4, x0 // the kernel virtual and sub x4, x4, x0 // the kernel virtual and
str_l x4, kimage_voffset, x5 // physical mappings str_l x4, kimage_voffset, x5 // physical mappings
mov x0, x20
bl set_cpu_boot_mode_flag
// Clear BSS // Clear BSS
adr_l x0, __bss_start adr_l x0, __bss_start
mov x1, xzr mov x1, xzr
@ -447,35 +443,30 @@ SYM_FUNC_START_LOCAL(__primary_switched)
bl __pi_memset bl __pi_memset
dsb ishst // Make zero page visible to PTW dsb ishst // Make zero page visible to PTW
#if VA_BITS > 48
adr_l x8, vabits_actual // Set this early so KASAN early init
str x25, [x8] // ... observes the correct value
dc civac, x8 // Make visible to booting secondaries
#endif
#ifdef CONFIG_RANDOMIZE_BASE
adrp x5, memstart_offset_seed // Save KASLR linear map seed
strh w24, [x5, :lo12:memstart_offset_seed]
#endif
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
bl kasan_early_init bl kasan_early_init
#endif #endif
mov x0, x21 // pass FDT address in x0 mov x0, x21 // pass FDT address in x0
bl early_fdt_map // Try mapping the FDT early bl early_fdt_map // Try mapping the FDT early
mov x0, x20 // pass the full boot status
bl init_feature_override // Parse cpu feature overrides bl init_feature_override // Parse cpu feature overrides
#ifdef CONFIG_RANDOMIZE_BASE mov x0, x20
tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized? bl finalise_el2 // Prefer VHE if possible
b.ne 0f
bl kaslr_early_init // parse FDT for KASLR options
cbz x0, 0f // KASLR disabled? just proceed
orr x23, x23, x0 // record KASLR offset
ldp x29, x30, [sp], #16 // we must enable KASLR, return
ret // to __primary_switch()
0:
#endif
bl switch_to_vhe // Prefer VHE if possible
ldp x29, x30, [sp], #16 ldp x29, x30, [sp], #16
bl start_kernel bl start_kernel
ASM_BUG() ASM_BUG()
SYM_FUNC_END(__primary_switched) SYM_FUNC_END(__primary_switched)
.pushsection ".rodata", "a"
SYM_DATA_START(kimage_vaddr)
.quad _text
SYM_DATA_END(kimage_vaddr)
EXPORT_SYMBOL(kimage_vaddr)
.popsection
/* /*
* end early head section, begin head code that is also used for * end early head section, begin head code that is also used for
* hotplug and needs to have the same protections as the text region * hotplug and needs to have the same protections as the text region
@ -490,8 +481,9 @@ EXPORT_SYMBOL(kimage_vaddr)
* Since we cannot always rely on ERET synchronizing writes to sysregs (e.g. if * Since we cannot always rely on ERET synchronizing writes to sysregs (e.g. if
* SCTLR_ELx.EOS is clear), we place an ISB prior to ERET. * SCTLR_ELx.EOS is clear), we place an ISB prior to ERET.
* *
* Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x0 if
* booted in EL1 or EL2 respectively. * booted in EL1 or EL2 respectively, with the top 32 bits containing
* potential context flags. These flags are *not* stored in __boot_cpu_mode.
*/ */
SYM_FUNC_START(init_kernel_el) SYM_FUNC_START(init_kernel_el)
mrs x0, CurrentEL mrs x0, CurrentEL
@ -520,6 +512,8 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
msr vbar_el2, x0 msr vbar_el2, x0
isb isb
mov_q x1, INIT_SCTLR_EL1_MMU_OFF
/* /*
* Fruity CPUs seem to have HCR_EL2.E2H set to RES1, * Fruity CPUs seem to have HCR_EL2.E2H set to RES1,
* making it impossible to start in nVHE mode. Is that * making it impossible to start in nVHE mode. Is that
@ -529,34 +523,19 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
and x0, x0, #HCR_E2H and x0, x0, #HCR_E2H
cbz x0, 1f cbz x0, 1f
/* Switching to VHE requires a sane SCTLR_EL1 as a start */ /* Set a sane SCTLR_EL1, the VHE way */
mov_q x0, INIT_SCTLR_EL1_MMU_OFF msr_s SYS_SCTLR_EL12, x1
msr_s SYS_SCTLR_EL12, x0 mov x2, #BOOT_CPU_FLAG_E2H
b 2f
/*
* Force an eret into a helper "function", and let it return
* to our original caller... This makes sure that we have
* initialised the basic PSTATE state.
*/
mov x0, #INIT_PSTATE_EL2
msr spsr_el1, x0
adr x0, __cpu_stick_to_vhe
msr elr_el1, x0
eret
1: 1:
mov_q x0, INIT_SCTLR_EL1_MMU_OFF msr sctlr_el1, x1
msr sctlr_el1, x0 mov x2, xzr
2:
msr elr_el2, lr msr elr_el2, lr
mov w0, #BOOT_CPU_MODE_EL2 mov w0, #BOOT_CPU_MODE_EL2
orr x0, x0, x2
eret eret
__cpu_stick_to_vhe:
mov x0, #HVC_VHE_RESTART
hvc #0
mov x0, #BOOT_CPU_MODE_EL2
ret
SYM_FUNC_END(init_kernel_el) SYM_FUNC_END(init_kernel_el)
/* /*
@ -569,52 +548,21 @@ SYM_FUNC_START_LOCAL(set_cpu_boot_mode_flag)
b.ne 1f b.ne 1f
add x1, x1, #4 add x1, x1, #4
1: str w0, [x1] // Save CPU boot mode 1: str w0, [x1] // Save CPU boot mode
dmb sy
dc ivac, x1 // Invalidate potentially stale cache line
ret ret
SYM_FUNC_END(set_cpu_boot_mode_flag) SYM_FUNC_END(set_cpu_boot_mode_flag)
/*
* These values are written with the MMU off, but read with the MMU on.
* Writers will invalidate the corresponding address, discarding up to a
* 'Cache Writeback Granule' (CWG) worth of data. The linker script ensures
* sufficient alignment that the CWG doesn't overlap another section.
*/
.pushsection ".mmuoff.data.write", "aw"
/*
* We need to find out the CPU boot mode long after boot, so we need to
* store it in a writable variable.
*
* This is not in .bss, because we set it sufficiently early that the boot-time
* zeroing of .bss would clobber it.
*/
SYM_DATA_START(__boot_cpu_mode)
.long BOOT_CPU_MODE_EL2
.long BOOT_CPU_MODE_EL1
SYM_DATA_END(__boot_cpu_mode)
/*
* The booting CPU updates the failed status @__early_cpu_boot_status,
* with MMU turned off.
*/
SYM_DATA_START(__early_cpu_boot_status)
.quad 0
SYM_DATA_END(__early_cpu_boot_status)
.popsection
/* /*
* This provides a "holding pen" for platforms to hold all secondary * This provides a "holding pen" for platforms to hold all secondary
* cores are held until we're ready for them to initialise. * cores are held until we're ready for them to initialise.
*/ */
SYM_FUNC_START(secondary_holding_pen) SYM_FUNC_START(secondary_holding_pen)
bl init_kernel_el // w0=cpu_boot_mode bl init_kernel_el // w0=cpu_boot_mode
bl set_cpu_boot_mode_flag mrs x2, mpidr_el1
mrs x0, mpidr_el1
mov_q x1, MPIDR_HWID_BITMASK mov_q x1, MPIDR_HWID_BITMASK
and x0, x0, x1 and x2, x2, x1
adr_l x3, secondary_holding_pen_release adr_l x3, secondary_holding_pen_release
pen: ldr x4, [x3] pen: ldr x4, [x3]
cmp x4, x0 cmp x4, x2
b.eq secondary_startup b.eq secondary_startup
wfe wfe
b pen b pen
@ -626,7 +574,6 @@ SYM_FUNC_END(secondary_holding_pen)
*/ */
SYM_FUNC_START(secondary_entry) SYM_FUNC_START(secondary_entry)
bl init_kernel_el // w0=cpu_boot_mode bl init_kernel_el // w0=cpu_boot_mode
bl set_cpu_boot_mode_flag
b secondary_startup b secondary_startup
SYM_FUNC_END(secondary_entry) SYM_FUNC_END(secondary_entry)
@ -634,16 +581,24 @@ SYM_FUNC_START_LOCAL(secondary_startup)
/* /*
* Common entry point for secondary CPUs. * Common entry point for secondary CPUs.
*/ */
bl switch_to_vhe mov x20, x0 // preserve boot mode
bl finalise_el2
bl __cpu_secondary_check52bitva bl __cpu_secondary_check52bitva
#if VA_BITS > 48
ldr_l x0, vabits_actual
#endif
bl __cpu_setup // initialise processor bl __cpu_setup // initialise processor
adrp x1, swapper_pg_dir adrp x1, swapper_pg_dir
adrp x2, idmap_pg_dir
bl __enable_mmu bl __enable_mmu
ldr x8, =__secondary_switched ldr x8, =__secondary_switched
br x8 br x8
SYM_FUNC_END(secondary_startup) SYM_FUNC_END(secondary_startup)
SYM_FUNC_START_LOCAL(__secondary_switched) SYM_FUNC_START_LOCAL(__secondary_switched)
mov x0, x20
bl set_cpu_boot_mode_flag
str_l xzr, __early_cpu_boot_status, x3
adr_l x5, vectors adr_l x5, vectors
msr vbar_el1, x5 msr vbar_el1, x5
isb isb
@ -691,6 +646,7 @@ SYM_FUNC_END(__secondary_too_slow)
* *
* x0 = SCTLR_EL1 value for turning on the MMU. * x0 = SCTLR_EL1 value for turning on the MMU.
* x1 = TTBR1_EL1 value * x1 = TTBR1_EL1 value
* x2 = ID map root table address
* *
* Returns to the caller via x30/lr. This requires the caller to be covered * Returns to the caller via x30/lr. This requires the caller to be covered
* by the .idmap.text section. * by the .idmap.text section.
@ -699,20 +655,15 @@ SYM_FUNC_END(__secondary_too_slow)
* If it isn't, park the CPU * If it isn't, park the CPU
*/ */
SYM_FUNC_START(__enable_mmu) SYM_FUNC_START(__enable_mmu)
mrs x2, ID_AA64MMFR0_EL1 mrs x3, ID_AA64MMFR0_EL1
ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4 ubfx x3, x3, #ID_AA64MMFR0_TGRAN_SHIFT, 4
cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN cmp x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN
b.lt __no_granule_support b.lt __no_granule_support
cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX cmp x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX
b.gt __no_granule_support b.gt __no_granule_support
update_early_cpu_boot_status 0, x2, x3
adrp x2, idmap_pg_dir
phys_to_ttbr x1, x1
phys_to_ttbr x2, x2 phys_to_ttbr x2, x2
msr ttbr0_el1, x2 // load TTBR0 msr ttbr0_el1, x2 // load TTBR0
offset_ttbr1 x1, x3 load_ttbr1 x1, x1, x3
msr ttbr1_el1, x1 // load TTBR1
isb
set_sctlr_el1 x0 set_sctlr_el1 x0
@ -720,7 +671,7 @@ SYM_FUNC_START(__enable_mmu)
SYM_FUNC_END(__enable_mmu) SYM_FUNC_END(__enable_mmu)
SYM_FUNC_START(__cpu_secondary_check52bitva) SYM_FUNC_START(__cpu_secondary_check52bitva)
#ifdef CONFIG_ARM64_VA_BITS_52 #if VA_BITS > 48
ldr_l x0, vabits_actual ldr_l x0, vabits_actual
cmp x0, #52 cmp x0, #52
b.ne 2f b.ne 2f
@ -755,13 +706,10 @@ SYM_FUNC_START_LOCAL(__relocate_kernel)
* Iterate over each entry in the relocation table, and apply the * Iterate over each entry in the relocation table, and apply the
* relocations in place. * relocations in place.
*/ */
ldr w9, =__rela_offset // offset to reloc table adr_l x9, __rela_start
ldr w10, =__rela_size // size of reloc table adr_l x10, __rela_end
mov_q x11, KIMAGE_VADDR // default virtual offset mov_q x11, KIMAGE_VADDR // default virtual offset
add x11, x11, x23 // actual virtual offset add x11, x11, x23 // actual virtual offset
add x9, x9, x11 // __va(.rela)
add x10, x9, x10 // __va(.rela) + sizeof(.rela)
0: cmp x9, x10 0: cmp x9, x10
b.hs 1f b.hs 1f
@ -804,21 +752,9 @@ SYM_FUNC_START_LOCAL(__relocate_kernel)
* entry in x9, the address being relocated by the current address or * entry in x9, the address being relocated by the current address or
* bitmap entry in x13 and the address being relocated by the current * bitmap entry in x13 and the address being relocated by the current
* bit in x14. * bit in x14.
*
* Because addends are stored in place in the binary, RELR relocations
* cannot be applied idempotently. We use x24 to keep track of the
* currently applied displacement so that we can correctly relocate if
* __relocate_kernel is called twice with non-zero displacements (i.e.
* if there is both a physical misalignment and a KASLR displacement).
*/ */
ldr w9, =__relr_offset // offset to reloc table adr_l x9, __relr_start
ldr w10, =__relr_size // size of reloc table adr_l x10, __relr_end
add x9, x9, x11 // __va(.relr)
add x10, x9, x10 // __va(.relr) + sizeof(.relr)
sub x15, x23, x24 // delta from previous offset
cbz x15, 7f // nothing to do if unchanged
mov x24, x23 // save new offset
2: cmp x9, x10 2: cmp x9, x10
b.hs 7f b.hs 7f
@ -826,7 +762,7 @@ SYM_FUNC_START_LOCAL(__relocate_kernel)
tbnz x11, #0, 3f // branch to handle bitmaps tbnz x11, #0, 3f // branch to handle bitmaps
add x13, x11, x23 add x13, x11, x23
ldr x12, [x13] // relocate address entry ldr x12, [x13] // relocate address entry
add x12, x12, x15 add x12, x12, x23
str x12, [x13], #8 // adjust to start of bitmap str x12, [x13], #8 // adjust to start of bitmap
b 2b b 2b
@ -835,7 +771,7 @@ SYM_FUNC_START_LOCAL(__relocate_kernel)
cbz x11, 6f cbz x11, 6f
tbz x11, #0, 5f // skip bit if not set tbz x11, #0, 5f // skip bit if not set
ldr x12, [x14] // relocate bit ldr x12, [x14] // relocate bit
add x12, x12, x15 add x12, x12, x23
str x12, [x14] str x12, [x14]
5: add x14, x14, #8 // move to next bit's address 5: add x14, x14, #8 // move to next bit's address
@ -856,43 +792,32 @@ SYM_FUNC_END(__relocate_kernel)
#endif #endif
SYM_FUNC_START_LOCAL(__primary_switch) SYM_FUNC_START_LOCAL(__primary_switch)
#ifdef CONFIG_RANDOMIZE_BASE adrp x1, reserved_pg_dir
mov x19, x0 // preserve new SCTLR_EL1 value adrp x2, init_idmap_pg_dir
mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value
#endif
adrp x1, init_pg_dir
bl __enable_mmu bl __enable_mmu
#ifdef CONFIG_RELOCATABLE #ifdef CONFIG_RELOCATABLE
#ifdef CONFIG_RELR adrp x23, KERNEL_START
mov x24, #0 // no RELR displacement yet and x23, x23, MIN_KIMG_ALIGN - 1
#endif
bl __relocate_kernel
#ifdef CONFIG_RANDOMIZE_BASE #ifdef CONFIG_RANDOMIZE_BASE
ldr x8, =__primary_switched mov x0, x22
adrp x0, __PHYS_OFFSET adrp x1, init_pg_end
blr x8 mov sp, x1
mov x29, xzr
/* bl __pi_kaslr_early_init
* If we return here, we have a KASLR displacement in x23 which we need and x24, x0, #SZ_2M - 1 // capture memstart offset seed
* to take into account by discarding the current kernel mapping and bic x0, x0, #SZ_2M - 1
* creating a new one. orr x23, x23, x0 // record kernel offset
*/ #endif
pre_disable_mmu_workaround #endif
msr sctlr_el1, x20 // disable the MMU bl clear_page_tables
isb bl create_kernel_mapping
bl __create_page_tables // recreate kernel mapping
tlbi vmalle1 // Remove any stale TLB entries
dsb nsh
isb
set_sctlr_el1 x19 // re-enable the MMU
adrp x1, init_pg_dir
load_ttbr1 x1, x1, x2
#ifdef CONFIG_RELOCATABLE
bl __relocate_kernel bl __relocate_kernel
#endif
#endif #endif
ldr x8, =__primary_switched ldr x8, =__primary_switched
adrp x0, __PHYS_OFFSET adrp x0, KERNEL_START // __pa(KERNEL_START)
br x8 br x8
SYM_FUNC_END(__primary_switch) SYM_FUNC_END(__primary_switch)
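The few instructions above that post-process the value returned by __pi_kaslr_early_init split a single random seed into two displacements. A C model of that bit manipulation (sketch only; the names here are illustrative):

    /* Sketch: how the KASLR seed is split in __primary_switch */
    #define SZ_2M 0x00200000UL

    static void split_kaslr_seed(unsigned long seed,
                                 unsigned long *kaslr_offset,   /* x23 */
                                 unsigned long *memstart_seed)  /* x24 */
    {
            *memstart_seed = seed & (SZ_2M - 1);    /* sub-2M bits seed the linear map layout */
            *kaslr_offset |= seed & ~(SZ_2M - 1);   /* 2 MiB aligned bits displace the kernel */
    }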

View File

@ -16,6 +16,30 @@
#include <asm/ptrace.h> #include <asm/ptrace.h>
#include <asm/virt.h> #include <asm/virt.h>
// Warning, hardcoded register allocation
// This will clobber x1 and x2, and expect x1 to contain
// the id register value as read from the HW
.macro __check_override idreg, fld, width, pass, fail
ubfx x1, x1, #\fld, #\width
cbz x1, \fail
adr_l x1, \idreg\()_override
ldr x2, [x1, FTR_OVR_VAL_OFFSET]
ldr x1, [x1, FTR_OVR_MASK_OFFSET]
ubfx x2, x2, #\fld, #\width
ubfx x1, x1, #\fld, #\width
cmp x1, xzr
and x2, x2, x1
csinv x2, x2, xzr, ne
cbnz x2, \pass
b \fail
.endm
.macro check_override idreg, fld, pass, fail
mrs x1, \idreg\()_el1
__check_override \idreg \fld 4 \pass \fail
.endm
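For readers less used to the assembler idiom, a C model of the decision __check_override makes for one register field (illustration only; the real logic is the macro above): the pass branch is taken only when the CPU advertises the feature and the override, where one was supplied for that field, does not force it to zero.

    /* Sketch: returns nonzero when the field survives the override */
    static int field_enabled(unsigned long long idreg,
                             unsigned long long ovr_val, unsigned long long ovr_mask,
                             unsigned int shift, unsigned int width)
    {
            unsigned long long fld  = (idreg    >> shift) & ((1ULL << width) - 1);
            unsigned long long mask = (ovr_mask >> shift) & ((1ULL << width) - 1);
            unsigned long long val  = (ovr_val  >> shift) & ((1ULL << width) - 1);

            if (!fld)
                    return 0;               /* feature not implemented: cbz x1, fail */
            if (!mask)
                    return 1;               /* no override for this field: csinv -> ~0 */
            return (val & mask) != 0;       /* overridden: pass only if still nonzero */
    }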
.text .text
.pushsection .hyp.text, "ax" .pushsection .hyp.text, "ax"
@ -51,8 +75,8 @@ SYM_CODE_START_LOCAL(elx_sync)
msr vbar_el2, x1 msr vbar_el2, x1
b 9f b 9f
1: cmp x0, #HVC_VHE_RESTART 1: cmp x0, #HVC_FINALISE_EL2
b.eq mutate_to_vhe b.eq __finalise_el2
2: cmp x0, #HVC_SOFT_RESTART 2: cmp x0, #HVC_SOFT_RESTART
b.ne 3f b.ne 3f
@ -73,27 +97,67 @@ SYM_CODE_START_LOCAL(elx_sync)
eret eret
SYM_CODE_END(elx_sync) SYM_CODE_END(elx_sync)
// nVHE? No way! Give me the real thing! SYM_CODE_START_LOCAL(__finalise_el2)
SYM_CODE_START_LOCAL(mutate_to_vhe) check_override id_aa64pfr0 ID_AA64PFR0_SVE_SHIFT .Linit_sve .Lskip_sve
.Linit_sve: /* SVE register access */
mrs x0, cptr_el2 // Disable SVE traps
bic x0, x0, #CPTR_EL2_TZ
msr cptr_el2, x0
isb
mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
msr_s SYS_ZCR_EL2, x1 // length for EL1.
.Lskip_sve:
check_override id_aa64pfr1 ID_AA64PFR1_SME_SHIFT .Linit_sme .Lskip_sme
.Linit_sme: /* SME register access and priority mapping */
mrs x0, cptr_el2 // Disable SME traps
bic x0, x0, #CPTR_EL2_TSM
msr cptr_el2, x0
isb
mrs x1, sctlr_el2
orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps
msr sctlr_el2, x1
isb
mov x0, #0 // SMCR controls
// Full FP in SM?
mrs_s x1, SYS_ID_AA64SMFR0_EL1
__check_override id_aa64smfr0 ID_AA64SMFR0_EL1_FA64_SHIFT 1 .Linit_sme_fa64 .Lskip_sme_fa64
.Linit_sme_fa64:
orr x0, x0, SMCR_ELx_FA64_MASK
.Lskip_sme_fa64:
orr x0, x0, #SMCR_ELx_LEN_MASK // Enable full SME vector
msr_s SYS_SMCR_EL2, x0 // length for EL1.
mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported?
ubfx x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1
cbz x1, .Lskip_sme
msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal
mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present?
ubfx x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4
cbz x1, .Lskip_sme
mrs_s x1, SYS_HCRX_EL2
orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping
msr_s SYS_HCRX_EL2, x1
.Lskip_sme:
// nVHE? No way! Give me the real thing!
// Sanity check: MMU *must* be off // Sanity check: MMU *must* be off
mrs x1, sctlr_el2 mrs x1, sctlr_el2
tbnz x1, #0, 1f tbnz x1, #0, 1f
// Needs to be VHE capable, obviously // Needs to be VHE capable, obviously
mrs x1, id_aa64mmfr1_el1 check_override id_aa64mmfr1 ID_AA64MMFR1_VHE_SHIFT 2f 1f
ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4
cbz x1, 1f
// Check whether VHE is disabled from the command line
adr_l x1, id_aa64mmfr1_override
ldr x2, [x1, FTR_OVR_VAL_OFFSET]
ldr x1, [x1, FTR_OVR_MASK_OFFSET]
ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4
cmp x1, xzr
and x2, x2, x1
csinv x2, x2, xzr, ne
cbnz x2, 2f
1: mov_q x0, HVC_STUB_ERR 1: mov_q x0, HVC_STUB_ERR
eret eret
@ -140,10 +204,10 @@ SYM_CODE_START_LOCAL(mutate_to_vhe)
msr spsr_el1, x0 msr spsr_el1, x0
b enter_vhe b enter_vhe
SYM_CODE_END(mutate_to_vhe) SYM_CODE_END(__finalise_el2)
// At the point where we reach enter_vhe(), we run with // At the point where we reach enter_vhe(), we run with
// the MMU off (which is enforced by mutate_to_vhe()). // the MMU off (which is enforced by __finalise_el2()).
// We thus need to be in the idmap, or everything will // We thus need to be in the idmap, or everything will
// explode when enabling the MMU. // explode when enabling the MMU.
@ -222,12 +286,12 @@ SYM_FUNC_START(__hyp_reset_vectors)
SYM_FUNC_END(__hyp_reset_vectors) SYM_FUNC_END(__hyp_reset_vectors)
/* /*
* Entry point to switch to VHE if deemed capable * Entry point to finalise EL2 and switch to VHE if deemed capable
*
* w0: boot mode, as returned by init_kernel_el()
*/ */
SYM_FUNC_START(switch_to_vhe) SYM_FUNC_START(finalise_el2)
// Need to have booted at EL2 // Need to have booted at EL2
adr_l x1, __boot_cpu_mode
ldr w0, [x1]
cmp w0, #BOOT_CPU_MODE_EL2 cmp w0, #BOOT_CPU_MODE_EL2
b.ne 1f b.ne 1f
@ -236,9 +300,8 @@ SYM_FUNC_START(switch_to_vhe)
cmp x0, #CurrentEL_EL1 cmp x0, #CurrentEL_EL1
b.ne 1f b.ne 1f
// Turn the world upside down mov x0, #HVC_FINALISE_EL2
mov x0, #HVC_VHE_RESTART
hvc #0 hvc #0
1: 1:
ret ret
SYM_FUNC_END(switch_to_vhe) SYM_FUNC_END(finalise_el2)

View File

@ -19,16 +19,21 @@
#define FTR_ALIAS_NAME_LEN 30 #define FTR_ALIAS_NAME_LEN 30
#define FTR_ALIAS_OPTION_LEN 116 #define FTR_ALIAS_OPTION_LEN 116
static u64 __boot_status __initdata;
struct ftr_set_desc { struct ftr_set_desc {
char name[FTR_DESC_NAME_LEN]; char name[FTR_DESC_NAME_LEN];
struct arm64_ftr_override *override; struct arm64_ftr_override *override;
struct { struct {
char name[FTR_DESC_FIELD_LEN]; char name[FTR_DESC_FIELD_LEN];
u8 shift; u8 shift;
u8 width;
bool (*filter)(u64 val); bool (*filter)(u64 val);
} fields[]; } fields[];
}; };
#define FIELD(n, s, f) { .name = n, .shift = s, .width = 4, .filter = f }
static bool __init mmfr1_vh_filter(u64 val) static bool __init mmfr1_vh_filter(u64 val)
{ {
/* /*
@ -37,24 +42,65 @@ static bool __init mmfr1_vh_filter(u64 val)
* the user was trying to force nVHE on us, proceed with * the user was trying to force nVHE on us, proceed with
* attitude adjustment. * attitude adjustment.
*/ */
return !(is_kernel_in_hyp_mode() && val == 0); return !(__boot_status == (BOOT_CPU_FLAG_E2H | BOOT_CPU_MODE_EL2) &&
val == 0);
} }
static const struct ftr_set_desc mmfr1 __initconst = { static const struct ftr_set_desc mmfr1 __initconst = {
.name = "id_aa64mmfr1", .name = "id_aa64mmfr1",
.override = &id_aa64mmfr1_override, .override = &id_aa64mmfr1_override,
.fields = { .fields = {
{ "vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter }, FIELD("vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter),
{} {}
}, },
}; };
static bool __init pfr0_sve_filter(u64 val)
{
/*
* Disabling SVE also means disabling all the features that
* are associated with it. The easiest way to do it is just to
* override id_aa64zfr0_el1 to be 0.
*/
if (!val) {
id_aa64zfr0_override.val = 0;
id_aa64zfr0_override.mask = GENMASK(63, 0);
}
return true;
}
static const struct ftr_set_desc pfr0 __initconst = {
.name = "id_aa64pfr0",
.override = &id_aa64pfr0_override,
.fields = {
FIELD("sve", ID_AA64PFR0_SVE_SHIFT, pfr0_sve_filter),
{}
},
};
static bool __init pfr1_sme_filter(u64 val)
{
/*
* Similarly to SVE, disabling SME also means disabling all
* the features that are associated with it. Just set
* id_aa64smfr0_el1 to 0 and don't look back.
*/
if (!val) {
id_aa64smfr0_override.val = 0;
id_aa64smfr0_override.mask = GENMASK(63, 0);
}
return true;
}
static const struct ftr_set_desc pfr1 __initconst = { static const struct ftr_set_desc pfr1 __initconst = {
.name = "id_aa64pfr1", .name = "id_aa64pfr1",
.override = &id_aa64pfr1_override, .override = &id_aa64pfr1_override,
.fields = { .fields = {
{ "bt", ID_AA64PFR1_BT_SHIFT }, FIELD("bt", ID_AA64PFR1_BT_SHIFT, NULL ),
{ "mte", ID_AA64PFR1_MTE_SHIFT}, FIELD("mte", ID_AA64PFR1_MTE_SHIFT, NULL),
FIELD("sme", ID_AA64PFR1_SME_SHIFT, pfr1_sme_filter),
{} {}
}, },
}; };
@ -63,10 +109,10 @@ static const struct ftr_set_desc isar1 __initconst = {
.name = "id_aa64isar1", .name = "id_aa64isar1",
.override = &id_aa64isar1_override, .override = &id_aa64isar1_override,
.fields = { .fields = {
{ "gpi", ID_AA64ISAR1_EL1_GPI_SHIFT }, FIELD("gpi", ID_AA64ISAR1_EL1_GPI_SHIFT, NULL),
{ "gpa", ID_AA64ISAR1_EL1_GPA_SHIFT }, FIELD("gpa", ID_AA64ISAR1_EL1_GPA_SHIFT, NULL),
{ "api", ID_AA64ISAR1_EL1_API_SHIFT }, FIELD("api", ID_AA64ISAR1_EL1_API_SHIFT, NULL),
{ "apa", ID_AA64ISAR1_EL1_APA_SHIFT }, FIELD("apa", ID_AA64ISAR1_EL1_APA_SHIFT, NULL),
{} {}
}, },
}; };
@ -75,8 +121,18 @@ static const struct ftr_set_desc isar2 __initconst = {
.name = "id_aa64isar2", .name = "id_aa64isar2",
.override = &id_aa64isar2_override, .override = &id_aa64isar2_override,
.fields = { .fields = {
{ "gpa3", ID_AA64ISAR2_EL1_GPA3_SHIFT }, FIELD("gpa3", ID_AA64ISAR2_EL1_GPA3_SHIFT, NULL),
{ "apa3", ID_AA64ISAR2_EL1_APA3_SHIFT }, FIELD("apa3", ID_AA64ISAR2_EL1_APA3_SHIFT, NULL),
{}
},
};
static const struct ftr_set_desc smfr0 __initconst = {
.name = "id_aa64smfr0",
.override = &id_aa64smfr0_override,
.fields = {
/* FA64 is a one bit field... :-/ */
{ "fa64", ID_AA64SMFR0_EL1_FA64_SHIFT, 1, },
{} {}
}, },
}; };
@ -89,16 +145,18 @@ static const struct ftr_set_desc kaslr __initconst = {
.override = &kaslr_feature_override, .override = &kaslr_feature_override,
#endif #endif
.fields = { .fields = {
{ "disabled", 0 }, FIELD("disabled", 0, NULL),
{} {}
}, },
}; };
static const struct ftr_set_desc * const regs[] __initconst = { static const struct ftr_set_desc * const regs[] __initconst = {
&mmfr1, &mmfr1,
&pfr0,
&pfr1, &pfr1,
&isar1, &isar1,
&isar2, &isar2,
&smfr0,
&kaslr, &kaslr,
}; };
@ -108,6 +166,8 @@ static const struct {
} aliases[] __initconst = { } aliases[] __initconst = {
{ "kvm-arm.mode=nvhe", "id_aa64mmfr1.vh=0" }, { "kvm-arm.mode=nvhe", "id_aa64mmfr1.vh=0" },
{ "kvm-arm.mode=protected", "id_aa64mmfr1.vh=0" }, { "kvm-arm.mode=protected", "id_aa64mmfr1.vh=0" },
{ "arm64.nosve", "id_aa64pfr0.sve=0 id_aa64pfr1.sme=0" },
{ "arm64.nosme", "id_aa64pfr1.sme=0" },
{ "arm64.nobti", "id_aa64pfr1.bt=0" }, { "arm64.nobti", "id_aa64pfr1.bt=0" },
{ "arm64.nopauth", { "arm64.nopauth",
"id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 " "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
@@ -144,7 +204,8 @@ static void __init match_options(const char *cmdline)
 		for (f = 0; strlen(regs[i]->fields[f].name); f++) {
 			u64 shift = regs[i]->fields[f].shift;
-			u64 mask = 0xfUL << shift;
+			u64 width = regs[i]->fields[f].width ?: 4;
+			u64 mask = GENMASK_ULL(shift + width - 1, shift);
 			u64 v;

 			if (find_field(cmdline, regs[i], f, &v))
@@ -152,7 +213,7 @@ static void __init match_options(const char *cmdline)
 			/*
 			 * If an override gets filtered out, advertise
-			 * it by setting the value to 0xf, but
+			 * it by setting the value to the all-ones while
 			 * clearing the mask... Yes, this is fragile.
 			 */
 			if (regs[i]->fields[f].filter &&
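The variable-width handling in the hunk above reduces to a small piece of mask arithmetic. The following stand-alone C sketch (hypothetical helper names, a locally re-defined GENMASK_ULL, and no filter handling) illustrates how a field of width bits at shift, 4 bits by default and 1 bit for a field such as fa64, is folded into an override value/mask pair:

#include <stdint.h>
#include <stdio.h>

/* same construction as the kernel's GENMASK_ULL(h, l), redefined here */
#define GENMASK_ULL(h, l) \
	(((~0ULL) >> (63 - (h))) & ((~0ULL) << (l)))

/* hypothetical stand-alone helper: fold one parsed field into an override */
static void apply_field(uint64_t *ovr_val, uint64_t *ovr_mask,
			unsigned int shift, unsigned int width, uint64_t v)
{
	uint64_t mask = GENMASK_ULL(shift + width - 1, shift);

	*ovr_val  &= ~mask;
	*ovr_val  |= (v << shift) & mask;
	*ovr_mask |= mask;
}

int main(void)
{
	uint64_t val = 0, mask = 0;

	apply_field(&val, &mask, 0, 1, 0);	/* one-bit field, e.g. fa64 */
	apply_field(&val, &mask, 4, 4, 0);	/* default four-bit ID field */

	printf("val=%#llx mask=%#llx\n",
	       (unsigned long long)val, (unsigned long long)mask);
	return 0;
}

In the kernel function itself the same three statements are applied to regs[i]->override, and a value rejected by a filter instead gets the all-ones pattern with the mask cleared, exactly as the comment in the hunk above describes.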
@@ -234,9 +295,9 @@ static __init void parse_cmdline(void)
 }

 /* Keep checkers quiet */
-void init_feature_override(void);
+void init_feature_override(u64 boot_status);

-asmlinkage void __init init_feature_override(void)
+asmlinkage void __init init_feature_override(u64 boot_status)
 {
 	int i;

@@ -247,6 +308,8 @@ asmlinkage void __init init_feature_override(void)
 		}
 	}

+	__boot_status = boot_status;
+
 	parse_cmdline();

 	for (i = 0; i < ARRAY_SIZE(regs); i++) {
@@ -10,11 +10,8 @@
 #error This file should only be included in vmlinux.lds.S
 #endif

-#ifdef CONFIG_EFI
-
-__efistub_kernel_size = _edata - _text;
-__efistub_primary_entry_offset = primary_entry - _text;
+PROVIDE(__efistub_kernel_size = _edata - _text);
+PROVIDE(__efistub_primary_entry_offset = primary_entry - _text);

 /*
  * The EFI stub has its own symbol namespace prefixed by __efistub_, to
@@ -25,31 +22,37 @@ __efistub_primary_entry_offset = primary_entry - _text;
  * linked at. The routines below are all implemented in assembler in a
  * position independent manner
  */
-__efistub_memcmp = __pi_memcmp;
-__efistub_memchr = __pi_memchr;
-__efistub_memcpy = __pi_memcpy;
-__efistub_memmove = __pi_memmove;
-__efistub_memset = __pi_memset;
-__efistub_strlen = __pi_strlen;
-__efistub_strnlen = __pi_strnlen;
-__efistub_strcmp = __pi_strcmp;
-__efistub_strncmp = __pi_strncmp;
-__efistub_strrchr = __pi_strrchr;
-__efistub_dcache_clean_poc = __pi_dcache_clean_poc;
-
-#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
-__efistub___memcpy = __pi_memcpy;
-__efistub___memmove = __pi_memmove;
-__efistub___memset = __pi_memset;
-#endif
-
-__efistub__text = _text;
-__efistub__end = _end;
-__efistub__edata = _edata;
-__efistub_screen_info = screen_info;
-__efistub__ctype = _ctype;
+PROVIDE(__efistub_memcmp = __pi_memcmp);
+PROVIDE(__efistub_memchr = __pi_memchr);
+PROVIDE(__efistub_memcpy = __pi_memcpy);
+PROVIDE(__efistub_memmove = __pi_memmove);
+PROVIDE(__efistub_memset = __pi_memset);
+PROVIDE(__efistub_strlen = __pi_strlen);
+PROVIDE(__efistub_strnlen = __pi_strnlen);
+PROVIDE(__efistub_strcmp = __pi_strcmp);
+PROVIDE(__efistub_strncmp = __pi_strncmp);
+PROVIDE(__efistub_strrchr = __pi_strrchr);
+PROVIDE(__efistub_dcache_clean_poc = __pi_dcache_clean_poc);
+
+PROVIDE(__efistub__text = _text);
+PROVIDE(__efistub__end = _end);
+PROVIDE(__efistub__edata = _edata);
+PROVIDE(__efistub_screen_info = screen_info);
+PROVIDE(__efistub__ctype = _ctype);
+
+/*
+ * The __ prefixed memcpy/memset/memmove symbols are provided by KASAN, which
+ * instruments the conventional ones. Therefore, any references from the EFI
+ * stub or other position independent, low level C code should be redirected to
+ * the non-instrumented versions as well.
+ */
+PROVIDE(__efistub___memcpy = __pi_memcpy);
+PROVIDE(__efistub___memmove = __pi_memmove);
+PROVIDE(__efistub___memset = __pi_memset);
+
+PROVIDE(__pi___memcpy = __pi_memcpy);
+PROVIDE(__pi___memmove = __pi_memmove);
+PROVIDE(__pi___memset = __pi_memset);

-#endif
-
 #ifdef CONFIG_KVM
@@ -13,7 +13,6 @@
 #include <linux/pgtable.h>
 #include <linux/random.h>

-#include <asm/cacheflush.h>
 #include <asm/fixmap.h>
 #include <asm/kernel-pgtable.h>
@@ -21,128 +20,45 @@
 #include <asm/sections.h>
 #include <asm/setup.h>

-enum kaslr_status {
-	KASLR_ENABLED,
-	KASLR_DISABLED_CMDLINE,
-	KASLR_DISABLED_NO_SEED,
-	KASLR_DISABLED_FDT_REMAP,
-};
-
-static enum kaslr_status __initdata kaslr_status;
 u64 __ro_after_init module_alloc_base;
 u16 __initdata memstart_offset_seed;

-static __init u64 get_kaslr_seed(void *fdt)
-{
-	int node, len;
-	fdt64_t *prop;
-	u64 ret;
-
-	node = fdt_path_offset(fdt, "/chosen");
-	if (node < 0)
-		return 0;
-
-	prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
-	if (!prop || len != sizeof(u64))
-		return 0;
-
-	ret = fdt64_to_cpu(*prop);
-	*prop = 0;
-	return ret;
-}
-
 struct arm64_ftr_override kaslr_feature_override __initdata;

-/*
- * This routine will be executed with the kernel mapped at its default virtual
- * address, and if it returns successfully, the kernel will be remapped, and
- * start_kernel() will be executed from a randomized virtual offset. The
- * relocation will result in all absolute references (e.g., static variables
- * containing function pointers) to be reinitialized, and zero-initialized
- * .bss variables will be reset to 0.
- */
-u64 __init kaslr_early_init(void)
+static int __init kaslr_init(void)
 {
-	void *fdt;
-	u64 seed, offset, mask, module_range;
-	unsigned long raw;
+	u64 module_range;
+	u32 seed;

 	/*
 	 * Set a reasonable default for module_alloc_base in case
 	 * we end up running with module randomization disabled.
 	 */
 	module_alloc_base = (u64)_etext - MODULES_VSIZE;
-	dcache_clean_inval_poc((unsigned long)&module_alloc_base,
-			       (unsigned long)&module_alloc_base +
-			       sizeof(module_alloc_base));

-	/*
-	 * Try to map the FDT early. If this fails, we simply bail,
-	 * and proceed with KASLR disabled. We will make another
-	 * attempt at mapping the FDT in setup_machine()
-	 */
-	fdt = get_early_fdt_ptr();
-	if (!fdt) {
-		kaslr_status = KASLR_DISABLED_FDT_REMAP;
-		return 0;
-	}
-
-	/*
-	 * Retrieve (and wipe) the seed from the FDT
-	 */
-	seed = get_kaslr_seed(fdt);
-
-	/*
-	 * Check if 'nokaslr' appears on the command line, and
-	 * return 0 if that is the case.
-	 */
 	if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf) {
-		kaslr_status = KASLR_DISABLED_CMDLINE;
+		pr_info("KASLR disabled on command line\n");
 		return 0;
 	}

-	/*
-	 * Mix in any entropy obtainable architecturally if enabled
-	 * and supported.
-	 */
-	if (arch_get_random_seed_long_early(&raw))
-		seed ^= raw;
-
-	if (!seed) {
-		kaslr_status = KASLR_DISABLED_NO_SEED;
+	if (!kaslr_offset()) {
+		pr_warn("KASLR disabled due to lack of seed\n");
 		return 0;
 	}

+	pr_info("KASLR enabled\n");
+
 	/*
-	 * OK, so we are proceeding with KASLR enabled. Calculate a suitable
-	 * kernel image offset from the seed. Let's place the kernel in the
-	 * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
-	 * the lower and upper quarters to avoid colliding with other
-	 * allocations.
-	 * Even if we could randomize at page granularity for 16k and 64k pages,
-	 * let's always round to 2 MB so we don't interfere with the ability to
-	 * map using contiguous PTEs
+	 * KASAN without KASAN_VMALLOC does not expect the module region to
+	 * intersect the vmalloc region, since shadow memory is allocated for
+	 * each module at load time, whereas the vmalloc region will already be
+	 * shadowed by KASAN zero pages.
 	 */
-	mask = ((1UL << (VA_BITS_MIN - 2)) - 1) & ~(SZ_2M - 1);
-	offset = BIT(VA_BITS_MIN - 3) + (seed & mask);
+	BUILD_BUG_ON((IS_ENABLED(CONFIG_KASAN_GENERIC) ||
+		      IS_ENABLED(CONFIG_KASAN_SW_TAGS)) &&
+		     !IS_ENABLED(CONFIG_KASAN_VMALLOC));

-	/* use the top 16 bits to randomize the linear region */
-	memstart_offset_seed = seed >> 48;
-
-	if (!IS_ENABLED(CONFIG_KASAN_VMALLOC) &&
-	    (IS_ENABLED(CONFIG_KASAN_GENERIC) ||
-	     IS_ENABLED(CONFIG_KASAN_SW_TAGS)))
-		/*
-		 * KASAN without KASAN_VMALLOC does not expect the module region
-		 * to intersect the vmalloc region, since shadow memory is
-		 * allocated for each module at load time, whereas the vmalloc
-		 * region is shadowed by KASAN zero pages. So keep modules
-		 * out of the vmalloc region if KASAN is enabled without
-		 * KASAN_VMALLOC, and put the kernel well within 4 GB of the
-		 * module region.
-		 */
-		return offset % SZ_2G;
+	seed = get_random_u32();

 	if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
 		/*
@@ -154,8 +70,7 @@ u64 __init kaslr_early_init(void)
 		 * resolved normally.)
 		 */
 		module_range = SZ_2G - (u64)(_end - _stext);
-		module_alloc_base = max((u64)_end + offset - SZ_2G,
-					(u64)MODULES_VADDR);
+		module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR);
 	} else {
 		/*
 		 * Randomize the module region by setting module_alloc_base to
@@ -167,40 +82,12 @@ u64 __init kaslr_early_init(void)
 		 * when ARM64_MODULE_PLTS is enabled.
 		 */
 		module_range = MODULES_VSIZE - (u64)(_etext - _stext);
-		module_alloc_base = (u64)_etext + offset - MODULES_VSIZE;
 	}

 	/* use the lower 21 bits to randomize the base of the module region */
 	module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
 	module_alloc_base &= PAGE_MASK;
-	dcache_clean_inval_poc((unsigned long)&module_alloc_base,
-			       (unsigned long)&module_alloc_base +
-			       sizeof(module_alloc_base));
-	dcache_clean_inval_poc((unsigned long)&memstart_offset_seed,
-			       (unsigned long)&memstart_offset_seed +
-			       sizeof(memstart_offset_seed));
-
-	return offset;
-}
-
-static int __init kaslr_init(void)
-{
-	switch (kaslr_status) {
-	case KASLR_ENABLED:
-		pr_info("KASLR enabled\n");
-		break;
-	case KASLR_DISABLED_CMDLINE:
-		pr_info("KASLR disabled on command line\n");
-		break;
-	case KASLR_DISABLED_NO_SEED:
-		pr_warn("KASLR disabled due to lack of seed\n");
-		break;
-	case KASLR_DISABLED_FDT_REMAP:
-		pr_warn("KASLR disabled due to FDT remapping failure\n");
-		break;
-	}

 	return 0;
 }
-core_initcall(kaslr_init)
+subsys_initcall(kaslr_init)
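On the module randomization path kept in kaslr_init() above, the final two statements scale the low 21 bits of the seed into the remaining module range and then page-align the result. A stand-alone sketch of that arithmetic (plain C; the PAGE_SIZE definition, the helper name and the example inputs are assumptions made only for illustration):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096ULL		/* assumed 4 KiB pages for the example */

/* hypothetical helper mirroring the two statements in kaslr_init() */
static uint64_t randomize_module_base(uint64_t base, uint64_t range,
				      uint32_t seed)
{
	/* pick a point in 'range' using the lower 21 bits of the seed */
	base += (range * (seed & ((1 << 21) - 1))) >> 21;
	return base & ~(PAGE_SIZE - 1);		/* page-align, like PAGE_MASK */
}

int main(void)
{
	/* made-up base address and remaining range, for demonstration only */
	uint64_t base = randomize_module_base(0xffff800008000000ULL,
					      120ULL << 20, 0x123456u);

	printf("module_alloc_base = %#llx\n", (unsigned long long)base);
	return 0;
}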
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2022 Google LLC
+
+KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
+		 -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \
+		 $(call cc-option,-mbranch-protection=none) \
+		 -I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \
+		 -include $(srctree)/include/linux/hidden.h \
+		 -D__DISABLE_EXPORTS -ffreestanding -D__NO_FORTIFY \
+		 $(call cc-option,-fno-addrsig)
+
+# remove SCS flags from all objects in this directory
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
+# disable LTO
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS))
+
+GCOV_PROFILE	:= n
+KASAN_SANITIZE	:= n
+KCSAN_SANITIZE	:= n
+UBSAN_SANITIZE	:= n
+KCOV_INSTRUMENT	:= n
+
+$(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \
+			       --remove-section=.note.gnu.property \
+			       --prefix-alloc-sections=.init
+$(obj)/%.pi.o: $(obj)/%.o FORCE
+	$(call if_changed,objcopy)
+
+$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
+	$(call if_changed_rule,cc_o_c)
+
+obj-y	:= kaslr_early.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
+extra-y	:= $(patsubst %.pi.o,%.o,$(obj-y))
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2022 Google LLC
+// Author: Ard Biesheuvel <ardb@google.com>
+
+// NOTE: code in this file runs *very* early, and is not permitted to use
+// global variables or anything that relies on absolute addressing.
+
+#include <linux/libfdt.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/string.h>
+
+#include <asm/archrandom.h>
+#include <asm/memory.h>
+
+/* taken from lib/string.c */
+static char *__strstr(const char *s1, const char *s2)
+{
+	size_t l1, l2;
+
+	l2 = strlen(s2);
+	if (!l2)
+		return (char *)s1;
+	l1 = strlen(s1);
+	while (l1 >= l2) {
+		l1--;
+		if (!memcmp(s1, s2, l2))
+			return (char *)s1;
+		s1++;
+	}
+	return NULL;
+}
+
+static bool cmdline_contains_nokaslr(const u8 *cmdline)
+{
+	const u8 *str;
+
+	str = __strstr(cmdline, "nokaslr");
+	return str == cmdline || (str > cmdline && *(str - 1) == ' ');
+}
+
+static bool is_kaslr_disabled_cmdline(void *fdt)
+{
+	if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
+		int node;
+		const u8 *prop;
+
+		node = fdt_path_offset(fdt, "/chosen");
+		if (node < 0)
+			goto out;
+
+		prop = fdt_getprop(fdt, node, "bootargs", NULL);
+		if (!prop)
+			goto out;
+
+		if (cmdline_contains_nokaslr(prop))
+			return true;
+
+		if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
+			goto out;
+
+		return false;
+	}
+out:
+	return cmdline_contains_nokaslr(CONFIG_CMDLINE);
+}
+
+static u64 get_kaslr_seed(void *fdt)
+{
+	int node, len;
+	fdt64_t *prop;
+	u64 ret;
+
+	node = fdt_path_offset(fdt, "/chosen");
+	if (node < 0)
+		return 0;
+
+	prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+	if (!prop || len != sizeof(u64))
+		return 0;
+
+	ret = fdt64_to_cpu(*prop);
+	*prop = 0;
+	return ret;
+}
+
+asmlinkage u64 kaslr_early_init(void *fdt)
+{
+	u64 seed;
+
+	if (is_kaslr_disabled_cmdline(fdt))
+		return 0;
+
+	seed = get_kaslr_seed(fdt);
+	if (!seed) {
+#ifdef CONFIG_ARCH_RANDOM
+		if (!__early_cpu_has_rndr() ||
+		    !__arm64_rndr((unsigned long *)&seed))
+#endif
+			return 0;
+	}
+
+	/*
+	 * OK, so we are proceeding with KASLR enabled. Calculate a suitable
+	 * kernel image offset from the seed. Let's place the kernel in the
+	 * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
+	 * the lower and upper quarters to avoid colliding with other
+	 * allocations.
+	 */
+	return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0));
+}
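The return value of kaslr_early_init() above is the random offset that later gets applied to the kernel mapping. A rough stand-alone check of that expression (plain C; the VA_BITS_MIN value, the seed and the locally re-defined bit macros are assumptions for illustration only):

#include <stdint.h>
#include <stdio.h>

#define VA_BITS_MIN	48		/* assumed 48-bit minimum VA space */
#define BIT_ULL(n)	(1ULL << (n))
#define GENMASK_ULL(h, l) \
	(((~0ULL) >> (63 - (h))) & ((~0ULL) << (l)))

int main(void)
{
	uint64_t seed = 0x0123456789abcdefULL;	/* stand-in for the FDT/RNDR seed */

	/*
	 * Keep bits [VA_BITS_MIN-3:0] of the seed and add 2^(VA_BITS_MIN-3):
	 * for 48-bit VAs the offset lands in [2^45, 3 * 2^45), i.e. the middle
	 * half of a 2^47 window, clear of the lower and upper quarters.
	 */
	uint64_t offset = BIT_ULL(VA_BITS_MIN - 3) +
			  (seed & GENMASK_ULL(VA_BITS_MIN - 3, 0));

	printf("offset = %#llx\n", (unsigned long long)offset);
	return 0;
}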
@@ -100,10 +100,11 @@ SYM_FUNC_END(__cpu_suspend_enter)
 	.pushsection ".idmap.text", "awx"
 SYM_CODE_START(cpu_resume)
 	bl	init_kernel_el
-	bl	switch_to_vhe
+	bl	finalise_el2
 	bl	__cpu_setup
 	/* enable the MMU early - so we can access sleep_save_stash by va */
 	adrp	x1, swapper_pg_dir
+	adrp	x2, idmap_pg_dir
 	bl	__enable_mmu
 	ldr	x8, =_cpu_resume
 	br	x8
@@ -52,7 +52,7 @@ void notrace __cpu_suspend_exit(void)

 	/* Restore CnP bit in TTBR1_EL1 */
 	if (system_supports_cnp())
-		cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+		cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);

 	/*
 	 * PSTATE was not saved over suspend/resume, re-enable any detected
@@ -199,8 +199,7 @@ SECTIONS
 	}

 	idmap_pg_dir = .;
-	. += IDMAP_DIR_SIZE;
-	idmap_pg_end = .;
+	. += PAGE_SIZE;

 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 	tramp_pg_dir = .;
@@ -236,6 +235,10 @@ SECTIONS
 	__inittext_end = .;
 	__initdata_begin = .;

+	init_idmap_pg_dir = .;
+	. += INIT_IDMAP_DIR_SIZE;
+	init_idmap_pg_end = .;
+
 	.init.data : {
 		INIT_DATA
 		INIT_SETUP(16)
@@ -254,21 +257,17 @@ SECTIONS

 	HYPERVISOR_RELOC_SECTION

 	.rela.dyn : ALIGN(8) {
+		__rela_start = .;
 		*(.rela .rela*)
+		__rela_end = .;
 	}

-	__rela_offset = ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR);
-	__rela_size = SIZEOF(.rela.dyn);
-
-#ifdef CONFIG_RELR
 	.relr.dyn : ALIGN(8) {
+		__relr_start = .;
 		*(.relr.dyn)
+		__relr_end = .;
 	}

-	__relr_offset = ABSOLUTE(ADDR(.relr.dyn) - KIMAGE_VADDR);
-	__relr_size = SIZEOF(.relr.dyn);
-#endif
-
 	. = ALIGN(SEGMENT_ALIGN);
 	__initdata_end = .;
 	__init_end = .;
@@ -236,7 +236,7 @@ static void __init kasan_init_shadow(void)
 	 */
 	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
 	dsb(ishst);
-	cpu_replace_ttbr1(lm_alias(tmp_pg_dir));
+	cpu_replace_ttbr1(lm_alias(tmp_pg_dir), idmap_pg_dir);

 	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);

@@ -280,7 +280,7 @@ static void __init kasan_init_shadow(void)
 				PAGE_KERNEL_RO));

 	memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE);
-	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+	cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
 }

 static void __init kasan_init_depth(void)
@@ -43,15 +43,27 @@
 #define NO_CONT_MAPPINGS	BIT(1)
 #define NO_EXEC_MAPPINGS	BIT(2)	/* assumes FEAT_HPDS is not used */

-u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN);
-u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
+int idmap_t0sz __ro_after_init;

-u64 __section(".mmuoff.data.write") vabits_actual;
+#if VA_BITS > 48
+u64 vabits_actual __ro_after_init = VA_BITS_MIN;
 EXPORT_SYMBOL(vabits_actual);
+#endif
+
+u64 kimage_vaddr __ro_after_init = (u64)&_text;
+EXPORT_SYMBOL(kimage_vaddr);

 u64 kimage_voffset __ro_after_init;
 EXPORT_SYMBOL(kimage_voffset);

+u32 __boot_cpu_mode[] = { BOOT_CPU_MODE_EL2, BOOT_CPU_MODE_EL1 };
+
+/*
+ * The booting CPU updates the failed status @__early_cpu_boot_status,
+ * with MMU turned off.
+ */
+long __section(".mmuoff.data.write") __early_cpu_boot_status;
+
 /*
  * Empty_zero_page is a special page that is used for zero-initialized data
  * and COW.
@@ -763,22 +775,57 @@ static void __init map_kernel(pgd_t *pgdp)
 	kasan_copy_shadow(pgdp);
 }

+static void __init create_idmap(void)
+{
+	u64 start = __pa_symbol(__idmap_text_start);
+	u64 size = __pa_symbol(__idmap_text_end) - start;
+	pgd_t *pgd = idmap_pg_dir;
+	u64 pgd_phys;
+
+	/* check if we need an additional level of translation */
+	if (VA_BITS < 48 && idmap_t0sz < (64 - VA_BITS_MIN)) {
+		pgd_phys = early_pgtable_alloc(PAGE_SHIFT);
+		set_pgd(&idmap_pg_dir[start >> VA_BITS],
+			__pgd(pgd_phys | P4D_TYPE_TABLE));
+		pgd = __va(pgd_phys);
+	}
+	__create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX,
+			     early_pgtable_alloc, 0);
+
+	if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) {
+		extern u32 __idmap_kpti_flag;
+		u64 pa = __pa_symbol(&__idmap_kpti_flag);
+
+		/*
+		 * The KPTI G-to-nG conversion code needs a read-write mapping
+		 * of its synchronization flag in the ID map.
+		 */
+		__create_pgd_mapping(pgd, pa, pa, sizeof(u32), PAGE_KERNEL,
+				     early_pgtable_alloc, 0);
+	}
+}
+
 void __init paging_init(void)
 {
 	pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir));
+	extern pgd_t init_idmap_pg_dir[];
+
+	idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(VA_BITS_MIN - 1, 0));

 	map_kernel(pgdp);
 	map_mem(pgdp);

 	pgd_clear_fixmap();

-	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+	cpu_replace_ttbr1(lm_alias(swapper_pg_dir), init_idmap_pg_dir);
 	init_mm.pgd = swapper_pg_dir;

 	memblock_phys_free(__pa_symbol(init_pg_dir),
 			__pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir));

 	memblock_allow_resize();
+
+	create_idmap();
 }

 /*
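The idmap_t0sz assignment added to paging_init() above follows the rule also used by the new idmap_get_t0sz macro: T0SZ is the number of leading zero bits in the highest physical address the ID map must cover, clamped so that at least VA_BITS_MIN bits stay covered. A stand-alone sketch of that arithmetic (plain C; the __fls() stand-in, the VA_BITS_MIN value and the example address are assumptions for illustration only):

#include <stdint.h>
#include <stdio.h>

#define VA_BITS_MIN	48		/* assumed for the example */
#define GENMASK_ULL(h, l) \
	(((~0ULL) >> (63 - (h))) & ((~0ULL) << (l)))

/* index of the highest set bit, like the kernel's __fls() for non-zero input */
static unsigned int fls64_idx(uint64_t x)
{
	unsigned int i = 63;

	while (!(x & (1ULL << i)))
		i--;
	return i;
}

int main(void)
{
	uint64_t pa_end = 0x80000000ULL + (64ULL << 20);	/* made-up PA of _end */

	/*
	 * 63 - __fls(x) is the number of leading zeroes of x; OR-ing in
	 * GENMASK(VA_BITS_MIN - 1, 0) caps T0SZ at 64 - VA_BITS_MIN, so the
	 * ID map always covers at least VA_BITS_MIN bits of address space.
	 */
	unsigned int t0sz = 63 - fls64_idx(pa_end | GENMASK_ULL(VA_BITS_MIN - 1, 0));

	printf("idmap T0SZ = %u (covers %u address bits)\n", t0sz, 64 - t0sz);
	return 0;
}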
@@ -249,8 +249,10 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1)
  *
  * Called exactly once from stop_machine context by each CPU found during boot.
  */
-__idmap_kpti_flag:
-	.long	1
+	.pushsection	".data", "aw", %progbits
+SYM_DATA(__idmap_kpti_flag, .long 1)
+	.popsection
+
 SYM_FUNC_START(idmap_kpti_install_ng_mappings)
 	cpu		.req	w0
 	temp_pte	.req	x0
@@ -273,7 +275,7 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings)
 	mov	x5, x3			// preserve temp_pte arg
 	mrs	swapper_ttb, ttbr1_el1
-	adr	flag_ptr, __idmap_kpti_flag
+	adr_l	flag_ptr, __idmap_kpti_flag

 	cbnz	cpu, __idmap_kpti_secondary
@@ -396,6 +398,8 @@ SYM_FUNC_END(idmap_kpti_install_ng_mappings)
  *
  *	Initialise the processor for turning the MMU on.
  *
+ * Input:
+ *	x0 - actual number of VA bits (ignored unless VA_BITS > 48)
  * Output:
  *	Return in x0 the value of the SCTLR_EL1 register.
  */
@@ -465,12 +469,11 @@ SYM_FUNC_START(__cpu_setup)
 	tcr_clear_errata_bits	tcr, x9, x5

 #ifdef CONFIG_ARM64_VA_BITS_52
-	ldr_l		x9, vabits_actual
-	sub		x9, xzr, x9
+	sub		x9, xzr, x0
 	add		x9, x9, #64
 	tcr_set_t1sz	tcr, x9
 #else
-	ldr_l		x9, idmap_t0sz
+	idmap_get_t0sz	x9
 #endif
 	tcr_set_t0sz	tcr, x9
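Both TCR branches above end up programming "TxSZ = 64 - (number of address bits to translate)": the 52-bit VA path now derives T1SZ from the VA-bit count passed in x0, while the other path takes the T0SZ needed by the ID map. A trivial stand-alone illustration of that relation (plain C, example values only, not kernel code):

#include <stdio.h>

/* TxSZ = 64 - (number of address bits the translation regime must cover) */
static unsigned int txsz(unsigned int address_bits)
{
	return 64 - address_bits;
}

int main(void)
{
	/* e.g. 52-bit kernel VAs give T1SZ = 12; a 48-bit ID map gives T0SZ = 16 */
	printf("T1SZ for 52-bit VA = %u\n", txsz(52));
	printf("T0SZ for 48-bit ID map = %u\n", txsz(48));
	return 0;
}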