diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index fa6f2174276b..8c5643e2eea4 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -211,6 +211,11 @@ static inline bool __kvm_cpu_uses_extended_idmap(void) return false; } +static inline unsigned long __kvm_idmap_ptrs_per_pgd(void) +{ + return PTRS_PER_PGD; +} + static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, pgd_t *hyp_pgd, pgd_t *merged_hyp_pgd, @@ -221,6 +226,8 @@ static inline unsigned int kvm_get_vmid_bits(void) return 8; } +#define kvm_phys_to_vttbr(addr) (addr) + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3af1657fcac3..9b1330806f39 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -639,6 +639,35 @@ config ARM64_VA_BITS default 47 if ARM64_VA_BITS_47 default 48 if ARM64_VA_BITS_48 +choice + prompt "Physical address space size" + default ARM64_PA_BITS_48 + help + Choose the maximum physical address range that the kernel will + support. + +config ARM64_PA_BITS_48 + bool "48-bit" + +config ARM64_PA_BITS_52 + bool "52-bit (ARMv8.2)" + depends on ARM64_64K_PAGES + depends on ARM64_PAN || !ARM64_SW_TTBR0_PAN + help + Enable support for a 52-bit physical address space, introduced as + part of the ARMv8.2-LPA extension. + + With this enabled, the kernel will also continue to work on CPUs that + do not support ARMv8.2-LPA, but with some added memory overhead (and + minor performance overhead). + +endchoice + +config ARM64_PA_BITS + int + default 48 if ARM64_PA_BITS_48 + default 52 if ARM64_PA_BITS_52 + config CPU_BIG_ENDIAN bool "Build big-endian kernel" help diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index c45bc94f15d0..215a49213507 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -343,10 +343,26 @@ alternative_endif * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map */ .macro tcr_set_idmap_t0sz, valreg, tmpreg -#ifndef CONFIG_ARM64_VA_BITS_48 ldr_l \tmpreg, idmap_t0sz bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH -#endif + .endm + +/* + * tcr_compute_pa_size - set TCR.(I)PS to the highest supported + * ID_AA64MMFR0_EL1.PARange value + * + * tcr: register with the TCR_ELx value to be updated + * pos: PARange bitfield position + * tmp{0,1}: temporary registers + */ + .macro tcr_compute_pa_size, tcr, pos, tmp0, tmp1 + mrs \tmp0, ID_AA64MMFR0_EL1 + // Narrow PARange to fit the PS field in TCR_ELx + ubfx \tmp0, \tmp0, #ID_AA64MMFR0_PARANGE_SHIFT, #3 + mov \tmp1, #ID_AA64MMFR0_PARANGE_MAX + cmp \tmp0, \tmp1 + csel \tmp0, \tmp1, \tmp0, hi + bfi \tcr, \tmp0, \pos, #3 .endm /* @@ -489,4 +505,20 @@ alternative_else_nop_endif #endif .endm +/* + * Arrange a physical address in a TTBR register, taking care of 52-bit + * addresses. + * + * phys: physical address, preserved + * ttbr: returns the TTBR value + */ + .macro phys_to_ttbr, phys, ttbr +#ifdef CONFIG_ARM64_PA_BITS_52 + orr \ttbr, \phys, \phys, lsr #46 + and \ttbr, \ttbr, #TTBR_BADDR_MASK_52 +#else + mov \ttbr, \phys +#endif + .endm + #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 672c8684d5c2..b33bdb5eeb3d 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -273,15 +273,26 @@ void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); static inline bool __kvm_cpu_uses_extended_idmap(void) { - return __cpu_uses_extended_idmap(); + return __cpu_uses_extended_idmap_level(); } +static inline unsigned long __kvm_idmap_ptrs_per_pgd(void) +{ + return idmap_ptrs_per_pgd; +} + +/* + * Can't use pgd_populate here, because the extended idmap adds an extra level + * above CONFIG_PGTABLE_LEVELS (which is 2 or 3 if we're using the extended + * idmap), and pgd_populate is only available if CONFIG_PGTABLE_LEVELS = 4. + */ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, pgd_t *hyp_pgd, pgd_t *merged_hyp_pgd, unsigned long hyp_idmap_start) { int idmap_idx; + u64 pgd_addr; /* * Use the first entry to access the HYP mappings. It is @@ -289,7 +300,8 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, * extended idmap. */ VM_BUG_ON(pgd_val(merged_hyp_pgd[0])); - merged_hyp_pgd[0] = __pgd(__pa(hyp_pgd) | PMD_TYPE_TABLE); + pgd_addr = __phys_to_pgd_val(__pa(hyp_pgd)); + merged_hyp_pgd[0] = __pgd(pgd_addr | PMD_TYPE_TABLE); /* * Create another extended level entry that points to the boot HYP map, @@ -299,7 +311,8 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, */ idmap_idx = hyp_idmap_start >> VA_BITS; VM_BUG_ON(pgd_val(merged_hyp_pgd[idmap_idx])); - merged_hyp_pgd[idmap_idx] = __pgd(__pa(boot_hyp_pgd) | PMD_TYPE_TABLE); + pgd_addr = __phys_to_pgd_val(__pa(boot_hyp_pgd)); + merged_hyp_pgd[idmap_idx] = __pgd(pgd_addr | PMD_TYPE_TABLE); } static inline unsigned int kvm_get_vmid_bits(void) @@ -309,5 +322,7 @@ static inline unsigned int kvm_get_vmid_bits(void) return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; } +#define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index da29766a181c..8f80fcb49252 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -49,7 +49,7 @@ static inline void contextidr_thread_switch(struct task_struct *next) */ static inline void cpu_set_reserved_ttbr0(void) { - unsigned long ttbr = __pa_symbol(empty_zero_page); + unsigned long ttbr = phys_to_ttbr(__pa_symbol(empty_zero_page)); write_sysreg(ttbr, ttbr0_el1); isb(); @@ -68,6 +68,7 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm) * physical memory, in which case it will be smaller. */ extern u64 idmap_t0sz; +extern u64 idmap_ptrs_per_pgd; static inline bool __cpu_uses_extended_idmap(void) { @@ -75,6 +76,15 @@ static inline bool __cpu_uses_extended_idmap(void) unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS))); } +/* + * True if the extended ID map requires an extra level of translation table + * to be configured. + */ +static inline bool __cpu_uses_extended_idmap_level(void) +{ + return ARM64_HW_PGTABLE_LEVELS((64 - idmap_t0sz)) > CONFIG_PGTABLE_LEVELS; +} + /* * Set TCR.T0SZ to its default value (based on VA_BITS) */ diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 5ca6a573a701..e9d9f1b006ef 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -44,7 +44,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot) { - set_pud(pud, __pud(pmd | prot)); + set_pud(pud, __pud(__phys_to_pud_val(pmd) | prot)); } static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) @@ -73,7 +73,7 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot) { - set_pgd(pgdp, __pgd(pud | prot)); + set_pgd(pgdp, __pgd(__phys_to_pgd_val(pud) | prot)); } static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) @@ -129,7 +129,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte) static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte, pmdval_t prot) { - set_pmd(pmdp, __pmd(pte | prot)); + set_pmd(pmdp, __pmd(__phys_to_pmd_val(pte) | prot)); } /* diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 8df4cb6ac6f7..e1f6679d763e 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -16,6 +16,8 @@ #ifndef __ASM_PGTABLE_HWDEF_H #define __ASM_PGTABLE_HWDEF_H +#include + /* * Number of page-table levels required to address 'va_bits' wide * address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT) @@ -166,6 +168,14 @@ #define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */ #define PTE_HYP_XN (_AT(pteval_t, 1) << 54) /* HYP XN */ +#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT) +#ifdef CONFIG_ARM64_PA_BITS_52 +#define PTE_ADDR_HIGH (_AT(pteval_t, 0xf) << 12) +#define PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH) +#else +#define PTE_ADDR_MASK PTE_ADDR_LOW +#endif + /* * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers). */ @@ -196,7 +206,7 @@ /* * Highest possible physical address supported. */ -#define PHYS_MASK_SHIFT (48) +#define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS) #define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1) /* @@ -272,10 +282,23 @@ #define TCR_TG1_4K (UL(2) << TCR_TG1_SHIFT) #define TCR_TG1_64K (UL(3) << TCR_TG1_SHIFT) +#define TCR_IPS_SHIFT 32 +#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT) #define TCR_A1 (UL(1) << 22) #define TCR_ASID16 (UL(1) << 36) #define TCR_TBI0 (UL(1) << 37) #define TCR_HA (UL(1) << 39) #define TCR_HD (UL(1) << 40) +/* + * TTBR. + */ +#ifdef CONFIG_ARM64_PA_BITS_52 +/* + * This should be GENMASK_ULL(47, 2). + * TTBR_ELx[1] is RES0 in this configuration. + */ +#define TTBR_BADDR_MASK_52 (((UL(1) << 46) - 1) << 2) +#endif + #endif diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 774003b247ad..bfa237e892f1 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -57,9 +57,22 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) -#define pte_pfn(pte) ((pte_val(pte) & PHYS_MASK) >> PAGE_SHIFT) +/* + * Macros to convert between a physical address and its placement in a + * page table entry, taking care of 52-bit addresses. + */ +#ifdef CONFIG_ARM64_PA_BITS_52 +#define __pte_to_phys(pte) \ + ((pte_val(pte) & PTE_ADDR_LOW) | ((pte_val(pte) & PTE_ADDR_HIGH) << 36)) +#define __phys_to_pte_val(phys) (((phys) | ((phys) >> 36)) & PTE_ADDR_MASK) +#else +#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_MASK) +#define __phys_to_pte_val(phys) (phys) +#endif -#define pfn_pte(pfn,prot) (__pte(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) +#define pte_pfn(pte) (__pte_to_phys(pte) >> PAGE_SHIFT) +#define pfn_pte(pfn,prot) \ + __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) #define pte_none(pte) (!pte_val(pte)) #define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0)) @@ -284,6 +297,11 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b) #define __HAVE_ARCH_PTE_SPECIAL +static inline pte_t pgd_pte(pgd_t pgd) +{ + return __pte(pgd_val(pgd)); +} + static inline pte_t pud_pte(pud_t pud) { return __pte(pud_val(pud)); @@ -349,15 +367,24 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) -#define pmd_pfn(pmd) (((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT) -#define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) +#define __pmd_to_phys(pmd) __pte_to_phys(pmd_pte(pmd)) +#define __phys_to_pmd_val(phys) __phys_to_pte_val(phys) +#define pmd_pfn(pmd) ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT) +#define pfn_pmd(pfn,prot) __pmd(__phys_to_pmd_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) #define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) #define pud_write(pud) pte_write(pud_pte(pud)) -#define pud_pfn(pud) (((pud_val(pud) & PUD_MASK) & PHYS_MASK) >> PAGE_SHIFT) + +#define __pud_to_phys(pud) __pte_to_phys(pud_pte(pud)) +#define __phys_to_pud_val(phys) __phys_to_pte_val(phys) +#define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT) +#define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) #define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd)) +#define __pgd_to_phys(pgd) __pte_to_phys(pgd_pte(pgd)) +#define __phys_to_pgd_val(phys) __phys_to_pte_val(phys) + #define __pgprot_modify(prot,mask,bits) \ __pgprot((pgprot_val(prot) & ~(mask)) | (bits)) @@ -408,7 +435,7 @@ static inline void pmd_clear(pmd_t *pmdp) static inline phys_addr_t pmd_page_paddr(pmd_t pmd) { - return pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK; + return __pmd_to_phys(pmd); } /* Find an entry in the third-level page table. */ @@ -426,7 +453,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) #define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr)) #define pte_clear_fixmap() clear_fixmap(FIX_PTE) -#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) +#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(__pmd_to_phys(pmd))) /* use ONLY for statically allocated translation tables */ #define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr)))) @@ -459,7 +486,7 @@ static inline void pud_clear(pud_t *pudp) static inline phys_addr_t pud_page_paddr(pud_t pud) { - return pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK; + return __pud_to_phys(pud); } /* Find an entry in the second-level page table. */ @@ -472,7 +499,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud) #define pmd_set_fixmap_offset(pud, addr) pmd_set_fixmap(pmd_offset_phys(pud, addr)) #define pmd_clear_fixmap() clear_fixmap(FIX_PMD) -#define pud_page(pud) pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK)) +#define pud_page(pud) pfn_to_page(__phys_to_pfn(__pud_to_phys(pud))) /* use ONLY for statically allocated translation tables */ #define pmd_offset_kimg(dir,addr) ((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr)))) @@ -511,7 +538,7 @@ static inline void pgd_clear(pgd_t *pgdp) static inline phys_addr_t pgd_page_paddr(pgd_t pgd) { - return pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK; + return __pgd_to_phys(pgd); } /* Find an entry in the frst-level page table. */ @@ -524,7 +551,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd) #define pud_set_fixmap_offset(pgd, addr) pud_set_fixmap(pud_offset_phys(pgd, addr)) #define pud_clear_fixmap() clear_fixmap(FIX_PUD) -#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK)) +#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd))) /* use ONLY for statically allocated translation tables */ #define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr)))) @@ -734,6 +761,12 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, #define kc_vaddr_to_offset(v) ((v) & ~VA_START) #define kc_offset_to_vaddr(o) ((o) | VA_START) +#ifdef CONFIG_ARM64_PA_BITS_52 +#define phys_to_ttbr(addr) (((addr) | ((addr) >> 46)) & TTBR_BADDR_MASK_52) +#else +#define phys_to_ttbr(addr) (addr) +#endif + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PGTABLE_H */ diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h index 74a9d301819f..b299929fe56c 100644 --- a/arch/arm64/include/asm/sparsemem.h +++ b/arch/arm64/include/asm/sparsemem.h @@ -17,7 +17,7 @@ #define __ASM_SPARSEMEM_H #ifdef CONFIG_SPARSEMEM -#define MAX_PHYSMEM_BITS 48 +#define MAX_PHYSMEM_BITS CONFIG_ARM64_PA_BITS #define SECTION_SIZE_BITS 30 #endif diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 08cc88574659..ec144f480b39 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -471,6 +471,14 @@ #define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0 #define ID_AA64MMFR0_TGRAN16_NI 0x0 #define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1 +#define ID_AA64MMFR0_PARANGE_48 0x5 +#define ID_AA64MMFR0_PARANGE_52 0x6 + +#ifdef CONFIG_ARM64_PA_BITS_52 +#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_52 +#else +#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_48 +#endif /* id_aa64mmfr1 */ #define ID_AA64MMFR1_PAN_SHIFT 20 diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 67e86a0f57ac..66f01869e97c 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -147,6 +147,26 @@ preserve_boot_args: b __inval_dcache_area // tail call ENDPROC(preserve_boot_args) +/* + * Macro to arrange a physical address in a page table entry, taking care of + * 52-bit addresses. + * + * Preserves: phys + * Returns: pte + */ + .macro phys_to_pte, phys, pte +#ifdef CONFIG_ARM64_PA_BITS_52 + /* + * We assume \phys is 64K aligned and this is guaranteed by only + * supporting this configuration with 64K pages. + */ + orr \pte, \phys, \phys, lsr #36 + and \pte, \pte, #PTE_ADDR_MASK +#else + mov \pte, \phys +#endif + .endm + /* * Macro to create a table entry to the next page. * @@ -156,14 +176,16 @@ ENDPROC(preserve_boot_args) * ptrs: #imm pointers per table page * * Preserves: virt - * Corrupts: tmp1, tmp2 + * Corrupts: ptrs, tmp1, tmp2 * Returns: tbl -> next level table page address */ .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 - lsr \tmp1, \virt, #\shift - and \tmp1, \tmp1, #\ptrs - 1 // table index - add \tmp2, \tbl, #PAGE_SIZE + add \tmp1, \tbl, #PAGE_SIZE + phys_to_pte \tmp1, \tmp2 orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type + lsr \tmp1, \virt, #\shift + sub \ptrs, \ptrs, #1 + and \tmp1, \tmp1, \ptrs // table index str \tmp2, [\tbl, \tmp1, lsl #3] add \tbl, \tbl, #PAGE_SIZE // next level table page .endm @@ -173,15 +195,17 @@ ENDPROC(preserve_boot_args) * block entry in the next level (tbl) for the given virtual address. * * Preserves: tbl, next, virt - * Corrupts: tmp1, tmp2 + * Corrupts: ptrs_per_pgd, tmp1, tmp2 */ - .macro create_pgd_entry, tbl, virt, tmp1, tmp2 - create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 + .macro create_pgd_entry, tbl, virt, ptrs_per_pgd, tmp1, tmp2 + create_table_entry \tbl, \virt, PGDIR_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2 #if SWAPPER_PGTABLE_LEVELS > 3 - create_table_entry \tbl, \virt, PUD_SHIFT, PTRS_PER_PUD, \tmp1, \tmp2 + mov \ptrs_per_pgd, PTRS_PER_PUD + create_table_entry \tbl, \virt, PUD_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2 #endif #if SWAPPER_PGTABLE_LEVELS > 2 - create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 + mov \ptrs_per_pgd, PTRS_PER_PTE + create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2 #endif .endm @@ -190,16 +214,17 @@ ENDPROC(preserve_boot_args) * virtual range (inclusive). * * Preserves: tbl, flags - * Corrupts: phys, start, end, pstate + * Corrupts: phys, start, end, tmp, pstate */ - .macro create_block_map, tbl, flags, phys, start, end - lsr \phys, \phys, #SWAPPER_BLOCK_SHIFT + .macro create_block_map, tbl, flags, phys, start, end, tmp lsr \start, \start, #SWAPPER_BLOCK_SHIFT and \start, \start, #PTRS_PER_PTE - 1 // table index - orr \phys, \flags, \phys, lsl #SWAPPER_BLOCK_SHIFT // table entry + bic \phys, \phys, #SWAPPER_BLOCK_SIZE - 1 lsr \end, \end, #SWAPPER_BLOCK_SHIFT and \end, \end, #PTRS_PER_PTE - 1 // table end index -9999: str \phys, [\tbl, \start, lsl #3] // store the entry +9999: phys_to_pte \phys, \tmp + orr \tmp, \tmp, \flags // table entry + str \tmp, [\tbl, \start, lsl #3] // store the entry add \start, \start, #1 // next entry add \phys, \phys, #SWAPPER_BLOCK_SIZE // next block cmp \start, \end @@ -244,16 +269,34 @@ __create_page_tables: adrp x0, idmap_pg_dir adrp x3, __idmap_text_start // __pa(__idmap_text_start) -#ifndef CONFIG_ARM64_VA_BITS_48 + /* + * VA_BITS may be too small to allow for an ID mapping to be created + * that covers system RAM if that is located sufficiently high in the + * physical address space. So for the ID map, use an extended virtual + * range in that case, and configure an additional translation level + * if needed. + * + * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the + * entire ID map region can be mapped. As T0SZ == (64 - #bits used), + * this number conveniently equals the number of leading zeroes in + * the physical address of __idmap_text_end. + */ + adrp x5, __idmap_text_end + clz x5, x5 + cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough? + b.ge 1f // .. then skip VA range extension + + adr_l x6, idmap_t0sz + str x5, [x6] + dmb sy + dc ivac, x6 // Invalidate potentially stale cache line + +#if (VA_BITS < 48) #define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) -#define EXTRA_PTRS (1 << (48 - EXTRA_SHIFT)) +#define EXTRA_PTRS (1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT)) /* - * If VA_BITS < 48, it may be too small to allow for an ID mapping to be - * created that covers system RAM if that is located sufficiently high - * in the physical address space. So for the ID map, use an extended - * virtual range in that case, by configuring an additional translation - * level. + * If VA_BITS < 48, we have to configure an additional table level. * First, we have to verify our assumption that the current value of * VA_BITS was chosen such that all translation levels are fully * utilised, and that lowering T0SZ will always result in an additional @@ -263,30 +306,22 @@ __create_page_tables: #error "Mismatch between VA_BITS and page size/number of translation levels" #endif + mov x4, EXTRA_PTRS + create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6 +#else /* - * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the - * entire ID map region can be mapped. As T0SZ == (64 - #bits used), - * this number conveniently equals the number of leading zeroes in - * the physical address of __idmap_text_end. + * If VA_BITS == 48, we don't have to configure an additional + * translation level, but the top-level table has more entries. */ - adrp x5, __idmap_text_end - clz x5, x5 - cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough? - b.ge 1f // .. then skip additional level - - adr_l x6, idmap_t0sz - str x5, [x6] - dmb sy - dc ivac, x6 // Invalidate potentially stale cache line - - create_table_entry x0, x3, EXTRA_SHIFT, EXTRA_PTRS, x5, x6 -1: + mov x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT) + str_l x4, idmap_ptrs_per_pgd, x5 #endif - - create_pgd_entry x0, x3, x5, x6 +1: + ldr_l x4, idmap_ptrs_per_pgd + create_pgd_entry x0, x3, x4, x5, x6 mov x5, x3 // __pa(__idmap_text_start) adr_l x6, __idmap_text_end // __pa(__idmap_text_end) - create_block_map x0, x7, x3, x5, x6 + create_block_map x0, x7, x3, x5, x6, x4 /* * Map the kernel image (starting with PHYS_OFFSET). @@ -294,12 +329,13 @@ __create_page_tables: adrp x0, swapper_pg_dir mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) add x5, x5, x23 // add KASLR displacement - create_pgd_entry x0, x5, x3, x6 + mov x4, PTRS_PER_PGD + create_pgd_entry x0, x5, x4, x3, x6 adrp x6, _end // runtime __pa(_end) adrp x3, _text // runtime __pa(_text) sub x6, x6, x3 // _end - _text add x6, x6, x5 // runtime __va(_end) - create_block_map x0, x7, x3, x5, x6 + create_block_map x0, x7, x3, x5, x6, x4 /* * Since the page tables have been populated with non-cacheable @@ -679,8 +715,10 @@ ENTRY(__enable_mmu) update_early_cpu_boot_status 0, x1, x2 adrp x1, idmap_pg_dir adrp x2, swapper_pg_dir - msr ttbr0_el1, x1 // load TTBR0 - msr ttbr1_el1, x2 // load TTBR1 + phys_to_ttbr x1, x3 + phys_to_ttbr x2, x4 + msr ttbr0_el1, x3 // load TTBR0 + msr ttbr1_el1, x4 // load TTBR1 isb msr sctlr_el1, x0 isb diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index e56d848b6466..84f5d52fddda 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -33,12 +33,14 @@ * Even switching to our copied tables will cause a changed output address at * each stage of the walk. */ -.macro break_before_make_ttbr_switch zero_page, page_table - msr ttbr1_el1, \zero_page +.macro break_before_make_ttbr_switch zero_page, page_table, tmp + phys_to_ttbr \zero_page, \tmp + msr ttbr1_el1, \tmp isb tlbi vmalle1 dsb nsh - msr ttbr1_el1, \page_table + phys_to_ttbr \page_table, \tmp + msr ttbr1_el1, \tmp isb .endm @@ -78,7 +80,7 @@ ENTRY(swsusp_arch_suspend_exit) * We execute from ttbr0, change ttbr1 to our copied linear map tables * with a break-before-make via the zero page */ - break_before_make_ttbr_switch x5, x0 + break_before_make_ttbr_switch x5, x0, x6 mov x21, x1 mov x30, x2 @@ -109,7 +111,7 @@ ENTRY(swsusp_arch_suspend_exit) dsb ish /* wait for PoU cleaning to finish */ /* switch to the restored kernels page tables */ - break_before_make_ttbr_switch x25, x21 + break_before_make_ttbr_switch x25, x21, x6 ic ialluis dsb ish diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 3009b8b80f08..f20cf7e99249 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -247,8 +247,7 @@ static int create_safe_exec_page(void *src_start, size_t length, } pte = pte_offset_kernel(pmd, dst_addr); - set_pte(pte, __pte(virt_to_phys((void *)dst) | - pgprot_val(PAGE_KERNEL_EXEC))); + set_pte(pte, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC)); /* * Load our new page tables. A strict BBM approach requires that we @@ -264,7 +263,7 @@ static int create_safe_exec_page(void *src_start, size_t length, */ cpu_set_reserved_ttbr0(); local_flush_tlb_all(); - write_sysreg(virt_to_phys(pgd), ttbr0_el1); + write_sysreg(phys_to_ttbr(virt_to_phys(pgd)), ttbr0_el1); isb(); *phys_dst_addr = virt_to_phys((void *)dst); diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 3f9615582377..33c40b3eea01 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -63,7 +63,8 @@ __do_hyp_init: cmp x0, #HVC_STUB_HCALL_NR b.lo __kvm_handle_stub_hvc - msr ttbr0_el2, x0 + phys_to_ttbr x0, x4 + msr ttbr0_el2, x4 mrs x4, tcr_el1 ldr x5, =TCR_EL2_MASK @@ -71,30 +72,27 @@ __do_hyp_init: mov x5, #TCR_EL2_RES1 orr x4, x4, x5 -#ifndef CONFIG_ARM64_VA_BITS_48 /* - * If we are running with VA_BITS < 48, we may be running with an extra - * level of translation in the ID map. This is only the case if system - * RAM is out of range for the currently configured page size and number - * of translation levels, in which case we will also need the extra - * level for the HYP ID map, or we won't be able to enable the EL2 MMU. + * The ID map may be configured to use an extended virtual address + * range. This is only the case if system RAM is out of range for the + * currently configured page size and VA_BITS, in which case we will + * also need the extended virtual range for the HYP ID map, or we won't + * be able to enable the EL2 MMU. * * However, at EL2, there is only one TTBR register, and we can't switch * between translation tables *and* update TCR_EL2.T0SZ at the same - * time. Bottom line: we need the extra level in *both* our translation - * tables. + * time. Bottom line: we need to use the extended range with *both* our + * translation tables. * * So use the same T0SZ value we use for the ID map. */ ldr_l x5, idmap_t0sz bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH -#endif + /* - * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in - * TCR_EL2. + * Set the PS bits in TCR_EL2. */ - mrs x5, ID_AA64MMFR0_EL1 - bfi x4, x5, #16, #3 + tcr_compute_pa_size x4, #TCR_EL2_PS_SHIFT, x5, x6 msr tcr_el2, x4 diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c index a81f5e10fc8c..603e1ee83e89 100644 --- a/arch/arm64/kvm/hyp/s2-setup.c +++ b/arch/arm64/kvm/hyp/s2-setup.c @@ -32,6 +32,8 @@ u32 __hyp_text __init_stage2_translation(void) * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2... */ parange = read_sysreg(id_aa64mmfr0_el1) & 7; + if (parange > ID_AA64MMFR0_PARANGE_MAX) + parange = ID_AA64MMFR0_PARANGE_MAX; val |= parange << 16; /* Compute the actual PARange... */ diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 916d9ced1c3f..4071602031ed 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -50,6 +50,7 @@ #define NO_CONT_MAPPINGS BIT(1) u64 idmap_t0sz = TCR_T0SZ(VA_BITS); +u64 idmap_ptrs_per_pgd = PTRS_PER_PGD; u64 kimage_voffset __ro_after_init; EXPORT_SYMBOL(kimage_voffset); @@ -601,8 +602,8 @@ static void __init map_kernel(pgd_t *pgd) * entry instead. */ BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); - set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START), - __pud(__pa_symbol(bm_pmd) | PUD_TYPE_TABLE)); + pud_populate(&init_mm, pud_set_fixmap_offset(pgd, FIXADDR_START), + lm_alias(bm_pmd)); pud_clear_fixmap(); } else { BUG(); @@ -717,7 +718,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) if (!p) return -ENOMEM; - set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL)); + pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL)); } else vmemmap_verify((pte_t *)pmd, node, addr, next); } while (addr = next, addr != end); @@ -910,15 +911,19 @@ int __init arch_ioremap_pmd_supported(void) int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot) { + pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | + pgprot_val(mk_sect_prot(prot))); BUG_ON(phys & ~PUD_MASK); - set_pud(pud, __pud(phys | PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot)))); + set_pud(pud, pfn_pud(__phys_to_pfn(phys), sect_prot)); return 1; } int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot) { + pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | + pgprot_val(mk_sect_prot(prot))); BUG_ON(phys & ~PMD_MASK); - set_pmd(pmd, __pmd(phys | PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot)))); + set_pmd(pmd, pfn_pmd(__phys_to_pfn(phys), sect_prot)); return 1; } diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 051e71ec3335..289f9113a27a 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -49,6 +49,14 @@ void __init pgd_cache_init(void) if (PGD_SIZE == PAGE_SIZE) return; +#ifdef CONFIG_ARM64_PA_BITS_52 + /* + * With 52-bit physical addresses, the architecture requires the + * top-level table to be aligned to at least 64 bytes. + */ + BUILD_BUG_ON(PGD_SIZE < 64); +#endif + /* * Naturally aligned pgds required by the architecture. */ diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 3146dc96f05b..bc334588f234 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -143,7 +143,8 @@ ENTRY(cpu_do_switch_mm) bfi x2, x1, #48, #16 // set the ASID msr ttbr1_el1, x2 // in TTBR1 (since TCR.A1 is set) isb - msr ttbr0_el1, x0 // now update TTBR0 + phys_to_ttbr x0, x2 + msr ttbr0_el1, x2 // now update TTBR0 isb post_ttbr_update_workaround ret @@ -160,14 +161,16 @@ ENTRY(idmap_cpu_replace_ttbr1) save_and_disable_daif flags=x2 adrp x1, empty_zero_page - msr ttbr1_el1, x1 + phys_to_ttbr x1, x3 + msr ttbr1_el1, x3 isb tlbi vmalle1 dsb nsh isb - msr ttbr1_el1, x0 + phys_to_ttbr x0, x3 + msr ttbr1_el1, x3 isb restore_daif x2 @@ -230,11 +233,9 @@ ENTRY(__cpu_setup) tcr_set_idmap_t0sz x10, x9 /* - * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in - * TCR_EL1. + * Set the IPS bits in TCR_EL1. */ - mrs x9, ID_AA64MMFR0_EL1 - bfi x10, x9, #32, #3 + tcr_compute_pa_size x10, #TCR_IPS_SHIFT, x5, x6 #ifdef CONFIG_ARM64_HW_AFDBM /* * Hardware update of the Access and Dirty bits. diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 6b60c98a6e22..c8d49879307f 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -509,7 +509,7 @@ static void update_vttbr(struct kvm *kvm) pgd_phys = virt_to_phys(kvm->arch.pgd); BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); - kvm->arch.vttbr = pgd_phys | vmid; + kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid; spin_unlock(&kvm_vmid_lock); } diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index b36945d49986..761787befd3b 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -629,14 +629,20 @@ static int __create_hyp_mappings(pgd_t *pgdp, { pgd_t *pgd; pud_t *pud; - unsigned long addr, next; + unsigned long addr, next, ptrs_per_pgd = PTRS_PER_PGD; int err = 0; + /* + * If it's not the hyp_pgd, fall back to the kvm idmap layout. + */ + if (pgdp != hyp_pgd) + ptrs_per_pgd = __kvm_idmap_ptrs_per_pgd(); + mutex_lock(&kvm_hyp_pgd_mutex); addr = start & PAGE_MASK; end = PAGE_ALIGN(end); do { - pgd = pgdp + pgd_index(addr); + pgd = pgdp + ((addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1)); if (pgd_none(*pgd)) { pud = pud_alloc_one(NULL, addr);