Merge patch series "riscv: Use PUD/P4D/PGD pages for the linear mapping"
Alexandre Ghiti <alexghiti@rivosinc.com> says:

This patchset intends to improve TLB utilization by using hugepages for
the linear mapping.

As reported by Anup in v6, when STRICT_KERNEL_RWX is enabled, we must
take care of isolating the kernel text and rodata so that they are not
mapped with a PUD mapping, which would otherwise assign the wrong
permissions to the whole region: this is achieved the same way as arm64,
by using the memblock nomap API, which isolates those regions and
re-merges them afterwards, thus avoiding any issue with the creation of
the system resources tree.

 arch/riscv/include/asm/page.h |  19 ++++++-
 arch/riscv/mm/init.c          | 102 ++++++++++++++++++++++++++--------
 arch/riscv/mm/physaddr.c      |  16 ++++++
 drivers/of/fdt.c              |  11 ++--
 4 files changed, 118 insertions(+), 30 deletions(-)

* b4-shazam-merge:
  riscv: Use PUD/P4D/PGD pages for the linear mapping
  riscv: Move the linear mapping creation in its own function
  riscv: Get rid of riscv_pfn_base variable

Link: https://lore.kernel.org/r/20230324155421.271544-1-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
commit 2e75ab3189
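Condensed from the init.c hunks below, the nomap dance described above looks
roughly like this (a sketch of create_linear_mapping_page_table(), not the
literal code):

	memblock_mark_nomap(ktext_start, ktext_size);		/* hide text ... */
	memblock_mark_nomap(krodata_start, krodata_size);	/* ... and rodata */

	for_each_mem_range(i, &start, &end)		/* nomap regions are skipped */
		create_linear_mapping_range(start, end);	/* PUD/P4D/PGD leaves allowed */

	/* map the isolated regions with their own, smaller granularity */
	create_linear_mapping_range(ktext_start, ktext_start + ktext_size);
	create_linear_mapping_range(krodata_start, krodata_start + krodata_size);

	/* re-merge so the regions show up normally in the resource tree */
	memblock_clear_nomap(ktext_start, ktext_size);
	memblock_clear_nomap(krodata_start, krodata_size);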
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -89,9 +89,16 @@ typedef struct page *pgtable_t;
 #define PTE_FMT "%08lx"
 #endif
 
+#ifdef CONFIG_64BIT
+/*
+ * We override this value as its generic definition uses __pa too early in
+ * the boot process (before kernel_map.va_pa_offset is set).
+ */
+#define MIN_MEMBLOCK_ADDR      0
+#endif
+
 #ifdef CONFIG_MMU
-extern unsigned long riscv_pfn_base;
-#define ARCH_PFN_OFFSET		(riscv_pfn_base)
+#define ARCH_PFN_OFFSET		(PFN_DOWN((unsigned long)phys_ram_base))
 #else
 #define ARCH_PFN_OFFSET		(PAGE_OFFSET >> PAGE_SHIFT)
 #endif /* CONFIG_MMU */
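Both the old and the new ARCH_PFN_OFFSET resolve to the first PFN of DRAM, so
dropping riscv_pfn_base loses nothing. For context (not part of this diff),
ARCH_PFN_OFFSET feeds the generic flatmem memory model, roughly as in
include/asm-generic/memory_model.h for the CONFIG_FLATMEM case:

	/* mem_map[] is indexed from the first RAM pfn, so pfn <-> page */
	/* conversions only need the offset of that first valid pfn.   */
	#define __pfn_to_page(pfn)	(mem_map + ((pfn) - ARCH_PFN_OFFSET))
	#define __page_to_pfn(page)	((unsigned long)((page) - mem_map) + ARCH_PFN_OFFSET)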
@@ -121,7 +128,11 @@ extern phys_addr_t phys_ram_base;
 #define is_linear_mapping(x)	\
 	((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < PAGE_OFFSET + KERN_VIRT_SIZE))
 
+#ifndef CONFIG_DEBUG_VIRTUAL
 #define linear_mapping_pa_to_va(x)	((void *)((unsigned long)(x) + kernel_map.va_pa_offset))
+#else
+void *linear_mapping_pa_to_va(unsigned long x);
+#endif
 #define kernel_mapping_pa_to_va(y)	({				\
 	unsigned long _y = (unsigned long)(y);				\
 	(IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ?		\
@@ -130,7 +141,11 @@ extern phys_addr_t phys_ram_base;
 })
 #define __pa_to_va_nodebug(x)		linear_mapping_pa_to_va(x)
 
+#ifndef CONFIG_DEBUG_VIRTUAL
 #define linear_mapping_va_to_pa(x)	((unsigned long)(x) - kernel_map.va_pa_offset)
+#else
+phys_addr_t linear_mapping_va_to_pa(unsigned long x);
+#endif
 #define kernel_mapping_va_to_pa(y) ({					\
 	unsigned long _y = (unsigned long)(y);				\
 	(IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ?	\
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -213,6 +213,14 @@ static void __init setup_bootmem(void)
 	phys_ram_end = memblock_end_of_DRAM();
 	if (!IS_ENABLED(CONFIG_XIP_KERNEL))
 		phys_ram_base = memblock_start_of_DRAM();
+
+	/*
+	 * In 64-bit, any use of __va/__pa before this point is wrong as we
+	 * did not know the start of DRAM before.
+	 */
+	if (IS_ENABLED(CONFIG_64BIT))
+		kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base;
+
 	/*
 	 * memblock allocator is not aware of the fact that last 4K bytes of
 	 * the addressable memory can not be mapped because of IS_ERR_VALUE
@@ -271,9 +279,6 @@ static void __init setup_bootmem(void)
 #ifdef CONFIG_MMU
 struct pt_alloc_ops pt_ops __initdata;
 
-unsigned long riscv_pfn_base __ro_after_init;
-EXPORT_SYMBOL(riscv_pfn_base);
-
 pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
 pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
 static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
@@ -285,7 +290,6 @@ static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
 
 #ifdef CONFIG_XIP_KERNEL
 #define pt_ops			(*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
-#define riscv_pfn_base		(*(unsigned long *)XIP_FIXUP(&riscv_pfn_base))
 #define trampoline_pg_dir      ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
 #define fixmap_pte             ((pte_t *)XIP_FIXUP(fixmap_pte))
 #define early_pg_dir           ((pgd_t *)XIP_FIXUP(early_pg_dir))
@@ -671,9 +675,16 @@ void __init create_pgd_mapping(pgd_t *pgdp,
 
 static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
 {
-	/* Upgrade to PMD_SIZE mappings whenever possible */
-	base &= PMD_SIZE - 1;
-	if (!base && size >= PMD_SIZE)
+	if (!(base & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE)
+		return PGDIR_SIZE;
+
+	if (!(base & (P4D_SIZE - 1)) && size >= P4D_SIZE)
+		return P4D_SIZE;
+
+	if (!(base & (PUD_SIZE - 1)) && size >= PUD_SIZE)
+		return PUD_SIZE;
+
+	if (!(base & (PMD_SIZE - 1)) && size >= PMD_SIZE)
 		return PMD_SIZE;
 
 	return PAGE_SIZE;
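best_map_size() now tries every level from the top down: a level is usable
only if the physical base is aligned to its size and the remaining range
covers it. Illustrative values (sv39, where PMD_SIZE is 2 MiB and P4D/PUD
fold onto the 1 GiB PGDIR_SIZE; these constants are examples, not taken from
the patch):

	best_map_size(0x80000000, SZ_1G);		/* 1 GiB aligned, 1 GiB long -> PGDIR_SIZE */
	best_map_size(0x80200000, SZ_1G);		/* only 2 MiB aligned        -> PMD_SIZE   */
	best_map_size(0x80000000, SZ_1G - SZ_4K);	/* aligned but too short     -> PMD_SIZE   */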
@@ -982,11 +993,22 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 	set_satp_mode();
 #endif
 
-	kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr;
+	/*
+	 * In 64-bit, we defer the setup of va_pa_offset to setup_bootmem,
+	 * where we have the system memory layout: this allows us to align
+	 * the physical and virtual mappings and then make use of PUD/P4D/PGD
+	 * for the linear mapping. This is only possible because the kernel
+	 * mapping lies outside the linear mapping.
+	 * In 32-bit however, as the kernel resides in the linear mapping,
+	 * setup_vm_final can not change the mapping established here,
+	 * otherwise the same kernel addresses would get mapped to different
+	 * physical addresses (if the start of dram is different from the
+	 * kernel physical address start).
+	 */
+	kernel_map.va_pa_offset = IS_ENABLED(CONFIG_64BIT) ?
+				  0UL : PAGE_OFFSET - kernel_map.phys_addr;
 	kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;
 
-	riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr);
-
 	/*
 	 * The default maximal physical memory size is KERN_VIRT_SIZE for 32-bit
 	 * kernel, whereas for 64-bit kernel, the end of the virtual address
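The point of the deferral: once setup_bootmem() (see the -213 hunk above) sets
va_pa_offset = PAGE_OFFSET - phys_ram_base, the start of DRAM lands exactly on
PAGE_OFFSET, which sits on a page-global-directory boundary. Roughly, assuming
for illustration that DRAM starts at 0x80000000:

	/* After setup_bootmem() on a 64-bit kernel: */
	va_pa_offset = PAGE_OFFSET - 0x80000000;
	__va(0x80000000) == (void *)PAGE_OFFSET;	/* aligned at every level, so	*/
						/* best_map_size() can pick the	*/
						/* largest possible leaf	*/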
@@ -1090,16 +1112,36 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 	pt_ops_set_fixmap();
 }
 
-static void __init setup_vm_final(void)
+static void __init create_linear_mapping_range(phys_addr_t start,
+					       phys_addr_t end)
 {
+	phys_addr_t pa;
 	uintptr_t va, map_size;
-	phys_addr_t pa, start, end;
+
+	for (pa = start; pa < end; pa += map_size) {
+		va = (uintptr_t)__va(pa);
+		map_size = best_map_size(pa, end - pa);
+
+		create_pgd_mapping(swapper_pg_dir, va, pa, map_size,
+				   pgprot_from_va(va));
+	}
+}
+
+static void __init create_linear_mapping_page_table(void)
+{
+	phys_addr_t start, end;
 	u64 i;
 
-	/* Setup swapper PGD for fixmap */
-	create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
-			   __pa_symbol(fixmap_pgd_next),
-			   PGDIR_SIZE, PAGE_TABLE);
+#ifdef CONFIG_STRICT_KERNEL_RWX
+	phys_addr_t ktext_start = __pa_symbol(_start);
+	phys_addr_t ktext_size = __init_data_begin - _start;
+	phys_addr_t krodata_start = __pa_symbol(__start_rodata);
+	phys_addr_t krodata_size = _data - __start_rodata;
+
+	/* Isolate kernel text and rodata so they don't get mapped with a PUD */
+	memblock_mark_nomap(ktext_start, ktext_size);
+	memblock_mark_nomap(krodata_start, krodata_size);
+#endif
 
 	/* Map all memory banks in the linear mapping */
 	for_each_mem_range(i, &start, &end) {
@@ -1111,15 +1153,29 @@ static void __init setup_vm_final(void)
 		if (end >= __pa(PAGE_OFFSET) + memory_limit)
 			end = __pa(PAGE_OFFSET) + memory_limit;
 
-		for (pa = start; pa < end; pa += map_size) {
-			va = (uintptr_t)__va(pa);
-			map_size = best_map_size(pa, end - pa);
-
-			create_pgd_mapping(swapper_pg_dir, va, pa, map_size,
-					   pgprot_from_va(va));
-		}
+		create_linear_mapping_range(start, end);
 	}
 
+#ifdef CONFIG_STRICT_KERNEL_RWX
+	create_linear_mapping_range(ktext_start, ktext_start + ktext_size);
+	create_linear_mapping_range(krodata_start,
+				    krodata_start + krodata_size);
+
+	memblock_clear_nomap(ktext_start, ktext_size);
+	memblock_clear_nomap(krodata_start, krodata_size);
+#endif
+}
+
+static void __init setup_vm_final(void)
+{
+	/* Setup swapper PGD for fixmap */
+	create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
+			   __pa_symbol(fixmap_pgd_next),
+			   PGDIR_SIZE, PAGE_TABLE);
+
+	/* Map the linear mapping */
+	create_linear_mapping_page_table();
+
 	/* Map the kernel */
 	if (IS_ENABLED(CONFIG_64BIT))
 		create_kernel_page_table(swapper_pg_dir, false);
diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c
--- a/arch/riscv/mm/physaddr.c
+++ b/arch/riscv/mm/physaddr.c
@@ -33,3 +33,19 @@ phys_addr_t __phys_addr_symbol(unsigned long x)
 	return __va_to_pa_nodebug(x);
 }
 EXPORT_SYMBOL(__phys_addr_symbol);
+
+phys_addr_t linear_mapping_va_to_pa(unsigned long x)
+{
+	BUG_ON(!kernel_map.va_pa_offset);
+
+	return ((unsigned long)(x) - kernel_map.va_pa_offset);
+}
+EXPORT_SYMBOL(linear_mapping_va_to_pa);
+
+void *linear_mapping_pa_to_va(unsigned long x)
+{
+	BUG_ON(!kernel_map.va_pa_offset);
+
+	return ((void *)((unsigned long)(x) + kernel_map.va_pa_offset));
+}
+EXPORT_SYMBOL(linear_mapping_pa_to_va);
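With CONFIG_DEBUG_VIRTUAL=y, these out-of-line helpers replace the header
macros from the page.h hunks above, so a linear-map conversion issued before
setup_bootmem() has set kernel_map.va_pa_offset (zero-initialized on 64-bit,
per the setup_vm() hunk) now trips the BUG_ON instead of silently returning a
bogus address. A hypothetical offender:

	/* Called too early on a 64-bit kernel, before setup_bootmem(): */
	void *p = __va(0x80000000);	/* -> linear_mapping_pa_to_va() -> BUG() */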
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -887,12 +887,13 @@ const void * __init of_flat_dt_match_machine(const void *default_match,
 static void __early_init_dt_declare_initrd(unsigned long start,
 					   unsigned long end)
 {
-	/* ARM64 would cause a BUG to occur here when CONFIG_DEBUG_VM is
-	 * enabled since __va() is called too early. ARM64 does make use
-	 * of phys_initrd_start/phys_initrd_size so we can skip this
-	 * conversion.
+	/*
+	 * __va() is not yet available this early on some platforms. In that
+	 * case, the platform uses phys_initrd_start/phys_initrd_size instead
+	 * and does the VA conversion itself.
 	 */
-	if (!IS_ENABLED(CONFIG_ARM64)) {
+	if (!IS_ENABLED(CONFIG_ARM64) &&
+	    !(IS_ENABLED(CONFIG_RISCV) && IS_ENABLED(CONFIG_64BIT))) {
 		initrd_start = (unsigned long)__va(start);
 		initrd_end = (unsigned long)__va(end);
 		initrd_below_start_ok = 1;
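With 64-bit RISC-V opted out here, the architecture picks the initrd up
itself once __va() works, roughly along the lines of what arm64 already does
(a sketch, not code from this series):

	/* In the arch's early memory init, after va_pa_offset is valid: */
	if (phys_initrd_size) {
		initrd_start = (unsigned long)__va(phys_initrd_start);
		initrd_end   = initrd_start + phys_initrd_size;
	}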