arm64: hibernate: move page handling function to new trans_pgd.c

Now that we have abstracted the required functions, move them to a new home.
Later, we will generalize these functions so that they are useful outside
of hibernation.

Signed-off-by: Pavel Tatashin <pasha.tatashin@soleen.com>
Reviewed-by: James Morse <james.morse@arm.com>
Link: https://lore.kernel.org/r/20210125191923.1060122-4-pasha.tatashin@soleen.com
Signed-off-by: Will Deacon <will@kernel.org>
This commit is contained in:
Pavel Tatashin 2021-01-25 14:19:08 -05:00 committed by Will Deacon
parent 41f67d40a3
commit 072e3d96a7
5 changed files with 277 additions and 227 deletions

View File

@ -1132,6 +1132,10 @@ config CRASH_DUMP
For more details see Documentation/admin-guide/kdump/kdump.rst
config TRANS_TABLE
def_bool y
depends on HIBERNATION
config XEN_DOM0
def_bool y
depends on XEN

View File

@ -0,0 +1,21 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2020, Microsoft Corporation.
* Pavel Tatashin <pasha.tatashin@soleen.com>
*/
#ifndef _ASM_TRANS_TABLE_H
#define _ASM_TRANS_TABLE_H
#include <linux/bits.h>
#include <linux/types.h>
#include <asm/pgtable-types.h>
/*
 * Allocate a new top-level table and copy the kernel page tables covering
 * [start, end) into it, with the copied mappings made writable.
 *
 * On success the new table is stored in *dst_pgdp and 0 is returned.
 * On failure -ENOMEM is returned and *dst_pgdp is left unmodified.
 */
int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start,
unsigned long end);
/*
 * Map the page at kernel virtual address @page at @dst_addr in @trans_pgd,
 * allocating any missing intermediate table levels.
 *
 * @pgprot is the protection requested for the new mapping.
 *
 * Returns 0 on success or -ENOMEM if a table page cannot be allocated.
 */
int trans_pgd_map_page(pgd_t *trans_pgd, void *page, unsigned long dst_addr,
pgprot_t pgprot);
#endif /* _ASM_TRANS_TABLE_H */

View File

@ -16,7 +16,6 @@
#define pr_fmt(x) "hibernate: " x
#include <linux/cpu.h>
#include <linux/kvm_host.h>
#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/sched.h>
#include <linux/suspend.h>
@ -31,13 +30,12 @@
#include <asm/memory.h>
#include <asm/mmu_context.h>
#include <asm/mte.h>
#include <asm/pgalloc.h>
#include <asm/pgtable-hwdef.h>
#include <asm/sections.h>
#include <asm/smp.h>
#include <asm/smp_plat.h>
#include <asm/suspend.h>
#include <asm/sysreg.h>
#include <asm/trans_pgd.h>
#include <asm/virt.h>
/*
@ -178,54 +176,6 @@ int arch_hibernation_header_restore(void *addr)
}
EXPORT_SYMBOL(arch_hibernation_header_restore);
/*
 * Map @page at @dst_addr in the page tables rooted at @trans_pgd.
 *
 * @trans_pgd: top-level table to install the mapping in
 * @page:      kernel virtual address of the page to map
 * @dst_addr:  virtual address the page should appear at in @trans_pgd
 * @pgprot:    protection attributes for the new PTE
 *
 * Any missing intermediate level is allocated with get_safe_page(GFP_ATOMIC)
 * and hooked into the level above it.
 *
 * Returns 0 on success, -ENOMEM if a table page cannot be allocated.
 */
static int trans_pgd_map_page(pgd_t *trans_pgd, void *page,
			      unsigned long dst_addr,
			      pgprot_t pgprot)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	pgdp = pgd_offset_pgd(trans_pgd, dst_addr);
	if (pgd_none(READ_ONCE(*pgdp))) {
		p4dp = (void *)get_safe_page(GFP_ATOMIC);
		/* Check the page just allocated, not the (never NULL) pgdp. */
		if (!p4dp)
			return -ENOMEM;
		pgd_populate(&init_mm, pgdp, p4dp);
	}

	p4dp = p4d_offset(pgdp, dst_addr);
	if (p4d_none(READ_ONCE(*p4dp))) {
		pudp = (void *)get_safe_page(GFP_ATOMIC);
		if (!pudp)
			return -ENOMEM;
		p4d_populate(&init_mm, p4dp, pudp);
	}

	pudp = pud_offset(p4dp, dst_addr);
	if (pud_none(READ_ONCE(*pudp))) {
		pmdp = (void *)get_safe_page(GFP_ATOMIC);
		if (!pmdp)
			return -ENOMEM;
		pud_populate(&init_mm, pudp, pmdp);
	}

	pmdp = pmd_offset(pudp, dst_addr);
	if (pmd_none(READ_ONCE(*pmdp))) {
		ptep = (void *)get_safe_page(GFP_ATOMIC);
		if (!ptep)
			return -ENOMEM;
		pmd_populate_kernel(&init_mm, pmdp, ptep);
	}

	ptep = pte_offset_kernel(pmdp, dst_addr);
	/* Honour the caller's protection instead of a fixed PAGE_KERNEL_EXEC. */
	set_pte(ptep, pfn_pte(virt_to_pfn(page), pgprot));

	return 0;
}
/*
* Copies length bytes, starting at src_start into an new page,
* perform cache maintenance, then maps it at the specified address low
@ -462,182 +412,6 @@ int swsusp_arch_suspend(void)
return ret;
}
/*
 * Copy a single PTE from the source table into the destination table,
 * making the copy writable. @addr is accepted but not used here.
 */
static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
{
	pte_t entry = READ_ONCE(*src_ptep);

	if (pte_valid(entry)) {
		/*
		 * Resume will overwrite areas that may be marked
		 * read only (code, rodata). Clear the RDONLY bit from
		 * the temporary mappings we use during restore.
		 */
		set_pte(dst_ptep, pte_mkwrite(entry));
		return;
	}

	/* Nothing to copy unless debug_pagealloc left a non-empty entry. */
	if (!debug_pagealloc_enabled() || pte_none(entry))
		return;

	/*
	 * debug_pagealloc will have removed the PTE_VALID bit if
	 * the page isn't in use by the resume kernel. It may have
	 * been in use by the original kernel, in which case we need
	 * to put it back in our copy to do the restore.
	 *
	 * Before marking this entry valid, check the pfn should
	 * be mapped.
	 */
	BUG_ON(!pfn_valid(pte_pfn(entry)));
	set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(entry)));
}
/*
 * Allocate a fresh PTE table, install it in @dst_pmdp and copy the PTEs
 * covering [start, end) from the table referenced by @src_pmdp.
 *
 * Returns 0 on success, -ENOMEM if the table page cannot be allocated.
 */
static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
		    unsigned long end)
{
	unsigned long cur = start;
	pte_t *from;
	pte_t *to;
	pte_t *table;

	table = (pte_t *)get_safe_page(GFP_ATOMIC);
	if (!table)
		return -ENOMEM;

	pmd_populate_kernel(&init_mm, dst_pmdp, table);

	to = pte_offset_kernel(dst_pmdp, start);
	from = pte_offset_kernel(src_pmdp, start);

	/* The range is never empty: copy at least one entry. */
	do {
		_copy_pte(to, from, cur);
		to++;
		from++;
		cur += PAGE_SIZE;
	} while (cur != end);

	return 0;
}
/*
 * Copy the PMD level for [start, end) from @src_pudp into @dst_pudp,
 * allocating the destination PMD table if it is not yet present.
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
		    unsigned long end)
{
	unsigned long cur = start;
	unsigned long next;
	pmd_t *from;
	pmd_t *to;

	if (pud_none(READ_ONCE(*dst_pudp))) {
		pmd_t *table = (pmd_t *)get_safe_page(GFP_ATOMIC);

		if (!table)
			return -ENOMEM;
		pud_populate(&init_mm, dst_pudp, table);
	}

	to = pmd_offset(dst_pudp, start);
	from = pmd_offset(src_pudp, start);

	do {
		pmd_t entry = READ_ONCE(*from);

		next = pmd_addr_end(cur, end);

		if (!pmd_none(entry)) {
			if (!pmd_table(entry)) {
				/* Not a table: copy the entry with RDONLY cleared. */
				set_pmd(to,
					__pmd(pmd_val(entry) & ~PMD_SECT_RDONLY));
			} else if (copy_pte(to, from, cur, next)) {
				return -ENOMEM;
			}
		}

		to++;
		from++;
		cur = next;
	} while (cur != end);

	return 0;
}
/*
 * Copy the PUD level for [start, end) from @src_p4dp into @dst_p4dp,
 * allocating the destination PUD table if it is not yet present.
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int copy_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long start,
		    unsigned long end)
{
	unsigned long cur = start;
	unsigned long next;
	pud_t *from;
	pud_t *to;

	if (p4d_none(READ_ONCE(*dst_p4dp))) {
		pud_t *table = (pud_t *)get_safe_page(GFP_ATOMIC);

		if (!table)
			return -ENOMEM;
		p4d_populate(&init_mm, dst_p4dp, table);
	}

	to = pud_offset(dst_p4dp, start);
	from = pud_offset(src_p4dp, start);

	do {
		pud_t entry = READ_ONCE(*from);

		next = pud_addr_end(cur, end);

		if (!pud_none(entry)) {
			if (!pud_table(entry)) {
				/* Not a table: copy the entry with RDONLY cleared. */
				set_pud(to,
					__pud(pud_val(entry) & ~PUD_SECT_RDONLY));
			} else if (copy_pmd(to, from, cur, next)) {
				return -ENOMEM;
			}
		}

		to++;
		from++;
		cur = next;
	} while (cur != end);

	return 0;
}
/*
 * Walk the P4D entries covering [start, end) and copy every populated
 * source entry's lower levels into the destination.
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int copy_p4d(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
		    unsigned long end)
{
	unsigned long cur = start;
	unsigned long next;
	p4d_t *to = p4d_offset(dst_pgdp, start);
	p4d_t *from = p4d_offset(src_pgdp, start);

	do {
		next = p4d_addr_end(cur, end);

		/* Empty source entries are skipped. */
		if (!p4d_none(READ_ONCE(*from)) &&
		    copy_pud(to, from, cur, next))
			return -ENOMEM;

		to++;
		from++;
		cur = next;
	} while (cur != end);

	return 0;
}
/*
 * Copy the kernel page tables covering [start, end) into the table
 * rooted at @dst_pgdp.
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
			    unsigned long end)
{
	unsigned long cur = start;
	unsigned long next;
	pgd_t *from = pgd_offset_k(start);
	pgd_t *to = pgd_offset_pgd(dst_pgdp, start);

	do {
		next = pgd_addr_end(cur, end);

		/* Empty source entries are skipped. */
		if (!pgd_none(READ_ONCE(*from)) &&
		    copy_p4d(to, from, cur, next))
			return -ENOMEM;

		to++;
		from++;
		cur = next;
	} while (cur != end);

	return 0;
}
/*
 * Allocate a top-level table and fill it with a copy of the kernel page
 * tables for [start, end).
 *
 * *dst_pgdp is written only on success. Returns 0 on success, or the
 * error from the allocation/copy (-ENOMEM).
 */
static int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start,
				 unsigned long end)
{
	pgd_t *new_pgd;
	int err;

	new_pgd = (pgd_t *)get_safe_page(GFP_ATOMIC);
	if (!new_pgd) {
		pr_err("Failed to allocate memory for temporary page tables.\n");
		return -ENOMEM;
	}

	err = copy_page_tables(new_pgd, start, end);
	if (err)
		return err;

	*dst_pgdp = new_pgd;
	return 0;
}
/*
* Setup then Resume from the hibernate image using swsusp_arch_suspend_exit().
*

View File

@ -6,6 +6,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o
obj-$(CONFIG_TRANS_TABLE) += trans_pgd.o
obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
obj-$(CONFIG_ARM64_MTE) += mteswap.o

250
arch/arm64/mm/trans_pgd.c Normal file
View File

@ -0,0 +1,250 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Transitional page tables for kexec and hibernate
*
* This file derived from: arch/arm64/kernel/hibernate.c
*
* Copyright (c) 2020, Microsoft Corporation.
* Pavel Tatashin <pasha.tatashin@soleen.com>
*
*/
/*
 * Transitional tables are used while the system is transferring from one
 * world to another, such as during hibernate restore and kexec reboots.
 * During these phases one cannot rely on the current page tables staying
 * intact, because hibernate and kexec can overwrite them during the
 * transition.
 */
#include <asm/trans_pgd.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <linux/suspend.h>
#include <linux/bug.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
/*
 * Copy a single PTE from the source table into the destination table,
 * making the copy writable. @addr is accepted but not used here.
 */
static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
{
	pte_t entry = READ_ONCE(*src_ptep);

	if (pte_valid(entry)) {
		/*
		 * Resume will overwrite areas that may be marked
		 * read only (code, rodata). Clear the RDONLY bit from
		 * the temporary mappings we use during restore.
		 */
		set_pte(dst_ptep, pte_mkwrite(entry));
		return;
	}

	/* Nothing to copy unless debug_pagealloc left a non-empty entry. */
	if (!debug_pagealloc_enabled() || pte_none(entry))
		return;

	/*
	 * debug_pagealloc will have removed the PTE_VALID bit if
	 * the page isn't in use by the resume kernel. It may have
	 * been in use by the original kernel, in which case we need
	 * to put it back in our copy to do the restore.
	 *
	 * Before marking this entry valid, check the pfn should
	 * be mapped.
	 */
	BUG_ON(!pfn_valid(pte_pfn(entry)));
	set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(entry)));
}
/*
 * Allocate a fresh PTE table, install it in @dst_pmdp and copy the PTEs
 * covering [start, end) from the table referenced by @src_pmdp.
 *
 * Returns 0 on success, -ENOMEM if the table page cannot be allocated.
 */
static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
		    unsigned long end)
{
	unsigned long cur = start;
	pte_t *from;
	pte_t *to;
	pte_t *table;

	table = (pte_t *)get_safe_page(GFP_ATOMIC);
	if (!table)
		return -ENOMEM;

	pmd_populate_kernel(&init_mm, dst_pmdp, table);

	to = pte_offset_kernel(dst_pmdp, start);
	from = pte_offset_kernel(src_pmdp, start);

	/* The range is never empty: copy at least one entry. */
	do {
		_copy_pte(to, from, cur);
		to++;
		from++;
		cur += PAGE_SIZE;
	} while (cur != end);

	return 0;
}
/*
 * Copy the PMD level for [start, end) from @src_pudp into @dst_pudp,
 * allocating the destination PMD table if it is not yet present.
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
		    unsigned long end)
{
	unsigned long cur = start;
	unsigned long next;
	pmd_t *from;
	pmd_t *to;

	if (pud_none(READ_ONCE(*dst_pudp))) {
		pmd_t *table = (pmd_t *)get_safe_page(GFP_ATOMIC);

		if (!table)
			return -ENOMEM;
		pud_populate(&init_mm, dst_pudp, table);
	}

	to = pmd_offset(dst_pudp, start);
	from = pmd_offset(src_pudp, start);

	do {
		pmd_t entry = READ_ONCE(*from);

		next = pmd_addr_end(cur, end);

		if (!pmd_none(entry)) {
			if (!pmd_table(entry)) {
				/* Not a table: copy the entry with RDONLY cleared. */
				set_pmd(to,
					__pmd(pmd_val(entry) & ~PMD_SECT_RDONLY));
			} else if (copy_pte(to, from, cur, next)) {
				return -ENOMEM;
			}
		}

		to++;
		from++;
		cur = next;
	} while (cur != end);

	return 0;
}
/*
 * Copy the PUD level for [start, end) from @src_p4dp into @dst_p4dp,
 * allocating the destination PUD table if it is not yet present.
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int copy_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long start,
		    unsigned long end)
{
	unsigned long cur = start;
	unsigned long next;
	pud_t *from;
	pud_t *to;

	if (p4d_none(READ_ONCE(*dst_p4dp))) {
		pud_t *table = (pud_t *)get_safe_page(GFP_ATOMIC);

		if (!table)
			return -ENOMEM;
		p4d_populate(&init_mm, dst_p4dp, table);
	}

	to = pud_offset(dst_p4dp, start);
	from = pud_offset(src_p4dp, start);

	do {
		pud_t entry = READ_ONCE(*from);

		next = pud_addr_end(cur, end);

		if (!pud_none(entry)) {
			if (!pud_table(entry)) {
				/* Not a table: copy the entry with RDONLY cleared. */
				set_pud(to,
					__pud(pud_val(entry) & ~PUD_SECT_RDONLY));
			} else if (copy_pmd(to, from, cur, next)) {
				return -ENOMEM;
			}
		}

		to++;
		from++;
		cur = next;
	} while (cur != end);

	return 0;
}
/*
 * Walk the P4D entries covering [start, end) and copy every populated
 * source entry's lower levels into the destination.
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int copy_p4d(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
		    unsigned long end)
{
	unsigned long cur = start;
	unsigned long next;
	p4d_t *to = p4d_offset(dst_pgdp, start);
	p4d_t *from = p4d_offset(src_pgdp, start);

	do {
		next = p4d_addr_end(cur, end);

		/* Empty source entries are skipped. */
		if (!p4d_none(READ_ONCE(*from)) &&
		    copy_pud(to, from, cur, next))
			return -ENOMEM;

		to++;
		from++;
		cur = next;
	} while (cur != end);

	return 0;
}
/*
 * Copy the kernel page tables covering [start, end) into the table
 * rooted at @dst_pgdp.
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
			    unsigned long end)
{
	unsigned long cur = start;
	unsigned long next;
	pgd_t *from = pgd_offset_k(start);
	pgd_t *to = pgd_offset_pgd(dst_pgdp, start);

	do {
		next = pgd_addr_end(cur, end);

		/* Empty source entries are skipped. */
		if (!pgd_none(READ_ONCE(*from)) &&
		    copy_p4d(to, from, cur, next))
			return -ENOMEM;

		to++;
		from++;
		cur = next;
	} while (cur != end);

	return 0;
}
/*
 * Allocate a top-level table and fill it with a copy of the kernel page
 * tables for [start, end).
 *
 * *dst_pgdp is written only on success. Returns 0 on success, or the
 * error from the allocation/copy (-ENOMEM).
 */
int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start,
			  unsigned long end)
{
	pgd_t *new_pgd;
	int err;

	new_pgd = (pgd_t *)get_safe_page(GFP_ATOMIC);
	if (!new_pgd) {
		pr_err("Failed to allocate memory for temporary page tables.\n");
		return -ENOMEM;
	}

	err = copy_page_tables(new_pgd, start, end);
	if (err)
		return err;

	*dst_pgdp = new_pgd;
	return 0;
}
/*
 * Map @page at @dst_addr in the page tables rooted at @trans_pgd.
 *
 * @trans_pgd: top-level table to install the mapping in
 * @page:      kernel virtual address of the page to map
 * @dst_addr:  virtual address the page should appear at in @trans_pgd
 * @pgprot:    protection attributes for the new PTE
 *
 * Any missing intermediate level is allocated with get_safe_page(GFP_ATOMIC)
 * and hooked into the level above it.
 *
 * Returns 0 on success, -ENOMEM if a table page cannot be allocated.
 */
int trans_pgd_map_page(pgd_t *trans_pgd, void *page,
		       unsigned long dst_addr,
		       pgprot_t pgprot)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	pgdp = pgd_offset_pgd(trans_pgd, dst_addr);
	if (pgd_none(READ_ONCE(*pgdp))) {
		p4dp = (void *)get_safe_page(GFP_ATOMIC);
		/* Check the page just allocated, not the (never NULL) pgdp. */
		if (!p4dp)
			return -ENOMEM;
		pgd_populate(&init_mm, pgdp, p4dp);
	}

	p4dp = p4d_offset(pgdp, dst_addr);
	if (p4d_none(READ_ONCE(*p4dp))) {
		pudp = (void *)get_safe_page(GFP_ATOMIC);
		if (!pudp)
			return -ENOMEM;
		p4d_populate(&init_mm, p4dp, pudp);
	}

	pudp = pud_offset(p4dp, dst_addr);
	if (pud_none(READ_ONCE(*pudp))) {
		pmdp = (void *)get_safe_page(GFP_ATOMIC);
		if (!pmdp)
			return -ENOMEM;
		pud_populate(&init_mm, pudp, pmdp);
	}

	pmdp = pmd_offset(pudp, dst_addr);
	if (pmd_none(READ_ONCE(*pmdp))) {
		ptep = (void *)get_safe_page(GFP_ATOMIC);
		if (!ptep)
			return -ENOMEM;
		pmd_populate_kernel(&init_mm, pmdp, ptep);
	}

	ptep = pte_offset_kernel(pmdp, dst_addr);
	/* Honour the caller's protection instead of a fixed PAGE_KERNEL_EXEC. */
	set_pte(ptep, pfn_pte(virt_to_pfn(page), pgprot));

	return 0;
}