New Feature:
* Randomize the per-cpu entry areas Cleanups: * Have CR3_ADDR_MASK use PHYSICAL_PAGE_MASK instead of open coding it * Move to "native" set_memory_rox() helper * Clean up pmd_get_atomic() and i386-PAE * Remove some unused page table size macros -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEV76QKkVc4xCGURexaDWVMHDJkrAFAmOc53UACgkQaDWVMHDJ krCUHw//SGZ+La0hLZLAiAiZTXLZZHpYkOmg1Oj1+11qSU11uZzTFqDpauhaKpRS cJCSh+D+RXe5e2ipgt0+Zl0hESLt7pJf8258OE4ra0DL/IlyO9uqruAs9Kn3eRS/ Fk76nG8gdEU+JKJqpG02GqOLslYQuIy96n9hpuj1x25b614+uezPfC7S4XEat0NT MbJQ+jnVDf16aJIJkzT+iSwhubDVeh+bSHeO0SSCzX23WLUqDeg5NvlyxoCHGbBh UpUTWggV/0pYAkBKRHToeJs8qTWREwuuH/8JGewpe9A0tjdB5wyZfNL2PuracweN 9MauXC3T5f0+Ca4yIIaPq1fF7Ny/PR2dBFihk27rOD0N7tjaZxNwal2pB1sZcmvZ +PAokjyTPVH5ZXjkMYGGAUe1jyjwr2+TgFSZxhTnDuGtyVQiY4pihGKOifLCX6tv x6khvYeTBw7wfaDRtKEAf+2kLHYn+71HszHP/8bNKX9T03h+Zf0i1wdZu5xbM5Gc VK2wR7bCC+UftJJYG0pldcHg2qaF19RBHK2tLwp7zngUv7lTbkKfkgKjre73KV2a D4b76lrqdUMo6UYwYdw7WtDyarZS4OVLq2DcNhwwMddBCaX8kyN5a4AqwQlZYJ0u dM+kuMofE8U3yMxmMhJimkZUsj09yLHIqfynY0jbAcU3nhKZZNY= =wwVF -----END PGP SIGNATURE----- Merge tag 'x86_mm_for_6.2_v2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 mm updates from Dave Hansen: "New Feature: - Randomize the per-cpu entry areas Cleanups: - Have CR3_ADDR_MASK use PHYSICAL_PAGE_MASK instead of open coding it - Move to "native" set_memory_rox() helper - Clean up pmd_get_atomic() and i386-PAE - Remove some unused page table size macros" * tag 'x86_mm_for_6.2_v2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (35 commits) x86/mm: Ensure forced page table splitting x86/kasan: Populate shadow for shared chunk of the CPU entry area x86/kasan: Add helpers to align shadow addresses up and down x86/kasan: Rename local CPU_ENTRY_AREA variables to shorten names x86/mm: Populate KASAN shadow for entire per-CPU range of CPU entry area x86/mm: Recompute physical address for every page of per-CPU CEA mapping x86/mm: Rename __change_page_attr_set_clr(.checkalias) 
x86/mm: Inhibit _PAGE_NX changes from cpa_process_alias() x86/mm: Untangle __change_page_attr_set_clr(.checkalias) x86/mm: Add a few comments x86/mm: Fix CR3_ADDR_MASK x86/mm: Remove P*D_PAGE_MASK and P*D_PAGE_SIZE macros mm: Convert __HAVE_ARCH_P..P_GET to the new style mm: Remove pointless barrier() after pmdp_get_lockless() x86/mm/pae: Get rid of set_64bit() x86_64: Remove pointless set_64bit() usage x86/mm/pae: Be consistent with pXXp_get_and_clear() x86/mm/pae: Use WRITE_ONCE() x86/mm/pae: Don't (ab)use atomic64 mm/gup: Fix the lockless PMD access ...
This commit is contained in:
commit
4f292c4de4
|
@ -10,11 +10,11 @@
|
|||
#include <linux/kernel.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/set_memory.h>
|
||||
|
||||
#include <asm/fncpy.h>
|
||||
#include <asm/tlb.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/set_memory.h>
|
||||
|
||||
#include <asm/mach/map.h>
|
||||
|
||||
|
@ -74,8 +74,7 @@ void *omap_sram_push(void *funcp, unsigned long size)
|
|||
|
||||
dst = fncpy(sram, funcp, size);
|
||||
|
||||
set_memory_ro(base, pages);
|
||||
set_memory_x(base, pages);
|
||||
set_memory_rox(base, pages);
|
||||
|
||||
return dst;
|
||||
}
|
||||
|
@ -126,8 +125,7 @@ static void __init omap_detect_and_map_sram(void)
|
|||
base = (unsigned long)omap_sram_base;
|
||||
pages = PAGE_ALIGN(omap_sram_size) / PAGE_SIZE;
|
||||
|
||||
set_memory_ro(base, pages);
|
||||
set_memory_x(base, pages);
|
||||
set_memory_rox(base, pages);
|
||||
}
|
||||
|
||||
static void (*_omap_sram_reprogram_clock)(u32 dpllctl, u32 ckctl);
|
||||
|
|
|
@ -14,11 +14,11 @@
|
|||
#include <linux/kernel.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/set_memory.h>
|
||||
|
||||
#include <asm/fncpy.h>
|
||||
#include <asm/tlb.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/set_memory.h>
|
||||
|
||||
#include <asm/mach/map.h>
|
||||
|
||||
|
@ -96,8 +96,7 @@ void *omap_sram_push(void *funcp, unsigned long size)
|
|||
|
||||
dst = fncpy(sram, funcp, size);
|
||||
|
||||
set_memory_ro(base, pages);
|
||||
set_memory_x(base, pages);
|
||||
set_memory_rox(base, pages);
|
||||
|
||||
return dst;
|
||||
}
|
||||
|
@ -217,8 +216,7 @@ static void __init omap2_map_sram(void)
|
|||
base = (unsigned long)omap_sram_base;
|
||||
pages = PAGE_ALIGN(omap_sram_size) / PAGE_SIZE;
|
||||
|
||||
set_memory_ro(base, pages);
|
||||
set_memory_x(base, pages);
|
||||
set_memory_rox(base, pages);
|
||||
}
|
||||
|
||||
static void (*_omap2_sram_ddr_init)(u32 *slow_dll_ctrl, u32 fast_dll_ctrl,
|
||||
|
|
|
@ -46,7 +46,7 @@ config MIPS
|
|||
select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC
|
||||
select GENERIC_SMP_IDLE_THREAD
|
||||
select GENERIC_TIME_VSYSCALL
|
||||
select GUP_GET_PTE_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT
|
||||
select GUP_GET_PXX_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT
|
||||
select HAVE_ARCH_COMPILER_H
|
||||
select HAVE_ARCH_JUMP_LABEL
|
||||
select HAVE_ARCH_KGDB if MIPS_FP_SUPPORT
|
||||
|
|
|
@ -263,7 +263,7 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p
|
|||
}
|
||||
|
||||
#ifdef CONFIG_PPC_16K_PAGES
|
||||
#define __HAVE_ARCH_PTEP_GET
|
||||
#define ptep_get ptep_get
|
||||
static inline pte_t ptep_get(pte_t *ptep)
|
||||
{
|
||||
pte_basic_t val = READ_ONCE(ptep->pte);
|
||||
|
|
|
@ -20,12 +20,12 @@
|
|||
#include <linux/kdebug.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/moduleloader.h>
|
||||
#include <linux/set_memory.h>
|
||||
#include <asm/code-patching.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/sstep.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/inst.h>
|
||||
#include <asm/set_memory.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
|
||||
|
@ -134,10 +134,9 @@ void *alloc_insn_page(void)
|
|||
if (!page)
|
||||
return NULL;
|
||||
|
||||
if (strict_module_rwx_enabled()) {
|
||||
set_memory_ro((unsigned long)page, 1);
|
||||
set_memory_x((unsigned long)page, 1);
|
||||
}
|
||||
if (strict_module_rwx_enabled())
|
||||
set_memory_rox((unsigned long)page, 1);
|
||||
|
||||
return page;
|
||||
}
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ config SUPERH
|
|||
select GENERIC_PCI_IOMAP if PCI
|
||||
select GENERIC_SCHED_CLOCK
|
||||
select GENERIC_SMP_IDLE_THREAD
|
||||
select GUP_GET_PTE_LOW_HIGH if X2TLB
|
||||
select GUP_GET_PXX_LOW_HIGH if X2TLB
|
||||
select HAVE_ARCH_AUDITSYSCALL
|
||||
select HAVE_ARCH_KGDB
|
||||
select HAVE_ARCH_SECCOMP_FILTER
|
||||
|
|
|
@ -28,9 +28,15 @@
|
|||
#define pmd_ERROR(e) \
|
||||
printk("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e))
|
||||
|
||||
typedef struct { unsigned long long pmd; } pmd_t;
|
||||
/*
 * A PMD entry, viewable either as one 64-bit value (pmd) or as two
 * unsigned long halves (pmd_low, pmd_high).
 *
 * This has to be a union: the halves must share storage with pmd so that
 * pmd_val()/__pmd(), which only touch .pmd, observe the same bits as code
 * that accesses the halves individually.  Declared as a struct, the halves
 * would occupy separate storage and the entry would no longer be the size
 * of the 64-bit value alone.
 */
typedef union {
	struct {
		unsigned long pmd_low;
		unsigned long pmd_high;
	};
	unsigned long long pmd;
} pmd_t;
|
||||
#define pmd_val(x) ((x).pmd)
|
||||
#define __pmd(x) ((pmd_t) { (x) } )
|
||||
#define __pmd(x) ((pmd_t) { .pmd = (x) } )
|
||||
|
||||
static inline pmd_t *pud_pgtable(pud_t pud)
|
||||
{
|
||||
|
|
|
@ -58,11 +58,7 @@
|
|||
#define pud_populate(mm, pud, pmd) \
|
||||
set_pud(pud, __pud(_PAGE_TABLE + __pa(pmd)))
|
||||
|
||||
#ifdef CONFIG_64BIT
|
||||
#define set_pud(pudptr, pudval) set_64bit((u64 *) (pudptr), pud_val(pudval))
|
||||
#else
|
||||
#define set_pud(pudptr, pudval) (*(pudptr) = (pudval))
|
||||
#endif
|
||||
|
||||
static inline int pgd_newpage(pgd_t pgd)
|
||||
{
|
||||
|
@ -71,11 +67,7 @@ static inline int pgd_newpage(pgd_t pgd)
|
|||
|
||||
static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEWPAGE; }
|
||||
|
||||
#ifdef CONFIG_64BIT
|
||||
#define set_pmd(pmdptr, pmdval) set_64bit((u64 *) (pmdptr), pmd_val(pmdval))
|
||||
#else
|
||||
#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
|
||||
#endif
|
||||
|
||||
static inline void pud_clear (pud_t *pud)
|
||||
{
|
||||
|
|
|
@ -159,7 +159,7 @@ config X86
|
|||
select GENERIC_TIME_VSYSCALL
|
||||
select GENERIC_GETTIMEOFDAY
|
||||
select GENERIC_VDSO_TIME_NS
|
||||
select GUP_GET_PTE_LOW_HIGH if X86_PAE
|
||||
select GUP_GET_PXX_LOW_HIGH if X86_PAE
|
||||
select HARDIRQS_SW_RESEND
|
||||
select HARDLOCKUP_CHECK_TIMESTAMP if X86_64
|
||||
select HAVE_ACPI_APEI if ACPI
|
||||
|
|
|
@ -7,34 +7,6 @@
|
|||
* you need to test for the feature in boot_cpu_data.
|
||||
*/
|
||||
|
||||
/*
|
||||
* CMPXCHG8B only writes to the target if we had the previous
|
||||
* value in registers, otherwise it acts as a read and gives us the
|
||||
* "new previous" value. That is why there is a loop. Preloading
|
||||
* EDX:EAX is a performance optimization: in the common case it means
|
||||
* we need only one locked operation.
|
||||
*
|
||||
* A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very
|
||||
* least an FPU save and/or %cr0.ts manipulation.
|
||||
*
|
||||
* cmpxchg8b must be used with the lock prefix here to allow the
|
||||
* instruction to be executed atomically. We need to have the reader
|
||||
* side to see the coherent 64bit value.
|
||||
*/
|
||||
static inline void set_64bit(volatile u64 *ptr, u64 value)
|
||||
{
|
||||
u32 low = value;
|
||||
u32 high = value >> 32;
|
||||
u64 prev = *ptr;
|
||||
|
||||
asm volatile("\n1:\t"
|
||||
LOCK_PREFIX "cmpxchg8b %0\n\t"
|
||||
"jnz 1b"
|
||||
: "=m" (*ptr), "+A" (prev)
|
||||
: "b" (low), "c" (high)
|
||||
: "memory");
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_CMPXCHG64
|
||||
#define arch_cmpxchg64(ptr, o, n) \
|
||||
((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \
|
||||
|
|
|
@ -2,11 +2,6 @@
|
|||
#ifndef _ASM_X86_CMPXCHG_64_H
|
||||
#define _ASM_X86_CMPXCHG_64_H
|
||||
|
||||
static inline void set_64bit(volatile u64 *ptr, u64 val)
|
||||
{
|
||||
*ptr = val;
|
||||
}
|
||||
|
||||
#define arch_cmpxchg64(ptr, o, n) \
|
||||
({ \
|
||||
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
|
||||
|
|
|
@ -130,10 +130,6 @@ struct cpu_entry_area {
|
|||
};
|
||||
|
||||
#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
|
||||
#define CPU_ENTRY_AREA_ARRAY_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
|
||||
|
||||
/* Total size includes the readonly IDT mapping page as well: */
|
||||
#define CPU_ENTRY_AREA_TOTAL_SIZE (CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE)
|
||||
|
||||
DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
|
||||
DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
|
||||
|
|
|
@ -28,9 +28,12 @@
|
|||
#ifdef CONFIG_KASAN
|
||||
void __init kasan_early_init(void);
|
||||
void __init kasan_init(void);
|
||||
void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid);
|
||||
#else
|
||||
static inline void kasan_early_init(void) { }
|
||||
static inline void kasan_init(void) { }
|
||||
static inline void kasan_populate_shadow_for_vaddr(void *va, size_t size,
|
||||
int nid) { }
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -11,20 +11,14 @@
|
|||
#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
|
||||
#define PAGE_MASK (~(PAGE_SIZE-1))
|
||||
|
||||
#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT)
|
||||
#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1))
|
||||
|
||||
#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
|
||||
#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
|
||||
|
||||
#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
|
||||
|
||||
/* Cast *PAGE_MASK to a signed type so that it is sign-extended if
|
||||
/* Cast P*D_MASK to a signed type so that it is sign-extended if
|
||||
virtual addresses are 32-bits but physical addresses are larger
|
||||
(ie, 32-bit PAE). */
|
||||
#define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK)
|
||||
#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_PAGE_MASK) & __PHYSICAL_MASK)
|
||||
#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_PAGE_MASK) & __PHYSICAL_MASK)
|
||||
#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_MASK) & __PHYSICAL_MASK)
|
||||
#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_MASK) & __PHYSICAL_MASK)
|
||||
|
||||
#define HPAGE_SHIFT PMD_SHIFT
|
||||
#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
|
||||
|
|
|
@ -2,8 +2,6 @@
|
|||
#ifndef _ASM_X86_PGTABLE_3LEVEL_H
|
||||
#define _ASM_X86_PGTABLE_3LEVEL_H
|
||||
|
||||
#include <asm/atomic64_32.h>
|
||||
|
||||
/*
|
||||
* Intel Physical Address Extension (PAE) Mode - three-level page
|
||||
* tables on PPro+ CPUs.
|
||||
|
@ -21,7 +19,15 @@
|
|||
pr_err("%s:%d: bad pgd %p(%016Lx)\n", \
|
||||
__FILE__, __LINE__, &(e), pgd_val(e))
|
||||
|
||||
/* Rules for using set_pte: the pte being assigned *must* be
|
||||
#define pxx_xchg64(_pxx, _ptr, _val) ({ \
|
||||
_pxx##val_t *_p = (_pxx##val_t *)_ptr; \
|
||||
_pxx##val_t _o = *_p; \
|
||||
do { } while (!try_cmpxchg64(_p, &_o, (_val))); \
|
||||
native_make_##_pxx(_o); \
|
||||
})
|
||||
|
||||
/*
|
||||
* Rules for using set_pte: the pte being assigned *must* be
|
||||
* either not present or in a state where the hardware will
|
||||
* not attempt to update the pte. In places where this is
|
||||
* not possible, use pte_get_and_clear to obtain the old pte
|
||||
|
@ -29,75 +35,19 @@
|
|||
*/
|
||||
static inline void native_set_pte(pte_t *ptep, pte_t pte)
|
||||
{
|
||||
ptep->pte_high = pte.pte_high;
|
||||
WRITE_ONCE(ptep->pte_high, pte.pte_high);
|
||||
smp_wmb();
|
||||
ptep->pte_low = pte.pte_low;
|
||||
}
|
||||
|
||||
#define pmd_read_atomic pmd_read_atomic
|
||||
/*
|
||||
* pte_offset_map_lock() on 32-bit PAE kernels was reading the pmd_t with
|
||||
* a "*pmdp" dereference done by GCC. Problem is, in certain places
|
||||
* where pte_offset_map_lock() is called, concurrent page faults are
|
||||
* allowed, if the mmap_lock is held for reading. An example is mincore
|
||||
* vs page faults vs MADV_DONTNEED. On the page fault side
|
||||
* pmd_populate() rightfully does a set_64bit(), but if we're reading the
|
||||
* pmd_t with a "*pmdp" on the mincore side, a SMP race can happen
|
||||
* because GCC will not read the 64-bit value of the pmd atomically.
|
||||
*
|
||||
* To fix this all places running pte_offset_map_lock() while holding the
|
||||
* mmap_lock in read mode, shall read the pmdp pointer using this
|
||||
* function to know if the pmd is null or not, and in turn to know if
|
||||
* they can run pte_offset_map_lock() or pmd_trans_huge() or other pmd
|
||||
* operations.
|
||||
*
|
||||
* Without THP if the mmap_lock is held for reading, the pmd can only
|
||||
* transition from null to not null while pmd_read_atomic() runs. So
|
||||
* we can always return atomic pmd values with this function.
|
||||
*
|
||||
* With THP if the mmap_lock is held for reading, the pmd can become
|
||||
* trans_huge or none or point to a pte (and in turn become "stable")
|
||||
* at any time under pmd_read_atomic(). We could read it truly
|
||||
* atomically here with an atomic64_read() for the THP enabled case (and
|
||||
* it would be a whole lot simpler), but to avoid using cmpxchg8b we
|
||||
* only return an atomic pmdval if the low part of the pmdval is later
|
||||
* found to be stable (i.e. pointing to a pte). We are also returning a
|
||||
* 'none' (zero) pmdval if the low part of the pmd is zero.
|
||||
*
|
||||
* In some cases the high and low part of the pmdval returned may not be
|
||||
* consistent if THP is enabled (the low part may point to previously
|
||||
* mapped hugepage, while the high part may point to a more recently
|
||||
* mapped hugepage), but pmd_none_or_trans_huge_or_clear_bad() only
|
||||
* needs the low part of the pmd to be read atomically to decide if the
|
||||
* pmd is unstable or not, with the only exception when the low part
|
||||
* of the pmd is zero, in which case we return a 'none' pmd.
|
||||
*/
|
||||
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
|
||||
{
|
||||
pmdval_t ret;
|
||||
u32 *tmp = (u32 *)pmdp;
|
||||
|
||||
ret = (pmdval_t) (*tmp);
|
||||
if (ret) {
|
||||
/*
|
||||
* If the low part is null, we must not read the high part
|
||||
* or we can end up with a partial pmd.
|
||||
*/
|
||||
smp_rmb();
|
||||
ret |= ((pmdval_t)*(tmp + 1)) << 32;
|
||||
}
|
||||
|
||||
return (pmd_t) { ret };
|
||||
WRITE_ONCE(ptep->pte_low, pte.pte_low);
|
||||
}
|
||||
|
||||
static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
|
||||
{
|
||||
set_64bit((unsigned long long *)(ptep), native_pte_val(pte));
|
||||
pxx_xchg64(pte, ptep, native_pte_val(pte));
|
||||
}
|
||||
|
||||
static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
|
||||
{
|
||||
set_64bit((unsigned long long *)(pmdp), native_pmd_val(pmd));
|
||||
pxx_xchg64(pmd, pmdp, native_pmd_val(pmd));
|
||||
}
|
||||
|
||||
static inline void native_set_pud(pud_t *pudp, pud_t pud)
|
||||
|
@ -105,7 +55,7 @@ static inline void native_set_pud(pud_t *pudp, pud_t pud)
|
|||
#ifdef CONFIG_PAGE_TABLE_ISOLATION
|
||||
pud.p4d.pgd = pti_set_user_pgtbl(&pudp->p4d.pgd, pud.p4d.pgd);
|
||||
#endif
|
||||
set_64bit((unsigned long long *)(pudp), native_pud_val(pud));
|
||||
pxx_xchg64(pud, pudp, native_pud_val(pud));
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -116,17 +66,16 @@ static inline void native_set_pud(pud_t *pudp, pud_t pud)
|
|||
static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep)
|
||||
{
|
||||
ptep->pte_low = 0;
|
||||
WRITE_ONCE(ptep->pte_low, 0);
|
||||
smp_wmb();
|
||||
ptep->pte_high = 0;
|
||||
WRITE_ONCE(ptep->pte_high, 0);
|
||||
}
|
||||
|
||||
static inline void native_pmd_clear(pmd_t *pmd)
|
||||
static inline void native_pmd_clear(pmd_t *pmdp)
|
||||
{
|
||||
u32 *tmp = (u32 *)pmd;
|
||||
*tmp = 0;
|
||||
WRITE_ONCE(pmdp->pmd_low, 0);
|
||||
smp_wmb();
|
||||
*(tmp + 1) = 0;
|
||||
WRITE_ONCE(pmdp->pmd_high, 0);
|
||||
}
|
||||
|
||||
static inline void native_pud_clear(pud_t *pudp)
|
||||
|
@ -149,41 +98,26 @@ static inline void pud_clear(pud_t *pudp)
|
|||
*/
|
||||
}
|
||||
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
|
||||
{
|
||||
pte_t res;
|
||||
return pxx_xchg64(pte, ptep, 0ULL);
|
||||
}
|
||||
|
||||
res.pte = (pteval_t)arch_atomic64_xchg((atomic64_t *)ptep, 0);
|
||||
static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
|
||||
{
|
||||
return pxx_xchg64(pmd, pmdp, 0ULL);
|
||||
}
|
||||
|
||||
return res;
|
||||
static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
|
||||
{
|
||||
return pxx_xchg64(pud, pudp, 0ULL);
|
||||
}
|
||||
#else
|
||||
#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
|
||||
#endif
|
||||
|
||||
union split_pmd {
|
||||
struct {
|
||||
u32 pmd_low;
|
||||
u32 pmd_high;
|
||||
};
|
||||
pmd_t pmd;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
|
||||
{
|
||||
union split_pmd res, *orig = (union split_pmd *)pmdp;
|
||||
|
||||
/* xchg acts as a barrier before setting of the high bits */
|
||||
res.pmd_low = xchg(&orig->pmd_low, 0);
|
||||
res.pmd_high = orig->pmd_high;
|
||||
orig->pmd_high = 0;
|
||||
|
||||
return res.pmd;
|
||||
}
|
||||
#else
|
||||
#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
|
||||
#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
|
||||
#endif
|
||||
|
||||
#ifndef pmdp_establish
|
||||
|
@ -199,55 +133,18 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
|
|||
* anybody.
|
||||
*/
|
||||
if (!(pmd_val(pmd) & _PAGE_PRESENT)) {
|
||||
union split_pmd old, new, *ptr;
|
||||
|
||||
ptr = (union split_pmd *)pmdp;
|
||||
|
||||
new.pmd = pmd;
|
||||
|
||||
/* xchg acts as a barrier before setting of the high bits */
|
||||
old.pmd_low = xchg(&ptr->pmd_low, new.pmd_low);
|
||||
old.pmd_high = ptr->pmd_high;
|
||||
ptr->pmd_high = new.pmd_high;
|
||||
return old.pmd;
|
||||
old.pmd_low = xchg(&pmdp->pmd_low, pmd.pmd_low);
|
||||
old.pmd_high = READ_ONCE(pmdp->pmd_high);
|
||||
WRITE_ONCE(pmdp->pmd_high, pmd.pmd_high);
|
||||
|
||||
return old;
|
||||
}
|
||||
|
||||
do {
|
||||
old = *pmdp;
|
||||
} while (cmpxchg64(&pmdp->pmd, old.pmd, pmd.pmd) != old.pmd);
|
||||
|
||||
return old;
|
||||
return pxx_xchg64(pmd, pmdp, pmd.pmd);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
union split_pud {
|
||||
struct {
|
||||
u32 pud_low;
|
||||
u32 pud_high;
|
||||
};
|
||||
pud_t pud;
|
||||
};
|
||||
|
||||
static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
|
||||
{
|
||||
union split_pud res, *orig = (union split_pud *)pudp;
|
||||
|
||||
#ifdef CONFIG_PAGE_TABLE_ISOLATION
|
||||
pti_set_user_pgtbl(&pudp->p4d.pgd, __pgd(0));
|
||||
#endif
|
||||
|
||||
/* xchg acts as a barrier before setting of the high bits */
|
||||
res.pud_low = xchg(&orig->pud_low, 0);
|
||||
res.pud_high = orig->pud_high;
|
||||
orig->pud_high = 0;
|
||||
|
||||
return res.pud;
|
||||
}
|
||||
#else
|
||||
#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
|
||||
#endif
|
||||
|
||||
/* Encode and de-code a swap entry */
|
||||
#define SWP_TYPE_BITS 5
|
||||
|
||||
|
|
|
@ -18,6 +18,13 @@ typedef union {
|
|||
};
|
||||
pteval_t pte;
|
||||
} pte_t;
|
||||
|
||||
typedef union {
|
||||
struct {
|
||||
unsigned long pmd_low, pmd_high;
|
||||
};
|
||||
pmdval_t pmd;
|
||||
} pmd_t;
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
|
||||
#define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI))
|
||||
|
|
|
@ -19,6 +19,7 @@ typedef unsigned long pgdval_t;
|
|||
typedef unsigned long pgprotval_t;
|
||||
|
||||
typedef struct { pteval_t pte; } pte_t;
|
||||
typedef struct { pmdval_t pmd; } pmd_t;
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
extern unsigned int __pgtable_l5_enabled;
|
||||
|
|
|
@ -11,6 +11,12 @@
|
|||
|
||||
#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
|
||||
|
||||
#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
|
||||
#ifdef CONFIG_X86_32
|
||||
#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + \
|
||||
(CPU_ENTRY_AREA_SIZE * NR_CPUS) - \
|
||||
CPU_ENTRY_AREA_BASE)
|
||||
#else
|
||||
#define CPU_ENTRY_AREA_MAP_SIZE P4D_SIZE
|
||||
#endif
|
||||
|
||||
#endif /* _ASM_X86_PGTABLE_AREAS_H */
|
||||
|
|
|
@ -361,11 +361,9 @@ static inline pudval_t native_pud_val(pud_t pud)
|
|||
#endif
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 2
|
||||
typedef struct { pmdval_t pmd; } pmd_t;
|
||||
|
||||
static inline pmd_t native_make_pmd(pmdval_t val)
|
||||
{
|
||||
return (pmd_t) { val };
|
||||
return (pmd_t) { .pmd = val };
|
||||
}
|
||||
|
||||
static inline pmdval_t native_pmd_val(pmd_t pmd)
|
||||
|
|
|
@ -35,7 +35,7 @@
|
|||
*/
|
||||
#ifdef CONFIG_X86_64
|
||||
/* Mask off the address space ID and SME encryption bits. */
|
||||
#define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull)
|
||||
#define CR3_ADDR_MASK __sme_clr(PHYSICAL_PAGE_MASK)
|
||||
#define CR3_PCID_MASK 0xFFFull
|
||||
#define CR3_NOFLUSH BIT_ULL(63)
|
||||
|
||||
|
|
|
@ -6,6 +6,9 @@
|
|||
#include <asm/page.h>
|
||||
#include <asm-generic/set_memory.h>
|
||||
|
||||
#define set_memory_rox set_memory_rox
|
||||
int set_memory_rox(unsigned long addr, int numpages);
|
||||
|
||||
/*
|
||||
* The set_memory_* API can be used to change various attributes of a virtual
|
||||
* address range. The attributes include:
|
||||
|
|
|
@ -2142,11 +2142,6 @@ void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const voi
|
|||
{
|
||||
struct text_poke_loc *tp;
|
||||
|
||||
if (unlikely(system_state == SYSTEM_BOOTING)) {
|
||||
text_poke_early(addr, opcode, len);
|
||||
return;
|
||||
}
|
||||
|
||||
text_poke_flush(addr);
|
||||
|
||||
tp = &tp_vec[tp_vec_nr++];
|
||||
|
@ -2168,11 +2163,6 @@ void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *
|
|||
{
|
||||
struct text_poke_loc tp;
|
||||
|
||||
if (unlikely(system_state == SYSTEM_BOOTING)) {
|
||||
text_poke_early(addr, opcode, len);
|
||||
return;
|
||||
}
|
||||
|
||||
text_poke_loc_init(&tp, addr, opcode, len, emulate);
|
||||
text_poke_bp_batch(&tp, 1);
|
||||
}
|
||||
|
|
|
@ -504,7 +504,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
|
|||
}
|
||||
|
||||
a = aper + iommu_size;
|
||||
iommu_size -= round_up(a, PMD_PAGE_SIZE) - a;
|
||||
iommu_size -= round_up(a, PMD_SIZE) - a;
|
||||
|
||||
if (iommu_size < 64*1024*1024) {
|
||||
pr_warn("PCI-DMA: Warning: Small IOMMU %luMB."
|
||||
|
|
|
@ -24,10 +24,10 @@
|
|||
#include <linux/module.h>
|
||||
#include <linux/memory.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/set_memory.h>
|
||||
|
||||
#include <trace/syscall.h>
|
||||
|
||||
#include <asm/set_memory.h>
|
||||
#include <asm/kprobes.h>
|
||||
#include <asm/ftrace.h>
|
||||
#include <asm/nops.h>
|
||||
|
@ -423,9 +423,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
|
|||
/* ALLOC_TRAMP flags lets us know we created it */
|
||||
ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;
|
||||
|
||||
if (likely(system_state != SYSTEM_BOOTING))
|
||||
set_memory_ro((unsigned long)trampoline, npages);
|
||||
set_memory_x((unsigned long)trampoline, npages);
|
||||
set_memory_rox((unsigned long)trampoline, npages);
|
||||
return (unsigned long)trampoline;
|
||||
fail:
|
||||
tramp_free(trampoline);
|
||||
|
|
|
@ -203,7 +203,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
|
|||
load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map);
|
||||
|
||||
/* Is the address not 2M aligned? */
|
||||
if (load_delta & ~PMD_PAGE_MASK)
|
||||
if (load_delta & ~PMD_MASK)
|
||||
for (;;);
|
||||
|
||||
/* Include the SME encryption mask in the fixup value */
|
||||
|
|
|
@ -266,7 +266,7 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
|
|||
|
||||
/* CPU entry area is always used for CPU entry */
|
||||
if (within_area(addr, end, CPU_ENTRY_AREA_BASE,
|
||||
CPU_ENTRY_AREA_TOTAL_SIZE))
|
||||
CPU_ENTRY_AREA_MAP_SIZE))
|
||||
return true;
|
||||
|
||||
/*
|
||||
|
|
|
@ -43,6 +43,7 @@
|
|||
#include <linux/objtool.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/pgtable.h>
|
||||
#include <linux/set_memory.h>
|
||||
|
||||
#include <asm/text-patching.h>
|
||||
#include <asm/cacheflush.h>
|
||||
|
@ -51,7 +52,6 @@
|
|||
#include <asm/alternative.h>
|
||||
#include <asm/insn.h>
|
||||
#include <asm/debugreg.h>
|
||||
#include <asm/set_memory.h>
|
||||
#include <asm/ibt.h>
|
||||
|
||||
#include "common.h"
|
||||
|
@ -414,17 +414,11 @@ void *alloc_insn_page(void)
|
|||
if (!page)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* First make the page read-only, and only then make it executable to
|
||||
* prevent it from being W+X in between.
|
||||
*/
|
||||
set_memory_ro((unsigned long)page, 1);
|
||||
|
||||
/*
|
||||
* TODO: Once additional kernel code protection mechanisms are set, ensure
|
||||
* that the page was not maliciously altered and it is still zeroed.
|
||||
*/
|
||||
set_memory_x((unsigned long)page, 1);
|
||||
set_memory_rox((unsigned long)page, 1);
|
||||
|
||||
return page;
|
||||
}
|
||||
|
|
|
@ -9,22 +9,60 @@
|
|||
#include <asm/cpu_entry_area.h>
|
||||
#include <asm/fixmap.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/kasan.h>
|
||||
|
||||
static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
|
||||
DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, _cea_offset);
|
||||
|
||||
static __always_inline unsigned int cea_offset(unsigned int cpu)
|
||||
{
|
||||
return per_cpu(_cea_offset, cpu);
|
||||
}
|
||||
|
||||
static __init void init_cea_offsets(void)
|
||||
{
|
||||
unsigned int max_cea;
|
||||
unsigned int i, j;
|
||||
|
||||
max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE;
|
||||
|
||||
/* O(sodding terrible) */
|
||||
for_each_possible_cpu(i) {
|
||||
unsigned int cea;
|
||||
|
||||
again:
|
||||
cea = prandom_u32_max(max_cea);
|
||||
|
||||
for_each_possible_cpu(j) {
|
||||
if (cea_offset(j) == cea)
|
||||
goto again;
|
||||
|
||||
if (i == j)
|
||||
break;
|
||||
}
|
||||
|
||||
per_cpu(_cea_offset, i) = cea;
|
||||
}
|
||||
}
|
||||
#else /* !X86_64 */
|
||||
DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack);
|
||||
|
||||
static __always_inline unsigned int cea_offset(unsigned int cpu)
|
||||
{
|
||||
return cpu;
|
||||
}
|
||||
static inline void init_cea_offsets(void) { }
|
||||
#endif
|
||||
|
||||
/* Is called from entry code, so must be noinstr */
|
||||
noinstr struct cpu_entry_area *get_cpu_entry_area(int cpu)
|
||||
{
|
||||
unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
|
||||
unsigned long va = CPU_ENTRY_AREA_PER_CPU + cea_offset(cpu) * CPU_ENTRY_AREA_SIZE;
|
||||
BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
|
||||
|
||||
return (struct cpu_entry_area *) va;
|
||||
|
@ -148,6 +186,9 @@ static void __init setup_cpu_entry_area(unsigned int cpu)
|
|||
pgprot_t tss_prot = PAGE_KERNEL;
|
||||
#endif
|
||||
|
||||
kasan_populate_shadow_for_vaddr(cea, CPU_ENTRY_AREA_SIZE,
|
||||
early_cpu_to_node(cpu));
|
||||
|
||||
cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot);
|
||||
|
||||
cea_map_percpu_pages(&cea->entry_stack_page,
|
||||
|
@ -201,7 +242,6 @@ static __init void setup_cpu_entry_area_ptes(void)
|
|||
|
||||
/* The +1 is for the readonly IDT: */
|
||||
BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
|
||||
BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
|
||||
BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
|
||||
|
||||
start = CPU_ENTRY_AREA_BASE;
|
||||
|
@ -217,6 +257,8 @@ void __init setup_cpu_entry_areas(void)
|
|||
{
|
||||
unsigned int cpu;
|
||||
|
||||
init_cea_offsets();
|
||||
|
||||
setup_cpu_entry_area_ptes();
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
|
|
|
@ -801,7 +801,7 @@ void __init poking_init(void)
|
|||
spinlock_t *ptl;
|
||||
pte_t *ptep;
|
||||
|
||||
poking_mm = copy_init_mm();
|
||||
poking_mm = mm_alloc();
|
||||
BUG_ON(!poking_mm);
|
||||
|
||||
/*
|
||||
|
|
|
@ -316,10 +316,33 @@ void __init kasan_early_init(void)
|
|||
kasan_map_early_shadow(init_top_pgt);
|
||||
}
|
||||
|
||||
static unsigned long kasan_mem_to_shadow_align_down(unsigned long va)
|
||||
{
|
||||
unsigned long shadow = (unsigned long)kasan_mem_to_shadow((void *)va);
|
||||
|
||||
return round_down(shadow, PAGE_SIZE);
|
||||
}
|
||||
|
||||
static unsigned long kasan_mem_to_shadow_align_up(unsigned long va)
|
||||
{
|
||||
unsigned long shadow = (unsigned long)kasan_mem_to_shadow((void *)va);
|
||||
|
||||
return round_up(shadow, PAGE_SIZE);
|
||||
}
|
||||
|
||||
/*
 * Populate the KASAN shadow for the @size bytes starting at @va.
 *
 * The shadow address of the first byte is rounded down to a page boundary
 * and the shadow address of @va + @size is rounded up to one.  That range
 * and @nid are handed to kasan_populate_shadow().
 */
void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid)
{
	unsigned long first = (unsigned long)va;
	unsigned long lo = kasan_mem_to_shadow_align_down(first);
	unsigned long hi = kasan_mem_to_shadow_align_up(first + size);

	kasan_populate_shadow(lo, hi, nid);
}
|
||||
|
||||
void __init kasan_init(void)
|
||||
{
|
||||
unsigned long shadow_cea_begin, shadow_cea_per_cpu_begin, shadow_cea_end;
|
||||
int i;
|
||||
void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
|
||||
|
||||
memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
|
||||
|
||||
|
@ -360,16 +383,10 @@ void __init kasan_init(void)
|
|||
map_range(&pfn_mapped[i]);
|
||||
}
|
||||
|
||||
shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
|
||||
shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
|
||||
shadow_cpu_entry_begin = (void *)round_down(
|
||||
(unsigned long)shadow_cpu_entry_begin, PAGE_SIZE);
|
||||
|
||||
shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
|
||||
CPU_ENTRY_AREA_MAP_SIZE);
|
||||
shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
|
||||
shadow_cpu_entry_end = (void *)round_up(
|
||||
(unsigned long)shadow_cpu_entry_end, PAGE_SIZE);
|
||||
shadow_cea_begin = kasan_mem_to_shadow_align_down(CPU_ENTRY_AREA_BASE);
|
||||
shadow_cea_per_cpu_begin = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_PER_CPU);
|
||||
shadow_cea_end = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_BASE +
|
||||
CPU_ENTRY_AREA_MAP_SIZE);
|
||||
|
||||
kasan_populate_early_shadow(
|
||||
kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
|
||||
|
@ -391,12 +408,18 @@ void __init kasan_init(void)
|
|||
|
||||
kasan_populate_early_shadow(
|
||||
kasan_mem_to_shadow((void *)VMALLOC_END + 1),
|
||||
shadow_cpu_entry_begin);
|
||||
(void *)shadow_cea_begin);
|
||||
|
||||
kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
|
||||
(unsigned long)shadow_cpu_entry_end, 0);
|
||||
/*
|
||||
* Populate the shadow for the shared portion of the CPU entry area.
|
||||
* Shadows for the per-CPU areas are mapped on-demand, as each CPU's
|
||||
* area is randomly placed somewhere in the 512GiB range and mapping
|
||||
* the entire 512GiB range is prohibitively expensive.
|
||||
*/
|
||||
kasan_populate_shadow(shadow_cea_begin,
|
||||
shadow_cea_per_cpu_begin, 0);
|
||||
|
||||
kasan_populate_early_shadow(shadow_cpu_entry_end,
|
||||
kasan_populate_early_shadow((void *)shadow_cea_end,
|
||||
kasan_mem_to_shadow((void *)__START_KERNEL_map));
|
||||
|
||||
kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
|
||||
|
|
|
@ -26,7 +26,7 @@ SYM_FUNC_START(sme_encrypt_execute)
|
|||
* RCX - virtual address of the encryption workarea, including:
|
||||
* - stack page (PAGE_SIZE)
|
||||
* - encryption routine page (PAGE_SIZE)
|
||||
* - intermediate copy buffer (PMD_PAGE_SIZE)
|
||||
* - intermediate copy buffer (PMD_SIZE)
|
||||
* R8 - physical address of the pagetables to use for encryption
|
||||
*/
|
||||
|
||||
|
@ -123,7 +123,7 @@ SYM_FUNC_START(__enc_copy)
|
|||
wbinvd /* Invalidate any cache entries */
|
||||
|
||||
/* Copy/encrypt up to 2MB at a time */
|
||||
movq $PMD_PAGE_SIZE, %r12
|
||||
movq $PMD_SIZE, %r12
|
||||
1:
|
||||
cmpq %r12, %r9
|
||||
jnb 2f
|
||||
|
|
|
@ -93,7 +93,7 @@ struct sme_populate_pgd_data {
|
|||
* section is 2MB aligned to allow for simple pagetable setup using only
|
||||
* PMD entries (see vmlinux.lds.S).
|
||||
*/
|
||||
static char sme_workarea[2 * PMD_PAGE_SIZE] __section(".init.scratch");
|
||||
static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch");
|
||||
|
||||
static char sme_cmdline_arg[] __initdata = "mem_encrypt";
|
||||
static char sme_cmdline_on[] __initdata = "on";
|
||||
|
@ -198,8 +198,8 @@ static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
|
|||
while (ppd->vaddr < ppd->vaddr_end) {
|
||||
sme_populate_pgd_large(ppd);
|
||||
|
||||
ppd->vaddr += PMD_PAGE_SIZE;
|
||||
ppd->paddr += PMD_PAGE_SIZE;
|
||||
ppd->vaddr += PMD_SIZE;
|
||||
ppd->paddr += PMD_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -225,11 +225,11 @@ static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
|
|||
vaddr_end = ppd->vaddr_end;
|
||||
|
||||
/* If start is not 2MB aligned, create PTE entries */
|
||||
ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
|
||||
ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_SIZE);
|
||||
__sme_map_range_pte(ppd);
|
||||
|
||||
/* Create PMD entries */
|
||||
ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK;
|
||||
ppd->vaddr_end = vaddr_end & PMD_MASK;
|
||||
__sme_map_range_pmd(ppd);
|
||||
|
||||
/* If end is not 2MB aligned, create PTE entries */
|
||||
|
@ -325,7 +325,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
|
|||
|
||||
/* Physical addresses gives us the identity mapped virtual addresses */
|
||||
kernel_start = __pa_symbol(_text);
|
||||
kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
|
||||
kernel_end = ALIGN(__pa_symbol(_end), PMD_SIZE);
|
||||
kernel_len = kernel_end - kernel_start;
|
||||
|
||||
initrd_start = 0;
|
||||
|
@ -355,12 +355,12 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
|
|||
* executable encryption area size:
|
||||
* stack page (PAGE_SIZE)
|
||||
* encryption routine page (PAGE_SIZE)
|
||||
* intermediate copy buffer (PMD_PAGE_SIZE)
|
||||
* intermediate copy buffer (PMD_SIZE)
|
||||
* pagetable structures for the encryption of the kernel
|
||||
* pagetable structures for workarea (in case not currently mapped)
|
||||
*/
|
||||
execute_start = workarea_start;
|
||||
execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
|
||||
execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE;
|
||||
execute_len = execute_end - execute_start;
|
||||
|
||||
/*
|
||||
|
@ -383,7 +383,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
|
|||
* before it is mapped.
|
||||
*/
|
||||
workarea_len = execute_len + pgtable_area_len;
|
||||
workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
|
||||
workarea_end = ALIGN(workarea_start + workarea_len, PMD_SIZE);
|
||||
|
||||
/*
|
||||
* Set the address to the start of where newly created pagetable
|
||||
|
|
|
@ -220,6 +220,23 @@ within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
|
|||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
/*
|
||||
* The kernel image is mapped into two places in the virtual address space
|
||||
* (addresses without KASLR, of course):
|
||||
*
|
||||
* 1. The kernel direct map (0xffff880000000000)
|
||||
* 2. The "high kernel map" (0xffffffff81000000)
|
||||
*
|
||||
* We actually execute out of #2. If we get the address of a kernel symbol, it
|
||||
* points to #2, but almost all physical-to-virtual translations point to #1.
|
||||
*
|
||||
* This is so that we can have both a directmap of all physical memory *and*
|
||||
* take full advantage of the limited (s32) immediate addressing range (2G)
|
||||
* of x86_64.
|
||||
*
|
||||
* See Documentation/x86/x86_64/mm.rst for more detail.
|
||||
*/
|
||||
|
||||
static inline unsigned long highmap_start_pfn(void)
|
||||
{
|
||||
return __pa_symbol(_text) >> PAGE_SHIFT;
|
||||
|
@ -605,10 +622,6 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star
|
|||
{
|
||||
unsigned long end;
|
||||
|
||||
/* Kernel text is rw at boot up */
|
||||
if (system_state == SYSTEM_BOOTING)
|
||||
return new;
|
||||
|
||||
/*
|
||||
* 32-bit has some unfixable W+X issues, like EFI code
|
||||
* and writeable data being in the same page. Disable
|
||||
|
@ -765,11 +778,11 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr)
|
|||
switch (level) {
|
||||
case PG_LEVEL_1G:
|
||||
phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
|
||||
offset = virt_addr & ~PUD_PAGE_MASK;
|
||||
offset = virt_addr & ~PUD_MASK;
|
||||
break;
|
||||
case PG_LEVEL_2M:
|
||||
phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
|
||||
offset = virt_addr & ~PMD_PAGE_MASK;
|
||||
offset = virt_addr & ~PMD_MASK;
|
||||
break;
|
||||
default:
|
||||
phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
|
||||
|
@ -1059,7 +1072,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
|
|||
case PG_LEVEL_1G:
|
||||
ref_prot = pud_pgprot(*(pud_t *)kpte);
|
||||
ref_pfn = pud_pfn(*(pud_t *)kpte);
|
||||
pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
|
||||
pfninc = PMD_SIZE >> PAGE_SHIFT;
|
||||
lpaddr = address & PUD_MASK;
|
||||
lpinc = PMD_SIZE;
|
||||
/*
|
||||
|
@ -1646,8 +1659,11 @@ repeat:
|
|||
return err;
|
||||
}
|
||||
|
||||
static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
|
||||
static int __change_page_attr_set_clr(struct cpa_data *cpa, int primary);
|
||||
|
||||
/*
|
||||
* Check the directmap and "high kernel map" 'aliases'.
|
||||
*/
|
||||
static int cpa_process_alias(struct cpa_data *cpa)
|
||||
{
|
||||
struct cpa_data alias_cpa;
|
||||
|
@ -1671,6 +1687,12 @@ static int cpa_process_alias(struct cpa_data *cpa)
|
|||
alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
|
||||
alias_cpa.curpage = 0;
|
||||
|
||||
/* Directmap always has NX set, do not modify. */
|
||||
if (__supported_pte_mask & _PAGE_NX) {
|
||||
alias_cpa.mask_clr.pgprot &= ~_PAGE_NX;
|
||||
alias_cpa.mask_set.pgprot &= ~_PAGE_NX;
|
||||
}
|
||||
|
||||
cpa->force_flush_all = 1;
|
||||
|
||||
ret = __change_page_attr_set_clr(&alias_cpa, 0);
|
||||
|
@ -1693,6 +1715,15 @@ static int cpa_process_alias(struct cpa_data *cpa)
|
|||
alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
|
||||
alias_cpa.curpage = 0;
|
||||
|
||||
/*
|
||||
* [_text, _brk_end) also covers data, do not modify NX except
|
||||
* in cases where the highmap is the primary target.
|
||||
*/
|
||||
if (__supported_pte_mask & _PAGE_NX) {
|
||||
alias_cpa.mask_clr.pgprot &= ~_PAGE_NX;
|
||||
alias_cpa.mask_set.pgprot &= ~_PAGE_NX;
|
||||
}
|
||||
|
||||
cpa->force_flush_all = 1;
|
||||
/*
|
||||
* The high mapping range is imprecise, so ignore the
|
||||
|
@ -1705,12 +1736,19 @@ static int cpa_process_alias(struct cpa_data *cpa)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
|
||||
static int __change_page_attr_set_clr(struct cpa_data *cpa, int primary)
|
||||
{
|
||||
unsigned long numpages = cpa->numpages;
|
||||
unsigned long rempages = numpages;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* No changes, easy!
|
||||
*/
|
||||
if (!(pgprot_val(cpa->mask_set) | pgprot_val(cpa->mask_clr)) &&
|
||||
!cpa->force_split)
|
||||
return ret;
|
||||
|
||||
while (rempages) {
|
||||
/*
|
||||
* Store the remaining nr of pages for the large page
|
||||
|
@ -1723,13 +1761,13 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
|
|||
|
||||
if (!debug_pagealloc_enabled())
|
||||
spin_lock(&cpa_lock);
|
||||
ret = __change_page_attr(cpa, checkalias);
|
||||
ret = __change_page_attr(cpa, primary);
|
||||
if (!debug_pagealloc_enabled())
|
||||
spin_unlock(&cpa_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (checkalias) {
|
||||
if (primary && !(cpa->flags & CPA_NO_CHECK_ALIAS)) {
|
||||
ret = cpa_process_alias(cpa);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
@ -1757,7 +1795,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
|
|||
struct page **pages)
|
||||
{
|
||||
struct cpa_data cpa;
|
||||
int ret, cache, checkalias;
|
||||
int ret, cache;
|
||||
|
||||
memset(&cpa, 0, sizeof(cpa));
|
||||
|
||||
|
@ -1803,20 +1841,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
|
|||
cpa.numpages = numpages;
|
||||
cpa.mask_set = mask_set;
|
||||
cpa.mask_clr = mask_clr;
|
||||
cpa.flags = 0;
|
||||
cpa.flags = in_flag;
|
||||
cpa.curpage = 0;
|
||||
cpa.force_split = force_split;
|
||||
|
||||
if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY))
|
||||
cpa.flags |= in_flag;
|
||||
|
||||
/* No alias checking for _NX bit modifications */
|
||||
checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
|
||||
/* Has caller explicitly disabled alias checking? */
|
||||
if (in_flag & CPA_NO_CHECK_ALIAS)
|
||||
checkalias = 0;
|
||||
|
||||
ret = __change_page_attr_set_clr(&cpa, checkalias);
|
||||
ret = __change_page_attr_set_clr(&cpa, 1);
|
||||
|
||||
/*
|
||||
* Check whether we really changed something:
|
||||
|
@ -2047,6 +2076,16 @@ int set_memory_ro(unsigned long addr, int numpages)
|
|||
return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
|
||||
}
|
||||
|
||||
int set_memory_rox(unsigned long addr, int numpages)
|
||||
{
|
||||
pgprot_t clr = __pgprot(_PAGE_RW);
|
||||
|
||||
if (__supported_pte_mask & _PAGE_NX)
|
||||
clr.pgprot |= _PAGE_NX;
|
||||
|
||||
return change_page_attr_clear(&addr, numpages, clr, 0);
|
||||
}
|
||||
|
||||
int set_memory_rw(unsigned long addr, int numpages)
|
||||
{
|
||||
return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
|
||||
|
@ -2059,11 +2098,9 @@ int set_memory_np(unsigned long addr, int numpages)
|
|||
|
||||
int set_memory_np_noalias(unsigned long addr, int numpages)
|
||||
{
|
||||
int cpa_flags = CPA_NO_CHECK_ALIAS;
|
||||
|
||||
return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
|
||||
__pgprot(_PAGE_PRESENT), 0,
|
||||
cpa_flags, NULL);
|
||||
CPA_NO_CHECK_ALIAS, NULL);
|
||||
}
|
||||
|
||||
int set_memory_4k(unsigned long addr, int numpages)
|
||||
|
@ -2280,7 +2317,7 @@ static int __set_pages_p(struct page *page, int numpages)
|
|||
.numpages = numpages,
|
||||
.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
|
||||
.mask_clr = __pgprot(0),
|
||||
.flags = 0};
|
||||
.flags = CPA_NO_CHECK_ALIAS };
|
||||
|
||||
/*
|
||||
* No alias checking needed for setting present flag. otherwise,
|
||||
|
@ -2288,7 +2325,7 @@ static int __set_pages_p(struct page *page, int numpages)
|
|||
* mappings (this adds to complexity if we want to do this from
|
||||
* atomic context especially). Let's keep it simple!
|
||||
*/
|
||||
return __change_page_attr_set_clr(&cpa, 0);
|
||||
return __change_page_attr_set_clr(&cpa, 1);
|
||||
}
|
||||
|
||||
static int __set_pages_np(struct page *page, int numpages)
|
||||
|
@ -2299,7 +2336,7 @@ static int __set_pages_np(struct page *page, int numpages)
|
|||
.numpages = numpages,
|
||||
.mask_set = __pgprot(0),
|
||||
.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
|
||||
.flags = 0};
|
||||
.flags = CPA_NO_CHECK_ALIAS };
|
||||
|
||||
/*
|
||||
* No alias checking needed for setting not present flag. otherwise,
|
||||
|
@ -2307,7 +2344,7 @@ static int __set_pages_np(struct page *page, int numpages)
|
|||
* mappings (this adds to complexity if we want to do this from
|
||||
* atomic context especially). Let's keep it simple!
|
||||
*/
|
||||
return __change_page_attr_set_clr(&cpa, 0);
|
||||
return __change_page_attr_set_clr(&cpa, 1);
|
||||
}
|
||||
|
||||
int set_direct_map_invalid_noflush(struct page *page)
|
||||
|
@ -2378,7 +2415,7 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
|
|||
.numpages = numpages,
|
||||
.mask_set = __pgprot(0),
|
||||
.mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)),
|
||||
.flags = 0,
|
||||
.flags = CPA_NO_CHECK_ALIAS,
|
||||
};
|
||||
|
||||
WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP");
|
||||
|
@ -2391,7 +2428,7 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
|
|||
|
||||
cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags);
|
||||
|
||||
retval = __change_page_attr_set_clr(&cpa, 0);
|
||||
retval = __change_page_attr_set_clr(&cpa, 1);
|
||||
__flush_tlb_all();
|
||||
|
||||
out:
|
||||
|
@ -2421,12 +2458,12 @@ int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
|
|||
.numpages = numpages,
|
||||
.mask_set = __pgprot(0),
|
||||
.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
|
||||
.flags = 0,
|
||||
.flags = CPA_NO_CHECK_ALIAS,
|
||||
};
|
||||
|
||||
WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP");
|
||||
|
||||
retval = __change_page_attr_set_clr(&cpa, 0);
|
||||
retval = __change_page_attr_set_clr(&cpa, 1);
|
||||
__flush_tlb_all();
|
||||
|
||||
return retval;
|
||||
|
|
|
@ -592,7 +592,7 @@ static void pti_set_kernel_image_nonglobal(void)
|
|||
* of the image.
|
||||
*/
|
||||
unsigned long start = PFN_ALIGN(_text);
|
||||
unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
|
||||
unsigned long end = ALIGN((unsigned long)_end, PMD_SIZE);
|
||||
|
||||
/*
|
||||
* This clears _PAGE_GLOBAL from the entire kernel image.
|
||||
|
|
|
@ -174,7 +174,6 @@ static int modify_irte(struct irq_2_iommu *irq_iommu,
|
|||
index = irq_iommu->irte_index + irq_iommu->sub_handle;
|
||||
irte = &iommu->ir_table->base[index];
|
||||
|
||||
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE)
|
||||
if ((irte->pst == 1) || (irte_modified->pst == 1)) {
|
||||
bool ret;
|
||||
|
||||
|
@ -188,11 +187,9 @@ static int modify_irte(struct irq_2_iommu *irq_iommu,
|
|||
* same as the old value.
|
||||
*/
|
||||
WARN_ON(!ret);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
set_64bit(&irte->low, irte_modified->low);
|
||||
set_64bit(&irte->high, irte_modified->high);
|
||||
} else {
|
||||
WRITE_ONCE(irte->low, irte_modified->low);
|
||||
WRITE_ONCE(irte->high, irte_modified->high);
|
||||
}
|
||||
__iommu_flush_cache(iommu, irte, sizeof(*irte));
|
||||
|
||||
|
@ -250,8 +247,8 @@ static int clear_entries(struct irq_2_iommu *irq_iommu)
|
|||
end = start + (1 << irq_iommu->irte_mask);
|
||||
|
||||
for (entry = start; entry < end; entry++) {
|
||||
set_64bit(&entry->low, 0);
|
||||
set_64bit(&entry->high, 0);
|
||||
WRITE_ONCE(entry->low, 0);
|
||||
WRITE_ONCE(entry->high, 0);
|
||||
}
|
||||
bitmap_release_region(iommu->ir_table->bitmap, index,
|
||||
irq_iommu->irte_mask);
|
||||
|
|
|
@ -10,9 +10,9 @@
|
|||
#include <linux/genalloc.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sram.h>
|
||||
#include <linux/set_memory.h>
|
||||
|
||||
#include <asm/fncpy.h>
|
||||
#include <asm/set_memory.h>
|
||||
|
||||
#include "sram.h"
|
||||
|
||||
|
@ -106,10 +106,7 @@ void *sram_exec_copy(struct gen_pool *pool, void *dst, void *src,
|
|||
|
||||
dst_cpy = fncpy(dst, src, size);
|
||||
|
||||
ret = set_memory_ro((unsigned long)base, pages);
|
||||
if (ret)
|
||||
goto error_out;
|
||||
ret = set_memory_x((unsigned long)base, pages);
|
||||
ret = set_memory_rox((unsigned long)base, pages);
|
||||
if (ret)
|
||||
goto error_out;
|
||||
|
||||
|
|
|
@ -860,8 +860,7 @@ static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
|
|||
static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
|
||||
{
|
||||
set_vm_flush_reset_perms(hdr);
|
||||
set_memory_ro((unsigned long)hdr, hdr->size >> PAGE_SHIFT);
|
||||
set_memory_x((unsigned long)hdr, hdr->size >> PAGE_SHIFT);
|
||||
set_memory_rox((unsigned long)hdr, hdr->size >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
|
||||
|
|
|
@ -309,24 +309,28 @@ static inline void ptep_clear(struct mm_struct *mm, unsigned long addr,
|
|||
ptep_get_and_clear(mm, addr, ptep);
|
||||
}
|
||||
|
||||
#ifndef __HAVE_ARCH_PTEP_GET
|
||||
#ifndef ptep_get
|
||||
/* Return the PTE stored at @ptep, loaded through READ_ONCE(). */
static inline pte_t ptep_get(pte_t *ptep)
{
	pte_t entry = READ_ONCE(*ptep);

	return entry;
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH
|
||||
#ifndef pmdp_get
|
||||
/* Return the PMD stored at @pmdp, loaded through READ_ONCE(). */
static inline pmd_t pmdp_get(pmd_t *pmdp)
{
	pmd_t entry = READ_ONCE(*pmdp);

	return entry;
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_GUP_GET_PXX_LOW_HIGH
|
||||
/*
|
||||
* WARNING: only to be used in the get_user_pages_fast() implementation.
|
||||
*
|
||||
* With get_user_pages_fast(), we walk down the pagetables without taking any
|
||||
* locks. For this we would like to load the pointers atomically, but sometimes
|
||||
* that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE). What
|
||||
* we do have is the guarantee that a PTE will only either go from not present
|
||||
* to present, or present to not present or both -- it will not switch to a
|
||||
* completely different present page without a TLB flush in between; something
|
||||
* that we are blocking by holding interrupts off.
|
||||
* For walking the pagetables without holding any locks. Some architectures
|
||||
* (e.g. x86-32 PAE) cannot load the entries atomically without using expensive
|
||||
* instructions. We are guaranteed that a PTE will only either go from not
|
||||
* present to present, or present to not present -- it will not switch to a
|
||||
* completely different present page without a TLB flush in between; which we
|
||||
* are blocking by holding interrupts off.
|
||||
*
|
||||
* Setting ptes from not present to present goes:
|
||||
*
|
||||
|
@ -361,15 +365,42 @@ static inline pte_t ptep_get_lockless(pte_t *ptep)
|
|||
|
||||
return pte;
|
||||
}
|
||||
#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */
|
||||
#define ptep_get_lockless ptep_get_lockless
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 2
|
||||
/*
 * Lockless read of a PMD whose value is split into pmd_low and pmd_high.
 *
 * The low half is loaded first, then the high half, with smp_rmb() after
 * each load. The low half is then read again, and the whole sequence is
 * retried if it no longer matches the value captured at the start.
 */
static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
{
	pmd_t snap;

	do {
		snap.pmd_low = pmdp->pmd_low;
		smp_rmb();	/* low half is read before the high half */
		snap.pmd_high = pmdp->pmd_high;
		smp_rmb();	/* high half is read before the low re-check */
	} while (unlikely(snap.pmd_low != pmdp->pmd_low));

	return snap;
}
|
||||
#define pmdp_get_lockless pmdp_get_lockless
|
||||
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
|
||||
#endif /* CONFIG_GUP_GET_PXX_LOW_HIGH */
|
||||
|
||||
/*
|
||||
* We require that the PTE can be read atomically.
|
||||
*/
|
||||
#ifndef ptep_get_lockless
|
||||
/* Fallback lockless PTE read: simply defers to ptep_get(). */
static inline pte_t ptep_get_lockless(pte_t *ptep)
{
	pte_t entry = ptep_get(ptep);

	return entry;
}
|
||||
#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */
|
||||
#endif
|
||||
|
||||
#ifndef pmdp_get_lockless
|
||||
/* Fallback lockless PMD read: simply defers to pmdp_get(). */
static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
{
	pmd_t entry = pmdp_get(pmdp);

	return entry;
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
|
||||
|
@ -1313,18 +1344,6 @@ static inline int pud_trans_unstable(pud_t *pud)
|
|||
#endif
|
||||
}
|
||||
|
||||
#ifndef pmd_read_atomic
|
||||
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
|
||||
{
|
||||
/*
|
||||
* Depend on compiler for an atomic pmd read. NOTE: this is
|
||||
* only going to work, if the pmdval_t isn't larger than
|
||||
* an unsigned long.
|
||||
*/
|
||||
return *pmdp;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef arch_needs_pgtable_deposit
|
||||
#define arch_needs_pgtable_deposit() (false)
|
||||
#endif
|
||||
|
@ -1351,13 +1370,13 @@ static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
|
|||
*/
|
||||
static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
|
||||
{
|
||||
pmd_t pmdval = pmd_read_atomic(pmd);
|
||||
pmd_t pmdval = pmdp_get_lockless(pmd);
|
||||
/*
|
||||
* The barrier will stabilize the pmdval in a register or on
|
||||
* the stack so that it will stop changing under the code.
|
||||
*
|
||||
* When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE,
|
||||
* pmd_read_atomic is allowed to return a not atomic pmdval
|
||||
* pmdp_get_lockless is allowed to return a not atomic pmdval
|
||||
* (for example pointing to an hugepage that has never been
|
||||
* mapped in the pmd). The below checks will only care about
|
||||
* the low part of the pmd with 32bit PAE x86 anyway, with the
|
||||
|
|
|
@ -65,6 +65,7 @@ extern void sched_dead(struct task_struct *p);
|
|||
void __noreturn do_task_dead(void);
|
||||
void __noreturn make_task_dead(int signr);
|
||||
|
||||
extern void mm_cache_init(void);
|
||||
extern void proc_caches_init(void);
|
||||
|
||||
extern void fork_init(void);
|
||||
|
@ -90,7 +91,6 @@ extern void exit_itimers(struct task_struct *);
|
|||
extern pid_t kernel_clone(struct kernel_clone_args *kargs);
|
||||
struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node);
|
||||
struct task_struct *fork_idle(int);
|
||||
struct mm_struct *copy_init_mm(void);
|
||||
extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
|
||||
extern pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags);
|
||||
extern long kernel_wait4(pid_t, int __user *, int, struct rusage *);
|
||||
|
|
|
@ -14,6 +14,16 @@ static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
|
|||
static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
|
||||
#endif
|
||||
|
||||
#ifndef set_memory_rox
|
||||
/*
 * Generic set_memory_rox(): make the range read-only first, then executable.
 *
 * Returns the error from set_memory_ro() if that step fails (in which case
 * set_memory_x() is not attempted), otherwise the result of set_memory_x().
 */
static inline int set_memory_rox(unsigned long addr, int numpages)
{
	int err = set_memory_ro(addr, numpages);

	return err ? err : set_memory_x(addr, numpages);
}
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_ARCH_HAS_SET_DIRECT_MAP
|
||||
static inline int set_direct_map_invalid_noflush(struct page *page)
|
||||
{
|
||||
|
|
|
@ -863,6 +863,7 @@ static void __init mm_init(void)
|
|||
/* Should be run after espfix64 is set up. */
|
||||
pti_init();
|
||||
kmsan_init_runtime();
|
||||
mm_cache_init();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
|
||||
|
@ -998,7 +999,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
|
|||
sort_main_extable();
|
||||
trap_init();
|
||||
mm_init();
|
||||
|
||||
poking_init();
|
||||
ftrace_init();
|
||||
|
||||
/* trace_printk can be enabled here */
|
||||
|
@ -1137,7 +1138,6 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
|
|||
taskstats_init_early();
|
||||
delayacct_init();
|
||||
|
||||
poking_init();
|
||||
check_bugs();
|
||||
|
||||
acpi_subsystem_init();
|
||||
|
|
|
@ -494,8 +494,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
|
|||
refcount_set(&kvalue->refcnt, 1);
|
||||
bpf_map_inc(map);
|
||||
|
||||
set_memory_ro((long)st_map->image, 1);
|
||||
set_memory_x((long)st_map->image, 1);
|
||||
set_memory_rox((long)st_map->image, 1);
|
||||
err = st_ops->reg(kdata);
|
||||
if (likely(!err)) {
|
||||
/* Pair with smp_load_acquire() during lookup_elem().
|
||||
|
|
|
@ -868,8 +868,7 @@ static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_ins
|
|||
list_add_tail(&pack->list, &pack_list);
|
||||
|
||||
set_vm_flush_reset_perms(pack->ptr);
|
||||
set_memory_ro((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
|
||||
set_memory_x((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
|
||||
set_memory_rox((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
|
||||
return pack;
|
||||
}
|
||||
|
||||
|
@ -887,8 +886,7 @@ void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns)
|
|||
if (ptr) {
|
||||
bpf_fill_ill_insns(ptr, size);
|
||||
set_vm_flush_reset_perms(ptr);
|
||||
set_memory_ro((unsigned long)ptr, size / PAGE_SIZE);
|
||||
set_memory_x((unsigned long)ptr, size / PAGE_SIZE);
|
||||
set_memory_rox((unsigned long)ptr, size / PAGE_SIZE);
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
|
|
@ -468,8 +468,7 @@ again:
|
|||
if (err < 0)
|
||||
goto out;
|
||||
|
||||
set_memory_ro((long)im->image, 1);
|
||||
set_memory_x((long)im->image, 1);
|
||||
set_memory_rox((long)im->image, 1);
|
||||
|
||||
WARN_ON(tr->cur_image && tr->selector == 0);
|
||||
WARN_ON(!tr->cur_image && tr->selector);
|
||||
|
|
|
@ -7493,7 +7493,7 @@ static u64 perf_get_pgtable_size(struct mm_struct *mm, unsigned long addr)
|
|||
return pud_leaf_size(pud);
|
||||
|
||||
pmdp = pmd_offset_lockless(pudp, pud, addr);
|
||||
pmd = READ_ONCE(*pmdp);
|
||||
pmd = pmdp_get_lockless(pmdp);
|
||||
if (!pmd_present(pmd))
|
||||
return 0;
|
||||
|
||||
|
|
|
@ -2607,11 +2607,6 @@ struct task_struct * __init fork_idle(int cpu)
|
|||
return task;
|
||||
}
|
||||
|
||||
struct mm_struct *copy_init_mm(void)
|
||||
{
|
||||
return dup_mm(NULL, &init_mm);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is like kernel_clone(), but shaved down and tailored to just
|
||||
* creating io_uring workers. It returns a created task, or an error pointer.
|
||||
|
@ -3030,10 +3025,27 @@ static void sighand_ctor(void *data)
|
|||
init_waitqueue_head(&sighand->signalfd_wqh);
|
||||
}
|
||||
|
||||
void __init proc_caches_init(void)
|
||||
/*
 * Create the "mm_struct" slab cache and store it in mm_cachep.
 *
 * Only the saved_auxv field of struct mm_struct is registered as the
 * usercopy region of the cache.
 */
void __init mm_cache_init(void)
{
	/*
	 * The mm_cpumask is located at the end of mm_struct, and is
	 * dynamically sized based on the maximum CPU number this system
	 * can have, taking hotplug into account (nr_cpu_ids).
	 */
	unsigned int mm_size = sizeof(struct mm_struct) + cpumask_size();

	mm_cachep = kmem_cache_create_usercopy("mm_struct", mm_size,
			ARCH_MIN_MMSTRUCT_ALIGN,
			SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT,
			offsetof(struct mm_struct, saved_auxv),
			sizeof_field(struct mm_struct, saved_auxv),
			NULL);
}
|
||||
|
||||
void __init proc_caches_init(void)
|
||||
{
|
||||
sighand_cachep = kmem_cache_create("sighand_cache",
|
||||
sizeof(struct sighand_struct), 0,
|
||||
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
|
||||
|
@ -3051,19 +3063,6 @@ void __init proc_caches_init(void)
|
|||
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
|
||||
NULL);
|
||||
|
||||
/*
|
||||
* The mm_cpumask is located at the end of mm_struct, and is
|
||||
* dynamically sized based on the maximum CPU number this system
|
||||
* can have, taking hotplug into account (nr_cpu_ids).
|
||||
*/
|
||||
mm_size = sizeof(struct mm_struct) + cpumask_size();
|
||||
|
||||
mm_cachep = kmem_cache_create_usercopy("mm_struct",
|
||||
mm_size, ARCH_MIN_MMSTRUCT_ALIGN,
|
||||
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
|
||||
offsetof(struct mm_struct, saved_auxv),
|
||||
sizeof_field(struct mm_struct, saved_auxv),
|
||||
NULL);
|
||||
vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT);
|
||||
mmap_init();
|
||||
nsproxy_cache_init();
|
||||
|
|
|
@ -1078,7 +1078,7 @@ config GUP_TEST
|
|||
comment "GUP_TEST needs to have DEBUG_FS enabled"
|
||||
depends on !GUP_TEST && !DEBUG_FS
|
||||
|
||||
config GUP_GET_PTE_LOW_HIGH
|
||||
config GUP_GET_PXX_LOW_HIGH
|
||||
bool
|
||||
|
||||
config ARCH_HAS_PTE_SPECIAL
|
||||
|
|
2
mm/gup.c
2
mm/gup.c
|
@ -2721,7 +2721,7 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo
|
|||
|
||||
pmdp = pmd_offset_lockless(pudp, pud, addr);
|
||||
do {
|
||||
pmd_t pmd = READ_ONCE(*pmdp);
|
||||
pmd_t pmd = pmdp_get_lockless(pmdp);
|
||||
|
||||
next = pmd_addr_end(addr, end);
|
||||
if (!pmd_present(pmd))
|
||||
|
|
3
mm/hmm.c
3
mm/hmm.c
|
@ -361,8 +361,7 @@ again:
|
|||
* huge or device mapping one and compute corresponding pfn
|
||||
* values.
|
||||
*/
|
||||
pmd = pmd_read_atomic(pmdp);
|
||||
barrier();
|
||||
pmd = pmdp_get_lockless(pmdp);
|
||||
if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
|
||||
goto again;
|
||||
|
||||
|
|
|
@ -857,7 +857,7 @@ static int find_pmd_or_thp_or_none(struct mm_struct *mm,
|
|||
if (!*pmd)
|
||||
return SCAN_PMD_NULL;
|
||||
|
||||
pmde = pmd_read_atomic(*pmd);
|
||||
pmde = pmdp_get_lockless(*pmd);
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
/* See comments in pmd_none_or_trans_huge_or_clear_bad() */
|
||||
|
|
|
@ -126,7 +126,7 @@ static int clean_record_pte(pte_t *pte, unsigned long addr,
|
|||
static int wp_clean_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long end,
|
||||
struct mm_walk *walk)
|
||||
{
|
||||
pmd_t pmdval = pmd_read_atomic(pmd);
|
||||
pmd_t pmdval = pmdp_get_lockless(pmd);
|
||||
|
||||
if (!pmd_trans_unstable(&pmdval))
|
||||
return 0;
|
||||
|
|
|
@ -297,7 +297,7 @@ static unsigned long change_pte_range(struct mmu_gather *tlb,
|
|||
*/
|
||||
static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd)
|
||||
{
|
||||
pmd_t pmdval = pmd_read_atomic(pmd);
|
||||
pmd_t pmdval = pmdp_get_lockless(pmd);
|
||||
|
||||
/* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
|
|
|
@ -632,7 +632,7 @@ retry:
|
|||
break;
|
||||
}
|
||||
|
||||
dst_pmdval = pmd_read_atomic(dst_pmd);
|
||||
dst_pmdval = pmdp_get_lockless(dst_pmd);
|
||||
/*
|
||||
* If the dst_pmd is mapped as THP don't
|
||||
* override it and just be strict.
|
||||
|
|
|
@ -4084,10 +4084,7 @@ restart:
|
|||
/* walk_pte_range() may call get_next_vma() */
|
||||
vma = args->vma;
|
||||
for (i = pmd_index(start), addr = start; addr != end; i++, addr = next) {
|
||||
pmd_t val = pmd_read_atomic(pmd + i);
|
||||
|
||||
/* for pmd_read_atomic() */
|
||||
barrier();
|
||||
pmd_t val = pmdp_get_lockless(pmd + i);
|
||||
|
||||
next = pmd_addr_end(addr, end);
|
||||
|
||||
|
|
|
@ -124,8 +124,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
|
|||
if (err < 0)
|
||||
goto out;
|
||||
|
||||
set_memory_ro((long)image, 1);
|
||||
set_memory_x((long)image, 1);
|
||||
set_memory_rox((long)image, 1);
|
||||
prog_ret = dummy_ops_call_op(image, args);
|
||||
|
||||
err = dummy_ops_copy_args(args);
|
||||
|
|
Loading…
Reference in New Issue