Merge branch 'mm-pkeys-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull protection keys syscall interface from Thomas Gleixner: "This is the final step of Protection Keys support which adds the syscalls so user space can actually allocate keys and protect memory areas with them. Details and usage examples can be found in the documentation. The mm side of this has been acked by Mel" * 'mm-pkeys-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/pkeys: Update documentation x86/mm/pkeys: Do not skip PKRU register if debug registers are not used x86/pkeys: Fix pkeys build breakage for some non-x86 arches x86/pkeys: Add self-tests x86/pkeys: Allow configuration of init_pkru x86/pkeys: Default to a restrictive init PKRU pkeys: Add details of system call use to Documentation/ generic syscalls: Wire up memory protection keys syscalls x86: Wire up protection keys system calls x86/pkeys: Allocation/free syscalls x86/pkeys: Make mprotect_key() mask off additional vm_flags mm: Implement new pkey_mprotect() system call x86/pkeys: Add fault handling for PF_PK page fault bit
This commit is contained in:
commit
93c26d7dc0
|
@ -1666,6 +1666,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
|
||||||
|
|
||||||
initrd= [BOOT] Specify the location of the initial ramdisk
|
initrd= [BOOT] Specify the location of the initial ramdisk
|
||||||
|
|
||||||
|
init_pkru= [x86] Specify the default memory protection keys rights
|
||||||
|
register contents for all processes. 0x55555554 by
|
||||||
|
default (disallow access to all but pkey 0). Can
|
||||||
|
override in debugfs after boot.
|
||||||
|
|
||||||
inport.irq= [HW] Inport (ATI XL and Microsoft) busmouse driver
|
inport.irq= [HW] Inport (ATI XL and Microsoft) busmouse driver
|
||||||
Format: <irq>
|
Format: <irq>
|
||||||
|
|
||||||
|
|
|
@ -18,10 +18,68 @@ even though there is theoretically space in the PAE PTEs. These
|
||||||
permissions are enforced on data access only and have no effect on
|
permissions are enforced on data access only and have no effect on
|
||||||
instruction fetches.
|
instruction fetches.
|
||||||
|
|
||||||
=========================== Config Option ===========================
|
=========================== Syscalls ===========================
|
||||||
|
|
||||||
This config option adds approximately 1.5kb of text. and 50 bytes of
|
There are 3 system calls which directly interact with pkeys:
|
||||||
data to the executable. A workload which does large O_DIRECT reads
|
|
||||||
of holes in XFS files was run to exercise get_user_pages_fast(). No
|
int pkey_alloc(unsigned long flags, unsigned long init_access_rights);
|
||||||
performance delta was observed with the config option
|
int pkey_free(int pkey);
|
||||||
enabled or disabled.
|
int pkey_mprotect(unsigned long start, size_t len,
|
||||||
|
unsigned long prot, int pkey);
|
||||||
|
|
||||||
|
Before a pkey can be used, it must first be allocated with
|
||||||
|
pkey_alloc(). An application calls the WRPKRU instruction
|
||||||
|
directly in order to change access permissions to memory covered
|
||||||
|
with a key. In this example WRPKRU is wrapped by a C function
|
||||||
|
called pkey_set().
|
||||||
|
|
||||||
|
int real_prot = PROT_READ|PROT_WRITE;
|
||||||
|
pkey = pkey_alloc(0, PKEY_DISABLE_WRITE);
|
||||||
|
ptr = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
|
||||||
|
ret = pkey_mprotect(ptr, PAGE_SIZE, real_prot, pkey);
|
||||||
|
... application runs here
|
||||||
|
|
||||||
|
Now, if the application needs to update the data at 'ptr', it can
|
||||||
|
gain access, do the update, then remove its write access:
|
||||||
|
|
||||||
|
pkey_set(pkey, 0); // clear PKEY_DISABLE_WRITE
|
||||||
|
*ptr = foo; // assign something
|
||||||
|
pkey_set(pkey, PKEY_DISABLE_WRITE); // set PKEY_DISABLE_WRITE again
|
||||||
|
|
||||||
|
Now when it frees the memory, it will also free the pkey since it
|
||||||
|
is no longer in use:
|
||||||
|
|
||||||
|
munmap(ptr, PAGE_SIZE);
|
||||||
|
pkey_free(pkey);
|
||||||
|
|
||||||
|
(Note: pkey_set() is a wrapper for the RDPKRU and WRPKRU instructions.
|
||||||
|
An example implementation can be found in
|
||||||
|
tools/testing/selftests/x86/protection_keys.c)
|
||||||
|
|
||||||
|
=========================== Behavior ===========================
|
||||||
|
|
||||||
|
The kernel attempts to make protection keys consistent with the
|
||||||
|
behavior of a plain mprotect(). For instance if you do this:
|
||||||
|
|
||||||
|
mprotect(ptr, size, PROT_NONE);
|
||||||
|
something(ptr);
|
||||||
|
|
||||||
|
you can expect the same effects with protection keys when doing this:
|
||||||
|
|
||||||
|
pkey = pkey_alloc(0, PKEY_DISABLE_ACCESS);
|
||||||
|
pkey_mprotect(ptr, size, PROT_READ|PROT_WRITE, pkey);
|
||||||
|
something(ptr);
|
||||||
|
|
||||||
|
That should be true whether something() is a direct access to 'ptr'
|
||||||
|
like:
|
||||||
|
|
||||||
|
*ptr = foo;
|
||||||
|
|
||||||
|
or when the kernel does the access on the application's behalf like
|
||||||
|
with a read():
|
||||||
|
|
||||||
|
read(fd, ptr, 1);
|
||||||
|
|
||||||
|
The kernel will send a SIGSEGV in both cases, but si_code will be set
|
||||||
|
to SEGV_PKERR when violating protection keys versus SEGV_ACCERR when
|
||||||
|
the plain mprotect() permissions are violated.
|
||||||
|
|
|
@ -78,4 +78,9 @@
|
||||||
#define MAP_HUGE_SHIFT 26
|
#define MAP_HUGE_SHIFT 26
|
||||||
#define MAP_HUGE_MASK 0x3f
|
#define MAP_HUGE_MASK 0x3f
|
||||||
|
|
||||||
|
#define PKEY_DISABLE_ACCESS 0x1
|
||||||
|
#define PKEY_DISABLE_WRITE 0x2
|
||||||
|
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
|
||||||
|
PKEY_DISABLE_WRITE)
|
||||||
|
|
||||||
#endif /* __ALPHA_MMAN_H__ */
|
#endif /* __ALPHA_MMAN_H__ */
|
||||||
|
|
|
@ -105,4 +105,9 @@
|
||||||
#define MAP_HUGE_SHIFT 26
|
#define MAP_HUGE_SHIFT 26
|
||||||
#define MAP_HUGE_MASK 0x3f
|
#define MAP_HUGE_MASK 0x3f
|
||||||
|
|
||||||
|
#define PKEY_DISABLE_ACCESS 0x1
|
||||||
|
#define PKEY_DISABLE_WRITE 0x2
|
||||||
|
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
|
||||||
|
PKEY_DISABLE_WRITE)
|
||||||
|
|
||||||
#endif /* _ASM_MMAN_H */
|
#endif /* _ASM_MMAN_H */
|
||||||
|
|
|
@ -75,4 +75,9 @@
|
||||||
#define MAP_HUGE_SHIFT 26
|
#define MAP_HUGE_SHIFT 26
|
||||||
#define MAP_HUGE_MASK 0x3f
|
#define MAP_HUGE_MASK 0x3f
|
||||||
|
|
||||||
|
#define PKEY_DISABLE_ACCESS 0x1
|
||||||
|
#define PKEY_DISABLE_WRITE 0x2
|
||||||
|
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
|
||||||
|
PKEY_DISABLE_WRITE)
|
||||||
|
|
||||||
#endif /* __PARISC_MMAN_H__ */
|
#endif /* __PARISC_MMAN_H__ */
|
||||||
|
|
|
@ -386,3 +386,8 @@
|
||||||
377 i386 copy_file_range sys_copy_file_range
|
377 i386 copy_file_range sys_copy_file_range
|
||||||
378 i386 preadv2 sys_preadv2 compat_sys_preadv2
|
378 i386 preadv2 sys_preadv2 compat_sys_preadv2
|
||||||
379 i386 pwritev2 sys_pwritev2 compat_sys_pwritev2
|
379 i386 pwritev2 sys_pwritev2 compat_sys_pwritev2
|
||||||
|
380 i386 pkey_mprotect sys_pkey_mprotect
|
||||||
|
381 i386 pkey_alloc sys_pkey_alloc
|
||||||
|
382 i386 pkey_free sys_pkey_free
|
||||||
|
#383 i386 pkey_get sys_pkey_get
|
||||||
|
#384 i386 pkey_set sys_pkey_set
|
||||||
|
|
|
@ -335,6 +335,11 @@
|
||||||
326 common copy_file_range sys_copy_file_range
|
326 common copy_file_range sys_copy_file_range
|
||||||
327 64 preadv2 sys_preadv2
|
327 64 preadv2 sys_preadv2
|
||||||
328 64 pwritev2 sys_pwritev2
|
328 64 pwritev2 sys_pwritev2
|
||||||
|
329 common pkey_mprotect sys_pkey_mprotect
|
||||||
|
330 common pkey_alloc sys_pkey_alloc
|
||||||
|
331 common pkey_free sys_pkey_free
|
||||||
|
#332 common pkey_get sys_pkey_get
|
||||||
|
#333 common pkey_set sys_pkey_set
|
||||||
|
|
||||||
#
|
#
|
||||||
# x32-specific system call numbers start at 512 to avoid cache impact
|
# x32-specific system call numbers start at 512 to avoid cache impact
|
||||||
|
|
|
@ -23,6 +23,14 @@ typedef struct {
|
||||||
const struct vdso_image *vdso_image; /* vdso image in use */
|
const struct vdso_image *vdso_image; /* vdso image in use */
|
||||||
|
|
||||||
atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */
|
atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */
|
||||||
|
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
|
||||||
|
/*
|
||||||
|
* One bit per protection key says whether userspace can
|
||||||
|
* use it or not. protected by mmap_sem.
|
||||||
|
*/
|
||||||
|
u16 pkey_allocation_map;
|
||||||
|
s16 execute_only_pkey;
|
||||||
|
#endif
|
||||||
} mm_context_t;
|
} mm_context_t;
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
#include <asm/desc.h>
|
#include <asm/desc.h>
|
||||||
#include <linux/atomic.h>
|
#include <linux/atomic.h>
|
||||||
#include <linux/mm_types.h>
|
#include <linux/mm_types.h>
|
||||||
|
#include <linux/pkeys.h>
|
||||||
|
|
||||||
#include <trace/events/tlb.h>
|
#include <trace/events/tlb.h>
|
||||||
|
|
||||||
|
@ -107,7 +108,16 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
|
||||||
static inline int init_new_context(struct task_struct *tsk,
|
static inline int init_new_context(struct task_struct *tsk,
|
||||||
struct mm_struct *mm)
|
struct mm_struct *mm)
|
||||||
{
|
{
|
||||||
|
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
|
||||||
|
if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
|
||||||
|
/* pkey 0 is the default and always allocated */
|
||||||
|
mm->context.pkey_allocation_map = 0x1;
|
||||||
|
/* -1 means unallocated or invalid */
|
||||||
|
mm->context.execute_only_pkey = -1;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
init_new_context_ldt(tsk, mm);
|
init_new_context_ldt(tsk, mm);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
static inline void destroy_context(struct mm_struct *mm)
|
static inline void destroy_context(struct mm_struct *mm)
|
||||||
|
@ -195,16 +205,20 @@ static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||||
mpx_notify_unmap(mm, vma, start, end);
|
mpx_notify_unmap(mm, vma, start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
|
||||||
static inline int vma_pkey(struct vm_area_struct *vma)
|
static inline int vma_pkey(struct vm_area_struct *vma)
|
||||||
{
|
{
|
||||||
u16 pkey = 0;
|
|
||||||
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
|
|
||||||
unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 |
|
unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 |
|
||||||
VM_PKEY_BIT2 | VM_PKEY_BIT3;
|
VM_PKEY_BIT2 | VM_PKEY_BIT3;
|
||||||
pkey = (vma->vm_flags & vma_pkey_mask) >> VM_PKEY_SHIFT;
|
|
||||||
#endif
|
return (vma->vm_flags & vma_pkey_mask) >> VM_PKEY_SHIFT;
|
||||||
return pkey;
|
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
static inline int vma_pkey(struct vm_area_struct *vma)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static inline bool __pkru_allows_pkey(u16 pkey, bool write)
|
static inline bool __pkru_allows_pkey(u16 pkey, bool write)
|
||||||
{
|
{
|
||||||
|
@ -258,5 +272,4 @@ static inline bool arch_pte_access_permitted(pte_t pte, bool write)
|
||||||
{
|
{
|
||||||
return __pkru_allows_pkey(pte_flags_pkey(pte_flags(pte)), write);
|
return __pkru_allows_pkey(pte_flags_pkey(pte_flags(pte)), write);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* _ASM_X86_MMU_CONTEXT_H */
|
#endif /* _ASM_X86_MMU_CONTEXT_H */
|
||||||
|
|
|
@ -10,7 +10,6 @@ extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
|
||||||
* Try to dedicate one of the protection keys to be used as an
|
* Try to dedicate one of the protection keys to be used as an
|
||||||
* execute-only protection key.
|
* execute-only protection key.
|
||||||
*/
|
*/
|
||||||
#define PKEY_DEDICATED_EXECUTE_ONLY 15
|
|
||||||
extern int __execute_only_pkey(struct mm_struct *mm);
|
extern int __execute_only_pkey(struct mm_struct *mm);
|
||||||
static inline int execute_only_pkey(struct mm_struct *mm)
|
static inline int execute_only_pkey(struct mm_struct *mm)
|
||||||
{
|
{
|
||||||
|
@ -31,4 +30,76 @@ static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
|
||||||
return __arch_override_mprotect_pkey(vma, prot, pkey);
|
return __arch_override_mprotect_pkey(vma, prot, pkey);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
|
||||||
|
unsigned long init_val);
|
||||||
|
|
||||||
|
#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | VM_PKEY_BIT3)
|
||||||
|
|
||||||
|
#define mm_pkey_allocation_map(mm) (mm->context.pkey_allocation_map)
|
||||||
|
#define mm_set_pkey_allocated(mm, pkey) do { \
|
||||||
|
mm_pkey_allocation_map(mm) |= (1U << pkey); \
|
||||||
|
} while (0)
|
||||||
|
#define mm_set_pkey_free(mm, pkey) do { \
|
||||||
|
mm_pkey_allocation_map(mm) &= ~(1U << pkey); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
static inline
|
||||||
|
bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
|
||||||
|
{
|
||||||
|
return mm_pkey_allocation_map(mm) & (1U << pkey);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns a positive, 4-bit key on success, or -1 on failure.
|
||||||
|
*/
|
||||||
|
static inline
|
||||||
|
int mm_pkey_alloc(struct mm_struct *mm)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Note: this is the one and only place we make sure
|
||||||
|
* that the pkey is valid as far as the hardware is
|
||||||
|
* concerned. The rest of the kernel trusts that
|
||||||
|
* only good, valid pkeys come out of here.
|
||||||
|
*/
|
||||||
|
u16 all_pkeys_mask = ((1U << arch_max_pkey()) - 1);
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Are we out of pkeys? We must handle this specially
|
||||||
|
* because ffz() behavior is undefined if there are no
|
||||||
|
* zeros.
|
||||||
|
*/
|
||||||
|
if (mm_pkey_allocation_map(mm) == all_pkeys_mask)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
ret = ffz(mm_pkey_allocation_map(mm));
|
||||||
|
|
||||||
|
mm_set_pkey_allocated(mm, ret);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline
|
||||||
|
int mm_pkey_free(struct mm_struct *mm, int pkey)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* pkey 0 is special, always allocated and can never
|
||||||
|
* be freed.
|
||||||
|
*/
|
||||||
|
if (!pkey)
|
||||||
|
return -EINVAL;
|
||||||
|
if (!mm_pkey_is_allocated(mm, pkey))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
mm_set_pkey_free(mm, pkey);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
|
||||||
|
unsigned long init_val);
|
||||||
|
extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
|
||||||
|
unsigned long init_val);
|
||||||
|
extern void copy_init_pkru_to_fpregs(void);
|
||||||
|
|
||||||
#endif /*_ASM_X86_PKEYS_H */
|
#endif /*_ASM_X86_PKEYS_H */
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
#include <asm/traps.h>
|
#include <asm/traps.h>
|
||||||
|
|
||||||
#include <linux/hardirq.h>
|
#include <linux/hardirq.h>
|
||||||
|
#include <linux/pkeys.h>
|
||||||
|
|
||||||
#define CREATE_TRACE_POINTS
|
#define CREATE_TRACE_POINTS
|
||||||
#include <asm/trace/fpu.h>
|
#include <asm/trace/fpu.h>
|
||||||
|
@ -505,6 +506,9 @@ static inline void copy_init_fpstate_to_fpregs(void)
|
||||||
copy_kernel_to_fxregs(&init_fpstate.fxsave);
|
copy_kernel_to_fxregs(&init_fpstate.fxsave);
|
||||||
else
|
else
|
||||||
copy_kernel_to_fregs(&init_fpstate.fsave);
|
copy_kernel_to_fregs(&init_fpstate.fsave);
|
||||||
|
|
||||||
|
if (boot_cpu_has(X86_FEATURE_OSPKE))
|
||||||
|
copy_init_pkru_to_fpregs();
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
*/
|
*/
|
||||||
#include <linux/compat.h>
|
#include <linux/compat.h>
|
||||||
#include <linux/cpu.h>
|
#include <linux/cpu.h>
|
||||||
|
#include <linux/mman.h>
|
||||||
#include <linux/pkeys.h>
|
#include <linux/pkeys.h>
|
||||||
|
|
||||||
#include <asm/fpu/api.h>
|
#include <asm/fpu/api.h>
|
||||||
|
@ -866,9 +867,10 @@ const void *get_xsave_field_ptr(int xsave_state)
|
||||||
return get_xsave_addr(&fpu->state.xsave, xsave_state);
|
return get_xsave_addr(&fpu->state.xsave, xsave_state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_ARCH_HAS_PKEYS
|
||||||
|
|
||||||
#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
|
#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
|
||||||
#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
|
#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This will go out and modify PKRU register to set the access
|
* This will go out and modify PKRU register to set the access
|
||||||
* rights for @pkey to @init_val.
|
* rights for @pkey to @init_val.
|
||||||
|
@ -914,6 +916,7 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
#endif /* ! CONFIG_ARCH_HAS_PKEYS */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is similar to user_regset_copyout(), but will not add offset to
|
* This is similar to user_regset_copyout(), but will not add offset to
|
||||||
|
|
|
@ -109,12 +109,13 @@ void __show_regs(struct pt_regs *regs, int all)
|
||||||
get_debugreg(d7, 7);
|
get_debugreg(d7, 7);
|
||||||
|
|
||||||
/* Only print out debug registers if they are in their non-default state. */
|
/* Only print out debug registers if they are in their non-default state. */
|
||||||
if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
|
if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
|
||||||
(d6 == DR6_RESERVED) && (d7 == 0x400))
|
(d6 == DR6_RESERVED) && (d7 == 0x400))) {
|
||||||
return;
|
printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n",
|
||||||
|
d0, d1, d2);
|
||||||
printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
|
printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n",
|
||||||
printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
|
d3, d6, d7);
|
||||||
|
}
|
||||||
|
|
||||||
if (boot_cpu_has(X86_FEATURE_OSPKE))
|
if (boot_cpu_has(X86_FEATURE_OSPKE))
|
||||||
printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru());
|
printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru());
|
||||||
|
|
|
@ -1144,6 +1144,15 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
|
||||||
{
|
{
|
||||||
/* This is only called for the current mm, so: */
|
/* This is only called for the current mm, so: */
|
||||||
bool foreign = false;
|
bool foreign = false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Read or write was blocked by protection keys. This is
|
||||||
|
* always an unconditional error and can never result in
|
||||||
|
* a follow-up action to resolve the fault, like a COW.
|
||||||
|
*/
|
||||||
|
if (error_code & PF_PK)
|
||||||
|
return 1;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Make sure to check the VMA so that we do not perform
|
* Make sure to check the VMA so that we do not perform
|
||||||
* faults just to hit a PF_PK as soon as we fill in a
|
* faults just to hit a PF_PK as soon as we fill in a
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||||
* more details.
|
* more details.
|
||||||
*/
|
*/
|
||||||
|
#include <linux/debugfs.h> /* debugfs_create_u32() */
|
||||||
#include <linux/mm_types.h> /* mm_struct, vma, etc... */
|
#include <linux/mm_types.h> /* mm_struct, vma, etc... */
|
||||||
#include <linux/pkeys.h> /* PKEY_* */
|
#include <linux/pkeys.h> /* PKEY_* */
|
||||||
#include <uapi/asm-generic/mman-common.h>
|
#include <uapi/asm-generic/mman-common.h>
|
||||||
|
@ -21,8 +22,19 @@
|
||||||
|
|
||||||
int __execute_only_pkey(struct mm_struct *mm)
|
int __execute_only_pkey(struct mm_struct *mm)
|
||||||
{
|
{
|
||||||
|
bool need_to_set_mm_pkey = false;
|
||||||
|
int execute_only_pkey = mm->context.execute_only_pkey;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
/* Do we need to assign a pkey for mm's execute-only maps? */
|
||||||
|
if (execute_only_pkey == -1) {
|
||||||
|
/* Go allocate one to use, which might fail */
|
||||||
|
execute_only_pkey = mm_pkey_alloc(mm);
|
||||||
|
if (execute_only_pkey < 0)
|
||||||
|
return -1;
|
||||||
|
need_to_set_mm_pkey = true;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We do not want to go through the relatively costly
|
* We do not want to go through the relatively costly
|
||||||
* dance to set PKRU if we do not need to. Check it
|
* dance to set PKRU if we do not need to. Check it
|
||||||
|
@ -32,22 +44,33 @@ int __execute_only_pkey(struct mm_struct *mm)
|
||||||
* can make fpregs inactive.
|
* can make fpregs inactive.
|
||||||
*/
|
*/
|
||||||
preempt_disable();
|
preempt_disable();
|
||||||
if (fpregs_active() &&
|
if (!need_to_set_mm_pkey &&
|
||||||
!__pkru_allows_read(read_pkru(), PKEY_DEDICATED_EXECUTE_ONLY)) {
|
fpregs_active() &&
|
||||||
|
!__pkru_allows_read(read_pkru(), execute_only_pkey)) {
|
||||||
preempt_enable();
|
preempt_enable();
|
||||||
return PKEY_DEDICATED_EXECUTE_ONLY;
|
return execute_only_pkey;
|
||||||
}
|
}
|
||||||
preempt_enable();
|
preempt_enable();
|
||||||
ret = arch_set_user_pkey_access(current, PKEY_DEDICATED_EXECUTE_ONLY,
|
|
||||||
|
/*
|
||||||
|
* Set up PKRU so that it denies access for everything
|
||||||
|
* other than execution.
|
||||||
|
*/
|
||||||
|
ret = arch_set_user_pkey_access(current, execute_only_pkey,
|
||||||
PKEY_DISABLE_ACCESS);
|
PKEY_DISABLE_ACCESS);
|
||||||
/*
|
/*
|
||||||
* If the PKRU-set operation failed somehow, just return
|
* If the PKRU-set operation failed somehow, just return
|
||||||
* 0 and effectively disable execute-only support.
|
* 0 and effectively disable execute-only support.
|
||||||
*/
|
*/
|
||||||
if (ret)
|
if (ret) {
|
||||||
return 0;
|
mm_set_pkey_free(mm, execute_only_pkey);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
return PKEY_DEDICATED_EXECUTE_ONLY;
|
/* We got one, store it and use it from here on out */
|
||||||
|
if (need_to_set_mm_pkey)
|
||||||
|
mm->context.execute_only_pkey = execute_only_pkey;
|
||||||
|
return execute_only_pkey;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma)
|
static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma)
|
||||||
|
@ -55,7 +78,7 @@ static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma)
|
||||||
/* Do this check first since the vm_flags should be hot */
|
/* Do this check first since the vm_flags should be hot */
|
||||||
if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC)
|
if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC)
|
||||||
return false;
|
return false;
|
||||||
if (vma_pkey(vma) != PKEY_DEDICATED_EXECUTE_ONLY)
|
if (vma_pkey(vma) != vma->vm_mm->context.execute_only_pkey)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -99,3 +122,106 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
|
||||||
*/
|
*/
|
||||||
return vma_pkey(vma);
|
return vma_pkey(vma);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define PKRU_AD_KEY(pkey) (PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Make the default PKRU value (at execve() time) as restrictive
|
||||||
|
* as possible. This ensures that any threads clone()'d early
|
||||||
|
* in the process's lifetime will not accidentally get access
|
||||||
|
* to data which is pkey-protected later on.
|
||||||
|
*/
|
||||||
|
u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
|
||||||
|
PKRU_AD_KEY( 4) | PKRU_AD_KEY( 5) | PKRU_AD_KEY( 6) |
|
||||||
|
PKRU_AD_KEY( 7) | PKRU_AD_KEY( 8) | PKRU_AD_KEY( 9) |
|
||||||
|
PKRU_AD_KEY(10) | PKRU_AD_KEY(11) | PKRU_AD_KEY(12) |
|
||||||
|
PKRU_AD_KEY(13) | PKRU_AD_KEY(14) | PKRU_AD_KEY(15);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called from the FPU code when creating a fresh set of FPU
|
||||||
|
* registers. This is called from a very specific context where
|
||||||
|
* we know the FPU registers are safe for use and we can use PKRU
|
||||||
|
* directly. The fact that PKRU is only available when we are
|
||||||
|
* using eagerfpu mode makes this possible.
|
||||||
|
*/
|
||||||
|
void copy_init_pkru_to_fpregs(void)
|
||||||
|
{
|
||||||
|
u32 init_pkru_value_snapshot = READ_ONCE(init_pkru_value);
|
||||||
|
/*
|
||||||
|
* Any write to PKRU takes it out of the XSAVE 'init
|
||||||
|
* state' which increases context switch cost. Avoid
|
||||||
|
* writing 0 when PKRU was already 0.
|
||||||
|
*/
|
||||||
|
if (!init_pkru_value_snapshot && !read_pkru())
|
||||||
|
return;
|
||||||
|
/*
|
||||||
|
* Override the PKRU state that came from 'init_fpstate'
|
||||||
|
* with the baseline from the process.
|
||||||
|
*/
|
||||||
|
write_pkru(init_pkru_value_snapshot);
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t init_pkru_read_file(struct file *file, char __user *user_buf,
|
||||||
|
size_t count, loff_t *ppos)
|
||||||
|
{
|
||||||
|
char buf[32];
|
||||||
|
unsigned int len;
|
||||||
|
|
||||||
|
len = sprintf(buf, "0x%x\n", init_pkru_value);
|
||||||
|
return simple_read_from_buffer(user_buf, count, ppos, buf, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t init_pkru_write_file(struct file *file,
|
||||||
|
const char __user *user_buf, size_t count, loff_t *ppos)
|
||||||
|
{
|
||||||
|
char buf[32];
|
||||||
|
ssize_t len;
|
||||||
|
u32 new_init_pkru;
|
||||||
|
|
||||||
|
len = min(count, sizeof(buf) - 1);
|
||||||
|
if (copy_from_user(buf, user_buf, len))
|
||||||
|
return -EFAULT;
|
||||||
|
|
||||||
|
/* Make the buffer a valid string that we can not overrun */
|
||||||
|
buf[len] = '\0';
|
||||||
|
if (kstrtouint(buf, 0, &new_init_pkru))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Don't allow insane settings that will blow the system
|
||||||
|
* up immediately if someone attempts to disable access
|
||||||
|
* or writes to pkey 0.
|
||||||
|
*/
|
||||||
|
if (new_init_pkru & (PKRU_AD_BIT|PKRU_WD_BIT))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
WRITE_ONCE(init_pkru_value, new_init_pkru);
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct file_operations fops_init_pkru = {
|
||||||
|
.read = init_pkru_read_file,
|
||||||
|
.write = init_pkru_write_file,
|
||||||
|
.llseek = default_llseek,
|
||||||
|
};
|
||||||
|
|
||||||
|
static int __init create_init_pkru_value(void)
|
||||||
|
{
|
||||||
|
debugfs_create_file("init_pkru", S_IRUSR | S_IWUSR,
|
||||||
|
arch_debugfs_dir, NULL, &fops_init_pkru);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
late_initcall(create_init_pkru_value);
|
||||||
|
|
||||||
|
static __init int setup_init_pkru(char *opt)
|
||||||
|
{
|
||||||
|
u32 new_init_pkru;
|
||||||
|
|
||||||
|
if (kstrtouint(opt, 0, &new_init_pkru))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
WRITE_ONCE(init_pkru_value, new_init_pkru);
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
__setup("init_pkru=", setup_init_pkru);
|
||||||
|
|
|
@ -117,4 +117,9 @@
|
||||||
#define MAP_HUGE_SHIFT 26
|
#define MAP_HUGE_SHIFT 26
|
||||||
#define MAP_HUGE_MASK 0x3f
|
#define MAP_HUGE_MASK 0x3f
|
||||||
|
|
||||||
|
#define PKEY_DISABLE_ACCESS 0x1
|
||||||
|
#define PKEY_DISABLE_WRITE 0x2
|
||||||
|
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
|
||||||
|
PKEY_DISABLE_WRITE)
|
||||||
|
|
||||||
#endif /* _XTENSA_MMAN_H */
|
#endif /* _XTENSA_MMAN_H */
|
||||||
|
|
|
@ -4,11 +4,6 @@
|
||||||
#include <linux/mm_types.h>
|
#include <linux/mm_types.h>
|
||||||
#include <asm/mmu_context.h>
|
#include <asm/mmu_context.h>
|
||||||
|
|
||||||
#define PKEY_DISABLE_ACCESS 0x1
|
|
||||||
#define PKEY_DISABLE_WRITE 0x2
|
|
||||||
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
|
|
||||||
PKEY_DISABLE_WRITE)
|
|
||||||
|
|
||||||
#ifdef CONFIG_ARCH_HAS_PKEYS
|
#ifdef CONFIG_ARCH_HAS_PKEYS
|
||||||
#include <asm/pkeys.h>
|
#include <asm/pkeys.h>
|
||||||
#else /* ! CONFIG_ARCH_HAS_PKEYS */
|
#else /* ! CONFIG_ARCH_HAS_PKEYS */
|
||||||
|
@ -16,18 +11,34 @@
|
||||||
#define execute_only_pkey(mm) (0)
|
#define execute_only_pkey(mm) (0)
|
||||||
#define arch_override_mprotect_pkey(vma, prot, pkey) (0)
|
#define arch_override_mprotect_pkey(vma, prot, pkey) (0)
|
||||||
#define PKEY_DEDICATED_EXECUTE_ONLY 0
|
#define PKEY_DEDICATED_EXECUTE_ONLY 0
|
||||||
#endif /* ! CONFIG_ARCH_HAS_PKEYS */
|
#define ARCH_VM_PKEY_FLAGS 0
|
||||||
|
|
||||||
/*
|
static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
|
||||||
* This is called from mprotect_pkey().
|
|
||||||
*
|
|
||||||
* Returns true if the protection keys is valid.
|
|
||||||
*/
|
|
||||||
static inline bool validate_pkey(int pkey)
|
|
||||||
{
|
{
|
||||||
if (pkey < 0)
|
return (pkey == 0);
|
||||||
return false;
|
|
||||||
return (pkey < arch_max_pkey());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int mm_pkey_alloc(struct mm_struct *mm)
|
||||||
|
{
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int mm_pkey_free(struct mm_struct *mm, int pkey)
|
||||||
|
{
|
||||||
|
WARN_ONCE(1, "free of protection key when disabled");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
|
||||||
|
unsigned long init_val)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void copy_init_pkru_to_fpregs(void)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* ! CONFIG_ARCH_HAS_PKEYS */
|
||||||
|
|
||||||
#endif /* _LINUX_PKEYS_H */
|
#endif /* _LINUX_PKEYS_H */
|
||||||
|
|
|
@ -898,4 +898,12 @@ asmlinkage long sys_copy_file_range(int fd_in, loff_t __user *off_in,
|
||||||
|
|
||||||
asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags);
|
asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags);
|
||||||
|
|
||||||
|
asmlinkage long sys_pkey_mprotect(unsigned long start, size_t len,
|
||||||
|
unsigned long prot, int pkey);
|
||||||
|
asmlinkage long sys_pkey_alloc(unsigned long flags, unsigned long init_val);
|
||||||
|
asmlinkage long sys_pkey_free(int pkey);
|
||||||
|
//asmlinkage long sys_pkey_get(int pkey, unsigned long flags);
|
||||||
|
//asmlinkage long sys_pkey_set(int pkey, unsigned long access_rights,
|
||||||
|
// unsigned long flags);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -72,4 +72,9 @@
|
||||||
#define MAP_HUGE_SHIFT 26
|
#define MAP_HUGE_SHIFT 26
|
||||||
#define MAP_HUGE_MASK 0x3f
|
#define MAP_HUGE_MASK 0x3f
|
||||||
|
|
||||||
|
#define PKEY_DISABLE_ACCESS 0x1
|
||||||
|
#define PKEY_DISABLE_WRITE 0x2
|
||||||
|
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
|
||||||
|
PKEY_DISABLE_WRITE)
|
||||||
|
|
||||||
#endif /* __ASM_GENERIC_MMAN_COMMON_H */
|
#endif /* __ASM_GENERIC_MMAN_COMMON_H */
|
||||||
|
|
|
@ -724,9 +724,19 @@ __SYSCALL(__NR_copy_file_range, sys_copy_file_range)
|
||||||
__SC_COMP(__NR_preadv2, sys_preadv2, compat_sys_preadv2)
|
__SC_COMP(__NR_preadv2, sys_preadv2, compat_sys_preadv2)
|
||||||
#define __NR_pwritev2 287
|
#define __NR_pwritev2 287
|
||||||
__SC_COMP(__NR_pwritev2, sys_pwritev2, compat_sys_pwritev2)
|
__SC_COMP(__NR_pwritev2, sys_pwritev2, compat_sys_pwritev2)
|
||||||
|
#define __NR_pkey_mprotect 288
|
||||||
|
__SYSCALL(__NR_pkey_mprotect, sys_pkey_mprotect)
|
||||||
|
#define __NR_pkey_alloc 289
|
||||||
|
__SYSCALL(__NR_pkey_alloc, sys_pkey_alloc)
|
||||||
|
#define __NR_pkey_free 290
|
||||||
|
__SYSCALL(__NR_pkey_free, sys_pkey_free)
|
||||||
|
#define __NR_pkey_get 291
|
||||||
|
//__SYSCALL(__NR_pkey_get, sys_pkey_get)
|
||||||
|
#define __NR_pkey_set 292
|
||||||
|
//__SYSCALL(__NR_pkey_set, sys_pkey_set)
|
||||||
|
|
||||||
#undef __NR_syscalls
|
#undef __NR_syscalls
|
||||||
#define __NR_syscalls 288
|
#define __NR_syscalls 291
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* All syscalls below here should go away really,
|
* All syscalls below here should go away really,
|
||||||
|
|
|
@ -250,3 +250,8 @@ cond_syscall(sys_execveat);
|
||||||
|
|
||||||
/* membarrier */
|
/* membarrier */
|
||||||
cond_syscall(sys_membarrier);
|
cond_syscall(sys_membarrier);
|
||||||
|
|
||||||
|
/* memory protection keys */
|
||||||
|
cond_syscall(sys_pkey_mprotect);
|
||||||
|
cond_syscall(sys_pkey_alloc);
|
||||||
|
cond_syscall(sys_pkey_free);
|
||||||
|
|
|
@ -23,11 +23,13 @@
|
||||||
#include <linux/mmu_notifier.h>
|
#include <linux/mmu_notifier.h>
|
||||||
#include <linux/migrate.h>
|
#include <linux/migrate.h>
|
||||||
#include <linux/perf_event.h>
|
#include <linux/perf_event.h>
|
||||||
|
#include <linux/pkeys.h>
|
||||||
#include <linux/ksm.h>
|
#include <linux/ksm.h>
|
||||||
#include <linux/pkeys.h>
|
#include <linux/pkeys.h>
|
||||||
#include <asm/uaccess.h>
|
#include <asm/uaccess.h>
|
||||||
#include <asm/pgtable.h>
|
#include <asm/pgtable.h>
|
||||||
#include <asm/cacheflush.h>
|
#include <asm/cacheflush.h>
|
||||||
|
#include <asm/mmu_context.h>
|
||||||
#include <asm/tlbflush.h>
|
#include <asm/tlbflush.h>
|
||||||
|
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
|
@ -353,8 +355,11 @@ fail:
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
|
/*
|
||||||
unsigned long, prot)
|
* pkey==-1 when doing a legacy mprotect()
|
||||||
|
*/
|
||||||
|
static int do_mprotect_pkey(unsigned long start, size_t len,
|
||||||
|
unsigned long prot, int pkey)
|
||||||
{
|
{
|
||||||
unsigned long nstart, end, tmp, reqprot;
|
unsigned long nstart, end, tmp, reqprot;
|
||||||
struct vm_area_struct *vma, *prev;
|
struct vm_area_struct *vma, *prev;
|
||||||
|
@ -383,6 +388,14 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
|
||||||
if (down_write_killable(&current->mm->mmap_sem))
|
if (down_write_killable(&current->mm->mmap_sem))
|
||||||
return -EINTR;
|
return -EINTR;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If userspace did not allocate the pkey, do not let
|
||||||
|
* them use it here.
|
||||||
|
*/
|
||||||
|
error = -EINVAL;
|
||||||
|
if ((pkey != -1) && !mm_pkey_is_allocated(current->mm, pkey))
|
||||||
|
goto out;
|
||||||
|
|
||||||
vma = find_vma(current->mm, start);
|
vma = find_vma(current->mm, start);
|
||||||
error = -ENOMEM;
|
error = -ENOMEM;
|
||||||
if (!vma)
|
if (!vma)
|
||||||
|
@ -409,8 +422,9 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
|
||||||
prev = vma;
|
prev = vma;
|
||||||
|
|
||||||
for (nstart = start ; ; ) {
|
for (nstart = start ; ; ) {
|
||||||
|
unsigned long mask_off_old_flags;
|
||||||
unsigned long newflags;
|
unsigned long newflags;
|
||||||
int pkey = arch_override_mprotect_pkey(vma, prot, -1);
|
int new_vma_pkey;
|
||||||
|
|
||||||
/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
|
/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
|
||||||
|
|
||||||
|
@ -418,8 +432,17 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
|
||||||
if (rier && (vma->vm_flags & VM_MAYEXEC))
|
if (rier && (vma->vm_flags & VM_MAYEXEC))
|
||||||
prot |= PROT_EXEC;
|
prot |= PROT_EXEC;
|
||||||
|
|
||||||
newflags = calc_vm_prot_bits(prot, pkey);
|
/*
|
||||||
newflags |= (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));
|
* Each mprotect() call explicitly passes r/w/x permissions.
|
||||||
|
* If a permission is not passed to mprotect(), it must be
|
||||||
|
* cleared from the VMA.
|
||||||
|
*/
|
||||||
|
mask_off_old_flags = VM_READ | VM_WRITE | VM_EXEC |
|
||||||
|
ARCH_VM_PKEY_FLAGS;
|
||||||
|
|
||||||
|
new_vma_pkey = arch_override_mprotect_pkey(vma, prot, pkey);
|
||||||
|
newflags = calc_vm_prot_bits(prot, new_vma_pkey);
|
||||||
|
newflags |= (vma->vm_flags & ~mask_off_old_flags);
|
||||||
|
|
||||||
/* newflags >> 4 shift VM_MAY% in place of VM_% */
|
/* newflags >> 4 shift VM_MAY% in place of VM_% */
|
||||||
if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
|
if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
|
||||||
|
@ -455,3 +478,60 @@ out:
|
||||||
up_write(&current->mm->mmap_sem);
|
up_write(&current->mm->mmap_sem);
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Legacy mprotect() entry point: forwards to do_mprotect_pkey() with
 * pkey == -1, i.e. without naming an explicit protection key.
 */
SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
|
||||||
|
unsigned long, prot)
|
||||||
|
{
|
||||||
|
return do_mprotect_pkey(start, len, prot, -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * pkey_mprotect() entry point: same as mprotect(), but hands the
 * caller-supplied protection key through to do_mprotect_pkey().
 */
SYSCALL_DEFINE4(pkey_mprotect, unsigned long, start, size_t, len,
|
||||||
|
unsigned long, prot, int, pkey)
|
||||||
|
{
|
||||||
|
return do_mprotect_pkey(start, len, prot, pkey);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * pkey_alloc() - allocate a protection key for the calling process.
 * @flags:    must be 0; no flags are supported yet.
 * @init_val: initial access rights for the new key; only bits covered by
 *            PKEY_ACCESS_MASK are accepted.
 *
 * Returns the newly allocated pkey on success, -EINVAL for unsupported
 * @flags or @init_val bits, -ENOSPC when mm_pkey_alloc() returns -1, or
 * the error reported by arch_set_user_pkey_access().
 */
SYSCALL_DEFINE2(pkey_alloc, unsigned long, flags, unsigned long, init_val)
{
	int pkey;
	int ret;

	/* No flags supported yet. */
	if (flags)
		return -EINVAL;
	/* check for unsupported init values */
	if (init_val & ~PKEY_ACCESS_MASK)
		return -EINVAL;

	down_write(&current->mm->mmap_sem);
	pkey = mm_pkey_alloc(current->mm);

	ret = -ENOSPC;
	if (pkey == -1)
		goto out;

	ret = arch_set_user_pkey_access(current, pkey, init_val);
	if (ret) {
		/* Give the key back if its rights could not be set. */
		mm_pkey_free(current->mm, pkey);
		goto out;
	}
	ret = pkey;
out:
	up_write(&current->mm->mmap_sem);
	return ret;
}
|
||||||
|
|
||||||
|
/*
 * pkey_free() - release a protection key of the calling process.
 * @pkey: the key to release.
 *
 * Calls mm_pkey_free() with mmap_sem held for writing and returns its
 * result unchanged.
 */
SYSCALL_DEFINE1(pkey_free, int, pkey)
{
	int ret;

	down_write(&current->mm->mmap_sem);
	ret = mm_pkey_free(current->mm, pkey);
	up_write(&current->mm->mmap_sem);

	/*
	 * We could provide warnings or errors if any VMA still
	 * has the pkey set here.
	 */
	return ret;
}
|
||||||
|
|
|
@ -5,7 +5,8 @@ include ../lib.mk
|
||||||
.PHONY: all all_32 all_64 warn_32bit_failure clean
|
.PHONY: all all_32 all_64 warn_32bit_failure clean
|
||||||
|
|
||||||
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
|
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
|
||||||
check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test
|
check_initial_reg_state sigreturn ldt_gdt iopl \
|
||||||
|
protection_keys
|
||||||
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
|
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
|
||||||
test_FCMOV test_FCOMI test_FISTTP \
|
test_FCMOV test_FCOMI test_FISTTP \
|
||||||
vdso_restorer
|
vdso_restorer
|
||||||
|
|
|
@ -0,0 +1,219 @@
|
||||||
|
#ifndef _PKEYS_HELPER_H
|
||||||
|
#define _PKEYS_HELPER_H
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <signal.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <ucontext.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
|
||||||
|
#define NR_PKEYS 16
|
||||||
|
#define PKRU_BITS_PER_PKEY 2
|
||||||
|
|
||||||
|
#ifndef DEBUG_LEVEL
|
||||||
|
#define DEBUG_LEVEL 0
|
||||||
|
#endif
|
||||||
|
#define DPRINT_IN_SIGNAL_BUF_SIZE 4096
|
||||||
|
extern int dprint_in_signal;
|
||||||
|
extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
|
||||||
|
static inline void sigsafe_printf(const char *format, ...)
|
||||||
|
{
|
||||||
|
va_list ap;
|
||||||
|
|
||||||
|
va_start(ap, format);
|
||||||
|
if (!dprint_in_signal) {
|
||||||
|
vprintf(format, ap);
|
||||||
|
} else {
|
||||||
|
int len = vsnprintf(dprint_in_signal_buffer,
|
||||||
|
DPRINT_IN_SIGNAL_BUF_SIZE,
|
||||||
|
format, ap);
|
||||||
|
/*
|
||||||
|
* len is amount that would have been printed,
|
||||||
|
* but actual write is truncated at BUF_SIZE.
|
||||||
|
*/
|
||||||
|
if (len > DPRINT_IN_SIGNAL_BUF_SIZE)
|
||||||
|
len = DPRINT_IN_SIGNAL_BUF_SIZE;
|
||||||
|
write(1, dprint_in_signal_buffer, len);
|
||||||
|
}
|
||||||
|
va_end(ap);
|
||||||
|
}
|
||||||
|
/*
 * Print a debug message when @level does not exceed DEBUG_LEVEL.
 *
 * stdio is flushed after every call, whether or not anything was
 * printed, except while dprint_in_signal is set: fflush() is not
 * async-signal-safe, and sigsafe_printf() bypasses stdio in that mode.
 */
#define dprintf_level(level, args...) do {	\
	if ((level) <= DEBUG_LEVEL)		\
		sigsafe_printf(args);		\
	if (!dprint_in_signal)			\
		fflush(NULL);			\
} while (0)
#define dprintf0(args...) dprintf_level(0, args)
#define dprintf1(args...) dprintf_level(1, args)
#define dprintf2(args...) dprintf_level(2, args)
#define dprintf3(args...) dprintf_level(3, args)
#define dprintf4(args...) dprintf_level(4, args)
|
||||||
|
|
||||||
|
extern unsigned int shadow_pkru;
|
||||||
|
/*
 * Read the PKRU register without any shadow-copy checking.
 *
 * The instruction is emitted as raw opcode bytes rather than through a
 * mnemonic.  ECX is loaded with 0; the value left in EAX is returned and
 * the EDX output is discarded.
 */
static inline unsigned int __rdpkru(void)
{
	unsigned int lo, hi;

	__asm__ volatile(".byte 0x0f,0x01,0xee\n\t"
			 : "=a" (lo), "=d" (hi)
			 : "c" (0U));
	return lo;
}
|
||||||
|
|
||||||
|
static inline unsigned int _rdpkru(int line)
|
||||||
|
{
|
||||||
|
unsigned int pkru = __rdpkru();
|
||||||
|
|
||||||
|
dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n",
|
||||||
|
line, pkru, shadow_pkru);
|
||||||
|
assert(pkru == shadow_pkru);
|
||||||
|
|
||||||
|
return pkru;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define rdpkru() _rdpkru(__LINE__)
|
||||||
|
|
||||||
|
/*
 * Write @pkru to the PKRU register without touching shadow_pkru.
 *
 * The instruction is emitted as raw opcode bytes with EAX = @pkru and
 * ECX = EDX = 0.  The register is read back afterwards to assert that
 * the write took effect.
 */
static inline void __wrpkru(unsigned int pkru)
{
	dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
	__asm__ volatile(".byte 0x0f,0x01,0xef\n\t"
			 : : "a" (pkru), "c" (0U), "d" (0U));
	assert(pkru == __rdpkru());
}
|
||||||
|
|
||||||
|
static inline void wrpkru(unsigned int pkru)
|
||||||
|
{
|
||||||
|
dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
|
||||||
|
/* will do the shadow check for us: */
|
||||||
|
rdpkru();
|
||||||
|
__wrpkru(pkru);
|
||||||
|
shadow_pkru = pkru;
|
||||||
|
dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru());
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * These are technically racy, since something could
 * change PKRU between the read and the write.
 */

/*
 * Set or clear the bit at position pkey * 2 in PKRU.
 * @pkey:     protection key whose bit is updated.
 * @do_allow: non-zero clears the bit (allow), zero sets it (disallow).
 *
 * All other PKRU bits are left untouched.
 */
static inline void __pkey_access_allow(int pkey, int do_allow)
{
	unsigned int pkru = rdpkru();
	unsigned int mask = 1U << (pkey * 2);

	if (do_allow)
		pkru &= ~mask;
	else
		pkru |= mask;

	wrpkru(pkru);
	/* Report the value only once it has actually been written. */
	dprintf4("pkru now: %08x\n", rdpkru());
}
|
||||||
|
|
||||||
|
/*
 * Set or clear the bit at position pkey * 2 + 1 in PKRU.
 * @pkey:           protection key whose bit is updated.
 * @do_allow_write: non-zero clears the bit (allow), zero sets it
 *                  (disallow).
 *
 * All other PKRU bits are left untouched.  The mask is built from an
 * unsigned constant because the bit index reaches 31 for pkey 15.
 */
static inline void __pkey_write_allow(int pkey, int do_allow_write)
{
	unsigned int pkru = rdpkru();
	unsigned int mask = 1U << (pkey * 2 + 1);

	if (do_allow_write)
		pkru &= ~mask;
	else
		pkru |= mask;

	wrpkru(pkru);
	dprintf4("pkru now: %08x\n", rdpkru());
}
|
||||||
|
|
||||||
|
#define PROT_PKEY0 0x10 /* protection key value (bit 0) */
|
||||||
|
#define PROT_PKEY1 0x20 /* protection key value (bit 1) */
|
||||||
|
#define PROT_PKEY2 0x40 /* protection key value (bit 2) */
|
||||||
|
#define PROT_PKEY3 0x80 /* protection key value (bit 3) */
|
||||||
|
|
||||||
|
#define PAGE_SIZE 4096
|
||||||
|
#define MB (1<<20)
|
||||||
|
|
||||||
|
/*
 * Execute CPUID.
 * @eax: in: value loaded into EAX; out: resulting EAX.
 * @ebx: out: resulting EBX.
 * @ecx: in: value loaded into ECX; out: resulting ECX.
 * @edx: out: resulting EDX.
 */
static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
			   unsigned int *ecx, unsigned int *edx)
{
	/* ecx is often an input as well as an output. */
	__asm__ volatile("cpuid;"
			 : "+a" (*eax), "=b" (*ebx), "+c" (*ecx), "=d" (*edx));
}
|
||||||
|
|
||||||
|
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
|
||||||
|
#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
|
||||||
|
#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
|
||||||
|
|
||||||
|
static inline int cpu_has_pku(void)
|
||||||
|
{
|
||||||
|
unsigned int eax;
|
||||||
|
unsigned int ebx;
|
||||||
|
unsigned int ecx;
|
||||||
|
unsigned int edx;
|
||||||
|
|
||||||
|
eax = 0x7;
|
||||||
|
ecx = 0x0;
|
||||||
|
__cpuid(&eax, &ebx, &ecx, &edx);
|
||||||
|
|
||||||
|
if (!(ecx & X86_FEATURE_PKU)) {
|
||||||
|
dprintf2("cpu does not have PKU\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (!(ecx & X86_FEATURE_OSPKE)) {
|
||||||
|
dprintf2("cpu does not have OSPKE\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define XSTATE_PKRU_BIT (9)
|
||||||
|
#define XSTATE_PKRU 0x200
|
||||||
|
|
||||||
|
int pkru_xstate_offset(void)
|
||||||
|
{
|
||||||
|
unsigned int eax;
|
||||||
|
unsigned int ebx;
|
||||||
|
unsigned int ecx;
|
||||||
|
unsigned int edx;
|
||||||
|
int xstate_offset;
|
||||||
|
int xstate_size;
|
||||||
|
unsigned long XSTATE_CPUID = 0xd;
|
||||||
|
int leaf;
|
||||||
|
|
||||||
|
/* assume that XSTATE_PKRU is set in XCR0 */
|
||||||
|
leaf = XSTATE_PKRU_BIT;
|
||||||
|
{
|
||||||
|
eax = XSTATE_CPUID;
|
||||||
|
ecx = leaf;
|
||||||
|
__cpuid(&eax, &ebx, &ecx, &edx);
|
||||||
|
|
||||||
|
if (leaf == XSTATE_PKRU_BIT) {
|
||||||
|
xstate_offset = ebx;
|
||||||
|
xstate_size = eax;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (xstate_size == 0) {
|
||||||
|
printf("could not find size/offset of PKRU in xsave state\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return xstate_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* _PKEYS_HELPER_H */
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue