Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Ingo Molnar:
 "Misc fixes and updates:

   - a handful of MDS documentation/comment updates

   - a cleanup related to hweight interfaces

   - a SEV guest fix for large pages

   - a kprobes LTO fix

   - and a final cleanup commit for vDSO HPET support removal"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/speculation/mds: Improve CPU buffer clear documentation
  x86/speculation/mds: Revert CPU buffer clear on double fault exit
  x86/kconfig: Disable CONFIG_GENERIC_HWEIGHT and remove __HAVE_ARCH_SW_HWEIGHT
  x86/mm: Do not use set_{pud, pmd}_safe() when splitting a large page
  x86/kprobes: Make trampoline_handler() global and visible
  x86/vdso: Remove hpet_page from vDSO
This commit is contained in:
Linus Torvalds 2019-05-16 11:02:27 -07:00
commit d396360acd
11 changed files with 121 additions and 103 deletions

View File

@ -142,45 +142,13 @@ Mitigation points
mds_user_clear.
The mitigation is invoked in prepare_exit_to_usermode() which covers
most of the kernel to user space transitions. There are a few exceptions
which are not invoking prepare_exit_to_usermode() on return to user
space. These exceptions use the paranoid exit code.
all but one of the kernel to user space transitions. The exception
is when we return from a Non Maskable Interrupt (NMI), which is
handled directly in do_nmi().
- Non Maskable Interrupt (NMI):
Access to sensible data like keys, credentials in the NMI context is
mostly theoretical: The CPU can do prefetching or execute a
misspeculated code path and thereby fetching data which might end up
leaking through a buffer.
But for mounting other attacks the kernel stack address of the task is
already valuable information. So in full mitigation mode, the NMI is
mitigated on the return from do_nmi() to provide almost complete
coverage.
- Double fault (#DF):
A double fault is usually fatal, but the ESPFIX workaround, which can
be triggered from user space through modify_ldt(2) is a recoverable
double fault. #DF uses the paranoid exit path, so explicit mitigation
in the double fault handler is required.
- Machine Check Exception (#MC):
Another corner case is a #MC which hits between the CPU buffer clear
invocation and the actual return to user. As this still is in kernel
space it takes the paranoid exit path which does not clear the CPU
buffers. So the #MC handler repopulates the buffers to some
extent. Machine checks are not reliably controllable and the window is
extremly small so mitigation would just tick a checkbox that this
theoretical corner case is covered. To keep the amount of special
cases small, ignore #MC.
- Debug Exception (#DB):
This takes the paranoid exit path only when the INT1 breakpoint is in
kernel space. #DB on a user space address takes the regular exit path,
so no extra mitigation required.
(The reason that NMI is special is that prepare_exit_to_usermode() can
enable IRQs. In NMI context, NMIs are blocked, and we don't want to
enable IRQs with NMIs blocked.)
2. C-State transition

View File

@ -270,9 +270,6 @@ config GENERIC_BUG
config GENERIC_BUG_RELATIVE_POINTERS
bool
config GENERIC_HWEIGHT
def_bool y
config ARCH_MAY_HAVE_PC_FDC
def_bool y
depends on ISA_DMA_API

View File

@ -73,14 +73,12 @@ const char *outfilename;
enum {
sym_vvar_start,
sym_vvar_page,
sym_hpet_page,
sym_pvclock_page,
sym_hvclock_page,
};
const int special_pages[] = {
sym_vvar_page,
sym_hpet_page,
sym_pvclock_page,
sym_hvclock_page,
};
@ -93,7 +91,6 @@ struct vdso_sym {
struct vdso_sym required_syms[] = {
[sym_vvar_start] = {"vvar_start", true},
[sym_vvar_page] = {"vvar_page", true},
[sym_hpet_page] = {"hpet_page", true},
[sym_pvclock_page] = {"pvclock_page", true},
[sym_hvclock_page] = {"hvclock_page", true},
{"VDSO32_NOTE_MASK", true},

View File

@ -12,8 +12,6 @@
#define REG_OUT "a"
#endif
#define __HAVE_ARCH_SW_HWEIGHT
static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
unsigned int res;

View File

@ -19,7 +19,6 @@ struct vdso_image {
long sym_vvar_start; /* Negative offset to the vvar area */
long sym_vvar_page;
long sym_hpet_page;
long sym_pvclock_page;
long sym_hvclock_page;
long sym_VDSO32_NOTE_MASK;

View File

@ -768,7 +768,7 @@ static struct kprobe kretprobe_kprobe = {
/*
* Called from kretprobe_trampoline
*/
static __used void *trampoline_handler(struct pt_regs *regs)
__used __visible void *trampoline_handler(struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb;
struct kretprobe_instance *ri = NULL;

View File

@ -58,7 +58,6 @@
#include <asm/alternative.h>
#include <asm/fpu/xstate.h>
#include <asm/trace/mpx.h>
#include <asm/nospec-branch.h>
#include <asm/mpx.h>
#include <asm/vm86.h>
#include <asm/umip.h>
@ -368,13 +367,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
regs->ip = (unsigned long)general_protection;
regs->sp = (unsigned long)&gpregs->orig_ax;
/*
* This situation can be triggered by userspace via
* modify_ldt(2) and the return does not take the regular
* user space exit, so a CPU buffer clear is required when
* MDS mitigation is enabled.
*/
mds_user_clear_cpu_buffers();
return;
}
#endif

View File

@ -58,6 +58,37 @@
#include "ident_map.c"
#define DEFINE_POPULATE(fname, type1, type2, init) \
static inline void fname##_init(struct mm_struct *mm, \
type1##_t *arg1, type2##_t *arg2, bool init) \
{ \
if (init) \
fname##_safe(mm, arg1, arg2); \
else \
fname(mm, arg1, arg2); \
}
DEFINE_POPULATE(p4d_populate, p4d, pud, init)
DEFINE_POPULATE(pgd_populate, pgd, p4d, init)
DEFINE_POPULATE(pud_populate, pud, pmd, init)
DEFINE_POPULATE(pmd_populate_kernel, pmd, pte, init)
#define DEFINE_ENTRY(type1, type2, init) \
static inline void set_##type1##_init(type1##_t *arg1, \
type2##_t arg2, bool init) \
{ \
if (init) \
set_##type1##_safe(arg1, arg2); \
else \
set_##type1(arg1, arg2); \
}
DEFINE_ENTRY(p4d, p4d, init)
DEFINE_ENTRY(pud, pud, init)
DEFINE_ENTRY(pmd, pmd, init)
DEFINE_ENTRY(pte, pte, init)
/*
* NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
* physical space so we can cache the place of the first one and move
@ -414,7 +445,7 @@ void __init cleanup_highmap(void)
*/
static unsigned long __meminit
phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
pgprot_t prot)
pgprot_t prot, bool init)
{
unsigned long pages = 0, paddr_next;
unsigned long paddr_last = paddr_end;
@ -432,7 +463,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
E820_TYPE_RAM) &&
!e820__mapped_any(paddr & PAGE_MASK, paddr_next,
E820_TYPE_RESERVED_KERN))
set_pte_safe(pte, __pte(0));
set_pte_init(pte, __pte(0), init);
continue;
}
@ -452,7 +483,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr,
pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte);
pages++;
set_pte_safe(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
set_pte_init(pte, pfn_pte(paddr >> PAGE_SHIFT, prot), init);
paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE;
}
@ -468,7 +499,7 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
*/
static unsigned long __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
unsigned long page_size_mask, pgprot_t prot)
unsigned long page_size_mask, pgprot_t prot, bool init)
{
unsigned long pages = 0, paddr_next;
unsigned long paddr_last = paddr_end;
@ -487,7 +518,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
E820_TYPE_RAM) &&
!e820__mapped_any(paddr & PMD_MASK, paddr_next,
E820_TYPE_RESERVED_KERN))
set_pmd_safe(pmd, __pmd(0));
set_pmd_init(pmd, __pmd(0), init);
continue;
}
@ -496,7 +527,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
spin_lock(&init_mm.page_table_lock);
pte = (pte_t *)pmd_page_vaddr(*pmd);
paddr_last = phys_pte_init(pte, paddr,
paddr_end, prot);
paddr_end, prot,
init);
spin_unlock(&init_mm.page_table_lock);
continue;
}
@ -524,19 +556,20 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
if (page_size_mask & (1<<PG_LEVEL_2M)) {
pages++;
spin_lock(&init_mm.page_table_lock);
set_pte_safe((pte_t *)pmd,
set_pte_init((pte_t *)pmd,
pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
__pgprot(pgprot_val(prot) | _PAGE_PSE)));
__pgprot(pgprot_val(prot) | _PAGE_PSE)),
init);
spin_unlock(&init_mm.page_table_lock);
paddr_last = paddr_next;
continue;
}
pte = alloc_low_page();
paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot);
paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot, init);
spin_lock(&init_mm.page_table_lock);
pmd_populate_kernel_safe(&init_mm, pmd, pte);
pmd_populate_kernel_init(&init_mm, pmd, pte, init);
spin_unlock(&init_mm.page_table_lock);
}
update_page_count(PG_LEVEL_2M, pages);
@ -551,7 +584,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
*/
static unsigned long __meminit
phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
unsigned long page_size_mask)
unsigned long page_size_mask, bool init)
{
unsigned long pages = 0, paddr_next;
unsigned long paddr_last = paddr_end;
@ -573,7 +606,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
E820_TYPE_RAM) &&
!e820__mapped_any(paddr & PUD_MASK, paddr_next,
E820_TYPE_RESERVED_KERN))
set_pud_safe(pud, __pud(0));
set_pud_init(pud, __pud(0), init);
continue;
}
@ -583,7 +616,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
paddr_last = phys_pmd_init(pmd, paddr,
paddr_end,
page_size_mask,
prot);
prot, init);
continue;
}
/*
@ -610,9 +643,10 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
if (page_size_mask & (1<<PG_LEVEL_1G)) {
pages++;
spin_lock(&init_mm.page_table_lock);
set_pte_safe((pte_t *)pud,
set_pte_init((pte_t *)pud,
pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
PAGE_KERNEL_LARGE));
PAGE_KERNEL_LARGE),
init);
spin_unlock(&init_mm.page_table_lock);
paddr_last = paddr_next;
continue;
@ -620,10 +654,10 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
pmd = alloc_low_page();
paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
page_size_mask, prot);
page_size_mask, prot, init);
spin_lock(&init_mm.page_table_lock);
pud_populate_safe(&init_mm, pud, pmd);
pud_populate_init(&init_mm, pud, pmd, init);
spin_unlock(&init_mm.page_table_lock);
}
@ -634,14 +668,15 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
static unsigned long __meminit
phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
unsigned long page_size_mask)
unsigned long page_size_mask, bool init)
{
unsigned long paddr_next, paddr_last = paddr_end;
unsigned long vaddr = (unsigned long)__va(paddr);
int i = p4d_index(vaddr);
if (!pgtable_l5_enabled())
return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask);
return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
page_size_mask, init);
for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
p4d_t *p4d;
@ -657,39 +692,34 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
E820_TYPE_RAM) &&
!e820__mapped_any(paddr & P4D_MASK, paddr_next,
E820_TYPE_RESERVED_KERN))
set_p4d_safe(p4d, __p4d(0));
set_p4d_init(p4d, __p4d(0), init);
continue;
}
if (!p4d_none(*p4d)) {
pud = pud_offset(p4d, 0);
paddr_last = phys_pud_init(pud, paddr,
paddr_end,
page_size_mask);
paddr_last = phys_pud_init(pud, paddr, paddr_end,
page_size_mask, init);
continue;
}
pud = alloc_low_page();
paddr_last = phys_pud_init(pud, paddr, paddr_end,
page_size_mask);
page_size_mask, init);
spin_lock(&init_mm.page_table_lock);
p4d_populate_safe(&init_mm, p4d, pud);
p4d_populate_init(&init_mm, p4d, pud, init);
spin_unlock(&init_mm.page_table_lock);
}
return paddr_last;
}
/*
* Create page table mapping for the physical memory for specific physical
* addresses. The virtual and physical addresses have to be aligned on PMD level
* down. It returns the last physical address mapped.
*/
unsigned long __meminit
kernel_physical_mapping_init(unsigned long paddr_start,
static unsigned long __meminit
__kernel_physical_mapping_init(unsigned long paddr_start,
unsigned long paddr_end,
unsigned long page_size_mask)
unsigned long page_size_mask,
bool init)
{
bool pgd_changed = false;
unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
@ -709,19 +739,22 @@ kernel_physical_mapping_init(unsigned long paddr_start,
p4d = (p4d_t *)pgd_page_vaddr(*pgd);
paddr_last = phys_p4d_init(p4d, __pa(vaddr),
__pa(vaddr_end),
page_size_mask);
page_size_mask,
init);
continue;
}
p4d = alloc_low_page();
paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end),
page_size_mask);
page_size_mask, init);
spin_lock(&init_mm.page_table_lock);
if (pgtable_l5_enabled())
pgd_populate_safe(&init_mm, pgd, p4d);
pgd_populate_init(&init_mm, pgd, p4d, init);
else
p4d_populate_safe(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d);
p4d_populate_init(&init_mm, p4d_offset(pgd, vaddr),
(pud_t *) p4d, init);
spin_unlock(&init_mm.page_table_lock);
pgd_changed = true;
}
@ -732,6 +765,37 @@ kernel_physical_mapping_init(unsigned long paddr_start,
return paddr_last;
}
/*
* Create page table mapping for the physical memory for specific physical
* addresses. Note that it can only be used to populate non-present entries.
* The virtual and physical addresses have to be aligned on PMD level
* down. It returns the last physical address mapped.
*/
unsigned long __meminit
kernel_physical_mapping_init(unsigned long paddr_start,
unsigned long paddr_end,
unsigned long page_size_mask)
{
return __kernel_physical_mapping_init(paddr_start, paddr_end,
page_size_mask, true);
}
/*
* This function is similar to kernel_physical_mapping_init() above with the
* exception that it uses set_{pud,pmd}() instead of the set_{pud,pte}_safe()
* when updating the mapping. The caller is responsible to flush the TLBs after
* the function returns.
*/
unsigned long __meminit
kernel_physical_mapping_change(unsigned long paddr_start,
unsigned long paddr_end,
unsigned long page_size_mask)
{
return __kernel_physical_mapping_init(paddr_start, paddr_end,
page_size_mask, false);
}
#ifndef CONFIG_NUMA
void __init initmem_init(void)
{

View File

@ -301,7 +301,11 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
else
split_page_size_mask = 1 << PG_LEVEL_2M;
kernel_physical_mapping_init(__pa(vaddr & pmask),
/*
* kernel_physical_mapping_change() does not flush the TLBs, so
* a TLB flush is required after we exit from the for loop.
*/
kernel_physical_mapping_change(__pa(vaddr & pmask),
__pa((vaddr_end & pmask) + psize),
split_page_size_mask);
}

View File

@ -13,6 +13,9 @@ void early_ioremap_page_table_range_init(void);
unsigned long kernel_physical_mapping_init(unsigned long start,
unsigned long end,
unsigned long page_size_mask);
unsigned long kernel_physical_mapping_change(unsigned long start,
unsigned long end,
unsigned long page_size_mask);
void zone_sizes_init(void);
extern int after_bootmem;

View File

@ -10,7 +10,6 @@
* The Hamming Weight of a number is the total number of bits set in it.
*/
#ifndef __HAVE_ARCH_SW_HWEIGHT
unsigned int __sw_hweight32(unsigned int w)
{
#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
@ -27,7 +26,6 @@ unsigned int __sw_hweight32(unsigned int w)
#endif
}
EXPORT_SYMBOL(__sw_hweight32);
#endif
unsigned int __sw_hweight16(unsigned int w)
{
@ -46,7 +44,6 @@ unsigned int __sw_hweight8(unsigned int w)
}
EXPORT_SYMBOL(__sw_hweight8);
#ifndef __HAVE_ARCH_SW_HWEIGHT
unsigned long __sw_hweight64(__u64 w)
{
#if BITS_PER_LONG == 32
@ -69,4 +66,3 @@ unsigned long __sw_hweight64(__u64 w)
#endif
}
EXPORT_SYMBOL(__sw_hweight64);
#endif