Merge branch 'fixes' into next
Merge our fixes branch into next. This brings in a number of commits that fix bugs we don't want to hit in next, in particular the fix for CVE-2019-12817.
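For background, CVE-2019-12817 is the hash-MMU context id bug addressed by the hash__init_new_context()/realloc_context_ids() changes below: on fork, the child inherited the parent's extended context ids for mappings above 512TB, so parent and child ended up with the same hash translations and could observe each other's writes. A condensed sketch of the trigger, adapted from the large_vm_fork_separation.c selftest added at the end of this diff (error handling trimmed):

	#include <stdio.h>
	#include <sys/mman.h>
	#include <sys/wait.h>
	#include <unistd.h>

	int main(void)
	{
		/* Map a page above 512TB so the mm needs an extended context id. */
		int *p = mmap((void *)(512ul << 40), 65536, PROT_READ | PROT_WRITE,
			      MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
		if (p == MAP_FAILED)
			return 1;

		*p = 1;
		if (fork() == 0) {
			*p = 2;		/* COW write; must stay private to the child */
			_exit(0);
		}
		wait(NULL);

		/* On a vulnerable kernel the parent can observe the child's write. */
		printf("parent sees *p = %d (expected 1)\n", *p);
		return *p != 1;
	}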
commit 8b8dc69514

@@ -884,6 +884,23 @@ static inline int pmd_present(pmd_t pmd)
 	return false;
 }
 
+static inline int pmd_is_serializing(pmd_t pmd)
+{
+	/*
+	 * If the pmd is undergoing a split, the _PAGE_PRESENT bit is clear
+	 * and _PAGE_INVALID is set (see pmd_present, pmdp_invalidate).
+	 *
+	 * This condition may also occur when flushing a pmd while flushing
+	 * it (see ptep_modify_prot_start), so callers must ensure this
+	 * case is fine as well.
+	 */
+	if ((pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID)) ==
+			cpu_to_be64(_PAGE_INVALID))
+		return true;
+
+	return false;
+}
+
 static inline int pmd_bad(pmd_t pmd)
 {
 	if (radix_enabled())

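For readers following the encoding: pmd_is_serializing() keys off the (_PAGE_PRESENT, _PAGE_INVALID) pair, where pmdp_invalidate() clears PRESENT and sets INVALID for the duration of a THP split. A toy model of the predicate with stand-in bit values (the real masks are defined in this header, not these values):

	#include <assert.h>
	#include <stdint.h>

	#define PRESENT	0x1ULL	/* stand-in for _PAGE_PRESENT */
	#define INVALID	0x2ULL	/* stand-in for _PAGE_INVALID */

	/* Mirrors the check above: PRESENT clear and INVALID set. */
	static int is_serializing(uint64_t pmd)
	{
		return (pmd & (PRESENT | INVALID)) == INVALID;
	}

	int main(void)
	{
		assert(!is_serializing(PRESENT));	/* normal mapped pmd */
		assert(!is_serializing(0));		/* pmd_none */
		assert(is_serializing(INVALID));	/* THP split in progress */
		return 0;
	}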
@@ -1100,6 +1117,19 @@ static inline int pmd_protnone(pmd_t pmd)
 #define pmd_access_permitted pmd_access_permitted
 static inline bool pmd_access_permitted(pmd_t pmd, bool write)
 {
+	/*
+	 * pmdp_invalidate sets this combination (which is not caught by
+	 * !pte_present() check in pte_access_permitted), to prevent
+	 * lock-free lookups, as part of the serialize_against_pte_lookup()
+	 * synchronisation.
+	 *
+	 * This also catches the case where the PTE's hardware PRESENT bit is
+	 * cleared while TLB is flushed, which is suboptimal but should not
+	 * be frequent.
+	 */
+	if (pmd_is_serializing(pmd))
+		return false;
+
 	return pte_access_permitted(pmd_pte(pmd), write);
 }

@@ -13,7 +13,11 @@ extern void btext_update_display(unsigned long phys, int width, int height,
 			   int depth, int pitch);
 extern void btext_setup_display(int width, int height, int depth, int pitch,
 				unsigned long address);
+#ifdef CONFIG_PPC32
 extern void btext_prepare_BAT(void);
+#else
+static inline void btext_prepare_BAT(void) { }
+#endif
 extern void btext_map(void);
 extern void btext_unmap(void);

@@ -94,6 +94,9 @@ static inline bool kdump_in_progress(void)
 	return crashing_cpu >= 0;
 }
 
+void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_code_buffer,
+			 unsigned long start_address) __noreturn;
+
 #ifdef CONFIG_KEXEC_FILE
 extern const struct kexec_file_ops kexec_elf64_ops;

@@ -323,6 +323,13 @@ struct vm_area_struct;
 #endif /* __ASSEMBLY__ */
 #include <asm/slice.h>
 
+/*
+ * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks.
+ */
+#ifdef CONFIG_PPC32
+#define ARCH_ZONE_DMA_BITS 30
+#else
+#define ARCH_ZONE_DMA_BITS 31
+#endif
+
 #endif /* _ASM_POWERPC_PAGE_H */

@@ -315,7 +315,7 @@ TRAMP_REAL_BEGIN(machine_check_common_early)
 	mfspr	r11,SPRN_DSISR		/* Save DSISR */
 	std	r11,_DSISR(r1)
 	std	r9,_CCR(r1)		/* Save CR in stackframe */
-	kuap_save_amr_and_lock r9, r10, cr1
+	/* We don't touch AMR here, we never go to virtual mode */
 	/* Save r9 through r13 from EXMC save area to stack frame. */
 	EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
 	mfmsr	r11			/* get MSR value */

@@ -757,6 +757,7 @@ __secondary_start:
 	stw	r0,0(r3)
 
 	/* load up the MMU */
+	bl	load_segment_registers
 	bl	load_up_mmu
 
 	/* ptr to phys current thread */

@@ -83,7 +83,7 @@ END_BTB_FLUSH_SECTION
 	SAVE_4GPRS(3, r11);						     \
 	SAVE_2GPRS(7, r11)
 
-.macro SYSCALL_ENTRY trapno intno
+.macro SYSCALL_ENTRY trapno intno srr1
 	mfspr	r10, SPRN_SPRG_THREAD
 #ifdef CONFIG_KVM_BOOKE_HV
 BEGIN_FTR_SECTION

@@ -94,7 +94,7 @@ BEGIN_FTR_SECTION
 	mfspr	r11, SPRN_SRR1
 	mtocrf	0x80, r11	/* check MSR[GS] without clobbering reg */
 	bf	3, 1975f
-	b	kvmppc_handler_BOOKE_INTERRUPT_\intno\()_SPRN_SRR1
+	b	kvmppc_handler_\intno\()_\srr1
 1975:
 	mr	r12, r13
 	lwz	r13, THREAD_NORMSAVE(2)(r10)

@@ -145,9 +145,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
 	tophys(r11,r11)
 	addi	r11,r11,global_dbcr0@l
 #ifdef CONFIG_SMP
-	lwz	r9,TASK_CPU(r2)
-	slwi	r9,r9,3
-	add	r11,r11,r9
+	lwz	r10, TASK_CPU(r2)
+	slwi	r10, r10, 3
+	add	r11, r11, r10
 #endif
 	lwz	r12,0(r11)
 	mtspr	SPRN_DBCR0,r12

@@ -413,7 +413,7 @@ interrupt_base:
 
 	/* System Call Interrupt */
 	START_EXCEPTION(SystemCall)
-	SYSCALL_ENTRY	0xc00 SYSCALL
+	SYSCALL_ENTRY	0xc00 BOOKE_INTERRUPT_SYSCALL SPRN_SRR1
 
 	/* Auxiliary Processor Unavailable Interrupt */
 	EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \

@@ -30,7 +30,6 @@ typedef void (*relocate_new_kernel_t)(
  */
 void default_machine_kexec(struct kimage *image)
 {
-	extern const unsigned char relocate_new_kernel[];
 	extern const unsigned int relocate_new_kernel_size;
 	unsigned long page_list;
 	unsigned long reboot_code_buffer, reboot_code_buffer_phys;

@@ -58,6 +57,9 @@ void default_machine_kexec(struct kimage *image)
 			reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE);
 	printk(KERN_INFO "Bye!\n");
 
+	if (!IS_ENABLED(CONFIG_FSL_BOOKE) && !IS_ENABLED(CONFIG_44x))
+		relocate_new_kernel(page_list, reboot_code_buffer_phys, image->start);
+
 	/* now call it */
 	rnk = (relocate_new_kernel_t) reboot_code_buffer;
 	(*rnk)(page_list, reboot_code_buffer_phys, image->start);

@@ -2349,6 +2349,7 @@ static void __init prom_check_displays(void)
 			prom_printf("W=%d H=%d LB=%d addr=0x%x\n",
 				    width, height, pitch, addr);
 			btext_setup_display(width, height, 8, pitch, addr);
+			btext_prepare_BAT();
 		}
 #endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */
 }

@@ -27,7 +27,7 @@ fi
 WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush
 _end enter_prom $MEM_FUNCS reloc_offset __secondary_hold
 __secondary_hold_acknowledge __secondary_hold_spinloop __start
-logo_linux_clut224
+logo_linux_clut224 btext_prepare_BAT
 reloc_got2 kernstart_addr memstart_addr linux_banner _stext
 __prom_init_toc_start __prom_init_toc_end btext_setup_display TOC."

@@ -833,6 +833,7 @@ static void flush_guest_tlb(struct kvm *kvm)
 		}
 	}
 	asm volatile("ptesync": : :"memory");
+	asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
 }
 
 void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,

@@ -2507,17 +2507,28 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	LOAD_REG_ADDR(r11, dawr_force_enable)
 	lbz	r11, 0(r11)
 	cmpdi	r11, 0
+	bne	3f
 	li	r3, H_HARDWARE
-	beqlr
+	blr
+3:
 	/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
 	rlwimi	r5, r4, 5, DAWRX_DR | DAWRX_DW
 	rlwimi	r5, r4, 2, DAWRX_WT
 	clrrdi	r4, r4, 3
 	std	r4, VCPU_DAWR(r3)
 	std	r5, VCPU_DAWRX(r3)
+	/*
+	 * If came in through the real mode hcall handler then it is necessary
+	 * to write the registers since the return path won't. Otherwise it is
+	 * sufficient to store then in the vcpu struct as they will be loaded
+	 * next time the vcpu is run.
+	 */
+	mfmsr	r6
+	andi.	r6, r6, MSR_DR		/* in real mode? */
+	bne	4f
 	mtspr	SPRN_DAWR, r4
 	mtspr	SPRN_DAWRX, r5
-	li	r3, 0
+4:	li	r3, 0
 	blr
 
 _GLOBAL(kvmppc_h_cede)		/* r3 = vcpu pointer, r11 = msr, r13 = paca */

@@ -55,20 +55,52 @@ EXPORT_SYMBOL_GPL(hash__alloc_context_id);
 
 void slb_setup_new_exec(void);
 
+static int realloc_context_ids(mm_context_t *ctx)
+{
+	int i, id;
+
+	/*
+	 * id 0 (aka. ctx->id) is special, we always allocate a new one, even if
+	 * there wasn't one allocated previously (which happens in the exec
+	 * case where ctx is newly allocated).
+	 *
+	 * We have to be a bit careful here. We must keep the existing ids in
+	 * the array, so that we can test if they're non-zero to decide if we
+	 * need to allocate a new one. However in case of error we must free the
+	 * ids we've allocated but *not* any of the existing ones (or risk a
+	 * UAF). That's why we decrement i at the start of the error handling
+	 * loop, to skip the id that we just tested but couldn't reallocate.
+	 */
+	for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) {
+		if (i == 0 || ctx->extended_id[i]) {
+			id = hash__alloc_context_id();
+			if (id < 0)
+				goto error;
+
+			ctx->extended_id[i] = id;
+		}
+	}
+
+	/* The caller expects us to return id */
+	return ctx->id;
+
+error:
+	for (i--; i >= 0; i--) {
+		if (ctx->extended_id[i])
+			ida_free(&mmu_context_ida, ctx->extended_id[i]);
+	}
+
+	return id;
+}
+
 static int hash__init_new_context(struct mm_struct *mm)
 {
 	int index;
 
-	index = hash__alloc_context_id();
-	if (index < 0)
-		return index;
-
 	mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context),
 					   GFP_KERNEL);
-	if (!mm->context.hash_context) {
-		ida_free(&mmu_context_ida, index);
+	if (!mm->context.hash_context)
 		return -ENOMEM;
-	}
 
 	/*
 	 * The old code would re-promote on fork, we don't do that when using

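realloc_context_ids() above is the heart of the CVE-2019-12817 fix: every context id the new mm inherited, including the extended ids for ranges above 512TB, is replaced with a freshly allocated one, so the child stops sharing translations with its parent. The error path is the subtle part. Here is a toy model of the unwind, with a fake allocator standing in for hash__alloc_context_id() and a printf standing in for ida_free(), showing why the cleanup loop starts with i-- (slot i still holds an old id that must not be freed):

	#include <stdio.h>

	#define NIDS 8

	static int next_id = 100;
	static int allocs_left = 3;

	/* Stand-in for hash__alloc_context_id(); fails after three allocations. */
	static int fake_alloc(void)
	{
		return allocs_left-- > 0 ? next_id++ : -1;
	}

	static int realloc_ids(int ids[NIDS])
	{
		int i, id;

		for (i = 0; i < NIDS; i++) {
			if (i == 0 || ids[i]) {
				id = fake_alloc();
				if (id < 0)
					goto error;
				ids[i] = id;
			}
		}
		return ids[0];

	error:
		/* i-- skips slot i: it still holds an *old* id we must keep. */
		for (i--; i >= 0; i--)
			if (ids[i])
				printf("freeing new id %d\n", ids[i]);	/* ida_free() stand-in */
		return id;
	}

	int main(void)
	{
		int ids[NIDS] = { 1, 0, 2, 3, 4, 0, 0, 0 };

		/* Allocation fails at slot 4; only new ids 100-102 are freed,
		 * the old id 4 in slot 4 is left untouched. */
		printf("rc = %d\n", realloc_ids(ids));
		return 0;
	}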
@@ -96,13 +128,20 @@ static int hash__init_new_context(struct mm_struct *mm)
 		mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table),
 							GFP_KERNEL);
 		if (!mm->context.hash_context->spt) {
-			ida_free(&mmu_context_ida, index);
 			kfree(mm->context.hash_context);
 			return -ENOMEM;
 		}
 	}
 #endif
 	}
 
+	index = realloc_context_ids(&mm->context);
+	if (index < 0) {
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+		kfree(mm->context.hash_context->spt);
+#endif
+		kfree(mm->context.hash_context);
+		return index;
+	}
+
 	pkey_mm_init(mm);

@@ -116,6 +116,9 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 	/*
 	 * This ensures that generic code that rely on IRQ disabling
 	 * to prevent a parallel THP split work as expected.
+	 *
+	 * Marking the entry with _PAGE_INVALID && ~_PAGE_PRESENT requires
+	 * a special case check in pmd_access_permitted.
 	 */
 	serialize_against_pte_lookup(vma->vm_mm);
 	return __pmd(old_pmd);

@@ -253,7 +253,8 @@ void __init paging_init(void)
 	       (long int)((top_of_ram - total_ram) >> 20));
 
 #ifdef CONFIG_ZONE_DMA
-	max_zone_pfns[ZONE_DMA] = min(max_low_pfn, 0x7fffffffUL >> PAGE_SHIFT);
+	max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
+				      ((1UL << ARCH_ZONE_DMA_BITS) - 1) >> PAGE_SHIFT);
 #endif
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 #ifdef CONFIG_HIGHMEM

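As a sanity check of the new bound: with ARCH_ZONE_DMA_BITS = 30 the DMA zone tops out at the pfn covering the first 1GB, matching the 30-bit-capable Broadcom parts mentioned in the page.h comment above, while 31 bits reproduces the old 0x7fffffff limit. A quick standalone check of the arithmetic (the PAGE_SHIFT values are illustrative):

	#include <stdio.h>

	int main(void)
	{
		unsigned long bits[] = { 30, 31 };	/* ARCH_ZONE_DMA_BITS: PPC32 / PPC64 */
		unsigned long shifts[] = { 12, 16 };	/* PAGE_SHIFT for 4K and 64K pages */

		for (int i = 0; i < 2; i++)
			for (int j = 0; j < 2; j++)
				printf("bits=%lu page_shift=%lu -> max DMA pfn 0x%lx\n",
				       bits[i], shifts[j],
				       ((1UL << bits[i]) - 1) >> shifts[j]);
		return 0;
	}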
@@ -372,13 +372,25 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
 	pdshift = PMD_SHIFT;
 	pmdp = pmd_offset(&pud, ea);
 	pmd  = READ_ONCE(*pmdp);
+
 	/*
-	 * A hugepage collapse is captured by pmd_none, because
-	 * it mark the pmd none and do a hpte invalidate.
+	 * A hugepage collapse is captured by this condition, see
+	 * pmdp_collapse_flush.
 	 */
 	if (pmd_none(pmd))
 		return NULL;
 
+#ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * A hugepage split is captured by this condition, see
+	 * pmdp_invalidate.
+	 *
+	 * Huge page modification can be caught here too.
+	 */
+	if (pmd_is_serializing(pmd))
+		return NULL;
+#endif
+
 	if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) {
 		if (is_thp)
 			*is_thp = true;

@@ -7,6 +7,7 @@ config PPC_PMAC
 	select PPC_INDIRECT_PCI if PPC32
 	select PPC_MPC106 if PPC32
 	select PPC_NATIVE
+	select ZONE_DMA if PPC32
 	default y
 
 config PPC_PMAC64

@@ -3,4 +3,5 @@ subpage_prot
 tempfile
 prot_sao
 segv_errors
-wild_bctr
+wild_bctr
+large_vm_fork_separation

@@ -2,7 +2,8 @@
 noarg:
 	$(MAKE) -C ../
 
-TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
+		  large_vm_fork_separation
 TEST_GEN_FILES := tempfile
 
 top_srcdir = ../../../../..

@@ -13,6 +14,7 @@ $(TEST_GEN_PROGS): ../harness.c
 $(OUTPUT)/prot_sao: ../utils.c
 
 $(OUTPUT)/wild_bctr: CFLAGS += -m64
+$(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
 
 $(OUTPUT)/tempfile:
 	dd if=/dev/zero of=$@ bs=64k count=1

@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2019, Michael Ellerman, IBM Corp.
+//
+// Test that allocating memory beyond the memory limit and then forking is
+// handled correctly, ie. the child is able to access the mappings beyond the
+// memory limit and the child's writes are not visible to the parent.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+
+#ifndef MAP_FIXED_NOREPLACE
+#define MAP_FIXED_NOREPLACE	MAP_FIXED	// "Should be safe" above 512TB
+#endif
+
+
+static int test(void)
+{
+	int p2c[2], c2p[2], rc, status, c, *p;
+	unsigned long page_size;
+	pid_t pid;
+
+	page_size = sysconf(_SC_PAGESIZE);
+	SKIP_IF(page_size != 65536);
+
+	// Create a mapping at 512TB to allocate an extended_id
+	p = mmap((void *)(512ul << 40), page_size, PROT_READ | PROT_WRITE,
+		 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0);
+	if (p == MAP_FAILED) {
+		perror("mmap");
+		printf("Error: couldn't mmap(), confirm kernel has 4TB support?\n");
+		return 1;
+	}
+
+	printf("parent writing %p = 1\n", p);
+	*p = 1;
+
+	FAIL_IF(pipe(p2c) == -1 || pipe(c2p) == -1);
+
+	pid = fork();
+	if (pid == 0) {
+		FAIL_IF(read(p2c[0], &c, 1) != 1);
+
+		pid = getpid();
+		printf("child writing %p = %d\n", p, pid);
+		*p = pid;
+
+		FAIL_IF(write(c2p[1], &c, 1) != 1);
+		FAIL_IF(read(p2c[0], &c, 1) != 1);
+		exit(0);
+	}
+
+	c = 0;
+	FAIL_IF(write(p2c[1], &c, 1) != 1);
+	FAIL_IF(read(c2p[0], &c, 1) != 1);
+
+	// Prevent compiler optimisation
+	barrier();
+
+	rc = 0;
+	printf("parent reading %p = %d\n", p, *p);
+	if (*p != 1) {
+		printf("Error: BUG! parent saw child's write! *p = %d\n", *p);
+		rc = 1;
+	}
+
+	FAIL_IF(write(p2c[1], &c, 1) != 1);
+	FAIL_IF(waitpid(pid, &status, 0) == -1);
+	FAIL_IF(!WIFEXITED(status) || WEXITSTATUS(status));
+
+	if (rc == 0)
+		printf("success: test completed OK\n");
+
+	return rc;
+}
+
+int main(void)
+{
+	return test_harness(test, "large_vm_fork_separation");
+}