powerpc fixes for 4.13 #4

The highlight is Ben's patch to work around a host killing bug when running KVM
 guests with the Radix MMU on Power9. See the long change log of that commit for
 more detail.
 
 And then three fairly minor fixes:
  - Fix of_node_put() underflow during reconfig remove, using old DLPAR tools.
  - Fix recently introduced ld version check with 64-bit LE-only toolchain.
  - Free the subpage_prot_table correctly, avoiding a memory leak.
 
 Thanks to:
   Aneesh Kumar K.V, Benjamin Herrenschmidt, Laurent Vivier.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJZezIGAAoJEFHr6jzI4aWAyukP/3mxlQ3WdQlYPByZ18cj6YL5
 L0kbRxDgAosD9HzcOPqku1um1l7D6Gk5KZFZfsol7SasSmCZEwV4MbdTrAiRxS6K
 tF0V14hP/BDQeIKfxlnUepzfL8PY7CkO6sDAa6BjHXvBk4+POI+37uw9+2GEV8DY
 tA45fHjA/Zq3eUXsK0WTHIcd09lJXXarf9Tlx+YNZ+3yJ1OMfOji3CXgTkjtwYM9
 XTtsKzsagY1zLwr5gXJu1P05+OGna2VmY6+Tn2lnf7scTFW3qYGF3eWRx71diiKS
 PpZCjqfzWF4+TDIGPoYIrkTE+ZKR0lyo6F38GYwae0cYZMs9pGPEpeNahd8Nun+v
 MLU6TnhNfOI40GEYgmOMNKHPJJLSx59Qr/GnrAi/h2nUEocuN76jzNbaeFBtj3jD
 /vrRTmVUtt1wGqORX7BK4YZFHqcHmZBCM7bQnxibJtLv7fMue0sk58fs2jAaZ1iD
 NacpzsXG7CWYgj6ApclVCYuF99dXTpjrw/WPxilXDg84Pxb7Dv1SpIvLb2T5Guq+
 iqqavViRHP1ng+5/giIsOvF9CnsCzbRYLb0zZTP91nckMmYI6wX2zc56lofjcI5j
 Qc5o/aJvBk4vSM9sibBGEdrZJ1Vt16gGorQ5NZUurZund/cVqvQFhm/4Tvnc0cVN
 yvLNZI8am35pI9CCJ2im
 =Z6uF
 -----END PGP SIGNATURE-----

Merge tag 'powerpc-4.13-4' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:
 "The highlight is Ben's patch to work around a host killing bug when
  running KVM guests with the Radix MMU on Power9. See the long change
  log of that commit for more detail.

  And then three fairly minor fixes:

   - fix of_node_put() underflow during reconfig remove, using old DLPAR
     tools.

   - fix recently introduced ld version check with 64-bit LE-only
     toolchain.

   - free the subpage_prot_table correctly, avoiding a memory leak.

  Thanks to: Aneesh Kumar K.V, Benjamin Herrenschmidt, Laurent Vivier"

* tag 'powerpc-4.13-4' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/mm/hash: Free the subpage_prot_table correctly
  powerpc/Makefile: Fix ld version check with 64-bit LE-only toolchain
  powerpc/pseries: Fix of_node_put() underflow during reconfig remove
  powerpc/mm/radix: Workaround prefetch issue with KVM
This commit is contained in:
Linus Torvalds 2017-07-28 13:25:15 -07:00
commit 080012bad6
9 changed files with 168 additions and 36 deletions

View File

@ -59,6 +59,19 @@ machine-$(CONFIG_PPC64) += 64
machine-$(CONFIG_CPU_LITTLE_ENDIAN) += le machine-$(CONFIG_CPU_LITTLE_ENDIAN) += le
UTS_MACHINE := $(subst $(space),,$(machine-y)) UTS_MACHINE := $(subst $(space),,$(machine-y))
# XXX This needs to be before we override LD below
ifdef CONFIG_PPC32
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
else
ifeq ($(call ld-ifversion, -ge, 225000000, y),y)
# Have the linker provide sfpr if possible.
# There is a corresponding test in arch/powerpc/lib/Makefile
KBUILD_LDFLAGS_MODULE += --save-restore-funcs
else
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
endif
endif
ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
override LD += -EL override LD += -EL
LDEMULATION := lppc LDEMULATION := lppc
@ -190,18 +203,6 @@ else
CHECKFLAGS += -D__LITTLE_ENDIAN__ CHECKFLAGS += -D__LITTLE_ENDIAN__
endif endif
ifdef CONFIG_PPC32
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
else
ifeq ($(call ld-ifversion, -ge, 225000000, y),y)
# Have the linker provide sfpr if possible.
# There is a corresponding test in arch/powerpc/lib/Makefile
KBUILD_LDFLAGS_MODULE += --save-restore-funcs
else
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
endif
endif
ifeq ($(CONFIG_476FPE_ERR46),y) ifeq ($(CONFIG_476FPE_ERR46),y)
KBUILD_LDFLAGS_MODULE += --ppc476-workaround \ KBUILD_LDFLAGS_MODULE += --ppc476-workaround \
-T $(srctree)/arch/powerpc/platforms/44x/ppc476_modules.lds -T $(srctree)/arch/powerpc/platforms/44x/ppc476_modules.lds

View File

@ -59,13 +59,14 @@ extern struct patb_entry *partition_tb;
#define PRTS_MASK 0x1f /* process table size field */ #define PRTS_MASK 0x1f /* process table size field */
#define PRTB_MASK 0x0ffffffffffff000UL #define PRTB_MASK 0x0ffffffffffff000UL
/* /* Number of supported PID bits */
* Limit process table to PAGE_SIZE table. This extern unsigned int mmu_pid_bits;
* also limit the max pid we can support.
* MAX_USER_CONTEXT * 16 bytes of space. /* Base PID to allocate from */
*/ extern unsigned int mmu_base_pid;
#define PRTB_SIZE_SHIFT (CONTEXT_BITS + 4)
#define PRTB_ENTRIES (1ul << CONTEXT_BITS) #define PRTB_SIZE_SHIFT (mmu_pid_bits + 4)
#define PRTB_ENTRIES (1ul << mmu_pid_bits)
/* /*
* Power9 currently only support 64K partition table size. * Power9 currently only support 64K partition table size.

View File

@ -45,7 +45,7 @@ extern void set_context(unsigned long id, pgd_t *pgd);
#ifdef CONFIG_PPC_BOOK3S_64 #ifdef CONFIG_PPC_BOOK3S_64
extern void radix__switch_mmu_context(struct mm_struct *prev, extern void radix__switch_mmu_context(struct mm_struct *prev,
struct mm_struct *next); struct mm_struct *next);
static inline void switch_mmu_context(struct mm_struct *prev, static inline void switch_mmu_context(struct mm_struct *prev,
struct mm_struct *next, struct mm_struct *next,
struct task_struct *tsk) struct task_struct *tsk)
@ -67,6 +67,12 @@ extern void __destroy_context(unsigned long context_id);
extern void mmu_context_init(void); extern void mmu_context_init(void);
#endif #endif
#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm);
#else
static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { }
#endif
extern void switch_cop(struct mm_struct *next); extern void switch_cop(struct mm_struct *next);
extern int use_cop(unsigned long acop, struct mm_struct *mm); extern int use_cop(unsigned long acop, struct mm_struct *mm);
extern void drop_cop(unsigned long acop, struct mm_struct *mm); extern void drop_cop(unsigned long acop, struct mm_struct *mm);
@ -79,9 +85,13 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev,
struct mm_struct *next, struct mm_struct *next,
struct task_struct *tsk) struct task_struct *tsk)
{ {
bool new_on_cpu = false;
/* Mark this context has been used on the new CPU */ /* Mark this context has been used on the new CPU */
if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) {
cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
new_on_cpu = true;
}
/* 32-bit keeps track of the current PGDIR in the thread struct */ /* 32-bit keeps track of the current PGDIR in the thread struct */
#ifdef CONFIG_PPC32 #ifdef CONFIG_PPC32
@ -109,6 +119,10 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev,
if (cpu_has_feature(CPU_FTR_ALTIVEC)) if (cpu_has_feature(CPU_FTR_ALTIVEC))
asm volatile ("dssall"); asm volatile ("dssall");
#endif /* CONFIG_ALTIVEC */ #endif /* CONFIG_ALTIVEC */
if (new_on_cpu)
radix_kvm_prefetch_workaround(next);
/* /*
* The actual HW switching method differs between the various * The actual HW switching method differs between the various
* sub architectures. Out of line for now * sub architectures. Out of line for now

View File

@ -1443,12 +1443,14 @@ mc_cont:
ori r6,r6,1 ori r6,r6,1
mtspr SPRN_CTRLT,r6 mtspr SPRN_CTRLT,r6
4: 4:
/* Read the guest SLB and save it away */ /* Check if we are running hash or radix and store it in cr2 */
ld r5, VCPU_KVM(r9) ld r5, VCPU_KVM(r9)
lbz r0, KVM_RADIX(r5) lbz r0, KVM_RADIX(r5)
cmpwi r0, 0 cmpwi cr2,r0,0
/* Read the guest SLB and save it away */
li r5, 0 li r5, 0
bne 3f /* for radix, save 0 entries */ bne cr2, 3f /* for radix, save 0 entries */
lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */ lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
mtctr r0 mtctr r0
li r6,0 li r6,0
@ -1712,11 +1714,6 @@ BEGIN_FTR_SECTION_NESTED(96)
END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96) END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
22: 22:
/* Clear out SLB */
li r5,0
slbmte r5,r5
slbia
ptesync
/* Restore host values of some registers */ /* Restore host values of some registers */
BEGIN_FTR_SECTION BEGIN_FTR_SECTION
@ -1737,10 +1734,56 @@ BEGIN_FTR_SECTION
mtspr SPRN_PID, r7 mtspr SPRN_PID, r7
mtspr SPRN_IAMR, r8 mtspr SPRN_IAMR, r8
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
#ifdef CONFIG_PPC_RADIX_MMU
/*
* Are we running hash or radix ?
*/
beq cr2,3f
/* Radix: Handle the case where the guest used an illegal PID */
LOAD_REG_ADDR(r4, mmu_base_pid)
lwz r3, VCPU_GUEST_PID(r9)
lwz r5, 0(r4)
cmpw cr0,r3,r5
blt 2f
/*
* Illegal PID, the HW might have prefetched and cached in the TLB
* some translations for the LPID 0 / guest PID combination which
* Linux doesn't know about, so we need to flush that PID out of
* the TLB. First we need to set LPIDR to 0 so tlbiel applies to
* the right context.
*/
li r0,0
mtspr SPRN_LPID,r0
isync
/* Then do a congruence class local flush */
ld r6,VCPU_KVM(r9)
lwz r0,KVM_TLB_SETS(r6)
mtctr r0
li r7,0x400 /* IS field = 0b01 */
ptesync
sldi r0,r3,32 /* RS has PID */
1: PPC_TLBIEL(7,0,2,1,1) /* RIC=2, PRS=1, R=1 */
addi r7,r7,0x1000
bdnz 1b
ptesync
2: /* Flush the ERAT on radix P9 DD1 guest exit */
BEGIN_FTR_SECTION BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT PPC_INVALIDATE_ERAT
END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1) END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
b 4f
#endif /* CONFIG_PPC_RADIX_MMU */
/* Hash: clear out SLB */
3: li r5,0
slbmte r5,r5
slbia
ptesync
4:
/* /*
* POWER7/POWER8 guest -> host partition switch code. * POWER7/POWER8 guest -> host partition switch code.
* We don't have to lock against tlbies but we do * We don't have to lock against tlbies but we do

View File

@ -126,9 +126,10 @@ static int hash__init_new_context(struct mm_struct *mm)
static int radix__init_new_context(struct mm_struct *mm) static int radix__init_new_context(struct mm_struct *mm)
{ {
unsigned long rts_field; unsigned long rts_field;
int index; int index, max_id;
index = alloc_context_id(1, PRTB_ENTRIES - 1); max_id = (1 << mmu_pid_bits) - 1;
index = alloc_context_id(mmu_base_pid, max_id);
if (index < 0) if (index < 0)
return index; return index;

View File

@ -25,6 +25,9 @@
#include <trace/events/thp.h> #include <trace/events/thp.h>
unsigned int mmu_pid_bits;
unsigned int mmu_base_pid;
static int native_register_process_table(unsigned long base, unsigned long pg_sz, static int native_register_process_table(unsigned long base, unsigned long pg_sz,
unsigned long table_size) unsigned long table_size)
{ {
@ -261,11 +264,34 @@ static void __init radix_init_pgtable(void)
for_each_memblock(memory, reg) for_each_memblock(memory, reg)
WARN_ON(create_physical_mapping(reg->base, WARN_ON(create_physical_mapping(reg->base,
reg->base + reg->size)); reg->base + reg->size));
/* Find out how many PID bits are supported */
if (cpu_has_feature(CPU_FTR_HVMODE)) {
if (!mmu_pid_bits)
mmu_pid_bits = 20;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
* When KVM is possible, we only use the top half of the
* PID space to avoid collisions between host and guest PIDs
* which can cause problems due to prefetch when exiting the
* guest with AIL=3
*/
mmu_base_pid = 1 << (mmu_pid_bits - 1);
#else
mmu_base_pid = 1;
#endif
} else {
/* The guest uses the bottom half of the PID space */
if (!mmu_pid_bits)
mmu_pid_bits = 19;
mmu_base_pid = 1;
}
/* /*
* Allocate Partition table and process table for the * Allocate Partition table and process table for the
* host. * host.
*/ */
BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 36), "Process table size too large."); BUG_ON(PRTB_SIZE_SHIFT > 36);
process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT); process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
/* /*
* Fill in the process table. * Fill in the process table.
@ -339,6 +365,12 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
if (type == NULL || strcmp(type, "cpu") != 0) if (type == NULL || strcmp(type, "cpu") != 0)
return 0; return 0;
/* Find MMU PID size */
prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size);
if (prop && size == 4)
mmu_pid_bits = be32_to_cpup(prop);
/* Grab page size encodings */
prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size); prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
if (!prop) if (!prop)
return 0; return 0;

View File

@ -36,7 +36,7 @@ void subpage_prot_free(struct mm_struct *mm)
} }
} }
addr = 0; addr = 0;
for (i = 0; i < 2; ++i) { for (i = 0; i < (TASK_SIZE_USER64 >> 43); ++i) {
p = spt->protptrs[i]; p = spt->protptrs[i];
if (!p) if (!p)
continue; continue;

View File

@ -12,12 +12,12 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/hugetlb.h> #include <linux/hugetlb.h>
#include <linux/memblock.h> #include <linux/memblock.h>
#include <asm/ppc-opcode.h>
#include <asm/ppc-opcode.h>
#include <asm/tlb.h> #include <asm/tlb.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <asm/trace.h> #include <asm/trace.h>
#include <asm/cputhreads.h>
#define RIC_FLUSH_TLB 0 #define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1 #define RIC_FLUSH_PWC 1
@ -454,3 +454,44 @@ void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
else else
radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize); radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize);
} }
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
unsigned int pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
return;
/*
* If this context hasn't run on that CPU before and KVM is
* around, there's a slim chance that the guest on another
* CPU just brought in obsolete translation into the TLB of
* this CPU due to a bad prefetch using the guest PID on
* the way into the hypervisor.
*
* We work around this here. If KVM is possible, we check if
* any sibling thread is in KVM. If it is, the window may exist
* and thus we flush that PID from the core.
*
* A potential future improvement would be to mark which PIDs
* have never been used on the system and avoid it if the PID
* is new and the process has no other cpumask bit set.
*/
if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
int cpu = smp_processor_id();
int sib = cpu_first_thread_sibling(cpu);
bool flush = false;
for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
if (sib == cpu)
continue;
if (paca[sib].kvm_hstate.kvm_vcpu)
flush = true;
}
if (flush)
_tlbiel_pid(pid, RIC_FLUSH_ALL);
}
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

View File

@ -82,7 +82,6 @@ static int pSeries_reconfig_remove_node(struct device_node *np)
of_detach_node(np); of_detach_node(np);
of_node_put(parent); of_node_put(parent);
of_node_put(np); /* Must decrement the refcount */
return 0; return 0;
} }