powerpc/64s/radix: introduce options to disable use of the tlbie instruction
Introduce two options to control the use of the tlbie instruction. A boot-time option completely disables the kernel's use of the instruction; this is currently incompatible with the hash MMU, KVM, and coherent accelerators.

A debugfs option can be switched at runtime and avoids using tlbie for invalidating CPU TLBs for normal process and kernel address mappings. Coherent accelerators are still managed with tlbie, as are KVM partition scope translations.

Cross-CPU TLB flushing is implemented with IPIs and tlbiel. This is a basic implementation which does not attempt any optimisation beyond what the tlbie implementation does.

This is useful for performance testing among other things. For example, in certain situations on large systems, using IPIs may be faster than tlbie as they can be directed rather than broadcast. Later we may also take advantage of the IPIs to do more interesting things such as trimming the mm cpumask more aggressively.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20190902152931.17840-7-npiggin@gmail.com
parent 7d805accbe
commit 2275d7b575
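To exercise the two options: the boot-time one is selected by passing disable_tlbie on the kernel command line, and the runtime one is the debugfs boolean created in the patch below under powerpc_debugfs_root. Here is a minimal userspace sketch (illustrative, not part of the patch) for flipping and reading that switch; it assumes debugfs is mounted at the conventional /sys/kernel/debug, so the knob appears as /sys/kernel/debug/powerpc/tlbie_enabled.

#include <stdio.h>
#include <string.h>

/* Assumed path: powerpc_debugfs_root with debugfs mounted at /sys/kernel/debug. */
#define KNOB "/sys/kernel/debug/powerpc/tlbie_enabled"

int main(int argc, char **argv)
{
        char buf[8] = "";
        FILE *f;

        if (argc > 1) {
                /* debugfs bool attributes accept "Y"/"N" (and "1"/"0") on write. */
                f = fopen(KNOB, "w");
                if (!f) {
                        perror(KNOB);
                        return 1;
                }
                fputs(strcmp(argv[1], "0") ? "Y" : "N", f);
                fclose(f);
        }

        f = fopen(KNOB, "r");
        if (!f) {
                perror(KNOB);
                return 1;
        }
        if (fgets(buf, sizeof(buf), f))
                /* "Y": CPU TLB flushes use broadcast tlbie; "N": IPIs + tlbiel. */
                printf("tlbie_enabled: %s", buf);
        fclose(f);
        return 0;
}

Writing N switches process and kernel TLB invalidation over to IPIs plus tlbiel; writing Y restores broadcast tlbie. Coherent accelerator (nMMU) invalidations keep using tlbie either way, as described above.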
@@ -860,6 +860,10 @@
 	disable_radix	[PPC]
 			Disable RADIX MMU mode on POWER9

+	disable_tlbie	[PPC]
+			Disable TLBIE instruction. Currently does not work
+			with KVM, with HASH MMU, or with coherent accelerators.
+
 	disable_cpu_apicid= [X86,APIC,SMP]
 			Format: <int>
 			The number of initial APIC ID for the
@@ -162,4 +162,13 @@ static inline void flush_tlb_pgtable(struct mmu_gather *tlb, unsigned long address)

 	radix__flush_tlb_pwc(tlb, address);
 }
+
+extern bool tlbie_capable;
+extern bool tlbie_enabled;
+
+static inline bool cputlb_use_tlbie(void)
+{
+	return tlbie_enabled;
+}
+
 #endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H */
@@ -5462,6 +5462,12 @@ static int kvmppc_radix_possible(void)
 static int kvmppc_book3s_init_hv(void)
 {
 	int r;
+
+	if (!tlbie_capable) {
+		pr_err("KVM-HV: Host does not support TLBIE\n");
+		return -ENODEV;
+	}
+
 	/*
 	 * FIXME!! Do we need to check on all cpus ?
 	 */
@@ -8,6 +8,7 @@
 #include <linux/memblock.h>
 #include <misc/cxl-base.h>

+#include <asm/debugfs.h>
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 #include <asm/trace.h>
@@ -469,3 +470,49 @@ int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,

 	return true;
 }
+
+/*
+ * Does the CPU support tlbie?
+ */
+bool tlbie_capable __read_mostly = true;
+EXPORT_SYMBOL(tlbie_capable);
+
+/*
+ * Should tlbie be used for management of CPU TLBs, for kernel and process
+ * address spaces? tlbie may still be used for nMMU accelerators, and for KVM
+ * guest address spaces.
+ */
+bool tlbie_enabled __read_mostly = true;
+
+static int __init setup_disable_tlbie(char *str)
+{
+	if (!radix_enabled()) {
+		pr_err("disable_tlbie: Unable to disable TLBIE with Hash MMU.\n");
+		return 1;
+	}
+
+	tlbie_capable = false;
+	tlbie_enabled = false;
+
+	return 1;
+}
+__setup("disable_tlbie", setup_disable_tlbie);
+
+static int __init pgtable_debugfs_setup(void)
+{
+	if (!tlbie_capable)
+		return 0;
+
+	/*
+	 * There is no locking vs tlb flushing when changing this value.
+	 * The tlb flushers will see one value or another, and use either
+	 * tlbie or tlbiel with IPIs. In both cases the TLBs will be
+	 * invalidated as expected.
+	 */
+	debugfs_create_bool("tlbie_enabled", 0600,
+			powerpc_debugfs_root,
+			&tlbie_enabled);
+
+	return 0;
+}
+arch_initcall(pgtable_debugfs_setup);
@@ -270,6 +270,39 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }

+struct tlbiel_pid {
+	unsigned long pid;
+	unsigned long ric;
+};
+
+static void do_tlbiel_pid(void *info)
+{
+	struct tlbiel_pid *t = info;
+
+	if (t->ric == RIC_FLUSH_TLB)
+		_tlbiel_pid(t->pid, RIC_FLUSH_TLB);
+	else if (t->ric == RIC_FLUSH_PWC)
+		_tlbiel_pid(t->pid, RIC_FLUSH_PWC);
+	else
+		_tlbiel_pid(t->pid, RIC_FLUSH_ALL);
+}
+
+static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
+				unsigned long pid, unsigned long ric)
+{
+	struct cpumask *cpus = mm_cpumask(mm);
+	struct tlbiel_pid t = { .pid = pid, .ric = ric };
+
+	on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
+	/*
+	 * Always want the CPU translations to be invalidated with tlbiel in
+	 * these paths, so while coprocessors must use tlbie, we can not
+	 * optimise away the tlbiel component.
+	 */
+	if (atomic_read(&mm->context.copros) > 0)
+		_tlbie_pid(pid, RIC_FLUSH_ALL);
+}
+
 static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
 {
 	asm volatile("ptesync": : :"memory");
@@ -370,6 +403,53 @@ static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }

+struct tlbiel_va {
+	unsigned long pid;
+	unsigned long va;
+	unsigned long psize;
+	unsigned long ric;
+};
+
+static void do_tlbiel_va(void *info)
+{
+	struct tlbiel_va *t = info;
+
+	if (t->ric == RIC_FLUSH_TLB)
+		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
+	else if (t->ric == RIC_FLUSH_PWC)
+		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
+	else
+		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
+}
+
+static inline void _tlbiel_va_multicast(struct mm_struct *mm,
+				unsigned long va, unsigned long pid,
+				unsigned long psize, unsigned long ric)
+{
+	struct cpumask *cpus = mm_cpumask(mm);
+	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };
+	on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
+	if (atomic_read(&mm->context.copros) > 0)
+		_tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
+}
+
+struct tlbiel_va_range {
+	unsigned long pid;
+	unsigned long start;
+	unsigned long end;
+	unsigned long page_size;
+	unsigned long psize;
+	bool also_pwc;
+};
+
+static void do_tlbiel_va_range(void *info)
+{
+	struct tlbiel_va_range *t = info;
+
+	_tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
+			 t->psize, t->also_pwc);
+}
+
 static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
 			      unsigned long psize, unsigned long ric)
 {
@@ -393,6 +473,21 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }

+static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
+				unsigned long start, unsigned long end,
+				unsigned long pid, unsigned long page_size,
+				unsigned long psize, bool also_pwc)
+{
+	struct cpumask *cpus = mm_cpumask(mm);
+	struct tlbiel_va_range t = { .start = start, .end = end,
+			.pid = pid, .page_size = page_size,
+			.psize = psize, .also_pwc = also_pwc };
+
+	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
+	if (atomic_read(&mm->context.copros) > 0)
+		_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
+}
+
 /*
  * Base TLB flushing operations:
  *
@@ -530,10 +625,14 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
 			goto local;
 		}

-		if (mm_needs_flush_escalation(mm))
-			_tlbie_pid(pid, RIC_FLUSH_ALL);
-		else
-			_tlbie_pid(pid, RIC_FLUSH_TLB);
+		if (cputlb_use_tlbie()) {
+			if (mm_needs_flush_escalation(mm))
+				_tlbie_pid(pid, RIC_FLUSH_ALL);
+			else
+				_tlbie_pid(pid, RIC_FLUSH_TLB);
+		} else {
+			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
+		}
 	} else {
 local:
 		_tlbiel_pid(pid, RIC_FLUSH_TLB);
@@ -559,7 +658,10 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
 				goto local;
 			}
 		}
-		_tlbie_pid(pid, RIC_FLUSH_ALL);
+		if (cputlb_use_tlbie())
+			_tlbie_pid(pid, RIC_FLUSH_ALL);
+		else
+			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
 	} else {
 local:
 		_tlbiel_pid(pid, RIC_FLUSH_ALL);
@@ -594,7 +696,10 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
 			exit_flush_lazy_tlbs(mm);
 			goto local;
 		}
-		_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+		if (cputlb_use_tlbie())
+			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+		else
+			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
 	} else {
 local:
 		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
@@ -616,6 +721,24 @@ EXPORT_SYMBOL(radix__flush_tlb_page);
 #define radix__flush_all_mm radix__local_flush_all_mm
 #endif /* CONFIG_SMP */

+static void do_tlbiel_kernel(void *info)
+{
+	_tlbiel_pid(0, RIC_FLUSH_ALL);
+}
+
+static inline void _tlbiel_kernel_broadcast(void)
+{
+	on_each_cpu(do_tlbiel_kernel, NULL, 1);
+	if (tlbie_capable) {
+		/*
+		 * Coherent accelerators don't refcount kernel memory mappings,
+		 * so have to always issue a tlbie for them. This is quite a
+		 * slow path anyway.
+		 */
+		_tlbie_pid(0, RIC_FLUSH_ALL);
+	}
+}
+
 /*
  * If kernel TLBIs ever become local rather than global, then
  * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
@@ -623,7 +746,10 @@ EXPORT_SYMBOL(radix__flush_tlb_page);
  */
 void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
-	_tlbie_pid(0, RIC_FLUSH_ALL);
+	if (cputlb_use_tlbie())
+		_tlbie_pid(0, RIC_FLUSH_ALL);
+	else
+		_tlbiel_kernel_broadcast();
 }
 EXPORT_SYMBOL(radix__flush_tlb_kernel_range);

@@ -679,10 +805,14 @@ is_local:
 		if (local) {
 			_tlbiel_pid(pid, RIC_FLUSH_TLB);
 		} else {
-			if (mm_needs_flush_escalation(mm))
-				_tlbie_pid(pid, RIC_FLUSH_ALL);
-			else
-				_tlbie_pid(pid, RIC_FLUSH_TLB);
+			if (cputlb_use_tlbie()) {
+				if (mm_needs_flush_escalation(mm))
+					_tlbie_pid(pid, RIC_FLUSH_ALL);
+				else
+					_tlbie_pid(pid, RIC_FLUSH_TLB);
+			} else {
+				_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
+			}
 		}
 	} else {
 		bool hflush = flush_all_sizes;
@@ -707,8 +837,8 @@ is_local:
 			gflush = false;
 		}

-		asm volatile("ptesync": : :"memory");
 		if (local) {
+			asm volatile("ptesync": : :"memory");
 			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
 			if (hflush)
 				__tlbiel_va_range(hstart, hend, pid,
@@ -717,7 +847,8 @@ is_local:
 				__tlbiel_va_range(gstart, gend, pid,
 						PUD_SIZE, MMU_PAGE_1G);
 			asm volatile("ptesync": : :"memory");
-		} else {
+		} else if (cputlb_use_tlbie()) {
+			asm volatile("ptesync": : :"memory");
 			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
 			if (hflush)
 				__tlbie_va_range(hstart, hend, pid,
@@ -727,6 +858,15 @@ is_local:
 						PUD_SIZE, MMU_PAGE_1G);
 			fixup_tlbie();
 			asm volatile("eieio; tlbsync; ptesync": : :"memory");
+		} else {
+			_tlbiel_va_range_multicast(mm,
+					start, end, pid, page_size, mmu_virtual_psize, false);
+			if (hflush)
+				_tlbiel_va_range_multicast(mm,
+					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
+			if (gflush)
+				_tlbiel_va_range_multicast(mm,
+					gstart, gend, pid, PUD_SIZE, MMU_PAGE_1G, false);
 		}
 	}
 	preempt_enable();
@@ -903,16 +1043,26 @@ is_local:
 		if (local) {
 			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
 		} else {
-			if (mm_needs_flush_escalation(mm))
-				also_pwc = true;
+			if (cputlb_use_tlbie()) {
+				if (mm_needs_flush_escalation(mm))
+					also_pwc = true;
+
+				_tlbie_pid(pid,
+					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
+			} else {
+				_tlbiel_pid_multicast(mm, pid,
+					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
+			}

-			_tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
 		}
 	} else {
 		if (local)
 			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
-		else
+		else if (cputlb_use_tlbie())
 			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
+		else
+			_tlbiel_va_range_multicast(mm,
+					start, end, pid, page_size, psize, also_pwc);
 	}
 	preempt_enable();
 }
@@ -954,7 +1104,11 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
 			exit_flush_lazy_tlbs(mm);
 			goto local;
 		}
-		_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+		if (cputlb_use_tlbie())
+			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+		else
+			_tlbiel_va_range_multicast(mm,
+					addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
 	} else {
 local:
 		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
@@ -18,6 +18,7 @@
 #include <linux/sched/task.h>

 #include <asm/cputable.h>
+#include <asm/mmu.h>
 #include <misc/cxl-base.h>

 #include "cxl.h"
@@ -315,6 +316,9 @@ static int __init init_cxl(void)
 {
 	int rc = 0;

+	if (!tlbie_capable)
+		return -EINVAL;
+
 	if ((rc = cxl_file_init()))
 		return rc;

@@ -2,12 +2,16 @@
 // Copyright 2017 IBM Corp.
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <asm/mmu.h>
 #include "ocxl_internal.h"

 static int __init init_ocxl(void)
 {
 	int rc = 0;

+	if (!tlbie_capable)
+		return -EINVAL;
+
 	rc = ocxl_file_init();
 	if (rc)
 		return rc;