riscv: Make __flush_tlb_range() loop over pte instead of flushing the whole tlb
[ Upstream commit 9d4e8d5fa7dbbb606b355f40d918a1feef821bc5 ] Currently, when the range to flush covers more than one page (a 4K page or a hugepage), __flush_tlb_range() flushes the whole tlb. Flushing the whole tlb comes with a greater cost than flushing a single entry so we should flush single entries up to a certain threshold so that: threshold * cost of flushing a single entry < cost of flushing the whole tlb. Co-developed-by: Mayuresh Chitale <mchitale@ventanamicro.com> Signed-off-by: Mayuresh Chitale <mchitale@ventanamicro.com> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> Reviewed-by: Andrew Jones <ajones@ventanamicro.com> Tested-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> # On RZ/Five SMARC Reviewed-by: Samuel Holland <samuel.holland@sifive.com> Tested-by: Samuel Holland <samuel.holland@sifive.com> Link: https://lore.kernel.org/r/20231030133027.19542-4-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com> Stable-dep-of: d9807d60c145 ("riscv: mm: execute local TLB flush after populating vmemmap") Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
parent
72afe05d62
commit
53a38f8f75
arch/riscv
|
@ -273,9 +273,6 @@ void sbi_set_timer(uint64_t stime_value);
|
|||
void sbi_shutdown(void);
|
||||
void sbi_send_ipi(unsigned int cpu);
|
||||
int sbi_remote_fence_i(const struct cpumask *cpu_mask);
|
||||
int sbi_remote_sfence_vma(const struct cpumask *cpu_mask,
|
||||
unsigned long start,
|
||||
unsigned long size);
|
||||
|
||||
int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask,
|
||||
unsigned long start,
|
||||
|
|
|
@ -11,6 +11,9 @@
|
|||
#include <asm/smp.h>
|
||||
#include <asm/errata_list.h>
|
||||
|
||||
#define FLUSH_TLB_MAX_SIZE ((unsigned long)-1)
|
||||
#define FLUSH_TLB_NO_ASID ((unsigned long)-1)
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
extern unsigned long asid_mask;
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include <linux/reboot.h>
|
||||
#include <asm/sbi.h>
|
||||
#include <asm/smp.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
/* default SBI version is 0.1 */
|
||||
unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT;
|
||||
|
@ -376,32 +377,15 @@ int sbi_remote_fence_i(const struct cpumask *cpu_mask)
|
|||
}
|
||||
EXPORT_SYMBOL(sbi_remote_fence_i);
|
||||
|
||||
/**
|
||||
* sbi_remote_sfence_vma() - Execute SFENCE.VMA instructions on given remote
|
||||
* harts for the specified virtual address range.
|
||||
* @cpu_mask: A cpu mask containing all the target harts.
|
||||
* @start: Start of the virtual address
|
||||
* @size: Total size of the virtual address range.
|
||||
*
|
||||
* Return: 0 on success, appropriate linux error code otherwise.
|
||||
*/
|
||||
int sbi_remote_sfence_vma(const struct cpumask *cpu_mask,
|
||||
unsigned long start,
|
||||
unsigned long size)
|
||||
{
|
||||
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
|
||||
cpu_mask, start, size, 0, 0);
|
||||
}
|
||||
EXPORT_SYMBOL(sbi_remote_sfence_vma);
|
||||
|
||||
/**
|
||||
* sbi_remote_sfence_vma_asid() - Execute SFENCE.VMA instructions on given
|
||||
* remote harts for a virtual address range belonging to a specific ASID.
|
||||
* remote harts for a virtual address range belonging to a specific ASID or not.
|
||||
*
|
||||
* @cpu_mask: A cpu mask containing all the target harts.
|
||||
* @start: Start of the virtual address
|
||||
* @size: Total size of the virtual address range.
|
||||
* @asid: The value of address space identifier (ASID).
|
||||
* @asid: The value of address space identifier (ASID), or FLUSH_TLB_NO_ASID
|
||||
* for flushing all address spaces.
|
||||
*
|
||||
* Return: 0 on success, appropriate linux error code otherwise.
|
||||
*/
|
||||
|
@ -410,8 +394,12 @@ int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask,
|
|||
unsigned long size,
|
||||
unsigned long asid)
|
||||
{
|
||||
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
|
||||
cpu_mask, start, size, asid, 0);
|
||||
if (asid == FLUSH_TLB_NO_ASID)
|
||||
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
|
||||
cpu_mask, start, size, 0, 0);
|
||||
else
|
||||
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
|
||||
cpu_mask, start, size, asid, 0);
|
||||
}
|
||||
EXPORT_SYMBOL(sbi_remote_sfence_vma_asid);
|
||||
|
||||
|
|
|
@ -8,28 +8,50 @@
|
|||
|
||||
static inline void local_flush_tlb_all_asid(unsigned long asid)
|
||||
{
|
||||
__asm__ __volatile__ ("sfence.vma x0, %0"
|
||||
:
|
||||
: "r" (asid)
|
||||
: "memory");
|
||||
if (asid != FLUSH_TLB_NO_ASID)
|
||||
__asm__ __volatile__ ("sfence.vma x0, %0"
|
||||
:
|
||||
: "r" (asid)
|
||||
: "memory");
|
||||
else
|
||||
local_flush_tlb_all();
|
||||
}
|
||||
|
||||
static inline void local_flush_tlb_page_asid(unsigned long addr,
|
||||
unsigned long asid)
|
||||
{
|
||||
__asm__ __volatile__ ("sfence.vma %0, %1"
|
||||
:
|
||||
: "r" (addr), "r" (asid)
|
||||
: "memory");
|
||||
if (asid != FLUSH_TLB_NO_ASID)
|
||||
__asm__ __volatile__ ("sfence.vma %0, %1"
|
||||
:
|
||||
: "r" (addr), "r" (asid)
|
||||
: "memory");
|
||||
else
|
||||
local_flush_tlb_page(addr);
|
||||
}
|
||||
|
||||
static inline void local_flush_tlb_range(unsigned long start,
|
||||
unsigned long size, unsigned long stride)
|
||||
/*
|
||||
* Flush entire TLB if number of entries to be flushed is greater
|
||||
* than the threshold below.
|
||||
*/
|
||||
static unsigned long tlb_flush_all_threshold __read_mostly = 64;
|
||||
|
||||
static void local_flush_tlb_range_threshold_asid(unsigned long start,
|
||||
unsigned long size,
|
||||
unsigned long stride,
|
||||
unsigned long asid)
|
||||
{
|
||||
if (size <= stride)
|
||||
local_flush_tlb_page(start);
|
||||
else
|
||||
local_flush_tlb_all();
|
||||
unsigned long nr_ptes_in_range = DIV_ROUND_UP(size, stride);
|
||||
int i;
|
||||
|
||||
if (nr_ptes_in_range > tlb_flush_all_threshold) {
|
||||
local_flush_tlb_all_asid(asid);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < nr_ptes_in_range; ++i) {
|
||||
local_flush_tlb_page_asid(start, asid);
|
||||
start += stride;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void local_flush_tlb_range_asid(unsigned long start,
|
||||
|
@ -37,8 +59,10 @@ static inline void local_flush_tlb_range_asid(unsigned long start,
|
|||
{
|
||||
if (size <= stride)
|
||||
local_flush_tlb_page_asid(start, asid);
|
||||
else
|
||||
else if (size == FLUSH_TLB_MAX_SIZE)
|
||||
local_flush_tlb_all_asid(asid);
|
||||
else
|
||||
local_flush_tlb_range_threshold_asid(start, size, stride, asid);
|
||||
}
|
||||
|
||||
static void __ipi_flush_tlb_all(void *info)
|
||||
|
@ -51,7 +75,7 @@ void flush_tlb_all(void)
|
|||
if (riscv_use_ipi_for_rfence())
|
||||
on_each_cpu(__ipi_flush_tlb_all, NULL, 1);
|
||||
else
|
||||
sbi_remote_sfence_vma(NULL, 0, -1);
|
||||
sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID);
|
||||
}
|
||||
|
||||
struct flush_tlb_range_data {
|
||||
|
@ -68,18 +92,12 @@ static void __ipi_flush_tlb_range_asid(void *info)
|
|||
local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid);
|
||||
}
|
||||
|
||||
static void __ipi_flush_tlb_range(void *info)
|
||||
{
|
||||
struct flush_tlb_range_data *d = info;
|
||||
|
||||
local_flush_tlb_range(d->start, d->size, d->stride);
|
||||
}
|
||||
|
||||
static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
|
||||
unsigned long size, unsigned long stride)
|
||||
{
|
||||
struct flush_tlb_range_data ftd;
|
||||
struct cpumask *cmask = mm_cpumask(mm);
|
||||
unsigned long asid = FLUSH_TLB_NO_ASID;
|
||||
unsigned int cpuid;
|
||||
bool broadcast;
|
||||
|
||||
|
@ -89,39 +107,24 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
|
|||
cpuid = get_cpu();
|
||||
/* check if the tlbflush needs to be sent to other CPUs */
|
||||
broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
|
||||
if (static_branch_unlikely(&use_asid_allocator)) {
|
||||
unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask;
|
||||
|
||||
if (broadcast) {
|
||||
if (riscv_use_ipi_for_rfence()) {
|
||||
ftd.asid = asid;
|
||||
ftd.start = start;
|
||||
ftd.size = size;
|
||||
ftd.stride = stride;
|
||||
on_each_cpu_mask(cmask,
|
||||
__ipi_flush_tlb_range_asid,
|
||||
&ftd, 1);
|
||||
} else
|
||||
sbi_remote_sfence_vma_asid(cmask,
|
||||
start, size, asid);
|
||||
} else {
|
||||
local_flush_tlb_range_asid(start, size, stride, asid);
|
||||
}
|
||||
if (static_branch_unlikely(&use_asid_allocator))
|
||||
asid = atomic_long_read(&mm->context.id) & asid_mask;
|
||||
|
||||
if (broadcast) {
|
||||
if (riscv_use_ipi_for_rfence()) {
|
||||
ftd.asid = asid;
|
||||
ftd.start = start;
|
||||
ftd.size = size;
|
||||
ftd.stride = stride;
|
||||
on_each_cpu_mask(cmask,
|
||||
__ipi_flush_tlb_range_asid,
|
||||
&ftd, 1);
|
||||
} else
|
||||
sbi_remote_sfence_vma_asid(cmask,
|
||||
start, size, asid);
|
||||
} else {
|
||||
if (broadcast) {
|
||||
if (riscv_use_ipi_for_rfence()) {
|
||||
ftd.asid = 0;
|
||||
ftd.start = start;
|
||||
ftd.size = size;
|
||||
ftd.stride = stride;
|
||||
on_each_cpu_mask(cmask,
|
||||
__ipi_flush_tlb_range,
|
||||
&ftd, 1);
|
||||
} else
|
||||
sbi_remote_sfence_vma(cmask, start, size);
|
||||
} else {
|
||||
local_flush_tlb_range(start, size, stride);
|
||||
}
|
||||
local_flush_tlb_range_asid(start, size, stride, asid);
|
||||
}
|
||||
|
||||
put_cpu();
|
||||
|
@ -129,7 +132,7 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
|
|||
|
||||
void flush_tlb_mm(struct mm_struct *mm)
|
||||
{
|
||||
__flush_tlb_range(mm, 0, -1, PAGE_SIZE);
|
||||
__flush_tlb_range(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
|
||||
}
|
||||
|
||||
void flush_tlb_mm_range(struct mm_struct *mm,
|
||||
|
|
Loading…
Reference in New Issue