powerpc/64s/radix: Optimize TLB range flush barriers
Short range flushes issue a sequence of tlbie(l) instructions for
individual effective addresses. These do not all require individual
barrier sequences, only one covering all tlbie(l) instructions.
Commit f7327e0ba3
("powerpc/mm/radix: Remove unnecessary ptesync")
made a similar optimization for tlbiel for PID flushing.
For tlbie, the ISA says:
The tlbsync instruction provides an ordering function for the
effects of all tlbie instructions executed by the thread executing
the tlbsync instruction, with respect to the memory barrier
created by a subsequent ptesync instruction executed by the same
thread.
Time to munmap 30 pages of memory (after mmap, touch):
local global
vanilla 10.9us 22.3us
patched 3.4us 14.4us
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
This commit is contained in:
parent
a54c61f46e
commit
14001c6093
|
@ -84,7 +84,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
|
|||
trace_tlbie(0, 0, rb, rs, ric, prs, r);
|
||||
}
|
||||
|
||||
static inline void _tlbiel_va(unsigned long va, unsigned long pid,
|
||||
static inline void __tlbiel_va(unsigned long va, unsigned long pid,
|
||||
unsigned long ap, unsigned long ric)
|
||||
{
|
||||
unsigned long rb,rs,prs,r;
|
||||
|
@ -95,14 +95,20 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid,
|
|||
prs = 1; /* process scoped */
|
||||
r = 1; /* raidx format */
|
||||
|
||||
asm volatile("ptesync": : :"memory");
|
||||
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
|
||||
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
|
||||
asm volatile("ptesync": : :"memory");
|
||||
trace_tlbie(0, 1, rb, rs, ric, prs, r);
|
||||
}
|
||||
|
||||
static inline void _tlbie_va(unsigned long va, unsigned long pid,
|
||||
static inline void _tlbiel_va(unsigned long va, unsigned long pid,
|
||||
unsigned long ap, unsigned long ric)
|
||||
{
|
||||
asm volatile("ptesync": : :"memory");
|
||||
__tlbiel_va(va, pid, ap, ric);
|
||||
asm volatile("ptesync": : :"memory");
|
||||
}
|
||||
|
||||
static inline void __tlbie_va(unsigned long va, unsigned long pid,
|
||||
unsigned long ap, unsigned long ric)
|
||||
{
|
||||
unsigned long rb,rs,prs,r;
|
||||
|
@ -113,13 +119,20 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
|
|||
prs = 1; /* process scoped */
|
||||
r = 1; /* raidx format */
|
||||
|
||||
asm volatile("ptesync": : :"memory");
|
||||
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
|
||||
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
|
||||
asm volatile("eieio; tlbsync; ptesync": : :"memory");
|
||||
trace_tlbie(0, 0, rb, rs, ric, prs, r);
|
||||
}
|
||||
|
||||
static inline void _tlbie_va(unsigned long va, unsigned long pid,
|
||||
unsigned long ap, unsigned long ric)
|
||||
{
|
||||
asm volatile("ptesync": : :"memory");
|
||||
__tlbie_va(va, pid, ap, ric);
|
||||
asm volatile("eieio; tlbsync; ptesync": : :"memory");
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Base TLB flushing operations:
|
||||
*
|
||||
|
@ -341,13 +354,17 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
|
|||
else
|
||||
_tlbie_pid(pid, RIC_FLUSH_TLB);
|
||||
} else {
|
||||
asm volatile("ptesync": : :"memory");
|
||||
for (addr = start; addr < end; addr += page_size) {
|
||||
|
||||
if (local)
|
||||
_tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
|
||||
__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
|
||||
else
|
||||
_tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
|
||||
__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
|
||||
}
|
||||
if (local)
|
||||
asm volatile("ptesync": : :"memory");
|
||||
else
|
||||
asm volatile("eieio; tlbsync; ptesync": : :"memory");
|
||||
}
|
||||
preempt_enable();
|
||||
}
|
||||
|
@ -378,6 +395,7 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
|
|||
_tlbie_pid(pid, RIC_FLUSH_PWC);
|
||||
|
||||
/* Then iterate the pages */
|
||||
asm volatile("ptesync": : :"memory");
|
||||
end = addr + HPAGE_PMD_SIZE;
|
||||
for (; addr < end; addr += PAGE_SIZE) {
|
||||
if (local)
|
||||
|
@ -386,6 +404,11 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
|
|||
_tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
|
||||
}
|
||||
|
||||
if (local)
|
||||
asm volatile("ptesync": : :"memory");
|
||||
else
|
||||
asm volatile("eieio; tlbsync; ptesync": : :"memory");
|
||||
|
||||
preempt_enable();
|
||||
}
|
||||
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
||||
|
|
Loading…
Reference in New Issue