powerpc, membarrier: Skip memory barrier in switch_mm()
Allow PowerPC to skip the full memory barrier in switch_mm(), and only issue the barrier when scheduling into a task belonging to a process that has registered to use the private expedited membarrier command.

Threads targeting the same VM but which belong to different thread groups are a tricky case. It has a few consequences:

It turns out that we cannot rely on get_nr_threads(p) to count the number of threads using a VM. We can use (atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1) instead to skip the synchronize_sched() for cases where the VM only has a single user, and that user only has a single thread.

It also turns out that we cannot use for_each_thread() to set thread flags in all threads using a VM, as it only iterates over the thread group.

Therefore, test the membarrier state variable directly rather than relying on thread flags. This means membarrier_register_private_expedited() needs to set the MEMBARRIER_STATE_PRIVATE_EXPEDITED flag, issue synchronize_sched(), and only then set MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY which allows private expedited membarrier commands to succeed. membarrier_arch_switch_mm() now tests for the MEMBARRIER_STATE_PRIVATE_EXPEDITED flag.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andrea Parri <parri.andrea@gmail.com>
Cc: Andrew Hunter <ahh@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Avi Kivity <avi@scylladb.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Dave Watson <davejwatson@fb.com>
Cc: David Sehr <sehr@google.com>
Cc: Greg Hackmann <ghackmann@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maged Michael <maged.michael@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-api@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/20180129202020.8515-3-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
667ca1ec7c
commit
3ccfebedd8
|
@ -8944,6 +8944,7 @@ L: linux-kernel@vger.kernel.org
|
||||||
S: Supported
|
S: Supported
|
||||||
F: kernel/sched/membarrier.c
|
F: kernel/sched/membarrier.c
|
||||||
F: include/uapi/linux/membarrier.h
|
F: include/uapi/linux/membarrier.h
|
||||||
|
F: arch/powerpc/include/asm/membarrier.h
|
||||||
|
|
||||||
MEMORY MANAGEMENT
|
MEMORY MANAGEMENT
|
||||||
L: linux-mm@kvack.org
|
L: linux-mm@kvack.org
|
||||||
|
|
|
@ -140,6 +140,7 @@ config PPC
|
||||||
select ARCH_HAS_FORTIFY_SOURCE
|
select ARCH_HAS_FORTIFY_SOURCE
|
||||||
select ARCH_HAS_GCOV_PROFILE_ALL
|
select ARCH_HAS_GCOV_PROFILE_ALL
|
||||||
select ARCH_HAS_PMEM_API if PPC64
|
select ARCH_HAS_PMEM_API if PPC64
|
||||||
|
select ARCH_HAS_MEMBARRIER_CALLBACKS
|
||||||
select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
|
select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
|
||||||
select ARCH_HAS_SG_CHAIN
|
select ARCH_HAS_SG_CHAIN
|
||||||
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
|
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
#ifndef _ASM_POWERPC_MEMBARRIER_H
|
||||||
|
#define _ASM_POWERPC_MEMBARRIER_H
|
||||||
|
|
||||||
|
/*
 * membarrier_arch_switch_mm() - conditionally issue a full memory barrier
 * for membarrier when switching address spaces.
 *
 * @prev: mm being switched away from; a NULL value makes this a no-op.
 * @next: mm being switched to; its membarrier_state is read atomically.
 * @tsk:  task being switched in; not referenced by this implementation.
 *
 * Returns early (the path marked likely()) when @next does not have
 * MEMBARRIER_STATE_PRIVATE_EXPEDITED set in its membarrier_state, or when
 * @prev is NULL. Otherwise executes smp_mb().
 */
static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
|
||||||
|
struct mm_struct *next,
|
||||||
|
struct task_struct *tsk)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Only need the full barrier when switching between processes.
|
||||||
|
* Barrier when switching from kernel to userspace is not
|
||||||
|
* required here, given that it is implied by mmdrop(). Barrier
|
||||||
|
* when switching from userspace to kernel is not needed after
|
||||||
|
* store to rq->curr.
|
||||||
|
*/
|
||||||
|
if (likely(!(atomic_read(&next->membarrier_state) &
|
||||||
|
MEMBARRIER_STATE_PRIVATE_EXPEDITED) || !prev))
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The membarrier system call requires a full memory barrier
|
||||||
|
* after storing to rq->curr, before going back to user-space.
|
||||||
|
*/
|
||||||
|
smp_mb();
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* _ASM_POWERPC_MEMBARRIER_H */
|
|
@ -12,6 +12,7 @@
|
||||||
|
|
||||||
#include <linux/mm.h>
|
#include <linux/mm.h>
|
||||||
#include <linux/cpu.h>
|
#include <linux/cpu.h>
|
||||||
|
#include <linux/sched/mm.h>
|
||||||
|
|
||||||
#include <asm/mmu_context.h>
|
#include <asm/mmu_context.h>
|
||||||
|
|
||||||
|
@ -58,6 +59,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
||||||
*
|
*
|
||||||
* On the read side the barrier is in pte_xchg(), which orders
|
* On the read side the barrier is in pte_xchg(), which orders
|
||||||
* the store to the PTE vs the load of mm_cpumask.
|
* the store to the PTE vs the load of mm_cpumask.
|
||||||
|
*
|
||||||
|
* This full barrier is needed by membarrier when switching
|
||||||
|
* between processes after store to rq->curr, before user-space
|
||||||
|
* memory accesses.
|
||||||
*/
|
*/
|
||||||
smp_mb();
|
smp_mb();
|
||||||
|
|
||||||
|
@ -80,6 +85,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
|
||||||
|
|
||||||
if (new_on_cpu)
|
if (new_on_cpu)
|
||||||
radix_kvm_prefetch_workaround(next);
|
radix_kvm_prefetch_workaround(next);
|
||||||
|
else
|
||||||
|
membarrier_arch_switch_mm(prev, next, tsk);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The actual HW switching method differs between the various
|
* The actual HW switching method differs between the various
|
||||||
|
|
|
@ -215,14 +215,25 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)
|
||||||
#ifdef CONFIG_MEMBARRIER
|
#ifdef CONFIG_MEMBARRIER
|
||||||
enum {
|
enum {
|
||||||
MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY = (1U << 0),
|
MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY = (1U << 0),
|
||||||
MEMBARRIER_STATE_SWITCH_MM = (1U << 1),
|
MEMBARRIER_STATE_PRIVATE_EXPEDITED = (1U << 1),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
|
||||||
|
#include <asm/membarrier.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
static inline void membarrier_execve(struct task_struct *t)
|
static inline void membarrier_execve(struct task_struct *t)
|
||||||
{
|
{
|
||||||
atomic_set(&t->mm->membarrier_state, 0);
|
atomic_set(&t->mm->membarrier_state, 0);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
|
||||||
|
/*
 * Empty stub of membarrier_arch_switch_mm(), provided in the #else branch
 * of CONFIG_MEMBARRIER when CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS is
 * defined. It takes the same parameters as the arch implementation and
 * does nothing, so callers need no #ifdef of their own.
 */
static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
|
||||||
|
struct mm_struct *next,
|
||||||
|
struct task_struct *tsk)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
#endif
|
||||||
static inline void membarrier_execve(struct task_struct *t)
|
static inline void membarrier_execve(struct task_struct *t)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
|
@ -1412,6 +1412,9 @@ config USERFAULTFD
|
||||||
Enable the userfaultfd() system call that allows to intercept and
|
Enable the userfaultfd() system call that allows to intercept and
|
||||||
handle page faults in userland.
|
handle page faults in userland.
|
||||||
|
|
||||||
|
config ARCH_HAS_MEMBARRIER_CALLBACKS
|
||||||
|
bool
|
||||||
|
|
||||||
config EMBEDDED
|
config EMBEDDED
|
||||||
bool "Embedded system"
|
bool "Embedded system"
|
||||||
option allnoconfig_y
|
option allnoconfig_y
|
||||||
|
|
|
@ -2698,16 +2698,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
|
||||||
prev_state = prev->state;
|
prev_state = prev->state;
|
||||||
vtime_task_switch(prev);
|
vtime_task_switch(prev);
|
||||||
perf_event_task_sched_in(prev, current);
|
perf_event_task_sched_in(prev, current);
|
||||||
/*
|
|
||||||
* The membarrier system call requires a full memory barrier
|
|
||||||
* after storing to rq->curr, before going back to user-space.
|
|
||||||
*
|
|
||||||
* TODO: This smp_mb__after_unlock_lock can go away if PPC end
|
|
||||||
* up adding a full barrier to switch_mm(), or we should figure
|
|
||||||
* out if a smp_mb__after_unlock_lock is really the proper API
|
|
||||||
* to use.
|
|
||||||
*/
|
|
||||||
smp_mb__after_unlock_lock();
|
|
||||||
finish_task(prev);
|
finish_task(prev);
|
||||||
finish_lock_switch(rq);
|
finish_lock_switch(rq);
|
||||||
finish_arch_post_lock_switch();
|
finish_arch_post_lock_switch();
|
||||||
|
|
|
@ -118,6 +118,14 @@ static void membarrier_register_private_expedited(void)
|
||||||
if (atomic_read(&mm->membarrier_state)
|
if (atomic_read(&mm->membarrier_state)
|
||||||
& MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
|
& MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
|
||||||
return;
|
return;
|
||||||
|
atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state);
|
||||||
|
if (!(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)) {
|
||||||
|
/*
|
||||||
|
* Ensure all future scheduler executions will observe the
|
||||||
|
* new thread flag state for this process.
|
||||||
|
*/
|
||||||
|
synchronize_sched();
|
||||||
|
}
|
||||||
atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
|
atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
|
||||||
&mm->membarrier_state);
|
&mm->membarrier_state);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue