mips/atomic: Fix loongson_llsc_mb() wreckage
The comment describing the loongson_llsc_mb() reorder case doesn't
make any sense what so ever. Instruction re-ordering is not an SMP
artifact, but rather a CPU local phenomenon. Clarify the comment by
explaining that these issue cause a coherence fail.
For the branch speculation case; if futex_atomic_cmpxchg_inatomic()
needs one at the bne branch target, then surely the normal
__cmpxch_asm() implementation does too. We cannot rely on the
barriers from cmpxchg() because cmpxchg_local() is implemented with
the same macro, and branch prediction and speculation are, too, CPU
local.
Fixes: e02e07e312
("MIPS: Loongson: Introduce and use loongson_llsc_mb()")
Cc: Huacai Chen <chenhc@lemote.com>
Cc: Huang Pei <huangpei@loongson.cn>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Paul Burton <paul.burton@mips.com>
This commit is contained in:
parent
dfc8d8de85
commit
1c6c1ca318
|
@ -193,6 +193,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
|
|||
if (kernel_uses_llsc) {
|
||||
int temp;
|
||||
|
||||
loongson_llsc_mb();
|
||||
__asm__ __volatile__(
|
||||
" .set push \n"
|
||||
" .set "MIPS_ISA_LEVEL" \n"
|
||||
|
@ -200,12 +201,12 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
|
|||
" .set pop \n"
|
||||
" subu %0, %1, %3 \n"
|
||||
" move %1, %0 \n"
|
||||
" bltz %0, 1f \n"
|
||||
" bltz %0, 2f \n"
|
||||
" .set push \n"
|
||||
" .set "MIPS_ISA_LEVEL" \n"
|
||||
" sc %1, %2 \n"
|
||||
"\t" __scbeqz " %1, 1b \n"
|
||||
"1: \n"
|
||||
"2: \n"
|
||||
" .set pop \n"
|
||||
: "=&r" (result), "=&r" (temp),
|
||||
"+" GCC_OFF_SMALL_ASM() (v->counter)
|
||||
|
|
|
@ -238,36 +238,40 @@
|
|||
|
||||
/*
|
||||
* Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
|
||||
* store or pref) in between an ll & sc can cause the sc instruction to
|
||||
* store or prefetch) in between an LL & SC can cause the SC instruction to
|
||||
* erroneously succeed, breaking atomicity. Whilst it's unusual to write code
|
||||
* containing such sequences, this bug bites harder than we might otherwise
|
||||
* expect due to reordering & speculation:
|
||||
*
|
||||
* 1) A memory access appearing prior to the ll in program order may actually
|
||||
* be executed after the ll - this is the reordering case.
|
||||
* 1) A memory access appearing prior to the LL in program order may actually
|
||||
* be executed after the LL - this is the reordering case.
|
||||
*
|
||||
* In order to avoid this we need to place a memory barrier (ie. a sync
|
||||
* instruction) prior to every ll instruction, in between it & any earlier
|
||||
* memory access instructions. Many of these cases are already covered by
|
||||
* smp_mb__before_llsc() but for the remaining cases, typically ones in
|
||||
* which multiple CPUs may operate on a memory location but ordering is not
|
||||
* usually guaranteed, we use loongson_llsc_mb() below.
|
||||
* In order to avoid this we need to place a memory barrier (ie. a SYNC
|
||||
* instruction) prior to every LL instruction, in between it and any earlier
|
||||
* memory access instructions.
|
||||
*
|
||||
* This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
|
||||
*
|
||||
* 2) If a conditional branch exists between an ll & sc with a target outside
|
||||
* of the ll-sc loop, for example an exit upon value mismatch in cmpxchg()
|
||||
* 2) If a conditional branch exists between an LL & SC with a target outside
|
||||
* of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
|
||||
* or similar, then misprediction of the branch may allow speculative
|
||||
* execution of memory accesses from outside of the ll-sc loop.
|
||||
* execution of memory accesses from outside of the LL-SC loop.
|
||||
*
|
||||
* In order to avoid this we need a memory barrier (ie. a sync instruction)
|
||||
* In order to avoid this we need a memory barrier (ie. a SYNC instruction)
|
||||
* at each affected branch target, for which we also use loongson_llsc_mb()
|
||||
* defined below.
|
||||
*
|
||||
* This case affects all current Loongson 3 CPUs.
|
||||
*
|
||||
* The above described cases cause an error in the cache coherence protocol;
|
||||
* such that the Invalidate of a competing LL-SC goes 'missing' and SC
|
||||
* erroneously observes its core still has Exclusive state and lets the SC
|
||||
* proceed.
|
||||
*
|
||||
* Therefore the error only occurs on SMP systems.
|
||||
*/
|
||||
#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
|
||||
#define loongson_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
|
||||
#define loongson_llsc_mb() __asm__ __volatile__("sync" : : :"memory")
|
||||
#else
|
||||
#define loongson_llsc_mb() do { } while (0)
|
||||
#endif
|
||||
|
|
|
@ -249,6 +249,7 @@ static inline int test_and_set_bit(unsigned long nr,
|
|||
unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
|
||||
unsigned long temp;
|
||||
|
||||
loongson_llsc_mb();
|
||||
do {
|
||||
__asm__ __volatile__(
|
||||
" .set push \n"
|
||||
|
@ -305,6 +306,7 @@ static inline int test_and_set_bit_lock(unsigned long nr,
|
|||
unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
|
||||
unsigned long temp;
|
||||
|
||||
loongson_llsc_mb();
|
||||
do {
|
||||
__asm__ __volatile__(
|
||||
" .set push \n"
|
||||
|
@ -364,6 +366,7 @@ static inline int test_and_clear_bit(unsigned long nr,
|
|||
unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
|
||||
unsigned long temp;
|
||||
|
||||
loongson_llsc_mb();
|
||||
do {
|
||||
__asm__ __volatile__(
|
||||
" " __LL "%0, %1 # test_and_clear_bit \n"
|
||||
|
@ -379,6 +382,7 @@ static inline int test_and_clear_bit(unsigned long nr,
|
|||
unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
|
||||
unsigned long temp;
|
||||
|
||||
loongson_llsc_mb();
|
||||
do {
|
||||
__asm__ __volatile__(
|
||||
" .set push \n"
|
||||
|
@ -438,6 +442,7 @@ static inline int test_and_change_bit(unsigned long nr,
|
|||
unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
|
||||
unsigned long temp;
|
||||
|
||||
loongson_llsc_mb();
|
||||
do {
|
||||
__asm__ __volatile__(
|
||||
" .set push \n"
|
||||
|
|
|
@ -46,6 +46,7 @@ extern unsigned long __xchg_called_with_bad_pointer(void)
|
|||
__typeof(*(m)) __ret; \
|
||||
\
|
||||
if (kernel_uses_llsc) { \
|
||||
loongson_llsc_mb(); \
|
||||
__asm__ __volatile__( \
|
||||
" .set push \n" \
|
||||
" .set noat \n" \
|
||||
|
@ -117,6 +118,7 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
|
|||
__typeof(*(m)) __ret; \
|
||||
\
|
||||
if (kernel_uses_llsc) { \
|
||||
loongson_llsc_mb(); \
|
||||
__asm__ __volatile__( \
|
||||
" .set push \n" \
|
||||
" .set noat \n" \
|
||||
|
@ -134,6 +136,7 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
|
|||
: "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m) \
|
||||
: GCC_OFF_SMALL_ASM() (*m), "Jr" (old), "Jr" (new) \
|
||||
: "memory"); \
|
||||
loongson_llsc_mb(); \
|
||||
} else { \
|
||||
unsigned long __flags; \
|
||||
\
|
||||
|
@ -229,6 +232,7 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
|
|||
*/
|
||||
local_irq_save(flags);
|
||||
|
||||
loongson_llsc_mb();
|
||||
asm volatile(
|
||||
" .set push \n"
|
||||
" .set " MIPS_ISA_ARCH_LEVEL " \n"
|
||||
|
@ -274,6 +278,7 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
|
|||
"r" (old),
|
||||
"r" (new)
|
||||
: "memory");
|
||||
loongson_llsc_mb();
|
||||
|
||||
local_irq_restore(flags);
|
||||
return ret;
|
||||
|
|
|
@ -132,6 +132,7 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new)
|
|||
[efault] "i" (-EFAULT)
|
||||
: "memory");
|
||||
} else if (cpu_has_llsc) {
|
||||
loongson_llsc_mb();
|
||||
__asm__ __volatile__ (
|
||||
" .set push \n"
|
||||
" .set "MIPS_ISA_ARCH_LEVEL" \n"
|
||||
|
|
Loading…
Reference in New Issue