[ARM] barriers: improve xchg, bitops and atomic SMP barriers

Mathieu Desnoyers pointed out that the ARM barriers were lacking:

- cmpxchg, xchg and atomic add return need memory barriers on
  architectures which can reorder the relative order in which memory
  reads/writes can be seen between CPUs, which seems to include recent
  ARM architectures. Those barriers are currently missing on ARM.

- test_and_xxx_bit were missing SMP barriers.

So put these barriers in.  Provide separate atomic_add/atomic_sub
operations which do not require barriers.

Reported-Reviewed-and-Acked-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
This commit is contained in:
Russell King 2009-05-25 20:58:00 +01:00 committed by Russell King
parent 290815710b
commit bac4e960b5
5 changed files with 71 additions and 13 deletions

View File

@ -114,3 +114,16 @@
.align 3; \ .align 3; \
.long 9999b,9001f; \ .long 9999b,9001f; \
.previous .previous
/*
* SMP data memory barrier
*
* Assembler macro that expands to a data memory barrier only when the
* kernel is built with CONFIG_SMP:
*   - __LINUX_ARM_ARCH__ >= 7: the "dmb" instruction.
*   - __LINUX_ARM_ARCH__ == 6: the CP15 c7, c10, 5 operation (the
*     pre-v7 encoding of the same barrier, as its trailing comment notes).
* For any other configuration (no CONFIG_SMP, or an architecture level
* below 6) the macro expands to nothing.
*
* NOTE(review): the ARMv6 form passes r0 as the MCR source register
* without loading it first - confirm that the value of r0 is irrelevant
* for this operation on all supported cores.
*/
.macro smp_dmb
#ifdef CONFIG_SMP
#if __LINUX_ARM_ARCH__ >= 7
dmb
#elif __LINUX_ARM_ARCH__ == 6
mcr p15, 0, r0, c7, c10, 5 @ dmb
#endif
#endif
.endm

View File

@ -44,11 +44,29 @@ static inline void atomic_set(atomic_t *v, int i)
: "cc"); : "cc");
} }
/*
 * atomic_add - atomically add @i to @v->counter, discarding the result.
 *
 * Implemented as an ldrex/strex retry loop: the sequence is restarted
 * for as long as the status value written by strex is non-zero.  Unlike
 * atomic_add_return(), no smp_mb() is issued around the operation.
 *
 * The "+Qo" (v->counter) operand tells the compiler that the asm both
 * reads and writes the counter.  Without it the only operand naming the
 * counter is its address in a register, so the compiler is not informed
 * that the memory behind that address changes and may keep using a
 * previously loaded copy of v->counter across this statement.
 */
static inline void atomic_add(int i, atomic_t *v)
{
	unsigned long tmp;	/* strex status */
	int result;		/* scratch: loaded value, then the sum */

	__asm__ __volatile__("@ atomic_add\n"
"1: ldrex %0, [%3]\n"
" add %0, %0, %4\n"
" strex %1, %0, [%3]\n"
" teq %1, #0\n"
" bne 1b"
	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
	: "r" (&v->counter), "Ir" (i)
	: "cc");
}
static inline int atomic_add_return(int i, atomic_t *v) static inline int atomic_add_return(int i, atomic_t *v)
{ {
unsigned long tmp; unsigned long tmp;
int result; int result;
smp_mb();
__asm__ __volatile__("@ atomic_add_return\n" __asm__ __volatile__("@ atomic_add_return\n"
"1: ldrex %0, [%2]\n" "1: ldrex %0, [%2]\n"
" add %0, %0, %3\n" " add %0, %0, %3\n"
@ -59,14 +77,34 @@ static inline int atomic_add_return(int i, atomic_t *v)
: "r" (&v->counter), "Ir" (i) : "r" (&v->counter), "Ir" (i)
: "cc"); : "cc");
smp_mb();
return result; return result;
} }
/*
 * atomic_sub - atomically subtract @i from @v->counter, discarding the
 * result.
 *
 * Implemented as an ldrex/strex retry loop: the sequence is restarted
 * for as long as the status value written by strex is non-zero.  Unlike
 * atomic_sub_return(), no smp_mb() is issued around the operation.
 *
 * The "+Qo" (v->counter) operand tells the compiler that the asm both
 * reads and writes the counter.  Without it the only operand naming the
 * counter is its address in a register, so the compiler is not informed
 * that the memory behind that address changes and may keep using a
 * previously loaded copy of v->counter across this statement.
 */
static inline void atomic_sub(int i, atomic_t *v)
{
	unsigned long tmp;	/* strex status */
	int result;		/* scratch: loaded value, then the difference */

	__asm__ __volatile__("@ atomic_sub\n"
"1: ldrex %0, [%3]\n"
" sub %0, %0, %4\n"
" strex %1, %0, [%3]\n"
" teq %1, #0\n"
" bne 1b"
	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
	: "r" (&v->counter), "Ir" (i)
	: "cc");
}
static inline int atomic_sub_return(int i, atomic_t *v) static inline int atomic_sub_return(int i, atomic_t *v)
{ {
unsigned long tmp; unsigned long tmp;
int result; int result;
smp_mb();
__asm__ __volatile__("@ atomic_sub_return\n" __asm__ __volatile__("@ atomic_sub_return\n"
"1: ldrex %0, [%2]\n" "1: ldrex %0, [%2]\n"
" sub %0, %0, %3\n" " sub %0, %0, %3\n"
@ -77,6 +115,8 @@ static inline int atomic_sub_return(int i, atomic_t *v)
: "r" (&v->counter), "Ir" (i) : "r" (&v->counter), "Ir" (i)
: "cc"); : "cc");
smp_mb();
return result; return result;
} }
@ -84,6 +124,8 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
{ {
unsigned long oldval, res; unsigned long oldval, res;
smp_mb();
do { do {
__asm__ __volatile__("@ atomic_cmpxchg\n" __asm__ __volatile__("@ atomic_cmpxchg\n"
"ldrex %1, [%2]\n" "ldrex %1, [%2]\n"
@ -95,6 +137,8 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
: "cc"); : "cc");
} while (res); } while (res);
smp_mb();
return oldval; return oldval;
} }
@ -135,6 +179,7 @@ static inline int atomic_add_return(int i, atomic_t *v)
return val; return val;
} }
/* In this variant atomic_add() is atomic_add_return() with the result discarded. */
#define atomic_add(i, v) (void) atomic_add_return(i, v)
static inline int atomic_sub_return(int i, atomic_t *v) static inline int atomic_sub_return(int i, atomic_t *v)
{ {
@ -148,6 +193,7 @@ static inline int atomic_sub_return(int i, atomic_t *v)
return val; return val;
} }
/* In this variant atomic_sub() is atomic_sub_return() with the result discarded. */
#define atomic_sub(i, v) (void) atomic_sub_return(i, v)
static inline int atomic_cmpxchg(atomic_t *v, int old, int new) static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
{ {
@ -187,10 +233,8 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
} }
#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
#define atomic_add(i, v) (void) atomic_add_return(i, v) #define atomic_inc(v) atomic_add(1, v)
#define atomic_inc(v) (void) atomic_add_return(1, v) #define atomic_dec(v) atomic_sub(1, v)
#define atomic_sub(i, v) (void) atomic_sub_return(i, v)
#define atomic_dec(v) (void) atomic_sub_return(1, v)
#define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0) #define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0)
#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0) #define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0)
@ -200,11 +244,10 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
#define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0) #define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0)
/* Atomic operations are already serializing on ARM */ #define smp_mb__before_atomic_dec() smp_mb()
#define smp_mb__before_atomic_dec() barrier() #define smp_mb__after_atomic_dec() smp_mb()
#define smp_mb__after_atomic_dec() barrier() #define smp_mb__before_atomic_inc() smp_mb()
#define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() smp_mb()
#define smp_mb__after_atomic_inc() barrier()
#include <asm-generic/atomic.h> #include <asm-generic/atomic.h>
#endif #endif

View File

@ -248,6 +248,8 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
unsigned int tmp; unsigned int tmp;
#endif #endif
smp_mb();
switch (size) { switch (size) {
#if __LINUX_ARM_ARCH__ >= 6 #if __LINUX_ARM_ARCH__ >= 6
case 1: case 1:
@ -307,6 +309,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
__bad_xchg(ptr, size), ret = 0; __bad_xchg(ptr, size), ret = 0;
break; break;
} }
smp_mb();
return ret; return ret;
} }

View File

@ -815,10 +815,7 @@ __kuser_helper_start:
*/ */
__kuser_memory_barrier: @ 0xffff0fa0 __kuser_memory_barrier: @ 0xffff0fa0
smp_dmb
#if __LINUX_ARM_ARCH__ >= 6 && defined(CONFIG_SMP)
mcr p15, 0, r0, c7, c10, 5 @ dmb
#endif
usr_ret lr usr_ret lr
.align 5 .align 5

View File

@ -18,12 +18,14 @@
mov r2, #1 mov r2, #1
add r1, r1, r0, lsr #3 @ Get byte offset add r1, r1, r0, lsr #3 @ Get byte offset
mov r3, r2, lsl r3 @ create mask mov r3, r2, lsl r3 @ create mask
smp_dmb
1: ldrexb r2, [r1] 1: ldrexb r2, [r1]
ands r0, r2, r3 @ save old value of bit ands r0, r2, r3 @ save old value of bit
\instr r2, r2, r3 @ toggle bit \instr r2, r2, r3 @ toggle bit
strexb ip, r2, [r1] strexb ip, r2, [r1]
cmp ip, #0 cmp ip, #0
bne 1b bne 1b
smp_dmb
cmp r0, #0 cmp r0, #0
movne r0, #1 movne r0, #1
2: mov pc, lr 2: mov pc, lr