From 4009338d62e9d1c21b230bd7ebab6c43c482430e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 18 Aug 2011 11:33:39 -0700 Subject: [PATCH 01/12] x86, cmpxchg: has LOCK_PREFIX Not . Signed-off-by: Jeremy Fitzhardinge Link: http://lkml.kernel.org/r/4E5BCC40.3030501@goop.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/cmpxchg_32.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 3deb7250624c..024b694517ce 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -1,7 +1,7 @@ #ifndef _ASM_X86_CMPXCHG_32_H #define _ASM_X86_CMPXCHG_32_H -#include /* for LOCK_PREFIX */ +#include /* Provides LOCK_PREFIX */ /* * Note: if you use set64_bit(), __cmpxchg64(), or their variants, you From 416185bd5ac8a749ee43d1b0967b43782843d1a0 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 18 Aug 2011 11:34:46 -0700 Subject: [PATCH 02/12] x86, cmpxchg: Move 32-bit __cmpxchg_wrong_size to match 64 bit. Signed-off-by: Jeremy Fitzhardinge Link: http://lkml.kernel.org/r/4E5BCC40.3030501@goop.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/cmpxchg_32.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 024b694517ce..59d8e36d1eed 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -9,6 +9,7 @@ */ extern void __xchg_wrong_size(void); +extern void __cmpxchg_wrong_size(void); /* * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. @@ -84,8 +85,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 value) : "memory"); } -extern void __cmpxchg_wrong_size(void); - /* * Atomic compare and exchange. Compare OLD with MEM, if identical, * store NEW in MEM. Return the initial value in MEM. Success is From 00a41546e8008b9944382eed1841c785f4fc8d9c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 18 Aug 2011 11:40:22 -0700 Subject: [PATCH 03/12] x86, cmpxchg: Move 64-bit set64_bit() to match 32-bit Reduce arbitrary differences between 32 and 64 bits. Signed-off-by: Jeremy Fitzhardinge Link: http://lkml.kernel.org/r/4E5BCC40.3030501@goop.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/cmpxchg_64.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 7cf5c0a24434..5bfa560ef14e 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -3,11 +3,6 @@ #include /* Provides LOCK_PREFIX */ -static inline void set_64bit(volatile u64 *ptr, u64 val) -{ - *ptr = val; -} - extern void __xchg_wrong_size(void); extern void __cmpxchg_wrong_size(void); @@ -66,6 +61,11 @@ extern void __cmpxchg_wrong_size(void); #define xchg(ptr, v) \ __xchg((v), (ptr), sizeof(*ptr)) +static inline void set_64bit(volatile u64 *ptr, u64 val) +{ + *ptr = val; +} + #define __HAVE_ARCH_CMPXCHG 1 /* From e9826380d83d1bda3ee5663bf3fa4667a6fbe60a Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 18 Aug 2011 11:48:06 -0700 Subject: [PATCH 04/12] x86, cmpxchg: Unify cmpxchg into cmpxchg.h Everything that's actually common between 32 and 64-bit is moved into cmpxchg.h. xchg/cmpxchg will fail with a link error if they're passed an unsupported size (which includes 64-bit args on 32-bit systems). Signed-off-by: Jeremy Fitzhardinge Link: http://lkml.kernel.org/r/4E5BCC40.3030501@goop.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/cmpxchg.h | 155 ++++++++++++++++++++++++++++++ arch/x86/include/asm/cmpxchg_32.h | 113 ---------------------- arch/x86/include/asm/cmpxchg_64.h | 131 ------------------------- 3 files changed, 155 insertions(+), 244 deletions(-) diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index a460fa088d4c..efe3ec778a58 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -1,5 +1,160 @@ +#ifndef ASM_X86_CMPXCHG_H +#define ASM_X86_CMPXCHG_H + +#include /* Provides LOCK_PREFIX */ + +/* Non-existant functions to indicate usage errors at link time. */ +extern void __xchg_wrong_size(void); +extern void __cmpxchg_wrong_size(void); + +/* + * Constants for operation sizes. On 32-bit, the 64-bit size it set to + * -1 because sizeof will never return -1, thereby making those switch + * case statements guaranteeed dead code which the compiler will + * eliminate, and allowing the "missing symbol in the default case" to + * indicate a usage error. + */ +#define __X86_CASE_B 1 +#define __X86_CASE_W 2 +#define __X86_CASE_L 4 +#ifdef CONFIG_64BIT +#define __X86_CASE_Q 8 +#else +#define __X86_CASE_Q -1 /* sizeof will never return -1 */ +#endif + +/* + * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. + * Since this is generally used to protect other memory information, we + * use "asm volatile" and "memory" clobbers to prevent gcc from moving + * information around. + */ +#define __xchg(x, ptr, size) \ +({ \ + __typeof(*(ptr)) __x = (x); \ + switch (size) { \ + case __X86_CASE_B: \ + { \ + volatile u8 *__ptr = (volatile u8 *)(ptr); \ + asm volatile("xchgb %0,%1" \ + : "=q" (__x), "+m" (*__ptr) \ + : "0" (__x) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_W: \ + { \ + volatile u16 *__ptr = (volatile u16 *)(ptr); \ + asm volatile("xchgw %0,%1" \ + : "=r" (__x), "+m" (*__ptr) \ + : "0" (__x) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_L: \ + { \ + volatile u32 *__ptr = (volatile u32 *)(ptr); \ + asm volatile("xchgl %0,%1" \ + : "=r" (__x), "+m" (*__ptr) \ + : "0" (__x) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_Q: \ + { \ + volatile u64 *__ptr = (volatile u64 *)(ptr); \ + asm volatile("xchgq %0,%1" \ + : "=r" (__x), "+m" (*__ptr) \ + : "0" (__x) \ + : "memory"); \ + break; \ + } \ + default: \ + __xchg_wrong_size(); \ + } \ + __x; \ +}) + +#define xchg(ptr, v) \ + __xchg((v), (ptr), sizeof(*ptr)) + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. + */ +#define __raw_cmpxchg(ptr, old, new, size, lock) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + switch (size) { \ + case __X86_CASE_B: \ + { \ + volatile u8 *__ptr = (volatile u8 *)(ptr); \ + asm volatile(lock "cmpxchgb %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "q" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_W: \ + { \ + volatile u16 *__ptr = (volatile u16 *)(ptr); \ + asm volatile(lock "cmpxchgw %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "r" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_L: \ + { \ + volatile u32 *__ptr = (volatile u32 *)(ptr); \ + asm volatile(lock "cmpxchgl %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "r" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_Q: \ + { \ + volatile u64 *__ptr = (volatile u64 *)(ptr); \ + asm volatile(lock "cmpxchgq %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "r" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + default: \ + __cmpxchg_wrong_size(); \ + } \ + __ret; \ +}) + +#define __cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) + +#define __sync_cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") + +#define __cmpxchg_local(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "") + #ifdef CONFIG_X86_32 # include "cmpxchg_32.h" #else # include "cmpxchg_64.h" #endif + +#ifdef __HAVE_ARCH_CMPXCHG +#define cmpxchg(ptr, old, new) \ + __cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define sync_cmpxchg(ptr, old, new) \ + __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define cmpxchg_local(ptr, old, new) \ + __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) +#endif + +#endif /* ASM_X86_CMPXCHG_H */ diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 59d8e36d1eed..fbebb07dd80b 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -1,62 +1,11 @@ #ifndef _ASM_X86_CMPXCHG_32_H #define _ASM_X86_CMPXCHG_32_H -#include /* Provides LOCK_PREFIX */ - /* * Note: if you use set64_bit(), __cmpxchg64(), or their variants, you * you need to test for the feature in boot_cpu_data. */ -extern void __xchg_wrong_size(void); -extern void __cmpxchg_wrong_size(void); - -/* - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. - * Since this is generally used to protect other memory information, we - * use "asm volatile" and "memory" clobbers to prevent gcc from moving - * information around. - */ -#define __xchg(x, ptr, size) \ -({ \ - __typeof(*(ptr)) __x = (x); \ - switch (size) { \ - case 1: \ - { \ - volatile u8 *__ptr = (volatile u8 *)(ptr); \ - asm volatile("xchgb %0,%1" \ - : "=q" (__x), "+m" (*__ptr) \ - : "0" (__x) \ - : "memory"); \ - break; \ - } \ - case 2: \ - { \ - volatile u16 *__ptr = (volatile u16 *)(ptr); \ - asm volatile("xchgw %0,%1" \ - : "=r" (__x), "+m" (*__ptr) \ - : "0" (__x) \ - : "memory"); \ - break; \ - } \ - case 4: \ - { \ - volatile u32 *__ptr = (volatile u32 *)(ptr); \ - asm volatile("xchgl %0,%1" \ - : "=r" (__x), "+m" (*__ptr) \ - : "0" (__x) \ - : "memory"); \ - break; \ - } \ - default: \ - __xchg_wrong_size(); \ - } \ - __x; \ -}) - -#define xchg(ptr, v) \ - __xchg((v), (ptr), sizeof(*ptr)) - /* * CMPXCHG8B only writes to the target if we had the previous * value in registers, otherwise it acts as a read and gives us the @@ -85,70 +34,8 @@ static inline void set_64bit(volatile u64 *ptr, u64 value) : "memory"); } -/* - * Atomic compare and exchange. Compare OLD with MEM, if identical, - * store NEW in MEM. Return the initial value in MEM. Success is - * indicated by comparing RETURN with OLD. - */ -#define __raw_cmpxchg(ptr, old, new, size, lock) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __typeof__(*(ptr)) __old = (old); \ - __typeof__(*(ptr)) __new = (new); \ - switch (size) { \ - case 1: \ - { \ - volatile u8 *__ptr = (volatile u8 *)(ptr); \ - asm volatile(lock "cmpxchgb %2,%1" \ - : "=a" (__ret), "+m" (*__ptr) \ - : "q" (__new), "0" (__old) \ - : "memory"); \ - break; \ - } \ - case 2: \ - { \ - volatile u16 *__ptr = (volatile u16 *)(ptr); \ - asm volatile(lock "cmpxchgw %2,%1" \ - : "=a" (__ret), "+m" (*__ptr) \ - : "r" (__new), "0" (__old) \ - : "memory"); \ - break; \ - } \ - case 4: \ - { \ - volatile u32 *__ptr = (volatile u32 *)(ptr); \ - asm volatile(lock "cmpxchgl %2,%1" \ - : "=a" (__ret), "+m" (*__ptr) \ - : "r" (__new), "0" (__old) \ - : "memory"); \ - break; \ - } \ - default: \ - __cmpxchg_wrong_size(); \ - } \ - __ret; \ -}) - -#define __cmpxchg(ptr, old, new, size) \ - __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) - -#define __sync_cmpxchg(ptr, old, new, size) \ - __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") - -#define __cmpxchg_local(ptr, old, new, size) \ - __raw_cmpxchg((ptr), (old), (new), (size), "") - #ifdef CONFIG_X86_CMPXCHG #define __HAVE_ARCH_CMPXCHG 1 - -#define cmpxchg(ptr, old, new) \ - __cmpxchg((ptr), (old), (new), sizeof(*ptr)) - -#define sync_cmpxchg(ptr, old, new) \ - __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr)) - -#define cmpxchg_local(ptr, old, new) \ - __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) #endif #ifdef CONFIG_X86_CMPXCHG64 diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 5bfa560ef14e..285da02c38fa 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -1,66 +1,6 @@ #ifndef _ASM_X86_CMPXCHG_64_H #define _ASM_X86_CMPXCHG_64_H -#include /* Provides LOCK_PREFIX */ - -extern void __xchg_wrong_size(void); -extern void __cmpxchg_wrong_size(void); - -/* - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. - * Since this is generally used to protect other memory information, we - * use "asm volatile" and "memory" clobbers to prevent gcc from moving - * information around. - */ -#define __xchg(x, ptr, size) \ -({ \ - __typeof(*(ptr)) __x = (x); \ - switch (size) { \ - case 1: \ - { \ - volatile u8 *__ptr = (volatile u8 *)(ptr); \ - asm volatile("xchgb %0,%1" \ - : "=q" (__x), "+m" (*__ptr) \ - : "0" (__x) \ - : "memory"); \ - break; \ - } \ - case 2: \ - { \ - volatile u16 *__ptr = (volatile u16 *)(ptr); \ - asm volatile("xchgw %0,%1" \ - : "=r" (__x), "+m" (*__ptr) \ - : "0" (__x) \ - : "memory"); \ - break; \ - } \ - case 4: \ - { \ - volatile u32 *__ptr = (volatile u32 *)(ptr); \ - asm volatile("xchgl %0,%1" \ - : "=r" (__x), "+m" (*__ptr) \ - : "0" (__x) \ - : "memory"); \ - break; \ - } \ - case 8: \ - { \ - volatile u64 *__ptr = (volatile u64 *)(ptr); \ - asm volatile("xchgq %0,%1" \ - : "=r" (__x), "+m" (*__ptr) \ - : "0" (__x) \ - : "memory"); \ - break; \ - } \ - default: \ - __xchg_wrong_size(); \ - } \ - __x; \ -}) - -#define xchg(ptr, v) \ - __xchg((v), (ptr), sizeof(*ptr)) - static inline void set_64bit(volatile u64 *ptr, u64 val) { *ptr = val; @@ -68,77 +8,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 val) #define __HAVE_ARCH_CMPXCHG 1 -/* - * Atomic compare and exchange. Compare OLD with MEM, if identical, - * store NEW in MEM. Return the initial value in MEM. Success is - * indicated by comparing RETURN with OLD. - */ -#define __raw_cmpxchg(ptr, old, new, size, lock) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __typeof__(*(ptr)) __old = (old); \ - __typeof__(*(ptr)) __new = (new); \ - switch (size) { \ - case 1: \ - { \ - volatile u8 *__ptr = (volatile u8 *)(ptr); \ - asm volatile(lock "cmpxchgb %2,%1" \ - : "=a" (__ret), "+m" (*__ptr) \ - : "q" (__new), "0" (__old) \ - : "memory"); \ - break; \ - } \ - case 2: \ - { \ - volatile u16 *__ptr = (volatile u16 *)(ptr); \ - asm volatile(lock "cmpxchgw %2,%1" \ - : "=a" (__ret), "+m" (*__ptr) \ - : "r" (__new), "0" (__old) \ - : "memory"); \ - break; \ - } \ - case 4: \ - { \ - volatile u32 *__ptr = (volatile u32 *)(ptr); \ - asm volatile(lock "cmpxchgl %2,%1" \ - : "=a" (__ret), "+m" (*__ptr) \ - : "r" (__new), "0" (__old) \ - : "memory"); \ - break; \ - } \ - case 8: \ - { \ - volatile u64 *__ptr = (volatile u64 *)(ptr); \ - asm volatile(lock "cmpxchgq %2,%1" \ - : "=a" (__ret), "+m" (*__ptr) \ - : "r" (__new), "0" (__old) \ - : "memory"); \ - break; \ - } \ - default: \ - __cmpxchg_wrong_size(); \ - } \ - __ret; \ -}) - -#define __cmpxchg(ptr, old, new, size) \ - __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) - -#define __sync_cmpxchg(ptr, old, new, size) \ - __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") - -#define __cmpxchg_local(ptr, old, new, size) \ - __raw_cmpxchg((ptr), (old), (new), (size), "") - -#define cmpxchg(ptr, old, new) \ - __cmpxchg((ptr), (old), (new), sizeof(*ptr)) - -#define sync_cmpxchg(ptr, old, new) \ - __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr)) - -#define cmpxchg_local(ptr, old, new) \ - __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) - #define cmpxchg64(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ From 433b3520616be694e0aa777089346c8718c91a7b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 21 Jun 2011 12:00:55 -0700 Subject: [PATCH 05/12] x86: Add xadd helper macro Add a common xadd implementation. Signed-off-by: Jeremy Fitzhardinge Link: http://lkml.kernel.org/r/4E5BCC40.3030501@goop.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/cmpxchg.h | 43 ++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index efe3ec778a58..0d0d9cdd3309 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -6,6 +6,7 @@ /* Non-existant functions to indicate usage errors at link time. */ extern void __xchg_wrong_size(void); extern void __cmpxchg_wrong_size(void); +extern void __xadd_wrong_size(void); /* * Constants for operation sizes. On 32-bit, the 64-bit size it set to @@ -157,4 +158,46 @@ extern void __cmpxchg_wrong_size(void); __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) #endif +#define __xadd(ptr, inc, lock) \ + ({ \ + __typeof__ (*(ptr)) __ret = (inc); \ + switch (sizeof(*(ptr))) { \ + case __X86_CASE_B: \ + asm volatile (lock "xaddb %b0, %1\n" \ + : "+r" (__ret), "+m" (*(ptr)) \ + : : "memory", "cc"); \ + break; \ + case __X86_CASE_W: \ + asm volatile (lock "xaddw %w0, %1\n" \ + : "+r" (__ret), "+m" (*(ptr)) \ + : : "memory", "cc"); \ + break; \ + case __X86_CASE_L: \ + asm volatile (lock "xaddl %0, %1\n" \ + : "+r" (__ret), "+m" (*(ptr)) \ + : : "memory", "cc"); \ + break; \ + case __X86_CASE_Q: \ + asm volatile (lock "xaddq %q0, %1\n" \ + : "+r" (__ret), "+m" (*(ptr)) \ + : : "memory", "cc"); \ + break; \ + default: \ + __xadd_wrong_size(); \ + } \ + __ret; \ + }) + +/* + * xadd() adds "inc" to "*ptr" and atomically returns the previous + * value of "*ptr". + * + * xadd() is locked when multiple CPUs are online + * xadd_sync() is always locked + * xadd_local() is never locked + */ +#define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX) +#define xadd_sync(ptr, inc) __xadd((ptr), (inc), "lock; ") +#define xadd_local(ptr, inc) __xadd((ptr), (inc), "") + #endif /* ASM_X86_CMPXCHG_H */ From 8b8bc2f7311c3223213dbe346d9cc2e299fdb5eb Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 23 Aug 2011 16:59:58 -0700 Subject: [PATCH 06/12] x86: Use xadd helper more widely This covers the trivial cases from open-coded xadd to the xadd macros. Signed-off-by: Jeremy Fitzhardinge Link: http://lkml.kernel.org/r/4E5BCC40.3030501@goop.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/atomic.h | 8 ++------ arch/x86/include/asm/atomic64_64.h | 6 +----- arch/x86/include/asm/rwsem.h | 8 +------- arch/x86/include/asm/uv/uv_bau.h | 6 +----- 4 files changed, 5 insertions(+), 23 deletions(-) diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 10572e309ab2..58cb6d4085f7 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -172,18 +172,14 @@ static inline int atomic_add_negative(int i, atomic_t *v) */ static inline int atomic_add_return(int i, atomic_t *v) { - int __i; #ifdef CONFIG_M386 + int __i; unsigned long flags; if (unlikely(boot_cpu_data.x86 <= 3)) goto no_xadd; #endif /* Modern 486+ processor */ - __i = i; - asm volatile(LOCK_PREFIX "xaddl %0, %1" - : "+r" (i), "+m" (v->counter) - : : "memory"); - return i + __i; + return i + xadd(&v->counter, i); #ifdef CONFIG_M386 no_xadd: /* Legacy 386 processor */ diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 017594d403f6..0e1cbfc8ee06 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -170,11 +170,7 @@ static inline int atomic64_add_negative(long i, atomic64_t *v) */ static inline long atomic64_add_return(long i, atomic64_t *v) { - long __i = i; - asm volatile(LOCK_PREFIX "xaddq %0, %1;" - : "+r" (i), "+m" (v->counter) - : : "memory"); - return i + __i; + return i + xadd(&v->counter, i); } static inline long atomic64_sub_return(long i, atomic64_t *v) diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index df4cd32b4cc6..2dbe4a721ce5 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -204,13 +204,7 @@ static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) */ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) { - long tmp = delta; - - asm volatile(LOCK_PREFIX "xadd %0,%1" - : "+r" (tmp), "+m" (sem->count) - : : "memory"); - - return tmp + delta; + return delta + xadd(&sem->count, delta); } #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 37d369859c8e..c568ccca6e0e 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -656,11 +656,7 @@ static inline int atomic_read_short(const struct atomic_short *v) */ static inline int atom_asr(short i, struct atomic_short *v) { - short __i = i; - asm volatile(LOCK_PREFIX "xaddw %0, %1" - : "+r" (i), "+m" (v->counter) - : : "memory"); - return i + __i; + return i + xadd(&v->counter, i); } /* From 84eb950db13ca40a0572ce9957e14723500943d6 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 2 Jul 2010 23:26:36 +0100 Subject: [PATCH 07/12] x86, ticketlock: Clean up types and accessors A few cleanups to the way spinlocks are defined and accessed: - define __ticket_t which is the size of a spinlock ticket (ie, enough bits to hold all the cpus) - Define struct arch_spinlock as a union containing plain slock and the head and tail tickets - Use head and tail to implement some of the spinlock predicates. - Make all ticket variables unsigned. - Use TICKET_SHIFT to form constants Most of this will be used in later patches. Signed-off-by: Jeremy Fitzhardinge Link: http://lkml.kernel.org/r/4E5BCC40.3030501@goop.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/spinlock.h | 24 ++++++++++-------------- arch/x86/include/asm/spinlock_types.h | 20 ++++++++++++++++++-- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index ee67edf86fdd..ea2a04f69ca9 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -55,11 +55,9 @@ * much between them in performance though, especially as locks are out of line. */ #if (NR_CPUS < 256) -#define TICKET_SHIFT 8 - static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) { - short inc = 0x0100; + unsigned short inc = 1 << TICKET_SHIFT; asm volatile ( LOCK_PREFIX "xaddw %w0, %1\n" @@ -78,7 +76,7 @@ static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) { - int tmp, new; + unsigned int tmp, new; asm volatile("movzwl %2, %0\n\t" "cmpb %h0,%b0\n\t" @@ -103,12 +101,10 @@ static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) : "memory", "cc"); } #else -#define TICKET_SHIFT 16 - static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) { - int inc = 0x00010000; - int tmp; + unsigned inc = 1 << TICKET_SHIFT; + unsigned tmp; asm volatile(LOCK_PREFIX "xaddl %0, %1\n" "movzwl %w0, %2\n\t" @@ -128,8 +124,8 @@ static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) { - int tmp; - int new; + unsigned tmp; + unsigned new; asm volatile("movl %2,%0\n\t" "movl %0,%1\n\t" @@ -159,16 +155,16 @@ static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) { - int tmp = ACCESS_ONCE(lock->slock); + struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); - return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1)); + return !!(tmp.tail ^ tmp.head); } static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) { - int tmp = ACCESS_ONCE(lock->slock); + struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); - return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1; + return ((tmp.tail - tmp.head) & TICKET_MASK) > 1; } #ifndef CONFIG_PARAVIRT_SPINLOCKS diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index 7c7a486fcb68..1c51bd231e49 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h @@ -5,11 +5,27 @@ # error "please don't include this file directly" #endif +#include + +#if (CONFIG_NR_CPUS < 256) +typedef u8 __ticket_t; +#else +typedef u16 __ticket_t; +#endif + +#define TICKET_SHIFT (sizeof(__ticket_t) * 8) +#define TICKET_MASK ((__ticket_t)((1 << TICKET_SHIFT) - 1)) + typedef struct arch_spinlock { - unsigned int slock; + union { + unsigned int slock; + struct __raw_tickets { + __ticket_t head, tail; + } tickets; + }; } arch_spinlock_t; -#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } +#define __ARCH_SPIN_LOCK_UNLOCKED { { .slock = 0 } } #include From c576a3ea905c25d50339503e0e5c7fef724e0147 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 3 Jul 2010 01:06:04 +0100 Subject: [PATCH 08/12] x86, ticketlock: Convert spin loop to C The inner loop of __ticket_spin_lock isn't doing anything very special, so reimplement it in C. For the 8 bit ticket lock variant, we use a register union to get direct access to the lower and upper bytes in the tickets, but unfortunately gcc won't generate a direct comparison between the two halves of the register, so the generated asm isn't quite as pretty as the hand-coded version. However benchmarking shows that this is actually a small improvement in runtime performance on some benchmarks, and never a slowdown. We also need to make sure there's a barrier at the end of the lock loop to make sure that the compiler doesn't move any instructions from within the locked region into the region where we don't yet own the lock. Signed-off-by: Jeremy Fitzhardinge Link: http://lkml.kernel.org/r/4E5BCC40.3030501@goop.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/spinlock.h | 58 ++++++++++++++++----------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index ea2a04f69ca9..5240cdefa683 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -57,21 +57,21 @@ #if (NR_CPUS < 256) static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) { - unsigned short inc = 1 << TICKET_SHIFT; + register union { + struct __raw_tickets tickets; + unsigned short slock; + } inc = { .slock = 1 << TICKET_SHIFT }; - asm volatile ( - LOCK_PREFIX "xaddw %w0, %1\n" - "1:\t" - "cmpb %h0, %b0\n\t" - "je 2f\n\t" - "rep ; nop\n\t" - "movb %1, %b0\n\t" - /* don't need lfence here, because loads are in-order */ - "jmp 1b\n" - "2:" - : "+Q" (inc), "+m" (lock->slock) - : - : "memory", "cc"); + asm volatile (LOCK_PREFIX "xaddw %w0, %1\n" + : "+Q" (inc), "+m" (lock->slock) : : "memory", "cc"); + + for (;;) { + if (inc.tickets.head == inc.tickets.tail) + break; + cpu_relax(); + inc.tickets.head = ACCESS_ONCE(lock->tickets.head); + } + barrier(); /* make sure nothing creeps before the lock is taken */ } static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) @@ -104,22 +104,22 @@ static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) { unsigned inc = 1 << TICKET_SHIFT; - unsigned tmp; + __ticket_t tmp; - asm volatile(LOCK_PREFIX "xaddl %0, %1\n" - "movzwl %w0, %2\n\t" - "shrl $16, %0\n\t" - "1:\t" - "cmpl %0, %2\n\t" - "je 2f\n\t" - "rep ; nop\n\t" - "movzwl %1, %2\n\t" - /* don't need lfence here, because loads are in-order */ - "jmp 1b\n" - "2:" - : "+r" (inc), "+m" (lock->slock), "=&r" (tmp) - : - : "memory", "cc"); + asm volatile(LOCK_PREFIX "xaddl %0, %1\n\t" + : "+r" (inc), "+m" (lock->slock) + : : "memory", "cc"); + + tmp = inc; + inc >>= TICKET_SHIFT; + + for (;;) { + if ((__ticket_t)inc == tmp) + break; + cpu_relax(); + tmp = ACCESS_ONCE(lock->tickets.head); + } + barrier(); /* make sure nothing creeps before the lock is taken */ } static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) From 2994488fe5bb721de1ded53af1a2fc41f47f6ddc Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 13 Jul 2010 14:07:45 -0700 Subject: [PATCH 09/12] x86, ticketlock: Convert __ticket_spin_lock to use xadd() Convert the two variants of __ticket_spin_lock() to use xadd(), which has the effect of making them identical, so remove the duplicate function. Signed-off-by: Jeremy Fitzhardinge Link: http://lkml.kernel.org/r/4E5BCC40.3030501@goop.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/spinlock.h | 35 +++++---------------------------- 1 file changed, 5 insertions(+), 30 deletions(-) diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 5240cdefa683..b69e0b473de6 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -54,26 +54,22 @@ * save some instructions and make the code more elegant. There really isn't * much between them in performance though, especially as locks are out of line. */ -#if (NR_CPUS < 256) static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) { - register union { - struct __raw_tickets tickets; - unsigned short slock; - } inc = { .slock = 1 << TICKET_SHIFT }; + register struct __raw_tickets inc = { .tail = 1 }; - asm volatile (LOCK_PREFIX "xaddw %w0, %1\n" - : "+Q" (inc), "+m" (lock->slock) : : "memory", "cc"); + inc = xadd(&lock->tickets, inc); for (;;) { - if (inc.tickets.head == inc.tickets.tail) + if (inc.head == inc.tail) break; cpu_relax(); - inc.tickets.head = ACCESS_ONCE(lock->tickets.head); + inc.head = ACCESS_ONCE(lock->tickets.head); } barrier(); /* make sure nothing creeps before the lock is taken */ } +#if (NR_CPUS < 256) static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) { unsigned int tmp, new; @@ -101,27 +97,6 @@ static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) : "memory", "cc"); } #else -static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) -{ - unsigned inc = 1 << TICKET_SHIFT; - __ticket_t tmp; - - asm volatile(LOCK_PREFIX "xaddl %0, %1\n\t" - : "+r" (inc), "+m" (lock->slock) - : : "memory", "cc"); - - tmp = inc; - inc >>= TICKET_SHIFT; - - for (;;) { - if ((__ticket_t)inc == tmp) - break; - cpu_relax(); - tmp = ACCESS_ONCE(lock->tickets.head); - } - barrier(); /* make sure nothing creeps before the lock is taken */ -} - static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) { unsigned tmp; From 229855d6f3b40d01a903120c433d75e483a0b06d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 13 Jul 2010 15:14:26 -0700 Subject: [PATCH 10/12] x86, ticketlock: Make __ticket_spin_trylock common Make trylock code common regardless of ticket size. (Also, rename arch_spinlock.slock to head_tail.) Signed-off-by: Jeremy Fitzhardinge Link: http://lkml.kernel.org/r/4E5BCC40.3030501@goop.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/spinlock.h | 49 ++++++--------------------- arch/x86/include/asm/spinlock_types.h | 6 ++-- 2 files changed, 15 insertions(+), 40 deletions(-) diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index b69e0b473de6..f5695eeb83ff 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -69,60 +69,33 @@ static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) barrier(); /* make sure nothing creeps before the lock is taken */ } -#if (NR_CPUS < 256) static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) { - unsigned int tmp, new; + arch_spinlock_t old, new; - asm volatile("movzwl %2, %0\n\t" - "cmpb %h0,%b0\n\t" - "leal 0x100(%" REG_PTR_MODE "0), %1\n\t" - "jne 1f\n\t" - LOCK_PREFIX "cmpxchgw %w1,%2\n\t" - "1:" - "sete %b1\n\t" - "movzbl %b1,%0\n\t" - : "=&a" (tmp), "=&q" (new), "+m" (lock->slock) - : - : "memory", "cc"); + old.tickets = ACCESS_ONCE(lock->tickets); + if (old.tickets.head != old.tickets.tail) + return 0; - return tmp; + new.head_tail = old.head_tail + (1 << TICKET_SHIFT); + + /* cmpxchg is a full barrier, so nothing can move before it */ + return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; } +#if (NR_CPUS < 256) static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) { asm volatile(UNLOCK_LOCK_PREFIX "incb %0" - : "+m" (lock->slock) + : "+m" (lock->head_tail) : : "memory", "cc"); } #else -static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) -{ - unsigned tmp; - unsigned new; - - asm volatile("movl %2,%0\n\t" - "movl %0,%1\n\t" - "roll $16, %0\n\t" - "cmpl %0,%1\n\t" - "leal 0x00010000(%" REG_PTR_MODE "0), %1\n\t" - "jne 1f\n\t" - LOCK_PREFIX "cmpxchgl %1,%2\n\t" - "1:" - "sete %b1\n\t" - "movzbl %b1,%0\n\t" - : "=&a" (tmp), "=&q" (new), "+m" (lock->slock) - : - : "memory", "cc"); - - return tmp; -} - static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) { asm volatile(UNLOCK_LOCK_PREFIX "incw %0" - : "+m" (lock->slock) + : "+m" (lock->head_tail) : : "memory", "cc"); } diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index 1c51bd231e49..8ebd5df7451e 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h @@ -9,8 +9,10 @@ #if (CONFIG_NR_CPUS < 256) typedef u8 __ticket_t; +typedef u16 __ticketpair_t; #else typedef u16 __ticket_t; +typedef u32 __ticketpair_t; #endif #define TICKET_SHIFT (sizeof(__ticket_t) * 8) @@ -18,14 +20,14 @@ typedef u16 __ticket_t; typedef struct arch_spinlock { union { - unsigned int slock; + __ticketpair_t head_tail; struct __raw_tickets { __ticket_t head, tail; } tickets; }; } arch_spinlock_t; -#define __ARCH_SPIN_LOCK_UNLOCKED { { .slock = 0 } } +#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } #include From 61e2cd0acc248c14793cefd7e23e209be9e0b70d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 29 Aug 2011 14:47:58 -0700 Subject: [PATCH 11/12] x86, cmpxchg: Use __compiletime_error() to make usage messages a bit nicer Use __compiletime_error() to produce a compile-time error rather than link-time, where available. Signed-off-by: Jeremy Fitzhardinge Link: http://lkml.kernel.org/r/4E5BCC40.3030501@goop.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/cmpxchg.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index 0d0d9cdd3309..5d3acdf5a7a6 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -1,12 +1,19 @@ #ifndef ASM_X86_CMPXCHG_H #define ASM_X86_CMPXCHG_H +#include #include /* Provides LOCK_PREFIX */ -/* Non-existant functions to indicate usage errors at link time. */ -extern void __xchg_wrong_size(void); -extern void __cmpxchg_wrong_size(void); -extern void __xadd_wrong_size(void); +/* + * Non-existant functions to indicate usage errors at link time + * (or compile-time if the compiler implements __compiletime_error(). + */ +extern void __xchg_wrong_size(void) + __compiletime_error("Bad argument size for xchg"); +extern void __cmpxchg_wrong_size(void) + __compiletime_error("Bad argument size for cmpxchg"); +extern void __xadd_wrong_size(void) + __compiletime_error("Bad argument size for xadd"); /* * Constants for operation sizes. On 32-bit, the 64-bit size it set to From 4a7f340c6a75ec5fca23d9c80a59f3f28cc4a61e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 8 Sep 2011 17:50:12 -0700 Subject: [PATCH 12/12] x86, ticketlock: remove obsolete comment The note about partial registers is not really relevent now that we rely on gcc to generate all the assembler. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/spinlock.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index f5695eeb83ff..972c260919a3 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -49,10 +49,6 @@ * issues and should be optimal for the uncontended case. Note the tail must be * in the high part, because a wide xadd increment of the low part would carry * up and contaminate the high part. - * - * With fewer than 2^8 possible CPUs, we can use x86's partial registers to - * save some instructions and make the code more elegant. There really isn't - * much between them in performance though, especially as locks are out of line. */ static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) {