From 4650d02ad2d9b2c1c7aa36055166db6aee68f72e Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@dabbelt.com>
Date: Tue, 14 Nov 2017 11:35:37 -0800
Subject: [PATCH 1/8] RISC-V: Remove unused arguments from ATOMIC_OP

Our atomics are generated from a complicated series of preprocessor
macros, each of which is slightly different from the last.  When writing
the macros I'd accidentally left some unused arguments floating around.
This patch removes the unused macro arguments.

Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>
---
 arch/riscv/include/asm/atomic.h | 94 ++++++++++++++++-----------------
 1 file changed, 47 insertions(+), 47 deletions(-)

diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index e2e37c57cbeb..40c73dd59c15 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -50,30 +50,30 @@ static __always_inline void atomic64_set(atomic64_t *v, long i)
  * have the AQ or RL bits set.  These don't return anything, so there's only
  * one version to worry about.
  */
-#define ATOMIC_OP(op, asm_op, c_op, I, asm_type, c_type, prefix)				\
-static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)		\
-{												\
-	__asm__ __volatile__ (									\
-		"amo" #asm_op "." #asm_type " zero, %1, %0"					\
-		: "+A" (v->counter)								\
-		: "r" (I)									\
-		: "memory");									\
+#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix)				\
+static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)	\
+{											\
+	__asm__ __volatile__ (								\
+		"amo" #asm_op "." #asm_type " zero, %1, %0"				\
+		: "+A" (v->counter)							\
+		: "r" (I)								\
+		: "memory");								\
 }
 
 #ifdef CONFIG_GENERIC_ATOMIC64
-#define ATOMIC_OPS(op, asm_op, c_op, I)			\
-        ATOMIC_OP (op, asm_op, c_op, I, w,  int,   )
+#define ATOMIC_OPS(op, asm_op, I)			\
+        ATOMIC_OP (op, asm_op, I, w,  int,   )
 #else
-#define ATOMIC_OPS(op, asm_op, c_op, I)			\
-        ATOMIC_OP (op, asm_op, c_op, I, w,  int,   )	\
-        ATOMIC_OP (op, asm_op, c_op, I, d, long, 64)
+#define ATOMIC_OPS(op, asm_op, I)			\
+        ATOMIC_OP (op, asm_op, I, w,  int,   )	\
+        ATOMIC_OP (op, asm_op, I, d, long, 64)
 #endif
 
-ATOMIC_OPS(add, add, +,  i)
-ATOMIC_OPS(sub, add, +, -i)
-ATOMIC_OPS(and, and, &,  i)
-ATOMIC_OPS( or,  or, |,  i)
-ATOMIC_OPS(xor, xor, ^,  i)
+ATOMIC_OPS(add, add,  i)
+ATOMIC_OPS(sub, add, -i)
+ATOMIC_OPS(and, and,  i)
+ATOMIC_OPS( or,  or,  i)
+ATOMIC_OPS(xor, xor,  i)
 
 #undef ATOMIC_OP
 #undef ATOMIC_OPS
@@ -83,7 +83,7 @@ ATOMIC_OPS(xor, xor, ^,  i)
  * There's two flavors of these: the arithmatic ops have both fetch and return
  * versions, while the logical ops only have fetch versions.
  */
-#define ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, asm_type, c_type, prefix)			\
+#define ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, asm_type, c_type, prefix)				\
 static __always_inline c_type atomic##prefix##_fetch_##op##c_or(c_type i, atomic##prefix##_t *v)	\
 {													\
 	register c_type ret;										\
@@ -103,13 +103,13 @@ static __always_inline c_type atomic##prefix##_##op##_return##c_or(c_type i, ato
 
 #ifdef CONFIG_GENERIC_ATOMIC64
 #define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or)				\
-        ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, w,  int,   )	\
+        ATOMIC_FETCH_OP (op, asm_op,       I, asm_or, c_or, w,  int,   )	\
         ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w,  int,   )
 #else
 #define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or)				\
-        ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, w,  int,   )	\
+        ATOMIC_FETCH_OP (op, asm_op,       I, asm_or, c_or, w,  int,   )	\
         ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w,  int,   )	\
-        ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, d, long, 64)	\
+        ATOMIC_FETCH_OP (op, asm_op,       I, asm_or, c_or, d, long, 64)	\
         ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, d, long, 64)
 #endif
 
@@ -126,28 +126,28 @@ ATOMIC_OPS(sub, add, +, -i, .aqrl,         )
 #undef ATOMIC_OPS
 
 #ifdef CONFIG_GENERIC_ATOMIC64
-#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or)				\
-        ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, w,  int,   )
+#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or)				\
+        ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w,  int,   )
 #else
-#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or)				\
-        ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, w,  int,   )		\
-        ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, d, long, 64)
+#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or)				\
+        ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w,  int,   )	\
+        ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, d, long, 64)
 #endif
 
-ATOMIC_OPS(and, and, &,  i,      , _relaxed)
-ATOMIC_OPS(and, and, &,  i, .aq  , _acquire)
-ATOMIC_OPS(and, and, &,  i, .rl  , _release)
-ATOMIC_OPS(and, and, &,  i, .aqrl,         )
+ATOMIC_OPS(and, and, i,      , _relaxed)
+ATOMIC_OPS(and, and, i, .aq  , _acquire)
+ATOMIC_OPS(and, and, i, .rl  , _release)
+ATOMIC_OPS(and, and, i, .aqrl,         )
 
-ATOMIC_OPS( or,  or, |,  i,      , _relaxed)
-ATOMIC_OPS( or,  or, |,  i, .aq  , _acquire)
-ATOMIC_OPS( or,  or, |,  i, .rl  , _release)
-ATOMIC_OPS( or,  or, |,  i, .aqrl,         )
+ATOMIC_OPS( or,  or, i,      , _relaxed)
+ATOMIC_OPS( or,  or, i, .aq  , _acquire)
+ATOMIC_OPS( or,  or, i, .rl  , _release)
+ATOMIC_OPS( or,  or, i, .aqrl,         )
 
-ATOMIC_OPS(xor, xor, ^,  i,      , _relaxed)
-ATOMIC_OPS(xor, xor, ^,  i, .aq  , _acquire)
-ATOMIC_OPS(xor, xor, ^,  i, .rl  , _release)
-ATOMIC_OPS(xor, xor, ^,  i, .aqrl,         )
+ATOMIC_OPS(xor, xor, i,      , _relaxed)
+ATOMIC_OPS(xor, xor, i, .aq  , _acquire)
+ATOMIC_OPS(xor, xor, i, .rl  , _release)
+ATOMIC_OPS(xor, xor, i, .aqrl,         )
 
 #undef ATOMIC_OPS
 
@@ -182,13 +182,13 @@ ATOMIC_OPS(add_negative, add,  <, 0)
 #undef ATOMIC_OP
 #undef ATOMIC_OPS
 
-#define ATOMIC_OP(op, func_op, c_op, I, c_type, prefix)				\
+#define ATOMIC_OP(op, func_op, I, c_type, prefix)				\
 static __always_inline void atomic##prefix##_##op(atomic##prefix##_t *v)	\
 {										\
 	atomic##prefix##_##func_op(I, v);					\
 }
 
-#define ATOMIC_FETCH_OP(op, func_op, c_op, I, c_type, prefix)				\
+#define ATOMIC_FETCH_OP(op, func_op, I, c_type, prefix)					\
 static __always_inline c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v)	\
 {											\
 	return atomic##prefix##_fetch_##func_op(I, v);					\
@@ -202,16 +202,16 @@ static __always_inline c_type atomic##prefix##_##op##_return(atomic##prefix##_t
 
 #ifdef CONFIG_GENERIC_ATOMIC64
 #define ATOMIC_OPS(op, asm_op, c_op, I)						\
-        ATOMIC_OP       (op, asm_op, c_op, I,  int,   )				\
-        ATOMIC_FETCH_OP (op, asm_op, c_op, I,  int,   )				\
+        ATOMIC_OP       (op, asm_op,       I,  int,   )				\
+        ATOMIC_FETCH_OP (op, asm_op,       I,  int,   )				\
         ATOMIC_OP_RETURN(op, asm_op, c_op, I,  int,   )
 #else
 #define ATOMIC_OPS(op, asm_op, c_op, I)						\
-        ATOMIC_OP       (op, asm_op, c_op, I,  int,   )				\
-        ATOMIC_FETCH_OP (op, asm_op, c_op, I,  int,   )				\
+        ATOMIC_OP       (op, asm_op,       I,  int,   )				\
+        ATOMIC_FETCH_OP (op, asm_op,       I,  int,   )				\
         ATOMIC_OP_RETURN(op, asm_op, c_op, I,  int,   )				\
-        ATOMIC_OP       (op, asm_op, c_op, I, long, 64)				\
-        ATOMIC_FETCH_OP (op, asm_op, c_op, I, long, 64)				\
+        ATOMIC_OP       (op, asm_op,       I, long, 64)				\
+        ATOMIC_FETCH_OP (op, asm_op,       I, long, 64)				\
         ATOMIC_OP_RETURN(op, asm_op, c_op, I, long, 64)
 #endif
 

From 8286d51a6c244738aeb071fcd7d2e36a3374e150 Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@sifive.com>
Date: Tue, 28 Nov 2017 14:02:50 -0800
Subject: [PATCH 2/8] RISC-V: Comment on why {,cmp}xchg is ordered how it is

This is another memory model FIXME.

Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/include/asm/atomic.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 40c73dd59c15..e65d1cd89e28 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -300,8 +300,13 @@ static __always_inline long atomic64_inc_not_zero(atomic64_t *v)
 
 /*
  * atomic_{cmp,}xchg is required to have exactly the same ordering semantics as
- * {cmp,}xchg and the operations that return, so they need a barrier.  We just
- * use the other implementations directly.
+ * {cmp,}xchg and the operations that return, so they need a barrier.
+ */
+/*
+ * FIXME: atomic_cmpxchg_{acquire,release,relaxed} are all implemented by
+ * assigning the same barrier to both the LR and SC operations, but that might
+ * not make any sense.  We're waiting on a memory model specification to
+ * determine exactly what the right thing to do is here.
  */
 #define ATOMIC_OP(c_t, prefix, c_or, size, asm_or)						\
 static __always_inline c_t atomic##prefix##_cmpxchg##c_or(atomic##prefix##_t *v, c_t o, c_t n) 	\

From 61a60d35b7d1b0b3a31bc21d15805a3654f60920 Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@sifive.com>
Date: Tue, 28 Nov 2017 14:03:48 -0800
Subject: [PATCH 3/8] RISC-V: Remove __smb_mb__{before,after}_atomic

These duplicate the asm-generic definitions and therefore aren't useful.

Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/include/asm/barrier.h | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
index 183534b7c39b..455ee16127fb 100644
--- a/arch/riscv/include/asm/barrier.h
+++ b/arch/riscv/include/asm/barrier.h
@@ -38,21 +38,6 @@
 #define smp_rmb()	RISCV_FENCE(r,r)
 #define smp_wmb()	RISCV_FENCE(w,w)
 
-/*
- * These fences exist to enforce ordering around the relaxed AMOs.  The
- * documentation defines that
- * "
- *     atomic_fetch_add();
- *   is equivalent to:
- *     smp_mb__before_atomic();
- *     atomic_fetch_add_relaxed();
- *     smp_mb__after_atomic();
- * "
- * So we emit full fences on both sides.
- */
-#define __smb_mb__before_atomic()	smp_mb()
-#define __smb_mb__after_atomic()	smp_mb()
-
 /*
  * These barriers prevent accesses performed outside a spinlock from being moved
  * inside a spinlock.  Since RISC-V sets the aq/rl bits on our spinlock only

From 3343eb6806f365b9e3d451040671fa9336e57513 Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@sifive.com>
Date: Tue, 28 Nov 2017 14:03:55 -0800
Subject: [PATCH 4/8] RISC-V: Remove smb_mb__{before,after}_spinlock()

These are obsolete.

Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/include/asm/barrier.h | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
index 455ee16127fb..773c4e039cd7 100644
--- a/arch/riscv/include/asm/barrier.h
+++ b/arch/riscv/include/asm/barrier.h
@@ -38,14 +38,6 @@
 #define smp_rmb()	RISCV_FENCE(r,r)
 #define smp_wmb()	RISCV_FENCE(w,w)
 
-/*
- * These barriers prevent accesses performed outside a spinlock from being moved
- * inside a spinlock.  Since RISC-V sets the aq/rl bits on our spinlock only
- * enforce release consistency, we need full fences here.
- */
-#define smb_mb__before_spinlock()	smp_mb()
-#define smb_mb__after_spinlock()	smp_mb()
-
 #include <asm-generic/barrier.h>
 
 #endif /* __ASSEMBLY__ */

From 9347ce54cd699db92d37e66191aa4b9a0a92304e Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@sifive.com>
Date: Tue, 28 Nov 2017 14:04:05 -0800
Subject: [PATCH 5/8] RISC-V: __test_and_op_bit_ord should be strongly ordered

I misread the documentation.  After looking at it again, the
documentation is actually as clear as it can be; it's just that I didn't
actually read it in order and therefore did the wrong thing.

Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/include/asm/bitops.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index 7c281ef1d583..f30daf26f08f 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -67,7 +67,7 @@
 		: "memory");
 
 #define __test_and_op_bit(op, mod, nr, addr) 			\
-	__test_and_op_bit_ord(op, mod, nr, addr, )
+	__test_and_op_bit_ord(op, mod, nr, addr, .aqrl)
 #define __op_bit(op, mod, nr, addr)				\
 	__op_bit_ord(op, mod, nr, addr, )
 

From 21db403660d1433b8a02b26d5d4084921b857c40 Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@sifive.com>
Date: Tue, 28 Nov 2017 14:05:04 -0800
Subject: [PATCH 6/8] RISC-V: Add READ_ONCE in arch_spin_is_locked()

This was just incorrect in the original version.

Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/include/asm/spinlock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
index 04c71d938afd..a6a005c4f2fb 100644
--- a/arch/riscv/include/asm/spinlock.h
+++ b/arch/riscv/include/asm/spinlock.h
@@ -24,7 +24,7 @@
 
 /* FIXME: Replace this with a ticket lock, like MIPS. */
 
-#define arch_spin_is_locked(x)	((x)->lock != 0)
+#define arch_spin_is_locked(x)	(READ_ONCE((x)->lock) != 0)
 
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {

From c901e45a999a1935d7adf653e1cf12dfbcd737aa Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@sifive.com>
Date: Tue, 28 Nov 2017 14:06:17 -0800
Subject: [PATCH 7/8] RISC-V: `sfence.vma` orders the instruction cache

This is just a comment change, but it's one that bit me on the mailing
list.  It turns out that issuing a `sfence.vma` enforces instruction
cache ordering in addition to TLB ordering.  This isn't explicitly
called out in the ISA manual, but Andrew will be making that more clear
in a future revision.

CC: Andrew Waterman <andrew@sifive.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/include/asm/tlbflush.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 5ee4ae370b5e..c79fab3d377d 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -17,7 +17,10 @@
 
 #ifdef CONFIG_MMU
 
-/* Flush entire local TLB */
+/*
+ * Flush entire local TLB.  'sfence.vma' implicitly fences with the instruction
+ * cache as well, so a 'fence.i' is not necessary.
+ */
 static inline void local_flush_tlb_all(void)
 {
 	__asm__ __volatile__ ("sfence.vma" : : : "memory");

From bf730552734372e45b10fe056726de1950fdfdde Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@sifive.com>
Date: Tue, 28 Nov 2017 14:06:31 -0800
Subject: [PATCH 8/8] RISC-V: remove spin_unlock_wait()

This was removed from the other architectures in commit
952111d7db02 ("arch: Remove spin_unlock_wait() arch-specific
definitions").  That landed between when we got upstream and when our
patches were reviewed, so this is a followup patch.

Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/include/asm/spinlock.h | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
index a6a005c4f2fb..2fd27e8ef1fd 100644
--- a/arch/riscv/include/asm/spinlock.h
+++ b/arch/riscv/include/asm/spinlock.h
@@ -58,15 +58,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 	}
 }
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_rmb();
-	do {
-		cpu_relax();
-	} while (arch_spin_is_locked(lock));
-	smp_acquire__after_ctrl_dep();
-}
-
 /***********************************************************/
 
 static inline void arch_read_lock(arch_rwlock_t *lock)