MIPS: Enforce strong ordering for MMIO accessors

Architecturally the MIPS ISA does not specify ordering requirements for
uncached bus accesses, such as those MMIO operations normally use, and
therefore explicit barriers have to be inserted between MMIO accesses
where unspecified ordering of operations would cause unpredictable
results.

For example the R2020 write buffer implements write gathering and
combining[1] and as used with the DECstation models 2100 and 3100 for
MMIO accesses it bypasses the read buffer entirely, because conflicts
are resolved by the memory controller for DRAM accesses only[2] (NB the
R2020 and R3020 buffers are the same except for the maximum clock rate).

Consequently if a device has say a 16-bit control register at offset 0,
a 16-bit event mask register at offset 2 and a 16-bit reset register at
offset 4, and the initial value of the control register is 0x1111, then
in the absence of barriers a hypothetical code sequence like this:

u16 init_dev(u16 __iomem *dev)
{
	u16 x;

	write16(dev + 2, 0xffff);
	write16(dev + 0, 0x2222);
	x = read16(dev + 0);
	write16(dev + 1, 0x3333);
	write16(dev + 0, 0x4444);

	return x;
}

will return 0x1111 and issue a single 32-bit write of 0x33334444 (in the
little-endian bus configuration) to offset 0 on the system bus.

This is because the read to set `x' from offset 0 bypasses the write of
0x2222 that is still in the write buffer pending the completion of the
write of 0xffff to the reset register.  Then the write of 0x3333 to the
event mask register is merged with the preceding write to the control
register as they share the same word address, making it a 32-bit write
of 0x33332222 to offset 0.  Finally the write of 0x4444 to the control
register is combined with the outstanding 32-bit write of 0x33332222 to
offset 0, because, again, it shares the same address.

This is an example from a legacy system, given here because it is well
documented and affects a machine we actually support.  But likewise
modern MIPS systems may implement weak MMIO ordering, possibly even
without having it clearly documented except for being compliant with the
architecture specification with respect to the currently defined SYNC
instruction variants[3].

Considering the above and that we are required to implement MMIO
accessors such that individual accesses made with them are strongly
ordered with respect to each other[4], add the necessary barriers to our
`inX', `outX', `readX' and `writeX' handlers, as well as the associated
special use variants.  It's up to platforms then to possibly define the
respective barriers so as to expand to nil if no ordering enforcement is
actually needed for a given system; SYNC is supposed to be as cheap as
a NOP on strongly ordered MIPS implementations though.

Retain the option to generate weakly-ordered accessors, so that the
arrangement for `war_io_reorder_wmb' is not lost in case we need it for
fully raw accessors in the future.  The reason for this is that it is
unclear from commit 1e820da3c9 ("MIPS: Loongson-3: Introduce
CONFIG_LOONGSON3_ENHANCEMENT") and especially commit 8faca49a67
("MIPS: Modify core io.h macros to account for the Octeon Errata
Core-301.") why those barriers are needed there under the previous
assumption that these accessors can be weakly ordered.

References:

[1] "LR3020 Write Buffer", LSI Logic Corporation, September 1988,
    Section "Byte Gathering", pp. 6-7

[2] "DECstation 3100 Desktop Workstation Functional Specification",
    Digital Equipment Corporation, Revision 1.3, August 28, 1990,
    Section 6.1 "Processor", p. 4

[3] "MIPS Architecture For Programmers, Volume II-A: The MIPS32
    Instruction Set Manual", Imagination Technologies LTD, Document
    Number: MD00086, Revision 6.06, December 15, 2016, Table 5.5
    "Encodings of the Bits[10:6] of the SYNC instruction; the SType
    Field", p. 409

[4] "LINUX KERNEL MEMORY BARRIERS", Documentation/memory-barriers.txt,
    Section "KERNEL I/O BARRIER EFFECTS"

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
References: 8faca49a67 ("MIPS: Modify core io.h macros to account for the Octeon Errata Core-301.")
References: 1e820da3c9 ("MIPS: Loongson-3: Introduce CONFIG_LOONGSON3_ENHANCEMENT")
Signed-off-by: Paul Burton <paul.burton@mips.com>
Patchwork: https://patchwork.linux-mips.org/patch/20864/
Cc: Ralf Baechle <ralf@linux-mips.org>
This commit is contained in:
Maciej W. Rozycki 2018-10-08 01:37:16 +01:00 committed by Paul Burton
parent a711d43cbb
commit 3d474dacae
No known key found for this signature in database
GPG Key ID: 3EA79FACB57500DD
1 changed files with 20 additions and 8 deletions

View File

@ -311,7 +311,7 @@ static inline void iounmap(const volatile void __iomem *addr)
#define war_io_reorder_wmb() barrier()
#endif
#define __BUILD_MEMORY_SINGLE(pfx, bwlq, type, irq) \
#define __BUILD_MEMORY_SINGLE(pfx, bwlq, type, barrier, irq) \
\
static inline void pfx##write##bwlq(type val, \
volatile void __iomem *mem) \
@ -319,7 +319,10 @@ static inline void pfx##write##bwlq(type val, \
volatile type *__mem; \
type __val; \
\
war_io_reorder_wmb(); \
if (barrier) \
iobarrier_rw(); \
else \
war_io_reorder_wmb(); \
\
__mem = (void *)__swizzle_addr_##bwlq((unsigned long)(mem)); \
\
@ -356,6 +359,9 @@ static inline type pfx##read##bwlq(const volatile void __iomem *mem) \
\
__mem = (void *)__swizzle_addr_##bwlq((unsigned long)(mem)); \
\
if (barrier) \
iobarrier_rw(); \
\
if (sizeof(type) != sizeof(u64) || sizeof(u64) == sizeof(long)) \
__val = *__mem; \
else if (cpu_has_64bits) { \
@ -383,14 +389,17 @@ static inline type pfx##read##bwlq(const volatile void __iomem *mem) \
return pfx##ioswab##bwlq(__mem, __val); \
}
#define __BUILD_IOPORT_SINGLE(pfx, bwlq, type, p) \
#define __BUILD_IOPORT_SINGLE(pfx, bwlq, type, barrier, p) \
\
static inline void pfx##out##bwlq##p(type val, unsigned long port) \
{ \
volatile type *__addr; \
type __val; \
\
war_io_reorder_wmb(); \
if (barrier) \
iobarrier_rw(); \
else \
war_io_reorder_wmb(); \
\
__addr = (void *)__swizzle_addr_##bwlq(mips_io_port_base + port); \
\
@ -411,6 +420,9 @@ static inline type pfx##in##bwlq##p(unsigned long port) \
\
BUILD_BUG_ON(sizeof(type) > sizeof(unsigned long)); \
\
if (barrier) \
iobarrier_rw(); \
\
__val = *__addr; \
\
/* prevent prefetching of coherent DMA data prematurely */ \
@ -420,7 +432,7 @@ static inline type pfx##in##bwlq##p(unsigned long port) \
#define __BUILD_MEMORY_PFX(bus, bwlq, type) \
\
__BUILD_MEMORY_SINGLE(bus, bwlq, type, 1)
__BUILD_MEMORY_SINGLE(bus, bwlq, type, 1, 1)
#define BUILDIO_MEM(bwlq, type) \
\
@ -434,8 +446,8 @@ BUILDIO_MEM(l, u32)
BUILDIO_MEM(q, u64)
#define __BUILD_IOPORT_PFX(bus, bwlq, type) \
__BUILD_IOPORT_SINGLE(bus, bwlq, type,) \
__BUILD_IOPORT_SINGLE(bus, bwlq, type, _p)
__BUILD_IOPORT_SINGLE(bus, bwlq, type, 1,) \
__BUILD_IOPORT_SINGLE(bus, bwlq, type, 1, _p)
#define BUILDIO_IOPORT(bwlq, type) \
__BUILD_IOPORT_PFX(, bwlq, type) \
@ -450,7 +462,7 @@ BUILDIO_IOPORT(q, u64)
#define __BUILDIO(bwlq, type) \
\
__BUILD_MEMORY_SINGLE(____raw_, bwlq, type, 0)
__BUILD_MEMORY_SINGLE(____raw_, bwlq, type, 1, 0)
__BUILDIO(q, u64)