riscv: implement Zicbom-based CMO instructions + the t-head variant

This series is based on the alternatives changes done in my svpbmt
series and thus also depends on Atish's isa-extension parsing series.

It implements the use of the cache-management instructions from the Zicbom
extension to handle cache flushes and similar operations on platforms that
need them.

SoCs using cpu cores from T-Head like the Allwinner D1 implement a
different set of cache instructions. But while they are different
instructions, they provide the same functionality, so a variant can easily
hook into the existing alternatives mechanism on those.

[Palmer:  Some minor fixups, including a RISCV_ISA_ZICBOM dependency on
MMU that's probably not strictly necessary.  The Zicbom support will
trip up sparse for users that have new toolchains, I just sent a patch.]

Link: https://lore.kernel.org/all/20220706231536.2041855-1-heiko@sntech.de/
Link: https://lore.kernel.org/linux-sparse/20220811033138.20676-1-palmer@rivosinc.com/T/#u

* palmer/riscv-zicbom:
  riscv: implement cache-management errata for T-Head SoCs
  riscv: Add support for non-coherent devices using zicbom extension
  dt-bindings: riscv: document cbom-block-size
  of: also handle dma-noncoherent in of_dma_is_coherent()
This commit is contained in:
Palmer Dabbelt 2022-08-10 18:23:51 -07:00
commit 3aefb2ee5b
No known key found for this signature in database
GPG Key ID: EF4CA1502CCBAB41
15 changed files with 297 additions and 9 deletions

View File

@ -63,6 +63,11 @@ properties:
- riscv,sv48
- riscv,none
riscv,cbom-block-size:
$ref: /schemas/types.yaml#/definitions/uint32
description:
The blocksize in bytes for the Zicbom cache operations.
riscv,isa:
description:
Identifies the specific RISC-V instruction set architecture

View File

@ -113,6 +113,7 @@ config RISCV
select MODULES_USE_ELF_RELA if MODULES
select MODULE_SECTIONS if MODULES
select OF
select OF_DMA_DEFAULT_COHERENT
select OF_EARLY_FLATTREE
select OF_IRQ
select PCI_DOMAINS_GENERIC if PCI
@ -218,6 +219,14 @@ config PGTABLE_LEVELS
config LOCKDEP_SUPPORT
def_bool y
config RISCV_DMA_NONCOHERENT
bool
select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SETUP_DMA_OPS
select DMA_DIRECT_REMAP
source "arch/riscv/Kconfig.socs"
source "arch/riscv/Kconfig.erratas"
@ -392,6 +401,28 @@ config RISCV_ISA_SVPBMT
If you don't know what to do here, say Y.
config CC_HAS_ZICBOM
bool
default y if 64BIT && $(cc-option,-mabi=lp64 -march=rv64ima_zicbom)
default y if 32BIT && $(cc-option,-mabi=ilp32 -march=rv32ima_zicbom)
config RISCV_ISA_ZICBOM
bool "Zicbom extension support for non-coherent DMA operation"
depends on CC_HAS_ZICBOM
depends on !XIP_KERNEL && MMU
select RISCV_DMA_NONCOHERENT
select RISCV_ALTERNATIVE
default y
help
Adds support to dynamically detect the presence of the ZICBOM
extension (Cache Block Management Operations) and enable its
usage.
The Zicbom extension can be used to handle for example
non-coherent DMA support on devices that need it.
If you don't know what to do here, say Y.
config FPU
bool "FPU support"
default y

View File

@ -55,4 +55,15 @@ config ERRATA_THEAD_PBMT
If you don't know what to do here, say "Y".
config ERRATA_THEAD_CMO
bool "Apply T-Head cache management errata"
depends on ERRATA_THEAD
select RISCV_DMA_NONCOHERENT
default y
help
This will apply the cache management errata to handle the
non-standard implementation of non-coherent operations on T-Head SoCs.
If you don't know what to do here, say "Y".
endmenu # "CPU errata selection"

View File

@ -56,6 +56,10 @@ riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c
toolchain-need-zicsr-zifencei := $(call cc-option-yn, -march=$(riscv-march-y)_zicsr_zifencei)
riscv-march-$(toolchain-need-zicsr-zifencei) := $(riscv-march-y)_zicsr_zifencei
# Check if the toolchain supports Zicbom extension
toolchain-supports-zicbom := $(call cc-option-yn, -march=$(riscv-march-y)_zicbom)
riscv-march-$(toolchain-supports-zicbom) := $(riscv-march-y)_zicbom
KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y))
KBUILD_AFLAGS += -march=$(riscv-march-y)

View File

@ -27,6 +27,23 @@ static bool errata_probe_pbmt(unsigned int stage,
return false;
}
/*
 * Decide whether the T-Head cache-management erratum applies.
 *
 * Reports true only when support is configured in, both arch_id and
 * impid are zero, and the call is not for the early-boot alternatives
 * stage. On a match it also records that non-coherent operations are
 * supported via riscv_noncoherent_supported().
 */
static bool errata_probe_cmo(unsigned int stage,
			     unsigned long arch_id, unsigned long impid)
{
#ifdef CONFIG_ERRATA_THEAD_CMO
	if (arch_id == 0 && impid == 0 &&
	    stage != RISCV_ALTERNATIVES_EARLY_BOOT) {
		riscv_noncoherent_supported();
		return true;
	}
#endif
	return false;
}
static u32 thead_errata_probe(unsigned int stage,
unsigned long archid, unsigned long impid)
{
@ -35,6 +52,9 @@ static u32 thead_errata_probe(unsigned int stage,
if (errata_probe_pbmt(stage, archid, impid))
cpu_req_errata |= (1U << ERRATA_THEAD_PBMT);
if (errata_probe_cmo(stage, archid, impid))
cpu_req_errata |= (1U << ERRATA_THEAD_CMO);
return cpu_req_errata;
}

View File

@ -11,6 +11,10 @@
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
#ifdef CONFIG_RISCV_DMA_NONCOHERENT
#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
#endif
/*
* RISC-V requires the stack pointer to be 16-byte aligned, so ensure that
* the flat loader aligns it accordingly.

View File

@ -42,6 +42,16 @@ void flush_icache_mm(struct mm_struct *mm, bool local);
#endif /* CONFIG_SMP */
/*
 * Initialise the cache-block size used for Zicbom operations. When
 * CONFIG_RISCV_ISA_ZICBOM is not set this is an empty inline stub, so
 * callers do not need their own #ifdef.
 */
#ifdef CONFIG_RISCV_ISA_ZICBOM
void riscv_init_cbom_blocksize(void);
#else
static inline void riscv_init_cbom_blocksize(void) { }
#endif
/*
 * Record that non-coherent cache-maintenance operations are available.
 * Only declared with CONFIG_RISCV_DMA_NONCOHERENT; there is no stub, so
 * callers must themselves be built only when that option is enabled.
 */
#ifdef CONFIG_RISCV_DMA_NONCOHERENT
void riscv_noncoherent_supported(void);
#endif
/*
* Bits in sys_riscv_flush_icache()'s flags argument.
*/

View File

@ -16,11 +16,13 @@
#ifdef CONFIG_ERRATA_THEAD
#define ERRATA_THEAD_PBMT 0
#define ERRATA_THEAD_NUMBER 1
#define ERRATA_THEAD_CMO 1
#define ERRATA_THEAD_NUMBER 2
#endif
#define CPUFEATURE_SVPBMT 0
#define CPUFEATURE_NUMBER 1
#define CPUFEATURE_ZICBOM 1
#define CPUFEATURE_NUMBER 2
#ifdef __ASSEMBLY__
@ -87,6 +89,59 @@ asm volatile(ALTERNATIVE( \
#define ALT_THEAD_PMA(_val)
#endif
/*
* dcache.ipa rs1 (invalidate, physical address)
* | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
* 0000001 01010 rs1 000 00000 0001011
* dcache.iva rs1 (invalidate, virtual address)
* 0000001 00110 rs1 000 00000 0001011
*
* dcache.cpa rs1 (clean, physical address)
* | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
* 0000001 01001 rs1 000 00000 0001011
* dcache.cva rs1 (clean, virtual address)
* 0000001 00100 rs1 000 00000 0001011
*
* dcache.cipa rs1 (clean then invalidate, physical address)
* | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
* 0000001 01011 rs1 000 00000 0001011
* dcache.civa rs1 (... virtual address)
* 0000001 00111 rs1 000 00000 0001011
*
* sync.s (make sure all cache operations finished)
* | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
* 0000000 11001 00000 000 00000 0001011
*/
#define THEAD_inval_A0 ".long 0x0265000b"
#define THEAD_clean_A0 ".long 0x0245000b"
#define THEAD_flush_A0 ".long 0x0275000b"
#define THEAD_SYNC_S ".long 0x0190000b"
/*
 * ALT_CMO_OP(_op, _start, _size, _cachesize)
 *
 * Emit a runtime-patched cache-maintenance loop over a memory range.
 *
 * _op        - operation name; pasted into "cbo.<_op>" for the Zicbom
 *              variant and into THEAD_<_op>_A0 for the T-Head variant
 *              (THEAD_inval_A0, THEAD_clean_A0 and THEAD_flush_A0 exist).
 * _start     - start address of the range; masked down with
 *              ~(_cachesize - 1) before the loop
 *              (NOTE(review): the mask presumably expects _cachesize to
 *              be a power of two - confirm).
 * _size      - length of the range in bytes; the loop runs while
 *              a0 < _start + _size.
 * _cachesize - step added to a0 after each operation. The argument is
 *              expanded twice, so it should be free of side effects.
 *
 * The unpatched default is six nops. The CPUFEATURE_ZICBOM alternative
 * issues "cbo.<_op> (a0)" per step and pads with one trailing nop; the
 * ERRATA_THEAD_CMO alternative issues the hand-encoded T-Head instruction
 * per step and finishes with THEAD_SYNC_S. Both variants use a0 as the
 * cursor, which is why a0 is listed as clobbered.
 */
#define ALT_CMO_OP(_op, _start, _size, _cachesize) \
asm volatile(ALTERNATIVE_2( \
__nops(6), \
"mv a0, %1\n\t" \
"j 2f\n\t" \
"3:\n\t" \
"cbo." __stringify(_op) " (a0)\n\t" \
"add a0, a0, %0\n\t" \
"2:\n\t" \
"bltu a0, %2, 3b\n\t" \
"nop", 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \
"mv a0, %1\n\t" \
"j 2f\n\t" \
"3:\n\t" \
THEAD_##_op##_A0 "\n\t" \
"add a0, a0, %0\n\t" \
"2:\n\t" \
"bltu a0, %2, 3b\n\t" \
THEAD_SYNC_S, THEAD_VENDOR_ID, \
ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO) \
: : "r"(_cachesize), \
"r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \
"r"((unsigned long)(_start) + (_size)) \
: "a0")
#endif /* __ASSEMBLY__ */
#endif

View File

@ -54,6 +54,7 @@ extern unsigned long elf_hwcap;
enum riscv_isa_ext_id {
RISCV_ISA_EXT_SSCOFPMF = RISCV_ISA_EXT_BASE,
RISCV_ISA_EXT_SVPBMT,
RISCV_ISA_EXT_ZICBOM,
RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX,
};

View File

@ -93,6 +93,7 @@ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid)
static struct riscv_isa_ext_data isa_ext_arr[] = {
__RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
__RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
__RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
};

View File

@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/of.h>
#include <asm/alternative.h>
#include <asm/cacheflush.h>
#include <asm/errata_list.h>
#include <asm/hwcap.h>
#include <asm/patch.h>
@ -200,6 +201,7 @@ void __init riscv_fill_hwcap(void)
} else {
SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM);
}
#undef SET_ISA_EXT_MAP
}
@ -261,6 +263,25 @@ static bool __init_or_module cpufeature_probe_svpbmt(unsigned int stage)
return false;
}
/*
 * Report whether the Zicbom alternatives should be applied.
 *
 * Always false when CONFIG_RISCV_ISA_ZICBOM is disabled or when called
 * for the early-boot alternatives stage. Otherwise the result follows
 * riscv_isa_extension_available(); when the extension is present the
 * function also records that non-coherent operations are supported.
 */
static bool __init_or_module cpufeature_probe_zicbom(unsigned int stage)
{
#ifdef CONFIG_RISCV_ISA_ZICBOM
	if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
		return false;

	if (!riscv_isa_extension_available(NULL, ZICBOM))
		return false;

	riscv_noncoherent_supported();
	return true;
#else
	return false;
#endif
}
/*
* Probe presence of individual extensions.
*
@ -275,6 +296,9 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage)
if (cpufeature_probe_svpbmt(stage))
cpu_req_feature |= (1U << CPUFEATURE_SVPBMT);
if (cpufeature_probe_zicbom(stage))
cpu_req_feature |= (1U << CPUFEATURE_ZICBOM);
return cpu_req_feature;
}

View File

@ -22,6 +22,7 @@
#include <linux/crash_dump.h>
#include <asm/alternative.h>
#include <asm/cacheflush.h>
#include <asm/cpu_ops.h>
#include <asm/early_ioremap.h>
#include <asm/pgtable.h>
@ -296,6 +297,7 @@ void __init setup_arch(char **cmdline_p)
#endif
riscv_fill_hwcap();
riscv_init_cbom_blocksize();
apply_boot_alternatives();
}

View File

@ -30,3 +30,4 @@ endif
endif
obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
obj-$(CONFIG_RISCV_DMA_NONCOHERENT) += dma-noncoherent.o

View File

@ -0,0 +1,116 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* RISC-V specific functions to support DMA for non-coherent devices
*
* Copyright (c) 2021 Western Digital Corporation or its affiliates.
*/
#include <linux/dma-direct.h>
#include <linux/dma-map-ops.h>
#include <linux/mm.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <asm/cacheflush.h>
static unsigned int riscv_cbom_block_size = L1_CACHE_BYTES;
static bool noncoherent_supported;
/*
 * Cache maintenance before a buffer is handed to a device.
 *
 * DMA_TO_DEVICE and DMA_FROM_DEVICE both issue a "clean" over the range,
 * DMA_BIDIRECTIONAL issues a "flush"; any other direction is a no-op.
 * The operation works on the address obtained from phys_to_virt(paddr).
 */
void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
			      enum dma_data_direction dir)
{
	void *vaddr = phys_to_virt(paddr);

	if (dir == DMA_TO_DEVICE || dir == DMA_FROM_DEVICE)
		ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
	else if (dir == DMA_BIDIRECTIONAL)
		ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size);
}
/*
 * Cache maintenance before the CPU accesses a buffer a device may have
 * touched.
 *
 * DMA_FROM_DEVICE and DMA_BIDIRECTIONAL issue a "flush" over the range;
 * DMA_TO_DEVICE and any other direction need no work.
 */
void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
			   enum dma_data_direction dir)
{
	void *vaddr = phys_to_virt(paddr);

	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size);
}
/*
 * Flush @size bytes starting at the kernel address of @page, using the
 * configured cache-block size as the step.
 */
void arch_dma_prep_coherent(struct page *page, size_t size)
{
	void *vaddr = page_address(page);

	ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size);
}
/*
 * Record the coherency setting for @dev.
 *
 * For a non-coherent device, two sanity checks each warn and taint the
 * kernel (TAINT_CPU_OUT_OF_SPEC): one when the cache-block size exceeds
 * ARCH_DMA_MINALIGN, one when no non-coherent operations were registered.
 * @dma_base, @size and @iommu are not used here.
 */
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
			const struct iommu_ops *iommu, bool coherent)
{
	if (!coherent) {
		WARN_TAINT(riscv_cbom_block_size > ARCH_DMA_MINALIGN,
			   TAINT_CPU_OUT_OF_SPEC,
			   "%s %s: ARCH_DMA_MINALIGN smaller than riscv,cbom-block-size (%d < %d)",
			   dev_driver_string(dev), dev_name(dev),
			   ARCH_DMA_MINALIGN, riscv_cbom_block_size);

		WARN_TAINT(!noncoherent_supported, TAINT_CPU_OUT_OF_SPEC,
			   "%s %s: device non-coherent but no non-coherent operations supported",
			   dev_driver_string(dev), dev_name(dev));
	}

	dev->dma_coherent = coherent;
}
#ifdef CONFIG_RISCV_ISA_ZICBOM
/*
 * Probe the Zicbom cache-block size from the devicetree.
 *
 * Walks every CPU node, reads its "riscv,cbom-block-size" property and
 * adopts the first value found as the system-wide block size. Later harts
 * that report a different value only trigger a warning. If no hart
 * provides the property, riscv_cbom_block_size keeps its compile-time
 * default.
 *
 * The value is collected in a local first: riscv_cbom_block_size starts
 * out non-zero, so testing the global for "not yet set" would never
 * succeed and the devicetree value would be ignored.
 */
void riscv_init_cbom_blocksize(void)
{
	struct device_node *node;
	/* hart that supplied probed_block_size; must outlive one iteration */
	unsigned long cbom_hartid = 0;
	u32 probed_block_size = 0;
	u32 val;
	int ret;

	for_each_of_cpu_node(node) {
		unsigned long hartid;

		ret = riscv_of_processor_hartid(node, &hartid);
		if (ret)
			continue;

		/* set block-size for cbom extension if available */
		ret = of_property_read_u32(node, "riscv,cbom-block-size", &val);
		if (ret)
			continue;

		if (!probed_block_size) {
			probed_block_size = val;
			cbom_hartid = hartid;
		} else if (probed_block_size != val) {
			/* both IDs are unsigned long, hence %lu for each */
			pr_warn("cbom-block-size mismatched between harts %lu and %lu\n",
				cbom_hartid, hartid);
		}
	}

	/* Only override the default when a hart actually supplied a size. */
	if (probed_block_size)
		riscv_cbom_block_size = probed_block_size;
}
#endif
/*
 * Mark non-coherent cache-maintenance operations as available.
 *
 * Sets the flag that arch_setup_dma_ops() checks before warning about a
 * non-coherent device; nothing in this file clears it again.
 */
void riscv_noncoherent_supported(void)
{
	noncoherent_supported = true;
}

View File

@ -1045,26 +1045,29 @@ phys_addr_t __init of_dma_get_max_cpu_address(struct device_node *np)
*
* It returns true if "dma-coherent" property was found
* for this device in the DT, or if DMA is coherent by
* default for OF devices on the current platform.
* default for OF devices on the current platform and no
* "dma-noncoherent" property was found for this device.
*/
bool of_dma_is_coherent(struct device_node *np)
{
struct device_node *node;
if (IS_ENABLED(CONFIG_OF_DMA_DEFAULT_COHERENT))
return true;
bool is_coherent = IS_ENABLED(CONFIG_OF_DMA_DEFAULT_COHERENT);
node = of_node_get(np);
while (node) {
if (of_property_read_bool(node, "dma-coherent")) {
of_node_put(node);
return true;
is_coherent = true;
break;
}
if (of_property_read_bool(node, "dma-noncoherent")) {
is_coherent = false;
break;
}
node = of_get_next_dma_parent(node);
}
of_node_put(node);
return false;
return is_coherent;
}
EXPORT_SYMBOL_GPL(of_dma_is_coherent);