From b0063aad5dd86308c9b9c433ac8f3ab5b49aab1b Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 13 May 2014 18:04:30 +0200 Subject: [PATCH 1/7] ARM: mvebu: use hardware I/O coherency also for PCI devices Since the beginning of the introduction of hardware I/O coherency support for Armada 370 and Armada XP, the special DMA operations should have applied to all DMA capable devices. Unfortunately, while the original code properly took into account platform devices, it didn't take into account PCI devices, which can also be DMA masters. This commit fixes that by registering a bus notifier on pci_bus_type, to register our custom DMA operations, like is already done for platform devices. While doing this, we also rename mvebu_hwcc_platform_notifier() to mvebu_hwcc_notifier() and mvebu_hwcc_platform_nb to mvebu_hwcc_nb because they are no longer specific to platform devices. Signed-off-by: Thomas Petazzoni Link: https://lkml.kernel.org/r/1399997070-11434-1-git-send-email-thomas.petazzoni@free-electrons.com Signed-off-by: Jason Cooper --- arch/arm/mach-mvebu/coherency.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index d5a975b6a590..5723178f6589 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include "armada-370-xp.h" @@ -274,8 +275,8 @@ static struct dma_map_ops mvebu_hwcc_dma_ops = { .set_dma_mask = arm_dma_set_mask, }; -static int mvebu_hwcc_platform_notifier(struct notifier_block *nb, - unsigned long event, void *__dev) +static int mvebu_hwcc_notifier(struct notifier_block *nb, + unsigned long event, void *__dev) { struct device *dev = __dev; @@ -286,8 +287,8 @@ static int mvebu_hwcc_platform_notifier(struct notifier_block *nb, return NOTIFY_OK; } -static struct notifier_block mvebu_hwcc_platform_nb = { - .notifier_call = 
mvebu_hwcc_platform_notifier, +static struct notifier_block mvebu_hwcc_nb = { + .notifier_call = mvebu_hwcc_notifier, }; static void __init armada_370_coherency_init(struct device_node *np) @@ -375,9 +376,19 @@ static int __init coherency_late_init(void) } bus_register_notifier(&platform_bus_type, - &mvebu_hwcc_platform_nb); + &mvebu_hwcc_nb); return 0; } postcore_initcall(coherency_late_init); + +static int __init coherency_pci_init(void) +{ + if (coherency_available()) + bus_register_notifier(&pci_bus_type, + &mvebu_hwcc_nb); + return 0; +} + +arch_initcall(coherency_pci_init); From 497a92308af8e9385fa3d135f7f416a997e4b93b Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 15 May 2014 16:59:34 +0200 Subject: [PATCH 2/7] ARM: mvebu: implement L2/PCIe deadlock workaround The Marvell Armada 375 and Armada 38x SOCs, which use the Cortex-A9 CPU core, the PL310 cache and the Marvell PCIe hardware block, are affected by an L2/PCIe deadlock caused by a system erratum when hardware I/O coherency is used. This deadlock can be avoided by mapping the PCIe memory areas as strongly-ordered (note: MT_UNCACHED is strongly-ordered), and by removing the outer cache sync done in software. This is implemented in this patch by: * Registering a custom arch_ioremap_caller function that makes sure PCI memory regions are mapped MT_UNCACHED. * Adding at runtime the 'arm,io-coherent' property to the PL310 cache controller. This cannot be done permanently in the DT, because the hardware I/O coherency can only be enabled when CONFIG_SMP is enabled, in the current kernel situation.
Signed-off-by: Thomas Petazzoni Link: https://lkml.kernel.org/r/1400165974-9059-4-git-send-email-thomas.petazzoni@free-electrons.com Signed-off-by: Jason Cooper --- arch/arm/mach-mvebu/coherency.c | 39 +++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index 5723178f6589..a2225070ac3d 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "armada-370-xp.h" #include "coherency.h" #include "mvebu-soc-id.h" @@ -309,9 +310,47 @@ static void __init armada_370_coherency_init(struct device_node *np) set_cpu_coherent(); } +/* + * This ioremap hook is used on Armada 375/38x to ensure that PCIe + * memory areas are mapped as MT_UNCACHED instead of MT_DEVICE. This + * is needed as a workaround for a deadlock issue between the PCIe + * interface and the cache controller. + */ +static void __iomem * +armada_pcie_wa_ioremap_caller(phys_addr_t phys_addr, size_t size, + unsigned int mtype, void *caller) +{ + struct resource pcie_mem; + + mvebu_mbus_get_pcie_mem_aperture(&pcie_mem); + + if (pcie_mem.start <= phys_addr && (phys_addr + size) <= pcie_mem.end) + mtype = MT_UNCACHED; + + return __arm_ioremap_caller(phys_addr, size, mtype, caller); +} + static void __init armada_375_380_coherency_init(struct device_node *np) { + struct device_node *cache_dn; + coherency_cpu_base = of_iomap(np, 0); + arch_ioremap_caller = armada_pcie_wa_ioremap_caller; + + /* + * Add the PL310 property "arm,io-coherent". This makes sure the + * outer sync operation is not used, which allows to + * workaround the system erratum that causes deadlocks when + * doing PCIe in an SMP situation on Armada 375 and Armada + * 38x. 
+ */ + for_each_compatible_node(cache_dn, NULL, "arm,pl310-cache") { + struct property *p; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + p->name = kstrdup("arm,io-coherent", GFP_KERNEL); + of_add_property(cache_dn, p); + } } static int coherency_type(void) From 8828ccc3f2a8e32ddf1a0e1a80742e8482f18cea Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 20 May 2014 17:13:03 +0200 Subject: [PATCH 3/7] ARM: mvebu: coherency: fix registration of PCI bus notifier when !PCI Commit b0063aad5dd8 ("ARM: mvebu: use hardware I/O coherency also for PCI devices") added a reference to the pci_bus_type variable, but this variable is only available when CONFIG_PCI is enabled. Therefore, there is now a build failure in !CONFIG_PCI situations. This commit fixes that by enclosing the entire initcall into a IS_ENABLED(CONFIG_PCI) condition. Reported-by: Arnd Bergmann Signed-off-by: Thomas Petazzoni Link: https://lkml.kernel.org/r/1400598783-706-1-git-send-email-thomas.petazzoni@free-electrons.com Signed-off-by: Jason Cooper --- arch/arm/mach-mvebu/coherency.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index a2225070ac3d..477202fd39cc 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -422,6 +422,7 @@ static int __init coherency_late_init(void) postcore_initcall(coherency_late_init); +#if IS_ENABLED(CONFIG_PCI) static int __init coherency_pci_init(void) { if (coherency_available()) @@ -431,3 +432,4 @@ static int __init coherency_pci_init(void) } arch_initcall(coherency_pci_init); +#endif From 4fbe63937eb2a54040de58d0726d4796412fba3d Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 22 May 2014 14:47:59 +0200 Subject: [PATCH 4/7] ARM: mvebu: fix big endian booting after coherency code rework As part of the introduction of the cpuidle support for Armada XP, the coherency code was significantly reworked, especially in the coherency_ll.S file. 
However, when the ll_get_cpuid function was created, the big-endian specific code that switches the endianness of the register was not updated properly. This patch fixes this code, and therefore makes big-endian systems bootable again. Signed-off-by: Thomas Petazzoni Link: https://lkml.kernel.org/r/1400762882-10116-2-git-send-email-thomas.petazzoni@free-electrons.com Fixes: 2e8a5942f875 ("ARM: mvebu: Split low level functions to manipulate HW coherency") Reported-by: Kevin Hilman Cc: Kevin Hilman Acked-by: Gregory CLEMENT Signed-off-by: Jason Cooper --- arch/arm/mach-mvebu/coherency_ll.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S index 6828f9f157b0..a5e62c62819a 100644 --- a/arch/arm/mach-mvebu/coherency_ll.S +++ b/arch/arm/mach-mvebu/coherency_ll.S @@ -49,7 +49,7 @@ ENTRY(ll_get_cpuid) and r3, r3, #15 mov r2, #(1 << 24) lsl r3, r2, r3 -ARM_BE8(rev r1, r1) +ARM_BE8(rev r3, r3) mov pc, lr ENDPROC(ll_get_cpuid) From 90ba76f610b80d8fd33b8c36034172a98c5db05f Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 22 May 2014 14:48:00 +0200 Subject: [PATCH 5/7] ARM: mvebu: fix indentation of assembly instructions in coherency_ll.S This commit does not make any functional change, it only fixes the indentation of a few assembly instructions in arch/arm/mach-mvebu/coherency_ll.S.
Signed-off-by: Thomas Petazzoni Link: https://lkml.kernel.org/r/1400762882-10116-3-git-send-email-thomas.petazzoni@free-electrons.com Acked-by: Gregory CLEMENT Signed-off-by: Jason Cooper --- arch/arm/mach-mvebu/coherency_ll.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S index a5e62c62819a..7d1b5a51b656 100644 --- a/arch/arm/mach-mvebu/coherency_ll.S +++ b/arch/arm/mach-mvebu/coherency_ll.S @@ -66,10 +66,10 @@ ENTRY(ll_add_cpu_to_smp_group) * ll_get_cpuid, we can use it to save lr modifing it with the * following bl */ - mov r0, lr + mov r0, lr bl ll_get_coherency_base bl ll_get_cpuid - mov lr, r0 + mov lr, r0 add r0, r1, #ARMADA_XP_CFB_CFG_REG_OFFSET 1: ldrex r2, [r0] @@ -108,10 +108,10 @@ ENTRY(ll_disable_coherency) * ll_get_cpuid, we can use it to save lr modifing it with the * following bl */ - mov r0, lr + mov r0, lr bl ll_get_coherency_base bl ll_get_cpuid - mov lr, r0 + mov lr, r0 add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET 1: ldrex r2, [r0] From 4dd1b7fa431d6b59022b3493312c6c1b52dbf547 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 22 May 2014 14:48:01 +0200 Subject: [PATCH 6/7] ARM: mvebu: improve comments in coherency_ll.S This commit makes no functional change, it only improves a bit the various code comments in mach-mvebu/coherency_ll.S, by fixing a few typos and adding a few more details. 
Signed-off-by: Thomas Petazzoni Link: https://lkml.kernel.org/r/1400762882-10116-4-git-send-email-thomas.petazzoni@free-electrons.com Acked-by: Gregory CLEMENT Signed-off-by: Jason Cooper --- arch/arm/mach-mvebu/coherency_ll.S | 50 +++++++++++++++++++----------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S index 7d1b5a51b656..311442ae128b 100644 --- a/arch/arm/mach-mvebu/coherency_ll.S +++ b/arch/arm/mach-mvebu/coherency_ll.S @@ -24,26 +24,32 @@ #include .text -/* Returns with the coherency address in r1 (r0 is untouched)*/ +/* Returns the coherency base address in r1 (r0 is untouched) */ ENTRY(ll_get_coherency_base) mrc p15, 0, r1, c1, c0, 0 tst r1, #CR_M @ Check MMU bit enabled bne 1f - /* use physical address of the coherency register */ + /* + * MMU is disabled, use the physical address of the coherency + * base address. + */ adr r1, 3f ldr r3, [r1] ldr r1, [r1, r3] b 2f 1: - /* use virtual address of the coherency register */ + /* + * MMU is enabled, use the virtual address of the coherency + * base address. + */ ldr r1, =coherency_base ldr r1, [r1] 2: mov pc, lr ENDPROC(ll_get_coherency_base) -/* Returns with the CPU ID in r3 (r0 is untouched)*/ +/* Returns the CPU ID in r3 (r0 is untouched) */ ENTRY(ll_get_cpuid) mrc 15, 0, r3, cr0, cr0, 5 and r3, r3, #15 @@ -53,18 +59,22 @@ ARM_BE8(rev r3, r3) mov pc, lr ENDPROC(ll_get_cpuid) -/* ll_add_cpu_to_smp_group, ll_enable_coherency and - * ll_disable_coherency use strex/ldrex whereas MMU can be off. The - * Armada XP SoC has an exclusive monitor that can track transactions - * to Device and/or SO and as such also when MMU is disabled the - * exclusive transactions will be functional +/* + * ll_add_cpu_to_smp_group(), ll_enable_coherency() and + * ll_disable_coherency() use the strex/ldrex instructions while the + * MMU can be disabled. 
The Armada XP SoC has an exclusive monitor + * that tracks transactions to Device and/or SO memory and thanks to + * that, exclusive transactions are functional even when the MMU is + * disabled. */ ENTRY(ll_add_cpu_to_smp_group) /* - * r0 being untouched in ll_get_coherency_base and - * ll_get_cpuid, we can use it to save lr modifing it with the - * following bl + * As r0 is not modified by ll_get_coherency_base() and + * ll_get_cpuid(), we use it to temporarly save lr and avoid + * it being modified by the branch and link calls. This + * function is used very early in the secondary CPU boot, and + * no stack is available at this point. */ mov r0, lr bl ll_get_coherency_base @@ -82,9 +92,11 @@ ENDPROC(ll_add_cpu_to_smp_group) ENTRY(ll_enable_coherency) /* - * r0 being untouched in ll_get_coherency_base and - * ll_get_cpuid, we can use it to save lr modifing it with the - * following bl + * As r0 is not modified by ll_get_coherency_base() and + * ll_get_cpuid(), we use it to temporarly save lr and avoid + * it being modified by the branch and link calls. This + * function is used very early in the secondary CPU boot, and + * no stack is available at this point. */ mov r0, lr bl ll_get_coherency_base @@ -104,9 +116,11 @@ ENDPROC(ll_enable_coherency) ENTRY(ll_disable_coherency) /* - * r0 being untouched in ll_get_coherency_base and - * ll_get_cpuid, we can use it to save lr modifing it with the - * following bl + * As r0 is not modified by ll_get_coherency_base() and + * ll_get_cpuid(), we use it to temporarly save lr and avoid + * it being modified by the branch and link calls. This + * function is used very early in the secondary CPU boot, and + * no stack is available at this point. 
*/ mov r0, lr bl ll_get_coherency_base From 07ae144be1b2ac45f893bc1ed3fe1a49f7128e46 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 22 May 2014 14:48:02 +0200 Subject: [PATCH 7/7] ARM: mvebu: returns ll_get_cpuid() to ll_get_coherency_cpumask() In the refactoring of the coherency fabric assembly code, a function called ll_get_cpuid() was created to factorize common logic between functions adding a CPU to the SMP coherency group, enabling and disabling the coherency. However, the name of the function is highly misleading: ll_get_cpuid() makes one think that it returns the ID of the CPU, i.e. 0 for CPU0, 1 for CPU1, etc. In fact, this is not at all what this function returns: it returns a CPU mask for the current CPU, usable for the coherency fabric configuration and control registers. Therefore this commit renames this function to ll_get_coherency_cpumask(), and adds additional comments on top of the function to explain in more detail what it does, and also how the endianness issue is handled. Signed-off-by: Thomas Petazzoni Link: https://lkml.kernel.org/r/1400762882-10116-5-git-send-email-thomas.petazzoni@free-electrons.com Acked-by: Gregory CLEMENT Signed-off-by: Jason Cooper --- arch/arm/mach-mvebu/coherency_ll.S | 43 +++++++++++++++++------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S index 311442ae128b..510c29e079ca 100644 --- a/arch/arm/mach-mvebu/coherency_ll.S +++ b/arch/arm/mach-mvebu/coherency_ll.S @@ -49,15 +49,22 @@ ENTRY(ll_get_coherency_base) mov pc, lr ENDPROC(ll_get_coherency_base) -/* Returns the CPU ID in r3 (r0 is untouched) */ -ENTRY(ll_get_cpuid) +/* + * Returns the coherency CPU mask in r3 (r0 is untouched). This + * coherency CPU mask can be used with the coherency fabric + * configuration and control registers.
Note that the mask is already + * endian-swapped as appropriate so that the calling functions do not + * have to care about endianness issues while accessing the coherency + * fabric registers + */ +ENTRY(ll_get_coherency_cpumask) mrc 15, 0, r3, cr0, cr0, 5 and r3, r3, #15 mov r2, #(1 << 24) lsl r3, r2, r3 ARM_BE8(rev r3, r3) mov pc, lr -ENDPROC(ll_get_cpuid) +ENDPROC(ll_get_coherency_cpumask) /* * ll_add_cpu_to_smp_group(), ll_enable_coherency() and @@ -71,14 +78,14 @@ ENDPROC(ll_get_cpuid) ENTRY(ll_add_cpu_to_smp_group) /* * As r0 is not modified by ll_get_coherency_base() and - * ll_get_cpuid(), we use it to temporarly save lr and avoid - * it being modified by the branch and link calls. This - * function is used very early in the secondary CPU boot, and - * no stack is available at this point. + * ll_get_coherency_cpumask(), we use it to temporarly save lr + * and avoid it being modified by the branch and link + * calls. This function is used very early in the secondary + * CPU boot, and no stack is available at this point. */ mov r0, lr bl ll_get_coherency_base - bl ll_get_cpuid + bl ll_get_coherency_cpumask mov lr, r0 add r0, r1, #ARMADA_XP_CFB_CFG_REG_OFFSET 1: @@ -93,14 +100,14 @@ ENDPROC(ll_add_cpu_to_smp_group) ENTRY(ll_enable_coherency) /* * As r0 is not modified by ll_get_coherency_base() and - * ll_get_cpuid(), we use it to temporarly save lr and avoid - * it being modified by the branch and link calls. This - * function is used very early in the secondary CPU boot, and - * no stack is available at this point. + * ll_get_coherency_cpumask(), we use it to temporarly save lr + * and avoid it being modified by the branch and link + * calls. This function is used very early in the secondary + * CPU boot, and no stack is available at this point. 
*/ mov r0, lr bl ll_get_coherency_base - bl ll_get_cpuid + bl ll_get_coherency_cpumask mov lr, r0 add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET 1: @@ -117,14 +124,14 @@ ENDPROC(ll_enable_coherency) ENTRY(ll_disable_coherency) /* * As r0 is not modified by ll_get_coherency_base() and - * ll_get_cpuid(), we use it to temporarly save lr and avoid - * it being modified by the branch and link calls. This - * function is used very early in the secondary CPU boot, and - * no stack is available at this point. + * ll_get_coherency_cpumask(), we use it to temporarly save lr + * and avoid it being modified by the branch and link + * calls. This function is used very early in the secondary + * CPU boot, and no stack is available at this point. */ mov r0, lr bl ll_get_coherency_base - bl ll_get_cpuid + bl ll_get_coherency_cpumask mov lr, r0 add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET 1: