From ecd7274fb4cd2d6c035a52d59ca3d6ec936a07be Mon Sep 17 00:00:00 2001 From: Will Deacon <will@kernel.org> Date: Thu, 4 Jun 2020 18:11:15 +0100 Subject: [PATCH 01/13] iommu: Remove unused IOMMU_SYS_CACHE_ONLY flag The IOMMU_SYS_CACHE_ONLY flag was never exposed via the DMA API and has no in-tree users. Remove it. Cc: Robin Murphy <robin.murphy@arm.com> Cc: "Isaac J. Manjarres" <isaacm@codeaurora.org> Cc: Joerg Roedel <joro@8bytes.org> Cc: Rob Clark <robdclark@gmail.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org> Signed-off-by: Will Deacon <will@kernel.org> --- drivers/iommu/io-pgtable-arm.c | 3 --- include/linux/iommu.h | 6 ------ 2 files changed, 9 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 04fbd4bf0ff9..8f175c02f8e3 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -438,9 +438,6 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, else if (prot & IOMMU_CACHE) pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE << ARM_LPAE_PTE_ATTRINDX_SHIFT); - else if (prot & IOMMU_SYS_CACHE_ONLY) - pte |= (ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE - << ARM_LPAE_PTE_ATTRINDX_SHIFT); } if (prot & IOMMU_CACHE) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 5f0b7859d2eb..bee1a8fa1fb1 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -31,12 +31,6 @@ * if the IOMMU page table format is equivalent. */ #define IOMMU_PRIV (1 << 5) -/* - * Non-coherent masters can use this page protection flag to set cacheable - * memory attributes for only a transparent outer level of cache, also known as - * the last-level or system cache. - */ -#define IOMMU_SYS_CACHE_ONLY (1 << 6) struct iommu_ops; struct iommu_group; From 976492922a6a9c06f6f61436848adf2f03702e5c Mon Sep 17 00:00:00 2001 From: Robin Murphy <robin.murphy@arm.com> Date: Wed, 24 Jun 2020 11:24:51 +0100 Subject: [PATCH 02/13] iommu/arm-smmu: Update impl quirks comment The comment about implementation and integration quirks being mutually-exclusive is out of date, and in fact the code is already structured for the case it anticipates, so document that properly. Signed-off-by: Robin Murphy <robin.murphy@arm.com> Link: https://lore.kernel.org/r/1e742177e084621f3454fbaf768325a6c215656a.1592994291.git.robin.murphy@arm.com Signed-off-by: Will Deacon <will@kernel.org> --- drivers/iommu/arm-smmu-impl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c index c75b9d957b70..1fb4bb994dbf 100644 --- a/drivers/iommu/arm-smmu-impl.c +++ b/drivers/iommu/arm-smmu-impl.c @@ -153,10 +153,9 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu) const struct device_node *np = smmu->dev->of_node; /* - * We will inevitably have to combine model-specific implementation - * quirks with platform-specific integration quirks, but everything - * we currently support happens to work out as straightforward - * mutually-exclusive assignments. + * Set the impl for model-specific implementation quirks first, + * such that platform integration quirks can pick it up and + * inherit from it if necessary. */ switch (smmu->model) { case ARM_MMU500: @@ -168,6 +167,7 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu) break; } + /* This is implicitly MMU-400 */ if (of_property_read_bool(np, "calxeda,smmu-secure-config-access")) smmu->impl = &calxeda_impl; From 7b6b70d88b10c61af7050820ca041ff9e73482ae Mon Sep 17 00:00:00 2001 From: Jonathan Marek <jonathan@marek.ca> Date: Tue, 9 Jun 2020 15:40:19 -0400 Subject: [PATCH 03/13] dt-bindings: arm-smmu: Add sm8150 and sm8250 compatible strings Add compatible strings for sm8150 and sm8250 iommus to documentation. Signed-off-by: Jonathan Marek <jonathan@marek.ca> Acked-by: Rob Herring <robh@kernel.org> Link: https://lore.kernel.org/r/20200609194030.17756-2-jonathan@marek.ca Signed-off-by: Will Deacon <will@kernel.org> --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index d7ceb4c34423..3ac741236c8b 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -37,6 +37,8 @@ properties: - enum: - qcom,sc7180-smmu-500 - qcom,sdm845-smmu-500 + - qcom,sm8150-smmu-500 + - qcom,sm8250-smmu-500 - const: arm,mmu-500 - items: - const: arm,mmu-500 From 2c5c3cfb2da577bb23c3d9d771cc08300dec3559 Mon Sep 17 00:00:00 2001 From: Jonathan Marek <jonathan@marek.ca> Date: Tue, 9 Jun 2020 15:40:20 -0400 Subject: [PATCH 04/13] iommu: arm-smmu-impl: Use qcom impl for sm8150 and sm8250 compatibles Use the qcom implementation for IOMMU hardware on sm8150 and sm8250 SoCs. Signed-off-by: Jonathan Marek <jonathan@marek.ca> Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org> Link: https://lore.kernel.org/r/20200609194030.17756-3-jonathan@marek.ca Signed-off-by: Will Deacon <will@kernel.org> --- drivers/iommu/arm-smmu-impl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c index 1fb4bb994dbf..22a9acd76955 100644 --- a/drivers/iommu/arm-smmu-impl.c +++ b/drivers/iommu/arm-smmu-impl.c @@ -172,7 +172,9 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu) smmu->impl = &calxeda_impl; if (of_device_is_compatible(np, "qcom,sdm845-smmu-500") || - of_device_is_compatible(np, "qcom,sc7180-smmu-500")) + of_device_is_compatible(np, "qcom,sc7180-smmu-500") || + of_device_is_compatible(np, "qcom,sm8150-smmu-500") || + of_device_is_compatible(np, "qcom,sm8250-smmu-500")) return qcom_smmu_impl_init(smmu); return smmu; From 6a79a5a3842b6a9f639fe2874dd6ae0bd4b24d1a Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki <tn@semihalf.com> Date: Wed, 15 Jul 2020 09:06:46 +0200 Subject: [PATCH 05/13] iommu/arm-smmu: Call configuration impl hook before consuming features 'cfg_probe' hook is called at the very end of configuration probing procedure and therefore features override and workaround may become complex like for ID register fixups. In preparation for adding Marvell errata move 'cfg_probe' a bit earlier to have chance to adjust the detected features before we start consuming them. Since the Cavium quirk (the only user) does not alter features it is safe to do so. Suggested-by: Robin Murphy <robin.murphy@arm.com> Signed-off-by: Tomasz Nowicki <tn@semihalf.com> Reviewed-by: Robin Murphy <robin.murphy@arm.com> Link: https://lore.kernel.org/r/20200715070649.18733-2-tn@semihalf.com Signed-off-by: Will Deacon <will@kernel.org> --- drivers/iommu/arm-smmu.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 243bc4cb2705..19f906de6420 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1728,7 +1728,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) unsigned int size; u32 id; bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK; - int i; + int i, ret; dev_notice(smmu->dev, "probing hardware configuration...\n"); dev_notice(smmu->dev, "SMMUv%d with:\n", @@ -1891,6 +1891,12 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K; } + if (smmu->impl && smmu->impl->cfg_probe) { + ret = smmu->impl->cfg_probe(smmu); + if (ret) + return ret; + } + /* Now we've corralled the various formats, what'll it do? */ if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M; @@ -1918,9 +1924,6 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n", smmu->ipa_size, smmu->pa_size); - if (smmu->impl && smmu->impl->cfg_probe) - return smmu->impl->cfg_probe(smmu); - return 0; } From f2d9848aeb9fa71523bbfb226203ffb7d50877d2 Mon Sep 17 00:00:00 2001 From: Hanna Hawa <hannah@marvell.com> Date: Wed, 15 Jul 2020 09:06:47 +0200 Subject: [PATCH 06/13] iommu/arm-smmu: Workaround for Marvell Armada-AP806 SoC erratum #582743 Due to erratum #582743, the Marvell Armada-AP806 can't access 64bit to ARM SMMUv2 registers. Provide implementation relevant hooks: - split the writeq/readq to two accesses of writel/readl. - mask the MMU_IDR2.PTFSv8 fields to not use AArch64 format (but only AARCH32_L) since with AArch64 format 32 bits access is not supported. Note that most 64-bit registers like TTBRn can be accessed as two 32-bit halves without issue, and AArch32 format ensures that the register writes which must be atomic (for TLBI etc.) need only be 32-bit. Signed-off-by: Hanna Hawa <hannah@marvell.com> Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com> Signed-off-by: Tomasz Nowicki <tn@semihalf.com> Reviewed-by: Robin Murphy <robin.murphy@arm.com> Link: https://lore.kernel.org/r/20200715070649.18733-3-tn@semihalf.com Signed-off-by: Will Deacon <will@kernel.org> --- Documentation/arm64/silicon-errata.rst | 3 ++ drivers/iommu/arm-smmu-impl.c | 45 ++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 936cf2a59ca4..157214d3abe1 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -125,6 +125,9 @@ stable kernels. | Cavium | ThunderX2 Core | #219 | CAVIUM_TX2_ERRATUM_219 | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ +| Marvell | ARM-MMU-500 | #582743 | N/A | ++----------------+-----------------+-----------------+-----------------------------+ ++----------------+-----------------+-----------------+-----------------------------+ | Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c index 22a9acd76955..c87d825f651e 100644 --- a/drivers/iommu/arm-smmu-impl.c +++ b/drivers/iommu/arm-smmu-impl.c @@ -147,6 +147,48 @@ static const struct arm_smmu_impl arm_mmu500_impl = { .reset = arm_mmu500_reset, }; +static u64 mrvl_mmu500_readq(struct arm_smmu_device *smmu, int page, int off) +{ + /* + * Marvell Armada-AP806 erratum #582743. + * Split all the readq to double readl + */ + return hi_lo_readq_relaxed(arm_smmu_page(smmu, page) + off); +} + +static void mrvl_mmu500_writeq(struct arm_smmu_device *smmu, int page, int off, + u64 val) +{ + /* + * Marvell Armada-AP806 erratum #582743. + * Split all the writeq to double writel + */ + hi_lo_writeq_relaxed(val, arm_smmu_page(smmu, page) + off); +} + +static int mrvl_mmu500_cfg_probe(struct arm_smmu_device *smmu) +{ + + /* + * Armada-AP806 erratum #582743. + * Hide the SMMU_IDR2.PTFSv8 fields to sidestep the AArch64 + * formats altogether and allow using 32 bits access on the + * interconnect. + */ + smmu->features &= ~(ARM_SMMU_FEAT_FMT_AARCH64_4K | + ARM_SMMU_FEAT_FMT_AARCH64_16K | + ARM_SMMU_FEAT_FMT_AARCH64_64K); + + return 0; +} + +static const struct arm_smmu_impl mrvl_mmu500_impl = { + .read_reg64 = mrvl_mmu500_readq, + .write_reg64 = mrvl_mmu500_writeq, + .cfg_probe = mrvl_mmu500_cfg_probe, + .reset = arm_mmu500_reset, +}; + struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu) { @@ -177,5 +219,8 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu) of_device_is_compatible(np, "qcom,sm8250-smmu-500")) return qcom_smmu_impl_init(smmu); + if (of_device_is_compatible(np, "marvell,ap806-smmu-500")) + smmu->impl = &mrvl_mmu500_impl; + return smmu; } From e85e84d19b9dac3d3ac92c9f6a24d034691f1243 Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki <tn@semihalf.com> Date: Wed, 15 Jul 2020 09:06:48 +0200 Subject: [PATCH 07/13] dt-bindings: arm-smmu: add compatible string for Marvell Armada-AP806 SMMU-500 Add specific compatible string for Marvell usage due to errata of accessing 64bits registers of ARM SMMU, in AP806. AP806 SoC uses the generic ARM-MMU500, and there's no specific implementation of Marvell, this compatible is used for errata only. Signed-off-by: Hanna Hawa <hannah@marvell.com> Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com> Signed-off-by: Tomasz Nowicki <tn@semihalf.com> Reviewed-by: Rob Herring <robh@kernel.org> Reviewed-by: Robin Murphy <robin.murphy@arm.com> Link: https://lore.kernel.org/r/20200715070649.18733-4-tn@semihalf.com Signed-off-by: Will Deacon <will@kernel.org> --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 3ac741236c8b..93fb9fe068b9 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -40,6 +40,10 @@ properties: - qcom,sm8150-smmu-500 - qcom,sm8250-smmu-500 - const: arm,mmu-500 + - description: Marvell SoCs implementing "arm,mmu-500" + items: + - const: marvell,ap806-smmu-500 + - const: arm,mmu-500 - items: - const: arm,mmu-500 - const: arm,smmu-v2 From 49fbb25030265c660de732513f18275d88ff99d3 Mon Sep 17 00:00:00 2001 From: John Garry <john.garry@huawei.com> Date: Tue, 23 Jun 2020 01:28:37 +0800 Subject: [PATCH 08/13] iommu/arm-smmu-v3: Fix trivial typo Set "cmq" -> "cmdq". Signed-off-by: John Garry <john.garry@huawei.com> Signed-off-by: Will Deacon <will@kernel.org> --- drivers/iommu/arm-smmu-v3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index f578677a5c41..a8e814c652fe 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1479,7 +1479,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, } /* - * Try to unlock the cmq lock. This will fail if we're the last + * Try to unlock the cmdq lock. This will fail if we're the last * reader, in which case we can safely update cmdq->q.llq.cons */ if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) { From cd8479cf0de9d1950b51032a216fbf630f8542a4 Mon Sep 17 00:00:00 2001 From: Krishna Reddy <vdumpa@nvidia.com> Date: Sat, 18 Jul 2020 12:34:53 -0700 Subject: [PATCH 09/13] iommu/arm-smmu: move TLB timeout and spin count macros Move TLB timeout and spin count macros to header file to allow using the same from vendor specific implementations. Signed-off-by: Krishna Reddy <vdumpa@nvidia.com> Reviewed-by: Jon Hunter <jonathanh@nvidia.com> Reviewed-by: Nicolin Chen <nicoleotsuka@gmail.com> Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com> Reviewed-by: Robin Murphy <robin.murphy@arm.com> Reviewed-by: Thierry Reding <thierry.reding@gmail.com> Link: https://lore.kernel.org/r/20200718193457.30046-2-vdumpa@nvidia.com Signed-off-by: Will Deacon <will@kernel.org> --- drivers/iommu/arm-smmu.c | 3 --- drivers/iommu/arm-smmu.h | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 19f906de6420..cdd15ead9bc4 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -52,9 +52,6 @@ */ #define QCOM_DUMMY_VAL -1 -#define TLB_LOOP_TIMEOUT 1000000 /* 1s! */ -#define TLB_SPIN_COUNT 10 - #define MSI_IOVA_BASE 0x8000000 #define MSI_IOVA_LENGTH 0x100000 diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h index d172c024be61..c7d0122a7c6c 100644 --- a/drivers/iommu/arm-smmu.h +++ b/drivers/iommu/arm-smmu.h @@ -236,6 +236,8 @@ enum arm_smmu_cbar_type { /* Maximum number of context banks per SMMU */ #define ARM_SMMU_MAX_CBS 128 +#define TLB_LOOP_TIMEOUT 1000000 /* 1s! */ +#define TLB_SPIN_COUNT 10 /* Shared driver definitions */ enum arm_smmu_arch_version { From 6c019f4e697ebed2225b20cc5d6276673834853d Mon Sep 17 00:00:00 2001 From: Krishna Reddy <vdumpa@nvidia.com> Date: Sat, 18 Jul 2020 12:34:54 -0700 Subject: [PATCH 10/13] iommu/arm-smmu: ioremap smmu mmio region before implementation init ioremap smmu mmio region before calling into implementation init. This is necessary to allow mapped address available during vendor specific implementation init. Signed-off-by: Krishna Reddy <vdumpa@nvidia.com> Reviewed-by: Jon Hunter <jonathanh@nvidia.com> Reviewed-by: Nicolin Chen <nicoleotsuka@gmail.com> Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com> Reviewed-by: Robin Murphy <robin.murphy@arm.com> Reviewed-by: Thierry Reding <thierry.reding@gmail.com> Link: https://lore.kernel.org/r/20200718193457.30046-3-vdumpa@nvidia.com Signed-off-by: Will Deacon <will@kernel.org> --- drivers/iommu/arm-smmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index cdd15ead9bc4..de520115d3df 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -2123,10 +2123,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev) if (err) return err; - smmu = arm_smmu_impl_init(smmu); - if (IS_ERR(smmu)) - return PTR_ERR(smmu); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ioaddr = res->start; smmu->base = devm_ioremap_resource(dev, res); @@ -2138,6 +2134,10 @@ static int arm_smmu_device_probe(struct platform_device *pdev) */ smmu->numpage = resource_size(res); + smmu = arm_smmu_impl_init(smmu); + if (IS_ERR(smmu)) + return PTR_ERR(smmu); + num_irqs = 0; while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) { num_irqs++; From aab5a1c882760a5bc990b14e5c0c2ee4dab5f1ac Mon Sep 17 00:00:00 2001 From: Krishna Reddy <vdumpa@nvidia.com> Date: Sat, 18 Jul 2020 12:34:55 -0700 Subject: [PATCH 11/13] iommu/arm-smmu: add NVIDIA implementation for ARM MMU-500 usage NVIDIA's Tegra194 SoC has three ARM MMU-500 instances. It uses two of the ARM MMU-500s together to interleave IOVA accesses across them and must be programmed identically. This implementation supports programming the two ARM MMU-500s that must be programmed identically. The third ARM MMU-500 instance is supported by standard arm-smmu.c driver itself. Signed-off-by: Krishna Reddy <vdumpa@nvidia.com> Reviewed-by: Jon Hunter <jonathanh@nvidia.com> Reviewed-by: Nicolin Chen <nicoleotsuka@gmail.com> Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com> Reviewed-by: Robin Murphy <robin.murphy@arm.com> Reviewed-by: Thierry Reding <thierry.reding@gmail.com> Link: https://lore.kernel.org/r/20200718193457.30046-4-vdumpa@nvidia.com Signed-off-by: Will Deacon <will@kernel.org> --- MAINTAINERS | 2 + drivers/iommu/Makefile | 2 +- drivers/iommu/arm-smmu-impl.c | 3 + drivers/iommu/arm-smmu-nvidia.c | 179 ++++++++++++++++++++++++++++++++ drivers/iommu/arm-smmu.c | 1 + drivers/iommu/arm-smmu.h | 1 + 6 files changed, 187 insertions(+), 1 deletion(-) create mode 100644 drivers/iommu/arm-smmu-nvidia.c diff --git a/MAINTAINERS b/MAINTAINERS index 496fd4eafb68..ee2c0ba13a0f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16810,8 +16810,10 @@ F: drivers/i2c/busses/i2c-tegra.c TEGRA IOMMU DRIVERS M: Thierry Reding <thierry.reding@gmail.com> +R: Krishna Reddy <vdumpa@nvidia.com> L: linux-tegra@vger.kernel.org S: Supported +F: drivers/iommu/arm-smmu-nvidia.c F: drivers/iommu/tegra* TEGRA KBC DRIVER diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 342190196dfb..2b8203db73ec 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -15,7 +15,7 @@ obj-$(CONFIG_AMD_IOMMU) += amd/iommu.o amd/init.o amd/quirks.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += amd/debugfs.o obj-$(CONFIG_AMD_IOMMU_V2) += amd/iommu_v2.o obj-$(CONFIG_ARM_SMMU) += arm_smmu.o -arm_smmu-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-qcom.o +arm_smmu-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-nvidia.o arm-smmu-qcom.o obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o obj-$(CONFIG_DMAR_TABLE) += intel/dmar.o obj-$(CONFIG_INTEL_IOMMU) += intel/iommu.o intel/pasid.o diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c index c87d825f651e..f4ff124a1967 100644 --- a/drivers/iommu/arm-smmu-impl.c +++ b/drivers/iommu/arm-smmu-impl.c @@ -213,6 +213,9 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu) if (of_property_read_bool(np, "calxeda,smmu-secure-config-access")) smmu->impl = &calxeda_impl; + if (of_device_is_compatible(np, "nvidia,tegra194-smmu")) + return nvidia_smmu_impl_init(smmu); + if (of_device_is_compatible(np, "qcom,sdm845-smmu-500") || of_device_is_compatible(np, "qcom,sc7180-smmu-500") || of_device_is_compatible(np, "qcom,sm8150-smmu-500") || diff --git a/drivers/iommu/arm-smmu-nvidia.c b/drivers/iommu/arm-smmu-nvidia.c new file mode 100644 index 000000000000..2f55e5793d34 --- /dev/null +++ b/drivers/iommu/arm-smmu-nvidia.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2019-2020 NVIDIA CORPORATION. All rights reserved. + +#include <linux/bitfield.h> +#include <linux/delay.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +#include "arm-smmu.h" + +/* + * Tegra194 has three ARM MMU-500 Instances. + * Two of them are used together and must be programmed identically for + * interleaved IOVA accesses across them and translates accesses from + * non-isochronous HW devices. + * Third one is used for translating accesses from isochronous HW devices. + * This implementation supports programming of the two instances that must + * be programmed identically. + * The third instance usage is through standard arm-smmu driver itself and + * is out of scope of this implementation. + */ +#define NUM_SMMU_INSTANCES 2 + +struct nvidia_smmu { + struct arm_smmu_device smmu; + void __iomem *bases[NUM_SMMU_INSTANCES]; +}; + +static inline void __iomem *nvidia_smmu_page(struct arm_smmu_device *smmu, + unsigned int inst, int page) +{ + struct nvidia_smmu *nvidia_smmu; + + nvidia_smmu = container_of(smmu, struct nvidia_smmu, smmu); + return nvidia_smmu->bases[inst] + (page << smmu->pgshift); +} + +static u32 nvidia_smmu_read_reg(struct arm_smmu_device *smmu, + int page, int offset) +{ + void __iomem *reg = nvidia_smmu_page(smmu, 0, page) + offset; + + return readl_relaxed(reg); +} + +static void nvidia_smmu_write_reg(struct arm_smmu_device *smmu, + int page, int offset, u32 val) +{ + unsigned int i; + + for (i = 0; i < NUM_SMMU_INSTANCES; i++) { + void __iomem *reg = nvidia_smmu_page(smmu, i, page) + offset; + + writel_relaxed(val, reg); + } +} + +static u64 nvidia_smmu_read_reg64(struct arm_smmu_device *smmu, + int page, int offset) +{ + void __iomem *reg = nvidia_smmu_page(smmu, 0, page) + offset; + + return readq_relaxed(reg); +} + +static void nvidia_smmu_write_reg64(struct arm_smmu_device *smmu, + int page, int offset, u64 val) +{ + unsigned int i; + + for (i = 0; i < NUM_SMMU_INSTANCES; i++) { + void __iomem *reg = nvidia_smmu_page(smmu, i, page) + offset; + + writeq_relaxed(val, reg); + } +} + +static void nvidia_smmu_tlb_sync(struct arm_smmu_device *smmu, int page, + int sync, int status) +{ + unsigned int delay; + + arm_smmu_writel(smmu, page, sync, 0); + + for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) { + unsigned int spin_cnt; + + for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) { + u32 val = 0; + unsigned int i; + + for (i = 0; i < NUM_SMMU_INSTANCES; i++) { + void __iomem *reg; + + reg = nvidia_smmu_page(smmu, i, page) + status; + val |= readl_relaxed(reg); + } + + if (!(val & ARM_SMMU_sTLBGSTATUS_GSACTIVE)) + return; + + cpu_relax(); + } + + udelay(delay); + } + + dev_err_ratelimited(smmu->dev, + "TLB sync timed out -- SMMU may be deadlocked\n"); +} + +static int nvidia_smmu_reset(struct arm_smmu_device *smmu) +{ + unsigned int i; + + for (i = 0; i < NUM_SMMU_INSTANCES; i++) { + u32 val; + void __iomem *reg = nvidia_smmu_page(smmu, i, ARM_SMMU_GR0) + + ARM_SMMU_GR0_sGFSR; + + /* clear global FSR */ + val = readl_relaxed(reg); + writel_relaxed(val, reg); + } + + return 0; +} + +static const struct arm_smmu_impl nvidia_smmu_impl = { + .read_reg = nvidia_smmu_read_reg, + .write_reg = nvidia_smmu_write_reg, + .read_reg64 = nvidia_smmu_read_reg64, + .write_reg64 = nvidia_smmu_write_reg64, + .reset = nvidia_smmu_reset, + .tlb_sync = nvidia_smmu_tlb_sync, +}; + +struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu) +{ + struct resource *res; + struct device *dev = smmu->dev; + struct nvidia_smmu *nvidia_smmu; + struct platform_device *pdev = to_platform_device(dev); + + nvidia_smmu = devm_kzalloc(dev, sizeof(*nvidia_smmu), GFP_KERNEL); + if (!nvidia_smmu) + return ERR_PTR(-ENOMEM); + + /* + * Copy the data from struct arm_smmu_device *smmu allocated in + * arm-smmu.c. The smmu from struct nvidia_smmu replaces the smmu + * pointer used in arm-smmu.c once this function returns. + * This is necessary to derive nvidia_smmu from smmu pointer passed + * through arm_smmu_impl function calls subsequently. + */ + nvidia_smmu->smmu = *smmu; + /* Instance 0 is ioremapped by arm-smmu.c. */ + nvidia_smmu->bases[0] = smmu->base; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!res) + return ERR_PTR(-ENODEV); + + nvidia_smmu->bases[1] = devm_ioremap_resource(dev, res); + if (IS_ERR(nvidia_smmu->bases[1])) + return ERR_CAST(nvidia_smmu->bases[1]); + + nvidia_smmu->smmu.impl = &nvidia_smmu_impl; + + /* + * Free the struct arm_smmu_device *smmu allocated in arm-smmu.c. + * Once this function returns, arm-smmu.c would use arm_smmu_device + * allocated as part of struct nvidia_smmu. + */ + devm_kfree(dev, smmu); + + return &nvidia_smmu->smmu; +} diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index de520115d3df..35422a17f610 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1946,6 +1946,7 @@ static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,mmu-401", .data = &arm_mmu401 }, { .compatible = "arm,mmu-500", .data = &arm_mmu500 }, { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 }, + { .compatible = "nvidia,smmu-500", .data = &arm_mmu500 }, { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 }, { }, }; diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h index c7d0122a7c6c..fad63efa1a72 100644 --- a/drivers/iommu/arm-smmu.h +++ b/drivers/iommu/arm-smmu.h @@ -452,6 +452,7 @@ static inline void arm_smmu_writeq(struct arm_smmu_device *smmu, int page, arm_smmu_writeq((s), ARM_SMMU_CB((s), (n)), (o), (v)) struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu); +struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu); struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu); int arm_mmu500_reset(struct arm_smmu_device *smmu); From 3d2deb0cdb69e85d05760c6c72189d7653709ee1 Mon Sep 17 00:00:00 2001 From: Krishna Reddy <vdumpa@nvidia.com> Date: Sat, 18 Jul 2020 12:34:56 -0700 Subject: [PATCH 12/13] dt-bindings: arm-smmu: add binding for Tegra194 SMMU Add binding for NVIDIA's Tegra194 SoC SMMU. Signed-off-by: Krishna Reddy <vdumpa@nvidia.com> Reviewed-by: Jon Hunter <jonathanh@nvidia.com> Reviewed-by: Rob Herring <robh@kernel.org> Reviewed-by: Robin Murphy <robin.murphy@arm.com> Link: https://lore.kernel.org/r/20200718193457.30046-5-vdumpa@nvidia.com Signed-off-by: Will Deacon <will@kernel.org> --- .../devicetree/bindings/iommu/arm,smmu.yaml | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 93fb9fe068b9..503160a7b9a0 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -44,6 +44,11 @@ properties: items: - const: marvell,ap806-smmu-500 - const: arm,mmu-500 + - description: NVIDIA SoCs that program two ARM MMU-500s identically + items: + - enum: + - nvidia,tegra194-smmu + - const: nvidia,smmu-500 - items: - const: arm,mmu-500 - const: arm,smmu-v2 @@ -61,7 +66,8 @@ properties: - cavium,smmu-v2 reg: - maxItems: 1 + minItems: 1 + maxItems: 2 '#global-interrupts': description: The number of global interrupts exposed by the device. @@ -144,6 +150,23 @@ required: additionalProperties: false +allOf: + - if: + properties: + compatible: + contains: + enum: + - nvidia,tegra194-smmu + then: + properties: + reg: + minItems: 2 + maxItems: 2 + else: + properties: + reg: + maxItems: 1 + examples: - |+ /* SMMU with stream matching or stream indexing */ From aa7ec73297df57a86308fee78d2bf86e22ea0bae Mon Sep 17 00:00:00 2001 From: Krishna Reddy <vdumpa@nvidia.com> Date: Sat, 18 Jul 2020 12:34:57 -0700 Subject: [PATCH 13/13] iommu/arm-smmu: Add global/context fault implementation hooks Add global/context fault hooks to allow vendor specific implementations override default fault interrupt handlers. Update NVIDIA implementation to override the default global/context fault interrupt handlers and handle interrupts across the two ARM MMU-500s that are programmed identically. Signed-off-by: Krishna Reddy <vdumpa@nvidia.com> Reviewed-by: Jon Hunter <jonathanh@nvidia.com> Reviewed-by: Nicolin Chen <nicoleotsuka@gmail.com> Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com> Reviewed-by: Robin Murphy <robin.murphy@arm.com> Reviewed-by: Thierry Reding <thierry.reding@gmail.com> Link: https://lore.kernel.org/r/20200718193457.30046-6-vdumpa@nvidia.com Signed-off-by: Will Deacon <will@kernel.org> --- drivers/iommu/arm-smmu-nvidia.c | 99 +++++++++++++++++++++++++++++++++ drivers/iommu/arm-smmu.c | 17 +++++- drivers/iommu/arm-smmu.h | 3 + 3 files changed, 117 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm-smmu-nvidia.c b/drivers/iommu/arm-smmu-nvidia.c index 2f55e5793d34..31368057e9be 100644 --- a/drivers/iommu/arm-smmu-nvidia.c +++ b/drivers/iommu/arm-smmu-nvidia.c @@ -127,6 +127,103 @@ static int nvidia_smmu_reset(struct arm_smmu_device *smmu) return 0; } +static irqreturn_t nvidia_smmu_global_fault_inst(int irq, + struct arm_smmu_device *smmu, + int inst) +{ + u32 gfsr, gfsynr0, gfsynr1, gfsynr2; + void __iomem *gr0_base = nvidia_smmu_page(smmu, inst, 0); + + gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR); + if (!gfsr) + return IRQ_NONE; + + gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0); + gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1); + gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2); + + dev_err_ratelimited(smmu->dev, + "Unexpected global fault, this could be serious\n"); + dev_err_ratelimited(smmu->dev, + "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n", + gfsr, gfsynr0, gfsynr1, gfsynr2); + + writel_relaxed(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR); + return IRQ_HANDLED; +} + +static irqreturn_t nvidia_smmu_global_fault(int irq, void *dev) +{ + unsigned int inst; + irqreturn_t ret = IRQ_NONE; + struct arm_smmu_device *smmu = dev; + + for (inst = 0; inst < NUM_SMMU_INSTANCES; inst++) { + irqreturn_t irq_ret; + + irq_ret = nvidia_smmu_global_fault_inst(irq, smmu, inst); + if (irq_ret == IRQ_HANDLED) + ret = IRQ_HANDLED; + } + + return ret; +} + +static irqreturn_t nvidia_smmu_context_fault_bank(int irq, + struct arm_smmu_device *smmu, + int idx, int inst) +{ + u32 fsr, fsynr, cbfrsynra; + unsigned long iova; + void __iomem *gr1_base = nvidia_smmu_page(smmu, inst, 1); + void __iomem *cb_base = nvidia_smmu_page(smmu, inst, smmu->numpage + idx); + + fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR); + if (!(fsr & ARM_SMMU_FSR_FAULT)) + return IRQ_NONE; + + fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0); + iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR); + cbfrsynra = readl_relaxed(gr1_base + ARM_SMMU_GR1_CBFRSYNRA(idx)); + + dev_err_ratelimited(smmu->dev, + "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n", + fsr, iova, fsynr, cbfrsynra, idx); + + writel_relaxed(fsr, cb_base + ARM_SMMU_CB_FSR); + return IRQ_HANDLED; +} + +static irqreturn_t nvidia_smmu_context_fault(int irq, void *dev) +{ + int idx; + unsigned int inst; + irqreturn_t ret = IRQ_NONE; + struct arm_smmu_device *smmu; + struct iommu_domain *domain = dev; + struct arm_smmu_domain *smmu_domain; + + smmu_domain = container_of(domain, struct arm_smmu_domain, domain); + smmu = smmu_domain->smmu; + + for (inst = 0; inst < NUM_SMMU_INSTANCES; inst++) { + irqreturn_t irq_ret; + + /* + * Interrupt line is shared between all contexts. + * Check for faults across all contexts. + */ + for (idx = 0; idx < smmu->num_context_banks; idx++) { + irq_ret = nvidia_smmu_context_fault_bank(irq, smmu, + idx, inst); + if (irq_ret == IRQ_HANDLED) + ret = IRQ_HANDLED; + } + } + + return ret; +} + static const struct arm_smmu_impl nvidia_smmu_impl = { .read_reg = nvidia_smmu_read_reg, .write_reg = nvidia_smmu_write_reg, @@ -134,6 +231,8 @@ static const struct arm_smmu_impl nvidia_smmu_impl = { .write_reg64 = nvidia_smmu_write_reg64, .reset = nvidia_smmu_reset, .tlb_sync = nvidia_smmu_tlb_sync, + .global_fault = nvidia_smmu_global_fault, + .context_fault = nvidia_smmu_context_fault, }; struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 35422a17f610..56b7103e1368 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -670,6 +670,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, enum io_pgtable_fmt fmt; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + irqreturn_t (*context_fault)(int irq, void *dev); mutex_lock(&smmu_domain->init_mutex); if (smmu_domain->smmu) @@ -832,7 +833,13 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, * handler seeing a half-initialised domain state. */ irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx]; - ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault, + + if (smmu->impl && smmu->impl->context_fault) + context_fault = smmu->impl->context_fault; + else + context_fault = arm_smmu_context_fault; + + ret = devm_request_irq(smmu->dev, irq, context_fault, IRQF_SHARED, "arm-smmu-context-fault", domain); if (ret < 0) { dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n", @@ -2108,6 +2115,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) struct arm_smmu_device *smmu; struct device *dev = &pdev->dev; int num_irqs, i, err; + irqreturn_t (*global_fault)(int irq, void *dev); smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL); if (!smmu) { @@ -2194,9 +2202,14 @@ static int arm_smmu_device_probe(struct platform_device *pdev) smmu->num_context_irqs = smmu->num_context_banks; } + if (smmu->impl && smmu->impl->global_fault) + global_fault = smmu->impl->global_fault; + else + global_fault = arm_smmu_global_fault; + for (i = 0; i < smmu->num_global_irqs; ++i) { err = devm_request_irq(smmu->dev, smmu->irqs[i], - arm_smmu_global_fault, + global_fault, IRQF_SHARED, "arm-smmu global fault", smmu); diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h index fad63efa1a72..d890a4a968e8 100644 --- a/drivers/iommu/arm-smmu.h +++ b/drivers/iommu/arm-smmu.h @@ -18,6 +18,7 @@ #include <linux/io-64-nonatomic-hi-lo.h> #include <linux/io-pgtable.h> #include <linux/iommu.h> +#include <linux/irqreturn.h> #include <linux/mutex.h> #include <linux/spinlock.h> #include <linux/types.h> @@ -389,6 +390,8 @@ struct arm_smmu_impl { void (*tlb_sync)(struct arm_smmu_device *smmu, int page, int sync, int status); int (*def_domain_type)(struct device *dev); + irqreturn_t (*global_fault)(int irq, void *dev); + irqreturn_t (*context_fault)(int irq, void *dev); }; static inline void __iomem *arm_smmu_page(struct arm_smmu_device *smmu, int n)