From b847de4e5087fc8577c38a697d14fd2a5ce93352 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Fri, 5 May 2017 16:47:46 +0530 Subject: [PATCH 01/20] iommu/arm-smmu-v3: Increase CMDQ drain timeout value Waiting for a CMD_SYNC to be processed involves waiting for the command queue to drain, which can take an awful lot longer than waiting for a single entry to become available. Consequently, the common timeout value of 100us has been observed to be too short on some platforms when a CMD_SYNC is issued into a queued full of TLBI commands. This patch resolves the issue by using a different (1s) timeout when waiting for the CMDQ to drain and using a simple back-off mechanism when polling the cons pointer in the absence of WFE support. Signed-off-by: Sunil Goutham [will: rewrote commit message and cosmetic changes] Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 380969aa60d5..6a06be7626db 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -408,6 +408,7 @@ /* High-level queue structures */ #define ARM_SMMU_POLL_TIMEOUT_US 100 +#define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US 1000000 /* 1s! */ #define MSI_IOVA_BASE 0x8000000 #define MSI_IOVA_LENGTH 0x100000 @@ -737,7 +738,13 @@ static void queue_inc_prod(struct arm_smmu_queue *q) */ static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe) { - ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US); + ktime_t timeout; + unsigned int delay = 1; + + /* Wait longer if it's queue drain */ + timeout = ktime_add_us(ktime_get(), drain ? + ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US : + ARM_SMMU_POLL_TIMEOUT_US); while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) { if (ktime_compare(ktime_get(), timeout) > 0) @@ -747,7 +754,8 @@ static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe) wfe(); } else { cpu_relax(); - udelay(1); + udelay(delay); + delay *= 2; } } From 60ab7a75c8d83049b0e6189b4128247513880b19 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 13 Jun 2017 15:58:30 +0530 Subject: [PATCH 02/20] iommu/io-pgtable-arm-v7s: constify dummy_tlb_ops. File size before: text data bss dec hex filename 6146 56 9 6211 1843 drivers/iommu/io-pgtable-arm-v7s.o File size After adding 'const': text data bss dec hex filename 6170 24 9 6203 183b drivers/iommu/io-pgtable-arm-v7s.o Signed-off-by: Arvind Yadav Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm-v7s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 8d6ca28c3e1f..f8869951610c 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -749,7 +749,7 @@ static void dummy_tlb_sync(void *cookie) WARN_ON(cookie != cfg_cookie); } -static struct iommu_gather_ops dummy_tlb_ops = { +static const struct iommu_gather_ops dummy_tlb_ops = { .tlb_flush_all = dummy_tlb_flush_all, .tlb_add_flush = dummy_tlb_add_flush, .tlb_sync = dummy_tlb_sync, From 84c24379a783c514e5ff7c8fc8a21cf8d64fd05f Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 19 Jun 2017 16:41:56 +0100 Subject: [PATCH 03/20] iommu/arm-smmu: Plumb in new ACPI identifiers Revision C of IORT now allows us to identify ARM MMU-401 and the Cavium ThunderX implementation. Wire them up so that we can probe these models once firmware starts using the new codes in place of generic ones, and so that the appropriate features and quirks get enabled when we do. For the sake of backports and mitigating sychronisation problems with the ACPICA headers, we'll carry a backup copy of the new definitions locally for the short term to make life simpler. CC: stable@vger.kernel.org # 4.10 Acked-by: Robert Richter Tested-by: Robert Richter Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 7ec30b08b3bd..7ecd1a0b8419 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -312,6 +312,14 @@ enum arm_smmu_implementation { CAVIUM_SMMUV2, }; +/* Until ACPICA headers cover IORT rev. C */ +#ifndef ACPI_IORT_SMMU_CORELINK_MMU401 +#define ACPI_IORT_SMMU_CORELINK_MMU401 0x4 +#endif +#ifndef ACPI_IORT_SMMU_CAVIUM_THUNDERX +#define ACPI_IORT_SMMU_CAVIUM_THUNDERX 0x5 +#endif + struct arm_smmu_s2cr { struct iommu_group *group; int count; @@ -2073,6 +2081,10 @@ static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu) smmu->version = ARM_SMMU_V1; smmu->model = GENERIC_SMMU; break; + case ACPI_IORT_SMMU_CORELINK_MMU401: + smmu->version = ARM_SMMU_V1_64K; + smmu->model = GENERIC_SMMU; + break; case ACPI_IORT_SMMU_V2: smmu->version = ARM_SMMU_V2; smmu->model = GENERIC_SMMU; @@ -2081,6 +2093,10 @@ static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu) smmu->version = ARM_SMMU_V2; smmu->model = ARM_MMU500; break; + case ACPI_IORT_SMMU_CAVIUM_THUNDERX: + smmu->version = ARM_SMMU_V2; + smmu->model = CAVIUM_SMMUV2; + break; default: ret = -ENODEV; } From ebdd13c93f8e878afcdba642f48cd1bd85619e2a Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Thu, 22 Jun 2017 12:51:00 +0530 Subject: [PATCH 04/20] iommu: arm-smmu-v3: make of_device_ids const of_device_ids are not supposed to change at runtime. All functions working with of_device_ids provided by work with const of_device_ids. So mark the non-const structs as const. Signed-off-by: Arvind Yadav Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 6a06be7626db..0fd09745822f 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2776,7 +2776,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev) return 0; } -static struct of_device_id arm_smmu_of_match[] = { +static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,smmu-v3", }, { }, }; From 5c2d0218290afa3c335f38583bf4f8e8adad4c76 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Thu, 22 Jun 2017 12:57:42 +0530 Subject: [PATCH 05/20] iommu: arm-smmu: Handle return of iommu_device_register. iommu_device_register returns an error code and, although it currently never fails, we should check its return value anyway. Signed-off-by: Arvind Yadav [will: adjusted to follow arm-smmu.c] Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 0fd09745822f..029fe0cffee7 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2744,6 +2744,10 @@ static int arm_smmu_device_probe(struct platform_device *pdev) iommu_device_set_fwnode(&smmu->iommu, dev->fwnode); ret = iommu_device_register(&smmu->iommu); + if (ret) { + dev_err(dev, "Failed to register iommu\n"); + return ret; + } #ifdef CONFIG_PCI if (pci_bus_type.iommu_ops != &arm_smmu_ops) { From 9db829d2818501f07583542c05d01513b9161e14 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 22 Jun 2017 16:53:50 +0100 Subject: [PATCH 06/20] iommu/io-pgtable-arm-v7s: Check table PTEs more precisely Whilst we don't support the PXN bit at all, so should never encounter a level 1 section or supersection PTE with it set, it would still be wise to check both table type bits to resolve any theoretical ambiguity. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm-v7s.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index f8869951610c..46da7aa7c7d0 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -92,7 +92,8 @@ #define ARM_V7S_PTE_TYPE_CONT_PAGE 0x1 #define ARM_V7S_PTE_IS_VALID(pte) (((pte) & 0x3) != 0) -#define ARM_V7S_PTE_IS_TABLE(pte, lvl) (lvl == 1 && ((pte) & ARM_V7S_PTE_TYPE_TABLE)) +#define ARM_V7S_PTE_IS_TABLE(pte, lvl) \ + ((lvl) == 1 && (((pte) & 0x3) == ARM_V7S_PTE_TYPE_TABLE)) /* Page table bits */ #define ARM_V7S_ATTR_XN(lvl) BIT(4 * (2 - (lvl))) From fb3a95795da53d05a4fc5fcdc0d3ec69e7163355 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 22 Jun 2017 16:53:51 +0100 Subject: [PATCH 07/20] iommu/io-pgtable-arm: Improve split_blk_unmap The current split_blk_unmap implementation suffers from some inscrutable pointer trickery for creating the tables to replace the block entry, but more than that it also suffers from hideous inefficiency. For example, the most pathological case of unmapping a level 3 page from a level 1 block will allocate 513 lower-level tables to remap the entire block at page granularity, when only 2 are actually needed (the rest can be covered by level 2 block entries). Also, we would like to be able to relax the spinlock requirement in future, for which the roll-back-and-try-again logic for race resolution would be pretty hideous under the current paradigm. Both issues can be resolved most neatly by turning things sideways: instead of repeatedly recursing into __arm_lpae_map() map to build up an entire new sub-table depth-first, we can directly replace the block entry with a next-level table of block/page entries, then repeat by unmapping at the next level if necessary. With a little refactoring of some helper functions, the code ends up not much bigger than before, but considerably easier to follow and to adapt in future. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 118 +++++++++++++++++++-------------- 1 file changed, 69 insertions(+), 49 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 6e5df5e0a3bd..dd7477010291 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -264,19 +264,38 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, unsigned long iova, size_t size, int lvl, arm_lpae_iopte *ptep); +static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, + phys_addr_t paddr, arm_lpae_iopte prot, + int lvl, arm_lpae_iopte *ptep) +{ + arm_lpae_iopte pte = prot; + + if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS) + pte |= ARM_LPAE_PTE_NS; + + if (lvl == ARM_LPAE_MAX_LEVELS - 1) + pte |= ARM_LPAE_PTE_TYPE_PAGE; + else + pte |= ARM_LPAE_PTE_TYPE_BLOCK; + + pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS; + pte |= pfn_to_iopte(paddr >> data->pg_shift, data); + + __arm_lpae_set_pte(ptep, pte, &data->iop.cfg); +} + static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, unsigned long iova, phys_addr_t paddr, arm_lpae_iopte prot, int lvl, arm_lpae_iopte *ptep) { - arm_lpae_iopte pte = prot; - struct io_pgtable_cfg *cfg = &data->iop.cfg; + arm_lpae_iopte pte = *ptep; - if (iopte_leaf(*ptep, lvl)) { + if (iopte_leaf(pte, lvl)) { /* We require an unmap first */ WARN_ON(!selftest_running); return -EEXIST; - } else if (iopte_type(*ptep, lvl) == ARM_LPAE_PTE_TYPE_TABLE) { + } else if (iopte_type(pte, lvl) == ARM_LPAE_PTE_TYPE_TABLE) { /* * We need to unmap and free the old table before * overwriting it with a block entry. @@ -289,21 +308,24 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, return -EINVAL; } - if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) - pte |= ARM_LPAE_PTE_NS; - - if (lvl == ARM_LPAE_MAX_LEVELS - 1) - pte |= ARM_LPAE_PTE_TYPE_PAGE; - else - pte |= ARM_LPAE_PTE_TYPE_BLOCK; - - pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS; - pte |= pfn_to_iopte(paddr >> data->pg_shift, data); - - __arm_lpae_set_pte(ptep, pte, cfg); + __arm_lpae_init_pte(data, paddr, prot, lvl, ptep); return 0; } +static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table, + arm_lpae_iopte *ptep, + struct io_pgtable_cfg *cfg) +{ + arm_lpae_iopte new; + + new = __pa(table) | ARM_LPAE_PTE_TYPE_TABLE; + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) + new |= ARM_LPAE_PTE_NSTABLE; + + __arm_lpae_set_pte(ptep, new, cfg); + return new; +} + static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, phys_addr_t paddr, size_t size, arm_lpae_iopte prot, int lvl, arm_lpae_iopte *ptep) @@ -331,10 +353,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, if (!cptep) return -ENOMEM; - pte = __pa(cptep) | ARM_LPAE_PTE_TYPE_TABLE; - if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) - pte |= ARM_LPAE_PTE_NSTABLE; - __arm_lpae_set_pte(ptep, pte, cfg); + arm_lpae_install_table(cptep, ptep, cfg); } else if (!iopte_leaf(pte, lvl)) { cptep = iopte_deref(pte, data); } else { @@ -452,40 +471,43 @@ static void arm_lpae_free_pgtable(struct io_pgtable *iop) static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, unsigned long iova, size_t size, - arm_lpae_iopte prot, int lvl, - arm_lpae_iopte *ptep, size_t blk_size) + arm_lpae_iopte blk_pte, int lvl, + arm_lpae_iopte *ptep) { - unsigned long blk_start, blk_end; + struct io_pgtable_cfg *cfg = &data->iop.cfg; + arm_lpae_iopte pte, *tablep; phys_addr_t blk_paddr; - arm_lpae_iopte table = 0; + size_t tablesz = ARM_LPAE_GRANULE(data); + size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data); + int i, unmap_idx = -1; - blk_start = iova & ~(blk_size - 1); - blk_end = blk_start + blk_size; - blk_paddr = iopte_to_pfn(*ptep, data) << data->pg_shift; + if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) + return 0; - for (; blk_start < blk_end; blk_start += size, blk_paddr += size) { - arm_lpae_iopte *tablep; + tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg); + if (!tablep) + return 0; /* Bytes unmapped */ + if (size == split_sz) + unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data); + + blk_paddr = iopte_to_pfn(blk_pte, data) << data->pg_shift; + pte = iopte_prot(blk_pte); + + for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) { /* Unmap! */ - if (blk_start == iova) + if (i == unmap_idx) continue; - /* __arm_lpae_map expects a pointer to the start of the table */ - tablep = &table - ARM_LPAE_LVL_IDX(blk_start, lvl, data); - if (__arm_lpae_map(data, blk_start, blk_paddr, size, prot, lvl, - tablep) < 0) { - if (table) { - /* Free the table we allocated */ - tablep = iopte_deref(table, data); - __arm_lpae_free_pgtable(data, lvl + 1, tablep); - } - return 0; /* Bytes unmapped */ - } + __arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]); } - __arm_lpae_set_pte(ptep, table, &data->iop.cfg); - iova &= ~(blk_size - 1); - io_pgtable_tlb_add_flush(&data->iop, iova, blk_size, blk_size, true); + arm_lpae_install_table(tablep, ptep, cfg); + + if (unmap_idx < 0) + return __arm_lpae_unmap(data, iova, size, lvl, tablep); + + io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); return size; } @@ -495,7 +517,6 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, { arm_lpae_iopte pte; struct io_pgtable *iop = &data->iop; - size_t blk_size = ARM_LPAE_BLOCK_SIZE(lvl, data); /* Something went horribly wrong and we ran out of page table */ if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) @@ -507,7 +528,7 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, return 0; /* If the size matches this level, we're in the right place */ - if (size == blk_size) { + if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) { __arm_lpae_set_pte(ptep, 0, &iop->cfg); if (!iopte_leaf(pte, lvl)) { @@ -527,9 +548,8 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, * Insert a table at the next level to map the old region, * minus the part we want to unmap */ - return arm_lpae_split_blk_unmap(data, iova, size, - iopte_prot(pte), lvl, ptep, - blk_size); + return arm_lpae_split_blk_unmap(data, iova, size, pte, + lvl + 1, ptep); } /* Keep on walkin' */ From b9f1ef30ac2e9942b8628d551f4a21e8cec1415c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 22 Jun 2017 16:53:52 +0100 Subject: [PATCH 08/20] iommu/io-pgtable-arm-v7s: Refactor split_blk_unmap Whilst the short-descriptor format's split_blk_unmap implementation has no need to be recursive, it followed the pattern of the LPAE version anyway for the sake of consistency. With the latter now reworked for both efficiency and future scalability improvements, tweak the former similarly, not least to make it less obtuse. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm-v7s.c | 85 ++++++++++++++++-------------- 1 file changed, 45 insertions(+), 40 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 46da7aa7c7d0..dc74631322e4 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -281,6 +281,13 @@ static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl, else if (prot & IOMMU_CACHE) pte |= ARM_V7S_ATTR_B | ARM_V7S_ATTR_C; + pte |= ARM_V7S_PTE_TYPE_PAGE; + if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)) + pte |= ARM_V7S_ATTR_NS_SECTION; + + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB) + pte |= ARM_V7S_ATTR_MTK_4GB; + return pte; } @@ -353,7 +360,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data, int lvl, int num_entries, arm_v7s_iopte *ptep) { struct io_pgtable_cfg *cfg = &data->iop.cfg; - arm_v7s_iopte pte = arm_v7s_prot_to_pte(prot, lvl, cfg); + arm_v7s_iopte pte; int i; for (i = 0; i < num_entries; i++) @@ -375,13 +382,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data, return -EEXIST; } - pte |= ARM_V7S_PTE_TYPE_PAGE; - if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)) - pte |= ARM_V7S_ATTR_NS_SECTION; - - if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB) - pte |= ARM_V7S_ATTR_MTK_4GB; - + pte = arm_v7s_prot_to_pte(prot, lvl, cfg); if (num_entries > 1) pte = arm_v7s_pte_to_cont(pte, lvl); @@ -391,6 +392,20 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data, return 0; } +static arm_v7s_iopte arm_v7s_install_table(arm_v7s_iopte *table, + arm_v7s_iopte *ptep, + struct io_pgtable_cfg *cfg) +{ + arm_v7s_iopte new; + + new = virt_to_phys(table) | ARM_V7S_PTE_TYPE_TABLE; + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) + new |= ARM_V7S_ATTR_NS_TABLE; + + __arm_v7s_set_pte(ptep, new, 1, cfg); + return new; +} + static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova, phys_addr_t paddr, size_t size, int prot, int lvl, arm_v7s_iopte *ptep) @@ -418,11 +433,7 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova, if (!cptep) return -ENOMEM; - pte = virt_to_phys(cptep) | ARM_V7S_PTE_TYPE_TABLE; - if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) - pte |= ARM_V7S_ATTR_NS_TABLE; - - __arm_v7s_set_pte(ptep, pte, 1, cfg); + arm_v7s_install_table(cptep, ptep, cfg); } else if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) { cptep = iopte_deref(pte, lvl); } else { @@ -503,41 +514,35 @@ static void arm_v7s_split_cont(struct arm_v7s_io_pgtable *data, static int arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data, unsigned long iova, size_t size, - arm_v7s_iopte *ptep) + arm_v7s_iopte blk_pte, arm_v7s_iopte *ptep) { - unsigned long blk_start, blk_end, blk_size; - phys_addr_t blk_paddr; - arm_v7s_iopte table = 0; - int prot = arm_v7s_pte_to_prot(*ptep, 1); + struct io_pgtable_cfg *cfg = &data->iop.cfg; + arm_v7s_iopte pte, *tablep; + int i, unmap_idx, num_entries, num_ptes; - blk_size = ARM_V7S_BLOCK_SIZE(1); - blk_start = iova & ARM_V7S_LVL_MASK(1); - blk_end = blk_start + ARM_V7S_BLOCK_SIZE(1); - blk_paddr = *ptep & ARM_V7S_LVL_MASK(1); + tablep = __arm_v7s_alloc_table(2, GFP_ATOMIC, data); + if (!tablep) + return 0; /* Bytes unmapped */ - for (; blk_start < blk_end; blk_start += size, blk_paddr += size) { - arm_v7s_iopte *tablep; + num_ptes = ARM_V7S_PTES_PER_LVL(2); + num_entries = size >> ARM_V7S_LVL_SHIFT(2); + unmap_idx = ARM_V7S_LVL_IDX(iova, 2); + pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg); + if (num_entries > 1) + pte = arm_v7s_pte_to_cont(pte, 2); + + for (i = 0; i < num_ptes; i += num_entries, pte += size) { /* Unmap! */ - if (blk_start == iova) + if (i == unmap_idx) continue; - /* __arm_v7s_map expects a pointer to the start of the table */ - tablep = &table - ARM_V7S_LVL_IDX(blk_start, 1); - if (__arm_v7s_map(data, blk_start, blk_paddr, size, prot, 1, - tablep) < 0) { - if (table) { - /* Free the table we allocated */ - tablep = iopte_deref(table, 1); - __arm_v7s_free_table(tablep, 2, data); - } - return 0; /* Bytes unmapped */ - } + __arm_v7s_set_pte(&tablep[i], pte, num_entries, cfg); } - __arm_v7s_set_pte(ptep, table, 1, &data->iop.cfg); - iova &= ~(blk_size - 1); - io_pgtable_tlb_add_flush(&data->iop, iova, blk_size, blk_size, true); + arm_v7s_install_table(tablep, ptep, cfg); + + io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); return size; } @@ -594,7 +599,7 @@ static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *data, * Insert a table at the next level to map the old region, * minus the part we want to unmap */ - return arm_v7s_split_blk_unmap(data, iova, size, ptep); + return arm_v7s_split_blk_unmap(data, iova, size, pte[0], ptep); } /* Keep on walkin' */ From 81b3c25218447c65f93adf08b099a322b6803536 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 22 Jun 2017 16:53:53 +0100 Subject: [PATCH 09/20] iommu/io-pgtable: Introduce explicit coherency Once we remove the serialising spinlock, a potential race opens up for non-coherent IOMMUs whereby a caller of .map() can be sure that cache maintenance has been performed on their new PTE, but will have no guarantee that such maintenance for table entries above it has actually completed (e.g. if another CPU took an interrupt immediately after writing the table entry, but before initiating the DMA sync). Handling this race safely will add some potentially non-trivial overhead to installing a table entry, which we would much rather avoid on coherent systems where it will be unnecessary, and where we are stirivng to minimise latency by removing the locking in the first place. To that end, let's introduce an explicit notion of cache-coherency to io-pgtable, such that we will be able to avoid penalising IOMMUs which know enough to know when they are coherent. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 3 +++ drivers/iommu/arm-smmu.c | 3 +++ drivers/iommu/io-pgtable-arm-v7s.c | 17 ++++++++++------- drivers/iommu/io-pgtable-arm.c | 11 ++++++----- drivers/iommu/io-pgtable.h | 6 ++++++ 5 files changed, 28 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 029fe0cffee7..d50c8d4b9af9 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1563,6 +1563,9 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) .iommu_dev = smmu->dev, }; + if (smmu->features & ARM_SMMU_FEAT_COHERENCY) + pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; + pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); if (!pgtbl_ops) return -ENOMEM; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 7ecd1a0b8419..2bc5df8a490f 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1018,6 +1018,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, .iommu_dev = smmu->dev, }; + if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) + pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; + smmu_domain->smmu = smmu; pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); if (!pgtbl_ops) { diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index dc74631322e4..ec024c75a09e 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -187,7 +187,8 @@ static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl) static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, struct arm_v7s_io_pgtable *data) { - struct device *dev = data->iop.cfg.iommu_dev; + struct io_pgtable_cfg *cfg = &data->iop.cfg; + struct device *dev = cfg->iommu_dev; dma_addr_t dma; size_t size = ARM_V7S_TABLE_SIZE(lvl); void *table = NULL; @@ -196,7 +197,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, table = (void *)__get_dma_pages(__GFP_ZERO, get_order(size)); else if (lvl == 2) table = kmem_cache_zalloc(data->l2_tables, gfp | GFP_DMA); - if (table && !selftest_running) { + if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { dma = dma_map_single(dev, table, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) goto out_free; @@ -225,10 +226,11 @@ out_free: static void __arm_v7s_free_table(void *table, int lvl, struct arm_v7s_io_pgtable *data) { - struct device *dev = data->iop.cfg.iommu_dev; + struct io_pgtable_cfg *cfg = &data->iop.cfg; + struct device *dev = cfg->iommu_dev; size_t size = ARM_V7S_TABLE_SIZE(lvl); - if (!selftest_running) + if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) dma_unmap_single(dev, __arm_v7s_dma_addr(table), size, DMA_TO_DEVICE); if (lvl == 1) @@ -240,7 +242,7 @@ static void __arm_v7s_free_table(void *table, int lvl, static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries, struct io_pgtable_cfg *cfg) { - if (selftest_running) + if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) return; dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep), @@ -657,7 +659,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_PERMS | IO_PGTABLE_QUIRK_TLBI_ON_MAP | - IO_PGTABLE_QUIRK_ARM_MTK_4GB)) + IO_PGTABLE_QUIRK_ARM_MTK_4GB | + IO_PGTABLE_QUIRK_NO_DMA)) return NULL; /* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */ @@ -774,7 +777,7 @@ static int __init arm_v7s_do_selftests(void) .tlb = &dummy_tlb_ops, .oas = 32, .ias = 32, - .quirks = IO_PGTABLE_QUIRK_ARM_NS, + .quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA, .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, }; unsigned int iova, size, iova_start; diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index dd7477010291..6334f51912ea 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -217,7 +217,7 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp, if (!pages) return NULL; - if (!selftest_running) { + if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) goto out_free; @@ -243,7 +243,7 @@ out_free: static void __arm_lpae_free_pages(void *pages, size_t size, struct io_pgtable_cfg *cfg) { - if (!selftest_running) + if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages), size, DMA_TO_DEVICE); free_pages_exact(pages, size); @@ -254,7 +254,7 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte, { *ptep = pte; - if (!selftest_running) + if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep), sizeof(pte), DMA_TO_DEVICE); @@ -693,7 +693,7 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) u64 reg; struct arm_lpae_io_pgtable *data; - if (cfg->quirks & ~IO_PGTABLE_QUIRK_ARM_NS) + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA)) return NULL; data = arm_lpae_alloc_pgtable(cfg); @@ -782,7 +782,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) struct arm_lpae_io_pgtable *data; /* The NS quirk doesn't apply at stage 2 */ - if (cfg->quirks) + if (cfg->quirks & ~IO_PGTABLE_QUIRK_NO_DMA) return NULL; data = arm_lpae_alloc_pgtable(cfg); @@ -1086,6 +1086,7 @@ static int __init arm_lpae_do_selftests(void) struct io_pgtable_cfg cfg = { .tlb = &dummy_tlb_ops, .oas = 48, + .quirks = IO_PGTABLE_QUIRK_NO_DMA, }; for (i = 0; i < ARRAY_SIZE(pgsize); ++i) { diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h index 969d82cc92ca..524263a7ae6f 100644 --- a/drivers/iommu/io-pgtable.h +++ b/drivers/iommu/io-pgtable.h @@ -65,11 +65,17 @@ struct io_pgtable_cfg { * PTEs, for Mediatek IOMMUs which treat it as a 33rd address bit * when the SoC is in "4GB mode" and they can only access the high * remap of DRAM (0x1_00000000 to 0x1_ffffffff). + * + * IO_PGTABLE_QUIRK_NO_DMA: Guarantees that the tables will only ever + * be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a + * software-emulated IOMMU), such that pagetable updates need not + * be treated as explicit DMA data. */ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) #define IO_PGTABLE_QUIRK_TLBI_ON_MAP BIT(2) #define IO_PGTABLE_QUIRK_ARM_MTK_4GB BIT(3) + #define IO_PGTABLE_QUIRK_NO_DMA BIT(4) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; From 2c3d273eabe8b1ed3b3cffe2c79643b1bf7e2d4a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 22 Jun 2017 16:53:54 +0100 Subject: [PATCH 10/20] iommu/io-pgtable-arm: Support lockless operation For parallel I/O with multiple concurrent threads servicing the same device (or devices, if several share a domain), serialising page table updates becomes a massive bottleneck. On reflection, though, we don't strictly need to do that - for valid IOMMU API usage, there are in fact only two races that we need to guard against: multiple map requests for different blocks within the same region, when the intermediate-level table for that region does not yet exist; and multiple unmaps of different parts of the same block entry. Both of those are fairly easily solved by using a cmpxchg to install the new table, such that if we then find that someone else's table got there first, we can simply free ours and continue. Make the requisite changes such that we can withstand being called without the caller maintaining a lock. In theory, this opens up a few corners in which wildly misbehaving callers making nonsensical overlapping requests might lead to crashes instead of just unpredictable results, but correct code really does not deserve to pay a significant performance cost for the sake of masking bugs in theoretical broken code. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 72 +++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 6334f51912ea..52700fa958c2 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -20,6 +20,7 @@ #define pr_fmt(fmt) "arm-lpae io-pgtable: " fmt +#include #include #include #include @@ -99,6 +100,8 @@ #define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)6) << 52) #define ARM_LPAE_PTE_ATTR_MASK (ARM_LPAE_PTE_ATTR_LO_MASK | \ ARM_LPAE_PTE_ATTR_HI_MASK) +/* Software bit for solving coherency races */ +#define ARM_LPAE_PTE_SW_SYNC (((arm_lpae_iopte)1) << 55) /* Stage-1 PTE */ #define ARM_LPAE_PTE_AP_UNPRIV (((arm_lpae_iopte)1) << 6) @@ -249,15 +252,20 @@ static void __arm_lpae_free_pages(void *pages, size_t size, free_pages_exact(pages, size); } +static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, + struct io_pgtable_cfg *cfg) +{ + dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep), + sizeof(*ptep), DMA_TO_DEVICE); +} + static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte, struct io_pgtable_cfg *cfg) { *ptep = pte; if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) - dma_sync_single_for_device(cfg->iommu_dev, - __arm_lpae_dma_addr(ptep), - sizeof(pte), DMA_TO_DEVICE); + __arm_lpae_sync_pte(ptep, cfg); } static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, @@ -314,16 +322,30 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table, arm_lpae_iopte *ptep, + arm_lpae_iopte curr, struct io_pgtable_cfg *cfg) { - arm_lpae_iopte new; + arm_lpae_iopte old, new; new = __pa(table) | ARM_LPAE_PTE_TYPE_TABLE; if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) new |= ARM_LPAE_PTE_NSTABLE; - __arm_lpae_set_pte(ptep, new, cfg); - return new; + /* Ensure the table itself is visible before its PTE can be */ + wmb(); + + old = cmpxchg64_relaxed(ptep, curr, new); + + if ((cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) || + (old & ARM_LPAE_PTE_SW_SYNC)) + return old; + + /* Even if it's not ours, there's no point waiting; just kick it */ + __arm_lpae_sync_pte(ptep, cfg); + if (old == curr) + WRITE_ONCE(*ptep, new | ARM_LPAE_PTE_SW_SYNC); + + return old; } static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, @@ -332,6 +354,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, { arm_lpae_iopte *cptep, pte; size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data); + size_t tblsz = ARM_LPAE_GRANULE(data); struct io_pgtable_cfg *cfg = &data->iop.cfg; /* Find our entry at the current level */ @@ -346,17 +369,23 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, return -EINVAL; /* Grab a pointer to the next level */ - pte = *ptep; + pte = READ_ONCE(*ptep); if (!pte) { - cptep = __arm_lpae_alloc_pages(ARM_LPAE_GRANULE(data), - GFP_ATOMIC, cfg); + cptep = __arm_lpae_alloc_pages(tblsz, GFP_ATOMIC, cfg); if (!cptep) return -ENOMEM; - arm_lpae_install_table(cptep, ptep, cfg); - } else if (!iopte_leaf(pte, lvl)) { + pte = arm_lpae_install_table(cptep, ptep, 0, cfg); + if (pte) + __arm_lpae_free_pages(cptep, tblsz, cfg); + } else if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) && + !(pte & ARM_LPAE_PTE_SW_SYNC)) { + __arm_lpae_sync_pte(ptep, cfg); + } + + if (pte && !iopte_leaf(pte, lvl)) { cptep = iopte_deref(pte, data); - } else { + } else if (pte) { /* We require an unmap first */ WARN_ON(!selftest_running); return -EEXIST; @@ -502,7 +531,19 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, __arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]); } - arm_lpae_install_table(tablep, ptep, cfg); + pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg); + if (pte != blk_pte) { + __arm_lpae_free_pages(tablep, tablesz, cfg); + /* + * We may race against someone unmapping another part of this + * block, but anything else is invalid. We can't misinterpret + * a page entry here since we're never at the last level. + */ + if (iopte_type(pte, lvl - 1) != ARM_LPAE_PTE_TYPE_TABLE) + return 0; + + tablep = iopte_deref(pte, data); + } if (unmap_idx < 0) return __arm_lpae_unmap(data, iova, size, lvl, tablep); @@ -523,7 +564,7 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, return 0; ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); - pte = *ptep; + pte = READ_ONCE(*ptep); if (WARN_ON(!pte)) return 0; @@ -585,7 +626,8 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, return 0; /* Grab the IOPTE we're interested in */ - pte = *(ptep + ARM_LPAE_LVL_IDX(iova, lvl, data)); + ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); + pte = READ_ONCE(*ptep); /* Valid entry? */ if (!pte) From 119ff305b02793dc31eb1e921b85925ce10dc0a1 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 22 Jun 2017 16:53:55 +0100 Subject: [PATCH 11/20] iommu/io-pgtable-arm-v7s: Support lockless operation Mirroring the LPAE implementation, rework the v7s code to be robust against concurrent operations. The same two potential races exist, and are solved in the same manner, with the fixed 2-level structure making life ever so slightly simpler. What complicates matters compared to LPAE, however, is large page entries, since we can't update a block of 16 PTEs atomically, nor assume available software bits to do clever things with. As most users are never likely to do partial unmaps anyway (due to DMA API rules), it doesn't seem unreasonable for this case to remain behind a serialising lock; we just pull said lock down into the bowels of the implementation so it's well out of the way of the normal call paths. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm-v7s.c | 84 ++++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index ec024c75a09e..690629457565 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -32,6 +32,7 @@ #define pr_fmt(fmt) "arm-v7s io-pgtable: " fmt +#include #include #include #include @@ -39,6 +40,7 @@ #include #include #include +#include #include #include @@ -168,6 +170,7 @@ struct arm_v7s_io_pgtable { arm_v7s_iopte *pgd; struct kmem_cache *l2_tables; + spinlock_t split_lock; }; static dma_addr_t __arm_v7s_dma_addr(void *pages) @@ -396,16 +399,22 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data, static arm_v7s_iopte arm_v7s_install_table(arm_v7s_iopte *table, arm_v7s_iopte *ptep, + arm_v7s_iopte curr, struct io_pgtable_cfg *cfg) { - arm_v7s_iopte new; + arm_v7s_iopte old, new; new = virt_to_phys(table) | ARM_V7S_PTE_TYPE_TABLE; if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) new |= ARM_V7S_ATTR_NS_TABLE; - __arm_v7s_set_pte(ptep, new, 1, cfg); - return new; + /* Ensure the table itself is visible before its PTE can be */ + wmb(); + + old = cmpxchg_relaxed(ptep, curr, new); + __arm_v7s_pte_sync(ptep, 1, cfg); + + return old; } static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova, @@ -429,16 +438,23 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova, return -EINVAL; /* Grab a pointer to the next level */ - pte = *ptep; + pte = READ_ONCE(*ptep); if (!pte) { cptep = __arm_v7s_alloc_table(lvl + 1, GFP_ATOMIC, data); if (!cptep) return -ENOMEM; - arm_v7s_install_table(cptep, ptep, cfg); - } else if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) { - cptep = iopte_deref(pte, lvl); + pte = arm_v7s_install_table(cptep, ptep, 0, cfg); + if (pte) + __arm_v7s_free_table(cptep, lvl + 1, data); } else { + /* We've no easy way of knowing if it's synced yet, so... */ + __arm_v7s_pte_sync(ptep, 1, cfg); + } + + if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) { + cptep = iopte_deref(pte, lvl); + } else if (pte) { /* We require an unmap first */ WARN_ON(!selftest_running); return -EEXIST; @@ -491,27 +507,31 @@ static void arm_v7s_free_pgtable(struct io_pgtable *iop) kfree(data); } -static void arm_v7s_split_cont(struct arm_v7s_io_pgtable *data, - unsigned long iova, int idx, int lvl, - arm_v7s_iopte *ptep) +static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data, + unsigned long iova, int idx, int lvl, + arm_v7s_iopte *ptep) { struct io_pgtable *iop = &data->iop; arm_v7s_iopte pte; size_t size = ARM_V7S_BLOCK_SIZE(lvl); int i; + /* Check that we didn't lose a race to get the lock */ + pte = *ptep; + if (!arm_v7s_pte_is_cont(pte, lvl)) + return pte; + ptep -= idx & (ARM_V7S_CONT_PAGES - 1); - pte = arm_v7s_cont_to_pte(*ptep, lvl); - for (i = 0; i < ARM_V7S_CONT_PAGES; i++) { - ptep[i] = pte; - pte += size; - } + pte = arm_v7s_cont_to_pte(pte, lvl); + for (i = 0; i < ARM_V7S_CONT_PAGES; i++) + ptep[i] = pte + i * size; __arm_v7s_pte_sync(ptep, ARM_V7S_CONT_PAGES, &iop->cfg); size *= ARM_V7S_CONT_PAGES; io_pgtable_tlb_add_flush(iop, iova, size, size, true); io_pgtable_tlb_sync(iop); + return pte; } static int arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data, @@ -542,7 +562,16 @@ static int arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data, __arm_v7s_set_pte(&tablep[i], pte, num_entries, cfg); } - arm_v7s_install_table(tablep, ptep, cfg); + pte = arm_v7s_install_table(tablep, ptep, blk_pte, cfg); + if (pte != blk_pte) { + __arm_v7s_free_table(tablep, 2, data); + + if (!ARM_V7S_PTE_IS_TABLE(pte, 1)) + return 0; + + tablep = iopte_deref(pte, 1); + return __arm_v7s_unmap(data, iova, size, 2, tablep); + } io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); return size; @@ -563,17 +592,28 @@ static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *data, idx = ARM_V7S_LVL_IDX(iova, lvl); ptep += idx; do { - if (WARN_ON(!ARM_V7S_PTE_IS_VALID(ptep[i]))) + pte[i] = READ_ONCE(ptep[i]); + if (WARN_ON(!ARM_V7S_PTE_IS_VALID(pte[i]))) return 0; - pte[i] = ptep[i]; } while (++i < num_entries); /* * If we've hit a contiguous 'large page' entry at this level, it * needs splitting first, unless we're unmapping the whole lot. + * + * For splitting, we can't rewrite 16 PTEs atomically, and since we + * can't necessarily assume TEX remap we don't have a software bit to + * mark live entries being split. In practice (i.e. DMA API code), we + * will never be splitting large pages anyway, so just wrap this edge + * case in a lock for the sake of correctness and be done with it. */ - if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl)) - arm_v7s_split_cont(data, iova, idx, lvl, ptep); + if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl)) { + unsigned long flags; + + spin_lock_irqsave(&data->split_lock, flags); + pte[0] = arm_v7s_split_cont(data, iova, idx, lvl, ptep); + spin_unlock_irqrestore(&data->split_lock, flags); + } /* If the size matches this level, we're in the right place */ if (num_entries) { @@ -631,7 +671,8 @@ static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops, u32 mask; do { - pte = ptep[ARM_V7S_LVL_IDX(iova, ++lvl)]; + ptep += ARM_V7S_LVL_IDX(iova, ++lvl); + pte = READ_ONCE(*ptep); ptep = iopte_deref(pte, lvl); } while (ARM_V7S_PTE_IS_TABLE(pte, lvl)); @@ -672,6 +713,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, if (!data) return NULL; + spin_lock_init(&data->split_lock); data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2", ARM_V7S_TABLE_SIZE(2), ARM_V7S_TABLE_SIZE(2), From 523d7423e21bfe78452a640878d1de189ac45d91 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 22 Jun 2017 16:53:56 +0100 Subject: [PATCH 12/20] iommu/arm-smmu: Remove io-pgtable spinlock With the io-pgtable code now robust against (valid) races, we no longer need to serialise all operations with a lock. This might make broken callers who issue concurrent operations on overlapping addresses go even more wrong than before, but hey, they already had little hope of useful or deterministic results. We do however still have to keep a lock around to serialise the ATS1* translation ops, as parallel iova_to_phys() calls could lead to unpredictable hardware behaviour otherwise. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 45 +++++++++++++--------------------------- 1 file changed, 14 insertions(+), 31 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 2bc5df8a490f..bc89b4d6c043 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -433,10 +433,10 @@ enum arm_smmu_domain_stage { struct arm_smmu_domain { struct arm_smmu_device *smmu; struct io_pgtable_ops *pgtbl_ops; - spinlock_t pgtbl_lock; struct arm_smmu_cfg cfg; enum arm_smmu_domain_stage stage; struct mutex init_mutex; /* Protects smmu pointer */ + spinlock_t cb_lock; /* Serialises ATS1* ops */ struct iommu_domain domain; }; @@ -1113,7 +1113,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) } mutex_init(&smmu_domain->init_mutex); - spin_lock_init(&smmu_domain->pgtbl_lock); + spin_lock_init(&smmu_domain->cb_lock); return &smmu_domain->domain; } @@ -1391,35 +1391,23 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { - int ret; - unsigned long flags; - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops; + struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; if (!ops) return -ENODEV; - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); - ret = ops->map(ops, iova, paddr, size, prot); - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); - return ret; + return ops->map(ops, iova, paddr, size, prot); } static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { - size_t ret; - unsigned long flags; - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops; + struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; if (!ops) return 0; - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); - ret = ops->unmap(ops, iova, size); - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); - return ret; + return ops->unmap(ops, iova, size); } static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, @@ -1433,10 +1421,11 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, void __iomem *cb_base; u32 tmp; u64 phys; - unsigned long va; + unsigned long va, flags; cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); + spin_lock_irqsave(&smmu_domain->cb_lock, flags); /* ATS1 registers can only be written atomically */ va = iova & ~0xfffUL; if (smmu->version == ARM_SMMU_V2) @@ -1446,6 +1435,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp, !(tmp & ATSR_ACTIVE), 5, 50)) { + spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); dev_err(dev, "iova to phys timed out on %pad. Falling back to software table walk.\n", &iova); @@ -1453,6 +1443,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, } phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR); + spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); if (phys & CB_PAR_F) { dev_err(dev, "translation fault!\n"); dev_err(dev, "PAR = 0x%llx\n", phys); @@ -1465,10 +1456,8 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { - phys_addr_t ret; - unsigned long flags; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops; + struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; if (domain->type == IOMMU_DOMAIN_IDENTITY) return iova; @@ -1476,17 +1465,11 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, if (!ops) return 0; - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS && - smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { - ret = arm_smmu_iova_to_phys_hard(domain, iova); - } else { - ret = ops->iova_to_phys(ops, iova); - } + smmu_domain->stage == ARM_SMMU_DOMAIN_S1) + return arm_smmu_iova_to_phys_hard(domain, iova); - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); - - return ret; + return ops->iova_to_phys(ops, iova); } static bool arm_smmu_capable(enum iommu_cap cap) From 58188afeb727e0f73706f1460707bd3ba6ccc221 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 22 Jun 2017 16:53:57 +0100 Subject: [PATCH 13/20] iommu/arm-smmu-v3: Remove io-pgtable spinlock As for SMMUv2, take advantage of io-pgtable's newfound tolerance for concurrency. Unfortunately in this case the command queue lock remains a point of serialisation for the unmap path, but there may be a little more we can do to ameliorate that in future. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 33 ++++++--------------------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index d50c8d4b9af9..884b1a49a52a 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -646,7 +646,6 @@ struct arm_smmu_domain { struct mutex init_mutex; /* Protects smmu pointer */ struct io_pgtable_ops *pgtbl_ops; - spinlock_t pgtbl_lock; enum arm_smmu_domain_stage stage; union { @@ -1414,7 +1413,6 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) } mutex_init(&smmu_domain->init_mutex); - spin_lock_init(&smmu_domain->pgtbl_lock); return &smmu_domain->domain; } @@ -1686,44 +1684,29 @@ out_unlock: static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { - int ret; - unsigned long flags; - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; + struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; if (!ops) return -ENODEV; - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); - ret = ops->map(ops, iova, paddr, size, prot); - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); - return ret; + return ops->map(ops, iova, paddr, size, prot); } static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { - size_t ret; - unsigned long flags; - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; + struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; if (!ops) return 0; - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); - ret = ops->unmap(ops, iova, size); - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); - return ret; + return ops->unmap(ops, iova, size); } static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { - phys_addr_t ret; - unsigned long flags; - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; + struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; if (domain->type == IOMMU_DOMAIN_IDENTITY) return iova; @@ -1731,11 +1714,7 @@ arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) if (!ops) return 0; - spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); - ret = ops->iova_to_phys(ops, iova); - spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); - - return ret; + return ops->iova_to_phys(ops, iova); } static struct platform_driver arm_smmu_driver; From c1004803b40596c1aabbbc78a6b1b33e4dfd96c6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 23 Jun 2017 11:45:57 +0100 Subject: [PATCH 14/20] iommu/io-pgtable: depend on !GENERIC_ATOMIC64 when using COMPILE_TEST with LPAE The LPAE/ARMv8 page table format relies on the ability to read and write 64-bit page table entries in an atomic fashion. With the move to a lockless implementation, we also need support for cmpxchg64 to resolve races when installing table entries concurrently. Unfortunately, not all architectures support cmpxchg64, so the code can fail to compiler when building for these architectures using COMPILE_TEST. Rather than disable COMPILE_TEST altogether, instead check that GENERIC_ATOMIC64 is not selected, which is a reasonable indication that the architecture has support for 64-bit cmpxchg. Reported-by: kbuild test robot Signed-off-by: Will Deacon --- drivers/iommu/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 6ee3a25ae731..c88cfa7522b2 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -23,7 +23,7 @@ config IOMMU_IO_PGTABLE config IOMMU_IO_PGTABLE_LPAE bool "ARMv7/v8 Long Descriptor Format" select IOMMU_IO_PGTABLE - depends on HAS_DMA && (ARM || ARM64 || COMPILE_TEST) + depends on HAS_DMA && (ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64)) help Enable support for the ARM long descriptor pagetable format. This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page From 77f3445866c39d8866b31d8d9fa47c7c20938e05 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 23 Jun 2017 12:02:38 +0100 Subject: [PATCH 15/20] iommu/io-pgtable-arm: Use dma_wmb() instead of wmb() when publishing table When writing a new table entry, we must ensure that the contents of the table is made visible to the SMMU page table walker before the updated table entry itself. This is currently achieved using wmb(), which expands to an expensive and unnecessary DSB instruction. Ideally, we'd just use cmpxchg64_release when writing the table entry, but this doesn't have memory ordering semantics on !SMP systems. Instead, use dma_wmb(), which emits DMB OSHST. Strictly speaking, this does more than we require (since it targets the outer-shareable domain), but it's likely to be significantly faster than the DSB approach. Reported-by: Linu Cherian Suggested-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm-v7s.c | 8 ++++++-- drivers/iommu/io-pgtable-arm.c | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 690629457565..af330f513653 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -408,8 +408,12 @@ static arm_v7s_iopte arm_v7s_install_table(arm_v7s_iopte *table, if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) new |= ARM_V7S_ATTR_NS_TABLE; - /* Ensure the table itself is visible before its PTE can be */ - wmb(); + /* + * Ensure the table itself is visible before its PTE can be. + * Whilst we could get away with cmpxchg64_release below, this + * doesn't have any ordering semantics when !CONFIG_SMP. + */ + dma_wmb(); old = cmpxchg_relaxed(ptep, curr, new); __arm_v7s_pte_sync(ptep, 1, cfg); diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 52700fa958c2..b182039862c5 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -331,8 +331,12 @@ static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table, if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) new |= ARM_LPAE_PTE_NSTABLE; - /* Ensure the table itself is visible before its PTE can be */ - wmb(); + /* + * Ensure the table itself is visible before its PTE can be. + * Whilst we could get away with cmpxchg64_release below, this + * doesn't have any ordering semantics when !CONFIG_SMP. + */ + dma_wmb(); old = cmpxchg64_relaxed(ptep, curr, new); From 12275bf0a4deb690a5dc9903d207060737b7bae6 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 22 Jun 2017 21:20:54 +0200 Subject: [PATCH 16/20] iommu/arm-smmu-v3, acpi: Add temporary Cavium SMMU-V3 IORT model number definitions The model number is already defined in acpica and we are actually waiting for the acpi maintainers to include it: https://github.com/acpica/acpica/commit/d00a4eb86e64 Adding those temporary definitions until the change makes it into include/acpi/actbl2.h. Once that is done this patch can be reverted. Acked-by: Lorenzo Pieralisi Signed-off-by: Robert Richter Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 5 +++++ drivers/iommu/arm-smmu-v3.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index c5fecf97ee2f..da225710b009 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -31,6 +31,11 @@ #define IORT_IOMMU_TYPE ((1 << ACPI_IORT_NODE_SMMU) | \ (1 << ACPI_IORT_NODE_SMMU_V3)) +/* Until ACPICA headers cover IORT rev. C */ +#ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX +#define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX 0x2 +#endif + struct iort_its_msi_chip { struct list_head list; struct fwnode_handle *fw_node; diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 884b1a49a52a..da481d53e09b 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -413,6 +413,11 @@ #define MSI_IOVA_BASE 0x8000000 #define MSI_IOVA_LENGTH 0x100000 +/* Until ACPICA headers cover IORT rev. C */ +#ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX +#define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX 0x2 +#endif + static bool disable_bypass; module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO); MODULE_PARM_DESC(disable_bypass, From 403e8c7c5bcaff3291a2c7012fe80f707a854d10 Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Thu, 22 Jun 2017 17:35:36 +0530 Subject: [PATCH 17/20] ACPI/IORT: Fixup SMMUv3 resource size for Cavium ThunderX2 SMMUv3 model Cavium ThunderX2 implementation doesn't support second page in SMMU register space. Hence, resource size is set as 64k for this model. Signed-off-by: Linu Cherian Signed-off-by: Geetha Sowjanya Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index da225710b009..a8ebda9f7e97 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -833,6 +833,18 @@ static int __init arm_smmu_v3_count_resources(struct acpi_iort_node *node) return num_res; } +static unsigned long arm_smmu_v3_resource_size(struct acpi_iort_smmu_v3 *smmu) +{ + /* + * Override the size, for Cavium ThunderX2 implementation + * which doesn't support the page 1 SMMU register space. + */ + if (smmu->model == ACPI_IORT_SMMU_V3_CAVIUM_CN99XX) + return SZ_64K; + + return SZ_128K; +} + static void __init arm_smmu_v3_init_resources(struct resource *res, struct acpi_iort_node *node) { @@ -843,7 +855,8 @@ static void __init arm_smmu_v3_init_resources(struct resource *res, smmu = (struct acpi_iort_smmu_v3 *)node->node_data; res[num_res].start = smmu->base_address; - res[num_res].end = smmu->base_address + SZ_128K - 1; + res[num_res].end = smmu->base_address + + arm_smmu_v3_resource_size(smmu) - 1; res[num_res].flags = IORESOURCE_MEM; num_res++; From e5b829de053d9994dfc8652ce558e90e3406c578 Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Thu, 22 Jun 2017 17:35:37 +0530 Subject: [PATCH 18/20] iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2 erratum #74 Cavium ThunderX2 SMMU implementation doesn't support page 1 register space and PAGE0_REGS_ONLY option is enabled as an errata workaround. This option when turned on, replaces all page 1 offsets used for EVTQ_PROD/CONS, PRIQ_PROD/CONS register access with page 0 offsets. SMMU resource size checks are now based on SMMU option PAGE0_REGS_ONLY, since resource size can be either 64k/128k. For this, arm_smmu_device_dt_probe/acpi_probe has been moved before platform_get_resource call, so that SMMU options are set beforehand. Signed-off-by: Linu Cherian Signed-off-by: Geetha Sowjanya Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.txt | 1 + .../devicetree/bindings/iommu/arm,smmu-v3.txt | 6 ++ drivers/iommu/arm-smmu-v3.c | 68 ++++++++++++++----- 3 files changed, 57 insertions(+), 18 deletions(-) diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index 10f2dddbf449..4693a328947a 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -62,6 +62,7 @@ stable kernels. | Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 | | Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 | | Cavium | ThunderX SMMUv2 | #27704 | N/A | +| Cavium | ThunderX2 SMMUv3| #74 | N/A | | | | | | | Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 | | | | | | diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt index be57550e14e4..e7855cf6038e 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt @@ -49,6 +49,12 @@ the PCIe specification. - hisilicon,broken-prefetch-cmd : Avoid sending CMD_PREFETCH_* commands to the SMMU. +- cavium,cn9900-broken-page1-regspace + : Replaces all page 1 offsets used for EVTQ_PROD/CONS, + PRIQ_PROD/CONS register access with page 0 offsets. + Set for Cavium ThunderX2 silicon that doesn't support + SMMU page1 register space. + ** Example smmu@2b400000 { diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index da481d53e09b..2d5b48b4260a 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -603,6 +603,7 @@ struct arm_smmu_device { u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) +#define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1) u32 options; struct arm_smmu_cmdq cmdq; @@ -668,9 +669,20 @@ struct arm_smmu_option_prop { static struct arm_smmu_option_prop arm_smmu_options[] = { { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" }, + { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"}, { 0, NULL}, }; +static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset, + struct arm_smmu_device *smmu) +{ + if ((offset > SZ_64K) && + (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)) + offset -= SZ_64K; + + return smmu->base + offset; +} + static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) { return container_of(dom, struct arm_smmu_domain, domain); @@ -1956,8 +1968,8 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, return -ENOMEM; } - q->prod_reg = smmu->base + prod_off; - q->cons_reg = smmu->base + cons_off; + q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu); + q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu); q->ent_dwords = dwords; q->q_base = Q_BASE_RWA; @@ -2358,8 +2370,10 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) /* Event queue */ writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE); - writel_relaxed(smmu->evtq.q.prod, smmu->base + ARM_SMMU_EVTQ_PROD); - writel_relaxed(smmu->evtq.q.cons, smmu->base + ARM_SMMU_EVTQ_CONS); + writel_relaxed(smmu->evtq.q.prod, + arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu)); + writel_relaxed(smmu->evtq.q.cons, + arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu)); enables |= CR0_EVTQEN; ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, @@ -2374,9 +2388,9 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) writeq_relaxed(smmu->priq.q.q_base, smmu->base + ARM_SMMU_PRIQ_BASE); writel_relaxed(smmu->priq.q.prod, - smmu->base + ARM_SMMU_PRIQ_PROD); + arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu)); writel_relaxed(smmu->priq.q.cons, - smmu->base + ARM_SMMU_PRIQ_CONS); + arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu)); enables |= CR0_PRIQEN; ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, @@ -2600,6 +2614,14 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) } #ifdef CONFIG_ACPI +static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu) +{ + if (model == ACPI_IORT_SMMU_V3_CAVIUM_CN99XX) + smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY; + + dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options); +} + static int arm_smmu_device_acpi_probe(struct platform_device *pdev, struct arm_smmu_device *smmu) { @@ -2612,6 +2634,8 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev, /* Retrieve SMMUv3 specific data */ iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data; + acpi_smmu_get_options(iort_smmu->model, smmu); + if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE) smmu->features |= ARM_SMMU_FEAT_COHERENCY; @@ -2647,6 +2671,14 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev, return ret; } +static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu) +{ + if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY) + return SZ_64K; + else + return SZ_128K; +} + static int arm_smmu_device_probe(struct platform_device *pdev) { int irq, ret; @@ -2663,9 +2695,20 @@ static int arm_smmu_device_probe(struct platform_device *pdev) } smmu->dev = dev; + if (dev->of_node) { + ret = arm_smmu_device_dt_probe(pdev, smmu); + } else { + ret = arm_smmu_device_acpi_probe(pdev, smmu); + if (ret == -ENODEV) + return ret; + } + + /* Set bypass mode according to firmware probing result */ + bypass = !!ret; + /* Base address */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (resource_size(res) + 1 < SZ_128K) { + if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) { dev_err(dev, "MMIO region too small (%pr)\n", res); return -EINVAL; } @@ -2692,17 +2735,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev) if (irq > 0) smmu->gerr_irq = irq; - if (dev->of_node) { - ret = arm_smmu_device_dt_probe(pdev, smmu); - } else { - ret = arm_smmu_device_acpi_probe(pdev, smmu); - if (ret == -ENODEV) - return ret; - } - - /* Set bypass mode according to firmware probing result */ - bypass = !!ret; - /* Probe the h/w */ ret = arm_smmu_device_hw_probe(smmu); if (ret) From 99caf177f6fd3e67575f6ce05b36e8e041bcef60 Mon Sep 17 00:00:00 2001 From: shameer Date: Wed, 17 May 2017 10:12:05 +0100 Subject: [PATCH 19/20] iommu/arm-smmu-v3: Enable ACPI based HiSilicon CMD_PREFETCH quirk(erratum 161010701) HiSilicon SMMUv3 on Hip06/Hip07 platforms doesn't support CMD_PREFETCH command. The dt based support for this quirk is already present in the driver(hisilicon,broken-prefetch-cmd). This adds ACPI support for the quirk using the IORT smmu model number. Signed-off-by: shameer Signed-off-by: hanjun [will: rewrote patch] Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.txt | 1 + drivers/iommu/arm-smmu-v3.c | 12 +++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index 4693a328947a..ef4e43590685 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -67,6 +67,7 @@ stable kernels. | Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 | | | | | | | Hisilicon | Hip0{5,6,7} | #161010101 | HISILICON_ERRATUM_161010101 | +| Hisilicon | Hip0{6,7} | #161010701 | N/A | | | | | | | Qualcomm Tech. | Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | | Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 | diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 2d5b48b4260a..81fc1b5c91ee 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -414,6 +414,10 @@ #define MSI_IOVA_LENGTH 0x100000 /* Until ACPICA headers cover IORT rev. C */ +#ifndef ACPI_IORT_SMMU_HISILICON_HI161X +#define ACPI_IORT_SMMU_HISILICON_HI161X 0x1 +#endif + #ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX #define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX 0x2 #endif @@ -2616,8 +2620,14 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) #ifdef CONFIG_ACPI static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu) { - if (model == ACPI_IORT_SMMU_V3_CAVIUM_CN99XX) + switch (model) { + case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX: smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY; + break; + case ACPI_IORT_SMMU_HISILICON_HI161X: + smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH; + break; + } dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options); } From f935448acf462c26142e8b04f1c8829b28d3b9d8 Mon Sep 17 00:00:00 2001 From: Geetha Sowjanya Date: Fri, 23 Jun 2017 19:04:36 +0530 Subject: [PATCH 20/20] iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2 erratum #126 Cavium ThunderX2 SMMU doesn't support MSI and also doesn't have unique irq lines for gerror, eventq and cmdq-sync. New named irq "combined" is set as a errata workaround, which allows to share the irq line by register single irq handler for all the interrupts. Acked-by: Lorenzo Pieralisi Signed-off-by: Geetha sowjanya [will: reworked irq equality checking and added SPI check] Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.txt | 1 + .../devicetree/bindings/iommu/arm,smmu-v3.txt | 6 ++ drivers/acpi/arm64/iort.c | 57 +++++++---- drivers/iommu/arm-smmu-v3.c | 94 ++++++++++++++----- 4 files changed, 118 insertions(+), 40 deletions(-) diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index ef4e43590685..856479525776 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -63,6 +63,7 @@ stable kernels. | Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 | | Cavium | ThunderX SMMUv2 | #27704 | N/A | | Cavium | ThunderX2 SMMUv3| #74 | N/A | +| Cavium | ThunderX2 SMMUv3| #126 | N/A | | | | | | | Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 | | | | | | diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt index e7855cf6038e..c9abbf3e4f68 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt @@ -26,6 +26,12 @@ the PCIe specification. * "priq" - PRI Queue not empty * "cmdq-sync" - CMD_SYNC complete * "gerror" - Global Error activated + * "combined" - The combined interrupt is optional, + and should only be provided if the + hardware supports just a single, + combined interrupt line. + If provided, then the combined interrupt + will be used in preference to any others. - #iommu-cells : See the generic IOMMU binding described in devicetree/bindings/pci/pci-iommu.txt diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index a8ebda9f7e97..83d65d96676f 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -833,6 +833,24 @@ static int __init arm_smmu_v3_count_resources(struct acpi_iort_node *node) return num_res; } +static bool arm_smmu_v3_is_combined_irq(struct acpi_iort_smmu_v3 *smmu) +{ + /* + * Cavium ThunderX2 implementation doesn't not support unique + * irq line. Use single irq line for all the SMMUv3 interrupts. + */ + if (smmu->model != ACPI_IORT_SMMU_V3_CAVIUM_CN99XX) + return false; + + /* + * ThunderX2 doesn't support MSIs from the SMMU, so we're checking + * SPI numbers here. + */ + return smmu->event_gsiv == smmu->pri_gsiv && + smmu->event_gsiv == smmu->gerr_gsiv && + smmu->event_gsiv == smmu->sync_gsiv; +} + static unsigned long arm_smmu_v3_resource_size(struct acpi_iort_smmu_v3 *smmu) { /* @@ -860,26 +878,33 @@ static void __init arm_smmu_v3_init_resources(struct resource *res, res[num_res].flags = IORESOURCE_MEM; num_res++; + if (arm_smmu_v3_is_combined_irq(smmu)) { + if (smmu->event_gsiv) + acpi_iort_register_irq(smmu->event_gsiv, "combined", + ACPI_EDGE_SENSITIVE, + &res[num_res++]); + } else { - if (smmu->event_gsiv) - acpi_iort_register_irq(smmu->event_gsiv, "eventq", - ACPI_EDGE_SENSITIVE, - &res[num_res++]); + if (smmu->event_gsiv) + acpi_iort_register_irq(smmu->event_gsiv, "eventq", + ACPI_EDGE_SENSITIVE, + &res[num_res++]); - if (smmu->pri_gsiv) - acpi_iort_register_irq(smmu->pri_gsiv, "priq", - ACPI_EDGE_SENSITIVE, - &res[num_res++]); + if (smmu->pri_gsiv) + acpi_iort_register_irq(smmu->pri_gsiv, "priq", + ACPI_EDGE_SENSITIVE, + &res[num_res++]); - if (smmu->gerr_gsiv) - acpi_iort_register_irq(smmu->gerr_gsiv, "gerror", - ACPI_EDGE_SENSITIVE, - &res[num_res++]); + if (smmu->gerr_gsiv) + acpi_iort_register_irq(smmu->gerr_gsiv, "gerror", + ACPI_EDGE_SENSITIVE, + &res[num_res++]); - if (smmu->sync_gsiv) - acpi_iort_register_irq(smmu->sync_gsiv, "cmdq-sync", - ACPI_EDGE_SENSITIVE, - &res[num_res++]); + if (smmu->sync_gsiv) + acpi_iort_register_irq(smmu->sync_gsiv, "cmdq-sync", + ACPI_EDGE_SENSITIVE, + &res[num_res++]); + } } static bool __init arm_smmu_v3_is_coherent(struct acpi_iort_node *node) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 81fc1b5c91ee..568c400eeaed 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -615,6 +615,7 @@ struct arm_smmu_device { struct arm_smmu_priq priq; int gerr_irq; + int combined_irq; unsigned long ias; /* IPA */ unsigned long oas; /* PA */ @@ -1330,6 +1331,24 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev) return IRQ_HANDLED; } +static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev) +{ + struct arm_smmu_device *smmu = dev; + + arm_smmu_evtq_thread(irq, dev); + if (smmu->features & ARM_SMMU_FEAT_PRI) + arm_smmu_priq_thread(irq, dev); + + return IRQ_HANDLED; +} + +static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev) +{ + arm_smmu_gerror_handler(irq, dev); + arm_smmu_cmdq_sync_handler(irq, dev); + return IRQ_WAKE_THREAD; +} + /* IO_PGTABLE API */ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu) { @@ -2229,18 +2248,9 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu) devm_add_action(dev, arm_smmu_free_msis, dev); } -static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) +static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu) { - int ret, irq; - u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN; - - /* Disable IRQs first */ - ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL, - ARM_SMMU_IRQ_CTRLACK); - if (ret) { - dev_err(smmu->dev, "failed to disable irqs\n"); - return ret; - } + int irq, ret; arm_smmu_setup_msis(smmu); @@ -2283,10 +2293,41 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) if (ret < 0) dev_warn(smmu->dev, "failed to enable priq irq\n"); - else - irqen_flags |= IRQ_CTRL_PRIQ_IRQEN; } } +} + +static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) +{ + int ret, irq; + u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN; + + /* Disable IRQs first */ + ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL, + ARM_SMMU_IRQ_CTRLACK); + if (ret) { + dev_err(smmu->dev, "failed to disable irqs\n"); + return ret; + } + + irq = smmu->combined_irq; + if (irq) { + /* + * Cavium ThunderX2 implementation doesn't not support unique + * irq lines. Use single irq line for all the SMMUv3 interrupts. + */ + ret = devm_request_threaded_irq(smmu->dev, irq, + arm_smmu_combined_irq_handler, + arm_smmu_combined_irq_thread, + IRQF_ONESHOT, + "arm-smmu-v3-combined-irq", smmu); + if (ret < 0) + dev_warn(smmu->dev, "failed to enable combined irq\n"); + } else + arm_smmu_setup_unique_irqs(smmu); + + if (smmu->features & ARM_SMMU_FEAT_PRI) + irqen_flags |= IRQ_CTRL_PRIQ_IRQEN; /* Enable interrupt generation on the SMMU */ ret = arm_smmu_write_reg_sync(smmu, irqen_flags, @@ -2729,22 +2770,27 @@ static int arm_smmu_device_probe(struct platform_device *pdev) return PTR_ERR(smmu->base); /* Interrupt lines */ - irq = platform_get_irq_byname(pdev, "eventq"); - if (irq > 0) - smmu->evtq.q.irq = irq; - irq = platform_get_irq_byname(pdev, "priq"); + irq = platform_get_irq_byname(pdev, "combined"); if (irq > 0) - smmu->priq.q.irq = irq; + smmu->combined_irq = irq; + else { + irq = platform_get_irq_byname(pdev, "eventq"); + if (irq > 0) + smmu->evtq.q.irq = irq; - irq = platform_get_irq_byname(pdev, "cmdq-sync"); - if (irq > 0) - smmu->cmdq.q.irq = irq; + irq = platform_get_irq_byname(pdev, "priq"); + if (irq > 0) + smmu->priq.q.irq = irq; - irq = platform_get_irq_byname(pdev, "gerror"); - if (irq > 0) - smmu->gerr_irq = irq; + irq = platform_get_irq_byname(pdev, "cmdq-sync"); + if (irq > 0) + smmu->cmdq.q.irq = irq; + irq = platform_get_irq_byname(pdev, "gerror"); + if (irq > 0) + smmu->gerr_irq = irq; + } /* Probe the h/w */ ret = arm_smmu_device_hw_probe(smmu); if (ret)