From c9d76d0655c06b8c1f944e46c4fd9e9cf4b331c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Wed, 29 Aug 2018 23:29:21 +0200 Subject: [PATCH 01/73] dma-mapping: fix return type of dma_set_max_seg_size() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function dma_set_max_seg_size() can return either 0 on success or -EIO on error. Change its return type from unsigned int to int to capture this. Signed-off-by: Niklas Söderlund Reviewed-by: Geert Uytterhoeven Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 15bd41447025..0f81c713f6e9 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -676,8 +676,7 @@ static inline unsigned int dma_get_max_seg_size(struct device *dev) return SZ_64K; } -static inline unsigned int dma_set_max_seg_size(struct device *dev, - unsigned int size) +static inline int dma_set_max_seg_size(struct device *dev, unsigned int size) { if (dev->dma_parms) { dev->dma_parms->max_segment_size = size; From b18814e767a445534ab9ccba02e82a31208f85d6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 4 Nov 2018 17:27:56 +0100 Subject: [PATCH 02/73] dma-direct: provide page based alloc/free helpers Some architectures support remapping highmem into DMA coherent allocations. To use the common code for them we need variants of dma_direct_{alloc,free}_pages that do not use kernel virtual addresses. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- include/linux/dma-direct.h | 3 +++ kernel/dma/direct.c | 32 ++++++++++++++++++++++---------- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 9e66bfe369aa..61b78f934f64 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -67,6 +67,9 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); +struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); +void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page); dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs); diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 22a12ab5a5e9..680287779b0a 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -103,14 +103,13 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) min_not_zero(dev->coherent_dma_mask, dev->bus_dma_mask); } -void *dma_direct_alloc_pages(struct device *dev, size_t size, +struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; int page_order = get_order(size); struct page *page = NULL; u64 phys_mask; - void *ret; if (attrs & DMA_ATTR_NO_WARN) gfp |= __GFP_NOWARN; @@ -150,11 +149,22 @@ again: } } + return page; +} + +void *dma_direct_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) +{ + struct page *page; + void *ret; + + page = __dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); if (!page) return NULL; + ret = page_address(page); if (force_dma_unencrypted()) { - set_memory_decrypted((unsigned long)ret, 1 << page_order); + set_memory_decrypted((unsigned long)ret, 1 << get_order(size)); *dma_handle = __phys_to_dma(dev, page_to_phys(page)); } else { *dma_handle = phys_to_dma(dev, page_to_phys(page)); @@ -163,20 +173,22 @@ again: return ret; } -/* - * NOTE: this function must never look at the dma_addr argument, because we want - * to be able to use it as a helper for iommu implementations as well. - */ +void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page) +{ + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + + if (!dma_release_from_contiguous(dev, page, count)) + __free_pages(page, get_order(size)); +} + void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; unsigned int page_order = get_order(size); if (force_dma_unencrypted()) set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); - if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count)) - free_pages((unsigned long)cpu_addr, page_order); + __dma_direct_free_pages(dev, size, virt_to_page(cpu_addr)); } void *dma_direct_alloc(struct device *dev, size_t size, From 704f2c20eaa566f6906e8812b6e2115889bd753d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 22 Sep 2018 20:47:26 +0200 Subject: [PATCH 03/73] dma-direct: reject highmem pages from dma_alloc_from_contiguous dma_alloc_from_contiguous can return highmem pages depending on the setup, which a plain non-remapping DMA allocator can't handle. Detect this case and fail the allocation. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- kernel/dma/direct.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 680287779b0a..c49849bcced6 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -162,6 +162,18 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, if (!page) return NULL; + if (PageHighMem(page)) { + /* + * Depending on the cma= arguments and per-arch setup + * dma_alloc_from_contiguous could return highmem pages. + * Without remapping there is no way to return them here, + * so log an error and fail. + */ + dev_info(dev, "Rejecting highmem page from CMA.\n"); + __dma_direct_free_pages(dev, size, page); + return NULL; + } + ret = page_address(page); if (force_dma_unencrypted()) { set_memory_decrypted((unsigned long)ret, 1 << get_order(size)); From f0edfea8ef93ed6cc5f747c46c85c8e53e0798a0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Aug 2018 10:31:08 +0200 Subject: [PATCH 04/73] dma-mapping: move the remap helpers to a separate file The dma remap code only makes sense for not cache coherent architectures (or possibly the corner case of highmem CMA allocations) and currently is only used by arm, arm64, csky and xtensa. Split it out into a separate file with a separate Kconfig symbol, which gets the right copyright notice given that this code was written by Laura Abbott working for Code Aurora at that point. Signed-off-by: Christoph Hellwig Acked-by: Laura Abbott Reviewed-by: Robin Murphy --- arch/arm/Kconfig | 1 + arch/arm64/Kconfig | 1 + arch/csky/Kconfig | 1 + arch/xtensa/Kconfig | 1 + kernel/dma/Kconfig | 4 ++ kernel/dma/Makefile | 2 +- kernel/dma/mapping.c | 84 ------------------------------------------ kernel/dma/remap.c | 88 ++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 97 insertions(+), 85 deletions(-) create mode 100644 kernel/dma/remap.c diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 91be74d8df65..3b2852df6eb3 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -30,6 +30,7 @@ config ARM select CPU_PM if (SUSPEND || CPU_IDLE) select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS select DMA_DIRECT_OPS if !MMU + select DMA_REMAP if MMU select EDAC_SUPPORT select EDAC_ATOMIC_SCRUB select GENERIC_ALLOCATOR diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 787d7850e064..5d065acb6d10 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -82,6 +82,7 @@ config ARM64 select CRC32 select DCACHE_WORD_ACCESS select DMA_DIRECT_OPS + select DMA_REMAP select EDAC_SUPPORT select FRAME_POINTER select GENERIC_ALLOCATOR diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index cb64f8dacd08..8a30e006a845 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -9,6 +9,7 @@ config CSKY select CLKSRC_OF select DMA_DIRECT_OPS select DMA_NONCOHERENT_OPS + select DMA_REMAP select IRQ_DOMAIN select HANDLE_DOMAIN_IRQ select DW_APB_TIMER_OF diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index d29b7365da8d..239bfb16c58b 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -11,6 +11,7 @@ config XTENSA select CLONE_BACKWARDS select COMMON_CLK select DMA_DIRECT_OPS + select DMA_REMAP if MMU select GENERIC_ATOMIC64 select GENERIC_CLOCKEVENTS select GENERIC_IRQ_SHOW diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 645c7a2ecde8..c92e08173ed8 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -51,3 +51,7 @@ config SWIOTLB bool select DMA_DIRECT_OPS select NEED_DMA_MAP_STATE + +config DMA_REMAP + depends on MMU + bool diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index 7d581e4eea4a..f4feeceb8020 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -7,4 +7,4 @@ obj-$(CONFIG_DMA_DIRECT_OPS) += direct.o obj-$(CONFIG_DMA_VIRT_OPS) += virt.o obj-$(CONFIG_DMA_API_DEBUG) += debug.o obj-$(CONFIG_SWIOTLB) += swiotlb.o - +obj-$(CONFIG_DMA_REMAP) += remap.o diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 58dec7a92b7b..dfbc3deb95cd 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -262,87 +262,3 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, #endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */ } EXPORT_SYMBOL(dma_common_mmap); - -#ifdef CONFIG_MMU -static struct vm_struct *__dma_common_pages_remap(struct page **pages, - size_t size, unsigned long vm_flags, pgprot_t prot, - const void *caller) -{ - struct vm_struct *area; - - area = get_vm_area_caller(size, vm_flags, caller); - if (!area) - return NULL; - - if (map_vm_area(area, prot, pages)) { - vunmap(area->addr); - return NULL; - } - - return area; -} - -/* - * remaps an array of PAGE_SIZE pages into another vm_area - * Cannot be used in non-sleeping contexts - */ -void *dma_common_pages_remap(struct page **pages, size_t size, - unsigned long vm_flags, pgprot_t prot, - const void *caller) -{ - struct vm_struct *area; - - area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller); - if (!area) - return NULL; - - area->pages = pages; - - return area->addr; -} - -/* - * remaps an allocated contiguous region into another vm_area. - * Cannot be used in non-sleeping contexts - */ - -void *dma_common_contiguous_remap(struct page *page, size_t size, - unsigned long vm_flags, - pgprot_t prot, const void *caller) -{ - int i; - struct page **pages; - struct vm_struct *area; - - pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL); - if (!pages) - return NULL; - - for (i = 0; i < (size >> PAGE_SHIFT); i++) - pages[i] = nth_page(page, i); - - area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller); - - kfree(pages); - - if (!area) - return NULL; - return area->addr; -} - -/* - * unmaps a range previously mapped by dma_common_*_remap - */ -void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags) -{ - struct vm_struct *area = find_vm_area(cpu_addr); - - if (!area || (area->flags & vm_flags) != vm_flags) { - WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); - return; - } - - unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size)); - vunmap(cpu_addr); -} -#endif diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c new file mode 100644 index 000000000000..a15c393ea4e5 --- /dev/null +++ b/kernel/dma/remap.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2014 The Linux Foundation + */ +#include +#include +#include + +static struct vm_struct *__dma_common_pages_remap(struct page **pages, + size_t size, unsigned long vm_flags, pgprot_t prot, + const void *caller) +{ + struct vm_struct *area; + + area = get_vm_area_caller(size, vm_flags, caller); + if (!area) + return NULL; + + if (map_vm_area(area, prot, pages)) { + vunmap(area->addr); + return NULL; + } + + return area; +} + +/* + * Remaps an array of PAGE_SIZE pages into another vm_area. + * Cannot be used in non-sleeping contexts + */ +void *dma_common_pages_remap(struct page **pages, size_t size, + unsigned long vm_flags, pgprot_t prot, + const void *caller) +{ + struct vm_struct *area; + + area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller); + if (!area) + return NULL; + + area->pages = pages; + + return area->addr; +} + +/* + * Remaps an allocated contiguous region into another vm_area. + * Cannot be used in non-sleeping contexts + */ +void *dma_common_contiguous_remap(struct page *page, size_t size, + unsigned long vm_flags, + pgprot_t prot, const void *caller) +{ + int i; + struct page **pages; + struct vm_struct *area; + + pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL); + if (!pages) + return NULL; + + for (i = 0; i < (size >> PAGE_SHIFT); i++) + pages[i] = nth_page(page, i); + + area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller); + + kfree(pages); + + if (!area) + return NULL; + return area->addr; +} + +/* + * Unmaps a range previously mapped by dma_common_*_remap + */ +void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags) +{ + struct vm_struct *area = find_vm_area(cpu_addr); + + if (!area || (area->flags & vm_flags) != vm_flags) { + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); + return; + } + + unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size)); + vunmap(cpu_addr); +} From 0c3b3171ceccb8830c2bb5adff1b4e9b204c1450 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 4 Nov 2018 20:29:28 +0100 Subject: [PATCH 05/73] dma-mapping: move the arm64 noncoherent alloc/free support to common code The arm64 codebase to implement coherent dma allocation for architectures with non-coherent DMA is a good start for a generic implementation, given that is uses the generic remap helpers, provides the atomic pool for allocations that can't sleep and still is realtively simple and well tested. Move it to kernel/dma and allow architectures to opt into it using a config symbol. Architectures just need to provide a new arch_dma_prep_coherent helper to writeback an invalidate the caches for any memory that gets remapped for uncached access. Signed-off-by: Christoph Hellwig Reviewed-by: Will Deacon Reviewed-by: Robin Murphy --- arch/arm64/Kconfig | 2 +- arch/arm64/mm/dma-mapping.c | 184 ++------------------------------ include/linux/dma-mapping.h | 5 + include/linux/dma-noncoherent.h | 2 + kernel/dma/Kconfig | 5 + kernel/dma/remap.c | 158 ++++++++++++++++++++++++++- 6 files changed, 180 insertions(+), 176 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5d065acb6d10..2e645ea693ea 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -82,7 +82,7 @@ config ARM64 select CRC32 select DCACHE_WORD_ACCESS select DMA_DIRECT_OPS - select DMA_REMAP + select DMA_DIRECT_REMAP select EDAC_SUPPORT select FRAME_POINTER select GENERIC_ALLOCATOR diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index a3ac26284845..e2e7e5d0f94e 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -33,113 +33,6 @@ #include -static struct gen_pool *atomic_pool __ro_after_init; - -#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K -static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE; - -static int __init early_coherent_pool(char *p) -{ - atomic_pool_size = memparse(p, &p); - return 0; -} -early_param("coherent_pool", early_coherent_pool); - -static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags) -{ - unsigned long val; - void *ptr = NULL; - - if (!atomic_pool) { - WARN(1, "coherent pool not initialised!\n"); - return NULL; - } - - val = gen_pool_alloc(atomic_pool, size); - if (val) { - phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val); - - *ret_page = phys_to_page(phys); - ptr = (void *)val; - memset(ptr, 0, size); - } - - return ptr; -} - -static bool __in_atomic_pool(void *start, size_t size) -{ - return addr_in_gen_pool(atomic_pool, (unsigned long)start, size); -} - -static int __free_from_pool(void *start, size_t size) -{ - if (!__in_atomic_pool(start, size)) - return 0; - - gen_pool_free(atomic_pool, (unsigned long)start, size); - - return 1; -} - -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t flags, unsigned long attrs) -{ - struct page *page; - void *ptr, *coherent_ptr; - pgprot_t prot = pgprot_writecombine(PAGE_KERNEL); - - size = PAGE_ALIGN(size); - - if (!gfpflags_allow_blocking(flags)) { - struct page *page = NULL; - void *addr = __alloc_from_pool(size, &page, flags); - - if (addr) - *dma_handle = phys_to_dma(dev, page_to_phys(page)); - - return addr; - } - - ptr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs); - if (!ptr) - goto no_mem; - - /* remove any dirty cache lines on the kernel alias */ - __dma_flush_area(ptr, size); - - /* create a coherent mapping */ - page = virt_to_page(ptr); - coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP, - prot, __builtin_return_address(0)); - if (!coherent_ptr) - goto no_map; - - return coherent_ptr; - -no_map: - dma_direct_free_pages(dev, size, ptr, *dma_handle, attrs); -no_mem: - return NULL; -} - -void arch_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, unsigned long attrs) -{ - if (!__free_from_pool(vaddr, PAGE_ALIGN(size))) { - void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle)); - - vunmap(vaddr); - dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs); - } -} - -long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, - dma_addr_t dma_addr) -{ - return __phys_to_pfn(dma_to_phys(dev, dma_addr)); -} - pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs) { @@ -160,6 +53,11 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, __dma_unmap_area(phys_to_virt(paddr), size, dir); } +void arch_dma_prep_coherent(struct page *page, size_t size) +{ + __dma_flush_area(page_address(page), size); +} + #ifdef CONFIG_IOMMU_DMA static int __swiotlb_get_sgtable_page(struct sg_table *sgt, struct page *page, size_t size) @@ -191,67 +89,6 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma, } #endif /* CONFIG_IOMMU_DMA */ -static int __init atomic_pool_init(void) -{ - pgprot_t prot = __pgprot(PROT_NORMAL_NC); - unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT; - struct page *page; - void *addr; - unsigned int pool_size_order = get_order(atomic_pool_size); - - if (dev_get_cma_area(NULL)) - page = dma_alloc_from_contiguous(NULL, nr_pages, - pool_size_order, false); - else - page = alloc_pages(GFP_DMA32, pool_size_order); - - if (page) { - int ret; - void *page_addr = page_address(page); - - memset(page_addr, 0, atomic_pool_size); - __dma_flush_area(page_addr, atomic_pool_size); - - atomic_pool = gen_pool_create(PAGE_SHIFT, -1); - if (!atomic_pool) - goto free_page; - - addr = dma_common_contiguous_remap(page, atomic_pool_size, - VM_USERMAP, prot, atomic_pool_init); - - if (!addr) - goto destroy_genpool; - - ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr, - page_to_phys(page), - atomic_pool_size, -1); - if (ret) - goto remove_mapping; - - gen_pool_set_algo(atomic_pool, - gen_pool_first_fit_order_align, - NULL); - - pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n", - atomic_pool_size / 1024); - return 0; - } - goto out; - -remove_mapping: - dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP); -destroy_genpool: - gen_pool_destroy(atomic_pool); - atomic_pool = NULL; -free_page: - if (!dma_release_from_contiguous(NULL, page, nr_pages)) - __free_pages(page, pool_size_order); -out: - pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n", - atomic_pool_size / 1024); - return -ENOMEM; -} - /******************************************** * The following APIs are for dummy DMA ops * ********************************************/ @@ -350,8 +187,7 @@ static int __init arm64_dma_init(void) TAINT_CPU_OUT_OF_SPEC, "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", ARCH_DMA_MINALIGN, cache_line_size()); - - return atomic_pool_init(); + return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC)); } arch_initcall(arm64_dma_init); @@ -397,7 +233,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, page = alloc_pages(gfp, get_order(size)); addr = page ? page_address(page) : NULL; } else { - addr = __alloc_from_pool(size, &page, gfp); + addr = dma_alloc_from_pool(size, &page, gfp); } if (!addr) return NULL; @@ -407,7 +243,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, if (coherent) __free_pages(page, get_order(size)); else - __free_from_pool(addr, size); + dma_free_from_pool(addr, size); addr = NULL; } } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { @@ -471,9 +307,9 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, * coherent devices. * Hence how dodgy the below logic looks... */ - if (__in_atomic_pool(cpu_addr, size)) { + if (dma_in_atomic_pool(cpu_addr, size)) { iommu_dma_unmap_page(dev, handle, iosize, 0, 0); - __free_from_pool(cpu_addr, size); + dma_free_from_pool(cpu_addr, size); } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { struct page *page = vmalloc_to_page(cpu_addr); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 0f81c713f6e9..1a0edcde7d14 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -455,6 +455,11 @@ void *dma_common_pages_remap(struct page **pages, size_t size, const void *caller); void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags); +int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot); +bool dma_in_atomic_pool(void *start, size_t size); +void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags); +bool dma_free_from_pool(void *start, size_t size); + /** * dma_mmap_attrs - map a coherent DMA allocation into user space * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index 9051b055beec..306557331d7d 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -69,4 +69,6 @@ static inline void arch_sync_dma_for_cpu_all(struct device *dev) } #endif /* CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL */ +void arch_dma_prep_coherent(struct page *page, size_t size); + #endif /* _LINUX_DMA_NONCOHERENT_H */ diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index c92e08173ed8..41c3b1df70eb 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -55,3 +55,8 @@ config SWIOTLB config DMA_REMAP depends on MMU bool + +config DMA_DIRECT_REMAP + bool + depends on DMA_DIRECT_OPS + select DMA_REMAP diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c index a15c393ea4e5..b32bb08f96ae 100644 --- a/kernel/dma/remap.c +++ b/kernel/dma/remap.c @@ -1,8 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 /* + * Copyright (C) 2012 ARM Ltd. * Copyright (c) 2014 The Linux Foundation */ -#include +#include +#include +#include +#include +#include #include #include @@ -86,3 +91,154 @@ void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags) unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size)); vunmap(cpu_addr); } + +#ifdef CONFIG_DMA_DIRECT_REMAP +static struct gen_pool *atomic_pool __ro_after_init; + +#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K +static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE; + +static int __init early_coherent_pool(char *p) +{ + atomic_pool_size = memparse(p, &p); + return 0; +} +early_param("coherent_pool", early_coherent_pool); + +int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot) +{ + unsigned int pool_size_order = get_order(atomic_pool_size); + unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT; + struct page *page; + void *addr; + int ret; + + if (dev_get_cma_area(NULL)) + page = dma_alloc_from_contiguous(NULL, nr_pages, + pool_size_order, false); + else + page = alloc_pages(gfp, pool_size_order); + if (!page) + goto out; + + memset(page_address(page), 0, atomic_pool_size); + arch_dma_prep_coherent(page, atomic_pool_size); + + atomic_pool = gen_pool_create(PAGE_SHIFT, -1); + if (!atomic_pool) + goto free_page; + + addr = dma_common_contiguous_remap(page, atomic_pool_size, VM_USERMAP, + prot, __builtin_return_address(0)); + if (!addr) + goto destroy_genpool; + + ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr, + page_to_phys(page), atomic_pool_size, -1); + if (ret) + goto remove_mapping; + gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, NULL); + + pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n", + atomic_pool_size / 1024); + return 0; + +remove_mapping: + dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP); +destroy_genpool: + gen_pool_destroy(atomic_pool); + atomic_pool = NULL; +free_page: + if (!dma_release_from_contiguous(NULL, page, nr_pages)) + __free_pages(page, pool_size_order); +out: + pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n", + atomic_pool_size / 1024); + return -ENOMEM; +} + +bool dma_in_atomic_pool(void *start, size_t size) +{ + return addr_in_gen_pool(atomic_pool, (unsigned long)start, size); +} + +void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags) +{ + unsigned long val; + void *ptr = NULL; + + if (!atomic_pool) { + WARN(1, "coherent pool not initialised!\n"); + return NULL; + } + + val = gen_pool_alloc(atomic_pool, size); + if (val) { + phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val); + + *ret_page = pfn_to_page(__phys_to_pfn(phys)); + ptr = (void *)val; + memset(ptr, 0, size); + } + + return ptr; +} + +bool dma_free_from_pool(void *start, size_t size) +{ + if (!dma_in_atomic_pool(start, size)) + return false; + gen_pool_free(atomic_pool, (unsigned long)start, size); + return true; +} + +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t flags, unsigned long attrs) +{ + struct page *page = NULL; + void *ret, *kaddr; + + size = PAGE_ALIGN(size); + + if (!gfpflags_allow_blocking(flags)) { + ret = dma_alloc_from_pool(size, &page, flags); + if (!ret) + return NULL; + *dma_handle = phys_to_dma(dev, page_to_phys(page)); + return ret; + } + + kaddr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs); + if (!kaddr) + return NULL; + page = virt_to_page(kaddr); + + /* remove any dirty cache lines on the kernel alias */ + arch_dma_prep_coherent(page, size); + + /* create a coherent mapping */ + ret = dma_common_contiguous_remap(page, size, VM_USERMAP, + arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs), + __builtin_return_address(0)); + if (!ret) + dma_direct_free_pages(dev, size, kaddr, *dma_handle, attrs); + return ret; +} + +void arch_dma_free(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, unsigned long attrs) +{ + if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) { + void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle)); + + vunmap(vaddr); + dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs); + } +} + +long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, + dma_addr_t dma_addr) +{ + return __phys_to_pfn(dma_to_phys(dev, dma_addr)); +} +#endif /* CONFIG_DMA_DIRECT_REMAP */ From bfd56cd605219d90b210a5377fca31a644efe95c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 4 Nov 2018 17:38:39 +0100 Subject: [PATCH 06/73] dma-mapping: support highmem in the generic remap allocator By using __dma_direct_alloc_pages we can deal entirely with struct page instead of having to derive a kernel virtual address. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- kernel/dma/remap.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c index b32bb08f96ae..dcc82dd668f8 100644 --- a/kernel/dma/remap.c +++ b/kernel/dma/remap.c @@ -196,7 +196,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) { struct page *page = NULL; - void *ret, *kaddr; + void *ret; size = PAGE_ALIGN(size); @@ -208,10 +208,9 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, return ret; } - kaddr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs); - if (!kaddr) + page = __dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs); + if (!page) return NULL; - page = virt_to_page(kaddr); /* remove any dirty cache lines on the kernel alias */ arch_dma_prep_coherent(page, size); @@ -221,7 +220,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs), __builtin_return_address(0)); if (!ret) - dma_direct_free_pages(dev, size, kaddr, *dma_handle, attrs); + __dma_direct_free_pages(dev, size, page); return ret; } @@ -229,10 +228,11 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) { - void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle)); + phys_addr_t phys = dma_to_phys(dev, dma_handle); + struct page *page = pfn_to_page(__phys_to_pfn(phys)); vunmap(vaddr); - dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs); + __dma_direct_free_pages(dev, size, page); } } From e440e26a0251b054243b3db6356f3869a9f9c2d1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 4 Nov 2018 17:51:50 +0100 Subject: [PATCH 07/73] dma-remap: support DMA_ATTR_NO_KERNEL_MAPPING Do not waste vmalloc space on allocations that do not require a mapping into the kernel address space. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- kernel/dma/remap.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c index dcc82dd668f8..68a64e3ff6a1 100644 --- a/kernel/dma/remap.c +++ b/kernel/dma/remap.c @@ -200,7 +200,8 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, size = PAGE_ALIGN(size); - if (!gfpflags_allow_blocking(flags)) { + if (!gfpflags_allow_blocking(flags) && + !(attrs & DMA_ATTR_NO_KERNEL_MAPPING)) { ret = dma_alloc_from_pool(size, &page, flags); if (!ret) return NULL; @@ -215,6 +216,9 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, /* remove any dirty cache lines on the kernel alias */ arch_dma_prep_coherent(page, size); + if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) + return page; /* opaque cookie */ + /* create a coherent mapping */ ret = dma_common_contiguous_remap(page, size, VM_USERMAP, arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs), @@ -227,7 +231,10 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, void arch_dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { - if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) { + if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) { + /* vaddr is a struct page cookie, not a kernel address */ + __dma_direct_free_pages(dev, size, vaddr); + } else if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) { phys_addr_t phys = dma_to_phys(dev, dma_handle); struct page *page = pfn_to_page(__phys_to_pfn(phys)); From de90d7c42859d57a59aef3a6ec6e3013bcb337e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 4 Nov 2018 17:53:47 +0100 Subject: [PATCH 08/73] csky: don't select DMA_NONCOHERENT_OPS This option is gone past Linux 4.19. Signed-off-by: Christoph Hellwig Acked-by: Guo Ren --- arch/csky/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 8a30e006a845..c0cf8e948821 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -8,7 +8,6 @@ config CSKY select CLKSRC_MMIO select CLKSRC_OF select DMA_DIRECT_OPS - select DMA_NONCOHERENT_OPS select DMA_REMAP select IRQ_DOMAIN select HANDLE_DOMAIN_IRQ From 576d0d552be803b22867ed98a8619d68b1f78bbe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 4 Nov 2018 17:46:21 +0100 Subject: [PATCH 09/73] csky: don't use GFP_DMA in atomic_pool_init csky does not implement ZONE_DMA, which means passing GFP_DMA is a no-op. Signed-off-by: Christoph Hellwig Acked-by: Guo Ren --- arch/csky/mm/dma-mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c index 85437b21e045..ad4046939713 100644 --- a/arch/csky/mm/dma-mapping.c +++ b/arch/csky/mm/dma-mapping.c @@ -35,7 +35,7 @@ static int __init atomic_pool_init(void) if (!atomic_pool) BUG(); - page = alloc_pages(GFP_KERNEL | GFP_DMA, get_order(size)); + page = alloc_pages(GFP_KERNEL, get_order(size)); if (!page) BUG(); From f04b951f6c7eccd85ea7750a5fafa68fb98d6bfa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 4 Nov 2018 17:47:44 +0100 Subject: [PATCH 10/73] csky: use the generic remapping dma alloc implementation The csky code was largely copied from arm/arm64, so switch to the generic arm64-based implementation instead. Signed-off-by: Christoph Hellwig Acked-by: Guo Ren --- arch/csky/Kconfig | 2 +- arch/csky/mm/dma-mapping.c | 142 +------------------------------------ 2 files changed, 3 insertions(+), 141 deletions(-) diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index c0cf8e948821..ea74f3a9eeaf 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -8,7 +8,7 @@ config CSKY select CLKSRC_MMIO select CLKSRC_OF select DMA_DIRECT_OPS - select DMA_REMAP + select DMA_DIRECT_REMAP select IRQ_DOMAIN select HANDLE_DOMAIN_IRQ select DW_APB_TIMER_OF diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c index ad4046939713..80783bb71c5c 100644 --- a/arch/csky/mm/dma-mapping.c +++ b/arch/csky/mm/dma-mapping.c @@ -14,73 +14,13 @@ #include #include -static struct gen_pool *atomic_pool; -static size_t atomic_pool_size __initdata = SZ_256K; - -static int __init early_coherent_pool(char *p) -{ - atomic_pool_size = memparse(p, &p); - return 0; -} -early_param("coherent_pool", early_coherent_pool); - static int __init atomic_pool_init(void) { - struct page *page; - size_t size = atomic_pool_size; - void *ptr; - int ret; - - atomic_pool = gen_pool_create(PAGE_SHIFT, -1); - if (!atomic_pool) - BUG(); - - page = alloc_pages(GFP_KERNEL, get_order(size)); - if (!page) - BUG(); - - ptr = dma_common_contiguous_remap(page, size, VM_ALLOC, - pgprot_noncached(PAGE_KERNEL), - __builtin_return_address(0)); - if (!ptr) - BUG(); - - ret = gen_pool_add_virt(atomic_pool, (unsigned long)ptr, - page_to_phys(page), atomic_pool_size, -1); - if (ret) - BUG(); - - gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, NULL); - - pr_info("DMA: preallocated %zu KiB pool for atomic coherent pool\n", - atomic_pool_size / 1024); - - pr_info("DMA: vaddr: 0x%x phy: 0x%lx,\n", (unsigned int)ptr, - page_to_phys(page)); - - return 0; + return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL)); } postcore_initcall(atomic_pool_init); -static void *csky_dma_alloc_atomic(struct device *dev, size_t size, - dma_addr_t *dma_handle) -{ - unsigned long addr; - - addr = gen_pool_alloc(atomic_pool, size); - if (addr) - *dma_handle = gen_pool_virt_to_phys(atomic_pool, addr); - - return (void *)addr; -} - -static void csky_dma_free_atomic(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, unsigned long attrs) -{ - gen_pool_free(atomic_pool, (unsigned long)vaddr, size); -} - -static void __dma_clear_buffer(struct page *page, size_t size) +void arch_dma_prep_coherent(struct page *page, size_t size) { if (PageHighMem(page)) { unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; @@ -107,84 +47,6 @@ static void __dma_clear_buffer(struct page *page, size_t size) } } -static void *csky_dma_alloc_nonatomic(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - unsigned long attrs) -{ - void *vaddr; - struct page *page; - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - - if (DMA_ATTR_NON_CONSISTENT & attrs) { - pr_err("csky %s can't support DMA_ATTR_NON_CONSISTENT.\n", __func__); - return NULL; - } - - if (IS_ENABLED(CONFIG_DMA_CMA)) - page = dma_alloc_from_contiguous(dev, count, get_order(size), - gfp); - else - page = alloc_pages(gfp, get_order(size)); - - if (!page) { - pr_err("csky %s no more free pages.\n", __func__); - return NULL; - } - - *dma_handle = page_to_phys(page); - - __dma_clear_buffer(page, size); - - if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) - return page; - - vaddr = dma_common_contiguous_remap(page, PAGE_ALIGN(size), VM_USERMAP, - pgprot_noncached(PAGE_KERNEL), __builtin_return_address(0)); - if (!vaddr) - BUG(); - - return vaddr; -} - -static void csky_dma_free_nonatomic( - struct device *dev, - size_t size, - void *vaddr, - dma_addr_t dma_handle, - unsigned long attrs - ) -{ - struct page *page = phys_to_page(dma_handle); - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - - if ((unsigned int)vaddr >= VMALLOC_START) - dma_common_free_remap(vaddr, size, VM_USERMAP); - - if (IS_ENABLED(CONFIG_DMA_CMA)) - dma_release_from_contiguous(dev, page, count); - else - __free_pages(page, get_order(size)); -} - -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) -{ - if (gfpflags_allow_blocking(gfp)) - return csky_dma_alloc_nonatomic(dev, size, dma_handle, gfp, - attrs); - else - return csky_dma_alloc_atomic(dev, size, dma_handle); -} - -void arch_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, unsigned long attrs) -{ - if (!addr_in_gen_pool(atomic_pool, (unsigned int) vaddr, size)) - csky_dma_free_nonatomic(dev, size, vaddr, dma_handle, attrs); - else - csky_dma_free_atomic(dev, size, vaddr, dma_handle, attrs); -} - static inline void cache_op(phys_addr_t paddr, size_t size, void (*fn)(unsigned long start, unsigned long end)) { From a1da439cc0d929891f0f7372c9e03530c809068c Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 5 Dec 2018 11:14:01 +0100 Subject: [PATCH 11/73] dma-mapping: fix lack of DMA address assignment in generic remap allocator Commit bfd56cd60521 ("dma-mapping: support highmem in the generic remap allocator") replaced dma_direct_alloc_pages() with __dma_direct_alloc_pages(), which doesn't set dma_handle and zero allocated memory. Fix it by doing this directly in the caller function. Fixes: bfd56cd60521 ("dma-mapping: support highmem in the generic remap allocator") Signed-off-by: Marek Szyprowski Tested-by: Thierry Reding Signed-off-by: Christoph Hellwig --- kernel/dma/remap.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c index 68a64e3ff6a1..8a44317cfc1a 100644 --- a/kernel/dma/remap.c +++ b/kernel/dma/remap.c @@ -223,8 +223,14 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, ret = dma_common_contiguous_remap(page, size, VM_USERMAP, arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs), __builtin_return_address(0)); - if (!ret) + if (!ret) { __dma_direct_free_pages(dev, size, page); + return ret; + } + + *dma_handle = phys_to_dma(dev, page_to_phys(page)); + memset(ret, 0, size); + return ret; } From 42ee3cae0ed38b6c04038bf851ea2496da2135bb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 18:52:35 +0100 Subject: [PATCH 12/73] dma-mapping: provide a generic DMA_MAPPING_ERROR Error handling of the dma_map_single and dma_map_page APIs is a little problematic at the moment, in that we use different encodings in the returned dma_addr_t to indicate an error. That means we require an additional indirect call to figure out if a dma mapping call returned an error, and a lot of boilerplate code to implement these semantics. Instead return the maximum addressable value as the error. As long as we don't allow mapping single-byte ranges with single-byte alignment this value can never be a valid return. Additionaly if drivers do not check the return value from the dma_map* routines this values means they will generally not be pointed to actual memory. Once the default value is added here we can start removing the various mapping_error methods and just rely on this generic check. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Acked-by: Russell King Acked-by: Linus Torvalds --- include/linux/dma-mapping.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 1a0edcde7d14..f89d277cc8ed 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -133,6 +133,8 @@ struct dma_map_ops { u64 (*get_required_mask)(struct device *dev); }; +#define DMA_MAPPING_ERROR (~(dma_addr_t)0) + extern const struct dma_map_ops dma_direct_ops; extern const struct dma_map_ops dma_virt_ops; @@ -581,8 +583,11 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) const struct dma_map_ops *ops = get_dma_ops(dev); debug_dma_mapping_error(dev, dma_addr); + if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); + if (dma_addr == DMA_MAPPING_ERROR) + return 1; return 0; } From b0cbeae4944924640bf550b75487729a20204c14 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 18:52:35 +0100 Subject: [PATCH 13/73] dma-direct: remove the mapping_error dma_map_ops method The dma-direct code already returns (~(dma_addr_t)0x0) on mapping failures, so we can switch over to returning DMA_MAPPING_ERROR and let the core dma-mapping code handle the rest. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- arch/powerpc/kernel/dma-swiotlb.c | 1 - include/linux/dma-direct.h | 3 --- kernel/dma/direct.c | 8 +------- kernel/dma/swiotlb.c | 11 +++++------ 4 files changed, 6 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 5fc335f4d9cd..3d8df2cf8be9 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -59,7 +59,6 @@ const struct dma_map_ops powerpc_swiotlb_dma_ops = { .sync_single_for_device = swiotlb_sync_single_for_device, .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, .sync_sg_for_device = swiotlb_sync_sg_for_device, - .mapping_error = dma_direct_mapping_error, .get_required_mask = swiotlb_powerpc_get_required, }; diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 61b78f934f64..6e5a47ae7d64 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -5,8 +5,6 @@ #include #include -#define DIRECT_MAPPING_ERROR (~(dma_addr_t)0) - #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA #include #else @@ -76,5 +74,4 @@ dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs); int dma_direct_supported(struct device *dev, u64 mask); -int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr); #endif /* _LINUX_DMA_DIRECT_H */ diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index c49849bcced6..308f88a750c8 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -289,7 +289,7 @@ dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, dma_addr_t dma_addr = phys_to_dma(dev, phys); if (!check_addr(dev, dma_addr, size, __func__)) - return DIRECT_MAPPING_ERROR; + return DMA_MAPPING_ERROR; if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) dma_direct_sync_single_for_device(dev, dma_addr, size, dir); @@ -336,11 +336,6 @@ int dma_direct_supported(struct device *dev, u64 mask) return mask >= phys_to_dma(dev, min_mask); } -int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == DIRECT_MAPPING_ERROR; -} - const struct dma_map_ops dma_direct_ops = { .alloc = dma_direct_alloc, .free = dma_direct_free, @@ -359,7 +354,6 @@ const struct dma_map_ops dma_direct_ops = { #endif .get_required_mask = dma_direct_get_required_mask, .dma_supported = dma_direct_supported, - .mapping_error = dma_direct_mapping_error, .cache_sync = arch_dma_cache_sync, }; EXPORT_SYMBOL(dma_direct_ops); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 045930e32c0e..ff1ce81bb623 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -631,21 +631,21 @@ static dma_addr_t swiotlb_bounce_page(struct device *dev, phys_addr_t *phys, if (unlikely(swiotlb_force == SWIOTLB_NO_FORCE)) { dev_warn_ratelimited(dev, "Cannot do DMA to address %pa\n", phys); - return DIRECT_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } /* Oh well, have to allocate and map a bounce buffer. */ *phys = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start), *phys, size, dir, attrs); if (*phys == SWIOTLB_MAP_ERROR) - return DIRECT_MAPPING_ERROR; + return DMA_MAPPING_ERROR; /* Ensure that the address returned is DMA'ble */ dma_addr = __phys_to_dma(dev, *phys); if (unlikely(!dma_capable(dev, dma_addr, size))) { swiotlb_tbl_unmap_single(dev, *phys, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); - return DIRECT_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } return dma_addr; @@ -680,7 +680,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0 && - dev_addr != DIRECT_MAPPING_ERROR) + dev_addr != DMA_MAPPING_ERROR) arch_sync_dma_for_device(dev, phys, size, dir); return dev_addr; @@ -789,7 +789,7 @@ swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nelems, for_each_sg(sgl, sg, nelems, i) { sg->dma_address = swiotlb_map_page(dev, sg_page(sg), sg->offset, sg->length, dir, attrs); - if (sg->dma_address == DIRECT_MAPPING_ERROR) + if (sg->dma_address == DMA_MAPPING_ERROR) goto out_error; sg_dma_len(sg) = sg->length; } @@ -869,7 +869,6 @@ swiotlb_dma_supported(struct device *hwdev, u64 mask) } const struct dma_map_ops swiotlb_dma_ops = { - .mapping_error = dma_direct_mapping_error, .alloc = dma_direct_alloc, .free = dma_direct_free, .sync_single_for_cpu = swiotlb_sync_single_for_cpu, From 72fd97bf4e75e37552640614a0ea98897fc1dd77 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 18:57:36 +0100 Subject: [PATCH 14/73] arm: remove the mapping_error dma_map_ops method Arm already returns (~(dma_addr_t)0x0) on mapping failures, so we can switch over to returning DMA_MAPPING_ERROR and let the core dma-mapping code handle the rest. Signed-off-by: Christoph Hellwig Acked-by: Russell King Acked-by: Linus Torvalds --- arch/arm/common/dmabounce.c | 12 +++------- arch/arm/include/asm/dma-iommu.h | 2 -- arch/arm/mm/dma-mapping.c | 39 ++++++++++++-------------------- 3 files changed, 18 insertions(+), 35 deletions(-) diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c index 9a92de63426f..5ba4622030ca 100644 --- a/arch/arm/common/dmabounce.c +++ b/arch/arm/common/dmabounce.c @@ -257,7 +257,7 @@ static inline dma_addr_t map_single(struct device *dev, void *ptr, size_t size, if (buf == NULL) { dev_err(dev, "%s: unable to map unsafe buffer %p!\n", __func__, ptr); - return ARM_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", @@ -327,7 +327,7 @@ static dma_addr_t dmabounce_map_page(struct device *dev, struct page *page, ret = needs_bounce(dev, dma_addr, size); if (ret < 0) - return ARM_MAPPING_ERROR; + return DMA_MAPPING_ERROR; if (ret == 0) { arm_dma_ops.sync_single_for_device(dev, dma_addr, size, dir); @@ -336,7 +336,7 @@ static dma_addr_t dmabounce_map_page(struct device *dev, struct page *page, if (PageHighMem(page)) { dev_err(dev, "DMA buffer bouncing of HIGHMEM pages is not supported\n"); - return ARM_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } return map_single(dev, page_address(page) + offset, size, dir, attrs); @@ -453,11 +453,6 @@ static int dmabounce_dma_supported(struct device *dev, u64 dma_mask) return arm_dma_ops.dma_supported(dev, dma_mask); } -static int dmabounce_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return arm_dma_ops.mapping_error(dev, dma_addr); -} - static const struct dma_map_ops dmabounce_ops = { .alloc = arm_dma_alloc, .free = arm_dma_free, @@ -472,7 +467,6 @@ static const struct dma_map_ops dmabounce_ops = { .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu, .sync_sg_for_device = arm_dma_sync_sg_for_device, .dma_supported = dmabounce_dma_supported, - .mapping_error = dmabounce_mapping_error, }; static int dmabounce_init_pool(struct dmabounce_pool *pool, struct device *dev, diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h index 6821f1249300..772f48ef84b7 100644 --- a/arch/arm/include/asm/dma-iommu.h +++ b/arch/arm/include/asm/dma-iommu.h @@ -9,8 +9,6 @@ #include #include -#define ARM_MAPPING_ERROR (~(dma_addr_t)0x0) - struct dma_iommu_mapping { /* iommu specific data */ struct iommu_domain *domain; diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 661fe48ab78d..2cfb17bad1e6 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -179,11 +179,6 @@ static void arm_dma_sync_single_for_device(struct device *dev, __dma_page_cpu_to_dev(page, offset, size, dir); } -static int arm_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == ARM_MAPPING_ERROR; -} - const struct dma_map_ops arm_dma_ops = { .alloc = arm_dma_alloc, .free = arm_dma_free, @@ -197,7 +192,6 @@ const struct dma_map_ops arm_dma_ops = { .sync_single_for_device = arm_dma_sync_single_for_device, .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu, .sync_sg_for_device = arm_dma_sync_sg_for_device, - .mapping_error = arm_dma_mapping_error, .dma_supported = arm_dma_supported, }; EXPORT_SYMBOL(arm_dma_ops); @@ -217,7 +211,6 @@ const struct dma_map_ops arm_coherent_dma_ops = { .get_sgtable = arm_dma_get_sgtable, .map_page = arm_coherent_dma_map_page, .map_sg = arm_dma_map_sg, - .mapping_error = arm_dma_mapping_error, .dma_supported = arm_dma_supported, }; EXPORT_SYMBOL(arm_coherent_dma_ops); @@ -774,7 +767,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp &= ~(__GFP_COMP); args.gfp = gfp; - *handle = ARM_MAPPING_ERROR; + *handle = DMA_MAPPING_ERROR; allowblock = gfpflags_allow_blocking(gfp); cma = allowblock ? dev_get_cma_area(dev) : false; @@ -1217,7 +1210,7 @@ static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping, if (i == mapping->nr_bitmaps) { if (extend_iommu_mapping(mapping)) { spin_unlock_irqrestore(&mapping->lock, flags); - return ARM_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } start = bitmap_find_next_zero_area(mapping->bitmaps[i], @@ -1225,7 +1218,7 @@ static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping, if (start > mapping->bits) { spin_unlock_irqrestore(&mapping->lock, flags); - return ARM_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } bitmap_set(mapping->bitmaps[i], start, count); @@ -1409,7 +1402,7 @@ __iommu_create_mapping(struct device *dev, struct page **pages, size_t size, int i; dma_addr = __alloc_iova(mapping, size); - if (dma_addr == ARM_MAPPING_ERROR) + if (dma_addr == DMA_MAPPING_ERROR) return dma_addr; iova = dma_addr; @@ -1436,7 +1429,7 @@ __iommu_create_mapping(struct device *dev, struct page **pages, size_t size, fail: iommu_unmap(mapping->domain, dma_addr, iova-dma_addr); __free_iova(mapping, dma_addr, size); - return ARM_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size) @@ -1497,7 +1490,7 @@ static void *__iommu_alloc_simple(struct device *dev, size_t size, gfp_t gfp, return NULL; *handle = __iommu_create_mapping(dev, &page, size, attrs); - if (*handle == ARM_MAPPING_ERROR) + if (*handle == DMA_MAPPING_ERROR) goto err_mapping; return addr; @@ -1525,7 +1518,7 @@ static void *__arm_iommu_alloc_attrs(struct device *dev, size_t size, struct page **pages; void *addr = NULL; - *handle = ARM_MAPPING_ERROR; + *handle = DMA_MAPPING_ERROR; size = PAGE_ALIGN(size); if (coherent_flag == COHERENT || !gfpflags_allow_blocking(gfp)) @@ -1546,7 +1539,7 @@ static void *__arm_iommu_alloc_attrs(struct device *dev, size_t size, return NULL; *handle = __iommu_create_mapping(dev, pages, size, attrs); - if (*handle == ARM_MAPPING_ERROR) + if (*handle == DMA_MAPPING_ERROR) goto err_buffer; if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) @@ -1696,10 +1689,10 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, int prot; size = PAGE_ALIGN(size); - *handle = ARM_MAPPING_ERROR; + *handle = DMA_MAPPING_ERROR; iova_base = iova = __alloc_iova(mapping, size); - if (iova == ARM_MAPPING_ERROR) + if (iova == DMA_MAPPING_ERROR) return -ENOMEM; for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s)) { @@ -1739,7 +1732,7 @@ static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, for (i = 1; i < nents; i++) { s = sg_next(s); - s->dma_address = ARM_MAPPING_ERROR; + s->dma_address = DMA_MAPPING_ERROR; s->dma_length = 0; if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { @@ -1914,7 +1907,7 @@ static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *p int ret, prot, len = PAGE_ALIGN(size + offset); dma_addr = __alloc_iova(mapping, len); - if (dma_addr == ARM_MAPPING_ERROR) + if (dma_addr == DMA_MAPPING_ERROR) return dma_addr; prot = __dma_info_to_prot(dir, attrs); @@ -1926,7 +1919,7 @@ static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *p return dma_addr + offset; fail: __free_iova(mapping, dma_addr, len); - return ARM_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } /** @@ -2020,7 +2013,7 @@ static dma_addr_t arm_iommu_map_resource(struct device *dev, size_t len = PAGE_ALIGN(size + offset); dma_addr = __alloc_iova(mapping, len); - if (dma_addr == ARM_MAPPING_ERROR) + if (dma_addr == DMA_MAPPING_ERROR) return dma_addr; prot = __dma_info_to_prot(dir, attrs) | IOMMU_MMIO; @@ -2032,7 +2025,7 @@ static dma_addr_t arm_iommu_map_resource(struct device *dev, return dma_addr + offset; fail: __free_iova(mapping, dma_addr, len); - return ARM_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } /** @@ -2105,7 +2098,6 @@ const struct dma_map_ops iommu_ops = { .map_resource = arm_iommu_map_resource, .unmap_resource = arm_iommu_unmap_resource, - .mapping_error = arm_dma_mapping_error, .dma_supported = arm_dma_supported, }; @@ -2124,7 +2116,6 @@ const struct dma_map_ops iommu_coherent_ops = { .map_resource = arm_iommu_map_resource, .unmap_resource = arm_iommu_unmap_resource, - .mapping_error = arm_dma_mapping_error, .dma_supported = arm_dma_supported, }; From d11e3d3d03360cd49497c837490576f793baf746 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 18:56:25 +0100 Subject: [PATCH 15/73] powerpc/iommu: remove the mapping_error dma_map_ops method The powerpc iommu code already returns (~(dma_addr_t)0x0) on mapping failures, so we can switch over to returning DMA_MAPPING_ERROR and let the core dma-mapping code handle the rest. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- arch/powerpc/include/asm/iommu.h | 4 ---- arch/powerpc/kernel/dma-iommu.c | 6 ------ arch/powerpc/kernel/iommu.c | 28 ++++++++++++++-------------- arch/powerpc/platforms/cell/iommu.c | 1 - arch/powerpc/platforms/pseries/vio.c | 3 +-- 5 files changed, 15 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 35db0cbc9222..55312990d1d2 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -143,8 +143,6 @@ struct scatterlist; #ifdef CONFIG_PPC64 -#define IOMMU_MAPPING_ERROR (~(dma_addr_t)0x0) - static inline void set_iommu_table_base(struct device *dev, struct iommu_table *base) { @@ -242,8 +240,6 @@ static inline int __init tce_iommu_bus_notifier_init(void) } #endif /* !CONFIG_IOMMU_API */ -int dma_iommu_mapping_error(struct device *dev, dma_addr_t dma_addr); - #else static inline void *get_iommu_table_base(struct device *dev) diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index f9fe2080ceb9..5ebacf0fe41a 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -106,11 +106,6 @@ static u64 dma_iommu_get_required_mask(struct device *dev) return mask; } -int dma_iommu_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == IOMMU_MAPPING_ERROR; -} - struct dma_map_ops dma_iommu_ops = { .alloc = dma_iommu_alloc_coherent, .free = dma_iommu_free_coherent, @@ -121,6 +116,5 @@ struct dma_map_ops dma_iommu_ops = { .map_page = dma_iommu_map_page, .unmap_page = dma_iommu_unmap_page, .get_required_mask = dma_iommu_get_required_mask, - .mapping_error = dma_iommu_mapping_error, }; EXPORT_SYMBOL(dma_iommu_ops); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index f0dc680e659a..ca7f73488c62 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -197,11 +197,11 @@ static unsigned long iommu_range_alloc(struct device *dev, if (unlikely(npages == 0)) { if (printk_ratelimit()) WARN_ON(1); - return IOMMU_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } if (should_fail_iommu(dev)) - return IOMMU_MAPPING_ERROR; + return DMA_MAPPING_ERROR; /* * We don't need to disable preemption here because any CPU can @@ -277,7 +277,7 @@ again: } else { /* Give up */ spin_unlock_irqrestore(&(pool->lock), flags); - return IOMMU_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } } @@ -309,13 +309,13 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, unsigned long attrs) { unsigned long entry; - dma_addr_t ret = IOMMU_MAPPING_ERROR; + dma_addr_t ret = DMA_MAPPING_ERROR; int build_fail; entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order); - if (unlikely(entry == IOMMU_MAPPING_ERROR)) - return IOMMU_MAPPING_ERROR; + if (unlikely(entry == DMA_MAPPING_ERROR)) + return DMA_MAPPING_ERROR; entry += tbl->it_offset; /* Offset into real TCE table */ ret = entry << tbl->it_page_shift; /* Set the return dma address */ @@ -327,12 +327,12 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, /* tbl->it_ops->set() only returns non-zero for transient errors. * Clean up the table bitmap in this case and return - * IOMMU_MAPPING_ERROR. For all other errors the functionality is + * DMA_MAPPING_ERROR. For all other errors the functionality is * not altered. */ if (unlikely(build_fail)) { __iommu_free(tbl, ret, npages); - return IOMMU_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } /* Flush/invalidate TLB caches if necessary */ @@ -477,7 +477,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen); /* Handle failure */ - if (unlikely(entry == IOMMU_MAPPING_ERROR)) { + if (unlikely(entry == DMA_MAPPING_ERROR)) { if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) dev_info(dev, "iommu_alloc failed, tbl %p " @@ -544,7 +544,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, */ if (outcount < incount) { outs = sg_next(outs); - outs->dma_address = IOMMU_MAPPING_ERROR; + outs->dma_address = DMA_MAPPING_ERROR; outs->dma_length = 0; } @@ -562,7 +562,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, npages = iommu_num_pages(s->dma_address, s->dma_length, IOMMU_PAGE_SIZE(tbl)); __iommu_free(tbl, vaddr, npages); - s->dma_address = IOMMU_MAPPING_ERROR; + s->dma_address = DMA_MAPPING_ERROR; s->dma_length = 0; } if (s == outs) @@ -776,7 +776,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, unsigned long mask, enum dma_data_direction direction, unsigned long attrs) { - dma_addr_t dma_handle = IOMMU_MAPPING_ERROR; + dma_addr_t dma_handle = DMA_MAPPING_ERROR; void *vaddr; unsigned long uaddr; unsigned int npages, align; @@ -796,7 +796,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction, mask >> tbl->it_page_shift, align, attrs); - if (dma_handle == IOMMU_MAPPING_ERROR) { + if (dma_handle == DMA_MAPPING_ERROR) { if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) { dev_info(dev, "iommu_alloc failed, tbl %p " @@ -868,7 +868,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, io_order = get_iommu_order(size, tbl); mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, mask >> tbl->it_page_shift, io_order, 0); - if (mapping == IOMMU_MAPPING_ERROR) { + if (mapping == DMA_MAPPING_ERROR) { free_pages((unsigned long)ret, order); return NULL; } diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 12352a58072a..af2a3c15e0ec 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -654,7 +654,6 @@ static const struct dma_map_ops dma_iommu_fixed_ops = { .dma_supported = dma_suported_and_switch, .map_page = dma_fixed_map_page, .unmap_page = dma_fixed_unmap_page, - .mapping_error = dma_iommu_mapping_error, }; static void cell_dma_dev_setup(struct device *dev) diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 88f1ad1d6309..a29ad7db918a 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -519,7 +519,7 @@ static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page, { struct vio_dev *viodev = to_vio_dev(dev); struct iommu_table *tbl; - dma_addr_t ret = IOMMU_MAPPING_ERROR; + dma_addr_t ret = DMA_MAPPING_ERROR; tbl = get_iommu_table_base(dev); if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)))) { @@ -625,7 +625,6 @@ static const struct dma_map_ops vio_dma_mapping_ops = { .unmap_page = vio_dma_iommu_unmap_page, .dma_supported = vio_dma_iommu_dma_supported, .get_required_mask = vio_dma_get_required_mask, - .mapping_error = dma_iommu_mapping_error, }; /** From 122da4e081be6e854625de21278f5ab6ce103ba9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 19:18:58 +0100 Subject: [PATCH 16/73] mips/jazz: remove the mapping_error dma_map_ops method The Jazz iommu code already returns (~(dma_addr_t)0x0) on mapping failures, so we can switch over to returning DMA_MAPPING_ERROR and let the core dma-mapping code handle the rest. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- arch/mips/include/asm/jazzdma.h | 6 ------ arch/mips/jazz/jazzdma.c | 16 +++++----------- 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/arch/mips/include/asm/jazzdma.h b/arch/mips/include/asm/jazzdma.h index d913439c738c..d13f940022d5 100644 --- a/arch/mips/include/asm/jazzdma.h +++ b/arch/mips/include/asm/jazzdma.h @@ -39,12 +39,6 @@ extern int vdma_get_enable(int channel); #define VDMA_PAGE(a) ((unsigned int)(a) >> 12) #define VDMA_OFFSET(a) ((unsigned int)(a) & (VDMA_PAGESIZE-1)) -/* - * error code returned by vdma_alloc() - * (See also arch/mips/kernel/jazzdma.c) - */ -#define VDMA_ERROR 0xffffffff - /* * VDMA pagetable entry description */ diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index 4c41ed0a637e..6256d35dbf4d 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -104,12 +104,12 @@ unsigned long vdma_alloc(unsigned long paddr, unsigned long size) if (vdma_debug) printk("vdma_alloc: Invalid physical address: %08lx\n", paddr); - return VDMA_ERROR; /* invalid physical address */ + return DMA_MAPPING_ERROR; /* invalid physical address */ } if (size > 0x400000 || size == 0) { if (vdma_debug) printk("vdma_alloc: Invalid size: %08lx\n", size); - return VDMA_ERROR; /* invalid physical address */ + return DMA_MAPPING_ERROR; /* invalid physical address */ } spin_lock_irqsave(&vdma_lock, flags); @@ -123,7 +123,7 @@ unsigned long vdma_alloc(unsigned long paddr, unsigned long size) first < VDMA_PGTBL_ENTRIES) first++; if (first + pages > VDMA_PGTBL_ENTRIES) { /* nothing free */ spin_unlock_irqrestore(&vdma_lock, flags); - return VDMA_ERROR; + return DMA_MAPPING_ERROR; } last = first + 1; @@ -569,7 +569,7 @@ static void *jazz_dma_alloc(struct device *dev, size_t size, return NULL; *dma_handle = vdma_alloc(virt_to_phys(ret), size); - if (*dma_handle == VDMA_ERROR) { + if (*dma_handle == DMA_MAPPING_ERROR) { dma_direct_free_pages(dev, size, ret, *dma_handle, attrs); return NULL; } @@ -620,7 +620,7 @@ static int jazz_dma_map_sg(struct device *dev, struct scatterlist *sglist, arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir); sg->dma_address = vdma_alloc(sg_phys(sg), sg->length); - if (sg->dma_address == VDMA_ERROR) + if (sg->dma_address == DMA_MAPPING_ERROR) return 0; sg_dma_len(sg) = sg->length; } @@ -674,11 +674,6 @@ static void jazz_dma_sync_sg_for_cpu(struct device *dev, arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); } -static int jazz_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == VDMA_ERROR; -} - const struct dma_map_ops jazz_dma_ops = { .alloc = jazz_dma_alloc, .free = jazz_dma_free, @@ -692,6 +687,5 @@ const struct dma_map_ops jazz_dma_ops = { .sync_sg_for_device = jazz_dma_sync_sg_for_device, .dma_supported = dma_direct_supported, .cache_sync = arch_dma_cache_sync, - .mapping_error = jazz_dma_mapping_error, }; EXPORT_SYMBOL(jazz_dma_ops); From 44899aa31ff64fac370667cb71400ddb434b8951 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 18:58:15 +0100 Subject: [PATCH 17/73] s390: remove the mapping_error dma_map_ops method S390 already returns (~(dma_addr_t)0x0) on mapping failures, so we can switch over to returning DMA_MAPPING_ERROR and let the core dma-mapping code handle the rest. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- arch/s390/pci/pci_dma.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index d387a0fbdd7e..346ba382193a 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -15,8 +15,6 @@ #include #include -#define S390_MAPPING_ERROR (~(dma_addr_t) 0x0) - static struct kmem_cache *dma_region_table_cache; static struct kmem_cache *dma_page_table_cache; static int s390_iommu_strict; @@ -301,7 +299,7 @@ static dma_addr_t dma_alloc_address(struct device *dev, int size) out_error: spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); - return S390_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size) @@ -349,7 +347,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, /* This rounds up number of pages based on size and offset */ nr_pages = iommu_num_pages(pa, size, PAGE_SIZE); dma_addr = dma_alloc_address(dev, nr_pages); - if (dma_addr == S390_MAPPING_ERROR) { + if (dma_addr == DMA_MAPPING_ERROR) { ret = -ENOSPC; goto out_err; } @@ -372,7 +370,7 @@ out_free: out_err: zpci_err("map error:\n"); zpci_err_dma(ret, pa); - return S390_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, @@ -449,7 +447,7 @@ static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg, int ret; dma_addr_base = dma_alloc_address(dev, nr_pages); - if (dma_addr_base == S390_MAPPING_ERROR) + if (dma_addr_base == DMA_MAPPING_ERROR) return -ENOMEM; dma_addr = dma_addr_base; @@ -496,7 +494,7 @@ static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, for (i = 1; i < nr_elements; i++) { s = sg_next(s); - s->dma_address = S390_MAPPING_ERROR; + s->dma_address = DMA_MAPPING_ERROR; s->dma_length = 0; if (s->offset || (size & ~PAGE_MASK) || @@ -546,11 +544,6 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, } } -static int s390_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == S390_MAPPING_ERROR; -} - int zpci_dma_init_device(struct zpci_dev *zdev) { int rc; @@ -675,7 +668,6 @@ const struct dma_map_ops s390_pci_dma_ops = { .unmap_sg = s390_dma_unmap_sg, .map_page = s390_dma_map_pages, .unmap_page = s390_dma_unmap_pages, - .mapping_error = s390_mapping_error, /* dma_supported is unconditionally true without a callback */ }; EXPORT_SYMBOL_GPL(s390_pci_dma_ops); From 06301c5e0a16fc4de00582986071aae2b62d6f0a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 18:59:05 +0100 Subject: [PATCH 18/73] sparc: remove the mapping_error dma_map_ops method Sparc already returns (~(dma_addr_t)0x0) on mapping failures, so we can switch over to returning DMA_MAPPING_ERROR and let the core dma-mapping code handle the rest. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- arch/sparc/kernel/iommu.c | 12 +++--------- arch/sparc/kernel/iommu_common.h | 2 -- arch/sparc/kernel/pci_sun4v.c | 14 ++++---------- 3 files changed, 7 insertions(+), 21 deletions(-) diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index 40d008b0bd3e..0626bae5e3da 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -315,7 +315,7 @@ bad: bad_no_ctx: if (printk_ratelimit()) WARN_ON(1); - return SPARC_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } static void strbuf_flush(struct strbuf *strbuf, struct iommu *iommu, @@ -548,7 +548,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, if (outcount < incount) { outs = sg_next(outs); - outs->dma_address = SPARC_MAPPING_ERROR; + outs->dma_address = DMA_MAPPING_ERROR; outs->dma_length = 0; } @@ -574,7 +574,7 @@ iommu_map_failed: iommu_tbl_range_free(&iommu->tbl, vaddr, npages, IOMMU_ERROR_CODE); - s->dma_address = SPARC_MAPPING_ERROR; + s->dma_address = DMA_MAPPING_ERROR; s->dma_length = 0; } if (s == outs) @@ -742,11 +742,6 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev, spin_unlock_irqrestore(&iommu->lock, flags); } -static int dma_4u_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == SPARC_MAPPING_ERROR; -} - static int dma_4u_supported(struct device *dev, u64 device_mask) { struct iommu *iommu = dev->archdata.iommu; @@ -772,7 +767,6 @@ static const struct dma_map_ops sun4u_dma_ops = { .sync_single_for_cpu = dma_4u_sync_single_for_cpu, .sync_sg_for_cpu = dma_4u_sync_sg_for_cpu, .dma_supported = dma_4u_supported, - .mapping_error = dma_4u_mapping_error, }; const struct dma_map_ops *dma_ops = &sun4u_dma_ops; diff --git a/arch/sparc/kernel/iommu_common.h b/arch/sparc/kernel/iommu_common.h index e3c02ba32500..d62ed9c5682d 100644 --- a/arch/sparc/kernel/iommu_common.h +++ b/arch/sparc/kernel/iommu_common.h @@ -48,6 +48,4 @@ static inline int is_span_boundary(unsigned long entry, return iommu_is_span_boundary(entry, nr, shift, boundary_size); } -#define SPARC_MAPPING_ERROR (~(dma_addr_t)0x0) - #endif /* _IOMMU_COMMON_H */ diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 565d9ac883d0..fa0e42b4cbfb 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -414,12 +414,12 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, bad: if (printk_ratelimit()) WARN_ON(1); - return SPARC_MAPPING_ERROR; + return DMA_MAPPING_ERROR; iommu_map_fail: local_irq_restore(flags); iommu_tbl_range_free(tbl, bus_addr, npages, IOMMU_ERROR_CODE); - return SPARC_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr, @@ -592,7 +592,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, if (outcount < incount) { outs = sg_next(outs); - outs->dma_address = SPARC_MAPPING_ERROR; + outs->dma_address = DMA_MAPPING_ERROR; outs->dma_length = 0; } @@ -609,7 +609,7 @@ iommu_map_failed: iommu_tbl_range_free(tbl, vaddr, npages, IOMMU_ERROR_CODE); /* XXX demap? XXX */ - s->dma_address = SPARC_MAPPING_ERROR; + s->dma_address = DMA_MAPPING_ERROR; s->dma_length = 0; } if (s == outs) @@ -688,11 +688,6 @@ static int dma_4v_supported(struct device *dev, u64 device_mask) return pci64_dma_supported(to_pci_dev(dev), device_mask); } -static int dma_4v_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == SPARC_MAPPING_ERROR; -} - static const struct dma_map_ops sun4v_dma_ops = { .alloc = dma_4v_alloc_coherent, .free = dma_4v_free_coherent, @@ -701,7 +696,6 @@ static const struct dma_map_ops sun4v_dma_ops = { .map_sg = dma_4v_map_sg, .unmap_sg = dma_4v_unmap_sg, .dma_supported = dma_4v_supported, - .mapping_error = dma_4v_mapping_error, }; static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm, struct device *parent) From 748c3c4d13232c96a9b173a4cc46406e12b1c8e5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 18:59:50 +0100 Subject: [PATCH 19/73] parisc/ccio: remove the mapping_error dma_map_ops method The CCIO iommu code already returns (~(dma_addr_t)0x0) on mapping failures, so we can switch over to returning DMA_MAPPING_ERROR and let the core dma-mapping code handle the rest. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- drivers/parisc/ccio-dma.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 701a7d6a74d5..714aac72df0e 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -110,8 +110,6 @@ #define CMD_TLB_DIRECT_WRITE 35 /* IO_COMMAND for I/O TLB Writes */ #define CMD_TLB_PURGE 33 /* IO_COMMAND to Purge I/O TLB entry */ -#define CCIO_MAPPING_ERROR (~(dma_addr_t)0) - struct ioa_registers { /* Runway Supervisory Set */ int32_t unused1[12]; @@ -740,7 +738,7 @@ ccio_map_single(struct device *dev, void *addr, size_t size, BUG_ON(!dev); ioc = GET_IOC(dev); if (!ioc) - return CCIO_MAPPING_ERROR; + return DMA_MAPPING_ERROR; BUG_ON(size <= 0); @@ -1021,11 +1019,6 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, DBG_RUN_SG("%s() DONE (nents %d)\n", __func__, nents); } -static int ccio_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == CCIO_MAPPING_ERROR; -} - static const struct dma_map_ops ccio_ops = { .dma_supported = ccio_dma_supported, .alloc = ccio_alloc, @@ -1034,7 +1027,6 @@ static const struct dma_map_ops ccio_ops = { .unmap_page = ccio_unmap_page, .map_sg = ccio_map_sg, .unmap_sg = ccio_unmap_sg, - .mapping_error = ccio_mapping_error, }; #ifdef CONFIG_PROC_FS From fb1b53f16c5e457eac8837407da37f29bf0a509a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 19:00:17 +0100 Subject: [PATCH 20/73] parisc/sba_iommu: remove the mapping_error dma_map_ops method The SBA iommu code already returns (~(dma_addr_t)0x0) on mapping failures, so we can switch over to returning DMA_MAPPING_ERROR and let the core dma-mapping code handle the rest. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- drivers/parisc/sba_iommu.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index c1e599a429af..452d306ce5cb 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -93,8 +93,6 @@ #define DEFAULT_DMA_HINT_REG 0 -#define SBA_MAPPING_ERROR (~(dma_addr_t)0) - struct sba_device *sba_list; EXPORT_SYMBOL_GPL(sba_list); @@ -725,7 +723,7 @@ sba_map_single(struct device *dev, void *addr, size_t size, ioc = GET_IOC(dev); if (!ioc) - return SBA_MAPPING_ERROR; + return DMA_MAPPING_ERROR; /* save offset bits */ offset = ((dma_addr_t) (long) addr) & ~IOVP_MASK; @@ -1080,11 +1078,6 @@ sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, } -static int sba_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == SBA_MAPPING_ERROR; -} - static const struct dma_map_ops sba_ops = { .dma_supported = sba_dma_supported, .alloc = sba_alloc, @@ -1093,7 +1086,6 @@ static const struct dma_map_ops sba_ops = { .unmap_page = sba_unmap_page, .map_sg = sba_map_sg, .unmap_sg = sba_unmap_sg, - .mapping_error = sba_mapping_error, }; From 52f0b3ee0b2caab04e7a1db1cb4009a277a802af Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 19:04:31 +0100 Subject: [PATCH 21/73] arm64: remove the dummy_dma_ops mapping_error method Just return DMA_MAPPING_ERROR from __dummy_map_page and let the core dma-mapping code handle the rest. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- arch/arm64/mm/dma-mapping.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index e2e7e5d0f94e..3c2c088a3562 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -119,7 +119,7 @@ static dma_addr_t __dummy_map_page(struct device *dev, struct page *page, enum dma_data_direction dir, unsigned long attrs) { - return 0; + return DMA_MAPPING_ERROR; } static void __dummy_unmap_page(struct device *dev, dma_addr_t dev_addr, @@ -154,11 +154,6 @@ static void __dummy_sync_sg(struct device *dev, { } -static int __dummy_mapping_error(struct device *hwdev, dma_addr_t dma_addr) -{ - return 1; -} - static int __dummy_dma_supported(struct device *hwdev, u64 mask) { return 0; @@ -176,7 +171,6 @@ const struct dma_map_ops dummy_dma_ops = { .sync_single_for_device = __dummy_sync_single, .sync_sg_for_cpu = __dummy_sync_sg, .sync_sg_for_device = __dummy_sync_sg, - .mapping_error = __dummy_mapping_error, .dma_supported = __dummy_dma_supported, }; EXPORT_SYMBOL(dummy_dma_ops); From a20388be321469afb8b726b455c3e7c77a7304dd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 19:04:03 +0100 Subject: [PATCH 22/73] alpha: remove the mapping_error dma_map_ops method Return DMA_MAPPING_ERROR instead of 0 on a dma mapping failure and let the core dma-mapping code handle the rest. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- arch/alpha/kernel/pci_iommu.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 46e08e0d9181..e1716e0d92fd 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -291,7 +291,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size, use direct_map above, it now must be considered an error. */ if (! alpha_mv.mv_pci_tbi) { printk_once(KERN_WARNING "pci_map_single: no HW sg\n"); - return 0; + return DMA_MAPPING_ERROR; } arena = hose->sg_pci; @@ -307,7 +307,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size, if (dma_ofs < 0) { printk(KERN_WARNING "pci_map_single failed: " "could not allocate dma page tables\n"); - return 0; + return DMA_MAPPING_ERROR; } paddr &= PAGE_MASK; @@ -455,7 +455,7 @@ try_again: memset(cpu_addr, 0, size); *dma_addrp = pci_map_single_1(pdev, cpu_addr, size, 0); - if (*dma_addrp == 0) { + if (*dma_addrp == DMA_MAPPING_ERROR) { free_pages((unsigned long)cpu_addr, order); if (alpha_mv.mv_pci_tbi || (gfp & GFP_DMA)) return NULL; @@ -671,7 +671,7 @@ static int alpha_pci_map_sg(struct device *dev, struct scatterlist *sg, sg->dma_address = pci_map_single_1(pdev, SG_ENT_VIRT_ADDRESS(sg), sg->length, dac_allowed); - return sg->dma_address != 0; + return sg->dma_address != DMA_MAPPING_ERROR; } start = sg; @@ -935,11 +935,6 @@ iommu_unbind(struct pci_iommu_arena *arena, long pg_start, long pg_count) return 0; } -static int alpha_pci_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == 0; -} - const struct dma_map_ops alpha_pci_ops = { .alloc = alpha_pci_alloc_coherent, .free = alpha_pci_free_coherent, @@ -947,7 +942,6 @@ const struct dma_map_ops alpha_pci_ops = { .unmap_page = alpha_pci_unmap_page, .map_sg = alpha_pci_map_sg, .unmap_sg = alpha_pci_unmap_sg, - .mapping_error = alpha_pci_mapping_error, .dma_supported = alpha_pci_supported, }; EXPORT_SYMBOL(alpha_pci_ops); From 52aee3e83d44bcf586bc466c07feeb5a6cf91be0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 19:06:58 +0100 Subject: [PATCH 23/73] ia64/sba_iommu: improve internal map_page users Remove the odd sba_{un,}map_single_attrs wrappers, check errors everywhere. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- arch/ia64/hp/common/sba_iommu.c | 73 +++++++++++++-------------------- 1 file changed, 29 insertions(+), 44 deletions(-) diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index e8a93b07283e..c56f28c98102 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -907,11 +907,12 @@ sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt) } /** - * sba_map_single_attrs - map one buffer and return IOVA for DMA + * sba_map_page - map one buffer and return IOVA for DMA * @dev: instance of PCI owned by the driver that's asking. - * @addr: driver buffer to map. - * @size: number of bytes to map in driver buffer. - * @dir: R/W or both. + * @page: page to map + * @poff: offset into page + * @size: number of bytes to map + * @dir: dma direction * @attrs: optional dma attributes * * See Documentation/DMA-API-HOWTO.txt @@ -944,7 +945,7 @@ static dma_addr_t sba_map_page(struct device *dev, struct page *page, ** Device is bit capable of DMA'ing to the buffer... ** just return the PCI address of ptr */ - DBG_BYPASS("sba_map_single_attrs() bypass mask/addr: " + DBG_BYPASS("sba_map_page() bypass mask/addr: " "0x%lx/0x%lx\n", to_pci_dev(dev)->dma_mask, pci_addr); return pci_addr; @@ -966,7 +967,7 @@ static dma_addr_t sba_map_page(struct device *dev, struct page *page, #ifdef ASSERT_PDIR_SANITY spin_lock_irqsave(&ioc->res_lock, flags); - if (sba_check_pdir(ioc,"Check before sba_map_single_attrs()")) + if (sba_check_pdir(ioc,"Check before sba_map_page()")) panic("Sanity check failed"); spin_unlock_irqrestore(&ioc->res_lock, flags); #endif @@ -997,20 +998,12 @@ static dma_addr_t sba_map_page(struct device *dev, struct page *page, /* form complete address */ #ifdef ASSERT_PDIR_SANITY spin_lock_irqsave(&ioc->res_lock, flags); - sba_check_pdir(ioc,"Check after sba_map_single_attrs()"); + sba_check_pdir(ioc,"Check after sba_map_page()"); spin_unlock_irqrestore(&ioc->res_lock, flags); #endif return SBA_IOVA(ioc, iovp, offset); } -static dma_addr_t sba_map_single_attrs(struct device *dev, void *addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - return sba_map_page(dev, virt_to_page(addr), - (unsigned long)addr & ~PAGE_MASK, size, dir, attrs); -} - #ifdef ENABLE_MARK_CLEAN static SBA_INLINE void sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size) @@ -1036,7 +1029,7 @@ sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size) #endif /** - * sba_unmap_single_attrs - unmap one IOVA and free resources + * sba_unmap_page - unmap one IOVA and free resources * @dev: instance of PCI owned by the driver that's asking. * @iova: IOVA of driver buffer previously mapped. * @size: number of bytes mapped in driver buffer. @@ -1063,7 +1056,7 @@ static void sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size, /* ** Address does not fall w/in IOVA, must be bypassing */ - DBG_BYPASS("sba_unmap_single_attrs() bypass addr: 0x%lx\n", + DBG_BYPASS("sba_unmap_page() bypass addr: 0x%lx\n", iova); #ifdef ENABLE_MARK_CLEAN @@ -1114,12 +1107,6 @@ static void sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size, #endif /* DELAYED_RESOURCE_CNT == 0 */ } -void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - sba_unmap_page(dev, iova, size, dir, attrs); -} - /** * sba_alloc_coherent - allocate/map shared mem for DMA * @dev: instance of PCI owned by the driver that's asking. @@ -1132,30 +1119,24 @@ static void * sba_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) { + struct page *page; struct ioc *ioc; + int node = -1; void *addr; ioc = GET_IOC(dev); ASSERT(ioc); - #ifdef CONFIG_NUMA - { - struct page *page; - - page = alloc_pages_node(ioc->node, flags, get_order(size)); - if (unlikely(!page)) - return NULL; - - addr = page_address(page); - } -#else - addr = (void *) __get_free_pages(flags, get_order(size)); + node = ioc->node; #endif - if (unlikely(!addr)) + + page = alloc_pages_node(node, flags, get_order(size)); + if (unlikely(!page)) return NULL; + addr = page_address(page); memset(addr, 0, size); - *dma_handle = virt_to_phys(addr); + *dma_handle = page_to_phys(page); #ifdef ALLOW_IOV_BYPASS ASSERT(dev->coherent_dma_mask); @@ -1174,9 +1155,10 @@ sba_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, * If device can't bypass or bypass is disabled, pass the 32bit fake * device to map single to get an iova mapping. */ - *dma_handle = sba_map_single_attrs(&ioc->sac_only_dev->dev, addr, - size, 0, 0); - + *dma_handle = sba_map_page(&ioc->sac_only_dev->dev, page, 0, size, + DMA_BIDIRECTIONAL, 0); + if (dma_mapping_error(dev, *dma_handle)) + return NULL; return addr; } @@ -1193,7 +1175,7 @@ sba_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, static void sba_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { - sba_unmap_single_attrs(dev, dma_handle, size, 0, 0); + sba_unmap_page(dev, dma_handle, size, 0, 0); free_pages((unsigned long) vaddr, get_order(size)); } @@ -1483,7 +1465,10 @@ static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist, /* Fast path single entry scatterlists. */ if (nents == 1) { sglist->dma_length = sglist->length; - sglist->dma_address = sba_map_single_attrs(dev, sba_sg_address(sglist), sglist->length, dir, attrs); + sglist->dma_address = sba_map_page(dev, sg_page(sglist), + sglist->offset, sglist->length, dir, attrs); + if (dma_mapping_error(dev, sglist->dma_address)) + return 0; return 1; } @@ -1572,8 +1557,8 @@ static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, while (nents && sglist->dma_length) { - sba_unmap_single_attrs(dev, sglist->dma_address, - sglist->dma_length, dir, attrs); + sba_unmap_page(dev, sglist->dma_address, sglist->dma_length, + dir, attrs); sglist = sg_next(sglist); nents--; } From 07256950cd69a31d79bfd605a7023c61cf09b161 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 19:16:38 +0100 Subject: [PATCH 24/73] ia64/sba_iommu: remove the mapping_error dma_map_ops method Return DMA_MAPPING_ERROR instead of 0 on a dma mapping failure and let the core dma-mapping code handle the rest. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- arch/ia64/hp/common/sba_iommu.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index c56f28c98102..0d21c0b5b23d 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -974,7 +974,7 @@ static dma_addr_t sba_map_page(struct device *dev, struct page *page, pide = sba_alloc_range(ioc, dev, size); if (pide < 0) - return 0; + return DMA_MAPPING_ERROR; iovp = (dma_addr_t) pide << iovp_shift; @@ -2155,11 +2155,6 @@ static int sba_dma_supported (struct device *dev, u64 mask) return ((mask & 0xFFFFFFFFUL) == 0xFFFFFFFFUL); } -static int sba_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return 0; -} - __setup("nosbagart", nosbagart); static int __init @@ -2193,7 +2188,6 @@ const struct dma_map_ops sba_dma_ops = { .map_sg = sba_map_sg_attrs, .unmap_sg = sba_unmap_sg_attrs, .dma_supported = sba_dma_supported, - .mapping_error = sba_dma_mapping_error, }; void sba_dma_init(void) From 608b9761a06089ac22bb157bc70d16e9df12ca7c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 19:18:10 +0100 Subject: [PATCH 25/73] ia64/sn: remove the mapping_error dma_map_ops method Return DMA_MAPPING_ERROR instead of 0 on a dma mapping failure and let the core dma-mapping code handle the rest. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- arch/ia64/sn/pci/pci_dma.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index 4ce4ee4ef9f2..b7d42e4edc1f 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -196,7 +196,7 @@ static dma_addr_t sn_dma_map_page(struct device *dev, struct page *page, if (!dma_addr) { printk(KERN_ERR "%s: out of ATEs\n", __func__); - return 0; + return DMA_MAPPING_ERROR; } return dma_addr; } @@ -314,11 +314,6 @@ static int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl, return nhwentries; } -static int sn_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return 0; -} - static u64 sn_dma_get_required_mask(struct device *dev) { return DMA_BIT_MASK(64); @@ -441,7 +436,6 @@ static struct dma_map_ops sn_dma_ops = { .unmap_page = sn_dma_unmap_page, .map_sg = sn_dma_map_sg, .unmap_sg = sn_dma_unmap_sg, - .mapping_error = sn_dma_mapping_error, .dma_supported = sn_dma_supported, .get_required_mask = sn_dma_get_required_mask, }; From 9e8aa6b5461be50197e0bebc040e9fb6029a3d6b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 19:20:44 +0100 Subject: [PATCH 26/73] x86/amd_gart: remove the mapping_error dma_map_ops method Return DMA_MAPPING_ERROR instead of the magic bad_dma_addr on a dma mapping failure and let the core dma-mapping code handle the rest. Remove the magic EMERGENCY_PAGES that the bad_dma_addr gets redirected to. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- arch/x86/kernel/amd_gart_64.c | 40 ++++++----------------------------- 1 file changed, 6 insertions(+), 34 deletions(-) diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 3f9d1b4019bb..ad84f13d0c1f 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -50,8 +50,6 @@ static unsigned long iommu_pages; /* .. and in pages */ static u32 *iommu_gatt_base; /* Remapping table */ -static dma_addr_t bad_dma_addr; - /* * If this is disabled the IOMMU will use an optimized flushing strategy * of only flushing when an mapping is reused. With it true the GART is @@ -74,8 +72,6 @@ static u32 gart_unmapped_entry; (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) -#define EMERGENCY_PAGES 32 /* = 128KB */ - #ifdef CONFIG_AGP #define AGPEXTERN extern #else @@ -184,14 +180,6 @@ static void iommu_full(struct device *dev, size_t size, int dir) */ dev_err(dev, "PCI-DMA: Out of IOMMU space for %lu bytes\n", size); - - if (size > PAGE_SIZE*EMERGENCY_PAGES) { - if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) - panic("PCI-DMA: Memory would be corrupted\n"); - if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) - panic(KERN_ERR - "PCI-DMA: Random memory would be DMAed\n"); - } #ifdef CONFIG_IOMMU_LEAK dump_leak(); #endif @@ -220,7 +208,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, int i; if (unlikely(phys_mem + size > GART_MAX_PHYS_ADDR)) - return bad_dma_addr; + return DMA_MAPPING_ERROR; iommu_page = alloc_iommu(dev, npages, align_mask); if (iommu_page == -1) { @@ -229,7 +217,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, if (panic_on_overflow) panic("dma_map_area overflow %lu bytes\n", size); iommu_full(dev, size, dir); - return bad_dma_addr; + return DMA_MAPPING_ERROR; } for (i = 0; i < npages; i++) { @@ -271,7 +259,7 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr, int npages; int i; - if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || + if (dma_addr == DMA_MAPPING_ERROR || dma_addr >= iommu_bus_base + iommu_size) return; @@ -315,7 +303,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, if (nonforced_iommu(dev, addr, s->length)) { addr = dma_map_area(dev, addr, s->length, dir, 0); - if (addr == bad_dma_addr) { + if (addr == DMA_MAPPING_ERROR) { if (i > 0) gart_unmap_sg(dev, sg, i, dir, 0); nents = 0; @@ -471,7 +459,7 @@ error: iommu_full(dev, pages << PAGE_SHIFT, dir); for_each_sg(sg, s, nents, i) - s->dma_address = bad_dma_addr; + s->dma_address = DMA_MAPPING_ERROR; return 0; } @@ -490,7 +478,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, *dma_addr = dma_map_area(dev, virt_to_phys(vaddr), size, DMA_BIDIRECTIONAL, (1UL << get_order(size)) - 1); flush_gart(); - if (unlikely(*dma_addr == bad_dma_addr)) + if (unlikely(*dma_addr == DMA_MAPPING_ERROR)) goto out_free; return vaddr; out_free: @@ -507,11 +495,6 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr, dma_direct_free_pages(dev, size, vaddr, dma_addr, attrs); } -static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return (dma_addr == bad_dma_addr); -} - static int no_agp; static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) @@ -695,7 +678,6 @@ static const struct dma_map_ops gart_dma_ops = { .unmap_page = gart_unmap_page, .alloc = gart_alloc_coherent, .free = gart_free_coherent, - .mapping_error = gart_mapping_error, .dma_supported = dma_direct_supported, }; @@ -730,7 +712,6 @@ int __init gart_iommu_init(void) unsigned long aper_base, aper_size; unsigned long start_pfn, end_pfn; unsigned long scratch; - long i; if (!amd_nb_has_feature(AMD_NB_GART)) return 0; @@ -784,19 +765,12 @@ int __init gart_iommu_init(void) } #endif - /* - * Out of IOMMU space handling. - * Reserve some invalid pages at the beginning of the GART. - */ - bitmap_set(iommu_gart_bitmap, 0, EMERGENCY_PAGES); - pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", iommu_size >> 20); agp_memory_reserved = iommu_size; iommu_start = aper_size - iommu_size; iommu_bus_base = info.aper_base + iommu_start; - bad_dma_addr = iommu_bus_base; iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); /* @@ -838,8 +812,6 @@ int __init gart_iommu_init(void) if (!scratch) panic("Cannot allocate iommu scratch page"); gart_unmapped_entry = GPTE_ENCODE(__pa(scratch)); - for (i = EMERGENCY_PAGES; i < iommu_pages; i++) - iommu_gatt_base[i] = gart_unmapped_entry; flush_gart(); dma_ops = &gart_dma_ops; From 887712a0a5b31e0cf28087f6445de431b4722e52 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 19:24:00 +0100 Subject: [PATCH 27/73] x86/calgary: remove the mapping_error dma_map_ops method Return DMA_MAPPING_ERROR instead of the magic bad_dma_addr on a dma mapping failure and let the core dma-mapping code handle the rest. Remove the magic EMERGENCY_PAGES that the bad_dma_addr gets redirected to. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- arch/x86/kernel/pci-calgary_64.c | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index bbfc8b1e9104..c70720f61a34 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -51,8 +51,6 @@ #include #include -#define CALGARY_MAPPING_ERROR 0 - #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT int use_calgary __read_mostly = 1; #else @@ -157,8 +155,6 @@ static const unsigned long phb_debug_offsets[] = { #define PHB_DEBUG_STUFF_OFFSET 0x0020 -#define EMERGENCY_PAGES 32 /* = 128KB */ - unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED; static int translate_empty_slots __read_mostly = 0; static int calgary_detected __read_mostly = 0; @@ -255,7 +251,7 @@ static unsigned long iommu_range_alloc(struct device *dev, if (panic_on_overflow) panic("Calgary: fix the allocator.\n"); else - return CALGARY_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } } @@ -274,11 +270,10 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, dma_addr_t ret; entry = iommu_range_alloc(dev, tbl, npages); - - if (unlikely(entry == CALGARY_MAPPING_ERROR)) { + if (unlikely(entry == DMA_MAPPING_ERROR)) { pr_warn("failed to allocate %u pages in iommu %p\n", npages, tbl); - return CALGARY_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } /* set the return dma address */ @@ -294,12 +289,10 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, unsigned int npages) { unsigned long entry; - unsigned long badend; unsigned long flags; /* were we called with bad_dma_address? */ - badend = CALGARY_MAPPING_ERROR + (EMERGENCY_PAGES * PAGE_SIZE); - if (unlikely(dma_addr < badend)) { + if (unlikely(dma_addr == DMA_MAPPING_ERROR)) { WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " "address 0x%Lx\n", dma_addr); return; @@ -383,7 +376,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE); entry = iommu_range_alloc(dev, tbl, npages); - if (entry == CALGARY_MAPPING_ERROR) { + if (entry == DMA_MAPPING_ERROR) { /* makes sure unmap knows to stop */ s->dma_length = 0; goto error; @@ -401,7 +394,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, error: calgary_unmap_sg(dev, sg, nelems, dir, 0); for_each_sg(sg, s, nelems, i) { - sg->dma_address = CALGARY_MAPPING_ERROR; + sg->dma_address = DMA_MAPPING_ERROR; sg->dma_length = 0; } return 0; @@ -454,7 +447,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, /* set up tces to cover the allocated range */ mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); - if (mapping == CALGARY_MAPPING_ERROR) + if (mapping == DMA_MAPPING_ERROR) goto free; *dma_handle = mapping; return ret; @@ -479,11 +472,6 @@ static void calgary_free_coherent(struct device *dev, size_t size, free_pages((unsigned long)vaddr, get_order(size)); } -static int calgary_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == CALGARY_MAPPING_ERROR; -} - static const struct dma_map_ops calgary_dma_ops = { .alloc = calgary_alloc_coherent, .free = calgary_free_coherent, @@ -491,7 +479,6 @@ static const struct dma_map_ops calgary_dma_ops = { .unmap_sg = calgary_unmap_sg, .map_page = calgary_map_page, .unmap_page = calgary_unmap_page, - .mapping_error = calgary_mapping_error, .dma_supported = dma_direct_supported, }; @@ -739,9 +726,6 @@ static void __init calgary_reserve_regions(struct pci_dev *dev) u64 start; struct iommu_table *tbl = pci_iommu(dev->bus); - /* reserve EMERGENCY_PAGES from bad_dma_address and up */ - iommu_range_reserve(tbl, CALGARY_MAPPING_ERROR, EMERGENCY_PAGES); - /* avoid the BIOS/VGA first 640KB-1MB region */ /* for CalIOC2 - avoid the entire first MB */ if (is_calgary(dev->device)) { From b3aa14f022543ed86823c97c145495b747102fa9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 19:28:34 +0100 Subject: [PATCH 28/73] iommu: remove the mapping_error dma_map_ops method Return DMA_MAPPING_ERROR instead of 0 on a dma mapping failure and let the core dma-mapping code handle the rest. Note that the existing code used AMD_IOMMU_MAPPING_ERROR to check from a 0 return from the IOVA allocator, which is replaced with an explicit 0 as in the implementation and other users of that interface. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- drivers/iommu/amd_iommu.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 1167ff0416cf..c5d6c7c42b0a 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -55,8 +55,6 @@ #include "amd_iommu_types.h" #include "irq_remapping.h" -#define AMD_IOMMU_MAPPING_ERROR 0 - #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) #define LOOP_TIMEOUT 100000 @@ -2339,7 +2337,7 @@ static dma_addr_t __map_single(struct device *dev, paddr &= PAGE_MASK; address = dma_ops_alloc_iova(dev, dma_dom, pages, dma_mask); - if (address == AMD_IOMMU_MAPPING_ERROR) + if (!address) goto out; prot = dir2prot(direction); @@ -2376,7 +2374,7 @@ out_unmap: dma_ops_free_iova(dma_dom, address, pages); - return AMD_IOMMU_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } /* @@ -2427,7 +2425,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page, if (PTR_ERR(domain) == -EINVAL) return (dma_addr_t)paddr; else if (IS_ERR(domain)) - return AMD_IOMMU_MAPPING_ERROR; + return DMA_MAPPING_ERROR; dma_mask = *dev->dma_mask; dma_dom = to_dma_ops_domain(domain); @@ -2504,7 +2502,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, npages = sg_num_pages(dev, sglist, nelems); address = dma_ops_alloc_iova(dev, dma_dom, npages, dma_mask); - if (address == AMD_IOMMU_MAPPING_ERROR) + if (address == DMA_MAPPING_ERROR) goto out_err; prot = dir2prot(direction); @@ -2627,7 +2625,7 @@ static void *alloc_coherent(struct device *dev, size_t size, *dma_addr = __map_single(dev, dma_dom, page_to_phys(page), size, DMA_BIDIRECTIONAL, dma_mask); - if (*dma_addr == AMD_IOMMU_MAPPING_ERROR) + if (*dma_addr == DMA_MAPPING_ERROR) goto out_free; return page_address(page); @@ -2678,11 +2676,6 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask) return check_device(dev); } -static int amd_iommu_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == AMD_IOMMU_MAPPING_ERROR; -} - static const struct dma_map_ops amd_iommu_dma_ops = { .alloc = alloc_coherent, .free = free_coherent, @@ -2691,7 +2684,6 @@ static const struct dma_map_ops amd_iommu_dma_ops = { .map_sg = map_sg, .unmap_sg = unmap_sg, .dma_supported = amd_iommu_dma_supported, - .mapping_error = amd_iommu_mapping_error, }; static int init_reserved_iova_ranges(void) From 964f2311a6862f1fbcc044d0828ad90030928b7f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 19:32:03 +0100 Subject: [PATCH 29/73] iommu/intel: small map_page cleanup Pass the page + offset to the low-level __iommu_map_single helper (which gets renamed to fit the new calling conventions) as both callers have the page at hand. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- drivers/iommu/intel-iommu.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 41a4b8808802..66b4444398ae 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3597,9 +3597,11 @@ static int iommu_no_mapping(struct device *dev) return 0; } -static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, - size_t size, int dir, u64 dma_mask) +static dma_addr_t __intel_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, int dir, + u64 dma_mask) { + phys_addr_t paddr = page_to_phys(page) + offset; struct dmar_domain *domain; phys_addr_t start_paddr; unsigned long iova_pfn; @@ -3661,8 +3663,7 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page, enum dma_data_direction dir, unsigned long attrs) { - return __intel_map_single(dev, page_to_phys(page) + offset, size, - dir, *dev->dma_mask); + return __intel_map_page(dev, page, offset, size, dir, *dev->dma_mask); } static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) @@ -3753,9 +3754,8 @@ static void *intel_alloc_coherent(struct device *dev, size_t size, return NULL; memset(page_address(page), 0, size); - *dma_handle = __intel_map_single(dev, page_to_phys(page), size, - DMA_BIDIRECTIONAL, - dev->coherent_dma_mask); + *dma_handle = __intel_map_page(dev, page, 0, size, DMA_BIDIRECTIONAL, + dev->coherent_dma_mask); if (*dma_handle) return page_address(page); if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) From 524a669bdd5ff45cb927f0c524ed4e7a4fb3650b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 19:34:10 +0100 Subject: [PATCH 30/73] iommu/vt-d: remove the mapping_error dma_map_ops method Return DMA_MAPPING_ERROR instead of 0 on a dma mapping failure and let the core dma-mapping code handle the rest. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- drivers/iommu/intel-iommu.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 66b4444398ae..0ad67d65bbce 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3617,7 +3617,7 @@ static dma_addr_t __intel_map_page(struct device *dev, struct page *page, domain = get_valid_domain_for_dev(dev); if (!domain) - return 0; + return DMA_MAPPING_ERROR; iommu = domain_get_iommu(domain); size = aligned_nrpages(paddr, size); @@ -3655,7 +3655,7 @@ error: free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size)); pr_err("Device %s request: %zx@%llx dir %d --- failed\n", dev_name(dev), size, (unsigned long long)paddr, dir); - return 0; + return DMA_MAPPING_ERROR; } static dma_addr_t intel_map_page(struct device *dev, struct page *page, @@ -3756,7 +3756,7 @@ static void *intel_alloc_coherent(struct device *dev, size_t size, *dma_handle = __intel_map_page(dev, page, 0, size, DMA_BIDIRECTIONAL, dev->coherent_dma_mask); - if (*dma_handle) + if (*dma_handle != DMA_MAPPING_ERROR) return page_address(page); if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) __free_pages(page, order); @@ -3865,11 +3865,6 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele return nelems; } -static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return !dma_addr; -} - static const struct dma_map_ops intel_dma_ops = { .alloc = intel_alloc_coherent, .free = intel_free_coherent, @@ -3877,7 +3872,6 @@ static const struct dma_map_ops intel_dma_ops = { .unmap_sg = intel_unmap_sg, .map_page = intel_map_page, .unmap_page = intel_unmap_page, - .mapping_error = intel_mapping_error, .dma_supported = dma_direct_supported, }; From cad34be747b8a92146e71c8267f2c1d6794e34c0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 19:35:19 +0100 Subject: [PATCH 31/73] iommu/dma-iommu: remove the mapping_error dma_map_ops method Return DMA_MAPPING_ERROR instead of 0 on a dma mapping failure and let the core dma-mapping code handle the rest. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- arch/arm64/mm/dma-mapping.c | 7 +++---- drivers/iommu/dma-iommu.c | 23 ++++++++--------------- include/linux/dma-iommu.h | 1 - 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 3c2c088a3562..4c0f498069e8 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -233,7 +233,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, return NULL; *handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot); - if (iommu_dma_mapping_error(dev, *handle)) { + if (*handle == DMA_MAPPING_ERROR) { if (coherent) __free_pages(page, get_order(size)); else @@ -250,7 +250,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, return NULL; *handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot); - if (iommu_dma_mapping_error(dev, *handle)) { + if (*handle == DMA_MAPPING_ERROR) { dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); return NULL; @@ -410,7 +410,7 @@ static dma_addr_t __iommu_map_page(struct device *dev, struct page *page, dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot); if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - !iommu_dma_mapping_error(dev, dev_addr)) + dev_addr != DMA_MAPPING_ERROR) __dma_map_area(page_address(page) + offset, size, dir); return dev_addr; @@ -493,7 +493,6 @@ static const struct dma_map_ops iommu_dma_ops = { .sync_sg_for_device = __iommu_sync_sg_for_device, .map_resource = iommu_dma_map_resource, .unmap_resource = iommu_dma_unmap_resource, - .mapping_error = iommu_dma_mapping_error, }; static int __init __iommu_dma_init(void) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index d1b04753b204..60c7e9e9901e 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -32,8 +32,6 @@ #include #include -#define IOMMU_MAPPING_ERROR 0 - struct iommu_dma_msi_page { struct list_head list; dma_addr_t iova; @@ -523,7 +521,7 @@ void iommu_dma_free(struct device *dev, struct page **pages, size_t size, { __iommu_dma_unmap(iommu_get_dma_domain(dev), *handle, size); __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT); - *handle = IOMMU_MAPPING_ERROR; + *handle = DMA_MAPPING_ERROR; } /** @@ -556,7 +554,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, dma_addr_t iova; unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap; - *handle = IOMMU_MAPPING_ERROR; + *handle = DMA_MAPPING_ERROR; min_size = alloc_sizes & -alloc_sizes; if (min_size < PAGE_SIZE) { @@ -649,11 +647,11 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev); if (!iova) - return IOMMU_MAPPING_ERROR; + return DMA_MAPPING_ERROR; if (iommu_map(domain, iova, phys - iova_off, size, prot)) { iommu_dma_free_iova(cookie, iova, size); - return IOMMU_MAPPING_ERROR; + return DMA_MAPPING_ERROR; } return iova + iova_off; } @@ -694,7 +692,7 @@ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, s->offset += s_iova_off; s->length = s_length; - sg_dma_address(s) = IOMMU_MAPPING_ERROR; + sg_dma_address(s) = DMA_MAPPING_ERROR; sg_dma_len(s) = 0; /* @@ -737,11 +735,11 @@ static void __invalidate_sg(struct scatterlist *sg, int nents) int i; for_each_sg(sg, s, nents, i) { - if (sg_dma_address(s) != IOMMU_MAPPING_ERROR) + if (sg_dma_address(s) != DMA_MAPPING_ERROR) s->offset += sg_dma_address(s); if (sg_dma_len(s)) s->length = sg_dma_len(s); - sg_dma_address(s) = IOMMU_MAPPING_ERROR; + sg_dma_address(s) = DMA_MAPPING_ERROR; sg_dma_len(s) = 0; } } @@ -858,11 +856,6 @@ void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, __iommu_dma_unmap(iommu_get_dma_domain(dev), handle, size); } -int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == IOMMU_MAPPING_ERROR; -} - static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, phys_addr_t msi_addr, struct iommu_domain *domain) { @@ -882,7 +875,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, return NULL; iova = __iommu_dma_map(dev, msi_addr, size, prot, domain); - if (iommu_dma_mapping_error(dev, iova)) + if (iova == DMA_MAPPING_ERROR) goto out_free_page; INIT_LIST_HEAD(&msi_page->list); diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index e8ca5e654277..e760dc5d1fa8 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -69,7 +69,6 @@ dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, size_t size, enum dma_data_direction dir, unsigned long attrs); void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, unsigned long attrs); -int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); /* The DMA API isn't _quite_ the whole story, though... */ void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg); From a4abe0ad10654b122caed873d8a7a58989d7cf9b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Nov 2018 19:38:19 +0100 Subject: [PATCH 32/73] xen-swiotlb: remove the mapping_error dma_map_ops method Return DMA_MAPPING_ERROR instead of 0 on a dma mapping failure and let the core dma-mapping code handle the rest. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- drivers/xen/swiotlb-xen.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 2a7f545bd0b5..6dc969d5ea2f 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -53,8 +53,6 @@ * API. */ -#define XEN_SWIOTLB_ERROR_CODE (~(dma_addr_t)0x0) - static char *xen_io_tlb_start, *xen_io_tlb_end; static unsigned long xen_io_tlb_nslabs; /* @@ -406,7 +404,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir, attrs); if (map == SWIOTLB_MAP_ERROR) - return XEN_SWIOTLB_ERROR_CODE; + return DMA_MAPPING_ERROR; dev_addr = xen_phys_to_bus(map); xen_dma_map_page(dev, pfn_to_page(map >> PAGE_SHIFT), @@ -421,7 +419,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, attrs |= DMA_ATTR_SKIP_CPU_SYNC; swiotlb_tbl_unmap_single(dev, map, size, dir, attrs); - return XEN_SWIOTLB_ERROR_CODE; + return DMA_MAPPING_ERROR; } /* @@ -700,11 +698,6 @@ xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size, attrs); } -static int xen_swiotlb_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == XEN_SWIOTLB_ERROR_CODE; -} - const struct dma_map_ops xen_swiotlb_dma_ops = { .alloc = xen_swiotlb_alloc_coherent, .free = xen_swiotlb_free_coherent, @@ -719,5 +712,4 @@ const struct dma_map_ops xen_swiotlb_dma_ops = { .dma_supported = xen_swiotlb_dma_supported, .mmap = xen_swiotlb_dma_mmap, .get_sgtable = xen_swiotlb_get_sgtable, - .mapping_error = xen_swiotlb_mapping_error, }; From 68c9ac1d1fd51233cfac15484c6153b90aaa4ca4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 4 Dec 2018 14:33:24 -0800 Subject: [PATCH 33/73] dma-mapping: remove the mapping_error dma_map_ops method No users left except for vmd which just forwards it. Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds --- drivers/pci/controller/vmd.c | 6 ------ include/linux/dma-mapping.h | 5 ----- 2 files changed, 11 deletions(-) diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index e50b0b5815ff..98ce79eac128 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -394,11 +394,6 @@ static void vmd_sync_sg_for_device(struct device *dev, struct scatterlist *sg, vmd_dma_ops(dev)->sync_sg_for_device(to_vmd_dev(dev), sg, nents, dir); } -static int vmd_mapping_error(struct device *dev, dma_addr_t addr) -{ - return vmd_dma_ops(dev)->mapping_error(to_vmd_dev(dev), addr); -} - static int vmd_dma_supported(struct device *dev, u64 mask) { return vmd_dma_ops(dev)->dma_supported(to_vmd_dev(dev), mask); @@ -446,7 +441,6 @@ static void vmd_setup_dma_ops(struct vmd_dev *vmd) ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_device); ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_cpu); ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_device); - ASSIGN_VMD_DMA_OPS(source, dest, mapping_error); ASSIGN_VMD_DMA_OPS(source, dest, dma_supported); ASSIGN_VMD_DMA_OPS(source, dest, get_required_mask); add_dma_domain(domain); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index f89d277cc8ed..f4ac26d5294a 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -128,7 +128,6 @@ struct dma_map_ops { enum dma_data_direction dir); void (*cache_sync)(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction); - int (*mapping_error)(struct device *dev, dma_addr_t dma_addr); int (*dma_supported)(struct device *dev, u64 mask); u64 (*get_required_mask)(struct device *dev); }; @@ -580,12 +579,8 @@ static inline void dma_free_coherent(struct device *dev, size_t size, static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { - const struct dma_map_ops *ops = get_dma_ops(dev); - debug_dma_mapping_error(dev, dma_addr); - if (ops->mapping_error) - return ops->mapping_error(dev, dma_addr); if (dma_addr == DMA_MAPPING_ERROR) return 1; return 0; From b14b9d25a3c707c85e7e31e15766a71365b52ab7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Nov 2018 10:59:37 +0100 Subject: [PATCH 34/73] dma-mapping: return an error code from dma_mapping_error Currently dma_mapping_error returns a boolean as int, with 1 meaning error. This is rather unusual and many callers have to convert it to errno value. The callers are highly inconsistent with error codes ranging from -ENOMEM over -EIO, -EINVAL and -EFAULT ranging to -EAGAIN. Return -ENOMEM which seems to be what the largest number of callers convert it to, and which also matches the typical error case where we are out of resources. Signed-off-by: Christoph Hellwig Acked-by: Russell King Acked-by: Linus Torvalds --- include/linux/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index f4ac26d5294a..7799c2b27849 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -582,7 +582,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) debug_dma_mapping_error(dev, dma_addr); if (dma_addr == DMA_MAPPING_ERROR) - return 1; + return -ENOMEM; return 0; } From 7c703e54cc71df5baa962e24a5663d88173bba5c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Nov 2018 09:51:00 +0100 Subject: [PATCH 35/73] arch: switch the default on ARCH_HAS_SG_CHAIN These days architectures are mostly out of the business of dealing with struct scatterlist at all, unless they have architecture specific iommu drivers. Replace the ARCH_HAS_SG_CHAIN symbol with a ARCH_NO_SG_CHAIN one only enabled for architectures with horrible legacy iommu drivers like alpha and parisc, and conditionally for arm which wants to keep it disable for legacy platforms. Signed-off-by: Christoph Hellwig Reviewed-by: Palmer Dabbelt --- .../features/io/sg-chain/arch-support.txt | 33 ------------------- arch/alpha/Kconfig | 1 + arch/arc/Kconfig | 1 - arch/arm/Kconfig | 2 +- arch/arm64/Kconfig | 1 - arch/ia64/Kconfig | 1 - arch/parisc/Kconfig | 1 + arch/powerpc/Kconfig | 1 - arch/s390/Kconfig | 1 - arch/sparc/Kconfig | 1 - arch/x86/Kconfig | 1 - arch/xtensa/Kconfig | 1 - include/linux/scatterlist.h | 6 ++-- lib/Kconfig | 2 +- lib/scatterlist.c | 2 +- 15 files changed, 8 insertions(+), 47 deletions(-) delete mode 100644 Documentation/features/io/sg-chain/arch-support.txt diff --git a/Documentation/features/io/sg-chain/arch-support.txt b/Documentation/features/io/sg-chain/arch-support.txt deleted file mode 100644 index 6554f0372c3f..000000000000 --- a/Documentation/features/io/sg-chain/arch-support.txt +++ /dev/null @@ -1,33 +0,0 @@ -# -# Feature name: sg-chain -# Kconfig: ARCH_HAS_SG_CHAIN -# description: arch supports chained scatter-gather lists -# - ----------------------- - | arch |status| - ----------------------- - | alpha: | TODO | - | arc: | ok | - | arm: | ok | - | arm64: | ok | - | c6x: | TODO | - | h8300: | TODO | - | hexagon: | TODO | - | ia64: | ok | - | m68k: | TODO | - | microblaze: | TODO | - | mips: | TODO | - | nds32: | TODO | - | nios2: | TODO | - | openrisc: | TODO | - | parisc: | TODO | - | powerpc: | ok | - | riscv: | TODO | - | s390: | ok | - | sh: | TODO | - | sparc: | ok | - | um: | TODO | - | unicore32: | TODO | - | x86: | ok | - | xtensa: | TODO | - ----------------------- diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 5b4f88363453..a7e748a46c18 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -5,6 +5,7 @@ config ALPHA select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_NO_PREEMPT + select ARCH_NO_SG_CHAIN select ARCH_USE_CMPXCHG_LOCKREF select HAVE_AOUT select HAVE_IDE diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index c9e2a1323536..fd48d698da29 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -13,7 +13,6 @@ config ARC select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE - select ARCH_HAS_SG_CHAIN select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 3b2852df6eb3..a858ee791ef0 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -19,6 +19,7 @@ config ARM select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_NO_SG_CHAIN if !ARM_HAS_SG_CHAIN select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7 select ARCH_SUPPORTS_ATOMIC_RMW @@ -119,7 +120,6 @@ config ARM . config ARM_HAS_SG_CHAIN - select ARCH_HAS_SG_CHAIN bool config ARM_DMA_USE_IOMMU diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 2e645ea693ea..06cf0ef24367 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -23,7 +23,6 @@ config ARM64 select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_MEMORY - select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_SYNC_DMA_FOR_DEVICE diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 36773def6920..d6f203658994 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -29,7 +29,6 @@ config IA64 select HAVE_MEMBLOCK_NODE_MAP select HAVE_VIRT_CPU_ACCOUNTING select ARCH_HAS_DMA_MARK_CLEAN - select ARCH_HAS_SG_CHAIN select VIRT_TO_BUS select ARCH_DISCARD_MEMBLOCK select GENERIC_IRQ_PROBE diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 92a339ee28b3..428ee50fc3db 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -11,6 +11,7 @@ config PARISC select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_NO_SG_CHAIN select ARCH_SUPPORTS_MEMORY_FAILURE select RTC_CLASS select RTC_DRV_GENERIC diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8be31261aec8..4bc8edd83cee 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -138,7 +138,6 @@ config PPC select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC64 - select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64 diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 5173366af8f3..5624e8607054 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -73,7 +73,6 @@ config S390 select ARCH_HAS_KCOV select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_MEMORY - select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_UBSAN_SANITIZE_ALL diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 490b2c95c212..8853b6ceae17 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -40,7 +40,6 @@ config SPARC select MODULES_USE_ELF_RELA select ODD_RT_SIGACTION select OLD_SIGSUSPEND - select ARCH_HAS_SG_CHAIN select CPU_NO_EFFICIENT_FFS select LOCKDEP_SMALL if LOCKDEP select NEED_DMA_MAP_STATE diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9d734f3c8234..adc845b66f01 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -66,7 +66,6 @@ config X86 select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE select ARCH_HAS_SET_MEMORY - select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 239bfb16c58b..75488b606edc 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 config XTENSA def_bool y - select ARCH_HAS_SG_CHAIN select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_NO_COHERENT_DMA_MMAP if !MMU diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 093aa57120b0..b96f0d0b5b8f 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -324,10 +324,10 @@ size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents, * Like SG_CHUNK_SIZE, but for archs that have sg chaining. This limit * is totally arbitrary, a setting of 2048 will get you at least 8mb ios. */ -#ifdef CONFIG_ARCH_HAS_SG_CHAIN -#define SG_MAX_SEGMENTS 2048 -#else +#ifdef CONFIG_ARCH_NO_SG_CHAIN #define SG_MAX_SEGMENTS SG_CHUNK_SIZE +#else +#define SG_MAX_SEGMENTS 2048 #endif #ifdef CONFIG_SG_POOL diff --git a/lib/Kconfig b/lib/Kconfig index a9965f4af4dd..d5a5e2ebf286 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -577,7 +577,7 @@ config SG_POOL # sg chaining option # -config ARCH_HAS_SG_CHAIN +config ARCH_NO_SG_CHAIN def_bool n config ARCH_HAS_PMEM_API diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 7c6096a71704..9ba349e775ef 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -271,7 +271,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents, if (nents == 0) return -EINVAL; -#ifndef CONFIG_ARCH_HAS_SG_CHAIN +#ifdef CONFIG_ARCH_NO_SG_CHAIN if (WARN_ON_ONCE(nents > max_ents)) return -EINVAL; #endif From 9f191555ba4ba8fc82e589670e46a7f79b72a157 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 10 Dec 2018 14:00:28 +0000 Subject: [PATCH 36/73] dma-debug: Expose nr_total_entries in debugfs Expose nr_total_entries in debugfs, so that {num,min}_free_entries become even more meaningful to users interested in current/maximum utilisation. This becomes even more relevant once nr_total_entries may change at runtime beyond just the existing AMD GART debug code. Suggested-by: John Garry Signed-off-by: Robin Murphy Tested-by: Qian Cai Signed-off-by: Christoph Hellwig --- Documentation/DMA-API.txt | 3 +++ kernel/dma/debug.c | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index ac66ae2509a9..6bdb095393b0 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -723,6 +723,9 @@ dma-api/min_free_entries This read-only file can be read to get the dma-api/num_free_entries The current number of free dma_debug_entries in the allocator. +dma-api/nr_total_entries The total number of dma_debug_entries in the + allocator, both free and used. + dma-api/driver-filter You can write a name of a driver into this file to limit the debug output to requests from that particular driver. Write an empty string to diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 231ca4628062..f6a141eb9438 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -142,6 +142,7 @@ static struct dentry *show_all_errors_dent __read_mostly; static struct dentry *show_num_errors_dent __read_mostly; static struct dentry *num_free_entries_dent __read_mostly; static struct dentry *min_free_entries_dent __read_mostly; +static struct dentry *nr_total_entries_dent __read_mostly; static struct dentry *filter_dent __read_mostly; /* per-driver filter related state */ @@ -926,6 +927,12 @@ static int dma_debug_fs_init(void) if (!min_free_entries_dent) goto out_err; + nr_total_entries_dent = debugfs_create_u32("nr_total_entries", 0444, + dma_debug_dent, + &nr_total_entries); + if (!nr_total_entries_dent) + goto out_err; + filter_dent = debugfs_create_file("driver_filter", 0644, dma_debug_dent, NULL, &filter_fops); if (!filter_dent) From f737b095c60c635db260e02fdb9f0efb9f3360c4 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 10 Dec 2018 14:00:27 +0000 Subject: [PATCH 37/73] dma-debug: Use pr_fmt() Use pr_fmt() to generate the "DMA-API: " prefix consistently. This results in it being added to a couple of pr_*() messages which were missing it before, and for the err_printk() calls moves it to the actual start of the message instead of somewhere in the middle. Signed-off-by: Robin Murphy Tested-by: Qian Cai Signed-off-by: Christoph Hellwig --- kernel/dma/debug.c | 74 ++++++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 36 deletions(-) diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index f6a141eb9438..29486eb9d1dc 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -17,6 +17,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define pr_fmt(fmt) "DMA-API: " fmt + #include #include #include @@ -235,7 +237,7 @@ static bool driver_filter(struct device *dev) error_count += 1; \ if (driver_filter(dev) && \ (show_all_errors || show_num_errors > 0)) { \ - WARN(1, "%s %s: " format, \ + WARN(1, pr_fmt("%s %s: ") format, \ dev ? dev_driver_string(dev) : "NULL", \ dev ? dev_name(dev) : "NULL", ## arg); \ dump_entry_trace(entry); \ @@ -520,7 +522,7 @@ static void active_cacheline_inc_overlap(phys_addr_t cln) * prematurely. */ WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP, - "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n", + pr_fmt("exceeded %d overlapping mappings of cacheline %pa\n"), ACTIVE_CACHELINE_MAX_OVERLAP, &cln); } @@ -615,7 +617,7 @@ void debug_dma_assert_idle(struct page *page) cln = to_cacheline_number(entry); err_printk(entry->dev, entry, - "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n", + "cpu touching an active dma mapped cacheline [cln=%pa]\n", &cln); } @@ -635,7 +637,7 @@ static void add_dma_entry(struct dma_debug_entry *entry) rc = active_cacheline_insert(entry); if (rc == -ENOMEM) { - pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n"); + pr_err("cacheline tracking ENOMEM, dma-debug disabled\n"); global_disable = true; } @@ -674,7 +676,7 @@ static struct dma_debug_entry *dma_entry_alloc(void) if (list_empty(&free_entries)) { global_disable = true; spin_unlock_irqrestore(&free_entries_lock, flags); - pr_err("DMA-API: debugging out of memory - disabling\n"); + pr_err("debugging out of memory - disabling\n"); return NULL; } @@ -778,7 +780,7 @@ static int prealloc_memory(u32 num_entries) num_free_entries = num_entries; min_free_entries = num_entries; - pr_info("DMA-API: preallocated %d debug entries\n", num_entries); + pr_info("preallocated %d debug entries\n", num_entries); return 0; @@ -851,7 +853,7 @@ static ssize_t filter_write(struct file *file, const char __user *userbuf, * switched off. */ if (current_driver_name[0]) - pr_info("DMA-API: switching off dma-debug driver filter\n"); + pr_info("switching off dma-debug driver filter\n"); current_driver_name[0] = 0; current_driver = NULL; goto out_unlock; @@ -869,7 +871,7 @@ static ssize_t filter_write(struct file *file, const char __user *userbuf, current_driver_name[i] = 0; current_driver = NULL; - pr_info("DMA-API: enable driver filter for driver [%s]\n", + pr_info("enable driver filter for driver [%s]\n", current_driver_name); out_unlock: @@ -888,7 +890,7 @@ static int dma_debug_fs_init(void) { dma_debug_dent = debugfs_create_dir("dma-api", NULL); if (!dma_debug_dent) { - pr_err("DMA-API: can not create debugfs directory\n"); + pr_err("can not create debugfs directory\n"); return -ENOMEM; } @@ -980,7 +982,7 @@ static int dma_debug_device_change(struct notifier_block *nb, unsigned long acti count = device_dma_allocations(dev, &entry); if (count == 0) break; - err_printk(dev, entry, "DMA-API: device driver has pending " + err_printk(dev, entry, "device driver has pending " "DMA allocations while released from device " "[count=%d]\n" "One of leaked entries details: " @@ -1030,14 +1032,14 @@ static int dma_debug_init(void) } if (dma_debug_fs_init() != 0) { - pr_err("DMA-API: error creating debugfs entries - disabling\n"); + pr_err("error creating debugfs entries - disabling\n"); global_disable = true; return 0; } if (prealloc_memory(nr_prealloc_entries) != 0) { - pr_err("DMA-API: debugging out of memory error - disabled\n"); + pr_err("debugging out of memory error - disabled\n"); global_disable = true; return 0; @@ -1047,7 +1049,7 @@ static int dma_debug_init(void) dma_debug_initialized = true; - pr_info("DMA-API: debugging enabled by kernel config\n"); + pr_info("debugging enabled by kernel config\n"); return 0; } core_initcall(dma_debug_init); @@ -1058,7 +1060,7 @@ static __init int dma_debug_cmdline(char *str) return -EINVAL; if (strncmp(str, "off", 3) == 0) { - pr_info("DMA-API: debugging disabled on kernel command line\n"); + pr_info("debugging disabled on kernel command line\n"); global_disable = true; } @@ -1092,11 +1094,11 @@ static void check_unmap(struct dma_debug_entry *ref) if (dma_mapping_error(ref->dev, ref->dev_addr)) { err_printk(ref->dev, NULL, - "DMA-API: device driver tries to free an " + "device driver tries to free an " "invalid DMA memory address\n"); } else { err_printk(ref->dev, NULL, - "DMA-API: device driver tries to free DMA " + "device driver tries to free DMA " "memory it has not allocated [device " "address=0x%016llx] [size=%llu bytes]\n", ref->dev_addr, ref->size); @@ -1105,7 +1107,7 @@ static void check_unmap(struct dma_debug_entry *ref) } if (ref->size != entry->size) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " + err_printk(ref->dev, entry, "device driver frees " "DMA memory with different size " "[device address=0x%016llx] [map size=%llu bytes] " "[unmap size=%llu bytes]\n", @@ -1113,7 +1115,7 @@ static void check_unmap(struct dma_debug_entry *ref) } if (ref->type != entry->type) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " + err_printk(ref->dev, entry, "device driver frees " "DMA memory with wrong function " "[device address=0x%016llx] [size=%llu bytes] " "[mapped as %s] [unmapped as %s]\n", @@ -1121,7 +1123,7 @@ static void check_unmap(struct dma_debug_entry *ref) type2name[entry->type], type2name[ref->type]); } else if ((entry->type == dma_debug_coherent) && (phys_addr(ref) != phys_addr(entry))) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " + err_printk(ref->dev, entry, "device driver frees " "DMA memory with different CPU address " "[device address=0x%016llx] [size=%llu bytes] " "[cpu alloc address=0x%016llx] " @@ -1133,7 +1135,7 @@ static void check_unmap(struct dma_debug_entry *ref) if (ref->sg_call_ents && ref->type == dma_debug_sg && ref->sg_call_ents != entry->sg_call_ents) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " + err_printk(ref->dev, entry, "device driver frees " "DMA sg list with different entry count " "[map count=%d] [unmap count=%d]\n", entry->sg_call_ents, ref->sg_call_ents); @@ -1144,7 +1146,7 @@ static void check_unmap(struct dma_debug_entry *ref) * DMA API don't handle this properly, so check for it here */ if (ref->direction != entry->direction) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " + err_printk(ref->dev, entry, "device driver frees " "DMA memory with different direction " "[device address=0x%016llx] [size=%llu bytes] " "[mapped with %s] [unmapped with %s]\n", @@ -1160,7 +1162,7 @@ static void check_unmap(struct dma_debug_entry *ref) */ if (entry->map_err_type == MAP_ERR_NOT_CHECKED) { err_printk(ref->dev, entry, - "DMA-API: device driver failed to check map error" + "device driver failed to check map error" "[device address=0x%016llx] [size=%llu bytes] " "[mapped as %s]", ref->dev_addr, ref->size, @@ -1185,7 +1187,7 @@ static void check_for_stack(struct device *dev, return; addr = page_address(page) + offset; if (object_is_on_stack(addr)) - err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [addr=%p]\n", addr); + err_printk(dev, NULL, "device driver maps memory from stack [addr=%p]\n", addr); } else { /* Stack is vmalloced. */ int i; @@ -1195,7 +1197,7 @@ static void check_for_stack(struct device *dev, continue; addr = (u8 *)current->stack + i * PAGE_SIZE + offset; - err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [probable addr=%p]\n", addr); + err_printk(dev, NULL, "device driver maps memory from stack [probable addr=%p]\n", addr); break; } } @@ -1215,7 +1217,7 @@ static void check_for_illegal_area(struct device *dev, void *addr, unsigned long { if (overlap(addr, len, _stext, _etext) || overlap(addr, len, __start_rodata, __end_rodata)) - err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len); + err_printk(dev, NULL, "device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len); } static void check_sync(struct device *dev, @@ -1231,7 +1233,7 @@ static void check_sync(struct device *dev, entry = bucket_find_contain(&bucket, ref, &flags); if (!entry) { - err_printk(dev, NULL, "DMA-API: device driver tries " + err_printk(dev, NULL, "device driver tries " "to sync DMA memory it has not allocated " "[device address=0x%016llx] [size=%llu bytes]\n", (unsigned long long)ref->dev_addr, ref->size); @@ -1239,7 +1241,7 @@ static void check_sync(struct device *dev, } if (ref->size > entry->size) { - err_printk(dev, entry, "DMA-API: device driver syncs" + err_printk(dev, entry, "device driver syncs" " DMA memory outside allocated range " "[device address=0x%016llx] " "[allocation size=%llu bytes] " @@ -1252,7 +1254,7 @@ static void check_sync(struct device *dev, goto out; if (ref->direction != entry->direction) { - err_printk(dev, entry, "DMA-API: device driver syncs " + err_printk(dev, entry, "device driver syncs " "DMA memory with different direction " "[device address=0x%016llx] [size=%llu bytes] " "[mapped with %s] [synced with %s]\n", @@ -1263,7 +1265,7 @@ static void check_sync(struct device *dev, if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) && !(ref->direction == DMA_TO_DEVICE)) - err_printk(dev, entry, "DMA-API: device driver syncs " + err_printk(dev, entry, "device driver syncs " "device read-only DMA memory for cpu " "[device address=0x%016llx] [size=%llu bytes] " "[mapped with %s] [synced with %s]\n", @@ -1273,7 +1275,7 @@ static void check_sync(struct device *dev, if (!to_cpu && !(entry->direction == DMA_TO_DEVICE) && !(ref->direction == DMA_FROM_DEVICE)) - err_printk(dev, entry, "DMA-API: device driver syncs " + err_printk(dev, entry, "device driver syncs " "device write-only DMA memory to device " "[device address=0x%016llx] [size=%llu bytes] " "[mapped with %s] [synced with %s]\n", @@ -1283,7 +1285,7 @@ static void check_sync(struct device *dev, if (ref->sg_call_ents && ref->type == dma_debug_sg && ref->sg_call_ents != entry->sg_call_ents) { - err_printk(ref->dev, entry, "DMA-API: device driver syncs " + err_printk(ref->dev, entry, "device driver syncs " "DMA sg list with different entry count " "[map count=%d] [sync count=%d]\n", entry->sg_call_ents, ref->sg_call_ents); @@ -1304,7 +1306,7 @@ static void check_sg_segment(struct device *dev, struct scatterlist *sg) * whoever generated the list forgot to check them. */ if (sg->length > max_seg) - err_printk(dev, NULL, "DMA-API: mapping sg segment longer than device claims to support [len=%u] [max=%u]\n", + err_printk(dev, NULL, "mapping sg segment longer than device claims to support [len=%u] [max=%u]\n", sg->length, max_seg); /* * In some cases this could potentially be the DMA API @@ -1314,7 +1316,7 @@ static void check_sg_segment(struct device *dev, struct scatterlist *sg) start = sg_dma_address(sg); end = start + sg_dma_len(sg) - 1; if ((start ^ end) & ~boundary) - err_printk(dev, NULL, "DMA-API: mapping sg segment across boundary [start=0x%016llx] [end=0x%016llx] [boundary=0x%016llx]\n", + err_printk(dev, NULL, "mapping sg segment across boundary [start=0x%016llx] [end=0x%016llx] [boundary=0x%016llx]\n", start, end, boundary); #endif } @@ -1326,11 +1328,11 @@ void debug_dma_map_single(struct device *dev, const void *addr, return; if (!virt_addr_valid(addr)) - err_printk(dev, NULL, "DMA-API: device driver maps memory from invalid area [addr=%p] [len=%lu]\n", + err_printk(dev, NULL, "device driver maps memory from invalid area [addr=%p] [len=%lu]\n", addr, len); if (is_vmalloc_addr(addr)) - err_printk(dev, NULL, "DMA-API: device driver maps memory from vmalloc area [addr=%p] [len=%lu]\n", + err_printk(dev, NULL, "device driver maps memory from vmalloc area [addr=%p] [len=%lu]\n", addr, len); } EXPORT_SYMBOL(debug_dma_map_single); @@ -1787,7 +1789,7 @@ static int __init dma_debug_driver_setup(char *str) } if (current_driver_name[0]) - pr_info("DMA-API: enable driver filter for driver [%s]\n", + pr_info("enable driver filter for driver [%s]\n", current_driver_name); From 2b9d9ac02b9d8d32c515c82bb17401c429f160ab Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 10 Dec 2018 14:00:29 +0000 Subject: [PATCH 38/73] dma-debug: Dynamically expand the dma_debug_entry pool Certain drivers such as large multi-queue network adapters can use pools of mapped DMA buffers larger than the default dma_debug_entry pool of 65536 entries, with the result that merely probing such a device can cause DMA debug to disable itself during boot unless explicitly given an appropriate "dma_debug_entries=..." option. Developers trying to debug some other driver on such a system may not be immediately aware of this, and at worst it can hide bugs if they fail to realise that dma-debug has already disabled itself unexpectedly by the time their code of interest gets to run. Even once they do realise, it can be a bit of a pain to emprirically determine a suitable number of preallocated entries to configure, short of massively over-allocating. There's really no need for such a static limit, though, since we can quite easily expand the pool at runtime in those rare cases that the preallocated entries are insufficient, which is arguably the least surprising and most useful behaviour. To that end, refactor the prealloc_memory() logic a little bit to generalise it for runtime reallocations as well. Signed-off-by: Robin Murphy Tested-by: Qian Cai Signed-off-by: Christoph Hellwig --- Documentation/DMA-API.txt | 11 +++--- kernel/dma/debug.c | 79 ++++++++++++++++++++------------------- 2 files changed, 46 insertions(+), 44 deletions(-) diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 6bdb095393b0..0fcb7561af1e 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -717,8 +717,8 @@ dma-api/num_errors The number in this file shows how many dma-api/min_free_entries This read-only file can be read to get the minimum number of free dma_debug_entries the allocator has ever seen. If this value goes - down to zero the code will disable itself - because it is not longer reliable. + down to zero the code will attempt to increase + nr_total_entries to compensate. dma-api/num_free_entries The current number of free dma_debug_entries in the allocator. @@ -745,10 +745,9 @@ driver filter at boot time. The debug code will only print errors for that driver afterwards. This filter can be disabled or changed later using debugfs. When the code disables itself at runtime this is most likely because it ran -out of dma_debug_entries. These entries are preallocated at boot. The number -of preallocated entries is defined per architecture. If it is too low for you -boot with 'dma_debug_entries=' to overwrite the -architectural default. +out of dma_debug_entries and was unable to allocate more on-demand. 65536 +entries are preallocated at boot - if this is too low for you boot with +'dma_debug_entries=' to overwrite the default. :: diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 29486eb9d1dc..ef7c90b7a346 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -47,6 +47,8 @@ #ifndef PREALLOC_DMA_DEBUG_ENTRIES #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) #endif +/* If the pool runs out, add this many new entries at once */ +#define DMA_DEBUG_DYNAMIC_ENTRIES 256 enum { dma_debug_single, @@ -646,6 +648,34 @@ static void add_dma_entry(struct dma_debug_entry *entry) */ } +static int dma_debug_create_entries(u32 num_entries, gfp_t gfp) +{ + struct dma_debug_entry *entry, *next_entry; + int i; + + for (i = 0; i < num_entries; ++i) { + entry = kzalloc(sizeof(*entry), gfp); + if (!entry) + goto out_err; + + list_add_tail(&entry->list, &free_entries); + } + + num_free_entries += num_entries; + nr_total_entries += num_entries; + + return 0; + +out_err: + + list_for_each_entry_safe(entry, next_entry, &free_entries, list) { + list_del(&entry->list); + kfree(entry); + } + + return -ENOMEM; +} + static struct dma_debug_entry *__dma_entry_alloc(void) { struct dma_debug_entry *entry; @@ -672,12 +702,14 @@ static struct dma_debug_entry *dma_entry_alloc(void) unsigned long flags; spin_lock_irqsave(&free_entries_lock, flags); - - if (list_empty(&free_entries)) { - global_disable = true; - spin_unlock_irqrestore(&free_entries_lock, flags); - pr_err("debugging out of memory - disabling\n"); - return NULL; + if (num_free_entries == 0) { + if (dma_debug_create_entries(DMA_DEBUG_DYNAMIC_ENTRIES, + GFP_ATOMIC)) { + global_disable = true; + spin_unlock_irqrestore(&free_entries_lock, flags); + pr_err("debugging out of memory - disabling\n"); + return NULL; + } } entry = __dma_entry_alloc(); @@ -764,36 +796,6 @@ int dma_debug_resize_entries(u32 num_entries) * 2. Preallocate a given number of dma_debug_entry structs */ -static int prealloc_memory(u32 num_entries) -{ - struct dma_debug_entry *entry, *next_entry; - int i; - - for (i = 0; i < num_entries; ++i) { - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - goto out_err; - - list_add_tail(&entry->list, &free_entries); - } - - num_free_entries = num_entries; - min_free_entries = num_entries; - - pr_info("preallocated %d debug entries\n", num_entries); - - return 0; - -out_err: - - list_for_each_entry_safe(entry, next_entry, &free_entries, list) { - list_del(&entry->list); - kfree(entry); - } - - return -ENOMEM; -} - static ssize_t filter_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { @@ -1038,14 +1040,15 @@ static int dma_debug_init(void) return 0; } - if (prealloc_memory(nr_prealloc_entries) != 0) { + if (dma_debug_create_entries(nr_prealloc_entries, GFP_KERNEL) != 0) { pr_err("debugging out of memory error - disabled\n"); global_disable = true; return 0; } - nr_total_entries = num_free_entries; + min_free_entries = num_free_entries; + pr_info("preallocated %d debug entries\n", nr_total_entries); dma_debug_initialized = true; From ceb51173b2b5bd7af0d99079f6bbacdcfc58fe08 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 10 Dec 2018 14:00:30 +0000 Subject: [PATCH 39/73] dma-debug: Make leak-like behaviour apparent Now that we can dynamically allocate DMA debug entries to cope with drivers maintaining excessively large numbers of live mappings, a driver which *does* actually have a bug leaking mappings (and is not unloaded) will no longer trigger the "DMA-API: debugging out of memory - disabling" message until it gets to actual kernel OOM conditions, which means it could go unnoticed for a while. To that end, let's inform the user each time the pool has grown to a multiple of its initial size, which should make it apparent that they either have a leak or might want to increase the preallocation size. Signed-off-by: Robin Murphy Tested-by: Qian Cai Signed-off-by: Christoph Hellwig --- Documentation/DMA-API.txt | 6 +++++- kernel/dma/debug.c | 13 +++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 0fcb7561af1e..7a7d8a415ce8 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -747,7 +747,11 @@ driver afterwards. This filter can be disabled or changed later using debugfs. When the code disables itself at runtime this is most likely because it ran out of dma_debug_entries and was unable to allocate more on-demand. 65536 entries are preallocated at boot - if this is too low for you boot with -'dma_debug_entries=' to overwrite the default. +'dma_debug_entries=' to overwrite the default. The +code will print to the kernel log each time it has dynamically allocated +as many entries as were initially preallocated. This is to indicate that a +larger preallocation size may be appropriate, or if it happens continually +that a driver may be leaking mappings. :: diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index ef7c90b7a346..912c23f4c177 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -691,6 +691,18 @@ static struct dma_debug_entry *__dma_entry_alloc(void) return entry; } +void __dma_entry_alloc_check_leak(void) +{ + u32 tmp = nr_total_entries % nr_prealloc_entries; + + /* Shout each time we tick over some multiple of the initial pool */ + if (tmp < DMA_DEBUG_DYNAMIC_ENTRIES) { + pr_info("dma_debug_entry pool grown to %u (%u00%%)\n", + nr_total_entries, + (nr_total_entries / nr_prealloc_entries)); + } +} + /* struct dma_entry allocator * * The next two functions implement the allocator for @@ -710,6 +722,7 @@ static struct dma_debug_entry *dma_entry_alloc(void) pr_err("debugging out of memory - disabling\n"); return NULL; } + __dma_entry_alloc_check_leak(); } entry = __dma_entry_alloc(); From a8a4c98fc9ac84ee9e068fbb16210d2ab8cfefe0 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 10 Dec 2018 14:00:31 +0000 Subject: [PATCH 40/73] x86/dma/amd-gart: Stop resizing dma_debug_entry pool dma-debug is now capable of adding new entries to its pool on-demand if the initial preallocation was insufficient, so the IOMMU_LEAK logic no longer needs to explicitly change the pool size. This does lose it the ability to save a couple of megabytes of RAM by reducing the pool size below its default, but it seems unlikely that that is a realistic concern these days (or indeed that anyone is actively debugging AGP drivers' DMA usage any more). Getting rid of dma_debug_resize_entries() will make room for further streamlining in the dma-debug code itself. Removing the call reveals quite a lot of cruft which has been useless for nearly a decade since commit 19c1a6f5764d ("x86 gart: reimplement IOMMU_LEAK feature by using DMA_API_DEBUG"), including the entire 'iommu=leak' parameter, which controlled nothing except whether dma_debug_resize_entries() was called or not. Signed-off-by: Robin Murphy Acked-by: Thomas Gleixner Tested-by: Qian Cai Signed-off-by: Christoph Hellwig --- Documentation/x86/x86_64/boot-options.txt | 5 +---- arch/x86/kernel/amd_gart_64.c | 23 ----------------------- 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index ad6d2a80cf05..abc53886655e 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt @@ -209,7 +209,7 @@ IOMMU (input/output memory management unit) mapping with memory protection, etc. Kernel boot message: "PCI-DMA: Using Calgary IOMMU" - iommu=[][,noagp][,off][,force][,noforce][,leak[=] + iommu=[][,noagp][,off][,force][,noforce] [,memaper[=]][,merge][,fullflush][,nomerge] [,noaperture][,calgary] @@ -228,9 +228,6 @@ IOMMU (input/output memory management unit) allowed Overwrite iommu off workarounds for specific chipsets. fullflush Flush IOMMU on each allocation (default). nofullflush Don't use IOMMU fullflush. - leak Turn on simple iommu leak tracing (only when - CONFIG_IOMMU_LEAK is on). Default number of leak pages - is 20. memaper[=] Allocate an own aperture over RAM with size 32MB<> 20); @@ -825,16 +812,6 @@ void __init gart_parse_options(char *p) { int arg; -#ifdef CONFIG_IOMMU_LEAK - if (!strncmp(p, "leak", 4)) { - leak_trace = 1; - p += 4; - if (*p == '=') - ++p; - if (isdigit(*p) && get_option(&p, &arg)) - iommu_leak_pages = arg; - } -#endif if (isdigit(*p) && get_option(&p, &arg)) iommu_size = arg; if (!strncmp(p, "fullflush", 9)) From 0cb0e25e421436a83ee39857923e4213b983e463 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 10 Dec 2018 14:00:32 +0000 Subject: [PATCH 41/73] dma/debug: Remove dma_debug_resize_entries() With the only caller now gone, we can clean up this part of dma-debug's exposed internals and make way to tweak the allocation behaviour. Signed-off-by: Robin Murphy Tested-by: Qian Cai Signed-off-by: Christoph Hellwig --- include/linux/dma-debug.h | 7 ------ kernel/dma/debug.c | 46 --------------------------------------- 2 files changed, 53 deletions(-) diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index 30213adbb6b9..46e6131a72b6 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -30,8 +30,6 @@ struct bus_type; extern void dma_debug_add_bus(struct bus_type *bus); -extern int dma_debug_resize_entries(u32 num_entries); - extern void debug_dma_map_single(struct device *dev, const void *addr, unsigned long len); @@ -101,11 +99,6 @@ static inline void dma_debug_add_bus(struct bus_type *bus) { } -static inline int dma_debug_resize_entries(u32 num_entries) -{ - return 0; -} - static inline void debug_dma_map_single(struct device *dev, const void *addr, unsigned long len) { diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 912c23f4c177..36a42874b05f 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -755,52 +755,6 @@ static void dma_entry_free(struct dma_debug_entry *entry) spin_unlock_irqrestore(&free_entries_lock, flags); } -int dma_debug_resize_entries(u32 num_entries) -{ - int i, delta, ret = 0; - unsigned long flags; - struct dma_debug_entry *entry; - LIST_HEAD(tmp); - - spin_lock_irqsave(&free_entries_lock, flags); - - if (nr_total_entries < num_entries) { - delta = num_entries - nr_total_entries; - - spin_unlock_irqrestore(&free_entries_lock, flags); - - for (i = 0; i < delta; i++) { - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - break; - - list_add_tail(&entry->list, &tmp); - } - - spin_lock_irqsave(&free_entries_lock, flags); - - list_splice(&tmp, &free_entries); - nr_total_entries += i; - num_free_entries += i; - } else { - delta = nr_total_entries - num_entries; - - for (i = 0; i < delta && !list_empty(&free_entries); i++) { - entry = __dma_entry_alloc(); - kfree(entry); - } - - nr_total_entries -= i; - } - - if (nr_total_entries != num_entries) - ret = 1; - - spin_unlock_irqrestore(&free_entries_lock, flags); - - return ret; -} - /* * DMA-API debugging init code * From ad78dee0b630527bdfed809d1f5ed95c601886ae Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 10 Dec 2018 14:00:33 +0000 Subject: [PATCH 42/73] dma-debug: Batch dma_debug_entry allocation DMA debug entries are one of those things which aren't that useful individually - we will always want some larger quantity of them - and which we don't really need to manage the exact number of - we only care about having 'enough'. In that regard, the current behaviour of creating them one-by-one leads to a lot of unwarranted function call overhead and memory wasted on alignment padding. Now that we don't have to worry about freeing anything via dma_debug_resize_entries(), we can optimise the allocation behaviour by grabbing whole pages at once, which will save considerably on the aforementioned overheads, and probably offer a little more cache/TLB locality benefit for traversing the lists under normal operation. This should also give even less reason for an architecture-level override of the preallocation size, so make the definition unconditional - if there is still any desire to change the compile-time value for some platforms it would be better off as a Kconfig option anyway. Since freeing a whole page of entries at once becomes enough of a challenge that it's not really worth complicating dma_debug_init(), we may as well tweak the preallocation behaviour such that as long as we manage to allocate *some* pages, we can leave debugging enabled on a best-effort basis rather than otherwise wasting them. Signed-off-by: Robin Murphy Tested-by: Qian Cai Signed-off-by: Christoph Hellwig --- Documentation/DMA-API.txt | 4 +++- kernel/dma/debug.c | 50 ++++++++++++++++----------------------- 2 files changed, 24 insertions(+), 30 deletions(-) diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 7a7d8a415ce8..016eb6909b8a 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -747,7 +747,9 @@ driver afterwards. This filter can be disabled or changed later using debugfs. When the code disables itself at runtime this is most likely because it ran out of dma_debug_entries and was unable to allocate more on-demand. 65536 entries are preallocated at boot - if this is too low for you boot with -'dma_debug_entries=' to overwrite the default. The +'dma_debug_entries=' to overwrite the default. Note +that the code allocates entries in batches, so the exact number of +preallocated entries may be greater than the actual number requested. The code will print to the kernel log each time it has dynamically allocated as many entries as were initially preallocated. This is to indicate that a larger preallocation size may be appropriate, or if it happens continually diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 36a42874b05f..20ab0f6c1b70 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -43,12 +43,9 @@ #define HASH_FN_SHIFT 13 #define HASH_FN_MASK (HASH_SIZE - 1) -/* allow architectures to override this if absolutely required */ -#ifndef PREALLOC_DMA_DEBUG_ENTRIES #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) -#endif /* If the pool runs out, add this many new entries at once */ -#define DMA_DEBUG_DYNAMIC_ENTRIES 256 +#define DMA_DEBUG_DYNAMIC_ENTRIES (PAGE_SIZE / sizeof(struct dma_debug_entry)) enum { dma_debug_single, @@ -648,32 +645,22 @@ static void add_dma_entry(struct dma_debug_entry *entry) */ } -static int dma_debug_create_entries(u32 num_entries, gfp_t gfp) +static int dma_debug_create_entries(gfp_t gfp) { - struct dma_debug_entry *entry, *next_entry; + struct dma_debug_entry *entry; int i; - for (i = 0; i < num_entries; ++i) { - entry = kzalloc(sizeof(*entry), gfp); - if (!entry) - goto out_err; + entry = (void *)get_zeroed_page(gfp); + if (!entry) + return -ENOMEM; - list_add_tail(&entry->list, &free_entries); - } + for (i = 0; i < DMA_DEBUG_DYNAMIC_ENTRIES; i++) + list_add_tail(&entry[i].list, &free_entries); - num_free_entries += num_entries; - nr_total_entries += num_entries; + num_free_entries += DMA_DEBUG_DYNAMIC_ENTRIES; + nr_total_entries += DMA_DEBUG_DYNAMIC_ENTRIES; return 0; - -out_err: - - list_for_each_entry_safe(entry, next_entry, &free_entries, list) { - list_del(&entry->list); - kfree(entry); - } - - return -ENOMEM; } static struct dma_debug_entry *__dma_entry_alloc(void) @@ -715,8 +702,7 @@ static struct dma_debug_entry *dma_entry_alloc(void) spin_lock_irqsave(&free_entries_lock, flags); if (num_free_entries == 0) { - if (dma_debug_create_entries(DMA_DEBUG_DYNAMIC_ENTRIES, - GFP_ATOMIC)) { + if (dma_debug_create_entries(GFP_ATOMIC)) { global_disable = true; spin_unlock_irqrestore(&free_entries_lock, flags); pr_err("debugging out of memory - disabling\n"); @@ -987,7 +973,7 @@ void dma_debug_add_bus(struct bus_type *bus) static int dma_debug_init(void) { - int i; + int i, nr_pages; /* Do not use dma_debug_initialized here, since we really want to be * called to set dma_debug_initialized @@ -1007,15 +993,21 @@ static int dma_debug_init(void) return 0; } - if (dma_debug_create_entries(nr_prealloc_entries, GFP_KERNEL) != 0) { + nr_pages = DIV_ROUND_UP(nr_prealloc_entries, DMA_DEBUG_DYNAMIC_ENTRIES); + for (i = 0; i < nr_pages; ++i) + dma_debug_create_entries(GFP_KERNEL); + if (num_free_entries >= nr_prealloc_entries) { + pr_info("preallocated %d debug entries\n", nr_total_entries); + } else if (num_free_entries > 0) { + pr_warn("%d debug entries requested but only %d allocated\n", + nr_prealloc_entries, nr_total_entries); + } else { pr_err("debugging out of memory error - disabled\n"); global_disable = true; return 0; } - min_free_entries = num_free_entries; - pr_info("preallocated %d debug entries\n", nr_total_entries); dma_debug_initialized = true; From 7227b202623986505c9dd6d2eadad977cd43625e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 3 Dec 2018 12:28:35 +0100 Subject: [PATCH 43/73] sparc: remove not needed sbus_dma_ops methods No need to BUG_ON() on the cache maintainance ops - they are no-ops by default, and there is nothing in the DMA API contract that prohibits calling them on sbus devices (even if such drivers are unlikely to ever appear). Similarly a dma_supported method that always returns 0 is rather pointless. The only thing that indicates is that no one ever calls the method on sbus devices. Signed-off-by: Christoph Hellwig Acked-by: David S. Miller --- arch/sparc/kernel/ioport.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 6799c93c9f27..4b2167a0ec0b 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -391,23 +391,6 @@ static void sbus_unmap_sg(struct device *dev, struct scatterlist *sg, int n, mmu_release_scsi_sgl(dev, sg, n); } -static void sbus_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, - int n, enum dma_data_direction dir) -{ - BUG(); -} - -static void sbus_sync_sg_for_device(struct device *dev, struct scatterlist *sg, - int n, enum dma_data_direction dir) -{ - BUG(); -} - -static int sbus_dma_supported(struct device *dev, u64 mask) -{ - return 0; -} - static const struct dma_map_ops sbus_dma_ops = { .alloc = sbus_alloc_coherent, .free = sbus_free_coherent, @@ -415,9 +398,6 @@ static const struct dma_map_ops sbus_dma_ops = { .unmap_page = sbus_unmap_page, .map_sg = sbus_map_sg, .unmap_sg = sbus_unmap_sg, - .sync_sg_for_cpu = sbus_sync_sg_for_cpu, - .sync_sg_for_device = sbus_sync_sg_for_device, - .dma_supported = sbus_dma_supported, }; static int __init sparc_register_ioport(void) From 53b7670e5735ba1c662230377d764799aaf57300 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 3 Dec 2018 14:02:26 +0100 Subject: [PATCH 44/73] sparc: factor the dma coherent mapping into helper Factor the code to remap memory returned from the DMA coherent allocator into two helpers that can be shared by the IOMMU and direct mapping code. Signed-off-by: Christoph Hellwig Acked-by: David S. Miller Acked-by: Sam Ravnborg --- arch/sparc/kernel/ioport.c | 151 ++++++++++++++++--------------------- 1 file changed, 67 insertions(+), 84 deletions(-) diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 4b2167a0ec0b..ef3c61aec32a 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -247,6 +247,53 @@ static void _sparc_free_io(struct resource *res) release_resource(res); } +static unsigned long sparc_dma_alloc_resource(struct device *dev, size_t len) +{ + struct resource *res; + + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + return 0; + res->name = dev->of_node->name; + + if (allocate_resource(&_sparc_dvma, res, len, _sparc_dvma.start, + _sparc_dvma.end, PAGE_SIZE, NULL, NULL) != 0) { + printk("%s: cannot occupy 0x%zx", __func__, len); + kfree(res); + return 0; + } + + return res->start; +} + +static bool sparc_dma_free_resource(void *cpu_addr, size_t size) +{ + unsigned long addr = (unsigned long)cpu_addr; + struct resource *res; + + res = lookup_resource(&_sparc_dvma, addr); + if (!res) { + printk("%s: cannot free %p\n", __func__, cpu_addr); + return false; + } + + if ((addr & (PAGE_SIZE - 1)) != 0) { + printk("%s: unaligned va %p\n", __func__, cpu_addr); + return false; + } + + size = PAGE_ALIGN(size); + if (resource_size(res) != size) { + printk("%s: region 0x%lx asked 0x%zx\n", + __func__, (long)resource_size(res), size); + return false; + } + + release_resource(res); + kfree(res); + return true; +} + #ifdef CONFIG_SBUS void sbus_set_sbus64(struct device *dev, int x) @@ -264,10 +311,8 @@ static void *sbus_alloc_coherent(struct device *dev, size_t len, dma_addr_t *dma_addrp, gfp_t gfp, unsigned long attrs) { - struct platform_device *op = to_platform_device(dev); unsigned long len_total = PAGE_ALIGN(len); - unsigned long va; - struct resource *res; + unsigned long va, addr; int order; /* XXX why are some lengths signed, others unsigned? */ @@ -284,32 +329,23 @@ static void *sbus_alloc_coherent(struct device *dev, size_t len, if (va == 0) goto err_nopages; - if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL) + addr = sparc_dma_alloc_resource(dev, len_total); + if (!addr) goto err_nomem; - if (allocate_resource(&_sparc_dvma, res, len_total, - _sparc_dvma.start, _sparc_dvma.end, PAGE_SIZE, NULL, NULL) != 0) { - printk("sbus_alloc_consistent: cannot occupy 0x%lx", len_total); - goto err_nova; - } - // XXX The sbus_map_dma_area does this for us below, see comments. // srmmu_mapiorange(0, virt_to_phys(va), res->start, len_total); /* * XXX That's where sdev would be used. Currently we load * all iommu tables with the same translations. */ - if (sbus_map_dma_area(dev, dma_addrp, va, res->start, len_total) != 0) + if (sbus_map_dma_area(dev, dma_addrp, va, addr, len_total) != 0) goto err_noiommu; - res->name = op->dev.of_node->name; - - return (void *)(unsigned long)res->start; + return (void *)addr; err_noiommu: - release_resource(res); -err_nova: - kfree(res); + sparc_dma_free_resource((void *)addr, len_total); err_nomem: free_pages(va, order); err_nopages: @@ -319,29 +355,11 @@ err_nopages: static void sbus_free_coherent(struct device *dev, size_t n, void *p, dma_addr_t ba, unsigned long attrs) { - struct resource *res; struct page *pgv; - if ((res = lookup_resource(&_sparc_dvma, - (unsigned long)p)) == NULL) { - printk("sbus_free_consistent: cannot free %p\n", p); - return; - } - - if (((unsigned long)p & (PAGE_SIZE-1)) != 0) { - printk("sbus_free_consistent: unaligned va %p\n", p); - return; - } - n = PAGE_ALIGN(n); - if (resource_size(res) != n) { - printk("sbus_free_consistent: region 0x%lx asked 0x%zx\n", - (long)resource_size(res), n); + if (!sparc_dma_free_resource(p, n)) return; - } - - release_resource(res); - kfree(res); pgv = virt_to_page(p); sbus_unmap_dma_area(dev, ba, n); @@ -418,45 +436,30 @@ arch_initcall(sparc_register_ioport); void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { - unsigned long len_total = PAGE_ALIGN(size); + unsigned long addr; void *va; - struct resource *res; - int order; - if (size == 0) { + if (!size || size > 256 * 1024) /* __get_free_pages() limit */ return NULL; - } - if (size > 256*1024) { /* __get_free_pages() limit */ + + size = PAGE_ALIGN(size); + va = (void *) __get_free_pages(gfp, get_order(size)); + if (!va) { + printk("%s: no %zd pages\n", __func__, size >> PAGE_SHIFT); return NULL; } - order = get_order(len_total); - va = (void *) __get_free_pages(gfp, order); - if (va == NULL) { - printk("%s: no %ld pages\n", __func__, len_total>>PAGE_SHIFT); - goto err_nopages; - } - - if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL) { - printk("%s: no core\n", __func__); + addr = sparc_dma_alloc_resource(dev, size); + if (!addr) goto err_nomem; - } - if (allocate_resource(&_sparc_dvma, res, len_total, - _sparc_dvma.start, _sparc_dvma.end, PAGE_SIZE, NULL, NULL) != 0) { - printk("%s: cannot occupy 0x%lx", __func__, len_total); - goto err_nova; - } - srmmu_mapiorange(0, virt_to_phys(va), res->start, len_total); + srmmu_mapiorange(0, virt_to_phys(va), addr, size); *dma_handle = virt_to_phys(va); - return (void *) res->start; + return (void *)addr; -err_nova: - kfree(res); err_nomem: - free_pages((unsigned long)va, order); -err_nopages: + free_pages((unsigned long)va, get_order(size)); return NULL; } @@ -471,31 +474,11 @@ err_nopages: void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { - struct resource *res; - - if ((res = lookup_resource(&_sparc_dvma, - (unsigned long)cpu_addr)) == NULL) { - printk("%s: cannot free %p\n", __func__, cpu_addr); + if (!sparc_dma_free_resource(cpu_addr, PAGE_ALIGN(size))) return; - } - - if (((unsigned long)cpu_addr & (PAGE_SIZE-1)) != 0) { - printk("%s: unaligned va %p\n", __func__, cpu_addr); - return; - } - - size = PAGE_ALIGN(size); - if (resource_size(res) != size) { - printk("%s: region 0x%lx asked 0x%zx\n", __func__, - (long)resource_size(res), size); - return; - } dma_make_coherent(dma_addr, size); srmmu_unmapiorange((unsigned long)cpu_addr, size); - - release_resource(res); - kfree(res); free_pages((unsigned long)phys_to_virt(dma_addr), get_order(size)); } From ce65d36f3ea79368170ca58f2efd28cdba3d70e9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 3 Dec 2018 14:04:32 +0100 Subject: [PATCH 45/73] sparc: remove the sparc32_dma_ops indirection There is no good reason to have a double indirection for the sparc32 dma ops, so remove the sparc32_dma_ops and define separate dma_map_ops instance for the different IOMMU types. Signed-off-by: Christoph Hellwig Acked-by: David S. Miller --- arch/sparc/include/asm/dma.h | 48 +----------- arch/sparc/kernel/ioport.c | 124 +------------------------------ arch/sparc/mm/io-unit.c | 65 ++++++++++++----- arch/sparc/mm/iommu.c | 137 ++++++++++++++++++++++------------- 4 files changed, 138 insertions(+), 236 deletions(-) diff --git a/arch/sparc/include/asm/dma.h b/arch/sparc/include/asm/dma.h index a1d7c86917c6..462e7c794a09 100644 --- a/arch/sparc/include/asm/dma.h +++ b/arch/sparc/include/asm/dma.h @@ -91,54 +91,10 @@ extern int isa_dma_bridge_buggy; #endif #ifdef CONFIG_SPARC32 - -/* Routines for data transfer buffers. */ struct device; -struct scatterlist; - -struct sparc32_dma_ops { - __u32 (*get_scsi_one)(struct device *, char *, unsigned long); - void (*get_scsi_sgl)(struct device *, struct scatterlist *, int); - void (*release_scsi_one)(struct device *, __u32, unsigned long); - void (*release_scsi_sgl)(struct device *, struct scatterlist *,int); -#ifdef CONFIG_SBUS - int (*map_dma_area)(struct device *, dma_addr_t *, unsigned long, unsigned long, int); - void (*unmap_dma_area)(struct device *, unsigned long, int); -#endif -}; -extern const struct sparc32_dma_ops *sparc32_dma_ops; - -#define mmu_get_scsi_one(dev,vaddr,len) \ - sparc32_dma_ops->get_scsi_one(dev, vaddr, len) -#define mmu_get_scsi_sgl(dev,sg,sz) \ - sparc32_dma_ops->get_scsi_sgl(dev, sg, sz) -#define mmu_release_scsi_one(dev,vaddr,len) \ - sparc32_dma_ops->release_scsi_one(dev, vaddr,len) -#define mmu_release_scsi_sgl(dev,sg,sz) \ - sparc32_dma_ops->release_scsi_sgl(dev, sg, sz) - -#ifdef CONFIG_SBUS -/* - * mmu_map/unmap are provided by iommu/iounit; Invalid to call on IIep. - * - * The mmu_map_dma_area establishes two mappings in one go. - * These mappings point to pages normally mapped at 'va' (linear address). - * First mapping is for CPU visible address at 'a', uncached. - * This is an alias, but it works because it is an uncached mapping. - * Second mapping is for device visible address, or "bus" address. - * The bus address is returned at '*pba'. - * - * These functions seem distinct, but are hard to split. - * On sun4m, page attributes depend on the CPU type, so we have to - * know if we are mapping RAM or I/O, so it has to be an additional argument - * to a separate mapping function for CPU visible mappings. - */ -#define sbus_map_dma_area(dev,pba,va,a,len) \ - sparc32_dma_ops->map_dma_area(dev, pba, va, a, len) -#define sbus_unmap_dma_area(dev,ba,len) \ - sparc32_dma_ops->unmap_dma_area(dev, ba, len) -#endif /* CONFIG_SBUS */ +unsigned long sparc_dma_alloc_resource(struct device *dev, size_t len); +bool sparc_dma_free_resource(void *cpu_addr, size_t size); #endif #endif /* !(_ASM_SPARC_DMA_H) */ diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index ef3c61aec32a..51c128d80193 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -52,8 +52,6 @@ #include #include -const struct sparc32_dma_ops *sparc32_dma_ops; - /* This function must make sure that caches and memory are coherent after DMA * On LEON systems without cache snooping it flushes the entire D-CACHE. */ @@ -247,7 +245,7 @@ static void _sparc_free_io(struct resource *res) release_resource(res); } -static unsigned long sparc_dma_alloc_resource(struct device *dev, size_t len) +unsigned long sparc_dma_alloc_resource(struct device *dev, size_t len) { struct resource *res; @@ -266,7 +264,7 @@ static unsigned long sparc_dma_alloc_resource(struct device *dev, size_t len) return res->start; } -static bool sparc_dma_free_resource(void *cpu_addr, size_t size) +bool sparc_dma_free_resource(void *cpu_addr, size_t size) { unsigned long addr = (unsigned long)cpu_addr; struct resource *res; @@ -302,122 +300,6 @@ void sbus_set_sbus64(struct device *dev, int x) } EXPORT_SYMBOL(sbus_set_sbus64); -/* - * Allocate a chunk of memory suitable for DMA. - * Typically devices use them for control blocks. - * CPU may access them without any explicit flushing. - */ -static void *sbus_alloc_coherent(struct device *dev, size_t len, - dma_addr_t *dma_addrp, gfp_t gfp, - unsigned long attrs) -{ - unsigned long len_total = PAGE_ALIGN(len); - unsigned long va, addr; - int order; - - /* XXX why are some lengths signed, others unsigned? */ - if (len <= 0) { - return NULL; - } - /* XXX So what is maxphys for us and how do drivers know it? */ - if (len > 256*1024) { /* __get_free_pages() limit */ - return NULL; - } - - order = get_order(len_total); - va = __get_free_pages(gfp, order); - if (va == 0) - goto err_nopages; - - addr = sparc_dma_alloc_resource(dev, len_total); - if (!addr) - goto err_nomem; - - // XXX The sbus_map_dma_area does this for us below, see comments. - // srmmu_mapiorange(0, virt_to_phys(va), res->start, len_total); - /* - * XXX That's where sdev would be used. Currently we load - * all iommu tables with the same translations. - */ - if (sbus_map_dma_area(dev, dma_addrp, va, addr, len_total) != 0) - goto err_noiommu; - - return (void *)addr; - -err_noiommu: - sparc_dma_free_resource((void *)addr, len_total); -err_nomem: - free_pages(va, order); -err_nopages: - return NULL; -} - -static void sbus_free_coherent(struct device *dev, size_t n, void *p, - dma_addr_t ba, unsigned long attrs) -{ - struct page *pgv; - - n = PAGE_ALIGN(n); - if (!sparc_dma_free_resource(p, n)) - return; - - pgv = virt_to_page(p); - sbus_unmap_dma_area(dev, ba, n); - - __free_pages(pgv, get_order(n)); -} - -/* - * Map a chunk of memory so that devices can see it. - * CPU view of this memory may be inconsistent with - * a device view and explicit flushing is necessary. - */ -static dma_addr_t sbus_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t len, - enum dma_data_direction dir, - unsigned long attrs) -{ - void *va = page_address(page) + offset; - - /* XXX why are some lengths signed, others unsigned? */ - if (len <= 0) { - return 0; - } - /* XXX So what is maxphys for us and how do drivers know it? */ - if (len > 256*1024) { /* __get_free_pages() limit */ - return 0; - } - return mmu_get_scsi_one(dev, va, len); -} - -static void sbus_unmap_page(struct device *dev, dma_addr_t ba, size_t n, - enum dma_data_direction dir, unsigned long attrs) -{ - mmu_release_scsi_one(dev, ba, n); -} - -static int sbus_map_sg(struct device *dev, struct scatterlist *sg, int n, - enum dma_data_direction dir, unsigned long attrs) -{ - mmu_get_scsi_sgl(dev, sg, n); - return n; -} - -static void sbus_unmap_sg(struct device *dev, struct scatterlist *sg, int n, - enum dma_data_direction dir, unsigned long attrs) -{ - mmu_release_scsi_sgl(dev, sg, n); -} - -static const struct dma_map_ops sbus_dma_ops = { - .alloc = sbus_alloc_coherent, - .free = sbus_free_coherent, - .map_page = sbus_map_page, - .unmap_page = sbus_unmap_page, - .map_sg = sbus_map_sg, - .unmap_sg = sbus_unmap_sg, -}; - static int __init sparc_register_ioport(void) { register_proc_sparc_ioport(); @@ -491,7 +373,7 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, dma_make_coherent(paddr, PAGE_ALIGN(size)); } -const struct dma_map_ops *dma_ops = &sbus_dma_ops; +const struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); #ifdef CONFIG_PROC_FS diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c index c8cb27d3ea75..2088d292c6e5 100644 --- a/arch/sparc/mm/io-unit.c +++ b/arch/sparc/mm/io-unit.c @@ -12,7 +12,7 @@ #include #include /* pte_offset_map => kmap_atomic */ #include -#include +#include #include #include @@ -140,18 +140,26 @@ nexti: scan = find_next_zero_bit(iounit->bmap, limit, scan); return vaddr; } -static __u32 iounit_get_scsi_one(struct device *dev, char *vaddr, unsigned long len) +static dma_addr_t iounit_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t len, enum dma_data_direction dir, + unsigned long attrs) { + void *vaddr = page_address(page) + offset; struct iounit_struct *iounit = dev->archdata.iommu; unsigned long ret, flags; + /* XXX So what is maxphys for us and how do drivers know it? */ + if (!len || len > 256 * 1024) + return DMA_MAPPING_ERROR; + spin_lock_irqsave(&iounit->lock, flags); ret = iounit_get_area(iounit, (unsigned long)vaddr, len); spin_unlock_irqrestore(&iounit->lock, flags); return ret; } -static void iounit_get_scsi_sgl(struct device *dev, struct scatterlist *sg, int sz) +static int iounit_map_sg(struct device *dev, struct scatterlist *sg, int sz, + enum dma_data_direction dir, unsigned long attrs) { struct iounit_struct *iounit = dev->archdata.iommu; unsigned long flags; @@ -165,9 +173,11 @@ static void iounit_get_scsi_sgl(struct device *dev, struct scatterlist *sg, int sg = sg_next(sg); } spin_unlock_irqrestore(&iounit->lock, flags); + return sz; } -static void iounit_release_scsi_one(struct device *dev, __u32 vaddr, unsigned long len) +static void iounit_unmap_page(struct device *dev, dma_addr_t vaddr, size_t len, + enum dma_data_direction dir, unsigned long attrs) { struct iounit_struct *iounit = dev->archdata.iommu; unsigned long flags; @@ -181,7 +191,8 @@ static void iounit_release_scsi_one(struct device *dev, __u32 vaddr, unsigned lo spin_unlock_irqrestore(&iounit->lock, flags); } -static void iounit_release_scsi_sgl(struct device *dev, struct scatterlist *sg, int sz) +static void iounit_unmap_sg(struct device *dev, struct scatterlist *sg, int sz, + enum dma_data_direction dir, unsigned long attrs) { struct iounit_struct *iounit = dev->archdata.iommu; unsigned long flags; @@ -201,14 +212,27 @@ static void iounit_release_scsi_sgl(struct device *dev, struct scatterlist *sg, } #ifdef CONFIG_SBUS -static int iounit_map_dma_area(struct device *dev, dma_addr_t *pba, unsigned long va, unsigned long addr, int len) +static void *iounit_alloc(struct device *dev, size_t len, + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { struct iounit_struct *iounit = dev->archdata.iommu; - unsigned long page, end; + unsigned long va, addr, page, end, ret; pgprot_t dvma_prot; iopte_t __iomem *iopte; - *pba = addr; + /* XXX So what is maxphys for us and how do drivers know it? */ + if (!len || len > 256 * 1024) + return NULL; + + len = PAGE_ALIGN(len); + va = __get_free_pages(gfp, get_order(len)); + if (!va) + return NULL; + + addr = ret = sparc_dma_alloc_resource(dev, len); + if (!addr) + goto out_free_pages; + *dma_handle = addr; dvma_prot = __pgprot(SRMMU_CACHE | SRMMU_ET_PTE | SRMMU_PRIV); end = PAGE_ALIGN((addr + len)); @@ -237,27 +261,32 @@ static int iounit_map_dma_area(struct device *dev, dma_addr_t *pba, unsigned lon flush_cache_all(); flush_tlb_all(); - return 0; + return (void *)ret; + +out_free_pages: + free_pages(va, get_order(len)); + return NULL; } -static void iounit_unmap_dma_area(struct device *dev, unsigned long addr, int len) +static void iounit_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_addr, unsigned long attrs) { /* XXX Somebody please fill this in */ } #endif -static const struct sparc32_dma_ops iounit_dma_ops = { - .get_scsi_one = iounit_get_scsi_one, - .get_scsi_sgl = iounit_get_scsi_sgl, - .release_scsi_one = iounit_release_scsi_one, - .release_scsi_sgl = iounit_release_scsi_sgl, +static const struct dma_map_ops iounit_dma_ops = { #ifdef CONFIG_SBUS - .map_dma_area = iounit_map_dma_area, - .unmap_dma_area = iounit_unmap_dma_area, + .alloc = iounit_alloc, + .free = iounit_free, #endif + .map_page = iounit_map_page, + .unmap_page = iounit_unmap_page, + .map_sg = iounit_map_sg, + .unmap_sg = iounit_unmap_sg, }; void __init ld_mmu_iounit(void) { - sparc32_dma_ops = &iounit_dma_ops; + dma_ops = &iounit_dma_ops; } diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 2c5f8a648f8c..3599485717e7 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -13,7 +13,7 @@ #include #include #include /* pte_offset_map => kmap_atomic */ -#include +#include #include #include @@ -205,38 +205,44 @@ static u32 iommu_get_one(struct device *dev, struct page *page, int npages) return busa0; } -static u32 iommu_get_scsi_one(struct device *dev, char *vaddr, unsigned int len) +static dma_addr_t __sbus_iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t len) { - unsigned long off; - int npages; - struct page *page; - u32 busa; - - off = (unsigned long)vaddr & ~PAGE_MASK; - npages = (off + len + PAGE_SIZE-1) >> PAGE_SHIFT; - page = virt_to_page((unsigned long)vaddr & PAGE_MASK); - busa = iommu_get_one(dev, page, npages); - return busa + off; + void *vaddr = page_address(page) + offset; + unsigned long off = (unsigned long)vaddr & ~PAGE_MASK; + unsigned long npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + + /* XXX So what is maxphys for us and how do drivers know it? */ + if (!len || len > 256 * 1024) + return DMA_MAPPING_ERROR; + return iommu_get_one(dev, virt_to_page(vaddr), npages) + off; } -static __u32 iommu_get_scsi_one_gflush(struct device *dev, char *vaddr, unsigned long len) +static dma_addr_t sbus_iommu_map_page_gflush(struct device *dev, + struct page *page, unsigned long offset, size_t len, + enum dma_data_direction dir, unsigned long attrs) { flush_page_for_dma(0); - return iommu_get_scsi_one(dev, vaddr, len); + return __sbus_iommu_map_page(dev, page, offset, len); } -static __u32 iommu_get_scsi_one_pflush(struct device *dev, char *vaddr, unsigned long len) +static dma_addr_t sbus_iommu_map_page_pflush(struct device *dev, + struct page *page, unsigned long offset, size_t len, + enum dma_data_direction dir, unsigned long attrs) { - unsigned long page = ((unsigned long) vaddr) & PAGE_MASK; + void *vaddr = page_address(page) + offset; + unsigned long p = ((unsigned long)vaddr) & PAGE_MASK; - while(page < ((unsigned long)(vaddr + len))) { - flush_page_for_dma(page); - page += PAGE_SIZE; + while (p < (unsigned long)vaddr + len) { + flush_page_for_dma(p); + p += PAGE_SIZE; } - return iommu_get_scsi_one(dev, vaddr, len); + + return __sbus_iommu_map_page(dev, page, offset, len); } -static void iommu_get_scsi_sgl_gflush(struct device *dev, struct scatterlist *sg, int sz) +static int sbus_iommu_map_sg_gflush(struct device *dev, struct scatterlist *sg, + int sz, enum dma_data_direction dir, unsigned long attrs) { int n; @@ -248,9 +254,12 @@ static void iommu_get_scsi_sgl_gflush(struct device *dev, struct scatterlist *sg sg->dma_length = sg->length; sg = sg_next(sg); } + + return sz; } -static void iommu_get_scsi_sgl_pflush(struct device *dev, struct scatterlist *sg, int sz) +static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sg, + int sz, enum dma_data_direction dir, unsigned long attrs) { unsigned long page, oldpage = 0; int n, i; @@ -279,6 +288,8 @@ static void iommu_get_scsi_sgl_pflush(struct device *dev, struct scatterlist *sg sg->dma_length = sg->length; sg = sg_next(sg); } + + return sz; } static void iommu_release_one(struct device *dev, u32 busa, int npages) @@ -297,23 +308,23 @@ static void iommu_release_one(struct device *dev, u32 busa, int npages) bit_map_clear(&iommu->usemap, ioptex, npages); } -static void iommu_release_scsi_one(struct device *dev, __u32 vaddr, unsigned long len) +static void sbus_iommu_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t len, enum dma_data_direction dir, unsigned long attrs) { - unsigned long off; + unsigned long off = dma_addr & ~PAGE_MASK; int npages; - off = vaddr & ~PAGE_MASK; npages = (off + len + PAGE_SIZE-1) >> PAGE_SHIFT; - iommu_release_one(dev, vaddr & PAGE_MASK, npages); + iommu_release_one(dev, dma_addr & PAGE_MASK, npages); } -static void iommu_release_scsi_sgl(struct device *dev, struct scatterlist *sg, int sz) +static void sbus_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, + int sz, enum dma_data_direction dir, unsigned long attrs) { int n; while(sz != 0) { --sz; - n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT; iommu_release_one(dev, sg->dma_address & PAGE_MASK, n); sg->dma_address = 0x21212121; @@ -322,15 +333,28 @@ static void iommu_release_scsi_sgl(struct device *dev, struct scatterlist *sg, i } #ifdef CONFIG_SBUS -static int iommu_map_dma_area(struct device *dev, dma_addr_t *pba, unsigned long va, - unsigned long addr, int len) +static void *sbus_iommu_alloc(struct device *dev, size_t len, + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { struct iommu_struct *iommu = dev->archdata.iommu; - unsigned long page, end; + unsigned long va, addr, page, end, ret; iopte_t *iopte = iommu->page_table; iopte_t *first; int ioptex; + /* XXX So what is maxphys for us and how do drivers know it? */ + if (!len || len > 256 * 1024) + return NULL; + + len = PAGE_ALIGN(len); + va = __get_free_pages(gfp, get_order(len)); + if (va == 0) + return NULL; + + addr = ret = sparc_dma_alloc_resource(dev, len); + if (!addr) + goto out_free_pages; + BUG_ON((va & ~PAGE_MASK) != 0); BUG_ON((addr & ~PAGE_MASK) != 0); BUG_ON((len & ~PAGE_MASK) != 0); @@ -385,16 +409,25 @@ static int iommu_map_dma_area(struct device *dev, dma_addr_t *pba, unsigned long flush_tlb_all(); iommu_invalidate(iommu->regs); - *pba = iommu->start + (ioptex << PAGE_SHIFT); - return 0; + *dma_handle = iommu->start + (ioptex << PAGE_SHIFT); + return (void *)ret; + +out_free_pages: + free_pages(va, get_order(len)); + return NULL; } -static void iommu_unmap_dma_area(struct device *dev, unsigned long busa, int len) +static void sbus_iommu_free(struct device *dev, size_t len, void *cpu_addr, + dma_addr_t busa, unsigned long attrs) { struct iommu_struct *iommu = dev->archdata.iommu; iopte_t *iopte = iommu->page_table; - unsigned long end; + struct page *page = virt_to_page(cpu_addr); int ioptex = (busa - iommu->start) >> PAGE_SHIFT; + unsigned long end; + + if (!sparc_dma_free_resource(cpu_addr, len)) + return; BUG_ON((busa & ~PAGE_MASK) != 0); BUG_ON((len & ~PAGE_MASK) != 0); @@ -408,38 +441,40 @@ static void iommu_unmap_dma_area(struct device *dev, unsigned long busa, int len flush_tlb_all(); iommu_invalidate(iommu->regs); bit_map_clear(&iommu->usemap, ioptex, len >> PAGE_SHIFT); + + __free_pages(page, get_order(len)); } #endif -static const struct sparc32_dma_ops iommu_dma_gflush_ops = { - .get_scsi_one = iommu_get_scsi_one_gflush, - .get_scsi_sgl = iommu_get_scsi_sgl_gflush, - .release_scsi_one = iommu_release_scsi_one, - .release_scsi_sgl = iommu_release_scsi_sgl, +static const struct dma_map_ops sbus_iommu_dma_gflush_ops = { #ifdef CONFIG_SBUS - .map_dma_area = iommu_map_dma_area, - .unmap_dma_area = iommu_unmap_dma_area, + .alloc = sbus_iommu_alloc, + .free = sbus_iommu_free, #endif + .map_page = sbus_iommu_map_page_gflush, + .unmap_page = sbus_iommu_unmap_page, + .map_sg = sbus_iommu_map_sg_gflush, + .unmap_sg = sbus_iommu_unmap_sg, }; -static const struct sparc32_dma_ops iommu_dma_pflush_ops = { - .get_scsi_one = iommu_get_scsi_one_pflush, - .get_scsi_sgl = iommu_get_scsi_sgl_pflush, - .release_scsi_one = iommu_release_scsi_one, - .release_scsi_sgl = iommu_release_scsi_sgl, +static const struct dma_map_ops sbus_iommu_dma_pflush_ops = { #ifdef CONFIG_SBUS - .map_dma_area = iommu_map_dma_area, - .unmap_dma_area = iommu_unmap_dma_area, + .alloc = sbus_iommu_alloc, + .free = sbus_iommu_free, #endif + .map_page = sbus_iommu_map_page_pflush, + .unmap_page = sbus_iommu_unmap_page, + .map_sg = sbus_iommu_map_sg_pflush, + .unmap_sg = sbus_iommu_unmap_sg, }; void __init ld_mmu_iommu(void) { if (flush_page_for_dma_global) { /* flush_page_for_dma flushes everything, no matter of what page is it */ - sparc32_dma_ops = &iommu_dma_gflush_ops; + dma_ops = &sbus_iommu_dma_gflush_ops; } else { - sparc32_dma_ops = &iommu_dma_pflush_ops; + dma_ops = &sbus_iommu_dma_pflush_ops; } if (viking_mxcc_present || srmmu_modtype == HyperSparc) { From a24ca8a253bd6e2644eba069b78fec80928d7b76 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 3 Dec 2018 14:21:58 +0100 Subject: [PATCH 46/73] sparc: remove not required includes from dma-mapping.h The only thing we need to explicitly pull in is the defines for the CPU type. Signed-off-by: Christoph Hellwig Acked-by: David S. Miller --- arch/sparc/include/asm/dma-mapping.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h index b0bb2fcaf1c9..55a44f08a9a4 100644 --- a/arch/sparc/include/asm/dma-mapping.h +++ b/arch/sparc/include/asm/dma-mapping.h @@ -2,9 +2,7 @@ #ifndef ___ASM_SPARC_DMA_MAPPING_H #define ___ASM_SPARC_DMA_MAPPING_H -#include -#include -#include +#include extern const struct dma_map_ops *dma_ops; From b535d1fca6d63ae6096e7030331e6e698aac4455 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 8 Dec 2018 09:39:12 -0800 Subject: [PATCH 47/73] sparc: move the leon PCI memory space comment to It has nothing to do with the content of the pci.h header. Suggested by: Sam Ravnborg Signed-off-by: Christoph Hellwig Acked-by: David S. Miller --- arch/sparc/include/asm/leon.h | 9 +++++++++ arch/sparc/include/asm/pci_32.h | 9 --------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/sparc/include/asm/leon.h b/arch/sparc/include/asm/leon.h index c68bb5b76e3d..77ea406ff9df 100644 --- a/arch/sparc/include/asm/leon.h +++ b/arch/sparc/include/asm/leon.h @@ -255,4 +255,13 @@ extern int leon_ipi_irq; #define _pfn_valid(pfn) ((pfn < last_valid_pfn) && (pfn >= PFN(phys_base))) #define _SRMMU_PTE_PMASK_LEON 0xffffffff +/* + * On LEON PCI Memory space is mapped 1:1 with physical address space. + * + * I/O space is located at low 64Kbytes in PCI I/O space. The I/O addresses + * are converted into CPU addresses to virtual addresses that are mapped with + * MMU to the PCI Host PCI I/O space window which are translated to the low + * 64Kbytes by the Host controller. + */ + #endif diff --git a/arch/sparc/include/asm/pci_32.h b/arch/sparc/include/asm/pci_32.h index cfc0ee9476c6..a475380ea108 100644 --- a/arch/sparc/include/asm/pci_32.h +++ b/arch/sparc/include/asm/pci_32.h @@ -23,15 +23,6 @@ /* generic pci stuff */ #include #else -/* - * On LEON PCI Memory space is mapped 1:1 with physical address space. - * - * I/O space is located at low 64Kbytes in PCI I/O space. The I/O addresses - * are converted into CPU addresses to virtual addresses that are mapped with - * MMU to the PCI Host PCI I/O space window which are translated to the low - * 64Kbytes by the Host controller. - */ - static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) { return PCI_IRQ_NONE; From 6aa69750ef1b162417811aa2d940a8df30acd930 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 8 Dec 2018 09:40:03 -0800 Subject: [PATCH 48/73] sparc: merge 32-bit and 64-bit version of pci.h There are enough common defintions that a single header seems nicer. Also drop the pointless include. Signed-off-by: Christoph Hellwig Acked-by: David S. Miller Acked-by: Sam Ravnborg --- arch/sparc/include/asm/pci.h | 53 ++++++++++++++++++++++++++++++--- arch/sparc/include/asm/pci_32.h | 32 -------------------- arch/sparc/include/asm/pci_64.h | 52 -------------------------------- 3 files changed, 49 insertions(+), 88 deletions(-) delete mode 100644 arch/sparc/include/asm/pci_32.h delete mode 100644 arch/sparc/include/asm/pci_64.h diff --git a/arch/sparc/include/asm/pci.h b/arch/sparc/include/asm/pci.h index cad79a6ce0e4..cfec79bb1831 100644 --- a/arch/sparc/include/asm/pci.h +++ b/arch/sparc/include/asm/pci.h @@ -1,9 +1,54 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef ___ASM_SPARC_PCI_H #define ___ASM_SPARC_PCI_H -#if defined(__sparc__) && defined(__arch64__) -#include + + +/* Can be used to override the logic in pci_scan_bus for skipping + * already-configured bus numbers - to be used for buggy BIOSes + * or architectures with incomplete PCI setup by the loader. + */ +#define pcibios_assign_all_busses() 0 + +#define PCIBIOS_MIN_IO 0UL +#define PCIBIOS_MIN_MEM 0UL + +#define PCI_IRQ_NONE 0xffffffff + + +#ifdef CONFIG_SPARC64 + +/* PCI IOMMU mapping bypass support. */ + +/* PCI 64-bit addressing works for all slots on all controller + * types on sparc64. However, it requires that the device + * can drive enough of the 64 bits. + */ +#define PCI64_REQUIRED_MASK (~(u64)0) +#define PCI64_ADDR_BASE 0xfffc000000000000UL + +/* Return the index of the PCI controller for device PDEV. */ +int pci_domain_nr(struct pci_bus *bus); +static inline int pci_proc_domain(struct pci_bus *bus) +{ + return 1; +} + +/* Platform support for /proc/bus/pci/X/Y mmap()s. */ +#define HAVE_PCI_MMAP +#define arch_can_pci_mmap_io() 1 +#define HAVE_ARCH_PCI_GET_UNMAPPED_AREA +#define get_pci_unmapped_area get_fb_unmapped_area + +#define HAVE_ARCH_PCI_RESOURCE_TO_USER +#endif /* CONFIG_SPARC64 */ + +#if defined(CONFIG_SPARC64) || defined(CONFIG_LEON_PCI) +static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) +{ + return PCI_IRQ_NONE; +} #else -#include -#endif +#include #endif + +#endif /* ___ASM_SPARC_PCI_H */ diff --git a/arch/sparc/include/asm/pci_32.h b/arch/sparc/include/asm/pci_32.h deleted file mode 100644 index a475380ea108..000000000000 --- a/arch/sparc/include/asm/pci_32.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __SPARC_PCI_H -#define __SPARC_PCI_H - -#ifdef __KERNEL__ - -#include - -/* Can be used to override the logic in pci_scan_bus for skipping - * already-configured bus numbers - to be used for buggy BIOSes - * or architectures with incomplete PCI setup by the loader. - */ -#define pcibios_assign_all_busses() 0 - -#define PCIBIOS_MIN_IO 0UL -#define PCIBIOS_MIN_MEM 0UL - -#define PCI_IRQ_NONE 0xffffffff - -#endif /* __KERNEL__ */ - -#ifndef CONFIG_LEON_PCI -/* generic pci stuff */ -#include -#else -static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) -{ - return PCI_IRQ_NONE; -} -#endif - -#endif /* __SPARC_PCI_H */ diff --git a/arch/sparc/include/asm/pci_64.h b/arch/sparc/include/asm/pci_64.h deleted file mode 100644 index fac77813402c..000000000000 --- a/arch/sparc/include/asm/pci_64.h +++ /dev/null @@ -1,52 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __SPARC64_PCI_H -#define __SPARC64_PCI_H - -#ifdef __KERNEL__ - -#include - -/* Can be used to override the logic in pci_scan_bus for skipping - * already-configured bus numbers - to be used for buggy BIOSes - * or architectures with incomplete PCI setup by the loader. - */ -#define pcibios_assign_all_busses() 0 - -#define PCIBIOS_MIN_IO 0UL -#define PCIBIOS_MIN_MEM 0UL - -#define PCI_IRQ_NONE 0xffffffff - -/* PCI IOMMU mapping bypass support. */ - -/* PCI 64-bit addressing works for all slots on all controller - * types on sparc64. However, it requires that the device - * can drive enough of the 64 bits. - */ -#define PCI64_REQUIRED_MASK (~(u64)0) -#define PCI64_ADDR_BASE 0xfffc000000000000UL - -/* Return the index of the PCI controller for device PDEV. */ - -int pci_domain_nr(struct pci_bus *bus); -static inline int pci_proc_domain(struct pci_bus *bus) -{ - return 1; -} - -/* Platform support for /proc/bus/pci/X/Y mmap()s. */ - -#define HAVE_PCI_MMAP -#define arch_can_pci_mmap_io() 1 -#define HAVE_ARCH_PCI_GET_UNMAPPED_AREA -#define get_pci_unmapped_area get_fb_unmapped_area - -static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) -{ - return PCI_IRQ_NONE; -} - -#define HAVE_ARCH_PCI_RESOURCE_TO_USER -#endif /* __KERNEL__ */ - -#endif /* __SPARC64_PCI_H */ From 48cc8f7a1d5a4b1c5b32de5ad8a53b1c9194c6d5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 12 Dec 2018 17:09:58 +0100 Subject: [PATCH 49/73] sparc: use DT node full_name in sparc_dma_alloc_resource The sparc tree already has this change for the pre-refactored code, but pulling it into the dma-mapping tree like this should ease the merge conflicts a bit. Signed-off-by: Christoph Hellwig Acked-by: Sam Ravnborg Acked-by: David Miller --- arch/sparc/kernel/ioport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 51c128d80193..baa235652c27 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -252,7 +252,7 @@ unsigned long sparc_dma_alloc_resource(struct device *dev, size_t len) res = kzalloc(sizeof(*res), GFP_KERNEL); if (!res) return 0; - res->name = dev->of_node->name; + res->name = dev->of_node->full_name; if (allocate_resource(&_sparc_dvma, res, len, _sparc_dvma.start, _sparc_dvma.end, PAGE_SIZE, NULL, NULL) != 0) { From 20b105feda8d42360bd690b8fdf6b2f00ba4a993 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 4 Dec 2018 08:04:31 -0800 Subject: [PATCH 50/73] dma-mapping: remove a pointless memset in dma_atomic_pool_init We already zero the memory after allocating it from the pool that this function fills, and having the memset here in this form means we can't support CMA highmem allocations. Signed-off-by: Christoph Hellwig Reported-by: Russell King - ARM Linux --- kernel/dma/remap.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c index 8a44317cfc1a..18cc09fc27b9 100644 --- a/kernel/dma/remap.c +++ b/kernel/dma/remap.c @@ -121,7 +121,6 @@ int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot) if (!page) goto out; - memset(page_address(page), 0, atomic_pool_size); arch_dma_prep_coherent(page, atomic_pool_size); atomic_pool = gen_pool_create(PAGE_SHIFT, -1); From 8d59b5f2a44611d7327a2a14b36090d692186f60 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 3 Dec 2018 14:58:59 +0100 Subject: [PATCH 51/73] dma-mapping: simplify the dma_sync_single_range_for_{cpu,device} implementation We can just call the regular calls after adding offset the the address instead of reimplementing them. Signed-off-by: Christoph Hellwig Acked-by: Jesper Dangaard Brouer Tested-by: Jesper Dangaard Brouer Tested-by: Tony Luck --- include/linux/dma-debug.h | 27 ------------------------ include/linux/dma-mapping.h | 34 +++++++++--------------------- kernel/dma/debug.c | 42 ------------------------------------- 3 files changed, 10 insertions(+), 93 deletions(-) diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index 46e6131a72b6..2ad5c363d7d5 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -70,17 +70,6 @@ extern void debug_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, int direction); -extern void debug_dma_sync_single_range_for_cpu(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, - size_t size, - int direction); - -extern void debug_dma_sync_single_range_for_device(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, - size_t size, int direction); - extern void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, int direction); @@ -167,22 +156,6 @@ static inline void debug_dma_sync_single_for_device(struct device *dev, { } -static inline void debug_dma_sync_single_range_for_cpu(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, - size_t size, - int direction) -{ -} - -static inline void debug_dma_sync_single_range_for_device(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, - size_t size, - int direction) -{ -} - static inline void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, int direction) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 7799c2b27849..8916499d2805 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -360,6 +360,13 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, debug_dma_sync_single_for_cpu(dev, addr, size, dir); } +static inline void dma_sync_single_range_for_cpu(struct device *dev, + dma_addr_t addr, unsigned long offset, size_t size, + enum dma_data_direction dir) +{ + return dma_sync_single_for_cpu(dev, addr + offset, size, dir); +} + static inline void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) @@ -372,32 +379,11 @@ static inline void dma_sync_single_for_device(struct device *dev, debug_dma_sync_single_for_device(dev, addr, size, dir); } -static inline void dma_sync_single_range_for_cpu(struct device *dev, - dma_addr_t addr, - unsigned long offset, - size_t size, - enum dma_data_direction dir) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_single_for_cpu) - ops->sync_single_for_cpu(dev, addr + offset, size, dir); - debug_dma_sync_single_range_for_cpu(dev, addr, offset, size, dir); -} - static inline void dma_sync_single_range_for_device(struct device *dev, - dma_addr_t addr, - unsigned long offset, - size_t size, - enum dma_data_direction dir) + dma_addr_t addr, unsigned long offset, size_t size, + enum dma_data_direction dir) { - const struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_single_for_device) - ops->sync_single_for_device(dev, addr + offset, size, dir); - debug_dma_sync_single_range_for_device(dev, addr, offset, size, dir); + return dma_sync_single_for_device(dev, addr + offset, size, dir); } static inline void diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 20ab0f6c1b70..164706da2a73 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -1633,48 +1633,6 @@ void debug_dma_sync_single_for_device(struct device *dev, } EXPORT_SYMBOL(debug_dma_sync_single_for_device); -void debug_dma_sync_single_range_for_cpu(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, size_t size, - int direction) -{ - struct dma_debug_entry ref; - - if (unlikely(dma_debug_disabled())) - return; - - ref.type = dma_debug_single; - ref.dev = dev; - ref.dev_addr = dma_handle; - ref.size = offset + size; - ref.direction = direction; - ref.sg_call_ents = 0; - - check_sync(dev, &ref, true); -} -EXPORT_SYMBOL(debug_dma_sync_single_range_for_cpu); - -void debug_dma_sync_single_range_for_device(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, - size_t size, int direction) -{ - struct dma_debug_entry ref; - - if (unlikely(dma_debug_disabled())) - return; - - ref.type = dma_debug_single; - ref.dev = dev; - ref.dev_addr = dma_handle; - ref.size = offset + size; - ref.direction = direction; - ref.sg_call_ents = 0; - - check_sync(dev, &ref, false); -} -EXPORT_SYMBOL(debug_dma_sync_single_range_for_device); - void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, int direction) { From 7f0fee242e899f2eb42fd9e72bcfc3cb59aad1ce Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Dec 2018 12:24:27 -0800 Subject: [PATCH 52/73] dma-mapping: merge dma_unmap_page_attrs and dma_unmap_single_attrs The two functions are exactly the same, so don't bother implementing them twice. Signed-off-by: Christoph Hellwig Acked-by: Jesper Dangaard Brouer Tested-by: Jesper Dangaard Brouer Tested-by: Tony Luck --- include/linux/dma-mapping.h | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 8916499d2805..3b431cc58794 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -253,6 +253,12 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr, debug_dma_unmap_page(dev, addr, size, dir, true); } +static inline void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + return dma_unmap_single_attrs(dev, addr, size, dir, attrs); +} + /* * dma_maps_sg_attrs returns 0 on error and > 0 on success. * It should never return a value < 0. @@ -300,19 +306,6 @@ static inline dma_addr_t dma_map_page_attrs(struct device *dev, return addr; } -static inline void dma_unmap_page_attrs(struct device *dev, - dma_addr_t addr, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->unmap_page) - ops->unmap_page(dev, addr, size, dir, attrs); - debug_dma_unmap_page(dev, addr, size, dir, false); -} - static inline dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size, From 05887cb610a54bf568de7f0bc07c4a64e45ac6f9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Dec 2018 12:25:54 -0800 Subject: [PATCH 53/73] dma-mapping: move dma_get_required_mask to kernel/dma dma_get_required_mask should really be with the rest of the DMA mapping implementation instead of in drivers/base as a lone outlier. Signed-off-by: Christoph Hellwig Acked-by: Jesper Dangaard Brouer Tested-by: Jesper Dangaard Brouer Tested-by: Tony Luck --- drivers/base/platform.c | 31 ------------------------------- kernel/dma/mapping.c | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 32 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 41b91af95afb..eae841935a45 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -1179,37 +1179,6 @@ int __init platform_bus_init(void) return error; } -#ifndef ARCH_HAS_DMA_GET_REQUIRED_MASK -static u64 dma_default_get_required_mask(struct device *dev) -{ - u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT); - u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT)); - u64 mask; - - if (!high_totalram) { - /* convert to mask just covering totalram */ - low_totalram = (1 << (fls(low_totalram) - 1)); - low_totalram += low_totalram - 1; - mask = low_totalram; - } else { - high_totalram = (1 << (fls(high_totalram) - 1)); - high_totalram += high_totalram - 1; - mask = (((u64)high_totalram) << 32) + 0xffffffff; - } - return mask; -} - -u64 dma_get_required_mask(struct device *dev) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (ops->get_required_mask) - return ops->get_required_mask(dev); - return dma_default_get_required_mask(dev); -} -EXPORT_SYMBOL_GPL(dma_get_required_mask); -#endif - static __initdata LIST_HEAD(early_platform_driver_list); static __initdata LIST_HEAD(early_platform_device_list); diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index dfbc3deb95cd..dfe29d18dba1 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -5,7 +5,7 @@ * Copyright (c) 2006 SUSE Linux Products GmbH * Copyright (c) 2006 Tejun Heo */ - +#include /* for max_pfn */ #include #include #include @@ -262,3 +262,35 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, #endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */ } EXPORT_SYMBOL(dma_common_mmap); + +#ifndef ARCH_HAS_DMA_GET_REQUIRED_MASK +static u64 dma_default_get_required_mask(struct device *dev) +{ + u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT); + u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT)); + u64 mask; + + if (!high_totalram) { + /* convert to mask just covering totalram */ + low_totalram = (1 << (fls(low_totalram) - 1)); + low_totalram += low_totalram - 1; + mask = low_totalram; + } else { + high_totalram = (1 << (fls(high_totalram) - 1)); + high_totalram += high_totalram - 1; + mask = (((u64)high_totalram) << 32) + 0xffffffff; + } + return mask; +} + +u64 dma_get_required_mask(struct device *dev) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (ops->get_required_mask) + return ops->get_required_mask(dev); + return dma_default_get_required_mask(dev); +} +EXPORT_SYMBOL_GPL(dma_get_required_mask); +#endif + From 7249c1a52df9967cd23550f3dc24fb6ca43cdc6a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Dec 2018 12:43:30 -0800 Subject: [PATCH 54/73] dma-mapping: move various slow path functions out of line There is no need to have all setup and coherent allocation / freeing routines inline. Move them out of line to keep the implemeation nicely encapsulated and save some kernel text size. Signed-off-by: Christoph Hellwig Acked-by: Jesper Dangaard Brouer Tested-by: Jesper Dangaard Brouer Tested-by: Tony Luck --- arch/powerpc/include/asm/dma-mapping.h | 1 - include/linux/dma-mapping.h | 150 +++---------------------- kernel/dma/mapping.c | 140 ++++++++++++++++++++++- 3 files changed, 151 insertions(+), 140 deletions(-) diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 8fa394520af6..5201f2b7838c 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -108,7 +108,6 @@ static inline void set_dma_offset(struct device *dev, dma_addr_t off) } #define HAVE_ARCH_DMA_SET_MASK 1 -extern int dma_set_mask(struct device *dev, u64 dma_mask); extern u64 __dma_get_required_mask(struct device *dev); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 3b431cc58794..0bbce52606c2 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -440,107 +440,24 @@ bool dma_in_atomic_pool(void *start, size_t size); void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags); bool dma_free_from_pool(void *start, size_t size); -/** - * dma_mmap_attrs - map a coherent DMA allocation into user space - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices - * @vma: vm_area_struct describing requested user mapping - * @cpu_addr: kernel CPU-view address returned from dma_alloc_attrs - * @handle: device-view address returned from dma_alloc_attrs - * @size: size of memory originally requested in dma_alloc_attrs - * @attrs: attributes of mapping properties requested in dma_alloc_attrs - * - * Map a coherent DMA buffer previously allocated by dma_alloc_attrs - * into user space. The coherent DMA buffer must not be freed by the - * driver until the user space mapping has been released. - */ -static inline int -dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, - dma_addr_t dma_addr, size_t size, unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - BUG_ON(!ops); - if (ops->mmap) - return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); - return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); -} - +int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, 0) int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); -static inline int -dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, - dma_addr_t dma_addr, size_t size, - unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - BUG_ON(!ops); - if (ops->get_sgtable) - return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, - attrs); - return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size, - attrs); -} - +int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0) -#ifndef arch_dma_alloc_attrs -#define arch_dma_alloc_attrs(dev) (true) -#endif - -static inline void *dma_alloc_attrs(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, - unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - void *cpu_addr; - - BUG_ON(!ops); - WARN_ON_ONCE(dev && !dev->coherent_dma_mask); - - if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr)) - return cpu_addr; - - /* let the implementation decide on the zone to allocate from: */ - flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); - - if (!arch_dma_alloc_attrs(&dev)) - return NULL; - if (!ops->alloc) - return NULL; - - cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); - debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); - return cpu_addr; -} - -static inline void dma_free_attrs(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_handle, - unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!ops); - - if (dma_release_from_dev_coherent(dev, get_order(size), cpu_addr)) - return; - /* - * On non-coherent platforms which implement DMA-coherent buffers via - * non-cacheable remaps, ops->free() may call vunmap(). Thus getting - * this far in IRQ context is a) at risk of a BUG_ON() or trying to - * sleep on some machines, and b) an indication that the driver is - * probably misusing the coherent API anyway. - */ - WARN_ON(irqs_disabled()); - - if (!ops->free || !cpu_addr) - return; - - debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); - ops->free(dev, size, cpu_addr, dma_handle, attrs); -} +void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t flag, unsigned long attrs); +void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_handle, unsigned long attrs); static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) @@ -565,35 +482,9 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) return 0; } -static inline void dma_check_mask(struct device *dev, u64 mask) -{ - if (sme_active() && (mask < (((u64)sme_get_me_mask() << 1) - 1))) - dev_warn(dev, "SME is active, device will require DMA bounce buffers\n"); -} - -static inline int dma_supported(struct device *dev, u64 mask) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (!ops) - return 0; - if (!ops->dma_supported) - return 1; - return ops->dma_supported(dev, mask); -} - -#ifndef HAVE_ARCH_DMA_SET_MASK -static inline int dma_set_mask(struct device *dev, u64 mask) -{ - if (!dev->dma_mask || !dma_supported(dev, mask)) - return -EIO; - - dma_check_mask(dev, mask); - - *dev->dma_mask = mask; - return 0; -} -#endif +int dma_supported(struct device *dev, u64 mask); +int dma_set_mask(struct device *dev, u64 mask); +int dma_set_coherent_mask(struct device *dev, u64 mask); static inline u64 dma_get_mask(struct device *dev) { @@ -602,21 +493,6 @@ static inline u64 dma_get_mask(struct device *dev) return DMA_BIT_MASK(32); } -#ifdef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK -int dma_set_coherent_mask(struct device *dev, u64 mask); -#else -static inline int dma_set_coherent_mask(struct device *dev, u64 mask) -{ - if (!dma_supported(dev, mask)) - return -EIO; - - dma_check_mask(dev, mask); - - dev->coherent_dma_mask = mask; - return 0; -} -#endif - /* * Set both the DMA mask and the coherent DMA mask to the same thing. * Note that we don't check the return value from dma_set_coherent_mask() diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index dfe29d18dba1..176ae3e08916 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -223,7 +223,20 @@ int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); return ret; } -EXPORT_SYMBOL(dma_common_get_sgtable); + +int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + BUG_ON(!ops); + if (ops->get_sgtable) + return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, + attrs); + return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size, + attrs); +} +EXPORT_SYMBOL(dma_get_sgtable_attrs); /* * Create userspace mapping for the DMA-coherent memory. @@ -261,7 +274,31 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, return -ENXIO; #endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */ } -EXPORT_SYMBOL(dma_common_mmap); + +/** + * dma_mmap_attrs - map a coherent DMA allocation into user space + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @vma: vm_area_struct describing requested user mapping + * @cpu_addr: kernel CPU-view address returned from dma_alloc_attrs + * @dma_addr: device-view address returned from dma_alloc_attrs + * @size: size of memory originally requested in dma_alloc_attrs + * @attrs: attributes of mapping properties requested in dma_alloc_attrs + * + * Map a coherent DMA buffer previously allocated by dma_alloc_attrs into user + * space. The coherent DMA buffer must not be freed by the driver until the + * user space mapping has been released. + */ +int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + BUG_ON(!ops); + if (ops->mmap) + return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); + return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); +} +EXPORT_SYMBOL(dma_mmap_attrs); #ifndef ARCH_HAS_DMA_GET_REQUIRED_MASK static u64 dma_default_get_required_mask(struct device *dev) @@ -294,3 +331,102 @@ u64 dma_get_required_mask(struct device *dev) EXPORT_SYMBOL_GPL(dma_get_required_mask); #endif +#ifndef arch_dma_alloc_attrs +#define arch_dma_alloc_attrs(dev) (true) +#endif + +void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t flag, unsigned long attrs) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + void *cpu_addr; + + BUG_ON(!ops); + WARN_ON_ONCE(dev && !dev->coherent_dma_mask); + + if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr)) + return cpu_addr; + + /* let the implementation decide on the zone to allocate from: */ + flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); + + if (!arch_dma_alloc_attrs(&dev)) + return NULL; + if (!ops->alloc) + return NULL; + + cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); + debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); + return cpu_addr; +} +EXPORT_SYMBOL(dma_alloc_attrs); + +void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_handle, unsigned long attrs) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!ops); + + if (dma_release_from_dev_coherent(dev, get_order(size), cpu_addr)) + return; + /* + * On non-coherent platforms which implement DMA-coherent buffers via + * non-cacheable remaps, ops->free() may call vunmap(). Thus getting + * this far in IRQ context is a) at risk of a BUG_ON() or trying to + * sleep on some machines, and b) an indication that the driver is + * probably misusing the coherent API anyway. + */ + WARN_ON(irqs_disabled()); + + if (!ops->free || !cpu_addr) + return; + + debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); + ops->free(dev, size, cpu_addr, dma_handle, attrs); +} +EXPORT_SYMBOL(dma_free_attrs); + +static inline void dma_check_mask(struct device *dev, u64 mask) +{ + if (sme_active() && (mask < (((u64)sme_get_me_mask() << 1) - 1))) + dev_warn(dev, "SME is active, device will require DMA bounce buffers\n"); +} + +int dma_supported(struct device *dev, u64 mask) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (!ops) + return 0; + if (!ops->dma_supported) + return 1; + return ops->dma_supported(dev, mask); +} +EXPORT_SYMBOL(dma_supported); + +#ifndef HAVE_ARCH_DMA_SET_MASK +int dma_set_mask(struct device *dev, u64 mask) +{ + if (!dev->dma_mask || !dma_supported(dev, mask)) + return -EIO; + + dma_check_mask(dev, mask); + *dev->dma_mask = mask; + return 0; +} +EXPORT_SYMBOL(dma_set_mask); +#endif + +#ifndef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK +int dma_set_coherent_mask(struct device *dev, u64 mask) +{ + if (!dma_supported(dev, mask)) + return -EIO; + + dma_check_mask(dev, mask); + dev->coherent_dma_mask = mask; + return 0; +} +EXPORT_SYMBOL(dma_set_coherent_mask); +#endif From 8ddbe5943c0b1259b5ddb6dc1729863433fc256c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Dec 2018 12:47:50 -0800 Subject: [PATCH 55/73] dma-mapping: move dma_cache_sync out of line This isn't exactly a slow path routine, but it is not super critical either, and moving it out of line will help to keep the include chain clean for the following DMA indirection bypass work. Signed-off-by: Christoph Hellwig Acked-by: Jesper Dangaard Brouer Tested-by: Jesper Dangaard Brouer Tested-by: Tony Luck --- include/linux/dma-mapping.h | 12 ++---------- kernel/dma/mapping.c | 11 +++++++++++ 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 0bbce52606c2..0f0078490df4 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -411,16 +411,8 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, #define dma_map_page(d, p, o, s, r) dma_map_page_attrs(d, p, o, s, r, 0) #define dma_unmap_page(d, a, s, r) dma_unmap_page_attrs(d, a, s, r, 0) -static inline void -dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction dir) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->cache_sync) - ops->cache_sync(dev, vaddr, size, dir); -} +void dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction dir); extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 176ae3e08916..0b18cfbdde95 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -430,3 +430,14 @@ int dma_set_coherent_mask(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_set_coherent_mask); #endif + +void dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction dir) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(dir)); + if (ops->cache_sync) + ops->cache_sync(dev, vaddr, size, dir); +} +EXPORT_SYMBOL(dma_cache_sync); From 3731c3d4774e38b9d91c01943e1e6a243c1776be Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Dec 2018 12:50:26 -0800 Subject: [PATCH 56/73] dma-mapping: always build the direct mapping code All architectures except for sparc64 use the dma-direct code in some form, and even for sparc64 we had the discussion of a direct mapping mode a while ago. In preparation for directly calling the direct mapping code don't bother having it optionally but always build the code in. This is a minor hardship for some powerpc and arm configs that don't pull it in yet (although they should in a relase ot two), and sparc64 which currently doesn't need it at all, but it will reduce the ifdef mess we'd otherwise need significantly. Signed-off-by: Christoph Hellwig Acked-by: Jesper Dangaard Brouer Tested-by: Jesper Dangaard Brouer Tested-by: Tony Luck --- arch/alpha/Kconfig | 1 - arch/arc/Kconfig | 1 - arch/arm/Kconfig | 1 - arch/arm64/Kconfig | 1 - arch/c6x/Kconfig | 1 - arch/csky/Kconfig | 1 - arch/h8300/Kconfig | 1 - arch/hexagon/Kconfig | 1 - arch/m68k/Kconfig | 1 - arch/microblaze/Kconfig | 1 - arch/mips/Kconfig | 1 - arch/nds32/Kconfig | 1 - arch/nios2/Kconfig | 1 - arch/openrisc/Kconfig | 1 - arch/parisc/Kconfig | 1 - arch/riscv/Kconfig | 1 - arch/s390/Kconfig | 1 - arch/sh/Kconfig | 1 - arch/sparc/Kconfig | 1 - arch/unicore32/Kconfig | 1 - arch/x86/Kconfig | 1 - arch/xtensa/Kconfig | 1 - kernel/dma/Kconfig | 7 ------- kernel/dma/Makefile | 3 +-- 24 files changed, 1 insertion(+), 31 deletions(-) diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index a7e748a46c18..5da6ff54b3e7 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -203,7 +203,6 @@ config ALPHA_EIGER config ALPHA_JENSEN bool "Jensen" depends on BROKEN - select DMA_DIRECT_OPS help DEC PC 150 AXP (aka Jensen): This is a very old Digital system - one of the first-generation Alpha systems. A number of these systems diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index fd48d698da29..7deaabeb531a 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -17,7 +17,6 @@ config ARC select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK - select DMA_DIRECT_OPS select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC) select GENERIC_CLOCKEVENTS select GENERIC_FIND_FIRST_BIT diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a858ee791ef0..586fc30b23bd 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -30,7 +30,6 @@ config ARM select CLONE_BACKWARDS select CPU_PM if (SUSPEND || CPU_IDLE) select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS - select DMA_DIRECT_OPS if !MMU select DMA_REMAP if MMU select EDAC_SUPPORT select EDAC_ATOMIC_SCRUB diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 06cf0ef24367..2092080240b0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -80,7 +80,6 @@ config ARM64 select CPU_PM if (SUSPEND || CPU_IDLE) select CRC32 select DCACHE_WORD_ACCESS - select DMA_DIRECT_OPS select DMA_DIRECT_REMAP select EDAC_SUPPORT select FRAME_POINTER diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig index 84420109113d..456e154674d1 100644 --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -9,7 +9,6 @@ config C6X select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select CLKDEV_LOOKUP - select DMA_DIRECT_OPS select GENERIC_ATOMIC64 select GENERIC_IRQ_SHOW select HAVE_ARCH_TRACEHOOK diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index ea74f3a9eeaf..37bed8aadf95 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -7,7 +7,6 @@ config CSKY select COMMON_CLK select CLKSRC_MMIO select CLKSRC_OF - select DMA_DIRECT_OPS select DMA_DIRECT_REMAP select IRQ_DOMAIN select HANDLE_DOMAIN_IRQ diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index d19c6b16cd5d..6472a0685470 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -22,7 +22,6 @@ config H8300 select HAVE_ARCH_KGDB select HAVE_ARCH_HASH select CPU_NO_EFFICIENT_FFS - select DMA_DIRECT_OPS config CPU_BIG_ENDIAN def_bool y diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 2b688af379e6..d71036c598de 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -31,7 +31,6 @@ config HEXAGON select GENERIC_CLOCKEVENTS_BROADCAST select MODULES_USE_ELF_RELA select GENERIC_CPU_DEVICES - select DMA_DIRECT_OPS ---help--- Qualcomm Hexagon is a processor architecture designed for high performance and low power across a wide variety of applications. diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 1bc9f1ba759a..8a5868e9a3a0 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -26,7 +26,6 @@ config M68K select MODULES_USE_ELF_RELA select OLD_SIGSUSPEND3 select OLD_SIGACTION - select DMA_DIRECT_OPS if HAS_DMA select ARCH_DISCARD_MEMBLOCK config CPU_BIG_ENDIAN diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index effed2efd306..eda9e2315ef5 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -12,7 +12,6 @@ config MICROBLAZE select TIMER_OF select CLONE_BACKWARDS3 select COMMON_CLK - select DMA_DIRECT_OPS select GENERIC_ATOMIC64 select GENERIC_CLOCKEVENTS select GENERIC_CPU_DEVICES diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 8272ea4c7264..2993aa9842c0 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -16,7 +16,6 @@ config MIPS select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select CPU_PM if CPU_IDLE - select DMA_DIRECT_OPS select GENERIC_ATOMIC64 if !64BIT select GENERIC_CLOCKEVENTS select GENERIC_CMOS_UPDATE diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 7a04adacb2f0..1af6bbae7220 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -11,7 +11,6 @@ config NDS32 select CLKSRC_MMIO select CLONE_BACKWARDS select COMMON_CLK - select DMA_DIRECT_OPS select GENERIC_ATOMIC64 select GENERIC_CPU_DEVICES select GENERIC_CLOCKEVENTS diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig index 7e95506e957a..f6c4b0f49997 100644 --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -4,7 +4,6 @@ config NIOS2 select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_NO_SWAP - select DMA_DIRECT_OPS select TIMER_OF select GENERIC_ATOMIC64 select GENERIC_CLOCKEVENTS diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 285f7d05c8ed..d0feebad5a8f 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -7,7 +7,6 @@ config OPENRISC def_bool y select ARCH_HAS_SYNC_DMA_FOR_DEVICE - select DMA_DIRECT_OPS select OF select OF_EARLY_FLATTREE select IRQ_DOMAIN diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 428ee50fc3db..6e1b71da0e71 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -185,7 +185,6 @@ config PA11 depends on PA7000 || PA7100LC || PA7200 || PA7300LC select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE - select DMA_DIRECT_OPS select DMA_NONCOHERENT_CACHE_SYNC config PREFETCH diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 55da93f4e818..51d89c4b1dca 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -19,7 +19,6 @@ config RISCV select ARCH_WANT_FRAME_POINTERS select CLONE_BACKWARDS select COMMON_CLK - select DMA_DIRECT_OPS select GENERIC_CLOCKEVENTS select GENERIC_CPU_DEVICES select GENERIC_IRQ_SHOW diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 5624e8607054..21d271d04ca6 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -139,7 +139,6 @@ config S390 select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS - select DMA_DIRECT_OPS select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_EFFICIENT_UNALIGNED_ACCESS diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index f82a4da7adf3..10fd4e9c454b 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -7,7 +7,6 @@ config SUPERH select ARCH_NO_COHERENT_DMA_MMAP if !MMU select HAVE_PATA_PLATFORM select CLKDEV_LOOKUP - select DMA_DIRECT_OPS select HAVE_IDE if HAS_IOPORT_MAP select HAVE_MEMBLOCK_NODE_MAP select ARCH_DISCARD_MEMBLOCK diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 8853b6ceae17..f5bb9ded1d18 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -48,7 +48,6 @@ config SPARC config SPARC32 def_bool !64BIT select ARCH_HAS_SYNC_DMA_FOR_CPU - select DMA_DIRECT_OPS select GENERIC_ATOMIC64 select CLZ_TAB select HAVE_UID16 diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index a4c05159dca5..2681027d7bff 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -4,7 +4,6 @@ config UNICORE32 select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO - select DMA_DIRECT_OPS select HAVE_GENERIC_DMA_COHERENT select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index adc845b66f01..c14d4a35be13 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -89,7 +89,6 @@ config X86 select CLOCKSOURCE_VALIDATE_LAST_CYCLE select CLOCKSOURCE_WATCHDOG select DCACHE_WORD_ACCESS - select DMA_DIRECT_OPS select EDAC_ATOMIC_SCRUB select EDAC_SUPPORT select GENERIC_CLOCKEVENTS diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 75488b606edc..36338e7564a3 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -9,7 +9,6 @@ config XTENSA select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK - select DMA_DIRECT_OPS select DMA_REMAP if MMU select GENERIC_ATOMIC64 select GENERIC_CLOCKEVENTS diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 41c3b1df70eb..ca88b867e7fe 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -35,13 +35,8 @@ config ARCH_HAS_DMA_COHERENT_TO_PFN config ARCH_HAS_DMA_MMAP_PGPROT bool -config DMA_DIRECT_OPS - bool - depends on HAS_DMA - config DMA_NONCOHERENT_CACHE_SYNC bool - depends on DMA_DIRECT_OPS config DMA_VIRT_OPS bool @@ -49,7 +44,6 @@ config DMA_VIRT_OPS config SWIOTLB bool - select DMA_DIRECT_OPS select NEED_DMA_MAP_STATE config DMA_REMAP @@ -58,5 +52,4 @@ config DMA_REMAP config DMA_DIRECT_REMAP bool - depends on DMA_DIRECT_OPS select DMA_REMAP diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index f4feeceb8020..a626f643cd63 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -1,9 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_HAS_DMA) += mapping.o +obj-$(CONFIG_HAS_DMA) += mapping.o direct.o obj-$(CONFIG_DMA_CMA) += contiguous.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += coherent.o -obj-$(CONFIG_DMA_DIRECT_OPS) += direct.o obj-$(CONFIG_DMA_VIRT_OPS) += virt.o obj-$(CONFIG_DMA_API_DEBUG) += debug.o obj-$(CONFIG_SWIOTLB) += swiotlb.o From 90ac706e98fcb24fb0b0a259558987f33cc2f0f6 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 6 Dec 2018 13:14:44 -0800 Subject: [PATCH 57/73] dma-mapping: factor out dummy DMA ops The dummy DMA ops are currently used by arm64 for any device which has an invalid ACPI description and is thus barred from using DMA due to not knowing whether is is cache-coherent or not. Factor these out into general dma-mapping code so that they can be referenced from other common code paths. In the process, we can prune all the optional callbacks which just do the same thing as the default behaviour, and fill in .map_resource for completeness. Signed-off-by: Robin Murphy [hch: moved to a separate source file] Reviewed-by: Rafael J. Wysocki Acked-by: Jesper Dangaard Brouer Tested-by: Jesper Dangaard Brouer Tested-by: Tony Luck Signed-off-by: Christoph Hellwig --- arch/arm64/include/asm/dma-mapping.h | 4 +- arch/arm64/mm/dma-mapping.c | 86 ---------------------------- include/linux/dma-mapping.h | 1 + kernel/dma/Makefile | 2 +- kernel/dma/dummy.c | 39 +++++++++++++ 5 files changed, 42 insertions(+), 90 deletions(-) create mode 100644 kernel/dma/dummy.c diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index c41f3fb1446c..273e778f7de2 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -24,15 +24,13 @@ #include #include -extern const struct dma_map_ops dummy_dma_ops; - static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { /* * We expect no ISA devices, and all other DMA masters are expected to * have someone call arch_setup_dma_ops at device creation time. */ - return &dummy_dma_ops; + return &dma_dummy_ops; } void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 4c0f498069e8..6ff6ec8806c1 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -89,92 +89,6 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma, } #endif /* CONFIG_IOMMU_DMA */ -/******************************************** - * The following APIs are for dummy DMA ops * - ********************************************/ - -static void *__dummy_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, - unsigned long attrs) -{ - return NULL; -} - -static void __dummy_free(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - unsigned long attrs) -{ -} - -static int __dummy_mmap(struct device *dev, - struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) -{ - return -ENXIO; -} - -static dma_addr_t __dummy_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - return DMA_MAPPING_ERROR; -} - -static void __dummy_unmap_page(struct device *dev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ -} - -static int __dummy_map_sg(struct device *dev, struct scatterlist *sgl, - int nelems, enum dma_data_direction dir, - unsigned long attrs) -{ - return 0; -} - -static void __dummy_unmap_sg(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir, - unsigned long attrs) -{ -} - -static void __dummy_sync_single(struct device *dev, - dma_addr_t dev_addr, size_t size, - enum dma_data_direction dir) -{ -} - -static void __dummy_sync_sg(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) -{ -} - -static int __dummy_dma_supported(struct device *hwdev, u64 mask) -{ - return 0; -} - -const struct dma_map_ops dummy_dma_ops = { - .alloc = __dummy_alloc, - .free = __dummy_free, - .mmap = __dummy_mmap, - .map_page = __dummy_map_page, - .unmap_page = __dummy_unmap_page, - .map_sg = __dummy_map_sg, - .unmap_sg = __dummy_unmap_sg, - .sync_single_for_cpu = __dummy_sync_single, - .sync_single_for_device = __dummy_sync_single, - .sync_sg_for_cpu = __dummy_sync_sg, - .sync_sg_for_device = __dummy_sync_sg, - .dma_supported = __dummy_dma_supported, -}; -EXPORT_SYMBOL(dummy_dma_ops); - static int __init arm64_dma_init(void) { WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(), diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 0f0078490df4..269ee27fc3d9 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -136,6 +136,7 @@ struct dma_map_ops { extern const struct dma_map_ops dma_direct_ops; extern const struct dma_map_ops dma_virt_ops; +extern const struct dma_map_ops dma_dummy_ops; #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index a626f643cd63..72ff6e46aa86 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_HAS_DMA) += mapping.o direct.o +obj-$(CONFIG_HAS_DMA) += mapping.o direct.o dummy.o obj-$(CONFIG_DMA_CMA) += contiguous.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += coherent.o obj-$(CONFIG_DMA_VIRT_OPS) += virt.o diff --git a/kernel/dma/dummy.c b/kernel/dma/dummy.c new file mode 100644 index 000000000000..05607642c888 --- /dev/null +++ b/kernel/dma/dummy.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Dummy DMA ops that always fail. + */ +#include + +static int dma_dummy_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + return -ENXIO; +} + +static dma_addr_t dma_dummy_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + return DMA_MAPPING_ERROR; +} + +static int dma_dummy_map_sg(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, + unsigned long attrs) +{ + return 0; +} + +static int dma_dummy_supported(struct device *hwdev, u64 mask) +{ + return 0; +} + +const struct dma_map_ops dma_dummy_ops = { + .mmap = dma_dummy_mmap, + .map_page = dma_dummy_map_page, + .map_sg = dma_dummy_map_sg, + .dma_supported = dma_dummy_supported, +}; +EXPORT_SYMBOL(dma_dummy_ops); From e5361ca29f2fea345c08d2b5cb5e3b1840cbafb8 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 6 Dec 2018 13:20:49 -0800 Subject: [PATCH 58/73] ACPI / scan: Refactor _CCA enforcement Rather than checking the DMA attribute at each callsite, just pass it through for acpi_dma_configure() to handle directly. That can then deal with the relatively exceptional DEV_DMA_NOT_SUPPORTED case by explicitly installing dummy DMA ops instead of just skipping setup entirely. This will then free up the dev->dma_ops == NULL case for some valuable fastpath optimisations. Signed-off-by: Robin Murphy Reviewed-by: Rafael J. Wysocki Acked-by: Jesper Dangaard Brouer Tested-by: Jesper Dangaard Brouer Signed-off-by: Christoph Hellwig Tested-by: Tony Luck --- drivers/acpi/scan.c | 5 +++++ drivers/base/platform.c | 3 +-- drivers/pci/pci-driver.c | 3 +-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index bd1c59fb0e17..b75ae34ed188 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1456,6 +1456,11 @@ int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr) const struct iommu_ops *iommu; u64 dma_addr = 0, size = 0; + if (attr == DEV_DMA_NOT_SUPPORTED) { + set_dma_ops(dev, &dma_dummy_ops); + return 0; + } + iort_dma_setup(dev, &dma_addr, &size); iommu = iort_iommu_configure(dev); diff --git a/drivers/base/platform.c b/drivers/base/platform.c index eae841935a45..c1ddf191711e 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -1138,8 +1138,7 @@ int platform_dma_configure(struct device *dev) ret = of_dma_configure(dev, dev->of_node, true); } else if (has_acpi_companion(dev)) { attr = acpi_get_dma_attr(to_acpi_device_node(dev->fwnode)); - if (attr != DEV_DMA_NOT_SUPPORTED) - ret = acpi_dma_configure(dev, attr); + ret = acpi_dma_configure(dev, attr); } return ret; diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index bef17c3fca67..1b58e058b13f 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -1602,8 +1602,7 @@ static int pci_dma_configure(struct device *dev) struct acpi_device *adev = to_acpi_device_node(bridge->fwnode); enum dev_dma_attr attr = acpi_get_dma_attr(adev); - if (attr != DEV_DMA_NOT_SUPPORTED) - ret = acpi_dma_configure(dev, attr); + ret = acpi_dma_configure(dev, acpi_get_dma_attr(adev)); } pci_put_host_bridge_device(bridge); From b907e20508d02462a50c2841da0a5e3883fdab39 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 3 Dec 2018 11:42:52 +0100 Subject: [PATCH 59/73] swiotlb: remove SWIOTLB_MAP_ERROR We can use DMA_MAPPING_ERROR instead, which already maps to the same value. Signed-off-by: Christoph Hellwig Acked-by: Jesper Dangaard Brouer Tested-by: Jesper Dangaard Brouer Tested-by: Tony Luck --- drivers/xen/swiotlb-xen.c | 4 ++-- include/linux/swiotlb.h | 3 --- kernel/dma/swiotlb.c | 4 ++-- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 6dc969d5ea2f..833e80b46eb2 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -403,7 +403,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir, attrs); - if (map == SWIOTLB_MAP_ERROR) + if (map == DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; dev_addr = xen_phys_to_bus(map); @@ -572,7 +572,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, sg_phys(sg), sg->length, dir, attrs); - if (map == SWIOTLB_MAP_ERROR) { + if (map == DMA_MAPPING_ERROR) { dev_warn(hwdev, "swiotlb buffer is full\n"); /* Don't panic here, we expect map_sg users to do proper error handling. */ diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index a387b59640a4..14aec0b70dd9 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -46,9 +46,6 @@ enum dma_sync_target { SYNC_FOR_DEVICE = 1, }; -/* define the last possible byte of physical address space as a mapping error */ -#define SWIOTLB_MAP_ERROR (~(phys_addr_t)0x0) - extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, phys_addr_t phys, size_t size, diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index ff1ce81bb623..19ba8e473d71 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -526,7 +526,7 @@ not_found: spin_unlock_irqrestore(&io_tlb_lock, flags); if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size); - return SWIOTLB_MAP_ERROR; + return DMA_MAPPING_ERROR; found: spin_unlock_irqrestore(&io_tlb_lock, flags); @@ -637,7 +637,7 @@ static dma_addr_t swiotlb_bounce_page(struct device *dev, phys_addr_t *phys, /* Oh well, have to allocate and map a bounce buffer. */ *phys = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start), *phys, size, dir, attrs); - if (*phys == SWIOTLB_MAP_ERROR) + if (*phys == DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; /* Ensure that the address returned is DMA'ble */ From 68c608345cc569bcfa1c1b2add4c00c343ecf933 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Dec 2018 07:06:04 -0800 Subject: [PATCH 60/73] swiotlb: remove dma_mark_clean Instead of providing a special dma_mark_clean hook just for ia64, switch ia64 to use the normal arch_sync_dma_for_cpu hooks instead. This means that we now also set the PG_arch_1 bit for pages in the swiotlb buffer, which isn't stricly needed as we will never execute code out of the swiotlb buffer, but otherwise harmless. Signed-off-by: Christoph Hellwig Acked-by: Jesper Dangaard Brouer Tested-by: Jesper Dangaard Brouer Tested-by: Tony Luck --- arch/ia64/Kconfig | 3 ++- arch/ia64/kernel/dma-mapping.c | 20 +++++++++++++++++++- arch/ia64/mm/init.c | 17 +++++++---------- drivers/xen/swiotlb-xen.c | 20 +------------------- include/linux/dma-direct.h | 8 -------- kernel/dma/swiotlb.c | 18 +----------------- 6 files changed, 30 insertions(+), 56 deletions(-) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index d6f203658994..c587e3316c38 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -28,7 +28,8 @@ config IA64 select HAVE_ARCH_TRACEHOOK select HAVE_MEMBLOCK_NODE_MAP select HAVE_VIRT_CPU_ACCOUNTING - select ARCH_HAS_DMA_MARK_CLEAN + select ARCH_HAS_DMA_COHERENT_TO_PFN + select ARCH_HAS_SYNC_DMA_FOR_CPU select VIRT_TO_BUS select ARCH_DISCARD_MEMBLOCK select GENERIC_IRQ_PROBE diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c index 7a471d8d67d4..36dd6aa6d759 100644 --- a/arch/ia64/kernel/dma-mapping.c +++ b/arch/ia64/kernel/dma-mapping.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include +#include #include #include @@ -16,6 +16,24 @@ const struct dma_map_ops *dma_get_ops(struct device *dev) EXPORT_SYMBOL(dma_get_ops); #ifdef CONFIG_SWIOTLB +void *arch_dma_alloc(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) +{ + return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); +} + +void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_addr, unsigned long attrs) +{ + dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs); +} + +long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, + dma_addr_t dma_addr) +{ + return page_to_pfn(virt_to_page(cpu_addr)); +} + void __init swiotlb_dma_init(void) { dma_ops = &swiotlb_dma_ops; diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index d5e12ff1d73c..0cf43bb13d6e 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -71,18 +72,14 @@ __ia64_sync_icache_dcache (pte_t pte) * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to * flush them when they get mapped into an executable vm-area. */ -void -dma_mark_clean(void *addr, size_t size) +void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) { - unsigned long pg_addr, end; + unsigned long pfn = PHYS_PFN(paddr); - pg_addr = PAGE_ALIGN((unsigned long) addr); - end = (unsigned long) addr + size; - while (pg_addr + PAGE_SIZE <= end) { - struct page *page = virt_to_page(pg_addr); - set_bit(PG_arch_1, &page->flags); - pg_addr += PAGE_SIZE; - } + do { + set_bit(PG_arch_1, &pfn_to_page(pfn)->flags); + } while (++pfn <= PHYS_PFN(paddr + size - 1)); } inline void diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 833e80b46eb2..989cf872b98c 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -441,21 +441,8 @@ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr, xen_dma_unmap_page(hwdev, dev_addr, size, dir, attrs); /* NOTE: We use dev_addr here, not paddr! */ - if (is_xen_swiotlb_buffer(dev_addr)) { + if (is_xen_swiotlb_buffer(dev_addr)) swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs); - return; - } - - if (dir != DMA_FROM_DEVICE) - return; - - /* - * phys_to_virt doesn't work with hihgmem page but we could - * call dma_mark_clean() with hihgmem page here. However, we - * are fine since dma_mark_clean() is null on POWERPC. We can - * make dma_mark_clean() take a physical address if necessary. - */ - dma_mark_clean(phys_to_virt(paddr), size); } static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, @@ -493,11 +480,6 @@ xen_swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, if (target == SYNC_FOR_DEVICE) xen_dma_sync_single_for_device(hwdev, dev_addr, size, dir); - - if (dir != DMA_FROM_DEVICE) - return; - - dma_mark_clean(phys_to_virt(paddr), size); } void diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 6e5a47ae7d64..1aa73f4907ae 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -48,14 +48,6 @@ static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) return __sme_clr(__dma_to_phys(dev, daddr)); } -#ifdef CONFIG_ARCH_HAS_DMA_MARK_CLEAN -void dma_mark_clean(void *addr, size_t size); -#else -static inline void dma_mark_clean(void *addr, size_t size) -{ -} -#endif /* CONFIG_ARCH_HAS_DMA_MARK_CLEAN */ - u64 dma_direct_get_required_mask(struct device *dev); void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 19ba8e473d71..2e126bac5d7d 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -706,21 +706,8 @@ void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) arch_sync_dma_for_cpu(hwdev, paddr, size, dir); - if (is_swiotlb_buffer(paddr)) { + if (is_swiotlb_buffer(paddr)) swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs); - return; - } - - if (dir != DMA_FROM_DEVICE) - return; - - /* - * phys_to_virt doesn't work with hihgmem page but we could - * call dma_mark_clean() with hihgmem page here. However, we - * are fine since dma_mark_clean() is null on POWERPC. We can - * make dma_mark_clean() take a physical address if necessary. - */ - dma_mark_clean(phys_to_virt(paddr), size); } /* @@ -750,9 +737,6 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, if (!dev_is_dma_coherent(hwdev) && target == SYNC_FOR_DEVICE) arch_sync_dma_for_device(hwdev, paddr, size, dir); - - if (!is_swiotlb_buffer(paddr) && dir == DMA_FROM_DEVICE) - dma_mark_clean(phys_to_virt(paddr), size); } void From 58dfd4ac022037c6a562e92fc6d2a778819b2162 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 3 Dec 2018 07:43:05 +0100 Subject: [PATCH 61/73] dma-direct: improve addressability error reporting Only report report a DMA addressability report once to avoid spewing the kernel log with repeated message. Also provide a stack trace to make it easy to find the actual caller that caused the problem. Last but not least move the actual check into the fast path and only leave the error reporting in a helper. Signed-off-by: Christoph Hellwig Acked-by: Jesper Dangaard Brouer Tested-by: Jesper Dangaard Brouer Tested-by: Tony Luck --- kernel/dma/direct.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 308f88a750c8..edb24f94ea1e 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -30,27 +30,16 @@ static inline bool force_dma_unencrypted(void) return sev_active(); } -static bool -check_addr(struct device *dev, dma_addr_t dma_addr, size_t size, - const char *caller) +static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size) { - if (unlikely(dev && !dma_capable(dev, dma_addr, size))) { - if (!dev->dma_mask) { - dev_err(dev, - "%s: call on device without dma_mask\n", - caller); - return false; - } - - if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_mask) { - dev_err(dev, - "%s: overflow %pad+%zu of device mask %llx bus mask %llx\n", - caller, &dma_addr, size, - *dev->dma_mask, dev->bus_dma_mask); - } - return false; + if (!dev->dma_mask) { + dev_err_once(dev, "DMA map on device without dma_mask\n"); + } else if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_mask) { + dev_err_once(dev, + "overflow %pad+%zu of DMA mask %llx bus mask %llx\n", + &dma_addr, size, *dev->dma_mask, dev->bus_dma_mask); } - return true; + WARN_ON_ONCE(1); } static inline dma_addr_t phys_to_dma_direct(struct device *dev, @@ -288,8 +277,10 @@ dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, phys_addr_t phys = page_to_phys(page) + offset; dma_addr_t dma_addr = phys_to_dma(dev, phys); - if (!check_addr(dev, dma_addr, size, __func__)) + if (unlikely(dev && !dma_capable(dev, dma_addr, size))) { + report_addr(dev, dma_addr, size); return DMA_MAPPING_ERROR; + } if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) dma_direct_sync_single_for_device(dev, dma_addr, size, dir); @@ -306,8 +297,11 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, BUG_ON(!sg_page(sg)); sg_dma_address(sg) = phys_to_dma(dev, sg_phys(sg)); - if (!check_addr(dev, sg_dma_address(sg), sg->length, __func__)) + if (unlikely(dev && !dma_capable(dev, sg_dma_address(sg), + sg->length))) { + report_addr(dev, sg_dma_address(sg), sg->length); return 0; + } sg_dma_len(sg) = sg->length; } From 17ac524719f3fc88c1a90528f4789e4b4f618512 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 3 Dec 2018 11:14:09 +0100 Subject: [PATCH 62/73] dma-direct: use dma_direct_map_page to implement dma_direct_map_sg No need to duplicate the mapping logic. Signed-off-by: Christoph Hellwig Acked-by: Jesper Dangaard Brouer Tested-by: Jesper Dangaard Brouer Tested-by: Tony Luck --- kernel/dma/direct.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index edb24f94ea1e..d45306473c90 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -217,6 +217,7 @@ static void dma_direct_sync_single_for_device(struct device *dev, arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir); } +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) static void dma_direct_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir) { @@ -229,6 +230,7 @@ static void dma_direct_sync_sg_for_device(struct device *dev, for_each_sg(sgl, sg, nents, i) arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir); } +#endif #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) @@ -294,19 +296,13 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, struct scatterlist *sg; for_each_sg(sgl, sg, nents, i) { - BUG_ON(!sg_page(sg)); - - sg_dma_address(sg) = phys_to_dma(dev, sg_phys(sg)); - if (unlikely(dev && !dma_capable(dev, sg_dma_address(sg), - sg->length))) { - report_addr(dev, sg_dma_address(sg), sg->length); + sg->dma_address = dma_direct_map_page(dev, sg_page(sg), + sg->offset, sg->length, dir, attrs); + if (sg->dma_address == DMA_MAPPING_ERROR) return 0; - } sg_dma_len(sg) = sg->length; } - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_direct_sync_sg_for_device(dev, sgl, nents, dir); return nents; } From 55897af63091ebc2c3f239c6a6666f748113ac50 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 3 Dec 2018 11:43:54 +0100 Subject: [PATCH 63/73] dma-direct: merge swiotlb_dma_ops into the dma_direct code While the dma-direct code is (relatively) clean and simple we actually have to use the swiotlb ops for the mapping on many architectures due to devices with addressing limits. Instead of keeping two implementations around this commit allows the dma-direct implementation to call the swiotlb bounce buffering functions and thus share the guts of the mapping implementation. This also simplified the dma-mapping setup on a few architectures where we don't have to differenciate which implementation to use. Signed-off-by: Christoph Hellwig Acked-by: Jesper Dangaard Brouer Tested-by: Jesper Dangaard Brouer Tested-by: Tony Luck --- arch/arm64/mm/dma-mapping.c | 2 +- arch/ia64/hp/common/hwsw_iommu.c | 2 +- arch/ia64/hp/common/sba_iommu.c | 6 +- arch/ia64/kernel/dma-mapping.c | 2 +- arch/mips/include/asm/dma-mapping.h | 2 - arch/powerpc/kernel/dma-swiotlb.c | 16 +- arch/riscv/include/asm/dma-mapping.h | 15 -- arch/x86/kernel/pci-swiotlb.c | 4 +- arch/x86/mm/mem_encrypt.c | 7 - arch/x86/pci/sta2x11-fixup.c | 1 - include/linux/dma-direct.h | 12 ++ include/linux/swiotlb.h | 74 ++++----- kernel/dma/direct.c | 113 +++++++++---- kernel/dma/swiotlb.c | 232 ++------------------------- 14 files changed, 150 insertions(+), 338 deletions(-) delete mode 100644 arch/riscv/include/asm/dma-mapping.h diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 6ff6ec8806c1..ab1e417204d0 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -463,7 +463,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) { if (!dev->dma_ops) - dev->dma_ops = &swiotlb_dma_ops; + dev->dma_ops = &dma_direct_ops; dev->dma_coherent = coherent; __iommu_setup_dma_ops(dev, dma_base, size, iommu); diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c index 58969039bed2..f40ca499b246 100644 --- a/arch/ia64/hp/common/hwsw_iommu.c +++ b/arch/ia64/hp/common/hwsw_iommu.c @@ -38,7 +38,7 @@ static inline int use_swiotlb(struct device *dev) const struct dma_map_ops *hwsw_dma_get_ops(struct device *dev) { if (use_swiotlb(dev)) - return &swiotlb_dma_ops; + return &dma_direct_ops; return &sba_dma_ops; } EXPORT_SYMBOL(hwsw_dma_get_ops); diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 0d21c0b5b23d..5ee74820a0f6 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -2065,8 +2065,6 @@ static int __init acpi_sba_ioc_init_acpi(void) /* This has to run before acpi_scan_init(). */ arch_initcall(acpi_sba_ioc_init_acpi); -extern const struct dma_map_ops swiotlb_dma_ops; - static int __init sba_init(void) { @@ -2080,7 +2078,7 @@ sba_init(void) * a successful kdump kernel boot is to use the swiotlb. */ if (is_kdump_kernel()) { - dma_ops = &swiotlb_dma_ops; + dma_ops = &dma_direct_ops; if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0) panic("Unable to initialize software I/O TLB:" " Try machvec=dig boot option"); @@ -2102,7 +2100,7 @@ sba_init(void) * If we didn't find something sba_iommu can claim, we * need to setup the swiotlb and switch to the dig machvec. */ - dma_ops = &swiotlb_dma_ops; + dma_ops = &dma_direct_ops; if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0) panic("Unable to find SBA IOMMU or initialize " "software I/O TLB: Try machvec=dig boot option"); diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c index 36dd6aa6d759..80cd3e1ea95a 100644 --- a/arch/ia64/kernel/dma-mapping.c +++ b/arch/ia64/kernel/dma-mapping.c @@ -36,7 +36,7 @@ long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, void __init swiotlb_dma_init(void) { - dma_ops = &swiotlb_dma_ops; + dma_ops = &dma_direct_ops; swiotlb_init(1); } #endif diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h index b4c477eb46ce..69f914667f3e 100644 --- a/arch/mips/include/asm/dma-mapping.h +++ b/arch/mips/include/asm/dma-mapping.h @@ -10,8 +10,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { #if defined(CONFIG_MACH_JAZZ) return &jazz_dma_ops; -#elif defined(CONFIG_SWIOTLB) - return &swiotlb_dma_ops; #else return &dma_direct_ops; #endif diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 3d8df2cf8be9..430a7d0aa2cb 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -50,15 +50,15 @@ const struct dma_map_ops powerpc_swiotlb_dma_ops = { .alloc = __dma_nommu_alloc_coherent, .free = __dma_nommu_free_coherent, .mmap = dma_nommu_mmap_coherent, - .map_sg = swiotlb_map_sg_attrs, - .unmap_sg = swiotlb_unmap_sg_attrs, + .map_sg = dma_direct_map_sg, + .unmap_sg = dma_direct_unmap_sg, .dma_supported = swiotlb_dma_supported, - .map_page = swiotlb_map_page, - .unmap_page = swiotlb_unmap_page, - .sync_single_for_cpu = swiotlb_sync_single_for_cpu, - .sync_single_for_device = swiotlb_sync_single_for_device, - .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = swiotlb_sync_sg_for_device, + .map_page = dma_direct_map_page, + .unmap_page = dma_direct_unmap_page, + .sync_single_for_cpu = dma_direct_sync_single_for_cpu, + .sync_single_for_device = dma_direct_sync_single_for_device, + .sync_sg_for_cpu = dma_direct_sync_sg_for_cpu, + .sync_sg_for_device = dma_direct_sync_sg_for_device, .get_required_mask = swiotlb_powerpc_get_required, }; diff --git a/arch/riscv/include/asm/dma-mapping.h b/arch/riscv/include/asm/dma-mapping.h deleted file mode 100644 index 8facc1c8fa05..000000000000 --- a/arch/riscv/include/asm/dma-mapping.h +++ /dev/null @@ -1,15 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#ifndef _RISCV_ASM_DMA_MAPPING_H -#define _RISCV_ASM_DMA_MAPPING_H 1 - -#ifdef CONFIG_SWIOTLB -#include -static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) -{ - return &swiotlb_dma_ops; -} -#else -#include -#endif /* CONFIG_SWIOTLB */ - -#endif /* _RISCV_ASM_DMA_MAPPING_H */ diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index bd08b9e1c9e2..5f5302028a9a 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -62,10 +62,8 @@ IOMMU_INIT(pci_swiotlb_detect_4gb, void __init pci_swiotlb_init(void) { - if (swiotlb) { + if (swiotlb) swiotlb_init(0); - dma_ops = &swiotlb_dma_ops; - } } void __init pci_swiotlb_late_init(void) diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 006f373f54ab..385afa2b9e17 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -380,13 +380,6 @@ void __init mem_encrypt_init(void) /* Call into SWIOTLB to update the SWIOTLB DMA buffers */ swiotlb_update_mem_attributes(); - /* - * With SEV, DMA operations cannot use encryption, we need to use - * SWIOTLB to bounce buffer DMA operation. - */ - if (sev_active()) - dma_ops = &swiotlb_dma_ops; - /* * With SEV, we need to unroll the rep string I/O instructions. */ diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index 7a5bafb76d77..3cdafea55ab6 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -168,7 +168,6 @@ static void sta2x11_setup_pdev(struct pci_dev *pdev) return; pci_set_consistent_dma_mask(pdev, STA2X11_AMBA_SIZE - 1); pci_set_dma_mask(pdev, STA2X11_AMBA_SIZE - 1); - pdev->dev.dma_ops = &swiotlb_dma_ops; pdev->dev.archdata.is_sta2x11 = true; /* We must enable all devices as master, for audio DMA to work */ diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 1aa73f4907ae..3b0a3ea3876d 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -63,7 +63,19 @@ void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page) dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs); +void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs); int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs); +void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs); +void dma_direct_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir); +void dma_direct_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir); +void dma_direct_sync_single_for_cpu(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir); +void dma_direct_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir); int dma_direct_supported(struct device *dev, u64 mask); #endif /* _LINUX_DMA_DIRECT_H */ diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 14aec0b70dd9..7c007ed7505f 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -16,8 +16,6 @@ enum swiotlb_force { SWIOTLB_NO_FORCE, /* swiotlb=noforce */ }; -extern enum swiotlb_force swiotlb_force; - /* * Maximum allowable number of contiguous slabs to map, * must be a power of 2. What is the appropriate value ? @@ -62,56 +60,44 @@ extern void swiotlb_tbl_sync_single(struct device *hwdev, size_t size, enum dma_data_direction dir, enum dma_sync_target target); -/* Accessory functions. */ - -extern dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs); -extern void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs); - -extern int -swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, - enum dma_data_direction dir, - unsigned long attrs); - -extern void -swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, - int nelems, enum dma_data_direction dir, - unsigned long attrs); - -extern void -swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir); - -extern void -swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir); - -extern void -swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir); - -extern void -swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir); - extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); #ifdef CONFIG_SWIOTLB -extern void __init swiotlb_exit(void); +extern enum swiotlb_force swiotlb_force; +extern phys_addr_t io_tlb_start, io_tlb_end; + +static inline bool is_swiotlb_buffer(phys_addr_t paddr) +{ + return paddr >= io_tlb_start && paddr < io_tlb_end; +} + +bool swiotlb_map(struct device *dev, phys_addr_t *phys, dma_addr_t *dma_addr, + size_t size, enum dma_data_direction dir, unsigned long attrs); +void __init swiotlb_exit(void); unsigned int swiotlb_max_segment(void); #else -static inline void swiotlb_exit(void) { } -static inline unsigned int swiotlb_max_segment(void) { return 0; } -#endif +#define swiotlb_force SWIOTLB_NO_FORCE +static inline bool is_swiotlb_buffer(phys_addr_t paddr) +{ + return false; +} +static inline bool swiotlb_map(struct device *dev, phys_addr_t *phys, + dma_addr_t *dma_addr, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + return false; +} +static inline void swiotlb_exit(void) +{ +} +static inline unsigned int swiotlb_max_segment(void) +{ + return 0; +} +#endif /* CONFIG_SWIOTLB */ extern void swiotlb_print_info(void); extern void swiotlb_set_max_segment(unsigned int); -extern const struct dma_map_ops swiotlb_dma_ops; - #endif /* __LINUX_SWIOTLB_H */ diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index d45306473c90..85d8286a0ba2 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -13,6 +13,7 @@ #include #include #include +#include /* * Most architectures use ZONE_DMA for the first 16 Megabytes, but @@ -209,69 +210,110 @@ void dma_direct_free(struct device *dev, size_t size, dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs); } -static void dma_direct_sync_single_for_device(struct device *dev, +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_SWIOTLB) +void dma_direct_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { - if (dev_is_dma_coherent(dev)) - return; - arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir); + phys_addr_t paddr = dma_to_phys(dev, addr); + + if (unlikely(is_swiotlb_buffer(paddr))) + swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE); + + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_device(dev, paddr, size, dir); } -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) -static void dma_direct_sync_sg_for_device(struct device *dev, +void dma_direct_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir) { struct scatterlist *sg; int i; - if (dev_is_dma_coherent(dev)) - return; + for_each_sg(sgl, sg, nents, i) { + if (unlikely(is_swiotlb_buffer(sg_phys(sg)))) + swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length, + dir, SYNC_FOR_DEVICE); - for_each_sg(sgl, sg, nents, i) - arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir); + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, + dir); + } } #endif #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) -static void dma_direct_sync_single_for_cpu(struct device *dev, + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ + defined(CONFIG_SWIOTLB) +void dma_direct_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { - if (dev_is_dma_coherent(dev)) - return; - arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir); - arch_sync_dma_for_cpu_all(dev); + phys_addr_t paddr = dma_to_phys(dev, addr); + + if (!dev_is_dma_coherent(dev)) { + arch_sync_dma_for_cpu(dev, paddr, size, dir); + arch_sync_dma_for_cpu_all(dev); + } + + if (unlikely(is_swiotlb_buffer(paddr))) + swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU); } -static void dma_direct_sync_sg_for_cpu(struct device *dev, +void dma_direct_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir) { struct scatterlist *sg; int i; - if (dev_is_dma_coherent(dev)) - return; + for_each_sg(sgl, sg, nents, i) { + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); + + if (unlikely(is_swiotlb_buffer(sg_phys(sg)))) + swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length, dir, + SYNC_FOR_CPU); + } - for_each_sg(sgl, sg, nents, i) - arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); - arch_sync_dma_for_cpu_all(dev); + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_cpu_all(dev); } -static void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, +void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { + phys_addr_t phys = dma_to_phys(dev, addr); + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) dma_direct_sync_single_for_cpu(dev, addr, size, dir); + + if (unlikely(is_swiotlb_buffer(phys))) + swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); } -static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, +void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir, + attrs); +} +#else +void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs) { - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_direct_sync_sg_for_cpu(dev, sgl, nents, dir); } #endif +static inline bool dma_direct_possible(struct device *dev, dma_addr_t dma_addr, + size_t size) +{ + return swiotlb_force != SWIOTLB_FORCE && + (!dev || dma_capable(dev, dma_addr, size)); +} + dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) @@ -279,13 +321,14 @@ dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, phys_addr_t phys = page_to_phys(page) + offset; dma_addr_t dma_addr = phys_to_dma(dev, phys); - if (unlikely(dev && !dma_capable(dev, dma_addr, size))) { + if (unlikely(!dma_direct_possible(dev, dma_addr, size)) && + !swiotlb_map(dev, &phys, &dma_addr, size, dir, attrs)) { report_addr(dev, dma_addr, size); return DMA_MAPPING_ERROR; } - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_direct_sync_single_for_device(dev, dma_addr, size, dir); + if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + arch_sync_dma_for_device(dev, phys, size, dir); return dma_addr; } @@ -299,11 +342,15 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, sg->dma_address = dma_direct_map_page(dev, sg_page(sg), sg->offset, sg->length, dir, attrs); if (sg->dma_address == DMA_MAPPING_ERROR) - return 0; + goto out_unmap; sg_dma_len(sg) = sg->length; } return nents; + +out_unmap: + dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); + return 0; } /* @@ -331,12 +378,14 @@ const struct dma_map_ops dma_direct_ops = { .free = dma_direct_free, .map_page = dma_direct_map_page, .map_sg = dma_direct_map_sg, -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_SWIOTLB) .sync_single_for_device = dma_direct_sync_single_for_device, .sync_sg_for_device = dma_direct_sync_sg_for_device, #endif #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ + defined(CONFIG_SWIOTLB) .sync_single_for_cpu = dma_direct_sync_single_for_cpu, .sync_sg_for_cpu = dma_direct_sync_sg_for_cpu, .unmap_page = dma_direct_unmap_page, diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 2e126bac5d7d..d6361776dc5c 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -21,7 +21,6 @@ #include #include -#include #include #include #include @@ -65,7 +64,7 @@ enum swiotlb_force swiotlb_force; * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this * API. */ -static phys_addr_t io_tlb_start, io_tlb_end; +phys_addr_t io_tlb_start, io_tlb_end; /* * The number of IO TLB blocks (in groups of 64) between io_tlb_start and @@ -383,11 +382,6 @@ void __init swiotlb_exit(void) max_segment = 0; } -static int is_swiotlb_buffer(phys_addr_t paddr) -{ - return paddr >= io_tlb_start && paddr < io_tlb_end; -} - /* * Bounce: copy the swiotlb buffer back to the original dma location */ @@ -623,221 +617,36 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, } } -static dma_addr_t swiotlb_bounce_page(struct device *dev, phys_addr_t *phys, +/* + * Create a swiotlb mapping for the buffer at @phys, and in case of DMAing + * to the device copy the data into it as well. + */ +bool swiotlb_map(struct device *dev, phys_addr_t *phys, dma_addr_t *dma_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - dma_addr_t dma_addr; + trace_swiotlb_bounced(dev, *dma_addr, size, swiotlb_force); if (unlikely(swiotlb_force == SWIOTLB_NO_FORCE)) { dev_warn_ratelimited(dev, "Cannot do DMA to address %pa\n", phys); - return DMA_MAPPING_ERROR; + return false; } /* Oh well, have to allocate and map a bounce buffer. */ *phys = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start), *phys, size, dir, attrs); if (*phys == DMA_MAPPING_ERROR) - return DMA_MAPPING_ERROR; + return false; /* Ensure that the address returned is DMA'ble */ - dma_addr = __phys_to_dma(dev, *phys); - if (unlikely(!dma_capable(dev, dma_addr, size))) { + *dma_addr = __phys_to_dma(dev, *phys); + if (unlikely(!dma_capable(dev, *dma_addr, size))) { swiotlb_tbl_unmap_single(dev, *phys, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); - return DMA_MAPPING_ERROR; + return false; } - return dma_addr; -} - -/* - * Map a single buffer of the indicated size for DMA in streaming mode. The - * physical address to use is returned. - * - * Once the device is given the dma address, the device owns this memory until - * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed. - */ -dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - phys_addr_t phys = page_to_phys(page) + offset; - dma_addr_t dev_addr = phys_to_dma(dev, phys); - - BUG_ON(dir == DMA_NONE); - /* - * If the address happens to be in the device's DMA window, - * we can safely return the device addr and not worry about bounce - * buffering it. - */ - if (!dma_capable(dev, dev_addr, size) || - swiotlb_force == SWIOTLB_FORCE) { - trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); - dev_addr = swiotlb_bounce_page(dev, &phys, size, dir, attrs); - } - - if (!dev_is_dma_coherent(dev) && - (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0 && - dev_addr != DMA_MAPPING_ERROR) - arch_sync_dma_for_device(dev, phys, size, dir); - - return dev_addr; -} - -/* - * Unmap a single streaming mode DMA translation. The dma_addr and size must - * match what was provided for in a previous swiotlb_map_page call. All - * other usages are undefined. - * - * After this call, reads by the cpu to the buffer are guaranteed to see - * whatever the device wrote there. - */ -void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); - - BUG_ON(dir == DMA_NONE); - - if (!dev_is_dma_coherent(hwdev) && - (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) - arch_sync_dma_for_cpu(hwdev, paddr, size, dir); - - if (is_swiotlb_buffer(paddr)) - swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs); -} - -/* - * Make physical memory consistent for a single streaming mode DMA translation - * after a transfer. - * - * If you perform a swiotlb_map_page() but wish to interrogate the buffer - * using the cpu, yet do not wish to teardown the dma mapping, you must - * call this function before doing so. At the next point you give the dma - * address back to the card, you must first perform a - * swiotlb_dma_sync_for_device, and then the device again owns the buffer - */ -static void -swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - enum dma_sync_target target) -{ - phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); - - BUG_ON(dir == DMA_NONE); - - if (!dev_is_dma_coherent(hwdev) && target == SYNC_FOR_CPU) - arch_sync_dma_for_cpu(hwdev, paddr, size, dir); - - if (is_swiotlb_buffer(paddr)) - swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target); - - if (!dev_is_dma_coherent(hwdev) && target == SYNC_FOR_DEVICE) - arch_sync_dma_for_device(hwdev, paddr, size, dir); -} - -void -swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir) -{ - swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU); -} - -void -swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir) -{ - swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE); -} - -/* - * Map a set of buffers described by scatterlist in streaming mode for DMA. - * This is the scatter-gather version of the above swiotlb_map_page - * interface. Here the scatter gather list elements are each tagged with the - * appropriate dma address and length. They are obtained via - * sg_dma_{address,length}(SG). - * - * Device ownership issues as mentioned above for swiotlb_map_page are the - * same here. - */ -int -swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nelems, - enum dma_data_direction dir, unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nelems, i) { - sg->dma_address = swiotlb_map_page(dev, sg_page(sg), sg->offset, - sg->length, dir, attrs); - if (sg->dma_address == DMA_MAPPING_ERROR) - goto out_error; - sg_dma_len(sg) = sg->length; - } - - return nelems; - -out_error: - swiotlb_unmap_sg_attrs(dev, sgl, i, dir, - attrs | DMA_ATTR_SKIP_CPU_SYNC); - sg_dma_len(sgl) = 0; - return 0; -} - -/* - * Unmap a set of streaming mode DMA translations. Again, cpu read rules - * concerning calls here are the same as for swiotlb_unmap_page() above. - */ -void -swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, - int nelems, enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - BUG_ON(dir == DMA_NONE); - - for_each_sg(sgl, sg, nelems, i) - swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg), dir, - attrs); -} - -/* - * Make physical memory consistent for a set of streaming mode DMA translations - * after a transfer. - * - * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules - * and usage. - */ -static void -swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, - int nelems, enum dma_data_direction dir, - enum dma_sync_target target) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nelems, i) - swiotlb_sync_single(hwdev, sg->dma_address, - sg_dma_len(sg), dir, target); -} - -void -swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir) -{ - swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU); -} - -void -swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir) -{ - swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE); + return true; } /* @@ -851,18 +660,3 @@ swiotlb_dma_supported(struct device *hwdev, u64 mask) { return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask; } - -const struct dma_map_ops swiotlb_dma_ops = { - .alloc = dma_direct_alloc, - .free = dma_direct_free, - .sync_single_for_cpu = swiotlb_sync_single_for_cpu, - .sync_single_for_device = swiotlb_sync_single_for_device, - .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = swiotlb_sync_sg_for_device, - .map_sg = swiotlb_map_sg_attrs, - .unmap_sg = swiotlb_unmap_sg_attrs, - .map_page = swiotlb_map_page, - .unmap_page = swiotlb_unmap_page, - .dma_supported = dma_direct_supported, -}; -EXPORT_SYMBOL(swiotlb_dma_ops); From 190d4e5916a2d70a11009022b968fca948fb5dc7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Dec 2018 13:37:00 -0800 Subject: [PATCH 64/73] vmd: use the proper dma_* APIs instead of direct methods calls With the bypass support for the direct mapping we might not always have methods to call, so use the proper APIs instead. The only downside is that we will create two dma-debug entries for each mapping if CONFIG_DMA_DEBUG is enabled. Signed-off-by: Christoph Hellwig Acked-by: Jesper Dangaard Brouer Tested-by: Jesper Dangaard Brouer Tested-by: Tony Luck --- drivers/pci/controller/vmd.c | 42 +++++++++++++++--------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index 98ce79eac128..3890812cdf87 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -307,39 +307,32 @@ static struct device *to_vmd_dev(struct device *dev) return &vmd->dev->dev; } -static const struct dma_map_ops *vmd_dma_ops(struct device *dev) -{ - return get_dma_ops(to_vmd_dev(dev)); -} - static void *vmd_alloc(struct device *dev, size_t size, dma_addr_t *addr, gfp_t flag, unsigned long attrs) { - return vmd_dma_ops(dev)->alloc(to_vmd_dev(dev), size, addr, flag, - attrs); + return dma_alloc_attrs(to_vmd_dev(dev), size, addr, flag, attrs); } static void vmd_free(struct device *dev, size_t size, void *vaddr, dma_addr_t addr, unsigned long attrs) { - return vmd_dma_ops(dev)->free(to_vmd_dev(dev), size, vaddr, addr, - attrs); + return dma_free_attrs(to_vmd_dev(dev), size, vaddr, addr, attrs); } static int vmd_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t addr, size_t size, unsigned long attrs) { - return vmd_dma_ops(dev)->mmap(to_vmd_dev(dev), vma, cpu_addr, addr, - size, attrs); + return dma_mmap_attrs(to_vmd_dev(dev), vma, cpu_addr, addr, size, + attrs); } static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t addr, size_t size, unsigned long attrs) { - return vmd_dma_ops(dev)->get_sgtable(to_vmd_dev(dev), sgt, cpu_addr, - addr, size, attrs); + return dma_get_sgtable_attrs(to_vmd_dev(dev), sgt, cpu_addr, addr, size, + attrs); } static dma_addr_t vmd_map_page(struct device *dev, struct page *page, @@ -347,61 +340,60 @@ static dma_addr_t vmd_map_page(struct device *dev, struct page *page, enum dma_data_direction dir, unsigned long attrs) { - return vmd_dma_ops(dev)->map_page(to_vmd_dev(dev), page, offset, size, - dir, attrs); + return dma_map_page_attrs(to_vmd_dev(dev), page, offset, size, dir, + attrs); } static void vmd_unmap_page(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - vmd_dma_ops(dev)->unmap_page(to_vmd_dev(dev), addr, size, dir, attrs); + dma_unmap_page_attrs(to_vmd_dev(dev), addr, size, dir, attrs); } static int vmd_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { - return vmd_dma_ops(dev)->map_sg(to_vmd_dev(dev), sg, nents, dir, attrs); + return dma_map_sg_attrs(to_vmd_dev(dev), sg, nents, dir, attrs); } static void vmd_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { - vmd_dma_ops(dev)->unmap_sg(to_vmd_dev(dev), sg, nents, dir, attrs); + dma_unmap_sg_attrs(to_vmd_dev(dev), sg, nents, dir, attrs); } static void vmd_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { - vmd_dma_ops(dev)->sync_single_for_cpu(to_vmd_dev(dev), addr, size, dir); + dma_sync_single_for_cpu(to_vmd_dev(dev), addr, size, dir); } static void vmd_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { - vmd_dma_ops(dev)->sync_single_for_device(to_vmd_dev(dev), addr, size, - dir); + dma_sync_single_for_device(to_vmd_dev(dev), addr, size, dir); } static void vmd_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir) { - vmd_dma_ops(dev)->sync_sg_for_cpu(to_vmd_dev(dev), sg, nents, dir); + dma_sync_sg_for_cpu(to_vmd_dev(dev), sg, nents, dir); } static void vmd_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir) { - vmd_dma_ops(dev)->sync_sg_for_device(to_vmd_dev(dev), sg, nents, dir); + dma_sync_sg_for_device(to_vmd_dev(dev), sg, nents, dir); } static int vmd_dma_supported(struct device *dev, u64 mask) { - return vmd_dma_ops(dev)->dma_supported(to_vmd_dev(dev), mask); + return dma_supported(to_vmd_dev(dev), mask); } static u64 vmd_get_required_mask(struct device *dev) { - return vmd_dma_ops(dev)->get_required_mask(to_vmd_dev(dev)); + return dma_get_required_mask(to_vmd_dev(dev)); } static void vmd_teardown_dma_ops(struct vmd_dev *vmd) From 356da6d0cde3323236977fce54c1f9612a742036 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Dec 2018 13:39:32 -0800 Subject: [PATCH 65/73] dma-mapping: bypass indirect calls for dma-direct Avoid expensive indirect calls in the fast path DMA mapping operations by directly calling the dma_direct_* ops if we are using the directly mapped DMA operations. Signed-off-by: Christoph Hellwig Acked-by: Jesper Dangaard Brouer Tested-by: Jesper Dangaard Brouer Tested-by: Tony Luck --- arch/alpha/include/asm/dma-mapping.h | 2 +- arch/arc/mm/cache.c | 2 +- arch/arm/include/asm/dma-mapping.h | 2 +- arch/arm/mm/dma-mapping-nommu.c | 14 +--- arch/arm64/mm/dma-mapping.c | 3 - arch/ia64/hp/common/hwsw_iommu.c | 2 +- arch/ia64/hp/common/sba_iommu.c | 4 +- arch/ia64/kernel/dma-mapping.c | 1 - arch/mips/include/asm/dma-mapping.h | 2 +- arch/parisc/kernel/setup.c | 4 - arch/sparc/include/asm/dma-mapping.h | 4 +- arch/x86/kernel/pci-dma.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +- drivers/iommu/amd_iommu.c | 13 +--- include/asm-generic/dma-mapping.h | 2 +- include/linux/dma-direct.h | 17 ---- include/linux/dma-mapping.h | 111 +++++++++++++++++++++++---- include/linux/dma-noncoherent.h | 5 +- kernel/dma/direct.c | 37 ++------- kernel/dma/mapping.c | 40 ++++++---- 20 files changed, 150 insertions(+), 119 deletions(-) diff --git a/arch/alpha/include/asm/dma-mapping.h b/arch/alpha/include/asm/dma-mapping.h index 8beeafd4f68e..0ee6a5c99b16 100644 --- a/arch/alpha/include/asm/dma-mapping.h +++ b/arch/alpha/include/asm/dma-mapping.h @@ -7,7 +7,7 @@ extern const struct dma_map_ops alpha_pci_ops; static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { #ifdef CONFIG_ALPHA_JENSEN - return &dma_direct_ops; + return NULL; #else return &alpha_pci_ops; #endif diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index f2701c13a66b..e188bb3ede53 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -1280,7 +1280,7 @@ void __init arc_cache_init_master(void) /* * In case of IOC (say IOC+SLC case), pointers above could still be set * but end up not being relevant as the first function in chain is not - * called at all for @dma_direct_ops + * called at all for devices using coherent DMA. * arch_sync_dma_for_cpu() -> dma_cache_*() -> __dma_cache_*() */ } diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 965b7c846ecb..31d3b96f0f4b 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -18,7 +18,7 @@ extern const struct dma_map_ops arm_coherent_dma_ops; static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - return IS_ENABLED(CONFIG_MMU) ? &arm_dma_ops : &dma_direct_ops; + return IS_ENABLED(CONFIG_MMU) ? &arm_dma_ops : NULL; } #ifdef __arch_page_to_dma diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index 712416ecd8e6..f304b10e23a4 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -22,7 +22,7 @@ #include "dma.h" /* - * dma_direct_ops is used if + * The generic direct mapping code is used if * - MMU/MPU is off * - cpu is v7m w/o cache support * - device is coherent @@ -209,16 +209,9 @@ const struct dma_map_ops arm_nommu_dma_ops = { }; EXPORT_SYMBOL(arm_nommu_dma_ops); -static const struct dma_map_ops *arm_nommu_get_dma_map_ops(bool coherent) -{ - return coherent ? &dma_direct_ops : &arm_nommu_dma_ops; -} - void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) { - const struct dma_map_ops *dma_ops; - if (IS_ENABLED(CONFIG_CPU_V7M)) { /* * Cache support for v7m is optional, so can be treated as @@ -234,7 +227,6 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, dev->archdata.dma_coherent = (get_cr() & CR_M) ? coherent : true; } - dma_ops = arm_nommu_get_dma_map_ops(dev->archdata.dma_coherent); - - set_dma_ops(dev, dma_ops); + if (!dev->archdata.dma_coherent) + set_dma_ops(dev, &arm_nommu_dma_ops); } diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index ab1e417204d0..95eda81e3f2d 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -462,9 +462,6 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) { - if (!dev->dma_ops) - dev->dma_ops = &dma_direct_ops; - dev->dma_coherent = coherent; __iommu_setup_dma_ops(dev, dma_base, size, iommu); diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c index f40ca499b246..8840ed97712f 100644 --- a/arch/ia64/hp/common/hwsw_iommu.c +++ b/arch/ia64/hp/common/hwsw_iommu.c @@ -38,7 +38,7 @@ static inline int use_swiotlb(struct device *dev) const struct dma_map_ops *hwsw_dma_get_ops(struct device *dev) { if (use_swiotlb(dev)) - return &dma_direct_ops; + return NULL; return &sba_dma_ops; } EXPORT_SYMBOL(hwsw_dma_get_ops); diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 5ee74820a0f6..5a361e51cb1e 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -2078,7 +2078,7 @@ sba_init(void) * a successful kdump kernel boot is to use the swiotlb. */ if (is_kdump_kernel()) { - dma_ops = &dma_direct_ops; + dma_ops = NULL; if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0) panic("Unable to initialize software I/O TLB:" " Try machvec=dig boot option"); @@ -2100,7 +2100,7 @@ sba_init(void) * If we didn't find something sba_iommu can claim, we * need to setup the swiotlb and switch to the dig machvec. */ - dma_ops = &dma_direct_ops; + dma_ops = NULL; if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0) panic("Unable to find SBA IOMMU or initialize " "software I/O TLB: Try machvec=dig boot option"); diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c index 80cd3e1ea95a..ad7d9963de34 100644 --- a/arch/ia64/kernel/dma-mapping.c +++ b/arch/ia64/kernel/dma-mapping.c @@ -36,7 +36,6 @@ long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, void __init swiotlb_dma_init(void) { - dma_ops = &dma_direct_ops; swiotlb_init(1); } #endif diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h index 69f914667f3e..20dfaad3a55d 100644 --- a/arch/mips/include/asm/dma-mapping.h +++ b/arch/mips/include/asm/dma-mapping.h @@ -11,7 +11,7 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) #if defined(CONFIG_MACH_JAZZ) return &jazz_dma_ops; #else - return &dma_direct_ops; + return NULL; #endif } diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index cd227f1cf629..54818cd78bd0 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -99,10 +99,6 @@ void __init dma_ops_init(void) case pcxl2: pa7300lc_init(); - case pcxl: /* falls through */ - case pcxs: - case pcxt: - hppa_dma_ops = &dma_direct_ops; break; default: break; diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h index 55a44f08a9a4..ed32845bd2d2 100644 --- a/arch/sparc/include/asm/dma-mapping.h +++ b/arch/sparc/include/asm/dma-mapping.h @@ -12,11 +12,11 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { #ifdef CONFIG_SPARC_LEON if (sparc_cpu_model == sparc_leon) - return &dma_direct_ops; + return NULL; #endif #if defined(CONFIG_SPARC32) && defined(CONFIG_PCI) if (bus == &pci_bus_type) - return &dma_direct_ops; + return NULL; #endif return dma_ops; } diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index f4562fcec681..d460998ae828 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -17,7 +17,7 @@ static bool disable_dac_quirk __read_mostly; -const struct dma_map_ops *dma_ops = &dma_direct_ops; +const struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); #ifdef CONFIG_IOMMU_DEBUG diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 61a84b958d67..50637f372e9f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -581,7 +581,7 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) dev_priv->map_mode = vmw_dma_map_populate; - if (dma_ops->sync_single_for_cpu) + if (dma_ops && dma_ops->sync_single_for_cpu) dev_priv->map_mode = vmw_dma_alloc_coherent; #ifdef CONFIG_SWIOTLB if (swiotlb_nr_tbl() == 0) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index c5d6c7c42b0a..567221cca13c 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2184,7 +2184,7 @@ static int amd_iommu_add_device(struct device *dev) dev_name(dev)); iommu_ignore_device(dev); - dev->dma_ops = &dma_direct_ops; + dev->dma_ops = NULL; goto out; } init_iommu_group(dev); @@ -2770,17 +2770,6 @@ int __init amd_iommu_init_dma_ops(void) swiotlb = (iommu_pass_through || sme_me_mask) ? 1 : 0; iommu_detected = 1; - /* - * In case we don't initialize SWIOTLB (actually the common case - * when AMD IOMMU is enabled and SME is not active), make sure there - * are global dma_ops set as a fall-back for devices not handled by - * this driver (for example non-PCI devices). When SME is active, - * make sure that swiotlb variable remains set so the global dma_ops - * continue to be SWIOTLB. - */ - if (!swiotlb) - dma_ops = &dma_direct_ops; - if (amd_iommu_unmap_flush) pr_info("AMD-Vi: IO/TLB flush on unmap enabled\n"); else diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h index 880a292d792f..c13f46109e88 100644 --- a/include/asm-generic/dma-mapping.h +++ b/include/asm-generic/dma-mapping.h @@ -4,7 +4,7 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - return &dma_direct_ops; + return NULL; } #endif /* _ASM_GENERIC_DMA_MAPPING_H */ diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 3b0a3ea3876d..b7338702592a 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -60,22 +60,5 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page); -dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs); -void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, unsigned long attrs); -int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, - enum dma_data_direction dir, unsigned long attrs); -void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs); -void dma_direct_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir); -void dma_direct_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir); -void dma_direct_sync_single_for_cpu(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir); -void dma_direct_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir); int dma_direct_supported(struct device *dev, u64 mask); #endif /* _LINUX_DMA_DIRECT_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 269ee27fc3d9..f422aec0f53c 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -134,7 +134,6 @@ struct dma_map_ops { #define DMA_MAPPING_ERROR (~(dma_addr_t)0) -extern const struct dma_map_ops dma_direct_ops; extern const struct dma_map_ops dma_virt_ops; extern const struct dma_map_ops dma_dummy_ops; @@ -222,6 +221,69 @@ static inline const struct dma_map_ops *get_dma_ops(struct device *dev) } #endif +static inline bool dma_is_direct(const struct dma_map_ops *ops) +{ + return likely(!ops); +} + +/* + * All the dma_direct_* declarations are here just for the indirect call bypass, + * and must not be used directly drivers! + */ +dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs); +int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs); + +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_SWIOTLB) +void dma_direct_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir); +void dma_direct_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir); +#else +static inline void dma_direct_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ +} +static inline void dma_direct_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ +} +#endif + +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ + defined(CONFIG_SWIOTLB) +void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs); +void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs); +void dma_direct_sync_single_for_cpu(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir); +void dma_direct_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir); +#else +static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ +} +static inline void dma_direct_unmap_sg(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir, + unsigned long attrs) +{ +} +static inline void dma_direct_sync_single_for_cpu(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ +} +static inline void dma_direct_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ +} +#endif + static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, size_t size, enum dma_data_direction dir, @@ -232,9 +294,12 @@ static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, BUG_ON(!valid_dma_direction(dir)); debug_dma_map_single(dev, ptr, size); - addr = ops->map_page(dev, virt_to_page(ptr), - offset_in_page(ptr), size, - dir, attrs); + if (dma_is_direct(ops)) + addr = dma_direct_map_page(dev, virt_to_page(ptr), + offset_in_page(ptr), size, dir, attrs); + else + addr = ops->map_page(dev, virt_to_page(ptr), + offset_in_page(ptr), size, dir, attrs); debug_dma_map_page(dev, virt_to_page(ptr), offset_in_page(ptr), size, dir, addr, true); @@ -249,7 +314,9 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); - if (ops->unmap_page) + if (dma_is_direct(ops)) + dma_direct_unmap_page(dev, addr, size, dir, attrs); + else if (ops->unmap_page) ops->unmap_page(dev, addr, size, dir, attrs); debug_dma_unmap_page(dev, addr, size, dir, true); } @@ -272,7 +339,10 @@ static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int ents; BUG_ON(!valid_dma_direction(dir)); - ents = ops->map_sg(dev, sg, nents, dir, attrs); + if (dma_is_direct(ops)) + ents = dma_direct_map_sg(dev, sg, nents, dir, attrs); + else + ents = ops->map_sg(dev, sg, nents, dir, attrs); BUG_ON(ents < 0); debug_dma_map_sg(dev, sg, nents, ents, dir); @@ -287,7 +357,9 @@ static inline void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg BUG_ON(!valid_dma_direction(dir)); debug_dma_unmap_sg(dev, sg, nents, dir); - if (ops->unmap_sg) + if (dma_is_direct(ops)) + dma_direct_unmap_sg(dev, sg, nents, dir, attrs); + else if (ops->unmap_sg) ops->unmap_sg(dev, sg, nents, dir, attrs); } @@ -301,7 +373,10 @@ static inline dma_addr_t dma_map_page_attrs(struct device *dev, dma_addr_t addr; BUG_ON(!valid_dma_direction(dir)); - addr = ops->map_page(dev, page, offset, size, dir, attrs); + if (dma_is_direct(ops)) + addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); + else + addr = ops->map_page(dev, page, offset, size, dir, attrs); debug_dma_map_page(dev, page, offset, size, dir, addr, false); return addr; @@ -322,7 +397,7 @@ static inline dma_addr_t dma_map_resource(struct device *dev, BUG_ON(pfn_valid(PHYS_PFN(phys_addr))); addr = phys_addr; - if (ops->map_resource) + if (ops && ops->map_resource) addr = ops->map_resource(dev, phys_addr, size, dir, attrs); debug_dma_map_resource(dev, phys_addr, size, dir, addr); @@ -337,7 +412,7 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); - if (ops->unmap_resource) + if (ops && ops->unmap_resource) ops->unmap_resource(dev, addr, size, dir, attrs); debug_dma_unmap_resource(dev, addr, size, dir); } @@ -349,7 +424,9 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_single_for_cpu) + if (dma_is_direct(ops)) + dma_direct_sync_single_for_cpu(dev, addr, size, dir); + else if (ops->sync_single_for_cpu) ops->sync_single_for_cpu(dev, addr, size, dir); debug_dma_sync_single_for_cpu(dev, addr, size, dir); } @@ -368,7 +445,9 @@ static inline void dma_sync_single_for_device(struct device *dev, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_single_for_device) + if (dma_is_direct(ops)) + dma_direct_sync_single_for_device(dev, addr, size, dir); + else if (ops->sync_single_for_device) ops->sync_single_for_device(dev, addr, size, dir); debug_dma_sync_single_for_device(dev, addr, size, dir); } @@ -387,7 +466,9 @@ dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_sg_for_cpu) + if (dma_is_direct(ops)) + dma_direct_sync_sg_for_cpu(dev, sg, nelems, dir); + else if (ops->sync_sg_for_cpu) ops->sync_sg_for_cpu(dev, sg, nelems, dir); debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir); } @@ -399,7 +480,9 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_sg_for_device) + if (dma_is_direct(ops)) + dma_direct_sync_sg_for_device(dev, sg, nelems, dir); + else if (ops->sync_sg_for_device) ops->sync_sg_for_device(dev, sg, nelems, dir); debug_dma_sync_sg_for_device(dev, sg, nelems, dir); diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index 306557331d7d..69b36ed31a99 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -38,7 +38,10 @@ pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction); #else -#define arch_dma_cache_sync NULL +static inline void arch_dma_cache_sync(struct device *dev, void *vaddr, + size_t size, enum dma_data_direction direction) +{ +} #endif /* CONFIG_DMA_NONCOHERENT_CACHE_SYNC */ #ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 85d8286a0ba2..79da61b49fa4 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -223,6 +223,7 @@ void dma_direct_sync_single_for_device(struct device *dev, if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_device(dev, paddr, size, dir); } +EXPORT_SYMBOL(dma_direct_sync_single_for_device); void dma_direct_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir) @@ -240,6 +241,7 @@ void dma_direct_sync_sg_for_device(struct device *dev, dir); } } +EXPORT_SYMBOL(dma_direct_sync_sg_for_device); #endif #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ @@ -258,6 +260,7 @@ void dma_direct_sync_single_for_cpu(struct device *dev, if (unlikely(is_swiotlb_buffer(paddr))) swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU); } +EXPORT_SYMBOL(dma_direct_sync_single_for_cpu); void dma_direct_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir) @@ -277,6 +280,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev, if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_cpu_all(dev); } +EXPORT_SYMBOL(dma_direct_sync_sg_for_cpu); void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) @@ -289,6 +293,7 @@ void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, if (unlikely(is_swiotlb_buffer(phys))) swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); } +EXPORT_SYMBOL(dma_direct_unmap_page); void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs) @@ -300,11 +305,7 @@ void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir, attrs); } -#else -void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ -} +EXPORT_SYMBOL(dma_direct_unmap_sg); #endif static inline bool dma_direct_possible(struct device *dev, dma_addr_t dma_addr, @@ -331,6 +332,7 @@ dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, arch_sync_dma_for_device(dev, phys, size, dir); return dma_addr; } +EXPORT_SYMBOL(dma_direct_map_page); int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs) @@ -352,6 +354,7 @@ out_unmap: dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); return 0; } +EXPORT_SYMBOL(dma_direct_map_sg); /* * Because 32-bit DMA masks are so common we expect every architecture to be @@ -372,27 +375,3 @@ int dma_direct_supported(struct device *dev, u64 mask) return mask >= phys_to_dma(dev, min_mask); } - -const struct dma_map_ops dma_direct_ops = { - .alloc = dma_direct_alloc, - .free = dma_direct_free, - .map_page = dma_direct_map_page, - .map_sg = dma_direct_map_sg, -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ - defined(CONFIG_SWIOTLB) - .sync_single_for_device = dma_direct_sync_single_for_device, - .sync_sg_for_device = dma_direct_sync_sg_for_device, -#endif -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ - defined(CONFIG_SWIOTLB) - .sync_single_for_cpu = dma_direct_sync_single_for_cpu, - .sync_sg_for_cpu = dma_direct_sync_sg_for_cpu, - .unmap_page = dma_direct_unmap_page, - .unmap_sg = dma_direct_unmap_sg, -#endif - .get_required_mask = dma_direct_get_required_mask, - .dma_supported = dma_direct_supported, - .cache_sync = arch_dma_cache_sync, -}; -EXPORT_SYMBOL(dma_direct_ops); diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 0b18cfbdde95..fc84c81029d9 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -7,6 +7,7 @@ */ #include /* for max_pfn */ #include +#include #include #include #include @@ -229,8 +230,8 @@ int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, unsigned long attrs) { const struct dma_map_ops *ops = get_dma_ops(dev); - BUG_ON(!ops); - if (ops->get_sgtable) + + if (!dma_is_direct(ops) && ops->get_sgtable) return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs); return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size, @@ -293,8 +294,8 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, unsigned long attrs) { const struct dma_map_ops *ops = get_dma_ops(dev); - BUG_ON(!ops); - if (ops->mmap) + + if (!dma_is_direct(ops) && ops->mmap) return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); } @@ -324,6 +325,8 @@ u64 dma_get_required_mask(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); + if (dma_is_direct(ops)) + return dma_direct_get_required_mask(dev); if (ops->get_required_mask) return ops->get_required_mask(dev); return dma_default_get_required_mask(dev); @@ -341,7 +344,6 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, const struct dma_map_ops *ops = get_dma_ops(dev); void *cpu_addr; - BUG_ON(!ops); WARN_ON_ONCE(dev && !dev->coherent_dma_mask); if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr)) @@ -352,10 +354,14 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, if (!arch_dma_alloc_attrs(&dev)) return NULL; - if (!ops->alloc) + + if (dma_is_direct(ops)) + cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs); + else if (ops->alloc) + cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); + else return NULL; - cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); return cpu_addr; } @@ -366,8 +372,6 @@ void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, { const struct dma_map_ops *ops = get_dma_ops(dev); - BUG_ON(!ops); - if (dma_release_from_dev_coherent(dev, get_order(size), cpu_addr)) return; /* @@ -379,11 +383,14 @@ void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, */ WARN_ON(irqs_disabled()); - if (!ops->free || !cpu_addr) + if (!cpu_addr) return; debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); - ops->free(dev, size, cpu_addr, dma_handle, attrs); + if (dma_is_direct(ops)) + dma_direct_free(dev, size, cpu_addr, dma_handle, attrs); + else if (ops->free) + ops->free(dev, size, cpu_addr, dma_handle, attrs); } EXPORT_SYMBOL(dma_free_attrs); @@ -397,9 +404,9 @@ int dma_supported(struct device *dev, u64 mask) { const struct dma_map_ops *ops = get_dma_ops(dev); - if (!ops) - return 0; - if (!ops->dma_supported) + if (dma_is_direct(ops)) + return dma_direct_supported(dev, mask); + if (ops->dma_supported) return 1; return ops->dma_supported(dev, mask); } @@ -437,7 +444,10 @@ void dma_cache_sync(struct device *dev, void *vaddr, size_t size, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); - if (ops->cache_sync) + + if (dma_is_direct(ops)) + arch_dma_cache_sync(dev, vaddr, size, dir); + else if (ops->cache_sync) ops->cache_sync(dev, vaddr, size, dir); } EXPORT_SYMBOL(dma_cache_sync); From 8ee94e3fc54d989897969d7ca8deacfe7850855c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 15 Dec 2018 11:01:25 +0100 Subject: [PATCH 66/73] ia64: only select ARCH_HAS_DMA_COHERENT_TO_PFN if swiotlb is enabled Otherwise we get a build failure due in swiotlb-less configs with non-generic kernels. Signed-off-by: Christoph Hellwig --- arch/ia64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index c587e3316c38..cbf6c67c7166 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -28,7 +28,7 @@ config IA64 select HAVE_ARCH_TRACEHOOK select HAVE_MEMBLOCK_NODE_MAP select HAVE_VIRT_CPU_ACCOUNTING - select ARCH_HAS_DMA_COHERENT_TO_PFN + select ARCH_HAS_DMA_COHERENT_TO_PFN if SWIOTLB select ARCH_HAS_SYNC_DMA_FOR_CPU select VIRT_TO_BUS select ARCH_DISCARD_MEMBLOCK From 664204410afb3b0f538d176ad0c2713b04abd4b9 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 14 Dec 2018 18:49:01 -0700 Subject: [PATCH 67/73] PCI: Remove unused attr variable in pci_dma_configure Clang warns: drivers/pci/pci-driver.c:1603:21: error: unused variable 'attr' [-Werror,-Wunused-variable] Commit e5361ca29f2f ("ACPI / scan: Refactor _CCA enforcement") removed attr's use and replaced it with its assigned value so it is no longer needed. Signed-off-by: Nathan Chancellor Signed-off-by: Christoph Hellwig --- drivers/pci/pci-driver.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 1b58e058b13f..ea55444e6ead 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -1600,7 +1600,6 @@ static int pci_dma_configure(struct device *dev) ret = of_dma_configure(dev, bridge->parent->of_node, true); } else if (has_acpi_companion(bridge)) { struct acpi_device *adev = to_acpi_device_node(bridge->fwnode); - enum dev_dma_attr attr = acpi_get_dma_attr(adev); ret = acpi_dma_configure(dev, acpi_get_dma_attr(adev)); } From 9ab91e7c5c514b675470d9b57393cbe455722060 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Dec 2018 15:18:08 +0100 Subject: [PATCH 68/73] arm64: default to the direct mapping in get_arch_dma_ops Otherwise the direct mapping won't work at all given that a NULL dev->dma_ops causes a fallback. Note that we already explicitly set dev->dma_ops to dma_dummy_ops for dma-incapable devices, so this fallback should not be needed anyway. Fixes: 356da6d0cd ("dma-mapping: bypass indirect calls for dma-direct") Signed-off-by: Christoph Hellwig Reported-by: Marek Szyprowski Tested-by: Marek Szyprowski Reviewed-by: Robin Murphy --- arch/arm64/include/asm/dma-mapping.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 273e778f7de2..95dbf3ef735a 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -26,11 +26,7 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - /* - * We expect no ISA devices, and all other DMA masters are expected to - * have someone call arch_setup_dma_ops at device creation time. - */ - return &dma_dummy_ops; + return NULL; } void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, From 1e2934ad7f93476e92039d4b8914d9f71789448f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 16 Dec 2018 10:20:04 +0100 Subject: [PATCH 69/73] sparc/io-unit: fix ->map_sg return value Just decrementing the sz value will lead to an incorrect return value. Instead of just introducing a local variable switch to the standard for_each_sg helper and standard naming of the arguments. Fixes: ce65d36f3e ("sparc: remove the sparc32_dma_ops indirection") Reported-by: Guenter Roeck Signed-off-by: Christoph Hellwig Acked-by: Sam Ravnborg --- arch/sparc/mm/io-unit.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c index 2088d292c6e5..91be13935d40 100644 --- a/arch/sparc/mm/io-unit.c +++ b/arch/sparc/mm/io-unit.c @@ -158,22 +158,22 @@ static dma_addr_t iounit_map_page(struct device *dev, struct page *page, return ret; } -static int iounit_map_sg(struct device *dev, struct scatterlist *sg, int sz, +static int iounit_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs) { struct iounit_struct *iounit = dev->archdata.iommu; + struct scatterlist *sg; unsigned long flags; + int i; /* FIXME: Cache some resolved pages - often several sg entries are to the same page */ spin_lock_irqsave(&iounit->lock, flags); - while (sz != 0) { - --sz; + for_each_sg(sgl, sg, nents, i) { sg->dma_address = iounit_get_area(iounit, (unsigned long) sg_virt(sg), sg->length); sg->dma_length = sg->length; - sg = sg_next(sg); } spin_unlock_irqrestore(&iounit->lock, flags); - return sz; + return nents; } static void iounit_unmap_page(struct device *dev, dma_addr_t vaddr, size_t len, @@ -191,22 +191,21 @@ static void iounit_unmap_page(struct device *dev, dma_addr_t vaddr, size_t len, spin_unlock_irqrestore(&iounit->lock, flags); } -static void iounit_unmap_sg(struct device *dev, struct scatterlist *sg, int sz, - enum dma_data_direction dir, unsigned long attrs) +static void iounit_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) { struct iounit_struct *iounit = dev->archdata.iommu; - unsigned long flags; - unsigned long vaddr, len; + unsigned long flags, vaddr, len; + struct scatterlist *sg; + int i; spin_lock_irqsave(&iounit->lock, flags); - while (sz != 0) { - --sz; + for_each_sg(sgl, sg, nents, i) { len = ((sg->dma_address & ~PAGE_MASK) + sg->length + (PAGE_SIZE-1)) >> PAGE_SHIFT; vaddr = (sg->dma_address - IOUNIT_DMA_BASE) >> PAGE_SHIFT; IOD(("iounit_release %08lx-%08lx\n", (long)vaddr, (long)len+vaddr)); for (len += vaddr; vaddr < len; vaddr++) clear_bit(vaddr, iounit->bmap); - sg = sg_next(sg); } spin_unlock_irqrestore(&iounit->lock, flags); } From 6c503d0d88db9d57c1dc4c87175c94766b6a6c61 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 16 Dec 2018 10:23:28 +0100 Subject: [PATCH 70/73] sparc/iommu: fix ->map_sg return value Just decrementing the sz value will lead to an incorrect return value. Instead of just introducing a local variable switch to the standard for_each_sg helper and standard naming of the arguments. Fixes: ce65d36f3e ("sparc: remove the sparc32_dma_ops indirection") Reported-by: Guenter Roeck Signed-off-by: Christoph Hellwig Acked-by: Sam Ravnborg --- arch/sparc/mm/iommu.c | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 3599485717e7..fb771a634452 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -241,32 +241,31 @@ static dma_addr_t sbus_iommu_map_page_pflush(struct device *dev, return __sbus_iommu_map_page(dev, page, offset, len); } -static int sbus_iommu_map_sg_gflush(struct device *dev, struct scatterlist *sg, - int sz, enum dma_data_direction dir, unsigned long attrs) +static int sbus_iommu_map_sg_gflush(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) { - int n; + struct scatterlist *sg; + int i, n; flush_page_for_dma(0); - while (sz != 0) { - --sz; + + for_each_sg(sgl, sg, nents, i) { n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT; sg->dma_address = iommu_get_one(dev, sg_page(sg), n) + sg->offset; sg->dma_length = sg->length; - sg = sg_next(sg); } - return sz; + return nents; } -static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sg, - int sz, enum dma_data_direction dir, unsigned long attrs) +static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) { unsigned long page, oldpage = 0; - int n, i; - - while(sz != 0) { - --sz; + struct scatterlist *sg; + int i, j, n; + for_each_sg(sgl, sg, nents, j) { n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT; /* @@ -286,10 +285,9 @@ static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sg, sg->dma_address = iommu_get_one(dev, sg_page(sg), n) + sg->offset; sg->dma_length = sg->length; - sg = sg_next(sg); } - return sz; + return nents; } static void iommu_release_one(struct device *dev, u32 busa, int npages) @@ -318,17 +316,16 @@ static void sbus_iommu_unmap_page(struct device *dev, dma_addr_t dma_addr, iommu_release_one(dev, dma_addr & PAGE_MASK, npages); } -static void sbus_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, - int sz, enum dma_data_direction dir, unsigned long attrs) +static void sbus_iommu_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) { - int n; + struct scatterlist *sg; + int i, n; - while(sz != 0) { - --sz; + for_each_sg(sgl, sg, nents, i) { n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT; iommu_release_one(dev, sg->dma_address & PAGE_MASK, n); sg->dma_address = 0x21212121; - sg = sg_next(sg); } } From 518a2f1925c3165befbf06b75e07636549d92c1c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Dec 2018 09:00:40 +0100 Subject: [PATCH 71/73] dma-mapping: zero memory returned from dma_alloc_* If we want to map memory from the DMA allocator to userspace it must be zeroed at allocation time to prevent stale data leaks. We already do this on most common architectures, but some architectures don't do this yet, fix them up, either by passing GFP_ZERO when we use the normal page allocator or doing a manual memset otherwise. Signed-off-by: Christoph Hellwig Acked-by: Geert Uytterhoeven [m68k] Acked-by: Sam Ravnborg [sparc] --- arch/alpha/kernel/pci_iommu.c | 2 +- arch/arc/mm/dma.c | 2 +- arch/c6x/mm/dma-coherent.c | 5 ++++- arch/m68k/kernel/dma.c | 2 +- arch/microblaze/mm/consistent.c | 2 +- arch/openrisc/kernel/dma.c | 2 +- arch/parisc/kernel/pci-dma.c | 4 ++-- arch/s390/pci/pci_dma.c | 2 +- arch/sparc/kernel/ioport.c | 2 +- arch/sparc/mm/io-unit.c | 2 +- arch/sparc/mm/iommu.c | 2 +- arch/xtensa/kernel/pci-dma.c | 2 +- drivers/misc/mic/host/mic_boot.c | 2 +- kernel/dma/virt.c | 2 +- 14 files changed, 18 insertions(+), 15 deletions(-) diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index e1716e0d92fd..aa0f50d0f823 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -443,7 +443,7 @@ static void *alpha_pci_alloc_coherent(struct device *dev, size_t size, gfp &= ~GFP_DMA; try_again: - cpu_addr = (void *)__get_free_pages(gfp, order); + cpu_addr = (void *)__get_free_pages(gfp | __GFP_ZERO, order); if (! cpu_addr) { printk(KERN_INFO "pci_alloc_consistent: " "get_free_pages failed from %pf\n", diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index db203ff69ccf..1525ac00fd02 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -33,7 +33,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, */ BUG_ON(gfp & __GFP_HIGHMEM); - page = alloc_pages(gfp, order); + page = alloc_pages(gfp | __GFP_ZERO, order); if (!page) return NULL; diff --git a/arch/c6x/mm/dma-coherent.c b/arch/c6x/mm/dma-coherent.c index 01305c787201..75b79571732c 100644 --- a/arch/c6x/mm/dma-coherent.c +++ b/arch/c6x/mm/dma-coherent.c @@ -78,6 +78,7 @@ static void __free_dma_pages(u32 addr, int order) void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, unsigned long attrs) { + void *ret; u32 paddr; int order; @@ -94,7 +95,9 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, if (!paddr) return NULL; - return phys_to_virt(paddr); + ret = phys_to_virt(paddr); + memset(ret, 0, 1 << order); + return ret; } /* diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c index e99993c57d6b..b4aa853051bd 100644 --- a/arch/m68k/kernel/dma.c +++ b/arch/m68k/kernel/dma.c @@ -32,7 +32,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, size = PAGE_ALIGN(size); order = get_order(size); - page = alloc_pages(flag, order); + page = alloc_pages(flag | __GFP_ZERO, order); if (!page) return NULL; diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c index 45e0a1aa9357..3002cbca3059 100644 --- a/arch/microblaze/mm/consistent.c +++ b/arch/microblaze/mm/consistent.c @@ -81,7 +81,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, size = PAGE_ALIGN(size); order = get_order(size); - vaddr = __get_free_pages(gfp, order); + vaddr = __get_free_pages(gfp | __GFP_ZERO, order); if (!vaddr) return NULL; diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c index 159336adfa2f..f79457cb3741 100644 --- a/arch/openrisc/kernel/dma.c +++ b/arch/openrisc/kernel/dma.c @@ -89,7 +89,7 @@ arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, .mm = &init_mm }; - page = alloc_pages_exact(size, gfp); + page = alloc_pages_exact(size, gfp | __GFP_ZERO); if (!page) return NULL; diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index 04c48f1ef3fb..239162355b58 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -404,7 +404,7 @@ static void *pcxl_dma_alloc(struct device *dev, size_t size, order = get_order(size); size = 1 << (order + PAGE_SHIFT); vaddr = pcxl_alloc_range(size); - paddr = __get_free_pages(flag, order); + paddr = __get_free_pages(flag | __GFP_ZERO, order); flush_kernel_dcache_range(paddr, size); paddr = __pa(paddr); map_uncached_pages(vaddr, size, paddr); @@ -429,7 +429,7 @@ static void *pcx_dma_alloc(struct device *dev, size_t size, if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0) return NULL; - addr = (void *)__get_free_pages(flag, get_order(size)); + addr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size)); if (addr) *dma_handle = (dma_addr_t)virt_to_phys(addr); diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 346ba382193a..9e52d1527f71 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -404,7 +404,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size, dma_addr_t map; size = PAGE_ALIGN(size); - page = alloc_pages(flag, get_order(size)); + page = alloc_pages(flag | __GFP_ZERO, get_order(size)); if (!page) return NULL; diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index baa235652c27..f89603855f1e 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -325,7 +325,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, return NULL; size = PAGE_ALIGN(size); - va = (void *) __get_free_pages(gfp, get_order(size)); + va = (void *) __get_free_pages(gfp | __GFP_ZERO, get_order(size)); if (!va) { printk("%s: no %zd pages\n", __func__, size >> PAGE_SHIFT); return NULL; diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c index 91be13935d40..f770ee7229d8 100644 --- a/arch/sparc/mm/io-unit.c +++ b/arch/sparc/mm/io-unit.c @@ -224,7 +224,7 @@ static void *iounit_alloc(struct device *dev, size_t len, return NULL; len = PAGE_ALIGN(len); - va = __get_free_pages(gfp, get_order(len)); + va = __get_free_pages(gfp | __GFP_ZERO, get_order(len)); if (!va) return NULL; diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index fb771a634452..e8d5d73ca40d 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -344,7 +344,7 @@ static void *sbus_iommu_alloc(struct device *dev, size_t len, return NULL; len = PAGE_ALIGN(len); - va = __get_free_pages(gfp, get_order(len)); + va = __get_free_pages(gfp | __GFP_ZERO, get_order(len)); if (va == 0) return NULL; diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index 1fc138b6bc0a..9171bff76fc4 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ b/arch/xtensa/kernel/pci-dma.c @@ -160,7 +160,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, flag & __GFP_NOWARN); if (!page) - page = alloc_pages(flag, get_order(size)); + page = alloc_pages(flag | __GFP_ZERO, get_order(size)); if (!page) return NULL; diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c index c327985c9523..6479435ac96b 100644 --- a/drivers/misc/mic/host/mic_boot.c +++ b/drivers/misc/mic/host/mic_boot.c @@ -149,7 +149,7 @@ static void *__mic_dma_alloc(struct device *dev, size_t size, struct scif_hw_dev *scdev = dev_get_drvdata(dev); struct mic_device *mdev = scdev_to_mdev(scdev); dma_addr_t tmp; - void *va = kmalloc(size, gfp); + void *va = kmalloc(size, gfp | __GFP_ZERO); if (va) { tmp = mic_map_single(mdev, va, size); diff --git a/kernel/dma/virt.c b/kernel/dma/virt.c index 631ddec4b60a..ebe128833af7 100644 --- a/kernel/dma/virt.c +++ b/kernel/dma/virt.c @@ -13,7 +13,7 @@ static void *dma_virt_alloc(struct device *dev, size_t size, { void *ret; - ret = (void *)__get_free_pages(gfp, get_order(size)); + ret = (void *)__get_free_pages(gfp | __GFP_ZERO, get_order(size)); if (ret) *dma_handle = (uintptr_t)ret; return ret; From 06d4dd2f2ce1cdb625f77c0676d5af6ba310c01d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Dec 2018 09:15:02 +0100 Subject: [PATCH 72/73] dma-mapping: deprecate dma_zalloc_coherent We now always return zeroed memory from dma_alloc_coherent. Note that simply passing GFP_ZERO to dma_alloc_coherent wasn't always doing the right thing to start with given that various allocators are not backed by the page allocator and thus would ignore GFP_ZERO. Signed-off-by: Christoph Hellwig --- Documentation/DMA-API.txt | 9 --------- include/linux/dma-mapping.h | 7 ++++--- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 016eb6909b8a..e133ccd60228 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -58,15 +58,6 @@ specify the ``GFP_`` flags (see kmalloc()) for the allocation (the implementation may choose to ignore flags that affect the location of the returned memory, like GFP_DMA). -:: - - void * - dma_zalloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) - -Wraps dma_alloc_coherent() and also zeroes the returned memory if the -allocation attempt succeeded. - :: void diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index f422aec0f53c..a52c6409bdc2 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -644,12 +644,13 @@ static inline unsigned long dma_max_pfn(struct device *dev) } #endif +/* + * Please always use dma_alloc_coherent instead as it already zeroes the memory! + */ static inline void *dma_zalloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) { - void *ret = dma_alloc_coherent(dev, size, dma_handle, - flag | __GFP_ZERO); - return ret; + return dma_alloc_coherent(dev, size, dma_handle, flag); } static inline int dma_get_cache_alignment(void) From 8b1cce9f5832a8eda17d37a3c49fb7dd2d650f46 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 20 Dec 2018 17:35:47 +0100 Subject: [PATCH 73/73] dma-mapping: fix inverted logic in dma_supported The cleanup in commit 356da6d0cde3 ("dma-mapping: bypass indirect calls for dma-direct") accidentally inverted the logic in the check for the presence of a ->dma_supported() callback. Switch this back to the way it was to prevent a crash on boot. Fixes: 356da6d0cde3 ("dma-mapping: bypass indirect calls for dma-direct") Signed-off-by: Thierry Reding Signed-off-by: Christoph Hellwig --- kernel/dma/mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index fc84c81029d9..d7c34d2d1ba5 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -406,7 +406,7 @@ int dma_supported(struct device *dev, u64 mask) if (dma_is_direct(ops)) return dma_direct_supported(dev, mask); - if (ops->dma_supported) + if (!ops->dma_supported) return 1; return ops->dma_supported(dev, mask); }