From 83ca259489409a1fe8a83dad83a82f32174d4f31 Mon Sep 17 00:00:00 2001
From: Dongli Zhang
Date: Fri, 5 Apr 2019 09:15:25 +0800
Subject: [PATCH 1/6] swiotlb: dump used and total slots when swiotlb buffer is full

So far the kernel only prints the requested size when the swiotlb buffer
is full. That alone does not tell whether the failure is a simple
out-of-buffer condition, or whether swiotlb cannot allocate a buffer of
the requested size because of fragmentation.

As 'io_tlb_used' is available since commit 71602fe6d4e9 ("swiotlb: add
debugfs to track swiotlb buffer usage"), both 'io_tlb_used' and
'io_tlb_nslabs' are printed when the swiotlb buffer is full.

Signed-off-by: Dongli Zhang
Signed-off-by: Konrad Rzeszutek Wilk
---
 kernel/dma/swiotlb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 2b0c8fd9658e..82c767374c70 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -533,7 +533,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
 not_found:
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
 	if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
-		dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size);
+		dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu, used %lu\n",
+			 size, io_tlb_nslabs, io_tlb_used);
 	return DMA_MAPPING_ERROR;
 found:
 	io_tlb_used += nslots;

From 53b29c336830db48ad3dc737f88b8c065b1f0851 Mon Sep 17 00:00:00 2001
From: Dongli Zhang
Date: Fri, 12 Apr 2019 19:38:26 +0800
Subject: [PATCH 2/6] swiotlb: save io_tlb_used to local variable before leaving critical section
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When swiotlb is full, the kernel prints io_tlb_used. However, the value
may already be stale by the time it is printed, because it is read after
leaving the critical section protected by the spinlock.

Therefore, back up io_tlb_used into a local variable before leaving the
critical section.

Fixes: 83ca25948940 ("swiotlb: dump used and total slots when swiotlb buffer is full")
Suggested-by: Håkon Bugge
Signed-off-by: Dongli Zhang
Signed-off-by: Konrad Rzeszutek Wilk
---
 kernel/dma/swiotlb.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 82c767374c70..38d57218809c 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -445,6 +445,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
 	unsigned long mask;
 	unsigned long offset_slots;
 	unsigned long max_slots;
+	unsigned long tmp_io_tlb_used;
 
 	if (no_iotlb_memory)
 		panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
@@ -531,10 +532,12 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
 	} while (index != wrap);
 
 not_found:
+	tmp_io_tlb_used = io_tlb_used;
+
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
 	if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
-		dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu, used %lu\n",
-			 size, io_tlb_nslabs, io_tlb_used);
+		dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
+			 size, io_tlb_nslabs, tmp_io_tlb_used);
 	return DMA_MAPPING_ERROR;
 found:
 	io_tlb_used += nslots;
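
The fix above is an instance of a general pattern: a counter protected by
a lock has to be copied into a local variable while the lock is still
held if it is to be reported consistently after the lock is dropped. A
minimal user-space sketch of the pattern (hypothetical names, not kernel
code):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static unsigned long used;  /* protected by 'lock' */

    static void report_alloc_failure(unsigned long requested)
    {
            unsigned long snapshot;

            pthread_mutex_lock(&lock);
            /* ... the allocation attempt fails here ... */
            snapshot = used;    /* copy while the lock is still held */
            pthread_mutex_unlock(&lock);

            /* 'used' may change from here on; 'snapshot' cannot. */
            printf("allocation of %lu failed, used %lu\n", requested, snapshot);
    }
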
From aca351cc4c034b4880f0a0dc3602ed3761ef6f01 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 11 Apr 2019 09:19:57 +0200
Subject: [PATCH 3/6] swiotlb-xen: make instances match their method names

Just drop two pointless _attrs suffixes to make the code a little more
grep-able.

Reviewed-by: Stefano Stabellini
Signed-off-by: Christoph Hellwig
Signed-off-by: Konrad Rzeszutek Wilk
---
 drivers/xen/swiotlb-xen.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index bb7888429be6..272e4aef2a86 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -501,9 +501,8 @@ xen_swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
  * concerning calls here are the same as for swiotlb_unmap_page() above.
  */
 static void
-xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
-			   int nelems, enum dma_data_direction dir,
-			   unsigned long attrs)
+xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
+		     enum dma_data_direction dir, unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i;
@@ -532,9 +531,8 @@ xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
  * same here.
  */
 static int
-xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
-			 int nelems, enum dma_data_direction dir,
-			 unsigned long attrs)
+xen_swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
+		   enum dma_data_direction dir, unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i;
@@ -559,8 +557,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 			/* Don't panic here, we expect map_sg users
 			   to do proper error handling. */
 			attrs |= DMA_ATTR_SKIP_CPU_SYNC;
-			xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
-						   attrs);
+			xen_swiotlb_unmap_sg(hwdev, sgl, i, dir, attrs);
 			sg_dma_len(sgl) = 0;
 			return 0;
 		}
@@ -687,8 +684,8 @@ const struct dma_map_ops xen_swiotlb_dma_ops = {
 	.sync_single_for_device = xen_swiotlb_sync_single_for_device,
 	.sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu,
 	.sync_sg_for_device = xen_swiotlb_sync_sg_for_device,
-	.map_sg = xen_swiotlb_map_sg_attrs,
-	.unmap_sg = xen_swiotlb_unmap_sg_attrs,
+	.map_sg = xen_swiotlb_map_sg,
+	.unmap_sg = xen_swiotlb_unmap_sg,
 	.map_page = xen_swiotlb_map_page,
 	.unmap_page = xen_swiotlb_unmap_page,
 	.dma_supported = xen_swiotlb_dma_supported,
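
The value of the rename is purely mechanical: once an instance is named
after the method it implements, a single grep for the method name finds
both the ops-table entry and the implementation. A hypothetical sketch of
the convention (illustrative types and names, not taken from the patch):

    struct foo_device;

    struct foo_ops {
            int (*map_sg)(struct foo_device *dev, void *sgl, int nelems);
    };

    /* The instance is named after the .map_sg method it implements,
     * so "grep map_sg" finds the table entry and the function together. */
    static int foo_map_sg(struct foo_device *dev, void *sgl, int nelems)
    {
            return nelems;
    }

    static const struct foo_ops foo_ops = {
            .map_sg = foo_map_sg,
    };
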
From 8b35d9feed8e4e66dc4048f776c356e26e2a8216 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 11 Apr 2019 09:19:58 +0200
Subject: [PATCH 4/6] swiotlb-xen: use ->map_page to implement ->map_sg

We can simply loop over the segments and map them, removing lots of
duplicate code.

Reviewed-by: Stefano Stabellini
Signed-off-by: Christoph Hellwig
Signed-off-by: Konrad Rzeszutek Wilk
---
 drivers/xen/swiotlb-xen.c | 68 ++++++---------------------------------
 1 file changed, 10 insertions(+), 58 deletions(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 272e4aef2a86..e40bf1b707e3 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -514,24 +514,8 @@ xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
 }
 
-/*
- * Map a set of buffers described by scatterlist in streaming mode for DMA.
- * This is the scatter-gather version of the above xen_swiotlb_map_page
- * interface. Here the scatter gather list elements are each tagged with the
- * appropriate dma address and length. They are obtained via
- * sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- *       DMA address/length pairs than there are SG table elements.
- *       (for example via virtual mapping capabilities)
- *       The routine returns the number of addr/length pairs actually
- *       used, at most nents.
- *
- * Device ownership issues as mentioned above for xen_swiotlb_map_page are the
- * same here.
- */
 static int
-xen_swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
+xen_swiotlb_map_sg(struct device *dev, struct scatterlist *sgl, int nelems,
 		enum dma_data_direction dir, unsigned long attrs)
 {
 	struct scatterlist *sg;
@@ -540,50 +524,18 @@ xen_swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
 	BUG_ON(dir == DMA_NONE);
 
 	for_each_sg(sgl, sg, nelems, i) {
-		phys_addr_t paddr = sg_phys(sg);
-		dma_addr_t dev_addr = xen_phys_to_bus(paddr);
-
-		if (swiotlb_force == SWIOTLB_FORCE ||
-		    xen_arch_need_swiotlb(hwdev, paddr, dev_addr) ||
-		    !dma_capable(hwdev, dev_addr, sg->length) ||
-		    range_straddles_page_boundary(paddr, sg->length)) {
-			phys_addr_t map = swiotlb_tbl_map_single(hwdev,
-								 start_dma_addr,
-								 sg_phys(sg),
-								 sg->length,
-								 dir, attrs);
-			if (map == DMA_MAPPING_ERROR) {
-				dev_warn(hwdev, "swiotlb buffer is full\n");
-				/* Don't panic here, we expect map_sg users
-				   to do proper error handling. */
-				attrs |= DMA_ATTR_SKIP_CPU_SYNC;
-				xen_swiotlb_unmap_sg(hwdev, sgl, i, dir, attrs);
-				sg_dma_len(sgl) = 0;
-				return 0;
-			}
-			dev_addr = xen_phys_to_bus(map);
-			xen_dma_map_page(hwdev, pfn_to_page(map >> PAGE_SHIFT),
-						dev_addr,
-						map & ~PAGE_MASK,
-						sg->length,
-						dir,
-						attrs);
-			sg->dma_address = dev_addr;
-		} else {
-			/* we are not interested in the dma_addr returned by
-			 * xen_dma_map_page, only in the potential cache flushes executed
-			 * by the function. */
-			xen_dma_map_page(hwdev, pfn_to_page(paddr >> PAGE_SHIFT),
-						dev_addr,
-						paddr & ~PAGE_MASK,
-						sg->length,
-						dir,
-						attrs);
-			sg->dma_address = dev_addr;
-		}
+		sg->dma_address = xen_swiotlb_map_page(dev, sg_page(sg),
+				sg->offset, sg->length, dir, attrs);
+		if (sg->dma_address == DMA_MAPPING_ERROR)
+			goto out_unmap;
 		sg_dma_len(sg) = sg->length;
 	}
+
 	return nelems;
+out_unmap:
+	xen_swiotlb_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
+	sg_dma_len(sgl) = 0;
+	return 0;
 }
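
For callers, the part that matters is the error contract the new
implementation preserves: ->map_sg returns the number of mapped segments,
or 0 on failure, and on failure it has already unwound its own partial
work. A sketch of a driver-side caller under that contract (foo_submit is
hypothetical; the DMA API calls are the standard ones):

    #include <linux/dma-mapping.h>
    #include <linux/scatterlist.h>

    static int foo_submit(struct device *dev, struct scatterlist *sgl,
                          int nelems)
    {
            int count;

            count = dma_map_sg(dev, sgl, nelems, DMA_TO_DEVICE);
            if (count == 0)
                    return -ENOMEM; /* map_sg already cleaned up after itself */

            /* ... program the hardware with 'count' segments ... */

            dma_unmap_sg(dev, sgl, nelems, DMA_TO_DEVICE);
            return 0;
    }
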
From 2e12dceef3d3fb8d796e384aa242062d1643cad4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 11 Apr 2019 09:19:59 +0200
Subject: [PATCH 5/6] swiotlb-xen: simplify the DMA sync method implementations

Get rid of the grand multiplexer and implement the sync_single_for_cpu
and sync_single_for_device methods directly, and then loop over them for
the scatterlist-based variants.

Note that this also loses a few comments related to high-level DMA API
concepts, which have nothing to do with the swiotlb-xen implementation
details.

Reviewed-by: Stefano Stabellini
Signed-off-by: Christoph Hellwig
Signed-off-by: Konrad Rzeszutek Wilk
---
 drivers/xen/swiotlb-xen.c | 84 +++++++++++++--------------------------
 1 file changed, 28 insertions(+), 56 deletions(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index e40bf1b707e3..ba558094d0e6 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -452,48 +452,28 @@ static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
 	xen_unmap_single(hwdev, dev_addr, size, dir, attrs);
 }
 
-/*
- * Make physical memory consistent for a single streaming mode DMA translation
- * after a transfer.
- *
- * If you perform a xen_swiotlb_map_page() but wish to interrogate the buffer
- * using the cpu, yet do not wish to teardown the dma mapping, you must
- * call this function before doing so. At the next point you give the dma
- * address back to the card, you must first perform a
- * xen_swiotlb_dma_sync_for_device, and then the device again owns the buffer
- */
 static void
-xen_swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
-			size_t size, enum dma_data_direction dir,
-			enum dma_sync_target target)
+xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
+		size_t size, enum dma_data_direction dir)
 {
-	phys_addr_t paddr = xen_bus_to_phys(dev_addr);
+	phys_addr_t paddr = xen_bus_to_phys(dma_addr);
 
-	BUG_ON(dir == DMA_NONE);
+	xen_dma_sync_single_for_cpu(dev, dma_addr, size, dir);
 
-	if (target == SYNC_FOR_CPU)
-		xen_dma_sync_single_for_cpu(hwdev, dev_addr, size, dir);
-
-	/* NOTE: We use dev_addr here, not paddr! */
-	if (is_xen_swiotlb_buffer(dev_addr))
-		swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
-
-	if (target == SYNC_FOR_DEVICE)
-		xen_dma_sync_single_for_device(hwdev, dev_addr, size, dir);
+	if (is_xen_swiotlb_buffer(dma_addr))
+		swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
 }
 
-void
-xen_swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
-				size_t size, enum dma_data_direction dir)
+static void
+xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
+		size_t size, enum dma_data_direction dir)
 {
-	xen_swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
-}
+	phys_addr_t paddr = xen_bus_to_phys(dma_addr);
 
-void
-xen_swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
-				   size_t size, enum dma_data_direction dir)
-{
-	xen_swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
+	if (is_xen_swiotlb_buffer(dma_addr))
+		swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);
+
+	xen_dma_sync_single_for_device(dev, dma_addr, size, dir);
 }
 
 /*
@@ -538,38 +518,30 @@ out_unmap:
 	return 0;
 }
 
-/*
- * Make physical memory consistent for a set of streaming mode DMA translations
- * after a transfer.
- *
- * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
- * and usage.
- */
 static void
-xen_swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
-		    int nelems, enum dma_data_direction dir,
-		    enum dma_sync_target target)
+xen_swiotlb_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
+		int nelems, enum dma_data_direction dir)
 {
 	struct scatterlist *sg;
 	int i;
 
-	for_each_sg(sgl, sg, nelems, i)
-		xen_swiotlb_sync_single(hwdev, sg->dma_address,
-					sg_dma_len(sg), dir, target);
-}
-
-static void
-xen_swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
-			    int nelems, enum dma_data_direction dir)
-{
-	xen_swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
+	for_each_sg(sgl, sg, nelems, i) {
+		xen_swiotlb_sync_single_for_cpu(dev, sg->dma_address,
+				sg->length, dir);
+	}
 }
 
 static void
-xen_swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
+xen_swiotlb_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
 			       int nelems, enum dma_data_direction dir)
 {
-	xen_swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(sgl, sg, nelems, i) {
+		xen_swiotlb_sync_single_for_device(dev, sg->dma_address,
+				sg->length, dir);
+	}
 }
 
 /*
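
Worth noting is the mirror-image ordering the two new helpers encode:
for_cpu does the architecture's cache maintenance before copying out of
the bounce buffer, while for_device copies into the bounce buffer before
the cache maintenance, so the bounce copy always sits on the device side
of the cache operation. A schematic sketch of that shape (every name
below is a hypothetical stand-in):

    struct dev;

    static int  is_bounce_buffer(unsigned long addr) { return 0; }
    static void arch_sync_for_cpu(struct dev *d, unsigned long a) { }
    static void arch_sync_for_device(struct dev *d, unsigned long a) { }
    static void bounce_copy_to_cpu(struct dev *d, unsigned long a) { }
    static void bounce_copy_to_device(struct dev *d, unsigned long a) { }

    static void sync_for_cpu(struct dev *d, unsigned long addr)
    {
            arch_sync_for_cpu(d, addr);             /* cache maintenance first */
            if (is_bounce_buffer(addr))
                    bounce_copy_to_cpu(d, addr);    /* then copy out of the buffer */
    }

    static void sync_for_device(struct dev *d, unsigned long addr)
    {
            if (is_bounce_buffer(addr))
                    bounce_copy_to_device(d, addr); /* copy into the buffer first */
            arch_sync_for_device(d, addr);          /* then cache maintenance */
    }
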
From 063b8271ec8f706d833e61dfca40c512504a62c1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 11 Apr 2019 09:20:00 +0200
Subject: [PATCH 6/6] swiotlb-xen: ensure we have a single callsite for xen_dma_map_page

Refactor the code a bit to make further changes easier.

Reviewed-by: Stefano Stabellini
Signed-off-by: Christoph Hellwig
Signed-off-by: Konrad Rzeszutek Wilk
---
 drivers/xen/swiotlb-xen.c | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index ba558094d0e6..64cb94dfedd4 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -388,13 +388,8 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 	if (dma_capable(dev, dev_addr, size) &&
 	    !range_straddles_page_boundary(phys, size) &&
 	    !xen_arch_need_swiotlb(dev, phys, dev_addr) &&
-	    (swiotlb_force != SWIOTLB_FORCE)) {
-		/* we are not interested in the dma_addr returned by
-		 * xen_dma_map_page, only in the potential cache flushes executed
-		 * by the function. */
-		xen_dma_map_page(dev, page, dev_addr, offset, size, dir, attrs);
-		return dev_addr;
-	}
+	    swiotlb_force != SWIOTLB_FORCE)
+		goto done;
 
 	/*
 	 * Oh well, have to allocate and map a bounce buffer.
@@ -407,19 +402,25 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 		return DMA_MAPPING_ERROR;
 
 	dev_addr = xen_phys_to_bus(map);
-	xen_dma_map_page(dev, pfn_to_page(map >> PAGE_SHIFT),
-					dev_addr, map & ~PAGE_MASK, size, dir, attrs);
 
 	/*
 	 * Ensure that the address returned is DMA'ble
 	 */
-	if (dma_capable(dev, dev_addr, size))
-		return dev_addr;
+	if (unlikely(!dma_capable(dev, dev_addr, size))) {
+		swiotlb_tbl_unmap_single(dev, map, size, dir,
+				attrs | DMA_ATTR_SKIP_CPU_SYNC);
+		return DMA_MAPPING_ERROR;
+	}
 
-	attrs |= DMA_ATTR_SKIP_CPU_SYNC;
-	swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);
-
-	return DMA_MAPPING_ERROR;
+	page = pfn_to_page(map >> PAGE_SHIFT);
+	offset = map & ~PAGE_MASK;
+done:
+	/*
+	 * we are not interested in the dma_addr returned by xen_dma_map_page,
+	 * only in the potential cache flushes executed by the function.
+	 */
+	xen_dma_map_page(dev, page, dev_addr, offset, size, dir, attrs);
+	return dev_addr;
 }
 
 /*