add swiotlb support to arm
This fixes a cascade of regressions that originally started with the addition of the ia64 port, but only became fatal once we removed most uses of block layer bounce buffering in Linux 4.18.

The reason is that the original i386/PAE code, the first architecture that supported more than 4GB of memory without an iommu, decided to leave bounce buffering to the subsystems, which in those days just meant block and networking, as nothing else consumed arbitrary userspace memory. Later, with the ia64, x86_64 and other ports, we assumed that either an iommu or something that fakes one up ("software IOTLB" in beautiful Intel speak) is present, and that subsystems can rely on it to deal with addressing limitations in devices. Except that the ARM LPAE scheme, which added larger physical addresses to 32-bit ARM, did not follow that model and thus only worked by chance, and only for block and networking I/O directly to highmem.

Long story, short fix - add swiotlb support to arm when built for LPAE platforms, which actually turns out to be pretty trivial with the modern dma-direct / swiotlb code, and fixes the Linux 4.18-ish regression.

-----BEGIN PGP SIGNATURE-----

iQI/BAABCgApFiEEgdbnc3r/njty3Iq9D55TZVIEUYMFAl1DFj8LHGhjaEBsc3Qu
ZGUACgkQD55TZVIEUYPFqg/+Oh62VCFCkIK07NAeTq6EmrfHI8I1Wm/SFWPOOB+a
vm7nMcSG3C8K8PRHzGc6Zk3SC1+RrHghcyKw54yLT1Mhroakv6Um7p2y8S3M4tmZ
uEg8yYbtzxvuaY9T42s2msZURbBCEELzA2bYbQzgQ1zczRI1zuMI07ssMr91IQ91
HC1OjAUoxUkp/+2uU/X2k6DvPQLSJSyWvKgbi1bjNpE+FRCKJP+2a2K3psBQuDBe
aJXiz/kD2L/JNvF/e4c414d5GnGXwtIYs1kbskmnj3LeToS+JjX+6ZcENorpScIP
c20s/3H6nsb14TFy548rJUlAHdcd9kOdeTw+0oPUliNLCogGs6FKNU4N5gVAo+bC
AWDP0wMHMWkrVz6lQL9PR78IHrHOxFYS5/uHsqqdKo5YTsgaHnwKEiPxX1aiKQ67
ovUrOnGRo4R9Y4YwD+BbHY9qw9jFMqazBdLWMivK5NxqltsahOug8w2emTFfXzQn
m4APJYa0RVJA4mkh3ejcci5qHyyzPOjslyIJn7eaJPV2rknkxRn9UngkgJLnzHfc
+lKiD1zaRy82nV4auPjYRiOdAoQN40YFB/RT16OVkjkT+jJEE2UAMjqh2SRlRusp
Ce8vK7pw6VpDNGJRQveQA+1n9OR/jl0Jf8R7GFRrf9c/bM1J8GErJ6xS/EwNPrgI
5dE=
=D6Uy
-----END PGP SIGNATURE-----

Merge tag 'arm-swiotlb-5.3' of git://git.infradead.org/users/hch/dma-mapping

Pull arm swiotlb support from Christoph Hellwig:

 "This fixes a cascade of regressions that originally started with the
  addition of the ia64 port, but only became fatal once we removed most
  uses of block layer bounce buffering in Linux 4.18.

  The reason is that the original i386/PAE code, the first architecture
  that supported more than 4GB of memory without an iommu, decided to
  leave bounce buffering to the subsystems, which in those days just
  meant block and networking, as nothing else consumed arbitrary
  userspace memory.

  Later, with the ia64, x86_64 and other ports, we assumed that either
  an iommu or something that fakes one up ("software IOTLB" in beautiful
  Intel speak) is present, and that subsystems can rely on it to deal
  with addressing limitations in devices. Except that the ARM LPAE
  scheme, which added larger physical addresses to 32-bit ARM, did not
  follow that model and thus only worked by chance, and only for block
  and networking I/O directly to highmem.

  Long story, short fix - add swiotlb support to arm when built for LPAE
  platforms, which actually turns out to be pretty trivial with the
  modern dma-direct / swiotlb code, and fixes the Linux 4.18-ish
  regression"

* tag 'arm-swiotlb-5.3' of git://git.infradead.org/users/hch/dma-mapping:
  arm: use swiotlb for bounce buffering on LPAE configs
  dma-mapping: check pfn validity in dma_common_{mmap,get_sgtable}
commit 234172f6bb
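From a driver's point of view nothing new is required by this merge - the point of the series is that the existing streaming DMA API now does the right thing on LPAE systems. As a rough illustration (the helper and its arguments are hypothetical; only the dma_* calls are the real API), a driver for a device that can only master 32-bit addresses might do:

#include <linux/dma-mapping.h>

/* Hypothetical helper: map a kernel buffer for a device limited to 32-bit
 * DMA addresses.  On an LPAE system the buffer may sit above 4GB; with
 * this merge the dma-direct/swiotlb core bounces it into addressable
 * memory instead of handing the device a truncated address. */
static int example_map_buffer(struct device *dev, void *buf, size_t len,
			      dma_addr_t *dma_out)
{
	dma_addr_t addr;

	/* declare the device's addressing limit */
	if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)))
		return -EIO;

	addr = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, addr))
		return -ENOMEM;		/* e.g. the swiotlb pool ran dry */

	*dma_out = addr;		/* now guaranteed to be device-reachable */
	return 0;
}

Before this series the same sequence on an LPAE machine could hand the device an address it cannot reach, once the block layer stopped bouncing on its own.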
@@ -18,7 +18,9 @@ extern const struct dma_map_ops arm_coherent_dma_ops;
 
 static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
-	return IS_ENABLED(CONFIG_MMU) ? &arm_dma_ops : NULL;
+	if (IS_ENABLED(CONFIG_MMU) && !IS_ENABLED(CONFIG_ARM_LPAE))
+		return &arm_dma_ops;
+	return NULL;
 }
 
 #ifdef __arch_page_to_dma
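Returning NULL from get_arch_dma_ops() is what switches an LPAE kernel over to the common code: the DMA API treats a NULL ops pointer as "use dma-direct". A simplified sketch of the 5.3-era dispatch (condensed from include/linux/dma-mapping.h, not verbatim):

static inline const struct dma_map_ops *get_dma_ops(struct device *dev)
{
	if (dev->dma_ops)
		return dev->dma_ops;
	return get_arch_dma_ops(dev->bus);	/* now NULL on ARM LPAE */
}

static inline dma_addr_t dma_map_page_attrs(struct device *dev,
		struct page *page, size_t offset, size_t size,
		enum dma_data_direction dir, unsigned long attrs)
{
	const struct dma_map_ops *ops = get_dma_ops(dev);

	if (dma_is_direct(ops))		/* NULL ops means dma-direct */
		return dma_direct_map_page(dev, page, offset, size, dir, attrs);
	return ops->map_page(dev, page, offset, size, dir, attrs);
}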
@@ -663,6 +663,11 @@ config ARM_LPAE
 	depends on MMU && CPU_32v7 && !CPU_32v6 && !CPU_32v5 && \
 		!CPU_32v4 && !CPU_32v3
 	select PHYS_ADDR_T_64BIT
+	select SWIOTLB
+	select ARCH_HAS_DMA_COHERENT_TO_PFN
+	select ARCH_HAS_DMA_MMAP_PGPROT
+	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	help
 	  Say Y if you have an ARMv7 processor supporting the LPAE page
 	  table format and you would like to access memory beyond the
@@ -15,6 +15,7 @@
 #include <linux/init.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
+#include <linux/dma-noncoherent.h>
 #include <linux/dma-contiguous.h>
 #include <linux/highmem.h>
 #include <linux/memblock.h>
@@ -1125,6 +1126,19 @@ int arm_dma_supported(struct device *dev, u64 mask)
 
 static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent)
 {
+	/*
+	 * When CONFIG_ARM_LPAE is set, physical address can extend above
+	 * 32-bits, which then can't be addressed by devices that only support
+	 * 32-bit DMA.
+	 * Use the generic dma-direct / swiotlb ops code in that case, as that
+	 * handles bounce buffering for us.
+	 *
+	 * Note: this checks CONFIG_ARM_LPAE instead of CONFIG_SWIOTLB as the
+	 * latter is also selected by the Xen code, but that code for now relies
+	 * on non-NULL dev_dma_ops. To be cleaned up later.
+	 */
+	if (IS_ENABLED(CONFIG_ARM_LPAE))
+		return NULL;
 	return coherent ? &arm_coherent_dma_ops : &arm_dma_ops;
 }
 
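What "the generic dma-direct / swiotlb ops code" does with an unaddressable buffer can be sketched as follows. This is a simplified model, not the kernel's exact code; swiotlb_bounce_map() in particular is a placeholder name for the swiotlb mapping helper:

dma_addr_t sketch_dma_direct_map_page(struct device *dev, struct page *page,
		unsigned long offset, size_t size, enum dma_data_direction dir,
		unsigned long attrs)
{
	phys_addr_t phys = page_to_phys(page) + offset;
	dma_addr_t dma_addr = phys_to_dma(dev, phys);

	/* Buffer sits above what the device's DMA mask can address?
	 * Copy it into the swiotlb bounce pool (allocated low at boot,
	 * see the mem_init() hunk below) and use that address instead. */
	if (!dma_capable(dev, dma_addr, size))
		dma_addr = swiotlb_bounce_map(dev, &phys, size, dir, attrs);

	/* Non-coherent device: write back / invalidate CPU caches so the
	 * device observes the data; on ARM this ends up in the
	 * arch_sync_dma_for_device() hook added later in this diff. */
	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		arch_sync_dma_for_device(dev, phys, size, dir);

	return dma_addr;
}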
@@ -2329,6 +2343,9 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 	const struct dma_map_ops *dma_ops;
 
 	dev->archdata.dma_coherent = coherent;
+#ifdef CONFIG_SWIOTLB
+	dev->dma_coherent = coherent;
+#endif
 
 	/*
 	 * Don't override the dma_ops if they have already been set. Ideally
@@ -2363,3 +2380,47 @@ void arch_teardown_dma_ops(struct device *dev)
 	/* Let arch_setup_dma_ops() start again from scratch upon re-probe */
 	set_dma_ops(dev, NULL);
 }
+
+#ifdef CONFIG_SWIOTLB
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
+{
+	__dma_page_cpu_to_dev(phys_to_page(paddr), paddr & (PAGE_SIZE - 1),
+			      size, dir);
+}
+
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
+{
+	__dma_page_dev_to_cpu(phys_to_page(paddr), paddr & (PAGE_SIZE - 1),
+			      size, dir);
+}
+
+long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
+		dma_addr_t dma_addr)
+{
+	return dma_to_pfn(dev, dma_addr);
+}
+
+pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
+		unsigned long attrs)
+{
+	if (!dev_is_dma_coherent(dev))
+		return __get_dma_pgprot(attrs, prot);
+	return prot;
+}
+
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		gfp_t gfp, unsigned long attrs)
+{
+	return __dma_alloc(dev, size, dma_handle, gfp,
+			   __get_dma_pgprot(attrs, PAGE_KERNEL), false,
+			   attrs, __builtin_return_address(0));
+}
+
+void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
+		dma_addr_t dma_handle, unsigned long attrs)
+{
+	__arm_dma_free(dev, size, cpu_addr, dma_handle, attrs, false);
+}
+#endif /* CONFIG_SWIOTLB */
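These hooks are reached whenever the common code needs to pass buffer ownership back and forth on a non-coherent device, both from the streaming sync API and around the swiotlb bounce copies. A hypothetical driver-side view of the pairing (the example_* names are illustrative; the dma_sync_* calls are the real API):

#include <linux/dma-mapping.h>

/* CPU has finished writing the buffer: flush caches so the device sees
 * the data.  On ARM this reaches __dma_page_cpu_to_dev() through the
 * arch_sync_dma_for_device() hook above. */
static void example_give_to_device(struct device *dev, dma_addr_t handle,
				   size_t len)
{
	dma_sync_single_for_device(dev, handle, len, DMA_BIDIRECTIONAL);
}

/* Device has finished: invalidate caches before the CPU reads what the
 * device wrote.  On ARM this reaches __dma_page_dev_to_cpu() through
 * arch_sync_dma_for_cpu() above. */
static void example_take_from_device(struct device *dev, dma_addr_t handle,
				     size_t len)
{
	dma_sync_single_for_cpu(dev, handle, len, DMA_BIDIRECTIONAL);
}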
@@ -21,6 +21,7 @@
 #include <linux/dma-contiguous.h>
 #include <linux/sizes.h>
 #include <linux/stop_machine.h>
+#include <linux/swiotlb.h>
 
 #include <asm/cp15.h>
 #include <asm/mach-types.h>
@@ -463,6 +464,10 @@ static void __init free_highpages(void)
  */
 void __init mem_init(void)
 {
+#ifdef CONFIG_ARM_LPAE
+	swiotlb_init(1);
+#endif
+
 	set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
 
 	/* this will put all unused low memory onto the freelists */
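swiotlb_init(1) reserves the bounce pool from low memory at boot (64MB by default in this era; the flag asks for a verbose printout of where it landed). If that is too small or too large for a given LPAE system, the generic swiotlb= command-line parameter can resize it; the value is a slab count with 2KB slabs, so for example (figure purely illustrative):

# 65536 slabs * 2KB = 128MB bounce pool instead of the 64MB default
swiotlb=65536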
@@ -116,11 +116,16 @@ int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
 	int ret;
 
 	if (!dev_is_dma_coherent(dev)) {
+		unsigned long pfn;
+
 		if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN))
 			return -ENXIO;
 
-		page = pfn_to_page(arch_dma_coherent_to_pfn(dev, cpu_addr,
-				dma_addr));
+		/* If the PFN is not valid, we do not have a struct page */
+		pfn = arch_dma_coherent_to_pfn(dev, cpu_addr, dma_addr);
+		if (!pfn_valid(pfn))
+			return -ENXIO;
+		page = pfn_to_page(pfn);
 	} else {
 		page = virt_to_page(cpu_addr);
 	}
@@ -170,7 +175,11 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
 	if (!dev_is_dma_coherent(dev)) {
 		if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN))
 			return -ENXIO;
+
+		/* If the PFN is not valid, we do not have a struct page */
 		pfn = arch_dma_coherent_to_pfn(dev, cpu_addr, dma_addr);
+		if (!pfn_valid(pfn))
+			return -ENXIO;
 	} else {
 		pfn = page_to_pfn(virt_to_page(cpu_addr));
 	}