From cdd4f4c71047cdd28c13891e6e217765ea1c1bdc Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 19 Sep 2016 17:36:29 -0400 Subject: [PATCH 1/4] sparc: migrate exception table users off module.h and onto extable.h These files were only including module.h for exception table related functions. We've now separated that content out into its own file "extable.h" so now move over to that and avoid all the extra header content in module.h that we don't really need to compile these files. Cc: "David S. Miller" Cc: sparclinux@vger.kernel.org Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- arch/sparc/kernel/kprobes.c | 2 +- arch/sparc/kernel/traps_64.c | 2 +- arch/sparc/kernel/unaligned_64.c | 2 +- arch/sparc/mm/fault_64.c | 2 +- arch/sparc/mm/init_64.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c index cd83be527586..b0377db12d83 100644 --- a/arch/sparc/kernel/kprobes.c +++ b/arch/sparc/kernel/kprobes.c @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index d21cd625c0de..4094a51b1970 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -8,7 +8,7 @@ * I like traps on v9, :)))) */ -#include +#include #include #include #include diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c index 9aacb9159262..52c00d90d4b4 100644 --- a/arch/sparc/kernel/unaligned_64.c +++ b/arch/sparc/kernel/unaligned_64.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 3f291d8c57f7..643c149a3151 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 7ac6b62fb7c1..439784b7b7ac 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -5,7 +5,7 @@ * Copyright (C) 1997-1999 Jakub Jelinek (jj@sunsite.mff.cuni.cz) */ -#include +#include #include #include #include From 8914391b4e6517ca3dbbb975fc38ce13b0c5ee45 Mon Sep 17 00:00:00 2001 From: chris hyser Date: Wed, 28 Sep 2016 12:19:45 -0700 Subject: [PATCH 2/4] sparc64: Enable PCI IOMMU version 2 API Enable Version 2 of the PCI IOMMU API needed for advanced features such as PCI Relaxed Ordering and greater than 2 GB DMA address space per root complex. Signed-off-by: Chris Hyser Signed-off-by: David S. Miller --- arch/sparc/kernel/pci_sun4v.c | 36 +++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 61c6f935accc..c5c819daf800 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -30,8 +30,19 @@ #define DRIVER_NAME "pci_sun4v" #define PFX DRIVER_NAME ": " -static unsigned long vpci_major = 1; -static unsigned long vpci_minor = 1; +static unsigned long vpci_major; +static unsigned long vpci_minor; + +struct vpci_version { + unsigned long major; + unsigned long minor; +}; + +/* Ordered from largest major to lowest */ +static struct vpci_version vpci_versions[] = { + { .major = 2, .minor = 0 }, + { .major = 1, .minor = 1 }, +}; #define PGLIST_NENTS (PAGE_SIZE / sizeof(u64)) @@ -907,22 +918,27 @@ static int pci_sun4v_probe(struct platform_device *op) struct device_node *dp; struct iommu *iommu; u32 devhandle; - int i, err; + int i, err = -ENODEV; dp = op->dev.of_node; if (!hvapi_negotiated++) { - err = sun4v_hvapi_register(HV_GRP_PCI, - vpci_major, - &vpci_minor); + for (i = 0; i < ARRAY_SIZE(vpci_versions); i++) { + vpci_major = vpci_versions[i].major; + vpci_minor = vpci_versions[i].minor; + + err = sun4v_hvapi_register(HV_GRP_PCI, vpci_major, + &vpci_minor); + if (!err) + break; + } if (err) { - printk(KERN_ERR PFX "Could not register hvapi, " - "err=%d\n", err); + pr_err(PFX "Could not register hvapi, err=%d\n", err); return err; } - printk(KERN_INFO PFX "Registered hvapi major[%lu] minor[%lu]\n", - vpci_major, vpci_minor); + pr_info(PFX "Registered hvapi major[%lu] minor[%lu]\n", + vpci_major, vpci_minor); dma_ops = &sun4v_dma_ops; } From aa7bde1a8b49391d34f17905a04c3acf7770283d Mon Sep 17 00:00:00 2001 From: chris hyser Date: Wed, 28 Sep 2016 12:19:50 -0700 Subject: [PATCH 3/4] sparc64: Enable setting "relaxed ordering" in IOMMU mappings Enable relaxed ordering for memory writes in IOMMU TSB entry from dma_4v_alloc_coherent(), dma_4v_map_page() and dma_4v_map_sg() when dma_attrs DMA_ATTR_WEAK_ORDERING is set. This requires PCI IOMMU I/O Translation Services version 2.0 API. Many PCIe devices allow enabling relaxed-ordering (memory writes bypassing other memory writes) for various DMA buffers. A notable exception is the Mellanox mlx4 IB adapter. Due to the nature of x86 HW this appears to have little performance impact there. On SPARC HW however, this results in major performance degradation getting only about 3Gbps. Enabling RO in the IOMMU entries corresponding to mlx4 data buffers increases the throughput to about 13 Gbps. Orabug: 19245907 Signed-off-by: Chris Hyser Signed-off-by: David S. Miller --- arch/sparc/include/asm/hypervisor.h | 1 + arch/sparc/kernel/pci_sun4v.c | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h index f5b6537306f0..666d5ba230d2 100644 --- a/arch/sparc/include/asm/hypervisor.h +++ b/arch/sparc/include/asm/hypervisor.h @@ -1744,6 +1744,7 @@ unsigned long sun4v_vintr_set_target(unsigned long dev_handle, #define HV_PCI_MAP_ATTR_READ 0x01 #define HV_PCI_MAP_ATTR_WRITE 0x02 +#define HV_PCI_MAP_ATTR_RELAXED_ORDER 0x04 #define HV_PCI_DEVICE_BUILD(b,d,f) \ ((((b) & 0xff) << 16) | \ diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index c5c819daf800..db57d8acdc01 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -78,6 +78,10 @@ static long iommu_batch_flush(struct iommu_batch *p) u64 *pglist = p->pglist; unsigned long npages = p->npages; + /* VPCI maj=1, min=[0,1] only supports read and write */ + if (vpci_major < 2) + prot &= (HV_PCI_MAP_ATTR_READ | HV_PCI_MAP_ATTR_WRITE); + while (npages != 0) { long num; @@ -144,6 +148,7 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, unsigned long attrs) { unsigned long flags, order, first_page, npages, n; + unsigned long prot = 0; struct iommu *iommu; struct page *page; void *ret; @@ -157,6 +162,9 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, npages = size >> IO_PAGE_SHIFT; + if (attrs & DMA_ATTR_WEAK_ORDERING) + prot = HV_PCI_MAP_ATTR_RELAXED_ORDER; + nid = dev->archdata.numa_node; page = alloc_pages_node(nid, gfp, order); if (unlikely(!page)) @@ -180,7 +188,7 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, local_irq_save(flags); iommu_batch_start(dev, - (HV_PCI_MAP_ATTR_READ | + (HV_PCI_MAP_ATTR_READ | prot | HV_PCI_MAP_ATTR_WRITE), entry); @@ -277,6 +285,9 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, if (direction != DMA_TO_DEVICE) prot |= HV_PCI_MAP_ATTR_WRITE; + if (attrs & DMA_ATTR_WEAK_ORDERING) + prot |= HV_PCI_MAP_ATTR_RELAXED_ORDER; + local_irq_save(flags); iommu_batch_start(dev, prot, entry); @@ -355,6 +366,9 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, if (direction != DMA_TO_DEVICE) prot |= HV_PCI_MAP_ATTR_WRITE; + if (attrs & DMA_ATTR_WEAK_ORDERING) + prot |= HV_PCI_MAP_ATTR_RELAXED_ORDER; + outs = s = segstart = &sglist[0]; outcount = 1; incount = nelems; From 98e98eb6a8a179a444a30d903714fab356726155 Mon Sep 17 00:00:00 2001 From: "netmonk@netmonk.org" Date: Sat, 1 Oct 2016 23:42:41 +0200 Subject: [PATCH 4/4] sparc: fixing ident and beautifying code Good evening, Following LinuxCodingStyle documentation and with the help of Sam, fixed severals identation issues in the code, and few others cosmetic changes And last and i hope least fixing my name :) Signed-off-by : Dominique Carrel Acked-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/prom/ranges.c | 52 +++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/arch/sparc/prom/ranges.c b/arch/sparc/prom/ranges.c index ad143c13bdc0..6d8dc2aa94ce 100644 --- a/arch/sparc/prom/ranges.c +++ b/arch/sparc/prom/ranges.c @@ -16,9 +16,8 @@ static struct linux_prom_ranges promlib_obio_ranges[PROMREG_MAX]; static int num_obio_ranges; /* Adjust register values based upon the ranges parameters. */ -static void -prom_adjust_regs(struct linux_prom_registers *regp, int nregs, - struct linux_prom_ranges *rangep, int nranges) +static void prom_adjust_regs(struct linux_prom_registers *regp, int nregs, + struct linux_prom_ranges *rangep, int nranges) { int regc, rngc; @@ -34,33 +33,30 @@ prom_adjust_regs(struct linux_prom_registers *regp, int nregs, } } -static void -prom_adjust_ranges(struct linux_prom_ranges *ranges1, int nranges1, - struct linux_prom_ranges *ranges2, int nranges2) +static void prom_adjust_ranges(struct linux_prom_ranges *ranges1, int nranges1, + struct linux_prom_ranges *ranges2, int nranges2) { int rng1c, rng2c; - for(rng1c=0; rng1c < nranges1; rng1c++) { - for(rng2c=0; rng2c < nranges2; rng2c++) - if(ranges1[rng1c].ot_parent_space == ranges2[rng2c].ot_child_space && + for (rng1c = 0; rng1c < nranges1; rng1c++) { + for (rng2c = 0; rng2c < nranges2; rng2c++) + if (ranges1[rng1c].ot_parent_space == ranges2[rng2c].ot_child_space && ranges1[rng1c].ot_parent_base >= ranges2[rng2c].ot_child_base && ranges2[rng2c].ot_child_base + ranges2[rng2c].or_size - ranges1[rng1c].ot_parent_base > 0U) break; - if(rng2c == nranges2) /* oops */ + if (rng2c == nranges2) /* oops */ prom_printf("adjust_ranges: Could not find matching bus type...\n"); else if (ranges1[rng1c].ot_parent_base + ranges1[rng1c].or_size > ranges2[rng2c].ot_child_base + ranges2[rng2c].or_size) - ranges1[rng1c].or_size = - ranges2[rng2c].ot_child_base + ranges2[rng2c].or_size - ranges1[rng1c].ot_parent_base; + ranges1[rng1c].or_size = ranges2[rng2c].ot_child_base + ranges2[rng2c].or_size - ranges1[rng1c].ot_parent_base; ranges1[rng1c].ot_parent_space = ranges2[rng2c].ot_parent_space; ranges1[rng1c].ot_parent_base += ranges2[rng2c].ot_parent_base; } } /* Apply probed obio ranges to registers passed, if no ranges return. */ -void -prom_apply_obio_ranges(struct linux_prom_registers *regs, int nregs) +void prom_apply_obio_ranges(struct linux_prom_registers *regs, int nregs) { - if(num_obio_ranges) + if (num_obio_ranges) prom_adjust_regs(regs, nregs, promlib_obio_ranges, num_obio_ranges); } EXPORT_SYMBOL(prom_apply_obio_ranges); @@ -76,40 +72,40 @@ void __init prom_ranges_init(void) node = prom_getchild(prom_root_node); obio_node = prom_searchsiblings(node, "obio"); - if(obio_node) { + if (obio_node) { success = prom_getproperty(obio_node, "ranges", (char *) promlib_obio_ranges, sizeof(promlib_obio_ranges)); - if(success != -1) - num_obio_ranges = (success/sizeof(struct linux_prom_ranges)); + if (success != -1) + num_obio_ranges = (success / sizeof(struct linux_prom_ranges)); } - if(num_obio_ranges) + if (num_obio_ranges) prom_printf("PROMLIB: obio_ranges %d\n", num_obio_ranges); } void prom_apply_generic_ranges(phandle node, phandle parent, - struct linux_prom_registers *regs, int nregs) + struct linux_prom_registers *regs, int nregs) { int success; int num_ranges; struct linux_prom_ranges ranges[PROMREG_MAX]; - + success = prom_getproperty(node, "ranges", (char *) ranges, - sizeof (ranges)); + sizeof(ranges)); if (success != -1) { - num_ranges = (success/sizeof(struct linux_prom_ranges)); + num_ranges = (success / sizeof(struct linux_prom_ranges)); if (parent) { struct linux_prom_ranges parent_ranges[PROMREG_MAX]; int num_parent_ranges; - + success = prom_getproperty(parent, "ranges", - (char *) parent_ranges, - sizeof (parent_ranges)); + (char *) parent_ranges, + sizeof(parent_ranges)); if (success != -1) { - num_parent_ranges = (success/sizeof(struct linux_prom_ranges)); - prom_adjust_ranges (ranges, num_ranges, parent_ranges, num_parent_ranges); + num_parent_ranges = (success / sizeof(struct linux_prom_ranges)); + prom_adjust_ranges(ranges, num_ranges, parent_ranges, num_parent_ranges); } } prom_adjust_regs(regs, nregs, ranges, num_ranges);