anolis: iommu/vt-d: add kh40000_iommu_dma_ops for KH-40000

ANBZ: #4330

Add kh40000_iommu_dma_ops for the KH-40000 platform.

For coherent DMA access, memory is allocated only from the NUMA node where
the device resides.

For streaming DMA access, add a PCI configuration read at the end of the DMA
access, which flushes the device's posted writes before the CPU reads the buffer.

Signed-off-by: leoliu-oc <leoliu-oc@zhaoxin.com>
Reviewed-by: Artie Ding <artie.ding@linux.alibaba.com>
Link: https://gitee.com/anolis/cloud-kernel/pulls/3375
Author: leoliu-oc
Date: 2024-08-08 17:15:42 +08:00
parent 81e1e12eef
commit cc48a6b845
4 changed files with 203 additions and 0 deletions
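
As a driver-level illustration only (not part of this patch), the sketch below shows how both behaviours surface through the generic DMA API once intel_iommu_init() installs kh40000_iommu_dma_ops; the function and the names pdev, buf and len are hypothetical.

/* Hedged sketch, not from the patch: driver-visible effect of kh40000_iommu_dma_ops. */
#include <linux/pci.h>
#include <linux/dma-mapping.h>

static int kh40000_example_dma(struct pci_dev *pdev, void *buf, size_t len)
{
        dma_addr_t coherent_dma, stream_dma;
        void *cpu_addr;

        /*
         * Coherent DMA: kh40000_iommu_alloc_coherent() backs this buffer
         * with pages taken only from pdev's own NUMA node.
         */
        cpu_addr = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, &coherent_dma, GFP_KERNEL);
        if (!cpu_addr)
                return -ENOMEM;

        /* Streaming DMA: the map side is unchanged (intel_map_page()). */
        stream_dma = dma_map_single(&pdev->dev, buf, len, DMA_FROM_DEVICE);
        if (dma_mapping_error(&pdev->dev, stream_dma)) {
                dma_free_coherent(&pdev->dev, PAGE_SIZE, cpu_addr, coherent_dma);
                return -ENOMEM;
        }

        /* ... the device DMAs into buf here ... */

        /*
         * Unmap (or dma_sync_single_for_cpu()) ends with a dummy PCI config
         * read of the device, so its posted writes reach memory before the
         * CPU consumes the buffer (kh40000_iommu_unmap_page()).
         */
        dma_unmap_single(&pdev->dev, stream_dma, len, DMA_FROM_DEVICE);

        dma_free_coherent(&pdev->dev, PAGE_SIZE, cpu_addr, coherent_dma);
        return 0;
}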


@@ -5408,3 +5408,7 @@
A hex value specifying bitmask with supplemental xhci
host controller quirks. Meaning of each bit can be
consulted in header drivers/usb/host/xhci.h.
zhaoxin_patch_bitmask=
[X86] Bitmask of Zhaoxin platform patches.
bit 0: enable the KH-40000 DMA patch's node-check function
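
For illustration only (not part of the documented text), the parameter would be passed on the kernel command line like this:

        zhaoxin_patch_bitmask=0x1   (bit 0 set: node check enabled, the default)
        zhaoxin_patch_bitmask=0     (bit 0 clear: node check disabled)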


@@ -32,6 +32,12 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
extern bool is_zhaoxin_kh40000(void);
#if IS_BUILTIN(CONFIG_INTEL_IOMMU) && IS_BUILTIN(CONFIG_X86_64)
phys_addr_t kh40000_iommu_iova_to_phys(struct device *dev, dma_addr_t paddr);
void kh40000_sync_single_dma_for_cpu(struct device *dev, dma_addr_t paddr,
enum dma_data_direction dir, bool is_iommu);
#endif /* CONFIG_INTEL_IOMMU && CONFIG_X86_64 */
bool arch_dma_alloc_attrs(struct device **dev);
#define arch_dma_alloc_attrs arch_dma_alloc_attrs


@@ -175,6 +175,87 @@ static int __init pci_iommu_init(void)
/* Must execute after PCI subsystem */
rootfs_initcall(pci_iommu_init);
#if IS_BUILTIN(CONFIG_INTEL_IOMMU) && IS_BUILTIN(CONFIG_X86_64)
/*
 * Usage:
 *   Set "zhaoxin_patch_bitmask=<value>" on the kernel command line.
 * Value description:
 *   bit 0: enable (1) or disable (0) the node check; default 1
 */
enum {
ZHAOXIN_P2CW_NODE_CHECK = BIT(0),
ZHAOXIN_PATCH_CODE_MAX = ZHAOXIN_P2CW_NODE_CHECK,
};
#define ZHAOXIN_PATCH_CODE_DEFAULT ZHAOXIN_P2CW_NODE_CHECK
unsigned long zhaoxin_patch_code = ZHAOXIN_PATCH_CODE_DEFAULT;
static int __init zhaoxin_patch_code_setup(char *str)
{
int err = kstrtoul(str, 0, &zhaoxin_patch_code);
if (err || (zhaoxin_patch_code > ZHAOXIN_PATCH_CODE_MAX)) {
pr_err("cmdline 'zhaoxin_patch_bitmask=%s' inappropriate\n", str);
zhaoxin_patch_code = ZHAOXIN_PATCH_CODE_DEFAULT;
return err;
}
if (ZHAOXIN_P2CW_NODE_CHECK & zhaoxin_patch_code)
pr_info("zhaoxin dma patch node check is enabled\n");
return 0;
}
__setup("zhaoxin_patch_bitmask=", zhaoxin_patch_code_setup);
static struct pci_dev *kh40000_get_pci_dev(struct device *dev)
{
if (dev_is_pci(dev))
return to_pci_dev(dev);
if (dev->parent)
return kh40000_get_pci_dev(dev->parent);
return NULL;
}
void kh40000_sync_single_dma_for_cpu(struct device *dev, dma_addr_t paddr,
enum dma_data_direction dir, bool is_iommu)
{
u8 vid;
struct pci_dev *pci;
u64 dma_mask = *dev->dma_mask;
/* check direction */
if ((dir != DMA_FROM_DEVICE) && (dir != DMA_BIDIRECTIONAL))
return;
/* check dma capability */
if (dma_mask <= DMA_BIT_MASK(32))
return;
/* check device type */
pci = kh40000_get_pci_dev(dev);
if (pci == NULL)
return;
/* get real physical address */
if (is_iommu)
paddr = kh40000_iommu_iova_to_phys(dev, paddr);
/* skip the dummy read when the buffer already sits on the device's local node */
if ((zhaoxin_patch_code & ZHAOXIN_P2CW_NODE_CHECK)) {
unsigned long pfn = PFN_DOWN(paddr);
if (pfn_to_nid(pfn) == dev_to_node(dev))
return;
}
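        /*
         * The config read is non-posted: per PCI ordering rules its
         * completion cannot pass the device's earlier posted writes, so the
         * DMA data is in memory before the CPU reads the buffer.
         */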
pci_read_config_byte(pci, PCI_VENDOR_ID, &vid);
}
#endif /* IS_BUILTIN(CONFIG_INTEL_IOMMU) && IS_BUILTIN(CONFIG_X86_64) */
#ifdef CONFIG_PCI
/* Many VIA bridges seem to corrupt data for DAC. Disable it here */


@@ -4757,6 +4757,7 @@ const struct attribute_group *intel_iommu_groups[] = {
NULL,
};
const struct dma_map_ops kh40000_iommu_dma_ops;
int __init intel_iommu_init(void)
{
int ret = -ENODEV;
@@ -4850,6 +4851,9 @@ int __init intel_iommu_init(void)
#endif
dma_ops = &intel_dma_ops;
if (is_zhaoxin_kh40000())
dma_ops = &kh40000_iommu_dma_ops;
init_iommu_pm_ops();
for_each_active_iommu(iommu, drhd) {
@@ -5499,3 +5503,111 @@ static void __init check_tylersburg_isoch(void)
pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
vtisochctrl);
}
#if IS_BUILTIN(CONFIG_INTEL_IOMMU) && IS_BUILTIN(CONFIG_X86_64)
static void *kh40000_iommu_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t flags, unsigned long attrs)
{
struct page *page = NULL;
int order;
nodemask_t nodemask;
int node = dev_to_node(dev);
nodes_clear(nodemask);
size = PAGE_ALIGN(size);
order = get_order(size);
if (!iommu_no_mapping(dev))
flags &= ~(GFP_DMA | GFP_DMA32);
else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
flags |= GFP_DMA;
else
flags |= GFP_DMA32;
}
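        /*
         * Allocate node-locally: when the device's node is known and no DMA
         * zone is required, the single-node nodemask below keeps the pages on
         * that node; otherwise fall back to the usual preferred-node
         * allocation.
         */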
if (node == NUMA_NO_NODE) {
page = __alloc_pages_nodemask(flags, order, numa_mem_id(), NULL);
} else {
if (!(flags & (GFP_DMA | GFP_DMA32))) {
node_set(node, nodemask);
page = __alloc_pages_nodemask(flags | __GFP_HIGH, order, node, &nodemask);
} else {
page = __alloc_pages_nodemask(flags | __GFP_HIGH, order, node, NULL);
}
}
if (!page)
return NULL;
memset(page_address(page), 0, size);
*dma_handle = __intel_map_single(dev, page_to_phys(page), size, DMA_BIDIRECTIONAL,
dev->coherent_dma_mask);
        if (*dma_handle)
                return page_address(page);

        /* mapping failed: don't leak the freshly allocated pages */
        __free_pages(page, order);
        return NULL;
}
phys_addr_t kh40000_iommu_iova_to_phys(struct device *dev, dma_addr_t paddr)
{
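        /*
         * With identity (pass-through) mapping the DMA handle is already a
         * physical address; translate through the IOMMU domain only when the
         * device has a real IOVA mapping.
         */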
/* only patch remote DMA access */
if (!iommu_no_mapping(dev)) {
struct dmar_domain *domain = find_domain(dev);
paddr = intel_iommu_iova_to_phys(&(domain->domain), paddr);
}
return paddr;
}
static void kh40000_iommu_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
enum dma_data_direction dir)
{
kh40000_sync_single_dma_for_cpu(dev, addr, dir, 1);
}
static void kh40000_iommu_unmap_page(struct device *dev, dma_addr_t addr, size_t size,
enum dma_data_direction dir, unsigned long attrs)
{
kh40000_sync_single_dma_for_cpu(dev, addr, dir, 1);
intel_unmap_page(dev, addr, size, dir, attrs);
}
static void kh40000_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems,
enum dma_data_direction dir, unsigned long attrs)
{
dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
unsigned long nrpages = 0;
struct scatterlist *sg;
int i;
for_each_sg(sglist, sg, nelems, i) {
nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
kh40000_iommu_sync_single_for_cpu(dev, sg->dma_address, sg_dma_len(sg), dir);
}
intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
}
static void kh40000_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nelems,
enum dma_data_direction dir)
{
struct scatterlist *sg;
int i;
for_each_sg(sgl, sg, nelems, i)
kh40000_iommu_sync_single_for_cpu(dev, sg->dma_address, sg_dma_len(sg), dir);
}
const struct dma_map_ops kh40000_iommu_dma_ops = {
.alloc = kh40000_iommu_alloc_coherent,
.free = intel_free_coherent,
.map_sg = intel_map_sg,
.unmap_sg = kh40000_iommu_unmap_sg,
.map_page = intel_map_page,
.unmap_page = kh40000_iommu_unmap_page,
.sync_single_for_cpu = kh40000_iommu_sync_single_for_cpu,
.sync_sg_for_cpu = kh40000_iommu_sync_sg_for_cpu,
.mapping_error = intel_mapping_error,
.dma_supported = dma_direct_supported,
};
#endif /* IS_BUILTIN(CONFIG_INTEL_IOMMU) && IS_BUILTIN(CONFIG_X86_64) */