From 39abf8aa7dfad0badb4741373023cda369aacc8e Mon Sep 17 00:00:00 2001 From: Hiroshi Doyu Date: Thu, 2 Aug 2012 11:46:40 +0300 Subject: [PATCH 01/26] iommu/tegra: smmu: debugfs for TLB/PTC statistics Add debugfs entries to collect TLB/PTC statistics. $ echo "reset" > /sys/kernel/debug/smmu/mc/{tlb,ptc} $ echo "on" > /sys/kernel/debug/smmu/mc/{tlb,ptc} $ echo "off" > /sys/kernel/debug/smmu/mc/{tlb,ptc} $ cat /sys/kernel/debug/smmu/mc/{tlb,ptc} hit:0014910c miss:00014d22 The above format is: hit:miss: fscanf(fp, "hit:%lx miss:%lx", &hit, &miss); Signed-off-by: Hiroshi Doyu Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-smmu.c | 202 ++++++++++++++++++++++++++++++++++--- 1 file changed, 190 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 4ba325ab6262..95b4698abd3e 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -32,6 +32,8 @@ #include #include #include +#include +#include #include #include @@ -47,16 +49,29 @@ #define SMMU_CONFIG_DISABLE 0 #define SMMU_CONFIG_ENABLE 1 -#define SMMU_TLB_CONFIG 0x14 -#define SMMU_TLB_CONFIG_STATS__MASK (1 << 31) -#define SMMU_TLB_CONFIG_STATS__ENABLE (1 << 31) +/* REVISIT: To support multiple MCs */ +enum { + _MC = 0, +}; + +enum { + _TLB = 0, + _PTC, +}; + +#define SMMU_CACHE_CONFIG_BASE 0x14 +#define __SMMU_CACHE_CONFIG(mc, cache) (SMMU_CACHE_CONFIG_BASE + 4 * cache) +#define SMMU_CACHE_CONFIG(cache) __SMMU_CACHE_CONFIG(_MC, cache) + +#define SMMU_CACHE_CONFIG_STATS_SHIFT 31 +#define SMMU_CACHE_CONFIG_STATS_ENABLE (1 << SMMU_CACHE_CONFIG_STATS_SHIFT) +#define SMMU_CACHE_CONFIG_STATS_TEST_SHIFT 30 +#define SMMU_CACHE_CONFIG_STATS_TEST (1 << SMMU_CACHE_CONFIG_STATS_TEST_SHIFT) + #define SMMU_TLB_CONFIG_HIT_UNDER_MISS__ENABLE (1 << 29) #define SMMU_TLB_CONFIG_ACTIVE_LINES__VALUE 0x10 #define SMMU_TLB_CONFIG_RESET_VAL 0x20000010 -#define SMMU_PTC_CONFIG 0x18 -#define SMMU_PTC_CONFIG_STATS__MASK (1 << 31) -#define SMMU_PTC_CONFIG_STATS__ENABLE (1 << 31) #define SMMU_PTC_CONFIG_CACHE__ENABLE (1 << 29) #define SMMU_PTC_CONFIG_INDEX_MAP__PATTERN 0x3f #define SMMU_PTC_CONFIG_RESET_VAL 0x2000003f @@ -86,10 +101,10 @@ #define SMMU_ASID_SECURITY 0x38 -#define SMMU_STATS_TLB_HIT_COUNT 0x1f0 -#define SMMU_STATS_TLB_MISS_COUNT 0x1f4 -#define SMMU_STATS_PTC_HIT_COUNT 0x1f8 -#define SMMU_STATS_PTC_MISS_COUNT 0x1fc +#define SMMU_STATS_CACHE_COUNT_BASE 0x1f0 + +#define SMMU_STATS_CACHE_COUNT(mc, cache, hitmiss) \ + (SMMU_STATS_CACHE_COUNT_BASE + 8 * cache + 4 * hitmiss) #define SMMU_TRANSLATION_ENABLE_0 0x228 #define SMMU_TRANSLATION_ENABLE_1 0x22c @@ -251,6 +266,8 @@ struct smmu_device { unsigned long translation_enable_2; unsigned long asid_security; + struct dentry *debugfs_root; + struct device_node *ahb; int num_as; @@ -412,8 +429,8 @@ static int smmu_setup_regs(struct smmu_device *smmu) smmu_write(smmu, smmu->translation_enable_1, SMMU_TRANSLATION_ENABLE_1); smmu_write(smmu, smmu->translation_enable_2, SMMU_TRANSLATION_ENABLE_2); smmu_write(smmu, smmu->asid_security, SMMU_ASID_SECURITY); - smmu_write(smmu, SMMU_TLB_CONFIG_RESET_VAL, SMMU_TLB_CONFIG); - smmu_write(smmu, SMMU_PTC_CONFIG_RESET_VAL, SMMU_PTC_CONFIG); + smmu_write(smmu, SMMU_TLB_CONFIG_RESET_VAL, SMMU_CACHE_CONFIG(_TLB)); + smmu_write(smmu, SMMU_PTC_CONFIG_RESET_VAL, SMMU_CACHE_CONFIG(_PTC)); smmu_flush_regs(smmu, 1); @@ -892,6 +909,164 @@ static struct iommu_ops smmu_iommu_ops = { .pgsize_bitmap = SMMU_IOMMU_PGSIZES, }; +/* Should be in the order of enum */ +static const char * const smmu_debugfs_mc[] = { "mc", }; +static const char * const smmu_debugfs_cache[] = { "tlb", "ptc", }; + +static ssize_t smmu_debugfs_stats_write(struct file *file, + const char __user *buffer, + size_t count, loff_t *pos) +{ + struct smmu_device *smmu; + struct dentry *dent; + int i, cache, mc; + enum { + _OFF = 0, + _ON, + _RESET, + }; + const char * const command[] = { + [_OFF] = "off", + [_ON] = "on", + [_RESET] = "reset", + }; + char str[] = "reset"; + u32 val; + size_t offs; + + count = min_t(size_t, count, sizeof(str)); + if (copy_from_user(str, buffer, count)) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(command); i++) + if (strncmp(str, command[i], + strlen(command[i])) == 0) + break; + + if (i == ARRAY_SIZE(command)) + return -EINVAL; + + dent = file->f_dentry; + cache = (int)dent->d_inode->i_private; + mc = (int)dent->d_parent->d_inode->i_private; + smmu = dent->d_parent->d_parent->d_inode->i_private; + + offs = SMMU_CACHE_CONFIG(cache); + val = smmu_read(smmu, offs); + switch (i) { + case _OFF: + val &= ~SMMU_CACHE_CONFIG_STATS_ENABLE; + val &= ~SMMU_CACHE_CONFIG_STATS_TEST; + smmu_write(smmu, val, offs); + break; + case _ON: + val |= SMMU_CACHE_CONFIG_STATS_ENABLE; + val &= ~SMMU_CACHE_CONFIG_STATS_TEST; + smmu_write(smmu, val, offs); + break; + case _RESET: + val |= SMMU_CACHE_CONFIG_STATS_TEST; + smmu_write(smmu, val, offs); + val &= ~SMMU_CACHE_CONFIG_STATS_TEST; + smmu_write(smmu, val, offs); + break; + default: + BUG(); + break; + } + + dev_dbg(smmu->dev, "%s() %08x, %08x @%08x\n", __func__, + val, smmu_read(smmu, offs), offs); + + return count; +} + +static int smmu_debugfs_stats_show(struct seq_file *s, void *v) +{ + struct smmu_device *smmu; + struct dentry *dent; + int i, cache, mc; + const char * const stats[] = { "hit", "miss", }; + + dent = d_find_alias(s->private); + cache = (int)dent->d_inode->i_private; + mc = (int)dent->d_parent->d_inode->i_private; + smmu = dent->d_parent->d_parent->d_inode->i_private; + + for (i = 0; i < ARRAY_SIZE(stats); i++) { + u32 val; + size_t offs; + + offs = SMMU_STATS_CACHE_COUNT(mc, cache, i); + val = smmu_read(smmu, offs); + seq_printf(s, "%s:%08x ", stats[i], val); + + dev_dbg(smmu->dev, "%s() %s %08x @%08x\n", __func__, + stats[i], val, offs); + } + seq_printf(s, "\n"); + + return 0; +} + +static int smmu_debugfs_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, smmu_debugfs_stats_show, inode); +} + +static const struct file_operations smmu_debugfs_stats_fops = { + .open = smmu_debugfs_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = smmu_debugfs_stats_write, +}; + +static void smmu_debugfs_delete(struct smmu_device *smmu) +{ + debugfs_remove_recursive(smmu->debugfs_root); +} + +static void smmu_debugfs_create(struct smmu_device *smmu) +{ + int i; + struct dentry *root; + + root = debugfs_create_file(dev_name(smmu->dev), + S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, + NULL, smmu, NULL); + if (!root) + goto err_out; + smmu->debugfs_root = root; + + for (i = 0; i < ARRAY_SIZE(smmu_debugfs_mc); i++) { + int j; + struct dentry *mc; + + mc = debugfs_create_file(smmu_debugfs_mc[i], + S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, + root, (void *)i, NULL); + if (!mc) + goto err_out; + + for (j = 0; j < ARRAY_SIZE(smmu_debugfs_cache); j++) { + struct dentry *cache; + + cache = debugfs_create_file(smmu_debugfs_cache[j], + S_IWUGO | S_IRUGO, mc, + (void *)j, + &smmu_debugfs_stats_fops); + if (!cache) + goto err_out; + } + } + + return; + +err_out: + smmu_debugfs_delete(smmu); +} + static int tegra_smmu_suspend(struct device *dev) { struct smmu_device *smmu = dev_get_drvdata(dev); @@ -996,6 +1171,7 @@ static int tegra_smmu_probe(struct platform_device *pdev) if (!smmu->avp_vector_page) return -ENOMEM; + smmu_debugfs_create(smmu); smmu_handle = smmu; return 0; } @@ -1005,6 +1181,8 @@ static int tegra_smmu_remove(struct platform_device *pdev) struct smmu_device *smmu = platform_get_drvdata(pdev); int i; + smmu_debugfs_delete(smmu); + smmu_write(smmu, SMMU_CONFIG_DISABLE, SMMU_CONFIG); for (i = 0; i < smmu->num_as; i++) free_pdir(&smmu->as[i]); From 2166f0eefc435b1e99d1b179e1294ee2bf7f7bd9 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 26 Aug 2012 09:51:15 +0800 Subject: [PATCH 02/26] iommu/amd: remove duplicated include from amd_iommu_init.c Remove duplicated include. Signed-off-by: Wei Yongjun Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 18a89b760aaa..860c1c342cd9 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include From df805abb28fa49514ab8e231d4be3d30a76a32fb Mon Sep 17 00:00:00 2001 From: Frank Arnold Date: Mon, 27 Aug 2012 19:21:04 +0200 Subject: [PATCH 03/26] iommu/amd: Fix some typos Fix some typos in comments and user-visible messages. No functional changes. Signed-off-by: Frank Arnold Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 10 +++++----- drivers/iommu/amd_iommu_init.c | 10 +++++----- drivers/iommu/amd_iommu_types.h | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index b64502dfa9f4..450b25808d98 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -684,7 +684,7 @@ static void iommu_poll_ppr_log(struct amd_iommu *iommu) /* * Release iommu->lock because ppr-handling might need to - * re-aquire it + * re-acquire it */ spin_unlock_irqrestore(&iommu->lock, flags); @@ -802,7 +802,7 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); if (s) /* size bit - we flush more than one 4kb page */ cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; - if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ + if (pde) /* PDE bit - we want to flush everything, not only the PTEs */ cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; } @@ -2153,7 +2153,7 @@ static bool pci_pri_tlp_required(struct pci_dev *pdev) } /* - * If a device is not yet associated with a domain, this function does + * If a device is not yet associated with a domain, this function * assigns it visible for the hardware */ static int attach_device(struct device *dev, @@ -2403,7 +2403,7 @@ static struct protection_domain *get_domain(struct device *dev) if (domain != NULL) return domain; - /* Device not bount yet - bind it */ + /* Device not bound yet - bind it */ dma_dom = find_protection_domain(devid); if (!dma_dom) dma_dom = amd_iommu_rlookup_table[devid]->default_dom; @@ -2942,7 +2942,7 @@ static void __init prealloc_protection_domains(void) alloc_passthrough_domain(); dev_data->passthrough = true; attach_device(&dev->dev, pt_domain); - pr_info("AMD-Vi: Using passthough domain for device %s\n", + pr_info("AMD-Vi: Using passthrough domain for device %s\n", dev_name(&dev->dev)); } diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 860c1c342cd9..577669fece85 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -177,7 +177,7 @@ u16 *amd_iommu_alias_table; struct amd_iommu **amd_iommu_rlookup_table; /* - * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap + * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap * to know which ones are already in use. */ unsigned long *amd_iommu_pd_alloc_bitmap; @@ -477,7 +477,7 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table) /**************************************************************************** * - * The following functions belong the the code path which parses the ACPI table + * The following functions belong to the code path which parses the ACPI table * the second time. In this ACPI parsing iteration we allocate IOMMU specific * data structures, initialize the device/alias/rlookup table and also * basically initialize the hardware. @@ -690,7 +690,7 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, } /* - * Reads the device exclusion range from ACPI and initialize IOMMU with + * Reads the device exclusion range from ACPI and initializes the IOMMU with * it */ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) @@ -1140,7 +1140,7 @@ static int __init amd_iommu_init_pci(void) /**************************************************************************** * * The following functions initialize the MSI interrupts for all IOMMUs - * in the system. Its a bit challenging because there could be multiple + * in the system. It's a bit challenging because there could be multiple * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per * pci_dev. * @@ -1198,7 +1198,7 @@ enable_faults: * * The next functions belong to the third pass of parsing the ACPI * table. In this last pass the memory mapping requirements are - * gathered (like exclusion and unity mapping reanges). + * gathered (like exclusion and unity mapping ranges). * ****************************************************************************/ diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index d0dab865a8b8..d1390b82b194 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -255,7 +255,7 @@ #define PAGE_SIZE_ALIGN(address, pagesize) \ ((address) & ~((pagesize) - 1)) /* - * Creates an IOMMU PTE for an address an a given pagesize + * Creates an IOMMU PTE for an address and a given pagesize * The PTE has no permission bits set * Pagesize is expected to be a power-of-two larger than 4096 */ @@ -404,7 +404,7 @@ struct iommu_dev_data { struct list_head dev_data_list; /* For global dev_data_list */ struct iommu_dev_data *alias_data;/* The alias dev_data */ struct protection_domain *domain; /* Domain the device is bound to */ - atomic_t bind; /* Domain attach reverent count */ + atomic_t bind; /* Domain attach reference count */ u16 devid; /* PCI Device ID */ bool iommu_v2; /* Device can make use of IOMMUv2 */ bool passthrough; /* Default for device is pt_domain */ From 5a2c937a8be8c9c5a6d23308b5ee841b6394a1cf Mon Sep 17 00:00:00 2001 From: Hiroshi Doyu Date: Fri, 14 Sep 2012 10:22:00 -0600 Subject: [PATCH 04/26] iommu/tegra: smmu: Use debugfs_create_dir for directory The commit c3b1a35 "debugfs: make sure that debugfs_create_file() gets used only for regulars" doesn't allow to use debugfs_create_file() for dir. Keep debugfs data in smmu_device instead of directory's i_private. Signed-off-by: Hiroshi Doyu Signed-off-by: Stephen Warren Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-smmu.c | 52 +++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 95b4698abd3e..7e42c71c1ffc 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -246,6 +246,12 @@ struct smmu_as { spinlock_t client_lock; /* for client list */ }; +struct smmu_debugfs_info { + struct smmu_device *smmu; + int mc; + int cache; +}; + /* * Per SMMU device - IOMMU device */ @@ -267,6 +273,7 @@ struct smmu_device { unsigned long asid_security; struct dentry *debugfs_root; + struct smmu_debugfs_info *debugfs_info; struct device_node *ahb; @@ -917,9 +924,10 @@ static ssize_t smmu_debugfs_stats_write(struct file *file, const char __user *buffer, size_t count, loff_t *pos) { + struct smmu_debugfs_info *info; struct smmu_device *smmu; struct dentry *dent; - int i, cache, mc; + int i; enum { _OFF = 0, _ON, @@ -947,11 +955,10 @@ static ssize_t smmu_debugfs_stats_write(struct file *file, return -EINVAL; dent = file->f_dentry; - cache = (int)dent->d_inode->i_private; - mc = (int)dent->d_parent->d_inode->i_private; - smmu = dent->d_parent->d_parent->d_inode->i_private; + info = dent->d_inode->i_private; + smmu = info->smmu; - offs = SMMU_CACHE_CONFIG(cache); + offs = SMMU_CACHE_CONFIG(info->cache); val = smmu_read(smmu, offs); switch (i) { case _OFF: @@ -983,21 +990,21 @@ static ssize_t smmu_debugfs_stats_write(struct file *file, static int smmu_debugfs_stats_show(struct seq_file *s, void *v) { + struct smmu_debugfs_info *info; struct smmu_device *smmu; struct dentry *dent; - int i, cache, mc; + int i; const char * const stats[] = { "hit", "miss", }; dent = d_find_alias(s->private); - cache = (int)dent->d_inode->i_private; - mc = (int)dent->d_parent->d_inode->i_private; - smmu = dent->d_parent->d_parent->d_inode->i_private; + info = dent->d_inode->i_private; + smmu = info->smmu; for (i = 0; i < ARRAY_SIZE(stats); i++) { u32 val; size_t offs; - offs = SMMU_STATS_CACHE_COUNT(mc, cache, i); + offs = SMMU_STATS_CACHE_COUNT(info->mc, info->cache, i); val = smmu_read(smmu, offs); seq_printf(s, "%s:%08x ", stats[i], val); @@ -1025,16 +1032,22 @@ static const struct file_operations smmu_debugfs_stats_fops = { static void smmu_debugfs_delete(struct smmu_device *smmu) { debugfs_remove_recursive(smmu->debugfs_root); + kfree(smmu->debugfs_info); } static void smmu_debugfs_create(struct smmu_device *smmu) { int i; + size_t bytes; struct dentry *root; - root = debugfs_create_file(dev_name(smmu->dev), - S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, - NULL, smmu, NULL); + bytes = ARRAY_SIZE(smmu_debugfs_mc) * ARRAY_SIZE(smmu_debugfs_cache) * + sizeof(*smmu->debugfs_info); + smmu->debugfs_info = kmalloc(bytes, GFP_KERNEL); + if (!smmu->debugfs_info) + return; + + root = debugfs_create_dir(dev_name(smmu->dev), NULL); if (!root) goto err_out; smmu->debugfs_root = root; @@ -1043,18 +1056,23 @@ static void smmu_debugfs_create(struct smmu_device *smmu) int j; struct dentry *mc; - mc = debugfs_create_file(smmu_debugfs_mc[i], - S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, - root, (void *)i, NULL); + mc = debugfs_create_dir(smmu_debugfs_mc[i], root); if (!mc) goto err_out; for (j = 0; j < ARRAY_SIZE(smmu_debugfs_cache); j++) { struct dentry *cache; + struct smmu_debugfs_info *info; + + info = smmu->debugfs_info; + info += i * ARRAY_SIZE(smmu_debugfs_mc) + j; + info->smmu = smmu; + info->mc = i; + info->cache = j; cache = debugfs_create_file(smmu_debugfs_cache[j], S_IWUGO | S_IRUGO, mc, - (void *)j, + (void *)info, &smmu_debugfs_stats_fops); if (!cache) goto err_out; From e6bc59330e34626082a6b2e4733802e372781c8d Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Tue, 4 Sep 2012 16:36:15 -0600 Subject: [PATCH 05/26] dma: tegra: move smmu.h into SMMU driver There's no need to place these defines into arch/arm/mach-tegra/include/. Move them into the SMMU driver to clean up mach-tegra, as a pre-requisite for single-zImage. Signed-off-by: Stephen Warren Cc: Hiroshi Doyu Acked-by: Hiroshi Doyu Signed-off-by: Joerg Roedel --- arch/arm/mach-tegra/include/mach/smmu.h | 63 ------------------------- drivers/iommu/tegra-smmu.c | 41 +++++++++++++++- 2 files changed, 40 insertions(+), 64 deletions(-) delete mode 100644 arch/arm/mach-tegra/include/mach/smmu.h diff --git a/arch/arm/mach-tegra/include/mach/smmu.h b/arch/arm/mach-tegra/include/mach/smmu.h deleted file mode 100644 index dad403a9cf00..000000000000 --- a/arch/arm/mach-tegra/include/mach/smmu.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * IOMMU API for SMMU in Tegra30 - * - * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#ifndef MACH_SMMU_H -#define MACH_SMMU_H - -enum smmu_hwgrp { - HWGRP_AFI, - HWGRP_AVPC, - HWGRP_DC, - HWGRP_DCB, - HWGRP_EPP, - HWGRP_G2, - HWGRP_HC, - HWGRP_HDA, - HWGRP_ISP, - HWGRP_MPE, - HWGRP_NV, - HWGRP_NV2, - HWGRP_PPCS, - HWGRP_SATA, - HWGRP_VDE, - HWGRP_VI, - - HWGRP_COUNT, - - HWGRP_END = ~0, -}; - -#define HWG_AFI (1 << HWGRP_AFI) -#define HWG_AVPC (1 << HWGRP_AVPC) -#define HWG_DC (1 << HWGRP_DC) -#define HWG_DCB (1 << HWGRP_DCB) -#define HWG_EPP (1 << HWGRP_EPP) -#define HWG_G2 (1 << HWGRP_G2) -#define HWG_HC (1 << HWGRP_HC) -#define HWG_HDA (1 << HWGRP_HDA) -#define HWG_ISP (1 << HWGRP_ISP) -#define HWG_MPE (1 << HWGRP_MPE) -#define HWG_NV (1 << HWGRP_NV) -#define HWG_NV2 (1 << HWGRP_NV2) -#define HWG_PPCS (1 << HWGRP_PPCS) -#define HWG_SATA (1 << HWGRP_SATA) -#define HWG_VDE (1 << HWGRP_VDE) -#define HWG_VI (1 << HWGRP_VI) - -#endif /* MACH_SMMU_H */ diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 7e42c71c1ffc..e1d17feb52b8 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -39,9 +39,48 @@ #include #include -#include #include +enum smmu_hwgrp { + HWGRP_AFI, + HWGRP_AVPC, + HWGRP_DC, + HWGRP_DCB, + HWGRP_EPP, + HWGRP_G2, + HWGRP_HC, + HWGRP_HDA, + HWGRP_ISP, + HWGRP_MPE, + HWGRP_NV, + HWGRP_NV2, + HWGRP_PPCS, + HWGRP_SATA, + HWGRP_VDE, + HWGRP_VI, + + HWGRP_COUNT, + + HWGRP_END = ~0, +}; + +#define HWG_AFI (1 << HWGRP_AFI) +#define HWG_AVPC (1 << HWGRP_AVPC) +#define HWG_DC (1 << HWGRP_DC) +#define HWG_DCB (1 << HWGRP_DCB) +#define HWG_EPP (1 << HWGRP_EPP) +#define HWG_G2 (1 << HWGRP_G2) +#define HWG_HC (1 << HWGRP_HC) +#define HWG_HDA (1 << HWGRP_HDA) +#define HWG_ISP (1 << HWGRP_ISP) +#define HWG_MPE (1 << HWGRP_MPE) +#define HWG_NV (1 << HWGRP_NV) +#define HWG_NV2 (1 << HWGRP_NV2) +#define HWG_PPCS (1 << HWGRP_PPCS) +#define HWG_SATA (1 << HWGRP_SATA) +#define HWG_VDE (1 << HWGRP_VDE) +#define HWG_VI (1 << HWGRP_VI) + /* bitmap of the page sizes currently supported */ #define SMMU_IOMMU_PGSIZES (SZ_4K) From f8ffcc9279f90ba3790c6dc0f01207ad6ea1c8df Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 6 Sep 2012 12:34:09 +0800 Subject: [PATCH 06/26] iommu/exynos: use list_del_init instead of list_del/INIT_LIST_HEAD Using list_del_init() instead of list_del() + INIT_LIST_HEAD(). spatch with a semantic match is used to found this problem. (http://coccinelle.lip6.fr/) Signed-off-by: Wei Yongjun Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 80bad32aa463..7fe44f83cc37 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -840,8 +840,7 @@ static void exynos_iommu_detach_device(struct iommu_domain *domain, if (__exynos_sysmmu_disable(data)) { dev_dbg(dev, "%s: Detached IOMMU with pgtable %#lx\n", __func__, __pa(priv->pgtable)); - list_del(&data->node); - INIT_LIST_HEAD(&data->node); + list_del_init(&data->node); } else { dev_dbg(dev, "%s: Detaching IOMMU with pgtable %#lx delayed", From 2e12bc29fc5a12242d68e11875db3dd58efad9ff Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 11 Nov 2011 17:26:44 -0700 Subject: [PATCH 07/26] intel-iommu: Default to non-coherent for domains unattached to iommus domain_update_iommu_coherency() currently defaults to setting domains as coherent when the domain is not attached to any iommus. This allows for a window in domain_context_mapping_one() where such a domain can update context entries non-coherently, and only after update the domain capability to clear iommu_coherency. This can be seen using KVM device assignment on VT-d systems that do not support coherency in the ecap register. When a device is added to a guest, a domain is created (iommu_coherency = 0), the device is attached, and ranges are mapped. If we then hot unplug the device, the coherency is updated and set to the default (1) since no iommus are attached to the domain. A subsequent attach of a device makes use of the same dmar domain (now marked coherent) updates context entries with coherency enabled, and only disables coherency as the last step in the process. To fix this, switch domain_update_iommu_coherency() to use the safer, non-coherent default for domains not attached to iommus. Signed-off-by: Alex Williamson Tested-by: Donald Dutile Acked-by: Donald Dutile Acked-by: Chris Wright Cc: stable@vger.kernel.org Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 2297ec193eb4..cb9e1146b02f 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -589,7 +589,9 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain) { int i; - domain->iommu_coherency = 1; + i = find_first_bit(domain->iommu_bmp, g_num_of_iommus); + + domain->iommu_coherency = i < g_num_of_iommus ? 1 : 0; for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) { if (!ecap_coherent(g_iommus[i]->ecap)) { From 500c25edda62a5d39c4d3b345c2e229a3f9d26be Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 28 Sep 2012 16:22:26 +0200 Subject: [PATCH 08/26] iommu/amd: Fix features reporting When the AMD IOMMU doesn't have extended features, an empty line gets issued in dmesg like so: [ 3.061417] AMD-Vi: Found IOMMU at 0000:00:00.2 cap 0x40 [ 3.066757] <--- [ 3.068294] pci 0000:00:00.2: irq 72 for MSI/MSI-X [ 3.081213] AMD-Vi: Lazy IO/TLB flushing enabled Fix it. Signed-off-by: Borislav Petkov Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 577669fece85..1772b3cbebf4 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1114,8 +1114,8 @@ static void print_iommu_info(void) if (iommu_feature(iommu, (1ULL << i))) pr_cont(" %s", feat_str[i]); } - } pr_cont("\n"); + } } } From 6efed63bec36e260204a50cfe6878cd36b710ade Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 14 Jun 2012 15:52:58 +0200 Subject: [PATCH 09/26] iommu/amd: Keep track of HPET and IOAPIC device ids The IVRS ACPI table provides information about the IOAPICs and the HPETs available in the system and which PCI device ID they use in transactions. Save that information for later usage in interrupt remapping. Signed-off-by: Joerg Roedel Reviewed-by: Konrad Rzeszutek Wilk --- drivers/iommu/amd_iommu.c | 3 ++ drivers/iommu/amd_iommu_init.c | 68 ++++++++++++++++++++++++++++++++- drivers/iommu/amd_iommu_types.h | 34 +++++++++++++++++ 3 files changed, 103 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index b64502dfa9f4..6f514831af1f 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -72,6 +72,9 @@ static DEFINE_SPINLOCK(iommu_pd_list_lock); static LIST_HEAD(dev_data_list); static DEFINE_SPINLOCK(dev_data_list_lock); +LIST_HEAD(ioapic_map); +LIST_HEAD(hpet_map); + /* * Domain for untranslated devices - only allocated * if iommu=pt passed on kernel cmd line. diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 18a89b760aaa..617985961195 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -55,6 +55,10 @@ #define IVHD_DEV_ALIAS_RANGE 0x43 #define IVHD_DEV_EXT_SELECT 0x46 #define IVHD_DEV_EXT_SELECT_RANGE 0x47 +#define IVHD_DEV_SPECIAL 0x48 + +#define IVHD_SPECIAL_IOAPIC 1 +#define IVHD_SPECIAL_HPET 2 #define IVHD_FLAG_HT_TUN_EN_MASK 0x01 #define IVHD_FLAG_PASSPW_EN_MASK 0x02 @@ -690,6 +694,31 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, set_iommu_for_device(iommu, devid); } +static int add_special_device(u8 type, u8 id, u16 devid) +{ + struct devid_map *entry; + struct list_head *list; + + if (type != IVHD_SPECIAL_IOAPIC && type != IVHD_SPECIAL_HPET) + return -EINVAL; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->id = id; + entry->devid = devid; + + if (type == IVHD_SPECIAL_IOAPIC) + list = &ioapic_map; + else + list = &hpet_map; + + list_add_tail(&entry->list, list); + + return 0; +} + /* * Reads the device exclusion range from ACPI and initialize IOMMU with * it @@ -717,7 +746,7 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) * Takes a pointer to an AMD IOMMU entry in the ACPI table and * initializes the hardware and our data structures with it. */ -static void __init init_iommu_from_acpi(struct amd_iommu *iommu, +static int __init init_iommu_from_acpi(struct amd_iommu *iommu, struct ivhd_header *h) { u8 *p = (u8 *)h; @@ -867,12 +896,43 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, flags, ext_flags); } break; + case IVHD_DEV_SPECIAL: { + u8 handle, type; + const char *var; + u16 devid; + int ret; + + handle = e->ext & 0xff; + devid = (e->ext >> 8) & 0xffff; + type = (e->ext >> 24) & 0xff; + + if (type == IVHD_SPECIAL_IOAPIC) + var = "IOAPIC"; + else if (type == IVHD_SPECIAL_HPET) + var = "HPET"; + else + var = "UNKNOWN"; + + DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %02x:%02x.%x\n", + var, (int)handle, + PCI_BUS(devid), + PCI_SLOT(devid), + PCI_FUNC(devid)); + + set_dev_entry_from_acpi(iommu, devid, e->flags, 0); + ret = add_special_device(type, handle, devid); + if (ret) + return ret; + break; + } default: break; } p += ivhd_entry_length(p); } + + return 0; } /* Initializes the device->iommu mapping for the driver */ @@ -912,6 +972,8 @@ static void __init free_iommu_all(void) */ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) { + int ret; + spin_lock_init(&iommu->lock); /* Add IOMMU to internal data structures */ @@ -947,7 +1009,9 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->int_enabled = false; - init_iommu_from_acpi(iommu, h); + ret = init_iommu_from_acpi(iommu, h); + if (ret) + return ret; init_iommu_devices(iommu); return 0; diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index d0dab865a8b8..4495b78b1296 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -565,6 +565,16 @@ struct amd_iommu { u32 stored_l2[0x83]; }; +struct devid_map { + struct list_head list; + u8 id; + u16 devid; +}; + +/* Map HPET and IOAPIC ids to the devid used by the IOMMU */ +extern struct list_head ioapic_map; +extern struct list_head hpet_map; + /* * List with all IOMMUs in the system. This list is not locked because it is * only written and read at driver initialization or suspend time @@ -678,6 +688,30 @@ static inline u16 calc_devid(u8 bus, u8 devfn) return (((u16)bus) << 8) | devfn; } +static inline int get_ioapic_devid(int id) +{ + struct devid_map *entry; + + list_for_each_entry(entry, &ioapic_map, list) { + if (entry->id == id) + return entry->devid; + } + + return -EINVAL; +} + +static inline int get_hpet_devid(int id) +{ + struct devid_map *entry; + + list_for_each_entry(entry, &hpet_map, list) { + if (entry->id == id) + return entry->devid; + } + + return -EINVAL; +} + #ifdef CONFIG_AMD_IOMMU_STATS struct __iommu_counter { From 05152a0494449f60031da683e6d4cd60d0882214 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 15 Jun 2012 16:53:51 +0200 Subject: [PATCH 10/26] iommu/amd: Add slab-cache for irq remapping tables The irq remapping tables for the AMD IOMMU need to be aligned on a 128 byte boundary. Create a seperate slab-cache to guarantee this alignment. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 2 ++ drivers/iommu/amd_iommu_init.c | 23 +++++++++++++++++++++++ drivers/iommu/amd_iommu_types.h | 9 +++++++++ drivers/iommu/irq_remapping.h | 5 +++++ 4 files changed, 39 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 6f514831af1f..b6a8079ff7bb 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -95,6 +95,8 @@ struct iommu_cmd { u32 data[4]; }; +struct kmem_cache *amd_iommu_irq_cache; + static void update_domain(struct protection_domain *domain); static int __init alloc_passthrough_domain(void); diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 617985961195..073ed5d1e053 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -36,6 +36,7 @@ #include "amd_iommu_proto.h" #include "amd_iommu_types.h" +#include "irq_remapping.h" /* * definitions for the ACPI scanning code @@ -127,6 +128,7 @@ struct ivmd_header { } __attribute__((packed)); bool amd_iommu_dump; +bool amd_iommu_irq_remap __read_mostly; static bool amd_iommu_detected; static bool __initdata amd_iommu_disabled; @@ -1530,6 +1532,11 @@ static struct syscore_ops amd_iommu_syscore_ops = { static void __init free_on_init_error(void) { + if (amd_iommu_irq_cache) { + kmem_cache_destroy(amd_iommu_irq_cache); + amd_iommu_irq_cache = NULL; + } + amd_iommu_uninit_devices(); free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, @@ -1669,6 +1676,19 @@ static int __init early_amd_iommu_init(void) if (ret) goto out; + if (amd_iommu_irq_remap) { + /* + * Interrupt remapping enabled, create kmem_cache for the + * remapping tables. + */ + amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache", + MAX_IRQS_PER_TABLE * sizeof(u32), + IRQ_TABLE_ALIGNMENT, + 0, NULL); + if (!amd_iommu_irq_cache) + goto out; + } + ret = init_memory_definitions(ivrs_base); if (ret) goto out; @@ -1716,6 +1736,9 @@ static bool detect_ivrs(void) /* Make sure ACS will be enabled during PCI probe */ pci_request_acs(); + if (!disable_irq_remap) + amd_iommu_irq_remap = true; + return true; } diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 4495b78b1296..953cea80daa7 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -334,6 +334,15 @@ extern bool amd_iommu_np_cache; /* Only true if all IOMMUs support device IOTLBs */ extern bool amd_iommu_iotlb_sup; +#define MAX_IRQS_PER_TABLE 256 +#define IRQ_TABLE_ALIGNMENT 128 + +/* Interrupt remapping feature used? */ +extern bool amd_iommu_irq_remap; + +/* kmem_cache to get tables with 128 byte alignement */ +extern struct kmem_cache *amd_iommu_irq_cache; + /* * Make iterating over all IOMMUs easier */ diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h index b12974cc1dfe..624d360d9be4 100644 --- a/drivers/iommu/irq_remapping.h +++ b/drivers/iommu/irq_remapping.h @@ -83,6 +83,11 @@ struct irq_remap_ops { extern struct irq_remap_ops intel_irq_remap_ops; +#else /* CONFIG_IRQ_REMAP */ + +#define irq_remapping_enabled 0 +#define disable_irq_remap 1 + #endif /* CONFIG_IRQ_REMAP */ #endif /* __IRQ_REMAPPING_H */ From 0ea2c422bc8da99d14baa46d4789861a4f8d4ec0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 15 Jun 2012 18:05:20 +0200 Subject: [PATCH 11/26] iommu/amd: Allocate data structures to keep track of irq remapping tables To easily map device ids to interrupt remapping table entries a new lookup table is necessary. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 16 ++++++++++++++++ drivers/iommu/amd_iommu_types.h | 9 +++++++++ 2 files changed, 25 insertions(+) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 073ed5d1e053..2d923fe7c2a1 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -183,6 +183,12 @@ u16 *amd_iommu_alias_table; */ struct amd_iommu **amd_iommu_rlookup_table; +/* + * This table is used to find the irq remapping table for a given device id + * quickly. + */ +struct irq_remap_table **irq_lookup_table; + /* * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap * to know which ones are already in use. @@ -1532,9 +1538,13 @@ static struct syscore_ops amd_iommu_syscore_ops = { static void __init free_on_init_error(void) { + free_pages((unsigned long)irq_lookup_table, + get_order(rlookup_table_size)); + if (amd_iommu_irq_cache) { kmem_cache_destroy(amd_iommu_irq_cache); amd_iommu_irq_cache = NULL; + } amd_iommu_uninit_devices(); @@ -1687,6 +1697,12 @@ static int __init early_amd_iommu_init(void) 0, NULL); if (!amd_iommu_irq_cache) goto out; + + irq_lookup_table = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, + get_order(rlookup_table_size)); + if (!irq_lookup_table) + goto out; } ret = init_memory_definitions(ivrs_base); diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 953cea80daa7..1a7d48078d3f 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -175,6 +175,7 @@ #define DEV_ENTRY_EX 0x67 #define DEV_ENTRY_SYSMGT1 0x68 #define DEV_ENTRY_SYSMGT2 0x69 +#define DEV_ENTRY_IRQ_TBL_EN 0x80 #define DEV_ENTRY_INIT_PASS 0xb8 #define DEV_ENTRY_EINT_PASS 0xb9 #define DEV_ENTRY_NMI_PASS 0xba @@ -337,6 +338,14 @@ extern bool amd_iommu_iotlb_sup; #define MAX_IRQS_PER_TABLE 256 #define IRQ_TABLE_ALIGNMENT 128 +struct irq_remap_table { + spinlock_t lock; + unsigned min_index; + u32 *table; +}; + +extern struct irq_remap_table **irq_lookup_table; + /* Interrupt remapping feature used? */ extern bool amd_iommu_irq_remap; From eb1eb7ae65a9d32f6c16a90419caf01221f94734 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 5 Jul 2012 11:58:02 +0200 Subject: [PATCH 12/26] iommu/amd: Check if IOAPIC information is correct When the IOAPIC information provided in the IVRS table is not correct or not complete the system may not boot at all when interrupt remapping is enabled. So check if this information is correct and print out a firmware bug message when it is not. Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 2 +- drivers/iommu/amd_iommu_init.c | 27 ++++++++++++++++++++++++--- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 9f69b561f5db..e39f9dbf297b 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -42,7 +42,7 @@ config AMD_IOMMU select PCI_PRI select PCI_PASID select IOMMU_API - depends on X86_64 && PCI && ACPI + depends on X86_64 && PCI && ACPI && X86_IO_APIC ---help--- With this option you can enable support for AMD IOMMU hardware in your system. An IOMMU is a hardware component which provides diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 2d923fe7c2a1..f8a222b0ac3f 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "amd_iommu_proto.h" #include "amd_iommu_types.h" @@ -1575,6 +1576,23 @@ static void __init free_on_init_error(void) #endif } +static bool __init check_ioapic_information(void) +{ + int idx; + + for (idx = 0; idx < nr_ioapics; idx++) { + int id = mpc_ioapic_id(idx); + + if (get_ioapic_devid(id) < 0) { + pr_err(FW_BUG "AMD-Vi: IO-APIC[%d] not in IVRS table\n", id); + pr_err("AMD-Vi: Disabling interrupt remapping due to BIOS Bug\n"); + return false; + } + } + + return true; +} + /* * This is the hardware init function for AMD IOMMU in the system. * This function is called either from amd_iommu_init or from the interrupt @@ -1661,9 +1679,6 @@ static int __init early_amd_iommu_init(void) if (amd_iommu_pd_alloc_bitmap == NULL) goto out; - /* init the device table */ - init_device_table(); - /* * let all alias entries point to itself */ @@ -1686,6 +1701,9 @@ static int __init early_amd_iommu_init(void) if (ret) goto out; + if (amd_iommu_irq_remap) + amd_iommu_irq_remap = check_ioapic_information(); + if (amd_iommu_irq_remap) { /* * Interrupt remapping enabled, create kmem_cache for the @@ -1709,6 +1727,9 @@ static int __init early_amd_iommu_init(void) if (ret) goto out; + /* init the device table */ + init_device_table(); + out: /* Don't leak any ACPI memory */ early_acpi_os_unmap_memory((char __iomem *)ivrs_base, ivrs_size); From 33f28c59e18d83fd2aeef258d211be66b9b80eb3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 15 Jun 2012 18:03:31 +0200 Subject: [PATCH 13/26] iommu/amd: Split device table initialization into irq and dma part When the IOMMU is enabled very early (as with irq-remapping) some devices are still in BIOS hand. When dma is blocked early this can cause lots of IO_PAGE_FAULTs. So delay the DMA initialization and do it right before the dma_ops are initialized. To be secure, block all interrupts by default when irq-remapping is enabled in the system. They will be reenabled on demand later. Without blocking interrupts by default devices can issue arbitrary interrupts by sending special DMA packets to the CPU that look like MSI messages. This is especially dangerous when a device is assigned to a KVM guest because the guest can then DoS the host. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index f8a222b0ac3f..8a7f1971d633 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1381,7 +1381,7 @@ static int __init init_memory_definitions(struct acpi_table_header *table) * Init the device table to not allow DMA access for devices and * suppress all page faults */ -static void init_device_table(void) +static void init_device_table_dma(void) { u32 devid; @@ -1391,6 +1391,17 @@ static void init_device_table(void) } } +static void init_device_table(void) +{ + u32 devid; + + if (!amd_iommu_irq_remap) + return; + + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) + set_dev_entry_bit(devid, DEV_ENTRY_IRQ_TBL_EN); +} + static void iommu_init_flags(struct amd_iommu *iommu) { iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ? @@ -1781,8 +1792,14 @@ static bool detect_ivrs(void) static int amd_iommu_init_dma(void) { + struct amd_iommu *iommu; int ret; + init_device_table_dma(); + + for_each_iommu(iommu) + iommu_flush_all_caches(iommu); + if (iommu_pass_through) ret = amd_iommu_init_passthrough(); else From f6fec00a9202987f1be2ae0a722518b742a9a799 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 21 Jun 2012 16:51:25 +0200 Subject: [PATCH 14/26] iommu/amd: Make sure IOMMU is not considered to translate itself The IVRS table usually includes the IOMMU device. But the IOMMU does never translate itself, so make sure the IOMMU driver knows this. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 8a7f1971d633..68b3305a126a 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1021,6 +1021,13 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) ret = init_iommu_from_acpi(iommu, h); if (ret) return ret; + + /* + * Make sure IOMMU is not considered to translate itself. The IVRS + * table tells us so, but this is a lie! + */ + amd_iommu_rlookup_table[iommu->devid] = NULL; + init_iommu_devices(iommu); return 0; From 7ef2798deb695f112f25e348b199dca79eb1ea68 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 21 Jun 2012 16:46:04 +0200 Subject: [PATCH 15/26] iommu/amd: Add IRTE invalidation routine Add routine to invalidate the IOMMU cache for interupt translations. Also include the IRTE caches when flushing all IOMMU caches. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 27 +++++++++++++++++++++++++++ drivers/iommu/amd_iommu_types.h | 3 +++ 2 files changed, 30 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index b6a8079ff7bb..e27b354c3e9b 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -902,6 +902,13 @@ static void build_inv_all(struct iommu_cmd *cmd) CMD_SET_TYPE(cmd, CMD_INV_ALL); } +static void build_inv_irt(struct iommu_cmd *cmd, u16 devid) +{ + memset(cmd, 0, sizeof(*cmd)); + cmd->data[0] = devid; + CMD_SET_TYPE(cmd, CMD_INV_IRT); +} + /* * Writes the command to the IOMMUs command buffer and informs the * hardware about the new command. @@ -1023,12 +1030,32 @@ static void iommu_flush_all(struct amd_iommu *iommu) iommu_completion_wait(iommu); } +static void iommu_flush_irt(struct amd_iommu *iommu, u16 devid) +{ + struct iommu_cmd cmd; + + build_inv_irt(&cmd, devid); + + iommu_queue_command(iommu, &cmd); +} + +static void iommu_flush_irt_all(struct amd_iommu *iommu) +{ + u32 devid; + + for (devid = 0; devid <= MAX_DEV_TABLE_ENTRIES; devid++) + iommu_flush_irt(iommu, devid); + + iommu_completion_wait(iommu); +} + void iommu_flush_all_caches(struct amd_iommu *iommu) { if (iommu_feature(iommu, FEATURE_IA)) { iommu_flush_all(iommu); } else { iommu_flush_dte_all(iommu); + iommu_flush_irt_all(iommu); iommu_flush_tlb_all(iommu); } } diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 1a7d48078d3f..90afccb1237b 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -152,6 +152,7 @@ #define CMD_INV_DEV_ENTRY 0x02 #define CMD_INV_IOMMU_PAGES 0x03 #define CMD_INV_IOTLB_PAGES 0x04 +#define CMD_INV_IRT 0x05 #define CMD_COMPLETE_PPR 0x07 #define CMD_INV_ALL 0x08 @@ -184,6 +185,8 @@ #define DEV_ENTRY_MODE_MASK 0x07 #define DEV_ENTRY_MODE_SHIFT 0x09 +#define MAX_DEV_TABLE_ENTRIES 0xffff + /* constants to configure the command buffer */ #define CMD_BUFFER_SIZE 8192 #define CMD_BUFFER_UNINITIALIZED 1 From 2b324506341cb3baf65f3b6a0f950dd7648938ac Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 21 Jun 2012 16:29:10 +0200 Subject: [PATCH 16/26] iommu/amd: Add routines to manage irq remapping tables Add routines to: * Alloc remapping tables and single entries from these tables * Change entries in the tables * Free entries in the table Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 228 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 228 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index e27b354c3e9b..4e429e781333 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -31,6 +31,12 @@ #include #include #include +#include +#include +#include +#include +#include +#include #include #include #include @@ -3773,3 +3779,225 @@ int amd_iommu_device_info(struct pci_dev *pdev, return 0; } EXPORT_SYMBOL(amd_iommu_device_info); + +#ifdef CONFIG_IRQ_REMAP + +/***************************************************************************** + * + * Interrupt Remapping Implementation + * + *****************************************************************************/ + +union irte { + u32 val; + struct { + u32 valid : 1, + no_fault : 1, + int_type : 3, + rq_eoi : 1, + dm : 1, + rsvd_1 : 1, + destination : 8, + vector : 8, + rsvd_2 : 8; + } fields; +}; + +#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) +#define DTE_IRQ_REMAP_INTCTL (2ULL << 60) +#define DTE_IRQ_TABLE_LEN (8ULL << 1) +#define DTE_IRQ_REMAP_ENABLE 1ULL + +static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table) +{ + u64 dte; + + dte = amd_iommu_dev_table[devid].data[2]; + dte &= ~DTE_IRQ_PHYS_ADDR_MASK; + dte |= virt_to_phys(table->table); + dte |= DTE_IRQ_REMAP_INTCTL; + dte |= DTE_IRQ_TABLE_LEN; + dte |= DTE_IRQ_REMAP_ENABLE; + + amd_iommu_dev_table[devid].data[2] = dte; +} + +#define IRTE_ALLOCATED (~1U) + +static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic) +{ + struct irq_remap_table *table = NULL; + struct amd_iommu *iommu; + unsigned long flags; + u16 alias; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + + iommu = amd_iommu_rlookup_table[devid]; + if (!iommu) + goto out_unlock; + + table = irq_lookup_table[devid]; + if (table) + goto out; + + alias = amd_iommu_alias_table[devid]; + table = irq_lookup_table[alias]; + if (table) { + irq_lookup_table[devid] = table; + set_dte_irq_entry(devid, table); + iommu_flush_dte(iommu, devid); + goto out; + } + + /* Nothing there yet, allocate new irq remapping table */ + table = kzalloc(sizeof(*table), GFP_ATOMIC); + if (!table) + goto out; + + if (ioapic) + /* Keep the first 32 indexes free for IOAPIC interrupts */ + table->min_index = 32; + + table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_ATOMIC); + if (!table->table) { + kfree(table); + goto out; + } + + memset(table->table, 0, MAX_IRQS_PER_TABLE * sizeof(u32)); + + if (ioapic) { + int i; + + for (i = 0; i < 32; ++i) + table->table[i] = IRTE_ALLOCATED; + } + + irq_lookup_table[devid] = table; + set_dte_irq_entry(devid, table); + iommu_flush_dte(iommu, devid); + if (devid != alias) { + irq_lookup_table[alias] = table; + set_dte_irq_entry(devid, table); + iommu_flush_dte(iommu, alias); + } + +out: + iommu_completion_wait(iommu); + +out_unlock: + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + return table; +} + +static int alloc_irq_index(struct irq_cfg *cfg, u16 devid, int count) +{ + struct irq_remap_table *table; + unsigned long flags; + int index, c; + + table = get_irq_table(devid, false); + if (!table) + return -ENODEV; + + spin_lock_irqsave(&table->lock, flags); + + /* Scan table for free entries */ + for (c = 0, index = table->min_index; + index < MAX_IRQS_PER_TABLE; + ++index) { + if (table->table[index] == 0) + c += 1; + else + c = 0; + + if (c == count) { + struct irq_2_iommu *irte_info; + + for (; c != 0; --c) + table->table[index - c + 1] = IRTE_ALLOCATED; + + index -= count - 1; + + irte_info = &cfg->irq_2_iommu; + irte_info->sub_handle = devid; + irte_info->irte_index = index; + irte_info->iommu = (void *)cfg; + + goto out; + } + } + + index = -ENOSPC; + +out: + spin_unlock_irqrestore(&table->lock, flags); + + return index; +} + +static int get_irte(u16 devid, int index, union irte *irte) +{ + struct irq_remap_table *table; + unsigned long flags; + + table = get_irq_table(devid, false); + if (!table) + return -ENOMEM; + + spin_lock_irqsave(&table->lock, flags); + irte->val = table->table[index]; + spin_unlock_irqrestore(&table->lock, flags); + + return 0; +} + +static int modify_irte(u16 devid, int index, union irte irte) +{ + struct irq_remap_table *table; + struct amd_iommu *iommu; + unsigned long flags; + + iommu = amd_iommu_rlookup_table[devid]; + if (iommu == NULL) + return -EINVAL; + + table = get_irq_table(devid, false); + if (!table) + return -ENOMEM; + + spin_lock_irqsave(&table->lock, flags); + table->table[index] = irte.val; + spin_unlock_irqrestore(&table->lock, flags); + + iommu_flush_irt(iommu, devid); + iommu_completion_wait(iommu); + + return 0; +} + +static void free_irte(u16 devid, int index) +{ + struct irq_remap_table *table; + struct amd_iommu *iommu; + unsigned long flags; + + iommu = amd_iommu_rlookup_table[devid]; + if (iommu == NULL) + return; + + table = get_irq_table(devid, false); + if (!table) + return; + + spin_lock_irqsave(&table->lock, flags); + table->table[index] = 0; + spin_unlock_irqrestore(&table->lock, flags); + + iommu_flush_irt(iommu, devid); + iommu_completion_wait(iommu); +} + +#endif From 5527de744da61f55aaa9b804873923f95272e793 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 26 Jun 2012 11:17:32 +0200 Subject: [PATCH 17/26] iommu/amd: Add IOAPIC remapping routines Add the routine to setup interrupt remapping for ioapic interrupts. Also add a routine to change the affinity of an irq and to free an irq allocation for interrupt remapping. The last two functions will also be used for MSI interrupts. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 126 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 4e429e781333..886b4c7fba72 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4000,4 +4000,130 @@ static void free_irte(u16 devid, int index) iommu_completion_wait(iommu); } +static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, + unsigned int destination, int vector, + struct io_apic_irq_attr *attr) +{ + struct irq_remap_table *table; + struct irq_2_iommu *irte_info; + struct irq_cfg *cfg; + union irte irte; + int ioapic_id; + int index; + int devid; + int ret; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + irte_info = &cfg->irq_2_iommu; + ioapic_id = mpc_ioapic_id(attr->ioapic); + devid = get_ioapic_devid(ioapic_id); + + if (devid < 0) + return devid; + + table = get_irq_table(devid, true); + if (table == NULL) + return -ENOMEM; + + index = attr->ioapic_pin; + + /* Setup IRQ remapping info */ + irte_info->sub_handle = devid; + irte_info->irte_index = index; + irte_info->iommu = (void *)cfg; + + /* Setup IRTE for IOMMU */ + irte.val = 0; + irte.fields.vector = vector; + irte.fields.int_type = apic->irq_delivery_mode; + irte.fields.destination = destination; + irte.fields.dm = apic->irq_dest_mode; + irte.fields.valid = 1; + + ret = modify_irte(devid, index, irte); + if (ret) + return ret; + + /* Setup IOAPIC entry */ + memset(entry, 0, sizeof(*entry)); + + entry->vector = index; + entry->mask = 0; + entry->trigger = attr->trigger; + entry->polarity = attr->polarity; + + /* + * Mask level triggered irqs. + * Use IRQ_DELAYED_DISABLE for edge triggered irqs. + */ + if (attr->trigger) + entry->mask = 1; + + return 0; +} + +static int set_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ + struct irq_2_iommu *irte_info; + unsigned int dest, irq; + struct irq_cfg *cfg; + union irte irte; + int err; + + if (!config_enabled(CONFIG_SMP)) + return -1; + + cfg = data->chip_data; + irq = data->irq; + irte_info = &cfg->irq_2_iommu; + + if (!cpumask_intersects(mask, cpu_online_mask)) + return -EINVAL; + + if (get_irte(irte_info->sub_handle, irte_info->irte_index, &irte)) + return -EBUSY; + + if (assign_irq_vector(irq, cfg, mask)) + return -EBUSY; + + err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest); + if (err) { + if (assign_irq_vector(irq, cfg, data->affinity)) + pr_err("AMD-Vi: Failed to recover vector for irq %d\n", irq); + return err; + } + + irte.fields.vector = cfg->vector; + irte.fields.destination = dest; + + modify_irte(irte_info->sub_handle, irte_info->irte_index, irte); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + cpumask_copy(data->affinity, mask); + + return 0; +} + +static int free_irq(int irq) +{ + struct irq_2_iommu *irte_info; + struct irq_cfg *cfg; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + irte_info = &cfg->irq_2_iommu; + + free_irte(irte_info->sub_handle, irte_info->irte_index); + + return 0; +} + #endif From 0b4d48cb2240d1686b3f0da234f0c9da18576503 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 26 Jun 2012 14:54:17 +0200 Subject: [PATCH 18/26] iommu/amd: Implement MSI routines for interrupt remapping Add routines to setup interrupt remapping for MSI interrupts. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 74 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 886b4c7fba72..bcf48156868b 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4126,4 +4126,78 @@ static int free_irq(int irq) return 0; } +static void compose_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id) +{ + struct irq_2_iommu *irte_info; + struct irq_cfg *cfg; + union irte irte; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return; + + irte_info = &cfg->irq_2_iommu; + + irte.val = 0; + irte.fields.vector = cfg->vector; + irte.fields.int_type = apic->irq_delivery_mode; + irte.fields.destination = dest; + irte.fields.dm = apic->irq_dest_mode; + irte.fields.valid = 1; + + modify_irte(irte_info->sub_handle, irte_info->irte_index, irte); + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = MSI_ADDR_BASE_LO; + msg->data = irte_info->irte_index; +} + +static int msi_alloc_irq(struct pci_dev *pdev, int irq, int nvec) +{ + struct irq_cfg *cfg; + int index; + u16 devid; + + if (!pdev) + return -EINVAL; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + devid = get_device_id(&pdev->dev); + index = alloc_irq_index(cfg, devid, nvec); + + return index < 0 ? MAX_IRQS_PER_TABLE : index; +} + +static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq, + int index, int offset) +{ + struct irq_2_iommu *irte_info; + struct irq_cfg *cfg; + u16 devid; + + if (!pdev) + return -EINVAL; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + if (index >= MAX_IRQS_PER_TABLE) + return 0; + + devid = get_device_id(&pdev->dev); + irte_info = &cfg->irq_2_iommu; + + irte_info->sub_handle = devid; + irte_info->irte_index = index + offset; + irte_info->iommu = (void *)cfg; + + return 0; +} + #endif From d976195c93bce4512e990d170858d20d71c95c45 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 26 Jun 2012 16:00:08 +0200 Subject: [PATCH 19/26] iommu/amd: Add call-back routine for HPET MSI Add a routine to setup a HPET MSI interrupt for remapping. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index bcf48156868b..aeb890c35f7e 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4200,4 +4200,30 @@ static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq, return 0; } +static int setup_hpet_msi(unsigned int irq, unsigned int id) +{ + struct irq_2_iommu *irte_info; + struct irq_cfg *cfg; + int index, devid; + + cfg = irq_get_chip_data(irq); + if (!cfg) + return -EINVAL; + + irte_info = &cfg->irq_2_iommu; + devid = get_hpet_devid(id); + if (devid < 0) + return devid; + + index = alloc_irq_index(cfg, devid, 1); + if (index < 0) + return index; + + irte_info->sub_handle = devid; + irte_info->irte_index = index; + irte_info->iommu = (void *)cfg; + + return 0; +} + #endif From 6b474b8224cdb473f19e8c925171e608499cc45d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 26 Jun 2012 16:46:04 +0200 Subject: [PATCH 20/26] iommu/amd: Add initialization routines for AMD interrupt remapping Add the six routines required to setup interrupt remapping with the AMD IOMMU. Also put it all together into the AMD specific irq_remap_ops. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 16 +++++++++++++ drivers/iommu/amd_iommu_init.c | 42 +++++++++++++++++++++++++++++++++ drivers/iommu/amd_iommu_proto.h | 8 +++++++ drivers/iommu/irq_remapping.h | 1 + 4 files changed, 67 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index aeb890c35f7e..33f144f7d676 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -45,6 +45,7 @@ #include "amd_iommu_proto.h" #include "amd_iommu_types.h" +#include "irq_remapping.h" #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) @@ -4226,4 +4227,19 @@ static int setup_hpet_msi(unsigned int irq, unsigned int id) return 0; } +struct irq_remap_ops amd_iommu_irq_ops = { + .supported = amd_iommu_supported, + .prepare = amd_iommu_prepare, + .enable = amd_iommu_enable, + .disable = amd_iommu_disable, + .reenable = amd_iommu_reenable, + .enable_faulting = amd_iommu_enable_faulting, + .setup_ioapic_entry = setup_ioapic_entry, + .set_affinity = set_affinity, + .free_irq = free_irq, + .compose_msi_msg = compose_msi_msg, + .msi_alloc_irq = msi_alloc_irq, + .msi_setup_irq = msi_setup_irq, + .setup_hpet_msi = setup_hpet_msi, +}; #endif diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 68b3305a126a..d536d24b6f34 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "amd_iommu_proto.h" #include "amd_iommu_types.h" @@ -1897,7 +1898,48 @@ static int __init iommu_go_to_state(enum iommu_init_state state) return ret; } +#ifdef CONFIG_IRQ_REMAP +int __init amd_iommu_prepare(void) +{ + return iommu_go_to_state(IOMMU_ACPI_FINISHED); +} +int __init amd_iommu_supported(void) +{ + return amd_iommu_irq_remap ? 1 : 0; +} + +int __init amd_iommu_enable(void) +{ + int ret; + + ret = iommu_go_to_state(IOMMU_ENABLED); + if (ret) + return ret; + + irq_remapping_enabled = 1; + + return 0; +} + +void amd_iommu_disable(void) +{ + amd_iommu_suspend(); +} + +int amd_iommu_reenable(int mode) +{ + amd_iommu_resume(); + + return 0; +} + +int __init amd_iommu_enable_faulting(void) +{ + /* We enable MSI later when PCI is initialized */ + return 0; +} +#endif /* * This is the core init function for AMD IOMMU hardware in the system. diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h index 1a7f41c6cc66..c294961bdd36 100644 --- a/drivers/iommu/amd_iommu_proto.h +++ b/drivers/iommu/amd_iommu_proto.h @@ -32,6 +32,14 @@ extern void amd_iommu_uninit_devices(void); extern void amd_iommu_init_notifier(void); extern void amd_iommu_init_api(void); +/* Needed for interrupt remapping */ +extern int amd_iommu_supported(void); +extern int amd_iommu_prepare(void); +extern int amd_iommu_enable(void); +extern void amd_iommu_disable(void); +extern int amd_iommu_reenable(int); +extern int amd_iommu_enable_faulting(void); + /* IOMMUv2 specific functions */ struct iommu_domain; diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h index 624d360d9be4..95363acb583f 100644 --- a/drivers/iommu/irq_remapping.h +++ b/drivers/iommu/irq_remapping.h @@ -82,6 +82,7 @@ struct irq_remap_ops { }; extern struct irq_remap_ops intel_irq_remap_ops; +extern struct irq_remap_ops amd_iommu_irq_ops; #else /* CONFIG_IRQ_REMAP */ From d04e0ba3432e3fa68bd779330e0f79a9d979bc5f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 2 Jul 2012 16:02:20 +0200 Subject: [PATCH 21/26] iommu/amd: Make sure irq remapping still works on dma init failure Do not deinitialize the AMD IOMMU driver completly when interrupt remapping is already in use but the initialization of the DMA layer fails for some reason. Make sure the IOMMU can still be used to remap interrupts. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 40 ++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index d536d24b6f34..fee872f666f5 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1399,6 +1399,16 @@ static void init_device_table_dma(void) } } +static void __init uninit_device_table_dma(void) +{ + u32 devid; + + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { + amd_iommu_dev_table[devid].data[0] = 0ULL; + amd_iommu_dev_table[devid].data[1] = 0ULL; + } +} + static void init_device_table(void) { u32 devid; @@ -1567,11 +1577,6 @@ static void __init free_on_init_error(void) } - amd_iommu_uninit_devices(); - - free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, - get_order(MAX_DOMAIN_ID/8)); - free_pages((unsigned long)amd_iommu_rlookup_table, get_order(rlookup_table_size)); @@ -1583,8 +1588,6 @@ static void __init free_on_init_error(void) free_iommu_all(); - free_unity_maps(); - #ifdef CONFIG_GART_IOMMU /* * We failed to initialize the AMD IOMMU - try fallback to GART @@ -1612,6 +1615,16 @@ static bool __init check_ioapic_information(void) return true; } +static void __init free_dma_resources(void) +{ + amd_iommu_uninit_devices(); + + free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, + get_order(MAX_DOMAIN_ID/8)); + + free_unity_maps(); +} + /* * This is the hardware init function for AMD IOMMU in the system. * This function is called either from amd_iommu_init or from the interrupt @@ -1952,8 +1965,17 @@ static int __init amd_iommu_init(void) ret = iommu_go_to_state(IOMMU_INITIALIZED); if (ret) { - disable_iommus(); - free_on_init_error(); + free_dma_resources(); + if (!irq_remapping_enabled) { + disable_iommus(); + free_on_init_error(); + } else { + struct amd_iommu *iommu; + + uninit_device_table_dma(); + for_each_iommu(iommu) + iommu_flush_all_caches(iommu); + } } return ret; From c18d2388170132ce015475f1fdab272b62c1e2da Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 26 Jun 2012 16:59:34 +0200 Subject: [PATCH 22/26] iommu/irq: Use amd_iommu_irq_ops if supported Finally enable interrupt remapping for AMD systems. Signed-off-by: Joerg Roedel --- drivers/iommu/irq_remapping.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 151690db692c..faf85d6e33fe 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -51,6 +51,11 @@ early_param("intremap", setup_irqremap); void __init setup_irq_remapping_ops(void) { remap_ops = &intel_irq_remap_ops; + +#ifdef CONFIG_AMD_IOMMU + if (amd_iommu_irq_ops.prepare() == 0) + remap_ops = &amd_iommu_irq_ops; +#endif } int irq_remapping_supported(void) From ebe60bbfdc3914bfb833595f36444a2f76a3cc0c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 2 Jul 2012 18:36:03 +0200 Subject: [PATCH 23/26] iommu/amd: Print message to system log when irq remapping is enabled Print an indicator to dmesg to easily find out if interrupt remapping is enabled of a given system. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index fee872f666f5..3d6e7c5471f2 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1199,6 +1199,8 @@ static void print_iommu_info(void) } pr_cont("\n"); } + if (irq_remapping_enabled) + pr_info("AMD-Vi: Interrupt remapping enabled\n"); } static int __init amd_iommu_init_pci(void) From bdddadcb0783b965f524b6f43c475390f5f76765 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 2 Jul 2012 18:38:13 +0200 Subject: [PATCH 24/26] iommu/amd: Report irq remapping through IOMMU-API Report the availability of irq remapping through the IOMMU-API to allow KVM device passthrough again without additional module parameter overrides. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 33f144f7d676..312dd4dd4791 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3353,6 +3353,8 @@ static int amd_iommu_domain_has_cap(struct iommu_domain *domain, switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: return 1; + case IOMMU_CAP_INTR_REMAP: + return irq_remapping_enabled; } return 0; From 821f0f68ca5fa93c757a892129392e751a7407a3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 2 Oct 2012 11:34:40 +0300 Subject: [PATCH 25/26] iommu/amd: Fix possible use after free in get_irq_table() We should return NULL on error instead of the freed pointer. Signed-off-by: Dan Carpenter Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 312dd4dd4791..ff16c5ece735 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3865,6 +3865,7 @@ static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic) table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_ATOMIC); if (!table->table) { kfree(table); + table = NULL; goto out; } From 44229b13bd74514ace76f7df40b388820a237288 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Oct 2012 14:38:48 +0200 Subject: [PATCH 26/26] iommu/amd: Remove obsolete comment line IRQ_DELAYED_DISABLE does not exist anymore. So this comment is obsolete. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index ff16c5ece735..b4e525908e66 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4061,7 +4061,6 @@ static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, /* * Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. */ if (attr->trigger) entry->mask = 1;