From 8365e42c0501f35e30f41426526dc4baa14f4e01 Mon Sep 17 00:00:00 2001 From: Besar Wicaksono Date: Mon, 21 Aug 2023 18:16:08 -0500 Subject: [PATCH 01/10] perf: arm_cspmu: Separate Arm and vendor module commit bfc653aa89cb05796d7b4e046600accb442c9b7a upstream. Arm Coresight PMU driver consists of main standard code and vendor backend code. Both are currently built as a single module. This patch adds vendor registration API to separate the two to keep things modular. The main driver requests each known backend module during initialization and defer device binding process. The backend module then registers an init callback to the main driver and continue the device driver binding process. Signed-off-by: Besar Wicaksono Reviewed-by: Suzuki K Poulose Reviewed-and-tested-by: Ilkka Koskinen Link: https://lore.kernel.org/r/20230821231608.50911-1-bwicaksono@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Huang Cun --- drivers/perf/arm_cspmu/Kconfig | 9 +- drivers/perf/arm_cspmu/Makefile | 6 +- drivers/perf/arm_cspmu/arm_cspmu.c | 168 ++++++++++++++++++++------ drivers/perf/arm_cspmu/arm_cspmu.h | 25 +++- drivers/perf/arm_cspmu/nvidia_cspmu.c | 34 +++++- drivers/perf/arm_cspmu/nvidia_cspmu.h | 17 --- 6 files changed, 199 insertions(+), 60 deletions(-) delete mode 100644 drivers/perf/arm_cspmu/nvidia_cspmu.h diff --git a/drivers/perf/arm_cspmu/Kconfig b/drivers/perf/arm_cspmu/Kconfig index 25d25ded0983..d5f787d22234 100644 --- a/drivers/perf/arm_cspmu/Kconfig +++ b/drivers/perf/arm_cspmu/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU tristate "ARM Coresight Architecture PMU" @@ -10,3 +10,10 @@ config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU based on ARM CoreSight PMU architecture. Note that this PMU architecture does not have relationship with the ARM CoreSight Self-Hosted Tracing. + +config NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU + tristate "NVIDIA Coresight Architecture PMU" + depends on ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU + help + Provides NVIDIA specific attributes for performance monitoring unit + (PMU) devices based on ARM CoreSight PMU architecture. diff --git a/drivers/perf/arm_cspmu/Makefile b/drivers/perf/arm_cspmu/Makefile index fedb17df982d..0309d2ff264a 100644 --- a/drivers/perf/arm_cspmu/Makefile +++ b/drivers/perf/arm_cspmu/Makefile @@ -1,6 +1,8 @@ -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu_module.o -arm_cspmu_module-y := arm_cspmu.o nvidia_cspmu.o +arm_cspmu_module-y := arm_cspmu.o + +obj-$(CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += nvidia_cspmu.o diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index 9363c31f31b8..2167f9f156ec 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -16,7 +16,7 @@ * The user should refer to the vendor technical documentation to get details * about the supported events. * - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* */ @@ -26,11 +26,11 @@ #include #include #include +#include #include #include #include "arm_cspmu.h" -#include "nvidia_cspmu.h" #define PMUNAME "arm_cspmu" #define DRVNAME "arm-cs-arch-pmu" @@ -112,11 +112,10 @@ */ #define HILOHI_MAX_POLL 1000 -/* JEDEC-assigned JEP106 identification code */ -#define ARM_CSPMU_IMPL_ID_NVIDIA 0x36B - static unsigned long arm_cspmu_cpuhp_state; +static DEFINE_MUTEX(arm_cspmu_lock); + static struct acpi_apmt_node *arm_cspmu_apmt_node(struct device *dev) { return *(struct acpi_apmt_node **)dev_get_platdata(dev); @@ -373,27 +372,37 @@ static struct attribute_group arm_cspmu_cpumask_attr_group = { .attrs = arm_cspmu_cpumask_attrs, }; -struct impl_match { - u32 pmiidr; - u32 mask; - int (*impl_init_ops)(struct arm_cspmu *cspmu); +static struct arm_cspmu_impl_match impl_match[] = { + { + .module_name = "nvidia_cspmu", + .pmiidr_val = ARM_CSPMU_IMPL_ID_NVIDIA, + .pmiidr_mask = ARM_CSPMU_PMIIDR_IMPLEMENTER, + .module = NULL, + .impl_init_ops = NULL, + }, + {0} }; -static const struct impl_match impl_match[] = { - { - .pmiidr = ARM_CSPMU_IMPL_ID_NVIDIA, - .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER, - .impl_init_ops = nv_cspmu_init_ops - }, - {} -}; +static struct arm_cspmu_impl_match *arm_cspmu_impl_match_get(u32 pmiidr) +{ + struct arm_cspmu_impl_match *match = impl_match; + + for (; match->pmiidr_val; match++) { + u32 mask = match->pmiidr_mask; + + if ((match->pmiidr_val & mask) == (pmiidr & mask)) + return match; + } + + return NULL; +} static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu) { - int ret; + int ret = 0; struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops; struct acpi_apmt_node *apmt_node = arm_cspmu_apmt_node(cspmu->dev); - const struct impl_match *match = impl_match; + struct arm_cspmu_impl_match *match; /* * Get PMU implementer and product id from APMT node. @@ -405,17 +414,36 @@ static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu) readl(cspmu->base0 + PMIIDR); /* Find implementer specific attribute ops. */ - for (; match->pmiidr; match++) { - const u32 mask = match->mask; + match = arm_cspmu_impl_match_get(cspmu->impl.pmiidr); - if ((match->pmiidr & mask) == (cspmu->impl.pmiidr & mask)) { - ret = match->impl_init_ops(cspmu); - if (ret) - return ret; + /* Load implementer module and initialize the callbacks. */ + if (match) { + mutex_lock(&arm_cspmu_lock); - break; + if (match->impl_init_ops) { + /* Prevent unload until PMU registration is done. */ + if (try_module_get(match->module)) { + cspmu->impl.module = match->module; + cspmu->impl.match = match; + ret = match->impl_init_ops(cspmu); + if (ret) + module_put(match->module); + } else { + WARN(1, "arm_cspmu failed to get module: %s\n", + match->module_name); + ret = -EINVAL; + } + } else { + request_module_nowait(match->module_name); + ret = -EPROBE_DEFER; } - } + + mutex_unlock(&arm_cspmu_lock); + + if (ret) + return ret; + } else + cspmu->impl.module = THIS_MODULE; /* Use default callbacks if implementer doesn't provide one. 
*/ CHECK_DEFAULT_IMPL_OPS(impl_ops, get_event_attrs); @@ -478,11 +506,6 @@ arm_cspmu_alloc_attr_group(struct arm_cspmu *cspmu) struct attribute_group **attr_groups = NULL; struct device *dev = cspmu->dev; const struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops; - int ret; - - ret = arm_cspmu_init_impl_ops(cspmu); - if (ret) - return NULL; cspmu->identifier = impl_ops->get_identifier(cspmu); cspmu->name = impl_ops->get_name(cspmu); @@ -1152,7 +1175,7 @@ static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu) cspmu->pmu = (struct pmu){ .task_ctx_nr = perf_invalid_context, - .module = THIS_MODULE, + .module = cspmu->impl.module, .pmu_enable = arm_cspmu_enable, .pmu_disable = arm_cspmu_disable, .event_init = arm_cspmu_event_init, @@ -1199,11 +1222,17 @@ static int arm_cspmu_device_probe(struct platform_device *pdev) if (ret) return ret; - ret = arm_cspmu_register_pmu(cspmu); + ret = arm_cspmu_init_impl_ops(cspmu); if (ret) return ret; - return 0; + ret = arm_cspmu_register_pmu(cspmu); + + /* Matches arm_cspmu_init_impl_ops() above. */ + if (cspmu->impl.module != THIS_MODULE) + module_put(cspmu->impl.module); + + return ret; } static int arm_cspmu_device_remove(struct platform_device *pdev) @@ -1303,6 +1332,75 @@ static void __exit arm_cspmu_exit(void) cpuhp_remove_multi_state(arm_cspmu_cpuhp_state); } +int arm_cspmu_impl_register(const struct arm_cspmu_impl_match *impl_match) +{ + struct arm_cspmu_impl_match *match; + int ret = 0; + + match = arm_cspmu_impl_match_get(impl_match->pmiidr_val); + + if (match) { + mutex_lock(&arm_cspmu_lock); + + if (!match->impl_init_ops) { + match->module = impl_match->module; + match->impl_init_ops = impl_match->impl_init_ops; + } else { + /* Broken match table may contain non-unique entries */ + WARN(1, "arm_cspmu backend already registered for module: %s, pmiidr: 0x%x, mask: 0x%x\n", + match->module_name, + match->pmiidr_val, + match->pmiidr_mask); + + ret = -EINVAL; + } + + mutex_unlock(&arm_cspmu_lock); + + if (!ret) + ret = driver_attach(&arm_cspmu_driver.driver); + } else { + pr_err("arm_cspmu reg failed, unable to find a match for pmiidr: 0x%x\n", + impl_match->pmiidr_val); + + ret = -EINVAL; + } + + return ret; +} +EXPORT_SYMBOL_GPL(arm_cspmu_impl_register); + +static int arm_cspmu_match_device(struct device *dev, const void *match) +{ + struct arm_cspmu *cspmu = platform_get_drvdata(to_platform_device(dev)); + + return (cspmu && cspmu->impl.match == match) ? 1 : 0; +} + +void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_match *impl_match) +{ + struct device *dev; + struct arm_cspmu_impl_match *match; + + match = arm_cspmu_impl_match_get(impl_match->pmiidr_val); + + if (WARN_ON(!match)) + return; + + /* Unbind the driver from all matching backend devices. */ + while ((dev = driver_find_device(&arm_cspmu_driver.driver, NULL, + match, arm_cspmu_match_device))) + device_release_driver(dev); + + mutex_lock(&arm_cspmu_lock); + + match->module = NULL; + match->impl_init_ops = NULL; + + mutex_unlock(&arm_cspmu_lock); +} +EXPORT_SYMBOL_GPL(arm_cspmu_impl_unregister); + module_init(arm_cspmu_init); module_exit(arm_cspmu_exit); diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h index 83df53d1c132..7936a90ded7f 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.h +++ b/drivers/perf/arm_cspmu/arm_cspmu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 * * ARM CoreSight Architecture PMU driver. - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * */ @@ -69,6 +69,9 @@ #define ARM_CSPMU_PMIIDR_IMPLEMENTER GENMASK(11, 0) #define ARM_CSPMU_PMIIDR_PRODUCTID GENMASK(31, 20) +/* JEDEC-assigned JEP106 identification code */ +#define ARM_CSPMU_IMPL_ID_NVIDIA 0x36B + struct arm_cspmu; /* This tracks the events assigned to each counter in the PMU. */ @@ -106,9 +109,23 @@ struct arm_cspmu_impl_ops { struct attribute *attr, int unused); }; +/* Vendor/implementer registration parameter. */ +struct arm_cspmu_impl_match { + /* Backend module. */ + struct module *module; + const char *module_name; + /* PMIIDR value/mask. */ + u32 pmiidr_val; + u32 pmiidr_mask; + /* Callback to vendor backend to init arm_cspmu_impl::ops. */ + int (*impl_init_ops)(struct arm_cspmu *cspmu); +}; + /* Vendor/implementer descriptor. */ struct arm_cspmu_impl { u32 pmiidr; + struct module *module; + struct arm_cspmu_impl_match *match; struct arm_cspmu_impl_ops ops; void *ctx; }; @@ -147,4 +164,10 @@ ssize_t arm_cspmu_sysfs_format_show(struct device *dev, struct device_attribute *attr, char *buf); +/* Register vendor backend. */ +int arm_cspmu_impl_register(const struct arm_cspmu_impl_match *impl_match); + +/* Unregister vendor backend. */ +void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_match *impl_match); + #endif /* __ARM_CSPMU_H__ */ diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c index 72ef80caa3c8..0382b702f092 100644 --- a/drivers/perf/arm_cspmu/nvidia_cspmu.c +++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c @@ -1,14 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * */ /* Support for NVIDIA specific attributes. */ +#include #include -#include "nvidia_cspmu.h" +#include "arm_cspmu.h" #define NV_PCIE_PORT_COUNT 10ULL #define NV_PCIE_FILTER_ID_MASK GENMASK_ULL(NV_PCIE_PORT_COUNT - 1, 0) @@ -351,7 +352,7 @@ static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu, return name; } -int nv_cspmu_init_ops(struct arm_cspmu *cspmu) +static int nv_cspmu_init_ops(struct arm_cspmu *cspmu) { u32 prodid; struct nv_cspmu_ctx *ctx; @@ -395,6 +396,31 @@ int nv_cspmu_init_ops(struct arm_cspmu *cspmu) return 0; } -EXPORT_SYMBOL_GPL(nv_cspmu_init_ops); + +/* Match all NVIDIA Coresight PMU devices */ +static const struct arm_cspmu_impl_match nv_cspmu_param = { + .pmiidr_val = ARM_CSPMU_IMPL_ID_NVIDIA, + .module = THIS_MODULE, + .impl_init_ops = nv_cspmu_init_ops +}; + +static int __init nvidia_cspmu_init(void) +{ + int ret; + + ret = arm_cspmu_impl_register(&nv_cspmu_param); + if (ret) + pr_err("nvidia_cspmu backend registration error: %d\n", ret); + + return ret; +} + +static void __exit nvidia_cspmu_exit(void) +{ + arm_cspmu_impl_unregister(&nv_cspmu_param); +} + +module_init(nvidia_cspmu_init); +module_exit(nvidia_cspmu_exit); MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.h b/drivers/perf/arm_cspmu/nvidia_cspmu.h deleted file mode 100644 index 71e18f0dc50b..000000000000 --- a/drivers/perf/arm_cspmu/nvidia_cspmu.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * - */ - -/* Support for NVIDIA specific attributes. */ - -#ifndef __NVIDIA_CSPMU_H__ -#define __NVIDIA_CSPMU_H__ - -#include "arm_cspmu.h" - -/* Allocate NVIDIA descriptor. 
*/ -int nv_cspmu_init_ops(struct arm_cspmu *cspmu); - -#endif /* __NVIDIA_CSPMU_H__ */ From 2fddeaf9e6dc6af5fdaf9db71a9b0f3a84a35ab8 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Wed, 13 Sep 2023 16:39:38 -0700 Subject: [PATCH 02/10] perf: arm_cspmu: Split 64-bit write to 32-bit writes commit 8c282414ca6209977cb6d6cc66470ca2d1e56bf6 upstream. Split the 64-bit register accesses if 64-bit access is not supported by the PMU. Signed-off-by: Ilkka Koskinen Reviewed-by: Besar Wicaksono Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20230913233941.9814-2-ilkka@os.amperecomputing.com Signed-off-by: Will Deacon Signed-off-by: Huang Cun --- drivers/perf/arm_cspmu/arm_cspmu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index 2167f9f156ec..22fc8ebd707b 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -722,7 +722,10 @@ static void arm_cspmu_write_counter(struct perf_event *event, u64 val) if (use_64b_counter_reg(cspmu)) { offset = counter_offset(sizeof(u64), event->hw.idx); - writeq(val, cspmu->base1 + offset); + if (cspmu->has_atomic_dword) + writeq(val, cspmu->base1 + offset); + else + lo_hi_writeq(val, cspmu->base1 + offset); } else { offset = counter_offset(sizeof(u32), event->hw.idx); From 773c54aa278ff5f301c8a4883e11e1e09c8d72b8 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Wed, 13 Sep 2023 16:39:39 -0700 Subject: [PATCH 03/10] perf: arm_cspmu: Support implementation specific filters commit 0a7603ab242e9bab530227cf0d0d344d4e334acc upstream. ARM Coresight PMU architecture specification [1] defines PMEVTYPER and PMEVFILT* registers as optional in Chapter 2.1. Moreover, implementers may choose to use PMIMPDEF* registers (offset: 0xD80-> 0xDFF) to filter the events. Add support for those by adding implementation specific filter callback function. 
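To make the new hook concrete, a backend that filters events through an IMPDEF register could wire it up roughly as in the sketch below. The register offset and the meaning of the filter value are invented for illustration and are not taken from any real implementation; only the callback signature and the impl.ops plumbing come from this series.

    /* Hypothetical backend: IMPDEF_FILTER_REG and the filter layout are made up. */
    #include <linux/io.h>

    #include "arm_cspmu.h"

    #define IMPDEF_FILTER_REG	0xD80	/* somewhere in the 0xD80-0xDFF IMPDEF space */

    static void example_set_ev_filter(struct arm_cspmu *cspmu,
                                      struct hw_perf_event *hwc, u32 filter)
    {
            /* Assume one IMPDEF filter register per counter in this imaginary PMU. */
            writel(filter, cspmu->base0 + IMPDEF_FILTER_REG + 4 * hwc->idx);
    }

    static int example_init_ops(struct arm_cspmu *cspmu)
    {
            cspmu->impl.ops.set_ev_filter = example_set_ev_filter;
            return 0;
    }

Backends that do not install the callback keep the default PMEVFILTR behaviour, since the core driver falls back to its own implementation via CHECK_DEFAULT_IMPL_OPS.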
[1] https://developer.arm.com/documentation/ihi0091/latest Signed-off-by: Ilkka Koskinen Reviewed-by: Besar Wicaksono Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20230913233941.9814-3-ilkka@os.amperecomputing.com Signed-off-by: Will Deacon Signed-off-by: Huang Cun --- drivers/perf/arm_cspmu/arm_cspmu.c | 12 ++++++++---- drivers/perf/arm_cspmu/arm_cspmu.h | 3 +++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index 22fc8ebd707b..865d458c0903 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -116,6 +116,9 @@ static unsigned long arm_cspmu_cpuhp_state; static DEFINE_MUTEX(arm_cspmu_lock); +static void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu, + struct hw_perf_event *hwc, u32 filter); + static struct acpi_apmt_node *arm_cspmu_apmt_node(struct device *dev) { return *(struct acpi_apmt_node **)dev_get_platdata(dev); @@ -454,6 +457,7 @@ static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu) CHECK_DEFAULT_IMPL_OPS(impl_ops, event_type); CHECK_DEFAULT_IMPL_OPS(impl_ops, event_filter); CHECK_DEFAULT_IMPL_OPS(impl_ops, event_attr_is_visible); + CHECK_DEFAULT_IMPL_OPS(impl_ops, set_ev_filter); return 0; } @@ -818,9 +822,9 @@ static inline void arm_cspmu_set_event(struct arm_cspmu *cspmu, writel(hwc->config, cspmu->base0 + offset); } -static inline void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu, - struct hw_perf_event *hwc, - u32 filter) +static void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu, + struct hw_perf_event *hwc, + u32 filter) { u32 offset = PMEVFILTR + (4 * hwc->idx); @@ -852,7 +856,7 @@ static void arm_cspmu_start(struct perf_event *event, int pmu_flags) arm_cspmu_set_cc_filter(cspmu, filter); } else { arm_cspmu_set_event(cspmu, hwc); - arm_cspmu_set_ev_filter(cspmu, hwc, filter); + cspmu->impl.ops.set_ev_filter(cspmu, hwc, filter); } hwc->state = 0; diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h index 7936a90ded7f..e0cca5b4aab9 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.h +++ b/drivers/perf/arm_cspmu/arm_cspmu.h @@ -104,6 +104,9 @@ struct arm_cspmu_impl_ops { u32 (*event_type)(const struct perf_event *event); /* Decode filter value from configs */ u32 (*event_filter)(const struct perf_event *event); + /* Set event filter */ + void (*set_ev_filter)(struct arm_cspmu *cspmu, + struct hw_perf_event *hwc, u32 filter); /* Hide/show unsupported events */ umode_t (*event_attr_is_visible)(struct kobject *kobj, struct attribute *attr, int unused); From e46dc8f19f4a1e71a102d6db0f927bd58717ad25 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Wed, 13 Sep 2023 16:39:40 -0700 Subject: [PATCH 04/10] perf: arm_cspmu: Support implementation specific validation commit 647d5c5a9e7672e285f54f0e141ee759e69382f2 upstream. Some platforms may use e.g. different filtering mechanism and, thus, may need different way to validate the events and group. 
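Putting the series together, a minimal hypothetical vendor backend that uses the registration API from patch 01 plus the validate_event hook added here might look like the sketch below. All names, the placeholder PMIIDR value and the config1 check are illustrative only; a real backend also needs a matching entry in the core driver's static impl_match[] table, otherwise arm_cspmu_impl_register() fails with "unable to find a match".

    #include <linux/module.h>
    #include <linux/perf_event.h>

    #include "arm_cspmu.h"

    static int example_validate_event(struct arm_cspmu *cspmu,
                                      struct perf_event *event)
    {
            /* e.g. reject events whose config1 filter bits are unsupported. */
            return (event->attr.config1 & ~0xffULL) ? -EINVAL : 0;
    }

    static int example_init_ops(struct arm_cspmu *cspmu)
    {
            cspmu->impl.ops.validate_event = example_validate_event;
            return 0;
    }

    static const struct arm_cspmu_impl_match example_cspmu_param = {
            .pmiidr_val     = 0x123,	/* placeholder JEP106 code */
            .module         = THIS_MODULE,
            .impl_init_ops  = example_init_ops,
    };

    static int __init example_cspmu_init(void)
    {
            return arm_cspmu_impl_register(&example_cspmu_param);
    }

    static void __exit example_cspmu_exit(void)
    {
            arm_cspmu_impl_unregister(&example_cspmu_param);
    }

    module_init(example_cspmu_init);
    module_exit(example_cspmu_exit);
    MODULE_LICENSE("GPL");

The validation hook runs from arm_cspmu_get_event_idx(), so returning an error there is enough to refuse a counter without touching the generic scheduling code.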
Signed-off-by: Ilkka Koskinen Reviewed-by: Jonathan Cameron Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20230913233941.9814-4-ilkka@os.amperecomputing.com Signed-off-by: Will Deacon Signed-off-by: Huang Cun --- drivers/perf/arm_cspmu/arm_cspmu.c | 8 +++++++- drivers/perf/arm_cspmu/arm_cspmu.h | 3 +++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index 865d458c0903..dd18aede169a 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -576,7 +576,7 @@ static void arm_cspmu_disable(struct pmu *pmu) static int arm_cspmu_get_event_idx(struct arm_cspmu_hw_events *hw_events, struct perf_event *event) { - int idx; + int idx, ret; struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu); if (supports_cycle_counter(cspmu)) { @@ -610,6 +610,12 @@ static int arm_cspmu_get_event_idx(struct arm_cspmu_hw_events *hw_events, if (idx >= cspmu->num_logical_ctrs) return -EAGAIN; + if (cspmu->impl.ops.validate_event) { + ret = cspmu->impl.ops.validate_event(cspmu, event); + if (ret) + return ret; + } + set_bit(idx, hw_events->used_ctrs); return idx; diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h index e0cca5b4aab9..a30c8372214c 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.h +++ b/drivers/perf/arm_cspmu/arm_cspmu.h @@ -107,6 +107,9 @@ struct arm_cspmu_impl_ops { /* Set event filter */ void (*set_ev_filter)(struct arm_cspmu *cspmu, struct hw_perf_event *hwc, u32 filter); + /* Implementation specific event validation */ + int (*validate_event)(struct arm_cspmu *cspmu, + struct perf_event *event); /* Hide/show unsupported events */ umode_t (*event_attr_is_visible)(struct kobject *kobj, struct attribute *attr, int unused); From 34dc55de64c66806fff3909ce54cb4233c03fcd4 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Wed, 13 Sep 2023 16:39:41 -0700 Subject: [PATCH 05/10] perf: arm_cspmu: ampere_cspmu: Add support for Ampere SoC PMU commit 53a810ad3c5cde674cac71e629e6d10bfc9d838c upstream. Ampere SoC PMU follows CoreSight PMU architecture. It uses implementation specific registers to filter events rather than PMEVFILTnR registers. Signed-off-by: Ilkka Koskinen Link: https://lore.kernel.org/r/20230913233941.9814-5-ilkka@os.amperecomputing.com [will: Include linux/io.h in ampere_cspmu.c for writel()] Signed-off-by: Will Deacon Signed-off-by: Huang Cun --- .../admin-guide/perf/ampere_cspmu.rst | 29 ++ drivers/perf/arm_cspmu/Kconfig | 10 + drivers/perf/arm_cspmu/Makefile | 2 + drivers/perf/arm_cspmu/ampere_cspmu.c | 272 ++++++++++++++++++ drivers/perf/arm_cspmu/arm_cspmu.c | 8 + drivers/perf/arm_cspmu/arm_cspmu.h | 1 + 6 files changed, 322 insertions(+) create mode 100644 Documentation/admin-guide/perf/ampere_cspmu.rst create mode 100644 drivers/perf/arm_cspmu/ampere_cspmu.c diff --git a/Documentation/admin-guide/perf/ampere_cspmu.rst b/Documentation/admin-guide/perf/ampere_cspmu.rst new file mode 100644 index 000000000000..94f93f5aee6c --- /dev/null +++ b/Documentation/admin-guide/perf/ampere_cspmu.rst @@ -0,0 +1,29 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================================ +Ampere SoC Performance Monitoring Unit (PMU) +============================================ + +Ampere SoC PMU is a generic PMU IP that follows Arm CoreSight PMU architecture. +Therefore, the driver is implemented as a submodule of arm_cspmu driver. At the +first phase it's used for counting MCU events on AmpereOne. 
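For reference, a user-space tool would encode the per-instance filters described below into perf_event_attr.config1 according to the format strings exported by the driver (threshold in bits 0-7, rank in bits 8-23, bank in bits 24-55). The sketch mirrors the perf stat example further down (threshold=2, rank=3, bank=5) and is purely illustrative; perf itself does this packing from the sysfs format files.

    #include <linux/perf_event.h>
    #include <stdint.h>
    #include <string.h>

    static void mcu_pmu_set_filter(struct perf_event_attr *attr,
                                   uint64_t threshold, uint64_t rank, uint64_t bank)
    {
            /* Matches the format attrs: config1:0-7, config1:8-23, config1:24-55. */
            attr->config1 = (threshold & 0xffULL) |
                            ((rank & 0xffffULL) << 8) |
                            ((bank & 0xffffffffULL) << 24);
    }

    int main(void)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size   = sizeof(attr);
            attr.config = 0x01;		/* act_sent */
            /* attr.type would be read from .../ampere_mcu_pmu_0/type in sysfs. */
            mcu_pmu_set_filter(&attr, 2, 3, 5);
            return 0;
    }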
+ + +MCU PMU events +-------------- + +The PMU driver supports setting filters for "rank", "bank", and "threshold". +Note, that the filters are per PMU instance rather than per event. + + +Example for perf tool use:: + + / # perf list ampere + + ampere_mcu_pmu_0/act_sent/ [Kernel PMU event] + <...> + ampere_mcu_pmu_1/rd_sent/ [Kernel PMU event] + <...> + + / # perf stat -a -e ampere_mcu_pmu_0/act_sent,bank=5,rank=3,threshold=2/,ampere_mcu_pmu_1/rd_sent/ \ + sleep 1 diff --git a/drivers/perf/arm_cspmu/Kconfig b/drivers/perf/arm_cspmu/Kconfig index d5f787d22234..6f4e28fc84a2 100644 --- a/drivers/perf/arm_cspmu/Kconfig +++ b/drivers/perf/arm_cspmu/Kconfig @@ -17,3 +17,13 @@ config NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU help Provides NVIDIA specific attributes for performance monitoring unit (PMU) devices based on ARM CoreSight PMU architecture. + +config AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU + tristate "Ampere Coresight Architecture PMU" + depends on ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU + help + Provides Ampere specific attributes for performance monitoring unit + (PMU) devices based on ARM CoreSight PMU architecture. + + In the first phase, the driver enables support on MCU PMU used in + AmpereOne SoC family. diff --git a/drivers/perf/arm_cspmu/Makefile b/drivers/perf/arm_cspmu/Makefile index 0309d2ff264a..220a734efd54 100644 --- a/drivers/perf/arm_cspmu/Makefile +++ b/drivers/perf/arm_cspmu/Makefile @@ -3,6 +3,8 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu_module.o + arm_cspmu_module-y := arm_cspmu.o obj-$(CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += nvidia_cspmu.o +obj-$(CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += ampere_cspmu.o diff --git a/drivers/perf/arm_cspmu/ampere_cspmu.c b/drivers/perf/arm_cspmu/ampere_cspmu.c new file mode 100644 index 000000000000..f146a455e838 --- /dev/null +++ b/drivers/perf/arm_cspmu/ampere_cspmu.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Ampere SoC PMU (Performance Monitor Unit) + * + * Copyright (c) 2023, Ampere Computing LLC + */ +#include +#include +#include + +#include "arm_cspmu.h" + +#define PMAUXR0 0xD80 +#define PMAUXR1 0xD84 +#define PMAUXR2 0xD88 +#define PMAUXR3 0xD8C + +#define to_ampere_cspmu_ctx(cspmu) ((struct ampere_cspmu_ctx *)(cspmu->impl.ctx)) + +struct ampere_cspmu_ctx { + const char *name; + struct attribute **event_attr; + struct attribute **format_attr; +}; + +static DEFINE_IDA(mcu_pmu_ida); + +#define SOC_PMU_EVENT_ATTR_EXTRACTOR(_name, _config, _start, _end) \ + static inline u32 get_##_name(const struct perf_event *event) \ + { \ + return FIELD_GET(GENMASK_ULL(_end, _start), \ + event->attr._config); \ + } \ + +SOC_PMU_EVENT_ATTR_EXTRACTOR(event, config, 0, 8); +SOC_PMU_EVENT_ATTR_EXTRACTOR(threshold, config1, 0, 7); +SOC_PMU_EVENT_ATTR_EXTRACTOR(rank, config1, 8, 23); +SOC_PMU_EVENT_ATTR_EXTRACTOR(bank, config1, 24, 55); + +static struct attribute *ampereone_mcu_pmu_event_attrs[] = { + ARM_CSPMU_EVENT_ATTR(cycle_count, 0x00), + ARM_CSPMU_EVENT_ATTR(act_sent, 0x01), + ARM_CSPMU_EVENT_ATTR(pre_sent, 0x02), + ARM_CSPMU_EVENT_ATTR(rd_sent, 0x03), + ARM_CSPMU_EVENT_ATTR(rda_sent, 0x04), + ARM_CSPMU_EVENT_ATTR(wr_sent, 0x05), + ARM_CSPMU_EVENT_ATTR(wra_sent, 0x06), + ARM_CSPMU_EVENT_ATTR(pd_entry_vld, 0x07), + ARM_CSPMU_EVENT_ATTR(sref_entry_vld, 0x08), + ARM_CSPMU_EVENT_ATTR(prea_sent, 0x09), + ARM_CSPMU_EVENT_ATTR(pre_sb_sent, 0x0a), + ARM_CSPMU_EVENT_ATTR(ref_sent, 0x0b), + ARM_CSPMU_EVENT_ATTR(rfm_sent, 0x0c), + ARM_CSPMU_EVENT_ATTR(ref_sb_sent, 0x0d), + 
ARM_CSPMU_EVENT_ATTR(rfm_sb_sent, 0x0e), + ARM_CSPMU_EVENT_ATTR(rd_rda_sent, 0x0f), + ARM_CSPMU_EVENT_ATTR(wr_wra_sent, 0x10), + ARM_CSPMU_EVENT_ATTR(raw_hazard, 0x11), + ARM_CSPMU_EVENT_ATTR(war_hazard, 0x12), + ARM_CSPMU_EVENT_ATTR(waw_hazard, 0x13), + ARM_CSPMU_EVENT_ATTR(rar_hazard, 0x14), + ARM_CSPMU_EVENT_ATTR(raw_war_waw_hazard, 0x15), + ARM_CSPMU_EVENT_ATTR(hprd_lprd_wr_req_vld, 0x16), + ARM_CSPMU_EVENT_ATTR(lprd_req_vld, 0x17), + ARM_CSPMU_EVENT_ATTR(hprd_req_vld, 0x18), + ARM_CSPMU_EVENT_ATTR(hprd_lprd_req_vld, 0x19), + ARM_CSPMU_EVENT_ATTR(prefetch_tgt, 0x1a), + ARM_CSPMU_EVENT_ATTR(wr_req_vld, 0x1b), + ARM_CSPMU_EVENT_ATTR(partial_wr_req_vld, 0x1c), + ARM_CSPMU_EVENT_ATTR(rd_retry, 0x1d), + ARM_CSPMU_EVENT_ATTR(wr_retry, 0x1e), + ARM_CSPMU_EVENT_ATTR(retry_gnt, 0x1f), + ARM_CSPMU_EVENT_ATTR(rank_change, 0x20), + ARM_CSPMU_EVENT_ATTR(dir_change, 0x21), + ARM_CSPMU_EVENT_ATTR(rank_dir_change, 0x22), + ARM_CSPMU_EVENT_ATTR(rank_active, 0x23), + ARM_CSPMU_EVENT_ATTR(rank_idle, 0x24), + ARM_CSPMU_EVENT_ATTR(rank_pd, 0x25), + ARM_CSPMU_EVENT_ATTR(rank_sref, 0x26), + ARM_CSPMU_EVENT_ATTR(queue_fill_gt_thresh, 0x27), + ARM_CSPMU_EVENT_ATTR(queue_rds_gt_thresh, 0x28), + ARM_CSPMU_EVENT_ATTR(queue_wrs_gt_thresh, 0x29), + ARM_CSPMU_EVENT_ATTR(phy_updt_complt, 0x2a), + ARM_CSPMU_EVENT_ATTR(tz_fail, 0x2b), + ARM_CSPMU_EVENT_ATTR(dram_errc, 0x2c), + ARM_CSPMU_EVENT_ATTR(dram_errd, 0x2d), + ARM_CSPMU_EVENT_ATTR(read_data_return, 0x32), + ARM_CSPMU_EVENT_ATTR(chi_wr_data_delta, 0x33), + ARM_CSPMU_EVENT_ATTR(zq_start, 0x34), + ARM_CSPMU_EVENT_ATTR(zq_latch, 0x35), + ARM_CSPMU_EVENT_ATTR(wr_fifo_full, 0x36), + ARM_CSPMU_EVENT_ATTR(info_fifo_full, 0x37), + ARM_CSPMU_EVENT_ATTR(cmd_fifo_full, 0x38), + ARM_CSPMU_EVENT_ATTR(dfi_nop, 0x39), + ARM_CSPMU_EVENT_ATTR(dfi_cmd, 0x3a), + ARM_CSPMU_EVENT_ATTR(rd_run_len, 0x3b), + ARM_CSPMU_EVENT_ATTR(wr_run_len, 0x3c), + + ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT), + NULL, +}; + +static struct attribute *ampereone_mcu_format_attrs[] = { + ARM_CSPMU_FORMAT_EVENT_ATTR, + ARM_CSPMU_FORMAT_ATTR(threshold, "config1:0-7"), + ARM_CSPMU_FORMAT_ATTR(rank, "config1:8-23"), + ARM_CSPMU_FORMAT_ATTR(bank, "config1:24-55"), + NULL, +}; + +static struct attribute ** +ampere_cspmu_get_event_attrs(const struct arm_cspmu *cspmu) +{ + const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu); + + return ctx->event_attr; +} + +static struct attribute ** +ampere_cspmu_get_format_attrs(const struct arm_cspmu *cspmu) +{ + const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu); + + return ctx->format_attr; +} + +static const char * +ampere_cspmu_get_name(const struct arm_cspmu *cspmu) +{ + const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu); + + return ctx->name; +} + +static u32 ampere_cspmu_event_filter(const struct perf_event *event) +{ + /* + * PMEVFILTR or PMCCFILTR aren't used in Ampere SoC PMU but are marked + * as RES0. Make sure, PMCCFILTR is written zero. + */ + return 0; +} + +static void ampere_cspmu_set_ev_filter(struct arm_cspmu *cspmu, + struct hw_perf_event *hwc, + u32 filter) +{ + struct perf_event *event; + unsigned int idx; + u32 threshold, rank, bank; + + /* + * At this point, all the events have the same filter settings. + * Therefore, take the first event and use its configuration. 
+ */ + idx = find_first_bit(cspmu->hw_events.used_ctrs, + cspmu->cycle_counter_logical_idx); + + event = cspmu->hw_events.events[idx]; + + threshold = get_threshold(event); + rank = get_rank(event); + bank = get_bank(event); + + writel(threshold, cspmu->base0 + PMAUXR0); + writel(rank, cspmu->base0 + PMAUXR1); + writel(bank, cspmu->base0 + PMAUXR2); +} + +static int ampere_cspmu_validate_configs(struct perf_event *event, + struct perf_event *event2) +{ + if (get_threshold(event) != get_threshold(event2) || + get_rank(event) != get_rank(event2) || + get_bank(event) != get_bank(event2)) + return -EINVAL; + + return 0; +} + +static int ampere_cspmu_validate_event(struct arm_cspmu *cspmu, + struct perf_event *new) +{ + struct perf_event *curr, *leader = new->group_leader; + unsigned int idx; + int ret; + + ret = ampere_cspmu_validate_configs(new, leader); + if (ret) + return ret; + + /* We compare the global filter settings to the existing events */ + idx = find_first_bit(cspmu->hw_events.used_ctrs, + cspmu->cycle_counter_logical_idx); + + /* This is the first event, thus any configuration is fine */ + if (idx == cspmu->cycle_counter_logical_idx) + return 0; + + curr = cspmu->hw_events.events[idx]; + + return ampere_cspmu_validate_configs(curr, new); +} + +static char *ampere_cspmu_format_name(const struct arm_cspmu *cspmu, + const char *name_pattern) +{ + struct device *dev = cspmu->dev; + int id; + + id = ida_alloc(&mcu_pmu_ida, GFP_KERNEL); + if (id < 0) + return ERR_PTR(id); + + return devm_kasprintf(dev, GFP_KERNEL, name_pattern, id); +} + +static int ampere_cspmu_init_ops(struct arm_cspmu *cspmu) +{ + struct device *dev = cspmu->dev; + struct ampere_cspmu_ctx *ctx; + struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops; + + ctx = devm_kzalloc(dev, sizeof(struct ampere_cspmu_ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->event_attr = ampereone_mcu_pmu_event_attrs; + ctx->format_attr = ampereone_mcu_format_attrs; + ctx->name = ampere_cspmu_format_name(cspmu, "ampere_mcu_pmu_%d"); + if (IS_ERR_OR_NULL(ctx->name)) + return ctx->name ? 
PTR_ERR(ctx->name) : -ENOMEM; + + cspmu->impl.ctx = ctx; + + impl_ops->event_filter = ampere_cspmu_event_filter; + impl_ops->set_ev_filter = ampere_cspmu_set_ev_filter; + impl_ops->validate_event = ampere_cspmu_validate_event; + impl_ops->get_name = ampere_cspmu_get_name; + impl_ops->get_event_attrs = ampere_cspmu_get_event_attrs; + impl_ops->get_format_attrs = ampere_cspmu_get_format_attrs; + + return 0; +} + +/* Match all Ampere Coresight PMU devices */ +static const struct arm_cspmu_impl_match ampere_cspmu_param = { + .pmiidr_val = ARM_CSPMU_IMPL_ID_AMPERE, + .module = THIS_MODULE, + .impl_init_ops = ampere_cspmu_init_ops +}; + +static int __init ampere_cspmu_init(void) +{ + int ret; + + ret = arm_cspmu_impl_register(&ampere_cspmu_param); + if (ret) + pr_err("ampere_cspmu backend registration error: %d\n", ret); + + return ret; +} + +static void __exit ampere_cspmu_exit(void) +{ + arm_cspmu_impl_unregister(&ampere_cspmu_param); +} + +module_init(ampere_cspmu_init); +module_exit(ampere_cspmu_exit); + +MODULE_LICENSE("GPL"); diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index dd18aede169a..9f478f2c8554 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -383,6 +383,14 @@ static struct arm_cspmu_impl_match impl_match[] = { .module = NULL, .impl_init_ops = NULL, }, + { + .module_name = "ampere_cspmu", + .pmiidr_val = ARM_CSPMU_IMPL_ID_AMPERE, + .pmiidr_mask = ARM_CSPMU_PMIIDR_IMPLEMENTER, + .module = NULL, + .impl_init_ops = NULL, + }, + {0} }; diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h index a30c8372214c..2fe723555a6b 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.h +++ b/drivers/perf/arm_cspmu/arm_cspmu.h @@ -71,6 +71,7 @@ /* JEDEC-assigned JEP106 identification code */ #define ARM_CSPMU_IMPL_ID_NVIDIA 0x36B +#define ARM_CSPMU_IMPL_ID_AMPERE 0xA16 struct arm_cspmu; From 7c2a440c1da7c97a1977260ad65193084c48e976 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Thu, 12 Oct 2023 00:41:03 -0700 Subject: [PATCH 06/10] docs/perf: Add ampere_cspmu to toctree to fix a build warning commit 0abe7f61c28d62ee0530c31589e6ea209aa82cbd upstream. Add ampere_cspmu to toctree in order to address the following warning produced when building documents: Documentation/admin-guide/perf/ampere_cspmu.rst: WARNING: document isn't included in any toctree Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/all/20231011172250.5a6498e5@canb.auug.org.au/ Fixes: 53a810ad3c5c ("perf: arm_cspmu: ampere_cspmu: Add support for Ampere SoC PMU") Signed-off-by: Ilkka Koskinen Link: https://lore.kernel.org/r/20231012074103.3772114-1-ilkka@os.amperecomputing.com Signed-off-by: Will Deacon Signed-off-by: Huang Cun --- Documentation/admin-guide/perf/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst index f60be04e4e33..a2e6f2c81146 100644 --- a/Documentation/admin-guide/perf/index.rst +++ b/Documentation/admin-guide/perf/index.rst @@ -22,3 +22,4 @@ Performance monitor support nvidia-pmu meson-ddr-pmu cxl + ampere_cspmu From 3a23b1b952c10b86b390170c0f971a2faf55bbcc Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 20 Sep 2023 08:01:32 +0000 Subject: [PATCH 07/10] arm64: tlbflush: Rename MAX_TLBI_OPS commit ec1c3b9ff16082f880b304be40992568f4eee6a7 upstream. Perhaps unsurprisingly, I-cache invalidations suffer from performance issues similar to TLB invalidations on certain systems.
TLB and I-cache maintenance all result in DVM on the mesh, which is where the real bottleneck lies. Rename the heuristic to point the finger at DVM, such that it may be reused for limiting I-cache invalidations. Reviewed-by: Gavin Shan Tested-by: Gavin Shan Acked-by: Will Deacon Link: https://lore.kernel.org/r/20230920080133.944717-2-oliver.upton@linux.dev Signed-off-by: Oliver Upton Signed-off-by: Huang Cun --- arch/arm64/include/asm/tlbflush.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index b149cf9f91bc..3431d37e5054 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -333,7 +333,7 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) * This is meant to avoid soft lock-ups on large TLB flushing ranges and not * necessarily a performance improvement. */ -#define MAX_TLBI_OPS PTRS_PER_PTE +#define MAX_DVM_OPS PTRS_PER_PTE /* * __flush_tlb_range_op - Perform TLBI operation upon a range @@ -413,12 +413,12 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, /* * When not uses TLB range ops, we can handle up to - * (MAX_TLBI_OPS - 1) pages; + * (MAX_DVM_OPS - 1) pages; * When uses TLB range ops, we can handle up to * (MAX_TLBI_RANGE_PAGES - 1) pages. */ if ((!system_supports_tlb_range() && - (end - start) >= (MAX_TLBI_OPS * stride)) || + (end - start) >= (MAX_DVM_OPS * stride)) || pages >= MAX_TLBI_RANGE_PAGES) { flush_tlb_mm(vma->vm_mm); return; @@ -451,7 +451,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end { unsigned long addr; - if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) { + if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) { flush_tlb_all(); return; } From fc51f30f7cb13eff4744355d2d4651b4ba5b0f80 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 20 Sep 2023 08:01:33 +0000 Subject: [PATCH 08/10] KVM: arm64: Avoid soft lockups due to I-cache maintenance commit 909b583f81b5bb5a398d4580543f59b908a86ccc upstream. Gavin reports of soft lockups on his Ampere Altra Max machine when backing KVM guests with hugetlb pages. Upon further investigation, it was found that the system is unable to keep up with parallel I-cache invalidations done by KVM's stage-2 fault handler. This is ultimately an implementation problem. I-cache maintenance instructions are available at EL0, so nothing stops a malicious userspace from hammering a system with CMOs and cause it to fall over. "Fixing" this problem in KVM is nothing more than slapping a bandage over a much deeper problem. Anyway, the kernel already has a heuristic for limiting TLB invalidations to avoid soft lockups. Reuse that logic to limit I-cache CMOs done by KVM to map executable pages on systems without FEAT_DIC. While at it, restructure __invalidate_icache_guest_page() to improve readability and squeeze our new condition into the existing branching structure. 
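To give a feel for the numbers behind the reused heuristic: assuming MAX_DVM_OPS equals PTRS_PER_PTE = 512 (4K pages) and CTR_EL0.IminLine = 4 (a 64-byte minimum I-cache line, since IminLine is log2 of the line size in words), the by-line invalidation path introduced below is capped at 512 << (4 + 2) = 32 KiB before KVM falls back to invalidating the whole I-cache. The snippet is a stand-alone illustration of that arithmetic with assumed values, not kernel code.

    #include <stdio.h>

    int main(void)
    {
            unsigned int max_dvm_ops = 512;	/* PTRS_PER_PTE with 4K pages (assumed) */
            unsigned int iminline    = 4;	/* log2(words): 16 words -> 64-byte lines */
            unsigned long max_bytes  = (unsigned long)max_dvm_ops << (iminline + 2);

            /* Larger ranges are better served by icache_inval_all_pou(). */
            printf("invalidate by line up to %lu bytes (%lu KiB)\n",
                   max_bytes, max_bytes / 1024);
            return 0;
    }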
Link: https://lore.kernel.org/kvmarm/20230904072826.1468907-1-gshan@redhat.com/ Reviewed-by: Gavin Shan Tested-by: Gavin Shan Link: https://lore.kernel.org/r/20230920080133.944717-3-oliver.upton@linux.dev Signed-off-by: Oliver Upton Signed-off-by: Huang Cun --- arch/arm64/include/asm/kvm_mmu.h | 37 ++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 96a80e8f6226..a425ecdd7be0 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -224,16 +224,41 @@ static inline void __clean_dcache_guest_page(void *va, size_t size) kvm_flush_dcache_to_poc(va, size); } +static inline size_t __invalidate_icache_max_range(void) +{ + u8 iminline; + u64 ctr; + + asm volatile(ALTERNATIVE_CB("movz %0, #0\n" + "movk %0, #0, lsl #16\n" + "movk %0, #0, lsl #32\n" + "movk %0, #0, lsl #48\n", + ARM64_ALWAYS_SYSTEM, + kvm_compute_final_ctr_el0) + : "=r" (ctr)); + + iminline = SYS_FIELD_GET(CTR_EL0, IminLine, ctr) + 2; + return MAX_DVM_OPS << iminline; +} + static inline void __invalidate_icache_guest_page(void *va, size_t size) { - if (icache_is_aliasing()) { - /* any kind of VIPT cache */ + /* + * VPIPT I-cache maintenance must be done from EL2. See comment in the + * nVHE flavor of __kvm_tlb_flush_vmid_ipa(). + */ + if (icache_is_vpipt() && read_sysreg(CurrentEL) != CurrentEL_EL2) + return; + + /* + * Blow the whole I-cache if it is aliasing (i.e. VIPT) or the + * invalidation range exceeds our arbitrary limit on invadations by + * cache line. + */ + if (icache_is_aliasing() || size > __invalidate_icache_max_range()) icache_inval_all_pou(); - } else if (read_sysreg(CurrentEL) != CurrentEL_EL1 || - !icache_is_vpipt()) { - /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */ + else icache_inval_pou((unsigned long)va, (unsigned long)va + size); - } } void kvm_set_way_flush(struct kvm_vcpu *vcpu); From 59149005ec2a66dabb2b22e1b5164d3e64e36c5f Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 22 Sep 2023 22:32:29 +0000 Subject: [PATCH 09/10] KVM: arm64: Always invalidate TLB for stage-2 permission faults commit be097997a273259f1723baac5463cf19d8564efa upstream. It is possible for multiple vCPUs to fault on the same IPA and attempt to resolve the fault. One of the page table walks will actually update the PTE and the rest will return -EAGAIN per our race detection scheme. KVM elides the TLB invalidation on the racing threads as the return value is nonzero. Before commit a12ab1378a88 ("KVM: arm64: Use local TLBI on permission relaxation") KVM always used broadcast TLB invalidations when handling permission faults, which had the convenient property of making the stage-2 updates visible to all CPUs in the system. However now we do a local invalidation, and TLBI elision leads to the vCPU thread faulting again on the stale entry. Remember that the architecture permits the TLB to cache translations that precipitate a permission fault. Invalidate the TLB entry responsible for the permission fault if the stage-2 descriptor has been relaxed, regardless of which thread actually did the job. 
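The sketch below restates the reasoning with stub helpers whose names are invented for the purpose; it is not KVM code, only the shape of the fix: the losing thread must still invalidate its own stale TLB entry, because the winning thread only issued a local TLBI on its CPU.

    #include <errno.h>

    /* Stand-ins for the real page-table walker and local TLBI helper. */
    static int stage2_relax_perms_stub(unsigned long ipa) { (void)ipa; return -EAGAIN; }
    static void tlbi_ipa_local_stub(unsigned long ipa)    { (void)ipa; }

    int handle_perm_fault(unsigned long ipa)
    {
            int ret = stage2_relax_perms_stub(ipa);

            /*
             * The descriptor was relaxed either by this thread (ret == 0) or by
             * the racing vCPU that won (ret == -EAGAIN); in both cases the TLB
             * entry that precipitated the permission fault must be invalidated.
             */
            if (!ret || ret == -EAGAIN)
                    tlbi_ipa_local_stub(ipa);

            return ret;	/* on -EAGAIN the caller simply retries the fault */
    }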
Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20230922223229.1608155-1-oliver.upton@linux.dev Signed-off-by: Oliver Upton Signed-off-by: Huang Cun --- arch/arm64/kvm/hyp/pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index f155b8c9e98c..286888751793 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -1314,7 +1314,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED); - if (!ret) + if (!ret || ret == -EAGAIN) kvm_call_hyp(__kvm_tlb_flush_vmid_ipa_nsh, pgt->mmu, addr, level); return ret; } From 3e14a8ae4f1964bc3ddc813fa26053333ec00f61 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Thu, 30 Nov 2023 18:15:50 -0800 Subject: [PATCH 10/10] perf vendor events arm64 AmpereOneX: Add core PMU events and metrics commit 16438b652b464ef7d0a877d31e93ab54338f6b0a upstream. Add JSON files for AmpereOneX core PMU events and metrics. Reviewed-by: Ian Rogers Signed-off-by: Ilkka Koskinen Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20231201021550.1109196-4-ilkka@os.amperecomputing.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Huang Cun --- .../arch/arm64/ampere/ampereonex/branch.json | 125 +++++ .../arch/arm64/ampere/ampereonex/bus.json | 20 + .../arch/arm64/ampere/ampereonex/cache.json | 206 ++++++++ .../arm64/ampere/ampereonex/core-imp-def.json | 464 ++++++++++++++++++ .../arm64/ampere/ampereonex/exception.json | 47 ++ .../arm64/ampere/ampereonex/instruction.json | 128 +++++ .../arm64/ampere/ampereonex/intrinsic.json | 14 + .../arch/arm64/ampere/ampereonex/memory.json | 41 ++ .../arch/arm64/ampere/ampereonex/metrics.json | 442 +++++++++++++++++ .../arch/arm64/ampere/ampereonex/mmu.json | 170 +++++++ .../arm64/ampere/ampereonex/pipeline.json | 41 ++ .../arch/arm64/ampere/ampereonex/spe.json | 14 + tools/perf/pmu-events/arch/arm64/mapfile.csv | 1 + 13 files changed, 1713 insertions(+) create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/branch.json create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/bus.json create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/core-imp-def.json create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/exception.json create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/instruction.json create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/intrinsic.json create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/memory.json create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/mmu.json create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/pipeline.json create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/spe.json diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/branch.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/branch.json new file mode 100644 index 000000000000..a632755fc086 --- /dev/null +++ 
b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/branch.json @@ -0,0 +1,125 @@ +[ + { + "ArchStdEvent": "BR_IMMED_SPEC" + }, + { + "ArchStdEvent": "BR_RETURN_SPEC" + }, + { + "ArchStdEvent": "BR_INDIRECT_SPEC" + }, + { + "ArchStdEvent": "BR_MIS_PRED" + }, + { + "ArchStdEvent": "BR_PRED" + }, + { + "PublicDescription": "Instruction architecturally executed, branch not taken", + "EventCode": "0x8107", + "EventName": "BR_SKIP_RETIRED", + "BriefDescription": "Instruction architecturally executed, branch not taken" + }, + { + "PublicDescription": "Instruction architecturally executed, immediate branch taken", + "EventCode": "0x8108", + "EventName": "BR_IMMED_TAKEN_RETIRED", + "BriefDescription": "Instruction architecturally executed, immediate branch taken" + }, + { + "PublicDescription": "Instruction architecturally executed, indirect branch excluding return retired", + "EventCode": "0x810c", + "EventName": "BR_INDNR_TAKEN_RETIRED", + "BriefDescription": "Instruction architecturally executed, indirect branch excluding return retired" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted immediate branch", + "EventCode": "0x8110", + "EventName": "BR_IMMED_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted immediate branch" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted immediate branch", + "EventCode": "0x8111", + "EventName": "BR_IMMED_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted immediate branch" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted indirect branch", + "EventCode": "0x8112", + "EventName": "BR_IND_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted indirect branch" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted indirect branch", + "EventCode": "0x8113", + "EventName": "BR_IND_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted indirect branch" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted procedure return", + "EventCode": "0x8114", + "EventName": "BR_RETURN_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted procedure return" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted procedure return", + "EventCode": "0x8115", + "EventName": "BR_RETURN_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted procedure return" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted indirect branch excluding return", + "EventCode": "0x8116", + "EventName": "BR_INDNR_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted indirect branch excluding return" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted indirect branch excluding return", + "EventCode": "0x8117", + "EventName": "BR_INDNR_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted indirect branch excluding return" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted branch, taken", + "EventCode": "0x8118", + "EventName": "BR_TAKEN_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted branch, taken" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted branch, taken", + "EventCode": "0x8119", + 
"EventName": "BR_TAKEN_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted branch, taken" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted branch, not taken", + "EventCode": "0x811a", + "EventName": "BR_SKIP_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted branch, not taken" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted branch, not taken", + "EventCode": "0x811b", + "EventName": "BR_SKIP_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted branch, not taken" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted branch", + "EventCode": "0x811c", + "EventName": "BR_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted branch" + }, + { + "PublicDescription": "Instruction architecturally executed, indirect branch", + "EventCode": "0x811d", + "EventName": "BR_IND_RETIRED", + "BriefDescription": "Instruction architecturally executed, indirect branch" + }, + { + "PublicDescription": "Branch Record captured.", + "EventCode": "0x811f", + "EventName": "BRB_FILTRATE", + "BriefDescription": "Branch Record captured." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/bus.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/bus.json new file mode 100644 index 000000000000..2aeb9907831d --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/bus.json @@ -0,0 +1,20 @@ +[ + { + "ArchStdEvent": "CPU_CYCLES" + }, + { + "ArchStdEvent": "BUS_CYCLES" + }, + { + "ArchStdEvent": "BUS_ACCESS_RD" + }, + { + "ArchStdEvent": "BUS_ACCESS_WR" + }, + { + "ArchStdEvent": "BUS_ACCESS" + }, + { + "ArchStdEvent": "CNT_CYCLES" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json new file mode 100644 index 000000000000..c50d8e930b05 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json @@ -0,0 +1,206 @@ +[ + { + "ArchStdEvent": "L1D_CACHE_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_VICTIM" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_CLEAN" + }, + { + "ArchStdEvent": "L2D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L1I_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1I_TLB_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL" + }, + { + "ArchStdEvent": "L1I_CACHE" + }, + { + "ArchStdEvent": "L2D_CACHE" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L2D_CACHE_WB" + }, + { + "ArchStdEvent": "L1D_TLB" + }, + { + "ArchStdEvent": "L1I_TLB" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL" + }, + { + "ArchStdEvent": "L2I_TLB_REFILL" + }, + { + "ArchStdEvent": "L2D_TLB" + }, + { + "ArchStdEvent": "L2I_TLB" + }, + { + "ArchStdEvent": "DTLB_WALK" + }, + { + "ArchStdEvent": "ITLB_WALK" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_LMISS_RD" + }, + { + "ArchStdEvent": "L1I_CACHE_LMISS" + }, + { + 
"ArchStdEvent": "L2D_CACHE_LMISS_RD" + }, + { + "PublicDescription": "Level 1 data or unified cache demand access", + "EventCode": "0x8140", + "EventName": "L1D_CACHE_RW", + "BriefDescription": "Level 1 data or unified cache demand access" + }, + { + "PublicDescription": "Level 1 data or unified cache preload or prefetch", + "EventCode": "0x8142", + "EventName": "L1D_CACHE_PRFM", + "BriefDescription": "Level 1 data or unified cache preload or prefetch" + }, + { + "PublicDescription": "Level 1 data or unified cache refill, preload or prefetch", + "EventCode": "0x8146", + "EventName": "L1D_CACHE_REFILL_PRFM", + "BriefDescription": "Level 1 data or unified cache refill, preload or prefetch" + }, + { + "ArchStdEvent": "L1D_TLB_RD" + }, + { + "ArchStdEvent": "L1D_TLB_WR" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_TLB_RD" + }, + { + "ArchStdEvent": "L2D_TLB_WR" + }, + { + "PublicDescription": "L1D TLB miss", + "EventCode": "0xD600", + "EventName": "L1D_TLB_MISS", + "BriefDescription": "L1D TLB miss" + }, + { + "PublicDescription": "Level 1 prefetcher, load prefetch requests generated", + "EventCode": "0xd606", + "EventName": "L1_PREFETCH_LD_GEN", + "BriefDescription": "Level 1 prefetcher, load prefetch requests generated" + }, + { + "PublicDescription": "Level 1 prefetcher, load prefetch fills into the level 1 cache", + "EventCode": "0xd607", + "EventName": "L1_PREFETCH_LD_FILL", + "BriefDescription": "Level 1 prefetcher, load prefetch fills into the level 1 cache" + }, + { + "PublicDescription": "Level 1 prefetcher, load prefetch to level 2 generated", + "EventCode": "0xd608", + "EventName": "L1_PREFETCH_L2_REQ", + "BriefDescription": "Level 1 prefetcher, load prefetch to level 2 generated" + }, + { + "PublicDescription": "L1 prefetcher, distance was reset", + "EventCode": "0xd609", + "EventName": "L1_PREFETCH_DIST_RST", + "BriefDescription": "L1 prefetcher, distance was reset" + }, + { + "PublicDescription": "L1 prefetcher, distance was increased", + "EventCode": "0xd60a", + "EventName": "L1_PREFETCH_DIST_INC", + "BriefDescription": "L1 prefetcher, distance was increased" + }, + { + "PublicDescription": "Level 1 prefetcher, table entry is trained", + "EventCode": "0xd60b", + "EventName": "L1_PREFETCH_ENTRY_TRAINED", + "BriefDescription": "Level 1 prefetcher, table entry is trained" + }, + { + "PublicDescription": "L1 data cache refill - Read or Write", + "EventCode": "0xd60e", + "EventName": "L1D_CACHE_REFILL_RW", + "BriefDescription": "L1 data cache refill - Read or Write" + }, + { + "PublicDescription": "Level 2 cache refill from instruction-side miss, including IMMU refills", + "EventCode": "0xD701", + "EventName": "L2C_INST_REFILL", + "BriefDescription": "Level 2 cache refill from instruction-side miss, including IMMU refills" + }, + { + "PublicDescription": "Level 2 cache refill from data-side miss, including DMMU refills", + "EventCode": "0xD702", + "EventName": "L2C_DATA_REFILL", + "BriefDescription": "Level 2 cache refill from data-side miss, including DMMU refills" + }, + { + "PublicDescription": "Level 2 cache prefetcher, load prefetch requests generated", + "EventCode": "0xD703", + "EventName": "L2_PREFETCH_REQ", + "BriefDescription": "Level 2 cache prefetcher, load prefetch requests generated" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/core-imp-def.json new file mode 100644 index 
000000000000..eb5a2208d260 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/core-imp-def.json @@ -0,0 +1,464 @@ +[ + { + "PublicDescription": "Level 2 prefetch requests, refilled to L2 cache", + "EventCode": "0x10A", + "EventName": "L2_PREFETCH_REFILL", + "BriefDescription": "Level 2 prefetch requests, refilled to L2 cache" + }, + { + "PublicDescription": "Level 2 prefetch requests, late", + "EventCode": "0x10B", + "EventName": "L2_PREFETCH_UPGRADE", + "BriefDescription": "Level 2 prefetch requests, late" + }, + { + "PublicDescription": "Predictable branch speculatively executed that hit any level of BTB", + "EventCode": "0x110", + "EventName": "BPU_HIT_BTB", + "BriefDescription": "Predictable branch speculatively executed that hit any level of BTB" + }, + { + "PublicDescription": "Predictable conditional branch speculatively executed that hit any level of BTB", + "EventCode": "0x111", + "EventName": "BPU_CONDITIONAL_BRANCH_HIT_BTB", + "BriefDescription": "Predictable conditional branch speculatively executed that hit any level of BTB" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor", + "EventCode": "0x112", + "EventName": "BPU_HIT_INDIRECT_PREDICTOR", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor", + "EventCode": "0x113", + "EventName": "BPU_HIT_RSB", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor" + }, + { + "PublicDescription": "Predictable unconditional branch speculatively executed that did not hit any level of BTB", + "EventCode": "0x114", + "EventName": "BPU_UNCONDITIONAL_BRANCH_MISS_BTB", + "BriefDescription": "Predictable unconditional branch speculatively executed that did not hit any level of BTB" + }, + { + "PublicDescription": "Predictable branch speculatively executed, unpredicted", + "EventCode": "0x115", + "EventName": "BPU_BRANCH_NO_HIT", + "BriefDescription": "Predictable branch speculatively executed, unpredicted" + }, + { + "PublicDescription": "Predictable branch speculatively executed that hit any level of BTB that mispredict", + "EventCode": "0x116", + "EventName": "BPU_HIT_BTB_AND_MISPREDICT", + "BriefDescription": "Predictable branch speculatively executed that hit any level of BTB that mispredict" + }, + { + "PublicDescription": "Predictable conditional branch speculatively executed that hit any level of BTB that (direction) mispredict", + "EventCode": "0x117", + "EventName": "BPU_CONDITIONAL_BRANCH_HIT_BTB_AND_MISPREDICT", + "BriefDescription": "Predictable conditional branch speculatively executed that hit any level of BTB that (direction) mispredict" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor that mispredict", + "EventCode": "0x118", + "EventName": "BPU_INDIRECT_BRANCH_HIT_BTB_AND_MISPREDICT", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor that mispredict" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor that mispredict", + "EventCode": "0x119", + "EventName": 
"BPU_HIT_RSB_AND_MISPREDICT", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor that mispredict" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the overflow/underflow return predictor that mispredict", + "EventCode": "0x11a", + "EventName": "BPU_MISS_RSB_AND_MISPREDICT", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the overflow/underflow return predictor that mispredict" + }, + { + "PublicDescription": "Predictable branch speculatively executed, unpredicted, that mispredict", + "EventCode": "0x11b", + "EventName": "BPU_NO_PREDICTION_MISPREDICT", + "BriefDescription": "Predictable branch speculatively executed, unpredicted, that mispredict" + }, + { + "PublicDescription": "Preditable branch update the BTB region buffer entry", + "EventCode": "0x11c", + "EventName": "BPU_BTB_UPDATE", + "BriefDescription": "Preditable branch update the BTB region buffer entry" + }, + { + "PublicDescription": "Count predict pipe stalls due to speculative return address predictor full", + "EventCode": "0x11d", + "EventName": "BPU_RSB_FULL_STALL", + "BriefDescription": "Count predict pipe stalls due to speculative return address predictor full" + }, + { + "PublicDescription": "Macro-ops speculatively decoded", + "EventCode": "0x11f", + "EventName": "ICF_INST_SPEC_DECODE", + "BriefDescription": "Macro-ops speculatively decoded" + }, + { + "PublicDescription": "Flushes", + "EventCode": "0x120", + "EventName": "GPC_FLUSH", + "BriefDescription": "Flushes" + }, + { + "PublicDescription": "Flushes due to memory hazards", + "EventCode": "0x121", + "EventName": "GPC_FLUSH_MEM_FAULT", + "BriefDescription": "Flushes due to memory hazards" + }, + { + "PublicDescription": "ETM extout bit 0", + "EventCode": "0x141", + "EventName": "MSC_ETM_EXTOUT0", + "BriefDescription": "ETM extout bit 0" + }, + { + "PublicDescription": "ETM extout bit 1", + "EventCode": "0x142", + "EventName": "MSC_ETM_EXTOUT1", + "BriefDescription": "ETM extout bit 1" + }, + { + "PublicDescription": "ETM extout bit 2", + "EventCode": "0x143", + "EventName": "MSC_ETM_EXTOUT2", + "BriefDescription": "ETM extout bit 2" + }, + { + "PublicDescription": "ETM extout bit 3", + "EventCode": "0x144", + "EventName": "MSC_ETM_EXTOUT3", + "BriefDescription": "ETM extout bit 3" + }, + { + "PublicDescription": "Bus request sn", + "EventCode": "0x156", + "EventName": "L2C_SNOOP", + "BriefDescription": "Bus request sn" + }, + { + "PublicDescription": "L2 TXDAT LCRD blocked", + "EventCode": "0x169", + "EventName": "L2C_DAT_CRD_STALL", + "BriefDescription": "L2 TXDAT LCRD blocked" + }, + { + "PublicDescription": "L2 TXRSP LCRD blocked", + "EventCode": "0x16a", + "EventName": "L2C_RSP_CRD_STALL", + "BriefDescription": "L2 TXRSP LCRD blocked" + }, + { + "PublicDescription": "L2 TXREQ LCRD blocked", + "EventCode": "0x16b", + "EventName": "L2C_REQ_CRD_STALL", + "BriefDescription": "L2 TXREQ LCRD blocked" + }, + { + "PublicDescription": "Early mispredict", + "EventCode": "0xD100", + "EventName": "ICF_EARLY_MIS_PRED", + "BriefDescription": "Early mispredict" + }, + { + "PublicDescription": "FEQ full cycles", + "EventCode": "0xD101", + "EventName": "ICF_FEQ_FULL", + "BriefDescription": "FEQ full cycles" + }, + { + "PublicDescription": "Instruction FIFO Full", + "EventCode": "0xD102", + "EventName": "ICF_INST_FIFO_FULL", + "BriefDescription": "Instruction FIFO Full" + 
}, + { + "PublicDescription": "L1I TLB miss", + "EventCode": "0xD103", + "EventName": "L1I_TLB_MISS", + "BriefDescription": "L1I TLB miss" + }, + { + "PublicDescription": "ICF sent 0 instructions to IDR this cycle", + "EventCode": "0xD104", + "EventName": "ICF_STALL", + "BriefDescription": "ICF sent 0 instructions to IDR this cycle" + }, + { + "PublicDescription": "PC FIFO Full", + "EventCode": "0xD105", + "EventName": "ICF_PC_FIFO_FULL", + "BriefDescription": "PC FIFO Full" + }, + { + "PublicDescription": "Stall due to BOB ID", + "EventCode": "0xD200", + "EventName": "IDR_STALL_BOB_ID", + "BriefDescription": "Stall due to BOB ID" + }, + { + "PublicDescription": "Dispatch stall due to LOB entries", + "EventCode": "0xD201", + "EventName": "IDR_STALL_LOB_ID", + "BriefDescription": "Dispatch stall due to LOB entries" + }, + { + "PublicDescription": "Dispatch stall due to SOB entries", + "EventCode": "0xD202", + "EventName": "IDR_STALL_SOB_ID", + "BriefDescription": "Dispatch stall due to SOB entries" + }, + { + "PublicDescription": "Dispatch stall due to IXU scheduler entries", + "EventCode": "0xD203", + "EventName": "IDR_STALL_IXU_SCHED", + "BriefDescription": "Dispatch stall due to IXU scheduler entries" + }, + { + "PublicDescription": "Dispatch stall due to FSU scheduler entries", + "EventCode": "0xD204", + "EventName": "IDR_STALL_FSU_SCHED", + "BriefDescription": "Dispatch stall due to FSU scheduler entries" + }, + { + "PublicDescription": "Dispatch stall due to ROB entries", + "EventCode": "0xD205", + "EventName": "IDR_STALL_ROB_ID", + "BriefDescription": "Dispatch stall due to ROB entries" + }, + { + "PublicDescription": "Dispatch stall due to flush", + "EventCode": "0xD206", + "EventName": "IDR_STALL_FLUSH", + "BriefDescription": "Dispatch stall due to flush" + }, + { + "PublicDescription": "Dispatch stall due to WFI", + "EventCode": "0xD207", + "EventName": "IDR_STALL_WFI", + "BriefDescription": "Dispatch stall due to WFI" + }, + { + "PublicDescription": "Number of SWOB drains triggered by timeout", + "EventCode": "0xD208", + "EventName": "IDR_STALL_SWOB_TIMEOUT", + "BriefDescription": "Number of SWOB drains triggered by timeout" + }, + { + "PublicDescription": "Number of SWOB drains triggered by system register or special-purpose register read-after-write or specific special-purpose register writes that cause SWOB drain", + "EventCode": "0xD209", + "EventName": "IDR_STALL_SWOB_RAW", + "BriefDescription": "Number of SWOB drains triggered by system register or special-purpose register read-after-write or specific special-purpose register writes that cause SWOB drain" + }, + { + "PublicDescription": "Number of SWOB drains triggered by system register write when SWOB full", + "EventCode": "0xD20A", + "EventName": "IDR_STALL_SWOB_FULL", + "BriefDescription": "Number of SWOB drains triggered by system register write when SWOB full" + }, + { + "PublicDescription": "Dispatch stall due to L1 instruction cache miss", + "EventCode": "0xD20B", + "EventName": "STALL_FRONTEND_CACHE", + "BriefDescription": "Dispatch stall due to L1 instruction cache miss" + }, + { + "PublicDescription": "Dispatch stall due to L1 data cache miss", + "EventCode": "0xD20D", + "EventName": "STALL_BACKEND_CACHE", + "BriefDescription": "Dispatch stall due to L1 data cache miss" + }, + { + "PublicDescription": "Dispatch stall due to lack of any core resource", + "EventCode": "0xD20F", + "EventName": "STALL_BACKEND_RESOURCE", + "BriefDescription": "Dispatch stall due to lack of any core resource" + }, + { + 
"PublicDescription": "Instructions issued by the scheduler", + "EventCode": "0xD300", + "EventName": "IXU_NUM_UOPS_ISSUED", + "BriefDescription": "Instructions issued by the scheduler" + }, + { + "PublicDescription": "Any uop issued was canceled for any reason", + "EventCode": "0xD301", + "EventName": "IXU_ISSUE_CANCEL", + "BriefDescription": "Any uop issued was canceled for any reason" + }, + { + "PublicDescription": "A load wakeup to the scheduler has been canceled", + "EventCode": "0xD302", + "EventName": "IXU_LOAD_CANCEL", + "BriefDescription": "A load wakeup to the scheduler has been canceled" + }, + { + "PublicDescription": "The scheduler had to cancel one slow Uop due to resource conflict", + "EventCode": "0xD303", + "EventName": "IXU_SLOW_CANCEL", + "BriefDescription": "The scheduler had to cancel one slow Uop due to resource conflict" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXA", + "EventCode": "0xD304", + "EventName": "IXU_IXA_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXA" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXA Par 0", + "EventCode": "0xD305", + "EventName": "IXU_IXA_PAR0_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXA Par 0" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXA Par 1", + "EventCode": "0xD306", + "EventName": "IXU_IXA_PAR1_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXA Par 1" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXB", + "EventCode": "0xD307", + "EventName": "IXU_IXB_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXB" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXB Par 0", + "EventCode": "0xD308", + "EventName": "IXU_IXB_PAR0_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXB Par 0" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXB Par 1", + "EventCode": "0xD309", + "EventName": "IXU_IXB_PAR1_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXB Par 1" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXC", + "EventCode": "0xD30A", + "EventName": "IXU_IXC_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXC" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXC Par 0", + "EventCode": "0xD30B", + "EventName": "IXU_IXC_PAR0_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXC Par 0" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXC Par 1", + "EventCode": "0xD30C", + "EventName": "IXU_IXC_PAR1_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXC Par 1" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXD", + "EventCode": "0xD30D", + "EventName": "IXU_IXD_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXD" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXD Par 0", + "EventCode": "0xD30E", + "EventName": "IXU_IXD_PAR0_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXD Par 0" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXD Par 1", + "EventCode": "0xD30F", + "EventName": "IXU_IXD_PAR1_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXD Par 1" + }, + { + "PublicDescription": "Uops issued by the FSU scheduler", + "EventCode": "0xD400", + "EventName": "FSU_ISSUED", + "BriefDescription": "Uops issued by the FSU scheduler" + }, + { + "PublicDescription": "Uops issued by the scheduler on FSX", + "EventCode": "0xD401", + "EventName": 
"FSU_FSX_ISSUED", + "BriefDescription": "Uops issued by the scheduler on FSX" + }, + { + "PublicDescription": "Uops issued by the scheduler on FSY", + "EventCode": "0xD402", + "EventName": "FSU_FSY_ISSUED", + "BriefDescription": "Uops issued by the scheduler on FSY" + }, + { + "PublicDescription": "Uops issued by the scheduler on FSZ", + "EventCode": "0xD403", + "EventName": "FSU_FSZ_ISSUED", + "BriefDescription": "Uops issued by the scheduler on FSZ" + }, + { + "PublicDescription": "Uops canceled (load cancels)", + "EventCode": "0xD404", + "EventName": "FSU_CANCEL", + "BriefDescription": "Uops canceled (load cancels)" + }, + { + "PublicDescription": "Count scheduler stalls due to divide/sqrt", + "EventCode": "0xD405", + "EventName": "FSU_DIV_SQRT_STALL", + "BriefDescription": "Count scheduler stalls due to divide/sqrt" + }, + { + "PublicDescription": "Number of SWOB drains", + "EventCode": "0xD500", + "EventName": "GPC_SWOB_DRAIN", + "BriefDescription": "Number of SWOB drains" + }, + { + "PublicDescription": "GPC detected a Breakpoint instruction match", + "EventCode": "0xD501", + "EventName": "BREAKPOINT_MATCH", + "BriefDescription": "GPC detected a Breakpoint instruction match" + }, + { + "PublicDescription": "Core progress monitor triggered", + "EventCode": "0xd502", + "EventName": "GPC_CPM_TRIGGER", + "BriefDescription": "Core progress monitor triggered" + }, + { + "PublicDescription": "Fill buffer full", + "EventCode": "0xD601", + "EventName": "OFB_FULL", + "BriefDescription": "Fill buffer full" + }, + { + "PublicDescription": "Load satisified from store forwarded data", + "EventCode": "0xD605", + "EventName": "LD_FROM_ST_FWD", + "BriefDescription": "Load satisified from store forwarded data" + }, + { + "PublicDescription": "Store retirement pipe stall", + "EventCode": "0xD60C", + "EventName": "LSU_ST_RETIRE_STALL", + "BriefDescription": "Store retirement pipe stall" + }, + { + "PublicDescription": "LSU detected a Watchpoint data match", + "EventCode": "0xD60D", + "EventName": "WATCHPOINT_MATCH", + "BriefDescription": "LSU detected a Watchpoint data match" + }, + { + "PublicDescription": "Counts cycles that MSC is telling GPC to stall commit due to ETM ISTALL feature", + "EventCode": "0xda00", + "EventName": "MSC_ETM_COMMIT_STALL", + "BriefDescription": "Counts cycles that MSC is telling GPC to stall commit due to ETM ISTALL feature" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/exception.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/exception.json new file mode 100644 index 000000000000..bd59ba7b74e4 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/exception.json @@ -0,0 +1,47 @@ +[ + { + "ArchStdEvent": "EXC_UNDEF" + }, + { + "ArchStdEvent": "EXC_SVC" + }, + { + "ArchStdEvent": "EXC_PABORT" + }, + { + "ArchStdEvent": "EXC_DABORT" + }, + { + "ArchStdEvent": "EXC_IRQ" + }, + { + "ArchStdEvent": "EXC_FIQ" + }, + { + "ArchStdEvent": "EXC_HVC" + }, + { + "ArchStdEvent": "EXC_TRAP_PABORT" + }, + { + "ArchStdEvent": "EXC_TRAP_DABORT" + }, + { + "ArchStdEvent": "EXC_TRAP_OTHER" + }, + { + "ArchStdEvent": "EXC_TRAP_IRQ" + }, + { + "ArchStdEvent": "EXC_TRAP_FIQ" + }, + { + "ArchStdEvent": "EXC_TAKEN" + }, + { + "ArchStdEvent": "EXC_RETURN" + }, + { + "ArchStdEvent": "EXC_SMC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/instruction.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/instruction.json new file mode 100644 index 000000000000..a6a20f541e33 --- /dev/null +++ 
b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/instruction.json @@ -0,0 +1,128 @@ +[ + { + "ArchStdEvent": "SW_INCR" + }, + { + "ArchStdEvent": "ST_RETIRED" + }, + { + "ArchStdEvent": "LD_SPEC" + }, + { + "ArchStdEvent": "ST_SPEC" + }, + { + "ArchStdEvent": "LDST_SPEC" + }, + { + "ArchStdEvent": "DP_SPEC" + }, + { + "ArchStdEvent": "ASE_SPEC" + }, + { + "ArchStdEvent": "VFP_SPEC" + }, + { + "ArchStdEvent": "PC_WRITE_SPEC" + }, + { + "ArchStdEvent": "BR_IMMED_RETIRED" + }, + { + "ArchStdEvent": "BR_RETURN_RETIRED" + }, + { + "ArchStdEvent": "CRYPTO_SPEC" + }, + { + "ArchStdEvent": "ISB_SPEC" + }, + { + "ArchStdEvent": "DSB_SPEC" + }, + { + "ArchStdEvent": "DMB_SPEC" + }, + { + "ArchStdEvent": "RC_LD_SPEC" + }, + { + "ArchStdEvent": "RC_ST_SPEC" + }, + { + "ArchStdEvent": "INST_RETIRED" + }, + { + "ArchStdEvent": "CID_WRITE_RETIRED" + }, + { + "ArchStdEvent": "PC_WRITE_RETIRED" + }, + { + "ArchStdEvent": "INST_SPEC" + }, + { + "ArchStdEvent": "TTBR_WRITE_RETIRED" + }, + { + "ArchStdEvent": "BR_RETIRED" + }, + { + "ArchStdEvent": "BR_MIS_PRED_RETIRED" + }, + { + "ArchStdEvent": "OP_RETIRED" + }, + { + "ArchStdEvent": "OP_SPEC" + }, + { + "PublicDescription": "Operation speculatively executed - ASE Scalar", + "EventCode": "0xd210", + "EventName": "ASE_SCALAR_SPEC", + "BriefDescription": "Operation speculatively executed - ASE Scalar" + }, + { + "PublicDescription": "Operation speculatively executed - ASE Vector", + "EventCode": "0xd211", + "EventName": "ASE_VECTOR_SPEC", + "BriefDescription": "Operation speculatively executed - ASE Vector" + }, + { + "PublicDescription": "Barrier speculatively executed, CSDB", + "EventCode": "0x7f", + "EventName": "CSDB_SPEC", + "BriefDescription": "Barrier speculatively executed, CSDB" + }, + { + "PublicDescription": "Prefetch sent to L2.", + "EventCode": "0xd106", + "EventName": "ICF_PREFETCH_DISPATCH", + "BriefDescription": "Prefetch sent to L2." + }, + { + "PublicDescription": "Prefetch response received but was dropped since we don't support inflight upgrades.", + "EventCode": "0xd107", + "EventName": "ICF_PREFETCH_DROPPED_NO_UPGRADE", + "BriefDescription": "Prefetch response received but was dropped since we don't support inflight upgrades." + }, + { + "PublicDescription": "Prefetch request missed TLB.", + "EventCode": "0xd108", + "EventName": "ICF_PREFETCH_DROPPED_TLB_MISS", + "BriefDescription": "Prefetch request missed TLB." + }, + { + "PublicDescription": "Prefetch request dropped since duplicate was found in TLB.", + "EventCode": "0xd109", + "EventName": "ICF_PREFETCH_DROPPED_DUPLICATE", + "BriefDescription": "Prefetch request dropped since duplicate was found in TLB." + }, + { + "PublicDescription": "Prefetch request dropped since it was found in cache.", + "EventCode": "0xd10a", + "EventName": "ICF_PREFETCH_DROPPED_CACHE_HIT", + "BriefDescription": "Prefetch request dropped since it was found in cache." 
+ } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/intrinsic.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/intrinsic.json new file mode 100644 index 000000000000..7ecffb989ae0 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/intrinsic.json @@ -0,0 +1,14 @@ +[ + { + "ArchStdEvent": "LDREX_SPEC" + }, + { + "ArchStdEvent": "STREX_PASS_SPEC" + }, + { + "ArchStdEvent": "STREX_FAIL_SPEC" + }, + { + "ArchStdEvent": "STREX_SPEC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/memory.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/memory.json new file mode 100644 index 000000000000..a211d94aacde --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/memory.json @@ -0,0 +1,41 @@ +[ + { + "ArchStdEvent": "LD_RETIRED" + }, + { + "ArchStdEvent": "MEM_ACCESS_RD" + }, + { + "ArchStdEvent": "MEM_ACCESS_WR" + }, + { + "ArchStdEvent": "LD_ALIGN_LAT" + }, + { + "ArchStdEvent": "ST_ALIGN_LAT" + }, + { + "ArchStdEvent": "MEM_ACCESS" + }, + { + "ArchStdEvent": "MEMORY_ERROR" + }, + { + "ArchStdEvent": "LDST_ALIGN_LAT" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED_RD" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED_WR" + }, + { + "PublicDescription": "Flushes due to memory hazards", + "EventCode": "0x121", + "EventName": "BPU_FLUSH_MEM_FAULT", + "BriefDescription": "Flushes due to memory hazards" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json new file mode 100644 index 000000000000..c5d1d22bd034 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json @@ -0,0 +1,442 @@ +[ + { + "MetricName": "branch_miss_pred_rate", + "MetricExpr": "BR_MIS_PRED / BR_PRED", + "BriefDescription": "Branch predictor misprediction rate. May not count branches that are never resolved because they are in the misprediction shadow of an earlier branch", + "MetricGroup": "branch", + "ScaleUnit": "100%" + }, + { + "MetricName": "bus_utilization", + "MetricExpr": "BUS_ACCESS / (BUS_CYCLES * 1)", + "BriefDescription": "Core-to-uncore bus utilization", + "MetricGroup": "Bus", + "ScaleUnit": "100percent of bus cycles" + }, + { + "MetricName": "l1d_cache_miss_ratio", + "MetricExpr": "L1D_CACHE_REFILL / L1D_CACHE", + "BriefDescription": "This metric measures the ratio of level 1 data cache accesses missed to the total number of level 1 data cache accesses. This gives an indication of the effectiveness of the level 1 data cache.", + "MetricGroup": "Miss_Ratio;L1D_Cache_Effectiveness", + "ScaleUnit": "1per cache access" + }, + { + "MetricName": "l1i_cache_miss_ratio", + "MetricExpr": "L1I_CACHE_REFILL / L1I_CACHE", + "BriefDescription": "This metric measures the ratio of level 1 instruction cache accesses missed to the total number of level 1 instruction cache accesses. 
This gives an indication of the effectiveness of the level 1 instruction cache.", + "MetricGroup": "Miss_Ratio;L1I_Cache_Effectiveness", + "ScaleUnit": "1per cache access" + }, + { + "MetricName": "Miss_Ratio;l1d_cache_read_miss", + "MetricExpr": "L1D_CACHE_LMISS_RD / L1D_CACHE_RD", + "BriefDescription": "L1D cache read miss rate", + "MetricGroup": "Cache", + "ScaleUnit": "1per cache read access" + }, + { + "MetricName": "l2_cache_miss_ratio", + "MetricExpr": "L2D_CACHE_REFILL / L2D_CACHE", + "BriefDescription": "This metric measures the ratio of level 2 cache accesses missed to the total number of level 2 cache accesses. This gives an indication of the effectiveness of the level 2 cache, which is a unified cache that stores both data and instruction. Note that cache accesses in this cache are either data memory access or instruction fetch as this is a unified cache.", + "MetricGroup": "Miss_Ratio;L2_Cache_Effectiveness", + "ScaleUnit": "1per cache access" + }, + { + "MetricName": "l1i_cache_read_miss_rate", + "MetricExpr": "L1I_CACHE_LMISS / L1I_CACHE", + "BriefDescription": "L1I cache read miss rate", + "MetricGroup": "Cache", + "ScaleUnit": "1per cache access" + }, + { + "MetricName": "l2d_cache_read_miss_rate", + "MetricExpr": "L2D_CACHE_LMISS_RD / L2D_CACHE_RD", + "BriefDescription": "L2 cache read miss rate", + "MetricGroup": "Cache", + "ScaleUnit": "1per cache read access" + }, + { + "MetricName": "l1d_cache_miss_mpki", + "MetricExpr": "(L1D_CACHE_LMISS_RD * 1e3) / INST_RETIRED", + "BriefDescription": "Misses per thousand instructions (data)", + "MetricGroup": "Cache", + "ScaleUnit": "1MPKI" + }, + { + "MetricName": "l1i_cache_miss_mpki", + "MetricExpr": "(L1I_CACHE_LMISS * 1e3) / INST_RETIRED", + "BriefDescription": "Misses per thousand instructions (instruction)", + "MetricGroup": "Cache", + "ScaleUnit": "1MPKI" + }, + { + "MetricName": "simd_percentage", + "MetricExpr": "ASE_SPEC / INST_SPEC", + "BriefDescription": "This metric measures advanced SIMD operations as a percentage of total operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "crypto_percentage", + "MetricExpr": "CRYPTO_SPEC / INST_SPEC", + "BriefDescription": "This metric measures crypto operations as a percentage of operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "gflops", + "MetricExpr": "VFP_SPEC / (duration_time * 1e9)", + "BriefDescription": "Giga-floating point operations per second", + "MetricGroup": "InstructionMix" + }, + { + "MetricName": "integer_dp_percentage", + "MetricExpr": "DP_SPEC / INST_SPEC", + "BriefDescription": "This metric measures scalar integer operations as a percentage of operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "ipc", + "MetricExpr": "INST_RETIRED / CPU_CYCLES", + "BriefDescription": "This metric measures the number of instructions retired per cycle.", + "MetricGroup": "General", + "ScaleUnit": "1per cycle" + }, + { + "MetricName": "load_percentage", + "MetricExpr": "LD_SPEC / INST_SPEC", + "BriefDescription": "This metric measures load operations as a percentage of operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "load_store_spec_rate", + "MetricExpr": "LDST_SPEC / INST_SPEC", + "BriefDescription": "The rate of load or store 
instructions speculatively executed to overall instructions speculatively executed", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "retired_mips", + "MetricExpr": "INST_RETIRED / (duration_time * 1e6)", + "BriefDescription": "Millions of instructions per second", + "MetricGroup": "InstructionMix" + }, + { + "MetricName": "spec_utilization_mips", + "MetricExpr": "INST_SPEC / (duration_time * 1e6)", + "BriefDescription": "Millions of instructions per second", + "MetricGroup": "PEutilization" + }, + { + "MetricName": "pc_write_spec_rate", + "MetricExpr": "PC_WRITE_SPEC / INST_SPEC", + "BriefDescription": "The rate of software change of the PC speculatively executed to overall instructions speculatively executed", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "store_percentage", + "MetricExpr": "ST_SPEC / INST_SPEC", + "BriefDescription": "This metric measures store operations as a percentage of operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "scalar_fp_percentage", + "MetricExpr": "VFP_SPEC / INST_SPEC", + "BriefDescription": "This metric measures scalar floating point operations as a percentage of operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "retired_rate", + "MetricExpr": "OP_RETIRED / OP_SPEC", + "BriefDescription": "Of all the micro-operations issued, what percentage are retired (committed)", + "MetricGroup": "General", + "ScaleUnit": "100%" + }, + { + "MetricName": "wasted", + "MetricExpr": "1 - (OP_RETIRED / (CPU_CYCLES * #slots))", + "BriefDescription": "Of all the micro-operations issued, what proportion are lost", + "MetricGroup": "General", + "ScaleUnit": "100%" + }, + { + "MetricName": "wasted_rate", + "MetricExpr": "1 - OP_RETIRED / OP_SPEC", + "BriefDescription": "Of all the micro-operations issued, what percentage are not retired (committed)", + "MetricGroup": "General", + "ScaleUnit": "100%" + }, + { + "MetricName": "stall_backend_cache_rate", + "MetricExpr": "STALL_BACKEND_CACHE / CPU_CYCLES", + "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and cache miss", + "MetricGroup": "Stall", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_backend_resource_rate", + "MetricExpr": "STALL_BACKEND_RESOURCE / CPU_CYCLES", + "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and resource full", + "MetricGroup": "Stall", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_backend_tlb_rate", + "MetricExpr": "STALL_BACKEND_TLB / CPU_CYCLES", + "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and TLB miss", + "MetricGroup": "Stall", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_frontend_cache_rate", + "MetricExpr": "STALL_FRONTEND_CACHE / CPU_CYCLES", + "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and cache miss", + "MetricGroup": "Stall", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_frontend_tlb_rate", + "MetricExpr": "STALL_FRONTEND_TLB / CPU_CYCLES", + "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and TLB miss", + "MetricGroup": "Stall", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "dtlb_walk_ratio", + "MetricExpr":
"DTLB_WALK / L1D_TLB", + "BriefDescription": "This metric measures the ratio of data TLB Walks to the total number of data TLB accesses. This gives an indication of the effectiveness of the data TLB accesses.", + "MetricGroup": "Miss_Ratio;DTLB_Effectiveness", + "ScaleUnit": "1per TLB access" + }, + { + "MetricName": "itlb_walk_ratio", + "MetricExpr": "ITLB_WALK / L1I_TLB", + "BriefDescription": "This metric measures the ratio of instruction TLB Walks to the total number of instruction TLB accesses. This gives an indication of the effectiveness of the instruction TLB accesses.", + "MetricGroup": "Miss_Ratio;ITLB_Effectiveness", + "ScaleUnit": "1per TLB access" + }, + { + "ArchStdEvent": "backend_bound" + }, + { + "ArchStdEvent": "frontend_bound", + "MetricExpr": "100 - (retired_fraction + slots_lost_misspeculation_fraction + backend_bound)" + }, + { + "MetricName": "slots_lost_misspeculation_fraction", + "MetricExpr": "(OP_SPEC - OP_RETIRED) / (CPU_CYCLES * #slots)", + "BriefDescription": "Fraction of slots lost due to misspeculation", + "DefaultMetricgroupName": "TopdownL1", + "MetricGroup": "Default;TopdownL1", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "retired_fraction", + "MetricExpr": "OP_RETIRED / (CPU_CYCLES * #slots)", + "BriefDescription": "Fraction of slots retiring, useful work", + "DefaultMetricgroupName": "TopdownL1", + "MetricGroup": "Default;TopdownL1", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "backend_core", + "MetricExpr": "(backend_bound / 100) - backend_memory", + "BriefDescription": "Fraction of slots the CPU was stalled due to backend non-memory subsystem issues", + "MetricGroup": "TopdownL2", + "ScaleUnit": "100%" + }, + { + "MetricName": "backend_memory", + "MetricExpr": "(STALL_BACKEND_TLB + STALL_BACKEND_CACHE) / CPU_CYCLES", + "BriefDescription": "Fraction of slots the CPU was stalled due to backend memory subsystem issues (cache/tlb miss)", + "MetricGroup": "TopdownL2", + "ScaleUnit": "100%" + }, + { + "MetricName": "branch_mispredict", + "MetricExpr": "(BR_MIS_PRED_RETIRED / GPC_FLUSH) * slots_lost_misspeculation_fraction", + "BriefDescription": "Fraction of slots lost due to branch misprediciton", + "MetricGroup": "TopdownL2", + "ScaleUnit": "1percent of slots" + }, + { + "MetricName": "frontend_bandwidth", + "MetricExpr": "frontend_bound - frontend_latency", + "BriefDescription": "Fraction of slots the CPU did not dispatch at full bandwidth - able to dispatch partial slots only (1, 2, or 3 uops)", + "MetricGroup": "TopdownL2", + "ScaleUnit": "1percent of slots" + }, + { + "MetricName": "frontend_latency", + "MetricExpr": "(STALL_FRONTEND - ((STALL_SLOT_FRONTEND - ((frontend_bound / 100) * CPU_CYCLES * #slots)) / #slots)) / CPU_CYCLES", + "BriefDescription": "Fraction of slots the CPU was stalled due to frontend latency issues (cache/tlb miss); nothing to dispatch", + "MetricGroup": "TopdownL2", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "other_miss_pred", + "MetricExpr": "slots_lost_misspeculation_fraction - branch_mispredict", + "BriefDescription": "Fraction of slots lost due to other/non-branch misprediction misspeculation", + "MetricGroup": "TopdownL2", + "ScaleUnit": "1percent of slots" + }, + { + "MetricName": "pipe_utilization", + "MetricExpr": "100 * ((IXU_NUM_UOPS_ISSUED + FSU_ISSUED) / (CPU_CYCLES * 6))", + "BriefDescription": "Fraction of execute slots utilized", + "MetricGroup": "TopdownL2", + "ScaleUnit": "1percent of slots" + }, + { + "MetricName": "d_cache_l2_miss_rate", + "MetricExpr": 
"STALL_BACKEND_MEM / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to data L2 cache miss", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "d_cache_miss_rate", + "MetricExpr": "STALL_BACKEND_CACHE / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to data cache miss", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "d_tlb_miss_rate", + "MetricExpr": "STALL_BACKEND_TLB / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to data TLB miss", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "fsu_pipe_utilization", + "MetricExpr": "FSU_ISSUED / (CPU_CYCLES * 2)", + "BriefDescription": "Fraction of FSU execute slots utilized", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "i_cache_miss_rate", + "MetricExpr": "STALL_FRONTEND_CACHE / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to instruction cache miss", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "i_tlb_miss_rate", + "MetricExpr": "STALL_FRONTEND_TLB / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to instruction TLB miss", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "ixu_pipe_utilization", + "MetricExpr": "IXU_NUM_UOPS_ISSUED / (CPU_CYCLES * #slots)", + "BriefDescription": "Fraction of IXU execute slots utilized", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "stall_recovery_rate", + "MetricExpr": "IDR_STALL_FLUSH / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to flush recovery", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "stall_fsu_sched_rate", + "MetricExpr": "IDR_STALL_FSU_SCHED / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled and FSU was full", + "MetricGroup": "TopdownL4", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_ixu_sched_rate", + "MetricExpr": "IDR_STALL_IXU_SCHED / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled and IXU was full", + "MetricGroup": "TopdownL4", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_lob_id_rate", + "MetricExpr": "IDR_STALL_LOB_ID / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled and LOB was full", + "MetricGroup": "TopdownL4", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_rob_id_rate", + "MetricExpr": "IDR_STALL_ROB_ID / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled and ROB was full", + "MetricGroup": "TopdownL4", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_sob_id_rate", + "MetricExpr": "IDR_STALL_SOB_ID / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled and SOB was full", + "MetricGroup": "TopdownL4", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "l1d_cache_access_demand", + "MetricExpr": "L1D_CACHE_RW / L1D_CACHE", + "BriefDescription": "L1D cache access - demand", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "l1d_cache_access_prefetces", + "MetricExpr": "L1D_CACHE_PRFM / L1D_CACHE", + "BriefDescription": "L1D cache access - prefetch", + "MetricGroup": 
"Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "l1d_cache_demand_misses", + "MetricExpr": "L1D_CACHE_REFILL_RW / L1D_CACHE", + "BriefDescription": "L1D cache demand misses", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "l1d_cache_demand_misses_read", + "MetricExpr": "L1D_CACHE_REFILL_RD / L1D_CACHE", + "BriefDescription": "L1D cache demand misses - read", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "l1d_cache_demand_misses_write", + "MetricExpr": "L1D_CACHE_REFILL_WR / L1D_CACHE", + "BriefDescription": "L1D cache demand misses - write", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "l1d_cache_prefetch_misses", + "MetricExpr": "L1D_CACHE_REFILL_PRFM / L1D_CACHE", + "BriefDescription": "L1D cache prefetch misses", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "ase_scalar_mix", + "MetricExpr": "ASE_SCALAR_SPEC / OP_SPEC", + "BriefDescription": "Proportion of advanced SIMD data processing operations (excluding DP_SPEC/LD_SPEC) scalar operations", + "MetricGroup": "Instructions", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "ase_vector_mix", + "MetricExpr": "ASE_VECTOR_SPEC / OP_SPEC", + "BriefDescription": "Proportion of advanced SIMD data processing operations (excluding DP_SPEC/LD_SPEC) vector operations", + "MetricGroup": "Instructions", + "ScaleUnit": "100percent of cache acceses" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/mmu.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/mmu.json new file mode 100644 index 000000000000..66d83b680651 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/mmu.json @@ -0,0 +1,170 @@ +[ + { + "PublicDescription": "Level 2 data translation buffer allocation", + "EventCode": "0xD800", + "EventName": "MMU_D_OTB_ALLOC", + "BriefDescription": "Level 2 data translation buffer allocation" + }, + { + "PublicDescription": "Data TLB translation cache hit on S1L2 walk cache entry", + "EventCode": "0xd801", + "EventName": "MMU_D_TRANS_CACHE_HIT_S1L2_WALK", + "BriefDescription": "Data TLB translation cache hit on S1L2 walk cache entry" + }, + { + "PublicDescription": "Data TLB translation cache hit on S1L1 walk cache entry", + "EventCode": "0xd802", + "EventName": "MMU_D_TRANS_CACHE_HIT_S1L1_WALK", + "BriefDescription": "Data TLB translation cache hit on S1L1 walk cache entry" + }, + { + "PublicDescription": "Data TLB translation cache hit on S1L0 walk cache entry", + "EventCode": "0xd803", + "EventName": "MMU_D_TRANS_CACHE_HIT_S1L0_WALK", + "BriefDescription": "Data TLB translation cache hit on S1L0 walk cache entry" + }, + { + "PublicDescription": "Data TLB translation cache hit on S2L2 walk cache entry", + "EventCode": "0xd804", + "EventName": "MMU_D_TRANS_CACHE_HIT_S2L2_WALK", + "BriefDescription": "Data TLB translation cache hit on S2L2 walk cache entry" + }, + { + "PublicDescrition": "Data TLB translation cache hit on S2L1 walk cache entry", + "EventCode": "0xd805", + "EventName": "MMU_D_TRANS_CACHE_HIT_S2L1_WALK", + "BriefDescription": "Data TLB translation cache hit on S2L1 walk cache entry" + }, + { + "PublicDescrition": "Data TLB translation cache hit on S2L0 walk cache entry", + "EventCode": "0xd806", + "EventName": "MMU_D_TRANS_CACHE_HIT_S2L0_WALK", + "BriefDescription": "Data TLB translation cache hit on S2L0 walk cache entry" + }, + { + 
"PublicDescrition": "Data-side S1 page walk cache lookup", + "EventCode": "0xd807", + "EventName": "MMU_D_S1_WALK_CACHE_LOOKUP", + "BriefDescription": "Data-side S1 page walk cache lookup" + }, + { + "PublicDescrition": "Data-side S1 page walk cache refill", + "EventCode": "0xd808", + "EventName": "MMU_D_S1_WALK_CACHE_REFILL", + "BriefDescription": "Data-side S1 page walk cache refill" + }, + { + "PublicDescrition": "Data-side S2 page walk cache lookup", + "EventCode": "0xd809", + "EventName": "MMU_D_S2_WALK_CACHE_LOOKUP", + "BriefDescription": "Data-side S2 page walk cache lookup" + }, + { + "PublicDescrition": "Data-side S2 page walk cache refill", + "EventCode": "0xd80a", + "EventName": "MMU_D_S2_WALK_CACHE_REFILL", + "BriefDescription": "Data-side S2 page walk cache refill" + }, + { + "PublicDescription": "Data-side S1 table walk fault", + "EventCode": "0xD80B", + "EventName": "MMU_D_S1_WALK_FAULT", + "BriefDescription": "Data-side S1 table walk fault" + }, + { + "PublicDescription": "Data-side S2 table walk fault", + "EventCode": "0xD80C", + "EventName": "MMU_D_S2_WALK_FAULT", + "BriefDescription": "Data-side S2 table walk fault" + }, + { + "PublicDescription": "Data-side table walk steps or descriptor fetches", + "EventCode": "0xD80D", + "EventName": "MMU_D_WALK_STEPS", + "BriefDescription": "Data-side table walk steps or descriptor fetches" + }, + { + "PublicDescription": "Level 2 instruction translation buffer allocation", + "EventCode": "0xD900", + "EventName": "MMU_I_OTB_ALLOC", + "BriefDescription": "Level 2 instruction translation buffer allocation" + }, + { + "PublicDescrition": "Instruction TLB translation cache hit on S1L2 walk cache entry", + "EventCode": "0xd901", + "EventName": "MMU_I_TRANS_CACHE_HIT_S1L2_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S1L2 walk cache entry" + }, + { + "PublicDescrition": "Instruction TLB translation cache hit on S1L1 walk cache entry", + "EventCode": "0xd902", + "EventName": "MMU_I_TRANS_CACHE_HIT_S1L1_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S1L1 walk cache entry" + }, + { + "PublicDescrition": "Instruction TLB translation cache hit on S1L0 walk cache entry", + "EventCode": "0xd903", + "EventName": "MMU_I_TRANS_CACHE_HIT_S1L0_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S1L0 walk cache entry" + }, + { + "PublicDescrition": "Instruction TLB translation cache hit on S2L2 walk cache entry", + "EventCode": "0xd904", + "EventName": "MMU_I_TRANS_CACHE_HIT_S2L2_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S2L2 walk cache entry" + }, + { + "PublicDescrition": "Instruction TLB translation cache hit on S2L1 walk cache entry", + "EventCode": "0xd905", + "EventName": "MMU_I_TRANS_CACHE_HIT_S2L1_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S2L1 walk cache entry" + }, + { + "PublicDescrition": "Instruction TLB translation cache hit on S2L0 walk cache entry", + "EventCode": "0xd906", + "EventName": "MMU_I_TRANS_CACHE_HIT_S2L0_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S2L0 walk cache entry" + }, + { + "PublicDescrition": "Instruction-side S1 page walk cache lookup", + "EventCode": "0xd907", + "EventName": "MMU_I_S1_WALK_CACHE_LOOKUP", + "BriefDescription": "Instruction-side S1 page walk cache lookup" + }, + { + "PublicDescrition": "Instruction-side S1 page walk cache refill", + "EventCode": "0xd908", + "EventName": "MMU_I_S1_WALK_CACHE_REFILL", + "BriefDescription": "Instruction-side S1 page 
walk cache refill" + }, + { + "PublicDescrition": "Instruction-side S2 page walk cache lookup", + "EventCode": "0xd909", + "EventName": "MMU_I_S2_WALK_CACHE_LOOKUP", + "BriefDescription": "Instruction-side S2 page walk cache lookup" + }, + { + "PublicDescrition": "Instruction-side S2 page walk cache refill", + "EventCode": "0xd90a", + "EventName": "MMU_I_S2_WALK_CACHE_REFILL", + "BriefDescription": "Instruction-side S2 page walk cache refill" + }, + { + "PublicDescription": "Instruction-side S1 table walk fault", + "EventCode": "0xD90B", + "EventName": "MMU_I_S1_WALK_FAULT", + "BriefDescription": "Instruction-side S1 table walk fault" + }, + { + "PublicDescription": "Instruction-side S2 table walk fault", + "EventCode": "0xD90C", + "EventName": "MMU_I_S2_WALK_FAULT", + "BriefDescription": "Instruction-side S2 table walk fault" + }, + { + "PublicDescription": "Instruction-side table walk steps or descriptor fetches", + "EventCode": "0xD90D", + "EventName": "MMU_I_WALK_STEPS", + "BriefDescription": "Instruction-side table walk steps or descriptor fetches" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/pipeline.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/pipeline.json new file mode 100644 index 000000000000..2fb2d1f183fc --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/pipeline.json @@ -0,0 +1,41 @@ +[ + { + "ArchStdEvent": "STALL_FRONTEND", + "Errata": "Errata AC03_CPU_29", + "BriefDescription": "Impacted by errata, use metrics instead -" + }, + { + "ArchStdEvent": "STALL_BACKEND" + }, + { + "ArchStdEvent": "STALL", + "Errata": "Errata AC03_CPU_29", + "BriefDescription": "Impacted by errata, use metrics instead -" + }, + { + "ArchStdEvent": "STALL_SLOT_BACKEND" + }, + { + "ArchStdEvent": "STALL_SLOT_FRONTEND", + "Errata": "Errata AC03_CPU_29", + "BriefDescription": "Impacted by errata, use metrics instead -" + }, + { + "ArchStdEvent": "STALL_SLOT" + }, + { + "ArchStdEvent": "STALL_BACKEND_MEM" + }, + { + "PublicDescription": "Frontend stall cycles, TLB", + "EventCode": "0x815c", + "EventName": "STALL_FRONTEND_TLB", + "BriefDescription": "Frontend stall cycles, TLB" + }, + { + "PublicDescription": "Backend stall cycles, TLB", + "EventCode": "0x8167", + "EventName": "STALL_BACKEND_TLB", + "BriefDescription": "Backend stall cycles, TLB" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/spe.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/spe.json new file mode 100644 index 000000000000..20f2165c85fe --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/spe.json @@ -0,0 +1,14 @@ +[ + { + "ArchStdEvent": "SAMPLE_POP" + }, + { + "ArchStdEvent": "SAMPLE_FEED" + }, + { + "ArchStdEvent": "SAMPLE_FILTRATE" + }, + { + "ArchStdEvent": "SAMPLE_COLLISION" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index 32674ddd2b63..d286354d8753 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -42,3 +42,4 @@ 0x00000000480fd010,v1,hisilicon/hip08,core 0x00000000500f0000,v1,ampere/emag,core 0x00000000c00fac30,v1,ampere/ampereone,core +0x00000000c00fac40,v1,ampere/ampereonex,core