2019-05-19 20:07:45 +08:00
|
|
|
# SPDX-License-Identifier: GPL-2.0-only
|
2015-07-06 19:23:53 +08:00
|
|
|
#
|
|
|
|
# Performance Monitor Drivers
|
|
|
|
#
|
|
|
|
|
|
|
|
# Top-level menu for the performance monitor drivers; the whole menu is
# gated on PERF_EVENTS.
menu "Performance monitor support"
	depends on PERF_EVENTS
|
2015-07-06 19:23:53 +08:00
|
|
|
|
2018-02-16 02:51:42 +08:00
|
|
|
# PMU driver for the ARM CCI (Cache Coherent Interconnect) family.
# Selecting this option also selects ARM_CCI.
config ARM_CCI_PMU
	tristate "ARM CCI PMU driver"
	select ARM_CCI
	depends on ARM64 || (ARM && CPU_V7)
	help
	  Support for PMU events monitoring on the ARM CCI (Cache Coherent
	  Interconnect) family of products.

	  If compiled as a module, it will be called arm-cci.
|
2018-02-16 02:51:42 +08:00
|
|
|
|
|
|
|
# CCI-400 sub-option of ARM_CCI_PMU; on by default once the parent
# driver is enabled, and selects ARM_CCI400_COMMON.
config ARM_CCI400_PMU
	bool "support CCI-400"
	depends on ARM_CCI_PMU
	select ARM_CCI400_COMMON
	default y
	help
	  CCI-400 provides 4 independent event counters counting events related
	  to the connected slave/master interfaces, plus a cycle counter.
|
2018-02-16 02:51:42 +08:00
|
|
|
|
|
|
|
# CCI-500/CCI-550 sub-option of ARM_CCI_PMU; on by default once the
# parent driver is enabled.
config ARM_CCI5xx_PMU
	bool "support CCI-500/CCI-550"
	depends on ARM_CCI_PMU
	default y
	help
	  CCI-500/CCI-550 both provide 8 independent event counters, which can
	  count events pertaining to the slave/master interfaces as well as the
	  internal events to the CCI.
|
2018-02-16 02:51:42 +08:00
|
|
|
|
2018-02-16 02:51:41 +08:00
|
|
|
# perf driver for the ARM CCN (Cache Coherent Network) interconnect.
# Also buildable under COMPILE_TEST.
config ARM_CCN
	tristate "ARM CCN driver support"
	depends on COMPILE_TEST || ARM64 || ARM
	help
	  PMU (perf) driver supporting the ARM CCN (Cache Coherent Network)
	  interconnect.
|
|
|
|
|
2020-09-18 21:28:38 +08:00
|
|
|
# PMU support for the Arm CMN-600 Coherent Mesh Network interconnect.
# Also buildable under COMPILE_TEST.
config ARM_CMN
	tristate "Arm CMN-600 PMU support"
	depends on COMPILE_TEST || ARM64
	help
	  Support for PMU events monitoring on the Arm CMN-600 Coherent Mesh
	  Network interconnect.
|
|
|
|
|
2015-07-06 19:23:53 +08:00
|
|
|
# CPU PMU framework for ARM and ARM64; enabled by default.
config ARM_PMU
	bool "ARM PMU framework"
	default y
	depends on ARM64 || ARM
	help
	  Say y if you want to use CPU performance monitors on ARM-based
	  systems.
|
|
|
|
|
2022-02-19 08:46:54 +08:00
|
|
|
# Core RISC-V PMU framework; enabled by default on RISCV.
# RISCV_PMU_LEGACY and RISCV_PMU_SBI depend on this option.
config RISCV_PMU
	bool "RISC-V PMU framework"
	default y
	depends on RISCV
	help
	  Say y if you want to use CPU performance monitors on RISCV-based
	  systems. This provides the core PMU framework that abstracts common
	  PMU functionalities in a core library so that different PMU drivers
	  can reuse it.
|
|
|
|
|
2022-02-19 08:46:55 +08:00
|
|
|
# Legacy RISC-V PMU implementation layered on RISCV_PMU; enabled by
# default. See the help text for its limitations.
config RISCV_PMU_LEGACY
	bool "RISC-V legacy PMU implementation"
	default y
	depends on RISCV_PMU
	help
	  Say y if you want to use the legacy CPU performance monitor
	  implementation on RISC-V based systems. This only allows counting
	  of cycle/instruction counter and doesn't support counter overflow,
	  or programmable counters. It will be removed in future.
|
|
|
|
|
2022-02-19 08:46:57 +08:00
|
|
|
# RISC-V PMU driver using the SBI PMU extension; needs both the core
# PMU framework and SBI support. Enabled by default.
config RISCV_PMU_SBI
	bool "RISC-V PMU based on SBI PMU extension"
	default y
	depends on RISCV_PMU
	depends on RISCV_SBI
	help
	  Say y if you want to use the CPU performance monitor
	  using SBI PMU extension on RISC-V based systems. This option provides
	  full perf feature support i.e. counter overflow, privilege mode
	  filtering, counter configuration.
|
|
|
|
|
2017-04-11 16:39:55 +08:00
|
|
|
# Promptless helper symbol: set to y whenever both ARM_PMU and ACPI
# are enabled.
config ARM_PMU_ACPI
	bool
	default y
	depends on ARM_PMU && ACPI
|
|
|
|
|
2019-03-26 23:17:51 +08:00
|
|
|
# PMU driver for the ARM SMMUv3 Performance Monitor Counter Groups.
# Needs GENERIC_MSI_IRQ, and either ARM64 or a 64-bit COMPILE_TEST build.
config ARM_SMMU_V3_PMU
	tristate "ARM SMMUv3 Performance Monitors Extension"
	depends on (ARM64 || (COMPILE_TEST && 64BIT)) && GENERIC_MSI_IRQ
	help
	  Provides support for the ARM SMMUv3 Performance Monitor Counter
	  Groups (PMCG), which provide monitoring of transactions passing
	  through the SMMU and allow the resulting information to be filtered
	  based on the Stream ID of the corresponding master.
|
|
|
|
|
2023-03-18 03:50:20 +08:00
|
|
|
# ARM PMUv3 CPU PMU support. The prompt is only shown when ARM64 is not
# set; the default value follows ARM64.
config ARM_PMUV3
	bool
	prompt "ARM PMUv3 support" if !ARM64
	default ARM64
	depends on HW_PERF_EVENTS && ((ARM && CPU_V7) || ARM64)
	help
	  Say y if you want to use the ARM performance monitor unit (PMU)
	  version 3. The PMUv3 is the CPU performance monitors on ARMv8
	  (aarch32 and aarch64) systems that implement the PMUv3
	  architecture.
|
|
|
|
|
2018-01-02 19:25:33 +08:00
|
|
|
# PMU driver for the ARM DynamIQ Shared Unit (DSU); ARM64 only.
config ARM_DSU_PMU
	depends on ARM64
	tristate "ARM DynamIQ Shared Unit (DSU) PMU"
	help
	  Provides support for performance monitor unit in ARM DynamIQ Shared
	  Unit (DSU). The DSU integrates one or more cores with an L3 memory
	  system, control logic. The PMU allows counting various events related
	  to DSU.
|
|
|
|
|
2019-05-02 02:43:29 +08:00
|
|
|
# DDR performance monitor driver for i.MX8.
# Also buildable under COMPILE_TEST.
config FSL_IMX8_DDR_PMU
	tristate "Freescale i.MX8 DDR perf monitor"
	depends on COMPILE_TEST || ARCH_MXC
	help
	  Provides support for the DDR performance monitor in i.MX8, which
	  can give information about memory throughput and other related
	  events.
|
|
|
|
|
drivers/perf: imx_ddr: Add support for NXP i.MX9 SoC DDRC PMU driver
Add ddr performance monitor support for i.MX93.
There are 11 counters for ddr performance events.
- Counter 0 is a 64-bit counter that counts only clock cycles.
- Counter 1-10 are 32-bit counters that can monitor counter-specific
events in addition to counting reference events.
For example:
perf stat -a -e imx9_ddr0/ddrc_pm_1,counter=1/,imx9_ddr0/ddrc_pm_2,counter=2/ ls
Besides, this DDR PMU supports AXI filter capability. It's implemented as
counter-specific events. It now supports read transaction, write transaction
and read beat events, which correspond respectively to counters 2, 3 and 4.
axi_mask and axi_id need to be passed as event parameters.
For example:
perf stat -a -I 1000 -e imx9_ddr0/eddrtq_pm_rd_trans_filt,counter=2,axi_mask=ID_MASK,axi_id=ID/
perf stat -a -I 1000 -e imx9_ddr0/eddrtq_pm_wr_trans_filt,counter=3,axi_mask=ID_MASK,axi_id=ID/
perf stat -a -I 1000 -e imx9_ddr0/eddrtq_pm_rd_beat_filt,counter=4,axi_mask=ID_MASK,axi_id=ID/
Signed-off-by: Xu Yang <xu.yang_2@nxp.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20230418102910.2065651-1-xu.yang_2@nxp.com
[will: Remove redundant error message on platform_get_irq() failure]
Signed-off-by: Will Deacon <will@kernel.org>
2023-04-18 18:29:08 +08:00
|
|
|
# DDR performance monitor driver for i.MX9; requires ARCH_MXC.
config FSL_IMX9_DDR_PMU
	depends on ARCH_MXC
	tristate "Freescale i.MX9 DDR perf monitor"
	help
	  Provides support for the DDR performance monitor in i.MX9, which
	  can give information about memory throughput and other related
	  events.
|
|
|
|
|
2017-02-08 02:14:04 +08:00
|
|
|
# L2 cache PMU for Qualcomm Technologies processors (ARM64 + ACPI).
# Selects QCOM_KRYO_L2_ACCESSORS.
config QCOM_L2_PMU
	bool "Qualcomm Technologies L2-cache PMU"
	select QCOM_KRYO_L2_ACCESSORS
	depends on ARM64 && ACPI && ARCH_QCOM
	help
	  Provides support for the L2 cache performance monitor unit (PMU)
	  in Qualcomm Technologies processors.
	  Adds the L2 cache PMU into the perf events subsystem for
	  monitoring L2 cache events.
|
|
|
|
|
2017-04-01 02:13:43 +08:00
|
|
|
# L3 cache PMU for Qualcomm Technologies processors (ARM64 + ACPI).
# Selects QCOM_IRQ_COMBINER.
config QCOM_L3_PMU
	bool "Qualcomm Technologies L3-cache PMU"
	select QCOM_IRQ_COMBINER
	depends on ARM64 && ACPI && ARCH_QCOM
	help
	  Provides support for the L3 cache performance monitor unit (PMU)
	  in Qualcomm Technologies processors.
	  Adds the L3 cache PMU into the perf events subsystem for
	  monitoring L3 cache events.
|
|
|
|
|
2018-12-06 19:51:31 +08:00
|
|
|
# Uncore PMU driver for the Cavium ThunderX2 SoC (L3C and DMC events).
# Defaults to a module; needs NUMA and ACPI, plus either the platform
# or COMPILE_TEST.
config THUNDERX2_PMU
	tristate "Cavium ThunderX2 SoC PMU UNCORE"
	default m
	depends on (ARCH_THUNDER2 || COMPILE_TEST) && NUMA && ACPI
	help
	  Provides support for ThunderX2 UNCORE events.
	  The SoC has PMU support in its L3 cache controller (L3C) and
	  in the DDR4 Memory Controller (DMC).
|
|
|
|
|
2016-07-16 01:38:04 +08:00
|
|
|
# PMU driver for the APM X-Gene SoC. Also buildable on 64-bit
# COMPILE_TEST configurations.
# No explicit default: a bool symbol without one already defaults to n,
# so the former "default n" line was redundant.
config XGENE_PMU
	bool "APM X-Gene SoC PMU"
	depends on ARCH_XGENE || (COMPILE_TEST && 64BIT)
	help
	  Say y if you want to use APM X-Gene SoC performance monitors.
|
|
|
|
|
drivers/perf: Add support for ARMv8.2 Statistical Profiling Extension
The ARMv8.2 architecture introduces the optional Statistical Profiling
Extension (SPE).
SPE can be used to profile a population of operations in the CPU pipeline
after instruction decode. These are either architected instructions (i.e.
a dynamic instruction trace) or CPU-specific uops and the choice is fixed
statically in the hardware and advertised to userspace via caps/. Sampling
is controlled using a sampling interval, similar to a regular PMU counter,
but also with an optional random perturbation to avoid falling into patterns
where you continuously profile the same instruction in a hot loop.
After each operation is decoded, the interval counter is decremented. When
it hits zero, an operation is chosen for profiling and tracked within the
pipeline until it retires. Along the way, information such as TLB lookups,
cache misses, time spent to issue etc is captured in the form of a sample.
The sample is then filtered according to certain criteria (e.g. load
latency) that can be specified in the event config (described under
format/) and, if the sample satisfies the filter, it is written out to
memory as a record, otherwise it is discarded. Only one operation can
be sampled at a time.
The in-memory buffer is linear and virtually addressed, raising an
interrupt when it fills up. The PMU driver handles these interrupts to
give the appearance of a ring buffer, as expected by the AUX code.
The in-memory trace-like format is self-describing (though not parseable
in reverse) and written as a series of records, with each record
corresponding to a sample and consisting of a sequence of packets. These
packets are defined by the architecture, although some have CPU-specific
fields for recording information specific to the microarchitecture.
As a simple example, a record generated for a branch instruction may
consist of the following packets:
0 (Address) : Virtual PC of the branch instruction
1 (Type) : Conditional direct branch
2 (Counter) : Number of cycles taken from Dispatch to Issue
3 (Address) : Virtual branch target + condition flags
4 (Counter) : Number of cycles taken from Dispatch to Complete
5 (Events) : Mispredicted as not-taken
6 (END) : End of record
It is also possible to toggle properties such as timestamp packets in
each record.
This patch adds support for SPE in the form of a new perf driver.
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
2016-09-22 18:36:32 +08:00
|
|
|
# perf driver for the ARMv8.2 Statistical Profiling Extension (SPE);
# ARM64 only.
config ARM_SPE_PMU
	depends on ARM64
	tristate "Enable support for the ARMv8.2 Statistical Profiling Extension"
	help
	  Enable perf support for the ARMv8.2 Statistical Profiling
	  Extension, which provides periodic sampling of operations in
	  the CPU pipeline and reports this via the perf AUX interface.
|
|
|
|
|
2020-11-05 03:30:43 +08:00
|
|
|
# PMU driver for the ARM DMC-620 memory controller.
# Needs ARM64 with ACPI, or COMPILE_TEST.
config ARM_DMC620_PMU
	tristate "Enable PMU support for the ARM DMC-620 memory controller"
	depends on COMPILE_TEST || (ARM64 && ACPI)
	help
	  Support for PMU events monitoring on the ARM DMC-620 memory
	  controller.
|
|
|
|
|
2021-11-15 12:35:05 +08:00
|
|
|
# PMU driver for the Marvell CN10K Last-Level cache Tag-and-data Units.
# Also buildable on 64-bit COMPILE_TEST configurations.
config MARVELL_CN10K_TAD_PMU
	tristate "Marvell CN10K LLC-TAD PMU"
	depends on (COMPILE_TEST && 64BIT) || ARCH_THUNDER
	help
	  Provides support for Last-Level cache Tag-and-data Units (LLC-TAD)
	  performance monitors on CN10K family silicons.
|
|
|
|
|
2022-02-09 02:56:04 +08:00
|
|
|
# CPU PMU driver for Apple M1 SoCs and derivatives; sits on top of the
# ARM PMU framework.
config APPLE_M1_CPU_PMU
	bool "Apple M1 CPU PMU support"
	depends on ARCH_APPLE && ARM_PMU
	help
	  Provides support for the non-architectural CPU PMUs present on
	  the Apple M1 SoCs and derivatives.
|
|
|
|
|
2022-08-18 11:18:21 +08:00
|
|
|
# Driveway PMU driver for the Yitian 710 DDR sub-system.
# Needs ARM64 with ACPI, or COMPILE_TEST.
config ALIBABA_UNCORE_DRW_PMU
	tristate "Alibaba T-Head Yitian 710 DDR Sub-system Driveway PMU driver"
	depends on COMPILE_TEST || (ARM64 && ACPI)
	help
	  Support for Driveway PMU events monitoring on Yitian 710 DDR
	  Sub-system.
|
|
|
|
|
2020-05-07 10:58:25 +08:00
|
|
|
# Include the Kconfig from the hisilicon subdirectory.
source "drivers/perf/hisilicon/Kconfig"
|
|
|
|
|
2022-02-11 12:53:46 +08:00
|
|
|
# PMU driver for the Marvell CN10K DRAM Subsystem (DSS).
# Also buildable on 64-bit COMPILE_TEST configurations.
config MARVELL_CN10K_DDR_PMU
	tristate "Enable MARVELL CN10K DRAM Subsystem(DSS) PMU Support"
	depends on (COMPILE_TEST && 64BIT) || ARCH_THUNDER
	help
	  Enable perf support for Marvell DDR Performance monitoring
	  event on CN10K platform.
|
|
|
|
|
2022-11-12 06:23:28 +08:00
|
|
|
# Include the Kconfig from the arm_cspmu subdirectory.
source "drivers/perf/arm_cspmu/Kconfig"
|
|
|
|
|
2022-11-21 10:15:58 +08:00
|
|
|
# Include the Kconfig from the amlogic subdirectory.
source "drivers/perf/amlogic/Kconfig"
|
|
|
|
|
perf: CXL Performance Monitoring Unit driver
CXL rev 3.0 introduces a standard performance monitoring hardware
block to CXL. Instances are discovered using CXL Register Locator DVSEC
entries. Each CXL component may have multiple PMUs.
This initial driver supports a subset of types of counter.
It supports counters that are either fixed or configurable, but requires
that they support the ability to freeze and write value whilst frozen.
Development done with QEMU model which will be posted shortly.
Example:
$ perf stat -a -e cxl_pmu_mem0.0/h2d_req_snpcur/ -e cxl_pmu_mem0.0/h2d_req_snpdata/ -e cxl_pmu_mem0.0/clock_ticks/ sleep 1
Performance counter stats for 'system wide':
96,757,023,244,321 cxl_pmu_mem0.0/h2d_req_snpcur/
96,757,023,244,365 cxl_pmu_mem0.0/h2d_req_snpdata/
193,514,046,488,653 cxl_pmu_mem0.0/clock_ticks/
1.090539600 seconds time elapsed
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/20230526095824.16336-5-Jonathan.Cameron@huawei.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-05-26 17:58:23 +08:00
|
|
|
# Driver for CXL Performance Monitoring Units (CPMUs); requires CXL_BUS.
config CXL_PMU
	depends on CXL_BUS
	tristate "CXL Performance Monitoring Unit"
	help
	  Support performance monitoring as defined in CXL rev 3.0
	  section 13.2: Performance Monitoring. CXL components may have
	  one or more CXL Performance Monitoring Units (CPMUs).

	  Say 'y/m' to enable a driver that will attach to performance
	  monitoring units and provide standard perf based interfaces.

	  If unsure say 'm'.
|
|
|
|
|
2015-07-06 19:23:53 +08:00
|
|
|
# Closes the "Performance monitor support" menu.
endmenu
|