From 59530adc3f1b802c275f0197fc3ac72dc014267a Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 18 Dec 2012 04:06:37 +0000 Subject: [PATCH 1/7] ARM: Define CPU part numbers and implementors Define implementor IDs, part numbers and Xscale architecture versions in cputype.h. Also create accessor functions for reading the implementor, part number, and Xscale architecture versions from the CPUID regiser. Signed-off-by: Christoffer Dall Signed-off-by: Will Deacon --- arch/arm/include/asm/cputype.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index a59dcb5ab5fc..ad41ec2471e8 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -64,6 +64,24 @@ extern unsigned int processor_id; #define read_cpuid_ext(reg) 0 #endif +#define ARM_CPU_IMP_ARM 0x41 +#define ARM_CPU_IMP_INTEL 0x69 + +#define ARM_CPU_PART_ARM1136 0xB360 +#define ARM_CPU_PART_ARM1156 0xB560 +#define ARM_CPU_PART_ARM1176 0xB760 +#define ARM_CPU_PART_ARM11MPCORE 0xB020 +#define ARM_CPU_PART_CORTEX_A8 0xC080 +#define ARM_CPU_PART_CORTEX_A9 0xC090 +#define ARM_CPU_PART_CORTEX_A5 0xC050 +#define ARM_CPU_PART_CORTEX_A15 0xC0F0 +#define ARM_CPU_PART_CORTEX_A7 0xC070 + +#define ARM_CPU_XSCALE_ARCH_MASK 0xe000 +#define ARM_CPU_XSCALE_ARCH_V1 0x2000 +#define ARM_CPU_XSCALE_ARCH_V2 0x4000 +#define ARM_CPU_XSCALE_ARCH_V3 0x6000 + /* * The CPU ID never changes at run time, so we might as well tell the * compiler that it's constant. Use this function to read the CPU ID @@ -74,6 +92,21 @@ static inline unsigned int __attribute_const__ read_cpuid_id(void) return read_cpuid(CPUID_ID); } +static inline unsigned int __attribute_const__ read_cpuid_implementor(void) +{ + return (read_cpuid_id() & 0xFF000000) >> 24; +} + +static inline unsigned int __attribute_const__ read_cpuid_part_number(void) +{ + return read_cpuid_id() & 0xFFF0; +} + +static inline unsigned int __attribute_const__ xscale_cpu_arch_version(void) +{ + return read_cpuid_part_number() & ARM_CPU_XSCALE_ARCH_MASK; +} + static inline unsigned int __attribute_const__ read_cpuid_cachetype(void) { return read_cpuid(CPUID_CACHETYPE); From 3b953c9c159e99f5465dc05dc90ca405fe9a4436 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 18 Dec 2012 04:06:38 +0000 Subject: [PATCH 2/7] ARM: Use implementor and part defines from cputype.h Instead of decoding implementor numbers, part numbers and Xscale architecture masks inline in the pmu probing function, use defines and accessor functions from cputype.h, which can also be shared by other subsystems, such as KVM. Signed-off-by: Christoffer Dall Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_cpu.c | 34 +++++++++++++++----------------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 5f6620684e25..efa52954d628 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -201,48 +201,46 @@ static struct platform_device_id cpu_pmu_plat_device_ids[] = { static int probe_current_pmu(struct arm_pmu *pmu) { int cpu = get_cpu(); - unsigned long cpuid = read_cpuid_id(); - unsigned long implementor = (cpuid & 0xFF000000) >> 24; - unsigned long part_number = (cpuid & 0xFFF0); + unsigned long implementor = read_cpuid_implementor(); + unsigned long part_number = read_cpuid_part_number(); int ret = -ENODEV; pr_info("probing PMU on CPU %d\n", cpu); /* ARM Ltd CPUs. */ - if (0x41 == implementor) { + if (implementor == ARM_CPU_IMP_ARM) { switch (part_number) { - case 0xB360: /* ARM1136 */ - case 0xB560: /* ARM1156 */ - case 0xB760: /* ARM1176 */ + case ARM_CPU_PART_ARM1136: + case ARM_CPU_PART_ARM1156: + case ARM_CPU_PART_ARM1176: ret = armv6pmu_init(pmu); break; - case 0xB020: /* ARM11mpcore */ + case ARM_CPU_PART_ARM11MPCORE: ret = armv6mpcore_pmu_init(pmu); break; - case 0xC080: /* Cortex-A8 */ + case ARM_CPU_PART_CORTEX_A8: ret = armv7_a8_pmu_init(pmu); break; - case 0xC090: /* Cortex-A9 */ + case ARM_CPU_PART_CORTEX_A9: ret = armv7_a9_pmu_init(pmu); break; - case 0xC050: /* Cortex-A5 */ + case ARM_CPU_PART_CORTEX_A5: ret = armv7_a5_pmu_init(pmu); break; - case 0xC0F0: /* Cortex-A15 */ + case ARM_CPU_PART_CORTEX_A15: ret = armv7_a15_pmu_init(pmu); break; - case 0xC070: /* Cortex-A7 */ + case ARM_CPU_PART_CORTEX_A7: ret = armv7_a7_pmu_init(pmu); break; } /* Intel CPUs [xscale]. */ - } else if (0x69 == implementor) { - part_number = (cpuid >> 13) & 0x7; - switch (part_number) { - case 1: + } else if (implementor == ARM_CPU_IMP_INTEL) { + switch (xscale_cpu_arch_version()) { + case ARM_CPU_XSCALE_ARCH_V1: ret = xscale1pmu_init(pmu); break; - case 2: + case ARM_CPU_XSCALE_ARCH_V2: ret = xscale2pmu_init(pmu); break; } From 1764c591dfed2ce7075465df0591ce9564ff37a1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 14 Jan 2013 17:27:35 +0000 Subject: [PATCH 3/7] ARM: perf: remove redundant NULL check on cpu_pmu cpu_pmu has already been dereferenced before we consider invoking the ->reset function, so remove the redundant NULL check. Reported-by: Cong Ding Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index efa52954d628..43496f600569 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -147,7 +147,7 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->free_irq = cpu_pmu_free_irq; /* Ensure the PMU has sane values out of reset. */ - if (cpu_pmu && cpu_pmu->reset) + if (cpu_pmu->reset) on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); } From 40c390c768f898497e17d934f6715d516ff67294 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 16 Jan 2013 12:01:59 +0000 Subject: [PATCH 4/7] ARM: perf: don't pretend to support counting of L1I writes ARM has a harvard cache architecture and cannot write directly to the I-side. This patch removes the L1I write events from the cache map (which previously returned *read* events in many cases). Reported-by: Mike Williams Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_v6.c | 4 ++-- arch/arm/kernel/perf_event_v7.c | 18 +++++++++--------- arch/arm/kernel/perf_event_xscale.c | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 041d0526a288..03664b0e8fa4 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -106,7 +106,7 @@ static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] }, [C(OP_WRITE)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, [C(OP_PREFETCH)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, @@ -259,7 +259,7 @@ static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] }, [C(OP_WRITE)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, [C(OP_PREFETCH)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 4fbc757d9cff..8c79a9e70b83 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -157,8 +157,8 @@ static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, }, [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, [C(OP_PREFETCH)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, @@ -282,7 +282,7 @@ static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] }, [C(OP_WRITE)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, [C(OP_PREFETCH)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, @@ -399,8 +399,8 @@ static const unsigned armv7_a5_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, }, [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, /* * The prefetch counters don't differentiate between the I @@ -527,8 +527,8 @@ static const unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, }, [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, [C(OP_PREFETCH)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, @@ -651,8 +651,8 @@ static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, }, [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, [C(OP_PREFETCH)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 2b0fe30ec12e..63990c42fac9 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -83,7 +83,7 @@ static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] }, [C(OP_WRITE)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, [C(OP_PREFETCH)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, From 76b8a0e4c8bda5f03574b8a904331266d162c796 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 18 Jan 2013 13:42:58 +0000 Subject: [PATCH 5/7] ARM: perf: handle armpmu_register failing Currently perf_pmu_register may fail for several reasons (e.g. being unable to allocate memory for the struct device it associates with each PMU), and while any error is propagated by armpmu_register, it is ignored by cpu_pmu_device_probe and not propagated to the caller. This also results in a leak of a struct arm_pmu. This patch adds cleanup if armpmu_register fails, and updates the info messages to better differentiate this type of failure from a failure to probe the PMU type from the hardware or dt. Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_cpu.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 43496f600569..1f2740e3dbc0 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -277,17 +277,22 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) } if (ret) { - pr_info("failed to register PMU devices!"); - kfree(pmu); - return ret; + pr_info("failed to probe PMU!"); + goto out_free; } cpu_pmu = pmu; cpu_pmu->plat_device = pdev; cpu_pmu_init(cpu_pmu); - armpmu_register(cpu_pmu, PERF_TYPE_RAW); + ret = armpmu_register(cpu_pmu, PERF_TYPE_RAW); - return 0; + if (!ret) + return 0; + +out_free: + pr_info("failed to register PMU devices!"); + kfree(pmu); + return ret; } static struct platform_driver cpu_pmu_driver = { From 8f3b90b585d3e879b03ce2a202da04d59dd5b699 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 18 Jan 2013 13:42:59 +0000 Subject: [PATCH 6/7] ARM: perf: remove unnecessary checks for idx < 0 We currently check for hwx->idx < 0 in armpmu_read and armpmu_del unnecessarily. The only case where hwc->idx < 0 is when armpmu_add fails, in which case the event's state is set to PERF_EVENT_STATE_INACTIVE. The perf core will not attempt to read from an event in PERF_EVENT_STATE_INACTIVE, and so the check in armpmu_read is unnecessary. Similarly, if perf core cannot add an event it will not attempt to delete it, so the WARN_ON in armpmu_del is unnecessary. This patch removes these two redundant checks. Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index f9e8657dd241..6df1969811c8 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -149,12 +149,6 @@ again: static void armpmu_read(struct perf_event *event) { - struct hw_perf_event *hwc = &event->hw; - - /* Don't read disabled counters! */ - if (hwc->idx < 0) - return; - armpmu_event_update(event); } @@ -207,8 +201,6 @@ armpmu_del(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - WARN_ON(idx < 0); - armpmu_stop(event, PERF_EF_UPDATE); hw_events->events[idx] = NULL; clear_bit(idx, hw_events->used_mask); From 9dcbf466559f6f2f55d60eb5a1bbebc8e694b52a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 18 Jan 2013 16:10:06 +0000 Subject: [PATCH 7/7] ARM: perf: simplify __hw_perf_event_init err handling Currently __hw_perf_event_init has an err variable that's ignored right until the end, where it's initialised, conditionally set, and then used as a boolean flag deciding whether to return another error code. This patch removes the err variable and simplifies the associated error handling logic. Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 6df1969811c8..31e0eb353cd8 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -350,7 +350,7 @@ __hw_perf_event_init(struct perf_event *event) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; - int mapping, err; + int mapping; mapping = armpmu->map_event(event); @@ -399,14 +399,12 @@ __hw_perf_event_init(struct perf_event *event) local64_set(&hwc->period_left, hwc->sample_period); } - err = 0; if (event->group_leader != event) { - err = validate_group(event); - if (err) + if (validate_group(event) != 0); return -EINVAL; } - return err; + return 0; } static int armpmu_event_init(struct perf_event *event)