From 42e446dbad95da6b4e5510309401bb0f98448525 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Thu, 24 Sep 2020 12:07:00 +0100 Subject: [PATCH 1/7] arm64: perf: Add missing ISB in armv8pmu_enable_counter() commit 490d7b7c0845eacf5593db333fd2ae7715416e16 upstream. Writes to the PMXEVTYPER_EL0 register are not self-synchronising. In armv8pmu_enable_event(), the PE can reorder configuring the event type after we have enabled the counter and the interrupt. This can lead to an interrupt being asserted because of the previous event type that we were counting using the same counter, not the one that we've just configured. The same rationale applies to writes to the PMINTENSET_EL1 register. The PE can reorder enabling the interrupt at any point in the future after we have enabled the event. Prevent both situations from happening by adding an ISB just before we enable the event counter. Fixes: 030896885ade ("arm64: Performance counters support") Reported-by: Julien Thierry Signed-off-by: Alexandru Elisei Tested-by: Sumit Garg (Developerbox) Cc: Julien Thierry Cc: Will Deacon Cc: Mark Rutland Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Catalin Marinas Link: https://lore.kernel.org/r/20200924110706.254996-2-alexandru.elisei@arm.com Signed-off-by: Will Deacon Signed-off-by: huwentao --- arch/arm64/kernel/perf_event.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 19128d994ee9..a28cf89b0be8 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -515,6 +515,11 @@ static inline void armv8pmu_write_event_type(struct perf_event *event) static inline int armv8pmu_enable_counter(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); + /* + * Make sure event configuration register writes are visible before we + * enable the counter. + * */ + isb(); write_sysreg(BIT(counter), pmcntenset_el0); return idx; } From 2e813f7d0a1f6d0dbd013c24b43d20c776081290 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Sep 2020 12:07:01 +0100 Subject: [PATCH 2/7] arm64: perf: Avoid PMXEV* indirection commit 0fdf1bb75953a67e63e5055a7709c629ab6d7692 upstream. Currently we access the counter registers and their respective type registers indirectly. This requires us to write to PMSELR, issue an ISB, then access the relevant PMXEV* registers. This is unfortunate, because: * Under virtualization, accessing one register requires two traps to the hypervisor, even though we could access the register directly with a single trap. * We have to issue an ISB which we could otherwise avoid the cost of. * When we use NMIs, the NMI handler will have to save/restore the select register in case the code it preempted was attempting to access a counter or its type register. We can avoid these issues by directly accessing the relevant registers. This patch adds helpers to do so. In armv8pmu_enable_event() we still need the ISB to prevent the PE from reordering the write to PMINTENSET_EL1 register. If the interrupt is enabled before we disable the counter and the new event is configured, we might get an interrupt triggered by the previously programmed event overflowing, but which we wrongly attribute to the event that we are enabling. Execute an ISB after we disable the counter. In the process, remove the comment that refers to the ARMv7 PMU. [Julien T.: Don't inline read/write functions to avoid big code-size increase, remove unused read_pmevtypern function, fix counter index issue.] [Alexandru E.: Removed comment, removed trailing semicolons in macros, added ISB] Signed-off-by: Mark Rutland Signed-off-by: Julien Thierry Signed-off-by: Alexandru Elisei Tested-by: Sumit Garg (Developerbox) Cc: Julien Thierry Cc: Will Deacon Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Catalin Marinas Link: https://lore.kernel.org/r/20200924110706.254996-3-alexandru.elisei@arm.com Signed-off-by: Will Deacon Signed-off-by: huwentao --- arch/arm64/kernel/perf_event.c | 101 ++++++++++++++++++++++++++++----- 1 file changed, 86 insertions(+), 15 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index a28cf89b0be8..2c755d22ef62 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -372,6 +372,73 @@ static inline bool armv8pmu_event_is_chained(struct perf_event *event) #define ARMV8_IDX_TO_COUNTER(x) \ (((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK) +/* + * This code is really good + */ + +#define PMEVN_CASE(n, case_macro) \ + case n: case_macro(n); break + +#define PMEVN_SWITCH(x, case_macro) \ + do { \ + switch (x) { \ + PMEVN_CASE(0, case_macro); \ + PMEVN_CASE(1, case_macro); \ + PMEVN_CASE(2, case_macro); \ + PMEVN_CASE(3, case_macro); \ + PMEVN_CASE(4, case_macro); \ + PMEVN_CASE(5, case_macro); \ + PMEVN_CASE(6, case_macro); \ + PMEVN_CASE(7, case_macro); \ + PMEVN_CASE(8, case_macro); \ + PMEVN_CASE(9, case_macro); \ + PMEVN_CASE(10, case_macro); \ + PMEVN_CASE(11, case_macro); \ + PMEVN_CASE(12, case_macro); \ + PMEVN_CASE(13, case_macro); \ + PMEVN_CASE(14, case_macro); \ + PMEVN_CASE(15, case_macro); \ + PMEVN_CASE(16, case_macro); \ + PMEVN_CASE(17, case_macro); \ + PMEVN_CASE(18, case_macro); \ + PMEVN_CASE(19, case_macro); \ + PMEVN_CASE(20, case_macro); \ + PMEVN_CASE(21, case_macro); \ + PMEVN_CASE(22, case_macro); \ + PMEVN_CASE(23, case_macro); \ + PMEVN_CASE(24, case_macro); \ + PMEVN_CASE(25, case_macro); \ + PMEVN_CASE(26, case_macro); \ + PMEVN_CASE(27, case_macro); \ + PMEVN_CASE(28, case_macro); \ + PMEVN_CASE(29, case_macro); \ + PMEVN_CASE(30, case_macro); \ + default: WARN(1, "Invalid PMEV* index\n"); \ + } \ + } while (0) + +#define RETURN_READ_PMEVCNTRN(n) \ + return read_sysreg(pmevcntr##n##_el0) +static unsigned long read_pmevcntrn(int n) +{ + PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); + return 0; +} + +#define WRITE_PMEVCNTRN(n) \ + write_sysreg(val, pmevcntr##n##_el0) +static void write_pmevcntrn(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVCNTRN); +} + +#define WRITE_PMEVTYPERN(n) \ + write_sysreg(val, pmevtyper##n##_el0) +static void write_pmevtypern(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVTYPERN); +} + static inline u32 armv8pmu_pmcr_read(void) { return read_sysreg(pmcr_el0); @@ -400,17 +467,11 @@ static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); } -static inline void armv8pmu_select_counter(int idx) -{ - u32 counter = ARMV8_IDX_TO_COUNTER(idx); - write_sysreg(counter, pmselr_el0); - isb(); -} - static inline u32 armv8pmu_read_evcntr(int idx) { - armv8pmu_select_counter(idx); - return read_sysreg(pmxevcntr_el0); + u32 counter = ARMV8_IDX_TO_COUNTER(idx); + + return read_pmevcntrn(counter); } static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) @@ -444,8 +505,9 @@ static u64 armv8pmu_read_counter(struct perf_event *event) static inline void armv8pmu_write_evcntr(int idx, u32 value) { - armv8pmu_select_counter(idx); - write_sysreg(value, pmxevcntr_el0); + u32 counter = ARMV8_IDX_TO_COUNTER(idx); + + write_pmevcntrn(counter, value); } static inline void armv8pmu_write_hw_counter(struct perf_event *event, @@ -486,9 +548,10 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value) static inline void armv8pmu_write_evtype(int idx, u32 val) { - armv8pmu_select_counter(idx); + u32 counter = ARMV8_IDX_TO_COUNTER(idx); + val &= ARMV8_PMU_EVTYPE_MASK; - write_sysreg(val, pmxevtyper_el0); + write_pmevtypern(counter, val); } static inline void armv8pmu_write_event_type(struct perf_event *event) @@ -508,7 +571,10 @@ static inline void armv8pmu_write_event_type(struct perf_event *event) armv8pmu_write_evtype(idx - 1, hwc->config_base); armv8pmu_write_evtype(idx, chain_evt); } else { - armv8pmu_write_evtype(idx, hwc->config_base); + if (idx == ARMV8_IDX_CYCLE_COUNTER) + write_sysreg(hwc->config_base, pmccfiltr_el0); + else + armv8pmu_write_evtype(idx, hwc->config_base); } } @@ -547,6 +613,11 @@ static inline int armv8pmu_disable_counter(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); write_sysreg(BIT(counter), pmcntenclr_el0); + /* + * Make sure the effects of disabling the counter are visible before we + * start configuring the event. + */ + isb(); return idx; } @@ -631,7 +702,7 @@ static void armv8pmu_enable_event(struct perf_event *event) armv8pmu_disable_event_counter(event); /* - * Set event (if destined for PMNx counters). + * Set event. */ armv8pmu_write_event_type(event); From 00baa292fc73fe5b37cc4346baac51cabcfc66cd Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 24 Sep 2020 12:07:02 +0100 Subject: [PATCH 3/7] arm64: perf: Remove PMU locking commit 2a0e2a02e4b719174547d6f04c27410c6fe456f5 upstream. The PMU is disabled and enabled, and the counters are programmed from contexts where interrupts or preemption is disabled. The functions to toggle the PMU and to program the PMU counters access the registers directly and don't access data modified by the interrupt handler. That, and the fact that they're always called from non-preemptible contexts, means that we don't need to disable interrupts or use a spinlock. [Alexandru E.: Explained why locking is not needed, removed WARN_ONs] Signed-off-by: Julien Thierry Signed-off-by: Alexandru Elisei Tested-by: Sumit Garg (Developerbox) Cc: Will Deacon Cc: Mark Rutland Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Catalin Marinas Link: https://lore.kernel.org/r/20200924110706.254996-4-alexandru.elisei@arm.com Signed-off-by: Will Deacon Signed-off-by: huwentao --- arch/arm64/kernel/perf_event.c | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 2c755d22ef62..ae553eebefb9 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -686,15 +686,10 @@ static inline u32 armv8pmu_getreset_flags(void) static void armv8pmu_enable_event(struct perf_event *event) { - unsigned long flags; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - /* * Enable counter and interrupt, and set the counter to count * the event that we're interested in. */ - raw_spin_lock_irqsave(&events->pmu_lock, flags); /* * Disable counter @@ -715,21 +710,10 @@ static void armv8pmu_enable_event(struct perf_event *event) * Enable counter */ armv8pmu_enable_event_counter(event); - - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } static void armv8pmu_disable_event(struct perf_event *event) { - unsigned long flags; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - - /* - * Disable counter and interrupt - */ - raw_spin_lock_irqsave(&events->pmu_lock, flags); - /* * Disable counter */ @@ -739,30 +723,18 @@ static void armv8pmu_disable_event(struct perf_event *event) * Disable interrupt for this counter */ armv8pmu_disable_event_irq(event); - - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } static void armv8pmu_start(struct arm_pmu *cpu_pmu) { - unsigned long flags; - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - - raw_spin_lock_irqsave(&events->pmu_lock, flags); /* Enable all counters */ armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } static void armv8pmu_stop(struct arm_pmu *cpu_pmu) { - unsigned long flags; - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - - raw_spin_lock_irqsave(&events->pmu_lock, flags); /* Disable all counters */ armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) From db279eaccd4e2992042ac7a13a04bca189c1d7f7 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 24 Sep 2020 12:07:03 +0100 Subject: [PATCH 4/7] arm64: perf: Defer irq_work to IPI_IRQ_WORK commit 05ab72813340d11205556c0d1bc08e6857a3856c upstream. When handling events, armv8pmu_handle_irq() calls perf_event_overflow(), and subsequently calls irq_work_run() to handle any work queued by perf_event_overflow(). As perf_event_overflow() raises IPI_IRQ_WORK when queuing the work, this isn't strictly necessary and the work could be handled as part of the IPI_IRQ_WORK handler. In the common case the IPI handler will run immediately after the PMU IRQ handler, and where the PE is heavily loaded with interrupts other handlers may run first, widening the window where some counters are disabled. In practice this window is unlikely to be a significant issue, and removing the call to irq_work_run() would make the PMU IRQ handler NMI safe in addition to making it simpler, so let's do that. [Alexandru E.: Reworded commit message] Signed-off-by: Julien Thierry Signed-off-by: Alexandru Elisei Cc: Julien Thierry Cc: Will Deacon Cc: Mark Rutland Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Catalin Marinas Link: https://lore.kernel.org/r/20200924110706.254996-5-alexandru.elisei@arm.com Signed-off-by: Will Deacon Signed-off-by: huwentao --- arch/arm64/kernel/perf_event.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index ae553eebefb9..27811f382734 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -787,20 +787,16 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) if (!armpmu_event_set_period(event)) continue; + /* + * Perf event overflow will queue the processing of the event as + * an irq_work which will be taken care of in the handling of + * IPI_IRQ_WORK. + */ if (perf_event_overflow(event, &data, regs)) cpu_pmu->disable(event); } armv8pmu_start(cpu_pmu); - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - return IRQ_HANDLED; } From e2cabc0720cb3cdbc85fa1e96d5b2cf57cc0a757 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 24 Sep 2020 12:07:04 +0100 Subject: [PATCH 5/7] KVM: arm64: pmu: Make overflow handler NMI safe commit 95e92e45a454a10a8114294d0f7aec930fb85891 upstream. kvm_vcpu_kick() is not NMI safe. When the overflow handler is called from NMI context, defer waking the vcpu to an irq_work queue. A vcpu can be freed while it's not running by kvm_destroy_vm(). Prevent running the irq_work for a non-existent vcpu by calling irq_work_sync() on the PMU destroy path. [Alexandru E.: Added irq_work_sync()] Signed-off-by: Julien Thierry Signed-off-by: Alexandru Elisei Tested-by: Sumit Garg (Developerbox) Cc: Julien Thierry Cc: Marc Zyngier Cc: Will Deacon Cc: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Suzuki K Pouloze Cc: kvm@vger.kernel.org Cc: kvmarm@lists.cs.columbia.edu Link: https://lore.kernel.org/r/20200924110706.254996-6-alexandru.elisei@arm.com Signed-off-by: Will Deacon Signed-off-by: huwentao --- include/kvm/arm_pmu.h | 1 + virt/kvm/arm/pmu.c | 26 +++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 6db030439e29..dbf4f08d42e5 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -27,6 +27,7 @@ struct kvm_pmu { bool ready; bool created; bool irq_level; + struct irq_work overflow_work; }; #define kvm_arm_pmu_v3_ready(v) ((v)->arch.pmu.ready) diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 4c08fd009768..d522a1000497 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -259,6 +259,7 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) kvm_pmu_release_perf_event(&pmu->pmc[i]); + irq_work_sync(&vcpu->arch.pmu.overflow_work); } u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) @@ -435,6 +436,22 @@ void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) kvm_pmu_update_state(vcpu); } +/** + * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding + * to the event. + * This is why we need a callback to do it once outside of the NMI context. + */ +static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work) +{ + struct kvm_vcpu *vcpu; + struct kvm_pmu *pmu; + + pmu = container_of(work, struct kvm_pmu, overflow_work); + vcpu = kvm_pmc_to_vcpu(pmu->pmc); + + kvm_vcpu_kick(vcpu); +} + /** * When the perf event overflows, set the overflow status and inform the vcpu. */ @@ -467,7 +484,11 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, if (kvm_pmu_overflow_status(vcpu)) { kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); - kvm_vcpu_kick(vcpu); + + if (!in_nmi()) + kvm_vcpu_kick(vcpu); + else + irq_work_queue(&vcpu->arch.pmu.overflow_work); } cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD); @@ -760,6 +781,9 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) return ret; } + init_irq_work(&vcpu->arch.pmu.overflow_work, + kvm_pmu_perf_overflow_notify_vcpu); + vcpu->arch.pmu.created = true; return 0; } From c6c3369a68ff051c34cdd165cbaf6c92279867d5 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 24 Sep 2020 12:07:05 +0100 Subject: [PATCH 6/7] arm_pmu: Introduce pmu_irq_ops commit f76b130bdb8949eac002b8e0ddb85576ed137838 upstream. Currently the PMU interrupt can either be a normal irq or a percpu irq. Supporting NMI will introduce two cases for each existing one. It becomes a mess of 'if's when managing the interrupt. Define sets of callbacks for operations commonly done on the interrupt. The appropriate set of callbacks is selected at interrupt request time and simplifies interrupt enabling/disabling and freeing. Signed-off-by: Julien Thierry Signed-off-by: Alexandru Elisei Tested-by: Sumit Garg (Developerbox) Cc: Julien Thierry Cc: Will Deacon Cc: Mark Rutland Link: https://lore.kernel.org/r/20200924110706.254996-7-alexandru.elisei@arm.com Signed-off-by: Will Deacon Signed-off-by: huwentao --- drivers/perf/arm_pmu.c | 90 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 74 insertions(+), 16 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index b377872a8f9d..b72d7d3124a6 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -26,8 +26,46 @@ #include +static int armpmu_count_irq_users(const int irq); + +struct pmu_irq_ops { + void (*enable_pmuirq)(unsigned int irq); + void (*disable_pmuirq)(unsigned int irq); + void (*free_pmuirq)(unsigned int irq, int cpu, void __percpu *devid); +}; + +static void armpmu_free_pmuirq(unsigned int irq, int cpu, void __percpu *devid) +{ + free_irq(irq, per_cpu_ptr(devid, cpu)); +} + +static const struct pmu_irq_ops pmuirq_ops = { + .enable_pmuirq = enable_irq, + .disable_pmuirq = disable_irq_nosync, + .free_pmuirq = armpmu_free_pmuirq +}; + +static void armpmu_enable_percpu_pmuirq(unsigned int irq) +{ + enable_percpu_irq(irq, IRQ_TYPE_NONE); +} + +static void armpmu_free_percpu_pmuirq(unsigned int irq, int cpu, + void __percpu *devid) +{ + if (armpmu_count_irq_users(irq) == 1) + free_percpu_irq(irq, devid); +} + +static const struct pmu_irq_ops percpu_pmuirq_ops = { + .enable_pmuirq = armpmu_enable_percpu_pmuirq, + .disable_pmuirq = disable_percpu_irq, + .free_pmuirq = armpmu_free_percpu_pmuirq +}; + static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu); static DEFINE_PER_CPU(int, cpu_irq); +static DEFINE_PER_CPU(const struct pmu_irq_ops *, cpu_irq_ops); static inline u64 arm_pmu_event_max_period(struct perf_event *event) { @@ -542,6 +580,23 @@ static int armpmu_count_irq_users(const int irq) return count; } +static const struct pmu_irq_ops *armpmu_find_irq_ops(int irq) +{ + const struct pmu_irq_ops *ops = NULL; + int cpu; + + for_each_possible_cpu(cpu) { + if (per_cpu(cpu_irq, cpu) != irq) + continue; + + ops = per_cpu(cpu_irq_ops, cpu); + if (ops) + break; + } + + return ops; +} + void armpmu_free_irq(int irq, int cpu) { if (per_cpu(cpu_irq, cpu) == 0) @@ -549,18 +604,18 @@ void armpmu_free_irq(int irq, int cpu) if (WARN_ON(irq != per_cpu(cpu_irq, cpu))) return; - if (!irq_is_percpu_devid(irq)) - free_irq(irq, per_cpu_ptr(&cpu_armpmu, cpu)); - else if (armpmu_count_irq_users(irq) == 1) - free_percpu_irq(irq, &cpu_armpmu); + per_cpu(cpu_irq_ops, cpu)->free_pmuirq(irq, cpu, &cpu_armpmu); per_cpu(cpu_irq, cpu) = 0; + per_cpu(cpu_irq_ops, cpu) = NULL; } int armpmu_request_irq(int irq, int cpu) { int err = 0; const irq_handler_t handler = armpmu_dispatch_irq; + const struct pmu_irq_ops *irq_ops; + if (!irq) return 0; @@ -582,15 +637,26 @@ int armpmu_request_irq(int irq, int cpu) irq_set_status_flags(irq, IRQ_NOAUTOEN); err = request_irq(irq, handler, irq_flags, "arm-pmu", per_cpu_ptr(&cpu_armpmu, cpu)); + + irq_ops = &pmuirq_ops; } else if (armpmu_count_irq_users(irq) == 0) { err = request_percpu_irq(irq, handler, "arm-pmu", &cpu_armpmu); + + irq_ops = &percpu_pmuirq_ops; + } else { + /* Per cpudevid irq was already requested by another CPU */ + irq_ops = armpmu_find_irq_ops(irq); + + if (WARN_ON(!irq_ops)) + err = -EINVAL; } if (err) goto err_out; per_cpu(cpu_irq, cpu) = irq; + per_cpu(cpu_irq_ops, cpu) = irq_ops; return 0; err_out: @@ -623,12 +689,8 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node) per_cpu(cpu_armpmu, cpu) = pmu; irq = armpmu_get_cpu_irq(pmu, cpu); - if (irq) { - if (irq_is_percpu_devid(irq)) - enable_percpu_irq(irq, IRQ_TYPE_NONE); - else - enable_irq(irq); - } + if (irq) + per_cpu(cpu_irq_ops, cpu)->enable_pmuirq(irq); return 0; } @@ -642,12 +704,8 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node) return 0; irq = armpmu_get_cpu_irq(pmu, cpu); - if (irq) { - if (irq_is_percpu_devid(irq)) - disable_percpu_irq(irq); - else - disable_irq_nosync(irq); - } + if (irq) + per_cpu(cpu_irq_ops, cpu)->disable_pmuirq(irq); per_cpu(cpu_armpmu, cpu) = NULL; From 0bdcc78fd5ce7787b7923dfdbaae58cd49c90455 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 24 Sep 2020 12:07:06 +0100 Subject: [PATCH 7/7] arm_pmu: arm64: Use NMIs for PMU commit d8f6267f7ce5dc7b8920910e7e75216f77e06d21 upstream. Add required PMU interrupt operations for NMIs. Request interrupt lines as NMIs when possible, otherwise fall back to normal interrupts. NMIs are only supported on the arm64 architecture with a GICv3 irqchip. [Alexandru E.: Added that NMIs only work on arm64 + GICv3, print message when PMU is using NMIs] Signed-off-by: Julien Thierry Signed-off-by: Alexandru Elisei Tested-by: Sumit Garg (Developerbox) Cc: Julien Thierry Cc: Will Deacon Cc: Mark Rutland Link: https://lore.kernel.org/r/20200924110706.254996-8-alexandru.elisei@arm.com Signed-off-by: Will Deacon Signed-off-by: huwentao --- drivers/perf/arm_pmu.c | 69 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 62 insertions(+), 7 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index b72d7d3124a6..7fd11ef5cb8a 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -45,6 +45,17 @@ static const struct pmu_irq_ops pmuirq_ops = { .free_pmuirq = armpmu_free_pmuirq }; +static void armpmu_free_pmunmi(unsigned int irq, int cpu, void __percpu *devid) +{ + free_nmi(irq, per_cpu_ptr(devid, cpu)); +} + +static const struct pmu_irq_ops pmunmi_ops = { + .enable_pmuirq = enable_nmi, + .disable_pmuirq = disable_nmi_nosync, + .free_pmuirq = armpmu_free_pmunmi +}; + static void armpmu_enable_percpu_pmuirq(unsigned int irq) { enable_percpu_irq(irq, IRQ_TYPE_NONE); @@ -63,10 +74,37 @@ static const struct pmu_irq_ops percpu_pmuirq_ops = { .free_pmuirq = armpmu_free_percpu_pmuirq }; +static void armpmu_enable_percpu_pmunmi(unsigned int irq) +{ + if (!prepare_percpu_nmi(irq)) + enable_percpu_nmi(irq, IRQ_TYPE_NONE); +} + +static void armpmu_disable_percpu_pmunmi(unsigned int irq) +{ + disable_percpu_nmi(irq); + teardown_percpu_nmi(irq); +} + +static void armpmu_free_percpu_pmunmi(unsigned int irq, int cpu, + void __percpu *devid) +{ + if (armpmu_count_irq_users(irq) == 1) + free_percpu_nmi(irq, devid); +} + +static const struct pmu_irq_ops percpu_pmunmi_ops = { + .enable_pmuirq = armpmu_enable_percpu_pmunmi, + .disable_pmuirq = armpmu_disable_percpu_pmunmi, + .free_pmuirq = armpmu_free_percpu_pmunmi +}; + static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu); static DEFINE_PER_CPU(int, cpu_irq); static DEFINE_PER_CPU(const struct pmu_irq_ops *, cpu_irq_ops); +static bool has_nmi; + static inline u64 arm_pmu_event_max_period(struct perf_event *event) { if (event->hw.flags & ARMPMU_EVT_64BIT) @@ -635,15 +673,31 @@ int armpmu_request_irq(int irq, int cpu) IRQF_NO_THREAD; irq_set_status_flags(irq, IRQ_NOAUTOEN); - err = request_irq(irq, handler, irq_flags, "arm-pmu", + + err = request_nmi(irq, handler, irq_flags, "arm-pmu", per_cpu_ptr(&cpu_armpmu, cpu)); - irq_ops = &pmuirq_ops; + /* If cannot get an NMI, get a normal interrupt */ + if (err) { + err = request_irq(irq, handler, irq_flags, "arm-pmu", + per_cpu_ptr(&cpu_armpmu, cpu)); + irq_ops = &pmuirq_ops; + } else { + has_nmi = true; + irq_ops = &pmunmi_ops; + } } else if (armpmu_count_irq_users(irq) == 0) { - err = request_percpu_irq(irq, handler, "arm-pmu", - &cpu_armpmu); + err = request_percpu_nmi(irq, handler, "arm-pmu", &cpu_armpmu); - irq_ops = &percpu_pmuirq_ops; + /* If cannot get an NMI, get a normal interrupt */ + if (err) { + err = request_percpu_irq(irq, handler, "arm-pmu", + &cpu_armpmu); + irq_ops = &percpu_pmuirq_ops; + } else { + has_nmi= true; + irq_ops = &percpu_pmunmi_ops; + } } else { /* Per cpudevid irq was already requested by another CPU */ irq_ops = armpmu_find_irq_ops(irq); @@ -926,8 +980,9 @@ int armpmu_register(struct arm_pmu *pmu) if (!__oprofile_cpu_pmu) __oprofile_cpu_pmu = pmu; - pr_info("enabled with %s PMU driver, %d counters available\n", - pmu->name, pmu->num_events); + pr_info("enabled with %s PMU driver, %d counters available%s\n", + pmu->name, pmu->num_events, + has_nmi ? ", using NMIs" : ""); return 0;