From cbe16f35bee6880becca6f20d2ebf6b457148552 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Wed, 3 Mar 2021 11:49:15 +1300 Subject: [PATCH 01/53] genirq: Add IRQF_NO_AUTOEN for request_irq/nmi() Many drivers don't want interrupts enabled automatically via request_irq(). So they are handling this issue by either way of the below two: (1) irq_set_status_flags(irq, IRQ_NOAUTOEN); request_irq(dev, irq...); (2) request_irq(dev, irq...); disable_irq(irq); The code in the second way is silly and unsafe. In the small time gap between request_irq() and disable_irq(), interrupts can still come. The code in the first way is safe though it's subobtimal. Add a new IRQF_NO_AUTOEN flag which can be handed in by drivers to request_irq() and request_nmi(). It prevents the automatic enabling of the requested interrupt/nmi in the same safe way as #1 above. With that the various usage sites of #1 and #2 above can be simplified and corrected. Signed-off-by: Barry Song Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Cc: dmitry.torokhov@gmail.com Link: https://lore.kernel.org/r/20210302224916.13980-2-song.bao.hua@hisilicon.com --- include/linux/interrupt.h | 4 ++++ kernel/irq/manage.c | 11 +++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 967e25767153..76f1161a441a 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -61,6 +61,9 @@ * interrupt handler after suspending interrupts. For system * wakeup devices users need to implement wakeup detection in * their interrupt handlers. + * IRQF_NO_AUTOEN - Don't enable IRQ or NMI automatically when users request it. + * Users will enable it explicitly by enable_irq() or enable_nmi() + * later. */ #define IRQF_SHARED 0x00000080 #define IRQF_PROBE_SHARED 0x00000100 @@ -74,6 +77,7 @@ #define IRQF_NO_THREAD 0x00010000 #define IRQF_EARLY_RESUME 0x00020000 #define IRQF_COND_SUSPEND 0x00040000 +#define IRQF_NO_AUTOEN 0x00080000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index dec3f73e8db9..97c231a5644c 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1693,7 +1693,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) irqd_set(&desc->irq_data, IRQD_NO_BALANCING); } - if (irq_settings_can_autoenable(desc)) { + if (!(new->flags & IRQF_NO_AUTOEN) && + irq_settings_can_autoenable(desc)) { irq_startup(desc, IRQ_RESEND, IRQ_START_COND); } else { /* @@ -2086,10 +2087,15 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, * which interrupt is which (messes up the interrupt freeing * logic etc). * + * Also shared interrupts do not go well with disabling auto enable. + * The sharing interrupt might request it while it's still disabled + * and then wait for interrupts forever. + * * Also IRQF_COND_SUSPEND only makes sense for shared interrupts and * it cannot be set along with IRQF_NO_SUSPEND. */ if (((irqflags & IRQF_SHARED) && !dev_id) || + ((irqflags & IRQF_SHARED) && (irqflags & IRQF_NO_AUTOEN)) || (!(irqflags & IRQF_SHARED) && (irqflags & IRQF_COND_SUSPEND)) || ((irqflags & IRQF_NO_SUSPEND) && (irqflags & IRQF_COND_SUSPEND))) return -EINVAL; @@ -2245,7 +2251,8 @@ int request_nmi(unsigned int irq, irq_handler_t handler, desc = irq_to_desc(irq); - if (!desc || irq_settings_can_autoenable(desc) || + if (!desc || (irq_settings_can_autoenable(desc) && + !(irqflags & IRQF_NO_AUTOEN)) || !irq_settings_can_request(desc) || WARN_ON(irq_settings_is_per_cpu_devid(desc)) || !irq_supports_nmi(desc)) From 3a0ade0c521a542f8a25e96ce8ea0dfaa532ac75 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sat, 6 Mar 2021 13:36:58 -0800 Subject: [PATCH 02/53] tasklet: Remove tasklet_kill_immediate Ever since RCU was converted to softirq, it has no users. Signed-off-by: Davidlohr Bueso Signed-off-by: Thomas Gleixner Acked-by: Paul E. McKenney Link: https://lore.kernel.org/r/20210306213658.12862-1-dave@stgolabs.net --- include/linux/interrupt.h | 1 - kernel/softirq.c | 32 -------------------------------- 2 files changed, 33 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 76f1161a441a..2b98156ec707 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -716,7 +716,6 @@ static inline void tasklet_enable(struct tasklet_struct *t) } extern void tasklet_kill(struct tasklet_struct *t); -extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); extern void tasklet_init(struct tasklet_struct *t, void (*func)(unsigned long), unsigned long data); extern void tasklet_setup(struct tasklet_struct *t, diff --git a/kernel/softirq.c b/kernel/softirq.c index 9908ec4a9bfe..8b44ab9a2f69 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -658,38 +658,6 @@ static void run_ksoftirqd(unsigned int cpu) } #ifdef CONFIG_HOTPLUG_CPU -/* - * tasklet_kill_immediate is called to remove a tasklet which can already be - * scheduled for execution on @cpu. - * - * Unlike tasklet_kill, this function removes the tasklet - * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state. - * - * When this function is called, @cpu must be in the CPU_DEAD state. - */ -void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu) -{ - struct tasklet_struct **i; - - BUG_ON(cpu_online(cpu)); - BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state)); - - if (!test_bit(TASKLET_STATE_SCHED, &t->state)) - return; - - /* CPU is dead, so no lock needed. */ - for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) { - if (*i == t) { - *i = t->next; - /* If this was the tail element, move the tail ptr */ - if (*i == NULL) - per_cpu(tasklet_vec, cpu).tail = i; - return; - } - } - BUG(); -} - static int takeover_tasklets(unsigned int cpu) { /* CPU is dead, so no lock needed. */ From 5c982c58752118b6c1f295024d3fda5ff22d3c52 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 16 Mar 2021 11:02:05 +0100 Subject: [PATCH 03/53] genirq: Fix typos and misspellings in comments No functional change. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210316100205.23492-1-krzysztof.kozlowski@canonical.com --- kernel/irq/chip.c | 6 +++--- kernel/irq/ipi.c | 2 +- kernel/irq/manage.c | 6 +++--- kernel/irq/matrix.c | 2 +- kernel/irq/migration.c | 2 +- kernel/irq/resend.c | 2 +- kernel/irq/timings.c | 6 +++--- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 6d89e33fe3aa..042399cb5caf 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -761,7 +761,7 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_nmi); * handle_edge_irq - edge type IRQ handler * @desc: the interrupt description structure for this irq * - * Interrupt occures on the falling and/or rising edge of a hardware + * Interrupt occurs on the falling and/or rising edge of a hardware * signal. The occurrence is latched into the irq controller hardware * and must be acked in order to be reenabled. After the ack another * interrupt can happen on the same source even before the first one @@ -1419,7 +1419,7 @@ EXPORT_SYMBOL_GPL(irq_chip_eoi_parent); * @dest: The affinity mask to set * @force: Flag to enforce setting (disable online checks) * - * Conditinal, as the underlying parent chip might not implement it. + * Conditional, as the underlying parent chip might not implement it. */ int irq_chip_set_affinity_parent(struct irq_data *data, const struct cpumask *dest, bool force) @@ -1531,7 +1531,7 @@ EXPORT_SYMBOL_GPL(irq_chip_release_resources_parent); #endif /** - * irq_chip_compose_msi_msg - Componse msi message for a irq chip + * irq_chip_compose_msi_msg - Compose msi message for a irq chip * @data: Pointer to interrupt specific data * @msg: Pointer to the MSI message * diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c index 43e3d1be622c..52f11c791bf8 100644 --- a/kernel/irq/ipi.c +++ b/kernel/irq/ipi.c @@ -107,7 +107,7 @@ free_descs: * @irq: linux irq number to be destroyed * @dest: cpumask of cpus which should have the IPI removed * - * The IPIs allocated with irq_reserve_ipi() are retuerned to the system + * The IPIs allocated with irq_reserve_ipi() are returned to the system * destroying all virqs associated with them. * * Return 0 on success or error code on failure. diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 97c231a5644c..07ed2e4ff9a3 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -179,7 +179,7 @@ bool irq_can_set_affinity_usr(unsigned int irq) /** * irq_set_thread_affinity - Notify irq threads to adjust affinity - * @desc: irq descriptor which has affitnity changed + * @desc: irq descriptor which has affinity changed * * We just set IRQTF_AFFINITY and delegate the affinity setting * to the interrupt thread itself. We can not call @@ -1153,7 +1153,7 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) /* * Interrupts explicitly requested as threaded interrupts want to be - * preemtible - many of them need to sleep and wait for slow busses to + * preemptible - many of them need to sleep and wait for slow busses to * complete. */ static irqreturn_t irq_thread_fn(struct irq_desc *desc, @@ -2749,7 +2749,7 @@ int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which, * irq_get_irqchip_state - returns the irqchip state of a interrupt. * @irq: Interrupt line that is forwarded to a VM * @which: One of IRQCHIP_STATE_* the caller wants to know about - * @state: a pointer to a boolean where the state is to be storeed + * @state: a pointer to a boolean where the state is to be stored * * This call snapshots the internal irqchip state of an * interrupt, returning into @state the bit corresponding to diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c index 651a4ad6d711..7a9465ffe71d 100644 --- a/kernel/irq/matrix.c +++ b/kernel/irq/matrix.c @@ -356,7 +356,7 @@ void irq_matrix_reserve(struct irq_matrix *m) * irq_matrix_remove_reserved - Remove interrupt reservation * @m: Matrix pointer * - * This is merily a book keeping call. It decrements the number of globally + * This is merely a book keeping call. It decrements the number of globally * reserved interrupt bits. This is used to undo irq_matrix_reserve() when the * interrupt was never in use and a real vector allocated, which undid the * reservation. diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index def48589ea48..61ca924ef4b4 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -7,7 +7,7 @@ /** * irq_fixup_move_pending - Cleanup irq move pending from a dying CPU - * @desc: Interrupt descpriptor to clean up + * @desc: Interrupt descriptor to clean up * @force_clear: If set clear the move pending bit unconditionally. * If not set, clear it only when the dying CPU is the * last one in the pending mask. diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index bd1d85c610aa..0c46e9fe3a89 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c @@ -128,7 +128,7 @@ int check_irq_resend(struct irq_desc *desc, bool inject) if (!try_retrigger(desc)) err = irq_sw_resend(desc); - /* If the retrigger was successfull, mark it with the REPLAY bit */ + /* If the retrigger was successful, mark it with the REPLAY bit */ if (!err) desc->istate |= IRQS_REPLAY; return err; diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c index 773b6105c4ae..c31860569931 100644 --- a/kernel/irq/timings.c +++ b/kernel/irq/timings.c @@ -84,7 +84,7 @@ void irq_timings_disable(void) * 2. Log interval * * We saw the irq timings allow to compute the interval of the - * occurrences for a specific interrupt. We can reasonibly assume the + * occurrences for a specific interrupt. We can reasonably assume the * longer is the interval, the higher is the error for the next event * and we can consider storing those interval values into an array * where each slot in the array correspond to an interval at the power @@ -416,7 +416,7 @@ static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now) * Copy the content of the circular buffer into another buffer * in order to linearize the buffer instead of dealing with * wrapping indexes and shifted array which will be prone to - * error and extremelly difficult to debug. + * error and extremely difficult to debug. */ for (i = 0; i < count; i++) { int index = (start + i) & IRQ_TIMINGS_MASK; @@ -514,7 +514,7 @@ static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts) * If more than the array size interrupts happened during the * last busy/idle cycle, the index wrapped up and we have to * begin with the next element in the array which is the last one - * in the sequence, otherwise it is a the index 0. + * in the sequence, otherwise it is at the index 0. * * - have an indication of the interrupts activity on this CPU * (eg. irq/sec) From 6b2c339df90788ce6aeecee78d6494f262929206 Mon Sep 17 00:00:00 2001 From: Dirk Behme Date: Wed, 17 Mar 2021 11:20:12 +0100 Subject: [PATCH 04/53] softirq: s/BUG/WARN_ONCE/ on tasklet SCHED state not set Replace BUG() with WARN_ONCE() on wrong tasklet state, in order to: - increase the verbosity / aid in debugging - avoid fatal/unrecoverable state Suggested-by: Thomas Gleixner Signed-off-by: Dirk Behme Signed-off-by: Eugeniu Rosca Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210317102012.32399-1-erosca@de.adit-jv.com --- kernel/softirq.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index 8b44ab9a2f69..8d56bbf852ea 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -531,6 +531,18 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) } EXPORT_SYMBOL(__tasklet_hi_schedule); +static bool tasklet_should_run(struct tasklet_struct *t) +{ + if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) + return true; + + WARN_ONCE(1, "tasklet SCHED state not set: %s %pS\n", + t->use_callback ? "callback" : "func", + t->use_callback ? (void *)t->callback : (void *)t->func); + + return false; +} + static void tasklet_action_common(struct softirq_action *a, struct tasklet_head *tl_head, unsigned int softirq_nr) @@ -550,13 +562,12 @@ static void tasklet_action_common(struct softirq_action *a, if (tasklet_trylock(t)) { if (!atomic_read(&t->count)) { - if (!test_and_clear_bit(TASKLET_STATE_SCHED, - &t->state)) - BUG(); - if (t->use_callback) - t->callback(t); - else - t->func(t->data); + if (tasklet_should_run(t)) { + if (t->use_callback) + t->callback(t); + else + t->func(t->data); + } tasklet_unlock(t); continue; } From d2da74d1278a1b51ef18beafa9da770f0db1c617 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2021 09:42:04 +0100 Subject: [PATCH 05/53] tasklets: Replace barrier() with cpu_relax() in tasklet_unlock_wait() A barrier() in a tight loop which waits for something to happen on a remote CPU is a pointless exercise. Replace it with cpu_relax() which allows HT siblings to make progress. Signed-off-by: Thomas Gleixner Tested-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084241.249343366@linutronix.de --- include/linux/interrupt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 2b98156ec707..d689fd738152 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -672,7 +672,8 @@ static inline void tasklet_unlock(struct tasklet_struct *t) static inline void tasklet_unlock_wait(struct tasklet_struct *t) { - while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); } + while (test_bit(TASKLET_STATE_RUN, &t->state)) + cpu_relax(); } #else #define tasklet_trylock(t) 1 From 6951547a1399c8f56468ed93bea8f769b891aec3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2021 09:42:05 +0100 Subject: [PATCH 06/53] tasklets: Use static inlines for stub implementations Inlines exist for a reason. Signed-off-by: Thomas Gleixner Tested-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084241.407702697@linutronix.de --- include/linux/interrupt.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index d689fd738152..0a4ce25c1464 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -676,9 +676,9 @@ static inline void tasklet_unlock_wait(struct tasklet_struct *t) cpu_relax(); } #else -#define tasklet_trylock(t) 1 -#define tasklet_unlock_wait(t) do { } while (0) -#define tasklet_unlock(t) do { } while (0) +static inline int tasklet_trylock(struct tasklet_struct *t) { return 1; } +static inline void tasklet_unlock(struct tasklet_struct *t) { } +static inline void tasklet_unlock_wait(struct tasklet_struct *t) { } #endif extern void __tasklet_schedule(struct tasklet_struct *t); From ca5f625118955fc544c3cb3dee7055d33ecadafb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2021 09:42:06 +0100 Subject: [PATCH 07/53] tasklets: Provide tasklet_disable_in_atomic() Replacing the spin wait loops in tasklet_unlock_wait() with wait_var_event() is not possible as a handful of tasklet_disable() invocations are happening in atomic context. All other invocations are in teardown paths which can sleep. Provide tasklet_disable_in_atomic() and tasklet_unlock_spin_wait() to convert the few atomic use cases over, which allows to change tasklet_disable() and tasklet_unlock_wait() in a later step. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084241.563164193@linutronix.de --- include/linux/interrupt.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 0a4ce25c1464..3c8a29176258 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -675,10 +675,21 @@ static inline void tasklet_unlock_wait(struct tasklet_struct *t) while (test_bit(TASKLET_STATE_RUN, &t->state)) cpu_relax(); } + +/* + * Do not use in new code. Waiting for tasklets from atomic contexts is + * error prone and should be avoided. + */ +static inline void tasklet_unlock_spin_wait(struct tasklet_struct *t) +{ + while (test_bit(TASKLET_STATE_RUN, &t->state)) + cpu_relax(); +} #else static inline int tasklet_trylock(struct tasklet_struct *t) { return 1; } static inline void tasklet_unlock(struct tasklet_struct *t) { } static inline void tasklet_unlock_wait(struct tasklet_struct *t) { } +static inline void tasklet_unlock_spin_wait(struct tasklet_struct *t) { } #endif extern void __tasklet_schedule(struct tasklet_struct *t); @@ -703,6 +714,17 @@ static inline void tasklet_disable_nosync(struct tasklet_struct *t) smp_mb__after_atomic(); } +/* + * Do not use in new code. Disabling tasklets from atomic contexts is + * error prone and should be avoided. + */ +static inline void tasklet_disable_in_atomic(struct tasklet_struct *t) +{ + tasklet_disable_nosync(t); + tasklet_unlock_spin_wait(t); + smp_mb(); +} + static inline void tasklet_disable(struct tasklet_struct *t) { tasklet_disable_nosync(t); From b0cd02c2a9494dbf0a1cc7dc7a3b8b400c158d37 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2021 09:42:07 +0100 Subject: [PATCH 08/53] tasklets: Use spin wait in tasklet_disable() temporarily To ease the transition use spin waiting in tasklet_disable() until all usage sites from atomic context have been cleaned up. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084241.685352806@linutronix.de --- include/linux/interrupt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 3c8a29176258..b7f00121f124 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -728,7 +728,8 @@ static inline void tasklet_disable_in_atomic(struct tasklet_struct *t) static inline void tasklet_disable(struct tasklet_struct *t) { tasklet_disable_nosync(t); - tasklet_unlock_wait(t); + /* Spin wait until all atomic users are converted */ + tasklet_unlock_spin_wait(t); smp_mb(); } From da044747401fc16202e223c9da970ed4e84fd84d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 9 Mar 2021 09:42:08 +0100 Subject: [PATCH 09/53] tasklets: Replace spin wait in tasklet_unlock_wait() tasklet_unlock_wait() spin waits for TASKLET_STATE_RUN to be cleared. This is wasting CPU cycles in a tight loop which is especially painful in a guest when the CPU running the tasklet is scheduled out. tasklet_unlock_wait() is invoked from tasklet_kill() which is used in teardown paths and not performance critical at all. Replace the spin wait with wait_var_event(). There are no users of tasklet_unlock_wait() which are invoked from atomic contexts. The usage in tasklet_disable() has been replaced temporarily with the spin waiting variant until the atomic users are fixed up and will be converted to the sleep wait variant later. Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084241.783936921@linutronix.de --- include/linux/interrupt.h | 13 ++----------- kernel/softirq.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index b7f00121f124..b50be4fbbc98 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -664,17 +664,8 @@ static inline int tasklet_trylock(struct tasklet_struct *t) return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); } -static inline void tasklet_unlock(struct tasklet_struct *t) -{ - smp_mb__before_atomic(); - clear_bit(TASKLET_STATE_RUN, &(t)->state); -} - -static inline void tasklet_unlock_wait(struct tasklet_struct *t) -{ - while (test_bit(TASKLET_STATE_RUN, &t->state)) - cpu_relax(); -} +void tasklet_unlock(struct tasklet_struct *t); +void tasklet_unlock_wait(struct tasklet_struct *t); /* * Do not use in new code. Waiting for tasklets from atomic contexts is diff --git a/kernel/softirq.c b/kernel/softirq.c index 8d56bbf852ea..ef6429a33883 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -632,6 +633,23 @@ void tasklet_kill(struct tasklet_struct *t) } EXPORT_SYMBOL(tasklet_kill); +#ifdef CONFIG_SMP +void tasklet_unlock(struct tasklet_struct *t) +{ + smp_mb__before_atomic(); + clear_bit(TASKLET_STATE_RUN, &t->state); + smp_mb__after_atomic(); + wake_up_var(&t->state); +} +EXPORT_SYMBOL_GPL(tasklet_unlock); + +void tasklet_unlock_wait(struct tasklet_struct *t) +{ + wait_var_event(&t->state, !test_bit(TASKLET_STATE_RUN, &t->state)); +} +EXPORT_SYMBOL_GPL(tasklet_unlock_wait); +#endif + void __init softirq_init(void) { int cpu; From 697d8c63c4a2991a22a896a5e6adcdbb28fefe56 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 9 Mar 2021 09:42:09 +0100 Subject: [PATCH 10/53] tasklets: Replace spin wait in tasklet_kill() tasklet_kill() spin waits for TASKLET_STATE_SCHED to be cleared invoking yield() from inside the loop. yield() is an ill defined mechanism and the result might still be wasting CPU cycles in a tight loop which is especially painful in a guest when the CPU running the tasklet is scheduled out. tasklet_kill() is used in teardown paths and not performance critical at all. Replace the spin wait with wait_var_event(). Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084241.890532921@linutronix.de --- kernel/softirq.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index ef6429a33883..ba89ca77698a 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -532,10 +532,12 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) } EXPORT_SYMBOL(__tasklet_hi_schedule); -static bool tasklet_should_run(struct tasklet_struct *t) +static bool tasklet_clear_sched(struct tasklet_struct *t) { - if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) + if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) { + wake_up_var(&t->state); return true; + } WARN_ONCE(1, "tasklet SCHED state not set: %s %pS\n", t->use_callback ? "callback" : "func", @@ -563,7 +565,7 @@ static void tasklet_action_common(struct softirq_action *a, if (tasklet_trylock(t)) { if (!atomic_read(&t->count)) { - if (tasklet_should_run(t)) { + if (tasklet_clear_sched(t)) { if (t->use_callback) t->callback(t); else @@ -623,13 +625,11 @@ void tasklet_kill(struct tasklet_struct *t) if (in_interrupt()) pr_notice("Attempt to kill tasklet from interrupt\n"); - while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { - do { - yield(); - } while (test_bit(TASKLET_STATE_SCHED, &t->state)); - } + while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) + wait_var_event(&t->state, !test_bit(TASKLET_STATE_SCHED, &t->state)); + tasklet_unlock_wait(t); - clear_bit(TASKLET_STATE_SCHED, &t->state); + tasklet_clear_sched(t); } EXPORT_SYMBOL(tasklet_kill); From eb2dafbba8b824ee77f166629babd470dd0b1c0a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2021 09:42:10 +0100 Subject: [PATCH 11/53] tasklets: Prevent tasklet_unlock_spin_wait() deadlock on RT tasklet_unlock_spin_wait() spin waits for the TASKLET_STATE_SCHED bit in the tasklet state to be cleared. This works on !RT nicely because the corresponding execution can only happen on a different CPU. On RT softirq processing is preemptible, therefore a task preempting the softirq processing thread can spin forever. Prevent this by invoking local_bh_disable()/enable() inside the loop. In case that the softirq processing thread was preempted by the current task, current will block on the local lock which yields the CPU to the preempted softirq processing thread. If the tasklet is processed on a different CPU then the local_bh_disable()/enable() pair is just a waste of processor cycles. Signed-off-by: Thomas Gleixner Tested-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084241.988908275@linutronix.de --- include/linux/interrupt.h | 12 ++---------- kernel/softirq.c | 28 +++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index b50be4fbbc98..352db93c2eed 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -658,7 +658,7 @@ enum TASKLET_STATE_RUN /* Tasklet is running (SMP only) */ }; -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) static inline int tasklet_trylock(struct tasklet_struct *t) { return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); @@ -666,16 +666,8 @@ static inline int tasklet_trylock(struct tasklet_struct *t) void tasklet_unlock(struct tasklet_struct *t); void tasklet_unlock_wait(struct tasklet_struct *t); +void tasklet_unlock_spin_wait(struct tasklet_struct *t); -/* - * Do not use in new code. Waiting for tasklets from atomic contexts is - * error prone and should be avoided. - */ -static inline void tasklet_unlock_spin_wait(struct tasklet_struct *t) -{ - while (test_bit(TASKLET_STATE_RUN, &t->state)) - cpu_relax(); -} #else static inline int tasklet_trylock(struct tasklet_struct *t) { return 1; } static inline void tasklet_unlock(struct tasklet_struct *t) { } diff --git a/kernel/softirq.c b/kernel/softirq.c index ba89ca77698a..f1eb83dc3d50 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -620,6 +620,32 @@ void tasklet_init(struct tasklet_struct *t, } EXPORT_SYMBOL(tasklet_init); +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) +/* + * Do not use in new code. Waiting for tasklets from atomic contexts is + * error prone and should be avoided. + */ +void tasklet_unlock_spin_wait(struct tasklet_struct *t) +{ + while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + /* + * Prevent a live lock when current preempted soft + * interrupt processing or prevents ksoftirqd from + * running. If the tasklet runs on a different CPU + * then this has no effect other than doing the BH + * disable/enable dance for nothing. + */ + local_bh_disable(); + local_bh_enable(); + } else { + cpu_relax(); + } + } +} +EXPORT_SYMBOL(tasklet_unlock_spin_wait); +#endif + void tasklet_kill(struct tasklet_struct *t) { if (in_interrupt()) @@ -633,7 +659,7 @@ void tasklet_kill(struct tasklet_struct *t) } EXPORT_SYMBOL(tasklet_kill); -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) void tasklet_unlock(struct tasklet_struct *t) { smp_mb__before_atomic(); From c62c38e349c73cad90f59f00fe8070b3648b6d08 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 9 Mar 2021 09:42:11 +0100 Subject: [PATCH 12/53] net: jme: Replace link-change tasklet with work The link change tasklet disables the tasklets for tx/rx processing while upating hw parameters and then enables the tasklets again. This update can also be pushed into a workqueue where it can be performed in preemptible context. This allows tasklet_disable() to become sleeping. Replace the linkch_task tasklet with a work. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084242.106288922@linutronix.de --- drivers/net/ethernet/jme.c | 10 +++++----- drivers/net/ethernet/jme.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index e9efe074edc1..f1b9284e0bea 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -1265,9 +1265,9 @@ jme_stop_shutdown_timer(struct jme_adapter *jme) jwrite32f(jme, JME_APMC, apmc); } -static void jme_link_change_tasklet(struct tasklet_struct *t) +static void jme_link_change_work(struct work_struct *work) { - struct jme_adapter *jme = from_tasklet(jme, t, linkch_task); + struct jme_adapter *jme = container_of(work, struct jme_adapter, linkch_task); struct net_device *netdev = jme->dev; int rc; @@ -1510,7 +1510,7 @@ jme_intr_msi(struct jme_adapter *jme, u32 intrstat) * all other events are ignored */ jwrite32(jme, JME_IEVE, intrstat); - tasklet_schedule(&jme->linkch_task); + schedule_work(&jme->linkch_task); goto out_reenable; } @@ -1832,7 +1832,6 @@ jme_open(struct net_device *netdev) jme_clear_pm_disable_wol(jme); JME_NAPI_ENABLE(jme); - tasklet_setup(&jme->linkch_task, jme_link_change_tasklet); tasklet_setup(&jme->txclean_task, jme_tx_clean_tasklet); tasklet_setup(&jme->rxclean_task, jme_rx_clean_tasklet); tasklet_setup(&jme->rxempty_task, jme_rx_empty_tasklet); @@ -1920,7 +1919,7 @@ jme_close(struct net_device *netdev) JME_NAPI_DISABLE(jme); - tasklet_kill(&jme->linkch_task); + cancel_work_sync(&jme->linkch_task); tasklet_kill(&jme->txclean_task); tasklet_kill(&jme->rxclean_task); tasklet_kill(&jme->rxempty_task); @@ -3035,6 +3034,7 @@ jme_init_one(struct pci_dev *pdev, atomic_set(&jme->rx_empty, 1); tasklet_setup(&jme->pcc_task, jme_pcc_tasklet); + INIT_WORK(&jme->linkch_task, jme_link_change_work); jme->dpi.cur = PCC_P1; jme->reg_ghc = 0; diff --git a/drivers/net/ethernet/jme.h b/drivers/net/ethernet/jme.h index a2c3b00d939d..2af76329b4a2 100644 --- a/drivers/net/ethernet/jme.h +++ b/drivers/net/ethernet/jme.h @@ -411,7 +411,7 @@ struct jme_adapter { struct tasklet_struct rxempty_task; struct tasklet_struct rxclean_task; struct tasklet_struct txclean_task; - struct tasklet_struct linkch_task; + struct work_struct linkch_task; struct tasklet_struct pcc_task; unsigned long flags; u32 reg_txcs; From 25cf87df1a3a85959bf1bf27df0eb2e6e04b2161 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 9 Mar 2021 09:42:12 +0100 Subject: [PATCH 13/53] net: sundance: Use tasklet_disable_in_atomic(). tasklet_disable() is used in the timer callback. This might be distangled, but without access to the hardware that's a bit risky. Replace it with tasklet_disable_in_atomic() so tasklet_disable() can be changed to a sleep wait once all remaining atomic users are converted. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084242.209110861@linutronix.de --- drivers/net/ethernet/dlink/sundance.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index e3a8858915b3..df0eab479d51 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -963,7 +963,7 @@ static void tx_timeout(struct net_device *dev, unsigned int txqueue) unsigned long flag; netif_stop_queue(dev); - tasklet_disable(&np->tx_tasklet); + tasklet_disable_in_atomic(&np->tx_tasklet); iowrite16(0, ioaddr + IntrEnable); printk(KERN_WARNING "%s: Transmit timed out, TxStatus %2.2x " "TxFrameId %2.2x," From 3250aa8a293b1859d76577714a3e1fe95732c721 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 9 Mar 2021 09:42:13 +0100 Subject: [PATCH 14/53] ath9k: Use tasklet_disable_in_atomic() All callers of ath9k_beacon_ensure_primary_slot() are preemptible / acquire a mutex except for this callchain: spin_lock_bh(&sc->sc_pcu_lock); ath_complete_reset() -> ath9k_calculate_summary_state() -> ath9k_beacon_ensure_primary_slot() It's unclear how that can be distangled, so use tasklet_disable_in_atomic() for now. This allows tasklet_disable() to become sleepable once the remaining atomic users are cleaned up. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Kalle Valo Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084242.313899703@linutronix.de --- drivers/net/wireless/ath/ath9k/beacon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/beacon.c b/drivers/net/wireless/ath/ath9k/beacon.c index 71e2ada86793..72e2e71aac0e 100644 --- a/drivers/net/wireless/ath/ath9k/beacon.c +++ b/drivers/net/wireless/ath/ath9k/beacon.c @@ -251,7 +251,7 @@ void ath9k_beacon_ensure_primary_slot(struct ath_softc *sc) int first_slot = ATH_BCBUF; int slot; - tasklet_disable(&sc->bcon_tasklet); + tasklet_disable_in_atomic(&sc->bcon_tasklet); /* Find first taken slot. */ for (slot = 0; slot < ATH_BCBUF; slot++) { From 405698ca359a23b1ef1a502ef2bdc4597dc6da36 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 9 Mar 2021 09:42:14 +0100 Subject: [PATCH 15/53] atm: eni: Use tasklet_disable_in_atomic() in the send() callback The atmdev_ops::send callback which calls tasklet_disable() is invoked with bottom halfs disabled from net_device_ops::ndo_start_xmit(). All other invocations of tasklet_disable() in this driver happen in preemptible context. Change the send() call to use tasklet_disable_in_atomic() which allows tasklet_disable() to be made sleepable once the remaining atomic context usage sites are cleaned up. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084242.415583839@linutronix.de --- drivers/atm/eni.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 316a9947541f..e96a4e8a4a10 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -2054,7 +2054,7 @@ static int eni_send(struct atm_vcc *vcc,struct sk_buff *skb) } submitted++; ATM_SKB(skb)->vcc = vcc; - tasklet_disable(&ENI_DEV(vcc->dev)->task); + tasklet_disable_in_atomic(&ENI_DEV(vcc->dev)->task); res = do_tx(skb); tasklet_enable(&ENI_DEV(vcc->dev)->task); if (res == enq_ok) return 0; From be4017cea0aec6369275df7eafbb09682f810e7e Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 9 Mar 2021 09:42:15 +0100 Subject: [PATCH 16/53] PCI: hv: Use tasklet_disable_in_atomic() The hv_compose_msi_msg() callback in irq_chip::irq_compose_msi_msg is invoked via irq_chip_compose_msi_msg(), which itself is always invoked from atomic contexts from the guts of the interrupt core code. There is no way to change this w/o rewriting the whole driver, so use tasklet_disable_in_atomic() which allows to make tasklet_disable() sleepable once the remaining atomic users are addressed. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Wei Liu Acked-by: Bjorn Helgaas Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084242.516519290@linutronix.de --- drivers/pci/controller/pci-hyperv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 27a17a1e4a7c..a313708bcf75 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1458,7 +1458,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) * Prevents hv_pci_onchannelcallback() from running concurrently * in the tasklet. */ - tasklet_disable(&channel->callback_event); + tasklet_disable_in_atomic(&channel->callback_event); /* * Since this function is called with IRQ locks held, can't From f339fc16fba0167d67c4026678ef4c405bca3085 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 9 Mar 2021 09:42:16 +0100 Subject: [PATCH 17/53] firewire: ohci: Use tasklet_disable_in_atomic() where required tasklet_disable() is invoked in several places. Some of them are in atomic context which prevents a conversion of tasklet_disable() to a sleepable function. The atomic callchains are: ar_context_tasklet() ohci_cancel_packet() tasklet_disable() ... ohci_flush_iso_completions() tasklet_disable() The invocation of tasklet_disable() from at_context_flush() is always in preemptible context. Use tasklet_disable_in_atomic() for the two invocations in ohci_cancel_packet() and ohci_flush_iso_completions(). Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084242.616379058@linutronix.de --- drivers/firewire/ohci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 9811c40956e5..17c9d825188b 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -2545,7 +2545,7 @@ static int ohci_cancel_packet(struct fw_card *card, struct fw_packet *packet) struct driver_data *driver_data = packet->driver_data; int ret = -ENOENT; - tasklet_disable(&ctx->tasklet); + tasklet_disable_in_atomic(&ctx->tasklet); if (packet->ack != 0) goto out; @@ -3465,7 +3465,7 @@ static int ohci_flush_iso_completions(struct fw_iso_context *base) struct iso_context *ctx = container_of(base, struct iso_context, base); int ret = 0; - tasklet_disable(&ctx->context.tasklet); + tasklet_disable_in_atomic(&ctx->context.tasklet); if (!test_and_set_bit_lock(0, &ctx->flushing_completions)) { context_tasklet((unsigned long)&ctx->context); From 6fd4e861250b5c89ad460a9f265caeb1bbbfc323 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2021 09:42:17 +0100 Subject: [PATCH 18/53] tasklets: Switch tasklet_disable() to the sleep wait variant -- NOT FOR IMMEDIATE MERGING -- Now that all users of tasklet_disable() are invoked from sleepable context, convert it to use tasklet_unlock_wait() which might sleep. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084242.726452321@linutronix.de --- include/linux/interrupt.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 352db93c2eed..4777850a6dc7 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -711,8 +711,7 @@ static inline void tasklet_disable_in_atomic(struct tasklet_struct *t) static inline void tasklet_disable(struct tasklet_struct *t) { tasklet_disable_nosync(t); - /* Spin wait until all atomic users are converted */ - tasklet_unlock_spin_wait(t); + tasklet_unlock_wait(t); smp_mb(); } From 728b478d2d358480b333b42d0e10e0fecb20114c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2021 09:55:53 +0100 Subject: [PATCH 19/53] softirq: Add RT specific softirq accounting RT requires the softirq processing and local bottomhalf disabled regions to be preemptible. Using the normal preempt count based serialization is therefore not possible because this implicitely disables preemption. RT kernels use a per CPU local lock to serialize bottomhalfs. As local_bh_disable() can nest the lock can only be acquired on the outermost invocation of local_bh_disable() and released when the nest count becomes zero. Tasks which hold the local lock can be preempted so its required to keep track of the nest count per task. Add a RT only counter to task struct and adjust the relevant macros in preempt.h. Signed-off-by: Thomas Gleixner Tested-by: Sebastian Andrzej Siewior Tested-by: Paul E. McKenney Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309085726.983627589@linutronix.de --- include/linux/hardirq.h | 1 + include/linux/preempt.h | 6 +++++- include/linux/sched.h | 3 +++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 7c9d6a2d7e90..69bc86ea382c 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 69cc8b64aa3a..9881eac0698f 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -79,7 +79,11 @@ #define nmi_count() (preempt_count() & NMI_MASK) #define hardirq_count() (preempt_count() & HARDIRQ_MASK) -#define softirq_count() (preempt_count() & SOFTIRQ_MASK) +#ifdef CONFIG_PREEMPT_RT +# define softirq_count() (current->softirq_disable_cnt & SOFTIRQ_MASK) +#else +# define softirq_count() (preempt_count() & SOFTIRQ_MASK) +#endif #define irq_count() (nmi_count() | hardirq_count() | softirq_count()) /* diff --git a/include/linux/sched.h b/include/linux/sched.h index ef00bb22164c..743a613c9cf3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1044,6 +1044,9 @@ struct task_struct { int softirq_context; int irq_config; #endif +#ifdef CONFIG_PREEMPT_RT + int softirq_disable_cnt; +#endif #ifdef CONFIG_LOCKDEP # define MAX_LOCK_DEPTH 48UL From 6516b386d8a07102aac353daf9c0fe0045faeb74 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2021 09:55:54 +0100 Subject: [PATCH 20/53] irqtime: Make accounting correct on RT vtime_account_irq and irqtime_account_irq() base checks on preempt_count() which fails on RT because preempt_count() does not contain the softirq accounting which is seperate on RT. These checks do not need the full preempt count as they only operate on the hard and softirq sections. Use irq_count() instead which provides the correct value on both RT and non RT kernels. The compiler is clever enough to fold the masking for !RT: 99b: 65 8b 05 00 00 00 00 mov %gs:0x0(%rip),%eax - 9a2: 25 ff ff ff 7f and $0x7fffffff,%eax + 9a2: 25 00 ff ff 00 and $0xffff00,%eax Reported-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Tested-by: Sebastian Andrzej Siewior Tested-by: Paul E. McKenney Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309085727.153926793@linutronix.de --- kernel/sched/cputime.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 5f611658eeab..2c36a5fad589 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -60,7 +60,7 @@ void irqtime_account_irq(struct task_struct *curr, unsigned int offset) cpu = smp_processor_id(); delta = sched_clock_cpu(cpu) - irqtime->irq_start_time; irqtime->irq_start_time += delta; - pc = preempt_count() - offset; + pc = irq_count() - offset; /* * We do not account for softirq time from ksoftirqd here. @@ -421,7 +421,7 @@ void vtime_task_switch(struct task_struct *prev) void vtime_account_irq(struct task_struct *tsk, unsigned int offset) { - unsigned int pc = preempt_count() - offset; + unsigned int pc = irq_count() - offset; if (pc & HARDIRQ_OFFSET) { vtime_account_hardirq(tsk); From f02fc963e91160e7343933823e8b73a0b2ab0a16 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2021 09:55:55 +0100 Subject: [PATCH 21/53] softirq: Move various protections into inline helpers To allow reuse of the bulk of softirq processing code for RT and to avoid #ifdeffery all over the place, split protections for various code sections out into inline helpers so the RT variant can just replace them in one go. Signed-off-by: Thomas Gleixner Tested-by: Sebastian Andrzej Siewior Tested-by: Paul E. McKenney Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309085727.310118772@linutronix.de --- kernel/softirq.c | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index f1eb83dc3d50..eaca3337dbda 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -207,6 +207,32 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) } EXPORT_SYMBOL(__local_bh_enable_ip); +static inline void softirq_handle_begin(void) +{ + __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); +} + +static inline void softirq_handle_end(void) +{ + __local_bh_enable(SOFTIRQ_OFFSET); + WARN_ON_ONCE(in_interrupt()); +} + +static inline void ksoftirqd_run_begin(void) +{ + local_irq_disable(); +} + +static inline void ksoftirqd_run_end(void) +{ + local_irq_enable(); +} + +static inline bool should_wake_ksoftirqd(void) +{ + return true; +} + static inline void invoke_softirq(void) { if (ksoftirqd_running(local_softirq_pending())) @@ -319,7 +345,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) pending = local_softirq_pending(); - __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); + softirq_handle_begin(); in_hardirq = lockdep_softirq_start(); account_softirq_enter(current); @@ -370,8 +396,7 @@ restart: account_softirq_exit(current); lockdep_softirq_end(in_hardirq); - __local_bh_enable(SOFTIRQ_OFFSET); - WARN_ON_ONCE(in_interrupt()); + softirq_handle_end(); current_restore_flags(old_flags, PF_MEMALLOC); } @@ -466,7 +491,7 @@ inline void raise_softirq_irqoff(unsigned int nr) * Otherwise we wake up ksoftirqd to make sure we * schedule the softirq soon. */ - if (!in_interrupt()) + if (!in_interrupt() && should_wake_ksoftirqd()) wakeup_softirqd(); } @@ -698,18 +723,18 @@ static int ksoftirqd_should_run(unsigned int cpu) static void run_ksoftirqd(unsigned int cpu) { - local_irq_disable(); + ksoftirqd_run_begin(); if (local_softirq_pending()) { /* * We can safely run softirq on inline stack, as we are not deep * in the task stack here. */ __do_softirq(); - local_irq_enable(); + ksoftirqd_run_end(); cond_resched(); return; } - local_irq_enable(); + ksoftirqd_run_end(); } #ifdef CONFIG_HOTPLUG_CPU From 8b1c04acad082dec76f3f8f7e1fa13493d6cbb79 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2021 09:55:56 +0100 Subject: [PATCH 22/53] softirq: Make softirq control and processing RT aware Provide a local lock based serialization for soft interrupts on RT which allows the local_bh_disabled() sections and servicing soft interrupts to be preemptible. Provide the necessary inline helpers which allow to reuse the bulk of the softirq processing code. Signed-off-by: Thomas Gleixner Tested-by: Sebastian Andrzej Siewior Tested-by: Paul E. McKenney Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309085727.426370483@linutronix.de --- include/linux/bottom_half.h | 2 +- kernel/softirq.c | 188 ++++++++++++++++++++++++++++++++++-- 2 files changed, 182 insertions(+), 8 deletions(-) diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h index a19519f4241d..e4dd613a070e 100644 --- a/include/linux/bottom_half.h +++ b/include/linux/bottom_half.h @@ -4,7 +4,7 @@ #include -#ifdef CONFIG_TRACE_IRQFLAGS +#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_TRACE_IRQFLAGS) extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); #else static __always_inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) diff --git a/kernel/softirq.c b/kernel/softirq.c index eaca3337dbda..1ed1c55aa2a7 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -103,20 +104,189 @@ EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context); #endif /* - * preempt_count and SOFTIRQ_OFFSET usage: - * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving - * softirq processing. - * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET) + * SOFTIRQ_OFFSET usage: + * + * On !RT kernels 'count' is the preempt counter, on RT kernels this applies + * to a per CPU counter and to task::softirqs_disabled_cnt. + * + * - count is changed by SOFTIRQ_OFFSET on entering or leaving softirq + * processing. + * + * - count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET) * on local_bh_disable or local_bh_enable. + * * This lets us distinguish between whether we are currently processing * softirq and whether we just have bh disabled. */ +#ifdef CONFIG_PREEMPT_RT -#ifdef CONFIG_TRACE_IRQFLAGS /* - * This is for softirq.c-internal use, where hardirqs are disabled + * RT accounts for BH disabled sections in task::softirqs_disabled_cnt and + * also in per CPU softirq_ctrl::cnt. This is necessary to allow tasks in a + * softirq disabled section to be preempted. + * + * The per task counter is used for softirq_count(), in_softirq() and + * in_serving_softirqs() because these counts are only valid when the task + * holding softirq_ctrl::lock is running. + * + * The per CPU counter prevents pointless wakeups of ksoftirqd in case that + * the task which is in a softirq disabled section is preempted or blocks. + */ +struct softirq_ctrl { + local_lock_t lock; + int cnt; +}; + +static DEFINE_PER_CPU(struct softirq_ctrl, softirq_ctrl) = { + .lock = INIT_LOCAL_LOCK(softirq_ctrl.lock), +}; + +void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) +{ + unsigned long flags; + int newcnt; + + WARN_ON_ONCE(in_hardirq()); + + /* First entry of a task into a BH disabled section? */ + if (!current->softirq_disable_cnt) { + if (preemptible()) { + local_lock(&softirq_ctrl.lock); + /* Required to meet the RCU bottomhalf requirements. */ + rcu_read_lock(); + } else { + DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt)); + } + } + + /* + * Track the per CPU softirq disabled state. On RT this is per CPU + * state to allow preemption of bottom half disabled sections. + */ + newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt); + /* + * Reflect the result in the task state to prevent recursion on the + * local lock and to make softirq_count() & al work. + */ + current->softirq_disable_cnt = newcnt; + + if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) { + raw_local_irq_save(flags); + lockdep_softirqs_off(ip); + raw_local_irq_restore(flags); + } +} +EXPORT_SYMBOL(__local_bh_disable_ip); + +static void __local_bh_enable(unsigned int cnt, bool unlock) +{ + unsigned long flags; + int newcnt; + + DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt != + this_cpu_read(softirq_ctrl.cnt)); + + if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) { + raw_local_irq_save(flags); + lockdep_softirqs_on(_RET_IP_); + raw_local_irq_restore(flags); + } + + newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt); + current->softirq_disable_cnt = newcnt; + + if (!newcnt && unlock) { + rcu_read_unlock(); + local_unlock(&softirq_ctrl.lock); + } +} + +void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) +{ + bool preempt_on = preemptible(); + unsigned long flags; + u32 pending; + int curcnt; + + WARN_ON_ONCE(in_irq()); + lockdep_assert_irqs_enabled(); + + local_irq_save(flags); + curcnt = __this_cpu_read(softirq_ctrl.cnt); + + /* + * If this is not reenabling soft interrupts, no point in trying to + * run pending ones. + */ + if (curcnt != cnt) + goto out; + + pending = local_softirq_pending(); + if (!pending || ksoftirqd_running(pending)) + goto out; + + /* + * If this was called from non preemptible context, wake up the + * softirq daemon. + */ + if (!preempt_on) { + wakeup_softirqd(); + goto out; + } + + /* + * Adjust softirq count to SOFTIRQ_OFFSET which makes + * in_serving_softirq() become true. + */ + cnt = SOFTIRQ_OFFSET; + __local_bh_enable(cnt, false); + __do_softirq(); + +out: + __local_bh_enable(cnt, preempt_on); + local_irq_restore(flags); +} +EXPORT_SYMBOL(__local_bh_enable_ip); + +/* + * Invoked from ksoftirqd_run() outside of the interrupt disabled section + * to acquire the per CPU local lock for reentrancy protection. + */ +static inline void ksoftirqd_run_begin(void) +{ + __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); + local_irq_disable(); +} + +/* Counterpart to ksoftirqd_run_begin() */ +static inline void ksoftirqd_run_end(void) +{ + __local_bh_enable(SOFTIRQ_OFFSET, true); + WARN_ON_ONCE(in_interrupt()); + local_irq_enable(); +} + +static inline void softirq_handle_begin(void) { } +static inline void softirq_handle_end(void) { } + +static inline bool should_wake_ksoftirqd(void) +{ + return !this_cpu_read(softirq_ctrl.cnt); +} + +static inline void invoke_softirq(void) +{ + if (should_wake_ksoftirqd()) + wakeup_softirqd(); +} + +#else /* CONFIG_PREEMPT_RT */ + +/* + * This one is for softirq.c-internal use, where hardirqs are disabled * legitimately: */ +#ifdef CONFIG_TRACE_IRQFLAGS void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) { unsigned long flags; @@ -277,6 +447,8 @@ asmlinkage __visible void do_softirq(void) local_irq_restore(flags); } +#endif /* !CONFIG_PREEMPT_RT */ + /* * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times, * but break the loop if need_resched() is set or after 2 ms. @@ -381,8 +553,10 @@ restart: pending >>= softirq_bit; } - if (__this_cpu_read(ksoftirqd) == current) + if (!IS_ENABLED(CONFIG_PREEMPT_RT) && + __this_cpu_read(ksoftirqd) == current) rcu_softirq_qs(); + local_irq_disable(); pending = local_softirq_pending(); From 47c218dcae6587fb5bce30f1656b13e22391c8e3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2021 09:55:57 +0100 Subject: [PATCH 23/53] tick/sched: Prevent false positive softirq pending warnings on RT On RT a task which has soft interrupts disabled can block on a lock and schedule out to idle while soft interrupts are pending. This triggers the warning in the NOHZ idle code which complains about going idle with pending soft interrupts. But as the task is blocked soft interrupt processing is temporarily blocked as well which means that such a warning is a false positive. To prevent that check the per CPU state which indicates that a scheduled out task has soft interrupts disabled. Signed-off-by: Thomas Gleixner Tested-by: Sebastian Andrzej Siewior Tested-by: Paul E. McKenney Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309085727.527563866@linutronix.de --- include/linux/bottom_half.h | 6 ++++++ kernel/softirq.c | 15 +++++++++++++++ kernel/time/tick-sched.c | 2 +- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h index e4dd613a070e..eed86eb0a1de 100644 --- a/include/linux/bottom_half.h +++ b/include/linux/bottom_half.h @@ -32,4 +32,10 @@ static inline void local_bh_enable(void) __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET); } +#ifdef CONFIG_PREEMPT_RT +extern bool local_bh_blocked(void); +#else +static inline bool local_bh_blocked(void) { return false; } +#endif + #endif /* _LINUX_BH_H */ diff --git a/kernel/softirq.c b/kernel/softirq.c index 1ed1c55aa2a7..5a99696da86a 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -141,6 +141,21 @@ static DEFINE_PER_CPU(struct softirq_ctrl, softirq_ctrl) = { .lock = INIT_LOCAL_LOCK(softirq_ctrl.lock), }; +/** + * local_bh_blocked() - Check for idle whether BH processing is blocked + * + * Returns false if the per CPU softirq::cnt is 0 otherwise true. + * + * This is invoked from the idle task to guard against false positive + * softirq pending warnings, which would happen when the task which holds + * softirq_ctrl::lock was the only running task on the CPU and blocks on + * some other lock. + */ +bool local_bh_blocked(void) +{ + return __this_cpu_read(softirq_ctrl.cnt) != 0; +} + void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) { unsigned long flags; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index e10a4af88737..0cc55791b2b6 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -973,7 +973,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) if (unlikely(local_softirq_pending())) { static int ratelimit; - if (ratelimit < 10 && + if (ratelimit < 10 && !local_bh_blocked() && (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n", (unsigned int) local_softirq_pending()); From ba9e6cab49c1465c2c322dcb03d771d5cbecb692 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2021 09:55:58 +0100 Subject: [PATCH 24/53] rcu: Prevent false positive softirq warning on RT Soft interrupt disabled sections can legitimately be preempted or schedule out when blocking on a lock on RT enabled kernels so the RCU preempt check warning has to be disabled for RT kernels. Signed-off-by: Thomas Gleixner Tested-by: Sebastian Andrzej Siewior Tested-by: Paul E. McKenney Reviewed-by: Paul E. McKenney Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309085727.626304079@linutronix.de --- include/linux/rcupdate.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index bd04f722714f..6d855ef091ba 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -334,7 +334,8 @@ static inline void rcu_preempt_sleep_check(void) { } #define rcu_sleep_check() \ do { \ rcu_preempt_sleep_check(); \ - RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), \ + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \ + RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), \ "Illegal context switch in RCU-bh read-side critical section"); \ RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map), \ "Illegal context switch in RCU-sched read-side critical section"); \ From 2c6b02185cc608c19a22691fadc6ca2cd114c286 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 11 Feb 2021 08:09:53 +0100 Subject: [PATCH 25/53] irq: Simplify condition in irq_matrix_reserve() The if condition in irq_matrix_reserve() can be much simpler. While at it fix a typo in the comment. Signed-off-by: Juergen Gross Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210211070953.5914-1-jgross@suse.com --- kernel/irq/matrix.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c index 7a9465ffe71d..6f8b1d171cdc 100644 --- a/kernel/irq/matrix.c +++ b/kernel/irq/matrix.c @@ -337,15 +337,14 @@ void irq_matrix_assign(struct irq_matrix *m, unsigned int bit) * irq_matrix_reserve - Reserve interrupts * @m: Matrix pointer * - * This is merily a book keeping call. It increments the number of globally + * This is merely a book keeping call. It increments the number of globally * reserved interrupt bits w/o actually allocating them. This allows to * setup interrupt descriptors w/o assigning low level resources to it. * The actual allocation happens when the interrupt gets activated. */ void irq_matrix_reserve(struct irq_matrix *m) { - if (m->global_reserved <= m->global_available && - m->global_reserved + 1 > m->global_available) + if (m->global_reserved == m->global_available) pr_warn("Interrupt reservation exceeds available resources\n"); m->global_reserved++; From c93a5e20c3c2dabef8ea360a3d3f18c6f68233ab Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 19 Mar 2021 12:18:23 +0100 Subject: [PATCH 26/53] genirq/matrix: Prevent allocation counter corruption When irq_matrix_free() is called for an unallocated vector the managed_allocated and total_allocated counters get out of sync with the real state of the matrix. Later, when the last interrupt is freed, these counters will underflow resulting in UINTMAX because the counters are unsigned. While this is certainly a problem of the calling code, this can be catched in the allocator by checking the allocation bit for the to be freed vector which simplifies debugging. An example of the problem described above: https://lore.kernel.org/lkml/20210318192819.636943062@linutronix.de/ Add the missing sanity check and emit a warning when it triggers. Suggested-by: Thomas Gleixner Signed-off-by: Vitaly Kuznetsov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210319111823.1105248-1-vkuznets@redhat.com --- kernel/irq/matrix.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c index 6f8b1d171cdc..578596e41cb6 100644 --- a/kernel/irq/matrix.c +++ b/kernel/irq/matrix.c @@ -422,7 +422,9 @@ void irq_matrix_free(struct irq_matrix *m, unsigned int cpu, if (WARN_ON_ONCE(bit < m->alloc_start || bit >= m->alloc_end)) return; - clear_bit(bit, cm->alloc_map); + if (WARN_ON_ONCE(!test_and_clear_bit(bit, cm->alloc_map))) + return; + cm->allocated--; if(managed) cm->managed_allocated--; From a359f757965aafd0f58570de95dc6bc06cf12a9c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 22 Mar 2021 04:21:30 +0100 Subject: [PATCH 27/53] irq: Fix typos in comments Fix ~36 single-word typos in the IRQ, irqchip and irqdomain code comments. Signed-off-by: Ingo Molnar Cc: Thomas Gleixner Cc: Marc Zyngier Cc: Borislav Petkov Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- drivers/irqchip/irq-aspeed-vic.c | 4 ++-- drivers/irqchip/irq-bcm7120-l2.c | 2 +- drivers/irqchip/irq-csky-apb-intc.c | 2 +- drivers/irqchip/irq-gic-v2m.c | 2 +- drivers/irqchip/irq-gic-v3-its.c | 10 +++++----- drivers/irqchip/irq-gic-v3.c | 2 +- drivers/irqchip/irq-loongson-pch-pic.c | 2 +- drivers/irqchip/irq-meson-gpio.c | 2 +- drivers/irqchip/irq-mtk-cirq.c | 2 +- drivers/irqchip/irq-mxs.c | 4 ++-- drivers/irqchip/irq-sun4i.c | 2 +- drivers/irqchip/irq-ti-sci-inta.c | 2 +- drivers/irqchip/irq-vic.c | 4 ++-- drivers/irqchip/irq-xilinx-intc.c | 2 +- include/linux/irq.h | 4 ++-- include/linux/irqdesc.h | 2 +- kernel/irq/chip.c | 2 +- kernel/irq/dummychip.c | 2 +- kernel/irq/irqdesc.c | 2 +- kernel/irq/irqdomain.c | 8 ++++---- kernel/irq/manage.c | 6 +++--- kernel/irq/msi.c | 2 +- kernel/irq/timings.c | 2 +- 23 files changed, 36 insertions(+), 36 deletions(-) diff --git a/drivers/irqchip/irq-aspeed-vic.c b/drivers/irqchip/irq-aspeed-vic.c index 6567ed782f82..58717cd44f99 100644 --- a/drivers/irqchip/irq-aspeed-vic.c +++ b/drivers/irqchip/irq-aspeed-vic.c @@ -71,7 +71,7 @@ static void vic_init_hw(struct aspeed_vic *vic) writel(0, vic->base + AVIC_INT_SELECT); writel(0, vic->base + AVIC_INT_SELECT + 4); - /* Some interrupts have a programable high/low level trigger + /* Some interrupts have a programmable high/low level trigger * (4 GPIO direct inputs), for now we assume this was configured * by firmware. We read which ones are edge now. */ @@ -203,7 +203,7 @@ static int __init avic_of_init(struct device_node *node, } vic->base = regs; - /* Initialize soures, all masked */ + /* Initialize sources, all masked */ vic_init_hw(vic); /* Ready to receive interrupts */ diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c index c7c9e976acbb..ad59656ccc28 100644 --- a/drivers/irqchip/irq-bcm7120-l2.c +++ b/drivers/irqchip/irq-bcm7120-l2.c @@ -309,7 +309,7 @@ static int __init bcm7120_l2_intc_probe(struct device_node *dn, if (data->can_wake) { /* This IRQ chip can wake the system, set all - * relevant child interupts in wake_enabled mask + * relevant child interrupts in wake_enabled mask */ gc->wake_enabled = 0xffffffff; gc->wake_enabled &= ~gc->unused; diff --git a/drivers/irqchip/irq-csky-apb-intc.c b/drivers/irqchip/irq-csky-apb-intc.c index 5a2ec43b7ddd..ab91afa86755 100644 --- a/drivers/irqchip/irq-csky-apb-intc.c +++ b/drivers/irqchip/irq-csky-apb-intc.c @@ -176,7 +176,7 @@ gx_intc_init(struct device_node *node, struct device_node *parent) writel(0x0, reg_base + GX_INTC_NEN63_32); /* - * Initial mask reg with all unmasked, because we only use enalbe reg + * Initial mask reg with all unmasked, because we only use enable reg */ writel(0x0, reg_base + GX_INTC_NMASK31_00); writel(0x0, reg_base + GX_INTC_NMASK63_32); diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index fbec07d634ad..4116b48e60af 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -371,7 +371,7 @@ static int __init gicv2m_init_one(struct fwnode_handle *fwnode, * the MSI data is the absolute value within the range from * spi_start to (spi_start + num_spis). * - * Broadom NS2 GICv2m implementation has an erratum where the MSI data + * Broadcom NS2 GICv2m implementation has an erratum where the MSI data * is 'spi_number - 32' * * Reading that register fails on the Graviton implementation diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index ed46e6057e33..c3485b230d70 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1492,7 +1492,7 @@ static void its_vlpi_set_doorbell(struct irq_data *d, bool enable) * * Ideally, we'd issue a VMAPTI to set the doorbell to its LPI * value or to 1023, depending on the enable bit. But that - * would be issueing a mapping for an /existing/ DevID+EventID + * would be issuing a mapping for an /existing/ DevID+EventID * pair, which is UNPREDICTABLE. Instead, let's issue a VMOVI * to the /same/ vPE, using this opportunity to adjust the * doorbell. Mouahahahaha. We loves it, Precious. @@ -3122,7 +3122,7 @@ static void its_cpu_init_lpis(void) /* * It's possible for CPU to receive VLPIs before it is - * sheduled as a vPE, especially for the first CPU, and the + * scheduled as a vPE, especially for the first CPU, and the * VLPI with INTID larger than 2^(IDbits+1) will be considered * as out of range and dropped by GIC. * So we initialize IDbits to known value to avoid VLPI drop. @@ -3616,7 +3616,7 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq, /* * If all interrupts have been freed, start mopping the - * floor. This is conditionned on the device not being shared. + * floor. This is conditioned on the device not being shared. */ if (!its_dev->shared && bitmap_empty(its_dev->event_map.lpi_map, @@ -4194,7 +4194,7 @@ static int its_sgi_set_affinity(struct irq_data *d, { /* * There is no notion of affinity for virtual SGIs, at least - * not on the host (since they can only be targetting a vPE). + * not on the host (since they can only be targeting a vPE). * Tell the kernel we've done whatever it asked for. */ irq_data_update_effective_affinity(d, mask_val); @@ -4239,7 +4239,7 @@ static int its_sgi_get_irqchip_state(struct irq_data *d, /* * Locking galore! We can race against two different events: * - * - Concurent vPE affinity change: we must make sure it cannot + * - Concurrent vPE affinity change: we must make sure it cannot * happen, or we'll talk to the wrong redistributor. This is * identical to what happens with vLPIs. * diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index eb0ee356a629..94b89258d045 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1379,7 +1379,7 @@ static int gic_irq_domain_translate(struct irq_domain *d, /* * Make it clear that broken DTs are... broken. - * Partitionned PPIs are an unfortunate exception. + * Partitioned PPIs are an unfortunate exception. */ WARN_ON(*type == IRQ_TYPE_NONE && fwspec->param[0] != GIC_IRQ_TYPE_PARTITION); diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c index 9bf6b9a5f734..f790ca6d78aa 100644 --- a/drivers/irqchip/irq-loongson-pch-pic.c +++ b/drivers/irqchip/irq-loongson-pch-pic.c @@ -163,7 +163,7 @@ static void pch_pic_reset(struct pch_pic *priv) int i; for (i = 0; i < PIC_COUNT; i++) { - /* Write vectore ID */ + /* Write vectored ID */ writeb(priv->ht_vec_base + i, priv->base + PCH_INT_HTVEC(i)); /* Hardcode route to HT0 Lo */ writeb(1, priv->base + PCH_INT_ROUTE(i)); diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c index bc7aebcc96e9..e50676ce2ec8 100644 --- a/drivers/irqchip/irq-meson-gpio.c +++ b/drivers/irqchip/irq-meson-gpio.c @@ -227,7 +227,7 @@ meson_gpio_irq_request_channel(struct meson_gpio_irq_controller *ctl, /* * Get the hwirq number assigned to this channel through - * a pointer the channel_irq table. The added benifit of this + * a pointer the channel_irq table. The added benefit of this * method is that we can also retrieve the channel index with * it, using the table base. */ diff --git a/drivers/irqchip/irq-mtk-cirq.c b/drivers/irqchip/irq-mtk-cirq.c index 69ba8ce3c178..9bca0918078e 100644 --- a/drivers/irqchip/irq-mtk-cirq.c +++ b/drivers/irqchip/irq-mtk-cirq.c @@ -217,7 +217,7 @@ static void mtk_cirq_resume(void) { u32 value; - /* flush recored interrupts, will send signals to parent controller */ + /* flush recorded interrupts, will send signals to parent controller */ value = readl_relaxed(cirq_data->base + CIRQ_CONTROL); writel_relaxed(value | CIRQ_FLUSH, cirq_data->base + CIRQ_CONTROL); diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c index a671938fd97f..d1f5740cd575 100644 --- a/drivers/irqchip/irq-mxs.c +++ b/drivers/irqchip/irq-mxs.c @@ -58,7 +58,7 @@ struct icoll_priv { static struct icoll_priv icoll_priv; static struct irq_domain *icoll_domain; -/* calculate bit offset depending on number of intterupt per register */ +/* calculate bit offset depending on number of interrupt per register */ static u32 icoll_intr_bitshift(struct irq_data *d, u32 bit) { /* @@ -68,7 +68,7 @@ static u32 icoll_intr_bitshift(struct irq_data *d, u32 bit) return bit << ((d->hwirq & 3) << 3); } -/* calculate mem offset depending on number of intterupt per register */ +/* calculate mem offset depending on number of interrupt per register */ static void __iomem *icoll_intr_reg(struct irq_data *d) { /* offset = hwirq / intr_per_reg * 0x10 */ diff --git a/drivers/irqchip/irq-sun4i.c b/drivers/irqchip/irq-sun4i.c index fb78d6623556..9ea94456b178 100644 --- a/drivers/irqchip/irq-sun4i.c +++ b/drivers/irqchip/irq-sun4i.c @@ -189,7 +189,7 @@ static void __exception_irq_entry sun4i_handle_irq(struct pt_regs *regs) * 3) spurious irq * So if we immediately get a reading of 0, check the irq-pending reg * to differentiate between 2 and 3. We only do this once to avoid - * the extra check in the common case of 1 hapening after having + * the extra check in the common case of 1 happening after having * read the vector-reg once. */ hwirq = readl(irq_ic_data->irq_base + SUN4I_IRQ_VECTOR_REG) >> 2; diff --git a/drivers/irqchip/irq-ti-sci-inta.c b/drivers/irqchip/irq-ti-sci-inta.c index 532d0ae172d9..ca1f593f4d13 100644 --- a/drivers/irqchip/irq-ti-sci-inta.c +++ b/drivers/irqchip/irq-ti-sci-inta.c @@ -78,7 +78,7 @@ struct ti_sci_inta_vint_desc { * struct ti_sci_inta_irq_domain - Structure representing a TISCI based * Interrupt Aggregator IRQ domain. * @sci: Pointer to TISCI handle - * @vint: TISCI resource pointer representing IA inerrupts. + * @vint: TISCI resource pointer representing IA interrupts. * @global_event: TISCI resource pointer representing global events. * @vint_list: List of the vints active in the system * @vint_mutex: Mutex to protect vint_list diff --git a/drivers/irqchip/irq-vic.c b/drivers/irqchip/irq-vic.c index e46036374227..62f3d29f9042 100644 --- a/drivers/irqchip/irq-vic.c +++ b/drivers/irqchip/irq-vic.c @@ -163,7 +163,7 @@ static struct syscore_ops vic_syscore_ops = { }; /** - * vic_pm_init - initicall to register VIC pm + * vic_pm_init - initcall to register VIC pm * * This is called via late_initcall() to register * the resources for the VICs due to the early @@ -397,7 +397,7 @@ static void __init vic_clear_interrupts(void __iomem *base) /* * The PL190 cell from ARM has been modified by ST to handle 64 interrupts. * The original cell has 32 interrupts, while the modified one has 64, - * replocating two blocks 0x00..0x1f in 0x20..0x3f. In that case + * replicating two blocks 0x00..0x1f in 0x20..0x3f. In that case * the probe function is called twice, with base set to offset 000 * and 020 within the page. We call this "second block". */ diff --git a/drivers/irqchip/irq-xilinx-intc.c b/drivers/irqchip/irq-xilinx-intc.c index 1d3d273309bd..8cd1bfc73057 100644 --- a/drivers/irqchip/irq-xilinx-intc.c +++ b/drivers/irqchip/irq-xilinx-intc.c @@ -210,7 +210,7 @@ static int __init xilinx_intc_of_init(struct device_node *intc, /* * Disable all external interrupts until they are - * explicity requested. + * explicitly requested. */ xintc_write(irqc, IER, 0); diff --git a/include/linux/irq.h b/include/linux/irq.h index 2efde6a79b7e..bee82809107c 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -116,7 +116,7 @@ enum { * IRQ_SET_MASK_NOCPY - OK, chip did update irq_common_data.affinity * IRQ_SET_MASK_OK_DONE - Same as IRQ_SET_MASK_OK for core. Special code to * support stacked irqchips, which indicates skipping - * all descendent irqchips. + * all descendant irqchips. */ enum { IRQ_SET_MASK_OK = 0, @@ -302,7 +302,7 @@ static inline bool irqd_is_level_type(struct irq_data *d) /* * Must only be called of irqchip.irq_set_affinity() or low level - * hieararchy domain allocation functions. + * hierarchy domain allocation functions. */ static inline void irqd_set_single_target(struct irq_data *d) { diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 891b323266df..df4651250785 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -32,7 +32,7 @@ struct pt_regs; * @last_unhandled: aging timer for unhandled count * @irqs_unhandled: stats field for spurious unhandled interrupts * @threads_handled: stats field for deferred spurious detection of threaded handlers - * @threads_handled_last: comparator field for deferred spurious detection of theraded handlers + * @threads_handled_last: comparator field for deferred spurious detection of threaded handlers * @lock: locking for SMP * @affinity_hint: hint to user space for preferred irq affinity * @affinity_notify: context for notification of affinity changes diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 042399cb5caf..8cc8e5713287 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -808,7 +808,7 @@ void handle_edge_irq(struct irq_desc *desc) /* * When another irq arrived while we were handling * one, we could have masked the irq. - * Renable it, if it was not disabled in meantime. + * Reenable it, if it was not disabled in meantime. */ if (unlikely(desc->istate & IRQS_PENDING)) { if (!irqd_irq_disabled(&desc->irq_data) && diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c index 0b0cdf206dc4..7fe6cffe7d0d 100644 --- a/kernel/irq/dummychip.c +++ b/kernel/irq/dummychip.c @@ -13,7 +13,7 @@ /* * What should we do if we get a hw irq event on an illegal vector? - * Each architecture has to answer this themself. + * Each architecture has to answer this themselves. */ static void ack_bad(struct irq_data *data) { diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index cc1a09406c6e..4a617d7312a4 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -31,7 +31,7 @@ static int __init irq_affinity_setup(char *str) cpulist_parse(str, irq_default_affinity); /* * Set at least the boot cpu. We don't want to end up with - * bugreports caused by random comandline masks + * bugreports caused by random commandline masks */ cpumask_set_cpu(smp_processor_id(), irq_default_affinity); return 1; diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 288151393a06..6cb7a9d7e8b1 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -62,7 +62,7 @@ EXPORT_SYMBOL_GPL(irqchip_fwnode_ops); * @name: Optional user provided domain name * @pa: Optional user-provided physical address * - * Allocate a struct irqchip_fwid, and return a poiner to the embedded + * Allocate a struct irqchip_fwid, and return a pointer to the embedded * fwnode_handle (or NULL on failure). * * Note: The types IRQCHIP_FWNODE_NAMED and IRQCHIP_FWNODE_NAMED_ID are @@ -665,7 +665,7 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain, pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); - /* Look for default domain if nececssary */ + /* Look for default domain if necessary */ if (domain == NULL) domain = irq_default_domain; if (domain == NULL) { @@ -906,7 +906,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain, { struct irq_data *data; - /* Look for default domain if nececssary */ + /* Look for default domain if necessary */ if (domain == NULL) domain = irq_default_domain; if (domain == NULL) @@ -1436,7 +1436,7 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, * The whole process to setup an IRQ has been split into two steps. * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ * descriptor and required hardware resources. The second step, - * irq_domain_activate_irq(), is to program hardwares with preallocated + * irq_domain_activate_irq(), is to program the hardware with preallocated * resources. In this way, it's easier to rollback when failing to * allocate resources. */ diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 07ed2e4ff9a3..e976c4927b25 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -326,7 +326,7 @@ static bool irq_set_affinity_deactivated(struct irq_data *data, * If the interrupt is not yet activated, just store the affinity * mask and do not call the chip driver at all. On activation the * driver has to make sure anyway that the interrupt is in a - * useable state so startup works. + * usable state so startup works. */ if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || irqd_is_activated(data) || !irqd_affinity_on_activate(data)) @@ -1054,7 +1054,7 @@ again: * to IRQS_INPROGRESS and the irq line is masked forever. * * This also serializes the state of shared oneshot handlers - * versus "desc->threads_onehsot |= action->thread_mask;" in + * versus "desc->threads_oneshot |= action->thread_mask;" in * irq_wake_thread(). See the comment there which explains the * serialization. */ @@ -1909,7 +1909,7 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) /* Last action releases resources */ if (!desc->action) { /* - * Reaquire bus lock as irq_release_resources() might + * Reacquire bus lock as irq_release_resources() might * require it to deallocate resources over the slow bus. */ chip_bus_lock(desc); diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index b338d622f26e..c41965e348b5 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -5,7 +5,7 @@ * * This file is licensed under GPLv2. * - * This file contains common code to support Message Signalled Interrupt for + * This file contains common code to support Message Signaled Interrupts for * PCI compatible and non PCI compatible devices. */ #include diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c index c31860569931..d309d6fbf5bd 100644 --- a/kernel/irq/timings.c +++ b/kernel/irq/timings.c @@ -485,7 +485,7 @@ static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts) /* * The interrupt triggered more than one second apart, that - * ends the sequence as predictible for our purpose. In this + * ends the sequence as predictable for our purpose. In this * case, assume we have the beginning of a sequence and the * timestamp is the first value. As it is impossible to * predict anything at this point, return. From 6e457914935a3161eeb74e319abf9fd511aa1e4d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 23 Mar 2021 10:22:21 +0100 Subject: [PATCH 28/53] drm/i915: Use tasklet_unlock_spin_wait() in __tasklet_disable_sync_once() The i915 driver has its own tasklet interface which was overseen in the tasklet rework. __tasklet_disable_sync_once() is a wrapper around tasklet_unlock_wait(). tasklet_unlock_wait() might sleep, but the i915 wrappers invokes it from non-preemtible contexts with bottom halves disabled. Use tasklet_unlock_spin_wait() instead which can be invoked from non-preemptible contexts. Fixes: da044747401fc ("tasklets: Replace spin wait in tasklet_unlock_wait()") Reported-by: kernel test robot Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210323092221.awq7g5b2muzypjw3@flow --- drivers/gpu/drm/i915/i915_gem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index e622aee6e4be..440c35f1abc9 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -105,7 +105,7 @@ static inline bool tasklet_is_locked(const struct tasklet_struct *t) static inline void __tasklet_disable_sync_once(struct tasklet_struct *t) { if (!atomic_fetch_inc(&t->count)) - tasklet_unlock_wait(t); + tasklet_unlock_spin_wait(t); } static inline bool __tasklet_is_enabled(const struct tasklet_struct *t) From 883ccef355b910398b99dfaf96d40557479a7e9b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 1 Mar 2021 15:26:59 +0100 Subject: [PATCH 29/53] genirq/irq_sim: Shrink devm_irq_domain_create_sim() The custom devres structure manages only a single pointer which can can be achieved by using devm_add_action_or_reset() as well which makes the code simpler. [ tglx: Fixed return value handling - found by smatch ] Signed-off-by: Bartosz Golaszewski Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210301142659.8971-1-brgl@bgdev.pl --- kernel/irq/irq_sim.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c index 48006608baf0..6e935d4d1ec5 100644 --- a/kernel/irq/irq_sim.c +++ b/kernel/irq/irq_sim.c @@ -24,10 +24,6 @@ struct irq_sim_irq_ctx { struct irq_sim_work_ctx *work_ctx; }; -struct irq_sim_devres { - struct irq_domain *domain; -}; - static void irq_sim_irqmask(struct irq_data *data) { struct irq_sim_irq_ctx *irq_ctx = irq_data_get_irq_chip_data(data); @@ -216,11 +212,11 @@ void irq_domain_remove_sim(struct irq_domain *domain) } EXPORT_SYMBOL_GPL(irq_domain_remove_sim); -static void devm_irq_domain_release_sim(struct device *dev, void *res) +static void devm_irq_domain_remove_sim(void *data) { - struct irq_sim_devres *this = res; + struct irq_domain *domain = data; - irq_domain_remove_sim(this->domain); + irq_domain_remove_sim(domain); } /** @@ -238,20 +234,17 @@ struct irq_domain *devm_irq_domain_create_sim(struct device *dev, struct fwnode_handle *fwnode, unsigned int num_irqs) { - struct irq_sim_devres *dr; + struct irq_domain *domain; + int ret; - dr = devres_alloc(devm_irq_domain_release_sim, - sizeof(*dr), GFP_KERNEL); - if (!dr) - return ERR_PTR(-ENOMEM); + domain = irq_domain_create_sim(fwnode, num_irqs); + if (IS_ERR(domain)) + return domain; - dr->domain = irq_domain_create_sim(fwnode, num_irqs); - if (IS_ERR(dr->domain)) { - devres_free(dr); - return dr->domain; - } + ret = devm_add_action_or_reset(dev, devm_irq_domain_remove_sim, domain); + if (ret) + return ERR_PTR(ret); - devres_add(dev, dr); - return dr->domain; + return domain; } EXPORT_SYMBOL_GPL(devm_irq_domain_create_sim); From eef56c3a0492e4c1bc2a081da8f402a26d882489 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 2 Apr 2021 15:58:21 +0100 Subject: [PATCH 30/53] sh: intc: Drop the use of irq_create_identity_mapping() Instead of playing games with using irq_create_identity_mapping() and irq_domain_associate(), drop the use of the former and only use the latter, together with the allocation of the irq_desc as needed. It doesn't make the code less awful, but at least the intent is clearer. Tested-by: Geert Uytterhoeven Signed-off-by: Marc Zyngier --- drivers/sh/intc/core.c | 49 ++++++++++++++++-------------------------- 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/drivers/sh/intc/core.c b/drivers/sh/intc/core.c index a14684ffe4c1..ca4f4ca413f1 100644 --- a/drivers/sh/intc/core.c +++ b/drivers/sh/intc/core.c @@ -179,6 +179,21 @@ static unsigned int __init save_reg(struct intc_desc_int *d, return 0; } +static bool __init intc_map(struct irq_domain *domain, int irq) +{ + if (!irq_to_desc(irq) && irq_alloc_desc_at(irq, NUMA_NO_NODE) != irq) { + pr_err("uname to allocate IRQ %d\n", irq); + return false; + } + + if (irq_domain_associate(domain, irq, irq)) { + pr_err("domain association failure\n"); + return false; + } + + return true; +} + int __init register_intc_controller(struct intc_desc *desc) { unsigned int i, k, smp; @@ -311,24 +326,12 @@ int __init register_intc_controller(struct intc_desc *desc) for (i = 0; i < hw->nr_vectors; i++) { struct intc_vect *vect = hw->vectors + i; unsigned int irq = evt2irq(vect->vect); - int res; if (!vect->enum_id) continue; - res = irq_create_identity_mapping(d->domain, irq); - if (unlikely(res)) { - if (res == -EEXIST) { - res = irq_domain_associate(d->domain, irq, irq); - if (unlikely(res)) { - pr_err("domain association failure\n"); - continue; - } - } else { - pr_err("can't identity map IRQ %d\n", irq); - continue; - } - } + if (!intc_map(d->domain, irq)) + continue; intc_irq_xlate_set(irq, vect->enum_id, d); intc_register_irq(desc, d, vect->enum_id, irq); @@ -345,22 +348,8 @@ int __init register_intc_controller(struct intc_desc *desc) * IRQ support, each vector still needs to have * its own backing irq_desc. */ - res = irq_create_identity_mapping(d->domain, irq2); - if (unlikely(res)) { - if (res == -EEXIST) { - res = irq_domain_associate(d->domain, - irq2, irq2); - if (unlikely(res)) { - pr_err("domain association " - "failure\n"); - continue; - } - } else { - pr_err("can't identity map IRQ %d\n", - irq); - continue; - } - } + if (!intc_map(d->domain, irq2)) + continue; vect2->enum_id = 0; From 4a35d6a03744ded782c9301f5f5d78ad68ce680f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 7 Apr 2021 13:17:10 +0100 Subject: [PATCH 31/53] irqdomain: Get rid of irq_create_identity_mapping() The sole user of irq_create_identity_mapping() having been converted, get rid of the unused helper. Signed-off-by: Marc Zyngier --- include/linux/irqdomain.h | 6 ------ kernel/irq/irqdomain.c | 3 --- 2 files changed, 9 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 33cacc8af26d..d2c61de208a8 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -419,12 +419,6 @@ extern int irq_create_strict_mappings(struct irq_domain *domain, unsigned int irq_base, irq_hw_number_t hwirq_base, int count); -static inline int irq_create_identity_mapping(struct irq_domain *host, - irq_hw_number_t hwirq) -{ - return irq_create_strict_mappings(host, hwirq, hwirq, 1); -} - extern const struct irq_domain_ops irq_domain_simple_ops; /* stock xlate functions */ diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index d10ab1d689d5..35c5a99f8884 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -715,9 +715,6 @@ EXPORT_SYMBOL_GPL(irq_create_mapping_affinity); * locations. For use by controllers that already have static mappings * to insert in to the domain. * - * Non-linear users can use irq_create_identity_mapping() for IRQ-at-a-time - * domain insertion. - * * 0 is returned upon success, while any failure to establish a static * mapping is treated as an error. */ From bd781ae53fac31acea9dec594d62a1424952dd4c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 2 Apr 2021 18:09:49 +0100 Subject: [PATCH 32/53] mips: netlogic: Use irq_domain_simple_ops for XLP PIC Use the generic irq_domain_simple_ops structure instead of a home-grown one. Acked-by: Thomas Bogendoerfer Signed-off-by: Marc Zyngier --- arch/mips/netlogic/common/irq.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/mips/netlogic/common/irq.c b/arch/mips/netlogic/common/irq.c index cf33dd8a487e..c25a2ce5e29f 100644 --- a/arch/mips/netlogic/common/irq.c +++ b/arch/mips/netlogic/common/irq.c @@ -276,10 +276,6 @@ asmlinkage void plat_irq_dispatch(void) } #ifdef CONFIG_CPU_XLP -static const struct irq_domain_ops xlp_pic_irq_domain_ops = { - .xlate = irq_domain_xlate_onetwocell, -}; - static int __init xlp_of_pic_init(struct device_node *node, struct device_node *parent) { @@ -324,7 +320,7 @@ static int __init xlp_of_pic_init(struct device_node *node, xlp_pic_domain = irq_domain_add_legacy(node, n_picirqs, nlm_irq_to_xirq(socid, PIC_IRQ_BASE), PIC_IRQ_BASE, - &xlp_pic_irq_domain_ops, NULL); + &irq_domain_simple_ops, NULL); if (xlp_pic_domain == NULL) { pr_err("PIC %pOFn: Creating legacy domain failed!\n", node); return -EINVAL; From 64ec2ad3b84d43926e618bb515f2382c266535ee Mon Sep 17 00:00:00 2001 From: Hao Fang Date: Tue, 30 Mar 2021 14:46:20 +0800 Subject: [PATCH 33/53] irqchip/hisi: Use the correct HiSilicon copyright s/Hisilicon/HiSilicon/ It should use capital S, according to https://www.hisilicon.com/en/terms-of-use. Signed-off-by: Hao Fang Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1617086780-8521-1-git-send-email-fanghao11@huawei.com --- drivers/irqchip/irq-hip04.c | 4 ++-- drivers/irqchip/irq-mbigen.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-hip04.c b/drivers/irqchip/irq-hip04.c index a6ed877d9dd3..058ebaebe2c4 100644 --- a/drivers/irqchip/irq-hip04.c +++ b/drivers/irqchip/irq-hip04.c @@ -1,9 +1,9 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Hisilicon HiP04 INTC + * HiSilicon HiP04 INTC * * Copyright (C) 2002-2014 ARM Limited. - * Copyright (c) 2013-2014 Hisilicon Ltd. + * Copyright (c) 2013-2014 HiSilicon Ltd. * Copyright (c) 2013-2014 Linaro Ltd. * * Interrupt architecture for the HIP04 INTC: diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index ff7627b57772..2cb45c6b8501 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2015 Hisilicon Limited, All Rights Reserved. + * Copyright (C) 2015 HiSilicon Limited, All Rights Reserved. * Author: Jun Ma * Author: Yun Wu */ @@ -390,4 +390,4 @@ module_platform_driver(mbigen_platform_driver); MODULE_AUTHOR("Jun Ma "); MODULE_AUTHOR("Yun Wu "); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Hisilicon MBI Generator driver"); +MODULE_DESCRIPTION("HiSilicon MBI Generator driver"); From e03b7c1bcbfad6f27b4682f638b98627c4e416ba Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 30 Mar 2021 02:09:11 +0800 Subject: [PATCH 34/53] irqchip/sifive-plic: Mark two global variables __ro_after_init All of these two are never modified after init, so they can be __ro_after_init. Signed-off-by: Jisheng Zhang Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210330020911.26423e9e@xhacker --- drivers/irqchip/irq-sifive-plic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 6f432d2a5ceb..97d4d04b0a80 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -77,8 +77,8 @@ struct plic_handler { void __iomem *enable_base; struct plic_priv *priv; }; -static int plic_parent_irq; -static bool plic_cpuhp_setup_done; +static int plic_parent_irq __ro_after_init; +static bool plic_cpuhp_setup_done __ro_after_init; static DEFINE_PER_CPU(struct plic_handler, plic_handlers); static inline void plic_toggle(struct plic_handler *handler, From 8e13d96670a4c050d4883e6743a9e9858e5cfe10 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 23 Mar 2021 14:18:35 +0100 Subject: [PATCH 35/53] irqchip/gic-v3: Fix OF_BAD_ADDR error handling When building with extra warnings enabled, clang points out a mistake in the error handling: drivers/irqchip/irq-gic-v3-mbi.c:306:21: error: result of comparison of constant 18446744073709551615 with expression of type 'phys_addr_t' (aka 'unsigned int') is always false [-Werror,-Wtautological-constant-out-of-range-compare] if (mbi_phys_base == OF_BAD_ADDR) { Truncate the constant to the same type as the variable it gets compared to, to shut make the check work and void the warning. Fixes: 505287525c24 ("irqchip/gic-v3: Add support for Message Based Interrupts as an MSI controller") Signed-off-by: Arnd Bergmann Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210323131842.2773094-1-arnd@kernel.org --- drivers/irqchip/irq-gic-v3-mbi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-mbi.c b/drivers/irqchip/irq-gic-v3-mbi.c index 563a9b366294..e81e89a81cb5 100644 --- a/drivers/irqchip/irq-gic-v3-mbi.c +++ b/drivers/irqchip/irq-gic-v3-mbi.c @@ -303,7 +303,7 @@ int __init mbi_init(struct fwnode_handle *fwnode, struct irq_domain *parent) reg = of_get_property(np, "mbi-alias", NULL); if (reg) { mbi_phys_base = of_translate_address(np, reg); - if (mbi_phys_base == OF_BAD_ADDR) { + if (mbi_phys_base == (phys_addr_t)OF_BAD_ADDR) { ret = -ENXIO; goto err_free_mbi; } From e12c455055e9abc7403ce532616c0124a9d85ee7 Mon Sep 17 00:00:00 2001 From: Erwan Le Ray Date: Fri, 19 Mar 2021 19:42:51 +0100 Subject: [PATCH 36/53] irqchip/stm32: Add usart instances exti direct event support Add following usart instances exti direct event support (used for UART wake up). - exti 26 (USART1) is mapped to GIC 37 - exti 27 (USART2) is mapped to GIC 38 - exti 28 (USART3) is mapped to GIC 39 - exti 29 (USART6) is mapped to GIC 71 - exti 31 (UART5) is mapped to GIC 53 - exti 32 (UART7) is mapped to GIC 82 - exti 33 (UART8) is mapped to GIC 83 Signed-off-by: Erwan Le Ray Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210319184253.5841-4-erwan.leray@foss.st.com --- drivers/irqchip/irq-stm32-exti.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c index 8662d7b7b262..b9db90c4aa56 100644 --- a/drivers/irqchip/irq-stm32-exti.c +++ b/drivers/irqchip/irq-stm32-exti.c @@ -193,7 +193,14 @@ static const struct stm32_desc_irq stm32mp1_desc_irq[] = { { .exti = 23, .irq_parent = 72, .chip = &stm32_exti_h_chip_direct }, { .exti = 24, .irq_parent = 95, .chip = &stm32_exti_h_chip_direct }, { .exti = 25, .irq_parent = 107, .chip = &stm32_exti_h_chip_direct }, + { .exti = 26, .irq_parent = 37, .chip = &stm32_exti_h_chip_direct }, + { .exti = 27, .irq_parent = 38, .chip = &stm32_exti_h_chip_direct }, + { .exti = 28, .irq_parent = 39, .chip = &stm32_exti_h_chip_direct }, + { .exti = 29, .irq_parent = 71, .chip = &stm32_exti_h_chip_direct }, { .exti = 30, .irq_parent = 52, .chip = &stm32_exti_h_chip_direct }, + { .exti = 31, .irq_parent = 53, .chip = &stm32_exti_h_chip_direct }, + { .exti = 32, .irq_parent = 82, .chip = &stm32_exti_h_chip_direct }, + { .exti = 33, .irq_parent = 83, .chip = &stm32_exti_h_chip_direct }, { .exti = 47, .irq_parent = 93, .chip = &stm32_exti_h_chip_direct }, { .exti = 48, .irq_parent = 138, .chip = &stm32_exti_h_chip_direct }, { .exti = 50, .irq_parent = 139, .chip = &stm32_exti_h_chip_direct }, From 5deaa1d7c49151988b0bf919eeea6ad5535a29a2 Mon Sep 17 00:00:00 2001 From: Rajendra Nayak Date: Mon, 15 Mar 2021 11:29:06 +0530 Subject: [PATCH 37/53] dt-bindings: qcom,pdc: Add compatible for sc7280 Add the compatible string for sc7280 SoC from Qualcomm Signed-off-by: Rajendra Nayak Reviewed-by: Stephen Boyd Acked-by: Rob Herring Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1615787946-26474-1-git-send-email-rnayak@codeaurora.org --- .../devicetree/bindings/interrupt-controller/qcom,pdc.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/interrupt-controller/qcom,pdc.txt b/Documentation/devicetree/bindings/interrupt-controller/qcom,pdc.txt index e9afb48182c7..98d89e53013d 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/qcom,pdc.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/qcom,pdc.txt @@ -19,6 +19,7 @@ Properties: Value type: Definition: Should contain "qcom,-pdc" and "qcom,pdc" - "qcom,sc7180-pdc": For SC7180 + - "qcom,sc7280-pdc": For SC7280 - "qcom,sdm845-pdc": For SDM845 - "qcom,sdm8250-pdc": For SM8250 - "qcom,sdm8350-pdc": For SM8350 From 7c18715546203a09f859dac2fe3ea8aceec5f235 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Tue, 6 Apr 2021 14:09:14 +0200 Subject: [PATCH 38/53] dt-bindings: interrupt-controller: Add nuvoton, wpcm450-aic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The WPCM450 AIC ("Advanced Interrupt Controller") is the interrupt controller found in the Nuvoton WPCM450 SoC and other Winbond/Nuvoton SoCs. Signed-off-by: Jonathan Neuschäfer Acked-by: Rob Herring Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210406120921.2484986-4-j.neuschaefer@gmx.net --- .../nuvoton,wpcm450-aic.yaml | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 Documentation/devicetree/bindings/interrupt-controller/nuvoton,wpcm450-aic.yaml diff --git a/Documentation/devicetree/bindings/interrupt-controller/nuvoton,wpcm450-aic.yaml b/Documentation/devicetree/bindings/interrupt-controller/nuvoton,wpcm450-aic.yaml new file mode 100644 index 000000000000..9ce6804bdb99 --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/nuvoton,wpcm450-aic.yaml @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interrupt-controller/nuvoton,wpcm450-aic.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Nuvoton WPCM450 Advanced Interrupt Controller bindings + +maintainers: + - Jonathan Neuschäfer + +properties: + '#interrupt-cells': + const: 2 + + compatible: + const: nuvoton,wpcm450-aic + + interrupt-controller: true + + reg: + maxItems: 1 + +additionalProperties: false + +required: + - '#interrupt-cells' + - compatible + - reg + - interrupt-controller + +examples: + - | + aic: interrupt-controller@b8002000 { + compatible = "nuvoton,wpcm450-aic"; + reg = <0xb8002000 0x1000>; + interrupt-controller; + #interrupt-cells = <2>; + }; From fead4dd496631707549f414b4059afb86ea8fb80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Tue, 6 Apr 2021 14:09:17 +0200 Subject: [PATCH 39/53] irqchip: Add driver for WPCM450 interrupt controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The WPCM450 AIC ("Advanced Interrupt Controller") is the interrupt controller found in the Nuvoton WPCM450 SoC and other Winbond/Nuvoton SoCs. The list of registers if based on the AMI vendor kernel and the Nuvoton W90N745 datasheet. Although the hardware supports other interrupt modes, the driver only supports high-level interrupts at the moment, because other modes could not be tested so far. Signed-off-by: Jonathan Neuschäfer Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210406120921.2484986-7-j.neuschaefer@gmx.net --- drivers/irqchip/Kconfig | 6 ++ drivers/irqchip/Makefile | 1 + drivers/irqchip/irq-wpcm450-aic.c | 161 ++++++++++++++++++++++++++++++ 3 files changed, 168 insertions(+) create mode 100644 drivers/irqchip/irq-wpcm450-aic.c diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 15536e321df5..38ad9dcf42c3 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -577,4 +577,10 @@ config MST_IRQ help Support MStar Interrupt Controller. +config WPCM450_AIC + bool "Nuvoton WPCM450 Advanced Interrupt Controller" + depends on ARCH_WPCM450 || COMPILE_TEST + help + Support for the interrupt controller in the Nuvoton WPCM450 BMC SoC. + endmenu diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index c59b95a0532c..bef57937e729 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -113,3 +113,4 @@ obj-$(CONFIG_LOONGSON_PCH_MSI) += irq-loongson-pch-msi.o obj-$(CONFIG_MST_IRQ) += irq-mst-intc.o obj-$(CONFIG_SL28CPLD_INTC) += irq-sl28cpld.o obj-$(CONFIG_MACH_REALTEK_RTL) += irq-realtek-rtl.o +obj-$(CONFIG_WPCM450_AIC) += irq-wpcm450-aic.o diff --git a/drivers/irqchip/irq-wpcm450-aic.c b/drivers/irqchip/irq-wpcm450-aic.c new file mode 100644 index 000000000000..f3ac392d5bc8 --- /dev/null +++ b/drivers/irqchip/irq-wpcm450-aic.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright 2021 Jonathan Neuschäfer + +#include +#include +#include +#include + +#include + +#define AIC_SCR(x) ((x)*4) /* Source control registers */ +#define AIC_GEN 0x84 /* Interrupt group enable control register */ +#define AIC_GRSR 0x88 /* Interrupt group raw status register */ +#define AIC_IRSR 0x100 /* Interrupt raw status register */ +#define AIC_IASR 0x104 /* Interrupt active status register */ +#define AIC_ISR 0x108 /* Interrupt status register */ +#define AIC_IPER 0x10c /* Interrupt priority encoding register */ +#define AIC_ISNR 0x110 /* Interrupt source number register */ +#define AIC_IMR 0x114 /* Interrupt mask register */ +#define AIC_OISR 0x118 /* Output interrupt status register */ +#define AIC_MECR 0x120 /* Mask enable command register */ +#define AIC_MDCR 0x124 /* Mask disable command register */ +#define AIC_SSCR 0x128 /* Source set command register */ +#define AIC_SCCR 0x12c /* Source clear command register */ +#define AIC_EOSCR 0x130 /* End of service command register */ + +#define AIC_SCR_SRCTYPE_LOW_LEVEL (0 << 6) +#define AIC_SCR_SRCTYPE_HIGH_LEVEL (1 << 6) +#define AIC_SCR_SRCTYPE_NEG_EDGE (2 << 6) +#define AIC_SCR_SRCTYPE_POS_EDGE (3 << 6) +#define AIC_SCR_PRIORITY(x) (x) +#define AIC_SCR_PRIORITY_MASK 0x7 + +#define AIC_NUM_IRQS 32 + +struct wpcm450_aic { + void __iomem *regs; + struct irq_domain *domain; +}; + +static struct wpcm450_aic *aic; + +static void wpcm450_aic_init_hw(void) +{ + int i; + + /* Disable (mask) all interrupts */ + writel(0xffffffff, aic->regs + AIC_MDCR); + + /* + * Make sure the interrupt controller is ready to serve new interrupts. + * Reading from IPER indicates that the nIRQ signal may be deasserted, + * and writing to EOSCR indicates that interrupt handling has finished. + */ + readl(aic->regs + AIC_IPER); + writel(0, aic->regs + AIC_EOSCR); + + /* Initialize trigger mode and priority of each interrupt source */ + for (i = 0; i < AIC_NUM_IRQS; i++) + writel(AIC_SCR_SRCTYPE_HIGH_LEVEL | AIC_SCR_PRIORITY(7), + aic->regs + AIC_SCR(i)); +} + +static void __exception_irq_entry wpcm450_aic_handle_irq(struct pt_regs *regs) +{ + int hwirq; + + /* Determine the interrupt source */ + /* Read IPER to signal that nIRQ can be de-asserted */ + hwirq = readl(aic->regs + AIC_IPER) / 4; + + handle_domain_irq(aic->domain, hwirq, regs); +} + +static void wpcm450_aic_eoi(struct irq_data *d) +{ + /* Signal end-of-service */ + writel(0, aic->regs + AIC_EOSCR); +} + +static void wpcm450_aic_mask(struct irq_data *d) +{ + unsigned int mask = BIT(d->hwirq); + + /* Disable (mask) the interrupt */ + writel(mask, aic->regs + AIC_MDCR); +} + +static void wpcm450_aic_unmask(struct irq_data *d) +{ + unsigned int mask = BIT(d->hwirq); + + /* Enable (unmask) the interrupt */ + writel(mask, aic->regs + AIC_MECR); +} + +static int wpcm450_aic_set_type(struct irq_data *d, unsigned int flow_type) +{ + /* + * The hardware supports high/low level, as well as rising/falling edge + * modes, and the DT binding accommodates for that, but as long as + * other modes than high level mode are not used and can't be tested, + * they are rejected in this driver. + */ + if ((flow_type & IRQ_TYPE_SENSE_MASK) != IRQ_TYPE_LEVEL_HIGH) + return -EINVAL; + + return 0; +} + +static struct irq_chip wpcm450_aic_chip = { + .name = "wpcm450-aic", + .irq_eoi = wpcm450_aic_eoi, + .irq_mask = wpcm450_aic_mask, + .irq_unmask = wpcm450_aic_unmask, + .irq_set_type = wpcm450_aic_set_type, +}; + +static int wpcm450_aic_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq) +{ + if (hwirq >= AIC_NUM_IRQS) + return -EPERM; + + irq_set_chip_and_handler(irq, &wpcm450_aic_chip, handle_fasteoi_irq); + irq_set_chip_data(irq, aic); + irq_set_probe(irq); + + return 0; +} + +static const struct irq_domain_ops wpcm450_aic_ops = { + .map = wpcm450_aic_map, + .xlate = irq_domain_xlate_twocell, +}; + +static int __init wpcm450_aic_of_init(struct device_node *node, + struct device_node *parent) +{ + if (parent) + return -EINVAL; + + aic = kzalloc(sizeof(*aic), GFP_KERNEL); + if (!aic) + return -ENOMEM; + + aic->regs = of_iomap(node, 0); + if (!aic->regs) { + pr_err("Failed to map WPCM450 AIC registers\n"); + return -ENOMEM; + } + + wpcm450_aic_init_hw(); + + set_handle_irq(wpcm450_aic_handle_irq); + + aic->domain = irq_domain_add_linear(node, AIC_NUM_IRQS, &wpcm450_aic_ops, aic); + + return 0; +} + +IRQCHIP_DECLARE(wpcm450_aic, "nuvoton,wpcm450-aic", wpcm450_aic_of_init); From ea4aeaa5c88906eb3ca3d7d3d17a45605d2dd0de Mon Sep 17 00:00:00 2001 From: Mark-PK Tsai Date: Mon, 15 Mar 2021 21:18:48 +0800 Subject: [PATCH 40/53] irqchip/irq-mst: Support polarity configuration Support irq polarity configuration and save and restore the config when system suspend and resume. Signed-off-by: Mark-PK Tsai [maz: fixed irq_set_type callback] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210315131848.31840-1-mark-pk.tsai@mediatek.com --- drivers/irqchip/irq-mst-intc.c | 98 ++++++++++++++++++++++++++++++++-- 1 file changed, 95 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-mst-intc.c b/drivers/irqchip/irq-mst-intc.c index 143657b0cf28..f6133ae28155 100644 --- a/drivers/irqchip/irq-mst-intc.c +++ b/drivers/irqchip/irq-mst-intc.c @@ -13,15 +13,27 @@ #include #include #include +#include -#define INTC_MASK 0x0 -#define INTC_EOI 0x20 +#define MST_INTC_MAX_IRQS 64 + +#define INTC_MASK 0x0 +#define INTC_REV_POLARITY 0x10 +#define INTC_EOI 0x20 + +#ifdef CONFIG_PM_SLEEP +static LIST_HEAD(mst_intc_list); +#endif struct mst_intc_chip_data { raw_spinlock_t lock; unsigned int irq_start, nr_irqs; void __iomem *base; bool no_eoi; +#ifdef CONFIG_PM_SLEEP + struct list_head entry; + u16 saved_polarity_conf[DIV_ROUND_UP(MST_INTC_MAX_IRQS, 16)]; +#endif }; static void mst_set_irq(struct irq_data *d, u32 offset) @@ -78,6 +90,24 @@ static void mst_intc_eoi_irq(struct irq_data *d) irq_chip_eoi_parent(d); } +static int mst_irq_chip_set_type(struct irq_data *data, unsigned int type) +{ + switch (type) { + case IRQ_TYPE_LEVEL_LOW: + case IRQ_TYPE_EDGE_FALLING: + mst_set_irq(data, INTC_REV_POLARITY); + break; + case IRQ_TYPE_LEVEL_HIGH: + case IRQ_TYPE_EDGE_RISING: + mst_clear_irq(data, INTC_REV_POLARITY); + break; + default: + return -EINVAL; + } + + return irq_chip_set_type_parent(data, IRQ_TYPE_LEVEL_HIGH); +} + static struct irq_chip mst_intc_chip = { .name = "mst-intc", .irq_mask = mst_intc_mask_irq, @@ -87,13 +117,62 @@ static struct irq_chip mst_intc_chip = { .irq_set_irqchip_state = irq_chip_set_parent_state, .irq_set_affinity = irq_chip_set_affinity_parent, .irq_set_vcpu_affinity = irq_chip_set_vcpu_affinity_parent, - .irq_set_type = irq_chip_set_type_parent, + .irq_set_type = mst_irq_chip_set_type, .irq_retrigger = irq_chip_retrigger_hierarchy, .flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND, }; +#ifdef CONFIG_PM_SLEEP +static void mst_intc_polarity_save(struct mst_intc_chip_data *cd) +{ + int i; + void __iomem *addr = cd->base + INTC_REV_POLARITY; + + for (i = 0; i < DIV_ROUND_UP(cd->nr_irqs, 16); i++) + cd->saved_polarity_conf[i] = readw_relaxed(addr + i * 4); +} + +static void mst_intc_polarity_restore(struct mst_intc_chip_data *cd) +{ + int i; + void __iomem *addr = cd->base + INTC_REV_POLARITY; + + for (i = 0; i < DIV_ROUND_UP(cd->nr_irqs, 16); i++) + writew_relaxed(cd->saved_polarity_conf[i], addr + i * 4); +} + +static void mst_irq_resume(void) +{ + struct mst_intc_chip_data *cd; + + list_for_each_entry(cd, &mst_intc_list, entry) + mst_intc_polarity_restore(cd); +} + +static int mst_irq_suspend(void) +{ + struct mst_intc_chip_data *cd; + + list_for_each_entry(cd, &mst_intc_list, entry) + mst_intc_polarity_save(cd); + return 0; +} + +static struct syscore_ops mst_irq_syscore_ops = { + .suspend = mst_irq_suspend, + .resume = mst_irq_resume, +}; + +static int __init mst_irq_pm_init(void) +{ + register_syscore_ops(&mst_irq_syscore_ops); + return 0; +} +late_initcall(mst_irq_pm_init); +#endif + static int mst_intc_domain_translate(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *hwirq, @@ -145,6 +224,15 @@ static int mst_intc_domain_alloc(struct irq_domain *domain, unsigned int virq, parent_fwspec = *fwspec; parent_fwspec.fwnode = domain->parent->fwnode; parent_fwspec.param[1] = cd->irq_start + hwirq; + + /* + * mst-intc latch the interrupt request if it's edge triggered, + * so the output signal to parent GIC is always level sensitive. + * And if the irq signal is active low, configure it to active high + * to meet GIC SPI spec in mst_irq_chip_set_type via REV_POLARITY bit. + */ + parent_fwspec.param[2] = IRQ_TYPE_LEVEL_HIGH; + return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &parent_fwspec); } @@ -193,6 +281,10 @@ static int __init mst_intc_of_init(struct device_node *dn, return -ENOMEM; } +#ifdef CONFIG_PM_SLEEP + INIT_LIST_HEAD(&cd->entry); + list_add_tail(&cd->entry, &mst_intc_list); +#endif return 0; } From 94bc94209a66f05532c065279f4a719058d447e4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 8 Apr 2021 08:56:27 +0100 Subject: [PATCH 41/53] irqchip/wpcm450: Drop COMPILE_TEST MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver is (for now) ARM specific, and currently doesn't build with a variety of architectures (ia64, RISC-V, x86_64 at the very least). Drop COMPILE_TEST from Kconfig until it gets sorted out. Reviewed-by: Jonathan Neuschäfer Reported-by: Stephen Rothwell Signed-off-by: Marc Zyngier --- drivers/irqchip/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 38ad9dcf42c3..715eb4366e35 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -579,7 +579,7 @@ config MST_IRQ config WPCM450_AIC bool "Nuvoton WPCM450 Advanced Interrupt Controller" - depends on ARCH_WPCM450 || COMPILE_TEST + depends on ARCH_WPCM450 help Support for the interrupt controller in the Nuvoton WPCM450 BMC SoC. From c5e3a41187ac01425f5ad1abce927905e4ac44e4 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 1 Apr 2021 14:58:23 +0900 Subject: [PATCH 42/53] kernel: Initialize cpumask before parsing KMSAN complains that new_value at cpumask_parse_user() from write_irq_affinity() from irq_affinity_proc_write() is uninitialized. [ 148.133411][ T5509] ===================================================== [ 148.135383][ T5509] BUG: KMSAN: uninit-value in find_next_bit+0x325/0x340 [ 148.137819][ T5509] [ 148.138448][ T5509] Local variable ----new_value.i@irq_affinity_proc_write created at: [ 148.140768][ T5509] irq_affinity_proc_write+0xc3/0x3d0 [ 148.142298][ T5509] irq_affinity_proc_write+0xc3/0x3d0 [ 148.143823][ T5509] ===================================================== Since bitmap_parse() from cpumask_parse_user() calls find_next_bit(), any alloc_cpumask_var() + cpumask_parse_user() sequence has possibility that find_next_bit() accesses uninitialized cpu mask variable. Fix this problem by replacing alloc_cpumask_var() with zalloc_cpumask_var(). Signed-off-by: Tetsuo Handa Signed-off-by: Thomas Gleixner Acked-by: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/20210401055823.3929-1-penguin-kernel@I-love.SAKURA.ne.jp --- kernel/irq/proc.c | 4 ++-- kernel/profile.c | 2 +- kernel/trace/trace.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 98138788cb04..7c5cd42df3b9 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -144,7 +144,7 @@ static ssize_t write_irq_affinity(int type, struct file *file, if (!irq_can_set_affinity_usr(irq) || no_irq_affinity) return -EIO; - if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) + if (!zalloc_cpumask_var(&new_value, GFP_KERNEL)) return -ENOMEM; if (type) @@ -238,7 +238,7 @@ static ssize_t default_affinity_write(struct file *file, cpumask_var_t new_value; int err; - if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) + if (!zalloc_cpumask_var(&new_value, GFP_KERNEL)) return -ENOMEM; err = cpumask_parse_user(buffer, count, new_value); diff --git a/kernel/profile.c b/kernel/profile.c index 6f69a4195d56..c2ebddb5e974 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -430,7 +430,7 @@ static ssize_t prof_cpu_mask_proc_write(struct file *file, cpumask_var_t new_value; int err; - if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) + if (!zalloc_cpumask_var(&new_value, GFP_KERNEL)) return -ENOMEM; err = cpumask_parse_user(buffer, count, new_value); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index eccb4e1187cc..7607dc8a20b2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4827,7 +4827,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, cpumask_var_t tracing_cpumask_new; int err; - if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) + if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) return -ENOMEM; err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); From 7c07012eb1be8b4a95d3502fd30795849007a40e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 2 Apr 2021 23:20:37 +1000 Subject: [PATCH 43/53] genirq: Reduce irqdebug cacheline bouncing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit note_interrupt() increments desc->irq_count for each interrupt even for percpu interrupt handlers, even when they are handled successfully. This causes cacheline bouncing and limits scalability. Instead of incrementing irq_count every time, only start incrementing it after seeing an unhandled irq, which should avoid the cache line bouncing in the common path. This actually should give better consistency in handling misbehaving irqs too, because instead of the first unhandled irq arriving at an arbitrary point in the irq_count cycle, its arrival will begin the irq_count cycle. Cédric reports the result of his IPI throughput test: Millions of IPIs/s ----------- -------------------------------------- upstream upstream patched chips cpus default noirqdebug default (irqdebug) ----------- ----------------------------------------- 1 0-15 4.061 4.153 4.084 0-31 7.937 8.186 8.158 0-47 11.018 11.392 11.233 0-63 11.460 13.907 14.022 2 0-79 8.376 18.105 18.084 0-95 7.338 22.101 22.266 0-111 6.716 25.306 25.473 0-127 6.223 27.814 28.029 Signed-off-by: Nicholas Piggin Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210402132037.574661-1-npiggin@gmail.com --- kernel/irq/spurious.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index f865e5f4d382..c481d8458325 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -403,6 +403,10 @@ void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret) desc->irqs_unhandled -= ok; } + if (likely(!desc->irqs_unhandled)) + return; + + /* Now getting into unhandled irq detection */ desc->irq_count++; if (likely(desc->irq_count < 100000)) return; From a6992bbe9774e044d3d0f973593d655c53efe089 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 21 Apr 2021 22:16:20 -0700 Subject: [PATCH 44/53] irqchip/tb10x: Use 'fallthrough' to eliminate a warning Use the 'fallthrough' macro to document that this switch case does indeed fall through to the next case. ../drivers/irqchip/irq-tb10x.c: In function 'tb10x_irq_set_type': ../drivers/irqchip/irq-tb10x.c:62:13: warning: this statement may fall through [-Wimplicit-fallthrough=] 62 | flow_type = IRQ_TYPE_LEVEL_LOW; ../drivers/irqchip/irq-tb10x.c:63:2: note: here 63 | case IRQ_TYPE_LEVEL_LOW: | ^~~~ Fixes: b06eb0173ef1 ("irqchip: Add TB10x interrupt controller driver") Signed-off-by: Randy Dunlap Cc: Thomas Gleixner Cc: Marc Zyngier Cc: Christian Ruppert Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210422051620.23021-1-rdunlap@infradead.org --- drivers/irqchip/irq-tb10x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-tb10x.c b/drivers/irqchip/irq-tb10x.c index 9e456497c1c4..9a63b02b8176 100644 --- a/drivers/irqchip/irq-tb10x.c +++ b/drivers/irqchip/irq-tb10x.c @@ -60,6 +60,7 @@ static int tb10x_irq_set_type(struct irq_data *data, unsigned int flow_type) break; case IRQ_TYPE_NONE: flow_type = IRQ_TYPE_LEVEL_LOW; + fallthrough; case IRQ_TYPE_LEVEL_LOW: mod ^= im; pol ^= im; From 46135d6f878ab00261d4a2082d620bfb41019aab Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 17 Mar 2021 10:07:19 +0000 Subject: [PATCH 45/53] irqchip/gic-v4.1: Disable vSGI upon (GIC CPUIF < v4.1) detection GIC CPU interfaces versions predating GIC v4.1 were not built to accommodate vINTID within the vSGI range; as reported in the GIC specifications (8.2 "Changes to the CPU interface"), it is CONSTRAINED UNPREDICTABLE to deliver a vSGI to a PE with ID_AA64PFR0_EL1.GIC < b0011. Check the GIC CPUIF version by reading the SYS_ID_AA64_PFR0_EL1. Disable vSGIs if a CPUIF version < 4.1 is detected to prevent using vSGIs on systems where they may misbehave. Signed-off-by: Lorenzo Pieralisi Cc: Marc Zyngier Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210317100719.3331-2-lorenzo.pieralisi@arm.com --- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 4 ++-- drivers/irqchip/irq-gic-v4.c | 27 +++++++++++++++++++++++++-- include/linux/irqchip/arm-gic-v4.h | 2 ++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 15a6c98ee92f..2f1b156021a6 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -86,7 +86,7 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, } break; case GICD_TYPER2: - if (kvm_vgic_global_state.has_gicv4_1) + if (kvm_vgic_global_state.has_gicv4_1 && gic_cpuif_has_vsgi()) value = GICD_TYPER2_nASSGIcap; break; case GICD_IIDR: @@ -119,7 +119,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu, dist->enabled = val & GICD_CTLR_ENABLE_SS_G1; /* Not a GICv4.1? No HW SGIs */ - if (!kvm_vgic_global_state.has_gicv4_1) + if (!kvm_vgic_global_state.has_gicv4_1 || !gic_cpuif_has_vsgi()) val &= ~GICD_CTLR_nASSGIreq; /* Dist stays enabled? nASSGIreq is RO */ diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c index 5d1dc9915272..4ea71b28f9f5 100644 --- a/drivers/irqchip/irq-gic-v4.c +++ b/drivers/irqchip/irq-gic-v4.c @@ -87,17 +87,40 @@ static struct irq_domain *gic_domain; static const struct irq_domain_ops *vpe_domain_ops; static const struct irq_domain_ops *sgi_domain_ops; +#ifdef CONFIG_ARM64 +#include + +bool gic_cpuif_has_vsgi(void) +{ + unsigned long fld, reg = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + + fld = cpuid_feature_extract_unsigned_field(reg, ID_AA64PFR0_GIC_SHIFT); + + return fld >= 0x3; +} +#else +bool gic_cpuif_has_vsgi(void) +{ + return false; +} +#endif + static bool has_v4_1(void) { return !!sgi_domain_ops; } +static bool has_v4_1_sgi(void) +{ + return has_v4_1() && gic_cpuif_has_vsgi(); +} + static int its_alloc_vcpu_sgis(struct its_vpe *vpe, int idx) { char *name; int sgi_base; - if (!has_v4_1()) + if (!has_v4_1_sgi()) return 0; name = kasprintf(GFP_KERNEL, "GICv4-sgi-%d", task_pid_nr(current)); @@ -182,7 +205,7 @@ static void its_free_sgi_irqs(struct its_vm *vm) { int i; - if (!has_v4_1()) + if (!has_v4_1_sgi()) return; for (i = 0; i < vm->nr_vpes; i++) { diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 943c3411ca10..2c63375bbd43 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -145,4 +145,6 @@ int its_init_v4(struct irq_domain *domain, const struct irq_domain_ops *vpe_ops, const struct irq_domain_ops *sgi_ops); +bool gic_cpuif_has_vsgi(void); + #endif From b68761da01114a64b9c521975c3bca6d10eeb950 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Apr 2021 10:35:50 +0100 Subject: [PATCH 46/53] ARM: PXA: Kill use of irq_create_strict_mappings() irq_create_strict_mappings() is a poor way to allow the use of a linear IRQ domain as a legacy one. Let's be upfront about it and use a legacy domain when appropriate. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210406093557.1073423-3-maz@kernel.org --- arch/arm/mach-pxa/pxa_cplds_irqs.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/arm/mach-pxa/pxa_cplds_irqs.c b/arch/arm/mach-pxa/pxa_cplds_irqs.c index 45c19ca96f7a..ec0d9b094744 100644 --- a/arch/arm/mach-pxa/pxa_cplds_irqs.c +++ b/arch/arm/mach-pxa/pxa_cplds_irqs.c @@ -147,22 +147,20 @@ static int cplds_probe(struct platform_device *pdev) } irq_set_irq_wake(fpga->irq, 1); - fpga->irqdomain = irq_domain_add_linear(pdev->dev.of_node, - CPLDS_NB_IRQ, - &cplds_irq_domain_ops, fpga); + if (base_irq) + fpga->irqdomain = irq_domain_add_legacy(pdev->dev.of_node, + CPLDS_NB_IRQ, + base_irq, 0, + &cplds_irq_domain_ops, + fpga); + else + fpga->irqdomain = irq_domain_add_linear(pdev->dev.of_node, + CPLDS_NB_IRQ, + &cplds_irq_domain_ops, + fpga); if (!fpga->irqdomain) return -ENODEV; - if (base_irq) { - ret = irq_create_strict_mappings(fpga->irqdomain, base_irq, 0, - CPLDS_NB_IRQ); - if (ret) { - dev_err(&pdev->dev, "couldn't create the irq mapping %d..%d\n", - base_irq, base_irq + CPLDS_NB_IRQ); - return ret; - } - } - return 0; } From 5f8b938bd790cff6542c7fe3c1495c71f89fef1b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Apr 2021 10:35:51 +0100 Subject: [PATCH 47/53] irqchip/jcore-aic: Kill use of irq_create_strict_mappings() irq_create_strict_mappings() is a poor way to allow the use of a linear IRQ domain as a legacy one. Let's be upfront about it. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210406093557.1073423-4-maz@kernel.org --- drivers/irqchip/irq-jcore-aic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-jcore-aic.c b/drivers/irqchip/irq-jcore-aic.c index 033bccb41455..5f47d8ee4ae3 100644 --- a/drivers/irqchip/irq-jcore-aic.c +++ b/drivers/irqchip/irq-jcore-aic.c @@ -100,11 +100,11 @@ static int __init aic_irq_of_init(struct device_node *node, jcore_aic.irq_unmask = noop; jcore_aic.name = "AIC"; - domain = irq_domain_add_linear(node, dom_sz, &jcore_aic_irqdomain_ops, + domain = irq_domain_add_legacy(node, dom_sz - min_irq, min_irq, min_irq, + &jcore_aic_irqdomain_ops, &jcore_aic); if (!domain) return -ENOMEM; - irq_create_strict_mappings(domain, min_irq, min_irq, dom_sz - min_irq); return 0; } From 1a0b05e435544cd53cd3936bdab425d88784b71a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 2 Apr 2021 16:02:37 +0100 Subject: [PATCH 48/53] irqdomain: Get rid of irq_create_strict_mappings() No user of this helper is left, remove it. Signed-off-by: Marc Zyngier --- include/linux/irqdomain.h | 3 --- kernel/irq/irqdomain.c | 32 -------------------------------- 2 files changed, 35 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index d2c61de208a8..7a1dd7b969b6 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -415,9 +415,6 @@ static inline unsigned int irq_linear_revmap(struct irq_domain *domain, extern unsigned int irq_find_mapping(struct irq_domain *host, irq_hw_number_t hwirq); extern unsigned int irq_create_direct_mapping(struct irq_domain *host); -extern int irq_create_strict_mappings(struct irq_domain *domain, - unsigned int irq_base, - irq_hw_number_t hwirq_base, int count); extern const struct irq_domain_ops irq_domain_simple_ops; diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 35c5a99f8884..24a3cefb3afe 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -703,38 +703,6 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain, } EXPORT_SYMBOL_GPL(irq_create_mapping_affinity); -/** - * irq_create_strict_mappings() - Map a range of hw irqs to fixed linux irqs - * @domain: domain owning the interrupt range - * @irq_base: beginning of linux IRQ range - * @hwirq_base: beginning of hardware IRQ range - * @count: Number of interrupts to map - * - * This routine is used for allocating and mapping a range of hardware - * irqs to linux irqs where the linux irq numbers are at pre-defined - * locations. For use by controllers that already have static mappings - * to insert in to the domain. - * - * 0 is returned upon success, while any failure to establish a static - * mapping is treated as an error. - */ -int irq_create_strict_mappings(struct irq_domain *domain, unsigned int irq_base, - irq_hw_number_t hwirq_base, int count) -{ - struct device_node *of_node; - int ret; - - of_node = irq_domain_get_of_node(domain); - ret = irq_alloc_descs(irq_base, irq_base, count, - of_node_to_nid(of_node)); - if (unlikely(ret < 0)) - return ret; - - irq_domain_associate_many(domain, irq_base, hwirq_base, count); - return 0; -} -EXPORT_SYMBOL_GPL(irq_create_strict_mappings); - static int irq_domain_translate(struct irq_domain *d, struct irq_fwspec *fwspec, irq_hw_number_t *hwirq, unsigned int *type) From 817aad5d08d2ee61de7353ecb4593b0df495b12e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Apr 2021 10:35:55 +0100 Subject: [PATCH 49/53] irqdomain: Drop references to recusive irqdomain setup It was never completely implemented, and was removed a long time ago. Adjust the documentation to reflect this. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210406093557.1073423-8-maz@kernel.org --- kernel/irq/irqdomain.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 24a3cefb3afe..6ab428f25a66 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1659,12 +1659,10 @@ void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs) /** * irq_domain_alloc_irqs_parent - Allocate interrupts from parent domain + * @domain: Domain below which interrupts must be allocated * @irq_base: Base IRQ number * @nr_irqs: Number of IRQs to allocate * @arg: Allocation data (arch/domain specific) - * - * Check whether the domain has been setup recursive. If not allocate - * through the parent domain. */ int irq_domain_alloc_irqs_parent(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs, @@ -1680,11 +1678,9 @@ EXPORT_SYMBOL_GPL(irq_domain_alloc_irqs_parent); /** * irq_domain_free_irqs_parent - Free interrupts from parent domain + * @domain: Domain below which interrupts must be freed * @irq_base: Base IRQ number * @nr_irqs: Number of IRQs to free - * - * Check whether the domain has been setup recursive. If not free - * through the parent domain. */ void irq_domain_free_irqs_parent(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs) From 529ea36818112530791a2ec083a1a3066be6174c Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 22 Apr 2021 16:53:28 +0200 Subject: [PATCH 50/53] irqchip: Add support for IDT 79rc3243x interrupt controller IDT 79rc3243x SoCs have rather simple interrupt controllers connected to the MIPS CPU interrupt lines. Each of them has room for up to 32 interrupts. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210422145330.73452-1-tsbogend@alpha.franken.de --- drivers/irqchip/Kconfig | 5 ++ drivers/irqchip/Makefile | 1 + drivers/irqchip/irq-idt3243x.c | 124 +++++++++++++++++++++++++++++++++ 3 files changed, 130 insertions(+) create mode 100644 drivers/irqchip/irq-idt3243x.c diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 715eb4366e35..18b0d0b33b8b 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -583,4 +583,9 @@ config WPCM450_AIC help Support for the interrupt controller in the Nuvoton WPCM450 BMC SoC. +config IRQ_IDT3243X + bool + select GENERIC_IRQ_CHIP + select IRQ_DOMAIN + endmenu diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index bef57937e729..18573602a939 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -114,3 +114,4 @@ obj-$(CONFIG_MST_IRQ) += irq-mst-intc.o obj-$(CONFIG_SL28CPLD_INTC) += irq-sl28cpld.o obj-$(CONFIG_MACH_REALTEK_RTL) += irq-realtek-rtl.o obj-$(CONFIG_WPCM450_AIC) += irq-wpcm450-aic.o +obj-$(CONFIG_IRQ_IDT3243X) += irq-idt3243x.o diff --git a/drivers/irqchip/irq-idt3243x.c b/drivers/irqchip/irq-idt3243x.c new file mode 100644 index 000000000000..f0996820077a --- /dev/null +++ b/drivers/irqchip/irq-idt3243x.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Driver for IDT/Renesas 79RC3243x Interrupt Controller. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +#define IDT_PIC_NR_IRQS 32 + +#define IDT_PIC_IRQ_PEND 0x00 +#define IDT_PIC_IRQ_MASK 0x08 + +struct idt_pic_data { + void __iomem *base; + struct irq_domain *irq_domain; + struct irq_chip_generic *gc; +}; + +static void idt_irq_dispatch(struct irq_desc *desc) +{ + struct idt_pic_data *idtpic = irq_desc_get_handler_data(desc); + struct irq_chip *host_chip = irq_desc_get_chip(desc); + u32 pending, hwirq, virq; + + chained_irq_enter(host_chip, desc); + + pending = irq_reg_readl(idtpic->gc, IDT_PIC_IRQ_PEND); + pending &= ~idtpic->gc->mask_cache; + while (pending) { + hwirq = __fls(pending); + virq = irq_linear_revmap(idtpic->irq_domain, hwirq); + if (virq) + generic_handle_irq(virq); + pending &= ~(1 << hwirq); + } + + chained_irq_exit(host_chip, desc); +} + +static int idt_pic_init(struct device_node *of_node, struct device_node *parent) +{ + struct irq_domain *domain; + struct idt_pic_data *idtpic; + struct irq_chip_generic *gc; + struct irq_chip_type *ct; + unsigned int parent_irq; + int ret = 0; + + idtpic = kzalloc(sizeof(*idtpic), GFP_KERNEL); + if (!idtpic) { + ret = -ENOMEM; + goto out_err; + } + + parent_irq = irq_of_parse_and_map(of_node, 0); + if (!parent_irq) { + pr_err("Failed to map parent IRQ!\n"); + ret = -EINVAL; + goto out_free; + } + + idtpic->base = of_iomap(of_node, 0); + if (!idtpic->base) { + pr_err("Failed to map base address!\n"); + ret = -ENOMEM; + goto out_unmap_irq; + } + + domain = irq_domain_add_linear(of_node, IDT_PIC_NR_IRQS, + &irq_generic_chip_ops, NULL); + if (!domain) { + pr_err("Failed to add irqdomain!\n"); + ret = -ENOMEM; + goto out_iounmap; + } + idtpic->irq_domain = domain; + + ret = irq_alloc_domain_generic_chips(domain, 32, 1, "IDTPIC", + handle_level_irq, 0, + IRQ_NOPROBE | IRQ_LEVEL, 0); + if (ret) + goto out_domain_remove; + + gc = irq_get_domain_generic_chip(domain, 0); + gc->reg_base = idtpic->base; + gc->private = idtpic; + + ct = gc->chip_types; + ct->regs.mask = IDT_PIC_IRQ_MASK; + ct->chip.irq_mask = irq_gc_mask_set_bit; + ct->chip.irq_unmask = irq_gc_mask_clr_bit; + idtpic->gc = gc; + + /* Mask interrupts. */ + writel(0xffffffff, idtpic->base + IDT_PIC_IRQ_MASK); + gc->mask_cache = 0xffffffff; + + irq_set_chained_handler_and_data(parent_irq, + idt_irq_dispatch, idtpic); + + return 0; + +out_domain_remove: + irq_domain_remove(domain); +out_iounmap: + iounmap(idtpic->base); +out_unmap_irq: + irq_dispose_mapping(parent_irq); +out_free: + kfree(idtpic); +out_err: + pr_err("Failed to initialize! (errno = %d)\n", ret); + return ret; +} + +IRQCHIP_DECLARE(idt_pic, "idt,32434-pic", idt_pic_init); From 05d7bf817019890e4d049e0b851940c596adbd9b Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 22 Apr 2021 16:53:29 +0200 Subject: [PATCH 51/53] dt-bindings: interrupt-controller: Add IDT 79RC3243x Interrupt Controller Document DT bindings for IDT 79RC3243x Interrupt Controller. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210422145330.73452-2-tsbogend@alpha.franken.de --- .../interrupt-controller/idt,32434-pic.yaml | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 Documentation/devicetree/bindings/interrupt-controller/idt,32434-pic.yaml diff --git a/Documentation/devicetree/bindings/interrupt-controller/idt,32434-pic.yaml b/Documentation/devicetree/bindings/interrupt-controller/idt,32434-pic.yaml new file mode 100644 index 000000000000..df5d8d1ead70 --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/idt,32434-pic.yaml @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interrupt-controller/idt,32434-pic.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: IDT 79RC32434 Interrupt Controller Device Tree Bindings + +maintainers: + - Thomas Bogendoerfer + +allOf: + - $ref: /schemas/interrupt-controller.yaml# + +properties: + "#interrupt-cells": + const: 1 + + compatible: + const: idt,32434-pic + + reg: + maxItems: 1 + + interrupt-controller: true + +required: + - "#interrupt-cells" + - compatible + - reg + - interrupt-controller + +additionalProperties: false + +examples: + - | + idtpic3: interrupt-controller@3800c { + compatible = "idt,32434-pic"; + reg = <0x3800c 0x0c>; + + interrupt-controller; + #interrupt-cells = <1>; + + interrupt-parent = <&cpuintc>; + interrupts = <3>; + }; + +... From a97709f563a078e259bf0861cd259aa60332890a Mon Sep 17 00:00:00 2001 From: He Ying Date: Fri, 23 Apr 2021 04:35:16 -0400 Subject: [PATCH 52/53] irqchip/gic-v3: Do not enable irqs when handling spurious interrups We triggered the following error while running our 4.19 kernel with the pseudo-NMI patches backported to it: [ 14.816231] ------------[ cut here ]------------ [ 14.816231] kernel BUG at irq.c:99! [ 14.816232] Internal error: Oops - BUG: 0 [#1] SMP [ 14.816232] Process swapper/0 (pid: 0, stack limit = 0x(____ptrval____)) [ 14.816233] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G O 4.19.95.aarch64 #14 [ 14.816233] Hardware name: evb (DT) [ 14.816234] pstate: 80400085 (Nzcv daIf +PAN -UAO) [ 14.816234] pc : asm_nmi_enter+0x94/0x98 [ 14.816235] lr : asm_nmi_enter+0x18/0x98 [ 14.816235] sp : ffff000008003c50 [ 14.816235] pmr_save: 00000070 [ 14.816237] x29: ffff000008003c50 x28: ffff0000095f56c0 [ 14.816238] x27: 0000000000000000 x26: ffff000008004000 [ 14.816239] x25: 00000000015e0000 x24: ffff8008fb916000 [ 14.816240] x23: 0000000020400005 x22: ffff0000080817cc [ 14.816241] x21: ffff000008003da0 x20: 0000000000000060 [ 14.816242] x19: 00000000000003ff x18: ffffffffffffffff [ 14.816243] x17: 0000000000000008 x16: 003d090000000000 [ 14.816244] x15: ffff0000095ea6c8 x14: ffff8008fff5ab40 [ 14.816244] x13: ffff8008fff58b9d x12: 0000000000000000 [ 14.816245] x11: ffff000008c8a200 x10: 000000008e31fca5 [ 14.816246] x9 : ffff000008c8a208 x8 : 000000000000000f [ 14.816247] x7 : 0000000000000004 x6 : ffff8008fff58b9e [ 14.816248] x5 : 0000000000000000 x4 : 0000000080000000 [ 14.816249] x3 : 0000000000000000 x2 : 0000000080000000 [ 14.816250] x1 : 0000000000120000 x0 : ffff0000095f56c0 [ 14.816251] Call trace: [ 14.816251] asm_nmi_enter+0x94/0x98 [ 14.816251] el1_irq+0x8c/0x180 (IRQ C) [ 14.816252] gic_handle_irq+0xbc/0x2e4 [ 14.816252] el1_irq+0xcc/0x180 (IRQ B) [ 14.816253] arch_timer_handler_virt+0x38/0x58 [ 14.816253] handle_percpu_devid_irq+0x90/0x240 [ 14.816253] generic_handle_irq+0x34/0x50 [ 14.816254] __handle_domain_irq+0x68/0xc0 [ 14.816254] gic_handle_irq+0xf8/0x2e4 [ 14.816255] el1_irq+0xcc/0x180 (IRQ A) [ 14.816255] arch_cpu_idle+0x34/0x1c8 [ 14.816255] default_idle_call+0x24/0x44 [ 14.816256] do_idle+0x1d0/0x2c8 [ 14.816256] cpu_startup_entry+0x28/0x30 [ 14.816256] rest_init+0xb8/0xc8 [ 14.816257] start_kernel+0x4c8/0x4f4 [ 14.816257] Code: 940587f1 d5384100 b9401001 36a7fd01 (d4210000) [ 14.816258] Modules linked in: start_dp(O) smeth(O) [ 15.103092] ---[ end trace 701753956cb14aa8 ]--- [ 15.103093] Kernel panic - not syncing: Fatal exception in interrupt [ 15.103099] SMP: stopping secondary CPUs [ 15.103100] Kernel Offset: disabled [ 15.103100] CPU features: 0x36,a2400218 [ 15.103100] Memory Limit: none which is cause by a 'BUG_ON(in_nmi())' in nmi_enter(). From the call trace, we can find three interrupts (noted A, B, C above): interrupt (A) is preempted by (B), which is further interrupted by (C). Subsequent investigations show that (B) results in nmi_enter() being called, but that it actually is a spurious interrupt. Furthermore, interrupts are reenabled in the context of (B), and (C) fires with NMI priority. We end-up with a nested NMI situation, something we definitely do not want to (and cannot) handle. The bug here is that spurious interrupts should never result in any state change, and we should just return to the interrupted context. Moving the handling of spurious interrupts as early as possible in the GICv3 handler fixes this issue. Fixes: 3f1f3234bc2d ("irqchip/gic-v3: Switch to PMR masking before calling IRQ handler") Acked-by: Mark Rutland Signed-off-by: He Ying [maz: rewrote commit message, corrected Fixes: tag] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210423083516.170111-1-heying24@huawei.com Cc: stable@vger.kernel.org --- drivers/irqchip/irq-gic-v3.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index eb0ee356a629..00404024d7cd 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -648,6 +648,10 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs irqnr = gic_read_iar(); + /* Check for special IDs first */ + if ((irqnr >= 1020 && irqnr <= 1023)) + return; + if (gic_supports_nmi() && unlikely(gic_read_rpr() == GICD_INT_NMI_PRI)) { gic_handle_nmi(irqnr, regs); @@ -659,10 +663,6 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs gic_arch_enable_irqs(); } - /* Check for special IDs first */ - if ((irqnr >= 1020 && irqnr <= 1023)) - return; - if (static_branch_likely(&supports_deactivate_key)) gic_write_eoir(irqnr); else From debf69cfd4c618c7036a13cc4edd1faf87ce7d53 Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Fri, 23 Apr 2021 12:58:53 -0600 Subject: [PATCH 53/53] irqchip/xilinx: Expose Kconfig option for Zynq/ZynqMP Previously the XILINX_INTC config option was hidden and only auto-selected on the MicroBlaze platform. However, this IP can also be used on the Zynq and ZynqMP platforms as a secondary cascaded controller. Allow this option to be user-enabled on those platforms. Signed-off-by: Robert Hancock Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210423185853.2556087-1-robert.hancock@calian.com --- drivers/irqchip/Kconfig | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 18b0d0b33b8b..c8f57e3e058d 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -279,8 +279,13 @@ config XTENSA_MX select GENERIC_IRQ_EFFECTIVE_AFF_MASK config XILINX_INTC - bool + bool "Xilinx Interrupt Controller IP" + depends on MICROBLAZE || ARCH_ZYNQ || ARCH_ZYNQMP select IRQ_DOMAIN + help + Support for the Xilinx Interrupt Controller IP core. + This is used as a primary controller with MicroBlaze and can also + be used as a secondary chained controller on other platforms. config IRQ_CROSSBAR bool