x86, sparseirq: move irq_desc according to smp_affinity, v7
Impact: improve NUMA handling by migrating irq_desc on smp_affinity changes if CONFIG_NUMA_MIGRATE_IRQ_DESC is set: - make irq_desc follow the irq's affinity (i.e. irq_desc moving) - call move_irq_desc() in irq_complete_move() - legacy irq_descs are not moved, because they are allocated in a static array. For logical apic mode, move_desc_in_progress_in_same_domain needs to be added, otherwise the irq_desc will not be moved ==> two phases could also be needed to get the irq_desc moved. Signed-off-by: Yinghai Lu <yinghai@kernel.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
13bd41bc22
commit
48a1b10aff
|
@ -248,6 +248,15 @@ config SPARSE_IRQ
|
|||
|
||||
If you don't know what to do here, say Y.
|
||||
|
||||
config NUMA_MIGRATE_IRQ_DESC
|
||||
bool "Move irq desc when changing irq smp_affinity"
|
||||
depends on SPARSE_IRQ && SMP
|
||||
default n
|
||||
help
|
||||
This enables moving irq_desc to the cpu/node on which the irq will be handled.
|
||||
|
||||
If you don't know what to do here, say N.
|
||||
|
||||
config X86_FIND_SMP_CONFIG
|
||||
def_bool y
|
||||
depends on X86_MPPARSE || X86_VOYAGER
|
||||
|
|
|
@ -141,6 +141,9 @@ struct irq_cfg {
|
|||
unsigned move_cleanup_count;
|
||||
u8 vector;
|
||||
u8 move_in_progress : 1;
|
||||
#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
|
||||
u8 move_desc_pending : 1;
|
||||
#endif
|
||||
};
|
||||
|
||||
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
|
||||
|
@ -223,6 +226,121 @@ void arch_init_chip_data(struct irq_desc *desc, int cpu)
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
|
||||
|
||||
static void
|
||||
init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
|
||||
{
|
||||
struct irq_pin_list *old_entry, *head, *tail, *entry;
|
||||
|
||||
cfg->irq_2_pin = NULL;
|
||||
old_entry = old_cfg->irq_2_pin;
|
||||
if (!old_entry)
|
||||
return;
|
||||
|
||||
entry = get_one_free_irq_2_pin(cpu);
|
||||
if (!entry)
|
||||
return;
|
||||
|
||||
entry->apic = old_entry->apic;
|
||||
entry->pin = old_entry->pin;
|
||||
head = entry;
|
||||
tail = entry;
|
||||
old_entry = old_entry->next;
|
||||
while (old_entry) {
|
||||
entry = get_one_free_irq_2_pin(cpu);
|
||||
if (!entry) {
|
||||
entry = head;
|
||||
while (entry) {
|
||||
head = entry->next;
|
||||
kfree(entry);
|
||||
entry = head;
|
||||
}
|
||||
/* still use the old one */
|
||||
return;
|
||||
}
|
||||
entry->apic = old_entry->apic;
|
||||
entry->pin = old_entry->pin;
|
||||
tail->next = entry;
|
||||
tail = entry;
|
||||
old_entry = old_entry->next;
|
||||
}
|
||||
|
||||
tail->next = NULL;
|
||||
cfg->irq_2_pin = head;
|
||||
}
|
||||
|
||||
static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
|
||||
{
|
||||
struct irq_pin_list *entry, *next;
|
||||
|
||||
if (old_cfg->irq_2_pin == cfg->irq_2_pin)
|
||||
return;
|
||||
|
||||
entry = old_cfg->irq_2_pin;
|
||||
|
||||
while (entry) {
|
||||
next = entry->next;
|
||||
kfree(entry);
|
||||
entry = next;
|
||||
}
|
||||
old_cfg->irq_2_pin = NULL;
|
||||
}
|
||||
|
||||
void arch_init_copy_chip_data(struct irq_desc *old_desc,
|
||||
struct irq_desc *desc, int cpu)
|
||||
{
|
||||
struct irq_cfg *cfg;
|
||||
struct irq_cfg *old_cfg;
|
||||
|
||||
cfg = get_one_free_irq_cfg(cpu);
|
||||
|
||||
if (!cfg)
|
||||
return;
|
||||
|
||||
desc->chip_data = cfg;
|
||||
|
||||
old_cfg = old_desc->chip_data;
|
||||
|
||||
memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
|
||||
|
||||
init_copy_irq_2_pin(old_cfg, cfg, cpu);
|
||||
}
|
||||
|
||||
/* Hand an irq_cfg that is no longer needed back via kfree(). */
static void free_irq_cfg(struct irq_cfg *old_cfg)
{
	kfree(old_cfg);
}
|
||||
|
||||
void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
|
||||
{
|
||||
struct irq_cfg *old_cfg, *cfg;
|
||||
|
||||
old_cfg = old_desc->chip_data;
|
||||
cfg = desc->chip_data;
|
||||
|
||||
if (old_cfg == cfg)
|
||||
return;
|
||||
|
||||
if (old_cfg) {
|
||||
free_irq_2_pin(old_cfg, cfg);
|
||||
free_irq_cfg(old_cfg);
|
||||
old_desc->chip_data = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Flag @desc for a descriptor move when no vector move is in progress
 * (i.e. the domain did not change) and the current affinity has no cpu
 * in common with @mask.
 */
static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
{
	struct irq_cfg *cfg = desc->chip_data;

	/* a move is already in flight, nothing extra to request */
	if (cfg->move_in_progress)
		return;

	/* affinity still overlaps the requested mask */
	if (cpus_intersects(desc->affinity, mask))
		return;

	cfg->move_desc_pending = 1;
}
|
||||
#endif
|
||||
|
||||
#else
|
||||
static struct irq_cfg *irq_cfg(unsigned int irq)
|
||||
{
|
||||
|
@ -231,9 +349,11 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
|
|||
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
|
||||
/* Without CONFIG_NUMA_MIGRATE_IRQ_DESC there is nothing to flag: no-op. */
static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
{
}
|
||||
#endif
|
||||
|
||||
struct io_apic {
|
||||
unsigned int index;
|
||||
|
@ -2346,14 +2466,34 @@ static void irq_complete_move(struct irq_desc **descp)
|
|||
struct irq_cfg *cfg = desc->chip_data;
|
||||
unsigned vector, me;
|
||||
|
||||
if (likely(!cfg->move_in_progress))
|
||||
if (likely(!cfg->move_in_progress)) {
|
||||
#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
|
||||
if (likely(!cfg->move_desc_pending))
|
||||
return;
|
||||
|
||||
/* domain has not changed, but affinity has */
|
||||
me = smp_processor_id();
|
||||
if (cpu_isset(me, desc->affinity)) {
|
||||
*descp = desc = move_irq_desc(desc, me);
|
||||
/* get the new one */
|
||||
cfg = desc->chip_data;
|
||||
cfg->move_desc_pending = 0;
|
||||
}
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
vector = ~get_irq_regs()->orig_ax;
|
||||
me = smp_processor_id();
|
||||
if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
|
||||
cpumask_t cleanup_mask;
|
||||
|
||||
#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
|
||||
*descp = desc = move_irq_desc(desc, me);
|
||||
/* get the new one */
|
||||
cfg = desc->chip_data;
|
||||
#endif
|
||||
|
||||
cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
|
||||
cfg->move_cleanup_count = cpus_weight(cleanup_mask);
|
||||
send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
|
||||
|
|
|
@ -227,6 +227,16 @@ extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
|
|||
|
||||
#endif
|
||||
|
||||
/*
 * Return the descriptor to use for @irq from here on.
 *
 * With CONFIG_NUMA_MIGRATE_IRQ_DESC the descriptor is looked up again
 * through irq_to_desc(@irq); otherwise the @desc passed in is returned
 * unchanged.
 */
static inline struct irq_desc *
irq_remap_to_desc(unsigned int irq, struct irq_desc *desc)
{
#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
	return desc;
#else
	return irq_to_desc(irq);
#endif
}
|
||||
|
||||
/*
|
||||
* Migration helpers for obsolete names, they will go away:
|
||||
*/
|
||||
|
|
|
@ -3,3 +3,4 @@ obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o
|
|||
obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
|
||||
obj-$(CONFIG_PROC_FS) += proc.o
|
||||
obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
|
||||
obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o
|
||||
|
|
|
@ -353,6 +353,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
|
|||
|
||||
spin_lock(&desc->lock);
|
||||
mask_ack_irq(desc, irq);
|
||||
desc = irq_remap_to_desc(irq, desc);
|
||||
|
||||
if (unlikely(desc->status & IRQ_INPROGRESS))
|
||||
goto out_unlock;
|
||||
|
@ -430,6 +431,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
|
|||
desc->status &= ~IRQ_INPROGRESS;
|
||||
out:
|
||||
desc->chip->eoi(irq);
|
||||
desc = irq_remap_to_desc(irq, desc);
|
||||
|
||||
spin_unlock(&desc->lock);
|
||||
}
|
||||
|
@ -466,12 +468,14 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
|
|||
!desc->action)) {
|
||||
desc->status |= (IRQ_PENDING | IRQ_MASKED);
|
||||
mask_ack_irq(desc, irq);
|
||||
desc = irq_remap_to_desc(irq, desc);
|
||||
goto out_unlock;
|
||||
}
|
||||
kstat_incr_irqs_this_cpu(irq, desc);
|
||||
|
||||
/* Start handling the irq */
|
||||
desc->chip->ack(irq);
|
||||
desc = irq_remap_to_desc(irq, desc);
|
||||
|
||||
/* Mark the IRQ currently in progress.*/
|
||||
desc->status |= IRQ_INPROGRESS;
|
||||
|
@ -532,8 +536,10 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
|
|||
if (!noirqdebug)
|
||||
note_interrupt(irq, desc, action_ret);
|
||||
|
||||
if (desc->chip->eoi)
|
||||
if (desc->chip->eoi) {
|
||||
desc->chip->eoi(irq);
|
||||
desc = irq_remap_to_desc(irq, desc);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -568,8 +574,10 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
|
|||
|
||||
/* Uninstall? */
|
||||
if (handle == handle_bad_irq) {
|
||||
if (desc->chip != &no_irq_chip)
|
||||
if (desc->chip != &no_irq_chip) {
|
||||
mask_ack_irq(desc, irq);
|
||||
desc = irq_remap_to_desc(irq, desc);
|
||||
}
|
||||
desc->status |= IRQ_DISABLED;
|
||||
desc->depth = 1;
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
/*
|
||||
* lockdep: we want to handle all irq_desc locks as a single lock-class:
|
||||
*/
|
||||
static struct lock_class_key irq_desc_lock_class;
|
||||
struct lock_class_key irq_desc_lock_class;
|
||||
|
||||
/**
|
||||
* handle_bad_irq - handle spurious and unhandled irqs
|
||||
|
@ -73,7 +73,7 @@ static struct irq_desc irq_desc_init = {
|
|||
#endif
|
||||
};
|
||||
|
||||
static void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
|
||||
void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
|
||||
{
|
||||
unsigned long bytes;
|
||||
char *ptr;
|
||||
|
@ -113,7 +113,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
|
|||
/*
|
||||
* Protect the sparse_irqs:
|
||||
*/
|
||||
static DEFINE_SPINLOCK(sparse_irq_lock);
|
||||
DEFINE_SPINLOCK(sparse_irq_lock);
|
||||
|
||||
struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly;
|
||||
|
||||
|
@ -337,8 +337,11 @@ unsigned int __do_IRQ(unsigned int irq)
|
|||
/*
|
||||
* No locking required for CPU-local interrupts:
|
||||
*/
|
||||
if (desc->chip->ack)
|
||||
if (desc->chip->ack) {
|
||||
desc->chip->ack(irq);
|
||||
/* get new one */
|
||||
desc = irq_remap_to_desc(irq, desc);
|
||||
}
|
||||
if (likely(!(desc->status & IRQ_DISABLED))) {
|
||||
action_ret = handle_IRQ_event(irq, desc->action);
|
||||
if (!noirqdebug)
|
||||
|
@ -349,8 +352,10 @@ unsigned int __do_IRQ(unsigned int irq)
|
|||
}
|
||||
|
||||
spin_lock(&desc->lock);
|
||||
if (desc->chip->ack)
|
||||
if (desc->chip->ack) {
|
||||
desc->chip->ack(irq);
|
||||
desc = irq_remap_to_desc(irq, desc);
|
||||
}
|
||||
/*
|
||||
* REPLAY is when Linux resends an IRQ that was dropped earlier
|
||||
* WAITING is used by probe to mark irqs that are being tested
|
||||
|
|
|
@ -13,6 +13,11 @@ extern void compat_irq_chip_set_default_handler(struct irq_desc *desc);
|
|||
extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
|
||||
unsigned long flags);
|
||||
|
||||
extern struct lock_class_key irq_desc_lock_class;
|
||||
extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr);
|
||||
extern spinlock_t sparse_irq_lock;
|
||||
extern struct irq_desc *irq_desc_ptrs[NR_IRQS];
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
extern void register_irq_proc(unsigned int irq, struct irq_desc *desc);
|
||||
extern void register_handler_proc(unsigned int irq, struct irqaction *action);
|
||||
|
|
|
@ -0,0 +1,127 @@
|
|||
/*
|
||||
* linux/kernel/irq/numa_migrate.c
|
||||
*
|
||||
* Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
|
||||
* Copyright (C) 2005-2006, Thomas Gleixner, Russell King
|
||||
*
|
||||
* This file contains the NUMA irq_desc migration code.
|
||||
*
|
||||
* Detailed information is available in Documentation/DocBook/genericirq
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/irq.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/kernel_stat.h>
|
||||
|
||||
#include "internals.h"
|
||||
|
||||
static void init_copy_kstat_irqs(struct irq_desc *old_desc,
|
||||
struct irq_desc *desc,
|
||||
int cpu, int nr)
|
||||
{
|
||||
unsigned long bytes;
|
||||
|
||||
init_kstat_irqs(desc, cpu, nr);
|
||||
|
||||
if (desc->kstat_irqs != old_desc->kstat_irqs) {
|
||||
/* Compute how many bytes we need per irq and allocate them */
|
||||
bytes = nr * sizeof(unsigned int);
|
||||
|
||||
memcpy(desc->kstat_irqs, old_desc->kstat_irqs, bytes);
|
||||
}
|
||||
}
|
||||
|
||||
static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc)
|
||||
{
|
||||
if (old_desc->kstat_irqs == desc->kstat_irqs)
|
||||
return;
|
||||
|
||||
kfree(old_desc->kstat_irqs);
|
||||
old_desc->kstat_irqs = NULL;
|
||||
}
|
||||
|
||||
static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
|
||||
struct irq_desc *desc, int cpu)
|
||||
{
|
||||
memcpy(desc, old_desc, sizeof(struct irq_desc));
|
||||
desc->cpu = cpu;
|
||||
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
|
||||
init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids);
|
||||
arch_init_copy_chip_data(old_desc, desc, cpu);
|
||||
}
|
||||
|
||||
/*
 * Release the per-descriptor data of @old_desc (kstat_irqs array and
 * arch chip data); each helper leaves alone whatever @desc still shares.
 */
static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
{
	free_kstat_irqs(old_desc, desc);
	arch_free_chip_data(old_desc, desc);
}
|
||||
|
||||
static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
|
||||
int cpu)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
unsigned int irq;
|
||||
unsigned long flags;
|
||||
int node;
|
||||
|
||||
irq = old_desc->irq;
|
||||
|
||||
spin_lock_irqsave(&sparse_irq_lock, flags);
|
||||
|
||||
/* We have to check it to avoid races with another CPU */
|
||||
desc = irq_desc_ptrs[irq];
|
||||
|
||||
if (desc && old_desc != desc)
|
||||
goto out_unlock;
|
||||
|
||||
node = cpu_to_node(cpu);
|
||||
desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
|
||||
printk(KERN_DEBUG " move irq_desc for %d to cpu %d node %d\n",
|
||||
irq, cpu, node);
|
||||
if (!desc) {
|
||||
printk(KERN_ERR "can not get new irq_desc for moving\n");
|
||||
/* still use old one */
|
||||
desc = old_desc;
|
||||
goto out_unlock;
|
||||
}
|
||||
init_copy_one_irq_desc(irq, old_desc, desc, cpu);
|
||||
|
||||
irq_desc_ptrs[irq] = desc;
|
||||
|
||||
/* free the old one */
|
||||
free_one_irq_desc(old_desc, desc);
|
||||
kfree(old_desc);
|
||||
|
||||
out_unlock:
|
||||
spin_unlock_irqrestore(&sparse_irq_lock, flags);
|
||||
|
||||
return desc;
|
||||
}
|
||||
|
||||
struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu)
|
||||
{
|
||||
int old_cpu;
|
||||
int node, old_node;
|
||||
|
||||
/* those all static, do move them */
|
||||
if (desc->irq < NR_IRQS_LEGACY)
|
||||
return desc;
|
||||
|
||||
old_cpu = desc->cpu;
|
||||
printk(KERN_DEBUG
|
||||
"try to move irq_desc from cpu %d to %d\n", old_cpu, cpu);
|
||||
if (old_cpu != cpu) {
|
||||
node = cpu_to_node(cpu);
|
||||
old_node = cpu_to_node(old_cpu);
|
||||
if (old_node != node)
|
||||
desc = __real_move_irq_desc(desc, cpu);
|
||||
else
|
||||
desc->cpu = cpu;
|
||||
}
|
||||
|
||||
return desc;
|
||||
}
|
||||
|
Loading…
Reference in New Issue