x86/asm/entry/irq: Simplify interrupt dispatch table (IDT) layout

Interrupt entry points are handled with the following code,
each 32-byte code block contains seven entry points:

		...
		[push][jump 22] // 4 bytes
		[push][jump 18] // 4 bytes
		[push][jump 14] // 4 bytes
		[push][jump 10] // 4 bytes
		[push][jump  6] // 4 bytes
		[push][jump  2] // 4 bytes
		[push][jump common_interrupt][padding] // 8 bytes

		[push][jump]
		[push][jump]
		[push][jump]
		[push][jump]
		[push][jump]
		[push][jump]
		[push][jump common_interrupt][padding]

		[padding_2]
	common_interrupt:

And there is a table which holds pointers to every entry point,
IOW: to every push.

In cold cache, two jumps are still costlier than one, even
though we get the benefit of them residing in the same
cacheline.

This change replaces short jumps with near ones to
'common_interrupt', and pads every push+jump pair to 8 bytes. This
way, each interrupt takes only one jump.

This change replaces ".p2align CONFIG_X86_L1_CACHE_SHIFT" before
dispatch table with ".align 8" - we do not need anything
stronger than that.

The table of entry addresses (the interrupt[] array) is no
longer necessary, the address of entries can be easily
calculated as (irq_entries_start + i*8).

   text	   data	    bss	    dec	    hex	filename
  12546	      0	      0	  12546	   3102	entry_64.o.before
  11626	      0	      0	  11626	   2d6a	entry_64.o

The size decrease is because 1656 bytes of .init.rodata are
gone. That's initdata, though. The resident size does go up a
bit.

Run-tested (32 and 64 bits).

Acked-and-Tested-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Will Drewry <wad@chromium.org>
Link: http://lkml.kernel.org/r/1428090553-7283-1-git-send-email-dvlasenk@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Denys Vlasenko 2015-04-03 21:49:13 +02:00 committed by Ingo Molnar
parent fffbb5dcfd
commit 3304c9c37b
5 changed files with 26 additions and 67 deletions

View File

@ -181,10 +181,9 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *);
extern __visible void smp_invalidate_interrupt(struct pt_regs *); extern __visible void smp_invalidate_interrupt(struct pt_regs *);
#endif #endif
extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR extern char irq_entries_start[];
- FIRST_EXTERNAL_VECTOR])(void);
#ifdef CONFIG_TRACING #ifdef CONFIG_TRACING
#define trace_interrupt interrupt #define trace_irq_entries_start irq_entries_start
#endif #endif
#define VECTOR_UNDEFINED (-1) #define VECTOR_UNDEFINED (-1)

View File

@ -723,43 +723,22 @@ END(sysenter_badsys)
.endm .endm
/* /*
* Build the entry stubs and pointer table with some assembler magic. * Build the entry stubs with some assembler magic.
* We pack 7 stubs into a single 32-byte chunk, which will fit in a * We pack 1 stub into every 8-byte block.
* single cache line on all modern x86 implementations.
*/ */
.section .init.rodata,"a" .align 8
ENTRY(interrupt)
.section .entry.text, "ax"
.p2align 5
.p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start) ENTRY(irq_entries_start)
RING0_INT_FRAME RING0_INT_FRAME
vector=FIRST_EXTERNAL_VECTOR vector=FIRST_EXTERNAL_VECTOR
.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7 .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
.balign 32 pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */
.rept 7
.if vector < FIRST_SYSTEM_VECTOR
.if vector <> FIRST_EXTERNAL_VECTOR
CFI_ADJUST_CFA_OFFSET -4
.endif
1: pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */
.if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
jmp 2f
.endif
.previous
.long 1b
.section .entry.text, "ax"
vector=vector+1 vector=vector+1
.endif jmp common_interrupt
.endr CFI_ADJUST_CFA_OFFSET -4
2: jmp common_interrupt .align 8
.endr .endr
END(irq_entries_start) END(irq_entries_start)
.previous
END(interrupt)
.previous
/* /*
* the CPU automatically disables interrupts when executing an IRQ vector, * the CPU automatically disables interrupts when executing an IRQ vector,
* so IRQ-flags tracing has to follow that: * so IRQ-flags tracing has to follow that:

View File

@ -608,44 +608,23 @@ ENTRY(ret_from_fork)
END(ret_from_fork) END(ret_from_fork)
/* /*
* Build the entry stubs and pointer table with some assembler magic. * Build the entry stubs with some assembler magic.
* We pack 7 stubs into a single 32-byte chunk, which will fit in a * We pack 1 stub into every 8-byte block.
* single cache line on all modern x86 implementations.
*/ */
.section .init.rodata,"a" .align 8
ENTRY(interrupt)
.section .entry.text
.p2align 5
.p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start) ENTRY(irq_entries_start)
INTR_FRAME INTR_FRAME
vector=FIRST_EXTERNAL_VECTOR vector=FIRST_EXTERNAL_VECTOR
.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7 .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
.balign 32 pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */
.rept 7
.if vector < FIRST_SYSTEM_VECTOR
.if vector <> FIRST_EXTERNAL_VECTOR
CFI_ADJUST_CFA_OFFSET -8
.endif
1: pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */
.if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
jmp 2f
.endif
.previous
.quad 1b
.section .entry.text
vector=vector+1 vector=vector+1
.endif jmp common_interrupt
.endr CFI_ADJUST_CFA_OFFSET -8
2: jmp common_interrupt .align 8
.endr .endr
CFI_ENDPROC CFI_ENDPROC
END(irq_entries_start) END(irq_entries_start)
.previous
END(interrupt)
.previous
/* /*
* Interrupt entry/exit. * Interrupt entry/exit.
* *

View File

@ -178,7 +178,8 @@ void __init native_init_IRQ(void)
#endif #endif
for_each_clear_bit_from(i, used_vectors, first_system_vector) { for_each_clear_bit_from(i, used_vectors, first_system_vector) {
/* IA32_SYSCALL_VECTOR could be used in trap_init already. */ /* IA32_SYSCALL_VECTOR could be used in trap_init already. */
set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); set_intr_gate(i, irq_entries_start +
8 * (i - FIRST_EXTERNAL_VECTOR));
} }
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
for_each_clear_bit_from(i, used_vectors, NR_VECTORS) for_each_clear_bit_from(i, used_vectors, NR_VECTORS)

View File

@ -868,7 +868,8 @@ static void __init lguest_init_IRQ(void)
/* Some systems map "vectors" to interrupts weirdly. Not us! */ /* Some systems map "vectors" to interrupts weirdly. Not us! */
__this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR); __this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR);
if (i != SYSCALL_VECTOR) if (i != SYSCALL_VECTOR)
set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); set_intr_gate(i, irq_entries_start +
8 * (i - FIRST_EXTERNAL_VECTOR));
} }
/* /*