2009-04-29 15:47:18 +08:00
|
|
|
/*
|
|
|
|
* ld script for the x86 kernel
|
|
|
|
*
|
|
|
|
* Historic 32-bit version written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
|
|
|
|
*
|
2009-04-29 16:58:38 +08:00
|
|
|
* Modernisation, unification and other changes and fixes:
|
|
|
|
* Copyright (C) 2007-2009 Sam Ravnborg <sam@ravnborg.org>
|
2009-04-29 15:47:18 +08:00
|
|
|
*
|
|
|
|
*
|
|
|
|
* Don't define absolute symbols until and unless you know that symbol
|
|
|
|
* value should remain constant even if kernel image is relocated
|
|
|
|
* at run time. Absolute symbols are not relocated. If symbol value should
|
|
|
|
* change if kernel is relocated, make the symbol section relative and
|
|
|
|
* put it inside the section definition.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifdef CONFIG_X86_32
|
|
|
|
#define LOAD_OFFSET __PAGE_OFFSET
|
|
|
|
#else
|
|
|
|
#define LOAD_OFFSET __START_KERNEL_map
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <asm-generic/vmlinux.lds.h>
|
|
|
|
#include <asm/asm-offsets.h>
|
|
|
|
#include <asm/thread_info.h>
|
|
|
|
#include <asm/page_types.h>
|
|
|
|
#include <asm/cache.h>
|
|
|
|
#include <asm/boot.h>
|
|
|
|
|
|
|
|
#undef i386 /* in case the preprocessor is a 32bit one */
|
|
|
|
|
|
|
|
OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
|
|
|
|
|
|
|
|
#ifdef CONFIG_X86_32
|
|
|
|
OUTPUT_ARCH(i386)
|
|
|
|
ENTRY(phys_startup_32)
|
2011-01-19 17:09:42 +08:00
|
|
|
/* 32-bit build: define the symbol jiffies with the same value (address) as jiffies_64. */
jiffies = jiffies_64;
|
2009-04-29 15:47:18 +08:00
|
|
|
#else
|
|
|
|
OUTPUT_ARCH(i386:x86-64)
|
|
|
|
ENTRY(phys_startup_64)
|
2011-01-19 17:09:42 +08:00
|
|
|
/* 64-bit build: define the symbol jiffies_64 with the same value (address) as jiffies. */
jiffies_64 = jiffies;
|
2009-04-29 15:47:18 +08:00
|
|
|
#endif
|
|
|
|
|
2016-02-18 06:41:14 +08:00
|
|
|
#if defined(CONFIG_X86_64)
|
2009-10-19 21:12:04 +08:00
|
|
|
/*
|
2016-02-18 06:41:14 +08:00
|
|
|
* On 64-bit, align RODATA to 2MB so we retain large page mappings for
|
|
|
|
* boundaries spanning kernel text, rodata and data sections.
|
2009-10-19 21:12:04 +08:00
|
|
|
*
|
|
|
|
* However, kernel identity mappings will have different RWX permissions
|
|
|
|
* to the pages mapping to text and to the pages padding (which are freed) the
|
|
|
|
* text section. Hence kernel identity mappings will be broken to smaller
|
|
|
|
* pages. For 64-bit, kernel text and kernel identity mappings are different,
|
2016-02-18 06:41:14 +08:00
|
|
|
* so we can enable protection checks as well as retain 2MB large page
|
|
|
|
* mappings for kernel text.
|
2009-10-19 21:12:04 +08:00
|
|
|
*/
|
2016-02-18 06:41:14 +08:00
|
|
|
#define X64_ALIGN_RODATA_BEGIN . = ALIGN(HPAGE_SIZE);
|
2009-10-15 05:46:56 +08:00
|
|
|
|
2016-02-18 06:41:14 +08:00
|
|
|
#define X64_ALIGN_RODATA_END \
|
2009-10-15 05:46:56 +08:00
|
|
|
. = ALIGN(HPAGE_SIZE); \
|
|
|
|
__end_rodata_hpage_align = .;
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
2016-02-18 06:41:14 +08:00
|
|
|
#define X64_ALIGN_RODATA_BEGIN
|
|
|
|
#define X64_ALIGN_RODATA_END
|
2009-10-15 05:46:56 +08:00
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2009-04-29 15:47:19 +08:00
|
|
|
PHDRS {
|
|
|
|
text PT_LOAD FLAGS(5); /* R_E */
|
2010-11-17 05:31:26 +08:00
|
|
|
data PT_LOAD FLAGS(6); /* RW_ */
|
2009-04-29 15:47:19 +08:00
|
|
|
#ifdef CONFIG_X86_64
|
|
|
|
#ifdef CONFIG_SMP
|
2009-09-04 16:18:07 +08:00
|
|
|
percpu PT_LOAD FLAGS(6); /* RW_ */
|
2009-04-29 15:47:19 +08:00
|
|
|
#endif
|
2009-08-25 21:50:53 +08:00
|
|
|
init PT_LOAD FLAGS(7); /* RWE */
|
2009-04-29 15:47:19 +08:00
|
|
|
#endif
|
|
|
|
note PT_NOTE FLAGS(0); /* ___ */
|
|
|
|
}
|
2009-04-29 15:47:18 +08:00
|
|
|
|
2009-04-29 15:47:20 +08:00
|
|
|
SECTIONS
|
|
|
|
{
|
|
|
|
#ifdef CONFIG_X86_32
|
x86/kallsyms: fix GOLD link failure with new relative kallsyms table format
Commit 2213e9a66bb8 ("kallsyms: add support for relative offsets in
kallsyms address table") changed the default kallsyms symbol table
format to use relative references rather than absolute addresses.
This reduces the size of the kallsyms symbol table by 50% on 64-bit
architectures, and further reduces the size of the relocation tables
used by relocatable kernels. Since the memory footprint of the static
kernel image is always much smaller than 4 GB, these relative references
are assumed to be representable in 32 bits, even when the native word
size is 64 bits.
On 64-bit architectures, this obviously only works if the distance
between each relative reference and the chosen anchor point is
representable in 32 bits, and so the table generation code in
scripts/kallsyms.c scans the table for the lowest value that is covered
by the kernel text, and selects it as the anchor point.
However, when using the GOLD linker rather than the default BFD linker
to build the x86_64 kernel, the symbol phys_startup_64, which is the
result of arithmetic defined in the linker script, is emitted as a 'T'
rather than an 'A' type symbol, causing scripts/kallsyms.c to
mistake it for a suitable anchor point, even though it is far away from
the actual kernel image in the virtual address space. This results in
out-of-range warnings from scripts/kallsyms.c and a broken build.
So let's align with the BFD linker, and emit the phys_startup_[32|64]
symbols as absolute symbols explicitly. Note that the out of range
issue does not exist on 32-bit x86, but this patch changes both symbols
for symmetry.
Reported-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-03-18 17:04:37 +08:00
|
|
|
. = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
|
|
|
|
/*
 * Entry symbol named by ENTRY() for the 32-bit image: startup_32 with
 * LOAD_OFFSET subtracted. ABSOLUTE() makes the linker emit it as an
 * absolute symbol rather than a section-relative one.
 */
phys_startup_32 = ABSOLUTE(startup_32 - LOAD_OFFSET);
|
2009-04-29 15:47:20 +08:00
|
|
|
#else
|
x86/kallsyms: fix GOLD link failure with new relative kallsyms table format
Commit 2213e9a66bb8 ("kallsyms: add support for relative offsets in
kallsyms address table") changed the default kallsyms symbol table
format to use relative references rather than absolute addresses.
This reduces the size of the kallsyms symbol table by 50% on 64-bit
architectures, and further reduces the size of the relocation tables
used by relocatable kernels. Since the memory footprint of the static
kernel image is always much smaller than 4 GB, these relative references
are assumed to be representable in 32 bits, even when the native word
size is 64 bits.
On 64-bit architectures, this obviously only works if the distance
between each relative reference and the chosen anchor point is
representable in 32 bits, and so the table generation code in
scripts/kallsyms.c scans the table for the lowest value that is covered
by the kernel text, and selects it as the anchor point.
However, when using the GOLD linker rather than the default BFD linker
to build the x86_64 kernel, the symbol phys_startup_64, which is the
result of arithmetic defined in the linker script, is emitted as a 'T'
rather than an 'A' type symbol, causing scripts/kallsyms.c to
mistake it for a suitable anchor point, even though it is far away from
the actual kernel image in the virtual address space. This results in
out-of-range warnings from scripts/kallsyms.c and a broken build.
So let's align with the BFD linker, and emit the phys_startup_[32|64]
symbols as absolute symbols explicitly. Note that the out of range
issue does not exist on 32-bit x86, but this patch changes both symbols
for symmetry.
Reported-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-03-18 17:04:37 +08:00
|
|
|
. = __START_KERNEL;
|
|
|
|
/*
 * Entry symbol named by ENTRY() for the 64-bit image: startup_64 with
 * LOAD_OFFSET subtracted. ABSOLUTE() makes the linker emit it as an
 * absolute symbol rather than a section-relative one.
 */
phys_startup_64 = ABSOLUTE(startup_64 - LOAD_OFFSET);
|
2009-04-29 15:47:20 +08:00
|
|
|
#endif
|
|
|
|
|
2009-04-29 15:47:21 +08:00
|
|
|
/* Text and read-only data */
|
|
|
|
.text : AT(ADDR(.text) - LOAD_OFFSET) {
|
2009-09-17 04:44:28 +08:00
|
|
|
_text = .;
|
|
|
|
/* bootstrapping code */
|
|
|
|
HEAD_TEXT
|
2009-04-29 15:47:21 +08:00
|
|
|
. = ALIGN(8);
|
|
|
|
_stext = .;
|
|
|
|
TEXT_TEXT
|
|
|
|
SCHED_TEXT
|
2016-10-08 08:02:55 +08:00
|
|
|
CPUIDLE_TEXT
|
2009-04-29 15:47:21 +08:00
|
|
|
LOCK_TEXT
|
|
|
|
KPROBES_TEXT
|
2011-03-08 02:10:39 +08:00
|
|
|
ENTRY_TEXT
|
2009-04-29 15:47:21 +08:00
|
|
|
IRQENTRY_TEXT
|
2016-03-26 05:22:05 +08:00
|
|
|
SOFTIRQENTRY_TEXT
|
2009-04-29 15:47:21 +08:00
|
|
|
*(.fixup)
|
|
|
|
*(.gnu.warning)
|
|
|
|
/* End of text section */
|
|
|
|
_etext = .;
|
|
|
|
} :text = 0x9090
|
|
|
|
|
|
|
|
NOTES :text :note
|
|
|
|
|
2009-09-17 04:44:30 +08:00
|
|
|
EXCEPTION_TABLE(16) :text = 0x9090
|
2009-04-29 15:47:22 +08:00
|
|
|
|
2010-11-17 05:31:26 +08:00
|
|
|
/* .text should occupy whole number of pages */
|
|
|
|
. = ALIGN(PAGE_SIZE);
|
2016-02-18 06:41:14 +08:00
|
|
|
X64_ALIGN_RODATA_BEGIN
|
2009-08-25 21:50:53 +08:00
|
|
|
RO_DATA(PAGE_SIZE)
|
2016-02-18 06:41:14 +08:00
|
|
|
X64_ALIGN_RODATA_END
|
2009-04-29 15:47:22 +08:00
|
|
|
|
2009-04-29 15:47:23 +08:00
|
|
|
/* Data */
|
|
|
|
.data : AT(ADDR(.data) - LOAD_OFFSET) {
|
2009-05-11 20:22:00 +08:00
|
|
|
/* Start of data section */
|
|
|
|
_sdata = .;
|
2009-08-25 21:50:53 +08:00
|
|
|
|
|
|
|
/* init_task */
|
|
|
|
INIT_TASK_DATA(THREAD_SIZE)
|
2009-04-29 15:47:23 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_X86_32
|
2009-08-25 21:50:53 +08:00
|
|
|
/* 32 bit has nosave before _edata */
|
|
|
|
NOSAVE_DATA
|
2009-04-29 15:47:23 +08:00
|
|
|
#endif
|
|
|
|
|
2009-08-25 21:50:53 +08:00
|
|
|
PAGE_ALIGNED_DATA(PAGE_SIZE)
|
2009-04-29 15:47:23 +08:00
|
|
|
|
2009-11-13 19:54:40 +08:00
|
|
|
CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
|
2009-04-29 15:47:23 +08:00
|
|
|
|
2009-08-25 21:50:53 +08:00
|
|
|
DATA_DATA
|
|
|
|
CONSTRUCTORS
|
|
|
|
|
|
|
|
/* rarely changed data like cpu maps */
|
2009-11-13 19:54:40 +08:00
|
|
|
READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES)
|
2009-04-29 15:47:23 +08:00
|
|
|
|
|
|
|
/* End of data section */
|
|
|
|
_edata = .;
|
2009-08-25 21:50:53 +08:00
|
|
|
} :data
|
2009-04-29 15:47:23 +08:00
|
|
|
|
2009-04-29 15:47:24 +08:00
|
|
|
|
2011-08-03 21:31:50 +08:00
|
|
|
. = ALIGN(PAGE_SIZE);
|
|
|
|
__vvar_page = .;
|
|
|
|
|
|
|
|
.vvar : AT(ADDR(.vvar) - LOAD_OFFSET) {
|
2011-08-03 21:31:51 +08:00
|
|
|
/* work around gold bug 13023 */
|
|
|
|
__vvar_beginning_hack = .;
|
2011-08-03 21:31:50 +08:00
|
|
|
|
2011-08-03 21:31:51 +08:00
|
|
|
/* Place all vvars at the offsets in asm/vvar.h. */
|
|
|
|
#define EMIT_VVAR(name, offset) \
|
|
|
|
. = __vvar_beginning_hack + offset; \
|
2011-08-03 21:31:50 +08:00
|
|
|
*(.vvar_ ## name)
|
|
|
|
#define __VVAR_KERNEL_LDS
|
|
|
|
#include <asm/vvar.h>
|
|
|
|
#undef __VVAR_KERNEL_LDS
|
|
|
|
#undef EMIT_VVAR
|
|
|
|
|
2014-03-18 06:22:11 +08:00
|
|
|
/*
|
|
|
|
* Pad the rest of the page with zeros. Otherwise the loader
|
|
|
|
* can leave garbage here.
|
|
|
|
*/
|
|
|
|
. = __vvar_beginning_hack + PAGE_SIZE;
|
2011-08-03 21:31:50 +08:00
|
|
|
} :data
|
|
|
|
|
|
|
|
/*
 * Move the location counter to __vvar_page + PAGE_SIZE, rounded up to a
 * PAGE_SIZE boundary, so that whatever follows starts after the vvar page.
 */
. = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE);
|
|
|
|
|
2009-08-25 21:50:53 +08:00
|
|
|
/* Init code and data - will be freed after init */
|
|
|
|
. = ALIGN(PAGE_SIZE);
|
|
|
|
.init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) {
|
|
|
|
__init_begin = .; /* paired with __init_end */
|
2009-04-29 15:47:25 +08:00
|
|
|
}
|
|
|
|
|
2009-08-25 21:50:53 +08:00
|
|
|
#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
|
2009-04-29 15:47:25 +08:00
|
|
|
/*
|
2009-08-25 21:50:53 +08:00
|
|
|
* percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
|
|
|
|
* output PHDR, so the next output section - .init.text - should
|
|
|
|
* start another segment - init.
|
2009-04-29 15:47:25 +08:00
|
|
|
*/
|
2011-01-25 21:26:50 +08:00
|
|
|
PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
|
2014-11-04 16:50:48 +08:00
|
|
|
ASSERT(SIZEOF(.data..percpu) < CONFIG_PHYSICAL_START,
|
|
|
|
"per-CPU data too large - increase CONFIG_PHYSICAL_START")
|
2009-08-25 21:50:53 +08:00
|
|
|
#endif
|
2009-04-29 15:47:25 +08:00
|
|
|
|
2009-09-17 04:44:30 +08:00
|
|
|
INIT_TEXT_SECTION(PAGE_SIZE)
|
2009-08-25 21:50:53 +08:00
|
|
|
#ifdef CONFIG_X86_64
|
|
|
|
:init
|
|
|
|
#endif
|
2009-04-29 15:47:25 +08:00
|
|
|
|
2016-01-27 05:12:07 +08:00
|
|
|
/*
|
|
|
|
* Section for code used exclusively before alternatives are run. All
|
|
|
|
* references to such code must be patched out by alternatives, normally
|
|
|
|
* by using X86_FEATURE_ALWAYS CPU feature bit.
|
|
|
|
*
|
|
|
|
* See static_cpu_has() for an example.
|
|
|
|
*/
|
|
|
|
.altinstr_aux : AT(ADDR(.altinstr_aux) - LOAD_OFFSET) {
|
|
|
|
*(.altinstr_aux)
|
|
|
|
}
|
|
|
|
|
2009-09-17 04:44:30 +08:00
|
|
|
INIT_DATA_SECTION(16)
|
2009-04-29 15:47:25 +08:00
|
|
|
|
|
|
|
.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
|
|
|
|
__x86_cpu_dev_start = .;
|
|
|
|
*(.x86_cpu_dev.init)
|
|
|
|
__x86_cpu_dev_end = .;
|
|
|
|
}
|
|
|
|
|
2013-10-18 06:35:35 +08:00
|
|
|
#ifdef CONFIG_X86_INTEL_MID
|
|
|
|
.x86_intel_mid_dev.init : AT(ADDR(.x86_intel_mid_dev.init) - \
|
|
|
|
LOAD_OFFSET) {
|
|
|
|
__x86_intel_mid_dev_start = .;
|
|
|
|
*(.x86_intel_mid_dev.init)
|
|
|
|
__x86_intel_mid_dev_end = .;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2010-08-28 02:19:33 +08:00
|
|
|
/*
|
|
|
|
* start address and size of operations which during runtime
|
|
|
|
* can be patched with virtualization friendly instructions or
|
|
|
|
* baremetal native ones. Think page table operations.
|
|
|
|
* Details in paravirt_types.h
|
|
|
|
*/
|
2009-04-29 15:47:26 +08:00
|
|
|
. = ALIGN(8);
|
|
|
|
.parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
|
|
|
|
__parainstructions = .;
|
|
|
|
*(.parainstructions)
|
|
|
|
__parainstructions_end = .;
|
|
|
|
}
|
|
|
|
|
2010-08-28 02:19:33 +08:00
|
|
|
/*
|
|
|
|
* struct alt_inst entries. From the header (alternative.h):
|
|
|
|
* "Alternative instructions for different CPU types or capabilities"
|
|
|
|
* Think locking instructions on spinlocks.
|
|
|
|
*/
|
2009-04-29 15:47:26 +08:00
|
|
|
. = ALIGN(8);
|
|
|
|
.altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
|
|
|
|
__alt_instructions = .;
|
|
|
|
*(.altinstructions)
|
|
|
|
__alt_instructions_end = .;
|
|
|
|
}
|
|
|
|
|
2010-08-28 02:19:33 +08:00
|
|
|
/*
|
|
|
|
* And here are the replacement instructions. The linker sticks
|
|
|
|
* them as binary blobs. The .altinstructions has enough data to
|
|
|
|
* get the address and the length of them to patch the kernel safely.
|
|
|
|
*/
|
2009-04-29 15:47:26 +08:00
|
|
|
.altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
|
|
|
|
*(.altinstr_replacement)
|
|
|
|
}
|
|
|
|
|
2010-08-28 02:19:33 +08:00
|
|
|
/*
|
|
|
|
* struct iommu_table_entry entries are injected in this section.
|
|
|
|
* It is an array of IOMMUs which during run time gets sorted depending
|
|
|
|
* on its dependency order. After rootfs_initcall is complete
|
|
|
|
* this section can be safely removed.
|
|
|
|
*/
|
x86, iommu: Add IOMMU_INIT macros, .iommu_table section, and iommu_table_entry structure
This patch set adds a mechanism to "modularize" the IOMMUs we have
on X86. Currently the count of IOMMUs is up to six and they have a complex
relationship that requires careful execution order. 'pci_iommu_alloc'
does that today, but most folks are unhappy with how it does it.
This patch set addresses this and also paves a mechanism to jettison
unused IOMMUs during run-time. For details that sparked this, please
refer to: http://lkml.org/lkml/2010/8/2/282
The first solution that comes to mind is to convert wholesale
the IOMMU detection routines to be called during initcall
time frame. Unfortunately that misses the dependency relationship
that some of the IOMMUs have (for example: for AMD-Vi IOMMU to work,
GART detection MUST run first, and before all of that SWIOTLB MUST run).
The second solution would be to introduce a registration call wherein
the IOMMU would provide its detection/init routines and as well on what
MUST run before it. That would work, except that the 'pci_iommu_alloc'
which would run through this list, is called during mem_init. This means we
don't have any memory allocator, and it is so early that we haven't yet
started running through the initcall_t list.
This solution borrows concepts from the 2nd idea and from how
MODULE_INIT works. A macro is provided that each IOMMU uses to define
its detect function and early_init (before the memory allocator is
active), as well as what other IOMMU MUST run before us. Since most IOMMUs
depend on having SWIOTLB run first ("pci_swiotlb_detect") a convenience macro
to depend on that is also provided.
This macro is similar in design to MODULE_PARAM macro wherein
we setup a .iommu_table section in which we populate it with the values
that match a struct iommu_table_entry. During bootup we will sort
through the array so that the IOMMUs that MUST run before us are first
elements in the array. And then we just iterate through them calling the
detection routine and if appropriate, the init routines.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
LKML-Reference: <1282845485-8991-2-git-send-email-konrad.wilk@oracle.com>
CC: H. Peter Anvin <hpa@zytor.com>
CC: Fujita Tomonori <fujita.tomonori@lab.ntt.co.jp>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2010-08-27 01:57:56 +08:00
|
|
|
.iommu_table : AT(ADDR(.iommu_table) - LOAD_OFFSET) {
|
|
|
|
__iommu_table = .;
|
|
|
|
*(.iommu_table)
|
|
|
|
__iommu_table_end = .;
|
|
|
|
}
|
2011-02-15 07:34:57 +08:00
|
|
|
|
2011-05-21 08:51:17 +08:00
|
|
|
. = ALIGN(8);
|
|
|
|
.apicdrivers : AT(ADDR(.apicdrivers) - LOAD_OFFSET) {
|
|
|
|
__apicdrivers = .;
|
|
|
|
*(.apicdrivers);
|
|
|
|
__apicdrivers_end = .;
|
|
|
|
}
|
|
|
|
|
2010-08-31 02:10:02 +08:00
|
|
|
. = ALIGN(8);
|
2009-04-29 15:47:27 +08:00
|
|
|
/*
|
|
|
|
* .exit.text is discarded at runtime, not link time, to deal with
|
|
|
|
* references from .altinstructions and .eh_frame
|
|
|
|
*/
|
|
|
|
.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
|
|
|
|
EXIT_TEXT
|
|
|
|
}
|
|
|
|
|
|
|
|
.exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
|
|
|
|
EXIT_DATA
|
|
|
|
}
|
|
|
|
|
2009-08-25 21:50:53 +08:00
|
|
|
#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
|
2011-03-25 01:50:09 +08:00
|
|
|
PERCPU_SECTION(INTERNODE_CACHE_BYTES)
|
2009-04-29 15:47:28 +08:00
|
|
|
#endif
|
|
|
|
|
|
|
|
. = ALIGN(PAGE_SIZE);
|
2009-04-29 18:56:58 +08:00
|
|
|
|
2009-04-29 15:47:28 +08:00
|
|
|
/* freed after init ends here */
|
2009-04-29 18:56:58 +08:00
|
|
|
.init.end : AT(ADDR(.init.end) - LOAD_OFFSET) {
|
|
|
|
__init_end = .;
|
|
|
|
}
|
2009-04-29 15:47:28 +08:00
|
|
|
|
2009-08-25 21:50:53 +08:00
|
|
|
/*
|
|
|
|
* smp_locks might be freed after init
|
|
|
|
* start/end must be page aligned
|
|
|
|
*/
|
|
|
|
. = ALIGN(PAGE_SIZE);
|
|
|
|
.smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
|
|
|
|
__smp_locks = .;
|
|
|
|
*(.smp_locks)
|
|
|
|
. = ALIGN(PAGE_SIZE);
|
2010-03-29 10:42:54 +08:00
|
|
|
__smp_locks_end = .;
|
2009-08-25 21:50:53 +08:00
|
|
|
}
|
|
|
|
|
2009-04-29 15:47:28 +08:00
|
|
|
#ifdef CONFIG_X86_64
|
|
|
|
.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
|
2009-08-25 21:50:53 +08:00
|
|
|
NOSAVE_DATA
|
|
|
|
}
|
2009-04-29 15:47:28 +08:00
|
|
|
#endif
|
|
|
|
|
2009-04-29 15:47:29 +08:00
|
|
|
/* BSS */
|
|
|
|
. = ALIGN(PAGE_SIZE);
|
|
|
|
.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
|
|
|
|
__bss_start = .;
|
2010-02-20 08:03:38 +08:00
|
|
|
*(.bss..page_aligned)
|
2009-04-29 15:47:29 +08:00
|
|
|
*(.bss)
|
2010-11-17 05:31:26 +08:00
|
|
|
. = ALIGN(PAGE_SIZE);
|
2009-04-29 15:47:29 +08:00
|
|
|
__bss_stop = .;
|
|
|
|
}
|
2009-04-29 15:47:28 +08:00
|
|
|
|
2009-04-29 15:47:29 +08:00
|
|
|
. = ALIGN(PAGE_SIZE);
|
|
|
|
.brk : AT(ADDR(.brk) - LOAD_OFFSET) {
|
|
|
|
__brk_base = .;
|
|
|
|
. += 64 * 1024; /* 64k alignment slop space */
|
|
|
|
*(.brk_reservation) /* areas brk users have reserved */
|
|
|
|
__brk_limit = .;
|
|
|
|
}
|
|
|
|
|
x86/boot: Move compressed kernel to the end of the decompression buffer
This change makes later calculations about where the kernel is located
easier to reason about. To better understand this change, we must first
clarify what 'VO' and 'ZO' are. These values were introduced in commits
by hpa:
77d1a4999502 ("x86, boot: make symbols from the main vmlinux available")
37ba7ab5e33c ("x86, boot: make kernel_alignment adjustable; new bzImage fields")
Specifically:
All names prefixed with 'VO_':
- relate to the uncompressed kernel image
- the size of the VO image is: VO__end-VO__text ("VO_INIT_SIZE" define)
All names prefixed with 'ZO_':
- relate to the bootable compressed kernel image (boot/compressed/vmlinux),
which is composed of the following memory areas:
- head text
- compressed kernel (VO image and relocs table)
- decompressor code
- the size of the ZO image is: ZO__end - ZO_startup_32 ("ZO_INIT_SIZE" define, though see below)
The 'INIT_SIZE' value is used to find the larger of the two image sizes:
#define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_extract_offset)
#define VO_INIT_SIZE (VO__end - VO__text)
#if ZO_INIT_SIZE > VO_INIT_SIZE
# define INIT_SIZE ZO_INIT_SIZE
#else
# define INIT_SIZE VO_INIT_SIZE
#endif
The current code uses extract_offset to decide where to position the
copied ZO (i.e. ZO starts at extract_offset). (This is why ZO_INIT_SIZE
currently includes the extract_offset.)
Why does z_extract_offset exist? It's needed because we are trying to minimize
the amount of RAM used for the whole act of creating an uncompressed, executable,
properly relocation-linked kernel image in system memory. We do this so that
kernels can be booted on even very small systems.
To achieve the goal of minimal memory consumption we have implemented an in-place
decompression strategy: instead of cleanly separating the VO and ZO images and
also allocating some memory for the decompression code's runtime needs, we instead
create this elaborate layout of memory buffers where the output (decompressed)
stream, as it progresses, overlaps with and destroys the input (compressed)
stream. This can only be done safely if the ZO image is placed to the end of the
VO range, plus a certain amount of safety distance to make sure that when the last
bytes of the VO range are decompressed, the compressed stream pointer is safely
beyond the end of the VO range.
z_extract_offset is calculated in arch/x86/boot/compressed/mkpiggy.c during
the build process, at a point when we know the exact compressed and
uncompressed size of the kernel images and can calculate this safe minimum
offset value. (Note that the mkpiggy.c calculation is not perfect, because
we don't know the decompressor used at that stage, so the z_extract_offset
calculation is necessarily imprecise and is mostly based on gzip internals -
we'll improve that in the next patch.)
When INIT_SIZE is bigger than VO_INIT_SIZE (uncommon but possible),
the copied ZO occupies the memory from extract_offset to the end of
decompression buffer. It overlaps with the soon-to-be-uncompressed kernel
like this:
                            |-----compressed kernel image------|
                            V                                  V
0                       extract_offset                      +INIT_SIZE
|-----------|---------------|-------------------------|--------|
            |               |                         |        |
          VO__text      startup_32 of ZO          VO__end    ZO__end
            ^                                         ^
            |-------uncompressed kernel image---------|
When INIT_SIZE is equal to VO_INIT_SIZE (likely) there's still space
left from end of ZO to the end of decompressing buffer, like below.
                            |-compressed kernel image-|
                            V                         V
0                       extract_offset                      +INIT_SIZE
|-----------|---------------|-------------------------|--------|
            |               |                         |        |
          VO__text      startup_32 of ZO          ZO__end    VO__end
            ^                                                  ^
            |------------uncompressed kernel image-------------|
To simplify calculations and avoid special cases, it is cleaner to
always place the compressed kernel image in memory so that ZO__end
is at the end of the decompression buffer, instead of placing it at
the start of extract_offset as is currently done.
This patch adds BP_init_size (which is the INIT_SIZE as passed in from
the boot_params) into asm-offsets.c to make it visible to the assembly
code.
Then when moving the ZO, it calculates the starting position of
the copied ZO (via BP_init_size and the ZO run size) so that the VO__end
will be at the end of the decompression buffer. To make the position
calculation safe, the end of ZO is page aligned (and a comment is added
to the existing VO alignment for good measure).
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
[ Rewrote changelog and comments. ]
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Baoquan He <bhe@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: lasse.collin@tukaani.org
Link: http://lkml.kernel.org/r/1461888548-32439-3-git-send-email-keescook@chromium.org
[ Rewrote the changelog some more. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-04-29 08:09:04 +08:00
|
|
|
. = ALIGN(PAGE_SIZE); /* keep VO_INIT_SIZE page aligned */
|
2009-12-15 05:55:20 +08:00
|
|
|
_end = .;
|
2009-04-29 15:47:29 +08:00
|
|
|
|
2009-04-29 15:47:20 +08:00
|
|
|
STABS_DEBUG
|
|
|
|
DWARF_DEBUG
|
linker script: unify usage of discard definition
Discarded sections in different archs share some commonality but have
considerable differences. This led to linker script for each arch
implementing its own /DISCARD/ definition, which makes maintaining
tedious and adding new entries error-prone.
This patch makes all linker scripts to move discard definitions to the
end of the linker script and use the common DISCARDS macro. As ld
uses the first matching section definition, archs can include default
discarded sections by including them earlier in the linker script.
ia64 is notable because it first throws away some ia64 specific
subsections and then include the rest of the sections into the final
image, so those sections must be discarded before the inclusion.
defconfig compile tested for x86, x86-64, powerpc, powerpc64, ia64,
alpha, sparc, sparc64 and s390. Michal Simek tested microblaze.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Mike Frysinger <vapier@gentoo.org>
Tested-by: Michal Simek <monstr@monstr.eu>
Cc: linux-arch@vger.kernel.org
Cc: Michal Simek <monstr@monstr.eu>
Cc: microblaze-uclinux@itee.uq.edu.au
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Tony Luck <tony.luck@intel.com>
2009-07-09 10:27:40 +08:00
|
|
|
|
|
|
|
/* Sections to be discarded */
|
|
|
|
DISCARDS
|
2016-02-29 12:22:35 +08:00
|
|
|
/DISCARD/ : {
|
|
|
|
*(.eh_frame)
|
|
|
|
*(__func_stack_frame_non_standard)
|
|
|
|
}
|
2009-04-29 15:47:20 +08:00
|
|
|
}
|
|
|
|
|
2009-04-29 15:47:18 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_X86_32
|
2009-10-16 13:18:46 +08:00
|
|
|
/*
|
|
|
|
* The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
|
|
|
|
*/
|
2009-08-04 05:44:54 +08:00
|
|
|
. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
|
|
|
|
"kernel image bigger than KERNEL_IMAGE_SIZE");
|
2009-04-29 15:47:18 +08:00
|
|
|
#else
|
|
|
|
/*
|
|
|
|
* Per-cpu symbols which need to be offset from __per_cpu_load
|
|
|
|
* for the boot processor.
|
|
|
|
*/
|
2009-10-29 21:34:15 +08:00
|
|
|
#define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load
|
2009-04-29 15:47:18 +08:00
|
|
|
INIT_PER_CPU(gdt_page);
|
|
|
|
INIT_PER_CPU(irq_stack_union);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Build-time check on the image size:
|
|
|
|
*/
|
2009-08-04 05:44:54 +08:00
|
|
|
. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
|
|
|
|
"kernel image bigger than KERNEL_IMAGE_SIZE");
|
2009-04-29 15:47:18 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_SMP
|
2009-10-29 21:34:15 +08:00
|
|
|
. = ASSERT((irq_stack_union == 0),
|
2009-08-04 05:44:54 +08:00
|
|
|
"irq_stack_union is not at start of per-cpu area");
|
2009-04-29 15:47:18 +08:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#endif /* CONFIG_X86_32 */
|
|
|
|
|
2015-09-10 06:38:55 +08:00
|
|
|
#ifdef CONFIG_KEXEC_CORE
|
2009-04-29 15:47:18 +08:00
|
|
|
#include <asm/kexec.h>
|
|
|
|
|
2009-08-04 05:44:54 +08:00
|
|
|
. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
|
|
|
|
"kexec control code size is too big");
|
2009-04-29 15:47:18 +08:00
|
|
|
#endif
|
|
|
|
|